git.saurik.com Git - apple/xnu.git/commitdiff
xnu-3247.1.106.tar.gz os-x-1011 v3247.1.106
author Apple <opensource@apple.com>
Sun, 6 Dec 2015 00:33:28 +0000 (00:33 +0000)
committer Apple <opensource@apple.com>
Sun, 6 Dec 2015 00:33:28 +0000 (00:33 +0000)
1155 files changed:
.clang-format [new file with mode: 0644]
.gitignore [new file with mode: 0644]
EXTERNAL_HEADERS/AssertMacros.h
EXTERNAL_HEADERS/Availability.h
EXTERNAL_HEADERS/AvailabilityInternal.h
EXTERNAL_HEADERS/AvailabilityMacros.h
EXTERNAL_HEADERS/Makefile
EXTERNAL_HEADERS/architecture/Makefile
EXTERNAL_HEADERS/corecrypto/cc.h
EXTERNAL_HEADERS/corecrypto/cc_config.h
EXTERNAL_HEADERS/corecrypto/cc_debug.h [new file with mode: 0644]
EXTERNAL_HEADERS/corecrypto/cc_macros.h [new file with mode: 0644]
EXTERNAL_HEADERS/corecrypto/cc_priv.h
EXTERNAL_HEADERS/corecrypto/ccaes.h
EXTERNAL_HEADERS/corecrypto/ccasn1.h
EXTERNAL_HEADERS/corecrypto/ccder.h
EXTERNAL_HEADERS/corecrypto/ccdes.h
EXTERNAL_HEADERS/corecrypto/ccdigest.h
EXTERNAL_HEADERS/corecrypto/ccdigest_priv.h
EXTERNAL_HEADERS/corecrypto/ccdrbg.h
EXTERNAL_HEADERS/corecrypto/ccdrbg_impl.h
EXTERNAL_HEADERS/corecrypto/cchmac.h
EXTERNAL_HEADERS/corecrypto/ccmd5.h
EXTERNAL_HEADERS/corecrypto/ccmode.h
EXTERNAL_HEADERS/corecrypto/ccmode_factory.h
EXTERNAL_HEADERS/corecrypto/ccmode_impl.h
EXTERNAL_HEADERS/corecrypto/ccn.h
EXTERNAL_HEADERS/corecrypto/ccpad.h
EXTERNAL_HEADERS/corecrypto/ccpbkdf2.h
EXTERNAL_HEADERS/corecrypto/ccrc4.h
EXTERNAL_HEADERS/corecrypto/ccrng.h
EXTERNAL_HEADERS/corecrypto/ccrng_system.h
EXTERNAL_HEADERS/corecrypto/ccsha1.h
EXTERNAL_HEADERS/corecrypto/ccsha2.h
EXTERNAL_HEADERS/mach-o/loader.h
EXTERNAL_HEADERS/mach-o/nlist.h
EXTERNAL_HEADERS/mach-o/stab.h
Makefile
README
SETUP/Makefile
SETUP/json_compilation_db/Makefile [new file with mode: 0644]
SETUP/json_compilation_db/json_compilation_db.c [new file with mode: 0644]
SETUP/kextsymboltool/Makefile
SETUP/kextsymboltool/kextsymboltool.c
bsd/Makefile
bsd/bsm/audit_fcntl.h
bsd/conf/Makefile.template
bsd/conf/files
bsd/conf/param.c
bsd/dev/dtrace/dtrace.c
bsd/dev/dtrace/dtrace_glue.c
bsd/dev/dtrace/dtrace_subr.c
bsd/dev/dtrace/lockstat.c
bsd/dev/dtrace/scripts/Makefile
bsd/dev/dtrace/scripts/mptcp.d
bsd/dev/dtrace/scripts/sched.d
bsd/dev/dtrace/sdt.c
bsd/dev/dtrace/sdt_subr.c
bsd/dev/i386/dis_tables.c
bsd/dev/i386/kern_machdep.c
bsd/dev/i386/sysctl.c
bsd/dev/i386/systemcalls.c
bsd/dev/memdev.c
bsd/dev/munge.c
bsd/dev/unix_startup.c
bsd/dev/vn/vn.c
bsd/hfs/Makefile
bsd/hfs/hfs.h
bsd/hfs/hfs_attrlist.c
bsd/hfs/hfs_btreeio.c
bsd/hfs/hfs_catalog.c
bsd/hfs/hfs_catalog.h
bsd/hfs/hfs_cnode.c
bsd/hfs/hfs_cnode.h
bsd/hfs/hfs_cprotect.c
bsd/hfs/hfs_cprotect.h [new file with mode: 0644]
bsd/hfs/hfs_endian.c
bsd/hfs/hfs_extents.c [new file with mode: 0644]
bsd/hfs/hfs_extents.h [new file with mode: 0644]
bsd/hfs/hfs_format.h
bsd/hfs/hfs_fsctl.h
bsd/hfs/hfs_fsinfo.c
bsd/hfs/hfs_hotfiles.c
bsd/hfs/hfs_hotfiles.h
bsd/hfs/hfs_kdebug.h
bsd/hfs/hfs_link.c
bsd/hfs/hfs_lookup.c
bsd/hfs/hfs_readwrite.c
bsd/hfs/hfs_resize.c
bsd/hfs/hfs_search.c
bsd/hfs/hfs_vfsops.c
bsd/hfs/hfs_vfsutils.c
bsd/hfs/hfs_vnops.c
bsd/hfs/hfs_xattr.c
bsd/hfs/hfscommon/BTree/BTree.c
bsd/hfs/hfscommon/BTree/BTreeAllocate.c
bsd/hfs/hfscommon/BTree/BTreeTreeOps.c
bsd/hfs/hfscommon/Catalog/FileIDsServices.c
bsd/hfs/hfscommon/Misc/BTreeWrapper.c
bsd/hfs/hfscommon/Misc/FileExtentMapping.c
bsd/hfs/hfscommon/Misc/VolumeAllocation.c
bsd/hfs/hfscommon/headers/BTreesInternal.h
bsd/hfs/hfscommon/headers/BTreesPrivate.h
bsd/hfs/hfscommon/headers/FileMgrInternal.h
bsd/hfs/rangelist.c
bsd/hfs/rangelist.h
bsd/i386/Makefile
bsd/kern/ast.h
bsd/kern/bsd_init.c
bsd/kern/bsd_stubs.c
bsd/kern/decmpfs.c
bsd/kern/kdebug.c
bsd/kern/kern_aio.c
bsd/kern/kern_control.c
bsd/kern/kern_core.c
bsd/kern/kern_credential.c
bsd/kern/kern_cs.c
bsd/kern/kern_csr.c
bsd/kern/kern_descrip.c
bsd/kern/kern_event.c
bsd/kern/kern_exec.c
bsd/kern/kern_exit.c
bsd/kern/kern_fork.c
bsd/kern/kern_guarded.c
bsd/kern/kern_kpc.c
bsd/kern/kern_lockf.c
bsd/kern/kern_malloc.c
bsd/kern/kern_memorystatus.c
bsd/kern/kern_mib.c
bsd/kern/kern_mman.c
bsd/kern/kern_newsysctl.c
bsd/kern/kern_proc.c
bsd/kern/kern_prot.c
bsd/kern/kern_resource.c
bsd/kern/kern_shutdown.c
bsd/kern/kern_sig.c
bsd/kern/kern_subr.c
bsd/kern/kern_symfile.c
bsd/kern/kern_sysctl.c
bsd/kern/kern_tests.c [deleted file]
bsd/kern/kern_xxx.c
bsd/kern/kpi_mbuf.c
bsd/kern/kpi_socket.c
bsd/kern/kpi_socketfilter.c
bsd/kern/mach_loader.c
bsd/kern/mach_loader.h
bsd/kern/mach_process.c
bsd/kern/makesyscalls.sh
bsd/kern/netboot.c
bsd/kern/policy_check.c
bsd/kern/posix_shm.c
bsd/kern/proc_info.c
bsd/kern/qsort.c
bsd/kern/socket_info.c
bsd/kern/subr_prf.c
bsd/kern/sys_coalition.c
bsd/kern/sys_generic.c
bsd/kern/sys_pipe.c
bsd/kern/sys_work_interval.c [new file with mode: 0644]
bsd/kern/syscalls.master
bsd/kern/sysv_shm.c
bsd/kern/trace.codes
bsd/kern/tty.c
bsd/kern/tty_pty.c
bsd/kern/ubc_subr.c
bsd/kern/uipc_domain.c
bsd/kern/uipc_mbuf.c
bsd/kern/uipc_socket.c
bsd/kern/uipc_socket2.c
bsd/kern/uipc_syscalls.c
bsd/kern/uipc_usrreq.c
bsd/kern/vm_pressure.c
bsd/libkern/Makefile
bsd/libkern/memchr.c
bsd/machine/Makefile
bsd/man/man2/Makefile
bsd/man/man2/accept.2
bsd/man/man2/bind.2
bsd/man/man2/chflags.2
bsd/man/man2/connect.2
bsd/man/man2/connectx.2
bsd/man/man2/disconnectx.2
bsd/man/man2/getattrlistbulk.2
bsd/man/man2/getlcid.2 [deleted file]
bsd/man/man2/getsockname.2
bsd/man/man2/gettimeofday.2
bsd/man/man2/intro.2
bsd/man/man2/kevent_qos.2 [new file with mode: 0644]
bsd/man/man2/kqueue.2
bsd/man/man2/listen.2
bsd/man/man2/mmap.2
bsd/man/man2/mount.2
bsd/man/man2/peeloff.2
bsd/man/man2/poll.2
bsd/man/man2/ptrace.2
bsd/man/man2/reboot.2
bsd/man/man2/recv.2
bsd/man/man2/searchfs.2
bsd/man/man2/select.2
bsd/man/man2/sem_open.2
bsd/man/man2/sem_unlink.2
bsd/man/man2/setattrlist.2
bsd/man/man2/setlcid.2 [deleted file]
bsd/man/man2/setpgid.2
bsd/man/man2/shutdown.2
bsd/man/man2/sigaction.2
bsd/man/man2/socket.2
bsd/man/man2/stat.2
bsd/man/man2/write.2
bsd/man/man4/inet.4
bsd/man/man4/route.4
bsd/man/man4/tcp.4
bsd/man/man4/udp.4
bsd/miscfs/devfs/Makefile
bsd/miscfs/devfs/devfs_tree.c
bsd/miscfs/devfs/devfs_vfsops.c
bsd/miscfs/fifofs/Makefile
bsd/miscfs/fifofs/fifo_vnops.c
bsd/miscfs/specfs/Makefile
bsd/miscfs/specfs/spec_vnops.c
bsd/miscfs/union/Makefile
bsd/net/Makefile
bsd/net/altq/Makefile
bsd/net/bpf.c
bsd/net/bpf.h
bsd/net/bpfdesc.h
bsd/net/classq/Makefile
bsd/net/classq/classq.c
bsd/net/classq/classq_sfb.c
bsd/net/classq/classq_sfb.h
bsd/net/classq/classq_subr.c
bsd/net/classq/if_classq.h
bsd/net/content_filter.c
bsd/net/content_filter.h
bsd/net/devtimer.c
bsd/net/dlil.c
bsd/net/dlil.h
bsd/net/ether_if_module.c
bsd/net/if.c
bsd/net/if.h
bsd/net/if_bond.c
bsd/net/if_bridge.c
bsd/net/if_gif.c
bsd/net/if_ipsec.c
bsd/net/if_llreach.h
bsd/net/if_loop.c
bsd/net/if_media.h
bsd/net/if_stf.c
bsd/net/if_utun.c
bsd/net/if_utun.h
bsd/net/if_utun_crypto_dtls.c
bsd/net/if_var.h
bsd/net/if_vlan.c
bsd/net/kpi_interface.c
bsd/net/kpi_interface.h
bsd/net/kpi_protocol.c
bsd/net/lacp.h
bsd/net/necp.c
bsd/net/necp.h
bsd/net/net_perf.c [new file with mode: 0644]
bsd/net/net_perf.h [new file with mode: 0644]
bsd/net/net_stubs.c
bsd/net/network_agent.c [new file with mode: 0644]
bsd/net/network_agent.h [new file with mode: 0644]
bsd/net/ntstat.c
bsd/net/ntstat.h
bsd/net/packet_mangler.c
bsd/net/packet_mangler.h
bsd/net/pf.c
bsd/net/pf_ioctl.c
bsd/net/pf_ruleset.c
bsd/net/pf_table.c
bsd/net/pfkeyv2.h
bsd/net/pfvar.h
bsd/net/pktap.c
bsd/net/pktsched/Makefile
bsd/net/pktsched/pktsched.c
bsd/net/pktsched/pktsched_cbq.c
bsd/net/pktsched/pktsched_fairq.c
bsd/net/pktsched/pktsched_hfsc.c
bsd/net/pktsched/pktsched_priq.c
bsd/net/pktsched/pktsched_qfq.c
bsd/net/pktsched/pktsched_tcq.c
bsd/net/radix.h
bsd/net/raw_usrreq.c
bsd/net/route.c
bsd/net/route.h
bsd/net/rtsock.c
bsd/netinet/Makefile
bsd/netinet/flow_divert.c
bsd/netinet/flow_divert.h
bsd/netinet/flow_divert_proto.h
bsd/netinet/icmp6.h
bsd/netinet/igmp.c
bsd/netinet/in.c
bsd/netinet/in.h
bsd/netinet/in_arp.c
bsd/netinet/in_cksum.c
bsd/netinet/in_dhcp.c [deleted file]
bsd/netinet/in_dhcp.h [deleted file]
bsd/netinet/in_gif.c
bsd/netinet/in_mcast.c
bsd/netinet/in_pcb.c
bsd/netinet/in_pcb.h
bsd/netinet/in_pcblist.c
bsd/netinet/in_proto.c
bsd/netinet/in_systm.h
bsd/netinet/in_tclass.c
bsd/netinet/in_var.h
bsd/netinet/ip_dummynet.c
bsd/netinet/ip_ecn.c
bsd/netinet/ip_ecn.h
bsd/netinet/ip_encap.c
bsd/netinet/ip_flowid.h
bsd/netinet/ip_fw2.c
bsd/netinet/ip_fw2_compat.c
bsd/netinet/ip_icmp.c
bsd/netinet/ip_input.c
bsd/netinet/ip_output.c
bsd/netinet/ip_var.h
bsd/netinet/mp_pcb.c
bsd/netinet/mp_pcb.h
bsd/netinet/mptcp.c
bsd/netinet/mptcp.h
bsd/netinet/mptcp_opt.c
bsd/netinet/mptcp_opt.h
bsd/netinet/mptcp_subr.c
bsd/netinet/mptcp_timer.c
bsd/netinet/mptcp_timer.h
bsd/netinet/mptcp_usrreq.c
bsd/netinet/mptcp_var.h
bsd/netinet/raw_ip.c
bsd/netinet/tcp.h
bsd/netinet/tcp_cache.c [new file with mode: 0644]
bsd/netinet/tcp_cache.h [new file with mode: 0644]
bsd/netinet/tcp_cc.c
bsd/netinet/tcp_cc.h
bsd/netinet/tcp_cubic.c
bsd/netinet/tcp_debug.h
bsd/netinet/tcp_input.c
bsd/netinet/tcp_ledbat.c
bsd/netinet/tcp_newreno.c
bsd/netinet/tcp_output.c
bsd/netinet/tcp_sack.c
bsd/netinet/tcp_subr.c
bsd/netinet/tcp_timer.c
bsd/netinet/tcp_timer.h
bsd/netinet/tcp_usrreq.c
bsd/netinet/tcp_var.h
bsd/netinet/udp.h
bsd/netinet/udp_usrreq.c
bsd/netinet/udp_var.h
bsd/netinet6/Makefile
bsd/netinet6/ah_core.c
bsd/netinet6/ah_input.c
bsd/netinet6/esp.h
bsd/netinet6/esp_core.c
bsd/netinet6/esp_input.c
bsd/netinet6/esp_output.c
bsd/netinet6/esp_rijndael.c
bsd/netinet6/esp_rijndael.h
bsd/netinet6/frag6.c
bsd/netinet6/icmp6.c
bsd/netinet6/in6.c
bsd/netinet6/in6.h
bsd/netinet6/in6_gif.c
bsd/netinet6/in6_ifattach.c
bsd/netinet6/in6_mcast.c
bsd/netinet6/in6_pcb.c
bsd/netinet6/in6_proto.c
bsd/netinet6/in6_src.c
bsd/netinet6/in6_var.h
bsd/netinet6/ip6_ecn.h
bsd/netinet6/ip6_forward.c
bsd/netinet6/ip6_input.c
bsd/netinet6/ip6_output.c
bsd/netinet6/ip6_var.h
bsd/netinet6/ipsec.c
bsd/netinet6/ipsec.h
bsd/netinet6/ipsec6.h
bsd/netinet6/mld6.c
bsd/netinet6/nd6.c
bsd/netinet6/nd6.h
bsd/netinet6/nd6_nbr.c
bsd/netinet6/nd6_rtr.c
bsd/netinet6/nd6_var.h [new file with mode: 0644]
bsd/netinet6/raw_ip6.c
bsd/netinet6/scope6_var.h
bsd/netinet6/udp6_output.c
bsd/netinet6/udp6_usrreq.c
bsd/netkey/Makefile
bsd/netkey/key.c
bsd/netkey/keydb.c
bsd/netkey/keydb.h
bsd/netkey/keysock.c
bsd/nfs/nfs.h
bsd/nfs/nfs4_subs.c
bsd/nfs/nfs4_vnops.c
bsd/nfs/nfs_bio.c
bsd/nfs/nfs_gss.c
bsd/nfs/nfs_gss.h
bsd/nfs/nfs_ioctl.h
bsd/nfs/nfs_serv.c
bsd/nfs/nfs_socket.c
bsd/nfs/nfs_subs.c
bsd/nfs/nfs_syscalls.c
bsd/nfs/nfs_upcall.c
bsd/nfs/nfs_vfsops.c
bsd/nfs/nfs_vnops.c
bsd/nfs/nfsm_subs.h
bsd/nfs/nfsmount.h
bsd/pgo/profile_runtime.c [new file with mode: 0644]
bsd/security/audit/audit.c
bsd/security/audit/audit_arg.c
bsd/security/audit/audit_bsm.c
bsd/security/audit/audit_bsm_fcntl.c
bsd/security/audit/audit_mac.c
bsd/security/audit/audit_syscalls.c
bsd/security/audit/audit_worker.c
bsd/sys/Makefile
bsd/sys/_types/Makefile
bsd/sys/_types/_timeval64.h [new file with mode: 0644]
bsd/sys/attr.h
bsd/sys/bsdtask_info.h
bsd/sys/buf.h
bsd/sys/buf_internal.h
bsd/sys/cdefs.h
bsd/sys/coalition.h
bsd/sys/codedir_internal.h
bsd/sys/codesign.h
bsd/sys/content_protection.h
bsd/sys/cprotect.h
bsd/sys/csr.h
bsd/sys/disk.h
bsd/sys/domain.h
bsd/sys/dtrace.h
bsd/sys/dtrace_impl.h
bsd/sys/event.h
bsd/sys/eventvar.h
bsd/sys/fcntl.h
bsd/sys/file_internal.h
bsd/sys/filedesc.h
bsd/sys/fsevents.h
bsd/sys/guarded.h
bsd/sys/imgact.h
bsd/sys/kdebug.h
bsd/sys/kern_control.h
bsd/sys/kern_memorystatus.h
bsd/sys/kern_tests.h [deleted file]
bsd/sys/kpi_mbuf.h
bsd/sys/kpi_private.h [new file with mode: 0644]
bsd/sys/kpi_socket.h
bsd/sys/kpi_socketfilter.h
bsd/sys/lctx.h
bsd/sys/loadable_fs.h
bsd/sys/lockf.h
bsd/sys/malloc.h
bsd/sys/mbuf.h
bsd/sys/memory_maintenance.h [new file with mode: 0644]
bsd/sys/mman.h
bsd/sys/mount_internal.h
bsd/sys/munge.h
bsd/sys/pgo.h [new file with mode: 0644]
bsd/sys/priv.h
bsd/sys/proc.h
bsd/sys/proc_info.h
bsd/sys/proc_internal.h
bsd/sys/protosw.h
bsd/sys/pthread_shims.h
bsd/sys/ptrace.h
bsd/sys/reboot.h
bsd/sys/resource.h
bsd/sys/select.h
bsd/sys/signal.h
bsd/sys/signalvar.h
bsd/sys/socket.h
bsd/sys/socketvar.h
bsd/sys/sockio.h
bsd/sys/spawn_internal.h
bsd/sys/stackshot.h [new file with mode: 0644]
bsd/sys/stat.h
bsd/sys/sysctl.h
bsd/sys/sysent.h
bsd/sys/systm.h
bsd/sys/time.h
bsd/sys/ubc.h
bsd/sys/ubc_internal.h
bsd/sys/uio_internal.h
bsd/sys/user.h
bsd/sys/vnode.h
bsd/sys/vnode_internal.h
bsd/sys/work_interval.h [new file with mode: 0644]
bsd/uuid/Makefile
bsd/uxkern/ux_exception.c
bsd/vfs/kpi_vfs.c
bsd/vfs/vfs_attrlist.c
bsd/vfs/vfs_bio.c
bsd/vfs/vfs_cache.c
bsd/vfs/vfs_cluster.c
bsd/vfs/vfs_fsevents.c
bsd/vfs/vfs_journal.c
bsd/vfs/vfs_journal.h
bsd/vfs/vfs_lookup.c
bsd/vfs/vfs_subr.c
bsd/vfs/vfs_syscalls.c
bsd/vfs/vfs_utfconv.c
bsd/vfs/vfs_vnops.c
bsd/vfs/vfs_xattr.c
bsd/vm/dp_backing_file.c
bsd/vm/vm_compressor_backing_file.c
bsd/vm/vm_unix.c
config/BSDKernel.exports
config/IOKit.exports
config/IOKit.x86_64.exports
config/Libkern.exports
config/MACFramework.exports
config/MASTER
config/MASTER.x86_64
config/Mach.exports
config/MasterVersion
config/Private.exports
config/Private.x86_64.exports
config/Unsupported.exports
config/Unsupported.x86_64.exports
config/Unused.exports
iokit/.clang-format [new file with mode: 0644]
iokit/Families/IONVRAM/IONVRAMController.cpp
iokit/IOKit/IOBSD.h
iokit/IOKit/IOBufferMemoryDescriptor.h
iokit/IOKit/IOCPU.h
iokit/IOKit/IOCatalogue.h
iokit/IOKit/IOCommand.h
iokit/IOKit/IOCommandGate.h
iokit/IOKit/IOCommandPool.h
iokit/IOKit/IOCommandQueue.h
iokit/IOKit/IOConditionLock.h
iokit/IOKit/IODMACommand.h
iokit/IOKit/IODMAController.h
iokit/IOKit/IODMAEventSource.h
iokit/IOKit/IODataQueue.h
iokit/IOKit/IODeviceTreeSupport.h
iokit/IOKit/IOEventSource.h
iokit/IOKit/IOFilterInterruptEventSource.h
iokit/IOKit/IOHibernatePrivate.h
iokit/IOKit/IOInterleavedMemoryDescriptor.h
iokit/IOKit/IOInterruptAccounting.h
iokit/IOKit/IOInterruptAccountingPrivate.h
iokit/IOKit/IOInterruptController.h
iokit/IOKit/IOInterruptEventSource.h
iokit/IOKit/IOKernelReporters.h
iokit/IOKit/IOKitDebug.h
iokit/IOKit/IOKitDiagnosticsUserClient.h [new file with mode: 0644]
iokit/IOKit/IOKitKeysPrivate.h
iokit/IOKit/IOLib.h
iokit/IOKit/IOMapper.h
iokit/IOKit/IOMemoryDescriptor.h
iokit/IOKit/IOMultiMemoryDescriptor.h
iokit/IOKit/IONVRAM.h
iokit/IOKit/IOPlatformExpert.h
iokit/IOKit/IOPolledInterface.h
iokit/IOKit/IORangeAllocator.h
iokit/IOKit/IORegistryEntry.h
iokit/IOKit/IOReportMacros.h
iokit/IOKit/IOReturn.h
iokit/IOKit/IOService.h
iokit/IOKit/IOServicePM.h
iokit/IOKit/IOSharedDataQueue.h
iokit/IOKit/IOSubMemoryDescriptor.h
iokit/IOKit/IOSyncer.h
iokit/IOKit/IOTimeStamp.h
iokit/IOKit/IOTimerEventSource.h
iokit/IOKit/IOTypes.h
iokit/IOKit/IOUserClient.h
iokit/IOKit/IOWorkLoop.h
iokit/IOKit/nvram/IONVRAMController.h
iokit/IOKit/platform/AppleMacIO.h
iokit/IOKit/platform/AppleMacIODevice.h
iokit/IOKit/platform/AppleNMI.h
iokit/IOKit/platform/ApplePlatformExpert.h
iokit/IOKit/pwr_mgt/IOPMPowerSource.h
iokit/IOKit/pwr_mgt/IOPMPowerSourceList.h
iokit/IOKit/pwr_mgt/IOPMPrivate.h
iokit/IOKit/pwr_mgt/IOPMinformee.h
iokit/IOKit/pwr_mgt/IOPMinformeeList.h
iokit/IOKit/pwr_mgt/IOPMlog.h
iokit/IOKit/pwr_mgt/IOPMpowerState.h
iokit/IOKit/pwr_mgt/RootDomain.h
iokit/IOKit/system_management/IOWatchDogTimer.h
iokit/Kernel/IOBufferMemoryDescriptor.cpp
iokit/Kernel/IOCPU.cpp
iokit/Kernel/IOCommandGate.cpp
iokit/Kernel/IODMACommand.cpp
iokit/Kernel/IODMAEventSource.cpp
iokit/Kernel/IODataQueue.cpp
iokit/Kernel/IODeviceTreeSupport.cpp
iokit/Kernel/IOEventSource.cpp
iokit/Kernel/IOHibernateIO.cpp
iokit/Kernel/IOHibernateInternal.h
iokit/Kernel/IOHibernateRestoreKernel.c
iokit/Kernel/IOHistogramReporter.cpp
iokit/Kernel/IOKitDebug.cpp
iokit/Kernel/IOKitKernelInternal.h
iokit/Kernel/IOLib.cpp
iokit/Kernel/IOMapper.cpp
iokit/Kernel/IOMemoryDescriptor.cpp
iokit/Kernel/IOMultiMemoryDescriptor.cpp
iokit/Kernel/IONVRAM.cpp
iokit/Kernel/IOPMPowerStateQueue.h
iokit/Kernel/IOPMrootDomain.cpp
iokit/Kernel/IOPlatformExpert.cpp
iokit/Kernel/IOPolledInterface.cpp
iokit/Kernel/IORegistryEntry.cpp
iokit/Kernel/IOReportLegend.cpp
iokit/Kernel/IOReporterDefs.h
iokit/Kernel/IOService.cpp
iokit/Kernel/IOServicePM.cpp
iokit/Kernel/IOServicePMPrivate.h
iokit/Kernel/IOServicePrivate.h
iokit/Kernel/IOStartIOKit.cpp
iokit/Kernel/IOSubMemoryDescriptor.cpp
iokit/Kernel/IOUserClient.cpp
iokit/Kernel/IOWorkLoop.cpp
iokit/Kernel/RootDomainUserClient.h
iokit/Tests/TestIOMemoryDescriptor.cpp [new file with mode: 0644]
iokit/Tests/Tests.cpp
iokit/Tests/Tests.h
iokit/bsddev/IOKitBSDInit.cpp
iokit/conf/Makefile.template
iokit/conf/files
libkdd/kcdata/KCDBasicTypeDescription.h [new file with mode: 0644]
libkdd/kcdata/KCDBasicTypeDescription.m [new file with mode: 0644]
libkdd/kcdata/KCDStructTypeDescription.h [new file with mode: 0644]
libkdd/kcdata/KCDStructTypeDescription.m [new file with mode: 0644]
libkdd/kcdata/kcdata_core.m [new file with mode: 0644]
libkdd/kcdata/kcdtypes.c [new file with mode: 0644]
libkdd/kcdata/kdd.h [new file with mode: 0644]
libkdd/kcdata/kdd.m [new file with mode: 0644]
libkdd/kdd.xcodeproj/project.pbxproj [new file with mode: 0644]
libkern/.clang-format [new file with mode: 0644]
libkern/Makefile
libkern/OSKextVersion.c
libkern/c++/OSArray.cpp
libkern/c++/OSCollectionIterator.cpp
libkern/c++/OSData.cpp
libkern/c++/OSDictionary.cpp
libkern/c++/OSKext.cpp
libkern/c++/OSMetaClass.cpp
libkern/c++/OSObject.cpp
libkern/c++/OSOrderedSet.cpp
libkern/c++/OSRuntime.cpp
libkern/c++/OSSerialize.cpp
libkern/c++/OSSerializeBinary.cpp
libkern/c++/OSString.cpp
libkern/c++/OSSymbol.cpp
libkern/conf/Makefile.template
libkern/conf/files
libkern/conf/files.x86_64
libkern/crypto/corecrypto_aesxts.c
libkern/crypto/corecrypto_sha2.c
libkern/gen/OSAtomicOperations.c
libkern/gen/OSDebug.cpp
libkern/kxld/Makefile
libkern/kxld/kxld.c
libkern/kxld/kxld_demangle.c
libkern/kxld/kxld_demangle.h
libkern/kxld/kxld_object.c
libkern/kxld/kxld_reloc.c
libkern/kxld/kxld_seg.c
libkern/kxld/kxld_stubs.c
libkern/kxld/kxld_sym.c
libkern/kxld/kxld_util.c
libkern/kxld/kxld_util.h
libkern/kxld/kxld_versionmin.c
libkern/kxld/kxld_versionmin.h
libkern/kxld/tests/kextcopyright.c
libkern/libkern/Makefile
libkern/libkern/OSAtomic.h
libkern/libkern/OSKextLib.h
libkern/libkern/OSKextLibPrivate.h
libkern/libkern/c++/OSArray.h
libkern/libkern/c++/OSBoolean.h
libkern/libkern/c++/OSCollection.h
libkern/libkern/c++/OSCollectionIterator.h
libkern/libkern/c++/OSData.h
libkern/libkern/c++/OSDictionary.h
libkern/libkern/c++/OSKext.h
libkern/libkern/c++/OSLib.h
libkern/libkern/c++/OSMetaClass.h
libkern/libkern/c++/OSNumber.h
libkern/libkern/c++/OSObject.h
libkern/libkern/c++/OSOrderedSet.h
libkern/libkern/c++/OSSerialize.h
libkern/libkern/c++/OSSet.h
libkern/libkern/c++/OSString.h
libkern/libkern/c++/OSSymbol.h
libkern/libkern/crypto/sha2.h
libkern/libkern/kxld.h
libkern/libkern/zlib.h
libkern/x86_64/OSAtomic.s [deleted file]
libkern/zlib/deflate.c
libkern/zlib/deflate.h
libsa/conf/Makefile.template
libsa/lastkerneldataconst.c [new file with mode: 0644]
libsyscall/Libsyscall.xcconfig
libsyscall/Libsyscall.xcodeproj/project.pbxproj
libsyscall/mach/host.c [new file with mode: 0644]
libsyscall/mach/mach/mach.h
libsyscall/mach/mach_init.c
libsyscall/mach/mach_msg.c
libsyscall/mach/watchos_prohibited_mig.txt [new file with mode: 0644]
libsyscall/wrappers/cancelable/fcntl-base.c
libsyscall/wrappers/csr.c
libsyscall/wrappers/kdebug_trace.c
libsyscall/wrappers/libproc/libproc.c
libsyscall/wrappers/libproc/libproc.h
libsyscall/wrappers/libproc/libproc_internal.h
libsyscall/wrappers/spawn/posix_spawn.c
libsyscall/wrappers/spawn/spawn.h
libsyscall/wrappers/spawn/spawn_private.h
libsyscall/wrappers/stackshot.c [new file with mode: 0644]
libsyscall/wrappers/work_interval.c [new file with mode: 0644]
libsyscall/xcodescripts/create-syscalls.pl
libsyscall/xcodescripts/filter_mig.awk [new file with mode: 0755]
libsyscall/xcodescripts/mach_install_mig.sh
makedefs/MakeInc.cmd
makedefs/MakeInc.def
makedefs/MakeInc.kernel
makedefs/MakeInc.rule
makedefs/MakeInc.top
osfmk/Makefile
osfmk/UserNotification/Makefile
osfmk/atm/atm.c
osfmk/atm/atm_internal.h
osfmk/atm/atm_types.h
osfmk/bank/bank.c
osfmk/chud/chud_thread.c
osfmk/chud/i386/chud_thread_i386.c
osfmk/conf/Makefile.template
osfmk/conf/files
osfmk/conf/files.x86_64
osfmk/console/i386/serial_console.c
osfmk/console/video_console.c
osfmk/console/video_console.h
osfmk/corecrypto/cc/src/cc_clear.c [new file with mode: 0644]
osfmk/corecrypto/ccdbrg/src/ccdrbg_nisthmac.c
osfmk/corecrypto/ccdigest/src/ccdigest_init.c
osfmk/corecrypto/ccdigest/src/ccdigest_update.c
osfmk/corecrypto/cchmac/src/cchmac.c
osfmk/corecrypto/cchmac/src/cchmac_final.c
osfmk/corecrypto/cchmac/src/cchmac_init.c
osfmk/corecrypto/cchmac/src/cchmac_update.c
osfmk/corecrypto/ccn/src/ccn_set.c
osfmk/corecrypto/ccsha1/src/ccdigest_final_64be.c
osfmk/corecrypto/ccsha1/src/ccsha1_eay.c
osfmk/corecrypto/ccsha1/src/ccsha1_initial_state.c
osfmk/corpses/Makefile [new file with mode: 0644]
osfmk/corpses/corpse.c [new file with mode: 0644]
osfmk/corpses/task_corpse.h [new file with mode: 0644]
osfmk/default_pager/default_pager.c
osfmk/default_pager/dp_backing_store.c
osfmk/default_pager/dp_memory_object.c
osfmk/device/Makefile
osfmk/device/device.defs
osfmk/device/device_init.c
osfmk/device/device_types.h
osfmk/gssd/Makefile
osfmk/i386/AT386/model_dep.c
osfmk/i386/Diagnostics.c
osfmk/i386/Makefile
osfmk/i386/acpi.c
osfmk/i386/ast.h [deleted file]
osfmk/i386/ast_types.h [deleted file]
osfmk/i386/atomic.h [new file with mode: 0644]
osfmk/i386/bsd_i386.c
osfmk/i386/commpage/commpage.c
osfmk/i386/commpage/commpage.h
osfmk/i386/cpu.c
osfmk/i386/cpu_capabilities.h
osfmk/i386/cpu_data.h
osfmk/i386/cpuid.c
osfmk/i386/cpuid.h
osfmk/i386/flipc_page.h [deleted file]
osfmk/i386/fpu.c
osfmk/i386/genassym.c
osfmk/i386/i386_init.c
osfmk/i386/i386_lock.s
osfmk/i386/i386_vm_init.c
osfmk/i386/io_map.c
osfmk/i386/lapic_native.c
osfmk/i386/locks.h
osfmk/i386/locks_i386.c
osfmk/i386/machine_routines.c
osfmk/i386/machine_routines.h
osfmk/i386/misc_protos.h
osfmk/i386/mp.c
osfmk/i386/mp.h
osfmk/i386/mp_desc.c
osfmk/i386/pal_routines.c
osfmk/i386/pal_routines.h
osfmk/i386/panic_hooks.c
osfmk/i386/pcb.c
osfmk/i386/pmCPU.c
osfmk/i386/pmap.h
osfmk/i386/pmap_common.c
osfmk/i386/pmap_internal.h
osfmk/i386/pmap_x86_common.c
osfmk/i386/proc_reg.h
osfmk/i386/rtclock.c
osfmk/i386/smp.h [new file with mode: 0644]
osfmk/i386/thread.h
osfmk/i386/trap.c
osfmk/i386/trap.h
osfmk/i386/tsc.c
osfmk/i386/ucode.c
osfmk/i386/vmx/vmx_shims.c
osfmk/ipc/ipc_importance.c
osfmk/ipc/ipc_importance.h
osfmk/ipc/ipc_init.c
osfmk/ipc/ipc_kmsg.c
osfmk/ipc/ipc_kmsg.h
osfmk/ipc/ipc_mqueue.c
osfmk/ipc/ipc_mqueue.h
osfmk/ipc/ipc_object.c
osfmk/ipc/ipc_object.h
osfmk/ipc/ipc_port.c
osfmk/ipc/ipc_port.h
osfmk/ipc/ipc_pset.c
osfmk/ipc/ipc_pset.h
osfmk/ipc/ipc_right.c
osfmk/ipc/ipc_right.h
osfmk/ipc/ipc_table.c
osfmk/ipc/ipc_voucher.c
osfmk/ipc/mach_debug.c
osfmk/ipc/mach_msg.c
osfmk/ipc/mach_port.c
osfmk/kdp/kdp_core.c [new file with mode: 0644]
osfmk/kdp/kdp_core.h
osfmk/kdp/kdp_udp.c
osfmk/kdp/ml/i386/kdp_x86_common.c
osfmk/kdp/ml/i386/kdp_x86_common.h
osfmk/kdp/ml/x86_64/kdp_machdep.c
osfmk/kdp/ml/x86_64/kdp_vm.c
osfmk/kern/Makefile
osfmk/kern/assert.h
osfmk/kern/ast.c
osfmk/kern/ast.h
osfmk/kern/bsd_kern.c
osfmk/kern/btlog.c
osfmk/kern/call_entry.h
osfmk/kern/clock.c
osfmk/kern/clock.h
osfmk/kern/coalition.c
osfmk/kern/coalition.h
osfmk/kern/debug.c
osfmk/kern/debug.h
osfmk/kern/ecc.h
osfmk/kern/energy_perf.c
osfmk/kern/energy_perf.h
osfmk/kern/exception.c
osfmk/kern/exception.h
osfmk/kern/gzalloc.c
osfmk/kern/hibernate.c
osfmk/kern/host.c
osfmk/kern/hv_support.c
osfmk/kern/hv_support.h
osfmk/kern/ipc_host.c
osfmk/kern/ipc_kobject.c
osfmk/kern/ipc_mig.c
osfmk/kern/ipc_sync.c
osfmk/kern/ipc_sync.h
osfmk/kern/ipc_tt.c
osfmk/kern/kalloc.c
osfmk/kern/kalloc.h
osfmk/kern/kern_cdata.c [new file with mode: 0644]
osfmk/kern/kern_cdata.h [new file with mode: 0644]
osfmk/kern/kern_ecc.c
osfmk/kern/kern_stackshot.c
osfmk/kern/kern_types.h
osfmk/kern/kext_alloc.c
osfmk/kern/kpc.h
osfmk/kern/kpc_common.c
osfmk/kern/kpc_thread.c
osfmk/kern/ledger.c
osfmk/kern/ledger.h
osfmk/kern/locks.c
osfmk/kern/locks.h
osfmk/kern/machine.h
osfmk/kern/misc_protos.h
osfmk/kern/page_decrypt.h
osfmk/kern/printf.c
osfmk/kern/priority.c
osfmk/kern/processor.c
osfmk/kern/processor.h
osfmk/kern/processor_data.h
osfmk/kern/queue.h
osfmk/kern/sched.h
osfmk/kern/sched_average.c
osfmk/kern/sched_dualq.c
osfmk/kern/sched_grrr.c
osfmk/kern/sched_multiq.c
osfmk/kern/sched_prim.c
osfmk/kern/sched_prim.h
osfmk/kern/sched_proto.c
osfmk/kern/sched_traditional.c [new file with mode: 0644]
osfmk/kern/sfi.c
osfmk/kern/smp.h [new file with mode: 0644]
osfmk/kern/stack.c
osfmk/kern/startup.c
osfmk/kern/sync_lock.h
osfmk/kern/sync_sema.c
osfmk/kern/sync_sema.h
osfmk/kern/syscall_subr.c
osfmk/kern/syscall_sw.c
osfmk/kern/syscall_sw.h
osfmk/kern/sysdiagnose.c [new file with mode: 0644]
osfmk/kern/task.c
osfmk/kern/task.h
osfmk/kern/task_policy.c
osfmk/kern/telemetry.c
osfmk/kern/thread.c
osfmk/kern/thread.h
osfmk/kern/thread_act.c
osfmk/kern/thread_call.c
osfmk/kern/thread_policy.c
osfmk/kern/timer_call.c
osfmk/kern/wait_queue.c [deleted file]
osfmk/kern/wait_queue.h [deleted file]
osfmk/kern/waitq.c [new file with mode: 0644]
osfmk/kern/waitq.h [new file with mode: 0644]
osfmk/kern/zalloc.c
osfmk/kern/zalloc.h
osfmk/kextd/Makefile
osfmk/kperf/action.c
osfmk/kperf/action.h
osfmk/kperf/buffer.h
osfmk/kperf/callstack.c
osfmk/kperf/context.h
osfmk/kperf/kperf.h
osfmk/kperf/kperf_arch.h
osfmk/kperf/kperf_kpc.c
osfmk/kperf/kperf_kpc.h
osfmk/kperf/kperfbsd.c
osfmk/kperf/meminfo.c [new file with mode: 0644]
osfmk/kperf/meminfo.h [new file with mode: 0644]
osfmk/kperf/pet.c
osfmk/kperf/sample.h
osfmk/kperf/threadinfo.c
osfmk/kperf/x86_64/kperf_meminfo.c [new file with mode: 0644]
osfmk/lockd/Makefile
osfmk/mach/Makefile
osfmk/mach/coalition.h [new file with mode: 0644]
osfmk/mach/exception_types.h
osfmk/mach/flipc_cb.h [deleted file]
osfmk/mach/flipc_debug.h [deleted file]
osfmk/mach/flipc_device.h [deleted file]
osfmk/mach/flipc_locks.h [deleted file]
osfmk/mach/flipc_types.h [deleted file]
osfmk/mach/host_info.h
osfmk/mach/host_special_ports.h
osfmk/mach/i386/Makefile
osfmk/mach/i386/exception.h
osfmk/mach/i386/flipc_dep.h [deleted file]
osfmk/mach/mach_host.defs
osfmk/mach/mach_types.defs
osfmk/mach/machine.h
osfmk/mach/machine/Makefile
osfmk/mach/machine/sdt.h
osfmk/mach/memory_object_types.h
osfmk/mach/message.h
osfmk/mach/port.h
osfmk/mach/shared_region.h
osfmk/mach/sync_policy.h
osfmk/mach/sysdiagnose_notification.defs [new file with mode: 0644]
osfmk/mach/task_info.h
osfmk/mach/thread_info.h
osfmk/mach/vm_behavior.h
osfmk/mach/vm_param.h
osfmk/mach/vm_prot.h
osfmk/mach/vm_statistics.h
osfmk/mach/vm_types.h
osfmk/mach_debug/mach_debug_types.defs
osfmk/mach_debug/mach_debug_types.h
osfmk/mach_debug/zone_info.h
osfmk/machine/Makefile
osfmk/machine/ast.h [deleted file]
osfmk/machine/ast_types.h [deleted file]
osfmk/machine/atomic.h [new file with mode: 0644]
osfmk/machine/smp.h [new file with mode: 0644]
osfmk/pmc/Makefile [deleted file]
osfmk/pmc/pmc.c [deleted file]
osfmk/pmc/pmc.h [deleted file]
osfmk/prng/prng_yarrow.c
osfmk/prng/random.c
osfmk/profiling/Makefile
osfmk/vm/WKdm_new.h
osfmk/vm/bsd_vm.c
osfmk/vm/device_vm.c
osfmk/vm/memory_object.c
osfmk/vm/memory_object.h
osfmk/vm/pmap.h
osfmk/vm/vm_apple_protect.c
osfmk/vm/vm_compressor.c
osfmk/vm/vm_compressor.h
osfmk/vm/vm_compressor_backing_store.c
osfmk/vm/vm_compressor_backing_store.h
osfmk/vm/vm_compressor_pager.c
osfmk/vm/vm_compressor_pager.h
osfmk/vm/vm_debug.c
osfmk/vm/vm_fault.c
osfmk/vm/vm_fault.h
osfmk/vm/vm_fourk_pager.c [new file with mode: 0644]
osfmk/vm/vm_init.c
osfmk/vm/vm_kern.c
osfmk/vm/vm_kern.h
osfmk/vm/vm_map.c
osfmk/vm/vm_map.h
osfmk/vm/vm_map_store.c
osfmk/vm/vm_map_store.h
osfmk/vm/vm_map_store_ll.c
osfmk/vm/vm_map_store_rb.c
osfmk/vm/vm_map_store_rb.h
osfmk/vm/vm_object.c
osfmk/vm/vm_object.h
osfmk/vm/vm_page.h
osfmk/vm/vm_pageout.c
osfmk/vm/vm_pageout.h
osfmk/vm/vm_phantom_cache.c
osfmk/vm/vm_protos.h
osfmk/vm/vm_purgeable.c
osfmk/vm/vm_purgeable_internal.h
osfmk/vm/vm_resident.c
osfmk/vm/vm_shared_region.c
osfmk/vm/vm_swapfile_pager.c
osfmk/vm/vm_user.c
osfmk/x86_64/WKdmCompress_new.s
osfmk/x86_64/WKdmDecompress_new.s
osfmk/x86_64/bcopy.s
osfmk/x86_64/bzero.s
osfmk/x86_64/copyio.c
osfmk/x86_64/cswitch.s
osfmk/x86_64/idt64.s
osfmk/x86_64/kpc_x86.c
osfmk/x86_64/locore.s
osfmk/x86_64/loose_ends.c
osfmk/x86_64/machine_kpc.h
osfmk/x86_64/machine_routines_asm.s
osfmk/x86_64/pmap.c
osfmk/x86_64/start.s
pexpert/Makefile
pexpert/conf/Makefile.template
pexpert/gen/bootargs.c
pexpert/gen/pe_gen.c
pexpert/i386/pe_init.c
pexpert/i386/pe_kprintf.c
pexpert/pexpert/Makefile
pexpert/pexpert/i386/boot.h
pexpert/pexpert/pexpert.h
pexpert/pexpert/protos.h
security/Makefile
security/conf/Makefile.template
security/conf/files
security/mac.h
security/mac_audit.c
security/mac_base.c
security/mac_file.c
security/mac_framework.h
security/mac_internal.h
security/mac_iokit.c
security/mac_kext.c
security/mac_mach.c [new file with mode: 0644]
security/mac_mach_internal.h
security/mac_pipe.c
security/mac_policy.h
security/mac_posix_sem.c
security/mac_posix_shm.c
security/mac_process.c
security/mac_socket.c
security/mac_system.c
security/mac_sysv_msg.c
security/mac_sysv_sem.c
security/mac_sysv_shm.c
security/mac_vfs.c
tools/lldbmacros/Makefile
tools/lldbmacros/README
tools/lldbmacros/atm.py
tools/lldbmacros/core/kernelcore.py
tools/lldbmacros/core/operating_system.py
tools/lldbmacros/core/xnu_lldb_init.py
tools/lldbmacros/ioreg.py
tools/lldbmacros/ipc.py
tools/lldbmacros/kauth.py [new file with mode: 0644]
tools/lldbmacros/kcdata.py [new file with mode: 0644]
tools/lldbmacros/mbufdefines.py
tools/lldbmacros/mbufs.py
tools/lldbmacros/memory.py
tools/lldbmacros/misc.py
tools/lldbmacros/net.py
tools/lldbmacros/pmap.py
tools/lldbmacros/process.py
tools/lldbmacros/scheduler.py
tools/lldbmacros/userspace.py
tools/lldbmacros/usertaskgdbserver.py [new file with mode: 0644]
tools/lldbmacros/utils.py
tools/lldbmacros/waitq.py [new file with mode: 0644]
tools/lldbmacros/xnu.py
tools/lldbmacros/xnudefines.py
tools/reindent.sh [new file with mode: 0755]
tools/remote_build.sh
tools/tests/MPMMTest/MPMMtest.c
tools/tests/MPMMTest/Makefile
tools/tests/Makefile
tools/tests/Makefile.common [new file with mode: 0644]
tools/tests/affinity/Makefile
tools/tests/execperf/Makefile
tools/tests/jitter/Makefile
tools/tests/kqueue_tests/Makefile
tools/tests/libMicro/bench.sh
tools/tests/libMicro/benchDS.sh
tools/tests/libMicro/coreos_bench.sh
tools/tests/libMicro/embd_bench.sh
tools/tests/libMicro/od_account_create.sh
tools/tests/libMicro/od_account_delete.sh
tools/tests/memorystatus/Makefile [deleted file]
tools/tests/memorystatus/memorystatus.c [deleted file]
tools/tests/memorystatus/memorystatus_groups.c [deleted file]
tools/tests/perf_index/Makefile
tools/tests/superpages/Makefile
tools/tests/testkext/testthreadcall-Info.plist
tools/tests/testkext/testthreadcall.cpp
tools/tests/xnu_quick_test/32bit_inode_tests.c [deleted file]
tools/tests/xnu_quick_test/README [deleted file]
tools/tests/xnu_quick_test/atomic_fifo_queue_test.c [deleted file]
tools/tests/xnu_quick_test/commpage_tests.c [deleted file]
tools/tests/xnu_quick_test/content_protection_test.c [deleted file]
tools/tests/xnu_quick_test/helpers/arch.c [deleted file]
tools/tests/xnu_quick_test/helpers/data_exec.c [deleted file]
tools/tests/xnu_quick_test/helpers/launch.c [deleted file]
tools/tests/xnu_quick_test/helpers/sleep.c [deleted file]
tools/tests/xnu_quick_test/kqueue_tests.c [deleted file]
tools/tests/xnu_quick_test/machvm_tests.c [deleted file]
tools/tests/xnu_quick_test/main.c [deleted file]
tools/tests/xnu_quick_test/makefile [deleted file]
tools/tests/xnu_quick_test/memory_tests.c [deleted file]
tools/tests/xnu_quick_test/misc.c [deleted file]
tools/tests/xnu_quick_test/pipes_tests.c [deleted file]
tools/tests/xnu_quick_test/sched_tests.c [deleted file]
tools/tests/xnu_quick_test/sema_tests.c [deleted file]
tools/tests/xnu_quick_test/shared_memory_tests.c [deleted file]
tools/tests/xnu_quick_test/socket_tests.c [deleted file]
tools/tests/xnu_quick_test/tests.c [deleted file]
tools/tests/xnu_quick_test/tests.h [deleted file]
tools/tests/xnu_quick_test/xattr_tests.c [deleted file]
tools/tests/xnu_quick_test/xnu_quick_test.entitlements [deleted file]
tools/tests/zero-to-n/Makefile
tools/tests/zero-to-n/zero-to-n.c

diff --git a/.clang-format b/.clang-format
new file mode 100644 (file)
index 0000000..566d0ef
--- /dev/null
@@ -0,0 +1,121 @@
+# Format of this file is YAML
+# Minimum clang-format version required: clang-format version 3.6.0
+# Detailed description of options available at http://clang.llvm.org/docs/ClangFormatStyleOptions.html
+
+AlignEscapedNewlinesLeft: true
+# Bad:
+# void foo() {
+#        someFunction();
+#  someOtherFunction();
+# }
+# Good:
+# void foo() {
+#    someFunction();
+#    someOtherFunction();
+# }
+
+AlignTrailingComments: true
+# align all trailing comments to the right based on //
+# == Avoid using // based comments altogether ==
+
+AllowAllParametersOfDeclarationOnNextLine: true
+# allow function definition as
+# someFunction(foo,
+#             bar,
+#             baz);
+
+AlignConsecutiveAssignments:  true
+# aligns consecutive assignments with '=' operator
+
+AllowShortBlocksOnASingleLine: true
+# a single-statement block can be merged onto one line
+# e.g. if (a) { return; }
+
+AllowShortCaseLabelsOnASingleLine: false
+# Single statement case statements should be on their own lines
+
+AllowShortFunctionsOnASingleLine: None
+# Bad:
+# int foo() { return 123; }
+
+AllowShortIfStatementsOnASingleLine: false
+# Bad: 
+# if (someOtherVar) return; 
+# Good:
+# if (someOtherVar) 
+#     return;
+
+AllowShortLoopsOnASingleLine: false
+# Bad:
+# while(i>0) i--;
+# Good:
+# while(i>0) {
+#         i--;
+#     }
+
+AlwaysBreakAfterDefinitionReturnType: true
+# Ensures the return type is on its own line
+# e.g. unsigned int
+# function(char param) { }
+
+AlwaysBreakBeforeMultilineStrings: true
+# multiline strings should begin on a new line
+
+BinPackArguments: true
+BinPackParameters: false
+# function arguments should either all be on one line or each be on its own line
+
+BreakBeforeBinaryOperators: None
+# break onto a new line after a binary operator when the line exceeds ColumnLimit
+# e.g.
+# int foo = bar +
+#           baz;
+
+BreakBeforeBraces: Linux
+# Always attach braces to surrounding context except -
+# break before braces on function, namespace and class definitions
+
+ColumnLimit: 132
+# everybody has a wide screen now; 132 seems to be a reasonable limit
+
+IndentCaseLabels: false
+# case labels have same indentation as switch statement.
+
+IndentWidth: 4
+# 4 spaces for indentation
+TabWidth: 4
+# tabwidth is 4 spaces
+
+UseTab: ForIndentation
+# tab for indentation only. All alignment should happen with spaces
+# Simple rule to check. 
+# No tabs allowed after first 'non-tab' character in a line
+
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: false
+# remove excess empty lines at start of blocks.
+
+PointerAlignment: Middle
+
+SpaceAfterCStyleCast: false
+# No space after (cast). E.g
+# int blah = (int)((void *)foo + bar)
+
+SpaceBeforeAssignmentOperators: true
+# Assignment '=' should be separated by spaces on both sides.
+
+SpaceBeforeParens: ControlStatements
+# for control statements a space is required before '('
+# Bad: if(x) { statement; }
+# Good: if (x) { statement; }
+
+SpaceInEmptyParentheses: false
+# No spaces required for empty ()
+
+SpacesInCStyleCastParentheses: false
+# No spaces required for (unsigned int) type cast
+
+SpacesInParentheses: false
+
+SpacesInSquareBrackets: false
+# No spaces in [count] style invocations of []
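
For illustration only (not part of the commit), here is a small C snippet formatted the way the settings above ask for: an indent width of 4 (tabs in the real configuration, shown here with spaces), Linux-style braces (broken before the brace only for function definitions), the definition's return type on its own line, and no single-line if statements or loops. The function and variable names are hypothetical.

/* hypothetical example, formatted per the .clang-format settings above */
static int
count_positive(const int *values, int count)
{
    int result = 0;

    for (int i = 0; i < count; i++) {
        if (values[i] > 0) {
            result++;
        }
    }
    return result;
}
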
diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..ddcc480
--- /dev/null
@@ -0,0 +1,50 @@
+# Any level
+BUILD/
+build/
+.DS_Store
+
+# /
+/.remotebuild_credential
+/cscope.*
+/TAGS
+/tags
+
+# /libkern/c++/Tests/TestSerialization/test1/test1.xcodeproj/
+/libkern/c++/Tests/TestSerialization/test1/test1.xcodeproj/xcuserdata
+
+# /libkern/c++/Tests/TestSerialization/test2/test2.xcodeproj/
+/libkern/c++/Tests/TestSerialization/test2/test2.xcodeproj/xcuserdata
+
+# /libkern/kmod/libkmod.xcodeproj/
+/libkern/kmod/libkmod.xcodeproj/xcuserdata
+
+# /libsyscall/Libsyscall.xcodeproj/
+/libsyscall/Libsyscall.xcodeproj/xcuserdata
+/libsyscall/Libsyscall.xcodeproj/project.xcworkspace
+
+# /tools/lldbmacros/
+/tools/lldbmacros/*.pyc
+
+# /tools/lldbmacros/core/
+/tools/lldbmacros/core/*.pyc
+
+# /tools/lldbmacros/plugins/
+/tools/lldbmacros/plugins/*.pyc
+
+# /tools/tests/perf_index/PerfIndex_COPS_Module/PerfIndex.xcodeproj/
+/tools/tests/perf_index/PerfIndex_COPS_Module/PerfIndex.xcodeproj/xcuserdata
+
+# /tools/tests/testkext/testkext.xcodeproj/
+/tools/tests/testkext/testkext.xcodeproj/xcuserdata
+
+# /tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.xcodeproj/
+/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.xcodeproj/xcuserdata
+
+# /tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/
+/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata
+
+# /tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.xcworkspace/
+/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.xcworkspace/xcuserdata
+
+# /tools/tests/zero-to-n
+/tools/tests/zero-to-n/zn*
diff --git a/EXTERNAL_HEADERS/AssertMacros.h b/EXTERNAL_HEADERS/AssertMacros.h
index 99214497baf32abb428e37bb7ece415774cde223..f2d3274885c5e8b4c533e3614c14e90c6cf2c771 100644 (file)
                  if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) {                   \
                          DEBUG_ASSERT_MESSAGE(                                              \
                                  DEBUG_ASSERT_COMPONENT_NAME_STRING,                            \
-                                 #errorCode " == 0 ", 0, 0, __FILE__, __LINE__, 0 );            \
+                                 #errorCode " == 0 ", 0, 0, __FILE__, __LINE__, evalOnceErrorCode );            \
                          action;                                                            \
                  }                                                                      \
               } while (0)
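
The hunk above is from EXTERNAL_HEADERS/AssertMacros.h: the macro already evaluates its argument exactly once into evalOnceErrorCode, and the change forwards that evaluated value to DEBUG_ASSERT_MESSAGE instead of a hard-coded 0, so the assertion handler can report the actual failing error code. A minimal sketch of the same evaluate-once pattern follows; it is an illustration, not the real header, and report_failure() is a hypothetical stand-in for DEBUG_ASSERT_MESSAGE.

#include <stdio.h>

/* hypothetical stand-in for the real assertion reporting hook */
#define report_failure(expr_str, file, line, code) \
    fprintf(stderr, "%s failed at %s:%d (error %ld)\n", (expr_str), (file), (line), (long)(code))

/* evaluate errorCode exactly once; on failure, report the evaluated value rather than 0 */
#define check_noerr_return(errorCode)                                       \
    do {                                                                    \
        long evalOnceErrorCode = (long)(errorCode);                         \
        if (__builtin_expect(0 != evalOnceErrorCode, 0)) {                  \
            report_failure(#errorCode " == 0", __FILE__, __LINE__,          \
                evalOnceErrorCode);                                         \
            return;                                                         \
        }                                                                   \
    } while (0)

A caller would write check_noerr_return(some_status) inside a void function; if some_status evaluates to a nonzero value, the macro logs that value and returns.
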
diff --git a/EXTERNAL_HEADERS/Availability.h b/EXTERNAL_HEADERS/Availability.h
index ec18d512c1e21cf91a1f87381dbc31c603bfbba6..79b5894b4aa57d5374a75e82ff069c2c99d52012 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2011 by Apple Inc.. All rights reserved.
+ * Copyright (c) 2007-2015 by Apple Inc.. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
 #define __MAC_10_8            1080
 #define __MAC_10_9            1090
 #define __MAC_10_10         101000
+#define __MAC_10_10_2       101002
+#define __MAC_10_10_3       101003
+#define __MAC_10_11         101100
 /* __MAC_NA is not defined to a value but is used as a token by macros to indicate that the API is unavailable */
 
-#define __IPHONE_2_0         20000
-#define __IPHONE_2_1         20100
-#define __IPHONE_2_2         20200
-#define __IPHONE_3_0         30000
-#define __IPHONE_3_1         30100
-#define __IPHONE_3_2         30200
-#define __IPHONE_4_0         40000
-#define __IPHONE_4_1         40100
-#define __IPHONE_4_2         40200
-#define __IPHONE_4_3         40300
-#define __IPHONE_5_0         50000
-#define __IPHONE_5_1         50100
-#define __IPHONE_6_0         60000
-#define __IPHONE_6_1         60100
-#define __IPHONE_7_0         70000
-#define __IPHONE_7_1         70100
-#define __IPHONE_8_0         80000
+#define __IPHONE_2_0     20000
+#define __IPHONE_2_1     20100
+#define __IPHONE_2_2     20200
+#define __IPHONE_3_0     30000
+#define __IPHONE_3_1     30100
+#define __IPHONE_3_2     30200
+#define __IPHONE_4_0     40000
+#define __IPHONE_4_1     40100
+#define __IPHONE_4_2     40200
+#define __IPHONE_4_3     40300
+#define __IPHONE_5_0     50000
+#define __IPHONE_5_1     50100
+#define __IPHONE_6_0     60000
+#define __IPHONE_6_1     60100
+#define __IPHONE_7_0     70000
+#define __IPHONE_7_1     70100
+#define __IPHONE_8_0     80000
+#define __IPHONE_8_1     80100
+#define __IPHONE_8_2     80200
+#define __IPHONE_8_3     80300
+#define __IPHONE_8_4     80400
+#define __IPHONE_9_0     90000
 /* __IPHONE_NA is not defined to a value but is used as a token by macros to indicate that the API is unavailable */
 
-#include <AvailabilityInternal.h>
+#define __TVOS_9_0       90000
+
+#define __WATCHOS_1_0    10000
+#define __WATCHOS_2_0    20000
 
+#include <AvailabilityInternal.h>
 
 #ifdef __IPHONE_OS_VERSION_MIN_REQUIRED
     #define __OSX_AVAILABLE_STARTING(_osx, _ios) __AVAILABILITY_INTERNAL##_ios
 #endif
 
 
+#if defined(__has_feature)
+  #if __has_feature(attribute_availability_with_message)
+    #define __OS_AVAILABILITY(_target, _availability)            __attribute__((availability(_target,_availability)))
+    #define __OS_AVAILABILITY_MSG(_target, _availability, _msg)  __attribute__((availability(_target,_availability,message=_msg)))
+  #else
+    #define __OS_AVAILABILITY(_target, _availability)
+    #define __OS_AVAILABILITY_MSG(_target, _availability, _msg)
+  #endif
+#else
+    #define __OS_AVAILABILITY(_target, _availability)
+    #define __OS_AVAILABILITY_MSG(_target, _availability, _msg)
+#endif
+
+
+/* for use to document app extension usage */
+#if defined(__has_feature)
+  #if __has_feature(attribute_availability_app_extension)
+    #define __OSX_EXTENSION_UNAVAILABLE(_msg)  __OS_AVAILABILITY_MSG(macosx_app_extension,unavailable,_msg)
+    #define __IOS_EXTENSION_UNAVAILABLE(_msg)  __OS_AVAILABILITY_MSG(ios_app_extension,unavailable,_msg)
+  #else
+    #define __OSX_EXTENSION_UNAVAILABLE(_msg)
+    #define __IOS_EXTENSION_UNAVAILABLE(_msg)
+  #endif
+#else
+    #define __OSX_EXTENSION_UNAVAILABLE(_msg)
+    #define __IOS_EXTENSION_UNAVAILABLE(_msg)
+#endif
+
+#define __OS_EXTENSION_UNAVAILABLE(_msg)  __OSX_EXTENSION_UNAVAILABLE(_msg) __IOS_EXTENSION_UNAVAILABLE(_msg)
+
+
+
+/* for use marking APIs available info for Mac OSX */
+#if defined(__has_feature)
+  #if __has_attribute(availability)
+    #define __OSX_UNAVAILABLE                    __OS_AVAILABILITY(macosx,unavailable)
+    #define __OSX_AVAILABLE(_vers)               __OS_AVAILABILITY(macosx,introduced=_vers)
+    #define __OSX_DEPRECATED(_start, _dep, _msg) __OSX_AVAILABLE(_start) __OS_AVAILABILITY_MSG(macosx,deprecated=_dep,_msg)
+  #endif
+#endif
+
+#ifndef __OSX_UNAVAILABLE
+  #define __OSX_UNAVAILABLE
+#endif
+
+#ifndef __OSX_AVAILABLE
+  #define __OSX_AVAILABLE(_vers)
+#endif
+
+#ifndef __OSX_DEPRECATED
+  #define __OSX_DEPRECATED(_start, _dep, _msg)
+#endif
+
+
+/* for use marking APIs available info for iOS */
+#if defined(__has_feature)
+  #if __has_attribute(availability)
+    #define __IOS_UNAVAILABLE                    __OS_AVAILABILITY(ios,unavailable)
+    #define __IOS_PROHIBITED                     __OS_AVAILABILITY(ios,unavailable)
+    #define __IOS_AVAILABLE(_vers)               __OS_AVAILABILITY(ios,introduced=_vers)
+    #define __IOS_DEPRECATED(_start, _dep, _msg) __IOS_AVAILABLE(_start) __OS_AVAILABILITY_MSG(ios,deprecated=_dep,_msg)
+  #endif
+#endif
+
+#ifndef __IOS_UNAVAILABLE
+  #define __IOS_UNAVAILABLE
+#endif
+
+#ifndef __IOS_PROHIBITED
+  #define __IOS_PROHIBITED
+#endif
+
+#ifndef __IOS_AVAILABLE
+  #define __IOS_AVAILABLE(_vers)
+#endif
+
+#ifndef __IOS_DEPRECATED
+  #define __IOS_DEPRECATED(_start, _dep, _msg)
+#endif
+
+
+/* for use marking APIs available info for tvOS */
+#if defined(__has_feature)
+  #if __has_feature(attribute_availability_tvos)
+    #define __TVOS_UNAVAILABLE                    __OS_AVAILABILITY(tvos,unavailable)
+    #define __TVOS_PROHIBITED                     __OS_AVAILABILITY(tvos,unavailable)
+    #define __TVOS_AVAILABLE(_vers)               __OS_AVAILABILITY(tvos,introduced=_vers)
+    #define __TVOS_DEPRECATED(_start, _dep, _msg) __TVOS_AVAILABLE(_start) __OS_AVAILABILITY_MSG(tvos,deprecated=_dep,_msg)
+  #endif
+#endif
+
+#ifndef __TVOS_UNAVAILABLE
+  #define __TVOS_UNAVAILABLE
+#endif
+
+#ifndef __TVOS_PROHIBITED
+  #define __TVOS_PROHIBITED
+#endif
+
+#ifndef __TVOS_AVAILABLE
+  #define __TVOS_AVAILABLE(_vers)
+#endif
+
+#ifndef __TVOS_DEPRECATED
+  #define __TVOS_DEPRECATED(_start, _dep, _msg)
+#endif
+
+
+/* for use marking APIs available info for Watch OS */
+#if defined(__has_feature)
+  #if __has_feature(attribute_availability_watchos)
+    #define __WATCHOS_UNAVAILABLE                    __OS_AVAILABILITY(watchos,unavailable)
+    #define __WATCHOS_PROHIBITED                     __OS_AVAILABILITY(watchos,unavailable)
+    #define __WATCHOS_AVAILABLE(_vers)               __OS_AVAILABILITY(watchos,introduced=_vers)
+    #define __WATCHOS_DEPRECATED(_start, _dep, _msg) __WATCHOS_AVAILABLE(_start) __OS_AVAILABILITY_MSG(watchos,deprecated=_dep,_msg)
+  #endif
+#endif
+
+#ifndef __WATCHOS_UNAVAILABLE
+  #define __WATCHOS_UNAVAILABLE
+#endif
+
+#ifndef __WATCHOS_PROHIBITED
+  #define __WATCHOS_PROHIBITED
+#endif
+
+#ifndef __WATCHOS_AVAILABLE
+  #define __WATCHOS_AVAILABLE(_vers)
+#endif
+
+#ifndef __WATCHOS_DEPRECATED
+  #define __WATCHOS_DEPRECATED(_start, _dep, _msg)
+#endif
+
+
 #endif /* __AVAILABILITY__ */
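
The Availability.h additions above define per-platform annotation macros (__OSX_AVAILABLE, __OSX_DEPRECATED, __IOS_AVAILABLE, __IOS_PROHIBITED, __TVOS_UNAVAILABLE, __WATCHOS_PROHIBITED, and so on) that expand to Clang availability attributes when the compiler supports them and to nothing otherwise, so annotated declarations still compile everywhere. A hedged illustration of how declarations might use these macros; the function names are hypothetical and do not appear in this commit.

#include <Availability.h>

/* hypothetical declaration: available on OS X 10.11 and iOS 9.0, not on tvOS or watchOS */
int example_get_counter(void)
    __OSX_AVAILABLE(10.11)
    __IOS_AVAILABLE(9.0)
    __TVOS_UNAVAILABLE
    __WATCHOS_PROHIBITED;

/* hypothetical declaration: deprecated on OS X with a replacement message, never available on iOS */
int example_old_counter(void)
    __OSX_DEPRECATED(10.8, 10.11, "use example_get_counter() instead")
    __IOS_UNAVAILABLE;
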
diff --git a/EXTERNAL_HEADERS/AvailabilityInternal.h b/EXTERNAL_HEADERS/AvailabilityInternal.h
index e8b7b3de765590a125816da2555bfa25fa49a8b9..81bbd59b746770a2551d690b010f961f78593ac2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2012 by Apple Inc.. All rights reserved.
+ * Copyright (c) 2007-2015 by Apple Inc.. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
     #endif
 #endif
 
+#ifndef __TV_OS_VERSION_MIN_REQUIRED
+    #ifdef __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__
+        /* compiler sets __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ when -mtvos-version-min is used */
+        #define __TV_OS_VERSION_MIN_REQUIRED __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__
+        #define __TV_OS_VERSION_MAX_ALLOWED __IPHONE_9_0
+        /* for compatibility with existing code.  New code should use platform specific checks */
+        #define __IPHONE_OS_VERSION_MIN_REQUIRED 90000
+    #endif
+#endif
+
+#ifndef __WATCH_OS_VERSION_MIN_REQUIRED
+    #ifdef __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__
+        /* compiler sets __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ when -mwatchos-version-min is used */
+        #define __WATCH_OS_VERSION_MIN_REQUIRED __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__
+        #define __WATCH_OS_VERSION_MAX_ALLOWED 20000
+        /* for compatibility with existing code.  New code should use platform specific checks */
+        #define __IPHONE_OS_VERSION_MIN_REQUIRED 90000
+    #endif
+#endif
+
+
+
 #define __AVAILABILITY_INTERNAL_DEPRECATED            __attribute__((deprecated))
 #ifdef __has_feature
     #if __has_feature(attribute_deprecated_with_message)
@@ -58,7 +80,7 @@
 #ifdef __IPHONE_OS_VERSION_MIN_REQUIRED
     /* make sure a default max version is set */
     #ifndef __IPHONE_OS_VERSION_MAX_ALLOWED
-        #define __IPHONE_OS_VERSION_MAX_ALLOWED     __IPHONE_8_0
+        #define __IPHONE_OS_VERSION_MAX_ALLOWED     __IPHONE_9_0
     #endif
     /* make sure a valid min is set */
     #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_0
             #else
                     #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=2.0,deprecated=8.0)))
             #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1    __attribute__((availability(ios,introduced=2.0,deprecated=8.1)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=2.0,deprecated=8.1,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=2.0,deprecated=8.1)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2    __attribute__((availability(ios,introduced=2.0,deprecated=8.2)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=2.0,deprecated=8.2,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=2.0,deprecated=8.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3    __attribute__((availability(ios,introduced=2.0,deprecated=8.3)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=2.0,deprecated=8.3,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=2.0,deprecated=8.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4    __attribute__((availability(ios,introduced=2.0,deprecated=8.4)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=2.0,deprecated=8.4,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=2.0,deprecated=8.4)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=2.0,deprecated=9.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=2.0,deprecated=9.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=2.0,deprecated=9.0)))
+            #endif
             #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=2.0)))
             #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=2.0)))
             #define __AVAILABILITY_INTERNAL__IPHONE_2_1                    __attribute__((availability(ios,introduced=2.1)))
             #else
                     #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=2.1,deprecated=8.0)))
             #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1    __attribute__((availability(ios,introduced=2.1,deprecated=8.1)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=2.1,deprecated=8.1,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=2.1,deprecated=8.1)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2    __attribute__((availability(ios,introduced=2.1,deprecated=8.2)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=2.1,deprecated=8.2,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=2.1,deprecated=8.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3    __attribute__((availability(ios,introduced=2.1,deprecated=8.3)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=2.1,deprecated=8.3,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=2.1,deprecated=8.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4    __attribute__((availability(ios,introduced=2.1,deprecated=8.4)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=2.1,deprecated=8.4,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=2.1,deprecated=8.4)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=2.1,deprecated=9.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=2.1,deprecated=9.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=2.1,deprecated=9.0)))
+            #endif
             #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=2.1)))
             #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=2.1)))
             #define __AVAILABILITY_INTERNAL__IPHONE_2_2                    __attribute__((availability(ios,introduced=2.2)))
             #else
                     #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=2.2,deprecated=8.0)))
             #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1    __attribute__((availability(ios,introduced=2.2,deprecated=8.1)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=2.2,deprecated=8.1,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=2.2,deprecated=8.1)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2    __attribute__((availability(ios,introduced=2.2,deprecated=8.2)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=2.2,deprecated=8.2,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=2.2,deprecated=8.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3    __attribute__((availability(ios,introduced=2.2,deprecated=8.3)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=2.2,deprecated=8.3,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=2.2,deprecated=8.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4    __attribute__((availability(ios,introduced=2.2,deprecated=8.4)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=2.2,deprecated=8.4,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=2.2,deprecated=8.4)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=2.2,deprecated=9.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=2.2,deprecated=9.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=2.2,deprecated=9.0)))
+            #endif
             #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=2.2)))
             #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=2.2)))
             #define __AVAILABILITY_INTERNAL__IPHONE_3_0                    __attribute__((availability(ios,introduced=3.0)))
             #else
                     #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=3.0,deprecated=8.0)))
             #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1    __attribute__((availability(ios,introduced=3.0,deprecated=8.1)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=3.0,deprecated=8.1,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=3.0,deprecated=8.1)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2    __attribute__((availability(ios,introduced=3.0,deprecated=8.2)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=3.0,deprecated=8.2,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=3.0,deprecated=8.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3    __attribute__((availability(ios,introduced=3.0,deprecated=8.3)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=3.0,deprecated=8.3,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=3.0,deprecated=8.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4    __attribute__((availability(ios,introduced=3.0,deprecated=8.4)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=3.0,deprecated=8.4,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=3.0,deprecated=8.4)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=3.0,deprecated=9.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=3.0,deprecated=9.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=3.0,deprecated=9.0)))
+            #endif
             #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=3.0)))
             #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=3.0)))
             #define __AVAILABILITY_INTERNAL__IPHONE_3_1                    __attribute__((availability(ios,introduced=3.1)))
             #else
                     #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=3.1,deprecated=8.0)))
             #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1    __attribute__((availability(ios,introduced=3.1,deprecated=8.1)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=3.1,deprecated=8.1,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=3.1,deprecated=8.1)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2    __attribute__((availability(ios,introduced=3.1,deprecated=8.2)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=3.1,deprecated=8.2,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=3.1,deprecated=8.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3    __attribute__((availability(ios,introduced=3.1,deprecated=8.3)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=3.1,deprecated=8.3,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=3.1,deprecated=8.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4    __attribute__((availability(ios,introduced=3.1,deprecated=8.4)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=3.1,deprecated=8.4,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=3.1,deprecated=8.4)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=3.1,deprecated=9.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=3.1,deprecated=9.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=3.1,deprecated=9.0)))
+            #endif
             #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=3.1)))
             #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=3.1)))
             #define __AVAILABILITY_INTERNAL__IPHONE_3_2                    __attribute__((availability(ios,introduced=3.2)))
             #else
                     #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=3.2,deprecated=8.0)))
             #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1    __attribute__((availability(ios,introduced=3.2,deprecated=8.1)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=3.2,deprecated=8.1,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=3.2,deprecated=8.1)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2    __attribute__((availability(ios,introduced=3.2,deprecated=8.2)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=3.2,deprecated=8.2,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=3.2,deprecated=8.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3    __attribute__((availability(ios,introduced=3.2,deprecated=8.3)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=3.2,deprecated=8.3,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=3.2,deprecated=8.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4    __attribute__((availability(ios,introduced=3.2,deprecated=8.4)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=3.2,deprecated=8.4,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=3.2,deprecated=8.4)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=3.2,deprecated=9.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=3.2,deprecated=9.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=3.2,deprecated=9.0)))
+            #endif
             #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=3.2)))
             #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=3.2)))
             #define __AVAILABILITY_INTERNAL__IPHONE_4_0                    __attribute__((availability(ios,introduced=4.0)))
             #else
                     #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=4.0,deprecated=8.0)))
             #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1    __attribute__((availability(ios,introduced=4.0,deprecated=8.1)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=4.0,deprecated=8.1,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=4.0,deprecated=8.1)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2    __attribute__((availability(ios,introduced=4.0,deprecated=8.2)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=4.0,deprecated=8.2,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=4.0,deprecated=8.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3    __attribute__((availability(ios,introduced=4.0,deprecated=8.3)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=4.0,deprecated=8.3,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=4.0,deprecated=8.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4    __attribute__((availability(ios,introduced=4.0,deprecated=8.4)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=4.0,deprecated=8.4,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=4.0,deprecated=8.4)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=4.0,deprecated=9.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=4.0,deprecated=9.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=4.0,deprecated=9.0)))
+            #endif
             #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=4.0)))
             #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=4.0)))
             #define __AVAILABILITY_INTERNAL__IPHONE_4_1                    __attribute__((availability(ios,introduced=4.1)))
             #else
                     #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=4.1,deprecated=8.0)))
             #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1    __attribute__((availability(ios,introduced=4.1,deprecated=8.1)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=4.1,deprecated=8.1,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=4.1,deprecated=8.1)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2    __attribute__((availability(ios,introduced=4.1,deprecated=8.2)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=4.1,deprecated=8.2,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=4.1,deprecated=8.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3    __attribute__((availability(ios,introduced=4.1,deprecated=8.3)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=4.1,deprecated=8.3,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=4.1,deprecated=8.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4    __attribute__((availability(ios,introduced=4.1,deprecated=8.4)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=4.1,deprecated=8.4,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=4.1,deprecated=8.4)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=4.1,deprecated=9.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=4.1,deprecated=9.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=4.1,deprecated=9.0)))
+            #endif
             #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=4.1)))
             #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=4.1)))
             #define __AVAILABILITY_INTERNAL__IPHONE_4_2                    __attribute__((availability(ios,introduced=4.2)))
             #else
                     #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=4.2,deprecated=8.0)))
             #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1    __attribute__((availability(ios,introduced=4.2,deprecated=8.1)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=4.2,deprecated=8.1,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=4.2,deprecated=8.1)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2    __attribute__((availability(ios,introduced=4.2,deprecated=8.2)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=4.2,deprecated=8.2,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=4.2,deprecated=8.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3    __attribute__((availability(ios,introduced=4.2,deprecated=8.3)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=4.2,deprecated=8.3,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=4.2,deprecated=8.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4    __attribute__((availability(ios,introduced=4.2,deprecated=8.4)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=4.2,deprecated=8.4,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=4.2,deprecated=8.4)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=4.2,deprecated=9.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=4.2,deprecated=9.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=4.2,deprecated=9.0)))
+            #endif
             #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=4.2)))
             #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=4.2)))
             #define __AVAILABILITY_INTERNAL__IPHONE_4_3                    __attribute__((availability(ios,introduced=4.3)))
             #else
                     #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=4.3,deprecated=8.0)))
             #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1    __attribute__((availability(ios,introduced=4.3,deprecated=8.1)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=4.3,deprecated=8.1,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=4.3,deprecated=8.1)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2    __attribute__((availability(ios,introduced=4.3,deprecated=8.2)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=4.3,deprecated=8.2,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=4.3,deprecated=8.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3    __attribute__((availability(ios,introduced=4.3,deprecated=8.3)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=4.3,deprecated=8.3,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=4.3,deprecated=8.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4    __attribute__((availability(ios,introduced=4.3,deprecated=8.4)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=4.3,deprecated=8.4,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=4.3,deprecated=8.4)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=4.3,deprecated=9.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=4.3,deprecated=9.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=4.3,deprecated=9.0)))
+            #endif
             #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=4.3)))
             #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=4.3)))
             #define __AVAILABILITY_INTERNAL__IPHONE_5_0                    __attribute__((availability(ios,introduced=5.0)))
             #else
                     #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=5.0,deprecated=8.0)))
             #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1    __attribute__((availability(ios,introduced=5.0,deprecated=8.1)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=5.0,deprecated=8.1,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=5.0,deprecated=8.1)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2    __attribute__((availability(ios,introduced=5.0,deprecated=8.2)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=5.0,deprecated=8.2,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=5.0,deprecated=8.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3    __attribute__((availability(ios,introduced=5.0,deprecated=8.3)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=5.0,deprecated=8.3,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=5.0,deprecated=8.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4    __attribute__((availability(ios,introduced=5.0,deprecated=8.4)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=5.0,deprecated=8.4,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=5.0,deprecated=8.4)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=5.0,deprecated=9.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=5.0,deprecated=9.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=5.0,deprecated=9.0)))
+            #endif
             #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=5.0)))
             #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=5.0)))
             #define __AVAILABILITY_INTERNAL__IPHONE_5_1                    __attribute__((availability(ios,introduced=5.1)))
             #else
                     #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=5.1,deprecated=8.0)))
             #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1    __attribute__((availability(ios,introduced=5.1,deprecated=8.1)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=5.1,deprecated=8.1,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=5.1,deprecated=8.1)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2    __attribute__((availability(ios,introduced=5.1,deprecated=8.2)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=5.1,deprecated=8.2,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=5.1,deprecated=8.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3    __attribute__((availability(ios,introduced=5.1,deprecated=8.3)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=5.1,deprecated=8.3,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=5.1,deprecated=8.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4    __attribute__((availability(ios,introduced=5.1,deprecated=8.4)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=5.1,deprecated=8.4,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=5.1,deprecated=8.4)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=5.1,deprecated=9.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=5.1,deprecated=9.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=5.1,deprecated=9.0)))
+            #endif
             #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=5.1)))
             #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=5.1)))
             #define __AVAILABILITY_INTERNAL__IPHONE_6_0                    __attribute__((availability(ios,introduced=6.0)))
             #else
                     #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=6.0,deprecated=8.0)))
             #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1    __attribute__((availability(ios,introduced=6.0,deprecated=8.1)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=6.0,deprecated=8.1,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=6.0,deprecated=8.1)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2    __attribute__((availability(ios,introduced=6.0,deprecated=8.2)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=6.0,deprecated=8.2,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=6.0,deprecated=8.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3    __attribute__((availability(ios,introduced=6.0,deprecated=8.3)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=6.0,deprecated=8.3,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=6.0,deprecated=8.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4    __attribute__((availability(ios,introduced=6.0,deprecated=8.4)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=6.0,deprecated=8.4,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=6.0,deprecated=8.4)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=6.0,deprecated=9.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=6.0,deprecated=9.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=6.0,deprecated=9.0)))
+            #endif
             #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=6.0)))
             #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=6.0)))
             #define __AVAILABILITY_INTERNAL__IPHONE_6_1                    __attribute__((availability(ios,introduced=6.1)))
             #else
                     #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=6.1,deprecated=8.0)))
             #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1    __attribute__((availability(ios,introduced=6.1,deprecated=8.1)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=6.1,deprecated=8.1,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=6.1,deprecated=8.1)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2    __attribute__((availability(ios,introduced=6.1,deprecated=8.2)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=6.1,deprecated=8.2,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=6.1,deprecated=8.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3    __attribute__((availability(ios,introduced=6.1,deprecated=8.3)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=6.1,deprecated=8.3,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=6.1,deprecated=8.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4    __attribute__((availability(ios,introduced=6.1,deprecated=8.4)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=6.1,deprecated=8.4,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=6.1,deprecated=8.4)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=6.1,deprecated=9.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=6.1,deprecated=9.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=6.1,deprecated=9.0)))
+            #endif
             #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=6.1)))
             #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=6.1)))
             #define __AVAILABILITY_INTERNAL__IPHONE_7_0                    __attribute__((availability(ios,introduced=7.0)))
             #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0    __attribute__((availability(ios,introduced=7.0,deprecated=7.0)))
             #if __has_feature(attribute_availability_with_message)
-                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=7.0,message=_msg)))
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=7.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=7.0)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1    __attribute__((availability(ios,introduced=7.0,deprecated=7.1)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=7.1,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=7.1)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0    __attribute__((availability(ios,introduced=7.0,deprecated=8.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=8.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=8.0)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1    __attribute__((availability(ios,introduced=7.0,deprecated=8.1)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=8.1,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=8.1)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2    __attribute__((availability(ios,introduced=7.0,deprecated=8.2)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=8.2,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=8.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3    __attribute__((availability(ios,introduced=7.0,deprecated=8.3)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=8.3,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=8.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4    __attribute__((availability(ios,introduced=7.0,deprecated=8.4)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=8.4,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=8.4)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=7.0,deprecated=9.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=9.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=9.0)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=7.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=7.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1                    __attribute__((availability(ios,introduced=7.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1    __attribute__((availability(ios,introduced=7.1,deprecated=7.1)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg)    __attribute__((availability(ios,introduced=7.1,deprecated=7.1,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg)    __attribute__((availability(ios,introduced=7.1,deprecated=7.1)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0    __attribute__((availability(ios,introduced=7.1,deprecated=8.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=7.1,deprecated=8.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=7.1,deprecated=8.0)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1    __attribute__((availability(ios,introduced=7.1,deprecated=8.1)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=7.1,deprecated=8.1,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=7.1,deprecated=8.1)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2    __attribute__((availability(ios,introduced=7.1,deprecated=8.2)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=7.1,deprecated=8.2,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=7.1,deprecated=8.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3    __attribute__((availability(ios,introduced=7.1,deprecated=8.3)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=7.1,deprecated=8.3,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=7.1,deprecated=8.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4    __attribute__((availability(ios,introduced=7.1,deprecated=8.4)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=7.1,deprecated=8.4,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=7.1,deprecated=8.4)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=7.1,deprecated=9.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=7.1,deprecated=9.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=7.1,deprecated=9.0)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=7.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=7.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0                    __attribute__((availability(ios,introduced=8.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0    __attribute__((availability(ios,introduced=8.0,deprecated=8.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=8.0,deprecated=8.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=8.0,deprecated=8.0)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1    __attribute__((availability(ios,introduced=8.0,deprecated=8.1)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=8.0,deprecated=8.1,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=8.0,deprecated=8.1)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2    __attribute__((availability(ios,introduced=8.0,deprecated=8.2)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=8.0,deprecated=8.2,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=8.0,deprecated=8.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3    __attribute__((availability(ios,introduced=8.0,deprecated=8.3)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=8.0,deprecated=8.3,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=8.0,deprecated=8.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4    __attribute__((availability(ios,introduced=8.0,deprecated=8.4)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=8.0,deprecated=8.4,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=8.0,deprecated=8.4)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=8.0,deprecated=9.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=8.0,deprecated=9.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=8.0,deprecated=9.0)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=8.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=8.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1                    __attribute__((availability(ios,introduced=8.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1    __attribute__((availability(ios,introduced=8.1,deprecated=8.1)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=8.1,deprecated=8.1,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg)    __attribute__((availability(ios,introduced=8.1,deprecated=8.1)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2    __attribute__((availability(ios,introduced=8.1,deprecated=8.2)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=8.1,deprecated=8.2,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=8.1,deprecated=8.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3    __attribute__((availability(ios,introduced=8.1,deprecated=8.3)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=8.1,deprecated=8.3,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=8.1,deprecated=8.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4    __attribute__((availability(ios,introduced=8.1,deprecated=8.4)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=8.1,deprecated=8.4,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=8.1,deprecated=8.4)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=8.1,deprecated=9.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=8.1,deprecated=9.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=8.1,deprecated=9.0)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=8.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=8.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2                    __attribute__((availability(ios,introduced=8.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2    __attribute__((availability(ios,introduced=8.2,deprecated=8.2)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=8.2,deprecated=8.2,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg)    __attribute__((availability(ios,introduced=8.2,deprecated=8.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3    __attribute__((availability(ios,introduced=8.2,deprecated=8.3)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=8.2,deprecated=8.3,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=8.2,deprecated=8.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4    __attribute__((availability(ios,introduced=8.2,deprecated=8.4)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=8.2,deprecated=8.4,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=8.2,deprecated=8.4)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=8.2,deprecated=9.0)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=8.2,deprecated=9.0,message=_msg)))
+            #else
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=8.2,deprecated=9.0)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=8.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=8.2)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3                    __attribute__((availability(ios,introduced=8.3)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3    __attribute__((availability(ios,introduced=8.3,deprecated=8.3)))
+            #if __has_feature(attribute_availability_with_message)
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=8.3,deprecated=8.3,message=_msg)))
             #else
-                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=7.0)))
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __attribute__((availability(ios,introduced=8.3,deprecated=8.3)))
             #endif
-            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1    __attribute__((availability(ios,introduced=7.0,deprecated=7.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4    __attribute__((availability(ios,introduced=8.3,deprecated=8.4)))
             #if __has_feature(attribute_availability_with_message)
-                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=7.1,message=_msg)))
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=8.3,deprecated=8.4,message=_msg)))
             #else
-                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_1_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=7.1)))
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=8.3,deprecated=8.4)))
             #endif
-            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0    __attribute__((availability(ios,introduced=7.0,deprecated=8.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=8.3,deprecated=9.0)))
             #if __has_feature(attribute_availability_with_message)
-                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=8.0,message=_msg)))
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=8.3,deprecated=9.0,message=_msg)))
             #else
-                    #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=7.0,deprecated=8.0)))
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=8.3,deprecated=9.0)))
             #endif
-            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=7.0)))
-            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=7.0)))
-            #define __AVAILABILITY_INTERNAL__IPHONE_7_1                    __attribute__((availability(ios,introduced=7.1)))
-            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1    __attribute__((availability(ios,introduced=7.1,deprecated=7.1)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=8.3)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=8.3)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4                    __attribute__((availability(ios,introduced=8.4)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4    __attribute__((availability(ios,introduced=8.4,deprecated=8.4)))
             #if __has_feature(attribute_availability_with_message)
-                    #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg)    __attribute__((availability(ios,introduced=7.1,deprecated=7.1,message=_msg)))
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=8.4,deprecated=8.4,message=_msg)))
             #else
-                    #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_7_1_MSG(_msg)    __attribute__((availability(ios,introduced=7.1,deprecated=7.1)))
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __attribute__((availability(ios,introduced=8.4,deprecated=8.4)))
             #endif
-            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0    __attribute__((availability(ios,introduced=7.1,deprecated=8.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=8.4,deprecated=9.0)))
             #if __has_feature(attribute_availability_with_message)
-                    #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=7.1,deprecated=8.0,message=_msg)))
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=8.4,deprecated=9.0,message=_msg)))
             #else
-                    #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=7.1,deprecated=8.0)))
+                    #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=8.4,deprecated=9.0)))
             #endif
-            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=7.1)))
-            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=7.1)))
-            #define __AVAILABILITY_INTERNAL__IPHONE_8_0                    __attribute__((availability(ios,introduced=8.0)))
-            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0    __attribute__((availability(ios,introduced=8.0,deprecated=8.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=8.4)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=8.4)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0                    __attribute__((availability(ios,introduced=9.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0    __attribute__((availability(ios,introduced=9.0,deprecated=9.0)))
             #if __has_feature(attribute_availability_with_message)
-                    #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=8.0,deprecated=8.0,message=_msg)))
+                    #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=9.0,deprecated=9.0,message=_msg)))
             #else
-                    #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg)    __attribute__((availability(ios,introduced=8.0,deprecated=8.0)))
+                    #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __attribute__((availability(ios,introduced=9.0,deprecated=9.0)))
             #endif
-            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=8.0)))
-            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=8.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_NA               __attribute__((availability(ios,introduced=9.0)))
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_NA_MSG(_msg)     __attribute__((availability(ios,introduced=9.0)))
             #define __AVAILABILITY_INTERNAL__IPHONE_NA                               __attribute__((availability(ios,unavailable)))
             #define __AVAILABILITY_INTERNAL__IPHONE_NA_DEP__IPHONE_NA                __attribute__((availability(ios,unavailable)))
             #define __AVAILABILITY_INTERNAL__IPHONE_NA_DEP__IPHONE_NA_MSG(_msg)      __attribute__((availability(ios,unavailable)))
             #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0              __AVAILABILITY_INTERNAL_DEPRECATED
             #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
         #endif
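Each of the definitions above expands to a Clang availability attribute; the _MSG variants attach the custom message only when __has_feature(attribute_availability_with_message) is true, and otherwise fall back to the plain attribute. A minimal sketch of how one of the newly added macros could be applied to a declaration, assuming an Apple SDK where Availability.h pulls in AvailabilityInternal.h (the function names below are hypothetical, for illustration only):

    #include <Availability.h>

    /* Hypothetical API: introduced in iOS 8.1, deprecated in iOS 9.0.
     * The macro expands to
     * __attribute__((availability(ios,introduced=8.1,deprecated=9.0,message="...")))
     * on compilers that support availability messages, and to the same
     * attribute without the message clause otherwise. */
    void my_legacy_call(void)
        __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG("Use my_new_call instead");

    /* Hypothetical replacement, available starting with iOS 9.0. */
    void my_new_call(void)
        __AVAILABILITY_INTERNAL__IPHONE_9_0;

Compiling a call to my_legacy_call with an iOS 9.0 or later deployment target would then emit a deprecation warning carrying that message, while my_new_call would warn or weak-link when the deployment target is older than 9.0.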
+        /* set up old style internal macros (up to 8.1) */
+        #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1                      __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1                      __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1                      __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_NA           __AVAILABILITY_INTERNAL__IPHONE_8_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_1
+        #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_1
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_1_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+        #endif
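(For context: the token-pasted names defined above are never written by hand; they are reached from the public availability macros in Availability.h. A minimal sketch, assuming the token-pasting form this SDK generation uses for iOS targets; the declaration below is hypothetical and for illustration only.)

    /* Availability.h (iOS targets) pastes the introduction and deprecation
       versions into one internal macro name: */
    #define __OSX_AVAILABLE_BUT_DEPRECATED(_osxIntro, _osxDep, _iosIntro, _iosDep) \
        __AVAILABILITY_INTERNAL##_iosIntro##_DEP##_iosDep

    /* So a hypothetical declaration such as: */
    extern int old_api(void)
        __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_10, __IPHONE_2_0, __IPHONE_8_1);
    /* expands to __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_1, which the
       cascade above resolves to a regular, introduced-in, or deprecated
       attribute depending on the deployment target. */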
+        /* set up old style internal macros (up to 8.2) */
+        #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2                      __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2                      __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2                      __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_NA           __AVAILABILITY_INTERNAL__IPHONE_8_2
+        #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_2
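(The three-way selection above is the standard pattern for each newly introduced release: a symbol tagged with __IPHONE_8_2 is unavailable when the SDK itself predates 8.2, weak-imported when the deployment target is older than 8.2, and a regular import otherwise. A minimal sketch of the run-time check the weak-import case enables; new_in_8_2 is a hypothetical symbol, and __OSX_AVAILABLE_STARTING is the public wrapper assumed to forward to this internal macro on iOS targets.)

    /* Declared against the 8.2 availability macro: */
    extern void new_in_8_2(void) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_2);

    /* With a deployment target below 8.2 the symbol is a weak import, so it
       resolves to NULL when running on an older release: */
    if (new_in_8_2 != NULL) {
        new_in_8_2();
    }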
+        #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL__IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_2
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+        #endif
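+        /* Illustrative note (added annotation, not part of the shipped header):
+         * each _DEP_ branch above picks one of three expansions from the
+         * deployment target -- the versioned availability macro when the symbol
+         * was introduced after the minimum required OS, __AVAILABILITY_INTERNAL_REGULAR
+         * when it is available but not yet deprecated for that target, and
+         * __AVAILABILITY_INTERNAL_DEPRECATED(_MSG) once the target reaches the
+         * deprecation version (8.2 in the block above). */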
+        /* set up old style internal macros (up to 8.3) */
+        #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_8_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3                      __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3                      __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3                      __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_NA           __AVAILABILITY_INTERNAL__IPHONE_8_3
+        #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_3
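+        /* Sketch of intended use (assumed, for illustration only; new_api is a
+         * hypothetical name): a declaration such as
+         *     extern int new_api(void) __AVAILABILITY_INTERNAL__IPHONE_8_3;
+         * would typically end up marked unavailable when built against an SDK
+         * older than 8.3, weak-imported when the deployment target predates 8.3,
+         * and carry no attribute otherwise, per the three branches above. */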
+        #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL__IPHONE_8_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_3
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+        #endif
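        (Editorial sketch, not part of the upstream header: how the deprecation cascade closed by the #endif above is consumed. The declaration below assumes a deployment target of iOS 7.0 built against an iOS 8.3 or later SDK, so the "< __IPHONE_7_1" branch above applies; the function name legacy_call is hypothetical and used only for illustration.)

        #include <Availability.h>

        /* Introduced in iOS 2.0, deprecated in iOS 8.3.  With a 7.0 deployment
           target the branch above resolves
           __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3 to
           __AVAILABILITY_INTERNAL_REGULAR, so no deprecation warning is emitted;
           raising the deployment target to 8.3 or later takes the final #else
           branch and the same macro becomes __AVAILABILITY_INTERNAL_DEPRECATED. */
        extern int legacy_call(void) __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_3;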
+        /* set up old style internal macros (up to 8.4) */
+        #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_8_4
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4                      __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_4
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4                      __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4                      __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_NA           __AVAILABILITY_INTERNAL__IPHONE_8_4
+        #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_8_4
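        (Editorial sketch, not part of the upstream header: the three-way selection above marks iOS 8.4 API unavailable when the SDK predates 8.4, weak-imported when the SDK supports 8.4 but the deployment target is lower, and regular otherwise. The weak-import case is what allows the usual run-time NULL probe on Darwin; new_84_call is a hypothetical symbol, and __AVAILABILITY_INTERNAL_WEAK_IMPORT is assumed to expand to the weak_import attribute as elsewhere in this header.)

        #include <Availability.h>
        #include <stddef.h>

        /* Hypothetical symbol introduced in iOS 8.4.  With an 8.4 SDK and an
           older deployment target the selection above picks the weak-import
           variant, so the symbol resolves to NULL when the binary runs on
           iOS 8.3 or earlier. */
        extern void new_84_call(void) __AVAILABILITY_INTERNAL__IPHONE_8_4;

        static inline void maybe_use_new_api(void)
        {
            if (new_84_call != NULL)    /* NULL on systems older than 8.4 */
                new_84_call();
        }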
+        #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_8_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_8_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_3
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_4
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL__IPHONE_8_4
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_4
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_8_4_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+        #endif
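        /*
         * Orientation note (illustrative only, not part of this commit): the
         * _DEP_ macros selected by the ladder above are what the wrapper
         * macros in Availability.h paste together, so a declaration
         * deprecated in iOS 8.4 only picks up a deprecation attribute once
         * the deployment target reaches 8.4.  A minimal sketch, using a
         * hypothetical function name:
         *
         *     #include <Availability.h>
         *
         *     // Introduced in iOS 2.0, deprecated in iOS 8.4.  On iOS this
         *     // expands via __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_8_4:
         *     // with __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_4 it
         *     // becomes a deprecation attribute, otherwise it stays a
         *     // regular declaration, per the #if ladder above.
         *     void my_legacy_call(void)
         *         __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_6, __MAC_10_10,
         *                                        __IPHONE_2_0, __IPHONE_8_4);
         */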
+        /* set up old style internal macros (up to 9.0) */
+        #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_9_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0                      __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_9_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0                      __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0                      __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_NA           __AVAILABILITY_INTERNAL__IPHONE_9_0
+        #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_9_0
+        #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_2_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_3_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_3_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_4_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_4_3
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_5_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_5_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_6_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_6_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_6_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_7_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_7_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_7_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_8_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_0
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_8_1
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_1
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_8_2
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_2
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_8_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_8_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_8_3
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_3
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_8_4
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_8_4
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_4
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_8_4
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_8_4
+        #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_9_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_REGULAR
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL__IPHONE_9_0
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL__IPHONE_9_0
+        #else
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_7_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_1_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_2_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_3_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_8_4_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__IPHONE_9_0_DEP__IPHONE_9_0_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+        #endif
         /* set up internal macros (n/a) */
         #define __AVAILABILITY_INTERNAL__IPHONE_NA                               __AVAILABILITY_INTERNAL_UNAVAILABLE
         #define __AVAILABILITY_INTERNAL__IPHONE_NA_DEP__IPHONE_NA                __AVAILABILITY_INTERNAL_UNAVAILABLE
     #define __MAC_OS_X_VERSION_MIN_REQUIRED __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__
     /* make sure a default max version is set */
     #ifndef __MAC_OS_X_VERSION_MAX_ALLOWED
-        #define __MAC_OS_X_VERSION_MAX_ALLOWED __MAC_10_10
+        #define __MAC_OS_X_VERSION_MAX_ALLOWED __MAC_10_11
     #endif
 
     #if defined(__has_attribute) && defined(__has_feature)
             #else
                 #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_MSG(_msg)    __attribute__((availability(macosx,introduced=10.0,deprecated=10.10)))
             #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_2    __attribute__((availability(macosx,introduced=10.0,deprecated=10.10.2)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.0,deprecated=10.10.2,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.0,deprecated=10.10.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_3    __attribute__((availability(macosx,introduced=10.0,deprecated=10.10.3)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.0,deprecated=10.10.3,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.0,deprecated=10.10.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_11    __attribute__((availability(macosx,introduced=10.0,deprecated=10.11)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.0,deprecated=10.11,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.0,deprecated=10.11)))
+            #endif
             #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_NA_MSG(_msg)      __attribute__((availability(macosx,introduced=10.0)))
             #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_NA                __attribute__((availability(macosx,introduced=10.0)))
             #define __AVAILABILITY_INTERNAL__MAC_10_1                  __attribute__((availability(macosx,introduced=10.1)))
             #else
                 #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_MSG(_msg)    __attribute__((availability(macosx,introduced=10.1,deprecated=10.10)))
             #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_2    __attribute__((availability(macosx,introduced=10.1,deprecated=10.10.2)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.1,deprecated=10.10.2,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.1,deprecated=10.10.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_3    __attribute__((availability(macosx,introduced=10.1,deprecated=10.10.3)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.1,deprecated=10.10.3,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.1,deprecated=10.10.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_11    __attribute__((availability(macosx,introduced=10.1,deprecated=10.11)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.1,deprecated=10.11,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.1,deprecated=10.11)))
+            #endif
             #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_NA_MSG(_msg)      __attribute__((availability(macosx,introduced=10.1)))
             #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_NA                __attribute__((availability(macosx,introduced=10.1)))
             #define __AVAILABILITY_INTERNAL__MAC_10_2                  __attribute__((availability(macosx,introduced=10.2)))
             #else
                 #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_MSG(_msg)    __attribute__((availability(macosx,introduced=10.2,deprecated=10.10)))
             #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_2    __attribute__((availability(macosx,introduced=10.2,deprecated=10.10.2)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.2,deprecated=10.10.2,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.2,deprecated=10.10.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_3    __attribute__((availability(macosx,introduced=10.2,deprecated=10.10.3)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.2,deprecated=10.10.3,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.2,deprecated=10.10.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_11    __attribute__((availability(macosx,introduced=10.2,deprecated=10.11)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.2,deprecated=10.11,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.2,deprecated=10.11)))
+            #endif
             #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_NA_MSG(_msg)      __attribute__((availability(macosx,introduced=10.2)))
             #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_NA                __attribute__((availability(macosx,introduced=10.2)))
             #define __AVAILABILITY_INTERNAL__MAC_10_3                  __attribute__((availability(macosx,introduced=10.3)))
             #else
                 #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_MSG(_msg)    __attribute__((availability(macosx,introduced=10.3,deprecated=10.10)))
             #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_2    __attribute__((availability(macosx,introduced=10.3,deprecated=10.10.2)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.3,deprecated=10.10.2,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.3,deprecated=10.10.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_3    __attribute__((availability(macosx,introduced=10.3,deprecated=10.10.3)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.3,deprecated=10.10.3,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.3,deprecated=10.10.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_11    __attribute__((availability(macosx,introduced=10.3,deprecated=10.11)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.3,deprecated=10.11,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.3,deprecated=10.11)))
+            #endif
             #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_NA_MSG(_msg)      __attribute__((availability(macosx,introduced=10.3)))
             #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_NA                __attribute__((availability(macosx,introduced=10.3)))
             #define __AVAILABILITY_INTERNAL__MAC_10_4                  __attribute__((availability(macosx,introduced=10.4)))
             #else
                 #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_MSG(_msg)    __attribute__((availability(macosx,introduced=10.4,deprecated=10.10)))
             #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_2    __attribute__((availability(macosx,introduced=10.4,deprecated=10.10.2)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.4,deprecated=10.10.2,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.4,deprecated=10.10.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_3    __attribute__((availability(macosx,introduced=10.4,deprecated=10.10.3)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.4,deprecated=10.10.3,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.4,deprecated=10.10.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_11    __attribute__((availability(macosx,introduced=10.4,deprecated=10.11)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.4,deprecated=10.11,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.4,deprecated=10.11)))
+            #endif
             #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_NA_MSG(_msg)      __attribute__((availability(macosx,introduced=10.4)))
             #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_NA                __attribute__((availability(macosx,introduced=10.4)))
             #define __AVAILABILITY_INTERNAL__MAC_10_5                  __attribute__((availability(macosx,introduced=10.5)))
             #else
                 #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_MSG(_msg)    __attribute__((availability(macosx,introduced=10.5,deprecated=10.10)))
             #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_2    __attribute__((availability(macosx,introduced=10.5,deprecated=10.10.2)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.5,deprecated=10.10.2,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.5,deprecated=10.10.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_3    __attribute__((availability(macosx,introduced=10.5,deprecated=10.10.3)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.5,deprecated=10.10.3,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.5,deprecated=10.10.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_11    __attribute__((availability(macosx,introduced=10.5,deprecated=10.11)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.5,deprecated=10.11,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.5,deprecated=10.11)))
+            #endif
             #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_NA_MSG(_msg)      __attribute__((availability(macosx,introduced=10.5)))
             #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_NA                __attribute__((availability(macosx,introduced=10.5)))
             #define __AVAILABILITY_INTERNAL__MAC_10_6                  __attribute__((availability(macosx,introduced=10.6)))
             #else
                 #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_MSG(_msg)    __attribute__((availability(macosx,introduced=10.6,deprecated=10.10)))
             #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_2    __attribute__((availability(macosx,introduced=10.6,deprecated=10.10.2)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.6,deprecated=10.10.2,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.6,deprecated=10.10.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_3    __attribute__((availability(macosx,introduced=10.6,deprecated=10.10.3)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.6,deprecated=10.10.3,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.6,deprecated=10.10.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_11    __attribute__((availability(macosx,introduced=10.6,deprecated=10.11)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.6,deprecated=10.11,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.6,deprecated=10.11)))
+            #endif
             #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_NA_MSG(_msg)      __attribute__((availability(macosx,introduced=10.6)))
             #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_NA                __attribute__((availability(macosx,introduced=10.6)))
             #define __AVAILABILITY_INTERNAL__MAC_10_7                  __attribute__((availability(macosx,introduced=10.7)))
             #else
                 #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_MSG(_msg)    __attribute__((availability(macosx,introduced=10.7,deprecated=10.10)))
             #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_2    __attribute__((availability(macosx,introduced=10.7,deprecated=10.10.2)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.7,deprecated=10.10.2,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.7,deprecated=10.10.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_3    __attribute__((availability(macosx,introduced=10.7,deprecated=10.10.3)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.7,deprecated=10.10.3,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.7,deprecated=10.10.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_11    __attribute__((availability(macosx,introduced=10.7,deprecated=10.11)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.7,deprecated=10.11,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.7,deprecated=10.11)))
+            #endif
             #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_NA_MSG(_msg)      __attribute__((availability(macosx,introduced=10.7)))
             #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_NA                __attribute__((availability(macosx,introduced=10.7)))
             #define __AVAILABILITY_INTERNAL__MAC_10_8                  __attribute__((availability(macosx,introduced=10.8)))
             #else
                 #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_MSG(_msg)    __attribute__((availability(macosx,introduced=10.8,deprecated=10.10)))
             #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_2    __attribute__((availability(macosx,introduced=10.8,deprecated=10.10.2)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.8,deprecated=10.10.2,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.8,deprecated=10.10.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_3    __attribute__((availability(macosx,introduced=10.8,deprecated=10.10.3)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.8,deprecated=10.10.3,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.8,deprecated=10.10.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_11    __attribute__((availability(macosx,introduced=10.8,deprecated=10.11)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.8,deprecated=10.11,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.8,deprecated=10.11)))
+            #endif
             #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_NA_MSG(_msg)      __attribute__((availability(macosx,introduced=10.8)))
             #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_NA                __attribute__((availability(macosx,introduced=10.8)))
             #define __AVAILABILITY_INTERNAL__MAC_10_9                  __attribute__((availability(macosx,introduced=10.9)))
             #else
                 #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_MSG(_msg)    __attribute__((availability(macosx,introduced=10.9,deprecated=10.10)))
             #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_2    __attribute__((availability(macosx,introduced=10.9,deprecated=10.10.2)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.9,deprecated=10.10.2,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.9,deprecated=10.10.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_3    __attribute__((availability(macosx,introduced=10.9,deprecated=10.10.3)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.9,deprecated=10.10.3,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.9,deprecated=10.10.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_11    __attribute__((availability(macosx,introduced=10.9,deprecated=10.11)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.9,deprecated=10.11,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.9,deprecated=10.11)))
+            #endif
             #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_NA_MSG(_msg)      __attribute__((availability(macosx,introduced=10.9)))
             #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_NA                __attribute__((availability(macosx,introduced=10.9)))
             #define __AVAILABILITY_INTERNAL__MAC_10_10                  __attribute__((availability(macosx,introduced=10.10)))
             #else
                 #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_MSG(_msg)    __attribute__((availability(macosx,introduced=10.10,deprecated=10.10)))
             #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_2    __attribute__((availability(macosx,introduced=10.10,deprecated=10.10.2)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.10,deprecated=10.10.2,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.10,deprecated=10.10.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_3    __attribute__((availability(macosx,introduced=10.10,deprecated=10.10.3)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.10,deprecated=10.10.3,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.10,deprecated=10.10.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_11    __attribute__((availability(macosx,introduced=10.10,deprecated=10.11)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.10,deprecated=10.11,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.10,deprecated=10.11)))
+            #endif
             #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_NA_MSG(_msg)      __attribute__((availability(macosx,introduced=10.10)))
             #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_NA                __attribute__((availability(macosx,introduced=10.10)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_2                  __attribute__((availability(macosx,introduced=10.10.2)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_2    __attribute__((availability(macosx,introduced=10.10.2,deprecated=10.10.2)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.10.2,deprecated=10.10.2,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_2_MSG(_msg)    __attribute__((availability(macosx,introduced=10.10.2,deprecated=10.10.2)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_3    __attribute__((availability(macosx,introduced=10.10.2,deprecated=10.10.3)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.10.2,deprecated=10.10.3,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.10.2,deprecated=10.10.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_11    __attribute__((availability(macosx,introduced=10.10.2,deprecated=10.11)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.10.2,deprecated=10.11,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.10.2,deprecated=10.11)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_NA_MSG(_msg)      __attribute__((availability(macosx,introduced=10.10.2)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_NA                __attribute__((availability(macosx,introduced=10.10.2)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_3                  __attribute__((availability(macosx,introduced=10.10.3)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_10_3    __attribute__((availability(macosx,introduced=10.10.3,deprecated=10.10.3)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.10.3,deprecated=10.10.3,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_10_3_MSG(_msg)    __attribute__((availability(macosx,introduced=10.10.3,deprecated=10.10.3)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_11    __attribute__((availability(macosx,introduced=10.10.3,deprecated=10.11)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.10.3,deprecated=10.11,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.10.3,deprecated=10.11)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_NA_MSG(_msg)      __attribute__((availability(macosx,introduced=10.10.3)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_NA                __attribute__((availability(macosx,introduced=10.10.3)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_11                  __attribute__((availability(macosx,introduced=10.11)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_10_11    __attribute__((availability(macosx,introduced=10.11,deprecated=10.11)))
+            #if __has_feature(attribute_availability_with_message)
+                #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.11,deprecated=10.11,message=_msg)))
+            #else
+                #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_10_11_MSG(_msg)    __attribute__((availability(macosx,introduced=10.11,deprecated=10.11)))
+            #endif
+            #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_NA_MSG(_msg)      __attribute__((availability(macosx,introduced=10.11)))
+            #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_NA                __attribute__((availability(macosx,introduced=10.11)))
             #define __AVAILABILITY_INTERNAL__MAC_NA                        __attribute__((availability(macosx,unavailable)))
             #define __AVAILABILITY_INTERNAL__MAC_NA_DEP__MAC_NA            __attribute__((availability(macosx,unavailable)))
             #define __AVAILABILITY_INTERNAL__MAC_NA_DEP__MAC_NA_MSG(_msg)  __attribute__((availability(macosx,unavailable)))
 
     #ifndef __AVAILABILITY_INTERNAL__MAC_10_0
         /* use old style attributes */
+        #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_11
+            #define __AVAILABILITY_INTERNAL__MAC_10_11        __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_11
+            #define __AVAILABILITY_INTERNAL__MAC_10_11        __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_11        __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_10_3
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_3        __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_10_3
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_3        __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_3        __AVAILABILITY_INTERNAL_REGULAR
+        #endif
+        #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_2        __AVAILABILITY_INTERNAL_UNAVAILABLE
+        #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_2        __AVAILABILITY_INTERNAL_WEAK_IMPORT
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_2        __AVAILABILITY_INTERNAL_REGULAR
+        #endif
         #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_10
             #define __AVAILABILITY_INTERNAL__MAC_10_10        __AVAILABILITY_INTERNAL_UNAVAILABLE
         #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_10
             #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10              __AVAILABILITY_INTERNAL__MAC_10_10
             #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_10
         #endif
+        #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL__MAC_10_0
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_0
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL__MAC_10_1
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_1
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL__MAC_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL__MAC_10_3
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_3
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL__MAC_10_4
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_4
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL__MAC_10_5
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_5
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL__MAC_10_6
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_6
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL__MAC_10_7
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_7
+            #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL__MAC_10_8
+            #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_8
+            #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL__MAC_10_9
+            #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_9
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL__MAC_10_10
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_10
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_2              __AVAILABILITY_INTERNAL__MAC_10_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_2_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_10_2
+        #endif
+        #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_10_3
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL__MAC_10_0
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_0
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL__MAC_10_1
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_1
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL__MAC_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL__MAC_10_3
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_3
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL__MAC_10_4
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_4
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL__MAC_10_5
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_5
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL__MAC_10_6
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_6
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL__MAC_10_7
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_7
+            #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL__MAC_10_8
+            #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_8
+            #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL__MAC_10_9
+            #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_9
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL__MAC_10_10
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_10
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL__MAC_10_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_10_3              __AVAILABILITY_INTERNAL__MAC_10_10_3
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_10_3_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_10_3
+        #endif
+        #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_11
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_11              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_11              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_11              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_11              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_11              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_11              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_11              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_11              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_11              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_11              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_11              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_11              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_11              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+            #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_10_11              __AVAILABILITY_INTERNAL_DEPRECATED
+            #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+        #else
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_11              __AVAILABILITY_INTERNAL__MAC_10_0
+            #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_0
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_11              __AVAILABILITY_INTERNAL__MAC_10_1
+            #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_1
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_11              __AVAILABILITY_INTERNAL__MAC_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_11              __AVAILABILITY_INTERNAL__MAC_10_3
+            #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_3
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_11              __AVAILABILITY_INTERNAL__MAC_10_4
+            #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_4
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_11              __AVAILABILITY_INTERNAL__MAC_10_5
+            #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_5
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_11              __AVAILABILITY_INTERNAL__MAC_10_6
+            #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_6
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_11              __AVAILABILITY_INTERNAL__MAC_10_7
+            #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_7
+            #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_11              __AVAILABILITY_INTERNAL__MAC_10_8
+            #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_8
+            #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_11              __AVAILABILITY_INTERNAL__MAC_10_9
+            #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_9
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_11              __AVAILABILITY_INTERNAL__MAC_10_10
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_10
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_11              __AVAILABILITY_INTERNAL__MAC_10_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_10_2
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_11              __AVAILABILITY_INTERNAL__MAC_10_10_3
+            #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_10_3
+            #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_10_11              __AVAILABILITY_INTERNAL__MAC_10_11
+            #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_10_11_MSG(_msg)    __AVAILABILITY_INTERNAL__MAC_10_11
+        #endif
         #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_0
         #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_NA_MSG(_msg)   __AVAILABILITY_INTERNAL__MAC_10_0
         #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_1
         #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_NA_MSG(_msg)   __AVAILABILITY_INTERNAL__MAC_10_9
         #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_10
         #define __AVAILABILITY_INTERNAL__MAC_10_10_DEP__MAC_NA_MSG(_msg)   __AVAILABILITY_INTERNAL__MAC_10_10
+        #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_10_2
+        #define __AVAILABILITY_INTERNAL__MAC_10_10_2_DEP__MAC_NA_MSG(_msg)   __AVAILABILITY_INTERNAL__MAC_10_10_2
+        #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_10_3
+        #define __AVAILABILITY_INTERNAL__MAC_10_10_3_DEP__MAC_NA_MSG(_msg)   __AVAILABILITY_INTERNAL__MAC_10_10_3
+        #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_11
+        #define __AVAILABILITY_INTERNAL__MAC_10_11_DEP__MAC_NA_MSG(_msg)   __AVAILABILITY_INTERNAL__MAC_10_11
         #define __AVAILABILITY_INTERNAL__MAC_NA_DEP__MAC_NA               __AVAILABILITY_INTERNAL_UNAVAILABLE
         #define __AVAILABILITY_INTERNAL__MAC_NA_DEP__MAC_NA_MSG(_msg)     __AVAILABILITY_INTERNAL_UNAVAILABLE
     #endif
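
On this fallback path a declaration tagged for 10.11 becomes weak-imported whenever the deployment target is older, which is what makes the classic run-time NULL check possible; a minimal sketch, assuming a made-up symbol hypothetical_10_11_call:

/* Illustrative only: hypothetical_10_11_call is not a real symbol. */
#include <Availability.h>

extern void hypothetical_10_11_call(void) __AVAILABILITY_INTERNAL__MAC_10_11;

static void call_if_present(void)
{
    /* With a pre-10.11 deployment target the old-style path above picks the
     * weak-import flavor, so the symbol resolves to NULL on systems that
     * lack it and can be probed before use. */
    if (hypothetical_10_11_call != NULL) {
        hypothetical_10_11_call();
    }
}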
index 6296979082e0c9861017561cc15933bc88e9b587..9ff820a8014834ee7395773938ca4daf62fa3ad3 100644 (file)
 #define MAC_OS_X_VERSION_10_8         1080
 #define MAC_OS_X_VERSION_10_9         1090
 #define MAC_OS_X_VERSION_10_10      101000
+#define MAC_OS_X_VERSION_10_10_2    101002
+#define MAC_OS_X_VERSION_10_10_3    101003
+#define MAC_OS_X_VERSION_10_11      101100
 
 /* 
- * If min OS not specified, assume 10.1 for ppc and 10.4 for all others
- * Note: gcc driver may set _ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED_ based on MACOSX_DEPLOYMENT_TARGET environment variable
+ * If min OS not specified, assume 10.4 for intel
+ * Note: compiler driver may set _ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED_ based on MACOSX_DEPLOYMENT_TARGET environment variable
  */
 #ifndef MAC_OS_X_VERSION_MIN_REQUIRED
     #ifdef __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__
         #if (__i386__ || __x86_64__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < MAC_OS_X_VERSION_10_4)
             #warning Building for Intel with Mac OS X Deployment Target < 10.4 is invalid.
-        #elif __ppc64__ && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < MAC_OS_X_VERSION_10_4)
-            #warning Building for ppc64 with Mac OS X Deployment Target < 10.4 is invalid.
         #endif
         #define MAC_OS_X_VERSION_MIN_REQUIRED __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__
     #else
-        #if __ppc64__ || __i386__ || __x86_64__
+        #if __i386__ || __x86_64__
             #define MAC_OS_X_VERSION_MIN_REQUIRED MAC_OS_X_VERSION_10_4
         #else
             #define MAC_OS_X_VERSION_MIN_REQUIRED MAC_OS_X_VERSION_10_1
         #endif
-    #endif
+     #endif
 #endif
 
 /*
- * if max OS not specified, assume larger of (10.10, min)
+ * if max OS not specified, assume larger of (10.11, min)
  */
 #ifndef MAC_OS_X_VERSION_MAX_ALLOWED
-    #if MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_10
+    #if MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_11
         #define MAC_OS_X_VERSION_MAX_ALLOWED MAC_OS_X_VERSION_MIN_REQUIRED
     #else
-        #define MAC_OS_X_VERSION_MAX_ALLOWED MAC_OS_X_VERSION_10_10
+        #define MAC_OS_X_VERSION_MAX_ALLOWED MAC_OS_X_VERSION_10_11
     #endif
 #endif
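
With these defaults in place, both version macros can be inspected directly; a minimal sketch, assuming a build with MACOSX_DEPLOYMENT_TARGET=10.9 and no explicit MAC_OS_X_VERSION_MAX_ALLOWED:

#include <stdio.h>
#include <AvailabilityMacros.h>

int main(void)
{
    /* Assumed 1090 here: the compiler driver derives the minimum from the
     * deployment target via __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__. */
    printf("MIN_REQUIRED = %d\n", MAC_OS_X_VERSION_MIN_REQUIRED);
    /* Defaults to the larger of 10.11 (101100) and MIN_REQUIRED. */
    printf("MAX_ALLOWED  = %d\n", MAC_OS_X_VERSION_MAX_ALLOWED);
    return 0;
}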
 
 #endif
 
 
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER
+ * 
+ * Used on declarations introduced in Mac OS X 10.10.2 
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER     __OSX_AVAILABLE_STARTING(__MAC_10_10_2, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_10_2
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER     UNAVAILABLE_ATTRIBUTE
+#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_10_2
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER     WEAK_IMPORT_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER
+#endif
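
In a header, the macro is appended to the declaration it guards; a minimal sketch, with example_fsctl standing in for a real symbol:

#include <AvailabilityMacros.h>

/* example_fsctl is a made-up declaration used only to show placement: it is
 * unavailable when the SDK caps out below 10.10.2, weak-imported when the
 * deployment target is older than 10.10.2, and a plain symbol otherwise. */
extern int example_fsctl(int fd, unsigned long op) AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER;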
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED
+ *
+ * Used on declarations introduced in Mac OS X 10.10.2,
+ * and deprecated in Mac OS X 10.10.2
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED     __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_10_2, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED    AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2
+ *
+ * Used on declarations introduced in Mac OS X 10.0,
+ * but later deprecated in Mac OS X 10.10.2
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2
+ *
+ * Used on declarations introduced in Mac OS X 10.1,
+ * but later deprecated in Mac OS X 10.10.2
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2
+ *
+ * Used on declarations introduced in Mac OS X 10.2,
+ * but later deprecated in Mac OS X 10.10.2
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2
+ *
+ * Used on declarations introduced in Mac OS X 10.3,
+ * but later deprecated in Mac OS X 10.10.2
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2
+ *
+ * Used on declarations introduced in Mac OS X 10.4,
+ * but later deprecated in Mac OS X 10.10.2
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2
+ *
+ * Used on declarations introduced in Mac OS X 10.5,
+ * but later deprecated in Mac OS X 10.10.2
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2
+    #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2
+ *
+ * Used on declarations introduced in Mac OS X 10.6,
+ * but later deprecated in Mac OS X 10.10.2
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_6, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2
+    #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2
+ *
+ * Used on declarations introduced in Mac OS X 10.7,
+ * but later deprecated in Mac OS X 10.10.2
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_7, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2
+    #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2
+ *
+ * Used on declarations introduced in Mac OS X 10.8,
+ * but later deprecated in Mac OS X 10.10.2
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_8, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2
+    #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2
+ *
+ * Used on declarations introduced in Mac OS X 10.9,
+ * but later deprecated in Mac OS X 10.10.2
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_9, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2
+    #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2
+ *
+ * Used on declarations introduced in Mac OS X 10.10,
+ * but later deprecated in Mac OS X 10.10.2
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_10, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2    AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER
+#endif
+
+/*
+ * DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2_AND_LATER
+ *
+ * Used on types deprecated in Mac OS X 10.10.2
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2_AND_LATER    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_10_2, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_2
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2_AND_LATER    DEPRECATED_ATTRIBUTE
+#else
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_10_2_AND_LATER
+#endif
+
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER
+ * 
+ * Used on declarations introduced in Mac OS X 10.10.3 
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER     __OSX_AVAILABLE_STARTING(__MAC_10_10_3, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_10_3
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER     UNAVAILABLE_ATTRIBUTE
+#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_10_3
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER     WEAK_IMPORT_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER_BUT_DEPRECATED
+ *
+ * Used on declarations introduced in Mac OS X 10.10.3,
+ * and deprecated in Mac OS X 10.10.3
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER_BUT_DEPRECATED     __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_10_3, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER_BUT_DEPRECATED    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER_BUT_DEPRECATED    AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3
+ *
+ * Used on declarations introduced in Mac OS X 10.0,
+ * but later deprecated in Mac OS X 10.10.3
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3
+ *
+ * Used on declarations introduced in Mac OS X 10.1,
+ * but later deprecated in Mac OS X 10.10.3
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3
+ *
+ * Used on declarations introduced in Mac OS X 10.2,
+ * but later deprecated in Mac OS X 10.10.3
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3
+ *
+ * Used on declarations introduced in Mac OS X 10.3,
+ * but later deprecated in Mac OS X 10.10.3
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3
+ *
+ * Used on declarations introduced in Mac OS X 10.4,
+ * but later deprecated in Mac OS X 10.10.3
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3
+ *
+ * Used on declarations introduced in Mac OS X 10.5,
+ * but later deprecated in Mac OS X 10.10.3
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3
+    #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3
+ *
+ * Used on declarations introduced in Mac OS X 10.6,
+ * but later deprecated in Mac OS X 10.10.3
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_6, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3
+    #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3
+ *
+ * Used on declarations introduced in Mac OS X 10.7,
+ * but later deprecated in Mac OS X 10.10.3
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_7, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3
+    #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3
+ *
+ * Used on declarations introduced in Mac OS X 10.8,
+ * but later deprecated in Mac OS X 10.10.3
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_8, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3
+    #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3
+ *
+ * Used on declarations introduced in Mac OS X 10.9,
+ * but later deprecated in Mac OS X 10.10.3
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_9, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3
+    #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3
+ *
+ * Used on declarations introduced in Mac OS X 10.10,
+ * but later deprecated in Mac OS X 10.10.3
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_10, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3
+ *
+ * Used on declarations introduced in Mac OS X 10.10.2,
+ * but later deprecated in Mac OS X 10.10.3
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_10_2, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3    AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER
+#endif
+
+/*
+ * DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3_AND_LATER
+ *
+ * Used on types deprecated in Mac OS X 10.10.3
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3_AND_LATER    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_10_3, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10_3
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3_AND_LATER    DEPRECATED_ATTRIBUTE
+#else
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_10_3_AND_LATER
+#endif
+
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_11_AND_LATER
+ * 
+ * Used on declarations introduced in Mac OS X 10.11 
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_11_AND_LATER     __OSX_AVAILABLE_STARTING(__MAC_10_11, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_11
+    #define AVAILABLE_MAC_OS_X_VERSION_10_11_AND_LATER     UNAVAILABLE_ATTRIBUTE
+#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_11
+    #define AVAILABLE_MAC_OS_X_VERSION_10_11_AND_LATER     WEAK_IMPORT_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_11_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_11_AND_LATER_BUT_DEPRECATED
+ *
+ * Used on declarations introduced in Mac OS X 10.11,
+ * and deprecated in Mac OS X 10.11
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_11_AND_LATER_BUT_DEPRECATED     __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_11, __MAC_10_11, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11
+    #define AVAILABLE_MAC_OS_X_VERSION_10_11_AND_LATER_BUT_DEPRECATED    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_11_AND_LATER_BUT_DEPRECATED    AVAILABLE_MAC_OS_X_VERSION_10_11_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11
+ *
+ * Used on declarations introduced in Mac OS X 10.0,
+ * but later deprecated in Mac OS X 10.11
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_11, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11
+ *
+ * Used on declarations introduced in Mac OS X 10.1,
+ * but later deprecated in Mac OS X 10.11
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_11, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11
+ *
+ * Used on declarations introduced in Mac OS X 10.2,
+ * but later deprecated in Mac OS X 10.11
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_11, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11
+ *
+ * Used on declarations introduced in Mac OS X 10.3,
+ * but later deprecated in Mac OS X 10.11
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_11, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11
+ *
+ * Used on declarations introduced in Mac OS X 10.4,
+ * but later deprecated in Mac OS X 10.11
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_11, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11
+ *
+ * Used on declarations introduced in Mac OS X 10.5,
+ * but later deprecated in Mac OS X 10.11
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_11, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11
+    #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11
+ *
+ * Used on declarations introduced in Mac OS X 10.6,
+ * but later deprecated in Mac OS X 10.11
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_6, __MAC_10_11, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11
+    #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11
+ *
+ * Used on declarations introduced in Mac OS X 10.7,
+ * but later deprecated in Mac OS X 10.11
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_7, __MAC_10_11, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11
+    #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11
+ *
+ * Used on declarations introduced in Mac OS X 10.8,
+ * but later deprecated in Mac OS X 10.11
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_8, __MAC_10_11, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11
+    #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11
+ *
+ * Used on declarations introduced in Mac OS X 10.9,
+ * but later deprecated in Mac OS X 10.11
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_9, __MAC_10_11, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11
+    #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11
+ *
+ * Used on declarations introduced in Mac OS X 10.10,
+ * but later deprecated in Mac OS X 10.11
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_10, __MAC_10_11, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    AVAILABLE_MAC_OS_X_VERSION_10_10_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11
+ *
+ * Used on declarations introduced in Mac OS X 10.10.2,
+ * but later deprecated in Mac OS X 10.11
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_10_2, __MAC_10_11, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    AVAILABLE_MAC_OS_X_VERSION_10_10_2_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11
+ *
+ * Used on declarations introduced in Mac OS X 10.10.3,
+ * but later deprecated in Mac OS X 10.11
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_10_3, __MAC_10_11, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11    AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER
+#endif
+
+/*
+ * DEPRECATED_IN_MAC_OS_X_VERSION_10_11_AND_LATER
+ *
+ * Used on types deprecated in Mac OS X 10.11
+ */
+#if __AVAILABILITY_MACROS_USES_AVAILABILITY
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_11_AND_LATER    __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_11, __IPHONE_NA, __IPHONE_NA)
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_11_AND_LATER    DEPRECATED_ATTRIBUTE
+#else
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_11_AND_LATER
+#endif
+
+
 
 
 #endif  /* __AVAILABILITYMACROS__ */
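For context, these availability macros are attached to declarations in SDK headers. A minimal sketch of their use follows; the function names are hypothetical and not part of this commit:

    #include <AvailabilityMacros.h>

    /* Introduced in OS X 10.10.3 and still current */
    extern void my_newer_function(void) AVAILABLE_MAC_OS_X_VERSION_10_10_3_AND_LATER;

    /* Introduced in OS X 10.6, deprecated in OS X 10.11 */
    extern void my_older_function(void) AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_11;

Depending on the deployment target and on whether __AVAILABILITY_MACROS_USES_AVAILABILITY is set, each macro expands to an availability attribute, a deprecation or weak-import attribute, or nothing.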
index 39cb58f90d9a907a386858d54ee6810560292cfb..edf9fe869fdc89b58deb19c77deb680be54b7d44 100644 (file)
@@ -20,6 +20,8 @@ INSTINC_SUBDIRS_X86_64H =     \
 INSTINC_SUBDIRS_ARM =  \
        architecture
 
+INSTINC_SUBDIRS_ARM64 =        \
+       architecture
 
 EXPORT_FILES = \
        Availability.h  \
index 1054ba8f179da8d9dac3b6c9e5df44ea134ada70..ea393a5bf73e9098912c9df1f04d70a9402d56ca 100644 (file)
@@ -18,6 +18,8 @@ INSTINC_SUBDIRS_X86_64H =     \
 INSTINC_SUBDIRS_ARM =  \
        arm
 
+INSTINC_SUBDIRS_ARM64 =        \
+       arm
 
 EXPORT_FILES = 
 
index f4438451348a8b8ac615a82c524bde6186f8446a..6a05f106c8a5181070b925ce629004e3f0a57355 100644 (file)
@@ -2,8 +2,9 @@
  *  cc.h
  *  corecrypto
  *
- *  Created by Michael Brouwer on 12/16/10.
- *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *  Created on 12/16/2010
+ *
+ *  Copyright (c) 2010,2011,2012,2014,2015 Apple Inc. All rights reserved.
  *
  */
 
 #include <string.h>
 #include <stdint.h>
 
+/* Manage asserts here because a few functions in public header files do use asserts */
+#define cc_assert(x) assert(x)
 #if CC_KERNEL
 #include <kern/assert.h>
+#elif CC_USE_S3
+#define assert(args)  // No assert in S3
 #else
 #include <assert.h>
-#include <stdio.h>
 #endif
 
 /* Declare a struct element with a guaranteed alignment of _alignment_.
 #define cc_zero(_size_,_data_) memset((_data_),0 ,(_size_))
 #endif
 
-#if CC_KERNEL
-#define cc_printf(x...) printf(x)
-#else
-#define cc_printf(x...) fprintf(stderr, x)
-#endif
-
-#define cc_assert(x) assert(x)
+/* cc_clear:
+ Set "len" bytes of memory to zero at address "dst".
+ cc_clear has been developed so that it won't be optimized out.
+ To be used to clear key buffers or sensitive data.
+*/
+CC_NONNULL2
+void cc_clear(size_t len, void *dst);
 
 #define cc_copy(_size_, _dst_, _src_) memcpy(_dst_, _src_, _size_)
 
 CC_INLINE CC_NONNULL2 CC_NONNULL3 CC_NONNULL4
 void cc_xor(size_t size, void *r, const void *s, const void *t) {
     uint8_t *_r=(uint8_t *)r;
-    const uint8_t *_s=(uint8_t *)s;
-    const uint8_t *_t=(uint8_t *)t;
+    const uint8_t *_s=(const uint8_t *)s;
+    const uint8_t *_t=(const uint8_t *)t;
     while (size--) {
         _r[size] = _s[size] ^ _t[size];
     }
 }
 
+/* cc_cmp_safe:
+ Compare "num" pointed by ptr1 and ptr2, array of identical size.
+ Functional behavior: Returns 0 if the "num" bytes starting at ptr1 are identical to the "num"
+    bytes starting at ptr2.
+    Return !=0 if they are different or if "num" is 0 (empty arrays)
+ Security: The execution time/cycles is *independent* of the data and therefore guarantees
+    no leak about the data.
+    However, the execution time depends on "num".
+*/
+CC_NONNULL2 CC_NONNULL3
+int cc_cmp_safe (size_t num, const void * ptr1, const void * ptr2);
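A minimal usage sketch for the two routines declared above; the buffer names are illustrative only:

    uint8_t tag[16], expected[16];   /* e.g. a computed and a received MAC */
    uint8_t key[32];                 /* sensitive key material */

    /* Constant-time comparison: 0 only when all 16 bytes match */
    if (cc_cmp_safe(sizeof(tag), tag, expected) != 0) {
        /* reject */
    }

    /* Zeroize the key; unlike a plain memset, cc_clear is not optimized away */
    cc_clear(sizeof(key), key);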
+
+
 /* Exchange S and T of any type.  NOTE: Both S and T are evaluated
    multiple times and MUST NOT be expressions. */
 #define CC_SWAP(S,T)  do { \
index 9149edb00eef196501957b84c50e66f0b0a87d64..45979d8cf81c60174c06a304fb7b61f3867025f7 100644 (file)
@@ -2,8 +2,9 @@
  *  cc_config.h
  *  corecrypto
  *
- *  Created by Michael Brouwer on 10/18/10.
- *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *  Created on 11/16/2010
+ *
+ *  Copyright (c) 2010,2011,2012,2013,2014,2015 Apple Inc. All rights reserved.
  *
  */
 
@@ -44,7 +45,7 @@
 
 */
 
-#if defined(DEBUG) && (DEBUG)
+#if (defined(DEBUG) && (DEBUG))
 /* CC_DEBUG is already used in CommonCrypto */
 #define CORECRYPTO_DEBUG 1
 #else
 #endif
 
 #if defined(KERNEL) && (KERNEL)
-#define CC_KERNEL 1
+#define CC_KERNEL 1 // KEXT, XNU repo or kernel components such as AppleKeyStore
 #else
 #define CC_KERNEL 0
 #endif
 
+// LINUX_BUILD_TEST is for a sanity check of the configuration
+// > xcodebuild -scheme "corecrypto_test" OTHER_CFLAGS="$(values) -DLINUX_BUILD_TEST"
+#if defined(__linux__) || defined(LINUX_BUILD_TEST)
+#define CC_LINUX 1
+#else
+#define CC_LINUX 0
+#endif
+
 #if defined(USE_L4) && (USE_L4)
 #define CC_USE_L4 1
 #else
 #define CC_USE_L4 0
 #endif
 
+#if defined(USE_SEPROM) && (USE_SEPROM)
+#define CC_USE_SEPROM 1
+#else
+#define CC_USE_SEPROM 0
+#endif
+
+#if defined(USE_S3) && (USE_S3)
+#define CC_USE_S3 1
+#else
+#define CC_USE_S3 0
+#endif
+
 #if defined(MAVERICK) && (MAVERICK)
 #define CC_MAVERICK 1
 #else
 #define CCN_OSX                                   1
 #endif 
 
+#if CC_USE_L4 || CC_USE_S3
 /* No dynamic linking allowed in L4, e.g. avoid nonlazy symbols */
-/* For corecrypto kext, CC_STATIC should be 0 */
-#if CC_USE_L4
+/* For corecrypto kext, CC_STATIC should be undefined */
 #define CC_STATIC              1
 #endif
 
+#if CC_USE_L4 || CC_IBOOT
+/* For L4, the stack is too small, so the heap must be used for some computations */
+/* CC_USE_HEAP_FOR_WORKSPACE is not supported for KERNEL! */
+#define CC_USE_HEAP_FOR_WORKSPACE 1
+#else
+#define CC_USE_HEAP_FOR_WORKSPACE 0
+#endif
+
 /* L4 does not have bzero, nor do hexagon or ARMCC even in GNU compatibility mode */
 #if CC_USE_L4 || defined(__CC_ARM) || defined(__hexagon__)
 #define CC_HAS_BZERO 0
 #define CC_HAS_BZERO 1
 #endif
 
-#if defined(__CC_ARM) || defined(__hexagon__)
-// ARMASM.exe does not to like the file syntax of the asm implementation
+/* memset_s is only available on a few targets */
+#if CC_USE_L4 || CC_KERNEL || CC_IBOOT || CC_USE_SEPROM || defined(__CC_ARM) || defined(__hexagon__)
+#define CC_HAS_MEMSET_S 0
+#else
+#define CC_HAS_MEMSET_S 1
+#endif
+
 
+#if defined(__CC_ARM) || defined(__hexagon__) || CC_LINUX || defined(__NO_ASM__)
+// ARMASM.exe does not like the file syntax of the asm implementation
+#define CCN_DEDICATED_SQR      1
+#define CCN_MUL_KARATSUBA      1 // 4*n CCN_UNIT extra memory required.
 #define CCN_ADD_ASM            0
 #define CCN_SUB_ASM            0
 #define CCN_MUL_ASM            0
 #define CCAES_ARM              0
 #define CCAES_INTEL            0
 #define CCN_USE_BUILTIN_CLZ    0
+#if !defined(__NO_ASM__)
 #define CCSHA1_VNG_INTEL       0
 #define CCSHA2_VNG_INTEL       0
+#define CCSHA1_VNG_ARMV7NEON   0
+#define CCSHA2_VNG_ARMV7NEON   0
+#endif
+#define CCAES_MUX              0
 
 #elif defined(__x86_64__) || defined(__i386__)
-
+#define CCN_DEDICATED_SQR      1
+#define CCN_MUL_KARATSUBA      1 // 4*n CCN_UNIT extra memory required.
 /* These assembly routines only work for a single CCN_UNIT_SIZE. */
 #if (defined(__x86_64__) && CCN_UNIT_SIZE == 8) || (defined(__i386__) && CCN_UNIT_SIZE == 4)
 #define CCN_ADD_ASM            1
 #define CCN_SUB_ASM            1
-#define CCN_MUL_ASM            1
+#define CCN_MUL_ASM            0
 #else
 #define CCN_ADD_ASM            0
 #define CCN_SUB_ASM            0
 #define CCN_MUL_ASM            0
 #endif
 
+#if (defined(__x86_64__) && CCN_UNIT_SIZE == 8)
+#define CCN_CMP_ASM            1
+#define CCN_N_ASM              1
+#else
+#define CCN_CMP_ASM            0
+#define CCN_N_ASM              0
+#endif
+
 #define CCN_ADDMUL1_ASM        0
 #define CCN_MUL1_ASM           0
-#define CCN_CMP_ASM            0
 #define CCN_ADD1_ASM           0
 #define CCN_SUB1_ASM           0
-#define CCN_N_ASM              0
 #define CCN_SET_ASM            0
 #define CCAES_ARM              0
 #define CCAES_INTEL            1
 #define CCSHA2_VNG_ARMV7NEON   0
 
 #else
-
+#define CCN_DEDICATED_SQR      1
+#define CCN_MUL_KARATSUBA      1 // 4*n CCN_UNIT extra memory required.
 #define CCN_ADD_ASM            0
 #define CCN_SUB_ASM            0
 #define CCN_MUL_ASM            0
 
 #endif /* !defined(__i386__) */
 
-#define CCN_N_INLINE           0
-#define CCN_CMP_INLINE         0
-
 #define CC_INLINE static inline
 
 #ifdef __GNUC__
 #define CC_MALLOC
 #endif /* !__GNUC__ */
 
+
 #endif /* _CORECRYPTO_CC_CONFIG_H_ */
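As an illustration of how these feature flags are typically consumed, here is a hypothetical zeroization helper gated on the new CC_HAS_MEMSET_S flag; it is a sketch, not part of corecrypto:

    #include <corecrypto/cc_config.h>
    #include <string.h>
    #include <stdint.h>

    /* Pick a zeroization primitive based on the platform flags above */
    static void clear_bytes(void *p, size_t n)
    {
    #if CC_HAS_MEMSET_S
        memset_s(p, n, 0, n);                    /* targets that provide C11 Annex K */
    #else
        volatile uint8_t *v = (volatile uint8_t *)p;
        while (n--) { *v++ = 0; }                /* portable fallback */
    #endif
    }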
diff --git a/EXTERNAL_HEADERS/corecrypto/cc_debug.h b/EXTERNAL_HEADERS/corecrypto/cc_debug.h
new file mode 100644 (file)
index 0000000..a044022
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ *  cc_debug.h
+ *  corecrypto
+ *
+ *  Created on 01/25/2012
+ *
+ *  Copyright (c) 2012,2014,2015 Apple Inc. All rights reserved.
+ *
+ */
+
+//debug configuration header file
+#ifndef _CORECRYPTO_CCN_DEBUG_H_
+#define _CORECRYPTO_CCN_DEBUG_H_
+
+#include <corecrypto/cc_config.h>
+
+// DO NOT INCLUDE this HEADER file in corecrypto files added for the XNU project, or in headers
+// included by external clients.
+
+// ========================
+// Printf for corecrypto
+// ========================
+#if CC_KERNEL
+#include <pexpert/pexpert.h>
+#define cc_printf(x...) printf(x)
+extern int printf(const char *format, ...) __printflike(1,2);
+#elif CC_USE_S3
+#define cc_printf(x...) printf(x)
+#else
+#include <stdio.h>
+#define cc_printf(x...) fprintf(stderr, x)
+#endif
+
+// ========================
+// Integer types
+// ========================
+
+#if CC_KERNEL
+/* Those are not defined in libkern */
+#define PRIx64 "llx"
+#define PRIx32 "x"
+#define PRIx16 "hx"
+#define PRIx8  "hhx"
+#else
+#include <inttypes.h>
+#endif
+
+#if  CCN_UNIT_SIZE == 8
+#define CCPRIx_UNIT ".016" PRIx64
+#elif  CCN_UNIT_SIZE == 4
+#define CCPRIx_UNIT ".08" PRIx32
+#elif CCN_UNIT_SIZE == 2
+#define CCPRIx_UNIT ".04" PRIx16
+#elif CCN_UNIT_SIZE == 1
+#define CCPRIx_UNIT ".02" PRIx8
+#else
+#error invalid CCN_UNIT_SIZE
+#endif
+
+// ========================
+// Print utilities for corecrypto
+// ========================
+/* Print a byte array of arbitrary size */
+void cc_print(const char *label, unsigned long count, const uint8_t *s);
+
+#endif /* _CORECRYPTO_CCN_DEBUG_H_ */
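A small usage sketch for the printing helpers declared in this new header; it assumes cc_unit and CCN_UNIT_SIZE from <corecrypto/ccn.h>, which is not part of this hunk:

    #include <corecrypto/cc_debug.h>
    #include <corecrypto/ccn.h>

    cc_unit u = 0x1234;
    uint8_t buf[8] = {0};

    /* CCPRIx_UNIT supplies the width-correct hex conversion for one cc_unit */
    cc_printf("u   = 0x%" CCPRIx_UNIT "\n", u);

    /* Dump an arbitrary byte buffer with a label */
    cc_print("buf", sizeof(buf), buf);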
diff --git a/EXTERNAL_HEADERS/corecrypto/cc_macros.h b/EXTERNAL_HEADERS/corecrypto/cc_macros.h
new file mode 100644 (file)
index 0000000..4d0b0be
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ *  cc_macros.h
+ *  corecrypto
+ *
+ *  Created on 01/11/2012
+ *
+ *  Copyright (c) 2012,2015 Apple Inc. All rights reserved.
+ *
+ */
+
+#ifndef _CORECRYPTO_CC_MACROS_H_
+#define _CORECRYPTO_CC_MACROS_H_
+
+#include <corecrypto/cc_config.h>
+
+#ifndef __CC_DEBUG_ASSERT_COMPONENT_NAME_STRING
+#define __CC_DEBUG_ASSERT_COMPONENT_NAME_STRING ""
+#endif
+
+#ifndef __CC_DEBUG_ASSERT_PRODUCTION_CODE
+#define __CC_DEBUG_ASSERT_PRODUCTION_CODE !CORECRYPTO_DEBUG
+#endif
+
+#ifndef __CC_DEBUG_ASSERT_MESSAGE
+#define __CC_DEBUG_ASSERT_MESSAGE(name, assertion, label, message, file, line, value) \
+cc_printf( "CCAssertMacros: %s, %s file: %s, line: %d\n", assertion, (message!=0) ? message : "", file, line);
+#endif
+
+#ifndef cc_require
+#if __CC_DEBUG_ASSERT_PRODUCTION_CODE
+    #define cc_require(assertion, exceptionLabel) \
+        do { \
+            if ( __builtin_expect(!(assertion), 0) ) { \
+                goto exceptionLabel; \
+            } \
+        } while ( 0 )
+#else
+    #define cc_require(assertion, exceptionLabel) \
+        do { \
+            if ( __builtin_expect(!(assertion), 0) ) { \
+                __CC_DEBUG_ASSERT_MESSAGE(__CC_DEBUG_ASSERT_COMPONENT_NAME_STRING, \
+                    #assertion, #exceptionLabel, 0, __FILE__, __LINE__,  0); \
+                goto exceptionLabel; \
+            } \
+        } while ( 0 )
+#endif
+#endif
+
+#ifndef cc_require_action
+#if __CC_DEBUG_ASSERT_PRODUCTION_CODE
+    #define cc_require_action(assertion, exceptionLabel, action)                \
+        do                                                                      \
+        {                                                                       \
+            if ( __builtin_expect(!(assertion), 0) )                            \
+            {                                                                   \
+                {                                                               \
+                    action;                                                     \
+                }                                                               \
+                goto exceptionLabel;                                            \
+            }                                                                   \
+        } while ( 0 )
+#else
+    #define cc_require_action(assertion, exceptionLabel, action)                \
+        do                                                                      \
+        {                                                                       \
+            if ( __builtin_expect(!(assertion), 0) )                            \
+            {                                                                   \
+                __CC_DEBUG_ASSERT_MESSAGE(                                      \
+                    __CC_DEBUG_ASSERT_COMPONENT_NAME_STRING,                    \
+                    #assertion, #exceptionLabel, 0,   __FILE__, __LINE__, 0);   \
+                {                                                               \
+                    action;                                                     \
+                }                                                               \
+                goto exceptionLabel;                                            \
+            }                                                                   \
+        } while ( 0 )
+#endif
+#endif
+
+#endif /* _CORECRYPTO_CC_MACROS_H_ */
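A short sketch of the goto-cleanup style these macros are meant to support; the function and its error codes are hypothetical:

    #include <corecrypto/cc_macros.h>
    #include <stddef.h>
    #include <stdint.h>

    static int derive_key(size_t key_len, uint8_t *key_out)
    {
        int rc = -1;

        cc_require(key_out != NULL, out);                /* on failure, jump to 'out' */
        cc_require_action(key_len >= 16, out, rc = -2);  /* run 'rc = -2' first, then jump */

        /* ... real work would go here ... */
        rc = 0;
    out:
        return rc;
    }

In debug builds (__CC_DEBUG_ASSERT_PRODUCTION_CODE unset) the macros also print a diagnostic through __CC_DEBUG_ASSERT_MESSAGE before taking the jump.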
index fbfadddcc65d9858e5131fe238bab7eefa869205..2d0a47a5b3e7e4e04a52a723de387f8690794662 100644 (file)
@@ -2,8 +2,9 @@
  *  cc_priv.h
  *  corecrypto
  *
- *  Created by Michael Brouwer on 12/1/10.
- *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *  Created on 12/01/2010
+ *
+ *  Copyright (c) 2010,2011,2012,2014,2015 Apple Inc. All rights reserved.
  *
  */
 
@@ -18,7 +19,7 @@
  CC_MEMCPY  : optimized memcpy.
  CC_MEMMOVE : optimized memmove.
  CC_MEMSET  : optimized memset.
- CC_BZERO   : optimized bzero.
+ CC_BZERO   : optimized bzero
 
  CC_STORE32_BE : store 32 bit value in big endian in unaligned buffer.
  CC_STORE32_LE : store 32 bit value in little endian in unaligned buffer.
@@ -71,7 +72,7 @@ The following are not defined yet... define them if needed.
 #define CC_MEMCPY(D,S,L) memcpy((D),(S),(L))
 #define CC_MEMMOVE(D,S,L) memmove((D),(S),(L))
 #define CC_MEMSET(D,V,L) memset((D),(V),(L))
-#define CC_BZERO(D,L) memset((D),0,(L))
+#define CC_BZERO(D,L) memset((D),0,(L)) // Deprecated, DO NOT USE
 
 
 // MARK: - Loads and Store
@@ -88,10 +89,10 @@ The following are not defined yet... define them if needed.
 } while(0)
 
 #define        CC_LOAD32_LE(x, y) do {                                     \
-x = ((uint32_t)(((unsigned char *)(y))[3] & 255)<<24) |                            \
-    ((uint32_t)(((unsigned char *)(y))[2] & 255)<<16) |                            \
-    ((uint32_t)(((unsigned char *)(y))[1] & 255)<<8)  |                            \
-    ((uint32_t)(((unsigned char *)(y))[0] & 255));                                 \
+x = ((uint32_t)(((const unsigned char *)(y))[3] & 255)<<24) |                      \
+    ((uint32_t)(((const unsigned char *)(y))[2] & 255)<<16) |                      \
+    ((uint32_t)(((const unsigned char *)(y))[1] & 255)<<8)  |                      \
+    ((uint32_t)(((const unsigned char *)(y))[0] & 255));                                   \
 } while(0)
 
 // MARK: -- 64 bits - little endian
@@ -108,14 +109,14 @@ x = ((uint32_t)(((unsigned char *)(y))[3] & 255)<<24) |                       \
 } while(0)
 
 #define        CC_LOAD64_LE(x, y) do {                                     \
-x = (((uint64_t)(((unsigned char *)(y))[7] & 255))<<56) |           \
-    (((uint64_t)(((unsigned char *)(y))[6] & 255))<<48) |           \
-    (((uint64_t)(((unsigned char *)(y))[5] & 255))<<40) |           \
-    (((uint64_t)(((unsigned char *)(y))[4] & 255))<<32) |           \
-    (((uint64_t)(((unsigned char *)(y))[3] & 255))<<24) |           \
-    (((uint64_t)(((unsigned char *)(y))[2] & 255))<<16) |           \
-    (((uint64_t)(((unsigned char *)(y))[1] & 255))<<8)  |           \
-    (((uint64_t)(((unsigned char *)(y))[0] & 255)));                \
+x = (((uint64_t)(((const unsigned char *)(y))[7] & 255))<<56) |           \
+    (((uint64_t)(((const unsigned char *)(y))[6] & 255))<<48) |           \
+    (((uint64_t)(((const unsigned char *)(y))[5] & 255))<<40) |           \
+    (((uint64_t)(((const unsigned char *)(y))[4] & 255))<<32) |           \
+    (((uint64_t)(((const unsigned char *)(y))[3] & 255))<<24) |           \
+    (((uint64_t)(((const unsigned char *)(y))[2] & 255))<<16) |           \
+    (((uint64_t)(((const unsigned char *)(y))[1] & 255))<<8)  |           \
+    (((uint64_t)(((const unsigned char *)(y))[0] & 255)));                \
 } while(0)
 
 // MARK: -- 32 bits - big endian
@@ -146,10 +147,10 @@ x = (((uint64_t)(((unsigned char *)(y))[7] & 255))<<56) |           \
 } while(0)
 
 #define        CC_LOAD32_BE(x, y) do {                             \
-x = ((uint32_t)(((unsigned char *)(y))[0] & 255)<<24) |            \
-    ((uint32_t)(((unsigned char *)(y))[1] & 255)<<16) |                \
-    ((uint32_t)(((unsigned char *)(y))[2] & 255)<<8)  |                \
-    ((uint32_t)(((unsigned char *)(y))[3] & 255));          \
+x = ((uint32_t)(((const unsigned char *)(y))[0] & 255)<<24) |      \
+    ((uint32_t)(((const unsigned char *)(y))[1] & 255)<<16) |          \
+    ((uint32_t)(((const unsigned char *)(y))[2] & 255)<<8)  |          \
+    ((uint32_t)(((const unsigned char *)(y))[3] & 255));          \
 } while(0)
 
 #endif
@@ -189,14 +190,14 @@ __asm__ __volatile__ (        \
 } while(0)
 
 #define        CC_LOAD64_BE(x, y) do {                                     \
-x = (((uint64_t)(((unsigned char *)(y))[0] & 255))<<56) |           \
-    (((uint64_t)(((unsigned char *)(y))[1] & 255))<<48) |           \
-    (((uint64_t)(((unsigned char *)(y))[2] & 255))<<40) |           \
-    (((uint64_t)(((unsigned char *)(y))[3] & 255))<<32) |           \
-    (((uint64_t)(((unsigned char *)(y))[4] & 255))<<24) |           \
-    (((uint64_t)(((unsigned char *)(y))[5] & 255))<<16) |           \
-    (((uint64_t)(((unsigned char *)(y))[6] & 255))<<8)  |              \
-    (((uint64_t)(((unsigned char *)(y))[7] & 255)));               \
+x = (((uint64_t)(((const unsigned char *)(y))[0] & 255))<<56) |           \
+    (((uint64_t)(((const unsigned char *)(y))[1] & 255))<<48) |           \
+    (((uint64_t)(((const unsigned char *)(y))[2] & 255))<<40) |           \
+    (((uint64_t)(((const unsigned char *)(y))[3] & 255))<<32) |           \
+    (((uint64_t)(((const unsigned char *)(y))[4] & 255))<<24) |           \
+    (((uint64_t)(((const unsigned char *)(y))[5] & 255))<<16) |           \
+    (((uint64_t)(((const unsigned char *)(y))[6] & 255))<<8)  |                \
+    (((uint64_t)(((const unsigned char *)(y))[7] & 255)));                 \
 } while(0)
 
 #endif
@@ -378,35 +379,26 @@ static inline uint32_t CC_BSWAP(uint32_t x)
    Run in constant time (log2(<bitsize of x>))  
    Useful to run constant time checks
 */
-#define HEAVISIDE_STEP_UINT64(x) {unsigned long t; \
-    t=(((uint64_t)x>>32) | (unsigned long)x); \
-    t=((t>>16) | t); \
-    t=((t>>8) | t); \
-    t=((t>>4) | t); \
-    t=((t>>2) | t); \
-    t=((t>>1) | t); \
-    x=t & 0x1;}
-
-#define HEAVISIDE_STEP_UINT32(x) {uint16_t t; \
-    t=(((unsigned long)x>>16) | (uint16_t)x); \
-    t=((t>>8) | t); \
-    t=((t>>4) | t); \
-    t=((t>>2) | t); \
-    t=((t>>1) | t); \
-    x=t & 0x1;}
-
-#define HEAVISIDE_STEP_UINT16(x) {uint8_t t; \
-    t=(((uint16_t)x>>8) | (uint8_t)x); \
-    t=((t>>4) | t); \
-    t=((t>>2) | t); \
-    t=((t>>1) | t); \
-    x=t & 0x1;}
-
-#define HEAVISIDE_STEP_UINT8(x) {uint8_t t; \
-    t=(((uint8_t)x>>4) | (uint8_t)x); \
-    t=((t>>2) | t); \
-    t=((t>>1) | t); \
-    x=t & 0x1;}
+#define HEAVISIDE_STEP_UINT64(x) {uint64_t _t; \
+    _t=(((uint64_t)x>>32) | x); \
+    _t=(0xFFFFFFFF + (_t & 0xFFFFFFFF)); \
+    x=_t >> 32;}
+
+#define HEAVISIDE_STEP_UINT32(x) {uint32_t _t; \
+    _t=(((uint32_t)x>>16) | x); \
+    _t=(0xFFFF + (_t & 0xFFFF)); \
+    x=_t >> 16;}
+
+#define HEAVISIDE_STEP_UINT16(x) {uint16_t _t; \
+    _t=(((uint16_t)x>>8) | x); \
+    _t=(0xFF + (_t & 0xFF)); \
+    x=_t >> 8;}
+
+#define HEAVISIDE_STEP_UINT8(x) {uint8_t _t; \
+    _t=(((uint8_t)x>>4) | (uint8_t)x); \
+    _t=((_t>>2) | _t); \
+    _t=((_t>>1) | _t); \
+    x=_t & 0x1;}
 
 #define CC_HEAVISIDE_STEP(x) { \
     if (sizeof(x) == 1) {HEAVISIDE_STEP_UINT8(x);}  \
@@ -416,9 +408,14 @@ static inline uint32_t CC_BSWAP(uint32_t x)
     else {x=((x==0)?0:1);} \
     }
 
+/* Return 1 if x mod 4 is 1, 2, or 3; 0 otherwise */
+#define CC_CARRY_2BITS(x) (((x>>1) | x) & 0x1)
+#define CC_CARRY_3BITS(x) (((x>>2) | (x>>1) | x) & 0x1)
 
 /* Set a variable to the biggest power of 2 which can be represented */ 
 #define MAX_POWER_OF_2(x)   ((__typeof__(x))1<<(8*sizeof(x)-1))
+
+#define cc_ceiling(a,b)  (((a)+((b)-1))/(b))
+#define CC_BITLEN_TO_BYTELEN(x) cc_ceiling((x), 8)
 
 #endif /* _CORECRYPTO_CC_PRIV_H_ */
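The reworked branch-free step and the new rounding helpers can be exercised as in this sketch; the values are illustrative:

    #include <corecrypto/cc_priv.h>

    uint32_t x = 0x80;            /* any non-zero value */
    CC_HEAVISIDE_STEP(x);         /* x becomes 1; a zero input stays 0, with no data-dependent branch */

    size_t bits  = 521;
    size_t bytes = CC_BITLEN_TO_BYTELEN(bits);   /* cc_ceiling(521, 8) == 66 */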
index 67c4404ca15e195689b3bd00e6f545b6f7cecfed..85adca2fe555c7418d81e636d3eb44be2be93177 100644 (file)
@@ -2,8 +2,9 @@
  *  ccaes.h
  *  corecrypto
  *
- *  Created by Michael Brouwer on 12/10/10.
- *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *  Created on 12/10/2010
+ *
+ *  Copyright (c) 2010,2011,2012,2013,2015 Apple Inc. All rights reserved.
  *
  */
 
@@ -24,12 +25,21 @@ extern const struct ccmode_ecb ccaes_ltc_ecb_encrypt_mode;
 extern const struct ccmode_cbc ccaes_gladman_cbc_encrypt_mode;
 extern const struct ccmode_cbc ccaes_gladman_cbc_decrypt_mode;
 
-#if CCAES_ARM
+#if !defined(__NO_ASM__) && CCAES_ARM
 extern const struct ccmode_ecb ccaes_arm_ecb_encrypt_mode;
 extern const struct ccmode_ecb ccaes_arm_ecb_decrypt_mode;
 
 extern const struct ccmode_cbc ccaes_arm_cbc_encrypt_mode;
 extern const struct ccmode_cbc ccaes_arm_cbc_decrypt_mode;
+
+extern const struct ccmode_xts ccaes_arm_xts_encrypt_mode;
+extern const struct ccmode_xts ccaes_arm_xts_decrypt_mode;
+
+extern const struct ccmode_cfb ccaes_arm_cfb_encrypt_mode;
+extern const struct ccmode_cfb ccaes_arm_cfb_decrypt_mode;
+
+extern const struct ccmode_ofb ccaes_arm_ofb_crypt_mode;
+
 #endif
 
 #if CCAES_MUX
@@ -40,7 +50,7 @@ extern const struct ccmode_cbc *ccaes_ios_mux_cbc_encrypt_mode(void);
 extern const struct ccmode_cbc *ccaes_ios_mux_cbc_decrypt_mode(void);
 #endif
 
-#if CCAES_INTEL
+#if !defined(__NO_ASM__) && CCAES_INTEL
 //extern const struct ccmode_ecb ccaes_intel_ecb_encrypt_mode;
 //extern const struct ccmode_ecb ccaes_intel_ecb_decrypt_mode;
 
index 3f67e2e6fd26bf8a5fdf080be116e0de35e37b28..7fe1cc66c6ce94959c22b59a0e8d1ac3c8e6fe0f 100644 (file)
@@ -2,8 +2,9 @@
  *  ccasn1.h
  *  corecrypto
  *
- *  Created by Michael Brouwer on 8/6/10.
- *  Copyright 2010-2012 Apple Inc. All rights reserved.
+ *  Created on 11/16/2010
+ *
+ *  Copyright (c) 2010,2011,2012,2015 Apple Inc. All rights reserved.
  *
  */
 
@@ -66,10 +67,6 @@ enum {
 
     CCASN1_CONSTRUCTED_SET = CCASN1_SET | CCASN1_CONSTRUCTED,
     CCASN1_CONSTRUCTED_SEQUENCE = CCASN1_SEQUENCE | CCASN1_CONSTRUCTED,
-
-    // TODO: Remove these 2: */
-    // ASN1_INTEGER = 0x02,
-    ASN1_CONSTRUCTED_SEQUENCE = 0x30
 };
 
 typedef union {
index 7c7f08be61e76a29e5e41c4ef73f964db3292bee..12e940cc073bcd0c767589c47cf6202971faf91f 100644 (file)
@@ -2,8 +2,9 @@
  *  ccder.h
  *  corecrypto
  *
- *  Created by Michael Brouwer on 2/28/12.
- *  Copyright 2012 Apple Inc. All rights reserved.
+ *  Created on 03/14/2012
+ *
+ *  Copyright (c) 2012,2013,2014,2015 Apple Inc. All rights reserved.
  *
  */
 
@@ -246,7 +247,7 @@ CC_NO_INLINE CC_NONNULL((2, 4))
 const uint8_t *ccder_decode_uint(cc_size n, cc_unit *r,
                                  const uint8_t *der, const uint8_t *der_end);
 
-CC_NO_INLINE CC_NONNULL((1, 3))
+CC_NO_INLINE CC_NONNULL((3))
 const uint8_t *ccder_decode_uint64(uint64_t* r,
                                    const uint8_t *der, const uint8_t *der_end);
 
@@ -260,12 +261,12 @@ CC_NO_INLINE CC_NONNULL_TU((1)) CC_NONNULL((3))
 const uint8_t *ccder_decode_oid(ccoid_t *oidp,
                                 const uint8_t *der, const uint8_t *der_end);
 
-CC_NO_INLINE CC_NONNULL_ALL
+CC_NO_INLINE CC_NONNULL((1,2,4))
 const uint8_t *ccder_decode_bitstring(const uint8_t **bit_string,
                                 size_t *bit_length,
                                 const uint8_t *der, const uint8_t *der_end);
 
-CC_NO_INLINE CC_NONNULL_ALL
+CC_NO_INLINE CC_NONNULL_TU((4)) CC_NONNULL((1,2,3,5,6,8))
 const uint8_t *ccder_decode_eckey(uint64_t *version,
                                   size_t *priv_size, const uint8_t **priv_key,
                                   ccoid_t *oid,
index aff622bfbda7990249f562faabc3b387133837fe..6ca3c2a1e5af52fa11348a238b75754bd52cb633 100644 (file)
@@ -2,8 +2,9 @@
  *  ccdes.h
  *  corecrypto
  *
- *  Created by Fabrice Gautier on 12/20/10.
- *  Copyright 2010 Apple, Inc. All rights reserved.
+ *  Created on 12/20/2010
+ *
+ *  Copyright (c) 2010,2012,2015 Apple Inc. All rights reserved.
  *
  */
 
index 9079c4a18a0c5bef0113080f38d6526661cc385a..0857678ffa813d7c5c8ad51fc5d966d6186257c8 100644 (file)
@@ -2,8 +2,9 @@
  *  ccdigest.h
  *  corecrypto
  *
- *  Created by Michael Brouwer on 11/30/10.
- *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *  Created on 11/30/2010
+ *
+ *  Copyright (c) 2010,2011,2012,2014,2015 Apple Inc. All rights reserved.
  *
  */
 
@@ -73,11 +74,11 @@ struct ccdigest_info {
    size_t _block_size_, named _name_.  Can be used in structs or on the
    stack. */
 #define ccdigest_ctx_decl(_state_size_, _block_size_, _name_)  cc_ctx_decl(struct ccdigest_ctx, ccdigest_ctx_size(_state_size_, _block_size_), _name_)
-#define ccdigest_ctx_clear(_state_size_, _block_size_, _name_) cc_zero(ccdigest_ctx_size(_state_size_, _block_size_), _name_)
+#define ccdigest_ctx_clear(_state_size_, _block_size_, _name_) cc_clear(ccdigest_ctx_size(_state_size_, _block_size_), _name_)
 /* Declare a ccdigest_ctx for a given size_t _state_size_ and
    size_t _block_size_, named _name_.  Can be used on the stack. */
 #define ccdigest_di_decl(_di_, _name_)  cc_ctx_decl(struct ccdigest_ctx, ccdigest_di_size(_di_), _name_)
-#define ccdigest_di_clear(_di_, _name_) cc_zero(ccdigest_di_size(_di_), _name_)
+#define ccdigest_di_clear(_di_, _name_) cc_clear(ccdigest_di_size(_di_), _name_)
 
 /* Digest context field accessors.  Consider the implementation private. */
 
@@ -136,34 +137,25 @@ int ccdigest_test_vector(const struct ccdigest_info *di, const struct ccdigest_v
 int ccdigest_test_chunk_vector(const struct ccdigest_info *di, const struct ccdigest_vector *v, unsigned long chunk);
 
 #ifdef USE_SUPER_COOL_NEW_CCOID_T
-#define OID_DEF(_NAME_, _VALUE_) _NAME_ {((unsigned char *) _VALUE_)}
-#define CC_DIGEST_OID_MD2 {((unsigned char *)"\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x02")}
-#define CC_DIGEST_OID_MD4 {((unsigned char *)"\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x04")}
-#define CC_DIGEST_OID_MD5 {((unsigned char *)"\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x05")}
-#define CC_DIGEST_OID_SHA1 {((unsigned char *)"\x06\x05\x2b\x0e\x03\x02\x1a")}
-#define CC_DIGEST_OID_SHA224 {((unsigned char *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x04")}
-#define CC_DIGEST_OID_SHA256 {((unsigned char *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01")}
-#define CC_DIGEST_OID_SHA384 {((unsigned char *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x02")}
-#define CC_DIGEST_OID_SHA512 {((unsigned char *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x03")}
-#define CC_DIGEST_OID_RMD128 {((unsigned char *)"\x06\x06\x28\xCF\x06\x03\x00\x32")}
-#define CC_DIGEST_OID_RMD160 {((unsigned char *)"\x06\x05\x2B\x24\x03\x02\x01")}
-#define CC_DIGEST_OID_RMD256 {((unsigned char *)"\x06\x05\x2B\x24\x03\x02\x03")}
-#define CC_DIGEST_OID_RMD320 {((unsigned char *)NULL)}
+#define OID_DEF(_VALUE_)  {((const unsigned char *) _VALUE_)}
 #else
-#define CC_DIGEST_OID_MD2    "\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x02"
-#define CC_DIGEST_OID_MD4    "\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x04"
-#define CC_DIGEST_OID_MD5    "\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x05"
-#define CC_DIGEST_OID_SHA1   "\x06\x05\x2b\x0e\x03\x02\x1a"
-#define CC_DIGEST_OID_SHA224 "\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x04"
-#define CC_DIGEST_OID_SHA256 "\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01"
-#define CC_DIGEST_OID_SHA384 "\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x02"
-#define CC_DIGEST_OID_SHA512 "\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x03"
-#define CC_DIGEST_OID_RMD128 "\x06\x06\x28\xCF\x06\x03\x00\x32"
-#define CC_DIGEST_OID_RMD160 "\x06\x05\x2B\x24\x03\x02\x01"
-#define CC_DIGEST_OID_RMD256 "\x06\x05\x2B\x24\x03\x02\x03"
-#define CC_DIGEST_OID_RMD320 NULL
+#define OID_DEF(_VALUE_)  _VALUE_
 #endif
 
+#define CC_DIGEST_OID_MD2       OID_DEF("\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x02")
+#define CC_DIGEST_OID_MD4       OID_DEF("\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x04")
+#define CC_DIGEST_OID_MD5       OID_DEF("\x06\x08\x2A\x86\x48\x86\xF7\x0D\x02\x05")
+#define CC_DIGEST_OID_SHA1      OID_DEF("\x06\x05\x2b\x0e\x03\x02\x1a")
+#define CC_DIGEST_OID_SHA224    OID_DEF("\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x04")
+#define CC_DIGEST_OID_SHA256    OID_DEF("\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01")
+#define CC_DIGEST_OID_SHA384    OID_DEF("\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x02")
+#define CC_DIGEST_OID_SHA512    OID_DEF("\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x03")
+#define CC_DIGEST_OID_RMD128    OID_DEF("\x06\x06\x28\xCF\x06\x03\x00\x32")
+#define CC_DIGEST_OID_RMD160    OID_DEF("\x06\x05\x2B\x24\x03\x02\x01")
+#define CC_DIGEST_OID_RMD256    OID_DEF("\x06\x05\x2B\x24\x03\x02\x03")
+#define CC_DIGEST_OID_RMD320    OID_DEF(NULL)
+
+
 #ifdef USE_SUPER_COOL_NEW_CCOID_T
 CC_INLINE CC_NONNULL_TU((1)) CC_NONNULL_TU((2))
 bool ccdigest_oid_equal(const struct ccdigest_info *di, ccoid_t oid) {
index 407a9b19b9cdf0944a3a9aad4dfda80293e8d135..fa8d85de6613310262c915be13458c7a35652875 100644 (file)
@@ -2,8 +2,9 @@
  *  ccdigest_priv.h
  *  corecrypto
  *
- *  Created by Fabrice Gautier on 12/7/10.
- *  Copyright 2010,2011 Apple, Inc. All rights reserved.
+ *  Created on 12/07/2010
+ *
+ *  Copyright (c) 2010,2011,2012,2015 Apple Inc. All rights reserved.
  *
  */
 
index 152e0801f806f44d8bc5d949aa341ab779776d84..fdf450e13b7c184ea4d3d90f5e07e5c874c74270 100644 (file)
@@ -1,24 +1,11 @@
 /*
- * Copyright (c) 2007-2010 Apple Inc. All Rights Reserved.
+ *  ccdrbg.h
+ *  corecrypto
  *
- * @APPLE_LICENSE_HEADER_START@
+ *  Created on 08/17/2010
  *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
+ *  Copyright (c) 2010,2011,2012,2014,2015 Apple Inc. All rights reserved.
  *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_LICENSE_HEADER_END@
  */
 
 /*!
 #include <corecrypto/cc.h>
 #include <corecrypto/ccdrbg_impl.h>
 
-/* TODO: Error codes ? */
+/* error codes */
 #define CCDRBG_STATUS_OK 0
 #define CCDRBG_STATUS_ERROR (-1)
 #define CCDRBG_STATUS_NEED_RESEED (-2)
 #define CCDRBG_STATUS_PARAM_ERROR (-3)
 
-CC_INLINE size_t ccdrbg_context_size(const struct ccdrbg_info *drbg)
-{
-    return drbg->size;
-}
+/*
+ * The maximum lengths of the entropy_input, additional_input (max_additional_input_length),
+ * the personalization string (max_personalization_string_length) and max_number_of_bits_per_request
+ * are implementation dependent, but shall fit in a 32 bit register and be less than or equal to
+ * the specified maximum length for the selected DRBG mechanism (NIST 800-90A Section 10).
+ */
+
+#define CCDRBG_MAX_ENTROPY_SIZE         ((uint32_t)1<<16)
+#define CCDRBG_MAX_ADDITIONALINPUT_SIZE ((uint32_t)1<<16)
+#define CCDRBG_MAX_PSINPUT_SIZE         ((uint32_t)1<<16)
+#define CCDRBG_MAX_REQUEST_SIZE         ((uint32_t)1<<16) //this is the absolute maximum in NIST 800-90A
+#define CCDRBG_RESEED_INTERVAL          ((uint64_t)1<<30) // must be able to fit the NIST maximum of 2^48
+
+
+/*
+ * The entropyLength is forced to be greater than or equal to the security strength.
+ * The nonce is not forced. It either needs to carry 0.5*security strength bits of entropy, or be a
+ * value that repeats no more often than a 0.5*security strength bit random string would.
+ * See below or NIST 800-90A for the definition of security strength.
+ */
 
 CC_INLINE int ccdrbg_init(const struct ccdrbg_info *info,
                        struct ccdrbg_state *drbg,
@@ -54,33 +57,42 @@ CC_INLINE int ccdrbg_init(const struct ccdrbg_info *info,
        return info->init(info, drbg, entropyLength, entropy, nonceLength, nonce, psLength, ps);
 }
 
+/*
+ *  The entropyLength is forced to be greater than or equal to the security strength.
+ */
 CC_INLINE int ccdrbg_reseed(const struct ccdrbg_info *info,
-               struct ccdrbg_state *prng,
-               unsigned long entropylen, const void *entropy,
-               unsigned long inlen, const void *in)
+       struct ccdrbg_state *drbg,
+       unsigned long entropyLength, const void *entropy,
+       unsigned long additionalLength, const void *additional)
 {
-       return info->reseed(prng, entropylen, entropy, inlen, in);
+    return info->reseed(drbg, entropyLength, entropy, additionalLength, additional);
 }
 
 
 CC_INLINE int ccdrbg_generate(const struct ccdrbg_info *info,
-               struct ccdrbg_state *prng,
-               unsigned long outlen, void *out,
-               unsigned long inlen, const void *in)
+         struct ccdrbg_state *drbg,
+         unsigned long dataOutLength, void *dataOut,
+         unsigned long additionalLength, const void *additional)
 {
-       return info->generate(prng, outlen, out, inlen, in);
+    return info->generate(drbg, dataOutLength, dataOut, additionalLength, additional);
 }
 
 CC_INLINE void ccdrbg_done(const struct ccdrbg_info *info,
-               struct ccdrbg_state *prng)
+               struct ccdrbg_state *drbg)
 {
-       info->done(prng);
+       info->done(drbg);
 }
 
+CC_INLINE size_t ccdrbg_context_size(const struct ccdrbg_info *drbg)
+{
+    return drbg->size;
+}
 
-extern struct ccdrbg_info ccdrbg_dummy_info;
-extern struct ccdrbg_info ccdrbg_fipssha1_info;
 
+/*
+ * NIST SP 800-90 CTR_DRBG
+ * The maximum security strength of the DRBG equals the block size of the corresponding ECB.
+ */
 struct ccdrbg_nistctr_custom {
     const struct ccmode_ecb *ecb;
     unsigned long keylen;
@@ -90,6 +102,10 @@ struct ccdrbg_nistctr_custom {
 
 void ccdrbg_factory_nistctr(struct ccdrbg_info *info, const struct ccdrbg_nistctr_custom *custom);
 
+/*
+ * NIST SP 800-90 HMAC_DRBG
+ * The maximum security strength of the DRBG is half the output size of the input hash function, and is internally limited to 256 bits.
+ */
 extern struct ccdrbg_info ccdrbg_nistdigest_info;
 
 struct ccdrbg_nisthmac_custom {
@@ -97,9 +113,12 @@ struct ccdrbg_nisthmac_custom {
     int strictFIPS;
 };
 
-// "class" method on nisthmac dbrg's to ask about their security_strength for a given di
-int ccdbrg_nisthmac_security_strength(const struct ccdrbg_nisthmac_custom *custom);
-
 void ccdrbg_factory_nisthmac(struct ccdrbg_info *info, const struct ccdrbg_nisthmac_custom *custom);
 
+
+/*
+ * Dummy DRBG
+ */
+extern struct ccdrbg_info ccdrbg_dummy_info;
+
 #endif /* _CORECRYPTO_CCDRBG_H_ */
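The four inline wrappers above all dispatch through a struct ccdrbg_info that one of the factory functions fills in. A minimal user-space style sketch of the CTR_DRBG flow follows; the ccaes_ecb_encrypt_mode() getter and the idea that unset ccdrbg_nistctr_custom fields may be left zeroed are assumptions, since neither is shown in this diff:

    #include <corecrypto/ccdrbg.h>
    #include <corecrypto/ccaes.h>
    #include <stdlib.h>

    /* Generate outlen random bytes from a NIST CTR_DRBG instantiated over AES-128. */
    static int drbg_example(const void *entropy32, const void *nonce16,
                            unsigned long outlen, void *out)
    {
        struct ccdrbg_info info;
        struct ccdrbg_nistctr_custom custom = {
            .ecb    = ccaes_ecb_encrypt_mode(),  /* assumed getter from ccaes.h */
            .keylen = 16,                        /* AES-128 key; other fields left zeroed */
        };
        ccdrbg_factory_nistctr(&info, &custom);

        struct ccdrbg_state *drbg = malloc(ccdrbg_context_size(&info));
        if (drbg == NULL)
            return CCDRBG_STATUS_ERROR;

        int rc = ccdrbg_init(&info, drbg,
                             32, entropy32,      /* entropy >= security strength */
                             16, nonce16,        /* nonce, see the comment above  */
                             0, NULL);           /* no personalization string     */
        if (rc == CCDRBG_STATUS_OK) {
            rc = ccdrbg_generate(&info, drbg, outlen, out, 0, NULL);
            ccdrbg_done(&info, drbg);
        }
        free(drbg);
        return rc;
    }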
index efa1ef9ba3af24cc932c8c80dec3441e0f739993..129f92e7c364a3cea9ea233bdb48dae598f560bf 100644 (file)
@@ -2,8 +2,9 @@
  *  ccdrbg_impl.h
  *  corecrypto
  *
- *  Created by James Murphy on 12/9/11.
- *  Copyright (c) 2011 Apple Inc. All rights reserved.
+ *  Created on 01/03/2012
+ *
+ *  Copyright (c) 2012,2015 Apple Inc. All rights reserved.
  *
  */
 
@@ -14,7 +15,7 @@
 struct ccdrbg_state;
 
 struct ccdrbg_info {
-    /** Size of the DRBG state in bytes **/
+    /*! Size of the DRBG state in bytes **/
     size_t size;
 
     /** Instantiate the PRNG
@@ -30,7 +31,7 @@ struct ccdrbg_info {
                 unsigned long nonceLength, const void* nonce,
                 unsigned long psLength, const void* ps);
 
-    /** Add entropy to the PRNG
+    /*! Add entropy to the PRNG
      @param prng       The PRNG state
      @param entropylen Length of entropy
      @param entropy    Entropy bytes
@@ -42,7 +43,7 @@ struct ccdrbg_info {
                   unsigned long entropylen, const void *entropy,
                   unsigned long inlen, const void *in);
 
-    /** Read from the PRNG in a FIPS Testing compliant manor
+    /*! Read from the PRNG in a FIPS Testing compliant manner
      @param prng    The PRNG state to read from
      @param out     [out] Where to store the data
      @param outlen  Length of data desired (octets)
@@ -54,7 +55,7 @@ struct ccdrbg_info {
                     unsigned long outlen, void *out,
                     unsigned long inlen, const void *in);
 
-    /** Terminate a PRNG state
+    /*! Terminate a PRNG state
      @param prng   The PRNG state to terminate
      */
     void (*done)(struct ccdrbg_state *prng);
index 17e295fb65c63ef2a5965a22ccb6b025de8145eb..6e8d5134cf384e0c5ede8eaf61c1360ddba8382d 100644 (file)
@@ -2,8 +2,9 @@
  *  cchmac.h
  *  corecrypto
  *
- *  Created by Michael Brouwer on 12/7/10.
- *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *  Created on 12/07/2010
+ *
+ *  Copyright (c) 2010,2011,2012,2014,2015 Apple Inc. All rights reserved.
  *
  */
 
@@ -29,7 +30,7 @@ typedef union {
 #define cchmac_ctx_n(STATE_SIZE, BLOCK_SIZE)  ccn_nof_size(cchmac_ctx_size((STATE_SIZE), (BLOCK_SIZE)))
 
 #define cchmac_ctx_decl(STATE_SIZE, BLOCK_SIZE, _name_) cc_ctx_decl(struct cchmac_ctx, cchmac_ctx_size(STATE_SIZE, BLOCK_SIZE), _name_)
-#define cchmac_ctx_clear(STATE_SIZE, BLOCK_SIZE, _name_) cc_zero(cchmac_ctx_size(STATE_SIZE, BLOCK_SIZE), _name_)
+#define cchmac_ctx_clear(STATE_SIZE, BLOCK_SIZE, _name_) cc_clear(cchmac_ctx_size(STATE_SIZE, BLOCK_SIZE), _name_)
 #define cchmac_di_decl(_di_, _name_) cchmac_ctx_decl((_di_)->state_size, (_di_)->block_size, _name_)
 #define cchmac_di_clear(_di_, _name_) cchmac_ctx_clear((_di_)->state_size, (_di_)->block_size, _name_)
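The decl/clear macros above give the usual corecrypto stack-context pattern, now wiping with cc_clear instead of cc_zero. A minimal sketch; ccsha256_di() is assumed to be the digest-info getter from ccsha2.h, and the init/update/final calls are elided because they sit outside this hunk:

    #include <corecrypto/cchmac.h>
    #include <corecrypto/ccsha2.h>

    void hmac_ctx_pattern(void)
    {
        const struct ccdigest_info *di = ccsha256_di();  /* assumed getter */

        cchmac_di_decl(di, hc);       /* stack context sized for this digest */

        /* ... HMAC init/update/final calls on hc would go here ... */

        cchmac_di_clear(di, hc);      /* expands to cc_clear(), wiping the key material */
    }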
 
index 12852250055ec4d5618a936aeb3d18a8c0b2d10e..602fb0868974a034d63b695480074661cf48cf3f 100644 (file)
@@ -2,8 +2,9 @@
  *  ccmd5.h
  *  corecrypto
  *
- *  Created by Fabrice Gautier on 12/3/10.
- *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *  Created on 12/06/2010
+ *
+ *  Copyright (c) 2010,2011,2012,2015 Apple Inc. All rights reserved.
  *
  */
 
index 0c7a19479c5f1e2b1d45ca10a43ee9c5bcf1504b..4a8c789583a54460bc1f4b930402021053d5eca2 100644 (file)
@@ -2,8 +2,9 @@
  *  ccmode.h
  *  corecrypto
  *
- *  Created by Michael Brouwer on 12/6/10.
- *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *  Created on 12/07/2010
+ *
+ *  Copyright (c) 2010,2011,2012,2014,2015 Apple Inc. All rights reserved.
  *
  */
 
@@ -18,7 +19,7 @@
 /* Declare a ecb key named _name_.  Pass the size field of a struct ccmode_ecb
    for _size_. */
 #define ccecb_ctx_decl(_size_, _name_) cc_ctx_decl(ccecb_ctx, _size_, _name_)
-#define ccecb_ctx_clear(_size_, _name_) cc_zero(_size_, _name_)
+#define ccecb_ctx_clear(_size_, _name_) cc_clear(_size_, _name_)
 
 CC_INLINE size_t ccecb_context_size(const struct ccmode_ecb *mode)
 {
@@ -63,12 +64,12 @@ CC_INLINE void ccecb_one_shot(const struct ccmode_ecb *mode,
 /* Declare a cbc key named _name_.  Pass the size field of a struct ccmode_cbc
    for _size_. */
 #define cccbc_ctx_decl(_size_, _name_) cc_ctx_decl(cccbc_ctx, _size_, _name_)
-#define cccbc_ctx_clear(_size_, _name_) cc_zero(_size_, _name_)
+#define cccbc_ctx_clear(_size_, _name_) cc_clear(_size_, _name_)
 
 /* Declare a cbc iv tweak named _name_.  Pass the blocksize field of a
    struct ccmode_cbc for _size_. */
 #define cccbc_iv_decl(_size_, _name_) cc_ctx_decl(cccbc_iv, _size_, _name_)
-#define cccbc_iv_clear(_size_, _name_) cc_ctx_clear(cccbc_iv, _size_, _name_)
+#define cccbc_iv_clear(_size_, _name_) cc_clear(_size_, _name_)
 
 /* Actual symmetric algorithm implementation can provide you one of these.
 
@@ -131,7 +132,7 @@ CC_INLINE void cccbc_one_shot(const struct ccmode_cbc *mode,
 /* Declare a cfb key named _name_.  Pass the size field of a struct ccmode_cfb
    for _size_. */
 #define cccfb_ctx_decl(_size_, _name_) cc_ctx_decl(cccfb_ctx, _size_, _name_)
-#define cccfb_ctx_clear(_size_, _name_) cc_zero(_size_, _name_)
+#define cccfb_ctx_clear(_size_, _name_) cc_clear(_size_, _name_)
 
 CC_INLINE size_t cccfb_context_size(const struct ccmode_cfb *mode)
 {
@@ -171,7 +172,7 @@ CC_INLINE void cccfb_one_shot(const struct ccmode_cfb *mode,
 /* Declare a cfb8 key named _name_.  Pass the size field of a struct ccmode_cfb8
  for _size_. */
 #define cccfb8_ctx_decl(_size_, _name_) cc_ctx_decl(cccfb8_ctx, _size_, _name_)
-#define cccfb8_ctx_clear(_size_, _name_) cc_zero(_size_, _name_)
+#define cccfb8_ctx_clear(_size_, _name_) cc_clear(_size_, _name_)
 
 CC_INLINE size_t cccfb8_context_size(const struct ccmode_cfb8 *mode)
 {
@@ -210,7 +211,7 @@ CC_INLINE void cccfb8_one_shot(const struct ccmode_cfb8 *mode,
 /* Declare a ctr key named _name_.  Pass the size field of a struct ccmode_ctr
  for _size_. */
 #define ccctr_ctx_decl(_size_, _name_) cc_ctx_decl(ccctr_ctx, _size_, _name_)
-#define ccctr_ctx_clear(_size_, _name_) cc_zero(_size_, _name_)
+#define ccctr_ctx_clear(_size_, _name_) cc_clear(_size_, _name_)
 
 /* This is Integer Counter Mode: The IV is the initial value of the counter
  that is incremented by 1 for each new block. Use the mode flags to select
@@ -254,7 +255,7 @@ CC_INLINE void ccctr_one_shot(const struct ccmode_ctr *mode,
 /* Declare a ofb key named _name_.  Pass the size field of a struct ccmode_ofb
  for _size_. */
 #define ccofb_ctx_decl(_size_, _name_) cc_ctx_decl(ccofb_ctx, _size_, _name_)
-#define ccofb_ctx_clear(_size_, _name_) cc_zero(_size_, _name_)
+#define ccofb_ctx_clear(_size_, _name_) cc_clear(_size_, _name_)
 
 CC_INLINE size_t ccofb_context_size(const struct ccmode_ofb *mode)
 {
@@ -295,12 +296,12 @@ CC_INLINE void ccofb_one_shot(const struct ccmode_ofb *mode,
 /* Declare a xts key named _name_.  Pass the size field of a struct ccmode_xts
  for _size_. */
 #define ccxts_ctx_decl(_size_, _name_) cc_ctx_decl(ccxts_ctx, _size_, _name_)
-#define ccxts_ctx_clear(_size_, _name_) cc_zero(_size_, _name_)
+#define ccxts_ctx_clear(_size_, _name_) cc_clear(_size_, _name_)
 
 /* Declare a xts tweak named _name_.  Pass the tweak_size field of a
    struct ccmode_xts for _size_. */
 #define ccxts_tweak_decl(_size_, _name_) cc_ctx_decl(ccxts_tweak, _size_, _name_)
-#define ccxts_tweak_clear(_size_, _name_) cc_zero(_size_, _name_)
+#define ccxts_tweak_clear(_size_, _name_) cc_clear(_size_, _name_)
 
 /* Actual symmetric algorithm implementation can provide you one of these.
 
@@ -363,7 +364,7 @@ CC_INLINE void ccxts_one_shot(const struct ccmode_xts *mode,
 /* Declare a gcm key named _name_.  Pass the size field of a struct ccmode_gcm
  for _size_. */
 #define ccgcm_ctx_decl(_size_, _name_) cc_ctx_decl(ccgcm_ctx, _size_, _name_)
-#define ccgcm_ctx_clear(_size_, _name_) cc_zero(_size_, _name_)
+#define ccgcm_ctx_clear(_size_, _name_) cc_clear(_size_, _name_)
 
 CC_INLINE size_t ccgcm_context_size(const struct ccmode_gcm *mode)
 {
@@ -430,11 +431,11 @@ CC_INLINE void ccgcm_one_shot(const struct ccmode_gcm *mode,
 /* CCM */
 
 #define ccccm_ctx_decl(_size_, _name_) cc_ctx_decl(ccccm_ctx, _size_, _name_)
-#define ccccm_ctx_clear(_size_, _name_) cc_zero(_size_, _name_)
+#define ccccm_ctx_clear(_size_, _name_) cc_clear(_size_, _name_)
 
 /* Declare a ccm nonce named _name_.  Pass the mode->nonce_ctx_size for _size_. */
 #define ccccm_nonce_decl(_size_, _name_) cc_ctx_decl(ccccm_nonce, _size_, _name_)
-#define ccccm_nonce_clear(_size_, _name_) cc_zero(_size_, _name_)
+#define ccccm_nonce_clear(_size_, _name_) cc_clear(_size_, _name_)
 
 
 CC_INLINE size_t ccccm_context_size(const struct ccmode_ccm *mode)
@@ -509,7 +510,7 @@ CC_INLINE void ccccm_one_shot(const struct ccmode_ccm *mode,
 /* Declare a omac key named _name_.  Pass the size field of a struct ccmode_omac
  for _size_. */
 #define ccomac_ctx_decl(_size_, _name_) cc_ctx_decl(ccomac_ctx, _size_, _name_)
-#define ccomac_ctx_clear(_size_, _name_) cc_zero(_size_, _name_)
+#define ccomac_ctx_clear(_size_, _name_) cc_clear(_size_, _name_)
 
 CC_INLINE size_t ccomac_context_size(const struct ccmode_omac *mode)
 {
index 8ffe1fbd6e43428ce01db4e3d49b8eaa46f7449a..3a29111ae64d7eb2c1cf86172397a690ba7c4383 100644 (file)
@@ -2,8 +2,9 @@
  *  ccmode_factory.h
  *  corecrypto
  *
- *  Created by Fabrice Gautier on 1/21/11.
- *  Copyright 2011 Apple, Inc. All rights reserved.
+ *  Created on 01/21/2011
+ *
+ *  Copyright (c) 2011,2012,2013,2014,2015 Apple Inc. All rights reserved.
  *
  */
 
 #include <corecrypto/ccn.h>  /* TODO: Remove dependency on this header. */
 #include <corecrypto/ccmode_impl.h>
 
+#if !defined(__NO_ASM__) 
+#if    (defined(__x86_64__) && CCAES_INTEL) || (CCAES_ARM && defined(__ARM_NEON__))
+#define        CCMODE_GCM_VNG_SPEEDUP  1
+#define        CCMODE_CCM_VNG_SPEEDUP  1
+#else
+#define        CCMODE_GCM_VNG_SPEEDUP  0
+#define        CCMODE_CCM_VNG_SPEEDUP  0
+#endif
+
+#if    (  (defined(__x86_64__) && CCAES_INTEL) \
+    || (defined(__arm64__) && CCAES_ARM) \
+    || defined(__ARM_NEON__))  // Supported even when not using the ARM AES
+
+#define        CCMODE_CTR_VNG_SPEEDUP  1
+#else
+#define        CCMODE_CTR_VNG_SPEEDUP  0
+#endif
+#endif /* !defined(__NO_ASM__) */
+
 /* For CBC, direction of underlying ecb is the same as the cbc direction */
 #define CCMODE_CBC_FACTORY(_cipher_, _dir_)                                     \
 static struct ccmode_cbc cbc_##_cipher_##_##_dir_;                              \
@@ -170,7 +190,6 @@ void ccmode_cfb_decrypt(cccfb_ctx *ctx, size_t nbytes,
                         const void *in, void *out);
 void ccmode_cfb_encrypt(cccfb_ctx *ctx, size_t nbytes,
                         const void *in, void *out);
-
 struct _ccmode_cfb_key {
     const struct ccmode_ecb *ecb;
     size_t pad_len;
@@ -217,7 +236,6 @@ void ccmode_factory_cfb_encrypt(struct ccmode_cfb *cfb,
     *cfb = cfb_encrypt;
 }
 
-
 void ccmode_cfb8_init(const struct ccmode_cfb8 *cfb8, cccfb8_ctx *ctx,
                       size_t rawkey_len, const void *rawkey, const void *iv);
 void ccmode_cfb8_decrypt(cccfb8_ctx *ctx, size_t nbytes,
@@ -290,6 +308,22 @@ struct _ccmode_ctr_key {
 .custom = (ECB_ENCRYPT) \
 }
 
+#if !defined(__NO_ASM__) 
+#if CCMODE_CTR_VNG_SPEEDUP
+void ccmode_aes_ctr_crypt_vng(ccctr_ctx *ctx, size_t nbytes,
+                      const void *in, void *out);
+
+/* Use this to statically initialize a ccmode_ctr object for decryption. */
+#define CCMODE_VNG_AES_CTR_CRYPT(ECB_ENCRYPT) { \
+.size = ccn_sizeof_size(sizeof(struct _ccmode_ctr_key)) + 2 * ccn_sizeof_size((ECB_ENCRYPT)->block_size) + ccn_sizeof_size((ECB_ENCRYPT)->size), \
+.block_size = 1, \
+.init = ccmode_ctr_init, \
+.ctr = ccmode_aes_ctr_crypt_vng, \
+.custom = (ECB_ENCRYPT) \
+}
+#endif /* CCMODE_CTR_VNG_SPEEDUP */
+#endif /* !defined(__NO_ASM__) */
+
 /* Use these function to runtime initialize a ccmode_ctr decrypt object (for
  example if it's part of a larger structure). Normally you would pass a
  ecb encrypt mode implementation of some underlying algorithm as the ecb
@@ -315,9 +349,6 @@ void ccmode_factory_ctr_crypt(struct ccmode_ctr *ctr,
 
 extern const unsigned char gcm_shift_table[256*2];
 #endif
-#if    defined(__x86_64__) || defined(__arm64__)
-#define        VNG_SPEEDUP     1
-#endif
 
 /* Create a gcm key from a gcm mode object.
  key must point to at least sizeof(CCMODE_GCM_KEY(ecb)) bytes of free
@@ -358,10 +389,15 @@ struct _ccmode_gcm_key {
     ;
 #endif /* CCMODE_GCM_TABLES */
 
-#ifdef VNG_SPEEDUP
+#if !defined(__NO_ASM__) 
+#if CCMODE_GCM_VNG_SPEEDUP
+#if !defined(__arm64__) && defined(__ARM_NEON__)
+       unsigned char Htable[8*2] __attribute__((aligned(16)));
+#else
        unsigned char Htable[16*8*2] __attribute__((aligned(16)));
 #endif
-    
+#endif /* CCMODE_GCM_VNG_SPEEDUP */
+#endif  /* !defined(__NO_ASM__)  */   
     cc_unit u[];
 
 };
@@ -430,6 +466,14 @@ void ccmode_ccm_decrypt(ccccm_ctx *ctx, ccccm_nonce *nonce_ctx, size_t nbytes, c
                         void *out);
 void ccmode_ccm_encrypt(ccccm_ctx *ctx, ccccm_nonce *nonce_ctx, size_t nbytes, const void *in,
                         void *out);
+#if !defined(__NO_ASM__) 
+#if CCMODE_CCM_VNG_SPEEDUP
+void ccmode_ccm_decrypt_vector(ccccm_ctx *ctx, ccccm_nonce *nonce_ctx, size_t nbytes, const void *in,
+                        void *out);
+void ccmode_ccm_encrypt_vector(ccccm_ctx *ctx, ccccm_nonce *nonce_ctx, size_t nbytes, const void *in,
+                        void *out);
+#endif /* CCMODE_CCM_VNG_SPEEDUP */
+#endif /* !defined(__NO_ASM__) */
 void ccmode_ccm_finalize(ccccm_ctx *key, ccccm_nonce *nonce_ctx, void *mac);
 void ccmode_ccm_reset(ccccm_ctx *key, ccccm_nonce *nonce_ctx);
 
@@ -480,6 +524,39 @@ struct _ccmode_ccm_nonce {
 .custom = (ECB_ENCRYPT) \
 }
 
+#if !defined(__NO_ASM__) 
+/* for x86_64/arm64 speedup */
+#if CCMODE_CCM_VNG_SPEEDUP
+/* Use this to statically initialize a ccmode_ccm object for decryption. */
+#define CCMODE_VNG_CCM_DECRYPT(ECB_ENCRYPT) { \
+.size = ccn_sizeof_size(sizeof(struct _ccmode_ccm_key)) + ccn_sizeof_size((ECB_ENCRYPT)->block_size) + ccn_sizeof_size((ECB_ENCRYPT)->size), \
+.nonce_size = ccn_sizeof_size(sizeof(struct _ccmode_ccm_nonce)), \
+.block_size = 1, \
+.init = ccmode_ccm_init, \
+.set_iv = ccmode_ccm_set_iv, \
+.cbcmac = ccmode_ccm_cbcmac, \
+.ccm = ccmode_ccm_decrypt_vector, \
+.finalize = ccmode_ccm_finalize, \
+.reset = ccmode_ccm_reset, \
+.custom = (ECB_ENCRYPT) \
+}
+
+/* Use this to statically initialize a ccmode_ccm object for encryption. */
+#define CCMODE_VNG_CCM_ENCRYPT(ECB_ENCRYPT) { \
+.size = ccn_sizeof_size(sizeof(struct _ccmode_ccm_key)) + ccn_sizeof_size((ECB_ENCRYPT)->block_size) + ccn_sizeof_size((ECB_ENCRYPT)->size), \
+.nonce_size = ccn_sizeof_size(sizeof(struct _ccmode_ccm_nonce)), \
+.block_size = 1, \
+.init = ccmode_ccm_init, \
+.set_iv = ccmode_ccm_set_iv, \
+.cbcmac = ccmode_ccm_cbcmac, \
+.ccm = ccmode_ccm_encrypt_vector, \
+.finalize = ccmode_ccm_finalize, \
+.reset = ccmode_ccm_reset, \
+.custom = (ECB_ENCRYPT) \
+}
+#endif /* CCMODE_CCM_VNG_SPEEDUP */
+#endif /* !defined(__NO_ASM__)  */
+
 /* Use these function to runtime initialize a ccmode_ccm decrypt object (for
  example if it's part of a larger structure). For CCM you always pass a
  ecb encrypt mode implementation of some underlying algorithm as the ecb
@@ -487,7 +564,11 @@ struct _ccmode_ccm_nonce {
 CC_INLINE
 void ccmode_factory_ccm_decrypt(struct ccmode_ccm *ccm,
                                 const struct ccmode_ecb *ecb_encrypt) {
+#if !defined(__NO_ASM__) && CCMODE_CCM_VNG_SPEEDUP
+    struct ccmode_ccm ccm_decrypt = CCMODE_VNG_CCM_DECRYPT(ecb_encrypt);
+#else
     struct ccmode_ccm ccm_decrypt = CCMODE_FACTORY_CCM_DECRYPT(ecb_encrypt);
+#endif /* CCMODE_CCM_VNG_SPEEDUP */
     *ccm = ccm_decrypt;
 }
 
@@ -498,7 +579,11 @@ void ccmode_factory_ccm_decrypt(struct ccmode_ccm *ccm,
 CC_INLINE
 void ccmode_factory_ccm_encrypt(struct ccmode_ccm *ccm,
                                 const struct ccmode_ecb *ecb_encrypt) {
+#if !defined(__NO_ASM__) && CCMODE_CCM_VNG_SPEEDUP
+    struct ccmode_ccm ccm_encrypt = CCMODE_VNG_CCM_ENCRYPT(ecb_encrypt);
+#else
     struct ccmode_ccm ccm_encrypt = CCMODE_FACTORY_CCM_ENCRYPT(ecb_encrypt);
+#endif /* CCMODE_CCM_VNG_SPEEDUP */
     *ccm = ccm_encrypt;
 }
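Because the VNG selection happens inside these inlines, callers keep the same two-step pattern regardless of whether the assembly path is compiled in. A sketch; the ccaes_ecb_encrypt_mode() getter is an assumption borrowed from ccaes.h, not something this header declares:

    #include <corecrypto/ccmode_factory.h>
    #include <corecrypto/ccaes.h>

    static struct ccmode_ccm my_ccm_encrypt;

    void setup_ccm(void)
    {
        /* Picks CCMODE_VNG_CCM_ENCRYPT or CCMODE_FACTORY_CCM_ENCRYPT at
           compile time; CCM always takes an ECB *encrypt* implementation. */
        ccmode_factory_ccm_encrypt(&my_ccm_encrypt, ccaes_ecb_encrypt_mode());
    }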
 
index ce1d1e1145728ea9fb18a569cdf8113166fd2eb7..94279d7e4984f1f640f74a0bb2f268b7142cc392 100644 (file)
@@ -2,8 +2,9 @@
  *  ccmode_impl.h
  *  corecrypto
  *
- *  Created by James Murphy on 12/9/11.
- *  Copyright (c) 2011 Apple Inc. All rights reserved.
+ *  Created on 12/07/2010
+ *
+ *  Copyright (c) 2012,2015 Apple Inc. All rights reserved.
  *
  */
 
index 3aa1bd8c53971c90a1fd7061f78bda98dd6c664f..a66d0d61887c39263e3bb551e9d3f2e6b2b3b402 100644 (file)
@@ -2,26 +2,25 @@
  *  ccn.h
  *  corecrypto
  *
- *  Created by Michael Brouwer on 7/25/10.
- *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *  Created on 11/16/2010
+ *
+ *  Copyright (c) 2010,2011,2012,2013,2014,2015 Apple Inc. All rights reserved.
  *
  */
 
 #ifndef _CORECRYPTO_CCN_H_
 #define _CORECRYPTO_CCN_H_
 
-#include <corecrypto/cc_config.h>
-#include <corecrypto/cc_priv.h>  /* TODO: Get rid of this include in this header. */
+#include <corecrypto/cc.h>
 #include <stdint.h>
 #include <stdarg.h>
 
-
 typedef uint8_t cc_byte;
 typedef size_t cc_size;
 
 #if  CCN_UNIT_SIZE == 8
 typedef uint64_t cc_unit;          // 64 bit unit
-//typedef uint128_t cc_dunit;         // 128 bit double width unit
+typedef unsigned cc_dunit __attribute__((mode(TI)));         // 128 bit double width unit
 #define CCN_LOG2_BITS_PER_UNIT  6  // 2^6 = 64 bits
 #define CC_UNIT_C(x) UINT64_C(x)
 #elif  CCN_UNIT_SIZE == 4
@@ -56,6 +55,10 @@ typedef const cc_unit *cc2np2_in_t;    // 2 * n + 2 unit long mp
 #define CCN_UNIT_BITS  (sizeof(cc_unit) * 8)
 #define CCN_UNIT_MASK  ((cc_unit)~0)
 
+typedef struct {
+    cc_unit *start;      // First cc_unit of the workspace
+    cc_unit *end;        // first address beyond the workspace; NOT TO BE TOUCHED
+} cc_ws,*cc_ws_t;
 
 /* Conversions between n sizeof and bits */
 
@@ -84,6 +87,7 @@ typedef const cc_unit *cc2np2_in_t;    // 2 * n + 2 unit long mp
 #define ccn_bit(_ccn_, _k_) ({__typeof__ (_k_) __k = (_k_); \
     1 & ((_ccn_)[__k / CCN_UNIT_BITS] >> (__k & (CCN_UNIT_BITS - 1)));})
 
+/* Set the value of bit _k_ of _ccn_ to the value _v_  */
 #define ccn_set_bit(_ccn_, _k_, _v_) ({__typeof__ (_k_) __k = (_k_);        \
     if (_v_)                                                                \
         (_ccn_)[__k/CCN_UNIT_BITS] |= CC_UNIT_C(1) << (__k & (CCN_UNIT_BITS - 1));     \
@@ -156,12 +160,12 @@ typedef const cc_unit *cc2np2_in_t;    // 2 * n + 2 unit long mp
  64 bit units respectively. */
 #if CCN_UNIT_SIZE == 8
 
-#define ccn64_32(a1,a0) (((cc_unit)a1) << 32 | ((cc_unit)a0))
+#define ccn64_32(a1,a0) (((const cc_unit)a1) << 32 | ((const cc_unit)a0))
 #define ccn32_32(a0) a0
 #if __LITTLE_ENDIAN__
-#define ccn32_32_parse(p,i) (((uint32_t *)p)[i])
+#define ccn32_32_parse(p,i) (((const uint32_t *)p)[i])
 #else
-#define ccn32_32_parse(p,i) (((uint32_t *)p)[i^1])
+#define ccn32_32_parse(p,i) (((const uint32_t *)p)[i^1])
 #endif
 #define ccn32_32_null 0
 
@@ -255,72 +259,12 @@ typedef const cc_unit *cc2np2_in_t;    // 2 * n + 2 unit long mp
 #define CCN224_N  ccn_nof(224)
 #define CCN256_N  ccn_nof(256)
 #define CCN384_N  ccn_nof(384)
+#define CCN512_N  ccn_nof(512)
 #define CCN521_N  ccn_nof(521)
 
-#if defined(_ARM_ARCH_6) || defined(_ARM_ARCH_7)
-#if CCN_USE_BUILTIN_CLZ
-CC_INLINE CC_CONST
-cc_unit cc_clz(cc_unit data)
-{
-    return __builtin_clzl(data);
-}
-#else
-CC_INLINE CC_CONST
-cc_unit cc_clz(cc_unit data)
-{
-    __asm__ ("clz %0, %1\n" : "=l" (data) : "l" (data));
-    return data;
-}
-#endif /* CCN_USE_BUILTIN_CLZ */
-#endif /* !defined(_ARM_ARCH_6) && !defined(_ARM_ARCH_7) */
-
-
-#if CCN_N_INLINE
-/* Return the number of used units after stripping leading 0 units.  */
-CC_INLINE CC_PURE CC_NONNULL2 
-cc_size ccn_n(cc_size n, const cc_unit *s) {
-#if 1
-    while (n-- && s[n] == 0) {}
-    return n + 1;
-#elif 0
-    while (n && s[n - 1] == 0) {
-        n -= 1;
-    }
-    return n;
-#else
-    if (n & 1) {
-        if (s[n - 1])
-            return n;
-        n &= ~1;
-    }
-    if (n & 2) {
-        cc_unit a[2] = { s[n - 1], s[n - 2] };
-        if (a[0])
-            return n - 1;
-        if (a[1])
-            return n - 2;
-        n &= ~2;
-    }
-    while (n) {
-        cc_unit a[4] = { s[n - 1], s[n - 2], s[n - 3], s[n - 4] };
-        if (a[0])
-            return n - 1;
-        if (a[1])
-            return n - 2;
-        if (a[2])
-            return n - 3;
-        if (a[3])
-            return n - 4;
-        n -= 4;
-    }
-    return n;
-#endif
-}
-#else
 /* Return the number of used units after stripping leading 0 units.  */
 CC_PURE CC_NONNULL2
 cc_size ccn_n(cc_size n, const cc_unit *s);
-#endif
 
 /* s >> k -> r return bits shifted out of least significant word in bits [0, n>
  { N bit, scalar -> N bit } N = n * sizeof(cc_unit) * 8
@@ -361,24 +305,10 @@ size_t ccn_trailing_zeros(cc_size n, const cc_unit *s);
 
 #define ccn_is_zero_or_one(_n_, _s_) (((_n_)==0) || ((ccn_n(_n_, _s_) <= 1) && (_s_[0] <= 1)))
 
-#if CCN_CMP_INLINE
-CC_INLINE CC_PURE CC_NONNULL((2, 3))
-int ccn_cmp(cc_size n, const cc_unit *s, const cc_unit *t) {
-       while (n) {
-        n--;
-        cc_unit si = s[n];
-        cc_unit ti = t[n];
-        if (si != ti)
-            return si > ti ? 1 : -1;
-       }
-       return n;
-}
-#else
 /* s < t -> return - 1 | s == t -> return 0 | s > t -> return 1
  { N bit, N bit -> int } N = n * sizeof(cc_unit) * 8 */
 CC_PURE CC_NONNULL((2, 3))
 int ccn_cmp(cc_size n, const cc_unit *s, const cc_unit *t);
-#endif
 
 /* s < t -> return - 1 | s == t -> return 0 | s > t -> return 1
  { N bit, M bit -> int } N = ns * sizeof(cc_unit) * 8  M = nt * sizeof(cc_unit) * 8 */
@@ -448,6 +378,13 @@ void ccn_lcm(cc_size n, cc_unit *r2n, const cc_unit *s, const cc_unit *t);
 CC_NONNULL((2, 3, 4))
 void ccn_mul(cc_size n, cc_unit *r_2n, const cc_unit *s, const cc_unit *t);
 
+/* s * t -> r_2n                   r_2n must not overlap with s or t
+ { n bit, n bit -> 2 * n bit } n = count * sizeof(cc_unit) * 8
+ { N bit, N bit -> 2N bit } N = ccn_bitsof(n) 
+ Provide a workspace for potential speedup */
+CC_NONNULL((2, 3, 4, 5))
+void ccn_mul_ws(cc_size count, cc_unit *r, const cc_unit *s, const cc_unit *t, cc_ws_t ws);
+
 /* s[0..n) * v -> r[0..n)+return value
  { N bit, sizeof(cc_unit) * 8 bit -> N + sizeof(cc_unit) * 8 bit } N = n * sizeof(cc_unit) * 8 */
 CC_NONNULL((2, 3))
@@ -534,6 +471,19 @@ size_t ccn_write_int_size(cc_size n, const cc_unit *s);
 CC_NONNULL((2, 4))
 void ccn_write_int(cc_size n, const cc_unit *s, size_t out_size, void *out);
 
+#if CCN_DEDICATED_SQR
+
+/* s^2 -> r
+ { n bit -> 2 * n bit } */
+CC_NONNULL((2, 3))
+void ccn_sqr(cc_size n, cc_unit *r, const cc_unit *s);
+
+/* s^2 -> r
+ { n bit -> 2 * n bit } */
+CC_NONNULL((2, 3, 4))
+void ccn_sqr_ws(cc_size n, cc_unit *r, const cc_unit *s, cc_ws_t ws);
+
+#else
 
 /* s^2 -> r
  { n bit -> 2 * n bit } */
@@ -542,6 +492,15 @@ void ccn_sqr(cc_size n, cc_unit *r, const cc_unit *s) {
     ccn_mul(n, r, s, s);
 }
 
+/* s^2 -> r
+ { n bit -> 2 * n bit } */
+CC_INLINE CC_NONNULL((2, 3, 4))
+void ccn_sqr_ws(cc_size n, cc_unit *r, const cc_unit *s, cc_ws_t ws) {
+    ccn_mul_ws(n, r, s, s, ws);
+}
+
+#endif
+
 /* s -> r
  { n bit -> n bit } */
 CC_NONNULL((2, 3))
@@ -549,15 +508,17 @@ void ccn_set(cc_size n, cc_unit *r, const cc_unit *s);
 
 CC_INLINE CC_NONNULL2
 void ccn_zero(cc_size n, cc_unit *r) {
-    CC_BZERO(r, ccn_sizeof_n(n));
+    cc_zero(ccn_sizeof_n(n),r);
+}
+
+CC_INLINE CC_NONNULL2
+void ccn_clear(cc_size n, cc_unit *r) {
+    cc_clear(ccn_sizeof_n(n),r);
 }
 
 CC_NONNULL2
 void ccn_zero_multi(cc_size n, cc_unit *r, ...);
 
-/* Burn (zero fill or otherwise overwrite) n cc_units of stack space. */
-void ccn_burn_stack(cc_size n);
-
 CC_INLINE CC_NONNULL2
 void ccn_seti(cc_size n, cc_unit *r, cc_unit v) {
     /* assert(n > 0); */
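The cc_ws workspace and the new _ws entry points replace the old ccn_burn_stack approach: the caller hands in scratch memory bounded by start/end instead of the callee zeroing stack behind itself. A sketch of the calling pattern; the amount of scratch actually required is not stated in this header, so the size below is a placeholder assumption:

    #include <corecrypto/ccn.h>

    /* Multiply two 256-bit values into a 512-bit result using caller-provided scratch. */
    void mul256_with_ws(cc_unit r[CCN512_N],
                        const cc_unit a[CCN256_N],
                        const cc_unit b[CCN256_N])
    {
        cc_unit scratch[4 * CCN512_N];                     /* placeholder size, see above */
        cc_ws ws = { .start = scratch, .end = scratch + 4 * CCN512_N };

        ccn_mul_ws(CCN256_N, r, a, b, &ws);                /* same contract as ccn_mul() */

        ccn_clear(4 * CCN512_N, scratch);                  /* wipe the scratch when done */
    }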
index 86001c2e61034d7e288eeeaf94df5fa6488b650b..4514366153e5e91535cab9b80209b36956047909 100644 (file)
@@ -2,8 +2,9 @@
  *  ccpad.h
  *  corecrypto
  *
- *  Created by Michael Brouwer on 12/6/10.
- *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *  Created on 12/07/2010
+ *
+ *  Copyright (c) 2010,2011,2012,2014,2015 Apple Inc. All rights reserved.
  *
  */
 
@@ -62,6 +63,9 @@ size_t ccpad_pkcs7_ecb_decrypt(const struct ccmode_ecb *ecb, ccecb_ctx *ecb_key,
 void ccpad_pkcs7_ecb_encrypt(const struct ccmode_ecb *ecb, ccecb_ctx *ctx,
                              size_t nbytes, const void *in, void *out);
 
+/* Function common to ccpad_pkcs7_ecb_decrypt and ccpad_pkcs7_decrypt */
+size_t ccpad_pkcs7_decode(const size_t block_size, const uint8_t* last_block);
+
 /* Contract is nbytes is at least 1 block + 1 byte.  Also in is nbytes long out is nbytes long. */
 void ccpad_xts_decrypt(const struct ccmode_xts *xts, ccxts_ctx *ctx, ccxts_tweak *tweak,
                        size_t nbytes, const void *in, void *out);
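ccpad_pkcs7_decode() factors the padding check shared by the CBC and ECB decrypt paths out of both callers; the header does not say whether it returns the pad length or the remaining payload length. For orientation, a toy version of the underlying PKCS#7 rule (returning the pad length, and treating malformed padding as zero, are both assumptions for the sketch rather than statements about the real routine):

    #include <stddef.h>
    #include <stdint.h>

    static size_t pkcs7_pad_len(size_t block_size, const uint8_t *last_block)
    {
        uint8_t pad = last_block[block_size - 1];   /* last byte encodes the pad length */

        if (pad == 0 || pad > block_size)
            return 0;                               /* malformed padding: treat as none */

        for (size_t i = block_size - pad; i < block_size; i++)
            if (last_block[i] != pad)
                return 0;                           /* every pad byte must equal the length */

        return pad;
    }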
index ee980159f314ab743ae0eb98f7f04d4707d865ef..9e296ff198eb438f8082d26979fff5ec5cd65dc3 100644 (file)
@@ -1,10 +1,10 @@
 /*
- *  ccpbkdf.h
+ *  ccpbkdf2.h
  *  corecrypto
  *
- *  Copyright 1999-2001, 2010 Apple Inc. All rights reserved.
+ *  Created on 12/15/2010
  *
- *  Derived from pbkdf2.h by Mitch Adler on 09-12-2010. 
+ *  Copyright (c) 2010,2011,2012,2015 Apple Inc. All rights reserved.
  *
  */
 
index 84204bb35aa6dfffbfd735fd01a964b99d693077..6e1ec736a9090ee33389edc43dc919d309721741 100644 (file)
@@ -2,8 +2,9 @@
  *  ccrc4.h
  *  corecrypto
  *
- *  Created by Fabrice Gautier on 12/22/10.
- *  Copyright 2010,2011 Apple, Inc. All rights reserved.
+ *  Created on 12/22/2010
+ *
+ *  Copyright (c) 2010,2011,2012,2013,2014,2015 Apple Inc. All rights reserved.
  *
  */
 
@@ -17,7 +18,7 @@ cc_aligned_struct(16) ccrc4_ctx;
 /* Declare a rc4 key named _name_.  Pass the size field of a struct ccmode_ecb
  for _size_. */
 #define ccrc4_ctx_decl(_size_, _name_) cc_ctx_decl(ccrc4_ctx, _size_, _name_)
-#define ccrc4_ctx_clear(_size_, _name_) cc_zero(_size_, _name_)
+#define ccrc4_ctx_clear(_size_, _name_) cc_clear(_size_, _name_)
 
 struct ccrc4_info {
     size_t size;        /* first argument to ccrc4_ctx_decl(). */
index c748bc6e6a1739a9034f6aa08b4b5cf00168730e..a3291c83027e624201ddbe961b7635290b3d20c0 100644 (file)
@@ -2,8 +2,9 @@
  *  ccrng.h
  *  corecrypto
  *
- *  Created by Fabrice Gautier on 12/13/10.
- *  Copyright 2010 Apple, Inc. All rights reserved.
+ *  Created on 12/13/2010
+ *
+ *  Copyright (c) 2010,2011,2013,2014,2015 Apple Inc. All rights reserved.
  *
  */
 
 
 #include <stdint.h>
 
-#define CC_ERR_DEVICE -100
-#define CC_ERR_INTERUPTS -101
-#define CC_ERR_CRYPTO_CONFIG -102
-#define CC_ERR_PERMS -103
-#define CC_ERR_PARAMETER -104
-#define CC_ERR_MEMORY -105
+#define CC_ERR_DEVICE           -100
+#define CC_ERR_INTERUPTS        -101
+#define CC_ERR_CRYPTO_CONFIG    -102
+#define CC_ERR_PERMS            -103
+#define CC_ERR_PARAMETER        -104
+#define CC_ERR_MEMORY           -105
+#define CC_ERR_FILEDESC         -106
+#define CC_ERR_OUT_OF_ENTROPY   -107
 
 #define CCRNG_STATE_COMMON                                                          \
     int (*generate)(struct ccrng_state *rng, unsigned long outlen, void *out);
index 3ecc428f70853eb72a25a3eb37f3545a77769f08..b6c8c06fd7c6d892c6fc2900f95f0664b73c2511 100644 (file)
@@ -2,8 +2,9 @@
  *  ccrng_system.h
  *  corecrypto
  *
- *  Created by Fabrice Gautier on 12/13/10.
- *  Copyright 2010 Apple, Inc. All rights reserved.
+ *  Created on 12/13/2010
+ *
+ *  Copyright (c) 2010,2013,2014,2015 Apple Inc. All rights reserved.
  *
  */
 
@@ -17,8 +18,11 @@ struct ccrng_system_state {
     int fd;
 };
 
+// Set up the system RNG (opens a descriptor on /dev/random)
 int ccrng_system_init(struct ccrng_system_state *rng);
 
+// Close the system RNG
+// Mandatory step to avoid leaking the file descriptor
 void ccrng_system_done(struct ccrng_system_state *rng);
 
 #endif /* _CORECRYPTO_CCRNG_SYSTEM_H_ */
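The init/done comments above pair up around the file descriptor. A user-space sketch of the lifecycle; it assumes the state begins with CCRNG_STATE_COMMON (so the generate member can be called through a struct ccrng_state cast), which is the pattern other corecrypto RNG states follow but is not visible in this hunk:

    #include <corecrypto/ccrng_system.h>

    int fill_random(void *buf, unsigned long len)
    {
        struct ccrng_system_state rng;
        int rc = ccrng_system_init(&rng);         /* opens /dev/random */
        if (rc != 0)
            return rc;                            /* one of the CC_ERR_* codes above */

        rc = rng.generate((struct ccrng_state *)&rng, len, buf);

        ccrng_system_done(&rng);                  /* mandatory: closes the descriptor */
        return rc;
    }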
index 8e4480168ef219286778616785e81d5a1860b386..1990c197e951a5461dfa13322aa71f989332d07f 100644 (file)
@@ -2,8 +2,9 @@
  *  ccsha1.h
  *  corecrypto
  *
- *  Created by Michael Brouwer on 12/1/10.
- *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *  Created on 12/01/2010
+ *
+ *  Copyright (c) 2010,2011,2012,2014,2015 Apple Inc. All rights reserved.
  *
  */
 
@@ -31,13 +32,13 @@ void ccsha1_final(const struct ccdigest_info *di, ccdigest_ctx_t,
 extern const struct ccdigest_info ccsha1_ltc_di;
 extern const struct ccdigest_info ccsha1_eay_di;
 
-#if CCSHA1_VNG_INTEL
+#if !defined(__NO_ASM__) && CCSHA1_VNG_INTEL
 //extern const struct ccdigest_info ccsha1_vng_intel_di;
 extern const struct ccdigest_info ccsha1_vng_intel_SupplementalSSE3_di;
 extern const struct ccdigest_info ccsha1_vng_intel_NOSupplementalSSE3_di;
 #endif
 
-#if CCSHA1_VNG_ARMV7NEON
+#if !defined(__NO_ASM__) && CCSHA1_VNG_ARMV7NEON
 extern const struct ccdigest_info ccsha1_vng_armv7neon_di;
 #endif
 
index 5f55b9f4016051099f2c684bfb847ed6b052e042..2029e327b08510865ade9c14634b0e6e93579769 100644 (file)
@@ -2,8 +2,9 @@
  *  ccsha2.h
  *  corecrypto
  *
- *  Created by Fabrice Gautier on 12/3/10.
- *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *  Created on 12/03/2010
+ *
+ *  Copyright (c) 2010,2011,2012,2014,2015 Apple Inc. All rights reserved.
  *
  */
 
@@ -37,7 +38,7 @@ const struct ccdigest_info *ccsha512_di(void);
 #define        CCSHA256_OUTPUT_SIZE 32
 #define        CCSHA256_STATE_SIZE  32
 extern const struct ccdigest_info ccsha256_ltc_di;
-#if CCSHA2_VNG_INTEL
+#if !defined(__NO_ASM__) && CCSHA2_VNG_INTEL
 #if defined __x86_64__
 extern const struct ccdigest_info ccsha256_vng_intel_AVX2_di;
 extern const struct ccdigest_info ccsha256_vng_intel_AVX1_di;
@@ -45,7 +46,7 @@ extern const struct ccdigest_info ccsha256_vng_intel_AVX1_di;
 extern const struct ccdigest_info ccsha256_vng_intel_SupplementalSSE3_di;
 extern const struct ccdigest_info ccsha256_vng_intel_NOSupplementalSSE3_di;
 #endif
-#if CCSHA2_VNG_ARMV7NEON
+#if !defined(__NO_ASM__) && CCSHA2_VNG_ARMV7NEON
 extern const struct ccdigest_info ccsha256_vng_armv7neon_di;
 #endif
 extern const uint32_t ccsha256_K[64];
@@ -53,9 +54,13 @@ extern const uint32_t ccsha256_K[64];
 /* SHA224 */
 #define        CCSHA224_OUTPUT_SIZE 28
 extern const struct ccdigest_info ccsha224_ltc_di;
+#if !defined(__NO_ASM__) && CCSHA2_VNG_INTEL
 extern const struct ccdigest_info ccsha224_vng_intel_SupplementalSSE3_di;
 extern const struct ccdigest_info ccsha224_vng_intel_NOSupplementalSSE3_di;
+#endif
+#if !defined(__NO_ASM__) && CCSHA2_VNG_ARMV7NEON
 extern const struct ccdigest_info ccsha224_vng_armv7neon_di;
+#endif
 
 /* SHA512 */
 #define CCSHA512_BLOCK_SIZE  128
index d26ad14e3d80ff807d4b1d052c9f85b07577d36b..aba7cb7f695382c46d71971a32cbdcc3858f83ba 100644 (file)
@@ -207,6 +207,9 @@ struct mach_header_64 {
                                           require it. Only used in MH_EXECUTE
                                           filetypes. */
 
+#define MH_APP_EXTENSION_SAFE 0x02000000 /* The code was linked for use in an
+                                           application extension. */
+
 /*
  * The load commands directly follow the mach_header.  The total size of all
  * of the commands is given by the sizeofcmds field in the mach_header.  All
@@ -295,7 +298,9 @@ struct load_command {
 #define LC_SOURCE_VERSION 0x2A /* source version used to build binary */
 #define LC_DYLIB_CODE_SIGN_DRS 0x2B /* Code signing DRs copied from linked dylibs */
 #define        LC_ENCRYPTION_INFO_64 0x2C /* 64-bit encrypted segment information */
-
+#define LC_LINKER_OPTION 0x2D /* linker options in MH_OBJECT files */
+#define LC_LINKER_OPTIMIZATION_HINT 0x2E /* optimization hints in MH_OBJECT files */
+#define LC_VERSION_MIN_WATCHOS 0x30 /* build for Watch min OS version */
 
 /*
  * A variable length string in a load command is represented by an lc_str
@@ -1156,7 +1161,8 @@ struct rpath_command {
 struct linkedit_data_command {
     uint32_t   cmd;            /* LC_CODE_SIGNATURE, LC_SEGMENT_SPLIT_INFO,
                                    LC_FUNCTION_STARTS, LC_DATA_IN_CODE,
-                                  or LC_DYLIB_CODE_SIGN_DRS */
+                                  LC_DYLIB_CODE_SIGN_DRS or
+                                  LC_LINKER_OPTIMIZATION_HINT. */
     uint32_t   cmdsize;        /* sizeof(struct linkedit_data_command) */
     uint32_t   dataoff;        /* file offset of data in __LINKEDIT segment */
     uint32_t   datasize;       /* file size of data in __LINKEDIT segment  */
@@ -1177,7 +1183,7 @@ struct encryption_info_command {
 
 /*
  * The encryption_info_command_64 contains the file offset and size of an
- * of an encrypted segment (for use in 64-bit targets).
+ * encrypted segment (for use in x86_64 targets).
  */
 struct encryption_info_command_64 {
    uint32_t    cmd;            /* LC_ENCRYPTION_INFO_64 */
@@ -1196,7 +1202,8 @@ struct encryption_info_command_64 {
  */
 struct version_min_command {
     uint32_t   cmd;            /* LC_VERSION_MIN_MACOSX or
-                                  LC_VERSION_MIN_IPHONEOS  */
+                                  LC_VERSION_MIN_IPHONEOS or
+                                  LC_VERSION_MIN_WATCHOS */
     uint32_t   cmdsize;        /* sizeof(struct min_version_command) */
     uint32_t   version;        /* X.Y.Z is encoded in nibbles xxxx.yy.zz */
     uint32_t   sdk;            /* X.Y.Z is encoded in nibbles xxxx.yy.zz */
@@ -1375,6 +1382,17 @@ struct dyld_info_command {
 #define EXPORT_SYMBOL_FLAGS_REEXPORT                           0x08
 #define EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER                  0x10
 
+/*
+ * The linker_option_command contains linker options embedded in object files.
+ */
+struct linker_option_command {
+    uint32_t  cmd;     /* LC_LINKER_OPTION only used in MH_OBJECT filetypes */
+    uint32_t  cmdsize;
+    uint32_t  count;   /* number of strings */
+    /* concatenation of zero terminated UTF8 strings.
+       Zero filled at end to align */
+};
+
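The packing implied by the comment above is simply the 12-byte header followed by count NUL-terminated UTF-8 strings, with trailing zero bytes so cmdsize stays aligned. An illustrative (made-up) layout for the options "-lz -framework Foo":

    cmd     = LC_LINKER_OPTION
    cmdsize = 32        /* 12-byte header + 19 bytes of strings + 1 byte of zero padding */
    count   = 3
    data    = "-lz\0" "-framework\0" "Foo\0" "\0"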
 /*
  * The symseg_command contains the offset and size of the GNU style
  * symbol table information as described in the header file <symseg.h>.
index 1c1941012e9edc663bdfaabba9a7afdd2c087adb..133e36b49872ef3e7201f82d6361547f83fd736d 100644 (file)
@@ -78,7 +78,7 @@ struct nlist {
 #ifndef __LP64__
                char *n_name;   /* for use when in-core */
 #endif
-               int32_t n_strx; /* index into the string table */
+               uint32_t n_strx;        /* index into the string table */
        } n_un;
        uint8_t n_type;         /* type flag, see below */
        uint8_t n_sect;         /* section number or NO_SECT */
@@ -296,15 +296,21 @@ struct nlist_64 {
  */
 #define N_SYMBOL_RESOLVER  0x0100 
 
+/*
+ * The N_ALT_ENTRY bit of the n_desc field indicates that the
+ * symbol is pinned to the previous content.
+ */
+#define N_ALT_ENTRY 0x0200
+
 #ifndef __STRICT_BSD__
-#if __cplusplus
+#ifdef __cplusplus
 extern "C" {
 #endif /* __cplusplus */
 /*
  * The function nlist(3) from the C library.
  */
 extern int nlist (const char *filename, struct nlist *list);
-#if __cplusplus
+#ifdef __cplusplus
 }
 #endif /* __cplusplus */
 #endif /* __STRICT_BSD__ */
index e9e15b27ab8db3c1cba0df9001c4f7060b2a2bcb..0c89939ef9df26dc1657830f186a741b23ccac33 100644 (file)
@@ -90,6 +90,7 @@
 #define        N_STSYM 0x26    /* static symbol: name,,n_sect,type,address */
 #define        N_LCSYM 0x28    /* .lcomm symbol: name,,n_sect,type,address */
 #define N_BNSYM 0x2e   /* begin nsect sym: 0,,n_sect,0,address */
+#define N_AST  0x32    /* AST file path: name,,NO_SECT,0,0 */
 #define N_OPT  0x3c    /* emitted with gcc2_compiled and in gcc source */
 #define        N_RSYM  0x40    /* register sym: name,,NO_SECT,type,register */
 #define        N_SLINE 0x44    /* src line: 0,,n_sect,linenumber,address */
index c4036c896f6ba28a069b312a48d41d231bb3a247..7fbedaceba696afbb7d5181e920fd920459ad773 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -138,20 +138,22 @@ else
 MAKEJOBS := --jobs=$(SYSCTL_HW_LOGICALCPU)
 endif
 
-TOP_TARGETS =                                                                                          \
-       clean                                                                                           \
-       installsrc                                                                                      \
-       exporthdrs                                                                                      \
-       all all_desktop all_embedded                                                                    \
-       all_release_embedded all_development_embedded                                           \
-       installhdrs installhdrs_desktop installhdrs_embedded                                            \
-       installhdrs_release_embedded installhdrs_development_embedded                           \
-       install install_desktop install_embedded                                                        \
-       install_release_embedded install_development_embedded                                   \
-       installopensource                                                                               \
-       cscope tags                                                                                     \
+TOP_TARGETS =                                                          \
+       clean                                                           \
+       installsrc                                                      \
+       exporthdrs                                                      \
+       all all_desktop all_embedded                                    \
+       all_release_embedded all_development_embedded                   \
+       installhdrs installhdrs_desktop installhdrs_embedded            \
+       installhdrs_release_embedded installhdrs_development_embedded   \
+       install install_desktop install_embedded                        \
+       install_release_embedded install_development_embedded           \
+       installopensource                                               \
+       cscope tags TAGS reindent                                       \
        help
 
+DEFAULT_TARGET = all
+
 # Targets for internal build system debugging
 TOP_TARGETS +=                                                 \
        print_exports print_exports_first_build_config  \
@@ -161,16 +163,21 @@ TOP_TARGETS +=                                            \
        install_textfiles                               \
        install_config
 
+ifeq ($(BUILD_JSON_COMPILATION_DATABASE),1)
+MAKEARGS += -B
+DEFAULT_TARGET := build
+endif
+
 .PHONY: $(TOP_TARGETS)
 
-default: all
+default: $(DEFAULT_TARGET)
 
 ifneq ($(REMOTEBUILD),)
 $(TOP_TARGETS):
        $(_v)$(VERSDIR)/tools/remote_build.sh _REMOTEBUILD_TARGET=$@ _REMOTEBUILD_MAKE=$(MAKE) $(if $(filter --,$(MAKEFLAGS)),-,)$(MAKEFLAGS)
 else
 $(TOP_TARGETS):
-       $(_v)$(MAKE) -r $(if $(filter -j,$(MAKEFLAGS)),,$(MAKEJOBS)) -f $(MakeInc_top) $@
+       $(_v)$(MAKE) $(MAKEARGS) -r $(if $(filter -j,$(MAKEFLAGS)),,$(MAKEJOBS)) -f $(MakeInc_top) $@
 endif
 
 else # CURRENT_BUILD_CONFIG
@@ -194,17 +201,20 @@ INSTINC_SUBDIRS = $(ALL_SUBDIRS) EXTERNAL_HEADERS
 INSTINC_SUBDIRS_X86_64 = $(INSTINC_SUBDIRS)
 INSTINC_SUBDIRS_X86_64H = $(INSTINC_SUBDIRS)
 INSTINC_SUBDIRS_ARM = $(INSTINC_SUBDIRS)
+INSTINC_SUBDIRS_ARM64 = $(INSTINC_SUBDIRS)
 
 EXPINC_SUBDIRS = $(ALL_SUBDIRS)
 EXPINC_SUBDIRS_X86_64 = $(EXPINC_SUBDIRS)
 EXPINC_SUBDIRS_X86_64H = $(EXPINC_SUBDIRS)
 EXPINC_SUBDIRS_ARM = $(EXPINC_SUBDIRS)
+EXPINC_SUBDIRS_ARM64 = $(EXPINC_SUBDIRS)
 
 SETUP_SUBDIRS = SETUP
 
 COMP_SUBDIRS_X86_64 = $(ALL_SUBDIRS)
 COMP_SUBDIRS_X86_64H = $(ALL_SUBDIRS)
 COMP_SUBDIRS_ARM = $(ALL_SUBDIRS)
+COMP_SUBDIRS_ARM64 = $(ALL_SUBDIRS)
 
 INSTTEXTFILES_SUBDIRS =        \
        bsd
@@ -217,6 +227,16 @@ endif # CURRENT_BUILD_CONFIG
 
 endif # all other RC_ProjectName
 
+installhdrs_libkdd install_libkdd:
+       cd libkdd; \
+               xcodebuild $(subst _libkdd,,$@)         \
+                       "SRCROOT=$(SRCROOT)/libkdd"             \
+                       "OBJROOT=$(OBJROOT)"                    \
+                       "SYMROOT=$(SYMROOT)"                    \
+                       "DSTROOT=$(DSTROOT)"                    \
+                       "SDKROOT=$(SDKROOT)"
+
+
 # "xnu_quick_test" and "testbots" are targets that can be invoked via a standalone
 # "make xnu_quick_test" or via buildit/XBS with the RC_ProjectName=xnu_quick_test.
 # Define the target here in the outermost scope of the initial Makefile
diff --git a/README b/README
index f6b6323353890b21362f2122ea8b84f824b22d8f..1294b6726f2fb316d8d94feb175bc84677d4ad3e 100644 (file)
--- a/README
+++ b/README
@@ -118,7 +118,18 @@ A. How to build XNU:
 
     $ make cscope      # this will build cscope database
 
-8) Other makefile options
+8) Reindenting files
+
+  Source files can be reindented using the clang-format setup in .clang-format. XNU follows a variant of the WebKit style for source code formatting. Please refer to the format style at http://www.webkit.org/coding/coding-style.html. Further details about the style options are available at http://clang.llvm.org/docs/ClangFormatStyleOptions.html
+
+  Note: the clang-format binary may not be part of the base installation. It can be compiled from the llvm clang sources and must be reachable in $PATH.
+
+  From the top directory, run:
+
+   $ make reindent      # reindent all source files using clang format.
+
+
+9) Other makefile options
 
    $ make MAKEJOBS=-j8    # this will use 8 processes during the build. The default is 2x the number of active CPUS.
    $ make -j8             # the standard command-line option is also accepted
@@ -129,6 +140,8 @@ A. How to build XNU:
 
    $ make REMOTEBUILD=user@remotehost # perform build on remote host
 
+   $ make BUILD_JSON_COMPILATION_DATABASE=1 # Build Clang JSON Compilation Database
+
 =============================================
 B. How to install a new header file from XNU
 
@@ -137,9 +150,8 @@ B. How to install a new header file from XNU
 1) XNU installs header files at the following locations -
        a. $(DSTROOT)/System/Library/Frameworks/Kernel.framework/Headers
        b. $(DSTROOT)/System/Library/Frameworks/Kernel.framework/PrivateHeaders
-       c. $(DSTROOT)/System/Library/Frameworks/System.framework/Headers
+       c. $(DSTROOT)/usr/include/
        d. $(DSTROOT)/System/Library/Frameworks/System.framework/PrivateHeaders
-       e. $(DSTROOT)/usr/include/
 
        Kernel.framework is used by kernel extensions.  System.framework 
        and /usr/include are used by user level applications.  The header 
@@ -156,9 +168,7 @@ B. How to install a new header file from XNU
    from each file list are -
 
    a. DATAFILES : To make header file available in user level -
-         $(DSTROOT)/System/Library/Frameworks/System.framework/Headers
-         $(DSTROOT)/System/Library/Frameworks/System.framework/PrivateHeaders
-         $(DSTROOT)/usr/include/
+         $(DSTROOT)/usr/include
 
    b. PRIVATE_DATAFILES : To make header file available to Apple internal in 
       user level -
@@ -179,20 +189,19 @@ B. How to install a new header file from XNU
    by adding the appropriate file lists.  The default install lists, its 
    member file lists and their default location are described below - 
 
-   a. INSTALL_MI_LIST : Installs header file to location that is available to 
+   a. INSTALL_MI_LIST : Installs header file to a location that is available to
       everyone in user level. 
       Locations -
-         $(DSTROOT)/System/Library/Frameworks/System.framework/Headers
-         $(DSTROOT)/usr/include/
+         $(DSTROOT)/usr/include
       Definition -
          INSTALL_MI_LIST = ${DATAFILES}
 
-   b. INSTALL_MI_LCL_LIST : Installs header file to location that is available
+   b. INSTALL_MI_LCL_LIST : Installs header file to a location that is available
       for Apple internal in user level.
       Locations -
          $(DSTROOT)/System/Library/Frameworks/System.framework/PrivateHeaders
       Definition -
-         INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES}
+         INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
 
    c. INSTALL_KF_MI_LIST : Installs header file to location that is available
       to everyone for kernel extensions.
index 4ef2047d4bacf21e525a9297c01015832f11b010..6236960ac8e35ceeb408c9cd4e661318a8f59372 100644 (file)
@@ -14,5 +14,9 @@ SETUP_SUBDIRS =       \
        installfile     \
        replacecontents
 
+ifeq ($(BUILD_JSON_COMPILATION_DATABASE),1)
+SETUP_SUBDIRS += json_compilation_db
+endif
+
 include $(MakeInc_rule)
 include $(MakeInc_dir)
diff --git a/SETUP/json_compilation_db/Makefile b/SETUP/json_compilation_db/Makefile
new file mode 100644 (file)
index 0000000..c3634fe
--- /dev/null
@@ -0,0 +1,30 @@
+export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
+export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
+export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
+export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
+
+include $(MakeInc_cmd)
+include $(MakeInc_def)
+
+OBJS = json_compilation_db.o
+
+CFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) -g -O0 -I$(SOURCE) -I.
+
+WARNFLAGS = -Wall
+
+LDFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION)
+
+json_compilation_db: $(OBJS)
+       @echo HOST_LD $@
+       $(_v)$(HOST_CC) $(LDFLAGS) -o $@ $^
+       @echo HOST_CODESIGN $@
+       $(_v)env CODESIGN_ALLOCATE=$(HOST_CODESIGN_ALLOCATE) $(HOST_CODESIGN) -s - $@
+
+%.o: %.c
+       @echo HOST_CC $@
+       $(_v)$(HOST_CC) $(WARNFLAGS) $(CFLAGS) -c -o $@ $<
+
+do_build_setup:: json_compilation_db
+
+include $(MakeInc_rule)
+include $(MakeInc_dir)
diff --git a/SETUP/json_compilation_db/json_compilation_db.c b/SETUP/json_compilation_db/json_compilation_db.c
new file mode 100644 (file)
index 0000000..7a148ae
--- /dev/null
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2013 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+/*
+ * json_compilation_db is a helper tool that takes a compiler invocation, and
+ * appends it in JSON format to the specified database.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <err.h>
+#include <sysexits.h>
+
+#include <sys/stat.h>
+#include <sys/fcntl.h>
+#include <sys/param.h>
+
+void usage(void);
+char *escape_string(const char *);
+
+/*
+ * We support appending to a database in either of two starting states:
+ *
+ * an empty (0-byte) file: ""
+ *
+ * or an existing JSON array of entries:
+ *
+ * "["
+ * "{"
+ * "  ..."
+ * "}"
+ * "]"
+ */
+
+int main(int argc, char * argv[])
+{
+       struct stat sb;
+       int ret;
+       int dstfd;
+       FILE *dst = NULL;
+       const char *json_output = NULL;
+       const char *cwd = NULL;
+       const char *input_file = NULL;
+       char start[2];
+       size_t read_bytes;
+       int i;
+       size_t input_file_len;
+
+       if (argc < 5) {
+               usage();
+       }
+
+       json_output = argv[1];
+       cwd = argv[2];
+       input_file = argv[3];
+
+       argv += 4;
+       argc -= 4;
+
+       input_file_len = strlen(input_file);
+       if (!(input_file_len > 2 && 0 == strcmp(".c",   input_file + input_file_len - 2)) &&
+               !(input_file_len > 3 && 0 == strcmp(".cp",  input_file + input_file_len - 3)) &&
+               !(input_file_len > 4 && 0 == strcmp(".cpp", input_file + input_file_len - 4))) {
+               /* Not a C/C++ file, just skip it */
+               return 0;
+       }
+
+       dstfd = open(json_output, O_RDWR | O_CREAT | O_EXLOCK, DEFFILEMODE);
+       if (dstfd < 0)
+               err(EX_NOINPUT, "open(%s)", json_output);
+
+       ret = fstat(dstfd, &sb);
+       if (ret < 0)
+               err(EX_NOINPUT, "fstat(%s)", json_output);
+
+       if (!S_ISREG(sb.st_mode))
+               err(EX_USAGE, "%s is not a regular file", json_output);
+
+       dst = fdopen(dstfd, "w+");
+       if (dst == NULL)
+               err(EX_UNAVAILABLE, "fdopen");
+
+       read_bytes = fread(start, sizeof(start[0]), sizeof(start)/sizeof(start[0]), dst);
+       if ((read_bytes != sizeof(start)) || (0 != memcmp(start, "[\n", sizeof(start)/sizeof(start[0])))) {
+               /* no JSON start, we don't really care why */
+               ret = fseeko(dst, 0, SEEK_SET);
+               if (ret < 0)
+                       err(EX_UNAVAILABLE, "fseeko");
+
+               ret = fputs("[", dst);
+               if (ret < 0)
+                       err(EX_UNAVAILABLE, "fputs");
+       } else {
+               /* has at least two bytes at the start. Seek to 3 bytes before the end */
+               ret = fseeko(dst, -3, SEEK_END);
+               if (ret < 0)
+                       err(EX_UNAVAILABLE, "fseeko");
+
+               ret = fputs(",", dst);
+               if (ret < 0)
+                       err(EX_UNAVAILABLE, "fputs");
+       }
+
+       fprintf(dst, "\n");
+       fprintf(dst, "{\n");
+       fprintf(dst, "  \"directory\": \"%s\",\n", cwd);
+       fprintf(dst, "  \"file\": \"%s\",\n", input_file);
+       fprintf(dst, "  \"command\": \"");
+       for (i=0; i < argc; i++) {
+               bool needs_escape = strchr(argv[i], '\\') || strchr(argv[i], '"') || strchr(argv[i], ' ');
+               
+               if (needs_escape) {
+                       char *escaped_string = escape_string(argv[i]);
+                       fprintf(dst, "%s\\\"%s\\\"", i == 0 ? "" : " ", escaped_string);
+                       free(escaped_string);
+               } else {
+                       fprintf(dst, "%s%s", i == 0 ? "" : " ", argv[i]);
+               }
+       }
+       fprintf(dst, "\"\n");
+       fprintf(dst, "}\n");
+       fprintf(dst, "]\n");
+
+       ret = fclose(dst);
+       if (ret < 0)
+               err(EX_UNAVAILABLE, "fclose");
+
+       return 0;
+}
+
+void usage(void)
+{
+       fprintf(stderr, "Usage: %s <json_output> <cwd> <input_file> <compiler> [<invocation> ...]\n", getprogname());
+       exit(EX_USAGE);
+}
+
+/*
+ * A valid JSON string can't contain an unescaped \ or ", so we look for these in our argv[] array
+ * (which our parent shell would have done shell metacharacter evaluation on) and escape just these.
+ * The entire string is put in \"-escaped quotes to handle spaces: they are valid in a JSON string,
+ * but must still group the argument when the command is re-run against the compiler for real.
+ */
+char *
+escape_string(const char *input)
+{
+       size_t len = strlen(input);
+       size_t i, j;
+       char *output = malloc(len * 4 + 1);
+
+       for (i=0, j=0; i < len; i++) {
+               char ch = input[i];
+
+               if (ch == '\\' || ch == '"') {
+                       output[j++] = '\\';
+                       output[j++] = '\\'; /* output \\ in JSON, which the final shell will see as \ */
+                       output[j++] = '\\'; /* escape \ or ", which the final shell will see and pass to the compiler */
+               }
+               output[j++] = ch;
+       }
+
+       output[j] = '\0';
+
+       return output;
+}
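Taken together, the fprintf calls in main() keep the output file a well-formed JSON array with one object per compiled translation unit, i.e. the compile_commands.json layout that Clang tooling consumes. An illustrative entry (paths and flags are invented for the example):

    [
    {
      "directory": "/BUILD/obj/RELEASE_X86_64",
      "file": "bsd/kern/kern_exit.c",
      "command": "clang -c -O2 -Iincludes bsd/kern/kern_exit.c -o kern_exit.o"
    }
    ]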
index 3ae439d365793953a17046df47f78f10bddd0546..4c765d82808e4ba4a4ae365d2fde50043dd1a41a 100644 (file)
@@ -13,17 +13,11 @@ CFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) -g -O
 
 WARNFLAGS = -Wall
 
-LDFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) -L$(HOST_SPARSE_SDKROOT)/usr/local/lib/system -lstdc++
-ifneq ($(HOST_SPARSE_SDKROOT),/)
-LDFLAGS += -lmacho
-endif
+LDFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) -lstdc++
 
-.SparseSDK: ALWAYS
-       $(_v)echo '$(HOST_SPARSE_SDKROOT)' | cmp -s - $@ || echo '$(HOST_SPARSE_SDKROOT)' > $@
-
-kextsymboltool: $(OBJS) .SparseSDK
+kextsymboltool: $(OBJS)
        @echo HOST_LD $@
-       $(_v)$(HOST_CC) $(LDFLAGS) -o $@ $(OBJS)
+       $(_v)$(HOST_CC) $(LDFLAGS) -o $@ $^
        @echo HOST_CODESIGN $@
        $(_v)env CODESIGN_ALLOCATE=$(HOST_CODESIGN_ALLOCATE) $(HOST_CODESIGN) -s - $@
 
index 9b9125baea85e54ef166bf7f83e5d8b1eee36edd..8bd2c293c2edb8f8ac367ec2d6d48a6042b2ad54 100644 (file)
@@ -464,6 +464,28 @@ store_symbols(char * file, vm_size_t file_size, struct symbol * symbols, uint32_
     return strtabsize;
 }
 
+static const NXArchInfo *
+lookup_arch(const char *archstring)
+{
+       /*
+        * As new architectures are supported by xnu, add a mapping here
+        * so that we do not have to rely on host libraries.
+        */
+       static const NXArchInfo archlist[] = {
+               { "x86_64", 0x01000007 /* CPU_TYPE_X86_64 */, 3 /* CPU_SUBTYPE_X86_64_ALL */, NX_LittleEndian, NULL },
+               { "x86_64h", 0x01000007 /* CPU_TYPE_X86_64 */, 8 /* CPU_SUBTYPE_X86_64_H */, NX_LittleEndian, NULL },
+       };
+       unsigned long i;
+
+       for (i=0; i < sizeof(archlist)/sizeof(archlist[0]); i++) {
+               if (0 == strcmp(archstring, archlist[i].name)) {
+                       return &archlist[i];
+               }
+       }
+
+       return NULL;
+}
+
 /*********************************************************************
 *********************************************************************/
 int main(int argc, char * argv[])
@@ -523,7 +545,7 @@ int main(int argc, char * argv[])
 
         if (!strcmp("-arch", argv[i]))
         {
-            target_arch = NXGetArchInfoFromName(argv[i + 1]);
+            target_arch = lookup_arch(argv[i + 1]);
            if (!target_arch)
            {
                fprintf(stderr, "unknown architecture name: %s\n", argv[i+1]);
@@ -780,7 +802,7 @@ int main(int argc, char * argv[])
        hdr.magic       = MH_MAGIC;
        hdr.cputype     = target_arch->cputype;
        hdr.cpusubtype  = target_arch->cpusubtype;
-       hdr.filetype    = (target_arch->cputype == CPU_TYPE_I386) ? MH_OBJECT : MH_KEXT_BUNDLE;
+       hdr.filetype    = MH_KEXT_BUNDLE;
        hdr.ncmds       = 3;
        hdr.sizeofcmds  = sizeof(segcmd) + sizeof(symcmd) + sizeof(uuidcmd);
        hdr.flags       = MH_INCRLINK;
@@ -969,7 +991,7 @@ finish:
 
     if (kErrorNone != err)
     {
-       if (output_name)
+       if (output_name && strncmp(output_name, "/dev/", 5))
            unlink(output_name);
         exit(1);
     }
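
lookup_arch() above swaps NXGetArchInfoFromName() for a small static table, so the host tool no longer needs libmacho just to map an -arch name to a cputype/cpusubtype pair. A self-contained sketch of the same pattern (arch_entry and find_arch() are hypothetical stand-ins; the numeric values are the ones used in the table above):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* Simplified stand-in for NXArchInfo: just the fields the tool cares about. */
struct arch_entry {
        const char *name;
        uint32_t    cputype;
        uint32_t    cpusubtype;
};

static const struct arch_entry archlist[] = {
        { "x86_64",  0x01000007 /* CPU_TYPE_X86_64 */, 3 /* CPU_SUBTYPE_X86_64_ALL */ },
        { "x86_64h", 0x01000007 /* CPU_TYPE_X86_64 */, 8 /* CPU_SUBTYPE_X86_64_H */ },
        /* A newly supported architecture becomes one more row here, not a host-library call. */
};

static const struct arch_entry *
find_arch(const char *name)
{
        size_t i;

        for (i = 0; i < sizeof(archlist) / sizeof(archlist[0]); i++) {
                if (strcmp(name, archlist[i].name) == 0)
                        return &archlist[i];
        }
        return NULL;
}

int
main(int argc, char *argv[])
{
        const struct arch_entry *arch = find_arch(argc > 1 ? argv[1] : "x86_64");

        if (arch == NULL) {
                fprintf(stderr, "unknown architecture name\n");
                return 1;
        }
        printf("%s: cputype 0x%08x cpusubtype %u\n",
            arch->name, (unsigned)arch->cputype, (unsigned)arch->cpusubtype);
        return 0;
}
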
index e03ce2014ee87fc2935cd4183c861af5a3a3a97b..99cd721762b16317e814fcde4b249278a11d60cb 100644 (file)
@@ -36,6 +36,8 @@ INSTINC_SUBDIRS_X86_64H = \
 INSTINC_SUBDIRS_ARM = \
        arm 
 
+INSTINC_SUBDIRS_ARM64 = \
+       arm
 
 EXPINC_SUBDIRS = \
        bsm \
@@ -63,6 +65,8 @@ EXPINC_SUBDIRS_X86_64H = \
 EXPINC_SUBDIRS_ARM = \
        arm 
 
+EXPINC_SUBDIRS_ARM64 = \
+       arm
 
 COMP_SUBDIRS =         \
        conf
index 1398b2f0ddde123a612ba12cb8dc54aebc4e3f5e..5a5995ee2aabbaaa9b555da7d2ae0442b20d2e77 100644 (file)
 #define        BSM_F_CHECK_OPENEVT     358     /* Darwin-specific. */
 #define        BSM_F_ADDSIGS           359     /* Darwin-specific. */
 #define        BSM_F_MARKDEPENDENCY    360     /* Darwin-specific. */
+#define        BSM_F_BARRIERFSYNC      361     /* Darwin-specific. */
 
 /*
  * Darwin file system specific (400-499).
index ece1262df4554e352a6f72ba96ed115948259da1..9636b05f7add82cfb490381fc0fa7d8ba70b68c2 100644 (file)
@@ -99,6 +99,7 @@ OBJS_NO_SIGN_COMPARE =                \
                ip_input.o      \
                ip_output.o     \
                raw_ip.o        \
+               tcp_cache.o     \
                tcp_input.o     \
                tcp_output.o    \
                tcp_subr.o      \
@@ -236,7 +237,9 @@ OBJS_NO_CAST_ALIGN =                        \
                munge.o                 \
                aes.o                   \
                aeskey.o                \
-               sdt_arm.o
+               sdt_arm.o               \
+               uipc_mbuf.o             \
+               kern_guarded.o
 
 $(foreach file,$(OBJS_NO_CAST_ALIGN),$(eval $(call add_perfile_cflags,$(file),-Wno-cast-align)))
 
@@ -263,9 +266,9 @@ $(SOBJS): .SFLAGS
 
 $(COMPONENT).filelist: $(OBJS) 
        @echo LDFILELIST $(COMPONENT)
-       $(_v)( for obj in ${OBJS}; do   \
+       $(_v)for obj in ${OBJS}; do     \
                 echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \
-       done; ) > $(COMPONENT).filelist
+       done > $(COMPONENT).filelist
 
 MAKESYSCALLS = $(SRCROOT)/bsd/kern/makesyscalls.sh
 
index d4ce218f8b52406760d772b9143d64f203dbd94d..2e3baa92b6d6cbdc22531aa3b972bf792f9c0b3f 100644 (file)
@@ -47,7 +47,6 @@ OPTIONS/sysv_msg                      optional sysv_msg
 OPTIONS/sysv_shm                       optional sysv_shm
 OPTIONS/importance_inheritance         optional importance_inheritance
 OPTIONS/importance_debug               optional importance_debug
-OPTIONS/in_kernel_tests                        optional config_in_kernel_tests
 OPTIONS/config_ecc_logging             optional config_ecc_logging
 
 #
@@ -215,18 +214,20 @@ bsd/net/route.c                           optional networking
 bsd/net/rtsock.c                       optional networking
 bsd/net/netsrc.c                       optional networking
 bsd/net/ntstat.c                       optional networking
+bsd/net/net_perf.c                     optional networking
 bsd/net/if_gif.c                       optional gif
 bsd/net/if_stf.c                       optional stf
 bsd/net/kpi_interface.c                optional networking
 bsd/net/kpi_protocol.c         optional networking
 bsd/net/kpi_interfacefilter.c  optional networking
 bsd/net/net_str_id.c                   optional networking
-bsd/net/if_utun.c                      optional networking
+bsd/net/if_utun.c                      optional networking
 bsd/net/if_utun_crypto.c               optional networking
 bsd/net/if_utun_crypto_dtls.c          optional networking
 bsd/net/if_utun_crypto_ipsec.c         optional networking
-bsd/net/if_ipsec.c                     optional ipsec
-bsd/net/necp.c                         optional necp
+bsd/net/if_ipsec.c                     optional ipsec
+bsd/net/necp.c                         optional necp
+bsd/net/network_agent.c                        optional networking
 bsd/net/if_pflog.c                     optional pflog
 bsd/net/pf.c                           optional pf
 bsd/net/pf_if.c                                optional pf
@@ -269,7 +270,6 @@ bsd/net/altq/altq_subr.c            optional pf_altq
 
 bsd/netinet/igmp.c                     optional inet
 bsd/netinet/in.c                       optional inet
-bsd/netinet/in_dhcp.c                  optional inet
 bsd/netinet/dhcp_options.c             optional inet
 bsd/netinet/in_arp.c                   optional inet
 bsd/netinet/in_mcast.c                 optional inet
@@ -287,6 +287,7 @@ bsd/netinet/ip_id.c                 optional inet
 bsd/netinet/ip_input.c                 optional inet
 bsd/netinet/ip_output.c                        optional inet
 bsd/netinet/raw_ip.c                   optional inet
+bsd/netinet/tcp_cache.c                        optional inet
 bsd/netinet/tcp_debug.c                        optional tcpdebug
 bsd/netinet/tcp_input.c                        optional inet
 bsd/netinet/tcp_output.c               optional inet
@@ -395,6 +396,7 @@ bsd/hfs/hfs_vfsutils.c                              optional hfs
 bsd/hfs/hfs_vnops.c                            optional hfs
 bsd/hfs/hfs_xattr.c                            optional hfs
 bsd/hfs/MacOSStubs.c                           optional hfs
+bsd/hfs/hfs_extents.c                          optional hfs
 bsd/hfs/hfs_cprotect.c                         standard
 bsd/hfs/rangelist.c                            optional hfs
 bsd/hfs/hfscommon/BTree/BTree.c                        optional hfs
@@ -481,6 +483,7 @@ bsd/kern/sys_pipe.c                 standard
 bsd/kern/sys_socket.c                  optional sockets
 bsd/kern/sys_domain.c                  optional sockets
 bsd/kern/sys_coalition.c               optional config_coalitions
+bsd/kern/sys_work_interval.c           standard
 ./syscalls.c                           standard
 bsd/kern/tty.c                         standard
 bsd/kern/tty_compat.c                  standard
@@ -538,6 +541,7 @@ bsd/kern/imageboot.c                        optional config_imageboot
 osfmk/kperf/kperfbsd.c                 optional kperf
 bsd/kern/kern_kpc.c                    optional kpc
 
-bsd/kern/kern_tests.c                  optional config_in_kernel_tests
-
 bsd/kern/proc_uuid_policy.c            optional config_proc_uuid_policy
+
+bsd/pgo/profile_runtime.c              standard
+
index 49d0f006b5da8babf435b32011f9875008177c4b..f9feaa2c070fb9f0ddf2610e972a9c82925bda04 100644 (file)
@@ -84,10 +84,13 @@ struct      timezone tz = { 0, 0 };
 
 #define        NPROC (20 + 16 * 32)
 #define        NPROC_PER_UID (NPROC/2)
+
 #define HNPROC 2500    /* based on thread_max */
 int    maxproc = NPROC;
 int    maxprocperuid = NPROC_PER_UID;
-/*__private_extern__*/ int hard_maxproc = HNPROC;      /* hardcoded limit */
+
+int hard_maxproc = HNPROC;     /* hardcoded limit */
+
 int nprocs = 0; /* XXX */
 
 //#define      NTEXT (80 + NPROC / 8)                  /* actually the object cache */
index dd02ad5026d4b620eec8e57ce1ad157ce37152ca..ae38f260a6923e56abb32c1330cfa270b2c10f27 100644 (file)
@@ -20,8 +20,8 @@
  */
 
 /*
- * Portions Copyright (c) 2011, Joyent, Inc. All rights reserved.
- * Portions Copyright (c) 2012 by Delphix. All rights reserved.
+ * Portions Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Portions Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 /*
@@ -1457,7 +1457,7 @@ dtrace_priv_proc(dtrace_state_t *state)
        if (ISSET(current_proc()->p_lflag, P_LNOATTACH))
                goto bad;
 
-       if (dtrace_is_restricted() && !dtrace_can_attach_to_proc(current_proc()))
+       if (dtrace_is_restricted() && !dtrace_is_running_apple_internal() && !dtrace_can_attach_to_proc(current_proc()))
                goto bad;
 
        if (state->dts_cred.dcr_action & DTRACE_CRA_PROC)
@@ -1489,7 +1489,7 @@ dtrace_priv_proc_relaxed(dtrace_state_t *state)
 static int
 dtrace_priv_kernel(dtrace_state_t *state)
 {
-       if (dtrace_is_restricted())
+       if (dtrace_is_restricted() && !dtrace_is_running_apple_internal())
                goto bad;
 
        if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL)
@@ -3127,6 +3127,9 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
                }
                return (mstate->dtms_machtimestamp);
 
+       case DIF_VAR_CPU:
+               return ((uint64_t) dtrace_get_thread_last_cpu_id(current_thread()));
+
        case DIF_VAR_IPL:
                if (!dtrace_priv_kernel(state))
                        return (0);
@@ -3324,7 +3327,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
                        return (0);
 
                mstate->dtms_scratch_ptr += scratch_size;
-               proc_selfname( xname, MAXCOMLEN );
+               proc_selfname( xname, scratch_size );
 
                return ((uint64_t)(uintptr_t)xname);
        }
@@ -4707,6 +4710,15 @@ inetout: regs[rd] = (uintptr_t)end + 1;
                break;
        }
 
+       case DIF_SUBR_VM_KERNEL_ADDRPERM: {
+               if (!dtrace_priv_kernel(state)) {
+                       regs[rd] = 0;
+               } else {
+                       regs[rd] = VM_KERNEL_ADDRPERM((vm_offset_t) tupregs[0].dttk_value);
+               }
+
+               break;
+       }
 /*
  * APPLE NOTE:
  * CoreProfile callback ('core_profile (uint64_t, [uint64_t], [uint64_t] ...)')
@@ -5870,6 +5882,63 @@ out:
        mstate->dtms_scratch_ptr = old;
 }
 
+static void
+dtrace_store_by_ref(dtrace_difo_t *dp, caddr_t tomax, size_t size,
+    size_t *valoffsp, uint64_t *valp, uint64_t end, int intuple, int dtkind)
+{
+       volatile uint16_t *flags;
+       uint64_t val = *valp;
+       size_t valoffs = *valoffsp;
+
+       flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
+       ASSERT(dtkind == DIF_TF_BYREF || dtkind == DIF_TF_BYUREF);
+
+       /*
+        * If this is a string, we're going to only load until we find the zero
+        * byte -- after which we'll store zero bytes.
+        */
+       if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) {
+               char c = '\0' + 1;
+               size_t s;
+
+               for (s = 0; s < size; s++) {
+                       if (c != '\0' && dtkind == DIF_TF_BYREF) {
+                               c = dtrace_load8(val++);
+                       } else if (c != '\0' && dtkind == DIF_TF_BYUREF) {
+                               DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
+                               c = dtrace_fuword8((user_addr_t)(uintptr_t)val++);
+                               DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
+                               if (*flags & CPU_DTRACE_FAULT)
+                                       break;
+                       }
+
+                       DTRACE_STORE(uint8_t, tomax, valoffs++, c);
+
+                       if (c == '\0' && intuple)
+                               break;
+               }
+       } else {
+               uint8_t c;
+               while (valoffs < end) {
+                       if (dtkind == DIF_TF_BYREF) {
+                               c = dtrace_load8(val++);
+                       } else if (dtkind == DIF_TF_BYUREF) {
+                               DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
+                               c = dtrace_fuword8((user_addr_t)(uintptr_t)val++);
+                               DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
+                               if (*flags & CPU_DTRACE_FAULT)
+                                       break;
+                       }
+
+                       DTRACE_STORE(uint8_t, tomax,
+                           valoffs++, c);
+               }
+       }
+
+       *valp = val;
+       *valoffsp = valoffs;
+}
+
 /*
  * If you're looking for the epicenter of DTrace, you just found it.  This
  * is the function called by the provider to fire a probe -- from which all
@@ -6463,7 +6532,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
                                ASSERT(0);
                        }
 
-                       if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) {
+                       if (dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF)) {
                                uintptr_t end = valoffs + size;
 
                                if (tracememsize != 0 &&
@@ -6473,39 +6542,17 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
                                         tracememsize = 0;
                                 }
 
-                               if (!dtrace_vcanload((void *)(uintptr_t)val,
-                                   &dp->dtdo_rtype, &mstate, vstate))
-                                       continue;
-
-                               /*
-                                * If this is a string, we're going to only
-                                * load until we find the zero byte -- after
-                                * which we'll store zero bytes.
-                                */
-                               if (dp->dtdo_rtype.dtdt_kind ==
-                                   DIF_TYPE_STRING) {
-                                       char c = '\0' + 1;
-                                       int intuple = act->dta_intuple;
-                                       size_t s;
-
-                                       for (s = 0; s < size; s++) {
-                                               if (c != '\0')
-                                                       c = dtrace_load8(val++);
-
-                                               DTRACE_STORE(uint8_t, tomax,
-                                                   valoffs++, c);
-
-                                               if (c == '\0' && intuple)
-                                                       break;
-                                       }
-
+                               if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF &&
+                                   !dtrace_vcanload((void *)(uintptr_t)val,
+                                                    &dp->dtdo_rtype, &mstate, vstate))
+                               {
                                        continue;
                                }
 
-                               while (valoffs < end) {
-                                       DTRACE_STORE(uint8_t, tomax, valoffs++,
-                                           dtrace_load8(val++));
-                               }
+                               dtrace_store_by_ref(dp, tomax, size, &valoffs,
+                                   &val, end, act->dta_intuple,
+                                   dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF ?
+                                   DIF_TF_BYREF: DIF_TF_BYUREF);
 
                                continue;
                        }
@@ -8663,7 +8710,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs,
                    "expected 'ret' as last DIF instruction\n");
        }
 
-       if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) {
+       if (!(dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF))) {
                /*
                 * If we're not returning by reference, the size must be either
                 * 0 or the size of one of the base types.
@@ -10055,12 +10102,14 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
                case DTRACEACT_PRINTA:
                case DTRACEACT_SYSTEM:
                case DTRACEACT_FREOPEN:
+               case DTRACEACT_DIFEXPR:
                        /*
                         * We know that our arg is a string -- turn it into a
                         * format.
                         */
                        if (arg == 0) {
-                               ASSERT(desc->dtad_kind == DTRACEACT_PRINTA);
+                               ASSERT(desc->dtad_kind == DTRACEACT_PRINTA ||
+                                      desc->dtad_kind == DTRACEACT_DIFEXPR);
                                format = 0;
                        } else {
                                ASSERT(arg != 0);
@@ -10071,7 +10120,6 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
 
                        /*FALLTHROUGH*/
                case DTRACEACT_LIBACT:
-               case DTRACEACT_DIFEXPR:
                case DTRACEACT_TRACEMEM:
                case DTRACEACT_TRACEMEM_DYNSIZE:
                case DTRACEACT_APPLEBINARY:     /* __APPLE__ */
@@ -12074,15 +12122,19 @@ dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
                    (uintptr_t)sec->dofs_offset + offs);
                kind = (dtrace_actkind_t)desc->dofa_kind;
 
-               if (DTRACEACT_ISPRINTFLIKE(kind) &&
-                   (kind != DTRACEACT_PRINTA ||
-                   desc->dofa_strtab != DOF_SECIDX_NONE)) {
+               if ((DTRACEACT_ISPRINTFLIKE(kind) &&
+                   (kind != DTRACEACT_PRINTA || desc->dofa_strtab != DOF_SECIDX_NONE)) ||
+                   (kind == DTRACEACT_DIFEXPR && desc->dofa_strtab != DOF_SECIDX_NONE))
+               {
                        dof_sec_t *strtab;
                        char *str, *fmt;
                        uint64_t i;
 
                        /*
-                        * printf()-like actions must have a format string.
+                        * The argument to these actions is an index into the
+                        * DOF string table.  For printf()-like actions, this
+                        * is the format string.  For print(), this is the
+                        * CTF type of the expression result.
                         */
                        if ((strtab = dtrace_dof_sect(dof,
                            DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL)
@@ -16852,7 +16904,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv
                 * Security restrictions make this operation illegal, if this is enabled DTrace
                 * must refuse to provide any fbt probes.
                 */
-               if (dtrace_is_restricted()) {
+               if (dtrace_fbt_probes_restricted()) {
                        cmn_err(CE_WARN, "security restrictions disallow DTRACEIOC_MODUUIDSLIST");      
                        return (EPERM);
                }
@@ -17007,7 +17059,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv
                 * Security restrictions make this operation illegal, if this is enabled DTrace
                 * must refuse to provide any fbt probes.
                 */
-               if (dtrace_is_restricted()) {
+               if (dtrace_fbt_probes_restricted()) {
                        cmn_err(CE_WARN, "security restrictions disallow DTRACEIOC_MODUUIDSLIST");      
                        return (EPERM);
                }
@@ -17121,7 +17173,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv
 
        case DTRACEIOC_PROCWAITFOR: {
                dtrace_procdesc_t pdesc = {
-                       .p_comm = {0},
+                       .p_name = {0},
                        .p_pid  = -1
                };
 
@@ -17604,7 +17656,6 @@ dtrace_init( void )
                (void)dtrace_abs_to_nano(0LL); /* Force once only call to clock_timebase_info (which can take a lock) */
 
                dtrace_isa_init();
-               
                /*
                 * See dtrace_impl.h for a description of dof modes.
                 * The default is lazy dof.
@@ -17645,7 +17696,9 @@ dtrace_init( void )
                if (!PE_parse_boot_argn("dtrace_kernel_symbol_mode", &dtrace_kernel_symbol_mode, sizeof (dtrace_kernel_symbol_mode))) {
                        dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE;
                }
-                               
+
+               dtrace_restriction_policy_load();
+
                gDTraceInited = 1;
 
        } else
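
Most of the dtrace.c churn above comes from pulling the by-reference store loop out into dtrace_store_by_ref(), so the same code can read kernel addresses (DIF_TF_BYREF via dtrace_load8()) or user addresses (the new DIF_TF_BYUREF via dtrace_fuword8() under CPU_DTRACE_NOFAULT). For strings it reads the source only up to the terminating NUL and then stores zero bytes for the rest of the record. A user-space sketch of just that string behaviour (plain pointer reads stand in for the kernel load routines; store_string() is illustrative):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/*
 * Copy at most 'size' bytes of a NUL-terminated string into a fixed-size
 * record; once the NUL has been seen, stop reading the source and keep
 * storing zero bytes, mirroring the DIF_TYPE_STRING branch above.
 */
static void
store_string(uint8_t *dst, const char *src, size_t size)
{
        char c = '\0' + 1;              /* any non-NUL value: forces the first read */
        size_t s;

        for (s = 0; s < size; s++) {
                if (c != '\0')
                        c = *src++;     /* dtrace_load8()/dtrace_fuword8() in the kernel */
                dst[s] = (uint8_t)c;    /* DTRACE_STORE() in the kernel */
        }
}

int
main(void)
{
        uint8_t buf[8];
        size_t i;

        memset(buf, 0xff, sizeof(buf));
        store_string(buf, "abc", sizeof(buf));

        /* Prints 61 62 63 00 00 00 00 00: the source is never read past its NUL. */
        for (i = 0; i < sizeof(buf); i++)
                printf("%02x ", buf[i]);
        printf("\n");
        return 0;
}
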
index d7588c0caadca4f562cca86f62732da1aaef3d18..4e7ede1d2d9719802b5a4d750859313480ccc382 100644 (file)
 #include <vm/pmap.h>
 #include <vm/vm_map.h> /* All the bits we care about are guarded by MACH_KERNEL_PRIVATE :-( */
 
-/* missing prototypes, not exported by Mach */
-extern kern_return_t task_suspend_internal(task_t);
-extern kern_return_t task_resume_internal(task_t);
-
 /*
  * pid/proc
  */
index aa8fb6c1a36ac4441652665b140cf29225e2b79a..ef857094da7cc6c6d055cebd90019ed33d616cdd 100644 (file)
@@ -35,6 +35,7 @@
 #include <sys/dtrace.h>
 #include <sys/dtrace_impl.h>
 #include <sys/proc_internal.h>
+#include <sys/vnode.h>
 #include <kern/debug.h>
 #include <kern/sched_prim.h>
 #include <kern/task.h>
@@ -119,9 +120,40 @@ LIST_HEAD(listhead, dtrace_proc_awaited_entry) dtrace_proc_awaited_head
 
 void (*dtrace_proc_waitfor_exec_ptr)(proc_t*) = NULL;
 
+static int
+dtrace_proc_get_execpath(proc_t *p, char *buffer, int *maxlen)
+{
+       int err = 0, vid = 0;
+       vnode_t tvp = NULLVP, nvp = NULLVP;
+
+       ASSERT(p);
+       ASSERT(buffer);
+       ASSERT(maxlen);
+
+       if ((tvp = p->p_textvp) == NULLVP)
+               return ESRCH;
+
+       vid = vnode_vid(tvp);
+       if ((err = vnode_getwithvid(tvp, vid)) != 0)
+               return err;
+
+       if ((err = vn_getpath_fsenter(tvp, buffer, maxlen)) != 0)
+               return err;
+       vnode_put(tvp);
+
+       if ((err = vnode_lookup(buffer, 0, &nvp, vfs_context_current())) != 0)
+               return err;
+       if (nvp != NULLVP)
+               vnode_put(nvp);
+
+       return 0;
+}
+
+
 static void
 dtrace_proc_exec_notification(proc_t *p) {
        dtrace_proc_awaited_entry_t *entry, *tmp;
+       static char execpath[MAXPATHLEN];
 
        ASSERT(p);
        ASSERT(p->p_pid != -1);
@@ -129,16 +161,31 @@ dtrace_proc_exec_notification(proc_t *p) {
 
        lck_mtx_lock(&dtrace_procwaitfor_lock);
 
-       /*
-        * For each entry, if it has not been matched with a process yet we
-        * try to match it with the newly created process. If they match, the
-        * entry is initialized with the process id and the process task is
-        * suspended. Finally, we wake up the client's waiting thread.
-        */
        LIST_FOREACH_SAFE(entry, &dtrace_proc_awaited_head, entries, tmp) {
-               if ((entry->pdesc->p_pid == -1)
-                   && !strncmp(entry->pdesc->p_comm, &p->p_comm[0], sizeof(p->p_comm)))
-               {
+               /* By default consider we're using p_comm. */
+               char *pname = p->p_comm;
+
+               /* Already matched with another process. */
+               if ((entry->pdesc->p_pid != -1))
+                       continue;
+
+               /* p_comm is too short, use the execpath. */
+               if (entry->pdesc->p_name_length >= MAXCOMLEN) {
+                       /*
+                        * Retrieve the executable path. After the call, length contains
+                        * the length of the string + 1.
+                        */
+                       int length = sizeof(execpath);
+                       if (dtrace_proc_get_execpath(p, execpath, &length) != 0)
+                               continue;
+                       /* Move the cursor to the position after the last / */
+                       pname = &execpath[length - 1];
+                       while (pname != execpath && *pname != '/')
+                               pname--;
+                       pname = (*pname == '/') ? pname + 1 : pname;
+               }
+
+               if (!strcmp(entry->pdesc->p_name, pname)) {
                        entry->pdesc->p_pid = p->p_pid;
                        task_pidsuspend(p->task);
                        wakeup(entry);
@@ -154,7 +201,15 @@ dtrace_proc_waitfor(dtrace_procdesc_t* pdesc) {
        int res;
 
        ASSERT(pdesc);
-       ASSERT(pdesc->p_comm);
+       ASSERT(pdesc->p_name);
+
+       /*
+        * Never trust user input: compute the length of the process name and ensure the
+        * string is null-terminated.
+        */
+       pdesc->p_name_length = strnlen(pdesc->p_name, sizeof(pdesc->p_name));
+       if (pdesc->p_name_length >= (int) sizeof(pdesc->p_name))
+               return -1;
 
        lck_mtx_lock(&dtrace_procwaitfor_lock);
 
@@ -240,6 +295,14 @@ dtrace_invop_remove(int (*func)(uintptr_t, uintptr_t *, uintptr_t))
        kmem_free(hdlr, sizeof (dtrace_invop_hdlr_t));
 }
 
+
+
+
+void
+dtrace_restriction_policy_load(void)
+{
+}
+
 /*
  * Check if DTrace has been restricted by the current security policy.
  */
@@ -254,6 +317,32 @@ dtrace_is_restricted(void)
        return FALSE;
 }
 
+/*
+ * Check if DTrace is running on a machine currently configured for Apple Internal development
+ */
+boolean_t
+dtrace_is_running_apple_internal(void)
+{
+#if CONFIG_CSR
+       if (csr_check(CSR_ALLOW_APPLE_INTERNAL) == 0)
+               return TRUE;
+#endif
+
+       return FALSE;
+}
+
+boolean_t
+dtrace_fbt_probes_restricted(void)
+{
+
+#if CONFIG_CSR
+       if (dtrace_is_restricted() && !dtrace_is_running_apple_internal())
+               return TRUE;
+#endif
+
+       return FALSE;
+}
+
 /*
  * Check if the process can be attached.
  */
@@ -264,7 +353,7 @@ dtrace_can_attach_to_proc(proc_t *proc)
        ASSERT(proc != NULL);
 
 #if CONFIG_CSR
-       if ((cs_entitlement_flags(proc) & CS_GET_TASK_ALLOW) == 0)
+       if (cs_restricted(proc))
                return FALSE;
 #endif
 
index 4417a812ad3bfcba4d09ed92b2cd2bcb821b768d..ef1d9f1e775bd0531597c6ca081272b877b22415 100644 (file)
@@ -165,6 +165,7 @@ typedef struct lockstat_assembly_probe {
                { LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE,      (vm_offset_t *) lck_rw_try_lock_exclusive_lockstat_patch_point },
                { LS_LCK_MTX_LOCK_SPIN_ACQUIRE,         (vm_offset_t *) lck_mtx_lock_spin_lockstat_patch_point },
 #endif
+               /* No assembly patch points for ARM */
 #endif /* CONFIG_DTRACE */
                { LS_LCK_INVALID, NULL }
 };
index 79b907e00c9c2c77467d331c1ad0fb16a44b70b0..3e55851a93c15e230eb5dc5d95461e1b891ac9fe 100644 (file)
@@ -18,7 +18,7 @@ INSTALL_DTRACE_SCRIPTS_LIST = \
        tcp.d \
        unistd.d
 
-ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),)
+ifneq ($(filter $(SUPPORTED_EMBEDDED_PLATFORMS),$(PLATFORM)),)
 INSTALL_DTRACE_SCRIPTS_LIST += mptcp.d
 endif
 
index 5e5c60db54fef8c140622b99517267a710101a96..331f82928a37302e8a7221cc8fd1ee82a6fb0338 100644 (file)
@@ -153,7 +153,7 @@ translator mppsinfo_t < struct mppcb *T> {
 typedef struct mptsesinfo {
        uint16_t        numflows;
        uint16_t        nummpcapflows;
-       connid_t        connid_last;
+       sae_connid_t    connid_last;
        uint8_t         flags;
        struct mptses   *mptses;
 } mptsesinfo_t;
@@ -217,7 +217,7 @@ typedef struct mptsubinfo {
        uint32_t        flags;
        uint32_t        evctl;
        uint32_t        family;
-       connid_t        connid;
+       sae_connid_t    connid;
        uint32_t        rank;
        int32_t         error;
        uint64_t        sndnxt;
index d86772554093a0f5b79e438d0c925e37d1ce3d46..e7cddd544215b2e6bd2c2464b8609700bbb0ccb7 100644 (file)
@@ -59,9 +59,9 @@ inline cpuinfo_t *curcpu = xlate <cpuinfo_t *> (curthread->last_processor);
 #pragma D attributes Stable/Stable/Common curcpu
 #pragma D binding "1.0" curcpu
 
-inline processorid_t cpu = curcpu->cpu_id;
-#pragma D attributes Stable/Stable/Common cpu
-#pragma D binding "1.0" cpu
+/*
+ * XXX: 'cpu' is now a built-in variable in dtrace.
+ */
 
 inline psetid_t pset = curcpu->cpu_pset;
 #pragma D attributes Stable/Stable/Common pset
index 610de106b6ea4e55fbb7469e4baa3e5a16746f4f..f31f21be13eefcba1d01f552dc5512ae81d1680d 100644 (file)
@@ -436,6 +436,10 @@ void sdt_init( void )
                        return;
                }
 
+               if (dtrace_fbt_probes_restricted()) {
+                       return;
+               }
+
                if (MH_MAGIC_KERNEL != _mh_execute_header.magic) {
                        g_sdt_kernctl.mod_address = (vm_address_t)NULL;
                        g_sdt_kernctl.mod_size = 0;
index cde9701e49d00ad15ba6c7d32f2ad42db4be975e..18c16e074b927e8cf90a9e1acd78a24d2b1d52b4 100644 (file)
@@ -948,15 +948,15 @@ sdt_argdesc_t sdt_args[] = {
        { "mptcp", "timer", 1, 1, "struct mptcb *", "mptsinfo_t *" },
        { "mptcp", "error", 0, 0, "struct mptcb *", "mptsinfo_t *" },
        { "mptcp", "connectx", 0, 0, "struct mptses *", "mptsesinfo_t *" },
-       { "mptcp", "connectx", 1, 1, "associd_t", "associd_t" },
+       { "mptcp", "connectx", 1, 1, "sae_associd_t", "sae_associd_t" },
        { "mptcp", "connectx", 2, 2, "struct socket *", "socketinfo_t *" },
        { "mptcp", "disconnectx", 0, 0, "struct mptses *", "mptsesinfo_t *" },
-       { "mptcp", "disconnectx", 1, 1, "associd_t", "associd_t" },
-       { "mptcp", "disconnectx", 2, 2, "connid_t", "connid_t" },
+       { "mptcp", "disconnectx", 1, 1, "sae_associd_t", "sae_associd_t" },
+       { "mptcp", "disconnectx", 2, 2, "sae_connid_t", "sae_connid_t" },
        { "mptcp", "disconnectx", 3, 3, "struct socket *", "sockinfo_t *" },
        { "mptcp", "disconnectx", 4, 4, "struct mptcb *", "mptsinfo_t *" },
        { "mptcp", "peeloff", 0, 0, "struct mptses *", "mptsesinfo_t *" },
-       { "mptcp", "peeloff", 1, 1, "associd_t", "associd_t" },
+       { "mptcp", "peeloff", 1, 1, "sae_associd_t", "sae_associd_t" },
        { "mptcp", "peeloff", 2, 2, "struct socket *", "sockinfo_t *" },
        { NULL, NULL, 0, 0, NULL, NULL }
 };
index e0031d61856ee0435382397c2af5036db0dae69c..b57481d27a993a5ff8dcedc1c6aa8b21f6822d73 100644 (file)
  *
  * CDDL HEADER END
  */
+
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc.
+ * Copyright (c) 2008 Sun Microsystems, Inc.  All rights reserved.
+ *
  * Use is subject to license terms.
  */
 
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
 /*     Copyright (c) 1988 AT&T */
 /*       All Rights Reserved   */
 
@@ -155,6 +163,7 @@ enum {
        CRC32,          /* for crc32, with different size operands */
        XADDB,          /* for xaddb */
        MOVSXZ,         /* AMD64 mov sign extend 32 to 64 bit instruction */
+       MOVBE,          /* movbe instruction */
 
 /*
  * MMX/SIMD addressing modes.
@@ -207,9 +216,33 @@ enum {
         XMMX2I,                /* SIMD                         xmm -> xmm, imm, imm */
         XMM2I,         /* SIMD                         xmm, imm, imm */
        XMMFENCE,       /* SIMD lfence or mfence */
-       XMMSFNC         /* SIMD sfence (none or mem) */
+       XMMSFNC,        /* SIMD sfence (none or mem) */
+       XGETBV_XSETBV,
+       VEX_NONE,       /* VEX  no operand */
+       VEX_MO,         /* VEX  mod_rm                         -> implicit reg */
+       VEX_RMrX,       /* VEX  VEX.vvvv, mod_rm               -> mod_reg */
+       VEX_RRX,        /* VEX  VEX.vvvv, mod_reg              -> mod_rm */
+       VEX_RMRX,       /* VEX  VEX.vvvv, mod_rm, imm8[7:4]    -> mod_reg */
+       VEX_MX,         /* VEX  mod_rm                         -> mod_reg */
+       VEX_MXI,        /* VEX  mod_rm, imm8                   -> mod_reg */
+       VEX_XXI,        /* VEX  mod_rm, imm8                   -> VEX.vvvv */
+       VEX_MR,         /* VEX  mod_rm                         -> mod_reg */
+       VEX_RRI,        /* VEX  mod_reg, mod_rm                -> implicit(eflags/r32) */
+       VEX_RX,         /* VEX  mod_reg                        -> mod_rm */
+       VEX_RR,         /* VEX  mod_rm                         -> mod_reg */
+       VEX_RRi,        /* VEX  mod_rm, imm8                   -> mod_reg */
+       VEX_RM,         /* VEX  mod_reg                        -> mod_rm */
+       VEX_RIM,        /* VEX  mod_reg, imm8                  -> mod_rm */
+       VEX_RRM,        /* VEX  VEX.vvvv, mod_reg              -> mod_rm */
+       VEX_RMX,        /* VEX  VEX.vvvv, mod_rm               -> mod_reg */
 };
 
+/*
+ * VEX prefixes
+ */
+#define VEX_2bytes     0xC5    /* the first byte of two-byte form */
+#define VEX_3bytes     0xC4    /* the first byte of three-byte form */
+
 #define        FILL    0x90    /* Fill byte used for alignment (nop)   */
 
 /*
@@ -418,6 +451,11 @@ const char *const dis_XMMREG[16] = {
     "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15"
 };
 
+const char *const dis_YMMREG[16] = {
+    "%ymm0", "%ymm1", "%ymm2", "%ymm3", "%ymm4", "%ymm5", "%ymm6", "%ymm7",
+    "%ymm8", "%ymm9", "%ymm10", "%ymm11", "%ymm12", "%ymm13", "%ymm14", "%ymm15"
+};
+
 const char *const dis_SEGREG[16] = {
        "%es", "%cs", "%ss", "%ds", "%fs", "%gs", "<reserved>", "<reserved>",
        "%es", "%cs", "%ss", "%ds", "%fs", "%gs", "<reserved>", "<reserved>"
@@ -430,7 +468,12 @@ const char *const dis_PREDSUFFIX[8] = {
        "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord"
 };
 
-
+const char *const dis_AVXvgrp7[3][8] = {
+       /*0     1       2               3               4               5       6               7*/
+/*71*/ {"",    "",     "vpsrlw",       "",             "vpsraw",       "",     "vpsllw",       ""},
+/*72*/ {"",    "",     "vpsrld",       "",             "vpsrad",       "",     "vpslld",       ""},
+/*73*/ {"",    "",     "vpsrlq",       "vpsrldq",      "",             "",     "vpsllq",       "vpslldq"}
+};
 
 #endif /* DIS_TEXT */
 
@@ -462,7 +505,7 @@ const instable_t dis_op0F00[8] = {
  */
 const instable_t dis_op0F01[8] = {
 
-/*  [0]  */    TNSZ("sgdt",MO,6),      TNSZ("sidt",MONITOR_MWAIT,6), TNSZ("lgdt",MO,6),        TNSZ("lidt",MO,6),
+/*  [0]  */    TNSZ("sgdt",MO,6),      TNSZ("sidt",MONITOR_MWAIT,6), TNSZ("lgdt",XGETBV_XSETBV,6),     TNSZ("lidt",MO,6),
 /*  [4]  */    TNSZ("smsw",M,2),       INVALID,                TNSZ("lmsw",M,2),       TNS("invlpg",SWAPGS),
 };
 
@@ -472,7 +515,7 @@ const instable_t dis_op0F01[8] = {
 const instable_t dis_op0F18[8] = {
 
 /*  [0]  */    TNS("prefetchnta",PREF),TNS("prefetcht0",PREF), TNS("prefetcht1",PREF), TNS("prefetcht2",PREF),
-/*  [4]  */    INVALID,                INVALID,                INVALID,                INVALID,
+/*  [4]  */    TNSZ("xsave",M,512),    TNS("lfence",XMMFENCE), TNS("mfence",XMMFENCE), TNS("sfence",XMMSFNC),
 };
 
 /*
@@ -643,6 +686,88 @@ const instable_t dis_opSIMDdata16[256] = {
 /*  [FC]  */   TNSZ("paddb",XMM,16),   TNSZ("paddw",XMM,16),   TNSZ("paddd",XMM,16),   INVALID,
 };
 
+const instable_t dis_opAVX660F[256] = {
+/*  [00]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [04]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [08]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [0C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [10]  */   TNSZ("vmovupd",VEX_MX,16),      TNSZ("vmovupd",VEX_RX,16),      TNSZ("vmovlpd",VEX_RMrX,8),     TNSZ("vmovlpd",VEX_RM,8),
+/*  [14]  */   TNSZ("vunpcklpd",VEX_RMrX,16),TNSZ("vunpckhpd",VEX_RMrX,16),TNSZ("vmovhpd",VEX_RMrX,8), TNSZ("vmovhpd",VEX_RM,8),
+/*  [18]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [1C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [20]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [24]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [28]  */   TNSZ("vmovapd",VEX_MX,16),      TNSZ("vmovapd",VEX_RX,16),      INVALID,                TNSZ("vmovntpd",VEX_RM,16),
+/*  [2C]  */   INVALID,                INVALID,                TNSZ("vucomisd",VEX_MX,8),TNSZ("vcomisd",VEX_MX,8),
+
+/*  [30]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [34]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [38]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [3C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [40]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [44]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [48]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [4C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [50]  */   TNS("vmovmskpd",VEX_MR),        TNSZ("vsqrtpd",VEX_MX,16),      INVALID,                INVALID,
+/*  [54]  */   TNSZ("vandpd",VEX_RMrX,16),     TNSZ("vandnpd",VEX_RMrX,16),    TNSZ("vorpd",VEX_RMrX,16),      TNSZ("vxorpd",VEX_RMrX,16),
+/*  [58]  */   TNSZ("vaddpd",VEX_RMrX,16),     TNSZ("vmulpd",VEX_RMrX,16),     TNSZ("vcvtpd2ps",VEX_MX,16),TNSZ("vcvtps2dq",VEX_MX,16),
+/*  [5C]  */   TNSZ("vsubpd",VEX_RMrX,16),     TNSZ("vminpd",VEX_RMrX,16),     TNSZ("vdivpd",VEX_RMrX,16),     TNSZ("vmaxpd",VEX_RMrX,16),
+
+/*  [60]  */   TNSZ("vpunpcklbw",VEX_RMrX,16),TNSZ("vpunpcklwd",VEX_RMrX,16),TNSZ("vpunpckldq",VEX_RMrX,16),TNSZ("vpacksswb",VEX_RMrX,16),
+/*  [64]  */   TNSZ("vpcmpgtb",VEX_RMrX,16),   TNSZ("vpcmpgtw",VEX_RMrX,16),   TNSZ("vpcmpgtd",VEX_RMrX,16),   TNSZ("vpackuswb",VEX_RMrX,16),
+/*  [68]  */   TNSZ("vpunpckhbw",VEX_RMrX,16),TNSZ("vpunpckhwd",VEX_RMrX,16),TNSZ("vpunpckhdq",VEX_RMrX,16),TNSZ("vpackssdw",VEX_RMrX,16),
+/*  [6C]  */   TNSZ("vpunpcklqdq",VEX_RMrX,16),TNSZ("vpunpckhqdq",VEX_RMrX,16),TNSZ("vmovd",VEX_MX,4),TNSZ("vmovdqa",VEX_MX,16),
+
+/*  [70]  */   TNSZ("vpshufd",VEX_MXI,16),     TNSZ("vgrp71",VEX_XXI,16),      TNSZ("vgrp72",VEX_XXI,16),              TNSZ("vgrp73",VEX_XXI,16),
+/*  [74]  */   TNSZ("vpcmpeqb",VEX_RMrX,16),   TNSZ("vpcmpeqw",VEX_RMrX,16),   TNSZ("vpcmpeqd",VEX_RMrX,16),   INVALID,
+/*  [78]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [7C]  */   TNSZ("vhaddpd",VEX_RMrX,16),    TNSZ("vhsubpd",VEX_RMrX,16),    TNSZ("vmovd",VEX_RR,4), TNSZ("vmovdqa",VEX_RX,16),
+
+/*  [80]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [84]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [88]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [8C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [90]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [94]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [98]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [9C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [A0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [A4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [A8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [AC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [B0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [B4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [B8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [BC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [C0]  */   INVALID,                INVALID,                TNSZ("vcmppd",VEX_RMRX,16),     INVALID,
+/*  [C4]  */   TNSZ("vpinsrw",VEX_RMRX,2),TNS("vpextrw",VEX_MR),       TNSZ("vshufpd",VEX_RMRX,16),    INVALID,
+/*  [C8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [CC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [D0]  */   TNSZ("vaddsubpd",VEX_RMrX,16),TNSZ("vpsrlw",VEX_RMrX,16),       TNSZ("vpsrld",VEX_RMrX,16),     TNSZ("vpsrlq",VEX_RMrX,16),
+/*  [D4]  */   TNSZ("vpaddq",VEX_RMrX,16),     TNSZ("vpmullw",VEX_RMrX,16),    TNSZ("vmovq",VEX_RX,8), TNS("vpmovmskb",VEX_MR),
+/*  [D8]  */   TNSZ("vpsubusb",VEX_RMrX,16),   TNSZ("vpsubusw",VEX_RMrX,16),   TNSZ("vpminub",VEX_RMrX,16),    TNSZ("vpand",VEX_RMrX,16),
+/*  [DC]  */   TNSZ("vpaddusb",VEX_RMrX,16),   TNSZ("vpaddusw",VEX_RMrX,16),   TNSZ("vpmaxub",VEX_RMrX,16),    TNSZ("vpandn",VEX_RMrX,16),
+
+/*  [E0]  */   TNSZ("vpavgb",VEX_RMrX,16),     TNSZ("vpsraw",VEX_RMrX,16),     TNSZ("vpsrad",VEX_RMrX,16),     TNSZ("vpavgw",VEX_RMrX,16),
+/*  [E4]  */   TNSZ("vpmulhuw",VEX_RMrX,16),   TNSZ("vpmulhw",VEX_RMrX,16),    TNSZ("vcvttpd2dq",VEX_MX,16),TNSZ("vmovntdq",VEX_RM,16),
+/*  [E8]  */   TNSZ("vpsubsb",VEX_RMrX,16),    TNSZ("vpsubsw",VEX_RMrX,16),    TNSZ("vpminsw",VEX_RMrX,16),    TNSZ("vpor",VEX_RMrX,16),
+/*  [EC]  */   TNSZ("vpaddsb",VEX_RMrX,16),    TNSZ("vpaddsw",VEX_RMrX,16),    TNSZ("vpmaxsw",VEX_RMrX,16),    TNSZ("vpxor",VEX_RMrX,16),
+
+/*  [F0]  */   INVALID,                TNSZ("vpsllw",VEX_RMrX,16),     TNSZ("vpslld",VEX_RMrX,16),     TNSZ("vpsllq",VEX_RMrX,16),
+/*  [F4]  */   TNSZ("vpmuludq",VEX_RMrX,16),   TNSZ("vpmaddwd",VEX_RMrX,16),   TNSZ("vpsadbw",VEX_RMrX,16),    TNS("vmaskmovdqu",VEX_MX),
+/*  [F8]  */   TNSZ("vpsubb",VEX_RMrX,16),     TNSZ("vpsubw",VEX_RMrX,16),     TNSZ("vpsubd",VEX_RMrX,16),     TNSZ("vpsubq",VEX_RMrX,16),
+/*  [FC]  */   TNSZ("vpaddb",VEX_RMrX,16),     TNSZ("vpaddw",VEX_RMrX,16),     TNSZ("vpaddd",VEX_RMrX,16),     INVALID,
+};
+
 /*
  *     Decode table for SIMD instructions with the repnz (0xf2) prefix.
  */
@@ -728,6 +853,88 @@ const instable_t dis_opSIMDrepnz[256] = {
 /*  [FC]  */   INVALID,                INVALID,                INVALID,                INVALID,
 };
 
+const instable_t dis_opAVXF20F[256] = {
+/*  [00]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [04]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [08]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [0C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [10]  */   TNSZ("vmovsd",VEX_RMrX,8),      TNSZ("vmovsd",VEX_RRX,8),       TNSZ("vmovddup",VEX_MX,8),      INVALID,
+/*  [14]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [18]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [1C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [20]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [24]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [28]  */   INVALID,                INVALID,                TNSZ("vcvtsi2sd",VEX_RMrX,4),INVALID,
+/*  [2C]  */   TNSZ("vcvttsd2si",VEX_MR,8),TNSZ("vcvtsd2si",VEX_MR,8),INVALID,         INVALID,
+
+/*  [30]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [34]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [38]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [3C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [40]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [44]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [48]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [4C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [50]  */   INVALID,                TNSZ("vsqrtsd",VEX_RMrX,8),     INVALID,                INVALID,
+/*  [54]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [58]  */   TNSZ("vaddsd",VEX_RMrX,8),      TNSZ("vmulsd",VEX_RMrX,8),      TNSZ("vcvtsd2ss",VEX_RMrX,8),   INVALID,
+/*  [5C]  */   TNSZ("vsubsd",VEX_RMrX,8),      TNSZ("vminsd",VEX_RMrX,8),      TNSZ("vdivsd",VEX_RMrX,8),      TNSZ("vmaxsd",VEX_RMrX,8),
+
+/*  [60]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [64]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [68]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [6C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [70]  */   TNSZ("vpshuflw",VEX_MXI,16),INVALID,            INVALID,                INVALID,
+/*  [74]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [78]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [7C]  */   TNSZ("vhaddps",VEX_RMrX,8),     TNSZ("vhsubps",VEX_RMrX,8),     INVALID,                INVALID,
+
+/*  [80]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [84]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [88]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [8C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [90]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [94]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [98]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [9C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [A0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [A4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [A8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [AC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [B0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [B4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [B8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [BC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [C0]  */   INVALID,                INVALID,                TNSZ("vcmpsd",VEX_RMRX,8),      INVALID,
+/*  [C4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [C8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [CC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [D0]  */   TNSZ("vaddsubps",VEX_RMrX,8),   INVALID,                INVALID,                INVALID,
+/*  [D4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [D8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [DC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [E0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [E4]  */   INVALID,                INVALID,                TNSZ("vcvtpd2dq",VEX_MX,16),INVALID,
+/*  [E8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [EC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [F0]  */   TNSZ("vlddqu",VEX_MX,16),       INVALID,                INVALID,                INVALID,
+/*  [F4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [F8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [FC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+};
+
 /*
  *     Decode table for SIMD instructions with the repz (0xf3) prefix.
  */
@@ -813,6 +1020,103 @@ const instable_t dis_opSIMDrepz[256] = {
 /*  [FC]  */   INVALID,                INVALID,                INVALID,                INVALID,
 };
 
+const instable_t dis_opAVXF30F[256] = {
+/*  [00]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [04]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [08]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [0C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [10]  */   TNSZ("vmovss",VEX_RMrX,4),      TNSZ("vmovss",VEX_RRX,4),       TNSZ("vmovsldup",VEX_MX,4),     INVALID,
+/*  [14]  */   INVALID,                INVALID,                TNSZ("vmovshdup",VEX_MX,4),     INVALID,
+/*  [18]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [1C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [20]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [24]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [28]  */   INVALID,                INVALID,                TNSZ("vcvtsi2ss",VEX_RMrX,4),INVALID,
+/*  [2C]  */   TNSZ("vcvttss2si",VEX_MR,4),TNSZ("vcvtss2si",VEX_MR,4),INVALID,         INVALID,
+
+/*  [30]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [34]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [38]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [3C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [40]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [44]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [48]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [4C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [50]  */   INVALID,                TNSZ("vsqrtss",VEX_RMrX,4),     TNSZ("vrsqrtss",VEX_RMrX,4),    TNSZ("vrcpss",VEX_RMrX,4),
+/*  [54]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [58]  */   TNSZ("vaddss",VEX_RMrX,4),      TNSZ("vmulss",VEX_RMrX,4),      TNSZ("vcvtss2sd",VEX_RMrX,4),   TNSZ("vcvttps2dq",VEX_MX,16),
+/*  [5C]  */   TNSZ("vsubss",VEX_RMrX,4),      TNSZ("vminss",VEX_RMrX,4),      TNSZ("vdivss",VEX_RMrX,4),      TNSZ("vmaxss",VEX_RMrX,4),
+
+/*  [60]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [64]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [68]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [6C]  */   INVALID,                INVALID,                INVALID,                TNSZ("vmovdqu",VEX_MX,16),
+
+/*  [70]  */   TNSZ("vpshufhw",VEX_MXI,16),INVALID,            INVALID,                INVALID,
+/*  [74]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [78]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [7C]  */   INVALID,                INVALID,                TNSZ("vmovq",VEX_MX,8), TNSZ("vmovdqu",VEX_RX,16),
+
+/*  [80]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [84]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [88]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [8C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [90]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [94]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [98]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [9C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [A0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [A4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [A8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [AC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [B0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [B4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [B8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [BC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [C0]  */   INVALID,                INVALID,                TNSZ("vcmpss",VEX_RMRX,4),      INVALID,
+/*  [C4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [C8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [CC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [D0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [D4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [D8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [DC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [E0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [E4]  */   INVALID,                INVALID,                TNSZ("vcvtdq2pd",VEX_MX,8),     INVALID,
+/*  [E8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [EC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [F0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [F4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [F8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [FC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+};
+
+/*
+ * The following two tables are used to decode crc32 and movbe,
+ * since they share the same opcode bytes.
+ */
+const instable_t dis_op0F38F0[2] = {
+/*  [00]  */   TNS("crc32b",CRC32),
+               TS("movbe",MOVBE),
+};
+
+const instable_t dis_op0F38F1[2] = {
+/*  [00]  */   TS("crc32",CRC32),
+               TS("movbe",MOVBE),
+};
+
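A note on how these two-entry tables are consumed: 0F 38 F0/F1 decodes as crc32 when the F2 repeat prefix is present and as movbe otherwise, so the IND() dispatch picks entry 0 or 1 accordingly. A minimal sketch of that selection, assuming the surrounding dtrace_disx86() variables (the exact check in the decoder may be written differently):

	/* Sketch only: choose crc32 vs. movbe for opcode 0F 38 F0. */
	const instable_t *sel;
	if (rep_prefix == 0xF2)
		sel = &dis_op0F38F0[0];		/* crc32b: F2 0F 38 F0 */
	else
		sel = &dis_op0F38F0[1];		/* movbe:     0F 38 F0 */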
+
 const instable_t dis_op0F38[256] = {
 /*  [00]  */   TNSZ("pshufb",XMM_66o,16),TNSZ("phaddw",XMM_66o,16),TNSZ("phaddd",XMM_66o,16),TNSZ("phaddsw",XMM_66o,16),
 /*  [04]  */   TNSZ("pmaddubsw",XMM_66o,16),TNSZ("phsubw",XMM_66o,16), TNSZ("phsubd",XMM_66o,16),TNSZ("phsubsw",XMM_66o,16),
@@ -895,6 +1199,87 @@ const instable_t dis_op0F38[256] = {
 /*  [FC]  */   INVALID,                INVALID,                INVALID,                INVALID,
 };
 
+const instable_t dis_opAVX660F38[256] = {
+/*  [00]  */   TNSZ("vpshufb",VEX_RMrX,16),TNSZ("vphaddw",VEX_RMrX,16),TNSZ("vphaddd",VEX_RMrX,16),TNSZ("vphaddsw",VEX_RMrX,16),
+/*  [04]  */   TNSZ("vpmaddubsw",VEX_RMrX,16),TNSZ("vphsubw",VEX_RMrX,16),     TNSZ("vphsubd",VEX_RMrX,16),TNSZ("vphsubsw",VEX_RMrX,16),
+/*  [08]  */   TNSZ("vpsignb",VEX_RMrX,16),TNSZ("vpsignw",VEX_RMrX,16),TNSZ("vpsignd",VEX_RMrX,16),TNSZ("vpmulhrsw",VEX_RMrX,16),
+/*  [0C]  */   TNSZ("vpermilps",VEX_RMrX,8),TNSZ("vpermilpd",VEX_RMrX,16),TNSZ("vtestps",VEX_RRI,8),   TNSZ("vtestpd",VEX_RRI,16),
+
+/*  [10]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [14]  */   INVALID,                INVALID,                INVALID,                TNSZ("vptest",VEX_RRI,16),
+/*  [18]  */   TNSZ("vbroadcastss",VEX_MX,4),TNSZ("vbroadcastsd",VEX_MX,8),TNSZ("vbroadcastf128",VEX_MX,16),INVALID,
+/*  [1C]  */   TNSZ("vpabsb",VEX_MX,16),TNSZ("vpabsw",VEX_MX,16),TNSZ("vpabsd",VEX_MX,16),INVALID,
+
+/*  [20]  */   TNSZ("vpmovsxbw",VEX_MX,16),TNSZ("vpmovsxbd",VEX_MX,16),TNSZ("vpmovsxbq",VEX_MX,16),TNSZ("vpmovsxwd",VEX_MX,16),
+/*  [24]  */   TNSZ("vpmovsxwq",VEX_MX,16),TNSZ("vpmovsxdq",VEX_MX,16),INVALID,        INVALID,
+/*  [28]  */   TNSZ("vpmuldq",VEX_RMrX,16),TNSZ("vpcmpeqq",VEX_RMrX,16),TNSZ("vmovntdqa",VEX_MX,16),TNSZ("vpackusdw",VEX_RMrX,16),
+/*  [2C]  */   TNSZ("vmaskmovps",VEX_RMrX,8),TNSZ("vmaskmovpd",VEX_RMrX,16),TNSZ("vmaskmovps",VEX_RRM,8),TNSZ("vmaskmovpd",VEX_RRM,16),
+
+/*  [30]  */   TNSZ("vpmovzxbw",VEX_MX,16),TNSZ("vpmovzxbd",VEX_MX,16),TNSZ("vpmovzxbq",VEX_MX,16),TNSZ("vpmovzxwd",VEX_MX,16),
+/*  [34]  */   TNSZ("vpmovzxwq",VEX_MX,16),TNSZ("vpmovzxdq",VEX_MX,16),TNSZ("vpermd",VEX_RMrX,16),TNSZ("vpcmpgtq",VEX_RMrX,16),
+/*  [38]  */   TNSZ("vpminsb",VEX_RMrX,16),TNSZ("vpminsd",VEX_RMrX,16),TNSZ("vpminuw",VEX_RMrX,16),TNSZ("vpminud",VEX_RMrX,16),
+/*  [3C]  */   TNSZ("vpmaxsb",VEX_RMrX,16),TNSZ("vpmaxsd",VEX_RMrX,16),TNSZ("vpmaxuw",VEX_RMrX,16),TNSZ("vpmaxud",VEX_RMrX,16),
+
+/*  [40]  */   TNSZ("vpmulld",VEX_RMrX,16),TNSZ("vphminposuw",VEX_MX,16),INVALID,      INVALID,
+/*  [44]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [48]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [4C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [50]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [54]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [58]  */   TNSZ("vpbroadcastd",VEX_MX,16),TNSZ("vpbroadcastq",VEX_MX,16),TNSZ("vbroadcasti128",VEX_MX,16),INVALID,
+/*  [5C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [60]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [64]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [68]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [6C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [70]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [74]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [78]  */   TNSZ("vpbroadcastb",VEX_MX,16),TNSZ("vpbroadcastw",VEX_MX,16),INVALID,  INVALID,
+/*  [7C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [80]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [84]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [88]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [8C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [90]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [94]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [98]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [9C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [A0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [A4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [A8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [AC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [B0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [B4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [B8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [BC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [C0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [C4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [C8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [CC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [D0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [D4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [D8]  */   INVALID,                INVALID,                INVALID,                TNSZ("vaesimc",VEX_MX,16),
+/*  [DC]  */   TNSZ("vaesenc",VEX_RMrX,16),TNSZ("vaesenclast",VEX_RMrX,16),TNSZ("vaesdec",VEX_RMrX,16),TNSZ("vaesdeclast",VEX_RMrX,16),
+
+/*  [E0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [E4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [E8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [EC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [F0]  */   IND(dis_op0F38F0),      IND(dis_op0F38F1),      INVALID,                INVALID,
+/*  [F4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [F8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [FC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+};
+
 const instable_t dis_op0F3A[256] = {
 /*  [00]  */   INVALID,                INVALID,                INVALID,                INVALID,
 /*  [04]  */   INVALID,                INVALID,                INVALID,                INVALID,
@@ -977,6 +1362,88 @@ const instable_t dis_op0F3A[256] = {
 /*  [FC]  */   INVALID,                INVALID,                INVALID,                INVALID,
 };
 
+const instable_t dis_opAVX660F3A[256] = {
+/*  [00]  */   TNSZ("vpermq",VEX_MXI,16),TNSZ("vpermpd",VEX_MXI,16),TNSZ("vpblendd",VEX_RMRX,16),INVALID,
+/*  [04]  */   TNSZ("vpermilps",VEX_MXI,8),TNSZ("vpermilpd",VEX_MXI,16),TNSZ("vperm2f128",VEX_RMRX,16),INVALID,
+/*  [08]  */   TNSZ("vroundps",VEX_MXI,16),TNSZ("vroundpd",VEX_MXI,16),TNSZ("vroundss",VEX_RMRX,16),TNSZ("vroundsd",VEX_RMRX,16),
+/*  [0C]  */   TNSZ("vblendps",VEX_RMRX,16),TNSZ("vblendpd",VEX_RMRX,16),TNSZ("vpblendw",VEX_RMRX,16),TNSZ("vpalignr",VEX_RMRX,16),
+
+/*  [10]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [14]  */   TNSZ("vpextrb",VEX_RRi,8),TNSZ("vpextrw",VEX_RRi,16),TNSZ("vpextrd",VEX_RRi,16),TNSZ("vextractps",VEX_RM,16),
+/*  [18]  */   TNSZ("vinsertf128",VEX_RMRX,16),TNSZ("vextractf128",VEX_RX,16),INVALID,         INVALID,
+/*  [1C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [20]  */   TNSZ("vpinsrb",VEX_RMRX,8),TNSZ("vinsertps",VEX_RMRX,16),TNSZ("vpinsrd",VEX_RMRX,16),INVALID,
+/*  [24]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [28]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [2C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [30]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [34]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [38]  */   TNSZ("vinserti128",VEX_RMRX,16),TNSZ("vextracti128",VEX_RIM,16),INVALID,                INVALID,
+/*  [3C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [40]  */   TNSZ("vdpps",VEX_RMRX,16),TNSZ("vdppd",VEX_RMRX,16),TNSZ("vmpsadbw",VEX_RMRX,16),INVALID,
+/*  [44]  */   TNSZ("vpclmulqdq",VEX_RMRX,16),INVALID,         INVALID,                INVALID,
+/*  [48]  */   INVALID,                INVALID,                TNSZ("vblendvps",VEX_RMRX,8),   TNSZ("vblendvpd",VEX_RMRX,16),
+/*  [4C]  */   TNSZ("vpblendvb",VEX_RMRX,16),INVALID,          INVALID,                INVALID,
+
+/*  [50]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [54]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [58]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [5C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [60]  */   TNSZ("vpcmpestrm",VEX_MXI,16),TNSZ("vpcmpestri",VEX_MXI,16),TNSZ("vpcmpistrm",VEX_MXI,16),TNSZ("vpcmpistri",VEX_MXI,16),
+/*  [64]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [68]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [6C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [70]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [74]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [78]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [7C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [80]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [84]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [88]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [8C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [90]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [94]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [98]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [9C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [A0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [A4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [A8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [AC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [B0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [B4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [B8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [BC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [C0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [C4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [C8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [CC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [D0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [D4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [D8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [DC]  */   INVALID,                INVALID,                INVALID,                TNSZ("vaeskeygenassist",VEX_MXI,16),
+
+/*  [E0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [E4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [E8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [EC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+
+/*  [F0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [F4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [F8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [FC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+};
+
 /*
  *     Decode table for 0x0F opcodes
  */
@@ -1065,6 +1532,88 @@ const instable_t dis_op0F[16][16] = {
 /*  [FC]  */   TNSZ("paddb",MMO,8),    TNSZ("paddw",MMO,8),    TNSZ("paddd",MMO,8),    INVALID,
 } };
 
+const instable_t dis_opAVX0F[16][16] = {
+{
+/*  [00]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [04]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [08]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [0C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+}, {
+/*  [10]  */   TNSZ("vmovups",VEX_MX,16),      TNSZ("vmovups",VEX_RM,16),TNSZ("vmovlps",VEX_RMrX,8),   TNSZ("vmovlps",VEX_RM,8),
+/*  [14]  */   TNSZ("vunpcklps",VEX_RMrX,16),TNSZ("vunpckhps",VEX_RMrX,16),TNSZ("vmovhps",VEX_RMrX,8),TNSZ("vmovhps",VEX_RM,8),
+/*  [18]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [1C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+}, {
+/*  [20]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [24]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [28]  */   TNSZ("vmovaps",VEX_MX,16),      TNSZ("vmovaps",VEX_RX,16),INVALID,              TNSZ("vmovntps",VEX_RM,16),
+/*  [2C]  */   INVALID,                INVALID,                TNSZ("vucomiss",VEX_MX,4),TNSZ("vcomiss",VEX_MX,4),
+}, {
+/*  [30]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [34]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [38]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [3C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+}, {
+/*  [40]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [44]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [48]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [4C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+}, {
+/*  [50]  */   TNS("vmovmskps",VEX_MR),        TNSZ("vsqrtps",VEX_MX,16),      TNSZ("vrsqrtps",VEX_MX,16),TNSZ("vrcpps",VEX_MX,16),
+/*  [54]  */   TNSZ("vandps",VEX_RMrX,16),     TNSZ("vandnps",VEX_RMrX,16),    TNSZ("vorps",VEX_RMrX,16),      TNSZ("vxorps",VEX_RMrX,16),
+/*  [58]  */   TNSZ("vaddps",VEX_RMrX,16),     TNSZ("vmulps",VEX_RMrX,16),     TNSZ("vcvtps2pd",VEX_MX,8),TNSZ("vcvtdq2ps",VEX_MX,16),
+/*  [5C]  */   TNSZ("vsubps",VEX_RMrX,16),     TNSZ("vminps",VEX_RMrX,16),     TNSZ("vdivps",VEX_RMrX,16),     TNSZ("vmaxps",VEX_RMrX,16),
+}, {
+/*  [60]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [64]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [68]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [6C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+}, {
+/*  [70]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [74]  */   INVALID,                INVALID,                INVALID,                TNS("vzeroupper", VEX_NONE),
+/*  [78]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [7C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+}, {
+/*  [80]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [84]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [88]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [8C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+}, {
+/*  [90]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [94]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [98]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [9C]  */   INVALID,                INVALID,                INVALID,                INVALID,
+}, {
+/*  [A0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [A4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [A8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [AC]  */   INVALID,                INVALID,                TNSZ("vldmxcsr",VEX_MO,2),              INVALID,
+}, {
+/*  [B0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [B4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [B8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [BC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+}, {
+/*  [C0]  */   INVALID,                INVALID,                TNSZ("vcmpps",VEX_RMRX,16),INVALID,
+/*  [C4]  */   INVALID,                INVALID,                TNSZ("vshufps",VEX_RMRX,16),INVALID,
+/*  [C8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [CC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+}, {
+/*  [D0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [D4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [D8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [DC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+}, {
+/*  [E0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [E4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [E8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [EC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+}, {
+/*  [F0]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [F4]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [F8]  */   INVALID,                INVALID,                INVALID,                INVALID,
+/*  [FC]  */   INVALID,                INVALID,                INVALID,                INVALID,
+} };
 
 /*
  *     Decode table for 0x80 opcodes
@@ -1416,6 +1965,29 @@ const instable_t dis_distable[16][16] = {
 #define        REX_X 0x02      /* high order bit extension of SIB index field */
 #define        REX_B 0x01      /* extends ModRM r_m, SIB base, or opcode reg */
 
+/*
+ * These are the individual fields of a VEX prefix.
+ */
+#define        VEX_R 0x08      /* REX.R in 1's complement form */
+#define        VEX_X 0x04      /* REX.X in 1's complement form */
+#define        VEX_B 0x02      /* REX.B in 1's complement form */
+/* Vector Length, 0: scalar or 128-bit vector, 1: 256-bit vector */
+#define        VEX_L 0x04
+#define        VEX_W 0x08      /* opcode specific, use like REX.W */
+#define        VEX_m 0x1F      /* VEX m-mmmm field */
+#define        VEX_v 0x78      /* VEX register specifier */
+#define        VEX_p 0x03      /* VEX pp field, opcode extension */
+
+/* VEX m-mmmm field, only used by the three-byte prefix */
+#define        VEX_m_0F 0x01   /* implied 0F leading opcode byte */
+#define        VEX_m_0F38 0x02 /* implied 0F 38 leading opcode byte */
+#define        VEX_m_0F3A 0x03 /* implied 0F 3A leading opcode byte */
+
+/* VEX pp field, providing the equivalent functionality of a SIMD prefix */
+#define        VEX_p_66 0x01
+#define        VEX_p_F3 0x02
+#define        VEX_p_F2 0x03
+
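The masks above are applied to the two 4-bit opcode nibbles returned by dtrace_get_opcode(). As a worked example with illustrative bytes (not taken from this diff), the 2-byte prefix of C5 F8 58 C1 (vaddps xmm0, xmm0, xmm1) has the nibbles 0xF and 0x8 and decodes as in this sketch, which mirrors the 2-byte VEX handling added below:

	/* Sketch only: decode the second byte (0xF8) of a 2-byte VEX prefix. */
	uint_t opcode3 = 0xF, opcode4 = 0x8;
	uint_t vex_R = ((opcode3 & VEX_R) & 0x0F) >> 3;	/* 1: REX.R extension not set */
	uint_t vex_L = ((opcode4 & VEX_L) & 0x0F) >> 2;	/* 0: 128-bit operation */
	uint_t vex_v = (((opcode3 << 4) | opcode4) & VEX_v) >> 3; /* 0xF: register 0 after 0xF - vex_v */
	uint_t vex_p = opcode4 & VEX_p;			/* 0: no implied SIMD prefix */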
 /*
  * Even in 64 bit mode, usually only 4 byte immediate operands are supported.
  */
@@ -1442,6 +2014,7 @@ static int isize64[] = {1, 2, 4, 8};
 #define        DEBUG_OPND      6       /* "value" used to indicate a debug reg */
 #define        TEST_OPND       7       /* "value" used to indicate a test reg */
 #define        WORD_OPND       8       /* w-bit value indicating word size reg */
+#define        YMM_OPND        9       /* "value" used to indicate a ymm reg */
 
 /*
  * Get the next byte and separate the op code into the high and low nibbles.
@@ -1521,11 +2094,30 @@ dtrace_rex_adjust(uint_t rex_prefix, uint_t mode, uint_t *reg, uint_t *r_m)
        } else {
                if (reg != NULL && (REX_R & rex_prefix) != 0)
                        *reg += 8;
-               if (r_m != NULL && (REX_B & rex_prefix) != 0)
+               if (r_m != NULL && (REX_B & rex_prefix) != 0)
+                       *r_m += 8;
+       }
+}
+
+/*
+ * Adjust register selection based on any VEX prefix bits present.
+ * Note: VEX.R, VEX.X and VEX.B use the inverted (1's complement) form of the corresponding REX prefix bits
+ */
+/*ARGSUSED*/
+static void
+dtrace_vex_adjust(uint_t vex_byte1, uint_t mode, uint_t *reg, uint_t *r_m)
+{
+#pragma unused (mode)
+       if (reg != NULL && r_m == NULL) {
+               if (!(vex_byte1 & VEX_B))
+                       *reg += 8;
+       } else {
+               if (reg != NULL && ((VEX_R & vex_byte1) == 0))
+                       *reg += 8;
+               if (r_m != NULL && ((VEX_B & vex_byte1) == 0))
                        *r_m += 8;
        }
 }
-
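Because the VEX fields carry the REX extensions in 1's complement, it is a cleared bit that triggers the +8 adjustment. A small illustrative call, assuming a register-form ModR/M with r_m = 1 and VEX.B clear:

	/* Sketch only: VEX.B == 0 selects the high register bank. */
	uint_t reg = 0, r_m = 1;
	dtrace_vex_adjust(VEX_R | VEX_X, REG_ONLY, &reg, &r_m);	/* r_m becomes 9 (xmm9/ymm9) */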
 /*
  * Get an immediate operand of the given size, with sign extension.
  */
@@ -1553,6 +2145,7 @@ dtrace_imm_opnd(dis86_t *x, int wbit, int size, int opindex)
                break;
        case MM_OPND:
        case XMM_OPND:
+       case YMM_OPND:
        case SEG_OPND:
        case CONTROL_OPND:
        case DEBUG_OPND:
@@ -1667,6 +2260,9 @@ dtrace_get_operand(dis86_t *x, uint_t mode, uint_t r_m, int wbit, int opindex)
                case XMM_OPND:
                        (void) strlcat(opnd, dis_XMMREG[r_m], OPLEN);
                        break;
+               case YMM_OPND:
+                       (void) strlcat(opnd, dis_YMMREG[r_m], OPLEN);
+                       break;
                case SEG_OPND:
                        (void) strlcat(opnd, dis_SEGREG[r_m], OPLEN);
                        break;
@@ -1940,13 +2536,43 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode)
        uint_t  lock_prefix = 0;
        uint_t  rep_prefix = 0;
        uint_t  rex_prefix = 0; /* amd64 register extension prefix */
+
+       /*
+        * Intel VEX instruction encoding prefix and fields
+        */
+
+       /* 0xC4 introduces the 3-byte prefix, 0xC5 the 2-byte prefix */
+       uint_t vex_prefix = 0;
+
+       /*
+        * VEX prefix byte 1, includes vex.r, vex.x and vex.b
+        * (for the 3-byte prefix)
+        */
+       uint_t vex_byte1 = 0;
+
+       /*
+        * In 32-bit mode, the next byte must be prefetched to
+        * distinguish AVX from les/lds
+        */
+       uint_t vex_prefetch = 0;
+
+       uint_t vex_m = 0;
+       uint_t vex_v = 0;
+       uint_t vex_p = 0;
+       uint_t vex_R = 1;
+       uint_t vex_X = 1;
+       uint_t vex_B = 1;
+       uint_t vex_W = 0;
+       uint_t vex_L;
+
        size_t  off;
 
        instable_t dp_mmx;
 
        x->d86_len = 0;
        x->d86_rmindex = -1;
-       x->d86_error = 0;
+       x->d86_rex_prefix = 0;
+       x->d86_got_modrm = 0;
 #ifdef DIS_TEXT
        x->d86_numopnds = 0;
        x->d86_seg_prefix = NULL;
@@ -2040,7 +2666,150 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode)
                        if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0)
                                goto error;
                        dp = (instable_t *)&dis_distable[opcode1][opcode2];
+               } else if (opcode1 == 0xC &&
+                   (opcode2 == 0x4 || opcode2 == 0x5)) {
+                       /* AVX instructions */
+                       vex_prefix = (opcode1 << 4) | opcode2;
+                       x->d86_rex_prefix = 0x40;
+               }
+       } else if (opcode1 == 0xC && (opcode2 == 0x4 || opcode2 == 0x5)) {
+               /* LDS, LES or AVX */
+               dtrace_get_modrm(x, &mode, &reg, &r_m);
+               vex_prefetch = 1;
+
+               if (mode == REG_ONLY) {
+                       /* AVX */
+                       vex_prefix = (opcode1 << 4) | opcode2;
+                       x->d86_rex_prefix = 0x40;
+                       opcode3 = (((mode << 3) | reg)>>1) & 0x0F;
+                       opcode4 = ((reg << 3) | r_m) & 0x0F;
+               }
+       }
+
+       if (vex_prefix == VEX_2bytes) {
+               if (!vex_prefetch) {
+                       if (dtrace_get_opcode(x, &opcode3, &opcode4) != 0)
+                               goto error;
+               }
+               vex_R = ((opcode3 & VEX_R) & 0x0F) >> 3;
+               vex_L = ((opcode4 & VEX_L) & 0x0F) >> 2;
+               vex_v = (((opcode3 << 4) | opcode4) & VEX_v) >> 3;
+               vex_p = opcode4 & VEX_p;
+               /*
+                * The vex.x and vex.b bits are not defined in the two-byte
+                * VEX prefix; their default values are 1
+                */
+               vex_byte1 = (opcode3 & VEX_R) | VEX_X | VEX_B;
+
+               if (vex_R == 0)
+                       x->d86_rex_prefix |= REX_R;
+
+               if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0)
+                       goto error;
+
+               switch (vex_p) {
+                       case VEX_p_66:
+                               dp = (instable_t *)
+                                   &dis_opAVX660F[(opcode1 << 4) | opcode2];
+                               break;
+                       case VEX_p_F3:
+                               dp = (instable_t *)
+                                   &dis_opAVXF30F[(opcode1 << 4) | opcode2];
+                               break;
+                       case VEX_p_F2:
+                               dp = (instable_t *)
+                                   &dis_opAVXF20F [(opcode1 << 4) | opcode2];
+                               break;
+                       default:
+                               dp = (instable_t *)
+                                   &dis_opAVX0F[opcode1][opcode2];
+
+               }
+
+       } else if (vex_prefix == VEX_3bytes) {
+               if (!vex_prefetch) {
+                       if (dtrace_get_opcode(x, &opcode3, &opcode4) != 0)
+                               goto error;
                }
+               vex_R = (opcode3 & VEX_R) >> 3;
+               vex_X = (opcode3 & VEX_X) >> 2;
+               vex_B = (opcode3 & VEX_B) >> 1;
+               vex_m = (((opcode3 << 4) | opcode4) & VEX_m);
+               vex_byte1 = opcode3 & (VEX_R | VEX_X | VEX_B);
+
+               if (vex_R == 0)
+                       x->d86_rex_prefix |= REX_R;
+               if (vex_X == 0)
+                       x->d86_rex_prefix |= REX_X;
+               if (vex_B == 0)
+                       x->d86_rex_prefix |= REX_B;
+
+               if (dtrace_get_opcode(x, &opcode5, &opcode6) != 0)
+                       goto error;
+               vex_W = (opcode5 & VEX_W) >> 3;
+               vex_L = (opcode6 & VEX_L) >> 2;
+               vex_v = (((opcode5 << 4) | opcode6) & VEX_v) >> 3;
+               vex_p = opcode6 & VEX_p;
+
+               if (vex_W)
+                       x->d86_rex_prefix |= REX_W;
+
+               /* Only these three vex_m values are valid; others are reserved */
+               if ((vex_m != VEX_m_0F) && (vex_m != VEX_m_0F38) &&
+                   (vex_m != VEX_m_0F3A))
+                       goto error;
+
+               if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0)
+                       goto error;
+
+               switch (vex_p) {
+                       case VEX_p_66:
+                               if (vex_m == VEX_m_0F) {
+                                       dp = (instable_t *)
+                                           &dis_opAVX660F
+                                           [(opcode1 << 4) | opcode2];
+                               } else if (vex_m == VEX_m_0F38) {
+                                       dp = (instable_t *)
+                                           &dis_opAVX660F38
+                                           [(opcode1 << 4) | opcode2];
+                               } else if (vex_m == VEX_m_0F3A) {
+                                       dp = (instable_t *)
+                                           &dis_opAVX660F3A
+                                           [(opcode1 << 4) | opcode2];
+                               } else {
+                                       goto error;
+                               }
+                               break;
+                       case VEX_p_F3:
+                               if (vex_m == VEX_m_0F) {
+                                       dp = (instable_t *)
+                                           &dis_opAVXF30F
+                                           [(opcode1 << 4) | opcode2];
+                               } else {
+                                       goto error;
+                               }
+                               break;
+                       case VEX_p_F2:
+                               if (vex_m == VEX_m_0F) {
+                                       dp = (instable_t *)
+                                           &dis_opAVXF20F
+                                           [(opcode1 << 4) | opcode2];
+                               } else {
+                                       goto error;
+                               }
+                               break;
+                       default:
+                               dp = (instable_t *)
+                                   &dis_opAVX0F[opcode1][opcode2];
+
+               }
+       }
+
+       if (vex_prefix) {
+               if (vex_L)
+                       wbit = YMM_OPND;
+               else
+                       wbit = XMM_OPND;
        }
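For the 3-byte path, a worked decode of an illustrative byte sequence (not taken from this diff), C4 E3 79 17 C0 01, i.e. vextractps eax, xmm0, 1, ends up with the following field values in the code above:

	/* C4    -> vex_prefix = 0xC4 (3-byte form)                     */
	/* E3    -> vex_R = vex_X = vex_B = 1 (no REX extensions),      */
	/*          vex_m = 0x03 = VEX_m_0F3A                           */
	/* 79    -> vex_W = 0, vex_L = 0, vex_v = 0xF, vex_p = VEX_p_66 */
	/* 17    -> dp = &dis_opAVX660F3A[0x17] (vextractps)            */
	/* C0 01 -> ModR/M (mod = 3, reg = 0, r_m = 0) and imm8 = 1     */

Since vex_L is 0, wbit is set to XMM_OPND, so the xmm source is printed as an xmm (rather than ymm) register.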
 
        /*
@@ -2049,7 +2818,7 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode)
         * ignored.
         */
        if (cpu_mode == SIZE64) {
-               if (rex_prefix & REX_W)
+               if ((rex_prefix & REX_W) || vex_W)
                        opnd_size = SIZE64;
                else if (opnd_size_prefix)
                        opnd_size = SIZE16;
@@ -2210,8 +2979,11 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode)
  * In amd64 mode, the ARPL opcode is changed to MOVSXD
  * (sign extend 32 bit to 64 bit)
         */
-       if (cpu_mode == SIZE64 && opcode1 == 0x6 && opcode2 == 0x3)
+       if ((vex_prefix == 0) && cpu_mode == SIZE64 &&
+           opcode1 == 0x6 && opcode2 == 0x3)
+       {
                dp = (instable_t *)&dis_opMOVSLD;
+       }
 
        /*
         * at this point we should have a correct (or invalid) opcode
@@ -2389,7 +3161,11 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode)
         * Process operands based on the addressing modes.
         */
        x->d86_mode = cpu_mode;
-       x->d86_rex_prefix = rex_prefix;
+       /*
+        * In vex mode the rex_prefix has no meaning
+        */
+       if (!vex_prefix)
+               x->d86_rex_prefix = rex_prefix;
        x->d86_opnd_size = opnd_size;
        x->d86_addr_size = addr_size;
        vbit = 0;               /* initialize for mem/reg -> reg */
@@ -2688,7 +3464,26 @@ just_mem:
                        }
                }
                /*FALLTHROUGH*/
+       case XGETBV_XSETBV:
+               if (mode == 3) {
+                       if (r_m == 0) {
+#ifdef DIS_TEXT
+                               (void) strncpy(x->d86_mnem, "xgetbv", OPLEN);
+#endif
+                               NOMEM;
+                               break;
+                       } else if (r_m == 1) {
+#ifdef DIS_TEXT
+                               (void) strncpy(x->d86_mnem, "xsetbv", OPLEN);
+#endif
+                               NOMEM;
+                               break;
+                       } else {
+                               goto error;
+                       }
 
+               }
+               /*FALLTHROUGH*/
        case MO:
                /* Similar to M, but only memory (no direct registers) */
                wbit = LONG_OPND;
@@ -2781,6 +3576,9 @@ just_mem:
 
        /* memory or register operand to register */
        case MR:
+               if (vex_prefetch) {
+                       x->d86_got_modrm = 1;
+               }
                wbit = LONG_OPND;
                STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 0);
                break;
@@ -3284,7 +4082,7 @@ xmmprm:
 #else
                if (mode != REG_ONLY) {
                        dtrace_rex_adjust(rex_prefix, mode, &reg, &r_m);
-                       dtrace_get_operand(x, mode, r_m, BYTE_OPND, 0);
+                       dtrace_get_operand(x, mode, r_m, LONG_OPND, 0);
                        NOMEM;
                }
 #endif
@@ -3303,15 +4101,27 @@ xmmprm:
 
        case XMMFENCE:
                /*
-                * Only the following exact byte sequences are allowed:
-                *
-                *      0f ae e8        lfence
-                *      0f ae f0        mfence
+                * XRSTOR and LFENCE share the same opcode but differ in mode
                 */
-               if ((uint8_t)x->d86_bytes[x->d86_len - 1] != 0xe8 &&
-                   (uint8_t)x->d86_bytes[x->d86_len - 1] != 0xf0)
-                       goto error;
+               dtrace_get_modrm(x, &mode, &reg, &r_m);
 
+               if (mode == REG_ONLY) {
+                       /*
+                        * Only the following exact byte sequences are allowed:
+                        *
+                        *      0f ae e8        lfence
+                        *      0f ae f0        mfence
+                        */
+                       if ((uint8_t)x->d86_bytes[x->d86_len - 1] != 0xe8 &&
+                           (uint8_t)x->d86_bytes[x->d86_len - 1] != 0xf0)
+                               goto error;
+               } else {
+#ifdef DIS_TEXT
+                       (void) strncpy(x->d86_mnem, "xrstor", OPLEN);
+#endif
+                       dtrace_rex_adjust(rex_prefix, mode, &reg, &r_m);
+                       dtrace_get_operand(x, mode, r_m, BYTE_OPND, 0);
+               }
                break;
 
 
@@ -3339,6 +4149,371 @@ xmmprm:
                NOMEM;
                break;
 
+       /* AVX instructions */
+       case VEX_MO:
+               /* op(ModR/M.r/m) */
+               x->d86_numopnds = 1;
+               dtrace_get_modrm(x, &mode, &reg, &r_m);
+#ifdef DIS_TEXT
+               if ((dp == &dis_opAVX0F[0xA][0xE]) && (reg == 3))
+                       (void) strncpy(x->d86_mnem, "vstmxcsr", OPLEN);
+#endif
+               dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
+               dtrace_get_operand(x, mode, r_m, wbit, 0);
+               break;
+       case VEX_RMrX:
+               /* ModR/M.reg := op(VEX.vvvv, ModR/M.r/m) */
+               x->d86_numopnds = 3;
+               dtrace_get_modrm(x, &mode, &reg, &r_m);
+               dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
+
+               if (mode != REG_ONLY) {
+                       if ((dp == &dis_opAVXF20F[0x10]) ||
+                           (dp == &dis_opAVXF30F[0x10])) {
+                               /* vmovsd <m64>, <xmm> */
+                               /* or vmovss <m32>, <xmm> */
+                               x->d86_numopnds = 2;
+                               goto L_VEX_MX;
+                       }
+               }
+
+               dtrace_get_operand(x, REG_ONLY, reg, wbit, 2);
+               /*
+                * VEX prefix uses the 1's complement form to encode the
+                * XMM/YMM regs
+                */
+               dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 1);
+
+               if ((dp == &dis_opAVXF20F[0x2A]) ||
+                   (dp == &dis_opAVXF30F[0x2A])) {
+                       /*
+                        * vcvtsi2sd <r/m>, <xmm>, <xmm> or vcvtsi2ss <r/m>,
+                        * <xmm>, <xmm>
+                        */
+                       wbit = LONG_OPND;
+               }
+#ifdef DIS_TEXT
+               else if ((mode == REG_ONLY) &&
+                   (dp == &dis_opAVX0F[0x1][0x6])) {   /* vmovlhps */
+                       (void) strncpy(x->d86_mnem, "vmovlhps", OPLEN);
+               } else if ((mode == REG_ONLY) &&
+                   (dp == &dis_opAVX0F[0x1][0x2])) {   /* vmovhlps */
+                       (void) strncpy(x->d86_mnem, "vmovhlps", OPLEN);
+               }
+#endif
+               dtrace_get_operand(x, mode, r_m, wbit, 0);
+
+               break;
+
+       case VEX_RRX:
+               /* ModR/M.rm := op(VEX.vvvv, ModR/M.reg) */
+               x->d86_numopnds = 3;
+
+               dtrace_get_modrm(x, &mode, &reg, &r_m);
+               dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
+
+               if (mode != REG_ONLY) {
+                       if ((dp == &dis_opAVXF20F[0x11]) ||
+                           (dp == &dis_opAVXF30F[0x11])) {
+                               /* vmovsd <xmm>, <m64> */
+                               /* or vmovss <xmm>, <m32> */
+                               x->d86_numopnds = 2;
+                               goto L_VEX_RM;
+                       }
+               }
+
+               dtrace_get_operand(x, mode, r_m, wbit, 2);
+               dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 1);
+               dtrace_get_operand(x, REG_ONLY, reg, wbit, 0);
+               break;
+
+       case VEX_RMRX:
+               /* ModR/M.reg := op(VEX.vvvv, ModR/M.r_m, imm8[7:4]) */
+               x->d86_numopnds = 4;
+
+               dtrace_get_modrm(x, &mode, &reg, &r_m);
+               dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
+               dtrace_get_operand(x, REG_ONLY, reg, wbit, 3);
+               dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 2);
+               if (dp == &dis_opAVX660F3A[0x18]) {
+                       /* vinsertf128 <imm8>, <xmm>, <ymm>, <ymm> */
+                       dtrace_get_operand(x, mode, r_m, XMM_OPND, 1);
+               } else if ((dp == &dis_opAVX660F3A[0x20]) ||
+                   (dp == & dis_opAVX660F[0xC4])) {
+                       /* vpinsrb <imm8>, <reg/mm>, <xmm>, <xmm> */
+                       /* or vpinsrw <imm8>, <reg/mm>, <xmm>, <xmm> */
+                       dtrace_get_operand(x, mode, r_m, LONG_OPND, 1);
+               } else if (dp == &dis_opAVX660F3A[0x22]) {
+                       /* vpinsrd/q <imm8>, <reg/mm>, <xmm>, <xmm> */
+#ifdef DIS_TEXT
+                       if (vex_W)
+                               x->d86_mnem[6] = 'q';
+#endif
+                       dtrace_get_operand(x, mode, r_m, LONG_OPND, 1);
+               } else {
+                       dtrace_get_operand(x, mode, r_m, wbit, 1);
+               }
+
+               /* one byte immediate number */
+               dtrace_imm_opnd(x, wbit, 1, 0);
+
+               /* vblendvpd, vblendvps, vpblendvb use imm8[7:4] to encode the reg */
+               if ((dp == &dis_opAVX660F3A[0x4A]) ||
+                   (dp == &dis_opAVX660F3A[0x4B]) ||
+                   (dp == &dis_opAVX660F3A[0x4C])) {
+#ifdef DIS_TEXT
+                       int regnum = (x->d86_opnd[0].d86_value & 0xF0) >> 4;
+#endif
+                       x->d86_opnd[0].d86_mode = MODE_NONE;
+#ifdef DIS_TEXT
+                       if (vex_L)
+                               (void) strncpy(x->d86_opnd[0].d86_opnd,
+                                   dis_YMMREG[regnum], OPLEN);
+                       else
+                               (void) strncpy(x->d86_opnd[0].d86_opnd,
+                                   dis_XMMREG[regnum], OPLEN);
+#endif
+               }
+               break;
+
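For these three blend instructions the fourth source register rides in the high nibble of the immediate, as the code above shows. As a quick illustrative check of the arithmetic: an imm8 of 0x30 yields regnum = (0x30 & 0xF0) >> 4 = 3, so the operand is printed as xmm3, or ymm3 when VEX.L is set.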
+       case VEX_MX:
+               /* ModR/M.reg := op(ModR/M.rm) */
+               x->d86_numopnds = 2;
+
+               dtrace_get_modrm(x, &mode, &reg, &r_m);
+               dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
+L_VEX_MX:
+
+               if ((dp == &dis_opAVXF20F[0xE6]) ||
+                   (dp == &dis_opAVX660F[0x5A]) ||
+                   (dp == &dis_opAVX660F[0xE6])) {
+                       /* vcvtpd2dq <ymm>, <xmm> */
+                       /* or vcvtpd2ps <ymm>, <xmm> */
+                       /* or vcvttpd2dq <ymm>, <xmm> */
+                       dtrace_get_operand(x, REG_ONLY, reg, XMM_OPND, 1);
+                       dtrace_get_operand(x, mode, r_m, wbit, 0);
+               } else if ((dp == &dis_opAVXF30F[0xE6]) ||
+                   (dp == &dis_opAVX0F[0x5][0xA]) ||
+                   (dp == &dis_opAVX660F38[0x58]) ||
+                   (dp == &dis_opAVX660F38[0x59]) ||
+                   (dp == &dis_opAVX660F38[0x78]) ||
+                   (dp == &dis_opAVX660F38[0x79])) {
+                       /* vcvtdq2pd <xmm>, <ymm> */
+                       /* or vcvtps2pd <xmm>, <ymm> */
+                       /* or vbroadcasts* <xmm>, <ymm> */
+                       dtrace_get_operand(x, REG_ONLY, reg, wbit, 1);
+                       dtrace_get_operand(x, mode, r_m, XMM_OPND, 0);
+               } else if (dp == &dis_opAVX660F[0x6E]) {
+                       /* vmovd/q <reg/mem 32/64>, <xmm> */
+#ifdef DIS_TEXT
+                       if (vex_W)
+                               x->d86_mnem[4] = 'q';
+#endif
+                       dtrace_get_operand(x, REG_ONLY, reg, wbit, 1);
+                       dtrace_get_operand(x, mode, r_m, LONG_OPND, 0);
+               } else {
+                       dtrace_get_operand(x, REG_ONLY, reg, wbit, 1);
+                       dtrace_get_operand(x, mode, r_m, wbit, 0);
+               }
+
+               break;
+
+       case VEX_MXI:
+               /* ModR/M.reg := op(ModR/M.rm, imm8) */
+               x->d86_numopnds = 3;
+
+               dtrace_get_modrm(x, &mode, &reg, &r_m);
+               dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
+
+               dtrace_get_operand(x, REG_ONLY, reg, wbit, 2);
+               dtrace_get_operand(x, mode, r_m, wbit, 1);
+
+               /* one byte immediate number */
+               dtrace_imm_opnd(x, wbit, 1, 0);
+               break;
+
+       case VEX_XXI:
+               /* VEX.vvvv := op(ModR/M.rm, imm8) */
+               x->d86_numopnds = 3;
+
+               dtrace_get_modrm(x, &mode, &reg, &r_m);
+#ifdef DIS_TEXT
+               (void) strncpy(x->d86_mnem, dis_AVXvgrp7[opcode2 - 1][reg],
+                   OPLEN);
+#endif
+               dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
+
+               dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 2);
+               dtrace_get_operand(x, REG_ONLY, r_m, wbit, 1);
+
+               /* one byte immediate number */
+               dtrace_imm_opnd(x, wbit, 1, 0);
+               break;
+
+       case VEX_MR:
+               /* ModR/M.reg (reg32/64) := op(ModR/M.rm) */
+               if (dp == &dis_opAVX660F[0xC5]) {
+                       /* vpextrw <imm8>, <xmm>, <reg> */
+                       x->d86_numopnds = 2;
+                       vbit = 2;
+               } else {
+                       x->d86_numopnds = 2;
+                       vbit = 1;
+               }
+
+               dtrace_get_modrm(x, &mode, &reg, &r_m);
+               dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
+               dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, vbit);
+               dtrace_get_operand(x, mode, r_m, wbit, vbit - 1);
+
+               if (vbit == 2)
+                       dtrace_imm_opnd(x, wbit, 1, 0);
+
+               break;
+
+       case VEX_RRI:
+               /* implicit(eflags/r32) := op(ModR/M.reg, ModR/M.rm) */
+               x->d86_numopnds = 2;
+
+               dtrace_get_modrm(x, &mode, &reg, &r_m);
+               dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
+               dtrace_get_operand(x, REG_ONLY, reg, wbit, 1);
+               dtrace_get_operand(x, mode, r_m, wbit, 0);
+               break;
+
+       case VEX_RX:
+               /* ModR/M.rm := op(ModR/M.reg) */
+               if (dp == &dis_opAVX660F3A[0x19]) {     /* vextractf128 */
+                       x->d86_numopnds = 3;
+
+                       dtrace_get_modrm(x, &mode, &reg, &r_m);
+                       dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
+
+                       dtrace_get_operand(x, mode, r_m, XMM_OPND, 2);
+                       dtrace_get_operand(x, REG_ONLY, reg, wbit, 1);
+
+                       /* one byte immediate number */
+                       dtrace_imm_opnd(x, wbit, 1, 0);
+                       break;
+               }
+
+               x->d86_numopnds = 2;
+
+               dtrace_get_modrm(x, &mode, &reg, &r_m);
+               dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
+               dtrace_get_operand(x, mode, r_m, wbit, 1);
+               dtrace_get_operand(x, REG_ONLY, reg, wbit, 0);
+               break;
+
+       case VEX_RR:
+               /* ModR/M.rm := op(ModR/M.reg) */
+               x->d86_numopnds = 2;
+
+               dtrace_get_modrm(x, &mode, &reg, &r_m);
+               dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
+
+               if (dp == &dis_opAVX660F[0x7E]) {
+                       /* vmovd/q <reg/mem 32/64>, <xmm> */
+#ifdef DIS_TEXT
+                       if (vex_W)
+                               x->d86_mnem[4] = 'q';
+#endif
+                       dtrace_get_operand(x, mode, r_m, LONG_OPND, 1);
+               } else
+                       dtrace_get_operand(x, mode, r_m, wbit, 1);
+
+               dtrace_get_operand(x, REG_ONLY, reg, wbit, 0);
+               break;
+
+       case VEX_RRi:
+               /* ModR/M.rm := op(ModR/M.reg, imm) */
+               x->d86_numopnds = 3;
+
+               dtrace_get_modrm(x, &mode, &reg, &r_m);
+               dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
+
+#ifdef DIS_TEXT
+               if (dp == &dis_opAVX660F3A[0x16]) {
+                       /* vpextrd/q <imm>, <xmm>, <reg/mem 32/64> */
+                       if (vex_W)
+                               x->d86_mnem[6] = 'q';
+               }
+#endif
+               dtrace_get_operand(x, mode, r_m, LONG_OPND, 2);
+               dtrace_get_operand(x, REG_ONLY, reg, wbit, 1);
+
+               /* one byte immediate number */
+               dtrace_imm_opnd(x, wbit, 1, 0);
+               break;
+
+       case VEX_RIM:
+               /* ModR/M.rm := op(ModR/M.reg, imm) */
+               x->d86_numopnds = 3;
+
+               dtrace_get_modrm(x, &mode, &reg, &r_m);
+               dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
+
+               dtrace_get_operand(x, mode, r_m, XMM_OPND, 2);
+               dtrace_get_operand(x, REG_ONLY, reg, wbit, 1);
+               /* one byte immediate number */
+               dtrace_imm_opnd(x, wbit, 1, 0);
+               break;
+
+       case VEX_RM:
+               /* ModR/M.rm := op(ModR/M.reg) */
+               if (dp == &dis_opAVX660F3A[0x17]) {     /* vextractps */
+                       x->d86_numopnds = 3;
+
+                       dtrace_get_modrm(x, &mode, &reg, &r_m);
+                       dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
+
+                       dtrace_get_operand(x, mode, r_m, LONG_OPND, 2);
+                       dtrace_get_operand(x, REG_ONLY, reg, wbit, 1);
+                       /* one byte immediate number */
+                       dtrace_imm_opnd(x, wbit, 1, 0);
+                       break;
+               }
+               x->d86_numopnds = 2;
+
+               dtrace_get_modrm(x, &mode, &reg, &r_m);
+               dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
+L_VEX_RM:
+               vbit = 1;
+               dtrace_get_operand(x, mode, r_m, wbit, vbit);
+               dtrace_get_operand(x, REG_ONLY, reg, wbit, vbit - 1);
+
+               break;
+
+       case VEX_RRM:
+               /* ModR/M.rm := op(VEX.vvvv, ModR/M.reg) */
+               x->d86_numopnds = 3;
+
+               dtrace_get_modrm(x, &mode, &reg, &r_m);
+               dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
+               dtrace_get_operand(x, mode, r_m, wbit, 2);
+               /* VEX uses the 1's complement form to encode the XMM/YMM regs */
+               dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 1);
+               dtrace_get_operand(x, REG_ONLY, reg, wbit, 0);
+               break;
+
+       case VEX_RMX:
+               /* ModR/M.reg := op(VEX.vvvv, ModR/M.rm) */
+               x->d86_numopnds = 3;
+
+               dtrace_get_modrm(x, &mode, &reg, &r_m);
+               dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
+               dtrace_get_operand(x, REG_ONLY, reg, wbit, 2);
+               dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 1);
+               dtrace_get_operand(x, REG_ONLY, r_m, wbit, 0);
+               break;
+
+       case VEX_NONE:
+#ifdef DIS_TEXT
+               if (vex_L)
+                       (void) strncpy(x->d86_mnem, "vzeroall", OPLEN);
+#endif
+               break;
+
        /* an invalid op code */
        case AM:
        case DM:
index 8627c26ba2d3bff6d2e0c12b8307921d5fe26d46..4d7891ba4cc1a7f16ae64bec7c2c6cade28bfefe 100644 (file)
@@ -78,16 +78,6 @@ grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype)
        return 0;
 }
 
-extern void md_prepare_for_shutdown(int, int, char *);
-
-void
-md_prepare_for_shutdown(
-       __unused int paniced,
-       __unused int howto,
-       __unused char * command)
-{
-}
-
 boolean_t
 pie_required(cpu_type_t exectype __unused, cpu_subtype_t execsubtype __unused)
 {
index a314e46b532a3ef003be31d427fdf425c079777b..f7c09455dc2837540ac151fa1d2c2d7c8a3fd214 100644 (file)
@@ -498,9 +498,15 @@ SYSCTL_NODE(_machdep_cpu, OID_AUTO, xsave, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
 
 SYSCTL_PROC(_machdep_cpu_xsave, OID_AUTO, extended_state,
            CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
-           (void *)offsetof(cpuid_xsave_leaf_t, extended_state),
+           (void *) 0,
            sizeof(cpuid_xsave_leaf_t),
-           cpu_xsave, "IU", "XSAVE Extended State");
+           cpu_xsave, "IU", "XSAVE Extended State Main Leaf");
+
+SYSCTL_PROC(_machdep_cpu_xsave, OID_AUTO, extended_state1,
+           CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
+           (void *) sizeof(cpuid_xsave_leaf_t),
+           sizeof(cpuid_xsave_leaf_t),
+           cpu_xsave, "IU", "XSAVE Extended State Sub-leaf 1");
 
 
 SYSCTL_NODE(_machdep_cpu, OID_AUTO, arch_perf, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
@@ -681,7 +687,7 @@ SYSCTL_PROC(_machdep_cpu_flex_ratio, OID_AUTO, max,
            cpu_flex_ratio_max, "I", "Flex ratio max (non-turbo)");
 
 SYSCTL_PROC(_machdep_cpu, OID_AUTO, ucupdate, 
-                       CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED, 0, 0,
+           CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED, 0, 0,
             cpu_ucode_update, "S", "Microcode update interface");
 
 static const uint32_t apic_timer_vector = (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_TIMER_INTERRUPT);
@@ -690,8 +696,8 @@ static const uint32_t apic_IPI_vector = (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_IN
 SYSCTL_NODE(_machdep, OID_AUTO, vectors, CTLFLAG_RD | CTLFLAG_LOCKED, 0,
        "Interrupt vector assignments");
 
-SYSCTL_UINT     (_machdep_vectors, OID_AUTO, timer, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (uint32_t *)&apic_timer_vector, 0, "");
-SYSCTL_UINT     (_machdep_vectors, OID_AUTO, IPI, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (uint32_t *)&apic_IPI_vector, 0, "");
+SYSCTL_UINT     (_machdep_vectors, OID_AUTO, timer, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, __DECONST(uint32_t *,&apic_timer_vector), 0, "");
+SYSCTL_UINT     (_machdep_vectors, OID_AUTO, IPI, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, __DECONST(uint32_t *,&apic_IPI_vector), 0, "");
 
 uint64_t pmap_pv_hashlist_walks;
 uint64_t pmap_pv_hashlist_cnts;
@@ -743,19 +749,19 @@ SYSCTL_NODE(_machdep_tsc, OID_AUTO, nanotime,
        CTLFLAG_RD|CTLFLAG_LOCKED, NULL, "TSC to ns conversion");
 SYSCTL_QUAD(_machdep_tsc_nanotime, OID_AUTO, tsc_base,
        CTLFLAG_RD | CTLFLAG_LOCKED,
-       (uint64_t *) &pal_rtc_nanotime_info.tsc_base, "");
+       __DECONST(uint64_t *, &pal_rtc_nanotime_info.tsc_base), "");
 SYSCTL_QUAD(_machdep_tsc_nanotime, OID_AUTO, ns_base,
        CTLFLAG_RD | CTLFLAG_LOCKED,
-       (uint64_t *)&pal_rtc_nanotime_info.ns_base, "");
+       __DECONST(uint64_t *, &pal_rtc_nanotime_info.ns_base), "");
 SYSCTL_UINT(_machdep_tsc_nanotime, OID_AUTO, scale,
        CTLFLAG_RD | CTLFLAG_LOCKED,
-       (uint32_t *)&pal_rtc_nanotime_info.scale, 0, "");
+       __DECONST(uint32_t *, &pal_rtc_nanotime_info.scale), 0, "");
 SYSCTL_UINT(_machdep_tsc_nanotime, OID_AUTO, shift,
        CTLFLAG_RD | CTLFLAG_LOCKED,
-       (uint32_t *)&pal_rtc_nanotime_info.shift, 0, "");
+       __DECONST(uint32_t *, &pal_rtc_nanotime_info.shift), 0, "");
 SYSCTL_UINT(_machdep_tsc_nanotime, OID_AUTO, generation,
        CTLFLAG_RD | CTLFLAG_LOCKED,
-       (uint32_t *)&pal_rtc_nanotime_info.generation, 0, "");
+       __DECONST(uint32_t *, &pal_rtc_nanotime_info.generation), 0, "");
 
 SYSCTL_NODE(_machdep, OID_AUTO, misc, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
        "Miscellaneous x86 kernel parameters");
@@ -775,7 +781,14 @@ SYSCTL_PROC(_machdep_misc, OID_AUTO, machine_check_panic,
            0, 0,
            misc_machine_check_panic, "A", "Machine-check exception test");
 
-
+#if DEVELOPMENT || DEBUG
+SYSCTL_QUAD(_machdep, OID_AUTO, reportphyreadabs,
+               CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED,
+               &reportphyreaddelayabs, "");
+SYSCTL_INT(_machdep, OID_AUTO, reportphyreadosbt,
+               CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED,
+               &reportphyreadosbt, 0, "");
+#endif
 
 extern void timer_queue_trace_cpu(int);
 static int
@@ -848,3 +861,7 @@ extern uint64_t ml_timer_eager_evaluation_max;
 SYSCTL_QUAD(_machdep, OID_AUTO, eager_timer_evaluation_max,
                CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED,
                &ml_timer_eager_evaluation_max, "");
+extern uint64_t x86_isr_fp_simd_use;
+SYSCTL_QUAD(_machdep, OID_AUTO, x86_fp_simd_isr_uses,
+               CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED,
+               &x86_isr_fp_simd_use, "");
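For readers skimming the sysctl.c hunks above: the change swaps bare pointer casts for __DECONST when registering read-only sysctls that are backed by const variables. A minimal userspace sketch of the idiom follows; the macro definition and all names are illustrative assumptions, not taken from this commit.

#include <stdint.h>
#include <stdio.h>

/* Conventional BSD-style definition (assumption for this sketch). */
#define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var))

static const uint32_t demo_timer_vector = 0x40;   /* hypothetical value */

/* Stand-in for a sysctl-style registration that wants a non-const
 * pointer but only ever reads through it. */
static void register_ro_uint(uint32_t *p) { printf("registered %u\n", *p); }

int main(void)
{
    /* __DECONST documents that constness is dropped deliberately,
     * instead of hiding it behind a bare (uint32_t *) cast. */
    register_ro_uint(__DECONST(uint32_t *, &demo_timer_vector));
    return 0;
}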
index 9f57943df18a8df3c96dadd70861d44e7fd60e6c..2c7e93ea2c5da5efffd822604b1c5f43b15585bb 100644 (file)
@@ -69,7 +69,9 @@ extern void *find_user_regs(thread_t);
 /* dynamically generated at build time based on syscalls.master */
 extern const char *syscallnames[];
 
-#define code_is_kdebug_trace(code) (((code) == SYS_kdebug_trace) || ((code) == SYS_kdebug_trace64))
+#define code_is_kdebug_trace(code) (((code) == SYS_kdebug_trace) ||   \
+                                    ((code) == SYS_kdebug_trace64) || \
+                                    ((code) == SYS_kdebug_trace_string))
 
 /*
  * Function:   unix_syscall
@@ -102,6 +104,10 @@ unix_syscall(x86_saved_state_t *state)
        thread = current_thread();
        uthread = get_bsdthread_info(thread);
 
+#if PROC_REF_DEBUG
+       uthread_reset_proc_refcount(uthread);
+#endif
+
        /* Get the appropriate proc; may be different from task's for vfork() */
        is_vfork = uthread->uu_flag & UT_VFORK;
        if (__improbable(is_vfork != 0))
@@ -250,6 +256,12 @@ unix_syscall(x86_saved_state_t *state)
                pal_execve_return(thread);
        }
 
+#if PROC_REF_DEBUG
+       if (__improbable(uthread_get_proc_refcount(uthread) != 0)) {
+               panic("system call returned with uu_proc_refcount != 0");
+       }
+#endif
+
        thread_exception_return();
        /* NOTREACHED */
 }
@@ -278,6 +290,10 @@ unix_syscall64(x86_saved_state_t *state)
        thread = current_thread();
        uthread = get_bsdthread_info(thread);
 
+#if PROC_REF_DEBUG
+       uthread_reset_proc_refcount(uthread);
+#endif
+
        /* Get the appropriate proc; may be different from task's for vfork() */
        if (__probable(!(uthread->uu_flag & UT_VFORK)))
                p = (struct proc *)get_bsdtask_info(current_task());
@@ -439,6 +455,12 @@ unix_syscall64(x86_saved_state_t *state)
                        BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
                        error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);
 
+#if PROC_REF_DEBUG
+       if (__improbable(uthread_get_proc_refcount(uthread))) {
+               panic("system call returned with uu_proc_refcount != 0");
+       }
+#endif
+
        thread_exception_return();
        /* NOTREACHED */
 }
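The PROC_REF_DEBUG additions above bracket each system call with a leak check: the per-uthread proc reference count is reset on entry and must be zero again before returning to user space, otherwise the kernel panics. A hedged userspace sketch of the same entry/exit invariant; every name below is illustrative, not the kernel's.

#include <assert.h>
#include <stdio.h>

/* Illustrative stand-ins for uthread_reset_proc_refcount() and
 * uthread_get_proc_refcount(); the real helpers differ. */
static int demo_proc_refcount;

static void demo_reset_refcount(void) { demo_proc_refcount = 0; }
static int  demo_get_refcount(void)   { return demo_proc_refcount; }
static void demo_take_ref(void)       { demo_proc_refcount++; }
static void demo_drop_ref(void)       { demo_proc_refcount--; }

/* The commit's pattern: reset on entry, panic (here: assert) on exit
 * if a reference taken during the call was not released. */
static void demo_syscall(void)
{
    demo_reset_refcount();

    demo_take_ref();
    /* ... work that must balance its own references ... */
    demo_drop_ref();

    assert(demo_get_refcount() == 0 &&
           "system call returned with uu_proc_refcount != 0");
}

int main(void) { demo_syscall(); puts("refcount balanced"); return 0; }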
index 5a659bebd3039b815e168024abc104721e8e304a..ac6dd485e22781956db844472a85a693f4487af2 100644 (file)
@@ -191,7 +191,7 @@ static      int mdevopen(dev_t dev, int flags, __unused int devtype, __unused struct
 
        devid = minor(dev);                                                                     /* Get minor device number */
 
-       if (devid > 16) return (ENXIO);                                         /* Not valid */
+       if (devid >= 16) return (ENXIO);                                                /* Not valid */
 
        if ((flags & FWRITE) && (mdev[devid].mdFlags & mdRO)) return (EACCES);  /* Currently mounted RO */
 
@@ -206,7 +206,7 @@ static int mdevrw(dev_t dev, struct uio *uio, __unused int ioflag) {
 
        devid = minor(dev);                                                                     /* Get minor device number */
 
-       if (devid > 16) return (ENXIO);                                         /* Not valid */
+       if (devid >= 16) return (ENXIO);                                                /* Not valid */
        if (!(mdev[devid].mdFlags & mdInited))  return (ENXIO); /* Have we actually been defined yet? */
 
        mdata = ((addr64_t)mdev[devid].mdBase << 12) + uio->uio_offset; /* Point to the area in "file" */
@@ -358,7 +358,7 @@ static int mdevioctl(dev_t dev, u_long cmd, caddr_t data, __unused int flag,
 
        devid = minor(dev);                                                                     /* Get minor device number */
 
-       if (devid > 16) return (ENXIO);                                         /* Not valid */
+       if (devid >= 16) return (ENXIO);                                                /* Not valid */
 
        error = proc_suser(p);                  /* Are we superman? */
        if (error) return (error);                                                      /* Nope... */
@@ -401,11 +401,6 @@ static int mdevioctl(dev_t dev, u_long cmd, caddr_t data, __unused int flag,
                        *f = 1;
                        break;
                        
-               case DKIOCGETBLOCKCOUNT32:
-                       if(!(mdev[devid].mdFlags & mdInited)) return (ENXIO);
-                       *f = ((mdev[devid].mdSize << 12) + mdev[devid].mdSecsize - 1) / mdev[devid].mdSecsize;
-                       break;
-                       
                case DKIOCGETBLOCKCOUNT:
                        if(!(mdev[devid].mdFlags & mdInited)) return (ENXIO);
                        *o = ((mdev[devid].mdSize << 12) + mdev[devid].mdSecsize - 1) / mdev[devid].mdSecsize;
@@ -439,7 +434,7 @@ static      int mdevsize(dev_t dev) {
        int devid;
 
        devid = minor(dev);                                                                     /* Get minor device number */
-       if (devid > 16) return (ENXIO);                                         /* Not valid */
+       if (devid >= 16) return (ENXIO);                                                /* Not valid */
 
        if ((mdev[devid].mdFlags & mdInited) == 0) return(-1);          /* Not inited yet */
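The mdev hunks above tighten the minor-number check from devid > 16 to devid >= 16: with a 16-entry device table, valid indices are 0 through 15, so the old test let minor 16 index one slot past the end. A tiny sketch of the fence-post; the table size of 16 is an assumption matching the bound in the check.

#include <errno.h>
#include <stdio.h>

#define DEMO_NUM_MDEVS 16               /* assumed table size */

static int demo_mdev[DEMO_NUM_MDEVS];

/* Valid indices are 0..15; ">= 16" rejects exactly the first invalid one,
 * whereas "> 16" would have accepted devid == 16 and read out of bounds. */
static int demo_lookup(int devid)
{
    if (devid >= DEMO_NUM_MDEVS)
        return -ENXIO;
    return demo_mdev[devid];
}

int main(void)
{
    printf("devid 15 -> %d\n", demo_lookup(15));   /* last valid slot */
    printf("devid 16 -> %d\n", demo_lookup(16));   /* now rejected */
    return 0;
}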
 
index adaba8e1ae43bf60c9b6960ba84caf6662317a84..edd1b7273e0ae0b5ff09b9e36150e25e1a7b2f0a 100644 (file)
@@ -92,7 +92,7 @@ munge_wl(void *args)
        volatile uint64_t *out_args = (volatile uint64_t*)args;
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
-       out_args[1] = *(uint64_t*)&in_args[1];
+       out_args[1] = *(volatile uint64_t*)&in_args[1];
        out_args[0] = in_args[0];
 }
 
@@ -102,7 +102,7 @@ munge_wwl(void *args)
        volatile uint64_t *out_args = (volatile uint64_t*)args;
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
-       out_args[2] = *(uint64_t*)&in_args[2];
+       out_args[2] = *(volatile uint64_t*)&in_args[2];
        out_args[1] = in_args[1];
        out_args[0] = in_args[0];
 }
@@ -114,7 +114,7 @@ munge_wwlw(void *args)
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
        out_args[3] = in_args[4];
-       out_args[2] = *(uint64_t*)&in_args[2];
+       out_args[2] = *(volatile uint64_t*)&in_args[2];
        out_args[1] = in_args[1];
        out_args[0] = in_args[0];
 }
@@ -124,9 +124,9 @@ munge_wwlll(void *args)
        volatile uint64_t *out_args = (volatile uint64_t*)args;
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
-       out_args[4] = *(uint64_t*)&in_args[6];
-       out_args[3] = *(uint64_t*)&in_args[4];
-       out_args[2] = *(uint64_t*)&in_args[2];
+       out_args[4] = *(volatile uint64_t*)&in_args[6];
+       out_args[3] = *(volatile uint64_t*)&in_args[4];
+       out_args[2] = *(volatile uint64_t*)&in_args[2];
        out_args[1] = in_args[1];
        out_args[0] = in_args[0];
 }
@@ -139,8 +139,8 @@ munge_wwllww(void *args)
 
        out_args[5] = in_args[7];
        out_args[4] = in_args[6];
-       out_args[3] = *(uint64_t*)&in_args[4];
-       out_args[2] = *(uint64_t*)&in_args[2];
+       out_args[3] = *(volatile uint64_t*)&in_args[4];
+       out_args[2] = *(volatile uint64_t*)&in_args[2];
        out_args[1] = in_args[1];
        out_args[0] = in_args[0];
 }
@@ -152,7 +152,19 @@ munge_wlw(void *args)
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
        out_args[2] = in_args[3];
-       out_args[1] = *(uint64_t*)&in_args[1];
+       out_args[1] = *(volatile uint64_t*)&in_args[1];
+       out_args[0] = in_args[0];
+}
+
+void
+munge_wlww(void *args)
+{
+       volatile uint64_t *out_args = (volatile uint64_t*)args;
+       volatile uint32_t *in_args = (volatile uint32_t*)args;
+
+       out_args[3] = in_args[4];
+       out_args[2] = in_args[3];
+       out_args[1] = *(volatile uint64_t*)&in_args[1];
        out_args[0] = in_args[0];
 }
 
@@ -162,12 +174,12 @@ munge_wlwwwll(void *args)
        volatile uint64_t *out_args = (volatile uint64_t*)args;
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
-       out_args[6] = *(uint64_t*)&in_args[8];
-       out_args[5] = *(uint64_t*)&in_args[6];
+       out_args[6] = *(volatile uint64_t*)&in_args[8];
+       out_args[5] = *(volatile uint64_t*)&in_args[6];
        out_args[4] = in_args[5];
        out_args[3] = in_args[4];
        out_args[2] = in_args[3];
-       out_args[1] = *(uint64_t*)&in_args[1];
+       out_args[1] = *(volatile uint64_t*)&in_args[1];
        out_args[0] = in_args[0];
 }
 
@@ -188,12 +200,12 @@ munge_wlwwlwlw(void *args)
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
        out_args[7] = in_args[10];
-       out_args[6] = *(uint64_t*)&in_args[8];
+       out_args[6] = *(volatile uint64_t*)&in_args[8];
        out_args[5] = in_args[7];
-       out_args[4] = *(uint64_t*)&in_args[5];
+       out_args[4] = *(volatile uint64_t*)&in_args[5];
        out_args[3] = in_args[4];
        out_args[2] = in_args[3];
-       out_args[1] = *(uint64_t*)&in_args[1];
+       out_args[1] = *(volatile uint64_t*)&in_args[1];
        out_args[0] = in_args[0];
 }
 
@@ -203,8 +215,8 @@ munge_wll(void *args)
        volatile uint64_t *out_args = (volatile uint64_t*)args;
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
-       out_args[2] = *(uint64_t*)&in_args[3];
-       out_args[1] = *(uint64_t*)&in_args[1];
+       out_args[2] = *(volatile uint64_t*)&in_args[3];
+       out_args[1] = *(volatile uint64_t*)&in_args[1];
        out_args[0] = in_args[0];
 }
 
@@ -214,9 +226,9 @@ munge_wlll(void *args)
        volatile uint64_t *out_args = (volatile uint64_t*)args;
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
-       out_args[3] = *(uint64_t*)&in_args[5];
-       out_args[2] = *(uint64_t*)&in_args[3];
-       out_args[1] = *(uint64_t*)&in_args[1];
+       out_args[3] = *(volatile uint64_t*)&in_args[5];
+       out_args[2] = *(volatile uint64_t*)&in_args[3];
+       out_args[1] = *(volatile uint64_t*)&in_args[1];
        out_args[0] = in_args[0];
 }
 
@@ -226,10 +238,10 @@ munge_wllll(void *args)
        volatile uint64_t *out_args = (volatile uint64_t*)args;
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
-       out_args[4] = *(uint64_t*)&in_args[7];
-       out_args[3] = *(uint64_t*)&in_args[5];
-       out_args[2] = *(uint64_t*)&in_args[3];
-       out_args[1] = *(uint64_t*)&in_args[1];
+       out_args[4] = *(volatile uint64_t*)&in_args[7];
+       out_args[3] = *(volatile uint64_t*)&in_args[5];
+       out_args[2] = *(volatile uint64_t*)&in_args[3];
+       out_args[1] = *(volatile uint64_t*)&in_args[1];
        out_args[0] = in_args[0];
 }
 
@@ -241,8 +253,8 @@ munge_wllww(void *args)
 
        out_args[4] = in_args[6];
        out_args[3] = in_args[5];
-       out_args[2] = *(uint64_t*)&in_args[3];
-       out_args[1] = *(uint64_t*)&in_args[1];
+       out_args[2] = *(volatile uint64_t*)&in_args[3];
+       out_args[1] = *(volatile uint64_t*)&in_args[1];
        out_args[0] = in_args[0];
 }
 
@@ -252,12 +264,12 @@ munge_wllwwll(void *args)
        volatile uint64_t *out_args = (volatile uint64_t*)args;
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
-       out_args[6] = *(uint64_t*)&in_args[9];
-       out_args[5] = *(uint64_t*)&in_args[7];
+       out_args[6] = *(volatile uint64_t*)&in_args[9];
+       out_args[5] = *(volatile uint64_t*)&in_args[7];
        out_args[4] = in_args[6];
        out_args[3] = in_args[5];
-       out_args[2] = *(uint64_t*)&in_args[3];
-       out_args[1] = *(uint64_t*)&in_args[1];
+       out_args[2] = *(volatile uint64_t*)&in_args[3];
+       out_args[1] = *(volatile uint64_t*)&in_args[1];
        out_args[0] = in_args[0];
 }
 
@@ -268,7 +280,7 @@ munge_wwwlw(void *args)
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
        out_args[4] = in_args[5];
-       out_args[3] = *(uint64_t*)&in_args[3];
+       out_args[3] = *(volatile uint64_t*)&in_args[3];
        out_args[2] = in_args[2];
        out_args[1] = in_args[1];
        out_args[0] = in_args[0];
@@ -282,7 +294,7 @@ munge_wwwlww(void *args)
 
        out_args[5] = in_args[6];
        out_args[4] = in_args[5];
-       out_args[3] = *(uint64_t*)&in_args[3];
+       out_args[3] = *(volatile uint64_t*)&in_args[3];
        out_args[2] = in_args[2];
        out_args[1] = in_args[1];
        out_args[0] = in_args[0];
@@ -294,7 +306,7 @@ munge_wwwl(void *args)
        volatile uint64_t *out_args = (volatile uint64_t*)args;
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
-       out_args[3] = *(uint64_t*)&in_args[3];
+       out_args[3] = *(volatile uint64_t*)&in_args[3];
        out_args[2] = in_args[2];
        out_args[1] = in_args[1];
        out_args[0] = in_args[0];
@@ -307,7 +319,7 @@ munge_wwwwlw(void *args)
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
        out_args[5] = in_args[6];
-       out_args[4] = *(uint64_t*)&in_args[4];
+       out_args[4] = *(volatile uint64_t*)&in_args[4];
        out_args[3] = in_args[3];
        out_args[2] = in_args[2];
        out_args[1] = in_args[1];
@@ -320,7 +332,7 @@ munge_wwwwl(void *args)
        volatile uint64_t *out_args = (volatile uint64_t*)args;
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
-       out_args[4] = *(uint64_t*)&in_args[4];
+       out_args[4] = *(volatile uint64_t*)&in_args[4];
        out_args[3] = in_args[3];
        out_args[2] = in_args[2];
        out_args[1] = in_args[1];
@@ -333,7 +345,7 @@ munge_wwwwwl(void *args)
        volatile uint64_t *out_args = (volatile uint64_t*)args;
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
-       out_args[5] = *(uint64_t*)&in_args[5];
+       out_args[5] = *(volatile uint64_t*)&in_args[5];
        out_args[4] = in_args[4];
        out_args[3] = in_args[3];
        out_args[2] = in_args[2];
@@ -349,7 +361,7 @@ munge_wwwwwlww(void *args)
 
        out_args[7] = in_args[8];
        out_args[6] = in_args[7];
-       out_args[5] = *(uint64_t*)&in_args[5];
+       out_args[5] = *(volatile uint64_t*)&in_args[5];
        out_args[4] = in_args[4];
        out_args[3] = in_args[3];
        out_args[2] = in_args[2];
@@ -364,8 +376,8 @@ munge_wwwwwllw(void *args)
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
        out_args[7] = in_args[9];
-       out_args[6] = *(uint64_t*)&in_args[7];
-       out_args[5] = *(uint64_t*)&in_args[5];
+       out_args[6] = *(volatile uint64_t*)&in_args[7];
+       out_args[5] = *(volatile uint64_t*)&in_args[5];
        out_args[4] = in_args[4];
        out_args[3] = in_args[3];
        out_args[2] = in_args[2];
@@ -379,9 +391,9 @@ munge_wwwwwlll(void *args)
        volatile uint64_t *out_args = (volatile uint64_t*)args;
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
-       out_args[7] = *(uint64_t*)&in_args[9];
-       out_args[6] = *(uint64_t*)&in_args[7];
-       out_args[5] = *(uint64_t*)&in_args[5];
+       out_args[7] = *(volatile uint64_t*)&in_args[9];
+       out_args[6] = *(volatile uint64_t*)&in_args[7];
+       out_args[5] = *(volatile uint64_t*)&in_args[5];
        out_args[4] = in_args[4];
        out_args[3] = in_args[3];
        out_args[2] = in_args[2];
@@ -395,7 +407,7 @@ munge_wwwwwwl(void *args)
        volatile uint64_t *out_args = (volatile uint64_t*)args;
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
-       out_args[6] = *(uint64_t*)&in_args[6];
+       out_args[6] = *(volatile uint64_t*)&in_args[6];
        out_args[5] = in_args[5];
        out_args[4] = in_args[4];
        out_args[3] = in_args[3];
@@ -411,7 +423,7 @@ munge_wwwwwwlw(void *args)
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
        out_args[7] = in_args[8];
-       out_args[6] = *(uint64_t*)&in_args[6];
+       out_args[6] = *(volatile uint64_t*)&in_args[6];
        out_args[5] = in_args[5];
        out_args[4] = in_args[4];
        out_args[3] = in_args[3];
@@ -426,8 +438,8 @@ munge_wwwwwwll(void *args)
        volatile uint64_t *out_args = (volatile uint64_t*)args;
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
-       out_args[7] = *(uint64_t*)&in_args[8];
-       out_args[6] = *(uint64_t*)&in_args[6];
+       out_args[7] = *(volatile uint64_t*)&in_args[8];
+       out_args[6] = *(volatile uint64_t*)&in_args[6];
        out_args[5] = in_args[5];
        out_args[4] = in_args[4];
        out_args[3] = in_args[3];
@@ -459,6 +471,19 @@ munge_wws(void *args)
 }
 
 void 
+munge_wwws(void *args)
+{
+       volatile uint64_t *out_args = (volatile uint64_t*)args;
+       volatile uint32_t *in_args = (volatile uint32_t*)args;
+
+       out_args[3] = (int64_t)(int)in_args[3]; /* Sign-extend */
+       out_args[2] = in_args[2];
+       out_args[1] = in_args[1];
+       out_args[0] = in_args[0];
+}
+
+
+void
 munge_wwwsw(void *args)
 {
        volatile uint64_t *out_args = (volatile uint64_t*)args;
@@ -496,7 +521,7 @@ munge_lw(void *args)
        volatile uint32_t *in_args = (volatile uint32_t*)args;
 
        out_args[1] = in_args[2];
-       out_args[0] = *(uint64_t*)&in_args[0];
+       out_args[0] = *(volatile uint64_t*)&in_args[0];
 }
 
 void 
@@ -508,7 +533,7 @@ munge_lwww(void *args)
        out_args[3] = in_args[4]; 
        out_args[2] = in_args[3];
        out_args[1] = in_args[2];
-       out_args[0] = *(uint64_t*)&in_args[0];
+       out_args[0] = *(volatile uint64_t*)&in_args[0];
 }
 
 void
@@ -520,7 +545,7 @@ munge_wwlwww(void *args)
        out_args[5] = in_args[6];
        out_args[4] = in_args[5];
        out_args[3] = in_args[4];
-       out_args[2] = *(uint64_t*)&in_args[2];
+       out_args[2] = *(volatile uint64_t*)&in_args[2];
        out_args[1] = in_args[1];
        out_args[0] = in_args[0];
 }
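The munge_* edits above add volatile to every 64-bit load; the functions themselves expand a packed array of 32-bit user-space arguments into 64-bit slots in place, which is why they always fill the highest slot first. A compact userspace sketch of that back-to-front, in-place expansion; the "wlw" layout and the little-endian/x86 assumptions are spelled out in the comments.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Sketch of a "wlw" signature: word, 64-bit long, word, packed as four
 * 32-bit input slots that alias three 64-bit output slots.  Writing
 * out_args[2] before out_args[1] keeps not-yet-read input words from
 * being clobbered.  As in the kernel code, the 64-bit load from a
 * 4-byte-aligned address relies on x86 tolerating it. */
static void demo_munge_wlw(void *args)
{
    volatile uint64_t *out_args = (volatile uint64_t *)args;
    volatile uint32_t *in_args  = (volatile uint32_t *)args;

    out_args[2] = in_args[3];
    out_args[1] = *(volatile uint64_t *)&in_args[1];
    out_args[0] = in_args[0];
}

int main(void)
{
    uint64_t buf[3];
    uint32_t in[4] = { 7, 0x44332211u, 0x88776655u, 9 };  /* w, l-lo, l-hi, w */

    memcpy(buf, in, sizeof(in));
    demo_munge_wlw(buf);
    /* On little-endian this prints: w=7 l=0x8877665544332211 w=9 */
    printf("w=%llu l=0x%llx w=%llu\n",
           (unsigned long long)buf[0],
           (unsigned long long)buf[1],
           (unsigned long long)buf[2]);
    return 0;
}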
index ddb2baa685264622cd21c8a3ac332e85739abb20..25c3610d0652e758ea9384bead9aceca40fabad3 100644 (file)
@@ -52,6 +52,7 @@
 #include <pexpert/pexpert.h>
 #include <sys/socketvar.h>
 #include <pexpert/pexpert.h>
+#include <netinet/tcp_var.h>
 
 extern uint32_t kern_maxvnodes;
 extern vm_map_t mb_map;
@@ -62,7 +63,6 @@ extern uint32_t   tcp_recvspace;
 #endif
 
 void            bsd_bufferinit(void);
-extern void     md_prepare_for_shutdown(int, int, char *);
 
 unsigned int   bsd_mbuf_cluster_reserve(boolean_t *);
 void bsd_scale_setup(int);
@@ -140,7 +140,7 @@ bsd_startupearly(void)
                            &firstaddr,
                            size,
                            FALSE,
-                           VM_FLAGS_ANYWHERE,
+                           VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_FILE),
                            &bufferhdr_map);
 
        if (ret != KERN_SUCCESS)
@@ -150,7 +150,8 @@ bsd_startupearly(void)
                                     &firstaddr,
                                     size,
                                     0,
-                                    KMA_HERE | KMA_KOBJECT);
+                                    KMA_HERE | KMA_KOBJECT,
+                                    VM_KERN_MEMORY_FILE);
 
        if (ret != KERN_SUCCESS)
                panic("Failed to allocate bufferhdr_map");
@@ -215,10 +216,10 @@ bsd_bufferinit(void)
 
 #if SOCKETS
        ret = kmem_suballoc(kernel_map,
-                           (vm_offset_t *) & mbutl,
+                           (vm_offset_t *) &mbutl,
                            (vm_size_t) (nmbclusters * MCLBYTES),
                            FALSE,
-                           VM_FLAGS_ANYWHERE,
+                           VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_MBUF),
                            &mb_map);
 
        if (ret != KERN_SUCCESS)
@@ -291,8 +292,8 @@ bsd_mbuf_cluster_reserve(boolean_t *overridden)
                                nmbclusters = MAX_NCL;
                }
 
-               /* Round it down to nearest multiple of 4KB clusters */
-               nmbclusters = P2ROUNDDOWN(nmbclusters, NCLPBG);
+               /* Round it down to nearest multiple of PAGE_SIZE */
+               nmbclusters = P2ROUNDDOWN(nmbclusters, NCLPG);
        }
        mbuf_poolsz = nmbclusters << MCLSHIFT;
 done:
@@ -327,15 +328,16 @@ bsd_scale_setup(int scale)
                maxfilesperproc = maxfiles/2;
                desiredvnodes = maxfiles;
                vnodes_sized = 1;
+               tcp_tfo_backlog = 100 * scale;
                if (scale > 4) {
                        /* clip somaxconn at 32G level */
                        somaxconn = 2048;
-                       /* 
-                        * For scale > 4 (> 32G), clip 
+                       /*
+                        * For scale > 4 (> 32G), clip
                         * tcp_tcbhashsize to 32K
                         */
                        tcp_tcbhashsize = 32 *1024;
-                       
+
                        if (scale > 7) {
                                /* clip at 64G level */
                                max_cached_sock_count = 165000;
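Among the changes above, bsd_mbuf_cluster_reserve() now rounds nmbclusters down to a multiple of NCLPG (clusters per page) rather than the old 4KB-cluster constant. P2ROUNDDOWN only behaves when the alignment is a power of two; a quick sketch, with the macro written in the usual bit-mask form as an assumption.

#include <stdint.h>
#include <stdio.h>

/* Usual power-of-two round-down (assumed form, not copied from xnu). */
#define P2ROUNDDOWN(x, align)  ((x) & ~((align) - 1))

int main(void)
{
    uint32_t nclpg_demo       = 8;      /* hypothetical clusters per page */
    uint32_t nmbclusters_demo = 1021;

    /* 1021 & ~7 == 1016, so the reserve becomes a whole number of pages. */
    printf("%u -> %u clusters\n", nmbclusters_demo,
           (uint32_t)P2ROUNDDOWN(nmbclusters_demo, nclpg_demo));
    return 0;
}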
index 457e58370eea79547ba971df410aeb807f67501f..703a8ad7a0d86eaf03782b8f7c02d7966a9e859f 100644 (file)
@@ -908,7 +908,6 @@ vnioctl(dev_t dev, u_long cmd, caddr_t data,
        case DKIOCGETMAXSEGMENTBYTECOUNTREAD:
        case DKIOCGETMAXSEGMENTBYTECOUNTWRITE:
        case DKIOCGETBLOCKCOUNT:
-       case DKIOCGETBLOCKCOUNT32:
                if ((vn->sc_flags & VNF_INITED) == 0) {
                        error = ENXIO;
                        goto done;
@@ -979,9 +978,6 @@ vnioctl(dev_t dev, u_long cmd, caddr_t data,
        case DKIOCISWRITABLE:
                *f = 1;
                break;
-       case DKIOCGETBLOCKCOUNT32:
-               *f = vn->sc_size;
-               break;
        case DKIOCGETBLOCKCOUNT:
                *o = vn->sc_size;
                break;
index 0f11a9737b6606dbfde525b9c1ae838036f28e1e..ccf82f04f0940320ca7614be372c43b36ccc6a68 100644 (file)
@@ -14,15 +14,17 @@ PRIVATE_DATAFILES = \
        hfs.h hfs_attrlist.h hfs_catalog.h hfs_cnode.h hfs_endian.h \
        hfs_fsctl.h hfs_macos_defs.h hfs_quota.h rangelist.h
 
+KERNELFILES = ${DATAFILES}
+
 INSTALL_MI_LIST        = ${DATAFILES}
 
 INSTALL_MI_DIR = hfs
 
-EXPORT_MI_LIST = ${DATAFILES}
+EXPORT_MI_LIST = ${KERNELFILES}
 
 EXPORT_MI_DIR = hfs
 
-INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES}
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
 
 include $(MakeInc_rule)
 include $(MakeInc_dir)
index 52b8faca1ad91c36c9e90240e2197907413ce55f..b19e2320898c532c0db8ba060e1001bfa8d1a9b0 100644 (file)
@@ -43,6 +43,8 @@
 #define HFS_CHECK_LOCK_ORDER 1
 #endif
 
+#define HFS_TMPDBG 0
+
 #include <sys/appleapiopts.h>
 
 #ifdef KERNEL
@@ -72,6 +74,8 @@
 #if CONFIG_PROTECT
 /* Forward declare the cprotect struct */
 struct cprotect;
+
+
 #endif
 
 /*
@@ -194,7 +198,9 @@ typedef struct hfsmount {
        time_t        hfs_mtime;          /* file system last modification time */
        u_int32_t     hfs_filecount;      /* number of files in file system */
        u_int32_t     hfs_dircount;       /* number of directories in file system */
-       u_int32_t     freeBlocks;         /* free allocation blocks */
+       u_int32_t     freeBlocks;                 /* free allocation blocks */
+       u_int32_t         reclaimBlocks;          /* number of blocks we are reclaiming during resize */
+       u_int32_t         tentativeBlocks;        /* tentative allocation blocks -- see note below */
        u_int32_t     nextAllocation;     /* start of next allocation search */
        u_int32_t     sparseAllocation;   /* start of allocations for sparse devices */
        u_int32_t     vcbNxtCNID;         /* next unused catalog node ID - protected by catalog lock */
@@ -205,7 +211,13 @@ typedef struct hfsmount {
 
        /* Persistent fields (on disk, static) */
        u_int16_t                       vcbSigWord;
-       int16_t                         vcbFlags; /* Runtime flag to indicate if volume is dirty/clean */
+
+       // Volume will be inconsistent if header is not flushed
+       bool                            hfs_header_dirty;
+
+       // Volume header is dirty, but won't be inconsistent if not flushed
+       bool                            hfs_header_minor_change;
+
        u_int32_t                       vcbAtrb;
        u_int32_t                       vcbJinfoBlock;
        u_int32_t                       localCreateDate;/* volume create time from volume header (For HFS+, value is in local time) */
@@ -247,7 +259,7 @@ typedef struct hfsmount {
 
        u_int32_t               reserveBlocks;          /* free block reserve */
        u_int32_t               loanedBlocks;           /* blocks on loan for delayed allocations */
-       
+       u_int32_t               lockedBlocks;           /* blocks reserved and locked */
 
        /*
         * HFS+ Private system directories (two). Any access
@@ -272,8 +284,8 @@ typedef struct hfsmount {
        u_int32_t            hfs_jnlfileid;
        u_int32_t            hfs_jnlinfoblkid;
        lck_rw_t                 hfs_global_lock;
-       u_int32_t            hfs_global_lock_nesting;
        thread_t                         hfs_global_lockowner;
+       u_int32_t            hfs_transaction_nesting;
 
        /* Notification variables: */
        u_int32_t               hfs_notification_conditions;
@@ -292,7 +304,9 @@ typedef struct hfsmount {
        u_int32_t       hfs_hotfile_end;
         u_int32_t       hfs_min_alloc_start;
        u_int32_t       hfs_freed_block_count;
+       u_int64_t       hfs_cs_hotfile_size;     // in bytes
        int             hfs_hotfile_freeblks;
+       int             hfs_hotfile_blk_adjust;
        int             hfs_hotfile_maxblks;
        int             hfs_overflow_maxblks;
        int             hfs_catalog_maxblks;
@@ -303,7 +317,7 @@ typedef struct hfsmount {
        time_t          hfc_timebase;   /* recording period start time */
        time_t          hfc_timeout;    /* recording period stop time */
        void *          hfc_recdata;    /* recording data (opaque) */
-       int             hfc_maxfiles;   /* maximum files to track */
+       uint32_t        hfc_maxfiles;   /* maximum files to track */
        struct vnode *  hfc_filevp;
 
 #if HFS_SPARSE_DEV
@@ -348,14 +362,19 @@ typedef struct hfsmount {
        u_int32_t               hfs_resize_progress;
 #if CONFIG_PROTECT
        /* Data Protection fields */
-       struct cprotect *hfs_resize_cpentry;
+       cpx_t                   hfs_resize_cpx;
        u_int16_t               hfs_running_cp_major_vers;
        uint32_t                default_cp_class; /* default effective class value */
        uint64_t                cproot_flags;
        uint8_t                 cp_crypto_generation; 
        uint8_t                 hfs_cp_lock_state;  /* per-mount device lock state info */ 
+#if HFS_TMPDBG
+#if !SECURE_KERNEL
+       boolean_t               hfs_cp_verbose;
+#endif
 #endif
 
+#endif
 
        /* Per mount cnode hash variables: */
        lck_mtx_t      hfs_chash_mutex; /* protects access to cnode hash table */
@@ -380,6 +399,19 @@ typedef struct hfsmount {
 
     // Not currently used except for debugging purposes
        uint32_t        hfs_active_threads;
+
+       enum {
+               // These are indices into the array below
+
+               // Tentative ranges can be claimed back at any time
+               HFS_TENTATIVE_BLOCKS    = 0,
+
+               // Locked ranges cannot be claimed back, but the allocation
+               // won't have been written to disk yet
+               HFS_LOCKED_BLOCKS               = 1,
+       };
+       // These lists are not sorted like a range list usually is
+       struct rl_head hfs_reserved_ranges[2];
 } hfsmount_t;
 
 /*
@@ -405,28 +437,40 @@ typedef hfsmount_t  ExtendedVCB;
 #define vcbFilCnt          hfs_filecount
 #define vcbDirCnt          hfs_dircount
 
-/* Inline functions to set/reset vcbFlags.  Upper 8 bits indicate if the volume 
- * header/VCB is clean/dirty --- if set, volume header is dirty, and 
- * if clear, volume header is clean.  This value is checked to determine
- * if the in-memory copy of volume header should be flushed to the disk
- * or not. 
- */
-/* Set runtime flag to indicate that volume is dirty */
-static __inline__ void MarkVCBDirty(ExtendedVCB *vcb)
+static inline void MarkVCBDirty(hfsmount_t *hfsmp)
 { 
-       vcb->vcbFlags |= 0xFF00;
+       hfsmp->hfs_header_dirty = true;
+}
+
+static inline void MarkVCBClean(hfsmount_t *hfsmp)
+{
+       hfsmp->hfs_header_dirty = false;
+       hfsmp->hfs_header_minor_change = false;
+}
+
+static inline bool IsVCBDirty(ExtendedVCB *vcb)
+{
+       return vcb->hfs_header_minor_change || vcb->hfs_header_dirty;
 }
 
-/* Clear runtime flag to indicate that volume is dirty */
-static __inline__ void MarkVCBClean(ExtendedVCB *vcb)
+// Header is changed but won't be inconsistent if we don't write it
+static inline void hfs_note_header_minor_change(hfsmount_t *hfsmp)
 {
-       vcb->vcbFlags &= 0x00FF;
+       hfsmp->hfs_header_minor_change = true;
 }
 
-/* Check runtime flag to determine if the volume is dirty or not */
-static __inline__ Boolean IsVCBDirty(ExtendedVCB *vcb)
+// Must header be flushed for volume to be consistent?
+static inline bool hfs_header_needs_flushing(hfsmount_t *hfsmp)
 {
-       return (vcb->vcbFlags & 0xFF00 ? true  : false);
+       return (hfsmp->hfs_header_dirty
+                       || ISSET(hfsmp->hfs_catalog_cp->c_flag, C_MODIFIED)
+                       || ISSET(hfsmp->hfs_extents_cp->c_flag, C_MODIFIED)
+                       || (hfsmp->hfs_attribute_cp
+                               && ISSET(hfsmp->hfs_attribute_cp->c_flag, C_MODIFIED))
+                       || (hfsmp->hfs_allocation_cp
+                               && ISSET(hfsmp->hfs_allocation_cp->c_flag, C_MODIFIED))
+                       || (hfsmp->hfs_startup_cp
+                               && ISSET(hfsmp->hfs_startup_cp->c_flag, C_MODIFIED)));
 }
 
 /*
@@ -473,7 +517,10 @@ enum privdirtype {FILE_HARDLINKS, DIR_HARDLINKS};
 #define HFS_SSD                  0x400000
 #define HFS_SUMMARY_TABLE        0x800000
 #define HFS_CS                  0x1000000
-
+#define HFS_CS_METADATA_PIN     0x2000000
+#define HFS_CS_HOTFILE_PIN      0x4000000      /* cooperative fusion (enables a hotfile variant) */
+#define HFS_FEATURE_BARRIER     0x8000000      /* device supports barrier-only flush */
+#define HFS_CS_SWAPFILE_PIN    0x10000000
 
 /* Macro to update next allocation block in the HFS mount structure.  If 
  * the HFS_SKIP_UPDATE_NEXT_ALLOCATION is set, do not update 
@@ -597,11 +644,29 @@ enum { kHFSPlusMaxFileNameBytes = kHFSPlusMaxFileNameChars * 3 };
 #define MAC_GMT_FACTOR         2082844800UL
 
 static inline __attribute__((const))
-uint64_t hfs_blk_to_bytes(uint32_t blk, uint32_t blk_size)
+off_t hfs_blk_to_bytes(uint32_t blk, uint32_t blk_size)
 {
-       return (uint64_t)blk * blk_size;                // Avoid the overflow
+       return (off_t)blk * blk_size;           // Avoid the overflow
 }
 
+/*
+ * For now, we use EIO to indicate consistency issues.  It is safe to
+ * return or assign an error value to HFS_EINCONSISTENT but it is
+ * *not* safe to compare against it because EIO can be generated for
+ * other reasons.  We take advantage of the fact that == has
+ * left-to-right associativity and so any uses of:
+ *
+ *    if (error == HFS_EINCONSISTENT)
+ *
+ * will produce a compiler warning: "comparison between pointer and
+ * integer".
+ *
+ * Note that not everywhere is consistent with the use of
+ * HFS_EINCONSISTENT.  Some places return EINVAL, EIO directly or
+ * other error codes.
+ */
+#define HFS_EINCONSISTENT              (void *)0 == (void *)0 ? EIO : EIO
+
 /*****************************************************************************
        FUNCTION PROTOTYPES 
 ******************************************************************************/
@@ -636,6 +701,7 @@ int hfs_vnop_bwrite(struct vnop_bwrite_args *);       /* in hfs_readwrite.c */
 int hfs_vnop_blktooff(struct vnop_blktooff_args *);   /* in hfs_readwrite.c */
 int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);   /* in hfs_readwrite.c */
 int hfs_vnop_blockmap(struct vnop_blockmap_args *);   /* in hfs_readwrite.c */
+errno_t hfs_flush_invalid_ranges(vnode_t vp);            /* in hfs_readwrite.c */
 
 int hfs_vnop_getxattr(struct vnop_getxattr_args *);        /* in hfs_xattr.c */
 int hfs_vnop_setxattr(struct vnop_setxattr_args *);        /* in hfs_xattr.c */
@@ -704,8 +770,29 @@ void hfs_generate_volume_notifications(struct hfsmount *hfsmp);
 ******************************************************************************/
 extern int  hfs_relocate(struct  vnode *, u_int32_t, kauth_cred_t, struct  proc *);
 
+/* flags for hfs_pin_block_range() and hfs_pin_vnode() */
+#define HFS_PIN_IT       0x0001
+#define HFS_UNPIN_IT     0x0002
+#define HFS_TEMP_PIN     0x0004
+#define HFS_EVICT_PIN    0x0008
+#define HFS_DATALESS_PIN 0x0010
+
+//
+// pin/un-pin an explicit range of blocks to the "fast" (usually ssd) device
+//
+int hfs_pin_block_range(struct hfsmount *hfsmp, int pin_state, uint32_t start_block, uint32_t nblocks, vfs_context_t ctx);
+
+//
+// pin/un-pin all the extents belonging to a vnode.
+// also, if it is non-null, "num_blocks_pinned" returns the number of blocks pin/unpinned by the function
+//
+int hfs_pin_vnode(struct hfsmount *hfsmp, struct vnode *vp, int pin_state, uint32_t *num_blocks_pinned, vfs_context_t ctx);
+
+
+int hfs_pin_overflow_extents (struct hfsmount *hfsmp, uint32_t fileid, uint8_t forktype, uint32_t *pinned);
+                                     
+
 /* Flags for HFS truncate */
-#define HFS_TRUNCATE_SKIPUPDATE        0x00000001
 #define HFS_TRUNCATE_SKIPTIMES         0x00000002 /* implied by skipupdate; it is a subset */
                                                                                        
 
@@ -743,8 +830,13 @@ extern void hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding);
 enum volop {VOL_UPDATE, VOL_MKDIR, VOL_RMDIR, VOL_MKFILE, VOL_RMFILE};
 extern int hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot);
 
-int hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush);
-#define HFS_ALTFLUSH   1
+enum {
+       HFS_FVH_WAIT                                    = 0x0001,
+       HFS_FVH_WRITE_ALT                               = 0x0002,
+       HFS_FVH_FLUSH_IF_DIRTY                  = 0x0004,
+};
+typedef uint32_t hfs_flush_volume_header_options_t;
+int hfs_flushvolumeheader(struct hfsmount *hfsmp, hfs_flush_volume_header_options_t);
 
 extern int  hfs_extendfs(struct hfsmount *, u_int64_t, vfs_context_t);
 extern int  hfs_truncatefs(struct hfsmount *, u_int64_t, vfs_context_t);
@@ -798,6 +890,7 @@ extern int hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_
 extern int check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg);
 extern int check_for_dataless_file(struct vnode *vp, uint64_t op_type);
 extern int hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid);
+extern void hfs_pin_fs_metadata(struct hfsmount *hfsmp);
 
 /* Return information about number of metadata blocks for volume */
 extern int hfs_getinfo_metadata_blocks(struct hfsmount *hfsmp, struct hfsinfo_metadata *hinfo);
@@ -822,9 +915,6 @@ void hfs_unlock_mount (struct hfsmount *hfsmp);
 #define SFL_VM_PRIV    0x0020
 #define SFL_VALIDMASK   (SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE | SFL_STARTUP | SFL_VM_PRIV)
 
-extern int  hfs_systemfile_lock(struct hfsmount *, int, enum hfs_locktype);
-extern void hfs_systemfile_unlock(struct hfsmount *, int);
-
 extern u_int32_t  GetFileInfo(ExtendedVCB *vcb, u_int32_t dirid, const char *name,
                                                   struct cat_attr *fattr, struct cat_fork *forkinfo);
 
@@ -856,7 +946,6 @@ extern int hfs_start_transaction(struct hfsmount *hfsmp);
 extern int hfs_end_transaction(struct hfsmount *hfsmp);
 extern void hfs_journal_lock(struct hfsmount *hfsmp);
 extern void hfs_journal_unlock(struct hfsmount *hfsmp);
-extern int hfs_journal_flush(struct hfsmount *hfsmp, boolean_t wait_for_IO);
 extern void hfs_syncer_lock(struct hfsmount *hfsmp);
 extern void hfs_syncer_unlock(struct hfsmount *hfsmp);
 extern void hfs_syncer_wait(struct hfsmount *hfsmp);
@@ -864,6 +953,17 @@ extern void hfs_syncer_wakeup(struct hfsmount *hfsmp);
 extern void hfs_syncer_queue(thread_call_t syncer);
 extern void hfs_sync_ejectable(struct hfsmount *hfsmp);
 
+typedef enum hfs_flush_mode {
+       HFS_FLUSH_JOURNAL,              // Flush journal
+       HFS_FLUSH_JOURNAL_META,         // Flush journal and metadata blocks
+       HFS_FLUSH_FULL,                 // Flush journal and does a cache flush
+       HFS_FLUSH_CACHE,                // Flush track cache to media
+       HFS_FLUSH_BARRIER,              // Barrier-only flush to ensure write order
+       HFS_FLUSH_JOURNAL_BARRIER       // Flush journal with barrier
+} hfs_flush_mode_t;
+
+extern errno_t hfs_flush(struct hfsmount *hfsmp, hfs_flush_mode_t mode);
+
 extern void hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents);
 
 /* Erase unused Catalog nodes due to <rdar://problem/6947811>. */
@@ -893,9 +993,26 @@ extern void replace_desc(struct cnode *cp, struct cat_desc *cdp);
 extern int hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp,
                                                struct vnode **rvpp);
 
-extern int hfs_update(struct vnode *, int);
+typedef enum {
+       // Push all modifications to disk (including minor ones)
+       HFS_UPDATE_FORCE = 0x01,
+} hfs_update_options_t;
+
+extern int hfs_update(struct vnode *, int options);
+
+typedef enum hfs_sync_mode {
+       HFS_FSYNC,
+       HFS_FSYNC_FULL,
+       HFS_FSYNC_BARRIER
+} hfs_fsync_mode_t;
+
+extern int hfs_fsync(struct vnode *, int, hfs_fsync_mode_t, struct proc *);
 
-extern int hfs_fsync(struct vnode *, int, int, struct proc *);
+const struct cat_fork *
+hfs_prepare_fork_for_update(filefork_t *ff,
+                                                       const struct cat_fork *cf,
+                                                       struct cat_fork *cf_buf,
+                                                       uint32_t block_size);
 
 /*****************************************************************************
        Functions from hfs_xattr.c
@@ -921,7 +1038,8 @@ int hfs_getxattr_internal(cnode_t *, struct vnop_getxattr_args *,
 int hfs_xattr_write(vnode_t vp, const char *name, const void *data, size_t size);
 int hfs_setxattr_internal(struct cnode *, const void *, size_t, 
                           struct vnop_setxattr_args *, struct hfsmount *, u_int32_t);
-extern int hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid);
+extern int hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid, 
+                                                        bool *open_transaction);
 extern int hfs_set_volxattr(struct hfsmount *hfsmp, unsigned int xattrtype, int state);
 
 
@@ -942,18 +1060,18 @@ extern void  hfs_savelinkorigin(cnode_t *cp, cnid_t parentcnid);
 extern void  hfs_relorigins(struct cnode *cp);
 extern void  hfs_relorigin(struct cnode *cp, cnid_t parentcnid);
 extern int   hfs_haslinkorigin(cnode_t *cp);
-extern cnid_t  hfs_currentparent(cnode_t *cp);
+extern cnid_t  hfs_currentparent(cnode_t *cp, bool have_lock);
 extern cnid_t  hfs_currentcnid(cnode_t *cp);
+errno_t hfs_first_link(hfsmount_t *hfsmp, cnode_t *cp, cnid_t *link_id);
 
 
 /*****************************************************************************
        Functions from VolumeAllocation.c
  ******************************************************************************/
-extern int hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock,
-                                                  u_int32_t numBlocks);
+extern int hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks);
 
-extern int hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock,
-                                                          u_int32_t numBlocks, u_int32_t *alloc_count);
+extern int hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock, 
+               u_int32_t numBlocks, u_int32_t *alloc_count);
 
 extern int hfs_isrbtree_active (struct hfsmount *hfsmp);
 
index 8483b45ae978d33dd566fdfc1a4d50ab49bbf756..3ee064859e5f7f3eb5c6f8db12f3c7a43a8310f4 100644 (file)
@@ -338,7 +338,7 @@ hfs_readdirattr_internal(struct vnode *dvp, struct attrlist *alist,
         */
        if ((dcp->c_entries == 0) && (ce_list->realentries > 0)) {
                dcp->c_entries++;
-               dcp->c_flag |= (C_MODIFIED | C_FORCEUPDATE);
+               dcp->c_flag |= C_MODIFIED;
                printf("hfs_vnop_readdirattr: repairing valence to non-zero! \n");
                /* force an update on dcp while we're still holding the lock. */
                hfs_update(dvp, 0);
index fefc36ad32de9d5b3f91e1e61dde344394752db0..f6084e31fe1737c8b6fe1b8fcddd1dbdf88f6718 100644 (file)
@@ -545,17 +545,17 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF)
        /*
         * Update the Alternate MDB or Alternate VolumeHeader
         */
+       VTOC(vp)->c_flag |= C_MODIFIED;
        if ((VTOC(vp)->c_fileid == kHFSExtentsFileID)   ||
            (VTOC(vp)->c_fileid == kHFSCatalogFileID)   ||
            (VTOC(vp)->c_fileid == kHFSAttributesFileID)
           ) {
-               VTOC(vp)->c_flag |= C_MODIFIED;
                MarkVCBDirty( vcb );
-               ret = hfs_flushvolumeheader(VCBTOHFS(vcb), MNT_WAIT, HFS_ALTFLUSH);
+               ret = hfs_flushvolumeheader(VCBTOHFS(vcb), HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
        } else {
                VTOC(vp)->c_touch_chgtime = TRUE;
                VTOC(vp)->c_touch_modtime = TRUE;
-               (void) hfs_update(vp, TRUE);
+               (void) hfs_update(vp, 0);
        }
 
        ret = ClearBTNodes(vp, btInfo.nodeSize, origSize, (filePtr->fcbEOF - origSize));
@@ -889,7 +889,7 @@ again:
        hfsmp->hfs_attribute_vp = vp;
        hfs_unlock_mount (hfsmp);
 
-       (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
+       (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
 
        if (intrans) {
                hfs_end_transaction(hfsmp);
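hfs_flushvolumeheader() now takes a single typed options word (HFS_FVH_WAIT, HFS_FVH_WRITE_ALT, HFS_FVH_FLUSH_IF_DIRTY) in place of the old separate waitfor/altflush integers, as the call-site changes above show. A small sketch of that option-flags pattern outside the kernel; all names are illustrative.

#include <stdint.h>
#include <stdio.h>

/* Option bits combined with |, mirroring the HFS_FVH_* style. */
enum {
    DEMO_FVH_WAIT           = 0x0001,
    DEMO_FVH_WRITE_ALT      = 0x0002,
    DEMO_FVH_FLUSH_IF_DIRTY = 0x0004,
};
typedef uint32_t demo_flush_options_t;

static int demo_flush_header(demo_flush_options_t opts)
{
    if (opts & DEMO_FVH_FLUSH_IF_DIRTY)
        printf("flush only if the header is dirty\n");
    if (opts & DEMO_FVH_WRITE_ALT)
        printf("also rewrite the alternate volume header\n");
    if (opts & DEMO_FVH_WAIT)
        printf("wait for the write to complete\n");
    return 0;
}

int main(void)
{
    /* Rough equivalent of the old (MNT_WAIT, HFS_ALTFLUSH) call sites. */
    return demo_flush_header(DEMO_FVH_WAIT | DEMO_FVH_WRITE_ALT);
}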
index ef0b4a61e4935aba3cb77626fbab95943e192295..8e0a65c4f2294c22ee703276a144f62213a06c1e 100644 (file)
@@ -267,7 +267,7 @@ nextid:
        } else {
                hfsmp->vcbNxtCNID++;
        }
-       MarkVCBDirty(hfsmp);
+       hfs_note_header_minor_change(hfsmp);
 
        /* First check that there are not any entries pending in the hash table with this ID */
        if (cat_check_idhash (hfsmp, nextCNID)) {
@@ -4359,8 +4359,9 @@ getbsdattr(struct hfsmount *hfsmp, const struct HFSPlusCatalogFile *crp, struct
                case S_IFBLK:
                        attrp->ca_rdev = bsd->special.rawDevice;
                        break;
-                       
-               case S_IFDIR: /* fall through */
+               case S_IFIFO:
+               case S_IFSOCK:
+               case S_IFDIR:
                case S_IFREG:
                        /* Pick up the hard link count */
                        if (bsd->special.linkCount > 0)
@@ -4812,3 +4813,11 @@ cat_update_dirlink(struct hfsmount *hfsmp, u_int8_t forktype,
        } 
 }
 
+void hfs_fork_copy(struct cat_fork *dst, const struct cat_fork *src,
+                                  HFSPlusExtentDescriptor *extents)
+{
+       /* Copy everything but the extents into the dest fork */
+       memcpy(dst, src, offsetof(struct cat_fork, cf_extents));
+       /* Then copy the supplied extents into the fork */
+       memcpy(dst->cf_extents, extents, sizeof(HFSPlusExtentRecord));
+}
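hfs_fork_copy() above copies everything up to the extents array with offsetof and then splices in the supplied extents, which is why the companion NOTE added to struct cat_fork (next file below) asks that the function be updated whenever the structure changes. A stripped-down sketch of the copy-up-to-a-member pattern; the struct and field names are illustrative.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct demo_fork {
    uint64_t size;
    uint32_t blocks;
    uint32_t extents[8];        /* stands in for cf_extents */
};

/* Copy the scalar head of the struct, then overwrite the extents from the
 * caller.  offsetof keeps the head copy correct if fields are added before
 * extents, provided extents stays the specially-handled tail member. */
static void demo_fork_copy(struct demo_fork *dst, const struct demo_fork *src,
                           const uint32_t extents[8])
{
    memcpy(dst, src, offsetof(struct demo_fork, extents));
    memcpy(dst->extents, extents, sizeof(dst->extents));
}

int main(void)
{
    struct demo_fork src = { .size = 4096, .blocks = 1 };
    uint32_t ext[8] = { 100, 1 };       /* start block, block count */
    struct demo_fork dst;

    demo_fork_copy(&dst, &src, ext);
    printf("size=%llu blocks=%u extent=%u+%u\n",
           (unsigned long long)dst.size, dst.blocks,
           dst.extents[0], dst.extents[1]);
    return 0;
}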
index a48ca2fb6e408e6a1dcab6151214ec39199c4c72..a4719ea41bf13cb8d5df7c6d0869254f71ffecec 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2002-2014 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -148,11 +148,18 @@ struct cat_fork {
        u_int32_t      cf_vblocks;     /* virtual (unallocated) blocks */
        u_int32_t      cf_blocks;      /* total blocks used by this fork */
        struct HFSPlusExtentDescriptor  cf_extents[8];  /* initial set of extents */
+
+       /*
+        * NOTE: If you change this structure, make sure you also change
+        * hfs_fork_copy.
+        */
 };
 
 #define cf_clump       cf_union.cfu_clump
 #define cf_bytesread   cf_union.cfu_bytesread
 
+void hfs_fork_copy(struct cat_fork *dst, const struct cat_fork *src,
+                                  HFSPlusExtentDescriptor *extents);
 
 /*
  * Directory Hint
index 89589de2855cb5e5105e57a2068131ba7f9f4c2f..668cc7870836265b3c40937e89d7e96a091c3e07 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2002-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -38,6 +38,7 @@
 #include <sys/kdebug.h>
 #include <libkern/OSByteOrder.h>
 #include <sys/buf_internal.h>
+#include <sys/namei.h>
 
 #include <kern/locks.h>
 
@@ -50,6 +51,7 @@
 #include <hfs/hfs_quota.h>
 #include <hfs/hfs_format.h>
 #include <hfs/hfs_kdebug.h>
+#include <hfs/hfs_cprotect.h>
 
 extern int prtactive;
 
@@ -57,7 +59,7 @@ extern lck_attr_t *  hfs_lock_attr;
 extern lck_grp_t *  hfs_mutex_group;
 extern lck_grp_t *  hfs_rwlock_group;
 
-static void  hfs_reclaim_cnode(struct cnode *);
+static void  hfs_reclaim_cnode(hfsmount_t *hfsmp, struct cnode *);
 static int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim);
 static int hfs_isordered(struct cnode *, struct cnode *);
 
@@ -182,7 +184,7 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim)
        enum vtype v_type;
        struct cnode *cp;
        int error = 0;
-       int started_tr = 0;
+       bool started_tr = false;
        struct hfsmount *hfsmp = VTOHFS(vp);
        struct proc *p = vfs_context_proc(ctx);
        int truncated = 0;
@@ -200,36 +202,7 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim)
        if (cp->c_rsrcfork) {
                ++forkcount;
        }
-       
-       
-       /*
-        * Skip the call to ubc_setsize if we're being invoked on behalf of reclaim.
-        * The dirty regions would have already been synced to disk, so informing UBC
-        * that they can toss the pages doesn't help anyone at this point. 
-        * 
-        * Note that this is a performance problem if the vnode goes straight to reclaim
-        * (and skips inactive), since there would be no way for anyone to notify the UBC
-        * that all pages in this file are basically useless.
-        */     
-       if (reclaim == 0) {
-               /*
-                * Check whether we are tearing down a cnode with only one remaining fork.
-                * If there are blocks in its filefork, then we need to unlock the cnode
-                * before calling ubc_setsize.  The cluster layer may re-enter the filesystem
-                * (i.e. VNOP_BLOCKMAP), and if we retain the cnode lock, we could double-lock
-                * panic.  
-                */
-               
-               if ((v_type == VREG || v_type == VLNK) &&
-                       (cp->c_flag & C_DELETED) &&
-                       (VTOF(vp)->ff_blocks != 0) && (forkcount == 1)) {
-                       hfs_unlock(cp); 
-                       /* ubc_setsize just fails if we were to call this from VNOP_RECLAIM */
-                       ubc_setsize(vp, 0);
-                       (void) hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
-               }       
-       }
-       
+
        /* 
         * Push file data out for normal files that haven't been evicted from 
         * the namespace.  We only do this if this function was not called from reclaim,
@@ -245,10 +218,7 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim)
                (VTOF(vp)->ff_blocks) &&
                (reclaim == 0)) {
                /* 
-                * Note that if content protection is enabled, then this is where we will
-                * attempt to issue IOs for all dirty regions of this file.  
-                *
-                * If we're called from hfs_vnop_inactive, all this means is at the time 
+                * If we're called from hfs_vnop_inactive, all this means is at the time
                 * the logic for deciding to call this function, there were not any lingering
                 * mmap/fd references for this file.  However, there is nothing preventing the system
                 * from creating a new reference in between the time that logic was checked
@@ -258,21 +228,6 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim)
                hfs_filedone(vp, ctx, 0);
        }
 
-       /* 
-        * We're holding the cnode lock now.  Stall behind any shadow BPs that may
-        * be involved with this vnode if it is a symlink.  We don't want to allow 
-        * the blocks that we're about to release to be put back into the pool if there
-        * is pending I/O to them.
-        */
-       if (v_type == VLNK) {   
-               /* 
-                * This will block if the asynchronous journal flush is in progress.
-                * If this symlink is not being renamed over and doesn't have any open FDs,
-                * then we'll remove it from the journal's bufs below in kill_block.
-                */
-               buf_wait_for_shadow_io (vp, 0);
-       }
-
        /* 
         * Remove any directory hints or cached origins
         */
@@ -282,328 +237,326 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim)
        if (cp->c_flag & C_HARDLINK) {
                hfs_relorigins(cp);
        }
-       
+
        /*
-        * This check is slightly complicated.  We should only truncate data 
-        * in very specific cases for open-unlinked files.  This is because
-        * we want to ensure that the resource fork continues to be available
-        * if the caller has the data fork open.  However, this is not symmetric; 
-        * someone who has the resource fork open need not be able to access the data
-        * fork once the data fork has gone inactive.
-        * 
-        * If we're the last fork, then we have cleaning up to do.
-        * 
-        * A) last fork, and vp == c_vp
-        *      Truncate away own fork data. If rsrc fork is not in core, truncate it too.
-        *
-        * B) last fork, and vp == c_rsrc_vp
-        *      Truncate ourselves, assume data fork has been cleaned due to C).
-        *
-        * If we're not the last fork, then things are a little different:
+        * -- Handle open unlinked files --
         *
-        * C) not the last fork, vp == c_vp
-        *      Truncate ourselves.  Once the file has gone out of the namespace,
-        *      it cannot be further opened.  Further access to the rsrc fork may 
-        *      continue, however.
-        *
-        * D) not the last fork, vp == c_rsrc_vp
-        *      Don't enter the block below, just clean up vnode and push it out of core.
+        * If the vnode is in use, it means a force unmount is in progress
+        * in which case we defer cleaning up until either we come back
+        * through here via hfs_vnop_reclaim, at which point the UBC
+        * information will have been torn down and the vnode might no
+        * longer be in use, or if it's still in use, it will get cleaned
+        * up when next remounted.
         */
+       if (ISSET(cp->c_flag, C_DELETED) && !vnode_isinuse(vp, 0)) {
+               /*
+                * This check is slightly complicated.  We should only truncate data 
+                * in very specific cases for open-unlinked files.  This is because
+                * we want to ensure that the resource fork continues to be available
+                * if the caller has the data fork open.  However, this is not symmetric; 
+                * someone who has the resource fork open need not be able to access the data
+                * fork once the data fork has gone inactive.
+                * 
+                * If we're the last fork, then we have cleaning up to do.
+                * 
+                * A) last fork, and vp == c_vp
+                *      Truncate away own fork data. If rsrc fork is not in core, truncate it too.
+                *
+                * B) last fork, and vp == c_rsrc_vp
+                *      Truncate ourselves, assume data fork has been cleaned due to C).
+                *
+                * If we're not the last fork, then things are a little different:
+                *
+                * C) not the last fork, vp == c_vp
+                *      Truncate ourselves.  Once the file has gone out of the namespace,
+                *      it cannot be further opened.  Further access to the rsrc fork may 
+                *      continue, however.
+                *
+                * D) not the last fork, vp == c_rsrc_vp
+                *      Don't enter the block below, just clean up vnode and push it out of core.
+                */
        
-       if ((v_type == VREG || v_type == VLNK) && 
-                       (cp->c_flag & C_DELETED) &&
-                       ((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) {
-                       
-               /* Truncate away our own fork data. (Case A, B, C above) */
-               if (VTOF(vp)->ff_blocks != 0) {
+               if ((v_type == VREG || v_type == VLNK) && 
+                               ((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) {
+                               
+                       /* Truncate away our own fork data. (Case A, B, C above) */
+                       if (VTOF(vp)->ff_blocks != 0) {
+                               /*
+                                * SYMLINKS only:
+                                *
+                                * Encapsulate the entire change (including truncating the link) in 
+                                * nested transactions if we are modifying a symlink, because we know that its
+                                * file length will be at most 4k, and we can fit both the truncation and 
+                                * any relevant bitmap changes into a single journal transaction.  We also want
+                                * the kill_block code to execute in the same transaction so that any dirty symlink
+                                * blocks will not be written. Otherwise, rely on
+                                * hfs_truncate doing its own transactions to ensure that we don't blow up
+                                * the journal.
+                                */ 
+                               if (!started_tr && (v_type == VLNK)) {
+                                       if (hfs_start_transaction(hfsmp) != 0) {
+                                               error = EINVAL;
+                                               goto out;
+                                       }
+                                       else {
+                                               started_tr = true;
+                                       }
+                               }
 
-                       /* 
-                        * SYMLINKS only:
-                        *
-                        * Encapsulate the entire change (including truncating the link) in 
-                        * nested transactions if we are modifying a symlink, because we know that its
-                        * file length will be at most 4k, and we can fit both the truncation and 
-                        * any relevant bitmap changes into a single journal transaction.  We also want
-                        * the kill_block code to execute in the same transaction so that any dirty symlink
-                        * blocks will not be written. Otherwise, rely on
-                        * hfs_truncate doing its own transactions to ensure that we don't blow up
-                        * the journal.
-                        */ 
-                       if ((started_tr == 0) && (v_type == VLNK)) {
-                               if (hfs_start_transaction(hfsmp) != 0) {
-                                       error = EINVAL;
+                               /*
+                                * At this point, we have decided that this cnode is
+                                * suitable for full removal.  We are about to deallocate
+                                * its blocks and remove its entry from the catalog. 
+                                * If it was a symlink, then it's possible that the operation
+                                * which created it is still in the current transaction group
+                                * due to coalescing.  Take action here to kill the data blocks
+                                * of the symlink out of the journal before moving to 
+                                * deallocate the blocks.  We need to be in the middle of
+                                * a transaction before calling buf_iterate like this.
+                                * 
+                                * Note: we have to kill any potential symlink buffers out of 
+                                * the journal prior to deallocating their blocks.  This is so 
+                                * that we don't race with another thread that may be doing an 
+                                * allocation concurrently and pick up these blocks. It could
+                                * generate I/O against them which could go out ahead of our journal
+                                * transaction.
+                                */
+
+                               if (hfsmp->jnl && vnode_islnk(vp)) {
+                                       buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp);
+                               }
+
+
+                               /*
+                                * This truncate call (and the one below) is fine from VNOP_RECLAIM's 
+                                * context because we're only removing blocks, not zero-filling new 
+                                * ones.  The C_DELETED check above makes things much simpler. 
+                                */
+                               error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 0, ctx);
+                               if (error) {
                                        goto out;
                                }
-                               else {
-                                       started_tr = 1;
+                               truncated = 1;
+
+                               /* (SYMLINKS ONLY): Close/End our transaction after truncating the file record */
+                               if (started_tr) {
+                                       hfs_end_transaction(hfsmp);
+                                       started_tr = false;
                                }
-                       }
 
-                       /*
-                        * At this point, we have decided that this cnode is
-                        * suitable for full removal.  We are about to deallocate
-                        * its blocks and remove its entry from the catalog. 
-                        * If it was a symlink, then it's possible that the operation
-                        * which created it is still in the current transaction group
-                        * due to coalescing.  Take action here to kill the data blocks
-                        * of the symlink out of the journal before moving to 
-                        * deallocate the blocks.  We need to be in the middle of
-                        * a transaction before calling buf_iterate like this.
+                       }
+                       
+                       /* 
+                        * Truncate away the resource fork, if we represent the data fork and
+                        * it is the last fork.  That means, by definition, the rsrc fork is not in 
+                        * core.  To avoid bringing a vnode into core for the sole purpose of deleting the
+                        * data in the resource fork, we call cat_lookup directly, then hfs_release_storage
+                        * to get rid of the resource fork's data. Note that because we are holding the 
+                        * cnode lock, it is impossible for a competing thread to create the resource fork
+                        * vnode from underneath us while we do this.
                         * 
-                        * Note: we have to kill any potential symlink buffers out of 
-                        * the journal prior to deallocating their blocks.  This is so 
-                        * that we don't race with another thread that may be doing an 
-                        * an allocation concurrently and pick up these blocks. It could
-                        * generate I/O against them which could go out ahead of our journal
-                        * transaction.
+                        * This is invoked via case A above only.
                         */
+                       if ((cp->c_blocks > 0) && (forkcount == 1) && (vp != cp->c_rsrc_vp)) {
+                               struct cat_lookup_buffer *lookup_rsrc = NULL;
+                               struct cat_desc *desc_ptr = NULL;
+                               lockflags = 0;
+
+                               MALLOC(lookup_rsrc, struct cat_lookup_buffer*, sizeof (struct cat_lookup_buffer), M_TEMP, M_WAITOK);
+                               if (lookup_rsrc == NULL) {
+                                       printf("hfs_cnode_teardown: ENOMEM from MALLOC\n");
+                                       error = ENOMEM;
+                                       goto out;
+                               }
+                               else {
+                                       bzero (lookup_rsrc, sizeof (struct cat_lookup_buffer));
+                               }
 
-                       if (hfsmp->jnl && vnode_islnk(vp)) {
-                               buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp);
-                       }
+                               if (cp->c_desc.cd_namelen == 0) {
+                                       /* Initialize the rsrc descriptor for lookup if necessary */
+                                       MAKE_DELETED_NAME (lookup_rsrc->lookup_name, HFS_TEMPLOOKUP_NAMELEN, cp->c_fileid);
+                                       
+                                       lookup_rsrc->lookup_desc.cd_nameptr = (const uint8_t*) lookup_rsrc->lookup_name;
+                                       lookup_rsrc->lookup_desc.cd_namelen = strlen (lookup_rsrc->lookup_name);
+                                       lookup_rsrc->lookup_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
+                                       lookup_rsrc->lookup_desc.cd_cnid = cp->c_cnid;  
+                                       
+                                       desc_ptr = &lookup_rsrc->lookup_desc;
+                               }
+                               else {
+                                       desc_ptr = &cp->c_desc; 
+                               }
 
+                               lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
 
-                       /*
-                        * This truncate call (and the one below) is fine from VNOP_RECLAIM's 
-                        * context because we're only removing blocks, not zero-filling new 
-                        * ones.  The C_DELETED check above makes things much simpler. 
-                        */
-                       error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 0, ctx);
-                       if (error) {
-                               goto out;
-                       }
-                       truncated = 1;
+                               error = cat_lookup (hfsmp, desc_ptr, 1, 0, (struct cat_desc *) NULL, 
+                                               (struct cat_attr*) NULL, &lookup_rsrc->lookup_fork.ff_data, NULL);
 
-                       /* (SYMLINKS ONLY): Close/End our transaction after truncating the file record */
-                       if (started_tr) {
-                               hfs_end_transaction(hfsmp);
-                               started_tr = 0;
-                       }
+                               hfs_systemfile_unlock (hfsmp, lockflags);
+                               
+                               if (error) {
+                                       FREE (lookup_rsrc, M_TEMP);
+                                       goto out;
+                               }
 
-               }
-               
-               /* 
-                * Truncate away the resource fork, if we represent the data fork and
-                * it is the last fork.  That means, by definition, the rsrc fork is not in 
-                * core.  To avoid bringing a vnode into core for the sole purpose of deleting the
-                * data in the resource fork, we call cat_lookup directly, then hfs_release_storage
-                * to get rid of the resource fork's data. Note that because we are holding the 
-                * cnode lock, it is impossible for a competing thread to create the resource fork
-                * vnode from underneath us while we do this.
-                * 
-                * This is invoked via case A above only.
-                */
-               if ((cp->c_blocks > 0) && (forkcount == 1) && (vp != cp->c_rsrc_vp)) {
-                       struct cat_lookup_buffer *lookup_rsrc = NULL;
-                       struct cat_desc *desc_ptr = NULL;
-                       lockflags = 0;
-
-                       MALLOC(lookup_rsrc, struct cat_lookup_buffer*, sizeof (struct cat_lookup_buffer), M_TEMP, M_WAITOK);
-                       if (lookup_rsrc == NULL) {
-                               printf("hfs_cnode_teardown: ENOMEM from MALLOC\n");
-                               error = ENOMEM;
-                               goto out;
-                       }
-                       else {
-                               bzero (lookup_rsrc, sizeof (struct cat_lookup_buffer));
-                       }
+                               /*
+                                * Make the filefork in our temporary struct look like a real 
+                                * filefork.  Fill in the cp, sysfileinfo and rangelist fields.
+                                */
+                               rl_init (&lookup_rsrc->lookup_fork.ff_invalidranges);
+                               lookup_rsrc->lookup_fork.ff_cp = cp;
 
-                       if (cp->c_desc.cd_namelen == 0) {
-                               /* Initialize the rsrc descriptor for lookup if necessary*/
-                               MAKE_DELETED_NAME (lookup_rsrc->lookup_name, HFS_TEMPLOOKUP_NAMELEN, cp->c_fileid);
-                               
-                               lookup_rsrc->lookup_desc.cd_nameptr = (const uint8_t*) lookup_rsrc->lookup_name;
-                               lookup_rsrc->lookup_desc.cd_namelen = strlen (lookup_rsrc->lookup_name);
-                               lookup_rsrc->lookup_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
-                               lookup_rsrc->lookup_desc.cd_cnid = cp->c_cnid;  
-                               
-                               desc_ptr = &lookup_rsrc->lookup_desc;
-                       }
-                       else {
-                               desc_ptr = &cp->c_desc; 
-                       }
+                               /* 
+                                * If there were no errors, then we have the catalog's fork information 
+                                * for the resource fork in question.  Go ahead and delete the data in it now.
+                                */
 
-                       lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+                               error = hfs_release_storage (hfsmp, NULL, &lookup_rsrc->lookup_fork, cp->c_fileid);
+                               FREE(lookup_rsrc, M_TEMP);
 
-                       error = cat_lookup (hfsmp, desc_ptr, 1, 0, (struct cat_desc *) NULL, 
-                                       (struct cat_attr*) NULL, &lookup_rsrc->lookup_fork.ff_data, NULL);
+                               if (error) {
+                                       goto out;
+                               }
 
-                       hfs_systemfile_unlock (hfsmp, lockflags);
-                       
-                       if (error) {
-                               FREE (lookup_rsrc, M_TEMP);
-                               goto out;
+                               /*
+                                * This fileid's resource fork extents have now been fully deleted on-disk
+                                * and this CNID is no longer valid. At this point, we should be able to
+                                * zero out cp->c_blocks to indicate there is no data left in this file.
+                                */
+                               cp->c_blocks = 0;
                        }
+               }
 
+               /*
+                * If we represent the last fork (or none in the case of a dir), 
+                * and the cnode has become open-unlinked...
+                *
+                * We check c_blocks here because it is possible in the force
+                * unmount case for the data fork to be in use but the resource
+                * fork to not be in use, in which case we will truncate the
+                * resource fork but not the data fork.  It will get cleaned
+                * up upon next mount.
+                */
+               if (forkcount <= 1 && !cp->c_blocks) {
                        /*
-                        * Make the filefork in our temporary struct look like a real 
-                        * filefork.  Fill in the cp, sysfileinfo and rangelist fields..
+                        * If it has EA's, then we need to get rid of them.
+                        *
+                        * Note that this must happen outside of any other transactions
+                        * because it starts/ends its own transactions and grabs its
+                        * own locks.  This is to prevent a file with a lot of attributes
+                        * from creating a transaction that is too large (which panics).
                         */
-                       rl_init (&lookup_rsrc->lookup_fork.ff_invalidranges);
-                       lookup_rsrc->lookup_fork.ff_cp = cp;
+                       if (ISSET(cp->c_attr.ca_recflags, kHFSHasAttributesMask))
+                               ea_error = hfs_removeallattr(hfsmp, cp->c_fileid, &started_tr);
 
-                       /* 
-                        * If there were no errors, then we have the catalog's fork information 
-                        * for the resource fork in question.  Go ahead and delete the data in it now.
+                       /*
+                        * Remove the cnode's catalog entry and release all blocks it
+                        * may have been using.
                         */
 
-                       error = hfs_release_storage (hfsmp, NULL, &lookup_rsrc->lookup_fork, cp->c_fileid);
-                       FREE(lookup_rsrc, M_TEMP);
-
-                       if (error) {
-                               goto out;
+                       /*
+                        * Mark cnode in transit so that no one can get this 
+                        * cnode from cnode hash.
+                        */
+                       // hfs_chash_mark_in_transit(hfsmp, cp);
+                       // XXXdbg - remove the cnode from the hash table since it's deleted
+                       //          otherwise someone could go to sleep on the cnode and not
+                       //          be woken up until this vnode gets recycled which could be
+                       //          a very long time...
+                       hfs_chashremove(hfsmp, cp);
+                       
+                       cp->c_flag |= C_NOEXISTS;   // XXXdbg
+                       cp->c_rdev = 0;
+                       
+                       if (!started_tr) {
+                               if (hfs_start_transaction(hfsmp) != 0) {
+                                       error = EINVAL;
+                                       goto out;
+                               }
+                               started_tr = true;
                        }
-
+                       
                        /*
-                        * This fileid's resource fork extents have now been fully deleted on-disk
-                        * and this CNID is no longer valid. At this point, we should be able to
-                        * zero out cp->c_blocks to indicate there is no data left in this file.
+                        * Reserve some space in the Catalog file.
                         */
-                       cp->c_blocks = 0;
-               }
-       }
-       
-       /*
-        * If we represent the last fork (or none in the case of a dir), 
-        * and the cnode has become open-unlinked,
-        * AND it has EA's, then we need to get rid of them.
-        *
-        * Note that this must happen outside of any other transactions
-        * because it starts/ends its own transactions and grabs its
-        * own locks.  This is to prevent a file with a lot of attributes
-        * from creating a transaction that is too large (which panics).
-        */
-    if ((cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0 &&
-               (cp->c_flag & C_DELETED) && 
-               (forkcount <= 1)) {
-               
-        ea_error = hfs_removeallattr(hfsmp, cp->c_fileid);
-    }
-       
-       
-       /*
-        * If the cnode represented an open-unlinked file, then now
-        * actually remove the cnode's catalog entry and release all blocks
-        * it may have been using.  
-        */
-    if ((cp->c_flag & C_DELETED) && (forkcount <= 1)) {
-        /*
-         * Mark cnode in transit so that no one can get this 
-         * cnode from cnode hash.
-         */
-               // hfs_chash_mark_in_transit(hfsmp, cp);
-               // XXXdbg - remove the cnode from the hash table since it's deleted
-               //          otherwise someone could go to sleep on the cnode and not
-               //          be woken up until this vnode gets recycled which could be
-               //          a very long time...
-        hfs_chashremove(hfsmp, cp);
-               
-        cp->c_flag |= C_NOEXISTS;   // XXXdbg
-        cp->c_rdev = 0;
-               
-        if (started_tr == 0) {
-            if (hfs_start_transaction(hfsmp) != 0) {
-                               error = EINVAL;
+                       if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) {
                                goto out;
-            }
-            started_tr = 1;
-        }
-               
-        /*
-         * Reserve some space in the Catalog file.
-         */
-        if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) {
-            goto out;
-        }
-        cat_reserve = 1;
-               
-        lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
-               
-        if (cp->c_blocks > 0) {
-            printf("hfs_inactive: deleting non-empty%sfile %d, "
-                   "blks %d\n", VNODE_IS_RSRC(vp) ? " rsrc " : " ",
-                   (int)cp->c_fileid, (int)cp->c_blocks);
-        }
-               
-               //
-        // release the name pointer in the descriptor so that
-        // cat_delete() will use the file-id to do the deletion.
-        // in the case of hard links this is imperative (in the
-        // case of regular files the fileid and cnid are the
-        // same so it doesn't matter).
-        //
-        cat_releasedesc(&cp->c_desc);
-               
-        /*
-         * The descriptor name may be zero,
-         * in which case the fileid is used.
-         */
-        error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr);
-               
-        if (error && truncated && (error != ENXIO)) {
-            printf("hfs_inactive: couldn't delete a truncated file!");
-       }
-               
-        /* Update HFS Private Data dir */
-        if (error == 0) {
-            hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
-            if (vnode_isdir(vp)) {
-                DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
-            }
-            (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
-                                                        &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
-        }
-               
-        hfs_systemfile_unlock(hfsmp, lockflags);
-               
-        if (error) {                   
-                       goto out;
-               }
-               
-#if QUOTA
-        if (hfsmp->hfs_flags & HFS_QUOTAS)
-            (void)hfs_chkiq(cp, -1, NOCRED, 0);
-#endif /* QUOTA */
-               
-        /* Already set C_NOEXISTS at the beginning of this block */
-        cp->c_flag &= ~C_DELETED;
-        cp->c_touch_chgtime = TRUE;
-        cp->c_touch_modtime = TRUE;
-               
-        if (error == 0)
-            hfs_volupdate(hfsmp, (v_type == VDIR) ? VOL_RMDIR : VOL_RMFILE, 0);
-    }
-       
-       /*
-     * A file may have had delayed allocations, in which case hfs_update
-     * would not have updated the catalog record (cat_update).  We need
-     * to do that now, before we lose our fork data.  We also need to
-     * force the update, or hfs_update will again skip the cat_update.
-        *
-        * If the file has C_NOEXISTS set, then we can skip the hfs_update call
-        * because the catalog entry has already been removed.  There would be no point
-     * to looking up the entry in the catalog to modify it when we already know it's gone
-        */
-    if ((!ISSET(cp->c_flag, C_NOEXISTS)) &&
-               ((cp->c_flag & C_MODIFIED) || cp->c_touch_acctime || 
-                cp->c_touch_chgtime || cp->c_touch_modtime)) {
+                       }
+                       cat_reserve = 1;
+                       
+                       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
+                       
+                       if (cp->c_blocks > 0) {
+                               printf("hfs_inactive: deleting non-empty%sfile %d, "
+                                          "blks %d\n", VNODE_IS_RSRC(vp) ? " rsrc " : " ",
+                                          (int)cp->c_fileid, (int)cp->c_blocks);
+                       }
+                       
+                       //
+                       // release the name pointer in the descriptor so that
+                       // cat_delete() will use the file-id to do the deletion.
+                       // in the case of hard links this is imperative (in the
+                       // case of regular files the fileid and cnid are the
+                       // same so it doesn't matter).
+                       //
+                       cat_releasedesc(&cp->c_desc);
                        
-                       if ((cp->c_flag & C_MODIFIED) || cp->c_touch_modtime){
-                               cp->c_flag |= C_FORCEUPDATE;
+                       /*
+                        * The descriptor name may be zero,
+                        * in which case the fileid is used.
+                        */
+                       error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr);
+                       
+                       if (error && truncated && (error != ENXIO)) {
+                               printf("hfs_inactive: couldn't delete a truncated file!");
                        }
-                       hfs_update(vp, 0);
+                       
+                       /* Update HFS Private Data dir */
+                       if (error == 0) {
+                               hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
+                               if (vnode_isdir(vp)) {
+                                       DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
+                               }
+                               (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
+                                                                &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
+                       }
+                       
+                       hfs_systemfile_unlock(hfsmp, lockflags);
+                       
+                       if (error) {                    
+                               goto out;
+                       }
+                       
+       #if QUOTA
+                       if (hfsmp->hfs_flags & HFS_QUOTAS)
+                               (void)hfs_chkiq(cp, -1, NOCRED, 0);
+       #endif /* QUOTA */
+                       
+                       /* Already set C_NOEXISTS at the beginning of this block */
+                       cp->c_flag &= ~C_DELETED;
+                       cp->c_touch_chgtime = TRUE;
+                       cp->c_touch_modtime = TRUE;
+                       
+                       if (error == 0)
+                               hfs_volupdate(hfsmp, (v_type == VDIR) ? VOL_RMDIR : VOL_RMFILE, 0);
                }
+       } // if <open unlinked>
+
+       hfs_update(vp, reclaim ? HFS_UPDATE_FORCE : 0);
 
        /*
         * Since we are about to finish what might be an inactive call, propagate
         * any remaining modified or touch bits from the cnode to the vnode.  This
         * serves as a hint to vnode recycling that we shouldn't recycle this vnode
         * synchronously.
+        *
+        * For now, if the node *only* has a dirty atime, we don't mark
+        * the vnode as dirty.  VFS's asynchronous recycling can actually
+        * lead to worse performance than having it synchronous.  When VFS
+        * is fixed to be more performant, we can be more honest about
+        * marking vnodes as dirty when it's only the atime that's dirty.
         */
-       if (ISSET(cp->c_flag, C_MODIFIED) || ISSET(cp->c_flag, C_FORCEUPDATE) ||
-               cp->c_touch_acctime || cp->c_touch_chgtime ||
-               cp->c_touch_modtime || ISSET(cp->c_flag, C_NEEDS_DATEADDED) ||
-               ISSET(cp->c_flag, C_DELETED)) {
+       if (hfs_is_dirty(cp) == HFS_DIRTY || ISSET(cp->c_flag, C_DELETED)) {
                vnode_setdirty(vp);
        } else {
                vnode_cleardirty(vp);
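
The hfs_is_dirty() predicate used in the check above is defined elsewhere in this change. As a rough illustration of the policy the comment describes (an atime-only touch does not make the vnode dirty), a predicate of that shape could look like the sketch below. HFS_DIRTY appears in the code above; HFS_NOT_DIRTY, HFS_DIRTY_ATIME and the exact flag tests here are assumptions made only for illustration.

enum sketch_dirty_state { HFS_NOT_DIRTY, HFS_DIRTY_ATIME, HFS_DIRTY };

static enum sketch_dirty_state
sketch_is_dirty(const struct cnode *cp)
{
        /* Any pending metadata change beyond an access-time touch is "really" dirty. */
        if (ISSET(cp->c_flag, C_MODIFIED | C_MINOR_MOD) ||
            cp->c_touch_chgtime || cp->c_touch_modtime)
                return HFS_DIRTY;

        /* An atime-only touch is remembered, but does not dirty the vnode. */
        if (cp->c_touch_acctime)
                return HFS_DIRTY_ATIME;

        return HFS_NOT_DIRTY;
}
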
@@ -613,46 +566,12 @@ out:
     if (cat_reserve)
         cat_postflight(hfsmp, &cookie, p);
        
-    // XXXdbg - have to do this because a goto could have come here
     if (started_tr) {
         hfs_end_transaction(hfsmp);
-        started_tr = 0;
+        started_tr = false;
     }
 
-#if 0
-#if CONFIG_PROTECT
-       /* 
-        * cnode truncate lock and cnode lock are both held exclusive here. 
-        *
-        * Go ahead and flush the keys out if this cnode is the last fork
-        * and it is not class F.  Class F keys should not be purged because they only
-        * exist in memory and have no persistent keys.  Only do this 
-        * if we haven't already done it yet (maybe a vnode skipped inactive 
-        * and went straight to reclaim).  This function gets called from both reclaim and
-        * inactive, so it will happen first in inactive if possible.
-        * 
-        * We need to be mindful that all pending IO for this file has already been
-        * issued and completed before we bzero out the key.  This is because
-        * if it isn't, tossing the key here could result in garbage IO being
-        * written (by using the bzero'd key) if the writes are happening asynchronously.
-        * 
-        * In addition, class A files may have already been purged due to the 
-        * lock event occurring.
-        */
-       if (forkcount == 1) {
-               struct cprotect *entry = cp->c_cpentry;
-               if ((entry) && ( CP_CLASS(entry->cp_pclass) != PROTECTION_CLASS_F)) {
-                       if ((cp->c_cpentry->cp_flags & CP_KEY_FLUSHED) == 0) {
-                               cp->c_cpentry->cp_flags |= CP_KEY_FLUSHED;
-                               bzero (cp->c_cpentry->cp_cache_key, cp->c_cpentry->cp_cache_key_len);
-                               bzero (cp->c_cpentry->cp_cache_iv_ctx, sizeof(aes_encrypt_ctx));
-                       }
-               }
-       }
-#endif
-#endif
-       
-       return error;   
+       return error;
 }
 
 
@@ -762,12 +681,8 @@ hfs_filedone(struct vnode *vp, vfs_context_t context,
        struct cnode *cp;
        struct filefork *fp;
        struct hfsmount *hfsmp;
-       struct rl_entry *invalid_range;
        off_t leof;
        u_int32_t blks, blocksize;
-       /* flags for zero-filling sparse ranges */
-       int cluster_flags = IO_CLOSE;
-       int cluster_zero_flags = IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE;
 
        cp = VTOC(vp);
        fp = VTOF(vp);
@@ -777,53 +692,8 @@ hfs_filedone(struct vnode *vp, vfs_context_t context,
        if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (fp->ff_blocks == 0))
                return (0);
 
-       if (!ISSET(opts, HFS_FILE_DONE_NO_SYNC)) {
-#if CONFIG_PROTECT
-               /* 
-                * Figure out if we need to do synchronous IO. 
-                * 
-                * If the file represents a content-protected file, we may need
-                * to issue synchronous IO when we dispatch to the cluster layer.
-                * If we didn't, then the IO would go out to the disk asynchronously.
-                * If the vnode hits the end of inactive before getting reclaimed, the
-                * content protection keys would be wiped/bzeroed out, and we'd end up
-                * trying to issue the IO with an invalid key.  This will lead to file 
-                * corruption.  IO_SYNC will force the cluster_push to wait until all IOs
-                * have completed (though they may be in the track cache).
-                */
-               if (cp_fs_protected(VTOVFS(vp))) {
-                       cluster_flags |= IO_SYNC;
-                       cluster_zero_flags |= IO_SYNC;
-               }
-#endif
-
-               hfs_unlock(cp);
-               (void) cluster_push(vp, cluster_flags);
-               hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
-       }
+       hfs_flush_invalid_ranges(vp);
 
-       /*
-        * Explicitly zero out the areas of file
-        * that are currently marked invalid.
-        */
-       while ((invalid_range = TAILQ_FIRST(&fp->ff_invalidranges))) {
-               off_t start = invalid_range->rl_start;
-               off_t end = invalid_range->rl_end;
-       
-               /* The range about to be written must be validated
-                * first, so that VNOP_BLOCKMAP() will return the
-                * appropriate mapping for the cluster code:
-                */
-               rl_remove(start, end, &fp->ff_invalidranges);
-
-               hfs_unlock(cp);
-               (void) cluster_write(vp, (struct uio *) 0,
-                                    leof, end + 1, start, (off_t)0, cluster_zero_flags);
-               hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
-               cp->c_flag |= C_MODIFIED;
-       }
-       cp->c_flag &= ~C_ZFWANTSYNC;
-       cp->c_zftimeout = 0;
        blocksize = VTOVCB(vp)->blockSize;
        blks = leof / blocksize;
        if (((off_t)blks * (off_t)blocksize) != leof)
@@ -837,17 +707,15 @@ hfs_filedone(struct vnode *vp, vfs_context_t context,
 
        if (!ISSET(opts, HFS_FILE_DONE_NO_SYNC)) {
                hfs_unlock(cp);
-               (void) cluster_push(vp, cluster_flags);
+               cluster_push(vp, IO_CLOSE);
                hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
-       
+
                /*
                 * If the hfs_truncate didn't happen to flush the vnode's
                 * information out to disk, force it to be updated now that
                 * all invalid ranges have been zero-filled and validated:
                 */
-               if (cp->c_flag & C_MODIFIED) {
-                       hfs_update(vp, 0);
-               }
+               hfs_update(vp, 0);
        }
 
        return (0);
@@ -892,11 +760,13 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
        }
 
        /*
-        * Keep track of an inactive hot file.
+        * Keep track of an inactive hot file.  Don't bother on ssd's since
+        * the tracking is done differently (it's done at read() time)
         */
        if (!vnode_isdir(vp) &&
            !vnode_issystem(vp) &&
-           !(cp->c_flag & (C_DELETED | C_NOEXISTS)) ) {
+           !(cp->c_flag & (C_DELETED | C_NOEXISTS)) &&
+           !(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) {
                (void) hfs_addhotfile(vp);
        }
        vnode_removefsref(vp);
@@ -943,7 +813,8 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
                /* Dump cached symlink data */
                if (vnode_islnk(vp) && (fp->ff_symlinkptr != NULL)) {
                        FREE(fp->ff_symlinkptr, M_TEMP);
-               }               
+               }
+               rl_remove_all(&fp->ff_invalidranges);
                FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK);
        }
 
@@ -953,7 +824,7 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
        if (reclaim_cnode) {
                hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_TRANSIT);
                hfs_unlock(cp);
-               hfs_reclaim_cnode(cp);
+               hfs_reclaim_cnode(hfsmp, cp);
        } 
        else  {
                /* 
@@ -981,7 +852,39 @@ extern int (**hfs_std_vnodeop_p) (void *);
 /*
  * hfs_getnewvnode - get new default vnode
  *
- * The vnode is returned with an iocount and the cnode locked
+ * The vnode is returned with an iocount and the cnode locked.  
+ * The cnode of the parent vnode 'dvp' may or may not be locked, depending on 
+ * the circumstances.   The cnode in question (if acquiring the resource fork),
+ * may also already be locked at the time we enter this function.
+ *
+ * Note that there are both input and output flag arguments to this function.  
+ * If one of the input flags (specifically, GNV_USE_VP) is set, then
+ * hfs_getnewvnode will use the parameter *vpp, which is traditionally only 
+ * an output parameter, as both an input and output parameter.  It will use 
+ * the vnode provided in the output, and pass it to vnode_create with the 
+ * proper flavor so that a new vnode is _NOT_ created on our behalf when 
+ * we dispatch to VFS.  This may be important in various HFS vnode creation
+ * routines, such as create or get-resource-fork, because we risk deadlock if
+ * jetsam is involved.
+ *
+ * Deadlock potential exists if jetsam is synchronously invoked while we are waiting
+ * for a vnode to be recycled in order to give it the identity we want.  If jetsam
+ * happens to target a process for termination that is blocked in-kernel, waiting to 
+ * acquire the cnode lock on our parent 'dvp', while our current thread has it locked, 
+ * neither side will make forward progress and the watchdog timer will eventually fire. 
+ * To prevent this, a caller of hfs_getnewvnode may choose to proactively force 
+ * any necessary vnode reclamation/recycling while it is not holding any locks and 
+ * thus not prone to deadlock.  If this is the case, GNV_USE_VP will be set and
+ * the parameter will be used as described above. 
+ *
+ *  !!! <NOTE> !!!!
+ * In circumstances when GNV_USE_VP is set, this function _MUST_ clean up and either consume
+ * or dispose of the provided vnode. We funnel all errors to a single return value so that
+ * if provided_vp is still non-NULL, then we will dispose of the vnode. This will occur in
+ * all error cases of this function --  anywhere we zero/NULL out the *vpp parameter. It may 
+ * also occur if the current thread raced with another to create the same vnode, and we 
+ * find the entry already present in the cnode hash.
+ * !!! </NOTE> !!!
  */
 int
 hfs_getnewvnode(
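
A caller that wants the GNV_USE_VP behaviour described above would pre-allocate the vnode before taking any cnode locks and hand it in through *vpp. The sketch below only illustrates that contract: vnode_create_empty() is assumed as the helper that returns an allocated-but-uninitialized vnode, and the argument order of hfs_getnewvnode() follows its usual call sites rather than anything spelled out in this hunk.

static int
sketch_get_vnode_jetsam_safe(struct hfsmount *hfsmp, struct vnode *dvp,
    struct componentname *cnp, struct cat_desc *descp, struct cat_attr *attrp,
    struct cat_fork *forkp, struct vnode **vpp)
{
        int out_flags = 0;
        int error;

        /* Allocate the vnode while holding no cnode locks (assumed helper). */
        error = vnode_create_empty(vpp);
        if (error)
                return error;

        /* ... take the cnode/truncate locks and do the catalog work here ... */

        /*
         * hfs_getnewvnode() either consumes *vpp (success) or disposes of it
         * internally on error, so the caller never vnode_put()s it twice.
         */
        return hfs_getnewvnode(hfsmp, dvp, cnp, descp, GNV_USE_VP,
            attrp, forkp, vpp, &out_flags);
}
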
@@ -1002,27 +905,43 @@ hfs_getnewvnode(
        struct cnode *cp = NULL;
        struct filefork *fp = NULL;
        int hfs_standard = 0;
-       int retval;
+       int retval = 0;
        int issystemfile;
        int wantrsrc;
        int hflags = 0;
+       int need_update_identity = 0;
        struct vnode_fsparam vfsp;
        enum vtype vtype;
+
+       struct vnode *provided_vp = NULL;
+
+
 #if QUOTA
        int i;
 #endif /* QUOTA */
        
        hfs_standard = (hfsmp->hfs_flags & HFS_STANDARD);
 
+       if (flags & GNV_USE_VP) {
+               /* Store the provided VP for later use */
+               provided_vp = *vpp;
+       }
+
+       /* Zero out the vpp regardless of provided input */
+       *vpp = NULL;
+
+       /* Zero out the out_flags */
+       *out_flags = 0;
+
        if (attrp->ca_fileid == 0) {
-               *vpp = NULL;
-               return (ENOENT);
+               retval = ENOENT;
+               goto gnv_exit;
        }
 
 #if !FIFO
        if (IFTOVT(attrp->ca_mode) == VFIFO) {
-               *vpp = NULL;
-               return (ENOTSUP);
+               retval = ENOTSUP;
+               goto gnv_exit;
        }
 #endif /* !FIFO */
        vtype = IFTOVT(attrp->ca_mode);
@@ -1033,15 +952,13 @@ hfs_getnewvnode(
        if (vtype == VBAD) {
                /* Mark the FS as corrupt and bail out */
                hfs_mark_inconsistent(hfsmp, HFS_INCONSISTENCY_DETECTED);
-               return EINVAL;
+               retval = EINVAL;
+               goto gnv_exit;
        }
-
-       /* Zero out the out_flags */
-       *out_flags = 0;
-
+       
 #ifdef HFS_CHECK_LOCK_ORDER
        /*
-        * The only case were its permissible to hold the parent cnode
+        * The only case where it's permissible to hold the parent cnode
         * lock is during a create operation (hfs_makenode) or when
         * we don't need the cnode lock (GNV_SKIPLOCK).
         */
@@ -1062,8 +979,18 @@ hfs_getnewvnode(
         * If the id is no longer valid for lookups we'll get back a NULL cp.
         */
        if (cp == NULL) {
-               return (ENOENT);
+               retval = ENOENT;
+               goto gnv_exit;
        }
+       /*
+        * We may have been provided a vnode via 
+        * GNV_USE_VP.  In this case, we have raced with
+        * a 2nd thread to create the target vnode. The provided
+        * vnode that was passed in will be dealt with at the 
+        * end of the function, as we don't zero out the field
+        * until we're ready to pass responsibility to VFS. 
+        */
+
 
        /* 
         * If we get a cnode/vnode pair out of hfs_chash_getcnode, then update the 
@@ -1083,10 +1010,28 @@ hfs_getnewvnode(
         */
        
        if (!(hfs_checkdeleted(cp))) {
+               //
+               // If the bytes of the filename in the descp do not match the bytes in the
+               // cnp (and we're not looking up the resource fork), then we want to update
+               // the vnode identity to contain the bytes that HFS stores so that when an
+               // fsevent gets generated, it has the correct filename.  Otherwise, for daemons
+               // that match filenames produced by fsevents against filenames they have stored
+               // elsewhere (e.g. bladerunner, backupd, mds), the filenames will not match.
+               // See: <rdar://problem/8044697> FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories
+               // for more details.
+               //
+#ifdef CN_WANTSRSRCFORK
+               if (*vpp && cnp && cnp->cn_nameptr && !(cnp->cn_flags & CN_WANTSRSRCFORK) && descp && descp->cd_nameptr && strncmp((const char *)cnp->cn_nameptr, (const char *)descp->cd_nameptr, descp->cd_namelen) != 0) {
+#else
+               if (*vpp && cnp && cnp->cn_nameptr && descp && descp->cd_nameptr && strncmp((const char *)cnp->cn_nameptr, (const char *)descp->cd_nameptr, descp->cd_namelen) != 0) {
+#endif
+                       vnode_update_identity (*vpp, dvp, (const char *)descp->cd_nameptr, descp->cd_namelen, 0, VNODE_UPDATE_NAME);
+               }
                if ((cp->c_flag & C_HARDLINK) && descp->cd_nameptr && descp->cd_namelen > 0) {
                        /* If cnode is uninitialized, its c_attr will be zeroed out; cnids wont match. */
                        if ((descp->cd_cnid == cp->c_attr.ca_fileid)  &&
                                        (attrp->ca_linkcount != cp->c_attr.ca_linkcount)){
+                               
                                if ((flags & GNV_SKIPLOCK) == 0) {
                                        /* 
                                         * Then we took the lock. Drop it before calling
@@ -1100,7 +1045,7 @@ hfs_getnewvnode(
                                 * Emit ERECYCLE and GNV_CAT_ATTRCHANGED to 
                                 * force a re-drive in the lookup routine.  
                                 * Drop the iocount on the vnode obtained from 
-                                * chash_getcnode if needed.
+                                * chash_getcnode if needed. 
                                 */     
                                if (*vpp != NULL) {
                                        vnode_put (*vpp);
@@ -1120,7 +1065,8 @@ hfs_getnewvnode(
                                }
                                
                                *out_flags = GNV_CAT_ATTRCHANGED;
-                               return ERECYCLE;        
+                               retval = ERECYCLE;
+                               goto gnv_exit;
                        }
                        else {
                                /* 
@@ -1140,18 +1086,37 @@ hfs_getnewvnode(
                                 * that the new link lived in the same directory as the alternative name for
                                 * this item.  
                                 */
-                               if ((*vpp != NULL) && (cnp)) {
+                               if ((*vpp != NULL) && (cnp || cp->c_desc.cd_nameptr)) {
                                        /* we could be requesting the rsrc of a hardlink file... */
-                                       vnode_update_identity (*vpp, dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash,
-                                                       (VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME));
+#ifdef CN_WANTSRSRCFORK
+                                       if (cp->c_desc.cd_nameptr && (cnp == NULL || !(cnp->cn_flags & CN_WANTSRSRCFORK))) {
+#else
+                                       if (cp->c_desc.cd_nameptr) {
+#endif
+                                               //
+                                               // Update the identity with what we have stored on disk as
+                                               // the name of this file.  This is related to:
+                                               //    <rdar://problem/8044697> FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories
+                                               //
+                                               vnode_update_identity (*vpp, dvp, (const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen, 0,
+                                                              (VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME));
+                                       } else if (cnp) {
+                                               vnode_update_identity (*vpp, dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash,
+                                                                      (VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME));
+                                       }
                                }
                        }
                }
        }
        
-       /* Check if we found a matching vnode */
+       /* 
+        * At this point, we have performed hardlink and open-unlinked checks
+        * above.  We have now validated the state of the vnode that was given back
+        * to us from the cnode hash code and find it safe to return. 
+        */
        if (*vpp != NULL) {
-               return (0);
+               retval = 0;
+               goto gnv_exit;
        }
 
        /*
@@ -1171,7 +1136,7 @@ hfs_getnewvnode(
                                if ((flags & GNV_SKIPLOCK) == 0) {
                                        hfs_unlock(cp);
                                }
-                               hfs_reclaim_cnode(cp);
+                               hfs_reclaim_cnode(hfsmp, cp);
                                *vpp = NULL;
                                /* 
                                 * If we hit this case, that means that the entry was there in the catalog when
@@ -1185,7 +1150,8 @@ hfs_getnewvnode(
                                 */
                                if (error == ENOENT) {
                                        *out_flags = GNV_CAT_DELETED;
-                                       return ENOENT;  
+                                       retval = ENOENT;
+                                       goto gnv_exit;
                                }
 
                                /*
@@ -1196,7 +1162,8 @@ hfs_getnewvnode(
                                 */
                                if (error == ERECYCLE) {
                                        *out_flags = GNV_CAT_ATTRCHANGED;
-                                       return (ERECYCLE);
+                                       retval = ERECYCLE;
+                                       goto gnv_exit;
                                }
                        }
                }
@@ -1209,9 +1176,10 @@ hfs_getnewvnode(
                descp->cd_flags &= ~CD_HASBUF;
 
                /* Tag hardlinks */
-               if ((vtype == VREG || vtype == VDIR) &&
-                   ((descp->cd_cnid != attrp->ca_fileid) ||
-                    (attrp->ca_recflags & kHFSHasLinkChainMask))) {
+               if ((vtype == VREG || vtype == VDIR
+                        || vtype == VSOCK || vtype == VFIFO)
+                       && (descp->cd_cnid != attrp->ca_fileid
+                               || ISSET(attrp->ca_recflags, kHFSHasLinkChainMask))) {
                        cp->c_flag |= C_HARDLINK;
                }
                /*
@@ -1327,6 +1295,7 @@ hfs_getnewvnode(
                vfsp.vnfs_dvp = dvp;
                vfsp.vnfs_cnp = cnp;
        }
+
        vfsp.vnfs_fsnode = cp;
 
        /*
@@ -1359,8 +1328,23 @@ hfs_getnewvnode(
                vfsp.vnfs_filesize = 0;
 
        vfsp.vnfs_flags = VNFS_ADDFSREF;
-       if (dvp == NULLVP || cnp == NULL || !(cnp->cn_flags & MAKEENTRY) || (flags & GNV_NOCACHE))
+#ifdef CN_WANTSRSRCFORK
+       if (cnp && cnp->cn_nameptr && !(cnp->cn_flags & CN_WANTSRSRCFORK) && cp->c_desc.cd_nameptr && strncmp((const char *)cnp->cn_nameptr, (const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen) != 0) {
+#else
+       if (cnp && cnp->cn_nameptr && cp->c_desc.cd_nameptr && strncmp((const char *)cnp->cn_nameptr, (const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen) != 0) {
+#endif
+               //
+               // We don't want VFS to add an entry for this vnode because the name in the
+               // cnp does not match the bytes stored on disk for this file.  Instead we'll
+               // update the identity later after the vnode is created and we'll do so with
+               // the correct bytes for this filename.  For more details, see:
+               //   <rdar://problem/8044697> FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories
+               //
+               vfsp.vnfs_flags |= VNFS_NOCACHE;
+               need_update_identity = 1;
+       } else if (dvp == NULLVP || cnp == NULL || !(cnp->cn_flags & MAKEENTRY) || (flags & GNV_NOCACHE)) {
                vfsp.vnfs_flags |= VNFS_NOCACHE;
+       }
 
        /* Tag system files */
        vfsp.vnfs_marksystem = issystemfile;
@@ -1370,15 +1354,45 @@ hfs_getnewvnode(
                vfsp.vnfs_markroot = 1;
        else    
                vfsp.vnfs_markroot = 0;
+       
+       /*
+        * If provided_vp was non-NULL, then it is an already-allocated (but not 
+        * initialized) vnode. We simply need to initialize it to this identity.  
+        * If it was NULL, then assume that we need to call vnode_create with the 
+        * normal arguments/types.
+        */ 
+       if (provided_vp) {
+               vp = provided_vp;
+               /* 
+                * After we assign the value of provided_vp into 'vp' (so that it can be
+                * mutated safely by vnode_initialize), we can NULL it out.  At this point, the disposal
+                * and handling of the provided vnode will be the responsibility of VFS, which will
+                * clean it up and vnode_put it properly if vnode_initialize fails. 
+                */
+               provided_vp = NULL;
 
-       if ((retval = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, cvpp))) {
-               if (fp) {
+               retval = vnode_initialize (VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp);
+               /* See error handling below for resolving provided_vp */
+       }
+       else {
+               /* Do a standard vnode_create */
+               retval = vnode_create (VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp);
+       }
+
+       /* 
+        * We used a local variable to hold the result of vnode_create/vnode_initialize so that
+        * on error cases in vnode_create we won't accidentally harm the cnode's fields
+        */
+       
+       if (retval) {
+               /* Clean up if we encountered an error */       
+               if (fp) {
                        if (fp == cp->c_datafork)
-                               cp->c_datafork = NULL;
+                               cp->c_datafork = NULL;
                        else
-                               cp->c_rsrcfork = NULL;
+                               cp->c_rsrcfork = NULL;
 
-                       FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK);
+                       FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK);
                }
                /*
                 * If this is a newly created cnode or a vnode reclaim
@@ -1386,7 +1400,7 @@ hfs_getnewvnode(
                 */
                if ((cp->c_vp == NULL) && (cp->c_rsrc_vp == NULL)) {
                        hfs_chash_abort(hfsmp, cp);
-                       hfs_reclaim_cnode(cp);
+                       hfs_reclaim_cnode(hfsmp, cp);
                } 
                else {
                        hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH);
@@ -1395,13 +1409,38 @@ hfs_getnewvnode(
                        }
                }
                *vpp = NULL;
-               return (retval);
+               goto gnv_exit;
        }
-       vp = *cvpp;
+
+       /* If no error, then assign the value into the cnode's fields  */       
+       *cvpp = vp;
+
        vnode_settag(vp, VT_HFS);
        if (cp->c_flag & C_HARDLINK) {
                vnode_setmultipath(vp);
        }
+
+       if (cp->c_attr.ca_recflags & kHFSFastDevCandidateMask) {
+               vnode_setfastdevicecandidate(vp);
+       }
+
+       if (cp->c_attr.ca_recflags & kHFSAutoCandidateMask) {
+               vnode_setautocandidate(vp);
+       }
+
+
+
+
+       if (vp && need_update_identity) {
+               //
+               // As above, update the name of the vnode if the bytes stored in hfs do not match
+               // the bytes in the cnp.  See this radar:
+               //    <rdar://problem/8044697> FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories
+               // for more details.
+               //
+               vnode_update_identity (vp, dvp, (const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen, 0, VNODE_UPDATE_NAME);
+       }
+
        /*
         * Tag resource fork vnodes as needing an VNOP_INACTIVE
         * so that any deferred removes (open unlinked files)
@@ -1423,7 +1462,7 @@ hfs_getnewvnode(
        /*
         * Stop tracking an active hot file.
         */
-       if (!(flags & GNV_CREATE) && (vtype != VDIR) && !issystemfile) {
+       if (!(flags & GNV_CREATE) && (vtype != VDIR) && !issystemfile && !(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) {
                (void) hfs_removehotfile(vp);
        }
        
@@ -1435,12 +1474,19 @@ hfs_getnewvnode(
 #endif
 
        *vpp = vp;
-       return (0);
+       retval = 0;
+
+gnv_exit:
+       if (provided_vp) {
+               /* Release our empty vnode if it was not used */
+               vnode_put (provided_vp);
+       }
+       return retval;
 }
 
 
 static void
-hfs_reclaim_cnode(struct cnode *cp)
+hfs_reclaim_cnode(hfsmount_t *hfsmp, struct cnode *cp)
 {
 #if QUOTA
        int i;
@@ -1483,11 +1529,12 @@ hfs_reclaim_cnode(struct cnode *cp)
        }
 #endif
 #if CONFIG_PROTECT
-       cp_entry_destroy(cp->c_cpentry);
+       cp_entry_destroy(hfsmp, cp->c_cpentry);
        cp->c_cpentry = NULL;
+#else
+       (void)hfsmp;    // Prevent compiler warning
 #endif
-       
-       
+
        bzero(cp, sizeof(struct cnode));
        FREE_ZONE(cp, sizeof(struct cnode), M_HFSNODE);
 }
@@ -1657,24 +1704,24 @@ void hfs_write_dateadded (struct cat_attr *attrp, u_int32_t dateadded) {
 static u_int32_t
 hfs_get_dateadded_internal(const uint8_t *finderinfo, mode_t mode)
 {
-       u_int8_t *finfo = NULL;
+       const uint8_t *finfo = NULL;
        u_int32_t dateadded = 0;
 
 
 
        /* overlay the FinderInfo to the correct pointer, and advance */
-       finfo = (u_int8_t*)finderinfo + 16;
+       finfo = finderinfo + 16;
 
        /* 
         * FinderInfo is written out in big endian... make sure to convert it to host
         * native before we use it.
         */
        if (S_ISREG(mode)) {
-               struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
+               const struct FndrExtendedFileInfo *extinfo = (const struct FndrExtendedFileInfo *)finfo;
                dateadded = OSSwapBigToHostInt32 (extinfo->date_added);
        }
        else if (S_ISDIR(mode)) {
-               struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
+               const struct FndrExtendedDirInfo *extinfo = (const struct FndrExtendedDirInfo *)finfo;
                dateadded = OSSwapBigToHostInt32 (extinfo->date_added);
        }
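
For reference, the pointer math above relies on the catalog storing FinderInfo as a 32-byte blob: the first 16 bytes hold the classic Finder info, and the extended structure carrying date_added (and, further down, the write generation counter) occupies the second 16 bytes in big-endian form. A minimal sketch of reading date_added for a regular file from such a raw buffer, mirroring the code above:

static u_int32_t
sketch_date_added_from_finderinfo(const uint8_t finderinfo[32])
{
        /* Extended Finder info lives in the second 16 bytes, big-endian on disk. */
        const struct FndrExtendedFileInfo *extinfo =
            (const struct FndrExtendedFileInfo *)(finderinfo + 16);

        return OSSwapBigToHostInt32(extinfo->date_added);
}
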
 
@@ -1812,7 +1859,7 @@ uint32_t hfs_incr_gencount (struct cnode *cp) {
                }
                extinfo->write_gen_counter = OSSwapHostToBigInt32 (gcount);
 
-               SET(cp->c_flag, C_MODIFIED);
+               SET(cp->c_flag, C_MINOR_MOD);
        }
        else {
                gcount = 0;
@@ -1829,11 +1876,11 @@ uint32_t hfs_incr_gencount (struct cnode *cp) {
 static u_int32_t
 hfs_get_gencount_internal(const uint8_t *finderinfo, mode_t mode)
 {
-       u_int8_t *finfo = NULL;
+       const uint8_t *finfo = NULL;
        u_int32_t gcount = 0;
 
        /* overlay the FinderInfo to the correct pointer, and advance */
-       finfo = (u_int8_t*)finderinfo;
+       finfo = finderinfo;
        finfo = finfo + 16;
 
        /* 
@@ -1845,7 +1892,7 @@ hfs_get_gencount_internal(const uint8_t *finderinfo, mode_t mode)
         *       last 32-bit word) so it is safe to have one code path here.
         */
        if (S_ISDIR(mode) || S_ISREG(mode)) {
-               struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
+               const struct FndrExtendedFileInfo *extinfo = (const struct FndrExtendedFileInfo *)finfo;
                gcount = OSSwapBigToHostInt32 (extinfo->write_gen_counter);
                
                /* 
@@ -1893,11 +1940,12 @@ void
 hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp)
 {
        vfs_context_t ctx;
-       /* don't modify times if volume is read-only */
-       if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+
+       if (ISSET(hfsmp->hfs_flags, HFS_READ_ONLY) || ISSET(cp->c_flag, C_NOEXISTS)) {
                cp->c_touch_acctime = FALSE;
                cp->c_touch_chgtime = FALSE;
                cp->c_touch_modtime = FALSE;
+               CLR(cp->c_flag, C_NEEDS_DATEADDED);
                return;
        }
 #if CONFIG_HFS_STD
@@ -1935,42 +1983,45 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp)
                microtime(&tv);
                    
                if (cp->c_touch_acctime) {
-                       cp->c_atime = tv.tv_sec;
                        /*
-                        * When the access time is the only thing changing
-                        * then make sure its sufficiently newer before
-                        * committing it to disk.
+                        * When the access time is the only thing changing, we
+                        * won't necessarily write it to disk immediately.  We
+                        * only do the atime update at vnode recycle time, when
+                        * fsync is called or when there's another reason to write
+                        * to the metadata.
                         */
-                       if ((((u_int32_t)cp->c_atime - (u_int32_t)(cp)->c_attr.ca_atimeondisk) >
-                             ATIME_ONDISK_ACCURACY)) {
-                               cp->c_flag |= C_MODIFIED;
-                       }
+                       cp->c_atime = tv.tv_sec;
                        cp->c_touch_acctime = FALSE;
                }
                if (cp->c_touch_modtime) {
-                       cp->c_mtime = tv.tv_sec;
                        cp->c_touch_modtime = FALSE;
-                       cp->c_flag |= C_MODIFIED;
-                       touchvol = 1;
+                       time_t new_time = tv.tv_sec;
 #if CONFIG_HFS_STD
                        /*
                         * HFS dates that WE set must be adjusted for DST
                         */
                        if ((hfsmp->hfs_flags & HFS_STANDARD) && gTimeZone.tz_dsttime) {
-                               cp->c_mtime += 3600;
+                               new_time += 3600;
                        }
 #endif
+                       if (cp->c_mtime != new_time) {
+                               cp->c_mtime = new_time;
+                               cp->c_flag |= C_MINOR_MOD;
+                               touchvol = 1;
+                       }
                }
                if (cp->c_touch_chgtime) {
-                       cp->c_ctime = tv.tv_sec;
                        cp->c_touch_chgtime = FALSE;
-                       cp->c_flag |= C_MODIFIED;
-                       touchvol = 1;
+                       if (cp->c_ctime != tv.tv_sec) {
+                               cp->c_ctime = tv.tv_sec;
+                               cp->c_flag |= C_MINOR_MOD;
+                               touchvol = 1;
+                       }
                }
 
                if (cp->c_flag & C_NEEDS_DATEADDED) {
                        hfs_write_dateadded (&(cp->c_attr), tv.tv_sec);
-                       cp->c_flag |= C_MODIFIED;
+                       cp->c_flag |= C_MINOR_MOD;
                        /* untwiddle the bit */
                        cp->c_flag &= ~C_NEEDS_DATEADDED;
                        touchvol = 1;
@@ -1978,7 +2029,7 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp)
 
                /* Touch the volume modtime if needed */
                if (touchvol) {
-                       MarkVCBDirty(hfsmp);
+                       hfs_note_header_minor_change(hfsmp);
                        HFSTOVCB(hfsmp)->vcbLsMod = tv.tv_sec;
                }
        }
@@ -2000,7 +2051,12 @@ hfs_lock(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags)
        thread_t thread = current_thread();
 
        if (cp->c_lockowner == thread) {
-               /* Only the extents and bitmap files support lock recursion. */
+               /*
+                * Only the extents and bitmap files support lock recursion
+                * here.  The other system files support lock recursion in
+                * hfs_systemfile_lock.  Eventually, we should change to
+                * handle recursion solely in hfs_systemfile_lock.
+                */
                if ((cp->c_fileid == kHFSExtentsFileID) ||
                    (cp->c_fileid == kHFSAllocationFileID)) {
                        cp->c_syslockcount++;
@@ -2070,6 +2126,15 @@ hfs_lock(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags)
        return (0);
 }
 
+bool hfs_lock_upgrade(cnode_t *cp)
+{
+       if (lck_rw_lock_shared_to_exclusive(&cp->c_rwlock)) {
+               cp->c_lockowner = current_thread();
+               return true;
+       } else
+               return false;
+}
+
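A minimal sketch of how a caller might use hfs_lock_upgrade(), assuming the usual lck_rw rule that a failed shared-to-exclusive upgrade drops the lock entirely; the example_ name is hypothetical and not part of this commit.

static int
example_promote_cnode_lock(struct cnode *cp)
{
        int error = hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
        if (error)
                return error;

        if (!hfs_lock_upgrade(cp)) {
                /* Upgrade failed and the shared hold was dropped; start over. */
                error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
                if (error)
                        return error;
        }

        /* ... modify fields that require the exclusive cnode lock ... */

        hfs_unlock(cp);
        return 0;
}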
 /*
  * Lock a pair of cnodes.
  */
@@ -2368,6 +2433,21 @@ hfs_lock_truncate(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockfla
        }
 }
 
+bool hfs_truncate_lock_upgrade(struct cnode *cp)
+{
+       assert(cp->c_truncatelockowner == HFS_SHARED_OWNER);
+       if (!lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock))
+               return false;
+       cp->c_truncatelockowner = current_thread();
+       return true;
+}
+
+void hfs_truncate_lock_downgrade(struct cnode *cp)
+{
+       assert(cp->c_truncatelockowner == current_thread());
+       lck_rw_lock_exclusive_to_shared(&cp->c_truncatelock);
+       cp->c_truncatelockowner = HFS_SHARED_OWNER;
+}
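A similar sketch for the truncate lock, again assuming a failed lck_rw upgrade releases the lock; names other than the hfs_truncate_lock_* calls above are illustrative.

static int
example_escalate_truncate_lock(struct cnode *cp)
{
        /* Caller holds the truncate lock shared (owner == HFS_SHARED_OWNER). */
        if (!hfs_truncate_lock_upgrade(cp)) {
                /* The shared hold is gone; the caller must re-take it and retry. */
                return EAGAIN;
        }

        /* ... exclusive: safe to delete blocks or flush per-file keys ... */

        hfs_truncate_lock_downgrade(cp);
        return 0;
}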
 
 /*
  * Attempt to get the truncate lock.  If it cannot be acquired, error out.
index 1fa4f2d8b3212e5dd40790abd684ffafd2b708b3..d45b9d23696a6e3836f6813dc1979cef64370eee 100644 (file)
@@ -49,7 +49,7 @@
 #if CONFIG_PROTECT
 #include <sys/cprotect.h>
 #endif
-
+#include <kern/assert.h>
 
 /*
  * The filefork is used to represent an HFS file fork (data or resource).
@@ -95,9 +95,19 @@ struct cat_lookup_buffer {
 #define ff_new_size      ff_data.cf_new_size
 #define ff_clumpsize     ff_data.cf_clump
 #define ff_bytesread     ff_data.cf_bytesread
-#define ff_blocks        ff_data.cf_blocks
 #define ff_extents       ff_data.cf_extents
+
+/*
+ * Note that the blocks fields are protected by the cnode lock, *not*
+ * the truncate lock.
+ */
+#define ff_blocks        ff_data.cf_blocks
 #define ff_unallocblocks ff_data.cf_vblocks
+static inline uint32_t ff_allocblocks(filefork_t *ff)
+{
+       assert(ff->ff_blocks >= ff->ff_unallocblocks);
+       return ff->ff_blocks - ff->ff_unallocblocks;
+}
 
 #define ff_symlinkptr    ff_union.ffu_symlinkptr
 #define ff_sysfileinfo   ff_union.ffu_sysfileinfo
@@ -172,6 +182,14 @@ struct cnode {
                uint8_t c_tflags;
        };
 
+       /*
+        * When we're using a journal, we keep track of the last
+        * transaction in which we updated this cnode.  If a minor
+        * modification is made while we're still in that transaction,
+        * we push it anyway.
+        */
+       uint32_t c_update_txn;
+
 #if HFS_COMPRESSION
        decmpfs_cnode  *c_decmp;
 #endif /* HFS_COMPRESSION */
@@ -229,7 +247,12 @@ typedef struct cnode cnode_t;
 #define C_DELETED           0x0000040  /* CNode has been marked to be deleted */
 #define C_HARDLINK          0x0000080  /* CNode is a hard link (file or dir) */
 
-#define C_FORCEUPDATE       0x0000100  /* force the catalog entry update */
+/*
+ * A minor modification is one where the volume would not be inconsistent if
+ * the change was not pushed to disk.  For example, changes to times.
+ */
+#define C_MINOR_MOD                    0x0000100  /* CNode has a minor modification */
+
 #define C_HASXATTRS         0x0000200  /* cnode has extended attributes */
 #define C_NEG_ENTRIES       0x0000400  /* directory has negative name entries */
 /* 
@@ -336,6 +359,37 @@ int hfs_hides_xattr(vfs_context_t ctx, struct cnode *cp, const char *name, int s
 
 #define ATIME_ONDISK_ACCURACY  300
 
+static inline bool hfs_should_save_atime(cnode_t *cp)
+{
+       /*
+        * We only write atime updates to disk if the delta is greater
+        * than ATIME_ONDISK_ACCURACY.
+        */
+       return (cp->c_atime < cp->c_attr.ca_atimeondisk
+                       || cp->c_atime - cp->c_attr.ca_atimeondisk > ATIME_ONDISK_ACCURACY);
+}
+
+typedef enum {
+       HFS_NOT_DIRTY   = 0,
+       HFS_DIRTY       = 1,
+       HFS_DIRTY_ATIME = 2
+} hfs_dirty_t;
+
+static inline hfs_dirty_t hfs_is_dirty(cnode_t *cp)
+{
+       if (ISSET(cp->c_flag, C_NOEXISTS))
+               return HFS_NOT_DIRTY;
+
+       if (ISSET(cp->c_flag, C_MODIFIED | C_MINOR_MOD | C_NEEDS_DATEADDED)
+               || cp->c_touch_chgtime || cp->c_touch_modtime) {
+               return HFS_DIRTY;
+       }
+
+       if (cp->c_touch_acctime || hfs_should_save_atime(cp))
+               return HFS_DIRTY_ATIME;
+
+       return HFS_NOT_DIRTY;
+}
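A hypothetical caller of hfs_is_dirty(), sketching the intended distinction: C_MODIFIED/C_MINOR_MOD changes should be written promptly, while an atime-only change can wait for reclaim or fsync.

static bool
example_needs_immediate_flush(cnode_t *cp)
{
        switch (hfs_is_dirty(cp)) {
        case HFS_DIRTY:
                return true;    /* real metadata change: push the catalog record */
        case HFS_DIRTY_ATIME:
                return false;   /* atime only: defer until reclaim/fsync */
        case HFS_NOT_DIRTY:
        default:
                return false;
        }
}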
 
 /* This overlays the FileID portion of NFS file handles. */
 struct hfsfid {
@@ -355,6 +409,7 @@ extern int hfs_getnewvnode(struct hfsmount *hfsmp, struct vnode *dvp, struct com
 #define GNV_SKIPLOCK   0x02  /* Skip taking the cnode lock (when getting resource fork). */
 #define GNV_CREATE     0x04  /* The vnode is for a newly created item. */
 #define GNV_NOCACHE       0x08  /* Delay entering this item in the name cache */
+#define GNV_USE_VP     0x10  /* Use the vnode provided in *vpp instead of creating a new one */  
 
 /* Output flags for hfs_getnewvnode */
 #define GNV_CHASH_RENAMED      0x01    /* The cnode was renamed in-flight */
@@ -362,6 +417,7 @@ extern int hfs_getnewvnode(struct hfsmount *hfsmp, struct vnode *dvp, struct com
 #define GNV_NEW_CNODE          0x04    /* We are vending out a newly initialized cnode */
 #define GNV_CAT_ATTRCHANGED    0x08    /* Something in struct cat_attr changed in between cat_lookups */
 
+
 /* Touch cnode times based on c_touch_xxx flags */
 extern void hfs_touchtimes(struct hfsmount *, struct cnode *);
 extern void hfs_write_dateadded (struct cat_attr *cattrp, u_int32_t dateadded);
@@ -421,13 +477,17 @@ extern int hfs_chash_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid);
  *       are issues with this (see #16620278).
  *
  *        + If locking multiple cnodes then the truncate lock must be taken on
- *       both (in address order), before taking the cnode locks.
+ *       all (in address order), before taking the cnode locks.
+ *
+ *  2. Hot Files stage mutex (grabbed before manipulating individual vnodes/cnodes)
  *
- *  2. cnode lock (in parent-child order if related, otherwise by address order)
+ *  3. cnode locks in address order (if needed)
  *
- *  3. journal (if needed)
+ *  4. journal (if needed)
  *
- *  4. system files (as needed)
+ *  5. Hot Files B-Tree lock (not treated as a system file)
+ *
+ *  6. system files (as needed)
  *
  *       A. Catalog B-tree file
  *       B. Attributes B-tree file
@@ -435,7 +495,7 @@ extern int hfs_chash_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid);
  *       D. Allocation Bitmap file (always exclusive, supports recursion)
  *       E. Overflow Extents B-tree file (always exclusive, supports recursion)
  *
- *  5. hfs mount point (always last)
+ *  7. hfs mount point (always last)
  *
  *
  * I. HFS cnode hash lock (must not acquire any new locks while holding this lock, always taken last)
@@ -494,11 +554,15 @@ extern int hfs_chash_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid);
  *    pages, we will deadlock.  (See #16620278.)
  *
  *  + If you do anything that requires blocks to not be deleted or
- *    encrpytion keys to remain valid, you must take the truncate lock
+ *    encryption keys to remain valid, you must take the truncate lock
  *    shared.
  *
  *  + And it follows therefore, that if you want to delete blocks or
- *    delete keys, you must take the truncate lock exclusively.
+ *    delete keys, you must take the truncate lock exclusively.  Note 
+ *    that for asynchronous writes, the truncate lock will be dropped 
+ *    after issuing I/O but before the I/O has completed, which means
+ *    that before manipulating keys, you *must* issue
+ *    vnode_wait_for_writes in addition to holding the truncate lock.
  *
  * N.B. ff_size is actually protected by the cnode lock and so you
  * must hold the cnode lock exclusively to change it and shared to
@@ -524,18 +588,23 @@ enum hfs_lockflags {
 
 void hfs_lock_always(cnode_t *cnode, enum hfs_locktype);
 int hfs_lock(struct cnode *, enum hfs_locktype, enum hfs_lockflags);
+bool hfs_lock_upgrade(cnode_t *cp);
 int hfs_lockpair(struct cnode *, struct cnode *, enum hfs_locktype);
 int hfs_lockfour(struct cnode *, struct cnode *, struct cnode *, struct cnode *,
                         enum hfs_locktype, struct cnode **);
-
 void hfs_unlock(struct cnode *);
 void hfs_unlockpair(struct cnode *, struct cnode *);
 void hfs_unlockfour(struct cnode *, struct cnode *, struct cnode *, struct cnode *);
 
 void hfs_lock_truncate(struct cnode *, enum hfs_locktype, enum hfs_lockflags);
+bool hfs_truncate_lock_upgrade(struct cnode *cp);
+void hfs_truncate_lock_downgrade(struct cnode *cp);
 void hfs_unlock_truncate(struct cnode *, enum hfs_lockflags);
 int hfs_try_trunclock(struct cnode *, enum hfs_locktype, enum hfs_lockflags);
 
+extern int  hfs_systemfile_lock(struct hfsmount *, int, enum hfs_locktype);
+extern void hfs_systemfile_unlock(struct hfsmount *, int);
+
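An illustrative walk through the lock ordering documented above, for an update that touches the catalog; error handling is elided, and SFL_CATALOG plus the hfs_start/end_transaction calls come from hfs.h rather than this header.

static void
example_catalog_update(struct hfsmount *hfsmp, struct cnode *cp)
{
        int lockflags;

        hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);  /* 1. truncate lock */
        hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);                    /* 3. cnode lock    */

        if (hfs_start_transaction(hfsmp) == 0) {                    /* 4. journal       */
                lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG,
                                                HFS_EXCLUSIVE_LOCK); /* 6. system files */

                /* ... cat_update() or similar work goes here ... */

                hfs_systemfile_unlock(hfsmp, lockflags);
                hfs_end_transaction(hfsmp);
        }

        hfs_unlock(cp);
        hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
}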
 void hfs_clear_might_be_dirty_flag(cnode_t *cp);
 
 // cnode must be locked
index ebb58b7fff669c8e2bbf25f4d22c0cb98b6b2d3b..963305e02374ad67e8706734746bc808bdc17dcc 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -25,8 +25,8 @@
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-#include <sys/cprotect.h>
-#include <sys/mman.h>
+#if CONFIG_PROTECT
+
 #include <sys/mount.h>
 #include <sys/random.h>
 #include <sys/xattr.h>
 #include <sys/vnode_internal.h>
 #include <sys/fcntl.h>
 #include <libkern/OSByteOrder.h>
+#include <libkern/crypto/sha1.h>
 #include <sys/proc.h>
 #include <sys/kauth.h>
 
 #include "hfs.h"
 #include "hfs_cnode.h"
 #include "hfs_fsctl.h"
+#include "hfs_cprotect.h"
 
-#if CONFIG_PROTECT
-/* 
- * The wrap function pointers and the variable to indicate if they 
+
+#define PTR_ADD(type, base, offset)            (type)((uintptr_t)(base) + (offset))
+
+/*
+ * The wrap function pointers and the variable to indicate if they
  * are initialized are system-wide, and hence are defined globally.
  */ 
 static struct cp_wrap_func g_cp_wrap_func = {};
@@ -58,27 +62,15 @@ extern int (**hfs_vnodeop_p) (void *);
  */
 static int cp_root_major_vers(mount_t mp);
 static int cp_getxattr(cnode_t *, struct hfsmount *hfsmp, struct cprotect **);
-static struct cprotect *cp_entry_alloc(size_t);
-static void cp_entry_dealloc(struct cprotect *entry);
+static void cp_entry_dealloc(hfsmount_t *hfsmp, struct cprotect *entry);
 static int cp_restore_keys(struct cprotect *, struct hfsmount *hfsmp, struct cnode *);
 static int cp_lock_vfs_callback(mount_t, void *);
 static int cp_lock_vnode_callback(vnode_t, void *);
 static int cp_vnode_is_eligible (vnode_t);
 static int cp_check_access (cnode_t *cp, struct hfsmount *hfsmp, int vnop);
-static int cp_new(int newclass, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode, 
-               uint32_t flags, struct cprotect **output_entry);
-static int cp_rewrap(struct cnode *cp, struct hfsmount *hfsmp, int newclass);
 static int cp_unwrap(struct hfsmount *, struct cprotect *, struct cnode *);
-static int cp_setup_aes_ctx(struct cprotect *entry);
 static void cp_init_access(cp_cred_t access, struct cnode *cp);
 
-static inline int cp_get_crypto_generation (uint32_t protclass) {
-       if (protclass & CP_CRYPTO_G1) {
-               return 1;
-       }       
-       else return 0;
-}
-
 
 #if DEVELOPMENT || DEBUG
 #define CP_ASSERT(x)           \
@@ -89,6 +81,294 @@ static inline int cp_get_crypto_generation (uint32_t protclass) {
 #define CP_ASSERT(x)
 #endif
 
+// -- cpx_t accessors --
+
+size_t cpx_size(size_t key_size)
+{
+       size_t size = sizeof(struct cpx) + key_size;
+
+#if DEBUG
+       size += 4; // Extra for magic
+#endif
+
+       return size;
+}
+
+static size_t cpx_sizex(const struct cpx *cpx)
+{
+       return cpx_size(cpx->cpx_max_key_len);
+}
+
+cpx_t cpx_alloc(size_t key_len)
+{
+       cpx_t cpx;
+
+       MALLOC(cpx, cpx_t, cpx_size(key_len), M_TEMP, M_WAITOK);
+
+       cpx_init(cpx, key_len);
+
+       return cpx;
+}
+
+#if DEBUG
+static const uint32_t cpx_magic1 = 0x7b787063;         // cpx{
+static const uint32_t cpx_magic2 = 0x7870637d;         // }cpx
+#endif
+
+void cpx_free(cpx_t cpx)
+{
+#if DEBUG
+       assert(cpx->cpx_magic1 == cpx_magic1);
+       assert(*PTR_ADD(uint32_t *, cpx, cpx_sizex(cpx) - 4) == cpx_magic2);
+#endif
+       bzero(cpx->cpx_cached_key, cpx->cpx_max_key_len);
+       FREE(cpx, M_TEMP);
+}
+
+void cpx_init(cpx_t cpx, size_t key_len)
+{
+#if DEBUG
+       cpx->cpx_magic1 = cpx_magic1;
+       *PTR_ADD(uint32_t *, cpx, cpx_size(key_len) - 4) = cpx_magic2;
+#endif
+       cpx->cpx_flags = 0;
+       cpx->cpx_key_len = 0;
+       cpx->cpx_max_key_len = key_len;
+}
+
+bool cpx_is_sep_wrapped_key(const struct cpx *cpx)
+{
+       return ISSET(cpx->cpx_flags, CPX_SEP_WRAPPEDKEY);
+}
+
+void cpx_set_is_sep_wrapped_key(struct cpx *cpx, bool v)
+{
+       if (v)
+               SET(cpx->cpx_flags, CPX_SEP_WRAPPEDKEY);
+       else
+               CLR(cpx->cpx_flags, CPX_SEP_WRAPPEDKEY);
+}
+
+bool cpx_use_offset_for_iv(const struct cpx *cpx)
+{
+       return ISSET(cpx->cpx_flags, CPX_USE_OFFSET_FOR_IV);
+}
+
+void cpx_set_use_offset_for_iv(struct cpx *cpx, bool v)
+{
+       if (v)
+               SET(cpx->cpx_flags, CPX_USE_OFFSET_FOR_IV);
+       else
+               CLR(cpx->cpx_flags, CPX_USE_OFFSET_FOR_IV);
+}
+
+uint16_t cpx_max_key_len(const struct cpx *cpx)
+{
+       return cpx->cpx_max_key_len;
+}
+
+uint16_t cpx_key_len(const struct cpx *cpx)
+{
+       return cpx->cpx_key_len;
+}
+
+void cpx_set_key_len(struct cpx *cpx, uint16_t key_len)
+{
+       cpx->cpx_key_len = key_len;
+
+       if (ISSET(cpx->cpx_flags, CPX_IV_AES_CTX_HFS)) {
+               /*
+                * We assume that if the key length is being modified, the key
+                * has changed.  As a result, un-set any bits related to the
+                * AES context, if needed. They should be re-generated
+                * on-demand.
+                */ 
+               CLR(cpx->cpx_flags, CPX_IV_AES_CTX_INITIALIZED | CPX_IV_AES_CTX_HFS);
+       }
+}
+
+bool cpx_has_key(const struct cpx *cpx)
+{
+       return cpx->cpx_key_len > 0;
+}
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wcast-qual"
+void *cpx_key(const struct cpx *cpx)
+{
+       return (void *)cpx->cpx_cached_key;
+}
+#pragma clang diagnostic pop
+
+static void cpx_set_aes_iv_key(struct cpx *cpx, void *iv_key)
+{
+       aes_encrypt_key128(iv_key, &cpx->cpx_iv_aes_ctx);
+       SET(cpx->cpx_flags, CPX_IV_AES_CTX_INITIALIZED | CPX_USE_OFFSET_FOR_IV);
+       CLR(cpx->cpx_flags, CPX_IV_AES_CTX_HFS);
+}
+
+aes_encrypt_ctx *cpx_iv_aes_ctx(struct cpx *cpx)
+{
+       if (ISSET(cpx->cpx_flags, CPX_IV_AES_CTX_INITIALIZED))
+               return &cpx->cpx_iv_aes_ctx;
+       SHA1_CTX sha1ctxt;
+       uint8_t digest[SHA_DIGEST_LENGTH]; /* Kiv */
+
+       /* First init the cp_cache_iv_key[] */
+       SHA1Init(&sha1ctxt);
+       /*
+        * We can only use this when the keys are generated in the AP; as a result
+        * we only use the first 32 bytes of the key for the cache key.
+        */
+       SHA1Update(&sha1ctxt, cpx->cpx_cached_key, cpx->cpx_key_len);
+       SHA1Final(digest, &sha1ctxt);
+
+       cpx_set_aes_iv_key(cpx, digest);
+       SET(cpx->cpx_flags, CPX_IV_AES_CTX_HFS);
+
+       return &cpx->cpx_iv_aes_ctx;
+}
+
+static void cpx_flush(cpx_t cpx)
+{
+       bzero(cpx->cpx_cached_key, cpx->cpx_max_key_len);
+       bzero(&cpx->cpx_iv_aes_ctx, sizeof(cpx->cpx_iv_aes_ctx));
+       cpx->cpx_flags = 0;
+       cpx->cpx_key_len = 0;
+}
+
+static bool cpx_can_copy(const struct cpx *src, const struct cpx *dst)
+{
+       return src->cpx_key_len <= dst->cpx_max_key_len;
+}
+
+void cpx_copy(const struct cpx *src, cpx_t dst)
+{
+       uint16_t key_len = cpx_key_len(src);
+       cpx_set_key_len(dst, key_len);
+       memcpy(cpx_key(dst), cpx_key(src), key_len);
+       dst->cpx_flags = src->cpx_flags;
+       if (ISSET(dst->cpx_flags, CPX_IV_AES_CTX_INITIALIZED))
+               dst->cpx_iv_aes_ctx = src->cpx_iv_aes_ctx;
+}
+
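For orientation, a sketch of the expected cpx lifecycle using the accessors above; the 32-byte key length and the example_ name are arbitrary.

static void
example_cpx_lifecycle(const uint8_t *raw_key)
{
        cpx_t cpx = cpx_alloc(32);              /* room for up to 32 key bytes */

        cpx_set_key_len(cpx, 32);               /* mark how much of it is valid */
        memcpy(cpx_key(cpx), raw_key, 32);      /* install the key material */
        cpx_set_use_offset_for_iv(cpx, true);

        /* ... hand the cpx to the I/O / crypto layer ... */

        cpx_free(cpx);                          /* bzero()s the cached key before freeing */
}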
+// -- cp_key_pair accessors --
+
+void cpkp_init(cp_key_pair_t *cpkp, uint16_t max_pers_key_len,
+                          uint16_t max_cached_key_len)
+{
+       cpkp->cpkp_max_pers_key_len = max_pers_key_len;
+       cpkp->cpkp_pers_key_len = 0;
+       cpx_init(&cpkp->cpkp_cpx, max_cached_key_len);
+
+       // Default to using offsets
+       cpx_set_use_offset_for_iv(&cpkp->cpkp_cpx, true);
+}
+
+uint16_t cpkp_max_pers_key_len(const cp_key_pair_t *cpkp)
+{
+       return cpkp->cpkp_max_pers_key_len;
+}
+
+uint16_t cpkp_pers_key_len(const cp_key_pair_t *cpkp)
+{
+       return cpkp->cpkp_pers_key_len;
+}
+
+static bool cpkp_has_pers_key(const cp_key_pair_t *cpkp)
+{
+       return cpkp->cpkp_pers_key_len > 0;
+}
+
+static void *cpkp_pers_key(const cp_key_pair_t *cpkp)
+{
+       return PTR_ADD(void *, &cpkp->cpkp_cpx, cpx_sizex(&cpkp->cpkp_cpx));
+}
+
+static void cpkp_set_pers_key_len(cp_key_pair_t *cpkp, uint16_t key_len)
+{
+       if (key_len > cpkp->cpkp_max_pers_key_len)
+               panic("hfs_cprotect: key too big!");
+       cpkp->cpkp_pers_key_len = key_len;
+}
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wcast-qual"
+cpx_t cpkp_cpx(const cp_key_pair_t *cpkp)
+{
+       // Cast to remove const qualifier
+       return (cpx_t)&cpkp->cpkp_cpx;
+}
+#pragma clang diagnostic pop
+
+size_t cpkp_size(uint16_t pers_key_len, uint16_t cached_key_len)
+{
+       return (sizeof(cp_key_pair_t) - sizeof(struct cpx)
+                       + pers_key_len + cpx_size(cached_key_len));
+}
+
+size_t cpkp_sizex(const cp_key_pair_t *cpkp)
+{
+       return cpkp_size(cpkp->cpkp_max_pers_key_len, cpkp->cpkp_cpx.cpx_max_key_len);
+}
+
+void cpkp_flush(cp_key_pair_t *cpkp)
+{
+       cpx_flush(&cpkp->cpkp_cpx);
+       cpkp->cpkp_pers_key_len = 0;
+       bzero(cpkp_pers_key(cpkp), cpkp->cpkp_max_pers_key_len);
+}
+
+bool cpkp_can_copy(const cp_key_pair_t *src, const cp_key_pair_t *dst)
+{
+       return (cpkp_pers_key_len(src) <= dst->cpkp_max_pers_key_len
+                       && cpx_can_copy(&src->cpkp_cpx, &dst->cpkp_cpx));
+}
+
+void cpkp_copy(const cp_key_pair_t *src, cp_key_pair_t *dst)
+{
+       const uint16_t key_len = cpkp_pers_key_len(src);
+       cpkp_set_pers_key_len(dst, key_len);
+       memcpy(cpkp_pers_key(dst), cpkp_pers_key(src), key_len);
+       cpx_copy(&src->cpkp_cpx, &dst->cpkp_cpx);
+}
+
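A sketch of how the variable-length pair is sized and initialized; in the real code the cp_key_pair_t lives at the tail of struct cprotect (see cp_entry_alloc below), so a standalone allocation like this is purely illustrative.

static cp_key_pair_t *
example_cpkp_alloc(uint16_t pers_len, uint16_t cached_len)
{
        cp_key_pair_t *cpkp;

        /* cpkp_size() accounts for the embedded cpx plus both key areas. */
        MALLOC(cpkp, cp_key_pair_t *, cpkp_size(pers_len, cached_len),
               M_TEMP, M_WAITOK);
        cpkp_init(cpkp, pers_len, cached_len);

        return cpkp;
}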
+// --
+
+bool cp_is_supported_version(uint16_t vers)
+{
+       return vers == CP_VERS_4 || vers == CP_VERS_5;
+}
+
+/*
+ * Return the appropriate key and, if requested, the physical offset and
+ * maximum length for a particular I/O operation.
+ */
+void cp_io_params(__unused hfsmount_t *hfsmp, cprotect_t cpr,
+                                 __unused off_rsrc_t off_rsrc,
+                                 __unused int direction, cp_io_params_t *io_params)
+{
+
+       io_params->max_len = INT64_MAX;
+       io_params->phys_offset = -1;
+       io_params->cpx = cpkp_cpx(&cpr->cp_keys);
+}
+
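A hypothetical read path asking content protection which key to use; in this tree cp_io_params() ignores the offset/direction arguments and always vends the per-file cpx, so the VNODE_READ value here is just a placeholder.

static cpx_t
example_key_for_io(hfsmount_t *hfsmp, cnode_t *cp, off_rsrc_t off_rsrc)
{
        cp_io_params_t io_params;

        cp_io_params(hfsmp, cp->c_cpentry, off_rsrc, VNODE_READ, &io_params);

        return io_params.cpx;
}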
+static void cp_flush_cached_keys(cprotect_t cpr)
+{
+       cpx_flush(cpkp_cpx(&cpr->cp_keys));
+}
+
+static bool cp_needs_pers_key(cprotect_t cpr)
+{
+       if (CP_CLASS(cpr->cp_pclass) == PROTECTION_CLASS_F)
+               return !cpx_has_key(cpkp_cpx(&cpr->cp_keys));
+       else
+               return !cpkp_has_pers_key(&cpr->cp_keys);
+}
+
 int
 cp_key_store_action(int action)
 {
@@ -107,8 +387,7 @@ cp_key_store_action(int action)
         * Upcast the value in 'action' to be a pointer-width unsigned integer.
         * This avoids issues relating to pointer-width. 
         */
-       unsigned long action_arg = (unsigned long) action;
-       return vfs_iterate(0, cp_lock_vfs_callback, (void*)action_arg);
+       return vfs_iterate(0, cp_lock_vfs_callback, (void*)(uintptr_t)action);
 }
 
 
@@ -128,6 +407,19 @@ cp_register_wraps(cp_wrap_func_t key_store_func)
        return 0;
 }
 
+static cp_key_revision_t cp_initial_key_revision(__unused hfsmount_t *hfsmp)
+{
+       return 1;
+}
+
+cp_key_revision_t cp_next_key_revision(cp_key_revision_t rev)
+{
+       rev = (rev + 0x0100) ^ (mach_absolute_time() & 0xff);
+       if (!rev)
+               rev = 1;
+       return rev;
+}
+
 /*
  * Allocate and initialize a cprotect blob for a new cnode.
  * Called from hfs_getnewvnode: cnode is locked exclusive.
@@ -171,22 +463,7 @@ cp_entry_init(struct cnode *cp, struct mount *mp)
        CP_ASSERT (cp->c_cpentry == NULL);
 
        error = cp_getxattr(cp, hfsmp, &entry);
-       if (error == 0) {
-               /* 
-                * Success; attribute was found, though it may not have keys.
-                * If the entry is not returned without keys, we will delay generating
-                * keys until the first I/O.
-                */
-               if (S_ISREG(cp->c_mode)) {
-                       if (entry->cp_flags & CP_NEEDS_KEYS) {
-                               entry->cp_flags &= ~CP_KEY_FLUSHED;
-                       }
-                       else {
-                               entry->cp_flags |= CP_KEY_FLUSHED;
-                       }
-               }
-       } 
-       else if (error == ENOATTR) {
+       if (error == ENOATTR) {
                /*
                 * Normally, we should always have a CP EA for a file or directory that
                 * we are initializing here. However, there are some extenuating circumstances,
@@ -195,15 +472,21 @@ cp_entry_init(struct cnode *cp, struct mount *mp)
                 * As a result, we leave code here to deal with an ENOATTR which will always
                 * default to a 'D/NONE' key, though we don't expect to use it much.
                 */
-               int target_class = PROTECTION_CLASS_D;
-               
+               cp_key_class_t target_class = PROTECTION_CLASS_D;
+
                if (S_ISDIR(cp->c_mode)) {
                        target_class = PROTECTION_CLASS_DIR_NONE;
-               }       
+               }
+
+               cp_key_revision_t key_revision = cp_initial_key_revision(hfsmp);
+
                /* allow keybag to override our class preferences */
-               uint32_t keyflags = CP_KEYWRAP_DIFFCLASS;
-               error = cp_new (target_class, hfsmp, cp, cp->c_mode, keyflags, &entry);
+               error = cp_new (&target_class, hfsmp, cp, cp->c_mode, CP_KEYWRAP_DIFFCLASS,
+                                               key_revision, (cp_new_alloc_fn)cp_entry_alloc, (void **)&entry);
                if (error == 0) {
+                       entry->cp_pclass = target_class;
+                       entry->cp_key_os_version = cp_os_version();
+                       entry->cp_key_revision = key_revision;
                        error = cp_setxattr (cp, entry, hfsmp, cp->c_fileid, XATTR_CREATE);
                }
        }
@@ -226,7 +509,7 @@ out:
        }
        else {
                if (entry) {
-                       cp_entry_destroy(entry);
+                       cp_entry_destroy(hfsmp, entry);
                }
                cp->c_cpentry = NULL;
        }
@@ -246,8 +529,9 @@ out:
  * keys for us, we could have to fail the open(2) call and back out the entry.
  */
 
-int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp, int32_t suppliedclass, 
-               mode_t cmode, struct cprotect **tmpentry) 
+int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp,
+                                          cp_key_class_t suppliedclass, mode_t cmode,
+                                          struct cprotect **tmpentry)
 {
        int isdir = 0;
        struct cprotect *entry = NULL;
@@ -309,7 +593,7 @@ int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp, int32_t suppli
        }
 
        /* Generate the cprotect to vend out */
-       entry = cp_entry_alloc (0);
+       entry = cp_entry_alloc(NULL, 0, 0, NULL);
        if (entry == NULL) {
                *tmpentry = NULL;
                return ENOMEM;
@@ -320,7 +604,7 @@ int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp, int32_t suppli
         * this blob has no keys and it has no backing xattr.  We just know the
         * target class.
         */
-       entry->cp_flags = (CP_NEEDS_KEYS | CP_NO_XATTR);
+       entry->cp_flags = CP_NO_XATTR;
        /* Note this is only the effective class */
        entry->cp_pclass = target_class;
        *tmpentry = entry;
@@ -328,34 +612,6 @@ int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp, int32_t suppli
        return 0;
 }
 
-
-/*
- * cp_needs_tempkeys
- * 
- * Relay to caller whether or not the filesystem should generate temporary keys
- * during resize operations.
- */
-
-int cp_needs_tempkeys (struct hfsmount *hfsmp, int *needs) 
-{
-
-       if (hfsmp->hfs_running_cp_major_vers < CP_PREV_MAJOR_VERS || 
-                       hfsmp->hfs_running_cp_major_vers > CP_NEW_MAJOR_VERS)  {
-               return -1;
-       }
-
-       /* CP_NEW_MAJOR_VERS implies CP_OFF_IV_ENABLED */
-       if (hfsmp->hfs_running_cp_major_vers < CP_NEW_MAJOR_VERS) {
-               *needs = 0;
-       }
-       else {
-               *needs = 1;
-       }
-
-       return 0;
-}
-
-
 /*
  * Set up an initial key/class pair for a disassociated cprotect entry.
  * This function is used to generate transient keys that will never be
@@ -367,41 +623,17 @@ int cp_needs_tempkeys (struct hfsmount *hfsmp, int *needs)
  * where we may rely on AES symmetry to relocate encrypted data from
  * one spot in the disk to another.
  */
-int cp_entry_gentempkeys(struct cprotect **entry_ptr, struct hfsmount *hfsmp) 
+int cpx_gentempkeys(cpx_t *pcpx, __unused struct hfsmount *hfsmp)
 {
+       cpx_t cpx = cpx_alloc(CP_MAX_KEYSIZE);
 
-       struct cprotect *entry = NULL;
-
-       if (hfsmp->hfs_running_cp_major_vers < CP_NEW_MAJOR_VERS) {
-               return EPERM;
-       }
-
-       /*
-        * This should only be  used for files and won't be written out.  
-        * We don't need a persistent key.
-        */
-       entry = cp_entry_alloc (0);
-       if (entry == NULL) {
-               *entry_ptr = NULL;
-               return ENOMEM;
-       }
-       /* This is generated in-kernel so we leave it at the max key*/
-       entry->cp_cache_key_len = CP_MAX_KEYSIZE;
-
-       /* This pclass is only the effective class */
-       entry->cp_pclass = PROTECTION_CLASS_F;
-       entry->cp_persistent_key_len = 0;
+       cpx_set_key_len(cpx, CP_MAX_KEYSIZE);
+       read_random(cpx_key(cpx), CP_MAX_KEYSIZE);
+       cpx_set_use_offset_for_iv(cpx, true);
 
-       /* Generate the class F key */
-       read_random (&entry->cp_cache_key[0], entry->cp_cache_key_len);
+       *pcpx = cpx;
 
-       /* Generate the IV key */
-       cp_setup_aes_ctx(entry);
-       entry->cp_flags |= CP_OFF_IV_ENABLED;
-
-       *entry_ptr = entry;
        return 0;
-
 }
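A sketch of a relocation-style caller: generate a throwaway class-F style key that never touches disk, use it, then scrub and free it; the example_ name is hypothetical.

static int
example_with_temp_key(struct hfsmount *hfsmp)
{
        cpx_t cpx = NULL;
        int error = cpx_gentempkeys(&cpx, hfsmp);

        if (error)
                return error;

        /* ... encrypt/decrypt the blocks being relocated with cpx ... */

        cpx_free(cpx);          /* scrubs the key material before freeing */
        return 0;
}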
 
 /*
@@ -409,13 +641,13 @@ int cp_entry_gentempkeys(struct cprotect **entry_ptr, struct hfsmount *hfsmp)
  * Called at hfs_reclaim_cnode: cnode is locked exclusive.
  */
 void
-cp_entry_destroy(struct cprotect *entry_ptr) 
+cp_entry_destroy(hfsmount_t *hfsmp, struct cprotect *entry_ptr)
 {
        if (entry_ptr == NULL) {
                /* nothing to clean up */
                return;
        }
-       cp_entry_dealloc(entry_ptr);
+       cp_entry_dealloc(hfsmp, entry_ptr);
 }
 
 
@@ -511,7 +743,6 @@ cp_vnode_getclass(struct vnode *vp, int *class)
        return error;
 }
 
-
 /*
  * Sets persistent class for this file or directory.
  * If vnode cannot be protected (system file, non-regular file, non-hfs), EBADF.
@@ -592,61 +823,80 @@ cp_vnode_setclass(struct vnode *vp, uint32_t newclass)
                 * target class (since B allows I/O but an unwrap prior to the next unlock
                 * will not be allowed).
                 */
-               if (entry->cp_flags & CP_KEY_FLUSHED) {
+               if (!cpx_has_key(&entry->cp_keys.cpkp_cpx)) {
                        error = cp_restore_keys (entry, hfsmp, cp);
                        if (error) {
                                goto out;
                        }
                }
+
                if (newclass == PROTECTION_CLASS_F) {
                        /* Verify that file is blockless if switching to class F */
                        if (cp->c_datafork->ff_size > 0) {
                                error = EINVAL;
-                               goto out;       
+                               goto out;
                        }
 
-                       /* newclass is only the effective class */
-                       entry->cp_pclass = newclass;
+                       cp_key_pair_t *cpkp;
+                       cprotect_t new_entry = cp_entry_alloc(NULL, 0, CP_MAX_KEYSIZE, &cpkp);
 
-                       /* Class F files are not wrapped, so they continue to use MAX_KEYSIZE */
-                       entry->cp_cache_key_len = CP_MAX_KEYSIZE;
-                       read_random (&entry->cp_cache_key[0], entry->cp_cache_key_len);
-                       if (hfsmp->hfs_running_cp_major_vers == CP_NEW_MAJOR_VERS) {
-                               cp_setup_aes_ctx (entry);
-                               entry->cp_flags |= CP_OFF_IV_ENABLED;
-                       }       
-                       bzero(entry->cp_persistent_key, entry->cp_persistent_key_len);
-                       entry->cp_persistent_key_len = 0;
-               } else {
-                       /* Deny the setclass if file is to be moved from F to something else */
-                       if (entry->cp_pclass == PROTECTION_CLASS_F) {
-                               error = EPERM;
-                               goto out;
-                       }
-                       /* We cannot call cp_rewrap unless the keys were already in existence. */
-                       if (entry->cp_flags & CP_NEEDS_KEYS) {
-                               struct cprotect *newentry = NULL;
-                               /* 
-                                * We want to fail if we can't wrap to the target class. By not setting
-                                * CP_KEYWRAP_DIFFCLASS, we tell keygeneration that if it can't wrap 
-                                * to 'newclass' then error out.
-                                */
-                               uint32_t flags = 0;
-                               error = cp_generate_keys (hfsmp, cp, newclass, flags,  &newentry);
-                               if (error == 0) {
-                                       cp_replace_entry (cp, newentry);
-                               }
-                               /* Bypass the setxattr code below since generate_keys does it for us */
+                       if (!new_entry) {
+                               error = ENOMEM;
                                goto out;
                        }
-                       else {
-                               error = cp_rewrap(cp, hfsmp, newclass);
+
+                       /* newclass is only the effective class */
+                       new_entry->cp_pclass = newclass;
+                       new_entry->cp_key_os_version = cp_os_version();
+                       new_entry->cp_key_revision = cp_next_key_revision(entry->cp_key_revision);
+
+                       cpx_t cpx = cpkp_cpx(cpkp);
+
+                       /* Class F files are not wrapped, so they continue to use MAX_KEYSIZE */
+                       cpx_set_key_len(cpx, CP_MAX_KEYSIZE);
+                       read_random (cpx_key(cpx), CP_MAX_KEYSIZE);
+
+                       cp_replace_entry(hfsmp, cp, new_entry);
+
+                       error = 0;
+                       goto out;
+               }
+
+               /* Deny the setclass if file is to be moved from F to something else */
+               if (entry->cp_pclass == PROTECTION_CLASS_F) {
+                       error = EPERM;
+                       goto out;
+               }
+
+               if (!cpkp_has_pers_key(&entry->cp_keys)) {
+                       struct cprotect *new_entry = NULL;
+                       /*
+                        * We want to fail if we can't wrap to the target class. By not setting
+                        * CP_KEYWRAP_DIFFCLASS, we tell keygeneration that if it can't wrap
+                        * to 'newclass' then error out.
+                        */
+                       uint32_t flags = 0;
+                       error = cp_generate_keys (hfsmp, cp, newclass, flags,  &new_entry);
+                       if (error == 0) {
+                               cp_replace_entry (hfsmp, cp, new_entry);
                        }
+                       /* Bypass the setxattr code below since generate_keys does it for us */
+                       goto out;
                }
+
+               cprotect_t new_entry;
+               error = cp_rewrap(cp, hfsmp, &newclass, &entry->cp_keys, entry,
+                                                 (cp_new_alloc_fn)cp_entry_alloc, (void **)&new_entry);
                if (error) {
                        /* we didn't have perms to set this class. leave file as-is and error out */
                        goto out;
                }
+
+
+               new_entry->cp_pclass = newclass;
+
+               cp_replace_entry(hfsmp, cp, new_entry);
+               entry = new_entry;
        }
        else if (vnode_isdir(vp)) {
                /* For directories, just update the pclass.  newclass is only effective class */
@@ -722,16 +972,6 @@ int cp_vnode_transcode(vnode_t vp, void *key, unsigned *len)
                goto out;
        }
 
-       if ((entry->cp_flags & CP_NEEDS_KEYS)) {
-               /*
-                * If we are transcoding keys for AKB, then we should have already established
-                * a set of keys for this vnode. IF we don't have keys yet, then something bad
-                * happened.
-                */
-               error = EINVAL;
-               goto out;
-       }
-
        /* Send the per-file key in wrapped form for re-wrap with the current class information
         * Send NULLs in the output parameters of the wrapper() and AKS will do the rest.
         * Don't need to process any outputs, so just clear the locks and pass along the error. */
@@ -749,8 +989,18 @@ int cp_vnode_transcode(vnode_t vp, void *key, unsigned *len)
 
                bzero(&wrapped_key_in, sizeof(wrapped_key_in));
                bzero(&wrapped_key_out, sizeof(wrapped_key_out));
-               wrapped_key_in.key = entry->cp_persistent_key;
-               wrapped_key_in.key_len = entry->cp_persistent_key_len;
+
+               cp_key_pair_t *cpkp = &entry->cp_keys;
+
+
+               wrapped_key_in.key = cpkp_pers_key(cpkp);
+               wrapped_key_in.key_len = cpkp_pers_key_len(cpkp);
+
+               if (!wrapped_key_in.key_len) {
+                       error = EINVAL;
+                       goto out;
+               }
+
                /* Use the actual persistent class when talking to AKS */
                wrapped_key_in.dp_class = entry->cp_pclass;
                wrapped_key_out.key = key;
@@ -863,7 +1113,8 @@ cp_handle_vnop(struct vnode *vp, int vnop, int ioflag)
 
        if ((error = cp_check_access(cp, hfsmp, vnop))) {
                /* check for raw encrypted access before bailing out */
-               if ((vnop == CP_READ_ACCESS) && (ioflag & IO_ENCRYPTED)) {
+               if ((ioflag & IO_ENCRYPTED)
+                       && (vnop == CP_READ_ACCESS)) {
                        /*
                         * read access only + asking for the raw encrypted bytes
                         * is legitimate, so reset the error value to 0
@@ -875,9 +1126,15 @@ cp_handle_vnop(struct vnode *vp, int vnop, int ioflag)
                }
        }
 
-       if (entry->cp_flags == 0) {
-               /* no more work to do */
-               goto out;
+       if (!ISSET(entry->cp_flags, CP_NO_XATTR)) {
+               if (!S_ISREG(cp->c_mode))
+                       goto out;
+
+               // If we have a persistent key and the cached key, we're done
+               if (!cp_needs_pers_key(entry)
+                       && cpx_has_key(cpkp_cpx(&entry->cp_keys))) {
+                       goto out;
+               }
        }
 
        /* upgrade to exclusive lock */
@@ -890,7 +1147,7 @@ cp_handle_vnop(struct vnode *vp, int vnop, int ioflag)
        }
 
        /* generate new keys if none have ever been saved */
-       if ((entry->cp_flags & CP_NEEDS_KEYS)) {
+       if (cp_needs_pers_key(entry)) {
                struct cprotect *newentry = NULL;
                /* 
                 * It's ok if this ends up being wrapped in a different class than 'pclass'.
@@ -900,7 +1157,7 @@ cp_handle_vnop(struct vnode *vp, int vnop, int ioflag)
 
                error = cp_generate_keys (hfsmp, cp, CP_CLASS(cp->c_cpentry->cp_pclass), flags, &newentry);     
                if (error == 0) {
-                       cp_replace_entry (cp, newentry);
+                       cp_replace_entry (hfsmp, cp, newentry);
                        entry = newentry;
                }
                else {
@@ -909,7 +1166,7 @@ cp_handle_vnop(struct vnode *vp, int vnop, int ioflag)
        }
 
        /* unwrap keys if needed */
-       if (entry->cp_flags & CP_KEY_FLUSHED) {
+       if (!cpx_has_key(cpkp_cpx(&entry->cp_keys))) {
                if ((vnop == CP_READ_ACCESS) && (ioflag & IO_ENCRYPTED)) {
                        /* no need to try to restore keys; they are not going to be used */
                        error = 0;
@@ -932,6 +1189,30 @@ out:
        return error;
 }
 
+#if HFS_TMPDBG
+#if !SECURE_KERNEL
+static void cp_log_eperm (struct vnode* vp, int pclass, boolean_t create) {
+       char procname[256] = {};
+       const char *fname = "unknown";
+       const char *dbgop = "open";
+
+       int ppid = proc_selfpid();
+       /* selfname does a strlcpy so we're OK */
+       proc_selfname(procname, sizeof(procname));
+       if (vp && vp->v_name) {
+               /* steal from the namecache */
+               fname = vp->v_name;
+       }
+
+       if (create) {
+               dbgop = "create";       
+       }
+       
+       printf("proc %s (pid %d) class %d, op: %s failure @ file %s\n", procname, ppid, pclass, dbgop, fname);
+}
+#endif
+#endif
+
 
 int
 cp_handle_open(struct vnode *vp, int mode)
@@ -951,8 +1232,17 @@ cp_handle_open(struct vnode *vp, int mode)
                return 0;
        }
 
-       /* We know the vnode is in a valid state. Acquire cnode and validate */
        cp = VTOC(vp);
+
+       // Allow if raw encrypted mode requested
+       if (ISSET(mode, FENCRYPTED)) {
+               return 0;
+       }
+       if (ISSET(mode, FUNENCRYPTED)) {
+               return 0;
+       }
+
+       /* We know the vnode is in a valid state. Acquire cnode and validate */
        hfsmp = VTOHFS(vp);
 
        if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
@@ -977,13 +1267,13 @@ cp_handle_open(struct vnode *vp, int mode)
        /*
         * Does the cnode have keys yet?  If not, then generate them.
         */
-       if (entry->cp_flags & CP_NEEDS_KEYS) {
+       if (cp_needs_pers_key(entry)) {
                struct cprotect *newentry = NULL;
                /* Allow the keybag to override our class preferences */
                uint32_t flags = CP_KEYWRAP_DIFFCLASS;
                error = cp_generate_keys (hfsmp, cp, CP_CLASS(cp->c_cpentry->cp_pclass), flags, &newentry);
                if (error == 0) {
-                       cp_replace_entry (cp, newentry);
+                       cp_replace_entry (hfsmp, cp, newentry);
                        entry = newentry;
                }       
                else {
@@ -1011,7 +1301,7 @@ cp_handle_open(struct vnode *vp, int mode)
                                break;
                        }
                        
-                       if ((entry->cp_flags & CP_KEY_FLUSHED) == 0) {
+                       if (cpx_has_key(cpkp_cpx(&entry->cp_keys)) && !ISSET(mode, FENCRYPTED)) {
                                /*
                                 * For a class B file, attempt the unwrap if we have the key in
                                 * core already. 
@@ -1024,8 +1314,8 @@ cp_handle_open(struct vnode *vp, int mode)
 
                                cp_init_access(&access_in, cp);
                                bzero(&wrapped_key_in, sizeof(wrapped_key_in));
-                               wrapped_key_in.key = entry->cp_persistent_key;
-                               wrapped_key_in.key_len = entry->cp_persistent_key_len;
+                               wrapped_key_in.key = cpkp_pers_key(&entry->cp_keys);
+                               wrapped_key_in.key_len = cpkp_pers_key_len(&entry->cp_keys);
                                /* Use the persistent class when talking to AKS */
                                wrapped_key_in.dp_class = entry->cp_pclass;
                                error = g_cp_wrap_func.unwrapper(&access_in, &wrapped_key_in, NULL);
@@ -1052,7 +1342,7 @@ cp_handle_open(struct vnode *vp, int mode)
                         * Since this function is bypassed entirely if we're opening a raw encrypted file, 
                         * we can always attempt the restore.
                         */
-                       if (entry->cp_flags & CP_KEY_FLUSHED) {
+                       if (!cpx_has_key(cpkp_cpx(&entry->cp_keys))) {
                                error = cp_restore_keys(entry, hfsmp, cp);
                        }
        
@@ -1068,61 +1358,20 @@ cp_handle_open(struct vnode *vp, int mode)
        }
 
 out:
-       hfs_unlock(cp);
-       return error;
-}
 
-
-/*
- * During hfs resize operations, we have slightly different constraints than during
- * normal VNOPS that read/write data to files.  Specifically, we already have the cnode
- * locked (so nobody else can modify it), and we are doing the IO with root privileges, since
- * we are moving the data behind the user's back.  So, we skip access checks here (for unlock
- * vs. lock), and don't worry about non-existing keys.  If the file exists on-disk with valid
- * payload, then it must have keys set up already by definition.
- */
-int
-cp_handle_relocate (struct cnode *cp, struct hfsmount *hfsmp) 
-{
-       struct cprotect *entry;
-       int error = -1;
-
-       /* cp is already locked */
-       entry = cp->c_cpentry;
-       if (!entry)
-               goto out;
-
-       /*
-        * Still need to validate whether to permit access to the file or not
-        * based on lock status
-        */
-       if ((error = cp_check_access(cp, hfsmp,  CP_READ_ACCESS | CP_WRITE_ACCESS))) {
-               goto out;
+#if HFS_TMPDBG
+#if !SECURE_KERNEL
+       if ((hfsmp->hfs_cp_verbose) && (error == EPERM)) {
+               cp_log_eperm (vp, CP_CLASS(entry->cp_pclass), false);
        }
+#endif
+#endif
 
-       if (entry->cp_flags == 0) {
-               /* no more work to do */
-               error = 0;
-               goto out;
-       }
-
-       /* it must have keys since it is an existing file with actual payload */
-
-       /* unwrap keys if needed */
-       if (entry->cp_flags & CP_KEY_FLUSHED) {
-               error = cp_restore_keys(entry, hfsmp, cp);
-       }
-
-       /*
-        * Don't need to write out the EA since if the file has actual extents,
-        * it must have an EA
-        */
-out:
-
-       /* return the cp still locked */
+       hfs_unlock(cp);
        return error;
 }
 
+
 /*
  * cp_getrootxattr:
  * Gets the EA we set on the root folder (fileid 1) to get information about the
@@ -1135,7 +1384,14 @@ cp_getrootxattr(struct hfsmount* hfsmp, struct cp_root_xattr *outxattr)
 {
        uio_t   auio;
        char    uio_buf[UIO_SIZEOF(1)];
-       size_t attrsize = sizeof(struct cp_root_xattr);
+       void    *buf;
+
+       /*
+        * We allow for an extra 64 bytes to cater for upgrades.  This wouldn't
+        * be necessary if the xattr routines just returned what we asked for.
+        */
+       size_t attrsize = roundup(sizeof(struct cp_root_xattr) + 64, 64);
+
        int error = 0;
        struct vnop_getxattr_args args;
 
@@ -1143,8 +1399,10 @@ cp_getrootxattr(struct hfsmount* hfsmp, struct cp_root_xattr *outxattr)
                panic("Content Protection: cp_xattr called with xattr == NULL");
        }
 
+       MALLOC(buf, void *, attrsize, M_TEMP, M_WAITOK);
+
        auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf));
-       uio_addiov(auio, CAST_USER_ADDR_T(outxattr), attrsize);
+       uio_addiov(auio, CAST_USER_ADDR_T(buf), attrsize);
 
        args.a_desc = NULL; // unused
        args.a_vp = NULL; //unused since we're writing EA to root folder.
@@ -1156,17 +1414,34 @@ cp_getrootxattr(struct hfsmount* hfsmp, struct cp_root_xattr *outxattr)
 
        error = hfs_getxattr_internal(NULL, &args, hfsmp, 1);
 
-       /* Now convert the multi-byte fields to native endianness */
-       outxattr->major_version = OSSwapLittleToHostInt16(outxattr->major_version);
-       outxattr->minor_version = OSSwapLittleToHostInt16(outxattr->minor_version);
-       outxattr->flags = OSSwapLittleToHostInt64(outxattr->flags);
-
        if (error != 0) {
                goto out;
        }
 
+       if (attrsize < CP_ROOT_XATTR_MIN_LEN) {
+               error = HFS_EINCONSISTENT;
+               goto out;
+       }
+
+       const struct cp_root_xattr *xattr = buf;
+
+       bzero(outxattr, sizeof(*outxattr));
+
+       /* Now convert the multi-byte fields to native endianness */
+       outxattr->major_version = OSSwapLittleToHostInt16(xattr->major_version);
+       outxattr->minor_version = OSSwapLittleToHostInt16(xattr->minor_version);
+       outxattr->flags = OSSwapLittleToHostInt64(xattr->flags);
+
+       if (outxattr->major_version >= CP_VERS_5) {
+               if (attrsize < sizeof(struct cp_root_xattr)) {
+                       error = HFS_EINCONSISTENT;
+                       goto out;
+               }
+       }
+
 out:
        uio_free(auio);
+       FREE(buf, M_TEMP);
        return error;
 }
 
@@ -1193,13 +1468,24 @@ cp_setrootxattr(struct hfsmount *hfsmp, struct cp_root_xattr *newxattr)
        args.a_options = 0;
        args.a_context = NULL; //no context needed, only done from mount.
 
+       const uint32_t flags = newxattr->flags;
+
        /* Now convert the multi-byte fields to little endian before writing to disk. */
+       newxattr->flags = OSSwapHostToLittleInt64(newxattr->flags);
+
+       int xattr_size = sizeof(struct cp_root_xattr);
+
+
        newxattr->major_version = OSSwapHostToLittleInt16(newxattr->major_version);
        newxattr->minor_version = OSSwapHostToLittleInt16(newxattr->minor_version);
-       newxattr->flags = OSSwapHostToLittleInt64(newxattr->flags);
 
        error = hfs_setxattr_internal(NULL, (caddr_t)newxattr,
-                       sizeof(struct cp_root_xattr), &args, hfsmp, 1);
+                       xattr_size, &args, hfsmp, 1);
+
+       if (!error) {
+               hfsmp->cproot_flags = flags;
+       }
+
        return error;
 }
 
@@ -1210,110 +1496,69 @@ cp_setrootxattr(struct hfsmount *hfsmp, struct cp_root_xattr *newxattr)
  *
  * This function is also invoked during file creation.
  */
-int cp_setxattr(struct cnode *cp, struct cprotect *entry, struct hfsmount *hfsmp, uint32_t fileid, int options)
+int cp_setxattr(struct cnode *cp, struct cprotect *entry, struct hfsmount *hfsmp,
+                               uint32_t fileid, int options)
 {
        int error = 0;
-       size_t attrsize;
-       struct vnop_setxattr_args args;
-       uint32_t target_fileid;
-       struct cnode *arg_cp = NULL;
-       uint32_t tempflags = 0;
+       cp_key_pair_t *cpkp = &entry->cp_keys;
 
-       args.a_desc = NULL;
-       
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
                return EROFS;
        }
-       
-       if (cp) {
-               args.a_vp = cp->c_vp;
-               target_fileid = 0;
-               arg_cp = cp;
-       }
-       else {
-               /*
-                * When we set the EA in the same txn as the file creation,
-                * we do not have a vnode/cnode yet. Use the specified fileid.
-                */
-               args.a_vp = NULL;
-               target_fileid = fileid;
-       }
-       args.a_name = CONTENT_PROTECTION_XATTR_NAME;
-       args.a_uio = NULL; //pass data ptr instead
-       args.a_options = options;
-       args.a_context = vfs_context_current();
 
-       /* Note that it's OK to write out an XATTR without keys. */
-       /* Disable flags that will be invalid as we're writing the EA out at this point. */
-       tempflags = entry->cp_flags;
+       if (hfsmp->hfs_running_cp_major_vers < CP_CURRENT_VERS) {
+               // Upgrade
+               printf("hfs: upgrading to cp version %u\n", CP_CURRENT_VERS);
 
-       /* we're writing the EA; CP_NO_XATTR is invalid */
-       tempflags &= ~CP_NO_XATTR;
-       
-       /* CP_SEP_WRAPPEDKEY is informational/runtime only. */
-       tempflags &= ~CP_SEP_WRAPPEDKEY;
-
-       switch(hfsmp->hfs_running_cp_major_vers) {
-               case CP_NEW_MAJOR_VERS: {
-                       struct cp_xattr_v4 *newxattr = NULL; // 70+ bytes; don't alloc on stack.
-                       MALLOC (newxattr, struct cp_xattr_v4*, sizeof(struct cp_xattr_v4), M_TEMP, M_WAITOK);
-                       if (newxattr == NULL) {
-                               error = ENOMEM;
-                               break;
-                       }
-                       bzero (newxattr, sizeof(struct cp_xattr_v4));
+               struct cp_root_xattr root_xattr;
 
-                       attrsize = sizeof(*newxattr) - CP_MAX_WRAPPEDKEYSIZE + entry->cp_persistent_key_len;
+               error = cp_getrootxattr(hfsmp, &root_xattr);
+               if (error)
+                       return error;
 
-                       /* Endian swap the multi-byte fields into L.E from host. */
-                       newxattr->xattr_major_version = OSSwapHostToLittleInt16 (hfsmp->hfs_running_cp_major_vers);
-                       newxattr->xattr_minor_version = OSSwapHostToLittleInt16(CP_MINOR_VERS);
-                       newxattr->key_size = OSSwapHostToLittleInt32(entry->cp_persistent_key_len);
-                       newxattr->flags = OSSwapHostToLittleInt32(tempflags);
-                       newxattr->persistent_class = OSSwapHostToLittleInt32(entry->cp_pclass);
-                       bcopy(entry->cp_persistent_key, newxattr->persistent_key, entry->cp_persistent_key_len);
+               root_xattr.major_version = CP_CURRENT_VERS;
+               root_xattr.minor_version = CP_MINOR_VERS;
 
-                       error = hfs_setxattr_internal(arg_cp, (caddr_t)newxattr, attrsize, &args, hfsmp, target_fileid);
+               error = cp_setrootxattr(hfsmp, &root_xattr);
+               if (error)
+                       return error;
 
-                       FREE(newxattr, M_TEMP);
-                       break;
-               }
-               case CP_PREV_MAJOR_VERS: {
-                       struct cp_xattr_v2 *newxattr = NULL;
-                       MALLOC (newxattr, struct cp_xattr_v2*, sizeof(struct cp_xattr_v2), M_TEMP, M_WAITOK);
-                       if (newxattr == NULL) {
-                               error = ENOMEM;
-                               break;
-                       }
-                       bzero (newxattr, sizeof(struct cp_xattr_v2));
+               hfsmp->hfs_running_cp_major_vers = CP_CURRENT_VERS;
+       }
 
-                       attrsize = sizeof(*newxattr);
+       struct cp_xattr_v5 *xattr;
+       MALLOC(xattr, struct cp_xattr_v5 *, sizeof(*xattr), M_TEMP, M_WAITOK);
 
-                       /* Endian swap the multi-byte fields into L.E from host. */
-                       newxattr->xattr_major_version = OSSwapHostToLittleInt16(hfsmp->hfs_running_cp_major_vers);
-                       newxattr->xattr_minor_version = OSSwapHostToLittleInt16(CP_MINOR_VERS);
-                       newxattr->key_size = OSSwapHostToLittleInt32(entry->cp_persistent_key_len);
-                       newxattr->flags = OSSwapHostToLittleInt32(tempflags);
-                       newxattr->persistent_class = OSSwapHostToLittleInt32(entry->cp_pclass);
-                       bcopy(entry->cp_persistent_key, newxattr->persistent_key, entry->cp_persistent_key_len);
+       xattr->xattr_major_version      = OSSwapHostToLittleConstInt16(CP_VERS_5);
+       xattr->xattr_minor_version      = OSSwapHostToLittleConstInt16(CP_MINOR_VERS);
+       xattr->flags                            = 0;
+       xattr->persistent_class         = OSSwapHostToLittleInt32(entry->cp_pclass);
+       xattr->key_os_version           = OSSwapHostToLittleInt32(entry->cp_key_os_version);
+       xattr->key_revision                     = OSSwapHostToLittleInt16(entry->cp_key_revision);
 
-                       error = hfs_setxattr_internal(arg_cp, (caddr_t)newxattr, attrsize, &args, hfsmp, target_fileid);
+       uint16_t key_len = cpkp_pers_key_len(cpkp);
+       xattr->key_len  = OSSwapHostToLittleInt16(key_len);
+       memcpy(xattr->persistent_key, cpkp_pers_key(cpkp), key_len);
 
-                       FREE (newxattr, M_TEMP);
-                       break;
-               }
-               default:
-                       printf("hfs: cp_setxattr: Unknown CP version running \n");
-                       break;
-       }
+       size_t xattr_len = offsetof(struct cp_xattr_v5, persistent_key) + key_len;
+
+
+       struct vnop_setxattr_args args = {
+               .a_vp           = cp ? cp->c_vp : NULL,
+               .a_name         = CONTENT_PROTECTION_XATTR_NAME,
+               .a_options      = options,
+               .a_context      = vfs_context_current(),
+       };
+
+       error = hfs_setxattr_internal(cp, xattr, xattr_len, &args, hfsmp, fileid);
+
+       FREE(xattr, M_TEMP);
 
        if (error == 0 ) {
                entry->cp_flags &= ~CP_NO_XATTR;
        }
 
        return error;
-
-
 }
 
 /*
@@ -1420,216 +1665,244 @@ cp_is_valid_class(int isdir, int32_t protectionclass)
        }
 }
 
+#if DEBUG
+static const uint32_t cp_magic1 = 0x7b727063;  // cpr{
+static const uint32_t cp_magic2 = 0x7270637d;  // }cpr
+#endif
 
-static struct cprotect *
-cp_entry_alloc(size_t keylen)
+struct cprotect *
+cp_entry_alloc(cprotect_t old, uint16_t pers_key_len,
+                          uint16_t cached_key_len, cp_key_pair_t **pcpkp)
 {
        struct cprotect *cp_entry;
 
-       if (keylen > CP_MAX_WRAPPEDKEYSIZE)
+       if (pers_key_len > CP_MAX_WRAPPEDKEYSIZE)
                return (NULL);
 
-       MALLOC(cp_entry, struct cprotect *, sizeof(struct cprotect) + keylen,
-                  M_TEMP, M_WAITOK);
-       if (cp_entry == NULL)
-               return (NULL);
+       size_t size = (sizeof(struct cprotect) - sizeof(cp_key_pair_t)
+                                  + cpkp_size(pers_key_len, cached_key_len));
+
+#if DEBUG
+       size += 4;      // Extra for magic2
+#endif
 
-       bzero(cp_entry, sizeof(*cp_entry) + keylen);
-       cp_entry->cp_persistent_key_len = keylen;
-       return (cp_entry);
+       MALLOC(cp_entry, struct cprotect *, size, M_TEMP, M_WAITOK);
+
+       if (old) {
+               memcpy(cp_entry, old, offsetof(struct cprotect, cp_keys));
+
+       } else {
+               bzero(cp_entry, offsetof(struct cprotect, cp_keys));
+       }
+
+#if DEBUG
+       cp_entry->cp_magic1 = cp_magic1;
+       *PTR_ADD(uint32_t *, cp_entry, size - 4) = cp_magic2;
+#endif
+
+       cpkp_init(&cp_entry->cp_keys, pers_key_len, cached_key_len);
+
+       /*
+        * If we've been passed the old entry, then we are in the process of
+        * rewrapping in which case we need to copy the cached key.  This is
+        * important for class B files when the device is locked because we
+        * won't be able to unwrap whilst in this state, yet we still need the
+        * unwrapped key.
+        */
+       if (old)
+               cpx_copy(cpkp_cpx(&old->cp_keys), cpkp_cpx(&cp_entry->cp_keys));
+
+       if (pcpkp)
+               *pcpkp = &cp_entry->cp_keys;
+
+       return cp_entry;
 }
 
 static void
-cp_entry_dealloc(struct cprotect *entry)
+cp_entry_dealloc(__unused hfsmount_t *hfsmp, struct cprotect *entry)
 {
-       uint32_t keylen = entry->cp_persistent_key_len;
-       bzero(entry, (sizeof(*entry) + keylen));
+
+       cpkp_flush(&entry->cp_keys);
+
+#if DEBUG
+       assert(entry->cp_magic1 == cp_magic1);
+       assert(*PTR_ADD(uint32_t *, entry, (sizeof(struct cprotect) - sizeof(cp_key_pair_t)
+                                                                               + cpkp_sizex(&entry->cp_keys))) == cp_magic2);
+#endif
+
        FREE(entry, M_TEMP);
 }
 
-
-/*
- * Initializes a new cprotect entry with xattr data from the cnode.
- * cnode lock held shared
- */
-static int
-cp_getxattr(struct cnode *cp, struct hfsmount *hfsmp, struct cprotect **outentry)
+static int cp_read_xattr_v4(__unused hfsmount_t *hfsmp, struct cp_xattr_v4 *xattr,
+                                                       size_t xattr_len, cprotect_t *pcpr, cp_getxattr_options_t options)
 {
-       int error = 0;
-       uio_t auio;
-       size_t attrsize;
-       char uio_buf[UIO_SIZEOF(1)];
-       struct vnop_getxattr_args args;
-       struct cprotect *entry = NULL;
+       /* Endian swap the multi-byte fields into host endianness from L.E. */
+       xattr->xattr_major_version = OSSwapLittleToHostInt16(xattr->xattr_major_version);
+       xattr->xattr_minor_version = OSSwapLittleToHostInt16(xattr->xattr_minor_version);
+       xattr->key_size = OSSwapLittleToHostInt32(xattr->key_size);
+       xattr->flags = OSSwapLittleToHostInt32(xattr->flags);
+       xattr->persistent_class = OSSwapLittleToHostInt32(xattr->persistent_class);
+       xattr->key_os_version = OSSwapLittleToHostInt32(xattr->key_os_version);
 
-       auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf));
-       args.a_desc = NULL; // unused
-       args.a_vp = cp->c_vp;
-       args.a_name = CONTENT_PROTECTION_XATTR_NAME;
-       args.a_uio = auio;
-       args.a_options = XATTR_REPLACE;
-       args.a_context = vfs_context_current(); // unused
+       /*
+        * Prevent a buffer overflow, and validate the key length obtained from the
+        * EA. If it's too big, then bail out, because the EA can't be trusted at this
+        * point.
+        */
+       if (xattr->key_size > CP_MAX_WRAPPEDKEYSIZE)
+               return HFS_EINCONSISTENT;
 
-       switch (hfsmp->hfs_running_cp_major_vers) {
-               case CP_NEW_MAJOR_VERS: {
-                       struct cp_xattr_v4 *xattr = NULL;
-                       MALLOC (xattr, struct cp_xattr_v4*, sizeof(struct cp_xattr_v4), M_TEMP, M_WAITOK);
-                       if (xattr == NULL) {
-                               error = ENOMEM;
-                               break;
-                       }
-                       bzero(xattr, sizeof (struct cp_xattr_v4));
-                       attrsize = sizeof(*xattr);
+       size_t min_len = offsetof(struct cp_xattr_v4, persistent_key) + xattr->key_size;
+       if (xattr_len < min_len)
+               return HFS_EINCONSISTENT;
 
-                       uio_addiov(auio, CAST_USER_ADDR_T(xattr), attrsize);
-                       args.a_size = &attrsize;
+       /*
+        * Class F files have no backing key; their keylength should be 0,
+        * though they should have the proper flags set.
+        *
+        * A request to instantiate a CP for a class F file should result
+        * in a bzero'd cp that just says class F, with key_flushed set.
+        */
+       if (CP_CLASS(xattr->persistent_class) == PROTECTION_CLASS_F
+               || ISSET(xattr->flags, CP_XAF_NEEDS_KEYS)) {
+               xattr->key_size = 0;
+       }
 
-                       error = hfs_getxattr_internal(cp, &args, VTOHFS(cp->c_vp), 0);
-                       if (error != 0) {
-                               FREE (xattr, M_TEMP);
-                               goto out;
-                       }
+       /* set up entry with information from xattr */
+       cp_key_pair_t *cpkp;
+       cprotect_t entry;
+       
+       if (ISSET(options, CP_GET_XATTR_BASIC_INFO)) {
+               /* caller passed in a pre-allocated structure to get the basic info */
+               entry = *pcpr;
+               bzero(entry, offsetof(struct cprotect, cp_keys));
+       }
+       else {
+               entry = cp_entry_alloc(NULL, xattr->key_size, CP_MAX_CACHEBUFLEN, &cpkp);
+       }
 
-                       /* Endian swap the multi-byte fields into host endianness from L.E. */
-                       xattr->xattr_major_version = OSSwapLittleToHostInt16(xattr->xattr_major_version);
-                       xattr->xattr_minor_version = OSSwapLittleToHostInt16(xattr->xattr_minor_version);
-                       xattr->key_size = OSSwapLittleToHostInt32(xattr->key_size);
-                       xattr->flags = OSSwapLittleToHostInt32(xattr->flags);
-                       xattr->persistent_class = OSSwapLittleToHostInt32(xattr->persistent_class);
+       entry->cp_pclass = xattr->persistent_class;
+       entry->cp_key_os_version = xattr->key_os_version;
 
-                       if (xattr->xattr_major_version != hfsmp->hfs_running_cp_major_vers ) {
-                               printf("hfs: cp_getxattr: bad xattr version %d expecting %d\n",
-                                       xattr->xattr_major_version, hfsmp->hfs_running_cp_major_vers);
-                               error = EINVAL;
-                               FREE (xattr, M_TEMP);
 
-                               goto out;
-                       }
-                       /*
-                        * Prevent a buffer overflow, and validate the key length obtained from the
-                        * EA. If it's too big, then bail out, because the EA can't be trusted at this
-                        * point.
-                        */
-                       if (xattr->key_size > CP_MAX_WRAPPEDKEYSIZE) {
-                               error = EINVAL;
-                               FREE (xattr, M_TEMP);
+       if (!ISSET(options, CP_GET_XATTR_BASIC_INFO)) {
+               if (xattr->key_size) {
+                       cpkp_set_pers_key_len(cpkp, xattr->key_size);
+                       memcpy(cpkp_pers_key(cpkp), xattr->persistent_key, xattr->key_size);
+               }
 
-                               goto out;
-                       }
+               *pcpr = entry;
+       }
+       else if (xattr->key_size) {
+               SET(entry->cp_flags, CP_HAS_A_KEY);
+       }
 
-                       /* 
-                        * Class F files have no backing key; their keylength should be 0,
-                        * though they should have the proper flags set.
-                        *
-                        * A request to instantiate a CP for a class F file should result 
-                        * in a bzero'd cp that just says class F, with key_flushed set.
-                        */
+       return 0;
+}
 
-                       /* set up entry with information from xattr */
-                       entry = cp_entry_alloc(xattr->key_size);
-                       if (!entry) {
-                               FREE (xattr, M_TEMP);
+int cp_read_xattr_v5(hfsmount_t *hfsmp, struct cp_xattr_v5 *xattr,
+                                        size_t xattr_len, cprotect_t *pcpr, cp_getxattr_options_t options)
+{
+       if (xattr->xattr_major_version == OSSwapHostToLittleConstInt16(CP_VERS_4)) {
+               return cp_read_xattr_v4(hfsmp, (struct cp_xattr_v4 *)xattr, xattr_len, pcpr, options);
+       }
 
-                               return ENOMEM;
-                       }
+       xattr->xattr_major_version      = OSSwapLittleToHostInt16(xattr->xattr_major_version);
 
-                       entry->cp_pclass = xattr->persistent_class;
+       if (xattr->xattr_major_version != CP_VERS_5) {
+               printf("hfs: cp_getxattr: unsupported xattr version %d\n",
+                          xattr->xattr_major_version);
+               return ENOTSUP;
+       }
 
-                       /* 
-                        * Suppress invalid flags that should not be set. 
-                        * If we have gotten this far, then CP_NO_XATTR cannot possibly
-                        * be valid; the EA exists.
-                        */
-                       xattr->flags &= ~CP_NO_XATTR;
+       size_t min_len = offsetof(struct cp_xattr_v5, persistent_key);
 
-                       entry->cp_flags = xattr->flags;
-                       if (xattr->xattr_major_version >= CP_NEW_MAJOR_VERS) {
-                               entry->cp_flags |= CP_OFF_IV_ENABLED;
-                       }
+       if (xattr_len < min_len)
+               return HFS_EINCONSISTENT;
 
-                       if (CP_CLASS(entry->cp_pclass) != PROTECTION_CLASS_F ) {
-                               bcopy(xattr->persistent_key, entry->cp_persistent_key, xattr->key_size);
-                       }
+       xattr->xattr_minor_version      = OSSwapLittleToHostInt16(xattr->xattr_minor_version);
+       xattr->flags                            = OSSwapLittleToHostInt32(xattr->flags);
+       xattr->persistent_class         = OSSwapLittleToHostInt32(xattr->persistent_class);
+       xattr->key_os_version           = OSSwapLittleToHostInt32(xattr->key_os_version);
+       xattr->key_revision                     = OSSwapLittleToHostInt16(xattr->key_revision);
+       xattr->key_len                          = OSSwapLittleToHostInt16(xattr->key_len);
 
-                       FREE (xattr, M_TEMP);
+       uint16_t pers_key_len = xattr->key_len;
 
-                       break;
-               }
-               case CP_PREV_MAJOR_VERS: {
-                       struct cp_xattr_v2 *xattr = NULL;
-                       MALLOC (xattr, struct cp_xattr_v2*, sizeof(struct cp_xattr_v2), M_TEMP, M_WAITOK);
-                       if (xattr == NULL) {
-                               error = ENOMEM;
-                               break;
-                       }
-                       bzero (xattr, sizeof (struct cp_xattr_v2));
-                       attrsize = sizeof(*xattr);
+       min_len += pers_key_len;
+       if (xattr_len < min_len)
+               return HFS_EINCONSISTENT;
 
-                       uio_addiov(auio, CAST_USER_ADDR_T(xattr), attrsize);
-                       args.a_size = &attrsize;
 
-                       error = hfs_getxattr_internal(cp, &args, VTOHFS(cp->c_vp), 0);
-                       if (error != 0) {
-                               FREE (xattr, M_TEMP);
-                               goto out;
-                       }
+       cp_key_pair_t *cpkp;
+       cprotect_t entry;
+       
+       /* 
+        * If option CP_GET_XATTR_BASIC_INFO is set, we only return basic
+        * information about the file's protection (and not the key) and
+        * we store the result in the structure the caller passed to us.
+        */
+       if (ISSET(options, CP_GET_XATTR_BASIC_INFO)) {
+               entry = *pcpr;
+               bzero(entry, offsetof(struct cprotect, cp_keys));
+       } else {
+               entry = cp_entry_alloc(NULL, xattr->key_len, CP_MAX_CACHEBUFLEN, &cpkp);
+       }
 
-                       /* Endian swap the multi-byte fields into host endianness from L.E. */
-                       xattr->xattr_major_version = OSSwapLittleToHostInt16(xattr->xattr_major_version);
-                       xattr->xattr_minor_version = OSSwapLittleToHostInt16(xattr->xattr_minor_version);
-                       xattr->key_size = OSSwapLittleToHostInt32(xattr->key_size);
-                       xattr->flags = OSSwapLittleToHostInt32(xattr->flags);
-                       xattr->persistent_class = OSSwapLittleToHostInt32(xattr->persistent_class);
+       entry->cp_pclass                        = xattr->persistent_class;
+       entry->cp_key_os_version        = xattr->key_os_version;
+       entry->cp_key_revision          = xattr->key_revision;
 
-                       if (xattr->xattr_major_version != hfsmp->hfs_running_cp_major_vers) {
-                               printf("hfs: cp_getxattr: bad xattr version %d expecting %d\n",
-                                       xattr->xattr_major_version, hfsmp->hfs_running_cp_major_vers);
-                               error = EINVAL;
-                               FREE (xattr, M_TEMP);
-                               goto out;
-                       }
+       if (!ISSET(options, CP_GET_XATTR_BASIC_INFO)) {
+               if (xattr->key_len) {
+                       cpkp_set_pers_key_len(cpkp, xattr->key_len);
+                       memcpy(cpkp_pers_key(cpkp), xattr->persistent_key, xattr->key_len);
+               }
 
-                       /*
-                        * Prevent a buffer overflow, and validate the key length obtained from the
-                        * EA. If it's too big, then bail out, because the EA can't be trusted at this
-                        * point.
-                        */
-                       if (xattr->key_size > CP_V2_WRAPPEDKEYSIZE) {
-                               error = EINVAL;
-                               FREE (xattr, M_TEMP);
-                               goto out;
-                       }
-                       /* set up entry with information from xattr */
-                       entry = cp_entry_alloc(xattr->key_size);
-                       if (!entry) {
-                               FREE (xattr, M_TEMP);
-                               return ENOMEM;
-                       }
 
-                       entry->cp_pclass = xattr->persistent_class;
+               *pcpr = entry;
+       }
+       else if (xattr->key_len) {
+               SET(entry->cp_flags, CP_HAS_A_KEY);
+       }
 
-                       /* 
-                        * Suppress invalid flags that should not be set. 
-                        * If we have gotten this far, then CP_NO_XATTR cannot possibly
-                        * be valid; the EA exists.
-                        */
-                       xattr->flags &= ~CP_NO_XATTR;
+       return 0;
+}
 
-                       entry->cp_flags = xattr->flags;
+/*
+ * Initializes a new cprotect entry with xattr data from the cnode.
+ * cnode lock held shared
+ */
+static int
+cp_getxattr(struct cnode *cp, struct hfsmount *hfsmp, cprotect_t *outentry)
+{
+       size_t xattr_len = sizeof(struct cp_xattr_v5);
+       struct cp_xattr_v5 *xattr;
 
-                       if (CP_CLASS(entry->cp_pclass) != PROTECTION_CLASS_F ) {
-                               bcopy(xattr->persistent_key, entry->cp_persistent_key, xattr->key_size);
-                       }
+       MALLOC (xattr, struct cp_xattr_v5 *, xattr_len,
+                       M_TEMP, M_WAITOK);
 
-                       FREE (xattr, M_TEMP);
-                       break;
-               }
+       int error = hfs_xattr_read(cp->c_vp, CONTENT_PROTECTION_XATTR_NAME,
+                                                          xattr, &xattr_len);
+
+       if (!error) {
+               if (xattr_len < CP_XATTR_MIN_LEN)
+                       error = HFS_EINCONSISTENT;
+               else
+                       error = cp_read_xattr_v5(hfsmp, xattr, xattr_len, outentry, 0);
        }
 
-out:
-       uio_free(auio);
+#if DEBUG
+       if (error && error != ENOATTR) {
+               printf("cp_getxattr: bad cp xattr (%d):\n", error);
+               for (size_t i = 0; i < xattr_len; ++i)
+                       printf("%02x ", ((uint8_t *)xattr)[i]);
+               printf("\n");
+       }
+#endif
+
+       FREE(xattr, M_TEMP);
 
-       *outentry = entry;
        return error;
 }
 
@@ -1645,15 +1918,9 @@ cp_restore_keys(struct cprotect *entry, struct hfsmount *hfsmp, struct cnode *cp
 
        error = cp_unwrap(hfsmp, entry, cp);
        if (error) {
-               entry->cp_flags |= CP_KEY_FLUSHED;
-               bzero(entry->cp_cache_key, entry->cp_cache_key_len);
+               cp_flush_cached_keys(entry);
                error = EPERM;
        }
-       else {
-               /* ready for business */
-               entry->cp_flags &= ~CP_KEY_FLUSHED;
-
-       }
        return error;
 }
 
@@ -1819,9 +2086,7 @@ cp_lock_vnode_callback(struct vnode *vp, void *arg)
                         * is no key leakage in that layer.
                         */
 
-                       entry->cp_flags |= CP_KEY_FLUSHED;
-                       bzero(&entry->cp_cache_key, entry->cp_cache_key_len);
-                       bzero(&entry->cp_cache_iv_ctx, sizeof(aes_encrypt_ctx));
+                       cp_flush_cached_keys(entry);
 
                        /* some write may have arrived in the mean time. dump those pages */
                        hfs_unlock(cp);
@@ -1858,15 +2123,17 @@ out:
  * Generate a new wrapped key based on the existing cache key.
  */
 
-static int
-cp_rewrap(struct cnode *cp, struct hfsmount *hfsmp, int newclass) 
+int
+cp_rewrap(struct cnode *cp, __unused hfsmount_t *hfsmp,
+                 cp_key_class_t *newclass, cp_key_pair_t *cpkp, const void *old_holder,
+                 cp_new_alloc_fn alloc_fn, void **pholder)
 {
-
        struct cprotect *entry = cp->c_cpentry;
+
        uint8_t new_persistent_key[CP_MAX_WRAPPEDKEYSIZE];
        size_t keylen = CP_MAX_WRAPPEDKEYSIZE;
        int error = 0;
-       newclass = CP_CLASS(newclass);
+       const cp_key_class_t key_class = CP_CLASS(*newclass);
 
        /* Structures passed between HFS and AKS */
        cp_cred_s access_in;
@@ -1878,15 +2145,15 @@ cp_rewrap(struct cnode *cp, struct hfsmount *hfsmp, int newclass)
         * key that is only good as long as the file is open.  There is no
         * wrapped key, so there isn't anything to wrap.
         */
-       if (newclass == PROTECTION_CLASS_F) {
+       if (key_class == PROTECTION_CLASS_F) {
                return EINVAL;
        }
 
        cp_init_access(&access_in, cp);
 
        bzero(&wrapped_key_in, sizeof(wrapped_key_in));
-       wrapped_key_in.key = entry->cp_persistent_key;
-       wrapped_key_in.key_len = entry->cp_persistent_key_len;
+       wrapped_key_in.key = cpkp_pers_key(cpkp);
+       wrapped_key_in.key_len = cpkp_pers_key_len(cpkp);
        /* Use the persistent class when talking to AKS */
        wrapped_key_in.dp_class = entry->cp_pclass;
 
@@ -1902,21 +2169,20 @@ cp_rewrap(struct cnode *cp, struct hfsmount *hfsmp, int newclass)
         * don't lose.
         */
        error = g_cp_wrap_func.rewrapper(&access_in,
-                       newclass, /* new class */
+                       key_class, /* new class */
                        &wrapped_key_in,
                        &wrapped_key_out);
 
        keylen = wrapped_key_out.key_len;
 
        if (error == 0) {
-               struct cprotect *newentry = NULL;
-               /* 
+               /*
                 * Verify that AKS returned to us a wrapped key of the 
                 * target class requested.   
                 */
                /* Get the effective class here */
-               int effective = CP_CLASS(wrapped_key_out.dp_class);
-               if (effective != newclass) {
+               cp_key_class_t effective = CP_CLASS(wrapped_key_out.dp_class);
+               if (effective != key_class) {
                        /* 
                         * Fail the operation if defaults or some other enforcement
                         * dictated that the class be wrapped differently. 
@@ -1926,29 +2192,16 @@ cp_rewrap(struct cnode *cp, struct hfsmount *hfsmp, int newclass)
                        return EPERM;
                }
 
-               /* v2 EA's don't support the larger class B keys */
-               if ((keylen != CP_V2_WRAPPEDKEYSIZE) &&
-                               (hfsmp->hfs_running_cp_major_vers == CP_PREV_MAJOR_VERS)) {
-                       return EINVAL;
-               }
-
                /* Allocate a new cpentry */
-               newentry = cp_entry_alloc (keylen);
-               bcopy (entry, newentry, sizeof(struct cprotect));
+               cp_key_pair_t *new_cpkp;
+               *pholder = alloc_fn(old_holder, keylen, CP_MAX_CACHEBUFLEN, &new_cpkp);
 
                /* copy the new key into the entry */
-               bcopy (new_persistent_key, newentry->cp_persistent_key, keylen);
-               newentry->cp_persistent_key_len = keylen;
-               newentry->cp_backing_cnode = cp;
+               cpkp_set_pers_key_len(new_cpkp, keylen);
+               memcpy(cpkp_pers_key(new_cpkp), new_persistent_key, keylen);
 
                /* Actually record/store what AKS reported back, not the effective class stored in newclass */
-               newentry->cp_pclass = wrapped_key_out.dp_class;
-
-               /* Attach the new entry to the cnode */
-               cp->c_cpentry = newentry;
-
-               /* destroy the old entry */
-               cp_entry_destroy (entry);
+               *newclass = wrapped_key_out.dp_class;
        }
        else {
                error = EPERM;
@@ -1957,46 +2210,36 @@ cp_rewrap(struct cnode *cp, struct hfsmount *hfsmp, int newclass)
        return error;
 }
 
-
-static int
-cp_unwrap(struct hfsmount *hfsmp, struct cprotect *entry, struct cnode *cp)
+static int cpkp_unwrap(cnode_t *cp, cp_key_class_t key_class, cp_key_pair_t *cpkp)
 {
        int error = 0;
        uint8_t iv_key[CP_IV_KEYSIZE];
+       cpx_t cpx = cpkp_cpx(cpkp);
 
        /* Structures passed between HFS and AKS */
        cp_cred_s access_in;
        cp_wrapped_key_s wrapped_key_in;
        cp_raw_key_s key_out;
 
-       /*
-        * PROTECTION_CLASS_F is in-use by VM swapfile; it represents a transient
-        * key that is only good as long as the file is open.  There is no
-        * wrapped key, so there isn't anything to unwrap.
-        */
-       if (CP_CLASS(entry->cp_pclass) == PROTECTION_CLASS_F) {
-               return EPERM;
-       }
-
        cp_init_access(&access_in, cp);
 
        bzero(&wrapped_key_in, sizeof(wrapped_key_in));
-       wrapped_key_in.key = entry->cp_persistent_key;
-       wrapped_key_in.key_len = entry->cp_persistent_key_len;
+       wrapped_key_in.key = cpkp_pers_key(cpkp);
+       wrapped_key_in.key_len = cpkp_max_pers_key_len(cpkp);
        /* Use the persistent class when talking to AKS */
-       wrapped_key_in.dp_class = entry->cp_pclass;
+       wrapped_key_in.dp_class = key_class;
 
        bzero(&key_out, sizeof(key_out));
        key_out.iv_key = iv_key;
-       key_out.key = entry->cp_cache_key;
-       /* 
-        * The unwrapper should validate/set the key length for 
+       key_out.key = cpx_key(cpx);
+       /*
+        * The unwrapper should validate/set the key length for
         * the IV key length and the cache key length, however we need
         * to supply the correct buffer length so that AKS knows how
         * many bytes it has to work with.
         */
        key_out.iv_key_len = CP_IV_KEYSIZE;
-       key_out.key_len = CP_MAX_CACHEBUFLEN;
+       key_out.key_len = cpx_max_key_len(cpx);
 
        error = g_cp_wrap_func.unwrapper(&access_in, &wrapped_key_in, &key_out);
        if (!error) {
@@ -2004,24 +2247,13 @@ cp_unwrap(struct hfsmount *hfsmp, struct cprotect *entry, struct cnode *cp)
                        panic ("cp_unwrap: invalid key length! (%ul)\n", key_out.key_len);
                }
 
-               if (key_out.iv_key_len == 0 || key_out.iv_key_len > CP_IV_KEYSIZE) {
+               if (key_out.iv_key_len != CP_IV_KEYSIZE)
                        panic ("cp_unwrap: invalid iv key length! (%ul)\n", key_out.iv_key_len);
-               }
-               
-               entry->cp_cache_key_len = key_out.key_len;
-
-               /* No need to go here for older EAs */
-               if (hfsmp->hfs_running_cp_major_vers == CP_NEW_MAJOR_VERS) {
-                       aes_encrypt_key128(iv_key, &entry->cp_cache_iv_ctx);
-                       entry->cp_flags |= CP_OFF_IV_ENABLED;
-               }
 
-               /* Is the key a raw wrapped key? */
-               if (key_out.flags & CP_RAW_KEY_WRAPPEDKEY) {
-                       /* OR in the right bit for the cprotect */
-                       entry->cp_flags |= CP_SEP_WRAPPEDKEY;
-               }
+               cpx_set_key_len(cpx, key_out.key_len);
 
+               cpx_set_aes_iv_key(cpx, iv_key);
+               cpx_set_is_sep_wrapped_key(cpx, ISSET(key_out.flags, CP_RAW_KEY_WRAPPEDKEY));
        } else {
                error = EPERM;
        }
@@ -2029,26 +2261,22 @@ cp_unwrap(struct hfsmount *hfsmp, struct cprotect *entry, struct cnode *cp)
        return error;
 }
 
-/* Setup AES context */
 static int
-cp_setup_aes_ctx(struct cprotect *entry)
+cp_unwrap(__unused struct hfsmount *hfsmp, struct cprotect *entry, struct cnode *cp)
 {
-    SHA1_CTX sha1ctxt;
-    uint8_t cp_cache_iv_key[CP_IV_KEYSIZE]; /* Kiv */
-
-    /* First init the cp_cache_iv_key[] */
-    SHA1Init(&sha1ctxt);
-       
        /*
-        * We can only use this when the keys are generated in the AP; As a result
-        * we only use the first 32 bytes of key length in the cache key 
+        * PROTECTION_CLASS_F is in-use by VM swapfile; it represents a transient
+        * key that is only good as long as the file is open.  There is no
+        * wrapped key, so there isn't anything to unwrap.
         */
-    SHA1Update(&sha1ctxt, &entry->cp_cache_key[0], CP_MAX_KEYSIZE);
-    SHA1Final(&cp_cache_iv_key[0], &sha1ctxt);
+       if (CP_CLASS(entry->cp_pclass) == PROTECTION_CLASS_F) {
+               return EPERM;
+       }
+
+       int error = cpkp_unwrap(cp, entry->cp_pclass, &entry->cp_keys);
 
-    aes_encrypt_key128(&cp_cache_iv_key[0], &entry->cp_cache_iv_ctx);
 
-    return 0;
+       return error;
 }
 
 /*
@@ -2060,7 +2288,7 @@ cp_setup_aes_ctx(struct cprotect *entry)
  * on 'cp'.
  * 
  */
-int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, int targetclass, 
+int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, cp_key_class_t targetclass,
                uint32_t keyflags, struct cprotect **newentry) 
 {
 
@@ -2085,13 +2313,16 @@ int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, int targetclass,
        }
 
        if (S_ISREG(cp->c_mode)) {
-               if ((cp->c_cpentry->cp_flags & CP_NEEDS_KEYS) == 0){
+               if (!cp_needs_pers_key(cp->c_cpentry)) {
                        error = EINVAL;
                        goto out;
                }
        }
 
-       error = cp_new (targetclass, hfsmp, cp, cp->c_mode, keyflags, &newcp);
+       cp_key_revision_t key_revision = cp_initial_key_revision(hfsmp);
+
+       error = cp_new (&targetclass, hfsmp, cp, cp->c_mode, keyflags, key_revision,
+                                       (cp_new_alloc_fn)cp_entry_alloc, (void **)&newcp);
        if (error) {
                /* 
                 * Key generation failed. This is not necessarily fatal
@@ -2101,8 +2332,12 @@ int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, int targetclass,
                error = EPERM;
                goto out;
        }
-       
-       /* 
+
+       newcp->cp_pclass                        = targetclass;
+       newcp->cp_key_os_version        = cp_os_version();
+       newcp->cp_key_revision          = key_revision;
+
+       /*
         * If we got here, then we have a new cprotect.
         * Attempt to write the new one out.
         */
@@ -2112,7 +2347,7 @@ int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, int targetclass,
                /* Tear down the new cprotect; Tell MKB that it's invalid. Bail out */
                /* TODO: rdar://12170074 needs to be fixed before we can tell MKB */
                if (newcp) {
-                       cp_entry_destroy(newcp);
+                       cp_entry_destroy(hfsmp, newcp);
                }       
                goto out;
        }
@@ -2123,20 +2358,19 @@ int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, int targetclass,
         * 2) wrote the new keys to disk.
         * 3) cprotect is ready to go.
         */
-       
-       newcp->cp_flags &= ~CP_NEEDS_KEYS;
+
        *newentry = newcp;
-       
+
 out:
        return error;
 
 }
 
-void cp_replace_entry (struct cnode *cp, struct cprotect *newentry) 
+void cp_replace_entry (hfsmount_t *hfsmp, struct cnode *cp, struct cprotect *newentry)
 {
-       
        if (cp->c_cpentry) {
-               cp_entry_destroy (cp->c_cpentry);       
+
+               cp_entry_destroy (hfsmp, cp->c_cpentry);
        }
        cp->c_cpentry = newentry;
        newentry->cp_backing_cnode = cp;
@@ -2154,13 +2388,13 @@ void cp_replace_entry (struct cnode *cp, struct cprotect *newentry)
  * Additionally, decide if keys are even needed -- directories get cprotect data structures
  * but they do not have keys.
  *
- */ 
+ */
 
-static int
-cp_new(int newclass_eff, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode, 
-               uint32_t keyflags, struct cprotect **output_entry)
+int
+cp_new(cp_key_class_t *newclass_eff, __unused struct hfsmount *hfsmp, struct cnode *cp,
+          mode_t cmode, int32_t keyflags, cp_key_revision_t key_revision,
+          cp_new_alloc_fn alloc_fn, void **pholder)
 {
-       struct cprotect *entry = NULL;
        int error = 0;
        uint8_t new_key[CP_MAX_CACHEBUFLEN];
        size_t new_key_len = CP_MAX_CACHEBUFLEN;  /* AKS tell us the proper key length, how much of this is used */
@@ -2169,18 +2403,13 @@ cp_new(int newclass_eff, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode,
        uint8_t iv_key[CP_IV_KEYSIZE];
        size_t iv_key_len = CP_IV_KEYSIZE;
        int iswrapped = 0;
-
-       newclass_eff = CP_CLASS(newclass_eff);
+       cp_key_class_t key_class = CP_CLASS(*newclass_eff);
 
        /* Structures passed between HFS and AKS */
        cp_cred_s access_in;
        cp_wrapped_key_s wrapped_key_out;
        cp_raw_key_s key_out;
 
-       if (*output_entry != NULL) {
-               panic ("cp_new with non-null entry!");
-       }
-
        if (are_wraps_initialized == false) {
                printf("hfs: cp_new: wrap/gen functions not yet set\n");
                return ENXIO;
@@ -2212,7 +2441,7 @@ cp_new(int newclass_eff, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode,
        }
        else {
                /* Must be a file */         
-               if (newclass_eff == PROTECTION_CLASS_F) {
+               if (key_class == PROTECTION_CLASS_F) {
                        /* class F files are not wrapped; they can still use the max key size */
                        new_key_len = CP_MAX_KEYSIZE;
                        read_random (&new_key[0], new_key_len);
@@ -2245,8 +2474,10 @@ cp_new(int newclass_eff, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode,
                        wrapped_key_out.key = new_persistent_key;
                        wrapped_key_out.key_len = new_persistent_len;
 
-                       error = g_cp_wrap_func.new_key(&access_in, 
-                                       newclass_eff, 
+                       access_in.key_revision = key_revision;
+
+                       error = g_cp_wrap_func.new_key(&access_in,
+                                       key_class,
                                        &key_out,
                                        &wrapped_key_out);
 
@@ -2261,7 +2492,7 @@ cp_new(int newclass_eff, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode,
                                panic ("cp_new: invalid key length! (%ul) \n", key_out.key_len);
                        }
 
-                       if (key_out.iv_key_len == 0 || key_out.iv_key_len > CP_IV_KEYSIZE) {
+                       if (key_out.iv_key_len != CP_IV_KEYSIZE) {
                                panic ("cp_new: invalid iv key length! (%ul) \n", key_out.iv_key_len);
                        }       
                
@@ -2272,17 +2503,15 @@ cp_new(int newclass_eff, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode,
                         * if that occurred.  Check that the effective class returned by 
                         * AKS is the same as our effective new class 
                         */
-                       if ((int)(CP_CLASS(wrapped_key_out.dp_class)) != newclass_eff) {
-                               if (keyflags & CP_KEYWRAP_DIFFCLASS) {
-                                       newclass_eff = CP_CLASS(wrapped_key_out.dp_class);
-                               }
-                               else {
-                                       error = EPERM;  
+                       if (CP_CLASS(wrapped_key_out.dp_class) != key_class) {
+                               if (!ISSET(keyflags, CP_KEYWRAP_DIFFCLASS)) {
+                                       error = EPERM;
                                        /* TODO: When 12170074 fixed, release/invalidate the key! */
                                        goto cpnew_fail;
                                }
                        }
 
+                       *newclass_eff = wrapped_key_out.dp_class;
                        new_key_len = key_out.key_len;
                        iv_key_len = key_out.iv_key_len;
                        new_persistent_len = wrapped_key_out.key_len;
@@ -2298,67 +2527,41 @@ cp_new(int newclass_eff, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode,
         * Step 2: allocate cprotect and initialize it.
         */
 
-
-       /*
-        * v2 EA's don't support the larger class B keys
-        */
-       if ((new_persistent_len != CP_V2_WRAPPEDKEYSIZE) &&
-                       (hfsmp->hfs_running_cp_major_vers == CP_PREV_MAJOR_VERS)) {
-               return EINVAL;
-       }
-
-       entry = cp_entry_alloc (new_persistent_len);
-       if (entry == NULL) {
+       cp_key_pair_t *cpkp;
+       *pholder = alloc_fn(NULL, new_persistent_len, new_key_len, &cpkp);
+       if (*pholder == NULL) {
                return ENOMEM;
        }
 
-       *output_entry = entry;
-
-       /*
-        * For directories and class F files, just store the effective new class. 
-        * AKS does not interact with us in generating keys for F files, and directories
-        * don't actually have keys. 
-        */
-       if ( S_ISDIR (cmode) || (newclass_eff == PROTECTION_CLASS_F)) {
-               entry->cp_pclass = newclass_eff;
-       }
-       else {                  
-               /* 
-                * otherwise, store what AKS actually returned back to us. 
-                * wrapped_key_out is only valid if we have round-tripped to AKS
-                */
-               entry->cp_pclass = wrapped_key_out.dp_class;
-       }
-
        /* Copy the cache key & IV keys into place if needed. */
        if (new_key_len > 0) {
-               bcopy (new_key, entry->cp_cache_key, new_key_len);
-               entry->cp_cache_key_len = new_key_len;
+               cpx_t cpx = cpkp_cpx(cpkp);
 
+               cpx_set_key_len(cpx, new_key_len);
+               memcpy(cpx_key(cpx), new_key, new_key_len);
 
                /* Initialize the IV key */
-               if (hfsmp->hfs_running_cp_major_vers == CP_NEW_MAJOR_VERS) {
-                       if (newclass_eff == PROTECTION_CLASS_F) {
-                               /* class F needs a full IV initialize */
-                               cp_setup_aes_ctx(entry);
-                       }
-                       else {
-                               /* Key store gave us an iv key. Just need to wrap it.*/
-                               aes_encrypt_key128(iv_key, &entry->cp_cache_iv_ctx);
-                       }
-                       entry->cp_flags |= CP_OFF_IV_ENABLED;
-               }
+               if (key_class != PROTECTION_CLASS_F)
+                       cpx_set_aes_iv_key(cpx, iv_key);
+
+               cpx_set_is_sep_wrapped_key(cpx, iswrapped);
        }
        if (new_persistent_len > 0) {
-               bcopy(new_persistent_key, entry->cp_persistent_key, new_persistent_len);
+               cpkp_set_pers_key_len(cpkp, new_persistent_len);
+               memcpy(cpkp_pers_key(cpkp), new_persistent_key, new_persistent_len);
        }
 
-       /* Mark it as a wrapped key if necessary */
-       if (iswrapped) {
-               entry->cp_flags |= CP_SEP_WRAPPEDKEY;
+cpnew_fail:
+
+#if HFS_TMPDBG
+#if !SECURE_KERNEL
+       if ((hfsmp->hfs_cp_verbose) && (error == EPERM)) {
+               /* Only introspect the data fork */
+               cp_log_eperm (cp->c_vp, *newclass_eff, true);
        }
+#endif
+#endif
 
-cpnew_fail:
        return error;
 }
 
@@ -2376,20 +2579,261 @@ static void cp_init_access(cp_cred_t access, struct cnode *cp)
        access->pid = proc_pid(proc);
        access->uid = kauth_cred_getuid(cred);
 
+       if (cp->c_cpentry)
+               access->key_revision = cp->c_cpentry->cp_key_revision;
+
        return;
 }
 
-#else
+/*
+ * Parses versions of the form 12A316, i.e. <major><minor><revision> and
+ * returns a uint32_t in the form 0xaabbcccc where aa = <major>, 
+ * bb = <ASCII char>, cccc = <revision>.
+ */
+static cp_key_os_version_t parse_os_version(void)
+{
+       const char *p = osversion;
+
+       int a = 0;
+       while (*p >= '0' && *p <= '9') {
+               a = a * 10 + *p - '0';
+               ++p;
+       }
+
+       if (!a)
+               return 0;
+
+       int b = *p++;
+       if (!b)
+               return 0;
+
+       int c = 0;
+       while (*p >= '0' && *p <= '9') {
+               c = c * 10 + *p - '0';
+               ++p;
+       }
+
+       if (!c)
+               return 0;
+
+       return (a & 0xff) << 24 | b << 16 | (c & 0xffff);
+}
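
For example, under the packing described in the comment above, the build string "12A316" parses to a = 12 (0x0c), b = 'A' (0x41) and c = 316 (0x013c), i.e. 0x0c41013c. A minimal stand-alone sketch of the same packing rule (illustrative only; pack_os_version is a made-up name, not a kernel symbol):

#include <stdint.h>
#include <stdio.h>

/* Restates the 0xaabbcccc packing used by parse_os_version():
 * aa = numeric major, bb = ASCII train letter, cccc = build number. */
static uint32_t pack_os_version(uint32_t major, char train, uint32_t build)
{
	return ((major & 0xff) << 24) | ((uint32_t)(unsigned char)train << 16) | (build & 0xffff);
}

int main(void)
{
	printf("0x%08x\n", (unsigned)pack_os_version(12, 'A', 316));   /* prints 0x0c41013c */
	return 0;
}
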
+
+cp_key_os_version_t cp_os_version(void)
+{
+       static cp_key_os_version_t cp_os_version;
+
+       if (cp_os_version)
+               return cp_os_version;
+
+       if (!osversion[0])
+               return 0;
+
+       cp_os_version = parse_os_version();
+       if (!cp_os_version) {
+               printf("cp_os_version: unable to parse osversion `%s'\n", osversion);
+               cp_os_version = 1;
+       }
+
+       return cp_os_version;
+}
+
+
+errno_t cp_handle_strategy(buf_t bp)
+{
+       vnode_t vp = buf_vnode(bp);
+       cnode_t *cp = NULL;
+
+       if (bufattr_rawencrypted(buf_attr(bp))
+               || !(cp = cp_get_protected_cnode(vp))
+               || !cp->c_cpentry) {
+               // Nothing to do
+               return 0;
+       }
+
+       /*
+        * For filesystem resize, we may not have access to the underlying
+        * file's cache key for whatever reason (device may be locked).
+        * However, we do not need it since we are going to use the
+        * temporary HFS-wide resize key which is generated once we start
+        * relocating file content.  If this file's I/O should be done
+        * using the resize key, it will have been supplied already, so do
+        * not attach the file's cp blob to the buffer.
+        */
+       if (ISSET(cp->c_cpentry->cp_flags, CP_RELOCATION_INFLIGHT))
+               return 0;
+
+       {
+               // Fast path
+               cpx_t cpx = cpkp_cpx(&cp->c_cpentry->cp_keys);
+
+               if (cpx_has_key(cpx)) {
+                       bufattr_setcpx(buf_attr(bp), cpx);
+                       return 0;
+               }
+       }
+
+       /*
+        * We rely mostly (see note below) upon the truncate lock to
+        * protect the CP cache key from getting tossed prior to our IO
+        * finishing here.  Nearly all cluster io calls to manipulate file
+        * payload from HFS take the truncate lock before calling into the
+        * cluster layer to ensure the file size does not change, or that
+        * they have exclusive right to change the EOF of the file.  That
+        * same guarantee protects us here since the code that deals with
+        * CP lock events must now take the truncate lock before doing
+        * anything.
+        *
+        * If you want to change content protection structures, then the
+        * truncate lock is not sufficient; you must take the truncate
+        * lock and then wait for outstanding writes to complete.  This is
+        * necessary because asynchronous I/O only holds the truncate lock
+        * whilst I/O is being queued.
+        *
+        * One exception should be the VM swapfile IO, because HFS will
+        * funnel the VNOP_PAGEOUT directly into a cluster_pageout call
+        * for the swapfile code only without holding the truncate lock.
+        * This is because individual swapfiles are maintained at
+        * fixed-length sizes by the VM code.  In non-swapfile IO we use
+        * PAGEOUT_V2 semantics which allow us to create our own UPL and
+        * thus take the truncate lock before calling into the cluster
+        * layer.  In that case, however, we are not concerned with the CP
+        * blob being wiped out in the middle of the IO because there
+        * isn't anything to toss; the VM swapfile key stays in-core as
+        * long as the file is open.
+        */
+
+       off_rsrc_t off_rsrc = off_rsrc_make(buf_lblkno(bp) * GetLogicalBlockSize(vp),
+                                                                               VNODE_IS_RSRC(vp));
+       cp_io_params_t io_params;
+
+
+       /*
+        * We want to take the cnode lock here and because the vnode write
+        * count is a pseudo-lock, we need to do something to preserve
+        * lock ordering; the cnode lock comes before the write count.
+        * Ideally, the write count would be incremented after the
+        * strategy routine returns, but that becomes complicated if the
+        * strategy routine were to call buf_iodone before returning.
+        * For now, we drop the write count here and then pick it up again
+        * later.
+        */
+       if (!ISSET(buf_flags(bp), B_READ) && !ISSET(buf_flags(bp), B_RAW))
+               vnode_writedone(vp);
+
+       hfs_lock_always(cp, HFS_SHARED_LOCK);
+       cp_io_params(VTOHFS(vp), cp->c_cpentry, off_rsrc,
+                                ISSET(buf_flags(bp), B_READ) ? VNODE_READ : VNODE_WRITE,
+                                &io_params);
+       hfs_unlock(cp);
+
+       /*
+        * Last chance: If this data protected I/O does not have unwrapped
+        * keys present, then try to get them.  We already know that it
+        * should, by this point.
+        */
+       if (!cpx_has_key(io_params.cpx)) {
+               int io_op = ( (buf_flags(bp) & B_READ) ? CP_READ_ACCESS : CP_WRITE_ACCESS);
+               errno_t error = cp_handle_vnop(vp, io_op, 0);
+               if (error) {
+                       /*
+                        * We have to be careful here.  By this point in the I/O
+                        * path, VM or the cluster engine has prepared a buf_t
+                        * with the proper file offsets and all the rest, so
+                        * simply erroring out will result in us leaking this
+                        * particular buf_t.  We need to properly decorate the
+                        * buf_t just as buf_strategy would so as to make it
+                        * appear that the I/O errored out with the particular
+                        * error code.
+                        */
+                       if (!ISSET(buf_flags(bp), B_READ) && !ISSET(buf_flags(bp), B_RAW))
+                               vnode_startwrite(vp);
+                       buf_seterror (bp, error);
+                       buf_biodone(bp);
+                       return error;
+               }
+
+               hfs_lock_always(cp, HFS_SHARED_LOCK);
+               cp_io_params(VTOHFS(vp), cp->c_cpentry, off_rsrc,
+                                        ISSET(buf_flags(bp), B_READ) ? VNODE_READ : VNODE_WRITE,
+                                        &io_params);
+               hfs_unlock(cp);
+       }
+
+       assert(buf_count(bp) <= io_params.max_len);
+       bufattr_setcpx(buf_attr(bp), io_params.cpx);
+
+       if (!ISSET(buf_flags(bp), B_READ) && !ISSET(buf_flags(bp), B_RAW))
+               vnode_startwrite(vp);
+
+       return 0;
+}
+
+#else // !CONFIG_PROTECT
+
+#include <sys/cdefs.h>
+#include <sys/cprotect.h>
+#include <sys/errno.h>
 
 int cp_key_store_action(int action __unused)
 {
        return ENOTSUP;
 }
 
-
 int cp_register_wraps(cp_wrap_func_t key_store_func __unused)
 {
        return ENOTSUP;
 }
 
+size_t cpx_size(__unused size_t key_size)
+{
+       return 0;
+}
+
+cpx_t cpx_alloc(__unused size_t key_size)
+{
+       return NULL;
+}
+
+void cpx_free(__unused cpx_t cpx)
+{
+}
+
+bool cpx_is_sep_wrapped_key(__unused const struct cpx *cpx)
+{
+       return false;
+}
+
+void cpx_set_is_sep_wrapped_key(__unused struct cpx *cpx, __unused bool v)
+{
+}
+
+bool cpx_use_offset_for_iv(__unused const struct cpx *cpx)
+{
+       return false;
+}
+
+void cpx_set_use_offset_for_iv(__unused struct cpx *cpx, __unused bool v)
+{
+}
+
+uint16_t cpx_key_len(__unused const struct cpx *cpx)
+{
+       return 0;
+}
+
+void cpx_set_key_len(__unused struct cpx *cpx, __unused uint16_t key_len)
+{
+}
+
+void *cpx_key(__unused const struct cpx *cpx)
+{
+       return NULL;
+}
+
+aes_encrypt_ctx *cpx_iv_aes_ctx(__unused cpx_t cpx)
+{
+       return NULL;
+}
+
 #endif /* CONFIG_PROTECT */
diff --git a/bsd/hfs/hfs_cprotect.h b/bsd/hfs/hfs_cprotect.h
new file mode 100644 (file)
index 0000000..b25ecc7
--- /dev/null
+++ b/bsd/hfs/hfs_cprotect.h
@@ -0,0 +1,423 @@
+/*
+ * Copyright (c) 2009-2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef HFS_CPROTECT_H_
+#define        HFS_CPROTECT_H_
+
+#if KERNEL_PRIVATE
+
+#include <sys/cprotect.h>
+
+#include <sys/cdefs.h>
+#include <sys/content_protection.h>
+#include <sys/kernel_types.h>
+#include <crypto/aes.h>
+#include <sys/kdebug.h>
+
+#include "hfs.h"
+#include "hfs_fsctl.h"
+
+__BEGIN_DECLS
+
+#define CP_IV_KEYSIZE             16   /* 16x8 = 128 */
+#define CP_MAX_KEYSIZE                   32    /* 8x4 = 32, 32x8 = 256 */
+#define CP_MAX_CACHEBUFLEN        64   /* Maximum size of cp cache buffer/array */
+
+#define CP_INITIAL_WRAPPEDKEYSIZE 40
+#define CP_V2_WRAPPEDKEYSIZE      40   /* Size of the wrapped key in a v2 EA */
+#define CP_V4_RESERVEDBYTES       16   /* Number of reserved bytes in EA still present */
+
+#define CP_LOCKED_KEYCHAIN        0
+#define CP_UNLOCKED_KEYCHAIN      1
+
+#define CONTENT_PROTECTION_XATTR_NAME  "com.apple.system.cprotect"
+#define CONTENT_PROTECTION_XATTR_NAME_CHARS                            \
+       { 'c', 'o', 'm', '.', 'a', 'p', 'p', 'l', 'e',          \
+       '.', 's', 'y', 's', 't', 'e', 'm',                                      \
+       '.', 'c', 'p', 'r', 'o', 't', 'e', 'c', 't' }
+#define CP_CURRENT_VERS                        CP_VERS_5
+#define CP_VERS_5                              5               // iOS 8.1
+#define CP_VERS_4                              4               // iOS 5
+#define CP_VERS_2                              2               // iOS 4
+#define CP_MINOR_VERS           0
+
+/* the class occupies the lowest 5 bits, so there are 32 values (0-31) */
+#define CP_EFFECTIVE_CLASSMASK 0x0000001f
+
+typedef uint32_t cp_key_class_t;
+typedef uint32_t cp_key_os_version_t;
+
+/* macros for quick access/typing to mask out the classmask */
+#define CP_CLASS(x) ((cp_key_class_t)(CP_EFFECTIVE_CLASSMASK & (x)))
+
+#define CP_CRYPTO_G1   0x00000020
+
+typedef struct cp_xattr *cp_xattr_t;
+typedef struct cnode * cnode_ptr_t;
+//forward declare the struct.
+struct hfsmount;
+
+/* 
+ * Flags for Key Generation Behavior 
+ *
+ * These are passed to cp_generate_keys() and cp_new() in the 
+ * flags arguments
+ */
+#define CP_KEYWRAP_DIFFCLASS    0x00000001 /* wrapping with a different class bag is OK */
+
+/*
+ * off_rsrc_t: this structure represents an offset and whether or not it's
+ * the resource fork.  It's done this way so that we can easily do comparisons
+ * i.e.
+ *
+ *   { 0, data-fork } < { 100, rsrc-fork }
+ */
+
+enum {
+       OFF_RSRC_BIT = 0x4000000000000000,
+};
+
+typedef int64_t off_rsrc_t;
+
+static inline bool off_rsrc_is_rsrc(off_rsrc_t off_rsrc)
+{
+       return off_rsrc & OFF_RSRC_BIT;
+}
+
+static inline off_t off_rsrc_get_off(off_rsrc_t off_rsrc)
+{
+       return off_rsrc & (OFF_RSRC_BIT - 1);
+}
+
+static inline off_rsrc_t off_rsrc_make(off_t offset, bool is_rsrc)
+{
+       return offset | (is_rsrc ? OFF_RSRC_BIT : 0);
+}
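
A minimal sketch of how these helpers preserve the ordering property described above (the function name is illustrative, not part of the header): the fork flag occupies bit 62, above any legal file offset, so data-fork offsets always compare below resource-fork offsets.

/* Sketch only. */
static inline bool off_rsrc_order_example(void)
{
	off_rsrc_t data_100 = off_rsrc_make(100, false);   /* { 100, data-fork } */
	off_rsrc_t rsrc_0   = off_rsrc_make(0,   true);    /* {   0, rsrc-fork } */

	return data_100 < rsrc_0                  /* fork bit dominates the compare */
	    && off_rsrc_get_off(rsrc_0) == 0      /* offset is recovered unchanged  */
	    && off_rsrc_is_rsrc(rsrc_0);          /* fork bit is recovered          */
}
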
+
+// -- struct cpx --
+
+/*
+ * This structure contains the unwrapped key and is passed to the lower layers.
+ * It is private so users must use the accessors declared in sys/cprotect.h
+ * to read/write it.
+ */
+
+// cpx_flags
+typedef uint32_t cpx_flags_t;
+enum {
+       CPX_SEP_WRAPPEDKEY                      = 0x01,
+       CPX_IV_AES_CTX_INITIALIZED      = 0x02,
+       CPX_USE_OFFSET_FOR_IV           = 0x04,
+
+       // Using AES IV context generated from key
+       CPX_IV_AES_CTX_HFS                      = 0x08,
+};
+
+struct cpx {
+#if DEBUG
+       uint32_t                cpx_magic1;
+#endif
+       cpx_flags_t             cpx_flags;
+       uint16_t                cpx_max_key_len;
+       uint16_t                cpx_key_len;
+       aes_encrypt_ctx cpx_iv_aes_ctx;         // Context used for generating the IV
+       uint8_t                 cpx_cached_key[];
+} __attribute__((packed));
+
+// -- struct cp_key_pair --
+
+/*
+ * This structure maintains the pair of keys; the persistent, wrapped key that
+ * is written to disk, and the unwrapped key (cpx_t) that we pass to lower
+ * layers.
+ */
+
+typedef struct cp_key_pair {
+       uint16_t        cpkp_max_pers_key_len;
+       uint16_t        cpkp_pers_key_len;
+       struct cpx      cpkp_cpx;
+
+       // cpkp_cpx is variable length so the location of the persistent key varies
+       // uint8_t cpkp_persistent_key[];
+} cp_key_pair_t;
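
The accessors used throughout this diff (cpkp_size(), cpkp_sizex(), cpkp_cpx(), cpkp_pers_key(), cpkp_pers_key_len()) are not shown in this hunk. A rough sketch of the layout they imply, assuming the wrapped key is stored directly after the variable-length cpx as the trailing comment suggests (the _sketch names are hypothetical, not the real implementations):

/* Sketch only: total size of a key pair is the fixed header plus the
 * in-memory cached-key buffer plus the on-disk wrapped key. */
static inline size_t cpkp_size_sketch(uint16_t max_pers_key_len, uint16_t max_cached_key_len)
{
	return sizeof(cp_key_pair_t) + max_cached_key_len + max_pers_key_len;
}

/* Sketch only: the wrapped (persistent) key begins where the cpx's
 * cached-key buffer ends. */
static inline uint8_t *cpkp_pers_key_sketch(cp_key_pair_t *cpkp)
{
	return (uint8_t *)&cpkp->cpkp_cpx + sizeof(struct cpx) + cpkp->cpkp_cpx.cpx_max_key_len;
}
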
+
+// -- struct cprotect --
+
+/*
+ * Runtime-only structure containing the content protection status for
+ * the given file.  This is referenced by the cnode.  It has the
+ * variable length key pair at the end.
+ */
+
+typedef uint32_t cp_flags_t;
+enum {
+       CP_NO_XATTR                             = 0x01, /* Key info has not been saved as EA to the FS */
+       CP_RELOCATION_INFLIGHT  = 0x02, /* File with offset IVs is in the process of being relocated. */
+
+       CP_HAS_A_KEY            = 0x08, /* File has a non-zero length key */
+};
+
+struct cprotect {
+#if DEBUG
+       uint32_t                                                cp_magic1;
+#endif
+       cp_flags_t                                              cp_flags;
+       cp_key_class_t                                  cp_pclass;  /* persistent class stored on-disk */
+       void*                                                   cp_backing_cnode;
+       cp_key_os_version_t                             cp_key_os_version;
+       cp_key_revision_t                               cp_key_revision;
+       uint16_t                                                cp_raw_open_count;
+       cp_key_pair_t                                   cp_keys;        // Variable length
+};
+
+// -- On-Disk Structures --
+
+typedef uint32_t cp_xattr_flags_t;
+enum {
+       /* 
+        * Be careful about using flags 0x02 to 0x20.  Older code used to write
+        * flags that were used for in-memory purposes to disk and therefore
+        * they might be used in V4 structures.  Here's what they were:
+        *
+        *        CP_KEY_FLUSHED                        0x02    Should never have made it to disk
+        *    CP_NO_XATTR                               0x04    Should never have made it to disk
+        *        CP_OFF_IV_ENABLED                     0x08    Probably made it to disk
+        *        CP_RELOCATION_INFLIGHT        0x10    Should never have made it to disk
+        *        CP_SEP_WRAPPEDKEY                     0x20    Probably made it to disk
+        *
+        */
+
+       CP_XAF_NEEDS_KEYS                       = 0x0001,       /* V4 only: file needs persistent keys */
+
+};
+
+/*
+ * V2 structure written as the per-file EA payload
+ * All on-disk multi-byte fields for the CP XATTR must be stored
+ * little-endian on-disk.  This means they must be byte-swapped from
+ * little-endian to host order on getxattr() and back to little-endian
+ * on setxattr().
+ *
+ * This structure is a fixed length and is tightly packed.
+ * 56 bytes total.
+ */
+struct cp_xattr_v2 {
+       u_int16_t xattr_major_version;
+       u_int16_t xattr_minor_version;
+       cp_xattr_flags_t flags;
+       u_int32_t persistent_class;
+       u_int32_t key_size;
+       uint8_t   persistent_key[CP_V2_WRAPPEDKEYSIZE];
+} __attribute__((aligned(2), packed));
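A hedged sketch of the host-to-little-endian conversion the comment above calls for before the V2 payload is written out; the OSSwapHostToLittleInt* macros are the usual libkern byte-order helpers, and the helper name here is made up for illustration:

#include <libkern/OSByteOrder.h>

/*
 * Illustrative only: convert the multi-byte V2 fields to their on-disk
 * (little-endian) order.  persistent_key is an opaque byte array and is
 * not swapped.
 */
static void cp_xattr_v2_to_disk_order(struct cp_xattr_v2 *x)
{
	x->xattr_major_version = OSSwapHostToLittleInt16(x->xattr_major_version);
	x->xattr_minor_version = OSSwapHostToLittleInt16(x->xattr_minor_version);
	x->flags               = OSSwapHostToLittleInt32(x->flags);
	x->persistent_class    = OSSwapHostToLittleInt32(x->persistent_class);
	x->key_size            = OSSwapHostToLittleInt32(x->key_size);
}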
+
+
+/*
+ * V4 Content Protection EA On-Disk Layout.
+ *
+ * This structure must be tightly packed, but the *size can vary*
+ * depending on the length of the key.  At MOST, the key length will be
+ * CP_MAX_WRAPPEDKEYSIZE, but the length is defined by the key_size field.
+ *
+ * Either way, the packing must be applied to ensure that the key data is
+ * retrievable in the right location relative to the start of the struct.
+ *
+ * Fully packed, this structure can range from:
+ *             MIN: 36 bytes (no key -- used with directories)
+ *             MAX: 164 bytes (with 128 byte key)
+ *
+ * During runtime we always allocate with the full 128 byte key, but only
+ * use as much of the key buffer as needed. It must be tightly packed, though.
+ */
+
+struct cp_xattr_v4 {
+       u_int16_t                       xattr_major_version;
+       u_int16_t                       xattr_minor_version;
+       cp_xattr_flags_t        flags;
+       cp_key_class_t          persistent_class;
+       u_int32_t                       key_size;
+       // This field will be zero on older systems
+       cp_key_os_version_t     key_os_version;
+       /* CP V4 Reserved Bytes == 16 */
+       u_int8_t                        reserved[CP_V4_RESERVEDBYTES];
+       /* All above fields are fixed regardless of key length (36 bytes) */
+       /* Max Wrapped Size == 128 */
+       uint8_t                         persistent_key[CP_MAX_WRAPPEDKEYSIZE];
+} __attribute__((aligned(2), packed));
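The 36/164-byte range quoted above is easy to verify with a stand-alone check; the mirror struct below assumes cp_xattr_flags_t, cp_key_class_t and cp_key_os_version_t are all 32-bit and that CP_V4_RESERVEDBYTES is 16, as the comments in this header state:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define V4_RESERVED   16    /* CP_V4_RESERVEDBYTES, per the comment above */
#define V4_MAX_KEY    128   /* CP_MAX_WRAPPEDKEYSIZE, per the comment above */

struct cp_xattr_v4_mirror {
	uint16_t major, minor;
	uint32_t flags, persistent_class, key_size, key_os_version;
	uint8_t  reserved[V4_RESERVED];
	uint8_t  persistent_key[V4_MAX_KEY];
} __attribute__((aligned(2), packed));

int main(void)
{
	size_t fixed = offsetof(struct cp_xattr_v4_mirror, persistent_key);

	printf("fixed portion: %zu bytes (expect 36)\n", fixed);
	printf("with full key: %zu bytes (expect 164)\n", fixed + V4_MAX_KEY);
	return 0;
}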
+
+// -- Version 5 --
+
+
+struct cp_xattr_v5 {
+       uint16_t                        xattr_major_version;
+       uint16_t                        xattr_minor_version;
+       cp_xattr_flags_t        flags;
+       cp_key_class_t          persistent_class;
+       cp_key_os_version_t     key_os_version;
+       cp_key_revision_t       key_revision;
+       uint16_t                        key_len;
+
+       // 20 bytes to here
+
+       // Variable length from here
+       uint8_t                         persistent_key[CP_MAX_WRAPPEDKEYSIZE];
+
+
+       // Wouldn't be necessary if xattr routines returned just what we ask for
+       uint8_t                         spare[512];
+} __attribute__((aligned(2), packed));
+
+enum {
+       CP_XATTR_MIN_LEN = 20,                  // Minimum length for all versions
+};
+
+/*
+ * The Root Directory's EA (fileid 1) is special; it defines information about
+ * what capabilities the filesystem is using.
+ *
+ * The data is still stored little endian.
+ */
+struct cp_root_xattr {
+       u_int16_t major_version;
+       u_int16_t minor_version;
+       u_int64_t flags;
+} __attribute__((aligned(2), packed));
+
+enum {
+       CP_ROOT_XATTR_MIN_LEN = 12,
+};
+
+
+// -- Function Prototypes --
+
+int cp_entry_init(cnode_ptr_t, struct mount *);
+int cpx_gentempkeys(cpx_t *pcpx, struct hfsmount *hfsmp);
+void cp_entry_destroy(struct hfsmount *hfsmp, struct cprotect *entry_ptr);
+void cp_replace_entry (struct hfsmount *hfsmp, struct cnode *cp, struct cprotect *newentry);
+cnode_ptr_t cp_get_protected_cnode(vnode_t);
+int cp_fs_protected (mount_t);
+int cp_getrootxattr (struct hfsmount *hfsmp, struct cp_root_xattr *outxattr);
+int cp_setrootxattr (struct hfsmount *hfsmp, struct cp_root_xattr *newxattr);
+int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp,
+                                         cp_key_class_t targetclass, uint32_t flags,
+                                         struct cprotect **newentry);
+int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp,
+                                          cp_key_class_t suppliedclass, mode_t cmode,
+                                          struct cprotect **tmpentry);
+int cp_is_valid_class (int isdir, int32_t protectionclass);
+int cp_set_trimmed(struct hfsmount*);
+int cp_set_rewrapped(struct hfsmount *);
+int cp_flop_generation (struct hfsmount*);
+bool cp_is_supported_version(uint16_t version);
+
+
+typedef struct cp_io_params {
+       // The key to use
+       cpx_t   cpx;
+
+       /*
+        * The physical offset for this I/O or -1 if unknown (i.e. caller must
+        * do a regular look up).
+        */
+       off_t   phys_offset;
+
+       // The maximum length allowed for this I/O
+       off_t   max_len;
+} cp_io_params_t;
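A sketch of how a caller might use cp_io_params() (declared just below) before issuing encrypted I/O; the off_rsrc_make() constructor, the c_cpentry field and VNODE_READ are assumed surrounding context, not defined in this header:

/*
 * Illustrative only: fetch the key, physical offset and length bound for a
 * read at @offset.  Locking and error handling are omitted.
 */
static void example_read_params(struct hfsmount *hfsmp, struct cnode *cp,
				off_t offset, bool rsrc)
{
	cp_io_params_t io;

	cp_io_params(hfsmp, cp->c_cpentry, off_rsrc_make(offset, rsrc),
		     VNODE_READ, &io);

	/*
	 * io.cpx: unwrapped key; io.phys_offset: -1 means the caller must do
	 * its own block lookup; io.max_len: bound for this I/O.
	 */
}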
+
+// Return the I/O parameters for this I/O
+void cp_io_params(struct hfsmount *hfsmp, cprotect_t cpr, off_rsrc_t off_rsrc,
+                                 int direction, cp_io_params_t *io_params);
+
+int cp_setxattr(struct cnode *cp, struct cprotect *entry, struct hfsmount *hfsmp,
+                               uint32_t fileid, int xattr_opts);
+
+typedef void * (* cp_new_alloc_fn)(const void *old, uint16_t pers_key_len,
+                                                                  uint16_t cached_key_len,
+                                                                  cp_key_pair_t **pcpkp);
+
+int cp_new(cp_key_class_t *newclass_eff, struct hfsmount *hfsmp,
+                  struct cnode *cp, mode_t cmode, int32_t keyflags,
+                  cp_key_revision_t key_revision,
+                  cp_new_alloc_fn alloc_fn, void **pholder);
+
+int cp_rewrap(struct cnode *cp, __unused struct hfsmount *hfsmp,
+                         cp_key_class_t *newclass, cp_key_pair_t *cpkp, const void *old_holder,
+                         cp_new_alloc_fn alloc_fn, void **pholder);
+
+cprotect_t cp_entry_alloc(cprotect_t old, uint16_t pers_keylen,
+                                                 uint16_t cached_key_len, cp_key_pair_t **pcpkp);
+
+cp_key_os_version_t cp_os_version(void);
+
+cp_key_revision_t cp_next_key_revision(cp_key_revision_t rev);
+
+typedef uint32_t cp_getxattr_options_t;
+enum {
+       // Return just basic information (not the key)
+       CP_GET_XATTR_BASIC_INFO     = 1,
+};
+
+int cp_read_xattr_v5(struct hfsmount *hfsmp, struct cp_xattr_v5 *xattr,
+                                        size_t xattr_len, cprotect_t *pcpr, cp_getxattr_options_t options);
+
+
+errno_t cp_handle_strategy(buf_t bp);
+
+// -- cp_key_pair_t functions --
+
+size_t cpkp_size(uint16_t pers_key_len, uint16_t cached_key_len);
+size_t cpkp_sizex(const cp_key_pair_t *cpkp);
+void cpkp_init(cp_key_pair_t *cpkp, uint16_t max_pers_key_len,
+                          uint16_t max_cached_key_len);
+void cpkp_flush(cp_key_pair_t *cpkp);
+void cpkp_copy(const cp_key_pair_t *src, cp_key_pair_t *dst);
+uint16_t cpkp_max_pers_key_len(const cp_key_pair_t *cpkp);
+uint16_t cpkp_pers_key_len(const cp_key_pair_t *cpkp);
+bool cpkp_can_copy(const cp_key_pair_t *src, const cp_key_pair_t *dst);
+
+// -- Private cpx functions --
+
+void cpx_init(cpx_t, size_t key_len);
+bool cpx_has_key(const struct cpx *cpx);
+uint16_t cpx_max_key_len(const struct cpx *cpx);
+cpx_t cpkp_cpx(const cp_key_pair_t *cpkp);
+void cpx_copy(const struct cpx *src, cpx_t dst);
+
+// -- Helper Functions --
+
+static inline int cp_get_crypto_generation (cp_key_class_t protclass) {
+       if (protclass & CP_CRYPTO_G1) {
+               return 1;
+       }
+       else return 0;
+}
+
+__END_DECLS
+
+#endif /* KERNEL_PRIVATE */
+
+#endif /* !HFS_CPROTECT_H_ */
index 50fb1ddd9a509d3d76d7ad5e4439149225a7ab0f..eb242b37f03f859995fb69b23a3a8c6bcb605f2a 100644 (file)
@@ -126,25 +126,43 @@ hfs_swap_BTNode (
         */
        if (btcb->totalNodes != 0) {
                        if (srcDesc->fLink >= btcb->totalNodes) {
+#if DEVELOPMENT || DEBUG
+                               panic("hfs_swap_BTNode: invalid forward link (0x%08x >= 0x%08x)\n", srcDesc->fLink, btcb->totalNodes);
+#else
                                printf("hfs_swap_BTNode: invalid forward link (0x%08x >= 0x%08x)\n", srcDesc->fLink, btcb->totalNodes);
+#endif
                                error = fsBTInvalidHeaderErr;
                                goto fail;
                        }
                        if (srcDesc->bLink >= btcb->totalNodes) {
+#if DEVELOPMENT || DEBUG
+                               panic("hfs_swap_BTNode: invalid backward link (0x%08x >= 0x%08x)\n", srcDesc->bLink, btcb->totalNodes);
+#else
                                printf("hfs_swap_BTNode: invalid backward link (0x%08x >= 0x%08x)\n", srcDesc->bLink, btcb->totalNodes);
+#endif
                                error = fsBTInvalidHeaderErr;
                                goto fail;
                        }
                        
                        if ((src->blockNum != 0) && (srcDesc->fLink == (u_int32_t) src->blockNum)) {
+#if DEVELOPMENT || DEBUG
+                               panic("hfs_swap_BTNode: invalid forward link (0x%08x == 0x%08x)\n",
+                                               srcDesc->fLink, (u_int32_t) src->blockNum);
+#else
                                printf("hfs_swap_BTNode: invalid forward link (0x%08x == 0x%08x)\n",
                                                srcDesc->fLink, (u_int32_t) src->blockNum);
+#endif
                                error = fsBTInvalidHeaderErr;
                                goto fail;
                        }
                        if ((src->blockNum != 0) && (srcDesc->bLink == (u_int32_t) src->blockNum)) {
+#if DEVELOPMENT || DEBUG
+                               panic("hfs_swap_BTNode: invalid backward link (0x%08x == 0x%08x)\n",
+                                               srcDesc->bLink, (u_int32_t) src->blockNum);
+#else
                                printf("hfs_swap_BTNode: invalid backward link (0x%08x == 0x%08x)\n",
                                                srcDesc->bLink, (u_int32_t) src->blockNum);
+#endif
                                error = fsBTInvalidHeaderErr;
                                goto fail;
                        }
diff --git a/bsd/hfs/hfs_extents.c b/bsd/hfs/hfs_extents.c
new file mode 100644 (file)
index 0000000..509de32
--- /dev/null
@@ -0,0 +1,770 @@
+/*
+ * Copyright (c) 2014 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#if HFS_EXTENTS_TEST
+
+#include "hfs_extents_test.h"
+#include "hfs_extents.h"
+
+#else
+
+#include "hfs_extents.h"
+
+// In this file, group refers to a set of 8 extents
+
+static uint32_t hfs_total_blocks(const HFSPlusExtentDescriptor *ext, int count);
+static errno_t hfs_ext_iter_next_group(struct hfs_ext_iter *iter);
+static errno_t hfs_ext_iter_update(struct hfs_ext_iter *iter,
+                                                                  HFSPlusExtentDescriptor *extents,
+                                                                  int count,
+                                                                  HFSPlusExtentRecord cat_extents);
+static errno_t hfs_ext_iter_check_group(hfs_ext_iter_t *iter);
+
+#endif
+
+#define CHECK(x, var, goto_label)                                                                      \
+       do {                                                                                                                    \
+               var = (x);                                                                                                      \
+               if (var) {                                                                                                      \
+                       printf("%s:%u error: %d\n", __func__, __LINE__, var);   \
+                       goto goto_label;                                                                                \
+               }                                                                                                                       \
+       } while (0)
+
+#define min(a,b) \
+       ({ typeof (a) _a = (a); typeof (b) _b = (b); _a < _b ? _a : _b; })
+
+static __attribute__((pure))
+const HFSPlusExtentKey *hfs_ext_iter_key(const hfs_ext_iter_t *iter)
+{
+       return (const HFSPlusExtentKey *)&iter->bt_iter.key;
+}
+
+static __attribute__((pure))
+HFSPlusExtentKey *hfs_ext_iter_key_mut(hfs_ext_iter_t *iter)
+{
+       return (HFSPlusExtentKey *)&iter->bt_iter.key;
+}
+
+// Returns the total number of blocks for the @count extents provided
+uint32_t hfs_total_blocks(const HFSPlusExtentDescriptor *extents, int count)
+{
+       uint32_t block_count = 0;
+       for (int i = 0; i < count; ++i)
+               block_count += extents[i].blockCount;
+       return block_count;
+}
+
+/*
+ * Checks a group of extents: makes sure that if it's the last group
+ * for a fork, that all the remaining extents are properly zeroed and
+ * if it's not then checks that all extents are set.  This also sets
+ * @group_block_count and @last_in_fork.  Returns ESTALE if
+ * inconsistent.
+ */
+errno_t hfs_ext_iter_check_group(hfs_ext_iter_t *iter)
+{
+       filefork_t *ff = VTOF(iter->vp);
+       const HFSPlusExtentKey *key = hfs_ext_iter_key(iter);
+       uint32_t count = 0;
+       int i;
+
+       for (i = 0; i < kHFSPlusExtentDensity; ++i) {
+               if (!iter->group[i].blockCount)
+                       break;
+               count += iter->group[i].blockCount;
+       }
+
+       if (i < kHFSPlusExtentDensity) {
+               iter->last_in_fork = true;
+               if (key->startBlock + count != ff_allocblocks(ff))
+                       goto bad;
+
+               // Check remainder of extents
+               for (++i; i < kHFSPlusExtentDensity; ++i) {
+                       if (iter->group[i].blockCount)
+                               goto bad;
+               }
+       } else {
+               if (key->startBlock + count > ff_allocblocks(ff))
+                       goto bad;
+
+               iter->last_in_fork = (key->startBlock + count == ff_allocblocks(ff));
+       }
+
+       iter->group_block_count = count;
+
+       return 0;
+
+bad:
+
+#if DEBUG
+       printf("hfs_ext_iter_check_group: bad group; start: %u, total blocks: %u\n",
+                  key->startBlock, ff_allocblocks(ff));
+
+       for (int j = 0; j < kHFSPlusExtentDensity; ++j) {
+               printf("%s<%u, %u>", j ? ", " : "",
+                          iter->group[j].startBlock, iter->group[j].blockCount);
+       }
+
+       printf("\n");
+#endif
+
+       return ESTALE;
+}
+
+// NOTE: doesn't copy group data
+static void hfs_ext_iter_copy(const hfs_ext_iter_t *src, hfs_ext_iter_t *dst)
+{
+       dst->vp = src->vp;
+       memcpy(&dst->bt_iter.key, &src->bt_iter.key, sizeof(HFSPlusExtentKey));
+
+       dst->file_block = src->file_block;
+       dst->ndx = src->ndx;
+
+       dst->bt_iter.hint                       = src->bt_iter.hint;
+       dst->bt_iter.version            = 0;
+       dst->bt_iter.reserved           = 0;
+       dst->bt_iter.hitCount           = 0;
+       dst->bt_iter.maxLeafRecs        = 0;
+}
+
+bool hfs_ext_iter_is_catalog_extents(hfs_ext_iter_t *iter)
+{
+       return hfs_ext_iter_key(iter)->startBlock == 0;
+}
+
+#if !HFS_EXTENTS_TEST
+
+/*
+ * Finds the extent for offset.  It might be in the catalog or the extents
+ * file.
+ */
+errno_t hfs_ext_find(vnode_t vp, off_t offset, hfs_ext_iter_t *iter)
+{
+       errno_t ret;
+       hfsmount_t *hfsmp = VTOHFS(vp);
+
+       iter->vp = vp;
+
+       uint32_t end_block, index;
+       HFSPlusExtentKey *key = hfs_ext_iter_key_mut(iter);
+
+       filefork_t *ff = VTOF(vp);
+
+       CHECK(SearchExtentFile(hfsmp, ff, offset,
+                                                  key, iter->group, &index,
+                                                  &iter->bt_iter.hint.nodeNum, &end_block), ret, exit);
+
+       iter->ndx = index;
+       iter->file_block = end_block - iter->group[index].blockCount;
+
+       if (!key->keyLength) {
+               // We're pointing at the catalog record extents so fix up the key
+               key->keyLength  = kHFSPlusExtentKeyMaximumLength;
+               key->forkType   = (VNODE_IS_RSRC(iter->vp)
+                                                  ? kHFSResourceForkType : kHFSDataForkType);
+               key->pad                = 0;
+               key->fileID             = VTOC(iter->vp)->c_fileid;
+               key->startBlock = 0;
+       }
+
+       CHECK(hfs_ext_iter_check_group(iter), ret, exit);
+
+       ret = 0;
+
+exit:
+
+       return MacToVFSError(ret);
+}
+
+static uint32_t hfs_ext_iter_next_group_block(const hfs_ext_iter_t *iter)
+{
+       const HFSPlusExtentKey *key = hfs_ext_iter_key(iter);
+
+       return key->startBlock + iter->group_block_count;
+}
+
+/*
+ * Move the iterator to the next group.  Don't call if there's a chance
+ * there is no entry; the caller should check last_in_fork instead.
+ */
+static errno_t hfs_ext_iter_next_group(hfs_ext_iter_t *iter)
+{
+       errno_t ret;
+       hfsmount_t *hfsmp = VTOHFS(iter->vp);
+       filefork_t * const tree = hfsmp->hfs_extents_cp->c_datafork;
+       HFSPlusExtentKey *key = hfs_ext_iter_key_mut(iter);
+       const bool catalog_extents = hfs_ext_iter_is_catalog_extents(iter);
+       const uint32_t next_block = hfs_ext_iter_next_group_block(iter);
+
+       FSBufferDescriptor fbd = {
+               .bufferAddress = &iter->group,
+               .itemCount = 1,
+               .itemSize = sizeof(iter->group)
+       };
+
+       if (catalog_extents) {
+               key->startBlock = next_block;
+
+               CHECK(BTSearchRecord(tree, &iter->bt_iter, &fbd, NULL,
+                                                        &iter->bt_iter), ret, exit);
+       } else {
+               const uint32_t   file_id = key->fileID;
+               const uint8_t    fork_type = key->forkType;
+
+               CHECK(BTIterateRecord(tree, kBTreeNextRecord, &iter->bt_iter,
+                                                         &fbd, NULL), ret, exit);
+
+               if (key->fileID != file_id
+                       || key->forkType != fork_type
+                       || key->startBlock != next_block) {
+                       // This indicates an inconsistency
+                       ret = ESTALE;
+                       goto exit;
+               }
+       }
+
+       iter->file_block = key->startBlock;
+       iter->ndx = 0;
+
+       CHECK(hfs_ext_iter_check_group(iter), ret, exit);
+
+       ret = 0;
+
+exit:
+
+       return MacToVFSError(ret);
+}
+
+/*
+ * Updates with the extents provided and sets the key up for the next group.
+ * It is assumed that any previous record that might collide has been deleted.
+ * NOTE: @extents must point to a buffer that can be zero padded to multiple
+ * of 8 extents.
+ */
+errno_t hfs_ext_iter_update(hfs_ext_iter_t *iter,
+                                                       HFSPlusExtentDescriptor *extents,
+                                                       int count,
+                                                       HFSPlusExtentRecord cat_extents)
+{
+       errno_t                          ret;
+       hfsmount_t                      *hfsmp  = VTOHFS(iter->vp);
+       cnode_t                         *cp             = VTOC(iter->vp);
+       HFSPlusExtentKey        *key    = hfs_ext_iter_key_mut(iter);
+       int                                      ndx    = 0;
+
+       if (!extents)
+               extents = iter->group;
+
+       if (count % kHFSPlusExtentDensity) {
+               // Zero out last group
+               bzero(&extents[count], (kHFSPlusExtentDensity
+                                                               - (count % 8)) * sizeof(*extents));
+       }
+
+       if (hfs_ext_iter_is_catalog_extents(iter)) {
+               // Caller is responsible for in-memory updates
+
+               if (cat_extents)
+                       hfs_ext_copy_rec(extents, cat_extents);
+
+               struct cat_fork fork;
+
+               hfs_fork_copy(&fork, &VTOF(iter->vp)->ff_data, extents);
+               hfs_prepare_fork_for_update(VTOF(iter->vp), &fork, &fork, hfsmp->blockSize);
+
+               bool is_rsrc = VNODE_IS_RSRC(iter->vp);
+               CHECK(cat_update(hfsmp, &cp->c_desc, &cp->c_attr,
+                                                is_rsrc ? NULL : &fork,
+                                                is_rsrc ? &fork : NULL), ret, exit);
+
+               // Set the key to the next group
+               key->startBlock = hfs_total_blocks(extents, kHFSPlusExtentDensity);
+
+               ndx += 8;
+       }
+
+       // Deal with the remainder which must be overflow extents
+       for (; ndx < count; ndx += 8) {
+               filefork_t * const tree = hfsmp->hfs_extents_cp->c_datafork;
+
+               FSBufferDescriptor fbd = {
+                       .bufferAddress = &extents[ndx],
+                       .itemCount = 1,
+                       .itemSize = sizeof(HFSPlusExtentRecord)
+               };
+
+               CHECK(BTInsertRecord(tree, &iter->bt_iter, &fbd,
+                                                        sizeof(HFSPlusExtentRecord)), ret, exit);
+
+               // Set the key to the next group
+               key->startBlock += hfs_total_blocks(&extents[ndx], kHFSPlusExtentDensity);
+       }
+
+       ret = 0;
+
+exit:
+
+       return ret;
+}
+
+#endif // !HFS_EXTENTS_TEST
+
+static void push_ext(HFSPlusExtentDescriptor *extents, int *count,
+                                        const HFSPlusExtentDescriptor *ext)
+{
+       if (!ext->blockCount)
+               return;
+
+       if (*count && hfs_ext_end(&extents[*count - 1]) == ext->startBlock)
+               extents[*count - 1].blockCount += ext->blockCount;
+       else
+               extents[(*count)++] = *ext;
+}
+
+/*
+ * NOTE: Here we rely on the replacement extents not being too big as
+ * otherwise the number of BTree records that we have to delete could be
+ * too large.
+ */
+errno_t hfs_ext_replace(hfsmount_t *hfsmp, vnode_t vp,
+                                               uint32_t file_block,
+                                               const HFSPlusExtentDescriptor *repl,
+                                               int repl_count,
+                                               HFSPlusExtentRecord catalog_extents)
+{
+       errno_t                                          ret;
+       filefork_t * const                       tree = hfsmp->hfs_extents_cp->c_datafork;
+       hfs_ext_iter_t                          *iter_in = NULL, *iter_out;
+       HFSPlusExtentDescriptor         *extents = NULL;
+       HFSPlusExtentDescriptor         *roll_back_extents = NULL;
+       int                                                      roll_back_count = 0;
+       const uint32_t                           end_file_block = file_block + hfs_total_blocks(repl, repl_count);
+       filefork_t                                      *ff = VTOF(vp);
+
+       // Indicate we haven't touched catalog extents
+       catalog_extents[0].blockCount = 0;
+
+       if (end_file_block > ff_allocblocks(ff)) {
+               ret = EINVAL;
+               goto exit;
+       }
+
+       MALLOC(iter_in, hfs_ext_iter_t *, sizeof(*iter_in) * 2, M_TEMP, M_WAITOK);
+       iter_out = iter_in + 1;
+       HFSPlusExtentKey *key_in = hfs_ext_iter_key_mut(iter_in);
+
+       // Get to where we want to start
+       off_t offset = hfs_blk_to_bytes(file_block, hfsmp->blockSize);
+
+       /*
+        * If the replacement is at the start of a group, we want to pull in the
+        * group before so that we tidy up any padding that we might have done
+        * in a prior hfs_ext_replace call.
+        */
+       if (offset > 0)
+               --offset;
+
+       CHECK(hfs_ext_find(vp, offset, iter_in), ret, exit);
+
+       const uint32_t start_group_block = key_in->startBlock;
+
+       const int max_roll_back_extents = 128 * 1024 / sizeof(HFSPlusExtentDescriptor);
+       MALLOC(roll_back_extents, HFSPlusExtentDescriptor *, 128 * 1024, M_TEMP, M_WAITOK);
+
+       // Move to the first extent in this group
+       iter_in->ndx = 0;
+
+       hfs_ext_iter_copy(iter_in, iter_out);
+
+       // Create a buffer for our extents
+       const int buffered_extents = roundup(3 * kHFSPlusExtentDensity + repl_count,
+                                                                                kHFSPlusExtentDensity);
+       MALLOC(extents, HFSPlusExtentDescriptor *,
+                  sizeof(*extents) * buffered_extents, M_TEMP, M_WAITOK);
+       int count = 0;
+
+       /*
+        * Iterate through the extents that are affected by this replace operation.
+        * We cannot push more than 16 + repl_count extents here; 8 for the group
+        * containing the replacement start, repl_count for the replacements and 8
+        * for the group containing the end.  If we went back a group due to
+        * decrementing the offset above, it's still the same because we know in 
+        * that case the replacement starts at the beginning of the next group.
+        */
+       uint32_t block = start_group_block;
+       for (;;) {
+               if (!iter_in->ndx) {
+                       hfs_ext_copy_rec(iter_in->group, &roll_back_extents[roll_back_count]);
+                       roll_back_count += kHFSPlusExtentDensity;
+
+                       if (!hfs_ext_iter_is_catalog_extents(iter_in)) {
+                               // Delete this extent group; we're going to replace it
+                               CHECK(BTDeleteRecord(tree, &iter_in->bt_iter), ret, exit);
+                       }
+               }
+
+               HFSPlusExtentDescriptor *ext = &iter_in->group[iter_in->ndx];
+               if (!ext->blockCount) {
+                   /*
+                        * We ran out of existing extents so we just write the
+                        * extents and we're done.
+                        */
+                       goto finish;
+               }
+
+               // If the current extent does not overlap replacement...
+               if (block + ext->blockCount <= file_block || block >= end_file_block) {
+                       // Keep the current extent exactly as it is
+                       push_ext(extents, &count, ext);
+               } else {
+                       HFSPlusExtentDescriptor dealloc_ext = *ext;
+
+                       if (block <= file_block) {
+                               /*
+                                * The middle or tail of the current extent overlaps
+                                * the replacement extents.  Keep the non-overlapping
+                                * head of the current extent.
+                                */
+                               uint32_t trimmed_len = file_block - block;
+
+                               if (trimmed_len) {
+                                       // Push (keep) non-overlapping head of current extent
+                                       push_ext(extents, &count,
+                                                        &(HFSPlusExtentDescriptor){ ext->startBlock,
+                                                                trimmed_len });
+
+                                       /*
+                                        * Deallocate the part of the current extent that
+                                        * overlaps the replacement extents.  That starts
+                                        * at @file_block.  For now, assume it goes
+                                        * through the end of the current extent.  (If the
+                                        * current extent extends beyond the end of the
+                                        * replacement extents, we'll update the
+                                        * blockCount below.)
+                                        */
+                                       dealloc_ext.startBlock += trimmed_len;
+                                       dealloc_ext.blockCount -= trimmed_len;
+                               }
+
+                               // Insert the replacements
+                               for (int i = 0; i < repl_count; ++i)
+                                       push_ext(extents, &count, &repl[i]);
+                       }
+
+                       if (block + ext->blockCount > end_file_block) {
+                               /*
+                                * The head or middle of the current extent overlaps
+                                * the replacement extents.  Keep the non-overlapping
+                                * tail of the current extent.
+                                */
+                               uint32_t overlap = end_file_block - block;
+
+                               // Push (keep) non-overlapping tail of current extent
+                               push_ext(extents, &count,
+                                                &(HFSPlusExtentDescriptor){ ext->startBlock + overlap,
+                                                        ext->blockCount - overlap });
+
+                               /*
+                                * Deallocate the part of current extent that overlaps
+                                * the replacements.
+                                */
+                               dealloc_ext.blockCount = (ext->startBlock + overlap
+                                                                                 - dealloc_ext.startBlock);
+                       }
+
+                       CHECK(BlockDeallocate(hfsmp, dealloc_ext.startBlock,
+                                                                 dealloc_ext.blockCount, 0), ret, exit);
+               }
+
+               // Move to next (existing) extent from iterator
+               block += ext->blockCount;
+
+               if (++iter_in->ndx >= kHFSPlusExtentDensity) {
+                       if (block >= end_file_block) {
+                               if (iter_in->last_in_fork || !(count % kHFSPlusExtentDensity)) {
+                                       /*
+                                        * This is the easy case.  We've hit the end or we have a 
+                                        * multiple of 8, so we can just write out the extents we 
+                                        * have and it should all fit within a transaction.
+                                        */
+
+                                       goto finish;
+                               }
+
+                               if (count + kHFSPlusExtentDensity > buffered_extents
+                                       || (roll_back_count
+                                               + kHFSPlusExtentDensity > max_roll_back_extents)) {
+                                       /*
+                                        * We've run out of room for the next group, so drop out
+                                        * and take a different strategy.
+                                        */
+                                       break;
+                               }
+                       }
+
+                       CHECK(hfs_ext_iter_next_group(iter_in), ret, exit);
+               }
+       } // for (;;)
+
+       /*
+        * We're not at the end so we need to try and pad to a multiple of 8
+        * so that we don't have to touch all the subsequent records.  We pad
+        * by stealing single blocks.
+        */
+
+       int stop_at = 0;
+
+       for (;;) {
+               // @in points to the record we're stealing from
+               int in = count - 1;
+
+               count = roundup(count, kHFSPlusExtentDensity);
+
+               // @out is where we put the stolen single blocks
+               int out = count - 1;
+
+               do {
+                       if (out <= in) {
+                               // We succeeded in padding; we're done
+                               goto finish;
+                       }
+
+                       /*
+                        * "Steal" a block, or move a one-block extent within the
+                        * @extents array.
+                        *
+                        * If the extent we're "stealing" from (@in) is only one
+                        * block long, we'll end up copying it to @out, setting
+                        * @in's blockCount to zero, and decrementing @in.  So, we
+                        * either split a multi-block extent; or move it within
+                        * the @extents array.
+                        */
+                       extents[out].blockCount = 1;
+                       extents[out].startBlock = (extents[in].startBlock
+                                                                          + extents[in].blockCount - 1);
+                       --out;
+               } while (--extents[in].blockCount || --in >= stop_at);
+
+               // We ran out of extents
+               if (roll_back_count + kHFSPlusExtentDensity > max_roll_back_extents) {
+                       ret = ENOSPC;
+                       goto exit;
+               }
+
+               // Need to shift extents starting at out + 1
+               ++out;
+               memmove(&extents[stop_at], &extents[out],
+                               (count - out) * sizeof(*extents));
+               count -= out - stop_at;
+
+               // Pull in the next group
+               CHECK(hfs_ext_iter_next_group(iter_in), ret, exit);
+
+               // Take a copy of these extents for roll back purposes
+               hfs_ext_copy_rec(iter_in->group, &roll_back_extents[roll_back_count]);
+               roll_back_count += kHFSPlusExtentDensity;
+
+               // Delete this group; we're going to replace it
+               CHECK(BTDeleteRecord(tree, &iter_in->bt_iter), ret, exit);
+
+               if (iter_in->last_in_fork) {
+                       // Great!  We've hit the end.  Coalesce and write out.
+                       int old_count = count;
+                       count = 0;
+
+                       /*
+                        * First coalesce the extents we already have.  Takes
+                        * advantage of push_ext coalescing the input extent with
+                        * the last extent in @extents.  If the extents are not
+                        * contiguous, then this just copies the extents over
+                        * themselves and sets @count back to @old_count.
+                        */
+                       for (int i = 0; i < old_count; ++i)
+                               push_ext(extents, &count, &extents[i]);
+
+                       // Make room if necessary
+                       const int flush_count = buffered_extents - kHFSPlusExtentDensity;
+                       if (count > flush_count) {
+                               CHECK(hfs_ext_iter_update(iter_out, extents,
+                                                                                 flush_count, catalog_extents), ret, exit);
+
+                               memmove(&extents[0], &extents[flush_count],
+                                               (count - flush_count) * sizeof(*extents));
+
+                               count -= flush_count;
+                       }
+
+                       // Add in the extents we just read in
+                       for (int i = 0; i < kHFSPlusExtentDensity; ++i) {
+                               HFSPlusExtentDescriptor *ext = &iter_in->group[i];
+                               if (!ext->blockCount)
+                                       break;
+                               push_ext(extents, &count, ext);
+                       }
+
+                       goto finish;
+               } // if (iter_in->last_in_fork)
+
+               /*
+                * Otherwise, we're not at the end, so we add these extents and then
+                * try and pad out again to a multiple of 8.  We start by making room.
+                */
+               if (count > buffered_extents - kHFSPlusExtentDensity) {
+                       // Only write out one group here
+                       CHECK(hfs_ext_iter_update(iter_out, extents,
+                                                                         kHFSPlusExtentDensity,
+                                                                         catalog_extents), ret, exit);
+
+                       memmove(&extents[0], &extents[kHFSPlusExtentDensity],
+                                       (count - kHFSPlusExtentDensity) * sizeof(*extents));
+
+                       count -= kHFSPlusExtentDensity;
+               }
+
+               // Record where to stop when padding above
+               stop_at = count;
+
+               // Copy in the new extents
+               hfs_ext_copy_rec(iter_in->group, &extents[count]);
+               count += kHFSPlusExtentDensity;
+       } // for (;;)
+
+finish:
+
+       // Write the remaining extents
+       CHECK(hfs_ext_iter_update(iter_out, extents, count,
+                                                         catalog_extents), ret, exit);
+
+       CHECK(BTFlushPath(hfsmp->hfs_catalog_cp->c_datafork), ret, exit);
+       CHECK(BTFlushPath(hfsmp->hfs_extents_cp->c_datafork), ret, exit);
+
+exit:
+
+       if (ret && roll_back_count) {
+
+#define RB_FAILED                                                                                                              \
+       do {                                                                                                                            \
+               printf("hfs_ext_replace:%u: roll back failed\n", __LINE__);             \
+               hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED);                              \
+               goto roll_back_failed;                                                                                  \
+       } while (0)
+
+               // First delete any groups we inserted
+               HFSPlusExtentKey *key_out = hfs_ext_iter_key_mut(iter_out);
+
+               key_in->startBlock = start_group_block;
+               if (!key_in->startBlock && key_out->startBlock > key_in->startBlock) {
+                       key_in->startBlock += hfs_total_blocks(catalog_extents,
+                                                                                                  kHFSPlusExtentDensity);
+               }
+
+               if (key_out->startBlock > key_in->startBlock) {
+                       FSBufferDescriptor fbd = {
+                               .bufferAddress = &iter_in->group,
+                               .itemCount = 1,
+                               .itemSize = sizeof(iter_in->group)
+                       };
+
+                       if (BTSearchRecord(tree, &iter_in->bt_iter, &fbd, NULL,
+                                                          &iter_in->bt_iter)) {
+                               RB_FAILED;
+                       }
+
+                       for (;;) {
+                               if (BTDeleteRecord(tree, &iter_in->bt_iter))
+                                       RB_FAILED;
+
+                               key_in->startBlock += hfs_total_blocks(iter_in->group,
+                                                                                                          kHFSPlusExtentDensity);
+
+                               if (key_in->startBlock >= key_out->startBlock)
+                                       break;
+
+                               if (BTSearchRecord(tree, &iter_in->bt_iter, &fbd, NULL,
+                                                                  &iter_in->bt_iter)) {
+                                       RB_FAILED;
+                               }
+                       }
+               }
+
+               // Position iter_out
+               key_out->startBlock = start_group_block;
+
+               // Roll back all the extents
+               if (hfs_ext_iter_update(iter_out, roll_back_extents, roll_back_count,
+                                                               catalog_extents)) {
+                       RB_FAILED;
+               }
+
+               // And we need to reallocate the blocks we deallocated
+               const uint32_t end_block = min(block, end_file_block);
+               block = start_group_block;
+               for (int i = 0; i < roll_back_count && block < end_block; ++i) {
+                       HFSPlusExtentDescriptor *ext = &roll_back_extents[i];
+
+                       if (block + ext->blockCount <= file_block)
+                               continue;
+
+                       HFSPlusExtentDescriptor alloc_ext = *ext;
+
+                       if (block <= file_block) {
+                               uint32_t trimmed_len = file_block - block;
+
+                               alloc_ext.startBlock += trimmed_len;
+                               alloc_ext.blockCount -= trimmed_len;
+                       }
+
+                       if (block + ext->blockCount > end_file_block) {
+                               uint32_t overlap = end_file_block - block;
+
+                               alloc_ext.blockCount = (ext->startBlock + overlap
+                                                                               - alloc_ext.startBlock);
+                       }
+
+                       if (hfs_block_alloc(hfsmp, &alloc_ext, HFS_ALLOC_ROLL_BACK, NULL))
+                               RB_FAILED;
+
+                       block += ext->blockCount;
+               }
+
+               if (BTFlushPath(hfsmp->hfs_catalog_cp->c_datafork)
+                       || BTFlushPath(hfsmp->hfs_extents_cp->c_datafork)) {
+                       RB_FAILED;
+               }
+       } // if (ret && roll_back_count)
+
+roll_back_failed:
+
+       FREE(iter_in, M_TEMP);
+       FREE(extents, M_TEMP);
+       FREE(roll_back_extents, M_TEMP);
+
+       return MacToVFSError(ret);
+}
diff --git a/bsd/hfs/hfs_extents.h b/bsd/hfs/hfs_extents.h
new file mode 100644 (file)
index 0000000..9dd6073
--- /dev/null
@@ -0,0 +1,54 @@
+//
+//  hfs_extents.h
+//  hfs
+//
+//  Created by csuter on 7/11/14.
+//  Copyright (c) 2014 Apple. All rights reserved.
+//
+
+#ifndef HFS_EXTENTS_H_
+#define HFS_EXTENTS_H_
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "hfs_format.h"
+
+#if !HFS_EXTENTS_TEST && !HFS_ALLOC_TEST
+#include "hfs_cnode.h"
+#include "hfs.h"
+#include "hfscommon/headers/BTreesInternal.h"
+#endif
+
+typedef struct hfs_ext_iter {
+       struct vnode               *vp;                 // If NULL, this is an xattr extent
+       BTreeIterator                   bt_iter;
+       uint8_t                                 ndx;            // Index in group
+       bool                                    last_in_fork;
+       uint32_t                                file_block;
+       uint32_t                                group_block_count;
+       HFSPlusExtentRecord             group;
+} hfs_ext_iter_t;
+
+errno_t hfs_ext_find(vnode_t vp, off_t offset, hfs_ext_iter_t *iter);
+
+errno_t hfs_ext_replace(hfsmount_t *hfsmp, vnode_t vp,
+                                               uint32_t file_block,
+                                               const HFSPlusExtentDescriptor *repl,
+                                               int count,
+                                               HFSPlusExtentRecord catalog_extents);
+
+bool hfs_ext_iter_is_catalog_extents(hfs_ext_iter_t *iter);
+
+static inline void hfs_ext_copy_rec(const HFSPlusExtentRecord src,
+                                                                       HFSPlusExtentRecord dst)
+{
+       memcpy(dst, src, sizeof(HFSPlusExtentRecord));
+}
+
+static inline uint32_t hfs_ext_end(const HFSPlusExtentDescriptor *ext)
+{
+       return ext->startBlock + ext->blockCount;
+}
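A brief sketch of the lookup entry point declared above: hfs_ext_find() positions the iterator on the group containing the given byte offset, after which iter.ndx indexes the matching descriptor within iter.group (surrounding locking is assumed):

/*
 * Illustrative only: look up the extent backing a file offset and report it.
 */
static errno_t example_extent_lookup(vnode_t vp, off_t offset)
{
	hfs_ext_iter_t iter;
	errno_t err = hfs_ext_find(vp, offset, &iter);

	if (!err) {
		const HFSPlusExtentDescriptor *ext = &iter.group[iter.ndx];

		printf("file block %u -> <%u, %u>\n", iter.file_block,
		       ext->startBlock, ext->blockCount);
	}
	return err;
}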
+
+#endif // HFS_EXTENTS_H_
index ba00a272a2f52e8b68bb541ba288e3666b9497a7..dcc1807243d10b55c129a8a54ce32cd9c139e920 100644 (file)
@@ -363,8 +363,23 @@ enum {
        kHFSHasChildLinkBit     = 0x0006,       /* folder has a child that's a dir link */
        kHFSHasChildLinkMask    = 0x0040,
 
-       kHFSHasDateAddedBit = 0x0007,   /* File/Folder has the date-added stored in the finder info. */
-       kHFSHasDateAddedMask = 0x0080 
+       kHFSHasDateAddedBit     = 0x0007,       /* File/Folder has the date-added stored in the finder info. */
+       kHFSHasDateAddedMask    = 0x0080, 
+
+       kHFSFastDevPinnedBit    = 0x0008,       /* this file has been pinned to the fast-device by the hot-file code on cooperative fusion */
+       kHFSFastDevPinnedMask   = 0x0100,
+
+       kHFSDoNotFastDevPinBit  = 0x0009,       /* this file cannot be pinned to the fast-device */
+       kHFSDoNotFastDevPinMask = 0x0200,
+
+       kHFSFastDevCandidateBit  = 0x000a,      /* this item is a potential candidate for fast-dev pinning (as are any of its descendants) */
+       kHFSFastDevCandidateMask = 0x0400,
+
+       kHFSAutoCandidateBit     = 0x000b,      /* this item was automatically marked as a fast-dev candidate by the kernel */
+       kHFSAutoCandidateMask    = 0x0800
+
+       // There are only 4 flag bits remaining: 0x1000, 0x2000, 0x4000, 0x8000
+
 };
 
 
index b90b722b5ca74340fdc680b1ca1e07bde44b5e1f..0958179ea875ea3e302c64aede427d9666b59dd8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2015 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2004-2014 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -273,8 +273,7 @@ enum {
 #define HFSIOC_CLRBACKINGSTOREINFO  _IO('h', 8)
 #define HFS_CLRBACKINGSTOREINFO  IOCBASECMD(HFSIOC_CLRBACKINGSTOREINFO)
 
-#define HFSIOC_BULKACCESS _IOW('h', 9, struct user32_access_t)
-#define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
+// 'h', 9 used to be HFSIOC_BULKACCESS which is now deprecated
 
 /* Unsupported - Previously used to enable/disable ACLs */
 #define HFSIOC_UNSUPPORTED  _IOW('h', 10, int32_t)
@@ -364,10 +363,28 @@ enum {
 #define HFSIOC_CS_FREESPACE_TRIM _IOWR('h', 39, u_int32_t)
 #define HFS_CS_FREESPACE_TRIM    IOCBASECMD(HFSIOC_CS_FREESPACE_TRIM)
 
+
 /* Get file system information for the given volume */
 #define HFSIOC_GET_FSINFO        _IOWR('h', 45, hfs_fsinfo)
 #define HFS_GET_FSINFO           IOCBASECMD(HFSIOC_GET_FSINFO)
 
+/* Re-pin hotfile data; argument controls what state gets repinned */
+#define HFSIOC_REPIN_HOTFILE_STATE _IOWR('h', 46, u_int32_t)
+#define HFS_REPIN_HOTFILE_STATE    IOCBASECMD(HFSIOC_REPIN_HOTFILE_STATE)
+
+#define HFS_REPIN_METADATA      0x0001
+#define HFS_REPIN_USERDATA      0x0002
+
+/* Mark a directory or file as worth caching on any underlying "fast" device */
+#define HFSIOC_SET_HOTFILE_STATE _IOWR('h', 47, u_int32_t)
+#define HFS_SET_HOTFILE_STATE    IOCBASECMD(HFSIOC_SET_HOTFILE_STATE)
+
+/* flags to pass to SET_HOTFILE_STATE */
+#define HFS_MARK_FASTDEVCANDIDATE   0x0001
+#define HFS_UNMARK_FASTDEVCANDIDATE 0x0002
+#define HFS_NEVER_FASTDEVCANDIDATE  0x0004
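A user-space sketch of how the new hotfile-state fsctl might be invoked; the header paths, and whether callers pass the HFSIOC_ command or its IOCBASECMD form, are assumptions here rather than something this diff establishes:

#include <sys/types.h>
#include <sys/fsctl.h>
#include <hfs/hfs_fsctl.h>   /* assumed install location of these defines */
#include <stdio.h>

/* Illustrative only: ask HFS to treat a path as a fast-device candidate. */
static int mark_fastdev_candidate(const char *path)
{
	u_int32_t state = HFS_MARK_FASTDEVCANDIDATE;

	if (fsctl(path, HFSIOC_SET_HOTFILE_STATE, &state, 0) == -1) {
		perror("fsctl(HFSIOC_SET_HOTFILE_STATE)");
		return -1;
	}
	return 0;
}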
+
+
 #endif /* __APPLE_API_UNSTABLE */
 
 #endif /* ! _HFS_FSCTL_H_ */
index d3071086a916ea6f504ee041846a802537b3c1dc..ffb31575ba8bc05124ffe13fe5cea95590bda244 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015 Apple Inc. All rights reserved.
+ * Copyright (c) 2014 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -40,9 +40,7 @@
 #include "hfscommon/headers/BTreesPrivate.h"
 #include "hfscommon/headers/FileMgrInternal.h"
 
-#if CONFIG_PROTECT
 #include <hfs/hfs_cprotect.h>
-#endif
 
 
 union HFSPlusRecord {
@@ -837,6 +835,10 @@ static int fsinfo_cprotect_count_callback(struct hfsmount *hfsmp, HFSPlusKey *ke
 {
        struct hfs_fsinfo_cprotect *fsinfo = (struct hfs_fsinfo_cprotect *)data;
        static const uint16_t cp_xattrname_utf16[] = CONTENT_PROTECTION_XATTR_NAME_CHARS;
+       /*
+        * NOTE: cp_xattrname_utf16_len is the number of UTF-16 code units in
+        * the EA name string.
+        */
        static const size_t cp_xattrname_utf16_len = sizeof(cp_xattrname_utf16)/2;
        struct cp_xattr_v5 *xattr;
        size_t xattr_len = sizeof(struct cp_xattr_v5);
@@ -850,7 +852,7 @@ static int fsinfo_cprotect_count_callback(struct hfsmount *hfsmp, HFSPlusKey *ke
 
        /* We only look at content protection xattrs */
        if ((key->attr_key.attrNameLen != cp_xattrname_utf16_len) ||
-               (bcmp(key->attr_key.attrName, cp_xattrname_utf16, cp_xattrname_utf16_len))) {
+               (bcmp(key->attr_key.attrName, cp_xattrname_utf16, 2 * cp_xattrname_utf16_len))) {
                return 0;
        }
 
index 7ba80c737bc31470fd4394d89797d58be24a9c07..4cecf72a9717c297e9474be435df2b642c1d3e0e 100644 (file)
@@ -89,13 +89,47 @@ typedef struct hotfile_entry {
        u_int32_t  blocks;
 } hotfile_entry_t;
 
+
+//
+// We cap the max temperature for non-system files to "MAX_NORMAL_TEMP"
+// so that they will always have a lower temperature than system (aka 
+// "auto-cached") files.  System files have MAX_NORMAL_TEMP added to
+// their temperature which produces two bands of files (all non-system
+// files will have a temp less than MAX_NORMAL_TEMP and all system
+// files will have a temp greater than MAX_NORMAL_TEMP).
+//
+// This puts non-system files on the left side of the hotfile btree 
+// (and we start evicting from the left-side of the tree).  The idea is 
+// that we will evict non-system files more aggressively since their
+// working set changes much more dynamically than system files (which 
+// are, for the most part, static).
+//
+// NOTE: these values have to fit into a 32-bit int.  We use a
+//       value of 1-billion which gives a pretty broad range
+//       and yet should not run afoul of any sign issues.
+//
+#define MAX_NORMAL_TEMP    1000000000
+#define HF_TEMP_RANGE      MAX_NORMAL_TEMP
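A small sketch of the banding the comment above describes: non-system temperatures are capped at MAX_NORMAL_TEMP, and system ("auto-cached") files are pushed into the band above it. The helper is illustrative only, not part of this change:

/*
 * Illustrative only: place a file's temperature into the non-system or the
 * system band described above.
 */
static uint32_t banded_temperature(uint32_t raw_temp, int is_system_file)
{
	uint32_t temp = raw_temp;

	if (temp > MAX_NORMAL_TEMP)
		temp = MAX_NORMAL_TEMP;		/* cap non-system temperatures */

	if (is_system_file)
		temp += MAX_NORMAL_TEMP;	/* system files land in the upper band */

	return temp;
}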
+
+
+//
+// These used to be defines of the hard coded values.  But if
+// we're on a cooperative fusion (CF) system we need to change 
+// the values (which happens in hfs_recording_init()).
+// 
+uint32_t hfc_default_file_count = 1000;
+uint32_t hfc_default_duration   = (3600 * 60);
+uint32_t hfc_max_file_count     = 5000;
+uint64_t hfc_max_file_size      = (10 * 1024 * 1024);
+
+
 /*
  * Hot File Recording Data (runtime).
  */
 typedef struct hotfile_data {
        struct hfsmount *hfsmp;
        long             refcount;
-       int              activefiles;  /* active number of hot files */
+       u_int32_t        activefiles;  /* active number of hot files */
        u_int32_t        threshold;
        u_int32_t        maxblocks;
        hotfile_entry_t *rootentry;
@@ -107,11 +141,15 @@ typedef struct hotfile_data {
 static int  hfs_recording_start (struct hfsmount *);
 static int  hfs_recording_stop (struct hfsmount *);
 
+/* Hotfiles pinning routines */
+static int hfs_getvnode_and_pin (struct hfsmount *hfsmp, uint32_t fileid, uint32_t *pinned);
+static int hfs_pin_extent_record (struct hfsmount *hfsmp, HFSPlusExtentRecord extents, uint32_t *pinned);
+static int hfs_pin_catalog_rec (struct hfsmount *hfsmp, HFSPlusCatalogFile *cfp, int rsrc);
 
 /*
  * Hot File Data recording functions (in-memory binary tree).
  */
-static void              hf_insert (hotfile_data_t *, hotfile_entry_t *);
+static int               hf_insert (hotfile_data_t *, hotfile_entry_t *);
 static void              hf_delete (hotfile_data_t *, u_int32_t, u_int32_t);
 static hotfile_entry_t * hf_coldest (hotfile_data_t *);
 static hotfile_entry_t * hf_getnewentry (hotfile_data_t *);
@@ -128,11 +166,12 @@ static void  hf_printtree (hotfile_entry_t *);
  */
 static int  hotfiles_collect (struct hfsmount *);
 static int  hotfiles_age (struct hfsmount *);
-static int  hotfiles_adopt (struct hfsmount *);
+static int  hotfiles_adopt (struct hfsmount *, vfs_context_t);
 static int  hotfiles_evict (struct hfsmount *, vfs_context_t);
 static int  hotfiles_refine (struct hfsmount *);
 static int  hotextents(struct hfsmount *, HFSPlusExtentDescriptor *);
 static int  hfs_addhotfile_internal(struct vnode *);
+static int  hfs_hotfile_cur_freeblks(hfsmount_t *hfsmp);
 
 
 /*
@@ -140,7 +179,10 @@ static int  hfs_addhotfile_internal(struct vnode *);
  */
 static int  hfc_btree_create (struct hfsmount *, unsigned int, unsigned int);
 static int  hfc_btree_open (struct hfsmount *, struct vnode **);
+static int  hfc_btree_open_ext(struct hfsmount *hfsmp, struct vnode **vpp, int ignore_btree_errs);
 static int  hfc_btree_close (struct hfsmount *, struct vnode *);
+static int  hfc_btree_delete_record(struct hfsmount *hfsmp, BTreeIterator *iterator, HotFileKey *key);
+static int  hfc_btree_delete(struct hfsmount *hfsmp);
 static int  hfc_comparekeys (HotFileKey *, HotFileKey *);
 
 
@@ -154,7 +196,7 @@ char hfc_tag[] = "CLUSTERED HOT FILES B-TREE     ";
  */
 
 /*
- * Start recording the hotest files on a file system.
+ * Start recording the hottest files on a file system.
  *
  * Requires that the hfc_mutex be held.
  */
@@ -206,16 +248,31 @@ hfs_recording_start(struct hfsmount *hfsmp)
                    (SWAP_BE32 (hotfileinfo.magic) == HFC_MAGIC) &&
                    (SWAP_BE32 (hotfileinfo.timeleft) > 0) &&
                    (SWAP_BE32 (hotfileinfo.timebase) > 0)) {
-                       hfsmp->hfc_maxfiles = SWAP_BE32 (hotfileinfo.maxfilecnt);
+                       if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+                               if (hfsmp->hfs_hotfile_freeblks == 0) {
+                                       hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks - SWAP_BE32 (hotfileinfo.usedblocks);
+                               }
+                               hfsmp->hfc_maxfiles = 0x7fffffff;
+                               printf("hfs: %s: %s: hotfile freeblocks: %d, max: %d\n", hfsmp->vcbVN, __FUNCTION__,
+                                      hfsmp->hfs_hotfile_freeblks, hfsmp->hfs_hotfile_maxblks);
+                       } else {
+                               hfsmp->hfc_maxfiles = SWAP_BE32 (hotfileinfo.maxfilecnt);
+                       }
                        hfsmp->hfc_timebase = SWAP_BE32 (hotfileinfo.timebase);
-                       hfsmp->hfc_timeout = SWAP_BE32 (hotfileinfo.timeleft) + tv.tv_sec ;
+                       int timeleft = (int)SWAP_BE32(hotfileinfo.timeleft);
+                       if (timeleft < 0 || timeleft > (int)(HFC_DEFAULT_DURATION*2)) {
+                               // in case this field got botched, don't let it screw things up
+                               // printf("hfs: hotfiles: bogus looking timeleft: %d\n", timeleft);
+                               timeleft = HFC_DEFAULT_DURATION;
+                       }
+                       hfsmp->hfc_timeout = timeleft + tv.tv_sec ;
                        /* Fix up any bogus timebase values. */
                        if (hfsmp->hfc_timebase < HFC_MIN_BASE_TIME) {
                                hfsmp->hfc_timebase = hfsmp->hfc_timeout - HFC_DEFAULT_DURATION;
                        }
 #if HFC_VERBOSE
-                       printf("hfs: Resume recording hot files on %s (%d secs left)\n",
-                               hfsmp->vcbVN, SWAP_BE32 (hotfileinfo.timeleft));
+                       printf("hfs: Resume recording hot files on %s (%d secs left (%d); timeout %ld)\n",
+                              hfsmp->vcbVN, SWAP_BE32 (hotfileinfo.timeleft), timeleft, hfsmp->hfc_timeout - tv.tv_sec);
 #endif
                } else {
                        hfsmp->hfc_maxfiles = HFC_DEFAULT_FILE_COUNT;
@@ -240,7 +297,10 @@ hfs_recording_start(struct hfsmount *hfsmp)
                        return (error);
                }
 #if HFC_VERBOSE
-               printf("hfs: begin recording hot files on %s\n", hfsmp->vcbVN);
+               printf("hfs: begin recording hot files on %s (hotfile start/end block: %d - %d; max/free: %d/%d; maxfiles: %d)\n",
+                      hfsmp->vcbVN,
+                      hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end,
+                      hfsmp->hfs_hotfile_maxblks, hfsmp->hfs_hotfile_freeblks, hfsmp->hfc_maxfiles);
 #endif
                hfsmp->hfc_maxfiles = HFC_DEFAULT_FILE_COUNT;
                hfsmp->hfc_timeout = tv.tv_sec + HFC_DEFAULT_DURATION;
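The resume path above only trusts the on-disk, big-endian timeleft field after clamping it to a sane range. A minimal userland sketch of that sanity check follows; it is illustrative only, with DEFAULT_DURATION as a hypothetical stand-in for HFC_DEFAULT_DURATION and ntohl standing in for SWAP_BE32 on a little-endian host.

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>          /* ntohl / htonl */

#define DEFAULT_DURATION (3600 * 60)    /* hypothetical stand-in value */

static int32_t sanitize_timeleft(uint32_t ondisk_be)
{
        int32_t timeleft = (int32_t)ntohl(ondisk_be);

        /* if the on-disk field got botched, fall back to the default */
        if (timeleft < 0 || timeleft > DEFAULT_DURATION * 2)
                timeleft = DEFAULT_DURATION;
        return timeleft;
}

int main(void)
{
        printf("%d\n", sanitize_timeleft(htonl(0xffffffffu))); /* clamped to default */
        printf("%d\n", sanitize_timeleft(htonl(600)));         /* passes through: 600 */
        return 0;
}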
@@ -391,7 +451,7 @@ hfs_recording_stop(struct hfsmount *hfsmp)
        /*
         * Compute the amount of space to reclaim...
         */
-       if (listp->hfl_totalblocks > hfsmp->hfs_hotfile_freeblks) {
+       if (listp->hfl_totalblocks > hfs_hotfile_cur_freeblks(hfsmp)) {
                listp->hfl_reclaimblks =
                        MIN(listp->hfl_totalblocks, hfsmp->hfs_hotfile_maxblks) -
                        hfsmp->hfs_hotfile_freeblks;
@@ -425,15 +485,40 @@ out:
        return (error);
 }
 
+static void
+save_btree_user_info(struct hfsmount *hfsmp)
+{
+       HotFilesInfo hotfileinfo;
+       struct timeval tv;
+
+       microtime(&tv);
+       hotfileinfo.magic       = SWAP_BE32 (HFC_MAGIC);
+       hotfileinfo.version     = SWAP_BE32 (HFC_VERSION);
+       hotfileinfo.duration    = SWAP_BE32 (HFC_DEFAULT_DURATION);
+       hotfileinfo.timebase    = SWAP_BE32 (hfsmp->hfc_timebase);
+       hotfileinfo.timeleft    = SWAP_BE32 (hfsmp->hfc_timeout - tv.tv_sec);
+       hotfileinfo.threshold   = SWAP_BE32 (HFC_MINIMUM_TEMPERATURE);
+       hotfileinfo.maxfileblks = SWAP_BE32 (HFC_MAXIMUM_FILESIZE / HFSTOVCB(hfsmp)->blockSize);
+       if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+               hotfileinfo.usedblocks = SWAP_BE32 (hfsmp->hfs_hotfile_maxblks - hfs_hotfile_cur_freeblks(hfsmp));
+#if HFC_VERBOSE
+               printf("hfs: %s: saving usedblocks = %d (timeleft: %d; timeout %ld)\n", hfsmp->vcbVN, (hfsmp->hfs_hotfile_maxblks - hfsmp->hfs_hotfile_freeblks),
+                      SWAP_BE32(hotfileinfo.timeleft), hfsmp->hfc_timeout);
+#endif
+       } else {
+               hotfileinfo.maxfilecnt  = SWAP_BE32 (HFC_DEFAULT_FILE_COUNT);
+       }
+       strlcpy((char *)hotfileinfo.tag, hfc_tag, sizeof hotfileinfo.tag);
+       (void) BTSetUserData(VTOF(hfsmp->hfc_filevp), &hotfileinfo, sizeof(hotfileinfo));
+}
+
 /*
  * Suspend recording the hottest files on a file system.
  */
 int
 hfs_recording_suspend(struct hfsmount *hfsmp)
 {
-       HotFilesInfo hotfileinfo;
        hotfile_data_t *hotdata = NULL;
-       struct timeval tv;
        int  error;
 
        if (hfsmp->hfc_stage == HFC_DISABLED)
@@ -465,25 +550,13 @@ hfs_recording_suspend(struct hfsmount *hfsmp)
        }
 
        if (hfs_start_transaction(hfsmp) != 0) {
-           error = EINVAL;
            goto out;
        }
        if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
-               error = EPERM;
                goto end_transaction;
        }
 
-       microtime(&tv);
-       hotfileinfo.magic       = SWAP_BE32 (HFC_MAGIC);
-       hotfileinfo.version     = SWAP_BE32 (HFC_VERSION);
-       hotfileinfo.duration    = SWAP_BE32 (HFC_DEFAULT_DURATION);
-       hotfileinfo.timebase    = SWAP_BE32 (hfsmp->hfc_timebase);
-       hotfileinfo.timeleft    = SWAP_BE32 (hfsmp->hfc_timeout - tv.tv_sec);
-       hotfileinfo.threshold   = SWAP_BE32 (hotdata->threshold);
-       hotfileinfo.maxfileblks = SWAP_BE32 (hotdata->maxblocks);
-       hotfileinfo.maxfilecnt  = SWAP_BE32 (HFC_DEFAULT_FILE_COUNT);
-       strlcpy((char *)hotfileinfo.tag, hfc_tag, sizeof hotfileinfo.tag);
-       (void) BTSetUserData(VTOF(hfsmp->hfc_filevp), &hotfileinfo, sizeof(hotfileinfo));
+       save_btree_user_info(hfsmp);
 
        hfs_unlock(VTOC(hfsmp->hfc_filevp));
 
@@ -507,122 +580,233 @@ out:
 }
 
 
-/*
- *
- */
-int
-hfs_recording_init(struct hfsmount *hfsmp)
+static void
+reset_file_ids(struct hfsmount *hfsmp, uint32_t *fileid_table, int num_ids)
+{
+       int i, error;
+
+       for(i=0; i < num_ids; i++) {
+               struct vnode *vp;
+
+               error = hfs_vget(hfsmp, fileid_table[i], &vp, 0, 0);
+               if (error) {
+                       if (error == ENOENT) {
+                               error = 0;
+                               continue;  /* stale entry, go to next */
+                       }
+                       continue;
+               }
+
+               // hfs_vget returns a locked cnode so no need to lock here
+
+               if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && (VTOC(vp)->c_attr.ca_recflags & kHFSFastDevPinnedMask)) {
+                       error = hfs_pin_vnode(hfsmp, vp, HFS_UNPIN_IT, NULL, vfs_context_kernel());
+               }
+
+               /*
+                * The updates to the catalog must be journaled
+                */
+               hfs_start_transaction(hfsmp);
+
+               //
+               // turn off _all_ the hotfile related bits since we're resetting state
+               //
+               if (VTOC(vp)->c_attr.ca_recflags & kHFSFastDevCandidateMask) {
+                       vnode_clearfastdevicecandidate(vp);
+               }
+
+               VTOC(vp)->c_attr.ca_recflags &= ~(kHFSFastDevPinnedMask|kHFSDoNotFastDevPinMask|kHFSFastDevCandidateMask|kHFSAutoCandidateMask);
+               VTOC(vp)->c_flag |= C_MODIFIED;
+
+               hfs_update(vp, 0);
+
+               hfs_end_transaction(hfsmp);
+               
+               hfs_unlock(VTOC(vp));
+               vnode_put(vp);
+       }
+}
+
+static int
+flag_hotfile(struct hfsmount *hfsmp, const char *filename)
+{
+       struct vnode *dvp = NULL, *fvp = NULL;
+       vfs_context_t ctx = vfs_context_kernel();
+       struct componentname cname;
+       int  error=0;
+       size_t fname_len;
+       const char *orig_fname = filename;
+       
+       if (filename == NULL) {
+               return EINVAL;
+       }
+
+       fname_len = strlen(filename);    // do NOT include the trailing '\0' so that we break out of the loop below
+       
+       error = VFS_ROOT(HFSTOVFS(hfsmp), &dvp, ctx);
+       if (error) {
+               return (error);
+       }
+
+       /* At this point, 'dvp' must be considered iocounted */
+       const char *ptr;
+       ptr = filename;
+
+       while (ptr < (orig_fname + fname_len - 1)) {
+               for(; ptr < (orig_fname + fname_len) && *ptr && *ptr != '/'; ptr++) {
+                       /* just keep advancing till we reach the end of the string or a slash */
+               }
+
+               cname.cn_nameiop = LOOKUP;
+               cname.cn_flags = ISLASTCN;
+               cname.cn_context = ctx;
+               cname.cn_ndp = NULL;
+               cname.cn_pnbuf = __DECONST(char *, orig_fname);
+        cname.cn_nameptr = __DECONST(char *, filename);
+               cname.cn_pnlen = fname_len;
+               cname.cn_namelen = ptr - filename;
+               cname.cn_hash = 0;
+               cname.cn_consume = 0;
+
+               error = VNOP_LOOKUP(dvp, &fvp, &cname, ctx);
+               if (error) {
+                       /*
+                        * If 'dvp' is non-NULL, then it has an iocount.  Make sure to release it
+                        * before bailing out.  VNOP_LOOKUP could legitimately return ENOENT
+                        * if the item didn't exist or if we raced with a delete.
+                        */
+                       if (dvp) {
+                               vnode_put(dvp);
+                               dvp = NULL;
+                       }
+                       return error;
+               }
+
+               if (ptr < orig_fname + fname_len - 1) {
+                       //
+                       // we've got a multi-part pathname so drop the ref on the dir,
+                       // make dvp become what we just looked up, and advance past
+                       // the slash character in the pathname to get to the next
+                       // component of the path
+                       //
+                       vnode_put(dvp);
+                       dvp = fvp;
+                       fvp = NULL;
+
+                       filename = ++ptr;   // skip the slash character
+               }
+       }
+       
+       if (fvp == NULL) {
+               error = ENOENT;
+               goto out;
+       }
+
+       struct cnode *cp = VTOC(fvp);
+       if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) {
+               goto out;
+       }
+
+       hfs_start_transaction(hfsmp);
+       
+       cp->c_attr.ca_recflags |= (kHFSFastDevCandidateMask|kHFSAutoCandidateMask);
+       cp->c_flag |= C_MODIFIED;
+
+       hfs_update(fvp, 0);
+
+       hfs_end_transaction(hfsmp);
+
+       hfs_unlock(cp);
+       //printf("hfs: flagged /%s with the fast-dev-candidate|auto-candidate flags\n", filename);
+
+
+out:
+       if (fvp) {
+               vnode_put(fvp);
+               fvp = NULL;
+       }
+
+       if (dvp) {
+               vnode_put(dvp);
+               dvp = NULL;
+       }
+
+       return error;
+}
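flag_hotfile() walks a slash-separated relative path one component at a time, handing each piece to VNOP_LOOKUP rather than going through namei. Below is a self-contained userland sketch of just the component-splitting part; it is illustrative only and makes no VFS calls.

#include <stdio.h>
#include <string.h>

/* print each slash-separated component of a relative path without copying it */
static void walk_components(const char *path)
{
        const char *start = path;
        const char *end = path + strlen(path);

        while (start < end) {
                const char *slash = memchr(start, '/', end - start);
                size_t len = slash ? (size_t)(slash - start) : (size_t)(end - start);

                printf("component: %.*s\n", (int)len, start);

                start += len;
                if (start < end && *start == '/')
                        start++;        /* skip the slash */
        }
}

int main(void)
{
        walk_components("private/var/db/dyld");
        return 0;
}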
+
+
+static void
+hfs_setup_default_cf_hotfiles(struct hfsmount *hfsmp)
+{
+       const char *system_default_hotfiles[] = {
+               "usr",
+               "System",
+               "Applications",
+               "private/var/db/dyld"
+       };
+       int i;
+
+       for(i=0; i < (int)(sizeof(system_default_hotfiles)/sizeof(char *)); i++) {
+               flag_hotfile(hfsmp, system_default_hotfiles[i]);
+       }
+}
+
+
+#define NUM_FILE_RESET_IDS   4096    // so we allocate 16k to hold file-ids
+
+static void
+hfs_hotfile_reset(struct hfsmount *hfsmp)
 {
        CatalogKey * keyp;
        CatalogRecord * datap;
        u_int32_t  dataSize;
-       HFSPlusCatalogFile *filep;
        BTScanState scanstate;
        BTreeIterator * iterator = NULL;
        FSBufferDescriptor  record;
-       HotFileKey * key;
-       filefork_t * filefork;
        u_int32_t  data;
-       struct cat_attr cattr;
        u_int32_t  cnid;
        int error = 0;
+       uint32_t *fileids=NULL;
+       int cur_id_index = 0;
 
-       int inserted = 0;  /* debug variables */
+       int cleared = 0;  /* debug variables */
        int filecount = 0;
+       int dircount = 0;
 
-       /*
-        * For now, only the boot volume is supported.
-        */
-       if ((vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) == 0) {
-               hfsmp->hfc_stage = HFC_DISABLED;
-               return (EPERM);
-       }
-
-       /*
-        * Tracking of hot files requires up-to-date access times.
-        * So if access time updates are disabled, then we disable
-        * hot files, too.
-        */
-       if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_NOATIME) {
-               hfsmp->hfc_stage = HFC_DISABLED;
-               return EPERM;
-       }
-       
-       /*
-        * If the Hot File btree exists then metadata zone is ready.
-        */
-       cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, HFC_FILENAME, &cattr, NULL);
-       if (cnid != 0 && S_ISREG(cattr.ca_mode)) {
-               if (hfsmp->hfc_stage == HFC_DISABLED)
-                       hfsmp->hfc_stage = HFC_IDLE;
-               return (0);
-       }
-
-       if (hfs_start_transaction(hfsmp) != 0) {
-               return EINVAL;
-       }
-
-       error = hfc_btree_create(hfsmp, HFSTOVCB(hfsmp)->blockSize, HFC_DEFAULT_FILE_COUNT);
-       if (error) {
-#if HFC_VERBOSE
-               printf("hfs: Error %d creating hot file b-tree on %s \n", error, hfsmp->vcbVN);
-#endif
-               goto out2;
-       }
-       /*
-        * Open the Hot File B-tree file for writing.
-        */
-       if (hfsmp->hfc_filevp)
-               panic("hfs_recording_init: hfc_filevp exists (vp = %p)", hfsmp->hfc_filevp);
-       error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp);
-       if (error) {
 #if HFC_VERBOSE
-               printf("hfs: Error %d opening hot file b-tree on %s \n", error, hfsmp->vcbVN);
+       printf("hfs: %s: %s\n", hfsmp->vcbVN, __FUNCTION__);
 #endif
-               goto out2;
-       }
+
        MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
        if (iterator == NULL) {
                error = ENOMEM;
-               (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
-               hfsmp->hfc_filevp = NULL;
-               goto out2;
+               goto out;
        }
        bzero(iterator, sizeof(*iterator));
-       key = (HotFileKey*) &iterator->key;
-       key->keyLength = HFC_KEYLENGTH;
+
+       MALLOC(fileids, uint32_t *, NUM_FILE_RESET_IDS * sizeof(uint32_t), M_TEMP, M_WAITOK);
+       if (fileids == NULL) {
+               error = ENOMEM;
+               goto out;
+       }
 
        record.bufferAddress = &data;
        record.itemSize = sizeof(u_int32_t);
        record.itemCount = 1;
-#if HFC_VERBOSE
-       printf("hfs: Evaluating space for \"%s\" metadata zone...\n", HFSTOVCB(hfsmp)->vcbVN);
-#endif
+
        /*
         * Get ready to scan the Catalog file.
         */
        error = BTScanInitialize(VTOF(HFSTOVCB(hfsmp)->catalogRefNum), 0, 0, 0,
                               kCatSearchBufferSize, &scanstate);
        if (error) {
-               printf("hfs_recording_init: err %d BTScanInit\n", error);
-               goto out2;
-       }
-
-       /*
-        * The writes to Hot File B-tree file are journaled.
-        */
-       if (hfs_start_transaction(hfsmp) != 0) {
-           error = EINVAL;
-           goto out1;
-       } 
-       if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
-               error = EPERM;
-               goto out0;
+               printf("hfs_hotfile_reset: err %d BTScanInit\n", error);
+               goto out;
        }
-       filefork = VTOF(hfsmp->hfc_filevp);
 
        /*
-        * Visit all the catalog btree leaf records.
+        * Visit all the catalog btree leaf records, clearing any that have the
+        * HotFileCached bit set.
         */
        for (;;) {
                error = BTScanNextRecord(&scanstate, 0, (void **)&keyp, (void **)&datap, &dataSize);
@@ -630,56 +814,955 @@ hfs_recording_init(struct hfsmount *hfsmp)
                        if (error == btNotFound)
                                error = 0;
                        else
-                               printf("hfs_recording_init: err %d BTScanNext\n", error);
+                               printf("hfs_hotfile_reset: err %d BTScanNext\n", error);
                        break;
                }
-               if ((datap->recordType != kHFSPlusFileRecord) ||
-                   (dataSize != sizeof(HFSPlusCatalogFile))) {
-                       continue;
-               }
-               filep = (HFSPlusCatalogFile *)datap;
-               filecount++;
-               if (filep->dataFork.totalBlocks == 0) {
-                       continue;
-               }
-               /*
-                * Any file that has blocks inside the hot file
-                * space is recorded for later eviction.
-                *
-                * For now, resource forks are ignored.
-                */
-               if (!hotextents(hfsmp, &filep->dataFork.extents[0])) {
+
+               if (datap->recordType == kHFSPlusFolderRecord && (dataSize == sizeof(HFSPlusCatalogFolder))) {
+                       HFSPlusCatalogFolder *dirp = (HFSPlusCatalogFolder *)datap;
+
+                       dircount++;
+               
+                       if ((dirp->flags & (kHFSFastDevPinnedMask|kHFSDoNotFastDevPinMask|kHFSFastDevCandidateMask|kHFSAutoCandidateMask)) == 0) {
+                               continue;
+                       }
+
+                       cnid = dirp->folderID;
+               } else if ((datap->recordType == kHFSPlusFileRecord) && (dataSize == sizeof(HFSPlusCatalogFile))) {
+                       HFSPlusCatalogFile *filep = (HFSPlusCatalogFile *)datap;   
+
+                       filecount++;
+
+                       /*
+                        * If the file doesn't have any of the HotFileCached bits set, ignore it.
+                        */
+                       if ((filep->flags & (kHFSFastDevPinnedMask|kHFSDoNotFastDevPinMask|kHFSFastDevCandidateMask|kHFSAutoCandidateMask)) == 0) {
+                               continue;
+                       }
+
+                       cnid = filep->fileID;
+               } else {
                        continue;
                }
-               cnid = filep->fileID;
 
                /* Skip over journal files. */
                if (cnid == hfsmp->hfs_jnlfileid || cnid == hfsmp->hfs_jnlinfoblkid) {
                        continue;
                }
-               /*
-                * XXX - need to skip quota files as well.
-                */
-
-               /* Insert a hot file entry. */
-               key->keyLength   = HFC_KEYLENGTH;
-               key->temperature = HFC_MINIMUM_TEMPERATURE;
-               key->fileID      = cnid;
-               key->forkType    = 0;
-               data = 0x3f3f3f3f;
-               error = BTInsertRecord(filefork, iterator, &record, record.itemSize);
-               if (error) {
-                       printf("hfs_recording_init: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID);
-                       error = MacToVFSError(error);
-                       break;
-               }
+
+               //
+               // Just record the cnid of the file for now.  We will modify it separately
+               // because we can't modify the catalog while we're scanning it.
+               //
+               fileids[cur_id_index++] = cnid;
+               if (cur_id_index >= NUM_FILE_RESET_IDS) {
+                       //
+                       // We're over the limit of file-ids so we have to terminate this
+                       // scan, go modify all the catalog records, then restart the scan.
+                       // This is required because it's not permissible to modify the
+                       // catalog while scanning it.
+                       //
+                       (void) BTScanTerminate(&scanstate, &data, &data, &data);
+
+                       reset_file_ids(hfsmp, fileids, cur_id_index);
+                       cleared += cur_id_index;
+                       cur_id_index = 0;
+
+                       // restart the scan
+                       error = BTScanInitialize(VTOF(HFSTOVCB(hfsmp)->catalogRefNum), 0, 0, 0,
+                                                kCatSearchBufferSize, &scanstate);
+                       if (error) {
+                               printf("hfs_hotfile_reset: err %d BTScanInit\n", error);
+                               goto out;
+                       }
+                       continue;
+               }
+       }
+
+       if (cur_id_index) {
+               reset_file_ids(hfsmp, fileids, cur_id_index);
+               cleared += cur_id_index;
+               cur_id_index = 0;
+       }
+
+       printf("hfs: cleared HotFileCache related bits on %d files out of %d (dircount %d)\n", cleared, filecount, dircount);
+
+       (void) BTScanTerminate(&scanstate, &data, &data, &data);
+
+out:   
+       if (fileids)
+               FREE(fileids, M_TEMP);
+       
+       if (iterator)
+               FREE(iterator, M_TEMP);
+
+       //
+       // If the hotfile btree exists, delete it.  We need to open
+       // it to be able to delete it because we need the hfc_filevp
+       // for deletion.
+       //
+       error = hfc_btree_open_ext(hfsmp, &hfsmp->hfc_filevp, 1);
+       if (!error) {
+               printf("hfs: hotfile_reset: deleting existing hotfile btree\n");
+               hfc_btree_delete(hfsmp);
+       }
+       
+       if (hfsmp->hfc_filevp) {
+               (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+               hfsmp->hfc_filevp = NULL;
+       }
+
+       hfsmp->hfs_hotfile_blk_adjust = 0;
+       hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks;
+}
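hfs_hotfile_reset() cannot modify the catalog while the B-tree scanner is walking it, so it collects file IDs into a fixed-size table, terminates the scan, applies the changes, and then restarts the scan. A stripped-down sketch of that collect-and-flush pattern is shown below; the loop over integers stands in for the catalog scan and all names are illustrative.

#include <stdint.h>
#include <stdio.h>

#define BATCH 4                         /* stands in for NUM_FILE_RESET_IDS */

/* pretend catalog: odd ids are the ones whose flags need clearing */
static int needs_reset(uint32_t id) { return id & 1; }

static void reset_ids(const uint32_t *ids, int n)
{
        /* in the kernel this is the point where the scan is terminated,
           the catalog records are updated, and the scan is restarted */
        for (int i = 0; i < n; i++)
                printf("reset %u\n", (unsigned)ids[i]);
}

int main(void)
{
        uint32_t pending[BATCH];
        int n = 0;

        for (uint32_t id = 1; id <= 10; id++) {         /* the "scan" */
                if (!needs_reset(id))
                        continue;
                pending[n++] = id;
                if (n == BATCH) {                       /* table full: flush it */
                        reset_ids(pending, n);
                        n = 0;
                }
        }
        if (n)                                          /* flush the tail */
                reset_ids(pending, n);
        return 0;
}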
+
+
+//
+// This should ONLY be called by hfs_recording_init() and the special fsctl.
+//
+// We assume that the hotfile btree is already opened.
+//
+static int
+hfs_hotfile_repin_files(struct hfsmount *hfsmp)
+{
+       BTreeIterator * iterator = NULL;
+       HotFileKey * key;
+       filefork_t * filefork;
+       int  error = 0;
+       int  bt_op;
+       enum hfc_stage stage;
+       uint32_t pinned_blocks;
+       uint32_t num_files=0, nrsrc=0;
+       uint32_t total_pinned=0;
+
+       if (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) || !hfsmp->hfc_filevp) {
+               //
+               // this is only meaningful if we're pinning hotfiles
+               // (as opposed to the regular form of hotfiles that
+               // get relocated to the hotfile zone)
+               //
+               return 0;
+       }
+
+#if HFC_VERBOSE
+       printf("hfs: %s: %s\n", hfsmp->vcbVN, __FUNCTION__);
+#endif
+       
+       if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
+               return (EPERM);
+       }
+
+
+       MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
+       if (iterator == NULL) {
+               hfs_unlock(VTOC(hfsmp->hfc_filevp));
+               return (ENOMEM);
+       }
+
+       stage = hfsmp->hfc_stage;
+       hfsmp->hfc_stage = HFC_BUSY;
+
+       bt_op = kBTreeFirstRecord;
+
+       bzero(iterator, sizeof(*iterator));
+       key = (HotFileKey*) &iterator->key;
+
+       filefork = VTOF(hfsmp->hfc_filevp);
+       int lockflags;
+
+       while (1) {
+
+               lockflags = 0;
+               /*
+                * Obtain the first record (ie the coldest one).
+                */
+               if (BTIterateRecord(filefork, bt_op, iterator, NULL, NULL) != 0) {
+                       // no more records
+                       error = 0;
+                       break;
+               }
+               if (key->keyLength != HFC_KEYLENGTH) {
+                       // printf("hfs: hotfiles_repin_files: invalid key length %d\n", key->keyLength);
+                       error = EFTYPE;
+                       break;
+               }               
+               if (key->temperature == HFC_LOOKUPTAG) {
+                       // ran into thread records in the hotfile btree
+                       error = 0;
+                       break;
+               }
+
+        //
+               // Just lookup the records in the catalog and pin the direct
+               // mapped extents.  Faster than instantiating full vnodes
+               // (and thereby thrashing the system vnode cache).
+               //
+               struct cat_desc fdesc;
+               struct cat_attr attr;
+               struct cat_fork fork;
+        uint8_t forktype = 0;
+
+               lockflags = hfs_systemfile_lock(hfsmp, (SFL_CATALOG | SFL_EXTENTS), HFS_SHARED_LOCK);
+        /*
+         * Snoop the cnode hash to find out if the item we want is in-core already.
+         *
+         * We largely expect this function to fail (the items we want are probably not in the hash).
+         * We use the special variant which bails out as soon as it finds a vnode (even if it is
+         * marked as open-unlinked or actually removed on-disk).  If we find a vnode, then we
+         * release the systemfile locks and go through the pin-vnode path instead.
+         */
+        if (hfs_chash_snoop (hfsmp, key->fileID, 1, NULL, NULL) == 0) {
+            pinned_blocks = 0;
+
+            /* unlock immediately and go through the in-core path */
+            hfs_systemfile_unlock(hfsmp, lockflags);
+                       lockflags = 0;
+
+            error = hfs_getvnode_and_pin (hfsmp, key->fileID, &pinned_blocks);
+            if (error) {
+                /* if ENOENT, then it was deleted in the catalog. Remove from our hotfiles tracking */
+                if (error == ENOENT) {
+                    hfc_btree_delete_record(hfsmp, iterator, key);
+                }
+                /* other errors, just ignore and move on with life */
+            }
+            else { //!error
+                total_pinned += pinned_blocks;
+                num_files++;
+            }
+
+            goto next;
+        }
+
+        /* If we get here, we're still holding the systemfile locks */
+               error = cat_idlookup(hfsmp, key->fileID, 1, 0, &fdesc, &attr, &fork);
+               if (error) {
+                       //
+                       // this file system could have been mounted while booted from a
+                       // different partition and thus the hotfile btree would not have
+                       // been maintained.  thus a file that was hotfile cached could
+                       // have been deleted while booted from a different partition which
+                       // means we need to delete it from the hotfile btree.
+                       //
+                       // block accounting is taken care of at the end: we re-assign
+                       // hfsmp->hfs_hotfile_freeblks based on how many blocks we actually
+                       // pinned.
+                       //
+                       hfc_btree_delete_record(hfsmp, iterator, key);
+
+                       goto next;
+               }
+
+               if (fork.cf_size == 0) {
+                       // hmmm, the data is probably in the resource fork (aka a compressed file)
+                       error = cat_idlookup(hfsmp, key->fileID, 1, 1, &fdesc, &attr, &fork);
+                       if (error) {
+                               hfc_btree_delete_record(hfsmp, iterator, key);
+                               goto next;
+                       }
+            forktype = 0xff;
+                       nrsrc++;
+               }
+
+               pinned_blocks = 0;
+
+        /* Can't release the catalog/extents locks yet, we may need to go find the overflow blocks */
+        error = hfs_pin_extent_record (hfsmp, fork.cf_extents, &pinned_blocks);
+        if (error) {
+            goto next;  //skip to next
+        }
+               /* add in the blocks from the 8 inline extents */
+        total_pinned += pinned_blocks;
+        pinned_blocks = 0;
+
+        /* Could this file have overflow extents? */
+        if (fork.cf_extents[kHFSPlusExtentDensity-1].startBlock) {
+            /* better pin them, too */
+            error = hfs_pin_overflow_extents (hfsmp, key->fileID, forktype, &pinned_blocks);
+            if (error) {
+                               /* If we fail to pin all of the overflow extents, then just skip to the next file */
+                goto next;
+            }
+        }
+
+               num_files++;
+        if (pinned_blocks) {
+            /* now add in any overflow also */
+            total_pinned += pinned_blocks;
+        }
+
+       next:
+               if (lockflags) {
+                       hfs_systemfile_unlock(hfsmp, lockflags);
+                       lockflags = 0;
+               }
+               bt_op = kBTreeNextRecord;
+
+       } /* end while */
+
+#if HFC_VERBOSE
+       printf("hfs: hotfiles_repin_files: re-pinned %d files (nrsrc %d, total pinned %d blks; freeblock %d, maxblocks %d, calculated free: %d)\n",
+              num_files, nrsrc, total_pinned, hfsmp->hfs_hotfile_freeblks, hfsmp->hfs_hotfile_maxblks,
+             hfsmp->hfs_hotfile_maxblks - total_pinned);
+#endif
+       //
+       // make sure this is accurate based on how many blocks we actually pinned
+       //
+       hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks - total_pinned;
+
+       hfs_unlock(VTOC(hfsmp->hfc_filevp));
+
+       FREE(iterator, M_TEMP); 
+       hfsmp->hfc_stage = stage;
+       wakeup((caddr_t)&hfsmp->hfc_stage);
+       return (error);
+}
+
+void
+hfs_repin_hotfiles(struct hfsmount *hfsmp)
+{
+       int error, need_close;
+       
+       lck_mtx_lock(&hfsmp->hfc_mutex);
+
+       if (hfsmp->hfc_filevp == NULL) {
+               error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp);
+               if (!error) {
+                       need_close = 1;
+               } else {
+                       printf("hfs: failed to open the btree err=%d.  Unable to re-pin hotfiles.\n", error);
+                       lck_mtx_unlock(&hfsmp->hfc_mutex);
+                       return;
+               }
+       } else {
+               need_close = 0;
+       }
+
+       hfs_pin_vnode(hfsmp, hfsmp->hfc_filevp, HFS_PIN_IT, NULL, vfs_context_kernel());
+                       
+       hfs_hotfile_repin_files(hfsmp);
+
+       if (need_close) {
+               (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+               hfsmp->hfc_filevp = NULL;
+       }
+
+       lck_mtx_unlock(&hfsmp->hfc_mutex);
+}
+
+/*
+ * For a given file ID, find and pin all of its overflow extents to the underlying CS
+ * device.  Assumes that the extents overflow b-tree is locked for the duration of this call.
+ *
+ * Emit the number of blocks pinned in output argument 'pinned'
+ *
+ * Return success or failure (errno) in return value.
+ *
+ */
+int hfs_pin_overflow_extents (struct hfsmount *hfsmp, uint32_t fileid,
+                                     uint8_t forktype, uint32_t *pinned) {
+
+    struct BTreeIterator *ext_iter = NULL;
+    ExtentKey *ext_key_ptr = NULL;
+    ExtentRecord ext_data;
+    FSBufferDescriptor btRecord;
+    uint16_t btRecordSize;
+    int error = 0;
+
+    uint32_t pinned_blocks = 0;
+
+
+    MALLOC (ext_iter, struct BTreeIterator*, sizeof (struct BTreeIterator), M_TEMP, M_WAITOK);
+    if (ext_iter == NULL) {
+        return ENOMEM;
+    }
+    bzero (ext_iter, sizeof(*ext_iter));
+
+    BTInvalidateHint (ext_iter);
+    ext_key_ptr = (ExtentKey*)&ext_iter->key;
+    btRecord.bufferAddress = &ext_data;
+    btRecord.itemCount = 1;
+
+    /*
+     * This is like when you delete a file; we don't actually need most of the search machinery because
+     * we are going to need all of the extent records that belong to this file (for a given fork type),
+     * so we might as well use a straight-up iterator.
+     *
+     * Position the B-Tree iterator at the first record with this file ID
+     */
+    btRecord.itemSize = sizeof (HFSPlusExtentRecord);
+    ext_key_ptr->hfsPlus.keyLength = kHFSPlusExtentKeyMaximumLength;
+    ext_key_ptr->hfsPlus.forkType = forktype;
+    ext_key_ptr->hfsPlus.pad = 0;
+    ext_key_ptr->hfsPlus.fileID = fileid;
+    ext_key_ptr->hfsPlus.startBlock = 0;
+
+    error = BTSearchRecord (VTOF(hfsmp->hfs_extents_vp), ext_iter, &btRecord, &btRecordSize, ext_iter);
+    if (error ==  btNotFound) {
+        /* empty b-tree, so that's ok. we'll fall out during error check below. */
+        error = 0;
+    }
+
+    while (1) {
+        uint32_t found_fileid;
+        uint32_t pblocks;
+
+        error = BTIterateRecord (VTOF(hfsmp->hfs_extents_vp), kBTreeNextRecord, ext_iter, &btRecord, &btRecordSize);
+        if (error) {
+            /* swallow it if it's btNotFound, otherwise just bail out */
+            if (error == btNotFound)
+                error = 0;
+            break;
+        }
+
+        found_fileid = ext_key_ptr->hfsPlus.fileID;
+        /*
+         * We only do one fork type at a time. So if either the fork-type doesn't
+         * match what we are looking for (resource or data), OR the file id doesn't match
+         * which indicates that there's nothing more with this file ID as the key, then bail out
+         */
+        if ((found_fileid != fileid) || (ext_key_ptr->hfsPlus.forkType != forktype))  {
+            error = 0;
+            break;
+        }
+
+        /* Otherwise, we now have an extent record. Process and pin all of the file extents. */
+        pblocks = 0;
+        error = hfs_pin_extent_record (hfsmp, ext_data.hfsPlus, &pblocks);
+
+        if (error) {
+            break;
+        }
+        pinned_blocks += pblocks;
+
+        /* if 8th extent is empty, then bail out */
+        if (ext_data.hfsPlus[kHFSPlusExtentDensity-1].startBlock == 0) {
+            error = 0;
+            break;
+        }
+
+    } // end extent-getting loop
+
+    /* dump the iterator */
+    FREE (ext_iter, M_TEMP);
+
+    if (error == 0) {
+        /*
+         * In the event that the file has no overflow extents, pinned_blocks
+         * will never be updated, so we'll properly export 0 pinned blocks to caller
+         */
+        *pinned = pinned_blocks;
+    }
+
+    return error;
+
+}
+
+
+static int
+hfs_getvnode_and_pin (struct hfsmount *hfsmp, uint32_t fileid, uint32_t *pinned) {
+    struct vnode *vp;
+    int error = 0;
+    *pinned = 0;
+    uint32_t pblocks;
+
+    /*
+     * Acquire the vnode for this file.  This returns a locked cnode on success
+     */
+    error = hfs_vget(hfsmp, fileid, &vp, 0, 0);
+    if (error) {
+        /* It's possible the file was open-unlinked. In this case, we'll get ENOENT back. */
+        return error;
+    }
+
+    /*
+     * Symlinks that may have been inserted into the hotfile zone during a previous OS are now stuck
+     * here.  We do not want to move them.
+     */
+    if (!vnode_isreg(vp)) {
+        hfs_unlock(VTOC(vp));
+        vnode_put(vp);
+        return EPERM;
+    }
+
+    if (!(VTOC(vp)->c_attr.ca_recflags & kHFSFastDevPinnedMask)) {
+        hfs_unlock(VTOC(vp));
+        vnode_put(vp);
+        return EINVAL;
+    }
+
+    error = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT, &pblocks, vfs_context_kernel());
+    if (error == 0) {
+        *pinned = pblocks;
+    }
+
+    hfs_unlock(VTOC(vp));
+    vnode_put(vp);
+
+    return error;
+
+}
+
+/*
+ * Pins an HFS Extent record to the underlying CoreStorage.  Assumes that Catalog & Extents overflow
+ * B-trees are held locked, as needed.
+ *
+ * Returns the number of blocks pinned in the output argument 'pinned'
+ *
+ * Returns error status (0 || errno) in return value.
+ */
+static int hfs_pin_extent_record (struct hfsmount *hfsmp, HFSPlusExtentRecord extents, uint32_t *pinned) {
+    uint32_t pb = 0;
+    int i;
+    int error;
+
+       if (pinned == NULL) {
+               return EINVAL;
+       }
+    *pinned = 0;
+
+
+
+       /* iterate through the extents */
+       for ( i = 0; i < kHFSPlusExtentDensity; i++) {
+               if (extents[i].startBlock == 0) {
+                       break;
+               }
+
+               error = hfs_pin_block_range (hfsmp, HFS_PIN_IT, extents[i].startBlock,
+                               extents[i].blockCount, vfs_context_kernel());
+
+               if (error) {
+                       break;
+               }
+               pb += extents[i].blockCount;
+       }
+
+    *pinned = pb;
+
+       return error;
+}
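hfs_pin_extent_record() only walks the eight in-line extent slots and stops at the first empty one; anything beyond that lives in the extents-overflow B-tree and is handled by hfs_pin_overflow_extents(). A tiny userland sketch of that walk follows, using a hypothetical two-field extent struct rather than the real on-disk types.

#include <stdint.h>
#include <stdio.h>

#define EXTENT_DENSITY 8        /* stands in for kHFSPlusExtentDensity */

struct extent {
        uint32_t startBlock;
        uint32_t blockCount;
};

/* total the allocation blocks until the first empty slot */
static uint32_t count_extent_blocks(const struct extent rec[EXTENT_DENSITY])
{
        uint32_t total = 0;

        for (int i = 0; i < EXTENT_DENSITY; i++) {
                if (rec[i].startBlock == 0)
                        break;          /* empty slot: no more in-line extents */
                total += rec[i].blockCount;
        }
        return total;
}

int main(void)
{
        struct extent rec[EXTENT_DENSITY] = {
                { 100, 8 }, { 250, 4 }, { 0, 0 },
        };
        printf("%u blocks\n", (unsigned)count_extent_blocks(rec));     /* 12 blocks */
        return 0;
}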
+
+/*
+ * Consume an HFS Plus on-disk catalog record and pin its blocks
+ * to the underlying CS devnode.
+ *
+ * NOTE: This is an important distinction!
+ * This function takes in an HFSPlusCatalogFile* which is the actual
+ * 200-some-odd-byte on-disk representation in the Catalog B-Tree (not
+ * one of the run-time structs that we normally use).
+ *
+ * This assumes that the catalog and extents-overflow btrees
+ * are locked, at least in shared mode
+ */
+static int hfs_pin_catalog_rec (struct hfsmount *hfsmp, HFSPlusCatalogFile *cfp, int rsrc) {
+       uint32_t pinned_blocks = 0;
+       HFSPlusForkData *forkdata;
+       int error = 0;
+       uint8_t forktype = 0;
+
+       if (rsrc) {
+        forkdata = &cfp->resourceFork;
+               forktype = 0xff;
+       }
+       else {
+               forkdata = &cfp->dataFork;
+       }
+
+       uint32_t pblocks = 0;
+
+       /* iterate through the inline extents */
+       error = hfs_pin_extent_record (hfsmp, forkdata->extents, &pblocks);
+       if (error) {
+        return error;
+       }
+
+       pinned_blocks += pblocks;
+    pblocks = 0;
+
+       /* it may have overflow extents */
+       if (forkdata->extents[kHFSPlusExtentDensity-1].startBlock != 0) {
+        error = hfs_pin_overflow_extents (hfsmp, cfp->fileID, forktype, &pblocks);
+       }
+    pinned_blocks += pblocks;
+
+       hfsmp->hfs_hotfile_freeblks -= pinned_blocks;
+
+       return error;
+}
+
+
+/*
+ *
+ */
+int
+hfs_recording_init(struct hfsmount *hfsmp)
+{
+       CatalogKey * keyp;
+       CatalogRecord * datap;
+       u_int32_t  dataSize;
+       HFSPlusCatalogFile *filep;
+       BTScanState scanstate;
+       BTreeIterator * iterator = NULL;
+       FSBufferDescriptor  record;
+       HotFileKey * key;
+       filefork_t * filefork;
+       u_int32_t  data;
+       struct cat_attr cattr;
+       u_int32_t  cnid;
+       int error = 0;
+       long starting_temp;
+
+       int started_tr = 0;
+       int started_scan = 0;
+
+       int inserted = 0;  /* debug variables */
+       int filecount = 0;
+       int uncacheable = 0;
+
+       /*
+        * For now, only the boot volume is supported.
+        */
+       if ((vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) == 0) {
+               hfsmp->hfc_stage = HFC_DISABLED;
+               return (EPERM);
+       }
+
+       /* We grab the HFC mutex even though we're not fully mounted yet, just for orderliness */
+       lck_mtx_lock (&hfsmp->hfc_mutex);
+
+       /*
+        * Tracking of hot files requires up-to-date access times.
+        * So if access time updates are disabled, then we disable
+        * hot files, too.
+        */
+       if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_NOATIME) {
+               hfsmp->hfc_stage = HFC_DISABLED;
+               lck_mtx_unlock (&hfsmp->hfc_mutex);
+               return EPERM;
+       }
+       
+       //
+       // Check if we've been asked to suspend operation
+       //
+       cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, ".hotfile-suspend", &cattr, NULL);
+       if (cnid != 0) {
+               printf("hfs: %s: %s: hotfiles explicitly disabled!  remove /.hotfile-suspend to re-enable\n", hfsmp->vcbVN, __FUNCTION__);
+               hfsmp->hfc_stage = HFC_DISABLED;
+               lck_mtx_unlock (&hfsmp->hfc_mutex);
+               return EPERM;
+       }
+
+       //
+       // Check if we've been asked to reset our state.
+       //
+       cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, ".hotfile-reset", &cattr, NULL);
+       if (cnid != 0) {
+               hfs_hotfile_reset(hfsmp);
+       }
+
+       if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+               //
+               // Cooperative Fusion (CF) systems use different constants 
+               // than traditional hotfile systems.  These were picked after a bit of
+               // experimentation - we can cache many more files on the
+               // ssd in a CF system and we can do so more rapidly
+               // so bump the limits considerably (and turn down the
+               // duration so that it doesn't take weeks to adopt all
+               // the files).
+               //
+               hfc_default_file_count = 20000;
+               hfc_default_duration   = 300;    // 5min
+               hfc_max_file_count     = 50000;
+               hfc_max_file_size      = (512ULL * 1024ULL * 1024ULL);
+       }
+
+       /*
+        * If the Hot File btree exists then metadata zone is ready.
+        */
+       cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, HFC_FILENAME, &cattr, NULL);
+       if (cnid != 0 && S_ISREG(cattr.ca_mode)) {
+               int recreate = 0;
+               
+               if (hfsmp->hfc_stage == HFC_DISABLED)
+                       hfsmp->hfc_stage = HFC_IDLE;
+               hfsmp->hfs_hotfile_freeblks = 0;
+
+               if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && cattr.ca_blocks > 0) {
+                       //
+                       // make sure the hotfile btree is pinned
+                       //
+                       error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp);
+                       if (!error) {
+                               /* XXX: must fix hfs_pin_vnode too */
+                               hfs_pin_vnode(hfsmp, hfsmp->hfc_filevp, HFS_PIN_IT, NULL, vfs_context_kernel());
+                               
+                       } else {
+                               printf("hfs: failed to open the btree err=%d.  Recreating hotfile btree.\n", error);
+                               recreate = 1;
+                       }
+                       
+                       hfs_hotfile_repin_files(hfsmp);
+
+                       if (hfsmp->hfc_filevp) {
+                               (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+                               hfsmp->hfc_filevp = NULL;
+                       }
+
+               } else if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+                       // hmmm, the hotfile btree is zero bytes long?  how odd.  let's recreate it.
+                       printf("hfs: hotfile btree is zero bytes long?!  recreating it.\n");
+                       recreate = 1;
+               }
+
+               if (!recreate) {
+                       /* don't forget to unlock the mutex */
+                       lck_mtx_unlock (&hfsmp->hfc_mutex);
+                       return (0);
+               } else {
+                       //
+                       // open the hotfile btree file ignoring errors because
+                       // we need the vnode pointer for hfc_btree_delete() to
+                       // be able to do its work
+                       //
+                       error = hfc_btree_open_ext(hfsmp, &hfsmp->hfc_filevp, 1);
+                       if (!error) {
+                               // and delete it!
+                               error = hfc_btree_delete(hfsmp);
+                               (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+                               hfsmp->hfc_filevp = NULL;
+                       }
+               }
+       }
+
+       printf("hfs: %s: %s: creating the hotfile btree\n", hfsmp->vcbVN, __FUNCTION__);
+       if (hfs_start_transaction(hfsmp) != 0) {
+               lck_mtx_unlock (&hfsmp->hfc_mutex);
+               return EINVAL;
+       }
+
+       /* B-tree creation must be journaled */
+       started_tr = 1;
+
+       error = hfc_btree_create(hfsmp, HFSTOVCB(hfsmp)->blockSize, HFC_DEFAULT_FILE_COUNT);
+       if (error) {
+#if HFC_VERBOSE
+               printf("hfs: Error %d creating hot file b-tree on %s \n", error, hfsmp->vcbVN);
+#endif
+               goto recording_init_out;
+       }
+
+       hfs_end_transaction (hfsmp);
+       started_tr = 0;
+       /*
+        * Do a journal flush + flush track cache. We have to ensure that the async I/Os have been issued to the media
+        * before proceeding.
+        */
+       hfs_flush (hfsmp, HFS_FLUSH_FULL);
+
+       /* now re-start a new transaction */
+       if (hfs_start_transaction (hfsmp) != 0) {
+               lck_mtx_unlock (&hfsmp->hfc_mutex);
+               return EINVAL;
+       }
+       started_tr = 1;
+
+       /*
+        * Open the Hot File B-tree file for writing.
+        */
+       if (hfsmp->hfc_filevp)
+               panic("hfs_recording_init: hfc_filevp exists (vp = %p)", hfsmp->hfc_filevp);
+
+       error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp);
+       if (error) {
+#if HFC_VERBOSE
+               printf("hfs: Error %d opening hot file b-tree on %s \n", error, hfsmp->vcbVN);
+#endif
+               goto recording_init_out;
+       }
+
+       /*
+        * This function performs work similar to namei; we must NOT hold the catalog lock while
+        * calling it. It decorates catalog records as pinning candidates (it does no hotfiles work itself).
+        */
+       hfs_setup_default_cf_hotfiles(hfsmp);
+
+       /*
+        * now grab the hotfiles b-tree vnode/cnode lock first, as it is not classified as a systemfile.
+        */
+       if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
+               error = EPERM;
+               (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+               /* zero it out to avoid pinning later on */
+               hfsmp->hfc_filevp = NULL;
+               goto recording_init_out;
+       }
+
+       MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
+       if (iterator == NULL) {
+               error = ENOMEM;
+               hfs_unlock (VTOC(hfsmp->hfc_filevp));
+               (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+               /* zero it out to avoid pinning */
+               hfsmp->hfc_filevp = NULL;
+               goto recording_init_out;
+       }
+
+       bzero(iterator, sizeof(*iterator));
+       key = (HotFileKey*) &iterator->key;
+       key->keyLength = HFC_KEYLENGTH;
+
+       record.bufferAddress = &data;
+       record.itemSize = sizeof(u_int32_t);
+       record.itemCount = 1;
+
+#if HFC_VERBOSE
+       printf("hfs: Evaluating space for \"%s\" metadata zone... (freeblks %d)\n", HFSTOVCB(hfsmp)->vcbVN,
+              hfsmp->hfs_hotfile_freeblks);
+#endif
+
+       /*
+        * Get ready to scan the Catalog file. We explicitly do NOT grab the catalog lock because
+        * we're fully single-threaded at the moment (by virtue of being called during mount()),
+        * and if we have to grow the hotfile btree we would need to grab the catalog lock; if we
+        * had taken a shared lock here, that would deadlock (see <rdar://problem/21486585>)
+        *
+        * We already started a transaction so we should already be holding the journal lock at this point.
+        * Note that we have to hold the journal lock / start a txn BEFORE the systemfile locks.
+        */
+
+       error = BTScanInitialize(VTOF(HFSTOVCB(hfsmp)->catalogRefNum), 0, 0, 0,
+                              kCatSearchBufferSize, &scanstate);
+       if (error) {
+               printf("hfs_recording_init: err %d BTScanInit\n", error);
+
+               /* drop the systemfile locks */
+               hfs_unlock(VTOC(hfsmp->hfc_filevp));
+
+               (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+
+               /* zero it out to avoid pinning */
+               hfsmp->hfc_filevp = NULL;
+               goto recording_init_out;
+       }
+
+       started_scan = 1;
+
+       filefork = VTOF(hfsmp->hfc_filevp);
+
+       starting_temp = random() % HF_TEMP_RANGE;
+
+       /*
+        * Visit all the catalog btree leaf records. We have to hold the catalog lock to do this.
+        *
+        * NOTE: The B-Tree scanner reads from the media itself. Under normal circumstances it would be
+        * fine to simply use b-tree routines to read blocks that correspond to b-tree nodes, because the
+        * block cache is going to ensure you always get the cached copy of a block (even if a journal
+        * txn has modified one of those blocks).  That is NOT true when
+        * using the scanner.  In particular, it will always read whatever is on-disk. So we have to ensure
+        * that the journal has flushed and that the async I/Os to the metadata files have been issued.
+        */
+       for (;;) {
+               error = BTScanNextRecord(&scanstate, 0, (void **)&keyp, (void **)&datap, &dataSize);
+               if (error) {
+                       if (error == btNotFound)
+                               error = 0;
+                       else
+                               printf("hfs_recording_init: err %d BTScanNext\n", error);
+                       break;
+               }
+               if ((datap->recordType != kHFSPlusFileRecord) ||
+                   (dataSize != sizeof(HFSPlusCatalogFile))) {
+                       continue;
+               }
+               filep = (HFSPlusCatalogFile *)datap;
+               filecount++;
+
+               if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+                       if (filep->flags & kHFSDoNotFastDevPinMask) {
+                               uncacheable++;
+                       }
+
+                       //
+                       // If the file does not have the FastDevPinnedMask set, we
+                       // can ignore it and just go to the next record.
+                       //
+                       if ((filep->flags & kHFSFastDevPinnedMask) == 0) {
+                               continue;
+                       }
+               } else if (filep->dataFork.totalBlocks == 0) {
+                       continue;
+               }
+
+               /*
+                * On a regular hdd, any file that has blocks inside
+                * the hot file space is recorded for later eviction.
+                *
+                * For now, resource forks are ignored.
+                *
+                * We don't do this on CF systems as there is no real
+                * hotfile area - we just pin/unpin blocks belonging to
+                * interesting files.
+                */
+               if (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && !hotextents(hfsmp, &filep->dataFork.extents[0])) {
+                       continue;
+               }
+               cnid = filep->fileID;
+
+               /* Skip over journal files. */
+               if (cnid == hfsmp->hfs_jnlfileid || cnid == hfsmp->hfs_jnlinfoblkid) {
+                       continue;
+               }
+               /*
+                * XXX - need to skip quota files as well.
+                */
+
+               uint32_t temp;
+
+               if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+                       int rsrc = 0;
+
+                       temp = (uint32_t)starting_temp++;
+                       if (filep->flags & kHFSAutoCandidateMask) {
+                               temp += MAX_NORMAL_TEMP;
+                       }
+
+                       /* use the data fork by default */
+                       if (filep->dataFork.totalBlocks == 0) {
+                               /*
+                 * but if empty, switch to rsrc as it's likely
+                 * a compressed file
+                 */
+                               rsrc = 1;
+                       }
+
+                       error =  hfs_pin_catalog_rec (hfsmp, filep, rsrc);
+                       if (error)
+                               break;
+
+               } else {
+                       temp = HFC_MINIMUM_TEMPERATURE;
+               }
+
+               /* Insert a hot file entry. */
+               key->keyLength   = HFC_KEYLENGTH;
+               key->temperature = temp;
+               key->fileID      = cnid;
+               key->forkType    = 0;
+               data = 0x3f3f3f3f;
+               error = BTInsertRecord(filefork, iterator, &record, record.itemSize);
+               if (error) {
+                       printf("hfs_recording_init: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID);
+                       error = MacToVFSError(error);
+                       break;
+               }
 
                /* Insert the corresponding thread record. */
                key->keyLength = HFC_KEYLENGTH;
                key->temperature = HFC_LOOKUPTAG;
                key->fileID = cnid;
                key->forkType = 0;
-               data = HFC_MINIMUM_TEMPERATURE;
+               data = temp;
                error = BTInsertRecord(filefork, iterator, &record, record.itemSize);
                if (error) {
                        printf("hfs_recording_init: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID);
@@ -687,29 +1770,50 @@ hfs_recording_init(struct hfsmount *hfsmp)
                        break;
                }
                inserted++;
-       }
+       } // end catalog iteration loop
+
+       save_btree_user_info(hfsmp);
        (void) BTFlushPath(filefork);
-       hfs_unlock(VTOC(hfsmp->hfc_filevp));
 
-out0:
-       hfs_end_transaction(hfsmp);
+recording_init_out:
+
+       /* Unlock first, then pin after releasing everything else */
+       if (hfsmp->hfc_filevp) {
+               hfs_unlock (VTOC(hfsmp->hfc_filevp));
+       }
+
+       if (started_scan) {
+               (void) BTScanTerminate (&scanstate, &data, &data, &data);
+       }
+
+       if (started_tr) {
+               hfs_end_transaction(hfsmp);
+       }
+
 #if HFC_VERBOSE
-       printf("hfs: %d files identified out of %d\n", inserted, filecount);
+       printf("hfs: %d files identified out of %d (freeblocks is now: %d)\n", inserted, filecount, hfsmp->hfs_hotfile_freeblks);
+       if (uncacheable) {
+               printf("hfs: %d files were marked as uncacheable\n", uncacheable);
+       }
 #endif
        
-out1:
-       (void) BTScanTerminate(&scanstate, &data, &data, &data);
-out2:  
-       hfs_end_transaction(hfsmp);
        if (iterator)
                FREE(iterator, M_TEMP);
+
        if (hfsmp->hfc_filevp) {
+               if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+                       hfs_pin_vnode(hfsmp, hfsmp->hfc_filevp, HFS_PIN_IT, NULL, vfs_context_kernel());
+               }
                (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
                hfsmp->hfc_filevp = NULL;
        }
+
        if (error == 0)
                hfsmp->hfc_stage = HFC_IDLE;
 
+       /* Finally, unlock the HFC mutex */
+       lck_mtx_unlock (&hfsmp->hfc_mutex);
+
        return (error);
 }
 
@@ -740,7 +1844,7 @@ hfs_hotfilesync(struct hfsmount *hfsmp, vfs_context_t ctx)
                        break;
        
                case HFC_ADOPTION:
-                       (void) hotfiles_adopt(hfsmp);
+                       (void) hotfiles_adopt(hfsmp, ctx);
                        break;
                default:
                        break;
@@ -778,6 +1882,20 @@ hfs_addhotfile(struct vnode *vp)
        return (error);
 }
 
+static int
+hf_ignore_process(const char *pname, size_t maxlen)
+{
+       if (   strncmp(pname, "mds", maxlen) == 0
+           || strncmp(pname, "mdworker", maxlen) == 0
+           || strncmp(pname, "mds_stores", maxlen) == 0
+           || strncmp(pname, "makewhatis", maxlen) == 0) {
+               return 1;
+       }
+
+       return 0;
+       
+}
+
 static int
 hfs_addhotfile_internal(struct vnode *vp)
 {
@@ -813,20 +1931,59 @@ hfs_addhotfile_internal(struct vnode *vp)
        ffp = VTOF(vp);
        cp = VTOC(vp);
 
-       if ((ffp->ff_bytesread == 0) ||
-           (ffp->ff_blocks == 0) ||
-           (ffp->ff_size == 0) ||
-           (ffp->ff_blocks > hotdata->maxblocks) ||
-           (cp->c_flag & (C_DELETED | C_NOEXISTS)) ||
-           (cp->c_bsdflags & UF_NODUMP) ||
-           (cp->c_atime < hfsmp->hfc_timebase)) {
-               return (0);
+       if (cp->c_attr.ca_recflags & (kHFSFastDevPinnedMask|kHFSDoNotFastDevPinMask)) {
+               // it's already a hotfile or can't be a hotfile...
+               return 0;
        }
 
-       temperature = ffp->ff_bytesread / ffp->ff_size;
-       if (temperature < hotdata->threshold) {
-               return (0);
+       if (vnode_isdir(vp) || vnode_issystem(vp) || (cp->c_flag & (C_DELETED | C_NOEXISTS))) {
+               return 0;
+       }
+
+       if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && vnode_isfastdevicecandidate(vp)) {
+               //
+               // On cooperative fusion (CF) systems we have different criteria for whether something
+               // can be pinned to the ssd.
+               //
+               if (cp->c_flag & (C_DELETED|C_NOEXISTS)) {
+                       //
+                       // dead files are definitely not worth caching
+                       //
+                       return 0;
+               } else if (ffp->ff_blocks == 0 && !(cp->c_bsdflags & UF_COMPRESSED) && !(cp->c_attr.ca_recflags & kHFSFastDevCandidateMask)) {
+                       //
+                       // empty files aren't worth caching but compressed ones might be, as are 
+                       // newly created files that live in WorthCaching directories... 
+                       //
+                       return 0;
+               }
+
+               char pname[256];
+               pname[0] = '\0';
+               proc_selfname(pname, sizeof(pname));
+               if (hf_ignore_process(pname, sizeof(pname))) {
+                       // ignore i/o's from certain system daemons 
+                       return 0;
+               }
+
+               temperature = cp->c_fileid;        // in memory we just keep it sorted by file-id
+       } else {
+               // the normal hard drive based hotfile checks
+               if ((ffp->ff_bytesread == 0) ||
+                   (ffp->ff_blocks == 0) ||
+                   (ffp->ff_size == 0) ||
+                   (ffp->ff_blocks > hotdata->maxblocks) ||
+                   (cp->c_bsdflags & (UF_NODUMP | UF_COMPRESSED)) ||
+                   (cp->c_atime < hfsmp->hfc_timebase)) {
+                       return (0);
+               }
+
+               temperature = ffp->ff_bytesread / ffp->ff_size;
+               if (temperature < hotdata->threshold) {
+                       return (0);
+               }
        }
+
        /*
         * If there is room or this file is hotter than
         * the coldest one then add it to the list.
@@ -834,72 +1991,222 @@ hfs_addhotfile_internal(struct vnode *vp)
         */
        if ((hotdata->activefiles < hfsmp->hfc_maxfiles) ||
            (hotdata->coldest == NULL) ||
-           (temperature > hotdata->coldest->temperature)) {
+           (temperature >= hotdata->coldest->temperature)) {
+               ++hotdata->refcount;
+               entry = hf_getnewentry(hotdata);
+               entry->temperature = temperature;
+               entry->fileid = cp->c_fileid;
+               //
+               // if ffp->ff_blocks is zero, it might be compressed so make sure we record
+               // that there's at least one block.
+               //
+               entry->blocks = ffp->ff_blocks ? ffp->ff_blocks : 1;   
+               if (hf_insert(hotdata, entry) == EEXIST) {
+                       // entry is already present, don't need to add it again
+                       entry->right = hotdata->freelist;
+                       hotdata->freelist = entry;
+               }
+               --hotdata->refcount;
+       }
+
+       return (0);
+}
+
+/*
+ * Remove a hot file from the recording list.
+ *
+ * This can happen when a hot file becomes
+ * an active vnode (active hot files are
+ * not kept in the recording list until the
+ * end of the recording period).
+ *
+ * Note: the cnode is locked on entry.
+ */
+int
+hfs_removehotfile(struct vnode *vp)
+{
+       hotfile_data_t *hotdata;
+       hfsmount_t *hfsmp;
+       cnode_t *cp;
+       filefork_t *ffp;
+       u_int32_t temperature;
+
+       hfsmp = VTOHFS(vp);
+       if (hfsmp->hfc_stage != HFC_RECORDING)
+               return (0);
+
+       if ((!vnode_isreg(vp)) || vnode_issystem(vp)) {
+               return (0);
+       }
+
+       ffp = VTOF(vp);
+       cp = VTOC(vp);
+
+       if ((ffp->ff_bytesread == 0) || (ffp->ff_blocks == 0) ||
+           (ffp->ff_size == 0) || (cp->c_atime < hfsmp->hfc_timebase)) {
+               return (0);
+       }
+
+       lck_mtx_lock(&hfsmp->hfc_mutex);
+       if (hfsmp->hfc_stage != HFC_RECORDING)
+               goto out;
+       if ((hotdata = (hotfile_data_t *)hfsmp->hfc_recdata) == NULL)
+               goto out;
+
+       temperature = ffp->ff_bytesread / ffp->ff_size;
+       if (temperature < hotdata->threshold)
+               goto out;
+
+       if (hotdata->coldest && (temperature >= hotdata->coldest->temperature)) {
+               ++hotdata->refcount;
+               hf_delete(hotdata, VTOC(vp)->c_fileid, temperature);
+               --hotdata->refcount;
+       }
+out:
+       lck_mtx_unlock(&hfsmp->hfc_mutex);
+       return (0);
+}
+
+int
+hfs_hotfile_deleted(__unused struct vnode *vp)
+{
+#if 1
+       return 0;
+#else  
+       //
+       // XXXdbg - this code, while it would work, would introduce a huge inefficiency
+       //          to deleting files as the way it's written would require us to open
+       //          the hotfile btree on every open, delete two records in it and then
+       //          close the hotfile btree (which involves more writes).
+       //
+       //          We actually can be lazy about deleting hotfile records for files
+       //          that get deleted.  When it's time to evict things, if we encounter
+       //          a record that references a dead file (i.e. a fileid which no
+       //          longer exists), the eviction code will remove the records.  Likewise
+       //          the code that scans the HotFile B-Tree at boot time to re-pin files
+       //          will remove dead records.
+       //
+
+       hotfile_data_t *hotdata;
+       hfsmount_t *hfsmp;
+       cnode_t *cp;
+       filefork_t *filefork;
+       u_int32_t temperature;
+       BTreeIterator * iterator = NULL;
+       FSBufferDescriptor record;
+       HotFileKey *key;
+       u_int32_t data;
+       int error=0;
+
+       cp = VTOC(vp);
+       if (cp == NULL || !(cp->c_attr.ca_recflags & kHFSFastDevPinnedMask)) {
+               return 0;
+       }
+
+       hfsmp = VTOHFS(vp);
+       if (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) {
+               return 0;
+       }
+       
+       if (hfc_btree_open(hfsmp, &hfsmp->hfc_filevp) != 0 || hfsmp->hfc_filevp == NULL) {
+               // either there is no hotfile info or it's damaged
+               return EINVAL;
+       }
+       
+       filefork = VTOF(hfsmp->hfc_filevp);
+       if (filefork == NULL) {
+               return 0;
+       }
+
+       MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
+       if (iterator == NULL) {
+               return ENOMEM;
+       }       
+       bzero(iterator, sizeof(*iterator));
+       key = (HotFileKey*) &iterator->key;
+
+       record.bufferAddress = &data;
+       record.itemSize = sizeof(u_int32_t);
+       record.itemCount = 1;
+
+       key->keyLength = HFC_KEYLENGTH;
+       key->temperature = HFC_LOOKUPTAG;
+       key->fileID = cp->c_fileid;
+       key->forkType = 0;
+
+       lck_mtx_lock(&hfsmp->hfc_mutex);
+       (void) BTInvalidateHint(iterator);
+       if (BTSearchRecord(filefork, iterator, &record, NULL, iterator) == 0) {
+               temperature = key->temperature;
+               hfc_btree_delete_record(hfsmp, iterator, key);
+       } else {
+               //printf("hfs: hotfile_deleted: did not find fileid %d\n", cp->c_fileid);
+               error = ENOENT;
+       }
+
+       if ((hotdata = (hotfile_data_t *)hfsmp->hfc_recdata) != NULL) {
+               // just in case, also make sure it's removed from the in-memory list as well
                ++hotdata->refcount;
-               entry = hf_getnewentry(hotdata);
-               entry->temperature = temperature;
-               entry->fileid = cp->c_fileid;
-               entry->blocks = ffp->ff_blocks;
-               hf_insert(hotdata, entry);
+               hf_delete(hotdata, cp->c_fileid, cp->c_fileid);
                --hotdata->refcount;
        }
 
-       return (0);
+       lck_mtx_unlock(&hfsmp->hfc_mutex);
+       FREE(iterator, M_TEMP);
+
+       hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+       
+       return error;
+#endif
 }
 
-/*
- * Remove a hot file from the recording list.
- *
- * This can happen when a hot file becomes
- * an active vnode (active hot files are
- * not kept in the recording list until the
- * end of the recording period).
- *
- * Note: the cnode is locked on entry.
- */
 int
-hfs_removehotfile(struct vnode *vp)
+hfs_hotfile_adjust_blocks(struct vnode *vp, int64_t num_blocks)
 {
-       hotfile_data_t *hotdata;
        hfsmount_t *hfsmp;
-       cnode_t *cp;
-       filefork_t *ffp;
-       u_int32_t temperature;
+       
+       if (vp == NULL) {
+               return 0;
+       }
 
        hfsmp = VTOHFS(vp);
-       if (hfsmp->hfc_stage != HFC_RECORDING)
-               return (0);
 
-       if ((!vnode_isreg(vp)) || vnode_issystem(vp)) {
-               return (0);
+       if (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) || num_blocks == 0 || vp == NULL) {
+               return 0;
        }
 
-       ffp = VTOF(vp);
-       cp = VTOC(vp);
-
-       if ((ffp->ff_bytesread == 0) || (ffp->ff_blocks == 0) ||
-           (ffp->ff_size == 0) || (cp->c_atime < hfsmp->hfc_timebase)) {
-               return (0);
+       //
+       // if file is not HotFileCached or it has the CanNotHotFile cache
+       // bit set then there is nothing to do
+       //
+       if (!(VTOC(vp)->c_attr.ca_recflags & kHFSFastDevPinnedMask) || (VTOC(vp)->c_attr.ca_recflags & kHFSDoNotFastDevPinMask)) {
+               // it's not a hot file or can't be one so don't bother tracking
+               return 0;
        }
+       
+       OSAddAtomic(num_blocks, &hfsmp->hfs_hotfile_blk_adjust);
 
-       lck_mtx_lock(&hfsmp->hfc_mutex);
-       if (hfsmp->hfc_stage != HFC_RECORDING)
-               goto out;
-       if ((hotdata = (hotfile_data_t *)hfsmp->hfc_recdata) == NULL)
-               goto out;
+       return (0);
+}
 
-       temperature = ffp->ff_bytesread / ffp->ff_size;
-       if (temperature < hotdata->threshold)
-               goto out;
+//
+// Assumes hfsmp->hfc_mutex is LOCKED
+//
+static int
+hfs_hotfile_cur_freeblks(hfsmount_t *hfsmp)
+{
+       if (hfsmp->hfc_stage < HFC_IDLE) {
+               return 0;
+       }
+       
+       int cur_blk_adjust = hfsmp->hfs_hotfile_blk_adjust;   // snap a copy of this value
 
-       if (hotdata->coldest && (temperature >= hotdata->coldest->temperature)) {
-               ++hotdata->refcount;
-               hf_delete(hotdata, VTOC(vp)->c_fileid, temperature);
-               --hotdata->refcount;
+       if (cur_blk_adjust) {
+               OSAddAtomic(-cur_blk_adjust, &hfsmp->hfs_hotfile_blk_adjust);
+               hfsmp->hfs_hotfile_freeblks += cur_blk_adjust;
        }
-out:
-       lck_mtx_unlock(&hfsmp->hfc_mutex);
-       return (0);
+
+       return hfsmp->hfs_hotfile_freeblks;
 }
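The two helpers above form a small deferred-accounting pattern: callers that cannot take hfc_mutex (hfs_hotfile_adjust_blocks) record block-count changes atomically in hfs_hotfile_blk_adjust, and the next caller that does hold the mutex (hfs_hotfile_cur_freeblks) folds the pending delta into hfs_hotfile_freeblks. A minimal userspace sketch of the same idea follows; the names and the pthread/stdatomic primitives are stand-ins for the kernel's OSAddAtomic and hfc_mutex, not the actual implementation.

    #include <stdatomic.h>
    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;  /* plays the role of hfc_mutex */
    static _Atomic int pending_adjust;   /* like hfs_hotfile_blk_adjust: updated without the lock */
    static int freeblks;                 /* like hfs_hotfile_freeblks: only touched with the lock held */

    /* Analogous to hfs_hotfile_adjust_blocks(): cheap, lock-free bookkeeping. */
    void adjust_blocks(int delta)
    {
        atomic_fetch_add(&pending_adjust, delta);
    }

    /* Analogous to hfs_hotfile_cur_freeblks(): consume the pending delta under the lock. */
    int cur_freeblks(void)
    {
        pthread_mutex_lock(&lock);
        int delta = atomic_exchange(&pending_adjust, 0);
        freeblks += delta;
        int result = freeblks;
        pthread_mutex_unlock(&lock);
        return result;
    }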
 
 
@@ -971,10 +2278,15 @@ hotfiles_refine(struct hfsmount *hfsmp)
        int  i;
        int  error = 0;
 
-
        if ((listp = (hotfilelist_t  *)hfsmp->hfc_recdata) == NULL)
                return (0);     
 
+       if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+               // on ssd's we don't refine the temperature since the
+               // replacement algorithm is simply random
+               return 0;
+       }
+
        mp = HFSTOVFS(hfsmp);
 
        MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
@@ -1016,12 +2328,12 @@ hotfiles_refine(struct hfsmount *hfsmp)
                 * Update thread entry with latest temperature.
                 */
                error = BTUpdateRecord(filefork, iterator,
-                               (IterateCallBackProcPtr)update_callback,
-                               &listp->hfl_hotfile[i].hf_temperature);
+                                      (IterateCallBackProcPtr)update_callback,
+                                     &listp->hfl_hotfile[i].hf_temperature);
                if (error) {
                        printf("hfs: hotfiles_refine: BTUpdateRecord failed %d (file %d)\n", error, key->fileID);
                        error = MacToVFSError(error);
-               //      break;
+                       //      break;
                }
                /*
                 * Re-key entry with latest temperature.
@@ -1049,7 +2361,6 @@ hotfiles_refine(struct hfsmount *hfsmp)
                        error = MacToVFSError(error);
                        break;
                }
-
                /*
                 * Invalidate this entry in the list.
                 */
@@ -1075,7 +2386,7 @@ out:
  * Requires that the hfc_mutex be held.
  */
 static int
-hotfiles_adopt(struct hfsmount *hfsmp)
+hotfiles_adopt(struct hfsmount *hfsmp, vfs_context_t ctx)
 {
        BTreeIterator * iterator = NULL;
        struct vnode *vp;
@@ -1091,6 +2402,14 @@ hotfiles_adopt(struct hfsmount *hfsmp)
        int  last;
        int  error = 0;
        int  startedtrans = 0;
+       //
+       // all files in a given adoption phase have a temperature
+       // that starts at a random value and then increases linearly.
+       // the idea is that during eviction, files that were adopted
+       // together will be evicted together
+       //
+       long starting_temp = random() % HF_TEMP_RANGE;
+       long temp_adjust = 0;
 
        if ((listp = (hotfilelist_t  *)hfsmp->hfc_recdata) == NULL)
                return (0);     
@@ -1108,6 +2427,14 @@ hotfiles_adopt(struct hfsmount *hfsmp)
                return (ENOMEM);
        }
 
+#if HFC_VERBOSE
+               printf("hfs:%s: hotfiles_adopt: (hfl_next: %d, hotfile start/end block: %d - %d; max/free: %d/%d; maxfiles: %d)\n",
+                      hfsmp->vcbVN,
+                      listp->hfl_next,
+                      hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end,
+                      hfsmp->hfs_hotfile_maxblks, hfsmp->hfs_hotfile_freeblks, hfsmp->hfc_maxfiles);
+#endif
+
        stage = hfsmp->hfc_stage;
        hfsmp->hfc_stage = HFC_BUSY;
 
@@ -1128,17 +2455,30 @@ hotfiles_adopt(struct hfsmount *hfsmp)
 
        for (i = listp->hfl_next; (i < last) && (blksmoved < HFC_BLKSPERSYNC); ++i) {
                /*
-                * Skip invalid entries (already in hot area).
+                * Skip entries that aren't going to work.
                 */
                if (listp->hfl_hotfile[i].hf_temperature == 0) {
-                               listp->hfl_next++;
-                               continue;
+                       //printf("hfs: zero temp on file-id %d\n", listp->hfl_hotfile[i].hf_fileid);
+                       listp->hfl_next++;
+                       continue;
+               }
+               if (listp->hfl_hotfile[i].hf_fileid == VTOC(hfsmp->hfc_filevp)->c_fileid) {
+                       //printf("hfs: cannot adopt the hotfile b-tree itself! (file-id %d)\n", listp->hfl_hotfile[i].hf_fileid);
+                       listp->hfl_next++;
+                       continue;
+               }
+               if (listp->hfl_hotfile[i].hf_fileid < kHFSFirstUserCatalogNodeID) {
+                       //printf("hfs: cannot adopt system files (file-id %d)\n", listp->hfl_hotfile[i].hf_fileid);
+                       listp->hfl_next++;
+                       continue;
                }
+
                /*
                 * Acquire a vnode for this file.
                 */
                error = hfs_vget(hfsmp, listp->hfl_hotfile[i].hf_fileid, &vp, 0, 0);
                if (error) {
+                       //printf("failed to get fileid %d (err %d)\n", listp->hfl_hotfile[i].hf_fileid, error);
                        if (error == ENOENT) {
                                error = 0;
                                listp->hfl_next++;
@@ -1146,16 +2486,24 @@ hotfiles_adopt(struct hfsmount *hfsmp)
                        }
                        break;
                }
+
+               //printf("hfs: examining hotfile entry w/fileid %d, temp %d, blocks %d (HotFileCached: %s)\n",
+               //       listp->hfl_hotfile[i].hf_fileid, listp->hfl_hotfile[i].hf_temperature,
+               //       listp->hfl_hotfile[i].hf_blocks,
+               //       (VTOC(vp)->c_attr.ca_recflags & kHFSFastDevPinnedMask) ? "YES" : "NO");
+
                if (!vnode_isreg(vp)) {
                        /* Symlinks are ineligible for adoption into the hotfile zone.  */
-                       printf("hfs: hotfiles_adopt: huh, not a file %d (%d)\n", listp->hfl_hotfile[i].hf_fileid, VTOC(vp)->c_cnid);
+                       //printf("hfs: hotfiles_adopt: huh, not a file %d (%d)\n", listp->hfl_hotfile[i].hf_fileid, VTOC(vp)->c_cnid);
                        hfs_unlock(VTOC(vp));
                        vnode_put(vp);
                        listp->hfl_hotfile[i].hf_temperature = 0;
                        listp->hfl_next++;
                        continue;  /* stale entry, go to next */
                }
-               if (hotextents(hfsmp, &VTOF(vp)->ff_extents[0])) {
+               if (   (VTOC(vp)->c_flag & (C_DELETED | C_NOEXISTS))
+                   || (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && hotextents(hfsmp, &VTOF(vp)->ff_extents[0]))
+                   || (VTOC(vp)->c_attr.ca_recflags & (kHFSFastDevPinnedMask|kHFSDoNotFastDevPinMask))) {
                        hfs_unlock(VTOC(vp));
                        vnode_put(vp);
                        listp->hfl_hotfile[i].hf_temperature = 0;
@@ -1163,8 +2511,35 @@ hotfiles_adopt(struct hfsmount *hfsmp)
                        listp->hfl_totalblocks -= listp->hfl_hotfile[i].hf_blocks;
                        continue;  /* stale entry, go to next */
                }
+
                fileblocks = VTOF(vp)->ff_blocks;
-               if (fileblocks > hfsmp->hfs_hotfile_freeblks) {
+
+               //
+               // for CF, if the file is empty (and not compressed) or it is too large,
+               // do not try to pin it.  (note: if fileblocks == 0 but the file is marked
+               // as compressed, we may still be able to cache it).
+               //
+               if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) &&
+                   ((fileblocks == 0 && !(VTOC(vp)->c_bsdflags & UF_COMPRESSED)) ||
+                    (unsigned int)fileblocks > (HFC_MAXIMUM_FILESIZE / (uint64_t)HFSTOVCB(hfsmp)->blockSize))) {
+                       // don't try to cache something too large or that's zero-bytes
+
+                       vnode_clearfastdevicecandidate(vp);    // turn off the fast-dev-candidate flag so we don't keep trying to cache it.
+
+                       hfs_unlock(VTOC(vp));
+                       vnode_put(vp);
+                       listp->hfl_hotfile[i].hf_temperature = 0;
+                       listp->hfl_next++;
+                       listp->hfl_totalblocks -= listp->hfl_hotfile[i].hf_blocks;
+                       continue;  /* entry is too big, just carry on with the next guy */
+               }
+
+               if (fileblocks > hfs_hotfile_cur_freeblks(hfsmp)) {
+                       //
+                       // No room for this file.  Although eviction should have made space
+                       // it's best that we check here as well since writes to existing
+                       // hotfiles may have eaten up space since we performed eviction
+                       //
                        hfs_unlock(VTOC(vp));
                        vnode_put(vp);
                        listp->hfl_next++;
@@ -1174,6 +2549,10 @@ hotfiles_adopt(struct hfsmount *hfsmp)
                
                if ((blksmoved > 0) &&
                    (blksmoved + fileblocks) > HFC_BLKSPERSYNC) {
+                       //
+                       // we've done enough work, let's be nice to the system and
+                       // stop until the next iteration
+                       //
                        hfs_unlock(VTOC(vp));
                        vnode_put(vp);
                        break;  /* adopt this entry the next time around */
@@ -1183,10 +2562,76 @@ hotfiles_adopt(struct hfsmount *hfsmp)
                else
                        data = 0x3f3f3f3f;
 
-               error = hfs_relocate(vp, hfsmp->hfs_hotfile_start, kauth_cred_get(), current_proc());
+
+               if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+                       //
+                       // For CF we pin the blocks belonging to the file
+                       // to the "fast" (aka ssd) media
+                       //
+                       uint32_t pinned_blocks;
+
+                       if (vnode_isautocandidate(vp)) {
+                               VTOC(vp)->c_attr.ca_recflags |= kHFSAutoCandidateMask;
+                       }
+                       if (VTOC(vp)->c_attr.ca_recflags & kHFSAutoCandidateMask) {
+                               //
+                               // this moves auto-cached files to the higher tier 
+                               // of "temperatures" which means they are less likely
+                               // to get evicted (user selected hotfiles will get
+                               // evicted first in the theory that they change more
+                               // frequently compared to system files)
+                               //
+                               temp_adjust = MAX_NORMAL_TEMP;
+                       } else {
+                               temp_adjust = 0;
+                       }
+
+                       hfs_unlock(VTOC(vp));  // don't need an exclusive lock for this
+                       hfs_lock(VTOC(vp), HFS_SHARED_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+
+                       error = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT, &pinned_blocks, ctx);
+
+                       fileblocks = pinned_blocks;
+
+                       // go back to an exclusive lock since we're going to modify the cnode again
+                       hfs_unlock(VTOC(vp));
+                       hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+               } else {
+                       //
+                       // Old style hotfiles moves the data to the center (aka "hot")
+                       // region of the disk
+                       //
+                       error = hfs_relocate(vp, hfsmp->hfs_hotfile_start, kauth_cred_get(), current_proc());
+               }
+
+               if (!error) {
+                       VTOC(vp)->c_attr.ca_recflags |= kHFSFastDevPinnedMask;
+                       VTOC(vp)->c_flag |= C_MODIFIED;
+               } else if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && error == EALREADY) {
+                       //
+                       // If hfs_pin_vnode() returned EALREADY then this file is not
+                       // ever able to be hotfile cached the normal way.  This can
+                       // happen with compressed files which have their data stored
+                       // in an extended attribute.  We flag them so that we won't
+                       // bother to try and hotfile cache them again the next time
+                       // they're read.
+                       //
+                       VTOC(vp)->c_attr.ca_recflags |= kHFSDoNotFastDevPinMask;
+                       VTOC(vp)->c_flag |= C_MODIFIED;
+               }
+
                hfs_unlock(VTOC(vp));
                vnode_put(vp);
                if (error) {
+#if HFC_VERBOSE
+                       if (error != EALREADY) {
+                               printf("hfs: hotfiles_adopt: could not relocate file %d (err %d)\n", listp->hfl_hotfile[i].hf_fileid, error);
+                       }
+#endif
+
+                       if (last < listp->hfl_count) {
+                               last++;
+                       }
                        /* Move on to next item. */
                        listp->hfl_next++;
                        continue;
@@ -1197,6 +2642,22 @@ hotfiles_adopt(struct hfsmount *hfsmp)
                
                /* Insert hot file entry */
                key->keyLength   = HFC_KEYLENGTH;
+
+               if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+                       //
+                       // The "temperature" for a CF hotfile is simply a random
+                       // number that we sequentially increment for each file in
+                       // the set of files we're currently adopting.  This has the
+                       // nice property that all of the files we pin to the ssd
+                       // in the current phase will sort together in the hotfile
+                       // btree.  When eviction time comes we will evict them
+                       // together as well.  This gives the eviction phase temporal
+                       // locality - things written together get evicted together
+                       // which is what ssd's like.
+                       //
+                       listp->hfl_hotfile[i].hf_temperature = (uint32_t)temp_adjust + starting_temp++;
+               }
+
                key->temperature = listp->hfl_hotfile[i].hf_temperature;
                key->fileID      = listp->hfl_hotfile[i].hf_fileid;
                key->forkType    = 0;
@@ -1210,8 +2671,9 @@ hotfiles_adopt(struct hfsmount *hfsmp)
 
                error = BTInsertRecord(filefork, iterator, &record, record.itemSize);
                if (error) {
-                       printf("hfs: hotfiles_adopt: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID);
+                       int orig_error = error;
                        error = MacToVFSError(error);
+                       printf("hfs: hotfiles_adopt:1: BTInsertRecord failed %d/%d (fileid %d)\n", error, orig_error, key->fileID);
                        stage = HFC_IDLE;
                        break;
                }
@@ -1224,12 +2686,20 @@ hotfiles_adopt(struct hfsmount *hfsmp)
                data = listp->hfl_hotfile[i].hf_temperature;
                error = BTInsertRecord(filefork, iterator, &record, record.itemSize);
                if (error) {
-                       printf("hfs: hotfiles_adopt: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID);
+                       int orig_error = error;
                        error = MacToVFSError(error);
+                       printf("hfs: hotfiles_adopt:2: BTInsertRecord failed %d/%d (fileid %d)\n", error, orig_error, key->fileID);
                        stage = HFC_IDLE;
                        break;
+               } else {
+                       (void) BTFlushPath(filefork);
+                       blksmoved += fileblocks;
+               }
+
+               listp->hfl_next++;
+               if (listp->hfl_next >= listp->hfl_count) {
+                       break;
                }
-               (void) BTFlushPath(filefork);
 
                /* Transaction complete. */
                if (startedtrans) {
@@ -1237,12 +2707,7 @@ hotfiles_adopt(struct hfsmount *hfsmp)
                    startedtrans = 0;
                }
 
-               blksmoved += fileblocks;
-               listp->hfl_next++;
-               if (listp->hfl_next >= listp->hfl_count) {
-                       break;
-               }
-               if (hfsmp->hfs_hotfile_freeblks <= 0) {
+               if (hfs_hotfile_cur_freeblks(hfsmp) <= 0) {
 #if HFC_VERBOSE
                        printf("hfs: hotfiles_adopt: free space exhausted (%d)\n", hfsmp->hfs_hotfile_freeblks);
 #endif
@@ -1251,10 +2716,19 @@ hotfiles_adopt(struct hfsmount *hfsmp)
        } /* end for */
 
 #if HFC_VERBOSE
-       printf("hfs: hotfiles_adopt: [%d] adopted %d blocks (%d left)\n", listp->hfl_next, blksmoved, listp->hfl_totalblocks);
+       printf("hfs: hotfiles_adopt: [%d] adopted %d blocks (%d files left)\n", listp->hfl_next, blksmoved, listp->hfl_count - i);
 #endif
+       if (!startedtrans) {
+               // start a txn so we'll save the btree summary info
+               if (hfs_start_transaction(hfsmp) == 0) {
+                       startedtrans = 1;
+               }
+       }               
+
        /* Finish any outstanding transactions. */
        if (startedtrans) {
+               save_btree_user_info(hfsmp);
+
                (void) BTFlushPath(filefork);
                hfs_end_transaction(hfsmp);
                startedtrans = 0;
@@ -1312,6 +2786,13 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx)
                return (EPERM);
        }
 
+#if HFC_VERBOSE
+               printf("hfs:%s: hotfiles_evict (hotfile start/end block: %d - %d; max/free: %d/%d; maxfiles: %d)\n",
+                      hfsmp->vcbVN,
+                      hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end,
+                      hfsmp->hfs_hotfile_maxblks, hfsmp->hfs_hotfile_freeblks, hfsmp->hfc_maxfiles);
+#endif
+
        MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
        if (iterator == NULL) {
                hfs_unlock(VTOC(hfsmp->hfc_filevp));
@@ -1329,6 +2810,10 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx)
 
        filefork = VTOF(hfsmp->hfc_filevp);
 
+#if HFC_VERBOSE
+       printf("hfs: hotfiles_evict: reclaim blks %d\n", listp->hfl_reclaimblks);
+#endif
+       
        while (listp->hfl_reclaimblks > 0 &&
               blksmoved < HFC_BLKSPERSYNC &&
               filesmoved < HFC_FILESPERSYNC) {
@@ -1376,7 +2861,7 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx)
                 * here.  We do not want to move them. 
                 */
                if (!vnode_isreg(vp)) {
-                       printf("hfs: hotfiles_evict: huh, not a file %d\n", key->fileID);
+                       //printf("hfs: hotfiles_evict: huh, not a file %d\n", key->fileID);
                        hfs_unlock(VTOC(vp));
                        vnode_put(vp);
                        goto delete;  /* invalid entry, go to next */
@@ -1392,7 +2877,7 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx)
                /*
                 * Make sure file is in the hot area.
                 */
-               if (!hotextents(hfsmp, &VTOF(vp)->ff_extents[0])) {
+               if (!hotextents(hfsmp, &VTOF(vp)->ff_extents[0]) && !(VTOC(vp)->c_attr.ca_recflags & kHFSFastDevPinnedMask)) {
 #if HFC_VERBOSE
                        printf("hfs: hotfiles_evict: file %d isn't hot!\n", key->fileID);
 #endif
@@ -1402,15 +2887,38 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx)
                }
                
                /*
-                * Relocate file out of hot area.
+                * Relocate file out of hot area.  On cooperative fusion (CF) that just 
+                * means un-pinning the data from the ssd.  For traditional hotfiles that means moving
+                * the file data out of the hot region of the disk.
                 */
-               error = hfs_relocate(vp, HFSTOVCB(hfsmp)->nextAllocation, vfs_context_ucred(ctx), vfs_context_proc(ctx));
+               if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+                       uint32_t pinned_blocks;
+                       
+                       hfs_unlock(VTOC(vp));  // don't need an exclusive lock for this
+                       hfs_lock(VTOC(vp), HFS_SHARED_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+
+                       error = hfs_pin_vnode(hfsmp, vp, HFS_UNPIN_IT, &pinned_blocks, ctx);
+                       fileblocks = pinned_blocks;
+
+                       if (!error) {
+                               // go back to an exclusive lock since we're going to modify the cnode again
+                               hfs_unlock(VTOC(vp));
+                               hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+                       }
+               } else {
+                       error = hfs_relocate(vp, HFSTOVCB(hfsmp)->nextAllocation, vfs_context_ucred(ctx), vfs_context_proc(ctx));
+               }
                if (error) {
+#if HFC_VERBOSE
                        printf("hfs: hotfiles_evict: err %d relocating file %d\n", error, key->fileID);
+#endif
                        hfs_unlock(VTOC(vp));
                        vnode_put(vp);
                        bt_op = kBTreeNextRecord;
                        goto next;  /* go to next */
+               } else {
+                       VTOC(vp)->c_attr.ca_recflags &= ~kHFSFastDevPinnedMask;
+                       VTOC(vp)->c_flag |= C_MODIFIED;
                }
 
                //
@@ -1466,6 +2974,8 @@ next:
 #endif
        /* Finish any outstanding transactions. */
        if (startedtrans) {
+               save_btree_user_info(hfsmp);
+
                (void) BTFlushPath(filefork);
                hfs_end_transaction(hfsmp);
                startedtrans = 0;
@@ -1511,6 +3021,13 @@ hotfiles_age(struct hfsmount *hfsmp)
        u_int16_t  reclen;
 
 
+       if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+               //
+               // hotfiles don't age on CF
+               //
+               return 0;
+       }
+
        MALLOC(iterator, BTreeIterator *, 2 * sizeof(*iterator), M_TEMP, M_WAITOK);
        if (iterator == NULL) {
                error = ENOMEM;
@@ -1690,6 +3207,12 @@ hotextents(struct hfsmount *hfsmp, HFSPlusExtentDescriptor * extents)
  */
 static int
 hfc_btree_open(struct hfsmount *hfsmp, struct vnode **vpp)
+{
+       return hfc_btree_open_ext(hfsmp, vpp, 0);
+}
+
+static int
+hfc_btree_open_ext(struct hfsmount *hfsmp, struct vnode **vpp, int ignore_btree_errs)
 {
        proc_t p;
        struct vnode *vp;
@@ -1745,8 +3268,12 @@ again:
        /* Open the B-tree file for writing... */
        error = BTOpenPath(VTOF(vp), (KeyCompareProcPtr) hfc_comparekeys);      
        if (error) {
-               printf("hfs: hfc_btree_open: BTOpenPath error %d\n", error);
-               error = MacToVFSError(error);
+               if (!ignore_btree_errs) {
+                       printf("hfs: hfc_btree_open: BTOpenPath error %d; filesize %lld\n", error, VTOF(vp)->ff_size);
+                       error = MacToVFSError(error);
+               } else {
+                       error = 0;
+               }
        }
 
        hfs_unlock(VTOC(vp));
@@ -1759,6 +3286,18 @@ again:
        if (!vnode_issystem(vp))
                panic("hfs: hfc_btree_open: not a system file (vp = %p)", vp);
 
+       HotFilesInfo hotfileinfo;
+
+       if (error == 0 && (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) {
+               if ((BTGetUserData(VTOF(vp), &hotfileinfo, sizeof(hotfileinfo)) == 0) && (SWAP_BE32 (hotfileinfo.magic) == HFC_MAGIC)) {
+                       if (hfsmp->hfs_hotfile_freeblks == 0) {
+                               hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks - SWAP_BE32 (hotfileinfo.usedblocks);
+                       }
+
+                       hfs_hotfile_cur_freeblks(hfsmp);        // factors in any adjustments that happened at run-time
+               }
+       }
+       
        return (error);
 }
 
@@ -1775,7 +3314,7 @@ hfc_btree_close(struct hfsmount *hfsmp, struct vnode *vp)
 
 
        if (hfsmp->jnl) {
-           hfs_journal_flush(hfsmp, FALSE);
+           hfs_flush(hfsmp, HFS_FLUSH_JOURNAL);
        }
 
        if (vnode_get(vp) == 0) {
@@ -1793,6 +3332,106 @@ hfc_btree_close(struct hfsmount *hfsmp, struct vnode *vp)
        return (error);
 }
 
+//
+// Assumes that hfsmp->hfc_filevp points to the hotfile btree vnode
+// (i.e. you called hfc_btree_open() ahead of time)
+//
+static int
+hfc_btree_delete_record(struct hfsmount *hfsmp, BTreeIterator *iterator, HotFileKey *key)
+{
+       int error;
+       filefork_t *filefork=VTOF(hfsmp->hfc_filevp);
+
+       /* Start a new transaction before calling BTree code. */
+       if (hfs_start_transaction(hfsmp) != 0) {
+               return EINVAL;
+       }
+
+       error = BTDeleteRecord(filefork, iterator);
+       if (error) {
+               error = MacToVFSError(error);
+               printf("hfs: failed to delete record for file-id %d : err %d\n", key->fileID, error);
+               goto out;
+       }
+
+       int savedtemp;
+       savedtemp = key->temperature;
+       key->temperature = HFC_LOOKUPTAG;
+       error = BTDeleteRecord(filefork, iterator);
+       if (error) {
+               error = MacToVFSError(error);
+               printf("hfs:2: failed to delete record for file-id %d : err %d\n", key->fileID, error);
+       }
+       key->temperature = savedtemp;
+
+       (void) BTFlushPath(filefork);
+
+out:
+       /* Transaction complete. */
+       hfs_end_transaction(hfsmp);
+
+       return error;
+}
+
+//
+// You have to have already opened the hotfile btree so
+// that hfsmp->hfc_filevp is filled in.
+//
+static int
+hfc_btree_delete(struct hfsmount *hfsmp)
+{
+       struct vnode *dvp = NULL;
+       vfs_context_t ctx = vfs_context_current();
+       struct vnode_attr va;
+       struct componentname cname;
+       static char filename[] = HFC_FILENAME;
+       int  error;
+
+       error = VFS_ROOT(HFSTOVFS(hfsmp), &dvp, ctx);
+       if (error) {
+               return (error);
+       }
+       cname.cn_nameiop = DELETE;
+       cname.cn_flags = ISLASTCN;
+       cname.cn_context = ctx;
+       cname.cn_pnbuf = filename;
+       cname.cn_pnlen = sizeof(filename);
+       cname.cn_nameptr = filename;
+       cname.cn_namelen = strlen(filename);
+       cname.cn_hash = 0;
+       cname.cn_consume = 0;
+
+       VATTR_INIT(&va);
+       VATTR_SET(&va, va_type, VREG);
+       VATTR_SET(&va, va_mode, S_IFREG | S_IRUSR | S_IWUSR);
+       VATTR_SET(&va, va_uid, 0);
+       VATTR_SET(&va, va_gid, 0);
+
+       if (hfs_start_transaction(hfsmp) != 0) {
+           error = EINVAL;
+           goto out;
+       } 
+
+       /* call ourselves directly, ignore the higher-level VFS file creation code */
+       error = VNOP_REMOVE(dvp, hfsmp->hfc_filevp, &cname, 0, ctx);
+       if (error) {
+               printf("hfs: error %d removing HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN);
+       }
+
+       hfs_end_transaction(hfsmp);
+
+out:
+       if (dvp) {
+               vnode_put(dvp);
+               dvp = NULL;
+       }
+
+       return 0;
+}
+
+
+
+
 /*
  *  Create a hot files btree file.
  *
@@ -1877,7 +3516,7 @@ hfc_btree_create(struct hfsmount *hfsmp, unsigned int nodesize, unsigned int ent
                ((FndrFileInfo *)&cp->c_finderinfo[0])->fdFlags |=
                        SWAP_BE16 (kIsInvisible + kNameLocked);
 
-               if (kmem_alloc(kernel_map, (vm_offset_t *)&buffer, nodesize)) {
+               if (kmem_alloc(kernel_map, (vm_offset_t *)&buffer, nodesize, VM_KERN_MEMORY_FILE)) {
                        error = ENOMEM;
                        goto out;
                }       
@@ -1918,7 +3557,14 @@ hfc_btree_create(struct hfsmount *hfsmp, unsigned int nodesize, unsigned int ent
                hotfileinfo->timeleft    = 0;
                hotfileinfo->threshold   = SWAP_BE32 (HFC_MINIMUM_TEMPERATURE);
                hotfileinfo->maxfileblks = SWAP_BE32 (HFC_MAXIMUM_FILESIZE / HFSTOVCB(hfsmp)->blockSize);
-               hotfileinfo->maxfilecnt  = SWAP_BE32 (HFC_DEFAULT_FILE_COUNT);
+               if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+                       if (hfsmp->hfs_hotfile_freeblks == 0) {
+                               hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks;
+                       }
+                       hotfileinfo->usedblocks = SWAP_BE32 (hfsmp->hfs_hotfile_maxblks - hfsmp->hfs_hotfile_freeblks);
+               } else {
+                       hotfileinfo->maxfilecnt  = SWAP_BE32 (HFC_DEFAULT_FILE_COUNT);
+               }
                strlcpy((char *)hotfileinfo->tag, hfc_tag,
                        sizeof hotfileinfo->tag);
                offset += kBTreeHeaderUserBytes;
@@ -2049,7 +3695,7 @@ hf_lookup(hotfile_data_t *hotdata, u_int32_t fileid, u_int32_t temperature)
 /*
  * Insert a hot file entry into the tree.
  */
-static void
+static int
 hf_insert(hotfile_data_t *hotdata, hotfile_entry_t *newentry) 
 {
        hotfile_entry_t *entry = hotdata->rootentry;
@@ -2060,44 +3706,48 @@ hf_insert(hotfile_data_t *hotdata, hotfile_entry_t *newentry)
                hotdata->rootentry = newentry;
                hotdata->coldest = newentry;
                hotdata->activefiles++;
-               return;
+               return 0;
        }
 
        while (entry) {
                if (temperature > entry->temperature) {
-                       if (entry->right)
+                       if (entry->right) {
                                entry = entry->right;
-                       else {
+                       } else {
                                entry->right = newentry;
                                break;
                        }
                } else if (temperature < entry->temperature) {
-                       if (entry->left) 
+                       if (entry->left) {
                                entry = entry->left;
-                       else {
+                       } else {
                                entry->left = newentry;
                                break;
                        }
                } else if (fileid > entry->fileid) { 
-                       if (entry->right)
+                       if (entry->right) {
                                entry = entry->right;
-                       else {
+                       } else {
                                if (entry->fileid != fileid)
                                        entry->right = newentry;
                                break;
                        }
                } else { 
-                       if (entry->left) 
+                       if (entry->left) {
                                entry = entry->left;
-                       else {
-                               if (entry->fileid != fileid)
+                       else {
+                               if (entry->fileid != fileid) {
                                        entry->left = newentry;
+                               } else {
+                                       return EEXIST;
+                               }
                                break;
                        }
                }
        }
 
        hotdata->activefiles++;
+       return 0;
 }
 
 /*
@@ -2158,7 +3808,7 @@ hf_delete(hotfile_data_t *hotdata, u_int32_t fileid, u_int32_t temperature)
 
        if (entry) {
                /*
-                * Reorginize the sub-trees spanning from our entry.
+                * Reorganize the sub-trees spanning from our entry.
                 */
                if ((next = entry->right)) {
                        hotfile_entry_t *pnextl, *psub;
@@ -2254,7 +3904,7 @@ hf_getsortedlist(hotfile_data_t * hotdata, hotfilelist_t *sortedlist)
        sortedlist->hfl_count = i;
        
 #if HFC_VERBOSE
-       printf("hfs: hf_getsortedlist returned %d entries\n", i);
+       printf("hfs: hf_getsortedlist returning %d entries w/%d total blocks\n", i, sortedlist->hfl_totalblocks);
 #endif
 }
 
index 5c1ac29bba77bb8538d10c4fea734cf279629c9d..7d868195458e9e2f6e4a9a1724c44763018cc8bc 100644 (file)
 /*
  * Temperature measurement constraints.
  */
-#define HFC_DEFAULT_FILE_COUNT  1000
-#define HFC_DEFAULT_DURATION            (3600 * 60)
+#define HFC_DEFAULT_FILE_COUNT  hfc_default_file_count
+#define HFC_DEFAULT_DURATION     hfc_default_duration
 #define HFC_CUMULATIVE_CYCLES   3
-#define HFC_MAXIMUM_FILE_COUNT  5000
-#define HFC_MAXIMUM_FILESIZE    (10 * 1024 * 1024)
+#define HFC_MAXIMUM_FILE_COUNT  hfc_max_file_count
+#define HFC_MAXIMUM_FILESIZE    hfc_max_file_size 
 #define HFC_MINIMUM_TEMPERATURE  24
 
 
@@ -95,9 +95,16 @@ struct HotFilesInfo {
        u_int32_t       timeleft;    /* time remaining in recording period (secs) */
        u_int32_t       threshold;
        u_int32_t       maxfileblks;
-       u_int32_t       maxfilecnt;
+       union {
+               u_int32_t       _maxfilecnt;   // on hdd's we track the max # of files
+               u_int32_t       _usedblocks;   // on ssd's we track how many blocks are used
+       } _u;
        u_int8_t        tag[32];
 };
+
+#define usedblocks _u._usedblocks
+#define maxfilecnt _u._maxfilecnt
+
 typedef struct HotFilesInfo HotFilesInfo;
 
 #define HFC_MAGIC      0xFF28FF26
@@ -118,6 +125,11 @@ int  hfs_recording_suspend (struct hfsmount *);
 
 int  hfs_addhotfile (struct vnode *);
 int  hfs_removehotfile (struct vnode *);
+int  hfs_hotfile_deleted(struct vnode *vp);   // called when a file is deleted
+void hfs_repin_hotfiles(struct hfsmount *);
+
+// call this to adjust the number of used hotfile blocks either up/down
+int  hfs_hotfile_adjust_blocks(struct vnode *vp, int64_t num_blocks);
 
 #endif /* __APPLE_API_PRIVATE */
 #endif /* KERNEL */
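The maxfilecnt/usedblocks union above lets the same 32-bit slot in the on-disk HotFilesInfo serve both modes: hard-drive hotfiles keep the maximum tracked file count there, while CF/ssd pinning stores how many hotfile-zone blocks are in use so the free-block count survives a remount. A small sketch of the round trip implied by the hfc_btree_create()/hfc_btree_open_ext() changes, using htonl/ntohl as stand-ins for SWAP_BE32:

    #include <stdint.h>
    #include <arpa/inet.h>

    /* On create/flush: store blocks-in-use, big-endian, from the in-memory counters. */
    static uint32_t pack_usedblocks(uint32_t maxblks, uint32_t freeblks)
    {
        return htonl(maxblks - freeblks);
    }

    /* On open: recover the free-block count from the stored value. */
    static uint32_t unpack_freeblks(uint32_t maxblks, uint32_t usedblocks_be)
    {
        return maxblks - ntohl(usedblocks_be);
    }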
index 324a15f16b0ab305c828fc106afa9908bcfa3293..827fc4f2970d1163e5d7d6c61dfe139eaa254f82 100644 (file)
@@ -1,3 +1,33 @@
+/*
+ * Copyright (c) 2014 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#ifndef HFS_KDEBUG_H_
+#define HFS_KDEBUG_H_
+
 #include <sys/kdebug.h>
 
 /*
@@ -22,9 +52,9 @@ enum {
        HFSDBG_BLOCK_DEALLOCATE         = HFSDBG_CODE(5),       /* 0x03080014 */
        HFSDBG_READ_BITMAP_BLOCK        = HFSDBG_CODE(6),       /* 0x03080018 */
        HFSDBG_RELEASE_BITMAP_BLOCK     = HFSDBG_CODE(7),       /* 0x0308001C */
-       HFSDBG_ALLOC_CONTIG_BITMAP      = HFSDBG_CODE(8),       /* 0x03080020 */
+       HFSDBG_FIND_CONTIG_BITMAP       = HFSDBG_CODE(8),       /* 0x03080020 */
        HFSDBG_ALLOC_ANY_BITMAP         = HFSDBG_CODE(9),       /* 0x03080024 */
-       HFSDBG_ALLOC_KNOWN_BITMAP       = HFSDBG_CODE(10),      /* 0x03080028 */
+       HFSDBG_ALLOC_FIND_KNOWN         = HFSDBG_CODE(10),      /* 0x03080028 */
        HFSDBG_MARK_ALLOC_BITMAP        = HFSDBG_CODE(11),      /* 0x0308002C */
        HFSDBG_MARK_FREE_BITMAP         = HFSDBG_CODE(12),      /* 0x03080030 */
        HFSDBG_BLOCK_FIND_CONTIG        = HFSDBG_CODE(13),      /* 0x03080034 */
@@ -38,7 +68,7 @@ enum {
        HFSDBG_SYNCER                   = HFSDBG_CODE(21),      /* 0x03080054 */
        HFSDBG_SYNCER_TIMED             = HFSDBG_CODE(22),      /* 0x03080058 */
        HFSDBG_UNMAP_SCAN               = HFSDBG_CODE(23),      /* 0x0308005C */        
-       HFSDBG_UNMAP_SCAN_TRIM          = HFSDBG_CODE(24)       /* 0x03080060 */
+       HFSDBG_UNMAP_SCAN_TRIM          = HFSDBG_CODE(24),      /* 0x03080060 */
 };
 
 /*
@@ -62,10 +92,10 @@ enum {
     5       HFSDBG_BLOCK_DEALLOCATE     startBlock, blockCount, flags, 0, 0 ... err, 0, 0, 0, 0
     6       HFSDBG_READ_BITMAP_BLOCK    startBlock, 0, 0, 0, 0 ... err, 0, 0, 0, 0
     7       HFSDBG_RELEASE_BITMAP_BLOCK dirty, 0, 0, 0, 0 ... 0, 0, 0, 0, 0
-    8       HFSDBG_ALLOC_CONTIG_BITMAP  startBlock, minBlocks, maxBlocks, useMeta, 0 ... err, actualStartBlock, actualBlockCount, 0, 0
+    8       HFSDBG_FIND_CONTIG_BITMAP   startBlock, minBlocks, maxBlocks, useMeta, 0 ... err, actualStartBlock, actualBlockCount, 0, 0
     9       HFSDBG_ALLOC_ANY_BITMAP     startBlock, endBlock,  maxBlocks, useMeta, 0 ... err, actualStartBlock, actualBlockCount, 0, 0
-    10      HFSDBG_ALLOC_KNOWN_BITMAP   0, 0, maxBlocks, 0, 0 ... err, actualStartBlock, actualBlockCount, 0, 0
-    11      HFSDBG_MARK_ALLOC_BITMAP    startBlock, blockCount, 0, 0, 0 ... err, 0, 0, 0, 0
+    10      HFSDBG_ALLOC_FIND_KNOWN     0, 0, maxBlocks, 0, 0 ... err, actualStartBlock, actualBlockCount, 0, 0
+    11      HFSDBG_MARK_ALLOC_BITMAP    startBlock, blockCount, flags, 0, 0 ... err, 0, 0, 0, 0
     12      HFSDBG_MARK_FREE_BITMAP     startBlock, blockCount, valid, 0, 0 ... err, 0, 0, 0, 0
     13      HFSDBG_BLOCK_FIND_CONTIG    startBlock, endBlock, minBlocks, maxBlocks, 0 ... err, actualStartBlock, actualBlockCount, 0, 0
     14      HFSDBG_IS_ALLOCATED         startBlock, blockCount, stop, 0, 0 ... err, 0, actualBlockCount, 0, 0
@@ -80,3 +110,5 @@ enum {
     23      HFSDBG_UNMAP_SCAN           hfs_raw_dev, 0, 0, 0, 0 ... hfs_raw_dev, error, 0, 0, 0
     24      HFSDBG_UNMAP_TRIM           hfs_raw_dev, 0, 0, 0, 0 ... hfs_raw_dev, error, 0, 0, 0  
 */
+
+#endif // HFS_KDEBUG_H_
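The hex values listed in the comment table follow the usual kdebug encoding, assuming the standard KDBG_CODE layout: class 3 (DBG_FSYSTEM) in the top byte, subclass 8 (DBG_HFS) in the next, and the per-event code shifted left two bits to leave room for the DBG_FUNC_START/DBG_FUNC_END qualifier. A quick sanity check against the renamed entries:

    #include <assert.h>
    #include <stdint.h>

    /* Assumed layout: (class << 24) | (subclass << 16) | (code << 2). */
    static uint32_t hfsdbg_code(uint32_t code)
    {
        return (3u << 24) | (8u << 16) | (code << 2);
    }

    int main(void)
    {
        assert(hfsdbg_code(8)  == 0x03080020);   /* HFSDBG_FIND_CONTIG_BITMAP */
        assert(hfsdbg_code(10) == 0x03080028);   /* HFSDBG_ALLOC_FIND_KNOWN */
        assert(hfsdbg_code(24) == 0x03080060);   /* HFSDBG_UNMAP_SCAN_TRIM */
        return 0;
    }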
index 667bad9c6581434ffe2a494c35b7f81f4c29c330..2dd7fda4bf116bdfda1f62fcc361c6272f1bc0d4 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -388,10 +388,9 @@ hfs_makelink(struct hfsmount *hfsmp, struct vnode *src_vp, struct cnode *cp,
                                }
                        }
                }
+                       cp->c_flag |= C_MODIFIED;
                cp->c_touch_chgtime = TRUE;
-               cp->c_flag |= C_FORCEUPDATE;
            }
-           dcp->c_flag |= C_FORCEUPDATE;
        }
 out:
        hfs_systemfile_unlock(hfsmp, lockflags);
@@ -450,6 +449,8 @@ hfs_vnop_link(struct vnop_link_args *ap)
        if (v_type == VLNK)
                return (ENOTSUP);
 
+       cp = VTOC(vp);
+
        if (v_type == VDIR) {
 #if CONFIG_HFS_DIRLINK
                /* Make sure our private directory exists. */
@@ -464,8 +465,10 @@ hfs_vnop_link(struct vnop_link_args *ap)
                if (hfsmp->jnl == NULL) {
                        return (EPERM);
                }
+
                /* Directory hardlinks also need the parent of the original directory. */
-               if ((error = hfs_vget(hfsmp, hfs_currentparent(VTOC(vp)), &fdvp, 1, 0))) {
+               if ((error = hfs_vget(hfsmp, hfs_currentparent(cp, /* have_lock: */ false),
+                                                         &fdvp, 1, 0))) {
                        return (error);
                }
 #else
@@ -503,9 +506,8 @@ hfs_vnop_link(struct vnop_link_args *ap)
                }
        }
        tdcp = VTOC(tdvp);
-       cp = VTOC(vp);
        /* grab the parent CNID from originlist after grabbing cnode locks */
-       parentcnid = hfs_currentparent(cp);
+       parentcnid = hfs_currentparent(cp, /* have_lock: */ true);
 
        /* 
         * Make sure we didn't race the src or dst parent directories with rmdir.
@@ -607,6 +609,7 @@ hfs_vnop_link(struct vnop_link_args *ap)
        lockflags = 0;
 
        cp->c_linkcount++;
+       cp->c_flag |= C_MODIFIED;
        cp->c_touch_chgtime = TRUE;
        error = hfs_makelink(hfsmp, vp, cp, tdcp, cnp);
        if (error) {
@@ -633,10 +636,10 @@ hfs_vnop_link(struct vnop_link_args *ap)
                        }
                }
                tdcp->c_dirchangecnt++;
+               tdcp->c_flag |= C_MODIFIED;
                hfs_incr_gencount(tdcp);
                tdcp->c_touch_chgtime = TRUE;
                tdcp->c_touch_modtime = TRUE;
-               tdcp->c_flag |= C_FORCEUPDATE;
 
                error = hfs_update(tdvp, 0);
                if (error) {
@@ -652,8 +655,8 @@ hfs_vnop_link(struct vnop_link_args *ap)
                    ((fdcp->c_attr.ca_recflags & kHFSHasChildLinkMask) == 0)) {
 
                        fdcp->c_attr.ca_recflags |= kHFSHasChildLinkMask;
+                       fdcp->c_flag |= C_MODIFIED;
                        fdcp->c_touch_chgtime = TRUE;
-                       fdcp->c_flag |= C_FORCEUPDATE;
                        error = hfs_update(fdvp, 0);
                        if (error) {
                                if (error != EIO && error != ENXIO) {
@@ -673,10 +676,8 @@ hfs_vnop_link(struct vnop_link_args *ap)
                hfs_volupdate(hfsmp, VOL_MKFILE,
                        (tdcp->c_cnid == kHFSRootFolderID));
        }
-       /* Make sure update occurs inside transaction */
-       cp->c_flag |= C_FORCEUPDATE;  
 
-       if (error == 0 && (ret = hfs_update(vp, TRUE)) != 0) {
+       if (error == 0 && (ret = hfs_update(vp, 0)) != 0) {
                if (ret != EIO && ret != ENXIO)
                        printf("hfs_vnop_link: error %d updating vp @ %p\n", ret, vp);
                hfs_mark_inconsistent(hfsmp, HFS_OP_INCOMPLETE);
@@ -794,9 +795,9 @@ hfs_unlink(struct hfsmount *hfsmp, struct vnode *dvp, struct vnode *vp, struct c
        dcp->c_dirchangecnt++;
        hfs_incr_gencount(dcp);
        microtime(&tv);
-       dcp->c_ctime = tv.tv_sec;
-       dcp->c_mtime = tv.tv_sec;
-       (void ) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL);
+       dcp->c_touch_chgtime = dcp->c_touch_modtime = true;
+       dcp->c_flag |= C_MODIFIED;
+       hfs_update(dcp->c_vp, 0);
 
        /*
         * If this is the last link then we need to process the inode.
@@ -877,7 +878,7 @@ hfs_unlink(struct hfsmount *hfsmp, struct vnode *dvp, struct vnode *vp, struct c
                    firstlink == cndesc.cd_cnid) {
                        if (setfirstlink(hfsmp, cp->c_fileid, nextlinkid) == 0)
                                cp->c_attr.ca_recflags |= kHFSHasAttributesMask;
-               } else if (vnode_isreg(vp) && cp->c_attr.ca_firstlink == cndesc.cd_cnid) {
+               } else if (cp->c_attr.ca_firstlink == cndesc.cd_cnid) {
                        cp->c_attr.ca_firstlink = nextlinkid;
                }
                /* Update previous link. */
@@ -888,22 +889,23 @@ hfs_unlink(struct hfsmount *hfsmp, struct vnode *dvp, struct vnode *vp, struct c
                if (nextlinkid) {
                        (void) cat_update_siblinglinks(hfsmp, nextlinkid, prevlinkid, HFS_IGNORABLE_LINK);
                }
-
-               /*
-                * The call to cat_releasedesc below will only release the name buffer;
-                * it does not zero out the rest of the fields in the 'cat_desc' data structure.
-                * 
-                * As a result, since there are still other links at this point, we need
-                * to make the current cnode descriptor point to the raw inode.  If a path-based
-                * system call comes along first, it will replace the descriptor with a valid link
-                * ID.  If a userland process already has a file descriptor open, then they will
-                * bypass that lookup, though.  Replacing the descriptor CNID with the raw
-                * inode will force it to generate a new full path.
-                */
-               cp->c_cnid = cp->c_fileid;
-
        }
 
+       /*
+        * The call to cat_releasedesc below will only release the name
+        * buffer; it does not zero out the rest of the fields in the
+        * 'cat_desc' data structure.
+        *
+        * As a result, since there are still other links at this point,
+        * we need to make the current cnode descriptor point to the raw
+        * inode.  If a path-based system call comes along first, it will
+        * replace the descriptor with a valid link ID.  If a userland
+        * process already has a file descriptor open, then they will
+        * bypass that lookup, though.  Replacing the descriptor CNID with
+        * the raw inode will force it to generate a new full path.
+        */
+       cp->c_cnid = cp->c_fileid;
+
        /* Push new link count to disk. */
        cp->c_ctime = tv.tv_sec;        
        (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
@@ -1198,11 +1200,22 @@ hfs_relorigin(struct cnode *cp, cnid_t parentcnid)
        thread_t thread = current_thread();
 
        TAILQ_FOREACH_SAFE(origin, &cp->c_originlist, lo_link, prev) {
-               if ((origin->lo_thread == thread) ||
-                   (origin->lo_parentcnid == parentcnid)) {
+               if (origin->lo_thread == thread) {
                        TAILQ_REMOVE(&cp->c_originlist, origin, lo_link);
                        FREE(origin, M_TEMP);
                        break;
+               } else if (origin->lo_parentcnid == parentcnid) {
+                       /*
+                        * If the threads don't match, then we don't want to
+                        * delete the entry because that might cause other threads
+                        * to fall back and use whatever happens to be in
+                        * c_parentcnid or the wrong link ID.  By setting the
+                        * values to zero here, it should serve as an indication
+                        * that the path is no longer valid and that's better than
+                        * using a random parent ID or link ID.
+                        */
+                       origin->lo_parentcnid = 0;
+                       origin->lo_cnid = 0;
                }
        }
 }
@@ -1222,7 +1235,7 @@ hfs_haslinkorigin(cnode_t *cp)
        
                TAILQ_FOREACH(origin, &cp->c_originlist, lo_link) {
                        if (origin->lo_thread == thread) {
-                               return (1);
+                               return origin->lo_cnid != 0;
                        }
                }
        }
@@ -1236,17 +1249,25 @@ hfs_haslinkorigin(cnode_t *cp)
  */
 __private_extern__
 cnid_t
-hfs_currentparent(cnode_t *cp)
+hfs_currentparent(cnode_t *cp, bool have_lock)
 {
        if (cp->c_flag & C_HARDLINK) {
+               if (!have_lock)
+                       hfs_lock_always(cp, HFS_SHARED_LOCK);
+
                linkorigin_t *origin;
                thread_t thread = current_thread();
-       
+
                TAILQ_FOREACH(origin, &cp->c_originlist, lo_link) {
                        if (origin->lo_thread == thread) {
+                               if (!have_lock)
+                                       hfs_unlock(cp);
                                return (origin->lo_parentcnid);
                        }
                }
+
+               if (!have_lock)
+                       hfs_unlock(cp);
        }
        return (cp->c_parentcnid);
 }
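
hfs_currentparent now takes a have_lock argument so callers that already hold the cnode lock are not locked against themselves.  The following user-space sketch shows the same conditional-locking pattern with a pthread read-write lock; struct node and lookup_parent are hypothetical names, not HFS APIs.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct node {
    pthread_rwlock_t lock;
    unsigned parent;           /* fallback, like c_parentcnid      */
    unsigned thread_parent;    /* per-thread origin, 0 == none     */
};

static unsigned lookup_parent(struct node *n, bool have_lock)
{
    /* Take the shared lock only when the caller does not hold it. */
    if (!have_lock)
        pthread_rwlock_rdlock(&n->lock);

    unsigned result = n->thread_parent ? n->thread_parent : n->parent;

    /* Drop the lock on every exit path, but only if we took it. */
    if (!have_lock)
        pthread_rwlock_unlock(&n->lock);

    return result;
}

int main(void)
{
    struct node n = { .parent = 2, .thread_parent = 7 };
    pthread_rwlock_init(&n.lock, NULL);

    /* Caller without the lock: the helper locks and unlocks itself. */
    printf("parent = %u\n", lookup_parent(&n, false));

    /* Caller that already holds the lock passes have_lock == true. */
    pthread_rwlock_rdlock(&n.lock);
    printf("parent = %u\n", lookup_parent(&n, true));
    pthread_rwlock_unlock(&n.lock);

    pthread_rwlock_destroy(&n.lock);
    return 0;
}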
@@ -1387,3 +1408,24 @@ out:
        return MacToVFSError(result);
 }
 
+errno_t hfs_first_link(hfsmount_t *hfsmp, cnode_t *cp, cnid_t *link_id)
+{
+       errno_t error = 0;
+
+       if (S_ISDIR(cp->c_mode)) {
+               int lockf = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK);
+
+               error = getfirstlink(hfsmp, cp->c_fileid, link_id);
+
+               hfs_systemfile_unlock(hfsmp, lockf);
+       } else {
+               if (cp->c_attr.ca_firstlink)
+                       *link_id = cp->c_attr.ca_firstlink;
+               else {
+                       // This can happen if the cnode has been deleted
+                       error = ENOENT;
+               }
+       }
+
+       return error;
+}
index e198d3190c43eb61a7b7873f1d4faad16690b513..c46bce7c7d38505b4e0c7de94f6c8e3a56b1b828 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -76,6 +76,7 @@
 #include <sys/file.h>
 #include <sys/mount.h>
 #include <sys/vnode.h>
+#include <sys/vnode_internal.h>
 #include <sys/malloc.h>
 #include <sys/kdebug.h>
 #include <sys/kauth.h>
@@ -333,7 +334,8 @@ found:
                 * Directory hard links can have multiple parents so
                 * find the appropriate parent for the current thread.
                 */
-               if ((retval = hfs_vget(hfsmp, hfs_currentparent(VTOC(dvp)), &tvp, 0, 0))) {
+               if ((retval = hfs_vget(hfsmp, hfs_currentparent(VTOC(dvp),
+                                                                       /* have_lock: */ false), &tvp, 0, 0))) {
                        goto exit;
                }
                *cnode_locked = 1;
@@ -419,12 +421,9 @@ found:
                 * Save the origin info for file and directory hardlinks.  Directory hardlinks 
                 * need the origin for '..' lookups, and file hardlinks need it to ensure that 
                 * competing lookups do not cause us to vend different hardlinks than the ones requested.
-                * We want to restrict saving the cache entries to LOOKUP namei operations, since
-                * we're really doing this to protect getattr.
                 */
-               if ((nameiop == LOOKUP) && (VTOC(tvp)->c_flag & C_HARDLINK)) {
+               if (ISSET(VTOC(tvp)->c_flag, C_HARDLINK))
                        hfs_savelinkorigin(VTOC(tvp), VTOC(dvp)->c_fileid);
-               }
                *cnode_locked = 1;
                *vpp = tvp;
        }
@@ -473,12 +472,19 @@ hfs_vnop_lookup(struct vnop_lookup_args *ap)
        int flags = cnp->cn_flags;
        int force_casesensitive_lookup = proc_is_forcing_hfs_case_sensitivity(p);
        int cnode_locked;
+       int fastdev_candidate = 0;
+       int auto_candidate = 0;
 
        *vpp = NULL;
        dcp = VTOC(dvp);
-       
        hfsmp = VTOHFS(dvp);
 
+       if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && (vnode_isfastdevicecandidate(dvp) || (dcp->c_attr.ca_recflags & kHFSFastDevCandidateMask)) ){
+               fastdev_candidate = 1;
+               auto_candidate = (vnode_isautocandidate(dvp) || (dcp->c_attr.ca_recflags & kHFSAutoCandidateMask));
+       }
+       
+
        /*
         * Lookup an entry in the cache
         *
@@ -513,7 +519,10 @@ hfs_vnop_lookup(struct vnop_lookup_args *ap)
                goto exit;
        }
        
-       
+       if (cp->c_attr.ca_recflags & kHFSDoNotFastDevPinMask) {
+               fastdev_candidate = 0;
+       }
+
        /*
         * If this is a hard-link vnode then we need to update
         * the name (of the link), the parent ID, the cnid, the
@@ -603,12 +612,8 @@ hfs_vnop_lookup(struct vnop_lookup_args *ap)
                                         * Save the origin info for file and directory hardlinks.  Directory hardlinks 
                                         * need the origin for '..' lookups, and file hardlinks need it to ensure that 
                                         * competing lookups do not cause us to vend different hardlinks than the ones requested.
-                                        * We want to restrict saving the cache entries to LOOKUP namei operations, since
-                                        * we're really doing this to protect getattr.
                                         */
-                                       if (cnp->cn_nameiop == LOOKUP) {
-                                               hfs_savelinkorigin(cp, dcp->c_fileid);
-                                       }
+                                       hfs_savelinkorigin(cp, dcp->c_fileid);
                                }
                                else {
                                        /* If the fileID does not match then do NOT replace the descriptor! */
@@ -650,9 +655,25 @@ lookup:
 
        error = hfs_lookup(dvp, vpp, cnp, &cnode_locked, force_casesensitive_lookup);
        
+       if (*vpp && (VTOC(*vpp)->c_attr.ca_recflags & kHFSDoNotFastDevPinMask)) {
+               fastdev_candidate = 0;
+       }
+
+       if (*vpp && (VTOC(*vpp)->c_attr.ca_recflags & kHFSAutoCandidateMask)) {
+               //printf("vp %s / %d is an auto-candidate\n", (*vpp)->v_name ? (*vpp)->v_name : "no-name", VTOC(*vpp)->c_fileid);
+               auto_candidate = 1;
+       }
+       
        if (cnode_locked)
                hfs_unlock(VTOC(*vpp));
 exit:
+       if (*vpp && fastdev_candidate && (*vpp)->v_parent == dvp && !(vnode_isfastdevicecandidate(*vpp))) {
+               vnode_setfastdevicecandidate(*vpp);
+               if (auto_candidate) {
+                       vnode_setautocandidate(*vpp);
+               }
+       }
+
        {
        uthread_t ut = (struct uthread *)get_bsdthread_info(current_thread());
 
index f09bdc7d2609867889becac6b21bbc0c6795b79c..78719c0694fcf3f8cc3769b6a5ead6b56548c79e 100644 (file)
@@ -56,6 +56,8 @@
 #include <sys/mount_internal.h>
 #include <sys/file_internal.h>
 
+#include <libkern/OSDebug.h>
+
 #include <miscfs/specfs/specdev.h>
 
 #include <sys/ubc.h>
@@ -64,6 +66,8 @@
 #include <vm/vm_pageout.h>
 #include <vm/vm_kern.h>
 
+#include <IOKit/IOBSD.h>
+
 #include <sys/kdebug.h>
 
 #include       "hfs.h"
@@ -76,6 +80,7 @@
 #include       "hfs_cnode.h"
 #include       "hfs_dbg.h"
 
+
 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
 
 enum {
@@ -85,9 +90,12 @@ enum {
 /* from bsd/hfs/hfs_vfsops.c */
 extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
 
+/* from hfs_hotfiles.c */
+extern int hfs_pin_overflow_extents (struct hfsmount *hfsmp, uint32_t fileid,
+                                             uint8_t forktype, uint32_t *pinned);
+
 static int  hfs_clonefile(struct vnode *, int, int, int);
 static int  hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
-static int  hfs_minorupdate(struct vnode *vp);
 static int  do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);
 
 /* from bsd/hfs/hfs_vnops.c */
@@ -141,13 +149,13 @@ hfs_vnop_read(struct vnop_read_args *ap)
        if (offset < 0)
                return (EINVAL);        /* cant read from a negative offset */
 
+#if SECURE_KERNEL
        if ((ap->a_ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
                                                (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
                /* Don't allow unencrypted io request from user space */
                return EPERM;
        }
-
-
+#endif
 
 #if HFS_COMPRESSION
        if (VNODE_IS_RSRC(vp)) {
@@ -159,12 +167,19 @@ hfs_vnop_read(struct vnop_read_args *ap)
                int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
                if (compressed) {
                        retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
+                       if (retval == 0 && !(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
+                               (void) hfs_addhotfile(vp);
+                       }
                        if (compressed) {
                                if (retval == 0) {
                                        /* successful read, update the access time */
                                        VTOC(vp)->c_touch_acctime = TRUE;
                                        
-                                       /* compressed files are not hot file candidates */
+                                       //
+                                       // compressed files are not traditional hot file candidates
+                                       // but they may be for CF (which ignores the ff_bytesread
+                                       // field)
+                                       //
                                        if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
                                                VTOF(vp)->ff_bytesread = 0;
                                        }
@@ -193,7 +208,8 @@ hfs_vnop_read(struct vnop_read_args *ap)
        if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) {
                goto exit;
        }
-#endif
+
+#endif // CONFIG_PROTECT
 
        /* 
         * If this read request originated from a syscall (as opposed to 
@@ -264,6 +280,16 @@ read_again:
                } else {
                        fp->ff_bytesread += bytesread;
                }
+
+               if (!(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
+                       //
+                       // We don't add hotfiles for processes doing IO_EVTONLY I/O
+                       // on the assumption that they're system processes such as
+                       // mdworker which scan everything in the system (and thus
+                       // do not represent user-initiated access to files)
+                       //
+                       (void) hfs_addhotfile(vp);
+               }
                if (took_cnode_lock)
                        hfs_unlock(cp);
        }
@@ -284,6 +310,30 @@ exit:
        return (retval);
 }
 
+/*
+ * Ideally, this wouldn't be necessary; the cluster code should be
+ * able to handle this on the read-side.  See <rdar://20420068>.
+ */
+static errno_t hfs_zero_eof_page(vnode_t vp, off_t zero_up_to)
+{
+       assert(VTOC(vp)->c_lockowner != current_thread());
+       assert(VTOC(vp)->c_truncatelockowner == current_thread());
+
+       struct filefork *fp = VTOF(vp);
+
+       if (!(fp->ff_size & PAGE_MASK_64) || zero_up_to <= fp->ff_size) {
+               // Nothing to do
+               return 0;
+       }
+
+       zero_up_to = MIN(zero_up_to, (off_t)round_page_64(fp->ff_size));
+
+       /* N.B. At present, @zero_up_to is not important because the cluster
+          code will always zero up to the end of the page anyway. */
+       return cluster_write(vp, NULL, fp->ff_size, zero_up_to,
+                                                fp->ff_size, 0, IO_HEADZEROFILL);
+}
+
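
hfs_zero_eof_page only zero-fills when the old EOF sits mid-page and the write extends past it, and it never zeroes beyond the EOF's page.  The stand-alone sketch below reproduces just that arithmetic (assuming 4 KiB pages); the cluster_write call itself is kernel-only and omitted.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE_64  ((int64_t)4096)
#define PAGE_MASK_64  (PAGE_SIZE_64 - 1)

static int64_t round_page(int64_t off)
{
    return (off + PAGE_MASK_64) & ~PAGE_MASK_64;
}

/* Returns 0 and sets *start/*end when zeroing is needed, -1 otherwise. */
static int zero_eof_range(int64_t eof, int64_t zero_up_to,
                          int64_t *start, int64_t *end)
{
    /* Nothing to do if the EOF already sits on a page boundary or the
     * write does not extend past it. */
    if (!(eof & PAGE_MASK_64) || zero_up_to <= eof)
        return -1;

    *start = eof;
    *end = zero_up_to < round_page(eof) ? zero_up_to : round_page(eof);
    return 0;
}

int main(void)
{
    int64_t start, end;

    /* EOF at 10000 (mid-page), write extending to 20000:
     * zero from 10000 up to the end of the EOF page (12288). */
    if (zero_eof_range(10000, 20000, &start, &end) == 0)
        printf("zero [%lld, %lld)\n", (long long)start, (long long)end);

    /* EOF already page-aligned: nothing to zero. */
    if (zero_eof_range(8192, 20000, &start, &end) != 0)
        printf("no zeroing needed\n");

    return 0;
}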
 /*
  * Write data to a file.
  */
@@ -314,7 +364,6 @@ hfs_vnop_write(struct vnop_write_args *ap)
        int took_truncate_lock = 0;
        int io_return_on_throttle = 0;
        int throttled_count = 0;
-       struct rl_entry *invalid_range;
 
 #if HFS_COMPRESSION
        if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
@@ -347,12 +396,13 @@ hfs_vnop_write(struct vnop_write_args *ap)
 
 #endif
 
+#if SECURE_KERNEL
        if ((ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
                                                (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
                /* Don't allow unencrypted io request from user space */
                return EPERM;
        }
-
+#endif
 
        resid = uio_resid(uio);
        offset = uio_offset(uio);
@@ -419,9 +469,12 @@ again:
                goto exit;
        }
 
+       cred = vfs_context_ucred(ap->a_context);
+       if (cred && suser(cred, NULL) != 0)
+               eflags |= kEFReserveMask;
+
        origFileSize = fp->ff_size;
        writelimit = offset + resid;
-       filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
 
        /*
         * We may need an exclusive truncate lock for several reasons, all
@@ -439,16 +492,11 @@ again:
         *    old EOF and new EOF are in the same block, we still need to
         *    protect that range of bytes until they are written for the
         *    first time.
-        * 3. The write overlaps some invalid ranges (delayed zero fill; that
-        *    part of the file has been allocated, but not yet written).
         *
         * If we had a shared lock with the above cases, we need to try to upgrade
         * to an exclusive lock.  If the upgrade fails, we will lose the shared
         * lock, and will need to take the truncate lock again; the took_truncate_lock
         * flag will still be set, causing us to try for an exclusive lock next time.
-        *
-        * NOTE: Testing for #3 (delayed zero fill) needs to be done while the cnode
-        * lock is held, since it protects the range lists.
         */
        if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
            ((fp->ff_unallocblocks != 0) ||
@@ -471,26 +519,16 @@ again:
                goto exit;
        }
        cnode_locked = 1;
-       
-       /*
-        * Now that we have the cnode lock, see if there are delayed zero fill ranges
-        * overlapping our write.  If so, we need the truncate lock exclusive (see above).
-        */
-       if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
-           (rl_scan(&fp->ff_invalidranges, offset, writelimit-1, &invalid_range) != RL_NOOVERLAP)) {
-               /*
-                * When testing, it appeared that calling lck_rw_lock_shared_to_exclusive() causes
-                * a deadlock, rather than simply returning failure.  (That is, it apparently does
-                * not behave like a "try_lock").  Since this condition is rare, just drop the
-                * cnode lock and try again.  Since took_truncate_lock is set, we will
-                * automatically take the truncate lock exclusive.
-                */
-               hfs_unlock(cp);
-               cnode_locked = 0;
-               hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
-               goto again;
+
+       filebytes = hfs_blk_to_bytes(fp->ff_blocks, hfsmp->blockSize);
+
+       if (offset > filebytes
+               && (hfs_blk_to_bytes(hfs_freeblks(hfsmp, ISSET(eflags, kEFReserveMask)),
+                                                        hfsmp->blockSize) < offset - filebytes)) {
+               retval = ENOSPC;
+               goto exit;
        }
-       
+
        KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_START,
                     (int)offset, uio_resid(uio), (int)fp->ff_size,
                     (int)filebytes, 0);
@@ -500,7 +538,6 @@ again:
                goto sizeok;
        }
 
-       cred = vfs_context_ucred(ap->a_context);
        bytesToAdd = writelimit - filebytes;
 
 #if QUOTA
@@ -517,8 +554,6 @@ again:
 
        while (writelimit > filebytes) {
                bytesToAdd = writelimit - filebytes;
-               if (cred && suser(cred, NULL) != 0)
-                       eflags |= kEFReserveMask;
 
                /* Protect extents b-tree and allocation bitmap */
                lockflags = SFL_BITMAP;
@@ -543,7 +578,7 @@ again:
                KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_NONE,
                        (int)offset, uio_resid(uio), (int)fp->ff_size,  (int)filebytes, 0);
        }
-       (void) hfs_update(vp, TRUE);
+       (void) hfs_update(vp, 0);
        (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
        (void) hfs_end_transaction(hfsmp);
 
@@ -561,133 +596,45 @@ again:
 sizeok:
        if (retval == E_NONE) {
                off_t filesize;
-               off_t zero_off;
-               off_t tail_off;
-               off_t inval_start;
-               off_t inval_end;
-               off_t io_start;
+               off_t head_off;
                int lflag;
 
-               if (writelimit > fp->ff_size)
+               if (writelimit > fp->ff_size) {
                        filesize = writelimit;
-               else
+                       struct timeval tv;
+                       rl_add(fp->ff_size, writelimit - 1 , &fp->ff_invalidranges);
+                       microuptime(&tv);
+                       cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
+               } else
                        filesize = fp->ff_size;
 
                lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
 
-               if (offset <= fp->ff_size) {
-                       zero_off = offset & ~PAGE_MASK_64;
-                       
-                       /* Check to see whether the area between the zero_offset and the start
-                          of the transfer to see whether is invalid and should be zero-filled
-                          as part of the transfer:
-                        */
-                       if (offset > zero_off) {
-                               if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
-                                       lflag |= IO_HEADZEROFILL;
-                       }
-               } else {
-                       off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
-                       
-                       /* The bytes between fp->ff_size and uio->uio_offset must never be
-                          read without being zeroed.  The current last block is filled with zeroes
-                          if it holds valid data but in all cases merely do a little bookkeeping
-                          to track the area from the end of the current last page to the start of
-                          the area actually written.  For the same reason only the bytes up to the
-                          start of the page where this write will start is invalidated; any remainder
-                          before uio->uio_offset is explicitly zeroed as part of the cluster_write.
-                          
-                          Note that inval_start, the start of the page after the current EOF,
-                          may be past the start of the write, in which case the zeroing
-                          will be handled by the cluser_write of the actual data.
-                        */
-                       inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
-                       inval_end = offset & ~PAGE_MASK_64;
-                       zero_off = fp->ff_size;
-                       
-                       if ((fp->ff_size & PAGE_MASK_64) &&
-                               (rl_scan(&fp->ff_invalidranges,
-                                                       eof_page_base,
-                                                       fp->ff_size - 1,
-                                                       &invalid_range) != RL_NOOVERLAP)) {
-                               /* The page containing the EOF is not valid, so the
-                                  entire page must be made inaccessible now.  If the write
-                                  starts on a page beyond the page containing the eof
-                                  (inval_end > eof_page_base), add the
-                                  whole page to the range to be invalidated.  Otherwise
-                                  (i.e. if the write starts on the same page), zero-fill
-                                  the entire page explicitly now:
-                                */
-                               if (inval_end > eof_page_base) {
-                                       inval_start = eof_page_base;
-                               } else {
-                                       zero_off = eof_page_base;
-                               };
-                       };
-                       
-                       if (inval_start < inval_end) {
-                               struct timeval tv;
-                               /* There's some range of data that's going to be marked invalid */
-                               
-                               if (zero_off < inval_start) {
-                                       /* The pages between inval_start and inval_end are going to be invalidated,
-                                          and the actual write will start on a page past inval_end.  Now's the last
-                                          chance to zero-fill the page containing the EOF:
-                                        */
-                                       hfs_unlock(cp);
-                                       cnode_locked = 0;
-                                       retval = cluster_write(vp, (uio_t) 0,
-                                                       fp->ff_size, inval_start,
-                                                       zero_off, (off_t)0,
-                                                       lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
-                                       hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
-                                       cnode_locked = 1;
-                                       if (retval) goto ioerr_exit;
-                                       offset = uio_offset(uio);
-                               };
-                               
-                               /* Mark the remaining area of the newly allocated space as invalid: */
-                               rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
-                               microuptime(&tv);
-                               cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
-                               zero_off = fp->ff_size = inval_end;
-                       };
-                       
-                       if (offset > zero_off) lflag |= IO_HEADZEROFILL;
-               };
-
-               /* Check to see whether the area between the end of the write and the end of
-                  the page it falls in is invalid and should be zero-filled as part of the transfer:
-                */
-               tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
-               if (tail_off > filesize) tail_off = filesize;
-               if (tail_off > writelimit) {
-                       if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
-                               lflag |= IO_TAILZEROFILL;
-                       };
-               };
-               
                /*
-                * if the write starts beyond the current EOF (possibly advanced in the
-                * zeroing of the last block, above), then we'll zero fill from the current EOF
-                * to where the write begins:
-                *
-                * NOTE: If (and ONLY if) the portion of the file about to be written is
-                *       before the current EOF it might be marked as invalid now and must be
-                *       made readable (removed from the invalid ranges) before cluster_write
-                *       tries to write it:
+                * We no longer use IO_HEADZEROFILL or IO_TAILZEROFILL (except
+                * for one case below).  For the regions that lie before the
+                * beginning and after the end of this write that are in the
+                * same page, we let the cluster code handle zeroing that out
+                * if necessary.  If those areas are not cached, the cluster
+                * code will try to read those areas in, and in the case
+                * where those regions have never been written to,
+                * hfs_vnop_blockmap will consult the invalid ranges and
+                * indicate that the data should be zeroed.  The cluster
+                * code will then zero out those areas.
                 */
-               io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
-               if (io_start < fp->ff_size) {
-                       off_t io_end;
 
-                       io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
-                       rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
-               };
+               head_off = trunc_page_64(offset);
+
+               if (head_off < offset && head_off >= fp->ff_size) {
+                       /*
+                        * The first page is beyond current EOF, so as an
+                        * optimisation, we can pass IO_HEADZEROFILL.
+                        */
+                       lflag |= IO_HEADZEROFILL;
+               }
 
                hfs_unlock(cp);
                cnode_locked = 0;
-               
+
                /*
                 * We need to tell UBC the fork's new size BEFORE calling
                 * cluster_write, in case any of the new pages need to be
@@ -708,11 +655,14 @@ sizeok:
                 * zero, unless we are extending the file via write.
                 */
                if (filesize > fp->ff_size) {
+                       retval = hfs_zero_eof_page(vp, offset);
+                       if (retval)
+                               goto exit;
                        fp->ff_new_size = filesize;
                        ubc_setsize(vp, filesize);
                }
-               retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
-                               tail_off, lflag | IO_NOZERODIRTY | io_return_on_throttle);
+               retval = cluster_write(vp, uio, fp->ff_size, filesize, head_off,
+                                                          0, lflag | IO_NOZERODIRTY | io_return_on_throttle);
                if (retval) {
                        fp->ff_new_size = 0;    /* no longer extending; use ff_size */
                        
@@ -770,17 +720,17 @@ sizeok:
        // XXXdbg - see radar 4871353 for more info
        {
            if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
-               VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
+                       hfs_flush(hfsmp, HFS_FLUSH_CACHE);
            }
        }
 
 ioerr_exit:
-       if (resid > uio_resid(uio)) {
-               if (!cnode_locked) {
-                       hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
-                       cnode_locked = 1;
-               }
+       if (!cnode_locked) {
+               hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+               cnode_locked = 1;
+       }
 
+       if (resid > uio_resid(uio)) {
                cp->c_touch_chgtime = TRUE;
                cp->c_touch_modtime = TRUE;
                hfs_incr_gencount(cp);
@@ -806,7 +756,7 @@ ioerr_exit:
                        filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
                }
        } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio)))
-               retval = hfs_update(vp, TRUE);
+               retval = hfs_update(vp, 0);
 
        /* Updating vcbWrCnt doesn't need to be atomic. */
        hfsmp->vcbWrCnt++;
@@ -814,9 +764,15 @@ ioerr_exit:
        KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_END,
                (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
 exit:
+       if (retval && took_truncate_lock
+               && cp->c_truncatelockowner == current_thread()) {
+               fp->ff_new_size = 0;
+               rl_remove(fp->ff_size, RL_INFINITY, &fp->ff_invalidranges);
+       }
+
        if (cnode_locked)
                hfs_unlock(cp);
-       
+
        if (took_truncate_lock) {
                hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
        }
@@ -1744,8 +1700,8 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* {
                        cp->c_bsdflags &= ~UF_TRACKED;
 
                        // mark the cnodes dirty
-                       cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
-                       to_cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
+                       cp->c_flag |= C_MODIFIED;
+                       to_cp->c_flag |= C_MODIFIED;
 
                        int lockflags;
                        if ((error = hfs_start_transaction(hfsmp)) == 0) {
@@ -1849,6 +1805,7 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* {
        case HFS_RESIZE_VOLUME: {
                u_int64_t newsize;
                u_int64_t cursize;
+               int ret;
 
                vfsp = vfs_statfs(HFSTOVFS(hfsmp));
                if (suser(cred, NULL) &&
@@ -1865,14 +1822,18 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* {
                }
                newsize = *(u_int64_t *)ap->a_data;
                cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
-               
+
+               if (newsize == cursize) {
+                       return (0);
+               }
+               IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeWillResize);
                if (newsize > cursize) {
-                       return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
-               } else if (newsize < cursize) {
-                       return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
+                       ret = hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
                } else {
-                       return (0);
+                       ret = hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
                }
+               IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeDidResize);
+               return (ret);
        }
        case HFS_CHANGE_NEXT_ALLOCATION: {
                int error = 0;          /* Assume success */
@@ -2105,22 +2066,6 @@ fail_change_next_allocation:
                return hfs_thaw(hfsmp, current_proc());
        }
 
-       case HFS_BULKACCESS_FSCTL: {
-           int size;
-           
-           if (hfsmp->hfs_flags & HFS_STANDARD) {
-               return EINVAL;
-           }
-
-           if (is64bit) {
-               size = sizeof(struct user64_access_t);
-           } else {
-               size = sizeof(struct user32_access_t);
-           }
-           
-           return do_bulk_access_check(hfsmp, vp, ap, size, context);
-       } 
-
        case HFS_EXT_BULKACCESS_FSCTL: {
            int size;
            
@@ -2427,7 +2372,22 @@ fail_change_next_allocation:
                }
                error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
                if (error == 0) {
-                       error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
+                       error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_FULL, p);
+                       hfs_unlock(VTOC(vp));
+               }
+
+               return error;
+       }
+
+       case F_BARRIERFSYNC: {
+               int error;
+
+               if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+                       return (EROFS);
+               }
+               error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+               if (error == 0) {
+                       error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_BARRIER, p);
                        hfs_unlock(VTOC(vp));
                }
 
@@ -2562,7 +2522,7 @@ fail_change_next_allocation:
                hfs_lock_mount (hfsmp);
                bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
                hfs_unlock_mount (hfsmp);
-               (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
+               (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
                break;
                
        case HFS_GET_BOOT_INFO:
@@ -2604,8 +2564,8 @@ fail_change_next_allocation:
                        jnl_start = 0;
                        jnl_size  = 0;
            } else {
-                       jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
-                       jnl_size  = (off_t)hfsmp->jnl_size;
+                       jnl_start = hfs_blk_to_bytes(hfsmp->jnl_start, hfsmp->blockSize) + hfsmp->hfsPlusIOPosOffset;
+                       jnl_size  = hfsmp->jnl_size;
            }
 
                jip->jstart = jnl_start;
@@ -2735,7 +2695,7 @@ fail_change_next_allocation:
                hfs_journal_lock(hfsmp);
 
                /* Flush the journal and wait for all I/Os to finish up */
-               error = hfs_journal_flush(hfsmp, TRUE);
+               error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
                if (error) {
                        hfs_journal_unlock(hfsmp);
                        return error;
@@ -2775,6 +2735,91 @@ fail_change_next_allocation:
                break;
        }
 
+       case HFS_SET_HOTFILE_STATE: {
+               int error;
+               struct cnode *cp = VTOC(vp);
+               uint32_t hf_state = *((uint32_t*)ap->a_data);
+               uint32_t num_unpinned = 0;
+               
+               error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+               if (error) {
+                       return error;
+               }
+
+               // printf("hfs: setting hotfile state %d on %s\n", hf_state, vp->v_name);
+               if (hf_state == HFS_MARK_FASTDEVCANDIDATE) {
+                       vnode_setfastdevicecandidate(vp);
+
+                       cp->c_attr.ca_recflags |= kHFSFastDevCandidateMask;
+                       cp->c_attr.ca_recflags &= ~kHFSDoNotFastDevPinMask;
+                       cp->c_flag |= C_MODIFIED;
+               } else if (hf_state == HFS_UNMARK_FASTDEVCANDIDATE || hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
+                       vnode_clearfastdevicecandidate(vp);
+                       hfs_removehotfile(vp);
+
+                       if (cp->c_attr.ca_recflags & kHFSFastDevPinnedMask) {
+                               hfs_pin_vnode(hfsmp, vp, HFS_UNPIN_IT, &num_unpinned, ap->a_context);
+                       }
+                               
+                       if (hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
+                               cp->c_attr.ca_recflags |= kHFSDoNotFastDevPinMask;
+                       }
+                       cp->c_attr.ca_recflags &= ~(kHFSFastDevCandidateMask|kHFSFastDevPinnedMask);
+                       cp->c_flag |= C_MODIFIED;
+
+               } else {
+                       error = EINVAL;
+               }
+
+               if (num_unpinned != 0) {
+                       lck_mtx_lock(&hfsmp->hfc_mutex);
+                       hfsmp->hfs_hotfile_freeblks += num_unpinned;
+                       lck_mtx_unlock(&hfsmp->hfc_mutex);
+               }
+
+               hfs_unlock(cp);
+               return error;
+               break;
+       }
+
+       case HFS_REPIN_HOTFILE_STATE: {
+               int error=0;
+               uint32_t repin_what = *((uint32_t*)ap->a_data);
+
+               /* Only root allowed */
+               if (!kauth_cred_issuser(kauth_cred_get())) {
+                       return EACCES;
+               }
+
+               if (!(hfsmp->hfs_flags & (HFS_CS_METADATA_PIN | HFS_CS_HOTFILE_PIN))) {
+                       // this system is neither regular Fusion nor Cooperative Fusion,
+                       // so this fsctl makes no sense.
+                       return EINVAL;
+               }
+
+               //
+               // After converting a CoreStorage volume to be encrypted, the
+               // extents could have moved around underneath us.  This call
+               // allows corestoraged to re-pin everything that should be
+               // pinned (it would happen on the next reboot too but that could
+               // be a long time away).
+               //
+               if ((repin_what & HFS_REPIN_METADATA) && (hfsmp->hfs_flags & HFS_CS_METADATA_PIN)) {
+                       hfs_pin_fs_metadata(hfsmp);
+               }
+               if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) {
+                       hfs_repin_hotfiles(hfsmp);
+               }
+               if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_SWAPFILE_PIN)) {
+                       //XXX Swapfiles (marked SWAP_PINNED) may have moved too.
+                       //XXX Do we care? They have a more transient/dynamic nature/lifetime.
+               }
+
+               return error;
+               break;
+       }               
+
+
        default:
                return (ENOTTY);
        }
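
HFS_SET_HOTFILE_STATE drives a small state machine over the cnode record flags: marking sets the candidate bit and clears the do-not-pin bit, while unmark/never clears the candidate and pinned bits and, for never, sets do-not-pin.  The sketch below models only those transitions with made-up bit values; the real masks live in the HFS headers, and the real code also unpins the vnode and updates hotfile accounting.

#include <stdint.h>
#include <stdio.h>

enum {
    FASTDEV_CANDIDATE = 1u << 0,   /* stand-in for kHFSFastDevCandidateMask */
    FASTDEV_PINNED    = 1u << 1,   /* stand-in for kHFSFastDevPinnedMask    */
    DO_NOT_FASTDEV    = 1u << 2,   /* stand-in for kHFSDoNotFastDevPinMask  */
};

enum { MARK, UNMARK, NEVER };

static uint32_t set_hotfile_state(uint32_t recflags, int state)
{
    switch (state) {
    case MARK:
        recflags |= FASTDEV_CANDIDATE;
        recflags &= ~DO_NOT_FASTDEV;
        break;
    case UNMARK:
    case NEVER:
        if (state == NEVER)
            recflags |= DO_NOT_FASTDEV;
        recflags &= ~(FASTDEV_CANDIDATE | FASTDEV_PINNED);
        break;
    }
    return recflags;
}

int main(void)
{
    uint32_t flags = FASTDEV_CANDIDATE | FASTDEV_PINNED;

    flags = set_hotfile_state(flags, NEVER);
    printf("after NEVER: candidate=%d pinned=%d do-not-pin=%d\n",
           !!(flags & FASTDEV_CANDIDATE), !!(flags & FASTDEV_PINNED),
           !!(flags & DO_NOT_FASTDEV));
    return 0;
}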
@@ -2916,6 +2961,62 @@ hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
  * searched for mapping.
  *
  * System file cnodes are expected to be locked (shared or exclusive).
+ *
+ * -- INVALID RANGES --
+ *
+ * Invalid ranges are used to keep track of where we have extended a
+ * file, but have not yet written that data to disk.  In the past we
+ * would clear up the invalid ranges as we wrote to those areas, but
+ * before data was actually flushed to disk.  The problem with that
+ * approach is that the data can be left in the cache and is therefore
+ * still not valid on disk.  So now we clear up the ranges here, when
+ * the flags field has VNODE_WRITE set, indicating a write is about to
+ * occur.  This isn't ideal (ideally we want to clear them up when we
+ * know the data has been successfully written), but it's the best we
+ * can do.
+ *
+ * For reads, we use the invalid ranges here in block map to indicate
+ * to the caller that the data should be zeroed (a_bpn == -1).  We
+ * have to be careful about what ranges we return to the cluster code.
+ * Currently the cluster code can only handle non-rounded values for
+ * the EOF; it cannot handle funny sized ranges in the middle of the
+ * file (the main problem is that it sends down odd sized I/Os to the
+ * disk).  Our code currently works because whilst the very first
+ * offset and the last offset in the invalid ranges are not aligned,
+ * gaps in the invalid ranges between the first and last have to be
+ * aligned (because we always write page sized blocks).  For example,
+ * consider this arrangement:
+ *
+ *         +-------------+-----+-------+------+
+ *         |             |XXXXX|       |XXXXXX|
+ *         +-------------+-----+-------+------+
+ *                       a     b       c      d
+ *
+ * This shows two invalid ranges <a, b> and <c, d>.  Whilst a and d
+ * are not necessarily aligned, b and c *must* be.
+ *
+ * Zero-filling occurs in a number of ways:
+ *
+ *   1. When a read occurs and we return with a_bpn == -1.
+ *
+ *   2. When hfs_fsync or hfs_filedone calls hfs_flush_invalid_ranges
+ *      which will cause us to iterate over the ranges bringing in
+ *      pages that are not present in the cache and zeroing them.  Any
+ *      pages that are already in the cache are left untouched.  Note
+ *      that hfs_fsync does not always flush invalid ranges.
+ *
+ *   3. When we extend a file we zero out from the old EOF to the end
+ *      of the page.  It would be nice if we didn't have to do this if
+ *      the page wasn't present (and could defer it), but because of
+ *      the problem described above, we have to.
+ *
+ * The invalid ranges are also used to restrict the size that we write
+ * out on disk: see hfs_prepare_fork_for_update.
+ *
+ * Note that invalid ranges are ignored when neither the VNODE_READ nor
+ * the VNODE_WRITE flag is specified.  This is useful for the
+ * F_LOG2PHYS* fcntls which are not interested in invalid ranges: they
+ * just want to know whether blocks are physically allocated or not.
  */
 int
 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
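
To make the invalid-range rules above concrete, here is a simplified user-space model of the read-side mapping: a query that starts inside an invalid range is reported as zero-fill and clipped at the range's end, while a later invalid range clips the valid extent at its page-rounded start, skipping sub-page ranges.  The range array and map_read are illustrative, not the kernel's rl_scan/rl_entry.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE  4096LL

struct range { int64_t start, end; };   /* inclusive, like rl_start/rl_end */

static int64_t round_page(int64_t off)
{
    return (off + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
}

/*
 * Map a read at [foffset, foffset+*len): returns 1 when the caller should
 * report "no backing block, zero-fill" (a_bpn == -1 in the kernel), 0 when
 * real blocks back the (possibly clipped) range.  *len is clipped so the
 * returned extent never straddles a page-aligned invalid boundary.
 */
static int map_read(const struct range *inv, int n,
                    int64_t foffset, int64_t *len)
{
    for (int i = 0; i < n; i++) {
        if (inv[i].end < foffset)
            continue;                        /* wholly before the read */
        if (inv[i].start <= foffset) {
            /* Read starts inside an invalid range: zero-fill up to its end. */
            if (inv[i].end + 1 - foffset < *len)
                *len = inv[i].end + 1 - foffset;
            return 1;
        }
        /* Invalid range starts later in the read. */
        int64_t rounded = round_page(inv[i].start);
        if (rounded - foffset >= *len)
            break;                           /* read ends before it matters */
        if (rounded <= inv[i].end) {
            *len = rounded - foffset;        /* clip at the aligned boundary */
            break;
        }
        /* Sub-page invalid range: skip it and look at the next one. */
    }
    return 0;
}

int main(void)
{
    /* Two invalid ranges; interior boundaries are page aligned, as the
     * <a,b> / <c,d> diagram in the comment requires. */
    struct range inv[] = { { 5000, 8191 }, { 12288, 20000 } };
    int64_t len;

    len = 16384;            /* read starting inside the first range */
    printf("zero=%d len=%lld\n", map_read(inv, 2, 6000, &len), (long long)len);

    len = 16384;            /* read starting in valid data before range 2 */
    printf("zero=%d len=%lld\n", map_read(inv, 2, 8192, &len), (long long)len);
    return 0;
}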
@@ -2936,7 +3037,7 @@ hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
        struct cnode *cp;
        struct filefork *fp;
        struct hfsmount *hfsmp;
-       size_t bytesContAvail = 0;
+       size_t bytesContAvail = ap->a_size;
        int retval = E_NONE;
        int syslocks = 0;
        int lockflags = 0;
@@ -2977,17 +3078,110 @@ hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
        if (ap->a_bpn == NULL)
                return (0);
 
+       hfsmp = VTOHFS(vp);
+       cp = VTOC(vp);
+       fp = VTOF(vp);
+
        if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
-               if (VTOC(vp)->c_lockowner != current_thread()) {
+               if (cp->c_lockowner != current_thread()) {
                        hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
                        tooklock = 1;
                }
+
+               // For reads, check the invalid ranges
+               if (ISSET(ap->a_flags, VNODE_READ)) {
+                       if (ap->a_foffset >= fp->ff_size) {
+                               retval = ERANGE;
+                               goto exit;
+                       }
+
+                       overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
+                                                                 ap->a_foffset + (off_t)bytesContAvail - 1,
+                                                                 &invalid_range);
+                       switch(overlaptype) {
+                               case RL_MATCHINGOVERLAP:
+                               case RL_OVERLAPCONTAINSRANGE:
+                               case RL_OVERLAPSTARTSBEFORE:
+                                       /* There's no valid block for this byte offset */
+                                       *ap->a_bpn = (daddr64_t)-1;
+                                       /* There's no point limiting the amount to be returned
+                                        * if the invalid range that was hit extends all the way
+                                        * to the EOF (i.e. there are no valid bytes between the
+                                        * end of this range and the file's EOF):
+                                        */
+                                       if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
+                                               ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
+                                               bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
+                                       }
+
+                                       retval = 0;
+                                       goto exit;
+
+                               case RL_OVERLAPISCONTAINED:
+                               case RL_OVERLAPENDSAFTER:
+                                       /* The range of interest hits an invalid block before the end: */
+                                       if (invalid_range->rl_start == ap->a_foffset) {
+                                               /* There's actually no valid information to be had starting here: */
+                                               *ap->a_bpn = (daddr64_t)-1;
+                                               if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
+                                                       ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
+                                                       bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
+                                               }
+
+                                               retval = 0;
+                                               goto exit;
+                                       } else {
+                                               /*
+                                                * Sadly, the lower layers don't like us to
+                                                * return unaligned ranges, so we skip over
+                                                * any invalid ranges here that are less than
+                                                * a page: zeroing of those bits is not our
+                                                * responsibility (it's dealt with elsewhere).
+                                                */
+                                               do {
+                                                       off_t rounded_start = round_page_64(invalid_range->rl_start);
+                                                       if ((off_t)bytesContAvail < rounded_start - ap->a_foffset)
+                                                               break;
+                                                       if (rounded_start < invalid_range->rl_end + 1) {
+                                                               bytesContAvail = rounded_start - ap->a_foffset;
+                                                               break;
+                                                       }
+                                               } while ((invalid_range = TAILQ_NEXT(invalid_range,
+                                                                                                                        rl_link)));
+                                       }
+                                       break;
+
+                               case RL_NOOVERLAP:
+                                       break;
+                       } // switch
+               }
        }
-       hfsmp = VTOHFS(vp);
-       cp = VTOC(vp);
-       fp = VTOF(vp);
+
+#if CONFIG_PROTECT
+       if (cp->c_cpentry) {
+               const int direction = (ISSET(ap->a_flags, VNODE_WRITE)
+                                                          ? VNODE_WRITE : VNODE_READ);
+
+               cp_io_params_t io_params;
+               cp_io_params(hfsmp, cp->c_cpentry,
+                                        off_rsrc_make(ap->a_foffset, VNODE_IS_RSRC(vp)),
+                                        direction, &io_params);
+
+               if (io_params.max_len < (off_t)bytesContAvail)
+                       bytesContAvail = io_params.max_len;
+
+               if (io_params.phys_offset != -1) {
+                       *ap->a_bpn = ((io_params.phys_offset + hfsmp->hfsPlusIOPosOffset)
+                                                 / hfsmp->hfs_logical_block_size);
+
+                       retval = 0;
+                       goto exit;
+               }
+       }
+#endif
 
 retry:
+
        /* Check virtual blocks only when performing write operation */
        if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
                if (hfs_start_transaction(hfsmp) != 0) {
@@ -3049,7 +3243,7 @@ retry:
                        hfs_systemfile_unlock(hfsmp, lockflags);
                        cp->c_flag |= C_MODIFIED;
                        if (started_tr) {
-                               (void) hfs_update(vp, TRUE);
+                               (void) hfs_update(vp, 0);
                                (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
 
                                hfs_end_transaction(hfsmp);
@@ -3059,19 +3253,13 @@ retry:
                }
        }
 
-       retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
+       retval = MapFileBlockC(hfsmp, (FCB *)fp, bytesContAvail, ap->a_foffset,
                               ap->a_bpn, &bytesContAvail);
        if (syslocks) {
                hfs_systemfile_unlock(hfsmp, lockflags);
                syslocks = 0;
        }
 
-       if (started_tr) {
-               (void) hfs_update(vp, TRUE);
-               (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
-               hfs_end_transaction(hfsmp);
-               started_tr = 0;
-       }       
        if (retval) {
                /* On write, always return error because virtual blocks, if any, 
                 * should have been allocated in ExtendFileC().  We do not 
@@ -3083,42 +3271,28 @@ retry:
                    (ap->a_flags & VNODE_WRITE) ||
                    ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
                        goto exit;
-               } 
-               
+               }
+
                /* Validate if the start offset is within logical file size */
                if (ap->a_foffset >= fp->ff_size) {
                        goto exit;
                }
 
                /*
-                * At this point, we have encountered a failure during 
-                * MapFileBlockC that resulted in ERANGE, and we are not servicing
-                * a write, and there are borrowed blocks.
-                * 
-                * However, the cluster layer will not call blockmap for 
-                * blocks that are borrowed and in-cache.  We have to assume that 
-                * because we observed ERANGE being emitted from MapFileBlockC, this 
-                * extent range is not valid on-disk.  So we treat this as a 
-                * mapping that needs to be zero-filled prior to reading.  
+                * At this point, we have encountered a failure during
+                * MapFileBlockC that resulted in ERANGE, and we are not
+                * servicing a write, and there are borrowed blocks.
                 *
-                * Note that under certain circumstances (such as non-contiguous 
-                * userland VM mappings in the calling process), cluster_io 
-                * may be forced to split a large I/O driven by hfs_vnop_write 
-                * into multiple sub-I/Os that necessitate a RMW cycle.  If this is
-                * the case here, then we have already removed the invalid range list
-                * mapping prior to getting to this blockmap call, so we should not
-                * search the invalid rangelist for this byte range.
-                */
-
-               bytesContAvail = fp->ff_size - ap->a_foffset;
-               /*
-                * Clip the contiguous available bytes to, at most, the allowable
-                * maximum or the amount requested.
+                * However, the cluster layer will not call blockmap for
+                * blocks that are borrowed and in-cache.  We have to assume
+                * that because we observed ERANGE being emitted from
+                * MapFileBlockC, this extent range is not valid on-disk.  So
+                * we treat this as a mapping that needs to be zero-filled
+                * prior to reading.
                 */
 
-               if (bytesContAvail > ap->a_size) {
-                       bytesContAvail = ap->a_size;
-               }
+               if (fp->ff_size - ap->a_foffset < (off_t)bytesContAvail)
+                       bytesContAvail = fp->ff_size - ap->a_foffset;
 
                *ap->a_bpn = (daddr64_t) -1;
                retval = 0;
@@ -3126,54 +3300,42 @@ retry:
                goto exit;
        }
 
-       /* MapFileC() found a valid extent in the filefork.  Search the 
-        * mapping information further for invalid file ranges 
-        */
-       overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
-                             ap->a_foffset + (off_t)bytesContAvail - 1,
-                             &invalid_range);
-       if (overlaptype != RL_NOOVERLAP) {
-               switch(overlaptype) {
-               case RL_MATCHINGOVERLAP:
-               case RL_OVERLAPCONTAINSRANGE:
-               case RL_OVERLAPSTARTSBEFORE:
-                       /* There's no valid block for this byte offset */
-                       *ap->a_bpn = (daddr64_t)-1;
-                       /* There's no point limiting the amount to be returned
-                        * if the invalid range that was hit extends all the way 
-                        * to the EOF (i.e. there's no valid bytes between the
-                        * end of this range and the file's EOF):
-                        */
-                       if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
-                           ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
-                               bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
-                       }
-                       break;
-       
-               case RL_OVERLAPISCONTAINED:
-               case RL_OVERLAPENDSAFTER:
-                       /* The range of interest hits an invalid block before the end: */
-                       if (invalid_range->rl_start == ap->a_foffset) {
-                               /* There's actually no valid information to be had starting here: */
-                               *ap->a_bpn = (daddr64_t)-1;
-                               if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
-                                   ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
-                                       bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
+exit:
+       if (retval == 0) {
+               if (ISSET(ap->a_flags, VNODE_WRITE)) {
+                       struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges);
+
+                       // See if we might be overlapping invalid ranges...
+                       if (r && (ap->a_foffset + (off_t)bytesContAvail) > r->rl_start) {
+                               /*
+                                * Mark the file as needing an update if we think the
+                                * on-disk EOF has changed.
+                                */
+                               if (ap->a_foffset <= r->rl_start)
+                                       SET(cp->c_flag, C_MODIFIED);
+
+                               /*
+                                * This isn't the ideal place to put this.  Ideally, we
+                                * should do something *after* we have successfully
+                                * written to the range, but that's difficult to do
+                                * because we cannot take locks in the callback.  At
+                                * present, the cluster code will call us with VNODE_WRITE
+                                * set just before it's about to write the data so we know
+                                * that data is about to be written.  If we get an I/O
+                                * error at this point then chances are the metadata
+                                * update to follow will also have an I/O error so the
+                                * risk here is small.
+                                */
+                               rl_remove(ap->a_foffset, ap->a_foffset + bytesContAvail - 1,
+                                                 &fp->ff_invalidranges);
+
+                               if (!TAILQ_FIRST(&fp->ff_invalidranges)) {
+                                       cp->c_flag &= ~C_ZFWANTSYNC;
+                                       cp->c_zftimeout = 0;
                                }
-                       } else {
-                               bytesContAvail = invalid_range->rl_start - ap->a_foffset;
                        }
-                       break;
+               }
 
-               case RL_NOOVERLAP:
-                       break;
-               } /* end switch */
-               if (bytesContAvail > ap->a_size)
-                       bytesContAvail = ap->a_size;
-       } 
-               
-exit:
-       if (retval == 0) {
                if (ap->a_run)
                        *ap->a_run = bytesContAvail;
 
@@ -3181,6 +3343,13 @@ exit:
                        *(int *)ap->a_poff = 0;
        }
 
+       if (started_tr) {
+               hfs_update(vp, TRUE);
+               hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+               hfs_end_transaction(hfsmp);
+               started_tr = 0;
+       }
+
        if (tooklock)
                hfs_unlock(cp);
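
For the write side of the same exit path, the VNODE_WRITE mapping is where overlapped invalid ranges are dropped and the zero-fill bookkeeping is cleared once none remain.  The sketch below models that with a single invalid range; filefork_model and blockmap_write are simplified stand-ins, and a real rl_remove can split a range rather than only trimming it.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct filefork_model {
    int64_t inv_start, inv_end;   /* single invalid range, -1/-1 == none     */
    bool    zf_wanted;            /* stand-in for C_ZFWANTSYNC / c_zftimeout */
};

/* Called when the cluster layer maps [off, off+len) just before writing it. */
static void blockmap_write(struct filefork_model *fp, int64_t off, int64_t len)
{
    if (fp->inv_start < 0)
        return;                               /* no invalid ranges at all   */

    if (off + len <= fp->inv_start)
        return;                               /* write ends before the range */

    if (off <= fp->inv_start && off + len > fp->inv_end) {
        fp->inv_start = fp->inv_end = -1;     /* fully covered: drop it      */
        fp->zf_wanted = false;                /* nothing left to zero-fill   */
    } else if (off <= fp->inv_start) {
        fp->inv_start = off + len;            /* head of the range written   */
    } else {
        fp->inv_end = off - 1;                /* keep only the head portion
                                                 (a real rl_remove may split) */
    }
}

int main(void)
{
    struct filefork_model fp = { .inv_start = 4096, .inv_end = 8191,
                                 .zf_wanted = true };

    blockmap_write(&fp, 0, 16384);            /* write covers the whole range */
    printf("invalid=[%lld,%lld] zf_wanted=%d\n",
           (long long)fp.inv_start, (long long)fp.inv_end, fp.zf_wanted);
    return 0;
}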
 
@@ -3216,86 +3385,17 @@ hfs_vnop_strategy(struct vnop_strategy_args *ap)
        }
        
 #if CONFIG_PROTECT
-       cnode_t *cp = NULL; 
-       
-       if ((!bufattr_rawencrypted(&bp->b_attr)) && 
-                       ((cp = cp_get_protected_cnode(vp)) != NULL)) {
-               /* 
-                * We rely upon the truncate lock to protect the
-                * CP cache key from getting tossed prior to our IO finishing here.
-                * Nearly all cluster io calls to manipulate file payload from HFS
-                * take the truncate lock before calling into the cluster
-                * layer to ensure the file size does not change, or that they
-                * have exclusive right to change the EOF of the file.  
-                * That same guarantee protects us here since the code that
-                * deals with CP lock events must now take the truncate lock 
-                * before doing anything. 
-                *
-                * There is 1 exception here:
-                * 1) One exception should be the VM swapfile IO, because HFS will
-                * funnel the VNOP_PAGEOUT directly into a cluster_pageout call for the
-                * swapfile code only without holding the truncate lock.  This is because
-                * individual swapfiles are maintained at fixed-length sizes by the VM code.
-                * In non-swapfile IO we use PAGEOUT_V2 semantics which allow us to 
-                * create our own UPL and thus take the truncate lock before calling 
-                * into the cluster layer.  In that case, however, we are not concerned 
-                * with the CP blob being wiped out in the middle of the IO 
-                * because there isn't anything to toss; the VM swapfile key stays
-                * in-core as long as the file is open. 
-                */
-               
-               
-               /*
-                * Last chance: If this data protected I/O does not have unwrapped keys
-                * present, then try to get them.  We already know that it should, by this point.
-                */
-               if (cp->c_cpentry->cp_flags & (CP_KEY_FLUSHED | CP_NEEDS_KEYS)) {
-                       int io_op = ( (buf_flags(bp) & B_READ) ? CP_READ_ACCESS : CP_WRITE_ACCESS);
-                       if ((error = cp_handle_vnop(vp, io_op, 0)) != 0) {
-                               /*
-                                * We have to be careful here.  By this point in the I/O path, VM or the cluster
-                                * engine has prepared a buf_t with the proper file offsets and all the rest,
-                                * so simply erroring out will result in us leaking this particular buf_t.
-                                * We need to properly decorate the buf_t just as buf_strategy would so as 
-                                * to make it appear that the I/O errored out with the particular error code.
-                                */
-                               buf_seterror (bp, error);
-                               buf_biodone(bp);
-                               return error;
-                       }
-               }
-               
-               /*
-                *NB:
-                * For filesystem resize, we may not have access to the underlying
-                * file's cache key for whatever reason (device may be locked).  However,
-                * we do not need it since we are going to use the temporary HFS-wide resize key
-                * which is generated once we start relocating file content.  If this file's I/O 
-                * should be done using the resize key, it will have been supplied already, so
-                * do not attach the file's cp blob to the buffer. 
-                */
-               if ((cp->c_cpentry->cp_flags & CP_RELOCATION_INFLIGHT) == 0) {
-                       buf_setcpaddr(bp, cp->c_cpentry);
-               }
-       }
-#endif /* CONFIG_PROTECT */
+       error = cp_handle_strategy(bp);
+
+       if (error)
+               return error;
+#endif
        
        error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
        
        return error;
 }
 
-static int 
-hfs_minorupdate(struct vnode *vp) {
-       struct cnode *cp = VTOC(vp);
-       cp->c_flag &= ~C_MODIFIED;
-       cp->c_touch_acctime = 0;
-       cp->c_touch_chgtime = 0;
-       cp->c_touch_modtime = 0;
-       
-       return 0;
-}
-
 int
 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vfs_context_t context)
 {
@@ -3310,7 +3410,6 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf
        int blksize;
        struct hfsmount *hfsmp;
        int lockflags;
-       int skipupdate = (truncateflags & HFS_TRUNCATE_SKIPUPDATE);
        int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES);
 
        blksize = VTOVCB(vp)->blockSize;
@@ -3419,13 +3518,8 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf
                        hfs_systemfile_unlock(hfsmp, lockflags);
 
                        if (hfsmp->jnl) {
-                               if (skipupdate) {
-                                       (void) hfs_minorupdate(vp);
-                               }
-                               else {  
-                                       (void) hfs_update(vp, TRUE);
-                                       (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
-                               }
+                               hfs_update(vp, 0);
+                               hfs_volupdate(hfsmp, VOL_UPDATE, 0);
                        }
 
                        hfs_end_transaction(hfsmp);
@@ -3443,47 +3537,22 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf
                                rl_remove_all(&fp->ff_invalidranges);
                } else {
                        if (UBCINFOEXISTS(vp)  && (vnode_issystem(vp) == 0) && retval == E_NONE) {
-                               struct rl_entry *invalid_range;
-                               off_t zero_limit;
-                       
-                               zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
-                               if (length < zero_limit) zero_limit = length;
-
                                if (length > (off_t)fp->ff_size) {
                                        struct timeval tv;
 
                                        /* Extending the file: time to fill out the current last page w. zeroes? */
-                                       if ((fp->ff_size & PAGE_MASK_64) &&
-                                           (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
-                                           fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
-                                               
-                                               /* There's some valid data at the start of the (current) last page
+                                       if (fp->ff_size & PAGE_MASK_64) {
+                                               /* There might be some valid data at the start of the (current) last page
                                                   of the file, so zero out the remainder of that page to ensure the
-                                                  entire page contains valid data.  Since there is no invalid range
-                                                  possible past the (current) eof, there's no need to remove anything
-                                                  from the invalid range list before calling cluster_write():  */
+                                                  entire page contains valid data. */
                                                hfs_unlock(cp);
-                                               retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
-                                                               fp->ff_size, (off_t)0,
-                                                               (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
+                                               retval = hfs_zero_eof_page(vp, length);
                                                hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
                                                if (retval) goto Err_Exit;
-                                               
-                                               /* Merely invalidate the remaining area, if necessary: */
-                                               if (length > zero_limit) {
-                                                       microuptime(&tv);
-                                                       rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
-                                                       cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
-                                               }
-                                       } else {
-                                       /* The page containing the (current) eof is invalid: just add the
-                                          remainder of the page to the invalid list, along with the area
-                                          being newly allocated:
-                                        */
+                                       }
                                        microuptime(&tv);
                                        rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
                                        cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
-                                       };
                                }
                        } else {
                                        panic("hfs_truncate: invoked on non-UBC object?!");
@@ -3533,9 +3602,7 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf
                        hfs_unlock_mount (hfsmp);
                }
 
-#if QUOTA
                off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
-#endif /* QUOTA */
                if (hfs_start_transaction(hfsmp) != 0) {
                        retval = EINVAL;
                        goto Err_Exit;
@@ -3557,13 +3624,8 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf
                        if (retval == 0) {
                                fp->ff_size = length;
                        }
-                       if (skipupdate) {
-                               (void) hfs_minorupdate(vp);
-                       }
-                       else {
-                               (void) hfs_update(vp, TRUE);
-                               (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
-                       }
+                       hfs_update(vp, 0);
+                       hfs_volupdate(hfsmp, VOL_UPDATE, 0);
                }
                hfs_end_transaction(hfsmp);
 
@@ -3575,6 +3637,15 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf
                (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
 #endif /* QUOTA */
 
+               //
+               // Unlike when growing a file, we adjust the hotfile block count here
+               // instead of deeper down in the block allocation code because we do
+               // not necessarily have a vnode or "fcb" at the time we're deleting
+               // the file and so we wouldn't know if it was hotfile cached or not
+               //
+               hfs_hotfile_adjust_blocks(vp, (int64_t)((savedbytes - filebytes) / blksize));
+
+
                /* 
                 * Only set update flag if the logical length changes & we aren't
                 * suppressing modtime updates.
@@ -3585,30 +3656,24 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf
                fp->ff_size = length;
        }
        if (cp->c_mode & (S_ISUID | S_ISGID)) {
-               if (!vfs_context_issuser(context)) {
+               if (!vfs_context_issuser(context))
                        cp->c_mode &= ~(S_ISUID | S_ISGID);
-                       skipupdate = 0;
-               }
        }
-       if (skipupdate) {
-               retval = hfs_minorupdate(vp);
-       }
-       else {
-               cp->c_touch_chgtime = TRUE;     /* status changed */
-               if (suppress_times == 0) {
-                       cp->c_touch_modtime = TRUE;     /* file data was modified */
-               
-                       /* 
-                        * If we are not suppressing the modtime update, then
-                        * update the gen count as well.
-                        */
-                       if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK (cp->c_attr.ca_mode)) {
-                               hfs_incr_gencount(cp);
-                       }
-               }
+       cp->c_flag |= C_MODIFIED;
+       cp->c_touch_chgtime = TRUE;     /* status changed */
+       if (suppress_times == 0) {
+               cp->c_touch_modtime = TRUE;     /* file data was modified */
 
-               retval = hfs_update(vp, MNT_WAIT);
+               /*
+                * If we are not suppressing the modtime update, then
+                * update the gen count as well.
+                */
+               if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK (cp->c_attr.ca_mode)) {
+                       hfs_incr_gencount(cp);
+               }
        }
+
+       retval = hfs_update(vp, 0);
        if (retval) {
                KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE,
                     -1, -1, -1, retval, 0);
@@ -3728,10 +3793,12 @@ hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
        
        /* Data Fork */
        if (datafork) {
+               off_t prev_filebytes;
                datafork->ff_size = 0;
 
                fileblocks = datafork->ff_blocks;
-               filebytes = (off_t)fileblocks * (off_t)blksize;         
+               filebytes = (off_t)fileblocks * (off_t)blksize;
+               prev_filebytes = filebytes;
                
                /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
                
@@ -3761,6 +3828,12 @@ hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
                        }
                        (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
                        
+                       struct cnode *cp = datafork ? FTOC(datafork) : NULL;
+                       struct vnode *vp;
+                       vp = cp ? CTOV(cp, 0) : NULL;
+                       hfs_hotfile_adjust_blocks(vp, (int64_t)((prev_filebytes - filebytes) / blksize));
+                       prev_filebytes = filebytes;
+                       
                        /* Finish the transaction and start over if necessary */
                        hfs_end_transaction(hfsmp);
                        
@@ -3864,6 +3937,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags,
        int blksize;
        errno_t error = 0;
        struct cnode *cp = VTOC(vp);
+       hfsmount_t *hfsmp = VTOHFS(vp);
 
        /* Cannot truncate an HFS directory! */
        if (vnode_isdir(vp)) {
@@ -3874,7 +3948,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags,
                return (EPERM);
        }
 
-       blksize = VTOVCB(vp)->blockSize;
+       blksize = hfsmp->blockSize;
        fileblocks = fp->ff_blocks;
        filebytes = (off_t)fileblocks * (off_t)blksize;
 
@@ -3901,22 +3975,28 @@ hfs_truncate(struct vnode *vp, off_t length, int flags,
                        } else {
                                filebytes = length;
                        }
-                       cp->c_flag |= C_FORCEUPDATE;
                        error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
                        if (error)
                                break;
                }
        } else if (length > filebytes) {
-               while (filebytes < length) {
-                       if ((length - filebytes) > HFS_BIGFILE_SIZE) {
-                               filebytes += HFS_BIGFILE_SIZE;
-                       } else {
-                               filebytes = length;
+               kauth_cred_t cred = vfs_context_ucred(context);
+               const bool keep_reserve = cred && suser(cred, NULL) != 0;
+
+               if (hfs_freeblks(hfsmp, keep_reserve)
+                       < howmany(length - filebytes, blksize)) {
+                       error = ENOSPC;
+               } else {
+                       while (filebytes < length) {
+                               if ((length - filebytes) > HFS_BIGFILE_SIZE) {
+                                       filebytes += HFS_BIGFILE_SIZE;
+                               } else {
+                                       filebytes = length;
+                               }
+                               error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
+                               if (error)
+                                       break;
                        }
-                       cp->c_flag |= C_FORCEUPDATE;
-                       error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
-                       if (error)
-                               break;
                }
        } else /* Same logical size */ {
 
@@ -3927,6 +4007,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags,
                fp->ff_bytesread = 0;
        }
 
+
        if (!caller_has_cnode_lock)
                hfs_unlock(cp);
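
Both the ENOSPC pre-check added above in hfs_truncate and the equivalent one added to hfs_vnop_allocate below round the bytes being added up to whole allocation blocks with howmany() before comparing against hfs_freeblks(). A quick user-space check of that arithmetic, using a hypothetical 4 KiB block size (the fallback macro matches the usual sys/param.h definition):

#include <assert.h>
#include <sys/param.h>

#ifndef howmany                                 /* fallback, same definition */
#define howmany(x, y)   (((x) + ((y) - 1)) / (y))
#endif

int main(void)
{
        long long filebytes = 12288;            /* 3 blocks already allocated */
        long long length    = 20481;            /* requested logical size     */
        long long blksize   = 4096;             /* hypothetical block size    */

        /* 8193 bytes still needed rounds up to 3 blocks, not 2 */
        assert(howmany(length - filebytes, blksize) == 3);
        return 0;
}
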
 
@@ -4032,6 +4113,13 @@ hfs_vnop_allocate(struct vnop_allocate_args /* {
         * value of filebytes is 0, length will be at least 1.
         */
        if (length > filebytes) {
+               if (ISSET(extendFlags, kEFAllMask)
+                       && (hfs_freeblks(hfsmp, ISSET(extendFlags, kEFReserveMask))
+                               < howmany(length - filebytes, hfsmp->blockSize))) {
+                       retval = ENOSPC;
+                       goto Err_Exit;
+               }
+
                off_t total_bytes_added = 0, orig_request_size;
 
                orig_request_size = moreBytesRequested = length - filebytes;
@@ -4109,7 +4197,7 @@ hfs_vnop_allocate(struct vnop_allocate_args /* {
                    hfs_systemfile_unlock(hfsmp, lockflags);
 
                    if (hfsmp->jnl) {
-                       (void) hfs_update(vp, TRUE);
+                       (void) hfs_update(vp, 0);
                        (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
                    }
 
@@ -4169,9 +4257,10 @@ hfs_vnop_allocate(struct vnop_allocate_args /* {
        }
 
 Std_Exit:
+       cp->c_flag |= C_MODIFIED;
        cp->c_touch_chgtime = TRUE;
        cp->c_touch_modtime = TRUE;
-       retval2 = hfs_update(vp, MNT_WAIT);
+       retval2 = hfs_update(vp, 0);
 
        if (retval == 0)
                retval = retval2;
@@ -4416,12 +4505,19 @@ retry_pagein:
                                 * compressed once the compression lock is successfully taken
                                 * i.e. we would block on that lock while the file is being inflated
                                 */
+                               if (error == 0 && vnode_isfastdevicecandidate(vp)) {
+                                       (void) hfs_addhotfile(vp);
+                               }
                                if (compressed) {
                                        if (error == 0) {
                                                /* successful page-in, update the access time */
                                                VTOC(vp)->c_touch_acctime = TRUE;
                                        
-                                               /* compressed files are not hot file candidates */
+                                               //
+                                               // compressed files are not traditional hot file candidates
+                                               // but they may be for CF (which ignores the ff_bytesread
+                                               // field)
+                                               //
                                                if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
                                                        fp->ff_bytesread = 0;
                                                }
@@ -4514,6 +4610,10 @@ retry_pagein:
                                fp->ff_bytesread += bytesread;
                        }
                        cp->c_touch_acctime = TRUE;
+
+                       if (vnode_isfastdevicecandidate(vp)) {
+                               (void) hfs_addhotfile(vp);
+                       }
                        if (took_cnode_lock)
                                hfs_unlock(cp);
                }
@@ -4567,16 +4667,6 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap)
        cp = VTOC(vp);
        fp = VTOF(vp);
        
-       /*
-        * Figure out where the file ends, for pageout purposes.  If
-        * ff_new_size > ff_size, then we're in the middle of extending the
-        * file via a write, so it is safe (and necessary) that we be able
-        * to pageout up to that point.
-        */
-       filesize = fp->ff_size;
-       if (fp->ff_new_size > filesize)
-               filesize = fp->ff_new_size;
-
        a_flags = ap->a_flags;
        a_pl_offset = ap->a_pl_offset;
 
@@ -4628,6 +4718,16 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap)
          * it was either passed in or we successfully created it
         */
 
+       /*
+        * Figure out where the file ends, for pageout purposes.  If
+        * ff_new_size > ff_size, then we're in the middle of extending the
+        * file via a write, so it is safe (and necessary) that we be able
+        * to pageout up to that point.
+        */
+       filesize = fp->ff_size;
+       if (fp->ff_new_size > filesize)
+               filesize = fp->ff_new_size;
+
        /* 
         * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own  
         * UPL instead of relying on the UPL passed into us.  We go ahead and do that here,
@@ -4718,42 +4818,6 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap)
                        }
                        xsize = num_of_pages * PAGE_SIZE;
 
-                       if (!vnode_isswap(vp)) {
-                               off_t end_of_range;
-                               int tooklock;
-
-                               tooklock = 0;
-
-                               if (cp->c_lockowner != current_thread()) {
-                                       if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
-                                               /*
-                                                * we're in the v2 path, so we are the
-                                                * owner of the UPL... we may have already
-                                                * processed some of the UPL, so abort it
-                                                * from the current working offset to the
-                                                * end of the UPL
-                                                */
-                                               ubc_upl_abort_range(upl,
-                                                                   offset,
-                                                                   ap->a_size - offset,
-                                                                   UPL_ABORT_FREE_ON_EMPTY);
-                                               goto pageout_done;
-                                       }
-                                       tooklock = 1;
-                               }
-                               end_of_range = f_offset + xsize - 1;
-       
-                               if (end_of_range >= filesize) {
-                                       end_of_range = (off_t)(filesize - 1);
-                               }
-                               if (f_offset < filesize) {
-                                       rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
-                                       cp->c_flag |= C_MODIFIED;  /* leof is dirty */
-                               }
-                               if (tooklock) {
-                                       hfs_unlock(cp);
-                               }
-                       }
                        if ((error = cluster_pageout(vp, upl, offset, f_offset,
                                                        xsize, filesize, a_flags))) {
                                if (error_ret == 0)
@@ -4770,36 +4834,6 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap)
                }
        } /* end block for v2 pageout behavior */
        else {
-               if (!vnode_isswap(vp)) {
-                       off_t end_of_range;
-                       int tooklock = 0;
-
-                       if (cp->c_lockowner != current_thread()) {
-                               if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
-                                       if (!(a_flags & UPL_NOCOMMIT)) {
-                                               ubc_upl_abort_range(upl,
-                                                                   a_pl_offset,
-                                                                   ap->a_size,
-                                                                   UPL_ABORT_FREE_ON_EMPTY);
-                                       }
-                                       goto pageout_done;
-                               }
-                               tooklock = 1;
-                       }
-                       end_of_range = ap->a_f_offset + ap->a_size - 1;
-       
-                       if (end_of_range >= filesize) {
-                               end_of_range = (off_t)(filesize - 1);
-                       }
-                       if (ap->a_f_offset < filesize) {
-                               rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
-                               cp->c_flag |= C_MODIFIED;  /* leof is dirty */
-                       }
-
-                       if (tooklock) {
-                               hfs_unlock(cp);
-                       }
-               }
                /* 
                 * just call cluster_pageout for old pre-v2 behavior
                 */
@@ -4911,6 +4945,168 @@ hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
        return (retval);
 }
 
+
+int
+hfs_pin_block_range(struct hfsmount *hfsmp, int pin_state, uint32_t start_block, uint32_t nblocks, vfs_context_t ctx)
+{
+       _dk_cs_pin_t pin;
+       unsigned ioc;
+       int err;
+
+       memset(&pin, 0, sizeof(pin));
+       pin.cp_extent.offset = ((uint64_t)start_block) * HFSTOVCB(hfsmp)->blockSize;
+       pin.cp_extent.length = ((uint64_t)nblocks) * HFSTOVCB(hfsmp)->blockSize;
+       switch (pin_state) {
+       case HFS_PIN_IT:
+               ioc = _DKIOCCSPINEXTENT;
+               pin.cp_flags = _DKIOCCSPINTOFASTMEDIA;
+               break;
+       case HFS_PIN_IT | HFS_TEMP_PIN:
+               ioc = _DKIOCCSPINEXTENT;
+               pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSTEMPORARYPIN;
+               break;
+       case HFS_PIN_IT | HFS_DATALESS_PIN:
+               ioc = _DKIOCCSPINEXTENT;
+               pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSPINFORSWAPFILE;
+               break;
+       case HFS_UNPIN_IT:
+               ioc = _DKIOCCSUNPINEXTENT;
+               pin.cp_flags = 0;
+               break;
+       case HFS_UNPIN_IT | HFS_EVICT_PIN:
+               ioc = _DKIOCCSPINEXTENT;
+               pin.cp_flags = _DKIOCCSPINTOSLOWMEDIA;
+               break;
+       default:
+               return EINVAL;
+       }
+       err = VNOP_IOCTL(hfsmp->hfs_devvp, ioc, (caddr_t)&pin, 0, ctx);
+       return err;
+}
+
+//
+// The cnode lock should already be held on entry to this function
+//
+int
+hfs_pin_vnode(struct hfsmount *hfsmp, struct vnode *vp, int pin_state, uint32_t *num_blocks_pinned, vfs_context_t ctx)
+{
+       struct filefork *fp = VTOF(vp);
+       int i, err=0, need_put=0;
+       struct vnode *rsrc_vp=NULL;
+       uint32_t npinned = 0;
+       off_t               offset;
+
+       if (num_blocks_pinned) {
+               *num_blocks_pinned = 0;
+       }
+       
+       if (vnode_vtype(vp) != VREG) {
+               /* Not allowed to pin directories or symlinks */
+               printf("hfs: can't pin vnode of type %d\n", vnode_vtype(vp));
+               return (EPERM);
+       }
+       
+       if (fp->ff_unallocblocks) {
+               printf("hfs: can't pin a vnode w/unalloced blocks (%d)\n", fp->ff_unallocblocks);
+               return (EINVAL);
+       }
+
+       /*
+        * If the caller unlocked and re-locked the cnode after checking for
+        * C_NOEXISTS|C_DELETED, the file could have been deleted while the
+        * cnode was unlocked.  Check the condition again and return ENOENT so
+        * that the caller knows why we failed to pin the vnode.
+        */
+       if (VTOC(vp)->c_flag & (C_NOEXISTS|C_DELETED)) {
+               // makes no sense to pin something that's pending deletion
+               return ENOENT;
+       }
+
+       if (fp->ff_blocks == 0 && (VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
+               if (!VNODE_IS_RSRC(vp) && hfs_vgetrsrc(hfsmp, vp, &rsrc_vp) == 0) {
+                       //printf("hfs: fileid %d resource fork nblocks: %d / size: %lld\n", VTOC(vp)->c_fileid,
+                       //       VTOC(rsrc_vp)->c_rsrcfork->ff_blocks,VTOC(rsrc_vp)->c_rsrcfork->ff_size);
+
+                       fp = VTOC(rsrc_vp)->c_rsrcfork;
+                       need_put = 1;
+               }
+       }
+       if (fp->ff_blocks == 0) {
+               if (need_put) {
+                       //
+                       // use a distinct error code for a compressed file that has no resource fork;
+                       // we return EALREADY to indicate that the data is probably already hot-file
+                       // cached, because it lives in an EA and the attributes btree is on the ssd
+                       // 
+                       err = EALREADY;
+               } else {
+                       err = EINVAL;
+               }
+               goto out;
+       }
+
+       offset = 0;
+       for (i = 0; i < kHFSPlusExtentDensity; i++) {
+               if (fp->ff_extents[i].startBlock == 0) {
+                       break;
+               }
+
+               err = hfs_pin_block_range(hfsmp, pin_state, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount, ctx);
+               if (err) {
+                       break;
+               } else {
+                       npinned += fp->ff_extents[i].blockCount;                        
+               }
+       }
+       
+       if (err || npinned == 0) {
+               goto out;
+       }
+
+       if (fp->ff_extents[kHFSPlusExtentDensity-1].startBlock) {
+               uint32_t pblocks;
+               uint8_t forktype = 0;
+
+               if (fp == VTOC(vp)->c_rsrcfork) {
+                       forktype = 0xff;
+               }
+               /*
+                * The file could have overflow extents; pin those as well.
+                *
+                * We assume that since we are holding the cnode lock for this cnode,
+                * the file's extents cannot be manipulated, but the tree could, so we
+                * need to ensure that it doesn't change behind our back as we iterate it.
+                */
+               int lockflags = hfs_systemfile_lock (hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
+               err = hfs_pin_overflow_extents(hfsmp, VTOC(vp)->c_fileid, forktype, &pblocks);
+               hfs_systemfile_unlock (hfsmp, lockflags);
+
+               if (err) {
+                       goto out;
+               }
+               npinned += pblocks;
+       }
+
+out:
+       if (num_blocks_pinned) {
+               *num_blocks_pinned = npinned;
+       }
+       
+       if (need_put && rsrc_vp) {
+               //
+               // have to unlock the cnode since it's shared between the
+               // resource fork vnode and the data fork vnode (and the
+               // vnode_put() may need to re-acquire the cnode lock to
+               // reclaim the resource fork vnode)
+               //
+               hfs_unlock(VTOC(vp));
+               vnode_put(rsrc_vp);
+               hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+       }
+       return err;
+}
+
+
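
hfs_pin_vnode() above expects the cnode lock to be held on entry and reports how many allocation blocks it asked the fast-media layer to pin. A hypothetical call site might look like the sketch below; example_pin_file is not part of this change (the real callers live in the hotfiles/fsctl code), and error handling is reduced to the minimum.

static int
example_pin_file(struct hfsmount *hfsmp, struct vnode *vp, vfs_context_t ctx)
{
        uint32_t pinned = 0;
        int err;

        /* hfs_pin_vnode expects the cnode lock to be held on entry */
        if ((err = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)))
                return err;

        /* temporarily pin the file's blocks to the fast media */
        err = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT | HFS_TEMP_PIN, &pinned, ctx);

        hfs_unlock(VTOC(vp));

        if (err == 0)
                printf("hfs: pinned %u blocks\n", pinned);

        return err;
}
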
 /*
  * Relocate a file to a new location on disk
  *  cnode must be locked on entry
@@ -5170,13 +5366,13 @@ out:
 
        /* Push cnode's new extent data to disk. */
        if (retval == 0) {
-               (void) hfs_update(vp, MNT_WAIT);
+               hfs_update(vp, 0);
        }
        if (hfsmp->jnl) {
                if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
-                       (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
+                       (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
                else
-                       (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
+                       (void) hfs_flushvolumeheader(hfsmp, 0);
        }
 exit:
        if (started_tr)
@@ -5242,7 +5438,7 @@ hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
        }
 #endif /* CONFIG_PROTECT */
 
-       if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
+       if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize, VM_KERN_MEMORY_FILE)) {
                hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
                return (ENOMEM);
        }
@@ -5336,7 +5532,7 @@ hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
        bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
        breadcnt = bufsize / iosize;
 
-       if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
+       if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize, VM_KERN_MEMORY_FILE)) {
                return (ENOMEM);
        }       
        start_blk = ((daddr64_t)blkstart * blksize) / iosize;
@@ -5397,3 +5593,90 @@ out:
 
        return (error);
 }
+
+errno_t hfs_flush_invalid_ranges(vnode_t vp)
+{
+       cnode_t *cp = VTOC(vp);
+
+       assert(cp->c_lockowner == current_thread());
+       assert(cp->c_truncatelockowner == current_thread());
+
+       if (!ISSET(cp->c_flag, C_ZFWANTSYNC) && !cp->c_zftimeout)
+               return 0;
+
+       filefork_t *fp = VTOF(vp);
+
+       /*
+        * We can't hold the cnode lock whilst we call cluster_write so we
+        * need to copy the extents into a local buffer.
+        */
+       int max_exts = 16;
+       struct ext {
+               off_t start, end;
+       } exts_buf[max_exts];           // 256 bytes
+       struct ext *exts = exts_buf;
+       int ext_count = 0;
+       errno_t ret;
+
+       struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges);
+
+       while (r) {
+               /* If we have more than can fit in our stack buffer, switch
+                  to a heap buffer. */
+               if (exts == exts_buf && ext_count == max_exts) {
+                       max_exts = 256;
+                       MALLOC(exts, struct ext *, sizeof(struct ext) * max_exts,
+                                  M_TEMP, M_WAITOK);
+                       memcpy(exts, exts_buf, ext_count * sizeof(struct ext));
+               }
+
+               struct rl_entry *next = TAILQ_NEXT(r, rl_link);
+
+               exts[ext_count++] = (struct ext){ r->rl_start, r->rl_end };
+
+               if (!next || (ext_count == max_exts && exts != exts_buf)) {
+                       hfs_unlock(cp);
+                       for (int i = 0; i < ext_count; ++i) {
+                               ret = cluster_write(vp, NULL, fp->ff_size, exts[i].end + 1,
+                                                                       exts[i].start, 0,
+                                                                       IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE);
+                               if (ret) {
+                                       hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
+                                       goto exit;
+                               }
+                       }
+
+                       if (!next) {
+                               hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
+                               break;
+                       }
+
+                       /* Push any existing clusters which should clean up our invalid
+                          ranges as they go through hfs_vnop_blockmap. */
+                       cluster_push(vp, 0);
+
+                       hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
+
+                       /*
+                        * Get back to where we were (given we dropped the lock).
+                        * There shouldn't be many entries to skip because we pushed above.
+                        */
+                       TAILQ_FOREACH(r, &fp->ff_invalidranges, rl_link) {
+                               if (r->rl_end > exts[ext_count - 1].end)
+                                       break;
+                       }
+
+                       ext_count = 0;
+               } else
+                       r = next;
+       }
+
+       ret = 0;
+
+exit:
+
+       if (exts != exts_buf)
+               FREE(exts, M_TEMP);
+
+       return ret;
+}
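
hfs_flush_invalid_ranges() above copies the invalid-range list into a small on-stack array and only falls back to a heap allocation once more than 16 entries turn up, so the common case costs no allocation. A self-contained sketch of that pattern follows; the names are illustrative, and unlike the kernel code it does not flush and restart when the larger buffer also fills, so it assumes at most 256 entries.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct ext { long long start, end; };

static int
collect(const struct ext *src, int n)
{
        struct ext stack_buf[16];               /* covers the common case */
        struct ext *exts = stack_buf;
        int capacity = 16, count = 0;

        for (int i = 0; i < n; i++) {
                if (exts == stack_buf && count == capacity) {
                        /* outgrew the stack buffer: move to the heap once */
                        capacity = 256;
                        exts = malloc(sizeof(*exts) * capacity);
                        if (exts == NULL)
                                return -1;
                        memcpy(exts, stack_buf, count * sizeof(*exts));
                }
                exts[count++] = src[i];
        }

        printf("captured %d extents using the %s buffer\n", count,
               exts == stack_buf ? "stack" : "heap");

        if (exts != stack_buf)
                free(exts);
        return 0;
}

int main(void)
{
        struct ext sample[20];

        for (int i = 0; i < 20; i++)
                sample[i] = (struct ext){ i * 4096, i * 4096 + 4095 };
        return collect(sample, 20);
}
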
index ceaa4d572b47c657daa936ac0ba696b76a5b85dc..f5dc27ad558b44b7030528e3ebdf3996774427ce 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2013-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -30,6 +30,7 @@
 #include <sys/ubc.h>
 #include <sys/vnode_internal.h>
 #include <sys/mount_internal.h>
+
 #include <sys/buf_internal.h>
 #include <vfs/vfs_journal.h>
 #include <miscfs/specfs/specdev.h>
 #include "hfs_cnode.h"
 #include "hfs_endian.h"
 #include "hfs_btreeio.h"
-
-#if CONFIG_PROTECT
-#include <sys/cprotect.h>
-#endif
+#include "hfs_cprotect.h"
 
 /* Enable/disable debugging code for live volume resizing */
 int hfs_resize_debug = 0;
 
-static int hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec);
+static errno_t hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit,
+                                                                               struct HFSPlusCatalogFile *filerec, bool *overlaps);
 static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context);
 static int hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context);
 
@@ -170,9 +169,9 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
        }
        hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
        hfs_unlock_mount (hfsmp);
-       
+
        /* Start with a clean journal. */
-       hfs_journal_flush(hfsmp, TRUE);
+       hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
     
        /*
         * Enclose changes inside a transaction.
@@ -371,7 +370,7 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
        vcb->totalBlocks += addblks;
        vcb->freeBlocks += addblks;
        MarkVCBDirty(vcb);
-       error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
+       error = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
        if (error) {
                printf("hfs_extendfs: couldn't flush volume headers (%d)", error);
                /*
@@ -469,7 +468,7 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
        }
        if (transaction_begun) {
                hfs_end_transaction(hfsmp);
-               hfs_journal_flush(hfsmp, TRUE);
+               hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
                transaction_begun = 0;
        }
     
@@ -501,9 +500,10 @@ out_noalloc:
        }
        if (transaction_begun) {
                hfs_end_transaction(hfsmp);
-               hfs_journal_flush(hfsmp, FALSE);
                /* Just to be sure, sync all data to the disk */
-               (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
+               int flush_error = hfs_flush(hfsmp, HFS_FLUSH_FULL);
+               if (flush_error && !error)
+                       error = flush_error;
        }
        if (error) {
                printf ("hfs_extentfs: failed error=%d on vol=%s\n", MacToVFSError(error), hfsmp->vcbVN);
@@ -567,7 +567,7 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
                error = EINVAL;
                goto out;
        }
-    
+
        /*
         * Make sure that the file system has enough free blocks to reclaim.
         *
@@ -614,9 +614,9 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
                error = ENOSPC;
                goto out;
        }
-       
+
        /* Start with a clean journal. */
-       hfs_journal_flush(hfsmp, TRUE);
+       hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
        
        if (hfs_start_transaction(hfsmp) != 0) {
                error = EINVAL;
@@ -674,6 +674,7 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
         * an extent being relocated is more than the free blocks that
         * will exist after the volume is resized.
         */
+       hfsmp->reclaimBlocks = reclaimblks;
        hfsmp->freeBlocks -= reclaimblks;
        updateFreeBlocks = true;
        hfs_unlock_mount(hfsmp);
@@ -706,7 +707,7 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
                 */
                hfs_end_transaction(hfsmp);
                transaction_begun = 0;
-        
+
                /* Attempt to reclaim some space. */
                error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context);
                if (error != 0) {
@@ -714,6 +715,7 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
                        error = ENOSPC;
                        goto out;
                }
+
                if (hfs_start_transaction(hfsmp) != 0) {
                        error = EINVAL;
                        goto out;
@@ -770,7 +772,8 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
        hfsmp->totalBlocks = newblkcnt;
        hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size;
        hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size;
-    
+       hfsmp->reclaimBlocks = 0;
+
        /*
         * At this point, a smaller HFS file system exists in a larger volume.
         * As per volume format, the alternate volume header is located 1024 bytes
@@ -793,7 +796,7 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
        }
        
        MarkVCBDirty(hfsmp);
-       error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
+       error = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
        if (error) {
                panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error);
        }
@@ -839,7 +842,8 @@ out:
        if (error && (updateFreeBlocks == true)) {
                hfsmp->freeBlocks += reclaimblks;
        }
-       
+       hfsmp->reclaimBlocks = 0;
+
        if (hfsmp->nextAllocation >= hfsmp->allocLimit) {
                hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1;
        }
@@ -856,9 +860,10 @@ out:
        }
        if (transaction_begun) {
                hfs_end_transaction(hfsmp);
-               hfs_journal_flush(hfsmp, FALSE);
                /* Just to be sure, sync all data to the disk */
-               (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
+               int flush_error = hfs_flush(hfsmp, HFS_FLUSH_FULL);
+               if (flush_error && !error)
+                       error = flush_error;
        }
     
        if (error) {
@@ -918,7 +923,7 @@ hfs_copy_extent(
                 u_int32_t oldStart,            /* The start of the source extent. */
                 u_int32_t newStart,            /* The start of the destination extent. */
                 u_int32_t blockCount,  /* The number of allocation blocks to copy. */
-                vfs_context_t context)
+                __unused vfs_context_t context)
 {
        int err = 0;
        size_t bufferSize;
@@ -955,36 +960,10 @@ hfs_copy_extent(
         * a special cpentry to the IOMedia/LwVM code for handling.
         */
        if (!vnode_issystem (vp) && vnode_isreg(vp) && cp_fs_protected (hfsmp->hfs_mp)) {
-               int cp_err = 0;
-               /*
-                * Ideally, the file whose extents we are about to manipulate is using the
-                * newer offset-based IVs so that we can manipulate it regardless of the
-                * current lock state.  However, we must maintain support for older-style
-                * EAs.
-                *
-                * For the older EA case, the IV was tied to the device LBA for file content.
-                * This means that encrypted data cannot be moved from one location to another
-                * in the filesystem without garbling the IV data.  As a result, we need to
-                * access the file's plaintext because we cannot do our AES-symmetry trick
-                * here.  This requires that we attempt a key-unwrap here (via cp_handle_relocate)
-                * to make forward progress.  If the keys are unavailable then we will
-                * simply stop the resize in its tracks here since we cannot move
-                * this extent at this time.
-                */
-               if ((cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) == 0) {
-                       cp_err = cp_handle_relocate(cp, hfsmp);
-               }
-        
-               if (cp_err) {
-                       printf ("hfs_copy_extent: cp_handle_relocate failed (%d) \n", cp_err);
-                       return cp_err;
-               }
-        
                cpenabled = 1;
        }
 #endif
-    
-    
+
        /*
         * Determine the I/O size to use
         *
@@ -994,7 +973,7 @@ hfs_copy_extent(
         */
        vfs_ioattr(hfsmp->hfs_mp, &ioattr);
        bufferSize = MIN(ioattr.io_maxreadcnt, ioattr.io_maxwritecnt);
-       if (kmem_alloc(kernel_map, (vm_offset_t*) &buffer, bufferSize))
+       if (kmem_alloc(kernel_map, (vm_offset_t*) &buffer, bufferSize, VM_KERN_MEMORY_FILE))
                return ENOMEM;
     
        /* Get a buffer for doing the I/O */
@@ -1025,24 +1004,15 @@ hfs_copy_extent(
                /* Attach the new CP blob  to the buffer if needed */
 #if CONFIG_PROTECT
                if (cpenabled) {
-                       if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) {
-                               /* attach the RELOCATION_INFLIGHT flag for the underlying call to VNOP_STRATEGY */
-                               cp->c_cpentry->cp_flags |= CP_RELOCATION_INFLIGHT;
-                               buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry);
-                       }
-                       else {
-                               /*
-                                * Use the cnode's cp key.  This file is tied to the
-                                * LBAs of the physical blocks that it occupies.
-                                */
-                               buf_setcpaddr (bp, cp->c_cpentry);
-                       }
-            
+                       /* attach the RELOCATION_INFLIGHT flag for the underlying call to VNOP_STRATEGY */
+                       cp->c_cpentry->cp_flags |= CP_RELOCATION_INFLIGHT;
+                       bufattr_setcpx(buf_attr(bp), hfsmp->hfs_resize_cpx);
+
                        /* Initialize the content protection file offset to start at 0 */
                        buf_setcpoff (bp, 0);
                }
 #endif
-        
+
                /* Do the read */
                err = VNOP_STRATEGY(bp);
                if (!err)
@@ -1070,16 +1040,7 @@ hfs_copy_extent(
 #if CONFIG_PROTECT
                /* Attach the CP to the buffer if needed */
                if (cpenabled) {
-                       if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) {
-                               buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry);
-                       }
-                       else {
-                               /*
-                                * Use the cnode's CP key.  This file is still tied
-                                * to the LBAs of the physical blocks that it occupies.
-                                */
-                               buf_setcpaddr (bp, cp->c_cpentry);
-                       }
+                       bufattr_setcpx(buf_attr(bp), hfsmp->hfs_resize_cpx);
                        /*
                         * The last STRATEGY call may have updated the cp file offset behind our
                         * back, so we cannot trust it.  Re-initialize the content protection
@@ -1117,9 +1078,10 @@ hfs_copy_extent(
     
        /* Make sure all writes have been flushed to disk. */
        if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) {
-               err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
+
+               err = hfs_flush(hfsmp, HFS_FLUSH_CACHE);
                if (err) {
-                       printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err);
+                       printf("hfs_copy_extent: hfs_flush failed (%d)\n", err);
                        err = 0;        /* Don't fail the copy. */
                }
        }
@@ -1901,7 +1863,7 @@ relocate_full_extent:
                        cp->c_flag |= C_MODIFIED;
                        /* If this is a system file, sync volume headers on disk */
                        if (extent_info->is_sysfile) {
-                               error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
+                               error = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
                        }
                }
        } else {
@@ -1944,7 +1906,7 @@ out:
         */
     if ((extent_info->catalog_fp) &&
         (extent_info->is_sysfile == false)) {
-               (void) hfs_update(extent_info->vp, MNT_WAIT);
+               hfs_update(extent_info->vp, 0);
        }
     
        hfs_end_transaction(hfsmp);
@@ -2072,7 +2034,7 @@ hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID,
                /* If the current vnode is system vnode, flush journal
                 * to make sure that all data is written to the disk.
                 */
-               error = hfs_journal_flush(hfsmp, TRUE);
+               error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
                if (error) {
                        printf ("hfs_reclaim_file: journal_flush returned %d\n", error);
                        goto out;
@@ -2266,7 +2228,7 @@ out:
                FREE(extent_info->dirlink_fork, M_TEMP);
        }
        if ((extent_info->blocks_relocated != 0) && (extent_info->is_sysfile == false)) {
-               (void) hfs_update(vp, MNT_WAIT);
+               hfs_update(vp, 0);
        }
        if (took_truncate_lock) {
                hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
@@ -2311,7 +2273,7 @@ hfs_journal_relocate_callback(void *_args)
        JournalInfoBlock *jibp;
     
        error = buf_meta_bread(hfsmp->hfs_devvp,
-                           hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
+                           (uint64_t)hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
                            hfsmp->blockSize, vfs_context_ucred(args->context), &bp);
        if (error) {
                printf("hfs_journal_relocate_callback: failed to read JIB (%d)\n", error);
@@ -2331,9 +2293,9 @@ hfs_journal_relocate_callback(void *_args)
                return error;
        }
        if (!journal_uses_fua(hfsmp->jnl)) {
-               error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, args->context);
+               error = hfs_flush(hfsmp, HFS_FLUSH_CACHE);
                if (error) {
-                       printf("hfs_journal_relocate_callback: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
+                       printf("hfs_journal_relocate_callback: hfs_flush failed (%d)\n", error);
                        error = 0;              /* Don't fail the operation. */
                }
        }
@@ -2429,7 +2391,7 @@ hfs_relocate_journal_file(struct hfsmount *hfsmp, u_int32_t jnl_size, int resize
        }
     
        /* Update the catalog record for .journal */
-       journal_fork.cf_size = newBlockCount * hfsmp->blockSize;
+       journal_fork.cf_size = hfs_blk_to_bytes(newBlockCount, hfsmp->blockSize);
        journal_fork.cf_extents[0].startBlock = newStartBlock;
        journal_fork.cf_extents[0].blockCount = newBlockCount;
        journal_fork.cf_blocks = newBlockCount;
@@ -2539,7 +2501,8 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_conte
                return 0;
        }
     
-       error = hfs_relocate_journal_file(hfsmp, blockCount * hfsmp->blockSize, HFS_RESIZE_TRUNCATE, context);
+       error = hfs_relocate_journal_file(hfsmp, hfs_blk_to_bytes(blockCount, hfsmp->blockSize),
+                                                                         HFS_RESIZE_TRUNCATE, context);
        if (error == 0) {
                hfsmp->hfs_resize_blocksmoved += blockCount;
                hfs_truncatefs_progress(hfsmp);
@@ -2596,7 +2559,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs
        
        /* Copy the old journal info block content to the new location */
        error = buf_meta_bread(hfsmp->hfs_devvp,
-                           hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
+                           (uint64_t)hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
                            hfsmp->blockSize, vfs_context_ucred(context), &old_bp);
        if (error) {
                printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error);
@@ -2606,7 +2569,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs
                goto free_fail;
        }
        new_bp = buf_getblk(hfsmp->hfs_devvp,
-                        newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
+                        (uint64_t)newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
                         hfsmp->blockSize, 0, 0, BLK_META);
        bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize);
        buf_brelse(old_bp);
@@ -2618,9 +2581,9 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs
                goto free_fail;
        }
        if (!journal_uses_fua(hfsmp->jnl)) {
-               error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
+               error = hfs_flush(hfsmp, HFS_FLUSH_CACHE);
                if (error) {
-                       printf("hfs_reclaim_journal_info_block: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
+                       printf("hfs_reclaim_journal_info_block: hfs_flush failed (%d)\n", error);
                        /* Don't fail the operation. */
                }
        }
@@ -2653,7 +2616,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs
        
        /* Update the pointer to the journal info block in the volume header. */
        hfsmp->vcbJinfoBlock = newBlock;
-       error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
+       error = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
        if (error) {
                printf("hfs_reclaim_journal_info_block: hfs_flushvolumeheader returned %d\n", error);
                goto fail;
@@ -2663,7 +2626,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs
        if (error) {
                printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error);
        }
-       error = hfs_journal_flush(hfsmp, FALSE);
+       error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL);
        if (error) {
                printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error);
        }
@@ -2964,7 +2927,7 @@ hfs_reclaim_xattrspace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context
        /* Store the value to print total blocks moved by this function in end */
        prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;
     
-       if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
+       if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator), VM_KERN_MEMORY_FILE)) {
                return ENOMEM;
        }
        bzero(iterator, sizeof(*iterator));
@@ -3095,7 +3058,7 @@ hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_
        /* Store the value to print total blocks moved by this function at the end */
        prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;
     
-       if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
+       if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator), VM_KERN_MEMORY_FILE)) {
                error = ENOMEM;
                goto reclaim_filespace_done;
        }
@@ -3109,16 +3072,11 @@ hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_
         * end of the function.
         */
        if (cp_fs_protected (hfsmp->hfs_mp)) {
-               int needs = 0;
-               error = cp_needs_tempkeys(hfsmp, &needs);
-        
-               if ((error == 0) && (needs)) {
-                       error = cp_entry_gentempkeys(&hfsmp->hfs_resize_cpentry, hfsmp);
-                       if (error == 0) {
-                               keys_generated = 1;
-                       }
+               error = cpx_gentempkeys(&hfsmp->hfs_resize_cpx, hfsmp);
+               if (error == 0) {
+                       keys_generated = 1;
                }
-        
+
                if (error) {
                        printf("hfs_reclaimspace: Error generating temporary keys for resize (%d)\n", error);
                        goto reclaim_filespace_done;
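
The resize path now obtains its temporary content-protection keys with a single cpx_gentempkeys() call and releases them with cpx_free() (see the matching teardown hunk below). A minimal sketch of that lifecycle; resize_tempkeys_sketch() is a hypothetical name and the relocation step is elided:

	#if CONFIG_PROTECT
	/* Sketch only: generate/use/free lifecycle of the resize keys. */
	static int
	resize_tempkeys_sketch(struct hfsmount *hfsmp)
	{
		int error = cpx_gentempkeys(&hfsmp->hfs_resize_cpx, hfsmp);
		if (error)
			return error;		/* no keys were generated */

		/* ... relocate protected file extents using hfsmp->hfs_resize_cpx ... */

		cpx_free(hfsmp->hfs_resize_cpx);
		hfsmp->hfs_resize_cpx = NULL;
		return 0;
	}
	#endif
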
@@ -3151,10 +3109,14 @@ hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_
                }
         
                /* Check if any of the extents require relocation */
-               if (hfs_file_extent_overlaps(hfsmp, allocLimit, &filerec) == false) {
+               bool overlaps;
+               error = hfs_file_extent_overlaps(hfsmp, allocLimit, &filerec, &overlaps);
+               if (error)
+                       break;
+
+               if (!overlaps)
                        continue;
-               }
-        
+
                /* We want to allow open-unlinked files to be moved, so allow_deleted == 1 */
                if (hfs_vget(hfsmp, filerec.fileID, &vp, 0, 1) != 0) {
                        if (hfs_resize_debug) {
@@ -3226,8 +3188,8 @@ reclaim_filespace_done:
     
 #if CONFIG_PROTECT
        if (keys_generated) {
-               cp_entry_destroy(hfsmp->hfs_resize_cpentry);
-               hfsmp->hfs_resize_cpentry = NULL;
+               cpx_free(hfsmp->hfs_resize_cpx);
+               hfsmp->hfs_resize_cpx = NULL;
        }
 #endif
        return error;
@@ -3263,7 +3225,7 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaim
        }
     
        /* Just to be safe, sync the content of the journal to the disk before we proceed */
-       hfs_journal_flush(hfsmp, TRUE);
+       hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
     
        /* First, relocate journal file blocks if they're in the way.
         * Doing this first will make sure that journal relocate code
@@ -3340,7 +3302,7 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaim
         * strictly required, but shouldn't hurt.
         */
        if (hfsmp->hfs_resize_blocksmoved) {
-               hfs_journal_flush(hfsmp, TRUE);
+               hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
        }
     
        /* Reclaim extents from catalog file records */
@@ -3356,7 +3318,25 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaim
                printf ("hfs_reclaimspace: hfs_reclaim_xattrspace returned error=%d\n", error);
                return error;
        }
-    
+
+       /*
+        * Make sure reserved ranges in the region we're to allocate don't
+        * overlap.
+        */
+       struct rl_entry *range;
+again:;
+       int lockf = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_SHARED_LOCK);
+       TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_LOCKED_BLOCKS], rl_link) {
+               if (rl_overlap(range, hfsmp->allocLimit, RL_INFINITY) != RL_NOOVERLAP) {
+                       // Wait 100ms
+                       hfs_systemfile_unlock(hfsmp, lockf);
+                       msleep(hfs_reclaimspace, NULL, PINOD, "waiting on reserved blocks",
+                                  &(struct timespec){ 0, 100 * 1000000 });
+                       goto again;
+               }
+       }
+       hfs_systemfile_unlock(hfsmp, lockf);
+
        return error;
 }
 
@@ -3369,20 +3349,21 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaim
  *     true  - One of the extents need to be relocated
  *     false - No overflow extents need to be relocated, or there was an error
  */
-static int
-hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec)
+static errno_t
+hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit,
+                                                struct HFSPlusCatalogFile *filerec, bool *overlaps)
 {
        struct BTreeIterator * iterator = NULL;
        struct FSBufferDescriptor btdata;
        HFSPlusExtentRecord extrec;
        HFSPlusExtentKey *extkeyptr;
        FCB *fcb;
-       int overlapped = false;
        int i, j;
        int error;
        int lockflags = 0;
        u_int32_t endblock;
-    
+       errno_t ret = 0;
+
        /* Check if data fork overlaps the target space */
        for (i = 0; i < kHFSPlusExtentDensity; ++i) {
                if (filerec->dataFork.extents[i].blockCount == 0) {
@@ -3391,7 +3372,7 @@ hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HF
                endblock = filerec->dataFork.extents[i].startBlock +
         filerec->dataFork.extents[i].blockCount;
                if (endblock > allocLimit) {
-                       overlapped = true;
+                       *overlaps = true;
                        goto out;
                }
        }
@@ -3404,19 +3385,19 @@ hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HF
                endblock = filerec->resourceFork.extents[j].startBlock +
         filerec->resourceFork.extents[j].blockCount;
                if (endblock > allocLimit) {
-                       overlapped = true;
+                       *overlaps = true;
                        goto out;
                }
        }
     
        /* Return back if there are no overflow extents for this file */
        if ((i < kHFSPlusExtentDensity) && (j < kHFSPlusExtentDensity)) {
+               *overlaps = false;
                goto out;
        }
     
-       if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
-               return 0;
-       }       
+       MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
+
        bzero(iterator, sizeof(*iterator));
        extkeyptr = (HFSPlusExtentKey *)&iterator->key;
        extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength;
@@ -3438,9 +3419,10 @@ hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HF
         */
        error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
        if (error && (error != btNotFound)) {
+               ret = MacToVFSError(error);
                goto out;
        }
-    
+
        /* BTIterateRecord() might return error if the btree is empty, and 
         * therefore we return that the extent does not overflow to the caller
         */
@@ -3457,22 +3439,29 @@ hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HF
                        }
                        endblock = extrec[i].startBlock + extrec[i].blockCount;
                        if (endblock > allocLimit) {
-                               overlapped = true;
+                               *overlaps = true;
                                goto out;
                        }
                }
                /* Look for more records. */
                error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
        }
-    
+
+       if (error && error != btNotFound) {
+               ret = MacToVFSError(error);
+               goto out;
+       }
+
+       *overlaps = false;
+
 out:
        if (lockflags) {
                hfs_systemfile_unlock(hfsmp, lockflags);
        }
-       if (iterator) {
-               kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
-       }
-       return overlapped;
+
+       FREE(iterator, M_TEMP);
+
+       return ret;
 }
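
hfs_file_extent_overlaps() now returns an errno_t and reports the yes/no answer through an out parameter, so B-tree failures propagate instead of being folded into "no overlap". A minimal sketch of the new calling convention, mirroring the hfs_reclaim_filespace() caller above; check_one_file_sketch() is a hypothetical name:

	/* Sketch only: consuming the errno_t + out-parameter form. */
	static int
	check_one_file_sketch(struct hfsmount *hfsmp, u_int32_t allocLimit,
			struct HFSPlusCatalogFile *filerec)
	{
		bool overlaps;
		errno_t error = hfs_file_extent_overlaps(hfsmp, allocLimit, filerec, &overlaps);

		if (error)
			return error;	/* extents B-tree lookup failed; propagate it */

		if (!overlaps)
			return 0;	/* nothing past allocLimit; skip this file */

		/* ... relocate the extents that lie beyond allocLimit ... */
		return 0;
	}
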
 
 
index a76a9a9e5fdfe6427315381472c6d1002e21f0a0..45cd1a22d053961bdcfe21a89b66bfe0034af251 100644
@@ -324,7 +324,7 @@ hfs_vnop_search(ap)
                (void) hfs_fsync(vcb->catalogRefNum, MNT_WAIT, 0, p);
                if (hfsmp->jnl) {
                    hfs_systemfile_unlock(hfsmp, lockflags);
-                   hfs_journal_flush(hfsmp, FALSE);
+                   hfs_flush(hfsmp, HFS_FLUSH_JOURNAL);
                    lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
                }
 
@@ -332,6 +332,7 @@ hfs_vnop_search(ap)
                bzero((caddr_t)myCatPositionPtr, sizeof(*myCatPositionPtr));
                err = BTScanInitialize(catalogFCB, 0, 0, 0, kCatSearchBufferSize, &myBTScanState);
                if (err) {
+                       hfs_systemfile_unlock(hfsmp, lockflags);
                        goto ExitThisRoutine;
                }
        } else {
index 9df531ab8f2e92d5e2ecc67292614b11631ab628..a819362bb5eca73af012c12804f1fa6e8e9fee13 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include "hfs_quota.h"
 #include "hfs_btreeio.h"
 #include "hfs_kdebug.h"
+#include "hfs_cprotect.h"
 
 #include "hfscommon/headers/FileMgrInternal.h"
 #include "hfscommon/headers/BTreesInternal.h"
 
-#if CONFIG_PROTECT
-#include <sys/cprotect.h>
-#endif
-
 #define HFS_MOUNT_DEBUG 1
 
 #if    HFS_DIAGNOSTIC
@@ -208,11 +205,6 @@ hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
        vfsp = vfs_statfs(mp);
        (void)hfs_statfs(mp, vfsp, NULL);
 
-       /* Invoke ioctl that asks if the underlying device is Core Storage or not */
-       error = VNOP_IOCTL(rvp, _DKIOCCORESTORAGE, NULL, 0, context);
-       if (error == 0) {
-               hfsmp->hfs_flags |= HFS_CS;
-       }
        return (0);
 }
 
@@ -300,7 +292,7 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
 
                        /* mark the volume cleanly unmounted */
                        hfsmp->vcbAtrb |= kHFSVolumeUnmountedMask;
-                       retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
+                       retval = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
                        hfsmp->hfs_flags |= HFS_READ_ONLY;
 
                        /*
@@ -412,7 +404,7 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
                                 * for metadata writes.
                                 */
                                hfsmp->jnl = journal_open(hfsmp->jvp,
-                                               (hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
+                                               hfs_blk_to_bytes(hfsmp->jnl_start, HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
                                                hfsmp->jnl_size,
                                                hfsmp->hfs_devvp,
                                                hfsmp->hfs_logical_block_size,
@@ -463,7 +455,7 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
                        /* mark the volume dirty (clear clean unmount bit) */
                        hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask;
 
-                       retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
+                       retval = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
                        if (retval != E_NONE) {
                                if (HFS_MOUNT_DEBUG) {
                                        printf("hfs_mount: hfs_flushvolumeheader returned %d for fs %s\n", retval, hfsmp->vcbVN);
@@ -483,12 +475,24 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
                                hfs_remove_orphans(hfsmp);
 
                                /*
-                                * Allow hot file clustering if conditions allow.
+                                * Since we're upgrading to a read-write mount, allow
+                                * hot file clustering if conditions allow.
+                                *
+                                * Note: this normally only would happen if you booted
+                                *       single-user and upgraded the mount to read-write
+                                *
+                                * Note: at this point we are not allowed to fail the
+                                *       mount operation because the HotFile init code
+                                *       in hfs_recording_init() will lookup vnodes with
+                                *       VNOP_LOOKUP() which hangs vnodes off the mount
+                                *       (and if we were to fail, VFS is not prepared to
+                                *       clean that up at this point.  Since HotFiles are
+                                *       optional, this is not a big deal.
                                 */
                                if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) && 
-                                          ((hfsmp->hfs_mp->mnt_kern_flag & MNTK_SSD) == 0))    {
+                                   (((hfsmp->hfs_mp->mnt_kern_flag & MNTK_SSD) == 0) || (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) ) {
                                        (void) hfs_recording_init(hfsmp);
-                               }
+                               }                                       
                                /* Force ACLs on HFS+ file systems. */
                                if (vfs_extendedsecurity(HFSTOVFS(hfsmp)) == 0) {
                                        vfs_setextendedsecurity(HFSTOVFS(hfsmp));
@@ -520,21 +524,6 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
                /* After hfs_mountfs succeeds, we should have valid hfsmp */
                hfsmp = VFSTOHFS(mp);
 
-               /*
-                * Check to see if the file system exists on CoreStorage.  
-                *
-                * This must be done after examining the root folder's CP EA since
-                * hfs_vfs_root will create a vnode (which must not occur until after
-                * we've established the CP level of the FS).
-                */ 
-               if (retval == 0) {
-                       errno_t err;
-                       /* Invoke ioctl that asks if the underlying device is Core Storage or not */
-                       err = VNOP_IOCTL(devvp, _DKIOCCORESTORAGE, NULL, 0, context);
-                       if (err == 0) {
-                               hfsmp->hfs_flags |= HFS_CS;
-                       }
-               }
        }
 
 out:
@@ -1008,10 +997,8 @@ static boolean_t hfs_has_elapsed (const struct timeval *a,
 }
 
 static void
-hfs_syncer(void *arg0, void *unused)
+hfs_syncer(void *arg0, __unused void *unused)
 {
-#pragma unused(unused)
-    
     struct hfsmount *hfsmp = arg0;
     struct timeval   now;
 
@@ -1077,9 +1064,6 @@ hfs_syncer(void *arg0, void *unused)
 
     hfsmp->hfs_syncer_thread = current_thread();
 
-    if (hfs_start_transaction(hfsmp) != 0)    // so we hold off any new writes
-        goto out;
-
     /*
      * We intentionally do a synchronous flush (of the journal or entire volume) here.
      * For journaled volumes, this means we wait until the metadata blocks are written
@@ -1098,7 +1082,7 @@ hfs_syncer(void *arg0, void *unused)
      * user data to be written.
      */
     if (hfsmp->jnl) {
-        hfs_journal_flush(hfsmp, TRUE);
+        hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
     } else {
         hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
     }
@@ -1109,10 +1093,6 @@ hfs_syncer(void *arg0, void *unused)
                           tv_to_usecs(&hfsmp->hfs_mp->mnt_last_write_issued_timestamp), 
                           hfsmp->hfs_mp->mnt_pending_write_size, 0);
 
-    hfs_end_transaction(hfsmp);
-
-out:
-
     hfsmp->hfs_syncer_thread = NULL;
 
     hfs_syncer_lock(hfsmp);
@@ -1192,9 +1172,16 @@ void hfs_scan_blocks (struct hfsmount *hfsmp) {
         */
        (void) ScanUnmapBlocks(hfsmp);
 
+       (void) hfs_lock_mount (hfsmp);
+       hfsmp->scan_var &= ~HFS_ALLOCATOR_SCAN_INFLIGHT;
        hfsmp->scan_var |= HFS_ALLOCATOR_SCAN_COMPLETED;
+       wakeup((caddr_t) &hfsmp->scan_var);
+       hfs_unlock_mount (hfsmp);
 
+       buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
+       
        hfs_systemfile_unlock(hfsmp, flags);
+
 }
 
 static int hfs_root_unmounted_cleanly = 0;
@@ -1409,7 +1396,10 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
                if (device_features & DK_FEATURE_UNMAP) {
                        hfsmp->hfs_flags |= HFS_UNMAP;
                }
-       }       
+
+               if(device_features & DK_FEATURE_BARRIER)
+                       hfsmp->hfs_flags |= HFS_FEATURE_BARRIER;
+       }
 
        /* 
         * See if the disk is a solid state device, too.  We need this to decide what to do about 
@@ -1421,6 +1411,25 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
                }
        }
 
+       /* See if the underlying device is Core Storage or not */
+       dk_corestorage_info_t cs_info;
+       memset(&cs_info, 0, sizeof(dk_corestorage_info_t));
+       if (VNOP_IOCTL(devvp, DKIOCCORESTORAGE, (caddr_t)&cs_info, 0, context) == 0) {
+               hfsmp->hfs_flags |= HFS_CS;
+               if (isroot && (cs_info.flags & DK_CORESTORAGE_PIN_YOUR_METADATA)) {
+                       hfsmp->hfs_flags |= HFS_CS_METADATA_PIN;
+               }
+               if (isroot && (cs_info.flags & DK_CORESTORAGE_ENABLE_HOTFILES)) {
+                       hfsmp->hfs_flags |= HFS_CS_HOTFILE_PIN;
+                       hfsmp->hfs_cs_hotfile_size = cs_info.hotfile_size;
+               }
+               if ((cs_info.flags & DK_CORESTORAGE_PIN_YOUR_SWAPFILE)) {
+                       hfsmp->hfs_flags |= HFS_CS_SWAPFILE_PIN;
+
+                       mp->mnt_ioflags |= MNT_IOFLAGS_SWAPPIN_SUPPORTED;
+                       mp->mnt_max_swappin_available = cs_info.swapfile_pinning;
+               }
+       }
 
        /*
         *  Init the volume information structure
@@ -1490,6 +1499,10 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
                        hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
        }
 
+       // Reservations
+       rl_init(&hfsmp->hfs_reserved_ranges[0]);
+       rl_init(&hfsmp->hfs_reserved_ranges[1]);
+
        // record the current time at which we're mounting this volume
        struct timeval tv;
        microtime(&tv);
@@ -1572,6 +1585,12 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
                        embeddedOffset += (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.startBlock) *
                                          (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
 
+                       /* 
+                        * Cooperative Fusion is not allowed on embedded HFS+ 
+                        * filesystems (HFS+ inside HFS standard wrapper)
+                        */
+                       hfsmp->hfs_flags &= ~HFS_CS_METADATA_PIN;
+
                        /*
                         * If the embedded volume doesn't start on a block
                         * boundary, then switch the device to a 512-byte
@@ -2003,7 +2022,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
        hfs_generate_volume_notifications(hfsmp);
 
        if (ronly == 0) {
-               (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
+               (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
        }
        FREE(mdbp, M_TEMP);
        return (0);
@@ -2178,7 +2197,7 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
                        }
                }
 
-               retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
+               retval = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
                if (retval) {
                        HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
                        if (!force)
@@ -2192,7 +2211,7 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
        }
 
        if (hfsmp->jnl) {
-               hfs_journal_flush(hfsmp, FALSE);
+               hfs_flush(hfsmp, HFS_FLUSH_FULL);
        }
        
        /*
@@ -2246,6 +2265,11 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
        hfs_locks_destroy(hfsmp);
        hfs_delete_chash(hfsmp);
        hfs_idhash_destroy(hfsmp);
+
+       assert(TAILQ_EMPTY(&hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS])
+                  && TAILQ_EMPTY(&hfsmp->hfs_reserved_ranges[HFS_LOCKED_BLOCKS]));
+       assert(!hfsmp->lockedBlocks);
+
        FREE(hfsmp, M_HFSMNT);
 
        return (0);
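
The asserts above close out the reserved-range lists this commit adds: rl_init() sets them up at mount and hfs_reclaimspace() polls the locked list before shrinking the volume. A rough sketch of that poll, assuming the caller holds the bitmap lock (SFL_BITMAP, shared) as hfs_reclaimspace() does; locked_range_overlaps_sketch() is a hypothetical name:

	/*
	 * Sketch only: the check hfs_reclaimspace() repeats (with a 100ms sleep
	 * in between) until no locked reservation overlaps the doomed blocks.
	 */
	static bool
	locked_range_overlaps_sketch(struct hfsmount *hfsmp, u_int32_t allocLimit)
	{
		struct rl_entry *range;

		TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_LOCKED_BLOCKS], rl_link) {
			if (rl_overlap(range, allocLimit, RL_INFINITY) != RL_NOOVERLAP)
				return true;	/* a locked reservation still sits in the area being removed */
		}
		return false;			/* safe to proceed */
	}
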
@@ -2481,35 +2505,49 @@ hfs_sync_metadata(void *arg)
 
 
 struct hfs_sync_cargs {
-        kauth_cred_t cred;
-        struct proc  *p;
-        int    waitfor;
-        int    error;
+       kauth_cred_t  cred;
+       struct proc      *p;
+       int                       waitfor;
+       int                       error;
+       int                       atime_only_syncs;
+       time_t            sync_start_time;
 };
 
 
 static int
 hfs_sync_callback(struct vnode *vp, void *cargs)
 {
-       struct cnode *cp;
+       struct cnode *cp = VTOC(vp);
        struct hfs_sync_cargs *args;
        int error;
 
        args = (struct hfs_sync_cargs *)cargs;
 
-       if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
+       if (hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
                return (VNODE_RETURNED);
        }
-       cp = VTOC(vp);
 
-       if ((cp->c_flag & C_MODIFIED) ||
-           (cp->c_touch_acctime | cp->c_touch_chgtime | cp->c_touch_modtime) ||
-           vnode_hasdirtyblks(vp)) {
-               error = hfs_fsync(vp, args->waitfor, 0, args->p);
+       hfs_dirty_t dirty_state = hfs_is_dirty(cp);
+
+       bool sync = dirty_state == HFS_DIRTY || vnode_hasdirtyblks(vp);
+
+       if (!sync && dirty_state == HFS_DIRTY_ATIME
+               && args->atime_only_syncs < 256) {
+               // We only update if the atime changed more than 60s ago
+               if (args->sync_start_time - cp->c_attr.ca_atime > 60) {
+                       sync = true;
+                       ++args->atime_only_syncs;
+               }
+       }
+
+       if (sync) {
+               error = hfs_fsync(vp, args->waitfor, 0, args->p);
 
                if (error)
                        args->error = error;
-       }
+       } else if (cp->c_touch_acctime)
+               hfs_touchtimes(VTOHFS(vp), cp);
+
        hfs_unlock(cp);
        return (VNODE_RETURNED);
 }
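
The rewritten callback separates truly dirty cnodes from ones that only owe an access-time update, and throttles the latter to at most 256 fsyncs per pass, and only when the atime is over 60 seconds stale. A condensed sketch of that decision; should_fsync_sketch() is a hypothetical name and the fields come from the hfs_sync_cargs hunk above:

	/* Sketch only: the per-vnode decision hfs_sync_callback() now makes. */
	static bool
	should_fsync_sketch(vnode_t vp, struct cnode *cp, struct hfs_sync_cargs *args)
	{
		hfs_dirty_t dirty_state = hfs_is_dirty(cp);

		/* Anything genuinely dirty, or with dirty buffers, is always synced. */
		if (dirty_state == HFS_DIRTY || vnode_hasdirtyblks(vp))
			return true;

		/* atime-only updates are throttled: at most 256 per pass, and only
		 * when the recorded atime is more than 60 seconds old. */
		if (dirty_state == HFS_DIRTY_ATIME && args->atime_only_syncs < 256 &&
		    args->sync_start_time - cp->c_attr.ca_atime > 60) {
			++args->atime_only_syncs;
			return true;
		}

		/* otherwise the atime is just folded into the cnode via hfs_touchtimes() */
		return false;
	}
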
@@ -2557,6 +2595,13 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
        args.waitfor = waitfor;
        args.p = p;
        args.error = 0;
+       args.atime_only_syncs = 0;
+
+       struct timeval tv;
+       microtime(&tv);
+
+       args.sync_start_time = tv.tv_sec;
+
        /*
         * hfs_sync_callback will be called for each vnode
         * hung off of this mount point... the vnode will be
@@ -2586,11 +2631,7 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
                (void) hfs_lock(VTOC(btvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
                cp = VTOC(btvp);
 
-               if (((cp->c_flag &  C_MODIFIED) == 0) &&
-                   (cp->c_touch_acctime == 0) &&
-                   (cp->c_touch_chgtime == 0) &&
-                   (cp->c_touch_modtime == 0) &&
-                   vnode_hasdirtyblks(btvp) == 0) {
+               if (!hfs_is_dirty(cp) && !vnode_hasdirtyblks(btvp)) {
                        hfs_unlock(VTOC(btvp));
                        continue;
                }
@@ -2628,13 +2669,13 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
         * Write back modified superblock.
         */
        if (IsVCBDirty(vcb)) {
-               error = hfs_flushvolumeheader(hfsmp, waitfor, 0);
+               error = hfs_flushvolumeheader(hfsmp, waitfor == MNT_WAIT ? HFS_FVH_WAIT : 0);
                if (error)
                        allerror = error;
        }
 
        if (hfsmp->jnl) {
-           hfs_journal_flush(hfsmp, FALSE);
+           hfs_flush(hfsmp, HFS_FLUSH_JOURNAL);
        }
 
        hfs_lock_mount(hfsmp);
@@ -3018,11 +3059,10 @@ encodinghint_exit:
                bcopy(&local_jib, buf_ptr, sizeof(local_jib));
                if (buf_bwrite (jib_buf)) {
                        return EIO;
-               }               
+               }
 
                /* Force a flush track cache */
-               (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
-
+               hfs_flush(hfsmp, HFS_FLUSH_CACHE);
 
                /* Now proceed with full volume sync */
                hfs_sync(hfsmp->hfs_mp, MNT_WAIT, context);
@@ -3093,7 +3133,7 @@ encodinghint_exit:
                vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
 
                hfs_unlock_global (hfsmp);
-               hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
+               hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
 
                {
                        fsid_t fsid;
@@ -3148,7 +3188,7 @@ encodinghint_exit:
                
                hfs_unlock_global (hfsmp);
 
-               hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
+               hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
 
                {
                        fsid_t fsid;
@@ -3174,8 +3214,8 @@ encodinghint_exit:
                        jnl_start = 0;
                        jnl_size  = 0;
            } else {
-                       jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
-                       jnl_size  = (off_t)hfsmp->jnl_size;
+                       jnl_start = hfs_blk_to_bytes(hfsmp->jnl_start, HFSTOVCB(hfsmp)->blockSize) + HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
+                       jnl_size  = hfsmp->jnl_size;
            }
 
            if ((error = copyout((caddr_t)&jnl_start, CAST_USER_ADDR_T(name[1]), sizeof(off_t))) != 0) {
@@ -3257,35 +3297,56 @@ hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_con
 
        error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1, 0);
        if (error)
-               return (error);
+               return error;
 
        /*
-        * ADLs may need to have their origin state updated
-        * since build_path needs a valid parent.  The same is true
-        * for hardlinked files as well.  There isn't a race window here
-        * in re-acquiring the cnode lock since we aren't pulling any data 
-        * out of the cnode; instead, we're going to the catalog.
+        * If the look-up was via the object ID (rather than the link ID),
+        * then we make sure there's a parent here.  We can't leave this
+        * until hfs_vnop_getattr because if there's a problem getting the
+        * parent at that point, all the caller will do is call
+        * hfs_vfs_vget again and we'll end up in an infinite loop.
         */
-       if ((VTOC(*vpp)->c_flag & C_HARDLINK) &&
-           (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) == 0)) {
-               cnode_t *cp = VTOC(*vpp);
-               struct cat_desc cdesc;
-               
+
+       cnode_t *cp = VTOC(*vpp);
+
+       if (ISSET(cp->c_flag, C_HARDLINK) && ino == cp->c_fileid) {
+               hfs_lock_always(cp, HFS_SHARED_LOCK);
+
                if (!hfs_haslinkorigin(cp)) {
-                       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
-                       error = cat_findname(hfsmp, (cnid_t)ino, &cdesc);
-                       hfs_systemfile_unlock(hfsmp, lockflags);
-                       if (error == 0) {
-                               if ((cdesc.cd_parentcnid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
-                                       (cdesc.cd_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) {
-                                       hfs_savelinkorigin(cp, cdesc.cd_parentcnid);
+                       if (!hfs_lock_upgrade(cp))
+                               hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
+
+                       if (cp->c_cnid == cp->c_fileid) {
+                               /*
+                                * Descriptor is stale, so we need to refresh it.  We
+                                * pick the first link.
+                                */
+                               cnid_t link_id;
+
+                               error = hfs_first_link(hfsmp, cp, &link_id);
+
+                               if (!error) {
+                                       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+                                       error = cat_findname(hfsmp, link_id, &cp->c_desc);
+                                       hfs_systemfile_unlock(hfsmp, lockflags);
                                }
-                               cat_releasedesc(&cdesc);
+                       } else {
+                               // We'll use whatever link the descriptor happens to have
+                               error = 0;
                        }
+                       if (!error)
+                               hfs_savelinkorigin(cp, cp->c_parentcnid);
                }
+
                hfs_unlock(cp);
+
+               if (error) {
+                       vnode_put(*vpp);
+                       *vpp = NULL;
+               }
        }
-       return (0);
+
+       return error;
 }
 
 
@@ -3626,7 +3687,7 @@ hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot)
        hfs_unlock_mount (hfsmp);
 
        if (hfsmp->jnl) {
-               hfs_flushvolumeheader(hfsmp, 0, 0);
+               hfs_flushvolumeheader(hfsmp, 0);
        }
 
        return (0);
@@ -3737,7 +3798,8 @@ hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush)
  *  are always stored in-memory as "H+".
  */
 int
-hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
+hfs_flushvolumeheader(struct hfsmount *hfsmp, 
+                                         hfs_flush_volume_header_options_t options)
 {
        ExtendedVCB *vcb = HFSTOVCB(hfsmp);
        struct filefork *fp;
@@ -3746,20 +3808,25 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
        struct buf *bp, *alt_bp;
        int i;
        daddr64_t priIDSector;
-       int critical;
+       bool critical = false;
        u_int16_t  signature;
        u_int16_t  hfsversion;
        daddr64_t avh_sector;
+       bool altflush = ISSET(options, HFS_FVH_WRITE_ALT);
+
+       if (ISSET(options, HFS_FVH_FLUSH_IF_DIRTY)
+               && !hfs_header_needs_flushing(hfsmp)) {
+               return 0;
+       }
 
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
                return(0);
        }
 #if CONFIG_HFS_STD
        if (hfsmp->hfs_flags & HFS_STANDARD) {
-               return hfs_flushMDB(hfsmp, waitfor, altflush);
+               return hfs_flushMDB(hfsmp, ISSET(options, HFS_FVH_WAIT) ? MNT_WAIT : 0, altflush);
        }
 #endif
-       critical = altflush;
        priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
                                  HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
 
@@ -3939,7 +4006,7 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
        volumeHeader->fileCount         = SWAP_BE32 (vcb->vcbFilCnt);
        volumeHeader->folderCount       = SWAP_BE32 (vcb->vcbDirCnt);
        volumeHeader->totalBlocks       = SWAP_BE32 (vcb->totalBlocks);
-       volumeHeader->freeBlocks        = SWAP_BE32 (vcb->freeBlocks);
+       volumeHeader->freeBlocks        = SWAP_BE32 (vcb->freeBlocks + vcb->reclaimBlocks);
        volumeHeader->nextAllocation    = SWAP_BE32 (vcb->nextAllocation);
        volumeHeader->rsrcClumpSize     = SWAP_BE32 (vcb->vcbClpSiz);
        volumeHeader->dataClumpSize     = SWAP_BE32 (vcb->vcbClpSiz);
@@ -3949,13 +4016,10 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
 
        if (bcmp(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)) != 0) {
                bcopy(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo));
-               critical = 1;
+               critical = true;
        }
 
-       /*
-        * System files are only dirty when altflush is set.
-        */
-       if (altflush == 0) {
+       if (!altflush && !ISSET(options, HFS_FVH_FLUSH_IF_DIRTY)) {
                goto done;
        }
 
@@ -3972,6 +4036,7 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
                volumeHeader->extentsFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
                volumeHeader->extentsFile.clumpSize   = SWAP_BE32 (fp->ff_clumpsize);
                FTOC(fp)->c_flag &= ~C_MODIFIED;
+               altflush = true;
        }
 
        /* Sync Catalog file meta data */
@@ -3987,6 +4052,7 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
                volumeHeader->catalogFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
                volumeHeader->catalogFile.clumpSize   = SWAP_BE32 (fp->ff_clumpsize);
                FTOC(fp)->c_flag &= ~C_MODIFIED;
+               altflush = true;
        }
 
        /* Sync Allocation file meta data */
@@ -4002,6 +4068,7 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
                volumeHeader->allocationFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
                volumeHeader->allocationFile.clumpSize   = SWAP_BE32 (fp->ff_clumpsize);
                FTOC(fp)->c_flag &= ~C_MODIFIED;
+               altflush = true;
        }
 
        /* Sync Attribute file meta data */
@@ -4013,7 +4080,10 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
                        volumeHeader->attributesFile.extents[i].blockCount =
                                SWAP_BE32 (fp->ff_extents[i].blockCount);
                }
-               FTOC(fp)->c_flag &= ~C_MODIFIED;
+               if (ISSET(FTOC(fp)->c_flag, C_MODIFIED)) {
+                       FTOC(fp)->c_flag &= ~C_MODIFIED;
+                       altflush = true;
+               }
                volumeHeader->attributesFile.logicalSize = SWAP_BE64 (fp->ff_size);
                volumeHeader->attributesFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
                volumeHeader->attributesFile.clumpSize   = SWAP_BE32 (fp->ff_clumpsize);
@@ -4033,9 +4103,13 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
                        volumeHeader->startupFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
                        volumeHeader->startupFile.clumpSize   = SWAP_BE32 (fp->ff_clumpsize);
                        FTOC(fp)->c_flag &= ~C_MODIFIED;
+                       altflush = true;
                }
        }
 
+       if (altflush)
+               critical = true;
 done:
        MarkVCBClean(hfsmp);
        hfs_unlock_mount (hfsmp);
@@ -4150,8 +4224,7 @@ done:
                                         * may now appear to be beyond the device EOF.
                                         */
                                        (void) VNOP_BWRITE(alt_bp);
-                                       (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE,
-                                                       NULL, FWRITE, NULL);
+                                       hfs_flush(hfsmp, HFS_FLUSH_CACHE);
                                }               
                        } else if (alt_bp) {
                                buf_brelse(alt_bp);
@@ -4163,14 +4236,13 @@ done:
        if (hfsmp->jnl) {
                journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
        } else {
-               if (waitfor != MNT_WAIT) {
+               if (!ISSET(options, HFS_FVH_WAIT)) {
                        buf_bawrite(bp);
                } else {
                        retval = VNOP_BWRITE(bp);
                        /* When critical data changes, flush the device cache */
-                       if (critical && (retval == 0)) { 
-                               (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, 
-                                               NULL, FWRITE, NULL);
+                       if (critical && (retval == 0)) {
+                               hfs_flush(hfsmp, HFS_FLUSH_CACHE);
                        }
                }
        }
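
hfs_flushvolumeheader() now takes a single options word in place of the old (waitfor, altflush) pair. A minimal sketch of how the call sites in this commit translate; flushvolumeheader_options_sketch() is a hypothetical name and only the HFS_FVH_* flags that appear in these hunks are used:

	/* Sketch only: how the old (waitfor, altflush) call sites translate. */
	static void
	flushvolumeheader_options_sketch(struct hfsmount *hfsmp, int waitfor)
	{
		/* was: hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0) */
		(void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);

		/* was: hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH) --
		 * also rewrite the alternate volume header */
		(void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);

		/* hfs_sync() maps its own waitfor argument onto the flag */
		(void) hfs_flushvolumeheader(hfsmp, waitfor == MNT_WAIT ? HFS_FVH_WAIT : 0);

		/* HFS_FVH_FLUSH_IF_DIRTY makes the call a no-op when
		 * hfs_header_needs_flushing() says nothing is pending */
		(void) hfs_flushvolumeheader(hfsmp, HFS_FVH_FLUSH_IF_DIRTY);
	}
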
@@ -4505,11 +4577,10 @@ hfs_rename_volume(struct vnode *vp, const char *name, proc_t p)
                                        strlcpy((char *)vcb->vcbVN, name, sizeof(vcb->vcbVN));
 
                                        volname_length = strlen ((const char*)vcb->vcbVN);
-#define DKIOCCSSETLVNAME _IOW('d', 198, char[256])
                                        /* Send the volume name down to CoreStorage if necessary */     
                                        error = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
                                        if (error == 0) {
-                                               (void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
+                                               (void) VNOP_IOCTL (hfsmp->hfs_devvp, _DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
                                        }
                                        error = 0;
                                }
@@ -4519,7 +4590,7 @@ hfs_rename_volume(struct vnode *vp, const char *name, proc_t p)
                        
                                if (error)
                                        MarkVCBDirty(vcb);
-                               (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
+                               (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
                        }
                        hfs_end_transaction(hfsmp);
                }                       
index ca011c65253fd229b38beabf662e0c5efd4f419c..1015fbd914685864c446bc2e900dc860890e05d3 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 /* for parsing boot-args */
 #include <pexpert/pexpert.h>
 
-#if CONFIG_PROTECT
-#include <sys/cprotect.h>
-#endif
-
 #include "hfs.h"
 #include "hfs_catalog.h"
 #include "hfs_dbg.h"
@@ -67,6 +63,7 @@
 #include "hfs_endian.h"
 #include "hfs_cnode.h"
 #include "hfs_fsctl.h"
+#include "hfs_cprotect.h"
 
 #include "hfscommon/headers/FileMgrInternal.h"
 #include "hfscommon/headers/BTreesInternal.h"
@@ -610,6 +607,7 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                }
                goto ErrorExit;
        }
+
        hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
        hfs_unlock(hfsmp->hfs_extents_cp);
 
@@ -800,13 +798,10 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
        volname_length = strlen ((const char*)vcb->vcbVN);
        cat_releasedesc(&cndesc);
        
-#define DKIOCCSSETLVNAME _IOW('d', 198, char[256])
-
-
        /* Send the volume name down to CoreStorage if necessary */     
        retval = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
        if (retval == 0) {
-               (void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
+               (void) VNOP_IOCTL (hfsmp->hfs_devvp, _DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
        }       
        
        /* reset retval == 0. we don't care about errors in volname conversion */
@@ -826,23 +821,19 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
        hfs_lock_mount (hfsmp);
 
        kernel_thread_start ((thread_continue_t) hfs_scan_blocks, hfsmp, &allocator_scanner);
-       /* Wait until it registers that it's got the appropriate locks */
-       while ((hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) == 0) {
-               (void) msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, (PDROP | PINOD), "hfs_scan_blocks", 0);
-               if (hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) {
-                       break;
-               }
-               else {
-                       hfs_lock_mount (hfsmp);
-               }
+       /* Wait until it registers that it's got the appropriate locks (or that it is finished) */
+       while ((hfsmp->scan_var & (HFS_ALLOCATOR_SCAN_INFLIGHT|HFS_ALLOCATOR_SCAN_COMPLETED)) == 0) {
+               msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, PINOD, "hfs_scan_blocks", 0);
        }
 
+       hfs_unlock_mount(hfsmp);
+
        thread_deallocate (allocator_scanner);
 
        /* mark the volume dirty (clear clean unmount bit) */
        vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
        if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
-               hfs_flushvolumeheader(hfsmp, TRUE, 0);
+               hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
        }
 
        /* kHFSHasFolderCount is only supported/updated on HFSX volumes */
@@ -947,6 +938,9 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                MarkVCBDirty( vcb );    // mark VCB dirty so it will be written
        }
 
+       if (hfsmp->hfs_flags & HFS_CS_METADATA_PIN) {
+               hfs_pin_fs_metadata(hfsmp);
+       }
        /*
         * Distinguish 3 potential cases involving content protection:
         * 1. mount point bit set; vcbAtrb does not support it. Fail.
@@ -975,17 +969,8 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 #if CONFIG_PROTECT
                /* Get the EAs as needed. */
                int cperr = 0;
-               uint16_t majorversion;
-               uint16_t minorversion;
-               uint64_t flags;
-               uint8_t cryptogen = 0;
                struct cp_root_xattr *xattr = NULL;
                MALLOC (xattr, struct cp_root_xattr*, sizeof(struct cp_root_xattr), M_TEMP, M_WAITOK);
-               if (xattr == NULL) {
-                       retval = ENOMEM;
-                       goto ErrorExit;
-               }
-               bzero (xattr, sizeof(struct cp_root_xattr));
 
                /* go get the EA to get the version information */
                cperr = cp_getrootxattr (hfsmp, xattr);
@@ -997,56 +982,54 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 
                if (cperr == 0) {
                        /* Have to run a valid CP version. */
-                       if ((xattr->major_version < CP_PREV_MAJOR_VERS) || (xattr->major_version > CP_NEW_MAJOR_VERS)) {
+                       if (!cp_is_supported_version(xattr->major_version)) {
                                cperr = EINVAL;
                        }
                }
                else if (cperr == ENOATTR) {
-                       printf("No root EA set, creating new EA with new version: %d\n", CP_NEW_MAJOR_VERS);
+                       printf("No root EA set, creating new EA with new version: %d\n", CP_CURRENT_VERS);
                        bzero(xattr, sizeof(struct cp_root_xattr));
-                       xattr->major_version = CP_NEW_MAJOR_VERS;
+                       xattr->major_version = CP_CURRENT_VERS;
                        xattr->minor_version = CP_MINOR_VERS;
                        cperr = cp_setrootxattr (hfsmp, xattr);
                }
-               majorversion = xattr->major_version;
-               minorversion = xattr->minor_version;
-               flags = xattr->flags;
-               if (xattr->flags & CP_ROOT_CRYPTOG1) {
-                       cryptogen = 1;
-               }
 
-               if (xattr) {
+               if (cperr) {
                        FREE(xattr, M_TEMP);
+                       retval = EPERM;
+                       goto ErrorExit;
                }
 
-               /* Recheck for good status */
-               if (cperr == 0) {
-                       /* If we got here, then the CP version is valid. Set it in the mount point */
-                       hfsmp->hfs_running_cp_major_vers = majorversion;
-                       printf("Running with CP root xattr: %d.%d\n", majorversion, minorversion);
-                       hfsmp->cproot_flags = flags;
-                       hfsmp->cp_crypto_generation = cryptogen;
+               /* If we got here, then the CP version is valid. Set it in the mount point */
+               hfsmp->hfs_running_cp_major_vers = xattr->major_version;
+               printf("Running with CP root xattr: %d.%d\n", xattr->major_version, xattr->minor_version);
+               hfsmp->cproot_flags = xattr->flags;
+               hfsmp->cp_crypto_generation = ISSET(xattr->flags, CP_ROOT_CRYPTOG1) ? 1 : 0;
 
-                       /* 
-                        * Acquire the boot-arg for the AKS default key; if invalid, obtain from the device tree.
-                        * Ensure that the boot-arg's value is valid for FILES (not directories),
-                        * since only files are actually protected for now.
-                        */ 
-                        
-                       PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));
-                       
-                       if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
-                               PE_get_default("kern.default_cp_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));
-                       }
-                       
-                       if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
-                               hfsmp->default_cp_class = PROTECTION_CLASS_C;
-                       }
+               FREE(xattr, M_TEMP);
+
+               /*
+                * Acquire the boot-arg for the AKS default key; if invalid, obtain from the device tree.
+                * Ensure that the boot-arg's value is valid for FILES (not directories),
+                * since only files are actually protected for now.
+                */
+
+               PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));
+
+               if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
+                       PE_get_default("kern.default_cp_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));
                }
-               else {
-                       retval = EPERM;
-                       goto ErrorExit;
+
+#if HFS_TMPDBG
+#if !SECURE_KERNEL
+               PE_parse_boot_argn("aks_verbose", &hfsmp->hfs_cp_verbose, sizeof(hfsmp->hfs_cp_verbose));
+#endif
+#endif
+
+               if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
+                       hfsmp->default_cp_class = PROTECTION_CLASS_C;
                }
+
 #else
                /* If CONFIG_PROTECT not built, ignore CP */
                vfs_clearflags(hfsmp->hfs_mp, MNT_CPROTECT);    
@@ -1097,8 +1080,30 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
        /*
         * Allow hot file clustering if conditions allow.
         */
-       if ((hfsmp->hfs_flags & HFS_METADATA_ZONE)  &&
-           ((hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_SSD)) == 0)) {
+       if ((hfsmp->hfs_flags & HFS_METADATA_ZONE)  && !(hfsmp->hfs_flags & HFS_READ_ONLY) &&
+           ((hfsmp->hfs_flags & HFS_SSD) == 0 || (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN))) {
+               //
+               // Wait until the bitmap scan completes before we initializes the
+               // hotfile area so that we do not run into any issues with the
+               // bitmap being read while hotfiles is initializing itself.  On
+               // some older/slower machines, without this interlock, the bitmap
+               // would sometimes get corrupted at boot time.
+               //
+               hfs_lock_mount(hfsmp);
+               while(!(hfsmp->scan_var & HFS_ALLOCATOR_SCAN_COMPLETED)) {
+                       (void) msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, PINOD, "hfs_hotfile_bitmap_interlock", 0);
+               }
+               hfs_unlock_mount(hfsmp);
+               
+               /*
+                * Note: at this point we are not allowed to fail the
+                *       mount operation because the HotFile init code
+                *       in hfs_recording_init() will lookup vnodes with
+                *       VNOP_LOOKUP() which hangs vnodes off the mount
+                *       (and if we were to fail, VFS is not prepared to
+                *       clean that up at this point.  Since HotFiles are
+                *       optional, this is not a big deal.
+                */
                (void) hfs_recording_init(hfsmp);
        }
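
The allocator-scan thread and its waiters now hand off through hfsmp->scan_var: the scanner clears HFS_ALLOCATOR_SCAN_INFLIGHT, sets HFS_ALLOCATOR_SCAN_COMPLETED and wakes sleepers, while the mount path and the HotFile interlock above sleep on the same address under the mount mutex. A condensed sketch of both sides; the function names and the wanted_bits parameter are illustrative, the calls themselves come from the hunks:

	/* Sketch only: the scanner's side of the handshake (end of hfs_scan_blocks). */
	static void
	scanner_done_sketch(struct hfsmount *hfsmp)
	{
		hfs_lock_mount(hfsmp);
		hfsmp->scan_var &= ~HFS_ALLOCATOR_SCAN_INFLIGHT;
		hfsmp->scan_var |= HFS_ALLOCATOR_SCAN_COMPLETED;
		wakeup((caddr_t) &hfsmp->scan_var);
		hfs_unlock_mount(hfsmp);
	}

	/* Sketch only: the waiters' side -- mount waits for INFLIGHT|COMPLETED,
	 * HotFile init waits for COMPLETED alone. */
	static void
	wait_for_scan_sketch(struct hfsmount *hfsmp, u_int32_t wanted_bits)
	{
		hfs_lock_mount(hfsmp);
		while ((hfsmp->scan_var & wanted_bits) == 0) {
			msleep(&hfsmp->scan_var, &hfsmp->hfs_mutex, PINOD, "hfs_scan_wait", 0);
		}
		hfs_unlock_mount(hfsmp);
	}
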
 
@@ -1123,6 +1128,53 @@ ErrorExit:
        return (retval);
 }
 
+static int
+_pin_metafile(struct hfsmount *hfsmp, vnode_t vp)
+{
+       int err;
+
+       err = hfs_lock(VTOC(vp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
+       if (err == 0) {
+               err = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT, NULL, vfs_context_kernel());
+               hfs_unlock(VTOC(vp));
+       }
+
+       return err;
+}
+
+void
+hfs_pin_fs_metadata(struct hfsmount *hfsmp)
+{
+       ExtendedVCB *vcb;
+       int err;
+       
+       vcb = HFSTOVCB(hfsmp);
+
+       err = _pin_metafile(hfsmp, hfsmp->hfs_extents_vp);
+       if (err != 0) {
+               printf("hfs: failed to pin extents overflow file %d\n", err);
+       }                               
+       err = _pin_metafile(hfsmp, hfsmp->hfs_catalog_vp);
+       if (err != 0) {
+               printf("hfs: failed to pin catalog file %d\n", err);
+       }                               
+       err = _pin_metafile(hfsmp, hfsmp->hfs_allocation_vp);
+       if (err != 0) {
+               printf("hfs: failed to pin bitmap file %d\n", err);
+       }                               
+       err = _pin_metafile(hfsmp, hfsmp->hfs_attribute_vp);
+       if (err != 0) {
+               printf("hfs: failed to pin extended attr file %d\n", err);
+       }                               
+       
+       hfs_pin_block_range(hfsmp, HFS_PIN_IT, 0, 1, vfs_context_kernel());
+       hfs_pin_block_range(hfsmp, HFS_PIN_IT, vcb->totalBlocks-1, 1, vfs_context_kernel());
+                       
+       if (vfs_flags(hfsmp->hfs_mp) & MNT_JOURNALED) {
+               // and hey, if we've got a journal, let's pin that too!
+               hfs_pin_block_range(hfsmp, HFS_PIN_IT, hfsmp->jnl_start, howmany(hfsmp->jnl_size, vcb->blockSize), vfs_context_kernel());
+       }
+}
 
 /*
  * ReleaseMetaFileVNode
@@ -1363,6 +1415,19 @@ void hfs_unlock_mount (struct hfsmount *hfsmp) {
 
 /*
  * Lock HFS system file(s).
+ *
+ * This function accepts a @flags parameter which indicates which
+ * system file locks are required.  The value it returns should be
+ * used in a subsequent call to hfs_systemfile_unlock.  The caller
+ * should treat this value as opaque; it may or may not have a
+ * relation to the @flags field that is passed in.  The *only*
+ * guarantee that we make is that a value of zero means that no locks
+ * were taken and that there is no need to call hfs_systemfile_unlock
+ * (although it is harmless to do so).  Recursion is supported but
+ * care must still be taken to ensure correct lock ordering.  Note
+ * that requests for certain locks may cause other locks to also be
+ * taken, including locks that are not possible to ask for via the
+ * @flags parameter.
  */
 int
 hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktype)
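
The contract spelled out in the comment above boils down to: treat the return value as an opaque cookie, hand it straight back to hfs_systemfile_unlock(), and rely on zero meaning nothing was locked. A minimal usage sketch under that contract; catalog_read_sketch() is a hypothetical name:

	/* Sketch only: typical use of the opaque lockflags cookie. */
	static int
	catalog_read_sketch(struct hfsmount *hfsmp)
	{
		int lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);

		/* ... read-only catalog work; the Extents B-tree may have been
		 * locked on our behalf if the catalog file has overflow extents ... */

		/* Zero means nothing was locked and unlocking is harmless,
		 * so the unlock can stay unconditional. */
		hfs_systemfile_unlock(hfsmp, lockflags);
		return 0;
	}
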
@@ -1371,19 +1436,20 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktyp
         * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file
         */
        if (flags & SFL_CATALOG) {
+               if (hfsmp->hfs_catalog_cp
+                       && hfsmp->hfs_catalog_cp->c_lockowner != current_thread()) {
 #ifdef HFS_CHECK_LOCK_ORDER
-               if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) {
-                       panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)");
-               }
-               if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
-                       panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)");
-               }
-               if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
-                       panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)");
-               }
+                       if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) {
+                               panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)");
+                       }
+                       if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
+                               panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)");
+                       }
+                       if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
+                               panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)");
+                       }
 #endif /* HFS_CHECK_LOCK_ORDER */
 
-               if (hfsmp->hfs_catalog_cp) {
                        (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype, HFS_LOCK_DEFAULT);
                        /*
                         * When the catalog file has overflow extents then
@@ -1401,16 +1467,17 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktyp
        }
 
        if (flags & SFL_ATTRIBUTE) {
+               if (hfsmp->hfs_attribute_cp
+                       && hfsmp->hfs_attribute_cp->c_lockowner != current_thread()) {
 #ifdef HFS_CHECK_LOCK_ORDER
-               if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
-                       panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)");
-               }
-               if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
-                       panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)");
-               }
+                       if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
+                               panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)");
+                       }
+                       if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
+                               panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)");
+                       }
 #endif /* HFS_CHECK_LOCK_ORDER */
-
-               if (hfsmp->hfs_attribute_cp) {
+                       
                        (void) hfs_lock(hfsmp->hfs_attribute_cp, locktype, HFS_LOCK_DEFAULT);
                        /*
                         * When the attribute file has overflow extents then
@@ -1428,13 +1495,14 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktyp
        }
 
        if (flags & SFL_STARTUP) {
+               if (hfsmp->hfs_startup_cp
+                       && hfsmp->hfs_startup_cp->c_lockowner != current_thread()) {
 #ifdef HFS_CHECK_LOCK_ORDER
-               if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
-                       panic("hfs_systemfile_lock: bad lock order (Extents before Startup)");
-               }
+                       if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
+                               panic("hfs_systemfile_lock: bad lock order (Extents before Startup)");
+                       }
 #endif /* HFS_CHECK_LOCK_ORDER */
 
-               if (hfsmp->hfs_startup_cp) {
                        (void) hfs_lock(hfsmp->hfs_startup_cp, locktype, HFS_LOCK_DEFAULT);
                        /*
                         * When the startup file has overflow extents then
@@ -1508,6 +1576,9 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktyp
 void
 hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
 {
+       if (!flags)
+               return;
+
        struct timeval tv;
        u_int32_t lastfsync;
        int numOfLockedBuffs;
@@ -1739,7 +1810,7 @@ hfs_remove_orphans(struct hfsmount * hfsmp)
        cat_cookie_t cookie;
        int catlock = 0;
        int catreserve = 0;
-       int started_tr = 0;
+       bool started_tr = false;
        int lockflags;
        int result;
        int orphaned_files = 0;
@@ -1798,159 +1869,177 @@ hfs_remove_orphans(struct hfsmount * hfsmp)
                 * where xxx is the file's cnid in decimal.
                 *
                 */
-               if (bcmp(tempname, filename, namelen) == 0) {
-                       struct filefork dfork;
-               struct filefork rfork;
-                       struct cnode cnode;
-                       int mode = 0;
-
-                       bzero(&dfork, sizeof(dfork));
-                       bzero(&rfork, sizeof(rfork));
-                       bzero(&cnode, sizeof(cnode));
-                       
-                       /* Delete any attributes, ignore errors */
-                       (void) hfs_removeallattr(hfsmp, filerec.fileID);
-                       
-                       if (hfs_start_transaction(hfsmp) != 0) {
-                           printf("hfs_remove_orphans: failed to start transaction\n");
-                           goto exit;
-                       }
-                       started_tr = 1;
-               
-                       /*
-                        * Reserve some space in the Catalog file.
-                        */
-                       if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) {
-                           printf("hfs_remove_orphans: cat_preflight failed\n");
-                               goto exit;
-                       }
-                       catreserve = 1;
+               if (bcmp(tempname, filename, namelen) != 0)
+                       continue;
 
-                       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
-                       catlock = 1;
+               struct filefork dfork;
+               struct filefork rfork;
+               struct cnode cnode;
+               int mode = 0;
 
-                       /* Build a fake cnode */
-                       cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr,
-                                       &dfork.ff_data, &rfork.ff_data);
-                       cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
-                       cnode.c_desc.cd_nameptr = (const u_int8_t *)filename;
-                       cnode.c_desc.cd_namelen = namelen;
-                       cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid;
-                       cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks;
-
-                       /* Position iterator at previous entry */
-                       if (BTIterateRecord(fcb, kBTreePrevRecord, iterator,
-                           NULL, NULL) != 0) {
-                               break;
-                       }
+               bzero(&dfork, sizeof(dfork));
+               bzero(&rfork, sizeof(rfork));
+               bzero(&cnode, sizeof(cnode));
+                       
+               if (hfs_start_transaction(hfsmp) != 0) {
+                       printf("hfs_remove_orphans: failed to start transaction\n");
+                       goto exit;
+               }
+               started_tr = true;
+               
+               /*
+                * Reserve some space in the Catalog file.
+                */
+               if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) {
+                       printf("hfs_remove_orphans: cat_preflight failed\n");
+                       goto exit;
+               }
+               catreserve = 1;
+
+               lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+               catlock = 1;
+
+               /* Build a fake cnode */
+               cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr,
+                                               &dfork.ff_data, &rfork.ff_data);
+               cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
+               cnode.c_desc.cd_nameptr = (const u_int8_t *)filename;
+               cnode.c_desc.cd_namelen = namelen;
+               cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid;
+               cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks;
+
+               /* Position iterator at previous entry */
+               if (BTIterateRecord(fcb, kBTreePrevRecord, iterator,
+                                                       NULL, NULL) != 0) {
+                       break;
+               }
 
-                       /* Truncate the file to zero (both forks) */
-                       if (dfork.ff_blocks > 0) {
-                               u_int64_t fsize;
+               /* Truncate the file to zero (both forks) */
+               if (dfork.ff_blocks > 0) {
+                       u_int64_t fsize;
                                
-                               dfork.ff_cp = &cnode;
-                               cnode.c_datafork = &dfork;
-                               cnode.c_rsrcfork = NULL;
-                               fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize;
-                               while (fsize > 0) {
-                                   if (fsize > HFS_BIGFILE_SIZE) {
-                                               fsize -= HFS_BIGFILE_SIZE;
-                                       } else {
-                                               fsize = 0;
-                                       }
-
-                                       if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0, 
-                                                                         cnode.c_attr.ca_fileid, false) != 0) {
-                                               printf("hfs: error truncating data fork!\n");
-                                               break;
-                                       }
-
-                                       //
-                                       // if we're iteratively truncating this file down,
-                                       // then end the transaction and start a new one so
-                                       // that no one transaction gets too big.
-                                       //
-                                       if (fsize > 0 && started_tr) {
-                                               /* Drop system file locks before starting 
-                                                * another transaction to preserve lock order.
-                                                */
-                                               hfs_systemfile_unlock(hfsmp, lockflags);
-                                               catlock = 0;
-                                               hfs_end_transaction(hfsmp);
-
-                                               if (hfs_start_transaction(hfsmp) != 0) {
-                                                       started_tr = 0;
-                                                       break;
-                                               }
-                                               lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
-                                               catlock = 1;
-                                       }
+                       dfork.ff_cp = &cnode;
+                       cnode.c_datafork = &dfork;
+                       cnode.c_rsrcfork = NULL;
+                       fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize;
+                       while (fsize > 0) {
+                               if (fsize > HFS_BIGFILE_SIZE) {
+                                       fsize -= HFS_BIGFILE_SIZE;
+                               } else {
+                                       fsize = 0;
                                }
-                       }
 
-                       if (rfork.ff_blocks > 0) {
-                               rfork.ff_cp = &cnode;
-                               cnode.c_datafork = NULL;
-                               cnode.c_rsrcfork = &rfork;
-                               if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) {
-                                       printf("hfs: error truncating rsrc fork!\n");
+                               if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0, 
+                                                                 cnode.c_attr.ca_fileid, false) != 0) {
+                                       printf("hfs: error truncating data fork!\n");
                                        break;
                                }
+
+                               //
+                               // if we're iteratively truncating this file down,
+                               // then end the transaction and start a new one so
+                               // that no one transaction gets too big.
+                               //
+                               if (fsize > 0) {
+                                       /* Drop system file locks before starting 
+                                        * another transaction to preserve lock order.
+                                        */
+                                       hfs_systemfile_unlock(hfsmp, lockflags);
+                                       catlock = 0;
+                                       hfs_end_transaction(hfsmp);
+
+                                       if (hfs_start_transaction(hfsmp) != 0) {
+                                               started_tr = false;
+                                               goto exit;
+                                       }
+                                       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+                                       catlock = 1;
+                               }
                        }
+               }
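                /*
                 * Editor's note, illustrative only: the loop above trims the
                 * orphan's data fork in HFS_BIGFILE_SIZE steps; a fork of
                 * 2.5 * HFS_BIGFILE_SIZE goes to 1.5x, then 0.5x, then 0, with
                 * the system-file locks dropped and the transaction restarted
                 * between steps so that no single journal transaction grows
                 * without bound.
                 */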
 
-                       /* Remove the file or folder record from the Catalog */ 
-                       if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) {
-                               printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid);
-                               hfs_systemfile_unlock(hfsmp, lockflags);
-                               catlock = 0;
-                               hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+               if (rfork.ff_blocks > 0) {
+                       rfork.ff_cp = &cnode;
+                       cnode.c_datafork = NULL;
+                       cnode.c_rsrcfork = &rfork;
+                       if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) {
+                               printf("hfs: error truncating rsrc fork!\n");
                                break;
                        }
-                       
-                       mode = cnode.c_attr.ca_mode & S_IFMT;
+               }
 
-                       if (mode == S_IFDIR) {
-                               orphaned_dirs++;
-                       }
-                       else {
-                               orphaned_files++;
-                       }
+               // Deal with extended attributes
+               if (ISSET(cnode.c_attr.ca_recflags, kHFSHasAttributesMask)) {
+                       // hfs_removeallattr uses its own transactions
+                       hfs_systemfile_unlock(hfsmp, lockflags);
+                       catlock = false;
+                       hfs_end_transaction(hfsmp);
 
-                       /* Update parent and volume counts */   
-                       hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
-                       if (mode == S_IFDIR) {
-                               DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
+                       hfs_removeallattr(hfsmp, cnode.c_attr.ca_fileid, &started_tr);
+
+                       if (!started_tr) {
+                               if (hfs_start_transaction(hfsmp) != 0) {
+                                       printf("hfs_remove_orphans: failed to start transaction\n");
+                                       goto exit;
+                               }
+                               started_tr = true;
                        }
 
-                       (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
-                                        &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
+                       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+                       catlock = 1;
+               }
 
-                       /* Drop locks and end the transaction */
+               /* Remove the file or folder record from the Catalog */ 
+               if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) {
+                       printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid);
                        hfs_systemfile_unlock(hfsmp, lockflags);
-                       cat_postflight(hfsmp, &cookie, p);
-                       catlock = catreserve = 0;
+                       catlock = 0;
+                       hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+                       break;
+               }
 
-                       /* 
-                          Now that Catalog is unlocked, update the volume info, making
-                          sure to differentiate between files and directories
-                       */
-                       if (mode == S_IFDIR) {
-                               hfs_volupdate(hfsmp, VOL_RMDIR, 0);
-                       }
-                       else{
-                               hfs_volupdate(hfsmp, VOL_RMFILE, 0);
-                       }
+               mode = cnode.c_attr.ca_mode & S_IFMT;
 
-                       if (started_tr) {
-                               hfs_end_transaction(hfsmp);
-                               started_tr = 0;
-                       }
+               if (mode == S_IFDIR) {
+                       orphaned_dirs++;
+               }
+               else {
+                       orphaned_files++;
+               }
+
+               /* Update parent and volume counts */   
+               hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
+               if (mode == S_IFDIR) {
+                       DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
+               }
 
-               } /* end if */
+               (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
+                                                &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
+
+               /* Drop locks and end the transaction */
+               hfs_systemfile_unlock(hfsmp, lockflags);
+               cat_postflight(hfsmp, &cookie, p);
+               catlock = catreserve = 0;
+
+               /* 
+                  Now that Catalog is unlocked, update the volume info, making
+                  sure to differentiate between files and directories
+               */
+               if (mode == S_IFDIR) {
+                       hfs_volupdate(hfsmp, VOL_RMDIR, 0);
+               }
+               else{
+                       hfs_volupdate(hfsmp, VOL_RMFILE, 0);
+               }
+
+               hfs_end_transaction(hfsmp);
+               started_tr = false;
        } /* end for */
+
+exit:
+
        if (orphaned_files > 0 || orphaned_dirs > 0)
                printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs);
-exit:
        if (catlock) {
                hfs_systemfile_unlock(hfsmp, lockflags);
        }
@@ -2029,7 +2118,7 @@ static bool hfs_get_backing_free_blks(hfsmount_t *hfsmp, uint64_t *pfree_blks)
                return true;
        }
 
-       uint32_t loanedblks = hfsmp->loanedBlocks;
+       uint32_t loanedblks = hfsmp->loanedBlocks + hfsmp->lockedBlocks;
        uint32_t bandblks       = hfsmp->hfs_sparsebandblks;
        uint64_t maxblks        = hfsmp->hfs_backingfs_maxblocks;
 
@@ -2097,7 +2186,7 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
         */
        freeblks = hfsmp->freeBlocks;
        rsrvblks = hfsmp->reserveBlocks;
-       loanblks = hfsmp->loanedBlocks;
+       loanblks = hfsmp->loanedBlocks + hfsmp->lockedBlocks;
        if (wantreserve) {
                if (freeblks > rsrvblks)
                        freeblks -= rsrvblks;
@@ -2119,20 +2208,6 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
                freeblks = MIN(freeblks, vfreeblks);
 #endif /* HFS_SPARSE_DEV */
 
-       if (hfsmp->hfs_flags & HFS_CS) {
-               uint64_t cs_free_bytes;
-               uint64_t cs_free_blks;
-               if (VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSGETFREEBYTES,
-                   (caddr_t)&cs_free_bytes, 0, vfs_context_kernel()) == 0) {
-                       cs_free_blks = cs_free_bytes / hfsmp->blockSize;
-                       if (cs_free_blks > loanblks)
-                               cs_free_blks -= loanblks;
-                       else
-                               cs_free_blks = 0;
-                       freeblks = MIN(cs_free_blks, freeblks);
-               }
-       }
-
        return (freeblks);
 }
 
@@ -3051,7 +3126,7 @@ hfs_metadatazone_init(struct hfsmount *hfsmp, int disable)
         * Add the existing size of the Extents Overflow B-tree.
         * (It rarely grows, so don't bother reserving additional room for it.)
         */
-       zonesize += hfsmp->hfs_extents_cp->c_datafork->ff_blocks * hfsmp->blockSize;
+       zonesize += hfs_blk_to_bytes(hfsmp->hfs_extents_cp->c_datafork->ff_blocks, hfsmp->blockSize);
        
        /*
         * If there is an Attributes B-tree, leave room for 11 clumps worth.
@@ -3166,7 +3241,11 @@ hfs_metadatazone_init(struct hfsmount *hfsmp, int disable)
        filesize += temp / 3;
        hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;
 
-       hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
+       if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+               hfsmp->hfs_hotfile_maxblks = (uint32_t) (hfsmp->hfs_cs_hotfile_size / HFSTOVCB(hfsmp)->blockSize);
+       } else {
+               hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
+       }
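        /*
         * Editor's sketch of the arithmetic above (values are illustrative):
         * with HFS_CS_HOTFILE_PIN set, a pinned hotfile area of, say, 5 GiB on
         * a volume with 4 KiB allocation blocks yields
         * hfs_hotfile_maxblks = 5 * 2^30 / 4096 = 1310720 blocks; otherwise it
         * remains filesize / blockSize as before.
         */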
 
        /* Convert to allocation blocks. */
        blk = zonesize / vcb->blockSize;
@@ -3186,11 +3265,12 @@ hfs_metadatazone_init(struct hfsmount *hfsmp, int disable)
                hfsmp->hfs_hotfile_end = 0;
                hfsmp->hfs_hotfile_freeblks = 0;
        }
-#if 0
-       printf("hfs: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
-       printf("hfs: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
-       printf("hfs: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks);
+#if DEBUG
+       printf("hfs:%s: metadata zone is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
+       printf("hfs:%s: hot file band is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
+       printf("hfs:%s: hot file band free blocks = %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_freeblks);
 #endif
+
        hfsmp->hfs_flags |= HFS_METADATA_ZONE;
 }
 
@@ -3202,19 +3282,33 @@ hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
        int  lockflags;
        int  freeblocks;
 
+       if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+               //
+               // This is only used at initialization time; on an SSD,
+               // we'll get the real info from the hotfile btree user
+               // info.
+               //
+               return 0;
+       }
+
        lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
        freeblocks = MetaZoneFreeBlocks(vcb);
        hfs_systemfile_unlock(hfsmp, lockflags);
 
        /* Minus Extents overflow file reserve. */
-       freeblocks -=
-               hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
+       if ((uint32_t)hfsmp->hfs_overflow_maxblks >= VTOF(hfsmp->hfs_extents_vp)->ff_blocks) {
+               freeblocks -= hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
+       }
+
        /* Minus catalog file reserve. */
-       freeblocks -=
-               hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
+       if ((uint32_t)hfsmp->hfs_catalog_maxblks >= VTOF(hfsmp->hfs_catalog_vp)->ff_blocks) {
+               freeblocks -= hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
+       }
+       
        if (freeblocks < 0)
                freeblocks = 0;
 
+       // printf("hfs: hotfile_freeblocks: MIN(%d, %d) = %d\n", freeblocks, hfsmp->hfs_hotfile_maxblks, MIN(freeblocks, hfsmp->hfs_hotfile_maxblks));
        return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks);
 }
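/*
 * Editor's note, illustrative only: the guards above subtract each B-tree's
 * remaining reserve only when the subtraction cannot underflow; if the
 * Catalog file has already grown past hfs_catalog_maxblks, its term is
 * skipped rather than driving freeblocks negative before the final MIN()
 * against hfs_hotfile_maxblks.
 */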
 
@@ -3347,21 +3441,46 @@ hfs_start_transaction(struct hfsmount *hfsmp)
        }
 #endif /* HFS_CHECK_LOCK_ORDER */
 
-       if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) {
-               /* 
-                * The global lock should be held shared if journal is 
-                * active to prevent disabling.  If we're not the owner 
-                * of the journal lock, verify that we're not already
-                * holding the global lock exclusive before moving on.   
-                */
-               if (hfsmp->hfs_global_lockowner == thread) {
-                       ret = EBUSY;
-                       goto out;
+again:
+
+       if (hfsmp->jnl) {
+               if (journal_owner(hfsmp->jnl) != thread) {
+                       /*
+                        * The global lock should be held shared if journal is 
+                        * active to prevent disabling.  If we're not the owner 
+                        * of the journal lock, verify that we're not already
+                        * holding the global lock exclusive before moving on.   
+                        */
+                       if (hfsmp->hfs_global_lockowner == thread) {
+                               ret = EBUSY;
+                               goto out;
+                       }
+
+                       hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
+
+                       // Things could have changed
+                       if (!hfsmp->jnl) {
+                               hfs_unlock_global(hfsmp);
+                               goto again;
+                       }
+
+                       OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
+                       unlock_on_err = 1;
                }
+       } else {
+               // No journal
+               if (hfsmp->hfs_global_lockowner != thread) {
+                       hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);
+
+                       // Things could have changed
+                       if (hfsmp->jnl) {
+                               hfs_unlock_global(hfsmp);
+                               goto again;
+                       }
 
-               hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
-               OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
-               unlock_on_err = 1;
+                       OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
+                       unlock_on_err = 1;
+               }
        }
 
        /* If a downgrade to read-only mount is in progress, no other
@@ -3376,13 +3495,13 @@ hfs_start_transaction(struct hfsmount *hfsmp)
 
        if (hfsmp->jnl) {
                ret = journal_start_transaction(hfsmp->jnl);
-               if (ret == 0) {
-                       OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting);
-               }
        } else {
                ret = 0;
        }
 
+       if (ret == 0)
+               ++hfsmp->hfs_transaction_nesting;
+
 out:
        if (ret != 0 && unlock_on_err) {
                hfs_unlock_global (hfsmp);
@@ -3395,12 +3514,15 @@ out:
 int
 hfs_end_transaction(struct hfsmount *hfsmp)
 {
-    int need_unlock=0, ret;
+    int ret;
+
+       assert(!hfsmp->jnl || journal_owner(hfsmp->jnl) == current_thread());
+       assert(hfsmp->hfs_transaction_nesting > 0);
 
-    if ((hfsmp->jnl == NULL) || ( journal_owner(hfsmp->jnl) == current_thread()
-           && (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) {
-           need_unlock = 1;
-    } 
+       if (hfsmp->jnl && hfsmp->hfs_transaction_nesting == 1)
+               hfs_flushvolumeheader(hfsmp, HFS_FVH_FLUSH_IF_DIRTY);
+
+       bool need_unlock = !--hfsmp->hfs_transaction_nesting;
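        /*
         * Editor's note, illustrative only: hfs_transaction_nesting replaces the
         * old atomic hfs_global_lock_nesting counter.  With two nested
         * transactions, the inner hfs_end_transaction() takes the counter from
         * 2 to 1 and keeps the global lock; the outer call takes it from 1 to 0,
         * flushes the volume header first (journaled case, above), and releases
         * the lock below.
         */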
 
        if (hfsmp->jnl) {
                ret = journal_end_transaction(hfsmp->jnl);
@@ -3440,49 +3562,105 @@ hfs_journal_unlock(struct hfsmount *hfsmp)
        hfs_unlock_global (hfsmp);
 }
 
-/* 
- * Flush the contents of the journal to the disk. 
+/*
+ * Flush the contents of the journal to the disk.
  *
- *  Input: 
- *     wait_for_IO - 
- *     If TRUE, wait to write in-memory journal to the disk 
- *     consistently, and also wait to write all asynchronous 
- *     metadata blocks to its corresponding locations
- *     consistently on the disk.  This means that the journal 
- *     is empty at this point and does not contain any 
- *     transactions.  This is overkill in normal scenarios  
- *     but is useful whenever the metadata blocks are required 
- *     to be consistent on-disk instead of just the journal 
- *     being consistent; like before live verification 
- *     and live volume resizing.  
+ *  - HFS_FLUSH_JOURNAL
+ *      Wait to write in-memory journal to the disk consistently.
+ *      This means that the journal still contains uncommitted
+ *      transactions and the file system metadata blocks in
+ *      the journal transactions might be written asynchronously
+ *      to the disk.  But there is no guarantee that they are
+ *      written to the disk before returning to the caller.
+ *      Note that this option is sufficient for file system
+ *      data integrity as it guarantees consistent journal
+ *      content on the disk.
+ *
+ *  - HFS_FLUSH_JOURNAL_META
+ *      Wait to write in-memory journal to the disk
+ *      consistently, and also wait to write all asynchronous
+ *      metadata blocks to its corresponding locations
+ *      consistently on the disk. This is overkill in normal
+ *      scenarios but is useful whenever the metadata blocks
+ *      are required to be consistent on-disk instead of
+ *      just the journal being consistent; like before live
+ *      verification and live volume resizing.  The update of the
+ *      metadata doesn't include a barrier or track cache flush.
+ *
+ *  - HFS_FLUSH_FULL
+ *      HFS_FLUSH_JOURNAL + force a track cache flush to media
+ *
+ *  - HFS_FLUSH_CACHE
+ *      Force a track cache flush to media.
+ *
+ *  - HFS_FLUSH_BARRIER
+ *      Barrier-only flush to ensure write order
  *
- *     If FALSE, only wait to write in-memory journal to the 
- *     disk consistently.  This means that the journal still 
- *     contains uncommitted transactions and the file system 
- *     metadata blocks in the journal transactions might be 
- *     written asynchronously to the disk.  But there is no 
- *     guarantee that they are written to the disk before 
- *     returning to the caller.  Note that this option is 
- *     sufficient for file system data integrity as it 
- *     guarantees consistent journal content on the disk.
  */
-int
-hfs_journal_flush(struct hfsmount *hfsmp, boolean_t wait_for_IO)
+errno_t hfs_flush(struct hfsmount *hfsmp, hfs_flush_mode_t mode)
 {
-       int ret;
+       errno_t error = 0;
+       journal_flush_options_t options = 0;
+       dk_synchronize_t sync_req = { .options = DK_SYNCHRONIZE_OPTION_BARRIER };
 
-       /* Only peek at hfsmp->jnl while holding the global lock */
-       hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
-       if (hfsmp->jnl) {
-               ret = journal_flush(hfsmp->jnl, wait_for_IO);
-       } else {
-               ret = 0;
+       switch (mode) {
+               case HFS_FLUSH_JOURNAL_META:
+                       // wait for journal, metadata blocks and previous async flush to finish
+                       SET(options, JOURNAL_WAIT_FOR_IO);
+
+                       // no break
+
+               case HFS_FLUSH_JOURNAL:
+               case HFS_FLUSH_JOURNAL_BARRIER:
+               case HFS_FLUSH_FULL:
+
+                       if (mode == HFS_FLUSH_JOURNAL_BARRIER &&
+                           !(hfsmp->hfs_flags & HFS_FEATURE_BARRIER))
+                               mode = HFS_FLUSH_FULL;
+
+                       if (mode == HFS_FLUSH_FULL)
+                               SET(options, JOURNAL_FLUSH_FULL);
+
+                       /* Only peek at hfsmp->jnl while holding the global lock */
+                       hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
+
+                       if (hfsmp->jnl)
+                               error = journal_flush(hfsmp->jnl, options);
+
+                       hfs_unlock_global (hfsmp);
+
+                       /*
+                        * This may result in a double barrier as
+                        * journal_flush may have issued a barrier itself
+                        */
+                       if (mode == HFS_FLUSH_JOURNAL_BARRIER)
+                               error = VNOP_IOCTL(hfsmp->hfs_devvp,
+                                   DKIOCSYNCHRONIZE, (caddr_t)&sync_req,
+                                   FWRITE, vfs_context_kernel());
+
+                       break;
+
+               case HFS_FLUSH_CACHE:
+                       // Do a full sync
+                       sync_req.options = 0;
+
+                       // no break
+
+               case HFS_FLUSH_BARRIER:
+                       // If a barrier-only flush isn't supported, fall back to a full flush.
+                       if (!(hfsmp->hfs_flags & HFS_FEATURE_BARRIER))
+                               sync_req.options = 0;
+
+                       error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZE, (caddr_t)&sync_req,
+                                          FWRITE, vfs_context_kernel());
+                       break;
+
+               default:
+                       error = EINVAL;
        }
-       hfs_unlock_global (hfsmp);
-       
-       return ret;
-}
 
+       return error;
+}
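/*
 * Editor's sketch, not part of this change: how a caller might choose a flush
 * mode, assuming only the hfs_flush() semantics documented above.  The helper
 * name and its bool parameter are invented for illustration.
 */
static errno_t hfs_flush_sketch(struct hfsmount *hfsmp, bool metadata_on_disk)
{
	// HFS_FLUSH_JOURNAL is enough for journal (and hence file-system)
	// consistency; HFS_FLUSH_JOURNAL_META additionally waits for the
	// asynchronous metadata writes, e.g. before live verification or
	// live volume resizing.
	return hfs_flush(hfsmp, metadata_on_disk ? HFS_FLUSH_JOURNAL_META
	                                         : HFS_FLUSH_JOURNAL);
}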
 
 /*
  * hfs_erase_unused_nodes
@@ -3679,8 +3857,8 @@ hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid)
        *docid = extinfo->document_id++;
 
        // mark the root cnode dirty
-       cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
-       (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
+       cp->c_flag |= C_MODIFIED;
+       hfs_update(cp->c_vp, 0);
 
        hfs_systemfile_unlock (hfsmp, lockflags);
        (void) hfs_end_transaction(hfsmp);
@@ -3799,7 +3977,7 @@ int hfs_freeze(struct hfsmount *hfsmp)
           might have the global lock at the moment and also so we
           can flush the journal. */
        hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);
-       journal_flush(hfsmp->jnl, TRUE);
+       journal_flush(hfsmp->jnl, JOURNAL_WAIT_FOR_IO);
        hfs_unlock_global(hfsmp);
 
        // don't need to iterate on all vnodes, we just need to
index 0c327a7922f7f2e11e9695514b28590a47a43a46..dac4b088f814fb1ae4b13f377c2df5bfcd647994 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -48,7 +48,6 @@
 #include <sys/kauth.h>
 #include <sys/uio_internal.h>
 #include <sys/fsctl.h>
-#include <sys/cprotect.h>
 #include <sys/xattr.h>
 #include <string.h>
 #include <sys/fsevents.h>
@@ -71,6 +70,8 @@
 #include "hfs_quota.h"
 #include "hfs_endian.h"
 #include "hfs_kdebug.h"
+#include "hfs_cprotect.h"
+
 
 #include "hfscommon/headers/BTreesInternal.h"
 #include "hfscommon/headers/FileMgrInternal.h"
@@ -107,10 +108,6 @@ static int hfs_move_data(cnode_t *from_cp, cnode_t *to_cp,
                                                 hfs_move_data_options_t options);
 static int hfs_move_fork(filefork_t *srcfork, cnode_t *src, 
                                                 filefork_t *dstfork, cnode_t *dst);
-static const struct cat_fork *
-hfs_prepare_fork_for_update(const filefork_t *pfork,
-                                                       struct cat_fork *fork_buf,
-                                                       uint32_t block_size);
 
 #if HFS_COMPRESSION
 static int hfs_move_compressed(cnode_t *from_vp, cnode_t *to_vp);
@@ -501,7 +498,7 @@ clear_tombstone_docid(struct  doc_tombstone *ut, __unused struct hfsmount *hfsmp
                // printf("clearing doc-id from ino %d\n", ocp->c_desc.cd_cnid);
                ofip->document_id = 0;
                ocp->c_bsdflags &= ~UF_TRACKED;
-               ocp->c_flag |= C_MODIFIED | C_FORCEUPDATE;   // mark it dirty
+               ocp->c_flag |= C_MODIFIED;
                /* cat_update(hfsmp, &ocp->c_desc, &ocp->c_attr, NULL, NULL); */
 
        }
@@ -693,6 +690,38 @@ hfs_vnop_open(struct vnop_open_args *ap)
        if (hfs_is_journal_file(hfsmp, cp))
                return (EPERM);
 
+       bool have_lock = false;
+
+#if CONFIG_PROTECT
+       if (ISSET(ap->a_mode, FENCRYPTED) && cp->c_cpentry && vnode_isreg(vp)) {
+               bool have_trunc_lock = false;
+
+
+               if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
+                       if (have_trunc_lock)
+                               hfs_unlock_truncate(cp, 0);
+                       return error;
+               }
+
+               have_lock = true;
+
+               if (cp->c_cpentry->cp_raw_open_count + 1
+                       < cp->c_cpentry->cp_raw_open_count) {
+                       // Overflow; too many raw opens on this file
+                       hfs_unlock(cp);
+                       if (have_trunc_lock)
+                               hfs_unlock_truncate(cp, 0);
+                       return ENFILE;
+               }
+
+
+               if (have_trunc_lock)
+                       hfs_unlock_truncate(cp, 0);
+
+               ++cp->c_cpentry->cp_raw_open_count;
+       }
+#endif
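	/*
	 * Editor's note, illustrative only: the cp_raw_open_count + 1 <
	 * cp_raw_open_count test above guards against the counter wrapping; if
	 * it is already at its maximum value, one more raw (FENCRYPTED) open
	 * would wrap it to zero, so the open is refused with ENFILE instead.
	 */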
+
        if ((hfsmp->hfs_flags & HFS_READ_ONLY) ||
            (hfsmp->jnl == NULL) ||
 #if NAMEDSTREAMS
@@ -700,10 +729,16 @@ hfs_vnop_open(struct vnop_open_args *ap)
 #else
            !vnode_isreg(vp) || vnode_isinuse(vp, 0)) {
 #endif
+
+#if CONFIG_PROTECT
+               if (have_lock)
+                       hfs_unlock(cp);
+#endif
+
                return (0);
        }
 
-       if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)))
+       if (!have_lock && (error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)))
                return (error);
 
 #if QUOTA
@@ -778,6 +813,13 @@ hfs_vnop_close(ap)
        cp = VTOC(vp);
        hfsmp = VTOHFS(vp);
 
+#if CONFIG_PROTECT
+       if (cp->c_cpentry && ISSET(ap->a_fflag, FENCRYPTED) && vnode_isreg(vp)) {
+               assert(cp->c_cpentry->cp_raw_open_count > 0);
+               --cp->c_cpentry->cp_raw_open_count;
+       }
+#endif
+
        /* 
         * If the rsrc fork is a named stream, it can cause the data fork to
         * stay around, preventing de-allocation of these blocks. 
@@ -1008,10 +1050,13 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
                if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)))
                        return (error);
                hfs_touchtimes(hfsmp, cp);
-       }
-       else {
-               if ((error = hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT)))
-                       return (error);
+
+               // downgrade to a shared lock since that's all we need from here on out
+               cp->c_lockowner = HFS_SHARED_OWNER;
+               lck_rw_lock_exclusive_to_shared(&cp->c_rwlock);
+
+       } else if ((error = hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT))) {
+               return (error);
        }
 
        if (v_type == VDIR) {
@@ -1172,19 +1217,9 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
                        VATTR_SET_SUPPORTED(vap, va_acl);
                }
        }
-       if (VATTR_IS_ACTIVE(vap, va_access_time)) {
-               /* Access times are lazily updated, get current time if needed */
-               if (cp->c_touch_acctime) {
-                       struct timeval tv;
-       
-                       microtime(&tv);
-                       vap->va_access_time.tv_sec = tv.tv_sec;
-               } else {
-                       vap->va_access_time.tv_sec = cp->c_atime;
-               }
-               vap->va_access_time.tv_nsec = 0;
-               VATTR_SET_SUPPORTED(vap, va_access_time);
-       }
+
+       vap->va_access_time.tv_sec = cp->c_atime;
+       vap->va_access_time.tv_nsec = 0;
        vap->va_create_time.tv_sec = cp->c_itime;
        vap->va_create_time.tv_nsec = 0;
        vap->va_modify_time.tv_sec = cp->c_mtime;
@@ -1233,7 +1268,7 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
         */
        if (cp->c_flag & C_HARDLINK) {
                vap->va_linkid = (u_int64_t)hfs_currentcnid(cp);
-               vap->va_parentid = (u_int64_t)hfs_currentparent(cp);
+               vap->va_parentid = (u_int64_t)hfs_currentparent(cp, /* have_lock: */ true);
        } else {
                vap->va_linkid = (u_int64_t)cp->c_cnid;
                vap->va_parentid = (u_int64_t)cp->c_parentcnid;
@@ -1256,7 +1291,6 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
                        }
                } else
                        vap->va_data_size = data_size;
-//             vap->va_supported |= VNODE_ATTR_va_data_size;
                VATTR_SET_SUPPORTED(vap, va_data_size);
        }
 #else
@@ -1266,7 +1300,7 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
 
 #if CONFIG_PROTECT
        if (VATTR_IS_ACTIVE(vap, va_dataprotect_class)) {
-               vap->va_dataprotect_class = cp->c_cpentry ? cp->c_cpentry->cp_pclass : 0;
+               vap->va_dataprotect_class = cp->c_cpentry ? CP_CLASS(cp->c_cpentry->cp_pclass) : 0;
                VATTR_SET_SUPPORTED(vap, va_dataprotect_class);
        }       
 #endif
@@ -1288,7 +1322,8 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
        }
 
        /* Mark them all at once instead of individual VATTR_SET_SUPPORTED calls. */
-       vap->va_supported |= VNODE_ATTR_va_create_time | VNODE_ATTR_va_modify_time |
+       vap->va_supported |= VNODE_ATTR_va_access_time |
+                                                VNODE_ATTR_va_create_time | VNODE_ATTR_va_modify_time |
                             VNODE_ATTR_va_change_time| VNODE_ATTR_va_backup_time |
                             VNODE_ATTR_va_iosize | VNODE_ATTR_va_uid |
                             VNODE_ATTR_va_gid | VNODE_ATTR_va_mode |
@@ -1421,8 +1456,15 @@ hfs_vnop_setattr(ap)
        }
 
 #if CONFIG_PROTECT
-       if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
-               return (error);
+       /*
+        * All metadata changes should be allowed except a size-changing setattr, which
+        * has effects on file content and requires calling into cp_handle_vnop
+        * to perform the content protection check.
+        */
+       if (VATTR_IS_ACTIVE(vap, va_data_size)) {
+               if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
+                       return (error);
+               }
        }
 #endif /* CONFIG_PROTECT */
 
@@ -1612,6 +1654,7 @@ hfs_vnop_setattr(ap)
                }
 
                cp->c_bsdflags = vap->va_flags;
+               cp->c_flag |= C_MODIFIED;
                cp->c_touch_chgtime = TRUE;
 
                
@@ -1662,13 +1705,14 @@ hfs_vnop_setattr(ap)
                         */
                        if ((VTOVCB(vp)->vcbSigWord == kHFSPlusSigWord) &&
                            (cp->c_cnid != kHFSRootFolderID) &&
+                               !VATTR_IS_ACTIVE(vap, va_create_time) &&
                            (cp->c_mtime < cp->c_itime)) {
                                cp->c_itime = cp->c_mtime;
                        }
                }
                if (VATTR_IS_ACTIVE(vap, va_backup_time))
                        cp->c_btime = vap->va_backup_time.tv_sec;
-               cp->c_flag |= C_MODIFIED;
+               cp->c_flag |= C_MINOR_MOD;
        }
        
        /*
@@ -1677,10 +1721,11 @@ hfs_vnop_setattr(ap)
        VATTR_SET_SUPPORTED(vap, va_encoding);
        if (VATTR_IS_ACTIVE(vap, va_encoding)) {
                cp->c_encoding = vap->va_encoding;
+               cp->c_flag |= C_MODIFIED;
                hfs_setencodingbits(hfsmp, cp->c_encoding);
        }
 
-       if ((error = hfs_update(vp, TRUE)) != 0)
+       if ((error = hfs_update(vp, 0)) != 0)
                goto out;
 out:
        if (cp) {
@@ -1741,8 +1786,12 @@ hfs_chmod(struct vnode *vp, int mode, __unused kauth_cred_t cred, __unused struc
                return (0);
        };
 #endif
-       cp->c_mode &= ~ALLPERMS;
-       cp->c_mode |= (mode & ALLPERMS);
+
+       mode_t new_mode = (cp->c_mode & ~ALLPERMS) | (mode & ALLPERMS);
+       if (new_mode != cp->c_mode) {
+               cp->c_mode = new_mode;
+               cp->c_flag |= C_MINOR_MOD;
+       }
        cp->c_touch_chgtime = TRUE;
        return (0);
 }
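/*
 * Editor's note, illustrative only: with the change above, a chmod to the mode
 * the file already has (e.g. chmod 0644 on a file that is already 0644) no
 * longer marks the cnode C_MINOR_MOD; only the change time is touched.
 */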
@@ -1841,6 +1890,13 @@ hfs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred,
 
        ogid = cp->c_gid;
        ouid = cp->c_uid;
+
+       if (ouid == uid && ogid == gid) {
+               // No change, just set change time
+               cp->c_touch_chgtime = TRUE;
+               return 0;
+       }
+
 #if QUOTA
        if ((error = hfs_getinoquota(cp)))
                return (error);
@@ -1912,6 +1968,11 @@ good:
                panic("hfs_chown: lost quota");
 #endif /* QUOTA */
 
+       /*
+        * Without quotas, we could probably make this a minor
+        * modification.
+        */
+       cp->c_flag |= C_MODIFIED;
 
        /*
          According to the SUSv3 Standard, chown() shall mark
@@ -2342,7 +2403,7 @@ hfs_vnop_exchange(ap)
         * 2) Drop the special bits from the current flags
         * 3) swap the special flag bits to their destination
         */      
-       from_cp->c_flag |= to_flag_special;
+       from_cp->c_flag |= to_flag_special | C_MODIFIED;
        from_cp->c_attr.ca_recflags = to_cp->c_attr.ca_recflags;
        bcopy(to_cp->c_finderinfo, from_cp->c_finderinfo, 32);
 
@@ -2371,7 +2432,7 @@ hfs_vnop_exchange(ap)
         * Only OR in the "from" flags into our cnode flags below. 
         * Leave the rest of the flags alone.
         */
-       to_cp->c_flag |= from_flag_special;
+       to_cp->c_flag |= from_flag_special | C_MODIFIED;
 
        to_cp->c_attr.ca_recflags = tempattr.ca_recflags;
        bcopy(tempattr.ca_finderinfo, to_cp->c_finderinfo, 32);
@@ -2403,15 +2464,15 @@ exit:
            hfs_end_transaction(hfsmp);
        }
 
+       if (have_cnode_locks)
+               hfs_unlockpair(from_cp, to_cp);
+
        if (have_from_trunc_lock)
                hfs_unlock_truncate(from_cp, 0);
 
        if (have_to_trunc_lock)
                hfs_unlock_truncate(to_cp, 0);
 
-       if (have_cnode_locks)
-               hfs_unlockpair(from_cp, to_cp);
-
        return (error);
 }
 
@@ -2738,10 +2799,10 @@ int hfs_move_data(cnode_t *from_cp, cnode_t *to_cp,
        struct cat_fork dfork_buf;
        const struct cat_fork *dfork, *rfork;
 
-       dfork = hfs_prepare_fork_for_update(to_cp->c_datafork, &dfork_buf,
-                                                                               hfsmp->blockSize);
-       rfork = hfs_prepare_fork_for_update(from_rfork, &rfork_buf.ff_data,
-                                                                               hfsmp->blockSize);
+       dfork = hfs_prepare_fork_for_update(to_cp->c_datafork, NULL,
+                                                                               &dfork_buf, hfsmp->blockSize);
+       rfork = hfs_prepare_fork_for_update(from_rfork, NULL,
+                                                                               &rfork_buf.ff_data, hfsmp->blockSize);
 
        // Update the catalog nodes, to_cp first
        if ((error = cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, 
@@ -2749,7 +2810,7 @@ int hfs_move_data(cnode_t *from_cp, cnode_t *to_cp,
                goto exit;
        }
 
-       CLR(to_cp->c_flag, C_MODIFIED);
+       CLR(to_cp->c_flag, C_MODIFIED | C_MINOR_MOD);
 
        // Update in-memory resource fork data here
        if (from_rfork) {
@@ -2778,9 +2839,9 @@ int hfs_move_data(cnode_t *from_cp, cnode_t *to_cp,
        }
 
        // Currently unnecessary, but might be useful in future...
-       dfork = hfs_prepare_fork_for_update(from_cp->c_datafork, &dfork_buf,
+       dfork = hfs_prepare_fork_for_update(from_cp->c_datafork, NULL, &dfork_buf,
                                                                                hfsmp->blockSize);
-       rfork = hfs_prepare_fork_for_update(from_rfork, &rfork_buf.ff_data,
+       rfork = hfs_prepare_fork_for_update(from_rfork, NULL, &rfork_buf.ff_data,
                                                                                hfsmp->blockSize);
 
        // Update from_cp
@@ -2789,7 +2850,7 @@ int hfs_move_data(cnode_t *from_cp, cnode_t *to_cp,
                goto exit;
        }
 
-       CLR(from_cp->c_flag, C_MODIFIED);
+       CLR(from_cp->c_flag, C_MODIFIED | C_MINOR_MOD);
 
 exit:
        if (lockflags) {
@@ -2820,8 +2881,9 @@ static int hfs_move_fork(filefork_t *srcfork, cnode_t *src_cp,
                                                 filefork_t *dstfork, cnode_t *dst_cp) 
 {
        // Move the invalid ranges
-       dstfork->ff_invalidranges = srcfork->ff_invalidranges;
-       rl_init(&srcfork->ff_invalidranges);
+       TAILQ_SWAP(&dstfork->ff_invalidranges, &srcfork->ff_invalidranges,
+                          rl_entry, rl_link);
+       rl_remove_all(&srcfork->ff_invalidranges);
 
        // Move the fork data (copy whole structure)
        dstfork->ff_data = srcfork->ff_data;
@@ -2868,19 +2930,19 @@ static void hfs_fsync_panic_hook(panic_hook_t *hook_)
  *  cnode must be locked
  */
 int
-hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p)
+hfs_fsync(struct vnode *vp, int waitfor, hfs_fsync_mode_t fsyncmode, struct proc *p)
 {
        struct cnode *cp = VTOC(vp);
        struct filefork *fp = NULL;
        int retval = 0;
        struct hfsmount *hfsmp = VTOHFS(vp);
-       struct rl_entry *invalid_range;
        struct timeval tv;
        int waitdata;           /* attributes necessary for data retrieval */
        int wait;               /* all other attributes (e.g. atime, etc.) */
        int lockflag;
        int took_trunc_lock = 0;
        int locked_buffers = 0;
+       int fsync_default = 1;
 
        /*
         * Applications which only care about data integrity rather than full
@@ -2889,8 +2951,11 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p)
         */
        wait = (waitfor == MNT_WAIT);
        waitdata = (waitfor == MNT_DWAIT) | wait;
+
        if (always_do_fullfsync)
-               fullsync = 1;
+               fsyncmode = HFS_FSYNC_FULL;
+       if (fsyncmode != HFS_FSYNC)
+               fsync_default = 0;
        
        /* HFS directories don't have any data blocks. */
        if (vnode_isdir(vp))
@@ -2944,7 +3009,7 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p)
                cp->c_zftimeout != 0))) {
 
                microuptime(&tv);
-               if ((cp->c_flag & C_ALWAYS_ZEROFILL) == 0 && !fullsync && tv.tv_sec < (long)cp->c_zftimeout) {
+               if ((cp->c_flag & C_ALWAYS_ZEROFILL) == 0 && fsync_default && tv.tv_sec < (long)cp->c_zftimeout) {
                        /* Remember that a force sync was requested. */
                        cp->c_flag |= C_ZFWANTSYNC;
                        goto datasync;
@@ -2959,30 +3024,11 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p)
                                hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
                                took_trunc_lock = 1;
                        }
-                       while ((invalid_range = TAILQ_FIRST(&fp->ff_invalidranges))) {
-                               off_t start = invalid_range->rl_start;
-                               off_t end = invalid_range->rl_end;
-               
-                               /* The range about to be written must be validated
-                                * first, so that VNOP_BLOCKMAP() will return the
-                                * appropriate mapping for the cluster code:
-                                */
-                               rl_remove(start, end, &fp->ff_invalidranges);
-
-                               /* Don't hold cnode lock when calling into cluster layer. */
-                               hfs_unlock(cp);
-                               (void) cluster_write(vp, (struct uio *) 0,
-                                                    fp->ff_size, end + 1, start, (off_t)0,
-                                                    IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE);
-                               hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
-                               cp->c_flag |= C_MODIFIED;
-                       }
+                       hfs_flush_invalid_ranges(vp);
                        hfs_unlock(cp);
                        (void) cluster_push(vp, waitdata ? IO_SYNC : 0);
                        hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
                }
-               cp->c_flag &= ~C_ZFWANTSYNC;
-               cp->c_zftimeout = 0;
        }
 datasync:
        if (took_trunc_lock) {
@@ -3029,7 +3075,7 @@ datasync:
                 * if the vnode is in the middle of a recycle (VL_TERMINATE or VL_DEAD is set).
                 */
                if (vnode_isrecycled(vp)) {
-                       fullsync = 1;
+                       fsync_default = 0;
                }
        }
 
@@ -3043,7 +3089,7 @@ metasync:
                cp->c_touch_chgtime = FALSE;
                cp->c_touch_modtime = FALSE;
        } else if ( !(vp->v_flag & VSWAP) ) /* User file */ {
-               retval = hfs_update(vp, wait);
+               retval = hfs_update(vp, HFS_UPDATE_FORCE);
 
                /*
                 * When MNT_WAIT is requested push out the catalog record for
@@ -3051,7 +3097,7 @@ metasync:
                 * because the journal_flush or hfs_metasync_all will push out
                 * all of the metadata changes.
                 */
-               if ((retval == 0) && wait && !fullsync && cp->c_hint &&
+               if ((retval == 0) && wait && fsync_default && cp->c_hint &&
                    !ISSET(cp->c_flag, C_DELETED | C_NOEXISTS)) {
                        hfs_metasync(VTOHFS(vp), (daddr64_t)cp->c_hint, p);
                }
@@ -3060,27 +3106,24 @@ metasync:
                 * If this was a full fsync, make sure all metadata
                 * changes get to stable storage.
                 */
-               if (fullsync) {
+               if (!fsync_default) {
                        if (hfsmp->jnl) {
-                               hfs_journal_flush(hfsmp, FALSE);
-                       
-                               if (journal_uses_fua(hfsmp->jnl)) {
-                                       /*
-                                        * the journal_flush did NOT issue a sync track cache command,
-                                        * and the fullsync indicates we are supposed to flush all cached
-                                        * data to the media, so issue the sync track cache command
-                                        * explicitly
-                                        */
-                                       VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
-                               }
+                               if (fsyncmode == HFS_FSYNC_FULL)
+                                       hfs_flush(hfsmp, HFS_FLUSH_FULL);
+                               else
+                                       hfs_flush(hfsmp,
+                                           HFS_FLUSH_JOURNAL_BARRIER);
                        } else {
                                retval = hfs_metasync_all(hfsmp);
                                /* XXX need to pass context! */
-                               VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
+                               hfs_flush(hfsmp, HFS_FLUSH_CACHE);
                        }
                }
        }
 
+       if (!hfs_is_dirty(cp) && !ISSET(cp->c_flag, C_DELETED))
+               vnode_cleardirty(vp);
+
        return (retval);
 }
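/*
 * Editor's sketch of the fsync-mode mapping above, not part of this change:
 * HFS_FSYNC keeps the default behaviour; any stronger mode clears
 * fsync_default, and at the end a journaled volume gets
 * hfs_flush(hfsmp, HFS_FLUSH_FULL) for HFS_FSYNC_FULL or
 * hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_BARRIER) otherwise, while an unjournaled
 * volume falls back to hfs_metasync_all() followed by
 * hfs_flush(hfsmp, HFS_FLUSH_CACHE).
 */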
 
@@ -3459,9 +3502,9 @@ hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 
                dcp->c_touch_chgtime = TRUE;
                dcp->c_touch_modtime = TRUE;
-               hfs_touchtimes(hfsmp, cp);
-               (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL);
-               cp->c_flag &= ~(C_MODIFIED | C_FORCEUPDATE);
+               dcp->c_flag |= C_MODIFIED;
+
+               hfs_update(dcp->c_vp, 0);
        }
 
        hfs_systemfile_unlock(hfsmp, lockflags);
@@ -3634,6 +3677,8 @@ relock:
         * more work.
         */
        if (error == 0) {
+               hfs_hotfile_deleted(vp);
+               
                if (rvp) {
                recycle_rsrc = 1;
                }
@@ -3758,23 +3803,6 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
                return (EPERM);
        }
 
-       /*
-        * If removing a symlink, then we need to ensure that the
-        * data blocks for the symlink are not still in-flight or pending.  
-        * If so, we will unlink the symlink here, making its blocks 
-        * available for re-allocation by a subsequent transaction.  That is OK, but
-        * then the I/O for the data blocks could then go out before the journal 
-        * transaction that created it was flushed, leading to I/O ordering issues.
-        */
-       if (vp->v_type == VLNK) {       
-               /* 
-                * This will block if the asynchronous journal flush is in progress.
-                * If this symlink is not being renamed over and doesn't have any open FDs,
-                * then we'll remove it from the journal's bufs below in kill_block.
-                */
-               buf_wait_for_shadow_io (vp, 0);
-       }
-
        /*
         * Hard links require special handling.
         */
@@ -4199,7 +4227,7 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
                cp->c_flag |= C_NOEXISTS;
                cp->c_flag &= ~C_DELETED;
                
-               cp->c_touch_chgtime = TRUE;   /* XXX needed ? */
+               cp->c_touch_chgtime = TRUE;
                --cp->c_linkcount;
                
                /* 
@@ -5155,15 +5183,22 @@ skip_rm:
                }
                tdcp->c_entries++;
                tdcp->c_dirchangecnt++;
+               tdcp->c_flag |= C_MODIFIED;
                hfs_incr_gencount(tdcp);
 
                if (fdcp->c_entries > 0)
                        fdcp->c_entries--;
                fdcp->c_dirchangecnt++;
+               fdcp->c_flag |= C_MODIFIED;
                fdcp->c_touch_chgtime = TRUE;
                fdcp->c_touch_modtime = TRUE;
 
-               fdcp->c_flag |= C_FORCEUPDATE;  // XXXdbg - force it out!
+               if (ISSET(fcp->c_flag, C_HARDLINK)) {
+                       hfs_relorigin(fcp, fdcp->c_fileid);
+                       if (fdcp->c_fileid != fdcp->c_cnid)
+                               hfs_relorigin(fcp, fdcp->c_cnid);
+               }
+
                (void) hfs_update(fdvp, 0);
        }
        hfs_incr_gencount(fdcp);
@@ -5172,7 +5207,6 @@ skip_rm:
        tdcp->c_touch_chgtime = TRUE;
        tdcp->c_touch_modtime = TRUE;
 
-       tdcp->c_flag |= C_FORCEUPDATE;  // XXXdbg - force it out!
        (void) hfs_update(tdvp, 0);
 
        /* Update the vnode's name now that the rename has completed. */
@@ -5686,7 +5720,7 @@ hfs_vnop_readdir(ap)
                /* disk corruption */
                cp->c_entries++;
                /* Mark the cnode as dirty. */
-               cp->c_flag |= (C_MODIFIED | C_FORCEUPDATE);
+               cp->c_flag |= C_MODIFIED;
                printf("hfs_vnop_readdir: repairing valence to non-zero! \n");
                bump_valence++;
        }
@@ -5911,48 +5945,50 @@ hfs_vnop_pathconf(ap)
 /*
  * Prepares a fork for cat_update by making sure ff_size and ff_blocks
  * are no bigger than the valid data on disk thus reducing the chance
- * of exposing unitialised data in the event of a non clean unmount.
+ * of exposing uninitialised data in the event of a non clean unmount.
  * fork_buf is where to put the temporary copy if required.  (It can
  * be inside pfork.)
  */
-static const struct cat_fork *
-hfs_prepare_fork_for_update(const filefork_t *pfork,
-                                                       struct cat_fork *fork_buf,
+const struct cat_fork *
+hfs_prepare_fork_for_update(filefork_t *ff,
+                                                       const struct cat_fork *cf,
+                                                       struct cat_fork *cf_buf,
                                                        uint32_t block_size)
 {
-       if (!pfork)
+       if (!ff)
                return NULL;
 
-       off_t max_size = pfork->ff_size;
+       if (!cf)
+               cf = &ff->ff_data;
+       if (!cf_buf)
+               cf_buf = &ff->ff_data;
+
+       off_t max_size = ff->ff_size;
 
        // Check first invalid range
-       if (!TAILQ_EMPTY(&pfork->ff_invalidranges))
-               max_size = TAILQ_FIRST(&pfork->ff_invalidranges)->rl_start;
+       if (!TAILQ_EMPTY(&ff->ff_invalidranges))
+               max_size = TAILQ_FIRST(&ff->ff_invalidranges)->rl_start;
 
-       if (!pfork->ff_unallocblocks && pfork->ff_size <= max_size)
-               return &pfork->ff_data; // Nothing to do
+       if (!ff->ff_unallocblocks && ff->ff_size <= max_size)
+               return cf; // Nothing to do
 
-       if (pfork->ff_blocks < pfork->ff_unallocblocks) {
+       if (ff->ff_blocks < ff->ff_unallocblocks) {
                panic("hfs: ff_blocks %d is less than unalloc blocks %d\n",
-                         pfork->ff_blocks, pfork->ff_unallocblocks);
+                         ff->ff_blocks, ff->ff_unallocblocks);
        }
 
-       struct cat_fork *out = fork_buf;
+       struct cat_fork *out = cf_buf;
 
-       if (out != &pfork->ff_data)
-               bcopy(&pfork->ff_data, out, sizeof(*out));
+       if (out != cf)
+               bcopy(cf, out, sizeof(*cf));
 
        // Adjust cf_blocks for cf_vblocks
        out->cf_blocks -= out->cf_vblocks;
 
        /*
-        * We have to trim the size with the updated cf_blocks.  You might
-        * think that this is unnecessary because the invalid ranges
-        * should catch this, but we update invalid ranges *before* we do
-        * I/O whereas cf_vblocks is updated later in hfs_vnop_blockmap.
-        * There might still be a chance that we will be exposing
-        * unitialised data because the metadata takes a different path to
-        * data but the window should be tiny (if it exists at all).
+        * Here we trim the size with the updated cf_blocks.  This is
+        * probably unnecessary now because the invalid ranges should
+        * catch this (but that wasn't always the case).
         */
        off_t alloc_bytes = hfs_blk_to_bytes(out->cf_blocks, block_size);
        if (out->cf_size > alloc_bytes)
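To make the clamp concrete, here is an illustrative fragment with made-up numbers (the cf_* names are the ones used above, hfs_blk_to_bytes() reduces to the multiplication shown, and none of the values come from a real volume):

    uint32_t block_size = 4096;                        /* 4 KiB allocation blocks           */
    uint32_t cf_blocks  = 10, cf_vblocks = 3;          /* 3 of the 10 blocks only borrowed  */
    off_t    cf_size    = 40000;                       /* in-memory logical size            */

    cf_blocks -= cf_vblocks;                           /* 7 blocks actually on disk         */
    off_t alloc_bytes = (off_t)cf_blocks * block_size; /* 28672 bytes                       */
    if (cf_size > alloc_bytes)
            cf_size = alloc_bytes;                     /* catalog sees 28672, not 40000     */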
@@ -5968,13 +6004,11 @@ hfs_prepare_fork_for_update(const filefork_t *pfork,
 /*
  * Update a cnode's on-disk metadata.
  *
- * If waitfor is set, then wait for the disk write of
- * the node to complete.
- *
- * The cnode must be locked exclusive
+ * The cnode must be locked exclusive.  See declaration for possible
+ * options.
  */
 int
-hfs_update(struct vnode *vp, __unused int waitfor)
+hfs_update(struct vnode *vp, int options)
 {
        struct cnode *cp = VTOC(vp);
        struct proc *p;
@@ -5987,6 +6021,9 @@ hfs_update(struct vnode *vp, __unused int waitfor)
        int error;
        uint32_t tstate = 0;
 
+       if (ISSET(cp->c_flag, C_NOEXISTS))
+               return 0;
+
        p = current_proc();
        hfsmp = VTOHFS(vp);
 
@@ -5995,14 +6032,14 @@ hfs_update(struct vnode *vp, __unused int waitfor)
                return (0);
        }
        if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (cp->c_mode == 0)) {
-               cp->c_flag &= ~C_MODIFIED;
+               CLR(cp->c_flag, C_MODIFIED | C_MINOR_MOD | C_NEEDS_DATEADDED);
                cp->c_touch_acctime = 0;
                cp->c_touch_chgtime = 0;
                cp->c_touch_modtime = 0;
                return (0);
        }
        if (kdebug_enable) {
-               if (cp->c_touch_acctime)
+               if (cp->c_touch_acctime || cp->c_atime != cp->c_attr.ca_atimeondisk)
                        tstate |= DBG_HFS_UPDATE_ACCTIME;
                if (cp->c_touch_modtime)
                        tstate |= DBG_HFS_UPDATE_MODTIME;
@@ -6011,58 +6048,65 @@ hfs_update(struct vnode *vp, __unused int waitfor)
 
                if (cp->c_flag & C_MODIFIED)
                        tstate |= DBG_HFS_UPDATE_MODIFIED;
-               if (cp->c_flag & C_FORCEUPDATE)
+               if (ISSET(options, HFS_UPDATE_FORCE))
                        tstate |= DBG_HFS_UPDATE_FORCE;
                if (cp->c_flag & C_NEEDS_DATEADDED)
                        tstate |= DBG_HFS_UPDATE_DATEADDED;
+               if (cp->c_flag & C_MINOR_MOD)
+                       tstate |= DBG_HFS_UPDATE_MINOR;
        }
        hfs_touchtimes(hfsmp, cp);
 
-       /* Nothing to update. */
-       if ((cp->c_flag & (C_MODIFIED | C_FORCEUPDATE)) == 0) {
-               return (0);
+       if (!ISSET(cp->c_flag, C_MODIFIED | C_MINOR_MOD)
+               && !hfs_should_save_atime(cp)) {
+               // Nothing to update
+               return 0;
        }
-       
-       if (cp->c_datafork)
-               dataforkp = &cp->c_datafork->ff_data;
-       if (cp->c_rsrcfork)
-               rsrcforkp = &cp->c_rsrcfork->ff_data;
 
-       /*
-        * For delayed allocations updates are
-        * postponed until an fsync or the file
-        * gets written to disk.
-        *
-        * Deleted files can defer meta data updates until inactive.
-        *
-        * If we're ever called with the C_FORCEUPDATE flag though
-        * we have to do the update.
-        */
-       if (ISSET(cp->c_flag, C_FORCEUPDATE) == 0 &&
-           (ISSET(cp->c_flag, C_DELETED) || 
-           (dataforkp && cp->c_datafork->ff_unallocblocks) ||
-           (rsrcforkp && cp->c_rsrcfork->ff_unallocblocks))) {
-       //      cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_UPDATE);
-               cp->c_flag |= C_MODIFIED;
+       KDBG(HFSDBG_UPDATE | DBG_FUNC_START, VM_KERNEL_ADDRPERM(vp), tstate);
 
-               return (0);
+       bool check_txn = false;
+
+       if (!ISSET(options, HFS_UPDATE_FORCE) && !ISSET(cp->c_flag, C_MODIFIED)) {
+               /*
+                * This must be a minor modification.  If the current
+                * transaction already has an update for this node, then we
+                * bundle in the modification.
+                */
+               if (hfsmp->jnl
+                       && journal_current_txn(hfsmp->jnl) == cp->c_update_txn) {
+                       check_txn = true;
+               } else {
+                       tstate |= DBG_HFS_UPDATE_SKIPPED;
+                       error = 0;
+                       goto exit;
+               }
        }
 
-       KERNEL_DEBUG_CONSTANT(HFSDBG_UPDATE | DBG_FUNC_START, VM_KERNEL_ADDRPERM(vp), tstate, 0, 0, 0);
+       if ((error = hfs_start_transaction(hfsmp)) != 0)
+               goto exit;
 
-       if ((error = hfs_start_transaction(hfsmp)) != 0) {
-           KERNEL_DEBUG_CONSTANT(HFSDBG_UPDATE | DBG_FUNC_END, VM_KERNEL_ADDRPERM(vp), tstate, error, -1, 0);
-           return error;
+       if (check_txn
+               && journal_current_txn(hfsmp->jnl) != cp->c_update_txn) {
+               hfs_end_transaction(hfsmp);
+               tstate |= DBG_HFS_UPDATE_SKIPPED;
+               error = 0;
+               goto exit;
        }
 
-    /* 
+       if (cp->c_datafork)
+               dataforkp = &cp->c_datafork->ff_data;
+       if (cp->c_rsrcfork)
+               rsrcforkp = &cp->c_rsrcfork->ff_data;
+
+    /*
      * Modify the values passed to cat_update based on whether or not
      * the file has invalid ranges or borrowed blocks.
      */
-    dataforkp = hfs_prepare_fork_for_update(cp->c_datafork, &datafork, hfsmp->blockSize);
-       rsrcforkp = hfs_prepare_fork_for_update(cp->c_rsrcfork, &rsrcfork, hfsmp->blockSize);
+    dataforkp = hfs_prepare_fork_for_update(cp->c_datafork, NULL, &datafork, hfsmp->blockSize);
+       rsrcforkp = hfs_prepare_fork_for_update(cp->c_rsrcfork, NULL, &rsrcfork, hfsmp->blockSize);
 
-       if (kdebug_enable) {
+       if (__improbable(kdebug_enable & KDEBUG_TRACE)) {
                long dbg_parms[NUMPARMS];
                int  dbg_namelen;
 
@@ -6080,19 +6124,22 @@ hfs_update(struct vnode *vp, __unused int waitfor)
         */
        lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
 
-       /* XXX - waitfor is not enforced */
        error = cat_update(hfsmp, &cp->c_desc, &cp->c_attr, dataforkp, rsrcforkp);
 
+       if (hfsmp->jnl)
+               cp->c_update_txn = journal_current_txn(hfsmp->jnl);
+
        hfs_systemfile_unlock(hfsmp, lockflags);
 
-       /* After the updates are finished, clear the flags */
-       cp->c_flag &= ~(C_MODIFIED | C_FORCEUPDATE);
+       CLR(cp->c_flag, C_MODIFIED | C_MINOR_MOD);
 
        hfs_end_transaction(hfsmp);
 
-       KERNEL_DEBUG_CONSTANT(HFSDBG_UPDATE | DBG_FUNC_END, VM_KERNEL_ADDRPERM(vp), tstate, error, 0, 0);
+exit:
+
+       KDBG(HFSDBG_UPDATE | DBG_FUNC_END, VM_KERNEL_ADDRPERM(vp), tstate, error);
 
-       return (error);
+       return error;
 }
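Taken together with the C_MINOR_MOD handling above, a caller that only changed in-memory accounting can mark the cnode cheaply and let hfs_update() decide whether a catalog write is worth it. A rough sketch of that flow (simplified and hypothetical; locking, kdebug and error handling omitted):

    cp->c_flag |= C_MINOR_MOD;      /* losing this change leaves the volume consistent */
    hfs_update(vp, 0);              /* note: no HFS_UPDATE_FORCE                       */

    /* conceptually, inside hfs_update(): */
    if (!ISSET(cp->c_flag, C_MODIFIED) && !ISSET(options, HFS_UPDATE_FORCE)) {
            if (!hfsmp->jnl || journal_current_txn(hfsmp->jnl) != cp->c_update_txn)
                    return 0;       /* skipped: no open transaction to piggy-back on   */
            /* otherwise this cnode is already part of the current transaction,
             * so the minor modification is bundled in essentially for free */
    }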
 
 /*
@@ -6337,9 +6384,10 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
                dcp->c_dirchangecnt++;
                hfs_incr_gencount(dcp);
 
-               dcp->c_ctime = tv.tv_sec;
-               dcp->c_mtime = tv.tv_sec;
-               (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL);
+               dcp->c_touch_chgtime = dcp->c_touch_modtime = true;
+               dcp->c_flag |= C_MODIFIED;
+
+               hfs_update(dcp->c_vp, 0);
 
 #if CONFIG_PROTECT
                /*
@@ -6378,7 +6426,9 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
        hfs_systemfile_unlock(hfsmp, lockflags);
        if (error)
                goto exit;
-       
+
+       uint32_t txn = hfsmp->jnl ? journal_current_txn(hfsmp->jnl) : 0;
+
        /* Invalidate negative cache entries in the directory */
        if (dcp->c_flag & C_NEG_ENTRIES) {
                cache_purge_negatives(dvp);
@@ -6412,7 +6462,7 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
         */
 
        if ((attr.ca_fileid != 0) && (protected_mount) && (protectable_target))  {
-               cp_entry_destroy (entry);
+               cp_entry_destroy (hfsmp, entry);
                entry = NULL;
        }
 #endif
@@ -6444,20 +6494,21 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 
        cp = VTOC(tvp);
 
+       cp->c_update_txn = txn;
+
        struct  doc_tombstone *ut;
        ut = get_uthread_doc_tombstone();
        if (   ut->t_lastop_document_id != 0 
            && ut->t_lastop_parent == dvp
            && ut->t_lastop_parent_vid == vnode_vid(dvp)
-           && strcmp((char *)ut->t_lastop_filename, (char *)cp->c_desc.cd_nameptr) == 0) {
+           && strcmp((char *)ut->t_lastop_filename, (const char *)cp->c_desc.cd_nameptr) == 0) {
                struct FndrExtendedDirInfo *fip = (struct FndrExtendedDirInfo *)((char *)&cp->c_attr.ca_finderinfo + 16);
 
                //printf("CREATE: preserving doc-id %lld on %s\n", ut->t_lastop_document_id, ut->t_lastop_filename);
                fip->document_id = (uint32_t)(ut->t_lastop_document_id & 0xffffffff);
 
                cp->c_bsdflags |= UF_TRACKED;
-               // mark the cnode dirty
-               cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
+               cp->c_flag |= C_MODIFIED;
 
                if ((error = hfs_start_transaction(hfsmp)) == 0) {
                        lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
@@ -6484,6 +6535,36 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
                }
        }
 
+       if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && (vnode_isfastdevicecandidate(dvp) && !vnode_isautocandidate(dvp))) {
+
+               //printf("hfs: flagging %s (fileid: %d) as VFASTDEVCANDIDATE (dvp name: %s)\n",
+               //       cnp->cn_nameptr ? cnp->cn_nameptr : "<NONAME>",
+               //       cp->c_fileid,
+               //       dvp->v_name ? dvp->v_name : "no-dir-name");
+
+               //
+               // On new files we set the FastDevCandidate flag so that
+               // any new blocks allocated to it will be pinned.
+               //
+               cp->c_attr.ca_recflags |= kHFSFastDevCandidateMask;
+               vnode_setfastdevicecandidate(tvp);
+
+               //
+               // properly inherit auto-cached flags
+               //
+               if (vnode_isautocandidate(dvp)) {
+                       cp->c_attr.ca_recflags |= kHFSAutoCandidateMask;
+                       vnode_setautocandidate(tvp);
+               }
+
+
+               //
+               // We also want to add it to the hotfile adoption list so
+               // that it will eventually land in the hotfile btree
+               //
+               (void) hfs_addhotfile(tvp);
+       }
+       
        *vpp = tvp;
 
 #if CONFIG_PROTECT
@@ -6507,7 +6588,7 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
                         * Upon success, the keys were generated and written out. 
                         * Update the cp pointer in the cnode.
                         */
-                       cp_replace_entry (cp, keyed_entry);
+                       cp_replace_entry (hfsmp, cp, keyed_entry);
                        if (nocache) {
                                cache_enter (dvp, tvp, cnp);
                        }
@@ -6578,7 +6659,7 @@ exit:
         * out the pointer if it was called already.
         */
        if (entry) {
-               cp_entry_destroy (entry);
+               cp_entry_destroy (hfsmp, entry);
                entry = NULL;
        }       
 #endif
@@ -6608,6 +6689,11 @@ exit:
  * hfs_vgetrsrc acquires a resource fork vnode corresponding to the
  * cnode that is found in 'vp'.  The cnode should be locked upon entry
  * and will be returned locked, but it may be dropped temporarily.
+ *
+ * If the resource fork vnode does not exist, HFS will attempt to acquire an
+ * empty (uninitialized) vnode from VFS so as to avoid deadlocks with
+ * jetsam. If we let the normal getnewvnode code produce the vnode for us
+ * we would be doing so while holding the cnode lock of our cnode.
  * 
 * On success, *rvpp will hold the resource fork vnode with an
  * iocount.  *Don't* forget the vnode_put.
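Condensed, the empty-vnode dance described above looks roughly like the following (error paths and the catalog lookup are elided; the identifiers are the ones visible in this change):

    hfs_unlock(cp);                              /* never call into VFS with the cnode lock held */
    error = vnode_create_empty(&empty_rvp);      /* reserve an uninitialized vnode               */
    hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
    if (error)
            return error;

    if (cp->c_rsrc_vp) {                         /* raced: another thread attached the fork      */
            vnode_put(empty_rvp);
            goto restart;
    }

    rvp = empty_rvp;                             /* GNV_USE_VP: hfs_getnewvnode consumes it      */
    error = hfs_getnewvnode(hfsmp, dvp, cn.cn_pnbuf ? &cn : NULL, descptr,
                            GNV_WANTRSRC | GNV_SKIPLOCK | GNV_USE_VP,
                            &cp->c_attr, &rsrcfork, &rvp, &newvnode_flags);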
@@ -6615,7 +6701,8 @@ exit:
 int
 hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp)
 {
-       struct vnode *rvp;
+       struct vnode *rvp = NULLVP;
+       struct vnode *empty_rvp = NULLVP;
        struct vnode *dvp = NULLVP;
        struct cnode *cp = VTOC(vp);
        int error;
@@ -6662,22 +6749,49 @@ restart:
                int lockflags;
                int newvnode_flags = 0;
 
-               /*
-                * Make sure cnode lock is exclusive, if not upgrade it.
+               /* 
+                * In this case, we don't currently see a resource fork vnode attached
+                * to this cnode.  In most cases, we were called from a read-only VNOP
+                * like getattr, so it should be safe to drop the cnode lock and then 
+                * re-acquire it.  
+                *
+                * Here, we drop the lock so that we can acquire an empty/husk
+                * vnode so that we don't deadlock against jetsam.  
                 *
-                * We assume that we were called from a read-only VNOP (getattr)
-                * and that its safe to have the cnode lock dropped and reacquired.
+                * It does not currently appear possible to hold the truncate lock via
+                * FS re-entrancy when we get to this point. (8/2014)
                 */
-               if (cp->c_lockowner != current_thread()) {
-                       /*
-                        * If the upgrade fails we lose the lock and
-                        * have to take the exclusive lock on our own.
-                        */
-                       if (lck_rw_lock_shared_to_exclusive(&cp->c_rwlock) == FALSE)
-                               lck_rw_lock_exclusive(&cp->c_rwlock);
-                       cp->c_lockowner = current_thread();
+               hfs_unlock (cp);
+
+               error = vnode_create_empty (&empty_rvp); 
+
+               hfs_lock_always (cp, HFS_EXCLUSIVE_LOCK);
+
+               if (error) {
+                       /* If acquiring the 'empty' vnode failed, then nothing to clean up */
+                       return error;
                }
 
+               /* 
+                * We could have raced with another thread here while we dropped our cnode
+                * lock.  See if the cnode now has a resource fork vnode and restart if appropriate.
+                *
+                * Note: We just released the cnode lock, so there is a possibility that the
+                * cnode that we just acquired has been deleted or even removed from disk
+                * completely, though this is unlikely. If the file is open-unlinked, the 
+                * check below will resolve it for us.  If it has been completely 
+                * removed (even from the catalog!), then when we examine the catalog 
+                * directly, below, while holding the catalog lock, we will not find the
+                * item and we can fail out properly.
+                */
+               if (cp->c_rsrc_vp) {
+                       /* Drop the empty vnode before restarting */
+                       vnode_put (empty_rvp);
+                       empty_rvp = NULL;
+                       rvp = NULL;
+                       goto restart;
+               }
+       
                /*
                 * hfs_vgetrsrc may be invoked for a cnode that has already been marked
                 * C_DELETED.  This is because we need to continue to provide rsrc
@@ -6760,6 +6874,8 @@ restart:
 
                hfs_systemfile_unlock(hfsmp, lockflags);
                if (error) {
+                       /* Drop our 'empty' vnode ! */
+                       vnode_put (empty_rvp);
                        return (error);
                }
                /*
@@ -6781,20 +6897,33 @@ restart:
                        // Should never happen because cn.cn_nameptr won't ever be long...
                        if (cn.cn_namelen >= MAXPATHLEN) {
                                FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
+                               /* Drop our 'empty' vnode ! */
+                               vnode_put (empty_rvp);
                                return ENAMETOOLONG;
+                               
                        }
                }
                dvp = vnode_getparent(vp);
+               
+               /* 
+                * We are about to call hfs_getnewvnode and pass in the vnode that we acquired
+                * earlier when we were not holding any locks. The semantics of GNV_USE_VP require that
+                * either hfs_getnewvnode consume the vnode and vend it back to us, properly initialized,
+                * or it will consume/dispose of it properly if it errors out.
+                */ 
+               rvp = empty_rvp;
+               
                error = hfs_getnewvnode(hfsmp, dvp, cn.cn_pnbuf ? &cn : NULL,
-                                       descptr, GNV_WANTRSRC | GNV_SKIPLOCK, &cp->c_attr,
-                                       &rsrcfork, &rvp, &newvnode_flags);
+                                       descptr, (GNV_WANTRSRC | GNV_SKIPLOCK | GNV_USE_VP), 
+                                                               &cp->c_attr, &rsrcfork, &rvp, &newvnode_flags);
+                       
                if (dvp)
                        vnode_put(dvp);
                if (cn.cn_pnbuf)
                        FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
                if (error)
                        return (error);
-       }
+       }  /* End 'else' for rsrc fork not existing */
 
        *rvpp = rvp;
        return (0);
@@ -6941,17 +7070,17 @@ hfsfifo_close(ap)
 static u_int32_t 
 hfs_get_document_id_internal(const uint8_t *finderinfo, mode_t mode)
 {
-       u_int8_t *finfo = NULL;
+       const uint8_t *finfo = NULL;
        u_int32_t doc_id = 0;
        
        /* overlay the FinderInfo to the correct pointer, and advance */
-       finfo = ((uint8_t *)finderinfo) + 16;
+       finfo = finderinfo + 16;
 
        if (S_ISDIR(mode) || S_ISREG(mode)) {
-               struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
+               const struct FndrExtendedFileInfo *extinfo = (const struct FndrExtendedFileInfo *)finfo;
                doc_id = extinfo->document_id;
        } else if (S_ISDIR(mode)) {
-               struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)finderinfo + 16);
+               const struct FndrExtendedDirInfo *extinfo = (const struct FndrExtendedDirInfo *)finfo;
                doc_id = extinfo->document_id;
        }       
 
index 3a989c1328d159e72a496fdad0465d40c57c8ac2..c63dce8ea8ba0286d2c7b82de48e1ea1d22e8d82 100644 (file)
@@ -38,7 +38,6 @@
 #include <sys/fsctl.h>
 #include <sys/vnode_internal.h>
 #include <sys/kauth.h>
-#include <sys/cprotect.h>
 #include <sys/uio_internal.h>
 
 #include "hfs.h"
@@ -48,6 +47,7 @@
 #include "hfs_endian.h"
 #include "hfs_btreeio.h"
 #include "hfs_fsctl.h"
+#include "hfs_cprotect.h"
 
 #include "hfscommon/headers/BTreesInternal.h"
 
@@ -495,7 +495,7 @@ int hfs_getxattr_internal (struct cnode *cp, struct vnop_getxattr_args *ap,
        btdata.bufferAddress = recp;
        btdata.itemSize = sizeof(HFSPlusAttrRecord);
        btdata.itemCount = 1;
-       
+
        result = hfs_buildattrkey(target_id, ap->a_name, (HFSPlusAttrKey *)&iterator->key);
        if (result) {
                goto exit;
@@ -856,7 +856,7 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
                        cp->c_bsdflags &= ~UF_HIDDEN;
                }
 
-               result = hfs_update(vp, FALSE);
+               result = hfs_update(vp, 0);
 
                hfs_unlock(cp);
                return (result);
@@ -1032,21 +1032,11 @@ int hfs_setxattr_internal (struct cnode *cp, const void *data_ptr, size_t attrsi
        int exists = 0;
        int allocatedblks = 0;
        u_int32_t target_id;
-       int takelock = 1;
 
        if (cp) {
                target_id = cp->c_fileid;
        } else {
                target_id = fileid;
-               if (target_id != 1) {
-                       /* 
-                        * If we are manipulating something other than 
-                        * the root folder (id 1), and do not have a cnode-in-hand, 
-                        * then we must already hold the requisite b-tree locks from 
-                        * earlier up the call stack. (See hfs_makenode)
-                        */
-                       takelock = 0;
-               }
        }
        
        /* Start a transaction for our changes. */
@@ -1079,10 +1069,7 @@ int hfs_setxattr_internal (struct cnode *cp, const void *data_ptr, size_t attrsi
                hfsmp->hfs_max_inline_attrsize = getmaxinlineattrsize(hfsmp->hfs_attribute_vp);
        }
 
-       if (takelock) {
-               /* Take exclusive access to the attributes b-tree. */
-               lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
-       }
+       lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
 
        /* Build the b-tree key. */
        MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
@@ -1277,9 +1264,7 @@ exit:
        if (btfile && started_transaction) {
                (void) BTFlushPath(btfile);
        }
-       if (lockflags) {
-               hfs_systemfile_unlock(hfsmp, lockflags);
-       }
+       hfs_systemfile_unlock(hfsmp, lockflags);
        if (result == 0) {
                if (vp) {
                        cp = VTOC(vp);
@@ -1287,6 +1272,7 @@ exit:
                         * modified time of the file.
                         */
                        cp->c_touch_chgtime = TRUE;
+                       cp->c_flag |= C_MODIFIED;
                        cp->c_attr.ca_recflags |= kHFSHasAttributesMask;
                        if ((bcmp(ap->a_name, KAUTH_FILESEC_XATTR, sizeof(KAUTH_FILESEC_XATTR)) == 0)) {
                                cp->c_attr.ca_recflags |= kHFSHasSecurityMask;
@@ -1401,7 +1387,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap)
                if (result == 0) {
                        cp->c_touch_chgtime = TRUE;
                        cp->c_flag |= C_MODIFIED;
-                       result = hfs_update(vp, FALSE);
+                       result = hfs_update(vp, 0);
                }
 
                hfs_end_transaction(hfsmp);
@@ -1490,7 +1476,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap)
                /* Updating finderInfo updates change time and modified time */
                cp->c_touch_chgtime = TRUE;
                cp->c_flag |= C_MODIFIED;
-               hfs_update(vp, FALSE);
+               hfs_update(vp, 0);
         
                hfs_unlock(cp);
         
@@ -1540,6 +1526,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap)
                result = file_attribute_exist(hfsmp, cp->c_fileid);
                if (result == 0) {
                        cp->c_attr.ca_recflags &= ~kHFSHasAttributesMask;
+                       cp->c_flag |= C_MODIFIED;
                }
                if (result == EEXIST) {
                        result = 0;
@@ -1550,6 +1537,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap)
                /* If ACL was removed, clear security bit */
                if ((bcmp(ap->a_name, KAUTH_FILESEC_XATTR, sizeof(KAUTH_FILESEC_XATTR)) == 0)) {
                        cp->c_attr.ca_recflags &= ~kHFSHasSecurityMask;
+                       cp->c_flag |= C_MODIFIED;
                }
                (void) hfs_update(vp, 0);
        }
@@ -1963,18 +1951,28 @@ listattr_callback(const HFSPlusAttrKey *key, __unused const HFSPlusAttrData *dat
  *
  * This function takes the necessary locks on the attribute
  * b-tree file and the allocation (bitmap) file.
+ *
+ * NOTE: Upon success, this function will return with an open
+ * transaction.  The reason we do it this way is because when we
+ * delete the last attribute, we must make sure the flag in the
+ * catalog record that indicates there are no more records is cleared.
+ * The caller is responsible for doing this and *must* do it before
+ * ending the transaction.
  */
 int
-hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid)
+hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid, 
+                                 bool *open_transaction)
 {
        BTreeIterator *iterator = NULL;
        HFSPlusAttrKey *key;
        struct filefork *btfile;
-       int result, lockflags;
+       int result, lockflags = 0;
+
+       *open_transaction = false;
+
+       if (hfsmp->hfs_attribute_vp == NULL)
+               return 0;
 
-       if (hfsmp->hfs_attribute_vp == NULL) {
-               return (0);
-       }
        btfile = VTOF(hfsmp->hfs_attribute_vp);
 
        MALLOC(iterator, BTreeIterator *, sizeof(BTreeIterator), M_TEMP, M_WAITOK);
@@ -1985,25 +1983,32 @@ hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid)
        key = (HFSPlusAttrKey *)&iterator->key;
 
        /* Loop until there are no more attributes for this file id */
-       for(;;) {
+       do {
+               if (!*open_transaction)
+                       lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK);
+
+               (void) hfs_buildattrkey(fileid, NULL, key);
+               result = BTIterateRecord(btfile, kBTreeNextRecord, iterator, NULL, NULL);
+               if (result || key->fileID != fileid)
+                       goto exit;
+
+               hfs_systemfile_unlock(hfsmp, lockflags);
+               lockflags = 0;
+
+               if (*open_transaction) {
+                       hfs_end_transaction(hfsmp);
+                       *open_transaction = false;
+               }
+
                if (hfs_start_transaction(hfsmp) != 0) {
                        result = EINVAL;
                        goto exit;
                }
 
-               /* Lock the attribute b-tree and the allocation (bitmap) files */
+               *open_transaction = true;
+
                lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
 
-               /*
-                * Go to first possible attribute key/record pair
-                */
-               (void) hfs_buildattrkey(fileid, NULL, key);
-               result = BTIterateRecord(btfile, kBTreeNextRecord, iterator, NULL, NULL);
-               if (result || key->fileID != fileid) {
-                       hfs_systemfile_unlock(hfsmp, lockflags);
-                       hfs_end_transaction(hfsmp);
-                       goto exit;
-               }
                result = remove_attribute_records(hfsmp, iterator);
 
 #if HFS_XATTR_VERBOSE
@@ -2011,14 +2016,22 @@ hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid)
                        printf("hfs_removeallattr: unexpected err %d\n", result);
                }
 #endif
+       } while (!result);
+
+exit:
+       FREE(iterator, M_TEMP);
+
+       if (lockflags)
                hfs_systemfile_unlock(hfsmp, lockflags);
+
+       result = result == btNotFound ? 0 : MacToVFSError(result);
+
+       if (result && *open_transaction) {
                hfs_end_transaction(hfsmp);
-               if (result)
-                       break;
+               *open_transaction = false;
        }
-exit:
-       FREE(iterator, M_TEMP);
-       return (result == btNotFound ? 0: MacToVFSError(result));
+
+       return result;
 }
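Given the NOTE above the new signature, a caller is expected to follow roughly this shape (a hypothetical call site, shown only to illustrate the open-transaction contract):

    bool open_txn = false;

    error = hfs_removeallattr(hfsmp, cp->c_fileid, &open_txn);
    if (!error && open_txn) {
            /* The last attribute was deleted inside this transaction, so the
             * catalog hint must be cleared before the transaction ends. */
            cp->c_attr.ca_recflags &= ~kHFSHasAttributesMask;
            cp->c_flag |= C_MODIFIED;
            (void) hfs_update(cp->c_vp, 0);
    }
    if (open_txn)
            hfs_end_transaction(hfsmp);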
 
 __private_extern__
index 99888cafdc3f270c41779f3283e862fa2722cff7..a8a057e64e96ccf704000cfe3745e4b329a143e8 100644 (file)
@@ -1722,14 +1722,22 @@ OSStatus        BTDeleteRecord          (FCB                                            *filePtr,
 
        /////////////////////// Extend File If Necessary ////////////////////////////
 
-       if ((btreePtr->treeDepth + 1UL) > btreePtr->totalNodes)
+       /*
+        * Worst case: we delete the first record in the tree and
+        * following key is sufficiently larger to cause all parents to
+        * require splitting and we need a new root node and a new map
+        * node.
+        */
+       if (index == 0 && btreePtr->treeDepth + 1 > btreePtr->freeNodes)
        {
-               nodesNeeded = btreePtr->treeDepth + 1 + btreePtr->totalNodes;
+               nodesNeeded = btreePtr->treeDepth + btreePtr->totalNodes;
                if (nodesNeeded > CalcMapBits (btreePtr))
                        ++nodesNeeded;
 
-               err = ExtendBTree (btreePtr, nodesNeeded);
-               M_ExitOnError (err);
+               if (nodesNeeded - btreePtr->totalNodes > btreePtr->freeNodes) {
+                       err = ExtendBTree (btreePtr, nodesNeeded);
+                       M_ExitOnError (err);
+               }
        }
 
        ///////////////////////////// Delete Record /////////////////////////////////
index fe2f917143eaf71c23b69762f10cd66971fb26a1..dbd0a8a5450764aeadd2143d1cd2ef3544552f50 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2003, 2005-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2003, 2005-2014 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -196,7 +196,7 @@ OSStatus    AllocateNode (BTreeControlBlockPtr              btreePtr, u_int32_t     *nodeNum)
        M_ExitOnError (err);
        
        --btreePtr->freeNodes;
-       btreePtr->flags |= kBTHeaderDirty;
+       M_BTreeHeaderDirty(btreePtr);
        
        /* Account for allocations from node reserve */
        BTUpdateReserve(btreePtr, 1);
@@ -273,7 +273,7 @@ OSStatus    FreeNode (BTreeControlBlockPtr          btreePtr, u_int32_t     nodeNum)
        M_ExitOnError (err);
        
        ++btreePtr->freeNodes;
-       btreePtr->flags |= kBTHeaderDirty;                                      // how about a macro for this
+       M_BTreeHeaderDirty(btreePtr);
 
        return noErr;
 
@@ -494,7 +494,7 @@ Success:
        btreePtr->totalNodes     =  newTotalNodes;
        btreePtr->freeNodes             += (newTotalNodes - oldTotalNodes) - newMapNodes;
 
-       btreePtr->flags                 |= kBTHeaderDirty;              // how about a macro for this
+       M_BTreeHeaderDirty(btreePtr);
 
        /* Force the b-tree header changes to disk */
        (void) UpdateHeader (btreePtr, true);
index 45456569f89249fcb9a0c54d67dfcfb45f906981..34bd8e41b16dc7d857e03d9247b2b492d162d979 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -315,7 +315,7 @@ OSStatus    SearchTree      (BTreeControlBlockPtr    btreePtr,
         {
                 goto ErrorExit;
         }
-        
+
         //     The child node should be at a level one less than the parent.
         --level;
        }
@@ -1318,8 +1318,8 @@ static OSStatus   AddNewRootNode  (BTreeControlBlockPtr    btreePtr,
        // update BTreeInfoRec
        
        btreePtr->rootNode       = rootNum;
-       btreePtr->flags         |= kBTHeaderDirty;
-       
+       M_BTreeHeaderDirty(btreePtr);
+
        return noErr;
 
 
index 909ab5c1d2fa1fa061b4781110fffecf50b3a10b..fa7e210d0f6924e2480eff193821f97874aeaf07 100644 (file)
@@ -675,21 +675,11 @@ static OSErr  DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, int quitEarly,
        u_int16_t                       btRecordSize;
        OSErr                           err;
 
-    
+       MALLOC (btIterator, struct BTreeIterator*, sizeof(struct BTreeIterator),
+                       M_TEMP, M_WAITOK | M_ZERO);
 
-       MALLOC (btIterator, struct BTreeIterator*, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
-       if (btIterator == NULL) {
-               return memFullErr;  // translates to ENOMEM
-       }
-
-       MALLOC (tmpIterator, struct BTreeIterator*, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
-       if (tmpIterator == NULL) {      
-               FREE (btIterator, M_TEMP);      
-               return memFullErr;  // translates to ENOMEM
-       }
-
-       bzero(btIterator, sizeof(*btIterator));
-       bzero (tmpIterator, sizeof(*tmpIterator));
+       MALLOC (tmpIterator, struct BTreeIterator*, sizeof(struct BTreeIterator),
+                       M_TEMP, M_WAITOK | M_ZERO);
 
        fcb = GetFileControlBlock(vcb->extentsRefNum);
 
@@ -721,7 +711,10 @@ static OSErr  DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, int quitEarly,
                extentKeyPtr->hfs.startBlock = 0;
        }
 #else 
-    else return cmBadNews;
+       else {
+               err = cmBadNews;
+               goto exit;
+       }
 #endif
 
        err = BTSearchRecord(fcb, btIterator, &btRecord, &btRecordSize, btIterator);
@@ -730,8 +723,8 @@ static OSErr  DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, int quitEarly,
                if (err == noErr) {             //      Did we find a bogus extent record?
                        err = cmBadNews;        //      Yes, so indicate things are messed up.
                }
-               
-               return err;                             //      Got some unexpected error, so return it
+
+               goto exit;
        }
 
        do
@@ -770,6 +763,8 @@ static OSErr  DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, int quitEarly,
                if (err != noErr)
                        break;
        }       while ( true );
+
+exit:
        
        FREE (tmpIterator, M_TEMP);
        FREE (btIterator, M_TEMP);
index 92b49c840fddca95fa853cab13167312a449f22c..bd4b905ad8e0460a50d9248e043ec49b7bfc26fa 100644 (file)
@@ -147,8 +147,8 @@ static Boolean ValidHFSRecord(const void *record, const BTreeControlBlock *btcb,
 
                        case kHFSFileRecord:
                        {
-                               HFSExtentDescriptor     *dataExtent;
-                               HFSExtentDescriptor     *rsrcExtent;
+                               const HFSExtentDescriptor       *dataExtent;
+                               const HFSExtentDescriptor       *rsrcExtent;
                                
                                if ( recordSize != sizeof(HFSCatalogFile) )
                                        return false;                                                           
@@ -171,8 +171,8 @@ static Boolean ValidHFSRecord(const void *record, const BTreeControlBlock *btcb,
                                if ( catalogRecord->hfsFile.rsrcPhysicalSize < catalogRecord->hfsFile.rsrcLogicalSize )
                                        return false;
                
-                               dataExtent = (HFSExtentDescriptor*) &catalogRecord->hfsFile.dataExtents;
-                               rsrcExtent = (HFSExtentDescriptor*) &catalogRecord->hfsFile.rsrcExtents;
+                               dataExtent = (const HFSExtentDescriptor*) &catalogRecord->hfsFile.dataExtents;
+                               rsrcExtent = (const HFSExtentDescriptor*) &catalogRecord->hfsFile.rsrcExtents;
        
 #if 0
                                for (i = 0; i < kHFSExtentDensity; ++i)
@@ -222,8 +222,8 @@ static Boolean ValidHFSRecord(const void *record, const BTreeControlBlock *btcb,
                        case kHFSPlusFileRecord:
                        {
 //                             u_int16_t                                       i;
-                               HFSPlusExtentDescriptor *dataExtent;
-                               HFSPlusExtentDescriptor *rsrcExtent;
+                               const HFSPlusExtentDescriptor   *dataExtent;
+                               const HFSPlusExtentDescriptor   *rsrcExtent;
                                
                                if ( recordSize != sizeof(HFSPlusCatalogFile) )
                                        return false;                                                           
@@ -237,8 +237,8 @@ static Boolean ValidHFSRecord(const void *record, const BTreeControlBlock *btcb,
                
                                // make sure 0 ≤ LEOF ≤ PEOF for both forks
                
-                               dataExtent = (HFSPlusExtentDescriptor*) &catalogRecord->hfsPlusFile.dataFork.extents;
-                               rsrcExtent = (HFSPlusExtentDescriptor*) &catalogRecord->hfsPlusFile.resourceFork.extents;
+                               dataExtent = (const HFSPlusExtentDescriptor*) &catalogRecord->hfsPlusFile.dataFork.extents;
+                               rsrcExtent = (const HFSPlusExtentDescriptor*) &catalogRecord->hfsPlusFile.resourceFork.extents;
        
 #if 0
                                for (i = 0; i < kHFSPlusExtentDensity; ++i)
index 018a8701eda78004433f3018c1f752fe428abe19..11747510182ca7bd2182039a713d821b1f116eae 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -35,6 +35,7 @@
 #include "../headers/BTreesInternal.h"
 
 #include <sys/malloc.h>
+#include <sys/vnode_internal.h>
  
 /*
 ============================================================
@@ -66,7 +67,9 @@ Public (Exported) Routines:
        FlushExtentFile
                                        Flush the extents file for a given volume.
 
-
+       SearchExtentFile
+                                       Search the FCB and extents file for an extent record that
+                                       contains a given file position (in bytes).
 
 
 ============================================================
@@ -74,9 +77,6 @@ Internal Routines:
 ============================================================
        FindExtentRecord
                                        Search the extents BTree for a particular extent record.
-       SearchExtentFile
-                                       Search the FCB and extents file for an extent record that
-                                       contains a given file position (in bytes).
        SearchExtentRecord
                                        Search a given extent record to see if it contains a given
                                        file position (in bytes).  Used by SearchExtentFile.
@@ -143,16 +143,6 @@ static OSErr GetFCBExtentRecord(
        const FCB                               *fcb,
        HFSPlusExtentRecord             extents);
 
-static OSErr SearchExtentFile(
-       ExtendedVCB             *vcb,
-       const FCB                               *fcb,
-       int64_t                                 filePosition,
-       HFSPlusExtentKey                *foundExtentKey,
-       HFSPlusExtentRecord             foundExtentData,
-       u_int32_t                               *foundExtentDataIndex,
-       u_int32_t                               *extentBTreeHint,
-       u_int32_t                               *endingFABNPlusOne );
-
 static OSErr SearchExtentRecord(
        ExtendedVCB             *vcb,
        u_int32_t                               searchFABN,
@@ -877,6 +867,64 @@ int32_t CompareExtentKeysPlus( const HFSPlusExtentKey *searchKey, const HFSPlusE
        return( result );
 }
 
+static int
+should_pin_blocks(hfsmount_t *hfsmp, FCB *fcb)
+{
+       if (!ISSET(hfsmp->hfs_flags, HFS_CS_HOTFILE_PIN)
+               || fcb->ff_cp == NULL || fcb->ff_cp->c_vp == NULL) {
+               return 0;
+       }
+
+       int pin_blocks;
+
+       //
+       // File system metadata should get pinned
+       //
+       if (vnode_issystem(fcb->ff_cp->c_vp)) {
+               return 1;
+       }
+
+       //
+       // If a file is AutoCandidate, we should not pin its blocks because
+       // it was an automatically added file and this function is intended
+       // to pin new blocks being added to user-generated content.
+       //
+       // If a file is marked FastDevPinned or FastDevCandidate it is an
+       // existing pinned file or a new file that should be pinned.
+       //
+       if (fcb->ff_cp->c_attr.ca_recflags & kHFSAutoCandidateMask) {
+               return 0;
+       }
+
+       if ((fcb->ff_cp->c_attr.ca_recflags & (kHFSFastDevPinnedMask|kHFSFastDevCandidateMask)) != 0) {
+               pin_blocks = 1;
+       } else {
+               pin_blocks = 0;
+       }
+
+       return pin_blocks;
+}
+       
+
+
+static void
+pin_blocks_if_needed(ExtendedVCB *vcb, FCB *fcb, u_int32_t startBlock, u_int32_t blockCount)   
+{
+       if (!should_pin_blocks(vcb, fcb)) {
+               return;
+       }
+       
+       // ask CoreStorage to pin the new blocks being added to this file
+       if (hfs_pin_block_range((struct hfsmount *)vcb, HFS_PIN_IT, startBlock, blockCount, vfs_context_kernel()) == 0) {
+               struct vnode *vp = fcb->ff_cp->c_vp;
+               
+               // and make sure to keep our accounting in order
+               hfs_hotfile_adjust_blocks(vp, -blockCount);
+       }
+}
+
+
+
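Restating the policy implemented by should_pin_blocks() above as a decision summary (the masks and helpers are those used in the code; HFS_CS_HOTFILE_PIN must be set on the mount for any of this to apply):

    /*
     *   vnode_issystem(vp)                              -> pin   (filesystem metadata)
     *   ca_recflags & kHFSAutoCandidateMask             -> no    (automatically adopted file)
     *   ca_recflags & (kHFSFastDevPinnedMask |
     *                  kHFSFastDevCandidateMask)        -> pin   (existing or new pinned file)
     *   otherwise                                       -> no
     */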
 /*
  * Add a file extent to a file.
  *
@@ -928,8 +976,12 @@ AddFileExtent(ExtendedVCB *vcb, FCB *fcb, u_int32_t startBlock, u_int32_t blockC
                foundIndex = 0;
 
                error = CreateExtentRecord(vcb, &foundKey, foundData, &hint);
-               if (error == fxOvFlErr)
+               if (error == fxOvFlErr) {
                        error = dskFulErr;
+               } else if (error == 0) {
+                       pin_blocks_if_needed(vcb, fcb, startBlock, blockCount);
+               }
+               
        } else {
                /* 
                 * Add a new extent into existing record.
@@ -937,6 +989,9 @@ AddFileExtent(ExtendedVCB *vcb, FCB *fcb, u_int32_t startBlock, u_int32_t blockC
                foundData[foundIndex].startBlock = startBlock;
                foundData[foundIndex].blockCount = blockCount;
                error = UpdateExtentRecord(vcb, fcb, 0, &foundKey, foundData, hint);
+               if (error == 0) {
+                       pin_blocks_if_needed(vcb, fcb, startBlock, blockCount);
+               }
        }
        (void) FlushExtentFile(vcb);
 
@@ -983,6 +1038,8 @@ OSErr ExtendFileC (
        int64_t                         availbytes;
        int64_t                         peof;
        u_int32_t                       prevblocks;
+       uint32_t                        fastdev = 0;
+
        struct hfsmount *hfsmp = (struct hfsmount*)vcb; 
        allowFlushTxns = 0;
        needsFlush = false;
@@ -1030,7 +1087,12 @@ OSErr ExtendFileC (
                FTOC(fcb)->c_blocks   += blocksToAdd;
                fcb->ff_blocks        += blocksToAdd;
 
-               FTOC(fcb)->c_flag |= C_MODIFIED | C_FORCEUPDATE;
+               /*
+                * We haven't touched the disk here; no blocks have been
+                * allocated and the volume will not be inconsistent if we
+                * don't update the catalog record immediately.
+                */
+               FTOC(fcb)->c_flag |= C_MINOR_MOD;
                *actualBytesAdded = bytesToAdd;
                return (0);
        }
@@ -1100,7 +1162,7 @@ OSErr ExtendFileC (
                //      Enough blocks are already allocated.  Just update the FCB to reflect the new length.
                fcb->ff_blocks = peof / volumeBlockSize;
                FTOC(fcb)->c_blocks += (bytesToAdd / volumeBlockSize);
-               FTOC(fcb)->c_flag |= C_MODIFIED | C_FORCEUPDATE;
+               FTOC(fcb)->c_flag |= C_MODIFIED;
                goto Exit;
        }
        if (err != fxRangeErr)          // Any real error?
@@ -1172,6 +1234,8 @@ OSErr ExtendFileC (
                wantContig = true;
        }
 
+       if (should_pin_blocks(hfsmp, fcb))
+               fastdev = HFS_ALLOC_FAST_DEV;
 
        useMetaZone = flags & kEFMetadataMask;
        do {
@@ -1193,7 +1257,7 @@ OSErr ExtendFileC (
                                err = dskFulErr;
                        }
                        else {
-                               uint32_t ba_flags = 0;
+                               uint32_t ba_flags = fastdev;
 
                                if (wantContig) {
                                        ba_flags |= HFS_ALLOC_FORCECONTIG;      
@@ -1253,12 +1317,6 @@ OSErr ExtendFileC (
 
                }
                if (err == noErr) {
-                   if (actualNumBlocks != 0) {
-                               // this catalog entry *must* get forced to disk when
-                               // hfs_update() is called
-                               FTOC(fcb)->c_flag |= C_FORCEUPDATE;
-                       }
-
                        //      Add the new extent to the existing extent record, or create a new one.
                        if ((actualStartBlock == startBlock) && (blockHint == 0)) {
                                //      We grew the file's last extent, so just adjust the number of blocks.
@@ -1321,7 +1379,7 @@ OSErr ExtendFileC (
                                        if (err != noErr) break;
                                }
                        }
-                       
+
                        // Figure out how many bytes were actually allocated.
                        // NOTE: BlockAllocate could have allocated more than we asked for.
                        // Don't set the PEOF beyond what our client asked for.
@@ -1336,7 +1394,7 @@ OSErr ExtendFileC (
                        }
                        fcb->ff_blocks += (bytesThisExtent / volumeBlockSize);
                        FTOC(fcb)->c_blocks += (bytesThisExtent / volumeBlockSize);
-                       FTOC(fcb)->c_flag |= C_MODIFIED | C_FORCEUPDATE;
+                       FTOC(fcb)->c_flag |= C_MODIFIED;
 
                        //      If contiguous allocation was requested, then we've already got one contiguous
                        //      chunk.  If we didn't get all we wanted, then adjust the error to disk full.
@@ -1366,6 +1424,11 @@ Exit:
                *actualBytesAdded = 0;
        }
 
+       if (fastdev) {
+               hfs_hotfile_adjust_blocks(fcb->ff_cp->c_vp, 
+                                         (int64_t)prevblocks - fcb->ff_blocks);
+       }
+
        if (needsFlush)
                (void) FlushExtentFile(vcb);
 
@@ -1474,7 +1537,7 @@ OSErr TruncateFileC (
                 * has been removed from disk already.  We wouldn't need to force 
                 * another update
                 */
-               FTOC(fcb)->c_flag |= (C_MODIFIED | C_FORCEUPDATE);
+               FTOC(fcb)->c_flag |= C_MODIFIED;
        }
        //
        //      If the new PEOF is 0, then truncateToExtent has no meaning (we should always deallocate
@@ -1715,7 +1778,7 @@ CopyExtents:
                FTOC(fcb)->c_blocks -= headblks;
                fcb->ff_blocks = blkcnt;
 
-               FTOC(fcb)->c_flag |= C_FORCEUPDATE;
+               FTOC(fcb)->c_flag |= C_MODIFIED;
                FTOC(fcb)->c_touch_chgtime = TRUE;
 
                (void) FlushExtentFile(vcb);
@@ -1851,7 +1914,7 @@ static OSErr SearchExtentRecord(
 //             (other)                 (some other internal I/O error)
 //_____________________________________________________________________________
 
-static OSErr SearchExtentFile(
+OSErr SearchExtentFile(
        ExtendedVCB     *vcb,
        const FCB                       *fcb,
        int64_t                         filePosition,
index 79547be7fbb7ff9837bbc9b32dde23560b9bb173..612171809aac16b49c987a6740b0bd531f8768e4 100644 (file)
 
 /*
 Public routines:
-       BlockAllocate
+       BlockAllocate / hfs_block_alloc
                                        Allocate space on a volume.  Can allocate space contiguously.
                                        If not contiguous, then allocation may be less than what was
                                        asked for.  Returns the starting block number, and number of
-                                       blocks.  (Will only do a single extent???)
+                                       blocks.  It will only return a single extent.
+
        BlockDeallocate
                                        Deallocate a contiguous run of allocation blocks.
  
@@ -92,20 +93,20 @@ Internal routines:
                                        block number of the first block in the range is returned.  This is only
                                        called by the bitmap scanning logic as the red-black tree should be able
                                        to do this internally by searching its tree. 
-       BlockAllocateAny
+       BlockFindAny
                                        Find and allocate a contiguous range of blocks up to a given size.  The
                                        first range of contiguous free blocks found are allocated, even if there
                                        are fewer blocks than requested (and even if a contiguous range of blocks
                                        of the given size exists elsewhere).
-       BlockAllocateAnyBitmap
+       BlockFindAnyBitmap
                                        Finds a range of blocks per the above requirements without using the 
                                        Allocation RB Tree.  This relies on the bitmap-scanning logic in order to find
                                        any valid range of free space needed.
-       BlockAllocateContig
-                                       Find and allocate a contiguous range of blocks of a given size.  If
-                                       a contiguous range of free blocks of the given size isn't found, then
-                                       the allocation fails (i.e. it is "all or nothing"). 
-       BlockAllocateKnown
+       BlockFindContig
+                                       Find a contiguous range of blocks of a given size.
+                                       If the minimum cannot be satisfied, nothing is
+                                       returned.
+       BlockFindKnown
                                        Try to allocate space from known free space in the volume's
                                        free extent cache.
        ReadBitmapBlock
@@ -155,23 +156,22 @@ Optimization Routines
                                        
 */
 
+
 #include <sys/types.h>
 #include <sys/buf.h>
 
-
 #if !HFS_ALLOC_TEST
 
 #include "../../hfs_macos_defs.h"
 #include <sys/systm.h>
 #include <sys/ubc.h>
 #include <kern/kalloc.h>
-
 /* For VM Page size */
 #include <libkern/libkern.h>
+#include <vfs/vfs_journal.h>
 #include "../../hfs.h"
 #include "../../hfs_endian.h"
 #include "../headers/FileMgrInternal.h"
-#include <vfs/vfs_journal.h>
 
 #endif // !HFS_ALLOC_TEST
 
@@ -183,6 +183,8 @@ Optimization Routines
 #include "../../hfs_dbg.h"
 #include "../../hfs_format.h"
 #include "../../hfs_kdebug.h"
+#include "../../rangelist.h"
+#include "../../hfs_extents.h"
 
 /* Headers for unmap-on-mount support */
 #include <sys/disk.h>
@@ -243,38 +245,44 @@ static OSErr ReadBitmapBlock(
                ExtendedVCB             *vcb,
                u_int32_t               bit,
                u_int32_t               **buffer,
-               uintptr_t               *blockRef);
+               uintptr_t               *blockRef,
+               hfs_block_alloc_flags_t flags);
 
 static OSErr ReleaseBitmapBlock(
                ExtendedVCB             *vcb,
                uintptr_t               blockRef,
                Boolean                 dirty);
 
-static OSErr BlockAllocateAny(
+static OSErr hfs_block_alloc_int(hfsmount_t *hfsmp,
+                                                                HFSPlusExtentDescriptor *extent,
+                                                                hfs_block_alloc_flags_t flags,
+                                                                hfs_alloc_extra_args_t *ap);
+
+static OSErr BlockFindAny(
                ExtendedVCB             *vcb,
                u_int32_t               startingBlock,
                u_int32_t               endingBlock,
                u_int32_t               maxBlocks,
-               u_int32_t               flags,
+               hfs_block_alloc_flags_t flags,
                Boolean                 trustSummary,
                u_int32_t               *actualStartBlock,
                u_int32_t               *actualNumBlocks);
 
-static OSErr BlockAllocateAnyBitmap(
+static OSErr BlockFindAnyBitmap(
                ExtendedVCB             *vcb,
                u_int32_t               startingBlock,
                u_int32_t               endingBlock,
                u_int32_t               maxBlocks,
-               u_int32_t               flags,
+               hfs_block_alloc_flags_t flags,
                u_int32_t               *actualStartBlock,
                u_int32_t               *actualNumBlocks);
 
-static OSErr BlockAllocateContig(
+static OSErr BlockFindContig(
                ExtendedVCB             *vcb,
                u_int32_t               startingBlock,
                u_int32_t               minBlocks,
                u_int32_t               maxBlocks,
-               u_int32_t               flags,
+               hfs_block_alloc_flags_t flags,
                u_int32_t               *actualStartBlock,
                u_int32_t               *actualNumBlocks);
 
@@ -287,18 +295,25 @@ static OSErr BlockFindContiguous(
                Boolean                 useMetaZone,
                Boolean                 trustSummary,
                u_int32_t               *actualStartBlock,
-               u_int32_t               *actualNumBlocks);
+               u_int32_t               *actualNumBlocks,
+               hfs_block_alloc_flags_t flags);
 
-static OSErr BlockAllocateKnown(
+static OSErr BlockFindKnown(
                ExtendedVCB             *vcb,
                u_int32_t               maxBlocks,
                u_int32_t               *actualStartBlock,
                u_int32_t               *actualNumBlocks);
 
+static OSErr hfs_alloc_try_hard(hfsmount_t *hfsmp,
+                                                               HFSPlusExtentDescriptor *extent,
+                                                               uint32_t max_blocks,
+                                                               hfs_block_alloc_flags_t flags);
+
 static OSErr BlockMarkAllocatedInternal (
                ExtendedVCB             *vcb,
                u_int32_t               startingBlock,
-               register u_int32_t      numBlocks);
+               u_int32_t               numBlocks,
+               hfs_block_alloc_flags_t flags);
 
 static OSErr BlockMarkFreeInternal(
                ExtendedVCB     *vcb,
@@ -362,6 +377,8 @@ static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBloc
 static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
 static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated);
 
+static void hfs_release_reserved(hfsmount_t *hfsmp, struct rl_entry *range, int list);
+
 /* Functions for getting free extents */
 
 typedef struct bitmap_context {
@@ -525,7 +542,7 @@ static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t start,
        u_int64_t length;
        int error = 0;
 
-       if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL)) {
+       if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL) && list->allocated_count && list->extents != NULL) {
                int extent_no = list->extent_count;
                offset = (u_int64_t) start * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
                length = (u_int64_t) numBlocks * hfsmp->blockSize;
@@ -564,7 +581,7 @@ static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list)
                KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0);
        }
 
-       if (list->extent_count > 0) {
+       if (list->extent_count > 0 && list->extents != NULL) {
                bzero(&unmap, sizeof(unmap));
                unmap.extents = list->extents;
                unmap.extentsCount = list->extent_count;
@@ -619,7 +636,7 @@ static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlo
 {
        u_int64_t offset;
        u_int64_t length;
-       int err;
+       int err = 0;
 
        if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
                KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
@@ -813,6 +830,7 @@ u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp)
         dk_extent_t *extents;
         };
         */
+       bzero (&trimlist, sizeof(trimlist));
 
        /* 
         * The scanning itself here is not tied to the presence of CONFIG_HFS_TRIM
@@ -835,7 +853,6 @@ u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp)
                if (extents == NULL) {
                        return ENOMEM;
                }
-               bzero (&trimlist, sizeof(trimlist));
                trimlist.extents = (dk_extent_t*)extents;
                trimlist.allocated_count = alloc_count;
                trimlist.extent_count = 0;
@@ -881,10 +898,129 @@ u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp)
        return error;
 }
 
+static void add_to_reserved_list(hfsmount_t *hfsmp, uint32_t start, 
+                                                                uint32_t count, int list, 
+                                                                struct rl_entry **reservation)
+{
+       struct rl_entry *range, *next_range;
+
+       if (list == HFS_TENTATIVE_BLOCKS) {
+               int nranges = 0;
+               // Don't allow more than 4 tentative reservations
+               TAILQ_FOREACH_SAFE(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS],
+                                                  rl_link, next_range) {
+                       if (++nranges > 3)
+                               hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS);
+               }
+       }
+
+       MALLOC(range, struct rl_entry *, sizeof(*range), M_TEMP, M_WAITOK);
+       range->rl_start = start;
+       range->rl_end = start + count - 1;
+       TAILQ_INSERT_HEAD(&hfsmp->hfs_reserved_ranges[list], range, rl_link);
+       *reservation = range;
+}
+
+static void hfs_release_reserved(hfsmount_t *hfsmp,
+                                                                struct rl_entry *range,
+                                                                int list)
+{
+       if (range->rl_start == -1)
+               return;
+
+       TAILQ_REMOVE(&hfsmp->hfs_reserved_ranges[list], range, rl_link);
+
+       if (rl_len(range) > 0) {
+               if (list == HFS_TENTATIVE_BLOCKS)
+                       hfsmp->tentativeBlocks -= rl_len(range);
+               else {
+                       /*
+                        * We don't need to unmap tentative blocks because we won't have
+                        * written to them, but we might have written to reserved blocks.
+                        * Nothing can refer to those blocks so this doesn't have to be
+                        * via the journal. If this proves to be too expensive, we could
+                        * consider not sending down the unmap or we could require this
+                        * to always be called within a transaction and then we can use
+                        * the journal.
+                        */
+                       dk_extent_t extent = {
+                               .offset = (hfs_blk_to_bytes(range->rl_start, hfsmp->blockSize)
+                                                  + hfsmp->hfsPlusIOPosOffset),
+                               .length = hfs_blk_to_bytes(rl_len(range), hfsmp->blockSize)
+                       };
+                       dk_unmap_t unmap = {
+                               .extents = &extent,
+                               .extentsCount = 1,
+                       };
+                       VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap,
+                                          0, vfs_context_kernel());
+                       assert(hfsmp->lockedBlocks >= rl_len(range));
+                       hfsmp->lockedBlocks -= rl_len(range);
+               }
+               hfs_release_summary(hfsmp, range->rl_start, rl_len(range));
+               add_free_extent_cache(hfsmp, range->rl_start, rl_len(range));
+       }
+
+       range->rl_start = -1;
+       range->rl_end   = -2;
+}
+
+static void hfs_free_locked_internal(hfsmount_t *hfsmp,
+                                                                          struct rl_entry **reservation,
+                                                                          int list)
+{
+       if (*reservation) {
+               hfs_release_reserved(hfsmp, *reservation, list);
+               FREE(*reservation, M_TEMP);
+               *reservation = NULL;
+       }
+}
+
+void hfs_free_tentative(hfsmount_t *hfsmp, struct rl_entry **reservation)
+{
+       hfs_free_locked_internal(hfsmp, reservation, HFS_TENTATIVE_BLOCKS);
+}
+
+void hfs_free_locked(hfsmount_t *hfsmp, struct rl_entry **reservation)
+{
+       hfs_free_locked_internal(hfsmp, reservation, HFS_LOCKED_BLOCKS);
+}
+
+OSErr BlockAllocate (
+                hfsmount_t             *hfsmp,                         /* which volume to allocate space on */
+                u_int32_t              startingBlock,          /* preferred starting block, or 0 for no preference */
+                u_int32_t              minBlocks,              /* desired number of blocks to allocate */
+                u_int32_t              maxBlocks,              /* maximum number of blocks to allocate */
+                hfs_block_alloc_flags_t flags,                 /* option flags */
+                u_int32_t              *actualStartBlock,      /* actual first block of allocation */
+                u_int32_t              *actualNumBlocks)
+{
+       hfs_alloc_extra_args_t extra_args = {
+               .max_blocks = maxBlocks
+       };
+
+       HFSPlusExtentDescriptor extent = { startingBlock, minBlocks };
+
+       OSErr err = hfs_block_alloc_int(hfsmp, &extent, flags, &extra_args);
+
+       *actualStartBlock = extent.startBlock;
+       *actualNumBlocks  = extent.blockCount;
+
+       return err;
+}
+
+errno_t hfs_block_alloc(hfsmount_t *hfsmp,
+                                               HFSPlusExtentDescriptor *extent,
+                                               hfs_block_alloc_flags_t flags,
+                                               hfs_alloc_extra_args_t *ap)
+{
+       return MacToVFSError(hfs_block_alloc_int(hfsmp, extent, flags, ap));
+}
+
 /*
  ;________________________________________________________________________________
  ;
- ; Routine:       BlockAllocate
+ ; Routine:       hfs_block_alloc_int
  ;
  ; Function:   Allocate space on a volume.     If contiguous allocation is requested,
  ;                        at least the requested number of bytes will be allocated or an
@@ -899,57 +1035,125 @@ u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp)
  ;                        point.
  ;
  ; Input Arguments:
- ;      vcb                     - Pointer to ExtendedVCB for the volume to allocate space on
- ;      fcb                     - Pointer to FCB for the file for which storage is being allocated
- ;      startingBlock   - Preferred starting allocation block, 0 = no preference
- ;      minBlocks               - Number of blocks requested.  If the allocation is non-contiguous,
- ;                                        less than this may actually be allocated
- ;      maxBlocks               - The maximum number of blocks to allocate.  If there is additional free
- ;                                        space after bytesRequested, then up to maxBlocks bytes should really
- ;                                        be allocated.  (Used by ExtendFileC to round up allocations to a multiple
- ;                                        of the file's clump size.)
- ;      flags           - Flags to specify options like contiguous, use metadata zone, 
- ;                                        skip free block check, etc.
+ ;   hfsmp           - Pointer to the HFS mount structure.
+ ;   extent          - startBlock indicates the block to start
+ ;                     searching from and blockCount is the number of
+ ;                     blocks required.  Depending on the flags used,
+ ;                     more or fewer blocks may be returned.  The
+ ;                     allocated extent is returned via this
+ ;                     parameter.
+ ;   flags           - Flags to specify options like contiguous, use
+ ;                     metadata zone, skip free block check, etc.
+ ;   ap              - Additional arguments used depending on flags.
+ ;                     See hfs_alloc_extra_args_t and below.
  ;
  ; Output:
- ;      (result)                - Error code, zero for successful allocation
- ;      *startBlock     - Actual starting allocation block
- ;      *actualBlccks   - Actual number of allocation blocks allocated
+ ;   (result)        - Error code, zero for successful allocation
+ ;   extent          - If successful, the allocated extent.
  ;
  ; Side effects:
  ;      The volume bitmap is read and updated; the volume bitmap cache may be changed.
+ ;
+ ; HFS_ALLOC_TENTATIVE
+ ; Blocks will be reserved but not marked allocated.  They can be
+ ; stolen if free space is limited.  Tentative blocks can be used later
+ ; by passing HFS_ALLOC_USE_TENTATIVE along with the reservation.
+ ; @ap->reservation_out is used to store the reservation.
+ ;
+ ; HFS_ALLOC_USE_TENTATIVE
+ ; Use blocks previously returned with HFS_ALLOC_TENTATIVE.
+ ; @ap->reservation_in should be set to whatever @ap->reservation_out
+ ; was set to when HFS_ALLOC_TENTATIVE was used.  If the tentative
+ ; reservation was stolen, a normal allocation will take place.
+ ;
+ ; HFS_ALLOC_LOCKED
+ ; Blocks will be reserved but not marked allocated.  Unlike tentative
+ ; reservations they cannot be stolen.  It is safe to write to these
+ ; blocks.  @ap->reservation_out is used to store the reservation.
+ ;
+ ; HFS_ALLOC_COMMIT
+ ; This will take blocks previously returned with HFS_ALLOC_LOCKED and
+ ; mark them allocated on disk.  @ap->reservation_in is used.
+ ;
+ ; HFS_ALLOC_ROLL_BACK
+ ; Take blocks that were just recently deallocated and mark them
+ ; allocated.  This is for roll back situations.  Blocks got
+ ; deallocated and then something went wrong and we need to roll back
+ ; by marking the blocks allocated.
+ ;
+ ; HFS_ALLOC_FORCECONTIG
+ ; The allocation will be contiguous; it will not return fewer than
+ ; @min_blocks.
+ ;
+ ; HFS_ALLOC_TRY_HARD
+ ; We will perform an exhaustive search to try to find @max_blocks.
+ ; It will not return fewer than @min_blocks.
+ ;
  ;________________________________________________________________________________
  */
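As a rough illustration of the tentative-reservation flags documented in the comment above, here is a minimal caller sketch (hypothetical caller code, not part of this change; hfsmp stands for the volume's hfsmount_t, locking/transaction handling and error checks are elided, and the block counts are arbitrary):

	struct rl_entry *resv = NULL;
	HFSPlusExtentDescriptor ext = { .startBlock = 0, .blockCount = 8 };
	hfs_alloc_extra_args_t args = {
		.max_blocks      = 64,
		.reservation_out = &resv    /* reservation is handed back here */
	};

	/* Reserve up to 64 blocks; they are not marked allocated on disk
	 * and may be stolen if free space runs low. */
	if (hfs_block_alloc(hfsmp, &ext, HFS_ALLOC_TENTATIVE, &args) == 0) {
		hfs_alloc_extra_args_t use_args = {
			.max_blocks     = 8,
			.reservation_in = &resv /* same pointer the reservation came back in */
		};
		ext.blockCount = 8;         /* minimum actually needed now */

		/* Consume part of the reservation; if it was stolen in the
		 * meantime this falls back to a normal allocation. */
		hfs_block_alloc(hfsmp, &ext, HFS_ALLOC_USE_TENTATIVE, &use_args);
	}

	/* Release whatever is left of the reservation (no-op if already gone). */
	hfs_free_tentative(hfsmp, &resv);

HFS_ALLOC_LOCKED and HFS_ALLOC_COMMIT follow the same shape via @ap->reservation_out and @ap->reservation_in, with hfs_free_locked() releasing any uncommitted remainder.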
-OSErr BlockAllocate (
-               ExtendedVCB             *vcb,                           /* which volume to allocate space on */
-               u_int32_t               startingBlock,          /* preferred starting block, or 0 for no preference */
-               u_int32_t               minBlocks,              /* desired number of blocks to allocate */
-               u_int32_t               maxBlocks,              /* maximum number of blocks to allocate */
-               u_int32_t               flags,                  /* option flags */
-               u_int32_t               *actualStartBlock,      /* actual first block of allocation */
-               u_int32_t               *actualNumBlocks)       
-/*
- *  actualNumBlocks is the number of blocks actually allocated; 
- * if forceContiguous was zero, then this may represent fewer than minBlocks 
- */
+OSErr hfs_block_alloc_int(hfsmount_t *hfsmp,
+                                                 HFSPlusExtentDescriptor *extent,
+                                                 hfs_block_alloc_flags_t flags,
+                                                 hfs_alloc_extra_args_t *ap)
 {
        u_int32_t  freeBlocks;
-       OSErr                   err;
+       OSErr                   err = 0;
        Boolean                 updateAllocPtr = false;         //      true if nextAllocation needs to be updated
-       struct hfsmount *hfsmp;
        Boolean useMetaZone;
-       Boolean forceContiguous;
+       Boolean forceContiguous = false;
        Boolean forceFlush;
 
+       uint32_t startingBlock = extent->startBlock;
+       uint32_t minBlocks = extent->blockCount;
+       uint32_t maxBlocks = (ap && ap->max_blocks) ? ap->max_blocks : minBlocks;
+
        if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
                KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, flags, 0);
 
-       if (flags & HFS_ALLOC_FORCECONTIG) {
-               forceContiguous = true;
-       } else {
-               forceContiguous = false;
+       if (ISSET(flags, HFS_ALLOC_COMMIT)) {
+               extent->startBlock = (*ap->reservation_in)->rl_start;
+               extent->blockCount = rl_len(*ap->reservation_in);
+               goto mark_allocated;
+       }
+
+       if (ISSET(flags, HFS_ALLOC_ROLL_BACK))
+               goto mark_allocated;
+
+       freeBlocks = hfs_freeblks(hfsmp, 0);
+
+       if (ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) {
+               struct rl_entry *range = *ap->reservation_in;
+
+               if (range && range->rl_start != -1) {
+                       /*
+                        * It's possible that we have a tentative reservation
+                        * but there aren't enough free blocks due to loaned blocks
+                        * or insufficient space in the backing store.
+                        */
+                       uint32_t count = min(min(maxBlocks, rl_len(range)), freeBlocks);
+
+                       if (count >= minBlocks) {
+                               extent->startBlock = range->rl_start;
+                               extent->blockCount = count;
+
+                               // Should we go straight to commit?
+                               if (!ISSET(flags, HFS_ALLOC_LOCKED))
+                                       SET(flags, HFS_ALLOC_COMMIT);
+
+                               goto mark_allocated;
+                       }
+               }
+
+               /*
+                * We can't use the tentative reservation so free it and allocate
+                * normally.
+                */
+               hfs_free_tentative(hfsmp, ap->reservation_in);
+               CLR(flags, HFS_ALLOC_USE_TENTATIVE);
        }
 
+       if (ISSET(flags, HFS_ALLOC_FORCECONTIG | HFS_ALLOC_TRY_HARD))
+               forceContiguous = true;
+
        if (flags & HFS_ALLOC_METAZONE) {
                useMetaZone = true;
        } else {
@@ -963,15 +1167,11 @@ OSErr BlockAllocate (
                forceFlush = false;
        }
 
+       assert(hfsmp->freeBlocks >= hfsmp->tentativeBlocks);
 
-       //
-       //      Initialize outputs in case we get an error
-       //
-       *actualStartBlock = 0;
-       *actualNumBlocks = 0;
-       hfsmp = VCBTOHFS (vcb);
-       freeBlocks = hfs_freeblks(hfsmp, 0);
-
+       // See if we have to steal tentative blocks
+       if (freeBlocks < hfsmp->tentativeBlocks + minBlocks)
+               SET(flags, HFS_ALLOC_IGNORE_TENTATIVE);
 
        /* Skip free block check if blocks are being allocated for relocating 
         * data during truncating a volume.
@@ -989,11 +1189,11 @@ OSErr BlockAllocate (
                //      If the disk is already full, don't bother.
                if (freeBlocks == 0) {
                        err = dskFulErr;
-                       goto Exit;
+                       goto exit;
                }
                if (forceContiguous && freeBlocks < minBlocks) {
                        err = dskFulErr;
-                       goto Exit;
+                       goto exit;
                }
 
                /*
@@ -1007,6 +1207,14 @@ OSErr BlockAllocate (
                }
        }
 
+       if (ISSET(flags, HFS_ALLOC_TRY_HARD)) {
+               err = hfs_alloc_try_hard(hfsmp, extent, maxBlocks, flags);
+               if (err)
+                       goto exit;
+
+               goto mark_allocated;
+       }
+
        //
        //      If caller didn't specify a starting block number, then use the volume's
        //      next block to allocate from.
@@ -1015,18 +1223,18 @@ OSErr BlockAllocate (
                hfs_lock_mount (hfsmp);
 
                /* Sparse Allocation and nextAllocation are both used even if the R/B Tree is on */
-               if (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
-                       startingBlock = vcb->sparseAllocation;
+               if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
+                       startingBlock = hfsmp->sparseAllocation;
                } 
                else {
-                       startingBlock = vcb->nextAllocation;
+                       startingBlock = hfsmp->nextAllocation;
                }
                hfs_unlock_mount(hfsmp);
                updateAllocPtr = true;
        }
 
 
-       if (startingBlock >= vcb->allocLimit) {
+       if (startingBlock >= hfsmp->allocLimit) {
                startingBlock = 0; /* overflow so start at beginning */
        }
 
@@ -1035,8 +1243,8 @@ OSErr BlockAllocate (
        //      that is long enough.  Otherwise, find the first free block.
        //
        if (forceContiguous) {
-               err = BlockAllocateContig(vcb, startingBlock, minBlocks, maxBlocks,
-                               flags, actualStartBlock, actualNumBlocks);
+               err = BlockFindContig(hfsmp, startingBlock, minBlocks, maxBlocks,
+                               flags, &extent->startBlock, &extent->blockCount);
                /*
                 * If we allocated from a new position then also update the roving allocator.  
                 * This will keep the roving allocation pointer up-to-date even 
@@ -1045,9 +1253,9 @@ OSErr BlockAllocate (
                 * the block to vend out.
                 */
                if ((err == noErr) &&
-                               (*actualStartBlock > startingBlock) &&
-                               ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
-                                (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
+                               (extent->startBlock > startingBlock) &&
+                               ((extent->startBlock < hfsmp->hfs_metazone_start) ||
+                                (extent->startBlock > hfsmp->hfs_metazone_end))) {
                        updateAllocPtr = true;
                }
        } else {                                        
@@ -1069,12 +1277,13 @@ OSErr BlockAllocate (
                }
 
                /* 
-                * BlockAllocateKnown only examines the free extent cache; anything in there will
+                * BlockFindKnown only examines the free extent cache; anything in there will
                 * have been committed to stable storage already.
                 */
-               err = BlockAllocateKnown(vcb, maxBlocks, actualStartBlock, actualNumBlocks);
+               err = BlockFindKnown(hfsmp, maxBlocks, &extent->startBlock,
+                                                       &extent->blockCount);
 
-               /* dskFulErr out of BlockAllocateKnown indicates an empty Free Extent Cache */
+               /* dskFulErr out of BlockFindKnown indicates an empty Free Extent Cache */
 
                if (err == dskFulErr) {
                        /* 
@@ -1082,9 +1291,9 @@ OSErr BlockAllocate (
                         * allocation limit.  We 'trust' the summary bitmap in this call, if it tells us
                         * that it could not find any free space.
                         */
-                       err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit,
+                       err = BlockFindAny(hfsmp, startingBlock, hfsmp->allocLimit,
                                        maxBlocks, flags, true, 
-                                       actualStartBlock, actualNumBlocks);
+                                       &extent->startBlock, &extent->blockCount);
                }
                if (err == dskFulErr) {
                        /*
@@ -1094,14 +1303,14 @@ OSErr BlockAllocate (
                         * If it is off, then we trust the above and go up until the startingBlock.
                         */
                        if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
-                               err = BlockAllocateAny(vcb, 1, vcb->allocLimit, maxBlocks,
+                               err = BlockFindAny(hfsmp, 1, hfsmp->allocLimit, maxBlocks,
                                                flags, false, 
-                                               actualStartBlock, actualNumBlocks);
+                                               &extent->startBlock, &extent->blockCount);
                        }
                        else {
-                               err = BlockAllocateAny(vcb, 1, startingBlock, maxBlocks,
+                               err = BlockFindAny(hfsmp, 1, startingBlock, maxBlocks,
                                                flags, false, 
-                                               actualStartBlock, actualNumBlocks);
+                                               &extent->startBlock, &extent->blockCount);
                        }       
 
                        /*
@@ -1109,60 +1318,82 @@ OSErr BlockAllocate (
                         */              
                        if (err == dskFulErr && forceFlush) {
                                flags |= HFS_ALLOC_FLUSHTXN;
-                               err = BlockAllocateAny(vcb, 1, vcb->allocLimit, maxBlocks,
+                               err = BlockFindAny(hfsmp, 1, hfsmp->allocLimit, maxBlocks,
                                                flags, false, 
-                                               actualStartBlock, actualNumBlocks);
+                                               &extent->startBlock, &extent->blockCount);
                        }
                }
        }
 
-Exit:
-       if ((hfsmp->hfs_flags & HFS_CS) && *actualNumBlocks != 0) {
-               errno_t ec;
-               _dk_cs_map_t cm;
-               uint64_t mapped_blocks;
-
-               cm.cm_extent.offset = (uint64_t)*actualStartBlock * hfsmp->blockSize + hfsmp->hfsPlusIOPosOffset;
-               cm.cm_extent.length = (uint64_t)*actualNumBlocks * hfsmp->blockSize;
-               cm.cm_bytes_mapped = 0;
-               ec = VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSMAP, (caddr_t)&cm, 0, vfs_context_current());
-               if (ec != 0 && ec != ENOSPC) {
-                       printf ("VNOP_IOCTL(_DKIOCCSMAP) returned an unexpected error code=%d\n", ec);
-                       err = ec;
-                       goto Exit_CS;
-               }
-               mapped_blocks = cm.cm_bytes_mapped / hfsmp->blockSize;
-               /* CoreStorage returned more blocks than requested */
-               if (mapped_blocks > *actualNumBlocks) {
-                       printf ("VNOP_IOCTL(_DKIOCCSMAP) mapped too many blocks, mapped=%lld, actual=%d\n", 
-                                       mapped_blocks, *actualNumBlocks);
-               }
-               if (*actualNumBlocks > mapped_blocks) {
-                       if (forceContiguous && mapped_blocks < minBlocks) {
-                               mapped_blocks = 0;
-                       }
-               }
-               uint64_t numBlocksToFree = *actualNumBlocks - mapped_blocks;
-               uint64_t firstBlockToFree = *actualStartBlock + mapped_blocks;
-               if (numBlocksToFree > 0) {
-                       err = BlockDeallocate(vcb, firstBlockToFree, numBlocksToFree, flags);
-                       if (err != noErr) {
-                               printf ("BlockDeallocate failed (err=%d)\n", err);
-                               goto Exit_CS;
+       if (err)
+               goto exit;
+
+mark_allocated:
+
+       // Handle alignment
+       if (ap && ap->alignment && extent->blockCount < ap->max_blocks) {
+               /*
+                * See the comment in FileMgrInternal.h for alignment
+                * semantics.
+                */
+               uint32_t rounding = ((extent->blockCount + ap->alignment_offset)
+                                                        % ap->alignment);
+
+               // @minBlocks is still the minimum
+               if (extent->blockCount >= minBlocks + rounding)
+                       extent->blockCount -= rounding;
+       }
+
+       err = BlockMarkAllocatedInternal(hfsmp, extent->startBlock,
+                                                                        extent->blockCount, flags);
+
+       if (err)
+               goto exit;
+
+       if (ISSET(hfsmp->hfs_flags, HFS_CS) && extent->blockCount != 0
+               && !ISSET(flags, HFS_ALLOC_TENTATIVE)) {
+               if (ISSET(flags, HFS_ALLOC_FAST_DEV)) {
+#if !HFS_ALLOC_TEST        /* need this guard because this file is compiled outside of the kernel */
+                       hfs_pin_block_range(hfsmp, HFS_PIN_IT,
+                                                               extent->startBlock, extent->blockCount,
+                                                               vfs_context_kernel());
+#endif
+               } else {
+                       _dk_cs_map_t cm = {
+                               .cm_extent = {
+                                       (hfs_blk_to_bytes(extent->startBlock, hfsmp->blockSize)
+                                        + hfsmp->hfsPlusIOPosOffset),
+                                       hfs_blk_to_bytes(extent->blockCount, hfsmp->blockSize)
+                               }
+                       };
+
+                       errno_t err2 = VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSMAP,
+                                                                         (caddr_t)&cm, 0, vfs_context_current());
+
+                       /*
+                        * Ignore errors for now; we are fully provisioned so in
+                        * theory CoreStorage should be able to handle this
+                        * allocation.  Should we want to change this in future, then
+                        * we should think carefully how we handle errors.  Allowing
+                        * CoreStorage to truncate our allocation is problematic
+                        * because we might have minimum and alignment requirements
+                        * and backing out changes we have already made is
+                        * non-trivial.
+                        */
+
+                       if (err2 || cm.cm_bytes_mapped < cm.cm_extent.length) {
+                               printf("hfs: _DKIOCCSMAP error: %d, bytes_mapped: %llu\n",
+                                          err2, cm.cm_bytes_mapped);
                        }
                }
-               *actualNumBlocks = mapped_blocks;
-               if (*actualNumBlocks == 0 && err == noErr) {
-                       err = dskFulErr;
-               }
        }
-Exit_CS: 
+
        // if we actually allocated something then go update the
        // various bits of state that we maintain regardless of
        // whether there was an error (i.e. partial allocations
        // still need to update things like the free block count).
        //
-       if (*actualNumBlocks != 0) {
+       if (extent->blockCount != 0) {
                //
                //      If we used the volume's roving allocation pointer, then we need to update it.
                //      Adding in the length of the current allocation might reduce the next allocate
@@ -1173,24 +1404,39 @@ Exit_CS:
                //
                hfs_lock_mount (hfsmp);
 
-               lck_spin_lock(&hfsmp->vcbFreeExtLock);
-               if (vcb->vcbFreeExtCnt == 0 && vcb->hfs_freed_block_count == 0) {
-                       vcb->sparseAllocation = *actualStartBlock;
-               }
-               lck_spin_unlock(&hfsmp->vcbFreeExtLock);
-               if (*actualNumBlocks < vcb->hfs_freed_block_count) {
-                       vcb->hfs_freed_block_count -= *actualNumBlocks;
-               } else {
-                       vcb->hfs_freed_block_count = 0;
-               }
+               if (!ISSET(flags, HFS_ALLOC_USE_TENTATIVE | HFS_ALLOC_COMMIT)) {
+                       lck_spin_lock(&hfsmp->vcbFreeExtLock);
+                       if (hfsmp->vcbFreeExtCnt == 0 && hfsmp->hfs_freed_block_count == 0) {
+                               hfsmp->sparseAllocation = extent->startBlock;
+                       }
+                       lck_spin_unlock(&hfsmp->vcbFreeExtLock);
+                       if (extent->blockCount < hfsmp->hfs_freed_block_count) {
+                               hfsmp->hfs_freed_block_count -= extent->blockCount;
+                       } else {
+                               hfsmp->hfs_freed_block_count = 0;
+                       }
+
+                       if (updateAllocPtr &&
+                               ((extent->startBlock < hfsmp->hfs_metazone_start) ||
+                                (extent->startBlock > hfsmp->hfs_metazone_end))) {
+                               HFS_UPDATE_NEXT_ALLOCATION(hfsmp, extent->startBlock);
+                       }
 
-               if (updateAllocPtr &&
-                               ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
-                                (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
-                       HFS_UPDATE_NEXT_ALLOCATION(vcb, *actualStartBlock);
+                       (void) remove_free_extent_cache(hfsmp, extent->startBlock, extent->blockCount);
                }
 
-               (void) remove_free_extent_cache(hfsmp, *actualStartBlock, *actualNumBlocks);
+               if (ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) {
+                       (*ap->reservation_in)->rl_start += extent->blockCount;
+                       hfsmp->tentativeBlocks -= extent->blockCount;
+                       if (rl_len(*ap->reservation_in) <= 0)
+                               hfs_free_tentative(hfsmp, ap->reservation_in);
+               } else if (ISSET(flags, HFS_ALLOC_COMMIT)) {
+                       // Handle committing locked extents
+                       assert(hfsmp->lockedBlocks >= extent->blockCount);
+                       (*ap->reservation_in)->rl_start += extent->blockCount;
+                       hfsmp->lockedBlocks -= extent->blockCount;
+                       hfs_free_locked(hfsmp, ap->reservation_in);
+               }
 
                /* 
                 * Update the number of free blocks on the volume 
@@ -1198,36 +1444,122 @@ Exit_CS:
                 * Skip updating the free blocks count if the blocks are 
                 * being allocated to relocate data as part of hfs_truncatefs()
                 */
-               if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
-                       vcb->freeBlocks -= *actualNumBlocks;
+
+               if (ISSET(flags, HFS_ALLOC_TENTATIVE)) {
+                       hfsmp->tentativeBlocks += extent->blockCount;
+               } else if (ISSET(flags, HFS_ALLOC_LOCKED)) {
+                       hfsmp->lockedBlocks += extent->blockCount;
+               } else if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
+                       hfsmp->freeBlocks -= extent->blockCount;
                }
-               MarkVCBDirty(vcb);
+               MarkVCBDirty(hfsmp);
                hfs_unlock_mount(hfsmp);
 
-               hfs_generate_volume_notifications(VCBTOHFS(vcb));
+               hfs_generate_volume_notifications(hfsmp);
+
+               if (ISSET(flags, HFS_ALLOC_TENTATIVE)) {
+                       add_to_reserved_list(hfsmp, extent->startBlock, extent->blockCount, 
+                                                                0, ap->reservation_out);
+               } else if (ISSET(flags, HFS_ALLOC_LOCKED)) {
+                       add_to_reserved_list(hfsmp, extent->startBlock, extent->blockCount, 
+                                                                1, ap->reservation_out);
+               }
+
+               if (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)) {
+                       /*
+                        * See if we used tentative blocks.  Note that we cannot
+                        * free the reservations here because we don't have access
+                        * to the external pointers.  All we can do is update the
+                        * reservations and they'll be cleaned up when whatever is
+                        * holding the pointers calls us back.
+                        *
+                        * We use the rangelist code to detect overlaps and
+                        * constrain the tentative block allocation.  Note that
+                        * @end is inclusive so that our rangelist code will
+                        * resolve the various cases for us.  As a result, we need
+                        * to ensure that we account for it properly when removing
+                        * the blocks from the tentative count in the mount point
+                        * and re-inserting the remainder (either head or tail)
+                        * and re-inserting the remainder (either head or tail).
+                       struct rl_entry *range, *next_range;
+                       struct rl_head *ranges = &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS];
+                       const uint32_t start = extent->startBlock;
+                       const uint32_t end = start + extent->blockCount - 1;
+                       TAILQ_FOREACH_SAFE(range, ranges, rl_link, next_range) {
+                               switch (rl_overlap(range, start, end)) {
+                                       case RL_OVERLAPCONTAINSRANGE:
+                                               // Keep the bigger part
+                                               if (start - range->rl_start > range->rl_end - end) {
+                                                       // Discard the tail
+                                                       hfsmp->tentativeBlocks -= range->rl_end + 1 - start;
+                                                       hfs_release_summary(hfsmp, end + 1, range->rl_end - end);
+                                                       const uint32_t old_end = range->rl_end;
+                                                       range->rl_end = start - 1;
+                                                       add_free_extent_cache(hfsmp, end + 1, old_end - end);
+                                               } else {
+                                                       // Discard the head
+                                                       hfsmp->tentativeBlocks -= end + 1 - range->rl_start;
+                                                       hfs_release_summary(hfsmp, range->rl_start,
+                                                                                               start - range->rl_start);
+                                                       const uint32_t old_start = range->rl_start;
+                                                       range->rl_start = end + 1;
+                                                       add_free_extent_cache(hfsmp, old_start,
+                                                                                                 start - old_start);
+                                               }
+                                               assert(range->rl_end >= range->rl_start);
+                                               break;
+                                       case RL_MATCHINGOVERLAP:
+                                       case RL_OVERLAPISCONTAINED:
+                                               hfsmp->tentativeBlocks -= rl_len(range);
+                                               range->rl_end = range->rl_start - 1;
+                                               hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS);
+                                               break;
+                                       case RL_OVERLAPSTARTSBEFORE:
+                                               hfsmp->tentativeBlocks -= range->rl_end + 1 - start;
+                                               range->rl_end = start - 1;
+                                               assert(range->rl_end >= range->rl_start);
+                                               break;
+                                       case RL_OVERLAPENDSAFTER:
+                                               hfsmp->tentativeBlocks -= end + 1 - range->rl_start;
+                                               range->rl_start = end + 1;
+                                               assert(range->rl_end >= range->rl_start);
+                                               break;
+                                       case RL_NOOVERLAP:
+                                               break;
+                               }
+                       }
+               }
        }
 
+exit:
+
        if (ALLOC_DEBUG) {
                if (err == noErr) {
-                       if (*actualStartBlock >= hfsmp->totalBlocks) {
+                       if (extent->startBlock >= hfsmp->totalBlocks) {
                                panic ("BlockAllocate: vending invalid blocks!");
                        }
-                       if (*actualStartBlock >= hfsmp->allocLimit) {
+                       if (extent->startBlock >= hfsmp->allocLimit) {
                                panic ("BlockAllocate: vending block past allocLimit!");
                        }
 
-                       if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->totalBlocks) {     
+                       if ((extent->startBlock + extent->blockCount) >= hfsmp->totalBlocks) {  
                                panic ("BlockAllocate: vending too many invalid blocks!");
                        }
 
-                       if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->allocLimit) {      
+                       if ((extent->startBlock + extent->blockCount) >= hfsmp->allocLimit) {   
                                panic ("BlockAllocate: vending too many invalid blocks past allocLimit!");
                        }
                }
        }
 
+       if (err) {
+               // Just to be safe...
+               extent->startBlock = 0;
+               extent->blockCount = 0;
+       }
+
        if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
-               KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
+               KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, extent->startBlock, extent->blockCount, 0, 0);
 
        return err;
 }
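To make the alignment trimming in the mark_allocated path above concrete, a worked example with hypothetical numbers: with @ap->alignment = 8, @ap->alignment_offset = 1, minBlocks = 16 and a found extent of 27 blocks (fewer than @ap->max_blocks, so the trim is attempted), rounding = (27 + 1) % 8 = 4; since 27 >= 16 + 4, the extent is trimmed to 23 blocks, making blockCount + alignment_offset (24) a multiple of the alignment. Had the trim taken the extent below minBlocks, it would have been left at its found size.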
@@ -1251,6 +1583,7 @@ Exit_CS:
 ; Side effects:
 ;       The volume bitmap is read and updated; the volume bitmap cache may be changed.
 ;       The Allocator's red-black trees may also be modified as a result.
+;
 ;________________________________________________________________________________
 */
 
@@ -1258,8 +1591,11 @@ OSErr BlockDeallocate (
                ExtendedVCB             *vcb,                   //      Which volume to deallocate space on
                u_int32_t               firstBlock,             //      First block in range to deallocate
                u_int32_t               numBlocks,              //      Number of contiguous blocks to deallocate
-               u_int32_t               flags)
+               hfs_block_alloc_flags_t flags)
 {
+       if (ISSET(flags, HFS_ALLOC_TENTATIVE | HFS_ALLOC_LOCKED))
+               return 0;
+
        OSErr                   err;
        struct hfsmount *hfsmp;
        hfsmp = VCBTOHFS(vcb);
@@ -1390,7 +1726,8 @@ MetaZoneFreeBlocks(ExtendedVCB *vcb)
                                (void) ReleaseBitmapBlock(vcb, blockRef, false);
                                blockRef = 0;
                        }
-                       if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef) != 0) {
+                       if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef, 
+                                                               HFS_ALLOC_IGNORE_TENTATIVE) != 0) {
                                return (0);
                        }
                        buffer = (u_int8_t *)currCache;
@@ -1432,6 +1769,104 @@ static u_int32_t NextBitmapBlock(
 }
 
 
+// Assumes @bitmap is aligned to 8 bytes and is a multiple of 8 bytes in length.
+static void bits_set(void *bitmap, int start, int end)
+{
+       const int start_bit = start & 63;
+       const int end_bit   = end   & 63;
+
+#define LEFT_MASK(bit) OSSwapHostToBigInt64(0xffffffffffffffffull << (64 - bit))
+#define RIGHT_MASK(bit)        OSSwapHostToBigInt64(0xffffffffffffffffull >> bit)
+
+       uint64_t *p = (uint64_t *)bitmap + start / 64;
+
+       if ((start & ~63) == (end & ~63)) {
+               // Start and end in same 64 bits
+               *p |= RIGHT_MASK(start_bit) & LEFT_MASK(end_bit);
+       } else {
+               *p++ |= RIGHT_MASK(start_bit);
+
+               int nquads = (end - end_bit - start - 1) / 64;
+
+               while (nquads--)
+                       *p++ = 0xffffffffffffffffull;
+
+               if (end_bit)
+                       *p |= LEFT_MASK(end_bit);
+       }
+}
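A minimal standalone check of the helper above (hypothetical sketch, e.g. under the HFS_ALLOC_TEST build; bit numbering is big-endian, with bit 0 the most significant bit of the first 64-bit word):

	uint64_t words[2] = { 0, 0 };

	/* Bits [3, 10): both ends fall in the first word, so the
	 * single-word path (RIGHT_MASK(3) & LEFT_MASK(10)) is taken. */
	bits_set(words, 3, 10);
	assert(__builtin_popcountll(words[0]) == 7 && words[1] == 0);

	/* Bits [60, 70): the range straddles two words -- RIGHT_MASK(60)
	 * on the first word, LEFT_MASK(6) on the second, no full words. */
	bits_set(words, 60, 70);
	assert(__builtin_popcountll(words[0]) == 11);
	assert(__builtin_popcountll(words[1]) == 6);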
+
+// Modifies the buffer and applies any reservations that we might have
+static buf_t process_reservations(hfsmount_t *hfsmp, buf_t bp, off_t offset,
+                                                                 hfs_block_alloc_flags_t flags,
+                                                                 bool always_copy)
+{
+       bool taken_copy = false;
+       void *buffer = (void *)buf_dataptr(bp);
+       const uint32_t nbytes = buf_count(bp);
+       const off_t end = offset + nbytes * 8 - 1;
+
+       for (int i = (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)
+                                 ? HFS_LOCKED_BLOCKS : HFS_TENTATIVE_BLOCKS); i < 2; ++i) {
+               struct rl_entry *entry;
+               TAILQ_FOREACH(entry, &hfsmp->hfs_reserved_ranges[i], rl_link) {
+                       uint32_t a, b;
+
+                       enum rl_overlaptype overlap_type = rl_overlap(entry, offset, end);
+
+                       if (overlap_type == RL_NOOVERLAP)
+                               continue;
+
+                       /*
+                        * If always_copy is false, we only take a copy if B_LOCKED is
+                        * set because ReleaseScanBitmapRange doesn't invalidate the
+                        * buffer in that case.
+                        */
+                       if (!taken_copy && (always_copy || ISSET(buf_flags(bp), B_LOCKED))) {
+                               buf_t new_bp = buf_create_shadow(bp, true, 0, NULL, NULL);
+                               buf_brelse(bp);
+                               bp = new_bp;
+                               buf_setflags(bp, B_NOCACHE);
+                               buffer = (void *)buf_dataptr(bp);
+                               taken_copy = true;
+                       }
+
+                       switch (overlap_type) {
+                       case RL_OVERLAPCONTAINSRANGE:
+                       case RL_MATCHINGOVERLAP:
+                               memset(buffer, 0xff, nbytes);
+                               return bp;
+                       case RL_OVERLAPISCONTAINED:
+                               a = entry->rl_start;
+                               b = entry->rl_end;
+                               break;
+                       case RL_OVERLAPSTARTSBEFORE:
+                               a = offset;
+                               b = entry->rl_end;
+                               break;
+                       case RL_OVERLAPENDSAFTER:
+                               a = entry->rl_start;
+                               b = end;
+                               break;
+                       case RL_NOOVERLAP:
+                               __builtin_unreachable();
+                       }
+
+                       a -= offset;
+                       b -= offset;
+
+                       assert(a < buf_count(bp) * 8);
+                       assert(b < buf_count(bp) * 8);
+                       assert(b >= a);
+
+                       // b is inclusive
+                       bits_set(buffer, a, b + 1);
+               }
+       } // for each reservation list
+
+       return bp;
+}
+
 /*
 ;_______________________________________________________________________
 ;
@@ -1449,11 +1884,11 @@ static u_int32_t NextBitmapBlock(
 ;      blockRef
 ;_______________________________________________________________________
 */
-static OSErr ReadBitmapBlock(
-               ExtendedVCB             *vcb,
-               u_int32_t               bit,
-               u_int32_t               **buffer,
-               uintptr_t               *blockRef)
+static OSErr ReadBitmapBlock(ExtendedVCB               *vcb,
+                                                        u_int32_t              bit,
+                                                        u_int32_t              **buffer,
+                                                        uintptr_t              *blockRef,
+                                                        hfs_block_alloc_flags_t flags)
 {
        OSErr                   err;
        struct buf *bp = NULL;
@@ -1492,6 +1927,13 @@ static OSErr ReadBitmapBlock(
                        *blockRef = 0;
                        *buffer = NULL;
                } else {
+                       if (!ISSET(flags, HFS_ALLOC_IGNORE_RESERVED)) {
+                               bp = process_reservations(vcb, bp, block * blockSize * 8,
+                                                                                 flags, /* always_copy: */ true);
+                       }
+
+                       buf_setfsprivate(bp, (void *)(uintptr_t)flags);
+
                        *blockRef = (uintptr_t)bp;
                        *buffer = (u_int32_t *)buf_dataptr(bp);
                }
@@ -1572,6 +2014,9 @@ static OSErr ReadBitmapRange(struct hfsmount *hfsmp, uint32_t offset,
                        *blockRef = 0;
                        *buffer = NULL;
                } else {
+                       bp = process_reservations(hfsmp, bp, (offset * 8), 0,
+                                                                         /* always_copy: */ false);
+
                        *blockRef = bp;
                        *buffer = (u_int32_t *)buf_dataptr(bp);
                }
@@ -1616,7 +2061,11 @@ static OSErr ReleaseBitmapBlock(
 
        if (bp) {
                if (dirty) {
-                       // XXXdbg
+                       hfs_block_alloc_flags_t flags = (uintptr_t)buf_fsprivate(bp);
+
+                       if (!ISSET(flags, HFS_ALLOC_IGNORE_RESERVED))
+                               panic("Modified read-only bitmap buffer!");
+
                        struct hfsmount *hfsmp = VCBTOHFS(vcb);
 
                        if (hfsmp->jnl) {
@@ -1670,15 +2119,65 @@ static OSErr ReleaseScanBitmapRange(struct buf *bp ) {
        return (0);
 }
 
+/* 
+ * @extent.startBlock, on input, contains a preferred block for the
+ * allocation.  @extent.blockCount, on input, contains the minimum
+ * number of blocks acceptable.  Upon success, the result is conveyed
+ * in @extent.
+ */
+static OSErr hfs_alloc_try_hard(hfsmount_t *hfsmp,
+                                                               HFSPlusExtentDescriptor *extent,
+                                                               uint32_t max_blocks,
+                                                               hfs_block_alloc_flags_t flags)
+{
+       OSErr err = dskFulErr;
+
+       const uint32_t min_blocks = extent->blockCount;
+
+       // It's > rather than >= because the last block is always reserved
+       if (extent->startBlock > 0 && extent->startBlock < hfsmp->allocLimit
+               && hfsmp->allocLimit - extent->startBlock > max_blocks) {
+               /*
+                * This is just checking to see if there's an extent starting
+                * at extent->startBlock that will suit.  We only check for
+                * @max_blocks here; @min_blocks is ignored.
+                */
+
+               err = BlockFindContiguous(hfsmp, extent->startBlock, extent->startBlock + max_blocks,
+                                                                 max_blocks, max_blocks, true, true,
+                                                                 &extent->startBlock, &extent->blockCount, flags);
+
+               if (err != dskFulErr)
+                       return err;
+       }
+
+       err = BlockFindKnown(hfsmp, max_blocks, &extent->startBlock,
+                                               &extent->blockCount);
+
+       if (!err) {
+               if (extent->blockCount >= max_blocks)
+                       return 0;
+       } else if (err != dskFulErr)
+               return err;
+
+       // Try a more exhaustive search
+       return BlockFindContiguous(hfsmp, 1, hfsmp->allocLimit,
+                                                          min_blocks, max_blocks,
+                                                          /* useMetaZone: */ true,
+                                                          /* trustSummary: */ true,
+                                                          &extent->startBlock, &extent->blockCount, flags);
+}
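A short hypothetical sketch of the interface (illustrative values only; within this change the routine is reached from hfs_block_alloc_int when HFS_ALLOC_TRY_HARD is set):

	/* Ask for at least 16 blocks, ideally 256, preferring to start the
	 * search at block 1000. */
	HFSPlusExtentDescriptor ext = { .startBlock = 1000, .blockCount = 16 };
	OSErr err = hfs_alloc_try_hard(hfsmp, &ext, 256, HFS_ALLOC_TRY_HARD);
	if (err == noErr) {
		/* ext now describes the extent found: at least 16 blocks,
		 * at most 256. */
	}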
+
 /*
 _______________________________________________________________________
 
-Routine:       BlockAllocateContig
+Routine:       BlockFindContig
 
-Function:      Allocate a contiguous group of allocation blocks.  The
-                       allocation is all-or-nothing.  The caller guarantees that
-                       there are enough free blocks (though they may not be
-                       contiguous, in which case this call will fail).
+Function:   Find a contiguous group of allocation blocks.  If the
+                       minimum cannot be satisfied, nothing is returned.  The
+                       caller guarantees that there are enough free blocks
+                       (though they may not be contiguous, in which case this
+                       call will fail).
 
 Inputs:
        vcb                             Pointer to volume where space is to be allocated
@@ -1692,12 +2191,12 @@ Outputs:
        actualNumBlocks         Number of blocks allocated, or 0 if error
 _______________________________________________________________________
 */
-static OSErr BlockAllocateContig(
+static OSErr BlockFindContig(
                ExtendedVCB             *vcb,
                u_int32_t               startingBlock,
                u_int32_t               minBlocks,
                u_int32_t               maxBlocks,
-               u_int32_t               flags,
+               hfs_block_alloc_flags_t flags,
                u_int32_t               *actualStartBlock,
                u_int32_t               *actualNumBlocks)
 {
@@ -1718,40 +2217,39 @@ static OSErr BlockAllocateContig(
        struct hfsmount *hfsmp = VCBTOHFS(vcb);
 
        if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
-               KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0);
+               KERNEL_DEBUG_CONSTANT(HFSDBG_FIND_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0);
 
        while ((retval == noErr) && (foundStart == 0) && (foundCount == 0)) {
 
                /* Try and find something that works. */
-               do {
+
+               /*
+                * NOTE: If the only contiguous free extent of at least minBlocks
+                * crosses startingBlock (i.e. starts before, ends after), then we
+                * won't find it. Earlier versions *did* find this case by letting
+                * the second search look past startingBlock by minBlocks.  But
+                * with the free extent cache, this can lead to duplicate entries
+                * in the cache, causing the same blocks to be allocated twice.
+                */
+               retval = BlockFindContiguous(vcb, currentStart, vcb->allocLimit, minBlocks, 
+                               maxBlocks, useMetaZone, true, &foundStart, &foundCount, flags);
+
+               if (retval == dskFulErr && currentStart != 0) {
                        /*
-                        * NOTE: If the only contiguous free extent of at least minBlocks
-                        * crosses startingBlock (i.e. starts before, ends after), then we
-                        * won't find it. Earlier versions *did* find this case by letting
-                        * the second search look past startingBlock by minBlocks.  But
-                        * with the free extent cache, this can lead to duplicate entries
-                        * in the cache, causing the same blocks to be allocated twice.
+                        * We constrain the endingBlock so we don't bother looking for ranges
+                        * that would overlap those found in the previous call, if the summary bitmap
+                        * is not on for this volume.  If it is, then we assume that it was
+                        * not trustworthy and do a full scan.
                         */
-                       retval = BlockFindContiguous(vcb, currentStart, vcb->allocLimit, minBlocks, 
-                                       maxBlocks, useMetaZone, true, &foundStart, &foundCount);
-
-                       if (retval == dskFulErr && currentStart != 0) {
-                               /*
-                                * We constrain the endingBlock so we don't bother looking for ranges
-                                * that would overlap those found in the previous call, if the summary bitmap
-                                * is not on for this volume.  If it is, then we assume that it was not trust
-                                * -worthy and do a full scan.
-                                */
-                               if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
-                                       retval = BlockFindContiguous(vcb, 1, vcb->allocLimit, minBlocks, 
-                                                       maxBlocks, useMetaZone, false, &foundStart, &foundCount);
-                               }
-                               else {
-                                       retval = BlockFindContiguous(vcb, 1, currentStart, minBlocks, 
-                                                       maxBlocks, useMetaZone, false, &foundStart, &foundCount);
-                               }
-                       }       
-               } while (0);
+                       if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+                               retval = BlockFindContiguous(vcb, 1, vcb->allocLimit, minBlocks, 
+                                               maxBlocks, useMetaZone, false, &foundStart, &foundCount, flags);
+                       }
+                       else {
+                               retval = BlockFindContiguous(vcb, 1, currentStart, minBlocks,
+                                               maxBlocks, useMetaZone, false, &foundStart, &foundCount, flags);
+                       }
+               }
 
                if (retval != noErr) {
                        goto bailout;
@@ -1819,17 +2317,15 @@ static OSErr BlockAllocateContig(
        } // end while loop. 
 
 bailout:
-       /* mark the blocks as in-use */
+
        if (retval == noErr) {
                *actualStartBlock = foundStart;
                *actualNumBlocks = foundCount;
-               err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);
-
-               if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) {
-                       KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_END, *actualStartBlock, *actualNumBlocks, 0, 0, 0);
-               }
        }
 
+       if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_FIND_CONTIG_BITMAP | DBG_FUNC_END, foundStart, foundCount, retval, 0, 0);
+
        return retval;
 
 }
@@ -1838,12 +2334,11 @@ bailout:
 /*
 _______________________________________________________________________
 
-Routine:       BlockAllocateAny
+Routine:       BlockFindAny
 
-Function:      Allocate one or more allocation blocks.  If there are fewer
-                       free blocks than requested, all free blocks will be
-                       allocated.  The caller guarantees that there is at least
-                       one free block.
+Function: Find one or more allocation blocks; fewer than requested may
+          be returned.  The caller guarantees that there is at least one
+          free block.
 
 Inputs:
        vcb                             Pointer to volume where space is to be allocated
@@ -1858,12 +2353,12 @@ Outputs:
 _______________________________________________________________________
 */
 
-static OSErr BlockAllocateAny(
+static OSErr BlockFindAny(
                ExtendedVCB             *vcb,
                u_int32_t               startingBlock,
                register u_int32_t      endingBlock,
                u_int32_t               maxBlocks,
-               u_int32_t               flags,
+               hfs_block_alloc_flags_t flags,
                Boolean                 trustSummary,
                u_int32_t               *actualStartBlock,
                u_int32_t               *actualNumBlocks)
@@ -1913,7 +2408,7 @@ static OSErr BlockAllocateAny(
                }
        }
 
-       err =  BlockAllocateAnyBitmap(vcb, start_blk, end_blk, maxBlocks, 
+       err =  BlockFindAnyBitmap(vcb, start_blk, end_blk, maxBlocks,
                        flags, actualStartBlock, actualNumBlocks);
 
        return err;
@@ -1921,33 +2416,32 @@ static OSErr BlockAllocateAny(
 
 
 /*
- * BlockAllocateAnyBitmap finds free ranges by scanning the bitmap to figure out
- * where the free allocation blocks are.  Inputs and outputs are the same as for
- * BlockAllocateAny and BlockAllocateAnyRBTree
+ * BlockFindAnyBitmap finds free ranges by scanning the bitmap to
+ * figure out where the free allocation blocks are.  Inputs and
+ * outputs are the same as for BlockFindAny.
  */
 
-static OSErr BlockAllocateAnyBitmap(
+static OSErr BlockFindAnyBitmap(
                ExtendedVCB             *vcb,
                u_int32_t               startingBlock,
                register u_int32_t      endingBlock,
                u_int32_t               maxBlocks,
-               u_int32_t               flags,
+               hfs_block_alloc_flags_t flags,
                u_int32_t               *actualStartBlock,
                u_int32_t               *actualNumBlocks)
 {
        OSErr                   err;
-       register u_int32_t      block;                  //      current block number
+       register u_int32_t      block = 0;              //      current block number
        register u_int32_t      currentWord;    //      Pointer to current word within bitmap block
        register u_int32_t      bitMask;                //      Word with given bits already set (ready to OR in)
        register u_int32_t      wordsLeft;              //      Number of words left in this bitmap block
        u_int32_t  *buffer = NULL;
        u_int32_t  *currCache = NULL;
-       uintptr_t  blockRef;
+       uintptr_t  blockRef = 0;
        u_int32_t  bitsPerBlock;
        u_int32_t  wordsPerBlock;
        Boolean dirty = false;
        struct hfsmount *hfsmp = VCBTOHFS(vcb);
-       uint32_t summary_block_scan = 0;
        Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE);
        Boolean forceFlush = (flags & HFS_ALLOC_FLUSHTXN);
 
@@ -1955,6 +2449,7 @@ static OSErr BlockAllocateAnyBitmap(
                KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_START, startingBlock, endingBlock, maxBlocks, useMetaZone, 0);
 
 restartSearchAny:
+
        /*
         * When we're skipping the metadata zone and the start/end
         * range overlaps with the metadata zone then adjust the 
@@ -1981,7 +2476,7 @@ restartSearchAny:
        //
        //      Pre-read the first bitmap block
        //
-       err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef);
+       err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef, flags);
        if (err != noErr) goto Exit;
        buffer = currCache;
 
@@ -2005,6 +2500,8 @@ restartSearchAny:
         * While loop 1:
         *              Find the first unallocated block starting at 'block'
         */
+       uint32_t summary_block_scan = 0;
+
        block=startingBlock;
        while (block < endingBlock) {
                if ((currentWord & bitMask) == 0)
@@ -2053,7 +2550,7 @@ restartSearchAny:
                                        goto Exit;
                                }
 
-                               err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
+                               err = ReadBitmapBlock(vcb, block, &currCache, &blockRef, flags);
                                if (err != noErr) goto Exit;
                                buffer = currCache;
                                summary_block_scan = block;
@@ -2156,7 +2653,7 @@ restartSearchAny:
                                        goto Exit;
                                }
 
-                               err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
+                               err = ReadBitmapBlock(vcb, block, &currCache, &blockRef, flags);
                                if (err != noErr) {
                                        goto Exit;
                                }
@@ -2179,11 +2676,8 @@ Exit:
        
                // sanity check
                if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) {
-                       panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN);
+                       panic("hfs: BlockFindAnyBitmap: allocation overflow on \"%s\"", vcb->vcbVN);
                }
-
-               /* Mark the bits found as in-use */
-               err = BlockMarkAllocatedInternal (vcb, *actualStartBlock, *actualNumBlocks);
        }
        else {
                *actualStartBlock = 0;
@@ -2200,10 +2694,11 @@ Exit:
 /*
 _______________________________________________________________________
 
-Routine:       BlockAllocateKnown
+Routine:       BlockFindKnown
 
-Function:      Try to allocate space from known free space in the free
-                       extent cache.
+Function:   Return a potential extent from the free extent cache.  The
+                   returned extent *must* be marked allocated and removed
+                   from the cache by the *caller*.
 
 Inputs:
        vcb                             Pointer to volume where space is to be allocated
@@ -2218,7 +2713,7 @@ Returns:
 _______________________________________________________________________
 */
 
-static OSErr BlockAllocateKnown(
+static OSErr BlockFindKnown(
                ExtendedVCB             *vcb,
                u_int32_t               maxBlocks,
                u_int32_t               *actualStartBlock,
@@ -2229,7 +2724,7 @@ static OSErr BlockAllocateKnown(
        struct hfsmount *hfsmp = VCBTOHFS(vcb);
 
        if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
-               KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0);
+               KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0);
 
        hfs_lock_mount (hfsmp);
        lck_spin_lock(&vcb->vcbFreeExtLock);
@@ -2238,7 +2733,7 @@ static OSErr BlockAllocateKnown(
                lck_spin_unlock(&vcb->vcbFreeExtLock);
                hfs_unlock_mount(hfsmp);
                if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
-                       KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, dskFulErr, *actualStartBlock, *actualNumBlocks, 0, 0);
+                       KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_END, dskFulErr, *actualStartBlock, *actualNumBlocks, 0, 0);
                return dskFulErr;
        }
        lck_spin_unlock(&vcb->vcbFreeExtLock);
@@ -2255,29 +2750,17 @@ static OSErr BlockAllocateKnown(
 
        lck_spin_unlock(&vcb->vcbFreeExtLock);
 
-       remove_free_extent_cache(vcb, *actualStartBlock, *actualNumBlocks);
-
        // sanity check
        if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) 
        {
                printf ("hfs: BlockAllocateKnown() found allocation overflow on \"%s\"", vcb->vcbVN);
                hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
-               *actualStartBlock = 0;
-               *actualNumBlocks = 0;
                err = EIO;
-       } 
-       else 
-       {
-               //
-               //      Now mark the found extent in the bitmap
-               //
-               err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);
-       }
-
-       sanity_check_free_ext(vcb, 0);
+       } else
+               err = 0;
 
        if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
-               KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
+               KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
 
        return err;
 }
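With this refactor the BlockFind* routines only locate candidate space; nothing is marked in the bitmap until the caller commits the result.  A minimal sketch of the expected caller pairing, assuming the caller already holds the allocation locks (the helper names come from this diff, but the call shape is an illustration, not the literal xnu call site):

	u_int32_t start, count;
	OSErr err = BlockFindKnown(vcb, maxBlocks, &start, &count);
	if (err == noErr) {
		/* Per the comment above, the caller must pull the extent out of
		 * the free extent cache and only then mark it in the bitmap. */
		remove_free_extent_cache(vcb, start, count);
		err = BlockMarkAllocatedInternal(vcb, start, count, flags);
	}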
@@ -2300,12 +2783,11 @@ OSErr BlockMarkAllocated(
 
        hfsmp = VCBTOHFS(vcb);
 
-       return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks);
+       return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks, 0);
 
 }
 
 
-
 /*
 _______________________________________________________________________
 
@@ -2327,9 +2809,10 @@ _______________________________________________________________________
 */
 static 
 OSErr BlockMarkAllocatedInternal (
-               ExtendedVCB             *vcb,
-               u_int32_t               startingBlock,
-               register u_int32_t      numBlocks)
+                                                                 ExtendedVCB           *vcb,
+                                                                 u_int32_t             startingBlock,
+                                                                 u_int32_t     numBlocks,
+                                                                 hfs_block_alloc_flags_t flags)
 {
        OSErr                   err;
        register u_int32_t      *currentWord;   //      Pointer to current word within bitmap block
@@ -2338,14 +2821,24 @@ OSErr BlockMarkAllocatedInternal (
        u_int32_t               firstBit;               //      Bit index within word of first bit to allocate
        u_int32_t               numBits;                //      Number of bits in word to allocate
        u_int32_t               *buffer = NULL;
-       uintptr_t  blockRef;
+       uintptr_t  blockRef = 0;
        u_int32_t  bitsPerBlock;
        u_int32_t  wordsPerBlock;
        // XXXdbg
        struct hfsmount *hfsmp = VCBTOHFS(vcb);
 
        if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
-               KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
+               KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, flags, 0, 0);
+
+#if DEBUG
+
+       struct rl_entry *range;
+       TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_LOCKED_BLOCKS], rl_link) {
+               assert(rl_overlap(range, startingBlock,
+                                                 startingBlock + numBlocks - 1) == RL_NOOVERLAP);
+       }
+
+#endif
 
        int force_flush = 0;
        /*
@@ -2368,11 +2861,24 @@ OSErr BlockMarkAllocatedInternal (
 
        hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks);
 
+       /*
+        * Don't make changes to the disk if we're just reserving.  Note that
+        * we could do better in the tentative case because we could, in theory,
+        * avoid the journal flush above.  However, that would mean that we would
+        * need to catch the callback to stop it incorrectly adding the extent
+        * to our free cache.
+        */
+       if (ISSET(flags, HFS_ALLOC_LOCKED | HFS_ALLOC_TENTATIVE)) {
+               err = 0;
+               goto Exit;
+       }
+
        //
        //      Pre-read the bitmap block containing the first word of allocation
        //
 
-       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
+       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
+                                                 HFS_ALLOC_IGNORE_RESERVED);
        if (err != noErr) goto Exit;
        //
        //      Initialize currentWord, and wordsLeft.
@@ -2407,7 +2913,7 @@ OSErr BlockMarkAllocatedInternal (
                        numBits = numBlocks;                                    //      entire allocation is inside this one word
                        bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));        //      turn off bits after last
                }
-#if DEBUG_BUILD
+#if DEBUG
                if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
                        panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
                }
@@ -2433,7 +2939,8 @@ OSErr BlockMarkAllocatedInternal (
                        err = ReleaseBitmapBlock(vcb, blockRef, true);
                        if (err != noErr) goto Exit;
 
-                       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
+                       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
+                                                                 HFS_ALLOC_IGNORE_RESERVED);
                        if (err != noErr) goto Exit;
 
                        // XXXdbg
@@ -2445,7 +2952,7 @@ OSErr BlockMarkAllocatedInternal (
                        currentWord = buffer;
                        wordsLeft = wordsPerBlock;
                }
-#if DEBUG_BUILD
+#if DEBUG
                if (*currentWord != 0) {
                        panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
                }
@@ -2471,7 +2978,8 @@ OSErr BlockMarkAllocatedInternal (
                        err = ReleaseBitmapBlock(vcb, blockRef, true);
                        if (err != noErr) goto Exit;
 
-                       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
+                       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
+                                                                 HFS_ALLOC_IGNORE_RESERVED);
                        if (err != noErr) goto Exit;
 
                        // XXXdbg
@@ -2483,7 +2991,7 @@ OSErr BlockMarkAllocatedInternal (
                        currentWord = buffer;
                        wordsLeft = wordsPerBlock;
                }
-#if DEBUG_BUILD
+#if DEBUG
                if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
                        panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
                }
@@ -2652,7 +3160,7 @@ OSErr BlockMarkFreeInternal(
        u_int32_t       currentBit;             //      Bit index within word of current bit to allocate
        u_int32_t       numBits;                //      Number of bits in word to allocate
        u_int32_t       *buffer = NULL;
-       uintptr_t       blockRef;
+       uintptr_t       blockRef = 0;
        u_int32_t       bitsPerBlock;
        u_int32_t       wordsPerBlock;
        // XXXdbg
@@ -2667,27 +3175,46 @@ OSErr BlockMarkFreeInternal(
         */
        if ((do_validate == true) && 
                        (startingBlock + numBlocks > vcb->totalBlocks)) {
-               if (ALLOC_DEBUG) {
-                       panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN);
-               }
-
+#if ALLOC_DEBUG || DEBUG
+               panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN);
+               __builtin_unreachable();
+#else
                printf ("hfs: BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN);
                hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
                err = EIO;
                goto Exit;
+#endif
        }
 
        //
        //      Pre-read the bitmap block containing the first word of allocation
        //
 
-       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
+       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef, 
+                                                 HFS_ALLOC_IGNORE_RESERVED);
        if (err != noErr) goto Exit;
        // XXXdbg
        if (hfsmp->jnl) {
                journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
        }
 
+       uint32_t min_unmap = 0, max_unmap = UINT32_MAX;
+
+       // Work out the bounds of any unmap we can send down
+       struct rl_entry *range;
+       for (int i = 0; i < 2; ++i) {
+               TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[i], rl_link) {
+                       if (range->rl_start < startingBlock
+                               && range->rl_end >= min_unmap) {
+                               min_unmap = range->rl_end + 1;
+                       }
+                       if (range->rl_end >= startingBlock + numBlocks
+                               && range->rl_start < max_unmap) {
+                               max_unmap = range->rl_start;
+                       }
+               }
+       }
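	/*
	 * Hedged worked example (the block numbers are assumptions): when
	 * freeing blocks 100..199 with one reserved range covering 80..95 and
	 * another covering 210..240, the loop above yields min_unmap = 96 and
	 * max_unmap = 210, so the unmap issued below is never widened into
	 * either reserved range.
	 */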
+
        //
        //      Figure out how many bits and words per bitmap block.
        //
@@ -2703,7 +3230,7 @@ OSErr BlockMarkFreeInternal(
        currentWord = buffer + wordIndexInBlock;
        currentBit = startingBlock % kBitsPerWord;
        bitMask = kHighBitInWordMask >> currentBit;
-       while (true) {
+       while (unmapStart > min_unmap) {
                // Move currentWord/bitMask back by one bit
                bitMask <<= 1;
                if (bitMask == 0) {
@@ -2758,7 +3285,8 @@ OSErr BlockMarkFreeInternal(
                        err = ReleaseBitmapBlock(vcb, blockRef, true);
                        if (err != noErr) goto Exit;
 
-                       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
+                       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
+                                                                 HFS_ALLOC_IGNORE_RESERVED);
                        if (err != noErr) goto Exit;
 
                        // XXXdbg
@@ -2795,7 +3323,8 @@ OSErr BlockMarkFreeInternal(
                        err = ReleaseBitmapBlock(vcb, blockRef, true);
                        if (err != noErr) goto Exit;
 
-                       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
+                       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef, 
+                                                                 HFS_ALLOC_IGNORE_RESERVED);
                        if (err != noErr) goto Exit;
 
                        // XXXdbg
@@ -2825,7 +3354,7 @@ OSErr BlockMarkFreeInternal(
        currentWord = buffer + wordIndexInBlock;
        currentBit = (startingBlock_in + numBlocks_in - 1) % kBitsPerWord;
        bitMask = kHighBitInWordMask >> currentBit;
-       while (true) {
+       while (unmapStart + unmapCount < max_unmap) {
                // Move currentWord/bitMask/wordsLeft forward one bit
                bitMask >>= 1;
                if (bitMask == 0) {
@@ -2855,10 +3384,12 @@ Exit:
        return err;
 
 Corruption:
-#if DEBUG_BUILD
+#if DEBUG
        panic("hfs: BlockMarkFreeInternal: blocks not allocated!");
+       __builtin_unreachable();
 #else
-       printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks on volume %s\n", vcb->vcbVN);
+       printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks on volume %s <%u, %u>\n",
+                       vcb->vcbVN, startingBlock_in, numBlocks_in);
        hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
        err = EIO;
        goto Exit;
@@ -2905,7 +3436,8 @@ static OSErr BlockFindContiguous(
                Boolean                 useMetaZone,
                Boolean                 trustSummary,
                u_int32_t               *actualStartBlock,
-               u_int32_t               *actualNumBlocks)
+               u_int32_t               *actualNumBlocks,
+               hfs_block_alloc_flags_t flags)
 {
        OSErr                   err;
        register u_int32_t      currentBlock;           //      Block we're currently looking at.
@@ -2917,10 +3449,11 @@ static OSErr BlockFindContiguous(
        register u_int32_t      bitMask;
        register u_int32_t      wordsLeft;
        register u_int32_t      tempWord;
-       uintptr_t  blockRef;
+       uintptr_t  blockRef = 0;
        u_int32_t  wordsPerBlock;
        u_int32_t  updated_free_extent = 0;
        struct hfsmount *hfsmp = (struct hfsmount*) vcb;
+       HFSPlusExtentDescriptor best = { 0, 0 };
 
        if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
                KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_START, startingBlock, endingBlock, minBlocks, maxBlocks, 0);
@@ -2966,16 +3499,19 @@ static OSErr BlockFindContiguous(
         */
        if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
                uint32_t suggestion;
-               if (hfs_find_summary_free (hfsmp, currentBlock, &suggestion) == 0) {
-                       currentBlock = suggestion;
-               }               
+               err = hfs_find_summary_free (hfsmp, currentBlock, &suggestion);
+               if (err && err != ENOSPC)
+                       goto ErrorExit;
+               if (err == ENOSPC || suggestion >= stopBlock)
+                       goto DiskFull;
+               currentBlock = suggestion;
        }
 
 
        //
        //      Pre-read the first bitmap block.
        //
-       err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
+       err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags);
        if ( err != noErr ) goto ErrorExit;
 
        //
@@ -2987,6 +3523,10 @@ static OSErr BlockFindContiguous(
        currentWord = buffer + wordsLeft;
        wordsLeft = wordsPerBlock - wordsLeft;
 
+       uint32_t remaining = (hfsmp->freeBlocks - hfsmp->lockedBlocks
+                                                 - (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)
+                                                        ? 0 : hfsmp->tentativeBlocks));
+
        /*
         * This outer do-while loop is the main body of this function.  Its job is 
         * to search through the blocks (until we hit 'stopBlock'), and iterate
@@ -2996,6 +3536,13 @@ static OSErr BlockFindContiguous(
        do
        {
                foundBlocks = 0;
+               /*
+                * We will try and update the summary table as we search
+                * below.  Note that we will never update the summary table
+                * for the first and last blocks that the summary table
+                * covers.  Ideally, we should, but the benefits probably
+                * aren't that significant so we leave things alone for now.
+                */
                uint32_t summary_block_scan = 0;
                /*
                 * Inner while loop 1:
@@ -3066,14 +3613,15 @@ static OSErr BlockFindContiguous(
                                /* Skip over fully allocated bitmap blocks if we can */
                                if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
                                        uint32_t suggestion;
-                                       if (hfs_find_summary_free (hfsmp, currentBlock, &suggestion) == 0) {
-                                               if (suggestion < stopBlock) {
-                                                       currentBlock = suggestion;
-                                               }                       
-                                       }
+                                       err = hfs_find_summary_free (hfsmp, currentBlock, &suggestion);
+                                       if (err && err != ENOSPC)
+                                               goto ErrorExit;
+                                       if (err == ENOSPC || suggestion >= stopBlock)
+                                               goto LoopExit;
+                                       currentBlock = suggestion;
                                }
 
-                               err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
+                               err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags);
                                if ( err != noErr ) goto ErrorExit;
 
                                /*
@@ -3172,7 +3720,7 @@ FoundUnused:
                                        }
                                }
 
-                               err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
+                               err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags);
                                if ( err != noErr ) goto ErrorExit;
 
                                currentWord = buffer;
@@ -3214,37 +3762,82 @@ FoundUsed:
                foundBlocks = currentBlock - firstBlock;
                if (foundBlocks > maxBlocks)
                        foundBlocks = maxBlocks;
-               if (foundBlocks >= minBlocks)
+
+               if (remaining) {
+                       if (foundBlocks > remaining) {
+#if DEBUG || DEVELOPMENT
+                               printf("hfs: found more blocks than are indicated free!\n");
+#endif
+                               remaining = UINT32_MAX;
+                       } else
+                               remaining -= foundBlocks;
+               }
+
+               if (ISSET(flags, HFS_ALLOC_TRY_HARD)) {
+                       if (foundBlocks > best.blockCount) {
+                               best.startBlock = firstBlock;
+                               best.blockCount = foundBlocks;
+                       }
+
+                       if (foundBlocks >= maxBlocks || best.blockCount >= remaining)
+                               break;
+
+                       /*
+                        * Note that we will go ahead and add this free extent to our
+                        * cache below but that's OK because we'll remove it again if we
+                        * decide to use this extent.
+                        */
+               } else if (foundBlocks >= minBlocks)
                        break;          //      Found what we needed!
 
                /*
-                * We did not find the total blocks were were looking for, but 
+                * We did not find the total blocks we were looking for, but
                 * add this free block run to our free extent cache list, if possible.
                 */
-               if (hfsmp->jnl == NULL) {
-                       /* If there is no journal, go ahead and add to the free ext cache. */
-                       updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
+
+               // If we're ignoring tentative ranges, we need to account for them here
+               if (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)) {
+                       struct rl_entry free_extent = rl_make(firstBlock, firstBlock + foundBlocks - 1);
+                       struct rl_entry *range;
+                       TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS], rl_link) {
+                               rl_subtract(&free_extent, range);
+                               if (rl_len(range) == 0)
+                                       break;
+                       }
+                       firstBlock = free_extent.rl_start;
+                       foundBlocks = rl_len(&free_extent);
                }
-               else {
-                       /*
-                        * If journaled, only add to the free extent cache if this block is not
-                        * waiting for a TRIM to complete; that implies that the transaction that freed it
-                        * has not yet been committed to stable storage. 
-                        */
-                       int recently_deleted = 0;
-                       uint32_t nextblock;
-                       err = CheckUnmappedBytes(hfsmp, (uint64_t)firstBlock, 
-                                       (uint64_t)foundBlocks, &recently_deleted, &nextblock);
-                       if ((err) || (recently_deleted == 0))  {
-                               /* if we hit an error, or the blocks not recently freed, go ahead and insert it */
+
+               if (foundBlocks) {
+                       if (hfsmp->jnl == NULL) {
+                               /* If there is no journal, go ahead and add to the free ext cache. */
                                updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
                        }
-                       err = 0;
+                       else {
+                               /*
+                                * If journaled, only add to the free extent cache if this block is not
+                                * waiting for a TRIM to complete; that implies that the transaction that freed it
+                                * has not yet been committed to stable storage. 
+                                */
+                               int recently_deleted = 0;
+                               uint32_t nextblock;
+                               err = CheckUnmappedBytes(hfsmp, (uint64_t)firstBlock, 
+                                               (uint64_t)foundBlocks, &recently_deleted, &nextblock);
+                               if ((err) || (recently_deleted == 0))  {
+                                       /* if we hit an error, or the blocks not recently freed, go ahead and insert it */
+                                       updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
+                               }
+                               err = 0;
+                       }
                }
-
        } while (currentBlock < stopBlock);
 LoopExit:
 
+       if (ISSET(flags, HFS_ALLOC_TRY_HARD)) {
+               firstBlock = best.startBlock;
+               foundBlocks = best.blockCount;
+       }
+
        //      Return the outputs.
        if (foundBlocks < minBlocks)
        {
@@ -3365,7 +3958,8 @@ hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock,
        /*
         * Pre-read the bitmap block containing the first word of allocation
         */
-       error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
+       error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef, 
+                                                       HFS_ALLOC_IGNORE_TENTATIVE);
        if (error)
                goto JustReturn;
 
@@ -3418,7 +4012,8 @@ hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock,
                        error = ReleaseBitmapBlock(hfsmp, blockRef, false);
                        if (error) goto Exit;
 
-                       error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
+                       error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef, 
+                                                                       HFS_ALLOC_IGNORE_TENTATIVE);
                        if (error) goto Exit;
 
                        /* Readjust currentWord and wordsLeft. */
@@ -3450,7 +4045,8 @@ hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock,
                        error = ReleaseBitmapBlock(hfsmp, blockRef, false);
                        if (error) goto Exit;
 
-                       error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
+                       error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef, 
+                                                                       HFS_ALLOC_IGNORE_TENTATIVE);
                        if (error) goto Exit;
 
                        currentWord = buffer;
@@ -3750,7 +4346,7 @@ int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block,  uint32_t *ne
                 * Compute how much of hfs_summary_size is useable for the given number
                 * of allocation blocks eligible on this FS.
                 */
-               err = hfs_get_summary_index (hfsmp, hfsmp->allocLimit, &summary_cap);
+               err = hfs_get_summary_index (hfsmp, hfsmp->allocLimit - 1, &summary_cap);
                if (err) {
                        goto summary_exit;
                }
@@ -3810,7 +4406,7 @@ int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block,  uint32_t *ne
                if (maybe_has_blocks == 0) {
                        err = ENOSPC;
                }
-       }       
+       }
 
        /* If the summary table is not active for this mount, we'll just return ENOSPC */
 summary_exit:
@@ -4607,15 +5203,7 @@ static int hfs_scan_range_size (struct hfsmount *hfsmp, uint32_t bitmap_st, uint
         * have to complete the I/O on VBMIOSize boundaries, but we can only read
         * up until the end of the bitmap file.
         */
-       bitmap_len = hfsmp->totalBlocks / kBitsPerByte;
-       if (bitmap_len % (hfsmp->blockSize)) {
-               bitmap_len = (bitmap_len / hfsmp->blockSize);
-               /* round up to the end of the next alloc block */
-               bitmap_len++;
-
-               /* Convert the # of alloc blocks back to bytes. */
-               bitmap_len = bitmap_len * hfsmp->blockSize;     
-       }
+       bitmap_len = roundup(hfsmp->totalBlocks, hfsmp->blockSize * 8) / 8;
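	/*
	 * Hedged arithmetic check (assumed sizes): with a 4096-byte allocation
	 * block (32768 bits of bitmap) and totalBlocks = 100000, both the old
	 * code and roundup(100000, 32768) / 8 give 16384 bytes, i.e. the
	 * bitmap length rounded up to a whole allocation block.
	 */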
 
        remaining_bitmap = bitmap_len - bitmap_off;
 
@@ -4647,7 +5235,7 @@ int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int3
        u_int32_t  firstBit;       // Bit index within word of first bit to allocate
        u_int32_t  numBits;        // Number of bits in word to allocate
        u_int32_t  bitsPerBlock;
-       uintptr_t  blockRef;
+       uintptr_t  blockRef = 0;
        u_int32_t  wordsPerBlock;
        u_int32_t  numBlocks = 1;
        u_int32_t  *buffer = NULL;
@@ -4664,7 +5252,8 @@ int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int3
                /*
                 * Pre-read the bitmap block containing the first word of allocation
                 */
-               error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
+               error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef,
+                                                               HFS_ALLOC_IGNORE_TENTATIVE);
                if (error)
                        return (error);
        }
@@ -4784,11 +5373,17 @@ u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) {
        /* Force a rebuild of the summary table. */
        (void) hfs_rebuild_summary (hfsmp);
 
-       return 0;
+       // Delete any tentative ranges that are in the area we're shrinking
+       struct rl_entry *range, *next_range;
+       TAILQ_FOREACH_SAFE(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS],
+                                          rl_link, next_range) {
+               if (rl_overlap(range, new_end_block, RL_INFINITY) != RL_NOOVERLAP)
+                       hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS);
+       }
 
+       return 0;
 }
 
-
 /*
  * Remove an extent from the list of free extents.
  *
@@ -5046,6 +5641,16 @@ static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBloc
        if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
                KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
 
+#if DEBUG
+       for (i = 0; i < 2; ++i) {
+               struct rl_entry *range;
+               TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[i], rl_link) {
+                       assert(rl_overlap(range, startBlock,
+                                                         startBlock + blockCount - 1) == RL_NOOVERLAP);
+               }
+       }
+#endif
+
        /* No need to add extent that is beyond current allocLimit */
        if (startBlock >= hfsmp->allocLimit) {
                goto out_not_locked;
@@ -5233,7 +5838,7 @@ static errno_t get_more_bits(bitmap_context_t *bitmap_ctx)
                hfs_journal_lock(hfsmp);
 
                /* Flush the journal and wait for all I/Os to finish up */
-               error = hfs_journal_flush(hfsmp, TRUE);
+               error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
                if (error) {
                        hfs_journal_unlock(hfsmp);
                        return error;
@@ -5290,6 +5895,8 @@ static errno_t get_more_bits(bitmap_context_t *bitmap_ctx)
        if (error)
                return error;
 
+       assert(iosize != 0);
+
        /* hfs_scan_range_size should have verified startbit.  Convert it to bytes */
        byte_offset = start_bit / kBitsPerByte;
 
@@ -5569,7 +6176,7 @@ errno_t hfs_find_free_extents(struct hfsmount *hfsmp,
        hfs_journal_lock(hfsmp);
 
        /* Flush the journal and wait for all I/Os to finish up */
-       error = hfs_journal_flush(hfsmp, TRUE);
+       error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
        if (error) {
                hfs_journal_unlock(hfsmp);
                return error;
index b95d8c9921c1d8edbe6cefa371d001ea3edb1996..f3c4e37d282d779228469b9f892b73496cbc60f3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
index 3b8dd7ac1d443664ff1d58d2e6fbfbf38e29ea82..07f06afb8a3b8ae2afa7d57cb649b69dbcec0b7b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -179,7 +179,6 @@ typedef enum {
 #define                M_SetBitNum(integer,bitNumber)          ((integer) |= (1<<(bitNumber)))
 #define                M_IsOdd(integer)                                        (((integer) & 1) != 0)
 #define                M_IsEven(integer)                                       (((integer) & 1) == 0)
-#define                M_BTreeHeaderDirty(btreePtr)            btreePtr->flags |= kBTHeaderDirty
 
 #define                M_MapRecordSize(nodeSize)                       (nodeSize - sizeof (BTNodeDescriptor) - 6)
 #define                M_HeaderMapRecordSize(nodeSize)         (nodeSize - sizeof(BTNodeDescriptor) - sizeof(BTHeaderRec) - 128 - 8)
@@ -229,8 +228,11 @@ typedef struct BTreeControlBlock {                                 // fields specific to BTree CBs
        u_int32_t                                        numValidHints;         // Hint used to find correct record.
        u_int32_t                                       reservedNodes;
        BTreeIterator   iterator; // useable when holding exclusive b-tree lock
-} BTreeControlBlock, *BTreeControlBlockPtr;
 
+#if DEBUG
+       void                                            *madeDirtyBy[2];
+#endif
+} BTreeControlBlock, *BTreeControlBlockPtr;
 
 u_int32_t CalcKeySize(const BTreeControlBlock *btcb, const BTreeKey *key);
 #define CalcKeySize(btcb, key)                 ( ((btcb)->attributes & kBTBigKeysMask) ? ((key)->length16 + 2) : ((key)->length8 + 1) )
@@ -244,6 +246,13 @@ typedef enum {
                                        kBTHeaderDirty  = 0x00000001
 }      BTreeFlags;
 
+static inline void M_BTreeHeaderDirty(BTreeControlBlock *bt) {
+#if DEBUG
+       bt->madeDirtyBy[0] = __builtin_return_address(0);
+       bt->madeDirtyBy[1] = __builtin_return_address(1);
+#endif
+       bt->flags |= kBTHeaderDirty;
+}
 
 typedef        int8_t                          *NodeBuffer;
 typedef BlockDescriptor                 NodeRec, *NodePtr;             // remove this someday...
index 30eb8a84eafd7952b2f0aca8638654a4d5c1d6d9..20d38dd9381d70f8b54c13d50ff4bd98c795bda0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -212,27 +212,84 @@ ReplaceBTreeRecord                                (FileReference                          refNum,
 /*     Prototypes for exported routines in VolumeAllocation.c*/
 
 /* 
- * Flags for BlockAllocate() and BlockDeallocate()
+ * Flags for BlockAllocate(), BlockDeallocate() and hfs_block_alloc.
+ * Some of these are for internal use only.  See the comment at the
+ * top of hfs_alloc_int for more details on the semantics of these
+ * flags.
  */ 
-#define HFS_ALLOC_FORCECONTIG          0x1     //force contiguous block allocation; minblocks must be allocated
-#define HFS_ALLOC_METAZONE                     0x2     //can use metazone blocks
-#define HFS_ALLOC_SKIPFREEBLKS         0x4     //skip checking/updating freeblocks during alloc/dealloc
-#define HFS_ALLOC_FLUSHTXN                     0x8     //pick best fit for allocation, even if a jnl flush is req'd
-
+#define HFS_ALLOC_FORCECONTIG          0x001   //force contiguous block allocation; minblocks must be allocated
+#define HFS_ALLOC_METAZONE                     0x002   //can use metazone blocks
+#define HFS_ALLOC_SKIPFREEBLKS         0x004   //skip checking/updating freeblocks during alloc/dealloc
+#define HFS_ALLOC_FLUSHTXN                     0x008   //pick best fit for allocation, even if a jnl flush is req'd
+#define HFS_ALLOC_TENTATIVE                    0x010   //reserved allocation that can be claimed back
+#define HFS_ALLOC_LOCKED                       0x020   //reserved allocation that can't be claimed back
+#define HFS_ALLOC_IGNORE_TENTATIVE     0x040   //Steal tentative blocks if necessary
+#define HFS_ALLOC_IGNORE_RESERVED      0x080   //Ignore tentative/committed blocks
+#define HFS_ALLOC_USE_TENTATIVE                0x100   //Use the supplied tentative range (if possible)
+#define HFS_ALLOC_COMMIT                       0x200   //Commit the supplied extent to disk
+#define HFS_ALLOC_TRY_HARD                     0x400   //Search hard to try and get maxBlocks; implies HFS_ALLOC_FLUSHTXN
+#define HFS_ALLOC_ROLL_BACK                    0x800   //Reallocate blocks that were just deallocated
+#define HFS_ALLOC_FAST_DEV          0x1000  //Prefer fast device for allocation
+
+typedef uint32_t hfs_block_alloc_flags_t;
+
+struct rl_entry;
 EXTERN_API_C( OSErr )
-BlockAllocate                                  (ExtendedVCB *                  vcb,
-                                                                u_int32_t                              startingBlock,
-                                                                u_int32_t                              minBlocks,
-                                                                u_int32_t                              maxBlocks,
-                                                                u_int32_t                              flags,
-                                                                u_int32_t *                    startBlock,
-                                                                u_int32_t *                    actualBlocks);
+BlockAllocate                                  (ExtendedVCB *                   vcb,
+                                                                u_int32_t                               startingBlock,
+                                                                u_int32_t                               minBlocks,
+                                                                u_int32_t                               maxBlocks,
+                                                                hfs_block_alloc_flags_t flags,
+                                                                u_int32_t *                     startBlock,
+                                                                u_int32_t *                     actualBlocks);
+
+typedef struct hfs_alloc_extra_args {
+       // Used with HFS_ALLOC_TRY_HARD and HFS_ALLOC_FORCECONTIG
+       uint32_t                                max_blocks;
+
+       // Used with HFS_ALLOC_USE_TENTATIVE & HFS_ALLOC_COMMIT
+       struct rl_entry           **reservation_in;
+
+       // Used with HFS_ALLOC_TENTATIVE & HFS_ALLOC_LOCKED
+       struct rl_entry           **reservation_out;
+
+       /*
+        * If the maximum cannot be returned, the allocation will be
+        * trimmed to the specified alignment after taking
+        * @alignment_offset into account.  @alignment and
+        * @alignment_offset are both in terms of blocks, *not* bytes.
+        * The result will be such that:
+        *
+        *   (block_count + @alignment_offset) % @alignment == 0
+        *
+        * Alignment is *not* guaranteed.
+        *
+        * One example where alignment might be useful is in the case
+        * where the page size is greater than the allocation block size
+        * and I/O is being performed in multiples of the page size.
+        */
+       int                                             alignment;
+       int                                             alignment_offset;
+} hfs_alloc_extra_args_t;
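A hedged worked example of the alignment fields above (the sizes are assumptions, not taken from this header): with 4 KB allocation blocks and 16 KB pages, alignment would be 4; if the file's current end falls 2 blocks into a page, alignment_offset would be 2, so a trimmed result of 9 blocks would be cut back to 6 because (6 + 2) % 4 == 0.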
+
+/*
+ * Same as BlockAllocate but slightly different API.
+ * @extent.startBlock is a hint for where to start searching and
+ * @extent.blockCount is the minimum number of blocks acceptable.
+ * Additional arguments can be passed in @extra_args and use will
+ * depend on @flags.  See comment at top of hfs_block_alloc_int for
+ * more information.
+ */
+errno_t hfs_block_alloc(hfsmount_t *hfsmp,
+                                               HFSPlusExtentDescriptor *extent,
+                                               hfs_block_alloc_flags_t flags,
+                                               hfs_alloc_extra_args_t *extra_args);
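A minimal usage sketch of hfs_block_alloc under the new flags (the variable names and this particular flag combination are illustrative assumptions, not lifted from the commit):

	HFSPlusExtentDescriptor ext = { .startBlock = hint, .blockCount = min_blocks };
	struct rl_entry *reservation = NULL;
	hfs_alloc_extra_args_t args = {
		.max_blocks      = max_blocks,      /* try hard to get this many */
		.reservation_out = &reservation,    /* filled in for tentative/locked */
	};

	errno_t err = hfs_block_alloc(hfsmp, &ext,
			HFS_ALLOC_TRY_HARD | HFS_ALLOC_TENTATIVE, &args);
	if (!err) {
		/* ext describes the reserved extent; presumably it is later
		 * claimed via HFS_ALLOC_USE_TENTATIVE/HFS_ALLOC_COMMIT with
		 * reservation_in, or released with
		 * hfs_free_tentative(hfsmp, &reservation). */
	}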
 
 EXTERN_API_C( OSErr )
-BlockDeallocate                                        (ExtendedVCB *                  vcb,
-                                                                u_int32_t                              firstBlock,
-                                                                u_int32_t                              numBlocks,
-                                                                u_int32_t                              flags);
+BlockDeallocate                                        (ExtendedVCB *                   vcb,
+                                                                u_int32_t                               firstBlock,
+                                                                u_int32_t                               numBlocks,
+                                                                hfs_block_alloc_flags_t flags);
 
 EXTERN_API_C ( void )
 ResetVCBFreeExtCache(struct hfsmount *hfsmp);
@@ -261,6 +318,9 @@ hfs_init_summary (struct hfsmount *hfsmp);
 errno_t hfs_find_free_extents(struct hfsmount *hfsmp,
                                                          void (*callback)(void *data, off_t), void *callback_arg);
 
+void hfs_free_tentative(hfsmount_t *hfsmp, struct rl_entry **reservation);
+void hfs_free_locked(hfsmount_t *hfsmp, struct rl_entry **reservation);
+
 /*     File Extent Mapping routines*/
 EXTERN_API_C( OSErr )
 FlushExtentFile                                        (ExtendedVCB *                  vcb);
@@ -275,6 +335,15 @@ EXTERN_API_C( int32_t )
 CompareExtentKeysPlus                  (const HFSPlusExtentKey *searchKey,
                                                                 const HFSPlusExtentKey *trialKey);
 
+OSErr SearchExtentFile(ExtendedVCB                     *vcb,
+                                          const FCB                    *fcb,
+                                          int64_t                               filePosition,
+                                          HFSPlusExtentKey             *foundExtentKey,
+                                          HFSPlusExtentRecord   foundExtentData,
+                                          u_int32_t                    *foundExtentDataIndex,
+                                          u_int32_t                    *extentBTreeHint,
+                                          u_int32_t                    *endingFABNPlusOne );
+
 EXTERN_API_C( OSErr )
 TruncateFileC (ExtendedVCB *vcb, FCB *fcb, int64_t peof, int deleted, 
                           int rsrc, uint32_t fileid, Boolean truncateToExtent);
@@ -307,8 +376,6 @@ NodesAreContiguous                          (ExtendedVCB *                  vcb,
                                                                 u_int32_t                              nodeSize);
 #endif
 
-
-
 /*     Get the current time in UTC (GMT)*/
 EXTERN_API_C( u_int32_t )
 GetTimeUTC                                             (void);
index 0a1b412b6b7223ec88d769463aa2fb161941b644..81b384c480c26c60373147093d660ab17015965a 100644 (file)
 #include <sys/time.h>
 #include <sys/malloc.h>
 
+#if !RANGELIST_TEST
+#include <kern/debug.h>
+#endif
+
 #include "rangelist.h"
 
 static enum rl_overlaptype rl_scan_from(struct rl_head *rangelist, off_t start, off_t end, struct rl_entry **overlap, struct rl_entry *range);
@@ -67,8 +71,6 @@ rl_init(struct rl_head *rangelist)
     TAILQ_INIT(rangelist);
 }
 
-
-
 /*
  * Add a range to the list
  */
@@ -177,7 +179,7 @@ rl_remove(off_t start, off_t end, struct rl_head *rangelist)
        if (TAILQ_EMPTY(rangelist)) {
                return;
        };
-        
+
        range = TAILQ_FIRST(rangelist);
        while ((ovcase = rl_scan_from(rangelist, start, end, &overlap, range))) {
                switch (ovcase) {
@@ -257,16 +259,53 @@ rl_scan(struct rl_head *rangelist,
                off_t start,
                off_t end,
                struct rl_entry **overlap) {
-               
-       if (TAILQ_EMPTY(rangelist)) {
-               *overlap = NULL;
-               return RL_NOOVERLAP;
-       };
-        
+
        return rl_scan_from(rangelist, start, end, overlap, TAILQ_FIRST(rangelist));    
 }
 
+enum rl_overlaptype
+rl_overlap(const struct rl_entry *range, off_t start, off_t end)
+{
+       /*
+        * OK, check for overlap
+        *
+        * Six cases:
+        *      0) no overlap (RL_NOOVERLAP)
+        *      1) overlap == range (RL_MATCHINGOVERLAP)
+        *      2) overlap contains range (RL_OVERLAPCONTAINSRANGE)
+        *      3) range contains overlap (RL_OVERLAPISCONTAINED)
+        *      4) overlap starts before range (RL_OVERLAPSTARTSBEFORE)
+        *      5) overlap ends after range (RL_OVERLAPENDSAFTER)
+        */
+       if (start > range->rl_end || range->rl_start > end) {
+               /* Case 0 (RL_NOOVERLAP) */
+               return RL_NOOVERLAP;
+       }
+
+       if (range->rl_start == start && range->rl_end == end) {
+               /* Case 1 (RL_MATCHINGOVERLAP) */
+               return RL_MATCHINGOVERLAP;
+       }
+
+       if (range->rl_start <= start && range->rl_end >= end) {
+               /* Case 2 (RL_OVERLAPCONTAINSRANGE) */
+               return RL_OVERLAPCONTAINSRANGE;
+       }
 
+       if (start <= range->rl_start && end >= range->rl_end) {
+               /* Case 3 (RL_OVERLAPISCONTAINED) */
+               return RL_OVERLAPISCONTAINED;
+       }
+
+       if (range->rl_start < start && range->rl_end < end) {
+               /* Case 4 (RL_OVERLAPSTARTSBEFORE) */
+               return RL_OVERLAPSTARTSBEFORE;
+       }
+
+       /* Case 5 (RL_OVERLAPENDSAFTER) */
+       // range->rl_start > start && range->rl_end > end
+       return RL_OVERLAPENDSAFTER;
+}
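A quick hedged illustration of the six cases above (the concrete ranges are assumptions):

	/* With a list entry covering [10, 20]:                       */
	/*   rl_overlap(range, 25, 30) == RL_NOOVERLAP                */
	/*   rl_overlap(range, 10, 20) == RL_MATCHINGOVERLAP          */
	/*   rl_overlap(range, 12, 18) == RL_OVERLAPCONTAINSRANGE     */
	/*   rl_overlap(range,  5, 25) == RL_OVERLAPISCONTAINED       */
	/*   rl_overlap(range, 15, 25) == RL_OVERLAPSTARTSBEFORE      */
	/*   rl_overlap(range,  5, 15) == RL_OVERLAPENDSAFTER         */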
 
 /*
  * Walk the list of ranges for an entry to
@@ -276,88 +315,29 @@ rl_scan(struct rl_head *rangelist,
  *          There may be more than one.
  */
 static enum rl_overlaptype
-rl_scan_from(struct rl_head *rangelist,
+rl_scan_from(struct rl_head *rangelist __unused,
                         off_t start,
                         off_t end,
                         struct rl_entry **overlap,
-                       struct rl_entry *range)
+                        struct rl_entry *range)
 {
-       if (TAILQ_EMPTY(rangelist)) {
-               *overlap = NULL;
-               return RL_NOOVERLAP;
-       };
-        
 #ifdef RL_DIAGNOSTIC
-               rl_verify(rangelist);
+       rl_verify(rangelist);
 #endif
 
-       *overlap = range;
-        
-       while (1) {
-               /*
-                * OK, check for overlap
-                *
-                * Six cases:
-                *      0) no overlap (RL_NOOVERLAP)
-                *      1) overlap == range (RL_MATCHINGOVERLAP)
-                *      2) overlap contains range (RL_OVERLAPCONTAINSRANGE)
-                *      3) range contains overlap (RL_OVERLAPISCONTAINED)
-                *      4) overlap starts before range (RL_OVERLAPSTARTSBEFORE)
-                *      5) overlap ends after range (RL_OVERLAPENDSAFTER)
-                */
-               if (((range->rl_end != RL_INFINITY) && (start > range->rl_end)) ||
-                       ((end != RL_INFINITY) && (range->rl_start > end))) {
-                       /* Case 0 (RL_NOOVERLAP), at least with the current entry: */
-                       if ((end != RL_INFINITY) && (range->rl_start > end)) {
-                               return RL_NOOVERLAP;
-                       };
-                       
-                       /* Check the other entries in the list: */
-                       range = TAILQ_NEXT(range, rl_link);
+       while (range) {
+               enum rl_overlaptype ot = rl_overlap(range, start, end);
+
+               if (ot != RL_NOOVERLAP || range->rl_start > end) {
                        *overlap = range;
-                       if (range == NULL)
-                               return RL_NOOVERLAP;
-                       
-                       continue;
-               }
-               
-               if ((range->rl_start == start) && (range->rl_end == end)) {
-                       /* Case 1 (RL_MATCHINGOVERLAP) */
-                       return RL_MATCHINGOVERLAP;
-               }
-               
-               if ((range->rl_start <= start) &&
-                       (end != RL_INFINITY) &&
-                       ((range->rl_end >= end) || (range->rl_end == RL_INFINITY))) {
-                               /* Case 2 (RL_OVERLAPCONTAINSRANGE) */
-                       return RL_OVERLAPCONTAINSRANGE;
-               }
-               
-               if ((start <= range->rl_start) &&
-                       ((end == RL_INFINITY) ||
-                        ((range->rl_end != RL_INFINITY) && (end >= range->rl_end)))) {
-                       /* Case 3 (RL_OVERLAPISCONTAINED) */
-                       return RL_OVERLAPISCONTAINED;
-               }
-               
-               if ((range->rl_start < start) &&
-                       ((range->rl_end >= start) || (range->rl_end == RL_INFINITY))) {
-                       /* Case 4 (RL_OVERLAPSTARTSBEFORE) */
-                       return RL_OVERLAPSTARTSBEFORE;
-               }
-               
-               if ((range->rl_start > start) &&
-                       (end != RL_INFINITY) &&
-                       ((range->rl_end > end) || (range->rl_end == RL_INFINITY))) {
-                       /* Case 5 (RL_OVERLAPENDSAFTER) */
-                       return RL_OVERLAPENDSAFTER;
+                       return ot;
                }
 
-               /* Control should never reach here... */
-#ifdef RL_DIAGNOSTIC
-               panic("hfs: rl_scan_from: unhandled overlap condition?!");
-#endif
+               range = TAILQ_NEXT(range, rl_link);
        }
+
+       *overlap = NULL;
+       return RL_NOOVERLAP;
 }
 
 
@@ -421,6 +401,38 @@ void rl_remove_all(struct rl_head *rangelist)
        TAILQ_INIT(rangelist);
 }
 
+/*
+ * In the case where b is contained by a, we return the largest part
+ * remaining.  The result is stored in a.
+ */
+void rl_subtract(struct rl_entry *a, const struct rl_entry *b)
+{
+       switch (rl_overlap(b, a->rl_start, a->rl_end)) {
+               case RL_MATCHINGOVERLAP:
+               case RL_OVERLAPCONTAINSRANGE:
+                       a->rl_end = a->rl_start - 1;
+                       break;
+               case RL_OVERLAPISCONTAINED:
+                       // Keep the bigger part
+                       if (b->rl_start - a->rl_start >= a->rl_end - b->rl_end) {
+                               // Keep left
+                               a->rl_end = b->rl_start - 1;
+                       } else {
+                               // Keep right
+                               a->rl_start = b->rl_end + 1;
+                       }
+                       break;
+               case RL_OVERLAPSTARTSBEFORE:
+                       a->rl_start = b->rl_end + 1;
+                       break;
+               case RL_OVERLAPENDSAFTER:
+                       a->rl_end = b->rl_start - 1;
+                       break;
+               case RL_NOOVERLAP:
+                       break;
+       }
+}
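A hedged illustration of the rule above (the offsets are hypothetical): when b is contained in a, the larger surviving side wins; when b overlaps only one end of a, that end is simply trimmed.

    struct rl_entry a = rl_make(0, 1023);
    struct rl_entry b = rl_make(0, 255);

    rl_subtract(&a, &b);        /* b is contained in a; the left remainder is empty
                                 * and the right remainder (256..1023) is larger,
                                 * so a becomes [256, 1023] */

    a = rl_make(0, 511);
    b = rl_make(256, 1023);
    rl_subtract(&a, &b);        /* RL_OVERLAPENDSAFTER: only the tail of a overlaps b,
                                 * so a is trimmed to [0, 255] */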
+
 #else /* not HFS - temp workaround until 4277828 is fixed */
 /* stubs for exported routines that aren't present when we build kernel without HFS */
 
index 0f66d34c957690d1aacb310fa840aa1d27051e73..41708be5da34055d6faed119889fc5692278e134 100644 (file)
@@ -44,7 +44,7 @@ enum rl_overlaptype {
     RL_OVERLAPENDSAFTER                /* 5 */
 };
 
-#define RL_INFINITY ((off_t)-1)
+#define RL_INFINITY INT64_MAX
 
 TAILQ_HEAD(rl_head, rl_entry);
 
@@ -63,6 +63,22 @@ enum rl_overlaptype rl_scan(struct rl_head *rangelist,
                                                        off_t start,
                                                        off_t end,
                                                        struct rl_entry **overlap);
+enum rl_overlaptype rl_overlap(const struct rl_entry *range, 
+                                                          off_t start, off_t end);
+
+static __attribute__((pure)) inline
+off_t rl_len(const struct rl_entry *range)
+{
+       return range->rl_end - range->rl_start + 1;
+}
+
+void rl_subtract(struct rl_entry *a, const struct rl_entry *b);
+
+static inline struct rl_entry rl_make(off_t start, off_t end)
+{
+       return (struct rl_entry){ .rl_start = start, .rl_end = end };
+}
+
 __END_DECLS
 
 #endif /* __APPLE_API_PRIVATE */
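A small sketch of the inclusive-endpoint convention behind the new inline helpers above (offsets hypothetical):

    struct rl_entry r = rl_make(0, 4095);   /* covers offsets 0..4095 inclusive */
    off_t len = rl_len(&r);                 /* 4096 == rl_end - rl_start + 1 */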
index 7433ece891298666a62db8a18371b653fcd61bf1..6c5370018a9bac03cf810bc5e5a35068e5f4c873 100644 (file)
@@ -13,6 +13,9 @@ DATAFILES = \
        types.h vmparam.h _types.h _param.h \
        _mcontext.h
 
+PRIVATE_DATAFILES = \
+       disklabel.h
+
 KERNELFILES = \
        endian.h param.h \
        profile.h signal.h limits.h _limits.h \
@@ -21,7 +24,7 @@ KERNELFILES = \
 
 
 INSTALL_MD_LIST = ${DATAFILES}
-INSTALL_MD_LCL_LIST = ${DATAFILES} disklabel.h
+INSTALL_MD_LCL_LIST = ${PRIVATE_DATAFILES}
 
 INSTALL_MD_DIR = i386
 
index a8dc219324d46090f7ed65ccba6fed8997a456ff..94a32658430d3fd6d840a0c65efd29e663dbed5b 100644 (file)
@@ -36,7 +36,6 @@
 
 #include <kern/thread.h>
 
-extern void astbsd_on(void);
 extern void act_set_astbsd(thread_t);
 extern void bsd_ast(thread_t);
 
index b344c7a9de873f9bc3c50fb7ce813e5976df4ceb..d9b90aff2a4991dd1dcecc0310cadf3b853f8ac3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <netinet/flow_divert.h>       /* flow_divert_init() */
 #include <net/content_filter.h>                /* for cfil_init() */
 #include <net/necp.h>                  /* for necp_init() */
+#include <net/network_agent.h>         /* for netagent_init() */
 #include <net/packet_mangler.h>                /* for pkt_mnglr_init() */
 #include <net/if_utun.h>               /* for utun_register_control() */
-#include <net/if_ipsec.h>       /* for ipsec_register_control() */
+#include <net/if_ipsec.h>              /* for ipsec_register_control() */
 #include <net/net_str_id.h>            /* for net_str_id_init() */
 #include <net/netsrc.h>                        /* for netsrc_init() */
 #include <net/ntstat.h>                        /* for nstat_init() */
 #include <netinet/tcp_cc.h>                    /* for tcp_cc_init() */
+#include <netinet/mptcp_var.h>         /* for mptcp_control_register() */
 #include <kern/assert.h>               /* for assert() */
 #include <sys/kern_overrides.h>                /* for init_system_override() */
 
 #include <machine/pal_routines.h>
 #include <console/video_console.h>
 
+
 void * get_user_regs(thread_t);                /* XXX kludge for <machine/thread.h> */
 void IOKitInitializeTime(void);                /* XXX */
 void IOSleep(unsigned int);            /* XXX */
@@ -243,6 +246,11 @@ struct     kmemstats kmemstats[M_LAST];
 
 struct vnode *rootvp;
 int boothowto = RB_DEBUG;
+int minimalboot = 0;
+
+#if PROC_REF_DEBUG
+__private_extern__ int proc_ref_tracking_disabled = 0; /* disable panics on leaked proc refs across syscall boundary */
+#endif
 
 extern kern_return_t IOFindBSDRoot(char *, unsigned int, dev_t *, u_int32_t *);
 extern void IOSecureBSDRoot(const char * rootName);
@@ -271,6 +279,10 @@ void bsd_exec_setup(int);
 
 __private_extern__ int bootarg_vnode_cache_defeat = 0;
 
+#if CONFIG_JETSAM && (DEVELOPMENT || DEBUG)
+__private_extern__ int bootarg_no_vnode_jetsam = 0;
+#endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */
+
 /*
  * Prevent kernel-based ASLR from being used, for testing.
  */
@@ -288,12 +300,11 @@ void bsd_utaskbootstrap(void);
 static void parse_bsd_args(void);
 extern task_t bsd_init_task;
 extern boolean_t init_task_died;
-extern char    init_task_failure_data[];
 #if CONFIG_DEV_KMEM
 extern void dev_kmem_init(void);
 #endif
 extern void time_zone_slock_init(void);
-extern void select_wait_queue_init(void);
+extern void select_waitq_init(void);
 static void process_name(const char *, proc_t);
 
 static void setconf(void);
@@ -340,11 +351,8 @@ extern int check_policy_init(int);
 static void
 process_name(const char *s, proc_t p)
 {
-       size_t length = strlen(s);
-
-       bcopy(s, p->p_comm,
-               length >= sizeof(p->p_comm) ? sizeof(p->p_comm) :
-                       length + 1);
+       strlcpy(p->p_comm, s, sizeof(p->p_comm));
+       strlcpy(p->p_name, s, sizeof(p->p_name));
 }
 
 /* To allow these values to be patched, they're globals here */
@@ -500,7 +508,7 @@ bsd_init(void)
 
        /* Initialize System Override call */
        init_system_override();
-
+       
        /*
         * Create process 0.
         */
@@ -534,10 +542,6 @@ bsd_init(void)
        LIST_INSERT_HEAD(SESSHASH(0), &session0, s_hash);
        proc_list_unlock();
 
-#if CONFIG_LCTX
-       kernproc->p_lctx = NULL;
-#endif
-
        kernproc->task = kernel_task;
        
        kernproc->p_stat = SRUN;
@@ -644,7 +648,7 @@ bsd_init(void)
                                &minimum,
                                (vm_size_t)bsd_pageable_map_size,
                                TRUE,
-                               VM_FLAGS_ANYWHERE,
+                               VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_BSD),
                                &bsd_pageable_map);
                if (ret != KERN_SUCCESS) 
                        panic("bsd_init: Failed to allocate bsd pageable map");
@@ -750,8 +754,8 @@ bsd_init(void)
        psem_cache_init();
        bsd_init_kprintf("calling time_zone_slock_init\n");
        time_zone_slock_init();
-       bsd_init_kprintf("calling select_wait_queue_init\n");
-       select_wait_queue_init();
+       bsd_init_kprintf("calling select_waitq_init\n");
+       select_waitq_init();
 
        /*
         * Initialize protocols.  Block reception of incoming packets
@@ -857,7 +861,9 @@ bsd_init(void)
        /* Initialize Network Extension Control Policies */
        necp_init();
 #endif
-       
+
+       netagent_init();
+
        /* register user tunnel kernel control handler */
        utun_register_control();
 #if IPSEC
@@ -866,6 +872,9 @@ bsd_init(void)
        netsrc_init();
        nstat_init();
        tcp_cc_init();
+#if MPTCP
+       mptcp_control_register();
+#endif /* MPTCP */
 #endif /* NETWORKING */
 
        bsd_init_kprintf("calling vnode_pager_bootstrap\n");
@@ -966,7 +975,7 @@ bsd_init(void)
            devfs_kernel_mount(mounthere);
        }
 #endif /* DEVFS */
-       
+
        /* Initialize signal state for process 0. */
        bsd_init_kprintf("calling siginit\n");
        siginit(kernproc);
@@ -990,6 +999,7 @@ bsd_init(void)
        consider_zone_gc(FALSE);
 #endif
 
+
        bsd_init_kprintf("done\n");
 }
 
@@ -1015,7 +1025,6 @@ bsdinit_task(void)
 
        bsd_init_task = get_threadtask(thread);
        init_task_died = FALSE;
-       init_task_failure_data[0] = 0;
 
 #if CONFIG_MACF
        mac_cred_label_associate_user(p->p_ucred);
@@ -1103,7 +1112,7 @@ bsd_utaskbootstrap(void)
        ut = (struct uthread *)get_bsdthread_info(thread);
        ut->uu_sigmask = 0;
        act_set_astbsd(thread);
-       (void) thread_resume(thread);
+       proc_clear_return_wait(initproc, thread);
 }
 
 static void
@@ -1121,6 +1130,15 @@ parse_bsd_args(void)
        if (PE_parse_boot_argn("-x", namep, sizeof (namep))) /* safe boot */
                boothowto |= RB_SAFEBOOT;
 
+       if (PE_parse_boot_argn("-minimalboot", namep, sizeof(namep))) {
+               /*
+                * -minimalboot indicates that we want userspace to be bootstrapped to a
+                * minimal environment.  What constitutes minimal is up to the bootstrap
+                * process.
+                */
+               minimalboot = 1;
+       }
+
 
        /* disable vnode_cache_is_authorized() by setting vnode_cache_defeat */
        if (PE_parse_boot_argn("-vnode_cache_defeat", namep, sizeof (namep)))
@@ -1150,6 +1168,21 @@ parse_bsd_args(void)
        if (PE_parse_boot_argn("-novfscache", namep, sizeof(namep))) {
                nc_disabled = 1;
        }
+
+#if CONFIG_JETSAM && (DEVELOPMENT || DEBUG)
+       if (PE_parse_boot_argn("-no_vnode_jetsam", namep, sizeof(namep)))
+                bootarg_no_vnode_jetsam = 1;
+#endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */
+
+
+
+#if PROC_REF_DEBUG
+       if (PE_parse_boot_argn("-disable_procref_tracking", namep, sizeof(namep))) {
+               proc_ref_tracking_disabled = 1;
+       }
+#endif
+
+       PE_parse_boot_argn("sigrestrict", &sigrestrict_arg, sizeof(sigrestrict_arg));
 }
 
 void
index 648d6e30568d5c7d25ae437e4a79c81adfcb19df..f941c0128ce5336820d890885eec248928ebe51c 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
+ *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
  * unlawful or unlicensed copies of an Apple operating system, or to
  * circumvent, violate, or enable the circumvention or violation of, any
  * terms of an Apple operating system software license agreement.
- * 
+ *
  * Please obtain a copy of the License at
  * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
+ *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,7 +22,7 @@
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- * 
+ *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 #include <sys/time.h>
@@ -37,7 +37,7 @@
 #include <kern/assert.h>
 #include <sys/conf.h>
 #include <sys/proc_internal.h>
-#include <sys/buf.h>   /* for SET */
+#include <sys/buf.h> /* for SET */
 #include <sys/kernel.h>
 #include <sys/user.h>
 #include <sys/sysent.h>
@@ -48,29 +48,27 @@ extern int chrtoblk_set(int, int);
 extern vm_offset_t kmem_mb_alloc(vm_map_t, int, int);
 
 /* XXX most of these just exist to export; there's no good header for them */
-void   pcb_synch(void);
+void pcb_synch(void);
 
-TAILQ_HEAD(,devsw_lock) devsw_locks;
+TAILQ_HEAD(, devsw_lock) devsw_locks;
 lck_mtx_t devsw_lock_list_mtx;
-lck_grp_t *devsw_lock_grp;
+lck_grp_t * devsw_lock_grp;
 
 /* Just to satisfy pstat command */
-int     dmmin, dmmax, dmtext;
+int dmmin, dmmax, dmtext;
 
 vm_offset_t
-kmem_mb_alloc(vm_map_t  mbmap, int size, int physContig) 
+kmem_mb_alloc(vm_map_t mbmap, int size, int physContig)
 {
-        vm_offset_t addr = 0;
+       vm_offset_t addr = 0;
        kern_return_t kr = KERN_SUCCESS;
 
-       if(!physContig)
-               kr = kernel_memory_allocate(mbmap, &addr, size,
-                       0, KMA_NOPAGEWAIT|KMA_KOBJECT|KMA_LOMEM);
+       if (!physContig)
+               kr = kernel_memory_allocate(mbmap, &addr, size, 0, KMA_NOPAGEWAIT | KMA_KOBJECT | KMA_LOMEM, VM_KERN_MEMORY_MBUF);
        else
-               kr = kmem_alloc_contig(mbmap, &addr, size, PAGE_MASK, 
-                       0xfffff, 0, KMA_NOPAGEWAIT | KMA_KOBJECT | KMA_LOMEM);
+               kr = kmem_alloc_contig(mbmap, &addr, size, PAGE_MASK, 0xfffff, 0, KMA_NOPAGEWAIT | KMA_KOBJECT | KMA_LOMEM, VM_KERN_MEMORY_MBUF);
 
-	if(kr != KERN_SUCCESS)
+       if (kr != KERN_SUCCESS)
                addr = 0;
 
        return addr;
@@ -89,17 +87,17 @@ current_proc(void)
 {
        /* Never returns a NULL */
        struct uthread * ut;
-       struct proc *p; 
+       struct proc * p;
        thread_t thread = current_thread();
 
-       ut = (struct uthread *)get_bsdthread_info(thread); 
-       if (ut &&  (ut->uu_flag & UT_VFORK) && ut->uu_proc) {
+       ut = (struct uthread *)get_bsdthread_info(thread);
+       if (ut && (ut->uu_flag & UT_VFORK) && ut->uu_proc) {
                p = ut->uu_proc;
-               if ((p->p_lflag & P_LINVFORK) == 0) 
+               if ((p->p_lflag & P_LINVFORK) == 0)
                        panic("returning child proc not under vfork");
-               if (p->p_vforkact != (void *)thread) 
+               if (p->p_vforkact != (void *)thread)
                        panic("returning child proc which is not cur_act");
-               return(p);
+               return (p);
        }
 
        p = (struct proc *)get_bsdtask_info(current_task());
@@ -114,7 +112,7 @@ current_proc(void)
 
 struct bdevsw nobdev = NO_BDEVICE;
 struct cdevsw nocdev = NO_CDEVICE;
-/* 
+/*
  *     if index is -1, return a free slot if available
  *       else see whether the index is free
  *     return the major number that is free else -1
@@ -126,32 +124,31 @@ struct cdevsw nocdev = NO_CDEVICE;
 int
 bdevsw_isfree(int index)
 {
-       struct bdevsw *devsw;
+       struct bdevsw * devsw;
 
        if (index < 0) {
-           if (index == -1)
-               index = 1;      /* start at 1 to avoid collision with volfs (Radar 2842228) */
-           else
-               index = -index; /* start at least this far up in the table */
-           devsw = &bdevsw[index];
-           for(; index < nblkdev; index++, devsw++) {
-               if(memcmp((char *)devsw, 
-                           (char *)&nobdev, 
-                           sizeof(struct bdevsw)) == 0)
-                   break;
-           }
+               if (index == -1)
+                       index = 1; /* start at 1 to avoid collision with volfs (Radar 2842228) */
+               else
+                       index = -index; /* start at least this far up in the table */
+               devsw = &bdevsw[index];
+               for (; index < nblkdev; index++, devsw++) {
+                       if (memcmp((char *)devsw, (char *)&nobdev, sizeof(struct bdevsw)) == 0)
+                               break;
+               }
        }
+
+       if (index < 0 || index >= nblkdev)
+               return (-1);
+
        devsw = &bdevsw[index];
-       if ((index < 0) || (index >= nblkdev) ||
-           (memcmp((char *)devsw, 
-                         (char *)&nobdev, 
-                         sizeof(struct bdevsw)) != 0)) {
-               return(-1);
+       if ((memcmp((char *)devsw, (char *)&nobdev, sizeof(struct bdevsw)) != 0)) {
+               return (-1);
        }
-       return(index);
+       return (index);
 }
 
-/* 
+/*
  *     if index is -1, find a free slot to add
  *       else see whether the slot is free
  *     return the major number that is used else -1
@@ -161,36 +158,36 @@ bdevsw_isfree(int index)
  *     instead of starting at 0
  */
 int
-bdevsw_add(int index, struct bdevsw * bsw) 
+bdevsw_add(int index, struct bdevsw * bsw)
 {
        index = bdevsw_isfree(index);
        if (index < 0) {
-               return(-1);
+               return (-1);
        }
        bdevsw[index] = *bsw;
-       return(index);
+       return (index);
 }
 /*
  *     if the slot has the same bsw, then remove
  *     else -1
  */
 int
-bdevsw_remove(int index, struct bdevsw * bsw) 
+bdevsw_remove(int index, struct bdevsw * bsw)
 {
-       struct bdevsw *devsw;
+       struct bdevsw * devsw;
+
+       if (index < 0 || index >= nblkdev)
+               return (-1);
 
        devsw = &bdevsw[index];
-       if ((index < 0) || (index >= nblkdev) ||
-           (memcmp((char *)devsw, 
-                         (char *)bsw, 
-                         sizeof(struct bdevsw)) != 0)) {
-               return(-1);
+       if ((memcmp((char *)devsw, (char *)bsw, sizeof(struct bdevsw)) != 0)) {
+               return (-1);
        }
        bdevsw[index] = nobdev;
-       return(index);
+       return (index);
 }
 
-/* 
+/*
  *     if index is -1, return a free slot if available
  *       else see whether the index is free
  *     return the major number that is free else -1
@@ -202,32 +199,31 @@ bdevsw_remove(int index, struct bdevsw * bsw)
 int
 cdevsw_isfree(int index)
 {
-       struct cdevsw *devsw;
+       struct cdevsw * devsw;
 
        if (index < 0) {
-           if (index == -1)
-               index = 0;
-           else
-               index = -index; /* start at least this far up in the table */
-           devsw = &cdevsw[index];
-           for(; index < nchrdev; index++, devsw++) {
-               if(memcmp((char *)devsw, 
-                           (char *)&nocdev, 
-                           sizeof(struct cdevsw)) == 0)
-                   break;
-           }
+               if (index == -1)
+                       index = 0;
+               else
+                       index = -index; /* start at least this far up in the table */
+               devsw = &cdevsw[index];
+               for (; index < nchrdev; index++, devsw++) {
+                       if (memcmp((char *)devsw, (char *)&nocdev, sizeof(struct cdevsw)) == 0)
+                               break;
+               }
        }
+
+       if (index < 0 || index >= nchrdev)
+               return (-1);
+
        devsw = &cdevsw[index];
-       if ((index < 0) || (index >= nchrdev) ||
-           (memcmp((char *)devsw, 
-                         (char *)&nocdev, 
-                         sizeof(struct cdevsw)) != 0)) {
-               return(-1);
+       if ((memcmp((char *)devsw, (char *)&nocdev, sizeof(struct cdevsw)) != 0)) {
+               return (-1);
        }
-       return(index);
+       return (index);
 }
 
-/* 
+/*
  *     if index is -1, find a free slot to add
  *       else see whether the slot is free
  *     return the major number that is used else -1
@@ -242,34 +238,34 @@ cdevsw_isfree(int index)
  *             before them.  -24 is currently a safe starting point.
  */
 int
-cdevsw_add(int index, struct cdevsw * csw) 
+cdevsw_add(int index, struct cdevsw * csw)
 {
        index = cdevsw_isfree(index);
        if (index < 0) {
-               return(-1);
+               return (-1);
        }
        cdevsw[index] = *csw;
-       return(index);
+       return (index);
 }
 /*
  *     if the slot has the same csw, then remove
  *     else -1
  */
 int
-cdevsw_remove(int index, struct cdevsw * csw) 
+cdevsw_remove(int index, struct cdevsw * csw)
 {
-       struct cdevsw *devsw;
+       struct cdevsw * devsw;
+
+       if (index < 0 || index >= nchrdev)
+               return (-1);
 
        devsw = &cdevsw[index];
-       if ((index < 0) || (index >= nchrdev) ||
-           (memcmp((char *)devsw, 
-                         (char *)csw, 
-                         sizeof(struct cdevsw)) != 0)) {
-               return(-1);
+       if ((memcmp((char *)devsw, (char *)csw, sizeof(struct cdevsw)) != 0)) {
+               return (-1);
        }
        cdevsw[index] = nocdev;
        cdevsw_flags[index] = 0;
-       return(index);
+       return (index);
 }
 
 static int
@@ -278,7 +274,7 @@ cdev_set_bdev(int cdev, int bdev)
        return (chrtoblk_set(cdev, bdev));
 }
 
-int  
+int
 cdevsw_add_with_bdev(int index, struct cdevsw * csw, int bdev)
 {
        index = cdevsw_add(index, csw);
@@ -293,17 +289,17 @@ cdevsw_add_with_bdev(int index, struct cdevsw * csw, int bdev)
 }
 
 int
-cdevsw_setkqueueok(int index, struct cdevsw *csw, int use_offset)
+cdevsw_setkqueueok(int index, struct cdevsw * csw, int use_offset)
 {
-       struct cdevsw *devsw;
+       struct cdevsw * devsw;
        uint64_t flags = CDEVSW_SELECT_KQUEUE;
 
+       if (index < 0 || index >= nchrdev)
+               return (-1);
+
        devsw = &cdevsw[index];
-       if ((index < 0) || (index >= nchrdev) ||
-           (memcmp((char *)devsw, 
-                         (char *)csw, 
-                         sizeof(struct cdevsw)) != 0)) {
-               return(-1);
+       if ((memcmp((char *)devsw, (char *)csw, sizeof(struct cdevsw)) != 0)) {
+               return (-1);
        }
 
        if (use_offset) {
@@ -314,19 +310,19 @@ cdevsw_setkqueueok(int index, struct cdevsw *csw, int use_offset)
        return 0;
 }
 
-#include <pexpert/pexpert.h>   /* for PE_parse_boot_arg */
+#include <pexpert/pexpert.h> /* for PE_parse_boot_arg */
 
 /*
  * Copy the "hostname" variable into a caller-provided buffer
  * Returns: 0 for success, ENAMETOOLONG for insufficient buffer space.
- * On success, "len" will be set to the number of characters preceding 
+ * On success, "len" will be set to the number of characters preceding
  * the NULL character in the hostname.
  */
 int
-bsd_hostname(char *buf, int bufsize, int *len)
+bsd_hostname(char * buf, int bufsize, int * len)
 {
        /*
-        * "hostname" is null-terminated, and "hostnamelen" is equivalent to strlen(hostname).
+        * "hostname" is null-terminated, and "hostnamelen" is equivalent to strlen(hostname).
         */
        if (hostnamelen < bufsize) {
                strlcpy(buf, hostname, bufsize);
@@ -334,7 +330,7 @@ bsd_hostname(char *buf, int bufsize, int *len)
                return 0;
        } else {
                return ENAMETOOLONG;
-       }    
+       }
 }
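A hedged usage sketch for bsd_hostname(); sizing the buffer with MAXHOSTNAMELEN (from <sys/param.h>) is an assumption of the example, not something the routine requires:

    char name[MAXHOSTNAMELEN];
    int  namelen = 0;

    if (bsd_hostname(name, sizeof(name), &namelen) == 0) {
            /* success: name is NUL-terminated and namelen == strlen(name) */
    } else {
            /* ENAMETOOLONG: the caller's buffer was too small for the hostname */
    }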
 
 void
@@ -343,19 +339,20 @@ devsw_lock(dev_t dev, int mode)
        devsw_lock_t newlock, tmplock;
        int res;
 
-       assert(0 <= major(dev) && major(dev) < nchrdev);        
+       assert(0 <= major(dev) && major(dev) < nchrdev);
        assert(mode == S_IFCHR || mode == S_IFBLK);
 
        MALLOC(newlock, devsw_lock_t, sizeof(struct devsw_lock), M_TEMP, M_WAITOK | M_ZERO);
        newlock->dl_dev = dev;
        newlock->dl_thread = current_thread();
        newlock->dl_mode = mode;
-       
+
        lck_mtx_lock_spin(&devsw_lock_list_mtx);
 retry:
-       TAILQ_FOREACH(tmplock, &devsw_locks, dl_list) {
+       TAILQ_FOREACH(tmplock, &devsw_locks, dl_list)
+       {
                if (tmplock->dl_dev == dev && tmplock->dl_mode == mode) {
-                       res = msleep(tmplock, &devsw_lock_list_mtx, PVFS, "devsw_lock", NULL);  
+                       res = msleep(tmplock, &devsw_lock_list_mtx, PVFS, "devsw_lock", NULL);
                        assert(res == 0);
                        goto retry;
                }
@@ -363,19 +360,19 @@ retry:
 
        TAILQ_INSERT_TAIL(&devsw_locks, newlock, dl_list);
        lck_mtx_unlock(&devsw_lock_list_mtx);
-
 }
 void
 devsw_unlock(dev_t dev, int mode)
 {
        devsw_lock_t tmplock;
 
-       assert(0 <= major(dev) && major(dev) < nchrdev);        
+       assert(0 <= major(dev) && major(dev) < nchrdev);
 
        lck_mtx_lock_spin(&devsw_lock_list_mtx);
 
-       TAILQ_FOREACH(tmplock, &devsw_locks, dl_list) {
-               if (tmplock->dl_dev == dev && tmplock->dl_mode == mode) {       
+       TAILQ_FOREACH(tmplock, &devsw_locks, dl_list)
+       {
+               if (tmplock->dl_dev == dev && tmplock->dl_mode == mode) {
                        break;
                }
        }
@@ -390,9 +387,9 @@ devsw_unlock(dev_t dev, int mode)
 
        wakeup(tmplock);
        TAILQ_REMOVE(&devsw_locks, tmplock, dl_list);
-       
+
        lck_mtx_unlock(&devsw_lock_list_mtx);
-       
+
        FREE(tmplock, M_TEMP);
 }
 
index ce43a47858b32da5021c7dfdff154392879368e9..5c71793ad24b61ce873328f2607cbf073c62e8a3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -64,18 +64,33 @@ baseName(const char *path)
     return ret;
 }
 
+static char*
+vnpath(vnode_t vp, char *path, int len)
+{
+    int origlen = len;
+    path[0] = 0;
+    vn_getpath(vp, path, &len);
+    path[origlen - 1] = 0;
+    return path;
+}
+
 #define ErrorLog(x, args...) printf("%s:%d:%s: " x, baseName(__FILE__), __LINE__, __FUNCTION__, ## args)
+#define ErrorLogWithPath(x, args...) do { char *path; MALLOC(path, char *, PATH_MAX, M_TEMP, M_WAITOK); printf("%s:%d:%s: %s: " x, baseName(__FILE__), __LINE__, __FUNCTION__, vnpath(vp, path, PATH_MAX), ## args); FREE(path, M_TEMP); } while(0)
 
 #if COMPRESSION_DEBUG
 #define DebugLog ErrorLog
+#define DebugLogWithPath ErrorLogWithPath
 #else
 #define DebugLog(x...) do { } while(0)
+#define DebugLogWithPath(x...) do { } while(0)
 #endif
 
 #if COMPRESSION_DEBUG_VERBOSE
 #define VerboseLog ErrorLog
+#define VerboseLogWithPath ErrorLogWithPath
 #else
 #define VerboseLog(x...) do { } while(0)
+#define VerboseLogWithPath(x...) do { } while(0)
 #endif
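One property of the new *WithPath macros worth noting, shown as a sketch (example_op and do_something are hypothetical): the expansion references a variable literally named vp, so a local vnode_t vp must be in scope at every call site.

    static int
    example_op(vnode_t vp)
    {
        int err = do_something(vp);              /* hypothetical operation */
        if (err)
            ErrorLogWithPath("err %d\n", err);   /* message is prefixed with vp's path */
        return err;
    }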
 
 #if MALLOC_DEBUG
@@ -197,7 +212,7 @@ extern boolean_t IOServiceWaitForMatchingResource( const char * property, uint64
 extern boolean_t IOCatalogueMatchingDriversPresent( const char * property );
 
 static void *
-_decmp_get_func(uint32_t type, uintptr_t offset)
+_decmp_get_func(vnode_t vp, uint32_t type, uintptr_t offset)
 {
        /*
         this function should be called while holding a shared lock to decompressorsLock,
@@ -220,7 +235,7 @@ _decmp_get_func(uint32_t type, uintptr_t offset)
         char resourceName[80];
         uint64_t delay = 10000000ULL; // 10 milliseconds.
         snprintf(resourceName, sizeof(resourceName), "com.apple.AppleFSCompression.Type%u", type);
-        printf("waiting for %s\n", resourceName);
+        ErrorLogWithPath("waiting for %s\n", resourceName);
         while(decompressors[type] == NULL) {
             lck_rw_unlock_shared(decompressorsLock); // we have to unlock to allow the kext to register
             if (IOServiceWaitForMatchingResource(resourceName, delay)) {
@@ -229,17 +244,17 @@ _decmp_get_func(uint32_t type, uintptr_t offset)
             }
             if (!IOCatalogueMatchingDriversPresent(providesName)) {
                 // 
-                printf("the kext with %s is no longer present\n", providesName);
+                ErrorLogWithPath("the kext with %s is no longer present\n", providesName);
                 lck_rw_lock_shared(decompressorsLock);
                 break;
             }
-            printf("still waiting for %s\n", resourceName);
+            ErrorLogWithPath("still waiting for %s\n", resourceName);
             delay *= 2;
             lck_rw_lock_shared(decompressorsLock);
         }
         // IOKit says the kext is loaded, so it should be registered too!
         if (decompressors[type] == NULL) {
-            ErrorLog("we found %s, but the type still isn't registered\n", providesName);
+            ErrorLogWithPath("we found %s, but the type still isn't registered\n", providesName);
             return NULL;
         }
         // it's now registered, so let's return the function
@@ -247,25 +262,15 @@ _decmp_get_func(uint32_t type, uintptr_t offset)
     }
     
        // the compressor hasn't registered, so it never will unless someone manually kextloads it
-       ErrorLog("tried to access a compressed file of unregistered type %d\n", type);
+       ErrorLogWithPath("tried to access a compressed file of unregistered type %d\n", type);
        return NULL;
 }
 
-#define decmp_get_func(type, func) ((typeof(((decmpfs_registration*)NULL)->func))_decmp_get_func(type, offsetof_func(func)))
+#define decmp_get_func(vp, type, func) ((typeof(((decmpfs_registration*)NULL)->func))_decmp_get_func(vp, type, offsetof_func(func)))
 
 #pragma mark --- utilities ---
 
 #if COMPRESSION_DEBUG
-static char*
-vnpath(vnode_t vp, char *path, int len)
-{
-    int origlen = len;
-    path[0] = 0;
-    vn_getpath(vp, path, &len);
-    path[origlen - 1] = 0;
-    return path;
-}
-
 static int
 vnsize(vnode_t vp, uint64_t *size)
 {
@@ -274,7 +279,7 @@ vnsize(vnode_t vp, uint64_t *size)
     VATTR_WANTED(&va, va_data_size);
        int error = vnode_getattr(vp, &va, decmpfs_ctx);
     if (error != 0) {
-        ErrorLog("vnode_getattr err %d\n", error);
+        ErrorLogWithPath("vnode_getattr err %d\n", error);
         return error;
     }
     *size = va.va_data_size;
@@ -499,7 +504,7 @@ decmpfs_fetch_compressed_header(vnode_t vp, decmpfs_cnode *cp, decmpfs_header **
     }
     
     if (hdr->compression_magic != DECMPFS_MAGIC) {
-        ErrorLog("invalid compression_magic 0x%08x, should be 0x%08x\n", hdr->compression_magic, DECMPFS_MAGIC);
+        ErrorLogWithPath("invalid compression_magic 0x%08x, should be 0x%08x\n", hdr->compression_magic, DECMPFS_MAGIC);
         err = EINVAL;
                goto out;
     }
@@ -509,7 +514,7 @@ decmpfs_fetch_compressed_header(vnode_t vp, decmpfs_cnode *cp, decmpfs_header **
             /* return the header even though the type is out of range */
             err = ERANGE;
         } else {
-            ErrorLog("compression_type %d out of range\n", hdr->compression_type);
+            ErrorLogWithPath("compression_type %d out of range\n", hdr->compression_type);
             err = EINVAL;
         }
                goto out;
@@ -517,7 +522,7 @@ decmpfs_fetch_compressed_header(vnode_t vp, decmpfs_cnode *cp, decmpfs_header **
        
 out:
     if (err && (err != ERANGE)) {
-        DebugLog("err %d\n", err);
+        DebugLogWithPath("err %d\n", err);
         if (data) FREE(data, M_TEMP);
         *hdrOut = NULL;
     } else {
@@ -597,11 +602,11 @@ decmpfs_validate_compressed_file(vnode_t vp, decmpfs_cnode *cp)
     }
     
     lck_rw_lock_shared(decompressorsLock);
-    decmpfs_validate_compressed_file_func validate = decmp_get_func(hdr->compression_type, validate);
+    decmpfs_validate_compressed_file_func validate = decmp_get_func(vp, hdr->compression_type, validate);
     if (validate) {    /* make sure this validation function is valid */
         /* is the data okay? */
                err = validate(vp, decmpfs_ctx, hdr);
-    } else if (decmp_get_func(hdr->compression_type, fetch) == NULL) {
+    } else if (decmp_get_func(vp, hdr->compression_type, fetch) == NULL) {
         /* the type isn't registered */
         err = EIO;
     } else {
@@ -613,7 +618,7 @@ out:
     if (hdr) FREE(hdr, M_TEMP);
 #if COMPRESSION_DEBUG
     if (err) {
-        DebugLog("decmpfs_validate_compressed_file ret %d, vp->v_flag %d\n", err, vp->v_flag);
+        DebugLogWithPath("decmpfs_validate_compressed_file ret %d, vp->v_flag %d\n", err, vp->v_flag);
     }
 #endif
     return err;
@@ -664,7 +669,7 @@ decmpfs_file_is_compressed(vnode_t vp, decmpfs_cnode *cp)
             break;
         default:
             /* unknown state, assume file is not compressed */
-            ErrorLog("unknown cmp_state %d\n", cmp_state);
+            ErrorLogWithPath("unknown cmp_state %d\n", cmp_state);
             return 0;
     }
     
@@ -748,7 +753,7 @@ done:
             
             /* update the decompression flags in the decmpfs cnode */
             lck_rw_lock_shared(decompressorsLock);
-            decmpfs_get_decompression_flags_func get_flags = decmp_get_func(hdr->compression_type, get_flags);
+            decmpfs_get_decompression_flags_func get_flags = decmp_get_func(vp, hdr->compression_type, get_flags);
             if (get_flags) {
                 decompression_flags = get_flags(vp, decmpfs_ctx, hdr);
             }
@@ -772,7 +777,7 @@ done:
                        return 1;
         default:
             /* unknown state, assume file is not compressed */
-            ErrorLog("unknown ret %d\n", ret);
+            ErrorLogWithPath("unknown ret %d\n", ret);
             return 0;
     }
 }
@@ -887,12 +892,12 @@ decmpfs_hides_xattr(vfs_context_t ctx, decmpfs_cnode *cp, const char *xattr)
        
        if (ctx == decmpfs_ctx)
                return 0;
-       if (strncmp(xattr, XATTR_RESOURCEFORK_NAME, 22) == 0)
+       if (strncmp(xattr, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME) - 1) == 0)
                return decmpfs_hides_rsrc(ctx, cp);
        if (!decmpfs_fast_file_is_compressed(cp))
     /* file is not compressed, so don't hide this xattr */
                return 0;
-       if (strncmp(xattr, DECMPFS_XATTR_NAME, 11) == 0)
+       if (strncmp(xattr, DECMPFS_XATTR_NAME, sizeof(DECMPFS_XATTR_NAME) - 1) == 0)
     /* it's our xattr, so hide it */
                return 1;
        /* don't hide this xattr */
@@ -965,14 +970,14 @@ out:
 }
 
 static int
-compression_type_valid(decmpfs_header *hdr)
+compression_type_valid(vnode_t vp, decmpfs_header *hdr)
 {
     /* fast pre-check to determine if the given compressor has checked in */
     int ret = 0;
     
     /* every compressor must have at least a fetch function */
     lck_rw_lock_shared(decompressorsLock);
-    if (decmp_get_func(hdr->compression_type, fetch) != NULL) {
+    if (decmp_get_func(vp, hdr->compression_type, fetch) != NULL) {
         ret = 1;
     }
     lck_rw_unlock_shared(decompressorsLock);
@@ -1012,7 +1017,7 @@ decmpfs_fetch_uncompressed_data(vnode_t vp, decmpfs_cnode *cp, decmpfs_header *h
     }
     
     lck_rw_lock_shared(decompressorsLock);
-    decmpfs_fetch_uncompressed_data_func fetch = decmp_get_func(hdr->compression_type, fetch);
+    decmpfs_fetch_uncompressed_data_func fetch = decmp_get_func(vp, hdr->compression_type, fetch);
     if (fetch) {
                err = fetch(vp, decmpfs_ctx, hdr, offset, size, nvec, vec, bytes_read);
                lck_rw_unlock_shared(decompressorsLock);
@@ -1050,7 +1055,7 @@ commit_upl(upl_t upl, upl_offset_t pl_offset, size_t uplSize, int flags, int abo
         VerboseLog("aborting upl, flags 0x%08x\n", flags);
                kr = ubc_upl_abort_range(upl, pl_offset, uplSize, flags);
         if (kr != KERN_SUCCESS)
-            ErrorLog("ubc_upl_commit_range error %d\n", (int)kr);
+            ErrorLog("ubc_upl_abort_range error %d\n", (int)kr);
     } else {
         VerboseLog("committing upl, flags 0x%08x\n", flags | UPL_COMMIT_CLEAR_DIRTY);
                kr = ubc_upl_commit_range(upl, pl_offset, uplSize, flags | UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_WRITTEN_BY_KERNEL);
@@ -1067,7 +1072,7 @@ decmpfs_pagein_compressed(struct vnop_pagein_args *ap, int *is_compressed, decmp
     /* handles a page-in request from vfs for a compressed file */
     
     int err                      = 0;
-    struct vnode *vp             = ap->a_vp;
+    vnode_t vp                   = ap->a_vp;
     upl_t pl                     = ap->a_pl;
        upl_offset_t pl_offset       = ap->a_pl_offset;
     off_t f_offset               = ap->a_f_offset;
@@ -1088,7 +1093,7 @@ decmpfs_pagein_compressed(struct vnop_pagein_args *ap, int *is_compressed, decmp
     
        
        if (flags & ~(UPL_IOSYNC | UPL_NOCOMMIT | UPL_NORDAHEAD)) {
-               DebugLog("pagein: unknown flags 0x%08x\n", (flags & ~(UPL_IOSYNC | UPL_NOCOMMIT | UPL_NORDAHEAD)));
+               DebugLogWithPath("pagein: unknown flags 0x%08x\n", (flags & ~(UPL_IOSYNC | UPL_NOCOMMIT | UPL_NORDAHEAD)));
        }
     
     err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0);
@@ -1098,7 +1103,7 @@ decmpfs_pagein_compressed(struct vnop_pagein_args *ap, int *is_compressed, decmp
        
     cachedSize = hdr->uncompressed_size;
     
-    if (!compression_type_valid(hdr)) {
+    if (!compression_type_valid(vp, hdr)) {
         /* compressor not registered */
         err = ENOTSUP;
         goto out;
@@ -1138,7 +1143,7 @@ decompress:
     
     uint64_t did_read = 0;
        if (decmpfs_fast_get_state(cp) == FILE_IS_CONVERTING) {
-               ErrorLog("unexpected pagein during decompress\n");
+               ErrorLogWithPath("unexpected pagein during decompress\n");
                /*
                 if the file is converting, this must be a recursive call to pagein from underneath a call to decmpfs_decompress_file;
                 pretend that it succeeded but don't do anything since we're just going to write over the pages anyway
@@ -1149,19 +1154,19 @@ decompress:
         err = decmpfs_fetch_uncompressed_data(vp, cp, hdr, uplPos, uplSize, 1, &vec, &did_read);
        }
     if (err) {
-        DebugLog("decmpfs_fetch_uncompressed_data err %d\n", err);
+        DebugLogWithPath("decmpfs_fetch_uncompressed_data err %d\n", err);
         int cmp_state = decmpfs_fast_get_state(cp);
         if (cmp_state == FILE_IS_CONVERTING) {
-            DebugLog("cmp_state == FILE_IS_CONVERTING\n");
+            DebugLogWithPath("cmp_state == FILE_IS_CONVERTING\n");
             cmp_state = wait_for_decompress(cp);
             if (cmp_state == FILE_IS_COMPRESSED) {
-                DebugLog("cmp_state == FILE_IS_COMPRESSED\n");
+                DebugLogWithPath("cmp_state == FILE_IS_COMPRESSED\n");
                 /* a decompress was attempted but it failed, let's try calling fetch again */
                 goto decompress;
             }
         }
         if (cmp_state == FILE_IS_NOT_COMPRESSED) {
-            DebugLog("cmp_state == FILE_IS_NOT_COMPRESSED\n");
+            DebugLogWithPath("cmp_state == FILE_IS_NOT_COMPRESSED\n");
             /* the file was decompressed after we started reading it */
             abort_pagein = 1;   /* we're not going to commit our data */
             *is_compressed = 0; /* instruct caller to fall back to its normal path */
@@ -1180,7 +1185,7 @@ decompress:
  
        kr = ubc_upl_unmap(pl); data = NULL; /* make sure to set data to NULL so we don't try to unmap again below */
     if (kr != KERN_SUCCESS)
-        ErrorLog("ubc_upl_unmap error %d\n", (int)kr);
+        ErrorLogWithPath("ubc_upl_unmap error %d\n", (int)kr);
     else {
         if (!abort_pagein) {
             /* commit our pages */
@@ -1192,9 +1197,16 @@ out:
        if (data) ubc_upl_unmap(pl);
     if (hdr) FREE(hdr, M_TEMP);
        if (cmpdata_locked) decmpfs_unlock_compressed_data(cp, 0);
-    if (err)
-        ErrorLog("err %d\n", err);
-    
+    if (err) {
+#if DEVELOPMENT || DEBUG
+        char *path;
+        MALLOC(path, char *, PATH_MAX, M_TEMP, M_WAITOK);
+        panic("%s: decmpfs_pagein_compressed: err %d", vnpath(vp, path, PATH_MAX), err);
+        FREE(path, M_TEMP);
+#else
+        ErrorLogWithPath("err %d\n", err);
+#endif
+    }
        return err;
 }
 
@@ -1228,7 +1240,7 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c
        
     uplPos = uio_offset(uio);
     uplSize = uio_resid(uio);
-    VerboseLog("uplPos %lld uplSize %lld\n", uplPos, uplSize);
+    VerboseLogWithPath("uplPos %lld uplSize %lld\n", uplPos, uplSize);
        
     cachedSize = decmpfs_cnode_get_vnode_cached_size(cp);
     
@@ -1260,7 +1272,7 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c
     if (err != 0) {
         goto out;
     }
-    if (!compression_type_valid(hdr)) {
+    if (!compression_type_valid(vp, hdr)) {
         err = ENOTSUP;
         goto out;
     }
@@ -1268,16 +1280,15 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c
     uplPos = uioPos;
     uplSize = uioRemaining;
 #if COMPRESSION_DEBUG
-    char path[PATH_MAX];
-    DebugLog("%s: uplPos %lld uplSize %lld\n", vnpath(vp, path, sizeof(path)), (uint64_t)uplPos, (uint64_t)uplSize);
+    DebugLogWithPath("uplPos %lld uplSize %lld\n", (uint64_t)uplPos, (uint64_t)uplSize);
 #endif
        
     lck_rw_lock_shared(decompressorsLock);
-    decmpfs_adjust_fetch_region_func adjust_fetch = decmp_get_func(hdr->compression_type, adjust_fetch);
+    decmpfs_adjust_fetch_region_func adjust_fetch = decmp_get_func(vp, hdr->compression_type, adjust_fetch);
     if (adjust_fetch) {
         /* give the compressor a chance to adjust the portion of the file that we read */
                adjust_fetch(vp, decmpfs_ctx, hdr, &uplPos, &uplSize);
-        VerboseLog("adjusted uplPos %lld uplSize %lld\n", (uint64_t)uplPos, (uint64_t)uplSize);
+        VerboseLogWithPath("adjusted uplPos %lld uplSize %lld\n", (uint64_t)uplPos, (uint64_t)uplSize);
     }
     lck_rw_unlock_shared(decompressorsLock);
     
@@ -1305,7 +1316,7 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c
     /* round size up to page multiple */
     uplSize = (uplSize + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
     
-    VerboseLog("new uplPos %lld uplSize %lld\n", (uint64_t)uplPos, (uint64_t)uplSize);
+    VerboseLogWithPath("new uplPos %lld uplSize %lld\n", (uint64_t)uplPos, (uint64_t)uplSize);
     
     uplRemaining = uplSize;
     curUplPos = uplPos;
@@ -1324,11 +1335,11 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c
         /* create the upl */
         kr = ubc_create_upl(vp, curUplPos, curUplSize, &upl, &pli, UPL_SET_LITE);
         if (kr != KERN_SUCCESS) {
-            ErrorLog("ubc_create_upl error %d\n", (int)kr);
+            ErrorLogWithPath("ubc_create_upl error %d\n", (int)kr);
             err = EINVAL;
             goto out;
         }
-        VerboseLog("curUplPos %lld curUplSize %lld\n", (uint64_t)curUplPos, (uint64_t)curUplSize);
+        VerboseLogWithPath("curUplPos %lld curUplSize %lld\n", (uint64_t)curUplPos, (uint64_t)curUplSize);
        
 #if CONFIG_IOSCHED
        /* Mark the UPL as the requesting UPL for decompression */
@@ -1340,8 +1351,14 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c
         if (kr != KERN_SUCCESS) {
 
            commit_upl(upl, 0, curUplSize, UPL_ABORT_FREE_ON_EMPTY, 1);
-
-            ErrorLog("ubc_upl_map error %d\n", (int)kr);
+#if DEVELOPMENT || DEBUG
+            char *path;
+            MALLOC(path, char *, PATH_MAX, M_TEMP, M_WAITOK);
+            panic("%s: decmpfs_read_compressed: ubc_upl_map error %d", vnpath(vp, path, PATH_MAX), (int)kr);
+            FREE(path, M_TEMP);
+#else
+            ErrorLogWithPath("ubc_upl_map error %d\n", (int)kr);
+#endif
             err = EINVAL;
             goto out;
         }
@@ -1351,7 +1368,7 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c
 
            commit_upl(upl, 0, curUplSize, UPL_ABORT_FREE_ON_EMPTY, 1);
 
-            ErrorLog("ubc_upl_map mapped null\n");
+            ErrorLogWithPath("ubc_upl_map mapped null\n");
             err = EINVAL;
             goto out;
         }
@@ -1362,21 +1379,21 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c
         vec = (decmpfs_vector){ .buf = data, .size = curUplSize };
         err = decmpfs_fetch_uncompressed_data(vp, cp, hdr, curUplPos, curUplSize, 1, &vec, &did_read);
         if (err) {
-            ErrorLog("decmpfs_fetch_uncompressed_data err %d\n", err);
+            ErrorLogWithPath("decmpfs_fetch_uncompressed_data err %d\n", err);
             
             /* maybe the file is converting to decompressed */
             int cmp_state = decmpfs_fast_get_state(cp);
             if (cmp_state == FILE_IS_CONVERTING) {
-                ErrorLog("cmp_state == FILE_IS_CONVERTING\n");
+                ErrorLogWithPath("cmp_state == FILE_IS_CONVERTING\n");
                 cmp_state = wait_for_decompress(cp);
                 if (cmp_state == FILE_IS_COMPRESSED) {
-                    ErrorLog("cmp_state == FILE_IS_COMPRESSED\n");
+                    ErrorLogWithPath("cmp_state == FILE_IS_COMPRESSED\n");
                     /* a decompress was attempted but it failed, let's try fetching again */
                     goto decompress;
                 }
             }
             if (cmp_state == FILE_IS_NOT_COMPRESSED) {
-                ErrorLog("cmp_state == FILE_IS_NOT_COMPRESSED\n");
+                ErrorLogWithPath("cmp_state == FILE_IS_NOT_COMPRESSED\n");
                 /* the file was decompressed after we started reading it */
                 abort_read = 1;     /* we're not going to commit our data */
                 *is_compressed = 0; /* instruct caller to fall back to its normal path */
@@ -1391,11 +1408,11 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c
             if (abort_read) {
                                kr = commit_upl(upl, 0, curUplSize, UPL_ABORT_FREE_ON_EMPTY, 1);
             } else {
-                VerboseLog("uioPos %lld uioRemaining %lld\n", (uint64_t)uioPos, (uint64_t)uioRemaining);
+                VerboseLogWithPath("uioPos %lld uioRemaining %lld\n", (uint64_t)uioPos, (uint64_t)uioRemaining);
                 if (uioRemaining) {
                     off_t uplOff = uioPos - curUplPos;
                     if (uplOff < 0) {
-                        ErrorLog("uplOff %lld should never be negative\n", (int64_t)uplOff);
+                        ErrorLogWithPath("uplOff %lld should never be negative\n", (int64_t)uplOff);
                         err = EINVAL;
                     } else {
                         off_t count = curUplPos + curUplSize - uioPos;
@@ -1407,9 +1424,9 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c
                             int io_resid = count;
                             err = cluster_copy_upl_data(uio, upl, uplOff, &io_resid);
                             int copied = count - io_resid;
-                            VerboseLog("uplOff %lld count %lld copied %lld\n", (uint64_t)uplOff, (uint64_t)count, (uint64_t)copied);
+                            VerboseLogWithPath("uplOff %lld count %lld copied %lld\n", (uint64_t)uplOff, (uint64_t)count, (uint64_t)copied);
                             if (err) {
-                                ErrorLog("cluster_copy_upl_data err %d\n", err);
+                                ErrorLogWithPath("cluster_copy_upl_data err %d\n", err);
                             }
                             uioPos += copied;
                             uioRemaining -= copied;
@@ -1422,7 +1439,7 @@ decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_c
                 }
             }
         } else {
-            ErrorLog("ubc_upl_unmap error %d\n", (int)kr);
+            ErrorLogWithPath("ubc_upl_unmap error %d\n", (int)kr);
         }
     
         uplRemaining -= curUplSize;
@@ -1433,14 +1450,14 @@ out:
     if (hdr) FREE(hdr, M_TEMP);
        if (cmpdata_locked) decmpfs_unlock_compressed_data(cp, 0);
     if (err) {/* something went wrong */
-        ErrorLog("err %d\n", err);
+        ErrorLogWithPath("err %d\n", err);
         return err;
     }
        
 #if COMPRESSION_DEBUG
     uplSize = uio_resid(uio);
     if (uplSize)
-        VerboseLog("still %lld bytes to copy\n", uplSize);
+        VerboseLogWithPath("still %lld bytes to copy\n", uplSize);
 #endif
     return 0;
 }
@@ -1456,10 +1473,10 @@ decmpfs_free_compressed_data(vnode_t vp, decmpfs_cnode *cp)
     decmpfs_header *hdr = NULL;
     int err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0);
     if (err) {
-        ErrorLog("decmpfs_fetch_compressed_header err %d\n", err);
+        ErrorLogWithPath("decmpfs_fetch_compressed_header err %d\n", err);
     } else {
         lck_rw_lock_shared(decompressorsLock);
-        decmpfs_free_compressed_data_func free_data = decmp_get_func(hdr->compression_type, free_data);
+        decmpfs_free_compressed_data_func free_data = decmp_get_func(vp, hdr->compression_type, free_data);
         if (free_data) {
                        err = free_data(vp, decmpfs_ctx, hdr);
         } else {
@@ -1469,7 +1486,7 @@ decmpfs_free_compressed_data(vnode_t vp, decmpfs_cnode *cp)
         lck_rw_unlock_shared(decompressorsLock);
         
         if (err != 0) {
-            ErrorLog("decompressor err %d\n", err);
+            ErrorLogWithPath("decompressor err %d\n", err);
         }
     }
     
@@ -1498,7 +1515,7 @@ unset_compressed_flag(vnode_t vp)
        err = vnode_getattr(vp, &va, decmpfs_ctx);
     
     if (err != 0) {
-        ErrorLog("vnode_getattr err %d\n", err);
+        ErrorLogWithPath("vnode_getattr err %d\n", err);
     } else {
         new_bsdflags = va.va_flags & ~UF_COMPRESSED;
         
@@ -1506,7 +1523,7 @@ unset_compressed_flag(vnode_t vp)
         VATTR_SET(&va, va_flags, new_bsdflags);
                err = vnode_setattr(vp, &va, decmpfs_ctx);
         if (err != 0) {
-            ErrorLog("vnode_setattr err %d\n", err);
+            ErrorLogWithPath("vnode_setattr err %d\n", err);
         }
     }
     return err;
@@ -1618,7 +1635,7 @@ decompress:
                decmpfs_vector vec = { .buf = data, .size = MIN(allocSize, remaining) };
                err = decmpfs_fetch_uncompressed_data(vp, cp, hdr, offset, vec.size, 1, &vec, &bytes_read);
                if (err != 0) {
-                       ErrorLog("decmpfs_fetch_uncompressed_data err %d\n", err);
+                       ErrorLogWithPath("decmpfs_fetch_uncompressed_data err %d\n", err);
                        goto out;
                }
                
@@ -1630,7 +1647,7 @@ decompress:
                uio_reset(uio_w, offset, UIO_SYSSPACE, UIO_WRITE);
                err = uio_addiov(uio_w, CAST_USER_ADDR_T(data), bytes_read);
                if (err != 0) {
-                       ErrorLog("uio_addiov err %d\n", err);
+                       ErrorLogWithPath("uio_addiov err %d\n", err);
                        err = ENOMEM;
                        goto out;
                }
@@ -1638,7 +1655,7 @@ decompress:
                err = VNOP_WRITE(vp, uio_w, 0, decmpfs_ctx);
                if (err != 0) {
                        /* if the write failed, truncate the file to zero bytes */
-                       ErrorLog("VNOP_WRITE err %d\n", err);
+                       ErrorLogWithPath("VNOP_WRITE err %d\n", err);
                        break;
                }
                offset += bytes_read;
@@ -1647,7 +1664,7 @@ decompress:
        
        if (err == 0) {
                if (offset != toSize) {
-                       ErrorLog("file decompressed to %lld instead of %lld\n", offset, toSize);
+                       ErrorLogWithPath("file decompressed to %lld instead of %lld\n", offset, toSize);
                        err = EINVAL;
                        goto out;
                }
@@ -1657,18 +1674,18 @@ decompress:
                /* sync the data and metadata */
                err = VNOP_FSYNC(vp, MNT_WAIT, decmpfs_ctx);
                if (err != 0) {
-                       ErrorLog("VNOP_FSYNC err %d\n", err);
+                       ErrorLogWithPath("VNOP_FSYNC err %d\n", err);
                        goto out;
                }
        }
        
        if (err != 0) {
                /* write, setattr, or fsync failed */
-               ErrorLog("aborting decompress, err %d\n", err);
+               ErrorLogWithPath("aborting decompress, err %d\n", err);
                if (truncate_okay) {
                        /* truncate anything we might have written */
                        int error = vnode_setsize(vp, 0, 0, decmpfs_ctx);
-                       ErrorLog("vnode_setsize err %d\n", error);
+                       ErrorLogWithPath("vnode_setsize err %d\n", error);
                }
                goto out;
        }
@@ -1682,7 +1699,7 @@ nodecmp:
        /* free the compressed data associated with this file */
        err = decmpfs_free_compressed_data(vp, cp);
        if (err != 0) {
-               ErrorLog("decmpfs_free_compressed_data err %d\n", err);
+               ErrorLogWithPath("decmpfs_free_compressed_data err %d\n", err);
        }
        
        /*
@@ -1699,7 +1716,7 @@ nodecmp:
        {
                uint64_t filesize = 0;
                vnsize(vp, &filesize);
-               DebugLog("new file size %lld\n", filesize);
+               DebugLogWithPath("new file size %lld\n", filesize);
        }
 #endif
        
@@ -1763,8 +1780,7 @@ decmpfs_fetch_uncompressed_data_Type1(__unused vnode_t vp, __unused vfs_context_
     
 #if COMPRESSION_DEBUG
     static int dummy = 0; // prevent syslog from coalescing printfs
-    char path[PATH_MAX];
-    DebugLog("%s: %d memcpy %lld at %lld\n", vnpath(vp, path, sizeof(path)), dummy++, size, (uint64_t)offset);
+    DebugLogWithPath("%d memcpy %lld at %lld\n", dummy++, size, (uint64_t)offset);
 #endif
     
     remaining = size;
index 65c98080d0554a3c64b094ddd464793dee2ccc07..27ad69aa124ec8db87621a4548411e6b707dfb63 100644 (file)
@@ -33,6 +33,7 @@
 #include <sys/sysproto.h>
 #include <sys/bsdtask_info.h>
 #include <sys/random.h>
+#include <sys/stackshot.h>
 
 #define HZ      100
 #include <mach/clock_types.h>
@@ -55,6 +56,7 @@
 #include <kern/cpu_data.h>
 #include <kern/assert.h>
 #include <kern/telemetry.h>
+#include <kern/sched_prim.h>
 #include <vm/vm_kern.h>
 #include <sys/lock.h>
 
@@ -124,12 +126,13 @@ int cpu_number(void);     /* XXX <machine/...> include path broken */
 void commpage_update_kdebug_enable(void); /* XXX sign */
 
 /* XXX should probably be static, but it's debugging code... */
-int kdbg_read(user_addr_t, size_t *, vnode_t, vfs_context_t);
+int kdbg_read(user_addr_t, size_t *, vnode_t, vfs_context_t, uint32_t);
 void kdbg_control_chud(int, void *);
 int kdbg_control(int *, u_int, user_addr_t, size_t *);
 int kdbg_readcpumap(user_addr_t, size_t *);
 int kdbg_readcurcpumap(user_addr_t, size_t *);
 int kdbg_readthrmap(user_addr_t, size_t *, vnode_t, vfs_context_t);
+int kdbg_readthrmap_v3(user_addr_t, size_t *, int);
 int kdbg_readcurthrmap(user_addr_t, size_t *);
 int kdbg_getreg(kd_regtype *);
 int kdbg_setreg(kd_regtype *);
@@ -140,11 +143,30 @@ void kdbg_thrmap_init(void);
 int kdbg_reinit(boolean_t);
 int kdbg_bootstrap(boolean_t);
 
-int kdbg_cpumap_init_internal(kd_iop_t* iops, uint32_t cpu_count, uint8_t** cpumap, uint32_t* cpumap_size);
-kd_threadmap* kdbg_thrmap_init_internal(unsigned int count, unsigned int *mapsize, unsigned int *mapcount);
+int kdbg_cpumap_init_internal(kd_iop_t* iops, uint32_t cpu_count,
+                              uint8_t** cpumap, uint32_t* cpumap_size);
+
+kd_threadmap* kdbg_thrmap_init_internal(unsigned int count,
+                                        unsigned int *mapsize,
+                                        unsigned int *mapcount);
+
+static boolean_t kdebug_current_proc_enabled(uint32_t debugid);
+static boolean_t kdebug_debugid_enabled(uint32_t debugid);
+static errno_t kdebug_check_trace_string(uint32_t debugid, uint64_t str_id);
+
+int kdbg_write_v3_header(user_addr_t, size_t *, int);
+int kdbg_write_v3_chunk_header(user_addr_t buffer, uint32_t tag,
+                               uint32_t sub_tag, uint64_t length,
+                               vnode_t vp, vfs_context_t ctx);
+
+user_addr_t kdbg_write_v3_event_chunk_header(user_addr_t buffer, uint32_t tag,
+                                             uint64_t length, vnode_t vp,
+                                             vfs_context_t ctx);
 
 static int kdbg_enable_typefilter(void);
 static int kdbg_disable_typefilter(void);
+static int kdbg_allocate_typefilter(void);
+static int kdbg_deallocate_typefilter(void);
 
 static int create_buffers(boolean_t);
 static void delete_buffers(void);
@@ -162,7 +184,6 @@ static boolean_t    kd_early_overflow = FALSE;
 
 #define SLOW_NOLOG     0x01
 #define SLOW_CHECKS    0x02
-#define SLOW_ENTROPY   0x04                    /* Obsolescent */
 #define SLOW_CHUD      0x08
 
 #define EVENTS_PER_STORAGE_UNIT                2048
@@ -214,6 +235,11 @@ struct kd_bufinfo {
        uint32_t num_bufs;
 } __attribute__(( aligned(MAX_CPU_CACHE_LINE_SIZE) ));
 
+
+/*
+ * In principle, this control block can be shared in DRAM with other
+ * coprocessors and runtimes, for configuring what tracing is enabled.
+ */
 struct kd_ctrl_page_t {
        union kds_ptr kds_free_list;
        uint32_t enabled        :1;
@@ -238,6 +264,10 @@ struct kd_bufinfo *kdbip = NULL;
 
 #define KDCOPYBUF_COUNT        8192
 #define KDCOPYBUF_SIZE (KDCOPYBUF_COUNT * sizeof(kd_buf))
+
+#define PAGE_4KB       4096
+#define PAGE_16KB      16384
+
 kd_buf *kdcopybuf = NULL;
 
 boolean_t kdlog_bg_trace = FALSE;
@@ -259,25 +289,19 @@ static lck_grp_t  * kd_trace_mtx_sysctl_grp;
 static lck_attr_t * kd_trace_mtx_sysctl_attr;
 static lck_grp_attr_t   *kd_trace_mtx_sysctl_grp_attr;
 
-static lck_grp_t       *stackshot_subsys_lck_grp;
-static lck_grp_attr_t  *stackshot_subsys_lck_grp_attr;
-static lck_attr_t      *stackshot_subsys_lck_attr;
-static lck_mtx_t        stackshot_subsys_mutex;
+extern kern_return_t stack_snapshot2(int pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval);
+
+#if CONFIG_TELEMETRY
+extern kern_return_t stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval);
+#endif /* CONFIG_TELEMETRY */
 
-void *stackshot_snapbuf = NULL;
+extern kern_return_t kern_stack_snapshot_with_reason(char* reason);
 
-int
-stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset, int32_t *retval);
+extern kern_return_t kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user);
 
-int  
-stack_snapshot_from_kernel(pid_t pid, void *buf, uint32_t size, uint32_t flags, unsigned *bytesTraced);
-extern void
-kdp_snapshot_preflight(int pid, void  *tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset);
+extern kern_return_t stack_snapshot_from_kernel_internal(int pid, void *buf, uint32_t size, uint32_t flags, unsigned *bytes_traced);
 
-extern int
-kdp_stack_snapshot_geterror(void);
-extern unsigned int
-kdp_stack_snapshot_bytes_traced(void);
+int stack_snapshot_from_kernel(pid_t pid, void *buf, uint32_t size, uint32_t flags, unsigned *bytes_traced);
 
 kd_threadmap *kd_mapptr = 0;
 unsigned int kd_mapsize = 0;
@@ -290,25 +314,27 @@ int       RAW_file_written = 0;
 
 pid_t global_state_pid = -1;       /* Used to control exclusive use of kd_buffer */
 
-#define DBG_FUNC_MASK  0xfffffffc
+/*
+ * A globally increasing counter for identifying strings in trace.  Starts at
+ * 1 because 0 is a reserved return value.
+ */
+__attribute__((aligned(MAX_CPU_CACHE_LINE_SIZE)))
+static uint64_t g_curr_str_id = 1;
 
-/*  TODO: move to kdebug.h */
-#define CLASS_MASK      0xff000000
-#define CLASS_OFFSET    24
-#define SUBCLASS_MASK   0x00ff0000
-#define SUBCLASS_OFFSET 16
-#define CSC_MASK        0xffff0000     /*  class and subclass mask */
-#define CSC_OFFSET      SUBCLASS_OFFSET
+#define STR_ID_SIG_OFFSET (48)
+#define STR_ID_MASK       ((1ULL << STR_ID_SIG_OFFSET) - 1)
+#define STR_ID_SIG_MASK   (~STR_ID_MASK)
 
-#define EXTRACT_CLASS(debugid)          ( (uint8_t) ( ((debugid) & CLASS_MASK   ) >> CLASS_OFFSET    ) )
-#define EXTRACT_SUBCLASS(debugid)       ( (uint8_t) ( ((debugid) & SUBCLASS_MASK) >> SUBCLASS_OFFSET ) )
-#define EXTRACT_CSC(debugid)            ( (uint16_t)( ((debugid) & CSC_MASK     ) >> CSC_OFFSET      ) )
+/*
+ * A bit pattern for identifying string IDs generated by
+ * kdebug_trace_string(2).
+ */
+static uint64_t g_str_id_signature = (0x70acULL << STR_ID_SIG_OFFSET);
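The ID handed back to callers packs a 48-bit counter in the low bits and the 0x70ac signature in the top 16 bits, which is how kdebug_check_trace_string() below can tell caller-supplied IDs apart from arbitrary values. A minimal standalone sketch of that arithmetic (the function names are illustrative, not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    #define STR_ID_SIG_OFFSET 48
    #define STR_ID_MASK       ((1ULL << STR_ID_SIG_OFFSET) - 1)
    #define STR_ID_SIG_MASK   (~STR_ID_MASK)

    static const uint64_t str_id_signature = 0x70acULL << STR_ID_SIG_OFFSET;

    /* compose an ID from the next counter value, as kernel_debug_string_internal does */
    static uint64_t make_str_id(uint64_t counter)
    {
        return (counter & STR_ID_MASK) | str_id_signature;
    }

    /* the signature check kdebug_check_trace_string applies to caller-provided IDs */
    static int str_id_is_valid(uint64_t str_id)
    {
        return (str_id & STR_ID_SIG_MASK) == str_id_signature;
    }

    int main(void)
    {
        uint64_t id = make_str_id(1);
        printf("id 0x%016llx valid %d\n", (unsigned long long)id, str_id_is_valid(id));
        printf("foreign id valid %d\n", str_id_is_valid(42));
        return 0;
    }
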
 
 #define INTERRUPT      0x01050000
 #define MACH_vmfault   0x01300008
 #define BSC_SysCall    0x040c0000
 #define MACH_SysCall   0x010c0000
-#define DBG_SCALL_MASK 0xffff0000
 
 /* task to string structure */
 struct tts
@@ -337,8 +363,6 @@ typedef void (*kd_chudhook_fn) (uint32_t debugid, uintptr_t arg1,
 
 volatile kd_chudhook_fn kdebug_chudhook = 0;   /* pointer to CHUD toolkit function */
 
-__private_extern__ void stackshot_lock_init( void );
-
 static uint8_t *type_filter_bitmap;
 
 /*
@@ -505,7 +529,7 @@ create_buffers(boolean_t early_trace)
 
        kd_ctrl_page.kdebug_cpus = kd_ctrl_page.kdebug_iops ? kd_ctrl_page.kdebug_iops->cpu_id + 1 : kdbg_cpu_count(early_trace);
 
-       if (kmem_alloc(kernel_map, (vm_offset_t *)&kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus) != KERN_SUCCESS) {
+       if (kmem_alloc(kernel_map, (vm_offset_t *)&kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
                error = ENOSPC;
                goto out;
        }
@@ -529,19 +553,19 @@ create_buffers(boolean_t early_trace)
        kd_bufs = NULL;
 
        if (kdcopybuf == 0) {
-               if (kmem_alloc(kernel_map, (vm_offset_t *)&kdcopybuf, (vm_size_t)KDCOPYBUF_SIZE) != KERN_SUCCESS) {
+               if (kmem_alloc(kernel_map, (vm_offset_t *)&kdcopybuf, (vm_size_t)KDCOPYBUF_SIZE, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
                        error = ENOSPC;
                        goto out;
                }
        }
-       if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers))) != KERN_SUCCESS) {
+       if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers)), VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
                error = ENOSPC;
                goto out;
        }
        bzero(kd_bufs, n_storage_buffers * sizeof(struct kd_storage_buffers));
 
        for (i = 0; i < f_buffers; i++) {
-               if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)f_buffer_size) != KERN_SUCCESS) {
+               if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)f_buffer_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
                        error = ENOSPC;
                        goto out;
                }
@@ -550,7 +574,7 @@ create_buffers(boolean_t early_trace)
                kd_bufs[i].kdsb_size = f_buffer_size;
        }
        if (p_buffer_size) {
-               if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)p_buffer_size) != KERN_SUCCESS) {
+               if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)p_buffer_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
                        error = ENOSPC;
                        goto out;
                }
@@ -784,7 +808,7 @@ int
 kernel_debug_register_callback(kd_callback_t callback)
 {
        kd_iop_t* iop;
-       if (kmem_alloc(kernel_map, (vm_offset_t *)&iop, sizeof(kd_iop_t)) == KERN_SUCCESS) {
+       if (kmem_alloc(kernel_map, (vm_offset_t *)&iop, sizeof(kd_iop_t), VM_KERN_MEMORY_DIAG) == KERN_SUCCESS) {
                memcpy(&iop->callback, &callback, sizeof(kd_callback_t));
                
                /*
@@ -859,8 +883,18 @@ kernel_debug_enter(
                        goto out1;
        
                if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
-                       if (isset(type_filter_bitmap, EXTRACT_CSC(debugid))) 
-                               goto record_event;
+                       /*
+                        * Recheck if TYPEFILTER is being used, and if so,
+                        * dereference bitmap. If the trace facility is being
+                        * disabled, we have ~100ms of preemption-free CPU
+                        * usage to access the bitmap.
+                        */
+                       disable_preemption();
+                       if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
+                               if (isset(type_filter_bitmap, KDBG_EXTRACT_CSC(debugid)))
+                                       goto record_event_preempt_disabled;
+                       }
+                       enable_preemption();
                        goto out1;
                }
                else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
@@ -869,10 +903,10 @@ kernel_debug_enter(
                        goto out1;
                }
                else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
-                       if ((debugid & DBG_FUNC_MASK) != kdlog_value1 &&
-                               (debugid & DBG_FUNC_MASK) != kdlog_value2 &&
-                               (debugid & DBG_FUNC_MASK) != kdlog_value3 &&
-                               (debugid & DBG_FUNC_MASK) != kdlog_value4)
+                       if ((debugid & KDBG_EVENTID_MASK) != kdlog_value1 &&
+                               (debugid & KDBG_EVENTID_MASK) != kdlog_value2 &&
+                               (debugid & KDBG_EVENTID_MASK) != kdlog_value3 &&
+                               (debugid & KDBG_EVENTID_MASK) != kdlog_value4)
                                goto out1;
                }
        }
@@ -881,6 +915,7 @@ record_event:
 
        disable_preemption();
 
+record_event_preempt_disabled:
        if (kd_ctrl_page.enabled == 0)
                goto out;
 
@@ -1034,16 +1069,26 @@ kernel_debug_internal(
 
                if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
                        /* Always record trace system info */
-                       if (EXTRACT_CLASS(debugid) == DBG_TRACE)
+                       if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE)
                                goto record_event;
 
-                       if (isset(type_filter_bitmap, EXTRACT_CSC(debugid))) 
-                               goto record_event;
+                       /*
+                        * Recheck if TYPEFILTER is being used, and if so,
+                        * dereference bitmap. If the trace facility is being
+                        * disabled, we have ~100ms of preemption-free CPU
+                        * usage to access the bitmap.
+                        */
+                       disable_preemption();
+                       if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
+                               if (isset(type_filter_bitmap, KDBG_EXTRACT_CSC(debugid)))
+                                       goto record_event_preempt_disabled;
+                       }
+                       enable_preemption();
                        goto out1;
                }
                else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
                        /* Always record trace system info */
-                       if (EXTRACT_CLASS(debugid) == DBG_TRACE)
+                       if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE)
                                goto record_event;
                                
                        if (debugid < kdlog_beg || debugid > kdlog_end)
@@ -1051,19 +1096,20 @@ kernel_debug_internal(
                }
                else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
                        /* Always record trace system info */
-                       if (EXTRACT_CLASS(debugid) == DBG_TRACE)
+                       if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE)
                                goto record_event;
                
-                       if ((debugid & DBG_FUNC_MASK) != kdlog_value1 &&
-                           (debugid & DBG_FUNC_MASK) != kdlog_value2 &&
-                           (debugid & DBG_FUNC_MASK) != kdlog_value3 &&
-                           (debugid & DBG_FUNC_MASK) != kdlog_value4)
+                       if ((debugid & KDBG_EVENTID_MASK) != kdlog_value1 &&
+                           (debugid & KDBG_EVENTID_MASK) != kdlog_value2 &&
+                           (debugid & KDBG_EVENTID_MASK) != kdlog_value3 &&
+                           (debugid & KDBG_EVENTID_MASK) != kdlog_value4)
                                goto out1;
                }
        }
 record_event:
        disable_preemption();
 
+record_event_preempt_disabled:
        if (kd_ctrl_page.enabled == 0)
                goto out;
 
@@ -1120,8 +1166,8 @@ out1:
                uint32_t        etype;
                uint32_t        stype;
                
-               etype = debugid & DBG_FUNC_MASK;
-               stype = debugid & DBG_SCALL_MASK;
+               etype = debugid & KDBG_EVENTID_MASK;
+               stype = debugid & KDBG_CSC_MASK;
 
                if (etype == INTERRUPT || etype == MACH_vmfault ||
                    stype == BSC_SysCall || stype == MACH_SysCall) {
@@ -1181,7 +1227,7 @@ kernel_debug1(
 }
 
 void
-kernel_debug_string(const char *message)
+kernel_debug_string_simple(const char *message)
 {
        uintptr_t arg[4] = {0, 0, 0, 0};
 
@@ -1268,7 +1314,24 @@ kernel_debug_early_end(void)
                        TRACE_LOST_EVENTS, 0, 0, 0, 0, 0);
 
        /* This trace marks the start of kernel tracing */
-       kernel_debug_string("early trace done");
+       kernel_debug_string_simple("early trace done");
+}
+
+/*
+ * Returns non-zero if debugid is in a reserved class.
+ */
+static int
+kdebug_validate_debugid(uint32_t debugid)
+{
+       uint8_t debugid_class;
+
+       debugid_class = KDBG_EXTRACT_CLASS(debugid);
+       switch (debugid_class) {
+               case DBG_TRACE:
+                       return EPERM;
+       }
+
+       return 0;
 }
 
 /*
@@ -1293,17 +1356,10 @@ kdebug_trace(struct proc *p, struct kdebug_trace_args *uap, int32_t *retval)
  */
 int kdebug_trace64(__unused struct proc *p, struct kdebug_trace64_args *uap, __unused int32_t *retval)
 {
-       uint8_t code_class;
-
-       /*
-        * Not all class are supported for injection from userspace, especially ones used by the core
-        * kernel tracing infrastructure.
-        */
-       code_class = EXTRACT_CLASS(uap->code);
+       int err;
 
-       switch (code_class) {
-               case DBG_TRACE:
-                       return EPERM;
+       if ((err = kdebug_validate_debugid(uap->code)) != 0) {
+               return err;
        }
 
        if ( __probable(kdebug_enable == 0) )
@@ -1314,6 +1370,307 @@ int kdebug_trace64(__unused struct proc *p, struct kdebug_trace64_args *uap, __u
        return(0);
 }
 
+/*
+ * Adding enough padding to contain a full tracepoint for the last
+ * portion of the string greatly simplifies the logic of splitting the
+ * string between tracepoints.  Full tracepoints can be generated using
+ * the buffer itself, without having to manually add zeros to pad the
+ * arguments.
+ */
+
+/* 2 string args in first tracepoint and 9 string data tracepoints */
+#define STR_BUF_ARGS (2 + (9 * 4))
+/* times the size of each arg on K64 */
+#define MAX_STR_LEN  (STR_BUF_ARGS * sizeof(uint64_t))
+/* on K32, ending straddles a tracepoint, so reserve blanks */
+#define STR_BUF_SIZE (MAX_STR_LEN + (2 * sizeof(uint32_t)))
+
+/*
+ * This function does no error checking and assumes that it is called with
+ * the correct arguments, including that the buffer pointed to by str is at
+ * least STR_BUF_SIZE bytes.  However, str must be aligned to word-size and
+ * be NUL-terminated.  In cases where a string can fit evenly into a final
+ * tracepoint without its NUL-terminator, this function will not end those
+ * strings with a NUL in trace.  It's up to clients to look at the function
+ * qualifier for DBG_FUNC_END in this case, to end the string.
+ */
+static uint64_t
+kernel_debug_string_internal(uint32_t debugid, uint64_t str_id, void *vstr,
+                             size_t str_len)
+{
+       /* str must be word-aligned */
+       uintptr_t *str = vstr;
+       size_t written = 0;
+       uintptr_t thread_id;
+       int i;
+       uint32_t trace_debugid = TRACEDBG_CODE(DBG_TRACE_STRING,
+                                              TRACE_STRING_GLOBAL);
+
+       thread_id = (uintptr_t)thread_tid(current_thread());
+
+       /* if the ID is being invalidated, just emit that */
+       if (str_id != 0 && str_len == 0) {
+               kernel_debug_internal(trace_debugid | DBG_FUNC_START | DBG_FUNC_END,
+                                     (uintptr_t)debugid, (uintptr_t)str_id, 0, 0,
+                                     thread_id);
+               return str_id;
+       }
+
+       /* generate an ID, if necessary */
+       if (str_id == 0) {
+               str_id = OSIncrementAtomic64((SInt64 *)&g_curr_str_id);
+               str_id = (str_id & STR_ID_MASK) | g_str_id_signature;
+       }
+
+       trace_debugid |= DBG_FUNC_START;
+       /* string can fit in a single tracepoint */
+       if (str_len <= (2 * sizeof(uintptr_t))) {
+               trace_debugid |= DBG_FUNC_END;
+       }
+
+       kernel_debug_internal(trace_debugid, (uintptr_t)debugid,
+                             (uintptr_t)str_id, str[0],
+                                                str[1], thread_id);
+
+       trace_debugid &= KDBG_EVENTID_MASK;
+       i = 2;
+       written += 2 * sizeof(uintptr_t);
+
+       for (; written < str_len; i += 4, written += 4 * sizeof(uintptr_t)) {
+               if ((written + (4 * sizeof(uintptr_t))) >= str_len) {
+                       trace_debugid |= DBG_FUNC_END;
+               }
+               kernel_debug_internal(trace_debugid, str[i],
+                                                    str[i + 1],
+                                                    str[i + 2],
+                                                    str[i + 3], thread_id);
+       }
+
+       return str_id;
+}
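As the comment above this function notes, a string that exactly fills its final tracepoint is not NUL-terminated in the trace, so a consumer has to use the DBG_FUNC_END qualifier to find the end. A standalone sketch of that reassembly, using a simplified event record (the real kd_buf layout differs) and mirroring the emit pattern above, where the first tracepoint carries two words of string data and each continuation carries four:

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    #define DBG_FUNC_START 0x1
    #define DBG_FUNC_END   0x2

    /* simplified stand-in for a trace event; illustrative only */
    struct ev {
        uint32_t  debugid;
        uintptr_t args[4];
    };

    /* Copy string bytes out of a run of string events, adding the NUL the trace may omit. */
    static void reassemble(const struct ev *evs, size_t nev, char *out, size_t outsz)
    {
        size_t off = 0;

        for (size_t i = 0; i < nev && off < outsz - 1; i++) {
            /* first event: args[0] = debugid, args[1] = str_id, args[2..3] hold data */
            int first = (evs[i].debugid & DBG_FUNC_START) != 0;
            const char *data = (const char *)&evs[i].args[first ? 2 : 0];
            size_t avail = (first ? 2 : 4) * sizeof(uintptr_t);
            size_t n = avail < outsz - 1 - off ? avail : outsz - 1 - off;

            memcpy(out + off, data, n);
            off += n;
            if (evs[i].debugid & DBG_FUNC_END)
                break;
        }
        out[off] = '\0';
    }

    int main(void)
    {
        const char *s = "a string split across kdebug tracepoints";
        struct ev evs[4];
        size_t nev, off, len = strlen(s) + 1;
        char out[128];

        /* emit, mirroring kernel_debug_string_internal: 2 data words, then 4 per event */
        memset(evs, 0, sizeof(evs));
        evs[0].debugid = DBG_FUNC_START;
        memcpy(&evs[0].args[2], s, len < 2 * sizeof(uintptr_t) ? len : 2 * sizeof(uintptr_t));
        off = 2 * sizeof(uintptr_t);
        nev = 1;
        while (off < len) {
            size_t n = len - off < 4 * sizeof(uintptr_t) ? len - off : 4 * sizeof(uintptr_t);
            memcpy(&evs[nev].args[0], s + off, n);
            off += 4 * sizeof(uintptr_t);
            nev++;
        }
        evs[nev - 1].debugid |= DBG_FUNC_END;

        reassemble(evs, nev, out, sizeof(out));
        printf("%s\n", out);
        return 0;
    }
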
+
+/*
+ * Returns true if the current process can emit events, and false otherwise.
+ * Trace system and scheduling events circumvent this check, as do events
+ * emitted in interrupt context.
+ */
+static boolean_t
+kdebug_current_proc_enabled(uint32_t debugid)
+{
+       /* can't determine current process in interrupt context */
+       if (ml_at_interrupt_context()) {
+               return TRUE;
+       }
+
+       /* always emit trace system and scheduling events */
+       if ((KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE ||
+           (debugid & KDBG_CSC_MASK) == MACHDBG_CODE(DBG_MACH_SCHED, 0)))
+       {
+               return TRUE;
+       }
+
+       if (kd_ctrl_page.kdebug_flags & KDBG_PIDCHECK) {
+               proc_t cur_proc = current_proc();
+
+               /* only the process with the kdebug bit set is allowed */
+               if (cur_proc && !(cur_proc->p_kdebug)) {
+                       return FALSE;
+               }
+       } else if (kd_ctrl_page.kdebug_flags & KDBG_PIDEXCLUDE) {
+               proc_t cur_proc = current_proc();
+
+               /* every process except the one with the kdebug bit set is allowed */
+               if (cur_proc && cur_proc->p_kdebug) {
+                       return FALSE;
+               }
+       }
+
+       return TRUE;
+}
+
+/*
+ * Returns true if the debugid is disabled by filters, and false if the
+ * debugid is allowed to be traced.  A debugid may not be traced if the
+ * typefilter disables its class and subclass, it's outside a range
+ * check, or if it's not an allowed debugid in a value check.  Trace
+ * system events bypass this check.
+ */
+static boolean_t
+kdebug_debugid_enabled(uint32_t debugid)
+{
+       boolean_t is_enabled = TRUE;
+
+       /* if no filtering is enabled */
+       if (!kd_ctrl_page.kdebug_slowcheck) {
+               return TRUE;
+       }
+
+       if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE) {
+               return TRUE;
+       }
+
+       if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
+               disable_preemption();
+
+               /*
+                * Recheck if typefilter is still being used.  If tracing is being
+                * disabled, there's a 100ms sleep on the other end to keep the
+                * bitmap around for this check.
+                */
+               if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
+                       if (!(isset(type_filter_bitmap, KDBG_EXTRACT_CSC(debugid)))) {
+                               is_enabled = FALSE;
+                       }
+               }
+
+               enable_preemption();
+       } else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
+               if (debugid < kdlog_beg || debugid > kdlog_end) {
+                       is_enabled = FALSE;
+               }
+       } else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
+               if ((debugid & KDBG_EVENTID_MASK) != kdlog_value1 &&
+                       (debugid & KDBG_EVENTID_MASK) != kdlog_value2 &&
+                       (debugid & KDBG_EVENTID_MASK) != kdlog_value3 &&
+                       (debugid & KDBG_EVENTID_MASK) != kdlog_value4)
+               {
+                       is_enabled = FALSE;
+               }
+       }
+
+       return is_enabled;
+}
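The recheck-with-preemption-disabled pattern above (and in kernel_debug_enter/kernel_debug_internal) works because the teardown path in kdbg_clear() clears KDBG_TYPEFILTER_CHECK via kdbg_disable_typefilter(), sleeps ~100ms, and only then frees the bitmap in kdbg_deallocate_typefilter(), so any reader that saw the flag while non-preemptible finishes inside that grace period. A userspace-compilable sketch of the reader side; the preemption primitives are no-op stand-ins and the flag value is illustrative:

    #include <stdint.h>
    #include <stdio.h>

    /* stand-ins so the sketch compiles outside the kernel */
    static void disable_preemption(void) {}
    static void enable_preemption(void) {}
    static int isset_bit(const uint8_t *map, unsigned b) { return (map[b >> 3] >> (b & 7)) & 1; }

    #define KDBG_TYPEFILTER_CHECK 0x400     /* illustrative value, not the kernel's */

    static unsigned flags;                  /* models kd_ctrl_page.kdebug_flags */
    static uint8_t *type_filter_bitmap;     /* models the kernel's bitmap */

    /*
     * Reader side: recheck the flag after disabling preemption, and only then
     * dereference the bitmap.  The disable path clears the flag, waits ~100ms,
     * and frees the bitmap afterwards, so a reader that still sees the flag set
     * here can safely touch the memory.
     */
    static int debugid_passes_typefilter(unsigned csc)
    {
        int pass = 0;

        if (flags & KDBG_TYPEFILTER_CHECK) {
            disable_preemption();
            if (flags & KDBG_TYPEFILTER_CHECK)
                pass = isset_bit(type_filter_bitmap, csc);
            enable_preemption();
        }
        return pass;
    }

    int main(void)
    {
        static uint8_t map[8192];

        map[0] = 0x02;                      /* allow class/subclass code 1 */
        type_filter_bitmap = map;
        flags |= KDBG_TYPEFILTER_CHECK;
        printf("csc 1: %d, csc 2: %d\n",
               debugid_passes_typefilter(1), debugid_passes_typefilter(2));
        return 0;
    }
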
+
+/*
+ * Returns 0 if a string can be traced with these arguments.  Returns errno
+ * value if error occurred.
+ */
+static errno_t
+kdebug_check_trace_string(uint32_t debugid, uint64_t str_id)
+{
+       /* if there are function qualifiers on the debugid */
+       if (debugid & ~KDBG_EVENTID_MASK) {
+               return EINVAL;
+       }
+
+       if (kdebug_validate_debugid(debugid)) {
+               return EPERM;
+       }
+
+       if (str_id != 0 && (str_id & STR_ID_SIG_MASK) != g_str_id_signature) {
+               return EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Implementation of KPI kernel_debug_string.
+ */
+int
+kernel_debug_string(uint32_t debugid, uint64_t *str_id, const char *str)
+{
+       /* arguments to tracepoints must be word-aligned */
+       __attribute__((aligned(sizeof(uintptr_t)))) char str_buf[STR_BUF_SIZE];
+       assert_static(sizeof(str_buf) > MAX_STR_LEN);
+       vm_size_t len_copied;
+       int err;
+
+       assert(str_id);
+
+       if (__probable(kdebug_enable == 0)) {
+               return 0;
+       }
+
+       if (!kdebug_current_proc_enabled(debugid)) {
+               return 0;
+       }
+
+       if (!kdebug_debugid_enabled(debugid)) {
+               return 0;
+       }
+
+       if ((err = kdebug_check_trace_string(debugid, *str_id)) != 0) {
+               return err;
+       }
+
+       if (str == NULL) {
+               if (*str_id == 0) {
+                       return EINVAL;
+               }
+
+               *str_id = kernel_debug_string_internal(debugid, *str_id, NULL, 0);
+               return 0;
+       }
+
+       memset(str_buf, 0, sizeof(str_buf));
+       len_copied = strlcpy(str_buf, str, MAX_STR_LEN + 1);
+       *str_id = kernel_debug_string_internal(debugid, *str_id, str_buf,
+                                              len_copied);
+       return 0;
+}
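A sketch of how an in-kernel client might use this KPI; the subsystem name and debugid value are hypothetical, and this only compiles in a kernel context where kernel_debug_string() is declared. A zeroed str_id asks for a fresh ID, calling again with that ID binds it to a new string, and a NULL string with a non-zero ID invalidates it:

    /* hypothetical event id: any non-DBG_TRACE class with the func bits clear passes validation */
    #define EXAMPLEDRV_PATH_DBGID  0x0badc0d0

    static uint64_t exampledrv_path_str_id = 0;

    static void
    exampledrv_note_path(const char *path)
    {
            /* first call generates an ID; later calls re-bind the same ID to a new string */
            (void)kernel_debug_string(EXAMPLEDRV_PATH_DBGID, &exampledrv_path_str_id, path);
    }

    static void
    exampledrv_forget_path(void)
    {
            /* a NULL string with a non-zero ID emits the invalidation tracepoint */
            (void)kernel_debug_string(EXAMPLEDRV_PATH_DBGID, &exampledrv_path_str_id, NULL);
            exampledrv_path_str_id = 0;
    }
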
+
+/*
+ * Support syscall kdebug_trace_string.
+ */
+int
+kdebug_trace_string(__unused struct proc *p,
+                    struct kdebug_trace_string_args *uap,
+                    uint64_t *retval)
+{
+       __attribute__((aligned(sizeof(uintptr_t)))) char str_buf[STR_BUF_SIZE];
+       assert_static(sizeof(str_buf) > MAX_STR_LEN);
+       size_t len_copied;
+       int err;
+
+       if (__probable(kdebug_enable == 0)) {
+               return 0;
+       }
+
+       if (!kdebug_current_proc_enabled(uap->debugid)) {
+               return 0;
+       }
+
+       if (!kdebug_debugid_enabled(uap->debugid)) {
+               return 0;
+       }
+
+       if ((err = kdebug_check_trace_string(uap->debugid, uap->str_id)) != 0) {
+               return err;
+       }
+
+       if (uap->str == USER_ADDR_NULL) {
+               if (uap->str_id == 0) {
+                       return EINVAL;
+               }
+
+               *retval = kernel_debug_string_internal(uap->debugid, uap->str_id,
+                                                      NULL, 0);
+               return 0;
+       }
+
+       memset(str_buf, 0, sizeof(str_buf));
+       err = copyinstr(uap->str, str_buf, MAX_STR_LEN + 1, &len_copied);
+
+       /* it's alright to truncate the string, so allow ENAMETOOLONG */
+       if (err == ENAMETOOLONG) {
+               str_buf[MAX_STR_LEN] = '\0';
+       } else if (err) {
+               return err;
+       }
+
+       if (len_copied <= 1) {
+               return EINVAL;
+       }
+
+       /* convert back to a length */
+       len_copied--;
+
+       *retval = kernel_debug_string_internal(uap->debugid, uap->str_id, str_buf,
+                                              len_copied);
+       return 0;
+}
+
 static void
 kdbg_lock_init(void)
 {
@@ -1376,7 +1733,7 @@ kdbg_reinit(boolean_t early_trace)
                kmem_free(kernel_map, (vm_offset_t)kd_mapptr, kd_mapsize);
                kd_ctrl_page.kdebug_flags &= ~KDBG_MAPINIT;
                kd_mapsize = 0;
-               kd_mapptr = (kd_threadmap *) 0;
+               kd_mapptr = NULL;
                kd_mapcount = 0;
        }  
        ret = kdbg_bootstrap(early_trace);
@@ -1496,7 +1853,7 @@ kdbg_cpumap_init_internal(kd_iop_t* iops, uint32_t cpu_count, uint8_t** cpumap,
        *cpumap_size = bytes_needed;
        
        if (*cpumap == NULL) {
-               if (kmem_alloc(kernel_map, (vm_offset_t*)cpumap, (vm_size_t)*cpumap_size) != KERN_SUCCESS) {
+               if (kmem_alloc(kernel_map, (vm_offset_t*)cpumap, (vm_size_t)*cpumap_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
                        return ENOMEM;
                }
        } else if (bytes_available < bytes_needed) {
@@ -1585,7 +1942,7 @@ kd_threadmap* kdbg_thrmap_init_internal(unsigned int count, unsigned int *mapsiz
        if (count && count < *mapcount)
                return (0);
 
-       if ((kmem_alloc(kernel_map, &kaddr, (vm_size_t)*mapsize) == KERN_SUCCESS)) {
+       if ((kmem_alloc(kernel_map, &kaddr, (vm_size_t)*mapsize, VM_KERN_MEMORY_DIAG) == KERN_SUCCESS)) {
                bzero((void *)kaddr, *mapsize);
                mapptr = (kd_threadmap *)kaddr;
        } else
@@ -1593,7 +1950,7 @@ kd_threadmap* kdbg_thrmap_init_internal(unsigned int count, unsigned int *mapsiz
 
        tts_mapsize = tts_count * sizeof(struct tts);
 
-       if ((kmem_alloc(kernel_map, &kaddr, (vm_size_t)tts_mapsize) == KERN_SUCCESS)) {
+       if ((kmem_alloc(kernel_map, &kaddr, (vm_size_t)tts_mapsize, VM_KERN_MEMORY_DIAG) == KERN_SUCCESS)) {
                bzero((void *)kaddr, tts_mapsize);
                tts_mapptr = (struct tts *)kaddr;
        } else {
@@ -1650,12 +2007,13 @@ kd_threadmap* kdbg_thrmap_init_internal(unsigned int count, unsigned int *mapsiz
 static void
 kdbg_clear(void)
 {
-        /*
+       /*
         * Clean up the trace buffer
         * First make sure we're not in
         * the middle of cutting a trace
         */
        kdbg_set_tracing_enabled(FALSE, KDEBUG_ENABLE_TRACE);
+       kdbg_disable_typefilter();
 
        /*
         * make sure the SLOW_NOLOG is seen
@@ -1664,13 +2022,12 @@ kdbg_clear(void)
         */
        IOSleep(100);
 
-        global_state_pid = -1;
+       global_state_pid = -1;
        kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
        kd_ctrl_page.kdebug_flags &= ~(KDBG_NOWRAP | KDBG_RANGECHECK | KDBG_VALCHECK);
        kd_ctrl_page.kdebug_flags &= ~(KDBG_PIDCHECK | KDBG_PIDEXCLUDE);
        
-       kdbg_disable_typefilter();
-
+       kdbg_deallocate_typefilter();
        delete_buffers();
        nkdbufs = 0;
 
@@ -1793,16 +2150,13 @@ kdbg_setrtcdec(kd_regtype *kdr)
 int
 kdbg_enable_typefilter(void)
 {
-       if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
-               /* free the old filter */
-               kdbg_disable_typefilter();
-       }
-       
-       if (kmem_alloc(kernel_map, (vm_offset_t *)&type_filter_bitmap, KDBG_TYPEFILTER_BITMAP_SIZE) != KERN_SUCCESS) {
-               return ENOSPC;
+       int ret;
+
+       /* Allocate memory for bitmap if not already allocated */
+       ret = kdbg_allocate_typefilter();
+       if (ret) {
+               return ret;
        }
-       
-       bzero(type_filter_bitmap, KDBG_TYPEFILTER_BITMAP_SIZE);
 
        /* Turn off range and value checks */
        kd_ctrl_page.kdebug_flags &= ~(KDBG_RANGECHECK | KDBG_VALCHECK);
@@ -1818,20 +2172,55 @@ kdbg_disable_typefilter(void)
 {
        /*  Disable filter checking */  
        kd_ctrl_page.kdebug_flags &= ~KDBG_TYPEFILTER_CHECK;
-       
+
        /*  Turn off slow checks unless pid checks are using them */
        if ( (kd_ctrl_page.kdebug_flags & (KDBG_PIDCHECK | KDBG_PIDEXCLUDE)) )
                kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
        else
                kdbg_set_flags(SLOW_CHECKS, 0, FALSE);
-       
-       if(type_filter_bitmap == NULL)
-               return 0;
 
-       vm_offset_t old_bitmap = (vm_offset_t)type_filter_bitmap;
-       type_filter_bitmap = NULL;
+       /* typefilter bitmap will be deallocated later */
+
+       return 0;
+}
+
+static int
+kdbg_allocate_typefilter(void)
+{
+       if (type_filter_bitmap == NULL) {
+               vm_offset_t bitmap = 0;
+
+               if (kmem_alloc(kernel_map, &bitmap, KDBG_TYPEFILTER_BITMAP_SIZE, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
+                       return ENOSPC;
+               }
+
+               bzero((void *)bitmap, KDBG_TYPEFILTER_BITMAP_SIZE);
+
+               if (!OSCompareAndSwapPtr(NULL, (void *)bitmap, &type_filter_bitmap)) {
+                       kmem_free(kernel_map, bitmap, KDBG_TYPEFILTER_BITMAP_SIZE);
+                       return 0; /* someone assigned a buffer */
+               }
+       } else {
+               bzero(type_filter_bitmap, KDBG_TYPEFILTER_BITMAP_SIZE);
+       }
+
+       return 0;
+}
+
+static int
+kdbg_deallocate_typefilter(void)
+{
+       if(type_filter_bitmap) {
+               vm_offset_t bitmap = (vm_offset_t)type_filter_bitmap;
+
+               if (OSCompareAndSwapPtr((void *)bitmap, NULL, &type_filter_bitmap)) {
+                       kmem_free(kernel_map, bitmap, KDBG_TYPEFILTER_BITMAP_SIZE);
+                       return 0;
+               } else {
+                       /* already swapped */
+               }
+       }
 
-       kmem_free(kernel_map, old_bitmap, KDBG_TYPEFILTER_BITMAP_SIZE);
        return 0;
 }
 
@@ -1945,50 +2334,317 @@ kdbg_getreg(__unused kd_regtype * kdr)
        return(EINVAL);
 }
 
+static int
+kdbg_write_to_vnode(caddr_t buffer, size_t size, vnode_t vp, vfs_context_t ctx, off_t file_offset)
+{
+       return vn_rdwr(UIO_WRITE, vp, buffer, size, file_offset, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT,
+                       vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
+}
+
 int
-kdbg_readcpumap(user_addr_t user_cpumap, size_t *user_cpumap_size)
+kdbg_write_v3_chunk_header(user_addr_t buffer, uint32_t tag, uint32_t sub_tag, uint64_t length, vnode_t vp, vfs_context_t ctx)
 {
-       uint8_t* cpumap = NULL;
-       uint32_t cpumap_size = 0;
        int ret = KERN_SUCCESS;
-
-       if (kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) {
-               if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size) == KERN_SUCCESS) {
-                       if (user_cpumap) {
-                               size_t bytes_to_copy = (*user_cpumap_size >= cpumap_size) ? cpumap_size : *user_cpumap_size;
-                               if (copyout(cpumap, user_cpumap, (size_t)bytes_to_copy)) {
-                                       ret = EFAULT;
-                               }
+       kd_chunk_header_v3 header;
+
+       header.tag = tag;
+       header.sub_tag = sub_tag;
+       header.length = length;
+
+       // Check that only one of them is valid
+       assert(!buffer ^ !vp);
+       assert((vp == NULL) || (ctx != NULL));
+
+       // Write the chunk header to the file or copy it out to the user buffer
+       if (buffer || vp) {
+               if (vp) {
+                       ret = kdbg_write_to_vnode((caddr_t)&header, sizeof(kd_chunk_header_v3), vp, ctx, RAW_file_offset);
+                       if (ret) {
+                               goto write_error;
                        }
-                       *user_cpumap_size = cpumap_size;
-                       kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
-               } else
-                       ret = EINVAL;
-       } else
-               ret = EINVAL;
-
-       return (ret);
+                       RAW_file_offset  += (sizeof(kd_chunk_header_v3));
+               }
+               else {
+                       ret = copyout(&header, buffer, sizeof(kd_chunk_header_v3));
+                       if (ret) {
+                               goto write_error;
+                       }
+               }
+       }
+write_error:
+       return ret;
 }
 
 int
-kdbg_readcurthrmap(user_addr_t buffer, size_t *bufsize)
+kdbg_write_v3_chunk_header_to_buffer(void * buffer, uint32_t tag, uint32_t sub_tag, uint64_t length)
 {
-       kd_threadmap *mapptr;
-       unsigned int mapsize;
-       unsigned int mapcount;
-       unsigned int count = 0;
-       int ret = 0;
+       kd_chunk_header_v3 header;
 
-       count = *bufsize/sizeof(kd_threadmap);
-       *bufsize = 0;
-
-       if ( (mapptr = kdbg_thrmap_init_internal(count, &mapsize, &mapcount)) ) {
-               if (copyout(mapptr, buffer, mapcount * sizeof(kd_threadmap)))
-                       ret = EFAULT;
-               else
-                       *bufsize = (mapcount * sizeof(kd_threadmap));
+       header.tag = tag;
+       header.sub_tag = sub_tag;
+       header.length = length;
 
-               kmem_free(kernel_map, (vm_offset_t)mapptr, mapsize);
+       if (!buffer) {
+               return 0;
+       }
+
+       memcpy(buffer, &header, sizeof(kd_chunk_header_v3));
+
+       return (sizeof(kd_chunk_header_v3));
+}
+
+int
+kdbg_write_v3_chunk_to_fd(uint32_t tag, uint32_t sub_tag, uint64_t length, void *payload, uint64_t payload_size, int fd)
+{
+       proc_t p;
+       struct vfs_context context;
+       struct fileproc *fp;
+       vnode_t vp;
+       p = current_proc();
+
+       proc_fdlock(p);
+       if ( (fp_lookup(p, fd, &fp, 1)) ) {
+               proc_fdunlock(p);
+               return EFAULT;
+       }
+
+       context.vc_thread = current_thread();
+       context.vc_ucred = fp->f_fglob->fg_cred;
+
+       if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
+               fp_drop(p, fd, fp, 1);
+               proc_fdunlock(p);
+               return EBADF;
+       }
+       vp = (struct vnode *) fp->f_fglob->fg_data;
+       proc_fdunlock(p);
+
+       if ( (vnode_getwithref(vp)) == 0 ) {
+               RAW_file_offset = fp->f_fglob->fg_offset;
+
+               kd_chunk_header_v3 chunk_header = { .tag = tag, .sub_tag = sub_tag, .length = length };
+
+               int ret = kdbg_write_to_vnode((caddr_t)  &chunk_header, sizeof(kd_chunk_header_v3), vp, &context, RAW_file_offset);
+               if (!ret) {
+                       RAW_file_offset += sizeof(kd_chunk_header_v3);
+               }
+
+               ret = kdbg_write_to_vnode((caddr_t) payload, (size_t) payload_size, vp, &context, RAW_file_offset);
+               if (!ret) {
+                       RAW_file_offset  += payload_size;
+               }
+
+               fp->f_fglob->fg_offset = RAW_file_offset;
+               vnode_put(vp);
+       }
+
+       fp_drop(p, fd, fp, 0);
+       return KERN_SUCCESS;
+}
+
+user_addr_t
+kdbg_write_v3_event_chunk_header(user_addr_t buffer, uint32_t tag, uint64_t length, vnode_t vp, vfs_context_t ctx)
+{
+        uint64_t future_chunk_timestamp = 0;
+        length += sizeof(uint64_t);
+
+        if (kdbg_write_v3_chunk_header(buffer, tag, V3_EVENT_DATA_VERSION, length, vp, ctx)) {
+                return 0;
+        }
+        if (buffer) {
+                buffer += sizeof(kd_chunk_header_v3);
+        }
+
+        // Check that only one of them is valid
+        assert(!buffer ^ !vp);
+        assert((vp == NULL) || (ctx != NULL));
+
+        // Write the 8-byte future_chunk_timestamp field in the payload
+        if (buffer || vp) {
+                if (vp) {
+                        int ret = kdbg_write_to_vnode((caddr_t)&future_chunk_timestamp, sizeof(uint64_t), vp, ctx, RAW_file_offset);
+                        if (!ret) {
+                                RAW_file_offset  += (sizeof(uint64_t));
+                        }
+                }
+                else {
+                        if (copyout(&future_chunk_timestamp, buffer, sizeof(uint64_t))) {
+                                return 0;
+                        }
+                }
+        }
+
+        return (buffer + sizeof(uint64_t));
+}
+
+int
+kdbg_write_v3_header(user_addr_t user_header, size_t *user_header_size, int fd)
+{
+        int ret = KERN_SUCCESS;
+        kd_header_v3 header;
+
+        uint8_t* cpumap = 0;
+        uint32_t cpumap_size = 0;
+        uint32_t thrmap_size = 0;
+
+        size_t bytes_needed = 0;
+
+        // Check that only one of them is valid
+        assert(!user_header ^ !fd);
+        assert(user_header_size);
+
+        if ( !(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) ) {
+                ret = EINVAL;
+                goto bail;
+        }
+
+        if ( !(user_header || fd) ) {
+                ret = EINVAL;
+                goto bail;
+        }
+
+        // Initialize the cpu map
+        ret = kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size);
+        if (ret != KERN_SUCCESS) {
+                goto bail;
+        }
+
+        // Check if a thread map is initialized
+        if ( !kd_mapptr ) {
+                ret = EINVAL;
+                goto bail;
+        }
+        thrmap_size = kd_mapcount * sizeof(kd_threadmap);
+
+        // Set up the header.
+        // See v3 header description in sys/kdebug.h for more information.
+
+        header.tag = RAW_VERSION3;
+        header.sub_tag = V3_HEADER_VERSION;
+        header.length = ( sizeof(kd_header_v3) + cpumap_size - sizeof(kd_cpumap_header));
+
+        mach_timebase_info_data_t timebase = {0, 0};
+        clock_timebase_info(&timebase);
+        header.timebase_numer = timebase.numer;
+        header.timebase_denom = timebase.denom;
+        header.timestamp = 0;
+        header.walltime_secs = 0;
+        header.walltime_usecs = 0;
+        header.timezone_minuteswest = 0;
+        header.timezone_dst = 0;
+
+#if defined __LP64__
+        header.flags = 1;
+#else
+        header.flags = 0;
+#endif
+
+        // If it's a buffer, check that we have enough space to copy the header and the maps.
+        if (user_header) {
+                bytes_needed = header.length + thrmap_size + (2 * sizeof(kd_chunk_header_v3));
+                if ( !user_header_size ) {
+                        ret = EINVAL;
+                        goto bail;
+                }
+                if (*user_header_size < bytes_needed) {
+                        ret = EINVAL;
+                        goto bail;
+                }
+        }
+
+        // Start writing the header
+        if (fd) {
+                void *hdr_ptr = (void *)(((uintptr_t) &header) + sizeof(kd_chunk_header_v3));
+                size_t payload_size = (sizeof(kd_header_v3) - sizeof(kd_chunk_header_v3));
+
+                ret = kdbg_write_v3_chunk_to_fd(RAW_VERSION3, V3_HEADER_VERSION, header.length, hdr_ptr, payload_size, fd);
+                if (ret) {
+                        goto bail;
+                }
+        }
+        else {
+            if (copyout(&header, user_header, sizeof(kd_header_v3))) {
+                    ret = EFAULT;
+                    goto bail;
+            }
+            // Update the user pointer
+            user_header += sizeof(kd_header_v3);
+        }
+
+        // Write a cpu map. This is a sub chunk of the header
+        cpumap = (uint8_t*)((uintptr_t) cpumap + sizeof(kd_cpumap_header));
+        size_t payload_size = (size_t)(cpumap_size - sizeof(kd_cpumap_header));
+        if (fd) {
+                ret = kdbg_write_v3_chunk_to_fd(V3_CPU_MAP, V3_CPUMAP_VERSION, payload_size, (void *)cpumap, payload_size, fd);
+                if (ret) {
+                        goto bail;
+                }
+        }
+        else {
+                ret = kdbg_write_v3_chunk_header(user_header, V3_CPU_MAP, V3_CPUMAP_VERSION, payload_size, NULL, NULL);
+                if (ret) {
+                        goto bail;
+                }
+                user_header += sizeof(kd_chunk_header_v3);
+                if (copyout(cpumap, user_header, payload_size))  {
+                        ret = EFAULT;
+                        goto bail;
+                }
+                // Update the user pointer
+                user_header += payload_size;
+        }
+
+        // Write a thread map
+        if (fd) {
+                ret = kdbg_write_v3_chunk_to_fd(V3_THREAD_MAP, V3_THRMAP_VERSION, thrmap_size, (void *)kd_mapptr, thrmap_size, fd);
+                if (ret) {
+                        goto bail;
+                }
+        }
+        else {
+                ret = kdbg_write_v3_chunk_header(user_header, V3_THREAD_MAP, V3_THRMAP_VERSION, thrmap_size, NULL, NULL);
+                if (ret) {
+                        goto bail;
+                }
+                user_header += sizeof(kd_chunk_header_v3);
+                if (copyout(kd_mapptr, user_header, thrmap_size)) {
+                        ret = EFAULT;
+                        goto bail;
+                }
+                user_header += thrmap_size;
+        }
+
+        if (fd) {
+                RAW_file_written += bytes_needed;
+        }
+
+        *user_header_size = bytes_needed;
+bail:
+        if (cpumap) {
+                kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
+        }
+        return (ret);
+}
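kdbg_write_v3_header() frames everything as chunks: a {tag, sub_tag, length} chunk header followed by length bytes of payload, emitted in the order header, cpu map, thread map (in the real file the cpu map is a sub-chunk accounted for in the header chunk's length, per the code above; the sketch below flattens that detail). A standalone sketch of the framing and of how a reader walks such a stream; the tag values and payloads are placeholders, and the struct mirrors only the fields the code above sets:

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    /* chunk framing used by the RAW_VERSION3 stream: a small header, then payload */
    typedef struct {
        uint32_t tag;
        uint32_t sub_tag;
        uint64_t length;        /* payload bytes following this header */
    } chunk_header_v3;

    /* placeholder tag values; the real ones are defined in sys/kdebug.h */
    enum { TAG_HEADER = 0x1000, TAG_CPU_MAP = 0x1001, TAG_THREAD_MAP = 0x1002 };

    static size_t emit_chunk(uint8_t *buf, uint32_t tag, uint32_t sub_tag,
                             const void *payload, uint64_t len)
    {
        chunk_header_v3 h = { tag, sub_tag, len };

        memcpy(buf, &h, sizeof(h));
        memcpy(buf + sizeof(h), payload, (size_t)len);
        return sizeof(h) + (size_t)len;
    }

    int main(void)
    {
        uint8_t stream[256];
        size_t off = 0;
        const char cpumap[] = "cpu map payload";
        const char thrmap[] = "thread map payload";

        /* same ordering kdbg_write_v3_header uses */
        off += emit_chunk(stream + off, TAG_HEADER, 1, "hdr", 3);
        off += emit_chunk(stream + off, TAG_CPU_MAP, 1, cpumap, sizeof(cpumap));
        off += emit_chunk(stream + off, TAG_THREAD_MAP, 1, thrmap, sizeof(thrmap));

        /* a reader hops header.length bytes at a time */
        for (size_t pos = 0; pos < off; ) {
            chunk_header_v3 h;
            memcpy(&h, stream + pos, sizeof(h));
            printf("tag 0x%x sub_tag %u length %llu\n",
                   h.tag, h.sub_tag, (unsigned long long)h.length);
            pos += sizeof(h) + (size_t)h.length;
        }
        return 0;
    }
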
+
+int
+kdbg_readcpumap(user_addr_t user_cpumap, size_t *user_cpumap_size)
+{
+       uint8_t* cpumap = NULL;
+       uint32_t cpumap_size = 0;
+       int ret = KERN_SUCCESS;
+
+       if (kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) {
+               if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size) == KERN_SUCCESS) {
+                       if (user_cpumap) {
+                               size_t bytes_to_copy = (*user_cpumap_size >= cpumap_size) ? cpumap_size : *user_cpumap_size;
+                               if (copyout(cpumap, user_cpumap, (size_t)bytes_to_copy)) {
+                                       ret = EFAULT;
+                               }
+                       }
+                       *user_cpumap_size = cpumap_size;
+                       kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
+               } else
+                       ret = EINVAL;
        } else
                ret = EINVAL;
 
@@ -1996,113 +2652,181 @@ kdbg_readcurthrmap(user_addr_t buffer, size_t *bufsize)
 }
 
 int
-kdbg_readthrmap(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx)
+kdbg_readcurthrmap(user_addr_t buffer, size_t *bufsize)
 {
-       int avail = *number;
-       int ret = 0;
-       uint32_t count = 0;
+       kd_threadmap *mapptr;
        unsigned int mapsize;
+       unsigned int mapcount;
+       unsigned int count = 0;
+       int ret = 0;
 
-       count = avail/sizeof (kd_threadmap);
+       count = *bufsize/sizeof(kd_threadmap);
+       *bufsize = 0;
 
-       mapsize = kd_mapcount * sizeof(kd_threadmap);
+       if ( (mapptr = kdbg_thrmap_init_internal(count, &mapsize, &mapcount)) ) {
+               if (copyout(mapptr, buffer, mapcount * sizeof(kd_threadmap)))
+                       ret = EFAULT;
+               else
+                       *bufsize = (mapcount * sizeof(kd_threadmap));
 
-       if (count && (count <= kd_mapcount))
-       {
-               if ((kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr)
-               {
-                       if (*number < mapsize)
-                               ret = EINVAL;
-                       else
-                       {
-                               if (vp)
-                               {
-                                       RAW_header      header;
-                                       clock_sec_t     secs;
-                                       clock_usec_t    usecs;
-                                       char    *pad_buf;
-                                       uint32_t pad_size;
-                                       uint32_t extra_thread_count = 0;
-                                       uint32_t cpumap_size;
-                                       
-                                       /*
-                                        * To write a RAW_VERSION1+ file, we
-                                        * must embed a cpumap in the "padding"
-                                        * used to page align the events folloing
-                                        * the threadmap. If the threadmap happens
-                                        * to not require enough padding, we
-                                        * artificially increase its footprint
-                                        * until it needs enough padding.
-                                        */
+               kmem_free(kernel_map, (vm_offset_t)mapptr, mapsize);
+       } else
+               ret = EINVAL;
 
-                                       pad_size = PAGE_SIZE - ((sizeof(RAW_header) + (count * sizeof(kd_threadmap))) & PAGE_MASK_64);
-                                       cpumap_size = sizeof(kd_cpumap_header) + kd_ctrl_page.kdebug_cpus * sizeof(kd_cpumap);
+       return (ret);
+}
 
-                                       if (cpumap_size > pad_size) {
-                                               /* Force an overflow onto the next page, we get a full page of padding */
-                                               extra_thread_count = (pad_size / sizeof(kd_threadmap)) + 1;
-                                       }
+static int
+kdbg_write_v1_plus_header(uint32_t count, vnode_t vp, vfs_context_t ctx)
+{
+       int ret = 0;
+       RAW_header      header;
+       clock_sec_t     secs;
+       clock_usec_t    usecs;
+       char    *pad_buf;
+       uint32_t pad_size;
+       uint32_t extra_thread_count = 0;
+       uint32_t cpumap_size;
+       unsigned int mapsize = kd_mapcount * sizeof(kd_threadmap);
 
-                                       header.version_no = RAW_VERSION1;
-                                       header.thread_count = count + extra_thread_count;
+       /*
+        * To write a RAW_VERSION1+ file, we
+        * must embed a cpumap in the "padding"
+        * used to page align the events following
+        * the threadmap. If the threadmap happens
+        * to not require enough padding, we
+        * artificially increase its footprint
+        * until it needs enough padding.
+        */
 
-                                       clock_get_calendar_microtime(&secs, &usecs);
-                                       header.TOD_secs = secs;
-                                       header.TOD_usecs = usecs;
+       assert(vp);
+       assert(ctx);
 
-                                       ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)&header, sizeof(RAW_header), RAW_file_offset,
-                                                     UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
-                                       if (ret)
-                                               goto write_error;
-                                       RAW_file_offset += sizeof(RAW_header);
+       pad_size = PAGE_16KB - ((sizeof(RAW_header) + (count * sizeof(kd_threadmap))) & PAGE_MASK_64);
+       cpumap_size = sizeof(kd_cpumap_header) + kd_ctrl_page.kdebug_cpus * sizeof(kd_cpumap);
 
-                                       ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)kd_mapptr, mapsize, RAW_file_offset,
-                                                     UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
-                                       if (ret)
-                                               goto write_error;
-                                       RAW_file_offset += mapsize;
+       if (cpumap_size > pad_size) {
+               /* If the cpu map doesn't fit in the currently available pad_size,
+                * we increase pad_size by 16K. We do this so that the event
+                * data is always available on a page-aligned boundary for both
+                * 4K and 16K systems. We enforce this alignment for the event
+                * data so that we can take advantage of optimized file/disk writes. */
+               pad_size += PAGE_16KB;
+       }
 
-                                       if (extra_thread_count) {
-                                               pad_size = extra_thread_count * sizeof(kd_threadmap);
-                                               pad_buf = (char *)kalloc(pad_size);
-                                               memset(pad_buf, 0, pad_size);
+       /* We silently embed a cpumap in the "padding" by artificially increasing
+        * the number of thread entries. However, we also need to ensure that the
+        * cpumap is embedded in the last 4K page before the event data is expected.
+        * This way the tools can read the data starting at the next page boundary
+        * on both 4K and 16K systems, preserving compatibility with older versions
+        * of the tools.
+        */
+       if (pad_size > PAGE_4KB) {
+               pad_size -= PAGE_4KB;
+               extra_thread_count = (pad_size / sizeof(kd_threadmap)) + 1;
+       }
 
-                                               ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset,
-                                                             UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
-                                               kfree(pad_buf, pad_size);
+       header.version_no = RAW_VERSION1;
+       header.thread_count = count + extra_thread_count;
+
+       clock_get_calendar_microtime(&secs, &usecs);
+       header.TOD_secs = secs;
+       header.TOD_usecs = usecs;
+
+       ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)&header, sizeof(RAW_header), RAW_file_offset,
+                     UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
+       if (ret)
+               goto write_error;
+       RAW_file_offset += sizeof(RAW_header);
+
+       ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)kd_mapptr, mapsize, RAW_file_offset,
+                     UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
+       if (ret)
+               goto write_error;
+       RAW_file_offset += mapsize;
+
+       if (extra_thread_count) {
+               pad_size = extra_thread_count * sizeof(kd_threadmap);
+               pad_buf = (char *)kalloc(pad_size);
+               if (!pad_buf) {
+                       ret = ENOMEM;
+                       goto write_error;
+               }
+               memset(pad_buf, 0, pad_size);
 
-                                               if (ret)
-                                                       goto write_error;
-                                               RAW_file_offset += pad_size;
+               ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset,
+                               UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
+               kfree(pad_buf, pad_size);
 
-                                       }
-                                       
-                                       pad_size = PAGE_SIZE - (RAW_file_offset & PAGE_MASK_64);
-                                       if (pad_size) {
-                                               pad_buf = (char *)kalloc(pad_size);
-                                               memset(pad_buf, 0, pad_size);
-
-                                               /*
-                                                * embed a cpumap in the padding bytes.
-                                                * older code will skip this.
-                                                * newer code will know how to read it.
-                                                */
-                                               uint32_t temp = pad_size;
-                                               if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, (uint8_t**)&pad_buf, &temp) != KERN_SUCCESS) {
-                                                       memset(pad_buf, 0, pad_size);
-                                               }
-
-                                               ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset,
-                                                             UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
-                                               kfree(pad_buf, pad_size);
-
-                                               if (ret)
-                                                       goto write_error;
-                                               RAW_file_offset += pad_size;
-                                       }
-                                       RAW_file_written += sizeof(RAW_header) + mapsize + pad_size;
+               if (ret)
+                       goto write_error;
+               RAW_file_offset += pad_size;
 
-                               } else {
+       }
+
+       pad_size = PAGE_SIZE - (RAW_file_offset & PAGE_MASK_64);
+       if (pad_size) {
+               pad_buf = (char *)kalloc(pad_size);
+               if (!pad_buf) {
+                       ret = ENOMEM;
+                       goto write_error;
+               }
+               memset(pad_buf, 0, pad_size);
+
+               /*
+                * embed a cpumap in the padding bytes.
+                * older code will skip this.
+                * newer code will know how to read it.
+                */
+               uint32_t temp = pad_size;
+               if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, (uint8_t**)&pad_buf, &temp) != KERN_SUCCESS) {
+                       memset(pad_buf, 0, pad_size);
+               }
+
+               ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset,
+                               UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
+               kfree(pad_buf, pad_size);
+
+               if (ret)
+                       goto write_error;
+               RAW_file_offset += pad_size;
+       }
+       RAW_file_written += sizeof(RAW_header) + mapsize + pad_size;
+
+write_error:
+       return ret;
+}
+
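A minimal standalone sketch of the page-alignment arithmetic used above when the V1+ header is padded out to a page boundary. PAGE_SIZE and PAGE_MASK_64 here are illustrative stand-ins for the kernel's definitions, and the cpumap embedding is omitted.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE    4096ULL
#define PAGE_MASK_64 (PAGE_SIZE - 1)

int
main(void)
{
	uint64_t file_offset = 12345;   /* arbitrary example offset */

	/*
	 * Bytes needed to reach the next page boundary: 1..PAGE_SIZE.
	 * An already aligned offset gets a full page of padding, which is
	 * the space kdbg_cpumap_init_internal() uses to embed the cpumap.
	 */
	uint64_t pad_size = PAGE_SIZE - (file_offset & PAGE_MASK_64);

	printf("offset %llu -> %llu pad bytes -> new offset %llu\n",
	    (unsigned long long)file_offset,
	    (unsigned long long)pad_size,
	    (unsigned long long)(file_offset + pad_size));
	return 0;
}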
+int
+kdbg_readthrmap(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx)
+{
+
+       int avail = 0;
+       int ret = 0;
+       uint32_t count = 0;
+       unsigned int mapsize;
+
+       if ((!vp && !buffer) || (vp && buffer)) {
+               return EINVAL;
+       }
+
+       assert(number);
+       assert((vp == NULL) || (ctx != NULL));
+
+       avail = *number;
+       count = avail/sizeof (kd_threadmap);
+       mapsize = kd_mapcount * sizeof(kd_threadmap);
+
+       if (count && (count <= kd_mapcount)) {
+               if ((kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr) {
+                       if (*number < mapsize)
+                               ret = EINVAL;
+                       else {
+                               if (vp) {
+                                       ret = kdbg_write_v1_plus_header(count, vp, ctx);
+                                       if (ret)
+                                               goto write_error;
+                               }
+                               else {
                                        if (copyout(kd_mapptr, buffer, mapsize))
                                                ret = EINVAL;
                                }
@@ -2118,10 +2842,11 @@ kdbg_readthrmap(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ct
        {
                count = 0;
 
-               vn_rdwr(UIO_WRITE, vp, (caddr_t)&count, sizeof(uint32_t), RAW_file_offset,
-                       UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
-               RAW_file_offset += sizeof(uint32_t);
-               RAW_file_written += sizeof(uint32_t);
+               ret = kdbg_write_to_vnode((caddr_t)&count, sizeof(uint32_t), vp, ctx, RAW_file_offset);
+               if (!ret) {
+                       RAW_file_offset += sizeof(uint32_t);
+                       RAW_file_written += sizeof(uint32_t);
+               }
        }
 write_error:
        if ((kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr)
@@ -2135,6 +2860,54 @@ write_error:
        return(ret);
 }
 
+int
+kdbg_readthrmap_v3(user_addr_t buffer, size_t *number, int fd)
+{
+       int avail = 0;
+       int ret = 0;
+       uint32_t count = 0;
+       unsigned int mapsize;
+
+       if ((!fd && !buffer) || (fd && buffer)) {
+               return EINVAL;
+       }
+
+       assert(number);
+
+       avail = *number;
+       count = avail/sizeof (kd_threadmap);
+       mapsize = kd_mapcount * sizeof(kd_threadmap);
+
+       if (count && (count <= kd_mapcount)) {
+               if ((kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr) {
+                       if (*number < mapsize) {
+                               ret = EINVAL;
+                       }
+                       else {
+                               ret = kdbg_write_v3_header(buffer, number, fd);
+                               if (ret) {
+                                       goto write_error;
+                               }
+                       }
+               }
+               else {
+                       ret = EINVAL;
+               }
+       }
+       else {
+               ret = EINVAL;
+       }
+write_error:
+       if ((kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr) {
+               kmem_free(kernel_map, (vm_offset_t)kd_mapptr, kd_mapsize);
+               kd_ctrl_page.kdebug_flags &= ~KDBG_MAPINIT;
+               kd_mapsize = 0;
+               kd_mapptr = (kd_threadmap *) 0;
+               kd_mapcount = 0;
+       }  
+       return(ret);
+}
+
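Both kdbg_readthrmap() and kdbg_readthrmap_v3() gate on the same size check before emitting the thread map. A hedged standalone restatement of that check; the entry layout below is an illustrative stand-in for kd_threadmap.

#include <stdbool.h>
#include <stddef.h>

struct threadmap_entry {                /* illustrative stand-in for kd_threadmap */
	unsigned long long thread;
	int                valid;
	char               command[20];
};

/*
 * Mirrors the count/mapsize checks above: the caller's buffer must be able
 * to hold at least as many entries as are currently recorded, and must be
 * at least mapsize bytes long.
 */
static bool
thrmap_buffer_ok(size_t avail, size_t mapcount)
{
	size_t count   = avail / sizeof(struct threadmap_entry);
	size_t mapsize = mapcount * sizeof(struct threadmap_entry);

	return count != 0 && count <= mapcount && avail >= mapsize;
}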
 
 static int
 kdbg_set_nkdbufs(unsigned int value)
@@ -2164,6 +2937,7 @@ kdbg_enable_bg_trace(void)
                        kdbg_set_tracing_enabled(TRUE, KDEBUG_ENABLE_TRACE);
                        kdlog_bg_trace_running = TRUE;
                }
+               wakeup(&kdlog_bg_trace);
        }
        return ret;
 }
@@ -2219,7 +2993,9 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
 
        if (name[0] == KERN_KDGETENTROPY ||
                name[0] == KERN_KDWRITETR ||
+               name[0] == KERN_KDWRITETR_V3 ||
                name[0] == KERN_KDWRITEMAP ||
+               name[0] == KERN_KDWRITEMAP_V3 ||
                name[0] == KERN_KDEFLAGS ||
                name[0] == KERN_KDDFLAGS ||
                name[0] == KERN_KDENABLE ||
@@ -2281,7 +3057,6 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
                                        ret = EINVAL;
                        }
                        goto out;
-                       
                case KERN_KDGETENTROPY: {
                        /* Obsolescent - just fake with a random buffer */
                        char    *buffer = (char *) kalloc(size);
@@ -2301,6 +3076,43 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
                        kdlog_bg_trace = FALSE;
                        kdbg_disable_bg_trace();
                        goto out;
+
+               case KERN_KDWAIT_BG_TRACE_RESET:
+                       if (!kdlog_bg_trace){
+                               ret = EINVAL;
+                               goto out;
+                       }
+                       wait_result_t wait_result = assert_wait(&kdlog_bg_trace, THREAD_ABORTSAFE);
+                       lck_mtx_unlock(kd_trace_mtx_sysctl);
+                       if (wait_result == THREAD_WAITING)
+                               wait_result = thread_block(THREAD_CONTINUE_NULL);
+                       if (wait_result == THREAD_INTERRUPTED)
+                               ret = EINTR;
+                       lck_mtx_lock(kd_trace_mtx_sysctl);
+                       goto out;
+
+               case KERN_KDSET_BG_TYPEFILTER:
+                       if (!kdlog_bg_trace || !kdlog_bg_trace_running){
+                               ret = EINVAL;
+                               goto out;
+                       }
+
+                       if (size != KDBG_TYPEFILTER_BITMAP_SIZE) {
+                               ret = EINVAL;
+                               goto out;
+                       }
+
+                       if ((kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) == 0){
+                               if ((ret = kdbg_enable_typefilter()))
+                                       goto out;
+                       }
+
+                       if (copyin(where, type_filter_bitmap, KDBG_TYPEFILTER_BITMAP_SIZE)) {
+                               ret = EINVAL;
+                               goto out;
+                       }
+                       kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_TYPEFILTER_CHANGED, type_filter_bitmap);
+                       goto out;
        }
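The new KERN_KDWAIT_BG_TRACE_RESET case sleeps on &kdlog_bg_trace with assert_wait()/thread_block() after dropping kd_trace_mtx_sysctl, and is woken by the wakeup() added to kdbg_enable_bg_trace(). A hedged userspace analogue of that sleep/wakeup handshake, sketched with pthreads; all names below are illustrative, and the condition-variable predicate has no direct counterpart in the kernel code.

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t trace_mtx  = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  trace_cond = PTHREAD_COND_INITIALIZER;
static bool            bg_trace_reset;

/* Waiter: analogue of the KERN_KDWAIT_BG_TRACE_RESET side. */
static void
wait_for_reset(void)
{
	pthread_mutex_lock(&trace_mtx);
	while (!bg_trace_reset)
		pthread_cond_wait(&trace_cond, &trace_mtx);  /* drops and re-takes the mutex */
	bg_trace_reset = false;
	pthread_mutex_unlock(&trace_mtx);
}

/* Waker: analogue of the wakeup(&kdlog_bg_trace) added to kdbg_enable_bg_trace(). */
static void
signal_reset(void)
{
	pthread_mutex_lock(&trace_mtx);
	bg_trace_reset = true;
	pthread_cond_signal(&trace_cond);
	pthread_mutex_unlock(&trace_mtx);
}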
        
        if ((curproc = current_proc()) != NULL)
@@ -2406,10 +3218,12 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
 
                        break;
                case KERN_KDREADTR:
-                       ret = kdbg_read(where, sizep, NULL, NULL);
+                       ret = kdbg_read(where, sizep, NULL, NULL, RAW_VERSION1);
                        break;
                case KERN_KDWRITETR:
+               case KERN_KDWRITETR_V3:
                case KERN_KDWRITEMAP:
+               case KERN_KDWRITEMAP_V3:
                {
                        struct  vfs_context context;
                        struct  fileproc *fp;
@@ -2417,9 +3231,7 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
                        vnode_t vp;
                        int     fd;
 
-                       kdbg_disable_bg_trace();
-
-                       if (name[0] == KERN_KDWRITETR) {
+                       if (name[0] == KERN_KDWRITETR || name[0] == KERN_KDWRITETR_V3) {
                                int s;
                                int wait_result = THREAD_AWAKENED;
                                u_int64_t abstime;
@@ -2472,17 +3284,23 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
 
                        if ((ret = vnode_getwithref(vp)) == 0) {
                                RAW_file_offset = fp->f_fglob->fg_offset;
-                               if (name[0] == KERN_KDWRITETR) {
+                               if (name[0] == KERN_KDWRITETR || name[0] == KERN_KDWRITETR_V3) {
                                        number = nkdbufs * sizeof(kd_buf);
 
                                        KERNEL_DEBUG_CONSTANT(TRACE_WRITING_EVENTS | DBG_FUNC_START, 0, 0, 0, 0, 0);
-                                       ret = kdbg_read(0, &number, vp, &context);
+                                       if (name[0] == KERN_KDWRITETR_V3)
+                                               ret = kdbg_read(0, &number, vp, &context, RAW_VERSION3);
+                                       else
+                                               ret = kdbg_read(0, &number, vp, &context, RAW_VERSION1);
                                        KERNEL_DEBUG_CONSTANT(TRACE_WRITING_EVENTS | DBG_FUNC_END, number, 0, 0, 0, 0);
 
                                        *sizep = number;
                                } else {
                                        number = kd_mapcount * sizeof(kd_threadmap);
-                                       kdbg_readthrmap(0, &number, vp, &context);
+                                       if (name[0] == KERN_KDWRITEMAP_V3)
+                                               kdbg_readthrmap_v3(0, &number, fd);
+                                       else
+                                               kdbg_readthrmap(0, &number, vp, &context);
                                }
                                fp->f_fglob->fg_offset = RAW_file_offset;
                                vnode_put(vp);
@@ -2597,16 +3415,16 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
                case KERN_KDSET_TYPEFILTER:
                        kdbg_disable_bg_trace();
 
-                       if ((kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) == 0){
-                               if ((ret = kdbg_enable_typefilter()))
-                                       break;
-                       }
-
                        if (size != KDBG_TYPEFILTER_BITMAP_SIZE) {
                                ret = EINVAL;
                                break;
                        }
 
+                       if ((kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) == 0){
+                               if ((ret = kdbg_enable_typefilter()))
+                                       break;
+                       }
+
                        if (copyin(where, type_filter_bitmap, KDBG_TYPEFILTER_BITMAP_SIZE)) {
                                ret = EINVAL;
                                break;
@@ -2630,7 +3448,7 @@ out:
  * move through the lists w/o use of any locks
  */
 int
-kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx)
+kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx, uint32_t file_version)
 {
        unsigned int count;
        unsigned int cpu, min_cpu;
@@ -2650,6 +3468,7 @@ kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx)
        boolean_t lostevents = FALSE;
        boolean_t out_of_events = FALSE;
 
+        assert(number);
        count = *number/sizeof(kd_buf);
        *number = 0;
 
@@ -2805,12 +3624,23 @@ nextevent:
                                break;
                }
                if (tempbuf_number) {
+                       if (file_version == RAW_VERSION3) {
+                               if ( !(kdbg_write_v3_event_chunk_header(buffer, V3_RAW_EVENTS, (tempbuf_number * sizeof(kd_buf)), vp, ctx))) {
+                                       error = EFAULT;
+                                       goto check_error;
+                               }
+                               if (buffer)
+                                       buffer += (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
 
+                               assert(count >= (sizeof(kd_chunk_header_v3) + sizeof(uint64_t)));
+                               count -= (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
+                               *number += (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
+                       }
                        if (vp) {
-                               error = vn_rdwr(UIO_WRITE, vp, (caddr_t)kdcopybuf, tempbuf_number * sizeof(kd_buf), RAW_file_offset,
-                                               UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
-
-                               RAW_file_offset += (tempbuf_number * sizeof(kd_buf));
+                               size_t write_size = tempbuf_number * sizeof(kd_buf);
+                               error = kdbg_write_to_vnode((caddr_t)kdcopybuf, write_size, vp, ctx, RAW_file_offset);
+                               if (!error)
+                                       RAW_file_offset += write_size;
        
                                if (RAW_file_written >= RAW_FLUSH_SIZE) {
                                        cluster_push(vp, 0);
@@ -2821,6 +3651,7 @@ nextevent:
                                error = copyout(kdcopybuf, buffer, tempbuf_number * sizeof(kd_buf));
                                buffer += (tempbuf_number * sizeof(kd_buf));
                        }
+check_error:
                        if (error) {
                                *number = 0;
                                error = EINVAL;
@@ -2852,31 +3683,33 @@ unsigned char *getProcName(struct proc *proc) {
 
 }
 
-#define STACKSHOT_SUBSYS_LOCK() lck_mtx_lock(&stackshot_subsys_mutex)
-#define STACKSHOT_SUBSYS_UNLOCK() lck_mtx_unlock(&stackshot_subsys_mutex)
-#if defined(__i386__) || defined (__x86_64__)
-#define TRAP_DEBUGGER __asm__ volatile("int3");
-#else
-#error No TRAP_DEBUGGER definition for this architecture
-#endif
-
-#define SANE_TRACEBUF_SIZE (8 * 1024 * 1024)
-#define SANE_BOOTPROFILE_TRACEBUF_SIZE (64 * 1024 * 1024)
-
-/* Initialize the mutex governing access to the stack snapshot subsystem */
-__private_extern__ void
-stackshot_lock_init( void )
+static int
+stackshot_kern_return_to_bsd_error(kern_return_t kr)
 {
-       stackshot_subsys_lck_grp_attr = lck_grp_attr_alloc_init();
-
-       stackshot_subsys_lck_grp = lck_grp_alloc_init("stackshot_subsys_lock", stackshot_subsys_lck_grp_attr);
-
-       stackshot_subsys_lck_attr = lck_attr_alloc_init();
-
-       lck_mtx_init(&stackshot_subsys_mutex, stackshot_subsys_lck_grp, stackshot_subsys_lck_attr);
+       switch (kr) {
+               case KERN_SUCCESS:
+                       return 0;
+               case KERN_RESOURCE_SHORTAGE:
+                       return ENOMEM;
+               case KERN_NO_SPACE:
+                       return ENOSPC;
+               case KERN_NO_ACCESS:
+                       return EPERM;
+               case KERN_MEMORY_PRESENT:
+                       return EEXIST;
+               case KERN_NOT_SUPPORTED:
+                       return ENOTSUP;
+               case KERN_NOT_IN_SET:
+                       return ENOENT;
+               default:
+                       return EINVAL;
+       }
 }
 
+
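A hedged usage sketch of the translator above: BSD-facing entry points keep working in kern_return_t internally and convert only at the syscall boundary. The kern-layer routine and trimmed mapping below are illustrative stand-ins, not the kernel's definitions.

#include <errno.h>

typedef int kern_return_t;              /* stand-ins for the Mach definitions */
#define KERN_SUCCESS            0
#define KERN_RESOURCE_SHORTAGE  6

/* hypothetical kern-layer routine */
static kern_return_t do_snapshot_internal(void) { return KERN_RESOURCE_SHORTAGE; }

/* trimmed copy of the switch above, for illustration only */
static int
kr_to_errno(kern_return_t kr)
{
	switch (kr) {
	case KERN_SUCCESS:           return 0;
	case KERN_RESOURCE_SHORTAGE: return ENOMEM;
	default:                     return EINVAL;
	}
}

static int
snapshot_syscall_entry(void)
{
	kern_return_t kr = do_snapshot_internal();
	return kr_to_errno(kr);         /* caller sees ENOMEM, not a Mach code */
}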
 /*
+ * DEPRECATION WARNING: THIS SYSCALL IS BEING REPLACED WITH SYS_stack_snapshot_with_config and SYS_microstackshot.
+ *
  * stack_snapshot:   Obtains a coherent set of stack traces for all threads
  *                  on the system, tracing both kernel and user stacks
  *                  where available. Uses machine specific trace routines
@@ -2901,208 +3734,147 @@ stackshot_lock_init( void )
 int
 stack_snapshot(struct proc *p, register struct stack_snapshot_args *uap, int32_t *retval) {
        int error = 0;
+       kern_return_t kr;
 
        if ((error = suser(kauth_cred_get(), &p->p_acflag)))
                 return(error);
 
-       return stack_snapshot2(uap->pid, uap->tracebuf, uap->tracebuf_size,
-           uap->flags, uap->dispatch_offset, retval);
+       kr = stack_snapshot2(uap->pid, uap->tracebuf, uap->tracebuf_size, uap->flags, retval);
+       return stackshot_kern_return_to_bsd_error(kr);
 }
 
-int  
-stack_snapshot_from_kernel(pid_t pid, void *buf, uint32_t size, uint32_t flags, unsigned *bytesTraced)
-{
-       int error = 0;
-       boolean_t istate;
-
-       if ((buf == NULL) || (size <= 0) || (bytesTraced == NULL)) {
-               return -1;
-       }
-
-       /* cap in individual stackshot to SANE_TRACEBUF_SIZE */
-       if (size > SANE_TRACEBUF_SIZE) {
-               size = SANE_TRACEBUF_SIZE;
-       }
-
-/* Serialize tracing */        
-       STACKSHOT_SUBSYS_LOCK();
-       istate = ml_set_interrupts_enabled(FALSE);
-
-
-/* Preload trace parameters*/  
-       kdp_snapshot_preflight(pid, buf, size, flags, 0);
-
-/* Trap to the debugger to obtain a coherent stack snapshot; this populates
- * the trace buffer
+/*
+ * stack_snapshot_with_config: Obtains a coherent set of stack traces for specified threads on the system,

+ *                             tracing both kernel and user stacks where available. Allocates a buffer from the
+ *                             kernel and maps the buffer into the calling task's address space.
+ *
+ * Inputs:                     uap->stackshot_config_version - version of the stackshot config that is being passed
+ *                             uap->stackshot_config - pointer to the stackshot config
+ *                             uap->stackshot_config_size - size of the stackshot config being passed
+ * Outputs:                    EINVAL if there is a problem with the arguments
+ *                             EFAULT if we failed to copy in the arguments successfully
+ *                             EPERM if the caller is not privileged
+ *                             ENOTSUP if the caller is passing a version of arguments that is not supported by the kernel
+ *                             (indicates libsyscall:kernel mismatch) or if the caller is requesting unsupported flags
+ *                             ENOENT if the caller is requesting an existing buffer that doesn't exist or if the
+ *                             requested PID isn't found
+ *                             ENOMEM if the kernel is unable to allocate enough memory to serve the request
+ *                             ENOSPC if there isn't enough space in the caller's address space to remap the buffer
+ *                             ESRCH if the target PID isn't found
+ *                             returns KERN_SUCCESS on success 
  */
-       TRAP_DEBUGGER;
-
-       ml_set_interrupts_enabled(istate);
-
-       *bytesTraced = kdp_stack_snapshot_bytes_traced();
-
-       error = kdp_stack_snapshot_geterror();
-       
-       STACKSHOT_SUBSYS_UNLOCK();
-
-    return error;
-
-}
-
 int
-stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset, int32_t *retval)
+stack_snapshot_with_config(struct proc *p, struct stack_snapshot_with_config_args *uap, __unused int *retval)
 {
-       boolean_t istate;
        int error = 0;
-       unsigned bytesTraced = 0;
+       kern_return_t kr;
 
-#if CONFIG_TELEMETRY
-       if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_ENABLE) {
-               telemetry_global_ctl(1);
-               *retval = 0;
-               return (0);
-       } else if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_DISABLE) {
-               telemetry_global_ctl(0);
-               *retval = 0;
-               return (0);
-       }
-
-       if (flags & STACKSHOT_WINDOWED_MICROSTACKSHOTS_ENABLE) {
-               error = telemetry_enable_window();
-
-               if (error != KERN_SUCCESS) {
-                       /* We are probably out of memory */
-                       *retval = -1;
-                       return ENOMEM;
-               }
+       if ((error = suser(kauth_cred_get(), &p->p_acflag)))
+                return(error);
 
-               *retval = 0;
-               return (0);
-       } else if (flags & STACKSHOT_WINDOWED_MICROSTACKSHOTS_DISABLE) {
-               telemetry_disable_window();
-               *retval = 0;
-               return (0);
+       if((void*)uap->stackshot_config == NULL) {
+               return EINVAL;
        }
-#endif
 
-       *retval = -1;
-/* Serialize tracing */        
-       STACKSHOT_SUBSYS_LOCK();
-       
-       if (tracebuf_size <= 0) {
-               error = EINVAL;
-               goto error_exit;
+       switch (uap->stackshot_config_version) {
+               case STACKSHOT_CONFIG_TYPE:
+                       if (uap->stackshot_config_size != sizeof(stackshot_config_t)) {
+                               return EINVAL;
+                       }
+                       stackshot_config_t config;
+                       error = copyin(uap->stackshot_config, &config, sizeof(stackshot_config_t));
+                       if (error != KERN_SUCCESS)
+                       {
+                               return EFAULT;
+                       }
+                       kr = kern_stack_snapshot_internal(uap->stackshot_config_version, &config, sizeof(stackshot_config_t), TRUE);
+                       return stackshot_kern_return_to_bsd_error(kr);
+               default:
+                       return ENOTSUP;
        }
+}
 
 #if CONFIG_TELEMETRY
-       if (flags & STACKSHOT_GET_MICROSTACKSHOT) {
-
-               if (tracebuf_size > SANE_TRACEBUF_SIZE) {
-                       error = EINVAL;
-                       goto error_exit;
-               }
-
-               bytesTraced = tracebuf_size;
-               error = telemetry_gather(tracebuf, &bytesTraced, 
-                                        (flags & STACKSHOT_SET_MICROSTACKSHOT_MARK) ? TRUE : FALSE);
-               if (error == KERN_NO_SPACE) {
-                       error = ENOSPC;
-               }
-
-               *retval = (int)bytesTraced;
-               goto error_exit;
-       }
-
-       if (flags & STACKSHOT_GET_WINDOWED_MICROSTACKSHOTS) {
-
-               if (tracebuf_size > SANE_TRACEBUF_SIZE) {
-                       error = EINVAL;
-                       goto error_exit;
-               }
-
-               bytesTraced = tracebuf_size;
-               error = telemetry_gather_windowed(tracebuf, &bytesTraced);
-               if (error == KERN_NO_SPACE) {
-                       error = ENOSPC;
-               }
-
-               *retval = (int)bytesTraced;
-               goto error_exit;
-       }
-
-       if (flags & STACKSHOT_GET_BOOT_PROFILE) {
-
-               if (tracebuf_size > SANE_BOOTPROFILE_TRACEBUF_SIZE) {
-                       error = EINVAL;
-                       goto error_exit;
-               }
-
-               bytesTraced = tracebuf_size;
-               error = bootprofile_gather(tracebuf, &bytesTraced);
-               if (error == KERN_NO_SPACE) {
-                       error = ENOSPC;
-               }
-
-               *retval = (int)bytesTraced;
-               goto error_exit;
-       }
-#endif
-
-       if (tracebuf_size > SANE_TRACEBUF_SIZE) {
-               error = EINVAL;
-               goto error_exit;
-       }
-
-       assert(stackshot_snapbuf == NULL);
-       if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&stackshot_snapbuf, tracebuf_size) != KERN_SUCCESS) {
-               error = ENOMEM;
-               goto error_exit;
-       }
+/*
+ * microstackshot:     Catch-all system call for microstackshot-related operations, including
+ *                     enabling/disabling both global and windowed microstackshots as well
+ *                     as retrieving windowed or global stackshots and the boot profile.
+ * Inputs:             uap->tracebuf - address of the user space destination
+ *                     buffer
+ *                     uap->tracebuf_size - size of the user space trace buffer
+ *                     uap->flags - various flags
+ * Outputs:            EPERM if the caller is not privileged
+ *                     EINVAL if the supplied mss_args is NULL, mss_args.tracebuf is NULL or mss_args.tracebuf_size is not sane
+ *                     ENOMEM if we don't have enough memory to satisfy the request
+ *                     *retval contains the number of bytes traced if successful,
+ *                     and -1 otherwise.
+ */
+int
+microstackshot(struct proc *p, struct microstackshot_args *uap, int32_t *retval)
+{
+       int error = 0;
+       kern_return_t kr;
 
-       if (panic_active()) {
-               error = ENOMEM;
-               goto error_exit;
-       }
+       if ((error = suser(kauth_cred_get(), &p->p_acflag)))
+                return(error);
 
-       istate = ml_set_interrupts_enabled(FALSE);
-/* Preload trace parameters*/  
-       kdp_snapshot_preflight(pid, stackshot_snapbuf, tracebuf_size, flags, dispatch_offset);
+       kr = stack_microstackshot(uap->tracebuf, uap->tracebuf_size, uap->flags, retval);
+       return stackshot_kern_return_to_bsd_error(kr);
+}
+#endif /* CONFIG_TELEMETRY */
 
-/* Trap to the debugger to obtain a coherent stack snapshot; this populates
- * the trace buffer
+/*
+ * kern_stack_snapshot_with_reason:    Obtains a coherent set of stack traces for specified threads on the system,
+ *                                     tracing both kernel and user stacks where available. Allocates a buffer from the
+ *                                     kernel and stores the address of this buffer.
+ *
+ * Inputs:                             reason - the reason for triggering a stackshot (unused at the moment, but in the
+ *                                             future will be saved in the stackshot)
+ * Outputs:                            EINVAL/ENOTSUP if there is a problem with the arguments
+ *                                     EPERM if the caller doesn't pass at least one KERNEL stackshot flag
+ *                                     ENOMEM if the kernel is unable to allocate enough memory to serve the request
+ *                                     ESRCH if the target PID isn't found
+ *                                     returns KERN_SUCCESS on success
  */
+int
+kern_stack_snapshot_with_reason(__unused char *reason)
+{
+       stackshot_config_t config;
+       kern_return_t kr;
+
+       config.sc_pid = -1;
+       config.sc_flags = (STACKSHOT_SAVE_LOADINFO | STACKSHOT_GET_GLOBAL_MEM_STATS | STACKSHOT_SAVE_IN_KERNEL_BUFFER |
+                               STACKSHOT_KCDATA_FORMAT);
+       config.sc_since_timestamp = 0;
+       config.sc_out_buffer_addr = 0;
+       config.sc_out_size_addr = 0;
+
+       kr = kern_stack_snapshot_internal(STACKSHOT_CONFIG_TYPE, &config, sizeof(stackshot_config_t), FALSE);
+       return stackshot_kern_return_to_bsd_error(kr);
+}
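A hedged sketch of how a kernel-internal caller fills a stackshot config before handing it to kern_stack_snapshot_internal(), mirroring kern_stack_snapshot_with_reason() above. The struct layout and flag values below are illustrative stand-ins; only the field names are taken from the code above.

#include <stdint.h>

/* Illustrative stand-in for the real stackshot_config_t. */
typedef struct {
	int      sc_pid;                /* -1 == whole system */
	uint32_t sc_flags;
	uint64_t sc_since_timestamp;    /* 0 == not a delta snapshot */
	uint64_t sc_out_buffer_addr;    /* filled in for userspace callers */
	uint64_t sc_out_size_addr;
} stackshot_config_sketch_t;

#define SS_SAVE_LOADINFO         0x01   /* hypothetical flag values */
#define SS_GET_GLOBAL_MEM_STATS  0x02
#define SS_SAVE_IN_KERNEL_BUFFER 0x04
#define SS_KCDATA_FORMAT         0x08

static stackshot_config_sketch_t
make_kernel_stackshot_config(void)
{
	stackshot_config_sketch_t config = {
		.sc_pid   = -1,
		.sc_flags = SS_SAVE_LOADINFO | SS_GET_GLOBAL_MEM_STATS |
		            SS_SAVE_IN_KERNEL_BUFFER | SS_KCDATA_FORMAT,
	};
	return config;
}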
 
-       TRAP_DEBUGGER;
-
-       ml_set_interrupts_enabled(istate);
-
-       bytesTraced = kdp_stack_snapshot_bytes_traced();
-                       
-       if (bytesTraced > 0) {
-               if ((error = copyout(stackshot_snapbuf, tracebuf,
-                       ((bytesTraced < tracebuf_size) ?
-                           bytesTraced : tracebuf_size))))
-                       goto error_exit;
-               *retval = bytesTraced;
-       }
-       else {
-               error = ENOENT;
-               goto error_exit;
-       }
+/*
+ * stack_snapshot_from_kernel: Stackshot function for kernel consumers who have their own buffer.
+ *
+ * Inputs:                     pid - the PID to be traced or -1 for the whole system
+ *                             buf - a pointer to the buffer where the stackshot should be written
+ *                             size - the size of the buffer
+ *                             flags - flags to be passed to the stackshot
+ *                             *bytes_traced - a pointer to be filled with the length of the stackshot
+ * Outputs:                    -1 if there is a problem with the arguments
+ *                             the error returned by the stackshot code otherwise
+ */
+int
+stack_snapshot_from_kernel(pid_t pid, void *buf, uint32_t size, uint32_t flags, unsigned *bytes_traced)
+{
+       kern_return_t kr;
 
-       error = kdp_stack_snapshot_geterror();
-       if (error == -1) {
-               error = ENOSPC;
-               *retval = -1;
-               goto error_exit;
+       kr = stack_snapshot_from_kernel_internal(pid, buf, size, flags, bytes_traced);
+       if (kr == KERN_FAILURE) {
+               return -1;
        }
 
-error_exit:
-       if (stackshot_snapbuf != NULL)
-               kmem_free(kernel_map, (vm_offset_t) stackshot_snapbuf, tracebuf_size);
-       stackshot_snapbuf = NULL;
-       STACKSHOT_SUBSYS_UNLOCK();
-       return error;
+       return kr;
 }
 
 void
@@ -3114,7 +3886,7 @@ start_kern_tracing(unsigned int new_nkdbufs, boolean_t need_map)
        nkdbufs = kdbg_set_nkdbufs(new_nkdbufs);
        kdbg_lock_init();
 
-       kernel_debug_string("start_kern_tracing");
+       kernel_debug_string_simple("start_kern_tracing");
 
        if (0 == kdbg_reinit(TRUE)) {
 
@@ -3169,7 +3941,8 @@ start_kern_tracing_with_typefilter(unsigned int new_nkdbufs,
 
        /* setup the typefiltering */
        if (0 == kdbg_enable_typefilter())
-               setbit(type_filter_bitmap, typefilter & (CSC_MASK >> CSC_OFFSET));
+               setbit(type_filter_bitmap,
+                      typefilter & (KDBG_CSC_MASK >> KDBG_CSC_OFFSET));
 }
 
 void
@@ -3214,7 +3987,7 @@ kdbg_dump_trace_to_file(const char *filename)
        kdbg_readthrmap(0, &number, vp, ctx);
 
        number = nkdbufs*sizeof(kd_buf);
-       kdbg_read(0, &number, vp, ctx);
+       kdbg_read(0, &number, vp, ctx, RAW_VERSION1);
        
        vnode_close(vp, FWRITE, ctx);
 
@@ -3309,7 +4082,7 @@ kdebug_serial_print(
        uint64_t        delta = timestamp - kd_last_timstamp;
        uint64_t        delta_us = delta / NSEC_PER_USEC;
        uint64_t        delta_us_tenth = (delta % NSEC_PER_USEC) / 100;
-       uint32_t        event_id = debugid & DBG_FUNC_MASK;
+       uint32_t        event_id = debugid & KDBG_EVENTID_MASK;
        const char      *command;
        const char      *bra;
        const char      *ket;
@@ -3371,7 +4144,7 @@ kdebug_serial_print(
        /* threadid, cpu and command name */
        if (threadid == (uintptr_t)thread_tid(current_thread()) &&
            current_proc() &&
-           current_proc()->p_comm)
+           current_proc()->p_comm[0])
                command = current_proc()->p_comm;
        else
                command = "-";
index 2513122e697814570d7f1f19fafbddfa665c670f..44c956e9bb63c65639db5a295db0064c393603db 100644 (file)
@@ -63,6 +63,7 @@
 
 #include <mach/mach_types.h>
 #include <kern/kern_types.h>
+#include <kern/waitq.h>
 #include <kern/zalloc.h>
 #include <kern/task.h>
 #include <kern/sched_prim.h>
@@ -123,7 +124,7 @@ typedef struct aio_workq   {
        TAILQ_HEAD(, aio_workq_entry)   aioq_entries;
        int                             aioq_count;
        lck_mtx_t                       aioq_mtx;
-       wait_queue_t                    aioq_waitq;
+       struct waitq                    aioq_waitq;
 } *aio_workq_t;
 
 #define AIO_NUM_WORK_QUEUES 1
@@ -303,7 +304,7 @@ aio_workq_init(aio_workq_t wq)
        TAILQ_INIT(&wq->aioq_entries);
        wq->aioq_count = 0;
        lck_mtx_init(&wq->aioq_mtx, aio_queue_lock_grp, aio_lock_attr);
-       wq->aioq_waitq = wait_queue_alloc(SYNC_POLICY_FIFO);
+       waitq_init(&wq->aioq_waitq, SYNC_POLICY_FIFO|SYNC_POLICY_DISABLE_IRQ);
 }
 
 
@@ -1393,7 +1394,8 @@ aio_enqueue_work( proc_t procp, aio_workq_entry *entryp, int proc_locked)
        /* And work queue */
        aio_workq_lock_spin(queue);
        aio_workq_add_entry_locked(queue, entryp);
-       wait_queue_wakeup_one(queue->aioq_waitq, queue, THREAD_AWAKENED, -1);
+       waitq_wakeup64_one(&queue->aioq_waitq, CAST_EVENT64_T(queue),
+                          THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
        aio_workq_unlock(queue);
        
        if (proc_locked == 0) {
@@ -1824,7 +1826,7 @@ aio_get_some_work( void )
 
 nowork:
        /* We will wake up when someone enqueues something */
-       wait_queue_assert_wait(queue->aioq_waitq, queue, THREAD_UNINT, 0);
+       waitq_assert_wait64(&queue->aioq_waitq, CAST_EVENT64_T(queue), THREAD_UNINT, 0);
        aio_workq_unlock(queue);
        thread_block( (thread_continue_t)aio_work_thread );
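The aio work queue moves from the old wait_queue_t API to the new struct waitq API, which keys waits and wakeups on 64-bit events; the queue pointer is therefore widened through CAST_EVENT64_T. A minimal sketch of that widening, with illustrative types.

#include <stdint.h>

/* Sketch only: the waitq API identifies events by a 64-bit value, so a
 * pointer-sized event (here, the work queue address) is widened first. */
typedef uint64_t event64_t;

static inline event64_t
cast_event64(const void *event)
{
	return (event64_t)(uintptr_t)event;
}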
 
index 8e5b0150bd8775b1dd5aac552cbc792e2df70012..ebda4203d13a9c844ed37d9ff5e21d6fe80bc1d5 100644 (file)
 #include <sys/kern_control.h>
 #include <sys/kauth.h>
 #include <sys/sysctl.h>
+#include <sys/proc_info.h>
 #include <net/if_var.h>
 
 #include <mach/vm_types.h>
 
 #include <kern/thread.h>
 
+struct kctl {
+       TAILQ_ENTRY(kctl)       next;           /* controller chain */
+       kern_ctl_ref            kctlref;
+
+       /* controller information provided when registering */
+       char                    name[MAX_KCTL_NAME];    /* unique identifier */
+       u_int32_t               id;
+       u_int32_t               reg_unit;
+
+       /* misc communication information */
+       u_int32_t               flags;          /* support flags */
+       u_int32_t               recvbufsize;    /* request more than the default buffer size */
+       u_int32_t               sendbufsize;    /* request more than the default buffer size */
+
+       /* Dispatch functions */
+       ctl_connect_func        connect;        /* Make contact */
+       ctl_disconnect_func     disconnect;     /* Break contact */
+       ctl_send_func           send;           /* Send data to nke */
+       ctl_send_list_func      send_list;      /* Send list of packets */
+       ctl_setopt_func         setopt;         /* set kctl configuration */
+       ctl_getopt_func         getopt;         /* get kctl configuration */
+       ctl_rcvd_func           rcvd;           /* Notify nke when client reads data */
+
+       TAILQ_HEAD(, ctl_cb)    kcb_head;
+       u_int32_t               lastunit;
+};
+
+struct ctl_cb {
+       TAILQ_ENTRY(ctl_cb)     next;           /* controller chain */
+       lck_mtx_t               *mtx;
+       struct socket           *so;            /* controlling socket */
+       struct kctl             *kctl;          /* back pointer to controller */
+       void                    *userdata;
+       u_int32_t               unit;
+       u_int32_t               usecount;
+};
+
 #ifndef ROUNDUP64
 #define        ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t))
 #endif
@@ -84,7 +122,6 @@ static lck_mtx_t     *ctl_mtx;
 /* all the controllers are chained */
 TAILQ_HEAD(kctl_list, kctl)    ctl_head;
 
-
 static int ctl_attach(struct socket *, int, struct proc *);
 static int ctl_detach(struct socket *);
 static int ctl_sofreelastref(struct socket *so);
@@ -103,7 +140,8 @@ static int ctl_usr_rcvd(struct socket *so, int flags);
 static struct kctl *ctl_find_by_name(const char *);
 static struct kctl *ctl_find_by_id_unit(u_int32_t id, u_int32_t unit);
 
-static struct socket *kcb_find_socket(struct kctl *, u_int32_t unit);
+static struct socket *kcb_find_socket(kern_ctl_ref kctlref, u_int32_t unit,
+       u_int32_t *);
 static struct ctl_cb *kcb_find(struct kctl *, u_int32_t unit);
 static void ctl_post_msg(u_int32_t event_code, u_int32_t id);
 
@@ -154,7 +192,6 @@ __private_extern__ int kctl_reg_list SYSCTL_HANDLER_ARGS;
 __private_extern__ int kctl_pcblist SYSCTL_HANDLER_ARGS;
 __private_extern__ int kctl_getstat SYSCTL_HANDLER_ARGS;
 
-static int kctl_proto_count = (sizeof (kctlsw) / sizeof (struct protosw));
 
 SYSCTL_NODE(_net_systm, OID_AUTO, kctl,
        CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Kernel control family");
@@ -184,6 +221,19 @@ u_int32_t ctl_debug = 0;
 SYSCTL_INT(_net_systm_kctl, OID_AUTO, debug,
        CTLFLAG_RW | CTLFLAG_LOCKED, &ctl_debug, 0, "");
 
+#define        KCTL_TBL_INC 16
+
+static uintptr_t kctl_tbl_size = 0;
+static u_int32_t kctl_tbl_growing = 0;
+static uintptr_t kctl_tbl_count = 0;
+static struct kctl **kctl_table = NULL;
+static uintptr_t kctl_ref_gencnt = 0;
+
+static void kctl_tbl_grow(void);
+static kern_ctl_ref kctl_make_ref(struct kctl *kctl);
+static void kctl_delete_ref(kern_ctl_ref);
+static struct kctl *kctl_from_ref(kern_ctl_ref);
+
 /*
  * Install the protosw's for the Kernel Control manager.
  */
@@ -192,6 +242,7 @@ kern_control_init(struct domain *dp)
 {
        struct protosw *pr;
        int i;
+       int kctl_proto_count = (sizeof (kctlsw) / sizeof (struct protosw));
 
        VERIFY(!(dp->dom_flags & DOM_INITIALIZED));
        VERIFY(dp == systemdomain);
@@ -307,7 +358,6 @@ ctl_detach(struct socket *so)
        return (0);
 }
 
-
 static int
 ctl_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
 {
@@ -419,7 +469,7 @@ ctl_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
        soisconnecting(so);
 
        socket_unlock(so, 0);
-       error = (*kctl->connect)(kctl, &sa, &kcb->userdata);
+       error = (*kctl->connect)(kctl->kctlref, &sa, &kcb->userdata);
        socket_lock(so, 0);
        if (error)
                goto end;
@@ -429,7 +479,7 @@ ctl_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
 end:
        if (error && kctl->disconnect) {
                socket_unlock(so, 0);
-               (*kctl->disconnect)(kctl, kcb->unit, kcb->userdata);
+               (*kctl->disconnect)(kctl->kctlref, kcb->unit, kcb->userdata);
                socket_lock(so, 0);
        }
 done:
@@ -457,7 +507,8 @@ ctl_disconnect(struct socket *so)
 
                if (kctl && kctl->disconnect) {
                        socket_unlock(so, 0);
-                       (*kctl->disconnect)(kctl, kcb->unit, kcb->userdata);
+                       (*kctl->disconnect)(kctl->kctlref, kcb->unit,
+                           kcb->userdata);
                        socket_lock(so, 0);
                }
 
@@ -547,7 +598,7 @@ ctl_usr_rcvd(struct socket *so, int flags)
 
        if (kctl->rcvd) {
                socket_unlock(so, 0);
-               (*kctl->rcvd)(kctl, kcb->unit, kcb->userdata, flags);
+               (*kctl->rcvd)(kctl->kctlref, kcb->unit, kcb->userdata, flags);
                socket_lock(so, 0);
        }
 
@@ -578,7 +629,8 @@ ctl_send(struct socket *so, int flags, struct mbuf *m,
        if (error == 0 && kctl->send) {
                so_tc_update_stats(m, so, m_get_service_class(m));
                socket_unlock(so, 0);
-               error = (*kctl->send)(kctl, kcb->unit, kcb->userdata, m, flags);
+               error = (*kctl->send)(kctl->kctlref, kcb->unit, kcb->userdata,
+                   m, flags);
                socket_lock(so, 0);
        } else {
                m_freem(m);
@@ -615,8 +667,8 @@ ctl_send_list(struct socket *so, int flags, struct mbuf *m,
                        so_tc_update_stats(nxt, so, m_get_service_class(nxt));
 
                socket_unlock(so, 0);
-               error = (*kctl->send_list)(kctl, kcb->unit, kcb->userdata, m,
-                       flags);
+               error = (*kctl->send_list)(kctl->kctlref, kcb->unit,
+                   kcb->userdata, m, flags);
                socket_lock(so, 0);
        } else if (error == 0 && kctl->send) {
                while (m != NULL && error == 0) {
@@ -625,8 +677,8 @@ ctl_send_list(struct socket *so, int flags, struct mbuf *m,
                        m->m_nextpkt = NULL;
                        so_tc_update_stats(m, so, m_get_service_class(m));
                        socket_unlock(so, 0);
-                       error = (*kctl->send)(kctl, kcb->unit, kcb->userdata, m,
-                               flags);
+                       error = (*kctl->send)(kctl->kctlref, kcb->unit,
+                           kcb->userdata, m, flags);
                        socket_lock(so, 0);
                        m = nextpkt;
                }
@@ -643,27 +695,27 @@ ctl_send_list(struct socket *so, int flags, struct mbuf *m,
 }
 
 static errno_t
-ctl_rcvbspace(struct kctl *kctl, struct socket *so, u_int32_t datasize,
-       u_int32_t flags)
+ctl_rcvbspace(struct socket *so, u_int32_t datasize,
+       u_int32_t kctlflags, u_int32_t flags)
 {
        struct sockbuf *sb = &so->so_rcv;
        u_int32_t space = sbspace(sb);
        errno_t error;
 
-       if ((kctl->flags & CTL_FLAG_REG_CRIT) == 0) {
+       if ((kctlflags & CTL_FLAG_REG_CRIT) == 0) {
                if ((u_int32_t) space >= datasize)
                        error = 0;
                else
                        error = ENOBUFS;
        } else if ((flags & CTL_DATA_CRIT) == 0) {
-                       /*
-                        * Reserve 25% for critical messages
-                        */
-                       if (space < (sb->sb_hiwat >> 2) ||
-                           space  < datasize)
-                               error = ENOBUFS;
-                       else
-                               error = 0;
+               /*
+                * Reserve 25% for critical messages
+                */
+               if (space < (sb->sb_hiwat >> 2) ||
+                   space  < datasize)
+                       error = ENOBUFS;
+               else
+                       error = 0;
        } else {
                u_int32_t autorcvbuf_max;
 
@@ -688,10 +740,18 @@ ctl_rcvbspace(struct kctl *kctl, struct socket *so, u_int32_t datasize,
                                if (sb->sb_hiwat > ctl_autorcvbuf_high)
                                        ctl_autorcvbuf_high = sb->sb_hiwat;
 
+                               /*
+                                * A final check
+                                */
+                               if ((u_int32_t) sbspace(sb) >= datasize) {
+                                       error = 0;
+                               } else {
+                                       error = ENOBUFS;
+                               }
+
                                if (ctl_debug)
-                                       printf("%s - grown to %d\n",
-                                           __func__, sb->sb_hiwat);
-                               error = 0;
+                                       printf("%s - grown to %d error %d\n",
+                                           __func__, sb->sb_hiwat, error);
                        } else {
                                error = ENOBUFS;
                        }
@@ -703,22 +763,20 @@ ctl_rcvbspace(struct kctl *kctl, struct socket *so, u_int32_t datasize,
 }
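A hedged restatement of the admission rule in ctl_rcvbspace() above: on a controller registered with CTL_FLAG_REG_CRIT, ordinary (non-CTL_DATA_CRIT) enqueues must leave a quarter of the receive buffer's high-water mark free, while critical enqueues may dip into that reserve. The real code additionally tries to auto-grow the buffer for critical data, which is omitted here; names below are illustrative.

#include <stdbool.h>
#include <stdint.h>

static bool
rcvbuf_admits(uint32_t space, uint32_t hiwat, uint32_t datasize,
    bool sock_is_crit, bool msg_is_crit)
{
	if (!sock_is_crit)
		return space >= datasize;
	if (!msg_is_crit)                       /* keep 25% in reserve */
		return space >= (hiwat >> 2) && space >= datasize;
	return space >= datasize;               /* critical data may use the reserve */
}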
 
 errno_t
-ctl_enqueuembuf(void *kctlref, u_int32_t unit, struct mbuf *m, u_int32_t flags)
+ctl_enqueuembuf(kern_ctl_ref kctlref, u_int32_t unit, struct mbuf *m,
+    u_int32_t flags)
 {
        struct socket   *so;
        errno_t         error = 0;
-       struct kctl     *kctl = (struct kctl *)kctlref;
        int             len = m->m_pkthdr.len;
+       u_int32_t       kctlflags;
 
-       if (kctl == NULL)
-               return (EINVAL);
-
-       so = kcb_find_socket(kctl, unit);
-
-       if (so == NULL)
+       so = kcb_find_socket(kctlref, unit, &kctlflags);
+       if (so == NULL) {
                return (EINVAL);
+       }
 
-       if (ctl_rcvbspace(kctl, so, len, flags) != 0) {
+       if (ctl_rcvbspace(so, len, kctlflags, flags) != 0) {
                error = ENOBUFS;
                OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_enqueue_fullsock);
                goto bye;
@@ -768,21 +826,26 @@ ctl_enqueuembuf_list(void *kctlref, u_int32_t unit, struct mbuf *m_list,
 {
        struct socket *so = NULL;
        errno_t error = 0;
-       struct kctl *kctl = (struct kctl *)kctlref;
        struct mbuf *m, *nextpkt;
        int needwakeup = 0;
        int len;
+       u_int32_t kctlflags;
 
        /*
         * Need to point the beginning of the list in case of early exit
         */
        m = m_list;
 
-       if (kctl == NULL) {
+       /*
+        * kcb_find_socket takes the socket lock with a reference
+        */
+       so = kcb_find_socket(kctlref, unit, &kctlflags);
+       if (so == NULL) {
                error = EINVAL;
                goto done;
        }
-       if (kctl->flags & CTL_FLAG_REG_SOCK_STREAM) {
+
+       if (kctlflags & CTL_FLAG_REG_SOCK_STREAM) {
                error = EOPNOTSUPP;
                goto done;
        }
@@ -790,14 +853,6 @@ ctl_enqueuembuf_list(void *kctlref, u_int32_t unit, struct mbuf *m_list,
                error = EINVAL;
                goto done;
        }
-       /*
-        * kcb_find_socket takes the socket lock with a reference
-        */
-       so = kcb_find_socket(kctl, unit);
-       if (so == NULL) {
-               error = EINVAL;
-               goto done;
-       }
 
        for (m = m_list; m != NULL; m = nextpkt) {
                nextpkt = m->m_nextpkt;
@@ -811,7 +866,7 @@ ctl_enqueuembuf_list(void *kctlref, u_int32_t unit, struct mbuf *m_list,
                 * so it's not reliable from a data standpoint
                 */
                len = m_space(m);
-               if (ctl_rcvbspace(kctl, so, len, flags) != 0) {
+               if (ctl_rcvbspace(so, len, kctlflags, flags) != 0) {
                        error = ENOBUFS;
                        OSIncrementAtomic64(
                            (SInt64 *)&kctlstat.kcs_enqueue_fullsock);
@@ -879,19 +934,17 @@ ctl_enqueuedata(void *kctlref, u_int32_t unit, void *data, size_t len,
        struct socket   *so;
        struct mbuf     *m;
        errno_t         error = 0;
-       struct kctl     *kctl = (struct kctl *)kctlref;
        unsigned int    num_needed;
        struct mbuf     *n;
        size_t          curlen = 0;
+       u_int32_t       kctlflags;
 
-       if (kctlref == NULL)
-               return (EINVAL);
-
-       so = kcb_find_socket(kctl, unit);
-       if (so == NULL)
+       so = kcb_find_socket(kctlref, unit, &kctlflags);
+       if (so == NULL) {
                return (EINVAL);
+       }
 
-       if (ctl_rcvbspace(kctl, so, len, flags) != 0) {
+       if (ctl_rcvbspace(so, len, kctlflags, flags) != 0) {
                error = ENOBUFS;
                OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_enqueue_fullsock);
                goto bye;
@@ -940,20 +993,50 @@ bye:
        return (error);
 }
 
+errno_t
+ctl_getenqueuepacketcount(kern_ctl_ref kctlref, u_int32_t unit, u_int32_t *pcnt)
+{
+       struct socket   *so;
+       u_int32_t cnt;
+       struct mbuf *m1;
+
+       if (pcnt == NULL)
+               return (EINVAL);
+
+       so = kcb_find_socket(kctlref, unit, NULL);
+       if (so == NULL) {
+               return (EINVAL);
+       }
+
+       cnt = 0;
+       m1 = so->so_rcv.sb_mb;
+       while (m1 != NULL) {
+               if (m1->m_type == MT_DATA ||
+                   m1->m_type == MT_HEADER ||
+                   m1->m_type == MT_OOBDATA)
+                       cnt += 1;
+               m1 = m1->m_nextpkt;
+       }
+       *pcnt = cnt;
+
+       socket_unlock(so, 1);
+
+       return (0);
+}
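The new ctl_getenqueuepacketcount() walks the receive buffer's record chain. A minimal sketch of that walk with stand-in types; the kernel version additionally counts only MT_DATA, MT_HEADER, and MT_OOBDATA records.

#include <stdint.h>

struct pkt {                    /* stand-in for struct mbuf */
	struct pkt *m_nextpkt;  /* next record in the receive buffer */
	int         m_type;
};

static uint32_t
count_enqueued_packets(const struct pkt *head)
{
	uint32_t cnt = 0;

	for (const struct pkt *m = head; m != NULL; m = m->m_nextpkt)
		cnt++;
	return cnt;
}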
 
 errno_t
 ctl_getenqueuespace(kern_ctl_ref kctlref, u_int32_t unit, size_t *space)
 {
-       struct kctl             *kctl = (struct kctl *)kctlref;
        struct socket   *so;
        long avail;
 
-       if (kctlref == NULL || space == NULL)
+       if (space == NULL)
                return (EINVAL);
 
-       so = kcb_find_socket(kctl, unit);
-       if (so == NULL)
+       so = kcb_find_socket(kctlref, unit, NULL);
+       if (so == NULL) {
                return (EINVAL);
+       }
 
        avail = sbspace(&so->so_rcv);
        *space = (avail < 0) ? 0 : avail;
@@ -966,15 +1049,15 @@ errno_t
 ctl_getenqueuereadable(kern_ctl_ref kctlref, u_int32_t unit,
     u_int32_t *difference)
 {
-       struct kctl             *kctl = (struct kctl *)kctlref;
        struct socket   *so;
 
-       if (kctlref == NULL || difference == NULL)
+       if (difference == NULL)
                return (EINVAL);
 
-       so = kcb_find_socket(kctl, unit);
-       if (so == NULL)
+       so = kcb_find_socket(kctlref, unit, NULL);
+       if (so == NULL) {
                return (EINVAL);
+       }
 
        if (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat) {
                *difference = 0;
@@ -1017,16 +1100,13 @@ ctl_ctloutput(struct socket *so, struct sockopt *sopt)
                                if (data == NULL)
                                        return (ENOMEM);
                                error = sooptcopyin(sopt, data,
-                                               sopt->sopt_valsize,
-                                               sopt->sopt_valsize);
+                                   sopt->sopt_valsize, sopt->sopt_valsize);
                        }
                        if (error == 0) {
                                socket_unlock(so, 0);
-                               error = (*kctl->setopt)(kcb->kctl, kcb->unit,
-                                                       kcb->userdata,
-                                                       sopt->sopt_name,
-                                                       data,
-                                                       sopt->sopt_valsize);
+                               error = (*kctl->setopt)(kctl->kctlref,
+                                   kcb->unit, kcb->userdata, sopt->sopt_name,
+                                   data, sopt->sopt_valsize);
                                socket_lock(so, 0);
                        }
                        FREE(data, M_TEMP);
@@ -1050,7 +1130,7 @@ ctl_ctloutput(struct socket *so, struct sockopt *sopt)
                        }
                        len = sopt->sopt_valsize;
                        socket_unlock(so, 0);
-                       error = (*kctl->getopt)(kcb->kctl, kcb->unit,
+                       error = (*kctl->getopt)(kctl->kctlref, kcb->unit,
                                        kcb->userdata, sopt->sopt_name,
                                                data, &len);
                        if (data != NULL && len > sopt->sopt_valsize)
@@ -1126,6 +1206,148 @@ ctl_ioctl(struct socket *so, u_long cmd, caddr_t data,
        return (error);
 }
 
+static void
+kctl_tbl_grow()
+{
+       struct kctl **new_table;
+       uintptr_t new_size;
+
+       lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED);
+
+       while (kctl_tbl_growing) {
+               /* Another thread is allocating */
+               (void) msleep((caddr_t) &kctl_tbl_growing, ctl_mtx,
+                   PSOCK | PCATCH, "kctl_tbl_growing", 0);
+       }
+       /* Another thread grew the table */
+       if (kctl_table != NULL && kctl_tbl_count < kctl_tbl_size)
+               return;
+
+       /* Verify we have a sane size */
+       if (kctl_tbl_size + KCTL_TBL_INC >= UINT16_MAX) {
+               printf("%s kctl_tbl_size %lu too big\n",
+                   __func__, kctl_tbl_size);
+               return;
+       }
+       kctl_tbl_growing = 1;
+
+       new_size = kctl_tbl_size + KCTL_TBL_INC;
+
+       lck_mtx_unlock(ctl_mtx);
+       new_table = _MALLOC(sizeof(struct kctl *) * new_size,
+           M_TEMP, M_WAIT | M_ZERO);
+       lck_mtx_lock(ctl_mtx);
+
+       if (new_table != NULL) {
+               if (kctl_table != NULL) {
+                       bcopy(kctl_table, new_table,
+                           kctl_tbl_size * sizeof(struct kctl *));
+
+                       _FREE(kctl_table, M_TEMP);
+               }
+               kctl_table = new_table;
+               kctl_tbl_size = new_size;
+       }
+
+       kctl_tbl_growing = 0;
+}
+
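A hedged userspace analogue of the grow-while-unlocked pattern in kctl_tbl_grow() above: a "growing" flag serialized under the mutex lets one thread allocate the larger table with the lock dropped, while other would-be growers sleep and re-check. pthread primitives stand in for msleep()/wakeup(); all names are illustrative.

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t tbl_mtx  = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  tbl_cond = PTHREAD_COND_INITIALIZER;
static void   **tbl;
static size_t   tbl_size, tbl_count;
static int      tbl_growing;

#define TBL_INC 16

/* Called with tbl_mtx held; may drop and re-take it. */
static void
tbl_grow_locked(void)
{
	while (tbl_growing)
		pthread_cond_wait(&tbl_cond, &tbl_mtx); /* another thread is growing */
	if (tbl != NULL && tbl_count < tbl_size)
		return;                                 /* someone else already grew it */

	tbl_growing = 1;
	size_t new_size = tbl_size + TBL_INC;

	pthread_mutex_unlock(&tbl_mtx);                 /* allocate without the lock */
	void **new_tbl = calloc(new_size, sizeof(*new_tbl));
	pthread_mutex_lock(&tbl_mtx);

	if (new_tbl != NULL) {
		if (tbl != NULL) {
			memcpy(new_tbl, tbl, tbl_size * sizeof(*tbl));
			free(tbl);
		}
		tbl = new_tbl;
		tbl_size = new_size;
	}
	tbl_growing = 0;
	pthread_cond_broadcast(&tbl_cond);              /* let other growers re-check */
}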
+#define KCTLREF_INDEX_MASK 0x0000FFFF
+#define KCTLREF_GENCNT_MASK 0xFFFF0000
+#define KCTLREF_GENCNT_SHIFT 16
+
+static kern_ctl_ref
+kctl_make_ref(struct kctl *kctl)
+{
+       uintptr_t i;
+
+       lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED);
+
+       if (kctl_tbl_count >= kctl_tbl_size)
+               kctl_tbl_grow();
+
+       kctl->kctlref = NULL;
+       for (i = 0; i < kctl_tbl_size; i++) {
+               if (kctl_table[i] == NULL) {
+                       uintptr_t ref;
+
+                       /*
+                        * Reference is index plus one
+                        */
+                       kctl_ref_gencnt += 1;
+
+                       /*
+                        * Add generation count as salt to reference to prevent
+                        * use after deregister
+                        */
+                       ref = ((kctl_ref_gencnt << KCTLREF_GENCNT_SHIFT) & 
+                           KCTLREF_GENCNT_MASK) +
+                           ((i + 1) & KCTLREF_INDEX_MASK);
+
+                       kctl->kctlref = (void *)(ref);
+                       kctl_table[i] = kctl;
+                       kctl_tbl_count++;
+                       break;
+               }
+       }
+
+       if (kctl->kctlref == NULL)
+               panic("%s no space in table", __func__);
+
+       if (ctl_debug > 0)
+               printf("%s %p for %p\n",
+                       __func__, kctl->kctlref, kctl);
+
+       return (kctl->kctlref);
+}
+
+static void
+kctl_delete_ref(kern_ctl_ref kctlref)
+{
+       /*
+        * Reference is index plus one
+        */
+       uintptr_t i = (((uintptr_t)kctlref) & KCTLREF_INDEX_MASK) - 1;
+
+       lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED);
+
+       if (i < kctl_tbl_size) {
+               struct kctl *kctl = kctl_table[i];
+
+               if (kctl->kctlref == kctlref) {
+                       kctl_table[i] = NULL;
+                       kctl_tbl_count--;
+               } else {
+                       kctlstat.kcs_bad_kctlref++;
+               }
+       } else {
+               kctlstat.kcs_bad_kctlref++;
+       }
+}
+
+static struct kctl *
+kctl_from_ref(kern_ctl_ref kctlref)
+{
+       /*
+        * Reference is index plus one
+        */
+       uintptr_t i = (((uintptr_t)kctlref) & KCTLREF_INDEX_MASK) - 1;
+       struct kctl *kctl = NULL;
+
+       lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED);
+
+       if (i >= kctl_tbl_size) {
+               kctlstat.kcs_bad_kctlref++;
+               return (NULL);
+       }
+       kctl = kctl_table[i];
+       if (kctl->kctlref != kctlref) {
+               kctlstat.kcs_bad_kctlref++;
+               return (NULL);
+       }
+       return (kctl);
+}
+
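A standalone sketch of the reference encoding used by kctl_make_ref() and kctl_from_ref(): the low 16 bits carry the table index plus one (so a zero reference is never valid) and the high 16 bits carry a generation count, so a stale reference left over from a deregistered controller fails the round-trip check. The mask values mirror the defines above; everything else is illustrative.

#include <stdint.h>
#include <stddef.h>

#define REF_INDEX_MASK   0x0000FFFF
#define REF_GENCNT_MASK  0xFFFF0000
#define REF_GENCNT_SHIFT 16

static uintptr_t
encode_ref(uintptr_t slot_index, uintptr_t gencnt)
{
	/* index is stored plus one so a NULL/zero reference is never valid */
	return ((gencnt << REF_GENCNT_SHIFT) & REF_GENCNT_MASK) +
	    ((slot_index + 1) & REF_INDEX_MASK);
}

static size_t
decode_index(uintptr_t ref)
{
	return (size_t)((ref & REF_INDEX_MASK) - 1);
}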
 /*
  * Register/unregister a NKE
  */
@@ -1153,6 +1375,12 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref)
 
        lck_mtx_lock(ctl_mtx);
 
+       if (kctl_make_ref(kctl) == NULL) {
+               lck_mtx_unlock(ctl_mtx);
+               FREE(kctl, M_TEMP);
+               return (ENOMEM);
+       }
+
        /*
         * Kernel Control IDs
         *
@@ -1169,6 +1397,7 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref)
 
                /* Verify the same name isn't already registered */
                if (ctl_find_by_name(userkctl->ctl_name) != NULL) {
+                       kctl_delete_ref(kctl->kctlref);
                        lck_mtx_unlock(ctl_mtx);
                        FREE(kctl, M_TEMP);
                        return (EEXIST);
@@ -1212,6 +1441,7 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref)
                }
 
                if (ctl_find_by_id_unit(userkctl->ctl_id, userkctl->ctl_unit)) {
+                       kctl_delete_ref(kctl->kctlref);
                        lck_mtx_unlock(ctl_mtx);
                        FREE(kctl, M_TEMP);
                        return (EEXIST);
@@ -1263,7 +1493,7 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref)
 
        lck_mtx_unlock(ctl_mtx);
 
-       *kctlref = kctl;
+       *kctlref = kctl->kctlref;
 
        ctl_post_msg(KEV_CTL_REGISTERED, kctl->id);
        return (0);
@@ -1274,18 +1504,16 @@ ctl_deregister(void *kctlref)
 {
        struct kctl             *kctl;
 
-       if (kctlref == NULL)    /* sanity check */
-               return (EINVAL);
-
        lck_mtx_lock(ctl_mtx);
-       TAILQ_FOREACH(kctl, &ctl_head, next) {
-               if (kctl == (struct kctl *)kctlref)
-                       break;
-       }
-       if (kctl != (struct kctl *)kctlref) {
+       if ((kctl = kctl_from_ref(kctlref)) == NULL) {
+               kctlstat.kcs_bad_kctlref++;
                lck_mtx_unlock(ctl_mtx);
+               if (ctl_debug != 0)
+                       printf("%s invalid kctlref %p\n",
+                               __func__, kctlref);
                return (EINVAL);
        }
+
        if (!TAILQ_EMPTY(&kctl->kcb_head)) {
                lck_mtx_unlock(ctl_mtx);
                return (EBUSY);
@@ -1296,6 +1524,7 @@ ctl_deregister(void *kctlref)
        kctlstat.kcs_reg_count--;
        kctlstat.kcs_gencnt++;
 
+       kctl_delete_ref(kctl->kctlref);
        lck_mtx_unlock(ctl_mtx);
 
        ctl_post_msg(KEV_CTL_DEREGISTERED, kctl->id);
@@ -1347,7 +1576,7 @@ ctl_name_by_id(u_int32_t id, char *out_name, size_t maxsize)
                        break;
        }
 
-       if (kctl && kctl->name) {
+       if (kctl) {
                if (maxsize > MAX_KCTL_NAME)
                        maxsize = MAX_KCTL_NAME;
                strlcpy(out_name, kctl->name, maxsize);
@@ -1396,48 +1625,66 @@ kcb_find(struct kctl *kctl, u_int32_t unit)
 }
 
 static struct socket *
-kcb_find_socket(struct kctl *kctl, u_int32_t unit)
+kcb_find_socket(kern_ctl_ref kctlref, u_int32_t unit, u_int32_t *kctlflags)
 {
        struct socket *so = NULL;
        struct ctl_cb   *kcb;
        void *lr_saved;
+       struct kctl *kctl;
+       int i;
 
        lr_saved = __builtin_return_address(0);
 
        lck_mtx_lock(ctl_mtx);
-       kcb = kcb_find(kctl, unit);
-       if (kcb && kcb->kctl == kctl) {
-               so = kcb->so;
-               if (so) {
-                       kcb->usecount++;
-               }
+       /*
+        * First validate the kctlref
+        */
+       if ((kctl = kctl_from_ref(kctlref)) == NULL) {
+               kctlstat.kcs_bad_kctlref++;
+               lck_mtx_unlock(ctl_mtx);
+               if (ctl_debug != 0)
+                       printf("%s invalid kctlref %p\n",
+                               __func__, kctlref);
+               return (NULL);
        }
-       lck_mtx_unlock(ctl_mtx);
 
-       if (so == NULL) {
+       kcb = kcb_find(kctl, unit);
+       if (kcb == NULL || kcb->kctl != kctl || (so = kcb->so) == NULL) {
+               lck_mtx_unlock(ctl_mtx);
                return (NULL);
        }
+       /*
+        * This prevents the socket from being closed
+        */
+       kcb->usecount++;
+       /*
+        * Respect lock ordering: socket before ctl_mtx
+        */
+       lck_mtx_unlock(ctl_mtx);
 
        socket_lock(so, 1);
+       /*
+        * The socket lock history is more useful if we store
+        * the address of the caller.
+        */
+       i = (so->next_lock_lr + SO_LCKDBG_MAX - 1) % SO_LCKDBG_MAX;
+       so->lock_lr[i] = lr_saved;
 
        lck_mtx_lock(ctl_mtx);
-       if (kcb->kctl == NULL) {
+
+       if ((kctl = kctl_from_ref(kctlref)) == NULL || kcb->kctl == NULL) {
                lck_mtx_unlock(ctl_mtx);
                socket_unlock(so, 1);
                so = NULL;
                lck_mtx_lock(ctl_mtx);
-       } else {
-               /*
-                * The socket lock history is more useful if we store
-                * the address of the caller.
-                */
-               int i = (so->next_lock_lr + SO_LCKDBG_MAX - 1) % SO_LCKDBG_MAX;
-
-               so->lock_lr[i] = lr_saved;
+       } else if (kctlflags != NULL) {
+               *kctlflags = kctl->flags;
        }
+
        kcb->usecount--;
        if (kcb->usecount == 0)
                wakeup((event_t)&kcb->usecount);
+
        lck_mtx_unlock(ctl_mtx);
 
        return (so);
@@ -1626,7 +1873,7 @@ kctl_reg_list SYSCTL_HANDLER_ARGS
                xkr->xkr_id = kctl->id;
                xkr->xkr_reg_unit = kctl->reg_unit;
                xkr->xkr_flags = kctl->flags;
-               xkr->xkr_kctlref = (uint64_t)VM_KERNEL_ADDRPERM(kctl);
+               xkr->xkr_kctlref = (uint64_t)(kctl->kctlref);
                xkr->xkr_recvbufsize = kctl->recvbufsize;
                xkr->xkr_sendbufsize = kctl->sendbufsize;
                xkr->xkr_lastunit = kctl->lastunit;
@@ -1808,3 +2055,25 @@ done:
        lck_mtx_unlock(ctl_mtx);
        return (error);
 }
+
+void
+kctl_fill_socketinfo(struct socket *so, struct socket_info *si)
+{
+       struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb;
+       struct kern_ctl_info *kcsi =
+           &si->soi_proto.pri_kern_ctl;
+       struct kctl *kctl = kcb->kctl;
+
+       si->soi_kind = SOCKINFO_KERN_CTL;
+
+       if (kctl == 0)
+               return;
+
+       kcsi->kcsi_id = kctl->id;
+       kcsi->kcsi_reg_unit = kctl->reg_unit;
+       kcsi->kcsi_flags = kctl->flags;
+       kcsi->kcsi_recvbufsize = kctl->recvbufsize;
+       kcsi->kcsi_sendbufsize = kctl->sendbufsize;
+       kcsi->kcsi_unit = kcb->unit;
+       strlcpy(kcsi->kcsi_name, kctl->name, MAX_KCTL_NAME);
+}
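
The kern_control portion of this change turns kern_ctl_ref into an opaque token that is validated through kctl_from_ref() instead of being a raw struct kctl pointer. Below is a minimal sketch of how a kext-side caller holds and reuses the returned reference; the control name and the callback body are placeholders, not part of this commit.

    #include <sys/kern_control.h>

    static kern_ctl_ref g_ctlref;           /* opaque token handed back by ctl_register() */

    /* Minimal connect callback; a real control would track the unit here. */
    static errno_t
    example_ctl_connect(kern_ctl_ref ref, struct sockaddr_ctl *sac, void **unitinfo)
    {
    #pragma unused(ref, sac)
            *unitinfo = NULL;
            return 0;
    }

    static errno_t
    example_ctl_register(void)
    {
            struct kern_ctl_reg reg = {
                    .ctl_name    = "com.example.kctl",  /* placeholder reverse-DNS name */
                    .ctl_connect = example_ctl_connect,
            };

            /* The registry now returns an opaque kern_ctl_ref, not a struct kctl pointer. */
            return ctl_register(&reg, &g_ctlref);
    }

    static void
    example_ctl_unregister(void)
    {
            /* ctl_deregister() validates the token via kctl_from_ref() before tearing down. */
            if (g_ctlref != NULL && ctl_deregister(g_ctlref) == 0)
                    g_ctlref = NULL;
    }
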
index 2bd9de059d460bb414c7a13b14082d4afec284f3..9477378efef55ebf546573a516b513eafc58661d 100644 (file)
@@ -99,7 +99,6 @@ extern int freespace_mb(vnode_t vp);
 kern_return_t thread_getstatus(register thread_t act, int flavor,
        thread_state_t tstate, mach_msg_type_number_t *count);
 void task_act_iterate_wth_args(task_t, void(*)(thread_t, void *), void *);
-extern kern_return_t task_suspend_internal(task_t);
 
 static cpu_type_t process_cpu_type(proc_t proc);
 static cpu_type_t process_cpu_subtype(proc_t proc);
@@ -192,7 +191,7 @@ collectth_state(thread_t th_act, void *tirp)
  * Parameters: core_proc                       Process to dump core [*]
  *                             reserve_mb                      If non-zero, leave filesystem with
  *                                                                     at least this much free space.
- *                             ignore_ulimit           If set, ignore the process's core file ulimit.  
+ *                             coredump_flags  Extra options (ignore rlimit, run fsync)
  *
  * Returns:    0                               Success
  *             EFAULT                          Failed
@@ -203,7 +202,7 @@ collectth_state(thread_t th_act, void *tirp)
  */
 #define        MAX_TSTATE_FLAVORS      10
 int
-coredump(proc_t core_proc, uint32_t reserve_mb, int ignore_ulimit)
+coredump(proc_t core_proc, uint32_t reserve_mb, int coredump_flags)
 {
 /* Begin assumptions that limit us to only the current process */
        vfs_context_t ctx = vfs_context_current();
@@ -265,8 +264,10 @@ coredump(proc_t core_proc, uint32_t reserve_mb, int ignore_ulimit)
 
        mapsize = get_vmmap_size(map);
 
-       if ((mapsize >=  core_proc->p_rlimit[RLIMIT_CORE].rlim_cur) && (ignore_ulimit == 0))
+       if (((coredump_flags & COREDUMP_IGNORE_ULIMIT) == 0) &&
+           (mapsize >=  core_proc->p_rlimit[RLIMIT_CORE].rlim_cur))
                return (EFAULT);
+
        (void) task_suspend_internal(task);
 
        MALLOC(alloced_name, char *, MAXPATHLEN, M_TEMP, M_NOWAIT | M_ZERO);
@@ -325,7 +326,7 @@ coredump(proc_t core_proc, uint32_t reserve_mb, int ignore_ulimit)
 
        header_size = command_size + mach_header_sz;
 
-       if (kmem_alloc(kernel_map, &header, (vm_size_t)header_size) != KERN_SUCCESS) {
+       if (kmem_alloc(kernel_map, &header, (vm_size_t)header_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
                error = ENOMEM;
                goto out;
        }
@@ -416,6 +417,7 @@ coredump(proc_t core_proc, uint32_t reserve_mb, int ignore_ulimit)
                        sc64->maxprot = maxprot;
                        sc64->initprot = prot;
                        sc64->nsects = 0;
+                       sc64->flags = 0;
                } else  {
                        sc = (struct segment_command *) (header + hoffset);
                        sc->cmd = LC_SEGMENT;
@@ -429,6 +431,7 @@ coredump(proc_t core_proc, uint32_t reserve_mb, int ignore_ulimit)
                        sc->maxprot = maxprot;
                        sc->initprot = prot;
                        sc->nsects = 0;
+                       sc->flags = 0;
                }
 
                /*
@@ -488,6 +491,9 @@ coredump(proc_t core_proc, uint32_t reserve_mb, int ignore_ulimit)
        error = vn_rdwr(UIO_WRITE, vp, (caddr_t)header, header_size, (off_t)0,
                        UIO_SYSSPACE, IO_NOCACHE|IO_NODELOCKED|IO_UNIT, cred, (int *) 0, core_proc);
        kmem_free(kernel_map, header, header_size);
+
+       if ((coredump_flags & COREDUMP_FULLFSYNC) && error == 0)
+               error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
 out:
        error1 = vnode_close(vp, FWRITE, ctx);
 out2:
index b3c0d357afa8c42c01071e9257352ec9f76fb44d..fbbfb752a590d9f4abfe6d2a5f134edb8076e1f7 100644 (file)
@@ -264,9 +264,7 @@ static void kauth_groups_trimcache(int newsize);
 
 #endif /* CONFIG_EXT_RESOLVER */
 
-static const int kauth_cred_primes[KAUTH_CRED_PRIMES_COUNT] = KAUTH_CRED_PRIMES;
-static int     kauth_cred_primes_index = 0;
-static int     kauth_cred_table_size = 0;
+#define KAUTH_CRED_TABLE_SIZE 97
 
 TAILQ_HEAD(kauth_cred_entry_head, ucred);
 static struct kauth_cred_entry_head * kauth_cred_table_anchor = NULL;
@@ -3364,15 +3362,14 @@ kauth_cred_init(void)
        int             i;
        
        kauth_cred_hash_mtx = lck_mtx_alloc_init(kauth_lck_grp, 0/*LCK_ATTR_NULL*/);
-       kauth_cred_table_size = kauth_cred_primes[kauth_cred_primes_index];
 
        /*allocate credential hash table */
        MALLOC(kauth_cred_table_anchor, struct kauth_cred_entry_head *, 
-                       (sizeof(struct kauth_cred_entry_head) * kauth_cred_table_size), 
+                       (sizeof(struct kauth_cred_entry_head) * KAUTH_CRED_TABLE_SIZE),
                        M_KAUTH, M_WAITOK | M_ZERO);
        if (kauth_cred_table_anchor == NULL)
                panic("startup: kauth_cred_init");
-       for (i = 0; i < kauth_cred_table_size; i++) {
+       for (i = 0; i < KAUTH_CRED_TABLE_SIZE; i++) {
                TAILQ_INIT(&kauth_cred_table_anchor[i]);
        }
 }
@@ -5095,7 +5092,7 @@ kauth_cred_add(kauth_cred_t new_cred)
        KAUTH_CRED_HASH_LOCK_ASSERT();
 
        hash_key = kauth_cred_get_hashkey(new_cred);
-       hash_key %= kauth_cred_table_size;
+       hash_key %= KAUTH_CRED_TABLE_SIZE;
 
        /* race fix - there is a window where another matching credential 
         * could have been inserted between the time this one was created and we
@@ -5140,7 +5137,7 @@ kauth_cred_remove(kauth_cred_t cred)
        kauth_cred_t    found_cred;
 
        hash_key = kauth_cred_get_hashkey(cred);
-       hash_key %= kauth_cred_table_size;
+       hash_key %= KAUTH_CRED_TABLE_SIZE;
 
        /* Avoid race */
        if (cred->cr_ref < 1)
@@ -5200,7 +5197,7 @@ kauth_cred_find(kauth_cred_t cred)
 #endif
 
        hash_key = kauth_cred_get_hashkey(cred);
-       hash_key %= kauth_cred_table_size;
+       hash_key %= KAUTH_CRED_TABLE_SIZE;
 
        /* Find cred in the credential hash table */
        TAILQ_FOREACH(found_cred, &kauth_cred_table_anchor[hash_key], cr_link) {
@@ -5325,7 +5322,7 @@ kauth_cred_hash_print(void)
                
        printf("\n\t kauth credential hash table statistics - current cred count %d \n", kauth_cred_count);
        /* count slot hits, misses, collisions, and max depth */
-       for (i = 0; i < kauth_cred_table_size; i++) {
+       for (i = 0; i < KAUTH_CRED_TABLE_SIZE; i++) {
                printf("[%02d] ", i);
                j = 0;
                TAILQ_FOREACH(found_cred, &kauth_cred_table_anchor[i], cr_link) {
@@ -5510,7 +5507,7 @@ sysctl_dump_creds( __unused struct sysctl_oid *oidp, __unused void *arg1, __unus
                return (EPERM);
 
        /* calculate space needed */
-       for (i = 0; i < kauth_cred_table_size; i++) {
+       for (i = 0; i < KAUTH_CRED_TABLE_SIZE; i++) {
                TAILQ_FOREACH(found_cred, &kauth_cred_table_anchor[i], cr_link) {
                        counter++;
                }
@@ -5531,7 +5528,7 @@ sysctl_dump_creds( __unused struct sysctl_oid *oidp, __unused void *arg1, __unus
        /* fill in creds to send back */
        nextp = cred_listp;
        space = 0;
-       for (i = 0; i < kauth_cred_table_size; i++) {
+       for (i = 0; i < KAUTH_CRED_TABLE_SIZE; i++) {
                TAILQ_FOREACH(found_cred, &kauth_cred_table_anchor[i], cr_link) {
                        nextp->credp = found_cred;
                        nextp->cr_ref = found_cred->cr_ref;
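
The credential-cache changes above replace the growable prime table with a single compile-time bucket count. A minimal illustration of the bucket selection every add/remove/find path in this diff now performs, treating kauth_cred_get_hashkey() as an opaque hash over the credential:

    #define KAUTH_CRED_TABLE_SIZE 97        /* fixed prime number of hash buckets */

    static u_long
    example_cred_bucket(u_long hash_key)
    {
            /* The anchor array is always indexed modulo the fixed prime. */
            return hash_key % KAUTH_CRED_TABLE_SIZE;
    }
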
index 66af46613e287e362ced16438f6a762dee3c8be1..1459a472f2c710efb0d27b3158c105594452ff02 100644 (file)
@@ -78,27 +78,27 @@ int cs_force_kill = 0;
 int cs_force_hard = 0;
 int cs_debug = 0;
 #if SECURE_KERNEL
-const int cs_enforcement_enable=1;
-const int cs_library_val_enable=1;
-#else
+const int cs_enforcement_enable = 1;
+const int cs_library_val_enable = 1;
+#else /* !SECURE_KERNEL */
+int cs_enforcement_panic=0;
+
 #if CONFIG_ENFORCE_SIGNED_CODE
-int cs_enforcement_enable=1;
+int cs_enforcement_enable = 1;
 #else
-int cs_enforcement_enable=0;
-#endif /* CONFIG_ENFORCE_SIGNED_CODE */
+int cs_enforcement_enable = 0;
+#endif
 
 #if CONFIG_ENFORCE_LIBRARY_VALIDATION
 int cs_library_val_enable = 1;
 #else
 int cs_library_val_enable = 0;
-#endif /* CONFIG_ENFORCE_LIBRARY_VALIDATION */
+#endif
 
-int cs_enforcement_panic=0;
-#endif /* SECURE_KERNEL */
+#endif /* !SECURE_KERNEL */
 int cs_all_vnodes = 0;
 
 static lck_grp_t *cs_lockgrp;
-static lck_rw_t * SigPUPLock;
 
 SYSCTL_INT(_vm, OID_AUTO, cs_force_kill, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_force_kill, 0, "");
 SYSCTL_INT(_vm, OID_AUTO, cs_force_hard, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_force_hard, 0, "");
@@ -109,7 +109,11 @@ SYSCTL_INT(_vm, OID_AUTO, cs_all_vnodes, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_all_vn
 #if !SECURE_KERNEL
 SYSCTL_INT(_vm, OID_AUTO, cs_enforcement, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_enforcement_enable, 0, "");
 SYSCTL_INT(_vm, OID_AUTO, cs_enforcement_panic, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_enforcement_panic, 0, "");
+
+#if !CONFIG_ENFORCE_LIBRARY_VALIDATION
+SYSCTL_INT(_vm, OID_AUTO, cs_library_validation, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_library_val_enable, 0, "");
 #endif
+#endif /* !SECURE_KERNEL */
 
 int panic_on_cs_killed = 0;
 void
@@ -133,10 +137,15 @@ cs_init(void)
        }
 
        PE_parse_boot_argn("cs_debug", &cs_debug, sizeof (cs_debug));
+
+#if !CONFIG_ENFORCE_LIBRARY_VALIDATION
+       PE_parse_boot_argn("cs_library_val_enable", &cs_library_val_enable,
+                          sizeof (cs_library_val_enable));
 #endif
+#endif /* !SECURE_KERNEL */
+
        lck_grp_attr_t *attr = lck_grp_attr_alloc_init();
        cs_lockgrp = lck_grp_alloc_init("KERNCS", attr);
-       SigPUPLock = lck_rw_alloc_init(cs_lockgrp, NULL);
 }
 
 int
@@ -258,232 +267,6 @@ cs_enforcement(struct proc *p)
        return 0;
 }
 
-static struct {
-       struct cscsr_functions *funcs;
-       vm_map_offset_t csr_map_base;
-       vm_map_size_t csr_map_size;
-       int inuse;
-       int disabled;
-} csr_state;
-
-SYSCTL_INT(_vm, OID_AUTO, sigpup_disable, CTLFLAG_RW | CTLFLAG_LOCKED, &csr_state.disabled, 0, "");
-
-static int
-vnsize(vfs_context_t vfs, vnode_t vp, uint64_t *size)
-{
-       struct vnode_attr va;
-       int error;
-
-       VATTR_INIT(&va);
-       VATTR_WANTED(&va, va_data_size);
-
-       error = vnode_getattr(vp, &va, vfs);
-       if (error)
-               return error;
-       *size = va.va_data_size;
-       return 0;
-}
-
-int
-sigpup_install(user_addr_t argsp)
-{
-       struct sigpup_install_table args;
-       memory_object_control_t control;
-       kern_return_t result;
-       vfs_context_t vfs = NULL;
-       struct vnode_attr va;
-       vnode_t vp = NULL;
-        char *buf = NULL;
-       uint64_t size;
-       size_t len = 0;
-       int error = 0;
-       
-       if (!cs_enforcement_enable || csr_state.funcs == NULL)
-               return ENOTSUP;
-
-       lck_rw_lock_exclusive(SigPUPLock);
-
-       if (kauth_cred_issuser(kauth_cred_get()) == 0) {
-               error = EPERM;
-               goto cleanup;
-       }
-
-       if (cs_debug > 10)
-               printf("sigpup install\n");
-
-       if (csr_state.csr_map_base != 0 || csr_state.inuse) {
-               error = EPERM;
-               goto cleanup;
-       }
-
-       if (USER_ADDR_NULL == argsp) {
-               error = EINVAL;
-               goto cleanup;
-       }
-       if ((error = copyin(argsp, &args, sizeof(args))) != 0)
-               goto cleanup;
-
-       if (cs_debug > 10)
-               printf("sigpup install with args\n");
-
-       MALLOC(buf, char *, MAXPATHLEN, M_TEMP, M_WAITOK);
-       if (buf == NULL) {
-               error = ENOMEM;
-               goto cleanup;
-       }
-       if ((error = copyinstr((user_addr_t)args.path, buf, MAXPATHLEN, &len)) != 0)
-               goto cleanup;
-
-       if ((vfs = vfs_context_create(NULL)) == NULL) {
-               error = ENOMEM;
-               goto cleanup;
-       }
-
-       if ((error = vnode_lookup(buf, VNODE_LOOKUP_NOFOLLOW, &vp, vfs)) != 0)
-               goto cleanup;
-
-       if (cs_debug > 10)
-               printf("sigpup found file: %s\n", buf);
-
-       /* make sure vnode is on the process's root volume */
-       if (rootvnode->v_mount != vp->v_mount) {
-               if (cs_debug) printf("sigpup csr no on root volume\n");
-               error = EPERM;
-               goto cleanup;
-       }
-
-       /* make sure vnode is owned by "root" */
-       VATTR_INIT(&va);
-       VATTR_WANTED(&va, va_uid);
-       error = vnode_getattr(vp, &va, vfs);
-       if (error)
-               goto cleanup;
-
-       if (va.va_uid != 0) {
-               if (cs_debug) printf("sigpup: csr file not owned by root\n");
-               error = EPERM;
-               goto cleanup;
-       }
-
-       error = vnsize(vfs, vp, &size);
-       if (error)
-               goto cleanup;
-
-       control = ubc_getobject(vp, 0);
-       if (control == MEMORY_OBJECT_CONTROL_NULL) {
-               error = EINVAL;
-               goto cleanup;
-       }
-
-       csr_state.csr_map_size = mach_vm_round_page(size);
-
-       if (cs_debug > 10)
-               printf("mmap!\n");
-
-       result = vm_map_enter_mem_object_control(kernel_map,
-                                                &csr_state.csr_map_base,
-                                                csr_state.csr_map_size,
-                                                0, VM_FLAGS_ANYWHERE,
-                                                control, 0 /* file offset */,
-                                                0 /* cow */,
-                                                VM_PROT_READ,
-                                                VM_PROT_READ, 
-                                                VM_INHERIT_DEFAULT);
-       if (result != KERN_SUCCESS) {
-               error = EINVAL;
-               goto cleanup;
-       }
-
-       error = csr_state.funcs->csr_validate_header((const uint8_t *)csr_state.csr_map_base,
-           csr_state.csr_map_size);
-       if (error) {
-               if (cs_debug > 10)
-                       printf("sigpup header invalid, dropping mapping");
-               sigpup_drop();
-               goto cleanup;
-       }
-
-       if (cs_debug > 10)
-               printf("table loaded %ld bytes\n", (long)csr_state.csr_map_size);
-
-cleanup:
-       lck_rw_unlock_exclusive(SigPUPLock);
-
-        if (buf)
-                FREE(buf, M_TEMP);
-       if (vp)
-               (void)vnode_put(vp);
-       if (vfs)
-               (void)vfs_context_rele(vfs);
-        
-       if (error)
-               printf("sigpup: load failed with error: %d\n", error);
-
-
-       return error;
-}
-
-int
-sigpup_drop(void)
-{
-
-       if (kauth_cred_issuser(kauth_cred_get()) == 0)
-               return EPERM;
-
-       lck_rw_lock_exclusive(SigPUPLock);
-
-       if (csr_state.csr_map_base == 0 || csr_state.inuse) {
-               printf("failed to unload the sigpup database\n");
-               lck_rw_unlock_exclusive(SigPUPLock);
-               return EINVAL;
-       }
-
-       if (cs_debug > 10)
-               printf("sigpup: unloading\n");
-
-       (void)mach_vm_deallocate(kernel_map,
-           csr_state.csr_map_base, csr_state.csr_map_size);
-
-       csr_state.csr_map_base = 0;
-       csr_state.csr_map_size = 0;
-
-       lck_rw_unlock_exclusive(SigPUPLock);
-
-       return 0;
-}
-
-void   sigpup_attach_vnode(vnode_t); /* XXX */
-
-void
-sigpup_attach_vnode(vnode_t vp)
-{
-       const void *csblob;
-       size_t cslen;
-
-       if (!cs_enforcement_enable || csr_state.funcs == NULL || csr_state.csr_map_base == 0 || csr_state.disabled)
-               return;
-
-       /* if the file is not on the root volumes or already been check, skip */
-       if (vp->v_mount != rootvnode->v_mount || (vp->v_flag & VNOCS))
-               return;
-
-       csblob = csr_state.funcs->csr_find_file_codedirectory(vp, (const uint8_t *)csr_state.csr_map_base,
-           (size_t)csr_state.csr_map_size, &cslen);
-       if (csblob) {
-               ubc_cs_sigpup_add(vp, (vm_address_t)csblob, (vm_size_t)cslen);
-               csr_state.inuse = 1;
-       }
-       vp->v_flag |= VNOCS;
-}
-
-void
-cs_register_cscsr(struct cscsr_functions *funcs)
-{
-       if (csr_state.funcs || funcs->csr_version < CSCSR_VERSION)
-               return;
-       csr_state.funcs = funcs;
-}
-
 /*
  * Library validation functions 
  */
@@ -504,36 +287,32 @@ cs_require_lv(struct proc *p)
 }
 
 /*
- * Function: csblob_get_teamid
+ * Function: csblob_get_platform_binary
  *
- * Description: This function returns a pointer to the team id
-               stored within the codedirectory of the csblob.
-               If the codedirectory predates team-ids, it returns
-               NULL.
-               This does not copy the name but returns a pointer to
-               it within the CD. Subsequently, the CD must be 
-               available when this is used.
+ * Description: This function returns true if the binary is
+ *             in the trust cache.
 */
-const char *
-csblob_get_teamid(struct cs_blob *csblob)
-{
-       const CS_CodeDirectory *cd;
 
-       if ((cd = (const CS_CodeDirectory *)cs_find_blob(
-                                               csblob, CSSLOT_CODEDIRECTORY, CSMAGIC_CODEDIRECTORY)) == NULL)
-               return NULL;
-       
-       if (ntohl(cd->version) < CS_SUPPORTSTEAMID)
-               return NULL;
+int
+csblob_get_platform_binary(struct cs_blob *blob)
+{
+    if (blob && blob->csb_platform_binary)
+       return 1;
+    return 0;
+}
 
-       if (ntohl(cd->teamOffset) == 0)
-               return NULL;
-       
-       const char *name = ((const char *)cd) + ntohl(cd->teamOffset);
-       if (cs_debug > 1)
-               printf("found team-id %s in cdblob\n", name);
+/*
+ * Function: csblob_get_flags
+ *
+ * Description: This function returns the flags for a given blob
+*/
 
-       return name;
+unsigned int
+csblob_get_flags(struct cs_blob *blob)
+{
+       if (blob)
+               return blob->csb_flags;
+       return 0;
 }
 
 /*
@@ -542,7 +321,7 @@ csblob_get_teamid(struct cs_blob *csblob)
  * Description: This function returns the cs_blob
  *             for the process p
  */
-static struct cs_blob *
+struct cs_blob *
 csproc_get_blob(struct proc *p)
 {
        if (NULL == p)
@@ -554,6 +333,63 @@ csproc_get_blob(struct proc *p)
        return ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff);
 }
 
+/*
+ * Function: csvnode_get_blob
+ *
+ * Description: This function returns the cs_blob
+ *             for the vnode vp
+ */
+struct cs_blob *
+csvnode_get_blob(struct vnode *vp, off_t offset)
+{
+       return ubc_cs_blob_get(vp, -1, offset);
+}
+
+/*
+ * Function: csblob_get_teamid
+ *
+ * Description: This function returns a pointer to the
+ *             team id of csblob
+*/
+const char *
+csblob_get_teamid(struct cs_blob *csblob)
+{
+       return csblob->csb_teamid;
+}
+
+/*
+ * Function: csblob_get_identity
+ *
+ * Description: This function returns a pointer to the
+ *             identity string
+ */
+const char *
+csblob_get_identity(struct cs_blob *csblob)
+{
+       const CS_CodeDirectory *cd;
+
+       cd = (const CS_CodeDirectory *)csblob_find_blob(csblob, CSSLOT_CODEDIRECTORY, CSMAGIC_CODEDIRECTORY);
+       if (cd == NULL)
+               return NULL;
+
+       if (cd->identOffset == 0)
+               return NULL;
+
+       return ((const char *)cd) + ntohl(cd->identOffset);
+}
+
+/*
+ * Function: csblob_get_cdhash
+ *
+ * Description: This function returns a pointer to the
+ *             cdhash of csblob (20 byte array)
+ */
+const uint8_t *
+csblob_get_cdhash(struct cs_blob *csblob)
+{
+       return csblob->csb_cdhash;
+}
+
 /*
  * Function: csproc_get_teamid 
  *
@@ -566,8 +402,10 @@ csproc_get_teamid(struct proc *p)
        struct cs_blob *csblob;
 
        csblob = csproc_get_blob(p);
+       if (csblob == NULL)
+           return NULL;
 
-       return (csblob == NULL) ? NULL : csblob->csb_teamid;
+       return csblob_get_teamid(csblob);
 }
 
 /*
@@ -585,8 +423,10 @@ csvnode_get_teamid(struct vnode *vp, off_t offset)
                return NULL;
 
        csblob = ubc_cs_blob_get(vp, -1, offset);
+       if (csblob == NULL)
+           return NULL;
 
-       return (csblob == NULL) ? NULL : csblob->csb_teamid;
+       return csblob_get_teamid(csblob);
 }
 
 /*
@@ -607,6 +447,14 @@ csproc_get_platform_binary(struct proc *p)
        return (csblob == NULL) ? 0 : csblob->csb_platform_binary;
 }
 
+int
+csproc_get_platform_path(struct proc *p)
+{
+       struct cs_blob *csblob = csproc_get_blob(p);
+
+       return (csblob == NULL) ? 0 : csblob->csb_platform_path;
+}
+
 /*
  * Function: csfg_get_platform_binary
  *
@@ -648,6 +496,28 @@ out:
        return platform_binary;
 }
 
+uint8_t *
+csfg_get_cdhash(struct fileglob *fg, uint64_t offset, size_t *cdhash_size)
+{
+       vnode_t vp;
+
+       if (FILEGLOB_DTYPE(fg) != DTYPE_VNODE)
+               return NULL;
+
+       vp = (struct vnode *)fg->fg_data;
+       if (vp == NULL)
+               return NULL;
+
+       struct cs_blob *csblob = NULL;
+       if ((csblob = ubc_cs_blob_get(vp, -1, offset)) == NULL) 
+               return NULL;
+
+       if (cdhash_size)
+               *cdhash_size = CS_CDHASH_LEN;
+
+       return csblob->csb_cdhash;
+}
+
 /*
  * Function: csfg_get_teamid
  *
@@ -694,6 +564,12 @@ cs_entitlement_flags(struct proc *p)
        return (p->p_csflags & CS_ENTITLEMENT_FLAGS);
 }
 
+int
+cs_restricted(struct proc *p)
+{
+       return (p->p_csflags & CS_RESTRICT) ? 1 : 0;
+}
+
 /*
  * Function: csfg_get_path
  *
@@ -717,3 +593,100 @@ csfg_get_path(struct fileglob *fg, char *path, int *len)
           or an error code */
        return vn_getpath(vp, path, len);
 }
+
+/* Retrieve the entitlements blob for a process.
+ * Returns:
+ *   EINVAL    no text vnode associated with the process
+ *   EBADEXEC   invalid code signing data
+ *   0         no error occurred
+ *
+ * On success, out_start and out_length will point to the
+ * entitlements blob if found; or will be set to NULL/zero
+ * if there were no entitlements.
+ */
+
+int
+cs_entitlements_blob_get(proc_t p, void **out_start, size_t *out_length)
+{
+       struct cs_blob *csblob;
+
+       *out_start = NULL;
+       *out_length = 0;
+
+       if (NULL == p->p_textvp)
+               return EINVAL;
+
+       if ((csblob = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff)) == NULL)
+               return 0;
+
+       return csblob_get_entitlements(csblob, out_start, out_length);
+}
+
+/* Retrieve the codesign identity for a process.
+ * Returns:
+ *   NULL      an error occurred
+ *   string    the cs_identity
+ */
+
+const char *
+cs_identity_get(proc_t p)
+{
+       struct cs_blob *csblob;
+
+       if (NULL == p->p_textvp)
+               return NULL;
+
+       if ((csblob = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff)) == NULL)
+               return NULL;
+
+       return csblob_get_identity(csblob);
+}
+
+
+/* Retrieve the codesign blob for a process.
+ * Returns:
+ *   EINVAL    no text vnode associated with the process
+ *   0         no error occurred
+ *
+ * On success, out_start and out_length will point to the
+ * cms blob if found; or will be set to NULL/zero
+ * if there was no blob.
+ */
+
+int
+cs_blob_get(proc_t p, void **out_start, size_t *out_length)
+{
+       struct cs_blob *csblob;
+
+       *out_start = NULL;
+       *out_length = 0;
+
+       if (NULL == p->p_textvp)
+               return EINVAL;
+
+       if ((csblob = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff)) == NULL)
+               return 0;
+
+       *out_start = (void *)csblob->csb_mem_kaddr;
+       *out_length = csblob->csb_mem_size;
+
+       return 0;
+}
+
+/*
+ * return cshash of a process, cdhash is of size CS_CDHASH_LEN
+ */
+
+uint8_t *
+cs_get_cdhash(struct proc *p)
+{
+       struct cs_blob *csblob;
+
+       if (NULL == p->p_textvp)
+               return NULL;
+
+       if ((csblob = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff)) == NULL)
+               return NULL;
+
+       return csblob->csb_cdhash;
+}
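
Several one-off lookups in this file are replaced by the csblob accessors added above. A small sketch of how a kernel consumer might combine them, assuming it already holds a proc_t and that the prototypes are visible via <sys/codesign.h>; the printf is illustrative only:

    #include <sys/codesign.h>
    #include <sys/proc.h>

    static void
    example_log_code_identity(struct proc *p)
    {
            struct cs_blob *blob = csproc_get_blob(p);

            if (blob == NULL)
                    return;         /* unsigned, or no text vnode */

            const char *teamid    = csblob_get_teamid(blob);   /* may be NULL for pre-team-id CDs */
            const char *ident     = csblob_get_identity(blob); /* signing identifier, may be NULL */
            const uint8_t *cdhash = csblob_get_cdhash(blob);   /* CS_CDHASH_LEN (20) bytes */

            printf("cs: identity=%s team=%s cdhash[0]=%02x\n",
                ident ? ident : "-", teamid ? teamid : "-", cdhash[0]);
    }
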
index 7badfcc58d01a718c1b108a01d378a3125b4238d..4b5c26815c9c1035a9300fdcd5d1884bf796398f 100644 (file)
 #include <sys/systm.h>
 #include <sys/types.h>
 
-/* allow everything by default? */
-/* XXX: set this to 0 later: <rdar://problem/16040413> */
-static int csr_allow_all = 1;
-
-/* allow everything if CSR_ALLOW_APPLE_INTERNAL is set */
-static int csr_allow_internal = 1;
-
-/* Current boot-arg policy:
- * rootless=0
- *    csr_allow_all = 1
- * rootless=1
- *    csr_allow_all = 0
- *    csr_allow_internal = 0
- *
- * After <rdar://problem/16239861>:
- * rootless=0
- *    no effect
- * rootless=1
- *    csr_allow_internal = 0
- *
- * Enforcement policy:
- * ===============================
- *            | csr_allow_internal
- *            |   0         1
- * ===============================
- *   csr_   0 | always   customer
- *  allow_    |
- *   all    1 | never    never
- * ===============================
- * NB: "customer" means enforce when
- * CSR_ALLOW_APPLE_INTERNAL not set */
+/* enable enforcement by default */
+static int csr_allow_all = 0;
 
 void
 csr_init(void)
@@ -73,62 +44,6 @@ csr_init(void)
                /* special booter; allow everything */
                csr_allow_all = 1;
        }
-
-       int rootless_boot_arg;
-       if (PE_parse_boot_argn("rootless", &rootless_boot_arg, sizeof(rootless_boot_arg))) {
-               /* XXX: set csr_allow_all to boot arg value for now
-                * (to be removed by <rdar://problem/16239861>) */
-               csr_allow_all = !rootless_boot_arg;
-               /* if rootless=1, do not allow everything when CSR_ALLOW_APPLE_INTERNAL is set */
-               csr_allow_internal &= !rootless_boot_arg;
-       }
-}
-
-int
-csrctl(__unused proc_t p, struct csrctl_args *uap, __unused int32_t *retval)
-{
-       int error = 0;
-
-       if (uap->useraddr == 0)
-               return EINVAL;
-       if (uap->usersize != sizeof(csr_config_t))
-               return EINVAL;
-
-       switch (uap->op) {
-               case CSR_OP_CHECK:
-               {
-                       csr_config_t mask;
-                       error = copyin(uap->useraddr, &mask, sizeof(csr_config_t));
-
-                       if (error)
-                               return error;
-
-                       error = csr_check(mask);
-                       break;
-               }
-
-               case CSR_OP_GET_ACTIVE_CONFIG:
-               case CSR_OP_GET_PENDING_CONFIG: /* fall through */
-               {
-                       csr_config_t config = 0;
-                       if (uap->op == CSR_OP_GET_ACTIVE_CONFIG)
-                               error = csr_get_active_config(&config);
-                       else
-                               error = csr_get_pending_config(&config);
-
-                       if (error)
-                               return error;
-
-                       error = copyout(&config, uap->useraddr, sizeof(csr_config_t));
-                       break;
-               }
-
-               default:
-                       error = EINVAL;
-                       break;
-       }
-
-       return error;
 }
 
 int
@@ -138,28 +53,19 @@ csr_get_active_config(csr_config_t *config)
        if (args->flags & kBootArgsFlagCSRActiveConfig) {
                *config = args->csrActiveConfig & CSR_VALID_FLAGS;
        } else {
-               /* XXX: change to 0 when <rdar://problem/16239698> is in the build */
-               *config = CSR_ALLOW_APPLE_INTERNAL;
+               *config = 0;
        }
 
        return 0;
 }
 
 int
-csr_get_pending_config(csr_config_t *config)
+csr_check(csr_config_t mask)
 {
        boot_args *args = (boot_args *)PE_state.bootArgs;
-       if (args->flags & kBootArgsFlagCSRPendingConfig) {
-               *config = args->csrPendingConfig & CSR_VALID_FLAGS;
-               return 0;
-       } else {
-               return ENOENT;
-       }
-}
+       if ((mask & CSR_ALLOW_DEVICE_CONFIGURATION) && !(args->flags & kBootArgsFlagCSRConfigMode))
+               return EPERM;
 
-int
-csr_check(csr_config_t mask)
-{
        if (csr_allow_all) {
                return 0;
        }
@@ -170,10 +76,6 @@ csr_check(csr_config_t mask)
                return error;
        }
 
-       if (csr_allow_internal && (config & CSR_ALLOW_APPLE_INTERNAL)) {
-               return 0;
-       }
-
        if (mask == 0) {
                /* pass 0 to check if Rootless enforcement is active */
                return -1;
@@ -188,3 +90,60 @@ csr_set_allow_all(int value)
 {
        csr_allow_all = !!value; // force value to 0 or 1
 }
+
+/*
+ * Syscall stubs
+ */
+
+int syscall_csr_check(struct csrctl_args *args);
+int syscall_csr_get_active_config(struct csrctl_args *args);
+
+
+int
+syscall_csr_check(struct csrctl_args *args)
+{
+       csr_config_t mask = 0;
+       int error = 0;
+
+       if (args->useraddr == 0 || args->usersize != sizeof(mask))
+               return EINVAL;
+
+       error = copyin(args->useraddr, &mask, sizeof(mask));
+       if (error)
+               return error;
+
+       return csr_check(mask);
+}
+
+int
+syscall_csr_get_active_config(struct csrctl_args *args)
+{
+       csr_config_t config = 0;
+       int error = 0;
+
+       if (args->useraddr == 0 || args->usersize != sizeof(config))
+               return EINVAL;
+
+       error = csr_get_active_config(&config);
+       if (error)
+               return error;
+
+       return copyout(&config, args->useraddr, sizeof(config));
+}
+
+/*
+ * Syscall entrypoint
+ */
+
+int
+csrctl(__unused proc_t p, struct csrctl_args *args, __unused int32_t *retval)
+{
+       switch (args->op) {
+               case CSR_SYSCALL_CHECK:
+                       return syscall_csr_check(args);
+               case CSR_SYSCALL_GET_ACTIVE_CONFIG:
+                       return syscall_csr_get_active_config(args);
+               default:
+                       return ENOSYS;
+       }
+}
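
The rewritten csrctl() above dispatches the two remaining operations to csr_check() and csr_get_active_config(). As a sketch of the kernel-side consumer pattern, a subsystem that wants to honor the active configuration can ask csr_check() whether a given relaxation is allowed; CSR_ALLOW_UNRESTRICTED_FS is used here purely as an example flag assumed to come from <sys/csr.h>:

    #include <sys/csr.h>

    static int
    example_fs_write_allowed(void)
    {
            /*
             * csr_check() returns 0 when the requested relaxation is permitted
             * by the active configuration, and a non-zero error otherwise.
             */
            if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) == 0)
                    return 1;       /* protection relaxed; writes allowed */
            return 0;               /* enforcement active for this mask */
    }
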
index 54faaeb1396b4c0accb80fbfe4a2168d550d3962..df33970cada3a8e90028e65904fd136bd94dbcc7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <sys/spawn.h>
 #include <kern/kern_types.h>
 #include <kern/kalloc.h>
+#include <kern/waitq.h>
 #include <libkern/OSAtomic.h>
 
 #include <sys/ubc_internal.h>
 #include <mach/mach_port.h>
 #include <stdbool.h>
 
-#if CONFIG_PROTECT
-#include <sys/cprotect.h>
-#endif
 #include <hfs/hfs.h>
 
 kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
@@ -154,7 +152,7 @@ extern void file_lock_init(void);
 extern kauth_scope_t   kauth_scope_fileop;
 
 /* Conflict wait queue for when selects collide (opaque type) */
-extern struct wait_queue select_conflict_queue;
+extern struct waitq select_conflict_queue;
 
 #define f_flag f_fglob->fg_flag
 #define f_type f_fglob->fg_ops->fo_type
@@ -546,6 +544,11 @@ dup(proc_t p, struct dup_args *uap, int32_t *retval)
        fp_drop(p, old, fp, 1);
        proc_fdunlock(p);
 
+       if (ENTR_SHOULDTRACE && fp->f_type == DTYPE_SOCKET) {
+               KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
+                   new, 0, (int64_t)VM_KERNEL_ADDRPERM(fp->f_data));
+       }
+
        return (error);
 }
 
@@ -754,7 +757,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
        struct fileproc *fp;
        char *pop;
        struct vnode *vp = NULLVP;      /* for AUDIT_ARG() at end */
-       int i, tmp, error, error2, flg = F_POSIX;
+       int i, tmp, error, error2, flg = 0;
        struct flock fl;
        struct flocktimeout fltimeout;
        struct timespec *timeout = NULL;
@@ -942,12 +945,51 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                }
                goto out;
 
+       case F_SETCONFINED:
+               /*
+                * If this is the only reference to this fglob in the process
+                * and it's already marked as close-on-fork then mark it as
+                * (immutably) "confined" i.e. any fd that points to it will
+                * forever be close-on-fork, and attempts to use an IPC
+                * mechanism to move the descriptor elsewhere will fail.
+                */
+               if (CAST_DOWN_EXPLICIT(int, uap->arg)) {
+                       struct fileglob *fg = fp->f_fglob;
+
+                       lck_mtx_lock_spin(&fg->fg_lock);
+                       if (fg->fg_lflags & FG_CONFINED)
+                               error = 0;
+                       else if (1 != fg->fg_count)
+                               error = EAGAIN; /* go close the dup .. */
+                       else if (UF_FORKCLOSE == (*pop & UF_FORKCLOSE)) {
+                               fg->fg_lflags |= FG_CONFINED;
+                               error = 0;
+                       } else
+                               error = EBADF;  /* open without O_CLOFORK? */
+                       lck_mtx_unlock(&fg->fg_lock);
+               } else {
+                       /*
+                        * Other subsystems may have built on the immutability
+                        * of FG_CONFINED; clearing it may be tricky.
+                        */
+                       error = EPERM;          /* immutable */
+               }
+               goto out;
+
+       case F_GETCONFINED:
+               *retval = (fp->f_fglob->fg_lflags & FG_CONFINED) ? 1 : 0;
+               error = 0;
+               goto out;
+
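
The F_SETCONFINED/F_GETCONFINED cases above only accept a descriptor whose fileglob is the sole reference and is already marked close-on-fork. A hedged userland sketch of the intended call sequence, assuming the private F_SETCONFINED/F_GETCONFINED constants and the O_CLOFORK open flag referenced in the comment above are visible to the caller:

    #include <fcntl.h>
    #include <unistd.h>

    int
    example_confine(const char *path)
    {
            /* Must be close-on-fork before it can be confined (see the EBADF case above). */
            int fd = open(path, O_RDONLY | O_CLOFORK);
            if (fd < 0)
                    return -1;

            /* Once set, FG_CONFINED is immutable; attempts to clear it return EPERM. */
            if (fcntl(fd, F_SETCONFINED, 1) == -1) {
                    close(fd);
                    return -1;
            }

            /* F_GETCONFINED reports 1 for a confined descriptor. */
            return fcntl(fd, F_GETCONFINED, 0);
    }
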
        case F_SETLKWTIMEOUT:
        case F_SETLKW:
+       case F_OFD_SETLKWTIMEOUT:
+       case F_OFD_SETLKW:
                flg |= F_WAIT;
                /* Fall into F_SETLK */
 
        case F_SETLK:
+       case F_OFD_SETLK:
                if (fp->f_type != DTYPE_VNODE) {
                        error = EBADF;
                        goto out;
@@ -959,7 +1001,8 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                proc_fdunlock(p);
 
                /* Copy in the lock structure */
-               if (uap->cmd == F_SETLKWTIMEOUT) {
+               if (F_SETLKWTIMEOUT == uap->cmd ||
+                   F_OFD_SETLKWTIMEOUT == uap->cmd) {
                        error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout));
                        if (error) {
                                goto outdrop;
@@ -994,45 +1037,90 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                        goto outdrop;
                }
 #endif
-               switch (fl.l_type) {
-
-               case F_RDLCK:
-                       if ((fflag & FREAD) == 0) {
-                               (void)vnode_put(vp);
-                               error = EBADF;
-                               goto outdrop;
+               switch (uap->cmd) {
+               case F_OFD_SETLK:
+               case F_OFD_SETLKW:
+               case F_OFD_SETLKWTIMEOUT:
+                       flg |= F_OFD_LOCK;
+                       switch (fl.l_type) {
+                       case F_RDLCK:
+                               if ((fflag & FREAD) == 0) {
+                                       error = EBADF;
+                                       break;
+                               }
+                               error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
+                                   F_SETLK, &fl, flg, &context, timeout);
+                               break;
+                       case F_WRLCK:
+                               if ((fflag & FWRITE) == 0) {
+                                       error = EBADF;
+                                       break;
+                               }
+                               error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
+                                   F_SETLK, &fl, flg, &context, timeout);
+                               break;
+                       case F_UNLCK:
+                               error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
+                                   F_UNLCK, &fl, F_OFD_LOCK, &context,
+                                   timeout);
+                               break;
+                       default:
+                               error = EINVAL;
+                               break;
                        }
-                       // XXX UInt32 unsafe for LP64 kernel
-                       OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag);
-                       error = VNOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg, &context, timeout);
-                       (void)vnode_put(vp);
-                       goto outdrop;
+                       if (0 == error &&
+                           (F_RDLCK == fl.l_type || F_WRLCK == fl.l_type)) {
+                               struct fileglob *fg = fp->f_fglob;
 
-               case F_WRLCK:
-                       if ((fflag & FWRITE) == 0) {
-                               (void)vnode_put(vp);
-                               error = EBADF;
-                               goto outdrop;
+                               /*
+                                * arrange F_UNLCK on last close (once
+                                * set, FG_HAS_OFDLOCK is immutable)
+                                */
+                               if ((fg->fg_lflags & FG_HAS_OFDLOCK) == 0) {
+                                       lck_mtx_lock_spin(&fg->fg_lock);
+                                       fg->fg_lflags |= FG_HAS_OFDLOCK;
+                                       lck_mtx_unlock(&fg->fg_lock);
+                               }
                        }
-                       // XXX UInt32 unsafe for LP64 kernel
-                       OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag);
-                       error = VNOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg, &context, timeout);
-                       (void)vnode_put(vp);
-                       goto outdrop;
-
-               case F_UNLCK:
-                       error = VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
-                               F_POSIX, &context, timeout);
-                       (void)vnode_put(vp);
-                       goto outdrop;
-
+                       break;
                default:
-                       (void)vnode_put(vp);
-                       error = EINVAL;
-                       goto outdrop;
+                       flg |= F_POSIX;
+                       switch (fl.l_type) {
+                       case F_RDLCK:
+                               if ((fflag & FREAD) == 0) {
+                                       error = EBADF;
+                                       break;
+                               }
+                               // XXX UInt32 unsafe for LP64 kernel
+                               OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag);
+                               error = VNOP_ADVLOCK(vp, (caddr_t)p,
+                                   F_SETLK, &fl, flg, &context, timeout);
+                               break;
+                       case F_WRLCK:
+                               if ((fflag & FWRITE) == 0) {
+                                       error = EBADF;
+                                       break;
+                               }
+                               // XXX UInt32 unsafe for LP64 kernel
+                               OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag);
+                               error = VNOP_ADVLOCK(vp, (caddr_t)p,
+                                   F_SETLK, &fl, flg, &context, timeout);
+                               break;
+                       case F_UNLCK:
+                               error = VNOP_ADVLOCK(vp, (caddr_t)p,
+                                   F_UNLCK, &fl, F_POSIX, &context, timeout);
+                               break;
+                       default:
+                               error = EINVAL;
+                               break;
+                       }
+                       break;
                }
+               (void) vnode_put(vp);
+               goto outdrop;
 
        case F_GETLK:
+       case F_OFD_GETLK:
                if (fp->f_type != DTYPE_VNODE) {
                        error = EBADF;
                        goto out;
@@ -1088,7 +1176,20 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                            uap->cmd, &fl);
                        if (error == 0)
 #endif
-                       error = VNOP_ADVLOCK(vp, (caddr_t)p, uap->cmd, &fl, F_POSIX, &context, NULL);
+                       switch (uap->cmd) {
+                       case F_OFD_GETLK:
+                               error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
+                                   F_GETLK, &fl, F_OFD_LOCK, &context, NULL);
+                               break;
+                       case F_OFD_GETLKPID:
+                               error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
+                                   F_GETLKPID, &fl, F_OFD_LOCK, &context, NULL);
+                               break;
+                       default:
+                               error = VNOP_ADVLOCK(vp, (caddr_t)p,
+                                   uap->cmd, &fl, F_POSIX, &context, NULL);
+                               break;
+                       }
 
                        (void)vnode_put(vp);
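
The new F_OFD_* cases route advisory locks by fileglob (the open file description) rather than by process. A minimal userland sketch of taking and releasing such a lock, assuming the F_OFD_* constants (private in <sys/fcntl.h>) are visible to the caller:

    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>

    int
    example_ofd_lock(const char *path)
    {
            int fd = open(path, O_RDWR);
            if (fd < 0)
                    return -1;

            struct flock fl;
            memset(&fl, 0, sizeof(fl));
            fl.l_type   = F_WRLCK;  /* needs FWRITE on the descriptor (see the EBADF check above) */
            fl.l_whence = SEEK_SET;
            fl.l_start  = 0;
            fl.l_len    = 0;        /* whole file */

            /* The lock is owned by the open file description, not the calling process. */
            if (fcntl(fd, F_OFD_SETLK, &fl) == -1) {
                    close(fd);
                    return -1;
            }

            /* Closing the last descriptor for this description drops the lock (FG_HAS_OFDLOCK). */
            close(fd);
            return 0;
    }
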
 
@@ -1390,23 +1491,13 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                }
                devBlockSize = vfs_devblocksize(vnode_mount(vp));
                if (uap->cmd == F_LOG2PHYS_EXT) {
-#if defined(__LP64__) 
-                       a_size = l2p_struct.l2p_contigbytes;
-#else
-                       if ((l2p_struct.l2p_contigbytes > SIZE_MAX) || (l2p_struct.l2p_contigbytes < 0)) {
-                               /* size_t is 32-bit on a 32-bit kernel, therefore 
-                                * assigning l2p_contigbytes to a_size may have 
-                                * caused integer overflow.  We, therefore, return 
-                                * an error here instead of calculating incorrect 
-                                * value. 
-                                */
-                               printf ("fcntl: F_LOG2PHYS_EXT: l2p_contigbytes=%lld will overflow, returning error\n", l2p_struct.l2p_contigbytes);
-                               error = EFBIG;
+                       if (l2p_struct.l2p_contigbytes < 0) {
+                               vnode_put(vp);
+                               error = EINVAL;
                                goto outdrop;
-                       } else {
-                               a_size = l2p_struct.l2p_contigbytes;
                        }
-#endif
+
+                       a_size = MIN((uint64_t)l2p_struct.l2p_contigbytes, SIZE_MAX);
                } else {
                        a_size = devBlockSize;
                }
@@ -1494,7 +1585,8 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
        }
 
        case F_CHKCLEAN:   // used by regression tests to see if all dirty pages got cleaned by fsync()
-       case F_FULLFSYNC:  // fsync + flush the journal + DKIOCSYNCHRONIZECACHE
+       case F_FULLFSYNC:  // fsync + flush the journal + DKIOCSYNCHRONIZE
+       case F_BARRIERFSYNC:  // fsync + barrier
        case F_FREEZE_FS:  // freeze all other fs operations for the fs of this fd
        case F_THAW_FS: {  // thaw all frozen fs operations for the fs of this fd
                if (fp->f_type != DTYPE_VNODE) {
@@ -1620,7 +1712,9 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
        case F_ADDSIGS:
        case F_ADDFILESIGS:
        case F_ADDFILESIGS_FOR_DYLD_SIM:
+       case F_ADDFILESIGS_RETURN:
        {
+               struct cs_blob *blob = NULL;
                struct user_fsignatures fs;
                kern_return_t kr;
                vm_offset_t kernel_blob_addr;
@@ -1663,123 +1757,109 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                        goto outdrop;
                }
 
-               struct cs_blob * existing_blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, fs.fs_file_start);
-               if (existing_blob != NULL)
+               /*
+                * First check if we have something loaded at this offset
+                */
+               blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, fs.fs_file_start);
+               if (blob != NULL)
                {
                        /* If this is for dyld_sim revalidate the blob */
                        if (uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
-                               error = ubc_cs_blob_revalidate(vp, existing_blob, blob_add_flags);
+                               error = ubc_cs_blob_revalidate(vp, blob, blob_add_flags);
                        }
-                       vnode_put(vp);
-                       goto outdrop;
-               }
-/*
- * An arbitrary limit, to prevent someone from mapping in a 20GB blob.  This should cover
- * our use cases for the immediate future, but note that at the time of this commit, some
- * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
- *
- * We should consider how we can manage this more effectively; the above means that some
- * platforms are using megabytes of memory for signing data; it merely hasn't crossed the
- * threshold considered ridiculous at the time of this change.
- */
-#define CS_MAX_BLOB_SIZE (10ULL * 1024ULL * 1024ULL)
-               if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
-                       error = E2BIG;
-                       vnode_put(vp);
-                       goto outdrop;
-               }
 
-               kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
-               kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
-               if (kr != KERN_SUCCESS) {
-                       error = ENOMEM;
-                       vnode_put(vp);
-                       goto outdrop;
-               }
+               } else {
+                       /*
+                        * An arbitrary limit, to prevent someone from mapping in a 20GB blob.  This should cover
+                        * our use cases for the immediate future, but note that at the time of this commit, some
+                        * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
+                        *
+                        * We should consider how we can manage this more effectively; the above means that some
+                        * platforms are using megabytes of memory for signing data; it merely hasn't crossed the
+                        * threshold considered ridiculous at the time of this change.
+                        */
+#define CS_MAX_BLOB_SIZE (40ULL * 1024ULL * 1024ULL)
+                       if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
+                               error = E2BIG;
+                               vnode_put(vp);
+                               goto outdrop;
+                       }
 
-               if(uap->cmd == F_ADDSIGS) {
-                       error = copyin(fs.fs_blob_start,
-                                      (void *) kernel_blob_addr,
-                                      kernel_blob_size);
-               } else /* F_ADDFILESIGS */ {
-                       int resid;
-
-                       error = vn_rdwr(UIO_READ,
-                                       vp,
-                                       (caddr_t) kernel_blob_addr,
-                                       kernel_blob_size,
-                                        fs.fs_file_start + fs.fs_blob_start,
-                                       UIO_SYSSPACE,
-                                       0,
-                                       kauth_cred_get(),
-                                       &resid,
-                                       p);
-                       if ((error == 0) && resid) {
-                               /* kernel_blob_size rounded to a page size, but signature may be at end of file */
-                               memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
+                       kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
+                       kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
+                       if (kr != KERN_SUCCESS) {
+                               error = ENOMEM;
+                               vnode_put(vp);
+                               goto outdrop;
                        }
-               }
-               
-               if (error) {
-                       ubc_cs_blob_deallocate(kernel_blob_addr,
-                                              kernel_blob_size);
-                       vnode_put(vp);
-                       goto outdrop;
-               }
 
-               error = ubc_cs_blob_add(
-                       vp,
-                       CPU_TYPE_ANY,   /* not for a specific architecture */
-                       fs.fs_file_start,
-                       kernel_blob_addr,
-                       kernel_blob_size,
-                       blob_add_flags);
-               if (error) {
-                       ubc_cs_blob_deallocate(kernel_blob_addr,
+                       if(uap->cmd == F_ADDSIGS) {
+                               error = copyin(fs.fs_blob_start,
+                                              (void *) kernel_blob_addr,
                                               kernel_blob_size);
-               } else {
-                       /* ubc_blob_add() has consumed "kernel_blob_addr" */
+                       } else /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM */ {
+                               int resid;
+
+                               error = vn_rdwr(UIO_READ,
+                                               vp,
+                                               (caddr_t) kernel_blob_addr,
+                                               kernel_blob_size,
+                                               fs.fs_file_start + fs.fs_blob_start,
+                                               UIO_SYSSPACE,
+                                               0,
+                                               kauth_cred_get(),
+                                               &resid,
+                                               p);
+                               if ((error == 0) && resid) {
+                                       /* kernel_blob_size rounded to a page size, but signature may be at end of file */
+                                       memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
+                               }
+                       }
+               
+                       if (error) {
+                               ubc_cs_blob_deallocate(kernel_blob_addr,
+                                                      kernel_blob_size);
+                               vnode_put(vp);
+                               goto outdrop;
+                       }
+
+                       blob = NULL;
+                       error = ubc_cs_blob_add(vp,
+                                               CPU_TYPE_ANY,   /* not for a specific architecture */
+                                               fs.fs_file_start,
+                                               kernel_blob_addr,
+                                               kernel_blob_size,
+                                               blob_add_flags,
+                                               &blob);
+                       if (error) {
+                               ubc_cs_blob_deallocate(kernel_blob_addr,
+                                                      kernel_blob_size);
+                       } else {
+                               /* ubc_blob_add() has consumed "kernel_blob_addr" */
 #if CHECK_CS_VALIDATION_BITMAP
-                       ubc_cs_validation_bitmap_allocate( vp );
+                               ubc_cs_validation_bitmap_allocate( vp );
 #endif
+                       }
+               }
+
+               if (uap->cmd == F_ADDFILESIGS_RETURN || uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
+                       /*
+                        * The first element of the structure is an
+                        * off_t that happens to have the same size for
+                        * all archs. Let's overwrite that.
+                        */
+                       off_t end_offset = 0;
+                       if (blob)
+                               end_offset = blob->csb_end_offset;
+                       error = copyout(&end_offset, argp, sizeof (end_offset));
                }
 
                (void) vnode_put(vp);
                break;
        }
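For orientation, a minimal userspace sketch of how a loader might drive this path; the helper name and the slice_off/sig_off/sig_size inputs are hypothetical, while fsignatures_t and F_ADDFILESIGS_RETURN come from sys/fcntl.h. On success the kernel overwrites the first off_t of the struct with the end offset covered by the signature (csb_end_offset above).

    #include <fcntl.h>
    #include <sys/types.h>

    /* Illustrative only: register an on-disk code signature with the kernel. */
    static off_t
    add_file_signature(int fd, off_t slice_off, off_t sig_off, size_t sig_size)
    {
            fsignatures_t fs;

            fs.fs_file_start = slice_off;              /* start of the Mach-O slice */
            fs.fs_blob_start = (void *)(long)sig_off;  /* blob offset within the file */
            fs.fs_blob_size  = sig_size;

            if (fcntl(fd, F_ADDFILESIGS_RETURN, &fs) == -1)
                    return -1;

            /* The kernel copied the covered end offset back over fs_file_start. */
            return fs.fs_file_start;
    }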
        case F_FINDSIGS: {
-#ifdef SECURE_KERNEL
                error = ENOTSUP;
-#else /* !SECURE_KERNEL */
-               off_t offsetMacho;
-
-               if (fp->f_type != DTYPE_VNODE) {
-                       error = EBADF;
-                       goto out;
-               }
-               vp = (struct vnode *)fp->f_data;
-               proc_fdunlock(p);
-               error = vnode_getwithref(vp);
-               if (error)
-                       goto outdrop;
-
-               error = copyin(argp, &offsetMacho, sizeof(offsetMacho));
-               if (error) {
-                       (void)vnode_put(vp);
-                       goto outdrop;
-               }
-
-#if CONFIG_MACF
-               error = mac_vnode_find_sigs(p, vp, offsetMacho);
-#else
-               error = EPERM;
-#endif
-               if (error) {
-                       (void)vnode_put(vp);
-                       goto outdrop;
-               }
-#endif /* SECURE_KERNEL */
-               break;
+               goto out;
        }
 #if CONFIG_PROTECT
        case F_GETPROTECTIONCLASS: {
@@ -2294,8 +2374,8 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 
                const CS_SuperBlob *super_blob = (void *)t_blob->csb_mem_kaddr;
                const CS_CodeDirectory *cd = findCodeDirectory(super_blob,
-                                                        (char *) super_blob,
-                                                        (char *) super_blob + t_blob->csb_mem_size);
+                                                        (const char *) super_blob,
+                                                        (const char *) super_blob + t_blob->csb_mem_size);
                if (cd == NULL) {
                        error = ENOENT;
                        goto outdrop;
@@ -2312,13 +2392,13 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                if (error) 
                        goto outdrop;
 
-               if (sizeof(t_blob->csb_sha1) > args.f_hash_size ||
+               if (sizeof(t_blob->csb_cdhash) > args.f_hash_size ||
                                        buffer_size > args.f_cd_size) {
                        error = ERANGE;
                        goto outdrop;
                }
 
-               error = copyout(t_blob->csb_sha1, args.f_cd_hash, sizeof(t_blob->csb_sha1));
+               error = copyout(t_blob->csb_cdhash, args.f_cd_hash, sizeof(t_blob->csb_cdhash));
                if (error) 
                        goto outdrop;
                error = copyout(cd, args.f_cd_buffer, buffer_size);
@@ -2424,6 +2504,19 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
                goto outdrop;
        }
 
+#if DEBUG || DEVELOPMENT
+       case F_RECYCLE:
+               if (fp->f_type != DTYPE_VNODE) {
+                       error = EBADF;
+                       goto out;
+               }
+               vp = (struct vnode *)fp->f_data;
+               proc_fdunlock(p);
+
+               vnode_recycle(vp);
+               break;
+#endif
+
        default:
                /*
                 * This is an fcntl() that we do not recognize at this level;
@@ -2778,6 +2871,10 @@ close_internal_locked(proc_t p, int fd, struct fileproc *fp, int flags)
                procfdtbl_reservefd(p, fd);
        }
 
+       if (ENTR_SHOULDTRACE && fp->f_type == DTYPE_SOCKET)
+               KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_END,
+                   fd, 0, (int64_t)VM_KERNEL_ADDRPERM(fp->f_data));
+
        error = closef_locked(fp, fp->f_fglob, p);
        if ((fp->f_flags & FP_WAITCLOSE) == FP_WAITCLOSE)
                wakeup(&fp->f_flags);
@@ -3841,7 +3938,7 @@ fp_tryswap(proc_t p, int fd, struct fileproc *nfp)
                        (fp->f_flags & ~FP_TYPEMASK);
                nfp->f_iocount = fp->f_iocount;
                nfp->f_fglob = fp->f_fglob;
-               nfp->f_waddr = fp->f_waddr;
+               nfp->f_wset = fp->f_wset;
 
                p->p_fd->fd_ofiles[fd] = nfp;
                (void) fp_drop(p, fd, nfp, 1);
@@ -4743,7 +4840,7 @@ fdcopy(proc_t p, vnode_t uth_cdir)
                 * allowing the table to shrink.
                 */
                i = newfdp->fd_nfiles;
-               while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
+               while (i > 1 + 2 * NDEXTENT && i > 1 + newfdp->fd_lastfile * 2)
                        i /= 2;
        }
        proc_fdunlock(p);
@@ -4803,6 +4900,7 @@ fdcopy(proc_t p, vnode_t uth_cdir)
 
                for (i = newfdp->fd_lastfile + 1; --i >= 0; fpp++, flags++)
                        if ((ofp = *fpp) != NULL &&
+                           0 == (ofp->f_fglob->fg_lflags & FG_CONFINED) &&
                            0 == (*flags & (UF_FORKCLOSE|UF_RESERVED))) {
 #if DEBUG
                                if (FILEPROC_TYPE(ofp) != FTYPE_SIMPLE)
@@ -5045,11 +5143,13 @@ fileproc_drain(proc_t p, struct fileproc * fp)
                        (*fp->f_fglob->fg_ops->fo_drain)(fp, &context);
                }
                if ((fp->f_flags & FP_INSELECT) == FP_INSELECT) {
-                       if (wait_queue_wakeup_all((wait_queue_t)fp->f_waddr, NULL, THREAD_INTERRUPTED) == KERN_INVALID_ARGUMENT)
-                               panic("bad wait queue for wait_queue_wakeup_all %p", fp->f_waddr);
-               } 
+                       if (waitq_wakeup64_all((struct waitq *)fp->f_wset, NO_EVENT64,
+                                              THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT)
+                               panic("bad wait queue for waitq_wakeup64_all %p (fp:%p)", fp->f_wset, fp);
+               }
                if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
-                       if (wait_queue_wakeup_all(&select_conflict_queue, NULL, THREAD_INTERRUPTED) == KERN_INVALID_ARGUMENT)
+                       if (waitq_wakeup64_all(&select_conflict_queue, NO_EVENT64,
+                                              THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT)
                                panic("bad select_conflict_queue");
                }
                p->p_fpdrainwait = 1;
@@ -5204,27 +5304,28 @@ fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
        ipc_port_t fileport;
        mach_port_name_t name = MACH_PORT_NULL;
 
-       err = fp_lookup(p, fd, &fp, 0);
+       proc_fdlock(p);
+       err = fp_lookup(p, fd, &fp, 1);
        if (err != 0) {
-               goto out;
+               goto out_unlock;
        }
 
-       if (!filetype_issendable(fp->f_type)) {
+       if (!file_issendable(p, fp)) {
                err = EINVAL;
-               goto out;
+               goto out_unlock;
        }
 
        if (FP_ISGUARDED(fp, GUARD_FILEPORT)) {
-               proc_fdlock(p);
                err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT);
-               proc_fdunlock(p);
-               goto out;
+               goto out_unlock;
        }
 
        /* Dropped when port is deallocated */
        fg = fp->f_fglob;
        fg_ref(fp);
 
+       proc_fdunlock(p);
+
        /* Allocate and initialize a port */
        fileport = fileport_alloc(fg);
        if (fileport == IPC_PORT_NULL) {
@@ -5254,6 +5355,8 @@ fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
 
        return 0;
 
+out_unlock:
+       proc_fdunlock(p);
 out:
        if (MACH_PORT_VALID(name)) {
                /* Don't care if another thread races us to deallocate the entry */
@@ -5421,10 +5524,8 @@ dupfdopen(struct filedesc *fdp, int indx, int dfd, int flags, int error)
        switch (error) {
        case ENODEV:
                if (FP_ISGUARDED(wfp, GUARD_DUP)) {
-                       int err = fp_guard_exception(p,
-                           dfd, wfp, kGUARD_EXC_DUP);
                        proc_fdunlock(p);
-                       return (err);
+                       return (EPERM);
                }
 
                /*
@@ -5516,23 +5617,23 @@ fg_drop(struct fileproc * fp)
 
 #if SOCKETS
 /*
- * fg_insertuipc
+ * fg_insertuipc_mark
  *
- * Description:        Insert fileglob onto message queue
+ * Description:        Mark fileglob for insertion onto message queue if needed
+ *             Also takes fileglob reference
  *
- * Parameters: fg                              Fileglob pointer to insert
+ * Parameters: fg      Fileglob pointer to insert
  *
- * Returns:    void
+ * Returns:    true, if the fileglob needs to be inserted onto msg queue
  *
  * Locks:      Takes and drops fg_lock, potentially many times
  */
-void
-fg_insertuipc(struct fileglob * fg)
+boolean_t
+fg_insertuipc_mark(struct fileglob * fg)
 {
-       int insertque = 0;
+       boolean_t insert = FALSE;
 
        lck_mtx_lock_spin(&fg->fg_lock);
-
        while (fg->fg_lflags & FG_RMMSGQ) {
                lck_mtx_convert_spin(&fg->fg_lock);
 
@@ -5544,11 +5645,30 @@ fg_insertuipc(struct fileglob * fg)
        fg->fg_msgcount++;
        if (fg->fg_msgcount == 1) {
                fg->fg_lflags |= FG_INSMSGQ;
-               insertque=1;
+               insert = TRUE;
        }
        lck_mtx_unlock(&fg->fg_lock);
+       return (insert);
+}
 
-       if (insertque) {
+/*
+ * fg_insertuipc
+ *
+ * Description:        Insert marked fileglob onto message queue
+ *
+ * Parameters: fg      Fileglob pointer to insert
+ *
+ * Returns:    void
+ *
+ * Locks:      Takes and drops fg_lock & uipc_lock
+ *             DO NOT call this function with proc_fdlock held as unp_gc()
+ *             can potentially try to acquire proc_fdlock, which can result
+ *             in a deadlock if this function is in unp_gc_wait().
+ */
+void
+fg_insertuipc(struct fileglob * fg)
+{
+       if (fg->fg_lflags & FG_INSMSGQ) {
                lck_mtx_lock_spin(uipc_lock);
                unp_gc_wait();
                LIST_INSERT_HEAD(&fmsghead, fg, f_msglist);
@@ -5561,25 +5681,24 @@ fg_insertuipc(struct fileglob * fg)
                }
                lck_mtx_unlock(&fg->fg_lock);
        }
-
 }
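The split into a *_mark phase and a commit phase exists so the fileglob can be marked while the caller still holds proc_fdlock, and the list manipulation (which may sleep in unp_gc_wait()) happens only after that lock is dropped, per the warning above. A rough sketch of the intended calling pattern; send_fg is a hypothetical caller, not the actual uipc code:

    /* Sketch of the two-phase mark/commit protocol. */
    static void
    send_fg(proc_t p, struct fileglob *fg)
    {
            boolean_t needs_insert;

            proc_fdlock(p);
            needs_insert = fg_insertuipc_mark(fg);  /* takes only fg_lock */
            /* ... other bookkeeping that needs the fd lock ... */
            proc_fdunlock(p);

            /* Commit without proc_fdlock held: may block in unp_gc_wait(),
             * and unp_gc() can itself try to take proc_fdlock. */
            if (needs_insert)
                    fg_insertuipc(fg);
    }

fg_removeuipc_mark()/fg_removeuipc() below follow the same pattern on the removal side.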
 
-
 /*
- * fg_removeuipc
+ * fg_removeuipc_mark
  *
- * Description:        Remove fileglob from message queue
+ * Description:        Mark the fileglob for removal from message queue if needed
+ *             Also releases fileglob message queue reference
  *
- * Parameters: fg                              Fileglob pointer to remove
+ * Parameters: fg      Fileglob pointer to remove
  *
- * Returns:    void
+ * Returns:    true, if the fileglob needs to be removed from msg queue
  *
  * Locks:      Takes and drops fg_lock, potentially many times
  */
-void
-fg_removeuipc(struct fileglob * fg)
+boolean_t
+fg_removeuipc_mark(struct fileglob * fg)
 {
-       int removeque = 0;
+       boolean_t remove = FALSE;
 
        lck_mtx_lock_spin(&fg->fg_lock);
        while (fg->fg_lflags & FG_INSMSGQ) {
@@ -5591,11 +5710,30 @@ fg_removeuipc(struct fileglob * fg)
        fg->fg_msgcount--;
        if (fg->fg_msgcount == 0) {
                fg->fg_lflags |= FG_RMMSGQ;
-               removeque=1;
+               remove = TRUE;
        }
        lck_mtx_unlock(&fg->fg_lock);
+       return (remove);
+}
 
-       if (removeque) {
+/*
+ * fg_removeuipc
+ *
+ * Description:        Remove marked fileglob from message queue
+ *
+ * Parameters: fg      Fileglob pointer to remove
+ *
+ * Returns:    void
+ *
+ * Locks:      Takes and drops fg_lock & uipc_lock
+ *             DO NOT call this function with proc_fdlock held as unp_gc()
+ *             can potentially try to acquire proc_fdlock, which can result
+ *             in a deadlock if this function is in unp_gc_wait().
+ */
+void
+fg_removeuipc(struct fileglob * fg)
+{
+       if (fg->fg_lflags & FG_RMMSGQ) {
                lck_mtx_lock_spin(uipc_lock);
                unp_gc_wait();
                LIST_REMOVE(fg, f_msglist);
@@ -5752,17 +5890,19 @@ fo_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
  * process is opt-in by file type.
  */
 boolean_t
-filetype_issendable(file_type_t fdtype)
+file_issendable(proc_t p, struct fileproc *fp)
 {
-       switch (fdtype) {
-               case DTYPE_VNODE:
-               case DTYPE_SOCKET:
-               case DTYPE_PIPE:
-               case DTYPE_PSXSHM:
-                       return TRUE;
-               default:
-                       /* DTYPE_KQUEUE, DTYPE_FSEVENTS, DTYPE_PSXSEM */
-                       return FALSE;
+       proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
+
+       switch (fp->f_type) {
+       case DTYPE_VNODE:
+       case DTYPE_SOCKET:
+       case DTYPE_PIPE:
+       case DTYPE_PSXSHM:
+               return (0 == (fp->f_fglob->fg_lflags & FG_CONFINED));
+       default:
+               /* DTYPE_KQUEUE, DTYPE_FSEVENTS, DTYPE_PSXSEM */
+               return FALSE;
        }
 }
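A short sketch of the new contract; can_send_fd is a hypothetical wrapper (the real caller is fileport_makeport() above). The check must run under proc_fdlock so the FG_CONFINED bit cannot change underneath it, and a confined descriptor is refused even when its type would otherwise be sendable:

    /* Illustrative only: decide whether fd may be passed to another process. */
    static int
    can_send_fd(proc_t p, int fd)
    {
            struct fileproc *fp;
            int sendable;

            proc_fdlock(p);
            if (fp_lookup(p, fd, &fp, 1) != 0) {    /* 1 = fd lock already held */
                    proc_fdunlock(p);
                    return 0;
            }
            sendable = file_issendable(p, fp);      /* asserts proc_fdlock owned */
            fp_drop(p, fd, fp, 1);
            proc_fdunlock(p);
            return sendable;
    }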
 
index 708aef4747ad4ddd1bbeef79d7c7fdf43cedaa9f..d80579a350d0045fd778141825b923ba4073f15a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
 #include <string.h>
 #include <sys/proc_info.h>
 #include <sys/codesign.h>
+#include <sys/pthread_shims.h>
 
 #include <kern/locks.h>
 #include <kern/clock.h>
 #include <kern/thread_call.h>
 #include <kern/sched_prim.h>
-#include <kern/wait_queue.h>
+#include <kern/waitq.h>
 #include <kern/zalloc.h>
+#include <kern/kalloc.h>
 #include <kern/assert.h>
 
 #include <libkern/libkern.h>
 
 MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
 
-#define        KQ_EVENT        NULL
+#define        KQ_EVENT        NO_EVENT64
 
 static inline void kqlock(struct kqueue *kq);
 static inline void kqunlock(struct kqueue *kq);
@@ -123,7 +125,7 @@ static int kqueue_write(struct fileproc *fp, struct uio *uio,
     int flags, vfs_context_t ctx);
 static int kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data,
     vfs_context_t ctx);
-static int kqueue_select(struct fileproc *fp, int which, void *wql,
+static int kqueue_select(struct fileproc *fp, int which, void *wq_link_id,
     vfs_context_t ctx);
 static int kqueue_close(struct fileglob *fg, vfs_context_t ctx);
 static int kqueue_kqfilter(struct fileproc *fp, struct knote *kn,
@@ -141,28 +143,32 @@ static const struct fileops kqueueops = {
        .fo_drain = kqueue_drain,
 };
 
-static int kevent_internal(struct proc *p, int iskev64, user_addr_t changelist,
-    int nchanges, user_addr_t eventlist, int nevents, int fd,
-    user_addr_t utimeout, unsigned int flags, int32_t *retval);
-static int kevent_copyin(user_addr_t *addrp, struct kevent64_s *kevp,
-    struct proc *p, int iskev64);
-static int kevent_copyout(struct kevent64_s *kevp, user_addr_t *addrp,
-    struct proc *p, int iskev64);
-char * kevent_description(struct kevent64_s *kevp, char *s, size_t n);
-
-static int kevent_callback(struct kqueue *kq, struct kevent64_s *kevp,
-    void *data);
+static int kevent_internal(struct proc *p, int fd, 
+                          user_addr_t changelist, int nchanges,
+                          user_addr_t eventlist, int nevents, 
+                          user_addr_t data_out, user_size_t *data_available,
+                          unsigned int flags, user_addr_t utimeout,
+                          kqueue_continue_t continuation,
+                          int32_t *retval);
+static int kevent_copyin(user_addr_t *addrp, struct kevent_internal_s *kevp,
+                        struct proc *p, unsigned int flags);
+static int kevent_copyout(struct kevent_internal_s *kevp, user_addr_t *addrp,
+                         struct proc *p, unsigned int flags);
+char * kevent_description(struct kevent_internal_s *kevp, char *s, size_t n);
+
+static int kevent_callback(struct kqueue *kq, struct kevent_internal_s *kevp,
+                          void *data);
 static void kevent_continue(struct kqueue *kq, void *data, int error);
 static void kqueue_scan_continue(void *contp, wait_result_t wait_result);
 static int kqueue_process(struct kqueue *kq, kevent_callback_t callback,
-    void *data, int *countp, struct proc *p);
+                         void *data, int *countp, struct proc *p);
 static int kqueue_begin_processing(struct kqueue *kq);
 static void kqueue_end_processing(struct kqueue *kq);
 static int knote_process(struct knote *kn, kevent_callback_t callback,
-    void *data, struct kqtailq *inprocessp, struct proc *p);
+                        void *data, struct kqtailq *inprocessp, struct proc *p);
 static void knote_put(struct knote *kn);
 static int knote_fdpattach(struct knote *kn, struct filedesc *fdp,
-    struct proc *p);
+                          struct proc *p);
 static void knote_drop(struct knote *kn, struct proc *p);
 static void knote_activate(struct knote *kn, int);
 static void knote_deactivate(struct knote *kn);
@@ -223,7 +229,7 @@ extern struct filterops sig_filtops;
 static int filt_timerattach(struct knote *kn);
 static void filt_timerdetach(struct knote *kn);
 static int filt_timer(struct knote *kn, long hint);
-static void filt_timertouch(struct knote *kn, struct kevent64_s *kev,
+static void filt_timertouch(struct knote *kn, struct kevent_internal_s *kev,
     long type);
 static struct filterops timer_filtops = {
        .f_attach = filt_timerattach,
@@ -260,7 +266,7 @@ extern struct filterops machport_filtops;
 static int filt_userattach(struct knote *kn);
 static void filt_userdetach(struct knote *kn);
 static int filt_user(struct knote *kn, long hint);
-static void filt_usertouch(struct knote *kn, struct kevent64_s *kev,
+static void filt_usertouch(struct knote *kn, struct kevent_internal_s *kev,
     long type);
 static struct filterops user_filtops = {
        .f_attach = filt_userattach,
@@ -358,8 +364,9 @@ kqlock2knoteusewait(struct kqueue *kq, struct knote *kn)
 {
        if ((kn->kn_status & (KN_DROPPING | KN_ATTACHING)) != 0) {
                kn->kn_status |= KN_USEWAIT;
-               wait_queue_assert_wait((wait_queue_t)kq->kq_wqs,
-                   &kn->kn_status, THREAD_UNINT, 0);
+               waitq_assert_wait64((struct waitq *)kq->kq_wqs,
+                                   CAST_EVENT64_T(&kn->kn_status),
+                                   THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
                kqunlock(kq);
                thread_block(THREAD_CONTINUE_NULL);
                return (0);
@@ -389,8 +396,10 @@ knoteuse2kqlock(struct kqueue *kq, struct knote *kn)
                }
                if ((kn->kn_status & KN_USEWAIT) != 0) {
                        kn->kn_status &= ~KN_USEWAIT;
-                       wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs,
-                           &kn->kn_status, THREAD_AWAKENED);
+                       waitq_wakeup64_all((struct waitq *)kq->kq_wqs,
+                                          CAST_EVENT64_T(&kn->kn_status),
+                                          THREAD_AWAKENED,
+                                          WAITQ_ALL_PRIORITIES);
                }
        }
        return ((kn->kn_status & KN_DROPPING) == 0);
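The same mechanical translation from the old wait_queue API to the new waitq API recurs throughout these kqueue changes: the kqueue's wait-queue set is cast to struct waitq *, the event pointer is wrapped in CAST_EVENT64_T(), waits take an explicit TIMEOUT_WAIT_FOREVER, and wakeups gain a WAITQ_ALL_PRIORITIES argument. In before/after sketch form (no new code path, just the recurring shape):

    /* Before (previous release): */
    wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kn->kn_status,
        THREAD_UNINT, 0);
    wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kn->kn_status,
        THREAD_AWAKENED);

    /* After (this release): */
    waitq_assert_wait64((struct waitq *)kq->kq_wqs,
        CAST_EVENT64_T(&kn->kn_status), THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
    waitq_wakeup64_all((struct waitq *)kq->kq_wqs,
        CAST_EVENT64_T(&kn->kn_status), THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);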
@@ -425,8 +434,9 @@ kqlock2knotedrop(struct kqueue *kq, struct knote *kn)
                }
        }
        kn->kn_status |= KN_USEWAIT;
-       wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kn->kn_status,
-           THREAD_UNINT, 0);
+       waitq_assert_wait64((struct waitq *)kq->kq_wqs,
+                           CAST_EVENT64_T(&kn->kn_status),
+                           THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
        kqunlock(kq);
        thread_block(THREAD_CONTINUE_NULL);
        return (oktodrop);
@@ -444,8 +454,10 @@ knote_put(struct knote *kn)
        if (--kn->kn_inuse == 0) {
                if ((kn->kn_status & KN_USEWAIT) != 0) {
                        kn->kn_status &= ~KN_USEWAIT;
-                       wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs,
-                           &kn->kn_status, THREAD_AWAKENED);
+                       waitq_wakeup64_all((struct waitq *)kq->kq_wqs,
+                                          CAST_EVENT64_T(&kn->kn_status),
+                                          THREAD_AWAKENED,
+                                          WAITQ_ALL_PRIORITIES);
                }
        }
        kqunlock(kq);
@@ -838,8 +850,10 @@ filt_timerexpire(void *knx, __unused void *spare)
        /* if someone is waiting for timer to pop */
        if (kn->kn_hookid & TIMER_CANCELWAIT) {
                struct kqueue *kq = kn->kn_kq;
-               wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kn->kn_hook,
-                   THREAD_AWAKENED);
+               waitq_wakeup64_all((struct waitq *)kq->kq_wqs,
+                                  CAST_EVENT64_T(&kn->kn_hook),
+                                  THREAD_AWAKENED,
+                                  WAITQ_ALL_PRIORITIES);
        }
 
        filt_timerunlock();
@@ -864,8 +878,9 @@ filt_timercancel(struct knote *kn)
                } else {
                        /* we have to wait for the expire routine.  */
                        kn->kn_hookid |= TIMER_CANCELWAIT;
-                       wait_queue_assert_wait((wait_queue_t)kq->kq_wqs,
-                           &kn->kn_hook, THREAD_UNINT, 0);
+                       waitq_assert_wait64((struct waitq *)kq->kq_wqs,
+                                           CAST_EVENT64_T(&kn->kn_hook),
+                                           THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
                        filt_timerunlock();
                        thread_block(THREAD_CONTINUE_NULL);
                        filt_timerlock();
@@ -1006,7 +1021,7 @@ filt_timer(struct knote *kn, long hint)
  * pops have gone off (in kn_data).
  */
 static void
-filt_timertouch(struct knote *kn, struct kevent64_s *kev, long type)
+filt_timertouch(struct knote *kn, struct kevent_internal_s *kev, long type)
 {
        int error;
        filt_timerlock();
@@ -1110,7 +1125,7 @@ filt_user(struct knote *kn, __unused long hint)
 }
 
 static void
-filt_usertouch(struct knote *kn, struct kevent64_s *kev, long type)
+filt_usertouch(struct knote *kn, struct kevent_internal_s *kev, long type)
 {
        uint32_t ffctrl;
        switch (type) {
@@ -1170,10 +1185,9 @@ kqueue_alloc(struct proc *p)
        MALLOC_ZONE(kq, struct kqueue *, sizeof (struct kqueue), M_KQUEUE,
            M_WAITOK);
        if (kq != NULL) {
-               wait_queue_set_t wqs;
+               struct waitq_set *wqs;
 
-               wqs = wait_queue_set_alloc(SYNC_POLICY_FIFO |
-                   SYNC_POLICY_PREPOST);
+               wqs = waitq_set_alloc(SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST | SYNC_POLICY_DISABLE_IRQ);
                if (wqs != NULL) {
                        bzero(kq, sizeof (struct kqueue));
                        lck_spin_init(&kq->kq_lock, kq_lck_grp, kq_lck_attr);
@@ -1213,11 +1227,17 @@ kqueue_alloc(struct proc *p)
 void
 kqueue_dealloc(struct kqueue *kq)
 {
-       struct proc *p = kq->kq_p;
-       struct filedesc *fdp = p->p_fd;
+       struct proc *p;
+       struct filedesc *fdp;
        struct knote *kn;
        int i;
 
+       if (kq == NULL)
+               return;
+
+       p = kq->kq_p;
+       fdp = p->p_fd;
+
        proc_fdlock(p);
        for (i = 0; i < fdp->fd_knlistsize; i++) {
                kn = SLIST_FIRST(&fdp->fd_knlist[i]);
@@ -1262,11 +1282,11 @@ kqueue_dealloc(struct kqueue *kq)
        proc_fdunlock(p);
 
        /*
-        * before freeing the wait queue set for this kqueue,
-        * make sure it is unlinked from all its containing (select) sets.
+        * waitq_set_free() clears all preposts and also remove the KQ's
+        * waitq set from any select sets to which it may belong.
         */
-       wait_queue_unlink_all((wait_queue_t)kq->kq_wqs);
-       wait_queue_set_free(kq->kq_wqs);
+       waitq_set_free(kq->kq_wqs);
+       kq->kq_wqs = NULL;
        lck_spin_destroy(&kq->kq_lock, kq_lck_grp);
        FREE_ZONE(kq, sizeof (struct kqueue), M_KQUEUE);
 }
@@ -1311,43 +1331,77 @@ kqueue(struct proc *p, __unused struct kqueue_args *uap, int32_t *retval)
 }
 
 static int
-kevent_copyin(user_addr_t *addrp, struct kevent64_s *kevp, struct proc *p,
-    int iskev64)
+kevent_copyin(user_addr_t *addrp, struct kevent_internal_s *kevp, struct proc *p,
+    unsigned int flags)
 {
        int advance;
        int error;
 
-       if (iskev64) {
-               advance = sizeof (struct kevent64_s);
-               error = copyin(*addrp, (caddr_t)kevp, advance);
-       } else if (IS_64BIT_PROCESS(p)) {
-               struct user64_kevent kev64;
-               bzero(kevp, sizeof (struct kevent64_s));
+       if (flags & KEVENT_FLAG_LEGACY32) {
+               bzero(kevp, sizeof (*kevp));
 
-               advance = sizeof (kev64);
+               if (IS_64BIT_PROCESS(p)) {
+                       struct user64_kevent kev64;
+
+                       advance = sizeof (kev64);
+                       error = copyin(*addrp, (caddr_t)&kev64, advance);
+                       if (error)
+                               return (error);
+                       kevp->ident = kev64.ident;
+                       kevp->filter = kev64.filter;
+                       kevp->flags = kev64.flags;
+                       kevp->udata = kev64.udata;
+                       kevp->fflags = kev64.fflags;
+                       kevp->data = kev64.data;
+               } else {
+                       struct user32_kevent kev32;
+
+                       advance = sizeof (kev32);
+                       error = copyin(*addrp, (caddr_t)&kev32, advance);
+                       if (error)
+                               return (error);
+                       kevp->ident = (uintptr_t)kev32.ident;
+                       kevp->filter = kev32.filter;
+                       kevp->flags = kev32.flags;
+                       kevp->udata = CAST_USER_ADDR_T(kev32.udata);
+                       kevp->fflags = kev32.fflags;
+                       kevp->data = (intptr_t)kev32.data;
+               }
+       } else if (flags & KEVENT_FLAG_LEGACY64) {
+               struct kevent64_s kev64;
+
+               bzero(kevp, sizeof (*kevp));
+
+               advance = sizeof (struct kevent64_s);
                error = copyin(*addrp, (caddr_t)&kev64, advance);
                if (error)
-                       return (error);
+                       return(error);
                kevp->ident = kev64.ident;
                kevp->filter = kev64.filter;
                kevp->flags = kev64.flags;
+               kevp->udata = kev64.udata;
                kevp->fflags = kev64.fflags;
                kevp->data = kev64.data;
-               kevp->udata = kev64.udata;
+               kevp->ext[0] = kev64.ext[0];
+               kevp->ext[1] = kev64.ext[1];
+               
        } else {
-               struct user32_kevent kev32;
-               bzero(kevp, sizeof (struct kevent64_s));
+               struct kevent_qos_s kevqos;
 
-               advance = sizeof (kev32);
-               error = copyin(*addrp, (caddr_t)&kev32, advance);
+               bzero(kevp, sizeof (*kevp));
+
+               advance = sizeof (struct kevent_qos_s);
+               error = copyin(*addrp, (caddr_t)&kevqos, advance);
                if (error)
-                       return (error);
-               kevp->ident = (uintptr_t)kev32.ident;
-               kevp->filter = kev32.filter;
-               kevp->flags = kev32.flags;
-               kevp->fflags = kev32.fflags;
-               kevp->data = (intptr_t)kev32.data;
-               kevp->udata = CAST_USER_ADDR_T(kev32.udata);
+                       return error;
+               kevp->ident = kevqos.ident;
+               kevp->filter = kevqos.filter;
+               kevp->flags = kevqos.flags;
+               kevp->udata = kevqos.udata;
+               kevp->fflags = kevqos.fflags;
+               kevp->data = kevqos.data;
+               kevp->ext[0] = kevqos.ext[0];
+               kevp->ext[1] = kevqos.ext[1];
        }
        if (!error)
                *addrp += advance;
@@ -1355,46 +1409,85 @@ kevent_copyin(user_addr_t *addrp, struct kevent64_s *kevp, struct proc *p,
 }
 
 static int
-kevent_copyout(struct kevent64_s *kevp, user_addr_t *addrp, struct proc *p,
-    int iskev64)
+kevent_copyout(struct kevent_internal_s *kevp, user_addr_t *addrp, struct proc *p,
+    unsigned int flags)
 {
+       user_addr_t addr = *addrp;
        int advance;
        int error;
 
-       if (iskev64) {
-               advance = sizeof (struct kevent64_s);
-               error = copyout((caddr_t)kevp, *addrp, advance);
-       } else if (IS_64BIT_PROCESS(p)) {
-               struct user64_kevent kev64;
+       if (flags & KEVENT_FLAG_LEGACY32) {
+               assert((flags & KEVENT_FLAG_STACK_EVENTS) == 0);
 
-               /*
-                * deal with the special case of a user-supplied
-                * value of (uintptr_t)-1.
-                */
-               kev64.ident = (kevp->ident == (uintptr_t)-1) ?
-                   (uint64_t)-1LL : (uint64_t)kevp->ident;
+               if (IS_64BIT_PROCESS(p)) {
+                       struct user64_kevent kev64;
+
+                       /*
+                        * deal with the special case of a user-supplied
+                        * value of (uintptr_t)-1.
+                        */
+                       kev64.ident = (kevp->ident == (uintptr_t)-1) ?
+                               (uint64_t)-1LL : (uint64_t)kevp->ident;
+
+                       kev64.filter = kevp->filter;
+                       kev64.flags = kevp->flags;
+                       kev64.fflags = kevp->fflags;
+                       kev64.data = (int64_t) kevp->data;
+                       kev64.udata = kevp->udata;
+                       advance = sizeof (kev64);
+                       error = copyout((caddr_t)&kev64, addr, advance);
+               } else {
+                       struct user32_kevent kev32;
+
+                       kev32.ident = (uint32_t)kevp->ident;
+                       kev32.filter = kevp->filter;
+                       kev32.flags = kevp->flags;
+                       kev32.fflags = kevp->fflags;
+                       kev32.data = (int32_t)kevp->data;
+                       kev32.udata = kevp->udata;
+                       advance = sizeof (kev32);
+                       error = copyout((caddr_t)&kev32, addr, advance);
+               }
+       } else if (flags & KEVENT_FLAG_LEGACY64) {
+               struct kevent64_s kev64;
 
+               advance = sizeof (struct kevent64_s);
+               if (flags & KEVENT_FLAG_STACK_EVENTS) {
+                       addr -= advance;
+               }
+               kev64.ident = kevp->ident;
                kev64.filter = kevp->filter;
                kev64.flags = kevp->flags;
                kev64.fflags = kevp->fflags;
                kev64.data = (int64_t) kevp->data;
                kev64.udata = kevp->udata;
-               advance = sizeof (kev64);
-               error = copyout((caddr_t)&kev64, *addrp, advance);
+               kev64.ext[0] = kevp->ext[0];
+               kev64.ext[1] = kevp->ext[1];
+               error = copyout((caddr_t)&kev64, addr, advance);
        } else {
-               struct user32_kevent kev32;
-
-               kev32.ident = (uint32_t)kevp->ident;
-               kev32.filter = kevp->filter;
-               kev32.flags = kevp->flags;
-               kev32.fflags = kevp->fflags;
-               kev32.data = (int32_t)kevp->data;
-               kev32.udata = kevp->udata;
-               advance = sizeof (kev32);
-               error = copyout((caddr_t)&kev32, *addrp, advance);
+               struct kevent_qos_s kevqos;
+       
+               bzero(&kevqos, sizeof (struct kevent_qos_s));
+               advance = sizeof (struct kevent_qos_s);
+               if (flags & KEVENT_FLAG_STACK_EVENTS) {
+                       addr -= advance;
+               }
+               kevqos.ident = kevp->ident;
+               kevqos.filter = kevp->filter;
+               kevqos.flags = kevp->flags;
+               kevqos.fflags = kevp->fflags;
+               kevqos.data = (int64_t) kevp->data;
+               kevqos.udata = kevp->udata;
+               kevqos.ext[0] = kevp->ext[0];
+               kevqos.ext[1] = kevp->ext[1];
+               error = copyout((caddr_t)&kevqos, addr, advance);
+       }
+       if (!error) {
+               if (flags & KEVENT_FLAG_STACK_EVENTS)
+                       *addrp = addr;
+               else
+                       *addrp = addr + advance;
        }
-       if (!error)
-               *addrp += advance;
        return (error);
 }
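To keep the three user-visible layouts straight: KEVENT_FLAG_LEGACY32 copies struct user32_kevent / user64_kevent (the original kevent() shape, no ext words), KEVENT_FLAG_LEGACY64 copies struct kevent64_s, and everything else copies struct kevent_qos_s; all of them are normalized into kevent_internal_s inside the kernel. With KEVENT_FLAG_STACK_EVENTS the output cursor walks downward from the end of the user buffer, which is why kevent_internal() pre-biases ueventlist by nevents times the per-event size below. A condensed sketch of just the copyout addressing (simplified from the code above, not a separate routine in the source):

    /* Simplified: how the output cursor moves for heap vs. stack event lists. */
    static int
    copyout_one(const void *kev, user_addr_t *addrp, int advance, unsigned int flags)
    {
            user_addr_t addr = *addrp;
            int error;

            if (flags & KEVENT_FLAG_STACK_EVENTS)
                    addr -= advance;                /* fill from the end, downward */

            error = copyout(kev, addr, advance);
            if (error == 0)
                    *addrp = (flags & KEVENT_FLAG_STACK_EVENTS) ? addr : addr + advance;
            return (error);
    }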
 
@@ -1420,7 +1513,8 @@ kevent_continue(__unused struct kqueue *kq, void *data, int error)
        fd = cont_args->fd;
        fp = cont_args->fp;
 
-       fp_drop(p, fd, fp, 0);
+       if (fp != NULL)
+               fp_drop(p, fd, fp, 0);
 
        /* don't restart after signals... */
        if (error == ERESTART)
@@ -1439,49 +1533,147 @@ kevent_continue(__unused struct kqueue *kq, void *data, int error)
 int
 kevent(struct proc *p, struct kevent_args *uap, int32_t *retval)
 {
-       return (kevent_internal(p,
-           0,
-           uap->changelist,
-           uap->nchanges,
-           uap->eventlist,
-           uap->nevents,
-           uap->fd,
-           uap->timeout,
-           0, /* no flags from old kevent() call */
-           retval));
+       unsigned int flags = KEVENT_FLAG_LEGACY32;
+
+       return kevent_internal(p,
+                              uap->fd,
+                              uap->changelist, uap->nchanges,
+                              uap->eventlist, uap->nevents,
+                              0ULL, 0ULL,
+                              flags,
+                              uap->timeout,
+                              kevent_continue,
+                              retval);
 }
 
 int
 kevent64(struct proc *p, struct kevent64_args *uap, int32_t *retval)
 {
-       return (kevent_internal(p,
-           1,
-           uap->changelist,
-           uap->nchanges,
-           uap->eventlist,
-           uap->nevents,
-           uap->fd,
-           uap->timeout,
-           uap->flags,
-           retval));
+       unsigned int flags;
+
+       /* restrict to user flags and set legacy64 */
+       flags = uap->flags & KEVENT_FLAG_USER;
+       flags |= KEVENT_FLAG_LEGACY64;
+
+       return kevent_internal(p,
+                              uap->fd,
+                              uap->changelist, uap->nchanges,
+                              uap->eventlist, uap->nevents,
+                              0ULL, 0ULL,
+                              flags,
+                              uap->timeout,
+                              kevent_continue,
+                              retval);
 }
 
+int
+kevent_qos(struct proc *p, struct kevent_qos_args *uap, int32_t *retval)
+{
+       user_size_t usize = 0;
+       user_size_t ssize;
+       int error;
+
+       /* restrict to user flags */
+       uap->flags &= KEVENT_FLAG_USER;
+
+       if (uap->data_available) {
+               if (!IS_64BIT_PROCESS(p)) {
+                       uint32_t csize;
+
+                       error = copyin(uap->data_available, (caddr_t)&csize, sizeof(csize));
+                       if (error)
+                               return error;
+                       usize = csize;
+               } else {
+                       uint64_t csize;
+                       error = copyin(uap->data_available, (caddr_t)&csize, sizeof(csize));
+                       if (error)
+                               return error;
+                       usize = csize;
+               }
+       }
+       ssize = usize;
+
+       error = kevent_internal(p,
+                               uap->fd,
+                               uap->changelist, uap->nchanges,
+                               uap->eventlist, uap->nevents,
+                               uap->data_out, &usize,
+                               uap->flags,
+                               0ULL,
+                               kevent_continue,
+                               retval);
+
+       if (error == 0 && uap->data_available && usize != ssize) {
+               if (!IS_64BIT_PROCESS(p)) {
+                       uint32_t csize = (uint32_t)usize;
+
+                       error = copyout((caddr_t)&csize, uap->data_available, sizeof(csize));
+               } else {
+                       error = copyout((caddr_t)&usize, uap->data_available, sizeof(usize));
+               }
+       }
+       return error;
+}
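kevent_qos() is a private entry point in this release (used by the pthread/dispatch runtime); the userspace wrapper shown below is assumed for illustration, not a documented API. A hedged sketch of a non-blocking poll that matches the flag handling above (KEVENT_FLAG_IMMEDIATE takes the getmicrouptime() path, and NULL data_out/data_available skips the size round-trip):

    /* Illustrative only: poll for up to 8 pending events without blocking.
     * "kq" is assumed to be an existing kqueue descriptor. */
    struct kevent_qos_s out[8];
    int n = kevent_qos(kq, NULL, 0,             /* no changes */
                       out, 8,                  /* event buffer */
                       NULL, NULL,              /* no extra data buffer */
                       KEVENT_FLAG_IMMEDIATE);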
+
+int 
+kevent_qos_internal(struct proc *p, int fd, 
+                   user_addr_t changelist, int nchanges,
+                   user_addr_t eventlist, int nevents,
+                   user_addr_t data_out, user_size_t *data_available,
+                   unsigned int flags, 
+                   int32_t *retval) 
+{
+       return kevent_internal(p,
+                              fd,
+                              changelist, nchanges,
+                              eventlist, nevents,
+                              data_out, data_available,
+                              flags,
+                              0ULL,
+                              NULL,
+                              retval);
+}
 static int
-kevent_internal(struct proc *p, int iskev64, user_addr_t changelist,
-    int nchanges, user_addr_t ueventlist, int nevents, int fd,
-    user_addr_t utimeout, __unused unsigned int flags,
-    int32_t *retval)
+kevent_internal(struct proc *p, 
+               int fd,
+               user_addr_t changelist, int nchanges,
+               user_addr_t ueventlist, int nevents,
+               user_addr_t data_out, user_size_t *data_available,
+               unsigned int flags, 
+               user_addr_t utimeout,
+               kqueue_continue_t continuation,
+               int32_t *retval)
 {
        struct _kevent *cont_args;
        uthread_t ut;
        struct kqueue *kq;
-       struct fileproc *fp;
-       struct kevent64_s kev;
+       struct fileproc *fp = NULL;
+       struct kevent_internal_s kev;
        int error, noutputs;
        struct timeval atv;
 
-       /* convert timeout to absolute - if we have one */
-       if (utimeout != USER_ADDR_NULL) {
+#if 1
+       /* temporarily ignore these fields */
+       (void)data_out;
+       (void)data_available;
+#endif
+
+       /* prepare to deal with stack-wise allocation of out events */
+       if (flags & KEVENT_FLAG_STACK_EVENTS) {
+               int scale = ((flags & KEVENT_FLAG_LEGACY32) ? 
+                            (IS_64BIT_PROCESS(p) ? sizeof(struct user64_kevent) :
+                                                   sizeof(struct user32_kevent)) :
+                            ((flags & KEVENT_FLAG_LEGACY64) ? sizeof(struct kevent64_s) :
+                                                              sizeof(struct kevent_qos_s)));
+               ueventlist += nevents * scale;
+       }
+
+       /* convert timeout to absolute - if we have one (and not immediate) */
+       if (flags & KEVENT_FLAG_IMMEDIATE) {
+               getmicrouptime(&atv);
+       } else if (utimeout != USER_ADDR_NULL) {
                struct timeval rtv;
                if (IS_64BIT_PROCESS(p)) {
                        struct user64_timespec ts;
@@ -1502,32 +1694,72 @@ kevent_internal(struct proc *p, int iskev64, user_addr_t changelist,
                getmicrouptime(&atv);
                timevaladd(&atv, &rtv);
        } else {
+               /* wait forever value */
                atv.tv_sec = 0;
                atv.tv_usec = 0;
        }
 
-       /* get a usecount for the kq itself */
-       if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0)
-               return (error);
+       if (flags & KEVENT_FLAG_WORKQ) {
+               /*
+                * use the private kq associated with the proc workq.
+                * Just being a thread within the process (and not
+                * being the exit/exec thread) is enough to hold a
+                * reference on this special kq.
+                */
+               kq = p->p_wqkqueue;
+               if (kq == NULL) {
+                       struct kqueue *alloc_kq = kqueue_alloc(p);
+                       if (alloc_kq == NULL)
+                               return ENOMEM;
+
+                       proc_fdlock(p);
+                       if (p->p_wqkqueue == NULL) {
+                               /*
+                                * The kq is marked as special -
+                                * with unique interactions with
+                                * the workq for this process.
+                                */
+                               alloc_kq->kq_state |= KQ_WORKQ;
+                               kq = p->p_wqkqueue = alloc_kq;
+                               proc_fdunlock(p);
+                       } else {
+                               proc_fdunlock(p);
+                               kq = p->p_wqkqueue;
+                               kqueue_dealloc(alloc_kq);
+                       }
+               }
+       } else {
+               /* get a usecount for the kq itself */
+               if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0)
+                       return (error);
+       }
 
        /* each kq should only be used for events of one type */
        kqlock(kq);
-       if (kq->kq_state & (KQ_KEV32 | KQ_KEV64)) {
-               if (((iskev64 && (kq->kq_state & KQ_KEV32)) ||
-                       (!iskev64 && (kq->kq_state & KQ_KEV64)))) {
+       if (kq->kq_state & (KQ_KEV32 | KQ_KEV64 | KQ_KEV_QOS)) {
+               if (flags & KEVENT_FLAG_LEGACY32) {
+                       if ((kq->kq_state & KQ_KEV32) == 0) {
+                               error = EINVAL;
+                               kqunlock(kq);
+                               goto errorout;
+                       }
+               } else if (kq->kq_state & KQ_KEV32) {
                        error = EINVAL;
                        kqunlock(kq);
                        goto errorout;
                }
+       } else if (flags & KEVENT_FLAG_LEGACY32) {
+               kq->kq_state |= KQ_KEV32;
        } else {
-               kq->kq_state |= (iskev64 ? KQ_KEV64 : KQ_KEV32);
+               /* JMM - set KQ_KEV_QOS when we are ready for exclusive */
+               kq->kq_state |= KQ_KEV64;
        }
        kqunlock(kq);
 
        /* register all the change requests the user provided... */
        noutputs = 0;
        while (nchanges > 0 && error == 0) {
-               error = kevent_copyin(&changelist, &kev, p, iskev64);
+               error = kevent_copyin(&changelist, &kev, p, flags);
                if (error)
                        break;
 
@@ -1536,7 +1768,7 @@ kevent_internal(struct proc *p, int iskev64, user_addr_t changelist,
                if ((error || (kev.flags & EV_RECEIPT)) && nevents > 0) {
                        kev.flags = EV_ERROR;
                        kev.data = error;
-                       error = kevent_copyout(&kev, &ueventlist, p, iskev64);
+                       error = kevent_copyout(&kev, &ueventlist, p, flags);
                        if (error == 0) {
                                nevents--;
                                noutputs++;
@@ -1545,25 +1777,40 @@ kevent_internal(struct proc *p, int iskev64, user_addr_t changelist,
                nchanges--;
        }
 
-       /* store the continuation/completion data in the uthread */
-       ut = (uthread_t)get_bsdthread_info(current_thread());
-       cont_args = &ut->uu_kevent.ss_kevent;
-       cont_args->fp = fp;
-       cont_args->fd = fd;
-       cont_args->retval = retval;
-       cont_args->eventlist = ueventlist;
-       cont_args->eventcount = nevents;
-       cont_args->eventout = noutputs;
-       cont_args->eventsize = iskev64;
+       /* short-circuit the scan if we only want error events */
+       if (flags & KEVENT_FLAG_ERROR_EVENTS) 
+               nevents = 0;
+
+       if (nevents > 0 && noutputs == 0 && error == 0) {
+
+               /* store the continuation/completion data in the uthread */
+               ut = (uthread_t)get_bsdthread_info(current_thread());
+               cont_args = &ut->uu_kevent.ss_kevent;
+               cont_args->fp = fp;
+               cont_args->fd = fd;
+               cont_args->retval = retval;
+               cont_args->eventlist = ueventlist;
+               cont_args->eventcount = nevents;
+               cont_args->eventout = noutputs;
+               cont_args->eventflags = flags;
 
-       if (nevents > 0 && noutputs == 0 && error == 0)
                error = kqueue_scan(kq, kevent_callback,
-                   kevent_continue, cont_args,
-                   &atv, p);
-       kevent_continue(kq, cont_args, error);
+                                   continuation, cont_args,
+                                   &atv, p);
+
+               noutputs = cont_args->eventout;
+       }
 
+       /* don't restart after signals... */
+       if (error == ERESTART)
+               error = EINTR;
+       else if (error == EWOULDBLOCK)
+               error = 0;
+       if (error == 0)
+               *retval = noutputs;
 errorout:
-       fp_drop(p, fd, fp, 0);
+       if (fp != NULL)
+               fp_drop(p, fd, fp, 0);
        return (error);
 }
 
@@ -1575,23 +1822,20 @@ errorout:
  * caller holds a reference on the kqueue
  */
 static int
-kevent_callback(__unused struct kqueue *kq, struct kevent64_s *kevp,
+kevent_callback(__unused struct kqueue *kq, struct kevent_internal_s *kevp,
     void *data)
 {
        struct _kevent *cont_args;
        int error;
-       int iskev64;
 
        cont_args = (struct _kevent *)data;
        assert(cont_args->eventout < cont_args->eventcount);
 
-       iskev64 = cont_args->eventsize;
-
        /*
         * Copy out the appropriate amount of event data for this user.
         */
        error = kevent_copyout(kevp, &cont_args->eventlist, current_proc(),
-           iskev64);
+                              cont_args->eventflags);
 
        /*
         * If there isn't space for additional events, return
@@ -1605,23 +1849,23 @@ kevent_callback(__unused struct kqueue *kq, struct kevent64_s *kevp,
 /*
  * kevent_description - format a description of a kevent for diagnostic output
  *
- * called with a 128-byte string buffer
+ * called with a 256-byte string buffer
  */
 
 char *
-kevent_description(struct kevent64_s *kevp, char *s, size_t n)
+kevent_description(struct kevent_internal_s *kevp, char *s, size_t n)
 {
        snprintf(s, n,
            "kevent="
-           "{.ident=%#llx, .filter=%d, .flags=%#x, .fflags=%#x, .data=%#llx, .udata=%#llx, .ext[0]=%#llx, .ext[1]=%#llx}",
+           "{.ident=%#llx, .filter=%d, .flags=%#x, .udata=%#llx, .fflags=%#x, .data=%#llx, .ext[0]=%#llx, .ext[1]=%#llx}",
            kevp->ident,
            kevp->filter,
            kevp->flags,
+           kevp->udata,
            kevp->fflags,
            kevp->data,
-           kevp->udata,
            kevp->ext[0],
-           kevp->ext[1]);
+           kevp->ext[1] );
 
        return (s);
 }
@@ -1641,7 +1885,7 @@ kevent_description(struct kevent64_s *kevp, char *s, size_t n)
  */
 
 int
-kevent_register(struct kqueue *kq, struct kevent64_s *kev,
+kevent_register(struct kqueue *kq, struct kevent_internal_s *kev,
     __unused struct proc *ctxp)
 {
        struct proc *p = kq->kq_p;
@@ -1649,6 +1893,7 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev,
        struct filterops *fops;
        struct fileproc *fp = NULL;
        struct knote *kn = NULL;
+       struct klist *list;
        int error = 0;
 
        if (kev->filter < 0) {
@@ -1656,43 +1901,48 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev,
                        return (EINVAL);
                fops = sysfilt_ops[~kev->filter];       /* to 0-base index */
        } else {
-               /*
-                * XXX
-                * filter attach routine is responsible for insuring that
-                * the identifier can be attached to it.
-                */
-               printf("unknown filter: %d\n", kev->filter);
                return (EINVAL);
        }
 
 restart:
        /* this iocount needs to be dropped if it is not registered */
+       list = NULL;
        proc_fdlock(p);
-       if (fops->f_isfd && (error = fp_lookup(p, kev->ident, &fp, 1)) != 0) {
-               proc_fdunlock(p);
-               return (error);
-       }
 
+       /* 
+        * determine where to look for the knote
+        */
        if (fops->f_isfd) {
+               if ((error = fp_lookup(p, kev->ident, &fp, 1)) != 0) {
+                       proc_fdunlock(p);
+                       return (error);
+               }
                /* fd-based knotes are linked off the fd table */
                if (kev->ident < (u_int)fdp->fd_knlistsize) {
-                       SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
-                               if (kq == kn->kn_kq &&
-                                   kev->filter == kn->kn_filter)
-                                       break;
+                       list = &fdp->fd_knlist[kev->ident];
                }
-       } else {
+       } else if (fdp->fd_knhashmask != 0) {
                /* hash non-fd knotes here too */
-               if (fdp->fd_knhashmask != 0) {
-                       struct klist *list;
-
-                       list = &fdp->fd_knhash[
-                           KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
-                       SLIST_FOREACH(kn, list, kn_link)
-                               if (kev->ident == kn->kn_id &&
-                                   kq == kn->kn_kq &&
-                                   kev->filter == kn->kn_filter)
-                                       break;
+               list = &fdp->fd_knhash[KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
+       }
+
+       /*
+        * scan the selected list looking for a match
+        */
+       if (list != NULL) {
+               SLIST_FOREACH(kn, list, kn_link) {
+                       if (kq == kn->kn_kq &&
+                           kev->ident == kn->kn_id && 
+                           kev->filter == kn->kn_filter) {
+                               if (kev->flags & EV_UDATA_SPECIFIC) {
+                                       if ((kn->kn_flags & EV_UDATA_SPECIFIC) &&
+                                           kev->udata == kn->kn_udata) {
+                                               break; /* matching udata-specific knote */
+                                       }
+                               } else if ((kn->kn_flags & EV_UDATA_SPECIFIC) == 0) {
+                                       break; /* matching non-udata-specific knote */
+                               }
+                       }
                }
        }
 
@@ -1775,11 +2025,23 @@ restart:
                proc_fdunlock(p);
 
                if (kev->flags & EV_DELETE) {
-                       knote_dequeue(kn);
-                       kn->kn_status |= KN_DISABLED;
-                       if (kqlock2knotedrop(kq, kn)) {
-                               kn->kn_fop->f_detach(kn);
-                               knote_drop(kn, p);
+                       if ((kev->flags & EV_ENABLE) == 0 &&
+                           (kev->flags & EV_DISPATCH2) == EV_DISPATCH2 &&
+                           (kn->kn_status & KN_DISABLED) == KN_DISABLED) {
+                               /* mark for deferred drop */
+                               kn->kn_status |= KN_DEFERDROP;
+                               kqunlock(kq);
+                               error = EINPROGRESS;
+                       } else {
+                               knote_dequeue(kn);
+                               kn->kn_status |= KN_DISABLED;
+                               if (kqlock2knotedrop(kq, kn)) {
+                                       kn->kn_fop->f_detach(kn);
+                                       knote_drop(kn, p);
+                               } else {
+                                       /* pretend we didn't find it */
+                                       error = ENOENT;
+                               }
                        }
                        goto done;
                }
@@ -1788,10 +2050,24 @@ restart:
                if (kev->flags & EV_DISABLE) {
                        knote_dequeue(kn);
                        kn->kn_status |= KN_DISABLED;
-               } else if (kev->flags & EV_ENABLE) {
+
+               } else if ((kev->flags & EV_ENABLE) &&
+                          (kn->kn_status & KN_DISABLED)) {
                        kn->kn_status &= ~KN_DISABLED;
-                       if (kn->kn_status & KN_ACTIVE)
-                               knote_enqueue(kn);
+
+                       /* handle deferred drop */
+                       if (kn->kn_status & KN_DEFERDROP) {
+                               kn->kn_status &= ~KN_DEFERDROP;
+                               kn->kn_flags |= (EV_DELETE | EV_ONESHOT);
+                               knote_activate(kn, 0);
+                               kqunlock(kq);
+                               goto done;
+                       }
+
+                       if (kn->kn_status & KN_ACTIVE) {
+                               /* force re-activate if previously active */
+                               knote_activate(kn, 1);
+                       }
                }
 
                /*
@@ -1828,13 +2104,21 @@ restart:
        /* still have use ref on knote */
 
        /*
-        * If the knote is not marked to always stay enqueued,
-        * invoke the filter routine to see if it should be
-        * enqueued now.
+        * Invoke the filter routine to see if it should be enqueued now.
+        */
+#if 0
+       if (kn->kn_fop->f_event(kn, 0)) {
+#else
+       /*
+        * JMM - temporary workaround until rdar://problem/19986199 
+        * This potentially results in extra wakeups for KN_STAYQUEUED event types,
+        * but waking up only truly active ones (yet trying below to determine
+        * active status, by invoking the filter routine, is having side-effects).
         */
-       if ((kn->kn_status & KN_STAYQUEUED) == 0 && kn->kn_fop->f_event(kn, 0)) {
+       if ((kn->kn_status & KN_STAYQUEUED) || kn->kn_fop->f_event(kn, 0)) {
+#endif
                if (knoteuse2kqlock(kq, kn))
-                       knote_activate(kn, 1);
+                       knote_activate(kn, (kn->kn_status & KN_STAYQUEUED));
                kqunlock(kq);
        } else {
                knote_put(kn);
@@ -1868,7 +2152,7 @@ knote_process(struct knote *kn,
     struct proc *p)
 {
        struct kqueue *kq = kn->kn_kq;
-       struct kevent64_s kev;
+       struct kevent_internal_s kev;
        int touch;
        int result;
        int error;
@@ -1916,6 +2200,9 @@ knote_process(struct knote *kn,
                                        kn->kn_fop->f_touch(kn, &kev,
                                            EVENT_PROCESS);
                                }
+                               if (result && (kn->kn_status & KN_TOUCH))
+                                       kn->kn_fop->f_touch(kn, &kev,
+                                           EVENT_PROCESS);
 
                                /*
                                 * convert back to a kqlock - bail if the knote
@@ -1928,9 +2215,7 @@ knote_process(struct knote *kn,
                                         * if revalidated as alive, make sure
                                         * it's active
                                         */
-                                       if (!(kn->kn_status & KN_ACTIVE)) {
-                                               knote_activate(kn, 0);
-                                       }
+                                       knote_activate(kn, 0);
 
                                        /*
                                         * capture all events that occurred
@@ -1964,7 +2249,9 @@ knote_process(struct knote *kn,
        /*
         * Determine how to dispatch the knote for future event handling.
         * not-fired: just return (do not callout).
-        * One-shot: deactivate it.
+        * One-shot: If dispatch2, enter deferred-delete mode (unless this
+        *           is the deferred delete event delivery itself).  Otherwise,
+        *           deactivate and drop it.
         * Clear: deactivate and clear the state.
         * Dispatch: don't clear state, just deactivate it and mark it disabled.
         * All others: just leave where they are.
@@ -1974,7 +2261,11 @@ knote_process(struct knote *kn,
                return (EJUSTRETURN);
        } else if ((kn->kn_flags & EV_ONESHOT) != 0) {
                knote_deactivate(kn);
-               if (kqlock2knotedrop(kq, kn)) {
+               if ((kn->kn_flags & (EV_DISPATCH2|EV_DELETE)) == EV_DISPATCH2) {
+                       /* defer dropping non-delete oneshot dispatch2 events */
+                       kn->kn_status |= (KN_DISABLED | KN_DEFERDROP);
+                       kqunlock(kq);
+               } else if (kqlock2knotedrop(kq, kn)) {
                        kn->kn_fop->f_detach(kn);
                        knote_drop(kn, p);
                }
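
The dispatch decision spelled out in the comment above, including the new deferred-drop path, condenses to the following. This is an illustrative sketch with plain booleans standing in for the knote flag bits; it is not kernel code:

#include <stdbool.h>

enum disposition { KEEP, DROP, DEFER_DROP, CLEAR_STATE, DISABLE };

/* Mirrors the post-delivery checks: one-shot first, then clear, then dispatch. */
static enum disposition
dispatch_after_delivery(bool oneshot, bool dispatch2, bool is_delete_delivery,
                        bool clear, bool dispatch)
{
        if (oneshot) {
                /* non-delete one-shot DISPATCH2 events are parked
                 * (KN_DISABLED | KN_DEFERDROP) rather than dropped immediately */
                if (dispatch2 && !is_delete_delivery)
                        return DEFER_DROP;
                return DROP;
        }
        if (clear)
                return CLEAR_STATE;
        if (dispatch)
                return DISABLE;
        return KEEP;
}
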
@@ -2027,8 +2318,9 @@ kqueue_begin_processing(struct kqueue *kq)
 
                /* if someone else is processing the queue, wait */
                if (kq->kq_nprocess != 0) {
-                       wait_queue_assert_wait((wait_queue_t)kq->kq_wqs,
-                           &kq->kq_nprocess, THREAD_UNINT, 0);
+                       waitq_assert_wait64((struct waitq *)kq->kq_wqs,
+                                           CAST_EVENT64_T(&kq->kq_nprocess),
+                                           THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
                        kq->kq_state |= KQ_PROCWAIT;
                        kqunlock(kq);
                        thread_block(THREAD_CONTINUE_NULL);
@@ -2049,8 +2341,10 @@ kqueue_end_processing(struct kqueue *kq)
        kq->kq_nprocess = 0;
        if (kq->kq_state & KQ_PROCWAIT) {
                kq->kq_state &= ~KQ_PROCWAIT;
-               wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs,
-                   &kq->kq_nprocess, THREAD_AWAKENED);
+               waitq_wakeup64_all((struct waitq *)kq->kq_wqs,
+                                  CAST_EVENT64_T(&kq->kq_nprocess),
+                                  THREAD_AWAKENED,
+                                  WAITQ_ALL_PRIORITIES);
        }
 }
 
@@ -2093,7 +2387,7 @@ kqueue_process(struct kqueue *kq,
         * Clear any pre-posted status from previous runs, so we
         * only detect events that occur during this run.
         */
-       wait_queue_sub_clearrefs(kq->kq_wqs);
+       waitq_set_clear_preposts(kq->kq_wqs);
 
        /*
         * loop through the enqueued knotes, processing each one and
@@ -2148,8 +2442,9 @@ kqueue_scan_continue(void *data, wait_result_t wait_result)
                error = kqueue_process(kq, cont_args->call, cont_args, &count,
                    current_proc());
                if (error == 0 && count == 0) {
-                       wait_queue_assert_wait((wait_queue_t)kq->kq_wqs,
-                           KQ_EVENT, THREAD_ABORTSAFE, cont_args->deadline);
+                       waitq_assert_wait64((struct waitq *)kq->kq_wqs,
+                                           KQ_EVENT, THREAD_ABORTSAFE,
+                                           cont_args->deadline);
                        kq->kq_state |= KQ_SLEEP;
                        kqunlock(kq);
                        thread_block_parameter(kqueue_scan_continue, kq);
@@ -2253,9 +2548,10 @@ kqueue_scan(struct kqueue *kq,
                }
 
                /* go ahead and wait */
-               wait_queue_assert_wait_with_leeway((wait_queue_t)kq->kq_wqs,
-                   KQ_EVENT, THREAD_ABORTSAFE, TIMEOUT_URGENCY_USER_NORMAL,
-                   deadline, 0);
+               waitq_assert_wait64_leeway((struct waitq *)kq->kq_wqs,
+                                          KQ_EVENT, THREAD_ABORTSAFE,
+                                          TIMEOUT_URGENCY_USER_NORMAL,
+                                          deadline, TIMEOUT_NO_LEEWAY);
                kq->kq_state |= KQ_SLEEP;
                kqunlock(kq);
                wait_result = thread_block_parameter(cont, kq);
@@ -2315,7 +2611,7 @@ kqueue_ioctl(__unused struct fileproc *fp,
 
 /*ARGSUSED*/
 static int
-kqueue_select(struct fileproc *fp, int which, void *wql,
+kqueue_select(struct fileproc *fp, int which, void *wq_link_id,
     __unused vfs_context_t ctx)
 {
        struct kqueue *kq = (struct kqueue *)fp->f_data;
@@ -2337,13 +2633,28 @@ kqueue_select(struct fileproc *fp, int which, void *wql,
         * catch events from KN_STAYQUEUED sources. So we do the linkage manually.
         * (The select() call will unlink them when it ends).
         */
-       if (wql != NULL) {
+       if (wq_link_id != NULL) {
                thread_t cur_act = current_thread();
                struct uthread * ut = get_bsdthread_info(cur_act);
 
                kq->kq_state |= KQ_SEL;
-               wait_queue_link_noalloc((wait_queue_t)kq->kq_wqs, ut->uu_wqset,
-                   (wait_queue_link_t)wql);
+               waitq_link((struct waitq *)kq->kq_wqs, ut->uu_wqset,
+                          WAITQ_SHOULD_LOCK, (uint64_t *)wq_link_id);
+
+               /* always consume the reserved link object */
+               waitq_link_release(*(uint64_t *)wq_link_id);
+               *(uint64_t *)wq_link_id = 0;
+
+               /*
+                * selprocess() is expecting that we send it back the waitq
+                * that was just added to the thread's waitq set. In order
+                * to not change the selrecord() API (which is exported to
+                * kexts), we pass this value back through the
+                * void *wq_link_id pointer we were passed. We need to use
+                * memcpy here because the pointer may not be properly aligned
+                * on 32-bit systems.
+                */
+               memcpy(wq_link_id, (void *)&(kq->kq_wqs), sizeof(void *));
        }
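
The memcpy above is used because wq_link_id doubles as an output parameter and may not be pointer-aligned on 32-bit kernels; a direct pointer-sized store through a misaligned address is undefined behavior on some architectures. A minimal illustration of the safe pattern (not kernel code):

#include <string.h>

/* Store a pointer-sized value through a possibly misaligned destination. */
static void
store_ptr_unaligned(void *dst, void *value)
{
        memcpy(dst, &value, sizeof(value));
}
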
 
        if (kqueue_begin_processing(kq) == -1) {
@@ -2487,20 +2798,28 @@ kqueue_stat(struct kqueue *kq, void *ub, int isstat64, proc_t p)
 
                bzero((void *)sb64, sizeof(*sb64));
                sb64->st_size = kq->kq_count;
-               if (kq->kq_state & KQ_KEV64)
+               if (kq->kq_state & KQ_KEV_QOS)
+                       sb64->st_blksize = sizeof(struct kevent_qos_s);
+               else if (kq->kq_state & KQ_KEV64)
                        sb64->st_blksize = sizeof(struct kevent64_s);
+               else if (IS_64BIT_PROCESS(p))
+                       sb64->st_blksize = sizeof(struct user64_kevent);
                else
-                       sb64->st_blksize = IS_64BIT_PROCESS(p) ? sizeof(struct user64_kevent) : sizeof(struct user32_kevent);
+                       sb64->st_blksize = sizeof(struct user32_kevent);
                sb64->st_mode = S_IFIFO;
        } else {
                struct stat *sb = (struct stat *)ub;
 
                bzero((void *)sb, sizeof(*sb));
                sb->st_size = kq->kq_count;
-               if (kq->kq_state & KQ_KEV64)
+               if (kq->kq_state & KQ_KEV_QOS)
+                       sb->st_blksize = sizeof(struct kevent_qos_s);
+               else if (kq->kq_state & KQ_KEV64)
                        sb->st_blksize = sizeof(struct kevent64_s);
+               else if (IS_64BIT_PROCESS(p))
+                       sb->st_blksize = sizeof(struct user64_kevent);
                else
-                       sb->st_blksize = IS_64BIT_PROCESS(p) ? sizeof(struct user64_kevent) : sizeof(struct user32_kevent);
+                       sb->st_blksize = sizeof(struct user32_kevent);
                sb->st_mode = S_IFIFO;
        }
        kqunlock(kq);
@@ -2513,10 +2832,31 @@ kqueue_stat(struct kqueue *kq, void *ub, int isstat64, proc_t p)
 static void
 kqueue_wakeup(struct kqueue *kq, int closed)
 {
+       wait_result_t res = THREAD_NOT_WAITING;
+
        if ((kq->kq_state & (KQ_SLEEP | KQ_SEL)) != 0 || kq->kq_nprocess > 0) {
                kq->kq_state &= ~(KQ_SLEEP | KQ_SEL);
-               wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, KQ_EVENT,
-                   (closed) ? THREAD_INTERRUPTED : THREAD_AWAKENED);
+               res = waitq_wakeup64_all((struct waitq *)kq->kq_wqs, KQ_EVENT,
+                                        (closed) ? THREAD_INTERRUPTED : THREAD_AWAKENED,
+                                        WAITQ_ALL_PRIORITIES);
+       }
+
+       /* request additional workq threads if appropriate */
+       if (res == THREAD_NOT_WAITING && (kq->kq_state & KQ_WORKQ) &&
+           pthread_functions != NULL && pthread_functions->workq_reqthreads != NULL) {
+               /*
+                * The special workq kq should be accumulating the counts of
+                * queued sources on a pthread_priority_t basis and we should
+                * be providing that here.  For now, just hard-code a single
+                * entry request at a fixed (default) QOS.
+                */
+               struct workq_reqthreads_req_s request = {
+                       .priority = 0x020004ff,  /* legacy event manager */
+                       .count = kq->kq_count };
+               thread_t wqthread;
+
+               wqthread = (*pthread_functions->workq_reqthreads)(kq->kq_p, 1, &request);
+               assert(wqthread == THREAD_NULL);
        }
 }
 
@@ -2556,7 +2896,7 @@ knote(struct klist *list, long hint)
 
                        /* if its not going away and triggered */
                        if (knoteuse2kqlock(kq, kn) && result)
-                               knote_activate(kn, 1);
+                               knote_activate(kn, 0);
                        /* lock held again */
                }
                kqunlock(kq);
@@ -2597,12 +2937,12 @@ knote_detach(struct klist *list, struct knote *kn)
  * caller provides the wait queue link structure.
  */
 int
-knote_link_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t wql)
+knote_link_waitq(struct knote *kn, struct waitq *wq, uint64_t *reserved_link)
 {
        struct kqueue *kq = kn->kn_kq;
        kern_return_t kr;
 
-       kr = wait_queue_link_noalloc(wq, kq->kq_wqs, wql);
+       kr = waitq_link(wq, kq->kq_wqs, WAITQ_SHOULD_LOCK, reserved_link);
        if (kr == KERN_SUCCESS) {
                knote_markstayqueued(kn);
                return (0);
@@ -2621,12 +2961,12 @@ knote_link_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t
  * On success, caller is responsible for the link structure
  */
 int
-knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t *wqlp)
+knote_unlink_waitq(struct knote *kn, struct waitq *wq)
 {
        struct kqueue *kq = kn->kn_kq;
        kern_return_t kr;
 
-       kr = wait_queue_unlink_nofree(wq, kq->kq_wqs, wqlp);
+       kr = waitq_unlink(wq, kq->kq_wqs);
        knote_clearstayqueued(kn);
        return ((kr != KERN_SUCCESS) ? EINVAL : 0);
 }
@@ -2753,8 +3093,10 @@ knote_drop(struct knote *kn, __unused struct proc *ctxp)
        proc_fdunlock(p);
 
        if (needswakeup)
-               wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kn->kn_status,
-                   THREAD_AWAKENED);
+               waitq_wakeup64_all((struct waitq *)kq->kq_wqs,
+                                  CAST_EVENT64_T(&kn->kn_status),
+                                  THREAD_AWAKENED,
+                                  WAITQ_ALL_PRIORITIES);
 
        if (kn->kn_fop->f_isfd)
                fp_drop(p, kn->kn_id, kn->kn_fp, 0);
@@ -2764,17 +3106,19 @@ knote_drop(struct knote *kn, __unused struct proc *ctxp)
 
 /* called with kqueue lock held */
 static void
-knote_activate(struct knote *kn, int propagate)
+knote_activate(struct knote *kn, int force)
 {
        struct kqueue *kq = kn->kn_kq;
 
+       if (!force && (kn->kn_status & KN_ACTIVE))
+               return;
+
        kn->kn_status |= KN_ACTIVE;
        knote_enqueue(kn);
        kqueue_wakeup(kq, 0);
 
-       /* this is a real event: wake up the parent kq, too */
-       if (propagate)
-               KNOTE(&kq->kq_sel.si_note, 0);
+       /* wake up the parent kq, too */
+       KNOTE(&kq->kq_sel.si_note, 0);
 }
 
 /* called with kqueue lock held */
@@ -3495,15 +3839,17 @@ fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo)
        st = &kinfo->kq_stat;
 
        st->vst_size = kq->kq_count;
-       if (kq->kq_state & KQ_KEV64)
+       if (kq->kq_state & KQ_KEV_QOS)
+               st->vst_blksize = sizeof(struct kevent_qos_s);
+       else if (kq->kq_state & KQ_KEV64)
                st->vst_blksize = sizeof(struct kevent64_s);
        else
                st->vst_blksize = sizeof(struct kevent);
        st->vst_mode = S_IFIFO;
-       if (kq->kq_state & KQ_SEL)
-               kinfo->kq_state |=  PROC_KQUEUE_SELECT;
-       if (kq->kq_state & KQ_SLEEP)
-               kinfo->kq_state |= PROC_KQUEUE_SLEEP;
+
+       /* flags exported to libproc as PROC_KQUEUE_* (sys/proc_info.h) */
+#define PROC_KQUEUE_MASK (KQ_SEL|KQ_SLEEP|KQ_KEV32|KQ_KEV64|KQ_KEV_QOS)
+       kinfo->kq_state = kq->kq_state & PROC_KQUEUE_MASK;
 
        return (0);
 }
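
Exporting kq_state with a plain mask only preserves the old behavior if the KQ_* bits and the PROC_KQUEUE_* values published through sys/proc_info.h are numerically identical; that equality is an assumption implied by this change, not something visible in the diff. If it holds, a compile-time check placed next to the definitions could document it, for example:

/* fragment only -- relies on kernel-internal KQ_* and exported PROC_KQUEUE_* values */
_Static_assert(KQ_SEL == PROC_KQUEUE_SELECT, "exported kqueue state bits must match");
_Static_assert(KQ_SLEEP == PROC_KQUEUE_SLEEP, "exported kqueue state bits must match");
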
@@ -3526,3 +3872,95 @@ knote_clearstayqueued(struct knote *kn)
        knote_dequeue(kn);
        kqunlock(kn->kn_kq);
 }
+
+static unsigned long
+kevent_extinfo_emit(struct kqueue *kq, struct knote *kn, struct kevent_extinfo *buf,
+               unsigned long buflen, unsigned long nknotes)
+{
+       struct kevent_qos_s kevqos;
+       struct kevent_internal_s *kevp;
+       for (; kn; kn = SLIST_NEXT(kn, kn_link)) {
+               if (kq == kn->kn_kq) {
+                       if (nknotes < buflen) {
+                               struct kevent_extinfo *info = &buf[nknotes];
+
+                               kqlock(kq);
+                               bzero(&kevqos, sizeof(kevqos));
+                               kevp = &(kn->kn_kevent);
+
+                               kevqos.ident = kevp->ident;
+                               kevqos.filter = kevp->filter;
+                               kevqos.flags = kevp->flags;
+                               kevqos.fflags = kevp->fflags;
+                               kevqos.data = (int64_t) kevp->data;
+                               kevqos.udata = kevp->udata;
+                               kevqos.ext[0] = kevp->ext[0];
+                               kevqos.ext[1] = kevp->ext[1];
+
+                               memcpy(&info->kqext_kev, &kevqos, sizeof(info->kqext_kev));
+                               info->kqext_sdata = kn->kn_sdata;
+
+                               /* status flags exported to userspace/libproc */
+#define KQEXT_STATUS_MASK (KN_ACTIVE|KN_QUEUED|KN_DISABLED|KN_STAYQUEUED)
+                               info->kqext_status = kn->kn_status & KQEXT_STATUS_MASK;
+                               info->kqext_sfflags = kn->kn_sfflags;
+
+                               kqunlock(kq);
+                       }
+
+                       /* we return the total number of knotes, which may exceed what fits in the buffer */
+                       nknotes++;
+               }
+       }
+
+       return nknotes;
+}
+
+int
+pid_kqueue_extinfo(proc_t p, struct kqueue *kq, user_addr_t ubuf,
+               uint32_t bufsize, int32_t *retval)
+{
+       struct knote *kn;
+       int i;
+       int err = 0;
+       struct filedesc *fdp = p->p_fd;
+       unsigned long nknotes = 0;
+       unsigned long buflen = bufsize / sizeof(struct kevent_extinfo);
+       struct kevent_extinfo *kqext = NULL;
+
+       kqext = kalloc(buflen * sizeof(struct kevent_extinfo));
+       if (kqext == NULL) {
+               err = ENOMEM;
+               goto out;
+       }
+       bzero(kqext, buflen * sizeof(struct kevent_extinfo));
+
+       proc_fdlock(p);
+
+       for (i = 0; i < fdp->fd_knlistsize; i++) {
+               kn = SLIST_FIRST(&fdp->fd_knlist[i]);
+               nknotes = kevent_extinfo_emit(kq, kn, kqext, buflen, nknotes);
+       }
+
+       if (fdp->fd_knhashmask != 0) {
+               for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) {
+                       kn = SLIST_FIRST(&fdp->fd_knhash[i]);
+                       nknotes = kevent_extinfo_emit(kq, kn, kqext, buflen, nknotes);
+               }
+       }
+
+       proc_fdunlock(p);
+
+       assert(bufsize >= sizeof(struct kevent_extinfo) * min(buflen, nknotes));
+       err = copyout(kqext, ubuf, sizeof(struct kevent_extinfo) * min(buflen, nknotes));
+
+ out:
+       if (kqext) {
+               kfree(kqext, buflen * sizeof(struct kevent_extinfo));
+               kqext = NULL;
+       }
+
+       if (!err)
+               *retval = nknotes;
+       return err;
+}
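
Because kevent_extinfo_emit() keeps counting past the end of the buffer, pid_kqueue_extinfo() reports the total number of knotes in *retval even when fewer entries were copied out, so a consumer can size its buffer in two passes. A sketch of that pattern; the wrapper name kqueue_extinfo_fetch and its signature are hypothetical (the real entry point is the proc_info path that calls pid_kqueue_extinfo()):

#include <stdint.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/proc_info.h>   /* assumed home of struct kevent_extinfo in this release */

/* Hypothetical wrapper around the proc_info call that reaches pid_kqueue_extinfo(). */
extern int kqueue_extinfo_fetch(pid_t pid, int kqfd, struct kevent_extinfo *buf,
                                uint32_t bufsize, int32_t *total);

static struct kevent_extinfo *
fetch_all_knotes(pid_t pid, int kqfd, int32_t *count_out)
{
        int32_t total = 0;
        struct kevent_extinfo probe;
        struct kevent_extinfo *buf;

        /* first pass: a one-entry buffer is enough to learn the total count */
        if (kqueue_extinfo_fetch(pid, kqfd, &probe, sizeof(probe), &total) != 0 || total <= 0)
                return NULL;

        /* second pass: fetch everything */
        buf = calloc((size_t)total, sizeof(*buf));
        if (buf == NULL ||
            kqueue_extinfo_fetch(pid, kqfd, buf, (uint32_t)(total * sizeof(*buf)), &total) != 0) {
                free(buf);
                return NULL;
        }
        *count_out = total;
        return buf;
}
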
index fc270ae216c05898f2f5c0350b4ed5be14d41b60..e2e7d1526403588d395587c067f781de55fe3e5b 100644 (file)
@@ -172,9 +172,8 @@ static void (*dtrace_proc_waitfor_hook)(proc_t) = NULL;
 #endif
 
 /* support for child creation in exec after vfork */
-thread_t fork_create_child(task_t parent_task, coalition_t parent_coalition, proc_t child_proc, int inherit_memory, int is64bit);
+thread_t fork_create_child(task_t parent_task, coalition_t *parent_coalition, proc_t child_proc, int inherit_memory, int is64bit);
 void vfork_exit(proc_t p, int rv);
-int setsigvec(proc_t, thread_t, int, struct __kern_sigaction *, boolean_t in_sigstart);
 extern void proc_apply_task_networkbg_internal(proc_t, thread_t);
 
 /*
@@ -228,9 +227,9 @@ __attribute__((noinline)) int __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL_
 extern vm_map_t bsd_pageable_map;
 extern const struct fileops vnops;
 
-#define        ROUND_PTR(type, addr)   \
-       (type *)( ( (uintptr_t)(addr) + 16 - 1) \
-                 & ~(16 - 1) )
+#define        USER_ADDR_ALIGN(addr, val) \
+       ( ( (user_addr_t)(addr) + (val) - 1) \
+               & ~((val) - 1) )
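
USER_ADDR_ALIGN() is the usual add-then-mask round-up: it bumps addr to the next multiple of val, which must be a power of two. A self-contained check of the arithmetic (uintptr_t stands in for user_addr_t):

#include <assert.h>
#include <stdint.h>

#define ALIGN_UP(addr, val)  ((((uintptr_t)(addr)) + (val) - 1) & ~((uintptr_t)(val) - 1))

int main(void)
{
        assert(ALIGN_UP(0x1000, 16) == 0x1000);   /* already aligned: unchanged */
        assert(ALIGN_UP(0x1001, 16) == 0x1010);   /* rounds up to the next boundary */
        assert(ALIGN_UP(0x100f, 16) == 0x1010);
        return 0;
}
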
 
 struct image_params;   /* Forward */
 static int exec_activate_image(struct image_params *imgp);
@@ -250,7 +249,7 @@ static void exec_resettextvp(proc_t, struct image_params *);
 static int check_for_signature(proc_t, struct image_params *);
 static void exec_prefault_data(proc_t, struct image_params *, load_result_t *);
 static errno_t exec_handle_port_actions(struct image_params *imgp, short psa_flags, boolean_t * portwatch_present, ipc_port_t * portwatch_ports);
-static errno_t exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp,
+static errno_t exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp, uint64_t psa_darwin_role,
                              ipc_port_t * portwatch_ports, int portwatch_count);
 
 /*
@@ -307,6 +306,12 @@ exec_add_user_string(struct image_params *imgp, user_addr_t str, int seg, boolea
        return error;
 }
 
+/*
+ * dyld is now passed the executable path as a getenv-like variable
+ * in the same fashion as the stack_guard and malloc_entropy keys.
+ */
+#define        EXECUTABLE_KEY "executable_path="
+
 /*
  * exec_save_path
  *
@@ -342,22 +347,26 @@ exec_add_user_string(struct image_params *imgp, user_addr_t str, int seg, boolea
  *             unacceptable for dyld.
  */
 static int
-exec_save_path(struct image_params *imgp, user_addr_t path, int seg)
+exec_save_path(struct image_params *imgp, user_addr_t path, int seg, const char **excpath)
 {
        int error;
-       size_t  len;
+       size_t len;
        char *kpath;
 
+       // imgp->ip_strings can come out of a cache, so we need to obliterate the
+       // old path.
+       memset(imgp->ip_strings, '\0', strlen(EXECUTABLE_KEY) + MAXPATHLEN);
+
        len = MIN(MAXPATHLEN, imgp->ip_strspace);
 
        switch(seg) {
        case UIO_USERSPACE32:
        case UIO_USERSPACE64:   /* Same for copyin()... */
-               error = copyinstr(path, imgp->ip_strings, len, &len);
+               error = copyinstr(path, imgp->ip_strings + strlen(EXECUTABLE_KEY), len, &len);
                break;
        case UIO_SYSSPACE:
                kpath = CAST_DOWN(char *,path); /* SAFE */
-               error = copystr(kpath, imgp->ip_strings, len, &len);
+               error = copystr(kpath, imgp->ip_strings + strlen(EXECUTABLE_KEY), len, &len);
                break;
        default:
                error = EFAULT;
@@ -365,8 +374,15 @@ exec_save_path(struct image_params *imgp, user_addr_t path, int seg)
        }
 
        if (!error) {
+               bcopy(EXECUTABLE_KEY, imgp->ip_strings, strlen(EXECUTABLE_KEY));
+               len += strlen(EXECUTABLE_KEY);
+
                imgp->ip_strendp += len;
                imgp->ip_strspace -= len;
+
+               if (excpath) {
+                       *excpath = imgp->ip_strings + strlen(EXECUTABLE_KEY);
+               }
        }
 
        return(error);
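
After this change the string area handed to the new image starts with "executable_path=<path>", so dyld can recover the executable path the same way it finds the stack_guard= and malloc_entropy= entries: by scanning the "apple" string vector passed alongside argv/envp. A sketch of that lookup (illustrative only, not dyld's actual code):

#include <stddef.h>
#include <string.h>

static const char *
find_executable_path(const char **apple)
{
        static const char key[] = "executable_path=";

        for (; apple != NULL && *apple != NULL; apple++) {
                if (strncmp(*apple, key, sizeof(key) - 1) == 0)
                        return *apple + sizeof(key) - 1;   /* value after the '=' */
        }
        return NULL;
}
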
@@ -517,7 +533,7 @@ exec_shell_imgact(struct image_params *imgp)
 
        exec_reset_save_path(imgp);
        exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_buffer),
-                                                       UIO_SYSSPACE);
+                                                       UIO_SYSSPACE, NULL);
 
        /* Copy the entire interpreter + args for later processing into argv[] */
        interp = imgp->ip_interp_buffer;
@@ -718,6 +734,7 @@ exec_mach_imgact(struct image_params *imgp)
        struct _posix_spawnattr *psa = NULL;
        int                     spawn = (imgp->ip_flags & IMGPF_SPAWN);
        int                     vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC);
+       int                     p_name_len;
 
        /*
         * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
@@ -815,7 +832,7 @@ grade:
         */
        if (vfexec || spawn) {
                if (vfexec) {
-                       imgp->ip_new_thread = fork_create_child(task, COALITION_NULL, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT));
+                       imgp->ip_new_thread = fork_create_child(task, NULL, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT));
                        if (imgp->ip_new_thread == NULL) {
                                error = ENOMEM;
                                goto bad;
@@ -880,7 +897,7 @@ grade:
        if (load_result.csflags & CS_VALID) {
                imgp->ip_csflags |= load_result.csflags & 
                        (CS_VALID|
-                        CS_HARD|CS_KILL|CS_ENFORCEMENT|CS_REQUIRE_LV|CS_DYLD_PLATFORM|
+                        CS_HARD|CS_KILL|CS_RESTRICT|CS_ENFORCEMENT|CS_REQUIRE_LV|CS_DYLD_PLATFORM|
                         CS_EXEC_SET_HARD|CS_EXEC_SET_KILL|CS_EXEC_SET_ENFORCEMENT);
        } else {
                imgp->ip_csflags &= ~CS_VALID;
@@ -1011,20 +1028,22 @@ grade:
         * Remember file name for accounting.
         */
        p->p_acflag &= ~AFORK;
-       /* If the translated name isn't NULL, then we want to use
-        * that translated name as the name we show as the "real" name.
-        * Otherwise, use the name passed into exec.
+
+       /*
+        * Set p->p_comm and p->p_name to the name passed to exec
         */
-       if (0 != imgp->ip_p_comm[0]) {
-               bcopy((caddr_t)imgp->ip_p_comm, (caddr_t)p->p_comm,
-                       sizeof(p->p_comm));
-       } else {
-               if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN)
-                       imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN;
-               bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
-                       (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
-               p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
-       }
+       p_name_len = sizeof(p->p_name) - 1;
+	if (imgp->ip_ndp->ni_cnd.cn_namelen > p_name_len)
+               imgp->ip_ndp->ni_cnd.cn_namelen = p_name_len;
+       bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_name,
+               (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
+       p->p_name[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
+
+       if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN)
+               imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN;
+       bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
+               (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
+       p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
 
        pal_dbg_set_task_name( p->task );
 
@@ -1095,12 +1114,6 @@ grade:
                }
        }
 
-       /*
-        * Ensure the 'translated' and 'affinity' flags are cleared, since we
-        * no longer run PowerPC binaries.
-        */
-       OSBitAndAtomic(~((uint32_t)(P_TRANSLATED | P_AFFINITY)), &p->p_flag);
-
        /*
         * If posix_spawned with the START_SUSPENDED flag, stop the
         * process before it runs.
@@ -1111,7 +1124,7 @@ grade:
                        proc_lock(p);
                        p->p_stat = SSTOP;
                        proc_unlock(p);
-                       (void) task_suspend(p->task);
+                       (void) task_suspend_internal(p->task);
                }
        }
 
@@ -1218,6 +1231,7 @@ static int
 exec_activate_image(struct image_params *imgp)
 {
        struct nameidata *ndp = NULL;
+       const char *excpath;
        int error;
        int resid;
        int once = 1;   /* save SGUID-ness for interpreted files */
@@ -1229,13 +1243,13 @@ exec_activate_image(struct image_params *imgp)
        if (error)
                goto bad_notrans;
        
-       error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg);
+       error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg, &excpath);
        if (error) {
                goto bad_notrans;
        }
 
-       /* Use imgp->ip_strings, which contains the copyin-ed exec path */
-       DTRACE_PROC1(exec, uintptr_t, imgp->ip_strings);
+       /* Use excpath, which contains the copyin-ed exec path */
+       DTRACE_PROC1(exec, uintptr_t, excpath);
 
        MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
        if (ndp == NULL) {
@@ -1244,7 +1258,7 @@ exec_activate_image(struct image_params *imgp)
        }
 
        NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
-                  UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_strings), imgp->ip_vfs_context);
+                  UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context);
 
 again:
        error = namei(ndp);
@@ -1339,9 +1353,9 @@ encapsulated_binary:
                        imgp->ip_vp = NULL;     /* already put */
                        imgp->ip_ndp = NULL; /* already nameidone */
 
-                       /* Use imgp->ip_strings, which exec_shell_imgact reset to the interpreter */
+                       /* Use excpath, which exec_shell_imgact reset to the interpreter */
                        NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF,
-                                  UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_strings), imgp->ip_vfs_context);
+                                  UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context);
 
                        proc_transend(p, 0);
                        goto again;
@@ -1387,11 +1401,12 @@ bad_notrans:
  * Returns:     0                       Success
  */
 static errno_t
-exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp,
+exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp, uint64_t psa_darwin_role,
                              ipc_port_t * portwatch_ports, int portwatch_count)
 {
        int apptype     = TASK_APPTYPE_NONE;
        int qos_clamp   = THREAD_QOS_UNSPECIFIED;
+       int role        = TASK_UNSPECIFIED;
 
        if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != 0) {
                int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK;
@@ -1440,8 +1455,14 @@ exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp,
                }
        }
 
-       if (psa_apptype != TASK_APPTYPE_NONE || qos_clamp != THREAD_QOS_UNSPECIFIED) {
-               proc_set_task_spawnpolicy(p->task, apptype, qos_clamp,
+       if (psa_darwin_role != PRIO_DARWIN_ROLE_DEFAULT) {
+               proc_darwin_role_to_task_role(psa_darwin_role, &role);
+       }
+
+       if (apptype   != TASK_APPTYPE_NONE      ||
+           qos_clamp != THREAD_QOS_UNSPECIFIED ||
+           role      != TASK_UNSPECIFIED) {
+               proc_set_task_spawnpolicy(p->task, apptype, qos_clamp, role,
                                          portwatch_ports, portwatch_count);
        }
 
@@ -1850,6 +1871,62 @@ spawn_free_macpolicyinfo(_posix_spawn_mac_policy_extensions_t psmx)
 }
 #endif /* CONFIG_MACF */
 
+#if CONFIG_COALITIONS
+static inline void spawn_coalitions_release_all(coalition_t coal[COALITION_NUM_TYPES])
+{
+       for (int c = 0; c < COALITION_NUM_TYPES; c++) {
+               if (coal[c]) {
+                       coalition_remove_active(coal[c]);
+                       coalition_release(coal[c]);
+               }
+       }
+}
+#endif
+
+void
+proc_set_return_wait(proc_t p)
+{
+       proc_lock(p);
+       p->p_lflag |= P_LRETURNWAIT;
+       proc_unlock(p);
+}
+
+void
+proc_clear_return_wait(proc_t p, thread_t child_thread)
+{
+       proc_lock(p);
+
+       p->p_lflag &= ~P_LRETURNWAIT;
+       if (p->p_lflag & P_LRETURNWAITER) {
+               wakeup(&p->p_lflag);
+       }
+
+       proc_unlock(p);
+
+       (void)thread_resume(child_thread);
+}
+
+void
+proc_wait_to_return()
+{
+       proc_t  p;
+
+       p = current_proc();
+       proc_lock(p);
+
+       if (p->p_lflag & P_LRETURNWAIT) {
+               p->p_lflag |= P_LRETURNWAITER;
+               do {
+                       msleep(&p->p_lflag, &p->p_mlock, 0,
+                               "thread_check_setup_complete", NULL);
+               } while (p->p_lflag & P_LRETURNWAIT);
+               p->p_lflag &= ~P_LRETURNWAITER;
+       }
+
+       proc_unlock(p);
+       thread_bootstrap_return();
+}
+
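
proc_set_return_wait(), proc_clear_return_wait() and proc_wait_to_return() form a one-shot latch: the freshly created child parks before its first return to user space until the spawning parent has finished setting it up and resumes it. The same latch pattern in miniature, using pthreads purely as a userspace analogy (the kernel code above uses p_lflag bits with msleep/wakeup and thread_resume instead):

#include <pthread.h>
#include <stdbool.h>

struct return_wait {
        pthread_mutex_t lock;
        pthread_cond_t  cv;
        bool            must_wait;               /* analogous to P_LRETURNWAIT */
};

static void rw_set(struct return_wait *rw)       /* ~ proc_set_return_wait() */
{
        pthread_mutex_lock(&rw->lock);
        rw->must_wait = true;
        pthread_mutex_unlock(&rw->lock);
}

static void rw_clear(struct return_wait *rw)     /* ~ proc_clear_return_wait() */
{
        pthread_mutex_lock(&rw->lock);
        rw->must_wait = false;
        pthread_cond_broadcast(&rw->cv);          /* ~ wakeup(&p->p_lflag) */
        pthread_mutex_unlock(&rw->lock);
}

static void rw_wait(struct return_wait *rw)      /* ~ proc_wait_to_return() */
{
        pthread_mutex_lock(&rw->lock);
        while (rw->must_wait)
                pthread_cond_wait(&rw->cv, &rw->lock);   /* ~ msleep() loop */
        pthread_mutex_unlock(&rw->lock);
}
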
 /*
  * posix_spawn
  *
@@ -1890,7 +1967,6 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
        struct vnode_attr *origvap;
        struct uthread  *uthread = 0;   /* compiler complains if not set to 0*/
        int error, sig;
-       char alt_p_comm[sizeof(p->p_comm)] = {0};       /* for PowerPC */
        int is_64 = IS_64BIT_PROCESS(p);
        struct vfs_context context;
        struct user__posix_spawn_args_desc px_args;
@@ -1926,9 +2002,9 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
        imgp->ip_origvattr = origvap;
        imgp->ip_vfs_context = &context;
        imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE);
-       imgp->ip_p_comm = alt_p_comm;           /* for PowerPC */
        imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
        imgp->ip_mac_return = 0;
+       imgp->ip_reserved = NULL;
 
        if (uap->adesc != USER_ADDR_NULL) {
                if(is_64) {
@@ -1950,6 +2026,10 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
                        px_args.port_actions = CAST_USER_ADDR_T(px_args32.port_actions);
                        px_args.mac_extensions_size = px_args32.mac_extensions_size;
                        px_args.mac_extensions = CAST_USER_ADDR_T(px_args32.mac_extensions);
+                       px_args.coal_info_size = px_args32.coal_info_size;
+                       px_args.coal_info = CAST_USER_ADDR_T(px_args32.coal_info);
+                       px_args.reserved = 0;
+                       px_args.reserved_size = 0;
                }
                if (error)
                        goto bad;
@@ -2019,6 +2099,7 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
                                goto bad;
                        }
                }
+
 #if CONFIG_MACF
                if (px_args.mac_extensions_size != 0) {
                        if ((error = spawn_copyin_macpolicyinfo(&px_args, (_posix_spawn_mac_policy_extensions_t *)&imgp->ip_px_smpx)) != 0)
@@ -2049,50 +2130,110 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
         */
        if (imgp->ip_px_sa == NULL || !(px_sa.psa_flags & POSIX_SPAWN_SETEXEC)){
 
-               /*
-                * Set the new task's coalition, if it is requested.
-                * TODO: privilege check - 15365900
-                */
-               coalition_t coal = COALITION_NULL;
+               /* Set the new task's coalition, if it is requested.  */
+               coalition_t coal[COALITION_NUM_TYPES] = { COALITION_NULL };
 #if CONFIG_COALITIONS
-               if (imgp->ip_px_sa) {
-                       uint64_t cid = px_sa.psa_coalitionid;
+               int i, ncoals;
+               kern_return_t kr = KERN_SUCCESS;
+               struct _posix_spawn_coalition_info coal_info;
+               int coal_role[COALITION_NUM_TYPES];
+
+               if (imgp->ip_px_sa == NULL || !px_args.coal_info)
+                       goto do_fork1;
+
+               memset(&coal_info, 0, sizeof(coal_info));
+
+               if (px_args.coal_info_size > sizeof(coal_info))
+                       px_args.coal_info_size = sizeof(coal_info);
+               error = copyin(px_args.coal_info,
+                              &coal_info, px_args.coal_info_size);
+               if (error != 0)
+                       goto bad;
+
+               ncoals = 0;
+               for (i = 0; i < COALITION_NUM_TYPES; i++) {
+                       uint64_t cid = coal_info.psci_info[i].psci_id;
                        if (cid != 0) {
-#if COALITION_DEBUG
-                               printf("%s: searching for coalition ID %llu\n", __func__, cid);
-#endif
-                               coal = coalition_find_and_activate_by_id(cid);
-                               if (coal == COALITION_NULL) {
-#if COALITION_DEBUG
-                                       printf("%s: could not find coalition ID %llu (perhaps it has been terminated or reaped)\n", __func__, cid);
-#endif
+                               /*
+                                * don't allow tasks which are not in a
+                                * privileged coalition to spawn processes
+                                * into coalitions other than their own
+                                */
+                               if (!task_is_in_privileged_coalition(p->task, i)) {
+					coal_dbg("ERROR: %d not in privileged "
+                                                "coalition of type %d",
+                                                p->p_pid, i);
+                                       spawn_coalitions_release_all(coal);
+                                       error = EPERM;
+                                       goto bad;
+                               }
+
+                               coal_dbg("searching for coalition id:%llu", cid);
+                               /*
+                                * take a reference and activation on the
+                                * coalition to guard against free-while-spawn
+                                * races
+                                */
+                               coal[i] = coalition_find_and_activate_by_id(cid);
+                               if (coal[i] == COALITION_NULL) {
+                                       coal_dbg("could not find coalition id:%llu "
+                                                "(perhaps it has been terminated or reaped)", cid);
+                                       /*
+                                        * release any other coalition's we
+                                        * may have a reference to
+                                        */
+                                       spawn_coalitions_release_all(coal);
                                        error = ESRCH;
                                        goto bad;
                                }
+                               if (coalition_type(coal[i]) != i) {
+                                       coal_dbg("coalition with id:%lld is not of type:%d"
+                                                " (it's type:%d)", cid, i, coalition_type(coal[i]));
+                                       error = ESRCH;
+                                       goto bad;
+                               }
+                               coal_role[i] = coal_info.psci_info[i].psci_role;
+                               ncoals++;
                        }
                }
+               if (ncoals < COALITION_NUM_TYPES) {
+                       /*
+                        * If the user is attempting to spawn into a subset of
+                        * the known coalition types, then make sure they have
+                        * _at_least_ specified a resource coalition. If not,
+                        * the following fork1() call will implicitly force an
+                        * inheritance from 'p' and won't actually spawn the
+                        * new task into the coalitions the user specified.
+                        * (also the call to coalitions_set_roles will panic)
+                        */
+                       if (coal[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
+                               spawn_coalitions_release_all(coal);
+                               error = EINVAL;
+                               goto bad;
+                       }
+               }
+do_fork1:
 #endif /* CONFIG_COALITIONS */
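
The copyin loop above implies the per-type layout of struct _posix_spawn_coalition_info: one { psci_id, psci_role } slot per coalition type, indexed by COALITION_TYPE_*. It also enforces three rules: the spawning task must itself be in a privileged coalition of the matching type, each requested coalition's type must match its slot index, and if only a subset of types is supplied a resource coalition must be among them. A sketch of the implied layout; any fields beyond psci_id/psci_role and the exact number of coalition types are assumptions, since the real definition lives outside this diff:

#include <stdint.h>

#define NUM_COALITION_TYPES_SKETCH 2    /* assumed: resource + jetsam in this release */

struct posix_spawn_coalition_info_sketch {
        struct {
                uint64_t psci_id;       /* coalition id to spawn into; 0 = not specified */
                uint32_t psci_role;     /* role later handed to coalitions_set_roles() */
        } psci_info[NUM_COALITION_TYPES_SKETCH];
};
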
 
                error = fork1(p, &imgp->ip_new_thread, PROC_CREATE_SPAWN, coal);
 
-               if (error != 0) {
-                       if (coal != COALITION_NULL) {
 #if CONFIG_COALITIONS
-                               coalition_remove_active(coal);
-                               coalition_release(coal);
+               /* set the roles of this task within each given coalition */
+               if (error == 0) {
+                       kr = coalitions_set_roles(coal, get_threadtask(imgp->ip_new_thread), coal_role);
+                       if (kr != KERN_SUCCESS)
+                               error = EINVAL;
+               }
+
+               /* drop our references and activations - fork1() now holds them */
+               spawn_coalitions_release_all(coal);
 #endif /* CONFIG_COALITIONS */
-                       }
+               if (error != 0) {
                        goto bad;
                }
                imgp->ip_flags |= IMGPF_SPAWN;  /* spawn w/o exec */
                spawn_no_exec = TRUE;           /* used in later tests */
 
-               if (coal != COALITION_NULL) {
-#if CONFIG_COALITIONS
-                       coalition_remove_active(coal);
-                       coalition_release(coal);
-#endif /* CONFIG_COALITIONS */
-               }
        }
 
        if (spawn_no_exec) {
@@ -2209,16 +2350,20 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
                        }
                }
 
+#if !SECURE_KERNEL
                /*
                 * Disable ASLR for the spawned process.
-                */
-               /*
-                * But only do so if we are not embedded; embedded allows for a
-                * boot-arg (-disable_aslr) to deal with this (which itself is
-                * only honored on DEVELOPMENT or DEBUG builds of xnu).
+                *
+                * But only do so if we are not embedded + RELEASE.
+                * While embedded allows for a boot-arg (-disable_aslr)
+                * to deal with this (which itself is only honored on
+                * DEVELOPMENT or DEBUG builds of xnu), it is often
+                * useful or necessary to disable ASLR on a per-process
+                * basis for unit testing and debugging.
                 */
                if (px_sa.psa_flags & _POSIX_SPAWN_DISABLE_ASLR)
                        OSBitOrAtomic(P_DISABLE_ASLR, &p->p_flag);
+#endif /* !SECURE_KERNEL */
 
                /*
                 * Forcibly disallow execution from data pages for the spawned process
@@ -2328,15 +2473,11 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
                         * Always treat a CPU monitor activation coming from spawn as entitled. Requiring
                         * an entitlement to configure the monitor a certain way seems silly, since
                         * whomever is turning it on could just as easily choose not to do so.
-                        *
-                        * XXX - Ignore the parameters that we get from userland. The spawnattr method of
-                        * activating the monitor always gets the system default parameters. Once we have
-                        * an explicit spawn SPI for configuring the defaults, we can revert this to
-                        * respect the params passed in from userland.
                         */
                        error = proc_set_task_ruse_cpu(p->task,
                                        TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC,
-                                       PROC_POLICY_CPUMON_DEFAULTS, 0,
+                                       px_sa.psa_cpumonitor_percent,
+                                       px_sa.psa_cpumonitor_interval * NSEC_PER_SEC,
                                        0, TRUE);
                }
        }
@@ -2370,11 +2511,36 @@ bad:
 #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
                /* Has jetsam attributes? */
                if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_SET)) {
-                       memorystatus_update(p, px_sa.psa_priority, 0, (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
-                           TRUE, px_sa.psa_high_water_mark, (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND), 
-                                           (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_FATAL));
+                       /*
+                        * With 2-level high-water-mark support, POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is no
+                        * longer relevant, as background limits are described via the inactive limit slots.
+                        * At the kernel layer, the flag is ignored.
+                        *
+                        * That said, however, if the POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is passed in,
+                        * we attempt to mimic previous behavior by forcing the BG limit data into the
+                        * inactive/non-fatal mode and force the active slots to hold system_wide/fatal mode.
+                        * The kernel layer will flag this mapping.
+                        */
+                       if (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND) {
+                               memorystatus_update(p, px_sa.psa_priority, 0,
+                                           (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
+                                           TRUE,
+                                           -1, TRUE,
+                                           px_sa.psa_memlimit_inactive, FALSE,
+                                           (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND));
+                       } else {
+                               memorystatus_update(p, px_sa.psa_priority, 0,
+                                           (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
+                                           TRUE,
+                                           px_sa.psa_memlimit_active,
+                                           (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_ACTIVE_FATAL),
+                                           px_sa.psa_memlimit_inactive,
+                                           (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_INACTIVE_FATAL),
+                                           (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND));
+                       }
+
                }
-#endif
+#endif /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
        }
 
        /*
@@ -2422,7 +2588,7 @@ bad:
        if (error == 0 && imgp->ip_px_sa != NULL) {
                struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
 
-               exec_handle_spawnattr_policy(p, psa->psa_apptype, psa->psa_qos_clamp,
+               exec_handle_spawnattr_policy(p, psa->psa_apptype, psa->psa_qos_clamp, psa->psa_darwin_role,
                                              portwatch_ports, portwatch_count);
        }
 
@@ -2479,7 +2645,6 @@ bad:
                        FREE(imgp->ip_px_sfa, M_TEMP);
                if (imgp->ip_px_spa != NULL)
                        FREE(imgp->ip_px_spa, M_TEMP);
-               
 #if CONFIG_MACF
                if (imgp->ip_px_smpx != NULL)
                        spawn_free_macpolicyinfo(imgp->ip_px_smpx);
@@ -2555,6 +2720,7 @@ bad:
                                p->exit_thread = current_thread();
                                proc_unlock(p);
                                exit1(p, 1, (int *)NULL);
+                               proc_clear_return_wait(p, imgp->ip_new_thread);
                                if (exec_done == FALSE) {
                                        task_deallocate(get_threadtask(imgp->ip_new_thread));
                                        thread_deallocate(imgp->ip_new_thread);
@@ -2562,6 +2728,7 @@ bad:
                        } else {
                                /* someone is doing it for us; just skip it */
                                proc_unlock(p);
+                               proc_clear_return_wait(p, imgp->ip_new_thread);
                        }
                } else {
 
@@ -2574,7 +2741,7 @@ bad:
                         * queue references on them, so we should be fine
                         * with the delayed resume of the thread here.
                         */
-                       (void)thread_resume(imgp->ip_new_thread);
+                       proc_clear_return_wait(p, imgp->ip_new_thread);
                }
        }
        if (bufp != NULL) {
@@ -2658,7 +2825,6 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval)
        struct vnode_attr *vap;
        struct vnode_attr *origvap;
        int error;
-       char alt_p_comm[sizeof(p->p_comm)] = {0};       /* for PowerPC */
        int is_64 = IS_64BIT_PROCESS(p);
        struct vfs_context context;
        struct uthread  *uthread;
@@ -2686,7 +2852,6 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval)
        imgp->ip_origvattr = origvap;
        imgp->ip_vfs_context = &context;
        imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE) | ((p->p_flag & P_DISABLE_ASLR) ? IMGPF_DISABLE_ASLR : IMGPF_NONE);
-       imgp->ip_p_comm = alt_p_comm;           /* for PowerPC */
        imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
        imgp->ip_mac_return = 0;
 
@@ -2752,7 +2917,7 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval)
 
                if (imgp->ip_flags & IMGPF_VFORK_EXEC) {
                        vfork_return(p, retval, p->p_pid);
-                       (void)thread_resume(imgp->ip_new_thread);
+                       proc_clear_return_wait(p, imgp->ip_new_thread);
                }
        } else {
                DTRACE_PROC1(exec__failure, int, error);
@@ -3929,8 +4094,101 @@ create_unix_stack(vm_map_t map, load_result_t* load_result,
 
 #include <sys/reboot.h>
 
+/*
+ * load_init_program_at_path
+ *
+ * Description:        Load the "init" program; in most cases, this will be "launchd"
+ *
+ * Parameters: p                       Process to call execve() to create
+ *                                     the "init" program
+ *             scratch_addr            Page in p, scratch space
+ *             path                    NULL terminated path
+ *
+ * Returns:    KERN_SUCCESS            Success
+ *             !KERN_SUCCESS           See execve/mac_execve for error codes
+ *
+ * Notes:      The process that is passed in is the first manufactured
+ *             process on the system, and gets here via bsd_ast() firing
+ *             for the first time.  This is done to ensure that bsd_init()
+ *             has run to completion.
+ *
+ *             The address map of the first manufactured process is 32 bit.
+ *             WHEN this becomes 64b, this code will fail; it needs to be
+ *             made 64b capable.
+ */
+static int
+load_init_program_at_path(proc_t p, user_addr_t scratch_addr, const char* path)
+{
+       uint32_t argv[3];
+       uint32_t argc = 0;
+       int retval[2];
+       struct execve_args init_exec_args;
+
+       /*
+        * Validate inputs and pre-conditions
+        */
+       assert(p);
+       assert(scratch_addr);
+       assert(path);
+
+       if (IS_64BIT_PROCESS(p)) {
+               panic("Init against 64b primordial proc not implemented");
+       }
+
+       /*
+        * Copy out program name.
+        */
+       size_t path_length = strlen(path) + 1;
+       (void) copyout(path, scratch_addr, path_length);
+
+       argv[argc++] = (uint32_t)scratch_addr;
+       scratch_addr = USER_ADDR_ALIGN(scratch_addr + path_length, 16);
+
+       /*
+        * Put out first (and only) argument, similarly.
+        * Assumes everything fits in a page as allocated above.
+        */
+       if (boothowto & RB_SINGLE) {
+               const char *init_args = "-s";
+               size_t init_args_length = strlen(init_args)+1;
+
+               copyout(init_args, scratch_addr, init_args_length);
+
+               argv[argc++] = (uint32_t)scratch_addr;
+               scratch_addr = USER_ADDR_ALIGN(scratch_addr + init_args_length, 16);
+       }
+
+       /*
+        * Null-end the argument list
+        */
+       argv[argc] = 0;
+       
+       /*
+        * Copy out the argument list.
+        */
+       (void) copyout(argv, scratch_addr, sizeof(argv));
+
+       /*
+        * Set up argument block for fake call to execve.
+        */
+       init_exec_args.fname = CAST_USER_ADDR_T(argv[0]);
+       init_exec_args.argp = scratch_addr;
+       init_exec_args.envp = USER_ADDR_NULL;
+
+       /*
+        * So that init task is set with uid,gid 0 token
+        */
+       set_security_token(p);
+
+       return execve(p, &init_exec_args, retval);
+}
+
 static const char * init_programs[] = {
+#if DEBUG
+       "/usr/local/sbin/launchd.debug",
+#endif
 #if DEVELOPMENT || DEBUG
+       /* Remove DEBUG conditional when <rdar://problem/17931977> is fixed */
        "/usr/local/sbin/launchd.development",
 #endif
        "/sbin/launchd",
@@ -3950,81 +4208,71 @@ static const char * init_programs[] = {
  *             process on the system, and gets here via bsd_ast() firing
  *             for the first time.  This is done to ensure that bsd_init()
  *             has run to completion.
+ *
+ *             In DEBUG & DEVELOPMENT builds, the launchdsuffix boot-arg
+ *             may be used to select a specific launchd executable. As with
+ *             the kcsuffix boot-arg, setting launchdsuffix to "" or "release"
+ *             will force /sbin/launchd to be selected.
+ *
+ *             The DEBUG kernel will continue to check for a .development
+ *             version until <rdar://problem/17931977> is fixed.
+ *
+ *              Search order by build:
+ *
+ * DEBUG       DEVELOPMENT     RELEASE         PATH
+ * ----------------------------------------------------------------------------------
+ * 1           1               NA              /usr/local/sbin/launchd.$LAUNCHDSUFFIX
+ * 2           NA              NA              /usr/local/sbin/launchd.debug
+ * 3           2               NA              /usr/local/sbin/launchd.development
+ * 4           3               1               /sbin/launchd
  */
 void
 load_init_program(proc_t p)
 {
-       vm_offset_t     init_addr, addr;
-       int             argc;
-       uint32_t argv[3];
-       unsigned int i;
-       int                     error;
-       int             retval[2];
-       const char *init_program_name;
-       struct execve_args init_exec_args;
-
-       init_addr = VM_MIN_ADDRESS;
-       (void) vm_allocate(current_map(), &init_addr, PAGE_SIZE, VM_FLAGS_ANYWHERE);
-       if (init_addr == 0)
-               init_addr++;
-                       
-       for (i = 0; i < sizeof(init_programs)/sizeof(init_programs[0]); i++) {
-       
-               init_program_name = init_programs[i];
-               addr = init_addr;
-               argc = 0;
-
-               /*
-                * Copy out program name.
-                */
-               (void) copyout(init_program_name, CAST_USER_ADDR_T(addr), strlen(init_program_name)+1);
-
-               argv[argc++] = (uint32_t)addr;
-               addr += strlen(init_program_name)+1;
-               addr = (vm_offset_t)ROUND_PTR(char, addr);
-
-               /*
-                * Put out first (and only) argument, similarly.
-                * Assumes everything fits in a page as allocated above.
-                */
-               if (boothowto & RB_SINGLE) {
-                       const char *init_args = "-s";
+       uint32_t i;
+       int error;
+       vm_offset_t scratch_addr = VM_MIN_ADDRESS;
 
-                       copyout(init_args, CAST_USER_ADDR_T(addr), strlen(init_args)+1);
+       (void) vm_allocate(current_map(), &scratch_addr, PAGE_SIZE, VM_FLAGS_ANYWHERE);
+#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
+       (void) memorystatus_init_at_boot_snapshot();
+#endif /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
+
+#if DEBUG || DEVELOPMENT
+       /* Check for boot-arg suffix first */
+       char launchd_suffix[64];
+       if (PE_parse_boot_argn("launchdsuffix", launchd_suffix, sizeof(launchd_suffix))) {
+               char launchd_path[128];
+               boolean_t is_release_suffix = ((launchd_suffix[0] == 0) ||
+                                              (strcmp(launchd_suffix, "release") == 0));
+
+               if (is_release_suffix) {
+                       error = load_init_program_at_path(p, CAST_USER_ADDR_T(scratch_addr), "/sbin/launchd");
+                       if (!error)
+                               return;
+
+                       panic("Process 1 exec of launchd.release failed, errno %d", error);
+               } else {
+                       strlcpy(launchd_path, "/usr/local/sbin/launchd.", sizeof(launchd_path));
+                       strlcat(launchd_path, launchd_suffix, sizeof(launchd_path));
 
-                       argv[argc++] = (uint32_t)addr;
-                       addr += strlen(init_args)+1;
-                       addr = (vm_offset_t)ROUND_PTR(char, addr);
+                       /* All the error data is lost in the loop below;
+                        * don't attempt to save it. */
+                       if (!load_init_program_at_path(p, CAST_USER_ADDR_T(scratch_addr), launchd_path)) {
+                               return;
+                       }
                }
+       }
+#endif
 
-               /*
-                * Null-end the argument list
-                */
-               argv[argc] = 0;
-       
-               /*
-                * Copy out the argument list.
-                */
-               (void) copyout(argv, CAST_USER_ADDR_T(addr), sizeof(argv));
-
-               /*
-                * Set up argument block for fake call to execve.
-                */
-               init_exec_args.fname = CAST_USER_ADDR_T(argv[0]);
-               init_exec_args.argp = CAST_USER_ADDR_T((char **)addr);
-               init_exec_args.envp = CAST_USER_ADDR_T(0);
-       
-               /*
-                * So that init task is set with uid,gid 0 token 
-                */
-               set_security_token(p);
-
-               error = execve(p, &init_exec_args, retval);
+       error = ENOENT;
+       for (i = 0; i < sizeof(init_programs)/sizeof(init_programs[0]); i++) {
+               error = load_init_program_at_path(p, CAST_USER_ADDR_T(scratch_addr), init_programs[i]);
                if (!error)
                        return;
        }
 
-       panic("Process 1 exec of %s failed, errno %d", init_program_name, error);
+       panic("Process 1 exec of %s failed, errno %d", ((i == 0) ? "<null>" : init_programs[i-1]), error);
 }
 
 /*
@@ -4316,6 +4564,16 @@ taskgated_required(proc_t p, boolean_t *require_success)
        void *blob;
        int error;
 
+       if (cs_debug > 2)
+               csvnode_print_debug(p->p_textvp);
+
+       const int can_skip_taskgated = csproc_get_platform_binary(p) && !csproc_get_platform_path(p);
+       if (can_skip_taskgated) {
+               if (cs_debug) printf("taskgated not required for: %s\n", p->p_name);
+               *require_success = FALSE;
+               return FALSE;
+       }
+
        if ((p->p_csflags & CS_VALID) == 0) {
                *require_success = FALSE;
                return TRUE;
@@ -4337,11 +4595,13 @@ taskgated_required(proc_t p, boolean_t *require_success)
                        return FALSE;
                }
 
+               if (cs_debug) printf("taskgated required for: %s\n", p->p_name);
+
                return TRUE;
        }
 
        *require_success = FALSE;
-       return 0;
+       return FALSE;
 }
 
 /*
index 3d17f687c108126913aff2c2d25c116e0d81a69e..fca7ab3293c0d0ec34269589e654d2b0ef8d2827 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011, 2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -94,6 +94,8 @@
 #include <sys/malloc.h>
 #include <sys/resourcevar.h>
 #include <sys/ptrace.h>
+#include <sys/proc_info.h>
+#include <sys/_types/_timeval64.h>
 #include <sys/user.h>
 #include <sys/aio_kern.h>
 #include <sys/sysproto.h>
 #include <kern/kern_types.h>
 #include <kern/kalloc.h>
 #include <kern/task.h>
+#include <corpses/task_corpse.h>
 #include <kern/thread.h>
 #include <kern/thread_call.h>
 #include <kern/sched_prim.h>
@@ -150,13 +153,18 @@ extern void dtrace_lazy_dofs_destroy(proc_t);
 #include <sys/sdt.h>
 
 extern boolean_t init_task_died;
-extern char init_task_failure_data[];
 void proc_prepareexit(proc_t p, int rv, boolean_t perf_notify);
 void vfork_exit(proc_t p, int rv);
 void vproc_exit(proc_t p);
 __private_extern__ void munge_user64_rusage(struct rusage *a_rusage_p, struct user64_rusage *a_user_rusage_p);
 __private_extern__ void munge_user32_rusage(struct rusage *a_rusage_p, struct user32_rusage *a_user_rusage_p);
 static int reap_child_locked(proc_t parent, proc_t child, int deadparent, int reparentedtoinit, int locked, int droplock);
+static void populate_corpse_crashinfo(proc_t p, void *crash_info_ptr, struct rusage_superset *rup, mach_exception_data_type_t code, mach_exception_data_type_t subcode);
+extern int proc_pidpathinfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval);
+
+static __attribute__((noinline)) void launchd_crashed_panic(proc_t p, int rv);
+extern void proc_piduniqidentifierinfo(proc_t p, struct proc_uniqidentifierinfo *p_uniqidinfo);
+
 
 /*
  * Things which should have prototypes in headers, but don't
@@ -222,6 +230,170 @@ copyoutsiginfo(user_siginfo_t *native, boolean_t is64, user_addr_t uaddr)
        }
 }
 
+static void populate_corpse_crashinfo(proc_t p, void *crash_info_ptr, struct rusage_superset *rup, mach_exception_data_type_t code, mach_exception_data_type_t subcode)
+{
+       mach_vm_address_t uaddr = 0;
+       mach_exception_data_type_t exc_codes[EXCEPTION_CODE_MAX];
+       exc_codes[0] = code;
+       exc_codes[1] = subcode;
+       cpu_type_t cputype;
+       struct proc_uniqidentifierinfo p_uniqidinfo;
+       struct proc_workqueueinfo pwqinfo;
+       int retval = 0;
+       uint64_t crashed_threadid = thread_tid(current_thread());
+       unsigned int pflags = 0;
+
+#if CONFIG_MEMORYSTATUS
+       int memstat_dirty_flags = 0;
+#endif
+
+       if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_EXCEPTION_CODES, sizeof(exc_codes), &uaddr)) {
+               copyout(exc_codes, uaddr, sizeof(exc_codes));
+       }
+
+       if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_PID, sizeof(p->p_pid), &uaddr)) {
+               copyout(&p->p_pid, uaddr, sizeof(p->p_pid));
+       }
+
+       if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_PPID, sizeof(p->p_ppid), &uaddr)) {
+               copyout(&p->p_ppid, uaddr, sizeof(p->p_ppid));
+       }
+
+       if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_CRASHED_THREADID, sizeof(uint64_t), &uaddr)) {
+               copyout(&crashed_threadid, uaddr, sizeof(uint64_t));
+       }
+
+       if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_RUSAGE, sizeof(struct rusage), &uaddr)) {
+               copyout(&rup->ru, uaddr, sizeof(struct rusage));
+       }
+
+       if (KERN_SUCCESS ==
+           kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_BSDINFOWITHUNIQID, sizeof(struct proc_uniqidentifierinfo), &uaddr)) {
+               proc_piduniqidentifierinfo(p, &p_uniqidinfo);
+               copyout(&p_uniqidinfo, uaddr, sizeof(struct proc_uniqidentifierinfo));
+       }
+
+       if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_RUSAGE_INFO, sizeof(rusage_info_current), &uaddr)) {
+               copyout(&rup->ri, uaddr, sizeof(rusage_info_current));
+       }
+
+       if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_PROC_CSFLAGS, sizeof(p->p_csflags), &uaddr)) {
+               copyout(&p->p_csflags, uaddr, sizeof(p->p_csflags));
+       }
+
+       if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_PROC_NAME, sizeof(p->p_comm), &uaddr)) {
+               copyout(&p->p_comm, uaddr, sizeof(p->p_comm));
+       }
+
+       if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_PROC_STARTTIME, sizeof(p->p_start), &uaddr)) {
+               struct timeval64 t64;
+               t64.tv_sec = (int64_t)p->p_start.tv_sec;
+               t64.tv_usec = (int64_t)p->p_start.tv_usec;
+               copyout(&t64, uaddr, sizeof(t64));
+       }
+
+       if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_USERSTACK, sizeof(p->user_stack), &uaddr)) {
+               copyout(&p->user_stack, uaddr, sizeof(p->user_stack));
+       }
+
+       if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_ARGSLEN, sizeof(p->p_argslen), &uaddr)) {
+               copyout(&p->p_argslen, uaddr, sizeof(p->p_argslen));
+       }
+
+       if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_PROC_ARGC, sizeof(p->p_argc), &uaddr)) {
+               copyout(&p->p_argc, uaddr, sizeof(p->p_argc));
+       }
+
+       if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_PROC_PATH, MAXPATHLEN, &uaddr)) {
+               proc_pidpathinfo(p, 0, uaddr, MAXPATHLEN, &retval);
+       }
+
+       pflags = p->p_flag & (P_LP64 | P_SUGID);
+       if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_PROC_FLAGS, sizeof(pflags), &uaddr)) {
+               copyout(&pflags, uaddr, sizeof(pflags));
+       }
+
+       if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_UID, sizeof(p->p_uid), &uaddr)) {
+               copyout(&p->p_uid, uaddr, sizeof(p->p_uid));
+       }
+
+       if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_GID, sizeof(p->p_gid), &uaddr)) {
+               copyout(&p->p_gid, uaddr, sizeof(p->p_gid));
+       }
+
+       cputype = cpu_type() & ~CPU_ARCH_MASK;
+       if (IS_64BIT_PROCESS(p))
+               cputype |= CPU_ARCH_ABI64;
+
+       if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_CPUTYPE, sizeof(cpu_type_t), &uaddr)) {
+               copyout(&cputype, uaddr, sizeof(cpu_type_t));
+       }
+
+       bzero(&pwqinfo, sizeof(struct proc_workqueueinfo));
+       retval = fill_procworkqueue(p, &pwqinfo);
+       if (retval == 0) {
+               if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_WORKQUEUEINFO, sizeof(struct proc_workqueueinfo), &uaddr)) {
+                       copyout(&pwqinfo, uaddr, sizeof(struct proc_workqueueinfo));
+               }
+       }
+
+       if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_RESPONSIBLE_PID, sizeof(p->p_responsible_pid), &uaddr)) {
+               copyout(&p->p_responsible_pid, uaddr, sizeof(p->p_responsible_pid));
+       }
+
+#if CONFIG_MEMORYSTATUS
+       memstat_dirty_flags = memorystatus_dirty_get(p);
+       if (KERN_SUCCESS == kcdata_get_memory_addr(crash_info_ptr, TASK_CRASHINFO_DIRTY_FLAGS, sizeof(memstat_dirty_flags), &uaddr)) {
+               copyout(&memstat_dirty_flags, uaddr, sizeof(memstat_dirty_flags));
+       }
+#endif
+
+}
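populate_corpse_crashinfo() above repeats one reserve-then-fill pattern for every crash-info field: reserve space in the corpse kcdata buffer with kcdata_get_memory_addr(), then copyout() the value into it. A minimal sketch of how that pattern could be factored is shown below; CORPSE_STASH is a hypothetical helper name, not part of this commit, and it only restates the kernel-internal calls already used above (it is not standalone-compilable).

    /*
     * Hypothetical helper (not part of this change) illustrating the
     * reserve-then-copyout pattern used throughout populate_corpse_crashinfo().
     */
    #define CORPSE_STASH(kcd, tag, datap, size)                               \
            do {                                                              \
                    mach_vm_address_t _uaddr = 0;                             \
                    if (KERN_SUCCESS == kcdata_get_memory_addr((kcd), (tag),  \
                        (size), &_uaddr)) {                                   \
                            copyout((datap), _uaddr, (size));                 \
                    }                                                         \
            } while (0)

    /* e.g.: CORPSE_STASH(crash_info_ptr, TASK_CRASHINFO_PID, &p->p_pid, sizeof(p->p_pid)); */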
+
+static __attribute__((noinline)) void
+launchd_crashed_panic(proc_t p, int rv)
+{
+       printf("pid 1 exited (signal %d, exit %d)\n",
+           WTERMSIG(rv), WEXITSTATUS(rv));
+
+#if (DEVELOPMENT || DEBUG)
+       /*
+        * For debugging purposes, generate a core file of initproc before
+        * panicking. Leave at least 300 MB free on the root volume, and ignore
+        * the process's corefile ulimit. fsync() the file to ensure it lands on disk
+        * before the panic hits.
+        */
+
+       int             err;
+       uint64_t        coredump_start = mach_absolute_time();
+       uint64_t        coredump_end;
+       clock_sec_t     tv_sec;
+       clock_usec_t    tv_usec;
+       uint32_t        tv_msec;
+
+       err = coredump(p, 300, COREDUMP_IGNORE_ULIMIT | COREDUMP_FULLFSYNC);
+
+       coredump_end = mach_absolute_time();
+
+       absolutetime_to_microtime(coredump_end - coredump_start, &tv_sec, &tv_usec);
+
+       tv_msec = tv_usec / 1000;
+
+       if (err != 0) {
+               printf("Failed to generate initproc core file: error %d, took %d.%03d seconds\n",
+                      err, (uint32_t)tv_sec, tv_msec);
+       } else {
+               printf("Generated initproc core file in %d.%03d seconds\n",
+                      (uint32_t)tv_sec, tv_msec);
+       }
+#endif
+
+       sync(p, (void *)NULL, (int *)NULL);
+
+       panic_plain("%s exited (signal %d, exit status %d %s)", (p->p_name[0] != '\0' ? p->p_name : "initproc"), WTERMSIG(rv),
+                   WEXITSTATUS(rv), ((p->p_csflags & CS_KILLED) ? "CS_KILLED" : ""));
+}
+
 /*
  * exit --
  *     Death of process.
@@ -337,32 +509,11 @@ exit1_internal(proc_t p, int rv, int *retval, boolean_t thread_can_terminate, bo
                }
                sig_lock_to_exit(p);
        }
+
        if (p == initproc && current_proc() == p) {
-               proc_unlock(p);
-               printf("pid 1 exited (signal %d, exit %d)",
-                   WTERMSIG(rv), WEXITSTATUS(rv));
-#if (DEVELOPMENT || DEBUG)
-               int err;
-               /*
-                * For debugging purposes, generate a core file of initproc before
-                * panicking. Leave at least 300 MB free on the root volume, and ignore
-                * the process's corefile ulimit.
-                */
-               if ((err = coredump(p, 300, 1)) != 0) {
-                       printf("Failed to generate initproc core file: error %d", err);
-               } else {
-                       printf("Generated initproc core file");
-                       sync(p, (void *)NULL, (int *)NULL);
-               }
-#endif
                init_task_died = TRUE;
-               panic("%s died\nState at Last Exception:\n\n%s", 
-                                                       (p->p_comm[0] != '\0' ?
-                                                               p->p_comm :
-                                                               "launchd"),
-                                                       init_task_failure_data);
        }
-
+       
        p->p_lflag |= P_LEXIT;
        p->p_xstat = rv;
        p->p_lflag |= jetsam_flags;
@@ -381,11 +532,19 @@ exit1_internal(proc_t p, int rv, int *retval, boolean_t thread_can_terminate, bo
 void
 proc_prepareexit(proc_t p, int rv, boolean_t perf_notify) 
 {
-       mach_exception_data_type_t code, subcode;
+       mach_exception_data_type_t code = 0, subcode = 0;
+
        struct uthread *ut;
        thread_t self = current_thread();
        ut = get_bsdthread_info(self);
        struct rusage_superset *rup;
+       int kr = 0;
+       int create_corpse = FALSE;
+
+       if (p == initproc) {
+               launchd_crashed_panic(p, rv);
+               /* NOTREACHED */
+       }
 
        /* If a core should be generated, notify crash reporter */
        if (hassigprop(WTERMSIG(rv), SA_CORE) || ((p->p_csflags & CS_KILLED) != 0)) {
@@ -407,7 +566,13 @@ proc_prepareexit(proc_t p, int rv, boolean_t perf_notify)
                        ((ut->uu_exception & 0x0f) << 20) | 
                        ((int)ut->uu_code & 0xfffff);
                subcode = ut->uu_subcode;
-               (void) task_exception_notify(EXC_CRASH, code, subcode);
+
+               kr = task_exception_notify(EXC_CRASH, code, subcode);
+
+               /* Nobody handled EXC_CRASH? Remember to make a corpse. */
+               if (kr != 0) {
+                       create_corpse = TRUE;
+               }
        }
 
 skipcheck:
@@ -416,6 +581,25 @@ skipcheck:
                (void)sys_perf_notify(self, p->p_pid);
        }
 
+
+       /* stash the usage into corpse data if create_corpse == TRUE */
+       if (create_corpse == TRUE) {
+               kr = task_mark_corpse(current_task());
+               if (kr != KERN_SUCCESS) {
+                       if (kr == KERN_NO_SPACE) {
+                               printf("Process[%d] has no vm space for corpse info.\n", p->p_pid);
+                       } else if (kr == KERN_NOT_SUPPORTED) {
+                               printf("Process[%d] was destined to become a corpse, but corpses are disabled by config.\n", p->p_pid);
+                       } else {
+                               printf("Process[%d] crashed: %s. Too many corpses being created.\n", p->p_pid, p->p_comm);
+                       }
+                       create_corpse = FALSE;
+               } else {
+                       /* XXX: <rdar://problem/20491659> Need to sync ATM buffer before crash */
+                       kr = task_send_trace_memory(current_task(), p->p_pid, p->p_uniqueid);
+               }
+       }
+
        /*
         * Before this process becomes a zombie, stash resource usage
         * stats in the proc for external observers to query
@@ -436,7 +620,9 @@ skipcheck:
                 */
                p->p_ru = rup;
        }
-
+       if (create_corpse) {
+               populate_corpse_crashinfo(p, task_get_corpseinfo(current_task()), rup, code, subcode);
+       }
        /*
         * Remove proc from allproc queue and from pidhash chain.
         * Need to do this before we do anything that can block.
@@ -576,6 +762,8 @@ proc_exit(proc_t p)
 
        workqueue_mark_exiting(p);
        workqueue_exit(p);
+       kqueue_dealloc(p->p_wqkqueue);
+       p->p_wqkqueue = NULL;
 
        _aio_exit( p );
 
@@ -1128,13 +1316,8 @@ reap_child_locked(proc_t parent, proc_t child, int deadparent, int reparentedtoi
         * and reference is dropped after these calls down below
         * (locking protection is provided by list lock held in chgproccnt)
         */
-       (void)chgproccnt(kauth_cred_getruid(child->p_ucred), -1);
 
-#if CONFIG_LCTX
-       ALLLCTX_LOCK;
-       leavelctx(child);
-       ALLLCTX_UNLOCK;
-#endif
+       (void)chgproccnt(kauth_cred_getruid(child->p_ucred), -1);
 
        /*
         * Free up credentials.
index 9417b09117d740b8be1b3a6e9420770d0f5a7f31..ff5d6dda6d028c0a1acb6ded6cef2c03d34933ba 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2007, 2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -93,6 +93,7 @@
 #include <sys/acct.h>
 #include <sys/codesign.h>
 #include <sys/sysproto.h>
+
 #if CONFIG_DTRACE
 /* Do not include dtrace.h, it redefines kmem_[alloc/free] */
 extern void dtrace_fasttrap_fork(proc_t, proc_t);
@@ -149,10 +150,10 @@ void thread_set_child(thread_t child, int pid);
 void *act_thread_csave(void);
 
 
-thread_t cloneproc(task_t, coalition_t, proc_t, int, int);
+thread_t cloneproc(task_t, coalition_t *, proc_t, int, int);
 proc_t forkproc(proc_t);
 void forkproc_free(proc_t);
-thread_t fork_create_child(task_t parent_task, coalition_t parent_coalition, proc_t child, int inherit_memory, int is64bit);
+thread_t fork_create_child(task_t parent_task, coalition_t *parent_coalitions, proc_t child, int inherit_memory, int is64bit);
 void proc_vfork_begin(proc_t parent_proc);
 void proc_vfork_end(proc_t parent_proc);
 
@@ -286,7 +287,7 @@ vfork(proc_t parent_proc, __unused struct vfork_args *uap, int32_t *retval)
        thread_t child_thread;
        int err;
 
-       if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_VFORK, COALITION_NULL)) != 0) {
+       if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_VFORK, NULL)) != 0) {
                retval[1] = 0;
        } else {
                uthread_t ut = get_bsdthread_info(current_thread());
@@ -323,11 +324,12 @@ vfork(proc_t parent_proc, __unused struct vfork_args *uap, int32_t *retval)
  *                                     Mach thread_t of the child process
 *                                     created
  *             kind                    kind of creation being requested
- *             coalition               if spawn, coalition the child process
- *                                     should join, or COALITION_NULL to
+ *             coalitions              if spawn, the set of coalitions the
+ *                                     child process should join, or NULL to
  *                                     inherit the parent's. On non-spawns,
  *                                     this param is ignored and the child
- *                                     always inherits the parent's coalition.
+ *                                     always inherits the parent's
+ *                                     coalitions.
  *
 * Notes:      Permissible values for 'kind':
  *
@@ -359,7 +361,7 @@ vfork(proc_t parent_proc, __unused struct vfork_args *uap, int32_t *retval)
  *             back to the other information.
  */
 int
-fork1(proc_t parent_proc, thread_t *child_threadp, int kind, coalition_t coalition)
+fork1(proc_t parent_proc, thread_t *child_threadp, int kind, coalition_t *coalitions)
 {
        thread_t parent_thread = (thread_t)current_thread();
        uthread_t parent_uthread = (uthread_t)get_bsdthread_info(parent_thread);
@@ -392,6 +394,7 @@ fork1(proc_t parent_proc, thread_t *child_threadp, int kind, coalition_t coaliti
         * always less than what an rlim_t can hold.
         * (locking protection is provided by list lock held in chgproccnt)
         */
+
        count = chgproccnt(uid, 1);
        if (uid != 0 &&
            (rlim_t)count > parent_proc->p_rlimit[RLIMIT_NPROC].rlim_cur) {
@@ -552,7 +555,7 @@ fork1(proc_t parent_proc, thread_t *child_threadp, int kind, coalition_t coaliti
                 * differences.  Contrarily, spawned processes do not inherit.
                 */
                if ((child_thread = cloneproc(parent_proc->task,
-                                               spawn ? coalition : COALITION_NULL,
+                                               spawn ? coalitions : NULL,
                                                parent_proc,
                                                spawn ? FALSE : TRUE,
                                                FALSE)) == NULL) {
@@ -758,7 +761,7 @@ vfork_return(proc_t child_proc, int32_t *retval, int rval)
  *             process
  *
  * Parameters: parent_task             parent task
- *             parent_coalition        parent_coalition
+ *             parent_coalitions       parent's set of coalitions
  *             child_proc              child process
  *             inherit_memory          TRUE, if the parents address space is
  *                                     to be inherited by the child
@@ -772,7 +775,7 @@ vfork_return(proc_t child_proc, int32_t *retval, int rval)
  *             vfork() equivalent call, and in the system bootstrap case.
  *
  *             It creates a new task and thread (and as a side effect of the
- *             thread creation, a uthread) in the parent coalition, which is
+ *             thread creation, a uthread) in the parent coalition set, which is
  *             then associated with the process 'child'.  If the parent
  *             process address space is to be inherited, then a flag
  *             indicates that the newly created task should inherit this from
@@ -783,7 +786,7 @@ vfork_return(proc_t child_proc, int32_t *retval, int rval)
  *             in this case, 'inherit_memory' MUST be FALSE.
  */
 thread_t
-fork_create_child(task_t parent_task, coalition_t parent_coalition, proc_t child_proc, int inherit_memory, int is64bit)
+fork_create_child(task_t parent_task, coalition_t *parent_coalitions, proc_t child_proc, int inherit_memory, int is64bit)
 {
        thread_t        child_thread = NULL;
        task_t          child_task;
@@ -791,7 +794,7 @@ fork_create_child(task_t parent_task, coalition_t parent_coalition, proc_t child
 
        /* Create a new task for the child process */
        result = task_create_internal(parent_task,
-                                       parent_coalition,
+                                       parent_coalitions,
                                        inherit_memory,
                                        is64bit,
                                        &child_task);
@@ -825,7 +828,7 @@ fork_create_child(task_t parent_task, coalition_t parent_coalition, proc_t child
                resetpriority(child_proc);
 
        /* Create a new thread for the child process */
-       result = thread_create(child_task, &child_thread);
+       result = thread_create_with_continuation(child_task, &child_thread, (thread_continue_t)proc_wait_to_return);
        if (result != KERN_SUCCESS) {
                printf("%s: thread_create failed. Code: %d\n",
                    __func__, result);
@@ -887,7 +890,7 @@ fork(proc_t parent_proc, __unused struct fork_args *uap, int32_t *retval)
 
        retval[1] = 0;          /* flag parent return for user space */
 
-       if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_FORK, COALITION_NULL)) == 0) {
+       if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_FORK, NULL)) == 0) {
                task_t child_task;
                proc_t child_proc;
 
@@ -913,7 +916,7 @@ fork(proc_t parent_proc, __unused struct fork_args *uap, int32_t *retval)
 #endif
 
                /* "Return" to the child */
-               (void)thread_resume(child_thread);
+               proc_clear_return_wait(child_proc, child_thread);
 
                /* drop the extra references we got during the creation */
                if ((child_task = (task_t)get_threadtask(child_thread)) != NULL) {
@@ -965,7 +968,7 @@ fork(proc_t parent_proc, __unused struct fork_args *uap, int32_t *retval)
  *             live with this being somewhat awkward.
  */
 thread_t
-cloneproc(task_t parent_task, coalition_t parent_coalition, proc_t parent_proc, int inherit_memory, int memstat_internal)
+cloneproc(task_t parent_task, coalition_t *parent_coalitions, proc_t parent_proc, int inherit_memory, int memstat_internal)
 {
 #if !CONFIG_MEMORYSTATUS
 #pragma unused(memstat_internal)
@@ -979,7 +982,7 @@ cloneproc(task_t parent_task, coalition_t parent_coalition, proc_t parent_proc,
                goto bad;
        }
 
-       child_thread = fork_create_child(parent_task, parent_coalition, child_proc, inherit_memory, (parent_task == TASK_NULL) ? FALSE : (parent_proc->p_flag & P_LP64));
+       child_thread = fork_create_child(parent_task, parent_coalitions, child_proc, inherit_memory, (parent_task == TASK_NULL) ? FALSE : (parent_proc->p_flag & P_LP64));
 
        if (child_thread == NULL) {
                /*
@@ -1196,6 +1199,7 @@ retry:
        }
        nprocs++;
        child_proc->p_pid = nextpid;
+       child_proc->p_responsible_pid = nextpid;        /* initially responsible for self */
        child_proc->p_idversion = nextpidversion++;
        /* kernel process is handcrafted and not from fork, so start from 1 */
        child_proc->p_uniqueid = ++nextuniqueid;
@@ -1232,7 +1236,7 @@ retry:
         * Increase reference counts on shared objects.
         * The p_stats and p_sigacts substructs are set in vm_fork.
         */
-       child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY | P_DISABLE_ASLR | P_DELAYIDLESLEEP));
+       child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_DISABLE_ASLR | P_DELAYIDLESLEEP | P_SUGID));
        if (parent_proc->p_flag & P_PROFIL)
                startprofclock(child_proc);
 
@@ -1325,6 +1329,7 @@ retry:
         */
        proc_signalstart(child_proc, 0);
        proc_transstart(child_proc, 0, 0);
+       proc_set_return_wait(child_proc);
 
        child_proc->p_pcaction = 0;
 
@@ -1359,25 +1364,13 @@ retry:
        if ((parent_proc->p_lflag & P_LREGISTER) != 0) {
                child_proc->p_lflag |= P_LREGISTER;
        }
+       child_proc->p_wqkqueue = NULL;
        child_proc->p_dispatchqueue_offset = parent_proc->p_dispatchqueue_offset;
        child_proc->p_dispatchqueue_serialno_offset = parent_proc->p_dispatchqueue_serialno_offset;
 #if PSYNCH
        pth_proc_hashinit(child_proc);
 #endif /* PSYNCH */
 
-#if CONFIG_LCTX
-       child_proc->p_lctx = NULL;
-       /* Add new process to login context (if any). */
-       if (parent_proc->p_lctx != NULL) {
-               /*
-                * <rdar://6640564> This should probably be delayed in the
-                * vfork() or posix_spawn() cases.
-                */
-               LCTX_LOCK(parent_proc->p_lctx);
-               enterlctx(child_proc, parent_proc->p_lctx, 0);
-       }
-#endif
-
 #if CONFIG_MEMORYSTATUS
        /* Memorystatus + jetsam init */
        child_proc->p_memstat_state = 0;
@@ -1517,30 +1510,53 @@ uthread_alloc(task_t task, thread_t thread, int noinherit)
                if (p->p_dtrace_ptss_pages != NULL) {
                        uth->t_dtrace_scratch = dtrace_ptss_claim_entry(p);
                }
-#endif
-#if CONFIG_MACF
-               mac_thread_label_init(uth);
 #endif
        }
 
        return (ut);
 }
 
+/*
+ * This routine frees the thread name field of the uthread_t structure. Split out of
+ * uthread_cleanup() so it can be called separately on the threads of a corpse after
+ * the corpse notification has been sent, and the handler has had a chance to extract
+ * the thread names.
+ */
+void
+uthread_cleanup_name(void *uthread)
+{
+       uthread_t uth = (uthread_t)uthread;
+
+       /*
+        * <rdar://17834538>
+        * Set pth_name to NULL before calling free().
+        * Previously there was a race condition in the
+        * case this code was executing during a stackshot
+        * where the stackshot could try and copy pth_name
+        * after it had been freed and before it was marked
+        * as null.
+        */
+       if (uth->pth_name != NULL) {
+               void *pth_name = uth->pth_name;
+               uth->pth_name = NULL;
+               kfree(pth_name, MAXTHREADNAMESIZE);
+       }
+       return;
+}
 
 /* 
  * This routine frees all the BSD context in uthread except the credential.
  * It does not free the uthread structure as well
  */
 void
-uthread_cleanup(task_t task, void *uthread, void * bsd_info)
+uthread_cleanup(task_t task, void *uthread, void * bsd_info, boolean_t is_corpse)
 {
        struct _select *sel;
        uthread_t uth = (uthread_t)uthread;
        proc_t p = (proc_t)bsd_info;
-       void *pth_name;
 
        if (uth->uu_lowpri_window || uth->uu_throttle_info) {
-               /*
+               /*
                 * task is marked as a low priority I/O type
                 * and we've somehow managed to not dismiss the throttle
                 * through the normal exit paths back to user space...
@@ -1573,25 +1589,20 @@ uthread_cleanup(task_t task, void *uthread, void * bsd_info)
                uth->uu_cdir = NULLVP;
        }
 
-       if (uth->uu_allocsize && uth->uu_wqset){
-               kfree(uth->uu_wqset, uth->uu_allocsize);
-               uth->uu_allocsize = 0;
-               uth->uu_wqset = 0;
+       if (uth->uu_wqset) {
+               if (waitq_set_is_valid(uth->uu_wqset))
+                       waitq_set_deinit(uth->uu_wqset);
+               FREE(uth->uu_wqset, M_SELECT);
+               uth->uu_wqset = NULL;
+               uth->uu_wqstate_sz = 0;
        }
-       
-       /* 
-        * <rdar://17834538>
-        * Set pth_name to NULL before calling free().
-        * Previously there was a race condition in the 
-        * case this code was executing during a stackshot
-        * where the stackshot could try and copy pth_name
-        * after it had been freed and before if was marked
-        * as null.
+
+       /*
+        * defer the removal of the thread name on process corpses until the corpse has
+        * been autopsied.
         */
-       if (uth->pth_name != NULL) {
-               pth_name = uth->pth_name;
-               uth->pth_name = NULL;
-               kfree(pth_name, MAXTHREADNAMESIZE);
+       if (!is_corpse) {
+               uthread_cleanup_name(uth);
        }
 
        if ((task != kernel_task) && p) {
@@ -1615,9 +1626,6 @@ uthread_cleanup(task_t task, void *uthread, void * bsd_info)
                if (tmpptr != NULL) {
                        dtrace_ptss_release_entry(p, tmpptr);
                }
-#endif
-#if CONFIG_MACF
-               mac_thread_label_destroy(uth);
 #endif
        }
 }
index c8223153ac4dfca53b4055777bc53a9894634295..dad131eb4783f3c7a340ce1b9e15c67569ab3222 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -131,15 +131,15 @@ guarded_fileproc_free(struct fileproc *fp)
 
 static int
 fp_lookup_guarded(proc_t p, int fd, guardid_t guard,
-    struct guarded_fileproc **gfpp)
+    struct guarded_fileproc **gfpp, int locked)
 {
        struct fileproc *fp;
        int error;
 
-       if ((error = fp_lookup(p, fd, &fp, 1)) != 0)
+       if ((error = fp_lookup(p, fd, &fp, locked)) != 0)
                return (error);
        if (FILEPROC_TYPE(fp) != FTYPE_GUARDED) {
-               (void) fp_drop(p, fd, fp, 1);
+               (void) fp_drop(p, fd, fp, locked);
                return (EINVAL);
        }
        struct guarded_fileproc *gfp = FP_TO_GFP(fp);
@@ -148,7 +148,7 @@ fp_lookup_guarded(proc_t p, int fd, guardid_t guard,
                panic("%s: corrupt fp %p", __func__, fp);
 
        if (guard != gfp->gf_guard) {
-               (void) fp_drop(p, fd, fp, 1);
+               (void) fp_drop(p, fd, fp, locked);
                return (EPERM); /* *not* a mismatch exception */
        }
        if (gfpp)
@@ -175,7 +175,7 @@ fp_isguarded(struct fileproc *fp, u_int attrs)
                if (GUARDED_FILEPROC_MAGIC != gfp->gf_magic)
                        panic("%s: corrupt gfp %p flags %x",
                            __func__, gfp, fp->f_flags);
-               return ((attrs & gfp->gf_attrs) ? 1 : 0);
+               return ((attrs & gfp->gf_attrs) == attrs);
        }
        return (0);
 }
@@ -322,6 +322,10 @@ fd_guard_ast(thread_t t)
  * requires close-on-fork; O_CLOEXEC must be set in flags.
  * This setting is immutable; attempts to clear the flag will
  * cause a guard exception.
+ *
+ * XXX It's somewhat broken that change_fdguard_np() can completely
+ *     remove the guard and thus revoke down the immutability
+ *     promises above.  Ick.
  */
 int
 guarded_open_np(proc_t p, struct guarded_open_np_args *uap, int32_t *retval)
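For reference, the guarded_open_np() contract documented above is reached from user space through the private <sys/guarded.h> SPI. The sketch below is illustrative only and assumes the user-level prototypes guarded_open_np() and guarded_close_np() from that header; it is not part of the commit.

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/guarded.h>        /* private SPI: guardid_t, GUARD_* flags (assumed) */

    int
    open_guarded_example(const char *path)
    {
            const guardid_t guard = 0xfeedfacefeedfaceULL;  /* any non-zero value */
            int fd;

            /* O_CLOEXEC is mandatory; the guard then makes plain close(2) and
             * dup(2) on this descriptor raise a guard exception. */
            fd = guarded_open_np(path, &guard, GUARD_CLOSE | GUARD_DUP,
                O_RDONLY | O_CLOEXEC);
            if (fd < 0) {
                    perror("guarded_open_np");
                    return -1;
            }

            /* ... use fd ... */

            /* Only guarded_close_np() with the matching guard may close it. */
            return guarded_close_np(fd, &guard);
    }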
@@ -383,10 +387,6 @@ guarded_open_dprotected_np(proc_t p, struct guarded_open_dprotected_np_args *uap
        if ((uap->flags & O_CLOEXEC) == 0)
                return (EINVAL);
 
-#define GUARD_REQUIRED (GUARD_DUP)
-#define GUARD_ALL      (GUARD_REQUIRED |       \
-                       (GUARD_CLOSE | GUARD_SOCKET_IPC | GUARD_FILEPORT | GUARD_WRITE))
-
        if (((uap->guardflags & GUARD_REQUIRED) != GUARD_REQUIRED) ||
            ((uap->guardflags & ~GUARD_ALL) != 0))
                return (EINVAL);
@@ -429,12 +429,17 @@ guarded_open_dprotected_np(proc_t p, struct guarded_open_dprotected_np_args *uap
                VATTR_SET(&va, va_dataprotect_class, uap->dpclass);
        }
        
-       if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
+       if (uap->dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) {
                if ( uap->flags & (O_RDWR | O_WRONLY)) {
                        /* Not allowed to write raw encrypted bytes */
                        return EINVAL;          
                }                       
-               VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
+               if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
+                   VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
+               }
+               if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
+                   VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
+               }
        }
 
        return (open1(ctx, &nd, uap->flags | O_CLOFORK, &va,
@@ -447,10 +452,8 @@ guarded_open_dprotected_np(proc_t p, struct guarded_open_dprotected_np_args *uap
  * Create a guarded kqueue descriptor with guardid and guardflags.
  *
  * Same restrictions on guardflags as for guarded_open_np().
- * All kqueues are -always- close-on-exec and close-on-fork by themselves.
- *
- * XXX Is it ever sensible to allow a kqueue fd (guarded or not) to
- *     be sent to another process via a fileport or socket?
+ * All kqueues are -always- close-on-exec and close-on-fork by themselves
+ * and are not sendable.
  */
 int
 guarded_kqueue_np(proc_t p, struct guarded_kqueue_np_args *uap, int32_t *retval)
@@ -492,7 +495,7 @@ guarded_close_np(proc_t p, struct guarded_close_np_args *uap,
                return (error);
 
        proc_fdlock(p);
-       if ((error = fp_lookup_guarded(p, fd, uguard, &gfp)) != 0) {
+       if ((error = fp_lookup_guarded(p, fd, uguard, &gfp, 1)) != 0) {
                proc_fdunlock(p);
                return (error);
        }
@@ -535,6 +538,9 @@ guarded_close_np(proc_t p, struct guarded_close_np_args *uap,
  * the GUARD_CLOSE flag is being cleared, it is still possible to continue
  * to keep FD_CLOFORK on the descriptor by passing FD_CLOFORK via fdflagsp.
  *
+ * (File descriptors whose underlying fileglobs are marked FG_CONFINED are
+ * still close-on-fork, regardless of the setting of FD_CLOFORK.)
+ *
  * Example 1: Guard an unguarded descriptor during a set of operations,
  * then restore the original state of the descriptor.
  *
@@ -552,14 +558,10 @@ guarded_close_np(proc_t p, struct guarded_close_np_args *uap,
  * // do things with 'fd' with a different guard
  * change_fdguard_np(fd, &myg, GUARD_CLOSE, &gd, gdflags, &sav_flags);
  * // back to original guarded state
+ *
+ * XXX This SPI is too much of a chainsaw and should be revised.
  */
 
-#define FDFLAGS_GET(p, fd) (*fdflags(p, fd) & (UF_EXCLOSE|UF_FORKCLOSE))
-#define FDFLAGS_SET(p, fd, bits) \
-          (*fdflags(p, fd) |= ((bits) & (UF_EXCLOSE|UF_FORKCLOSE)))
-#define FDFLAGS_CLR(p, fd, bits) \
-          (*fdflags(p, fd) &= ~((bits) & (UF_EXCLOSE|UF_FORKCLOSE)))
-
 int
 change_fdguard_np(proc_t p, struct change_fdguard_np_args *uap,
     __unused int32_t *retval)
@@ -620,12 +622,9 @@ restart:
                 */
                if (0 == newg)
                        error = EINVAL; /* guards cannot contain zero */
-               else if (0 == uap->nguardflags)
-                       error = EINVAL; /* attributes cannot be zero */
                else if (((uap->nguardflags & GUARD_REQUIRED) != GUARD_REQUIRED) ||
-                   ((uap->guardflags & ~GUARD_ALL) != 0))
+                   ((uap->nguardflags & ~GUARD_ALL) != 0))
                        error = EINVAL; /* must have valid attributes too */
-            
                if (0 != error)
                        goto dropout;
 
@@ -655,6 +654,7 @@ restart:
                                        FDFLAGS_SET(p, fd, UF_FORKCLOSE);
                                FDFLAGS_SET(p, fd,
                                    (nfdflags & FD_CLOFORK) ? UF_FORKCLOSE : 0);
+                               /* FG_CONFINED enforced regardless */
                        } else {
                                error = EPERM;
                        }
@@ -741,6 +741,7 @@ restart:
                                FDFLAGS_CLR(p, fd, UF_FORKCLOSE | UF_EXCLOSE);
                                FDFLAGS_SET(p, fd,
                                    (nfdflags & FD_CLOFORK) ? UF_FORKCLOSE : 0);
+                               /* FG_CONFINED enforced regardless */
                                FDFLAGS_SET(p, fd,
                                    (nfdflags & FD_CLOEXEC) ? UF_EXCLOSE : 0);
                                (void) fp_drop(p, fd, nfp, 1);
@@ -792,7 +793,7 @@ guarded_write_np(struct proc *p, struct guarded_write_np_args *uap, user_ssize_t
        if ((error = copyin(uap->guard, &uguard, sizeof (uguard))) != 0)
                return (error);
 
-       error = fp_lookup_guarded(p, fd, uguard, &gfp);
+       error = fp_lookup_guarded(p, fd, uguard, &gfp, 0);
        if (error)
                return(error);
 
@@ -837,7 +838,7 @@ guarded_write_np(struct proc *p, struct guarded_write_np_args *uap, user_ssize_t
        if ((error = copyin(uap->guard, &uguard, sizeof (uguard))) != 0)
                return (error);
 
-       error = fp_lookup_guarded(p, fd, uguard, &gfp);
+       error = fp_lookup_guarded(p, fd, uguard, &gfp, 0);
        if (error)
                return(error);
 
@@ -928,12 +929,15 @@ guarded_writev_np(struct proc *p, struct guarded_writev_np_args *uap, user_ssize
        
        /* finalize uio_t for use and do the IO 
         */
-       uio_calculateresid(auio);
+       error = uio_calculateresid(auio);
+       if (error) {
+               goto ExitThisRoutine;
+       }
 
        if ((error = copyin(uap->guard, &uguard, sizeof (uguard))) != 0)
                goto ExitThisRoutine;
 
-       error = fp_lookup_guarded(p, uap->fd, uguard, &gfp);
+       error = fp_lookup_guarded(p, uap->fd, uguard, &gfp, 0);
        if (error)
                goto ExitThisRoutine;
 
index dde93bbce390077794e0f4d2ea94670375ae4e15..3e3443fc3da7c30ada59c31be8b458167c242636 100644 (file)
@@ -53,6 +53,7 @@
 #define REQ_PERIOD              (10)
 #define REQ_ACTIONID            (11)
 #define REQ_SW_INC              (14)
+#define REQ_PMU_VERSION         (15)
 
 /* Type-munging casts */
 typedef int (*getint_t)(void);
@@ -70,11 +71,6 @@ static void           *sysctl_buffer = NULL;
 
 typedef int (*setget_func_t)(int);
 
-/* init our stuff */
-extern void kpc_arch_init(void);
-extern void kpc_common_init(void);
-extern void kpc_thread_init(void); /* osfmk/kern/kpc_thread.c */
-
 void
 kpc_init(void)
 {
@@ -247,6 +243,9 @@ sysctl_kpc_get_config(uint32_t classes, void* buf)
 static int
 sysctl_kpc_set_config(uint32_t classes, void* buf)
 {
+       /* userspace cannot reconfigure the power class */
+       if (classes & KPC_CLASS_POWER_MASK)
+               return (EPERM);
        return kpc_set_config( classes, buf);
 }
 
@@ -259,6 +258,9 @@ sysctl_kpc_get_period(uint32_t classes, void* buf)
 static int
 sysctl_kpc_set_period(uint32_t classes, void* buf)
 {
+       /* userspace cannot reconfigure the power class */
+       if (classes & KPC_CLASS_POWER_MASK)
+               return (EPERM);
        return kpc_set_period( classes, buf);
 }
 
@@ -500,6 +502,10 @@ kpc_sysctl SYSCTL_HANDLER_ARGS
                ret = sysctl_set_int( req, (setget_func_t)kpc_set_sw_inc );
                break;          
 
+       case REQ_PMU_VERSION:
+               ret = sysctl_get_int(oidp, req, kpc_get_pmu_version());
+               break;
+
        default:
                ret = ENOENT;
                break;
@@ -533,6 +539,11 @@ SYSCTL_PROC(_kpc, OID_AUTO, thread_counting,
             (void*)REQ_THREAD_COUNTING, 
             sizeof(int), kpc_sysctl, "I", "Thread accumulation");
 
+SYSCTL_PROC(_kpc, OID_AUTO, pmu_version,
+            CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_ANYBODY,
+            (void *)REQ_PMU_VERSION,
+            sizeof(int), kpc_sysctl, "I", "PMU version for hardware");
+
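The new pmu_version entry is a read-only integer, so user space can query it with sysctlbyname(3). A small sketch, assuming the node is published as "kpc.pmu_version" (per the SYSCTL_PROC above); not part of the commit.

    #include <stdio.h>
    #include <sys/sysctl.h>

    int
    main(void)
    {
            int pmu_version = 0;
            size_t len = sizeof(pmu_version);

            /* "kpc.pmu_version" is assumed from SYSCTL_PROC(_kpc, ..., pmu_version, ...) */
            if (sysctlbyname("kpc.pmu_version", &pmu_version, &len, NULL, 0) == -1) {
                    perror("sysctlbyname");
                    return 1;
            }
            printf("PMU version: %d\n", pmu_version);
            return 0;
    }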
 /* faux values */
 SYSCTL_PROC(_kpc, OID_AUTO, config_count,
             CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY,
index 3bbf77a45d61748337700edcfbae97b2bb1a0222..46c4f2e77da4cce533585487ac46fc96ca935db0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2015 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <sys/sdt.h>
 #include <kern/task.h>
 
+#include <sys/file_internal.h>
+
 /*
  * This variable controls the maximum number of processes that will
  * be checked in doing deadlock detection.
  */
 static int maxlockdepth = MAXDEPTH;
 
+#if (DEVELOPMENT || DEBUG)
+#define LOCKF_DEBUGGING        1
+#endif
+
 #ifdef LOCKF_DEBUGGING
 #include <sys/sysctl.h>
-#include <ufs/ufs/quota.h>
-#include <ufs/ufs/inode.h>
 void lf_print(const char *tag, struct lockf *lock);
 void lf_printlist(const char *tag, struct lockf *lock);
-static int     lockf_debug = 2;
+
+#define        LF_DBG_LOCKOP   (1 << 0)        /* setlk, getlk, clearlk */
+#define        LF_DBG_LIST     (1 << 1)        /* split, coalesce */
+#define        LF_DBG_IMPINH   (1 << 2)        /* importance inheritance */
+#define        LF_DBG_TRACE    (1 << 3)        /* errors, exit */
+
+static int     lockf_debug = 0;        /* was 2, could be 3 ;-) */
 SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &lockf_debug, 0, "");
 
 /*
- * If there is no mask bit selector, or there is on, and the selector is
+ * If there is no mask bit selector, or there is one, and the selector is
  * set, then output the debugging diagnostic.
  */
 #define LOCKF_DEBUG(mask, ...)                                 \
@@ -138,6 +148,7 @@ static void  lf_wakelock(struct lockf *, boolean_t);
 static void     lf_hold_assertion(task_t, struct lockf *);
 static void     lf_jump_to_queue_head(struct lockf *, struct lockf *);
 static void     lf_drop_assertion(struct lockf *);
+static void     lf_boost_blocking_proc(struct lockf *, struct lockf *);
 #endif /* IMPORTANCE_INHERITANCE */
 
 /*
@@ -185,7 +196,9 @@ lf_advlock(struct vnop_advlock_args *ap)
        if (*head == (struct lockf *)0) {
                if (ap->a_op != F_SETLK) {
                        fl->l_type = F_UNLCK;
-                       LOCKF_DEBUG(0, "lf_advlock: '%s' unlock without lock\n", vfs_context_proc(context)->p_comm);
+                       LOCKF_DEBUG(LF_DBG_TRACE,
+                           "lf_advlock: '%s' unlock without lock\n",
+                           vfs_context_proc(context)->p_comm);
                        return (0);
                }
        }
@@ -213,7 +226,8 @@ lf_advlock(struct vnop_advlock_args *ap)
                 * do this because we will use size to force range checks.
                 */
                if ((error = vnode_size(vp, (off_t *)&size, context))) {
-                       LOCKF_DEBUG(0, "lf_advlock: vnode_getattr failed: %d\n", error);
+                       LOCKF_DEBUG(LF_DBG_TRACE,
+                           "lf_advlock: vnode_getattr failed: %d\n", error);
                        return (error);
                }
 
@@ -225,22 +239,26 @@ lf_advlock(struct vnop_advlock_args *ap)
                break;
 
        default:
-               LOCKF_DEBUG(0, "lf_advlock: unknown whence %d\n", fl->l_whence);
+               LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: unknown whence %d\n",
+                   fl->l_whence);
                return (EINVAL);
        }
        if (start < 0) {
-               LOCKF_DEBUG(0, "lf_advlock: start < 0 (%qd)\n", start);
+               LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: start < 0 (%qd)\n",
+                   start);
                return (EINVAL);
        }
        if (fl->l_len < 0) {
                if (start == 0) {
-                       LOCKF_DEBUG(0, "lf_advlock: len < 0 & start == 0\n");
+                       LOCKF_DEBUG(LF_DBG_TRACE,
+                           "lf_advlock: len < 0 & start == 0\n");
                        return (EINVAL);
                }
                end = start - 1;
                start += fl->l_len;
                if (start < 0) {
-                       LOCKF_DEBUG(0, "lf_advlock: start < 0 (%qd)\n", start);
+                       LOCKF_DEBUG(LF_DBG_TRACE,
+                           "lf_advlock: start < 0 (%qd)\n", start);
                        return (EINVAL);
                }
        } else if (fl->l_len == 0)
@@ -248,7 +266,7 @@ lf_advlock(struct vnop_advlock_args *ap)
        else {
                oadd = fl->l_len - 1;
                if (oadd > (off_t)(OFF_MAX - start)) {
-                       LOCKF_DEBUG(0, "lf_advlock: overflow\n");
+                       LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: overflow\n");
                        return (EOVERFLOW);
                }
                end = start + oadd;
@@ -270,7 +288,11 @@ lf_advlock(struct vnop_advlock_args *ap)
        lock->lf_flags = ap->a_flags;
 #if IMPORTANCE_INHERITANCE
        lock->lf_boosted = LF_NOT_BOOSTED;
-#endif /* IMPORTANCE_INHERITANCE */
+#endif
+       if (ap->a_flags & F_POSIX)
+               lock->lf_owner = (struct proc *)lock->lf_id;
+       else
+               lock->lf_owner = NULL;
 
        if (ap->a_flags & F_FLOCK)
                lock->lf_flags |= F_WAKE1_SAFE;
@@ -281,6 +303,19 @@ lf_advlock(struct vnop_advlock_args *ap)
         */
        switch(ap->a_op) {
        case F_SETLK:
+               /*
+                * For F_OFD_* locks, lf_id is the fileglob.
+                * Record an "lf_owner" iff this is a confined fd
+                * i.e. it cannot escape this process and will be
+                * F_UNLCKed before the owner exits.  (This is
+                * the implicit guarantee needed to ensure lf_owner
+                * remains a valid reference here.)
+                */
+               if (ap->a_flags & F_OFD_LOCK) {
+                       struct fileglob *fg = (void *)lock->lf_id;
+                       if (fg->fg_lflags & FG_CONFINED)
+                               lock->lf_owner = current_proc();
+               }
                error = lf_setlock(lock, ap->a_timeout);
                break;
 
@@ -302,7 +337,7 @@ lf_advlock(struct vnop_advlock_args *ap)
        }
        lck_mtx_unlock(&vp->v_lock);    /* done manipulating the list */
 
-       LOCKF_DEBUG(0, "lf_advlock: normal exit: %d\n\n", error);
+       LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: normal exit: %d\n", error);
        return (error);
 }
 
@@ -389,7 +424,7 @@ lf_coalesce_adjacent(struct lockf *lock)
                    ((*lf)->lf_end + 1) == lock->lf_start) {
                        struct lockf *adjacent = *lf;
 
-                       LOCKF_DEBUG(0, "lf_coalesce_adjacent: coalesce adjacent previous\n");
+                       LOCKF_DEBUG(LF_DBG_LIST, "lf_coalesce_adjacent: coalesce adjacent previous\n");
                        lock->lf_start = (*lf)->lf_start;
                        *lf = lock;
                        lf = &(*lf)->lf_next;
@@ -404,7 +439,7 @@ lf_coalesce_adjacent(struct lockf *lock)
                    (lock->lf_end + 1) == (*lf)->lf_start) {
                        struct lockf *adjacent = *lf;
 
-                       LOCKF_DEBUG(0, "lf_coalesce_adjacent: coalesce adjacent following\n");
+                       LOCKF_DEBUG(LF_DBG_LIST, "lf_coalesce_adjacent: coalesce adjacent following\n");
                        lock->lf_end = (*lf)->lf_end;
                        lock->lf_next = (*lf)->lf_next;
                        lf = &lock->lf_next;
@@ -420,7 +455,6 @@ lf_coalesce_adjacent(struct lockf *lock)
        }
 }
 
-
 /*
  * lf_setlock
  *
@@ -457,12 +491,9 @@ lf_setlock(struct lockf *lock, struct timespec *timeout)
        int priority, needtolink, error;
        struct vnode *vp = lock->lf_vnode;
        overlap_t ovcase;
-#if IMPORTANCE_INHERITANCE
-       task_t boosting_task, block_task;
-#endif /* IMPORTANCE_INHERITANCE */
 
 #ifdef LOCKF_DEBUGGING
-       if (lockf_debug & 1) {
+       if (lockf_debug & LF_DBG_LOCKOP) {
                lf_print("lf_setlock", lock);
                lf_printlist("lf_setlock(in)", lock);
        }
@@ -491,7 +522,11 @@ lf_setlock(struct lockf *lock, struct timespec *timeout)
                /*
                 * We are blocked. Since flock style locks cover
                 * the whole file, there is no chance for deadlock.
-                * For byte-range locks we must check for deadlock.
+                *
+                * OFD byte-range locks currently do NOT support
+                * deadlock detection.
+                *
+                * For POSIX byte-range locks we must check for deadlock.
                 *
                 * Deadlock detection is done by looking through the
                 * wait channels to see if there are any cycles that
@@ -506,7 +541,7 @@ lf_setlock(struct lockf *lock, struct timespec *timeout)
                        int i = 0;
 
                        /* The block is waiting on something */
-                       wproc = (struct proc *)block->lf_id;
+                       wproc = block->lf_owner;
                        proc_lock(wproc);
                        TAILQ_FOREACH(ut, &wproc->p_uthlist, uu_list) {
                                /*
@@ -536,7 +571,7 @@ lf_setlock(struct lockf *lock, struct timespec *timeout)
 
                                        /*
                                         * Make sure it's an advisory range
-                                        * lock and not an overall file lock;
+                                        * lock and not any other kind of lock;
                                         * if we mix lock types, it's our own
                                         * fault.
                                         */
@@ -549,8 +584,8 @@ lf_setlock(struct lockf *lock, struct timespec *timeout)
                                         * getting the requested lock, then we
                                         * would deadlock, so error out.
                                         */
-                                       bproc = (struct proc *)waitblock->lf_id;
-                                       if (bproc == (struct proc *)lock->lf_id) {
+                                       bproc = waitblock->lf_owner;
+                                       if (bproc == lock->lf_owner) {
                                                proc_unlock(wproc);
                                                FREE(lock, M_LOCKF);
                                                return (EDEADLK);
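The cycle walk above is what turns a cross-process POSIX byte-range lock cycle into EDEADLK for the requester. A minimal userland illustration follows (a sketch only; the temporary path and the sleeps are arbitrary): two processes each take one byte with F_SETLKW and then request the other's byte, and the kernel fails the request that would close the cycle.

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/types.h>
    #include <unistd.h>

    static int
    range_lock(int fd, off_t start, int cmd)
    {
            struct flock fl = {
                    .l_type   = F_WRLCK,
                    .l_whence = SEEK_SET,
                    .l_start  = start,
                    .l_len    = 1,
            };
            return fcntl(fd, cmd, &fl);
    }

    int
    main(void)
    {
            int fd = open("/tmp/lf_deadlock_demo", O_RDWR | O_CREAT, 0600);
            if (fd < 0) { perror("open"); return 1; }

            if (fork() == 0) {
                    /* child: lock byte 1, then block waiting for byte 0 */
                    (void)range_lock(fd, 1, F_SETLKW);
                    sleep(1);
                    (void)range_lock(fd, 0, F_SETLKW);
                    _exit(0);
            }

            /* parent: lock byte 0, give the child time to block, then ask for
             * byte 1 -- this request would complete the cycle, so it fails. */
            (void)range_lock(fd, 0, F_SETLKW);
            sleep(2);
            if (range_lock(fd, 1, F_SETLKW) == -1)
                    perror("fcntl(F_SETLKW)");      /* expected: EDEADLK */
            return 0;
    }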
@@ -584,43 +619,37 @@ lf_setlock(struct lockf *lock, struct timespec *timeout)
                if ( !(lock->lf_flags & F_FLOCK))
                        block->lf_flags &= ~F_WAKE1_SAFE;
 
+#if IMPORTANCE_INHERITANCE
+               /*
+                * Importance donation is done only for cases where the
+                * owning task can be unambiguously determined.
+                *
+                * POSIX type locks are not inherited by child processes;
+                * we maintain a 1:1 mapping between a lock and its owning
+                * process.
+                *
+                * Flock type locks are inherited across fork() and there is
+                * no 1:1 mapping in the general case.  However, the fileglobs
+                * used by OFD locks *may* be confined to the process that
+                * created them, and thus have an "owner", in which case
+                * we also attempt importance donation.
+                */
+               if ((lock->lf_flags & block->lf_flags & F_POSIX) != 0)
+                       lf_boost_blocking_proc(lock, block);
+               else if ((lock->lf_flags & block->lf_flags & F_OFD_LOCK) &&
+                   lock->lf_owner != block->lf_owner &&
+                   NULL != lock->lf_owner && NULL != block->lf_owner)
+                       lf_boost_blocking_proc(lock, block);
+#endif /* IMPORTANCE_INHERITANCE */
+
 #ifdef LOCKF_DEBUGGING
-               if (lockf_debug & 1) {
+               if (lockf_debug & LF_DBG_LOCKOP) {
                        lf_print("lf_setlock: blocking on", block);
                        lf_printlist("lf_setlock(block)", block);
                }
 #endif /* LOCKF_DEBUGGING */
                DTRACE_FSINFO(advlock__wait, vnode_t, vp);
-#if IMPORTANCE_INHERITANCE
-               /*
-                * Posix type of locks are not inherited by child processes and 
-                * it maintains one to one mapping between lock and its owner, while
-                * Flock type of locks are inherited across forks and it does not
-                * maintian any one to one mapping between the lock and the lock 
-                * owner. Thus importance donation is done only for Posix type of 
-                * locks.
-                */
-               if ((lock->lf_flags & F_POSIX) && (block->lf_flags & F_POSIX)) {
-                       block_task = proc_task((proc_t) block->lf_id);
-                       boosting_task = proc_task((proc_t) lock->lf_id);
-
-                       /* Check if current task can donate importance. The 
-                        * check of imp_donor bit is done without holding 
-                        * any lock. The value may change after you read it, 
-                        * but it is ok to boost a task while someone else is 
-                        * unboosting you.
-                        *
-                        * TODO: Support live inheritance on file locks.
-                        */
-                       if (task_is_importance_donor(boosting_task)) {
-                               if (block->lf_boosted != LF_BOOSTED && 
-                                   task_is_importance_receiver_type(block_task)) {
-                                       lf_hold_assertion(block_task, block);
-                               }
-                               lf_jump_to_queue_head(block, lock);
-                       }
-               }
-#endif /* IMPORTANCE_INHERITANCE */
+
                error = msleep(lock, &vp->v_lock, priority, lockstr, timeout);
 
                if (error == 0 && (lock->lf_flags & F_ABORT) != 0)
@@ -797,7 +826,7 @@ lf_setlock(struct lockf *lock, struct timespec *timeout)
        /* Coalesce adjacent locks with identical attributes */
        lf_coalesce_adjacent(lock);
 #ifdef LOCKF_DEBUGGING
-       if (lockf_debug & 1) {
+       if (lockf_debug & LF_DBG_LOCKOP) {
                lf_print("lf_setlock: got the lock", lock);
                lf_printlist("lf_setlock(out)", lock);
        }
@@ -835,7 +864,7 @@ lf_clearlock(struct lockf *unlock)
 #ifdef LOCKF_DEBUGGING
        if (unlock->lf_type != F_UNLCK)
                panic("lf_clearlock: bad type");
-       if (lockf_debug & 1)
+       if (lockf_debug & LF_DBG_LOCKOP)
                lf_print("lf_clearlock", unlock);
 #endif /* LOCKF_DEBUGGING */
        prev = head;
@@ -892,7 +921,7 @@ lf_clearlock(struct lockf *unlock)
                break;
        }
 #ifdef LOCKF_DEBUGGING
-       if (lockf_debug & 1)
+       if (lockf_debug & LF_DBG_LOCKOP)
                lf_printlist("lf_clearlock", unlock);
 #endif /* LOCKF_DEBUGGING */
        return (0);
@@ -927,7 +956,7 @@ lf_getlock(struct lockf *lock, struct flock *fl, pid_t matchpid)
        struct lockf *block;
 
 #ifdef LOCKF_DEBUGGING
-       if (lockf_debug & 1)
+       if (lockf_debug & LF_DBG_LOCKOP)
                lf_print("lf_getlock", lock);
 #endif /* LOCKF_DEBUGGING */
 
@@ -939,9 +968,13 @@ lf_getlock(struct lockf *lock, struct flock *fl, pid_t matchpid)
                        fl->l_len = 0;
                else
                        fl->l_len = block->lf_end - block->lf_start + 1;
-               if (block->lf_flags & F_POSIX)
-                       fl->l_pid = proc_pid((struct proc *)(block->lf_id));
-               else
+               if (NULL != block->lf_owner) {
+                       /*
+                        * lf_owner is only non-NULL when the lock
+                        * "owner" can be unambiguously determined
+                        */
+                       fl->l_pid = proc_pid(block->lf_owner);
+               } else
                        fl->l_pid = -1;
        } else {
                fl->l_type = F_UNLCK;
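With this change, F_GETLK reports an owning pid only when the kernel can attribute the conflicting lock to a single process (lf_owner != NULL), and -1 otherwise. A short F_GETLK sketch for reference, illustrative only:

    #include <fcntl.h>
    #include <stdio.h>

    /* Ask who would block a whole-file write lock on fd. */
    int
    who_blocks(int fd)
    {
            struct flock fl = {
                    .l_type   = F_WRLCK,
                    .l_whence = SEEK_SET,
                    .l_start  = 0,
                    .l_len    = 0,          /* 0 == to end of file */
            };

            if (fcntl(fd, F_GETLK, &fl) == -1)
                    return -1;

            if (fl.l_type == F_UNLCK) {
                    printf("no conflicting lock\n");
            } else {
                    /* l_pid is -1 when no single owning process can be named,
                     * e.g. flock-style or unconfined OFD locks. */
                    printf("conflicting %s lock, l_pid=%d\n",
                        fl.l_type == F_WRLCK ? "write" : "read", (int)fl.l_pid);
            }
            return 0;
    }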
@@ -977,12 +1010,14 @@ lf_getblock(struct lockf *lock, pid_t matchpid)
                 * Found an overlap.
                 *
                 * If we're matching pids, and it's a record lock,
+                * or it's an OFD lock on a process-confined fd,
                 * but the pid doesn't match, then keep on looking ..
                 */
                if (matchpid != -1 &&
-                   (overlap->lf_flags & F_POSIX) != 0 &&
-                   proc_pid((struct proc *)(overlap->lf_id)) != matchpid)
+                   (overlap->lf_flags & (F_POSIX|F_OFD_LOCK)) != 0 &&
+                   proc_pid(overlap->lf_owner) != matchpid)
                        continue;
+
                /*
                 * does it block us?
                 */
@@ -1048,7 +1083,7 @@ lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
        if (lf == NOLOCKF)
                return (0);
 #ifdef LOCKF_DEBUGGING
-       if (lockf_debug & 2)
+       if (lockf_debug & LF_DBG_LIST)
                lf_print("lf_findoverlap: looking for overlap in", lock);
 #endif /* LOCKF_DEBUGGING */
        start = lock->lf_start;
@@ -1079,7 +1114,7 @@ lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
                }
 
 #ifdef LOCKF_DEBUGGING
-               if (lockf_debug & 2)
+               if (lockf_debug & LF_DBG_LIST)
                        lf_print("\tchecking", lf);
 #endif /* LOCKF_DEBUGGING */
                /*
@@ -1088,7 +1123,7 @@ lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
                if ((lf->lf_end != -1 && start > lf->lf_end) ||
                    (end != -1 && lf->lf_start > end)) {
                        /* Case 0 */
-                       LOCKF_DEBUG(2, "no overlap\n");
+                       LOCKF_DEBUG(LF_DBG_LIST, "no overlap\n");
 
                        /*
                         * NOTE: assumes that locks for the same process are 
@@ -1101,30 +1136,30 @@ lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
                        continue;
                }
                if ((lf->lf_start == start) && (lf->lf_end == end)) {
-                       LOCKF_DEBUG(2, "overlap == lock\n");
+                       LOCKF_DEBUG(LF_DBG_LIST, "overlap == lock\n");
                        return (OVERLAP_EQUALS_LOCK);
                }
                if ((lf->lf_start <= start) &&
                    (end != -1) &&
                    ((lf->lf_end >= end) || (lf->lf_end == -1))) {
-                       LOCKF_DEBUG(2, "overlap contains lock\n");
+                       LOCKF_DEBUG(LF_DBG_LIST, "overlap contains lock\n");
                        return (OVERLAP_CONTAINS_LOCK);
                }
                if (start <= lf->lf_start &&
                           (end == -1 ||
                           (lf->lf_end != -1 && end >= lf->lf_end))) {
-                       LOCKF_DEBUG(2, "lock contains overlap\n");
+                       LOCKF_DEBUG(LF_DBG_LIST, "lock contains overlap\n");
                        return (OVERLAP_CONTAINED_BY_LOCK);
                }
                if ((lf->lf_start < start) &&
                        ((lf->lf_end >= start) || (lf->lf_end == -1))) {
-                       LOCKF_DEBUG(2, "overlap starts before lock\n");
+                       LOCKF_DEBUG(LF_DBG_LIST, "overlap starts before lock\n");
                        return (OVERLAP_STARTS_BEFORE_LOCK);
                }
                if ((lf->lf_start > start) &&
                        (end != -1) &&
                        ((lf->lf_end > end) || (lf->lf_end == -1))) {
-                       LOCKF_DEBUG(2, "overlap ends after lock\n");
+                       LOCKF_DEBUG(LF_DBG_LIST, "overlap ends after lock\n");
                        return (OVERLAP_ENDS_AFTER_LOCK);
                }
                panic("lf_findoverlap: default");
@@ -1162,13 +1197,13 @@ lf_split(struct lockf *lock1, struct lockf *lock2)
        struct lockf *splitlock;
 
 #ifdef LOCKF_DEBUGGING
-       if (lockf_debug & 2) {
+       if (lockf_debug & LF_DBG_LIST) {
                lf_print("lf_split", lock1);
                lf_print("splitting from", lock2);
        }
 #endif /* LOCKF_DEBUGGING */
        /*
-        * Check to see if spliting into only two pieces.
+        * Check to see if splitting into only two pieces.
         */
        if (lock1->lf_start == lock2->lf_start) {
                lock1->lf_start = lock2->lf_end + 1;
@@ -1236,7 +1271,7 @@ lf_wakelock(struct lockf *listhead, boolean_t force_all)
 
                wakelock->lf_next = NOLOCKF;
 #ifdef LOCKF_DEBUGGING
-               if (lockf_debug & 2)
+               if (lockf_debug & LF_DBG_LOCKOP)
                        lf_print("lf_wakelock: awakening", wakelock);
 #endif /* LOCKF_DEBUGGING */
                if (wake_all == FALSE) {
@@ -1268,6 +1303,8 @@ lf_wakelock(struct lockf *listhead, boolean_t force_all)
 
 
 #ifdef LOCKF_DEBUGGING
+#define GET_LF_OWNER_PID(lf)   (proc_pid((lf)->lf_owner))
+
 /*
  * lf_print DEBUG
  *
@@ -1284,7 +1321,11 @@ lf_print(const char *tag, struct lockf *lock)
 {
        printf("%s: lock %p for ", tag, (void *)lock);
        if (lock->lf_flags & F_POSIX)
-               printf("proc %ld", (long)((struct proc *)lock->lf_id)->p_pid);
+               printf("proc %p (owner %d)",
+                   lock->lf_id, GET_LF_OWNER_PID(lock));
+       else if (lock->lf_flags & F_OFD_LOCK)
+               printf("fg %p (owner %d)",
+                   lock->lf_id, GET_LF_OWNER_PID(lock));
        else
                printf("id %p", (void *)lock->lf_id);
        if (lock->lf_vnode != 0)
@@ -1332,8 +1373,11 @@ lf_printlist(const char *tag, struct lockf *lock)
        for (lf = lock->lf_vnode->v_lockf; lf; lf = lf->lf_next) {
                printf("\tlock %p for ",(void *)lf);
                if (lf->lf_flags & F_POSIX)
-                       printf("proc %ld",
-                           (long)((struct proc *)lf->lf_id)->p_pid);
+                       printf("proc %p (owner %d)",
+                           lf->lf_id, GET_LF_OWNER_PID(lf));
+               else if (lf->lf_flags & F_OFD_LOCK)
+                       printf("fg %p (owner %d)",
+                           lf->lf_id, GET_LF_OWNER_PID(lf));
                else
                        printf("id %p", (void *)lf->lf_id);
                printf(", %s, start 0x%016llx, end 0x%016llx",
@@ -1344,8 +1388,11 @@ lf_printlist(const char *tag, struct lockf *lock)
                TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
                        printf("\n\t\tlock request %p for ", (void *)blk);
                        if (blk->lf_flags & F_POSIX)
-                               printf("proc %ld",
-                                   (long)((struct proc *)blk->lf_id)->p_pid);
+                               printf("proc %p (owner %d)",
+                                   blk->lf_id, GET_LF_OWNER_PID(blk));
+                       else if (blk->lf_flags & F_OFD_LOCK)
+                               printf("fg %p (owner %d)",
+                                   blk->lf_id, GET_LF_OWNER_PID(blk));
                        else
                                printf("id %p", (void *)blk->lf_id);
                        printf(", %s, start 0x%016llx, end 0x%016llx",
@@ -1387,6 +1434,9 @@ lf_hold_assertion(task_t block_task, struct lockf *block)
 {
        if (task_importance_hold_file_lock_assertion(block_task, 1)) {
                block->lf_boosted = LF_BOOSTED;
+               LOCKF_DEBUG(LF_DBG_IMPINH,
+                   "lf: importance hold file lock assert on pid %d lock %p\n",
+                   proc_pid(block->lf_owner), block);
        }
 }
 
@@ -1425,11 +1475,39 @@ lf_jump_to_queue_head(struct lockf *block, struct lockf *lock)
 static void 
 lf_drop_assertion(struct lockf *block)
 {
-       task_t current_task;
+       LOCKF_DEBUG(LF_DBG_IMPINH, "lf: %d: dropping assertion for lock %p\n",
+           proc_pid(block->lf_owner), block);
 
-       current_task = proc_task((proc_t) block->lf_id);
+       task_t current_task = proc_task(block->lf_owner);
        task_importance_drop_file_lock_assertion(current_task, 1);
        block->lf_boosted = LF_NOT_BOOSTED;
 }
 
+static void
+lf_boost_blocking_proc(struct lockf *lock, struct lockf *block)
+{
+       task_t ltask = proc_task(lock->lf_owner);
+       task_t btask = proc_task(block->lf_owner);
+
+       /*
+        * Check if ltask can donate importance. The
+        * check of imp_donor bit is done without holding
+        * any lock. The value may change after you read it,
+        * but it is ok to boost a task while someone else is
+        * unboosting it.
+        *
+        * TODO: Support live inheritance on file locks.
+        */
+       if (task_is_importance_donor(ltask)) {
+               LOCKF_DEBUG(LF_DBG_IMPINH,
+                   "lf: %d: attempt to boost pid %d that holds lock %p\n",
+                   proc_pid(lock->lf_owner), proc_pid(block->lf_owner), block);
+
+               if (block->lf_boosted != LF_BOOSTED &&
+                   task_is_importance_receiver_type(btask)) {
+                       lf_hold_assertion(btask, block);
+               }
+               lf_jump_to_queue_head(block, lock);
+       }
+}
 #endif /* IMPORTANCE_INHERITANCE */
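The inline boost/queue-jump block deleted from lf_setlock() above is factored into the new lf_boost_blocking_proc() helper. A minimal sketch of how the blocked path would call it just before the msleep() retained above; the guard condition here is an assumption for illustration, not the verbatim call site from this commit:

#if IMPORTANCE_INHERITANCE
	/*
	 * Hypothetical call site: donate importance only when both lock
	 * owners can be determined unambiguously and they differ.
	 */
	if (lock->lf_owner != NULL && block->lf_owner != NULL &&
	    lock->lf_owner != block->lf_owner) {
		lf_boost_blocking_proc(lock, block);
	}
#endif /* IMPORTANCE_INHERITANCE */
	error = msleep(lock, &vp->v_lock, priority, lockstr, timeout);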
index 860a232dd41c6ecb3c321f38d917856a4226b4a3..d1adaabece24eb1e8deca7afd26a1a4c0f0d56b3 100644 (file)
@@ -280,7 +280,7 @@ const char *memname[] = {
        "mactemp",      /* 104 M_MACTEMP */
        "sbuf",         /* 105 M_SBUF */
        "extattr",      /* 106 M_EXTATTR */
-       "lctx",         /* 107 M_LCTX */
+       "select",       /* 107 M_SELECT */
 #if TRAFFIC_MGT
        "traffic_mgt",   /* 108 M_TRAFFIC_MGT */
 #else
@@ -317,6 +317,7 @@ const char *memname[] = {
 #endif
        "fdvnodedata"   /* 122 M_FD_VN_DATA */
        "fddirbuf",     /* 123 M_FD_DIRBUF */
+       "netagent",     /* 124 M_NETAGENT */
        ""
 };
 
@@ -484,7 +485,7 @@ struct kmzones {
        { 0,            KMZ_MALLOC, FALSE },            /* 104 M_MACTEMP */
        { 0,            KMZ_MALLOC, FALSE },            /* 105 M_SBUF */
        { 0,            KMZ_MALLOC, FALSE },            /* 106 M_HFS_EXTATTR */
-       { 0,            KMZ_MALLOC, FALSE },            /* 107 M_LCTX */
+       { 0,            KMZ_MALLOC, FALSE },            /* 107 M_SELECT */
        { 0,            KMZ_MALLOC, FALSE },            /* 108 M_TRAFFIC_MGT */
 #if HFS_COMPRESSION
        { SOS(decmpfs_cnode),KMZ_CREATEZONE , FALSE},   /* 109 M_DECMPFS_CNODE */
@@ -514,6 +515,9 @@ struct kmzones {
        { 0,            KMZ_MALLOC, FALSE },            /* 120 M_NECP_SOCKET_POLICY */
        { 0,            KMZ_MALLOC, FALSE },            /* 121 M_NECP_IP_POLICY */
 #endif /* NECP */
+       { 0,            KMZ_MALLOC, FALSE },            /* 122 M_FD_VN_DATA */
+       { 0,            KMZ_MALLOC, FALSE },            /* 123 M_FD_DIRBUF */
+       { 0,            KMZ_MALLOC, FALSE },            /* 124 M_NETAGENT */
 #undef SOS
 #undef SOX
 };
@@ -580,11 +584,28 @@ struct _mhead {
        char    dat[0];
 };
 
+
 void *
-_MALLOC(
+_MALLOC_external(
+       size_t          size,
+       int             type,
+       int             flags);
+void *
+_MALLOC_external(
        size_t          size,
        int             type,
        int             flags)
+{
+    static vm_allocation_site_t site = { VM_KERN_MEMORY_KALLOC, VM_TAG_BT };
+    return (__MALLOC(size, type, flags, &site));
+}
+
+void *
+__MALLOC(
+       size_t          size,
+       int             type,
+       int             flags,
+       vm_allocation_site_t *site)
 {
        struct _mhead   *hdr = NULL;
        size_t          memsize = sizeof (*hdr) + size;
@@ -599,7 +620,7 @@ _MALLOC(
                if (size > memsize)   /* overflow detected */
                        return (NULL);
                else
-                       hdr = (void *)kalloc_noblock(memsize); 
+                       hdr = (void *)kalloc_canblock(memsize, FALSE, site); 
        } else {
                if (size > memsize) {
                        /*
@@ -610,7 +631,7 @@ _MALLOC(
                        panic("_MALLOC: overflow detected, size %llu ", (uint64_t) size);
                }
                else
-                       hdr = (void *)kalloc(memsize);
+                       hdr = (void *)kalloc_canblock(memsize, TRUE, site);
 
               if (hdr == NULL) {
 
@@ -656,11 +677,12 @@ _FREE(
 }
 
 void *
-_REALLOC(
+__REALLOC(
        void            *addr,
        size_t          size,
        int             type,
-       int             flags)
+       int             flags,
+       vm_allocation_site_t *site)
 {
        struct _mhead   *hdr;
        void            *newaddr;
@@ -668,10 +690,10 @@ _REALLOC(
 
        /* realloc(NULL, ...) is equivalent to malloc(...) */
        if (addr == NULL)
-               return (_MALLOC(size, type, flags));
+               return (__MALLOC(size, type, flags, site));
 
        /* Allocate a new, bigger (or smaller) block */
-       if ((newaddr = _MALLOC(size, type, flags)) == NULL)
+       if ((newaddr = __MALLOC(size, type, flags, site)) == NULL)
                return (NULL);
 
        hdr = addr;
@@ -686,10 +708,25 @@ _REALLOC(
 }
 
 void *
-_MALLOC_ZONE(
+_MALLOC_ZONE_external(
+       size_t          size,
+       int             type,
+       int             flags);
+void *
+_MALLOC_ZONE_external(
        size_t          size,
        int             type,
        int             flags)
+{
+    return (__MALLOC_ZONE(size, type, flags, NULL));
+}
+
+void *
+__MALLOC_ZONE(
+       size_t          size,
+       int             type,
+       int             flags,
+       vm_allocation_site_t *site)
 {
        struct kmzones  *kmz;
        void            *elem;
@@ -713,9 +750,9 @@ _MALLOC_ZONE(
                }
        else
                if (flags & M_NOWAIT) {
-                       elem = (void *)kalloc_noblock(size);
+                       elem = (void *)kalloc_canblock(size, FALSE, site);
                } else {
-                       elem = (void *)kalloc(size);
+                       elem = (void *)kalloc_canblock(size, TRUE, site);
                }
 
        return (elem);
index 0d46cec142fde79b1013242f88be94ad37a9a4a0..13dcc2607c11dc729cd648d1e99bd961711a270b 100644 (file)
 #include <kern/thread.h>
 #include <kern/host.h>
 #include <libkern/libkern.h>
+#include <mach/coalition.h>
 #include <mach/mach_time.h>
 #include <mach/task.h>
 #include <mach/host_priv.h>
 #include <mach/mach_host.h>
 #include <pexpert/pexpert.h>
+#include <sys/coalition.h>
 #include <sys/kern_event.h>
 #include <sys/proc.h>
 #include <sys/proc_info.h>
@@ -104,6 +106,70 @@ do {                                              \
 #define MEMORYSTATUS_DEBUG(cond, format, ...)
 #endif
 
+/*
+ * Active / Inactive limit support
+ * proc list must be locked
+ *
+ * The SET_*** macros are used to initialize a limit
+ * for the first time.
+ *
+ * The CACHE_*** macros are used to cache the limit that will
+ * soon be in effect down in the ledgers.
+ */
+
+#define SET_ACTIVE_LIMITS_LOCKED(p, limit, is_fatal)                   \
+MACRO_BEGIN                                                            \
+(p)->p_memstat_memlimit_active = (limit);                              \
+   (p)->p_memstat_state &= ~P_MEMSTAT_MEMLIMIT_ACTIVE_EXC_TRIGGERED;   \
+   if (is_fatal) {                                                     \
+          (p)->p_memstat_state |= P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL;     \
+   } else {                                                            \
+          (p)->p_memstat_state &= ~P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL;    \
+   }                                                                   \
+MACRO_END
+
+#define SET_INACTIVE_LIMITS_LOCKED(p, limit, is_fatal)                 \
+MACRO_BEGIN                                                            \
+(p)->p_memstat_memlimit_inactive = (limit);                            \
+   (p)->p_memstat_state &= ~P_MEMSTAT_MEMLIMIT_INACTIVE_EXC_TRIGGERED; \
+   if (is_fatal) {                                                     \
+          (p)->p_memstat_state |= P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL;   \
+   } else {                                                            \
+          (p)->p_memstat_state &= ~P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL;  \
+   }                                                                   \
+MACRO_END
+
+#define CACHE_ACTIVE_LIMITS_LOCKED(p, trigger_exception)               \
+MACRO_BEGIN                                                            \
+(p)->p_memstat_memlimit = (p)->p_memstat_memlimit_active;              \
+   if ((p)->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL) {       \
+          (p)->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT;            \
+   } else {                                                            \
+          (p)->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT;           \
+   }                                                                   \
+   if ((p)->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_EXC_TRIGGERED) { \
+          trigger_exception = FALSE;                                   \
+   } else {                                                            \
+          trigger_exception = TRUE;                                    \
+   }                                                                   \
+MACRO_END
+
+#define CACHE_INACTIVE_LIMITS_LOCKED(p, trigger_exception)             \
+MACRO_BEGIN                                                            \
+(p)->p_memstat_memlimit = (p)->p_memstat_memlimit_inactive;            \
+   if ((p)->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL) {     \
+          (p)->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT;            \
+   } else {                                                            \
+          (p)->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT;           \
+   }                                                                   \
+   if ((p)->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_EXC_TRIGGERED) { \
+          trigger_exception = FALSE;                                   \
+   } else {                                                            \
+          trigger_exception = TRUE;                                    \
+   }                                                                   \
+MACRO_END
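The SET_* macros above record a process's per-state limits once (typically at spawn), while the CACHE_* macros later copy whichever variant matches the current active/inactive state into the single cached p_memstat_memlimit that gets pushed to the ledger. A minimal userland sketch of the same two-step idea; the structure and helper below are illustrative, not the kernel's:

#include <stdbool.h>
#include <stdio.h>

struct memlimits {
	int  active_mb;           /* limit while active (e.g. foreground)   */
	bool active_is_fatal;
	int  inactive_mb;         /* limit while inactive (e.g. background) */
	bool inactive_is_fatal;
	int  cached_mb;           /* value that would be written to the ledger */
	bool cached_is_fatal;
};

/* analogous to CACHE_ACTIVE_LIMITS_LOCKED / CACHE_INACTIVE_LIMITS_LOCKED */
static void
cache_limits(struct memlimits *l, bool is_active)
{
	l->cached_mb       = is_active ? l->active_mb       : l->inactive_mb;
	l->cached_is_fatal = is_active ? l->active_is_fatal : l->inactive_is_fatal;
}

int
main(void)
{
	/* analogous to SET_ACTIVE/INACTIVE_LIMITS_LOCKED at spawn time */
	struct memlimits l = {
		.active_mb = 200,  .active_is_fatal = false,
		.inactive_mb = 50, .inactive_is_fatal = true,
	};

	cache_limits(&l, true);   /* process moved into an active band */
	printf("active:   %d MB, fatal=%d\n", l.cached_mb, l.cached_is_fatal);

	cache_limits(&l, false);  /* process moved into an inactive band */
	printf("inactive: %d MB, fatal=%d\n", l.cached_mb, l.cached_is_fatal);
	return 0;
}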
+
+
 /* General tunables */
 
 unsigned long delta_percentage = 5;
@@ -161,6 +227,7 @@ void memorystatus_send_low_swap_note(void);
 int memorystatus_wakeup = 0;
 
 unsigned int memorystatus_level = 0;
+unsigned int memorystatus_early_boot_level = 0;
 
 static int memorystatus_list_count = 0;
 
@@ -177,6 +244,10 @@ uint64_t memstat_idle_demotion_deadline = 0;
 
 static unsigned int memorystatus_dirty_count = 0;
 
+#if CONFIG_JETSAM
+SYSCTL_INT(_kern, OID_AUTO, max_task_pmem, CTLFLAG_RD|CTLFLAG_LOCKED|CTLFLAG_MASKED, &max_task_footprint_mb, 0, "");
+#endif // CONFIG_JETSAM
+
 
 int
 memorystatus_get_level(__unused struct proc *p, struct memorystatus_get_level_args *args, __unused int *ret)
@@ -201,6 +272,16 @@ static void memorystatus_thread(void *param __unused, wait_result_t wr __unused)
 
 #if CONFIG_JETSAM
 
+static int memorystatus_cmd_set_jetsam_memory_limit(pid_t pid, int32_t high_water_mark, __unused int32_t *retval, boolean_t is_fatal_limit);
+
+static int memorystatus_cmd_set_memlimit_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval);
+
+static int memorystatus_set_memlimit_properties(pid_t pid, memorystatus_memlimit_properties_t *entry);
+
+static int memorystatus_cmd_get_memlimit_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval);
+
+static boolean_t proc_jetsam_state_is_active_locked(proc_t);
+
 int proc_get_memstat_priority(proc_t, boolean_t);
 
 /* Kill processes exceeding their limit either under memory pressure (1), or as soon as possible (0) */
@@ -208,7 +289,7 @@ int proc_get_memstat_priority(proc_t, boolean_t);
 
 static boolean_t memorystatus_idle_snapshot = 0;
 
-static int memorystatus_highwater_enabled = 1;
+static int memorystatus_highwater_enabled = 1;  /* Update the cached memlimit data. This should be removed. */
 
 unsigned int memorystatus_delta = 0;
 
@@ -216,22 +297,49 @@ static unsigned int memorystatus_available_pages_critical_base = 0;
 //static unsigned int memorystatus_last_foreground_pressure_pages = (unsigned int)-1;
 static unsigned int memorystatus_available_pages_critical_idle_offset = 0;
 
+/* Jetsam Loop Detection */
+static boolean_t memorystatus_jld_enabled = TRUE;              /* Enables jetsam loop detection on all devices */
+static uint32_t memorystatus_jld_eval_period_msecs = 0;                /* Init pass sets this based on device memory size */
+static int      memorystatus_jld_eval_aggressive_count = 3;    /* Raise the priority max after 'n' aggressive loops */
+static int      memorystatus_jld_eval_aggressive_priority_band_max = 15;  /* Kill aggressively up through this band */
+
+#if DEVELOPMENT || DEBUG
+/* 
+ * Jetsam Loop Detection tunables.
+ */
+
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jld_eval_period_msecs, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_jld_eval_period_msecs, 0, "");
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jld_eval_aggressive_count, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_jld_eval_aggressive_count, 0, "");
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jld_eval_aggressive_priority_band_max, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_jld_eval_aggressive_priority_band_max, 0, "");
+#endif /* DEVELOPMENT || DEBUG */
+
 #if DEVELOPMENT || DEBUG
 static unsigned int memorystatus_jetsam_panic_debug = 0;
 
 static unsigned int memorystatus_jetsam_policy = kPolicyDefault;
 static unsigned int memorystatus_jetsam_policy_offset_pages_diagnostic = 0;
+static unsigned int memorystatus_debug_dump_this_bucket = 0;
 #endif
 
 static unsigned int memorystatus_thread_wasted_wakeup = 0;
 
 static uint32_t kill_under_pressure_cause = 0;
 
+/*
+ * default jetsam snapshot support
+ */
 static memorystatus_jetsam_snapshot_t *memorystatus_jetsam_snapshot;
 #define memorystatus_jetsam_snapshot_list memorystatus_jetsam_snapshot->entries
-
 static unsigned int memorystatus_jetsam_snapshot_count = 0;
 static unsigned int memorystatus_jetsam_snapshot_max = 0;
+static uint64_t memorystatus_jetsam_snapshot_last_timestamp = 0;
+static uint64_t memorystatus_jetsam_snapshot_timeout = 0;
+#define JETSAM_SNAPSHOT_TIMEOUT_SECS 30
+
+/*
+ * snapshot support for memstats collected at boot.
+ */
+static memorystatus_jetsam_snapshot_t memorystatus_at_boot_snapshot;
 
 static void memorystatus_clear_errors(void);
 static void memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint, uint32_t *max_footprint_lifetime, uint32_t *purgeable_pages);
@@ -240,7 +348,8 @@ static void memorystatus_update_levels_locked(boolean_t critical_only);
 //static boolean_t memorystatus_issue_pressure_kevent(boolean_t pressured);
 
 static boolean_t memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause);
-static boolean_t memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, uint32_t *errors);
+static boolean_t memorystatus_kill_top_process(boolean_t any, boolean_t sort_flag, uint32_t cause, int32_t *priority, uint32_t *errors);
+static boolean_t memorystatus_kill_top_process_aggressive(boolean_t any, uint32_t cause, int aggr_count, int32_t priority_max, uint32_t *errors);
 #if LEGACY_HIWATER
 static boolean_t memorystatus_kill_hiwat_proc(uint32_t *errors);
 #endif
@@ -248,6 +357,17 @@ static boolean_t memorystatus_kill_hiwat_proc(uint32_t *errors);
 static boolean_t memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause);
 static boolean_t memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause);
 
+/* Priority Band Sorting Routines */
+static int  memorystatus_sort_bucket(unsigned int bucket_index, int sort_order);
+static int  memorystatus_sort_by_largest_coalition_locked(unsigned int bucket_index, int coal_sort_order);
+static void memorystatus_sort_by_largest_process_locked(unsigned int bucket_index);
+static int  memorystatus_move_list_locked(unsigned int bucket_index, pid_t *pid_list, int list_sz);
+
+/* qsort routines */
+typedef int (*cmpfunc_t)(const void *a, const void *b);
+extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
+static int memstat_asc_cmp(const void *a, const void *b);
+
 #endif /* CONFIG_JETSAM */
 
 /* VM pressure */
@@ -300,6 +420,10 @@ static int memorystatus_send_note(int event_code, void *data, size_t data_length
 boolean_t memorystatus_freeze_enabled = FALSE;
 int memorystatus_freeze_wakeup = 0;
 
+lck_grp_attr_t *freezer_lck_grp_attr;
+lck_grp_t *freezer_lck_grp;
+static lck_mtx_t freezer_mutex;
+
 static inline boolean_t memorystatus_can_freeze_processes(void);
 static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);
 
@@ -313,6 +437,8 @@ static unsigned int memorystatus_freeze_pages_max = 0;
 
 static unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;
 
+static unsigned int memorystatus_freeze_daily_mb_max = FREEZE_DAILY_MB_MAX_DEFAULT;
+
 /* Stats */
 static uint64_t memorystatus_freeze_count = 0;
 static uint64_t memorystatus_freeze_pageouts = 0;
@@ -327,6 +453,10 @@ static uint64_t memorystatus_freeze_throttle_count = 0;
 
 static unsigned int memorystatus_suspended_footprint_total = 0;
 
+extern uint64_t vm_swap_get_free_space(void);
+
+static boolean_t memorystatus_freeze_update_throttle();
+
 #endif /* CONFIG_FREEZE */
 
 /* Debug */
@@ -337,6 +467,89 @@ extern struct knote *vm_find_knote_from_pid(pid_t, struct klist *);
 
 #if CONFIG_JETSAM
 
+static void
+memorystatus_debug_dump_bucket_locked (unsigned int bucket_index)
+{
+       proc_t p = NULL;
+       uint32_t pages = 0;
+       uint32_t pages_in_mb = 0;
+       unsigned int b = bucket_index;
+       boolean_t traverse_all_buckets = FALSE;
+
+        if (bucket_index >= MEMSTAT_BUCKET_COUNT) {
+               traverse_all_buckets = TRUE;
+               b = 0;
+        } else {
+               traverse_all_buckets = FALSE;
+               b = bucket_index;
+       }
+
+       /*
+        * Missing from this dump is the value actually
+        * stored in the ledger... also, format could be better.
+        */
+        printf("memorystatus_debug_dump ***START***\n");
+       printf("bucket [pid] [pages/pages-mb] state [EP / RP] dirty deadline [C-limit / A-limit / IA-limit] name\n");
+       p = memorystatus_get_first_proc_locked(&b, traverse_all_buckets);
+       while (p) {
+               memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL);
+               pages_in_mb = (pages * 4096) /1024 / 1024;
+                printf("%d     [%d]     [%d/%dMB] 0x%x [%d / %d] 0x%x %lld    [%d%s / %d%s / %d%s] %s\n", 
+                      b, p->p_pid, pages, pages_in_mb,
+                      p->p_memstat_state, p->p_memstat_effectivepriority, p->p_memstat_requestedpriority, p->p_memstat_dirty, p->p_memstat_idledeadline,
+                      p->p_memstat_memlimit,
+                      (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"),
+                      p->p_memstat_memlimit_active, 
+                      (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL ? "F " : "NF"),
+                      p->p_memstat_memlimit_inactive, 
+                      (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL ? "F " : "NF"),
+                      (p->p_comm ? p->p_comm : "unknown"));
+               p = memorystatus_get_next_proc_locked(&b, p, traverse_all_buckets);
+        }
+        printf("memorystatus_debug_dump ***END***\n");
+}
+
+static int
+sysctl_memorystatus_debug_dump_bucket SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg2)
+        int bucket_index = 0;
+        int error;
+       error = SYSCTL_OUT(req, arg1, sizeof(int));
+       if (error || !req->newptr) {
+               return (error);
+       }
+        error = SYSCTL_IN(req, &bucket_index, sizeof(int));
+        if (error || !req->newptr) {
+                return (error);
+        }
+       if (bucket_index >= MEMSTAT_BUCKET_COUNT) {
+               /*
+                * All jetsam buckets will be dumped.
+                */
+        } else {
+               /*
+                * Only a single bucket will be dumped.
+                */
+       }
+
+       proc_list_lock();
+       memorystatus_debug_dump_bucket_locked(bucket_index);
+       proc_list_unlock();
+       memorystatus_debug_dump_this_bucket = bucket_index;
+       return (error);
+}
+
+/*
+ * Debug aid to look at jetsam buckets and proc jetsam fields.
+ *     Use this sysctl to act on a particular jetsam bucket.
+ *     Writing the sysctl triggers the dump.
+ *     Usage: sysctl kern.memorystatus_debug_dump_this_bucket=<bucket_index>
+ */
+
+SYSCTL_PROC(_kern, OID_AUTO, memorystatus_debug_dump_this_bucket, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_debug_dump_this_bucket, 0, sysctl_memorystatus_debug_dump_bucket, "I", "");
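Besides the sysctl(8) one-liner in the comment above, the dump can be triggered programmatically. A small sketch using sysctlbyname(3); it assumes a CONFIG_JETSAM kernel where this node is registered, typically requires root, and the dump itself lands in the kernel log rather than in the returned buffer:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int bucket = 10;	/* illustrative jetsam band to dump */

	/* Writing the value is what triggers memorystatus_debug_dump_bucket_locked(). */
	if (sysctlbyname("kern.memorystatus_debug_dump_this_bucket",
	    NULL, NULL, &bucket, sizeof(bucket)) != 0) {
		perror("sysctlbyname");
		return 1;
	}
	return 0;
}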
+
+
 /* Debug aid to aid determination of limit */
 
 static int
@@ -346,7 +559,6 @@ sysctl_memorystatus_highwater_enable SYSCTL_HANDLER_ARGS
        proc_t p;
        unsigned int b = 0;
        int error, enable = 0;
-       int32_t memlimit;
 
        error = SYSCTL_OUT(req, arg1, sizeof(int));
        if (error || !req->newptr) {
@@ -366,25 +578,35 @@ sysctl_memorystatus_highwater_enable SYSCTL_HANDLER_ARGS
 
        p = memorystatus_get_first_proc_locked(&b, TRUE);
        while (p) {
+               boolean_t trigger_exception;
+
                if (enable) {
-                       if ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) {          
-                               memlimit = -1;
+                       /*
+                        * No need to consider P_MEMSTAT_MEMLIMIT_BACKGROUND anymore.
+                        * Background limits are described via the inactive limit slots.
+                        */
+
+                       if (proc_jetsam_state_is_active_locked(p) == TRUE) {
+                               CACHE_ACTIVE_LIMITS_LOCKED(p, trigger_exception);
                        } else {
-                               memlimit = p->p_memstat_memlimit;                               
+                               CACHE_INACTIVE_LIMITS_LOCKED(p, trigger_exception);
                        }
+
                } else {
-                       memlimit = -1;
-               }
-               task_set_phys_footprint_limit_internal(p->task, (memlimit  > 0) ? memlimit : -1, NULL, TRUE);
-               
-               if (memlimit == -1) {
-                       p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT;
-               } else {
-                       if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) {
-                               p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT;
-                       }
+                       /*
+                        * Disabling limits does not touch the stored variants.
+                        * Set the cached limit fields to system_wide defaults.
+                        */
+                       p->p_memstat_memlimit = -1;
+                       p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT;
+                       trigger_exception = TRUE;
                }
-               
+
+               /*
+                * Enforce the cached limit by writing to the ledger.
+                */
+               task_set_phys_footprint_limit_internal(p->task, (p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit: -1, NULL, trigger_exception);
+
                p = memorystatus_get_next_proc_locked(&b, p, TRUE);
        }
        
@@ -393,6 +615,7 @@ sysctl_memorystatus_highwater_enable SYSCTL_HANDLER_ARGS
        proc_list_unlock();
 
        return 0;
+
 }
 
 SYSCTL_INT(_kern, OID_AUTO, memorystatus_idle_snapshot, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_idle_snapshot, 0, "");
@@ -496,6 +719,7 @@ sysctl_memorystatus_vm_pressure_send SYSCTL_HANDLER_ARGS
        int error = 0, pid = 0;
        int ret = 0;
        struct knote *kn = NULL;
+       boolean_t found_knote = FALSE;
 
        error = sysctl_handle_int(oidp, &pid, 0, req);
        if (error || !req->newptr)
@@ -514,17 +738,27 @@ sysctl_memorystatus_vm_pressure_send SYSCTL_HANDLER_ARGS
         */
 
        memorystatus_klist_lock();
-       kn = vm_find_knote_from_pid(pid, &memorystatus_klist);
-       if (kn) {
-               /*
-                * Forcibly send this pid a "warning" memory pressure notification.
-                */
-               kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN;
-               KNOTE(&memorystatus_klist, kMemorystatusPressure);
-               ret = 0;
+
+       SLIST_FOREACH(kn, &memorystatus_klist, kn_selnext) {
+               proc_t knote_proc = kn->kn_kq->kq_p;
+               pid_t knote_pid = knote_proc->p_pid;
+
+               if (knote_pid == pid) {
+                       /*
+                        * Forcibly send this pid a "warning" memory pressure notification.
+                        */
+                       kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_WARN;
+                       found_knote = TRUE;
+               }
+       }
+
+       if (found_knote) {
+               KNOTE(&memorystatus_klist, 0);
+               ret = 0;
        } else {
                ret = vm_dispatch_pressure_note_to_pid(pid, FALSE);
        }
+
        memorystatus_klist_unlock();
 
        return ret;
@@ -539,6 +773,8 @@ SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_send, CTLTYPE_INT|CTLFLAG_
 
 #if CONFIG_FREEZE
 
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_daily_mb_max, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_daily_mb_max, 0, "");
+
 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");
 
 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_min, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_pages_min, 0, "");
@@ -559,7 +795,6 @@ static int
 sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
 {
 #pragma unused(arg1, arg2)
-
        int error, pid = 0;
        proc_t p;
 
@@ -571,6 +806,14 @@ sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
        if (error || !req->newptr)
                return (error);
 
+       if (pid == 2) {
+               vm_pageout_anonymous_pages();
+
+               return 0;
+       }
+
+       lck_mtx_lock(&freezer_mutex);
+
        p = proc_find(pid);
        if (p != NULL) {
                uint32_t purgeable, wired, clean, dirty;
@@ -578,17 +821,42 @@ sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
                uint32_t max_pages = 0;
 
                if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
-                       max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max);
+
+                       unsigned int avail_swap_space = 0; /* in pages. */
+
+                       if (DEFAULT_FREEZER_IS_ACTIVE) {
+                               /*
+                                * Freezer backed by default pager and swap file(s).
+                                */
+                               avail_swap_space = default_pager_swap_pages_free();
+                       } else {
+                               /*
+                                * Freezer backed by the compressor and swap file(s)
+                                * which will hold compressed data.
+                                */
+                               avail_swap_space = vm_swap_get_free_space() / PAGE_SIZE_64;
+                       }
+
+                       max_pages = MIN(avail_swap_space, memorystatus_freeze_pages_max);
+
                } else {
+                       /*
+                        * We only have the compressor without any swap.
+                        */
                        max_pages = UINT32_MAX - 1;
                }
+
                error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE);
                proc_rele(p);
 
                if (error)
                        error = EIO;
+
+               lck_mtx_unlock(&freezer_mutex);
                return error;
        }
+
+       lck_mtx_unlock(&freezer_mutex);
        return EINVAL;
 }
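To make the max_pages computation above concrete: with, say, 256 MB of free swap, the compressor-backed path reports vm_swap_get_free_space() / PAGE_SIZE_64 = 65536 four-kilobyte pages, so a single freeze is capped at MIN(65536, memorystatus_freeze_pages_max); with no swap-backed configuration at all, the cap is effectively removed (UINT32_MAX - 1) and only the compressor bounds how much can be frozen.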
 
@@ -637,6 +905,64 @@ extern kern_return_t kernel_thread_start_priority(thread_continue_t continuation
                                                   thread_t *new_thread);
 
 #if CONFIG_JETSAM
+/*
+ * Picks the sorting routine for a given jetsam priority band.
+ *
+ * Input:
+ *     bucket_index - jetsam priority band to be sorted.
+ *     sort_order - JETSAM_SORT_xxx from kern_memorystatus.h
+ *             Currently sort_order is only meaningful when handling
+ *             coalitions.
+ *
+ * Return: 
+ *     0     on success
+ *     non-0 on failure
+ */
+static int memorystatus_sort_bucket(unsigned int bucket_index, int sort_order)
+{
+       int coal_sort_order;
+
+       /*
+        * Verify the jetsam priority
+        */
+        if (bucket_index >= MEMSTAT_BUCKET_COUNT) {
+               return(EINVAL);
+        }
+
+#if DEVELOPMENT || DEBUG
+        if (sort_order == JETSAM_SORT_DEFAULT) {
+               coal_sort_order = COALITION_SORT_DEFAULT;
+       } else {
+               coal_sort_order = sort_order;           /* only used for testing scenarios */
+       }
+#else
+       /* Verify default */
+        if (sort_order == JETSAM_SORT_DEFAULT) {
+               coal_sort_order = COALITION_SORT_DEFAULT;
+       } else {
+               return(EINVAL);
+       }
+#endif
+
+       proc_list_lock();
+       switch (bucket_index) {
+       case JETSAM_PRIORITY_FOREGROUND:
+               if (memorystatus_sort_by_largest_coalition_locked(bucket_index, coal_sort_order) == 0) {
+                       /*
+                        * Fall back to per process sorting when zero coalitions are found.
+                        */
+                       memorystatus_sort_by_largest_process_locked(bucket_index);
+               }
+               break;
+       default:
+               memorystatus_sort_by_largest_process_locked(bucket_index);
+               break;
+       }
+       proc_list_unlock();
+       
+        return(0);
+}
+
 /*
  * Sort processes by size for a single jetsam bucket.
  */
@@ -644,6 +970,7 @@ extern kern_return_t kernel_thread_start_priority(thread_continue_t continuation
 static void memorystatus_sort_by_largest_process_locked(unsigned int bucket_index)
 {
        proc_t p = NULL, insert_after_proc = NULL, max_proc = NULL;
+       proc_t next_p = NULL, prev_max_proc = NULL;
        uint32_t pages = 0, max_pages = 0;
        memstat_bucket_t *current_bucket;
                
@@ -655,48 +982,36 @@ static void memorystatus_sort_by_largest_process_locked(unsigned int bucket_inde
 
        p = TAILQ_FIRST(&current_bucket->list);
 
-       if (p) {
+       while (p) {
                memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL);
                max_pages = pages;
-               insert_after_proc = NULL;
-
-               p = TAILQ_NEXT(p, p_memstat_list);
-
-restart:
-               while (p) {
-
+               max_proc = p;
+               prev_max_proc = p;
+               
+               while ((next_p = TAILQ_NEXT(p, p_memstat_list)) != NULL) {
+                       /* traversing list until we find next largest process */
+                       p=next_p;
                        memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL);
-
                        if (pages > max_pages) {
                                max_pages = pages;
                                max_proc = p;
                        }
-                       
-                       p = TAILQ_NEXT(p, p_memstat_list);
                }
 
-               if (max_proc) {
-
+               if (prev_max_proc != max_proc) {
+                       /* found a larger process, place it in the list */
                        TAILQ_REMOVE(&current_bucket->list, max_proc, p_memstat_list);
-
                        if (insert_after_proc == NULL) {
                                TAILQ_INSERT_HEAD(&current_bucket->list, max_proc, p_memstat_list);
                        } else {
                                TAILQ_INSERT_AFTER(&current_bucket->list, insert_after_proc, max_proc, p_memstat_list);
                        }
+                       prev_max_proc = max_proc;
+               }
 
-                       insert_after_proc = max_proc;
-
-                       /* Reset parameters for the new search. */
-                       p = TAILQ_NEXT(max_proc, p_memstat_list);
-                       if (p) {
-                               memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL);
-                               max_pages = pages;
-                       }
-                       max_proc = NULL;
+               insert_after_proc = max_proc;
 
-                       goto restart; 
-               }
+               p = TAILQ_NEXT(max_proc, p_memstat_list);
        }
 }
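The rewritten loop above is a selection sort over the bucket's TAILQ: each outer pass scans the unsorted tail for the process with the largest footprint and splices it in immediately after the previously placed maximum. A standalone sketch of the same pass structure on an array; the entry type and values are illustrative:

#include <stdint.h>
#include <stdio.h>

struct entry { int pid; uint32_t pages; };

/* Selection sort, largest footprint first -- mirrors the pass structure
 * of memorystatus_sort_by_largest_process_locked(). */
static void
sort_by_largest(struct entry *e, int n)
{
	for (int sorted = 0; sorted < n; sorted++) {
		int max = sorted;
		for (int i = sorted + 1; i < n; i++) {
			if (e[i].pages > e[max].pages)
				max = i;
		}
		struct entry tmp = e[sorted];
		e[sorted] = e[max];
		e[max]    = tmp;
	}
}

int
main(void)
{
	struct entry bucket[] = { {101, 1200}, {102, 9000}, {103, 300}, {104, 4500} };
	int n = (int)(sizeof(bucket) / sizeof(bucket[0]));

	sort_by_largest(bucket, n);
	for (int i = 0; i < n; i++)
		printf("pid %d: %u pages\n", bucket[i].pid, (unsigned)bucket[i].pages);
	return 0;
}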
 
@@ -774,6 +1089,9 @@ memorystatus_init(void)
        assert(freeze_threshold_percentage < 100);
        
 #if CONFIG_JETSAM
+       /* device tree can request to take snapshots for idle-exit kills by default */
+       PE_get_default("kern.jetsam_idle_snapshot", &memorystatus_idle_snapshot, sizeof(memorystatus_idle_snapshot));
+
        memorystatus_delta = delta_percentage * atop_64(max_mem) / 100;
        memorystatus_available_pages_critical_idle_offset = idle_offset_percentage * atop_64(max_mem) / 100;
        memorystatus_available_pages_critical_base = (critical_threshold_percentage / delta_percentage) * memorystatus_delta;
@@ -786,8 +1104,21 @@ memorystatus_init(void)
                panic("Could not allocate memorystatus_jetsam_snapshot");
        }
 
+       nanoseconds_to_absolutetime((uint64_t)JETSAM_SNAPSHOT_TIMEOUT_SECS * NSEC_PER_SEC, &memorystatus_jetsam_snapshot_timeout);
+
+       memset(&memorystatus_at_boot_snapshot, 0, sizeof(memorystatus_jetsam_snapshot_t));
+
        /* No contention at this point */
        memorystatus_update_levels_locked(FALSE);
+
+       /* Jetsam Loop Detection */
+       if (max_mem <= (512 * 1024 * 1024)) {
+               /* 512 MB devices */
+               memorystatus_jld_eval_period_msecs = 8000;      /* 8000 msecs == 8 second window */
+       } else {
+               /* 1GB and larger devices */
+               memorystatus_jld_eval_period_msecs = 6000;      /* 6000 msecs == 6 second window */
+       }
 #endif
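To make the initialization arithmetic concrete: on a hypothetical 1 GB device with 4 KB pages, atop_64(max_mem) is 262144 pages, so the 5% delta_percentage gives memorystatus_delta = 13107 pages (roughly 51 MB); and because max_mem exceeds 512 MB, the jetsam-loop-detection evaluation window is set to 6000 ms rather than 8000 ms.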
        
 #if CONFIG_FREEZE
@@ -852,9 +1183,7 @@ memorystatus_do_kill(proc_t p, uint32_t cause) {
        KERNEL_DEBUG_CONSTANT( (BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DO_KILL)) | DBG_FUNC_END, 
                               victim_pid, cause, vm_page_free_count, error, 0);
 
-       if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
-               vm_wake_compactor_swapper();
-       }
+       vm_wake_compactor_swapper();
 
        return (error == 0);
 }
@@ -931,7 +1260,7 @@ memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state)
                present_in_deferred_bucket = TRUE;
        }
 
-       MEMORYSTATUS_DEBUG(1, "memorystatus_schedule_idle_demotion_locked: scheduling demotion to idle band for process %d (dirty:0x%x, set_state %d, demotions %d).\n", 
+       MEMORYSTATUS_DEBUG(1, "memorystatus_schedule_idle_demotion_locked: scheduling demotion to idle band for pid %d (dirty:0x%x, set_state %d, demotions %d).\n", 
            p->p_pid, p->p_memstat_dirty, set_state, memorystatus_scheduled_idle_demotions);
 
        assert((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED);
@@ -959,7 +1288,7 @@ memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clear_state)
                assert(p->p_memstat_idledeadline);
        }
 
-       MEMORYSTATUS_DEBUG(1, "memorystatus_invalidate_idle_demotion(): invalidating demotion to idle band for process %d (clear_state %d, demotions %d).\n", 
+       MEMORYSTATUS_DEBUG(1, "memorystatus_invalidate_idle_demotion(): invalidating demotion to idle band for pid %d (clear_state %d, demotions %d).\n", 
            p->p_pid, clear_state, memorystatus_scheduled_idle_demotions);
     
  
@@ -1007,7 +1336,7 @@ memorystatus_add(proc_t p, boolean_t locked)
 {
        memstat_bucket_t *bucket;
        
-       MEMORYSTATUS_DEBUG(1, "memorystatus_list_add(): adding process %d with priority %d.\n", p->p_pid, p->p_memstat_effectivepriority);
+       MEMORYSTATUS_DEBUG(1, "memorystatus_list_add(): adding pid %d with priority %d.\n", p->p_pid, p->p_memstat_effectivepriority);
    
        if (!locked) {
                proc_list_lock();
@@ -1039,6 +1368,14 @@ exit:
        return 0;
 }
 
+/*
+ * Description:
+ *     Moves a process from one jetsam bucket to another,
+ *     which changes the LRU position of the process.
+ *
+ *     Monitors transition between buckets and if necessary
+ *     will update cached memory limits accordingly.
+ */
 static void
 memorystatus_update_priority_locked(proc_t p, int priority, boolean_t head_insert)
 {
@@ -1051,7 +1388,7 @@ memorystatus_update_priority_locked(proc_t p, int priority, boolean_t head_inser
                return;
        }
        
-       MEMORYSTATUS_DEBUG(1, "memorystatus_update_priority_locked(): setting process %d to priority %d, inserting at %s\n",
+       MEMORYSTATUS_DEBUG(1, "memorystatus_update_priority_locked(): setting pid %d to priority %d, inserting at %s\n",
                           p->p_pid, priority, head_insert ? "head" : "tail");
 
        old_bucket = &memstat_bucket[p->p_memstat_effectivepriority];
@@ -1068,45 +1405,134 @@ memorystatus_update_priority_locked(proc_t p, int priority, boolean_t head_inser
        else
                TAILQ_INSERT_TAIL(&new_bucket->list, p, p_memstat_list);
        new_bucket->count++;
-       
+
 #if CONFIG_JETSAM
-       if (memorystatus_highwater_enabled && (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND)) {        
+       if (memorystatus_highwater_enabled) {
+               boolean_t trigger_exception;
+
+               /* 
+                * If cached limit data is updated, then the limits
+                * will be enforced by writing to the ledgers.
+                */
+               boolean_t ledger_update_needed = TRUE;
 
                /*
-                * Adjust memory limit based on if the task is going to/from foreground and background.
+                * No need to consider P_MEMSTAT_MEMLIMIT_BACKGROUND anymore.
+                * Background limits are described via the inactive limit slots.
+                *
+                * Here, we must update the cached memory limit if the task 
+                * is transitioning between:
+                *      active <--> inactive
+                *      FG     <-->       BG
+                * but:
+                *      dirty  <-->    clean   is ignored
+                *
+                * We bypass processes that have opted into dirty tracking because
+                * a move between buckets does not imply a transition between the
+                * dirty <--> clean state.
+                * Setting limits on processes opted into dirty tracking is handled
+                * in memorystatus_dirty_set() where the transition is very clear.
                 */
 
-               if (((priority >= JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority < JETSAM_PRIORITY_FOREGROUND)) ||
-                       ((priority < JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND))) {            
-                       int32_t memlimit = (priority >= JETSAM_PRIORITY_FOREGROUND) ? -1 : p->p_memstat_memlimit;
-                       task_set_phys_footprint_limit_internal(p->task, (memlimit  > 0) ? memlimit : -1, NULL, TRUE);
-       
-                       if (memlimit <= 0) {
-                               p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT;
-                       } else {
-                               p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT;
-                       }
+               if (p->p_memstat_dirty & P_DIRTY_TRACK) {
+
+                       ledger_update_needed = FALSE;
+
+               } else if ((priority >= JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority < JETSAM_PRIORITY_FOREGROUND)) {
+                       /*
+                        *      inactive --> active
+                        *      BG       -->     FG
+                        *      assign active state
+                        */
+                       CACHE_ACTIVE_LIMITS_LOCKED(p, trigger_exception);
+
+               } else if ((priority < JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) {
+                       /*
+                        *      active --> inactive
+                        *      FG     -->       BG
+                        *      assign inactive state
+                        */
+                       CACHE_INACTIVE_LIMITS_LOCKED(p, trigger_exception);
+               } else {
+                       /*
+                        * The transition between jetsam priority buckets apparently did
+                        * not affect active/inactive state.
+                        * This is not unusual... especially during startup when
+                        * processes are getting established in their respective bands.
+                        */
+                       ledger_update_needed = FALSE;
+               }
+
+               /*
+                * Enforce the new limits by writing to the ledger
+                */
+               if (ledger_update_needed) {
+                       task_set_phys_footprint_limit_internal(p->task, (p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1, NULL, trigger_exception);
+
+                       MEMORYSTATUS_DEBUG(3, "memorystatus_update_priority_locked: new limit on pid %d (%dMB %s) priority old --> new (%d --> %d) dirty?=0x%x %s\n",
+                                          p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1),
+                                          (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), p->p_memstat_effectivepriority, priority, p->p_memstat_dirty,
+                                          (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : ""));
                }
        }
-#endif
+
+#endif  /* CONFIG_JETSAM */
        
        p->p_memstat_effectivepriority = priority;
        
        memorystatus_check_levels_locked();
 }
 
+/*
+ *
+ * Description: Update the jetsam priority and memory limit attributes for a given process.
+ *
+ * Parameters:
+ *     p                 The process whose jetsam information is being initialized.
+ *     priority          The jetsam priority band
+ *     user_data         user specific data, unused by the kernel
+ *     effective         guards against race if process's update already occurred
+ *     update_memlimit   When true we know this is the init step via the posix_spawn path.
+ *
+ *     memlimit_active   Value in megabytes; The monitored footprint level while the
+ *                       process is active.  Exceeding it may result in termination
+ *                       based on its associated fatal flag.
+ *
+ *     memlimit_active_is_fatal  When a process is active and exceeds its memory limit,
+ *                               this describes whether or not it should be immediately fatal.
+ *
+ *     memlimit_inactive Value in megabytes; The monitored footprint level while the
+ *                       process is inactive.  Exceeding it may result in termination
+ *                       based on its associated fatal flag.
+ *
+ *     memlimit_inactive_is_fatal  When a process is inactive and exceeds its memory limit,
+ *                                 this describes whether or not it should be immediately fatal.
+ *
+ *     memlimit_background     This process has a high-water-mark while in the background.
+ *                             No longer meaningful.  Background limits are described via
+ *                             the inactive slots.  Flag is ignored.
+ *
+ *
+ * Returns:     0      Success
+ *             non-0   Failure
+ */
+
 int
-memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective, boolean_t update_memlimit, int32_t memlimit, boolean_t memlimit_background, boolean_t is_fatal_limit)
+memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective, boolean_t update_memlimit,
+                   int32_t memlimit_active,   boolean_t memlimit_active_is_fatal,
+                    int32_t memlimit_inactive, boolean_t memlimit_inactive_is_fatal,
+                    __unused boolean_t memlimit_background)
 {
        int ret;
        boolean_t head_insert = false;
        
 #if !CONFIG_JETSAM
-#pragma unused(update_memlimit, memlimit, memlimit_background, is_fatal_limit)
-#endif
+#pragma unused(update_memlimit, memlimit_active, memlimit_inactive)
+#pragma unused(memlimit_active_is_fatal, memlimit_inactive_is_fatal)
+#endif /* !CONFIG_JETSAM */
+
+       MEMORYSTATUS_DEBUG(1, "memorystatus_update: changing pid %d: priority %d, user_data 0x%llx\n", p->p_pid, priority, user_data);
 
-       MEMORYSTATUS_DEBUG(1, "memorystatus_update: changing process %d: priority %d, user_data 0x%llx\n", p->p_pid, priority, user_data);
-    
        KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_START, p->p_pid, priority, user_data, effective, 0);
        
        if (priority == -1) {
@@ -1118,13 +1544,13 @@ memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effect
        } else if (priority == JETSAM_PRIORITY_IDLE_HEAD) {
                /* JETSAM_PRIORITY_IDLE_HEAD inserts at the head of the idle queue */
                priority = JETSAM_PRIORITY_IDLE;
-               head_insert = true;
+               head_insert = TRUE;
        } else if ((priority < 0) || (priority >= MEMSTAT_BUCKET_COUNT)) {
                /* Sanity check */
                ret = EINVAL;
                goto out;
        }
-       
+
        proc_list_lock();
        
        assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL));
@@ -1151,37 +1577,114 @@ memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effect
        
 #if CONFIG_JETSAM
        if (update_memlimit) {
-               p->p_memstat_memlimit = memlimit;
+               boolean_t trigger_exception;
+
+               /*
+                * Posix_spawn'd processes come through this path to instantiate ledger limits.
+                * Forked processes do not come through this path, so no ledger limits exist.
+                * (That's why forked processes can consume unlimited memory.)
+                */
+
+               MEMORYSTATUS_DEBUG(3, "memorystatus_update(enter): pid %d, priority %d, dirty=0x%x, Active(%dMB %s), Inactive(%dMB, %s)\n",
+                                  p->p_pid, priority, p->p_memstat_dirty,
+                                  memlimit_active,   (memlimit_active_is_fatal ? "F " : "NF"),
+                                  memlimit_inactive, (memlimit_inactive_is_fatal ? "F " : "NF"));
+
                if (memlimit_background) {
-                       /* Will be set as priority is updated */
-                       p->p_memstat_state |= P_MEMSTAT_MEMLIMIT_BACKGROUND;
 
-                       /* Cannot have a background memory limit and be fatal. */
-                       is_fatal_limit = FALSE;
+                       /*
+                        * With 2-level HWM support, we no longer honor P_MEMSTAT_MEMLIMIT_BACKGROUND.
+                        * Background limits are described via the inactive limit slots.
+                        */
+
+                       // p->p_memstat_state |= P_MEMSTAT_MEMLIMIT_BACKGROUND;
 
-               } else {
-                       /* Otherwise, apply now */
-                       if (memorystatus_highwater_enabled) {
-                               task_set_phys_footprint_limit_internal(p->task, (memlimit  > 0) ? memlimit : -1, NULL, TRUE);
+#if DEVELOPMENT || DEBUG
+                       printf("memorystatus_update: WARNING %s[%d] set unused flag P_MEMSTAT_MEMLIMIT_BACKGROUND [A==%dMB %s] [IA==%dMB %s]\n",
+                              (p->p_comm ? p->p_comm : "unknown"), p->p_pid,
+                              memlimit_active, (memlimit_active_is_fatal ? "F " : "NF"),
+                              memlimit_inactive, (memlimit_inactive_is_fatal ? "F " : "NF"));
+#endif /* DEVELOPMENT || DEBUG */
+               }
+
+               if (memlimit_active <= 0) {
+                       /*
+                        * This process will have a system_wide task limit when active.
+                        * System_wide task limit is always fatal.
+                        * It's quite common to see non-fatal flag passed in here.
+                        * It's not an error, we just ignore it.
+                        */
+
+                       /*
+                        * For backward compatibility with some unexplained launchd behavior,
+                        * we allow a zero sized limit.  But we still enforce system_wide limit
+                        * when written to the ledgers.  
+                        */
+
+                       if (memlimit_active < 0) {
+                               memlimit_active = -1;  /* enforces system_wide task limit */
                        }
+                       memlimit_active_is_fatal = TRUE;
                }
-               
-               if (is_fatal_limit || memlimit <= 0) {
-                       p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT;
-               } else {
-                       p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT;
+
+               if (memlimit_inactive <= 0) {
+                       /*
+                        * This process will have a system_wide task limit when inactive.
+                        * System_wide task limit is always fatal.
+                        */
+
+                       memlimit_inactive = -1;
+                       memlimit_inactive_is_fatal = TRUE;
                }
-       }
-#endif
 
-       /*
-        * We can't add to the JETSAM_PRIORITY_IDLE_DEFERRED bucket here.
-        * But, we could be removing it from the bucket.
-        * Check and take appropriate steps if so.
-        */
-       
-       if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
-               
+               /*
+                * Initialize the active limit variants for this process.
+                */
+               SET_ACTIVE_LIMITS_LOCKED(p, memlimit_active, memlimit_active_is_fatal);
+
+               /*
+                * Initialize the inactive limit variants for this process.
+                */
+               SET_INACTIVE_LIMITS_LOCKED(p, memlimit_inactive, memlimit_inactive_is_fatal);
+
+               /*
+                * Initialize the cached limits for the target process.
+                * When the target process is dirty tracked, it's typically
+                * in a clean state.  Non-dirty-tracked processes are
+                * typically active (Foreground or above).
+                * But just in case, we don't make assumptions...
+                */
+
+               if (proc_jetsam_state_is_active_locked(p) == TRUE) {
+                       CACHE_ACTIVE_LIMITS_LOCKED(p, trigger_exception);
+               } else {
+                       CACHE_INACTIVE_LIMITS_LOCKED(p, trigger_exception);
+               }
+
+               /*
+                * Enforce the cached limit by writing to the ledger.
+                */
+               if (memorystatus_highwater_enabled) {
+                       /* apply now */
+                       assert(trigger_exception == TRUE);
+                       task_set_phys_footprint_limit_internal(p->task, ((p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1), NULL, trigger_exception);
+
+                       MEMORYSTATUS_DEBUG(3, "memorystatus_update: init: limit on pid %d (%dMB %s) targeting priority(%d) dirty?=0x%x %s\n",
+                                          p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1),
+                                          (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), priority, p->p_memstat_dirty,
+                                          (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : ""));
+               }
+       }
+#endif /* CONFIG_JETSAM */
+
+       /*
+        * We can't add to the JETSAM_PRIORITY_IDLE_DEFERRED bucket here.
+        * But, we could be removing it from the bucket.
+        * Check and take appropriate steps if so.
+        */
+       
+       if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
+               
                memorystatus_invalidate_idle_demotion_locked(p, TRUE);
        }
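
The hunk above replaces the single memlimit assignment with per-state limit variants: an active and an inactive limit, each with its own fatal/non-fatal bit, are normalized (zero or negative collapses to the always-fatal system-wide task limit), the variant matching the process's current jetsam state is cached, and only the cached value is written to the ledger. A standalone sketch of that selection logic follows; the proc_limits struct and helper names are illustrative stand-ins, not the kernel's SET_*/CACHE_* macros.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the per-process limit state (not the XNU layout). */
struct proc_limits {
    int  memlimit_active;             /* MB; -1 means "use the system-wide task limit" */
    bool memlimit_active_is_fatal;
    int  memlimit_inactive;
    bool memlimit_inactive_is_fatal;

    int  effective_limit;             /* what would be written to the ledger */
    bool effective_is_fatal;
};

/* Normalize the raw limits the way the hunk above does: a limit of 0 or less
 * collapses to the system-wide task limit, which is always fatal. */
static void normalize_limits(struct proc_limits *l)
{
    if (l->memlimit_active <= 0) {
        l->memlimit_active = -1;
        l->memlimit_active_is_fatal = true;
    }
    if (l->memlimit_inactive <= 0) {
        l->memlimit_inactive = -1;
        l->memlimit_inactive_is_fatal = true;
    }
}

/* Cache the variant matching the process's current state; the caller then
 * enforces effective_limit by writing it to the ledger. */
static void cache_effective_limit(struct proc_limits *l, bool is_active)
{
    l->effective_limit    = is_active ? l->memlimit_active : l->memlimit_inactive;
    l->effective_is_fatal = is_active ? l->memlimit_active_is_fatal
                                      : l->memlimit_inactive_is_fatal;
}

int main(void)
{
    struct proc_limits l = { .memlimit_active = 80, .memlimit_active_is_fatal = false,
                             .memlimit_inactive = 0, .memlimit_inactive_is_fatal = false };
    normalize_limits(&l);
    cache_effective_limit(&l, /* is_active */ false);
    printf("effective limit: %d MB (%s)\n",
           l.effective_limit, l.effective_is_fatal ? "fatal" : "non-fatal");
    return 0;
}
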
        
@@ -1202,7 +1705,7 @@ memorystatus_remove(proc_t p, boolean_t locked)
        int ret;
        memstat_bucket_t *bucket;
 
-       MEMORYSTATUS_DEBUG(1, "memorystatus_list_remove: removing process %d\n", p->p_pid);
+       MEMORYSTATUS_DEBUG(1, "memorystatus_remove: removing pid %d\n", p->p_pid);
 
        if (!locked) {
                proc_list_lock();
@@ -1252,38 +1755,46 @@ memorystatus_remove(proc_t p, boolean_t locked)
        return ret;
 }
 
-static boolean_t
+/*
+ * Validate dirty tracking flags with process state.
+ *
+ * Return:
+ *     0     on success
+ *     non-0 on failure
+ */
+
+static int
 memorystatus_validate_track_flags(struct proc *target_p, uint32_t pcontrol) {
        /* See that the process isn't marked for termination */
        if (target_p->p_memstat_dirty & P_DIRTY_TERMINATED) {
-               return FALSE;
+               return EBUSY;
        }
        
        /* Idle exit requires that process be tracked */
        if ((pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) &&
           !(pcontrol & PROC_DIRTY_TRACK)) {
-               return FALSE;           
+               return EINVAL;
        }
 
        /* 'Launch in progress' tracking requires that process have enabled dirty tracking too. */
        if ((pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) &&
           !(pcontrol & PROC_DIRTY_TRACK)) {
-               return FALSE;           
+               return EINVAL;
        }
 
        /* Deferral is only relevant if idle exit is specified */
        if ((pcontrol & PROC_DIRTY_DEFER) && 
           !(pcontrol & PROC_DIRTY_ALLOWS_IDLE_EXIT)) {
-               return FALSE;           
+               return EINVAL;
        }
        
-       return TRUE;
+       return(0);
 }
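
The validator now reports why a flag combination was rejected (EBUSY for a terminating process, EINVAL for inconsistent flags) instead of a bare FALSE, so the caller can propagate the specific errno. A minimal sketch of the same pattern, with invented flag values standing in for the PROC_DIRTY_* constants:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative flag values only; the real PROC_DIRTY_* constants live in the kernel headers. */
#define F_TRACK            0x1
#define F_ALLOW_IDLE_EXIT  0x2
#define F_DEFER            0x4
#define F_TERMINATED_STATE 0x8   /* pretend process state, not a control flag */

/* Return 0 on success, or an errno explaining the rejection. */
static int validate_flags(uint32_t state, uint32_t pcontrol)
{
    if (state & F_TERMINATED_STATE)
        return EBUSY;                       /* process already marked for termination */
    if ((pcontrol & F_ALLOW_IDLE_EXIT) && !(pcontrol & F_TRACK))
        return EINVAL;                      /* idle exit requires tracking */
    if ((pcontrol & F_DEFER) && !(pcontrol & F_ALLOW_IDLE_EXIT))
        return EINVAL;                      /* deferral only meaningful with idle exit */
    return 0;
}

int main(void)
{
    int ret;
    if ((ret = validate_flags(0, F_DEFER)) != 0)    /* caller propagates the specific error */
        printf("rejected: errno %d\n", ret);
    return 0;
}
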
 
 static void
 memorystatus_update_idle_priority_locked(proc_t p) {
        int32_t priority;
-       
+
        MEMORYSTATUS_DEBUG(1, "memorystatus_update_idle_priority_locked(): pid %d dirty 0x%X\n", p->p_pid, p->p_memstat_dirty);
        
        if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED) {
@@ -1320,7 +1831,7 @@ memorystatus_dirty_track(proc_t p, uint32_t pcontrol) {
        boolean_t reschedule = FALSE;
        boolean_t already_deferred = FALSE;
        boolean_t defer_now = FALSE;
-       int ret;
+       int ret = 0;
     
        KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_TRACK),
                p->p_pid, p->p_memstat_dirty, pcontrol, 0, 0);
@@ -1340,10 +1851,10 @@ memorystatus_dirty_track(proc_t p, uint32_t pcontrol) {
                goto exit;
        }
        
-       if (!memorystatus_validate_track_flags(p, pcontrol)) {
-               ret = EINVAL;
+       if ((ret = memorystatus_validate_track_flags(p, pcontrol)) != 0) {
+               /* error  */
                goto exit;
-        }
+       }
 
         old_dirty = p->p_memstat_dirty;
 
@@ -1374,7 +1885,7 @@ memorystatus_dirty_track(proc_t p, uint32_t pcontrol) {
                defer_now = TRUE;
        }
 
-       MEMORYSTATUS_DEBUG(1, "memorystatus_on_track_dirty(): set idle-exit %s / defer %s / dirty %s for process %d\n",
+       MEMORYSTATUS_DEBUG(1, "memorystatus_on_track_dirty(): set idle-exit %s / defer %s / dirty %s for pid %d\n",
                ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) ? "Y" : "N",
                defer_now ? "Y" : "N",
                p->p_memstat_dirty & P_DIRTY ? "Y" : "N",
@@ -1496,7 +2007,7 @@ memorystatus_dirty_set(proc_t p, boolean_t self, uint32_t pcontrol) {
                        memorystatus_dirty_count++;
                        ret = 0;
                } else if ((pcontrol == 0) && (p->p_memstat_dirty & flag)) {
-                       if ((flag == P_DIRTY_SHUTDOWN) && (!p->p_memstat_dirty & P_DIRTY)) {
+                       if ((flag == P_DIRTY_SHUTDOWN) && (!(p->p_memstat_dirty & P_DIRTY))) {
                                /* Clearing the dirty shutdown flag, and the process is otherwise clean - kill */
                                p->p_memstat_dirty |= P_DIRTY_TERMINATED;
                                kill = true;
@@ -1516,7 +2027,7 @@ memorystatus_dirty_set(proc_t p, boolean_t self, uint32_t pcontrol) {
        if (ret != 0) {
                goto exit;
        }
-           
+
        if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY)
                now_dirty = TRUE;
 
@@ -1584,17 +2095,72 @@ memorystatus_dirty_set(proc_t p, boolean_t self, uint32_t pcontrol) {
                                }
                        }
                }
-    
+
                memorystatus_update_idle_priority_locked(p);
+
+#if CONFIG_JETSAM
+               if (memorystatus_highwater_enabled) {
+                       boolean_t trigger_exception;
+                       /* 
+                        * We are in this path because this process transitioned between 
+                        * dirty <--> clean state.  Update the cached memory limits.
+                        */
+
+                       if (proc_jetsam_state_is_active_locked(p) == TRUE) {
+                               /*
+                                * process is dirty
+                                */
+                               CACHE_ACTIVE_LIMITS_LOCKED(p, trigger_exception);
+                       } else {
+                               /*
+                                * process is clean
+                                */
+                               CACHE_INACTIVE_LIMITS_LOCKED(p, trigger_exception);
+                       }
+
+                       /*
+                        * Enforce the new limits by writing to the ledger.
+                        *
+                        * This is a hot path and holding the proc_list_lock while writing to the ledgers
+                        * (where the task lock is taken) is bad.  So we temporarily drop the proc_list_lock.
+                        * We aren't traversing the jetsam bucket list here, so we should be safe.
+                        * See rdar://21394491.
+                        */
+
+                       if (proc_ref_locked(p) == p) {
+                               int ledger_limit;
+                               if (p->p_memstat_memlimit > 0) {
+                                       ledger_limit = p->p_memstat_memlimit;
+                               } else {
+                                       ledger_limit = -1;
+                               }
+                               proc_list_unlock();
+                               task_set_phys_footprint_limit_internal(p->task, ledger_limit, NULL, trigger_exception);
+                               proc_list_lock();
+                               proc_rele_locked(p);
+
+                               MEMORYSTATUS_DEBUG(3, "memorystatus_dirty_set: new limit on pid %d (%dMB %s) priority(%d) dirty?=0x%x %s\n",
+                                          p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1),
+                                          (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), p->p_memstat_effectivepriority, p->p_memstat_dirty,
+                                          (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : ""));
+                       }
+
+               }
+#endif /* CONFIG_JETSAM */
        
                /* If the deferral state changed, reschedule the demotion timer */
                if (reschedule) {
                        memorystatus_reschedule_idle_demotion_locked();
                }
        }
-               
+
        if (kill) {
-               psignal(p, SIGKILL);
+               if (proc_ref_locked(p) == p) {
+                       proc_list_unlock();
+                       psignal(p, SIGKILL);
+                       proc_list_lock();
+                       proc_rele_locked(p);
+               }
        }
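
Both new paths above follow the same discipline: take a reference on the process while the proc_list_lock is held, drop the lock for the heavyweight operation (the ledger write, which takes the task lock, or the SIGKILL delivery), then retake the lock and release the reference. A simplified pthread-based sketch of that shape; object_ref()/object_rele() are hypothetical refcount helpers, and in the kernel the ref acquisition can fail if the process is already exiting.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

struct object { int refcount; int id; };

/* Hypothetical refcount helpers; callers must hold list_lock. */
static struct object *object_ref(struct object *o)  { o->refcount++; return o; }
static void           object_rele(struct object *o) { o->refcount--; }

/* Something too heavy to do under list_lock (in the kernel: ledger write, psignal). */
static void expensive_operation(struct object *o) { printf("operating on %d\n", o->id); }

static void operate_safely(struct object *o)
{
    pthread_mutex_lock(&list_lock);
    if (object_ref(o) == o) {           /* pin the object before dropping the lock */
        pthread_mutex_unlock(&list_lock);
        expensive_operation(o);         /* runs without list_lock held */
        pthread_mutex_lock(&list_lock);
        object_rele(o);                 /* drop the pin under the lock again */
    }
    pthread_mutex_unlock(&list_lock);
}

int main(void)
{
    struct object o = { .refcount = 1, .id = 42 };
    operate_safely(&o);
    return 0;
}
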
        
 exit:
@@ -1868,10 +2434,25 @@ static void
 memorystatus_thread(void *param __unused, wait_result_t wr __unused)
 {
        static boolean_t is_vm_privileged = FALSE;
+
 #if CONFIG_JETSAM
        boolean_t post_snapshot = FALSE;
        uint32_t errors = 0;
        uint32_t hwm_kill = 0;
+       boolean_t sort_flag = TRUE;
+
+       /* Jetsam Loop Detection - locals */
+       memstat_bucket_t *bucket;
+       int             jld_bucket_count = 0;
+       struct timeval  jld_now_tstamp = {0,0};
+       uint64_t        jld_now_msecs = 0;
+
+       /* Jetsam Loop Detection - statics */
+       static uint64_t  jld_timestamp_msecs = 0;
+       static int       jld_idle_kill_candidates = 0;  /* Number of available processes in band 0,1 at start */
+       static int       jld_idle_kills = 0;            /* Number of procs killed during eval period  */
+       static int       jld_eval_aggressive_count = 0;         /* Bumps the max priority in aggressive loop */
+       static int32_t   jld_priority_band_max = JETSAM_PRIORITY_UI_SUPPORT;
 #endif
 
        if (is_vm_privileged == FALSE) {
@@ -1882,13 +2463,16 @@ memorystatus_thread(void *param __unused, wait_result_t wr __unused)
                thread_wire(host_priv_self(), current_thread(), TRUE);
                is_vm_privileged = TRUE;
                
+               if (vm_restricted_to_single_processor == TRUE)
+                       thread_vm_bind_group_add();
+
                memorystatus_thread_block(0, memorystatus_thread);
        }
        
 #if CONFIG_JETSAM
        
        KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_START,
-               memorystatus_available_pages, 0, 0, 0, 0);
+                             memorystatus_available_pages, memorystatus_jld_enabled, memorystatus_jld_eval_period_msecs, memorystatus_jld_eval_aggressive_count,0);
 
        /*
         * Jetsam aware version.
@@ -1942,14 +2526,104 @@ memorystatus_thread(void *param __unused, wait_result_t wr __unused)
                        break;
                }
 #endif
+               if (memorystatus_jld_enabled == TRUE) {
+
+                       /*
+                        * Jetsam Loop Detection: attempt to detect
+                        * rapid daemon relaunches in the lower bands.
+                        */
+                       
+                       microuptime(&jld_now_tstamp);
+
+                       /*
+                        * Ignore usecs in this calculation.
+                        * msecs granularity is close enough.
+                        */
+                       jld_now_msecs = (jld_now_tstamp.tv_sec * 1000);
+
+                       proc_list_lock();
+                       bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
+                       jld_bucket_count = bucket->count;
+                       bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE_DEFERRED];
+                       jld_bucket_count += bucket->count;
+                       proc_list_unlock();
+
+                       /*
+                        * memorystatus_jld_eval_period_msecs is a tunable
+                        * memorystatus_jld_eval_aggressive_count is a tunable
+                        * memorystatus_jld_eval_aggressive_priority_band_max is a tunable
+                        */
+                       if ( (jld_bucket_count == 0) || 
+                            (jld_now_msecs > (jld_timestamp_msecs + memorystatus_jld_eval_period_msecs))) {
+
+                               /* 
+                                * Refresh evaluation parameters 
+                                */
+                               jld_timestamp_msecs      = jld_now_msecs;
+                               jld_idle_kill_candidates = jld_bucket_count;
+                               jld_idle_kills           = 0;
+                               jld_eval_aggressive_count = 0;
+                               jld_priority_band_max   = JETSAM_PRIORITY_UI_SUPPORT;
+                       }
+
+                       if (jld_idle_kills > jld_idle_kill_candidates) {
+                               jld_eval_aggressive_count++;
+                               if (jld_eval_aggressive_count > memorystatus_jld_eval_aggressive_count) {
+                                       /* 
+                                        * Bump up the jetsam priority limit (e.g., the bucket index).
+                                        * Enforce bucket index sanity.
+                                        */
+                                       if ((memorystatus_jld_eval_aggressive_priority_band_max < 0) || 
+                                           (memorystatus_jld_eval_aggressive_priority_band_max >= MEMSTAT_BUCKET_COUNT)) {
+                                               /*
+                                                * Do nothing.  Stick with the default level.
+                                                */
+                                       } else {
+                                               jld_priority_band_max = memorystatus_jld_eval_aggressive_priority_band_max;
+                                       }
+                               }
+
+                               killed = memorystatus_kill_top_process_aggressive(
+                                       TRUE, 
+                                       kMemorystatusKilledVMThrashing,
+                                       jld_eval_aggressive_count, 
+                                       jld_priority_band_max, 
+                                       &errors);
+
+                                       
+                               if (killed) {
+                                       /* Always generate logs after aggressive kill */
+                                       post_snapshot = TRUE;
+                                       goto done;
+                               } 
+                       } 
+               }
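
The loop-detection logic keeps a millisecond timestamp and the idle-band population sampled at the start of an evaluation window; the counters reset when the window expires (or the idle bands are empty), and once the number of idle kills inside one window exceeds the sampled candidate count, the thread escalates to an aggressive sweep capped at a priority band. A compact sketch of that windowed escalation, with plain uint64_t milliseconds in place of microuptime() and invented values standing in for the memorystatus_jld_* tunables:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Invented tunables standing in for the memorystatus_jld_* controls. */
#define EVAL_PERIOD_MSECS   6000
#define AGGRESSIVE_AFTER    2          /* escalations tolerated before raising the band cap */
#define DEFAULT_BAND_MAX    8
#define ESCALATED_BAND_MAX  12

struct jld_state {
    uint64_t window_start_msecs;       /* start of the current evaluation window */
    int      kill_candidates;          /* idle-band population at window start */
    int      idle_kills;               /* idle kills charged to this window */
    int      aggressive_count;         /* escalations seen in this window */
    int      band_max;                 /* highest band an aggressive sweep may reach */
};

/* Returns true when the caller should run an aggressive sweep up to st->band_max. */
static bool jld_should_go_aggressive(struct jld_state *st, uint64_t now_msecs,
                                     int idle_bucket_count)
{
    if (idle_bucket_count == 0 ||
        now_msecs > st->window_start_msecs + EVAL_PERIOD_MSECS) {
        /* Window expired (or nothing left to evaluate): refresh the parameters. */
        st->window_start_msecs = now_msecs;
        st->kill_candidates    = idle_bucket_count;
        st->idle_kills         = 0;
        st->aggressive_count   = 0;
        st->band_max           = DEFAULT_BAND_MAX;
    }

    if (st->idle_kills > st->kill_candidates) {
        /* More idle kills than there were candidates: daemons are relaunching. */
        st->aggressive_count++;
        if (st->aggressive_count > AGGRESSIVE_AFTER)
            st->band_max = ESCALATED_BAND_MAX;   /* widen the sweep */
        return true;
    }
    return false;
}

int main(void)
{
    struct jld_state st = { .window_start_msecs = 1000, .kill_candidates = 3,
                            .idle_kills = 5, .band_max = DEFAULT_BAND_MAX };
    printf("aggressive? %d (up to band %d)\n",
           jld_should_go_aggressive(&st, 2000, 3), st.band_max);
    return 0;
}
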
                
                /* LRU */
-               killed = memorystatus_kill_top_process(TRUE, cause, &priority, &errors);
+               killed = memorystatus_kill_top_process(TRUE, sort_flag, cause, &priority, &errors);
+               sort_flag = FALSE;
+
                if (killed) {
-                       /* Don't generate logs for steady-state idle-exit kills (unless overridden for debug) */
+                       /*
+                        * Don't generate logs for steady-state idle-exit kills,
+                        * unless it is overridden for debug or by the device
+                        * tree.
+                        */
                        if ((priority != JETSAM_PRIORITY_IDLE) || memorystatus_idle_snapshot) {
                                post_snapshot = TRUE;
                        }
+
+                       /* Jetsam Loop Detection */
+                       if (memorystatus_jld_enabled == TRUE) {
+                               if ((priority == JETSAM_PRIORITY_IDLE) || (priority == JETSAM_PRIORITY_IDLE_DEFERRED)) {
+                                       jld_idle_kills++;
+                               } else {
+                                       /*
+                                        * We've reached into bands beyond idle deferred.
+                                        * We make no attempt to monitor them.
+                                        */
+                               }
+                       }
                        goto done;
                }
                
@@ -1991,10 +2665,19 @@ done:
        if (post_snapshot) {
                size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
                        sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count);
-               memorystatus_jetsam_snapshot->notification_time = mach_absolute_time();
-               memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
+               uint64_t timestamp_now = mach_absolute_time();
+               memorystatus_jetsam_snapshot->notification_time = timestamp_now;
+               if (memorystatus_jetsam_snapshot_last_timestamp == 0 ||
+                               timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout) {
+                       int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
+                       if (!ret) {
+                               proc_list_lock();
+                               memorystatus_jetsam_snapshot_last_timestamp = timestamp_now;
+                               proc_list_unlock();
+                       }
+               }
        }
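
Snapshot notifications are now rate limited: the note is posted only if none has been sent yet or the previous one is older than memorystatus_jetsam_snapshot_timeout, and the last-posted timestamp advances only when the post succeeds. A user-space sketch of the same throttle using mach_absolute_time(); notify_consumer() is a placeholder for the real kMemorystatusSnapshotNote delivery, and the 30-second timeout is an arbitrary example value:

#include <mach/mach_time.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t last_post_abs;          /* 0 means "never posted" */
static uint64_t timeout_abs;            /* minimum spacing, in mach absolute time units */

/* Placeholder for posting the real notification; returns 0 on success. */
static int notify_consumer(void) { printf("snapshot note posted\n"); return 0; }

static void maybe_post_snapshot_note(void)
{
    uint64_t now = mach_absolute_time();

    if (last_post_abs == 0 || now > last_post_abs + timeout_abs) {
        if (notify_consumer() == 0) {
            last_post_abs = now;        /* only advance the window on successful delivery */
        }
    }
}

int main(void)
{
    /* Convert a 30-second timeout into mach absolute time units. */
    mach_timebase_info_data_t tb;
    mach_timebase_info(&tb);
    timeout_abs = (30ULL * 1000000000ULL) * tb.denom / tb.numer;

    maybe_post_snapshot_note();         /* posts: never posted before */
    maybe_post_snapshot_note();         /* suppressed: inside the timeout window */
    return 0;
}
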
-       
+
        KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_END,
                memorystatus_available_pages, 0, 0, 0, 0);
 
@@ -2034,24 +2717,58 @@ boolean_t memorystatus_idle_exit_from_VM(void) {
 void
 memorystatus_on_ledger_footprint_exceeded(boolean_t warning, const int max_footprint_mb)
 {
+       boolean_t is_active;
+       boolean_t is_fatal;
+
        proc_t p = current_proc();
 
-    if (warning == FALSE) {
-               printf("process %d (%s) exceeded physical memory footprint limit of %d MB\n",
-                      p->p_pid, p->p_comm, max_footprint_mb);
+       proc_list_lock();
+
+       is_active = proc_jetsam_state_is_active_locked(p);
+       is_fatal = (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT);
+
+       if (warning == FALSE) {
+               /*
+                * We only want the EXC_RESOURCE to trigger once per lifetime
+                * of the active/inactive limit state. So, here, we detect the
+                * active/inactive state of the process and mark that the
+                * exception has been triggered for that state.
+                */
+               if (is_active == TRUE) {
+                       /*
+                        * turn off exceptions for active state
+                        */
+                       p->p_memstat_state |= P_MEMSTAT_MEMLIMIT_ACTIVE_EXC_TRIGGERED;
+               } else {
+                       /*
+                        * turn off exceptions for inactive state
+                        */
+                       p->p_memstat_state |= P_MEMSTAT_MEMLIMIT_INACTIVE_EXC_TRIGGERED;
+               }
+
+               /*
+                * A soft memory limit is a non-fatal high-water mark.
+                * A hard memory limit is a fatal custom task limit or the system-wide per-task memory limit.
+                */
+               printf("process %d (%s) exceeded physical memory footprint, the %s%sMemoryLimit of %d MB\n",
+                      p->p_pid, p->p_comm, (is_active ? "Active" : "Inactive"),
+                      (is_fatal  ? "Hard" : "Soft"), max_footprint_mb);
+
        }
 
+       proc_list_unlock();
+
 #if VM_PRESSURE_EVENTS
        if (warning == TRUE) {
                if (memorystatus_warn_process(p->p_pid, TRUE /* critical? */) != TRUE) {
                        /* Print warning, since it's possible that task has not registered for pressure notifications */
-                       printf("task_exceeded_footprint: failed to warn the current task (exiting, or no handler registered?).\n");                     
+                       printf("task_exceeded_footprint: failed to warn the current task (exiting, or no handler registered?).\n");
                }
                return;
        }
 #endif /* VM_PRESSURE_EVENTS */
 
-       if ((p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT) == P_MEMSTAT_FATAL_MEMLIMIT) {
+       if (is_fatal) {
                /*
                 * If this process has no high watermark or has a fatal task limit, then we have been invoked because the task
                 * has violated either the system-wide per-task memory limit OR its own task limit.
@@ -2068,6 +2785,32 @@ memorystatus_on_ledger_footprint_exceeded(boolean_t warning, const int max_footp
        }
 }
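
The footprint handler now latches, per active/inactive limit state, the fact that the non-fatal EXC_RESOURCE has already been raised, so a process repeatedly bouncing off a soft limit is not flooded with exceptions until its limit state changes again. A minimal sketch of that once-per-state latch, with invented flag bits standing in for P_MEMSTAT_MEMLIMIT_{ACTIVE,INACTIVE}_EXC_TRIGGERED:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Invented flag bits; not the kernel's P_MEMSTAT_* values. */
#define EXC_TRIGGERED_ACTIVE    0x1
#define EXC_TRIGGERED_INACTIVE  0x2

static void raise_exception(void) { printf("EXC_RESOURCE raised\n"); }

/* Raise the soft-limit exception at most once per active/inactive limit lifetime. */
static void on_soft_limit_exceeded(uint32_t *state, bool is_active)
{
    uint32_t latch = is_active ? EXC_TRIGGERED_ACTIVE : EXC_TRIGGERED_INACTIVE;

    if (*state & latch)
        return;                 /* already fired for this limit state */

    *state |= latch;
    raise_exception();
}

int main(void)
{
    uint32_t state = 0;
    on_soft_limit_exceeded(&state, true);   /* fires */
    on_soft_limit_exceeded(&state, true);   /* latched: silent */
    on_soft_limit_exceeded(&state, false);  /* fires again for the inactive state */
    return 0;
}
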
 
+/*
+ * Toggle the P_MEMSTAT_TERMINATED state.
+ * Takes the proc_list_lock.
+ */
+void
+proc_memstat_terminated(proc_t p, boolean_t set)
+{
+#if DEVELOPMENT || DEBUG
+       if (p) {
+               proc_list_lock();
+               if (set == TRUE) {
+                       p->p_memstat_state |= P_MEMSTAT_TERMINATED;
+               } else {
+                       p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
+               }
+               proc_list_unlock();
+       }
+#else
+#pragma unused(p, set)
+       /*
+        * do nothing
+        */
+#endif /* DEVELOPMENT || DEBUG */
+       return;
+}
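
proc_memstat_terminated() compiles to a real flag toggle only on DEVELOPMENT/DEBUG kernels and to a no-op otherwise, with #pragma unused silencing the unused-parameter warning in the release build. The same shape in standalone form; MY_DEBUG is a stand-in for the kernel's configuration macros:

#include <stdbool.h>
#include <stdio.h>

#define MY_DEBUG 1   /* stand-in for DEVELOPMENT || DEBUG */

struct obj { unsigned int state; };
#define STATE_TERMINATED 0x1

void obj_set_terminated(struct obj *o, bool set)
{
#if MY_DEBUG
    if (o) {
        if (set)
            o->state |= STATE_TERMINATED;
        else
            o->state &= ~STATE_TERMINATED;
    }
#else
#pragma unused(o, set)
    /* no-op on release builds */
#endif
}

int main(void)
{
    struct obj o = { 0 };
    obj_set_terminated(&o, true);
    printf("state: 0x%x\n", o.state);
    return 0;
}
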
+
 /*
  * This is invoked when cpulimits have been exceeded while in fatal mode.
  * The jetsam_flags do not apply as those are for memory related kills.
@@ -2109,9 +2852,8 @@ memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *ma
        }
 }
 
-
 static void
-memorystatus_update_snapshot_locked(proc_t p, uint32_t kill_cause)
+memorystatus_update_jetsam_snapshot_entry_locked(proc_t p, uint32_t kill_cause)
 {
        unsigned int i;
 
@@ -2170,7 +2912,7 @@ void memorystatus_pages_update(unsigned int pages_avail)
 }
 
 static boolean_t
-memorystatus_get_snapshot_properties_for_proc_locked(proc_t p, memorystatus_jetsam_snapshot_entry_t *entry)
+memorystatus_init_jetsam_snapshot_entry_locked(proc_t p, memorystatus_jetsam_snapshot_entry_t *entry)
 {      
        clock_sec_t                     tv_sec;
        clock_usec_t                    tv_usec;
@@ -2194,56 +2936,100 @@ memorystatus_get_snapshot_properties_for_proc_locked(proc_t p, memorystatus_jets
 }
 
 static void
-memorystatus_jetsam_snapshot_procs_locked(void)
+memorystatus_init_snapshot_vmstats(memorystatus_jetsam_snapshot_t *snapshot)
 {
-       proc_t p, next_p;
-       unsigned int b = 0, i = 0;
        kern_return_t kr = KERN_SUCCESS;
-
        mach_msg_type_number_t  count = HOST_VM_INFO64_COUNT;
        vm_statistics64_data_t  vm_stat;
 
        if ((kr = host_statistics64(host_self(), HOST_VM_INFO64, (host_info64_t)&vm_stat, &count)) != KERN_SUCCESS) {
-               printf("memorystatus_jetsam_snapshot_procs_locked: host_statistics64 failed with %d\n", kr);
-               memset(&memorystatus_jetsam_snapshot->stats, 0, sizeof(memorystatus_jetsam_snapshot->stats));
+               printf("memorystatus_init_snapshot_vmstats: host_statistics64 failed with %d\n", kr);
+               memset(&snapshot->stats, 0, sizeof(snapshot->stats));
        } else {
-               memorystatus_jetsam_snapshot->stats.free_pages = vm_stat.free_count;
-               memorystatus_jetsam_snapshot->stats.active_pages = vm_stat.active_count;
-               memorystatus_jetsam_snapshot->stats.inactive_pages = vm_stat.inactive_count;
-               memorystatus_jetsam_snapshot->stats.throttled_pages = vm_stat.throttled_count;
-               memorystatus_jetsam_snapshot->stats.purgeable_pages = vm_stat.purgeable_count;
-               memorystatus_jetsam_snapshot->stats.wired_pages = vm_stat.wire_count;
-               
-               memorystatus_jetsam_snapshot->stats.speculative_pages = vm_stat.speculative_count;
-               memorystatus_jetsam_snapshot->stats.filebacked_pages = vm_stat.external_page_count;
-               memorystatus_jetsam_snapshot->stats.anonymous_pages = vm_stat.internal_page_count;
-               memorystatus_jetsam_snapshot->stats.compressions = vm_stat.compressions;
-               memorystatus_jetsam_snapshot->stats.decompressions = vm_stat.decompressions;
-               memorystatus_jetsam_snapshot->stats.compressor_pages = vm_stat.compressor_page_count;
-               memorystatus_jetsam_snapshot->stats.total_uncompressed_pages_in_compressor = vm_stat.total_uncompressed_pages_in_compressor;
+               snapshot->stats.free_pages      = vm_stat.free_count;
+               snapshot->stats.active_pages    = vm_stat.active_count;
+               snapshot->stats.inactive_pages  = vm_stat.inactive_count;
+               snapshot->stats.throttled_pages = vm_stat.throttled_count;
+               snapshot->stats.purgeable_pages = vm_stat.purgeable_count;
+               snapshot->stats.wired_pages     = vm_stat.wire_count;
+
+               snapshot->stats.speculative_pages = vm_stat.speculative_count;
+               snapshot->stats.filebacked_pages  = vm_stat.external_page_count;
+               snapshot->stats.anonymous_pages   = vm_stat.internal_page_count;
+               snapshot->stats.compressions      = vm_stat.compressions;
+               snapshot->stats.decompressions    = vm_stat.decompressions;
+               snapshot->stats.compressor_pages  = vm_stat.compressor_page_count;
+               snapshot->stats.total_uncompressed_pages_in_compressor = vm_stat.total_uncompressed_pages_in_compressor;
+       }
+}
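
The refactor funnels all VM-statistics collection through one helper that fills whichever snapshot it is handed. The same host_statistics64()/HOST_VM_INFO64 query works from user space on OS X; a small example that prints a few of the counters the jetsam snapshot records:

#include <mach/mach.h>
#include <stdio.h>

int main(void)
{
    vm_statistics64_data_t vm_stat;
    mach_msg_type_number_t count = HOST_VM_INFO64_COUNT;
    kern_return_t kr;

    kr = host_statistics64(mach_host_self(), HOST_VM_INFO64,
                           (host_info64_t)&vm_stat, &count);
    if (kr != KERN_SUCCESS) {
        fprintf(stderr, "host_statistics64 failed: %d\n", kr);
        return 1;
    }

    /* A few of the fields copied into the jetsam snapshot stats. */
    printf("free pages:       %u\n", vm_stat.free_count);
    printf("active pages:     %u\n", vm_stat.active_count);
    printf("inactive pages:   %u\n", vm_stat.inactive_count);
    printf("wired pages:      %u\n", vm_stat.wire_count);
    printf("compressor pages: %u\n", vm_stat.compressor_page_count);
    return 0;
}
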
+
+/*
+ * Collect vm statistics at boot.
+ * Called only once (see kern_exec.c)
+ * Data can be consumed at any time.
+ */
+void
+memorystatus_init_at_boot_snapshot() {
+       memorystatus_init_snapshot_vmstats(&memorystatus_at_boot_snapshot);
+       memorystatus_at_boot_snapshot.entry_count = 0;
+       memorystatus_at_boot_snapshot.notification_time = 0;   /* updated when consumed */
+       memorystatus_at_boot_snapshot.snapshot_time = mach_absolute_time();
+}
+
+static void
+memorystatus_init_jetsam_snapshot_locked(memorystatus_jetsam_snapshot_t *od_snapshot, uint32_t ods_list_count )
+{
+       proc_t p, next_p;
+       unsigned int b = 0, i = 0;
+
+       memorystatus_jetsam_snapshot_t *snapshot = NULL;
+       memorystatus_jetsam_snapshot_entry_t *snapshot_list = NULL;
+       unsigned int snapshot_max = 0;
+
+       if (od_snapshot) {
+               /*
+                * This is an on_demand snapshot
+                */
+               snapshot      = od_snapshot;
+               snapshot_list = od_snapshot->entries;
+               snapshot_max  = ods_list_count;
+       } else {
+               /*
+                * This is a jetsam event snapshot
+                */
+               snapshot      = memorystatus_jetsam_snapshot;
+               snapshot_list = memorystatus_jetsam_snapshot->entries;
+               snapshot_max  = memorystatus_jetsam_snapshot_max;
        }
 
+       memorystatus_init_snapshot_vmstats(snapshot);
+
        next_p = memorystatus_get_first_proc_locked(&b, TRUE);
        while (next_p) {
                p = next_p;
                next_p = memorystatus_get_next_proc_locked(&b, p, TRUE);
                
-               if (FALSE == memorystatus_get_snapshot_properties_for_proc_locked(p, &memorystatus_jetsam_snapshot_list[i])) {
+               if (FALSE == memorystatus_init_jetsam_snapshot_entry_locked(p, &snapshot_list[i])) {
                        continue;
                }
                
-               MEMORYSTATUS_DEBUG(0, "jetsam snapshot pid %d, uuid = %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
+               MEMORYSTATUS_DEBUG(0, "jetsam snapshot pid %d, uuid = %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
                        p->p_pid, 
                        p->p_uuid[0], p->p_uuid[1], p->p_uuid[2], p->p_uuid[3], p->p_uuid[4], p->p_uuid[5], p->p_uuid[6], p->p_uuid[7],
                        p->p_uuid[8], p->p_uuid[9], p->p_uuid[10], p->p_uuid[11], p->p_uuid[12], p->p_uuid[13], p->p_uuid[14], p->p_uuid[15]);
 
-               if (++i == memorystatus_jetsam_snapshot_max) {
+               if (++i == snapshot_max) {
                        break;
                }       
        }
 
-       memorystatus_jetsam_snapshot->snapshot_time = mach_absolute_time();
-       memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = i;
+       snapshot->snapshot_time = mach_absolute_time();
+       snapshot->entry_count = i;
+
+       if (!od_snapshot) {
+               /* update the system buffer count */
+               memorystatus_jetsam_snapshot_count = i;
+       }
 }
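
memorystatus_init_jetsam_snapshot_locked() now serves two callers: given an on-demand buffer it fills that buffer up to the caller's capacity, and given NULL it fills the system snapshot and publishes the global entry count. A compact sketch of that buffer-selection shape, with simplified types replacing the memorystatus_jetsam_snapshot_t layout:

#include <stdio.h>

/* Simplified stand-ins for the kernel snapshot types. */
struct entry    { int pid; };
struct snapshot { unsigned int entry_count; struct entry entries[16]; };

static struct snapshot system_snapshot;          /* the global, jetsam-event buffer */
static unsigned int    system_snapshot_count;    /* published count for the global buffer */

static void init_snapshot(struct snapshot *od_snapshot, unsigned int od_capacity)
{
    struct snapshot *snap;
    unsigned int max, i;

    if (od_snapshot) {
        snap = od_snapshot;          /* on-demand: fill the caller's buffer */
        max  = od_capacity;
    } else {
        snap = &system_snapshot;     /* jetsam event: fill the system buffer */
        max  = 16;
    }

    for (i = 0; i < max && i < 3; i++)   /* pretend there are 3 live processes */
        snap->entries[i].pid = (int)(100 + i);

    snap->entry_count = i;
    if (!od_snapshot)
        system_snapshot_count = i;   /* only the system buffer updates the global count */
}

int main(void)
{
    init_snapshot(NULL, 0);
    printf("system snapshot entries: %u\n", system_snapshot_count);
    return 0;
}
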
 
 #if DEVELOPMENT || DEBUG
@@ -2272,6 +3058,30 @@ memorystatus_cmd_set_panic_bits(user_addr_t buffer, uint32_t buffer_size) {
        return ret;
 }
 
+/*
+ * Triggers a sort_order on a specified jetsam priority band.
+ * This is for testing only, used to force a path through the sort
+ * function.
+ */
+static int
+memorystatus_cmd_test_jetsam_sort(int priority, int sort_order) {
+
+       int error = 0;
+
+       unsigned int bucket_index = 0;
+
+       if (priority == -1) {
+               /* Use as shorthand for default priority */
+               bucket_index = JETSAM_PRIORITY_DEFAULT;
+       } else {
+               bucket_index = (unsigned int)priority;
+       }
+
+       error = memorystatus_sort_bucket(bucket_index, sort_order);
+
+       return (error);
+}
+
 #endif
 
 /*
@@ -2289,17 +3099,17 @@ memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause) {
                return FALSE;
        }
 
-       printf("memorystatus: specifically killing pid %d [%s] (%s) - memorystatus_available_pages: %d\n", 
+       printf("memorystatus: specifically killing pid %d [%s] (%s %d) - memorystatus_available_pages: %d\n", 
                victim_pid, (p->p_comm ? p->p_comm : "(unknown)"),
-               jetsam_kill_cause_name[cause], memorystatus_available_pages);
+              jetsam_kill_cause_name[cause], p->p_memstat_effectivepriority, memorystatus_available_pages);
 
        proc_list_lock();
 
        if (memorystatus_jetsam_snapshot_count == 0) {
-               memorystatus_jetsam_snapshot_procs_locked();
+               memorystatus_init_jetsam_snapshot_locked(NULL,0);
        }
 
-       memorystatus_update_snapshot_locked(p, cause);
+       memorystatus_update_jetsam_snapshot_entry_locked(p, cause);
        proc_list_unlock();
        
        killed = memorystatus_do_kill(p, cause);
@@ -2312,12 +3122,14 @@ memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause) {
  * Jetsam the first process in the queue.
  */
 static boolean_t
-memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, uint32_t *errors)
+memorystatus_kill_top_process(boolean_t any, boolean_t sort_flag, uint32_t cause, int32_t *priority, uint32_t *errors)
 {
        pid_t aPid;
        proc_t p = PROC_NULL, next_p = PROC_NULL;
        boolean_t new_snapshot = FALSE, killed = FALSE;
+       int kill_count = 0;
        unsigned int i = 0;
+       uint32_t aPid_ep;
 
 #ifndef CONFIG_FREEZE
 #pragma unused(any)
@@ -2326,9 +3138,12 @@ memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority,
        KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_START,
                memorystatus_available_pages, 0, 0, 0, 0);
 
-       proc_list_lock();
 
-       memorystatus_sort_by_largest_process_locked(JETSAM_PRIORITY_FOREGROUND);
+       if (sort_flag == TRUE) {
+               (void)memorystatus_sort_bucket(JETSAM_PRIORITY_FOREGROUND, JETSAM_SORT_DEFAULT);
+       }
+
+       proc_list_lock();
 
        next_p = memorystatus_get_first_proc_locked(&i, TRUE);
        while (next_p) {
@@ -2346,6 +3161,7 @@ memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority,
 #endif /* DEVELOPMENT || DEBUG */
                
                aPid = p->p_pid;
+               aPid_ep = p->p_memstat_effectivepriority;
 
                if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) {
                        continue;
@@ -2388,10 +3204,6 @@ memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority,
                } else
 #endif
                {
-                       if (priority) {
-                               *priority = p->p_memstat_effectivepriority;
-                       }
-                       
                        /*
                         * Capture a snapshot if none exists and:
                         * - priority was not requested (this is something other than an ambient kill)
@@ -2399,7 +3211,7 @@ memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority,
                         */
                        if ((memorystatus_jetsam_snapshot_count == 0) && 
                                (memorystatus_idle_snapshot || ((!priority) || (priority && (*priority != JETSAM_PRIORITY_IDLE))))) {
-                               memorystatus_jetsam_snapshot_procs_locked();
+                               memorystatus_init_jetsam_snapshot_locked(NULL,0);
                                new_snapshot = TRUE;
                        }
                        
@@ -2415,7 +3227,7 @@ memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority,
                        if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && activeProcess) {
                                MEMORYSTATUS_DEBUG(1, "jetsam: suspending pid %d [%s] (active) for diagnosis - memory_status_level: %d\n",
                                        aPid, (p->p_comm ? p->p_comm: "(unknown)"), memorystatus_level);
-                               memorystatus_update_snapshot_locked(p, kMemorystatusKilledDiagnostic);
+                               memorystatus_update_jetsam_snapshot_entry_locked(p, kMemorystatusKilledDiagnostic);
                                p->p_memstat_state |= P_MEMSTAT_DIAG_SUSPENDED;
                                if (memorystatus_jetsam_policy & kPolicyDiagnoseFirst) {
                                        jetsam_diagnostic_suspended_one_active_proc = 1;
@@ -2426,6 +3238,9 @@ memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority,
                                proc_list_unlock();
                                if (p) {
                                        task_suspend(p->task);
+                                       if (priority) {
+                                               *priority = aPid_ep;
+                                       }
                                        proc_rele(p);
                                        killed = TRUE;
                                }
@@ -2435,31 +3250,51 @@ memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority,
 #endif /* DEVELOPMENT || DEBUG */
                        {
                                /* Shift queue, update stats */
-                               memorystatus_update_snapshot_locked(p, cause);
-                               
-                               p = proc_ref_locked(p);
-                               proc_list_unlock();
-                               if (p) {
-                                       printf("memorystatus: %s %d [%s] (%s) - memorystatus_available_pages: %d\n",
-                                           ((p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE) ?
+                               memorystatus_update_jetsam_snapshot_entry_locked(p, cause);
+
+                               if (proc_ref_locked(p) == p) {
+                                       proc_list_unlock();
+                                       printf("memorystatus: %s %d [%s] (%s %d) - memorystatus_available_pages: %d\n",
+                                           ((aPid_ep == JETSAM_PRIORITY_IDLE) ?
                                            "idle exiting pid" : "jetsam killing pid"),
                                            aPid, (p->p_comm ? p->p_comm : "(unknown)"),
-                                           jetsam_kill_cause_name[cause], memorystatus_available_pages);
+                                              jetsam_kill_cause_name[cause], aPid_ep, memorystatus_available_pages);
+
                                        killed = memorystatus_do_kill(p, cause);
-                               }
+
+                                       /* Success? */
+                                       if (killed) {
+                                               if (priority) {
+                                                       *priority = aPid_ep;
+                                               }
+                                               proc_rele(p);
+                                               kill_count++;
+                                               goto exit;
+                                       }
                                
-                               /* Success? */
-                               if (killed) {
-                                       proc_rele(p);
-                                       goto exit;
+                                       /*
+                                        * Failure - first unwind the state,
+                                        * then fall through to restart the search.
+                                        */
+                                       proc_list_lock();
+                                       proc_rele_locked(p);
+                                       p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
+                                       p->p_memstat_state |= P_MEMSTAT_ERROR;
+                                       *errors += 1;
                                }
                                
-                               /* Failure - unwind and restart. */
-                               proc_list_lock();
-                               proc_rele_locked(p);
-                               p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
-                               p->p_memstat_state |= P_MEMSTAT_ERROR;
-                               *errors += 1;
+                               /*
+                                * Failure - restart the search.
+                                *
+                                * We might have raced with "p" exiting on another core, resulting in no
+                                * ref on "p".  Or, we may have failed to kill "p".
+                                *
+                                * Either way, we fall thru to here, leaving the proc in the
+                                * P_MEMSTAT_TERMINATED state.
+                                *
+                                * And, we hold the proc_list_lock at this point.
+                                */
+
                                i = 0;
                                next_p = memorystatus_get_first_proc_locked(&i, TRUE);
                        }
@@ -2475,36 +3310,238 @@ exit:
        }
        
        KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_END,
-           memorystatus_available_pages, killed ? aPid : 0, 0, 0, 0);
+                             memorystatus_available_pages, killed ? aPid : 0, kill_count, 0, 0);
 
        return killed;
 }
 
-#if LEGACY_HIWATER
-
+/*
+ * Jetsam aggressively 
+ */
 static boolean_t
-memorystatus_kill_hiwat_proc(uint32_t *errors)
+memorystatus_kill_top_process_aggressive(boolean_t any, uint32_t cause, int aggr_count, int32_t priority_max, 
+                                        uint32_t *errors)
 {
-       pid_t aPid = 0;
+       pid_t aPid;
        proc_t p = PROC_NULL, next_p = PROC_NULL;
        boolean_t new_snapshot = FALSE, killed = FALSE;
+       int kill_count = 0;
        unsigned int i = 0;
-       
-       KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_START,
-               memorystatus_available_pages, 0, 0, 0, 0);
-       
+       int32_t aPid_ep = 0;
+
+#pragma unused(any)
+
+       KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_START,
+               memorystatus_available_pages, priority_max, 0, 0, 0);
+
        proc_list_lock();
-       memorystatus_sort_by_largest_process_locked(JETSAM_PRIORITY_FOREGROUND);
-       
+
        next_p = memorystatus_get_first_proc_locked(&i, TRUE);
        while (next_p) {
-               uint32_t footprint;
-               boolean_t skip;
+#if DEVELOPMENT || DEBUG
+               int activeProcess;
+               int procSuspendedForDiagnosis;
+#endif /* DEVELOPMENT || DEBUG */
 
-               p = next_p;
-               next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);
-               
+               if ((unsigned int)(next_p->p_memstat_effectivepriority) != i) {
+
+                       /*
+                        * We have raced with next_p running on another core, as it has
+                        * moved to a different jetsam priority band.  This means we have
+                        * lost our place in line while traversing the jetsam list.  We
+                        * attempt to recover by rewinding to the beginning of the band
+                        * we were already traversing.  By doing this, we do not guarantee
+                        * that no process escapes this aggressive march, but we can make
+                        * skipping an entire range of processes less likely. (PR-21069019)
+                        */
+
+                       MEMORYSTATUS_DEBUG(1, "memorystatus: aggressive%d: rewinding %s moved from band %d --> %d\n",
+                              aggr_count, next_p->p_comm, i, next_p->p_memstat_effectivepriority);
+
+                       next_p = memorystatus_get_first_proc_locked(&i, TRUE);
+                       continue;
+               }
+
+               p = next_p;
+               next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);
+
+               if (p->p_memstat_effectivepriority > priority_max) {
+                       /* 
+                        * Bail out of this killing spree if we have
+                        * reached beyond the priority_max jetsam band.
+                        * That is, we kill up to and through the 
+                        * priority_max jetsam band.
+                        */
+                       proc_list_unlock();
+                       goto exit;
+               }
+               
+#if DEVELOPMENT || DEBUG
+               activeProcess = p->p_memstat_state & P_MEMSTAT_FOREGROUND;
+               procSuspendedForDiagnosis = p->p_memstat_state & P_MEMSTAT_DIAG_SUSPENDED;
+#endif /* DEVELOPMENT || DEBUG */
+               
+               aPid = p->p_pid;
+               aPid_ep = p->p_memstat_effectivepriority;
+
+               if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) {
+                       continue;
+               }
+                   
+#if DEVELOPMENT || DEBUG
+               if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && procSuspendedForDiagnosis) {
+                       printf("jetsam: continuing after ignoring proc suspended already for diagnosis - %d\n", aPid);
+                       continue;
+               }
+#endif /* DEVELOPMENT || DEBUG */
+
+               /*
+                * Capture a snapshot if none exists.
+                */
+               if (memorystatus_jetsam_snapshot_count == 0) {
+                       memorystatus_init_jetsam_snapshot_locked(NULL,0);
+                       new_snapshot = TRUE;
+               }
+                       
+               /* 
+                * Mark as terminated so that if exit1() indicates success, but the process (for example)
+                * is blocked in task_exception_notify(), it'll be skipped if encountered again - see 
+                * <rdar://problem/13553476>. This is cheaper than examining P_LEXIT, which requires the 
+                * acquisition of the proc lock.
+                */
+               p->p_memstat_state |= P_MEMSTAT_TERMINATED;
+                       
+               /* Shift queue, update stats */
+               memorystatus_update_jetsam_snapshot_entry_locked(p, cause);
+
+               /*
+                * In order to kill the target process, we will drop the proc_list_lock.
+                * To guarantee that p and next_p don't disappear out from under the lock,
+                * we must take a ref on both.
+                * If we cannot get a reference, then it's likely we've raced with
+                * that process exiting on another core.
+                */
+               if (proc_ref_locked(p) == p) {
+                       if (next_p) {
+                               while (next_p && (proc_ref_locked(next_p) != next_p)) {
+                                       proc_t temp_p;
+
+                                        /*
+                                         * We must have raced with next_p exiting on another core.
+                                         * Recover by getting the next eligible process in the band.
+                                         */
+
+                                       MEMORYSTATUS_DEBUG(1, "memorystatus: aggressive%d: skipping %d [%s] (exiting?)\n",
+                                              aggr_count, next_p->p_pid, (next_p->p_comm ? next_p->p_comm : "(unknown)"));
+
+                                       temp_p = next_p;
+                                       next_p = memorystatus_get_next_proc_locked(&i, temp_p, TRUE);
+                                }
+                       }
+                       proc_list_unlock();
+
+                       printf("memorystatus: aggressive%d: %s %d [%s] (%s %d) - memorystatus_available_pages: %d\n",
+                              aggr_count,
+                              ((aPid_ep == JETSAM_PRIORITY_IDLE) ? "idle exiting pid" : "jetsam killing pid"),
+                              aPid, (p->p_comm ? p->p_comm : "(unknown)"),
+                              jetsam_kill_cause_name[cause], aPid_ep, memorystatus_available_pages);
+
+                       killed = memorystatus_do_kill(p, cause);
+                               
+                       /* Success? */
+                       if (killed) {
+                               proc_rele(p);
+                               kill_count++;
+                               p = NULL;
+                               killed = FALSE;
+
+                               /* 
+                                * Continue the killing spree.
+                                */
+                               proc_list_lock();
+                               if (next_p) {
+                                       proc_rele_locked(next_p);
+                               }
+                               continue;
+                       }
+                                       
+                       /*
+                        * Failure - first unwind the state,
+                        * then fall through to restart the search.
+                        */
+                       proc_list_lock();
+                       proc_rele_locked(p);
+                       if (next_p) {
+                               proc_rele_locked(next_p);
+                       }
+                       p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
+                       p->p_memstat_state |= P_MEMSTAT_ERROR;
+                       *errors += 1;
+                       p = NULL;
+               }
+
+               /*
+                * Failure - restart the search at the beginning of
+                * the band we were already traversing.
+                *
+                * We might have raced with "p" exiting on another core, resulting in no
+                * ref on "p".  Or, we may have failed to kill "p".
+                *
+                * Either way, we fall thru to here, leaving the proc in the 
+                * P_MEMSTAT_TERMINATED or P_MEMSTAT_ERROR state.
+                *
+                * And, we hold the proc_list_lock at this point.
+                */
+
+               next_p = memorystatus_get_first_proc_locked(&i, TRUE);
+       }
+       
+       proc_list_unlock();
+       
+exit:
+       /* Clear snapshot if freshly captured and no target was found */
+       if (new_snapshot && (kill_count == 0)) {
+           memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
+       }
+       
+       KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_END,
+                             memorystatus_available_pages, killed ? aPid : 0, kill_count, 0, 0);
+
+       if (kill_count > 0) {
+               return(TRUE);
+       }
+       else {
+               return(FALSE);
+       }
+}
+
+#if LEGACY_HIWATER
+
+static boolean_t
+memorystatus_kill_hiwat_proc(uint32_t *errors)
+{
+       pid_t aPid = 0;
+       proc_t p = PROC_NULL, next_p = PROC_NULL;
+       boolean_t new_snapshot = FALSE, killed = FALSE;
+       int kill_count = 0;
+       unsigned int i = 0;
+       uint32_t aPid_ep;
+       
+       KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_START,
+               memorystatus_available_pages, 0, 0, 0, 0);
+       
+       proc_list_lock();
+       
+       next_p = memorystatus_get_first_proc_locked(&i, TRUE);
+       while (next_p) {
+               uint32_t footprint;
+               boolean_t skip;
+
+               p = next_p;
+               next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);
+               
                aPid = p->p_pid;
+               aPid_ep = p->p_memstat_effectivepriority;
                
                if (p->p_memstat_state  & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) {
                        continue;
@@ -2514,14 +3551,24 @@ memorystatus_kill_hiwat_proc(uint32_t *errors)
                if (p->p_memstat_memlimit <= 0) {
                        continue;
                }
-               
+
+#if 0
+               /*
+                * No need to consider P_MEMSTAT_MEMLIMIT_BACKGROUND anymore.
+                * Background limits are described via the inactive limit slots.
+                * Their fatal/non-fatal setting determines whether or not they
+                * are considered in this kill path.
+                */
+
                /* skip if a currently inapplicable limit is encountered */
                if ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) {          
                        continue;
                }
+#endif
 
                footprint = (uint32_t)(get_task_phys_footprint(p->task) / (1024 * 1024));
                skip = (((int32_t)footprint) <= p->p_memstat_memlimit);
+
 #if DEVELOPMENT || DEBUG
                if (!skip && (memorystatus_jetsam_policy & kPolicyDiagnoseActive)) {
                        if (p->p_memstat_state & P_MEMSTAT_DIAG_SUSPENDED) {
@@ -2547,7 +3594,7 @@ memorystatus_kill_hiwat_proc(uint32_t *errors)
                                (memorystatus_jetsam_policy & kPolicyDiagnoseActive) ? "suspending": "killing", aPid, p->p_comm, footprint, p->p_memstat_memlimit);
                                
                        if (memorystatus_jetsam_snapshot_count == 0) {
-                               memorystatus_jetsam_snapshot_procs_locked();
+                               memorystatus_init_jetsam_snapshot_locked(NULL,0);
                                new_snapshot = TRUE;
                        }
                        
@@ -2556,7 +3603,7 @@ memorystatus_kill_hiwat_proc(uint32_t *errors)
 #if DEVELOPMENT || DEBUG
                        if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
                                MEMORYSTATUS_DEBUG(1, "jetsam: pid %d suspended for diagnosis - memorystatus_available_pages: %d\n", aPid, memorystatus_available_pages);
-                               memorystatus_update_snapshot_locked(p, kMemorystatusKilledDiagnostic);
+                               memorystatus_update_jetsam_snapshot_entry_locked(p, kMemorystatusKilledDiagnostic);
                                p->p_memstat_state |= P_MEMSTAT_DIAG_SUSPENDED;
                                
                                p = proc_ref_locked(p);
@@ -2571,28 +3618,46 @@ memorystatus_kill_hiwat_proc(uint32_t *errors)
                        } else
 #endif /* DEVELOPMENT || DEBUG */
                        {
-                               memorystatus_update_snapshot_locked(p, kMemorystatusKilledHiwat);
+                               memorystatus_update_jetsam_snapshot_entry_locked(p, kMemorystatusKilledHiwat);
                                
-                               p = proc_ref_locked(p);
-                               proc_list_unlock();
-                               if (p) {
-                                   printf("memorystatus: jetsam killing pid %d [%s] (highwater) - memorystatus_available_pages: %d\n", 
-                                               aPid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages);
-                                   killed = memorystatus_do_kill(p, kMemorystatusKilledHiwat);
-                               }
+                               if (proc_ref_locked(p) == p) {
+                                       proc_list_unlock();
+
+                                       printf("memorystatus: jetsam killing pid %d [%s] (highwater %d) - memorystatus_available_pages: %d\n", 
+                                              aPid, (p->p_comm ? p->p_comm : "(unknown)"), aPid_ep, memorystatus_available_pages);
+
+                                       killed = memorystatus_do_kill(p, kMemorystatusKilledHiwat);
                                
-                               /* Success? */
-                               if (killed) {
-                                       proc_rele(p);
-                                       goto exit;
+                                       /* Success? */
+                                       if (killed) {
+                                               proc_rele(p);
+                                               kill_count++;
+                                               goto exit;
+                                       }
+
+                                       /*
+                                        * Failure - first unwind the state,
+                                        * then fall through to restart the search.
+                                        */
+                                       proc_list_lock();
+                                       proc_rele_locked(p);
+                                       p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
+                                       p->p_memstat_state |= P_MEMSTAT_ERROR;
+                                       *errors += 1;
                                }
 
-                               /* Failure - unwind and restart. */
-                               proc_list_lock();
-                               proc_rele_locked(p);
-                               p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
-                               p->p_memstat_state |= P_MEMSTAT_ERROR;
-                               *errors += 1;
+                               /*
+                                * Failure - restart the search.
+                                *
+                                * We might have raced with "p" exiting on another core, resulting in no
+                                * ref on "p".  Or, we may have failed to kill "p".
+                                *
+                                * Either way, we fall thru to here, leaving the proc in the 
+                                * P_MEMSTAT_TERMINATED state.
+                                *
+                                * And, we hold the proc_list_lock at this point.
+                                */
+
                                i = 0;
                                next_p = memorystatus_get_first_proc_locked(&i, TRUE);
                        }
@@ -2608,7 +3673,7 @@ exit:
        }
        
        KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_END, 
-           memorystatus_available_pages, killed ? aPid : 0, 0, 0, 0);
+                             memorystatus_available_pages, killed ? aPid : 0, kill_count, 0, 0);
 
        return killed;
 }
@@ -2635,7 +3700,7 @@ memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause) {
     
        if (victim_pid == -1) {
                /* No pid, so kill first process */
-               res = memorystatus_kill_top_process(TRUE, cause, NULL, &errors);
+               res = memorystatus_kill_top_process(TRUE, TRUE, cause, NULL, &errors);
        } else {
                res = memorystatus_kill_specific_process(victim_pid, cause);
        }
@@ -2648,8 +3713,17 @@ memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause) {
                /* Fire off snapshot notification */
                size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + 
                        sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_count;
-               memorystatus_jetsam_snapshot->notification_time = mach_absolute_time();
-               memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
+               uint64_t timestamp_now = mach_absolute_time();
+               memorystatus_jetsam_snapshot->notification_time = timestamp_now;
+               if (memorystatus_jetsam_snapshot_last_timestamp == 0 ||
+                               timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout) {
+                       int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
+                       if (!ret) {
+                               proc_list_lock();
+                               memorystatus_jetsam_snapshot_last_timestamp = timestamp_now;
+                               proc_list_unlock();
+                       }
+               }
        }
     
        return res;
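
The hunk above throttles kMemorystatusSnapshotNote: a new notification goes out only if memorystatus_jetsam_snapshot_timeout has elapsed since the last one, and the timestamp is advanced only when the send actually succeeds. A minimal standalone sketch of that mach_absolute_time()-based throttle, assuming mach_timebase_info()/mach_absolute_time() from <mach/mach_time.h>; the helper name and the window length are illustrative, not values from this commit:

#include <mach/mach_time.h>
#include <stdbool.h>
#include <stdint.h>

/* Return true at most once per window_ns; *last_abs is advanced only when
 * we decide to notify, mirroring the kernel logic above. */
static bool
should_notify(uint64_t *last_abs, uint64_t window_ns)
{
        mach_timebase_info_data_t tb;
        mach_timebase_info(&tb);

        uint64_t window_abs = window_ns * tb.denom / tb.numer;  /* ns -> abs time units */
        uint64_t now = mach_absolute_time();

        if (*last_abs == 0 || now > *last_abs + window_abs) {
                *last_abs = now;
                return true;
        }
        return false;
}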
@@ -2696,6 +3770,11 @@ memorystatus_freeze_init(void)
 {
        kern_return_t result;
        thread_t thread;
+
+       freezer_lck_grp_attr = lck_grp_attr_alloc_init();
+       freezer_lck_grp = lck_grp_alloc_init("freezer", freezer_lck_grp_attr);
+
+       lck_mtx_init(&freezer_mutex, freezer_lck_grp, NULL);
                
        result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread);
        if (result == KERN_SUCCESS) {
@@ -2705,6 +3784,141 @@ memorystatus_freeze_init(void)
        }
 }
 
+/*
+ * Synchronously freeze the passed proc. Called with a reference to the proc held.
+ *
+ * Returns EINVAL or the value returned by task_freeze().
+ */
+int
+memorystatus_freeze_process_sync(proc_t p)
+{
+       int ret = EINVAL;
+       pid_t aPid = 0;
+       boolean_t memorystatus_freeze_swap_low = FALSE;
+
+       KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START,
+               memorystatus_available_pages, 0, 0, 0, 0);
+
+       lck_mtx_lock(&freezer_mutex);
+
+       if (p == NULL) {
+               goto exit;
+       }
+
+       if (memorystatus_freeze_enabled == FALSE) {
+               goto exit;
+       }
+
+       if (!memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
+               goto exit;
+       }
+
+       if (memorystatus_freeze_update_throttle()) {
+               printf("memorystatus_freeze_process_sync: in throttle, ignoring freeze\n");
+               memorystatus_freeze_throttle_count++;
+               goto exit;
+       }
+
+       proc_list_lock();
+
+       if (p != NULL) {
+               uint32_t purgeable, wired, clean, dirty, state;
+               uint32_t max_pages, pages, i;
+               boolean_t shared;
+
+               aPid = p->p_pid;
+               state = p->p_memstat_state;
+
+               /* Ensure the process is eligible for freezing */
+               if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FROZEN)) || !(state & P_MEMSTAT_SUSPENDED)) {
+                       proc_list_unlock();
+                       goto exit;
+               }
+
+               /* Only freeze processes meeting our minimum resident page criteria */
+               memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL);
+               if (pages < memorystatus_freeze_pages_min) {
+                       proc_list_unlock();
+                       goto exit;
+               }
+
+               if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
+
+                       unsigned int avail_swap_space = 0; /* in pages. */
+
+                       if (DEFAULT_FREEZER_IS_ACTIVE) {
+                               /*
+                                * Freezer backed by default pager and swap file(s).
+                                */
+                               avail_swap_space = default_pager_swap_pages_free();
+                       } else {
+                               /*
+                                * Freezer backed by the compressor and swap file(s)
+                                * which will hold compressed data.
+                                */
+                               avail_swap_space = vm_swap_get_free_space() / PAGE_SIZE_64;
+                       }
+
+                       max_pages = MIN(avail_swap_space, memorystatus_freeze_pages_max);
+
+                       if (max_pages < memorystatus_freeze_pages_min) {
+                               proc_list_unlock();
+                               goto exit;
+                       }
+               } else {
+                       /*
+                        * We only have the compressor without any swap.
+                        */
+                       max_pages = UINT32_MAX - 1;
+               }
+
+               /* Mark as locked temporarily to avoid kill */
+               p->p_memstat_state |= P_MEMSTAT_LOCKED;
+               proc_list_unlock();
+
+               ret = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE);
+
+               MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_process_sync: task_freeze %s for pid %d [%s] - "
+                       "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, shared %d, free swap: %d\n",
+                       (ret == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (p->p_comm ? p->p_comm : "(unknown)"),
+                       memorystatus_available_pages, purgeable, wired, clean, dirty, shared, default_pager_swap_pages_free());
+
+               proc_list_lock();
+               p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
+
+               if (ret == KERN_SUCCESS) {
+                       memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };
+
+                       memorystatus_frozen_count++;
+
+                       p->p_memstat_state |= (P_MEMSTAT_FROZEN | (shared ? 0: P_MEMSTAT_NORECLAIM));
+
+                       if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
+                               /* Update stats */
+                               for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
+                                       throttle_intervals[i].pageouts += dirty;
+                               }
+                       }
+
+                       memorystatus_freeze_pageouts += dirty;
+                       memorystatus_freeze_count++;
+
+                       proc_list_unlock();
+
+                       memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
+               } else {
+                       proc_list_unlock();
+               }
+       }
+
+exit:
+       lck_mtx_unlock(&freezer_mutex);
+       KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END,
+               memorystatus_available_pages, aPid, 0, 0, 0);
+
+       return ret;
+}
+
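
memorystatus_freeze_process_sync() expects the caller to already hold a proc reference and to drop it afterwards. A minimal caller sketch under that assumption (the wrapper name is hypothetical; proc_find()/proc_rele() are the usual reference helpers):

/* Hypothetical caller sketch, not part of this commit. */
static int
example_freeze_pid(pid_t pid)
{
        proc_t p = proc_find(pid);              /* takes a reference */
        int err;

        if (p == PROC_NULL) {
                return ESRCH;
        }

        err = memorystatus_freeze_process_sync(p);  /* EINVAL or task_freeze() result */
        proc_rele(p);                           /* drop the reference */
        return err;
}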
 static int
 memorystatus_freeze_top_process(boolean_t *memorystatus_freeze_swap_low)
 {
@@ -2745,14 +3959,35 @@ memorystatus_freeze_top_process(boolean_t *memorystatus_freeze_swap_low)
                } 
 
                if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
-                       /* Ensure there's enough free space to freeze this process. */                  
-                       max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max);
+
+                       /* Ensure there's enough free space to freeze this process. */
+
+                       unsigned int avail_swap_space = 0; /* in pages. */
+
+                       if (DEFAULT_FREEZER_IS_ACTIVE) {
+                               /*
+                                * Freezer backed by default pager and swap file(s).
+                                */
+                               avail_swap_space = default_pager_swap_pages_free();
+                       } else {
+                               /*
+                                * Freezer backed by the compressor and swap file(s)
+                                * while will hold compressed data.
+                                * which will hold compressed data.
+                               avail_swap_space = vm_swap_get_free_space() / PAGE_SIZE_64;
+                       }
+
+                       max_pages = MIN(avail_swap_space, memorystatus_freeze_pages_max);
+
                        if (max_pages < memorystatus_freeze_pages_min) {
                                *memorystatus_freeze_swap_low = TRUE;
                                proc_list_unlock();
                                goto exit;
                        }
                } else {
+                       /*
+                        * We only have the compressor pool.
+                        */
                        max_pages = UINT32_MAX - 1;
                }
                
@@ -2783,11 +4018,13 @@ memorystatus_freeze_top_process(boolean_t *memorystatus_freeze_swap_low)
                        
                        p->p_memstat_state |= (P_MEMSTAT_FROZEN | (shared ? 0: P_MEMSTAT_NORECLAIM));
                
-                       /* Update stats */
-                       for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
-                                       throttle_intervals[i].pageouts += dirty;
+                       if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
+                               /* Update stats */
+                               for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
+                                       throttle_intervals[i].pageouts += dirty;
+                               }
                        }
-               
+
                        memorystatus_freeze_pageouts += dirty;
                        memorystatus_freeze_count++;
 
@@ -2795,8 +4032,8 @@ memorystatus_freeze_top_process(boolean_t *memorystatus_freeze_swap_low)
 
                        memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
 
-                       /* Return the number of reclaimed pages */
-                       ret = dirty;
+                       /* Return KERN_SUCESS */
+                       ret = kr;
 
                } else {
                        proc_list_unlock();
@@ -2857,6 +4094,8 @@ memorystatus_can_freeze_processes(void)
 static boolean_t 
 memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
 {
+       boolean_t can_freeze = TRUE;
+
        /* Only freeze if we're sufficiently low on memory; this holds off freeze right
           after boot, and is generally a no-op once we've reached steady state. */
        if (memorystatus_available_pages > memorystatus_freeze_threshold) {
@@ -2868,27 +4107,68 @@ memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
                return FALSE;
        }
 
-       /* Is swap running low? */
-       if (*memorystatus_freeze_swap_low) {
-               /* If there's been no movement in free swap pages since we last attempted freeze, return. */
-               if (default_pager_swap_pages_free() < memorystatus_freeze_pages_min) {
-                       return FALSE;
+       if (COMPRESSED_PAGER_IS_SWAPLESS || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
+               /*
+                * In-core compressor used for freezing WITHOUT on-disk swap support.
+                */
+
+               if (vm_compressor_low_on_space()) {
+                       if (*memorystatus_freeze_swap_low) {
+                               *memorystatus_freeze_swap_low = TRUE;
+                       }
+
+                       can_freeze = FALSE;
+
+               } else {
+                       if (*memorystatus_freeze_swap_low) {
+                               *memorystatus_freeze_swap_low = FALSE;
+                       }
+
+                       can_freeze = TRUE;
+               }
+       } else {
+               /*
+                * Freezing WITH on-disk swap support.
+                */
+
+               if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
+                       /*
+                        * In-core compressor fronts the swap.
+                        */
+                       if (vm_swap_low_on_space()) {
+                               if (*memorystatus_freeze_swap_low) {
+                                       *memorystatus_freeze_swap_low = TRUE;
+                               }
+
+                               can_freeze = FALSE;
+                       }
+
+               } else if (DEFAULT_FREEZER_IS_ACTIVE) {
+                       /*
+                        * Legacy freeze mode with no compressor support.
+                        */
+                       if (default_pager_swap_pages_free() < memorystatus_freeze_pages_min) {
+                               if (*memorystatus_freeze_swap_low) {
+                                       *memorystatus_freeze_swap_low = TRUE;
+                               }
+
+                               can_freeze = FALSE;
+                       }
+               } else {
+                       panic("Not a valid freeze configuration.\n");
                }
-               
-               /* Pages have been freed - we can retry. */
-               *memorystatus_freeze_swap_low = FALSE;  
        }
        
-       /* OK */
-       return TRUE;
+       return can_freeze;
 }
 
 static void
 memorystatus_freeze_update_throttle_interval(mach_timespec_t *ts, struct throttle_interval_t *interval)
 {
+       unsigned int freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE);
        if (CMP_MACH_TIMESPEC(ts, &interval->ts) >= 0) {
                if (!interval->max_pageouts) {
-                       interval->max_pageouts = (interval->burst_multiple * (((uint64_t)interval->mins * FREEZE_DAILY_PAGEOUTS_MAX) / (24 * 60)));
+                       interval->max_pageouts = (interval->burst_multiple * (((uint64_t)interval->mins * freeze_daily_pageouts_max) / (24 * 60)));
                } else {
                        printf("memorystatus_freeze_update_throttle_interval: %d minute throttle timeout, resetting\n", interval->mins);
                }
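
With the FREEZE_DAILY_PAGEOUTS_MAX constant replaced by the memorystatus_freeze_daily_mb_max tunable, the per-interval pageout budget now scales with the configured daily megabyte cap. A worked example under assumed values (a 1024 MB/day cap, 4 KB pages, a 60-minute interval with burst_multiple of 1; these are illustrative, not this commit's defaults):

        freeze_daily_pageouts_max = 1024 * (1024 * 1024 / 4096)     = 262144 pages/day
        interval->max_pageouts    = 1 * ((60 * 262144) / (24 * 60)) = 10922 pages per interval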
@@ -2949,12 +4229,12 @@ static void
 memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
 {
        static boolean_t memorystatus_freeze_swap_low = FALSE;
-       
+
+       lck_mtx_lock(&freezer_mutex);
        if (memorystatus_freeze_enabled) {
                if (memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
-                       /* Only freeze if we've not exceeded our pageout budgets or we're not backed by swap. */
-                       if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS ||
-                               !memorystatus_freeze_update_throttle()) {
+                       /* Only freeze if we've not exceeded our pageout budgets.*/
+                       if (!memorystatus_freeze_update_throttle()) {
                                memorystatus_freeze_top_process(&memorystatus_freeze_swap_low);
                        } else {
                                printf("memorystatus_freeze_thread: in throttle, ignoring freeze\n");
@@ -2962,6 +4242,7 @@ memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
                        }
                }
        }
+       lck_mtx_unlock(&freezer_mutex);
 
        assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT);
        thread_block((thread_continue_t) memorystatus_freeze_thread);   
@@ -3000,6 +4281,7 @@ boolean_t
 memorystatus_warn_process(pid_t pid, boolean_t critical) {
 
        boolean_t ret = FALSE;
+       boolean_t found_knote = FALSE;
        struct knote *kn = NULL;
 
        /*
@@ -3007,34 +4289,72 @@ memorystatus_warn_process(pid_t pid, boolean_t critical) {
         */
 
        memorystatus_klist_lock();
-       kn = vm_find_knote_from_pid(pid, &memorystatus_klist);
-       if (kn) {
-               /*
-                * By setting the "fflags" here, we are forcing 
-                * a process to deal with the case where it's 
-                * bumping up into its memory limits. If we don't
-                * do this here, we will end up depending on the
-                * system pressure snapshot evaluation in
-                * filt_memorystatus().
-                */
+
+       SLIST_FOREACH(kn, &memorystatus_klist, kn_selnext) {
+               proc_t knote_proc = kn->kn_kq->kq_p;
+               pid_t knote_pid = knote_proc->p_pid;
+
+               if (knote_pid == pid) {
+                       /*
+                        * By setting the "fflags" here, we are forcing
+                        * a process to deal with the case where it's
+                        * bumping up into its memory limits. If we don't
+                        * do this here, we will end up depending on the
+                        * system pressure snapshot evaluation in
+                        * filt_memorystatus().
+                        */
        
-               if (critical) {
-                       kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_CRITICAL;
-               } else {
-                       kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN;
+                       if (critical) {
+                               if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) {
+                                       kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_CRITICAL;
+                               } else if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_WARN) {
+                                       kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_WARN;
+                               }
+                       } else {
+                               if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_WARN) {
+                                       kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_WARN;
+                               }
+                       }
+
+                       found_knote = TRUE;
                }
-               KNOTE(&memorystatus_klist, kMemorystatusPressure);
-               ret = TRUE;
+       }
+
+       if (found_knote) {
+               KNOTE(&memorystatus_klist, 0);
+               ret = TRUE;
        } else {
                if (vm_dispatch_pressure_note_to_pid(pid, FALSE) == 0) {
                        ret = TRUE;
                }
        }
+
        memorystatus_klist_unlock();
 
        return ret;
 }
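
The loop above matches knotes by the registering process's pid and only raises a pressure level the client actually asked for in kn_sfflags. A hedged userspace sketch of such a registration, assuming EVFILT_MEMORYSTATUS and the NOTE_MEMORYSTATUS_PRESSURE_* bits are exposed by <sys/event.h> on this build; the ident of 0 and the EV_ADD | EV_CLEAR flags are assumptions for illustration:

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <stdio.h>

int
main(void)
{
        int kq = kqueue();
        struct kevent reg, ev;

        if (kq == -1) {
                return 1;
        }

        /* Ask only for the warn and critical levels; these become kn_sfflags. */
        EV_SET(&reg, 0, EVFILT_MEMORYSTATUS, EV_ADD | EV_CLEAR,
               NOTE_MEMORYSTATUS_PRESSURE_WARN | NOTE_MEMORYSTATUS_PRESSURE_CRITICAL,
               0, NULL);
        if (kevent(kq, &reg, 1, NULL, 0, NULL) == -1) {
                perror("kevent register");
                return 1;
        }

        for (;;) {
                if (kevent(kq, NULL, 0, &ev, 1, NULL) == 1) {
                        /* ev.fflags carries the level the kernel set in kn_fflags */
                        printf("memorystatus pressure fflags: 0x%x\n", (unsigned)ev.fflags);
                }
        }
}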
 
+/*
+ * Can only be set by the current task on itself.
+ */
+int
+memorystatus_low_mem_privileged_listener(uint32_t op_flags)
+{
+       boolean_t set_privilege = FALSE;
+       /*
+        * Need an entitlement check here?
+        */
+       if (op_flags == MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_ENABLE) {
+               set_privilege = TRUE;
+       } else if (op_flags == MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_DISABLE) {
+               set_privilege = FALSE;
+       } else {
+               return EINVAL;
+       }
+
+       return (task_low_mem_privileged_listener(current_task(), set_privilege, NULL));
+}
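
The command value itself is what arrives here as op_flags (see the memorystatus_control() switch later in this diff). A hedged userspace sketch, assuming the memorystatus_control() prototype from <sys/kern_memorystatus.h>:

#include <sys/kern_memorystatus.h>

/* Illustrative only: mark the calling task as a privileged low-memory listener. */
static int
enable_privileged_listener(void)
{
        return memorystatus_control(MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_ENABLE,
                                    0 /* pid: unused */, 0 /* flags */, NULL, 0);
}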
+
 int
 memorystatus_send_pressure_note(pid_t pid) {
        MEMORYSTATUS_DEBUG(1, "memorystatus_send_pressure_note(): pid %d\n", pid);      
@@ -3045,13 +4365,19 @@ void
 memorystatus_send_low_swap_note(void) {
        
        struct knote *kn = NULL;
-    
+
        memorystatus_klist_lock();
        SLIST_FOREACH(kn, &memorystatus_klist, kn_selnext) {
+               /* We call is_knote_registered_modify_task_pressure_bits to check whether the sfflags for the
+                * current note contain NOTE_MEMORYSTATUS_LOW_SWAP. Once we find one note in the memorystatus_klist
+                * that has the NOTE_MEMORYSTATUS_LOW_SWAP flag set in its sfflags, we call KNOTE with
+                * kMemorystatusLowSwap as the hint to process and update all knotes on the memorystatus_klist accordingly. */
                if (is_knote_registered_modify_task_pressure_bits(kn, NOTE_MEMORYSTATUS_LOW_SWAP, NULL, 0, 0) == TRUE) {
-                       KNOTE(&memorystatus_klist, kMemorystatusLowSwap);
+                       KNOTE(&memorystatus_klist, kMemorystatusLowSwap);
+                       break;
                }
        }
+
        memorystatus_klist_unlock();
 }
 
@@ -3151,6 +4477,7 @@ kern_return_t
 memorystatus_update_vm_pressure(boolean_t target_foreground_process) 
 {
        struct knote                    *kn_max = NULL;
+       struct knote                    *kn_cur = NULL, *kn_temp = NULL;  /* for safe list traversal */
         pid_t                          target_pid = -1;
         struct klist                   dispatch_klist = { NULL };
        proc_t                          target_proc = PROC_NULL;
@@ -3181,7 +4508,15 @@ memorystatus_update_vm_pressure(boolean_t target_foreground_process)
                        break;
                }
                idle_kill_counter++;
-               delay(1000000);    /* 1 second */
+
+               if (memorystatus_manual_testing_on == TRUE) {
+                       /*
+                        * Skip the delay when testing
+                        * the pressure notification scheme.
+                        */
+               } else {
+                       delay(1000000);    /* 1 second */
+               }
        }
 #endif /* !CONFIG_JETSAM */
 
@@ -3253,7 +4588,6 @@ memorystatus_update_vm_pressure(boolean_t target_foreground_process)
                        continue;
                }
                proc_list_unlock();
-               memorystatus_klist_unlock();
                
                target_pid = target_proc->p_pid;
 
@@ -3285,19 +4619,27 @@ memorystatus_update_vm_pressure(boolean_t target_foreground_process)
                }
 
                if (found_candidate == FALSE) {
+                       proc_rele(target_proc);
+                       memorystatus_klist_unlock();
                        continue;
                }
 
-               memorystatus_klist_lock();
-               KNOTE_DETACH(&memorystatus_klist, kn_max);
-               KNOTE_ATTACH(&dispatch_klist, kn_max);
-               memorystatus_klist_unlock();
+               SLIST_FOREACH_SAFE(kn_cur, &memorystatus_klist, kn_selnext, kn_temp) {
+                       proc_t knote_proc = kn_cur->kn_kq->kq_p;
+                       pid_t knote_pid = knote_proc->p_pid;
+                       if (knote_pid == target_pid) {
+                               KNOTE_DETACH(&memorystatus_klist, kn_cur);
+                               KNOTE_ATTACH(&dispatch_klist, kn_cur);
+                       }
+               }
 
                KNOTE(&dispatch_klist, (level_snapshot != kVMPressureNormal) ? kMemorystatusPressure : kMemorystatusNoPressure);
 
-               memorystatus_klist_lock();
-               KNOTE_DETACH(&dispatch_klist, kn_max);
-               KNOTE_ATTACH(&memorystatus_klist, kn_max);
+               SLIST_FOREACH_SAFE(kn_cur, &dispatch_klist, kn_selnext, kn_temp) {
+                       KNOTE_DETACH(&dispatch_klist, kn_cur);
+                       KNOTE_ATTACH(&memorystatus_klist, kn_cur);
+               }
+
                memorystatus_klist_unlock();
 
                microuptime(&target_proc->vm_pressure_last_notify_tstamp);
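
The switch from detaching a single kn_max to walking the list with SLIST_FOREACH_SAFE matters because detaching the current element during a plain SLIST_FOREACH would leave the iterator holding a dangling next pointer. A self-contained illustration of the pattern using <sys/queue.h>; the node type and helper are made up for the example:

#include <sys/queue.h>
#include <stdlib.h>

struct node {
        int key;
        SLIST_ENTRY(node) link;
};
SLIST_HEAD(node_list, node);

static void
remove_matching(struct node_list *list, int key)
{
        struct node *cur, *tmp;

        SLIST_FOREACH_SAFE(cur, list, link, tmp) {
                if (cur->key == key) {
                        SLIST_REMOVE(list, cur, node, link);
                        free(cur);      /* safe: tmp already points at the next element */
                }
        }
}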
@@ -3593,12 +4935,18 @@ memorystatus_get_priority_list(memorystatus_priority_entry_t **list_ptr, size_t
                list_entry->priority = p->p_memstat_effectivepriority;
                list_entry->user_data = p->p_memstat_userdata;
 #if LEGACY_HIWATER
-               if (((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) ||
-                    (p->p_memstat_memlimit <= 0)) {
-                       task_get_phys_footprint_limit(p->task, &list_entry->limit);  
-               } else {
-                       list_entry->limit = p->p_memstat_memlimit;
-               }
+
+               /*
+                * No need to consider P_MEMSTAT_MEMLIMIT_BACKGROUND anymore.
+                * Background limits are described via the inactive limit slots.
+                * So, here, the cached limit should always be valid.
+                */
+
+               if (p->p_memstat_memlimit <= 0) {
+                        task_get_phys_footprint_limit(p->task, &list_entry->limit);
+                } else {
+                        list_entry->limit = p->p_memstat_memlimit;
+                }
 #else
                task_get_phys_footprint_limit(p->task, &list_entry->limit);
 #endif
@@ -3719,10 +5067,94 @@ memorystatus_update_levels_locked(boolean_t critical_only) {
 #endif
 }
 
+/*
+ * Get the at_boot snapshot
+ */
 static int
-memorystatus_get_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) {
+memorystatus_get_at_boot_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) {
        size_t input_size = *snapshot_size;
+
+       /*
+        * The at_boot snapshot has no entry list.
+        */
+       *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t);
+
+       if (size_only) {
+               return 0;
+       }
+
+       /*
+        * Validate the size of the snapshot buffer
+        */
+       if (input_size < *snapshot_size) {
+               return EINVAL;
+       }
+
+       /*
+        * Update the notification_time only
+        */
+       memorystatus_at_boot_snapshot.notification_time = mach_absolute_time();
+       *snapshot = &memorystatus_at_boot_snapshot;
+
+       MEMORYSTATUS_DEBUG(7, "memorystatus_get_at_boot_snapshot: returned inputsize (%ld), snapshot_size(%ld), listcount(%d)\n",
+                          (long)input_size, (long)*snapshot_size, 0);
+       return 0;
+}
+
+static int
+memorystatus_get_on_demand_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) {
+       size_t input_size = *snapshot_size;
+       uint32_t ods_list_count = memorystatus_list_count;
+       memorystatus_jetsam_snapshot_t *ods = NULL;     /* The on_demand snapshot buffer */
+
+       *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + (sizeof(memorystatus_jetsam_snapshot_entry_t) * (ods_list_count));
+
+       if (size_only) {
+               return 0;
+       }
+
+       /*
+        * Validate the size of the snapshot buffer.
+        * This is inherently racey. May want to revisit
+        * this error condition and trim the output when
+        * This is inherently racy. We may want to revisit
+        */
+       if (input_size < *snapshot_size) {
+               return EINVAL;
+       }
+
+       /*
+        * Allocate and initialize a snapshot buffer.
+        */
+       ods = (memorystatus_jetsam_snapshot_t *)kalloc(*snapshot_size);
+       if (!ods) {
+               return (ENOMEM);
+       }
+
+       memset(ods, 0, *snapshot_size);
+
+       proc_list_lock();
+       memorystatus_init_jetsam_snapshot_locked(ods, ods_list_count);
+       proc_list_unlock();
+
+       /*
+        * Return the kernel allocated, on_demand buffer.
+        * The caller of this routine will copy the data out
+        * to user space and then free the kernel allocated
+        * buffer.
+        */
+       *snapshot = ods;
+
+       MEMORYSTATUS_DEBUG(7, "memorystatus_get_on_demand_snapshot: returned inputsize (%ld), snapshot_size(%ld), listcount(%ld)\n",
+                                  (long)input_size, (long)*snapshot_size, (long)ods_list_count);
        
+       return 0;
+}
+
+static int
+memorystatus_get_jetsam_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) {
+       size_t input_size = *snapshot_size;
+
        if (memorystatus_jetsam_snapshot_count > 0) {
                *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + (sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count));
        } else {
@@ -3738,30 +5170,99 @@ memorystatus_get_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *sna
        }
 
        *snapshot = memorystatus_jetsam_snapshot;
-       
-       MEMORYSTATUS_DEBUG(1, "memorystatus_snapshot: returning %ld for size\n", (long)*snapshot_size);
-       
+
+       MEMORYSTATUS_DEBUG(7, "memorystatus_get_jetsam_snapshot: returned inputsize (%ld), snapshot_size(%ld), listcount(%ld)\n",
+                                  (long)input_size, (long)*snapshot_size, (long)memorystatus_jetsam_snapshot_count);
+
        return 0;
 }
 
 
 static int
-memorystatus_cmd_get_jetsam_snapshot(user_addr_t buffer, size_t buffer_size, int32_t *retval) {
+memorystatus_cmd_get_jetsam_snapshot(int32_t flags, user_addr_t buffer, size_t buffer_size, int32_t *retval) {
        int error = EINVAL;
        boolean_t size_only;
+       boolean_t is_default_snapshot = FALSE;
+       boolean_t is_on_demand_snapshot = FALSE;
+       boolean_t is_at_boot_snapshot = FALSE;
        memorystatus_jetsam_snapshot_t *snapshot;
-       
+
        size_only = ((buffer == USER_ADDR_NULL) ? TRUE : FALSE);
-       
-       error = memorystatus_get_snapshot(&snapshot, &buffer_size, size_only);
+
+       if (flags == 0) {
+               /* Default */
+               is_default_snapshot = TRUE;
+               error = memorystatus_get_jetsam_snapshot(&snapshot, &buffer_size, size_only);
+       } else {
+               if (flags & ~(MEMORYSTATUS_SNAPSHOT_ON_DEMAND | MEMORYSTATUS_SNAPSHOT_AT_BOOT)) {
+                       /*
+                        * Unsupported bit set in flag.
+                        */
+                       return EINVAL;
+               }
+
+               if ((flags & (MEMORYSTATUS_SNAPSHOT_ON_DEMAND | MEMORYSTATUS_SNAPSHOT_AT_BOOT)) ==
+                   (MEMORYSTATUS_SNAPSHOT_ON_DEMAND |  MEMORYSTATUS_SNAPSHOT_AT_BOOT)) {
+                       /*
+                        * Can't have both set at the same time.
+                        */
+                       return EINVAL;
+               }
+
+               if (flags & MEMORYSTATUS_SNAPSHOT_ON_DEMAND) {
+                       is_on_demand_snapshot = TRUE;
+                       /*
+                        * When not requesting the size only, the following call will allocate
+                        * an on_demand snapshot buffer, which is freed below.
+                        */
+                       error = memorystatus_get_on_demand_snapshot(&snapshot, &buffer_size, size_only);
+
+               } else if (flags & MEMORYSTATUS_SNAPSHOT_AT_BOOT) {
+                       is_at_boot_snapshot = TRUE;
+                       error = memorystatus_get_at_boot_snapshot(&snapshot, &buffer_size, size_only);
+               } else {
+                       /*
+                        * Invalid flag setting.
+                        */
+                       return EINVAL;
+               }
+       }
+
        if (error) {
                goto out;
        }
 
-       /* Copy out and reset */
+       /*
+        * Copy the data out to user space and clear the snapshot buffer.
+        * If working with the jetsam snapshot,
+        *      clearing the buffer means resetting the count.
+        * If working with an on_demand snapshot,
+        *      clearing the buffer means freeing it.
+        * If working with the at_boot snapshot,
+        *      there is nothing to clear or update.
+        */
        if (!size_only) {
                if ((error = copyout(snapshot, buffer, buffer_size)) == 0) {
-                       snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
+                       if (is_default_snapshot) {
+                               /*
+                                * The jetsam snapshot is never freed, its count is simply reset.
+                                */
+                               snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
+
+                               proc_list_lock();
+                               memorystatus_jetsam_snapshot_last_timestamp = 0;
+                               proc_list_unlock();
+                       }
+               }
+
+               if (is_on_demand_snapshot) {
+                       /*
+                        * The on_demand snapshot is always freed,
+                        * even if the copyout failed.
+                        */
+                       if(snapshot) {
+                               kfree(snapshot, buffer_size);
+                       }
                }
        }
 
@@ -3965,65 +5466,120 @@ out:
 
 
 /*
- * This routine is meant solely for the purpose of adjusting jetsam priorities and bands.
- * It is _not_ meant to be used for the setting of memory limits, especially, since we can't
- * tell if the memory limit being set is fatal or not.
- *
- * So the the last 5 args to the memorystatus_update() call below, related to memory limits,  are all 0 or FALSE.
+ * This routine is used to update a process's jetsam priority position and stored user_data.
+ * It is not used for the setting of memory limits, which is why the last 6 args to the
+ * memorystatus_update() call are 0 or FALSE.
  */
        
 static int
 memorystatus_cmd_set_priority_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) {
-       const uint32_t MAX_ENTRY_COUNT = 2; /* Cap the entry count */
-       
-       int error;
-       uint32_t i;
-       uint32_t entry_count;
-       memorystatus_priority_properties_t *entries;
-       
+       int error = 0;
+       memorystatus_priority_properties_t mpp_entry;
+
        /* Validate inputs */
-       if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size == 0)) {
+       if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size != sizeof(memorystatus_priority_properties_t))) {
                return EINVAL;
        }
        
-       /* Make sure the buffer is a multiple of the entry size, and that an excessive size isn't specified */
-       entry_count = (buffer_size / sizeof(memorystatus_priority_properties_t));
-       if (((buffer_size % sizeof(memorystatus_priority_properties_t)) != 0) || (entry_count > MAX_ENTRY_COUNT)) {
-               return EINVAL;
-       }
-               
-       entries = (memorystatus_priority_properties_t *)kalloc(buffer_size);
-               
-       error = copyin(buffer, entries, buffer_size);
-       
-       for (i = 0; i < entry_count; i++) {
+       error = copyin(buffer, &mpp_entry, buffer_size);
+
+       if (error == 0) {
                proc_t p;
                 
-               if (error) {
-                       break;
-               }
-               
                p = proc_find(pid);
                if (!p) {
-                       error = ESRCH;
-                       break;         
+                       return ESRCH;
                }
                
                if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
-                       error = EPERM;
                        proc_rele(p);
-                       break;          
+                       return EPERM;
                }
        
-               error = memorystatus_update(p, entries[i].priority, entries[i].user_data, FALSE, FALSE, 0, 0, FALSE);
+               error = memorystatus_update(p, mpp_entry.priority, mpp_entry.user_data, FALSE, FALSE, 0, 0, FALSE, FALSE, FALSE);
                proc_rele(p);
        }
        
-       kfree(entries, buffer_size);
-       
-       return error;
+       return(error);
+}
+
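With the multi-entry path gone, the buffer must be exactly one memorystatus_priority_properties_t. A hedged userspace sketch of the call, assuming the memorystatus_control() prototype from <sys/kern_memorystatus.h>:

#include <sys/kern_memorystatus.h>

static int
set_jetsam_priority(pid_t pid, int32_t priority, uint64_t user_data)
{
        memorystatus_priority_properties_t props;

        props.priority  = priority;
        props.user_data = user_data;

        /* buffersize must be exactly sizeof(props) or the kernel returns EINVAL. */
        return memorystatus_control(MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES,
                                    pid, 0, &props, sizeof(props));
}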
+static int
+memorystatus_cmd_set_memlimit_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) {
+       int error = 0;
+       memorystatus_memlimit_properties_t mmp_entry;
+
+       /* Validate inputs */
+       if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size != sizeof(memorystatus_memlimit_properties_t))) {
+               return EINVAL;
+       }
+
+       error = copyin(buffer, &mmp_entry, buffer_size);
+
+       if (error == 0) {
+               error = memorystatus_set_memlimit_properties(pid, &mmp_entry);
+       }
+
+       return(error);
+}
+
+/*
+ * When getting the memlimit settings, we can't simply call task_get_phys_footprint_limit().
+ * That gets the proc's cached memlimit and there is no guarantee that the active/inactive
+ * limits will be the same in the no-limit case.  Instead we convert limits <= 0 using
+ * task_convert_phys_footprint_limit(). It computes the same limit value that would be written
+ * to the task's ledgers via task_set_phys_footprint_limit().
+ */
+static int
+memorystatus_cmd_get_memlimit_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) {
+       int error = 0;
+       memorystatus_memlimit_properties_t mmp_entry;
+
+       /* Validate inputs */
+       if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size != sizeof(memorystatus_memlimit_properties_t))) {
+               return EINVAL;
+       }
+
+       memset (&mmp_entry, 0, sizeof(memorystatus_memlimit_properties_t));
+
+       proc_t p = proc_find(pid);
+       if (!p) {
+               return ESRCH;
+       }
+
+       /*
+        * Get the active limit and attributes.
+        * No locks taken since we hold a reference to the proc.
+        */
+
+       if (p->p_memstat_memlimit_active > 0 ) {
+               mmp_entry.memlimit_active = p->p_memstat_memlimit_active;
+       } else {
+               task_convert_phys_footprint_limit(-1, &mmp_entry.memlimit_active);
+       }
+
+       if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL) {
+               mmp_entry.memlimit_active_attr |= MEMORYSTATUS_MEMLIMIT_ATTR_FATAL;
+       }
+
+       /*
+        * Get the inactive limit and attributes
+        */
+       if (p->p_memstat_memlimit_inactive <= 0) {
+               task_convert_phys_footprint_limit(-1, &mmp_entry.memlimit_inactive);
+       } else {
+               mmp_entry.memlimit_inactive = p->p_memstat_memlimit_inactive;
+       }
+       if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL) {
+               mmp_entry.memlimit_inactive_attr |= MEMORYSTATUS_MEMLIMIT_ATTR_FATAL;
+       }
+       proc_rele(p);
+
+       error = copyout(&mmp_entry, buffer, buffer_size);
+
+       return(error);
 }
 
+
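A hedged userspace sketch of reading these properties back, assuming the memorystatus_memlimit_properties_t layout and the memorystatus_control() prototype from <sys/kern_memorystatus.h>:

#include <sys/kern_memorystatus.h>
#include <stdio.h>

static int
print_memlimits(pid_t pid)
{
        memorystatus_memlimit_properties_t mmp;

        if (memorystatus_control(MEMORYSTATUS_CMD_GET_MEMLIMIT_PROPERTIES,
                                 pid, 0, &mmp, sizeof(mmp)) != 0) {
                return -1;
        }

        printf("pid %d: active %d MB%s, inactive %d MB%s\n", pid,
               mmp.memlimit_active,
               (mmp.memlimit_active_attr & MEMORYSTATUS_MEMLIMIT_ATTR_FATAL) ? " (fatal)" : "",
               mmp.memlimit_inactive,
               (mmp.memlimit_inactive_attr & MEMORYSTATUS_MEMLIMIT_ATTR_FATAL) ? " (fatal)" : "");
        return 0;
}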
 static int
 memorystatus_cmd_get_pressure_status(int32_t *retval) {        
        int error;
@@ -4040,51 +5596,157 @@ memorystatus_cmd_get_pressure_status(int32_t *retval) {
        return error;
 }
 
+int
+memorystatus_get_pressure_status_kdp() {
+       return (kVMPressureNormal != memorystatus_vm_pressure_level) ? 1 : 0;
+}
+
 /*
  * Every process, including a P_MEMSTAT_INTERNAL process (currently only pid 1), is allowed to set a HWM.
+ *
+ * This call is inflexible -- it does not distinguish between active/inactive or fatal/non-fatal.
+ * So, with the 2-level HWM scheme, preserving previous behavior maps as follows.
+ *      - treat the limit passed in as both an active and inactive limit.
+ *      - treat the is_fatal_limit flag as though it applies to both active and inactive limits.
+ *
+ * When invoked via MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK
+ *      - the is_fatal_limit is FALSE, meaning the active and inactive limits are non-fatal/soft
+ *      - so mapping is (active/non-fatal, inactive/non-fatal)
+ *
+ * When invoked via MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT
+ *      - the is_fatal_limit is TRUE, meaning the process's active and inactive limits are fatal/hard
+ *      - so mapping is (active/fatal, inactive/fatal)
  */
 
 static int
 memorystatus_cmd_set_jetsam_memory_limit(pid_t pid, int32_t high_water_mark, __unused int32_t *retval, boolean_t is_fatal_limit) {
        int error = 0;
+       memorystatus_memlimit_properties_t entry;
+
+       entry.memlimit_active = high_water_mark;
+       entry.memlimit_active_attr = 0;
+       entry.memlimit_inactive = high_water_mark;
+       entry.memlimit_inactive_attr = 0;
+
+       if (is_fatal_limit == TRUE) {
+               entry.memlimit_active_attr   |= MEMORYSTATUS_MEMLIMIT_ATTR_FATAL;
+               entry.memlimit_inactive_attr |= MEMORYSTATUS_MEMLIMIT_ATTR_FATAL;
+       }
+
+       error = memorystatus_set_memlimit_properties(pid, &entry);
+       return (error);
+}
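
Under the mapping described above, MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT is equivalent to handing the new MEMORYSTATUS_CMD_SET_MEMLIMIT_PROPERTIES call the same value in both slots with the fatal attribute set. A hedged userspace sketch (the wrapper name, pid and limit_mb are placeholders; the struct and constants come from <sys/kern_memorystatus.h>):

#include <sys/kern_memorystatus.h>

static int
set_fatal_task_limit(pid_t pid, int32_t limit_mb)
{
        memorystatus_memlimit_properties_t entry = {
                .memlimit_active        = limit_mb,
                .memlimit_active_attr   = MEMORYSTATUS_MEMLIMIT_ATTR_FATAL,
                .memlimit_inactive      = limit_mb,
                .memlimit_inactive_attr = MEMORYSTATUS_MEMLIMIT_ATTR_FATAL,
        };

        return memorystatus_control(MEMORYSTATUS_CMD_SET_MEMLIMIT_PROPERTIES,
                                    pid, 0, &entry, sizeof(entry));
}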
+
+static int
+memorystatus_set_memlimit_properties(pid_t pid, memorystatus_memlimit_properties_t *entry) {
+
+       int32_t  memlimit_active;
+       boolean_t memlimit_active_is_fatal;
+       int32_t  memlimit_inactive;
+       boolean_t memlimit_inactive_is_fatal;
+       uint32_t valid_attrs = 0;
+       int       error = 0;
         
        proc_t p = proc_find(pid);
        if (!p) {
                return ESRCH;
        }
-        
-       if (high_water_mark <= 0) {
-               high_water_mark = -1; /* Disable */
+
+       /*
+        * Check for valid attribute flags.
+        */
+       valid_attrs |= (MEMORYSTATUS_MEMLIMIT_ATTR_FATAL);
+       if ((entry->memlimit_active_attr & (~valid_attrs)) != 0) {
+               proc_rele(p);
+               return EINVAL;
+       }
+       if ((entry->memlimit_inactive_attr & (~valid_attrs)) != 0) {
+               proc_rele(p);
+               return EINVAL;
        }
-    
-       proc_list_lock();
-    
-       p->p_memstat_memlimit = high_water_mark;
-       if (memorystatus_highwater_enabled) {
-               if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) {
 
-                       memorystatus_update_priority_locked(p, p->p_memstat_effectivepriority, false);
-                       
-                       /*
-                        * The update priority call above takes care to set/reset the fatal memory limit state
-                        * IF the process is transitioning between foreground <-> background and has a background
-                        * memory limit.
-                        * Here, however, the process won't be doing any such transitions and so we explicitly tackle
-                        * the fatal limit state.
-                        */
-                       is_fatal_limit = FALSE;
+       /*
+        * Setup the active memlimit properties
+        */
+       memlimit_active = entry->memlimit_active;
+       if (entry->memlimit_active_attr & MEMORYSTATUS_MEMLIMIT_ATTR_FATAL) {
+               memlimit_active_is_fatal = TRUE;
+       } else {
+               memlimit_active_is_fatal = FALSE;
+       }
 
-               } else {
-                       error = (task_set_phys_footprint_limit_internal(p->task, high_water_mark, NULL, TRUE) == 0) ? 0 : EINVAL;
-               }
+       /*
+        * Setup the inactive memlimit properties
+        */
+       memlimit_inactive = entry->memlimit_inactive;
+       if (entry->memlimit_inactive_attr & MEMORYSTATUS_MEMLIMIT_ATTR_FATAL) {
+               memlimit_inactive_is_fatal = TRUE;
+       } else {
+               memlimit_inactive_is_fatal = FALSE;
        }
 
-       if (error == 0) {
-               if (is_fatal_limit == TRUE) {
-                       p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT;
+       /*
+        * Setting a limit of <= 0 implies that the process has no
+        * high-water-mark and has no per-task-limit.  That means
+        * the system_wide task limit is in place, which by the way,
+        * is always fatal.
+        */
+
+       if (memlimit_active <= 0) {
+               /*
+                * Enforce the fatal system_wide task limit while process is active.
+                */
+               memlimit_active = -1;
+               memlimit_active_is_fatal = TRUE;
+       }
+
+       if (memlimit_inactive <= 0) {
+               /*
+                * Enforce the fatal system_wide task limit while process is inactive.
+                */
+               memlimit_inactive = -1;
+               memlimit_inactive_is_fatal = TRUE;
+       }
+
+       proc_list_lock();
+
+       /*
+        * Store the active limit variants in the proc.
+        */
+       SET_ACTIVE_LIMITS_LOCKED(p, memlimit_active, memlimit_active_is_fatal);
+
+       /*
+        * Store the inactive limit variants in the proc.
+        */
+       SET_INACTIVE_LIMITS_LOCKED(p, memlimit_inactive, memlimit_inactive_is_fatal);
+
+       /*
+        * Enforce appropriate limit variant by updating the cached values
+        * and writing the ledger.
+        * Limit choice is based on process active/inactive state.
+        */
+
+       if (memorystatus_highwater_enabled) {
+               boolean_t trigger_exception;
+               /*
+                * No need to consider P_MEMSTAT_MEMLIMIT_BACKGROUND anymore.
+                * Background limits are described via the inactive limit slots.
+                */
+
+               if (proc_jetsam_state_is_active_locked(p) == TRUE) {
+                       CACHE_ACTIVE_LIMITS_LOCKED(p, trigger_exception);
                } else {
-                       p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT;
+                       CACHE_INACTIVE_LIMITS_LOCKED(p, trigger_exception);
                }
+
+               /* Enforce the limit by writing to the ledgers */
+               assert(trigger_exception == TRUE);
+               error = (task_set_phys_footprint_limit_internal(p->task, ((p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1), NULL, trigger_exception) == 0) ? 0 : EINVAL;
+
+               MEMORYSTATUS_DEBUG(3, "memorystatus_set_memlimit_properties: new limit on pid %d (%dMB %s) current priority (%d) dirty_state?=0x%x %s\n",
+                                  p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1),
+                                  (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), p->p_memstat_effectivepriority, p->p_memstat_dirty,
+                                  (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : ""));
        }
 
        proc_list_unlock();
@@ -4109,6 +5771,60 @@ proc_get_memstat_priority(proc_t p, boolean_t effective_priority)
        }
        return 0;
 }
+
+/*
+ * Description:
+ *     Evaluates active vs. inactive process state.
+ *     Processes that opt into dirty tracking are evaluated
+ *     based on clean vs dirty state.
+ *     dirty ==> active
+ *     clean ==> inactive
+ *
+ *     Processes that do not opt into dirty tracking are
+ *     evaluated based on priority level.
+ *     Foreground or above ==> active
+ *     Below Foreground    ==> inactive
+ *
+ *     Return: TRUE if active
+ *             FALSE if inactive
+ */
+
+static boolean_t
+proc_jetsam_state_is_active_locked(proc_t p) {
+
+       if (p->p_memstat_dirty & P_DIRTY_TRACK) {
+               /*
+                * process has opted into dirty tracking
+                * active state is based on dirty vs. clean
+                */
+               if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) {
+                       /*
+                        * process is dirty
+                        * implies active state
+                        */
+                       return TRUE;
+               } else {
+                       /*
+                        * process is clean
+                        * implies inactive state
+                        */
+                       return FALSE;
+               }
+       } else if (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND) {
+               /*
+                * process is Foreground or higher
+                * implies active state
+                */
+               return TRUE;
+       } else {
+               /*
+                * process found below Foreground
+                * implies inactive state
+                */
+               return FALSE;
+       }
+}
+
 #endif /* CONFIG_JETSAM */
 
 int
@@ -4139,19 +5855,35 @@ memorystatus_control(struct proc *p __unused, struct memorystatus_control_args *
        case MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES:
                error = memorystatus_cmd_set_priority_properties(args->pid, args->buffer, args->buffersize, ret);
                break;
+       case MEMORYSTATUS_CMD_SET_MEMLIMIT_PROPERTIES:
+               error = memorystatus_cmd_set_memlimit_properties(args->pid, args->buffer, args->buffersize, ret);
+               break;
+       case MEMORYSTATUS_CMD_GET_MEMLIMIT_PROPERTIES:
+               error = memorystatus_cmd_get_memlimit_properties(args->pid, args->buffer, args->buffersize, ret);
+               break;
        case MEMORYSTATUS_CMD_GRP_SET_PROPERTIES:
                error = memorystatus_cmd_grp_set_properties((int32_t)args->flags, args->buffer, args->buffersize, ret);
                break;          
        case MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT:
-               error = memorystatus_cmd_get_jetsam_snapshot(args->buffer, args->buffersize, ret);
+               error = memorystatus_cmd_get_jetsam_snapshot((int32_t)args->flags, args->buffer, args->buffersize, ret);
                break;
        case MEMORYSTATUS_CMD_GET_PRESSURE_STATUS:
                error = memorystatus_cmd_get_pressure_status(ret);
                break;
        case MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK:
+               /*
+                * This call does not distinguish between active and inactive limits.
+                * Default behavior in 2-level HWM world is to set both.
+                * Non-fatal limit is also assumed for both.
+                */
                error = memorystatus_cmd_set_jetsam_memory_limit(args->pid, (int32_t)args->flags, ret, FALSE);
                break;
        case MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT:
+               /*
+                * This call does not distinguish between active and inactive limits.
+                * Default behavior in 2-level HWM world is to set both.
+                * Fatal limit is also assumed for both.
+                */
                error = memorystatus_cmd_set_jetsam_memory_limit(args->pid, (int32_t)args->flags, ret, TRUE);
                break;
        /* Test commands */
@@ -4159,11 +5891,18 @@ memorystatus_control(struct proc *p __unused, struct memorystatus_control_args *
        case MEMORYSTATUS_CMD_TEST_JETSAM:
                error = memorystatus_kill_process_sync(args->pid, kMemorystatusKilled) ? 0 : EINVAL;
                break;
+       case MEMORYSTATUS_CMD_TEST_JETSAM_SORT:
+               error = memorystatus_cmd_test_jetsam_sort(args->pid, (int32_t)args->flags);
+               break;
        case MEMORYSTATUS_CMD_SET_JETSAM_PANIC_BITS:
                error = memorystatus_cmd_set_panic_bits(args->buffer, args->buffersize);
                break;
 #endif /* DEVELOPMENT || DEBUG */
 #endif /* CONFIG_JETSAM */
+       case MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_ENABLE:
+       case MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_DISABLE:
+               error = memorystatus_low_mem_privileged_listener(args->command);
+               break;
        default:
                break;
        }
@@ -4193,24 +5932,24 @@ filt_memorystatus(struct knote *kn __unused, long hint)
                switch (hint) {
                case kMemorystatusNoPressure:
                        if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
-                               kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_NORMAL;
+                               kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_NORMAL;
                        }
                        break;
                case kMemorystatusPressure:
                        if (memorystatus_vm_pressure_level == kVMPressureWarning || memorystatus_vm_pressure_level == kVMPressureUrgent) {
                                if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_WARN) {
-                                       kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN;
+                                       kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_WARN;
                                }
                        } else if (memorystatus_vm_pressure_level == kVMPressureCritical) {
 
                                if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) {
-                                       kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_CRITICAL;
+                                       kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_CRITICAL;
                                }
                        }
                        break;
                case kMemorystatusLowSwap:
                        if (kn->kn_sfflags & NOTE_MEMORYSTATUS_LOW_SWAP) {
-                               kn->kn_fflags |= NOTE_MEMORYSTATUS_LOW_SWAP;
+                               kn->kn_fflags = NOTE_MEMORYSTATUS_LOW_SWAP;
                        }
                        break;
                default:
@@ -4245,13 +5984,8 @@ memorystatus_knote_register(struct knote *kn) {
        
        if (kn->kn_sfflags & (NOTE_MEMORYSTATUS_PRESSURE_NORMAL | NOTE_MEMORYSTATUS_PRESSURE_WARN | NOTE_MEMORYSTATUS_PRESSURE_CRITICAL | NOTE_MEMORYSTATUS_LOW_SWAP)) {
 
-               if (kn->kn_sfflags & NOTE_MEMORYSTATUS_LOW_SWAP) {
-                       error = suser(kauth_cred_get(), 0);
-               }
+               KNOTE_ATTACH(&memorystatus_klist, kn);
 
-               if (error == 0) {
-                       KNOTE_ATTACH(&memorystatus_klist, kn);
-               }
        } else {          
                error = ENOTSUP;
        }
@@ -4280,3 +6014,267 @@ memorystatus_issue_pressure_kevent(boolean_t pressured) {
 }
 #endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */
 #endif /* 0 */
+
+#if CONFIG_JETSAM
+/* Coalition support */
+
+/* sorting info for a particular priority bucket */
+typedef struct memstat_sort_info {
+       coalition_t     msi_coal;
+       uint64_t        msi_page_count;
+       pid_t           msi_pid;
+       int             msi_ntasks;
+} memstat_sort_info_t;
+
+/* 
+ * qsort from smallest page count to largest page count
+ *
+ * return < 0 for a < b
+ *          0 for a == b
+ *        > 0 for a > b
+ */
+static int memstat_asc_cmp(const void *a, const void *b)
+{
+        const memstat_sort_info_t *msA = (const memstat_sort_info_t *)a;
+        const memstat_sort_info_t *msB = (const memstat_sort_info_t *)b;
+
+        return (int)((uint64_t)msA->msi_page_count - (uint64_t)msB->msi_page_count);
+}
+
+/*
+ * Return the number of pids rearranged during this sort.
+ */
+static int
+memorystatus_sort_by_largest_coalition_locked(unsigned int bucket_index, int coal_sort_order)
+{
+#define MAX_SORT_PIDS          80
+#define MAX_COAL_LEADERS       10
+
+       unsigned int b = bucket_index;
+       int nleaders = 0;
+       int ntasks = 0;
+       proc_t p = NULL;
+       coalition_t coal = COALITION_NULL;
+       int pids_moved = 0;
+       int total_pids_moved = 0;
+       int i;
+
+       /* 
+        * The system is typically under memory pressure when in this
+        * path, hence we want to avoid dynamic memory allocation.
+        */
+       memstat_sort_info_t leaders[MAX_COAL_LEADERS];
+       pid_t pid_list[MAX_SORT_PIDS];
+
+       if (bucket_index >= MEMSTAT_BUCKET_COUNT) {
+                return(0);
+        }
+
+       /*
+        * Clear the array that holds coalition leader information
+        */
+       for (i=0; i < MAX_COAL_LEADERS; i++) {
+               leaders[i].msi_coal = COALITION_NULL;
+               leaders[i].msi_page_count = 0;          /* will hold total coalition page count */
+               leaders[i].msi_pid = 0;                 /* will hold coalition leader pid */
+               leaders[i].msi_ntasks = 0;              /* will hold the number of tasks in a coalition */
+       }
+
+        p = memorystatus_get_first_proc_locked(&b, FALSE);
+        while (p) {
+                if (coalition_is_leader(p->task, COALITION_TYPE_JETSAM, &coal)) {
+                       if (nleaders < MAX_COAL_LEADERS) {
+                               int coal_ntasks = 0;
+                               uint64_t coal_page_count = coalition_get_page_count(coal, &coal_ntasks);
+                               leaders[nleaders].msi_coal = coal;
+                               leaders[nleaders].msi_page_count = coal_page_count;
+                               leaders[nleaders].msi_pid = p->p_pid;           /* the coalition leader */
+                               leaders[nleaders].msi_ntasks = coal_ntasks;
+                               nleaders++;
+                       } else {
+                               /* 
+                                * We've hit MAX_COAL_LEADERS meaning we can handle no more coalitions.
+                                * Abandoned coalitions will linger at the tail of the priority band 
+                                * when this sort session ends.
+                                * TODO:  should this be an assert?
+                                */
+                               printf("%s: WARNING: more than %d leaders in priority band [%d]\n",
+                                      __FUNCTION__, MAX_COAL_LEADERS, bucket_index);
+                               break;
+                       }
+                }
+                p=memorystatus_get_next_proc_locked(&b, p, FALSE);
+        }
+
+       if (nleaders == 0) {
+               /* Nothing to sort */
+               return(0);
+       }
+
+       /* 
+        * Sort the coalition leader array from smallest to largest coalition
+        * page count.  The smallest coalition is re-inserted into the priority
+        * bucket first, so it ends up furthest from the head and is jetsammed last.
+        */
+       if (nleaders > 1) {
+               qsort(leaders, nleaders, sizeof(memstat_sort_info_t), memstat_asc_cmp);
+       }
+
+#if 0
+       for (i = 0; i < nleaders; i++) {
+               printf("%s: coal_leader[%d of %d] pid[%d] pages[%llu] ntasks[%d]\n",
+                      __FUNCTION__, i, nleaders, leaders[i].msi_pid, leaders[i].msi_page_count,
+                       leaders[i].msi_ntasks);
+       }
+#endif
+
+       /*
+        * During coalition sorting, processes in a priority band are rearranged
+        * by being re-inserted at the head of the queue.  So, when handling a
+        * list, the first process that gets moved to the head of the queue
+        * ultimately gets pushed toward the queue tail and hence jetsams last.
+        *
+        * So, for example, the coalition leader is expected to jetsam last,
+        * after its coalition members.  Therefore, the coalition leader is
+        * inserted at the head of the queue first.
+        *
+        * After processing a coalition, the jetsam order is as follows:
+        *   undefs(jetsam first), extensions, xpc services, leader(jetsam last)
+        */
+
+       /*
+        * Coalition members are rearranged in the priority bucket here,
+        * based on their coalition role.
+        */
+       total_pids_moved = 0;
+       for (i=0; i < nleaders; i++) {
+               
+               /* a bit of bookkeeping */
+               pids_moved = 0;
+
+               /* Coalition leaders are jetsammed last, so move into place first */
+               pid_list[0] = leaders[i].msi_pid;
+               pids_moved += memorystatus_move_list_locked(bucket_index, pid_list, 1);
+
+               /* xpc services should jetsam after extensions */
+               ntasks = coalition_get_pid_list (leaders[i].msi_coal, COALITION_ROLEMASK_XPC,
+                                                coal_sort_order, pid_list, MAX_SORT_PIDS);
+
+               if (ntasks > 0) {
+                       pids_moved += memorystatus_move_list_locked(bucket_index, pid_list, 
+                                                                   (ntasks <= MAX_SORT_PIDS ? ntasks : MAX_SORT_PIDS));
+               }
+
+               /* extensions should jetsam after unmarked processes */
+               ntasks = coalition_get_pid_list (leaders[i].msi_coal, COALITION_ROLEMASK_EXT,
+                                                coal_sort_order, pid_list, MAX_SORT_PIDS);
+
+               if (ntasks > 0) {
+                       pids_moved += memorystatus_move_list_locked(bucket_index, pid_list,
+                                                                   (ntasks <= MAX_SORT_PIDS ? ntasks : MAX_SORT_PIDS));
+               }
+
+               /* undefined coalition members should be the first to jetsam */
+               ntasks = coalition_get_pid_list (leaders[i].msi_coal, COALITION_ROLEMASK_UNDEF,
+                                                coal_sort_order, pid_list, MAX_SORT_PIDS);
+
+               if (ntasks > 0) {
+                       pids_moved += memorystatus_move_list_locked(bucket_index, pid_list, 
+                                                                   (ntasks <= MAX_SORT_PIDS ? ntasks : MAX_SORT_PIDS));
+               }
+
+#if 0
+               if (pids_moved == leaders[i].msi_ntasks) {
+                       /*
+                        * All the pids in the coalition were found in this band.
+                        */
+                       printf("%s: pids_moved[%d]  equal  total coalition ntasks[%d] \n", __FUNCTION__,
+                              pids_moved, leaders[i].msi_ntasks);
+               } else if (pids_moved > leaders[i].msi_ntasks) {
+                       /*
+                        * Apparently new coalition members showed up during the sort?
+                        */
+                       printf("%s: pids_moved[%d] were greater than expected coalition ntasks[%d] \n", __FUNCTION__,
+                              pids_moved, leaders[i].msi_ntasks);
+               } else {
+                       /*
+                        * Apparently not all the pids in the coalition were found in this band?
+                        */
+                       printf("%s: pids_moved[%d] were less than  expected coalition ntasks[%d] \n", __FUNCTION__,
+                              pids_moved, leaders[i].msi_ntasks);
+               }
+#endif
+
+               total_pids_moved += pids_moved;
+
+       } /* end for */
+
+       return(total_pids_moved);
+}
+
+
+/*
+ * Traverse a list of pids, searching for each within the priority band provided.
+ * If pid is found, move it to the front of the priority band.
+ * Never searches outside the priority band provided.
+ * 
+ * Input:
+ *     bucket_index - jetsam priority band.
+ *     pid_list - pointer to a list of pids.
+ *     list_sz  - number of pids in the list.
+ *
+ * Pid list ordering is important: pid_list[n] is expected to jetsam
+ * ahead of pid_list[n+1].
+ * The sort_order is set by the coalition default.
+ *
+ * Return: 
+ *     the number of pids found and hence moved within the priority band.
+ */
+static int
+memorystatus_move_list_locked(unsigned int bucket_index, pid_t *pid_list, int list_sz)
+{
+       memstat_bucket_t *current_bucket;
+       int i;
+       int found_pids = 0;
+
+       if ((pid_list == NULL) || (list_sz <= 0)) {
+               return(0);
+       }
+
+       if (bucket_index >= MEMSTAT_BUCKET_COUNT) {
+                return(0);
+        }
+
+       current_bucket = &memstat_bucket[bucket_index];
+       for (i=0; i < list_sz; i++) {
+               unsigned int b = bucket_index;
+               proc_t p = NULL;
+               proc_t aProc = NULL;
+               pid_t  aPid;
+               int list_index;
+
+               list_index = ((list_sz - 1) - i);
+                aPid = pid_list[list_index];
+
+                /* never search beyond bucket_index provided */
+                p = memorystatus_get_first_proc_locked(&b, FALSE);
+                while (p) {
+                        if (p->p_pid == aPid) {
+                                aProc = p;
+                                break;
+                        }
+                        p = memorystatus_get_next_proc_locked(&b, p, FALSE);
+                }
+
+                if (aProc == NULL) {
+                       /* pid not found in this band, just skip it */
+                        continue;
+                } else {
+                        TAILQ_REMOVE(&current_bucket->list, aProc, p_memstat_list);
+                        TAILQ_INSERT_HEAD(&current_bucket->list, aProc, p_memstat_list);
+                       found_pids++;
+                }
+        }
+       return(found_pids);
+}
+#endif  /* CONFIG_JETSAM */
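
The sort above never reorders in place; it repeatedly calls memorystatus_move_list_locked(), which pulls a pid out of the bucket's TAILQ and re-inserts it at the head. Whatever is moved first therefore ends up furthest from the head, and since jetsam consumes the band from the head, it is killed last. A small standalone illustration of that property using the userspace <sys/queue.h> macros; the pids and role assignments are made up.

    #include <stdio.h>
    #include <sys/queue.h>

    struct node {
        int pid;
        TAILQ_ENTRY(node) link;
    };
    TAILQ_HEAD(bucket, node);

    /* Mimics memorystatus_move_list_locked(): remove and re-insert at the head. */
    static void
    move_to_head(struct bucket *b, struct node *n)
    {
        TAILQ_REMOVE(b, n, link);
        TAILQ_INSERT_HEAD(b, n, link);
    }

    int
    main(void)
    {
        struct bucket b = TAILQ_HEAD_INITIALIZER(b);
        struct node procs[4] = { {100}, {101}, {102}, {103} };
        int i;

        for (i = 0; i < 4; i++)
            TAILQ_INSERT_TAIL(&b, &procs[i], link);

        /* Handle the leader (100) first, then xpc (101), ext (102), undef (103). */
        for (i = 0; i < 4; i++)
            move_to_head(&b, &procs[i]);

        /* The head is jetsammed first: prints 103 102 101 100, i.e. undefs
         * first and the coalition leader last, matching the comment above. */
        struct node *n;
        TAILQ_FOREACH(n, &b, link)
            printf("%d ", n->pid);
        printf("\n");
        return 0;
    }
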
index b3bbdd9d6703f2a77896c3c6ec1742d20b5df2be..ed3d86e6e033f57334bea71455a2c0406348e79a 100644 (file)
@@ -98,6 +98,7 @@
 #include <kern/task.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_map.h>
+#include <vm/vm_protos.h>
 #include <mach/host_info.h>
 #include <kern/pms.h>
 
@@ -331,6 +332,15 @@ sysctl_pagesize
        return sysctl_io_number(req, l, sizeof(l), NULL, NULL);
 }
 
+static int
+sysctl_pagesize32
+(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+       long long l;
+       l = (long long) PAGE_SIZE;
+       return sysctl_io_number(req, l, sizeof(l), NULL, NULL);
+}
+
 static int
 sysctl_tbfrequency
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
@@ -356,6 +366,7 @@ SYSCTL_INT     (_hw, OID_AUTO, cpufamily, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LO
 SYSCTL_OPAQUE  (_hw, OID_AUTO, cacheconfig, CTLFLAG_RD | CTLFLAG_LOCKED, &cacheconfig, sizeof(cacheconfig), "Q", "");
 SYSCTL_OPAQUE  (_hw, OID_AUTO, cachesize, CTLFLAG_RD | CTLFLAG_LOCKED, &cachesize, sizeof(cachesize), "Q", "");
 SYSCTL_PROC       (_hw, OID_AUTO, pagesize, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 0, 0, sysctl_pagesize, "Q", "");
+SYSCTL_PROC       (_hw, OID_AUTO, pagesize32, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 0, 0, sysctl_pagesize32, "Q", "");
 SYSCTL_QUAD    (_hw, OID_AUTO, busfrequency, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &gPEClockFrequencyInfo.bus_frequency_hz, "");
 SYSCTL_QUAD    (_hw, OID_AUTO, busfrequency_min, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &gPEClockFrequencyInfo.bus_frequency_min_hz, "");
 SYSCTL_QUAD    (_hw, OID_AUTO, busfrequency_max, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &gPEClockFrequencyInfo.bus_frequency_max_hz, "");
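
Both hw.pagesize and the new hw.pagesize32 node are exported as 64-bit ("Q") read-only values, and in this revision both handlers simply report PAGE_SIZE. A quick userland check using the public sysctlbyname() interface; whether hw.pagesize32 exists at all depends on running a kernel with this change.

    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/sysctl.h>

    static void
    show(const char *name)
    {
        long long v = 0;
        size_t len = sizeof(v);
        if (sysctlbyname(name, &v, &len, NULL, 0) == 0)
            printf("%-14s %lld bytes\n", name, v);
        else
            perror(name);
    }

    int
    main(void)
    {
        show("hw.pagesize");     /* existing node */
        show("hw.pagesize32");   /* added by the hunk above */
        return 0;
    }
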
index 27bee276c9abb4429e5d8e75f1db20bf96610f59..1dd5bd3f07d29ef3688f810d2bd8d117a38f8df8 100644 (file)
 #include <mach/mach_vm.h>
 #include <mach/vm_map.h>
 #include <mach/host_priv.h>
+#include <mach/sdt.h>
 
 #include <machine/machine_routines.h>
 
@@ -164,6 +165,13 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
        int fd = uap->fd;
        int num_retries = 0;
 
+       /*
+        * Note that for UNIX03 conformance, additional parameter checking for the
+        * mmap() system call is done in libsyscall before entering the kernel.  The
+        * sanity checks and argument validation in this function are therefore not
+        * the only sources of returned errnos.
+        */
+
        user_map = current_map();
        user_addr = (vm_map_offset_t)uap->addr;
        user_size = (vm_map_size_t) uap->len;
@@ -212,9 +220,26 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
        user_size = vm_map_round_page(user_size,        
                                      vm_map_page_mask(user_map)); /* hi end */
 
-       if ((flags & MAP_JIT) && ((flags & MAP_FIXED) || (flags & MAP_SHARED) || !(flags & MAP_ANON))){
-               return EINVAL;
+       if (flags & MAP_JIT) {
+               if ((flags & MAP_FIXED) ||
+                   (flags & MAP_SHARED) ||
+                   !(flags & MAP_ANON) ||
+                   (flags & MAP_RESILIENT_CODESIGN)) {
+                       return EINVAL;
+               }
+       }
+
+       if ((flags & MAP_RESILIENT_CODESIGN) ||
+           (flags & MAP_RESILIENT_MEDIA)) {
+               assert(!(flags & MAP_JIT));
+               if (flags & MAP_ANON) {
+                       return EINVAL;
+               }
+               if (prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) {
+                       return EPERM;
+               }
        }
+
        /*
         * Check for illegal addresses.  Watch out for address wrap... Note
         * that VM_*_ADDRESS are not constants due to casts (argh).
@@ -404,7 +429,7 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
                        handle = (void *)vp;
 #if CONFIG_MACF
                        error = mac_file_check_mmap(vfs_context_ucred(ctx),
-                           fp->f_fglob, prot, flags, &maxprot);
+                           fp->f_fglob, prot, flags, file_pos, &maxprot);
                        if (error) {
                                (void)vnode_put(vp);
                                goto bad;
@@ -420,8 +445,6 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
                                }
                        }
 #endif /* CONFIG_PROTECT */
-
-
                }
        }
 
@@ -475,9 +498,14 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
        if (flags & MAP_NOCACHE)
                alloc_flags |= VM_FLAGS_NO_CACHE;
 
-       if (flags & MAP_JIT){
+       if (flags & MAP_JIT) {
                alloc_flags |= VM_FLAGS_MAP_JIT;
        }
+
+       if (flags & MAP_RESILIENT_CODESIGN) {
+               alloc_flags |= VM_FLAGS_RESILIENT_CODESIGN;
+       }
+
        /*
         * Lookup/allocate object.
         */
@@ -568,7 +596,19 @@ map_anon_retry:
                if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
                        maxprot |= VM_PROT_READ;
 #endif /* radar 3777787 */
+
 map_file_retry:
+               if ((flags & MAP_RESILIENT_CODESIGN) ||
+                   (flags & MAP_RESILIENT_MEDIA)) {
+                       if (prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) {
+                               assert(!mapanon);
+                               vnode_put(vp);
+                               error = EPERM;
+                               goto bad;
+                       }
+                       /* strictly limit access to "prot" */
+                       maxprot &= prot;
+               }
                result = vm_map_enter_mem_object_control(user_map,
                                                 &user_addr, user_size,
                                                 0, alloc_flags,
@@ -909,6 +949,13 @@ madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval)
                case MADV_CAN_REUSE:
                        new_behavior = VM_BEHAVIOR_CAN_REUSE;
                        break;
+               case MADV_PAGEOUT:
+#if MACH_ASSERT
+                       new_behavior = VM_BEHAVIOR_PAGEOUT;
+                       break;
+#else /* MACH_ASSERT */
+                       return ENOTSUP;
+#endif /* MACH_ASSERT */
                default:
                        return(EINVAL);
        }
@@ -916,6 +963,7 @@ madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval)
        start = (mach_vm_offset_t) uap->addr;
        size = (mach_vm_size_t) uap->len;
        
+
        user_map = current_map();
 
        result = mach_vm_behavior_set(user_map, start, size, new_behavior);
@@ -1060,7 +1108,7 @@ mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retvalval)
        size = vm_map_round_page(size+pageoff, vm_map_page_mask(user_map));
 
        /* have to call vm_map_wire directly to pass "I don't know" protections */
-       result = vm_map_wire(user_map, addr, addr+size, VM_PROT_NONE, TRUE);
+       result = vm_map_wire(user_map, addr, addr+size, VM_PROT_NONE | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK), TRUE);
 
        if (result == KERN_RESOURCE_SHORTAGE)
                return EAGAIN;
@@ -1114,7 +1162,7 @@ mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __u
     uint32_t   cryptid;
     cpu_type_t cputype;
     cpu_subtype_t      cpusubtype;
-    pager_crypt_info_t crypt_info;
+    pager_crypt_info_t crypt_info;
     const char * cryptname = 0;
     char *vpath;
     int len, ret;
@@ -1188,13 +1236,19 @@ mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __u
     kprintf("%s vpath %s cryptid 0x%08x cputype 0x%08x cpusubtype 0x%08x range 0x%016llx size 0x%016llx\n",
             __FUNCTION__, vpath, cryptid, cputype, cpusubtype, (uint64_t)user_addr, (uint64_t)user_size);
 #endif
-    
+
     /* set up decrypter first */
     crypt_file_data_t crypt_data = {
         .filename = vpath,
         .cputype = cputype,
         .cpusubtype = cpusubtype };
     result = text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data);
+#if DEVELOPMENT || DEBUG
+    printf("APPLE_PROTECT: %d[%s] map %p [0x%llx:0x%llx] %s(%s) -> 0x%x\n",
+          p->p_pid, p->p_comm,
+          user_map, (uint64_t) user_addr, (uint64_t) (user_addr + user_size),
+          __FUNCTION__, vpath, result);
+#endif /* DEVELOPMENT || DEBUG */
     FREE_ZONE(vpath, MAXPATHLEN, M_NAMEI);
     
     if(result) {
@@ -1209,13 +1263,20 @@ mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __u
     }
     
     /* now remap using the decrypter */
-    result = vm_map_apple_protected(user_map, user_addr, user_addr+user_size, &crypt_info);
+    vm_object_offset_t crypto_backing_offset;
+    crypto_backing_offset = -1;        /* i.e. use map entry's offset */
+    result = vm_map_apple_protected(user_map,
+                                   user_addr,
+                                   user_addr+user_size,
+                                   crypto_backing_offset,
+                                   &crypt_info);
     if (result) {
         printf("%s: mapping failed with %d\n", __FUNCTION__, result);
-        crypt_info.crypt_end(crypt_info.crypt_ops);
+    }
+   
+    if (result) {
         return (EPERM);
     }
-    
     return 0;
 }
 #endif /* CONFIG_CODE_DECRYPTION */
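
The reworked flag checks in mmap() above mean MAP_JIT regions must be anonymous and private: combining MAP_JIT with MAP_FIXED, MAP_SHARED, a file mapping, or the new MAP_RESILIENT_CODESIGN flag returns EINVAL, and the resilient flags themselves reject writable or executable protections with EPERM. A sketch of a conforming MAP_JIT request; whether it ultimately succeeds also depends on code-signing policy outside this file.

    #include <stdio.h>
    #include <string.h>
    #include <errno.h>
    #include <sys/mman.h>

    int
    main(void)
    {
        size_t len = 1 << 20;   /* 1 MiB JIT region */

        /* Anonymous + private + not fixed/shared, per the checks above. */
        void *p = mmap(NULL, len, PROT_READ | PROT_WRITE | PROT_EXEC,
                       MAP_ANON | MAP_PRIVATE | MAP_JIT, -1, 0);
        if (p == MAP_FAILED) {
            fprintf(stderr, "mmap(MAP_JIT): %s\n", strerror(errno));
            return 1;
        }
        printf("JIT region at %p\n", p);
        munmap(p, len);
        return 0;
    }
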
index a0a72cb5c5183b210b862a359aa084bd6f116073..fdd86a9484075352b894bf5f6287deeaf009e67a 100644 (file)
@@ -1752,7 +1752,7 @@ kernel_sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, s
        name2mib_oid[1] = 3;
 
        oidlen = sizeof(oid);
-       error = kernel_sysctl(current_proc(), name2mib_oid, 2, oid, &oidlen, (void *)name, strlen(name));
+       error = kernel_sysctl(current_proc(), name2mib_oid, 2, oid, &oidlen, __DECONST(void *, name), strlen(name));
        oidlen /= sizeof(int);
        
        /* now use the OID */
index 7b8aace099e8fdca24ac9ff72e10b4e2e7d3a1e3..9213b82f3ff312fc7f294d0ef7fbc35656ef5ae8 100644 (file)
@@ -147,27 +147,6 @@ struct proclist allproc;
 struct proclist zombproc;
 extern struct tty cons;
 
-#if CONFIG_LCTX
-/*
- * Login Context
- */
-static pid_t   lastlcid = 1;
-static int     alllctx_cnt;
-
-#define        LCID_MAX        8192    /* Does this really need to be large? */
-static int     maxlcid = LCID_MAX;
-
-LIST_HEAD(lctxlist, lctx);
-static struct lctxlist alllctx;
-
-lck_mtx_t alllctx_lock;
-lck_grp_t * lctx_lck_grp;
-lck_grp_attr_t * lctx_lck_grp_attr;
-lck_attr_t * lctx_lck_attr;
-
-static void    lctxinit(void);
-#endif
-
 extern int cs_debug;
 
 #if DEBUG
@@ -176,6 +155,10 @@ extern int cs_debug;
 /* Name to give to core files */
 __XNU_PRIVATE_EXTERN char corefilename[MAXPATHLEN+1] = {"/cores/core.%P"};
 
+#if PROC_REF_DEBUG
+extern uint32_t fastbacktrace(uintptr_t* bt, uint32_t max_frames) __attribute__((noinline));
+#endif
+
 static void orphanpg(struct pgrp *pg);
 void   proc_name_kdp(task_t t, char * buf, int size);
 int    proc_threadname_kdp(void *uth, char *buf, size_t size);
@@ -210,9 +193,6 @@ procinit(void)
        pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash);
        sesshashtbl = hashinit(maxproc / 4, M_PROC, &sesshash);
        uihashtbl = hashinit(maxproc / 16, M_PROC, &uihash);
-#if CONFIG_LCTX
-       lctxinit();
-#endif
 }
 
 /*
@@ -385,6 +365,56 @@ proc_findthread(thread_t thread)
        return(p);
 }
 
+#if PROC_REF_DEBUG
+void
+uthread_reset_proc_refcount(void *uthread) {
+       uthread_t uth;
+
+       if (proc_ref_tracking_disabled) {
+               return;
+       }
+
+       uth = (uthread_t) uthread;
+
+       uth->uu_proc_refcount = 0;
+       uth->uu_pindex = 0;
+}
+
+int
+uthread_get_proc_refcount(void *uthread) {
+       uthread_t uth;
+
+       if (proc_ref_tracking_disabled) {
+               return 0;
+       }
+
+       uth = (uthread_t) uthread;
+
+       return uth->uu_proc_refcount;
+}
+
+static void
+record_procref(proc_t p, int count) {
+       uthread_t uth;
+
+       if (proc_ref_tracking_disabled) {
+               return;
+       }
+
+       uth = current_uthread();
+       uth->uu_proc_refcount += count;
+
+       if (count == 1) {
+               if (uth->uu_pindex < NUM_PROC_REFS_TO_TRACK) {
+                       fastbacktrace((uintptr_t *) &uth->uu_proc_pcs[uth->uu_pindex], PROC_REF_STACK_DEPTH);
+
+                       uth->uu_proc_ps[uth->uu_pindex] = p;
+                       uth->uu_pindex++;
+               }
+       }
+}
+#endif
+
 int 
 proc_rele(proc_t p)
 {
@@ -419,8 +449,12 @@ proc_ref_locked(proc_t p)
        if ((p == PROC_NULL) || ((p->p_listflag & P_LIST_INCREATE) != 0))
                        return (PROC_NULL);
        /* do not return process marked for termination */
-       if ((p->p_stat != SZOMB) && ((p->p_listflag & P_LIST_EXITED) == 0) && ((p->p_listflag & (P_LIST_DRAINWAIT | P_LIST_DRAIN | P_LIST_DEAD)) == 0))
+       if ((p->p_stat != SZOMB) && ((p->p_listflag & P_LIST_EXITED) == 0) && ((p->p_listflag & (P_LIST_DRAINWAIT | P_LIST_DRAIN | P_LIST_DEAD)) == 0)) {
                p->p_refcount++;
+#if PROC_REF_DEBUG
+               record_procref(p, 1);
+#endif
+       }
        else 
                p1 = PROC_NULL;
 
@@ -433,6 +467,9 @@ proc_rele_locked(proc_t p)
 
        if (p->p_refcount > 0) {
                p->p_refcount--;
+#if PROC_REF_DEBUG
+               record_procref(p, -1);
+#endif
                if ((p->p_refcount == 0) && ((p->p_listflag & P_LIST_DRAINWAIT) == P_LIST_DRAINWAIT)) {
                        p->p_listflag &= ~P_LIST_DRAINWAIT;
                        wakeup(&p->p_refcount);
@@ -609,13 +646,17 @@ proc_checkdeadrefs(__unused proc_t p)
 int
 proc_pid(proc_t p)
 {
-       return (p->p_pid);
+       if (p != NULL)
+               return (p->p_pid);
+       return -1;
 }
 
-int 
+int
 proc_ppid(proc_t p)
 {
-       return (p->p_ppid);
+       if (p != NULL)
+               return (p->p_ppid);
+       return -1;
 }
 
 int
@@ -715,9 +756,13 @@ void
 proc_name_kdp(task_t t, char * buf, int size)
 {
        proc_t p = get_bsdtask_info(t);
+       if (p == PROC_NULL)
+               return;
 
-       if (p != PROC_NULL)
-               strlcpy(buf, &p->p_comm[0], size);
+       if ((size_t)size > sizeof(p->p_comm))
+               strlcpy(buf, &p->p_name[0], MIN((int)sizeof(p->p_name), size));
+       else
+               strlcpy(buf, &p->p_comm[0], MIN((int)sizeof(p->p_comm), size));
 }
 
 
@@ -907,14 +952,15 @@ proc_puniqueid(proc_t p)
        return(p->p_puniqueid);
 }
 
-uint64_t
-proc_coalitionid(__unused proc_t p)
+void
+proc_coalitionids(__unused proc_t p, __unused uint64_t ids[COALITION_NUM_TYPES])
 {
 #if CONFIG_COALITIONS
-       return(task_coalition_id(p->task));
+       task_coalition_ids(p->task, ids);
 #else
-       return 0;
+       memset(ids, 0, sizeof(uint64_t [COALITION_NUM_TYPES]));
 #endif
+       return;
 }
 
 uint64_t
@@ -1552,13 +1598,10 @@ out:
        return;
 }
 
-
-
-/* XXX should be __private_extern__ */
 int
-proc_is_classic(proc_t p)
+proc_is_classic(proc_t p __unused)
 {
-    return (p->p_flag & P_TRANSLATED) ? 1 : 0;
+    return (0);
 }
 
 /* XXX Why does this function exist?  Need to kill it off... */
@@ -1641,221 +1684,6 @@ toolong:
        return (1);
 }
 
-#if CONFIG_LCTX
-
-static void
-lctxinit(void)
-{
-       LIST_INIT(&alllctx);
-       alllctx_cnt = 0;
-
-       /* allocate lctx lock group attribute and group */
-       lctx_lck_grp_attr = lck_grp_attr_alloc_init();
-       lck_grp_attr_setstat(lctx_lck_grp_attr);
-
-       lctx_lck_grp = lck_grp_alloc_init("lctx", lctx_lck_grp_attr);
-       /* Allocate lctx lock attribute */
-       lctx_lck_attr = lck_attr_alloc_init();
-
-       lck_mtx_init(&alllctx_lock, lctx_lck_grp, lctx_lck_attr);
-}
-
-/*
- * Locate login context by number.
- */
-struct lctx *
-lcfind(pid_t lcid)
-{
-       struct lctx *l;
-
-       ALLLCTX_LOCK;
-       LIST_FOREACH(l, &alllctx, lc_list) {
-               if (l->lc_id == lcid) {
-                       LCTX_LOCK(l);
-                       break;
-               }
-       }
-       ALLLCTX_UNLOCK;
-       return (l);
-}
-
-#define        LCID_INC                                \
-       do {                                    \
-               lastlcid++;                     \
-               if (lastlcid > maxlcid) \
-                       lastlcid = 1;           \
-       } while (0)                             \
-
-struct lctx *
-lccreate(void)
-{
-       struct lctx *l;
-       pid_t newlcid;
-
-       /* Not very efficient but this isn't a common operation. */
-       while ((l = lcfind(lastlcid)) != NULL) {
-               LCTX_UNLOCK(l);
-               LCID_INC;
-       }
-       newlcid = lastlcid;
-       LCID_INC;
-
-       MALLOC(l, struct lctx *, sizeof(struct lctx), M_LCTX, M_WAITOK|M_ZERO);
-       l->lc_id = newlcid;
-       LIST_INIT(&l->lc_members);
-       lck_mtx_init(&l->lc_mtx, lctx_lck_grp, lctx_lck_attr);
-#if CONFIG_MACF
-       l->lc_label = mac_lctx_label_alloc();
-#endif
-       ALLLCTX_LOCK;
-       LIST_INSERT_HEAD(&alllctx, l, lc_list);
-       alllctx_cnt++;
-       ALLLCTX_UNLOCK;
-
-       return (l);
-}
-
-/*
- * Call with proc protected (either by being invisible
- * or by having the all-login-context lock held) and
- * the lctx locked.
- *
- * Will unlock lctx on return.
- */
-void
-enterlctx (proc_t p, struct lctx *l, __unused int create)
-{
-       if (l == NULL)
-               return;
-
-       p->p_lctx = l;
-       LIST_INSERT_HEAD(&l->lc_members, p, p_lclist);
-       l->lc_mc++;
-
-#if CONFIG_MACF
-       if (create)
-               mac_lctx_notify_create(p, l);
-       else
-               mac_lctx_notify_join(p, l);
-#endif
-       LCTX_UNLOCK(l);
-
-       return;
-}
-
-/*
- * Remove process from login context (if any). Called with p protected by
- * the alllctx lock.
- */
-void
-leavelctx (proc_t p)
-{
-       struct lctx *l;
-
-       if (p->p_lctx == NULL) {
-               return;
-       }
-
-       LCTX_LOCK(p->p_lctx);
-       l = p->p_lctx;
-       p->p_lctx = NULL;
-       LIST_REMOVE(p, p_lclist);
-       l->lc_mc--;
-#if CONFIG_MACF
-       mac_lctx_notify_leave(p, l);
-#endif
-       if (LIST_EMPTY(&l->lc_members)) {
-               LIST_REMOVE(l, lc_list);
-               alllctx_cnt--;
-               LCTX_UNLOCK(l);
-               lck_mtx_destroy(&l->lc_mtx, lctx_lck_grp);
-#if CONFIG_MACF
-               mac_lctx_label_free(l->lc_label);
-               l->lc_label = NULL;
-#endif
-               FREE(l, M_LCTX);
-       } else {
-               LCTX_UNLOCK(l);
-       }
-       return;
-}
-
-static int
-sysctl_kern_lctx SYSCTL_HANDLER_ARGS
-{
-       int *name = (int*) arg1;
-       u_int namelen = arg2;
-       struct kinfo_lctx kil;
-       struct lctx *l;
-       int error;
-
-       error = 0;
-
-       switch (oidp->oid_number) {
-       case KERN_LCTX_ALL:
-               ALLLCTX_LOCK;
-               /* Request for size. */
-               if (!req->oldptr) {
-                       error = SYSCTL_OUT(req, 0,
-                               sizeof(struct kinfo_lctx) * (alllctx_cnt + 1));
-                       goto out;
-               }
-               break;
-
-       case KERN_LCTX_LCID:
-               /* No space */
-               if (req->oldlen < sizeof(struct kinfo_lctx))
-                       return (ENOMEM);
-               /* No argument */
-               if (namelen != 1)
-                       return (EINVAL);
-               /* No login context */
-               l = lcfind((pid_t)name[0]);
-               if (l == NULL)
-                       return (ENOENT);
-               kil.id = l->lc_id;
-               kil.mc = l->lc_mc;
-               LCTX_UNLOCK(l);
-               return (SYSCTL_OUT(req, (caddr_t)&kil, sizeof(kil)));
-
-       default:
-               return (EINVAL);
-       }
-
-       /* Provided buffer is too small. */
-       if (req->oldlen < (sizeof(struct kinfo_lctx) * alllctx_cnt)) {
-               error = ENOMEM;
-               goto out;
-       }
-
-       LIST_FOREACH(l, &alllctx, lc_list) {
-               LCTX_LOCK(l);
-               kil.id = l->lc_id;
-               kil.mc = l->lc_mc;
-               LCTX_UNLOCK(l);
-               error = SYSCTL_OUT(req, (caddr_t)&kil, sizeof(kil));
-               if (error)
-                       break;
-       }
-out:
-       ALLLCTX_UNLOCK;
-
-       return (error);
-}
-
-SYSCTL_NODE(_kern, KERN_LCTX, lctx, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Login Context");
-
-SYSCTL_PROC(_kern_lctx, KERN_LCTX_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT | CTLFLAG_LOCKED,
-           0, 0, sysctl_kern_lctx, "S,lctx",
-           "Return entire login context table");
-SYSCTL_NODE(_kern_lctx, KERN_LCTX_LCID, lcid, CTLFLAG_RD | CTLFLAG_LOCKED,
-           sysctl_kern_lctx, "Login Context Table");
-SYSCTL_INT(_kern_lctx, OID_AUTO, last,  CTLFLAG_RD | CTLFLAG_LOCKED, &lastlcid, 0, ""); 
-SYSCTL_INT(_kern_lctx, OID_AUTO, count, CTLFLAG_RD | CTLFLAG_LOCKED, &alllctx_cnt, 0, "");
-SYSCTL_INT(_kern_lctx, OID_AUTO, max, CTLFLAG_RW | CTLFLAG_LOCKED, &maxlcid, 0, "");
-
-#endif /* LCTX */
-
 /* Code Signing related routines */
 
 int 
@@ -1962,6 +1790,8 @@ csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user
                        retflags = pt->p_csflags;
                        if (cs_enforcement(pt))
                                retflags |= CS_ENFORCEMENT;
+                       if (csproc_get_platform_binary(pt))
+                               retflags |= CS_PLATFORM_BINARY;
                        proc_unlock(pt);
 
                        if (uaddr != USER_ADDR_NULL)
@@ -2158,14 +1988,6 @@ csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user
                        break;
                }
 
-               case CS_OPS_SIGPUP_INSTALL:
-                       error = sigpup_install(uaddr);
-                       break;
-
-               case CS_OPS_SIGPUP_DROP:
-                       error = sigpup_drop();
-                       break;
-
                default:
                        error = EINVAL;
                        break;
@@ -3309,3 +3131,26 @@ int proc_shadow_max(void)
        return max;
 }
 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
+
+void proc_set_responsible_pid(proc_t target_proc, pid_t responsible_pid);
+void proc_set_responsible_pid(proc_t target_proc, pid_t responsible_pid)
+{
+       if (target_proc != NULL) {
+               target_proc->p_responsible_pid = responsible_pid;
+       }
+       return;
+}
+
+int
+proc_chrooted(proc_t p)
+{
+       int retval = 0;
+
+       if (p) {
+               proc_fdlock(p);
+               retval = (p->p_fd->fd_rdir != NULL) ? 1 : 0;
+               proc_fdunlock(p);
+       }
+
+       return retval;
+}
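
The PROC_REF_DEBUG additions above keep a per-uthread count of outstanding proc references and, for the first NUM_PROC_REFS_TO_TRACK acquisitions, snapshot a short backtrace via fastbacktrace() so reference leaks can be traced back to their call sites. Below is a userspace analogue of the same pattern built on the public backtrace(3) API; the structure, sizes, and names are illustrative only, not the kernel's.

    #include <stdio.h>
    #include <execinfo.h>

    #define REFS_TO_TRACK  32
    #define STACK_DEPTH     8

    struct ref_debug {
        int   refcount;                          /* net references held       */
        int   index;                             /* next free backtrace slot  */
        void *pcs[REFS_TO_TRACK][STACK_DEPTH];   /* callers that took a ref   */
    };

    /* Analogue of record_procref(p, 1): count the ref and remember the taker. */
    static void
    take_ref(struct ref_debug *rd)
    {
        rd->refcount++;
        if (rd->index < REFS_TO_TRACK)
            backtrace(rd->pcs[rd->index++], STACK_DEPTH);
    }

    static void
    drop_ref(struct ref_debug *rd)
    {
        rd->refcount--;
    }

    int
    main(void)
    {
        struct ref_debug rd = {0};

        take_ref(&rd);
        take_ref(&rd);
        drop_ref(&rd);

        /* A nonzero count at a checkpoint means a leaked reference; the
         * recorded backtraces show where each one was taken. */
        printf("outstanding refs: %d (tracked %d call sites)\n",
               rd.refcount, rd.index);
        if (rd.refcount != 0 && rd.index > 0)
            backtrace_symbols_fd(rd.pcs[0], STACK_DEPTH, 1 /* stdout */);
        return 0;
    }
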
index 23c602e8bb05d6cafe2590240b9592d914b6122b..5df82a23f0fecd9416231a961d737d78c7c0fcb0 100644 (file)
 #include <sys/times.h>
 #include <sys/malloc.h>
 
-#include <security/audit/audit.h>
+#define chgproccnt_ok(p) 1
 
-#if CONFIG_LCTX
-#include <sys/lctx.h>
-#endif
+#include <security/audit/audit.h>
 
 #if CONFIG_MACF
 #include <security/mac_framework.h>
@@ -780,7 +778,7 @@ setuid(proc_t p, struct setuid_args *uap, __unused int32_t *retval)
                         * may be able to decrement the proc count of B before we can increment it. This results in a panic.
                         * Incrementing the proc count of the target ruid, B, before setting the process credentials prevents this race.
                         */
-                       if (ruid != KAUTH_UID_NONE) {
+                       if (ruid != KAUTH_UID_NONE && chgproccnt_ok(p)) {
                                (void)chgproccnt(ruid, 1);
                        }
 
@@ -799,7 +797,7 @@ setuid(proc_t p, struct setuid_args *uap, __unused int32_t *retval)
                                 * We didn't successfully switch to the new ruid, so decrement
                                 * the procs/uid count that we incremented above.
                                 */
-                               if (ruid != KAUTH_UID_NONE) {
+                               if (ruid != KAUTH_UID_NONE && chgproccnt_ok(p)) {
                                        (void)chgproccnt(ruid, -1);
                                }
                                kauth_cred_unref(&my_new_cred);
@@ -818,7 +816,7 @@ setuid(proc_t p, struct setuid_args *uap, __unused int32_t *retval)
                         * If we've updated the ruid, decrement the count of procs running
                         * under the previous ruid
                         */
-                       if (ruid != KAUTH_UID_NONE) {
+                       if (ruid != KAUTH_UID_NONE && chgproccnt_ok(p)) {
                                (void)chgproccnt(my_pcred->cr_ruid, -1);
                        }
                }
@@ -1028,7 +1026,7 @@ setreuid(proc_t p, struct setreuid_args *uap, __unused int32_t *retval)
                         * may be able to decrement the proc count of B before we can increment it. This results in a panic.
                         * Incrementing the proc count of the target ruid, B, before setting the process credentials prevents this race.
                         */
-                       if (ruid != KAUTH_UID_NONE) {
+                       if (ruid != KAUTH_UID_NONE && chgproccnt_ok(p)) {
                                (void)chgproccnt(ruid, 1);
                        }
 
@@ -1043,7 +1041,7 @@ setreuid(proc_t p, struct setreuid_args *uap, __unused int32_t *retval)
                         */
                        if (p->p_ucred != my_cred) {
                                proc_unlock(p);
-                               if (ruid != KAUTH_UID_NONE) {
+                               if (ruid != KAUTH_UID_NONE && chgproccnt_ok(p)) {
                                        /*
                                         * We didn't successfully switch to the new ruid, so decrement
                                         * the procs/uid count that we incremented above.
@@ -1063,7 +1061,7 @@ setreuid(proc_t p, struct setreuid_args *uap, __unused int32_t *retval)
                        OSBitOrAtomic(P_SUGID, &p->p_flag);
                        proc_unlock(p);
 
-                       if (ruid != KAUTH_UID_NONE) {
+                       if (ruid != KAUTH_UID_NONE && chgproccnt_ok(p)) {
                                /*
                                 * We switched to a new ruid, so decrement the count of procs running
                                 * under the previous ruid
@@ -2059,6 +2057,18 @@ set_security_token(proc_t p)
 }
 
 
+int get_audit_token_pid(audit_token_t *audit_token);
+
+int
+get_audit_token_pid(audit_token_t *audit_token)
+{
+       /* keep in-sync with set_security_token (above) */
+       if (audit_token)
+               return (int)audit_token->val[5];
+       return -1;
+}
+
+
 /*
  * Fill in a struct xucred based on a kauth_cred_t.
  */
@@ -2074,170 +2084,3 @@ cru2x(kauth_cred_t cr, struct xucred *xcr)
        xcr->cr_ngroups = pcr->cr_ngroups;
        bcopy(pcr->cr_groups, xcr->cr_groups, sizeof(xcr->cr_groups));
 }
-
-#if CONFIG_LCTX
-
-/*
- * Set Login Context ID
- */
-/*
- * MPSAFE - assignment of (visible) process to context protected by ALLLCTX_LOCK,
- *         LCTX by its own locks.
- */
-int
-setlcid(proc_t p0, struct setlcid_args *uap, __unused int32_t *retval)
-{
-       proc_t p;
-       struct lctx *l;
-       int error = 0;
-       int refheld = 0;
-
-       AUDIT_ARG(pid, uap->pid);
-       AUDIT_ARG(value32, uap->lcid);
-       if (uap->pid == LCID_PROC_SELF) {       /* Create/Join/Leave */
-               p = p0;
-       } else {                                /* Adopt/Orphan */
-               p = proc_find(uap->pid);
-               if (p == NULL)
-                       return (ESRCH);
-               refheld = 1;
-       }
-
-#if CONFIG_MACF
-       error = mac_proc_check_setlcid(p0, p, uap->pid, uap->lcid);
-       if (error)
-               goto out;
-#endif
-
-       switch (uap->lcid) {
-       /* Leave/Orphan */
-       case LCID_REMOVE:
-
-               /* Only root may Leave/Orphan. */
-               if (!kauth_cred_issuser(kauth_cred_get())) {
-                       error = EPERM;
-                       goto out;
-               }
-
-               /* Process not in login context. */
-               if (p->p_lctx == NULL) {
-                       error = ENOATTR;
-                       goto out;
-               }
-
-               l = NULL;
-
-               break;
-
-       /* Create */
-       case LCID_CREATE:
-
-               /* Create only valid for self! */
-               if (uap->pid != LCID_PROC_SELF) {
-                       error = EPERM;
-                       goto out;
-               }
-
-               /* Already in a login context. */
-               if (p->p_lctx != NULL) {
-                       error = EPERM;
-                       goto out;
-               }
-
-               l = lccreate();
-               if (l == NULL) {
-                       error = ENOMEM;
-                       goto out;
-               }
-
-               LCTX_LOCK(l);
-
-               break;
-
-       /* Join/Adopt */
-       default:
-
-               /* Only root may Join/Adopt. */
-               if (!kauth_cred_issuser(kauth_cred_get())) {
-                       error = EPERM;
-                       goto out;
-               }
-
-               l = lcfind(uap->lcid);
-               if (l == NULL) {
-                       error = ENOATTR;
-                       goto out;
-               }
-
-               break;
-       }
-
-       ALLLCTX_LOCK;
-       leavelctx(p);
-       enterlctx(p, l, (uap->lcid == LCID_CREATE) ? 1 : 0);
-       ALLLCTX_UNLOCK;
-
-out:
-       if (refheld != 0)
-               proc_rele(p);
-       return (error);
-}
-
-/*
- * Get Login Context ID
- */
-/*
- * MPSAFE - membership of (visible) process in a login context
- *         protected by the all-context lock.
- */
-int
-getlcid(proc_t p0, struct getlcid_args *uap, int32_t *retval)
-{
-       proc_t p;
-       int error = 0;
-       int refheld = 0;
-
-       AUDIT_ARG(pid, uap->pid);
-       if (uap->pid == LCID_PROC_SELF) {
-               p = p0;
-       } else {
-               p = proc_find(uap->pid);
-               if (p == NULL)
-                       return (ESRCH);
-               refheld = 1;
-       }
-
-#if CONFIG_MACF
-       error = mac_proc_check_getlcid(p0, p, uap->pid);
-       if (error)
-               goto out;
-#endif
-       ALLLCTX_LOCK;
-       if (p->p_lctx == NULL) {
-               error = ENOATTR;
-               ALLLCTX_UNLOCK;
-               goto out;
-       }
-       *retval = p->p_lctx->lc_id;
-       ALLLCTX_UNLOCK;
- out:
-       if (refheld != 0)
-               proc_rele(p);
-
-       return (error);
-}
-#else  /* LCTX */
-int
-setlcid(proc_t p0, struct setlcid_args *uap, int32_t *retval)
-{
-
-       return (ENOSYS);
-}
-
-int
-getlcid(proc_t p0, struct getlcid_args *uap, int32_t *retval)
-{
-
-       return (ENOSYS);
-}
-#endif /* !LCTX */
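
The new get_audit_token_pid() helper above simply reads slot val[5] of an audit token, the slot set_security_token() fills with the pid. The same token is visible to userspace through the TASK_AUDIT_TOKEN task_info flavor, so a quick sanity check can compare that slot with getpid(). The sketch assumes the public Mach headers and treats the val[5] layout exactly as the helper above does.

    #include <stdio.h>
    #include <unistd.h>
    #include <mach/mach.h>

    int
    main(void)
    {
        audit_token_t token;
        mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;

        kern_return_t kr = task_info(mach_task_self(), TASK_AUDIT_TOKEN,
                                     (task_info_t)&token, &count);
        if (kr != KERN_SUCCESS) {
            fprintf(stderr, "task_info: %d\n", kr);
            return 1;
        }

        /* Slot 5 holds the pid, as consumed by get_audit_token_pid() above. */
        printf("audit token pid = %u, getpid() = %d\n", token.val[5], (int)getpid());
        return 0;
    }
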
index 2900cd52bac3ea357a9242f54c27eff5e24c9b5c..a994b8bd623b400b14044adc9d68f1fe20def78c 100644 (file)
 
 #include <kern/assert.h>
 #include <sys/resource.h>
+#include <IOKit/IOBSD.h>
 
 int    donice(struct proc *curp, struct proc *chgp, int n);
 int    dosetrlimit(struct proc *p, u_int which, struct rlimit *limp);
@@ -611,23 +612,8 @@ proc_set_darwin_role(proc_t curp, proc_t targetp, int priority)
 
        integer_t role = 0;
 
-       switch (priority) {
-               case PRIO_DARWIN_ROLE_DEFAULT:
-                       role = TASK_UNSPECIFIED;
-                       break;
-               case PRIO_DARWIN_ROLE_UI_FOCAL:
-                       role = TASK_FOREGROUND_APPLICATION;
-                       break;
-               case PRIO_DARWIN_ROLE_UI:
-                       role = TASK_BACKGROUND_APPLICATION;
-                       break;
-               case PRIO_DARWIN_ROLE_NON_UI:
-                       role = TASK_NONUI_APPLICATION;
-                       break;
-               default:
-                       error = EINVAL;
-                       goto out;
-       }
+       if ((error = proc_darwin_role_to_task_role(priority, &role)))
+               goto out;
 
        proc_set_task_policy(proc_task(targetp), THREAD_NULL,
                             TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE, role);
@@ -665,21 +651,7 @@ proc_get_darwin_role(proc_t curp, proc_t targetp, int *priority)
        role = proc_get_task_policy(proc_task(targetp), THREAD_NULL,
                                    TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE);
 
-       switch (role) {
-               case TASK_FOREGROUND_APPLICATION:
-                       *priority = PRIO_DARWIN_ROLE_UI_FOCAL;
-                       break;
-               case TASK_BACKGROUND_APPLICATION:
-                       *priority = PRIO_DARWIN_ROLE_UI;
-                       break;
-               case TASK_NONUI_APPLICATION:
-                       *priority = PRIO_DARWIN_ROLE_NON_UI;
-                       break;
-               case TASK_UNSPECIFIED:
-               default:
-                       *priority = PRIO_DARWIN_ROLE_DEFAULT;
-                       break;
-       }
+       *priority = proc_task_role_to_darwin_role(role);
 
 out:
        kauth_cred_unref(&target_cred);
@@ -1632,8 +1604,13 @@ iopolicysys_vfs(struct proc *p, int cmd, int scope, int policy, struct _iopol_pa
        switch(cmd) {
                case IOPOL_CMD_SET:
                        if (0 == kauth_cred_issuser(kauth_cred_get())) {
-                               error = EPERM;
-                               goto out;
+                               /* If it's a non-root process, it needs to have the entitlement to set the policy */
+                               boolean_t entitled = FALSE;
+                               entitled = IOTaskHasEntitlement(current_task(), "com.apple.private.iopol.case_sensitivity");
+                               if (!entitled) {
+                                       error = EPERM;
+                                       goto out;
+                               }
                        }
 
                        switch (policy) {
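
The relaxed IOPOL_CMD_SET check above lets a non-root process set the VFS case-sensitivity I/O policy if it holds the com.apple.private.iopol.case_sensitivity entitlement. A sketch of the corresponding userland call through the setiopolicy_np(3)/getiopolicy_np(3) wrappers; the IOPOL_* constant names are assumed to be exposed by the SDK's <sys/resource.h>.

    #include <stdio.h>
    #include <string.h>
    #include <errno.h>
    #include <sys/resource.h>

    int
    main(void)
    {
        /* Force case-sensitive lookups for this process.  Without root, the
         * kernel now also accepts the request when the process carries the
         * com.apple.private.iopol.case_sensitivity entitlement. */
        if (setiopolicy_np(IOPOL_TYPE_VFS_HFS_CASE_SENSITIVITY, IOPOL_SCOPE_PROCESS,
                           IOPOL_VFS_HFS_CASE_SENSITIVITY_FORCE_CASE_SENSITIVE) != 0) {
            fprintf(stderr, "setiopolicy_np: %s\n", strerror(errno));
            return 1;
        }

        printf("vfs case-sensitivity policy: %d\n",
               getiopolicy_np(IOPOL_TYPE_VFS_HFS_CASE_SENSITIVITY, IOPOL_SCOPE_PROCESS));
        return 0;
    }
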
index 4e231826d10c3f69483503ff52206ed97c2332af..3858ce83e10ed21b146a0a5f050d8a50e7ad697b 100644 (file)
@@ -74,7 +74,6 @@
 uint32_t system_inshutdown = 0;
 
 /* XXX should be in a header file somewhere, but isn't */
-extern void md_prepare_for_shutdown(int, int, char *);
 extern void (*unmountroot_pre_hook)(void);
 
 unsigned int proc_shutdown_exitcount = 0;
@@ -83,7 +82,7 @@ static int  sd_openlog(vfs_context_t);
 static int  sd_closelog(vfs_context_t);
 static void sd_log(vfs_context_t, const char *, ...);
 static void proc_shutdown(void);
-
+static void kernel_hwm_panic_info(void);
 extern void IOSystemShutdownNotification(void);
 
 struct sd_filterargs{
@@ -109,10 +108,37 @@ static int  sd_callback1(proc_t p, void * arg);
 static int  sd_callback2(proc_t p, void * arg);
 static int  sd_callback3(proc_t p, void * arg);
 
+extern boolean_t panic_include_zprint;
+extern vm_offset_t panic_kext_memory_info;
+extern vm_size_t panic_kext_memory_size; 
+
+static void
+kernel_hwm_panic_info(void)
+{
+       mach_memory_info_t      *memory_info;
+       unsigned int            num_sites;
+       kern_return_t           kr;
+
+       panic_include_zprint = TRUE;
+       panic_kext_memory_info = 0;
+       panic_kext_memory_size = 0;
+
+       num_sites = VM_KERN_MEMORY_COUNT + VM_KERN_COUNTER_COUNT;
+       panic_kext_memory_size = round_page(num_sites * sizeof(mach_zone_info_t));
+       
+       kr = kmem_alloc(kernel_map, (vm_offset_t *) &panic_kext_memory_info, panic_kext_memory_size, VM_KERN_MEMORY_OSFMK);
+       if (kr != KERN_SUCCESS) {
+               panic_kext_memory_info = 0;
+               return;
+       }
+       memory_info = (mach_memory_info_t *)panic_kext_memory_info;
+       vm_page_diagnose(memory_info, num_sites);
+       return;
+}
+
 int
-boot(int paniced, int howto, char *command)
+reboot_kernel(int howto, char *message)
 {
-       struct proc *p = current_proc();        /* XXX */
        int hostboot_option=0;
 
        if (!OSCompareAndSwap(0, 1, &system_inshutdown)) {
@@ -126,12 +152,10 @@ boot(int paniced, int howto, char *command)
         */
        IOSystemShutdownNotification();
 
-       md_prepare_for_shutdown(paniced, howto, command);
-
        if ((howto&RB_QUICK)==RB_QUICK) {
                printf("Quick reboot...\n");
                if ((howto&RB_NOSYNC)==0) {
-                       sync(p, (void *)NULL, (int *)NULL);
+                       sync((proc_t)NULL, (void *)NULL, (int *)NULL);
                }
        }
        else if ((howto&RB_NOSYNC)==0) {
@@ -143,7 +167,7 @@ boot(int paniced, int howto, char *command)
                 * Release vnodes held by texts before sync.
                 */
 
-               /* handle live procs (deallocate their root and current directories). */                
+               /* handle live procs (deallocate their root and current directories), suspend initproc */
                proc_shutdown();
 
 #if CONFIG_AUDIT
@@ -153,15 +177,7 @@ boot(int paniced, int howto, char *command)
                if (unmountroot_pre_hook != NULL)
                        unmountroot_pre_hook();
 
-               sync(p, (void *)NULL, (int *)NULL);
-
-               /*
-                * Now that all processes have been terminated and system is
-                * sync'ed up, suspend init
-                */
-                       
-               if (initproc && p != initproc)
-                       task_suspend(initproc->task);
+               sync((proc_t)NULL, (void *)NULL, (int *)NULL);
 
                if (kdebug_enable)
                        kdbg_dump_trace_to_file("/var/log/shutdown/shutdown.trace");
@@ -194,12 +210,18 @@ boot(int paniced, int howto, char *command)
 #endif /* NETWORKING */
 
 force_reboot:
+
+       if (howto & RB_PANIC) {
+               if (strncmp(message, "Kernel memory has exceeded limits", 33) == 0) {
+                       kernel_hwm_panic_info();
+               }
+               panic ("userspace panic: %s", message);
+       }
+
        if (howto & RB_POWERDOWN)
                hostboot_option = HOST_REBOOT_HALT;
        if (howto & RB_HALT)
                hostboot_option = HOST_REBOOT_HALT;
-       if (paniced == RB_PANIC)
-               hostboot_option = HOST_REBOOT_HALT;
 
        if (howto & RB_UPSDELAY) {
                hostboot_option = HOST_REBOOT_UPSDELAY;
@@ -568,6 +590,11 @@ sigterm_loop:
 
        sd_closelog(ctx);
 
+       /*
+        * Now that all other processes have been terminated, suspend init
+        */
+       task_suspend_internal(initproc->task);
+
        /* drop the ref on initproc */
        proc_rele(initproc);
        printf("continuing\n");
index bb44111fe96b4f051793d3502dcd884f4bacf6f0..5f3e5960cbae0ad29d903cc144798e04ae3c7ba3 100644 (file)
 #include <libkern/OSAtomic.h>
 
 #include <sys/sdt.h>
+#include <sys/codesign.h>
 
 /*
  * Missing prototypes that Mach should export
@@ -121,8 +122,6 @@ extern int thread_enable_fpe(thread_t act, int onoff);
 extern thread_t        port_name_to_thread(mach_port_name_t port_name);
 extern kern_return_t get_signalact(task_t , thread_t *, int);
 extern unsigned int get_useraddr(void);
-extern kern_return_t task_suspend_internal(task_t);
-extern kern_return_t task_resume_internal(task_t);
 
 /*
  * ---
@@ -134,10 +133,11 @@ extern void doexception(int exc, mach_exception_code_t code,
 static void stop(proc_t, proc_t);
 int cansignal(proc_t, kauth_cred_t, proc_t, int, int);
 int killpg1(proc_t, int, int, int, int);
-int setsigvec(proc_t, thread_t, int, struct __kern_sigaction *, boolean_t in_sigstart);
 static void psignal_uthread(thread_t, int);
+static void psignal_try_thread(proc_t, thread_t, int signum);
 kern_return_t do_bsdexception(int, int, int);
 void __posix_sem_syscall_return(kern_return_t);
+char *proc_name_address(void *p);
 
 /* implementations in osfmk/kern/sync_sema.c. We do not want port.h in this scope, so void * them  */
 kern_return_t semaphore_timedwait_signal_trap_internal(mach_port_name_t, mach_port_name_t, unsigned int, clock_res_t, void (*)(kern_return_t));
@@ -148,7 +148,7 @@ kern_return_t semaphore_wait_trap_internal(mach_port_name_t, void (*)(kern_retur
 static int     filt_sigattach(struct knote *kn);
 static void    filt_sigdetach(struct knote *kn);
 static int     filt_signal(struct knote *kn, long hint);
-static void    filt_signaltouch(struct knote *kn, struct kevent64_s *kev, 
+static void    filt_signaltouch(struct knote *kn, struct kevent_internal_s *kev, 
                long type);
 
 struct filterops sig_filtops = {
@@ -185,6 +185,7 @@ static kern_return_t get_signalthread(proc_t, int, thread_t *);
 #define PSIG_LOCKED     0x1
 #define PSIG_VFORK      0x2
 #define PSIG_THREAD     0x4
+#define PSIG_TRY_THREAD 0x8
 
 
 static void psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum);
@@ -305,6 +306,10 @@ cansignal(proc_t p, kauth_cred_t uc, proc_t q, int signum, int zombie)
        if (p == q)
                return(1);
 
+       /* you can't send launchd SIGKILL, even if root */
+       if (signum == SIGKILL && q == initproc)
+               return(0);
+
        if (!suser(uc, NULL))
                return (1);             /* root can always signal */
 
@@ -349,6 +354,53 @@ cansignal(proc_t p, kauth_cred_t uc, proc_t q, int signum, int zombie)
        return (0);
 }
 
+/*
+ * <rdar://problem/21952708> Some signals can be restricted from being handled,
+ * forcing the default action for that signal. This behavior applies only to
+ * non-root (EUID != 0) processes, and is configured with the "sigrestrict=x"
+ * bootarg:
+ *
+ *   0 (default): Disallow use of restricted signals. Trying to register a handler
+ *             returns ENOTSUP, which userspace may use to take special action (e.g. abort).
+ *   1: As above, but return EINVAL. Restricted signals behave similarly to SIGKILL.
+ *   2: Usual POSIX semantics.
+ */
+unsigned sigrestrict_arg = 0;
+
+#if PLATFORM_WatchOS || PLATFORM_AppleTVOS
+static int
+sigrestrictmask(void)
+{
+       if (kauth_getuid() != 0 && sigrestrict_arg != 2) {
+               return SIGRESTRICTMASK;
+       }
+       return 0;
+}
+
+static int
+signal_is_restricted(proc_t p, int signum)
+{
+       if (sigmask(signum) & sigrestrictmask()) {
+               if (sigrestrict_arg == 0 &&
+                               task_get_apptype(p->task) == TASK_APPTYPE_APP_DEFAULT) {
+                       return ENOTSUP;
+               } else {
+                       return EINVAL;
+               }
+       }
+       return 0;
+}
+
+#else
+
+static inline int
+signal_is_restricted(proc_t p, int signum)
+{
+       (void)p;
+       (void)signum;
+       return 0;
+}
+#endif /* !(PLATFORM_WatchOS || PLATFORM_AppleTVOS) */
 
 /*
  * Returns:    0                       Success
@@ -375,9 +427,17 @@ sigaction(proc_t p, struct sigaction_args *uap, __unused int32_t *retval)
 
        signum = uap->signum;
        if (signum <= 0 || signum >= NSIG ||
-           signum == SIGKILL || signum == SIGSTOP)
+                       signum == SIGKILL || signum == SIGSTOP)
                return (EINVAL);
 
+       if ((error = signal_is_restricted(p, signum))) {
+               if (error == ENOTSUP) {
+                       printf("%s(%d): denied attempt to register action for signal %d\n",
+                                       proc_name_address(p), proc_pid(p), signum);
+               }
+               return error;
+       }
+
        if (uap->osa) {
                sa->sa_handler = ps->ps_sigact[signum];
                sa->sa_mask = ps->ps_catchmask[signum];
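
With signal_is_restricted() in place on the watchOS/tvOS configurations guarded above, a non-root process that tries to register a handler for a restricted signal fails: ENOTSUP under the default sigrestrict=0 for default-apptype processes (so the caller can detect the condition and, say, abort), EINVAL otherwise, and ordinary POSIX behavior only under sigrestrict=2. A sketch of the detection pattern follows; which signals fall under SIGRESTRICTMASK is platform policy, so SIGUSR1 here is purely illustrative.

    #include <stdio.h>
    #include <errno.h>
    #include <string.h>
    #include <signal.h>

    static void
    handler(int sig)
    {
        (void)sig;
    }

    int
    main(void)
    {
        struct sigaction sa;
        memset(&sa, 0, sizeof(sa));
        sa.sa_handler = handler;
        sigemptyset(&sa.sa_mask);

        if (sigaction(SIGUSR1, &sa, NULL) == -1) {
            if (errno == ENOTSUP) {
                /* Restricted signal with sigrestrict=0: the kernel refuses the
                 * handler and logs "denied attempt to register action ...". */
                fprintf(stderr, "signal handling restricted on this platform\n");
                return 1;
            }
            perror("sigaction");
            return 1;
        }
        printf("handler installed\n");
        return 0;
    }
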
@@ -1662,7 +1722,7 @@ get_signalthread(proc_t p, int signum, thread_t * thr)
        thread_t sig_thread;
        struct task * sig_task = p->task;
        kern_return_t kret;
-       
+
        *thr = THREAD_NULL;
 
        if ((p->p_lflag & P_LINVFORK) && p->p_vforkact) {
@@ -1673,9 +1733,10 @@ get_signalthread(proc_t p, int signum, thread_t * thr)
                        return(KERN_SUCCESS);
                }else
                        return(KERN_FAILURE);
-       } 
+       }
 
        proc_lock(p);
+
        TAILQ_FOREACH(uth, &p->p_uthlist, uu_list) {
                if(((uth->uu_flag & UT_NO_SIGMASK)== 0) && 
                        (((uth->uu_sigmask & mask) == 0) || (uth->uu_sigwait & mask))) {
@@ -1733,6 +1794,12 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
         }
 #endif /* SIGNAL_DEBUG */
 
+       /* catch unexpected initproc kills early for easier debugging */
+       if (signum == SIGKILL && p == initproc)
+               panic_plain("unexpected SIGKILL of %s %s",
+                           (p->p_name[0] != '\0' ? p->p_name : "initproc"),
+                           ((p->p_csflags & CS_KILLED) ? "(CS_KILLED)" : ""));
+
        /*
         *      We will need the task pointer later.  Grab it now to
         *      check for a zombie process.  Also don't send signals
@@ -1746,6 +1813,10 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
                sig_task = get_threadtask(thread);
                sig_thread = thread;
                sig_proc = (proc_t)get_bsdtask_info(sig_task);
+       } else if (flavor & PSIG_TRY_THREAD) {
+               sig_task = p->task;
+               sig_thread = thread;
+               sig_proc = p;
        } else {
                sig_task = p->task;
                sig_thread = (struct thread *)0;
@@ -1782,7 +1853,7 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
         *      the corresponding task data structures around too.  This
         *      reference is released by thread_deallocate.
         */
-       
+
 
        if (((flavor & PSIG_VFORK) == 0) && ((sig_proc->p_lflag & P_LTRACED) == 0) && (sig_proc->p_sigignore & mask)) {
                DTRACE_PROC3(signal__discard, thread_t, sig_thread, proc_t, sig_proc, int, signum);
@@ -1793,6 +1864,16 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
                action = SIG_DFL;
                act_set_astbsd(sig_thread);
                kret = KERN_SUCCESS;
+       } else if (flavor & PSIG_TRY_THREAD) {
+               uth = get_bsdthread_info(sig_thread);
+               if (((uth->uu_flag & UT_NO_SIGMASK) == 0) &&
+                               (((uth->uu_sigmask & mask) == 0) || (uth->uu_sigwait & mask)) &&
+                               ((kret = check_actforsig(sig_proc->task, sig_thread, 1)) == KERN_SUCCESS)) {
+                       /* deliver to specified thread */
+               } else {
+                       /* deliver to any willing thread */
+                       kret = get_signalthread(sig_proc, signum, &sig_thread);
+               }
        } else if (flavor & PSIG_THREAD) {
                /* If successful return with ast set */
                kret = check_actforsig(sig_task, sig_thread, 1);
@@ -1807,7 +1888,6 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
                goto psigout;
        }
 
-
        uth = get_bsdthread_info(sig_thread);
 
        /*
@@ -1838,7 +1918,6 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
                }
        }
 
-
        proc_lock(sig_proc);
 
        if (sig_proc->p_nice > NZERO && action == SIG_DFL && (prop & SA_KILL) &&
@@ -2192,6 +2271,12 @@ psignal_uthread(thread_t thread, int signum)
        psignal_internal(PROC_NULL, TASK_NULL, thread, PSIG_THREAD, signum);
 }
 
+/* same as psignal(), but prefer delivery to 'thread' if possible */
+static void
+psignal_try_thread(proc_t p, thread_t thread, int signum)
+{
+       psignal_internal(p, NULL, thread, PSIG_TRY_THREAD, signum);
+}
 
 /*
  * If the current process has received a signal (should be caught or cause
@@ -2395,21 +2480,6 @@ issignal_locked(proc_t p)
                switch ((long)p->p_sigacts->ps_sigact[signum]) {
                
                case (long)SIG_DFL:
-                       /*
-                        * Don't take default actions on system processes.
-                        */
-                       if (p->p_ppid == 0) {
-#if DIAGNOSTIC
-                               /*
-                                * Are you sure you want to ignore SIGSEGV
-                                * in init? XXX
-                                */
-                               printf("Process (pid %d) got signal %d\n",
-                                       p->p_pid, signum);
-#endif
-                               break;                          /* == ignore */
-                       }
-                       
                        /*
                         * If there is a pending stop signal to process
                         * with default action, stop here,
@@ -2557,21 +2627,6 @@ CURSIG(proc_t p)
                switch ((long)p->p_sigacts->ps_sigact[signum]) {
                
                case (long)SIG_DFL:
-                       /*
-                        * Don't take default actions on system processes.
-                        */
-                       if (p->p_ppid == 0) {
-#if DIAGNOSTIC
-                               /*
-                                * Are you sure you want to ignore SIGSEGV
-                                * in init? XXX
-                                */
-                               printf("Process (pid %d) got signal %d\n",
-                                       p->p_pid, signum);
-#endif
-                               break;                          /* == ignore */
-                       }
-                       
                        /*
                         * If there is a pending stop signal to process
                         * with default action, stop here,
@@ -2840,7 +2895,7 @@ filt_signal(struct knote *kn, long hint)
 }
 
 static void
-filt_signaltouch(struct knote *kn, struct kevent64_s *kev, long type)
+filt_signaltouch(struct knote *kn, struct kevent_internal_s *kev, long type)
 {
        proc_klist_lock();
        switch (type) {
@@ -2856,7 +2911,7 @@ filt_signaltouch(struct knote *kn, struct kevent64_s *kev, long type)
                }
                break;
        default:
-               panic("filt_machporttouch() - invalid type (%ld)", type);
+               panic("filt_signaltouch() - invalid type (%ld)", type);
                break;
        }
        proc_klist_unlock();
@@ -2891,7 +2946,7 @@ bsd_ast(thread_t thread)
                        else
                                task_vtimer_clear(p->task, TASK_VTIMER_USER);
 
-                       psignal(p, SIGVTALRM);
+                       psignal_try_thread(p, thread, SIGVTALRM);
                }
        }
 
@@ -2906,7 +2961,7 @@ bsd_ast(thread_t thread)
                        else
                                task_vtimer_clear(p->task, TASK_VTIMER_PROF);
 
-                       psignal(p, SIGPROF);
+                       psignal_try_thread(p, thread, SIGPROF);
                }
        }
 
@@ -2927,7 +2982,7 @@ bsd_ast(thread_t thread)
 
                        task_vtimer_clear(p->task, TASK_VTIMER_RLIM);
 
-                       psignal(p, SIGXCPU);
+                       psignal_try_thread(p, thread, SIGXCPU);
                }
        }
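
The three bsd_ast() changes above switch the CPU-timer signals (SIGVTALRM, SIGPROF, SIGXCPU) from psignal() to the new psignal_try_thread(), so delivery is first attempted on the thread whose AST noticed the timer expiry before falling back to any willing thread. An illustrative user-space exerciser for the SIGVTALRM path (single-threaded, so the thread preference is not observable here, but in a multi-threaded process the handler now tends to run on the thread that consumed the CPU time):

    #include <signal.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/time.h>

    static volatile sig_atomic_t fired;

    static void on_vtalrm(int sig)
    {
        (void)sig;
        fired = 1;
    }

    int main(void)
    {
        struct sigaction sa;
        memset(&sa, 0, sizeof(sa));
        sa.sa_handler = on_vtalrm;
        sigaction(SIGVTALRM, &sa, NULL);

        /* arm a 100ms virtual (user CPU time) timer */
        struct itimerval it = { .it_interval = { 0, 0 }, .it_value = { 0, 100000 } };
        setitimer(ITIMER_VIRTUAL, &it, NULL);

        while (!fired)
            ;               /* spin in user mode until the timer expires */

        printf("SIGVTALRM delivered\n");
        return 0;
    }
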
 
index eada997ca8e509ec8ba174d4d0bb76ac2886eb8a..610c94936b9efb8ffdf3b1301cd3adbecdac8e18 100644 (file)
@@ -1004,9 +1004,6 @@ void uio_update( uio_t a_uio, user_size_t a_count )
                                a_uio->uio_iovs.uiovp->iov_base += a_count;
                                a_uio->uio_iovs.uiovp->iov_len -= a_count;
                        }
-                       if (a_uio->uio_resid_64 < 0) {
-                               a_uio->uio_resid_64 = 0;
-                       }
                        if (a_count > (user_size_t)a_uio->uio_resid_64) {
                                a_uio->uio_offset += a_uio->uio_resid_64;
                                a_uio->uio_resid_64 = 0;
@@ -1040,9 +1037,6 @@ void uio_update( uio_t a_uio, user_size_t a_count )
                                a_uio->uio_iovs.kiovp->iov_base += a_count;
                                a_uio->uio_iovs.kiovp->iov_len -= a_count;
                        }
-                       if (a_uio->uio_resid_64 < 0) {
-                               a_uio->uio_resid_64 = 0;
-                       }
                        if (a_count > (user_size_t)a_uio->uio_resid_64) {
                                a_uio->uio_offset += a_uio->uio_resid_64;
                                a_uio->uio_resid_64 = 0;
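
Both uio_update() hunks drop a clamp of a negative uio_resid_64. The clamp was dead code: the residual update that follows is itself guarded, setting the residual to zero when a_count exceeds what remains instead of subtracting past zero. A standalone sketch of that guarded update (illustrative names only, not the kernel's exact code):

    #include <stdint.h>

    /* Guarded residual update, as in uio_update(): never subtract past zero. */
    static void
    consume_resid(int64_t *resid, uint64_t count)
    {
        if (count > (uint64_t)*resid)
            *resid = 0;                 /* clamp instead of going negative */
        else
            *resid -= (int64_t)count;
    }
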
index 0e9d6c9c67bbcd20307e88a0e6fd6087f0af8ba4..9cd79fca4c8ef3afd48f778895b60d8aa399e571 100644 (file)
@@ -51,6 +51,7 @@
 #include <sys/stat.h>
 #include <sys/disk.h>
 #include <sys/conf.h>
+#include <sys/content_protection.h>
 
 #include <mach-o/loader.h>
 #include <mach-o/nlist.h>
@@ -58,7 +59,7 @@
 #include <kern/kalloc.h>
 #include <vm/vm_kern.h>
 #include <pexpert/pexpert.h>
-#include <IOKit/IOHibernatePrivate.h>
+#include <IOKit/IOPolledInterface.h>
 
 /* This function is called from kern_sysctl in the current process context;
  * it is exported with the System6.0.exports, but this appears to be a legacy
@@ -79,6 +80,7 @@ struct kern_direct_file_io_ref_t
     dev_t          device;
     uint32_t      blksize;
     off_t          filelength;
+    char           cf;
     char           pinned;
 };
 
@@ -99,7 +101,7 @@ static int device_ioctl(void * p1, __unused void * p2, u_long theIoctl, caddr_t
 static int
 kern_ioctl_file_extents(struct kern_direct_file_io_ref_t * ref, u_long theIoctl, off_t offset, off_t end)
 {
-    int error;
+    int error = 0;
     int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
     void * p1;
     void * p2;
@@ -125,6 +127,18 @@ kern_ioctl_file_extents(struct kern_direct_file_io_ref_t * ref, u_long theIoctl,
        p2 = ref->ctx;
        do_ioctl = &device_ioctl;
     }
+
+    if (_DKIOCCSPINEXTENT == theIoctl) {
+           /* Tell CS the image size, so it knows whether to place the subsequent pins on SSD or HDD */
+           pin.cp_extent.length = end;
+           pin.cp_flags = _DKIOCCSHIBERNATEIMGSIZE;
+           (void) do_ioctl(p1, p2, _DKIOCCSPINEXTENT, (caddr_t)&pin);
+    } else if (_DKIOCCSUNPINEXTENT == theIoctl) {
+           /* Tell CS hibernation is done, so it can stop blocking overlapping writes */
+           pin.cp_flags = _DKIOCCSPINDISCARDBLACKLIST;
+           (void) do_ioctl(p1, p2, _DKIOCCSUNPINEXTENT, (caddr_t)&pin);
+    }
+
     while (offset < end) 
     {
         if (ref->vp->v_type == VREG)
@@ -161,28 +175,40 @@ kern_ioctl_file_extents(struct kern_direct_file_io_ref_t * ref, u_long theIoctl,
            error = do_ioctl(p1, p2, theIoctl, (caddr_t)&pin);
            if (error && (ENOTTY != error))
            {
-               printf("_DKIOCCSPINEXTENT(%d) 0x%qx, 0x%qx\n", 
-                       error, pin.cp_extent.offset, pin.cp_extent.length);
+               printf("_DKIOCCSPINEXTENT(%d) 0x%qx, 0x%qx\n", error, pin.cp_extent.offset, pin.cp_extent.length);
+           }
+       }
+       else if (_DKIOCCSUNPINEXTENT == theIoctl)
+       {
+           pin.cp_extent.offset = fileblk;
+           pin.cp_extent.length = filechunk;
+           pin.cp_flags = _DKIOCCSPINFORHIBERNATION;
+           error = do_ioctl(p1, p2, theIoctl, (caddr_t)&pin);
+           if (error && (ENOTTY != error))
+           {
+               printf("_DKIOCCSUNPINEXTENT(%d) 0x%qx, 0x%qx\n", error, pin.cp_extent.offset, pin.cp_extent.length);
            }
        }
        else error = EINVAL;
 
-       if (error) break;
+        if (error) break;
         offset += filechunk;
     }
     return (error);
 }
 
+extern uint32_t freespace_mb(vnode_t vp);
 
 struct kern_direct_file_io_ref_t *
 kern_open_file_for_direct_io(const char * name, 
-                 boolean_t create_file,
+                            boolean_t create_file,
                             kern_get_file_extents_callback_t callback, 
                             void * callback_ref,
                              off_t set_file_size,
+                             off_t fs_free_size,
                              off_t write_file_offset,
-                             caddr_t write_file_addr,
-                             vm_size_t write_file_len,
+                             void * write_file_addr,
+                             size_t write_file_len,
                             dev_t * partition_device_result,
                             dev_t * image_device_result,
                              uint64_t * partitionbase_result,
@@ -191,20 +217,24 @@ kern_open_file_for_direct_io(const char * name,
 {
     struct kern_direct_file_io_ref_t * ref;
 
-    proc_t                     p;
-    struct vnode_attr          va;
-    int                                error;
-    off_t                      f_offset;
-    uint64_t                    fileblk;
-    size_t                      filechunk;
-    uint64_t                    physoffset;
-    dev_t                      device;
-    dev_t                      target = 0;
-    int                                isssd = 0;
-    uint32_t                    flags = 0;
-    uint32_t                   blksize;
-    off_t                      maxiocount, count, segcount;
-    boolean_t                   locked = FALSE;
+    proc_t            p;
+    struct vnode_attr va;
+    int               error;
+    off_t             f_offset;
+    uint64_t          fileblk;
+    size_t            filechunk;
+    uint64_t          physoffset;
+    dev_t             device;
+    dev_t             target = 0;
+    int               isssd = 0;
+    uint32_t          flags = 0;
+    uint32_t          blksize;
+    off_t             maxiocount, count, segcount;
+    boolean_t         locked = FALSE;
+    int               fmode, cmode;
+    struct            nameidata nd;
+    u_int32_t         ndflags;
+    off_t             mpFree;
 
     int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
     void * p1 = NULL;
@@ -221,12 +251,19 @@ kern_open_file_for_direct_io(const char * name,
 
     bzero(ref, sizeof(*ref));
     p = kernproc;
-    ref->ctx = vfs_context_create(vfs_context_current());
+    ref->ctx = vfs_context_create(vfs_context_kernel());
 
-    if ((error = vnode_open(name, (create_file) ? (O_CREAT | FWRITE) : FWRITE, 
-                            (0), 0, &ref->vp, ref->ctx)))
-        goto out;
+    fmode  = (create_file) ? (O_CREAT | FWRITE) : FWRITE;
+    cmode =  S_IRUSR | S_IWUSR;
+    ndflags = NOFOLLOW;
+    NDINIT(&nd, LOOKUP, OP_OPEN, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(name), ref->ctx);
+    VATTR_INIT(&va);
+    VATTR_SET(&va, va_mode, cmode);
+    VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
+    VATTR_SET(&va, va_dataprotect_class, PROTECTION_CLASS_D);
+    if ((error = vn_open_auth(&nd, &fmode, &va))) goto out;
 
+    ref->vp = nd.ni_vp;
     if (ref->vp->v_type == VREG)
     {
         vnode_lock_spin(ref->vp);
@@ -236,8 +273,7 @@ kern_open_file_for_direct_io(const char * name,
 
     if (write_file_addr && write_file_len)
     {
-       if ((error = kern_write_file(ref, write_file_offset, write_file_addr, write_file_len, 0)))
-           goto out;
+       if ((error = kern_write_file(ref, write_file_offset, write_file_addr, write_file_len, 0))) goto out;
     }
 
     VATTR_INIT(&va);
@@ -247,18 +283,17 @@ kern_open_file_for_direct_io(const char * name,
     VATTR_WANTED(&va, va_data_alloc);
     VATTR_WANTED(&va, va_nlink);
     error = EFAULT;
-    if (vnode_getattr(ref->vp, &va, ref->ctx))
-       goto out;
+    if (vnode_getattr(ref->vp, &va, ref->ctx)) goto out;
 
-    kprintf("vp va_rdev major %d minor %d\n", major(va.va_rdev), minor(va.va_rdev));
-    kprintf("vp va_fsid major %d minor %d\n", major(va.va_fsid), minor(va.va_fsid));
-    kprintf("vp size %qd alloc %qd\n", va.va_data_size, va.va_data_alloc);
+    mpFree = freespace_mb(ref->vp);
+    mpFree <<= 20;
+    kprintf("kern_direct_file(%s): vp size %qd, alloc %qd, mp free %qd, keep free %qd\n", 
+               name, va.va_data_size, va.va_data_alloc, mpFree, fs_free_size);
 
     if (ref->vp->v_type == VREG)
     {
-               /* Don't dump files with links. */
-               if (va.va_nlink != 1)
-                       goto out;
+        /* Don't dump files with links. */
+        if (va.va_nlink != 1) goto out;
 
         device = va.va_fsid;
         ref->filelength = va.va_data_size;
@@ -267,14 +302,21 @@ kern_open_file_for_direct_io(const char * name,
         p2 = p;
         do_ioctl = &file_ioctl;
 
-               if (set_file_size)
-           {
-                       error = vnode_setsize(ref->vp, set_file_size, 
-                                                                 IO_NOZEROFILL | IO_NOAUTH, ref->ctx);
-                       if (error)
-                               goto out;
-                       ref->filelength = set_file_size;
+        if (set_file_size)
+        {
+            if (fs_free_size)
+            {
+               mpFree += va.va_data_alloc;
+               if ((mpFree < set_file_size) || ((mpFree - set_file_size) < fs_free_size))
+               {
+                   error = ENOSPC;
+                   goto out;
                }
+           }
+           error = vnode_setsize(ref->vp, set_file_size, IO_NOZEROFILL | IO_NOAUTH, ref->ctx);
+           if (error) goto out;
+           ref->filelength = set_file_size;
+        }
     }
     else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
     {
@@ -288,11 +330,17 @@ kern_open_file_for_direct_io(const char * name,
     else
     {
        /* Don't dump to non-regular files. */
-       error = EFAULT;
+        error = EFAULT;
         goto out;
     }
     ref->device = device;
 
+    // probe for CF
+    dk_corestorage_info_t cs_info;
+    memset(&cs_info, 0, sizeof(dk_corestorage_info_t));
+    error = do_ioctl(p1, p2, DKIOCCORESTORAGE, (caddr_t)&cs_info);
+    ref->cf = (error == 0) && (cs_info.flags & DK_CORESTORAGE_ENABLE_HOTFILES);
+
     // get block size
 
     error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &ref->blksize);
@@ -302,8 +350,7 @@ kern_open_file_for_direct_io(const char * name,
     if (ref->vp->v_type != VREG)
     {
         error = do_ioctl(p1, p2, DKIOCGETBLOCKCOUNT, (caddr_t) &fileblk);
-        if (error)
-            goto out;
+        if (error) goto out;
        ref->filelength = fileblk * ref->blksize;    
     }
 
@@ -316,8 +363,7 @@ kern_open_file_for_direct_io(const char * name,
     // generate the block list
 
     error = do_ioctl(p1, p2, DKIOCLOCKPHYSICALEXTENTS, NULL);
-    if (error)
-        goto out;
+    if (error) goto out;
     locked = TRUE;
 
     f_offset = 0;
@@ -330,8 +376,7 @@ kern_open_file_for_direct_io(const char * name,
 
             error = VNOP_BLOCKMAP(ref->vp, f_offset, filechunk, &blkno,
                                                                  &filechunk, NULL, VNODE_WRITE, NULL);
-            if (error)
-                goto out;
+            if (error) goto out;
 
             fileblk = blkno * ref->blksize;
         }
@@ -350,8 +395,7 @@ kern_open_file_for_direct_io(const char * name,
             getphysreq.offset = fileblk + physoffset;
             getphysreq.length = (filechunk - physoffset);
             error = do_ioctl(p1, p2, DKIOCGETPHYSICALEXTENT, (caddr_t) &getphysreq);
-            if (error)
-                goto out;
+            if (error) goto out;
             if (!target)
             {
                 target = getphysreq.dev;
@@ -376,8 +420,13 @@ kern_open_file_for_direct_io(const char * name,
     }
     callback(callback_ref, 0ULL, 0ULL);
 
-    if (ref->vp->v_type == VREG)
-        p1 = &target;
+    if (ref->vp->v_type == VREG) p1 = &target;
+    else
+    {
+       p1 = &target;
+       p2 = p;
+       do_ioctl = &file_ioctl;
+    }
 
     // get partition base
 
@@ -446,7 +495,7 @@ kern_open_file_for_direct_io(const char * name,
 
     error = do_ioctl(p1, p2, DKIOCISSOLIDSTATE, (caddr_t)&isssd);
     if (!error && isssd)
-        flags |= kIOHibernateOptionSSD;
+        flags |= kIOPolledFileSSD;
 
     if (partition_device_result)
         *partition_device_result = device;
@@ -455,8 +504,16 @@ kern_open_file_for_direct_io(const char * name,
     if (oflags)
         *oflags = flags;
 
+    if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
+    {
+        vnode_close(ref->vp, FWRITE, ref->ctx);
+        ref->vp = NULLVP;
+       vfs_context_rele(ref->ctx);
+       ref->ctx = NULL;
+    }
+
 out:
-    kprintf("kern_open_file_for_direct_io(%d)\n", error);
+    printf("kern_open_file_for_direct_io(%d)\n", error);
 
     if (error && locked)
     {
@@ -466,17 +523,9 @@ out:
 
     if (error && ref)
     {
-    if (ref->pinned)
-    {
-        _dk_cs_pin_t pin;
-        bzero(&pin, sizeof(pin));
-
-           pin.cp_flags = _DKIOCCSPINDISCARDBLACKLIST;
-        p1 = &device;
-        (void) do_ioctl(p1, p2, _DKIOCCSUNPINEXTENT, (caddr_t)&pin);
-    }
        if (ref->vp)
        {
+           (void) kern_ioctl_file_extents(ref, _DKIOCCSUNPINEXTENT, 0, (ref->pinned && ref->cf) ? ref->filelength : 0);
            vnode_close(ref->vp, FWRITE, ref->ctx);
            ref->vp = NULLVP;
        }
@@ -489,7 +538,7 @@ out:
 }
 
 int
-kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len, int ioflag)
+kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, void * addr, size_t len, int ioflag)
 {
     return (vn_rdwr(UIO_WRITE, ref->vp,
                        addr, len, offset,
@@ -498,14 +547,29 @@ kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t ad
                        vfs_context_proc(ref->ctx)));
 }
 
+int
+kern_read_file(struct kern_direct_file_io_ref_t * ref, off_t offset, void * addr, size_t len, int ioflag)
+{
+    return (vn_rdwr(UIO_READ, ref->vp,
+                       addr, len, offset,
+                       UIO_SYSSPACE, ioflag|IO_SYNC|IO_NODELOCKED|IO_UNIT, 
+                        vfs_context_ucred(ref->ctx), (int *) 0,
+                       vfs_context_proc(ref->ctx)));
+}
+
+
+struct mount *
+kern_file_mount(struct kern_direct_file_io_ref_t * ref)
+{
+    return (ref->vp->v_mount);
+}
 
 void
 kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref,
-                             off_t write_offset, caddr_t addr, vm_size_t write_length,
+                             off_t write_offset, void * addr, size_t write_length,
                              off_t discard_offset, off_t discard_end)
 {
     int error;
-    _dk_cs_pin_t pin;
     kprintf("kern_close_file_for_direct_io\n");
 
     if (!ref) return;
@@ -531,18 +595,21 @@ kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref,
         }
         (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL);
 
-        if (ref->pinned)
-        {
-            bzero(&pin, sizeof(pin));
-            pin.cp_flags = _DKIOCCSPINDISCARDBLACKLIST;
-            (void) do_ioctl(p1, p2, _DKIOCCSUNPINEXTENT, (caddr_t)&pin);
-        }
+               //XXX If unmapping extents then don't also need to unpin; except ...
+               //XXX if file unaligned (HFS 4k / Fusion 128k) then pin is superset and
+               //XXX unmap is subset, so save extra walk over file extents (and the risk
+               //XXX that CF drain starts) vs leaving partial units pinned to SSD
+               //XXX (until whatever was sharing also unmaps).  Err on cleaning up fully.
+               boolean_t will_unmap = (!ref->pinned || ref->cf) && (discard_end > discard_offset);
+               boolean_t will_unpin = (ref->pinned && ref->cf /* && !will_unmap */);
 
-        
-        if (discard_offset && discard_end && !ref->pinned)
+               (void) kern_ioctl_file_extents(ref, _DKIOCCSUNPINEXTENT, 0, (will_unpin) ? ref->filelength : 0);
+
+        if (will_unmap)
         {
-            (void) kern_ioctl_file_extents(ref, DKIOCUNMAP, discard_offset, discard_end);
+            (void) kern_ioctl_file_extents(ref, DKIOCUNMAP, discard_offset, (ref->cf) ? ref->filelength : discard_end);
         }
+
         if (addr && write_length)
         {
             (void) kern_write_file(ref, write_offset, addr, write_length, 0);
@@ -553,7 +620,10 @@ kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref,
         ref->vp = NULLVP;
         kprintf("vnode_close(%d)\n", error);
     }
-    vfs_context_rele(ref->ctx);
-    ref->ctx = NULL;
+    if (ref->ctx)
+    {
+       vfs_context_rele(ref->ctx);
+       ref->ctx = NULL;
+    }
     kfree(ref, sizeof(struct kern_direct_file_io_ref_t));
 }
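
A behavioural addition visible in kern_open_file_for_direct_io() above is the fs_free_size argument: before a regular file (e.g. the hibernation image) is grown with vnode_setsize(), the mount's free space plus the file's current allocation must cover the new size and still leave the requested reserve, otherwise the open fails with ENOSPC. A hedged restatement of that arithmetic as a standalone helper (illustrative, not the kernel's exact code):

    #include <sys/types.h>

    /*
     * mp_free:       free bytes on the volume
     * already_alloc: bytes the file already has allocated (reusable)
     * want:          requested file size
     * keep_free:     reserve that must remain free after the resize
     * Returns 0 when the resize is allowed, -1 (caller maps this to ENOSPC) otherwise.
     */
    static int
    check_grow_reserve(off_t mp_free, off_t already_alloc, off_t want, off_t keep_free)
    {
        off_t avail = mp_free + already_alloc;

        if (avail < want || (avail - want) < keep_free)
            return -1;
        return 0;
    }
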
index c7978d82e01270e2bf692e3fa9baf59fb9ccb55c..8239a10fd792eea1060b071d99e3bb42c22b025c 100644 (file)
 #include <sys/user.h>
 #include <sys/aio_kern.h>
 #include <sys/reboot.h>
+#include <sys/memory_maintenance.h>
+#include <sys/priv.h>
 
 #include <security/audit/audit.h>
 #include <kern/kalloc.h>
 #include <kern/thread.h>
 #include <kern/processor.h>
 #include <kern/debug.h>
+#include <kern/sched_prim.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_map.h>
 #include <mach/host_info.h>
@@ -240,9 +243,6 @@ STATIC int sysdoproc_filt_KERN_PROC_PGRP(proc_t p, void * arg);
 STATIC int sysdoproc_filt_KERN_PROC_TTY(proc_t p, void * arg);
 STATIC int  sysdoproc_filt_KERN_PROC_UID(proc_t p, void * arg);
 STATIC int  sysdoproc_filt_KERN_PROC_RUID(proc_t p, void * arg);
-#if CONFIG_LCTX
-STATIC int  sysdoproc_filt_KERN_PROC_LCID(proc_t p, void * arg);
-#endif
 int sysdoproc_callback(proc_t p, void *arg);
 
 
@@ -294,6 +294,7 @@ STATIC int sysctl_sysctl_native(struct sysctl_oid *oidp, void *arg1, int arg2, s
 STATIC int sysctl_sysctl_cputype(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
 STATIC int sysctl_safeboot(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
 STATIC int sysctl_singleuser(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_minimalboot(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
 STATIC int sysctl_slide(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
 
 
@@ -459,6 +460,14 @@ sysctl_sched_stats_enable(__unused struct sysctl_oid *oidp, __unused void *arg1,
 
 SYSCTL_PROC(_kern, OID_AUTO, sched_stats_enable, CTLFLAG_LOCKED | CTLFLAG_WR, 0, 0, sysctl_sched_stats_enable, "-", "");
 
+extern uint32_t sched_debug_flags;
+SYSCTL_INT(_debug, OID_AUTO, sched, CTLFLAG_RW | CTLFLAG_LOCKED, &sched_debug_flags, 0, "scheduler debug");
+
+#if (DEBUG || DEVELOPMENT)
+extern boolean_t doprnt_hide_pointers;
+SYSCTL_INT(_debug, OID_AUTO, hide_kernel_pointers, CTLFLAG_RW | CTLFLAG_LOCKED, &doprnt_hide_pointers, 0, "hide kernel pointers from log");
+#endif
+
 extern int get_kernel_symfile(proc_t, char **);
 
 #if COUNT_SYSCALLS
@@ -662,18 +671,6 @@ sysdoproc_filt_KERN_PROC_RUID(proc_t p, void * arg)
                return(1);
 }
 
-#if CONFIG_LCTX
-STATIC int
-sysdoproc_filt_KERN_PROC_LCID(proc_t p, void * arg)
-{
-       if ((p->p_lctx == NULL) ||
-               (p->p_lctx->lc_id != (pid_t)*(int*)arg))
-               return(0);
-       else
-               return(1);
-}
-#endif
-
 /*
  * try over estimating by 5 procs
  */
@@ -779,11 +776,6 @@ sysctl_prochandle SYSCTL_HANDLER_ARGS
                        ruidcheck = 1;
                        break;
 
-#if CONFIG_LCTX
-               case KERN_PROC_LCID:
-                       filterfn = sysdoproc_filt_KERN_PROC_LCID;
-                       break;
-#endif
                case KERN_PROC_ALL:
                        break;
 
@@ -914,10 +906,6 @@ fill_user32_eproc(proc_t p, struct user32_eproc *__restrict ep)
                if (sessp != SESSION_NULL && sessp->s_ttyvp)
                        ep->e_flag = EPROC_CTTY;
        }
-#if CONFIG_LCTX
-       if (p->p_lctx)
-               ep->e_lcid = p->p_lctx->lc_id;
-#endif
        ep->e_ppid = p->p_ppid;
        if (p->p_ucred) {
                my_cred = kauth_cred_proc_ref(p);
@@ -974,10 +962,6 @@ fill_user64_eproc(proc_t p, struct user64_eproc *__restrict ep)
                if (sessp != SESSION_NULL && sessp->s_ttyvp)
                        ep->e_flag = EPROC_CTTY;
        }
-#if CONFIG_LCTX
-       if (p->p_lctx)
-               ep->e_lcid = p->p_lctx->lc_id;
-#endif
        ep->e_ppid = p->p_ppid;
        if (p->p_ucred) {
                my_cred = kauth_cred_proc_ref(p);
@@ -1164,8 +1148,8 @@ sysctl_kdebug_ops SYSCTL_HANDLER_ARGS
        case KERN_KDSETREG:
        case KERN_KDGETREG:
        case KERN_KDREADTR:
-        case KERN_KDWRITETR:
-        case KERN_KDWRITEMAP:
+       case KERN_KDWRITETR:
+       case KERN_KDWRITEMAP:
        case KERN_KDPIDTR:
        case KERN_KDTHRMAP:
        case KERN_KDPIDEX:
@@ -1176,9 +1160,12 @@ sysctl_kdebug_ops SYSCTL_HANDLER_ARGS
        case KERN_KDDISABLE_BG_TRACE:
        case KERN_KDREADCURTHRMAP:
        case KERN_KDSET_TYPEFILTER:
-        case KERN_KDBUFWAIT:
+       case KERN_KDBUFWAIT:
        case KERN_KDCPUMAP:
-
+       case KERN_KDWAIT_BG_TRACE_RESET:
+       case KERN_KDSET_BG_TYPEFILTER:
+       case KERN_KDWRITEMAP_V3:
+       case KERN_KDWRITETR_V3:
                ret = kdbg_control(name, namelen, oldp, oldlenp);
                break;
        default:
@@ -1386,7 +1373,7 @@ sysctl_procargsx(int *name, u_int namelen, user_addr_t where,
                return(EINVAL);
 
 
-       ret = kmem_alloc(kernel_map, &copy_start, round_page(arg_size));
+       ret = kmem_alloc(kernel_map, &copy_start, round_page(arg_size), VM_KERN_MEMORY_BSD);
        if (ret != KERN_SUCCESS) {
                vm_map_deallocate(proc_map);
                return(ENOMEM);
@@ -1424,6 +1411,20 @@ sysctl_procargsx(int *name, u_int namelen, user_addr_t where,
                size = arg_size;
        }
 
+       /*
+        * When these sysctls were introduced, the first string in the strings
+        * section was just the bare path of the executable.  However, for security
+        * reasons we now prefix this string with executable_path= so it can be
+        * parsed getenv style.  To avoid binary compatability issues with exising
+        * callers of this sysctl, we strip it off here if present.
+        * (rdar://problem/13746466)
+        */
+#define        EXECUTABLE_KEY "executable_path="
+       if (strncmp(EXECUTABLE_KEY, data, strlen(EXECUTABLE_KEY)) == 0){
+               data += strlen(EXECUTABLE_KEY);
+               size -= strlen(EXECUTABLE_KEY);
+       }
+
        if (argc_yes) {
                /* Put the process's argc as the first word in the copyout buffer */
                suword(where, p->p_argc);
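
The EXECUTABLE_KEY hunk keeps the KERN_PROCARGS2 layout that callers already depend on: the copied-out buffer starts with argc, followed by the executable path, even though the in-kernel strings area now carries an executable_path= prefix. A minimal user-space sketch that reads the layout back for the current process:

    #include <stdio.h>
    #include <string.h>
    #include <sys/sysctl.h>
    #include <sys/types.h>
    #include <unistd.h>

    int main(void)
    {
        int mib[3] = { CTL_KERN, KERN_PROCARGS2, getpid() };
        char buf[8192];
        size_t len = sizeof(buf);

        if (sysctl(mib, 3, buf, &len, NULL, 0) == -1) {
            perror("sysctl(KERN_PROCARGS2)");
            return 1;
        }

        int argc;
        memcpy(&argc, buf, sizeof(argc));   /* argc is the first word */
        /* the executable path follows, without the executable_path= prefix */
        printf("argc=%d exec_path=%s\n", argc, buf + sizeof(argc));
        return 0;
    }
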
@@ -1856,6 +1857,10 @@ SYSCTL_INT(_kern, OID_AUTO, ignore_is_ssd,
                CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
                &ignore_is_ssd, 0, "");
 
+SYSCTL_INT(_kern, OID_AUTO, root_is_CF_drive,
+               CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
+               &root_is_CF_drive, 0, "");
+
 SYSCTL_UINT(_kern, OID_AUTO, preheat_max_bytes, 
                CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
                &preheat_max_bytes, 0, "");
@@ -2597,6 +2602,16 @@ SYSCTL_PROC(_kern, OID_AUTO, singleuser,
                CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
                0, 0, sysctl_singleuser, "I", "");
 
+STATIC int sysctl_minimalboot
+(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+       return sysctl_io_number(req, minimalboot, sizeof(int), NULL, NULL);
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, minimalboot,
+               CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
+               0, 0, sysctl_minimalboot, "I", "");
+
 /*
  * Controls for debugging affinity sets - see osfmk/kern/affinity.c
  */
@@ -2666,8 +2681,12 @@ SYSCTL_INT(_vm, OID_AUTO, vm_page_filecache_min, CTLFLAG_RW | CTLFLAG_LOCKED, &v
 
 extern int     vm_compressor_mode;
 extern int     vm_compressor_is_active;
+extern int     vm_compressor_available;
+extern uint32_t        vm_ripe_target_age;
 extern uint32_t        swapout_target_age;
 extern int64_t  compressor_bytes_used;
+extern int64_t  c_segment_input_bytes;
+extern int64_t  c_segment_compressed_bytes;
 extern uint32_t        compressor_eval_period_in_msecs;
 extern uint32_t        compressor_sample_min_in_msecs;
 extern uint32_t        compressor_sample_max_in_msecs;
@@ -2678,10 +2697,16 @@ extern uint32_t vm_compressor_majorcompact_threshold_divisor;
 extern uint32_t        vm_compressor_unthrottle_threshold_divisor;
 extern uint32_t        vm_compressor_catchup_threshold_divisor;
 
+SYSCTL_QUAD(_vm, OID_AUTO, compressor_input_bytes, CTLFLAG_RD | CTLFLAG_LOCKED, &c_segment_input_bytes, "");
+SYSCTL_QUAD(_vm, OID_AUTO, compressor_compressed_bytes, CTLFLAG_RD | CTLFLAG_LOCKED, &c_segment_compressed_bytes, "");
+SYSCTL_QUAD(_vm, OID_AUTO, compressor_bytes_used, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_bytes_used, "");
+
 SYSCTL_INT(_vm, OID_AUTO, compressor_mode, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_compressor_mode, 0, "");
 SYSCTL_INT(_vm, OID_AUTO, compressor_is_active, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_compressor_is_active, 0, "");
-SYSCTL_QUAD(_vm, OID_AUTO, compressor_bytes_used, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_bytes_used, "");
 SYSCTL_INT(_vm, OID_AUTO, compressor_swapout_target_age, CTLFLAG_RD | CTLFLAG_LOCKED, &swapout_target_age, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, compressor_available, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_compressor_available, 0, "");
+
+SYSCTL_INT(_vm, OID_AUTO, vm_ripe_target_age_in_secs, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_ripe_target_age, 0, "");
 
 SYSCTL_INT(_vm, OID_AUTO, compressor_eval_period_in_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &compressor_eval_period_in_msecs, 0, "");
 SYSCTL_INT(_vm, OID_AUTO, compressor_sample_min_in_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &compressor_sample_min_in_msecs, 0, "");
@@ -2709,12 +2734,12 @@ SYSCTL_INT(_vm, OID_AUTO, phantom_cache_thrashing_threshold_ssd, CTLFLAG_RW | CT
 #if (DEVELOPMENT || DEBUG)
 
 SYSCTL_UINT(_vm, OID_AUTO, vm_page_creation_throttled_hard,
-           CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED,
-           &vm_page_creation_throttled_hard, 0, "");
+               CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED,
+               &vm_page_creation_throttled_hard, 0, "");
 
 SYSCTL_UINT(_vm, OID_AUTO, vm_page_creation_throttled_soft,
-           CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED,
-           &vm_page_creation_throttled_soft, 0, "");
+               CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED,
+               &vm_page_creation_throttled_soft, 0, "");
 
 #endif /* DEVELOPMENT || DEBUG */
 
@@ -2747,13 +2772,6 @@ SYSCTL_INT(_kern, OID_AUTO, ipc_portbt,
  * Scheduler sysctls
  */
 
-/*
- * See osfmk/kern/sched_prim.c for the corresponding definition
- * in osfmk/. If either version changes, update the other.
- */
-#define SCHED_STRING_MAX_LENGTH (48)
-
-extern char sched_string[SCHED_STRING_MAX_LENGTH];
 SYSCTL_STRING(_kern, OID_AUTO, sched,
                          CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED,
                          sched_string, sizeof(sched_string),
@@ -2944,3 +2962,5 @@ SYSCTL_INT(_kern, OID_AUTO, hv_support,
                CTLFLAG_KERN | CTLFLAG_RD | CTLFLAG_LOCKED, 
                &hv_support_available, 0, "");
 #endif
+
+
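
Among the sysctls added in this file are kern.minimalboot, debug.sched, vm.compressor_input_bytes, vm.compressor_compressed_bytes and vm.compressor_available; the read-only ones can be inspected with sysctl(8) or programmatically. A minimal sketch for the new kern.minimalboot knob:

    #include <stdio.h>
    #include <sys/sysctl.h>
    #include <sys/types.h>

    int main(void)
    {
        int minimalboot = 0;
        size_t len = sizeof(minimalboot);

        if (sysctlbyname("kern.minimalboot", &minimalboot, &len, NULL, 0) == -1) {
            perror("sysctlbyname(kern.minimalboot)");
            return 1;
        }
        printf("kern.minimalboot = %d\n", minimalboot);
        return 0;
    }
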
diff --git a/bsd/kern/kern_tests.c b/bsd/kern/kern_tests.c
deleted file mode 100644 (file)
index 8499a35..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-/***************************************************************
- *                 Test Declarations Go Here                  *
- ***************************************************************/
-#include <pexpert/pexpert.h>
-#include <sys/sysctl.h>
-#include <kern/debug.h>
-#include <sys/kern_tests.h>
-
-/***************************************************************
- *                 End Test Declarations                      *
- ***************************************************************/
-typedef int (*xnu_test_func_t)(void);
-
-typedef struct xnu_test {
-       xnu_test_func_t t_func;
-       const char *t_name;
-} xnu_test_t;
-
-#define DEFINE_XNU_TEST(func) { func, #func }
-
-xnu_test_t xnu_tests[] = {
-};
-
-#define NUM_XNU_TESTS (sizeof(xnu_tests) / sizeof(xnu_test_t))
-
-static int
-run_xnu_tests
-(struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
-{
-       unsigned i;
-       int result;
-
-       for (i = 0; i < NUM_XNU_TESTS; i++) {
-               result = xnu_tests[i].t_func();
-               if (result == 0) {
-                       kprintf("xnu_tests: %s passed.\n", xnu_tests[i].t_name);
-               } else{
-                       panic("xnu_tests: %s failed.\n", xnu_tests[i].t_name);
-               } 
-       }
-
-       return sysctl_handle_int(oidp, NULL, 0, req);
-}
-
-SYSCTL_PROC(_kern, OID_AUTO, kern_tests,
-               CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
-               0, 0, run_xnu_tests, "I", "");
-
index fc94a14e09f171f30e9eb5ffcdf62e3a2493dda6..dde94f5a20aeea88538e48a20ee874e751472129 100644 (file)
@@ -88,7 +88,7 @@
 int
 reboot(struct proc *p, register struct reboot_args *uap, __unused int32_t *retval)
 {
-       char command[64];
+       char message[128];
        int error=0;
        size_t dummy=0;
 #if CONFIG_MACF
@@ -97,14 +97,23 @@ reboot(struct proc *p, register struct reboot_args *uap, __unused int32_t *retva
 
        AUDIT_ARG(cmd, uap->opt);
 
-       command[0] = '\0';
+       message[0] = '\0';
 
        if ((error = suser(kauth_cred_get(), &p->p_acflag)))
                return(error);  
        
        if (uap->opt & RB_COMMAND)
-               error = copyinstr(uap->command,
-                                       (void *)command, sizeof(command), (size_t *)&dummy);
+                return ENOSYS;
+
+        if (uap->opt & RB_PANIC) {
+#if !(DEVELOPMENT || DEBUG)
+               if (p != initproc) {
+                        return EPERM;
+                }
+#endif
+               error = copyinstr(uap->command, (void *)message, sizeof(message), (size_t *)&dummy);
+        }
+
 #if CONFIG_MACF
        if (error)
                return (error);
@@ -114,7 +123,7 @@ reboot(struct proc *p, register struct reboot_args *uap, __unused int32_t *retva
 #endif
        if (!error) {
                OSBitOrAtomic(P_REBOOT, &p->p_flag);  /* No more signals for this proc */
-               error = boot(RB_BOOT, uap->opt, command);
+               error = reboot_kernel(uap->opt, message);
        }
        return(error);
 }
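
The reboot(2) change above drops the old RB_COMMAND string (now answered with ENOSYS) and adds an RB_PANIC path: a panic message is copied in, but on RELEASE kernels only initproc may supply one. A hedged restatement of that gate as a standalone predicate (illustrative only, not the kernel's exact code):

    #include <stdbool.h>

    /* On DEVELOPMENT/DEBUG kernels any caller that passed the earlier suser()
     * check may request a panic with a message; on RELEASE kernels only
     * launchd (initproc) may, and everyone else gets EPERM. */
    static bool
    reboot_panic_message_allowed(bool release_kernel, bool caller_is_initproc)
    {
        return !release_kernel || caller_is_initproc;
    }
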
index faaf98c299cce57a010631706eb6d0102d9a71d2..c40ca8189d68159dc65172634f78d3bf286aece7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -320,7 +320,7 @@ errno_t     mbuf_dup(const mbuf_t src, mbuf_how_t how, mbuf_t *new_mbuf)
 errno_t mbuf_prepend(mbuf_t *orig, size_t len, mbuf_how_t how)
 {
        /* Must set *orig to NULL in failure case */
-       *orig = m_prepend_2(*orig, len, how);
+       *orig = m_prepend_2(*orig, len, how, 0);
        
        return (*orig == NULL) ? ENOMEM : 0;
 }
@@ -1369,3 +1369,16 @@ mbuf_get_driver_scratch(mbuf_t m, u_int8_t **area, size_t *area_len)
        *area_len = m_scratch_get(m, area);
        return (0);
 }
+
+errno_t
+mbuf_get_unsent_data_bytes(const mbuf_t m, u_int32_t *unsent_data)
+{
+       if (m == NULL || unsent_data == NULL || !(m->m_flags & M_PKTHDR))
+               return (EINVAL);
+
+       if (!(m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA))
+               return (EINVAL);
+
+       *unsent_data = m->m_pkthdr.pkt_unsent_databytes;
+       return (0);
+}
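
mbuf_get_unsent_data_bytes() above only succeeds for a packet-header mbuf that the stack has tagged with a valid unsent-data count (PKTF_VALID_UNSENT_DATA); everything else yields EINVAL. A sketch of a defensive wrapper as a kext might use it (helper name is illustrative; assumes KPI/kernel context and a valid mbuf_t):

    #include <sys/errno.h>
    #include <sys/types.h>
    #include <sys/kpi_mbuf.h>

    /* Report the unsent byte count, treating "not available" as zero. */
    static errno_t
    query_unsent(mbuf_t m, u_int32_t *out)
    {
        errno_t err = mbuf_get_unsent_data_bytes(m, out);
        if (err == EINVAL)
            *out = 0;   /* no pkthdr, or no valid unsent-data tag on this mbuf */
        return err;
    }
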
index 07c65bf39d18172c503812d1fb132c3765e25538..09818e3b0ec0c57badc311e661da5992b4159164 100644 (file)
@@ -1038,14 +1038,14 @@ sock_set_tcp_stream_priority(socket_t sock)
  * Caller must have ensured socket is valid and won't be going away.
  */
 void
-socket_set_traffic_mgt_flags_locked(socket_t sock, u_int32_t flags)
+socket_set_traffic_mgt_flags_locked(socket_t sock, u_int8_t flags)
 {
-       (void) OSBitOrAtomic(flags, &sock->so_traffic_mgt_flags);
+       (void) OSBitOrAtomic8(flags, &sock->so_traffic_mgt_flags);
        sock_set_tcp_stream_priority(sock);
 }
 
 void
-socket_set_traffic_mgt_flags(socket_t sock, u_int32_t flags)
+socket_set_traffic_mgt_flags(socket_t sock, u_int8_t flags)
 {
        socket_lock(sock, 1);
        socket_set_traffic_mgt_flags_locked(sock, flags);
@@ -1056,14 +1056,14 @@ socket_set_traffic_mgt_flags(socket_t sock, u_int32_t flags)
  * Caller must have ensured socket is valid and won't be going away.
  */
 void
-socket_clear_traffic_mgt_flags_locked(socket_t sock, u_int32_t flags)
+socket_clear_traffic_mgt_flags_locked(socket_t sock, u_int8_t flags)
 {
-       (void) OSBitAndAtomic(~flags, &sock->so_traffic_mgt_flags);
+       (void) OSBitAndAtomic8(~flags, &sock->so_traffic_mgt_flags);
        sock_set_tcp_stream_priority(sock);
 }
 
 void
-socket_clear_traffic_mgt_flags(socket_t sock, u_int32_t flags)
+socket_clear_traffic_mgt_flags(socket_t sock, u_int8_t flags)
 {
        socket_lock(sock, 1);
        socket_clear_traffic_mgt_flags_locked(sock, flags);
index ac9dfcb1c5c5f41b02f97446b16fb3339dbd6a4b..bdebcc1825d875e628e53c11d7f8d3f90144d8a4 100644 (file)
@@ -1256,12 +1256,16 @@ sflt_register(const struct sflt_filter *filter, int domain, int type,
        struct socket_filter *sock_filt = NULL;
        struct socket_filter *match = NULL;
        int error = 0;
-       struct protosw *pr = pffindproto(domain, protocol, type);
+       struct protosw *pr;
        unsigned int len;
        struct socket *so;
        struct inpcb *inp;
        struct solist *solisthead = NULL, *solist = NULL;
 
+       if ((domain != PF_INET) && (domain != PF_INET6))
+               return (ENOTSUP);
+
+       pr = pffindproto(domain, protocol, type);
        if (pr == NULL)
                return (ENOENT);
 
index 81419cb5fd88f93f29a8f64aee91859ff4028569..b5666e88155c142c2501c6bfda80e9c5889b7b34 100644 (file)
@@ -89,6 +89,8 @@
 extern pmap_t  pmap_create(ledger_t ledger, vm_map_size_t size,
                                boolean_t is_64bit);
 
+extern kern_return_t machine_thread_neon_state_initialize(thread_t thread);
+
 /* XXX should have prototypes in a shared header file */
 extern int     get_map_nentries(vm_map_t);
 
@@ -112,6 +114,7 @@ static load_result_t load_result_null = {
        .validentry = 0,
        .using_lcmain = 0,
        .csflags = 0,
+       .has_pagezero = 0,
        .uuid = { 0 },
        .min_vm_addr = MACH_VM_MAX_ADDRESS,
        .max_vm_addr = MACH_VM_MIN_ADDRESS,
@@ -171,9 +174,10 @@ set_code_unprotect(
        caddr_t                         addr,
        vm_map_t                        map,
        int64_t                         slide,
-       struct vnode            *vp,
+       struct vnode                    *vp,
+       off_t                           macho_offset,
        cpu_type_t                      cputype,
-       cpu_subtype_t           cpusubtype);
+       cpu_subtype_t                   cpusubtype);
 #endif
 
 static
@@ -286,6 +290,7 @@ note_all_image_info_section(const struct segment_command_64 *scp,
        }
 }
 
+
 load_return_t
 load_machfile(
        struct image_params     *imgp,
@@ -344,15 +349,19 @@ load_machfile(
                }
                pmap = pmap_create(get_task_ledger(ledger_task),
                                   (vm_map_size_t) 0,
-                                  (imgp->ip_flags & IMGPF_IS_64BIT));
+                                  ((imgp->ip_flags & IMGPF_IS_64BIT) != 0));
                pal_switch_pmap(thread, pmap, imgp->ip_flags & IMGPF_IS_64BIT);
                map = vm_map_create(pmap,
                                0,
-                               vm_compute_max_offset((imgp->ip_flags & IMGPF_IS_64BIT)),
+                               vm_compute_max_offset(((imgp->ip_flags & IMGPF_IS_64BIT) == IMGPF_IS_64BIT)),
                                TRUE);
        } else
                map = new_map;
 
+#if   (__ARM_ARCH_7K__ >= 2) && defined(PLATFORM_WatchOS)
+       /* enforce 16KB alignment for watch targets with new ABI */
+       vm_map_set_page_shift(map, SIXTEENK_PAGE_SHIFT);
+#endif /* __arm64__ */
 
 #ifndef        CONFIG_ENFORCE_SIGNED_CODE
        /* This turns off faulting for executable pages, which allows
@@ -412,12 +421,14 @@ load_machfile(
        /*
         * Check to see if the page zero is enforced by the map->min_offset.
         */ 
-       if (enforce_hard_pagezero && (vm_map_has_hard_pagezero(map, 0x1000) == FALSE)) {
-               if (create_map) {
-                       vm_map_deallocate(map); /* will lose pmap reference too */
+       if (enforce_hard_pagezero &&
+           (vm_map_has_hard_pagezero(map, 0x1000) == FALSE)) {
+               {
+                       if (create_map) {
+                               vm_map_deallocate(map); /* will lose pmap reference too */
+                       }
+                       return (LOAD_BADMACHO);
                }
-               printf("Cannot enforce a hard page-zero for %s\n", imgp->ip_strings);
-               return (LOAD_BADMACHO);
        }
 
        /*
@@ -464,6 +475,8 @@ load_machfile(
                        workqueue_mark_exiting(p);
                        task_complete_halt(task);
                        workqueue_exit(p);
+                       kqueue_dealloc(p->p_wqkqueue);
+                       p->p_wqkqueue = NULL;
                }
                old_map = swap_task_map(old_task, thread, map, !spawn);
                vm_map_deallocate(old_map);
@@ -471,6 +484,14 @@ load_machfile(
        return(LOAD_SUCCESS);
 }
 
+int macho_printf = 0;
+#define MACHO_PRINTF(args)                             \
+       do {                                            \
+               if (macho_printf) {                     \
+                       printf args;                    \
+               }                                       \
+       } while (0)
+
 /*
  * The file size of a mach-o file is limited to 32 bits; this is because
  * this is the limit on the kalloc() of enough bytes for a mach_header and
@@ -511,7 +532,7 @@ parse_machfile(
        int                     pass;
        proc_t                  p = current_proc();             /* XXXX */
        int                     error;
-       int resid=0;
+       int                     resid = 0;
        size_t                  mach_header_sz = sizeof(struct mach_header);
        boolean_t               abi64;
        boolean_t               got_code_signatures = FALSE;
@@ -613,12 +634,20 @@ parse_machfile(
         /*
         *  Scan through the commands, processing each one as necessary.
         *  We parse in three passes through the headers:
+        *  0: determine if TEXT and DATA boundary can be page-aligned
         *  1: thread state, uuid, code signature
         *  2: segments
         *  3: dyld, encryption, check entry point
         */
        
-       for (pass = 1; pass <= 3; pass++) {
+       for (pass = 0; pass <= 3; pass++) {
+
+               if (pass == 0) {
+                       /* see if we need to adjust the slide to re-align... */
+                       /* no re-alignment needed on X86_64 or ARM32 kernel */
+                       continue;
+               } else if (pass == 1) {
+               }
 
                /*
                 * Check that the entry point is contained in an executable segment
@@ -667,6 +696,10 @@ parse_machfile(
                         */
                        switch(lcp->cmd) {
                        case LC_SEGMENT:
+                               if (pass == 0) {
+                                       break;
+                               }
+
                                if (pass != 2)
                                        break;
 
@@ -819,7 +852,7 @@ parse_machfile(
                                        break;
                                ret = set_code_unprotect(
                                        (struct encryption_info_command *) lcp,
-                                       addr, map, slide, vp,
+                                       addr, map, slide, vp, file_offset,
                                        header->cputype, header->cpusubtype);
                                if (ret != LOAD_SUCCESS) {
                                        printf("proc %d: set_code_unprotect() error %d "
@@ -836,7 +869,7 @@ parse_machfile(
                                                proc_lock(p);
                                                p->p_lflag |= P_LTERM_DECRYPTFAIL;
                                                proc_unlock(p);
-                                       }
+                                        }
                                         psignal(p, SIGKILL);
                                }
                                break;
@@ -858,34 +891,34 @@ parse_machfile(
                        if (cs_enforcement(NULL)) {
                                ret = LOAD_FAILURE;
                        } else {
-                               /*
-                                * No embedded signatures: look for detached by taskgated,
-                                * this is only done on OSX, on embedded platforms we expect everything
-                                * to be have embedded signatures.
-                                */
+                               /*
+                                * No embedded signatures: look for detached by taskgated,
+                                * this is only done on OSX, on embedded platforms we expect everything
+                                * to have embedded signatures.
+                                */
                                struct cs_blob *blob;
 
                                blob = ubc_cs_blob_get(vp, -1, file_offset);
                                if (blob != NULL) {
-                                   unsigned int cs_flag_data = blob->csb_flags;
-                                   if(0 != ubc_cs_generation_check(vp)) {
-                                       if (0 != ubc_cs_blob_revalidate(vp, blob, 0)) {
-                                               /* clear out the flag data if revalidation fails */
-                                               cs_flag_data = 0;
-                                               result->csflags &= ~CS_VALID;
+                                       unsigned int cs_flag_data = blob->csb_flags;
+                                       if(0 != ubc_cs_generation_check(vp)) {
+                                               if (0 != ubc_cs_blob_revalidate(vp, blob, 0)) {
+                                                       /* clear out the flag data if revalidation fails */
+                                                       cs_flag_data = 0;
+                                                       result->csflags &= ~CS_VALID;
+                                               }
                                        }
-                                   }
-                                   /* get flags to be applied to the process */
-                                   result->csflags |= cs_flag_data;
+                                       /* get flags to be applied to the process */
+                                       result->csflags |= cs_flag_data;
                                }
                        }
                }
 
                /* Make sure if we need dyld, we got it */
-               if ((ret == LOAD_SUCCESS) && result->needs_dynlinker && !dlp) {
+               if (result->needs_dynlinker && !dlp) {
                        ret = LOAD_FAILURE;
                }
-               
+
                if ((ret == LOAD_SUCCESS) && (dlp != 0)) {
                        /*
                         * load the dylinker, and slide it by the independent DYLD ASLR
@@ -910,7 +943,7 @@ parse_machfile(
 
 #if CONFIG_CODE_DECRYPTION
 
-#define        APPLE_UNPROTECTED_HEADER_SIZE   (3 * PAGE_SIZE_64)
+#define        APPLE_UNPROTECTED_HEADER_SIZE   (3 * 4096)
 
 static load_return_t
 unprotect_dsmos_segment(
@@ -953,9 +986,20 @@ unprotect_dsmos_segment(
                crypt_info.crypt_end = NULL;
 #pragma unused(vp, macho_offset)
                crypt_info.crypt_ops = (void *)0x2e69cf40;
+               vm_map_offset_t crypto_backing_offset;
+               crypto_backing_offset = -1; /* i.e. use map entry's offset */
+#if DEVELOPMENT || DEBUG
+               struct proc *p;
+               p = current_proc();
+               printf("APPLE_PROTECT: %d[%s] map %p [0x%llx:0x%llx] %s(%s)\n",
+                      p->p_pid, p->p_comm, map,
+                      (uint64_t) map_addr, (uint64_t) (map_addr + map_size),
+                      __FUNCTION__, vp->v_name);
+#endif /* DEVELOPMENT || DEBUG */
                kr = vm_map_apple_protected(map,
                                            map_addr,
                                            map_addr + map_size,
+                                           crypto_backing_offset,
                                            &crypt_info);
        }
 
@@ -979,29 +1023,166 @@ unprotect_dsmos_segment(
 }
 #endif /* CONFIG_CODE_DECRYPTION */
 
+
+/*
+ * map_segment:
+ *     Maps a Mach-O segment, taking care of mis-alignment (wrt the system
+ *     page size) issues.
+ * 
+ *     The mapping might result in 1, 2 or 3 map entries:
+ *     1. for the first page, which could be overlap with the previous
+ *        mapping,
+ *     2. for the center (if applicable),
+ *     3. for the last page, which could overlap with the next mapping.
+ *
+ *     For each of those map entries, we might have to interpose a
+ *     "fourk_pager" to deal with mis-alignment wrt the system page size,
+ *     either in the mapping address and/or size or the file offset and/or
+ *     size.
+ *     The "fourk_pager" itself would be mapped with proper alignment
+ *     wrt the system page size and would then be populated with the
+ *     information about the intended mapping, with a "4KB" granularity.
+ */
+static kern_return_t
+map_segment(
+       vm_map_t                map,
+       vm_map_offset_t         vm_start,
+       vm_map_offset_t         vm_end,
+       memory_object_control_t control,
+       vm_map_offset_t         file_start,
+       vm_map_offset_t         file_end,
+       vm_prot_t               initprot,
+       vm_prot_t               maxprot)
+{
+       int             extra_vm_flags, cur_extra_vm_flags;
+       vm_map_offset_t cur_offset, cur_start, cur_end;
+       kern_return_t   ret;
+       vm_map_offset_t effective_page_mask;
+       
+       if (vm_end < vm_start ||
+           file_end < file_start) {
+               return LOAD_BADMACHO;
+       }
+       if (vm_end == vm_start ||
+           file_end == file_start) {
+               /* nothing to map... */
+               return LOAD_SUCCESS;
+       }
+
+       effective_page_mask = MAX(PAGE_MASK, vm_map_page_mask(map));
+
+       extra_vm_flags = 0;
+       if (vm_map_page_aligned(vm_start, effective_page_mask) &&
+           vm_map_page_aligned(vm_end, effective_page_mask) &&
+           vm_map_page_aligned(file_start, effective_page_mask) &&
+           vm_map_page_aligned(file_end, effective_page_mask)) {
+               /* all page-aligned and map-aligned: proceed */
+       } else {
+               panic("map_segment: unexpected mis-alignment "
+                     "vm[0x%llx:0x%llx] file[0x%llx:0x%llx]\n",
+                     (uint64_t) vm_start,
+                     (uint64_t) vm_end,
+                     (uint64_t) file_start,
+                     (uint64_t) file_end);
+       }
+
+       cur_offset = 0;
+       cur_start = vm_start;
+       cur_end = vm_start;
+       if (cur_end >= vm_start + (file_end - file_start)) {
+               /* all mapped: done */
+               goto done;
+       }
+       if (vm_map_round_page(cur_end, effective_page_mask) >=
+           vm_map_trunc_page(vm_start + (file_end - file_start),
+                             effective_page_mask)) {
+               /* no middle */
+       } else {
+               cur_start = cur_end;
+               if ((vm_start & effective_page_mask) !=
+                   (file_start & effective_page_mask)) {
+                       /* one 4K pager for the middle */
+                       cur_extra_vm_flags = extra_vm_flags;
+               } else {
+                       /* regular mapping for the middle */
+                       cur_extra_vm_flags = 0;
+               }
+               cur_end = vm_map_trunc_page(vm_start + (file_end -
+                                                       file_start),
+                                           effective_page_mask);
+               if (control != MEMORY_OBJECT_CONTROL_NULL) {
+                       ret = vm_map_enter_mem_object_control(
+                               map,
+                               &cur_start,
+                               cur_end - cur_start,
+                               (mach_vm_offset_t)0,
+                               VM_FLAGS_FIXED | cur_extra_vm_flags,
+                               control,
+                               file_start + cur_offset,
+                               TRUE, /* copy */
+                               initprot, maxprot,
+                               VM_INHERIT_DEFAULT);
+               } else {
+                       ret = vm_map_enter_mem_object(
+                               map,
+                               &cur_start,
+                               cur_end - cur_start,
+                               (mach_vm_offset_t)0,
+                               VM_FLAGS_FIXED | cur_extra_vm_flags,
+                               IPC_PORT_NULL,
+                               0, /* offset */
+                               TRUE, /* copy */
+                               initprot, maxprot,
+                               VM_INHERIT_DEFAULT);
+               }
+               if (ret != KERN_SUCCESS) {
+                       return (LOAD_NOSPACE);
+               }
+               cur_offset += cur_end - cur_start;
+       }
+       if (cur_end >= vm_start + (file_end - file_start)) {
+               /* all mapped: done */
+               goto done;
+       }
+       cur_start = cur_end;
+done:
+       assert(cur_end >= vm_start + (file_end - file_start));
+       return LOAD_SUCCESS;
+}
+
 static
 load_return_t
 load_segment(
-       struct load_command             *lcp,
-       uint32_t                        filetype,
-       void *                          control,
-       off_t                           pager_offset,
-       off_t                           macho_size,
-       struct vnode                    *vp,
-       vm_map_t                        map,
-       int64_t                         slide,
-       load_result_t           *result
-)
+       struct load_command     *lcp,
+       uint32_t                filetype,
+       void *                  control,
+       off_t                   pager_offset,
+       off_t                   macho_size,
+       struct vnode            *vp,
+       vm_map_t                map,
+       int64_t                 slide,
+       load_result_t           *result)
 {
        struct segment_command_64 segment_command, *scp;
        kern_return_t           ret;
-       vm_map_offset_t         map_addr, map_offset;
-       vm_map_size_t           map_size, seg_size, delta_size;
+       vm_map_size_t           delta_size;
        vm_prot_t               initprot;
        vm_prot_t               maxprot;
        size_t                  segment_command_size, total_section_size,
                                single_section_size;
-       
+       vm_map_offset_t         file_offset, file_size;
+       vm_map_offset_t         vm_offset, vm_size;
+       vm_map_offset_t         vm_start, vm_end, vm_end_aligned;
+       vm_map_offset_t         file_start, file_end;
+       kern_return_t           kr;
+       boolean_t               verbose;
+       vm_map_size_t           effective_page_size;
+       vm_map_offset_t         effective_page_mask;
+
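+       /*
+        * Work with the larger of the kernel page size and the target map's
+        * page size, so every range computed below is aligned for both.
+        */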
+       effective_page_size = MAX(PAGE_SIZE, vm_map_page_size(map));
+       effective_page_mask = MAX(PAGE_MASK, vm_map_page_mask(map));
+
+       verbose = FALSE;
        if (LC_SEGMENT_64 == lcp->cmd) {
                segment_command_size = sizeof(struct segment_command_64);
                single_section_size  = sizeof(struct section_64);
@@ -1013,31 +1194,57 @@ load_segment(
                return (LOAD_BADMACHO);
        total_section_size = lcp->cmdsize - segment_command_size;
 
-       if (LC_SEGMENT_64 == lcp->cmd)
+       if (LC_SEGMENT_64 == lcp->cmd) {
                scp = (struct segment_command_64 *)lcp;
-       else {
+       } else {
                scp = &segment_command;
                widen_segment_command((struct segment_command *)lcp, scp);
        }
 
+       if (verbose) {
+               MACHO_PRINTF(("+++ load_segment %s "
+                             "vm[0x%llx:0x%llx] file[0x%llx:0x%llx] "
+                             "prot %d/%d flags 0x%x\n",
+                             scp->segname,
+                             (uint64_t)(slide + scp->vmaddr),
+                             (uint64_t)(slide + scp->vmaddr + scp->vmsize),
+                             pager_offset + scp->fileoff,
+                             pager_offset + scp->fileoff + scp->filesize,
+                             scp->initprot,
+                             scp->maxprot,
+                             scp->flags));
+       }
+
        /*
         * Make sure what we get from the file is really ours (as specified
         * by macho_size).
         */
        if (scp->fileoff + scp->filesize < scp->fileoff ||
-           scp->fileoff + scp->filesize > (uint64_t)macho_size)
+           scp->fileoff + scp->filesize > (uint64_t)macho_size) {
                return (LOAD_BADMACHO);
+       }
        /*
         * Ensure that the number of sections specified would fit
         * within the load command size.
         */
-       if (total_section_size / single_section_size < scp->nsects)
+       if (total_section_size / single_section_size < scp->nsects) {
                return (LOAD_BADMACHO);
+       }
        /*
         * Make sure the segment is page-aligned in the file.
         */
-       if ((scp->fileoff & PAGE_MASK_64) != 0)
+       file_offset = pager_offset + scp->fileoff;      /* limited to 32 bits */
+       file_size = scp->filesize;
+       if ((file_offset & PAGE_MASK_64) != 0 ||
+               /* we can't mmap() it if it's not page-aligned in the file */
+           (file_offset & vm_map_page_mask(map)) != 0) {
+               /*
+                * The 1st test would have failed if the system's page size
+                * were what this process believes the page size to be, so
+                * let's fail here too for the sake of consistency.
+                */
                return (LOAD_BADMACHO);
+       }
 
        /*
         * If we have a code signature attached for this slice
@@ -1053,21 +1260,14 @@ load_segment(
                return LOAD_BADMACHO;
        }
 
-       /*
-        *      Round sizes to page size.
-        */
-       seg_size = round_page_64(scp->vmsize);
-       map_size = round_page_64(scp->filesize);
-       map_addr = trunc_page_64(scp->vmaddr); /* JVXXX note that in XNU TOT this is round instead of trunc for 64 bits */
-
-       seg_size = vm_map_round_page(seg_size, vm_map_page_mask(map));
-       map_size = vm_map_round_page(map_size, vm_map_page_mask(map));
-
-       if (seg_size == 0)
-               return (KERN_SUCCESS);
-       if (map_addr == 0 &&
-           map_size == 0 &&
-           seg_size != 0 &&
+       vm_offset = scp->vmaddr + slide;
+       vm_size = scp->vmsize;
+
+       if (vm_size == 0)
+               return (LOAD_SUCCESS);
+       if (scp->vmaddr == 0 &&
+           file_size == 0 &&
+           vm_size != 0 &&
            (scp->initprot & VM_PROT_ALL) == VM_PROT_NONE &&
            (scp->maxprot & VM_PROT_ALL) == VM_PROT_NONE) {
                /*
@@ -1076,9 +1276,6 @@ load_segment(
                 * between the end of page zero and the beginning of the first
                 * slid segment.
                 */
-               seg_size += slide;
-               slide = 0;
-
                /*
                 * This is a "page zero" segment:  it starts at address 0,
                 * is not mapped from the binary file and is not accessible.
@@ -1086,53 +1283,89 @@ load_segment(
                 * make it completely off limits by raising the VM map's
                 * minimum offset.
                 */
-               ret = vm_map_raise_min_offset(map, seg_size);
+               vm_end = vm_offset + vm_size;
+               if (vm_end < vm_offset) {
+                       return (LOAD_BADMACHO);
+               }
+               if (verbose) {
+                       MACHO_PRINTF(("++++++ load_segment: "
+                                     "page_zero up to 0x%llx\n",
+                                     (uint64_t) vm_end));
+               }
+               {
+                       vm_end = vm_map_round_page(vm_end,
+                                                  PAGE_MASK_64);
+                       vm_end_aligned = vm_end;
+               }
+               ret = vm_map_raise_min_offset(map,
+                                             vm_end_aligned);
+
                if (ret != KERN_SUCCESS) {
                        return (LOAD_FAILURE);
                }
                return (LOAD_SUCCESS);
+       } else {
        }
 
-       /* If a non-zero slide was specified by the caller, apply now */
-       map_addr += slide;
+       {
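+               /* round the file and VM ranges out to effective page boundaries */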
+               file_start = vm_map_trunc_page(file_offset,
+                                              effective_page_mask);
+               file_end = vm_map_round_page(file_offset + file_size,
+                                            effective_page_mask);
+               vm_start = vm_map_trunc_page(vm_offset,
+                                            effective_page_mask);
+               vm_end = vm_map_round_page(vm_offset + vm_size,
+                                          effective_page_mask);
+       }
 
-       if (map_addr < result->min_vm_addr)
-               result->min_vm_addr = map_addr;
-       if (map_addr+seg_size > result->max_vm_addr)
-               result->max_vm_addr = map_addr+seg_size;
+       if (vm_start < result->min_vm_addr)
+               result->min_vm_addr = vm_start;
+       if (vm_end > result->max_vm_addr)
+               result->max_vm_addr = vm_end;
 
        if (map == VM_MAP_NULL)
                return (LOAD_SUCCESS);
 
-       map_offset = pager_offset + scp->fileoff;       /* limited to 32 bits */
-
-       if (map_size > 0) {
+       if (vm_size > 0) {
                initprot = (scp->initprot) & VM_PROT_ALL;
                maxprot = (scp->maxprot) & VM_PROT_ALL;
                /*
                 *      Map a copy of the file into the address space.
                 */
-               ret = vm_map_enter_mem_object_control(map,
-                               &map_addr, map_size, (mach_vm_offset_t)0,
-                               VM_FLAGS_FIXED, control, map_offset, TRUE,
-                               initprot, maxprot,
-                               VM_INHERIT_DEFAULT);
-               if (ret != KERN_SUCCESS) {
-                       return (LOAD_NOSPACE);
+               if (verbose) {
+                       MACHO_PRINTF(("++++++ load_segment: "
+                                     "mapping at vm [0x%llx:0x%llx] of "
+                                     "file [0x%llx:0x%llx]\n",
+                                     (uint64_t) vm_start,
+                                     (uint64_t) vm_end,
+                                     (uint64_t) file_start,
+                                     (uint64_t) file_end));
                }
-       
+               ret = map_segment(map,
+                                 vm_start,
+                                 vm_end,
+                                 control,
+                                 file_start,
+                                 file_end,
+                                 initprot,
+                                 maxprot);
+               if (ret) {
+                       return LOAD_NOSPACE;
+               }
+
+#if FIXME
                /*
                 *      If the file didn't end on a page boundary,
                 *      we need to zero the leftover.
                 */
                delta_size = map_size - scp->filesize;
-#if FIXME
                if (delta_size > 0) {
                        mach_vm_offset_t        tmp;
        
-                       ret = mach_vm_allocate(kernel_map, &tmp, delta_size, VM_FLAGS_ANYWHERE);
-                       if (ret != KERN_SUCCESS)
+                       ret = mach_vm_allocate(kernel_map, &tmp, delta_size, VM_FLAGS_ANYWHERE| VM_MAKE_TAG(VM_KERN_MEMORY_BSD));
+                       if (ret != KERN_SUCCESS) {
                                return(LOAD_RESOURCE);
+                       }
        
                        if (copyout(tmp, map_addr + scp->filesize,
                                                                delta_size)) {
@@ -1151,40 +1384,66 @@ load_segment(
         *      than the size from the file, we need to allocate
         *      zero fill memory for the rest.
         */
-       delta_size = seg_size - map_size;
+       if ((vm_end - vm_start) > (file_end - file_start)) {
+               delta_size = (vm_end - vm_start) - (file_end - file_start);
+       } else {
+               delta_size = 0;
+       }
        if (delta_size > 0) {
-               mach_vm_offset_t tmp = map_addr + map_size;
-
-               ret = mach_vm_map(map, &tmp, delta_size, 0, VM_FLAGS_FIXED,
-                                 NULL, 0, FALSE,
-                                 scp->initprot, scp->maxprot,
-                                 VM_INHERIT_DEFAULT);
-               if (ret != KERN_SUCCESS)
+               mach_vm_offset_t tmp;
+
+               tmp = vm_start + (file_end - file_start);
+               if (verbose) {
+                       MACHO_PRINTF(("++++++ load_segment: "
+                                     "delta mapping vm [0x%llx:0x%llx]\n",
+                                     (uint64_t) tmp,
+                                     (uint64_t) (tmp + delta_size)));
+               }
+               kr = map_segment(map,
+                                tmp,
+                                tmp + delta_size,
+                                MEMORY_OBJECT_CONTROL_NULL,
+                                0,
+                                delta_size,
+                                scp->initprot,
+                                scp->maxprot);
+               if (kr != KERN_SUCCESS) {
                        return(LOAD_NOSPACE);
+               }
        }
 
        if ( (scp->fileoff == 0) && (scp->filesize != 0) )
-               result->mach_header = map_addr;
+               result->mach_header = vm_offset;
 
        if (scp->flags & SG_PROTECTED_VERSION_1) {
-               ret = unprotect_dsmos_segment(scp->fileoff,
-                                       scp->filesize,
-                                       vp,
-                                       pager_offset,
-                                       map,
-                                       map_addr,
-                                       map_size);
+               ret = unprotect_dsmos_segment(file_start,
+                                             file_end - file_start,
+                                             vp,
+                                             pager_offset,
+                                             map,
+                                             vm_start,
+                                             vm_end - vm_start);
+               if (ret != LOAD_SUCCESS) {
+                       return ret;
+               }
        } else {
                ret = LOAD_SUCCESS;
        }
-       if (LOAD_SUCCESS == ret && filetype == MH_DYLINKER &&
-           result->all_image_info_addr == MACH_VM_MIN_ADDRESS)
+
+       if (LOAD_SUCCESS == ret &&
+           filetype == MH_DYLINKER &&
+           result->all_image_info_addr == MACH_VM_MIN_ADDRESS) {
                note_all_image_info_section(scp,
-                   LC_SEGMENT_64 == lcp->cmd, single_section_size,
-                   (const char *)lcp + segment_command_size, slide, result);
+                                           LC_SEGMENT_64 == lcp->cmd,
+                                           single_section_size,
+                                           ((const char *)lcp +
+                                            segment_command_size),
+                                           slide,
+                                           result);
+       }
 
        if (result->entry_point != MACH_VM_MIN_ADDRESS) {
-               if ((result->entry_point >= map_addr) && (result->entry_point < (map_addr + map_size))) {
+               if ((result->entry_point >= vm_offset) && (result->entry_point < (vm_offset + vm_size))) {
                        if ((scp->initprot & (VM_PROT_READ|VM_PROT_EXECUTE)) == (VM_PROT_READ|VM_PROT_EXECUTE)) {
                                result->validentry = 1;
                        } else {
@@ -1274,6 +1533,7 @@ load_main(
                return(LOAD_FAILURE);
        }
 
+
        result->unixproc = TRUE;
        result->thread_count++;
 
@@ -1350,6 +1610,7 @@ load_unixthread(
        if (ret != LOAD_SUCCESS)
                return (ret);
 
+
        result->unixproc = TRUE;
        result->thread_count++;
 
@@ -1747,8 +2008,9 @@ load_code_signature(
                            cputype,
                            macho_offset,
                            addr,
-                           lcp->datasize, 
-                           0)) {
+                           lcp->datasize,
+                           0,
+                           &blob)) {
                ret = LOAD_FAILURE;
                goto out;
        } else {
@@ -1760,11 +2022,12 @@ load_code_signature(
        ubc_cs_validation_bitmap_allocate( vp );
 #endif
                
-       blob = ubc_cs_blob_get(vp, cputype, macho_offset);
-
        ret = LOAD_SUCCESS;
 out:
        if (ret == LOAD_SUCCESS) {
+               if (blob == NULL)
+                       panic("success, but no blob!");
+
                result->csflags |= blob->csb_flags;
                result->platform_binary = blob->csb_platform_binary;
                result->cs_end_offset = blob->csb_end_offset;
@@ -1782,15 +2045,16 @@ out:
 
 static load_return_t
 set_code_unprotect(
-                  struct encryption_info_command *eip,
-                  caddr_t addr,        
-                  vm_map_t map,
-                  int64_t slide,
-                  struct vnode *vp,
-                  cpu_type_t cputype,
-                  cpu_subtype_t cpusubtype)
+       struct encryption_info_command *eip,
+       caddr_t addr,   
+       vm_map_t map,
+       int64_t slide,
+       struct vnode *vp,
+       off_t macho_offset,
+       cpu_type_t cputype,
+       cpu_subtype_t cpusubtype)
 {
-       int result, len;
+       int error, len;
        pager_crypt_info_t crypt_info;
        const char * cryptname = 0;
        char *vpath;
@@ -1799,6 +2063,7 @@ set_code_unprotect(
        struct segment_command_64 *seg64;
        struct segment_command *seg32;
        vm_map_offset_t map_offset, map_size;
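+       /* file offset of the encrypted range, handed to vm_map_apple_protected() */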
+       vm_object_offset_t crypto_backing_offset;
        kern_return_t kr;
 
        if (eip->cmdsize < sizeof(*eip)) return LOAD_BADMACHO;
@@ -1826,8 +2091,8 @@ set_code_unprotect(
        if(vpath == NULL) return LOAD_FAILURE;
        
        len = MAXPATHLEN;
-       result = vn_getpath(vp, vpath, &len);
-       if(result) {
+       error = vn_getpath(vp, vpath, &len);
+       if (error) {
                FREE_ZONE(vpath, MAXPATHLEN, M_NAMEI);
                return LOAD_FAILURE;
        }
@@ -1838,6 +2103,12 @@ set_code_unprotect(
                .cputype = cputype,
                .cpusubtype = cpusubtype};
        kr=text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data);
+#if DEVELOPMENT || DEBUG
+       struct proc *p;
+       p  = current_proc();
+       printf("APPLE_PROTECT: %d[%s] map %p %s(%s) -> 0x%x\n",
+              p->p_pid, p->p_comm, map, __FUNCTION__, vpath, kr);
+#endif /* DEVELOPMENT || DEBUG */
        FREE_ZONE(vpath, MAXPATHLEN, M_NAMEI);
        
        if(kr) {
@@ -1876,6 +2147,7 @@ set_code_unprotect(
                                     eip->cryptoff+eip->cryptsize)) {
                                        map_offset = seg64->vmaddr + eip->cryptoff - seg64->fileoff + slide;
                                        map_size = eip->cryptsize;
+                                       crypto_backing_offset = macho_offset + eip->cryptoff;
                                        goto remap_now;
                                }
                        case LC_SEGMENT:
@@ -1885,6 +2157,7 @@ set_code_unprotect(
                                     eip->cryptoff+eip->cryptsize)) {
                                        map_offset = seg32->vmaddr + eip->cryptoff - seg32->fileoff + slide;
                                        map_size = eip->cryptsize;
+                                       crypto_backing_offset = macho_offset + eip->cryptoff;
                                        goto remap_now;
                                }
                }
@@ -1895,10 +2168,16 @@ set_code_unprotect(
        
 remap_now:
        /* now remap using the decrypter */
-       kr = vm_map_apple_protected(map, map_offset, map_offset+map_size, &crypt_info);
-       if(kr) {
+       MACHO_PRINTF(("+++ set_code_unprotect: vm[0x%llx:0x%llx]\n",
+                     (uint64_t) map_offset,
+                     (uint64_t) (map_offset+map_size)));
+       kr = vm_map_apple_protected(map,
+                                   map_offset,
+                                   map_offset+map_size,
+                                   crypto_backing_offset,
+                                   &crypt_info);
+       if (kr) {
                printf("set_code_unprotect(): mapping failed with %x\n", kr);
-               crypt_info.crypt_end(crypt_info.crypt_ops);
                return LOAD_PROTECT;
        }
        
index b6ab1feb11f4cc660f5d6d7e62db21747c37eafb..5600cb42fa11955f2d0d59c0e21bfd14a7837594 100644 (file)
@@ -64,6 +64,7 @@ typedef struct _load_result {
                                prog_allocated_stack    :1,
                                prog_stack_size : 1,    
                                validentry      :1,
+                               has_pagezero    :1,
                                using_lcmain    :1,
                                                :0;
        unsigned int            csflags;
index ef8ebffcd60e29d96e328c2174dea826a31a4f3f..d8bc4f07acc2f53f855dc0cb8d5c670fb6f0114b 100644 (file)
@@ -156,20 +156,38 @@ ptrace(struct proc *p, struct ptrace_args *uap, int32_t *retval)
         *      Intercept and deal with "please trace me" request.
         */      
        if (uap->req == PT_TRACE_ME) {
-               proc_lock(p);
-               SET(p->p_lflag, P_LTRACED);
-               /* Non-attached case, our tracer is our parent. */
-               p->p_oppid = p->p_ppid;
-               /* Check whether child and parent are allowed to run modified
-                * code (they'll have to) */
-               struct proc *pproc=proc_find(p->p_oppid);
-               proc_unlock(p);
-               cs_allow_invalid(p);
-               if(pproc) {
+retry_trace_me:;
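+               /*
+                * Take a reference on our parent and re-check p_ppid under the
+                * proc lock; if we were re-parented in the meantime, start over.
+                */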
+               proc_t pproc = proc_parent(p);
+               if (pproc == NULL)
+                       return (EINVAL);
+#if CONFIG_MACF
+               /*
+                * NB: Cannot call kauth_authorize_process(..., KAUTH_PROCESS_CANTRACE, ...)
+                *     since that assumes the process being checked is the current process
+                *     when, in this case, it is the current process's parent.
+                *     Most of the other checks in cantrace() don't apply either.
+                */
+               if ((error = mac_proc_check_debug(pproc, p)) == 0) {
+#endif
+                       proc_lock(p);
+                       /* Make sure the process wasn't re-parented. */
+                       if (p->p_ppid != pproc->p_pid) {
+                               proc_unlock(p);
+                               proc_rele(pproc);
+                               goto retry_trace_me;
+                       }
+                       SET(p->p_lflag, P_LTRACED);
+                       /* Non-attached case, our tracer is our parent. */
+                       p->p_oppid = p->p_ppid;
+                       proc_unlock(p);
+                       /* Child and parent will have to be able to run modified code. */
+                       cs_allow_invalid(p);
                        cs_allow_invalid(pproc);
-                       proc_rele(pproc);
+#if CONFIG_MACF
                }
-               return(0);
+#endif
+               proc_rele(pproc);
+               return (error);
        }
        if (uap->req == PT_SIGEXC) {
                proc_lock(p);
@@ -200,12 +218,16 @@ ptrace(struct proc *p, struct ptrace_args *uap, int32_t *retval)
 
        task = t->task;
        if (uap->req == PT_ATTACHEXC) {
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
                uap->req = PT_ATTACH;
                tr_sigexc = 1;
        }
        if (uap->req == PT_ATTACH) {
+#pragma clang diagnostic pop
                int             err;
-               
+
+
                if ( kauth_authorize_process(proc_ucred(p), KAUTH_PROCESS_CANTRACE, 
                                                                         t, (uintptr_t)&err, 0, 0) == 0 ) {
                        /* it's OK to attach */
@@ -403,8 +425,10 @@ ptrace(struct proc *p, struct ptrace_args *uap, int32_t *retval)
                        goto out;
                }
                th_act = port_name_to_thread(CAST_MACH_PORT_TO_NAME(uap->addr));
-               if (th_act == THREAD_NULL)
-                       return (ESRCH);
+               if (th_act == THREAD_NULL) {
+                       error = ESRCH;
+                       goto out;
+               }
                ut = (uthread_t)get_bsdthread_info(th_act);
                if (uap->data)
                        ut->uu_siglist |= sigmask(uap->data);
index 13d56f58a62a1cf400c8228de32b6afe14fd04c3..7317f55b82d2221aaa81ea899f578a66273404fb 100755 (executable)
@@ -502,7 +502,7 @@ s/\$//g
                                                         argtype[i] == "sigset_t" || argtype[i] == "gid_t" || argtype[i] == "unsigned int" ||
                                                         argtype[i] == "mode_t" || argtype[i] == "key_t" ||
                                                         argtype[i] == "mach_port_name_t" || argtype[i] == "au_asid_t" ||
-                                                        argtype[i] == "associd_t" || argtype[i] == "connid_t") {
+                                                        argtype[i] == "sae_associd_t" || argtype[i] == "sae_connid_t") {
                                                munge32 = munge32 "w"
                                                size32 += 4
                                        }
@@ -582,7 +582,7 @@ s/\$//g
                        }
                }
 
-               printf("#if CONFIG_REQUIRES_U32_MUNGING\n") > sysent
+               printf("#if CONFIG_REQUIRES_U32_MUNGING || (__arm__ && (__BIGGEST_ALIGNMENT__ > 4))\n") > sysent
                printf("\t{ \(sy_call_t *\)%s, %s, %s, %s, %s},", 
                                tempname, munge32, munge_ret, argssize, size32) > sysent
                linesize = length(tempname) + length(munge32) + \
@@ -673,7 +673,7 @@ s/\$//g
                printf("};\n") > sysent
                printf("int     nsysent = sizeof(sysent) / sizeof(sysent[0]);\n") > sysent
                printf("/* Verify that NUM_SYSENT reflects the latest syscall count */\n") > sysent
-               printf("int     nsysent_size_check[((sizeof(sysent) / sizeof(sysent[0])) == NUM_SYSENT) ? 1 : -1] __unused;\n") > sysent
+               printf("_Static_assert(((sizeof(sysent) / sizeof(sysent[0])) == NUM_SYSENT), \"NUM_SYSENT needs to be updated to match syscall count\");\n") > sysent
 
                printf("};\n") > syscallnamestempfile
                printf("#define\t%sMAXSYSCALL\t%d\n", syscallprefix, syscall_num) \
index 22e6497d2770224b3673d38646572e2ba6789d64..e8bdddb34f618145b1219e2bcd8800f2c27723e6 100644 (file)
@@ -53,7 +53,6 @@
 #include <netinet/in.h>
 #include <netinet/if_ether.h>
 #include <netinet/dhcp_options.h>
-#include <netinet/in_dhcp.h>
 
 #include <kern/kern_types.h>
 #include <kern/kalloc.h>
@@ -603,6 +602,40 @@ find_interface(void)
     return (ifp);
 }
 
+static const struct sockaddr_in blank_sin = {
+    sizeof(struct sockaddr_in),
+    AF_INET,
+    0,
+    { 0 },
+    { 0, 0, 0, 0, 0, 0, 0, 0 }
+};
+
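+/*
+ * Bring up an IPv4 address (with optional netmask and broadcast address) on
+ * the named interface via the SIOCAIFADDR ioctl.
+ */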
+static int
+inet_aifaddr(struct socket * so, const char * name,
+            const struct in_addr * addr,
+            const struct in_addr * mask,
+            const struct in_addr * broadcast)
+{
+    struct ifaliasreq  ifra;
+
+    bzero(&ifra, sizeof(ifra));
+    strlcpy(ifra.ifra_name, name, sizeof(ifra.ifra_name));
+    if (addr) {
+       *((struct sockaddr_in *)(void *)&ifra.ifra_addr) = blank_sin;
+       ((struct sockaddr_in *)(void *)&ifra.ifra_addr)->sin_addr = *addr;
+    }
+    if (mask) {
+       *((struct sockaddr_in *)(void *)&ifra.ifra_mask) = blank_sin;
+       ((struct sockaddr_in *)(void *)&ifra.ifra_mask)->sin_addr = *mask;
+    }
+    if (broadcast) {
+       *((struct sockaddr_in *)(void *)&ifra.ifra_broadaddr) = blank_sin;
+       ((struct sockaddr_in *)(void *)&ifra.ifra_broadaddr)->sin_addr = *broadcast;
+    }
+    return (ifioctl(so, SIOCAIFADDR, (caddr_t)&ifra, current_proc()));
+}
+
+
 int
 netboot_mountroot(void)
 {
@@ -642,12 +675,8 @@ netboot_mountroot(void)
 
     /* grab information from the registry */
     if (get_ip_parameters(&iaddr, &netmask, &router) == FALSE) {
-       /* use DHCP to retrieve IP address, netmask and router */
-       error = dhcp(ifp, &iaddr, 64, &netmask, &router, procp);
-       if (error) {
-           printf("netboot: DHCP failed %d\n", error);
-           goto failed;
-       }
+       printf("netboot: can't retrieve IP parameters\n");
+       goto failed;
     }
     printf("netboot: IP address " IP_FORMAT, IP_LIST(&iaddr));
     if (netmask.s_addr) {
index 95ae2d593d9f85b93b7d2586bf00616d01158a62..1c8791c9ea5ab3f3b74911c8c24d5bd05dc84c59 100644 (file)
@@ -118,7 +118,7 @@ common_hook(void)
        return rv;
 }
 
-#if (MAC_POLICY_OPS_VERSION != 32)
+#if (MAC_POLICY_OPS_VERSION != 37)
 # error "struct mac_policy_ops doesn't match definition in mac_policy.h"
 #endif
 /*
@@ -201,15 +201,15 @@ static struct mac_policy_ops policy_ops = {
        CHECK_SET_HOOK(ipq_label_init)
        CHECK_SET_HOOK(ipq_label_update)
 
-       CHECK_SET_HOOK(lctx_check_label_update)
-       CHECK_SET_HOOK(lctx_label_destroy)
-       CHECK_SET_HOOK(lctx_label_externalize)
-       CHECK_SET_HOOK(lctx_label_init)
-       CHECK_SET_HOOK(lctx_label_internalize)
-       CHECK_SET_HOOK(lctx_label_update)
-       CHECK_SET_HOOK(lctx_notify_create)
-       CHECK_SET_HOOK(lctx_notify_join)
-       CHECK_SET_HOOK(lctx_notify_leave)
+       .mpo_reserved1 = (mpo_reserved_hook_t *)common_hook,
+       .mpo_reserved2 = (mpo_reserved_hook_t *)common_hook,
+       .mpo_reserved3 = (mpo_reserved_hook_t *)common_hook,
+       .mpo_reserved4 = (mpo_reserved_hook_t *)common_hook,
+       .mpo_reserved5 = (mpo_reserved_hook_t *)common_hook,
+       .mpo_reserved6 = (mpo_reserved_hook_t *)common_hook,
+       .mpo_reserved7 = (mpo_reserved_hook_t *)common_hook,
+       .mpo_reserved8 = (mpo_reserved_hook_t *)common_hook,
+       .mpo_reserved9 = (mpo_reserved_hook_t *)common_hook,
 
        CHECK_SET_HOOK(mbuf_label_associate_bpfdesc)
        CHECK_SET_HOOK(mbuf_label_associate_ifnet)
@@ -265,13 +265,13 @@ static struct mac_policy_ops policy_ops = {
        CHECK_SET_HOOK(system_check_sysctlbyname)
        CHECK_SET_HOOK(proc_check_inherit_ipc_ports)
        CHECK_SET_HOOK(vnode_check_rename)
-       .mpo_reserved4 = (mpo_reserved_hook_t *)common_hook,
-       .mpo_reserved5 = (mpo_reserved_hook_t *)common_hook,
-       .mpo_reserved6 = (mpo_reserved_hook_t *)common_hook,
-       .mpo_reserved7 = (mpo_reserved_hook_t *)common_hook,
-       .mpo_reserved8 = (mpo_reserved_hook_t *)common_hook,
-       .mpo_reserved9 = (mpo_reserved_hook_t *)common_hook,
-       .mpo_reserved10 = (mpo_reserved_hook_t *)common_hook,
+       CHECK_SET_HOOK(kext_check_query)
+       CHECK_SET_HOOK(iokit_check_nvram_get)
+       CHECK_SET_HOOK(iokit_check_nvram_set)
+       CHECK_SET_HOOK(iokit_check_nvram_delete)
+       CHECK_SET_HOOK(proc_check_expose_task)
+       CHECK_SET_HOOK(proc_check_set_host_special_port)
+       CHECK_SET_HOOK(proc_check_set_host_exception_port)
        .mpo_reserved11 = (mpo_reserved_hook_t *)common_hook,
        .mpo_reserved12 = (mpo_reserved_hook_t *)common_hook,
        .mpo_reserved13 = (mpo_reserved_hook_t *)common_hook,
@@ -461,8 +461,7 @@ static struct mac_policy_ops policy_ops = {
        CHECK_SET_HOOK(vnode_check_uipc_bind)
        CHECK_SET_HOOK(vnode_check_uipc_connect)
 
-       /* CHECK_SET_HOOK(proc_check_run_cs_invalid) */
-       .mpo_proc_check_run_cs_invalid = (mac_proc_check_run_cs_invalid_t *)common_hook,
+       CHECK_SET_HOOK(proc_check_run_cs_invalid)
        CHECK_SET_HOOK(proc_check_suspend_resume)
 
        CHECK_SET_HOOK(thread_userret)
@@ -486,8 +485,8 @@ static struct mac_policy_ops policy_ops = {
 
        CHECK_SET_HOOK(vnode_notify_rename)
 
-       CHECK_SET_HOOK(thread_label_init)
-       CHECK_SET_HOOK(thread_label_destroy)
+       .mpo_reserved32 = (mpo_reserved_hook_t *)common_hook,
+       .mpo_reserved33 = (mpo_reserved_hook_t *)common_hook,
 
        CHECK_SET_HOOK(system_check_kas_info)
 
index 4a0a848a2227b3f59852268cbe7fc565641fffc4..e14baf815e932decbe39ea8506c53805c35e514a 100644 (file)
@@ -742,7 +742,8 @@ pshm_truncate(__unused proc_t p, struct fileproc *fp, __unused int fd,
                pshmobj_next_p = &pshmobj->pshmo_next;
        }
        
-       pinfo->pshm_flags = PSHM_ALLOCATED;
+       pinfo->pshm_flags |= PSHM_ALLOCATED;
+       pinfo->pshm_flags &= ~(PSHM_ALLOCATING);
        pinfo->pshm_length = total_size;
        PSHM_SUBSYS_UNLOCK();
        return(0);
index a2b82a6e4280951e9bb32fad0a7aec1ef2f59fa0..a5da30245812cb7faf61867cfec5ad5eddc0aa85 100644 (file)
 #include <sys/event.h>
 #include <sys/codesign.h>
 
+/* Needed by proc_listcoalitions() */
+#ifdef CONFIG_COALITIONS
+#include <sys/coalition.h>
+#endif
+
 struct pshmnode;
 struct psemnode;
 struct pipe;
@@ -133,6 +138,7 @@ int __attribute__ ((noinline)) proc_dirtycontrol(int pid, int flavor, uint64_t a
 int __attribute__ ((noinline)) proc_terminate(int pid, int32_t * retval);
 int __attribute__ ((noinline)) proc_pid_rusage(int pid, int flavor, user_addr_t buffer, int32_t * retval);
 int __attribute__ ((noinline)) proc_pidoriginatorinfo(int pid, int flavor, user_addr_t buffer, uint32_t buffersize, int32_t * retval);
+int __attribute__ ((noinline)) proc_listcoalitions(int flavor, int coaltype, user_addr_t buffer, uint32_t buffersize, int32_t *retval);
 
 /* protos for procpidinfo calls */
 int __attribute__ ((noinline)) proc_pidfdlist(proc_t p, user_addr_t buffer, uint32_t buffersize, int32_t *retval);
@@ -154,23 +160,24 @@ void __attribute__ ((noinline)) proc_piduniqidentifierinfo(proc_t p, struct proc
 void __attribute__ ((noinline)) proc_archinfo(proc_t p, struct proc_archinfo *pai);
 void __attribute__ ((noinline)) proc_pidcoalitioninfo(proc_t p, struct proc_pidcoalitioninfo *pci);
 int __attribute__ ((noinline)) proc_pidnoteexit(proc_t p, uint64_t arg,  uint32_t *data);
+int __attribute__ ((noinline)) proc_pidoriginatorpid_uuid(uuid_t uuid, uint32_t buffersize, pid_t *pid);
 
 
 /* protos for proc_pidfdinfo calls */
-int __attribute__ ((noinline)) pid_vnodeinfo(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, user_addr_t  buffer, uint32_t buffersize, int32_t * retval);
-int __attribute__ ((noinline)) pid_vnodeinfopath(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, user_addr_t  buffer, uint32_t buffersize, int32_t * retval);
-int __attribute__ ((noinline)) pid_socketinfo(socket_t  so, struct fileproc *fp, int closeonexec, user_addr_t  buffer, uint32_t buffersize, int32_t * retval);
-int __attribute__ ((noinline)) pid_pseminfo(struct psemnode * psem, struct fileproc * fp,  int closeonexec, user_addr_t  buffer, uint32_t buffersize, int32_t * retval);
-int __attribute__ ((noinline)) pid_pshminfo(struct pshmnode * pshm, struct fileproc * fp,  int closeonexec, user_addr_t  buffer, uint32_t buffersize, int32_t * retval);
-int __attribute__ ((noinline)) pid_pipeinfo(struct pipe * p, struct fileproc * fp,  int closeonexec, user_addr_t  buffer, uint32_t buffersize, int32_t * retval);
-int __attribute__ ((noinline)) pid_kqueueinfo(struct kqueue * kq, struct fileproc * fp,  int closeonexec, user_addr_t  buffer, uint32_t buffersize, int32_t * retval);
-int __attribute__ ((noinline)) pid_atalkinfo(struct atalk  * at, struct fileproc * fp,  int closeonexec, user_addr_t  buffer, uint32_t buffersize, int32_t * retval);
+int __attribute__ ((noinline)) pid_vnodeinfo(vnode_t vp, uint32_t vid, struct fileproc * fp,proc_t proc, int fd, user_addr_t  buffer, uint32_t buffersize, int32_t * retval);
+int __attribute__ ((noinline)) pid_vnodeinfopath(vnode_t vp, uint32_t vid, struct fileproc * fp,proc_t proc, int fd, user_addr_t  buffer, uint32_t buffersize, int32_t * retval);
+int __attribute__ ((noinline)) pid_socketinfo(socket_t  so, struct fileproc *fp,proc_t proc, int fd, user_addr_t  buffer, uint32_t buffersize, int32_t * retval);
+int __attribute__ ((noinline)) pid_pseminfo(struct psemnode * psem, struct fileproc * fp, proc_t proc, int fd, user_addr_t  buffer, uint32_t buffersize, int32_t * retval);
+int __attribute__ ((noinline)) pid_pshminfo(struct pshmnode * pshm, struct fileproc * fp, proc_t proc, int fd, user_addr_t  buffer, uint32_t buffersize, int32_t * retval);
+int __attribute__ ((noinline)) pid_pipeinfo(struct pipe * p, struct fileproc * fp, proc_t proc, int fd, user_addr_t  buffer, uint32_t buffersize, int32_t * retval);
+int __attribute__ ((noinline)) pid_kqueueinfo(struct kqueue * kq, struct fileproc * fp, proc_t proc, int fd, user_addr_t  buffer, uint32_t buffersize, int32_t * retval);
+int __attribute__ ((noinline)) pid_atalkinfo(struct atalk  * at, struct fileproc * fp, proc_t proc, int fd, user_addr_t  buffer, uint32_t buffersize, int32_t * retval);
 
 
 /* protos for misc */
 
 int fill_vnodeinfo(vnode_t vp, struct vnode_info *vinfo);
-void  fill_fileinfo(struct fileproc * fp, int closeonexec, struct proc_fileinfo * finfo);
+void  fill_fileinfo(struct fileproc * fp, proc_t proc, int fd, struct proc_fileinfo * finfo);
 int proc_security_policy(proc_t targetp, int callnum, int flavor, boolean_t check_same_user);
 static void munge_vinfo_stat(struct stat64 *sbp, struct vinfo_stat *vsbp);
 static int proc_piduuidinfo(pid_t pid, uuid_t uuid_buf, uint32_t buffersize);
@@ -236,6 +243,9 @@ proc_info_internal(int callnum, int pid, int flavor, uint64_t arg, user_addr_t b
                        return (proc_pid_rusage(pid, flavor, buffer, retval));
                case PROC_INFO_CALL_PIDORIGINATORINFO:
                        return (proc_pidoriginatorinfo(pid, flavor, buffer, buffersize, retval));
+               case PROC_INFO_CALL_LISTCOALITIONS:
+                       return proc_listcoalitions(pid /* flavor */, flavor /* coaltype */, buffer,
+                                                  buffersize, retval);
                default:
                                return(EINVAL);
        }
@@ -1146,10 +1156,10 @@ proc_piduuidinfo(pid_t pid, uuid_t uuid_buf, uint32_t buffersize)
 }
 
 /*
- * Function to get the uuid of the originator of the voucher.
+ * Function to get the uuid and pid of the originator of the voucher.
  */
 int
-proc_pidoriginatoruuid(uuid_t uuid, uint32_t buffersize)
+proc_pidoriginatorpid_uuid(uuid_t uuid, uint32_t buffersize, pid_t *pid)
 {
        pid_t originator_pid;
        kern_return_t kr;
@@ -1171,10 +1181,21 @@ proc_pidoriginatoruuid(uuid_t uuid, uint32_t buffersize)
                return error;
        }
 
+       *pid = originator_pid;
        error = proc_piduuidinfo(originator_pid, uuid, buffersize);
        return error;
 }
 
+/*
+ * Function to get the uuid of the originator of the voucher.
+ */
+int
+proc_pidoriginatoruuid(uuid_t uuid, uint32_t buffersize)
+{
+       pid_t originator_pid;
+       return (proc_pidoriginatorpid_uuid(uuid, buffersize, &originator_pid));
+}
+
 /***************************** proc_pidoriginatorinfo ***************************/
 
 int
@@ -1190,6 +1211,9 @@ proc_pidoriginatorinfo(int pid, int flavor, user_addr_t buffer, uint32_t  buffer
                case PROC_PIDORIGINATOR_BGSTATE:
                        size = PROC_PIDORIGINATOR_BGSTATE_SIZE;
                        break;
+               case PROC_PIDORIGINATOR_PID_UUID:
+                       size = PROC_PIDORIGINATOR_PID_UUID_SIZE;
+                       break;
                default:
                        return(EINVAL);
        }
@@ -1214,6 +1238,24 @@ proc_pidoriginatorinfo(int pid, int flavor, user_addr_t buffer, uint32_t  buffer
                }
                break;
 
+               case PROC_PIDORIGINATOR_PID_UUID: {
+                       struct proc_originatorinfo originator_info;
+
+                       error = proc_pidoriginatorpid_uuid(originator_info.originator_uuid,
+                                               sizeof(uuid_t), &originator_info.originator_pid);
+                       if (error != 0)
+                               goto out;
+
+                       originator_info.p_reserve2 = 0;
+                       originator_info.p_reserve3 = 0;
+                       originator_info.p_reserve4 = 0;
+
+                       error = copyout(&originator_info, buffer, size);
+                       if (error == 0)
+                               *retval = size;
+               }
+               break;
+
                case PROC_PIDORIGINATOR_BGSTATE: {
                        uint32_t is_backgrounded;
                        error = proc_get_originatorbgstate(&is_backgrounded);
@@ -1233,6 +1275,106 @@ out:
        return error;
 }
 
+/***************************** proc_listcoalitions ***************************/
+int proc_listcoalitions(int flavor, int type, user_addr_t buffer,
+                       uint32_t buffersize, int32_t *retval)
+{
+#if CONFIG_COALITIONS
+       int error = ENOTSUP;
+       int coal_type;
+       uint32_t elem_size;
+       void *coalinfo = NULL;
+       uint32_t k_buffersize = 0, copyout_sz = 0;
+       int ncoals = 0, ncoals_ = 0;
+
+       /* struct procinfo_coalinfo; */
+
+       switch (flavor) {
+       case LISTCOALITIONS_ALL_COALS:
+               elem_size = LISTCOALITIONS_ALL_COALS_SIZE;
+               coal_type = -1;
+               break;
+       case LISTCOALITIONS_SINGLE_TYPE:
+               elem_size = LISTCOALITIONS_SINGLE_TYPE_SIZE;
+               coal_type = type;
+               break;
+       default:
+               return EINVAL;
+       }
+
+       /* find the total number of coalitions */
+       ncoals = coalitions_get_list(coal_type, NULL, 0);
+
+       if (ncoals == 0 || buffer == 0 || buffersize == 0) {
+               /*
+                * user just wants buffer size
+                * or there are no coalitions
+                */
+               error = 0;
+               *retval = (int)(ncoals * elem_size);
+               goto out;
+       }
+
+       k_buffersize = ncoals * elem_size;
+       coalinfo = kalloc((vm_size_t)k_buffersize);
+       if (!coalinfo) {
+               error = ENOMEM;
+               goto out;
+       }
+       bzero(coalinfo, k_buffersize);
+
+       switch (flavor) {
+       case LISTCOALITIONS_ALL_COALS:
+       case LISTCOALITIONS_SINGLE_TYPE:
+               ncoals_ = coalitions_get_list(coal_type, coalinfo, ncoals);
+               break;
+       default:
+               panic("memory corruption?!");
+       }
+
+       if (ncoals_ == 0) {
+               /* all the coalitions disappeared... weird but valid */
+               error = 0;
+               *retval = 0;
+               goto out;
+       }
+
+       /*
+        * Some coalitions may have disappeared between our initial check
+        * and the actual list acquisition.
+        * Only copy out what we really need.
+        */
+       copyout_sz = k_buffersize;
+       if (ncoals_ < ncoals)
+               copyout_sz = ncoals_ * elem_size;
+
+       /*
+        * copy the list up to user space
+        * (we're guaranteed to have a non-null pointer/size here)
+        */
+       error = copyout(coalinfo, buffer,
+                       copyout_sz < buffersize ? copyout_sz : buffersize);
+
+       if (error == 0)
+               *retval = (int)copyout_sz;
+
+out:
+       if (coalinfo)
+               kfree(coalinfo, k_buffersize);
+
+       return error;
+#else
+       /* no coalition support */
+       (void)flavor;
+       (void)type;
+       (void)buffer;
+       (void)buffersize;
+       (void)retval;
+       return ENOTSUP;
+#endif
+}
+
+
 /********************************** proc_pidinfo ********************************/
 
 
@@ -1365,6 +1507,7 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t  bu
                case PROC_PIDT_SHORTBSDINFO:
                case PROC_PIDUNIQIDENTIFIERINFO:
                case PROC_PIDPATHINFO:
+               case PROC_PIDCOALITIONINFO:
                        check_same_user = NO_CHECK_SAME_USER;
                        break;
                default:
@@ -1545,7 +1688,8 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t  bu
                        if (error == 0) {
                                *retval = sizeof(struct proc_archinfo);
                        }
-               }
+               }
+               break;
 
                case PROC_PIDCOALITIONINFO: {
                        struct proc_pidcoalitioninfo pci;
@@ -1557,7 +1701,7 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t  bu
                }
                break;
 
-               case PROC_PIDNOTEEXIT: {
+               case PROC_PIDNOTEEXIT: {
                        uint32_t data;
                        error = proc_pidnoteexit(p, arg, &data);
                        if (error == 0) {
@@ -1582,8 +1726,8 @@ out:
 }
 
 
-int 
-pid_vnodeinfo(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, user_addr_t  buffer, __unused uint32_t buffersize, int32_t * retval) 
+int
+pid_vnodeinfo(vnode_t vp, uint32_t vid, struct fileproc * fp, proc_t proc, int fd, user_addr_t  buffer, __unused uint32_t buffersize, int32_t * retval)
 {
        struct vnode_fdinfo vfi;
        int error= 0;
@@ -1592,7 +1736,7 @@ pid_vnodeinfo(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, u
                return(error);
        }
        bzero(&vfi, sizeof(struct vnode_fdinfo));
-       fill_fileinfo(fp, closeonexec, &vfi.pfi);
+       fill_fileinfo(fp, proc, fd, &vfi.pfi);
        error = fill_vnodeinfo(vp, &vfi.pvi);
        vnode_put(vp);
        if (error == 0) {
@@ -1603,8 +1747,8 @@ pid_vnodeinfo(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, u
        return(error);
 }
 
-int 
-pid_vnodeinfopath(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, user_addr_t  buffer, __unused uint32_t buffersize, int32_t * retval) 
+int
+pid_vnodeinfopath(vnode_t vp, uint32_t vid, struct fileproc * fp, proc_t proc, int fd, user_addr_t  buffer, __unused uint32_t buffersize, int32_t * retval)
 {
        struct vnode_fdinfowithpath vfip;
        int count, error= 0;
@@ -1613,7 +1757,7 @@ pid_vnodeinfopath(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexe
                return(error);
        }
        bzero(&vfip, sizeof(struct vnode_fdinfowithpath));
-       fill_fileinfo(fp, closeonexec, &vfip.pfi);
+       fill_fileinfo(fp, proc, fd, &vfip.pfi);
        error = fill_vnodeinfo(vp, &vfip.pvip.vip_vi) ;
        if (error == 0) {
                count = MAXPATHLEN;
@@ -1628,8 +1772,8 @@ pid_vnodeinfopath(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexe
        return(error);
 }
 
-void  
-fill_fileinfo(struct fileproc * fp, int closeonexec, struct proc_fileinfo * fproc)
+void
+fill_fileinfo(struct fileproc * fp, proc_t proc, int fd, struct proc_fileinfo * fproc)
 {
        fproc->fi_openflags = fp->f_fglob->fg_flag;
        fproc->fi_status = 0;
@@ -1637,9 +1781,12 @@ fill_fileinfo(struct fileproc * fp, int closeonexec, struct proc_fileinfo * fpro
        fproc->fi_type = FILEGLOB_DTYPE(fp->f_fglob);
        if (fp->f_fglob->fg_count > 1)
                fproc->fi_status |= PROC_FP_SHARED;
-       if (closeonexec != 0)
-               fproc->fi_status |= PROC_FP_CLEXEC;
-
+       if (proc != PROC_NULL) {
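+               /* derive close-on-exec and close-on-fork status from the per-fd flags */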
+               if ((FDFLAGS_GET(proc, fd) & UF_EXCLOSE) != 0)
+                       fproc->fi_status |= PROC_FP_CLEXEC;
+               if ((FDFLAGS_GET(proc, fd) & UF_FORKCLOSE) != 0)
+                       fproc->fi_status |= PROC_FP_CLFORK;
+       }
        if (FILEPROC_TYPE(fp) == FTYPE_GUARDED) {
                fproc->fi_status |= PROC_FP_GUARDED;
                fproc->fi_guardflags = 0;
@@ -1685,34 +1832,34 @@ out:
 }
 
 int
-pid_socketinfo(socket_t so, struct fileproc *fp, int closeonexec, user_addr_t  buffer, __unused uint32_t buffersize, int32_t * retval)
+pid_socketinfo(socket_t so, struct fileproc *fp, proc_t proc, int fd, user_addr_t  buffer, __unused uint32_t buffersize, int32_t * retval)
 {
 #if SOCKETS
        struct socket_fdinfo s;
        int error = 0;
 
        bzero(&s, sizeof(struct socket_fdinfo));
-       fill_fileinfo(fp, closeonexec, &s.pfi);
+       fill_fileinfo(fp, proc, fd, &s.pfi);
        if ((error = fill_socketinfo(so, &s.psi)) == 0) {
                if ((error = copyout(&s, buffer, sizeof(struct socket_fdinfo))) == 0)
                                *retval = sizeof(struct socket_fdinfo);
        }
        return (error);
 #else
-#pragma unused(so, fp, closeonexec, buffer)
+#pragma unused(so, fp, proc, fd, buffer)
        *retval = 0;
        return (ENOTSUP);
 #endif
 }
 
 int
-pid_pseminfo(struct psemnode *psem, struct fileproc *fp,  int closeonexec, user_addr_t  buffer, __unused uint32_t buffersize, int32_t * retval)
+pid_pseminfo(struct psemnode *psem, struct fileproc *fp, proc_t proc, int fd, user_addr_t  buffer, __unused uint32_t buffersize, int32_t * retval)
 {
        struct psem_fdinfo pseminfo;
        int error = 0;
+
        bzero(&pseminfo, sizeof(struct psem_fdinfo));
-       fill_fileinfo(fp, closeonexec, &pseminfo.pfi);
+       fill_fileinfo(fp, proc, fd, &pseminfo.pfi);
 
        if ((error = fill_pseminfo(psem, &pseminfo.pseminfo)) == 0) {
                if ((error = copyout(&pseminfo, buffer, sizeof(struct psem_fdinfo))) == 0)
@@ -1723,13 +1870,13 @@ pid_pseminfo(struct psemnode *psem, struct fileproc *fp,  int closeonexec, user_
 }
 
 int
-pid_pshminfo(struct pshmnode *pshm, struct fileproc *fp,  int closeonexec, user_addr_t  buffer, __unused uint32_t buffersize, int32_t * retval)
+pid_pshminfo(struct pshmnode *pshm, struct fileproc *fp, proc_t proc, int fd, user_addr_t  buffer, __unused uint32_t buffersize, int32_t * retval)
 {
        struct pshm_fdinfo pshminfo;
        int error = 0;
+
        bzero(&pshminfo, sizeof(struct pshm_fdinfo));
-       fill_fileinfo(fp, closeonexec, &pshminfo.pfi);
+       fill_fileinfo(fp, proc, fd, &pshminfo.pfi);
 
        if ((error = fill_pshminfo(pshm, &pshminfo.pshminfo)) == 0) {
                if ((error = copyout(&pshminfo, buffer, sizeof(struct pshm_fdinfo))) == 0)
@@ -1740,13 +1887,13 @@ pid_pshminfo(struct pshmnode *pshm, struct fileproc *fp,  int closeonexec, user_
 }
 
 int
-pid_pipeinfo(struct pipe *  p, struct fileproc *fp,  int closeonexec, user_addr_t  buffer, __unused uint32_t buffersize, int32_t * retval)
+pid_pipeinfo(struct pipe *  p, struct fileproc *fp, proc_t proc, int fd, user_addr_t  buffer, __unused uint32_t buffersize, int32_t * retval)
 {
        struct pipe_fdinfo pipeinfo;
        int error = 0;
 
        bzero(&pipeinfo, sizeof(struct pipe_fdinfo));
-       fill_fileinfo(fp, closeonexec, &pipeinfo.pfi);
+       fill_fileinfo(fp, proc, fd, &pipeinfo.pfi);
        if ((error = fill_pipeinfo(p, &pipeinfo.pipeinfo)) == 0) {
                if ((error = copyout(&pipeinfo, buffer, sizeof(struct pipe_fdinfo))) == 0)
                                *retval = sizeof(struct pipe_fdinfo);
@@ -1756,14 +1903,18 @@ pid_pipeinfo(struct pipe *  p, struct fileproc *fp,  int closeonexec, user_addr_
 }
 
 int
-pid_kqueueinfo(struct kqueue * kq, struct fileproc *fp,  int closeonexec, user_addr_t  buffer, __unused uint32_t buffersize, int32_t * retval)
+pid_kqueueinfo(struct kqueue * kq, struct fileproc *fp, proc_t proc, int fd, user_addr_t  buffer, __unused uint32_t buffersize, int32_t * retval)
 {
        struct kqueue_fdinfo kqinfo;
        int error = 0;
-       
+
        bzero(&kqinfo, sizeof(struct kqueue_fdinfo));
-       fill_fileinfo(fp, closeonexec, &kqinfo.pfi);
+
+       /* not all kq's are associated with a file (e.g. workqkq) */
+       if (fp) {
+               assert(fd >= 0);
+               fill_fileinfo(fp, proc, fd, &kqinfo.pfi);
+       }
 
        if ((error = fill_kqueueinfo(kq, &kqinfo.kqueueinfo)) == 0) {
                if ((error = copyout(&kqinfo, buffer, sizeof(struct kqueue_fdinfo))) == 0)
@@ -1774,7 +1925,7 @@ pid_kqueueinfo(struct kqueue * kq, struct fileproc *fp,  int closeonexec, user_a
 }
 
 int
-pid_atalkinfo(__unused struct atalk * at, __unused struct fileproc *fp,  __unused int closeonexec, __unused user_addr_t  buffer, __unused uint32_t buffersize, __unused int32_t * retval)
+pid_atalkinfo(__unused struct atalk * at, __unused struct fileproc *fp,  __unused proc_t proc, __unused int fd, __unused user_addr_t  buffer, __unused uint32_t buffersize, __unused int32_t * retval)
 {
        return ENOTSUP;
 }
@@ -1787,9 +1938,8 @@ proc_pidfdinfo(int pid, int flavor,  int fd, user_addr_t buffer, uint32_t buffer
 {
        proc_t p;
        int error = ENOTSUP;
-       struct fileproc * fp;
+       struct fileproc * fp = NULL;
        uint32_t size;
-       int closeonexec = 0;
 
        switch (flavor) {
                case PROC_PIDFDVNODEINFO:
@@ -1813,6 +1963,11 @@ proc_pidfdinfo(int pid, int flavor,  int fd, user_addr_t buffer, uint32_t buffer
                case PROC_PIDFDKQUEUEINFO:
                        size = PROC_PIDFDKQUEUEINFO_SIZE;
                        break;
+               case PROC_PIDFDKQUEUE_EXTINFO:
+                       size = PROC_PIDFDKQUEUE_EXTINFO_SIZE;
+                       if (buffer == (user_addr_t)0)
+                               size = 0;
+                       break;
                case PROC_PIDFDATALKINFO:
                        size = PROC_PIDFDATALKINFO_SIZE;
                        break;
@@ -1843,8 +1998,7 @@ proc_pidfdinfo(int pid, int flavor,  int fd, user_addr_t buffer, uint32_t buffer
                                goto out1;
                        }
                        /* no need to be under the fdlock */
-                       closeonexec = p->p_fd->fd_ofileflags[fd] & UF_EXCLOSE;
-                       error =  pid_vnodeinfo(vp, vid, fp, closeonexec, buffer, buffersize, retval);
+                       error =  pid_vnodeinfo(vp, vid, fp, p, fd, buffer, buffersize, retval);
                }
                break;
 
@@ -1857,8 +2011,7 @@ proc_pidfdinfo(int pid, int flavor,  int fd, user_addr_t buffer, uint32_t buffer
                        }
 
                        /* no need to be under the fdlock */
-                       closeonexec = p->p_fd->fd_ofileflags[fd] & UF_EXCLOSE;
-                       error =  pid_vnodeinfopath(vp, vid, fp, closeonexec, buffer, buffersize, retval);
+                       error =  pid_vnodeinfopath(vp, vid, fp, p, fd, buffer, buffersize, retval);
                }
                break;
 
@@ -1869,8 +2022,7 @@ proc_pidfdinfo(int pid, int flavor,  int fd, user_addr_t buffer, uint32_t buffer
                                goto out1;
                        }
                        /* no need to be under the fdlock */
-                       closeonexec = p->p_fd->fd_ofileflags[fd] & UF_EXCLOSE;
-                       error =  pid_socketinfo(so, fp, closeonexec, buffer, buffersize, retval);
+                       error =  pid_socketinfo(so, fp, p, fd, buffer, buffersize, retval);
                }
                break;
 
@@ -1881,8 +2033,7 @@ proc_pidfdinfo(int pid, int flavor,  int fd, user_addr_t buffer, uint32_t buffer
                                goto out1;
                        }
                        /* no need to be under the fdlock */
-                       closeonexec = p->p_fd->fd_ofileflags[fd] & UF_EXCLOSE;
-                       error =  pid_pseminfo(psem, fp, closeonexec, buffer, buffersize, retval);
+                       error =  pid_pseminfo(psem, fp, p, fd, buffer, buffersize, retval);
                }
                break;
 
@@ -1893,8 +2044,7 @@ proc_pidfdinfo(int pid, int flavor,  int fd, user_addr_t buffer, uint32_t buffer
                                goto out1;
                        }
                        /* no need to be under the fdlock */
-                       closeonexec = p->p_fd->fd_ofileflags[fd] & UF_EXCLOSE;
-                       error =  pid_pshminfo(pshm, fp, closeonexec, buffer, buffersize, retval);
+                       error =  pid_pshminfo(pshm, fp, p, fd, buffer, buffersize, retval);
                }
                break;
 
@@ -1905,20 +2055,41 @@ proc_pidfdinfo(int pid, int flavor,  int fd, user_addr_t buffer, uint32_t buffer
                                goto out1;
                        }
                        /* no need to be under the fdlock */
-                       closeonexec = p->p_fd->fd_ofileflags[fd] & UF_EXCLOSE;
-                       error =  pid_pipeinfo(cpipe, fp, closeonexec, buffer, buffersize, retval);
+                       error =  pid_pipeinfo(cpipe, fp, p, fd, buffer, buffersize, retval);
                }
                break;
 
                case PROC_PIDFDKQUEUEINFO: {
                        struct kqueue * kq;
 
-                       if ((error = fp_getfkq(p, fd, &fp,  &kq)) !=0) {
+                       if (fd == -1) {
+                               if ((kq = p->p_wqkqueue) == NULL) {
+                                       /* wqkqueue is initialized on-demand */
+                                       error = 0;
+                                       break;
+                               }
+                       } else if ((error = fp_getfkq(p, fd, &fp,  &kq)) != 0) {
                                goto out1;
                        }
+
                        /* no need to be under the fdlock */
-                       closeonexec = p->p_fd->fd_ofileflags[fd] & UF_EXCLOSE;
-                       error =  pid_kqueueinfo(kq, fp, closeonexec, buffer, buffersize, retval);
+                       error = pid_kqueueinfo(kq, fp, p, fd, buffer, buffersize, retval);
+               }
+               break;
+
+               case PROC_PIDFDKQUEUE_EXTINFO: {
+                       struct kqueue * kq;
+
+                       if (fd == -1) {
+                               if ((kq = p->p_wqkqueue) == NULL) {
+                                       /* wqkqueue is initialized on-demand */
+                                       error = 0;
+                                       break;
+                               }
+                       } else if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0) {
+                               goto out1;
+                       }
+                       error = pid_kqueue_extinfo(p, kq, buffer, buffersize, retval);
                }
                break;
 
@@ -1928,7 +2099,9 @@ proc_pidfdinfo(int pid, int flavor,  int fd, user_addr_t buffer, uint32_t buffer
                }
        }
 
-       fp_drop(p, fd, fp , 0);         
+       if (fp) {
+               fp_drop(p, fd, fp , 0);
+       }
 out1 :
        proc_rele(p);
 out:
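
The PROC_PIDFD* flavors handled above are reached from user space through libproc. A minimal sketch of querying the path behind one descriptor, assuming the usual proc_pidfdinfo() wrapper and the PROC_PIDFDVNODEPATHINFO flavor from sys/proc_info.h (error handling reduced for brevity):

    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/proc_info.h>
    #include <libproc.h>

    /* Print the path backing a given (pid, fd) pair. */
    static int print_fd_path(pid_t pid, int fd)
    {
            struct vnode_fdinfowithpath vi;
            int n = proc_pidfdinfo(pid, fd, PROC_PIDFDVNODEPATHINFO, &vi, sizeof(vi));

            if (n < (int)sizeof(vi))
                    return -1;      /* no info, or not a vnode-backed fd */
            printf("fd %d -> %s\n", fd, vi.pvip.vip_path);
            return 0;
    }
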
@@ -1966,7 +2139,7 @@ proc_fileport_info(__unused mach_port_name_t name,
                        break;
                }
                vp = (struct vnode *)fg->fg_data;
-               error = pid_vnodeinfopath(vp, vnode_vid(vp), fp, 0,
+               error = pid_vnodeinfopath(vp, vnode_vid(vp), fp, PROC_NULL, 0,
                    fia->fia_buffer, fia->fia_buffersize, fia->fia_retval);
        }       break;
 
@@ -1978,7 +2151,7 @@ proc_fileport_info(__unused mach_port_name_t name,
                        break;
                }
                so = (socket_t)fg->fg_data;
-               error = pid_socketinfo(so, fp, 0,
+               error = pid_socketinfo(so, fp, PROC_NULL, 0,
                    fia->fia_buffer, fia->fia_buffersize, fia->fia_retval);
        }       break;
 
@@ -1990,7 +2163,7 @@ proc_fileport_info(__unused mach_port_name_t name,
                        break;
                }
                pshm = (struct pshmnode *)fg->fg_data;
-               error = pid_pshminfo(pshm, fp, 0,
+               error = pid_pshminfo(pshm, fp, PROC_NULL, 0,
                    fia->fia_buffer, fia->fia_buffersize, fia->fia_retval);
        }       break;
 
@@ -2002,7 +2175,7 @@ proc_fileport_info(__unused mach_port_name_t name,
                        break;
                }
                cpipe = (struct pipe *)fg->fg_data;
-               error = pid_pipeinfo(cpipe, fp, 0,
+               error = pid_pipeinfo(cpipe, fp, PROC_NULL, 0,
                    fia->fia_buffer, fia->fia_buffersize, fia->fia_retval);
        }       break;
 
@@ -2024,7 +2197,7 @@ proc_pidfileportinfo(int pid, int flavor, mach_port_name_t name,
        uint32_t size;
        struct fileport_info_args fia;
 
-       /* fileport types are restricted by filetype_issendable() */
+       /* fileport types are restricted by file_issendable() */
 
        switch (flavor) {
        case PROC_PIDFILEPORTVNODEPATHINFO:
@@ -2436,7 +2609,7 @@ void
 proc_pidcoalitioninfo(proc_t p, struct proc_pidcoalitioninfo *ppci)
 {
        bzero(ppci, sizeof(*ppci));
-       ppci->coalition_id = proc_coalitionid(p);
+       proc_coalitionids(p, ppci->coalition_id);
 }
 
 
index 70ded5774dea3e2945758313ef988559608a15b7..cfa58910a481fdbcb753a93675450ed8fa404145 100644 (file)
@@ -65,6 +65,7 @@
 
 #include <sys/types.h>
 //#include <stdlib.h>
+#include <sys/kpi_private.h>
 
 __private_extern__
 void
@@ -199,3 +200,9 @@ loop:       SWAPINIT(a, es);
        }
 /*             qsort(pn - r, r / es, es, cmp);*/
 }
+
+/* private KPI */
+void
+kx_qsort(void *array, size_t nm, size_t member_size, int (*cmpf)(const void *, const void *)) {
+       qsort(array, nm, member_size, cmpf);
+}
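
kx_qsort() is a thin private-KPI shim over the in-kernel qsort() above, so callers follow the ordinary qsort() calling convention. An illustrative comparator sorting ints in ascending order:

    /* Comparator for ascending ints; the subtraction trick is avoided
     * to prevent overflow. */
    static int cmp_int(const void *a, const void *b)
    {
            int x = *(const int *)a;
            int y = *(const int *)b;
            return (x > y) - (x < y);
    }

    int vals[] = { 3, 1, 2 };
    kx_qsort(vals, sizeof(vals) / sizeof(vals[0]), sizeof(vals[0]), cmp_int);
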
index 73725bbb4d25fe07a244c9ccb0ba9c85b37b221e..4713bf2608379adb9a9551169499f04511aabd22 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2005-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -222,24 +222,8 @@ fill_socketinfo(struct socket *so, struct socket_info *si)
                            ev_pcb->evp_vendor_code_filter;
                        kesi->kesi_class_filter = ev_pcb->evp_class_filter;
                        kesi->kesi_subclass_filter = ev_pcb->evp_subclass_filter;
-
                } else if (SOCK_PROTO(so) == SYSPROTO_CONTROL) {
-                       struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb;
-                       struct kern_ctl_info *kcsi =
-                           &si->soi_proto.pri_kern_ctl;
-                       struct kctl *kctl = kcb->kctl;
-
-                       si->soi_kind = SOCKINFO_KERN_CTL;
-
-                       if (kctl == 0)
-                               break;
-                       kcsi->kcsi_id = kctl->id;
-                       kcsi->kcsi_reg_unit = kctl->id;
-                       kcsi->kcsi_flags = kctl->flags;
-                       kcsi->kcsi_recvbufsize = kctl->recvbufsize;
-                       kcsi->kcsi_sendbufsize = kctl->sendbufsize;
-                       kcsi->kcsi_unit = kcb->unit;
-                       strlcpy(kcsi->kcsi_name, kctl->name, MAX_KCTL_NAME);
+                       kctl_fill_socketinfo(so, si);
                }
                break;
 
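
The kernel-control branch now delegates to kctl_fill_socketinfo(). Judging purely from the lines it replaces, that helper presumably does something along these lines (a sketch reconstructed from the deleted code, not the helper's actual definition, which lives with the kernel-control code):

    static void kctl_fill_socketinfo(struct socket *so, struct socket_info *si)
    {
            struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb;
            struct kern_ctl_info *kcsi = &si->soi_proto.pri_kern_ctl;
            struct kctl *kctl = kcb->kctl;

            si->soi_kind = SOCKINFO_KERN_CTL;
            if (kctl == NULL)
                    return;

            kcsi->kcsi_id = kctl->id;
            kcsi->kcsi_reg_unit = kctl->id;   /* the old code also used ->id here */
            kcsi->kcsi_flags = kctl->flags;
            kcsi->kcsi_recvbufsize = kctl->recvbufsize;
            kcsi->kcsi_sendbufsize = kctl->sendbufsize;
            kcsi->kcsi_unit = kcb->unit;
            strlcpy(kcsi->kcsi_name, kctl->name, MAX_KCTL_NAME);
    }
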
index 5bb098bbbd2e62daf354349227adc90c641c608f..d14302bec1194eec06d693ada7b21bdb6a7f5d39 100644 (file)
@@ -130,7 +130,8 @@ extern int  __doprnt(const char *fmt,
                                         va_list    argp,
                                         void       (*)(int, void *),
                                         void       *arg,
-                                        int        radix);
+                                        int        radix,
+                                        int        is_log);
 
 /*
  *     Record cpu that panic'd and lock around panic data
@@ -171,7 +172,7 @@ uprintf(const char *fmt, ...)
                if (pca.tty != NULL)
                        tty_lock(pca.tty);
                va_start(ap, fmt);
-               __doprnt(fmt, ap, putchar, &pca, 10);
+               __doprnt(fmt, ap, putchar, &pca, 10, FALSE);
                va_end(ap);
                if (pca.tty != NULL)
                tty_unlock(pca.tty);
@@ -236,7 +237,7 @@ tprintf(tpr_t tpr, const char *fmt, ...)
        pca.flags = flags;
        pca.tty   = tp;
        va_start(ap, fmt);
-       __doprnt(fmt, ap, putchar, &pca, 10);
+       __doprnt(fmt, ap, putchar, &pca, 10, FALSE);
        va_end(ap);
 
        if (tp != NULL)
@@ -265,7 +266,7 @@ ttyprintf(struct tty *tp, const char *fmt, ...)
                pca.tty   = tp;
                
                va_start(ap, fmt);
-               __doprnt(fmt, ap, putchar, &pca, 10);
+               __doprnt(fmt, ap, putchar, &pca, 10, TRUE);
                va_end(ap);
        }
 }
@@ -314,7 +315,7 @@ vaddlog(const char *fmt, va_list ap)
        }
 
        bsd_log_lock();
-       __doprnt(fmt, ap, putchar, &pca, 10);
+       __doprnt(fmt, ap, putchar, &pca, 10, TRUE);
        bsd_log_unlock();
        
        logwakeup();
@@ -334,7 +335,7 @@ _printf(int flags, struct tty *ttyp, const char *format, ...)
                tty_lock(ttyp);
        
                va_start(ap, format);
-               __doprnt(format, ap, putchar, &pca, 10);
+               __doprnt(format, ap, putchar, &pca, 10, TRUE);
                va_end(ap);
 
                tty_unlock(ttyp);
@@ -349,7 +350,7 @@ prf(const char *fmt, va_list ap, int flags, struct tty *ttyp)
        pca.flags = flags;
        pca.tty   = ttyp;
 
-       __doprnt(fmt, ap, putchar, &pca, 10);
+       __doprnt(fmt, ap, putchar, &pca, 10, TRUE);
 
        return 0;
 }
@@ -442,7 +443,7 @@ vprintf(const char *fmt, va_list ap)
 
        pca.flags = TOLOG | TOCONS;
        pca.tty   = NULL;
-       __doprnt(fmt, ap, putchar, &pca, 10);
+       __doprnt(fmt, ap, putchar, &pca, 10, TRUE);
        return 0;
 }
 
@@ -462,7 +463,7 @@ vsprintf(char *buf, const char *cfmt, va_list ap)
        info.str = buf;
        info.remain = 999999;
 
-       retval = __doprnt(cfmt, ap, snprintf_func, &info, 10);
+       retval = __doprnt(cfmt, ap, snprintf_func, &info, 10, FALSE);
        if (info.remain >= 1) {
                *info.str++ = '\0';
        }
@@ -495,7 +496,7 @@ vsnprintf(char *str, size_t size, const char *format, va_list ap)
 
        info.str = str;
        info.remain = size;
-       retval = __doprnt(format, ap, snprintf_func, &info, 10);
+       retval = __doprnt(format, ap, snprintf_func, &info, 10, FALSE);
        if (info.remain >= 1)
                *info.str++ = '\0';
        return retval;
@@ -515,7 +516,7 @@ snprintf_func(int ch, void *arg)
 int
 kvprintf(char const *fmt, void (*func)(int, void*), void *arg, int radix, va_list ap)
 {
-       __doprnt(fmt, ap, func, arg, radix);
+       __doprnt(fmt, ap, func, arg, radix, TRUE);
        return 0;
 }
 
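
The new is_log argument threads a destination hint through __doprnt(): paths that feed the console or message buffer pass TRUE, while the pure string formatters (vsprintf/vsnprintf) pass FALSE. A sketch of a new caller following that convention; the helper and its argument struct are illustrative, not part of this file:

    /* Format into a fixed buffer: not log output, so is_log is FALSE,
     * mirroring the vsnprintf() path above. */
    struct sbuf_arg {
            char   *p;
            size_t  left;
    };

    static void sbuf_putc(int ch, void *arg)
    {
            struct sbuf_arg *sb = arg;

            if (sb->left > 1) {
                    *sb->p++ = (char)ch;
                    sb->left--;
            }
    }

    static int sbuf_vprintf(char *buf, size_t len, const char *fmt, va_list ap)
    {
            struct sbuf_arg sb = { .p = buf, .left = len };
            int ret = __doprnt(fmt, ap, sbuf_putc, &sb, 10, FALSE);

            if (len > 0)
                    *sb.p = '\0';
            return ret;
    }
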
index 3255fb0d691712207c9cb4033372c73537e48b6b..a20ce301fc12cb214d8bdcac5fa1c46dd54c0b03 100644 (file)
@@ -30,14 +30,15 @@ coalition_create_syscall(user_addr_t cidp, uint32_t flags)
        kern_return_t kr;
        uint64_t cid;
        coalition_t coal;
+       int type = COALITION_CREATE_FLAGS_GET_TYPE(flags);
+       boolean_t privileged = !!(flags & COALITION_CREATE_FLAGS_PRIVILEGED);
 
-       if ((flags & (~COALITION_CREATE_FLAG_MASK)) != 0) {
+       if ((flags & (~COALITION_CREATE_FLAGS_MASK)) != 0)
+               return EINVAL;
+       if (type < 0 || type > COALITION_TYPE_MAX)
                return EINVAL;
-       }
-
-       boolean_t privileged = flags & COALITION_CREATE_FLAG_PRIVILEGED;
 
-       kr = coalition_create_internal(&coal, privileged);
+       kr = coalition_create_internal(type, privileged, &coal);
        if (kr != KERN_SUCCESS) {
                /* for now, the only kr is KERN_RESOURCE_SHORTAGE */
                error = ENOMEM;
@@ -46,9 +47,7 @@ coalition_create_syscall(user_addr_t cidp, uint32_t flags)
 
        cid = coalition_id(coal);
 
-#if COALITION_DEBUG
-       printf("%s(addr, %u) -> %llu\n", __func__, flags, cid);
-#endif
+       coal_dbg("(addr, %u) -> %llu", flags, cid);
        error = copyout(&cid, cidp, sizeof(cid));
 out:
        return error;
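
coalition_create_syscall() now unpacks both a coalition type and a privileged bit from the single flags word. The real masks live in the coalition headers; purely for illustration, an encoding of that shape would be decoded roughly like this (the EX_* values below are assumptions, not the header's definitions):

    /* Hypothetical layout for illustration only -- see the
     * COALITION_CREATE_FLAGS_* definitions for the real encoding. */
    #define EX_CREATE_FLAGS_PRIVILEGED  0x01
    #define EX_CREATE_FLAGS_TYPE_SHIFT  4
    #define EX_CREATE_FLAGS_TYPE_MASK   (0xF << EX_CREATE_FLAGS_TYPE_SHIFT)
    #define EX_CREATE_FLAGS_MASK        (EX_CREATE_FLAGS_PRIVILEGED | EX_CREATE_FLAGS_TYPE_MASK)
    #define EX_CREATE_FLAGS_GET_TYPE(f) (((f) & EX_CREATE_FLAGS_TYPE_MASK) >> EX_CREATE_FLAGS_TYPE_SHIFT)

    uint32_t flags = EX_CREATE_FLAGS_PRIVILEGED | (1 << EX_CREATE_FLAGS_TYPE_SHIFT);
    int type = EX_CREATE_FLAGS_GET_TYPE(flags);                     /* 1 */
    boolean_t privileged = !!(flags & EX_CREATE_FLAGS_PRIVILEGED);  /* TRUE */
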
@@ -98,17 +97,19 @@ coalition_request_terminate_syscall(user_addr_t cidp, uint32_t flags)
                break;
        case KERN_DEFAULT_SET:
                error = EPERM;
+               break;
        case KERN_TERMINATED:
                error = EALREADY;
+               break;
        case KERN_INVALID_NAME:
                error = ESRCH;
+               break;
        default:
                error = EIO;
+               break;
        }
 
-#if COALITION_DEBUG
-       printf("%s(%llu, %u) -> %d\n", __func__, cid, flags, error);
-#endif
+       coal_dbg("(%llu, %u) -> %d", cid, flags, error);
 
        return error;
 }
@@ -160,17 +161,19 @@ coalition_reap_syscall(user_addr_t cidp, uint32_t flags)
                break;
        case KERN_DEFAULT_SET:
                error = EPERM;
+               break;
        case KERN_TERMINATED:
                error = ESRCH;
+               break;
        case KERN_FAILURE:
                error = EBUSY;
+               break;
        default:
                error = EIO;
+               break;
        }
 
-#if COALITION_DEBUG
-       printf("%s(%llu, %u) -> %d\n", __func__, cid, flags, error);
-#endif
+       coal_dbg("(%llu, %u) -> %d", cid, flags, error);
 
        return error;
 }
@@ -184,8 +187,9 @@ int coalition(proc_t p, struct coalition_args *cap, __unused int32_t *retval)
        user_addr_t cidp = cap->cid;
        uint32_t flags = cap->flags;
        int error = 0;
+       int type = COALITION_CREATE_FLAGS_GET_TYPE(flags);
 
-       if (!task_is_in_privileged_coalition(p->task)) {
+       if (!task_is_in_privileged_coalition(p->task, type)) {
                return EPERM;
        }
 
@@ -279,3 +283,235 @@ bad:
        coalition_release(coal);
        return error;
 }
+
+#if defined(DEVELOPMENT) || defined(DEBUG)
+static int sysctl_coalition_get_ids SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       int error, pid;
+       proc_t tproc;
+       uint64_t value;
+       uint64_t ids[COALITION_NUM_TYPES];
+
+
+       error = SYSCTL_IN(req, &value, sizeof(value));
+       if (error)
+               return error;
+       if (!req->newptr)
+               pid = req->p->p_pid;
+       else
+               pid = (int)value;
+
+       coal_dbg("looking up coalitions for pid:%d", pid);
+       tproc = proc_find(pid);
+       if (tproc == NULL) {
+               coal_dbg("ERROR: Couldn't find pid:%d", pid);
+               return ESRCH;
+       }
+
+       task_coalition_ids(tproc->task, ids);
+       proc_rele(tproc);
+
+       return SYSCTL_OUT(req, ids, sizeof(ids));
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, coalitions, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+           0, 0, sysctl_coalition_get_ids, "Q", "coalition ids of a given process");
+
+
+static int sysctl_coalition_get_roles SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       int error, pid;
+       proc_t tproc;
+       int value;
+       int roles[COALITION_NUM_TYPES];
+
+
+       error = SYSCTL_IN(req, &value, sizeof(value));
+       if (error)
+               return error;
+       if (!req->newptr)
+               pid = req->p->p_pid;
+       else
+               pid = (int)value;
+
+       coal_dbg("looking up coalitions for pid:%d", pid);
+       tproc = proc_find(pid);
+       if (tproc == NULL) {
+               coal_dbg("ERROR: Couldn't find pid:%d", pid);
+               return ESRCH;
+       }
+
+       task_coalition_roles(tproc->task, roles);
+       proc_rele(tproc);
+
+       return SYSCTL_OUT(req, roles, sizeof(roles));
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, coalition_roles, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+           0, 0, sysctl_coalition_get_roles, "I", "coalition roles of a given process");
+
+
+static int sysctl_coalition_get_page_count SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       int error, pid;
+       proc_t tproc;
+       coalition_t coal;
+       uint64_t value;
+       uint64_t pgcount[COALITION_NUM_TYPES];
+
+
+       error = SYSCTL_IN(req, &value, sizeof(value));
+       if (error)
+               return error;
+       if (!req->newptr)
+               pid = req->p->p_pid;
+       else
+               pid = (int)value;
+
+       coal_dbg("looking up coalitions for pid:%d", pid);
+       tproc = proc_find(pid);
+       if (tproc == NULL) {
+               coal_dbg("ERROR: Couldn't find pid:%d", pid);
+               return ESRCH;
+       }
+
+       memset(pgcount, 0, sizeof(pgcount));
+
+       for (int t = 0; t < COALITION_NUM_TYPES; t++) {
+               coal = COALITION_NULL;
+               coalition_is_leader(tproc->task, t, &coal);
+               if (coal != COALITION_NULL) {
+                       int ntasks = 0;
+                       pgcount[t] = coalition_get_page_count(coal, &ntasks);
+                       coal_dbg("PID:%d, Coalition:%lld, type:%d, pgcount:%lld",
+                                pid, coalition_id(coal), t, pgcount[t]);
+               }
+       }
+
+       proc_rele(tproc);
+
+       return SYSCTL_OUT(req, pgcount, sizeof(pgcount));
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, coalition_page_count, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+           0, 0, sysctl_coalition_get_page_count, "Q", "coalition page count of a specified process");
+
+
+static int sysctl_coalition_get_pid_list SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       int error, type, sort_order, pid;
+       int value[3];
+       int has_pid = 1;
+
+       coalition_t coal = COALITION_NULL;
+       proc_t tproc = PROC_NULL;
+       int npids = 0;
+       int pidlist[100] = { 0, };
+
+
+       error = SYSCTL_IN(req, &value, sizeof(value));
+       if (error) {
+               has_pid = 0;
+               error = SYSCTL_IN(req, &value, sizeof(value) - sizeof(value[0]));
+       }
+       if (error)
+               return error;
+       if (!req->newptr) {
+               type = COALITION_TYPE_RESOURCE;
+               sort_order = COALITION_SORT_DEFAULT;
+               pid = req->p->p_pid;
+       } else {
+               type = value[0];
+               sort_order = value[1];
+               if (has_pid)
+                       pid = value[2];
+               else
+                       pid = req->p->p_pid;
+       }
+
+       if (type < 0 || type >= COALITION_NUM_TYPES)
+               return EINVAL;
+
+       coal_dbg("getting constituent PIDS for coalition of type %d "
+                "containing pid:%d (sort:%d)", type, pid, sort_order);
+       tproc = proc_find(pid);
+       if (tproc == NULL) {
+               coal_dbg("ERROR: Couldn't find pid:%d", pid);
+               return ESRCH;
+       }
+
+       (void)coalition_is_leader(tproc->task, type, &coal);
+       if (coal == COALITION_NULL) {
+               goto out;
+       }
+
+       npids = coalition_get_pid_list(coal, COALITION_ROLEMASK_ALLROLES, sort_order,
+                                      pidlist, sizeof(pidlist) / sizeof(pidlist[0]));
+       if (npids > (int)(sizeof(pidlist) / sizeof(pidlist[0]))) {
+               coal_dbg("Too many members in coalition %llu (from pid:%d): %d!",
+                        coalition_id(coal), pid, npids);
+               npids = sizeof(pidlist) / sizeof(pidlist[0]);
+       }
+
+out:
+       proc_rele(tproc);
+
+       if (npids == 0)
+               return ENOENT;
+
+       return SYSCTL_OUT(req, pidlist, sizeof(pidlist[0]) * npids);
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, coalition_pid_list, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+           0, 0, sysctl_coalition_get_pid_list, "I", "list of PIDS which are members of the coalition of the current process");
+
+#if DEVELOPMENT
+static int sysctl_coalition_notify SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       int error, should_set;
+       coalition_t coal;
+       uint64_t value[2];
+
+       should_set = 1;
+       error = SYSCTL_IN(req, value, sizeof(value));
+       if (error) {
+               error = SYSCTL_IN(req, value, sizeof(value) - sizeof(value[0]));
+               if (error)
+                       return error;
+               should_set = 0;
+       }
+       if (!req->newptr)
+               return error;
+
+       coal = coalition_find_by_id(value[0]);
+       if (coal == COALITION_NULL) {
+               coal_dbg("Can't find coalition with ID:%lld", value[0]);
+               return ESRCH;
+       }
+
+       if (should_set)
+               coalition_set_notify(coal, (int)value[1]);
+
+       value[0] = (uint64_t)coalition_should_notify(coal);
+
+       coalition_release(coal);
+
+       return SYSCTL_OUT(req, value, sizeof(value[0]));
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, coalition_notify, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+           0, 0, sysctl_coalition_notify, "Q", "get/set coalition notification flag");
+
+extern int unrestrict_coalition_syscalls;
+SYSCTL_INT(_kern, OID_AUTO, unrestrict_coalitions,
+          CTLFLAG_RW, &unrestrict_coalition_syscalls, 0,
+          "unrestrict the coalition interface");
+
+#endif /* DEVELOPMENT */
+
+#endif /* DEVELOPMENT || DEBUG */
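
On DEVELOPMENT/DEBUG kernels the handlers above are reachable from user space with sysctlbyname(): the pid is written as the new value and the result array is read back in the same call. A sketch for kern.coalitions (the array length of 2 is an assumption standing in for COALITION_NUM_TYPES):

    #include <stdio.h>
    #include <stdint.h>
    #include <sys/sysctl.h>

    /* Read the coalition id of each type for a given pid
     * (DEVELOPMENT/DEBUG kernels only). */
    static void print_coalition_ids(uint64_t pid)
    {
            uint64_t ids[2] = { 0 };    /* one id per coalition type; 2 assumed */
            size_t len = sizeof(ids);

            if (sysctlbyname("kern.coalitions", ids, &len, &pid, sizeof(pid)) != 0) {
                    perror("kern.coalitions");
                    return;
            }
            for (size_t i = 0; i < len / sizeof(ids[0]); i++)
                    printf("type %zu -> coalition id %llu\n",
                           i, (unsigned long long)ids[i]);
    }
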
index 1247ff35582114d500385bcbfd83b7bd4d05e9b5..d6c46f58db4658d7f9e9a759e44085c3e059948c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <kern/ledger.h>
 #include <kern/task.h>
 #include <kern/telemetry.h>
+#include <kern/waitq.h>
+#include <kern/sched_prim.h>
 
 #include <sys/mbuf.h>
 #include <sys/domain.h>
 #include <netinet/tcpip.h>
 #include <netinet/tcp_debug.h>
 /* for wait queue based select */
-#include <kern/wait_queue.h>
+#include <kern/waitq.h>
 #include <kern/kalloc.h>
 #include <sys/vnode_internal.h>
 
@@ -144,6 +146,7 @@ void evpipefree(struct pipe *);
 void postpipeevent(struct pipe *, int);
 void postevent(struct socket *, struct sockbuf *, int);
 extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp);
+extern void delay(int);
 
 int rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval);
 int wr_uio(struct proc *p, struct fileproc *fp, uio_t uio, user_ssize_t *retval);
@@ -159,16 +162,16 @@ __private_extern__ void   donefileread(struct proc *p, struct fileproc *fp_ret, in
 
 
 /* Conflict wait queue for when selects collide (opaque type) */
-struct wait_queue select_conflict_queue;
+struct waitq select_conflict_queue;
 
 /*
  * Init routine called from bsd_init.c
  */
-void select_wait_queue_init(void);
+void select_waitq_init(void);
 void
-select_wait_queue_init(void)
+select_waitq_init(void)
 {
-       wait_queue_init(&select_conflict_queue, SYNC_POLICY_FIFO);
+       waitq_init(&select_conflict_queue, SYNC_POLICY_FIFO | SYNC_POLICY_DISABLE_IRQ);
 }
 
 #define f_flag f_fglob->fg_flag
@@ -933,7 +936,7 @@ int selwait, nselcoll;
 extern int selcontinue(int error);
 extern int selprocess(int error, int sel_pass);
 static int selscan(struct proc *p, struct _select * sel, struct _select_data * seldata,
-                       int nfd, int32_t *retval, int sel_pass, wait_queue_sub_t wqsub);
+                       int nfd, int32_t *retval, int sel_pass, struct waitq_set *wqset);
 static int selcount(struct proc *p, u_int32_t *ibits, int nfd, int *count);
 static int seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount);
 static int seldrop(struct proc *p, u_int32_t *ibits, int nfd);
@@ -957,13 +960,14 @@ int
 select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retval)
 {
        int error = 0;
-       u_int ni, nw, size;
+       u_int ni, nw;
        thread_t th_act;
        struct uthread  *uth;
        struct _select *sel;
        struct _select_data *seldata;
        int needzerofill = 1;
        int count = 0;
+       size_t sz = 0;
 
        th_act = current_thread();
        uth = get_bsdthread_info(th_act);
@@ -973,6 +977,8 @@ select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retva
 
        seldata->args = uap;
        seldata->retval = retval;
+       seldata->wqp = NULL;
+       seldata->count = 0;
 
        if (uap->nd < 0) {
                return (EINVAL);
@@ -1074,28 +1080,57 @@ select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retva
                        goto continuation;
        }
 
+       /*
+        * We need an array of waitq pointers. This is due to the new way
+        * in which waitqs are linked to sets. When a thread selects on a
+        * file descriptor, a waitq (embedded in a selinfo structure) is
+        * added to the thread's local waitq set. There is no longer any
+        * way to directly iterate over all members of a given waitq set.
+        * The process of linking a waitq into a set may allocate a link
+        * table object. Because we can't iterate over all the waitqs to
+        * which our thread waitq set belongs, we need a way of removing
+        * this link object!
+        *
+        * Thus we need a buffer which will hold one waitq pointer
+        * per FD being selected. During the tear-down phase we can use
+        * these pointers to dis-associate the underlying selinfo's waitq
+        * from our thread's waitq set.
+        *
+        * Because we also need to allocate a waitq set for this thread,
+        * we use a bare buffer pointer to hold all the memory. Note that
+        * this memory is cached in the thread pointer and not reaped until
+        * the thread exits. This is generally OK because threads that
+        * call select tend to keep calling select repeatedly.
+        */
+       sz = ALIGN(sizeof(struct waitq_set)) + (count * sizeof(uint64_t));
+       if (sz > uth->uu_wqstate_sz) {
+               /* (re)allocate a buffer to hold waitq pointers */
+               if (uth->uu_wqset) {
+                       if (waitq_set_is_valid(uth->uu_wqset))
+                               waitq_set_deinit(uth->uu_wqset);
+                       FREE(uth->uu_wqset, M_SELECT);
+               } else if (uth->uu_wqstate_sz && !uth->uu_wqset)
+                       panic("select: thread structure corrupt! "
+                             "uu_wqstate_sz:%ld, wqstate_buf == NULL",
+                             uth->uu_wqstate_sz);
+               uth->uu_wqstate_sz = sz;
+               MALLOC(uth->uu_wqset, struct waitq_set *, sz, M_SELECT, M_WAITOK);
+               if (!uth->uu_wqset)
+                       panic("can't allocate %ld bytes for wqstate buffer",
+                             uth->uu_wqstate_sz);
+               waitq_set_init(uth->uu_wqset,
+                              SYNC_POLICY_FIFO|SYNC_POLICY_PREPOST|SYNC_POLICY_DISABLE_IRQ, NULL);
+       }
+
+       if (!waitq_set_is_valid(uth->uu_wqset))
+               waitq_set_init(uth->uu_wqset,
+                              SYNC_POLICY_FIFO|SYNC_POLICY_PREPOST|SYNC_POLICY_DISABLE_IRQ, NULL);
+
+       /* the last chunk of our buffer is an array of waitq pointers */
+       seldata->wqp = (uint64_t *)((char *)(uth->uu_wqset) + ALIGN(sizeof(struct waitq_set)));
+       bzero(seldata->wqp, sz - ALIGN(sizeof(struct waitq_set)));
+
        seldata->count = count;
-       size = SIZEOF_WAITQUEUE_SET + (count * SIZEOF_WAITQUEUE_LINK);
-       if (uth->uu_allocsize) {
-               if (uth->uu_wqset == 0)
-                       panic("select: wql memory smashed");
-               /* needed for the select now */
-               if (size > uth->uu_allocsize) {
-                       kfree(uth->uu_wqset,  uth->uu_allocsize);
-                       uth->uu_allocsize = size;
-                       uth->uu_wqset = (wait_queue_set_t)kalloc(size);
-                       if (uth->uu_wqset == (wait_queue_set_t)NULL)
-                               panic("failed to allocate memory for waitqueue\n");
-               }
-       } else {
-               uth->uu_allocsize = size;
-               uth->uu_wqset = (wait_queue_set_t)kalloc(uth->uu_allocsize);
-               if (uth->uu_wqset == (wait_queue_set_t)NULL)
-                       panic("failed to allocate memory for waitqueue\n");
-       }
-       bzero(uth->uu_wqset, size);
-       seldata->wql = (char *)uth->uu_wqset + SIZEOF_WAITQUEUE_SET;
-       wait_queue_set_init(uth->uu_wqset, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST));
 
 continuation:
 
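
The per-thread buffer described in the comment above is a single allocation with two regions: the waitq_set header, followed by one 64-bit prepost-id slot per selected descriptor. The sizing and slicing, restated from the code (ALIGN is whatever alignment macro this file already uses):

    /*
     *   uth->uu_wqset
     *   |
     *   v
     *   [ struct waitq_set (ALIGNed) ][ uint64_t wqp[0] ... wqp[count-1] ]
     */
    size_t sz = ALIGN(sizeof(struct waitq_set)) + (count * sizeof(uint64_t));

    /* header region: the thread's private waitq set */
    struct waitq_set *wqset = uth->uu_wqset;

    /* trailing region: per-fd prepost ids used to unlink during tear-down */
    uint64_t *wqp = (uint64_t *)((char *)wqset + ALIGN(sizeof(struct waitq_set)));
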
@@ -1152,40 +1187,31 @@ selprocess(int error, int sel_pass)
        retval = seldata->retval;
 
        if ((error != 0) && (sel_pass == SEL_FIRSTPASS))
-                       unwind = 0;
+               unwind = 0;
        if (seldata->count == 0)
-                       unwind = 0;
+               unwind = 0;
 retry:
-       if (error != 0) {
-               sel_pass = SEL_FIRSTPASS;       /* Reset for seldrop */
+       if (error != 0)
                goto done;
-       }
 
        ncoll = nselcoll;
        OSBitOrAtomic(P_SELECT, &p->p_flag);
+
        /* skip scans if the select is just for timeouts */
        if (seldata->count) {
-               /*
-                * Clear out any dangling refs from prior calls; technically
-                * there should not be any.
-                */
-               if (sel_pass == SEL_FIRSTPASS)
-                       wait_queue_sub_clearrefs(uth->uu_wqset);
-
-               error = selscan(p, sel, seldata, uap->nd, retval, sel_pass, (wait_queue_sub_t)uth->uu_wqset);
+               error = selscan(p, sel, seldata, uap->nd, retval, sel_pass, uth->uu_wqset);
                if (error || *retval) {
                        goto done;
                }
-               if (prepost) {
-                       /* if the select of log, then we canwakeup and discover some one
-                       * else already read the data; go toselct again if time permits
-                       */
-                       prepost = 0;
-                       doretry = 1;
-               }
-               if (somewakeup) {
-                       somewakeup = 0;
-                       doretry = 1;
+               if (prepost || somewakeup) {
+                       /*
+                        * if the select is on a log, then we can wake up and
+                        * discover someone else already read the data;
+                        * go to select again if time permits
+                        */
+                       prepost = 0;
+                       somewakeup = 0;
+                       doretry = 1;
                }
        }
 
@@ -1221,13 +1247,15 @@ retry:
        OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
 
        /* if the select is just for timeout skip check */
-       if (seldata->count &&(sel_pass == SEL_SECONDPASS))
+       if (seldata->count && (sel_pass == SEL_SECONDPASS))
                panic("selprocess: 2nd pass assertwaiting");
 
-       /* Wait Queue Subordinate has waitqueue as first element */
-       wait_result = wait_queue_assert_wait_with_leeway((wait_queue_t)uth->uu_wqset,
-                                            NULL, THREAD_ABORTSAFE,
-                                            TIMEOUT_URGENCY_USER_NORMAL, seldata->abstime, 0);
+       /* waitq_set has waitqueue as first element */
+       wait_result = waitq_assert_wait64_leeway((struct waitq *)uth->uu_wqset,
+                                                NO_EVENT64, THREAD_ABORTSAFE,
+                                                TIMEOUT_URGENCY_USER_NORMAL,
+                                                seldata->abstime,
+                                                TIMEOUT_NO_LEEWAY);
        if (wait_result != THREAD_AWAKENED) {
                /* there are no preposted events */
                error = tsleep1(NULL, PSOCK | PCATCH,
@@ -1245,8 +1273,14 @@ retry:
        }
 done:
        if (unwind) {
-               wait_subqueue_unlink_all(uth->uu_wqset);
                seldrop(p, sel->ibits, uap->nd);
+               waitq_set_deinit(uth->uu_wqset);
+               /*
+                * zero out the waitq pointer array to avoid use-after free
+                * errors in the selcount error path (seldrop_locked) if/when
+                * the thread re-calls select().
+                */
+               bzero((void *)uth->uu_wqset, uth->uu_wqstate_sz);
        }
        OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
        /* select is not restarted after signals... */
@@ -1276,6 +1310,119 @@ done:
 }
 
 
+/**
+ * remove the fileproc's underlying waitq from the supplied waitq set;
+ * clear FP_INSELECT when appropriate
+ *
+ * Parameters:
+ *             fp      File proc that is potentially currently in select
+ *             wqset   Waitq set to which the fileproc may belong
+ *                     (usually this is the thread's private waitq set)
+ * Conditions:
+ *             proc_fdlock is held
+ */
+static void selunlinkfp(struct fileproc *fp, uint64_t wqp_id, struct waitq_set *wqset)
+{
+       int valid_set = waitq_set_is_valid(wqset);
+       int valid_q = !!wqp_id;
+
+       /*
+        * This could be called (from selcount error path) before we setup
+        * the thread's wqset. Check the wqset passed in, and only unlink if
+        * the set is valid.
+        */
+
+       /* unlink the underlying waitq from the input set (thread waitq set) */
+       if (valid_q && valid_set)
+               waitq_unlink_by_prepost_id(wqp_id, wqset);
+
+       /* allow passing a NULL/invalid fp for seldrop unwind */
+       if (!fp || !(fp->f_flags & (FP_INSELECT|FP_SELCONFLICT)))
+               return;
+
+       /*
+        * We can always remove the conflict queue from our thread's set: this
+        * will not affect other threads that potentially need to be awoken on
+        * the conflict queue during a fileproc_drain - those sets will still
+        * be linked with the global conflict queue, and the last waiter
+        * on the fp clears the CONFLICT marker.
+        */
+       if (valid_set && (fp->f_flags & FP_SELCONFLICT))
+               waitq_unlink(&select_conflict_queue, wqset);
+
+       /* jca: TODO:
+        * This isn't quite right - we don't actually know if this
+        * fileproc is in another select or not! Here we just assume
+        * that if we were the first thread to select on the FD, then
+        * we'll be the one to clear this flag...
+        */
+       if (valid_set && fp->f_wset == (void *)wqset) {
+               fp->f_flags &= ~FP_INSELECT;
+               fp->f_wset = NULL;
+       }
+}
+
+/**
+ * connect a fileproc to the given wqset, potentially bridging to a waitq
+ * pointed to indirectly by wq_data
+ *
+ * Parameters:
+ *             fp      File proc potentially currently in select
+ *             wq_data Pointer to a pointer to a waitq (could be NULL)
+ *             wqset   Waitq set to which the fileproc should now belong
+ *                     (usually this is the thread's private waitq set)
+ *
+ * Conditions:
+ *             proc_fdlock is held
+ */
+static uint64_t sellinkfp(struct fileproc *fp, void **wq_data, struct waitq_set *wqset)
+{
+       struct waitq *f_wq = NULL;
+
+       if ((fp->f_flags & FP_INSELECT) != FP_INSELECT) {
+               if (wq_data)
+                       panic("non-null data:%p on fp:%p not in select?!"
+                             "(wqset:%p)", wq_data, fp, wqset);
+               return 0;
+       }
+
+       if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
+               /*
+                * The conflict queue requires disabling interrupts, so we
+                * need to explicitly reserve a link object to avoid a
+                * panic/assert in the waitq code. Hopefully this extra step
+                * can be avoided if we can split the waitq structure into
+                * blocking and linkage sub-structures.
+                */
+               uint64_t reserved_link = waitq_link_reserve(&select_conflict_queue);
+               waitq_link(&select_conflict_queue, wqset, WAITQ_SHOULD_LOCK, &reserved_link);
+               waitq_link_release(reserved_link);
+       }
+
+       /*
+        * The wq_data parameter has potentially been set by selrecord called
+        * from a subsystem's fo_select() function. If the subsystem does not
+        * call selrecord, then wq_data will be NULL
+        *
+        * Use memcpy to get the value into a proper pointer because
+        * wq_data most likely points to a stack variable that could be
+        * unaligned on 32-bit systems.
+        */
+       if (wq_data) {
+               memcpy(&f_wq, wq_data, sizeof(f_wq));
+               if (!waitq_is_valid(f_wq))
+                       f_wq = NULL;
+       }
+
+       /* record the first thread's wqset in the fileproc structure */
+       if (!fp->f_wset)
+               fp->f_wset = (void *)wqset;
+
+       /* handles NULL f_wq */
+       return waitq_get_prepost_id(f_wq);
+}
+
+
 /*
  * selscan
  *
@@ -1285,7 +1432,7 @@ done:
  *             retval                  The per thread system call return area
  *             sel_pass                Which pass this is; allowed values are
  *                                             SEL_FIRSTPASS and SEL_SECONDPASS
- *             wqsub                   The per thread wait queue set
+ *             wqset                   The per thread wait queue set
  *
  * Returns:    0                       Success
  *             EIO                     Invalid p->p_fd field XXX Obsolete?
@@ -1293,8 +1440,8 @@ done:
  *                                             invalid.
  */
 static int
-selscan(struct proc *p, struct _select *sel, struct _select_data * seldata, int nfd, int32_t *retval,
-       int sel_pass, wait_queue_sub_t wqsub)
+selscan(struct proc *p, struct _select *sel, struct _select_data * seldata,
+       int nfd, int32_t *retval, int sel_pass, struct waitq_set *wqset)
 {
        struct filedesc *fdp = p->p_fd;
        int msk, i, j, fd;
@@ -1306,8 +1453,7 @@ selscan(struct proc *p, struct _select *sel, struct _select_data * seldata, int
        u_int32_t *iptr, *optr;
        u_int nw;
        u_int32_t *ibits, *obits;
-       char * wql;
-       char * wql_ptr;
+       uint64_t reserved_link, *rl_ptr = NULL;
        int count;
        struct vfs_context context = *vfs_context_current();
 
@@ -1321,75 +1467,98 @@ selscan(struct proc *p, struct _select *sel, struct _select_data * seldata, int
        }
        ibits = sel->ibits;
        obits = sel->obits;
-       wql = seldata->wql;
 
        nw = howmany(nfd, NFDBITS);
 
        count = seldata->count;
 
        nc = 0;
-       if (count) {
-               proc_fdlock(p);
-               for (msk = 0; msk < 3; msk++) {
-                       iptr = (u_int32_t *)&ibits[msk * nw];
-                       optr = (u_int32_t *)&obits[msk * nw];
+       if (!count) {
+               *retval = 0;
+               return 0;
+       }
+
+       proc_fdlock(p);
+       for (msk = 0; msk < 3; msk++) {
+               iptr = (u_int32_t *)&ibits[msk * nw];
+               optr = (u_int32_t *)&obits[msk * nw];
+
+               for (i = 0; i < nfd; i += NFDBITS) {
+                       bits = iptr[i/NFDBITS];
 
-                       for (i = 0; i < nfd; i += NFDBITS) {
-                               bits = iptr[i/NFDBITS];
+                       while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
+                               bits &= ~(1 << j);
 
-                               while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
-                                       bits &= ~(1 << j);
+                               if (fd < fdp->fd_nfiles)
+                                       fp = fdp->fd_ofiles[fd];
+                               else
+                                       fp = NULL;
 
-                                       if (fd < fdp->fd_nfiles)
-                                               fp = fdp->fd_ofiles[fd];
+                               if (fp == NULL || (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
+                                       /*
+                                        * If we abort because of a bad
+                                        * fd, let the caller unwind...
+                                        */
+                                       proc_fdunlock(p);
+                                       return(EBADF);
+                               }
+                               if (sel_pass == SEL_SECONDPASS) {
+                                       reserved_link = 0;
+                                       rl_ptr = NULL;
+                                       selunlinkfp(fp, seldata->wqp[nc], wqset);
+                               } else {
+                                       reserved_link = waitq_link_reserve((struct waitq *)wqset);
+                                       rl_ptr = &reserved_link;
+                                       if (fp->f_flags & FP_INSELECT)
+                                               /* someone is already in select on this fp */
+                                               fp->f_flags |= FP_SELCONFLICT;
                                        else
-                                               fp = NULL;
-
-                                       if (fp == NULL || (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
-                                               /*
-                                                * If we abort because of a bad
-                                                * fd, let the caller unwind...
-                                                */
-                                               proc_fdunlock(p);
-                                               return(EBADF);
-                                       }
-                                       if (sel_pass == SEL_SECONDPASS) {
-                                               wql_ptr = (char *)0;
-                                               if ((fp->f_flags & FP_INSELECT) && (fp->f_waddr == (void *)wqsub)) {
-                                                       fp->f_flags &= ~FP_INSELECT;
-                                                       fp->f_waddr = (void *)0;
-                                               }
-                                       } else {
-                                               wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
-                                               if (fp->f_flags & FP_INSELECT) {
-                                                       /* someone is already in select on this fp */
-                                                       fp->f_flags |= FP_SELCONFLICT;
-                                                       wait_queue_link(&select_conflict_queue, (wait_queue_set_t)wqsub);
-                                               } else {
-                                                       fp->f_flags |= FP_INSELECT;
-                                                       fp->f_waddr = (void *)wqsub;
-                                               }
-                                       }
+                                               fp->f_flags |= FP_INSELECT;
+                               }
 
-                                       context.vc_ucred = fp->f_cred;
+                               context.vc_ucred = fp->f_cred;
 
-                                       /* The select; set the bit, if true */
-                                       if (fp->f_ops && fp->f_type
-                                               && fo_select(fp, flag[msk], wql_ptr, &context)) {
-                                               optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
-                                               n++;
-                                       }
-                                       nc++;
+                               /*
+                                * stash this value b/c fo_select may replace
+                                * reserved_link with a pointer to a waitq object
+                                */
+                               uint64_t rsvd = reserved_link;
+
+                               /* The select; set the bit, if true */
+                               if (fp->f_ops && fp->f_type
+                                       && fo_select(fp, flag[msk], rl_ptr, &context)) {
+                                       optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
+                                       n++;
+                               }
+                               if (sel_pass == SEL_FIRSTPASS) {
+                                       waitq_link_release(rsvd);
+                                       /*
+                                        * If the fp's supporting selinfo structure was linked
+                                        * to this thread's waitq set, then 'reserved_link'
+                                        * will have been updated by selrecord to be a pointer
+                                        * to the selinfo's waitq.
+                                        */
+                                       if (reserved_link == rsvd)
+                                               rl_ptr = NULL; /* fo_select never called selrecord() */
+                                       /*
+                                        * Hook up the thread's waitq set either to
+                                        * the fileproc structure, or to the global
+                                        * conflict queue: but only on the first
+                                        * select pass.
+                                        */
+                                       seldata->wqp[nc] = sellinkfp(fp, (void **)rl_ptr, wqset);
                                }
+                               nc++;
                        }
                }
-               proc_fdunlock(p);
        }
+       proc_fdunlock(p);
+
        *retval = n;
        return (0);
 }
 
-int poll_callback(struct kqueue *, struct kevent64_s *, void *);
+int poll_callback(struct kqueue *, struct kevent_internal_s *, void *);
 
 struct poll_continue_args {
        user_addr_t pca_fds;
@@ -1466,7 +1635,6 @@ poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, int32_t *retval)
        OSBitOrAtomic(P_SELECT, &p->p_flag);
        for (i = 0; i < nfds; i++) {
                short events = fds[i].events;
-               struct kevent64_s kev;
                int kerror = 0;
 
                /* per spec, ignore fd values below zero */
@@ -1476,13 +1644,10 @@ poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, int32_t *retval)
                }
 
                /* convert the poll event into a kqueue kevent */
-               kev.ident = fds[i].fd;
-               kev.flags = EV_ADD | EV_ONESHOT | EV_POLL;
-               kev.udata = CAST_USER_ADDR_T(&fds[i]);
-               kev.fflags = 0;
-               kev.data = 0;
-               kev.ext[0] = 0;
-               kev.ext[1] = 0;
+               struct kevent_internal_s kev = {
+                       .ident = fds[i].fd,
+                       .flags = EV_ADD | EV_ONESHOT | EV_POLL,
+                       .udata = CAST_USER_ADDR_T(&fds[i]) };
 
                /* Handle input events */
                if (events & ( POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND | POLLHUP )) {
@@ -1554,7 +1719,7 @@ poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, int32_t *retval)
 }
 
 int
-poll_callback(__unused struct kqueue *kq, struct kevent64_s *kevp, void *data)
+poll_callback(__unused struct kqueue *kq, struct kevent_internal_s *kevp, void *data)
 {
        struct poll_continue_args *cont = (struct poll_continue_args *)data;
        struct pollfd *fds = CAST_DOWN(struct pollfd *, kevp->udata);
@@ -1572,10 +1737,9 @@ poll_callback(__unused struct kqueue *kq, struct kevent64_s *kevp, void *data)
                if (fds->revents & POLLHUP)
                        mask = (POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND );
                else {
-                       if ((kevp->flags & EV_ERROR) == 0 && kevp->data != 0)
-                               mask = (POLLIN | POLLRDNORM );
+                       mask = (POLLIN | POLLRDNORM);
                        if (kevp->flags & EV_OOBAND)
-                               mask |= ( POLLPRI | POLLRDBAND );
+                               mask |= (POLLPRI | POLLRDBAND);
                }
                fds->revents |= (fds->events & mask);
                break;
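
poll() is implemented on top of kqueue here: each pollfd is converted into an internal kevent, and poll_callback() folds the filter results back into revents, masked by the events the caller asked for. The user-space contract is unchanged; a minimal example:

    #include <poll.h>
    #include <stdio.h>

    int main(void)
    {
            /* Wait up to one second for input on stdin; the kernel fills in revents. */
            struct pollfd pfd = { .fd = 0, .events = POLLIN };
            int n = poll(&pfd, 1, 1000);

            if (n > 0 && (pfd.revents & POLLIN))
                    printf("stdin is readable\n");
            else if (n == 0)
                    printf("timed out\n");
            return 0;
    }
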
@@ -1690,7 +1854,7 @@ selcount(struct proc *p, u_int32_t *ibits, int nfd, int *countp)
 bad:
        dropcount = 0;
        
-       if (n== 0)
+       if (n == 0)
                goto out;
        /* Ignore error return; it's already EBADF */
        (void)seldrop_locked(p, ibits, nfd, n, &need_wakeup, 1);
@@ -1711,7 +1875,7 @@ out:
  * outstanding per fileproc f_iocount() picked up during the selcount().
  *
  * Parameters: p                       Process performing the select
- *             ibits                   Input pit bector of fd's
+ *             ibits                   Input bit vector of fd's
  *             nfd                     Number of fd's
  *             lim                     Limit to number of vector entries to
  *                                             consider, or -1 for "all"
@@ -1733,7 +1897,7 @@ static int
 seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount)
 {
        struct filedesc *fdp = p->p_fd;
-       int msk, i, j, fd;
+       int msk, i, j, nc, fd;
        u_int32_t bits;
        struct fileproc *fp;
        u_int32_t *iptr;
@@ -1741,6 +1905,7 @@ seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wak
        int error = 0;
        int dropcount = 0;
        uthread_t uth = get_bsdthread_info(current_thread());
+       struct _select_data *seldata;
 
        *need_wakeup = 0;
 
@@ -1753,7 +1918,9 @@ seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wak
        }
 
        nw = howmany(nfd, NFDBITS);
+       seldata = &uth->uu_kevent.ss_select_data;
 
+       nc = 0;
        for (msk = 0; msk < 3; msk++) {
                iptr = (u_int32_t *)&ibits[msk * nw];
                for (i = 0; i < nfd; i += NFDBITS) {
@@ -1768,20 +1935,22 @@ seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wak
                                if ((fromselcount != 0) && (++dropcount > lim))
                                        goto done;
 
+                               /*
+                                * unlink even potentially NULL fileprocs.
+                                * If the FD was closed from under us, we
+                                * still need to clean up the waitq links!
+                                */
+                               selunlinkfp(fp,
+                                           seldata->wqp ? seldata->wqp[nc] : 0,
+                                           uth->uu_wqset);
+
+                               nc++;
+
                                if (fp == NULL) {
                                        /* skip (now) bad fds */
                                        error = EBADF;
                                        continue;
                                }
-                               /*
-                                * Only clear the flag if we set it.  We'll
-                                * only find that we set it if we had made
-                                * at least one [partial] pass through selscan().
-                                */
-                               if ((fp->f_flags & FP_INSELECT) && (fp->f_waddr == (void *)uth->uu_wqset)) {
-                                       fp->f_flags &= ~FP_INSELECT;
-                                       fp->f_waddr = (void *)0;
-                               }
 
                                fp->f_iocount--;
                                if (fp->f_iocount < 0)
@@ -1828,33 +1997,59 @@ seldrop(struct proc *p, u_int32_t *ibits, int nfd)
  * Record a select request.
  */
 void
-selrecord(__unused struct proc *selector, struct selinfo *sip, void * p_wql)
+selrecord(__unused struct proc *selector, struct selinfo *sip, void *s_data)
 {
        thread_t        cur_act = current_thread();
        struct uthread * ut = get_bsdthread_info(cur_act);
+       /* on input, s_data points to the 64-bit ID of a reserved link object */
+       uint64_t *reserved_link = (uint64_t *)s_data;
 
        /* need to look at collisions */
 
        /*do not record if this is second pass of select */
-       if(p_wql == (void *)0) {
+       if (!s_data)
                return;
-       }
 
        if ((sip->si_flags & SI_INITED) == 0) {
-               wait_queue_init(&sip->si_wait_queue, SYNC_POLICY_FIFO);
+               waitq_init(&sip->si_waitq, SYNC_POLICY_FIFO | SYNC_POLICY_DISABLE_IRQ);
                sip->si_flags |= SI_INITED;
                sip->si_flags &= ~SI_CLEAR;
        }
 
-       if (sip->si_flags & SI_RECORDED) {
+       if (sip->si_flags & SI_RECORDED)
                sip->si_flags |= SI_COLL;
-       else
+       else
                sip->si_flags &= ~SI_COLL;
 
        sip->si_flags |= SI_RECORDED;
-       if (!wait_queue_member(&sip->si_wait_queue, ut->uu_wqset))
-               wait_queue_link_noalloc(&sip->si_wait_queue, ut->uu_wqset,
-                                       (wait_queue_link_t)p_wql);
+       /* note: this checks for pre-existing linkage */
+       waitq_link(&sip->si_waitq, ut->uu_wqset,
+                  WAITQ_SHOULD_LOCK, reserved_link);
+
+       /*
+        * Always consume the reserved link.
+        * We can always call waitq_link_release() safely because if
+        * waitq_link is successful, it consumes the link and resets the
+        * value to 0, in which case our call to release becomes a no-op.
+        * If waitq_link fails, then the following release call will actually
+        * release the reserved link object.
+        */
+       waitq_link_release(*reserved_link);
+       *reserved_link = 0;
+
+       /*
+        * Use the s_data pointer as an output parameter as well
+        * This avoids changing the prototype for this function which is
+        * used by many kexts. We need to surface the waitq object
+        * associated with the selinfo we just added to the thread's select
+        * set. New waitq sets do not have back-pointers to set members, so
+        * the only way to clear out set linkage objects is to go from the
+        * waitq to the set. We use a memcpy because s_data could be
+        * pointing to an unaligned value on the stack
+        * (especially on 32-bit systems)
+        */
+       void *wqptr = (void *)&sip->si_waitq;
+       memcpy((void *)s_data, (void *)&wqptr, sizeof(void *));
 
        return;
 }
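
selrecord()/selwakeup() remain the driver-facing KPI even though the backing store moved from wait_queue to waitq: a subsystem's select method passes the opaque wql/s_data pointer straight through to selrecord() on the first pass and calls selwakeup() when data arrives. A sketch of that pattern under those assumptions (device structure, locking, and the select entry-point signature are illustrative):

    /* Illustrative driver plumbing only; selinfo, selrecord() and
     * selwakeup() are the actual KPI. */
    struct mydev {
            struct selinfo  si;         /* embeds the waitq that select links to */
            int             has_data;
    };

    static int mydev_select(struct mydev *dev, int which, void *wql, struct proc *p)
    {
            if (which != FREAD)
                    return 0;
            if (dev->has_data)
                    return 1;               /* ready now: report the bit */
            selrecord(p, &dev->si, wql);    /* not ready: link into the selecting thread's set */
            return 0;
    }

    static void mydev_data_arrived(struct mydev *dev)
    {
            dev->has_data = 1;
            selwakeup(&dev->si);            /* wake any threads selecting on us */
    }
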
@@ -1877,7 +2072,8 @@ selwakeup(struct selinfo *sip)
        }
 
        if (sip->si_flags & SI_RECORDED) {
-               wait_queue_wakeup_all(&sip->si_wait_queue, NULL, THREAD_AWAKENED);
+               waitq_wakeup64_all(&sip->si_waitq, NO_EVENT64,
+                                  THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
                sip->si_flags &= ~SI_RECORDED;
        }
 
@@ -1886,6 +2082,7 @@ selwakeup(struct selinfo *sip)
 void 
 selthreadclear(struct selinfo *sip)
 {
+       struct waitq *wq;
 
        if ((sip->si_flags & SI_INITED) == 0) {
                return;
@@ -1895,7 +2092,18 @@ selthreadclear(struct selinfo *sip)
                        sip->si_flags &= ~(SI_RECORDED | SI_COLL);
        }
        sip->si_flags |= SI_CLEAR;
-       wait_queue_unlink_all(&sip->si_wait_queue);
+       sip->si_flags &= ~SI_INITED;
+
+       wq = &sip->si_waitq;
+
+       /*
+        * Higher level logic may have a handle on this waitq's prepost ID,
+        * but that's OK because the waitq_deinit will remove/invalidate the
+        * prepost object (as well as mark the waitq invalid). This de-couples
+        * us from any callers that may have a handle to this waitq via the
+        * prepost ID.
+        */
+       waitq_deinit(wq);
 }
 
 
@@ -2967,3 +3175,385 @@ telemetry(__unused struct proc *p, struct telemetry_args *args, __unused int32_t
 
        return (error);
 }
+
+#if defined(DEVELOPMENT) || defined(DEBUG)
+#if CONFIG_WAITQ_DEBUG
+static uint64_t g_wqset_num = 0;
+struct g_wqset {
+       queue_chain_t      link;
+       struct waitq_set  *wqset;
+};
+
+static queue_head_t         g_wqset_list;
+static struct waitq_set    *g_waitq_set = NULL;
+
+static inline struct waitq_set *sysctl_get_wqset(int idx)
+{
+       struct g_wqset *gwqs;
+
+       if (!g_wqset_num)
+               queue_init(&g_wqset_list);
+
+       /* don't bother with locks: this is test-only code! */
+       qe_foreach_element(gwqs, &g_wqset_list, link) {
+               if ((int)(wqset_id(gwqs->wqset) & 0xffffffff) == idx)
+                       return gwqs->wqset;
+       }
+
+       /* allocate a new one */
+       ++g_wqset_num;
+       gwqs = (struct g_wqset *)kalloc(sizeof(*gwqs));
+       assert(gwqs != NULL);
+
+       gwqs->wqset = waitq_set_alloc(SYNC_POLICY_FIFO|SYNC_POLICY_PREPOST|SYNC_POLICY_DISABLE_IRQ);
+       enqueue_tail(&g_wqset_list, &gwqs->link);
+       printf("[WQ]: created new waitq set 0x%llx\n", wqset_id(gwqs->wqset));
+
+       return gwqs->wqset;
+}
+
+#define MAX_GLOBAL_TEST_QUEUES 64
+static int g_wq_init = 0;
+static struct waitq  g_wq[MAX_GLOBAL_TEST_QUEUES];
+
+static inline struct waitq *global_test_waitq(int idx)
+{
+       if (idx < 0)
+               return NULL;
+
+       if (!g_wq_init) {
+               g_wq_init = 1;
+               for (int i = 0; i < MAX_GLOBAL_TEST_QUEUES; i++)
+                       waitq_init(&g_wq[i], SYNC_POLICY_FIFO|SYNC_POLICY_DISABLE_IRQ);
+       }
+
+       return &g_wq[idx % MAX_GLOBAL_TEST_QUEUES];
+}
+
+static int sysctl_waitq_wakeup_one SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       int error;
+       int index;
+       struct waitq *waitq;
+       kern_return_t kr;
+       int64_t event64 = 0;
+
+       error = SYSCTL_IN(req, &event64, sizeof(event64));
+       if (error)
+               return error;
+
+       if (!req->newptr)
+               return SYSCTL_OUT(req, &event64, sizeof(event64));
+
+       if (event64 < 0) {
+               index = (int)((-event64) & 0xffffffff);
+               waitq = wqset_waitq(sysctl_get_wqset(index));
+               index = -index;
+       } else {
+               index = (int)event64;
+               waitq = global_test_waitq(index);
+       }
+
+       event64 = 0;
+
+       printf("[WQ]: Waking one thread on waitq [%d] event:0x%llx\n",
+              index, event64);
+       kr = waitq_wakeup64_one(waitq, (event64_t)event64, THREAD_AWAKENED,
+                               WAITQ_ALL_PRIORITIES);
+       printf("[WQ]: \tkr=%d\n", kr);
+
+       return SYSCTL_OUT(req, &kr, sizeof(kr));
+}
+SYSCTL_PROC(_kern, OID_AUTO, waitq_wakeup_one, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+           0, 0, sysctl_waitq_wakeup_one, "Q", "wakeup one thread waiting on given event");
+
+
+static int sysctl_waitq_wakeup_all SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       int error;
+       int index;
+       struct waitq *waitq;
+       kern_return_t kr;
+       int64_t event64 = 0;
+
+       error = SYSCTL_IN(req, &event64, sizeof(event64));
+       if (error)
+               return error;
+
+       if (!req->newptr)
+               return SYSCTL_OUT(req, &event64, sizeof(event64));
+
+       if (event64 < 0) {
+               index = (int)((-event64) & 0xffffffff);
+               waitq = wqset_waitq(sysctl_get_wqset(index));
+               index = -index;
+       } else {
+               index = (int)event64;
+               waitq = global_test_waitq(index);
+       }
+
+       event64 = 0;
+
+       printf("[WQ]: Waking all threads on waitq [%d] event:0x%llx\n",
+              index, event64);
+       kr = waitq_wakeup64_all(waitq, (event64_t)event64,
+                               THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
+       printf("[WQ]: \tkr=%d\n", kr);
+
+       return SYSCTL_OUT(req, &kr, sizeof(kr));
+}
+SYSCTL_PROC(_kern, OID_AUTO, waitq_wakeup_all, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+           0, 0, sysctl_waitq_wakeup_all, "Q", "wakeup all threads waiting on given event");
+
+
+static int sysctl_waitq_wait SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       int error;
+       int index;
+       struct waitq *waitq;
+       kern_return_t kr;
+       int64_t event64 = 0;
+
+       error = SYSCTL_IN(req, &event64, sizeof(event64));
+       if (error)
+               return error;
+
+       if (!req->newptr)
+               return SYSCTL_OUT(req, &event64, sizeof(event64));
+
+       if (event64 < 0) {
+               index = (int)((-event64) & 0xffffffff);
+               waitq = wqset_waitq(sysctl_get_wqset(index));
+               index = -index;
+       } else {
+               index = (int)event64;
+               waitq = global_test_waitq(index);
+       }
+
+       event64 = 0;
+
+       printf("[WQ]: Current thread waiting on waitq [%d] event:0x%llx\n",
+              index, event64);
+       kr = waitq_assert_wait64(waitq, (event64_t)event64, THREAD_INTERRUPTIBLE, 0);
+       if (kr == THREAD_WAITING)
+               thread_block(THREAD_CONTINUE_NULL);
+       printf("[WQ]: \tWoke Up: kr=%d\n", kr);
+
+       return SYSCTL_OUT(req, &kr, sizeof(kr));
+}
+SYSCTL_PROC(_kern, OID_AUTO, waitq_wait, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+           0, 0, sysctl_waitq_wait, "Q", "start waiting on given event");
+
+
+static int sysctl_wqset_select SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       int error;
+       struct waitq_set *wqset;
+       uint64_t event64 = 0;
+
+       error = SYSCTL_IN(req, &event64, sizeof(event64));
+       if (error)
+               return error;
+
+       if (!req->newptr)
+               goto out;
+
+       wqset = sysctl_get_wqset((int)(event64 & 0xffffffff));
+       g_waitq_set = wqset;
+
+       event64 = wqset_id(wqset);
+       printf("[WQ]: selected wqset 0x%llx\n", event64);
+
+out:
+       if (g_waitq_set)
+               event64 = wqset_id(g_waitq_set);
+       else
+               event64 = (uint64_t)(-1);
+
+       return SYSCTL_OUT(req, &event64, sizeof(event64));
+}
+SYSCTL_PROC(_kern, OID_AUTO, wqset_select, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+           0, 0, sysctl_wqset_select, "Q", "select/create a global waitq set");
+
+
+static int sysctl_waitq_link SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       int error;
+       int index;
+       struct waitq *waitq;
+       struct waitq_set *wqset;
+       kern_return_t kr;
+       uint64_t reserved_link = 0;
+       int64_t event64 = 0;
+
+       error = SYSCTL_IN(req, &event64, sizeof(event64));
+       if (error)
+               return error;
+
+       if (!req->newptr)
+               return SYSCTL_OUT(req, &event64, sizeof(event64));
+
+       if (!g_waitq_set)
+               g_waitq_set = sysctl_get_wqset(1);
+       wqset = g_waitq_set;
+
+       if (event64 < 0) {
+               struct waitq_set *tmp;
+               index = (int)((-event64) & 0xffffffff);
+               tmp = sysctl_get_wqset(index);
+               if (tmp == wqset)
+                       goto out;
+               waitq = wqset_waitq(tmp);
+               index = -index;
+       } else {
+               index = (int)event64;
+               waitq = global_test_waitq(index);
+       }
+
+       printf("[WQ]: linking waitq [%d] to global wqset (0x%llx)\n",
+              index, wqset_id(wqset));
+       reserved_link = waitq_link_reserve(waitq);
+       kr = waitq_link(waitq, wqset, WAITQ_SHOULD_LOCK, &reserved_link);
+       waitq_link_release(reserved_link);
+
+       printf("[WQ]: \tkr=%d\n", kr);
+
+out:
+       return SYSCTL_OUT(req, &kr, sizeof(kr));
+}
+SYSCTL_PROC(_kern, OID_AUTO, waitq_link, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+           0, 0, sysctl_waitq_link, "Q", "link global waitq to test waitq set");
+
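The reserve/link/release sequence used in the handler above is the expected calling pattern for the new link API. Condensed into a standalone helper purely for illustration (hypothetical, kernel context; the reservation presumably exists so the link object need not be allocated while the waitq is locked):

    static kern_return_t
    link_waitq_to_set(struct waitq *wq, struct waitq_set *wqset)
    {
            uint64_t reserved_link = waitq_link_reserve(wq);
            kern_return_t kr = waitq_link(wq, wqset, WAITQ_SHOULD_LOCK, &reserved_link);

            /* release the reservation whether or not waitq_link() consumed it */
            waitq_link_release(reserved_link);
            return kr;
    }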
+
+static int sysctl_waitq_unlink SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       int error;
+       int index;
+       struct waitq *waitq;
+       struct waitq_set *wqset;
+       kern_return_t kr;
+       uint64_t event64 = 0;
+
+       error = SYSCTL_IN(req, &event64, sizeof(event64));
+       if (error)
+               return error;
+
+       if (!req->newptr)
+               return SYSCTL_OUT(req, &event64, sizeof(event64));
+
+       if (!g_waitq_set)
+               g_waitq_set = sysctl_get_wqset(1);
+       wqset = g_waitq_set;
+
+       index = (int)event64;
+       waitq = global_test_waitq(index);
+
+       printf("[WQ]: unlinking waitq [%d] from global wqset (0x%llx)\n",
+              index, wqset_id(wqset));
+
+       kr = waitq_unlink(waitq, wqset);
+       printf("[WQ]: \tkr=%d\n", kr);
+
+       return SYSCTL_OUT(req, &kr, sizeof(kr));
+}
+SYSCTL_PROC(_kern, OID_AUTO, waitq_unlink, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+           0, 0, sysctl_waitq_unlink, "Q", "unlink global waitq from test waitq set");
+
+
+static int sysctl_waitq_clear_prepost SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       struct waitq *waitq;
+       uint64_t event64 = 0;
+       int error, index;
+
+       error = SYSCTL_IN(req, &event64, sizeof(event64));
+       if (error)
+               return error;
+
+       if (!req->newptr)
+               return SYSCTL_OUT(req, &event64, sizeof(event64));
+
+       index = (int)event64;
+       waitq = global_test_waitq(index);
+
+       printf("[WQ]: clearing prepost on waitq [%d]\n", index);
+       waitq_clear_prepost(waitq);
+
+       return SYSCTL_OUT(req, &event64, sizeof(event64));
+}
+SYSCTL_PROC(_kern, OID_AUTO, waitq_clear_prepost, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+           0, 0, sysctl_waitq_clear_prepost, "Q", "clear prepost on given waitq");
+
+
+static int sysctl_wqset_unlink_all SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       int error;
+       struct waitq_set *wqset;
+       kern_return_t kr;
+       uint64_t event64 = 0;
+
+       error = SYSCTL_IN(req, &event64, sizeof(event64));
+       if (error)
+               return error;
+
+       if (!req->newptr)
+               return SYSCTL_OUT(req, &event64, sizeof(event64));
+
+       if (!g_waitq_set)
+               g_waitq_set = sysctl_get_wqset(1);
+       wqset = g_waitq_set;
+
+       printf("[WQ]: unlinking all queues from global wqset (0x%llx)\n",
+              wqset_id(wqset));
+
+       kr = waitq_set_unlink_all(wqset);
+       printf("[WQ]: \tkr=%d\n", kr);
+
+       return SYSCTL_OUT(req, &kr, sizeof(kr));
+}
+SYSCTL_PROC(_kern, OID_AUTO, wqset_unlink_all, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+           0, 0, sysctl_wqset_unlink_all, "Q", "unlink all queues from test waitq set");
+
+
+static int sysctl_wqset_clear_preposts SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       struct waitq_set *wqset = NULL;
+       uint64_t event64 = 0;
+       int error, index;
+
+       error = SYSCTL_IN(req, &event64, sizeof(event64));
+       if (error)
+               return error;
+
+       if (!req->newptr)
+               goto out;
+
+       index = (int)((event64) & 0xffffffff);
+       wqset = sysctl_get_wqset(index);
+       assert(wqset != NULL);
+
+       printf("[WQ]: clearing preposts on wqset 0x%llx\n", wqset_id(wqset));
+       waitq_set_clear_preposts(wqset);
+
+out:
+       if (wqset)
+               event64 = wqset_id(wqset);
+       else
+               event64 = (uint64_t)(-1);
+
+       return SYSCTL_OUT(req, &event64, sizeof(event64));
+}
+SYSCTL_PROC(_kern, OID_AUTO, wqset_clear_preposts, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+           0, 0, sysctl_wqset_clear_preposts, "Q", "clear preposts on given waitq set");
+
+#endif /* CONFIG_WAITQ_DEBUG */
+#endif /* defined(DEVELOPMENT) || defined(DEBUG) */
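A hypothetical userspace driver for the test sysctls above; it is only meaningful on a DEVELOPMENT/DEBUG kernel built with CONFIG_WAITQ_DEBUG. The sysctl names come from the SYSCTL_PROC declarations in this hunk, and the convention that a non-negative value indexes a global test waitq is taken from the handlers:

    #include <sys/sysctl.h>
    #include <stdint.h>
    #include <stdio.h>

    static int64_t
    wq_sysctl(const char *name, int64_t in)
    {
            int64_t out = 0;
            size_t outlen = sizeof(out);

            if (sysctlbyname(name, &out, &outlen, &in, sizeof(in)) != 0)
                    perror(name);
            return out;
    }

    int
    main(void)
    {
            wq_sysctl("kern.wqset_select", 1);      /* create/select global waitq set #1 */
            wq_sysctl("kern.waitq_link", 2);        /* link global test waitq #2 into it */
            wq_sysctl("kern.waitq_wakeup_one", 2);  /* wake one waiter on test waitq #2 */
            /* kern.waitq_wait would block this thread until another caller wakes it */
            return 0;
    }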
index 374d82381f46dc38b56986ac68bf6a043e894dec..1e64ce7378334adaff46a973580a962064ff1cea 100644
@@ -317,7 +317,7 @@ pipe_touch(struct pipe *tpipe, int touch)
        }
 }
 
-static const unsigned int pipesize_blocks[] = {128,256,1024,2048,4096, 4096 * 2, PIPE_SIZE , PIPE_SIZE * 4 };
+static const unsigned int pipesize_blocks[] = {512,1024,2048,4096, 4096 * 2, PIPE_SIZE , PIPE_SIZE * 4 };
 
 /* 
  * finds the right size from possible sizes in pipesize_blocks 
@@ -329,6 +329,12 @@ choose_pipespace(unsigned long current, unsigned long expected)
        int i = sizeof(pipesize_blocks)/sizeof(unsigned int) -1;
        unsigned long target;
 
+       /*
+        * assert that we always get an atomic transaction sized pipe buffer,
+        * even if the system pipe buffer high-water mark has been crossed.
+        */
+       assert(PIPE_BUF == pipesize_blocks[0]);
+
        if (expected > current) 
                target = expected;
        else
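Dropping the 128- and 256-byte buckets and adding the assert pins the smallest pipe buffer to PIPE_BUF (512 bytes), so writes of up to PIPE_BUF bytes stay atomic even after the system-wide pipe buffer high-water mark is crossed. A standalone sketch of the "smallest bucket that fits" idea (hypothetical helper, not the kernel's choose_pipespace):

    #include <limits.h>     /* PIPE_BUF */

    /* Pick the smallest bucket that covers `target`, else the largest bucket. */
    static unsigned int
    pick_pipe_bucket(const unsigned int *buckets, int nbuckets, unsigned long target)
    {
            for (int i = 0; i < nbuckets; i++) {
                    if (buckets[i] >= target)
                            return buckets[i];
            }
            return buckets[nbuckets - 1];
    }

    /* With the new table, a one-byte request maps to 512 == PIPE_BUF, the smallest bucket. */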
diff --git a/bsd/kern/sys_work_interval.c b/bsd/kern/sys_work_interval.c
new file mode 100644
index 0000000..45b36c7
--- /dev/null
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/kernel_types.h>
+#include <sys/sysproto.h>
+#include <sys/priv.h>
+#include <sys/work_interval.h>
+#include <kern/sched_prim.h>
+#include <kern/thread.h>
+#include <libkern/libkern.h>
+
+int
+work_interval_ctl(__unused proc_t p, struct work_interval_ctl_args *uap, __unused int32_t *retval)
+{
+       uint32_t        operation = uap->operation;
+       int                     error = 0;
+       kern_return_t   kret = KERN_SUCCESS;
+       uint64_t        work_interval_id;
+       struct work_interval_notification       notification;
+
+       switch (operation) {
+               case WORK_INTERVAL_OPERATION_CREATE:
+                       if (uap->arg == USER_ADDR_NULL || uap->work_interval_id != 0) {
+                               return EINVAL;
+                       }
+                       if (uap->len < sizeof(work_interval_id)) {
+                               return ERANGE;
+                       }
+
+                       /*
+                        * Privilege check performed up-front, and then the work
+                        * ID is allocated for use by the thread
+                        */
+                       error = priv_check_cred(kauth_cred_get(), PRIV_WORK_INTERVAL, 0);
+                       if (error) {
+                               return (error);
+                       }
+
+                       kret = thread_policy_create_work_interval(current_thread(),
+                                                                                                         &work_interval_id);
+                       if (kret == KERN_SUCCESS) {
+                               error = copyout(&work_interval_id, uap->arg, sizeof(work_interval_id));
+                       } else {
+                               error = EINVAL;
+                       }
+
+                       break;
+               case WORK_INTERVAL_OPERATION_DESTROY:
+                       if (uap->arg != USER_ADDR_NULL || uap->work_interval_id == 0) {
+                               return EINVAL;
+                       }
+
+                       /*
+                        * No privilege check, we assume a previous WORK_INTERVAL_OPERATION_CREATE
+                        * operation would have allocated a work interval ID for the current
+                        * thread, which the scheduler will validate.
+                        */
+                       kret = thread_policy_destroy_work_interval(current_thread(),
+                                                                                                          uap->work_interval_id);
+                       if (kret != KERN_SUCCESS) {
+                               error = EINVAL;
+                       }
+
+                       break;
+               case WORK_INTERVAL_OPERATION_NOTIFY:
+                       if (uap->arg == USER_ADDR_NULL || uap->work_interval_id == 0) {
+                               return EINVAL;
+                       }
+                       if (uap->len < sizeof(notification)) {
+                               return EINVAL;
+                       }
+
+                       /*
+                        * No privilege check, we assume a previous WORK_INTERVAL_OPERATION_CREATE
+                        * operation would have allocated a work interval ID for the current
+                        * thread, which the scheduler will validate.
+                        */
+                       error = copyin(uap->arg, &notification, sizeof(notification));
+                       if (error) {
+                               break;
+                       }
+
+                       kret = sched_work_interval_notify(current_thread(),
+                                                                                         uap->work_interval_id,
+                                                                                         notification.start,
+                                                                                         notification.finish,
+                                                                                         notification.deadline,
+                                                                                         notification.next_start,
+                                                                                         notification.flags);
+                       if (kret != KERN_SUCCESS) {
+                               error = EINVAL;
+                               break;
+                       }
+
+                       break;
+               default:
+                       error = ENOTSUP;
+                       break;
+       }
+
+       return (error);
+}
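For reference, a hypothetical userspace exercise of the new syscall via syscall(2). The operation names and argument checks are taken from the handler above; the syscall number (499) comes from the syscalls.master entry added later in this commit, and the availability of SYS_work_interval_ctl and of <sys/work_interval.h> to userspace is an assumption. The CREATE path also requires the PRIV_WORK_INTERVAL privilege, so unentitled callers may see EPERM:

    #include <sys/syscall.h>
    #include <sys/work_interval.h>  /* WORK_INTERVAL_OPERATION_* (assumed exported) */
    #include <unistd.h>
    #include <stdint.h>
    #include <stddef.h>

    int
    main(void)
    {
            uint64_t wi_id = 0;

            /* CREATE: work_interval_id must be 0; arg receives the new ID. */
            if (syscall(SYS_work_interval_ctl, WORK_INTERVAL_OPERATION_CREATE,
                        (uint64_t)0, &wi_id, sizeof(wi_id)) != 0)
                    return 1;

            /*
             * WORK_INTERVAL_OPERATION_NOTIFY calls would go here, each passing a
             * struct work_interval_notification with the frame's start/finish/
             * deadline/next_start timestamps and flags.
             */

            /* DESTROY: arg must be NULL; the ID must come from a prior CREATE. */
            syscall(SYS_work_interval_ctl, WORK_INTERVAL_OPERATION_DESTROY,
                    wi_id, NULL, (size_t)0);
            return 0;
    }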
index 624fd33dcb3d136df2e66bab4a6c3536d5a5a4d2..5f58f92639543426325b13207538af28739e6950 100644
 175    AUE_NULL        ALL     { int nosys(void); }   { old gc_control }
 176    AUE_NULL        ALL     { int nosys(void); }   { old add_profil }
 177    AUE_NULL        ALL     { int nosys(void); } 
-178    AUE_NULL        ALL     { int nosys(void); } 
+178    AUE_KDEBUGTRACE ALL     { uint64_t kdebug_trace_string(uint32_t debugid, uint64_t str_id, const char *str) NO_SYSCALL_STUB; }
 179    AUE_KDEBUGTRACE ALL     { int kdebug_trace64(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4) NO_SYSCALL_STUB; } 
 180    AUE_KDEBUGTRACE ALL     { int kdebug_trace(uint32_t code, u_long arg1, u_long arg2, u_long arg3, u_long arg4) NO_SYSCALL_STUB; } 
 181    AUE_SETGID      ALL     { int setgid(gid_t gid); } 
 #endif
 372    AUE_NULL        ALL     { uint64_t thread_selfid (void) NO_SYSCALL_STUB; } 
 373    AUE_LEDGER      ALL     { int ledger(int cmd, caddr_t arg1, caddr_t arg2, caddr_t arg3); } 
-374    AUE_NULL        ALL     { int nosys(void); } 
+374    AUE_NULL        ALL     { int kevent_qos(int fd, const struct kevent_qos_s *changelist, int nchanges, struct kevent_qos_s *eventlist, int nevents, void *data_out, size_t *data_available, unsigned int flags); } 
 375    AUE_NULL        ALL     { int nosys(void); } 
 376    AUE_NULL        ALL     { int nosys(void); } 
 377    AUE_NULL        ALL     { int nosys(void); } 
 388    AUE_MAC_GET_FD  ALL     { int __mac_get_fd(int fd, struct mac *mac_p); } 
 389    AUE_MAC_SET_FD  ALL     { int __mac_set_fd(int fd, struct mac *mac_p); } 
 390    AUE_MAC_GET_PID ALL     { int __mac_get_pid(pid_t pid, struct mac *mac_p); } 
-391    AUE_MAC_GET_LCID        ALL     { int __mac_get_lcid(pid_t lcid, struct mac *mac_p); }
-392    AUE_MAC_GET_LCTX        ALL     { int __mac_get_lctx(struct mac *mac_p); }
-393    AUE_MAC_SET_LCTX        ALL     { int __mac_set_lctx(struct mac *mac_p); }
 #else
 381     AUE_MAC_SYSCALL ALL     { int enosys(void); }
 382     AUE_MAC_GET_FILE        ALL     { int nosys(void); }
 388     AUE_MAC_GET_FD  ALL     { int nosys(void); }
 389     AUE_MAC_SET_FD  ALL     { int nosys(void); }
 390     AUE_MAC_GET_PID ALL     { int nosys(void); }
-391     AUE_MAC_GET_LCID        ALL     { int nosys(void); }
-392     AUE_MAC_GET_LCTX        ALL     { int nosys(void); }
-393     AUE_MAC_SET_LCTX        ALL     { int nosys(void); }
 #endif
-394    AUE_SETLCID     ALL     { int setlcid(pid_t pid, pid_t lcid) NO_SYSCALL_STUB; }
-395    AUE_GETLCID     ALL     { int getlcid(pid_t pid) NO_SYSCALL_STUB; }
+391    AUE_NULL        ALL     { int enosys(void); }
+392    AUE_NULL        ALL     { int enosys(void); }
+393    AUE_NULL        ALL     { int enosys(void); }
+394    AUE_NULL        ALL     { int enosys(void); }
+395    AUE_NULL        ALL     { int enosys(void); }
 396    AUE_NULL        ALL     { user_ssize_t read_nocancel(int fd, user_addr_t cbuf, user_size_t nbyte) NO_SYSCALL_STUB; } 
 397    AUE_NULL        ALL     { user_ssize_t write_nocancel(int fd, user_addr_t cbuf, user_size_t nbyte) NO_SYSCALL_STUB; } 
 398    AUE_OPEN_RWTC   ALL     { int open_nocancel(user_addr_t path, int flags, int mode) NO_SYSCALL_STUB; } 
 445    AUE_NULL        ALL     { int nosys(void); } { old __proc_suppress } 
 446    AUE_NULL        ALL     { int proc_rlimit_control(pid_t pid, int flavor, void *arg); }
 #if SOCKETS
-447    AUE_CONNECT     ALL     { int connectx(int s, struct sockaddr *src, socklen_t srclen, struct sockaddr *dsts, socklen_t dstlen, uint32_t ifscope, associd_t aid, connid_t *cid); } 
-448    AUE_NULL        ALL     { int disconnectx(int s, associd_t aid, connid_t cid); } 
-449    AUE_NULL        ALL     { int peeloff(int s, associd_t aid); } 
+447    AUE_CONNECT     ALL     { int connectx(int socket, const sa_endpoints_t *endpoints, sae_associd_t associd, unsigned int flags, const struct iovec *iov, unsigned int iovcnt, size_t *len, sae_connid_t *connid); }
+448    AUE_NULL        ALL     { int disconnectx(int s, sae_associd_t aid, sae_connid_t cid); }
+449    AUE_NULL        ALL     { int peeloff(int s, sae_associd_t aid); }
 450    AUE_SOCKET      ALL     { int socket_delegate(int domain, int type, int protocol, pid_t epid); } 
 #else
 447    AUE_NULL        ALL     { int nosys(void); }
 484    AUE_NULL        ALL     { int guarded_open_dprotected_np(const char *path, const guardid_t *guard, u_int guardflags, int flags, int dpclass, int dpflags, int mode) NO_SYSCALL_STUB; }
 485    AUE_NULL        ALL     { user_ssize_t guarded_write_np(int fd, const guardid_t *guard, user_addr_t cbuf, user_size_t nbyte); }
 486    AUE_PWRITE      ALL     { user_ssize_t guarded_pwrite_np(int fd, const guardid_t *guard, user_addr_t buf, user_size_t nbyte, off_t offset); }
-487    AUE_WRITEV      ALL     { user_ssize_t guarded_writev_np(int fd, const guardid_t *guard, struct iovec *iovp, u_int iovcnt); }
+487    AUE_WRITEV      ALL     { user_ssize_t guarded_writev_np(int fd, const guardid_t *guard, struct iovec *iovp, int iovcnt); }
 #if CONFIG_SECLUDED_RENAME
 488    AUE_RENAME      ALL     { int rename_ext(char *from, char *to, u_int flags) NO_SYSCALL_STUB; }
 #else
 #else
 489    AUE_NULL        ALL     { int enosys(void); }
 #endif
+#if NETWORKING
+490    AUE_NULL        ALL     { int netagent_trigger(uuid_t agent_uuid, size_t agent_uuidlen); }
+#else
+490    AUE_NULL        ALL     { int nosys(void); }
+#endif /* NETWORKING */
+491    AUE_STACKSNAPSHOT ALL   { int stack_snapshot_with_config(int stackshot_config_version, user_addr_t stackshot_config, size_t stackshot_config_size) NO_SYSCALL_STUB; }
+#if CONFIG_TELEMETRY
+492    AUE_STACKSNAPSHOT ALL   { int microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags) NO_SYSCALL_STUB; }
+#else
+492    AUE_NULL        ALL { int enosys(void); }
+#endif /* CONFIG_TELEMETRY */
+493    AUE_NULL        ALL     { user_ssize_t grab_pgo_data (user_addr_t uuid, int flags, user_addr_t buffer, user_ssize_t size); }
+494    AUE_NULL        ALL     { int enosys(void); }
+495    AUE_NULL        ALL     { int enosys(void); }
+496    AUE_NULL        ALL     { int enosys(void); }
+497    AUE_NULL        ALL     { int enosys(void); }
+498    AUE_NULL        ALL     { int enosys(void); }
+499    AUE_NULL        ALL     { int work_interval_ctl(uint32_t operation, uint64_t work_interval_id, void *arg, size_t len) NO_SYSCALL_STUB; }
index caed4c433c277da2bf3a9e2efe4b484e02215af2..41a6bb8736b60ea3e48de698196ae1816de0a50e 100644
@@ -93,6 +93,7 @@
 
 #include <vm/vm_map.h>
 #include <vm/vm_protos.h>
+#include <vm/vm_kern.h>
 
 #include <kern/locks.h>
 
@@ -169,6 +170,7 @@ static int shm_delete_mapping(struct proc *, struct shmmap_state *, int);
 #define DEFAULT_SHMMNI 32
 #define DEFAULT_SHMSEG 8
 #define DEFAULT_SHMALL 1024
+
 struct  shminfo shminfo = {
         DEFAULT_SHMMAX,
         DEFAULT_SHMMIN,
@@ -368,8 +370,8 @@ shmat(struct proc *p, struct shmat_args *uap, user_addr_t *retval)
        mach_vm_address_t       attach_va;      /* attach address in/out */
        mach_vm_size_t          map_size;       /* size of map entry */
        mach_vm_size_t          mapped_size;
-       vm_prot_t               prot;
-       size_t                  size;
+       vm_prot_t           prot;
+    size_t              size;
        kern_return_t           rv;
        int                     shmat_ret;
        int                     vm_flags;
@@ -389,6 +391,11 @@ shmat(struct proc *p, struct shmat_args *uap, user_addr_t *retval)
 
        if (shmmap_s == NULL) {
                size = shminfo.shmseg * sizeof(struct shmmap_state);
+        if (size == 0 || size / shminfo.shmseg != sizeof(struct shmmap_state)) {
+            /* overflow */
+            shmat_ret = ENOMEM;
+            goto shmat_out;
+        }
                MALLOC(shmmap_s, struct shmmap_state *, size, M_SHM, M_WAITOK);
                if (shmmap_s == NULL) {
                        shmat_ret = ENOMEM;
@@ -910,7 +917,7 @@ int
 shmfork(struct proc *p1, struct proc *p2)
 {
        struct shmmap_state *shmmap_s;
-       size_t size;
+    size_t size;
        int i;
        int shmfork_ret = 0;
 
@@ -919,8 +926,12 @@ shmfork(struct proc *p1, struct proc *p2)
        if (!shm_inited) {
                shminit(NULL);
        }
-               
-       size = shminfo.shmseg * sizeof(struct shmmap_state);
+    size = shminfo.shmseg * sizeof(struct shmmap_state);
+    if (size == 0 || size / shminfo.shmseg != sizeof(struct shmmap_state)) {
+        /* overflow */
+        shmfork_ret = 1;
+        goto shmfork_out;
+    }
        MALLOC(shmmap_s, struct shmmap_state *, size, M_SHM, M_WAITOK);
        if (shmmap_s != NULL) {
                bcopy((caddr_t)p1->vm_shm, (caddr_t)shmmap_s, size);
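Both shmat() and shmfork() now guard the shminfo.shmseg * sizeof(struct shmmap_state) multiplication before handing the result to MALLOC; the shmseg/shmmni/shmall sanity checks added to sysctl_shminfo() below cap the inputs for the same reason. The same guard, factored into a standalone helper purely for illustration (not part of the commit; __builtin_mul_overflow is a newer compiler-provided alternative):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Returns false, leaving *out untouched, if a * b would overflow size_t. */
    static bool
    mul_size_checked(size_t a, size_t b, size_t *out)
    {
            if (a != 0 && b > SIZE_MAX / a)
                    return false;
            *out = a * b;
            return true;
    }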
@@ -1037,6 +1048,9 @@ sysctl_shminfo(__unused struct sysctl_oid *oidp, void *arg1,
        int error = 0;
        int sysctl_shminfo_ret = 0;
        uint64_t        saved_shmmax;
+    uint64_t    saved_shmseg;
+    uint64_t    saved_shmmni;
+    uint64_t    saved_shmall;
 
        error = SYSCTL_OUT(req, arg1, sizeof(int64_t));
        if (error || req->newptr == USER_ADDR_NULL)
@@ -1049,7 +1063,10 @@ sysctl_shminfo(__unused struct sysctl_oid *oidp, void *arg1,
                sysctl_shminfo_ret = EPERM;
                goto sysctl_shminfo_out;
        }
-       saved_shmmax = shminfo.shmmax;
+    saved_shmmax = shminfo.shmmax;
+    saved_shmseg = shminfo.shmseg;
+    saved_shmmni = shminfo.shmmni;
+    saved_shmall = shminfo.shmall;
 
        if ((error = SYSCTL_IN(req, arg1, sizeof(int64_t))) != 0) {
                sysctl_shminfo_ret = error;
@@ -1064,6 +1081,30 @@ sysctl_shminfo(__unused struct sysctl_oid *oidp, void *arg1,
                        goto sysctl_shminfo_out;
                }
        }
+    else if (arg1 == &shminfo.shmseg) {
+        /* add a sanity check - 20847256 */
+        if (shminfo.shmseg > INT32_MAX || shminfo.shmseg < 0) {
+            shminfo.shmseg = saved_shmseg;
+            sysctl_shminfo_ret = EINVAL;
+            goto sysctl_shminfo_out;
+        }
+    }
+    else if (arg1 == &shminfo.shmmni) {
+        /* add a sanity check - 20847256 */
+        if (shminfo.shmmni > INT32_MAX || shminfo.shmmni < 0) {
+            shminfo.shmmni = saved_shmmni;
+            sysctl_shminfo_ret = EINVAL;
+            goto sysctl_shminfo_out;
+        }
+    }
+    else if (arg1 == &shminfo.shmall) {
+        /* add a sanity check - 20847256 */
+        if (shminfo.shmall > INT32_MAX || shminfo.shmall < 0) {
+            shminfo.shmall = saved_shmall;
+            sysctl_shminfo_ret = EINVAL;
+            goto sysctl_shminfo_out;
+        }
+    }
        sysctl_shminfo_ret = 0;
 sysctl_shminfo_out:
        SYSV_SHM_SUBSYS_UNLOCK();
index 57de6588d46805c05e5499f3fcf3766ed15f10bb..2a5a362063e03c28518ddf9869dc51d9be83f326 100644
 0x1300488      MACH_vm_page_wait_block
 0x130048C      MACH_vm_page_sleep
 0x1300490      MACH_vm_page_expedite
+0x13004c0      MACH_vm_pressure_event
 0x1400000      MACH_SCHED
 0x1400004      MACH_STKATTACH
 0x1400008      MACH_STKHANDOFF
 0x1400024      MACH_IDLE
 0x1400028      MACH_STACK_DEPTH
 0x140002c      MACH_MOVED
-0x1400030      MACH_FAIRSHARE_ENTER
-0x1400034      MACH_FAIRSHARE_EXIT
 0x1400038      MACH_FAILSAFE
 0x140003C      MACH_BLOCK
 0x1400040      MACH_WAIT
 0x1400084      MACH_QUANTUM_HANDOFF
 0x1400088      MACH_MULTIQ_DEQUEUE
 0x140008C      MACH_SCHED_THREAD_SWITCH
+0x1400094      MACH_SCHED_REMOTE_DEFERRED_AST
+0x1400098      MACH_SCHED_REMOTE_CANCEL_AST
+0x140009C      MACH_SCHED_CHANGE_PRIORITY
+0x14000A0      MACH_SCHED_UPDATE_REC_CORES
+0x14000A4      MACH_STACK_WAIT
+0x14000A8      MACH_THREAD_BIND
+0x14000AC      MACH_WAITQ_PROMOTE
+0x14000B0      MACH_WAITQ_DEMOTE
 0x1500000      MACH_MSGID_INVALID
 0x1600000      MTX_SLEEP
 0x1600004      MTX_SLEEP_DEADLINE
 0x1a20028      SFI_GLOBAL_DEFER
 0x1a30004      ENERGY_PERF_GPU_DESCRIPTION
 0x1a30008      ENERGY_PERF_GPU_TIME
+0x1a40000      SYSDIAGNOSE_notify_user
 0x2010000      L_IP_In_Beg
 0x2010004      L_IP_Out_Beg
 0x2010008      L_IP_In_End
 0x3080058      HFS_syncer_timed
 0x308005C      HFS_ScanUnmapBlocks
 0x3080060      HFS_issue_unmap
+0x3080064      HFS_KR
 0x30A0000      SMB_vop_mount
 0x30A0004      SMB_vop_unmount
 0x30A0008      SMB_vop_root
 0x40c02bc      BSC_obs_gc_control
 0x40c02c0      BSC_add_profil
 0x40c02c4      BSC_#177
-0x40c02c8      BSC_#178
+0x40c02c8      BSC_kdebug_trace_string
 0x40c02cc      BSC_kdebug_trace64
 0x40c02d0      BSC_kdebug_trace
 0x40c02d4      BSC_setgid
 0x40c0610      BSC_mac_get_fd
 0x40c0614      BSC_mac_set_fd
 0x40c0618      BSC_mac_get_pid
-0x40c061c      BSC_mac_get_lcid
-0x40c0620      BSC_mac_get_lctx
-0x40c0624      BSC_mac_set_lctx
-0x40c0628      BSC_setlcid
-0x40c062c      BSC_getlcid
+0x40c061c      BSC_#391
+0x40c0620      BSC_#392
+0x40c0624      BSC_#393
+0x40c0628      BSC_#394
+0x40c062c      BSC_#395
 0x40c0630      BSC_read_nocancel
 0x40c0634      BSC_write_nocancel
 0x40c0638      BSC_open_nocancel
 0x40c071c      BSC_vfs_purge
 0x40c0720      BSC_sfi_ctl
 0x40c0724      BSC_sfi_pidctl
+0x40c0728      BSC_coalition
+0x40c072c      BSC_coalition_info
 0x40c0734      BSC_getattrlistbulk
 0x40c073c      BSC_openat
 0x40c0740      BSC_openat_nocancel
 0x40c0784      BSC_sendmsg_x
 0x40c0788      BSC_thread_selfusage
 0x40c07a4      BSC_mremap_encrypted
+0x40c07b8      BSC_reserved
+0x40c07cc      BSC_work_interval_ctl
 0x40e0104      BSC_msync_extended_info
 0x40e0264      BSC_pread_extended_info
 0x40e0268      BSC_pwrite_extended_info
 0x7000004      TRACE_DATA_NEWTHREAD
 0x7000008      TRACE_DATA_EXEC
 0x700000c      TRACE_DATA_THREAD_TERMINATE
+0x7010000      TRACE_STRING_GLOBAL
 0x7010004      TRACE_STRING_NEWTHREAD
 0x7010008      TRACE_STRING_EXEC
 0x7020000      TRACE_PANIC
 0x25060014     PERF_KPC_ConfReg
 0x25060018     PERF_KPC_Data32
 0x2506001c     PERF_KPC_ConfReg32
+0x25060020     PERF_KPC_Data_Thread
+0x25060024     PERF_KPC_Data_Thread32
+0x25070000     PERF_KDBG_Handler
+0x25080000     PERF_CS_Handler
+0x25090000     PERF_SP_Handler
+0x250a0000     PERF_MI_Sample
+0x250a0004     PERF_MI_Data
 0x26100008     imp_assertion_hold
 0x2610000c     imp_assertion_hold_ext
 0x26100020     imp_assertion_externalize
 0x2700E020     PERF_SRAMEMA_DOM2
 0x2700E030     PERF_SRAMEMA_DOM3
 0x2a100004     ATM_MIN_CALLED
-0x2a100008     ATM_MIN_LINK_LIST
+0x2a100008     ATM_LINK_LIST_TRIM
 0x2a200004     ATM_VALUE_REPLACED
 0x2a200008     ATM_VALUE_ADDED
 0x2a300004     ATM_VALUE_UNREGISTERED
index 2586c482fe01d2f6beb8c2a146ac50f1037e28cf..b863e27ff805183219b96e09c8599ca9f6c36d5c 100644
@@ -341,11 +341,6 @@ tty_unlock(struct tty *tp)
 int
 ttyopen(dev_t device, struct tty *tp)
 {
-       proc_t p = current_proc();
-       struct pgrp *pg, *oldpg;
-       struct session *sessp, *oldsess;
-       struct tty *oldtp;
-
        TTY_LOCK_OWNED(tp);     /* debug assert */
 
        tp->t_dev = device;
@@ -357,57 +352,6 @@ ttyopen(dev_t device, struct tty *tp)
                bzero(&tp->t_winsize, sizeof(tp->t_winsize));
        }
 
-       pg = proc_pgrp(p);
-       sessp = proc_session(p);
-
-       /*
-        * First tty open affter setsid() call makes this tty its controlling
-        * tty, if the tty does not already have a session associated with it.
-        */
-       if (SESS_LEADER(p, sessp) &&    /* the process is the session leader */
-           sessp->s_ttyvp == NULL &&   /* but has no controlling tty */
-           tp->t_session == NULL ) {   /* and tty not controlling */
-               session_lock(sessp);
-               if ((sessp->s_flags & S_NOCTTY) == 0) { /* and no O_NOCTTY */
-                       oldtp = sessp->s_ttyp;
-                       ttyhold(tp);
-                       sessp->s_ttyp = tp;
-                       OSBitOrAtomic(P_CONTROLT, &p->p_flag);
-                       session_unlock(sessp);
-                       proc_list_lock();
-                       oldpg = tp->t_pgrp;
-                       oldsess = tp->t_session;
-                       if (oldsess != SESSION_NULL)
-                               oldsess->s_ttypgrpid = NO_PID;
-                       tp->t_session = sessp;
-                       tp->t_pgrp = pg;
-                       sessp->s_ttypgrpid = pg->pg_id;
-                       proc_list_unlock();
-                       /* SAFE: All callers drop the lock on return */
-                       tty_unlock(tp);
-                       if (oldpg != PGRP_NULL)
-                               pg_rele(oldpg);
-                       if (oldsess != SESSION_NULL)
-                               session_rele(oldsess);  
-                       if (NULL != oldtp)
-                               ttyfree(oldtp);
-                       tty_lock(tp);
-                       goto out;
-               }
-               session_unlock(sessp);
-       }
-
-       /* SAFE: All callers drop the lock on return */
-       tty_unlock(tp);
-       if (sessp != SESSION_NULL)
-               session_rele(sessp);
-       if (pg != PGRP_NULL)
-               pg_rele(pg);
-       tty_lock(tp);
-
-out:
-
-       /* XXX may be an error code */
        return (0);
 }
 
@@ -1075,6 +1019,7 @@ int
 ttioctl_locked(struct tty *tp, u_long cmd, caddr_t data, int flag, proc_t p)
 {
        int error = 0;
+       int bogusData = 1;
        struct uthread *ut;
        struct pgrp *pg, *oldpg;
        struct session *sessp, *oldsessp;
@@ -1171,7 +1116,6 @@ ttioctl_locked(struct tty *tp, u_long cmd, caddr_t data, int flag, proc_t p)
        }
        case TIOCSCONS: {
                /* Set current console device to this line */
-               int bogusData = 1;
                data = (caddr_t) &bogusData;
 
                /* No break - Fall through to BSD code */
@@ -1408,21 +1352,58 @@ ttioctl_locked(struct tty *tp, u_long cmd, caddr_t data, int flag, proc_t p)
                break;
        case TIOCSCTTY:                 /* become controlling tty */
                /* Session ctty vnode pointer set in vnode layer. */
-               pg = proc_pgrp(p);
                sessp = proc_session(p);
-               if (!SESS_LEADER(p, sessp) ||
-                   ((sessp->s_ttyvp || tp->t_session) &&
-                   (tp->t_session != sessp))) {
+               if (sessp == SESSION_NULL) {
+                       error = EPERM;
+                       goto out;
+               }
+
+               /*
+                * This can only be done by a session leader.
+                */
+               if (!SESS_LEADER(p, sessp)) {
                        /* SAFE: All callers drop the lock on return */
                        tty_unlock(tp);
-                       if (sessp != SESSION_NULL)
-                               session_rele(sessp);
-                       if (pg != PGRP_NULL)
+                       session_rele(sessp);
+                       tty_lock(tp);
+                       error = EPERM;
+                       goto out;
+               }
+               /*
+                * If this terminal is already the controlling terminal for the
+                * session, nothing to do here.
+                */
+               if (tp->t_session == sessp) {
+                       /* SAFE: All callers drop the lock on return */
+                       tty_unlock(tp);
+                       session_rele(sessp);
+                       tty_lock(tp);
+                       error = 0;
+                       goto out;
+               }
+               pg = proc_pgrp(p);
+               /*
+                * Deny if the terminal is already attached to another session or
+                * the session already has a terminal vnode.
+                */
+               session_lock(sessp);
+               if (sessp->s_ttyvp || tp->t_session) {
+                       session_unlock(sessp);
+                       /* SAFE: All callers drop the lock on return */
+                       tty_unlock(tp);
+                       if (pg != PGRP_NULL) {
                                pg_rele(pg);
+                       }
+                       session_rele(sessp);
                        tty_lock(tp);
                        error = EPERM;
                        goto out;
                }
+               sessp->s_ttypgrpid = pg->pg_id;
+               oldtp = sessp->s_ttyp;
+               ttyhold(tp);
+               sessp->s_ttyp = tp;
+               session_unlock(sessp);
                proc_list_lock();
                oldsessp = tp->t_session;
                oldpg = tp->t_pgrp;
@@ -1430,14 +1411,8 @@ ttioctl_locked(struct tty *tp, u_long cmd, caddr_t data, int flag, proc_t p)
                        oldsessp->s_ttypgrpid = NO_PID;
                /* do not drop refs on sessp and pg as tp holds them */
                tp->t_session = sessp;
-               sessp->s_ttypgrpid = pg->pg_id;
                tp->t_pgrp = pg;
                proc_list_unlock();
-               session_lock(sessp);
-               oldtp = sessp->s_ttyp;
-               ttyhold(tp);
-               sessp->s_ttyp = tp;
-               session_unlock(sessp);
                OSBitOrAtomic(P_CONTROLT, &p->p_flag);
                /* SAFE: All callers drop the lock on return */
                tty_unlock(tp);
index f4bb4dac6f80ef6aedfb338ac0b61e86699747ed..7c4f14e6a980c2297b1c1673fa32f877379a56cf 100644
@@ -148,11 +148,13 @@ pty_init(int n_ptys)
        int i;
        int j;
 
+       n_ptys = min(n_ptys, NPTY); /* clamp to avoid pt_ioctl overflow */
+
        /* create the pseudo tty device nodes */
        for (j = 0; j < 10; j++) {
                for (i = 0; i < HEX_BASE; i++) {
                        int m = j * HEX_BASE + i;
-                       if (m == n_ptys)
+                       if (m >= n_ptys)
                                goto done;
                        pt_ioctl[m].pt_devhandle = devfs_make_node(makedev(TTY_MAJOR, m),
                                                                   DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666,
index 83785b788edc58ffd4c50e91333403ea8bfc5a57..fc62b1afc6aa9dbcfcdf3d0f61588cd141a6cf27 100644
@@ -69,6 +69,7 @@
 #include <vm/vm_protos.h> /* last */
 
 #include <libkern/crypto/sha1.h>
+#include <libkern/crypto/sha2.h>
 #include <libkern/libkern.h>
 
 #include <security/mac_framework.h>
@@ -144,14 +145,79 @@ cs_valid_range(
        return TRUE;
 }
 
+typedef void (*cs_md_init)(void *ctx);
+typedef void (*cs_md_update)(void *ctx, const void *data, size_t size);
+typedef void (*cs_md_final)(void *hash, void *ctx);
+
+struct cs_hash {
+    uint8_t            cs_type;
+    size_t             cs_cd_size;
+    size_t             cs_size;
+    size_t             cs_digest_size;
+    cs_md_init         cs_init;
+    cs_md_update       cs_update;
+    cs_md_final                cs_final;
+};
+
+static struct cs_hash cs_hash_sha1 = {
+    .cs_type = CS_HASHTYPE_SHA1,
+    .cs_cd_size = CS_SHA1_LEN,
+    .cs_size = CS_SHA1_LEN,
+    .cs_digest_size = SHA_DIGEST_LENGTH,
+    .cs_init = (cs_md_init)SHA1Init,
+    .cs_update = (cs_md_update)SHA1Update,
+    .cs_final = (cs_md_final)SHA1Final,
+};
+#if CRYPTO_SHA2
+static struct cs_hash cs_hash_sha256 = {
+    .cs_type = CS_HASHTYPE_SHA256,
+    .cs_cd_size = SHA256_DIGEST_LENGTH,
+    .cs_size = SHA256_DIGEST_LENGTH,
+    .cs_digest_size = SHA256_DIGEST_LENGTH,
+    .cs_init = (cs_md_init)SHA256_Init,
+    .cs_update = (cs_md_update)SHA256_Update,
+    .cs_final = (cs_md_final)SHA256_Final,
+};
+static struct cs_hash cs_hash_sha256_truncate = {
+    .cs_type = CS_HASHTYPE_SHA256_TRUNCATED,
+    .cs_cd_size = CS_SHA256_TRUNCATED_LEN,
+    .cs_size = CS_SHA256_TRUNCATED_LEN,
+    .cs_digest_size = SHA256_DIGEST_LENGTH,
+    .cs_init = (cs_md_init)SHA256_Init,
+    .cs_update = (cs_md_update)SHA256_Update,
+    .cs_final = (cs_md_final)SHA256_Final,
+};
+#endif
+    
+static struct cs_hash *
+cs_find_md(uint8_t type)
+{
+       if (type == CS_HASHTYPE_SHA1) {
+               return &cs_hash_sha1;
+#if CRYPTO_SHA2
+       } else if (type == CS_HASHTYPE_SHA256) {
+               return &cs_hash_sha256;
+       } else if (type == CS_HASHTYPE_SHA256_TRUNCATED) {
+               return &cs_hash_sha256_truncate;
+#endif
+       }
+       return NULL;
+}
+
+union cs_hash_union {
+       SHA1_CTX                sha1ctxt;
+       SHA256_CTX              sha256ctx;
+};
+
+
 /*
  * Locate the CodeDirectory from an embedded signature blob
  */
 const 
 CS_CodeDirectory *findCodeDirectory(
        const CS_SuperBlob *embedded,
-       char *lower_bound,
-       char *upper_bound)
+       const char *lower_bound,
+       const char *upper_bound)
 {
        const CS_CodeDirectory *cd = NULL;
 
@@ -209,9 +275,10 @@ CS_CodeDirectory *findCodeDirectory(
 static const unsigned char *
 hashes(
        const CS_CodeDirectory *cd,
-       unsigned page,
-       char *lower_bound,
-       char *upper_bound)
+       uint32_t page,
+       size_t hash_len,
+       const char *lower_bound,
+       const char *upper_bound)
 {
        const unsigned char *base, *top, *hash;
        uint32_t nCodeSlots = ntohl(cd->nCodeSlots);
@@ -260,9 +327,9 @@ hashes(
 
                                /* base = address of first hash covered by scatter */
                                base = (const unsigned char *)cd + ntohl(cd->hashOffset) + 
-                                       hashindex * SHA1_RESULTLEN;
+                                       hashindex * hash_len;
                                /* top = address of first hash after this scatter */
-                               top = base + scount * SHA1_RESULTLEN;
+                               top = base + scount * hash_len;
                                if (!cs_valid_range(base, top, lower_bound, 
                                                    upper_bound) ||
                                    hashindex > nCodeSlots) {
@@ -278,20 +345,20 @@ hashes(
                        scatter++;
                } while(1);
                
-               hash = base + (page - sbase) * SHA1_RESULTLEN;
+               hash = base + (page - sbase) * hash_len;
        } else {
                base = (const unsigned char *)cd + ntohl(cd->hashOffset);
-               top = base + nCodeSlots * SHA1_RESULTLEN;
+               top = base + nCodeSlots * hash_len;
                if (!cs_valid_range(base, top, lower_bound, upper_bound) ||
                    page > nCodeSlots) {
                        return NULL;
                }
                assert(page < nCodeSlots);
 
-               hash = base + page * SHA1_RESULTLEN;
+               hash = base + page * hash_len;
        }
        
-       if (!cs_valid_range(hash, hash + SHA1_RESULTLEN,
+       if (!cs_valid_range(hash, hash + hash_len,
                            lower_bound, upper_bound)) {
                hash = NULL;
        }
@@ -315,27 +382,31 @@ hashes(
 static int
 cs_validate_codedirectory(const CS_CodeDirectory *cd, size_t length)
 {
+       struct cs_hash *hashtype;
 
        if (length < sizeof(*cd))
                return EBADEXEC;
        if (ntohl(cd->magic) != CSMAGIC_CODEDIRECTORY)
                return EBADEXEC;
-       if (cd->hashSize != SHA1_RESULTLEN)
-               return EBADEXEC;
        if (cd->pageSize != PAGE_SHIFT_4K)
                return EBADEXEC;
-       if (cd->hashType != CS_HASHTYPE_SHA1)
+       hashtype = cs_find_md(cd->hashType);
+       if (hashtype == NULL)
                return EBADEXEC;
 
+       if (cd->hashSize != hashtype->cs_cd_size)
+               return EBADEXEC;
+
+
        if (length < ntohl(cd->hashOffset))
                return EBADEXEC;
 
        /* check that nSpecialSlots fits in the buffer in front of hashOffset */
-       if (ntohl(cd->hashOffset) / SHA1_RESULTLEN < ntohl(cd->nSpecialSlots))
+       if (ntohl(cd->hashOffset) / hashtype->cs_size < ntohl(cd->nSpecialSlots))
                return EBADEXEC;
 
        /* check that codeslots fits in the buffer */
-       if ((length - ntohl(cd->hashOffset)) / SHA1_RESULTLEN <  ntohl(cd->nCodeSlots))
+       if ((length - ntohl(cd->hashOffset)) / hashtype->cs_size <  ntohl(cd->nCodeSlots))
                return EBADEXEC;
        
        if (ntohl(cd->version) >= CS_SUPPORTSSCATTER && cd->scatterOffset) {
@@ -343,8 +414,8 @@ cs_validate_codedirectory(const CS_CodeDirectory *cd, size_t length)
                if (length < ntohl(cd->scatterOffset))
                        return EBADEXEC;
 
-               SC_Scatter *scatter = (SC_Scatter *)
-                       (((uint8_t *)cd) + ntohl(cd->scatterOffset));
+               const SC_Scatter *scatter = (const SC_Scatter *)
+                       (((const uint8_t *)cd) + ntohl(cd->scatterOffset));
                uint32_t nPages = 0;
 
                /*
@@ -378,7 +449,7 @@ cs_validate_codedirectory(const CS_CodeDirectory *cd, size_t length)
 
        /* identifier is NUL terminated string */
        if (cd->identOffset) {
-               uint8_t *ptr = (uint8_t *)cd + ntohl(cd->identOffset);
+               const uint8_t *ptr = (const uint8_t *)cd + ntohl(cd->identOffset);
                if (memchr(ptr, 0, length - ntohl(cd->identOffset)) == NULL)
                        return EBADEXEC;
        }
@@ -388,7 +459,7 @@ cs_validate_codedirectory(const CS_CodeDirectory *cd, size_t length)
                if (length < ntohl(cd->teamOffset))
                        return EBADEXEC;
 
-               uint8_t *ptr = (uint8_t *)cd + ntohl(cd->teamOffset);
+               const uint8_t *ptr = (const uint8_t *)cd + ntohl(cd->teamOffset);
                if (memchr(ptr, 0, length - ntohl(cd->teamOffset)) == NULL)
                        return EBADEXEC;
        }
@@ -429,7 +500,7 @@ static int
 cs_validate_csblob(const uint8_t *addr, size_t length,
                   const CS_CodeDirectory **rcd)
 {
-       const CS_GenericBlob *blob = (const CS_GenericBlob *)(void *)addr;
+       const CS_GenericBlob *blob = (const CS_GenericBlob *)(const void *)addr;
        int error;
 
        *rcd = NULL;
@@ -458,7 +529,7 @@ cs_validate_csblob(const uint8_t *addr, size_t length,
                                return EBADEXEC;
 
                        const CS_GenericBlob *subBlob =
-                               (const CS_GenericBlob *)(void *)(addr + ntohl(blobIndex->offset));
+                               (const CS_GenericBlob *)(const void *)(addr + ntohl(blobIndex->offset));
 
                        size_t subLength = length - ntohl(blobIndex->offset);
 
@@ -477,7 +548,7 @@ cs_validate_csblob(const uint8_t *addr, size_t length,
 
        } else if (ntohl(blob->magic) == CSMAGIC_CODEDIRECTORY) {
 
-               if ((error = cs_validate_codedirectory((const CS_CodeDirectory *)(void *)addr, length)) != 0)
+               if ((error = cs_validate_codedirectory((const CS_CodeDirectory *)(const void *)addr, length)) != 0)
                        return error;
                *rcd = (const CS_CodeDirectory *)blob;
        } else {
@@ -495,7 +566,7 @@ cs_validate_csblob(const uint8_t *addr, size_t length,
  *
  * Find an blob from the superblob/code directory. The blob must have
  * been been validated by cs_validate_csblob() before calling
- * this. Use cs_find_blob() instead.
+ * this. Use csblob_find_blob() instead.
  * 
  * Will also find a "raw" code directory if its stored as well as
  * searching the superblob.
@@ -509,10 +580,10 @@ cs_validate_csblob(const uint8_t *addr, size_t length,
  *             NULL                    Buffer not found
  */
 
-static const CS_GenericBlob *
-cs_find_blob_bytes(const uint8_t *addr, size_t length, uint32_t type, uint32_t magic)
+const CS_GenericBlob *
+csblob_find_blob_bytes(const uint8_t *addr, size_t length, uint32_t type, uint32_t magic)
 {
-       const CS_GenericBlob *blob = (const CS_GenericBlob *)(void *)addr;
+       const CS_GenericBlob *blob = (const CS_GenericBlob *)(const void *)addr;
 
        if (ntohl(blob->magic) == CSMAGIC_EMBEDDED_SIGNATURE) {
                const CS_SuperBlob *sb = (const CS_SuperBlob *)blob;
@@ -524,7 +595,7 @@ cs_find_blob_bytes(const uint8_t *addr, size_t length, uint32_t type, uint32_t m
                        uint32_t offset = ntohl(sb->index[n].offset);
                        if (length - sizeof(const CS_GenericBlob) < offset)
                                return NULL;
-                       blob = (const CS_GenericBlob *)(void *)(addr + offset);
+                       blob = (const CS_GenericBlob *)(const void *)(addr + offset);
                        if (ntohl(blob->magic) != magic)
                                continue;
                        return blob;
@@ -538,167 +609,70 @@ cs_find_blob_bytes(const uint8_t *addr, size_t length, uint32_t type, uint32_t m
 
 
 const CS_GenericBlob *
-cs_find_blob(struct cs_blob *csblob, uint32_t type, uint32_t magic)
+csblob_find_blob(struct cs_blob *csblob, uint32_t type, uint32_t magic)
 {
        if ((csblob->csb_flags & CS_VALID) == 0)
                return NULL;
-       return cs_find_blob_bytes((const uint8_t *)csblob->csb_mem_kaddr, csblob->csb_mem_size, type, magic);
+       return csblob_find_blob_bytes((const uint8_t *)csblob->csb_mem_kaddr, csblob->csb_mem_size, type, magic);
 }
 
 static const uint8_t *
-cs_find_special_slot(const CS_CodeDirectory *cd, uint32_t slot)
+find_special_slot(const CS_CodeDirectory *cd, size_t slotsize, uint32_t slot)
 {
        /* there is no zero special slot since that is the first code slot */
        if (ntohl(cd->nSpecialSlots) < slot || slot == 0)
                return NULL;
 
-       return ((const uint8_t *)cd + ntohl(cd->hashOffset) - (SHA1_RESULTLEN * slot));
+       return ((const uint8_t *)cd + ntohl(cd->hashOffset) - (slotsize * slot));
 }
 
-/*
- * CODESIGNING
- * End of routines to navigate code signing data structures in the kernel.
- */
-
-/*
- * ENTITLEMENTS
- * Routines to navigate entitlements in the kernel.
- */
-
-/* Retrieve the entitlements blob for a process.
- * Returns:
- *   EINVAL    no text vnode associated with the process
- *   EBADEXEC   invalid code signing data
- *   0         no error occurred
- *
- * On success, out_start and out_length will point to the
- * entitlements blob if found; or will be set to NULL/zero
- * if there were no entitlements.
- */
-
-static uint8_t sha1_zero[SHA1_RESULTLEN] = { 0 };
+static uint8_t cshash_zero[CS_HASH_MAX_SIZE] = { 0 };
 
 int
-cs_entitlements_blob_get(proc_t p, void **out_start, size_t *out_length)
+csblob_get_entitlements(struct cs_blob *csblob, void **out_start, size_t *out_length)
 {
-       uint8_t computed_hash[SHA1_RESULTLEN];
+       uint8_t computed_hash[CS_HASH_MAX_SIZE];
        const CS_GenericBlob *entitlements;
        const CS_CodeDirectory *code_dir;
-       struct cs_blob *csblob;
        const uint8_t *embedded_hash;
-       SHA1_CTX context;
+       union cs_hash_union context;
 
        *out_start = NULL;
        *out_length = 0;
 
-       if (NULL == p->p_textvp)
-               return EINVAL;
+       if (csblob->csb_hashtype == NULL || csblob->csb_hashtype->cs_digest_size > sizeof(computed_hash))
+           return EBADEXEC;
 
-       if ((csblob = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff)) == NULL)
+       if ((code_dir = (const CS_CodeDirectory *)csblob_find_blob(csblob, CSSLOT_CODEDIRECTORY, CSMAGIC_CODEDIRECTORY)) == NULL)
                return 0;
 
-       if ((code_dir = (const CS_CodeDirectory *)cs_find_blob(csblob, CSSLOT_CODEDIRECTORY, CSMAGIC_CODEDIRECTORY)) == NULL)
-               return 0;
-
-       entitlements = cs_find_blob(csblob, CSSLOT_ENTITLEMENTS, CSMAGIC_EMBEDDED_ENTITLEMENTS);
-       embedded_hash = cs_find_special_slot(code_dir, CSSLOT_ENTITLEMENTS);
+       entitlements = csblob_find_blob(csblob, CSSLOT_ENTITLEMENTS, CSMAGIC_EMBEDDED_ENTITLEMENTS);
+       embedded_hash = find_special_slot(code_dir, csblob->csb_hashtype->cs_size, CSSLOT_ENTITLEMENTS);
 
        if (embedded_hash == NULL) {
                if (entitlements)
                        return EBADEXEC;
                return 0;
-       } else if (entitlements == NULL && memcmp(embedded_hash, sha1_zero, SHA1_RESULTLEN) != 0) {
+       } else if (entitlements == NULL && memcmp(embedded_hash, cshash_zero, csblob->csb_hashtype->cs_size) != 0) {
                return EBADEXEC;
        }
 
-       SHA1Init(&context);
-       SHA1Update(&context, entitlements, ntohl(entitlements->length));
-       SHA1Final(computed_hash, &context);
-       if (memcmp(computed_hash, embedded_hash, SHA1_RESULTLEN) != 0)
+       csblob->csb_hashtype->cs_init(&context);
+       csblob->csb_hashtype->cs_update(&context, entitlements, ntohl(entitlements->length));
+       csblob->csb_hashtype->cs_final(computed_hash, &context);
+
+       if (memcmp(computed_hash, embedded_hash, csblob->csb_hashtype->cs_size) != 0)
                return EBADEXEC;
 
-       *out_start = (void *)entitlements;
+       *out_start = __DECONST(void *, entitlements);
        *out_length = ntohl(entitlements->length);
 
        return 0;
 }
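The struct cs_hash table introduced above replaces the hard-wired SHA-1 calls with a per-CodeDirectory digest description; csblob_get_entitlements() is its first consumer via the cs_init/cs_update/cs_final hooks. A condensed sketch of the calling pattern (hypothetical helper, kernel context; CS_HASH_MAX_SIZE and union cs_hash_union are the ones added by this commit):

    static void
    cs_hash_buffer(const struct cs_hash *type, const void *data, size_t len,
                   uint8_t out[CS_HASH_MAX_SIZE])
    {
            union cs_hash_union ctx;

            assert(type->cs_digest_size <= CS_HASH_MAX_SIZE);
            type->cs_init(&ctx);
            type->cs_update(&ctx, data, len);
            type->cs_final(out, &ctx);
            /*
             * Callers compare only the first type->cs_size bytes against the hash
             * stored in the CodeDirectory (e.g. the truncated SHA-256 type).
             */
    }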
 
-/* Retrieve the codesign identity for a process.
- * Returns:
- *   NULL      an error occured
- *   string    the cs_identity
- */
-
-const char *
-cs_identity_get(proc_t p)
-{
-       const CS_CodeDirectory *code_dir;
-       struct cs_blob *csblob;
-
-       if (NULL == p->p_textvp)
-               return NULL;
-
-       if ((csblob = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff)) == NULL)
-               return NULL;
-
-       if ((code_dir = (const CS_CodeDirectory *)cs_find_blob(csblob, CSSLOT_CODEDIRECTORY, CSMAGIC_CODEDIRECTORY)) == NULL)
-               return NULL;
-
-       if (code_dir->identOffset == 0)
-               return NULL;
-
-       return ((const char *)code_dir) + ntohl(code_dir->identOffset);
-}
-
-
-
-/* Retrieve the codesign blob for a process.
- * Returns:
- *   EINVAL    no text vnode associated with the process
- *   0         no error occurred
- *
- * On success, out_start and out_length will point to the
- * cms blob if found; or will be set to NULL/zero
- * if there were no blob.
- */
-
-int
-cs_blob_get(proc_t p, void **out_start, size_t *out_length)
-{
-       struct cs_blob *csblob;
-
-       *out_start = NULL;
-       *out_length = 0;
-
-       if (NULL == p->p_textvp)
-               return EINVAL;
-
-       if ((csblob = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff)) == NULL)
-               return 0;
-
-       *out_start = (void *)csblob->csb_mem_kaddr;
-       *out_length = csblob->csb_mem_size;
-
-       return 0;
-}
-
-uint8_t *
-cs_get_cdhash(struct proc *p)
-{
-       struct cs_blob *csblob;
-
-       if (NULL == p->p_textvp)
-               return NULL;
-
-       if ((csblob = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff)) == NULL)
-               return NULL;
-
-       return csblob->csb_sha1;
-}
-
 /*
- * ENTITLEMENTS
- * End of routines to navigate entitlements in the kernel.
+ * CODESIGNING
+ * End of routines to navigate code signing data structures in the kernel.
  */
 
 
@@ -921,11 +895,7 @@ ubc_info_deallocate(struct ubc_info *uip)
         ubc_info_free(uip);
 }
 
-/* 
- * This should be public but currently it is only used below so we
- * defer making that change.
- */
-static errno_t mach_to_bsd_errno(kern_return_t mach_err)
+errno_t mach_to_bsd_errno(kern_return_t mach_err)
 {
        switch (mach_err) {
        case KERN_SUCCESS:
@@ -2736,6 +2706,40 @@ SYSCTL_INT(_vm, OID_AUTO, cs_blob_count_peak, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_b
 SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_peak, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_size_peak, 0, "Peak size of code signature blobs");
 SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_max, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_size_max, 0, "Size of biggest code signature blob");
 
+/*
+ * Function: csblob_parse_teamid
+ *
+ * Description: This function returns a pointer to the team id
+               stored within the codedirectory of the csblob.
+               If the codedirectory predates team-ids, it returns
+               NULL.
+               This does not copy the name but returns a pointer to
+               it within the CD. Subsequently, the CD must be
+               available when this is used.
+*/
+
+static const char *
+csblob_parse_teamid(struct cs_blob *csblob)
+{
+       const CS_CodeDirectory *cd;
+
+       if ((cd = (const CS_CodeDirectory *)csblob_find_blob(
+                                               csblob, CSSLOT_CODEDIRECTORY, CSMAGIC_CODEDIRECTORY)) == NULL)
+               return NULL;
+
+       if (ntohl(cd->version) < CS_SUPPORTSTEAMID)
+               return NULL;
+
+       if (cd->teamOffset == 0)
+               return NULL;
+
+       const char *name = ((const char *)cd) + ntohl(cd->teamOffset);
+       if (cs_debug > 1)
+               printf("found team-id %s in cdblob\n", name);
+
+       return name;
+}
+
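A minimal caller sketch, assuming the surrounding ubc_subr.c context (copy_teamid is a hypothetical helper, not part of this change): because csblob_parse_teamid() returns a pointer into the CodeDirectory itself, a caller that needs the team ID beyond the blob's lifetime must copy it out while the blob is still held.

    static int
    copy_teamid(struct cs_blob *csblob, char *buf, size_t buflen)
    {
            const char *tid = csblob_parse_teamid(csblob);

            if (tid == NULL)
                    return ENOENT;
            /* copy into caller-owned storage before the blob goes away */
            strlcpy(buf, tid, buflen);
            return 0;
    }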
 
 kern_return_t
 ubc_cs_blob_allocate(
@@ -2746,9 +2750,9 @@ ubc_cs_blob_allocate(
 
 #if CS_BLOB_PAGEABLE
        *blob_size_p = round_page(*blob_size_p);
-       kr = kmem_alloc(kernel_map, blob_addr_p, *blob_size_p);
+       kr = kmem_alloc(kernel_map, blob_addr_p, *blob_size_p, VM_KERN_MEMORY_SECURITY);
 #else  /* CS_BLOB_PAGEABLE */
-       *blob_addr_p = (vm_offset_t) kalloc(*blob_size_p);
+       *blob_addr_p = (vm_offset_t) kalloc_tag(*blob_size_p, VM_KERN_MEMORY_SECURITY);
        if (*blob_addr_p == 0) {
                kr = KERN_NO_SPACE;
        } else {
@@ -2769,120 +2773,6 @@ ubc_cs_blob_deallocate(
        kfree((void *) blob_addr, blob_size);
 #endif /* CS_BLOB_PAGEABLE */
 }
-       
-int
-ubc_cs_sigpup_add(
-       struct vnode    *vp,
-       vm_address_t    address,
-       vm_size_t       size)
-{
-       kern_return_t           kr;
-       struct ubc_info         *uip;
-       struct cs_blob          *blob;
-       memory_object_control_t control;
-       const CS_CodeDirectory *cd;
-       int                     error;
-
-       control = ubc_getobject(vp, UBC_FLAGS_NONE);
-       if (control == MEMORY_OBJECT_CONTROL_NULL)
-               return KERN_INVALID_ARGUMENT;
-
-       if (memory_object_is_signed(control))
-               return 0;
-
-       blob = (struct cs_blob *) kalloc(sizeof (struct cs_blob));
-       if (blob == NULL)
-               return ENOMEM;
-
-       /* fill in the new blob */
-       blob->csb_cpu_type = CPU_TYPE_ANY;
-       blob->csb_base_offset = 0;
-       blob->csb_mem_size = size;
-       blob->csb_mem_offset = 0;
-       blob->csb_mem_handle = IPC_PORT_NULL;
-       blob->csb_mem_kaddr = address;
-       blob->csb_sigpup = 1;
-       blob->csb_platform_binary = 0;
-       blob->csb_teamid = NULL;
-       
-       /*
-        * Validate the blob's contents
-        */
-       cd = findCodeDirectory(
-               (const CS_SuperBlob *) address, 
-               (char *) address, 
-               (char *) address + blob->csb_mem_size);
-       if (cd == NULL) {
-               /* no code directory => useless blob ! */
-               error = EINVAL;
-               goto out;
-       }
-
-       blob->csb_flags = ntohl(cd->flags) | CS_VALID;
-       blob->csb_end_offset = round_page_4K(ntohl(cd->codeLimit));
-       if((ntohl(cd->version) >= CS_SUPPORTSSCATTER) && (ntohl(cd->scatterOffset))) {
-               const SC_Scatter *scatter = (const SC_Scatter*)
-                   ((const char*)cd + ntohl(cd->scatterOffset));
-               blob->csb_start_offset = ntohl(scatter->base) * PAGE_SIZE_4K;
-       } else {
-               blob->csb_start_offset = (blob->csb_end_offset - (ntohl(cd->nCodeSlots) * PAGE_SIZE_4K));
-       }
-
-       /* 
-        * We don't need to check with the policy module, since the input data is supposed to be already checked
-        */
-       
-       vnode_lock(vp);
-       if (! UBCINFOEXISTS(vp)) {
-               vnode_unlock(vp);
-               if (cs_debug)
-                       printf("out ubc object\n");
-               error = ENOENT;
-               goto out;
-       }
-       uip = vp->v_ubcinfo;
-
-       /* someone raced us to adding the code directory */
-       if (uip->cs_blobs != NULL) {
-               if (cs_debug)
-                       printf("sigpup: vnode already have CD ?\n");
-               vnode_unlock(vp);
-               error = EEXIST;
-               goto out;
-       }
-
-       blob->csb_next = uip->cs_blobs;
-       uip->cs_blobs = blob;
-
-       OSAddAtomic(+1, &cs_blob_count);
-       OSAddAtomic((SInt32) +blob->csb_mem_size, &cs_blob_size);
-
-       /* mark this vnode's VM object as having "signed pages" */
-       kr = memory_object_signed(uip->ui_control, TRUE);
-       if (kr != KERN_SUCCESS) {
-               vnode_unlock(vp);
-               if (cs_debug)
-                       printf("sigpup: not signable ?\n");
-               error = ENOENT;
-               goto out;
-       }
-
-       vnode_unlock(vp);
-
-       error = 0;
-out:
-       if (error) {
-               if (cs_debug)
-                       printf("sigpup: not signable ?\n");
-               /* we failed; release what we allocated */
-               if (blob) {
-                       kfree(blob, sizeof (*blob));
-                       blob = NULL;
-               }
-       }
-
-       return error;
-}
 
 int
 ubc_cs_blob_add(
@@ -2891,7 +2781,8 @@ ubc_cs_blob_add(
        off_t           base_offset,
        vm_address_t    addr,
        vm_size_t       size,
-       __unused int flags)
+       __unused int    flags,
+       struct cs_blob  **ret_blob)
 {
        kern_return_t           kr;
        struct ubc_info         *uip;
@@ -2901,12 +2792,14 @@ ubc_cs_blob_add(
        memory_object_size_t    blob_size;
        const CS_CodeDirectory *cd;
        off_t                   blob_start_offset, blob_end_offset;
-       SHA1_CTX                sha1ctxt;
+       union cs_hash_union     mdctx;
        boolean_t               record_mtime;
-       int                     is_platform_binary;
+       int                     cs_flags;
 
        record_mtime = FALSE;
-       is_platform_binary = 0;
+       cs_flags = 0;
+       if (ret_blob)
+           *ret_blob = NULL;
 
        blob_handle = IPC_PORT_NULL;
 
@@ -2943,7 +2836,6 @@ ubc_cs_blob_add(
 
        /* fill in the new blob */
        blob->csb_cpu_type = cputype;
-       blob->csb_sigpup = 0;
        blob->csb_base_offset = base_offset;
        blob->csb_mem_size = size;
        blob->csb_mem_offset = 0;
@@ -2951,6 +2843,7 @@ ubc_cs_blob_add(
        blob->csb_mem_kaddr = addr;
        blob->csb_flags = 0;
        blob->csb_platform_binary = 0;
+       blob->csb_platform_path = 0;
        blob->csb_teamid = NULL;
        
        /*
@@ -2964,12 +2857,23 @@ ubc_cs_blob_add(
                blob->csb_flags = 0;
                blob->csb_start_offset = 0;
                blob->csb_end_offset = 0;
-               memset(blob->csb_sha1, 0, SHA1_RESULTLEN);
+               memset(blob->csb_cdhash, 0, sizeof(blob->csb_cdhash));
                /* let the vnode checker determine if the signature is valid or not */
        } else {
-               const unsigned char *sha1_base;
-               int sha1_size;
-
+               const unsigned char *md_base;
+               uint8_t hash[CS_HASH_MAX_SIZE];
+               int md_size;
+
+               blob->csb_hashtype = cs_find_md(cd->hashType);
+               if (blob->csb_hashtype == NULL || blob->csb_hashtype->cs_digest_size > sizeof(hash))
+                       panic("validated CodeDirectory but unsupported type");
+               if (blob->csb_hashtype->cs_cd_size < CS_CDHASH_LEN) {
+                       if (cs_debug) 
+                               printf("cs_cd_size is too small for a cdhash\n");
+                       error = EINVAL;
+                       goto out;
+               }
+                   
                blob->csb_flags = (ntohl(cd->flags) & CS_ALLOWED_MACHO) | CS_VALID;
                blob->csb_end_offset = round_page_4K(ntohl(cd->codeLimit));
                if((ntohl(cd->version) >= CS_SUPPORTSSCATTER) && (ntohl(cd->scatterOffset))) {
@@ -2977,15 +2881,17 @@ ubc_cs_blob_add(
                                ((const char*)cd + ntohl(cd->scatterOffset));
                        blob->csb_start_offset = ntohl(scatter->base) * PAGE_SIZE_4K;
                } else {
-                       blob->csb_start_offset = (blob->csb_end_offset -
-                                                 (ntohl(cd->nCodeSlots) * PAGE_SIZE_4K));
+                       blob->csb_start_offset = 0;
                }
-               /* compute the blob's SHA1 hash */
-               sha1_base = (const unsigned char *) cd;
-               sha1_size = ntohl(cd->length);
-               SHA1Init(&sha1ctxt);
-               SHA1Update(&sha1ctxt, sha1_base, sha1_size);
-               SHA1Final(blob->csb_sha1, &sha1ctxt);
+               /* compute the blob's cdhash */
+               md_base = (const unsigned char *) cd;
+               md_size = ntohl(cd->length);
+
+               blob->csb_hashtype->cs_init(&mdctx);
+               blob->csb_hashtype->cs_update(&mdctx, md_base, md_size);
+               blob->csb_hashtype->cs_final(hash, &mdctx);
+
+               memcpy(blob->csb_cdhash, hash, CS_CDHASH_LEN);
        }
 
        /* 
@@ -2994,16 +2900,15 @@ ubc_cs_blob_add(
 #if CONFIG_MACF
        error = mac_vnode_check_signature(vp, 
                                          base_offset, 
-                                         blob->csb_sha1, 
-                                         (const void*)cd,
-                                         size, flags, 
-                                         &is_platform_binary);
+                                         blob->csb_cdhash, 
+                                         (const void*)addr, size,
+                                         flags, &cs_flags);
        if (error) {
                if (cs_debug) 
                        printf("check_signature[pid: %d], error = %d\n", current_proc()->p_pid, error);
                goto out;
        }
-       if ((flags & MAC_VNODE_CHECK_DYLD_SIM) && !is_platform_binary) {
+       if ((flags & MAC_VNODE_CHECK_DYLD_SIM) && !(cs_flags & CS_PLATFORM_BINARY)) {
                if (cs_debug)
                        printf("check_signature[pid: %d], is not apple signed\n", current_proc()->p_pid);
                error = EPERM;
@@ -3011,13 +2916,15 @@ ubc_cs_blob_add(
        }
 #endif 
        
-       if (is_platform_binary) {
+       if (cs_flags & CS_PLATFORM_BINARY) {
                if (cs_debug > 1)
                        printf("check_signature[pid: %d]: platform binary\n", current_proc()->p_pid);
                blob->csb_platform_binary = 1;
+               blob->csb_platform_path = !!(cs_flags & CS_PLATFORM_PATH);
        } else {
                blob->csb_platform_binary = 0;
-               blob->csb_teamid = csblob_get_teamid(blob);
+               blob->csb_platform_path = 0;
+               blob->csb_teamid = csblob_parse_teamid(blob);
                if (cs_debug > 1) {
                        if (blob->csb_teamid)
                                printf("check_signature[pid: %d]: team-id is %s\n", current_proc()->p_pid, blob->csb_teamid);
@@ -3094,9 +3001,9 @@ ubc_cs_blob_add(
                             (blob->csb_cpu_type == CPU_TYPE_ANY ||
                              oblob->csb_cpu_type == CPU_TYPE_ANY ||
                              blob->csb_cpu_type == oblob->csb_cpu_type) &&
-                            !bcmp(blob->csb_sha1,
-                                  oblob->csb_sha1,
-                                  SHA1_RESULTLEN)) {
+                            !bcmp(blob->csb_cdhash,
+                                  oblob->csb_cdhash,
+                                  CS_CDHASH_LEN)) {
                                 /*
                                  * We already have this blob:
                                  * we'll return success but
@@ -3112,6 +3019,8 @@ ubc_cs_blob_add(
                                         oblob->csb_cpu_type = cputype;
                                 }
                                 vnode_unlock(vp);
+                                if (ret_blob)
+                                        *ret_blob = oblob;
                                 error = EAGAIN;
                                 goto out;
                         } else {
@@ -3185,6 +3094,9 @@ ubc_cs_blob_add(
                vnode_mtime(vp, &uip->cs_mtime, vfs_context_current());
        }
 
+       if (ret_blob)
+               *ret_blob = blob;
+
        error = 0;      /* success ! */
 
 out:
@@ -3219,6 +3131,42 @@ out:
        return error;
 }
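A hedged usage sketch of the new ret_blob out-parameter (the leading vnode and CPU-type arguments are assumed to follow the existing ubc_cs_blob_add() signature; the error handling is illustrative only): the caller can now obtain the cs_blob that ended up attached to the vnode, including a pre-existing identical blob, which is reported with EAGAIN.

    struct cs_blob *blob = NULL;
    int err;

    err = ubc_cs_blob_add(vp, cputype, base_offset, addr, size, 0, &blob);
    if (err == EAGAIN) {
            /* an identical blob was already attached; blob points to it */
            err = 0;
    }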
 
+void
+csvnode_print_debug(struct vnode *vp)
+{
+       const char      *name = NULL;
+       struct ubc_info *uip;
+       struct cs_blob *blob;
+
+       name = vnode_getname_printable(vp);
+       if (name) {
+               printf("csvnode: name: %s\n", name);
+               vnode_putname_printable(name);
+       }
+
+       vnode_lock_spin(vp);
+
+       if (! UBCINFOEXISTS(vp)) {
+               blob = NULL;
+               goto out;
+       }
+
+       uip = vp->v_ubcinfo;
+       for (blob = uip->cs_blobs; blob != NULL; blob = blob->csb_next) {
+               printf("csvnode: range: %lu -> %lu flags: 0x%08x platform: %s path: %s team: %s\n",
+                      (unsigned long)blob->csb_start_offset,
+                      (unsigned long)blob->csb_end_offset,
+                      blob->csb_flags,
+                      blob->csb_platform_binary ? "yes" : "no",
+                      blob->csb_platform_path ? "yes" : "no",
+                      blob->csb_teamid ? blob->csb_teamid : "<NO-TEAM>");
+       }
+
+out:
+       vnode_unlock(vp);
+
+}
+
 struct cs_blob *
 ubc_cs_blob_get(
        struct vnode    *vp,
@@ -3253,10 +3201,6 @@ ubc_cs_blob_get(
                }
        }
 
-       if (cs_debug && blob != NULL && blob->csb_sigpup) {
-               printf("found sig pup blob\n");
-       }
-
 out:
        vnode_unlock(vp);
 
@@ -3273,7 +3217,7 @@ ubc_cs_free(
             blob != NULL;
             blob = next_blob) {
                next_blob = blob->csb_next;
-               if (blob->csb_mem_kaddr != 0 && !blob->csb_sigpup) {
+               if (blob->csb_mem_kaddr != 0) {
                        ubc_cs_blob_deallocate(blob->csb_mem_kaddr,
                                               blob->csb_mem_size);
                        blob->csb_mem_kaddr = 0;
@@ -3322,7 +3266,7 @@ ubc_cs_blob_revalidate(
 {
        int error = 0;
 #if CONFIG_MACF
-       int is_platform_binary = 0;
+       int cs_flags = 0;
 #endif
        const CS_CodeDirectory *cd = NULL;
        
@@ -3339,7 +3283,9 @@ ubc_cs_blob_revalidate(
 
        /* callout to mac_vnode_check_signature */
 #if CONFIG_MACF
-       error = mac_vnode_check_signature(vp, blob->csb_base_offset, blob->csb_sha1, (const void*)cd, blob->csb_cpu_type, flags, &is_platform_binary);
+       error = mac_vnode_check_signature(vp, blob->csb_base_offset, blob->csb_cdhash,
+                                         (const void*)blob->csb_mem_kaddr, (int)blob->csb_mem_size,
+                                         flags, &cs_flags);
        if (cs_debug && error) {
                        printf("revalidate: check_signature[pid: %d], error = %d\n", current_proc()->p_pid, error);
        }
@@ -3430,8 +3376,9 @@ cs_validate_page(
        const void              *data,
        unsigned                *tainted)
 {
-       SHA1_CTX                sha1ctxt;
-       unsigned char           actual_hash[SHA1_RESULTLEN];
+       union cs_hash_union     mdctx;
+       struct cs_hash          *hashtype = NULL;
+       unsigned char           actual_hash[CS_HASH_MAX_SIZE];
        unsigned char           expected_hash[SHA1_RESULTLEN];
        boolean_t               found_hash;
        struct cs_blob          *blobs, *blob;
@@ -3442,7 +3389,7 @@ cs_validate_page(
        off_t                   offset; /* page offset in the file */
        size_t                  size;
        off_t                   codeLimit = 0;
-       char                    *lower_bound, *upper_bound;
+       const char              *lower_bound, *upper_bound;
        vm_offset_t             kaddr, blob_addr;
        vm_size_t               ksize;
        kern_return_t           kr;
@@ -3487,8 +3434,6 @@ cs_validate_page(
                                break;
                        }
                }
-               if (blob->csb_sigpup && cs_debug)
-                       printf("checking for a sigpup CD\n");
 
                blob_addr = kaddr + blob->csb_mem_offset;
                
@@ -3498,43 +3443,32 @@ cs_validate_page(
                embedded = (const CS_SuperBlob *) blob_addr;
                cd = findCodeDirectory(embedded, lower_bound, upper_bound);
                if (cd != NULL) {
-                       if (cd->pageSize != PAGE_SHIFT_4K ||
-                           cd->hashType != CS_HASHTYPE_SHA1 ||
-                           cd->hashSize != SHA1_RESULTLEN) {
-                               /* bogus blob ? */
-                               if (blob->csb_sigpup && cs_debug)
-                                       printf("page foo bogus sigpup CD\n");
-                               continue;
-                       }
+                       /* all CDs that have been injected are already validated */
 
                        offset = page_offset - blob->csb_base_offset;
                        if (offset < blob->csb_start_offset ||
                            offset >= blob->csb_end_offset) {
                                /* our page is not covered by this blob */
-                               if (blob->csb_sigpup && cs_debug)
-                                       printf("OOB sigpup CD\n");
                                continue;
                        }
 
+                       hashtype = blob->csb_hashtype;
+                       if (hashtype == NULL)
+                               panic("unknown hash type ?");
+                       if (hashtype->cs_digest_size > sizeof(actual_hash))
+                               panic("hash size too large");
+
                        codeLimit = ntohl(cd->codeLimit);
-                       if (blob->csb_sigpup && cs_debug)
-                               printf("sigpup codesize %d\n", (int)codeLimit);
 
-                       hash = hashes(cd, (unsigned)(offset>>PAGE_SHIFT_4K),
+                       hash = hashes(cd, (uint32_t)(offset>>PAGE_SHIFT_4K),
+                                     hashtype->cs_size,
                                      lower_bound, upper_bound);
                        if (hash != NULL) {
-                               bcopy(hash, expected_hash,
-                                     sizeof (expected_hash));
+                               bcopy(hash, expected_hash, sizeof(expected_hash));
                                found_hash = TRUE;
-                               if (blob->csb_sigpup && cs_debug)
-                                       printf("sigpup hash\n");
                        }
 
                        break;
-               } else {
-                       if (blob->csb_sigpup && cs_debug)
-                               printf("sig pup had no valid CD\n");
-
                }
        }
 
@@ -3567,15 +3501,15 @@ cs_validate_page(
                        size = (size_t) (codeLimit & PAGE_MASK_4K);
                        *tainted |= CS_VALIDATE_NX;
                }
-               /* compute the actual page's SHA1 hash */
-               SHA1Init(&sha1ctxt);
-               SHA1UpdateUsePhysicalAddress(&sha1ctxt, data, size);
-               SHA1Final(actual_hash, &sha1ctxt);
+
+               hashtype->cs_init(&mdctx);
+               hashtype->cs_update(&mdctx, data, size);
+               hashtype->cs_final(actual_hash, &mdctx);
 
                asha1 = (const uint32_t *) actual_hash;
                esha1 = (const uint32_t *) expected_hash;
 
-               if (bcmp(expected_hash, actual_hash, SHA1_RESULTLEN) != 0) {
+               if (bcmp(expected_hash, actual_hash, hashtype->cs_cd_size) != 0) {
                        if (cs_debug) {
                                printf("CODE SIGNING: cs_validate_page: "
                                       "mobj %p off 0x%llx size 0x%lx: "
@@ -3633,7 +3567,7 @@ ubc_cs_getcdhash(
                ret = EBADEXEC; /* XXX any better error ? */
        } else {
                /* get the SHA1 hash of that blob */
-               bcopy(blob->csb_sha1, cdhash, sizeof (blob->csb_sha1));
+               bcopy(blob->csb_cdhash, cdhash, sizeof (blob->csb_cdhash));
                ret = 0;
        }
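A condensed sketch of the hash-type dispatch introduced above (cs_digest_buffer is a hypothetical helper; the cs_init/cs_update/cs_final members and union cs_hash_union are the ones this change uses): the per-algorithm function pointers replace the hard-coded SHA1 calls, so a single code path can compute either a SHA-1 digest or a larger one.

    static void
    cs_digest_buffer(struct cs_hash *type, const void *buf, size_t len,
        unsigned char *out)         /* out must hold type->cs_digest_size bytes */
    {
            union cs_hash_union ctx;

            type->cs_init(&ctx);
            type->cs_update(&ctx, buf, len);
            type->cs_final(out, &ctx);
    }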
 
index 9c5801dde34f38b7ed0bf5b7623752aa01b60f01..7fde6ee3e75aa242c3a0fab1839b14b098ead5b9 100644 (file)
@@ -104,6 +104,19 @@ decl_lck_mtx_data(static, domain_timeout_mtx);
 
 static u_int64_t _net_uptime;
 
+#if (DEVELOPMENT || DEBUG)
+
+SYSCTL_DECL(_kern_ipc);
+
+static int sysctl_do_drain_domains SYSCTL_HANDLER_ARGS;
+
+SYSCTL_PROC(_kern_ipc, OID_AUTO, do_drain_domains,
+       CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED,
+       0, 0,
+       sysctl_do_drain_domains, "I", "force manual drain domains");
+
+#endif /* DEVELOPMENT || DEBUG */
+
 static void
 pr_init_old(struct protosw *pp, struct domain *dp)
 {
@@ -1052,3 +1065,24 @@ domain_unguard_release(domain_unguard_t unguard)
        else
                lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
 }
+
+#if (DEVELOPMENT || DEBUG)
+static int
+sysctl_do_drain_domains SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       int error;
+       int dummy = 0;
+
+       error = sysctl_handle_int(oidp, &dummy, 0, req);        
+       if (error || req->newptr == USER_ADDR_NULL)
+               return (error);
+
+       net_drain_domains();
+
+       return (0);
+}
+
+#endif /* DEVELOPMENT || DEBUG */
\ No newline at end of file
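A user-space trigger sketch for the new development-only sysctl (trigger_drain is a hypothetical test helper; writing the value is equivalent to `sysctl -w kern.ipc.do_drain_domains=1` and only works on DEVELOPMENT or DEBUG kernels):

    #include <sys/sysctl.h>

    int
    trigger_drain(void)
    {
            int one = 1;

            /* any successful write makes the handler call net_drain_domains() */
            return (sysctlbyname("kern.ipc.do_drain_domains",
                NULL, NULL, &one, sizeof (one)));
    }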
index d5f73128e1cabc77cb7b6dac3596c080c94bee0b..be9cded69bf88b68ddf184698da6e0466d1194bf 100644 (file)
  *                     |                               |
  *                     v                               |
  *                 [freelist] ----------->>------------+
- *      (objects never get purged to VM)
+ *      (objects get purged to VM only on demand)
  *
  * b. Composite object:
  *
  *
  * The mclaudit[] array is allocated at initialization time, but its contents
  * get populated when the corresponding cluster is created.  Because a page
- * can be turned into NMBPBG number of mbufs, we preserve enough space for the
+ * can be turned into NMBPG number of mbufs, we preserve enough space for the
  * mbufs so that there is a 1-to-1 mapping between them.  A page that never
  * gets (or has not yet) turned into mbufs will use only cl_audit[0] with the
  * remaining entries unused.  For 16KB cluster, only one entry from the first
@@ -402,22 +402,22 @@ typedef struct mcl_slab {
  * whenever a new piece of memory mapped in from the VM crosses the 1MB
  * boundary.
  */
-#define        NSLABSPMB       ((1 << MBSHIFT) >> PGSHIFT)     /* 256 slabs/grp */
+#define        NSLABSPMB       ((1 << MBSHIFT) >> PAGE_SHIFT)
 
 typedef struct mcl_slabg {
-       mcl_slab_t      slg_slab[NSLABSPMB];    /* group of slabs */
+       mcl_slab_t      *slg_slab;      /* group of slabs */
 } mcl_slabg_t;
 
 /*
  * Number of slabs needed to control a 16KB cluster object.
  */
-#define        NSLABSP16KB     (M16KCLBYTES >> PGSHIFT)
+#define        NSLABSP16KB     (M16KCLBYTES >> PAGE_SHIFT)
 
 /*
  * Per-cluster audit structure.
  */
 typedef struct {
-       mcache_audit_t  *cl_audit[NMBPBG];      /* array of audits */
+       mcache_audit_t  **cl_audit;     /* array of audits */
 } mcl_audit_t;
 
 typedef struct {
@@ -476,8 +476,8 @@ static unsigned int slabgrp;        /* # of entries in slabs table */
 int nclusters;                 /* # of clusters for non-jumbo (legacy) sizes */
 int njcl;                      /* # of clusters for jumbo sizes */
 int njclbytes;                 /* size of a jumbo cluster */
-union mbigcluster *mbutl;      /* first mapped cluster address */
-union mbigcluster *embutl;     /* ending virtual address of mclusters */
+unsigned char *mbutl;          /* first mapped cluster address */
+unsigned char *embutl;         /* ending virtual address of mclusters */
 int _max_linkhdr;              /* largest link-level header */
 int _max_protohdr;             /* largest protocol header */
 int max_hdr;                   /* largest link+protocol header */
@@ -788,7 +788,8 @@ static boolean_t mbuf_report_usage(mbuf_class_t);
 }
 
 #define        MBUF_IN_MAP(addr)                                               \
-       ((void *)(addr) >= (void *)mbutl && (void *)(addr) < (void *)embutl)
+       ((unsigned char *)(addr) >= mbutl &&                            \
+       (unsigned char *)(addr) < embutl)
 
 #define        MRANGE(addr) {                                                  \
        if (!MBUF_IN_MAP(addr))                                         \
@@ -801,21 +802,28 @@ static boolean_t mbuf_report_usage(mbuf_class_t);
 #define        MTOD(m, t)      ((t)((m)->m_data))
 
 /*
- * Macros to obtain (4KB) cluster index and base cluster address.
+ * Macros to obtain the page index given a base cluster address, and vice versa.
  */
-
-#define        MTOBG(x)        (((char *)(x) - (char *)mbutl) >> MBIGCLSHIFT)
-#define        BGTOM(x)        ((union mbigcluster *)(mbutl + (x)))
+#define        MTOPG(x)        (((unsigned char *)x - mbutl) >> PAGE_SHIFT)
+#define PGTOM(x)       (mbutl + (x << PAGE_SHIFT))
 
 /*
  * Macro to find the mbuf index relative to a base.
  */
-#define        MCLIDX(c, m)    (((char *)(m) - (char *)(c)) >> MSIZESHIFT)
+#define        MBPAGEIDX(c, m) \
+       (((unsigned char *)(m) - (unsigned char *)(c)) >> MSIZESHIFT)
 
 /*
  * Same thing for 2KB cluster index.
  */
-#define        CLBGIDX(c, m)   (((char *)(m) - (char *)(c)) >> MCLSHIFT)
+#define        CLPAGEIDX(c, m) \
+       (((unsigned char *)(m) - (unsigned char *)(c)) >> MCLSHIFT)
+
+/*
+ * Macro to find the 4KB cluster index relative to a base.
+ */
+#define BCLPAGEIDX(c, m) \
+       (((unsigned char *)(m) - (unsigned char *)(c)) >> MBIGCLSHIFT)
 
 /*
  * Macros used during mbuf and cluster initialization.
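An illustrative helper, assuming an address already known to lie in the mbuf map (mbuf_addr_page_base is hypothetical, not part of the patch): MTOPG() converts an mbuf-map address into its page index and PGTOM() maps a page index back to the page base, so composing the two truncates an address down to its containing page.

    static unsigned char *
    mbuf_addr_page_base(void *addr)
    {
            VERIFY(MBUF_IN_MAP(addr));
            /* round the address down to the base of its page */
            return (PGTOM(MTOPG(addr)));
    }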
@@ -1178,7 +1186,7 @@ static void
 mbuf_table_init(void)
 {
        unsigned int b, c, s;
-       int m;
+       int m, config_mbuf_jumbo = 0;
 
        MALLOC(omb_stat, struct omb_stat *, OMB_STAT_SIZE(NELEM(mbuf_table)),
            M_TEMP, M_WAITOK | M_ZERO);
@@ -1193,38 +1201,44 @@ mbuf_table_init(void)
                mbuf_table[m].mtbl_stats = &mb_stat->mbs_class[m];
 
 #if CONFIG_MBUF_JUMBO
-       /*
-        * Set aside 1/3 of the mbuf cluster map for jumbo clusters; we do
-        * this only on platforms where jumbo cluster pool is enabled.
-        */
-       njcl = nmbclusters / 3;
-       njclbytes = M16KCLBYTES;
+       config_mbuf_jumbo = 1;
 #endif /* CONFIG_MBUF_JUMBO */
 
+       if (config_mbuf_jumbo == 1 || PAGE_SIZE == M16KCLBYTES) {
+               /*
+                * Set aside 1/3 of the mbuf cluster map for jumbo
+                * clusters; we do this only on platforms where the jumbo
+                * cluster pool is enabled or the page size is 16KB.
+                */
+               njcl = nmbclusters / 3;
+               njclbytes = M16KCLBYTES;
+       }
+
        /*
         * nclusters holds both the 2KB and 4KB pools, so ensure it's
         * a multiple of 4KB clusters.
         */
-       nclusters = P2ROUNDDOWN(nmbclusters - njcl, NCLPBG);
+       nclusters = P2ROUNDDOWN(nmbclusters - njcl, NCLPG);
        if (njcl > 0) {
                /*
                 * Each jumbo cluster takes 8 2KB clusters, so make
                 * sure that the pool size is evenly divisible by 8;
                 * njcl is in 2KB unit, hence treated as such.
                 */
-               njcl = P2ROUNDDOWN(nmbclusters - nclusters, 8);
+               njcl = P2ROUNDDOWN(nmbclusters - nclusters, NCLPJCL);
 
                /* Update nclusters with rounded down value of njcl */
-               nclusters = P2ROUNDDOWN(nmbclusters - njcl, NCLPBG);
+               nclusters = P2ROUNDDOWN(nmbclusters - njcl, NCLPG);
        }
 
        /*
-        * njcl is valid only on platforms with 16KB jumbo clusters, where
-        * it is configured to 1/3 of the pool size.  On these platforms,
-        * the remaining is used for 2KB and 4KB clusters.  On platforms
-        * without 16KB jumbo clusters, the entire pool is used for both
-        * 2KB and 4KB clusters.  A 4KB cluster can either be splitted into
-        * 16 mbufs, or into 2 2KB clusters.
+        * njcl is valid only on platforms with 16KB jumbo clusters or
+        * with 16KB pages, where it is configured to 1/3 of the pool
+        * size.  On these platforms, the remainder is used for 2KB
+        * and 4KB clusters.  On platforms without 16KB jumbo clusters,
+        * the entire pool is used for both 2KB and 4KB clusters.  A 4KB
+        * cluster can either be split into 16 mbufs, or into 2 2KB
+        * clusters.
         *
         *  +---+---+------------ ... -----------+------- ... -------+
         *  | c | b |              s             |        njcl       |
@@ -1233,8 +1247,8 @@ mbuf_table_init(void)
         * 1/32th of the shared region is reserved for pure 2KB and 4KB
         * clusters (1/64th each.)
         */
-       c = P2ROUNDDOWN((nclusters >> 6), 2);           /* in 2KB unit */
-       b = P2ROUNDDOWN((nclusters >> (6 + NCLPBGSHIFT)), 2); /* in 4KB unit */
+       c = P2ROUNDDOWN((nclusters >> 6), NCLPG);       /* in 2KB unit */
+       b = P2ROUNDDOWN((nclusters >> (6 + NCLPBGSHIFT)), NBCLPG); /* in 4KB unit */
        s = nclusters - (c + (b << NCLPBGSHIFT));       /* in 2KB unit */
 
        /*
@@ -1468,7 +1482,7 @@ mbinit(void)
         * mcl_slab_g_t units, each one representing a MB of memory.
         */
        maxslabgrp =
-           (P2ROUNDUP(nmbclusters, (MBSIZE >> 11)) << MCLSHIFT) >> MBSHIFT;
+           (P2ROUNDUP(nmbclusters, (MBSIZE >> MCLSHIFT)) << MCLSHIFT) >> MBSHIFT;
        MALLOC(slabstbl, mcl_slabg_t **, maxslabgrp * sizeof (mcl_slabg_t *),
            M_TEMP, M_WAITOK | M_ZERO);
        VERIFY(slabstbl != NULL);
@@ -1476,17 +1490,25 @@ mbinit(void)
        /*
         * Allocate audit structures, if needed:
         *
-        *      maxclaudit = (maxslabgrp * 1024 * 1024) / 4096
+        *      maxclaudit = (maxslabgrp * 1024 * 1024) / PAGE_SIZE
         *
         * This yields mcl_audit_t units, each one representing a page.
         */
        PE_parse_boot_argn("mbuf_debug", &mbuf_debug, sizeof (mbuf_debug));
        mbuf_debug |= mcache_getflags();
        if (mbuf_debug & MCF_DEBUG) {
-               maxclaudit = ((maxslabgrp << MBSHIFT) >> PGSHIFT);
+               int l;
+               mcl_audit_t *mclad;
+               maxclaudit = ((maxslabgrp << MBSHIFT) >> PAGE_SHIFT);
                MALLOC(mclaudit, mcl_audit_t *, maxclaudit * sizeof (*mclaudit),
                    M_TEMP, M_WAITOK | M_ZERO);
                VERIFY(mclaudit != NULL);
+               for (l = 0, mclad = mclaudit; l < maxclaudit; l++) {
+                       MALLOC(mclad[l].cl_audit, mcache_audit_t **,
+                           NMBPG * sizeof(mcache_audit_t *),
+                           M_TEMP, M_WAITOK | M_ZERO);
+                       VERIFY(mclad[l].cl_audit != NULL);
+               }
 
                mcl_audit_con_cache = mcache_create("mcl_audit_contents",
                    AUDIT_CONTENTS_SIZE, sizeof (u_int64_t), 0, MCR_SLEEP);
@@ -1507,7 +1529,7 @@ mbinit(void)
        mleak_activate();
 
        /* Calculate the number of pages assigned to the cluster pool */
-       mcl_pages = (nmbclusters * MCLBYTES) / CLBYTES;
+       mcl_pages = (nmbclusters << MCLSHIFT) / PAGE_SIZE;
        MALLOC(mcl_paddr, ppnum_t *, mcl_pages * sizeof (ppnum_t),
            M_TEMP, M_WAITOK);
        VERIFY(mcl_paddr != NULL);
@@ -1516,9 +1538,8 @@ mbinit(void)
        mcl_paddr_base = IOMapperIOVMAlloc(mcl_pages);
        bzero((char *)mcl_paddr, mcl_pages * sizeof (ppnum_t));
 
-       embutl = (union mbigcluster *)
-           ((void *)((unsigned char *)mbutl + (nmbclusters * MCLBYTES)));
-       VERIFY((((char *)embutl - (char *)mbutl) % MBIGCLBYTES) == 0);
+       embutl = (mbutl + (nmbclusters * MCLBYTES));
+       VERIFY(((embutl - mbutl) % MBIGCLBYTES) == 0);
 
        /* Prime up the freelist */
        PE_parse_boot_argn("initmcl", &initmcl, sizeof (initmcl));
@@ -1659,8 +1680,6 @@ slab_alloc(mbuf_class_t class, int wait)
 
        lck_mtx_assert(mbuf_mlock, LCK_MTX_ASSERT_OWNED);
 
-       VERIFY(class != MC_16KCL || njcl > 0);
-
        /* This should always be NULL for us */
        VERIFY(m_cobjlist(class) == NULL);
 
@@ -1671,7 +1690,8 @@ slab_alloc(mbuf_class_t class, int wait)
         * more than one buffer chunks (e.g. mbuf slabs).  For other
         * slabs, this probably doesn't make much of a difference.
         */
-       if ((class == MC_MBUF || class == MC_CL) && (wait & MCR_COMP))
+       if ((class == MC_MBUF || class == MC_CL || class == MC_BIGCL)
+           && (wait & MCR_COMP))
                sp = (mcl_slab_t *)TAILQ_LAST(&m_slablist(class), mcl_slhead);
        else
                sp = (mcl_slab_t *)TAILQ_FIRST(&m_slablist(class));
@@ -1688,16 +1708,12 @@ slab_alloc(mbuf_class_t class, int wait)
            (sp->sl_flags & (SLF_MAPPED | SLF_PARTIAL)) == SLF_MAPPED);
        buf = sp->sl_head;
        VERIFY(slab_inrange(sp, buf) && sp == slab_get(buf));
+       sp->sl_head = buf->obj_next;
+       /* Increment slab reference */
+       sp->sl_refcnt++;
+
+       VERIFY(sp->sl_head != NULL || sp->sl_refcnt == sp->sl_chunks);
 
-       if (class == MC_MBUF) {
-               sp->sl_head = buf->obj_next;
-               VERIFY(sp->sl_head != NULL || sp->sl_refcnt == (NMBPBG - 1));
-       } else if (class == MC_CL) {
-               sp->sl_head = buf->obj_next;
-               VERIFY(sp->sl_head != NULL || sp->sl_refcnt == (NCLPBG - 1));
-       } else {
-               sp->sl_head = NULL;
-       }
        if (sp->sl_head != NULL && !slab_inrange(sp, sp->sl_head)) {
                slab_nextptr_panic(sp, sp->sl_head);
                /* In case sl_head is in the map but not in the slab */
@@ -1705,9 +1721,6 @@ slab_alloc(mbuf_class_t class, int wait)
                /* NOTREACHED */
        }
 
-       /* Increment slab reference */
-       sp->sl_refcnt++;
-
        if (mclaudit != NULL) {
                mcache_audit_t *mca = mcl_audit_buf2mca(class, buf);
                mca->mca_uflags = 0;
@@ -1719,20 +1732,20 @@ slab_alloc(mbuf_class_t class, int wait)
        if (class == MC_CL) {
                mbstat.m_clfree = (--m_infree(MC_CL)) + m_infree(MC_MBUF_CL);
                /*
-                * A 2K cluster slab can have at most NCLPBG references.
+                * A 2K cluster slab can have at most NCLPG references.
                 */
-               VERIFY(sp->sl_refcnt >= 1 && sp->sl_refcnt <= NCLPBG &&
-                   sp->sl_chunks == NCLPBG &&
-                   sp->sl_len == m_maxsize(MC_BIGCL));
-               VERIFY(sp->sl_refcnt < NCLPBG || sp->sl_head == NULL);
+               VERIFY(sp->sl_refcnt >= 1 && sp->sl_refcnt <= NCLPG &&
+                   sp->sl_chunks == NCLPG && sp->sl_len == PAGE_SIZE);
+               VERIFY(sp->sl_refcnt < NCLPG || sp->sl_head == NULL);
        } else if (class == MC_BIGCL) {
                mbstat.m_bigclfree = (--m_infree(MC_BIGCL)) +
                    m_infree(MC_MBUF_BIGCL);
                /*
-                * A 4K cluster slab can have at most 1 reference.
+                * A 4K cluster slab can have at most NBCLPG references.
                 */
-               VERIFY(sp->sl_refcnt == 1 && sp->sl_chunks == 1 &&
-                   sp->sl_len == m_maxsize(class) && sp->sl_head == NULL);
+               VERIFY(sp->sl_refcnt >= 1 && sp->sl_chunks == NBCLPG &&
+                   sp->sl_len == PAGE_SIZE && 
+                   (sp->sl_refcnt < NBCLPG || sp->sl_head == NULL));
        } else if (class == MC_16KCL) {
                mcl_slab_t *nsp;
                int k;
@@ -1770,18 +1783,19 @@ slab_alloc(mbuf_class_t class, int wait)
                 * Since we have incremented the reference count above,
                 * an mbuf slab (formerly a 4KB cluster slab that was cut
                 * up into mbufs) must have a reference count between 1
-                * and NMBPBG at this point.
+                * and NMBPG at this point.
                 */
-               VERIFY(sp->sl_refcnt >= 1 && sp->sl_refcnt <= NMBPBG &&
-                   sp->sl_chunks == NMBPBG &&
-                   sp->sl_len == m_maxsize(MC_BIGCL));
-               VERIFY(sp->sl_refcnt < NMBPBG || sp->sl_head == NULL);
+               VERIFY(sp->sl_refcnt >= 1 && sp->sl_refcnt <= NMBPG &&
+                   sp->sl_chunks == NMBPG &&
+                   sp->sl_len == PAGE_SIZE);
+               VERIFY(sp->sl_refcnt < NMBPG || sp->sl_head == NULL);
        }
 
        /* If empty, remove this slab from the class's freelist */
        if (sp->sl_head == NULL) {
-               VERIFY(class != MC_MBUF || sp->sl_refcnt == NMBPBG);
-               VERIFY(class != MC_CL || sp->sl_refcnt == NCLPBG);
+               VERIFY(class != MC_MBUF || sp->sl_refcnt == NMBPG);
+               VERIFY(class != MC_CL || sp->sl_refcnt == NCLPG);
+               VERIFY(class != MC_BIGCL || sp->sl_refcnt == NBCLPG);
                slab_remove(sp, class);
        }
 
@@ -1795,11 +1809,14 @@ static void
 slab_free(mbuf_class_t class, mcache_obj_t *buf)
 {
        mcl_slab_t *sp;
+       boolean_t reinit_supercl = false;
+       mbuf_class_t super_class;
 
        lck_mtx_assert(mbuf_mlock, LCK_MTX_ASSERT_OWNED);
 
        VERIFY(class != MC_16KCL || njcl > 0);
        VERIFY(buf->obj_next == NULL);
+
        sp = slab_get(buf);
        VERIFY(sp->sl_class == class && slab_inrange(sp, buf) &&
            (sp->sl_flags & (SLF_MAPPED | SLF_PARTIAL)) == SLF_MAPPED);
@@ -1813,20 +1830,17 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf)
                 * A slab that has been split for 2KB clusters can have
                 * at most 1 outstanding reference at this point.
                 */
-               VERIFY(sp->sl_refcnt >= 0 && sp->sl_refcnt <= (NCLPBG - 1) &&
-                   sp->sl_chunks == NCLPBG &&
-                   sp->sl_len == m_maxsize(MC_BIGCL));
-               VERIFY(sp->sl_refcnt < (NCLPBG - 1) ||
+               VERIFY(sp->sl_refcnt >= 0 && sp->sl_refcnt <= (NCLPG - 1) &&
+                   sp->sl_chunks == NCLPG && sp->sl_len == PAGE_SIZE);
+               VERIFY(sp->sl_refcnt < (NCLPG - 1) ||
                    (slab_is_detached(sp) && sp->sl_head == NULL));
        } else if (class == MC_BIGCL) {
-               VERIFY(IS_P2ALIGNED(buf, MCLBYTES));
-               /*
-                * A 4KB cluster slab can have at most 1 reference
-                * which must be 0 at this point.
-                */
-               VERIFY(sp->sl_refcnt == 0 && sp->sl_chunks == 1 &&
-                   sp->sl_len == m_maxsize(class) && sp->sl_head == NULL);
-               VERIFY(slab_is_detached(sp));
+               VERIFY(IS_P2ALIGNED(buf, MBIGCLBYTES));
+
+               /* A 4KB cluster slab can have NBCLPG references at most */
+               VERIFY(sp->sl_refcnt >= 0 && sp->sl_chunks == NBCLPG);
+               VERIFY(sp->sl_refcnt < (NBCLPG - 1) ||
+                   (slab_is_detached(sp) && sp->sl_head == NULL));
        } else if (class == MC_16KCL) {
                mcl_slab_t *nsp;
                int k;
@@ -1834,7 +1848,7 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf)
                 * A 16KB cluster takes NSLABSP16KB slabs, all must
                 * now have 0 reference.
                 */
-               VERIFY(IS_P2ALIGNED(buf, MBIGCLBYTES));
+               VERIFY(IS_P2ALIGNED(buf, PAGE_SIZE));
                VERIFY(sp->sl_refcnt == 0 && sp->sl_chunks == 1 &&
                    sp->sl_len == m_maxsize(class) && sp->sl_head == NULL);
                VERIFY(slab_is_detached(sp));
@@ -1852,15 +1866,17 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf)
                }
        } else {
                /*
-                * A slab that has been splitted for mbufs has at most NMBPBG
-                * reference counts.  Since we have decremented one reference
-                * above, it must now be between 0 and NMBPBG-1.
+                * A slab that has been split for mbufs has at most
+                * NMBPG reference counts.  Since we have decremented
+                * one reference above, it must now be between 0 and
+                * NMBPG-1.
                 */
                VERIFY(class == MC_MBUF);
-               VERIFY(sp->sl_refcnt >= 0 && sp->sl_refcnt <= (NMBPBG - 1) &&
-                   sp->sl_chunks == NMBPBG &&
-                   sp->sl_len == m_maxsize(MC_BIGCL));
-               VERIFY(sp->sl_refcnt < (NMBPBG - 1) ||
+               VERIFY(sp->sl_refcnt >= 0 &&
+                   sp->sl_refcnt <= (NMBPG - 1) &&
+                   sp->sl_chunks == NMBPG &&
+                   sp->sl_len == PAGE_SIZE);
+               VERIFY(sp->sl_refcnt < (NMBPG - 1) ||
                    (slab_is_detached(sp) && sp->sl_head == NULL));
        }
 
@@ -1872,7 +1888,8 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf)
        if (mclaudit != NULL) {
                mcache_audit_t *mca = mcl_audit_buf2mca(class, buf);
                if (mclverify) {
-                       mcache_audit_free_verify(mca, buf, 0, m_maxsize(class));
+                       mcache_audit_free_verify(mca, buf, 0,
+                           m_maxsize(class));
                }
                mca->mca_uflags &= ~MB_SCVALID;
        }
@@ -1883,6 +1900,7 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf)
        } else if (class == MC_BIGCL) {
                mbstat.m_bigclfree = (++m_infree(MC_BIGCL)) +
                    m_infree(MC_MBUF_BIGCL);
+               buf->obj_next = sp->sl_head;
        } else if (class == MC_16KCL) {
                ++m_infree(MC_16KCL);
        } else {
@@ -1892,24 +1910,25 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf)
        sp->sl_head = buf;
 
        /*
-        * If a slab has been splitted to either one which holds 2KB clusters,
-        * or one which holds mbufs, turn it back to one which holds a 4KB
-        * cluster.
+        * If a slab has been split into one which holds 2KB clusters or
+        * one which holds mbufs, turn it back into one which holds a
+        * 4KB or 16KB cluster, depending on the page size.
         */
+       if (m_maxsize(MC_BIGCL) == PAGE_SIZE) {
+               super_class = MC_BIGCL;
+       } else {
+               VERIFY(PAGE_SIZE == m_maxsize(MC_16KCL));
+               super_class = MC_16KCL;
+       }
        if (class == MC_MBUF && sp->sl_refcnt == 0 &&
-           m_total(class) > m_minlimit(class) &&
-           m_total(MC_BIGCL) < m_maxlimit(MC_BIGCL)) {
-               int i = NMBPBG;
+           m_total(class) >= (m_minlimit(class) + NMBPG) &&
+           m_total(super_class) < m_maxlimit(super_class)) {
+               int i = NMBPG;
 
-               m_total(MC_BIGCL)++;
-               mbstat.m_bigclusters = m_total(MC_BIGCL);
-               m_total(MC_MBUF) -= NMBPBG;
+               m_total(MC_MBUF) -= NMBPG;
                mbstat.m_mbufs = m_total(MC_MBUF);
-               m_infree(MC_MBUF) -= NMBPBG;
-               mtype_stat_add(MT_FREE, -((unsigned)NMBPBG));
-
-               VERIFY(m_total(MC_BIGCL) <= m_maxlimit(MC_BIGCL));
-               VERIFY(m_total(MC_MBUF) >= m_minlimit(MC_MBUF));
+               m_infree(MC_MBUF) -= NMBPG;
+               mtype_stat_add(MT_FREE, -((unsigned)NMBPG));
 
                while (i--) {
                        struct mbuf *m = sp->sl_head;
@@ -1917,37 +1936,15 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf)
                        sp->sl_head = m->m_next;
                        m->m_next = NULL;
                }
-               VERIFY(sp->sl_head == NULL);
-
-               /* Remove the slab from the mbuf class's slab list */
-               slab_remove(sp, class);
-
-               /* Reinitialize it as a 4KB cluster slab */
-               slab_init(sp, MC_BIGCL, sp->sl_flags, sp->sl_base, sp->sl_base,
-                   sp->sl_len, 0, 1);
-
-               if (mclverify) {
-                       mcache_set_pattern(MCACHE_FREE_PATTERN,
-                           (caddr_t)sp->sl_head, m_maxsize(MC_BIGCL));
-               }
-               mbstat.m_bigclfree = (++m_infree(MC_BIGCL)) +
-                   m_infree(MC_MBUF_BIGCL);
-
-               VERIFY(slab_is_detached(sp));
-               /* And finally switch class */
-               class = MC_BIGCL;
+               reinit_supercl = true;
        } else if (class == MC_CL && sp->sl_refcnt == 0 &&
-           m_total(class) > m_minlimit(class) &&
-           m_total(MC_BIGCL) < m_maxlimit(MC_BIGCL)) {
-               int i = NCLPBG;
+           m_total(class) >=  (m_minlimit(class) + NCLPG) &&
+           m_total(super_class) < m_maxlimit(super_class)) {
+               int i = NCLPG;
 
-               m_total(MC_BIGCL)++;
-               mbstat.m_bigclusters = m_total(MC_BIGCL);
-               m_total(MC_CL) -= NCLPBG;
+               m_total(MC_CL) -= NCLPG;
                mbstat.m_clusters = m_total(MC_CL);
-               m_infree(MC_CL) -= NCLPBG;
-               VERIFY(m_total(MC_BIGCL) <= m_maxlimit(MC_BIGCL));
-               VERIFY(m_total(MC_CL) >= m_minlimit(MC_CL));
+               m_infree(MC_CL) -= NCLPG;
 
                while (i--) {
                        union mcluster *c = sp->sl_head;
@@ -1955,25 +1952,56 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf)
                        sp->sl_head = c->mcl_next;
                        c->mcl_next = NULL;
                }
-               VERIFY(sp->sl_head == NULL);
+               reinit_supercl = true;
+       } else if (class == MC_BIGCL && super_class != MC_BIGCL &&
+           sp->sl_refcnt == 0 &&
+           m_total(class) >= (m_minlimit(class) + NBCLPG) &&
+           m_total(super_class) < m_maxlimit(super_class)) {
+               int i = NBCLPG;
+
+               VERIFY(super_class == MC_16KCL);
+               m_total(MC_BIGCL) -= NBCLPG;
+               mbstat.m_bigclusters = m_total(MC_BIGCL);
+               m_infree(MC_BIGCL) -= NBCLPG;
 
-               /* Remove the slab from the 2KB cluster class's slab list */
+               while (i--) {
+                       union mbigcluster *bc = sp->sl_head;
+                       VERIFY(bc != NULL);
+                       sp->sl_head = bc->mbc_next;
+                       bc->mbc_next = NULL;
+               }
+               reinit_supercl = true;
+       }
+
+       if (reinit_supercl) {
+               VERIFY(sp->sl_head == NULL);
+               VERIFY(m_total(class) >= m_minlimit(class));
                slab_remove(sp, class);
 
-               /* Reinitialize it as a 4KB cluster slab */
-               slab_init(sp, MC_BIGCL, sp->sl_flags, sp->sl_base, sp->sl_base,
-                   sp->sl_len, 0, 1);
+               /* Reinitialize it as a cluster for the super class */
+               m_total(super_class)++;
+               m_infree(super_class)++;
+               VERIFY(sp->sl_flags == (SLF_MAPPED | SLF_DETACHED) &&
+                   sp->sl_len == PAGE_SIZE && sp->sl_refcnt == 0);
 
-               if (mclverify) {
+               slab_init(sp, super_class, SLF_MAPPED, sp->sl_base,
+                   sp->sl_base, PAGE_SIZE, 0, 1);
+               if (mclverify)
                        mcache_set_pattern(MCACHE_FREE_PATTERN,
-                           (caddr_t)sp->sl_head, m_maxsize(MC_BIGCL));
+                           (caddr_t)sp->sl_base, sp->sl_len);
+               ((mcache_obj_t *)(sp->sl_base))->obj_next = NULL;
+
+               if (super_class == MC_BIGCL) {
+                       mbstat.m_bigclusters = m_total(MC_BIGCL);
+                       mbstat.m_bigclfree = m_infree(MC_BIGCL) +
+                           m_infree(MC_MBUF_BIGCL);
                }
-               mbstat.m_bigclfree = (++m_infree(MC_BIGCL)) +
-                   m_infree(MC_MBUF_BIGCL);
 
                VERIFY(slab_is_detached(sp));
+               VERIFY(m_total(super_class) <= m_maxlimit(super_class));
+
                /* And finally switch class */
-               class = MC_BIGCL;
+               class = super_class;
        }
 
        /* Reinsert the slab to the class's slab list */
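A compact restatement of the super-class selection used above, assuming PAGE_SIZE is either 4KB or 16KB as the surrounding code verifies (mbuf_super_class is a hypothetical helper): the page-sized cluster class that a fully freed slab collapses back into is whichever class matches the page size.

    static mbuf_class_t
    mbuf_super_class(void)
    {
            if (m_maxsize(MC_BIGCL) == PAGE_SIZE)
                    return (MC_BIGCL);
            /* otherwise the page itself must be a 16KB cluster */
            VERIFY(PAGE_SIZE == m_maxsize(MC_16KCL));
            return (MC_16KCL);
    }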
@@ -2013,7 +2041,7 @@ mbuf_slab_alloc(void *arg, mcache_obj_t ***plist, unsigned int num, int wait)
                                 * it later when we run out of elements.
                                 */
                                if (!mbuf_cached_above(class, wait) &&
-                                   m_infree(class) < m_total(class) >> 5) {
+                                   m_infree(class) < (m_total(class) >> 5)) {
                                        (void) freelist_populate(class, 1,
                                            M_DONTWAIT);
                                }
@@ -2203,9 +2231,10 @@ cslab_alloc(mbuf_class_t class, mcache_obj_t ***plist, unsigned int num)
 
                if (class == MC_MBUF_CL) {
                        VERIFY(clsp->sl_refcnt >= 1 &&
-                           clsp->sl_refcnt <= NCLPBG);
+                           clsp->sl_refcnt <= NCLPG);
                } else {
-                       VERIFY(clsp->sl_refcnt == 1);
+                       VERIFY(clsp->sl_refcnt >= 1 &&
+                           clsp->sl_refcnt <= NBCLPG);
                }
 
                if (class == MC_MBUF_16KCL) {
@@ -2290,9 +2319,10 @@ cslab_free(mbuf_class_t class, mcache_obj_t *list, int purged)
                VERIFY(MEXT_RFA(ms) != NULL && MBUF_IS_COMPOSITE(ms));
                if (cl_class == MC_CL) {
                        VERIFY(clsp->sl_refcnt >= 1 &&
-                           clsp->sl_refcnt <= NCLPBG);
+                           clsp->sl_refcnt <= NCLPG);
                } else {
-                       VERIFY(clsp->sl_refcnt == 1);
+                       VERIFY(clsp->sl_refcnt >= 1 && 
+                           clsp->sl_refcnt <= NBCLPG);
                }
                if (cl_class == MC_16KCL) {
                        int k;
@@ -2486,7 +2516,8 @@ mbuf_cslab_alloc(void *arg, mcache_obj_t ***plist, unsigned int needed,
                        lck_mtx_lock(mbuf_mlock);
                        mca = mcl_audit_buf2mca(MC_MBUF, (mcache_obj_t *)m);
                        ms = MCA_SAVED_MBUF_PTR(mca);
-                       cl_mca = mcl_audit_buf2mca(MC_CL, (mcache_obj_t *)cl);
+                       cl_mca = mcl_audit_buf2mca(cl_class,
+                           (mcache_obj_t *)cl);
 
                        /*
                         * Pair them up.  Note that this is done at the time
@@ -2601,14 +2632,21 @@ mbuf_cslab_free(void *arg, mcache_obj_t *list, int purged)
 static void
 mbuf_cslab_audit(void *arg, mcache_obj_t *list, boolean_t alloc)
 {
-       mbuf_class_t class = (mbuf_class_t)arg;
+       mbuf_class_t class = (mbuf_class_t)arg, cl_class;
        mcache_audit_t *mca;
        struct mbuf *m, *ms;
        mcl_slab_t *clsp, *nsp;
-       size_t size;
+       size_t cl_size;
        void *cl;
 
        ASSERT(MBUF_CLASS_VALID(class) && MBUF_CLASS_COMPOSITE(class));
+       if (class == MC_MBUF_CL)
+               cl_class = MC_CL;
+       else if (class == MC_MBUF_BIGCL)
+               cl_class = MC_BIGCL;
+       else
+               cl_class = MC_16KCL;
+       cl_size = m_maxsize(cl_class);
 
        while ((m = ms = (struct mbuf *)list) != NULL) {
                lck_mtx_lock(mbuf_mlock);
@@ -2638,9 +2676,10 @@ mbuf_cslab_audit(void *arg, mcache_obj_t *list, boolean_t alloc)
                VERIFY(MEXT_RFA(ms) != NULL && MBUF_IS_COMPOSITE(ms));
                if (class == MC_MBUF_CL)
                        VERIFY(clsp->sl_refcnt >= 1 &&
-                           clsp->sl_refcnt <= NCLPBG);
+                           clsp->sl_refcnt <= NCLPG);
                else
-                       VERIFY(clsp->sl_refcnt == 1);
+                       VERIFY(clsp->sl_refcnt >= 1 &&
+                           clsp->sl_refcnt <= NBCLPG);
 
                if (class == MC_MBUF_16KCL) {
                        int k;
@@ -2652,14 +2691,9 @@ mbuf_cslab_audit(void *arg, mcache_obj_t *list, boolean_t alloc)
                        }
                }
 
-               mca = mcl_audit_buf2mca(MC_CL, cl);
-               if (class == MC_MBUF_CL)
-                       size = m_maxsize(MC_CL);
-               else if (class == MC_MBUF_BIGCL)
-                       size = m_maxsize(MC_BIGCL);
-               else
-                       size = m_maxsize(MC_16KCL);
-               mcl_audit_cluster(mca, cl, size, alloc, FALSE);
+
+               mca = mcl_audit_buf2mca(cl_class, cl);
+               mcl_audit_cluster(mca, cl, cl_size, alloc, FALSE);
                if (mcltrace)
                        mcache_buffer_log(mca, cl, m_cache(class), &mb_start);
 
@@ -2679,17 +2713,29 @@ mbuf_cslab_audit(void *arg, mcache_obj_t *list, boolean_t alloc)
 static int
 m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize)
 {
-       int i;
+       int i, count = 0;
        vm_size_t size = 0;
-       int numpages = 0, large_buffer = (bufsize == m_maxsize(MC_16KCL));
+       int numpages = 0, large_buffer;
        vm_offset_t page = 0;
        mcache_audit_t *mca_list = NULL;
        mcache_obj_t *con_list = NULL;
        mcl_slab_t *sp;
+       mbuf_class_t class;
 
+       /* Set if allocating a single buffer requires multiple pages */
+       large_buffer = ((bufsize == m_maxsize(MC_16KCL)) &&
+               PAGE_SIZE < M16KCLBYTES);
        VERIFY(bufsize == m_maxsize(MC_BIGCL) ||
            bufsize == m_maxsize(MC_16KCL));
 
+       VERIFY((bufsize == PAGE_SIZE) ||
+           (bufsize > PAGE_SIZE && bufsize == m_maxsize(MC_16KCL)));
+
+       if (bufsize == m_size(MC_BIGCL))
+               class = MC_BIGCL;
+       else
+               class = MC_16KCL;
+
        lck_mtx_assert(mbuf_mlock, LCK_MTX_ASSERT_OWNED);
 
        /*
@@ -2733,8 +2779,8 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize)
 
        if (page == 0) {
                if (bufsize == m_maxsize(MC_BIGCL)) {
-                       /* Try for 1 page if failed, only 4KB request */
-                       size = NBPG;
+                       /* Try for 1 page if failed */
+                       size = PAGE_SIZE;
                        page = kmem_mb_alloc(mb_map, size, 0);
                }
 
@@ -2744,8 +2790,8 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize)
                }
        }
 
-       VERIFY(IS_P2ALIGNED(page, NBPG));
-       numpages = size / NBPG;
+       VERIFY(IS_P2ALIGNED(page, PAGE_SIZE));
+       numpages = size / PAGE_SIZE;
 
        /* If auditing is enabled, allocate the audit structures now */
        if (mclaudit != NULL) {
@@ -2754,19 +2800,23 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize)
                /*
                 * Yes, I realize this is a waste of memory for clusters
                 * that never get transformed into mbufs, as we may end
-                * up with NMBPBG-1 unused audit structures per cluster.
+                * up with NMBPG-1 unused audit structures per cluster.
                 * But doing so tremendously simplifies the allocation
                 * strategy, since at this point we are not holding the
                 * mbuf lock and the caller is okay to be blocked.
                 */
-               if (bufsize == m_maxsize(MC_BIGCL)) {
-                       needed = numpages * NMBPBG;
+               if (bufsize == PAGE_SIZE) {
+                       needed = numpages * NMBPG;
 
                        i = mcache_alloc_ext(mcl_audit_con_cache,
                            &con_list, needed, MCR_SLEEP);
 
                        VERIFY(con_list != NULL && i == needed);
                } else {
+                       /*
+                        * Multiple 4K pages are being used for a
+                        * single 16K cluster.
+                        */
                        needed = numpages / NSLABSP16KB;
                }
 
@@ -2778,19 +2828,19 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize)
 
        lck_mtx_lock(mbuf_mlock);
 
-       for (i = 0; i < numpages; i++, page += NBPG) {
-               ppnum_t offset = ((char *)page - (char *)mbutl) / NBPG;
+       for (i = 0; i < numpages; i++, page += PAGE_SIZE) {
+               ppnum_t offset =
+                   ((unsigned char *)page - mbutl) >> PAGE_SHIFT;
                ppnum_t new_page = pmap_find_phys(kernel_pmap, page);
-               mbuf_class_t class = MC_BIGCL;
 
                /*
-                * If there is a mapper the appropriate I/O page is returned;
-                * zero out the page to discard its past contents to prevent
-                * exposing leftover kernel memory.
+                * If there is a mapper the appropriate I/O page is
+                * returned; zero out the page to discard its past
+                * contents to prevent exposing leftover kernel memory.
                 */
                VERIFY(offset < mcl_pages);
                if (mcl_paddr_base != 0) {
-                       bzero((void *)(uintptr_t) page, page_size);
+                       bzero((void *)(uintptr_t) page, PAGE_SIZE);
                        new_page = IOMapperInsertPage(mcl_paddr_base,
                            offset, new_page);
                }
@@ -2799,36 +2849,42 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize)
                /* Pattern-fill this fresh page */
                if (mclverify) {
                        mcache_set_pattern(MCACHE_FREE_PATTERN,
-                           (caddr_t)page, NBPG);
+                           (caddr_t)page, PAGE_SIZE);
                }
-               if (bufsize == m_maxsize(MC_BIGCL)) {
-                       union mbigcluster *mbc = (union mbigcluster *)page;
-
+               if (bufsize == PAGE_SIZE) {
+                       mcache_obj_t *buf;
                        /* One for the entire page */
-                       sp = slab_get(mbc);
+                       sp = slab_get((void *)page);
                        if (mclaudit != NULL) {
-                               mcl_audit_init(mbc, &mca_list, &con_list,
-                                   AUDIT_CONTENTS_SIZE, NMBPBG);
+                               mcl_audit_init((void *)page,
+                                   &mca_list, &con_list,
+                                   AUDIT_CONTENTS_SIZE, NMBPG);
                        }
                        VERIFY(sp->sl_refcnt == 0 && sp->sl_flags == 0);
-                       slab_init(sp, MC_BIGCL, SLF_MAPPED,
-                           mbc, mbc, bufsize, 0, 1);
+                       slab_init(sp, class, SLF_MAPPED, (void *)page,
+                           (void *)page, PAGE_SIZE, 0, 1);
+                       buf = (mcache_obj_t *)page;
+                       buf->obj_next = NULL;
 
                        /* Insert this slab */
-                       slab_insert(sp, MC_BIGCL);
-
-                       /* Update stats now since slab_get() drops the lock */
-                       mbstat.m_bigclfree = ++m_infree(MC_BIGCL) +
-                           m_infree(MC_MBUF_BIGCL);
-                       mbstat.m_bigclusters = ++m_total(MC_BIGCL);
-                       VERIFY(m_total(MC_BIGCL) <= m_maxlimit(MC_BIGCL));
-                       class = MC_BIGCL;
-               } else if ((i % NSLABSP16KB) == 0) {
+                       slab_insert(sp, class);
+
+                       /* Update stats now since slab_get drops the lock */
+                       ++m_infree(class);
+                       ++m_total(class);
+                       VERIFY(m_total(class) <= m_maxlimit(class));
+                       if (class == MC_BIGCL) {
+                               mbstat.m_bigclfree = m_infree(MC_BIGCL) +
+                                   m_infree(MC_MBUF_BIGCL);
+                               mbstat.m_bigclusters = m_total(MC_BIGCL);
+                       }
+                       ++count;
+               } else if ((bufsize > PAGE_SIZE) &&
+                   (i % NSLABSP16KB) == 0) {
                        union m16kcluster *m16kcl = (union m16kcluster *)page;
                        mcl_slab_t *nsp;
                        int k;
-
-                       VERIFY(njcl > 0);
+
                        /* One for the entire 16KB */
                        sp = slab_get(m16kcl);
                        if (mclaudit != NULL)
@@ -2837,6 +2893,7 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize)
                        VERIFY(sp->sl_refcnt == 0 && sp->sl_flags == 0);
                        slab_init(sp, MC_16KCL, SLF_MAPPED,
                            m16kcl, m16kcl, bufsize, 0, 1);
+                       m16kcl->m16kcl_next = NULL;
 
                        /*
                         * 2nd-Nth page's slab is part of the first one,
@@ -2850,21 +2907,21 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize)
                                    SLF_MAPPED | SLF_PARTIAL,
                                    m16kcl, NULL, 0, 0, 0);
                        }
-
                        /* Insert this slab */
                        slab_insert(sp, MC_16KCL);
 
-                       /* Update stats now since slab_get() drops the lock */
-                       m_infree(MC_16KCL)++;
-                       m_total(MC_16KCL)++;
+                       /* Update stats now since slab_get drops the lock */
+                       ++m_infree(MC_16KCL);
+                       ++m_total(MC_16KCL);
                        VERIFY(m_total(MC_16KCL) <= m_maxlimit(MC_16KCL));
-                       class = MC_16KCL;
+                       ++count;
                }
-               if (!mb_peak_newreport && mbuf_report_usage(class))
-                       mb_peak_newreport = TRUE;
        }
        VERIFY(mca_list == NULL && con_list == NULL);
 
+       if (!mb_peak_newreport && mbuf_report_usage(class))
+               mb_peak_newreport = TRUE;
+
        /* We're done; let others enter */
        mb_clalloc_busy = FALSE;
        if (mb_clalloc_waiters > 0) {
@@ -2872,12 +2929,7 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize)
                wakeup(mb_clalloc_waitchan);
        }
 
-       if (bufsize == m_maxsize(MC_BIGCL))
-               return (numpages);
-
-       VERIFY(bufsize == m_maxsize(MC_16KCL));
-       return (numpages / NSLABSP16KB);
-
+       return (count);
 out:
        lck_mtx_assert(mbuf_mlock, LCK_MTX_ASSERT_OWNED);
 
@@ -2892,7 +2944,7 @@ out:
         * When non-blocking we kick a thread if we have to grow the
         * pool or if the number of free clusters is less than requested.
         */
-       if (bufsize == m_maxsize(MC_BIGCL)) {
+       if (class == MC_BIGCL) {
                if (i > 0) {
                        /*
                         * Remember total number of 4KB clusters needed
@@ -2936,94 +2988,98 @@ freelist_populate(mbuf_class_t class, unsigned int num, int wait)
 {
        mcache_obj_t *o = NULL;
        int i, numpages = 0, count;
+       mbuf_class_t super_class;
 
        VERIFY(class == MC_MBUF || class == MC_CL || class == MC_BIGCL ||
            class == MC_16KCL);
 
        lck_mtx_assert(mbuf_mlock, LCK_MTX_ASSERT_OWNED);
 
-       switch (class) {
-       case MC_MBUF:
-       case MC_CL:
-       case MC_BIGCL:
-               numpages = (num * m_size(class) + NBPG - 1) / NBPG;
-               i = m_clalloc(numpages, wait, m_maxsize(MC_BIGCL));
+       VERIFY(PAGE_SIZE == m_maxsize(MC_BIGCL) ||
+           PAGE_SIZE == m_maxsize(MC_16KCL));
 
-               /* Respect the 4KB clusters minimum limit */
-               if (m_total(MC_BIGCL) == m_maxlimit(MC_BIGCL) &&
-                   m_infree(MC_BIGCL) <= m_minlimit(MC_BIGCL)) {
-                       if (class != MC_BIGCL || (wait & MCR_COMP))
-                               return (0);
-               }
-               if (class == MC_BIGCL)
-                       return (i != 0);
-               break;
+       if (m_maxsize(class) >= PAGE_SIZE)
+               return (m_clalloc(num, wait, m_maxsize(class)) != 0);
 
-       case MC_16KCL:
-               return (m_clalloc(num, wait, m_maxsize(class)) != 0);
-               /* NOTREACHED */
+       /*
+        * The rest of the function allocates pages and slices them up
+        * into objects of the requested class size.
+        */
 
-       default:
-               VERIFY(0);
-               /* NOTREACHED */
-       }
+       numpages = (num * m_size(class) + PAGE_SIZE - 1) / PAGE_SIZE;
+
+       /* Currently assume that pages are 4K or 16K */
+       if (PAGE_SIZE == m_maxsize(MC_BIGCL))
+               super_class = MC_BIGCL;
+       else
+               super_class = MC_16KCL;
 
-       VERIFY(class == MC_MBUF || class == MC_CL);
+       i = m_clalloc(numpages, wait, m_maxsize(super_class));
+
+       /* Respect the minimum limit of the super class */
+       if (m_total(super_class) == m_maxlimit(super_class) &&
+           m_infree(super_class) <= m_minlimit(super_class))
+               if (wait & MCR_COMP)
+                       return (0);
 
        /* how many objects will we cut the page into? */
-       int numobj = (class == MC_MBUF ? NMBPBG : NCLPBG);
+       int numobj = PAGE_SIZE / m_maxsize(class);
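As a quick, illustrative sanity check of this division (standalone C, not part of the patch; MSIZE, MCLBYTES and MBIGCLBYTES are assumed to have the usual XNU values of 256, 2048 and 4096), the per-page object counts correspond to the NMBPG/NCLPG/NBCLPG constants referenced elsewhere in this change:

#include <assert.h>

/* Assumed values; the kernel takes these from its mbuf headers. */
#define MSIZE           256     /* plain mbuf */
#define MCLBYTES        2048    /* 2 KB cluster */
#define MBIGCLBYTES     4096    /* 4 KB cluster */

int
main(void)
{
	unsigned int page = 4096;               /* 4 KB pages */

	assert(page / MSIZE == 16);             /* NMBPG:  16 mbufs per page   */
	assert(page / MCLBYTES == 2);           /* NCLPG:  2 clusters per page */
	assert(page / MBIGCLBYTES == 1);        /* NBCLPG: 1 big cluster       */

	page = 16384;                           /* 16 KB pages */
	assert(page / MSIZE == 64);
	assert(page / MCLBYTES == 8);
	assert(page / MBIGCLBYTES == 4);
	return (0);
}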
 
        for (count = 0; count < numpages; count++) {
-
                /* respect totals, minlimit, maxlimit */
-               if (m_total(MC_BIGCL) <= m_minlimit(MC_BIGCL) ||
+               if (m_total(super_class) <= m_minlimit(super_class) ||
                    m_total(class) >= m_maxlimit(class))
                        break;
 
-               if ((o = slab_alloc(MC_BIGCL, wait)) == NULL)
+               if ((o = slab_alloc(super_class, wait)) == NULL)
                        break;
 
                struct mbuf *m = (struct mbuf *)o;
                union mcluster *c = (union mcluster *)o;
+               union mbigcluster *mbc = (union mbigcluster *)o;
                mcl_slab_t *sp = slab_get(o);
                mcache_audit_t *mca = NULL;
 
-               VERIFY(slab_is_detached(sp) &&
-                   (sp->sl_flags & (SLF_MAPPED | SLF_PARTIAL)) == SLF_MAPPED);
-
+               /*
+                * Since one full page will be converted to MC_MBUF,
+                * MC_CL or MC_BIGCL objects, verify that the reference
+                * count matches that assumption.
+                */
+               VERIFY(sp->sl_refcnt == 1 && slab_is_detached(sp));
+               VERIFY((sp->sl_flags & (SLF_MAPPED | SLF_PARTIAL)) == SLF_MAPPED);
                /*
                 * Make sure that the cluster is unmolested
                 * while in freelist
                 */
                if (mclverify) {
-                       mca = mcl_audit_buf2mca(MC_BIGCL, o);
-                       mcache_audit_free_verify(mca, o, 0,
-                           m_maxsize(MC_BIGCL));
+                       mca = mcl_audit_buf2mca(super_class,
+                           (mcache_obj_t *)o);
+                       mcache_audit_free_verify(mca,
+                           (mcache_obj_t *)o, 0, m_maxsize(super_class));
                }
 
-               /* Reinitialize it as an mbuf or 2K slab */
+               /* Reinitialize it as an mbuf or 2K or 4K slab */
                slab_init(sp, class, sp->sl_flags,
-                   sp->sl_base, NULL, sp->sl_len, 0, numobj);
+                   sp->sl_base, NULL, PAGE_SIZE, 0, numobj);
 
-               VERIFY(o == (mcache_obj_t *)sp->sl_base);
                VERIFY(sp->sl_head == NULL);
 
-               VERIFY(m_total(MC_BIGCL) > 0);
-               m_total(MC_BIGCL)--;
-               mbstat.m_bigclusters = m_total(MC_BIGCL);
+               VERIFY(m_total(super_class) >= 1);
+               m_total(super_class)--;
+
+               if (super_class == MC_BIGCL)
+                       mbstat.m_bigclusters = m_total(MC_BIGCL);
 
                m_total(class) += numobj;
                m_infree(class) += numobj;
 
-               VERIFY(m_total(MC_BIGCL) >= m_minlimit(MC_BIGCL));
-               VERIFY(m_total(class) <= m_maxlimit(class));
                if (!mb_peak_newreport && mbuf_report_usage(class))
                        mb_peak_newreport = TRUE;
 
                i = numobj;
                if (class == MC_MBUF) {
                        mbstat.m_mbufs = m_total(MC_MBUF);
-                       mtype_stat_add(MT_FREE, NMBPBG);
+                       mtype_stat_add(MT_FREE, NMBPG);
                        while (i--) {
                                /*
                                 * If auditing is enabled, construct the
@@ -3045,7 +3101,7 @@ freelist_populate(mbuf_class_t class, unsigned int num, int wait)
                                m->m_next = sp->sl_head;
                                sp->sl_head = (void *)m++;
                        }
-               } else { /* MC_CL */
+               } else if (class == MC_CL) { /* MC_CL */
                        mbstat.m_clfree =
                            m_infree(MC_CL) + m_infree(MC_MBUF_CL);
                        mbstat.m_clusters = m_total(MC_CL);
@@ -3053,9 +3109,18 @@ freelist_populate(mbuf_class_t class, unsigned int num, int wait)
                                c->mcl_next = sp->sl_head;
                                sp->sl_head = (void *)c++;
                        }
+               } else {
+                       VERIFY(class == MC_BIGCL);
+                       mbstat.m_bigclusters = m_total(MC_BIGCL);
+                       mbstat.m_bigclfree = m_infree(MC_BIGCL) +
+                           m_infree(MC_MBUF_BIGCL);
+                       while (i--) {
+                               mbc->mbc_next = sp->sl_head;
+                               sp->sl_head = (void *)mbc++;
+                       }
                }
 
-               /* Insert into the mbuf or 2k slab list */
+               /* Insert into the mbuf or 2k or 4k slab list */
                slab_insert(sp, class);
 
                if ((i = mb_waiters) > 0)
@@ -3737,6 +3802,7 @@ m_classifier_init(struct mbuf *m, uint32_t pktf_mask)
 #if MEASURE_BW
        m->m_pkthdr.pkt_bwseq  = 0;
 #endif /* MEASURE_BW */
+       m->m_pkthdr.pkt_enqueue_ts = 0;
 }
 
 void
@@ -3935,9 +4001,9 @@ m_allocpacket_internal(unsigned int *numlist, size_t packetlen,
                }
        } else if (bufsize == m_maxsize(MC_16KCL)) {
                VERIFY(njcl > 0);
-               nsegs = ((packetlen - 1) >> (PGSHIFT + 2)) + 1;
+               nsegs = ((packetlen - 1) >> M16KCLSHIFT) + 1;
        } else if (bufsize == m_maxsize(MC_BIGCL)) {
-               nsegs = ((packetlen - 1) >> PGSHIFT) + 1;
+               nsegs = ((packetlen - 1) >> MBIGCLSHIFT) + 1;
        } else {
                nsegs = ((packetlen - 1) >> MCLSHIFT) + 1;
        }
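For reference, a small standalone sketch (not part of the patch) of how the shift-based segment counts above work out; the shift values MCLSHIFT = 11, MBIGCLSHIFT = 12 and M16KCLSHIFT = 14 are the usual XNU definitions and are restated here as an assumption:

#include <assert.h>

/* Assumed cluster shifts: 2 KB, 4 KB and 16 KB clusters respectively. */
#define MCLSHIFT        11
#define MBIGCLSHIFT     12
#define M16KCLSHIFT     14

int
main(void)
{
	unsigned int packetlen = 9000;

	/* Same formula as above: clusters needed to hold packetlen bytes. */
	unsigned int nsegs_2k  = ((packetlen - 1) >> MCLSHIFT) + 1;
	unsigned int nsegs_4k  = ((packetlen - 1) >> MBIGCLSHIFT) + 1;
	unsigned int nsegs_16k = ((packetlen - 1) >> M16KCLSHIFT) + 1;

	assert(nsegs_2k == 5);          /* 5 * 2048  >= 9000 */
	assert(nsegs_4k == 3);          /* 3 * 4096  >= 9000 */
	assert(nsegs_16k == 1);         /* 1 * 16384 >= 9000 */
	return (0);
}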
@@ -4498,8 +4564,13 @@ m_prepend(struct mbuf *m, int len, int how)
        }
        mn->m_next = m;
        m = mn;
-       if (len < MHLEN)
+       if (m->m_flags & M_PKTHDR) {
+               VERIFY(len <= MHLEN);
                MH_ALIGN(m, len);
+       } else {
+               VERIFY(len <= MLEN);
+               M_ALIGN(m, len);
+       }
        m->m_len = len;
        return (m);
 }
@@ -4509,9 +4580,10 @@ m_prepend(struct mbuf *m, int len, int how)
  * chain, copy junk along, and adjust length.
  */
 struct mbuf *
-m_prepend_2(struct mbuf *m, int len, int how)
+m_prepend_2(struct mbuf *m, int len, int how, int align)
 {
-       if (M_LEADINGSPACE(m) >= len) {
+       if (M_LEADINGSPACE(m) >= len &&
+           (!align || IS_P2ALIGNED((m->m_data - len), sizeof(u_int32_t)))) {
                m->m_data -= len;
                m->m_len += len;
        } else {
@@ -5279,12 +5351,6 @@ m_howmany(int num, size_t bufsize)
 
        } else { /* 16K CL */
                VERIFY(njcl > 0);
-               /* Under minimum */
-               if (m_16kclusters < MIN16KCL)
-                       return (MIN16KCL - m_16kclusters);
-               if (m_16kclfree >= M16KCL_LOWAT)
-                       return (0);
-
                /* Ensure at least num clusters are available */
                if (num >= m_16kclfree)
                        i = num - m_16kclfree;
@@ -5717,9 +5783,10 @@ nospace:
 
 #define        MBUF_MULTIPAGES(m)                                              \
        (((m)->m_flags & M_EXT) &&                                      \
-       ((IS_P2ALIGNED((m)->m_data, NBPG) && (m)->m_len > NBPG) ||      \
-       (!IS_P2ALIGNED((m)->m_data, NBPG) &&                            \
-       P2ROUNDUP((m)->m_data, NBPG) < ((uintptr_t)(m)->m_data + (m)->m_len))))
+       ((IS_P2ALIGNED((m)->m_data, PAGE_SIZE)                          \
+       && (m)->m_len > PAGE_SIZE) ||                                   \
+       (!IS_P2ALIGNED((m)->m_data, PAGE_SIZE) &&                       \
+       P2ROUNDUP((m)->m_data, PAGE_SIZE) < ((uintptr_t)(m)->m_data + (m)->m_len))))
 
 static struct mbuf *
 m_expand(struct mbuf *m, struct mbuf **last)
@@ -5739,11 +5806,11 @@ m_expand(struct mbuf *m, struct mbuf **last)
                struct mbuf *n;
 
                data = data0;
-               if (IS_P2ALIGNED(data, NBPG) && len0 > NBPG)
-                       len = NBPG;
-               else if (!IS_P2ALIGNED(data, NBPG) &&
-                   P2ROUNDUP(data, NBPG) < (data + len0))
-                       len = P2ROUNDUP(data, NBPG) - data;
+               if (IS_P2ALIGNED(data, PAGE_SIZE) && len0 > PAGE_SIZE)
+                       len = PAGE_SIZE;
+               else if (!IS_P2ALIGNED(data, PAGE_SIZE) &&
+                   P2ROUNDUP(data, PAGE_SIZE) < (data + len0))
+                       len = P2ROUNDUP(data, PAGE_SIZE) - data;
                else
                        len = len0;
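The alignment logic above can be easier to follow with a standalone sketch (not kernel code; PAGE_SIZE is assumed to be 4 KB, and P2ROUNDUP/IS_P2ALIGNED are re-derived locally for illustration). It splits a data run so that no segment crosses a page boundary:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE       4096u   /* assumed page size for this sketch */
#define P2ROUNDUP(x, a) (((x) + ((a) - 1)) & ~((uintptr_t)(a) - 1))
#define IS_P2ALIGNED(x, a) (((uintptr_t)(x) & ((a) - 1)) == 0)

/*
 * Same three cases as m_expand(): a full page, the run up to the next
 * page boundary, or whatever is left.
 */
static size_t
segment_len(uintptr_t data, size_t len0)
{
	if (IS_P2ALIGNED(data, PAGE_SIZE) && len0 > PAGE_SIZE)
		return (PAGE_SIZE);
	if (!IS_P2ALIGNED(data, PAGE_SIZE) &&
	    P2ROUNDUP(data, PAGE_SIZE) < (data + len0))
		return (P2ROUNDUP(data, PAGE_SIZE) - data);
	return (len0);
}

int
main(void)
{
	/* 10000 bytes starting 100 bytes into a page: 3996 + 4096 + 1908. */
	uintptr_t data = 0x10000 + 100;
	size_t left = 10000;

	while (left > 0) {
		size_t len = segment_len(data, left);
		printf("segment of %zu bytes\n", len);
		data += len;
		left -= len;
	}
	return (0);
}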
 
@@ -6260,7 +6327,7 @@ slab_get(void *buf)
        lck_mtx_assert(mbuf_mlock, LCK_MTX_ASSERT_OWNED);
 
        VERIFY(MBUF_IN_MAP(buf));
-       ix = ((char *)buf - (char *)mbutl) >> MBSHIFT;
+       ix = ((unsigned char *)buf - mbutl) >> MBSHIFT;
        VERIFY(ix < maxslabgrp);
 
        if ((slg = slabstbl[ix]) == NULL) {
@@ -6283,7 +6350,9 @@ slab_get(void *buf)
                /* This is a new buffer; create the slabs group for it */
                MALLOC(slg, mcl_slabg_t *, sizeof (*slg), M_TEMP,
                    M_WAITOK | M_ZERO);
-               VERIFY(slg != NULL);
+               MALLOC(slg->slg_slab, mcl_slab_t *, sizeof(mcl_slab_t) * NSLABSPMB,
+                   M_TEMP, M_WAITOK | M_ZERO);
+               VERIFY(slg != NULL && slg->slg_slab != NULL);
 
                lck_mtx_lock(mbuf_mlock);
                /*
@@ -6308,7 +6377,7 @@ slab_get(void *buf)
                }
        }
 
-       ix = MTOBG(buf) % NSLABSPMB;
+       ix = MTOPG(buf) % NSLABSPMB;
        VERIFY(ix < NSLABSPMB);
 
        return (&slg->slg_slab[ix]);
@@ -6335,13 +6404,17 @@ slab_insert(mcl_slab_t *sp, mbuf_class_t class)
        m_slab_cnt(class)++;
        TAILQ_INSERT_TAIL(&m_slablist(class), sp, sl_link);
        sp->sl_flags &= ~SLF_DETACHED;
+
+       /*
+        * If a buffer spans multiple contiguous pages, clear the
+        * detached flag on the remaining pages' slabs as well.
+        */
        if (class == MC_16KCL) {
                int k;
                for (k = 1; k < NSLABSP16KB; k++) {
                        sp = sp->sl_next;
                        /* Next slab must already be present */
-                       VERIFY(sp != NULL);
-                       VERIFY(slab_is_detached(sp));
+                       VERIFY(sp != NULL && slab_is_detached(sp));
                        sp->sl_flags &= ~SLF_DETACHED;
                }
        }
@@ -6350,13 +6423,13 @@ slab_insert(mcl_slab_t *sp, mbuf_class_t class)
 static void
 slab_remove(mcl_slab_t *sp, mbuf_class_t class)
 {
+       int k;
        VERIFY(!slab_is_detached(sp));
        VERIFY(m_slab_cnt(class) > 0);
        m_slab_cnt(class)--;
        TAILQ_REMOVE(&m_slablist(class), sp, sl_link);
        slab_detach(sp);
        if (class == MC_16KCL) {
-               int k;
                for (k = 1; k < NSLABSP16KB; k++) {
                        sp = sp->sl_next;
                        /* Next slab must already be present */
@@ -6429,14 +6502,14 @@ mcl_audit_init(void *buf, mcache_audit_t **mca_list,
        boolean_t save_contents = (con_list != NULL);
        unsigned int i, ix;
 
-       ASSERT(num <= NMBPBG);
+       ASSERT(num <= NMBPG);
        ASSERT(con_list == NULL || con_size != 0);
 
-       ix = MTOBG(buf);
+       ix = MTOPG(buf);
        VERIFY(ix < maxclaudit);
 
        /* Make sure we haven't been here before */
-       for (i = 0; i < NMBPBG; i++)
+       for (i = 0; i < NMBPG; i++)
                VERIFY(mclaudit[ix].cl_audit[i] == NULL);
 
        mca = mca_tail = *mca_list;
@@ -6482,7 +6555,7 @@ mcl_audit_free(void *buf, unsigned int num)
        unsigned int i, ix;
        mcache_audit_t *mca, *mca_list;
 
-       ix = MTOBG(buf);
+       ix = MTOPG(buf);
        VERIFY(ix < maxclaudit);
        
        if (mclaudit[ix].cl_audit[0] != NULL) {
@@ -6504,13 +6577,16 @@ mcl_audit_free(void *buf, unsigned int num)
  * the corresponding audit structure for that buffer.
  */
 static mcache_audit_t *
-mcl_audit_buf2mca(mbuf_class_t class, mcache_obj_t *o)
+mcl_audit_buf2mca(mbuf_class_t class, mcache_obj_t *mobj)
 {
        mcache_audit_t *mca = NULL;
-       int ix = MTOBG(o);
+       int ix = MTOPG(mobj), m_idx = 0;
+       unsigned char *page_addr;
 
        VERIFY(ix < maxclaudit);
-       VERIFY(IS_P2ALIGNED(o, MIN(m_maxsize(class), NBPG)));
+       VERIFY(IS_P2ALIGNED(mobj, MIN(m_maxsize(class), PAGE_SIZE)));
+
+       page_addr = PGTOM(ix);
 
        switch (class) {
        case MC_MBUF:
@@ -6521,19 +6597,25 @@ mcl_audit_buf2mca(mbuf_class_t class, mcache_obj_t *o)
                 * mbuf index relative to the page base and use
                 * it to locate the audit structure.
                 */
-               VERIFY(MCLIDX(BGTOM(ix), o) < (int)NMBPBG);
-               mca = mclaudit[ix].cl_audit[MCLIDX(BGTOM(ix), o)];
+               m_idx = MBPAGEIDX(page_addr, mobj);
+               VERIFY(m_idx < (int)NMBPG);
+               mca = mclaudit[ix].cl_audit[m_idx];
                break;
 
        case MC_CL:
                /*
                 * Same thing as above, but for 2KB clusters in a page.
                 */
-               VERIFY(CLBGIDX(BGTOM(ix), o) < (int)NCLPBG);
-               mca = mclaudit[ix].cl_audit[CLBGIDX(BGTOM(ix), o)];
+               m_idx = CLPAGEIDX(page_addr, mobj);
+               VERIFY(m_idx < (int)NCLPG);
+               mca = mclaudit[ix].cl_audit[m_idx];
                break;
 
        case MC_BIGCL:
+               m_idx = BCLPAGEIDX(page_addr, mobj);
+               VERIFY(m_idx < (int)NBCLPG);
+               mca = mclaudit[ix].cl_audit[m_idx];
+               break;
        case MC_16KCL:
                /*
                 * Same as above, but only return the first element.
@@ -7344,6 +7426,7 @@ mbuf_report_peak_usage(void)
        for (i = 0; i < NELEM(mbuf_table); i++) {
                m_peak(m_class(i)) = m_total(m_class(i));
                memreleased += m_release_cnt(i);
+               m_release_cnt(i) = 0;
        }
        mb_peak_newreport = FALSE;
        lck_mtx_unlock(mbuf_mlock);
@@ -7353,6 +7436,7 @@ mbuf_report_peak_usage(void)
        ns_data.u.mb_stats.total_256b = m_peak(MC_MBUF);
        ns_data.u.mb_stats.total_2kb = m_peak(MC_CL);
        ns_data.u.mb_stats.total_4kb = m_peak(MC_BIGCL);
+       ns_data.u.mb_stats.total_16kb = m_peak(MC_16KCL);
        ns_data.u.mb_stats.sbmb_total = total_sbmb_cnt_peak;
        ns_data.u.mb_stats.sb_atmbuflimit = sbmb_limreached;
        ns_data.u.mb_stats.draincnt = mbstat.m_drain;
@@ -7478,23 +7562,25 @@ m_drain(void)
                        slab_remove(sp, mc);
                        switch (mc) {
                        case MC_MBUF:
-                               m_infree(mc) -= NMBPBG;
-                               m_total(mc) -= NMBPBG;
+                               m_infree(mc) -= NMBPG;
+                               m_total(mc) -= NMBPG;
                                if (mclaudit != NULL)
-                                       mcl_audit_free(sp->sl_base, NMBPBG);
+                                       mcl_audit_free(sp->sl_base, NMBPG);
                                break;
                        case MC_CL:
-                               m_infree(mc) -= NCLPBG;
-                               m_total(mc) -= NCLPBG;
+                               m_infree(mc) -= NCLPG;
+                               m_total(mc) -= NCLPG;
                                if (mclaudit != NULL)
-                                       mcl_audit_free(sp->sl_base, NMBPBG);
+                                       mcl_audit_free(sp->sl_base, NMBPG);
                                break;
                        case MC_BIGCL:
-                               m_infree(mc)--;
-                               m_total(mc)--;
+                       {
+                               m_infree(mc) -= NBCLPG;
+                               m_total(mc) -= NBCLPG;
                                if (mclaudit != NULL)
-                                       mcl_audit_free(sp->sl_base, NMBPBG);
+                                       mcl_audit_free(sp->sl_base, NMBPG);
                                break;
+                       }
                        case MC_16KCL:
                                m_infree(mc)--;
                                m_total(mc)--;
@@ -7520,7 +7606,9 @@ m_drain(void)
                        }
                        m_release_cnt(mc) += m_size(mc);
                        released += m_size(mc);
-                       offset = ((char *)sp->sl_base - (char *)mbutl) / NBPG;
+                       VERIFY(sp->sl_base != NULL &&
+                           sp->sl_len >= PAGE_SIZE);
+                       offset = MTOPG(sp->sl_base);
                        /*
                         * Make sure the IOMapper points to a valid, but
                         * bogus, address.  This should prevent further DMA
index 2dc33d759b411b7d92e1acb88711bd0e45d82eae..d73d61a4b24afbf2b63aaf89e818c2ae8a6ba087 100644 (file)
 #include <netinet/mptcp_var.h>
 #endif /* MULTIPATH */
 
+#define ROUNDUP(a, b) (((a) + ((b) - 1)) & (~((b) - 1)))
+
+#if DEBUG || DEVELOPMENT
+#define        DEBUG_KERNEL_ADDRPERM(_v) (_v)
+#else
+#define        DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
+#endif
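A standalone illustration (not part of the patch) of the ROUNDUP() macro added above: it rounds a value up to the next multiple of b and relies on b being a power of two, which holds for MCLBYTES, the alignment it is used with later in this file:

#include <assert.h>

/* Mirrors the macro above; only valid when b is a power of two. */
#define ROUNDUP(a, b) (((a) + ((b) - 1)) & (~((b) - 1)))

int
main(void)
{
	assert(ROUNDUP(1, 2048) == 2048);       /* rounds up to one cluster  */
	assert(ROUNDUP(2048, 2048) == 2048);    /* exact multiples unchanged */
	assert(ROUNDUP(2049, 2048) == 4096);    /* spills into a second one  */
	return (0);
}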
+
 /* TODO: this should be in a header file somewhere */
 extern char *proc_name_address(void *p);
 
@@ -152,6 +160,8 @@ static void filt_sowdetach(struct knote *kn);
 static int     filt_sowrite(struct knote *kn, long hint);
 static void    filt_sockdetach(struct knote *kn);
 static int     filt_sockev(struct knote *kn, long hint);
+static void    filt_socktouch(struct knote *kn, struct kevent_internal_s *kev,
+    long type);
 
 static int sooptcopyin_timeval(struct sockopt *, struct timeval *);
 static int sooptcopyout_timeval(struct sockopt *, const struct timeval *);
@@ -172,6 +182,7 @@ static struct filterops sock_filtops = {
        .f_isfd = 1,
        .f_detach = filt_sockdetach,
        .f_event = filt_sockev,
+       .f_touch = filt_socktouch,
 };
 
 SYSCTL_DECL(_kern_ipc);
@@ -236,6 +247,15 @@ int sosendjcl_ignore_capab = 0;
 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab,
        CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl_ignore_capab, 0, "");
 
+/*
+ * Set this to ignore SOF1_IF_2KCL and use big clusters for large
+ * socket writes, for all protocols and all network interfaces.
+ * Be extra careful when setting this to 1, because sending down packets
+ * with clusters larger than 2 KB might lead to system panics or data
+ * corruption.
+ * When set to 0, the system will respect SOF1_IF_2KCL, which is set on
+ * the outgoing interface.
+ * Set this to 1 for testing/debugging purposes only.
+ */
 int sosendbigcl_ignore_capab = 0;
 SYSCTL_INT(_kern_ipc, OID_AUTO, sosendbigcl_ignore_capab,
        CTLFLAG_RW | CTLFLAG_LOCKED, &sosendbigcl_ignore_capab, 0, "");
@@ -256,6 +276,10 @@ int sorestrictsend = 1;
 SYSCTL_INT(_kern_ipc, OID_AUTO, sorestrictsend, CTLFLAG_RW | CTLFLAG_LOCKED,
        &sorestrictsend, 0, "Enable outbound interface restrictions");
 
+int soreserveheadroom = 1;
+SYSCTL_INT(_kern_ipc, OID_AUTO, soreserveheadroom, CTLFLAG_RW | CTLFLAG_LOCKED,
+       &soreserveheadroom, 0, "To allocate contiguous datagram buffers");
+
 extern struct inpcbinfo tcbinfo;
 
 /* TODO: these should be in header file */
@@ -270,10 +294,39 @@ static struct zone *se_zone;                      /* zone for sockaddr_entry */
 
 vm_size_t      so_cache_zone_element_size;
 
-static int sodelayed_copy(struct socket *, struct uio *, struct mbuf **, user_ssize_t *);
+static int sodelayed_copy(struct socket *, struct uio *, struct mbuf **,
+    user_ssize_t *);
 static void cached_sock_alloc(struct socket **, int);
 static void cached_sock_free(struct socket *);
 
+/*
+ * Maximum number of extended background idle sockets per process.
+ * Set to zero to disable further setting of the option.
+ */
+
+#define        SO_IDLE_BK_IDLE_MAX_PER_PROC    1
+#define        SO_IDLE_BK_IDLE_TIME            600
+#define        SO_IDLE_BK_IDLE_RCV_HIWAT       131072
+
+struct soextbkidlestat soextbkidlestat;
+
+SYSCTL_UINT(_kern_ipc, OID_AUTO, maxextbkidleperproc,
+       CTLFLAG_RW | CTLFLAG_LOCKED, &soextbkidlestat.so_xbkidle_maxperproc, 0,
+       "Maximum of extended background idle sockets per process");
+
+SYSCTL_UINT(_kern_ipc, OID_AUTO, extbkidletime, CTLFLAG_RW | CTLFLAG_LOCKED,
+       &soextbkidlestat.so_xbkidle_time, 0,
+       "Time in seconds to keep extended background idle sockets");
+
+SYSCTL_UINT(_kern_ipc, OID_AUTO, extbkidlercvhiwat, CTLFLAG_RW | CTLFLAG_LOCKED,
+       &soextbkidlestat.so_xbkidle_rcvhiwat, 0,
+       "High water mark for extended background idle sockets");
+
+SYSCTL_STRUCT(_kern_ipc, OID_AUTO, extbkidlestat, CTLFLAG_RD | CTLFLAG_LOCKED,
+       &soextbkidlestat, soextbkidlestat, "");
+
+int so_set_extended_bk_idle(struct socket *, int);
+
 /*
  * SOTCDB_NO_DSCP is set by default, to prevent the networking stack from
  * setting the DSCP code on the packet based on the service class; see
@@ -289,6 +342,22 @@ socketinit(void)
        _CASSERT(sizeof(so_gencnt) == sizeof(uint64_t));
        VERIFY(IS_P2ALIGNED(&so_gencnt, sizeof(uint32_t)));
 
+#ifdef __LP64__
+       _CASSERT(sizeof(struct sa_endpoints) == sizeof(struct user64_sa_endpoints));
+       _CASSERT(offsetof(struct sa_endpoints, sae_srcif) == offsetof(struct user64_sa_endpoints, sae_srcif));
+       _CASSERT(offsetof(struct sa_endpoints, sae_srcaddr) == offsetof(struct user64_sa_endpoints, sae_srcaddr));
+       _CASSERT(offsetof(struct sa_endpoints, sae_srcaddrlen) == offsetof(struct user64_sa_endpoints, sae_srcaddrlen));
+       _CASSERT(offsetof(struct sa_endpoints, sae_dstaddr) == offsetof(struct user64_sa_endpoints, sae_dstaddr));
+       _CASSERT(offsetof(struct sa_endpoints, sae_dstaddrlen) == offsetof(struct user64_sa_endpoints, sae_dstaddrlen));
+#else
+       _CASSERT(sizeof(struct sa_endpoints) == sizeof(struct user32_sa_endpoints));
+       _CASSERT(offsetof(struct sa_endpoints, sae_srcif) == offsetof(struct user32_sa_endpoints, sae_srcif));
+       _CASSERT(offsetof(struct sa_endpoints, sae_srcaddr) == offsetof(struct user32_sa_endpoints, sae_srcaddr));
+       _CASSERT(offsetof(struct sa_endpoints, sae_srcaddrlen) == offsetof(struct user32_sa_endpoints, sae_srcaddrlen));
+       _CASSERT(offsetof(struct sa_endpoints, sae_dstaddr) == offsetof(struct user32_sa_endpoints, sae_dstaddr));
+       _CASSERT(offsetof(struct sa_endpoints, sae_dstaddrlen) == offsetof(struct user32_sa_endpoints, sae_dstaddrlen));
+#endif
+
        if (socketinit_done) {
                printf("socketinit: already called...\n");
                return;
@@ -321,7 +390,7 @@ socketinit(void)
        so_cache_zone_element_size = (vm_size_t)(sizeof (struct socket) + 4
            + get_inpcb_str_size() + 4 + get_tcp_str_size());
 
-       so_cache_zone = zinit(so_cache_zone_element_size, 
+       so_cache_zone = zinit(so_cache_zone_element_size,
            (120000 * so_cache_zone_element_size), 8192, "socache zone");
        zone_change(so_cache_zone, Z_CALLERACCT, FALSE);
        zone_change(so_cache_zone, Z_NOENCRYPT, TRUE);
@@ -344,6 +413,10 @@ socketinit(void)
        zone_change(se_zone, Z_CALLERACCT, FALSE);
        zone_change(se_zone, Z_EXPAND, TRUE);
 
+       bzero(&soextbkidlestat, sizeof(struct soextbkidlestat));
+       soextbkidlestat.so_xbkidle_maxperproc = SO_IDLE_BK_IDLE_MAX_PER_PROC;
+       soextbkidlestat.so_xbkidle_time = SO_IDLE_BK_IDLE_TIME;
+       soextbkidlestat.so_xbkidle_rcvhiwat = SO_IDLE_BK_IDLE_RCV_HIWAT;
 
        in_pcbinit();
        sflt_init();
@@ -390,8 +463,8 @@ cached_sock_alloc(struct socket **so, int waitok)
                bzero((caddr_t)*so, sizeof (struct socket));
 
                /*
-                * Define offsets for extra structures into our 
-                * single block of memory. Align extra structures 
+                * Define offsets for extra structures into our
+                * single block of memory. Align extra structures
                 * on longword boundaries.
                 */
 
@@ -409,7 +482,7 @@ cached_sock_alloc(struct socket **so, int waitok)
                    (caddr_t)offset;
        }
 
-       (*so)->cached_in_sock_layer = true;
+       OSBitOrAtomic(SOF1_CACHED_IN_SOCK_LAYER, &(*so)->so_flags1);
 }
 
 static void
@@ -465,10 +538,12 @@ so_update_policy(struct socket *so)
 
 #if NECP
 static void
-so_update_necp_policy(struct socket *so, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr)
+so_update_necp_policy(struct socket *so, struct sockaddr *override_local_addr,
+    struct sockaddr *override_remote_addr)
 {
        if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6)
-               inp_update_necp_policy(sotoinpcb(so), override_local_addr, override_remote_addr, 0);
+               inp_update_necp_policy(sotoinpcb(so), override_local_addr,
+                   override_remote_addr, 0);
 }
 #endif /* NECP */
 
@@ -486,7 +561,7 @@ so_cache_timer(void)
        while (!STAILQ_EMPTY(&so_cache_head)) {
                VERIFY(cached_sock_count > 0);
                p = STAILQ_FIRST(&so_cache_head);
-               if ((so_cache_time - p->cache_timestamp) < 
+               if ((so_cache_time - p->cache_timestamp) <
                        SO_CACHE_TIME_LIMIT)
                        break;
 
@@ -657,7 +732,8 @@ socreate_internal(int dom, struct socket **aso, int type, int proto,
         * If this thread or task is marked to create backgrounded sockets,
         * mark the socket as background.
         */
-       if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_NEW_SOCKETS_BG)) {
+       if (proc_get_effective_thread_policy(current_thread(),
+           TASK_POLICY_NEW_SOCKETS_BG)) {
                socket_set_traffic_mgt_flags(so, TRAFFIC_MGT_SO_BACKGROUND);
                so->so_background_thread = current_thread();
        }
@@ -768,11 +844,11 @@ sobindlock(struct socket *so, struct sockaddr *nam, int dolock)
 
        so_update_last_owner_locked(so, p);
        so_update_policy(so);
-       
+
 #if NECP
        so_update_necp_policy(so, nam, NULL);
 #endif /* NECP */
-       
+
        /*
         * If this is a bind request on a socket that has been marked
         * as inactive, reject it now before we go any further.
@@ -780,7 +856,7 @@ sobindlock(struct socket *so, struct sockaddr *nam, int dolock)
        if (so->so_flags & SOF_DEFUNCT) {
                error = EINVAL;
                SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n",
-                   __func__, proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so),
+                   __func__, proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                    SOCK_DOM(so), SOCK_TYPE(so), error));
                goto out;
        }
@@ -825,7 +901,7 @@ sodealloc(struct socket *so)
        mac_socket_label_destroy(so);
 #endif /* MAC_SOCKET */
 
-       if (so->cached_in_sock_layer) {
+       if (so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) {
                cached_sock_free(so);
        } else {
                FREE_ZONE(so, sizeof (*so), so->so_zone);
@@ -861,11 +937,11 @@ solisten(struct socket *so, int backlog)
 
        so_update_last_owner_locked(so, p);
        so_update_policy(so);
-       
+
 #if NECP
        so_update_necp_policy(so, NULL, NULL);
 #endif /* NECP */
-       
+
        if (so->so_proto == NULL) {
                error = EINVAL;
                goto out;
@@ -887,7 +963,7 @@ solisten(struct socket *so, int backlog)
                if (so->so_flags & SOF_DEFUNCT) {
                        SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] "
                            "(%d)\n", __func__, proc_pid(p),
-                           (uint64_t)VM_KERNEL_ADDRPERM(so),
+                           (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                            SOCK_DOM(so), SOCK_TYPE(so), error));
                }
                goto out;
@@ -1052,6 +1128,11 @@ soclose_locked(struct socket *so)
        }
 #endif /* CONTENT_FILTER */
 
+       if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG) {
+               soresume(current_proc(), so, 1);
+               so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_WANTED;
+       }
+
        if ((so->so_options & SO_ACCEPTCONN)) {
                struct socket *sp, *sonext;
                int socklock = 0;
@@ -1377,7 +1458,7 @@ soconnectlock(struct socket *so, struct sockaddr *nam, int dolock)
                if (so->so_flags & SOF_DEFUNCT) {
                        SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] "
                            "(%d)\n", __func__, proc_pid(p),
-                           (uint64_t)VM_KERNEL_ADDRPERM(so),
+                           (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                            SOCK_DOM(so), SOCK_TYPE(so), error));
                }
                if (dolock)
@@ -1454,14 +1535,14 @@ soconnect2(struct socket *so1, struct socket *so2)
 int
 soconnectxlocked(struct socket *so, struct sockaddr_list **src_sl,
     struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
-    associd_t aid, connid_t *pcid, uint32_t flags, void *arg,
-    uint32_t arglen)
+    sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
+    uint32_t arglen, uio_t auio, user_ssize_t *bytes_written)
 {
        int error;
 
        so_update_last_owner_locked(so, p);
        so_update_policy(so);
-       
+
        /*
         * If this is a listening socket or if this is a previously-accepted
         * socket that has been marked as inactive, reject the connect request.
@@ -1471,7 +1552,7 @@ soconnectxlocked(struct socket *so, struct sockaddr_list **src_sl,
                if (so->so_flags & SOF_DEFUNCT) {
                        SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] "
                            "(%d)\n", __func__, proc_pid(p),
-                           (uint64_t)VM_KERNEL_ADDRPERM(so),
+                           (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                            SOCK_DOM(so), SOCK_TYPE(so), error));
                }
                return (error);
@@ -1503,7 +1584,7 @@ soconnectxlocked(struct socket *so, struct sockaddr_list **src_sl,
                } else {
                        error = (*so->so_proto->pr_usrreqs->pru_connectx)
                            (so, src_sl, dst_sl, p, ifscope, aid, pcid,
-                           flags, arg, arglen);
+                           flags, arg, arglen, auio, bytes_written);
                }
        }
 
@@ -1545,7 +1626,7 @@ sodisconnect(struct socket *so)
 }
 
 int
-sodisconnectxlocked(struct socket *so, associd_t aid, connid_t cid)
+sodisconnectxlocked(struct socket *so, sae_associd_t aid, sae_connid_t cid)
 {
        int error;
 
@@ -1566,7 +1647,7 @@ sodisconnectxlocked(struct socket *so, associd_t aid, connid_t cid)
 }
 
 int
-sodisconnectx(struct socket *so, associd_t aid, connid_t cid)
+sodisconnectx(struct socket *so, sae_associd_t aid, sae_connid_t cid)
 {
        int error;
 
@@ -1577,7 +1658,7 @@ sodisconnectx(struct socket *so, associd_t aid, connid_t cid)
 }
 
 int
-sopeelofflocked(struct socket *so, associd_t aid, struct socket **psop)
+sopeelofflocked(struct socket *so, sae_associd_t aid, struct socket **psop)
 {
        return ((*so->so_proto->pr_usrreqs->pru_peeloff)(so, aid, psop));
 }
@@ -1636,7 +1717,8 @@ restart:
 defunct:
                error = EPIPE;
                SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n",
-                   __func__, proc_selfpid(), (uint64_t)VM_KERNEL_ADDRPERM(so),
+                   __func__, proc_selfpid(),
+                   (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                    SOCK_DOM(so), SOCK_TYPE(so), error));
                return (error);
        }
@@ -1651,7 +1733,7 @@ defunct:
                        cfil_sock_data_pending(&so->so_snd) != 0)
                        CFIL_LOG(LOG_INFO,
                                "so %llx ignore SS_CANTSENDMORE",
-                               (uint64_t)VM_KERNEL_ADDRPERM(so));
+                               (uint64_t)DEBUG_KERNEL_ADDRPERM(so));
                else
 #endif /* CONTENT_FILTER */
                        return (EPIPE);
@@ -1665,16 +1747,17 @@ defunct:
        if ((so->so_state & SS_ISCONNECTED) == 0) {
                if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) {
                        if (((so->so_state & SS_ISCONFIRMING) == 0) &&
-                           (resid != 0 || clen == 0)) {
+                           (resid != 0 || clen == 0) &&
+                           !(so->so_flags1 & SOF1_PRECONNECT_DATA)) {
 #if MPTCP
-                               /* 
-                                * MPTCP Fast Join sends data before the 
+                               /*
+                                * MPTCP Fast Join sends data before the
                                 * socket is truly connected.
                                 */
                                if ((so->so_flags & (SOF_MP_SUBFLOW |
                                        SOF_MPTCP_FASTJOIN)) !=
                                    (SOF_MP_SUBFLOW | SOF_MPTCP_FASTJOIN))
-#endif /* MPTCP */                     
+#endif /* MPTCP */
                                return (ENOTCONN);
                        }
                } else if (addr == 0 && !(flags&MSG_HOLD)) {
@@ -1682,6 +1765,7 @@ defunct:
                            ENOTCONN : EDESTADDRREQ);
                }
        }
+
        if (so->so_flags & SOF_ENABLE_MSGS)
                space = msgq_sbspace(so, control);
        else
@@ -1694,8 +1778,21 @@ defunct:
                return (EMSGSIZE);
 
        if ((space < resid + clen &&
-           (atomic || space < (int32_t)so->so_snd.sb_lowat || space < clen)) ||
+           (atomic || (space < (int32_t)so->so_snd.sb_lowat) ||
+           space < clen)) ||
            (so->so_type == SOCK_STREAM && so_wait_for_if_feedback(so))) {
+               /*
+                * Don't block the connectx call when there's more data
+                * than can be copied.
+                */
+               if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
+                       if (space == 0) {
+                               return (EWOULDBLOCK);
+                       }
+                       if (space < (int32_t)so->so_snd.sb_lowat) {
+                               return (0);
+                       }
+               }
                if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) ||
                    assumelock) {
                        return (EWOULDBLOCK);
@@ -1779,12 +1876,14 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 {
        struct mbuf **mp;
        struct mbuf *m, *freelist = NULL;
-       user_ssize_t space, len, resid;
+       user_ssize_t space, len, resid, orig_resid;
        int clen = 0, error, dontroute, mlen, sendflags;
        int atomic = sosendallatonce(so) || top;
        int sblocked = 0;
        struct proc *p = current_proc();
        struct mbuf *control_copy = NULL;
+       uint16_t headroom = 0;
+       boolean_t en_tracing = FALSE;
 
        if (uio != NULL)
                resid = uio_resid(uio);
@@ -1796,18 +1895,36 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 
        socket_lock(so, 1);
 
+       /*
+        * Trace only when tracing is enabled, and only for network
+        * (vs. unix) sockets whose outgoing interface is not loopback.
+        */
+       if (ENTR_SHOULDTRACE &&
+           (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
+               struct inpcb *inp = sotoinpcb(so);
+               if (inp->inp_last_outifp != NULL &&
+                   !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
+                       en_tracing = TRUE;
+                       KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_START,
+                           VM_KERNEL_ADDRPERM(so),
+                           ((so->so_state & SS_NBIO) ? kEnTrFlagNonBlocking : 0),
+                           (int64_t)resid);
+                       orig_resid = resid;
+               }
+       }
+
        /*
         * Re-injection should not affect process accounting
         */
        if ((flags & MSG_SKIPCFIL) == 0) {
-       so_update_last_owner_locked(so, p);
-       so_update_policy(so);
-       
+               so_update_last_owner_locked(so, p);
+               so_update_policy(so);
+
 #if NECP
-       so_update_necp_policy(so, NULL, addr);
+               so_update_necp_policy(so, NULL, addr);
 #endif /* NECP */
        }
-       
+
        if (so->so_type != SOCK_STREAM && (flags & MSG_OOB) != 0) {
                error = EOPNOTSUPP;
                socket_unlock(so, 1);
@@ -1842,6 +1959,9 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
        if (control != NULL)
                clen = control->m_len;
 
+       if (soreserveheadroom != 0)
+               headroom = so->so_pktheadroom;
+
        do {
                error = sosendcheck(so, addr, resid, clen, atomic, flags,
                    &sblocked, control);
@@ -1868,22 +1988,26 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
                                int bytes_to_copy;
                                boolean_t jumbocl;
                                boolean_t bigcl;
+                               int bytes_to_alloc;
 
                                bytes_to_copy = imin(resid, space);
 
+                               bytes_to_alloc = bytes_to_copy;
+                               if (top == NULL)
+                                       bytes_to_alloc += headroom;
+
                                if (sosendminchain > 0)
                                        chainlength = 0;
                                else
                                        chainlength = sosendmaxchain;
 
                                /*
-                                * Use big 4 KB cluster only when outgoing
-                                * interface does not want 2 LB clusters
+                                * Use big 4 KB clusters when the outgoing
+                                * interface does not prefer 2 KB clusters
                                 */
-                               bigcl = 
-                                   !(so->so_flags1 & SOF1_IF_2KCL) ||
+                               bigcl = !(so->so_flags1 & SOF1_IF_2KCL) ||
                                    sosendbigcl_ignore_capab;
-                               
+
                                /*
                                 * Attempt to use larger than system page-size
                                 * clusters for large writes only if there is
@@ -1917,12 +2041,12 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
                                         * haven't yet consumed.
                                         */
                                        if (freelist == NULL &&
-                                           bytes_to_copy > MBIGCLBYTES &&
+                                           bytes_to_alloc > MBIGCLBYTES &&
                                            jumbocl) {
                                                num_needed =
-                                                   bytes_to_copy / M16KCLBYTES;
+                                                   bytes_to_alloc / M16KCLBYTES;
 
-                                               if ((bytes_to_copy -
+                                               if ((bytes_to_alloc -
                                                    (num_needed * M16KCLBYTES))
                                                    >= MINCLSIZE)
                                                        num_needed++;
@@ -1939,12 +2063,12 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
                                        }
 
                                        if (freelist == NULL &&
-                                           bytes_to_copy > MCLBYTES &&
+                                           bytes_to_alloc > MCLBYTES &&
                                            bigcl) {
                                                num_needed =
-                                                   bytes_to_copy / MBIGCLBYTES;
+                                                   bytes_to_alloc / MBIGCLBYTES;
 
-                                               if ((bytes_to_copy -
+                                               if ((bytes_to_alloc -
                                                    (num_needed * MBIGCLBYTES)) >=
                                                    MINCLSIZE)
                                                        num_needed++;
@@ -1960,12 +2084,34 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
                                                 */
                                        }
 
-                                       if (freelist == NULL &&
-                                           bytes_to_copy > MINCLSIZE) {
+                                       /*
+                                        * Allocate a cluster as we want to
+                                        * avoid splitting the data into more
+                                        * than one segment; using MINCLSIZE
+                                        * would lead us to allocate two mbufs.
+                                        */
+                                       if (soreserveheadroom != 0 &&
+                                           freelist == NULL &&
+                                           ((top == NULL &&
+                                           bytes_to_alloc > _MHLEN) ||
+                                           bytes_to_alloc > _MLEN)) {
+                                               num_needed = ROUNDUP(bytes_to_alloc, MCLBYTES) /
+                                                   MCLBYTES;
+                                               freelist =
+                                                   m_getpackets_internal(
+                                                   (unsigned int *)&num_needed,
+                                                   hdrs_needed, M_WAIT, 0,
+                                                   MCLBYTES);
+                                               /*
+                                                * Fall back to a single mbuf
+                                                * if allocation failed
+                                                */
+                                       } else if (freelist == NULL &&
+                                           bytes_to_alloc > MINCLSIZE) {
                                                num_needed =
-                                                   bytes_to_copy / MCLBYTES;
+                                                   bytes_to_alloc / MCLBYTES;
 
-                                               if ((bytes_to_copy -
+                                               if ((bytes_to_alloc -
                                                    (num_needed * MCLBYTES)) >=
                                                    MINCLSIZE)
                                                        num_needed++;
@@ -1980,7 +2126,20 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
                                                 * if allocation failed
                                                 */
                                        }
-
+                                       /*
+                                        * For datagram protocols, leave
+                                        * headroom for protocol headers
+                                        * in the first cluster of the chain
+                                        */
+                                       if (freelist != NULL && atomic &&
+                                           top == NULL && headroom > 0) {
+                                               freelist->m_data += headroom;
+                                       }
+
+                                       /*
+                                        * Fall back to regular mbufs without
+                                        * reserving the socket headroom
+                                        */
                                        if (freelist == NULL) {
                                                if (top == NULL)
                                                        MGETHDR(freelist,
@@ -2010,12 +2169,13 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
                                        m->m_next = NULL;
 
                                        if ((m->m_flags & M_EXT))
-                                               mlen = m->m_ext.ext_size;
+                                               mlen = m->m_ext.ext_size -
+                                                   m_leadingspace(m);
                                        else if ((m->m_flags & M_PKTHDR))
                                                mlen =
                                                    MHLEN - m_leadingspace(m);
                                        else
-                                               mlen = MLEN;
+                                               mlen = MLEN - m_leadingspace(m);
                                        len = imin(mlen, bytes_to_copy);
 
                                        chainlength += len;
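A standalone arithmetic sketch (not part of the patch) of how the headroom reservation interacts with the cluster sizing above; MCLBYTES = 2048 is the usual XNU value, and the 28-byte headroom is purely hypothetical, standing in for so_pktheadroom:

#include <stdio.h>

#define MCLBYTES        2048
#define ROUNDUP(a, b)   (((a) + ((b) - 1)) & (~((b) - 1)))

int
main(void)
{
	int bytes_to_copy = 1500;       /* payload supplied by the caller    */
	int headroom = 28;              /* hypothetical so_pktheadroom value */
	int bytes_to_alloc = bytes_to_copy + headroom;

	/* One 2 KB cluster covers payload plus reserved headroom. */
	int num_needed = ROUNDUP(bytes_to_alloc, MCLBYTES) / MCLBYTES;

	/*
	 * After freelist->m_data += headroom, the leading space counts
	 * against the first cluster, so mlen = ext_size - leadingspace.
	 */
	int mlen_first = MCLBYTES - headroom;

	printf("clusters=%d usable_in_first=%d\n", num_needed, mlen_first);
	return (0);
}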
@@ -2074,19 +2234,20 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
                        if (dontroute)
                                so->so_options |= SO_DONTROUTE;
 
-                       /* Compute flags here, for pru_send and NKEs */
+                       /*
+                        * Compute flags here, for pru_send and NKEs
+                        *
+                        * If the user set MSG_EOF, the protocol
+                        * understands this flag and nothing left to
+                        * send then use PRU_SEND_EOF instead of PRU_SEND.
+                        */
                        sendflags = (flags & MSG_OOB) ? PRUS_OOB :
-                           /*
-                            * If the user set MSG_EOF, the protocol
-                            * understands this flag and nothing left to
-                            * send then use PRU_SEND_EOF instead of PRU_SEND.
-                            */
                            ((flags & MSG_EOF) &&
-                            (so->so_proto->pr_flags & PR_IMPLOPCL) &&
-                            (resid <= 0)) ? PRUS_EOF :
-                            /* If there is more to send set PRUS_MORETOCOME */
-                            (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
-                       
+                           (so->so_proto->pr_flags & PR_IMPLOPCL) &&
+                           (resid <= 0)) ? PRUS_EOF :
+                           /* If there is more to send set PRUS_MORETOCOME */
+                           (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
+
                        if ((flags & MSG_SKIPCFIL) == 0) {
                                /*
                                 * Socket filter processing
@@ -2108,7 +2269,7 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
                                 * Content filter processing
                                 */
                                error = cfil_sock_data_out(so, addr, top,
-                                  control, (sendflags & MSG_OOB) ?
+                                   control, (sendflags & MSG_OOB) ?
                                    sock_data_filt_flag_oob : 0);
                                if (error) {
                                        if (error == EJUSTRETURN) {
@@ -2163,24 +2324,44 @@ out:
        if (control_copy != NULL)
                m_freem(control_copy);
 
-       KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END, so, resid, so->so_snd.sb_cc,
-           space, error);
+       /*
+        * One write has been done. This was enough. Get back to "normal"
+        * behavior.
+        */
+       if (so->so_flags1 & SOF1_PRECONNECT_DATA)
+               so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
+
+       if (en_tracing) {
+               /* resid passed here is the bytes left in uio */
+               KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_END,
+                   VM_KERNEL_ADDRPERM(so),
+                   ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
+                   (int64_t)(orig_resid - resid));
+       }
+       KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END, so, resid,
+           so->so_snd.sb_cc, space, error);
 
        return (error);
 }
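
The sendflags computation above is a chained conditional expression: MSG_OOB takes priority, then MSG_EOF with nothing left to send on an implied-open/close protocol, then "more to come". A minimal user-space sketch of that precedence follows; the *_X constants are stand-in values for illustration only, not the kernel's definitions.

/* Illustration of the sendflags precedence; stand-in constants. */
#include <stdio.h>

#define MSG_OOB_X         0x01
#define MSG_EOF_X         0x02
#define PR_IMPLOPCL_X     0x01
#define PRUS_OOB_X        0x01
#define PRUS_EOF_X        0x02
#define PRUS_MORETOCOME_X 0x04

/* Mirrors the ternary chain: OOB wins, then EOF-at-end-of-data, then MORETOCOME. */
static int
compute_sendflags(int flags, int pr_flags, long resid, long space)
{
	if (flags & MSG_OOB_X)
		return (PRUS_OOB_X);
	if ((flags & MSG_EOF_X) && (pr_flags & PR_IMPLOPCL_X) && resid <= 0)
		return (PRUS_EOF_X);
	if (resid > 0 && space > 0)
		return (PRUS_MORETOCOME_X);
	return (0);
}

int
main(void)
{
	/* MSG_EOF set, implied open/close protocol, nothing left to send. */
	printf("0x%x\n", compute_sendflags(MSG_EOF_X, PR_IMPLOPCL_X, 0, 0));
	return (0);
}
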
 
+/*
+ * Supports only connected sockets (no address) without ancillary data
+ * (control mbuf), for atomic protocols
+ */
 int
-sosend_list(struct socket *so, struct sockaddr *addr, struct uio **uioarray,
-     u_int uiocnt, struct mbuf *top, struct mbuf *control, int flags)
+sosend_list(struct socket *so, struct uio **uioarray, u_int uiocnt, int flags)
 {
        struct mbuf *m, *freelist = NULL;
        user_ssize_t len, resid;
-       int clen = 0, error, dontroute, mlen;
-       int atomic = sosendallatonce(so) || top;
+       int error, dontroute, mlen;
+       int atomic = sosendallatonce(so);
        int sblocked = 0;
        struct proc *p = current_proc();
        u_int uiofirst = 0;
        u_int uiolast = 0;
+       struct mbuf *top = NULL;
+       uint16_t headroom = 0;
+       boolean_t bigcl;
 
        KERNEL_DEBUG((DBG_FNC_SOSEND_LIST | DBG_FUNC_START), so, uiocnt,
            so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat);
@@ -2201,10 +2382,7 @@ sosend_list(struct socket *so, struct sockaddr *addr, struct uio **uioarray,
                error = EINVAL;
                goto out;
        }
-       if (uioarray != NULL)
-               resid = uio_array_resid(uioarray, uiocnt);
-       else
-               resid = mbuf_pkt_list_len(top);
+       resid = uio_array_resid(uioarray, uiocnt);
 
        /*
         * In theory resid should be unsigned.
@@ -2220,166 +2398,186 @@ sosend_list(struct socket *so, struct sockaddr *addr, struct uio **uioarray,
                error = EINVAL;
                goto out;
        }
-       /*
-        * Disallow functionality not currently supported
-        * Note: Will need to treat arrays of addresses and controls
-        */
-       if (addr != NULL) {
-               printf("%s addr not supported\n", __func__);
-               error = EOPNOTSUPP;
-               goto out;
-       }
-       if (control != NULL) {
-               printf("%s control not supported\n", __func__);
-               error = EOPNOTSUPP;
-               goto out;
-       }
 
        socket_lock(so, 1);
        so_update_last_owner_locked(so, p);
        so_update_policy(so);
-       
+
 #if NECP
-       so_update_necp_policy(so, NULL, addr);
+       so_update_necp_policy(so, NULL, NULL);
 #endif /* NECP */
-       
+
        dontroute = (flags & MSG_DONTROUTE) &&
            (so->so_options & SO_DONTROUTE) == 0 &&
            (so->so_proto->pr_flags & PR_ATOMIC);
        OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
 
-       if (control != NULL)
-               clen = control->m_len;
-
-       error = sosendcheck(so, addr, resid, clen, atomic, flags,
-           &sblocked, control);
+       error = sosendcheck(so, NULL, resid, 0, atomic, flags,
+           &sblocked, NULL);
        if (error)
                goto release;
 
+       /*
+        * Use big 4 KB clusters when the outgoing interface does not prefer
+        * 2 KB clusters
+        */
+       bigcl = !(so->so_flags1 & SOF1_IF_2KCL) || sosendbigcl_ignore_capab;
+
+       if (soreserveheadroom != 0)
+               headroom = so->so_pktheadroom;
+
        do {
                int i;
+               int num_needed = 0;
+               int chainlength;
+               size_t maxpktlen = 0;
+               int bytes_to_alloc;
 
-               if (uioarray == NULL) {
-                       /*
-                        * Data is prepackaged in "top".
-                        */
-                       resid = 0;
-               } else {
-                       int num_needed = 0;
-                       int chainlength;
-                       size_t maxpktlen = 0;
+               if (sosendminchain > 0)
+                       chainlength = 0;
+               else
+                       chainlength = sosendmaxchain;
 
-                       if (sosendminchain > 0)
-                               chainlength = 0;
-                       else
-                               chainlength = sosendmaxchain;
+               socket_unlock(so, 0);
 
-                       socket_unlock(so, 0);
+               /*
+                * Find a set of uios that fits in a reasonable number
+                * of mbuf packets
+                */
+               for (i = uiofirst; i < uiocnt; i++) {
+                       struct uio *auio = uioarray[i];
 
-                       /*
-                        * Find a set of uio that fit in a reasonable number
-                        * of mbuf packets 
-                        */
-                       for (i = uiofirst; i < uiocnt; i++) {
-                               struct uio *auio = uioarray[i];
+                       len = uio_resid(auio);
 
-                               len = uio_resid(auio);
+                       /* Do nothing for empty messages */
+                       if (len == 0)
+                               continue;
 
-                               /* Do nothing for empty messages */
-                               if (len == 0)
-                                       continue;
+                       num_needed += 1;
+                       uiolast += 1;
 
-                               num_needed += 1;
-                               uiolast += 1;
-                               
-                               if (len > maxpktlen)
-                                       maxpktlen = len;
+                       if (len > maxpktlen)
+                               maxpktlen = len;
 
-                               chainlength += len;
-                               if (chainlength > sosendmaxchain)
-                                       break;
-                       }
-                       /*
-                        * Nothing left to send
-                        */
-                       if (num_needed == 0) {
-                               socket_lock(so, 0);
+                       chainlength += len;
+                       if (chainlength > sosendmaxchain)
                                break;
-                       }
-                       /*
-                        * Allocate the mbuf packets at once
-                        */
+               }
+               /*
+                * Nothing left to send
+                */
+               if (num_needed == 0) {
+                       socket_lock(so, 0);
+                       break;
+               }
+               /*
+                * Allocate a buffer large enough to include headroom space
+                * for the network and link-layer headers
+                */
+               bytes_to_alloc = maxpktlen + headroom;
+
+               /*
+                * Allocate a single contiguous buffer of the smallest available
+                * size when possible
+                */
+               if (bytes_to_alloc > MCLBYTES &&
+                   bytes_to_alloc <= MBIGCLBYTES && bigcl) {
+                       freelist = m_getpackets_internal(
+                           (unsigned int *)&num_needed,
+                           num_needed, M_WAIT, 1,
+                           MBIGCLBYTES);
+               } else if (bytes_to_alloc > _MHLEN &&
+                   bytes_to_alloc <= MCLBYTES) {
+                       freelist = m_getpackets_internal(
+                           (unsigned int *)&num_needed,
+                           num_needed, M_WAIT, 1,
+                           MCLBYTES);
+               } else {
                        freelist = m_allocpacket_internal(
                            (unsigned int *)&num_needed,
-                           maxpktlen, NULL, M_WAIT, 1, 0);
+                           bytes_to_alloc, NULL, M_WAIT, 1, 0);
+               }
+
+               if (freelist == NULL) {
+                       socket_lock(so, 0);
+                       error = ENOMEM;
+                       goto release;
+               }
+               /*
+                * Copy each uio of the set into its own mbuf packet
+                */
+               for (i = uiofirst, m = freelist;
+                   i < uiolast && m != NULL;
+                   i++) {
+                       int bytes_to_copy;
+                       struct mbuf *n;
+                       struct uio *auio = uioarray[i];
 
-                       if (freelist == NULL) {
-                               socket_lock(so, 0);
-                               error = ENOMEM;
-                               goto release;
-                       }
+                       bytes_to_copy = uio_resid(auio);
+
+                       /* Do nothing for empty messages */
+                       if (bytes_to_copy == 0)
+                               continue;
                        /*
-                        * Copy each uio of the set into its own mbuf packet
+                        * Leave headroom for protocol headers
+                        * in the first mbuf of the chain
                         */
-                       for (i = uiofirst, m = freelist;
-                           i < uiolast && m != NULL;
-                           i++) {
-                               int bytes_to_copy;
-                               struct mbuf *n;
-                               struct uio *auio = uioarray[i];
-
-                               bytes_to_copy = uio_resid(auio);
-
-                               /* Do nothing for empty messages */
-                               if (bytes_to_copy == 0)
-                                       continue;
-
-                               for (n = m; n != NULL; n = n->m_next) {
-                                       mlen = mbuf_maxlen(n);
-
-                                       len = imin(mlen, bytes_to_copy);
-
-                                       /* 
-                                        * Note: uiomove() decrements the iovec
-                                        * length
-                                        */
-                                       error = uiomove(mtod(n, caddr_t),
-                                           len, auio);
-                                       if (error != 0)
-                                               break;
-                                       n->m_len = len;
-                                       m->m_pkthdr.len += len;
+                       m->m_data += headroom;
+
+                       for (n = m; n != NULL; n = n->m_next) {
+                               if ((m->m_flags & M_EXT))
+                                       mlen = m->m_ext.ext_size -
+                                           m_leadingspace(m);
+                               else if ((m->m_flags & M_PKTHDR))
+                                       mlen =
+                                           MHLEN - m_leadingspace(m);
+                               else
+                                       mlen = MLEN - m_leadingspace(m);
+                               len = imin(mlen, bytes_to_copy);
 
-                                       VERIFY(m->m_pkthdr.len <= maxpktlen);
-                                       
-                                       bytes_to_copy -= len;
-                                       resid -= len;
-                               }
-                               if (m->m_pkthdr.len == 0) {
-                                       printf("%s so %llx pkt %llx len null\n",
-                                           __func__,
-                                           (uint64_t)VM_KERNEL_ADDRPERM(so),
-                                           (uint64_t)VM_KERNEL_ADDRPERM(m));
-                               }
+                               /*
+                                * Note: uiomove() decrements the iovec
+                                * length
+                                */
+                               error = uiomove(mtod(n, caddr_t),
+                                   len, auio);
                                if (error != 0)
                                        break;
-                               m = m->m_nextpkt;
-                       }
+                               n->m_len = len;
+                               m->m_pkthdr.len += len;
 
-                       socket_lock(so, 0);
+                               VERIFY(m->m_pkthdr.len <= maxpktlen);
 
-                       if (error)
-                               goto release;
-                       top = freelist;
-                       freelist = NULL;
+                               bytes_to_copy -= len;
+                               resid -= len;
+                       }
+                       if (m->m_pkthdr.len == 0) {
+                               printf(
+                                   "%s:%d so %llx pkt %llx type %u len null\n",
+                                   __func__, __LINE__,
+                                   (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
+                                   (uint64_t)DEBUG_KERNEL_ADDRPERM(m),
+                                   m->m_type);
+                       }
+                       if (error != 0)
+                               break;
+                       m = m->m_nextpkt;
                }
 
+               socket_lock(so, 0);
+
+               if (error)
+                       goto release;
+               top = freelist;
+               freelist = NULL;
+
                if (dontroute)
                        so->so_options |= SO_DONTROUTE;
 
                if ((flags & MSG_SKIPCFIL) == 0) {
                        struct mbuf **prevnextp = NULL;
-                       
+
                        for (i = uiofirst, m = top;
                            i < uiolast && m != NULL;
                            i++) {
@@ -2388,18 +2586,18 @@ sosend_list(struct socket *so, struct sockaddr *addr, struct uio **uioarray,
                                /*
                                 * Socket filter processing
                                 */
-                               error = sflt_data_out(so, addr, &m,
-                                   &control, 0);
+                               error = sflt_data_out(so, NULL, &m,
+                                   NULL, 0);
                                if (error != 0 && error != EJUSTRETURN)
                                        goto release;
-                               
+
 #if CONTENT_FILTER
                                if (error == 0) {
                                        /*
                                         * Content filter processing
                                         */
-                                       error = cfil_sock_data_out(so, addr, m,
-                                          control, 0);
+                                       error = cfil_sock_data_out(so, NULL, m,
+                                           NULL, 0);
                                        if (error != 0 && error != EJUSTRETURN)
                                                goto release;
                                }
@@ -2414,8 +2612,8 @@ sosend_list(struct socket *so, struct sockaddr *addr, struct uio **uioarray,
                                                *prevnextp = nextpkt;
                                        else
                                                top = nextpkt;
-                               }                               
-                               
+                               }
+
                                m = nextpkt;
                                if (m != NULL)
                                        prevnextp = &m->m_nextpkt;
@@ -2423,12 +2621,11 @@ sosend_list(struct socket *so, struct sockaddr *addr, struct uio **uioarray,
                }
                if (top != NULL)
                        error = (*so->so_proto->pr_usrreqs->pru_send_list)
-                           (so, 0, top, addr, control, p);
+                           (so, 0, top, NULL, NULL, p);
 
                if (dontroute)
                        so->so_options &= ~SO_DONTROUTE;
 
-               clen = 0;
                top = NULL;
                uiofirst = uiolast;
        } while (resid > 0 && error == 0);
@@ -2440,8 +2637,6 @@ release:
 out:
        if (top != NULL)
                m_freem(top);
-       if (control != NULL)
-               m_freem(control);
        if (freelist != NULL)
                m_freem_list(freelist);
 
@@ -2451,6 +2646,256 @@ out:
        return (error);
 }
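
The allocation path in sosend_list() picks the smallest single buffer that can hold the largest packet plus headroom, using 4 KB clusters only when the outgoing interface does not prefer 2 KB clusters, and otherwise falling back to the general allocator. A minimal sketch of that selection is below; the *_X sizes are stand-ins, not the values from the mbuf headers.

#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

/* Stand-in sizes for illustration only. */
#define MHLEN_X       224   /* payload bytes in an mbuf with a packet header */
#define MCLBYTES_X    2048  /* 2 KB cluster */
#define MBIGCLBYTES_X 4096  /* 4 KB cluster */

/* Pick the smallest single buffer class that fits the packet plus headroom. */
static size_t
pick_buffer_size(size_t maxpktlen, size_t headroom, bool bigcl)
{
	size_t bytes_to_alloc = maxpktlen + headroom;

	if (bytes_to_alloc > MCLBYTES_X &&
	    bytes_to_alloc <= MBIGCLBYTES_X && bigcl)
		return (MBIGCLBYTES_X);   /* single 4 KB cluster */
	if (bytes_to_alloc > MHLEN_X && bytes_to_alloc <= MCLBYTES_X)
		return (MCLBYTES_X);      /* single 2 KB cluster */
	return (bytes_to_alloc);          /* general path: mbuf or cluster chain */
}

int
main(void)
{
	printf("%zu\n", pick_buffer_size(1400, 16, true));   /* 2048 */
	printf("%zu\n", pick_buffer_size(3000, 16, true));   /* 4096 */
	printf("%zu\n", pick_buffer_size(3000, 16, false));  /* 3016: chained */
	return (0);
}
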
 
+/*
+ * May return ERESTART when a packet is dropped by the MAC policy check
+ */
+static int
+soreceive_addr(struct proc *p, struct socket *so, struct sockaddr **psa,
+    int flags, struct mbuf **mp, struct mbuf **nextrecordp, int canwait)
+{
+       int error = 0;
+       struct mbuf *m = *mp;
+       struct mbuf *nextrecord = *nextrecordp;
+
+       KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
+#if CONFIG_MACF_SOCKET_SUBSET
+       /*
+        * Call the MAC framework for policy checking if we're in
+        * the user process context and the socket isn't connected.
+        */
+       if (p != kernproc && !(so->so_state & SS_ISCONNECTED)) {
+               struct mbuf *m0 = m;
+               /*
+                * Dequeue this record (temporarily) from the receive
+                * list since we're about to drop the socket's lock
+                * where a new record may arrive and be appended to
+                * the list.  Upon MAC policy failure, the record
+                * will be freed.  Otherwise, we'll add it back to
+                * the head of the list.  We cannot rely on SB_LOCK
+                * because append operation uses the socket's lock.
+                */
+               do {
+                       m->m_nextpkt = NULL;
+                       sbfree(&so->so_rcv, m);
+                       m = m->m_next;
+               } while (m != NULL);
+               m = m0;
+               so->so_rcv.sb_mb = nextrecord;
+               SB_EMPTY_FIXUP(&so->so_rcv);
+               SBLASTRECORDCHK(&so->so_rcv, "soreceive 1a");
+               SBLASTMBUFCHK(&so->so_rcv, "soreceive 1a");
+               socket_unlock(so, 0);
+
+               if (mac_socket_check_received(proc_ucred(p), so,
+                   mtod(m, struct sockaddr *)) != 0) {
+                       /*
+                        * MAC policy failure; free this record and
+                        * process the next record (or block until
+                        * one is available).  We have adjusted sb_cc
+                        * and sb_mbcnt above so there is no need to
+                        * call sbfree() again.
+                        */
+                       m_freem(m);
+                       /*
+                        * Clear SB_LOCK but don't unlock the socket.
+                        * Process the next record or wait for one.
+                        */
+                       socket_lock(so, 0);
+                       sbunlock(&so->so_rcv, TRUE); /* stay locked */
+                       error = ERESTART;
+                       goto done;
+               }
+               socket_lock(so, 0);
+               /*
+                * If the socket has been defunct'd, drop it.
+                */
+               if (so->so_flags & SOF_DEFUNCT) {
+                       m_freem(m);
+                       error = ENOTCONN;
+                       goto done;
+               }
+               /*
+                * Re-adjust the socket receive list and re-enqueue
+                * the record in front of any packets which may have
+                * been appended while we dropped the lock.
+                */
+               for (m = m0; m->m_next != NULL; m = m->m_next)
+                       sballoc(&so->so_rcv, m);
+               sballoc(&so->so_rcv, m);
+               if (so->so_rcv.sb_mb == NULL) {
+                       so->so_rcv.sb_lastrecord = m0;
+                       so->so_rcv.sb_mbtail = m;
+               }
+               m = m0;
+               nextrecord = m->m_nextpkt = so->so_rcv.sb_mb;
+               so->so_rcv.sb_mb = m;
+               SBLASTRECORDCHK(&so->so_rcv, "soreceive 1b");
+               SBLASTMBUFCHK(&so->so_rcv, "soreceive 1b");
+       }
+#endif /* CONFIG_MACF_SOCKET_SUBSET */
+       if (psa != NULL) {
+               *psa = dup_sockaddr(mtod(m, struct sockaddr *), canwait);
+               if ((*psa == NULL) && (flags & MSG_NEEDSA)) {
+                       error = EWOULDBLOCK;
+                       goto done;
+               }
+       }
+       if (flags & MSG_PEEK) {
+               m = m->m_next;
+       } else {
+               sbfree(&so->so_rcv, m);
+               if (m->m_next == NULL && so->so_rcv.sb_cc != 0) {
+                       panic("%s: about to create invalid socketbuf",
+                           __func__);
+                       /* NOTREACHED */
+               }
+               MFREE(m, so->so_rcv.sb_mb);
+               m = so->so_rcv.sb_mb;
+               if (m != NULL) {
+                       m->m_nextpkt = nextrecord;
+               } else {
+                       so->so_rcv.sb_mb = nextrecord;
+                       SB_EMPTY_FIXUP(&so->so_rcv);
+               }
+       }
+done:
+       *mp = m;
+       *nextrecordp = nextrecord;
+
+       return (error);
+}
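
At the end of soreceive_addr(), the MT_SONAME mbuf is either peeked past (MSG_PEEK leaves it on the receive queue) or freed and the record relinked. A minimal sketch of that peek-versus-consume step on a plain singly linked list is shown below; the node type and helper are hypothetical stand-ins.

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

/* Hypothetical stand-in for an mbuf on the receive queue. */
struct node {
	struct node *next;
	int value;
};

/*
 * Return the node that follows the head.  With peek, the head stays on the
 * list; otherwise it is unlinked and freed, like the !MSG_PEEK branch.
 */
static struct node *
consume_head(struct node **headp, bool peek)
{
	struct node *head = *headp;

	if (head == NULL)
		return (NULL);
	if (peek)
		return (head->next);
	*headp = head->next;
	free(head);
	return (*headp);
}

int
main(void)
{
	struct node *b = malloc(sizeof(*b));
	struct node *a = malloc(sizeof(*a));
	struct node *head;

	b->next = NULL; b->value = 2;
	a->next = b;    a->value = 1;
	head = a;

	consume_head(&head, false);   /* drop the address node */
	printf("%d\n", head->value);  /* 2 */
	free(b);
	return (0);
}
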
+
+/*
+ * Process one or more MT_CONTROL mbufs present before any data mbufs
+ * in the first mbuf chain on the socket buffer.  If MSG_PEEK, we
+ * just copy the data; if !MSG_PEEK, we call into the protocol to
+ * perform externalization.
+ */
+static int
+soreceive_ctl(struct socket *so, struct mbuf **controlp, int flags,
+    struct mbuf **mp, struct mbuf **nextrecordp)
+{
+       int error = 0;
+       struct mbuf *cm = NULL, *cmn;
+       struct mbuf **cme = &cm;
+       struct sockbuf *sb_rcv = &so->so_rcv;
+       struct mbuf **msgpcm = NULL;
+       struct mbuf *m = *mp;
+       struct mbuf *nextrecord = *nextrecordp;
+       struct protosw *pr = so->so_proto;
+
+       /*
+        * Externalizing the control messages would require us to
+        * drop the socket's lock below.  Once we re-acquire the
+        * lock, the mbuf chain might change.  In order to preserve
+        * consistency, we unlink all control messages from the
+        * first mbuf chain in one shot and link them separately
+        * onto a different chain.
+        */
+       do {
+               if (flags & MSG_PEEK) {
+                       if (controlp != NULL) {
+                               if (*controlp == NULL) {
+                                       msgpcm = controlp;
+                               }
+                               *controlp = m_copy(m, 0, m->m_len);
+
+                               /*
+                                * If we failed to allocate an mbuf,
+                                * release any previously allocated
+                                * mbufs for control data. Return
+                                * an error. Keep the mbufs in the
+                                * socket as this is using
+                                * MSG_PEEK flag.
+                                */
+                               if (*controlp == NULL) {
+                                       m_freem(*msgpcm);
+                                       error = ENOBUFS;
+                                       goto done;
+                               }
+                               controlp = &(*controlp)->m_next;
+                       }
+                       m = m->m_next;
+               } else {
+                       m->m_nextpkt = NULL;
+                       sbfree(sb_rcv, m);
+                       sb_rcv->sb_mb = m->m_next;
+                       m->m_next = NULL;
+                       *cme = m;
+                       cme = &(*cme)->m_next;
+                       m = sb_rcv->sb_mb;
+               }
+       } while (m != NULL && m->m_type == MT_CONTROL);
+
+       if (!(flags & MSG_PEEK)) {
+               if (sb_rcv->sb_mb != NULL) {
+                       sb_rcv->sb_mb->m_nextpkt = nextrecord;
+               } else {
+                       sb_rcv->sb_mb = nextrecord;
+                       SB_EMPTY_FIXUP(sb_rcv);
+               }
+               if (nextrecord == NULL)
+                       sb_rcv->sb_lastrecord = m;
+       }
+
+       SBLASTRECORDCHK(&so->so_rcv, "soreceive ctl");
+       SBLASTMBUFCHK(&so->so_rcv, "soreceive ctl");
+
+       while (cm != NULL) {
+               int cmsg_type;
+
+               cmn = cm->m_next;
+               cm->m_next = NULL;
+               cmsg_type = mtod(cm, struct cmsghdr *)->cmsg_type;
+
+               /*
+                * Call the protocol to externalize SCM_RIGHTS message
+                * and return the modified message to the caller upon
+                * success.  Otherwise, all other control messages are
+                * returned unmodified to the caller.  Note that we
+                * only get into this loop if MSG_PEEK is not set.
+                */
+               if (pr->pr_domain->dom_externalize != NULL &&
+                   cmsg_type == SCM_RIGHTS) {
+                       /*
+                        * Release socket lock: see 3903171.  This
+                        * would also allow more records to be appended
+                        * to the socket buffer.  We still have SB_LOCK
+                        * set on it, so we can be sure that the head
+                        * of the mbuf chain won't change.
+                        */
+                       socket_unlock(so, 0);
+                       error = (*pr->pr_domain->dom_externalize)(cm);
+                       socket_lock(so, 0);
+               } else {
+                       error = 0;
+               }
+
+               if (controlp != NULL && error == 0) {
+                       *controlp = cm;
+                       controlp = &(*controlp)->m_next;
+               } else {
+                       (void) m_free(cm);
+               }
+               cm = cmn;
+       }
+       /*
+        * Update the value of nextrecord in case we received new
+        * records when the socket was unlocked above for
+        * externalizing SCM_RIGHTS.
+        */
+       if (m != NULL)
+               nextrecord = sb_rcv->sb_mb->m_nextpkt;
+       else
+               nextrecord = sb_rcv->sb_mb;
+
+done:
+       *mp = m;
+       *nextrecordp = nextrecord;
+
+       return (error);
+}
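
In the !MSG_PEEK case, soreceive_ctl() unlinks the entire leading run of MT_CONTROL mbufs from the first record in one pass, before the socket lock can be dropped for externalization. A minimal sketch of that "split the leading run off the front of a chain" step follows, using a hypothetical node type with a type tag instead of mbufs.

#include <stdio.h>

/* Hypothetical stand-ins for MT_CONTROL/data mbufs on a chain. */
enum { NT_CONTROL, NT_DATA };

struct node {
	struct node *next;
	int type;
};

/*
 * Unlink the leading control nodes from *headp and return them as a
 * separate chain, leaving *headp at the first non-control node.
 */
static struct node *
split_leading_control(struct node **headp)
{
	struct node *ctl = NULL, **tailp = &ctl;

	while (*headp != NULL && (*headp)->type == NT_CONTROL) {
		struct node *n = *headp;

		*headp = n->next;   /* advance the record head */
		n->next = NULL;
		*tailp = n;         /* append to the control chain */
		tailp = &n->next;
	}
	return (ctl);
}

int
main(void)
{
	struct node nodes[3] = {
		{ &nodes[1], NT_CONTROL },
		{ &nodes[2], NT_CONTROL },
		{ NULL,      NT_DATA },
	};
	struct node *head = &nodes[0];
	struct node *ctl = split_leading_control(&head);
	int nctl = 0;

	for (; ctl != NULL; ctl = ctl->next)
		nctl++;
	printf("%d control nodes, head type %d\n", nctl, head->type);
	return (0);
}
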
+
 /*
  * Implement receive operations on a socket.
  * We depend on the way that records are added to the sockbuf
@@ -2497,15 +2942,13 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
        int flags, error, offset;
        user_ssize_t len;
        struct protosw *pr = so->so_proto;
-       int moff, type =0;
+       int moff, type = 0;
        user_ssize_t orig_resid = uio_resid(uio);
        user_ssize_t delayed_copy_len;
        int can_delay;
        int need_event;
        struct proc *p = current_proc();
-
-       KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START, so, uio_resid(uio),
-           so->so_rcv.sb_cc, so->so_rcv.sb_lowat, so->so_rcv.sb_hiwat);
+       boolean_t en_tracing = FALSE;
 
        /*
         * Sanity check on the length passed by caller as we are making 'int'
@@ -2514,6 +2957,10 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
        if (orig_resid < 0 || orig_resid > INT_MAX)
                return (EINVAL);
 
+       KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START, so,
+           uio_resid(uio), so->so_rcv.sb_cc, so->so_rcv.sb_lowat,
+           so->so_rcv.sb_hiwat);
+
        socket_lock(so, 1);
        so_update_last_owner_locked(so, p);
        so_update_policy(so);
@@ -2544,7 +2991,7 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
 
                error = ENOTCONN;
                SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n",
-                   __func__, proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so),
+                   __func__, proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                    SOCK_DOM(so), SOCK_TYPE(so), error));
                /*
                 * This socket should have been disconnected and flushed
@@ -2557,6 +3004,40 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
                return (error);
        }
 
+       if ((so->so_flags1 & SOF1_PRECONNECT_DATA) &&
+           pr->pr_usrreqs->pru_preconnect) {
+               /*
+                * A user may set the CONNECT_RESUME_ON_READ_WRITE flag but
+                * not call write() right after this. If the app then calls
+                * read(), we do not want to block that read indefinitely.
+                * Thus, we trigger a connect so that the session gets
+                * initiated.
+                */
+               error = (*pr->pr_usrreqs->pru_preconnect)(so);
+
+               if (error) {
+                       socket_unlock(so, 1);
+                       return (error);
+               }
+       }
+
+       if (ENTR_SHOULDTRACE &&
+           (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
+               /*
+                * Enable energy tracing only for inet sockets that go over
+                * non-loopback interfaces.
+                */
+               struct inpcb *inp = sotoinpcb(so);
+               if (inp->inp_last_outifp != NULL &&
+                   !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
+                       en_tracing = TRUE;
+                       KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_START,
+                           VM_KERNEL_ADDRPERM(so),
+                           ((so->so_state & SS_NBIO) ?
+                           kEnTrFlagNonBlocking : 0),
+                           (int64_t)orig_resid);
+               }
+       }
+
        /*
         * When SO_WANTOOBFLAG is set we try to get out-of-band data
         * regardless of the flags argument. Here is the case were
@@ -2602,6 +3083,11 @@ bad:
                        }
                }
                socket_unlock(so, 1);
+               if (en_tracing) {
+                       KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
+                           VM_KERNEL_ADDRPERM(so), 0,
+                           (int64_t)(orig_resid - uio_resid(uio)));
+               }
                KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
                    0, 0, 0, 0);
 
@@ -2621,7 +3107,7 @@ restart:
 #ifdef MORE_LOCKING_DEBUG
        if (so->so_usecount <= 1)
                printf("soreceive: sblock so=0x%llx ref=%d on socket\n",
-                   (uint64_t)VM_KERNEL_ADDRPERM(so), so->so_usecount);
+                   (uint64_t)DEBUG_KERNEL_ADDRPERM(so), so->so_usecount);
 #endif
        /*
         * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
@@ -2650,6 +3136,11 @@ restart:
                socket_unlock(so, 1);
                KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
                    0, 0, 0, 0);
+               if (en_tracing) {
+                       KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
+                           VM_KERNEL_ADDRPERM(so), 0,
+                           (int64_t)(orig_resid - uio_resid(uio)));
+               }
                return (error);
        }
 
@@ -2696,8 +3187,8 @@ restart:
                                cfil_sock_data_pending(&so->so_rcv) != 0)
                                CFIL_LOG(LOG_INFO,
                                        "so %llx ignore SS_CANTRCVMORE",
-                                       (uint64_t)VM_KERNEL_ADDRPERM(so));
-                       else 
+                                       (uint64_t)DEBUG_KERNEL_ADDRPERM(so));
+                       else
 #endif /* CONTENT_FILTER */
                        if (m != NULL)
                                goto dontblock;
@@ -2716,6 +3207,7 @@ restart:
                }
                if (uio_resid(uio) == 0)
                        goto release;
+
                if ((so->so_state & SS_NBIO) ||
                    (flags & (MSG_DONTWAIT|MSG_NBIO))) {
                        error = EWOULDBLOCK;
@@ -2743,6 +3235,11 @@ restart:
                        socket_unlock(so, 1);
                        KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
                            0, 0, 0, 0);
+                       if (en_tracing) {
+                               KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
+                                   VM_KERNEL_ADDRPERM(so), 0,
+                                   (int64_t)(orig_resid - uio_resid(uio)));
+                       }
                        return (error);
                }
                goto restart;
@@ -2752,111 +3249,15 @@ dontblock:
        SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
        SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
        nextrecord = m->m_nextpkt;
-       if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
-               KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
-#if CONFIG_MACF_SOCKET_SUBSET
-               /*
-                * Call the MAC framework for policy checking if we're in
-                * the user process context and the socket isn't connected.
-                */
-               if (p != kernproc && !(so->so_state & SS_ISCONNECTED)) {
-                       struct mbuf *m0 = m;
-                       /*
-                        * Dequeue this record (temporarily) from the receive
-                        * list since we're about to drop the socket's lock
-                        * where a new record may arrive and be appended to
-                        * the list.  Upon MAC policy failure, the record
-                        * will be freed.  Otherwise, we'll add it back to
-                        * the head of the list.  We cannot rely on SB_LOCK
-                        * because append operation uses the socket's lock.
-                        */
-                       do {
-                               m->m_nextpkt = NULL;
-                               sbfree(&so->so_rcv, m);
-                               m = m->m_next;
-                       } while (m != NULL);
-                       m = m0;
-                       so->so_rcv.sb_mb = nextrecord;
-                       SB_EMPTY_FIXUP(&so->so_rcv);
-                       SBLASTRECORDCHK(&so->so_rcv, "soreceive 1a");
-                       SBLASTMBUFCHK(&so->so_rcv, "soreceive 1a");
-                       socket_unlock(so, 0);
 
-                       if (mac_socket_check_received(proc_ucred(p), so,
-                           mtod(m, struct sockaddr *)) != 0) {
-                               /*
-                                * MAC policy failure; free this record and
-                                * process the next record (or block until
-                                * one is available).  We have adjusted sb_cc
-                                * and sb_mbcnt above so there is no need to
-                                * call sbfree() again.
-                                */
-                               do {
-                                       m = m_free(m);
-                               } while (m != NULL);
-                               /*
-                                * Clear SB_LOCK but don't unlock the socket.
-                                * Process the next record or wait for one.
-                                */
-                               socket_lock(so, 0);
-                               sbunlock(&so->so_rcv, TRUE); /* stay locked */
-                               goto restart;
-                       }
-                       socket_lock(so, 0);
-                       /*
-                        * If the socket has been defunct'd, drop it.
-                        */
-                       if (so->so_flags & SOF_DEFUNCT) {
-                               m_freem(m);
-                               error = ENOTCONN;
-                               goto release;
-                       }
-                       /*
-                        * Re-adjust the socket receive list and re-enqueue
-                        * the record in front of any packets which may have
-                        * been appended while we dropped the lock.
-                        */
-                       for (m = m0; m->m_next != NULL; m = m->m_next)
-                               sballoc(&so->so_rcv, m);
-                       sballoc(&so->so_rcv, m);
-                       if (so->so_rcv.sb_mb == NULL) {
-                               so->so_rcv.sb_lastrecord = m0;
-                               so->so_rcv.sb_mbtail = m;
-                       }
-                       m = m0;
-                       nextrecord = m->m_nextpkt = so->so_rcv.sb_mb;
-                       so->so_rcv.sb_mb = m;
-                       SBLASTRECORDCHK(&so->so_rcv, "soreceive 1b");
-                       SBLASTMBUFCHK(&so->so_rcv, "soreceive 1b");
-               }
-#endif /* CONFIG_MACF_SOCKET_SUBSET */
+       if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
+               error = soreceive_addr(p, so, psa, flags, &m, &nextrecord,
+                   mp0 == NULL);
+               if (error == ERESTART)
+                       goto restart;
+               else if (error != 0)
+                       goto release;
                orig_resid = 0;
-               if (psa != NULL) {
-                       *psa = dup_sockaddr(mtod(m, struct sockaddr *),
-                           mp0 == NULL);
-                       if ((*psa == NULL) && (flags & MSG_NEEDSA)) {
-                               error = EWOULDBLOCK;
-                               goto release;
-                       }
-               }
-               if (flags & MSG_PEEK) {
-                       m = m->m_next;
-               } else {
-                       sbfree(&so->so_rcv, m);
-                       if (m->m_next == NULL && so->so_rcv.sb_cc != 0) {
-                               panic("%s: about to create invalid socketbuf",
-                                   __func__);
-                               /* NOTREACHED */
-                       }
-                       MFREE(m, so->so_rcv.sb_mb);
-                       m = so->so_rcv.sb_mb;
-                       if (m != NULL) {
-                               m->m_nextpkt = nextrecord;
-                       } else {
-                               so->so_rcv.sb_mb = nextrecord;
-                               SB_EMPTY_FIXUP(&so->so_rcv);
-                       }
-               }
        }
 
        /*
@@ -2866,116 +3267,9 @@ dontblock:
         * perform externalization.
         */
        if (m != NULL && m->m_type == MT_CONTROL) {
-               struct mbuf *cm = NULL, *cmn;
-               struct mbuf **cme = &cm;
-               struct sockbuf *sb_rcv = &so->so_rcv;
-               struct mbuf **msgpcm = NULL;
-
-               /*
-                * Externalizing the control messages would require us to
-                * drop the socket's lock below.  Once we re-acquire the
-                * lock, the mbuf chain might change.  In order to preserve
-                * consistency, we unlink all control messages from the
-                * first mbuf chain in one shot and link them separately
-                * onto a different chain.
-                */
-               do {
-                       if (flags & MSG_PEEK) {
-                               if (controlp != NULL) {
-                                       if (*controlp == NULL) {
-                                               msgpcm = controlp;
-                                       }
-                                       *controlp = m_copy(m, 0, m->m_len);
-
-                                       /*
-                                        * If we failed to allocate an mbuf,
-                                        * release any previously allocated
-                                        * mbufs for control data. Return
-                                        * an error. Keep the mbufs in the
-                                        * socket as this is using
-                                        * MSG_PEEK flag.
-                                        */
-                                       if (*controlp == NULL) {
-                                               m_freem(*msgpcm);
-                                               error = ENOBUFS;
-                                               goto release;
-                                       }
-                                       controlp = &(*controlp)->m_next;
-                               }
-                               m = m->m_next;
-                       } else {
-                               m->m_nextpkt = NULL;
-                               sbfree(sb_rcv, m);
-                               sb_rcv->sb_mb = m->m_next;
-                               m->m_next = NULL;
-                               *cme = m;
-                               cme = &(*cme)->m_next;
-                               m = sb_rcv->sb_mb;
-                       }
-               } while (m != NULL && m->m_type == MT_CONTROL);
-
-               if (!(flags & MSG_PEEK)) {
-                       if (sb_rcv->sb_mb != NULL) {
-                               sb_rcv->sb_mb->m_nextpkt = nextrecord;
-                       } else {
-                               sb_rcv->sb_mb = nextrecord;
-                               SB_EMPTY_FIXUP(sb_rcv);
-                       }
-                       if (nextrecord == NULL)
-                               sb_rcv->sb_lastrecord = m;
-               }
-
-               SBLASTRECORDCHK(&so->so_rcv, "soreceive ctl");
-               SBLASTMBUFCHK(&so->so_rcv, "soreceive ctl");
-
-               while (cm != NULL) {
-                       int cmsg_type;
-
-                       cmn = cm->m_next;
-                       cm->m_next = NULL;
-                       cmsg_type = mtod(cm, struct cmsghdr *)->cmsg_type;
-
-                       /*
-                        * Call the protocol to externalize SCM_RIGHTS message
-                        * and return the modified message to the caller upon
-                        * success.  Otherwise, all other control messages are
-                        * returned unmodified to the caller.  Note that we
-                        * only get into this loop if MSG_PEEK is not set.
-                        */
-                       if (pr->pr_domain->dom_externalize != NULL &&
-                           cmsg_type == SCM_RIGHTS) {
-                               /*
-                                * Release socket lock: see 3903171.  This
-                                * would also allow more records to be appended
-                                * to the socket buffer.  We still have SB_LOCK
-                                * set on it, so we can be sure that the head
-                                * of the mbuf chain won't change.
-                                */
-                               socket_unlock(so, 0);
-                               error = (*pr->pr_domain->dom_externalize)(cm);
-                               socket_lock(so, 0);
-                       } else {
-                               error = 0;
-                       }
-
-                       if (controlp != NULL && error == 0) {
-                               *controlp = cm;
-                               controlp = &(*controlp)->m_next;
-                               orig_resid = 0;
-                       } else {
-                               (void) m_free(cm);
-                       }
-                       cm = cmn;
-               }
-               /*
-                * Update the value of nextrecord in case we received new
-                * records when the socket was unlocked above for
-                * externalizing SCM_RIGHTS.
-                */
-               if (m != NULL)
-                       nextrecord = sb_rcv->sb_mb->m_nextpkt;
-               else
-                       nextrecord = sb_rcv->sb_mb;
+               error = soreceive_ctl(so, controlp, flags, &m, &nextrecord);
+               if (error != 0)
+                       goto release;
                orig_resid = 0;
        }
 
@@ -3390,6 +3684,12 @@ release:
 
        sbunlock(&so->so_rcv, FALSE);   /* will unlock socket */
 
+       if (en_tracing) {
+               KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
+                   VM_KERNEL_ADDRPERM(so),
+                   ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
+                   (int64_t)(orig_resid - uio_resid(uio)));
+       }
        KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, so, uio_resid(uio),
            so->so_rcv.sb_cc, 0, error);
 
@@ -3425,54 +3725,59 @@ sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list,
        return (error);
 }
 
+static int
+sodelayed_copy_list(struct socket *so, struct recv_msg_elem *msgarray,
+    u_int uiocnt, struct mbuf **free_list, user_ssize_t *resid)
+{
+#pragma unused(so)
+       int error = 0;
+       struct mbuf *ml, *m;
+       int i = 0;
+       struct uio *auio;
+
+       for (ml = *free_list, i = 0; ml != NULL && i < uiocnt;
+           ml = ml->m_nextpkt, i++) {
+               auio = msgarray[i].uio;
+               for (m = ml; m != NULL; m = m->m_next) {
+                       error = uiomove(mtod(m, caddr_t), m->m_len, auio);
+                       if (error != 0)
+                               goto out;
+               }
+       }
+out:
+       m_freem_list(*free_list);
+
+       *free_list = NULL;
+       *resid = 0;
+
+       return (error);
+}
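
sodelayed_copy_list() walks the saved packet list in lockstep with the per-message uios and copies each packet's mbuf chain only after the socket lock has been released. A minimal sketch of the same two-level walk is shown below, copying into plain byte buffers instead of calling uiomove(); the seg/pkt types are hypothetical stand-ins.

#include <stdio.h>
#include <string.h>

/* Hypothetical stand-ins: a chain of segments per packet, packets in a list. */
struct seg {
	struct seg *next;
	const char *data;
	size_t len;
};

struct pkt {
	struct pkt *nextpkt;
	struct seg *segs;
};

/* Copy packet i into dst[i], mirroring the nested loops of the delayed copy. */
static void
copy_packets(struct pkt *list, char dst[][64], unsigned npkts)
{
	unsigned i = 0;

	for (struct pkt *p = list; p != NULL && i < npkts; p = p->nextpkt, i++) {
		size_t off = 0;

		for (struct seg *s = p->segs; s != NULL; s = s->next) {
			memcpy(dst[i] + off, s->data, s->len);
			off += s->len;
		}
		dst[i][off] = '\0';
	}
}

int
main(void)
{
	struct seg s2 = { NULL, "gram", 4 };
	struct seg s1 = { &s2, "data", 4 };
	struct pkt p1 = { NULL, &s1 };
	char out[1][64];

	copy_packets(&p1, out, 1);
	printf("%s\n", out[0]);   /* datagram */
	return (0);
}
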
+
 int
-soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray,
-       u_int uiocnt, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
+soreceive_list(struct socket *so, struct recv_msg_elem *msgarray, u_int uiocnt,
+    int *flagsp)
 {
-       struct mbuf *m, **mp;
+       struct mbuf *m;
        struct mbuf *nextrecord;
-       struct mbuf *ml = NULL, *free_list = NULL;
-       int flags, error, offset;
-       user_ssize_t len;
+       struct mbuf *ml = NULL, *free_list = NULL, *free_tail = NULL;
+       int error;
+       user_ssize_t len, pktlen, delayed_copy_len = 0;
        struct protosw *pr = so->so_proto;
-       user_ssize_t orig_resid, resid;
+       user_ssize_t resid;
        struct proc *p = current_proc();
        struct uio *auio = NULL;
-       int i = 0;
+       int npkts = 0;
        int sblocked = 0;
+       struct sockaddr **psa = NULL;
+       struct mbuf **controlp = NULL;
+       int can_delay;
+       int flags;
+       struct mbuf *free_others = NULL;
 
        KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST | DBG_FUNC_START,
            so, uiocnt,
            so->so_rcv.sb_cc, so->so_rcv.sb_lowat, so->so_rcv.sb_hiwat);
 
-       mp = mp0;
-       if (psa != NULL)
-               *psa = NULL;
-       if (controlp != NULL)
-               *controlp = NULL;
-       if (flagsp != NULL)
-               flags = *flagsp &~ MSG_EOR;
-       else
-               flags = 0;
-       /*
-        * Disallow functionality not currently supported
-        */
-       if (mp0 != NULL) {
-               printf("%s mp0 not supported\n", __func__);
-               error = EOPNOTSUPP;
-               goto out;
-       }
-       if (psa != NULL) {
-               printf("%s sockaddr not supported\n", __func__);
-               error = EOPNOTSUPP;
-               goto out;
-       }
-       if (controlp != NULL) {
-               printf("%s control not supported\n", __func__);
-               error = EOPNOTSUPP;
-               goto out;
-       }
-
        /*
         * Sanity checks:
         * - Only supports don't wait flags
@@ -3481,9 +3786,14 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray,
         * - Protocol must support packet chains
         * - The uio array is NULL (should we panic?)
         */
-       if (flags & ~(MSG_DONTWAIT | MSG_NBIO)) {
-               printf("%s flags not supported\n", __func__);
-               error = EOPNOTSUPP;
+       if (flagsp != NULL)
+               flags = *flagsp;
+       else
+               flags = 0;
+       if (flags & ~(MSG_PEEK | MSG_WAITALL | MSG_DONTWAIT | MSG_NEEDSA |
+           MSG_NBIO)) {
+               printf("%s invalid flags 0x%x\n", __func__, flags);
+               error = EINVAL;
                goto out;
        }
        if (so->so_type != SOCK_DGRAM) {
@@ -3498,7 +3808,7 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray,
                error = EPROTONOSUPPORT;
                goto out;
        }
-       if (uioarray == NULL) {
+       if (msgarray == NULL) {
                printf("%s uioarray is NULL\n", __func__);
                error = EINVAL;
                goto out;
@@ -3512,12 +3822,17 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray,
         * Sanity check on the length passed by caller as we are making 'int'
         * comparisons
         */
-       resid = orig_resid = uio_array_resid(uioarray, uiocnt);
-       if (orig_resid < 0 || orig_resid > INT_MAX) {
+       resid = recv_msg_array_resid(msgarray, uiocnt);
+       if (resid < 0 || resid > INT_MAX) {
                error = EINVAL;
                goto out;
        }
 
+       if (!(flags & MSG_PEEK) && sorecvmincopy > 0)
+               can_delay = 1;
+       else
+               can_delay = 0;
+
        socket_lock(so, 1);
        so_update_last_owner_locked(so, p);
        so_update_policy(so);
@@ -3525,7 +3840,7 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray,
 #if NECP
        so_update_necp_policy(so, NULL, NULL);
 #endif /* NECP */
-       
+
        /*
         * If a recv attempt is made on a previously-accepted socket
         * that has been marked as inactive (disconnected), reject
@@ -3536,7 +3851,7 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray,
 
                error = ENOTCONN;
                SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n",
-                   __func__, proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so),
+                   __func__, proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                    SOCK_DOM(so), SOCK_TYPE(so), error));
                /*
                 * This socket should have been disconnected and flushed
@@ -3547,8 +3862,15 @@ soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uioarray,
                        sb_empty_assert(sb, __func__);
                goto release;
        }
-       if (mp != NULL)
-               *mp = NULL;
+
+next:
+       /*
+        * The uio may be empty
+        */
+       if (npkts >= uiocnt) {
+               error = 0;
+               goto release;
+       }
 restart:
        /*
         * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
@@ -3574,23 +3896,13 @@ restart:
        }
        sblocked = 1;
 
-       /*
-        * Skip empty uio
-        */
-       auio = uioarray[i];
-       while (uio_resid(auio) == 0) {
-               i++;
-               if (i >= uiocnt) {
-                       error = 0;
-                       goto release;
-               }
-       }
-
        m = so->so_rcv.sb_mb;
        /*
         * Block awaiting more datagram if needed
         */
-       if (m == NULL) {
+       if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
+           (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
+           ((flags & MSG_WAITALL) && npkts < uiocnt))))) {
                /*
                 * Panic if we notice inconsistencies in the socket's
                 * receive list; both sb_mb and sb_cc should correctly
@@ -3602,6 +3914,8 @@ restart:
 
                if (so->so_error) {
                        error = so->so_error;
+                       if ((flags & MSG_PEEK) == 0)
+                               so->so_error = 0;
                        goto release;
                }
                if (so->so_state & SS_CANTRCVMORE) {
@@ -3619,14 +3933,12 @@ restart:
                }
                /*
                 * Do not block if we got some data
-                * Note: We could use MSG_WAITALL to wait
                 */
-               resid = uio_array_resid(uioarray, uiocnt);
-               if (resid != orig_resid) {
+               if (free_list != NULL) {
                        error = 0;
                        goto release;
                }
-               
+
                SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
                SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
 
@@ -3640,13 +3952,6 @@ restart:
                goto restart;
        }
 
-       if (m->m_pkthdr.len == 0) {
-               printf("%s so %llx pkt %llx len is null\n",
-                       __func__,
-                       (uint64_t)VM_KERNEL_ADDRPERM(so),
-                       (uint64_t)VM_KERNEL_ADDRPERM(m));
-               goto restart;
-       }
        OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
        SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
        SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
@@ -3654,97 +3959,106 @@ restart:
        /*
         * Consume the current uio index as we have a datagram
         */
-       i += 1;
+       auio = msgarray[npkts].uio;
+       resid = uio_resid(auio);
+       msgarray[npkts].which |= SOCK_MSG_DATA;
+       psa = (msgarray[npkts].which & SOCK_MSG_SA) ?
+           &msgarray[npkts].psa : NULL;
+       controlp = (msgarray[npkts].which & SOCK_MSG_CONTROL) ?
+           &msgarray[npkts].controlp : NULL;
+       npkts += 1;
        nextrecord = m->m_nextpkt;
 
-#if SO_RECEIVE_LIST_SOCKADDR_NOT_YET
        if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
-               /*
-                * to be adapted from soreceive()
-                */
+               error = soreceive_addr(p, so, psa, flags, &m, &nextrecord, 1);
+               if (error == ERESTART)
+                       goto restart;
+               else if (error != 0)
+                       goto release;
        }
-#endif /* SO_RECEIVE_LIST_SOCKADDR_NOT_YET */
 
-#if SO_RECEIVE_LIST_CONTROL_NOT_YET
-       /*
-        * Process one or more MT_CONTROL mbufs present before any data mbufs
-        * in the first mbuf chain on the socket buffer.  If MSG_PEEK, we
-        * just copy the data; if !MSG_PEEK, we call into the protocol to
-        * perform externalization.
-        */
        if (m != NULL && m->m_type == MT_CONTROL) {
-               /*
-                * to be adapted from soreceive()
-                */
+               error = soreceive_ctl(so, controlp, flags, &m, &nextrecord);
+               if (error != 0)
+                       goto release;
        }
-#endif /* SO_RECEIVE_LIST_CONTROL_NOT_YET */
 
-       offset = 0;
+       if (m->m_pkthdr.len == 0) {
+               printf("%s:%d so %llx pkt %llx type %u pktlen null\n",
+                   __func__, __LINE__,
+                   (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
+                   (uint64_t)DEBUG_KERNEL_ADDRPERM(m),
+                   m->m_type);
+       }
 
        /*
-        * Loop to copy out the mbufs of the current record
+        * Loop to copy the mbufs of the current record.
+        * Zero-length packets are supported.
         */
-       while (m != NULL && uio_resid(auio) > 0 && error == 0) {
-               len = uio_resid(auio);
-
+       ml = NULL;
+       pktlen = 0;
+       while (m != NULL && (len = resid - pktlen) >= 0 && error == 0) {
                if (m->m_len == 0)
-                       printf("%s: so %llx m %llx m_len is 0\n",
-                               __func__,
-                               (uint64_t)VM_KERNEL_ADDRPERM(so),
-                               (uint64_t)VM_KERNEL_ADDRPERM(m));
-
+                       panic("%p m_len zero", m);
+               if (m->m_type == 0)
+                       panic("%p m_type zero", m);
                /*
                 * Clip to the residual length
                 */
                if (len > m->m_len)
                        len = m->m_len;
+               pktlen += len;
                /*
-                * If mp is set, just pass back the mbufs.
-                * Otherwise copy them out via the uio, then free.
+                * Copy the mbufs via the uio or delay the copy
                 * Sockbuf must be consistent here (points to current mbuf,
                 * it points to next record) when we drop priority;
                 * we must note any additions to the sockbuf when we
                 * block interrupts again.
                 */
-               if (mp != NULL) {
-                       uio_setresid(auio, (uio_resid(auio) - len));
-               } else {
-                       SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
-                       SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
-
+               if (len > 0 && can_delay == 0) {
                        socket_unlock(so, 0);
                        error = uiomove(mtod(m, caddr_t), (int)len, auio);
                        socket_lock(so, 0);
-
                        if (error)
                                goto release;
+               } else {
+                       delayed_copy_len += len;
                }
+
                if (len == m->m_len) {
                        /*
-                        * m was entirely copied  
+                        * m was entirely copied
                         */
-                       nextrecord = m->m_nextpkt;
                        sbfree(&so->so_rcv, m);
+                       nextrecord = m->m_nextpkt;
                        m->m_nextpkt = NULL;
 
                        /*
-                        * Move to m_next 
+                        * Set the first packet to the head of the free list
                         */
-                       if (mp != NULL) {
-                               *mp = m;
-                               mp = &m->m_next;
-                               so->so_rcv.sb_mb = m = m->m_next;
-                               *mp = NULL;
-                       } else {
-                               if (free_list == NULL)
-                                       free_list = m;
-                               else
-                                       ml->m_next = m;
-                               ml = m;
-                               so->so_rcv.sb_mb = m = m->m_next;
-                               ml->m_next = NULL;
-                               ml->m_nextpkt = NULL;
+                       if (free_list == NULL)
+                               free_list = m;
+                       /*
+                        * Link current packet to tail of free list
+                        */
+                       if (ml == NULL) {
+                               if (free_tail != NULL)
+                                       free_tail->m_nextpkt = m;
+                               free_tail = m;
                        }
+                       /*
+                        * Link current mbuf to last mbuf of current packet
+                        */
+                       if (ml != NULL)
+                               ml->m_next = m;
+                       ml = m;
+
+                       /*
+                        * Move next buf to head of socket buffer
+                        */
+                       so->so_rcv.sb_mb = m = ml->m_next;
+                       ml->m_next = NULL;
+
                        if (m != NULL) {
                                m->m_nextpkt = nextrecord;
                                if (nextrecord == NULL)
@@ -3759,27 +4073,6 @@ restart:
                        /*
                         * Stop the loop on partial copy
                         */
-                       if (mp != NULL) {
-                               int copy_flag;
-
-                               if (flags & MSG_DONTWAIT)
-                                       copy_flag = M_DONTWAIT;
-                               else
-                                       copy_flag = M_WAIT;
-                               *mp = m_copym(m, 0, len, copy_flag);
-                               /*
-                                * Failed to allocate an mbuf?
-                                * Adjust uio_resid back, it was
-                                * adjusted down by len bytes which
-                                * we didn't copy over.
-                                */
-                               if (*mp == NULL) {
-                                       uio_setresid(auio,
-                                           (uio_resid(auio) + len));
-                                       error = ENOMEM;
-                                       break;
-                               }
-                       }
                        break;
                }
        }
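
The rewritten loop above avoids freeing consumed mbufs one at a time: each fully copied packet is chained onto free_list (packets linked through m_nextpkt via free_tail, buffers within a packet through m_next via ml), and when can_delay is set the uiomove() is deferred and accounted in delayed_copy_len, to be flushed by sodelayed_copy_list(). Everything collected this way is released with a single m_freem_list() call on the way out. Below is a minimal user-space sketch of the same collect-then-free-in-one-pass pattern; the struct and function names are hypothetical stand-ins, not the kernel's mbuf API.

/*
 * Hypothetical sketch of the batched-free pattern used above.
 * 'pkt_buf', 'free_list_append' and 'free_all' stand in for mbufs
 * chained by m_next/m_nextpkt and for m_freem_list().
 */
#include <stdlib.h>

struct pkt_buf {
	struct pkt_buf *next;      /* next buffer in the same packet  */
	struct pkt_buf *next_pkt;  /* first buffer of the next packet */
};

/* Append a fully consumed packet to the tail of a free list. */
static void
free_list_append(struct pkt_buf **head, struct pkt_buf **tail,
    struct pkt_buf *pkt)
{
	if (*head == NULL)
		*head = pkt;
	else
		(*tail)->next_pkt = pkt;
	*tail = pkt;
	pkt->next_pkt = NULL;
}

/* Release every buffer of every packet in one pass (cf. m_freem_list). */
static void
free_all(struct pkt_buf *head)
{
	while (head != NULL) {
		struct pkt_buf *next_pkt = head->next_pkt;

		while (head != NULL) {
			struct pkt_buf *next = head->next;
			free(head);
			head = next;
		}
		head = next_pkt;
	}
}
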
@@ -3787,7 +4080,7 @@ restart:
        if (so->so_usecount <= 1) {
                panic("%s: after big while so=%llx ref=%d on socket\n",
                    __func__,
-                   (uint64_t)VM_KERNEL_ADDRPERM(so), so->so_usecount);
+                   (uint64_t)DEBUG_KERNEL_ADDRPERM(so), so->so_usecount);
                /* NOTREACHED */
        }
 #endif
@@ -3796,6 +4089,22 @@ restart:
         */
        if (m != NULL) {
                if (so->so_options & SO_DONTTRUNC) {
+                       /*
+                        * Copyout first the freelist then the partial mbuf
+                        */
+                       socket_unlock(so, 0);
+                       if (delayed_copy_len)
+                               error = sodelayed_copy_list(so, msgarray,
+                                   uiocnt, &free_list, &delayed_copy_len);
+
+                       if (error == 0) {
+                               error = uiomove(mtod(m, caddr_t), (int)len,
+                                   auio);
+                       }
+                       socket_lock(so, 0);
+                       if (error)
+                               goto release;
+
                        m->m_data += len;
                        m->m_len -= len;
                        so->so_rcv.sb_cc -= len;
@@ -3832,21 +4141,24 @@ restart:
         * - There was no error
         * - A packet was not truncated
         * - We can still receive more data
-        */             
-       if (i < uiocnt && error == 0 &&
-           (flags & (MSG_RCVMORE | MSG_TRUNC)) == 0 
-           && (so->so_state & SS_CANTRCVMORE) == 0) {
+        */
+       if (npkts < uiocnt && error == 0 &&
+           (flags & (MSG_RCVMORE | MSG_TRUNC)) == 0 &&
+           (so->so_state & SS_CANTRCVMORE) == 0) {
                sbunlock(&so->so_rcv, TRUE);    /* keep socket locked */
                sblocked = 0;
 
-               goto restart;
+               goto next;
        }
+       if (flagsp != NULL)
+               *flagsp |= flags;
 
 release:
        /*
         * pru_rcvd may cause more data to be received if the socket lock
         * is dropped so we set MSG_HAVEMORE now based on what we know.
-        * That way the caller won't be surprised if it receives less data than requested.
+        * That way the caller won't be surprised if it receives less data
+        * than requested.
         */
        if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0)
                flags |= MSG_HAVEMORE;
@@ -3854,18 +4166,22 @@ release:
        if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
                (*pr->pr_usrreqs->pru_rcvd)(so, flags);
 
-       if (flagsp != NULL)
-               *flagsp |= flags;
        if (sblocked)
                sbunlock(&so->so_rcv, FALSE);   /* will unlock socket */
        else
                socket_unlock(so, 1);
+
+       if (delayed_copy_len)
+               error = sodelayed_copy_list(so, msgarray, uiocnt,
+                   &free_list, &delayed_copy_len);
 out:
        /*
-        * Amortize the cost 
+        * Amortize the cost of freeing the mbufs
         */
        if (free_list != NULL)
                m_freem_list(free_list);
+       if (free_others != NULL)
+               m_freem_list(free_others);
 
        KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST | DBG_FUNC_END, error,
            0, 0, 0, 0);
@@ -3971,7 +4287,7 @@ soshutdownlock(struct socket *so, int how)
                }
        }
 #endif /* CONTENT_FILTER */
-       
+
        error = soshutdownlock_final(so, how);
 
 done:
@@ -4369,7 +4685,8 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
                                if (so->so_snd.sb_flags & SB_UNIX) {
                                        struct unpcb *unp =
                                            (struct unpcb *)(so->so_pcb);
-                                       if (unp != NULL && unp->unp_conn != NULL) {
+                                       if (unp != NULL &&
+                                           unp->unp_conn != NULL) {
                                                hiwat += unp->unp_conn->unp_cc;
                                        }
                                }
@@ -4382,13 +4699,13 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
                                        sowwakeup(so);
                                }
                                break;
-                       }
+                       }
                        case SO_RCVLOWAT: {
                                int64_t data_len;
                                so->so_rcv.sb_lowat =
                                    (optval > so->so_rcv.sb_hiwat) ?
                                    so->so_rcv.sb_hiwat : optval;
-                               data_len = so->so_rcv.sb_cc 
+                               data_len = so->so_rcv.sb_cc
                                    - so->so_rcv.sb_ctl;
                                if (data_len >= so->so_rcv.sb_lowat)
                                    sorwakeup(so);
@@ -4656,7 +4973,7 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
                                SODEFUNCTLOG(("%s[%d]: so 0x%llx [%s %s:%d -> "
                                    "%s:%d] is now marked as %seligible for "
                                    "defunct\n", __func__, proc_selfpid(),
-                                   (uint64_t)VM_KERNEL_ADDRPERM(so),
+                                   (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                                    (SOCK_TYPE(so) == SOCK_STREAM) ?
                                    "TCP" : "UDP", inet_ntop(SOCK_DOM(so),
                                    ((SOCK_DOM(so) == PF_INET) ?
@@ -4674,7 +4991,7 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
                                SODEFUNCTLOG(("%s[%d]: so 0x%llx [%d,%d] is "
                                    "now marked as %seligible for defunct\n",
                                    __func__, proc_selfpid(),
-                                   (uint64_t)VM_KERNEL_ADDRPERM(so),
+                                   (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                                    SOCK_DOM(so), SOCK_TYPE(so),
                                    (so->so_flags & SOF_NODEFUNCT) ?
                                    "not " : ""));
@@ -4736,7 +5053,7 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
                        error = so_set_effective_uuid(so, euuid, sopt->sopt_p);
                        break;
                }
-                               
+
 #if NECP
                case SO_NECP_ATTRIBUTES:
                        error = necp_set_socket_attributes(so, sopt);
@@ -4763,6 +5080,13 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
                        break;
 #endif /* MPTCP */
 
+               case SO_EXTENDED_BK_IDLE:
+                       error = sooptcopyin(sopt, &optval, sizeof (optval),
+                           sizeof (optval));
+                       if (error == 0)
+                               error = so_set_extended_bk_idle(so, optval);
+                       break;
+
                default:
                        error = ENOPROTOOPT;
                        break;
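
The new SO_EXTENDED_BK_IDLE option feeds so_set_extended_bk_idle(): only PF_INET/PF_INET6 TCP sockets qualify (EOPNOTSUPP otherwise), and the kernel refuses with EBUSY when the per-process limit is reached or the socket is delegated. A hedged user-space sketch of opting in, assuming the option constant is exported by this release's <sys/socket.h>:

/*
 * Hedged sketch: opt a TCP socket into extended background idle time.
 * SO_EXTENDED_BK_IDLE is assumed to be visible via <sys/socket.h> on a
 * system built from this source; the kernel may still refuse with
 * EOPNOTSUPP (non-TCP socket) or EBUSY (per-process limit reached, or
 * delegated socket), mirroring so_set_extended_bk_idle() above.
 */
#include <sys/socket.h>
#include <stdio.h>

static int
enable_extended_bk_idle(int fd)
{
	int one = 1;

	if (setsockopt(fd, SOL_SOCKET, SO_EXTENDED_BK_IDLE,
	    &one, sizeof(one)) == -1) {
		perror("setsockopt(SO_EXTENDED_BK_IDLE)");
		return (-1);
	}
	return (0);
}
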
@@ -5027,7 +5351,7 @@ integer:
                        goto integer;
 
                case SO_AWDL_UNRESTRICTED:
-                       if (SOCK_DOM(so) == PF_INET || 
+                       if (SOCK_DOM(so) == PF_INET ||
                            SOCK_DOM(so) == PF_INET6) {
                                optval = inp_get_awdl_unrestricted(
                                    sotoinpcb(so));
@@ -5140,7 +5464,7 @@ integer:
                        error = flow_divert_token_get(so, sopt);
                        break;
 #endif /* FLOW_DIVERT */
-                       
+
 #if NECP
                case SO_NECP_ATTRIBUTES:
                        error = necp_get_socket_attributes(so, sopt);
@@ -5153,7 +5477,7 @@ integer:
 
                        sock_id = cfil_sock_id_from_socket(so);
 
-                       error = sooptcopyout(sopt, &sock_id, 
+                       error = sooptcopyout(sopt, &sock_id,
                                sizeof(cfil_sock_id_t));
                        break;
                }
@@ -5168,9 +5492,14 @@ integer:
                                break;
                        }
                        optval = (so->so_flags & SOF_MPTCP_FASTJOIN);
-                       break;
+                       /* Fixed along with rdar://19391339 */
+                       goto integer;
 #endif /* MPTCP */
 
+               case SO_EXTENDED_BK_IDLE:
+                       optval = (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED);
+                       goto integer;
+
                default:
                        error = ENOPROTOOPT;
                        break;
@@ -5395,7 +5724,7 @@ soo_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
                /*
                 * If the caller explicitly asked for OOB results (e.g. poll()),
                 * save that off in the hookid field and reserve the kn_flags
-                * EV_OOBAND bit for output only).
+                * EV_OOBAND bit for output only.
                 */
                if (kn->kn_flags & EV_OOBAND) {
                        kn->kn_flags &= ~EV_OOBAND;
@@ -5412,6 +5741,8 @@ soo_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
        case EVFILT_SOCK:
                kn->kn_fop = &sock_filtops;
                skl = &so->so_klist;
+               kn->kn_hookid = 0;
+               kn->kn_status |= KN_TOUCH;
                break;
        default:
                socket_unlock(so, 1);
@@ -5478,14 +5809,19 @@ filt_soread(struct knote *kn, long hint)
        }
 
        /* socket isn't a listener */
+       /*
+        * NOTE_LOWAT specifies a new low water mark in data, i.e.
+        * the bytes of protocol data. We therefore exclude any
+        * control bytes.
+        */
        kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
+
        /*
         * Clear out EV_OOBAND that filt_soread may have set in the
         * past.
         */
        kn->kn_flags &= ~EV_OOBAND;
-
-       if ((so->so_oobmark) || (so->so_state & SS_RCVATMARK)){
+       if ((so->so_oobmark) || (so->so_state & SS_RCVATMARK)) {
                kn->kn_flags |= EV_OOBAND;
                /*
                 * If caller registered explicit interest in OOB data,
@@ -5503,7 +5839,7 @@ filt_soread(struct knote *kn, long hint)
                        return (1);
                }
        }
-       
+
        if ((so->so_state & SS_CANTRCVMORE)
 #if CONTENT_FILTER
            && cfil_sock_data_pending(&so->so_rcv) == 0
@@ -5523,6 +5859,11 @@ filt_soread(struct knote *kn, long hint)
        }
 
        int64_t lowwat = so->so_rcv.sb_lowat;
+       /*
+        * Ensure that when NOTE_LOWAT is used, the derived
+        * low water mark is bounded by the receive buffer's
+        * high and low water marks.
+        */
        if (kn->kn_sfflags & NOTE_LOWAT) {
                if (kn->kn_sdata > so->so_rcv.sb_hiwat)
                        lowwat = so->so_rcv.sb_hiwat;
@@ -5533,7 +5874,15 @@ filt_soread(struct knote *kn, long hint)
        if ((hint & SO_FILT_HINT_LOCKED) == 0)
                socket_unlock(so, 1);
 
-       return (kn->kn_data >= lowwat);
+       /*
+        * The order below is important. Since NOTE_LOWAT
+        * overrides sb_lowat, check for NOTE_LOWAT case
+        * first.
+        */
+       if (kn->kn_sfflags & NOTE_LOWAT)
+               return (kn->kn_data >= lowwat);
+
+       return (so->so_rcv.sb_cc >= lowwat);
 }
 
 static void
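
With the filt_soread() changes above, a NOTE_LOWAT value supplied by the caller is clamped to the receive buffer's low/high water marks and compared against kn_data (queued protocol bytes, excluding control mbufs). A small sketch of registering such a filter through the public kqueue/kevent API (not part of this diff); the 4 KiB threshold is an arbitrary example:

/*
 * Sketch: ask kqueue to report a socket readable only once at least
 * 4 KiB of data (excluding control bytes) is queued.  The kernel clamps
 * the mark to the receive buffer's high/low water marks as in
 * filt_soread() above.
 */
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

static int
wait_for_4k(int kq, int sock_fd)
{
	struct kevent change, event;

	EV_SET(&change, sock_fd, EVFILT_READ, EV_ADD | EV_ENABLE,
	    NOTE_LOWAT, 4096, NULL);

	/* Register the filter and block until it fires. */
	return (kevent(kq, &change, 1, &event, 1, NULL));
}
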
@@ -5581,11 +5930,14 @@ filt_sowrite(struct knote *kn, long hint)
                ret = 1;
                goto out;
        }
-       if (((so->so_state & SS_ISCONNECTED) == 0) &&
-           (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
+       if (!socanwrite(so)) {
                ret = 0;
                goto out;
        }
+       if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
+               ret = 1;
+               goto out;
+       }
        int64_t lowwat = so->so_snd.sb_lowat;
        if (kn->kn_sfflags & NOTE_LOWAT) {
                if (kn->kn_sdata > so->so_snd.sb_hiwat)
@@ -5607,7 +5959,8 @@ filt_sowrite(struct knote *kn, long hint)
                        }
 #endif
                        else {
-                               return (1);
+                               ret = 1;
+                               goto out;
                        }
                } else {
                        ret = 1;
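
filt_sowrite() (and sowriteable(), later in this diff) now report a socket writable when SOF1_PRECONNECT_DATA is set, so an application can queue data before the handshake completes. That flag is driven by the connectx() path whose sa_endpoints/uio plumbing appears at the end of this section. A hedged sketch of the user-space side; the connectx(2) wrapper signature, sa_endpoints_t layout and CONNECT_* flags are assumptions about this release's headers, inferred from the sae_* fields copied in by connectx_nocancel() below:

/*
 * Hedged sketch: send idempotent data on the initial handshake with
 * connectx(2).  sae_dstaddr/sae_dstaddrlen match the fields copied in
 * by connectx_nocancel(); CONNECT_DATA_IDEMPOTENT and
 * CONNECT_RESUME_ON_READ_WRITE are assumed to be exposed by this
 * release's <sys/socket.h>.
 */
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

static int
connect_with_data(int fd, const struct sockaddr *dst, socklen_t dstlen,
    const void *buf, size_t buflen, size_t *sent)
{
	sa_endpoints_t ep;
	struct iovec iov;

	memset(&ep, 0, sizeof(ep));
	ep.sae_dstaddr = dst;
	ep.sae_dstaddrlen = dstlen;

	iov.iov_base = (void *)buf;
	iov.iov_len = buflen;

	return (connectx(fd, &ep, SAE_ASSOCID_ANY,
	    CONNECT_RESUME_ON_READ_WRITE | CONNECT_DATA_IDEMPOTENT,
	    &iov, 1, sent, NULL));
}
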
@@ -5639,6 +5992,7 @@ filt_sockev(struct knote *kn, long hint)
        int ret = 0, locked = 0;
        struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
        long ev_hint = (hint & SO_FILT_HINT_EV);
+       uint32_t level_trigger = 0;
 
        if ((hint & SO_FILT_HINT_LOCKED) == 0) {
                socket_lock(so, 1);
@@ -5646,72 +6000,76 @@ filt_sockev(struct knote *kn, long hint)
        }
 
        if (ev_hint & SO_FILT_HINT_CONNRESET) {
-               if (kn->kn_sfflags & NOTE_CONNRESET)
-                       kn->kn_fflags |= NOTE_CONNRESET;
+               kn->kn_fflags |= NOTE_CONNRESET;
        }
        if (ev_hint & SO_FILT_HINT_TIMEOUT) {
-               if (kn->kn_sfflags & NOTE_TIMEOUT)
-                       kn->kn_fflags |= NOTE_TIMEOUT;
+               kn->kn_fflags |= NOTE_TIMEOUT;
        }
        if (ev_hint & SO_FILT_HINT_NOSRCADDR) {
-               if (kn->kn_sfflags & NOTE_NOSRCADDR)
-                       kn->kn_fflags |= NOTE_NOSRCADDR;
+               kn->kn_fflags |= NOTE_NOSRCADDR;
        }
        if (ev_hint & SO_FILT_HINT_IFDENIED) {
-               if ((kn->kn_sfflags & NOTE_IFDENIED))
-                       kn->kn_fflags |= NOTE_IFDENIED;
+               kn->kn_fflags |= NOTE_IFDENIED;
        }
        if (ev_hint & SO_FILT_HINT_KEEPALIVE) {
-               if (kn->kn_sfflags & NOTE_KEEPALIVE)
-                       kn->kn_fflags |= NOTE_KEEPALIVE;
+               kn->kn_fflags |= NOTE_KEEPALIVE;
        }
        if (ev_hint & SO_FILT_HINT_ADAPTIVE_WTIMO) {
-               if (kn->kn_sfflags & NOTE_ADAPTIVE_WTIMO)
-                       kn->kn_fflags |= NOTE_ADAPTIVE_WTIMO;
+               kn->kn_fflags |= NOTE_ADAPTIVE_WTIMO;
        }
        if (ev_hint & SO_FILT_HINT_ADAPTIVE_RTIMO) {
-               if (kn->kn_sfflags & NOTE_ADAPTIVE_RTIMO)
-                       kn->kn_fflags |= NOTE_ADAPTIVE_RTIMO;
+               kn->kn_fflags |= NOTE_ADAPTIVE_RTIMO;
        }
-       if (ev_hint & SO_FILT_HINT_CONNECTED) {
-               if (kn->kn_sfflags & NOTE_CONNECTED)
-                       kn->kn_fflags |= NOTE_CONNECTED;
+       if ((ev_hint & SO_FILT_HINT_CONNECTED) ||
+           (so->so_state & SS_ISCONNECTED)) {
+               kn->kn_fflags |= NOTE_CONNECTED;
+               level_trigger |= NOTE_CONNECTED;
        }
-       if (ev_hint & SO_FILT_HINT_DISCONNECTED) {
-               if (kn->kn_sfflags & NOTE_DISCONNECTED)
-                       kn->kn_fflags |= NOTE_DISCONNECTED;
+       if ((ev_hint & SO_FILT_HINT_DISCONNECTED) ||
+           (so->so_state & SS_ISDISCONNECTED)) {
+               kn->kn_fflags |= NOTE_DISCONNECTED;
+               level_trigger |= NOTE_DISCONNECTED;
        }
        if (ev_hint & SO_FILT_HINT_CONNINFO_UPDATED) {
                if (so->so_proto != NULL &&
-                   (so->so_proto->pr_flags & PR_EVCONNINFO) &&
-                   (kn->kn_sfflags & NOTE_CONNINFO_UPDATED))
+                   (so->so_proto->pr_flags & PR_EVCONNINFO))
                        kn->kn_fflags |= NOTE_CONNINFO_UPDATED;
        }
 
-       if ((kn->kn_sfflags & NOTE_READCLOSED) &&
-           (so->so_state & SS_CANTRCVMORE)
+       if ((so->so_state & SS_CANTRCVMORE)
 #if CONTENT_FILTER
-               && cfil_sock_data_pending(&so->so_rcv) == 0
+           && cfil_sock_data_pending(&so->so_rcv) == 0
 #endif /* CONTENT_FILTER */
-               )
+           ) {
                kn->kn_fflags |= NOTE_READCLOSED;
+               level_trigger |= NOTE_READCLOSED;
+       }
 
-       if ((kn->kn_sfflags & NOTE_WRITECLOSED) &&
-           (so->so_state & SS_CANTSENDMORE))
+       if (so->so_state & SS_CANTSENDMORE) {
                kn->kn_fflags |= NOTE_WRITECLOSED;
+               level_trigger |= NOTE_WRITECLOSED;
+       }
 
-       if ((kn->kn_sfflags & NOTE_SUSPEND) &&
-           ((ev_hint & SO_FILT_HINT_SUSPEND) ||
-           (so->so_flags & SOF_SUSPENDED))) {
+       if ((ev_hint & SO_FILT_HINT_SUSPEND) ||
+           (so->so_flags & SOF_SUSPENDED)) {
                kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);
+
+               /* If resume event was delivered before, reset it */
+               kn->kn_hookid &= ~NOTE_RESUME;
+
                kn->kn_fflags |= NOTE_SUSPEND;
+               level_trigger |= NOTE_SUSPEND;
        }
 
-       if ((kn->kn_sfflags & NOTE_RESUME) &&
-           ((ev_hint & SO_FILT_HINT_RESUME) ||
-           (so->so_flags & SOF_SUSPENDED) == 0)) {
+       if ((ev_hint & SO_FILT_HINT_RESUME) ||
+           (so->so_flags & SOF_SUSPENDED) == 0) {
                kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);
+
+               /* If suspend event was delivered before, reset it */
+               kn->kn_hookid &= ~NOTE_SUSPEND;
+
                kn->kn_fflags |= NOTE_RESUME;
+               level_trigger |= NOTE_RESUME;
        }
 
        if (so->so_error != 0) {
@@ -5722,7 +6080,16 @@ filt_sockev(struct knote *kn, long hint)
                get_sockev_state(so, (u_int32_t *)&(kn->kn_data));
        }
 
-       if (kn->kn_fflags != 0)
+       /* Reset any events that are not requested on this knote */
+       kn->kn_fflags &= (kn->kn_sfflags & EVFILT_SOCK_ALL_MASK);
+       level_trigger &= (kn->kn_sfflags & EVFILT_SOCK_ALL_MASK);
+
+       /* Find the level triggered events that are already delivered */
+       level_trigger &= kn->kn_hookid;
+       level_trigger &= EVFILT_SOCK_LEVEL_TRIGGER_MASK;
+
+       /* Do not deliver level triggered events more than once */
+       if ((kn->kn_fflags & ~level_trigger) != 0)
                ret = 1;
 
        if (locked)
@@ -5731,6 +6098,51 @@ filt_sockev(struct knote *kn, long hint)
        return (ret);
 }
 
+static void
+filt_socktouch(struct knote *kn, struct kevent_internal_s *kev, long type)
+{
+#pragma unused(kev)
+       switch (type) {
+       case EVENT_REGISTER:
+       {
+               uint32_t changed_flags;
+               changed_flags = (kn->kn_sfflags ^ kn->kn_hookid);
+
+               /*
+                * Since we keep track of events that are already
+                * delivered, if any of those events are not requested
+                * anymore the state related to them can be reset
+                */
+               kn->kn_hookid &=
+                   ~(changed_flags & EVFILT_SOCK_LEVEL_TRIGGER_MASK);
+               break;
+       }
+       case EVENT_PROCESS:
+               /*
+                * Store the state of the events being delivered. This
+                * state can be used to deliver level triggered events
+                * at least once and still avoid waking up the application
+                * multiple times as long as the event is active.
+                */
+               if (kn->kn_fflags != 0)
+                       kn->kn_hookid |= (kn->kn_fflags &
+                               EVFILT_SOCK_LEVEL_TRIGGER_MASK);
+
+               /*
+                * NOTE_RESUME and NOTE_SUSPEND are an exception, deliver
+                * only one of them and remember which one was
+                * delivered last
+                */
+               if (kn->kn_fflags & NOTE_SUSPEND)
+                       kn->kn_hookid &= ~NOTE_RESUME;
+               if (kn->kn_fflags & NOTE_RESUME)
+                       kn->kn_hookid &= ~NOTE_SUSPEND;
+               break;
+       default:
+               break;
+       }
+}
+
 void
 get_sockev_state(struct socket *so, u_int32_t *statep)
 {
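
filt_sockev() now raises event bits whenever the underlying condition is true, and the new filt_socktouch() records in kn_hookid which level-triggered bits have already been handed to the application, so a persistent condition wakes the caller once rather than on every re-evaluation. A generic sketch of that delivered-mask technique, with hypothetical names ('pending' plays the role of kn_fflags, 'delivered' of kn_hookid, and the mask value is arbitrary):

/*
 * Generic sketch of the delivered-mask technique used by
 * filt_sockev()/filt_socktouch() above.  All names are hypothetical.
 */
#include <stdint.h>
#include <stdbool.h>

#define EV_LEVEL_MASK   0x0000000fu    /* bits treated as level-triggered */

struct ev_state {
	uint32_t pending;     /* conditions currently true             */
	uint32_t delivered;   /* level-triggered bits already reported */
};

/* Returns true if the caller should be woken up. */
static bool
ev_should_deliver(const struct ev_state *st, uint32_t interested)
{
	uint32_t pending = st->pending & interested;
	uint32_t already = st->delivered & pending & EV_LEVEL_MASK;

	return ((pending & ~already) != 0);
}

/* Called when events are handed to the application. */
static void
ev_mark_delivered(struct ev_state *st)
{
	st->delivered |= (st->pending & EV_LEVEL_MASK);
}
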
@@ -5921,15 +6333,45 @@ sosetdefunct(struct proc *p, struct socket *so, int level, boolean_t noforce)
                        SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) "
                            "so 0x%llx [%d,%d] is not eligible for defunct "
                            "(%d)\n", __func__, proc_selfpid(), proc_pid(p),
-                           level, (uint64_t)VM_KERNEL_ADDRPERM(so),
+                           level, (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                            SOCK_DOM(so), SOCK_TYPE(so), err));
                        return (err);
                }
                so->so_flags &= ~SOF_NODEFUNCT;
                SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx "
                    "[%d,%d] defunct by force\n", __func__, proc_selfpid(),
-                   proc_pid(p), level, (uint64_t)VM_KERNEL_ADDRPERM(so),
+                   proc_pid(p), level, (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                    SOCK_DOM(so), SOCK_TYPE(so)));
+       } else if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) {
+               struct inpcb *inp = (struct inpcb *)so->so_pcb;
+               struct ifnet *ifp = inp->inp_last_outifp;
+
+               if (ifp && IFNET_IS_CELLULAR(ifp)) {
+                       OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nocell);
+               } else if (so->so_flags & SOF_DELEGATED) {
+                       OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nodlgtd);
+               } else if (soextbkidlestat.so_xbkidle_time == 0) {
+                       OSIncrementAtomic(&soextbkidlestat.so_xbkidle_notime);
+               } else if (noforce) {
+                       OSIncrementAtomic(&soextbkidlestat.so_xbkidle_active);
+               
+                       so->so_flags1 |= SOF1_EXTEND_BK_IDLE_INPROG;
+                       so->so_extended_bk_start = net_uptime();
+                       OSBitOrAtomic(P_LXBKIDLEINPROG, &p->p_ladvflag);
+                       
+                       inpcb_timer_sched(inp->inp_pcbinfo, INPCB_TIMER_LAZY);
+                       
+                       err = EOPNOTSUPP;
+                       SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) "
+                           "extend bk idle "
+                           "so 0x%llx rcv hw %d cc %d\n",
+                           __func__, proc_selfpid(), proc_pid(p),
+                           level, (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
+                           so->so_rcv.sb_hiwat, so->so_rcv.sb_cc));
+                       return (err);
+               } else {
+                       OSIncrementAtomic(&soextbkidlestat.so_xbkidle_forced);
+               }
        }
 
        so->so_flags |= SOF_DEFUNCT;
@@ -5952,9 +6394,10 @@ sosetdefunct(struct proc *p, struct socket *so, int level, boolean_t noforce)
 
 done:
        SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx [%d,%d] %s "
-           "defunct\n", __func__, proc_selfpid(), proc_pid(p), level,
-           (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), SOCK_TYPE(so),
-           defunct ? "is already" : "marked as"));
+           "defunct%s\n", __func__, proc_selfpid(), proc_pid(p), level,
+           (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so), SOCK_TYPE(so),
+           defunct ? "is already" : "marked as",
+           (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ? " extbkidle" : ""));
 
        return (err);
 }
@@ -5982,7 +6425,7 @@ sodefunct(struct proc *p, struct socket *so, int level)
                SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx [%s "
                    "%s:%d -> %s:%d] is now defunct [rcv_si 0x%x, snd_si 0x%x, "
                    "rcv_fl 0x%x, snd_fl 0x%x]\n", __func__, proc_selfpid(),
-                   proc_pid(p), level, (uint64_t)VM_KERNEL_ADDRPERM(so),
+                   proc_pid(p), level, (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                    (SOCK_TYPE(so) == SOCK_STREAM) ? "TCP" : "UDP",
                    inet_ntop(SOCK_DOM(so), ((SOCK_DOM(so) == PF_INET) ?
                    (void *)&inp->inp_laddr.s_addr : (void *)&inp->in6p_laddr),
@@ -5997,7 +6440,7 @@ sodefunct(struct proc *p, struct socket *so, int level)
                SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx "
                    "[%d,%d] is now defunct [rcv_si 0x%x, snd_si 0x%x, "
                    "rcv_fl 0x%x, snd_fl 0x%x]\n", __func__, proc_selfpid(),
-                   proc_pid(p), level, (uint64_t)VM_KERNEL_ADDRPERM(so),
+                   proc_pid(p), level, (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
                    SOCK_DOM(so), SOCK_TYPE(so), (uint32_t)rcv->sb_sel.si_flags,
                    (uint32_t)snd->sb_sel.si_flags, rcv->sb_flags,
                    snd->sb_flags));
@@ -6051,6 +6494,192 @@ done:
        return (0);
 }
 
+int
+soresume(struct proc *p, struct socket *so, int locked)
+{
+       if (locked == 0)
+               socket_lock(so, 1);
+
+       if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG) {
+               SODEFUNCTLOG(("%s[%d]: (target pid %d) so 0x%llx [%d,%d] "
+                   "resumed from bk idle\n",
+                   __func__, proc_selfpid(), proc_pid(p),
+                   (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
+                   SOCK_DOM(so), SOCK_TYPE(so)));
+
+               so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_INPROG;
+               so->so_extended_bk_start = 0;
+               OSBitAndAtomic(~P_LXBKIDLEINPROG, &p->p_ladvflag);
+
+               OSIncrementAtomic(&soextbkidlestat.so_xbkidle_resumed);
+               OSDecrementAtomic(&soextbkidlestat.so_xbkidle_active);
+               VERIFY(soextbkidlestat.so_xbkidle_active >= 0);
+       }
+       if (locked == 0)
+               socket_unlock(so, 1);
+
+       return (0);
+}
+
+/*
+ * Does not attempt to account for sockets that are delegated from
+ * the current process
+ */
+int
+so_set_extended_bk_idle(struct socket *so, int optval)
+{
+       int error = 0;
+
+       if ((SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) ||
+           SOCK_PROTO(so) != IPPROTO_TCP) {
+               OSDecrementAtomic(&soextbkidlestat.so_xbkidle_notsupp);
+               error = EOPNOTSUPP;
+       } else if (optval == 0) {
+               so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_WANTED;
+
+               soresume(current_proc(), so, 1);
+       } else {
+               struct proc *p = current_proc();
+               int i;
+               struct filedesc *fdp;
+               int count = 0;
+
+               proc_fdlock(p);
+
+               fdp = p->p_fd;
+               for (i = 0; i < fdp->fd_nfiles; i++) {
+                       struct fileproc *fp = fdp->fd_ofiles[i];
+                       struct socket *so2;
+
+                       if (fp == NULL ||
+                           (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
+                           FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET)
+                               continue;
+
+                       so2 = (struct socket *)fp->f_fglob->fg_data;
+                       if (so != so2 &&
+                           so2->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED)
+                               count++;
+                       if (count >= soextbkidlestat.so_xbkidle_maxperproc)
+                               break;
+               }
+               if (count >= soextbkidlestat.so_xbkidle_maxperproc) {
+                       OSIncrementAtomic(&soextbkidlestat.so_xbkidle_toomany);
+                       error = EBUSY;
+               } else if (so->so_flags & SOF_DELEGATED) {
+                       OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nodlgtd);
+                       error = EBUSY;
+               } else {
+                       so->so_flags1 |= SOF1_EXTEND_BK_IDLE_WANTED;
+                       OSIncrementAtomic(&soextbkidlestat.so_xbkidle_wantok);
+               }
+               SODEFUNCTLOG(("%s[%d]: so 0x%llx [%d,%d] "
+                   "%s marked for extended bk idle\n",
+                   __func__, proc_selfpid(),
+                   (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
+                   SOCK_DOM(so), SOCK_TYPE(so),
+                   (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ?
+                   "is" : "not"));
+
+               proc_fdunlock(p);
+       }
+
+       return (error);
+}
+
+static void
+so_stop_extended_bk_idle(struct socket *so)
+{
+       so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_INPROG;
+       so->so_extended_bk_start = 0;
+
+       OSDecrementAtomic(&soextbkidlestat.so_xbkidle_active);
+       VERIFY(soextbkidlestat.so_xbkidle_active >= 0);
+       /*
+        * Force defunct
+        */
+       sosetdefunct(current_proc(), so,
+           SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL, FALSE);
+       if (so->so_flags & SOF_DEFUNCT) {
+               sodefunct(current_proc(), so,
+                   SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
+       }
+}
+
+void
+so_drain_extended_bk_idle(struct socket *so)
+{
+       if (so && (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG)) {
+               /*
+                * Only penalize sockets that have outstanding data
+                */
+               if (so->so_rcv.sb_cc || so->so_snd.sb_cc) {
+                       so_stop_extended_bk_idle(so);
+
+                       OSIncrementAtomic(&soextbkidlestat.so_xbkidle_drained);
+               }
+       }
+}
+
+/*
+ * Return value tells whether the socket is still in extended background idle
+ */
+int
+so_check_extended_bk_idle_time(struct socket *so)
+{
+       int ret = 1;
+
+       if ((so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG)) {
+               SODEFUNCTLOG(("%s[%d]: so 0x%llx [%d,%d]\n",
+                   __func__, proc_selfpid(),
+                   (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
+                   SOCK_DOM(so), SOCK_TYPE(so)));
+               if (net_uptime() - so->so_extended_bk_start >
+                   soextbkidlestat.so_xbkidle_time) {
+                       so_stop_extended_bk_idle(so);
+
+                       OSIncrementAtomic(&soextbkidlestat.so_xbkidle_expired);
+
+                       ret = 0;
+               } else {
+                       struct inpcb *inp = (struct inpcb *)so->so_pcb;
+
+                       inpcb_timer_sched(inp->inp_pcbinfo, INPCB_TIMER_LAZY);
+                       OSIncrementAtomic(&soextbkidlestat.so_xbkidle_resched);
+               }
+       }
+       
+       return (ret);
+}
+
+void
+resume_proc_sockets(proc_t p)
+{
+       if (p->p_ladvflag & P_LXBKIDLEINPROG) {
+               struct filedesc *fdp;
+               int i;
+
+               proc_fdlock(p);
+               fdp = p->p_fd;
+               for (i = 0; i < fdp->fd_nfiles; i++) {
+                       struct fileproc *fp;
+                       struct socket *so;
+
+                       fp = fdp->fd_ofiles[i];
+                       if (fp == NULL || 
+                           (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
+                           FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET)
+                               continue;
+
+                       so = (struct socket *)fp->f_fglob->fg_data;
+                       (void) soresume(p, so, 0);
+               }
+               proc_fdunlock(p);
+
+               OSBitAndAtomic(~P_LXBKIDLEINPROG, &p->p_ladvflag);
+       }
+}
+
 __private_extern__ int
 so_set_recv_anyif(struct socket *so, int optval)
 {
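
so_check_extended_bk_idle_time() above compares net_uptime() against the stored so_extended_bk_start stamp: once the soextbkidlestat.so_xbkidle_time budget is exceeded the grace period is revoked (so_stop_extended_bk_idle() forces the defunct path), otherwise the lazy INPCB timer is rescheduled. A minimal sketch of the same expiry check with hypothetical names; the caller supplies a monotonic timestamp in seconds, as net_uptime() does in the kernel:

/*
 * Sketch of the expiry check in so_check_extended_bk_idle_time().
 * 'start_secs' plays the role of so_extended_bk_start and
 * 'budget_secs' of soextbkidlestat.so_xbkidle_time.  Hypothetical names.
 */
#include <stdint.h>

struct idle_grant {
	uint64_t start_secs;   /* when the grace period began    */
	uint64_t budget_secs;  /* how long it is allowed to last */
};

/* Returns 1 while the grant is still valid, 0 once it has expired. */
static int
idle_grant_check(const struct idle_grant *g, uint64_t now_secs)
{
	if (now_secs - g->start_secs > g->budget_secs) {
		/* Expired: the kernel force-defuncts the socket here. */
		return (0);
	}
	/* Still inside the budget: the kernel reschedules its lazy timer. */
	return (1);
}
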
@@ -6108,7 +6737,7 @@ so_set_restrictions(struct socket *so, uint32_t vals)
        nocell_old = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);
        noexpensive_old = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE);
        so->so_restrictions |= (vals & (SO_RESTRICT_DENY_IN |
-           SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR | 
+           SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR |
            SO_RESTRICT_DENY_EXPENSIVE));
        nocell_new = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);
        noexpensive_new = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE);
@@ -6123,7 +6752,10 @@ so_set_restrictions(struct socket *so, uint32_t vals)
        if (SOCK_DOM(so) == PF_INET) {
 #endif /* !INET6 */
                if (nocell_new - nocell_old != 0) {
-                       /* if deny cellular is now set, do what's needed for INPCB */
+                       /*
+                        * if deny cellular is now set, do what's needed
+                        * for INPCB
+                        */
                        inp_set_nocellular(sotoinpcb(so));
                }
                if (noexpensive_new - noexpensive_old != 0) {
@@ -6138,7 +6770,7 @@ uint32_t
 so_get_restrictions(struct socket *so)
 {
        return (so->so_restrictions & (SO_RESTRICT_DENY_IN |
-           SO_RESTRICT_DENY_OUT | 
+           SO_RESTRICT_DENY_OUT |
            SO_RESTRICT_DENY_CELLULAR | SO_RESTRICT_DENY_EXPENSIVE));
 }
 
@@ -6331,14 +6963,16 @@ done:
                uuid_unparse(so->e_uuid, buf);
                log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
                    "euuid %s%s\n", __func__, proc_name_address(p),
-                   proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
-                   SOCK_TYPE(so), so->e_pid, proc_name_address(ep), buf,
+                   proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
+                   SOCK_DOM(so), SOCK_TYPE(so),
+                   so->e_pid, proc_name_address(ep), buf,
                    ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : ""));
        } else if (error != 0 && net_io_policy_log) {
                log(LOG_ERR, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
                    "ERROR (%d)\n", __func__, proc_name_address(p),
-                   proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
-                   SOCK_TYPE(so), epid, (ep == PROC_NULL) ? "PROC_NULL" :
+                   proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
+                   SOCK_DOM(so), SOCK_TYPE(so),
+                   epid, (ep == PROC_NULL) ? "PROC_NULL" :
                    proc_name_address(ep), error);
        }
 
@@ -6367,7 +7001,7 @@ so_set_effective_uuid(struct socket *so, uuid_t euuid, struct proc *p)
        /* UUID must not be all-zeroes (reserved for kernel) */
        if (uuid_is_null(euuid)) {
                error = EINVAL;
-               goto done;;
+               goto done;
        }
 
        /*
@@ -6431,14 +7065,14 @@ done:
                uuid_unparse(so->e_uuid, buf);
                log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d "
                    "euuid %s%s\n", __func__, proc_name_address(p), proc_pid(p),
-                   (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
+                   (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so),
                    SOCK_TYPE(so), so->e_pid, buf,
                    ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : ""));
        } else if (error != 0 && net_io_policy_log) {
                uuid_unparse(euuid, buf);
                log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] euuid %s "
                    "ERROR (%d)\n", __func__, proc_name_address(p), proc_pid(p),
-                   (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
+                   (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so),
                    SOCK_TYPE(so), buf, error);
        }
 
@@ -6480,7 +7114,7 @@ netpolicy_post_msg(uint32_t ev_code, struct netpolicy_event_data *ev_data,
 }
 
 void
-socket_post_kev_msg(uint32_t ev_code, 
+socket_post_kev_msg(uint32_t ev_code,
     struct kev_socket_event_data *ev_data,
     uint32_t ev_datalen)
 {
@@ -6517,7 +7151,7 @@ socket_post_kev_msg_closed(struct socket *so)
                            min(peersa->sa_len,
                            sizeof (ev.ev_data.kev_peername)));
                        socket_post_kev_msg(KEV_SOCKET_CLOSED,
-                           &ev.ev_data, sizeof (ev));  
+                           &ev.ev_data, sizeof (ev));
                }
        }
        if (socksa != NULL)
index 5cbf06334926438d72ee9c732a68643cb15aaccb..f8b94b90469b28646301a04dfc9591179bc54b01 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
 #define        DBG_FNC_SBDROP          NETDBG_CODE(DBG_NETSOCK, 4)
 #define        DBG_FNC_SBAPPEND        NETDBG_CODE(DBG_NETSOCK, 5)
 
+SYSCTL_DECL(_kern_ipc);
+
+__private_extern__ u_int32_t net_io_policy_throttle_best_effort = 0;
+SYSCTL_INT(_kern_ipc, OID_AUTO, throttle_best_effort,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &net_io_policy_throttle_best_effort, 0, "");
+
 static inline void sbcompress(struct sockbuf *, struct mbuf *, struct mbuf *);
 static struct socket *sonewconn_internal(struct socket *, int);
 static int sbappendaddr_internal(struct sockbuf *, struct sockaddr *,
@@ -131,6 +137,7 @@ u_int32_t   high_sb_max = SB_MAX;
 static u_int32_t sb_efficiency = 8;    /* parameter for sbreserve() */
 int32_t total_sbmb_cnt __attribute__((aligned(8))) = 0;
 int32_t total_sbmb_cnt_peak __attribute__((aligned(8))) = 0;
+int32_t total_snd_byte_count __attribute__((aligned(8))) = 0;
 int64_t sbmb_limreached __attribute__((aligned(8))) = 0;
 
 /* Control whether to throttle sockets eligible to be throttled */
@@ -189,6 +196,8 @@ soisconnected(struct socket *so)
        so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
        so->so_state |= SS_ISCONNECTED;
 
+       soreserve_preconnect(so, 0);
+
        sflt_notify(so, sock_evt_connected, NULL);
 
        if (head && (so->so_state & SS_INCOMP)) {
@@ -218,6 +227,15 @@ soisconnected(struct socket *so)
        }
 }
 
+boolean_t
+socanwrite(struct socket *so)
+{
+       return ((so->so_state & SS_ISCONNECTED) ||
+              !(so->so_proto->pr_flags & PR_CONNREQUIRED) ||
+              (so->so_flags1 & SOF1_PRECONNECT_DATA));
+}
+
 void
 soisdisconnecting(struct socket *so)
 {
@@ -671,6 +689,14 @@ bad:
        return (ENOBUFS);
 }
 
+void
+soreserve_preconnect(struct socket *so, unsigned int pre_cc)
+{
+       /* As of now, same bytes for both preconnect read and write */
+       so->so_snd.sb_preconn_hiwat = pre_cc;
+       so->so_rcv.sb_preconn_hiwat = pre_cc;
+}
+
 /*
  * Allot mbufs to a sockbuf.
  * Attempt to scale mbmax so that mbcnt doesn't become limiting
@@ -898,7 +924,8 @@ sblastmbufchk(struct sockbuf *sb, const char *where)
                for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
                        printf("\t");
                        for (n = m; n != NULL; n = n->m_next)
-                               printf("0x%llx ", (uint64_t)VM_KERNEL_ADDRPERM(n));
+                               printf("0x%llx ",
+                                   (uint64_t)VM_KERNEL_ADDRPERM(n));
                        printf("\n");
                }
                panic("sblastmbufchk from %s", where);
@@ -1147,7 +1174,8 @@ sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
 
 #if CONTENT_FILTER
                if (error == 0)
-                       error = cfil_sock_data_in(sb->sb_so, asa, m0, control, 0);
+                       error = cfil_sock_data_in(sb->sb_so, asa, m0, control,
+                           0);
 #endif /* CONTENT_FILTER */
 
                if (error) {
@@ -1249,7 +1277,8 @@ sbappendcontrol(struct sockbuf *sb, struct mbuf   *m0, struct mbuf *control,
 
 #if CONTENT_FILTER
                if (error == 0)
-                       error = cfil_sock_data_in(sb->sb_so, NULL, m0, control, 0);
+                       error = cfil_sock_data_in(sb->sb_so, NULL, m0, control,
+                           0);
 #endif /* CONTENT_FILTER */
 
                if (error) {
@@ -2033,10 +2062,10 @@ pru_connect2_notsupp(struct socket *so1, struct socket *so2)
 int
 pru_connectx_notsupp(struct socket *so, struct sockaddr_list **src_sl,
     struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
-    associd_t aid, connid_t *pcid, uint32_t flags, void *arg,
-    uint32_t arglen)
+    sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
+    uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
 {
-#pragma unused(so, src_sl, dst_sl, p, ifscope, aid, pcid, flags, arg, arglen)
+#pragma unused(so, src_sl, dst_sl, p, ifscope, aid, pcid, flags, arg, arglen, uio, bytes_written)
        return (EOPNOTSUPP);
 }
 
@@ -2063,7 +2092,7 @@ pru_disconnect_notsupp(struct socket *so)
 }
 
 int
-pru_disconnectx_notsupp(struct socket *so, associd_t aid, connid_t cid)
+pru_disconnectx_notsupp(struct socket *so, sae_associd_t aid, sae_connid_t cid)
 {
 #pragma unused(so, aid, cid)
        return (EOPNOTSUPP);
@@ -2077,7 +2106,7 @@ pru_listen_notsupp(struct socket *so, struct proc *p)
 }
 
 int
-pru_peeloff_notsupp(struct socket *so, associd_t aid, struct socket **psop)
+pru_peeloff_notsupp(struct socket *so, sae_associd_t aid, struct socket **psop)
 {
 #pragma unused(so, aid, psop)
        return (EOPNOTSUPP);
@@ -2152,10 +2181,10 @@ pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio,
 }
 
 int
-pru_sosend_list_notsupp(struct socket *so, struct sockaddr *addr, struct uio **uio,
-    u_int uiocnt, struct mbuf *top, struct mbuf *control, int flags)
+pru_sosend_list_notsupp(struct socket *so, struct uio **uio,
+    u_int uiocnt, int flags)
 {
-#pragma unused(so, addr, uio, uiocnt, top, control, flags)
+#pragma unused(so, uio, uiocnt, flags)
        return (EOPNOTSUPP);
 }
 
@@ -2168,10 +2197,10 @@ pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr,
 }
 
 int
-pru_soreceive_list_notsupp(struct socket *so, struct sockaddr **paddr,
-    struct uio **uio, u_int uiocnt, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
+pru_soreceive_list_notsupp(struct socket *so, 
+    struct recv_msg_elem *recv_msg_array, u_int uiocnt, int *flagsp)
 {
-#pragma unused(so, paddr, uio, uiocnt, mp0, controlp, flagsp)
+#pragma unused(so, recv_msg_array, uiocnt, flagsp)
        return (EOPNOTSUPP);
 }
 
@@ -2206,6 +2235,13 @@ pru_socheckopt_null(struct socket *so, struct sockopt *sopt)
        return (0);
 }
 
+static int
+pru_preconnect_null(struct socket *so)
+{
+#pragma unused(so)
+       return (0);
+}
+
 void
 pru_sanitize(struct pr_usrreqs *pru)
 {
@@ -2237,6 +2273,7 @@ pru_sanitize(struct pr_usrreqs *pru)
        DEFAULT(pru->pru_sosend, pru_sosend_notsupp);
        DEFAULT(pru->pru_sosend_list, pru_sosend_list_notsupp);
        DEFAULT(pru->pru_socheckopt, pru_socheckopt_null);
+       DEFAULT(pru->pru_preconnect, pru_preconnect_null);
 #undef DEFAULT
 }
 
@@ -2267,6 +2304,10 @@ sbspace(struct sockbuf *sb)
        int pending = 0;
        int space = imin((int)(sb->sb_hiwat - sb->sb_cc),
            (int)(sb->sb_mbmax - sb->sb_mbcnt));
+
+       if (sb->sb_preconn_hiwat != 0)
+               space = imin((int)(sb->sb_preconn_hiwat - sb->sb_cc), space);
+
        if (space < 0)
                space = 0;
 
@@ -2291,7 +2332,7 @@ msgq_sbspace(struct socket *so, struct mbuf *control)
 {
        int space = 0, error;
        u_int32_t msgpri;
-       VERIFY(so->so_type == SOCK_STREAM && 
+       VERIFY(so->so_type == SOCK_STREAM &&
                SOCK_PROTO(so) == IPPROTO_TCP);
        if (control != NULL) {
                error = tcp_get_msg_priority(control, &msgpri);
@@ -2323,7 +2364,7 @@ soreadable(struct socket *so)
 #if CONTENT_FILTER
            && cfil_sock_data_pending(&so->so_rcv) == 0
 #endif /* CONTENT_FILTER */
-            ) ||
+           ) ||
            so->so_comp.tqh_first || so->so_error);
 }
 
@@ -2335,15 +2376,16 @@ sowriteable(struct socket *so)
        if ((so->so_state & SS_CANTSENDMORE) ||
            so->so_error > 0)
                return (1);
+       if (so_wait_for_if_feedback(so) || !socanwrite(so))
+               return (0);
+       if (so->so_flags1 & SOF1_PRECONNECT_DATA)
+               return (1);
 
-       if (!so_wait_for_if_feedback(so) &&
-           sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat &&
-           ((so->so_state & SS_ISCONNECTED) ||
-           !(so->so_proto->pr_flags & PR_CONNREQUIRED))) {
+       if (sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat) {
                if (so->so_flags & SOF_NOTSENT_LOWAT) {
-                       if ((SOCK_DOM(so) == PF_INET6
-                           || SOCK_DOM(so) == PF_INET)
-                           && so->so_type == SOCK_STREAM) {
+                       if ((SOCK_DOM(so) == PF_INET6 ||
+                           SOCK_DOM(so) == PF_INET) &&
+                           so->so_type == SOCK_STREAM) {
                                return (tcp_notsent_lowat_check(so));
                        }
 #if MPTCP
@@ -2382,6 +2424,13 @@ sballoc(struct sockbuf *sb, struct mbuf *m)
        VERIFY(total_sbmb_cnt > 0);
        if (total_sbmb_cnt > total_sbmb_cnt_peak)
                total_sbmb_cnt_peak = total_sbmb_cnt;
+
+       /*
+        * If data is being appended to the send socket buffer,
+        * update the send byte count
+        */
+       if (!(sb->sb_flags & SB_RECV))
+               OSAddAtomic(cnt, &total_snd_byte_count);
 }
 
 /* adjust counters in sb reflecting freeing of m */
@@ -2401,6 +2450,14 @@ sbfree(struct sockbuf *sb, struct mbuf *m)
        }
        OSAddAtomic(cnt, &total_sbmb_cnt);
        VERIFY(total_sbmb_cnt >= 0);
+
+       /*
+        * If data is being removed from the send socket buffer,
+        * update the send byte count
+        */
+       if (!(sb->sb_flags & SB_RECV)) {
+               OSAddAtomic(cnt, &total_snd_byte_count);
+       }
 }
 
 /*
@@ -2442,7 +2499,7 @@ sblock(struct sockbuf *sb, uint32_t flags)
                 */
                if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK))
                        panic("%s: SB_LOCK not held for %p\n",
-                        __func__, sb);
+                           __func__, sb);
 
                /* Keep the sockbuf locked */
                return (0);
@@ -2537,25 +2594,25 @@ sbunlock(struct sockbuf *sb, boolean_t keeplocked)
                 * been cleared by sodefunct()
                 */
                if (!(so->so_flags & SOF_DEFUNCT) &&
-                   !(sb->sb_flags & SB_LOCK) &&
+                   !(sb->sb_flags & SB_LOCK) &&
                    !(so->so_state & SS_DEFUNCT) &&
                    !(so->so_flags1 & SOF1_DEFUNCTINPROG)) {
                        panic("%s: SB_LOCK not held for %p\n",
-                               __func__, sb);
+                           __func__, sb);
                }
-               /* Keep the sockbuf locked and proceed*/
+               /* Keep the sockbuf locked and proceed */
        } else {
                VERIFY((sb->sb_flags & SB_LOCK) ||
-                      (so->so_state & SS_DEFUNCT) ||
-                      (so->so_flags1 & SOF1_DEFUNCTINPROG));
+                   (so->so_state & SS_DEFUNCT) ||
+                   (so->so_flags1 & SOF1_DEFUNCTINPROG));
 
                sb->sb_flags &= ~SB_LOCK;
 
                if (sb->sb_wantlock > 0) {
                        /*
-                        * We may get here from sorflush(), in which case "sb" may not
-                        * point to the real socket buffer.  Use the actual socket
-                        * buffer address from the socket instead.
+                        * We may get here from sorflush(), in which case "sb"
+                        * may not point to the real socket buffer.  Use the
+                        * actual socket buffer address from the socket instead.
                         */
                        wakeup((sb->sb_flags & SB_RECV) ? &so->so_rcv.sb_flags :
                            &so->so_snd.sb_flags);
@@ -2606,7 +2663,7 @@ soevent(struct socket *so, long hint)
         * Don't post an event if this a subflow socket or
         * the app has opted out of using cellular interface
         */
-       if ((hint & SO_FILT_HINT_IFDENIED) && 
+       if ((hint & SO_FILT_HINT_IFDENIED) &&
            !(so->so_flags & SOF_MP_SUBFLOW) &&
            !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR) &&
            !(so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE))
@@ -2618,12 +2675,17 @@ soevupcall(struct socket *so, u_int32_t hint)
 {
        if (so->so_event != NULL) {
                caddr_t so_eventarg = so->so_eventarg;
+               int locked = hint & SO_FILT_HINT_LOCKED;
 
                hint &= so->so_eventmask;
                if (hint != 0) {
-                       socket_unlock(so, 0);
+                       if (locked)
+                               socket_unlock(so, 0);
+
                        so->so_event(so, so_eventarg, hint);
-                       socket_lock(so, 0);
+
+                       if (locked)
+                               socket_lock(so, 0);
                }
        }
 }
@@ -2793,7 +2855,7 @@ sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
  * Based on the policy set by an all-knowing decision maker, throttle sockets
  * that have been marked as belonging to a "background" process.
  */
-int
+inline int
 soisthrottled(struct socket *so)
 {
        /*
@@ -2804,23 +2866,32 @@ soisthrottled(struct socket *so)
                (so->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BACKGROUND));
 }
 
-int
+inline int
 soisprivilegedtraffic(struct socket *so)
 {
        return ((so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS) ? 1 : 0);
 }
 
-int
+inline int
 soissrcbackground(struct socket *so)
 {
        return ((so->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BACKGROUND) ||
                IS_SO_TC_BACKGROUND(so->so_traffic_class));
 }
 
-int
+inline int
 soissrcrealtime(struct socket *so)
 {
-       return (so->so_traffic_class >= SO_TC_AV);
+       return (so->so_traffic_class >= SO_TC_AV &&
+           so->so_traffic_class <= SO_TC_VO);
+}
+
+inline int
+soissrcbesteffort(struct socket *so)
+{
+       return (so->so_traffic_class == SO_TC_BE ||
+           so->so_traffic_class == SO_TC_RD ||
+           so->so_traffic_class == SO_TC_OAM);
 }
 
 void
index 106e11dc2d906e61e40fda2868a50e74d805c788..f44291282652491235f99d5e12165aab78502774 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -89,6 +89,7 @@
 #include <sys/kauth.h>
 #include <kern/task.h>
 #include <sys/priv.h>
+#include <sys/sysctl.h>
 
 #include <security/audit/audit.h>
 
 #define        DBG_FNC_SENDMSG_X       NETDBG_CODE(DBG_NETSOCK, (11 << 8))
 #define        DBG_FNC_RECVMSG_X       NETDBG_CODE(DBG_NETSOCK, (12 << 8))
 
+#if DEBUG || DEVELOPMENT
+#define        DEBUG_KERNEL_ADDRPERM(_v) (_v)
+#define        DBG_PRINTF(...) printf(__VA_ARGS__)
+#else
+#define        DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
+#define        DBG_PRINTF(...) do { } while (0)
+#endif
 
 /* TODO: should be in header file */
 int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);
 
-static int sendit(struct proc *, int, struct user_msghdr *, uio_t, int,
-    int32_t *);
+static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t,
+    int, int32_t *);
 static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
     int32_t *);
 static int connectit(struct socket *, struct sockaddr *);
@@ -148,19 +156,36 @@ static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
 #endif /* SENDFILE */
 static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
 static int connectitx(struct socket *, struct sockaddr_list **,
-    struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *);
+    struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t,
+    sae_connid_t *, uio_t, unsigned int, user_ssize_t *);
 static int peeloff_nocancel(struct proc *, struct peeloff_args *, int *);
 static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
     int *);
 static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);
 
 static int internalize_user_msghdr_array(const void *, int, int, u_int,
-                       struct user_msghdr_x *, struct uio **);
+    struct user_msghdr_x *, struct uio **);
 static u_int externalize_user_msghdr_array(void *, int, int, u_int,
-                        const struct user_msghdr_x *, struct uio **);
+    const struct user_msghdr_x *, struct uio **);
 
 static void free_uio_array(struct uio **, u_int);
 static int uio_array_is_valid(struct uio **, u_int);
+static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
+static int internalize_recv_msghdr_array(const void *, int, int,
+    u_int, struct user_msghdr_x *, struct recv_msg_elem *);
+static u_int externalize_recv_msghdr_array(void *, int, int, u_int,
+    const struct user_msghdr_x *, struct recv_msg_elem *);
+static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
+static void free_recv_msg_array(struct recv_msg_elem *, u_int);
+
+SYSCTL_DECL(_kern_ipc);
+
+static u_int somaxsendmsgx = 100;
+SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
+       CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
+static u_int somaxrecvmsgx = 100;
+SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
+       CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
 
 /*
  * System call interface to the socket abstraction.
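
The two SYSCTL_UINT declarations above surface the new batching limits as kern.ipc.maxsendmsgx and kern.ipc.maxrecvmsgx (default 100). A small user-space sketch for reading one of them, assuming only the MIB name implied by SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx, ...):

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>

    int
    main(void)
    {
            unsigned int v;
            size_t len = sizeof (v);

            /* MIB name derived from the SYSCTL_UINT declaration above */
            if (sysctlbyname("kern.ipc.maxsendmsgx", &v, &len, NULL, 0) == 0)
                    printf("kern.ipc.maxsendmsgx = %u\n", v);
            return (0);
    }
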
@@ -250,6 +275,10 @@ socket_common(struct proc *p,
                proc_fdunlock(p);
 
                *retval = fd;
+               if (ENTR_SHOULDTRACE) {
+                       KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
+                           fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
+               }
        }
        return (error);
 }
@@ -601,6 +630,11 @@ releasefd:
 
 out:
        file_drop(fd);
+
+       if (error == 0 && ENTR_SHOULDTRACE) {
+               KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
+                   newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
+       }
        return (error);
 }
 
@@ -608,7 +642,8 @@ int
 accept(struct proc *p, struct accept_args *uap, int32_t *retval)
 {
        __pthread_testcancel(1);
-       return(accept_nocancel(p, (struct accept_nocancel_args *)uap, retval));
+       return (accept_nocancel(p, (struct accept_nocancel_args *)uap,
+           retval));
 }
 
 /*
@@ -638,7 +673,8 @@ int
 connect(struct proc *p, struct connect_args *uap, int32_t *retval)
 {
        __pthread_testcancel(1);
-       return(connect_nocancel(p, (struct connect_nocancel_args *)uap, retval));
+       return (connect_nocancel(p, (struct connect_nocancel_args *)uap,
+           retval));
 }
 
 int
@@ -695,11 +731,17 @@ connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
 #pragma unused(p, retval)
        struct sockaddr_list *src_sl = NULL, *dst_sl = NULL;
        struct socket *so;
-       int error, fd = uap->s;
+       int error, error1, fd = uap->socket;
        boolean_t dgram;
-       connid_t cid = CONNID_ANY;
+       sae_connid_t cid = SAE_CONNID_ANY;
+       struct user32_sa_endpoints ep32;
+       struct user64_sa_endpoints ep64;
+       struct user_sa_endpoints ep;
+       user_ssize_t bytes_written = 0;
+       struct user_iovec *iovp;
+       uio_t auio = NULL;
 
-       AUDIT_ARG(fd, uap->s);
+       AUDIT_ARG(fd, uap->socket);
        error = file_socket(fd, &so);
        if (error != 0)
                return (error);
@@ -708,11 +750,32 @@ connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
                goto out;
        }
 
-       /*
-        * XXX Workaround to ensure connectx does not fail because
-        * of unreaped so_error.
-        */
-       so->so_error = 0;
+       if (uap->endpoints == USER_ADDR_NULL) {
+               error = EINVAL;
+               goto out;
+       }
+
+       if (IS_64BIT_PROCESS(p)) {
+               error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
+               if (error != 0)
+                       goto out;
+
+               ep.sae_srcif = ep64.sae_srcif;
+               ep.sae_srcaddr = ep64.sae_srcaddr;
+               ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
+               ep.sae_dstaddr = ep64.sae_dstaddr;
+               ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
+       } else {
+               error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
+               if (error != 0)
+                       goto out;
+
+               ep.sae_srcif = ep32.sae_srcif;
+               ep.sae_srcaddr = ep32.sae_srcaddr;
+               ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
+               ep.sae_dstaddr = ep32.sae_dstaddr;
+               ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
+       }
 
        /*
        * Ask getsockaddr{_s} not to translate AF_UNSPEC to AF_INET
@@ -725,27 +788,87 @@ connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
         * sockaddr_list for src address for convenience, if present,
         * even though it won't hold more than one.
         */
-       if (uap->src != USER_ADDR_NULL && (error = getsockaddrlist(so,
-           &src_sl, uap->src, uap->srclen, dgram)) != 0)
+       if (ep.sae_srcaddr != USER_ADDR_NULL && (error = getsockaddrlist(so,
+           &src_sl, (user_addr_t)(caddr_t)ep.sae_srcaddr, ep.sae_srcaddrlen,
+           dgram)) != 0)
                goto out;
 
-       error = getsockaddrlist(so, &dst_sl, uap->dsts, uap->dstlen, dgram);
+       if (ep.sae_dstaddr == USER_ADDR_NULL) {
+               error = EINVAL;
+               goto out;
+       }
+
+       error = getsockaddrlist(so, &dst_sl, (user_addr_t)(caddr_t)ep.sae_dstaddr,
+           ep.sae_dstaddrlen, dgram);
        if (error != 0)
                goto out;
 
        VERIFY(dst_sl != NULL &&
            !TAILQ_EMPTY(&dst_sl->sl_head) && dst_sl->sl_cnt > 0);
 
-       error = connectitx(so, &src_sl, &dst_sl, p, uap->ifscope,
-           uap->aid, &cid);
+       if (uap->iov != USER_ADDR_NULL) {
+               /* Verify range before calling uio_create() */
+               if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
+                       return (EINVAL);
+
+               if (uap->len == USER_ADDR_NULL)
+                       return (EINVAL);
+
+               /* allocate a uio to hold the number of iovecs passed */
+               auio = uio_create(uap->iovcnt, 0,
+                   (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
+                   UIO_WRITE);
+
+               if (auio == NULL) {
+                       error = ENOMEM;
+                       goto out;
+               }
+
+               /*
+                * get location of iovecs within the uio.
+                * then copyin the iovecs from user space.
+                */
+               iovp = uio_iovsaddr(auio);
+               if (iovp == NULL) {
+                       error = ENOMEM;
+                       goto out;
+               }
+               error = copyin_user_iovec_array(uap->iov,
+                       IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
+                       uap->iovcnt, iovp);
+               if (error != 0)
+                       goto out;
+
+               /* finish setup of uio_t */
+               error = uio_calculateresid(auio);
+               if (error != 0) {
+                       goto out;
+               }
+       }
+
+       error = connectitx(so, &src_sl, &dst_sl, p, ep.sae_srcif, uap->associd,
+           &cid, auio, uap->flags, &bytes_written);
        if (error == ERESTART)
                error = EINTR;
 
-       if (uap->cid != USER_ADDR_NULL)
-               (void) copyout(&cid, uap->cid, sizeof (cid));
+       if (uap->len != USER_ADDR_NULL) {
+               error1 = copyout(&bytes_written, uap->len, sizeof (uap->len));
+               /* give precedence to connectitx errors */
+               if ((error1 != 0) && (error == 0))
+                       error = error1;
+       }
 
+       if (uap->connid != USER_ADDR_NULL) {
+               error1 = copyout(&cid, uap->connid, sizeof (cid));
+               /* give precedence to connectitx errors */
+               if ((error1 != 0) && (error == 0))
+                       error = error1;
+       }
 out:
        file_drop(fd);
+       if (auio != NULL) {
+               uio_free(auio);
+       }
        if (src_sl != NULL)
                sockaddrlist_free(src_sl);
        if (dst_sl != NULL)
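
connectx_nocancel() now takes its addresses from a user-supplied endpoints structure, optionally carries data through an iovec array, and reports the bytes written. A hedged user-space sketch of the corresponding call; the sa_endpoints_t layout and the connectx() prototype are assumed to mirror the sae_* fields and uap members visible in this hunk and may not match the shipped headers exactly:

    /* Sketch only: prototype and field names assumed from the kernel side above. */
    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <netinet/in.h>
    #include <string.h>

    static int
    connect_with_idempotent_data(int s, const struct sockaddr_in *dst,
        void *buf, size_t buflen)
    {
            sa_endpoints_t ep;
            struct iovec iov;
            size_t written = 0;

            memset(&ep, 0, sizeof (ep));
            ep.sae_dstaddr = (const struct sockaddr *)dst;
            ep.sae_dstaddrlen = sizeof (*dst);

            iov.iov_base = buf;
            iov.iov_len = buflen;

            /* Data marked idempotent may be sent along with the connection setup. */
            return (connectx(s, &ep, SAE_ASSOCID_ANY, CONNECT_DATA_IDEMPOTENT,
                &iov, 1, &written, NULL));
    }
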
@@ -816,10 +939,12 @@ out:
 static int
 connectitx(struct socket *so, struct sockaddr_list **src_sl,
     struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
-    associd_t aid, connid_t *pcid)
+    sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
+    user_ssize_t *bytes_written)
 {
        struct sockaddr_entry *se;
        int error;
+#pragma unused (flags)
 
        VERIFY(dst_sl != NULL && *dst_sl != NULL);
 
@@ -839,12 +964,50 @@ connectitx(struct socket *so, struct sockaddr_list **src_sl,
                error = EALREADY;
                goto out;
        }
+
+       if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) &&
+           (flags & CONNECT_DATA_IDEMPOTENT))
+               so->so_flags1 |= SOF1_DATA_IDEMPOTENT;
+
+       /*
+        * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
+        * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
+        * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
+        * Case 3 allows the user to combine write with connect even if they
+        * have no use for TFO (such as regular TCP or UDP).
+        * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
+        */
+       if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) &&
+           ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio))
+               so->so_flags1 |= SOF1_PRECONNECT_DATA;
+
+       /*
+        * If a user sets data idempotent but neither passes a uio nor sets
+        * CONNECT_RESUME_ON_READ_WRITE, this is an error, so reset
+        * SOF1_DATA_IDEMPOTENT.
+        */
+       if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) &&
+           (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) {
+               /* We should return EINVAL instead perhaps. */
+               so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
+       }
+
        error = soconnectxlocked(so, src_sl, dst_sl, p, ifscope,
-           aid, pcid, 0, NULL, 0);
+           aid, pcid, 0, NULL, 0, auio, bytes_written);
        if (error != 0) {
                so->so_state &= ~SS_ISCONNECTING;
                goto out;
        }
+       /*
+        * If the flag is still set after the call to soconnectxlocked (had
+        * data been queued and the connect() actually been triggered, the
+        * transport would have cleared it), we exit immediately. There is
+        * no reason to wait on any event.
+        */
+       if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
+               error = 0;
+               goto out;
+       }
        if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
                error = EINPROGRESS;
                goto out;
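
The three conditionals added to connectitx() interact; the following is a compact restatement, illustrative only and not part of the commit, of how so_flags1 ends up being derived from the protocol capabilities, the connectx() flags and the presence of data:

    /* Illustrative distillation of the logic above; not in the commit. */
    static uint32_t
    preconnect_so_flags1(uint32_t pr_flags, unsigned int cx_flags, boolean_t has_data)
    {
            uint32_t f1 = 0;

            if ((pr_flags & PR_DATA_IDEMPOTENT) &&
                (cx_flags & CONNECT_DATA_IDEMPOTENT))
                    f1 |= SOF1_DATA_IDEMPOTENT;

            if ((pr_flags & PR_PRECONN_WRITE) &&
                ((cx_flags & CONNECT_RESUME_ON_READ_WRITE) || has_data))
                    f1 |= SOF1_PRECONNECT_DATA;

            /* Idempotent data without a pre-connect write is meaningless. */
            if (!(f1 & SOF1_PRECONNECT_DATA))
                    f1 &= ~SOF1_DATA_IDEMPOTENT;

            return (f1);
    }
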
@@ -1113,7 +1276,7 @@ free1:
  *     sockargs:???
  */
 static int
-sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
+sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop,
     int flags, int32_t *retval)
 {
        struct mbuf *control = NULL;
@@ -1121,20 +1284,10 @@ sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
        struct sockaddr *to = NULL;
        boolean_t want_free = TRUE;
        int error;
-       struct socket *so;
        user_ssize_t len;
 
        KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
 
-       error = file_socket(s, &so);
-       if (error) {
-               KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
-               return (error);
-       }
-       if (so == NULL) {
-               error = EBADF;
-               goto out;
-       }
        if (mp->msg_name != USER_ADDR_NULL) {
                if (mp->msg_namelen > sizeof (ss)) {
                        error = getsockaddr(so, &to, mp->msg_name,
@@ -1166,9 +1319,9 @@ sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
        /*
         * We check the state without holding the socket lock;
         * if a race condition occurs, it would simply result
-        * in an extra call to the MAC check function. 
+        * in an extra call to the MAC check function.
         */
-       if ( to != NULL &&
+       if (to != NULL &&
            !(so->so_state & SS_DEFUNCT) &&
            (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0)
                goto bad;
@@ -1192,7 +1345,7 @@ bad:
                FREE(to, M_SONAME);
 out:
        KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
-       file_drop(s);
+
        return (error);
 }
 
@@ -1217,6 +1370,7 @@ sendto_nocancel(struct proc *p,
        struct user_msghdr msg;
        int error;
        uio_t auio = NULL;
+       struct socket *so;
 
        KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
        AUDIT_ARG(fd, uap->s);
@@ -1225,7 +1379,8 @@ sendto_nocancel(struct proc *p,
            (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
            UIO_WRITE);
        if (auio == NULL) {
-               return (ENOMEM);
+               error = ENOMEM;
+               goto done;
        }
        uio_addiov(auio, uap->buf, uap->len);
 
@@ -1237,12 +1392,21 @@ sendto_nocancel(struct proc *p,
        msg.msg_control = 0;
        msg.msg_flags = 0;
 
-       error = sendit(p, uap->s, &msg, auio, uap->flags, retval);
+       error = file_socket(uap->s, &so);
+       if (error)
+               goto done;
 
-       if (auio != NULL) {
-               uio_free(auio);
+       if (so == NULL) {
+               error = EBADF;
+       } else {
+               error = sendit(p, so, &msg, auio, uap->flags, retval);
        }
 
+       file_drop(uap->s);
+done:
+       if (auio != NULL)
+               uio_free(auio);
+
        KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
 
        return (error);
@@ -1258,11 +1422,13 @@ int
 sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
 {
        __pthread_testcancel(1);
-       return (sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap, retval));
+       return (sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
+           retval));
 }
 
 int
-sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, int32_t *retval)
+sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap,
+    int32_t *retval)
 {
        struct user32_msghdr msg32;
        struct user64_msghdr msg64;
@@ -1272,6 +1438,7 @@ sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, int32_t *ret
        int error;
        uio_t auio = NULL;
        struct user_iovec *iovp;
+       struct socket *so;
 
        KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
        AUDIT_ARG(fd, uap->s);
@@ -1350,7 +1517,16 @@ sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, int32_t *ret
        /* msg_flags is ignored for send */
        user_msg.msg_flags = 0;
 
-       error = sendit(p, uap->s, &user_msg, auio, uap->flags, retval);
+       error = file_socket(uap->s, &so);
+       if (error) {
+               goto done;
+       }
+       if (so == NULL) {
+               error = EBADF;
+       } else {
+               error = sendit(p, so, &user_msg, auio, uap->flags, retval);
+       }
+       file_drop(uap->s);
 done:
        if (auio != NULL) {
                uio_free(auio);
@@ -1364,17 +1540,17 @@ int
 sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
 {
        int error = 0;
-       struct user_msghdr_x *user_msg = NULL;
+       struct user_msghdr_x *user_msg_x = NULL;
        struct uio **uiop = NULL;
        struct socket *so;
        u_int i;
        struct sockaddr *to = NULL;
-       struct mbuf *control = NULL;
        user_ssize_t len_before = 0, len_after;
        int need_drop = 0;
        size_t size_of_msghdr;
        void *umsgp = NULL;
        u_int uiocnt;
+       int has_addr_or_ctl = 0;
 
        KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
 
@@ -1387,11 +1563,6 @@ sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
                error = EBADF;
                goto out;
        }
-       if (so->so_proto->pr_usrreqs->pru_sosend_list == NULL) {
-               printf("%s no pru_sosend_list\n", __func__);
-               error = EOPNOTSUPP;
-               goto out;
-       }
 
        /*
         * Input parameter range check
@@ -1400,17 +1571,23 @@ sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
                error = EINVAL;
                goto out;
        }
-       user_msg = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x), 
+       /*
+        * Clip to max currently allowed
+        */
+       if (uap->cnt > somaxsendmsgx)
+               uap->cnt = somaxsendmsgx;
+
+       user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
                        M_TEMP, M_WAITOK | M_ZERO);
-       if (user_msg == NULL) {
-               printf("%s _MALLOC() user_msg failed\n", __func__);
+       if (user_msg_x == NULL) {
+               DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
                error = ENOMEM;
                goto out;
        }
        uiop = _MALLOC(uap->cnt * sizeof(struct uio *),
                M_TEMP, M_WAITOK | M_ZERO);
        if (uiop == NULL) {
-               printf("%s _MALLOC() uiop failed\n", __func__);
+               DBG_PRINTF("%s _MALLOC() uiop failed\n", __func__);
                error = ENOMEM;
                goto out;
        }
@@ -1418,23 +1595,23 @@ sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
        size_of_msghdr = IS_64BIT_PROCESS(p) ?
                sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
 
-       umsgp = _MALLOC(uap->cnt * size_of_msghdr, 
+       umsgp = _MALLOC(uap->cnt * size_of_msghdr,
                        M_TEMP, M_WAITOK | M_ZERO);
        if (umsgp == NULL) {
-               printf("%s _MALLOC() user_msg failed\n", __func__);
+               printf("%s _MALLOC() user_msg_x failed\n", __func__);
                error = ENOMEM;
                goto out;
        }
        error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
        if (error) {
-               printf("%s copyin() failed\n", __func__);
+               DBG_PRINTF("%s copyin() failed\n", __func__);
                goto out;
        }
        error = internalize_user_msghdr_array(umsgp,
                IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
-               UIO_WRITE, uap->cnt, user_msg, uiop);
+               UIO_WRITE, uap->cnt, user_msg_x, uiop);
        if (error) {
-               printf("%s copyin_user_msghdr_array() failed\n", __func__);
+               DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
                goto out;
        }
        /*
@@ -1450,7 +1627,7 @@ sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
         * Sanity check on passed arguments
         */
        for (i = 0; i < uap->cnt; i++) {
-               struct user_msghdr_x *mp = &user_msg[i];
+               struct user_msghdr_x *mp = user_msg_x + i;
 
                /*
                 * No flags on send message
@@ -1462,43 +1639,72 @@ sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
                /*
                 * No support for address or ancillary data (yet)
                 */
-               if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) {
-                       error = EINVAL;
-                       goto out;
-               }
+               if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0)
+                       has_addr_or_ctl = 1;
+
                if (mp->msg_control != USER_ADDR_NULL ||
-                   mp->msg_controllen != 0) {
-                       error = EINVAL;
-                       goto out;
-               }
+                   mp->msg_controllen != 0)
+                       has_addr_or_ctl = 1;
+
 #if CONFIG_MACF_SOCKET_SUBSET
                /*
                 * We check the state without holding the socket lock;
                 * if a race condition occurs, it would simply result
-                * in an extra call to the MAC check function. 
+                * in an extra call to the MAC check function.
                 *
                 * Note: The following check is never true taken with the
                 * current limitation that we do not accept to pass an address,
-                * this is effectively placeholder code. If we add support for addresses,
-                * we will have to check every address.
+                * this is effectively placeholder code. If we add support for
+                * addresses, we will have to check every address.
                 */
-               if ( to != NULL &&
+               if (to != NULL &&
                    !(so->so_state & SS_DEFUNCT) &&
-                   (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0)
+                   (error = mac_socket_check_send(kauth_cred_get(), so, to))
+                       != 0)
                        goto out;
 #endif /* MAC_SOCKET_SUBSET */
        }
 
        len_before = uio_array_resid(uiop, uap->cnt);
 
-       error = so->so_proto->pr_usrreqs->pru_sosend_list(so, to, uiop, 
-               uap->cnt, 0, control, uap->flags);
-
+       /*
+        * Feed the list of packets at once only for a connected socket
+        * without control messages
+        */
+       if (so->so_proto->pr_usrreqs->pru_sosend_list !=
+           pru_sosend_list_notsupp &&
+           has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
+               error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
+                   uap->cnt, uap->flags);
+       } else {
+               for (i = 0; i < uap->cnt; i++) {
+                       struct user_msghdr_x *mp = user_msg_x + i;
+                       struct user_msghdr user_msg;
+                       uio_t auio = uiop[i];
+                       int32_t tmpval;
+
+                       user_msg.msg_flags = mp->msg_flags;
+                       user_msg.msg_controllen = mp->msg_controllen;
+                       user_msg.msg_control = mp->msg_control;
+                       user_msg.msg_iovlen = mp->msg_iovlen;
+                       user_msg.msg_iov = mp->msg_iov;
+                       user_msg.msg_namelen = mp->msg_namelen;
+                       user_msg.msg_name = mp->msg_name;
+
+                       error = sendit(p, so, &user_msg, auio, uap->flags,
+                           &tmpval);
+                       if (error != 0)
+                               break;
+               }
+       }
        len_after = uio_array_resid(uiop, uap->cnt);
 
+       VERIFY(len_after <= len_before);
+
        if (error != 0) {
                if (len_after != len_before && (error == ERESTART ||
-                   error == EINTR || error == EWOULDBLOCK))
+                   error == EINTR || error == EWOULDBLOCK ||
+                   error == ENOBUFS))
                        error = 0;
                /* Generation of SIGPIPE can be controlled per socket */
                if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
@@ -1507,7 +1713,7 @@ sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
        if (error == 0) {
                uiocnt = externalize_user_msghdr_array(umsgp,
                    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
-                   UIO_WRITE, uap->cnt, user_msg, uiop);
+                   UIO_WRITE, uap->cnt, user_msg_x, uiop);
 
                *retval = (int)(uiocnt);
        }
@@ -1520,14 +1726,130 @@ out:
                free_uio_array(uiop, uap->cnt);
                _FREE(uiop, M_TEMP);
        }
-       if (user_msg != NULL)
-               _FREE(user_msg, M_TEMP);
+       if (user_msg_x != NULL)
+               _FREE(user_msg_x, M_TEMP);
 
        KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
 
        return (error);
 }
 
+
+static int
+copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen)
+{
+       int error = 0;
+       socklen_t sa_len = 0;
+       ssize_t len;
+
+       len = *namelen;
+       if (len <= 0 || fromsa == 0) {
+               len = 0;
+       } else {
+#ifndef MIN
+#define        MIN(a, b) ((a) > (b) ? (b) : (a))
+#endif
+               sa_len = fromsa->sa_len;
+               len = MIN((unsigned int)len, sa_len);
+               error = copyout(fromsa, name, (unsigned)len);
+               if (error)
+                       goto out;
+       }
+       *namelen = sa_len;
+out:
+       return (0);
+}
+
+static int
+copyout_control(struct proc *p, struct mbuf *m, user_addr_t control,
+    socklen_t *controllen, int *flags)
+{
+       int error = 0;
+       ssize_t len;
+       user_addr_t ctlbuf;
+
+       len = *controllen;
+       *controllen = 0;
+       ctlbuf = control;
+
+       while (m && len > 0) {
+               unsigned int tocopy;
+               struct cmsghdr *cp = mtod(m, struct cmsghdr *);
+               int cp_size = CMSG_ALIGN(cp->cmsg_len);
+               int buflen = m->m_len;
+
+               while (buflen > 0 && len > 0) {
+                       /*
+                        * SCM_TIMESTAMP hack because struct timeval has a
+                        * different size for 32-bit and 64-bit processes
+                        */
+                       if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
+                               unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))];
+                               struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
+                               int tmp_space;
+                               struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
+
+                               tmp_cp->cmsg_level = SOL_SOCKET;
+                               tmp_cp->cmsg_type = SCM_TIMESTAMP;
+
+                               if (proc_is64bit(p)) {
+                                       struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
+
+                                       tv64->tv_sec = tv->tv_sec;
+                                       tv64->tv_usec = tv->tv_usec;
+
+                                       tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
+                                       tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
+                               } else {
+                                       struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
+
+                                       tv32->tv_sec = tv->tv_sec;
+                                       tv32->tv_usec = tv->tv_usec;
+
+                                       tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
+                                       tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
+                               }
+                               if (len >= tmp_space) {
+                                       tocopy = tmp_space;
+                               } else {
+                                       *flags |= MSG_CTRUNC;
+                                       tocopy = len;
+                               }
+                               error = copyout(tmp_buffer, ctlbuf, tocopy);
+                               if (error)
+                                       goto out;
+                       } else {
+                               if (cp_size > buflen) {
+                                       panic("cp_size > buflen, something "
+                                           "wrong with alignment!");
+                               }
+                               if (len >= cp_size) {
+                                       tocopy = cp_size;
+                               } else {
+                                       *flags |= MSG_CTRUNC;
+                                       tocopy = len;
+                               }
+                               error = copyout((caddr_t) cp, ctlbuf, tocopy);
+                               if (error)
+                                       goto out;
+                       }
+
+                       ctlbuf += tocopy;
+                       len -= tocopy;
+
+                       buflen -= cp_size;
+                       cp = (struct cmsghdr *)(void *)
+                           ((unsigned char *) cp + cp_size);
+                       cp_size = CMSG_ALIGN(cp->cmsg_len);
+               }
+
+               m = m->m_next;
+       }
+       *controllen = ctlbuf - control;
+out:
+       return (error);
+}
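
The SCM_TIMESTAMP conversion above exists because the kernel stores the timestamp as a native struct timeval while 32-bit and 64-bit consumers expect differently sized layouts. On the user side the message is consumed with the ordinary cmsg macros; a minimal sketch, assuming SO_TIMESTAMP was enabled on the socket beforehand:

    /* User-space sketch: walk control data returned by recvmsg(2). */
    #include <sys/socket.h>
    #include <sys/time.h>
    #include <string.h>

    static void
    extract_timestamp(struct msghdr *msg, struct timeval *tv_out)
    {
            struct cmsghdr *cm;

            for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm)) {
                    if (cm->cmsg_level == SOL_SOCKET &&
                        cm->cmsg_type == SCM_TIMESTAMP) {
                            /* Already sized for this process's ABI by the kernel. */
                            memcpy(tv_out, CMSG_DATA(cm), sizeof (*tv_out));
                            return;
                    }
            }
    }
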
+
 /*
  * Returns:    0                       Success
  *             ENOTSOCK
@@ -1556,8 +1878,7 @@ recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
 {
        ssize_t len;
        int error;
-       struct mbuf *m, *control = 0;
-       user_addr_t ctlbuf;
+       struct mbuf *control = 0;
        struct socket *so;
        struct sockaddr *fromsa = 0;
        struct fileproc *fp;
@@ -1614,120 +1935,26 @@ recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
                    error == EINTR || error == EWOULDBLOCK))
                        error = 0;
        }
-
        if (error)
                goto out;
 
        *retval = len - uio_resid(uiop);
-       if (mp->msg_name) {
-               socklen_t sa_len = 0;
 
-               len = mp->msg_namelen;
-               if (len <= 0 || fromsa == 0) {
-                       len = 0;
-               } else {
-#ifndef MIN
-#define        MIN(a, b) ((a) > (b) ? (b) : (a))
-#endif
-                       sa_len = fromsa->sa_len;
-                       len = MIN((unsigned int)len, sa_len);
-                       error = copyout(fromsa, mp->msg_name, (unsigned)len);
-                       if (error)
-                               goto out;
-               }
-               mp->msg_namelen = sa_len;
+       if (mp->msg_name) {
+               error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
+               if (error)
+                       goto out;
                /* return the actual, untruncated address length */
                if (namelenp &&
-                   (error = copyout((caddr_t)&sa_len, namelenp,
+                   (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
                    sizeof (int)))) {
                        goto out;
                }
        }
-       if (mp->msg_control) {
-               len = mp->msg_controllen;
-               m = control;
-               mp->msg_controllen = 0;
-               ctlbuf = mp->msg_control;
-
-               while (m && len > 0) {
-                       unsigned int tocopy;
-                       struct cmsghdr *cp = mtod(m, struct cmsghdr *);
-                       int cp_size = CMSG_ALIGN(cp->cmsg_len);
-                       int buflen = m->m_len;
-
-                       while (buflen > 0 && len > 0) {
-
-                               /* 
-                                SCM_TIMESTAMP hack because  struct timeval has a 
-                                * different size for 32 bits and 64 bits processes
-                                */
-                               if (cp->cmsg_level == SOL_SOCKET &&  cp->cmsg_type == SCM_TIMESTAMP) {
-                                       unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))];
-                                       struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
-                                       int tmp_space;
-                                       struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
-
-                                       tmp_cp->cmsg_level = SOL_SOCKET;
-                                       tmp_cp->cmsg_type = SCM_TIMESTAMP;
-
-                                       if (proc_is64bit(p)) {
-                                               struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
-
-                                               tv64->tv_sec = tv->tv_sec;
-                                               tv64->tv_usec = tv->tv_usec;
-
-                                               tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
-                                               tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
-                                       } else {
-                                               struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
-
-                                               tv32->tv_sec = tv->tv_sec;
-                                               tv32->tv_usec = tv->tv_usec;
-
-                                               tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
-                                               tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
-                                       }
-                                       if (len >= tmp_space) {
-                                               tocopy = tmp_space;
-                                       } else {
-                                               mp->msg_flags |= MSG_CTRUNC;
-                                               tocopy = len;
-                                       }
-                                       error = copyout(tmp_buffer, ctlbuf, tocopy);
-                                       if (error)
-                                               goto out;
-
-                               } else {
-
-                                       if (cp_size > buflen) {
-                                               panic("cp_size > buflen, something wrong with alignment!");
-                                       }
-
-                                       if (len >= cp_size) {
-                                               tocopy = cp_size;
-                                       } else {
-                                               mp->msg_flags |= MSG_CTRUNC;
-                                               tocopy = len;
-                                       }
-
-                                       error = copyout((caddr_t) cp, ctlbuf,
-                                                                       tocopy);
-                                       if (error)
-                                               goto out;
-                               }
-
-
-                               ctlbuf += tocopy;
-                               len -= tocopy;
-
-                               buflen -= cp_size;
-                               cp = (struct cmsghdr *)(void *)((unsigned char *) cp + cp_size);
-                               cp_size = CMSG_ALIGN(cp->cmsg_len);
-                       }
 
-                       m = m->m_next;
-               }
-               mp->msg_controllen = ctlbuf - mp->msg_control;
+       if (mp->msg_control) {
+               error = copyout_control(p, control, mp->msg_control,
+                   &mp->msg_controllen, &mp->msg_flags);
        }
 out:
        if (fromsa)
@@ -1759,11 +1986,13 @@ int
 recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
 {
        __pthread_testcancel(1);
-       return(recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap, retval));
+       return (recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
+           retval));
 }
 
 int
-recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, int32_t *retval)
+recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap,
+    int32_t *retval)
 {
        struct user_msghdr msg;
        int error;
@@ -1820,11 +2049,13 @@ int
 recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
 {
        __pthread_testcancel(1);
-       return(recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap, retval));
+       return (recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
+           retval));
 }
 
 int
-recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, int32_t *retval)
+recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap,
+    int32_t *retval)
 {
        struct user32_msghdr msg32;
        struct user64_msghdr msg64;
@@ -1944,8 +2175,8 @@ int
 recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
 {
        int error = EOPNOTSUPP;
-       struct user_msghdr_x *user_msg = NULL;
-       struct uio **uiop = NULL;
+       struct user_msghdr_x *user_msg_x = NULL;
+       struct recv_msg_elem *recv_msg_array = NULL;
        struct socket *so;
        user_ssize_t len_before = 0, len_after;
        int need_drop = 0;
@@ -1965,12 +2196,6 @@ recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
                error = EBADF;
                goto out;
        }
-       if (so->so_proto->pr_usrreqs->pru_soreceive_list == NULL) {
-               printf("%s no pru_soreceive_list\n", __func__);
-               error = EOPNOTSUPP;
-               goto out;
-       }
-
        /*
         * Input parameter range check
         */
@@ -1978,73 +2203,61 @@ recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
                error = EINVAL;
                goto out;
        }
-       user_msg = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x), 
+       if (uap->cnt > somaxrecvmsgx)
+               uap->cnt = somaxrecvmsgx;
+
+       user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
            M_TEMP, M_WAITOK | M_ZERO);
-       if (user_msg == NULL) {
-               printf("%s _MALLOC() user_msg failed\n", __func__);
+       if (user_msg_x == NULL) {
+               DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
                error = ENOMEM;
                goto out;
        }
-       uiop = _MALLOC(uap->cnt * sizeof(struct uio *),
-           M_TEMP, M_WAITOK | M_ZERO);
-       if (uiop == NULL) {
-               printf("%s _MALLOC() uiop failed\n", __func__);
+       recv_msg_array = alloc_recv_msg_array(uap->cnt);
+       if (recv_msg_array == NULL) {
+               DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
                error = ENOMEM;
                goto out;
        }
-
        size_of_msghdr = IS_64BIT_PROCESS(p) ?
            sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
 
        umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO);
        if (umsgp == NULL) {
-               printf("%s _MALLOC() user_msg failed\n", __func__);
+               DBG_PRINTF("%s _MALLOC() umsgp failed\n", __func__);
                error = ENOMEM;
                goto out;
        }
        error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
        if (error) {
-               printf("%s copyin() failed\n", __func__);
+               DBG_PRINTF("%s copyin() failed\n", __func__);
                goto out;
        }
-       error = internalize_user_msghdr_array(umsgp,
+       error = internalize_recv_msghdr_array(umsgp,
            IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
-           UIO_READ, uap->cnt, user_msg, uiop);
+           UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
        if (error) {
-               printf("%s copyin_user_msghdr_array() failed\n", __func__);
+               DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
                goto out;
        }
        /*
         * Make sure the size of each message iovec and
         * the aggregate size of all the iovec is valid
         */
-       if (uio_array_is_valid(uiop, uap->cnt) == 0) {
+       if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
                error = EINVAL;
                goto out;
        }
-
        /*
         * Sanity check on passed arguments
         */
        for (i = 0; i < uap->cnt; i++) {
-               struct user_msghdr_x *mp = &user_msg[i];
+               struct user_msghdr_x *mp = user_msg_x + i;
 
                if (mp->msg_flags != 0) {
                        error = EINVAL;
                        goto out;
                }
-               /*
-                * No support for address or ancillary data (yet)
-                */
-               if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) {
-                       error = EINVAL;
-                       goto out;
-               }
-               if (mp->msg_control != USER_ADDR_NULL ||
-                   mp->msg_controllen != 0) {
-                       error = EINVAL;
-                       goto out;
-               }
        }
 #if CONFIG_MACF_SOCKET_SUBSET
        /*
@@ -2059,44 +2272,107 @@ recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
                goto out;
 #endif /* MAC_SOCKET_SUBSET */
 
-       len_before = uio_array_resid(uiop, uap->cnt);
+       len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
 
-       error = so->so_proto->pr_usrreqs->pru_soreceive_list(so, NULL, uiop,
-           uap->cnt, (struct mbuf **)0, NULL, NULL);
+       if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
+           pru_soreceive_list_notsupp &&
+           somaxrecvmsgx == 0) {
+               error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
+                   recv_msg_array, uap->cnt, &uap->flags);
+       } else {
+               int flags = uap->flags;
 
-       len_after = uio_array_resid(uiop, uap->cnt);
+               for (i = 0; i < uap->cnt; i++) {
+                       struct recv_msg_elem *recv_msg_elem;
+                       uio_t auio;
+                       struct sockaddr **psa;
+                       struct mbuf **controlp;
+
+                       recv_msg_elem = recv_msg_array + i;
+                       auio = recv_msg_elem->uio;
+
+                       /*
+                        * Do not block if we got at least one packet
+                        */
+                       if (i > 0)
+                               flags |= MSG_DONTWAIT;
+
+                       psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
+                           &recv_msg_elem->psa : NULL;
+                       controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
+                           &recv_msg_elem->controlp : NULL;
+
+                       error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
+                           auio, (struct mbuf **)0, controlp, &flags);
+                       if (error)
+                               break;
+                       /*
+                        * We have some data
+                        */
+                       recv_msg_elem->which |= SOCK_MSG_DATA;
+                       /*
+                        * Stop on partial copy
+                        */
+                       if (flags & (MSG_RCVMORE | MSG_TRUNC))
+                               break;
+               }
+               if ((uap->flags & MSG_DONTWAIT) == 0)
+                       flags &= ~MSG_DONTWAIT;
+               uap->flags = flags;
+       }
+
+       len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
 
        if (error) {
                if (len_after != len_before && (error == ERESTART ||
                    error == EINTR || error == EWOULDBLOCK))
                        error = 0;
+               else
+                       goto out;
        }
-       if (error == 0) {
-               uiocnt = externalize_user_msghdr_array(umsgp,
-                   IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
-                   UIO_READ, uap->cnt, user_msg, uiop);
 
-               error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
-               if (error) {
-                       printf("%s copyout() failed\n", __func__);
-                       goto out;
+       uiocnt = externalize_recv_msghdr_array(umsgp,
+           IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
+           UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
+
+       error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
+       if (error) {
+               DBG_PRINTF("%s copyout() failed\n", __func__);
+               goto out;
+       }
+       *retval = (int)(uiocnt);
+
+       for (i = 0; i < uap->cnt; i++) {
+               struct user_msghdr_x *mp = user_msg_x + i;
+               struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
+               struct sockaddr *fromsa = recv_msg_elem->psa;
+
+               if (mp->msg_name) {
+                       error = copyout_sa(fromsa, mp->msg_name,
+                           &mp->msg_namelen);
+                       if (error)
+                               goto out;
+               }
+               if (mp->msg_control) {
+                       error = copyout_control(p, recv_msg_elem->controlp,
+                           mp->msg_control, &mp->msg_controllen,
+                           &mp->msg_flags);
+                       if (error)
+                               goto out;
                }
-               *retval = (int)(uiocnt);
        }
 out:
        if (need_drop)
                file_drop(uap->s);
        if (umsgp != NULL)
                _FREE(umsgp, M_TEMP);
-       if (uiop != NULL) {
-               free_uio_array(uiop, uap->cnt);
-               _FREE(uiop, M_TEMP);
-       }
-       if (user_msg != NULL)
-               _FREE(user_msg, M_TEMP);
-               
+       if (recv_msg_array != NULL)
+               free_recv_msg_array(recv_msg_array, uap->cnt);
+       if (user_msg_x != NULL)
+               _FREE(user_msg_x, M_TEMP);
+
        KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
-       
+
        return (error);
 }
 
@@ -2419,16 +2695,20 @@ sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
 
        size_t alloc_buflen = (size_t)buflen;
 
-       if(alloc_buflen > INT_MAX/2) 
+       if (alloc_buflen > INT_MAX/2)
                return (EINVAL);
 #ifdef __LP64__
-       /* The fd's in the buffer must expand to be pointers, thus we need twice as much space */
-       if(type == MT_CONTROL)
-               alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) + sizeof(struct cmsghdr);
+       /*
+        * The fds in the buffer must expand to be pointers, thus we need
+        * twice as much space.
+        */
+       if (type == MT_CONTROL)
+               alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) +
+                   sizeof(struct cmsghdr);
 #endif
        if (alloc_buflen > MLEN) {
                if (type == MT_SONAME && alloc_buflen <= 112)
-                       alloc_buflen = MLEN;            /* unix domain compat. hack */
+                       alloc_buflen = MLEN;    /* unix domain compat. hack */
                else if (alloc_buflen > MCLBYTES)
                        return (EINVAL);
        }
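
The LP64 sizing above covers the worst case where every 4-byte descriptor in an SCM_RIGHTS message becomes an 8-byte file pointer once internalized. A rough worked example, illustrative only:

    /*
     * Illustrative arithmetic only: three descriptors passed via SCM_RIGHTS
     * arrive as roughly CMSG_LEN(3 * sizeof (int)) bytes, but after the ints
     * are expanded to pointers the kernel needs about
     * CMSG_LEN(3 * sizeof (void *)) bytes, which is what
     * ((buflen - sizeof (struct cmsghdr)) * 2) + sizeof (struct cmsghdr)
     * allows for.
     */
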
@@ -2442,8 +2722,10 @@ sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
                        return (ENOBUFS);
                }
        }
-       /* K64: We still copyin the original buflen because it gets expanded later
-        * and we lie about the size of the mbuf because it only affects unp_* functions
+       /*
+        * K64: We still copyin the original buflen because it gets expanded
+        * later and we lie about the size of the mbuf because it only affects
+        * unp_* functions
         */
        m->m_len = buflen;
        error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
@@ -2623,20 +2905,22 @@ getsockaddrlist(struct socket *so, struct sockaddr_list **slp,
 
 int
 internalize_user_msghdr_array(const void *src, int spacetype, int direction,
-       u_int count, struct user_msghdr_x *dst, struct uio **uiop)
+    u_int count, struct user_msghdr_x *dst, struct uio **uiop)
 {
        int error = 0;
        u_int i;
+       u_int namecnt = 0;
+       u_int ctlcnt = 0;
 
        for (i = 0; i < count; i++) {
                uio_t auio;
                struct user_iovec *iovp;
-               struct user_msghdr_x *user_msg = &dst[i];
+               struct user_msghdr_x *user_msg = dst + i;
 
                if (spacetype == UIO_USERSPACE64) {
-                       struct user64_msghdr_x *msghdr64;
+                       const struct user64_msghdr_x *msghdr64;
 
-                       msghdr64 = ((struct user64_msghdr_x *)src) + i;
+                       msghdr64 = ((const struct user64_msghdr_x *)src) + i;
 
                        user_msg->msg_name = msghdr64->msg_name;
                        user_msg->msg_namelen = msghdr64->msg_namelen;
@@ -2647,9 +2931,9 @@ internalize_user_msghdr_array(const void *src, int spacetype, int direction,
                        user_msg->msg_flags = msghdr64->msg_flags;
                        user_msg->msg_datalen = msghdr64->msg_datalen;
                } else {
-                       struct user32_msghdr_x *msghdr32;
+                       const struct user32_msghdr_x *msghdr32;
 
-                       msghdr32 = ((struct user32_msghdr_x *)src) + i;
+                       msghdr32 = ((const struct user32_msghdr_x *)src) + i;
 
                        user_msg->msg_name = msghdr32->msg_name;
                        user_msg->msg_namelen = msghdr32->msg_namelen;
@@ -2660,45 +2944,128 @@ internalize_user_msghdr_array(const void *src, int spacetype, int direction,
                        user_msg->msg_flags = msghdr32->msg_flags;
                        user_msg->msg_datalen = msghdr32->msg_datalen;
                }
-               
-               if (user_msg->msg_iovlen <= 0 || user_msg->msg_iovlen > UIO_MAXIOV) {
+
+               if (user_msg->msg_iovlen <= 0 ||
+                   user_msg->msg_iovlen > UIO_MAXIOV) {
                        error = EMSGSIZE;
                        goto done;
                }
-               auio = uio_create(user_msg->msg_iovlen, 0, spacetype, direction);
+               auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
+                   direction);
                if (auio == NULL) {
                        error = ENOMEM;
                        goto done;
                }
                uiop[i] = auio;
 
-               if (user_msg->msg_iovlen) {
-                       iovp = uio_iovsaddr(auio);
-                       if (iovp == NULL) {
-                               error = ENOMEM;
-                               goto done;
-                       }
-                       error = copyin_user_iovec_array(user_msg->msg_iov,
-                               spacetype, user_msg->msg_iovlen, iovp);
-                        if (error)
-                               goto done;
-                       user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
+               iovp = uio_iovsaddr(auio);
+               if (iovp == NULL) {
+                       error = ENOMEM;
+                       goto done;
+               }
+               error = copyin_user_iovec_array(user_msg->msg_iov,
+                       spacetype, user_msg->msg_iovlen, iovp);
+               if (error)
+                       goto done;
+               user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
 
-                       error = uio_calculateresid(auio);
-                       if (error)
-                               goto done;
-                       user_msg->msg_datalen = uio_resid(auio);
+               error = uio_calculateresid(auio);
+               if (error)
+                       goto done;
+               user_msg->msg_datalen = uio_resid(auio);
+
+               if (user_msg->msg_name && user_msg->msg_namelen)
+                       namecnt++;
+               if (user_msg->msg_control && user_msg->msg_controllen)
+                       ctlcnt++;
+       }
+done:
+
+       return (error);
+}
+
+int
+internalize_recv_msghdr_array(const void *src, int spacetype, int direction,
+    u_int count, struct user_msghdr_x *dst,
+    struct recv_msg_elem *recv_msg_array)
+{
+       int error = 0;
+       u_int i;
+
+       for (i = 0; i < count; i++) {
+               struct user_iovec *iovp;
+               struct user_msghdr_x *user_msg = dst + i;
+               struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
+
+               if (spacetype == UIO_USERSPACE64) {
+                       const struct user64_msghdr_x *msghdr64;
+
+                       msghdr64 = ((const struct user64_msghdr_x *)src) + i;
+
+                       user_msg->msg_name = msghdr64->msg_name;
+                       user_msg->msg_namelen = msghdr64->msg_namelen;
+                       user_msg->msg_iov = msghdr64->msg_iov;
+                       user_msg->msg_iovlen = msghdr64->msg_iovlen;
+                       user_msg->msg_control = msghdr64->msg_control;
+                       user_msg->msg_controllen = msghdr64->msg_controllen;
+                       user_msg->msg_flags = msghdr64->msg_flags;
+                       user_msg->msg_datalen = msghdr64->msg_datalen;
                } else {
-                       user_msg->msg_datalen = 0;
+                       const struct user32_msghdr_x *msghdr32;
+
+                       msghdr32 = ((const struct user32_msghdr_x *)src) + i;
+
+                       user_msg->msg_name = msghdr32->msg_name;
+                       user_msg->msg_namelen = msghdr32->msg_namelen;
+                       user_msg->msg_iov = msghdr32->msg_iov;
+                       user_msg->msg_iovlen = msghdr32->msg_iovlen;
+                       user_msg->msg_control = msghdr32->msg_control;
+                       user_msg->msg_controllen = msghdr32->msg_controllen;
+                       user_msg->msg_flags = msghdr32->msg_flags;
+                       user_msg->msg_datalen = msghdr32->msg_datalen;
                }
+
+               if (user_msg->msg_iovlen <= 0 ||
+                   user_msg->msg_iovlen > UIO_MAXIOV) {
+                       error = EMSGSIZE;
+                       goto done;
+               }
+               recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
+                   spacetype, direction);
+               if (recv_msg_elem->uio == NULL) {
+                       error = ENOMEM;
+                       goto done;
+               }
+
+               iovp = uio_iovsaddr(recv_msg_elem->uio);
+               if (iovp == NULL) {
+                       error = ENOMEM;
+                       goto done;
+               }
+               error = copyin_user_iovec_array(user_msg->msg_iov,
+                       spacetype, user_msg->msg_iovlen, iovp);
+               if (error)
+                       goto done;
+               user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
+
+               error = uio_calculateresid(recv_msg_elem->uio);
+               if (error)
+                       goto done;
+               user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
+
+               if (user_msg->msg_name && user_msg->msg_namelen)
+                       recv_msg_elem->which |= SOCK_MSG_SA;
+               if (user_msg->msg_control && user_msg->msg_controllen)
+                       recv_msg_elem->which |= SOCK_MSG_CONTROL;
        }
 done:
+
        return (error);
 }
 
 u_int
 externalize_user_msghdr_array(void *dst, int spacetype, int direction,
-        u_int count, const struct user_msghdr_x *src, struct uio **uiop)
+    u_int count, const struct user_msghdr_x *src, struct uio **uiop)
 {
 #pragma unused(direction)
        u_int i;
@@ -2706,13 +3073,60 @@ externalize_user_msghdr_array(void *dst, int spacetype, int direction,
        u_int retcnt = 0;
 
        for (i = 0; i < count; i++) {
-               const struct user_msghdr_x *user_msg = &src[i];
+               const struct user_msghdr_x *user_msg = src + i;
                uio_t auio = uiop[i];
                user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
 
                if (user_msg->msg_datalen != 0 && len == 0)
                        seenlast = 1;
-               
+
+               if (seenlast == 0)
+                       retcnt ++;
+
+               if (spacetype == UIO_USERSPACE64) {
+                       struct user64_msghdr_x *msghdr64;
+
+                       msghdr64 = ((struct user64_msghdr_x *)dst) + i;
+
+                       msghdr64->msg_flags = user_msg->msg_flags;
+                       msghdr64->msg_datalen = len;
+
+               } else {
+                       struct user32_msghdr_x *msghdr32;
+
+                       msghdr32 = ((struct user32_msghdr_x *)dst) + i;
+
+                       msghdr32->msg_flags = user_msg->msg_flags;
+                       msghdr32->msg_datalen = len;
+               }
+       }
+       return (retcnt);
+}
+
+u_int
+externalize_recv_msghdr_array(void *dst, int spacetype, int direction,
+    u_int count, const struct user_msghdr_x *src,
+    struct recv_msg_elem *recv_msg_array)
+{
+       u_int i;
+       int seenlast = 0;
+       u_int retcnt = 0;
+
+       for (i = 0; i < count; i++) {
+               const struct user_msghdr_x *user_msg = src + i;
+               struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
+               user_ssize_t len;
+
+               len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
+
+               if (direction == UIO_READ) {
+                       if ((recv_msg_elem->which & SOCK_MSG_DATA) == 0)
+                               seenlast = 1;
+               } else {
+                       if (user_msg->msg_datalen != 0 && len == 0)
+                               seenlast = 1;
+               }
+
                if (seenlast == 0)
                        retcnt ++;
 
@@ -2723,7 +3137,7 @@ externalize_user_msghdr_array(void *dst, int spacetype, int direction,
 
                        msghdr64->msg_flags = user_msg->msg_flags;
                        msghdr64->msg_datalen = len;
-                               
+
                } else {
                        struct user32_msghdr_x *msghdr32;
 
@@ -2756,7 +3170,7 @@ uio_array_resid(struct uio **uiop, u_int count)
        for (i = 0; i < count; i++) {
                struct uio *auio = uiop[i];
 
-               if (auio!= NULL)
+               if (auio != NULL)
                        len += uio_resid(auio);
        }
        return (len);
@@ -2770,17 +3184,90 @@ uio_array_is_valid(struct uio **uiop, u_int count)
 
        for (i = 0; i < count; i++) {
                struct uio *auio = uiop[i];
-               
+
                if (auio != NULL) {
                        user_ssize_t resid = uio_resid(auio);
-                       
+
                        /*
                         * Sanity check on the validity of the iovec:
                         * no point of going over sb_max
                         */
                        if (resid < 0 || (u_int32_t)resid > sb_max)
                                return (0);
-                               
+
+                       len += resid;
+                       if (len < 0 || (u_int32_t)len > sb_max)
+                               return (0);
+               }
+       }
+       return (1);
+}
+
+
+struct recv_msg_elem *
+alloc_recv_msg_array(u_int count)
+{
+       struct recv_msg_elem *recv_msg_array;
+
+       recv_msg_array = _MALLOC(count * sizeof(struct recv_msg_elem),
+           M_TEMP, M_WAITOK | M_ZERO);
+
+       return (recv_msg_array);
+}
+
+void
+free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
+{
+       u_int i;
+
+       for (i = 0; i < count; i++) {
+               struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
+
+               if (recv_msg_elem->uio != NULL)
+                       uio_free(recv_msg_elem->uio);
+               if (recv_msg_elem->psa != NULL)
+                       _FREE(recv_msg_elem->psa, M_TEMP);
+               if (recv_msg_elem->controlp != NULL)
+                       m_freem(recv_msg_elem->controlp);
+       }
+       _FREE(recv_msg_array, M_TEMP);
+}
+
+
+__private_extern__ user_ssize_t
+recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count)
+{
+       user_ssize_t len = 0;
+       u_int i;
+
+       for (i = 0; i < count; i++) {
+               struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
+
+               if (recv_msg_elem->uio != NULL)
+                       len += uio_resid(recv_msg_elem->uio);
+       }
+       return (len);
+}
+
+int
+recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count)
+{
+       user_ssize_t len = 0;
+       u_int i;
+
+       for (i = 0; i < count; i++) {
+               struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
+
+               if (recv_msg_elem->uio != NULL) {
+                       user_ssize_t resid = uio_resid(recv_msg_elem->uio);
+
+                       /*
+                        * Sanity check on the validity of the iovec:
+                        * no point of going over sb_max
+                        */
+                       if (resid < 0 || (u_int32_t)resid > sb_max)
+                               return (0);
+
                        len += resid;
                        if (len < 0 || (u_int32_t)len > sb_max)
                                return (0);
@@ -2794,11 +3281,11 @@ uio_array_is_valid(struct uio **uiop, u_int count)
 #define        SFUIOBUFS 64
 
 /* Macros to compute the number of mbufs needed depending on cluster size */
-#define        HOWMANY_16K(n)  ((((unsigned int)(n) - 1) >> (PGSHIFT + 2)) + 1)
-#define        HOWMANY_4K(n)   ((((unsigned int)(n) - 1) >> PGSHIFT) + 1)
+#define        HOWMANY_16K(n)  ((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1)
+#define        HOWMANY_4K(n)   ((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1)
 
 /* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
-#define SENDFILE_MAX_BYTES     (SFUIOBUFS << PGSHIFT)
+#define        SENDFILE_MAX_BYTES      (SFUIOBUFS << PGSHIFT)
 
 /* Upper send limit in the number of mbuf clusters */
 #define        SENDFILE_MAX_16K        HOWMANY_16K(SENDFILE_MAX_BYTES)
@@ -2871,13 +3358,7 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
        size_t sizeof_hdtr;
        off_t file_size;
        struct vfs_context context = *vfs_context_current();
-#define ENXIO_10146739_DBG(err_str) {  \
-       if (error == ENXIO) {           \
-               printf(err_str,         \
-               __func__,               \
-               "File a radar related to rdar://10146739 \n");  \
-       }                               \
-}
+
        KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
            0, 0, 0, 0);
 
@@ -2889,7 +3370,6 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
         * type and connected socket out, positive offset.
         */
        if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
-               ENXIO_10146739_DBG("%s: fp_getfvp error. %s"); 
                goto done;
        }
        if ((fp->f_flag & FREAD) == 0) {
@@ -2902,7 +3382,6 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
        }
        error = file_socket(uap->s, &so);
        if (error) {
-               ENXIO_10146739_DBG("%s: file_socket error. %s");
                goto done1;
        }
        if (so == NULL) {
@@ -2986,7 +3465,6 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
                        nuap.iovcnt = user_hdtr.hdr_cnt;
                        error = writev_nocancel(p, &nuap, &writev_retval);
                        if (error) {
-                               ENXIO_10146739_DBG("%s: writev_nocancel error. %s");
                                goto done2;
                        }
                        sbytes += writev_retval;
@@ -2999,7 +3477,6 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
         *  2. We don't want to read past the end of file
         */
        if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
-               ENXIO_10146739_DBG("%s: vnode_size error. %s");
                goto done2;
        }
 
@@ -3113,7 +3590,6 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
                            error == EINTR || error == EWOULDBLOCK)) {
                                error = 0;
                        } else {
-                               ENXIO_10146739_DBG("%s: fo_read error. %s");
                                mbuf_freem(m0);
                                goto done3;
                        }
@@ -3124,7 +3600,7 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
                    (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
 
                if (xfsize == 0) {
-                       //printf("sendfile: fo_read 0 bytes, EOF\n");
+                       // printf("sendfile: fo_read 0 bytes, EOF\n");
                        break;
                }
                if (xfsize + off > file_size)
@@ -3163,7 +3639,6 @@ retry_space:
                                so->so_error = 0;
                        }
                        m_freem(m0);
-                       ENXIO_10146739_DBG("%s: Unexpected socket error. %s");
                        goto done3;
                }
                /*
@@ -3206,7 +3681,6 @@ retry_space:
                                        error = 0;
                                        continue;
                                }
-                               ENXIO_10146739_DBG("%s: sflt_data_out error. %s");
                                goto done3;
                        }
                        /*
@@ -3220,7 +3694,6 @@ retry_space:
                KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
                    uap->s, 0, 0, 0, 0);
                if (error) {
-                       ENXIO_10146739_DBG("%s: pru_send error. %s");
                        goto done3;
                }
        }
@@ -3236,7 +3709,6 @@ retry_space:
                nuap.iovcnt = user_hdtr.trl_cnt;
                error = writev_nocancel(p, &nuap, &writev_retval);
                if (error) {
-                       ENXIO_10146739_DBG("%s: writev_nocancel error. %s");
                        goto done2;
                }
                sbytes += writev_retval;
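
For context on the userspace entry point whose kernel path is reworked above, a minimal sketch of a Darwin sendfile(2) call is shown below; the descriptors and error handling are illustrative assumptions, not part of this change.

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <stdio.h>

    /* Send an entire regular file over a connected stream socket. */
    static int
    send_whole_file(int fd, int sock)
    {
            off_t len = 0;  /* 0 means "send until end of file" */

            /* fd: open regular file, sock: connected SOCK_STREAM socket */
            if (sendfile(fd, sock, 0, &len, NULL, 0) == -1) {
                    perror("sendfile");
                    return (-1);
            }
            /* On return, len holds the number of bytes actually sent. */
            printf("sent %lld bytes\n", (long long)len);
            return (0);
    }
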
index 71c4fce53769ac5ee48fa7c096b94dde26c623ec..cfe63ef281a3b34ea43ff97511598d2d42a8ae22 100644 (file)
@@ -89,6 +89,7 @@
 #include <sys/unpcb.h>
 #include <sys/vnode_internal.h>
 #include <sys/kdebug.h>
+#include <sys/mcache.h>
 
 #include <kern/zalloc.h>
 #include <kern/locks.h>
 
 #include <mach/vm_param.h>
 
+/*
+ * Maximum number of FDs that can be passed in an mbuf
+ */
+#define UIPC_MAX_CMSG_FD       512
+
 #define        f_msgcount f_fglob->fg_msgcount
 #define        f_cred f_fglob->fg_cred
 #define        f_ops f_fglob->fg_ops
@@ -166,10 +172,9 @@ static void        unp_disconnect(struct unpcb *);
 static void    unp_shutdown(struct unpcb *);
 static void    unp_drop(struct unpcb *, int);
 __private_extern__ void        unp_gc(void);
-static void    unp_scan(struct mbuf *, void (*)(struct fileglob *));
-static void    unp_mark(struct fileglob *);
-static void    unp_discard(struct fileglob *);
-static void    unp_discard_fdlocked(struct fileglob *, proc_t);
+static void    unp_scan(struct mbuf *, void (*)(struct fileglob *, void *arg), void *arg);
+static void    unp_mark(struct fileglob *, __unused void *);
+static void    unp_discard(struct fileglob *, void *);
 static int     unp_internalize(struct mbuf *, proc_t);
 static int     unp_listen(struct unpcb *, proc_t);
 static void    unpcb_to_compat(struct unpcb *, struct unpcb_compat *);
@@ -1870,9 +1875,16 @@ unp_externalize(struct mbuf *rights)
        struct fileglob **rp = (struct fileglob **)(cm + 1);
        int *fds = (int *)(cm + 1);
        struct fileproc *fp;
-       struct fileglob *fg;
+       struct fileglob **fgl;
        int newfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
-       int f;
+       int f, error = 0;
+
+       MALLOC(fgl, struct fileglob **, newfds * sizeof (struct fileglob *),
+               M_TEMP, M_WAITOK);
+       if (fgl == NULL) {
+               error = ENOMEM;
+               goto discard;
+       }
 
        proc_fdlock(p);
 
@@ -1880,14 +1892,9 @@ unp_externalize(struct mbuf *rights)
         * if the new FD's will not fit, then we free them all
         */
        if (!fdavail(p, newfds)) {
-               for (i = 0; i < newfds; i++) {
-                       fg = *rp;
-                       unp_discard_fdlocked(fg, p);
-                       *rp++ = NULL;
-               }
                proc_fdunlock(p);
-
-               return (EMSGSIZE);
+               error = EMSGSIZE;
+               goto discard;
        }
        /*
         * now change each pointer to an fd in the global table to
@@ -1903,34 +1910,55 @@ unp_externalize(struct mbuf *rights)
                 * If receive access is denied, don't pass along
                 * an error message, just discard the descriptor.
                 */
-               if (mac_file_check_receive(kauth_cred_get(), *rp)) {
-                       fg = *rp;
-                       *rp++ = 0;
-                       unp_discard_fdlocked(fg, p);
+               if (mac_file_check_receive(kauth_cred_get(), rp[i])) {
+                       proc_fdunlock(p);
+                       unp_discard(rp[i], p);
+                       fds[i] = 0;
+                       proc_fdlock(p);
                        continue;
                }
 #endif
                if (fdalloc(p, 0, &f))
                        panic("unp_externalize:fdalloc");
-               fg = rp[i];
                fp = fileproc_alloc_init(NULL);
                if (fp == NULL)
                        panic("unp_externalize: MALLOC_ZONE");
                fp->f_iocount = 0;
-               fp->f_fglob = fg;
-               fg_removeuipc(fg);
+               fp->f_fglob = rp[i];
+               if (fg_removeuipc_mark(rp[i]))
+                       fgl[i] = rp[i];
+               else
+                       fgl[i] = NULL;
                procfdtbl_releasefd(p, f, fp);
-               (void) OSAddAtomic(-1, &unp_rights);
                fds[i] = f;
        }
        proc_fdunlock(p);
 
-       return (0);
+       for (i = 0; i < newfds; i++) {
+               if (fgl[i] != NULL) {
+                       VERIFY(fgl[i]->fg_lflags & FG_RMMSGQ);
+                       fg_removeuipc(fgl[i]);
+               }
+               if (fds[i])
+                       (void) OSAddAtomic(-1, &unp_rights);
+       }
+
+discard:
+       if (fgl)
+               FREE(fgl, M_TEMP);
+       if (error) {
+               for (i = 0; i < newfds; i++) {
+                       unp_discard(*rp, p);
+                       *rp++ = NULL;
+               }
+       }
+       return (error);
 }
 
 void
 unp_init(void)
 {
+       _CASSERT(UIPC_MAX_CMSG_FD >= (MCLBYTES / sizeof(int)));
        unp_zone = zinit(sizeof (struct unpcb),
            (nmbclusters * sizeof (struct unpcb)), 4096, "unpzone");
 
@@ -1979,6 +2007,7 @@ unp_internalize(struct mbuf *control, proc_t p)
        struct fileproc *fp;
        int i, error;
        int oldfds;
+       uint8_t fg_ins[UIPC_MAX_CMSG_FD / 8];
 
        /* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
        if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
@@ -1986,6 +2015,7 @@ unp_internalize(struct mbuf *control, proc_t p)
                return (EINVAL);
        }
        oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
+       bzero(fg_ins, sizeof(fg_ins));
 
        proc_fdlock(p);
        fds = (int *)(cm + 1);
@@ -1995,7 +2025,7 @@ unp_internalize(struct mbuf *control, proc_t p)
                if (((error = fdgetf_noref(p, fds[i], &tmpfp)) != 0)) {
                        proc_fdunlock(p);
                        return (error);
-               } else if (!filetype_issendable(FILEGLOB_DTYPE(tmpfp->f_fglob))) {
+               } else if (!file_issendable(p, tmpfp)) {
                        proc_fdunlock(p);
                        return (EINVAL);
                } else if (FP_ISGUARDED(tmpfp, GUARD_SOCKET_IPC)) {
@@ -2012,12 +2042,20 @@ unp_internalize(struct mbuf *control, proc_t p)
         */
        for (i = (oldfds - 1); i >= 0; i--) {
                (void) fdgetf_noref(p, fds[i], &fp);
-               fg_insertuipc(fp->f_fglob);
+               if (fg_insertuipc_mark(fp->f_fglob))
+                       fg_ins[i / 8] |= 0x80 >> (i % 8);
                rp[i] = fp->f_fglob;
-               (void) OSAddAtomic(1, &unp_rights);
        }
        proc_fdunlock(p);
 
+       for (i = 0; i < oldfds; i++) {
+               if (fg_ins[i / 8] & (0x80 >> (i % 8))) {
+                       VERIFY(rp[i]->fg_lflags & FG_INSMSGQ);
+                       fg_insertuipc(rp[i]);
+               }
+               (void) OSAddAtomic(1, &unp_rights);
+       }
+
        return (0);
 }
 
@@ -2152,7 +2190,7 @@ unp_gc(void)
                         */
                        lck_mtx_unlock(&fg->fg_lock);
 
-                       unp_scan(so->so_rcv.sb_mb, unp_mark);
+                       unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
                }
        } while (unp_defer);
        /*
@@ -2265,7 +2303,7 @@ void
 unp_dispose(struct mbuf *m)
 {
        if (m) {
-               unp_scan(m, unp_discard);
+               unp_scan(m, unp_discard, NULL);
        }
 }
 
@@ -2283,7 +2321,7 @@ unp_listen(struct unpcb *unp, proc_t p)
 }
 
 static void
-unp_scan(struct mbuf *m0, void (*op)(struct fileglob *))
+unp_scan(struct mbuf *m0, void (*op)(struct fileglob *, void *arg), void *arg)
 {
        struct mbuf *m;
        struct fileglob **rp;
@@ -2303,7 +2341,7 @@ unp_scan(struct mbuf *m0, void (*op)(struct fileglob *))
                                    sizeof (int);
                                rp = (struct fileglob **)(cm + 1);
                                for (i = 0; i < qfds; i++)
-                                       (*op)(*rp++);
+                                       (*op)(*rp++, arg);
                                break;          /* XXX, but saves time */
                        }
                m0 = m0->m_act;
@@ -2311,7 +2349,7 @@ unp_scan(struct mbuf *m0, void (*op)(struct fileglob *))
 }
 
 static void
-unp_mark(struct fileglob *fg)
+unp_mark(struct fileglob *fg, __unused void *arg)
 {
        lck_mtx_lock(&fg->fg_lock);
 
@@ -2327,23 +2365,21 @@ unp_mark(struct fileglob *fg)
 }
 
 static void
-unp_discard(struct fileglob *fg)
+unp_discard(struct fileglob *fg, void *p)
 {
-       proc_t p = current_proc();              /* XXX */
+       if (p == NULL)
+               p = current_proc();             /* XXX */
 
        (void) OSAddAtomic(1, &unp_disposed);
+       if (fg_removeuipc_mark(fg)) {
+               VERIFY(fg->fg_lflags & FG_RMMSGQ);
+               fg_removeuipc(fg);
+       }
+       (void) OSAddAtomic(-1, &unp_rights);
 
        proc_fdlock(p);
-       unp_discard_fdlocked(fg, p);
-       proc_fdunlock(p);
-}
-static void
-unp_discard_fdlocked(struct fileglob *fg, proc_t p)
-{
-       fg_removeuipc(fg);
-
-       (void) OSAddAtomic(-1, &unp_rights);
        (void) closef_locked((struct fileproc *)0, fg, p);
+       proc_fdunlock(p);
 }
 
 int
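
The descriptor-passing changes above sit underneath the SCM_RIGHTS control-message path; as a reminder of the userspace side that ends up in unp_internalize(), a minimal sketch follows (the function name and error handling are illustrative assumptions).

    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <string.h>

    /* Pass one open descriptor across a connected AF_UNIX socket. */
    static int
    send_fd(int sock, int fd_to_send)
    {
            char dummy = 0;
            struct iovec iov = { .iov_base = &dummy, .iov_len = sizeof(dummy) };
            union {
                    struct cmsghdr hdr;
                    char buf[CMSG_SPACE(sizeof(int))];
            } u;
            struct msghdr msg;
            struct cmsghdr *cm;

            memset(&msg, 0, sizeof(msg));
            memset(&u, 0, sizeof(u));
            msg.msg_iov = &iov;
            msg.msg_iovlen = 1;
            msg.msg_control = u.buf;
            msg.msg_controllen = sizeof(u.buf);

            cm = CMSG_FIRSTHDR(&msg);
            cm->cmsg_level = SOL_SOCKET;
            cm->cmsg_type = SCM_RIGHTS;          /* handled by unp_internalize() */
            cm->cmsg_len = CMSG_LEN(sizeof(int));
            memcpy(CMSG_DATA(cm), &fd_to_send, sizeof(int));

            return (sendmsg(sock, &msg, 0) == -1 ? -1 : 0);
    }
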
index 27c1aed10f9b3d8ae9469f4acfa95517a909a6c4..028411c9ab2b3544bd4cce1c7f2ec251c7e9f90b 100644 (file)
@@ -360,7 +360,7 @@ void vm_find_pressure_candidate(void)
                goto exit;
        }
 
-       VM_DEBUG_EVENT(vm_pageout_scan, VM_PRESSURE_EVENT, DBG_FUNC_NONE, target_pid, resident_max, 0, 0);
+       VM_DEBUG_CONSTANT_EVENT(vm_pressure_event, VM_PRESSURE_EVENT, DBG_FUNC_NONE, target_pid, resident_max, 0, 0);
        VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max);
 
        KNOTE_DETACH(&vm_pressure_klist, kn_max);
@@ -475,6 +475,7 @@ vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int
                struct task*            t = TASK_NULL;
                int                     curr_task_importance = 0;
                boolean_t               consider_knote = FALSE;
+               boolean_t               privileged_listener = FALSE;
 
                p = kn->kn_kq->kq_p;
                proc_list_lock();
@@ -528,7 +529,42 @@ vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int
 
                curr_task_importance = task_importance_estimate(t);
 
-                /* 
+               /*
+                * Privileged listeners are only considered in the multi-level pressure scheme
+                * AND only if the pressure is increasing.
+                */
+               if (level > 0) {
+
+                       if (task_has_been_notified(t, level) == FALSE) {
+
+                               /*
+                                * Is this a privileged listener?
+                                */
+                               if (task_low_mem_privileged_listener(t, FALSE, &privileged_listener) == 0) {
+
+                                       if (privileged_listener) {
+                                               kn_max = kn;
+                                               proc_rele(p);
+                                               goto done_scanning;
+                                       }
+                               }
+                       } else {
+                               proc_rele(p);
+                               continue;
+                       }
+               } else if (level == 0) {
+
+                       /*
+                        * Task wasn't notified when the pressure was increasing and so
+                        * no need to notify it that the pressure is decreasing.
+                        */
+                       if ((task_has_been_notified(t, kVMPressureWarning) == FALSE) && (task_has_been_notified(t, kVMPressureCritical) == FALSE)) {
+                               proc_rele(p);
+                               continue;
+                       }
+               }
+
+               /*
                  * We don't want a small process to block large processes from 
                  * being notified again. <rdar://problem/7955532>
                  */
@@ -551,9 +587,7 @@ vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int
                                                 * b) has importance equal to that of the current selected process but is larger
                                                 */
 
-                                               if (task_has_been_notified(t, level) == FALSE) {
-                                                       consider_knote = TRUE;
-                                               }
+                                               consider_knote = TRUE;
                                        }
                                } else {
                                        if ((curr_task_importance > selected_task_importance) ||
@@ -566,9 +600,7 @@ vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int
                                                 * b) has importance equal to that of the current selected process but is larger
                                                 */
 
-                                               if (task_has_been_notified(t, level) == FALSE) {
-                                                       consider_knote = TRUE;
-                                               }
+                                               consider_knote = TRUE;
                                        }
                                }
                        } else if (level == 0) {
@@ -578,9 +610,7 @@ vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int
                                if ((curr_task_importance > selected_task_importance) ||
                                    ((curr_task_importance == selected_task_importance) && (resident_size > resident_max))) {
 
-                                       if ((task_has_been_notified(t, kVMPressureWarning) == TRUE) || (task_has_been_notified(t, kVMPressureCritical) == TRUE)) {
-                                               consider_knote = TRUE;
-                                       }
+                                       consider_knote = TRUE;
                                }
                        } else if (level == -1) {
 
@@ -606,8 +636,10 @@ vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int
                proc_rele(p);
         }
 
+done_scanning:
        if (kn_max) {
-               VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max);
+               VM_DEBUG_CONSTANT_EVENT(vm_pressure_event, VM_PRESSURE_EVENT, DBG_FUNC_NONE, kn_max->kn_kq->kq_p->p_pid, resident_max, 0, 0);
+               VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max);
        }
 
        return kn_max;
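
The selection logic above decides which pressure listener gets woken; on the userspace side, a process typically observes these levels through libdispatch's memory-pressure source. A minimal sketch, assuming the public DISPATCH_SOURCE_TYPE_MEMORYPRESSURE interface (which is not itself part of this diff):

    #include <dispatch/dispatch.h>
    #include <stdio.h>

    /* Observe normal/warn/critical memory-pressure transitions. */
    static dispatch_source_t
    watch_memory_pressure(void)
    {
            dispatch_source_t src = dispatch_source_create(
                DISPATCH_SOURCE_TYPE_MEMORYPRESSURE, 0,
                DISPATCH_MEMORYPRESSURE_NORMAL | DISPATCH_MEMORYPRESSURE_WARN |
                DISPATCH_MEMORYPRESSURE_CRITICAL,
                dispatch_get_global_queue(QOS_CLASS_UTILITY, 0));

            dispatch_source_set_event_handler(src, ^{
                    unsigned long level = dispatch_source_get_data(src);
                    printf("memory pressure level: 0x%lx\n", level);
            });
            dispatch_resume(src);
            return (src);
    }
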
index 825806a4161c8ad765b6a9a898d1a57a8b927fa9..65e206405e9ee3d53b618ada073576d990a94f8e 100644 (file)
@@ -8,11 +8,11 @@ include $(MakeInc_cmd)
 include $(MakeInc_def)
 
 
-DATAFILES = \
+KERNELFILES = \
        libkern.h
 
 
-EXPORT_MI_LIST = ${DATAFILES}
+EXPORT_MI_LIST = ${KERNELFILES}
 
 EXPORT_MI_DIR = libkern
 
index fced31c67919529e43cb120424ff25acceed6d32..28b335d4647127602541aa672ca173d139a2db7d 100644 (file)
@@ -36,7 +36,7 @@ memchr(const void *bigptr, int ch, size_t length)
        size_t n;
        for (n = 0; n < length; n++)
                if (big[n] == ch)
-                       return (void *)&big[n];
+                       return __DECONST(void *, &big[n]);
        return NULL;
 }
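
The __DECONST change above only affects how the const qualifier is stripped; the macro, in its conventional BSD shape (approximate, the authoritative definition lives in sys/cdefs.h), routes the cast through an integer type so the compiler does not warn about discarding const:

    /* Approximate shape; check sys/cdefs.h for the exact definition. */
    #define __DECONST(type, var)    ((type)(uintptr_t)(const void *)(var))
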
 
index b7c7225dc870eee62b2464126e1fc0ab710cfddc..556aeb5061b9c948b94204275c6defb62141234e 100644 (file)
@@ -15,6 +15,9 @@ DATAFILES = \
        vmparam.h _types.h _limits.h _param.h \
        _mcontext.h
 
+PRIVATE_DATAFILES = \
+       disklabel.h
+
 KERNELFILES = \
        disklabel.h \
        byte_order.h  endian.h \
@@ -25,7 +28,7 @@ KERNELFILES = \
 
 
 INSTALL_MI_LIST = ${DATAFILES}
-INSTALL_MI_LCL_LIST = ${DATAFILES} disklabel.h
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
 
 INSTALL_MI_DIR = machine
 
index 5c906991d092a61ec2c927fa87b91001e0a7b1fa..075ebeb1695b103a6b7acd1a075c10c3509c31a7 100644 (file)
@@ -35,6 +35,8 @@ DATAFILES = \
        chroot.2                \
        close.2                 \
        connect.2               \
+       connectx.2              \
+       disconnectx.2           \
        dup.2                   \
        dup2.2                  \
        execve.2                \
@@ -81,7 +83,6 @@ DATAFILES = \
        getgid.2                \
        getgroups.2             \
        getitimer.2             \
-       getlcid.2               \
        getlogin.2              \
        getpeername.2           \
        getpgrp.2               \
@@ -105,6 +106,7 @@ DATAFILES = \
        kill.2                  \
        kevent.2                \
        kevent64.2              \
+       kevent_qos.2            \
        kqueue.2                \
        lchown.2                \
        link.2                  \
@@ -178,7 +180,6 @@ DATAFILES = \
        setgid.2                \
        setgroups.2             \
        setitimer.2             \
-       setlcid.2               \
        setlogin.2              \
        setpgid.2               \
        setpgrp.2               \
index cbe52679955289f70351bfcd645d27d5d970ed36..63f1838128fa984e8f150be4bd55fe0ed29e05d3 100644 (file)
@@ -33,7 +33,7 @@
 .\"
 .\"     @(#)accept.2   8.2 (Berkeley) 12/11/93
 .\"
-.Dd December 11, 1993
+.Dd March 18, 2015
 .Dt ACCEPT 2
 .Os BSD 4.2
 .Sh NAME
@@ -201,6 +201,7 @@ is necessary.
 .Sh SEE ALSO
 .Xr bind 2 ,
 .Xr connect 2 ,
+.Xr connectx 2 ,
 .Xr listen 2 ,
 .Xr select 2 ,
 .Xr socket 2 ,
index 1e968edaa2ee1bedf45d13bdefba67aa1b6811eb..8374320dabc531ec6cea2beed4667018986d06cc 100644 (file)
@@ -33,7 +33,7 @@
 .\"
 .\"     @(#)bind.2     8.1 (Berkeley) 6/4/93
 .\"
-.Dd June 4, 1993
+.Dd March 18, 2015
 .Dt BIND 2
 .Os BSD 4.2
 .Sh NAME
 .Fa "socklen_t address_len"
 .Fc
 .Sh DESCRIPTION
-.Fn Bind
+.Fn bind
 assigns a name to an unnamed socket.
 When a socket is created 
 with
 .Xr socket 2
 it exists in a name space (address family)
 but has no name assigned.
-.Fn Bind
+.Fn bind
 requests that
 .Fa address
 be assigned to the socket.
@@ -172,6 +172,7 @@ The include file
 is necessary.
 .Sh SEE ALSO
 .Xr connect 2 ,
+.Xr connectx 2 ,
 .Xr getsockname 2 ,
 .Xr listen 2 ,
 .Xr socket 2 ,
index 0d16cab190b2ef5e25a1ce64a3f59b87e3954c8b..8df34340760043da4b7a4c2093bd0aa2e8c3c479 100644 (file)
@@ -96,7 +96,7 @@ flags may only be set or unset by the super-user.
 They may be set at any time, but normally may only be unset when
 the system is in single-user mode.
 (See
-.Xr init 8
+.Xr launchd 8
 for details.)
 .Sh RETURN VALUES
 Upon successful completion, a value of 0 is returned.
@@ -162,7 +162,7 @@ The operation isn't supported by the filesystem.
 .Xr fflagstostr 3 ,
 .Xr lchflags 3 ,
 .Xr strtofflags 3 ,
-.Xr init 8
+.Xr launchd 8
 .Sh HISTORY
 The
 .Fn chflags
index 186b01c20839a178921c2bab61b29ce4d3e3d2f3..c5a06d67458d1c1d2e53c5cc1a8815dfa5143fbe 100644 (file)
@@ -33,7 +33,7 @@
 .\"
 .\"     @(#)connect.2  8.1 (Berkeley) 6/4/93
 .\"
-.Dd June 4, 1993
+.Dd March 18, 2015
 .Dt CONNECT 2
 .Os BSD 4.2
 .Sh NAME
@@ -74,7 +74,9 @@ only once; datagram sockets may use
 .Fn connect
 multiple times to change their association.
 Datagram sockets may dissolve the association
-by connecting to an invalid address, such as a null address 
+by calling
+.Xr disconnectx 2 ,
+or by connecting to an invalid address, such as a null address
 or an address with 
 the address family set to 
 .Dv AF_UNSPEC 
@@ -220,6 +222,8 @@ The include file
 is necessary.
 .Sh SEE ALSO
 .Xr accept 2 ,
+.Xr connectx 2 ,
+.Xr disconnectx 2 ,
 .Xr getsockname 2 ,
 .Xr select 2 ,
 .Xr socket 2 ,
index 96e85c7abbf5ab1f3b1841b164407f6eb4271519..7fbca576e4bca2d3add684156bec36eff519ba03 100644 (file)
@@ -1,5 +1,5 @@
 .\" 
-.\" Copyright (c) 2012 Apple Inc. All rights reserved.
+.\" Copyright (c) 2015 Apple Inc. All rights reserved.
 .\" 
 .\" @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 .\" 
 .\" 
 .\" @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 .\"
-.Dd November 14, 2012
+.Dd March 26, 2015
 .Dt CONNECTX 2
 .Os Darwin
 .Sh NAME
 .Nm connectx
-.Nd initiate one or more connections on a socket
+.Nd initiate a connection on a socket
 .Sh SYNOPSIS
 .Fd #include <sys/socket.h>
 .Ft int
 .Fo connectx
 .Fa "int socket"
-.Fa "const struct sockaddr *saddress"
-.Fa "socklen_t saddress_len"
-.Fa "const struct sockaddr *daddress"
-.Fa "socklen_t daddress_len"
-.Fa "unsigned int ifscope"
-.Fa "associd_t associd"
-.Fa "connid_t *connid"
+.Fa "const sa_endpoints_t *endpoints"
+.Fa "sae_associd_t associd"
+.Fa "unsigned int flags"
+.Fa "const struct iovec *iov"
+.Fa "unsigned int iovcnt"
+.Fa "size_t *len"
+.Fa "sae_connid_t *connid"
 .Fc
 .Sh DESCRIPTION
 The parameter
 .Fa socket
-is a socket.  The communication domain of the socket determines the
-availability and behavior of
-.Fn connectx .
+is a socket.
 In general,
 .Fn connectx
 may be used as a substitute for cases when
 .Xr bind 2
 and
 .Xr connect 2
-are issued in succession.
+are issued in succession, as well as a mechanism to transmit data
+at connection establishment time.
 .Pp
-When the source address
-.Fa saddress
+The
+.Fn connectx
+system call uses a
+.Fa sa_endpoints
+structure to minimize the number of directly supplied arguments. This structure
+has the following form, as defined in
+.In sys/socket.h :
+.Pp
+.Bd -literal
+typedef struct sa_endpoints {
+       unsigned int     sae_srcif;      /* optional source interface   */
+       struct sockaddr *sae_srcaddr;    /* optional source address     */
+       socklen_t        sae_srcaddrlen; /* size of source address      */
+       struct sockaddr *sae_dstaddr;    /* destination address         */
+       socklen_t        sae_dstaddrlen; /* size of destination address */
+}sa_endpoints_t;
+.Ed
+.Pp
+When the optional source address
+.Fa sae_srcaddr
 parameter is specified,
 .Fn connectx
-binds the connection to one of the addresses, as if
+binds the connection to the address, as if
 .Xr bind 2
 is used.  The length of
-.Fa saddress
+.Fa sae_srcaddr
 buffer is specified by
-.Fa saddress_len .
-This buffer may hold more than one addresses, where each successive address
-immediately follows the previous one.  The parameter
-.Fa ifscope
-may also be specified instead of
-.Fa saddress ,
-in order to bind the connection to the interface whose interface index
-equals to
-.Fa ifscope .
-Both
-.Fa saddress
+.Fa sae_srcaddrlen .
+.\" This buffer may hold more than one addresses, where each successive address
+.\" immediately follows the previous one.
+The source address can be obtained by calling
+.Xr getifaddrs 3 .
+.Pp
+The optional parameter
+.Fa sae_srcif
+may also be specified, in order to force the connection to use the interface
+whose interface index equals
+.Fa sae_srcif .
+The value for
+.Fa sae_srcif
+may be obtained by issuing a call to
+.Xr if_nametoindex 3 .
+If only
+.Fa sae_srcif
+is specified, the communication domain will choose a source address on that
+interface for communicating to the peer socket.  Both
+.Fa sae_srcaddr
 and
-.Fa ifscope
-parameters may be specified in order to add more constraints to the connection.
+.Fa sae_srcif
+parameters may also be specified in order to add more constraints to the connection, and
+.Fn connectx
+will fail unless the address is currently assigned to that interface.
 .Pp
-At least one destination address must be specified in the
-.Fa daddress
+A destination address must be specified in the
+.Fa sae_dstaddr
 parameter.  The
-.Fa daddress_len
-specifies the length of that buffer.  When more than one addresses
-is specified, each successive address immediately follows the previous one.
+.Fa sae_dstaddrlen
+specifies the length of that buffer.
+.\" When more than one addresses
+.\" is specified, each successive address immediately follows the previous one.
+.\" Each communication domain interprets the
+.\" .Fa sae_srcaddr
+.\" and
+.\" .Fa sae_dstaddr
+.\" parameters in its own way.
+.\" When multiple addresses are specified, one of the addresses will be chosen.
+.\" The rules used in selecting the eligible addresses as well as their address family requirements vary between communication domains.
+.\" .Pp
+.\" Changes related to the connection state may be monitored by registering for the
+.\" .Dv NOTE_CONNINFO_UPDATED
+.\" .Xr kqueue 2
+.\" event, using the predefined system filter
+.\" .Dv EVFILT_SOCK .
+.\" Details regarding the event may be retrieved by calling
+.\" .Xr getconninfo 3 .
+.\" .Sh MULTIPATH
+.\" On a multipath socket,
+.\" .Fn connectx
+.\" may be used multiple times, in order to establish the initial session
+.\" association with the peer socket upon the first connection, and to further
+.\" establish additional connections related to that association on subsequent
+.\" ones.
+.\" .Pp
+.\" The parameter
+.\" .Fa associd
+.\" specifies the association identifier.  When
+.\" .Fn connectx
+.\" is initially called to establish an associtation, the association identifier
+.\" is not yet known, and
+.\" .Dv ASSOCID_ANY
+.\" must be specified.  After the initial connection is established, the
+.\" association identifier may be retrieved using
+.\" .Xr getassocids 3 ,
+.\" and the value may then be used on subsequent
+.\" .Fn connectx
+.\" calls.
+.\" .Pp
+.\" If the initial connection is established without any protocol-level
+.\" multipath association, the error
+.\" .Er EPROTO
+.\" will be returned, and the connection can be extracted to a new socket with
+.\" the same properties of
+.\" .Fa socket ,
+.\" by calling
+.\" .Xr peeloff 2 .
+.\" .Pp
+.\" An association representing one or more connections, or a single connection
+.\" may be dissolved by calling
+.\" .Xr disconnectx 2 .
+.\" .Sh NON-MULTIPATH
+.\" On non-multipath socket,
+.\" .Fn connectx
+.\" behaves much like a combination of
+.\" .Xr bind 2
+.\" and
+.\" .Xr connect 2 .
 .Pp
-Each communications domain interprets the
-.Fa saddress
+Data to be transmitted may optionally be defined via the
+.Fa iovcnt
+buffers specified by members of the
+.Fa iov
+array, along with a non-NULL
+.Fa len
+parameter, which, upon success, indicates the number of bytes enqueued for
+transmission.
+.Pp
+When the
+.Fa iov
 and
-.Fa daddress
-parameters in its own way.  When multiple addresses are specified, one
-of the addresses will be chosen.  The rules used in selecting the
-address vary between communicaton domains.
+.Fa len 
+parameters are non-NULL, the communication domain will copy the data to the
+socket send buffer. The communication domain may impose a limit on the amount of data allowed to be buffered before connection establishment.
+.Pp
+When the flags parameter is set to CONNECT_RESUME_ON_READ_WRITE and an
+.Fa iov
+is not passed in, the communication domain will trigger the actual connection
+establishment upon the first read or write following the
+.Xr connectx 2
+system call. This flag is ignored if the iov is specified in the
+.Xr connectx 2
+call itself.
 .Pp
-Changes related to the connection state may be monitored by registering for the
-.Dv NOTE_CONNINFO_UPDATED
-.Xr kqueue 2
-event, using the predefined system filter
-.Dv EVFILT_SOCK .
-Details regarding the event may be retrieved by calling
-.Xr getconninfo 3 .
-.Sh MULTIPATH
-On a multipath socket,
+The flags parameter may also be set to CONNECT_DATA_IDEMPOTENT to indicate to
+the communication domain that the data is idempotent. For example, this will
+trigger TCP Fast Open (RFC 7413) with SOCK_STREAM type. The data must be passed in the
+.Fa iov 
+parameter in
+.Xr connectx 2 
+, or passed in with the first write call such as with the
+.Xr writev 2
+or similar system call if the CONNECT_RESUME_ON_READ_WRITE is also set.
+.Pp
+In general, the communication domain makes the final decision on the amount of
+data that may get transmitted at connection establishment time.  If the socket
+requires the data be sent atomically and the data size makes this impossible,
+EMSGSIZE will be returned and the state of the socket is left unchanged as if
 .Fn connectx
-may be used multiple times, in order to establish the initial session
-association with the peer socket upon the first connection, and to further
-establish additional connections related to that assocication on subsequent
-ones.
+was not called.
 .Pp
 The parameter
 .Fa associd
-specifies the association identifier.  When
-.Fn connectx
-is initially called to establish an associtation, the association identifier
-is not yet known, and
-.Dv ASSOCID_ANY
-must be specified.  After the initial connection is established, the
-association identifier may be retrieved using
-.Xr getassocids 3 ,
-and the value may then be used on subsequent
+is reserved for future use, and must always be set to
+.Dv SAE_ASSOCID_ANY .
+The parameter
+.Fa connid
+is also reserved for future use and should be set to NULL.
+.Sh NOTES
 .Fn connectx
-calls.
+is currently supported only on AF_INET and AF_INET6 sockets of type SOCK_DGRAM
+and SOCK_STREAM.
 .Pp
-If the initial connection is established without any protocol-level
-multipath association, the error
-.Er EPROTO
-will be returned, and the connection can be extracted to a new socket with
-the same properties of
-.Fa socket ,
-by calling
-.Xr peeloff 2 .
-.Pp
-An association representing one or more connections, or a single connection
-may be dissolved by calling
-.Xr disconnectx 2 .
-.Sh NON-MULTIPATH
-On non-multipath socket,
+Generally,
+.\" non-multipath
+connection-oriented sockets may successfully
 .Fn connectx
-behaves much like a combination of
-.Xr bind 2
-and
-.Xr connect 2 .
-The parameter
-.Fa associd
-must always be set to
-.Dv ASSOCID_ANY .
-.Pp
-Generally, non-multipath stream sockets may successfully
+only once.  Connectionless sockets may use
 .Fn connectx
-only once; datagram sockets may use
+to create an association to the peer socket, and it may call
+.Xr disconnectx 2
+to dissolve any existing association.  Unlike connection-oriented sockets,
+connectionless sockets may call
+.Fn connectx
+again afterwards to associate to another peer socket.
+.Pp
+If CONNECT_RESUME_ON_READ_WRITE is set without data 
+supplied,
 .Fn connectx
-multiple times to change their association, after first dissolving the
-existing association by calling
-.Xr disconnectx 2 .
+will immediately return success, assuming the rest of the parameters are valid.
+.Xr select 2
+will indicate that the socket is ready for writing, and the actual connection
+establishment is attempted once the initial data is written to the socket via
+.Xr writev 2
+or similar.  Subsequent attempts to write more data will fail until the existing
+connection establishment attempt is successful.  The error status of the socket
+may be retrieved via the SO_ERROR option using
+.Xr getsockopt 2 .
 .Sh RETURN VALUES
-Upon successful completion, a value of 0 is returned and the connection
-identifier is returned through the
-.Fa connid
-parameter.  If the initial connection establishes an association with
-a peer socket, the association identifier may be retrieved by calling
-.Xr getassocids 2 .
-Both of these identifiers are unique
-on a per
-.Fa socket
-basis.  Upon failure, a value of -1 is returned and the global integer
+Upon successful completion, a value of 0 is returned.
+.\" and an opaque value may be returned through the
+.\" .Fa connid
+.\" parameter.  
+The number of bytes from
+.Fa iov
+array which were enqueued for transmission is returned via
+.Fa len .
+.\" If the initial connection establishes an association with a peer socket, the association identifier may be retrieved by calling
+.\" .Xr getassocids 2 .
+.\" Both of these identifiers are unique
+.\" on a per
+.\" .Fa socket
+.\" basis.
+Upon failure, a value of -1 is returned and the global integer
 variable
 .Va errno
 is set to indicate the error.
@@ -177,25 +278,25 @@ The
 system call will fail if:
 .Bl -tag -width Er
 .\" ==========
-.It Bq Er EACCES
-The destination address is a broadcast address and the 
-socket option 
-.Dv SO_BROADCAST 
-is not set.
-.\" ==========
 .It Bq Er EADDRINUSE
-The address is already in use.
+The address specified in the
+.Fa sae_srcaddr
+parameter is already in use.
 .\" ==========
 .It Bq Er EADDRNOTAVAIL
-The specified address is not available on this machine.
+The address specified in the
+.Fa sae_srcaddr
+parameter is not available on this machine, or is not assigned to the interface specified by
+.Fa sae_srcif .
 .\" ==========
 .It Bq Er EAFNOSUPPORT
-Addresses in the specified address family cannot be used with this socket.
+The
+.Fa socket
+cannot find any usable addresses of a specific address family
+as required by the communication domain.
 .\" ==========
 .It Bq Er EALREADY
-The socket is non-blocking
-and a previous connection attempt
-has not yet been completed.
+A previous connection attempt has not yet been completed.
 .\" ==========
 .It Bq Er EBADF
 .Fa socket
@@ -207,31 +308,39 @@ The attempt to connect was ignored
 or explicitly rejected.
 .\" ==========
 .It Bq Er EFAULT
-The
-.Fa address
-parameter specifies an area outside
-the process address space.
+Part of
+.Fa iov
+or data to be written to
+.Fa socket
+points outside the process's allocated address space.
 .\" ==========
 .It Bq Er EHOSTUNREACH
 The target host cannot be reached (e.g., down, disconnected).
 .\" ==========
 .It Bq Er EINPROGRESS
-The socket is non-blocking 
-and the connection cannot
-be completed immediately.
+The connection cannot be completed immediately.
 It is possible to
 .Xr select 2
-for completion by selecting the socket for writing.
+for completion by selecting the
+.Fa socket
+for writing.
 .\" ==========
 .It Bq Er EINTR
 Its execution was interrupted by a signal.
 .\" ==========
+.It Bq Er EMSGSIZE
+The size of the message exceeds the available send buffer space in the
+.Fa socket .
+.\" ==========
 .It Bq Er EINVAL
 An invalid argument was detected
 (e.g.,
-.Fa address_len
-is not valid for the address family,
-the specified address family is invalid).
+.Fa sae_dstaddrlen
+is not valid, the contents of
+.Fa sae_srcaddr
+or
+.Fa sae_dstaddr
+buffer are invalid, etc.)
 .\" ==========
 .It Bq Er EISCONN
 The socket is already connected.
@@ -254,35 +363,38 @@ Because
 .Fa socket
 is listening, no connection is allowed.
 .\" ==========
-.It Bq Er EPROTO
-The connection was successfully established without any protocol-level
-association.  The connection can be extracted to a new socket using
-.Xr peeloff 2 .
+.\".It Bq Er EPROTO
+.\"The connection was successfully established without any protocol-level
+.\"association.  The connection can be extracted to a new socket using
+.\".Xr peeloff 2 .
 .\" ==========
-.It Bq Er EPROTOTYPE
-.Fa address
-has a different type than the socket
-that is bound to the specified peer address.
+.\".It Bq Er EPROTOTYPE
+.\".Fa address
+.\"has a different type than the socket
+.\"that is bound to the specified peer address.
 .\" ==========
 .It Bq Er ETIMEDOUT
 Connection establishment timed out without establishing a connection.
 .\" ==========
-.It Bq Er ECONNRESET
-Remote host reset the connection request.
+.El
 .Sh SEE ALSO
-.Xr accept 2 ,
+.\".Xr accept 2 ,
 .Xr bind 2 ,
 .Xr connect 2 ,
 .Xr disconnectx 2 ,
-.Xr kqueue 2 ,
-.Xr peeloff 2 ,
+.Xr getsockopt 2 ,
+.\".Xr kqueue 2 ,
+.\".Xr peeloff 2 ,
+.\".Xr shutdown 2 ,
 .Xr select 2 ,
 .Xr socket 2 ,
-.Xr getassocids 3 ,
-.Xr getconnids 3 ,
-.Xr getconninfo 3 ,
+.\".Xr getassocids 3 ,
+.\".Xr getconnids 3 ,
+.\".Xr getconninfo 3 ,
+.Xr writev 2 ,
 .Xr compat 5
 .Sh HISTORY
 The
 .Fn connectx
-function call appeared in Darwin 13.0.0
+function call appeared in Darwin 15.0.0
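
To make the rewritten manual page concrete, a minimal sketch of the call it now describes follows; the destination setup and request buffer are illustrative assumptions.

    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <netinet/in.h>
    #include <stdint.h>
    #include <string.h>

    /* Enqueue idempotent data at connect time (TCP Fast Open style). */
    static int
    tfo_connect(int s, const struct sockaddr_in *dst, const void *req, size_t reqlen)
    {
            sa_endpoints_t ep;
            struct iovec iov;
            size_t sent = 0;

            memset(&ep, 0, sizeof(ep));
            ep.sae_dstaddr = (struct sockaddr *)(uintptr_t)dst;
            ep.sae_dstaddrlen = sizeof(*dst);

            iov.iov_base = (void *)(uintptr_t)req;
            iov.iov_len = reqlen;

            /* CONNECT_DATA_IDEMPOTENT requests data-on-SYN delivery when possible. */
            if (connectx(s, &ep, SAE_ASSOCID_ANY, CONNECT_DATA_IDEMPOTENT,
                &iov, 1, &sent, NULL) == -1)
                    return (-1);
            return ((int)sent);
    }
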
index eed45a0de911d038f5add03cf6d7148d78116747..d9990242bd7c0860d8d07c29b4b4470ac136c991 100644 (file)
@@ -1,5 +1,5 @@
 .\" 
-.\" Copyright (c) 2012 Apple Inc. All rights reserved.
+.\" Copyright (c) 2015 Apple Inc. All rights reserved.
 .\" 
 .\" @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 .\" 
@@ -25,7 +25,7 @@
 .\" 
 .\" @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 .\"
-.Dd November 14, 2012
+.Dd April 10, 2015
 .Dt DISCONNECTX 2
 .Os Darwin
 .Sh NAME
@@ -36,8 +36,8 @@
 .Ft int
 .Fo disconnectx
 .Fa "int socket"
-.Fa "associd_t associd"
-.Fa "connid_t connid"
+.Fa "sae_associd_t associd"
+.Fa "sae_connid_t connid"
 .Fc
 .Sh DESCRIPTION
 The parameter
@@ -45,43 +45,50 @@ The parameter
 is a socket.  The communication domain of the socket determines the
 availability and behavior of
 .Fn disconnectx .
-In general,
+For a connection-oriented socket,
 .Fn disconnectx
 is analogous to
 .Xr shutdown 2
 .Dv with SHUT_RDWR
-issued on the connection identified by
-.Fa connid ,
-or on all connections associated with the
-.Fa associd
-association.
+issued on the socket. For a connectionless socket, it disassociates any existing
+association to the peer socket.
+.\" identified by
+.\" .Fa connid ,
+.\" or on all connections associated with the
+.\" .Fa associd
+.\" association.
 .Pp
 The parameter
 .Fa associd
-specifies the association identifier.  It may be set to
-.Dv ASSOCID_ANY
-when there is only one association present;
-.Dv ASSOCID_ALL
-to specify all existing associations; or one of the identifiers returned from
-.Xr getassocids 3 .
+specifies the association identifier.  It should be set to
+.Dv SAE_ASSOCID_ANY .
+.\"when there is only one association present;
+.\".Dv SAE_ASSOCID_ALL
+.\"to specify all existing associations; or one of the identifiers returned from
+.\".Xr getassocids 3 .
 .Pp
 The parameter
 .Fa connid
-specifies the connection identifier.  It may be set to
-.Dv CONNID_ANY
-or
-.Dv CONNID_ALL ,
-in which case the association represented by
-.Fa associd
-will be dissolved; or the value returned from
-.Xr connectx 2
-or
-.Xr getconnids 3 ,
-which indicates that the disconnection occurs only on that connection
-while keeping the session association intact.  For the latter, the connection
-associated with
-.Fa connid
-will no longer be valid upon success.
+should be set to
+.\" specifies the connection identifier.  It may be set to
+.Dv SAE_CONNID_ANY .
+.\" or
+.\".Dv SAE_CONNID_ALL ,
+.\" in which case the association represented by
+.\" .Fa associd
+.\" will be dissolved; or the value returned from
+.\" .Xr connectx 2
+.\" or
+.\" .Xr getconnids 3 ,
+.\" which indicates that the disconnection occurs only on that connection
+.\" while keeping the session association intact.  For the latter, the connection
+.\" associated with
+.\" .Fa connid
+.\" will no longer be valid upon success.
+.Sh NOTES
+.Fn disconnectx
+is currently supported only on AF_INET and AF_INET6 sockets of type SOCK_DGRAM
+and SOCK_STREAM.
 .Sh RETURN VALUES
 The
 .Fn disconnectx
@@ -96,8 +103,9 @@ system call succeeds unless:
 .Bl -tag -width Er
 .\" ===========
 .It Bq Er EALREADY
-Operation already in progress for the session association represented by
-.Fa associd .
+Operation already in progress.
+.\" for the session association represented by
+.\" .Fa associd .
 .\" ===========
 .It Bq Er EBADF
 .Fa Socket
@@ -112,8 +120,11 @@ argument is invalid or the underlying protocol is no longer attached to
 .Fa socket .
 .\" ===========
 .It Bq Er ENOTCONN
-The session association repreresented by
-.Fa associd
+.\" The session association repreresented by
+.\" .Fa associd
+.\" is not connected.
+The
+.Fa socket
 is not connected.
 .\" ===========
 .It Bq Er ENOTSOCK
@@ -121,13 +132,14 @@ is not connected.
 is a file, not a socket.
 .El
 .Sh SEE ALSO
+.Xr connect 2 ,
 .Xr connectx 2 ,
 .Xr socket 2 ,
-.Xr getassocids 3 ,
-.Xr getconnids 3 ,
-.Xr getconninfo 3 ,
+.\" .Xr getassocids 3 ,
+.\" .Xr getconnids 3 ,
+.\" .Xr getconninfo 3 ,
 .Xr compat 5
 .Sh HISTORY
 The
 .Fn disconnectx
-function call appeared in Darwin 13.0.0
+function call appeared in Darwin 15.0.0
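
A matching sketch of the call described by this page, using the reserved identifier values it now mandates; the socket is an assumed connected SOCK_DGRAM descriptor.

    #include <sys/socket.h>

    /* Dissolve a datagram socket's association with its peer. */
    static int
    drop_association(int s)
    {
            return (disconnectx(s, SAE_ASSOCID_ANY, SAE_CONNID_ANY));
    }
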
index 346402ef7069d698d2afe448b55863bd107f228f..ee1b5a769fae1c02ed89baddcbd471b49b123a98 100644 (file)
@@ -151,6 +151,9 @@ It is typical to ask for a combination of common, file, and directory
 attributes and then use the value of the 
 .Dv ATTR_CMN_OBJTYPE 
 attribute to parse the resulting attribute buffer.
+.Pp
+A directory which is a mount point for a file system will have the value "DIR_MNTSTATUS_MNTPOINT" set in its
+ATTR_DIR_MOUNTSTATUS attribute entry. However, the attributes returned for the mount point will be those of the (underlying) file system. The only way to get the attributes of the mounted root directory is to call getattrlist(2) on the mount point.
 .
 .Sh RETURN VALUES
 Upon successful completion the numbers of entries successfully read
diff --git a/bsd/man/man2/getlcid.2 b/bsd/man/man2/getlcid.2
deleted file mode 100644 (file)
index 6625da5..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-.\" Copyright (c) 2005 SPARTA, Inc.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\"    notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\"    notice, this list of conditions and the following disclaimer in the
-.\"    documentation and/or other materials provided with the distribution.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" $FreeBSD$
-.\"
-.\" Note: The date here should be updated whenever a non-trivial
-.\" change is made to the manual page.
-.Dd May 5, 2005
-.Dt GETLCID 2
-.Os
-.Sh NAME
-.Nm getlcid
-.Nd "get login context"
-.Sh SYNOPSIS
-.In sys/lctx.h
-.Ft pid_t
-.Fn getlcid "pid_t pid"
-.Sh DESCRIPTION
-The login context of the process identified by
-.Fa pid
-is returned by
-.Fn getlcid .
-If
-.Fa pid
-is zero,
-.Fn getlcid
-returns the login context of the current process.
-.Pp
-Login contexts are used for tracking processes that originated from a users
-login session.  A login context is inherited via
-.Fn fork
-or by explicit creation using the
-.Fn setlcid
-call.
-.Sh RETURN VALUES
-If successful,
-.Fn getlcid
-returns a non-negative integer, the Login Context ID; otherwise the
-value \-1 is returned and the global variable
-.Fa errno
-is set to indicate
-the error.
-.Sh ERRORS
-The
-.Fn getlcid
-function will fail if:
-.Bl -tag -width Er
-.It Bq Er EPERM
-Operation not permitted.
-.It Bq Er ESRCH
-No such process.
-.It Bq Er ENOATTR
-Attribute not found.
-.El
-.Sh SEE ALSO
-.Xr setlcid 2
-.Sh HISTORY
-The
-.Nm
-manual page
-first appeared in
-.Fx 6.0 .
-.Sh AUTHORS
-This
-manual page was written by
-.An Matthew N. Dodd Aq mdodd@FreeBSD.org .
index 4ca0f72e0200272b398d5f8a3a66761428866cb2..58950e7df00f337fc6dc67ecb1664ad41415662d 100644 (file)
@@ -50,7 +50,7 @@
 .Sh DESCRIPTION
 The
 .Fn getsockname
-fynction returns the current 
+function returns the current
 .Fa address
 for the specified socket. 
 .Pp
index a9b3005554ff8cb9769e78aa35bff5e4f44ba25d..179e9bc0cb44e5d620552fee9cc3ae529f0c7360 100644 (file)
@@ -123,7 +123,7 @@ the appropriate part of the year.
 .Pp
 Only the super-user may set the time of day or time zone.
 If the system securelevel is greater than 1 (see
-.Xr init 8 ),
+.Xr launchd 8 ),
 the time may only be advanced.
 This limitation is imposed to prevent a malicious super-user
 from setting arbitrary time stamps on files.
index 1f6ba149591534b9a33fb191fef79dad3cde34f4..de40a734d862216bb3e25485aff22e9dd86c5265 100644 (file)
@@ -33,7 +33,7 @@
 .\"
 .\"     @(#)intro.2    8.3 (Berkeley) 12/11/93
 .\"
-.Dd December 11, 1993
+.Dd March 18, 2015
 .Dt INTRO 2
 .Os BSD 4
 .Sh NAME
@@ -240,7 +240,8 @@ same routine may complete normally.
 .It Er 36 EINPROGRESS Em "Operation now in progress" .
 An operation that takes a long time to complete (such as
 a
-.Xr connect 2 )
+.Xr connect 2 or
+.Xr connectx 2 )
 was attempted on a non-blocking object (see
 .Xr fcntl 2 ) .
 .It Er 37 EALREADY Em "Operation already in progress" .
@@ -308,6 +309,8 @@ the system lacked sufficient buffer space or because a queue was full.
 .It Er 56 EISCONN Em "Socket is already connected" .
 A
 .Xr connect
+or
+.Xr connectx
 request was made on an already connected socket; or,
 a
 .Xr sendto
@@ -326,7 +329,8 @@ had already been shut down with a previous
 call.
 .It Er 60 ETIMEDOUT Em "Operation timed out" .
 A
-.Xr connect
+.Xr connect ,
+.Xr connectx
 or
 .Xr send
 request failed because the connected party did not
@@ -476,7 +480,7 @@ A new process is created by a currently active process; (see
 The parent process ID of a process is initially the process ID of its creator.
 If the creating process exits,
 the parent process ID of each child is set to the ID of a system process,
-.Xr init .
+.Xr launchd 8 .
 .It  Process Group
 Each active process is a member of a process group that is identified by
 a non-negative integer called the process group ID.  This is the process
@@ -523,7 +527,7 @@ as the group,
 but is in a different process group.
 Note that when a process exits, the parent process for its children
 is changed to be
-.Xr init ,
+.Xr launchd 8 ,
 which is in a separate session.
 Not all members of an orphaned process group are necessarily orphaned
 processes (those whose creating process has exited).
@@ -582,7 +586,7 @@ process and is granted special privileges if its effective user ID is 0.
 .It  Special Processes
 The processes with process IDs of 0, 1, and 2 are special.
 Process 0 is the scheduler.  Process 1 is the initialization process
-.Xr init ,
+.Xr launchd 8 ,
 and is the ancestor of every other process in the system.
 It is used to control the process structure.
 Process 2 is the paging daemon.
diff --git a/bsd/man/man2/kevent_qos.2 b/bsd/man/man2/kevent_qos.2
new file mode 100644 (file)
index 0000000..9f491e6
--- /dev/null
@@ -0,0 +1 @@
+.so man2/kqueue.2
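
The kqueue.2 changes that follow document the extended kevent family; as a point of reference, a minimal kevent64() registration is sketched below (the descriptor and error handling are illustrative assumptions).

    #include <sys/event.h>
    #include <sys/time.h>

    /* Register a read filter for fd and block until it fires once. */
    static int
    wait_readable(int kq, int fd)
    {
            struct kevent64_s kev;

            EV_SET64(&kev, fd, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, 0, 0, 0);
            if (kevent64(kq, &kev, 1, NULL, 0, 0, NULL) == -1)
                    return (-1);
            return (kevent64(kq, NULL, 0, &kev, 1, 0, NULL));
    }
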
index c3e668072bc0b61145571253ec603c095881d447..5e8702457ef6385aa179de6e777ce56dcb2cbcbb 100644 (file)
@@ -53,8 +53,9 @@
 .Sh NAME
 .Nm kqueue ,
 .Nm kevent ,
-and
 .Nm kevent64
+and
+.Nm kevent_qos
 .Nd kernel event notification mechanism
 .Sh LIBRARY
 .Lb libc
@@ -68,21 +69,28 @@ and
 .Fn kevent "int kq" "const struct kevent *changelist" "int nchanges" "struct kevent *eventlist" "int nevents" "const struct timespec *timeout"
 .Ft int
 .Fn kevent64 "int kq" "const struct kevent64_s *changelist" "int nchanges" "struct kevent64_s *eventlist" "int nevents" "unsigned int flags" "const struct timespec *timeout"
+.Ft int
+.Fn kevent_qos "int kq" "const struct kevent_qos_s *changelist" "int nchanges" "struct kevent_qos_s *eventlist" "int nevents" "void *data_out" "size_t *data_available" "unsigned int flags"
 .Fn EV_SET "&kev" ident filter flags fflags data udata
 .Fn EV_SET64 "&kev" ident filter flags fflags data udata "ext[0]" "ext[1]"
+.Fn EV_SET_QOS "&kev" ident filter flags qos udata fflags xflags data "ext[0]" "ext[1]" "ext[2]" "ext[3]"
 .Sh DESCRIPTION
 The
 .Fn kqueue
-system call
+system call allocates a kqueue file descriptor.  This file descriptor
 provides a generic method of notifying the user when a kernel
 event (kevent) happens or a condition holds, based on the results
 of small pieces of kernel code termed filters.
-A kevent is identified by an (ident, filter) pair and specifies
-the interesting conditions to be notified about for that pair. 
-An (ident, filter) pair can only appear once in a given kqueue.
-Subsequent attempts to register the same pair for a given kqueue
+.Pp
+A kevent is identified by an (ident, filter, and optional udata value) 
+tuple.  It specifies the interesting conditions to be notified about
+for that tuple. An (ident, filter, and optional udata value) tuple can
+only appear once in a given kqueue.
+Subsequent attempts to register the same tuple for a given kqueue
 will result in the replacement of the conditions being watched,
 not an addition.
+Whether the udata value is considered as part of the tuple is controlled
+by the EV_UDATA_SPECIFIC flag on the kevent.
 .Pp
 The filter identified in a kevent is executed upon the initial
 registration of that event in order to detect whether a preexisting
@@ -112,9 +120,10 @@ The queue is not inherited by a child created with
 .Xr fork 2 .
 .Pp
 The
-.Fn kevent
-and
+.Fn kevent ,
 .Fn kevent64
+and
+.Fn kevent_qos
 system calls
 are used to register events with the queue, and return any pending
 events to the user.
@@ -122,9 +131,10 @@ The
 .Fa changelist
 argument
 is a pointer to an array of
-.Va kevent
-or
+.Va kevent ,
 .Va kevent64_s
+or
+.Va kevent_qos_s
 structures, as defined in
 .Aq Pa sys/event.h .
 All changes contained in the
@@ -138,16 +148,28 @@ gives the size of
 The
 .Fa eventlist
 argument
-is a pointer to an array of 
-.Va kevent
-or
+is a pointer to an array of output
+.Va kevent ,
 .Va kevent64_s 
+or
+.Va kevent_qos_s 
 structures.
 The
 .Fa nevents
-argument
-determines the size of
+argument determines the size of
 .Fa eventlist .
+If the KEVENT_FLAG_STACK_EVENTS flag is provided on the system call,
+the eventlist array is filled in stack order (starting at the
+highest available index) instead of typical array order.
+The
+.Fa data_out
+argument provides space for extra output data provided by specific filters.
+The
+.Fa data_available
+argument's contents specify the space available in the data pool on input,
+and contain the amount still remaining on output.
+If the KEVENT_FLAG_STACK_DATA flag is specified on the system call,
+the data is allocated from the pool in stack order instead of typical heap order. 
 If
 .Fa timeout
 is a non-NULL pointer, it specifies a maximum interval to wait
@@ -158,6 +180,13 @@ is a NULL pointer, both
 and
 .Fn kevent64
 wait indefinitely.  To effect a poll, the
+.Fa flags
+argument to
+.Fn kevent64
+or
+.Fn kevent_qos
+can include the KEVENT_FLAG_IMMEDIATE value to indicate an
+immediate timeout.  Alternatively, the
 .Fa timeout
 argument should be non-NULL, pointing to a zero-valued
 .Va timespec
@@ -174,12 +203,17 @@ structure. Similarly,
 .Fn EV_SET64 
 initializes a
 .Va kevent64_s
+structure and
+.Fn EV_SET_QOS
+initializes a
+.Va kevent_qos_s
 structure.
 .Pp
 The
-.Va kevent
-and
+.Va kevent ,
 .Va kevent64_s
+and
+.Va kevent_qos_s
 structures are defined as:
 .Bd -literal
 struct kevent {
@@ -191,7 +225,6 @@ struct kevent {
        void            *udata;         /* opaque user data identifier */
 };
 
-
 struct kevent64_s {
         uint64_t        ident;          /* identifier for this event */
        int16_t         filter;         /* filter for event */
@@ -201,18 +234,31 @@ struct kevent64_s {
        uint64_t        udata;          /* opaque user data identifier */
        uint64_t        ext[2];         /* filter-specific extensions */
 };
+
+struct kevent_qos_s {
+        uint64_t        ident;          /* identifier for this event */
+       int16_t         filter;         /* filter for event */
+       uint16_t        flags;          /* general flags */
+       uint32_t        qos;            /* quality of service when servicing event */
+       uint64_t        udata;          /* opaque user data identifier */
+       uint32_t        fflags;         /* filter-specific flags */
+       uint32_t        xflags;         /* extra filter-specific flags */
+       int64_t         data;           /* filter-specific data */
+       uint64_t        ext[4];         /* filter-specific extensions */
+};
 .Ed
 .Pp
 ----
 .Pp
 The fields of
-.Fa struct kevent
-and
+.Fa struct kevent ,
 .Fa struct kevent64_s
+and
+.Fa struct kevent_qos_s
 are:
 .Bl -tag -width XXXfilter
 .It ident
-Value used to identify this event.
+Value used to identify the source of the event.
 The exact interpretation is determined by the attached filter,
 but often is a file descriptor.
 .It filter
@@ -225,7 +271,8 @@ Filter-specific flags.
 .It data
 Filter-specific data value.
 .It udata
-Opaque user-defined value passed through the kernel unchanged.
+Opaque user-defined value passed through the kernel unchanged. It can
+optionally be part of the uniquing decision of the kevent system.
 .El
 .Pp
 In addition,
@@ -237,6 +284,16 @@ This field stores extensions for the event's filter. What type of extension depe
 what type of filter is being used.
 .El
 .Pp
+In addition,
+.Fa struct kevent_qos_s
+contains:
+.Bl -tag -width XXXfilter
+.It xflags
+Extra filter-specific flags.
+.It ext[4]
+The QoS variant provides twice as many extension values for filter-specific uses.
+.El
+.Pp
 ----
 .Pp
 The
@@ -250,15 +307,17 @@ in a duplicate entry.  Adding an event automatically enables it,
 unless overridden by the EV_DISABLE flag.
 .It EV_ENABLE
 Permit
-.Fn kevent
-and
+.Fn kevent ,
 .Fn kevent64
+and
+.Fn kevent_qos
 to return the event if it is triggered.
 .It EV_DISABLE
 Disable the event so
-.Fn kevent
-and
+.Fn kevent ,
 .Fn kevent64
+and
+.Fn kevent_qos
 will not return it.  The filter itself is not disabled.
 .It EV_DELETE
 Removes the event from the kqueue.  Events which are attached to
@@ -294,13 +353,15 @@ below.
 .Pp
 The predefined system filters are listed below.
 Arguments may be passed to and from the filter via the
+.Va data ,
 .Va fflags
-and
-.Va data
+and optionally
+.Va xflags
 fields in the
-.Va kevent
-or
+.Va kevent ,
 .Va kevent64_s
+or
+.Va kevent_qos_s
 structure.
 .Bl -tag -width EVFILT_MACHPORT
 .It EVFILT_READ
@@ -328,6 +389,8 @@ flag in
 .Va fflags ,
 and specifying the new low water mark in
 .Va data .
+The derived per-filter low water mark value is, however, bounded
+by the socket receive buffer's high and low water mark values.
 On return,
 .Va data
 contains the number of bytes of protocol data available to read.
@@ -351,7 +414,7 @@ Returns when the file pointer is not at the end of file.
 contains the offset from current position to end of file,
 and may be negative.
 .It "Fifos, Pipes"
-Returns when the there is data to read;
+Returns when there is data to read;
 .Va data
 contains the number of bytes available.
 .Pp
@@ -360,6 +423,17 @@ When the last writer disconnects, the filter will set EV_EOF in
 This may be cleared by passing in EV_CLEAR, at which point the
 filter will resume waiting for data to become available before
 returning.
+.It "Device nodes"
+Returns when there is data to read from the device;
+.Va data
+contains the number of bytes available.  If the device does
+not support returning the number of bytes, it will not allow the
+filter to be attached.  However, if the NOTE_LOWAT flag is
+specified and the
+.Va data
+field contains 1 on input, those devices will attach, but
+cannot be relied upon to provide an accurate count of bytes
+to be read on output.
 .El
 .It EVFILT_WRITE
 Takes a file descriptor as the identifier, and returns whenever
@@ -431,7 +505,8 @@ or the underlying fileystem was unmounted.
 .Pp
 On return,
 .Va fflags
-contains the events which triggered the filter.
+contains the filter-specific flags which are associated with
+the triggered events seen by this filter.
 .It EVFILT_PROC
 Takes the process ID to monitor as the identifier and the events to watch for
 in
@@ -484,12 +559,22 @@ This filter automatically sets the EV_CLEAR flag internally.
 .It EVFILT_MACHPORT
 Takes the name of a mach port, or port set, in 
 .Va ident
-and waits until a message is received on the port or port set. When a message 
-is recieved, the size of the message is returned in 
-.Va data 
-and if
+and waits until a message is enqueued on the port or port set. When a message 
+is detected, but not directly received by the kevent call, the name of the
+specific port where the message is enqueued is returned in
+.Va data .
+If
 .Va fflags 
-is set to MACH_RCV_MSG, a pointer to the message is returned in ext[0].
+contains MACH_RCV_MSG, the ext[0] and ext[1] fields are assumed to contain
+a pointer to the buffer where the message is to be received and the size
+of the receive buffer, respectively.  If MACH_RCV_MSG is specified, yet the
+buffer size in ext[1] is zero, the space for the buffer may be carved out
+of the
+.Va data_out
+area provided to
+.Fn kevent_qos
+if there is enough space
+remaining there.
 .It EVFILT_TIMER
 Establishes an interval timer with the data
 timer identified by
@@ -520,9 +605,11 @@ If fflags is not set, the default is milliseconds. The timer will be periodic un
 On return,
 .Va data
 contains the number of times the timeout has expired since the last call to
-.Fn kevent 
+.Fn kevent , 
+.Fn kevent64
 or
-.Fn kevent64 .
+.Fn kevent_qos .
+
 This filter automatically sets the EV_CLEAR flag internally.
 .El
 .Pp
@@ -554,9 +641,10 @@ If there was an error creating the kernel event queue, a value of -1 is
 returned and errno set.
 .Pp
 The
-.Fn kevent
-and
+.Fn kevent ,
 .Fn kevent64
+and
+.Fn kevent_qos
 system calls
 return the number of events placed in the
 .Fa eventlist ,
@@ -580,9 +668,10 @@ will be returned, and
 .Dv errno
 will be set to indicate the error condition.
 If the time limit expires, then
-.Fn kevent
-and
+.Fn kevent ,
 .Fn kevent64
+and
+.Fn kevent_qos
 return 0.
 .Sh ERRORS
 The
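
For reference, the following is a minimal user-space sketch of the kevent() registration path described above; kevent64() and kevent_qos() follow the same model with wider structures and the additional flags and data_out arguments. The choice of STDIN_FILENO and EVFILT_READ is arbitrary and error handling is abbreviated.

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct kevent change, event;
	int kq, n;

	kq = kqueue();
	if (kq == -1)
		return 1;
	/* Register (ident, filter) = (stdin, EVFILT_READ); EV_ADD enables it. */
	EV_SET(&change, STDIN_FILENO, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, NULL);
	/* NULL timeout: wait indefinitely for one pending event. */
	n = kevent(kq, &change, 1, &event, 1, NULL);
	if (n > 0)
		printf("fd %lu readable, %ld bytes pending\n",
		    (unsigned long)event.ident, (long)event.data);
	close(kq);
	return 0;
}
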
index f62a62c1e487dec410533b92576e212a44d8ca4c..3949b41a5524a20704ea6439d2dafcfa25eb7b1c 100644 (file)
@@ -34,7 +34,7 @@
 .\"
 .\"     @(#)listen.2   8.2 (Berkeley) 12/11/93
 .\"
-.Dd December 11, 1993
+.Dd March 18, 2015
 .Dt LISTEN 2
 .Os BSD 4.2
 .Sh NAME
@@ -60,9 +60,9 @@ accepted with
 The
 .Fn listen
 call applies only to sockets of type
-.Dv SOCK_STREAM
-or
-.Dv SOCK_SEQPACKET.
+.Dv SOCK_STREAM.
+.\"or
+.\".Dv SOCK_SEQPACKET.
 .Pp
 The
 .Fa backlog
@@ -109,6 +109,7 @@ The socket is not of a type that supports the operation
 .Sh SEE ALSO
 .Xr accept 2 ,
 .Xr connect 2 ,
+.Xr connectx 2 ,
 .Xr socket 2
 .Sh BUGS
 The
index b55d054e109830005a47351dc028a8662dd01f27..5707d6c2d17002017df7292254fdaaba8ac1f35f 100644 (file)
@@ -126,6 +126,9 @@ argument by
 .Em or Ns 'ing
 the following values:
 .Bl -tag -width MAP_HASSEMAPHORE
+.It Dv MAP_ANONYMOUS
+Synonym for
+.Dv MAP_ANON.
 .It Dv MAP_ANON
 Map anonymous memory not associated with any specific file.
 The
@@ -265,7 +268,9 @@ does not include either MAP_PRIVATE or MAP_SHARED.
 The
 .Fa len
 argument
-was negative.
+was negative or zero. Historically, the system call would not return an error if the argument was zero. 
+See other potential restrictions in the
+COMPATIBILITY section below. 
 .It Bq Er EINVAL
 The
 .Fa offset
@@ -314,7 +319,7 @@ The
 parameter must specify either MAP_PRIVATE or MAP_SHARED.
 .It
 The
-.Fa size
+.Fa len
 parameter must not be 0.
 .It
 The
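
A minimal sketch of an anonymous mapping consistent with the rules above: MAP_ANONYMOUS is accepted as a synonym for MAP_ANON, the flags must include MAP_PRIVATE or MAP_SHARED, and len must not be 0. The 4096-byte length is an arbitrary example value.

#include <sys/mman.h>
#include <stdio.h>

int
main(void)
{
	size_t len = 4096;	/* must be non-zero, per the EINVAL note above */
	void *p;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	munmap(p, len);
	return 0;
}
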
index dc70c857afeba2f7ae32780874d34883086b4415..a40dc3112bd3d689fa09673c9e2404b8ee670350 100644 (file)
@@ -92,6 +92,8 @@ Do not interpret special files on the filesystem.
 Union with underlying filesystem instead of obscuring it.
 .It Dv MNT_SYNCHRONOUS
 All I/O to the filesystem should be done synchronously.
+.It Dv MNT_CPROTECT
+Enable data protection on the filesystem if the filesystem is configured for it.
 .El
 .Pp
 The flag
@@ -120,7 +122,7 @@ The format for these argument structures is described in the
 manual page for each filesystem.
 .Pp
 The
-.Fn umount
+.Fn unmount
 function call disassociates the filesystem from the specified
 mount point
 .Fa dir .
@@ -142,8 +144,8 @@ and the variable
 .Va errno
 is set to indicate the error.
 .Pp
-.Nm Umount
-returns the value 0 if the umount succeeded; otherwise -1 is returned
+.Nm unmount
+returns the value 0 if the unmount succeeded; otherwise -1 is returned
 and the variable
 .Va errno
 is set to indicate the error.
@@ -183,7 +185,7 @@ Another process currently holds a reference to
 points outside the process's allocated address space.
 .El
 .Pp
-.Nm Umount
+.Nm unmount
 may fail with one of the following errors:
 .Bl -tag -width [ENAMETOOLONG]
 .It Bq Er EPERM
@@ -215,12 +217,12 @@ points outside the process's allocated address space.
 .El
 .Sh SEE ALSO
 .Xr mount 8 ,
-.Xr umount 8
+.Xr unmount 8
 .Sh BUGS
 Some of the error codes need translation to more obvious messages.
 .Sh HISTORY
 .Fn Mount
 and
-.Fn umount
+.Fn unmount
 function calls appeared in
 .At v6 .
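
A minimal sketch of the unmount() call described above; the mount point path is a placeholder and MNT_FORCE is shown only to illustrate passing a flag.

#include <sys/param.h>
#include <sys/mount.h>
#include <stdio.h>

int
main(void)
{
	/* "/Volumes/Example" is a placeholder mount point. */
	if (unmount("/Volumes/Example", MNT_FORCE) == -1) {
		perror("unmount");
		return 1;
	}
	return 0;
}
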
index 11e795ccc0d82364b85b521cc4882c67dd9af6bc..3ba0acb186c3140dd154f8a72925746ca8a027b7 100644 (file)
@@ -36,7 +36,7 @@
 .Ft int
 .Fo peeloff
 .Fa "int socket"
-.Fa "associd_t associd"
+.Fa "sae_associd_t associd"
 .Fc
 .Sh DESCRIPTION
 The parameter
@@ -53,7 +53,7 @@ into its own separate socket.
 The parameter
 .Fa associd
 specifies the association identifier.  It may be set to
-.Dv ASSOCID_ANY
+.Dv SAE_ASSOCID_ANY
 when there is only one association present; or one of the identifiers
 returned from
 .Xr getassocids 3 .
index c0da7211029fb3cfc54ffd52e22bca4d68491791..c0fa79ea21f25bdfde62c7f4f46af985959425ad 100644 (file)
@@ -48,7 +48,7 @@
 .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd February 27, 2005
+.Dd March 18, 2015
 .Dt POLL 2
 .Os
 .Sh NAME
@@ -63,7 +63,7 @@
 .Fa "int timeout"
 .Fc
 .Sh DESCRIPTION
-.Fn Poll
+.Fn poll
 examines a set of file descriptors
 to see if some of them are ready for I/O
 or if certain events have occurred on them.
@@ -167,7 +167,7 @@ will return without blocking. If the value of
 .Fa timeout
 is -1, the poll blocks indefinitely.
 .Sh RETURN VALUES
-.Fn Poll
+.Fn poll
 returns the number of descriptors that are ready for I/O,
 or -1 if an error occurred.
 If the time limit expires,
@@ -183,7 +183,7 @@ array will be unmodified and the global variable
 .Va errno
 will be set to indicate the error.
 .Sh ERRORS
-.Fn Poll
+.Fn poll
 will fail if:
 .Bl -tag -width Er
 .\" ===========
@@ -213,6 +213,7 @@ system call currently does not support devices.
 .Sh SEE ALSO
 .Xr accept 2 ,
 .Xr connect 2 ,
+.Xr connectx 2 ,
 .Xr kevent 2 ,
 .Xr read 2 ,
 .Xr recv 2 ,
index 5df4371b24ebaf756cb41890d8cb39eaea7d4ad3..2415874d62c2ae18e825814a8ca5cbafb72cbd7b 100644 (file)
@@ -2,7 +2,7 @@
 .\"    $NetBSD: ptrace.2,v 1.3 1996/02/23 01:39:41 jtc Exp $
 .\"
 .\" This file is in the public domain.
-.Dd November 7, 1994
+.Dd March 25, 2015
 .Dt PTRACE 2
 .Os
 .Sh NAME
@@ -94,6 +94,8 @@ had been used with
 .Dv SIGKILL
 given as the signal to be delivered.
 .It Dv PT_ATTACH
+This call has been replaced with PT_ATTACHEXC.
+.It Dv PT_ATTACHEXC
 This request allows a process to gain control of an otherwise unrelated
 process and begin tracing it.  It does not need any cooperation from
 the to-be-traced process.  In this case,
@@ -104,7 +106,10 @@ must have the same real UID as the tracing process, and that it must
 not be executing a setuid or setgid executable.  (If the tracing
 process is running as root, these restrictions do not apply.)  The
 tracing process will see the newly-traced process stop and may then
-control it as if it had been traced all along.
+control it as if it had been traced all along. Note that this call differs
+from the prior call
+.Pq Dv PT_ATTACH
+in that signals from the child are delivered to the parent as Mach exceptions (see EXC_SOFT_SIGNAL).
 .It Dv PT_DETACH
 This request is like PT_CONTINUE, except that it does not allow
 specifying an alternate place to continue execution, and after it
@@ -128,7 +133,7 @@ No process having the specified process ID exists.
 .Bl -bullet -compact
 .It
 A process attempted to use
-.Dv PT_ATTACH
+.Dv PT_ATTACHEXC
 on itself.
 .It
 The
@@ -152,27 +157,27 @@ normally true only of system processes.)
 .It Bq Er EBUSY
 .Bl -bullet -compact
 .It
-.Dv PT_ATTACH
+.Dv PT_ATTACHEXC
 was attempted on a process that was already being traced.
 .It
 A request attempted to manipulate a process that was being traced by
 some process other than the one making the request.
 .It
 A request (other than
-.Dv PT_ATTACH )
+.Dv PT_ATTACHEXC )
 specified a process that wasn't stopped.
 .El
 .It Bq Er EPERM
 .Bl -bullet -compact
 .It
 A request (other than
-.Dv PT_ATTACH )
+.Dv PT_ATTACHEXC )
 attempted to manipulate a process that wasn't being traced at all.
 .It
 An attempt was made to use
-.Dv PT_ATTACH
+.Dv PT_ATTACHEXC
 on a process in violation of the requirements listed under
-.Dv PT_ATTACH
+.Dv PT_ATTACHEXC
 above.
 .El
 .El
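
A minimal sketch of attaching to a process with PT_ATTACHEXC as described above; the target pid is taken from the command line, and the Mach exception handling that would receive the EXC_SOFT_SIGNAL notifications is only indicated by a comment.

#include <sys/types.h>
#include <sys/ptrace.h>
#include <stdio.h>
#include <stdlib.h>

int
main(int argc, char *argv[])
{
	pid_t pid;

	if (argc < 2)
		return 1;
	pid = (pid_t)atoi(argv[1]);

	/* PT_ATTACHEXC replaces PT_ATTACH; the child's signals arrive as Mach exceptions. */
	if (ptrace(PT_ATTACHEXC, pid, 0, 0) == -1) {
		perror("ptrace(PT_ATTACHEXC)");
		return 1;
	}
	/* ... wait for the target to stop (EXC_SOFT_SIGNAL on an exception port) ... */
	if (ptrace(PT_DETACH, pid, 0, 0) == -1)
		perror("ptrace(PT_DETACH)");
	return 0;
}
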
index 4edf1225213af08f7a2ecd2ca1716dacaba39551..38791c085489d9a1b689c0575afbe7180926634e 100644 (file)
@@ -90,9 +90,9 @@ the processor is simply halted; no reboot takes place.
 This option should be used with caution.
 .It Dv RB_INITNAME
 An option allowing the specification of an init program (see
-.Xr init 8 )
+.Xr launchd 8 )
 other than
-.Pa /sbin/init
+.Pa /sbin/launchd 
 to be run when the system reboots.
 This switch is not currently available.
 .It Dv RB_KDB
@@ -124,7 +124,7 @@ prevents this, booting the system with a single-user shell
 on the console.
 .Dv RB_SINGLE
 is actually interpreted by the
-.Xr init 8
+.Xr launchd 8
 program in the newly booted system.
 .Pp
 When no options are given (i.e.,
index b777106b3f0db2e950e1a9eaf5c7d342d3283d57..251fe51c68f66366cab92304eee3011aa129bff2 100644 (file)
@@ -31,7 +31,7 @@
 .\"
 .\"     @(#)recv.2     8.3 (Berkeley) 2/21/94
 .\"
-.Dd May 15, 2006
+.Dd March 18, 2015
 .Dt RECV 2
 .Os
 .Sh NAME
@@ -94,7 +94,9 @@ The
 function is normally used only on a
 .Em connected
 socket (see
-.Xr connect 2 )
+.Xr connect 2 
+or
+.Xr connectx 2 )
 and is identical to
 .Fn recvfrom
 with a
@@ -258,8 +260,9 @@ The
 field is set on return according to the message received.
 .Dv MSG_EOR
 indicates end-of-record;
-the data returned completed a record (generally used with sockets of type
-.Dv SOCK_SEQPACKET ) .
+the data returned completed a record.
+.\" (generally used with sockets of type
+.\".Dv SOCK_SEQPACKET ) .
 .Dv MSG_TRUNC
 indicates that
 the trailing portion of a datagram was discarded
@@ -311,7 +314,8 @@ An attempt to allocate a memory buffer fails.
 .It Bq Er ENOTCONN
 The socket is associated with a connection-oriented protocol
 and has not been connected (see
-.Xr connect 2
+.Xr connect 2,
+.Xr connectx 2,
 and
 .Xr accept 2 ) .
 .\" ===========
index 456b8dc7bd0b834d7485e390e3cbf3c38b993db7..f141a54374db4f85887ae3affdf22d3d9696ba7b 100644 (file)
@@ -580,7 +580,9 @@ The
 parameter contains an invalid flag or sizeofsearchparams1/2 is greater than
 SEARCHFS_MAX_SEARCHPARMS (see attr.h).  Additionally, filesystems that do
 not support SRCHFS_SKIPLINKS may return EINVAL if this search option
-is requested.
+is requested. EINVAL may also be returned if you request an attribute, either as a
+search criterion or to be returned for matched entries, that the filesystem does not
+support vending.
 .
 .It Bq Er EAGAIN
 The search terminated with partial results, either because 
@@ -609,17 +611,23 @@ An I/O error occurred while reading from or writing to the file system.
 .
 .Sh CAVEATS
 
-The list of attributes that are valid as search criteria currently includes the 
-following list of attributes.  Note that this list is substantially smaller than 
-what is available via 
+The list of attributes valid for searching and returning to the caller may
+be substantially smaller than that of the
 .Xr getattrlist 2
-for a particular filesystem object.  In general, a filesystem that supports 
+system call. See the following lists for the currently available search criteria.
+In general, a filesystem that supports 
 .Fn searchfs
 will typically supply per-item attributes for matched objects that are also 
 supported by the
 .Xr getdirentries 2
 system call.  This varies from filesystem to filesystem.
 
+
+.Sh SEARCH ATTRIBUTES
+
+The list of attributes that are valid as search criteria for a particular
+filesystem object currently includes the following.
+
 .Pp
 .
 .Bl -item -compact
@@ -669,6 +677,97 @@ ATTR_FILE_RSRCLENGTH
 ATTR_FILE_RSRCALLOCSIZE
 .El
 .
+
+.Sh RETURN ATTRIBUTES
+
+As mentioned above, the list of attributes that are available to be returned to the caller
+varies by filesystem, but should include the following attributes, in the following order.
+The buffer should be assumed to be packed similarly to the output buffer of the 
+.Xr getattrlist 2
+system call. Note that again, this list may be substantially smaller than what is available via
+.Xr getattrlist 2 .
+
+.Pp
+.
+.Bl -item -compact
+.It 
+ATTR_CMN_NAME
+.It
+ATTR_CMN_DEVID
+.It
+ATTR_CMN_FSID
+.It
+ATTR_CMN_OBJTYPE
+.It
+ATTR_CMN_OBJTAG
+.It
+ATTR_CMN_OBJID
+.It
+ATTR_CMN_OBJPERMANENTID
+.It
+ATTR_CMN_PAROBJID
+.It
+ATTR_CMN_SCRIPT
+.It
+ATTR_CMN_CRTIME
+.It
+ATTR_CMN_MODTIME
+.It
+ATTR_CMN_CHGTIME
+.It
+ATTR_CMN_ACCTIME
+.It
+ATTR_CMN_BKUPTIME
+.It
+ATTR_CMN_FNDRINFO
+.It
+ATTR_CMN_OWNERID
+.It
+ATTR_CMN_GRPID
+.It
+ATTR_CMN_ACCESSMASK
+.It
+ATTR_CMN_FLAGS
+.It
+ATTR_CMN_USERACCESS
+.It
+ATTR_CMN_FILEID
+.It
+ATTR_CMN_PARENTID
+.Pp
+.
+.It 
+ATTR_DIR_LINKCOUNT
+.It
+ATTR_DIR_ENTRYCOUNT
+.It
+ATTR_DIR_MOUNTSTATUS
+.Pp
+.
+.It
+ATTR_FILE_LINKCOUNT
+.It
+ATTR_FILE_TOTALSIZE
+.It
+ATTR_FILE_ALLOCSIZE
+.It
+ATTR_FILE_IOBLOCKSIZE
+.It
+ATTR_FILE_CLUMPSIZE
+.It
+ATTR_FILE_DEVTYPE
+.It
+ATTR_FILE_DATALENGTH
+.It
+ATTR_FILE_DATAALLOCSIZE
+.It
+ATTR_FILE_RSRCLENGTH
+.It
+ATTR_FILE_RSRCALLOCSIZE
+.El
+.
+
+
 .Sh EXAMPLES
 .
 The following code searches a volume for files of the specified type and creator.
index eeb8aceb8e467e3203afae274e6ee8b9476352ca..fd0833dc1f98f27def260c0086ee38253ddde6da 100644 (file)
@@ -33,7 +33,7 @@
 .\"
 .\"     @(#)select.2   8.2 (Berkeley) 3/25/94
 .\"
-.Dd March 25, 1994
+.Dd March 18, 2015
 .Dt SELECT 2
 .Os BSD 4.2
 .Sh NAME
@@ -80,7 +80,7 @@
 .Fa "struct timeval *restrict timeout"
 .Fc
 .Sh DESCRIPTION
-.Fn Select
+.Fn select
 examines the I/O descriptor sets whose addresses are passed in
 .Fa readfds ,
 .Fa writefds ,
@@ -102,7 +102,7 @@ On return,
 replaces the given descriptor sets
 with subsets consisting of those descriptors that are ready
 for the requested operation.
-.Fn Select
+.Fn select
 returns the total number of ready descriptors in all the sets.
 .Pp
 The descriptor sets are stored as bit fields in arrays of integers.
@@ -160,7 +160,7 @@ and
 .Fa errorfds
 may be given as nil pointers if no descriptors are of interest.
 .Sh RETURN VALUES
-.Fn Select
+.Fn select
 returns the number of ready descriptors that are contained in
 the descriptor sets,
 or -1 if an error occurred.
@@ -242,6 +242,7 @@ or compile with -D_DARWIN_UNLIMITED_SELECT.
 .Sh SEE ALSO
 .Xr accept 2 ,
 .Xr connect 2 ,
+.Xr connectx 2 ,
 .Xr getdtablesize 2 ,
 .Xr gettimeofday 2 ,
 .Xr read 2 ,
@@ -267,7 +268,7 @@ a larger definition of
 before the inclusion of
 .Aq Pa sys/types.h .
 .Pp
-.Fn Select
+.Fn select
 should probably have been designed to return the time remaining from the
 original timeout, if any, by modifying the time value in place.
 However, it is unlikely this semantic will ever be implemented, as the
index 423e98ae4b3bcd9f06fd96ff300e4970e49dbca9..9f06c7775d5caf0c031e01ca5f42f06ed53d721d 100644 (file)
@@ -142,7 +142,7 @@ descriptors in use.
 .It Bq Er ENAMETOOLONG
 .Fa name
 exceeded
-.Dv SEM_NAME_LEN
+.Dv PSEMNAMLEN
 characters.
 .It Bq Er ENFILE
 Too many semaphores or file descriptors are open on the system.
index 7fc7e9c4dc766212cb68b5a839c1c0ceb3b2ba1e..ba0fbbd4fac0cfae390ad9e1b7555c996312e82d 100644 (file)
@@ -58,7 +58,7 @@ Permission is denied to be remove the semaphore.
 .It Bq Er ENAMETOOLONG
 .Fa name
 exceeded
-.Dv SEM_NAME_LEN
+.Dv PSEMNAMLEN
 characters.
 .It Bq Er ENOENT
 The named semaphore does not exist.
index c0e393ee64a78152babc3175e87f91c8a633dbe6..68898eb3c747b804a552d95c1e6404cc77f7d4df 100644 (file)
@@ -85,6 +85,55 @@ The
 parameter for
 .Fn fsetattrlist
 must be a valid file descriptor for the calling process.
+.
+The list of attributes that can potentially be set via 
+.Fn setattrlist
+is different from the list of attributes that are accessible via 
+.Fn getattrlist .
+In particular, only the following attributes are modifiable via 
+.Fn setattrlist ,
+and not all of them may be supported on all filesystems.
+.Pp
+.
+.Bl -item -compact
+.It
+ATTR_CMN_SCRIPT
+.It 
+ATTR_CMN_CRTIME
+.It
+ATTR_CMN_MODTIME
+.It
+ATTR_CMN_CHGTIME
+.It
+ATTR_CMN_ACCTIME
+.It
+ATTR_CMN_BKUPTIME
+.It
+ATTR_CMN_FNDRINFO
+.It
+ATTR_CMN_OWNERID
+.It
+ATTR_CMN_GRPID
+.It
+ATTR_CMN_ACCESSMASK
+.It
+ATTR_CMN_FLAGS
+.It
+ATTR_CMN_EXTENDED_SECURITY
+.It
+ATTR_CMN_GRPUUID
+.Pp
+.It
+ATTR_VOL_NAME
+.It
+ATTR_VOL_INFO
+.Pp
+.It
+ATTR_FILE_DEVTYPE
+.El
+.Pp
+.
+.
 You must own the file system object in order to set any of the 
 following attributes: 
 .Pp
@@ -101,15 +150,16 @@ ATTR_CMN_CRTIME
 .It
 ATTR_CMN_MODTIME
 .It
-ATTR_CMN_CHGTIME
-.It
 ATTR_CMN_ACCTIME
+.Pp
+ATTR_CMN_CHGTIME
+cannot be set programmatically. Any attempt to set the change time is ignored.
 .El
 .Pp
 .
 You must be root (that is, your process's effective UID must be 0) in order to change the 
 .Dv ATTR_CMN_OWNERID
-attribute.
+attribute.
 Setting other attributes requires that you have write access to the object.
 .Pp
 .
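
A minimal sketch of setting one of the modifiable attributes listed above (ATTR_CMN_MODTIME) via setattrlist(); the path is a placeholder, and the caller must own the file system object.

#include <sys/attr.h>
#include <sys/time.h>
#include <string.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

int
main(void)
{
	struct attrlist al;
	struct timespec mtime;

	memset(&al, 0, sizeof(al));
	al.bitmapcount = ATTR_BIT_MAP_COUNT;
	al.commonattr = ATTR_CMN_MODTIME;	/* attribute buffer is a struct timespec */

	mtime.tv_sec = time(NULL);
	mtime.tv_nsec = 0;

	/* "example.txt" is a placeholder path. */
	if (setattrlist("example.txt", &al, &mtime, sizeof(mtime), 0) == -1) {
		perror("setattrlist");
		return 1;
	}
	return 0;
}
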
diff --git a/bsd/man/man2/setlcid.2 b/bsd/man/man2/setlcid.2
deleted file mode 100644 (file)
index a564bfd..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-.\" Copyright (c) 2005 SPARTA, Inc.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\"    notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\"    notice, this list of conditions and the following disclaimer in the
-.\"    documentation and/or other materials provided with the distribution.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" $FreeBSD$
-.\"
-.\" Note: The date here should be updated whenever a non-trivial
-.\" change is made to the manual page.
-.Dd May 5, 2005
-.Dt SETLCID 3
-.Os
-.Sh NAME
-.Nm setlcid
-.Nd "set login context"
-.Sh SYNOPSIS
-.In sys/lctx.h 
-.Ft int
-.Fn setlcid "pid_t pid" "pid_t lcid"
-.Sh DESCRIPTION
-The
-.Fn setlcid
-system call sets the login context of the specified process
-.Fa pid
-to the specified
-.Fa lcid .
-If the
-.Fa pid
-is zero, then the call applies to the current process.
-If the
-.Fa lcid
-is zero a new login context will be created.
-If the
-.Fa lcid
-is \-1 the process will be removed from the login context
-it is currently a member of, if any.
-.Pp
-Creation of a new login context is only valid for the current process.
-A process may not create a new login context if it is currently a member
-of one.
-.Pp
-Superuser privilege is required to add or remove a process from
-a login context.
-.Sh RETURN VALUES
-.Rv -std setlcid
-.Sh ERRORS
-The
-.Fn setlcid
-function will fail if:
-.Bl -tag -width Er
-.It Bq Er EPERM
-Operation not permitted.
-.It Bq Er ESRCH
-No such process.
-.It Bq Er ENOMEM
-Cannot allocate memory.
-.It Bq Er ENOATTR
-Attribute not found.
-.El
-.Sh SEE ALSO
-.Xr getlcid 2
-.Sh HISTORY
-The
-.Nm
-manual page
-first appeared in
-.Fx 6.0 .
-.Sh AUTHORS
-This
-manual page was written by
-.An Matthew N. Dodd Aq mdodd@FreeBSD.org .
index 36d82683af7c78ed7a74403555346739ba668557..63ab9783add76aa6180ff2fae9bfde6505c9cb0c 100644 (file)
@@ -65,11 +65,6 @@ If the invoker is not the super-user, then the affected process
 must have the same effective user-id as the invoker or be a descendant
 of the invoking process.
 .Pp
-If the calling process is not already a session leader,
-.Fn setpgrp
-sets the process group ID of the calling process
-to that of the calling process.
-Any new session that this creates will have no controlling terminal.
 .Sh RETURN VALUES
 .Fn Setpgid
 returns 0 when the operation was successful.
index 4ac61004523c9ee37dc406f20bd22d4311f739fa..5110848ab1b47c9f62f691e3837ee789d5db5969 100644 (file)
@@ -33,7 +33,7 @@
 .\"
 .\"     @(#)shutdown.2 8.1 (Berkeley) 6/4/93
 .\"
-.Dd June 4, 1993
+.Dd March 18, 2015
 .Dt SHUTDOWN 2
 .Os BSD 4.2
 .Sh NAME
@@ -92,6 +92,8 @@ is a file, not a socket.
 .El
 .Sh SEE ALSO
 .Xr connect 2 ,
+.Xr connectx 2 ,
+.Xr disconnectx 2 ,
 .Xr socket 2
 .Sh HISTORY
 The
index 069197f90bc2a53d95b49a67fd724f372d9608ae..9a0b47079c466f4344842130e90f33fe04e6e77b 100644 (file)
@@ -52,7 +52,7 @@ struct  sigaction {
 
 union __sigaction_u {
        void    (*__sa_handler)(int);
-       void    (*__sa_sigaction)(int, struct __siginfo *,
+       void    (*__sa_sigaction)(int, siginfo_t *,
                       void *);
 };
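
A minimal sketch of installing a three-argument handler through the sa_sigaction member shown above; SIGUSR1 is an arbitrary choice, and the printf() is for illustration only (real handlers should restrict themselves to async-signal-safe calls).

#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void
handler(int signo, siginfo_t *info, void *ucontext)
{
	(void)ucontext;
	printf("signal %d from pid %d\n", signo, (int)info->si_pid);
}

int
main(void)
{
	struct sigaction sa;

	sa.sa_sigaction = handler;	/* three-argument form, enabled by SA_SIGINFO */
	sa.sa_flags = SA_SIGINFO;
	sigemptyset(&sa.sa_mask);
	if (sigaction(SIGUSR1, &sa, NULL) == -1) {
		perror("sigaction");
		return 1;
	}
	pause();	/* wait for a signal to arrive */
	return 0;
}
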
 
index c19161723a70a6a0ecc084dd5ce53d7fe77d260e..4815d2e941c00e6d039821a13059025aec20e505 100644 (file)
@@ -33,7 +33,7 @@
 .\"
 .\"     @(#)socket.2   8.1 (Berkeley) 6/4/93
 .\"
-.Dd June 4, 1993
+.Dd March 18, 2015
 .Dt SOCKET 2
 .Os 
 .Sh NAME
@@ -48,7 +48,7 @@
 .Fa "int protocol"
 .Fc
 .Sh DESCRIPTION
-.Fn Socket
+.Fn socket
 creates an endpoint for communication and returns a descriptor.
 .Pp
 The
@@ -80,8 +80,8 @@ defined types are:
 SOCK_STREAM
 SOCK_DGRAM
 SOCK_RAW
-SOCK_SEQPACKET
-SOCK_RDM
+.\"SOCK_SEQPACKET
+.\"SOCK_RDM
 .Ed
 .Pp
 A
@@ -94,23 +94,24 @@ A
 socket supports
 datagrams (connectionless, unreliable messages of
 a fixed (typically small) maximum length).
-A
-.Dv SOCK_SEQPACKET
-socket may provide a sequenced, reliable,
-two-way connection-based data transmission path for datagrams
-of fixed maximum length; a consumer may be required to read
-an entire packet with each read system call.
-This facility is protocol specific, and presently implemented
-only for
-.Dv PF_NS .
+.\"A
+.\".Dv SOCK_SEQPACKET
+.\"socket may provide a sequenced, reliable,
+.\"two-way connection-based data transmission path for datagrams
+.\"of fixed maximum length; a consumer may be required to read
+.\"an entire packet with each read system call.
+.\"This facility is protocol specific, and presently implemented
+.\"only for
+.\".Dv PF_NS .
 .Dv SOCK_RAW
 sockets provide access to internal network protocols and interfaces.
-The types
+The type
 .Dv SOCK_RAW ,
-which is available only to the super-user, and
-.Dv SOCK_RDM ,
-which is planned,
-but not yet implemented, are not described here.
+which is available only to the super-user. 
+.\" , and
+.\" .Dv SOCK_RDM ,
+.\" which is planned,
+.\" but not yet implemented, are not described here.
 .Pp
 The
 .Fa protocol
@@ -131,6 +132,8 @@ to pipes.  A stream socket must be in a
 state before any data may be sent or received
 on it.  A connection to another socket is created with a
 .Xr connect 2
+or
+.Xr connectx 2
 call.  Once connected, data may be transferred using
 .Xr read 2
 and
@@ -172,16 +175,16 @@ A
 signal is raised if a process sends
 on a broken stream; this causes naive processes,
 which do not handle the signal, to exit.
-.Pp
-.Dv SOCK_SEQPACKET
-sockets employ the same system calls
-as
-.Dv SOCK_STREAM
-sockets.  The only difference
-is that 
-.Xr read 2
-calls will return only the amount of data requested,
-and any remaining in the arriving packet will be discarded.
+.\" .Pp
+.\" .Dv SOCK_SEQPACKET
+.\" sockets employ the same system calls
+.\" as
+.\" .Dv SOCK_STREAM
+.\" sockets.  The only difference
+.\" is that 
+.\" .Xr read 2
+.\" calls will return only the amount of data requested,
+.\" and any remaining in the arriving packet will be discarded.
 .Pp
 .Dv SOCK_DGRAM
 and
@@ -266,6 +269,8 @@ is necessary.
 .Xr accept 2 ,
 .Xr bind 2 ,
 .Xr connect 2 ,
+.Xr connectx 2 ,
+.Xr disconnectx 2 ,
 .Xr getsockname 2 ,
 .Xr getsockopt 2 ,
 .Xr ioctl 2 ,
index e9acf874f1716401c65f814e2646b5a3c7975a13..7282de5a91b76f2c131cf970d42c87cab042ff70 100644 (file)
@@ -105,10 +105,7 @@ returns information about the link,
 while
 .Fn stat
 returns information about the file the link references.
-Unlike other filesystem objects,
-symbolic links may not have an owner, group, access mode, times, etc.
-Instead, these attributes may be taken from the directory that
-contains the link.
+These attributes cannot be relied upon in the case of symbolic links.
 In this case, the only attributes returned from an
 .Fn lstat
 that refer to the symbolic link itself are the file type (S_IFLNK),
@@ -259,9 +256,7 @@ system calls.
 .It st_birthtime
 Time of file creation. Only set once when the file is created. This field is 
 only available in the 64 bit inode variants. On filesystems where birthtime is 
-not available, this field holds the
-.Fa ctime
-instead.
+not available, this field is set to 0 (i.e., the epoch).
 .El
 .Pp
 The size-related fields of the structures are as follows:
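
A minimal sketch of reading the creation time as described above, assuming the 64-bit inode variant of struct stat (where the field is exposed as st_birthtimespec); the path is a placeholder.

#include <sys/stat.h>
#include <stdio.h>

int
main(void)
{
	struct stat st;

	if (stat("example.txt", &st) == -1) {	/* placeholder path */
		perror("stat");
		return 1;
	}
	/* A value of 0 means the filesystem does not provide a birthtime. */
	if (st.st_birthtimespec.tv_sec == 0)
		printf("no birthtime available\n");
	else
		printf("birthtime: %ld\n", (long)st.st_birthtimespec.tv_sec);
	return 0;
}
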
index fe41a2e5316a8135fb74b83b42a124b7b5713260..5d5ebcd8b09f21f6f8c5ce2fbfd3de2b9f896368 100644 (file)
@@ -32,7 +32,7 @@
 .\"     @(#)write.2    8.5 (Berkeley) 4/2/94
 .\" $FreeBSD: src/lib/libc/sys/write.2,v 1.12.2.7 2001/12/14 18:34:02 ru Exp $
 .\"
-.Dd April 2, 1994
+.Dd March 18, 2015 
 .Dt WRITE 2
 .Os
 .Sh NAME
 .Fa "int iovcnt"
 .Fc
 .Sh DESCRIPTION
-.Fn Write
+.Fn write
 attempts to write
 .Fa nbyte
 of data to the object referenced by the descriptor
 .Fa fildes
 from the buffer pointed to by
 .Fa buf .
-.Fn Writev
+.Fn writev
 performs the same action, but gathers the output data
 from the
 .Fa iovcnt
 buffers specified by the members of the
 .Fa iov
 array: iov[0], iov[1], ..., iov[iovcnt\|-\|1].
-.Fn Pwrite
+.Fn pwrite
 performs the same function, but writes to the specified position in
 the file without modifying the file pointer.
 .Pp
@@ -100,7 +100,7 @@ Each
 .Fa iovec
 entry specifies the base address and length of an area
 in memory from which data should be written.
-.Fn Writev
+.Fn writev
 will always write a complete area before proceeding
 to the next.
 .Pp
@@ -255,6 +255,8 @@ The destination is no longer available when writing to a
 .Ux
 domain datagram socket on which
 .Xr connect 2
+or
+.Xr connectx 2
 had been used to set a destination address.
 .\" ===========
 .It Bq Er EINVAL
index 52ba9239bfbaeaa6928f70abaf55d2b349baea90..9ea32c94d45398d266424b47fc86f75933987fd6 100644 (file)
@@ -33,7 +33,7 @@
 .\"
 .\"     @(#)inet.4     8.1 (Berkeley) 6/5/93
 .\"
-.Dd June 5, 1993
+.Dd March 18, 2015
 .Dt INET 4
 .Os BSD 4.2
 .Sh NAME
@@ -84,7 +84,8 @@ to effect
 .Dq wildcard
 matching on incoming messages. 
 The address in a
-.Xr connect 2
+.Xr connect 2 ,
+.Xr connectx 2
 or
 .Xr sendto 2
 call may be given as
index 34088e51a57805d46a21fb29113ec4ff92c9164b..e4f456335351c149d4cf3b61fba5826e80be7935 100644 (file)
@@ -185,7 +185,6 @@ Messages include:
 #define        RTM_DELETE      0x2    /* Delete Route */
 #define        RTM_CHANGE      0x3    /* Change Metrics, Flags, or Gateway */
 #define        RTM_GET         0x4    /* Report Information */
-#define        RTM_LOOSING     0x5    /* Kernel Suspects Partitioning */
 #define        RTM_REDIRECT    0x6    /* Told to use different route */
 #define        RTM_MISS        0x7    /* Lookup failed on this address */
 #define        RTM_RESOLVE     0xb    /* request to resolve dst to LL addr */
index 05584e2557b27888ab65b7420fa81bf7bbebcf86..9f011e793e0b5e523993419aaa55d5e0e3c4a112 100644 (file)
@@ -33,7 +33,7 @@
 .\"
 .\"     @(#)tcp.4      8.1 (Berkeley) 6/5/93
 .\"
-.Dd April 16, 2014
+.Dd March 18, 2015
 .Dt TCP 4
 .Os BSD 4.2
 .Sh NAME
@@ -87,6 +87,8 @@ Only passive sockets may use the
 call to accept incoming connections.
 Only active sockets may use the
 .Xr connect 2
+or
+.Xr connectx 2
 call to initiate connections.
 .Pp
 Passive sockets may
@@ -200,7 +202,7 @@ acknowledgement for every other data packet.
 .It Dv TCP_ENABLE_ECN
 Using Explicit Congestion Notification (ECN) on
 .Tn TCP
-allows end-to-end notification of congestion without dropping packets. Conventionally TCP/IP networks signal congestion by dropping packets. When ECN is successfully negotiated, an ECN-aware router may set a mark in the IP header instead of dropping a packet in order to signal impending congestion. The 
+allows bi-directional end-to-end notification of congestion without dropping packets. Conventionally TCP/IP networks signal congestion by dropping packets. When ECN is successfully negotiated, an ECN-aware router may set a mark in the IP header instead of dropping a packet in order to signal impending congestion. The
 .Tn TCP
 receiver of the packet echoes congestion indication to the 
 .Tn TCP
@@ -209,6 +211,15 @@ sender, which reduces it's transmission rate as if it detected a dropped packet.
 The send socket buffer of a
 .Tn TCP sender has unsent and unacknowledged data. This option allows a 
 .Tn TCP sender to control the amount of unsent data kept in the send socket buffer. The value of the option should be the maximum amount of unsent data in bytes. Kevent, poll and select will generate a write notification when the unsent data falls below the amount given by this option. This will allow an application to generate just-in-time fresh updates for real-time communication.
+.It Dv TCP_FASTOPEN
+The TCP listener can set this option to use the TCP Fast Open feature. After
+setting this option, an
+.Xr accept 2
+may return a socket that is in the SYN_RECEIVED state but is readable and writable.
+.It Dv TCP_CONNECTION_INFO
+This socket option can be used to obtain TCP connection level statistics. The
+"struct tcp_connection_info" defined in <netinet/tcp_var.h> is copied to the
+user buffer.
 .El
 .Pp
 The option level for the
@@ -236,6 +247,8 @@ When a
 .Tn TCP
 socket is set non-blocking, and the connection cannot be established immediately, 
 .Xr connect 2
+or
+.Xr connectx 2
 returns with the error
 .Dv EINPROGRESS ,
 and the connection is established asynchronously.
@@ -255,6 +268,8 @@ can be retrieved via the socket option
 Note that even if the socket is non-blocking, it is possible for the connection 
 to be established immediately. In that case 
 .Xr connect 2
+or
+.Xr connectx 2
 does not return with 
 .Dv EINPROGRESS .
 .Sh DIAGNOSTICS
@@ -290,16 +305,29 @@ address;
 .It Bq Er EINPROGRESS
 returned by
 .Xr connect 2
+or
+.Xr connectx 2
 when the socket is set nonblocking, and the connection cannot be 
 immediately established;
 .It Bq Er EALREADY
 returned by
 .Xr connect 2
+or
+.Xr connectx 2
 when connection request is already in progress for the specified socket.
+.It Bq Er ENODATA
+returned by
+.Xr recv 2
+or
+.Xr send 2
+when a connection is experiencing a data stall (probably due to a middlebox issue).
+It is advised that the application close the current connection and make a
+new connection attempt.
 .
 .El
 .Sh SEE ALSO
 .Xr connect 2 ,
+.Xr connectx 2 ,
 .Xr getsockopt 2 ,
 .Xr kqueue 2 ,
 .Xr poll 2 ,
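
A minimal sketch of the unsent-data low-water-mark option described above (exposed on this platform as TCP_NOTSENT_LOWAT, an assumption of this example); the 8 KB threshold is an arbitrary value and the connect(2)/connectx(2) and readiness-polling steps are only indicated by a comment.

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>

int
main(void)
{
	int s = socket(AF_INET, SOCK_STREAM, 0);
	int lowat = 8192;	/* keep at most 8 KB of unsent data buffered */

	if (s == -1) {
		perror("socket");
		return 1;
	}
	/* Request a write notification once unsent data falls below the threshold. */
	if (setsockopt(s, IPPROTO_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat)) == -1)
		perror("setsockopt(TCP_NOTSENT_LOWAT)");
	/* ... connect(2) or connectx(2), then use kevent/poll/select for writability ... */
	return 0;
}
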
index c104618fec4aee28bb6727a3e09cb24072d47442..381cc7795abd96079224d12e0aac508cb5eeba68 100644 (file)
@@ -33,7 +33,7 @@
 .\"
 .\"     @(#)udp.4      8.1 (Berkeley) 6/5/93
 .\"
-.Dd June 5, 1993
+.Dd March 18, 2015 
 .Dt UDP 4
 .Os BSD 4.2
 .Sh NAME
@@ -59,6 +59,8 @@ and
 .Xr recvfrom
 calls, though the
 .Xr connect 2
+or
+.Xr connectx 2
 call may also be used to fix the destination for future
 packets (in which case the 
 .Xr recv 2
@@ -125,6 +127,8 @@ socket with a network address for which no network interface
 exists.
 .El
 .Sh SEE ALSO
+.Xr connect 2 ,
+.Xr connectx 2 ,
 .Xr getsockopt 2 ,
 .Xr recv 2 ,
 .Xr send 2 ,
index 46c7c695b2e3f57d33b744fbdaf4125670965770..163dea89362a685bf30b12b9c1ce2c3a0245dc09 100644 (file)
@@ -10,6 +10,12 @@ include $(MakeInc_def)
 DATAFILES = \
        devfs.h 
 
+KERNELFILES = \
+       devfs.h \
+       fdesc.h \
+       devfs_proto.h \
+       devfsdefs.h
+
 INSTALL_MI_LIST        = ${DATAFILES}
 
 INSTALL_MI_DIR = miscfs/devfs
@@ -18,12 +24,10 @@ INSTALL_KF_MI_LIST = ${DATAFILES}
 
 INSTALL_KF_MI_LCL_LIST = ${DATAFILES} devfs_proto.h devfsdefs.h
 
-EXPORT_MI_LIST = ${DATAFILES} fdesc.h devfs_proto.h devfsdefs.h
+EXPORT_MI_LIST = ${KERNELFILES}
 
 EXPORT_MI_DIR =        miscfs/devfs
 
-INSTALL_MI_LIST = ${DATAFILES} ${PRIVATE_DATAFILES}
-
 include $(MakeInc_rule)
 include $(MakeInc_dir)
 
index 45d529e8d2c001dbd1b098cd8d5d069fac08484d..21912549a05ec9c325c7c5c1015cd87395283a3a 100644 (file)
@@ -1156,7 +1156,16 @@ retry:
 
                DEVFS_UNLOCK();
 
-               error = vnode_getwithvid(vn_p, vid);
+               /*
+                * We want to use the drainok variant of vnode_getwithvid
+                * because we _don't_ want to get an iocount if the vnode
+                * is blocked in vnode_drain, as that can cause infinite
+                * loops in vn_open_auth. While in-use vnodes are typically
+                * only reclaimed on forced unmounts, in-use devfs tty vnodes
+                * can be quite frequently reclaimed by revoke(2) or by the
+                * exit of a controlling process.
+                */
+               error = vnode_getwithvid_drainok(vn_p, vid);
 
                DEVFS_LOCK();
 
@@ -1180,22 +1189,25 @@ retry:
                         * vnode.  Therefore, ENOENT is a valid result.
                         */
                        error = ENOENT;
+               } else if (error == ENODEV) {
+                       /*
+                        * The Filesystem is getting unmounted.
+                        */
+                       error = ENOENT;
                } else if (error && (nretries < DEV_MAX_VNODE_RETRY)) {
                        /*
                         * If we got an error from vnode_getwithvid, it means
                         * we raced with a recycle and lost i.e. we asked for
-                        * an iocount only after vnode_drain had completed on
-                        * the vnode and returned with an error only after
-                        * devfs_reclaim was called on the vnode. While
-                        * devfs_reclaim sets dn_vn to NULL but while we were
-                        * waiting to reacquire DEVFS_LOCK, another vnode might
-                        * have gotten associated with the dnp. In either case,
-                        * we need to retry otherwise we will end up returning
-                        * an ENOENT for this lookup but the next lookup will
-                        * succeed because it creates a new vnode (or a racing
-                        * lookup created a new vnode already).
-                        *
-                        * We cap the number of retries at 8.
+                        * an iocount only after vnode_drain had been entered
+                        * for the vnode and returned with an error only after
+                        * devfs_reclaim was called on the vnode.  devfs_reclaim
+                        * sets dn_vn to NULL but while we were waiting to
+                        * reacquire DEVFS_LOCK, another vnode might have gotten
+                        * associated with the dnp. In either case, we need to
+                        * retry; otherwise we will end up returning an ENOENT
+                        * for this lookup but the next lookup will succeed
+                        * because it creates a new vnode (or a racing lookup
+                        * created a new vnode already).
                         */
                        error = 0;
                        nretries++;
index 6ad89993971ceebb797d45af69ceb95ab1fd9b26..f8406a25194029d3e9945c86d809937dc492666e 100644 (file)
@@ -182,7 +182,7 @@ devfs_mount(struct mount *mp, __unused vnode_t devvp, __unused user_addr_t data,
        /*-
         *  Fill out some fields
         */
-       mp->mnt_data = (qaddr_t)devfs_mp_p;
+       __IGNORE_WCASTALIGN(mp->mnt_data = (qaddr_t)devfs_mp_p);
        mp->mnt_vfsstat.f_fsid.val[0] = (int32_t)(uintptr_t)devfs_mp_p;
        mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
        mp->mnt_flag |= MNT_LOCAL;
index 2694ee502a841ee81f72e42d345e72c4dd5d782d..14f1db88979c573bbe2f6352e77a78fc6bbb987b 100644 (file)
@@ -7,10 +7,10 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
 include $(MakeInc_cmd)
 include $(MakeInc_def)
 
-DATAFILES = \
+KERNELFILES = \
        fifo.h
 
-EXPORT_MI_LIST = ${DATAFILES}
+EXPORT_MI_LIST = ${KERNELFILES}
 
 EXPORT_MI_DIR =        miscfs/fifofs
 
index 23c596618a262f16b556e85df9b19569a70d9dde..62cc11fed13b85cc7c737ce202219acec79b4f8d 100644 (file)
@@ -327,6 +327,8 @@ fifo_read(struct vnop_read_args *ap)
        if (error != EWOULDBLOCK) {
                error = soreceive(rso, (struct sockaddr **)0, uio, (struct mbuf **)0,
                                                (struct mbuf **)0, &rflags);
+               if (error == 0 && ap->a_vp->v_knotes.slh_first != NULL)
+                       KNOTE(&ap->a_vp->v_knotes, 0);
        }
        else {
                /* clear EWOULDBLOCK and return EOF (zero) */
@@ -358,6 +360,8 @@ fifo_write(struct vnop_write_args *ap)
 #endif
        error = sosend(wso, (struct sockaddr *)0, ap->a_uio, NULL,
                       (struct mbuf *)0, (ap->a_ioflag & IO_NDELAY) ? MSG_NBIO : 0);
+       if (error == 0 && ap->a_vp->v_knotes.slh_first != NULL)
+               KNOTE(&ap->a_vp->v_knotes, 0);
 
        return (error);
 }
index 2394edf466a1fe7641fcf06c3e2e783188efa783..109c5fc29fdd4a8a1e7832d44882a23ebff4136b 100644 (file)
@@ -10,11 +10,14 @@ include $(MakeInc_def)
 DATAFILES = \
        specdev.h
 
+KERNELFILES = \
+       ${DATAFILES}
+
 INSTALL_MI_LIST        = ${DATAFILES}
 
 INSTALL_MI_DIR = miscfs/specfs
 
-EXPORT_MI_LIST = ${DATAFILES}
+EXPORT_MI_LIST = ${KERNELFILES}
 
 EXPORT_MI_DIR =        miscfs/specfs
 
index fe762c21fa36237187ba40beaacba3d90554fe44..fd79c99fa304d08d732784ab4055fe871416dba9 100644 (file)
@@ -218,6 +218,7 @@ struct _throttle_io_info_t {
        int32_t throttle_refcnt;
        int32_t throttle_alloc;
        int32_t throttle_disabled;
+       int32_t throttle_is_fusion_with_priority;
 };
 
 struct _throttle_io_info_t _throttle_io_info[LOWPRI_MAX_NUM_DEV];
@@ -661,7 +662,7 @@ spec_ioctl(struct vnop_ioctl_args *ap)
        int     retval = 0;
 
        KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_IOCTL, 0) | DBG_FUNC_START,
-                             (unsigned int)dev, (unsigned int)ap->a_command, (unsigned int)ap->a_fflag, (unsigned int)ap->a_vp->v_type, 0);
+               dev, ap->a_command, ap->a_fflag, ap->a_vp->v_type, 0);
 
        switch (ap->a_vp->v_type) {
 
@@ -681,8 +682,14 @@ spec_ioctl(struct vnop_ioctl_args *ap)
                                extent = unmap->extents;
 
                                for (i = 0; i < unmap->extentsCount; i++, extent++) {
-                                       KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_IOCTL, 1) | DBG_FUNC_NONE, dev, extent->offset/ap->a_vp->v_specsize, extent->length, 0, 0);
+                                       KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_IOCTL, 1) | DBG_FUNC_NONE, dev,    
+                                               extent->offset/ap->a_vp->v_specsize, extent->length, 0, 0);
                                }
+                       } else if (ap->a_command == DKIOCSYNCHRONIZE) {
+                               dk_synchronize_t *synch;
+                               synch = (dk_synchronize_t *)ap->a_data;
+                               KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_IOCTL, 1) | DBG_FUNC_NONE, dev, ap->a_command,                             
+                                       synch->options, 0, 0);
                        }
                }
                retval = (*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, p);
@@ -693,7 +700,7 @@ spec_ioctl(struct vnop_ioctl_args *ap)
                /* NOTREACHED */
        }
        KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_IOCTL, 0) | DBG_FUNC_END,
-                             (unsigned int)dev, (unsigned int)ap->a_command, (unsigned int)ap->a_fflag, retval, 0);
+               dev, ap->a_command, ap->a_fflag, retval, 0);
 
        return (retval);
 }
@@ -721,23 +728,20 @@ int
 spec_kqfilter(vnode_t vp, struct knote *kn)
 {
        dev_t dev;
-       int err = EINVAL;
+       int err;
+
+       assert(vnode_ischr(vp));
 
-       /*
-        * For a few special kinds of devices, we can attach knotes.
-        * Each filter function must check whether the dev type matches it.
-        */
        dev = vnode_specrdev(vp);
 
-       if (vnode_istty(vp)) {
-               /* We can hook into TTYs... */
-               err = filt_specattach(kn);
-       } else {
 #if NETWORKING
-               /* Try a bpf device, as defined in bsd/net/bpf.c */
-               err = bpfkqfilter(dev, kn);
-#endif
+       /* Try a bpf device, as defined in bsd/net/bpf.c */
+       if ((err = bpfkqfilter(dev, kn)) == 0) {
+               return err;
        }
+#endif
+       /* Try to attach to other char special devices */
+       err = filt_specattach(kn);
 
        return err;
 }
@@ -1207,7 +1211,7 @@ throttle_init_throttle_period(struct _throttle_io_info_t *info, boolean_t isssd)
         */
 
        /* Assign global defaults */
-       if (isssd == TRUE)
+       if ((isssd == TRUE) && (info->throttle_is_fusion_with_priority == 0))
                info->throttle_io_periods = &throttle_io_period_ssd_msecs[0];
        else
                info->throttle_io_periods = &throttle_io_period_msecs[0];
@@ -1274,6 +1278,7 @@ throttle_init(void)
                }
                info->throttle_next_wake_level = THROTTLE_LEVEL_END;
                info->throttle_disabled = 0;
+               info->throttle_is_fusion_with_priority = 0;
        }
 #if CONFIG_IOSCHED
        if (PE_parse_boot_argn("iosched", &iosched, sizeof(iosched))) {
@@ -1610,6 +1615,12 @@ throttle_io_will_be_throttled(__unused int lowpri_window_msecs, mount_t mp)
        else
                info = mp->mnt_throttle_info;
 
+       if (info->throttle_is_fusion_with_priority) {
+               uthread_t ut = get_bsdthread_info(current_thread());
+               if (ut->uu_lowpri_window == 0)
+                       return (THROTTLE_DISENGAGED);
+       }
+
        if (info->throttle_disabled)
                return (THROTTLE_DISENGAGED);
        else
@@ -1762,6 +1773,9 @@ void throttle_info_reset_window(uthread_t ut)
 {
        struct _throttle_io_info_t *info;
 
+       if (ut == NULL) 
+               ut = get_bsdthread_info(current_thread());
+
        if ( (info = ut->uu_throttle_info) ) {
                throttle_info_rel(info);
 
@@ -1892,7 +1906,7 @@ void throttle_info_update_by_mask(void *throttle_info_handle, int flags)
  * support I/O scheduling.
  */
 
-void throttle_info_disable_throttle(int devno)
+void throttle_info_disable_throttle(int devno, boolean_t isfusion)
 {
        struct _throttle_io_info_t *info;
 
@@ -1900,7 +1914,13 @@ void throttle_info_disable_throttle(int devno)
                panic("Illegal devno (%d) passed into throttle_info_disable_throttle()", devno);
 
        info = &_throttle_io_info[devno];
-       info->throttle_disabled = 1;
+       // don't disable software throttling on devices that are part of a fusion device
+       // and override the software throttle periods to use HDD periods
+       if (isfusion) {
+               info->throttle_is_fusion_with_priority = isfusion;
+               throttle_init_throttle_period(info, FALSE);
+       }
+       info->throttle_disabled = !info->throttle_is_fusion_with_priority;
        return;
 } 
 
@@ -2368,15 +2388,20 @@ filt_specattach(struct knote *kn)
                return ENXIO;
        }
 
-       if ((cdevsw_flags[major(dev)] & CDEVSW_SELECT_KQUEUE) == 0) {
+       /*
+        * For a few special kinds of devices, we can attach knotes with
+        * no restrictions because their "select" vectors return the amount
+        * of data available.  Others require an explicit NOTE_LOWAT with
+        * data of 1, indicating that the caller doesn't care about actual
+        * data counts, just an indication that the device has data.
+        */
+
+       if ((cdevsw_flags[major(dev)] & CDEVSW_SELECT_KQUEUE) == 0 &&
+           ((kn->kn_sfflags & NOTE_LOWAT) == 0 || kn->kn_sdata != 1)) {
                return EINVAL;
        }
 
-       /* Resulting wql is safe to unlink even if it has never been linked */
-       kn->kn_hook = wait_queue_link_allocate();
-       if (kn->kn_hook == NULL) {
-               return EAGAIN;
-       }
+       kn->kn_hook_data = 0;
 
        kn->kn_fop = &spec_filtops;
        kn->kn_hookid = vnode_vid(vp);
@@ -2389,21 +2414,24 @@ filt_specattach(struct knote *kn)
 static void 
 filt_specdetach(struct knote *kn)
 {
-       kern_return_t ret;
+       knote_clearstayqueued(kn);
 
-       /* 
-        * Given wait queue link and wait queue set, unlink.  This is subtle.
-        * If the device has been revoked from under us, selclearthread() will
-        * have removed our link from the kqueue's wait queue set, which 
-        * wait_queue_set_unlink_one() will detect and handle.
+       /*
+        * This is potentially tricky: the device's selinfo waitq that was
+        * tricked into being part of this knote's waitq set may not be a part
+        * of any other set, and the device itself may have revoked the memory
+        * in which the waitq was held. We use the knote's kn_hook_data field
+        * to keep the ID of the waitq's prepost table object. This
+        * object keeps a pointer back to the waitq, and gives us a safe way
+        * to decouple the dereferencing of driver allocated memory: if the
+        * driver goes away (taking the waitq with it) then the prepost table
+        * object will be invalidated. The waitq details are handled in the
+        * waitq API invoked here.
         */
-       ret = wait_queue_set_unlink_one(kn->kn_kq->kq_wqs, kn->kn_hook);
-       if (ret != KERN_SUCCESS) {
-               panic("filt_specdetach(): failed to unlink wait queue link.");
+       if (kn->kn_hook_data) {
+               waitq_unlink_by_prepost_id(kn->kn_hook_data, kn->kn_kq->kq_wqs);
+               kn->kn_hook_data = 0;
        }
-       knote_clearstayqueued(kn);
-       (void)wait_queue_link_free(kn->kn_hook);
-       kn->kn_hook = NULL;
 }
 
 static int 
@@ -2411,15 +2439,15 @@ filt_spec(struct knote *kn, long hint)
 {
        vnode_t vp;
        uthread_t uth;
-       wait_queue_set_t old_wqs;
+       struct waitq_set *old_wqs;
        vfs_context_t ctx;
        int selres;
        int error;
        int use_offset;
        dev_t dev;
        uint64_t flags;
-
-       assert(kn->kn_hook != NULL);
+       uint64_t rsvd, rsvd_arg;
+       uint64_t *rlptr = NULL;
 
        if (hint != 0) {
                panic("filt_spec(): nonzero hint?");
@@ -2438,14 +2466,60 @@ filt_spec(struct knote *kn, long hint)
        dev = vnode_specrdev(vp);
        flags = cdevsw_flags[major(dev)];
        use_offset = ((flags & CDEVSW_USE_OFFSET) != 0);
-       assert((flags & CDEVSW_SELECT_KQUEUE) != 0);
 
-       /* Trick selrecord() into hooking kqueue's wait queue set into device wait queue */
+       /*
+        * This function may be called many times to link or re-link the
+        * underlying vnode to the kqueue.  If we've already linked the two,
+        * we will have a valid kn_hook_data which ties us to the underlying
+        * device's waitq via the waitq's prepost table object. However,
+        * devices can abort any select action by calling selthreadclear().
+        * This is OK because the table object will be invalidated by the
+        * driver (through a call to selthreadclear), so any attempt to access
+        * the associated waitq will fail because the table object is invalid.
+        *
+        * Even if we've already registered, we need to pass a pointer
+        * to a reserved link structure. Otherwise, selrecord() will
+        * infer that we're in the second pass of select() and won't
+        * actually do anything!
+        */
+       rsvd = rsvd_arg = waitq_link_reserve(NULL);
+       rlptr = (void *)&rsvd_arg;
+
+       /*
+        * Trick selrecord() into hooking kqueue's wait queue set
+        * into the device's selinfo wait queue
+        */
        old_wqs = uth->uu_wqset;
        uth->uu_wqset = kn->kn_kq->kq_wqs;
-       selres = VNOP_SELECT(vp, filter_to_seltype(kn->kn_filter), 0, kn->kn_hook, ctx);
+       selres = VNOP_SELECT(vp, filter_to_seltype(kn->kn_filter),
+                            0, rlptr, ctx);
        uth->uu_wqset = old_wqs;
 
+       /*
+        * make sure to clean up the reserved link - this guards against
+        * drivers that may not actually call selrecord().
+        */
+       waitq_link_release(rsvd);
+       if (rsvd != rsvd_arg) {
+               /* the driver / handler called selrecord() */
+               struct waitq *wq;
+               memcpy(&wq, rlptr, sizeof(void *));
+
+               /*
+                * The waitq_get_prepost_id() function will (potentially)
+                * allocate a prepost table object for the waitq and return
+                * the table object's ID to us.  It will also set the
+                * waitq_prepost_id field within the waitq structure.
+                *
+                * We can just overwrite kn_hook_data because it's simply a
+                * table ID used to grab a reference when needed.
+                *
+                * We have a reference on the vnode, so we know that the
+                * device won't go away while we get this ID.
+                */
+               kn->kn_hook_data = waitq_get_prepost_id(wq);
+       }
+
        if (use_offset) {
                if (kn->kn_fp->f_fglob->fg_offset >= (uint32_t)selres) {
                        kn->kn_data = 0;
@@ -2458,6 +2532,9 @@ filt_spec(struct knote *kn, long hint)
 
        vnode_put(vp);
 
+       if ((kn->kn_sfflags & NOTE_LOWAT) != 0)
+               return (kn->kn_data >= kn->kn_sdata);
+
        return (kn->kn_data != 0);
 }
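With the NOTE_LOWAT handling added above, a read filter on a character device only fires once kn_data reaches the caller-supplied threshold. A hedged user-space illustration of requesting that behavior through kevent(); the device path and the 64-byte threshold are placeholders, not values taken from this change:

    #include <sys/event.h>
    #include <fcntl.h>
    #include <err.h>

    int
    main(void)
    {
            int kq = kqueue();
            int fd = open("/dev/some_char_device", O_RDONLY); /* placeholder path */
            struct kevent kev, out;

            /* fflags = NOTE_LOWAT, data = low-water mark in bytes */
            EV_SET(&kev, fd, EVFILT_READ, EV_ADD, NOTE_LOWAT, 64, NULL);
            if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
                    err(1, "kevent register");

            /* blocks until at least 64 bytes are readable on fd */
            if (kevent(kq, NULL, 0, &out, 1, NULL) == -1)
                    err(1, "kevent wait");
            return 0;
    }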
 
@@ -2466,9 +2543,11 @@ filt_specpeek(struct knote *kn)
 {
        vnode_t vp;
        uthread_t uth;
-       wait_queue_set_t old_wqs;
+       struct waitq_set *old_wqs;
        vfs_context_t ctx;
        int error, selres;
+       uint64_t rsvd, rsvd_arg;
+       uint64_t *rlptr = NULL;
        
        uth = get_bsdthread_info(current_thread());
        ctx = vfs_context_current();
@@ -2480,13 +2559,45 @@ filt_specpeek(struct knote *kn)
        }
 
        /*
-        * Why pass the link here?  Because we may not have registered in the past...
+        * Even if we've already registered, we need to pass a pointer
+        * to a reserved link structure. Otherwise, selrecord() will
+        * infer that we're in the second pass of select() and won't
+        * actually do anything!
         */
+       rsvd = rsvd_arg = waitq_link_reserve(NULL);
+       rlptr = (void *)&rsvd_arg;
+
        old_wqs = uth->uu_wqset;
        uth->uu_wqset = kn->kn_kq->kq_wqs;
-       selres = VNOP_SELECT(vp, filter_to_seltype(kn->kn_filter), 0, kn->kn_hook, ctx);
+       selres = VNOP_SELECT(vp, filter_to_seltype(kn->kn_filter),
+                            0, (void *)rlptr, ctx);
        uth->uu_wqset = old_wqs;
 
+       /*
+        * make sure to clean up the reserved link - this guards against
+        * drivers that may not actually call selrecord()
+        */
+       waitq_link_release(rsvd);
+       if (rsvd != rsvd_arg) {
+               /* the driver / handler called selrecord() */
+               struct waitq *wq;
+               memcpy(&wq, rlptr, sizeof(void *));
+
+               /*
+                * The waitq_get_prepost_id() function will (potentially)
+                * allocate a prepost table object for the waitq and return
+                * the table object's ID to us.  It will also set the
+                * waitq_prepost_id field within the waitq structure.
+                *
+                * We can just overwrite kn_hook_data because it's simply a
+                * table ID used to grab a reference when needed.
+                *
+                * We have a reference on the vnode, so we know that the
+                * device won't go away while we get this ID.
+                */
+               kn->kn_hook_data = waitq_get_prepost_id(wq);
+       }
+
        vnode_put(vp);
        return selres;
 }
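filt_spec() and filt_specpeek() both rely on the same handshake: reserve a link, hand its address to the driver's select routine, and decide from whether the slot was consumed if selrecord() actually ran. A minimal user-space sketch of that pattern; link_reserve(), link_release() and driver_select() below are stand-ins, not the kernel waitq API:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t next_token = 1;

    static uint64_t
    link_reserve(void)
    {
            return next_token++;
    }

    static void
    link_release(uint64_t t)
    {
            (void)t;        /* would return the link to a pool */
    }

    /*
     * Stand-in for a driver's select handler: it either consumes the
     * reserved link (overwriting the caller's slot) or leaves it alone.
     */
    static void
    driver_select(uint64_t *slot, int wants_link)
    {
            if (wants_link)
                    *slot = 0;
    }

    int
    main(void)
    {
            uint64_t rsvd, rsvd_arg;

            rsvd = rsvd_arg = link_reserve();
            driver_select(&rsvd_arg, 1);
            link_release(rsvd);

            if (rsvd != rsvd_arg)
                    printf("driver consumed the link; record its prepost id\n");
            else
                    printf("driver never called selrecord(); nothing to record\n");
            return 0;
    }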
index b0ef42982f95ac3660ff20788b083567f3c1bc76..773b2cd0001cec65326942addfb900aaf69a84c2 100644 (file)
@@ -10,11 +10,14 @@ include $(MakeInc_def)
 DATAFILES = \
        union.h
 
+KERNELFILES = \
+       ${DATAFILES}
+
 INSTALL_MI_LIST        = ${DATAFILES}
 
 INSTALL_MI_DIR = miscfs/union
 
-EXPORT_MI_LIST = ${DATAFILES}
+EXPORT_MI_LIST = ${KERNELFILES}
 
 EXPORT_MI_DIR = miscfs/union
 
index bdb100c29f29e9c7bef4746c4301b4b326ba184b..93855776eacead9a8e9055fbf72e2ad2f517e048 100644 (file)
@@ -11,12 +11,21 @@ INSTINC_SUBDIRS = \
        altq classq pktsched
 
 DATAFILES= \
-       bpf.h dlil.h \
-       ethernet.h if.h if_arp.h \
-       if_dl.h if_llc.h if_media.h if_mib.h \
-       if_types.h if_var.h \
+       bpf.h \
+       dlil.h \
+       ethernet.h \
+       if.h \
+       if_arp.h \
+       if_dl.h \
+       if_llc.h \
+       if_media.h \
+       if_mib.h \
+       if_types.h \
+       if_var.h \
        if_utun.h \
-       kext_net.h ndrv.h pfkeyv2.h \
+       kext_net.h \
+       ndrv.h \
+       pfkeyv2.h \
        route.h
 
 KERNELFILES= \
@@ -24,17 +33,47 @@ KERNELFILES= \
        if_ether.h init.h radix.h
 
 PRIVATE_DATAFILES = \
-       if_vlan_var.h if_ppp.h firewire.h \
-       ppp_defs.h radix.h if_bond_var.h if_bond_internal.h lacp.h ndrv_var.h \
-       netsrc.h raw_cb.h etherdefs.h if_pflog.h pfvar.h \
-       if_bridgevar.h ntstat.h iptap.h if_llreach.h \
-       if_utun_crypto.h if_utun_crypto_ipsec.h if_utun_crypto_dtls.h \
-       pktap.h if_ipsec.h necp.h content_filter.h packet_mangler.h
+       bpf.h \
+       content_filter.h \
+       etherdefs.h \
+       firewire.h \
+       if.h \
+       if_bond_var.h \
+       if_bond_internal.h \
+       if_bridgevar.h \
+       if_ipsec.h \
+       if_llreach.h \
+       if_media.h \
+       if_mib.h \
+       if_pflog.h \
+       if_ppp.h \
+       if_utun.h \
+       if_utun_crypto.h \
+       if_utun_crypto_ipsec.h \
+       if_utun_crypto_dtls.h \
+       if_var.h \
+       if_vlan_var.h \
+       iptap.h \
+       lacp.h \
+       ndrv_var.h \
+       necp.h \
+       netsrc.h \
+       network_agent.h \
+       ntstat.h \
+       packet_mangler.h \
+       pfkeyv2.h \
+       pfvar.h \
+       pktap.h \
+       ppp_defs.h \
+       radix.h \
+       raw_cb.h \
+       route.h \
+       net_perf.h
 
 PRIVATE_KERNELFILES = $(filter-out radix.h,${KERNELFILES}) \
        bpfdesc.h ppp_comp.h \
        zlib.h bpf_compat.h net_osdep.h \
-       flowadv.h
+       flowadv.h net_perf.h
 
 INSTALL_MI_LIST        = ${DATAFILES}
 
@@ -44,9 +83,9 @@ EXPORT_MI_LIST        = ${INSTALL_MI_LIST} ${KERNELFILES}
 
 EXPORT_MI_DIR = ${INSTALL_MI_DIR}
 
-INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES}
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
 
-INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES}
+INSTALL_KF_MI_LCL_LIST = $(sort ${DATAFILES} ${PRIVATE_DATAFILES} ${PRIVATE_KERNELFILES})
 
 include $(MakeInc_rule)
 include $(MakeInc_dir)
index 6a330882f618c496da79903cbf9d18483afa26e0..c20cf04be09579a92062674b511ff862c59ffb5f 100644 (file)
@@ -23,9 +23,9 @@ EXPORT_MI_LIST        = ${INSTALL_MI_LIST} ${KERNELFILES}
 
 EXPORT_MI_DIR = ${INSTALL_MI_DIR}
 
-INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES}
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
 
-INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES}
+INSTALL_KF_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} ${PRIVATE_KERNELFILES}
 
 include $(MakeInc_rule)
 include $(MakeInc_dir)
index 1383cbb80f7eba3d12b93b19af1a2f0ffb0b908f..f98100d2b739f8cbee0d93c998f3b97a49c9aa5c 100644 (file)
@@ -160,6 +160,10 @@ static unsigned int bpf_wantpktap = 0;
 SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
        &bpf_wantpktap, 0, "");
 
+static int bpf_debug = 0;
+SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
+       &bpf_debug, 0, "");
+
 /*
  *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
  *  bpf_dtab holds pointer to the descriptors, indexed by minor device #
@@ -191,24 +195,25 @@ static mbuf_tag_id_t bpf_mtag_id;
 
 static int     bpf_allocbufs(struct bpf_d *);
 static errno_t bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
-static void    bpf_detachd(struct bpf_d *d);
+static int     bpf_detachd(struct bpf_d *d, int);
 static void    bpf_freed(struct bpf_d *);
 static void    bpf_mcopy(const void *, void *, size_t);
 static int     bpf_movein(struct uio *, int,
                    struct mbuf **, struct sockaddr *, int *);
-static int     bpf_setif(struct bpf_d *, ifnet_t ifp, u_int32_t dlt, dev_t);
+static int     bpf_setif(struct bpf_d *, ifnet_t ifp, u_int32_t dlt);
 static void    bpf_timed_out(void *, void *);
 static void    bpf_wakeup(struct bpf_d *);
 static void    catchpacket(struct bpf_d *, u_char *, struct mbuf *, u_int,
                    u_int, int, void (*)(const void *, void *, size_t));
 static void    reset_d(struct bpf_d *);
-static int     bpf_setf(struct bpf_d *, u_int , user_addr_t , dev_t, u_long);
+static int     bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
 static int     bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
-static int     bpf_setdlt(struct bpf_d *, u_int, dev_t);
+static int     bpf_setdlt(struct bpf_d *, u_int);
 static int     bpf_set_traffic_class(struct bpf_d *, int);
 static void    bpf_set_packet_service_class(struct mbuf *, int);
 
-/*static  void *bpf_devfs_token[MAXBPFILTER];*/
+static void    bpf_acquire_d(struct bpf_d *);
+static void    bpf_release_d(struct bpf_d *);
 
 static  int bpf_devsw_installed;
 
@@ -485,7 +490,13 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
        d->bd_bif = bp;
        d->bd_next = bp->bif_dlist;
        bp->bif_dlist = d;
-       
+
+       /*
+        * Take a reference on the device even if an error is returned
+        * because we keep the device in the interface's list of listeners
+        */
+       bpf_acquire_d(d);
+
        if (first) {
                /* Find the default bpf entry for this ifp */
                if (bp->bif_ifp->if_bpf == NULL) {
@@ -515,6 +526,11 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
                        error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt, BPF_TAP_INPUT_OUTPUT);
        }
 
+       /*
+        * Reset the detach flags in case we previously detached an interface
+        */
+       d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);
+
        if (bp->bif_ifp->if_bpf != NULL &&
                bp->bif_ifp->if_bpf->bif_dlt == DLT_PKTAP)
                d->bd_flags |= BPF_FINALIZE_PKTAP;
@@ -526,17 +542,34 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
 
 /*
  * Detach a file from its interface.
+ *
+ * Return 1 if the descriptor was closed by some thread, 0 otherwise
  */
-static void
-bpf_detachd(struct bpf_d *d)
+static int
+bpf_detachd(struct bpf_d *d, int closing)
 {
        struct bpf_d **p;
        struct bpf_if *bp;
        struct ifnet  *ifp;
 
+       /*
+        * Some other thread already detached
+        */
+       if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0)
+               goto done;
+       /*
+        * This thread is doing the detach
+        */
+       d->bd_flags |= BPF_DETACHING;
+
        ifp = d->bd_bif->bif_ifp;
        bp = d->bd_bif;
-       
+
+       if (bpf_debug != 0)
+               printf("%s: %llx %s%s\n",
+                   __func__, (uint64_t)VM_KERNEL_ADDRPERM(d),
+                   if_name(ifp), closing ? " closing" : "");
+
        /* Remove d from the interface's descriptor list. */
        p = &bp->bif_dlist;
        while (*p != d) {
@@ -576,10 +609,37 @@ bpf_detachd(struct bpf_d *d)
                         * take it out.
                         * Most likely the network interface is gone.
                         */
-                       printf("bpf: ifnet_set_promiscuous failed");
+                       printf("%s: ifnet_set_promiscuous failed\n", __func__);
                }
                lck_mtx_lock(bpf_mlock);
        }
+
+       /*
+        * Wake up other threads that are waiting for this thread to finish
+        * detaching
+        */
+       d->bd_flags &= ~BPF_DETACHING;
+       d->bd_flags |= BPF_DETACHED;
+       /*
+        * Note that we've kept the reference because we may have dropped
+        * the lock when turning off promiscuous mode
+        */
+       bpf_release_d(d);
+
+done:
+       /*
+        * When closing, make sure no other thread refers to the bpf_d
+        */
+       if (bpf_debug != 0)
+               printf("%s: %llx done\n",
+                   __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
+       /*
+        * Let the caller know the bpf_d is closed
+        */
+       if ((d->bd_flags & BPF_CLOSING))
+               return (1);
+       else
+               return (0);
 }
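The BPF_DETACHING/BPF_DETACHED dance above lets exactly one thread perform the unlink while others back off, and the return value tells the caller whether a close is in progress. A reduced model of that protocol, assuming a single mutex (not shown) serializes updates to the flags:

    #include <stdbool.h>

    #define BPF_DETACHING   0x10
    #define BPF_DETACHED    0x20
    #define BPF_CLOSING     0x40

    struct desc {
            int flags;
    };

    /* Returns true when the descriptor is being closed. */
    static bool
    detach(struct desc *d)
    {
            if ((d->flags & (BPF_DETACHED | BPF_DETACHING)) != 0)
                    goto done;                  /* another thread got here first */
            d->flags |= BPF_DETACHING;          /* claim the detach */
            /* ... unlink from the interface, possibly dropping the lock ... */
            d->flags &= ~BPF_DETACHING;
            d->flags |= BPF_DETACHED;
    done:
            return (d->flags & BPF_CLOSING) != 0;
    }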
 
 
@@ -634,7 +694,42 @@ bpf_stop_timer(struct bpf_d *d)
        return (thread_call_cancel(d->bd_thread_call));
 }
 
+void
+bpf_acquire_d(struct bpf_d *d)
+{
+       void *lr_saved =  __builtin_return_address(0);
+
+       lck_mtx_assert(bpf_mlock, LCK_MTX_ASSERT_OWNED);
+
+       d->bd_refcnt += 1;
+
+       d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
+       d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
+}
+
+void
+bpf_release_d(struct bpf_d *d)
+{
+       void *lr_saved =  __builtin_return_address(0);
+
+       lck_mtx_assert(bpf_mlock, LCK_MTX_ASSERT_OWNED);
+
+       if (d->bd_refcnt <= 0)
+               panic("%s: %p refcnt <= 0", __func__, d);
+
+       d->bd_refcnt -= 1;
 
+       d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
+       d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;
+
+       if (d->bd_refcnt == 0) {
+               /* Assert the device is detached */
+               if ((d->bd_flags & BPF_DETACHED) == 0)
+                       panic("%s: %p BPF_DETACHED not set", __func__, d);
+
+               _FREE(d, M_DEVBUF);
+       }
+}
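bpf_acquire_d()/bpf_release_d() pair a plain reference count with a small ring of caller return addresses to help debug leaks. A stand-alone user-space model of the same bookkeeping; REF_HIST and the assert stand in for BPF_REF_HIST and the kernel panic:

    #include <assert.h>
    #include <stdlib.h>

    #define REF_HIST        4

    struct obj {
            int     refcnt;
            void    *ref_lr[REF_HIST];      /* last few callers that took a ref */
            void    *unref_lr[REF_HIST];    /* last few callers that dropped one */
            int     next_ref_lr;
            int     next_unref_lr;
    };

    static void
    obj_acquire(struct obj *o)
    {
            o->refcnt += 1;
            o->ref_lr[o->next_ref_lr] = __builtin_return_address(0);
            o->next_ref_lr = (o->next_ref_lr + 1) % REF_HIST;
    }

    static void
    obj_release(struct obj *o)
    {
            assert(o->refcnt > 0);
            o->refcnt -= 1;
            o->unref_lr[o->next_unref_lr] = __builtin_return_address(0);
            o->next_unref_lr = (o->next_unref_lr + 1) % REF_HIST;
            if (o->refcnt == 0)
                    free(o);                /* last reference frees the object */
    }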
 
 /*
  * Open ethernet device.  Returns ENXIO for illegal minor device number,
@@ -678,7 +773,8 @@ bpfopen(dev_t dev, int flags, __unused int fmt,
                lck_mtx_unlock(bpf_mlock);
                return (EBUSY);
        }
-       d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF, M_WAIT);
+       d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF,
+           M_WAIT | M_ZERO);
        if (d == NULL) {
                /* this really is a catastrophic failure */
                printf("bpfopen: malloc bpf_d failed\n");
@@ -686,32 +782,28 @@ bpfopen(dev_t dev, int flags, __unused int fmt,
                lck_mtx_unlock(bpf_mlock);
                return ENOMEM;
        }
-       bzero(d, sizeof(struct bpf_d));
-       
-       /*
-        * It is not necessary to take the BPF lock here because no other 
-        * thread can access the device until it is marked opened...
-        */
-       
+
        /* Mark "in use" and do most initialization. */
+       bpf_acquire_d(d);
        d->bd_bufsize = bpf_bufsize;
        d->bd_sig = SIGIO;
        d->bd_seesent = 1;
        d->bd_oflags = flags;
        d->bd_state = BPF_IDLE;
-       d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
        d->bd_traffic_class = SO_TC_BE;
+       d->bd_flags |= BPF_DETACHED;
        if (bpf_wantpktap)
                d->bd_flags |= BPF_WANT_PKTAP;
        else
                d->bd_flags &= ~BPF_WANT_PKTAP;
-
+       d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
        if (d->bd_thread_call == NULL) {
                printf("bpfopen: malloc thread call failed\n");
                bpf_dtab[minor(dev)] = NULL;
+               bpf_release_d(d);
                lck_mtx_unlock(bpf_mlock);
-               _FREE(d, M_DEVBUF);
-               return ENOMEM;
+
+               return (ENOMEM);
        }
 #if CONFIG_MACF_NET
        mac_bpfdesc_label_init(d);
@@ -741,7 +833,17 @@ bpfclose(dev_t dev, __unused int flags, __unused int fmt,
        if (d == 0 || d == (void *)1) {
                lck_mtx_unlock(bpf_mlock);
                return (ENXIO);
-       }       
+       }
+
+       /*
+        * Other threads may call bpf_detachd() if we drop the bpf_mlock
+        */
+       d->bd_flags |= BPF_CLOSING;
+
+       if (bpf_debug != 0)
+               printf("%s: %llx\n",
+                   __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
+
        bpf_dtab[minor(dev)] = (void *)1;               /* Mark closing */
 
        /*
@@ -799,7 +901,7 @@ bpfclose(dev_t dev, __unused int flags, __unused int fmt,
        }
 
        if (d->bd_bif)
-               bpf_detachd(d);
+               bpf_detachd(d, 1);
        selthreadclear(&d->bd_sel);
 #if CONFIG_MACF_NET
        mac_bpfdesc_label_destroy(d);
@@ -813,10 +915,11 @@ bpfclose(dev_t dev, __unused int flags, __unused int fmt,
 
        /* Mark free in same context as bpfopen comes to check */
        bpf_dtab[minor(dev)] = NULL;                    /* Mark closed */
+
+       bpf_release_d(d);
+
        lck_mtx_unlock(bpf_mlock);
-       
-       _FREE(d, M_DEVBUF);
-       
+
        return (0);
 }
 
@@ -844,8 +947,10 @@ bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
                panic("rotating bpf buffers during read"); \
        (d)->bd_hbuf = (d)->bd_sbuf; \
        (d)->bd_hlen = (d)->bd_slen; \
+       (d)->bd_hcnt = (d)->bd_scnt; \
        (d)->bd_sbuf = (d)->bd_fbuf; \
        (d)->bd_slen = 0; \
+       (d)->bd_scnt = 0; \
        (d)->bd_fbuf = NULL;
 /*
  *  bpfread - read next chunk of packets from buffers
@@ -862,16 +967,19 @@ bpfread(dev_t dev, struct uio *uio, int ioflag)
        lck_mtx_lock(bpf_mlock);
 
        d = bpf_dtab[minor(dev)];
-       if (d == 0 || d == (void *)1) {
+       if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
                lck_mtx_unlock(bpf_mlock);
                return (ENXIO);
        }
 
+       bpf_acquire_d(d);
+
        /*
         * Restrict application to use a buffer the same size as
         * as kernel buffers.
         */
        if (uio_resid(uio) != d->bd_bufsize) {
+               bpf_release_d(d);
                lck_mtx_unlock(bpf_mlock);
                return (EINVAL);
        }
@@ -884,9 +992,9 @@ bpfread(dev_t dev, struct uio *uio, int ioflag)
 
        while (d->bd_hbuf_read) 
                msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
-       
-       d = bpf_dtab[minor(dev)]; 
-       if (d == 0 || d == (void *)1) {
+
+       if ((d->bd_flags & BPF_CLOSING) != 0) {
+               bpf_release_d(d);
                lck_mtx_unlock(bpf_mlock);
                return (ENXIO);
        }
@@ -918,10 +1026,12 @@ bpfread(dev_t dev, struct uio *uio, int ioflag)
                 * it before using it again.
                 */
                if (d->bd_bif == NULL) {
+                       bpf_release_d(d);
                        lck_mtx_unlock(bpf_mlock);
                        return (ENXIO);
                }
                if (ioflag & IO_NDELAY) {
+                       bpf_release_d(d);
                        lck_mtx_unlock(bpf_mlock);
                        return (EWOULDBLOCK);
                }
@@ -930,8 +1040,8 @@ bpfread(dev_t dev, struct uio *uio, int ioflag)
                /*
                 * Make sure device is still opened
                 */
-               d = bpf_dtab[minor(dev)];
-               if (d == 0 || d == (void *)1) {
+               if ((d->bd_flags & BPF_CLOSING) != 0) {
+                       bpf_release_d(d);
                        lck_mtx_unlock(bpf_mlock);
                        return (ENXIO);
                }
@@ -939,8 +1049,8 @@ bpfread(dev_t dev, struct uio *uio, int ioflag)
                while (d->bd_hbuf_read)
                        msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
 
-               d = bpf_dtab[minor(dev)];
-               if (d == 0 || d == (void *)1) {
+               if ((d->bd_flags & BPF_CLOSING) != 0) {
+                       bpf_release_d(d);
                        lck_mtx_unlock(bpf_mlock);
                        return (ENXIO);
                }
@@ -965,6 +1075,7 @@ bpfread(dev_t dev, struct uio *uio, int ioflag)
                                ROTATE_BUFFERS(d);
                                break;
                        }
+                       bpf_release_d(d);
                        lck_mtx_unlock(bpf_mlock);
                        return (error);
                }
@@ -983,6 +1094,7 @@ bpfread(dev_t dev, struct uio *uio, int ioflag)
                                break;
 
                        if (d->bd_slen == 0) {
+                               bpf_release_d(d);
                                lck_mtx_unlock(bpf_mlock);
                                return (0);
                        }
@@ -1091,8 +1203,8 @@ bpfread(dev_t dev, struct uio *uio, int ioflag)
        /*
         * Make sure device is still opened
         */
-       d = bpf_dtab[minor(dev)];
-       if (d == 0 || d == (void *)1) {
+       if ((d->bd_flags & BPF_CLOSING) != 0) {
+               bpf_release_d(d);
                lck_mtx_unlock(bpf_mlock);
                return (ENXIO);
        }
@@ -1101,7 +1213,10 @@ bpfread(dev_t dev, struct uio *uio, int ioflag)
        d->bd_fbuf = d->bd_hbuf;
        d->bd_hbuf = NULL;
        d->bd_hlen = 0;
+       d->bd_hcnt = 0;
        wakeup((caddr_t)d);
+
+       bpf_release_d(d);
        lck_mtx_unlock(bpf_mlock);
        return (error);
 
@@ -1123,11 +1238,8 @@ bpf_wakeup(struct bpf_d *d)
                pgsigio(d->bd_sigio, d->bd_sig);
 
        selwakeup(&d->bd_sel);
-       KNOTE(&d->bd_sel.si_note, 1);
-#ifndef __APPLE__
-       /* XXX */
-       d->bd_sel.si_pid = 0;
-#endif
+       if ((d->bd_flags & BPF_KNOTE))
+               KNOTE(&d->bd_sel.si_note, 1);
 }
 
 
@@ -1178,11 +1290,15 @@ bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
        lck_mtx_lock(bpf_mlock);
 
        d = bpf_dtab[minor(dev)];
-       if (d == 0 || d == (void *)1) {
+       if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
                lck_mtx_unlock(bpf_mlock);
                return (ENXIO);
        }
+
+       bpf_acquire_d(d);
+
        if (d->bd_bif == 0) {
+               bpf_release_d(d);
                lck_mtx_unlock(bpf_mlock);
                return (ENXIO);
        }
@@ -1190,10 +1306,12 @@ bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
        ifp = d->bd_bif->bif_ifp;
 
        if ((ifp->if_flags & IFF_UP) == 0) {
+               bpf_release_d(d);
                lck_mtx_unlock(bpf_mlock);
                return (ENETDOWN);
        }
        if (uio_resid(uio) == 0) {
+               bpf_release_d(d);
                lck_mtx_unlock(bpf_mlock);
                return (0);
        }
@@ -1213,26 +1331,31 @@ bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
        bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
        &datlen);
 
+       /* take the lock again */
+       lck_mtx_lock(bpf_mlock);
        if (error) {
+               bpf_release_d(d);
+               lck_mtx_unlock(bpf_mlock);
                return (error);
        }
 
-       /* taking the lock again and verifying whether device is open */
-       lck_mtx_lock(bpf_mlock);
-       d = bpf_dtab[minor(dev)];
-       if (d == 0 || d == (void *)1) {
+       /* verify the device is still open */
+       if ((d->bd_flags & BPF_CLOSING) != 0) {
+               bpf_release_d(d);
                lck_mtx_unlock(bpf_mlock);
                m_freem(m);
                return (ENXIO);
        }
 
        if (d->bd_bif == NULL) {
+               bpf_release_d(d);
                lck_mtx_unlock(bpf_mlock);
                m_free(m);
                return (ENXIO);
        }
 
        if ((unsigned)datlen > ifp->if_mtu) {
+               bpf_release_d(d);
                lck_mtx_unlock(bpf_mlock);
                m_freem(m);
                return (EMSGSIZE);
@@ -1247,6 +1370,9 @@ bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
 
        lck_mtx_unlock(bpf_mlock);
 
+       /*
+        * The driver frees the mbuf.
+        */
        if (d->bd_hdrcmplt) {
                if (d->bd_bif->bif_send)
                        error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
@@ -1257,9 +1383,10 @@ bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
                    (struct sockaddr *)dst_buf, 0, NULL);
        }
 
-       /*
-        * The driver frees the mbuf.
-        */
+       lck_mtx_lock(bpf_mlock);
+       bpf_release_d(d);
+       lck_mtx_unlock(bpf_mlock);
+
        return (error);
 }
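The rewritten write path follows a common pattern: pin the descriptor with a reference while holding bpf_mlock, drop the lock for the potentially sleeping send, then retake it only to release the reference. A hedged sketch with a pthread mutex standing in for bpf_mlock:

    #include <pthread.h>

    struct dev {
            pthread_mutex_t lock;
            int             refcnt;
    };

    static void
    dev_send(struct dev *d)
    {
            pthread_mutex_lock(&d->lock);
            d->refcnt++;                        /* pin the device */
            pthread_mutex_unlock(&d->lock);     /* don't hold the lock across I/O */

            /* ... hand the packet to the driver; the driver frees it ... */

            pthread_mutex_lock(&d->lock);
            d->refcnt--;                        /* may be the last reference */
            pthread_mutex_unlock(&d->lock);
    }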
 
@@ -1280,6 +1407,8 @@ reset_d(struct bpf_d *d)
        }
        d->bd_slen = 0;
        d->bd_hlen = 0;
+       d->bd_scnt = 0;
+       d->bd_hcnt = 0;
        d->bd_rcount = 0;
        d->bd_dcount = 0;
 }
@@ -1306,6 +1435,8 @@ reset_d(struct bpf_d *d)
  *  BIOCSETTC          Set traffic class.
  *  BIOCGETTC          Get traffic class.
  *  BIOCSEXTHDR                Set "extended header" flag
+ *  BIOCSHEADDROP      Drop head of the buffer if user is not reading
+ *  BIOCGHEADDROP      Get "head-drop" flag
  */
 /* ARGSUSED */
 int
@@ -1320,11 +1451,13 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
        lck_mtx_lock(bpf_mlock);
 
        d = bpf_dtab[minor(dev)];
-       if (d == 0 || d == (void *)1) {
+       if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
                lck_mtx_unlock(bpf_mlock);
                return (ENXIO);
        }
 
+       bpf_acquire_d(d);
+
        if (d->bd_state == BPF_WAITING)
                bpf_stop_timer(d);
        d->bd_state = BPF_IDLE;
@@ -1399,7 +1532,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
 
                bcopy(addr, &prg32, sizeof (prg32));
                error = bpf_setf(d, prg32.bf_len,
-                   CAST_USER_ADDR_T(prg32.bf_insns), dev, cmd);
+                   CAST_USER_ADDR_T(prg32.bf_insns), cmd);
                break;
        }
 
@@ -1408,7 +1541,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
                struct bpf_program64 prg64;
 
                bcopy(addr, &prg64, sizeof (prg64));
-               error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, dev, cmd);
+               error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, cmd);
                break;
        }
 
@@ -1419,11 +1552,10 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
                while (d->bd_hbuf_read) {
                        msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
                }
-               
-               d = bpf_dtab[minor(dev)];
-               if  (d == 0 || d == (void *)1)
-                       return (ENXIO);
-               
+               if ((d->bd_flags & BPF_CLOSING) != 0) {
+                       error = ENXIO;
+                       break;
+               }
                reset_d(d);
                break;
 
@@ -1478,7 +1610,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
                        u_int dlt;
 
                        bcopy(addr, &dlt, sizeof (dlt));
-                       error = bpf_setdlt(d, dlt, dev);
+                       error = bpf_setdlt(d, dlt);
                }
                break;
 
@@ -1508,7 +1640,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
                if (ifp == NULL)
                        error = ENXIO;
                else
-                       error = bpf_setif(d, ifp, 0, dev);
+                       error = bpf_setif(d, ifp, 0);
                break;
        }
 
@@ -1589,7 +1721,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
         * Set immediate mode.
         */
        case BIOCIMMEDIATE:             /* u_int */
-               bcopy(addr, &d->bd_immediate, sizeof (u_int));
+               d->bd_immediate = *(u_int *)(void *)addr;
                break;
 
        case BIOCVERSION: {             /* struct bpf_version */
@@ -1734,8 +1866,18 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
                         d->bd_flags &= ~BPF_WANT_PKTAP;
                break;
 #endif
+
+       case BIOCSHEADDROP:
+               bcopy(addr, &int_arg, sizeof (int_arg));
+               d->bd_headdrop = int_arg ? 1 : 0;
+               break;
+
+       case BIOCGHEADDROP:
+               bcopy(&d->bd_headdrop, addr, sizeof (int));
+               break;
        }
 
+       bpf_release_d(d);
        lck_mtx_unlock(bpf_mlock);
 
        return (error);
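BIOCSHEADDROP and BIOCGHEADDROP are private ioctls, but a caller built against the private header could toggle head-drop mode roughly as follows (illustrative only; /dev/bpf0 is a placeholder):

    #include <sys/ioctl.h>
    #include <net/bpf.h>
    #include <fcntl.h>
    #include <err.h>

    int
    main(void)
    {
            int fd = open("/dev/bpf0", O_RDWR);
            int on = 1, cur = 0;

            if (fd == -1)
                    err(1, "open");
            if (ioctl(fd, BIOCSHEADDROP, &on) == -1)    /* drop oldest packets when full */
                    err(1, "BIOCSHEADDROP");
            if (ioctl(fd, BIOCGHEADDROP, &cur) == -1)   /* read the flag back */
                    err(1, "BIOCGHEADDROP");
            return cur == 1 ? 0 : 1;
    }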
@@ -1746,7 +1888,8 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
  * free it and replace it.  Returns EINVAL for bogus requests.
  */
 static int
-bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns, dev_t dev, u_long cmd)
+bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
+    u_long cmd)
 {
        struct bpf_insn *fcode, *old;
        u_int flen, size;
@@ -1754,8 +1897,7 @@ bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns, dev_t dev, u_long
        while (d->bd_hbuf_read) 
                msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
 
-       d = bpf_dtab[minor(dev)];
-       if  (d == 0 || d == (void *)1)
+       if ((d->bd_flags & BPF_CLOSING) != 0)
                return (ENXIO);
        
        old = d->bd_filter;
@@ -1800,7 +1942,7 @@ bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns, dev_t dev, u_long
  * Return an errno or 0.
  */
 static int
-bpf_setif(struct bpf_d *d, ifnet_t theywant, u_int32_t dlt, dev_t dev)
+bpf_setif(struct bpf_d *d, ifnet_t theywant, u_int32_t dlt)
 {
        struct bpf_if *bp;
        int error;
@@ -1808,8 +1950,7 @@ bpf_setif(struct bpf_d *d, ifnet_t theywant, u_int32_t dlt, dev_t dev)
        while (d->bd_hbuf_read)
                msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
 
-       d = bpf_dtab[minor(dev)];
-       if  (d == 0 || d == (void *)1)
+       if ((d->bd_flags & BPF_CLOSING) != 0)
                return (ENXIO);
 
        /*
@@ -1839,15 +1980,15 @@ bpf_setif(struct bpf_d *d, ifnet_t theywant, u_int32_t dlt, dev_t dev)
                                return (error);
                }
                if (bp != d->bd_bif) {
-                       if (d->bd_bif)
                                /*
                                 * Detach if attached to something else.
                                 */
-                               bpf_detachd(d);
-
-                       if (bpf_attachd(d, bp) != 0) {
-                               return ENXIO;
+                       if (d->bd_bif) {
+                               if (bpf_detachd(d, 0) != 0)
+                                       return (ENXIO);
                        }
+                       if (bpf_attachd(d, bp) != 0)
+                               return (ENXIO);
                }
                reset_d(d);
                return (0);
@@ -1912,7 +2053,7 @@ bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
  * Set the data link type of a BPF instance.
  */
 static int
-bpf_setdlt(struct bpf_d *d, uint32_t dlt, dev_t dev)
+bpf_setdlt(struct bpf_d *d, uint32_t dlt)
 {
        int error, opromisc;
        struct ifnet *ifp;
@@ -1924,8 +2065,7 @@ bpf_setdlt(struct bpf_d *d, uint32_t dlt, dev_t dev)
        while (d->bd_hbuf_read)
                msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
 
-       d = bpf_dtab[minor(dev)];
-       if  (d == 0 || d == (void *)1)
+       if ((d->bd_flags & BPF_CLOSING) != 0)
                return (ENXIO);
 
        ifp = d->bd_bif->bif_ifp;
@@ -1935,7 +2075,8 @@ bpf_setdlt(struct bpf_d *d, uint32_t dlt, dev_t dev)
        }
        if (bp != NULL) {
                opromisc = d->bd_promisc;
-               bpf_detachd(d);
+               if (bpf_detachd(d, 0) != 0)
+                       return (ENXIO);
                error = bpf_attachd(d, bp);
                if (error) {
                        printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
@@ -1947,11 +2088,13 @@ bpf_setdlt(struct bpf_d *d, uint32_t dlt, dev_t dev)
                        lck_mtx_unlock(bpf_mlock);
                        error = ifnet_set_promiscuous(bp->bif_ifp, 1);
                        lck_mtx_lock(bpf_mlock);
-                       if (error)
-                               printf("bpf_setdlt: ifpromisc %s%d failed (%d)\n",
-                                          ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp), error);
-                       else
+                       if (error) {
+                               printf("%s: ifpromisc %s%d failed (%d)\n",
+                                   __func__, ifnet_name(bp->bif_ifp),
+                                   ifnet_unit(bp->bif_ifp), error);
+                       } else {
                                d->bd_promisc = 1;
+                       }
                }
        }
        return (bp == NULL ? EINVAL : 0);
@@ -1995,21 +2138,24 @@ bpfselect(dev_t dev, int which, void * wql, struct proc *p)
        lck_mtx_lock(bpf_mlock);
 
        d = bpf_dtab[minor(dev)];
-       if (d == 0 || d == (void *)1) {
+       if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
                lck_mtx_unlock(bpf_mlock);
                return (ENXIO);
        }
 
+       bpf_acquire_d(d);
+
        if (d->bd_bif == NULL) {
+               bpf_release_d(d);
                lck_mtx_unlock(bpf_mlock);
                return (ENXIO);
        }
 
        while (d->bd_hbuf_read) 
                msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
-       
-       d = bpf_dtab[minor(dev)]; 
-       if (d == 0 || d == (void *)1) {
+
+       if ((d->bd_flags & BPF_CLOSING) != 0) {
+               bpf_release_d(d);
                lck_mtx_unlock(bpf_mlock);
                return (ENXIO);
        }
@@ -2036,7 +2182,9 @@ bpfselect(dev_t dev, int which, void * wql, struct proc *p)
                        break;
        }
 
+       bpf_release_d(d);
        lck_mtx_unlock(bpf_mlock);
+
        return (ret);
 }
 
@@ -2074,7 +2222,7 @@ bpfkqfilter(dev_t dev, struct knote *kn)
        lck_mtx_lock(bpf_mlock);
 
        d = bpf_dtab[minor(dev)];
-       if (d == 0 || d == (void *)1) {
+       if (d == 0 || d == (void *)1 || (d->bd_flags & BPF_CLOSING) != 0) {
                lck_mtx_unlock(bpf_mlock);
                return (ENXIO);
        }
@@ -2087,8 +2235,10 @@ bpfkqfilter(dev_t dev, struct knote *kn)
        kn->kn_hook = d;
        kn->kn_fop = &bpfread_filtops;
        KNOTE_ATTACH(&d->bd_sel.si_note, kn);
+       d->bd_flags |= BPF_KNOTE;
+
        lck_mtx_unlock(bpf_mlock);
-       return 0;
+       return (0);
 }
 
 static void
@@ -2097,7 +2247,10 @@ filt_bpfdetach(struct knote *kn)
        struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
 
        lck_mtx_lock(bpf_mlock);
-       KNOTE_DETACH(&d->bd_sel.si_note, kn);
+       if (d->bd_flags & BPF_KNOTE) {
+               KNOTE_DETACH(&d->bd_sel.si_note, kn);
+               d->bd_flags &= ~BPF_KNOTE;
+       }
        lck_mtx_unlock(bpf_mlock);
 }
 
@@ -2248,7 +2401,7 @@ bpf_tap_imp(
                        hack_hdr.mh_type = m->m_type;
                        hack_hdr.mh_flags = 0;
                        
-                       m = (mbuf_t)&hack_hdr;
+                       __IGNORE_WCASTALIGN(m = (mbuf_t)&hack_hdr);
                }
 
                for (m0 = m; m0 != 0; m0 = m0->m_next)
@@ -2348,14 +2501,23 @@ catchpacket(struct bpf_d *d, u_char *pkt, struct mbuf *m, u_int pktlen,
                 * pending reads.
                 */
                if (d->bd_fbuf == NULL) {
+                       if (d->bd_headdrop == 0) {
+                               /*
+                                * We haven't completed the previous read yet,
+                                * so drop the packet.
+                                */
+                               ++d->bd_dcount;
+                               return;
+                       }
                        /*
-                        * We haven't completed the previous read yet,
-                        * so drop the packet.
+                        * Drop the hold buffer as it contains older packets
                         */
-                       ++d->bd_dcount;
-                       return;
+                       d->bd_dcount += d->bd_hcnt;
+                       d->bd_fbuf = d->bd_hbuf;
+                       ROTATE_BUFFERS(d);
+               } else {
+                       ROTATE_BUFFERS(d);
                }
-               ROTATE_BUFFERS(d);
                do_wakeup = 1;
                curlen = 0;
        }
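The head-drop branch above recycles the hold buffer so the newest packets are kept instead of the oldest. A toy model of the store/hold/free rotation with the same tail-drop versus head-drop choice (field names loosely mirror the diff, the surrounding code is illustrative):

    #include <stddef.h>

    struct cap {
            char    *sbuf, *hbuf, *fbuf;    /* store, hold and free slots */
            int     scnt, hcnt, dcnt;       /* packets stored / held / dropped */
            int     headdrop;
    };

    static void
    rotate(struct cap *c)
    {
            c->hbuf = c->sbuf;
            c->hcnt = c->scnt;
            c->sbuf = c->fbuf;
            c->scnt = 0;
            c->fbuf = NULL;
    }

    /* Called when the store buffer is full; returns 1 if the new packet fits. */
    static int
    make_room(struct cap *c)
    {
            if (c->fbuf == NULL) {
                    if (!c->headdrop) {
                            c->dcnt++;          /* tail drop: lose the new packet */
                            return 0;
                    }
                    c->dcnt += c->hcnt;         /* head drop: discard old hold buffer */
                    c->fbuf = c->hbuf;
            }
            rotate(c);
            return 1;
    }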
@@ -2421,6 +2583,7 @@ catchpacket(struct bpf_d *d, u_char *pkt, struct mbuf *m, u_int pktlen,
         */
        (*cpfn)(pkt, payload, caplen);
        d->bd_slen = curlen + totlen;
+       d->bd_scnt += 1;
 
        if (do_wakeup)
                bpf_wakeup(d);
@@ -2443,6 +2606,8 @@ bpf_allocbufs(struct bpf_d *d)
        }
        d->bd_slen = 0;
        d->bd_hlen = 0;
+       d->bd_scnt = 0;
+       d->bd_hcnt = 0;
        return (0);
 }
 
@@ -2495,7 +2660,8 @@ bpf_attach(
        struct bpf_if *bp_temp;
        struct bpf_if *bp_first = NULL;
        
-       bp_new = (struct bpf_if *) _MALLOC(sizeof(*bp_new), M_DEVBUF, M_WAIT);
+       bp_new = (struct bpf_if *) _MALLOC(sizeof(*bp_new), M_DEVBUF,
+           M_WAIT | M_ZERO);
        if (bp_new == 0)
                panic("bpfattach");
 
@@ -2519,7 +2685,6 @@ bpf_attach(
                return EEXIST;
        }
        
-       bzero(bp_new, sizeof(*bp_new));
        bp_new->bif_ifp = ifp;
        bp_new->bif_dlt = dlt;
        bp_new->bif_send = send;
@@ -2569,9 +2734,12 @@ void
 bpfdetach(struct ifnet *ifp)
 {
        struct bpf_if   *bp, *bp_prev, *bp_next;
-       struct bpf_if   *bp_free_list = NULL;
        struct bpf_d    *d;
 
+       if (bpf_debug != 0)
+               printf("%s: %s\n",
+                   __func__, if_name(ifp));
+
        lck_mtx_lock(bpf_mlock);
 
        /*
@@ -2593,32 +2761,22 @@ bpfdetach(struct ifnet *ifp)
                else
                        bpf_iflist = bp->bif_next;
 
-               /* Add to the list to be freed */
-               bp->bif_next = bp_free_list;
-               bp_free_list = bp;
-       }
-       
-       /*
-        * Detach the bpf devices attached to the interface
-        * Now we do not care if we lose the bpf_mlock in bpf_detachd
-        */
-       for (bp = bp_free_list; bp != NULL; bp = bp->bif_next) {
+               /* Detach the devices attached to the interface */
                while ((d = bp->bif_dlist) != NULL) {
-                       bpf_detachd(d);
+                       /*
+                        * Take an extra reference to prevent the device
+                        * from being freed when bpf_detachd() releases
+                        * the reference for the interface list
+                        */
+                       bpf_acquire_d(d);
+                       bpf_detachd(d, 0);
                        bpf_wakeup(d);
+                       bpf_release_d(d);
                }
                ifnet_release(ifp);
        }
 
        lck_mtx_unlock(bpf_mlock);
-
-       /*
-        * Free the list
-        */
-       while ((bp = bp_free_list) != NULL) {
-               bp_free_list = bp->bif_next;
-               FREE(bp, M_DEVBUF);
-       }
 }
 
 void
index 003f631d183e489729a6ad49f5c32f19909f5062..20293abd9878821dde67bfc83d2e217adefe6715 100644 (file)
@@ -209,6 +209,8 @@ struct bpf_version {
 #ifdef PRIVATE
 #define        BIOCGWANTPKTAP  _IOR('B', 127, u_int)
 #define        BIOCSWANTPKTAP  _IOWR('B', 127, u_int)
+#define BIOCSHEADDROP   _IOW('B', 128, int)
+#define BIOCGHEADDROP   _IOR('B', 128, int)
 #endif /* PRIVATE */
 /*
  * Structure prepended to each packet.
@@ -605,7 +607,11 @@ struct bpf_mtag {
 /*
  * For Apple private usage
  */
+#define DLT_USER0_APPLE_INTERNAL        DLT_USER0       /* rdar://12019509 */
+#define DLT_USER1_APPLE_INTERNAL        DLT_USER1       /* rdar://12019509 */
 #define DLT_PKTAP                      DLT_USER2       /* rdar://11779467 */
+#define DLT_USER3_APPLE_INTERNAL        DLT_USER3       /* rdar://19614531 */
+#define DLT_USER4_APPLE_INTERNAL       DLT_USER4       /* rdar://19614531 */
 #endif /* PRIVATE */
 
 /*
index 4145bf4058cb6465242be48e5e91fdb0c66d79b2..dcb9ac0afdbfb2420386e70309dd89a7dc717ff7 100644 (file)
@@ -97,15 +97,18 @@ struct bpf_d {
        caddr_t         bd_fbuf;        /* free slot */
        int             bd_slen;        /* current length of store buffer */
        int             bd_hlen;        /* current length of hold buffer */
+       u_int32_t       bd_scnt;        /* number of packets in store buffer */
+       u_int32_t       bd_hcnt;        /* number of packets in hold buffer */
 
        int             bd_bufsize;     /* absolute length of buffers */
        int             bd_hbuf_read;   /* reading from hbuf */
+       int             bd_headdrop;    /* Keep newer packets */
 
        struct bpf_if  *bd_bif;         /* interface descriptor */
-       u_int32_t               bd_rtout;       /* Read timeout in 'ticks' */
+       u_int32_t       bd_rtout;       /* Read timeout in 'ticks' */
        struct bpf_insn *bd_filter;     /* filter code */
-       u_int32_t               bd_rcount;      /* number of packets received */
-       u_int32_t               bd_dcount;      /* number of packets dropped */
+       u_int32_t       bd_rcount;      /* number of packets received */
+       u_int32_t       bd_dcount;      /* number of packets dropped */
 
        u_char          bd_promisc;     /* true if listening promiscuously */
        u_char          bd_state;       /* idle, waiting, or timed out */
@@ -129,12 +132,19 @@ struct bpf_d {
        int             bd_hdrcmplt;    /* false to fill in src lladdr automatically */
        int             bd_seesent;     /* true if bpf should see sent packets */
        int             bd_oflags;      /* device open flags */
-       thread_call_t bd_thread_call; /* for BPF timeouts with select */
+       thread_call_t   bd_thread_call; /* for BPF timeouts with select */
 #if CONFIG_MACF_NET
        struct label *  bd_label;       /* MAC label for descriptor */
 #endif
        int             bd_traffic_class; /* traffic service class */
        int             bd_flags;       /* flags */
+
+       int             bd_refcnt;
+#define        BPF_REF_HIST    4               /* how many callers to keep around */
+       void            *bd_ref_lr[BPF_REF_HIST];
+       void            *bd_unref_lr[BPF_REF_HIST];
+       int             bd_next_ref_lr;
+       int             bd_next_unref_lr;
 };
 
 /* Values for bd_state */
@@ -148,11 +158,14 @@ struct bpf_d {
                         (((bd)->bd_immediate || (bd)->bd_state == BPF_TIMED_OUT) && \
                          (bd)->bd_slen != 0))
 
-
 /* Values for bd_flags */
 #define        BPF_EXTENDED_HDR        0x01    /* process req. the extended header */
-#define        BPF_WANT_PKTAP          0x02    /* process knows how to keep DLT_PKTAP private */
+#define        BPF_WANT_PKTAP          0x02    /* knows how to handle DLT_PKTAP */
 #define        BPF_FINALIZE_PKTAP      0x04    /* finalize pktap header on read */
+#define        BPF_KNOTE               0x08    /* kernel note attached */
+#define        BPF_DETACHING           0x10    /* bpf_d is being detached */
+#define        BPF_DETACHED            0x20    /* bpf_d is detached */
+#define        BPF_CLOSING             0x40    /* bpf_d is being closed */
 
 /*
  * Descriptor associated with each attached hardware interface.
index 1aa7079e290a09b51aac854554ae5116df1ed24e..a02432ac6d2796592458e3af6b51e4d8cc3b42a0 100644 (file)
@@ -24,9 +24,9 @@ EXPORT_MI_LIST        = ${INSTALL_MI_LIST} ${KERNELFILES}
 
 EXPORT_MI_DIR = ${INSTALL_MI_DIR}
 
-INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES}
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
 
-INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES}
+INSTALL_KF_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} ${PRIVATE_KERNELFILES}
 
 include $(MakeInc_rule)
 include $(MakeInc_dir)
index 3d9d324aed991af3feca1398cef2807805a51988..625876773607d8125797159eb55df9f1ebfaef4d 100644 (file)
@@ -73,7 +73,7 @@
 
 #include <libkern/libkern.h>
 
-u_int32_t classq_verbose;      /* more noise if greater than 1 */
+u_int32_t classq_verbose = 0;  /* more noise if greater than 1 */
 
 SYSCTL_NODE(_net, OID_AUTO, classq, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "classq");
 
index 4f705e0f13c5d13ce87924d639635fc8de75e131..7d12ba606f03add83bc16efb6e39c93ab995c01b 100644 (file)
@@ -280,9 +280,9 @@ static u_int64_t sfb_hinterval;
 SYSCTL_QUAD(_net_classq_sfb, OID_AUTO, hinterval, CTLFLAG_RW|CTLFLAG_LOCKED,
     &sfb_hinterval, "SFB hash interval in nanoseconds");
 
-static u_int64_t sfb_target_qdelay;
+static u_int64_t sfb_target_qdelay = 0;
 SYSCTL_QUAD(_net_classq_sfb, OID_AUTO, target_qdelay, CTLFLAG_RW|CTLFLAG_LOCKED,
-    &sfb_target_qdelay, "SFB target queue delay in milliseconds");
+    &sfb_target_qdelay, "SFB target queue delay in nanoseconds");
 
 static u_int64_t sfb_update_interval;
 SYSCTL_QUAD(_net_classq_sfb, OID_AUTO, update_interval,
@@ -458,6 +458,15 @@ sfb_calc_target_qdelay(struct sfb *sp, u_int64_t out_bw)
        if (target_qdelay == 0)
                target_qdelay = IFQ_TARGET_DELAY;
 
+       /*
+        * If a delay has been added to the ifnet start callback for
+        * coalescing, we have to add that to the pre-set target delay
+        * because the packets can be in the queue longer.
+        */
+       if ((ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
+               ifp->if_start_delay_timeout > 0)
+               target_qdelay += ifp->if_start_delay_timeout;
+
        sp->sfb_target_qdelay = target_qdelay;
 }
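The adjustment above simply adds the interface's start-callback coalescing timeout to the configured target delay. A tiny illustration with made-up nanosecond values (the unit the sysctl now reports):

    #include <stdint.h>

    /* Illustrative numbers only. */
    static uint64_t
    effective_target_qdelay(uint64_t base_target, uint64_t start_delay_timeout)
    {
            /* e.g. 10 ms default target + 2 ms coalescing delay = 12 ms */
            return base_target + start_delay_timeout;
    }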
 
@@ -1147,8 +1156,7 @@ sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t)
        u_int16_t pmin;
        int fc_adv = 0;
        int ret = CLASSQEQ_SUCCESS;
-
-       nanouptime(&now);
+       u_int32_t maxqsize = 0;
 
        s = sp->sfb_current;
        VERIFY((s + (s ^ 1)) == 1);
@@ -1157,6 +1165,13 @@ sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t)
        VERIFY(!(pkt->pkt_flags & PKTF_PRIV_GUARDED));
        pkt->pkt_flags |= PKTF_PRIV_GUARDED;
 
+       if (pkt->pkt_enqueue_ts > 0) {
+               net_nsectimer(&pkt->pkt_enqueue_ts, &now); 
+       } else {
+               nanouptime(&now);
+               net_timernsec(&now, &pkt->pkt_enqueue_ts);
+       }
+
        /* time to swap the bins? */
        if (net_timercmp(&now, &sp->sfb_nextreset, >=)) {
                net_timeradd(&now, &sp->sfb_hinterval, &sp->sfb_nextreset);
@@ -1170,6 +1185,13 @@ sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t)
                    &sp->sfb_update_time);
        }
 
+       /*
+        * If getq time is not set because this is the first packet
+        * or after idle time, set it now so that we can detect a stall.
+        */
+       if (qsize(q) == 0 && !net_timerisset(&sp->sfb_getqtime))
+               *(&sp->sfb_getqtime) = *(&now);
+
        pkt->pkt_sfb_flags = 0;
        pkt->pkt_sfb_hash16[s] =
            (SFB_HASH(&pkt->pkt_flowid, sizeof (pkt->pkt_flowid),
@@ -1218,25 +1240,33 @@ sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t)
                sp->sfb_stats.drop_pbox++;
        }
 
-       /*
-        * if max queue size is static, make it a forced drop
-        * when the queue length hits the queue limit
-        */
-       if (!(SFB_QUEUE_DELAYBASED(sp)) &&
-           droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) {
-               droptype = DTYPE_FORCED;
-               sp->sfb_stats.drop_queue++;
-       }
+       if (SFB_QUEUE_DELAYBASED(sp))
+               maxqsize = SFB_QUEUE_DELAYBASED_MAXSIZE;
+       else
+               maxqsize = qlimit(q);
 
        /*
-        * delay based queues have a larger maximum size to
-        * allow for bursts
+        * When the queue length hits the queue limit, make it a forced
+        * drop
         */
-       if (SFB_QUEUE_DELAYBASED(sp) &&
-           droptype == DTYPE_NODROP &&
-           qlen(q) >= SFB_QUEUE_DELAYBASED_MAXSIZE) {
-               droptype = DTYPE_FORCED;
-               sp->sfb_stats.drop_queue++;
+       if (droptype == DTYPE_NODROP && qlen(q) >= maxqsize) {
+               if (pkt->pkt_proto == IPPROTO_TCP &&
+                   ((pkt->pkt_flags & PKTF_TCP_REXMT) ||
+                   (sp->sfb_flags & SFBF_LAST_PKT_DROPPED))) {
+                       /*
+                        * At some level, dropping packets will make the
+                        * flows backoff and will keep memory requirements
+                        * under control. But we should not cause a tail
+                        * drop because it can take a long time for a
+                        * TCP flow to recover. We should try to drop
+                        * alternate packets instead.
+                        */
+                       sp->sfb_flags &= ~SFBF_LAST_PKT_DROPPED;
+               } else {
+                       droptype = DTYPE_FORCED;
+                       sp->sfb_stats.drop_queue++;
+                       sp->sfb_flags |= SFBF_LAST_PKT_DROPPED;
+               }
        }
 
        if (fc_adv == 1 && droptype != DTYPE_FORCED &&
@@ -1255,7 +1285,6 @@ sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t)
        }
        /* if successful enqueue this packet, else drop it */
        if (droptype == DTYPE_NODROP) {
-               net_timernsec(&now, &pkt->pkt_enqueue_ts);
                _addq(q, m);
        } else {
                IFCQ_CONVERT_LOCK(&sp->sfb_ifp->if_snd);
@@ -1346,6 +1375,7 @@ sfb_getq_flow(struct sfb *sp, class_queue_t *q, u_int32_t flow, boolean_t purge)
                        sp->sfb_min_qdelay = 0;
                }
        }
+       pkt->pkt_enqueue_ts = 0;
 
        /*
         * Clearpkts are the ones which were in the queue when the hash
@@ -1378,6 +1408,7 @@ sfb_getq_flow(struct sfb *sp, class_queue_t *q, u_int32_t flow, boolean_t purge)
                sp->sfb_min_qdelay = 0;
                sp->sfb_fc_threshold = 0;
                net_timerclear(&sp->sfb_update_time);
+               net_timerclear(&sp->sfb_getqtime);
        }
 
        return (m);
index f401b0eb57c948029c8920346360cc6ded600878..2a28a719288281d731ad60ad825a370b75e2cbf4 100644 (file)
@@ -104,6 +104,7 @@ struct sfb_fcl {
 #define        SFBF_FLOWCTL    0x04    /* enable flow control advisories */
 #define        SFBF_DELAYBASED 0x08    /* queueing is delay based */
 #define        SFBF_DELAYHIGH  0x10    /* Estimated delay is greater than target */
+#define SFBF_LAST_PKT_DROPPED  0x20    /* Last packet dropped */
 #define        SFBF_SUSPENDED  0x1000  /* queue is suspended */
 
 #define        SFBF_USERFLAGS                                                  \
index 109cae5866893dc9117650407b717ad564ec50da..98c007bd9da8a11296e3d20333561a4eec3505c8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2011-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -95,6 +95,7 @@ ifclassq_setup(struct ifnet *ifp, u_int32_t sflags, boolean_t reuse)
        VERIFY(IFCQ_IS_EMPTY(ifq));
        ifq->ifcq_ifp = ifp;
        IFCQ_LEN(ifq) = 0;
+       IFCQ_BYTES(ifq) = 0;
        bzero(&ifq->ifcq_xmitcnt, sizeof (ifq->ifcq_xmitcnt));
        bzero(&ifq->ifcq_dropcnt, sizeof (ifq->ifcq_dropcnt));
 
@@ -197,6 +198,7 @@ ifclassq_teardown(struct ifnet *ifp)
        VERIFY(ifq->ifcq_dequeue_sc == NULL);
        VERIFY(ifq->ifcq_request == NULL);
        IFCQ_LEN(ifq) = 0;
+       IFCQ_BYTES(ifq) = 0;
        IFCQ_MAXLEN(ifq) = 0;
        bzero(&ifq->ifcq_xmitcnt, sizeof (ifq->ifcq_xmitcnt));
        bzero(&ifq->ifcq_dropcnt, sizeof (ifq->ifcq_dropcnt));
@@ -331,7 +333,6 @@ ifclassq_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
        IFCQ_LOCK_SPIN(ifq);
 
        while (i < limit) {
-               u_int64_t pktlen;
 #if PF_ALTQ
                u_int32_t qlen;
 
@@ -383,13 +384,17 @@ ifclassq_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
                last = *head;
 
                l += (*head)->m_pkthdr.len;
-               pktlen = (*head)->m_pkthdr.len;
 
 #if MEASURE_BW
                (*head)->m_pkthdr.pkt_bwseq =
-                   atomic_add_64_ov(&(ifp->if_bw.cur_seq), pktlen);
+                   atomic_add_64_ov(&(ifp->if_bw.cur_seq), m_pktlen(*head));
 #endif /* MEASURE_BW */
-
+               if (IFNET_IS_CELLULAR(ifp)) {
+                       (*head)->m_pkthdr.pkt_flags |= PKTF_VALID_UNSENT_DATA;
+                       (*head)->m_pkthdr.pkt_unsent_databytes =
+                           (total_snd_byte_count << MSIZESHIFT) +
+                           ifq->ifcq_bytes;
+               }
                head = &(*head)->m_nextpkt;
                i++;
        }
index cb60c5464dcbe47c41f345e1f33d399b2e696cc5..bc8f4191e3f39de0d808764d2e05c6e053255cea 100644 (file)
@@ -120,7 +120,7 @@ struct ifclassq {
        decl_lck_mtx_data(, ifcq_lock);
 
        struct ifnet    *ifcq_ifp;      /* back pointer to interface */
-       u_int32_t       ifcq_len;
+       u_int32_t       ifcq_len;       /* packet count */
        u_int32_t       ifcq_maxlen;
        struct pktcntr  ifcq_xmitcnt;
        struct pktcntr  ifcq_dropcnt;
@@ -129,6 +129,7 @@ struct ifclassq {
        u_int32_t       ifcq_flags;     /* flags */
        u_int32_t       ifcq_sflags;    /* scheduler flags */
        u_int32_t       ifcq_target_qdelay; /* target queue delay */
+       u_int32_t       ifcq_bytes;     /* bytes count */
        void            *ifcq_disc;     /* for scheduler-specific use */
        /*
         * ifcq_disc_slots[] represents the leaf classes configured for the
@@ -342,6 +343,9 @@ struct if_ifclassq_stats {
 #define        IFCQ_MAXLEN(_ifcq)      ((_ifcq)->ifcq_maxlen)
 #define        IFCQ_SET_MAXLEN(_ifcq, _len) ((_ifcq)->ifcq_maxlen = (_len))
 #define IFCQ_TARGET_QDELAY(_ifcq)      ((_ifcq)->ifcq_target_qdelay)
+#define        IFCQ_BYTES(_ifcq)       ((_ifcq)->ifcq_bytes)
+#define        IFCQ_INC_BYTES(_ifcq, _len) (IFCQ_BYTES(_ifcq) + _len)
+#define        IFCQ_DEC_BYTES(_ifcq, _len) (IFCQ_BYTES(_ifcq) - _len)
 
 #define        IFCQ_XMIT_ADD(_ifcq, _pkt, _len) do {                           \
        PKTCNTR_ADD(&(_ifcq)->ifcq_xmitcnt, _pkt, _len);                \
index 58bea9bbb8c34ca0c307c69ef0a605ec59157848..9975c99dc27fd7a75ae2f876c50f9d622240a9da 100644 (file)
@@ -2944,7 +2944,7 @@ cfil_update_data_offsets(struct socket *so, uint32_t kcunit, int outgoing,
        uint64_t pass_offset, uint64_t peek_offset)
 {
        errno_t error = 0;
-       struct cfil_entry *entry;
+       struct cfil_entry *entry = NULL;
        struct cfe_buf *entrybuf;
        int updated = 0;
 
@@ -3006,7 +3006,7 @@ done:
         * or when the socket is closed and no more data is waiting
         * to be delivered to the filter
         */
-       if (so->so_cfil != NULL &&
+       if (entry != NULL &&
            ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
            entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
            ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) &&
@@ -3196,9 +3196,12 @@ cfil_action_drop(struct socket *so, uint32_t kcunit)
 
        p = current_proc();
 
-       /* Force the socket to be marked defunct */
+       /*
+        * Force the socket to be marked defunct
+        * (forcing fixed along with rdar://19391339)
+        */
        error = sosetdefunct(p, so,
-               SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL, 1);
+               SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL, FALSE);
 
        /* Flush the socket buffer and disconnect */
        if (error == 0)
index 2e4facaef9a4ae1061356b765e01ff21992b9de8..7291b2fb49eafcdf430171cd72b932d41547cf23 100644 (file)
@@ -26,6 +26,7 @@
 
 #include <sys/param.h>
 #include <sys/types.h>
+#include <sys/_types/_timeval64.h>
 #include <sys/socket.h>
 #include <sys/syslog.h>
 #include <netinet/in.h>
@@ -94,14 +95,6 @@ typedef uint64_t cfil_sock_id_t;
 
 #define        CFIL_SOCK_ID_NONE UINT64_MAX
 
-/*
- * Invariant timeval structure definition across architectures
- */
-struct timeval64 {
-       int64_t tv_sec;
-       int64_t tv_usec;
-};
-
 /*
  * struct cfil_msg_hdr
  *
index d0b55d2519b770885b3d0b038d20a7bd1ec368e5..45eb31f473cbd8bd50eb5f19b36798e2cd30b98e 100644 (file)
@@ -176,11 +176,10 @@ devtimer_create(devtimer_process_func process_func, void * arg0)
 {
     devtimer_ref       timer;
 
-    timer = _MALLOC(sizeof(*timer), M_DEVTIMER, M_WAITOK);
+    timer = _MALLOC(sizeof(*timer), M_DEVTIMER, M_WAITOK | M_ZERO);
     if (timer == NULL) {
        return (timer);
     }
-    bzero(timer, sizeof(*timer));
     devtimer_retain(timer);
     timer->dt_callout = thread_call_allocate(devtimer_process, timer);
     if (timer->dt_callout == NULL) {
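The devtimer change folds the separate bzero() into the allocation by passing M_ZERO. A rough userspace analogue of the before/after pattern, with malloc/memset and calloc standing in for _MALLOC() without and with M_ZERO (toy types, not the kernel devtimer):

#include <stdlib.h>
#include <string.h>

struct toy_timer { int dt_refcount; void *dt_callout; };

/* before: allocate, then clear in a second step */
static struct toy_timer *
alloc_then_zero(void)
{
	struct toy_timer *t = malloc(sizeof(*t));

	if (t != NULL)
		memset(t, 0, sizeof(*t));
	return (t);
}

/* after: ask the allocator for zero-filled memory up front */
static struct toy_timer *
alloc_zeroed(void)
{
	return (calloc(1, sizeof(struct toy_timer)));
}

int
main(void)
{
	struct toy_timer *a = alloc_then_zero();
	struct toy_timer *b = alloc_zeroed();

	free(a);
	free(b);
	return (0);
}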
index 0df5c6ea43a78fd65e51610faabc63b4fa603723..5576af7d7b0ed9f5e5682a9d0539d5ea54368673 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -425,6 +425,10 @@ static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
 static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
 static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
 static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
+static int sysctl_get_ports_used SYSCTL_HANDLER_ARGS;
+
+struct chain_len_stats tx_chain_len_stats;
+static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;
 
 /* The following are protected by dlil_ifnet_lock */
 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
@@ -621,6 +625,16 @@ SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
     CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
     "enable hardware cksum debugging");
 
+u_int32_t ifnet_start_delayed = 0;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
+    "number of times start was delayed");
+
+u_int32_t ifnet_delay_start_disabled = 0;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
+    "number of times start was delayed");
+
 #define        HWCKSUM_DBG_PARTIAL_FORCED      0x1     /* forced partial checksum */
 #define        HWCKSUM_DBG_PARTIAL_RXOFF_ADJ   0x2     /* adjust start offset */
 #define        HWCKSUM_DBG_FINALIZE_FORCED     0x10    /* forced finalize */
@@ -696,6 +710,18 @@ SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
     CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
     "enable receive hardware checksum offload");
 
+SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
+    CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
+    sysctl_tx_chain_len_stats, "S", "");
+
+uint32_t tx_chain_len_count = 0;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0,
+    "");
+
+SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_ports_used,
+    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_ports_used, "");
+
 unsigned int net_rxpoll = 1;
 unsigned int net_affinity = 1;
 static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
@@ -892,6 +918,26 @@ ifnet_lock_done(struct ifnet *ifp)
        lck_rw_done(&ifp->if_lock);
 }
 
+#if INET
+__private_extern__ void
+if_inetdata_lock_shared(struct ifnet *ifp)
+{
+       lck_rw_lock_shared(&ifp->if_inetdata_lock);
+}
+
+__private_extern__ void
+if_inetdata_lock_exclusive(struct ifnet *ifp)
+{
+       lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
+}
+
+__private_extern__ void
+if_inetdata_lock_done(struct ifnet *ifp)
+{
+       lck_rw_done(&ifp->if_inetdata_lock);
+}
+#endif
+
 #if INET6
 __private_extern__ void
 if_inet6data_lock_shared(struct ifnet *ifp)
@@ -2476,7 +2522,10 @@ ifnet_start_common(struct ifnet *ifp, int resetfc)
                return;
        }
        ifp->if_start_req++;
-       if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL) {
+       if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
+           (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
+           IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen
+           || ifp->if_start_delayed == 0)) {
                wakeup_one((caddr_t)&ifp->if_start_thread);
        }
        lck_mtx_unlock(&ifp->if_start_lock);
@@ -2496,6 +2545,7 @@ ifnet_start_thread_fn(void *v, wait_result_t w)
        char ifname[IFNAMSIZ + 1];
        struct timespec *ts = NULL;
        struct ifclassq *ifq = &ifp->if_snd;
+       struct timespec delay_start_ts;
 
        /*
         * Treat the dedicated starter thread for lo0 as equivalent to
@@ -2530,8 +2580,9 @@ ifnet_start_thread_fn(void *v, wait_result_t w)
        lck_mtx_lock_spin(&ifp->if_start_lock);
 
        for (;;) {
-               (void) msleep(&ifp->if_start_thread, &ifp->if_start_lock,
-                   (PZERO - 1) | PSPIN, ifname, ts);
+               if (ifp->if_start_thread != NULL)
+                       (void) msleep(&ifp->if_start_thread, &ifp->if_start_lock,
+                           (PZERO - 1) | PSPIN, ifname, ts);
 
                /* interface is detached? */
                if (ifp->if_start_thread == THREAD_NULL) {
@@ -2553,20 +2604,51 @@ ifnet_start_thread_fn(void *v, wait_result_t w)
                }
 
                ifp->if_start_active = 1;
+
                for (;;) {
                        u_int32_t req = ifp->if_start_req;
-
+                       if (!IFCQ_IS_EMPTY(ifq) &&
+                           (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
+                           ifp->if_start_delayed == 0 &&
+                           IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
+                           (ifp->if_eflags & IFEF_DELAY_START)) {
+                               ifp->if_start_delayed = 1;
+                               ifnet_start_delayed++;
+                               break;
+                       } else {
+                               ifp->if_start_delayed = 0;
+                       }
                        lck_mtx_unlock(&ifp->if_start_lock);
+
+                       /*
+                        * If no longer attached, don't call start because ifp
+                        * is being destroyed; else hold an IO refcnt to
+                        * prevent the interface from being detached (will be
+                        * released below.)
+                        */
+                       if (!ifnet_is_attached(ifp, 1)) {
+                               lck_mtx_lock_spin(&ifp->if_start_lock);
+                               break;
+                       }
+
                        /* invoke the driver's start routine */
                        ((*ifp->if_start)(ifp));
+
+                       /*
+                        * Release the io ref count taken by ifnet_is_attached.
+                        */
+                       ifnet_decr_iorefcnt(ifp);
+
                        lck_mtx_lock_spin(&ifp->if_start_lock);
 
                        /* if there's no pending request, we're done */
                        if (req == ifp->if_start_req)
                                break;
                }
+
                ifp->if_start_req = 0;
                ifp->if_start_active = 0;
+
                /*
                 * Wakeup N ns from now if rate-controlled by TBR, and if
                 * there are still packets in the send queue which haven't
@@ -2576,6 +2658,12 @@ ifnet_start_thread_fn(void *v, wait_result_t w)
                ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
                    &ifp->if_start_cycle : NULL);
 
+               if (ts == NULL && ifp->if_start_delayed == 1) {
+                       delay_start_ts.tv_sec = 0;
+                       delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
+                       ts = &delay_start_ts;
+               }
+
                if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
                        ts = NULL;
        }
@@ -2922,6 +3010,8 @@ errno_t
 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
 {
        int error;
+       struct timespec now;
+       u_int64_t now_nsec;
 
        if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
            m->m_nextpkt != NULL) {
@@ -2938,6 +3028,65 @@ ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
                return (ENETDOWN);
        }
 
+       nanouptime(&now);
+       net_timernsec(&now, &now_nsec);
+       m->m_pkthdr.pkt_enqueue_ts = now_nsec;
+
+       if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
+               /*
+                * If the driver chose to delay start callback for
+                * coalescing multiple packets, Then use the following
+                * heuristics to make sure that start callback will
+                * be delayed only when bulk data transfer is detected.
+                * 1. number of packets enqueued in (delay_win * 2) is
+                * greater than or equal to the delay qlen.
+                * 2. If delay_start is enabled it will stay enabled for
+                * another 10 idle windows. This is to take into account
+                * variable RTT and burst traffic.
+                * 3. If the time elapsed since last enqueue is more
+                * than 200ms we disable delaying start callback. This is
+                * is to take idle time into account.
+                */ 
+               u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
+               if (ifp->if_start_delay_swin > 0) {
+                       if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
+                               ifp->if_start_delay_cnt++;
+                       } else if ((now_nsec - ifp->if_start_delay_swin)
+                           >= (200 * 1000 * 1000)) {
+                               ifp->if_start_delay_swin = now_nsec;
+                               ifp->if_start_delay_cnt = 1;
+                               ifp->if_start_delay_idle = 0;
+                               if (ifp->if_eflags & IFEF_DELAY_START) {
+                                       ifp->if_eflags &=
+                                           ~(IFEF_DELAY_START);
+                                       ifnet_delay_start_disabled++;
+                               }
+                       } else {
+                               if (ifp->if_start_delay_cnt >=
+                                   ifp->if_start_delay_qlen) {
+                                       ifp->if_eflags |= IFEF_DELAY_START;
+                                       ifp->if_start_delay_idle = 0;
+                               } else {
+                                       if (ifp->if_start_delay_idle >= 10) {
+                                               ifp->if_eflags &= ~(IFEF_DELAY_START);
+                                               ifnet_delay_start_disabled++;
+                                       } else {
+                                               ifp->if_start_delay_idle++;
+                                       }
+                               } 
+                               ifp->if_start_delay_swin = now_nsec;
+                               ifp->if_start_delay_cnt = 1;
+                       }
+               } else {
+                       ifp->if_start_delay_swin = now_nsec;
+                       ifp->if_start_delay_cnt = 1;
+                       ifp->if_start_delay_idle = 0;
+                       ifp->if_eflags &= ~(IFEF_DELAY_START);
+               }
+       } else {
+               ifp->if_eflags &= ~(IFEF_DELAY_START);
+       }
+
        /* enqueue the packet */
        error = ifclassq_enqueue(&ifp->if_snd, m);
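A compact standalone sketch of the delayed-start heuristics spelled out in the comment above, rewritten over a toy state structure. The field names, threshold, and timeout values are illustrative placeholders, not the kernel's per-interface if_start_delay_* settings:

#include <stdbool.h>
#include <stdint.h>

struct delay_state {
	uint64_t swin;		/* start of the current sampling window, ns (0 = none yet) */
	uint32_t cnt;		/* packets enqueued in the window */
	uint32_t idle;		/* consecutive windows below the threshold */
	bool	 delay_on;	/* stand-in for IFEF_DELAY_START */
};

#define DELAY_TIMEOUT_NS	(10ULL * 1000 * 1000)	/* example: 10 ms */
#define DELAY_QLEN		16			/* example threshold */
#define IDLE_RESET_NS		(200ULL * 1000 * 1000)	/* 200 ms idle cutoff */
#define MAX_IDLE_WINDOWS	10

static void
delay_start_update(struct delay_state *ds, uint64_t now_nsec)
{
	uint64_t dwin = DELAY_TIMEOUT_NS << 1;	/* window = 2 * timeout */

	if (ds->swin == 0) {
		/* first packet: open a window, keep delaying disabled */
		ds->swin = now_nsec;
		ds->cnt = 1;
		ds->idle = 0;
		ds->delay_on = false;
		return;
	}
	if (now_nsec < ds->swin + dwin) {
		/* still inside the window: just count the packet (rule 1) */
		ds->cnt++;
	} else if (now_nsec - ds->swin >= IDLE_RESET_NS) {
		/* long idle gap: disable delaying, restart the window (rule 3) */
		ds->swin = now_nsec;
		ds->cnt = 1;
		ds->idle = 0;
		ds->delay_on = false;
	} else {
		/* window closed: decide based on how busy it was */
		if (ds->cnt >= DELAY_QLEN) {
			ds->delay_on = true;	/* bulk transfer detected */
			ds->idle = 0;
		} else if (ds->idle >= MAX_IDLE_WINDOWS) {
			ds->delay_on = false;	/* too many quiet windows (rule 2) */
		} else {
			ds->idle++;		/* tolerate a quiet window */
		}
		ds->swin = now_nsec;
		ds->cnt = 1;
	}
}

int
main(void)
{
	struct delay_state ds = { 0, 0, 0, false };
	uint64_t t = 1000000ULL;	/* start at 1 ms to keep 0 as "no window" */

	/* 30-packet burst arriving 1 ms apart: once the first 20 ms window
	 * closes with enough packets, delaying turns on */
	for (int i = 0; i < 30; i++, t += 1000000ULL)
		delay_start_update(&ds, t);
	return (ds.delay_on ? 0 : 1);
}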
 
@@ -2946,7 +3095,8 @@ ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
         * for the packet is suspended (EQSUSPENDED), as the driver could still
         * be dequeueing from other unsuspended queues.
         */
-       if (error == 0 || error == EQFULL || error == EQSUSPENDED)
+       if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
+           (error == 0 || error == EQFULL || error == EQSUSPENDED))
                ifnet_start(ifp);
 
        return (error);
@@ -3486,10 +3636,6 @@ dlil_event_internal(struct ifnet *ifp, struct kev_msg *event)
        int tmp_ifproto_arr_idx = 0;
        bool tmp_malloc = false;
 
-       /* Get an io ref count if the interface is attached */
-       if (!ifnet_is_attached(ifp, 1))
-               goto done;
-
        /*
         * Pass the event to the interface filters
         */
@@ -3510,6 +3656,10 @@ dlil_event_internal(struct ifnet *ifp, struct kev_msg *event)
        if_flt_monitor_unbusy(ifp);
        lck_mtx_unlock(&ifp->if_flt_lock);
 
+       /* Get an io ref count if the interface is attached */
+       if (!ifnet_is_attached(ifp, 1))
+               goto done;
+
        /*
         * An embedded tmp_list_entry in if_proto may still get
         * over-written by another thread after giving up ifnet lock,
@@ -3716,6 +3866,38 @@ ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m)
        }
 }
 
+static void
+dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
+{
+       mbuf_t  n = m;
+       int chainlen = 0;
+
+       while (n != NULL) {
+               chainlen++;
+               n = n->m_next;
+       }
+       switch (chainlen) {
+               case 0:
+                       break;
+               case 1:
+                       atomic_add_64(&cls->cls_one, 1);
+                       break;
+               case 2:
+                       atomic_add_64(&cls->cls_two, 1);
+                       break;
+               case 3:
+                       atomic_add_64(&cls->cls_three, 1);
+                       break;
+               case 4:
+                       atomic_add_64(&cls->cls_four, 1);
+                       break;
+               case 5:
+               default:
+                       atomic_add_64(&cls->cls_five_or_more, 1);
+                       break;
+       }
+}
+
 /*
  * dlil_output
  *
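dlil_count_chain_len() above walks an mbuf chain once and bumps one bucket of a small length histogram. A userspace sketch of the same bucketing over a toy singly linked buffer type (not the kernel mbuf, and without the atomic_add_64 updates):

#include <stdint.h>
#include <stdio.h>

struct toy_buf { struct toy_buf *m_next; };

struct chain_len_hist {
	uint64_t one, two, three, four, five_or_more;
};

static void
count_chain_len(const struct toy_buf *m, struct chain_len_hist *h)
{
	int chainlen = 0;

	for (const struct toy_buf *n = m; n != NULL; n = n->m_next)
		chainlen++;

	switch (chainlen) {
	case 0:		break;			/* empty chain: not counted */
	case 1:		h->one++;		break;
	case 2:		h->two++;		break;
	case 3:		h->three++;		break;
	case 4:		h->four++;		break;
	default:	h->five_or_more++;	break;
	}
}

int
main(void)
{
	struct toy_buf b3 = { NULL }, b2 = { &b3 }, b1 = { &b2 };
	struct chain_len_hist h = { 0, 0, 0, 0, 0 };

	count_chain_len(&b1, &h);	/* a 3-buffer chain */
	printf("three=%llu\n", (unsigned long long)h.three);
	return (0);
}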
@@ -3930,18 +4112,29 @@ preout_again:
                 * update the timestamp to indicate recent activity
                 * on a foreground socket.
                 */
-               if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND) &&
-                   (m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
-                   m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB)
-                       ifp->if_fg_sendts = net_uptime();
+               if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
+                   m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
+                       if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND))
+                               ifp->if_fg_sendts = net_uptime();
+
+                       if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME)
+                               ifp->if_rt_sendts = net_uptime();
+               }
 
                ifp_inc_traffic_class_out(ifp, m);
                pktap_output(ifp, proto_family, m, pre, post);
 
+               /*
+                * Count the number of elements in the mbuf chain
+                */
+               if (tx_chain_len_count) {
+                       dlil_count_chain_len(m, &tx_chain_len_stats);
+               }
+
                /*
                 * Finally, call the driver.
                 */
-               if (ifp->if_eflags & IFEF_SENDLIST) {
+               if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
                        if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
                                flen += (m_pktlen(m) - (pre + post));
                                m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
@@ -3989,24 +4182,57 @@ next:
        } while (m != NULL);
 
        if (send_head != NULL) {
-               VERIFY(ifp->if_eflags & IFEF_SENDLIST);
                KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
                    0, 0, 0, 0, 0);
-               retval = (*ifp->if_output)(ifp, send_head);
-               if (retval == EQFULL || retval == EQSUSPENDED) {
-                       if (adv != NULL) {
-                               adv->code = (retval == EQFULL ?
-                                   FADV_FLOW_CONTROLLED : FADV_SUSPENDED);
+               if (ifp->if_eflags & IFEF_SENDLIST) {
+                       retval = (*ifp->if_output)(ifp, send_head);
+                       if (retval == EQFULL || retval == EQSUSPENDED) {
+                               if (adv != NULL) {
+                                       adv->code = (retval == EQFULL ?
+                                           FADV_FLOW_CONTROLLED :
+                                           FADV_SUSPENDED);
+                               }
+                               retval = 0;
+                       }
+                       if (retval == 0 && flen > 0) {
+                               fbytes += flen;
+                               fpkts++;
+                       }
+                       if (retval != 0 && dlil_verbose) {
+                               printf("%s: output error on %s retval = %d\n",
+                                   __func__, if_name(ifp), retval);
+                       }
+               } else {
+                       struct mbuf *send_m;
+                       int enq_cnt = 0;
+                       VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
+                       while (send_head != NULL) {
+                               send_m = send_head;
+                               send_head = send_m->m_nextpkt;
+                               send_m->m_nextpkt = NULL;
+                               retval = (*ifp->if_output)(ifp, send_m);
+                               if (retval == EQFULL || retval == EQSUSPENDED) {
+                                       if (adv != NULL) {
+                                               adv->code = (retval == EQFULL ?
+                                                   FADV_FLOW_CONTROLLED :
+                                                   FADV_SUSPENDED);
+                                       }
+                                       retval = 0;
+                               }
+                               if (retval == 0) {
+                                       enq_cnt++;
+                                       if (flen > 0)
+                                               fpkts++;
+                               }
+                               if (retval != 0 && dlil_verbose) {
+                                       printf("%s: output error on %s retval = %d\n",
+                                           __func__, if_name(ifp), retval);
+                               }
+                       }
+                       if (enq_cnt > 0) {
+                               fbytes += flen;
+                               ifnet_start(ifp);
                        }
-                       retval = 0;
-               }
-               if (retval == 0 && flen > 0) {
-                       fbytes += flen;
-                       fpkts++;
-               }
-               if (retval != 0 && dlil_verbose) {
-                       printf("%s: output error on %s retval = %d\n",
-                           __func__, if_name(ifp), retval);
                }
                KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
        }
@@ -4278,7 +4504,7 @@ __private_extern__ void
 net_thread_marks_pop(net_thread_marks_t popx)
 {
        static const char *const base = (const void*)&net_thread_marks_base;
-       ptrdiff_t pop = (caddr_t)popx - (caddr_t)base;
+       const ptrdiff_t pop = (const char *)popx - (const char *)base;
 
        if (pop != 0) {
                static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
@@ -4294,7 +4520,7 @@ __private_extern__ void
 net_thread_unmarks_pop(net_thread_marks_t unpopx)
 {
        static const char *const base = (const void*)&net_thread_marks_base;
-       ptrdiff_t unpop = (caddr_t)unpopx - (caddr_t)base;
+       ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
 
        if (unpop != 0) {
                static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
@@ -5171,6 +5397,23 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
        VERIFY(ifp->if_delegated.subfamily == 0);
        VERIFY(ifp->if_delegated.expensive == 0);
 
+       bzero(&ifp->if_agentids, sizeof(ifp->if_agentids));
+
+       /* Reset interface state */
+       bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
+       ifp->if_interface_state.valid_bitmask |= 
+               IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
+       ifp->if_interface_state.interface_availability =
+               IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
+
+       /* Initialize Link Quality Metric (loopback [lo0] is always good) */
+       if (ifp == lo_ifp) {
+               ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
+               ifp->if_interface_state.valid_bitmask |=
+                   IF_INTERFACE_STATE_LQM_STATE_VALID;
+       } else {
+               ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
+       }
        ifnet_lock_done(ifp);
        ifnet_head_done();
 
@@ -5223,9 +5466,6 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
         */
        lck_mtx_lock(rnh_lock);
        ifnet_lock_exclusive(ifp);
-       /* Initialize Link Quality Metric (loopback [lo0] is always good) */
-       ifp->if_lqm = (ifp == lo_ifp) ? IFNET_LQM_THRESH_GOOD :
-           IFNET_LQM_THRESH_UNKNOWN;
        lck_mtx_lock_spin(&ifp->if_ref_lock);
        ifp->if_refflags = IFRF_ATTACHED;
        lck_mtx_unlock(&ifp->if_ref_lock);
@@ -5435,6 +5675,9 @@ ifnet_detach(ifnet_t ifp)
        ifp->if_link.tqe_prev = NULL;
        ifindex2ifnet[ifp->if_index] = NULL;
 
+       /* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
+       ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);
+
        /* Record detach PC stacktrace */
        ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
 
@@ -5445,6 +5688,9 @@ ifnet_detach(ifnet_t ifp)
        delegated_ifp = ifp->if_delegated.ifp;
        bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));
 
+       /* Reset interface state */
+       bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
+
        ifnet_lock_done(ifp);
        ifnet_head_done();
        lck_mtx_unlock(rnh_lock);
@@ -5455,7 +5701,7 @@ ifnet_detach(ifnet_t ifp)
 
        /* Reset Link Quality Metric (unless loopback [lo0]) */
        if (ifp != lo_ifp)
-               if_lqm_update(ifp, IFNET_LQM_THRESH_OFF);
+               if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
 
        /* Reset TCP local statistics */
        if (ifp->if_tcp_stat != NULL)
@@ -5465,6 +5711,12 @@ ifnet_detach(ifnet_t ifp)
        if (ifp->if_udp_stat != NULL)
                bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
 
+       /* Release memory held for interface link status report */
+       if (ifp->if_link_status != NULL) {
+               FREE(ifp->if_link_status, M_TEMP);
+               ifp->if_link_status = NULL;
+       }
+
        /* Let BPF know we're detaching */
        bpfdetach(ifp);
 
@@ -6057,10 +6309,19 @@ int dlil_if_acquire(u_int32_t family, const void *uniqueid,
        lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
            ifnet_lock_attr);
        lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
+#if INET
+       lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
+           ifnet_lock_attr);
+       ifp1->if_inetdata = NULL;
+#endif
 #if INET6
-       lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group, ifnet_lock_attr);
+       lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
+           ifnet_lock_attr);
        ifp1->if_inet6data = NULL;
 #endif
+       lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
+           ifnet_lock_attr);
+       ifp1->if_link_status = NULL;
 
        /* for send data paths */
        lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
@@ -6280,26 +6541,43 @@ ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
 #endif /* INET6 */
 
 void
-if_lqm_update(struct ifnet *ifp, int lqm)
+if_lqm_update(struct ifnet *ifp, int lqm, int locked)
 {
        struct kev_dl_link_quality_metric_data ev_lqm_data;
 
        VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
 
        /* Normalize to edge */
-       if (lqm > IFNET_LQM_THRESH_UNKNOWN && lqm <= IFNET_LQM_THRESH_BAD)
+       if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_BAD)
                lqm = IFNET_LQM_THRESH_BAD;
        else if (lqm > IFNET_LQM_THRESH_BAD && lqm <= IFNET_LQM_THRESH_POOR)
                lqm = IFNET_LQM_THRESH_POOR;
        else if (lqm > IFNET_LQM_THRESH_POOR && lqm <= IFNET_LQM_THRESH_GOOD)
                lqm = IFNET_LQM_THRESH_GOOD;
 
-       ifnet_lock_exclusive(ifp);
-       if (lqm == ifp->if_lqm) {
-               ifnet_lock_done(ifp);
+       /*
+        * Take the lock if needed
+        */
+       if (!locked)
+               ifnet_lock_exclusive(ifp);
+
+       if (lqm == ifp->if_interface_state.lqm_state &&
+           (ifp->if_interface_state.valid_bitmask & 
+           IF_INTERFACE_STATE_LQM_STATE_VALID)) {
+               /*
+                * Release the lock if was not held by the caller
+                */
+               if (!locked)
+                       ifnet_lock_done(ifp);
                return;         /* nothing to update */
        }
-       ifp->if_lqm = lqm;
+       ifp->if_interface_state.valid_bitmask |=
+                   IF_INTERFACE_STATE_LQM_STATE_VALID;
+       ifp->if_interface_state.lqm_state = lqm;
+
+       /*
+        * Don't want to hold the lock when issuing kernel events
+        */
        ifnet_lock_done(ifp);
 
        bzero(&ev_lqm_data, sizeof (ev_lqm_data));
@@ -6307,6 +6585,157 @@ if_lqm_update(struct ifnet *ifp, int lqm)
 
        dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
            (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));
+
+       /*
+        * Reacquire the lock for the caller
+        */
+       if (locked)
+               ifnet_lock_exclusive(ifp);
+}
+
+static void
+if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
+{
+       struct kev_dl_rrc_state kev;
+       
+       if (rrc_state == ifp->if_interface_state.rrc_state &&
+           (ifp->if_interface_state.valid_bitmask &
+           IF_INTERFACE_STATE_RRC_STATE_VALID))
+               return;
+
+       ifp->if_interface_state.valid_bitmask |=
+           IF_INTERFACE_STATE_RRC_STATE_VALID;
+
+       ifp->if_interface_state.rrc_state = rrc_state;
+
+       /*
+        * Don't want to hold the lock when issuing kernel events
+        */
+       ifnet_lock_done(ifp);
+
+       bzero(&kev, sizeof(struct kev_dl_rrc_state));
+       kev.rrc_state = rrc_state;
+
+       dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
+           (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));
+
+       ifnet_lock_exclusive(ifp);
+}
+
+errno_t
+if_state_update(struct ifnet *ifp,
+   struct if_interface_state* if_interface_state)
+{
+       u_short if_index_available = 0;
+
+       ifnet_lock_exclusive(ifp);
+
+       if ((ifp->if_type != IFT_CELLULAR) &&
+           (if_interface_state->valid_bitmask &
+           IF_INTERFACE_STATE_RRC_STATE_VALID)) {
+               ifnet_lock_done(ifp);
+               return (ENOTSUP);
+       }
+       if ((if_interface_state->valid_bitmask &
+           IF_INTERFACE_STATE_LQM_STATE_VALID) &&
+           (if_interface_state->lqm_state < IFNET_LQM_MIN ||
+           if_interface_state->lqm_state > IFNET_LQM_MAX)) {
+               ifnet_lock_done(ifp);
+               return (EINVAL);
+       }
+       if ((if_interface_state->valid_bitmask &
+           IF_INTERFACE_STATE_RRC_STATE_VALID) &&
+           if_interface_state->rrc_state !=
+           IF_INTERFACE_STATE_RRC_STATE_IDLE &&
+           if_interface_state->rrc_state !=
+           IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
+               ifnet_lock_done(ifp);
+               return (EINVAL);
+       }
+
+       if (if_interface_state->valid_bitmask &
+           IF_INTERFACE_STATE_LQM_STATE_VALID) {
+               if_lqm_update(ifp, if_interface_state->lqm_state, 1);
+       }
+       if (if_interface_state->valid_bitmask &
+           IF_INTERFACE_STATE_RRC_STATE_VALID) {
+               if_rrc_state_update(ifp, if_interface_state->rrc_state);
+       }
+       if (if_interface_state->valid_bitmask &
+           IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
+               ifp->if_interface_state.valid_bitmask |=
+                   IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
+               ifp->if_interface_state.interface_availability =
+                   if_interface_state->interface_availability;
+
+               if (ifp->if_interface_state.interface_availability ==
+                   IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
+                       if_index_available = ifp->if_index;
+               }
+       }
+       ifnet_lock_done(ifp);
+
+       /*
+        * Check if the TCP connections going on this interface should be
+        * forced to send probe packets instead of waiting for TCP timers
+        * to fire. This will be done when there is an explicit
+        * notification that the interface became available.
+        */
+       if (if_index_available > 0)
+               tcp_interface_send_probe(if_index_available);
+
+       return (0);
+}
+
+void
+if_get_state(struct ifnet *ifp,
+   struct if_interface_state* if_interface_state)
+{
+       ifnet_lock_shared(ifp);
+
+       if_interface_state->valid_bitmask = 0;
+
+       if (ifp->if_interface_state.valid_bitmask &
+           IF_INTERFACE_STATE_RRC_STATE_VALID) {
+               if_interface_state->valid_bitmask |=
+                   IF_INTERFACE_STATE_RRC_STATE_VALID;
+               if_interface_state->rrc_state =
+                   ifp->if_interface_state.rrc_state;
+       }
+       if (ifp->if_interface_state.valid_bitmask &
+           IF_INTERFACE_STATE_LQM_STATE_VALID) {
+               if_interface_state->valid_bitmask |=
+                   IF_INTERFACE_STATE_LQM_STATE_VALID;
+               if_interface_state->lqm_state =
+                   ifp->if_interface_state.lqm_state;
+       }
+       if (ifp->if_interface_state.valid_bitmask &
+           IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
+               if_interface_state->valid_bitmask |=
+                   IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
+               if_interface_state->interface_availability =
+                   ifp->if_interface_state.interface_availability;
+       }
+
+       ifnet_lock_done(ifp);
+}
+
+errno_t
+if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
+{
+       ifnet_lock_exclusive(ifp);
+       if (conn_probe > 1) {
+               ifnet_lock_done(ifp);
+               return (EINVAL);
+       }
+       if (conn_probe == 0)
+               ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
+       else
+               ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
+       ifnet_lock_done(ifp);
+
+       tcp_probe_connectivity(ifp, conn_probe);
+       return (0);
 }
 
 /* for uuid.c */
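if_lqm_update() above first collapses a raw link-quality value onto one of the threshold edges before recording it in if_interface_state, and if_state_update() routes caller-supplied LQM values through that same path. A small sketch of the normalize-to-edge step; the numeric values assigned to the thresholds below are assumptions for illustration, since the real IFNET_LQM_THRESH_* constants live in a header outside this diff:

#include <stdio.h>

enum {
	LQM_THRESH_OFF		= -2,	/* assumed values for illustration */
	LQM_THRESH_UNKNOWN	= -1,
	LQM_THRESH_BAD		= 10,
	LQM_THRESH_POOR		= 50,
	LQM_THRESH_GOOD		= 100
};

static int
normalize_lqm(int lqm)
{
	/* collapse any in-range value onto the BAD/POOR/GOOD edge */
	if (lqm >= 0 && lqm <= LQM_THRESH_BAD)
		return (LQM_THRESH_BAD);
	if (lqm > LQM_THRESH_BAD && lqm <= LQM_THRESH_POOR)
		return (LQM_THRESH_POOR);
	if (lqm > LQM_THRESH_POOR && lqm <= LQM_THRESH_GOOD)
		return (LQM_THRESH_GOOD);
	return (lqm);	/* OFF/UNKNOWN pass through unchanged */
}

int
main(void)
{
	printf("%d %d %d\n", normalize_lqm(5), normalize_lqm(42),
	    normalize_lqm(90));	/* prints: 10 50 100 */
	return (0);
}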
@@ -6605,7 +7034,7 @@ dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
        _CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
        _CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);
 
-       bzero(&kev, sizeof (&kev));
+       bzero(&kev, sizeof (kev));
 
        microtime(&tv);
        kev.timestamp = tv.tv_sec;
@@ -7075,6 +7504,125 @@ try_again:
        return (flowhash);
 }
 
+int
+ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
+    uint16_t flags, uint8_t *data)
+{
+#pragma unused(flags)
+       int error = 0;
+
+       switch (family) {
+       case AF_INET:
+               if_inetdata_lock_exclusive(ifp);
+               if (IN_IFEXTRA(ifp) != NULL) {
+                       if (len == 0) {
+                               /* Allow clearing the signature */
+                               IN_IFEXTRA(ifp)->netsig_len = 0;
+                               bzero(IN_IFEXTRA(ifp)->netsig,
+                                   sizeof (IN_IFEXTRA(ifp)->netsig));
+                               if_inetdata_lock_done(ifp);
+                               break;
+                       } else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) {
+                               error = EINVAL;
+                               if_inetdata_lock_done(ifp);
+                               break;
+                       }
+                       IN_IFEXTRA(ifp)->netsig_len = len;
+                       bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
+               } else {
+                       error = ENOMEM;
+               }
+               if_inetdata_lock_done(ifp);
+               break;
+
+       case AF_INET6:
+               if_inet6data_lock_exclusive(ifp);
+               if (IN6_IFEXTRA(ifp) != NULL) {
+                       if (len == 0) {
+                               /* Allow clearing the signature */
+                               IN6_IFEXTRA(ifp)->netsig_len = 0;
+                               bzero(IN6_IFEXTRA(ifp)->netsig,
+                                   sizeof (IN6_IFEXTRA(ifp)->netsig));
+                               if_inet6data_lock_done(ifp);
+                               break;
+                       } else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) {
+                               error = EINVAL;
+                               if_inet6data_lock_done(ifp);
+                               break;
+                       }
+                       IN6_IFEXTRA(ifp)->netsig_len = len;
+                       bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
+               } else {
+                       error = ENOMEM;
+               }
+               if_inet6data_lock_done(ifp);
+               break;
+
+       default:
+               error = EINVAL;
+               break;
+       }
+
+       return (error);
+}
+
+int
+ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
+    uint16_t *flags, uint8_t *data)
+{
+       int error = 0;
+
+       if (ifp == NULL || len == NULL || flags == NULL || data == NULL)
+               return (EINVAL);
+
+       switch (family) {
+       case AF_INET:
+               if_inetdata_lock_shared(ifp);
+               if (IN_IFEXTRA(ifp) != NULL) {
+                       if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
+                               error = EINVAL;
+                               if_inetdata_lock_done(ifp);
+                               break;
+                       }
+                       if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0)
+                               bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
+                       else
+                               error = ENOENT;
+               } else {
+                       error = ENOMEM;
+               }
+               if_inetdata_lock_done(ifp);
+               break;
+
+       case AF_INET6:
+               if_inet6data_lock_shared(ifp);
+               if (IN6_IFEXTRA(ifp) != NULL) {
+                       if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
+                               error = EINVAL;
+                               if_inet6data_lock_done(ifp);
+                               break;
+                       }
+                       if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0)
+                               bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
+                       else
+                               error = ENOENT;
+               } else {
+                       error = ENOMEM;
+               }
+               if_inet6data_lock_done(ifp);
+               break;
+
+       default:
+               error = EINVAL;
+               break;
+       }
+
+       if (error == 0)
+               *flags = 0;
+
+       return (error);
+}
+
 static void
 dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
     protocol_family_t pf)
@@ -7304,6 +7852,25 @@ sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
        return (err);
 }
 
+static int
+sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       int err;
+       
+       if (req->oldptr == USER_ADDR_NULL) {
+                       
+       }
+       if (req->newptr != USER_ADDR_NULL) {
+               return (EPERM);
+       }
+       err = SYSCTL_OUT(req, &tx_chain_len_stats,
+           sizeof(struct chain_len_stats));
+
+       return (err);
+}
+
+
 #if DEBUG
 /* Blob for sum16 verification */
 static uint8_t sumdata[] = {
@@ -7469,3 +8036,73 @@ dlil_kev_dl_code_str(u_int32_t event_code)
        }
        return ("");
 }
+
+/*
+ * Mirror the arguments of ifnet_get_local_ports_extended()
+ *  ifindex
+ *  protocol
+ *  flags
+ */
+static int
+sysctl_get_ports_used SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp)
+       int *name = (int *)arg1;
+       int namelen = arg2;
+       int error = 0;
+       int idx;
+       protocol_family_t protocol;
+       u_int32_t flags;
+       ifnet_t ifp = NULL;
+       u_int8_t *bitfield = NULL;
+
+       if (req->newptr) {
+               error = EPERM;
+               goto done;
+       }
+       if (namelen != 3) {
+               error = ENOENT;
+               goto done;
+       }
+
+       if (req->oldptr == USER_ADDR_NULL) {
+               req->oldidx = bitstr_size(65536);
+               goto done;
+       }
+       if (req->oldlen < bitstr_size(65536)) {
+               error = ENOMEM;
+               goto done;
+       }
+       
+       idx = name[0];
+       protocol = name[1];
+       flags = name[2];
+       
+       
+       ifnet_head_lock_shared();
+       if (idx > if_index) {
+               ifnet_head_done();
+               error = ENOENT;
+               goto done;
+       }
+       ifp = ifindex2ifnet[idx];
+       ifnet_head_done();
+       
+       bitfield = _MALLOC(bitstr_size(65536), M_TEMP, M_WAITOK);
+       if (bitfield == NULL) {
+               error = ENOMEM;
+               goto done;
+       }
+       error = ifnet_get_local_ports_extended(ifp, protocol, flags, bitfield);
+       if (error != 0) {
+               printf("%s: ifnet_get_local_ports_extended() error %d\n",
+                   __func__, error);
+               goto done;
+       }
+       error = SYSCTL_OUT(req, bitfield, bitstr_size(65536));
+done:
+       if (bitfield != NULL)
+               _FREE(bitfield, M_TEMP);
+       return (error);
+}
+
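The new get_ports_used node takes three trailing name integers, ifindex, protocol, and flags, mirroring ifnet_get_local_ports_extended(), and returns a 65536-bit bitmap of local ports in use on that interface. A hedged userspace sketch of reading it; the ifindex, protocol, and flag values passed here are placeholders, and treating 0 as "all protocols / no flags" is an assumption:

#include <stdio.h>
#include <stdlib.h>
#include <sys/sysctl.h>

int
main(void)
{
	int mib[CTL_MAXNAME];
	size_t miblen = CTL_MAXNAME;
	size_t buflen = 65536 / 8;	/* bitstr_size(65536) */
	unsigned char *bitmap;

	if (sysctlnametomib("net.link.generic.system.get_ports_used",
	    mib, &miblen) == -1) {
		perror("sysctlnametomib");
		return (1);
	}
	mib[miblen++] = 1;	/* ifindex, e.g. lo0 (placeholder) */
	mib[miblen++] = 0;	/* protocol: assumed to mean "all" */
	mib[miblen++] = 0;	/* flags: none (assumption) */

	bitmap = calloc(1, buflen);
	if (bitmap == NULL)
		return (1);
	if (sysctl(mib, (unsigned int)miblen, bitmap, &buflen, NULL, 0) == -1) {
		perror("sysctl");
		free(bitmap);
		return (1);
	}
	/* report every port whose bit is set */
	for (unsigned int port = 0; port < 65536; port++) {
		if (bitmap[port / 8] & (1U << (port % 8)))
			printf("port %u in use\n", port);
	}
	free(bitmap);
	return (0);
}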
index da72b75f3b1e4a6d317812ca1c4e9af0124b095b..f2fb7161fc5f6826850f771ba5b4bbf9476c4bf1 100644 (file)
@@ -103,15 +103,39 @@ enum {
                *(nsp) += ((tvp)->tv_sec * (integer_t)NSEC_PER_SEC);            \
 } while (0)
 
+#if defined(__x86_64__) || defined(__arm64__)
 #define        net_nsectimer(nsp, tvp) do {                                    \
        u_int64_t __nsp = *(nsp);                                       \
        net_timerclear(tvp);                                            \
-       while ((__nsp) >= NSEC_PER_SEC) {                               \
-               (tvp)->tv_sec++;                                        \
-               (__nsp) -= NSEC_PER_SEC;                                \
-       }                                                               \
-       (tvp)->tv_nsec = (__nsp);                                       \
+       uint64_t __sec = __nsp / NSEC_PER_SEC;                          \
+       (tvp)->tv_sec = (__darwin_time_t)__sec;                         \
+       (tvp)->tv_nsec = (long)(__nsp - __sec * NSEC_PER_SEC);          \
 } while (0)
+#else /* 32 bit */
+/*
+ * NSEC needs to be < 2^31*10^9 to be representable in a struct timespec
+ * because __darwin_time_t is 32 bit on 32-bit platforms. This bound
+ * is < 2^61. We get a first approximation to convert into seconds using
+ * the following values.
+ * a = floor(NSEC / 2^29)
+ * inv = floor(2^61 / 10^9)
+ *
+ * The approximation of seconds is correct or too low by 1 unit.
+ * So we fix it by computing the remainder.
+ */
+#define        net_nsectimer(nsp, tvp) do {                                    \
+       u_int64_t __nsp = *(nsp);                                       \
+       net_timerclear(tvp);                                            \
+       uint32_t __a = (uint32_t)(__nsp >> 29);                         \
+       const uint32_t __inv = 0x89705F41;                              \
+       uint32_t __sec = (uint32_t)(((uint64_t)__a * __inv) >> 32);     \
+       uint32_t __rem = (uint32_t)(__nsp - __sec * NSEC_PER_SEC);      \
+       __sec += ((__rem >= NSEC_PER_SEC) ? 1 : 0);                     \
+       (tvp)->tv_sec = (__darwin_time_t)__sec;                         \
+       (tvp)->tv_nsec =                                                \
+           (long)((__rem >= NSEC_PER_SEC) ? (__rem - NSEC_PER_SEC) : __rem);   \
+} while(0)
+#endif /* 32 bit */
 
 struct ifnet;
 struct mbuf;
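The 32-bit version replaces the old subtract-in-a-loop conversion with a fixed-point reciprocal multiplication, as the comment explains. A quick standalone check that the constant 0x89705F41 really is floor(2^61 / 10^9), and that the approximation, after the one-unit remainder fix-up, agrees with plain 64-bit division across a few sample values:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

int
main(void)
{
	const uint32_t inv = (uint32_t)((1ULL << 61) / NSEC_PER_SEC);
	assert(inv == 0x89705F41);

	uint64_t samples[] = { 0, 999999999ULL, 1000000000ULL,
	    1234567890123ULL, (1ULL << 31) * NSEC_PER_SEC - 1 };
	for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		uint64_t nsp = samples[i];
		uint32_t a = (uint32_t)(nsp >> 29);
		uint32_t sec = (uint32_t)(((uint64_t)a * inv) >> 32);
		uint32_t rem = (uint32_t)(nsp - (uint64_t)sec * NSEC_PER_SEC);

		/* the estimate is exact or one second low; fix via remainder */
		if (rem >= NSEC_PER_SEC) {
			sec++;
			rem -= NSEC_PER_SEC;
		}
		assert(sec == nsp / NSEC_PER_SEC);
		assert(rem == nsp % NSEC_PER_SEC);
	}
	printf("net_nsectimer reciprocal approximation checks out\n");
	return (0);
}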
index 3a86d2674f4bab1f6c4cf43a741c06a3c83dfb67..8fd6074f13b031c19f5bd535ba8b749b32c109df 100644 (file)
@@ -550,7 +550,7 @@ ether_frameout_extended(struct ifnet *ifp, struct mbuf **m,
         * Add local net header.  If no space in first mbuf,
         * allocate another.
         */
-       M_PREPEND(*m, sizeof (struct ether_header), M_DONTWAIT);
+       M_PREPEND(*m, sizeof (struct ether_header), M_DONTWAIT, 0);
        if (*m == NULL)
                return (EJUSTRETURN);
 
index c9bb74aa40e63108d19b19d514f899fb7872003b..d65efe3a1cca1de134c709519caba9b02b51e467 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -97,6 +97,7 @@
 #include <net/if_var.h>
 #include <net/if_ppp.h>
 #include <net/ethernet.h>
+#include <net/network_agent.h>
 
 #include <net/radix.h>
 #include <net/route.h>
@@ -150,6 +151,7 @@ static int ifioctl_ifdesc(struct ifnet *, u_long, caddr_t, struct proc *);
 static int ifioctl_linkparams(struct ifnet *, u_long, caddr_t, struct proc *);
 static int ifioctl_qstats(struct ifnet *, u_long, caddr_t);
 static int ifioctl_throttle(struct ifnet *, u_long, caddr_t, struct proc *);
+static int ifioctl_netsignature(struct ifnet *, u_long, caddr_t);
 static int ifconf(u_long cmd, user_addr_t ifrp, int * ret_space);
 __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
 void if_rtproto_del(struct ifnet *ifp, int protocol);
@@ -417,7 +419,7 @@ if_next_index(void)
 
                /* allocate space for the larger arrays */
                n = (2 * new_if_indexlim + 1) * sizeof(caddr_t);
-               new_ifnet_addrs = _MALLOC(n, M_IFADDR, M_WAITOK);
+               new_ifnet_addrs = _MALLOC(n, M_IFADDR, M_WAITOK | M_ZERO);
                if (new_ifnet_addrs == NULL) {
                        --if_index;
                        return -1;
@@ -425,7 +427,6 @@ if_next_index(void)
 
                new_ifindex2ifnet = new_ifnet_addrs 
                        + new_if_indexlim * sizeof(caddr_t);
-               bzero(new_ifnet_addrs, n);
                if (ifnet_addrs != NULL) {
                        /* copy the existing data */
                        bcopy((caddr_t)ifnet_addrs, new_ifnet_addrs,
@@ -627,7 +628,6 @@ if_clone_attach(struct if_clone *ifc)
        ifc->ifc_units = _MALLOC(len, M_CLONE, M_WAITOK | M_ZERO);
        if (ifc->ifc_units == NULL)
                return ENOBUFS;
-       bzero(ifc->ifc_units, len);
        ifc->ifc_bmlen = len;
 
        LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
@@ -691,6 +691,28 @@ if_clone_list(int count, int *ret_total, user_addr_t dst)
        return (error);
 }
 
+u_int32_t
+if_functional_type(struct ifnet *ifp)
+{
+       u_int32_t ret = IFRTYPE_FUNCTIONAL_UNKNOWN;
+       if (ifp != NULL) {
+               if (ifp->if_flags & IFF_LOOPBACK) {
+                       ret = IFRTYPE_FUNCTIONAL_LOOPBACK;
+               } else if (IFNET_IS_WIFI(ifp)) {
+                       if (ifp->if_eflags & IFEF_AWDL)
+                               ret = IFRTYPE_FUNCTIONAL_WIFI_AWDL;
+                       else
+                               ret = IFRTYPE_FUNCTIONAL_WIFI_INFRA;
+               } else if (IFNET_IS_CELLULAR(ifp)) {
+                       ret = IFRTYPE_FUNCTIONAL_CELLULAR;
+               } else if (IFNET_IS_WIRED(ifp)) {
+                       ret = IFRTYPE_FUNCTIONAL_WIRED;
+               }
+       }
+
+       return ret;
+}
+
 /*
  * Similar to ifa_ifwithaddr, except that this is IPv4 specific
  * and that it matches only the local (not broadcast) address.
@@ -1698,6 +1720,173 @@ ifioctl_throttle(struct ifnet *ifp, u_long cmd, caddr_t data, struct proc *p)
        return (error);
 }
 
+static int
+ifioctl_getnetagents(struct ifnet *ifp, u_int32_t *count, user_addr_t uuid_p)
+{
+       int error = 0;
+       int index = 0;
+       u_int32_t valid_netagent_count = 0;
+       *count = 0;
+       for (index = 0; index < IF_MAXAGENTS; index++) {
+               uuid_t *netagent_uuid = &(ifp->if_agentids[index]);
+               if (!uuid_is_null(*netagent_uuid)) {
+                       if (uuid_p != USER_ADDR_NULL) {
+                               if ((error = copyout(netagent_uuid,
+                                                    uuid_p + sizeof(uuid_t) * valid_netagent_count,
+                                                    sizeof(uuid_t))) != 0) {
+                                       return (error);
+                               }
+                       }
+                       valid_netagent_count++;
+               }
+       }
+       *count = valid_netagent_count;
+
+       return (0);
+}
+
+static __attribute__((noinline)) int
+ifioctl_netagent(struct ifnet *ifp, u_long cmd, caddr_t data, struct proc *p)
+{
+       struct if_agentidreq *ifar = (struct if_agentidreq *)(void *)data;
+       union {
+               struct if_agentidsreq32 s32;
+               struct if_agentidsreq64 s64;
+       } u;
+       int error = 0;
+       int index = 0;
+
+       VERIFY(ifp != NULL);
+
+       switch (cmd) {
+               case SIOCAIFAGENTID: {          /* struct if_agentidreq */
+                       uuid_t *first_empty_slot = NULL;
+                       // TODO: Use priv_check_cred() instead of root check
+                       if ((error = proc_suser(p)) != 0) {
+                               break;
+                       }
+                       for (index = 0; index < IF_MAXAGENTS; index++) {
+                               uuid_t *netagent_uuid = &(ifp->if_agentids[index]);
+                               if (uuid_compare(*netagent_uuid, ifar->ifar_uuid) == 0) {
+                                       /* Already present, ignore */
+                                       break;
+                               }
+                               if (first_empty_slot == NULL &&
+                                       uuid_is_null(*netagent_uuid)) {
+                                       first_empty_slot = netagent_uuid;
+                               }
+                       }
+                       if (first_empty_slot == NULL) {
+                               error = ENOMEM; /* No empty slot for a netagent UUID, bail */
+                               break;
+                       }
+                       uuid_copy(*first_empty_slot, ifar->ifar_uuid);
+                       netagent_post_updated_interfaces(ifar->ifar_uuid);
+                       break;
+               }
+               case SIOCDIFAGENTID: {                  /* struct if_agentidreq */
+                       bool removed_agent_id = FALSE;
+                       // TODO: Use priv_check_cred() instead of root check
+                       if ((error = proc_suser(p)) != 0) {
+                               break;
+                       }
+                       for (index = 0; index < IF_MAXAGENTS; index++) {
+                               uuid_t *netagent_uuid = &(ifp->if_agentids[index]);
+                               if (uuid_compare(*netagent_uuid, ifar->ifar_uuid) == 0) {
+                                       uuid_clear(*netagent_uuid);
+                                       removed_agent_id = TRUE;
+                                       break;
+                               }
+                       }
+                       if (removed_agent_id) {
+                               netagent_post_updated_interfaces(ifar->ifar_uuid);
+                       }
+                       break;
+               }
+               case SIOCGIFAGENTIDS32: {                       /* struct if_agentidsreq32 */
+                       bcopy(data, &u.s32, sizeof(u.s32));
+                       error = ifioctl_getnetagents(ifp, &u.s32.ifar_count, u.s32.ifar_uuids);
+                       if (error == 0) {
+                               bcopy(&u.s32, data, sizeof(u.s32));
+                       }
+                       break;
+               }
+               case SIOCGIFAGENTIDS64: {                       /* struct if_agentidsreq64 */
+                       bcopy(data, &u.s64, sizeof(u.s64));
+                       error = ifioctl_getnetagents(ifp, &u.s64.ifar_count, u.s64.ifar_uuids);
+                       if (error == 0) {
+                               bcopy(&u.s64, data, sizeof(u.s64));
+                       }
+                       break;
+               }
+               default:
+                       VERIFY(0);
+                       /* NOTREACHED */
+       }
+
+       return (error);
+}
+
+void
+ifnet_clear_netagent(uuid_t netagent_uuid)
+{
+       struct ifnet *ifp = NULL;
+       int index = 0;
+       bool removed_agent_id = FALSE;
+
+       ifnet_head_lock_shared();
+
+       TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
+               for (index = 0; index < IF_MAXAGENTS; index++) {
+                       uuid_t *ifp_netagent_uuid = &(ifp->if_agentids[index]);
+                       if (uuid_compare(*ifp_netagent_uuid, netagent_uuid) == 0) {
+                               uuid_clear(*ifp_netagent_uuid);
+                               removed_agent_id = TRUE;
+                       }
+               }
+       }
+
+       ifnet_head_done();
+}
+
+static __attribute__((noinline)) int
+ifioctl_netsignature(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+       struct if_nsreq *ifnsr = (struct if_nsreq *)(void *)data;
+       u_int16_t flags;
+       int error = 0;
+
+       VERIFY(ifp != NULL);
+
+       switch (cmd) {
+       case SIOCSIFNETSIGNATURE:               /* struct if_nsreq */
+               if (ifnsr->ifnsr_len > sizeof (ifnsr->ifnsr_data)) {
+                       error = EINVAL;
+                       break;
+               }
+               bcopy(&ifnsr->ifnsr_flags, &flags, sizeof (flags));
+               error = ifnet_set_netsignature(ifp, ifnsr->ifnsr_family,
+                   ifnsr->ifnsr_len, flags, ifnsr->ifnsr_data);
+               break;
+
+       case SIOCGIFNETSIGNATURE:               /* struct if_nsreq */
+               ifnsr->ifnsr_len = sizeof (ifnsr->ifnsr_data);
+               error = ifnet_get_netsignature(ifp, ifnsr->ifnsr_family,
+                   &ifnsr->ifnsr_len, &flags, ifnsr->ifnsr_data);
+               if (error == 0)
+                       bcopy(&flags, &ifnsr->ifnsr_flags, sizeof (flags));
+               else
+                       ifnsr->ifnsr_len = 0;
+               break;
+
+       default:
+               VERIFY(0);
+               /* NOTREACHED */
+       }
+
+       return (error);
+}
+
 /*
  * Interface ioctls.
  *
@@ -1732,6 +1921,11 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
                error = ifioctl_ifclone(cmd, data);
                goto done;
 
+       case SIOCGIFAGENTDATA32:                /* struct netagent_req32 */
+       case SIOCGIFAGENTDATA64:                /* struct netagent_req64 */
+               error = netagent_ioctl(cmd, data);
+               goto done;
+
        case SIOCSIFDSTADDR:                    /* struct ifreq */
        case SIOCSIFADDR:                       /* struct ifreq */
        case SIOCSIFBRDADDR:                    /* struct ifreq */
@@ -1775,6 +1969,7 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
        case SIOCSIFBOND:                       /* struct ifreq */
        case SIOCGIFLLADDR:                     /* struct ifreq */
        case SIOCGIFTYPE:                       /* struct ifreq */
+       case SIOCGIFFUNCTIONALTYPE:             /* struct ifreq */
        case SIOCGIFPSRCADDR:                   /* struct ifreq */
        case SIOCGIFPDSTADDR:                   /* struct ifreq */
        case SIOCGIFGENERIC:                    /* struct ifreq */
@@ -1792,7 +1987,12 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
        case SIOCGIFEXPENSIVE:                  /* struct ifreq */
        case SIOCSIFEXPENSIVE:                  /* struct ifreq */
        case SIOCSIF2KCL:                       /* struct ifreq */
-       case SIOCGIF2KCL: {                     /* struct ifreq */
+       case SIOCGIF2KCL:                       /* struct ifreq */
+       case SIOCSIFINTERFACESTATE:             /* struct ifreq */
+       case SIOCGIFINTERFACESTATE:             /* struct ifreq */
+       case SIOCSIFPROBECONNECTIVITY:          /* struct ifreq */
+       case SIOCGIFPROBECONNECTIVITY:          /* struct ifreq */
+       case SIOCGSTARTDELAY: {                 /* struct ifreq */
                struct ifreq ifr;
                bcopy(data, &ifr, sizeof (ifr));
                ifr.ifr_name[IFNAMSIZ - 1] = '\0';
@@ -1881,6 +2081,22 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
                ifp = ifunit(ifname);
                break;
 
+       case SIOCAIFAGENTID:                    /* struct if_agentidreq */
+       case SIOCDIFAGENTID:                    /* struct if_agentidreq */
+       case SIOCGIFAGENTIDS32:         /* struct if_agentidsreq32 */
+       case SIOCGIFAGENTIDS64:         /* struct if_agentidsreq64 */
+               bcopy(((struct if_agentidreq *)(void *)data)->ifar_name,
+                         ifname, IFNAMSIZ);
+               ifp = ifunit(ifname);
+               break;
+
+       case SIOCSIFNETSIGNATURE:               /* struct if_nsreq */
+       case SIOCGIFNETSIGNATURE:               /* struct if_nsreq */
+               bcopy(((struct if_nsreq *)(void *)data)->ifnsr_name,
+                         ifname, IFNAMSIZ);
+               ifp = ifunit(ifname);
+               break;
+
        default:
                /*
                 * This is a bad assumption, but the code seems to
@@ -1948,6 +2164,18 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
                error = ifioctl_throttle(ifp, cmd, data, p);
                break;
 
+       case SIOCAIFAGENTID:                    /* struct if_agentidreq */
+       case SIOCDIFAGENTID:                    /* struct if_agentidreq */
+       case SIOCGIFAGENTIDS32:         /* struct if_agentidsreq32 */
+       case SIOCGIFAGENTIDS64:         /* struct if_agentidsreq64 */
+               error = ifioctl_netagent(ifp, cmd, data, p);
+               break;
+
+       case SIOCSIFNETSIGNATURE:               /* struct if_nsreq */
+       case SIOCGIFNETSIGNATURE:               /* struct if_nsreq */
+               error = ifioctl_netsignature(ifp, cmd, data);
+               break;
+
        default:
                if (so->so_proto == NULL) {
                        error = EOPNOTSUPP;
@@ -2334,6 +2562,10 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p)
                ifr->ifr_type.ift_subfamily = ifp->if_subfamily;
                break;
 
+       case SIOCGIFFUNCTIONALTYPE:
+               ifr->ifr_functional_type = if_functional_type(ifp);
+               break;
+
        case SIOCGIFPSRCADDR:
        case SIOCGIFPDSTADDR:
        case SIOCGIFGENERIC:
@@ -2362,7 +2594,17 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p)
 
        case SIOCGIFLINKQUALITYMETRIC:
                ifnet_lock_shared(ifp);
-               ifr->ifr_link_quality_metric = ifp->if_lqm;
+               if ((ifp->if_interface_state.valid_bitmask & 
+                   IF_INTERFACE_STATE_LQM_STATE_VALID))
+                       ifr->ifr_link_quality_metric =
+                          ifp->if_interface_state.lqm_state;
+               else if ((ifp->if_refflags & IFRF_ATTACHED)) {
+                       ifr->ifr_link_quality_metric =
+                           IFNET_LQM_THRESH_UNKNOWN;
+               } else {
+                       ifr->ifr_link_quality_metric =
+                           IFNET_LQM_THRESH_OFF;
+               }
                ifnet_lock_done(ifp);
                break;
 
@@ -2438,7 +2680,19 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p)
                        ifp->if_eflags &= ~IFEF_2KCL;
                ifnet_lock_done(ifp);
                break;
-
+       case SIOCGSTARTDELAY:
+               ifnet_lock_shared(ifp);
+               if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
+                       ifr->ifr_start_delay_qlen =
+                           ifp->if_start_delay_qlen;
+                       ifr->ifr_start_delay_timeout =
+                           ifp->if_start_delay_timeout;
+               } else {
+                       ifr->ifr_start_delay_qlen = 0;
+                       ifr->ifr_start_delay_timeout = 0;
+               }
+               ifnet_lock_done(ifp);
+               break;
        case SIOCSIFDSTADDR:
        case SIOCSIFADDR:
        case SIOCSIFBRDADDR:
@@ -2498,6 +2752,34 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p)
                }
                break;
 
+       case SIOCGIFINTERFACESTATE:
+               if_get_state(ifp, &ifr->ifr_interface_state);
+               
+               break;
+       case SIOCSIFINTERFACESTATE:
+               if ((error = priv_check_cred(kauth_cred_get(),
+                   PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
+                       return (error);
+
+               error = if_state_update(ifp, &ifr->ifr_interface_state);
+
+               break;
+       case SIOCSIFPROBECONNECTIVITY:
+               if ((error = priv_check_cred(kauth_cred_get(),
+                   PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
+                       return (error);
+               error = if_probe_connectivity(ifp,
+                   ifr->ifr_probe_connectivity);
+               break;
+       case SIOCGIFPROBECONNECTIVITY:
+               if ((error = priv_check_cred(kauth_cred_get(),
+                   PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
+                       return (error);
+               if (ifp->if_eflags & IFEF_PROBE_CONNECTIVITY)
+                       ifr->ifr_probe_connectivity = 1;
+               else
+                       ifr->ifr_probe_connectivity = 0;
+               break;
        default:
                VERIFY(0);
                /* NOTREACHED */
@@ -4012,6 +4294,15 @@ ifioctl_cassert(void)
        case SIOCGIFDELEGATE:
        case SIOCGIFLLADDR:
        case SIOCGIFTYPE:
+       case SIOCGIFFUNCTIONALTYPE:
+       case SIOCAIFAGENTID:
+       case SIOCDIFAGENTID:
+       case SIOCGIFAGENTIDS32:
+       case SIOCGIFAGENTIDS64:
+       case SIOCGIFAGENTDATA32:
+       case SIOCGIFAGENTDATA64:
+       case SIOCSIFINTERFACESTATE:
+       case SIOCGIFINTERFACESTATE:
                ;
        }
 }
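
For orientation, the new SIOCGIFFUNCTIONALTYPE request asserted above can be exercised from a userspace tool. The following is a minimal sketch only: it assumes a build environment that can see the PRIVATE ifreq members and IFRTYPE_FUNCTIONAL_* constants added to net/if.h in this commit, and "en0" is just an example interface name.

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/types.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>	/* PRIVATE: ifr_functional_type, IFRTYPE_FUNCTIONAL_* */

    /* Sketch: ask the kernel which functional type (wired, wifi, cellular, ...) en0 has. */
    int
    main(void)
    {
    	struct ifreq ifr;
    	int s = socket(AF_INET, SOCK_DGRAM, 0);

    	if (s < 0)
    		return (1);
    	memset(&ifr, 0, sizeof (ifr));
    	strlcpy(ifr.ifr_name, "en0", sizeof (ifr.ifr_name));	/* example interface */
    	if (ioctl(s, SIOCGIFFUNCTIONALTYPE, &ifr) < 0) {
    		close(s);
    		return (1);
    	}
    	/* ifr_functional_type now holds one of the IFRTYPE_FUNCTIONAL_* values. */
    	printf("functional type: %u\n", ifr.ifr_functional_type);
    	close(s);
    	return (0);
    }
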
index fd7800d8ae4cff826bb3046a8ae21042b66fd349..62afa9cd44033e4c0bbcdf55f733185c2be5fdf8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
 #define        KEV_DL_IFDELEGATE_CHANGED               25
 #define        KEV_DL_AWDL_RESTRICTED                  26
 #define        KEV_DL_AWDL_UNRESTRICTED                27
+#define        KEV_DL_RRC_STATE_CHANGED                28
 
 #include <net/if_var.h>
 #include <sys/types.h>
@@ -158,6 +159,9 @@ struct if_clonereq32 {
 #ifdef PRIVATE
 /* extended flags definitions:  (all bits reserved for internal/future use) */
 #define        IFEF_AUTOCONFIGURING    0x00000001      /* allow BOOTP/DHCP replies to enter */
+#define        IFEF_ENQUEUE_MULTI      0x00000002      /* enqueue multiple packets at once */
+#define        IFEF_DELAY_START        0x00000004      /* delay start callback */
+#define        IFEF_PROBE_CONNECTIVITY 0x00000008      /* Probe connections going over this interface */
 #define        IFEF_IPV6_DISABLED      0x00000020      /* coupled to ND6_IFF_IFDISABLED */
 #define        IFEF_ACCEPT_RTADV       0x00000040      /* accepts IPv6 RA on the interface */
 #define        IFEF_TXSTART            0x00000080      /* has start callback */
@@ -466,9 +470,22 @@ struct     ifreq {
 #define        IFRTYPE_SUBFAMILY_THUNDERBOLT   4
 #define        IFRTYPE_SUBFAMILY_RESERVED      5
                } ifru_type;
+               u_int32_t ifru_functional_type;
+#define IFRTYPE_FUNCTIONAL_UNKNOWN     0
+#define IFRTYPE_FUNCTIONAL_LOOPBACK    1
+#define IFRTYPE_FUNCTIONAL_WIRED       2
+#define IFRTYPE_FUNCTIONAL_WIFI_INFRA  3
+#define IFRTYPE_FUNCTIONAL_WIFI_AWDL   4
+#define IFRTYPE_FUNCTIONAL_CELLULAR    5
+#define IFRTYPE_FUNCTIONAL_LAST                5
                u_int32_t ifru_expensive;
-               u_int32_t ifru_awdl_restricted;
                u_int32_t ifru_2kcl;
+               struct {
+                       u_int32_t qlen;
+                       u_int32_t timeout;
+               } ifru_start_delay;
+               struct if_interface_state       ifru_interface_state;
+               u_int32_t ifru_probe_connectivity;
 #endif /* PRIVATE */
        } ifr_ifru;
 #define        ifr_addr        ifr_ifru.ifru_addr      /* address */
@@ -505,8 +522,12 @@ struct     ifreq {
 #define        ifr_delegated   ifr_ifru.ifru_delegated /* delegated interface index */
 #define        ifr_expensive   ifr_ifru.ifru_expensive
 #define        ifr_type        ifr_ifru.ifru_type      /* interface type */
-#define        ifr_awdl_restricted ifr_ifru.ifru_awdl_restricted
+#define        ifr_functional_type     ifr_ifru.ifru_functional_type
 #define        ifr_2kcl        ifr_ifru.ifru_2kcl
+#define        ifr_start_delay_qlen    ifr_ifru.ifru_start_delay.qlen
+#define        ifr_start_delay_timeout ifr_ifru.ifru_start_delay.timeout
+#define ifr_interface_state    ifr_ifru.ifru_interface_state
+#define        ifr_probe_connectivity  ifr_ifru.ifru_probe_connectivity
 #endif /* PRIVATE */
 };
 
@@ -818,6 +839,36 @@ enum {
 #endif /* XNU_KERNEL_PRIVATE */
 };
 
+/*
+ * Structure for SIOC[A/D]IFAGENTID
+ */
+struct if_agentidreq {
+       char            ifar_name[IFNAMSIZ];    /* interface name */
+       uuid_t          ifar_uuid;              /* agent UUID to add or delete */
+};
+
+/*
+ * Structure for SIOCGIFAGENTIDS
+ */
+struct if_agentidsreq {
+       char            ifar_name[IFNAMSIZ];    /* interface name */
+       u_int32_t       ifar_count;             /* number of agent UUIDs */
+       uuid_t          *ifar_uuids;            /* array of agent UUIDs */
+};
+
+#ifdef BSD_KERNEL_PRIVATE
+struct if_agentidsreq32 {
+       char            ifar_name[IFNAMSIZ];
+       u_int32_t       ifar_count;
+       user32_addr_t ifar_uuids;
+};
+struct if_agentidsreq64 {
+       char            ifar_name[IFNAMSIZ];
+       u_int32_t       ifar_count;
+       user64_addr_t ifar_uuids __attribute__((aligned(8)));
+};
+#endif /* BSD_KERNEL_PRIVATE */
+
 #define        DLIL_MODIDLEN   20      /* same as IFNET_MODIDLEN */
 #define        DLIL_MODARGLEN  12      /* same as IFNET_MODARGLEN */
 
@@ -830,6 +881,30 @@ struct kev_dl_issues {
        u_int64_t               timestamp;
        u_int8_t                info[DLIL_MODARGLEN];
 };
+
+/*
+ * DLIL KEV_DL_RRC_STATE_CHANGED structure
+ */
+struct kev_dl_rrc_state {
+       struct net_event_data   link_data;
+       u_int32_t               rrc_state;
+};
+
+/*
+ * Length of network signature/fingerprint blob.
+ */
+#define        IFNET_SIGNATURELEN      20
+
+/*
+ * Structure for SIOC[S/G]IFNETSIGNATURE
+ */
+struct if_nsreq {
+       char            ifnsr_name[IFNAMSIZ];
+       u_int8_t        ifnsr_family;   /* address family */
+       u_int8_t        ifnsr_len;      /* data length */
+       u_int16_t       ifnsr_flags;    /* for future */
+       u_int8_t        ifnsr_data[IFNET_SIGNATURELEN];
+};
 #endif /* PRIVATE */
 
 #ifdef KERNEL
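
A brief sketch of how the new if_nsreq request for SIOC[S/G]IFNETSIGNATURE might be populated. This is illustrative only: the ioctl and structure are PRIVATE, the calling convention beyond the fields shown in this diff is not part of the change, and the interface name and signature bytes are placeholders.

    #include <string.h>
    #include <sys/types.h>
    #include <sys/socket.h>
    #include <net/if.h>	/* PRIVATE: struct if_nsreq, IFNET_SIGNATURELEN */

    /* Sketch: fill a network-signature request for en0 / AF_INET. */
    static void
    fill_netsig_request(struct if_nsreq *nsr, const u_int8_t sig[IFNET_SIGNATURELEN])
    {
    	memset(nsr, 0, sizeof (*nsr));
    	strlcpy(nsr->ifnsr_name, "en0", sizeof (nsr->ifnsr_name));	/* placeholder name */
    	nsr->ifnsr_family = AF_INET;		/* address family the signature applies to */
    	nsr->ifnsr_len = IFNET_SIGNATURELEN;	/* length of the data actually supplied */
    	nsr->ifnsr_flags = 0;			/* reserved for future use */
    	memcpy(nsr->ifnsr_data, sig, IFNET_SIGNATURELEN);
    	/* The filled request would then be handed to ioctl(s, SIOCSIFNETSIGNATURE, nsr). */
    }
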
index 2bb5113f1e817150fa980046bb17acb3a5861e59..34f6e03d4ffd025e8f2460e5dd2b38086536a506 100644 (file)
@@ -801,6 +801,10 @@ link_speed(int active)
     case IFM_10G_SR:
     case IFM_10G_LR:
        return (10000);
+    case IFM_2500_T:
+       return (2500);
+    case IFM_5000_T:
+       return (5000);
     }
 }
 
@@ -866,11 +870,10 @@ bond_globals_create(lacp_system_priority sys_pri,
 {
     bond_globals_ref   b;
 
-    b = _MALLOC(sizeof(*b), M_BOND, M_WAITOK);
+    b = _MALLOC(sizeof(*b), M_BOND, M_WAITOK | M_ZERO);
     if (b == NULL) {
        return (NULL);
     }
-    bzero(b, sizeof(*b));
     TAILQ_INIT(&b->ifbond_list);
     b->system = *sys;
     b->system_priority = sys_pri;
@@ -1089,11 +1092,10 @@ bond_clone_create(struct if_clone * ifc, u_int32_t unit, __unused void *params)
                return (error);
        }
        
-       ifb = _MALLOC(sizeof(ifbond), M_BOND, M_WAITOK);
+       ifb = _MALLOC(sizeof(ifbond), M_BOND, M_WAITOK | M_ZERO);
        if (ifb == NULL) {
                return (ENOMEM);
        }
-       bzero(ifb, sizeof(*ifb));
        
        ifbond_retain(ifb);
        TAILQ_INIT(&ifb->ifb_port_list);
@@ -1818,12 +1820,11 @@ bondport_create(struct ifnet * port_ifp, lacp_port_priority priority,
     lacp_actor_partner_state   s;
 
     *ret_error = 0;
-    p = _MALLOC(sizeof(*p), M_BOND, M_WAITOK);
+    p = _MALLOC(sizeof(*p), M_BOND, M_WAITOK | M_ZERO);
     if (p == NULL) {
        *ret_error = ENOMEM;
        return (NULL);
     }
-    bzero(p, sizeof(*p));
     multicast_list_init(&p->po_multicast);
     if ((u_int32_t)snprintf(p->po_name, sizeof(p->po_name), "%s%d",
                         ifnet_name(port_ifp), ifnet_unit(port_ifp)) 
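
Several allocations in this commit (if_bond.c above, and if_gif.c, if_stf.c, if_vlan.c below) fold the explicit bzero() into the allocation by passing the M_ZERO flag. A minimal sketch of the before/after idiom, with placeholder names:

    /* Before: allocate, then clear the buffer in a second step. */
    p = _MALLOC(sizeof (*p), M_BOND, M_WAITOK);
    if (p == NULL)
    	return (NULL);
    bzero(p, sizeof (*p));

    /* After: M_ZERO makes _MALLOC return memory that is already zeroed. */
    p = _MALLOC(sizeof (*p), M_BOND, M_WAITOK | M_ZERO);
    if (p == NULL)
    	return (NULL);
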
index 98fff2803da508cb9b4eb7dd7fa9d3c9cf0e4d5f..9a2b34f3db82c3a0923fdb3b6b1bc07f371a8264 100644 (file)
@@ -5414,7 +5414,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
                if (DUMMYNET_LOADED && (i == IP_FW_DUMMYNET)) {
 
                        /* put the Ethernet header back on */
-                       M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
+                       M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
                        if (*mp == NULL)
                                return (error);
                        bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
@@ -5545,13 +5545,13 @@ ipfwpass:
         * Finally, put everything back the way it was and return
         */
        if (snap) {
-               M_PREPEND(*mp, sizeof (struct llc), M_DONTWAIT);
+               M_PREPEND(*mp, sizeof (struct llc), M_DONTWAIT, 0);
                if (*mp == NULL)
                        return (error);
                bcopy(&llc1, mtod(*mp, caddr_t), sizeof (struct llc));
        }
 
-       M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
+       M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
        if (*mp == NULL)
                return (error);
        bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
@@ -5748,7 +5748,7 @@ bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
        for (m0 = m; m0; m0 = m0->m_nextpkt) {
                if (error == 0) {
                        if (snap) {
-                               M_PREPEND(m0, sizeof (struct llc), M_DONTWAIT);
+                               M_PREPEND(m0, sizeof (struct llc), M_DONTWAIT, 0);
                                if (m0 == NULL) {
                                        error = ENOBUFS;
                                        continue;
@@ -5756,7 +5756,7 @@ bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
                                bcopy(llc, mtod(m0, caddr_t),
                                    sizeof (struct llc));
                        }
-                       M_PREPEND(m0, ETHER_HDR_LEN, M_DONTWAIT);
+                       M_PREPEND(m0, ETHER_HDR_LEN, M_DONTWAIT, 0);
                        if (m0 == NULL) {
                                error = ENOBUFS;
                                continue;
index c33ef2f040c25677da263270110fc062633528e2..f144822baf298932cccea35f23cf969ba770460d 100644 (file)
@@ -330,14 +330,13 @@ gif_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params)
                goto done;
        }
 
-       sc = _MALLOC(sizeof (struct gif_softc), M_DEVBUF, M_WAITOK);
+       sc = _MALLOC(sizeof (struct gif_softc), M_DEVBUF, M_WAITOK | M_ZERO);
        if (sc == NULL) {
                log(LOG_ERR, "gif_clone_create: failed to allocate gif%d\n",
                    unit);
                error = ENOBUFS;
                goto done;
        }
-       bzero(sc, sizeof (struct gif_softc));
 
        /* use the interface name as the unique id for ifp recycle */
        snprintf(sc->gif_ifname, sizeof (sc->gif_ifname), "%s%d",
index 460fa731c1e4e2eb369c80d1daca296e3bae64e0..f8953609d10fbd13f63190b08eb262ca6148b15e 100644 (file)
@@ -655,7 +655,12 @@ ipsec_output(ifnet_t       interface,
             ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
             bzero(&ipsec_state.ro, sizeof(ipsec_state.ro));
                        
-                       error = ipsec4_interface_output(&ipsec_state, interface);
+            error = ipsec4_interface_output(&ipsec_state, interface);
+            /* Tunneled in IPv6 - packet is gone */
+            if (error == 0 && ipsec_state.tunneled == 6) {
+                goto done;
+            }
+
             data = ipsec_state.m;
             if (error || data == NULL) {
                 printf("ipsec_output: ipsec4_output error %d.\n", error);
@@ -708,6 +713,11 @@ ipsec_output(ifnet_t       interface,
             bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
             
             data = ipsec6_splithdr(data);
+                       if (data == NULL) {
+                               printf("ipsec_output: ipsec6_splithdr returned NULL\n");
+                               goto ipsec_output_err;
+                       }
+
             ip6 = mtod(data, struct ip6_hdr *);
                        
             bzero(&ipsec_state, sizeof(ipsec_state));
@@ -900,8 +910,12 @@ ipsec_proto_input(ifnet_t interface,
        mbuf_pkthdr_setrcvif(m, interface);
        bpf_tap_in(interface, DLT_NULL, m, &af, sizeof(af));
        
-       if (proto_input(protocol, m) != 0)
+       if (proto_input(protocol, m) != 0) {
+               ifnet_stat_increment_in(interface, 0, 0, 1);
                m_freem(m);
+       } else {
+               ifnet_stat_increment_in(interface, 1, m->m_pkthdr.len, 0);
+       }
        
        return 0;
 }
@@ -966,7 +980,7 @@ ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family)
                        if (family == AF_INET) {
                                struct ip *ip = mtod(packet, struct ip *);
                                packet->m_pkthdr.pkt_proto = ip->ip_p;
-                       } else if (family == AF_INET) {
+                       } else if (family == AF_INET6) {
                                struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *);
                                packet->m_pkthdr.pkt_proto = ip6->ip6_nxt;
                        }
index e2ce299a81d40f2fe8863df30121624cc63baabb..4b1f5ff3a3d36befb8d99285eeaa4f680214d375 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2011-2014 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -76,7 +76,7 @@ struct if_llreach_info {
 #endif /* !INET6 */
 
 /*
- * Per-interface link-layer reachability.  (Currently only for ARP/Ethernet.)
+ * Per-interface link-layer reachability.  (Currently only for ARP/NDP/Ethernet.)
  */
 #define        IF_LLREACH_MAXLEN       ETHER_ADDR_LEN
 
index f659d3582f95f8d74b3e9e5589179f87270a7e23..a22c68cf03a05b157172d7ae0e1ef6de7420365d 100644 (file)
@@ -223,7 +223,7 @@ lo_framer(struct ifnet *ifp, struct mbuf **m, const struct sockaddr *dest,
 #pragma unused(ifp, dest, dest_linkaddr)
        struct loopback_header  *header;
 
-       M_PREPEND(*m, sizeof (struct loopback_header), M_WAITOK);
+       M_PREPEND(*m, sizeof (struct loopback_header), M_WAITOK, 1);
        if (*m == NULL) {
                /* Tell caller not to try to free passed-in mbuf */
                return (EJUSTRETURN);
index afba92a4302b551769c1b1568db7317d78340d84..633eb62cd3062e3d34a8af6e01089686092b9737 100644 (file)
 #define        IFM_10G_LR      19              /* 10GbaseLR - single-mode fiber */
 #define        IFM_10G_CX4     20              /* 10GbaseCX4 - copper */
 #define        IFM_10G_T       21              /* 10GbaseT - 4 pair cat 6 */
+#define        IFM_2500_T      22              /* 2500baseT - 4 pair cat 5 */
+#define        IFM_5000_T      23              /* 5000baseT - 4 pair cat 5 */
 
 /*
  * Token ring
@@ -254,6 +256,8 @@ struct ifmedia_description {
     { IFM_10G_LR,   "10GbaseLR"   },                \
     { IFM_10G_CX4,  "10GbaseCX4"  },                \
     { IFM_10G_T,    "10GbaseT"    },                \
+    { IFM_2500_T,   "2500baseT"   },                \
+    { IFM_5000_T,   "5000baseT"   },                \
     { 0, NULL },                                    \
 }
 
index bf29b91f643584c5c65ccf7f0835e6524dfb6237..72abbef09be45b743c8a20b28493cb8c28cfbeb0 100644 (file)
@@ -321,14 +321,12 @@ stfattach(void)
        if (error != 0)
                printf("proto_register_plumber failed for AF_INET6 error=%d\n", error);
 
-       sc = _MALLOC(sizeof(struct stf_softc), M_DEVBUF, M_WAITOK);
+       sc = _MALLOC(sizeof(struct stf_softc), M_DEVBUF, M_WAITOK | M_ZERO);
        if (sc == 0) {
                printf("stf softc attach failed\n" );
                return;
        }
        
-       bzero(sc, sizeof(*sc));
-       
        p = encap_attach_func(AF_INET, IPPROTO_IPV6, stf_encapcheck,
            &in_stf_protosw, sc);
        if (p == NULL) {
@@ -579,7 +577,7 @@ stf_pre_output(
                bpf_tap_out(ifp, 0, m, &af, sizeof(af));
        }
 
-       M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
+       M_PREPEND(m, sizeof(struct ip), M_DONTWAIT, 1);
        if (m && mbuf_len(m) < sizeof(struct ip))
                m = m_pullup(m, sizeof(struct ip));
        if (m == NULL) {
@@ -600,7 +598,7 @@ stf_pre_output(
        ip->ip_ttl = ip_stf_ttl;
        ip->ip_len = m->m_pkthdr.len;   /*host order*/
        if (ifp->if_flags & IFF_LINK1)
-               ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
+               ip_ecn_ingress(ECN_NORMAL, &ip->ip_tos, &tos);
        else
                ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
 
@@ -796,7 +794,7 @@ in_stf_input(
 
        itos = (ntohl(ip6.ip6_flow) >> 20) & 0xff;
        if ((ifnet_flags(ifp) & IFF_LINK1) != 0)
-               ip_ecn_egress(ECN_ALLOWED, &otos, &itos);
+               ip_ecn_egress(ECN_NORMAL, &otos, &itos);
        else
                ip_ecn_egress(ECN_NOCARE, &otos, &itos);
        ip6.ip6_flow &= ~htonl(0xff << 20);
index 5570c85784663dafc0e9ec8bb0aa5014939f4e7b..4261be9684da6e84be96b22a0ab17767bea3a2d7 100644 (file)
@@ -202,7 +202,6 @@ utun_ctl_connect(
        *unitinfo = pcb;
        pcb->utun_ctlref = kctlref;
        pcb->utun_unit = sac->sc_unit;
-       pcb->utun_pending_packets = 0;
        pcb->utun_max_pending_packets = 1;
        
        printf("utun_ctl_connect: creating interface utun%d\n", pcb->utun_unit - 1);
@@ -655,20 +654,25 @@ utun_ctl_getopt(
 static void
 utun_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int flags)
 {
-#pragma unused(kctlref, unit, flags)
+#pragma unused(flags)
        bool reenable_output = false;
        struct utun_pcb *pcb = unitinfo;
        if (pcb == NULL) {
                return;
        }
        ifnet_lock_exclusive(pcb->utun_ifp);
-       if (pcb->utun_pending_packets > 0) {
-               pcb->utun_pending_packets--;
-               if (pcb->utun_pending_packets < pcb->utun_max_pending_packets) {
-                       reenable_output = true;
-               }
+
+       u_int32_t utun_packet_cnt;
+       errno_t error_pc = ctl_getenqueuepacketcount(kctlref, unit, &utun_packet_cnt);
+       if (error_pc != 0) {
+               printf("utun_ctl_rcvd: ctl_getenqueuepacketcount returned error %d\n", error_pc);
+               utun_packet_cnt = 0;
        }
-       
+
+       if (utun_packet_cnt < pcb->utun_max_pending_packets) {
+               reenable_output = true;
+       }
+
        if (reenable_output) {
                errno_t error = ifnet_enable_output(pcb->utun_ifp);
                if (error != 0) {
@@ -687,7 +691,15 @@ utun_start(ifnet_t interface)
        for (;;) {
                bool can_accept_packets = true;
                ifnet_lock_shared(pcb->utun_ifp);
-               can_accept_packets = (pcb->utun_pending_packets < pcb->utun_max_pending_packets);
+
+               u_int32_t utun_packet_cnt;
+               errno_t error_pc = ctl_getenqueuepacketcount(pcb->utun_ctlref, pcb->utun_unit, &utun_packet_cnt);
+               if (error_pc != 0) {
+                       printf("utun_start: ctl_getenqueuepacketcount returned error %d\n", error_pc);
+                       utun_packet_cnt = 0;
+               }
+
+               can_accept_packets = (utun_packet_cnt < pcb->utun_max_pending_packets);
                if (!can_accept_packets && pcb->utun_ctlref) {
                        u_int32_t difference = 0;
                        if (ctl_getenqueuereadable(pcb->utun_ctlref, pcb->utun_unit, &difference) == 0) {
@@ -750,16 +762,8 @@ utun_output(
                        *(u_int32_t *)mbuf_data(data) = htonl(*(u_int32_t *)mbuf_data(data));
 
                length = mbuf_pkthdr_len(data);
-               // Increment packet count optimistically
-               ifnet_lock_exclusive(pcb->utun_ifp);
-               pcb->utun_pending_packets++;
-               ifnet_lock_done(pcb->utun_ifp);
                result = ctl_enqueuembuf(pcb->utun_ctlref, pcb->utun_unit, data, CTL_DATA_EOR);
                if (result != 0) {
-                       // Decrement packet count if errored
-                       ifnet_lock_exclusive(pcb->utun_ifp);
-                       pcb->utun_pending_packets--;
-                       ifnet_lock_done(pcb->utun_ifp);
                        mbuf_freem(data);
                        printf("utun_output - ctl_enqueuembuf failed: %d\n", result);
 
index cc74d87b375e019decc9db63d7a477494406851b..2ffd72ee195719af7b3869d8c9676a2ff91e85a7 100644 (file)
@@ -46,7 +46,6 @@ struct utun_pcb {
        u_int32_t               utun_flags;
        int                             utun_ext_ifdata_stats;
        u_int32_t               utun_max_pending_packets;
-       u_int32_t               utun_pending_packets;
        utun_crypto_ctx_t utun_crypto_ctx[UTUN_CRYPTO_CTX_NUM_DIRS];
 };
 
index 966447af2bed12280db6a3d2e7dfb3d4d85186f2..3565c4feb4c45a8e863f6916909b1e4e215cccb6 100644 (file)
@@ -910,7 +910,7 @@ utun_ctl_stop_datatraffic_crypto_dtls(struct utun_pcb   *pcb)
                *(protocol_family_t *)mbuf_data(*pkt) = htonl(*(protocol_family_t *)mbuf_data(*pkt));           \
                rc = ctl_enqueuembuf(pcb->utun_ctlref, pcb->utun_unit, *pkt, CTL_DATA_EOR);                     \
                if (rc != 0) {                                                                                  \
-                       printf("%s: - ctl_enqueuembuf failed (rc %d) for %s:\n", __FUNCTION__, rc, (char *)errstr); \
+                       printf("%s: - ctl_enqueuembuf failed (rc %d) for %s:\n", __FUNCTION__, rc, errstr); \
                        mbuf_freem(*pkt);                                                                           \
                        ifnet_stat_increment_out(pcb->utun_ifp, 0, 0, 1);                                           \
                        lck_mtx_unlock(&dtls_ctl_mutex);                                                            \
@@ -965,7 +965,7 @@ utun_pkt_dtls_output(struct utun_pcb *pcb, mbuf_t *pkt)
                } else if (rc == EINVAL) {
                        // unsupported proto... fall through and punt (but 1st undo the protocol strip)
                        utun_pkt_dtls_prepend_proto(pkt, proto);
-                       utun_pkt_dtls_puntup(pcb, pkt, (char *)"unsupported proto", rc);
+                       utun_pkt_dtls_puntup(pcb, pkt, "unsupported proto", rc);
                } else {
                        // mbuf_prepend failure... mbuf will be already freed
                        printf("%s: failed to encrypsulate and send pkt %d\n", __FUNCTION__,rc);
@@ -974,7 +974,7 @@ utun_pkt_dtls_output(struct utun_pcb *pcb, mbuf_t *pkt)
                        return 0; // and drop packet
                }
        } else {
-               utun_pkt_dtls_puntup(pcb, pkt, (char *)"slowpath", rc);
+               utun_pkt_dtls_puntup(pcb, pkt, "slowpath", rc);
        }
 
        if (!rc)
index efa21e8b88fbe5c84ec44d87a8ffb8360f46ad35..29e253bc4f3c9f2c5a6bca799d8cb026dc70633e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -326,6 +326,204 @@ struct if_rxpoll_stats {
        u_int32_t       ifi_poll_packets_limit; /* max packets per poll call */
        u_int64_t       ifi_poll_interval_time; /* poll interval (nsec) */
 };
+
+/*
+ * Interface link status report -- includes statistics related to
+ * the link layer technology sent by the driver. The driver will monitor
+ * these statistics over an interval (3-4 secs) and will generate a report
+ * to the network stack. This will give first-hand information about the
+ * status of the first hop of the network path. The version and
+ * length values should be correct for the data to be processed correctly.
+ * The definitions differ for different kinds of interfaces, such as
+ * Wifi and Cellular.
+ */
+#define IF_CELLULAR_STATUS_REPORT_VERSION_1    1
+#define IF_WIFI_STATUS_REPORT_VERSION_1                1
+#define IF_CELLULAR_STATUS_REPORT_CURRENT_VERSION      \
+                                       IF_CELLULAR_STATUS_REPORT_VERSION_1
+#define IF_WIFI_STATUS_REPORT_CURRENT_VERSION  IF_WIFI_STATUS_REPORT_VERSION_1
+/*
+ * For cellular interface --
+ * There is no way to share common headers between the Baseband and
+ * the kernel. Any changes to this structure will need to be communicated
+ * to the Baseband team. It is better to use reserved space instead of
+ * changing the size or existing fields in the structure.
+ */
+struct if_cellular_status_v1 {
+       u_int32_t valid_bitmask; /* indicates which fields are valid */
+#define IF_CELL_LINK_QUALITY_METRIC_VALID      0x1
+#define IF_CELL_UL_EFFECTIVE_BANDWIDTH_VALID   0x2
+#define IF_CELL_UL_MAX_BANDWIDTH_VALID         0x4
+#define IF_CELL_UL_MIN_LATENCY_VALID           0x8
+#define IF_CELL_UL_EFFECTIVE_LATENCY_VALID     0x10
+#define IF_CELL_UL_MAX_LATENCY_VALID           0x20
+#define IF_CELL_UL_RETXT_LEVEL_VALID           0x40
+#define IF_CELL_UL_BYTES_LOST_VALID            0x80
+#define IF_CELL_UL_MIN_QUEUE_SIZE_VALID                0x100
+#define IF_CELL_UL_AVG_QUEUE_SIZE_VALID                0x200
+#define IF_CELL_UL_MAX_QUEUE_SIZE_VALID                0x400
+#define IF_CELL_DL_EFFECTIVE_BANDWIDTH_VALID   0x800
+#define IF_CELL_DL_MAX_BANDWIDTH_VALID         0x1000
+#define IF_CELL_CONFIG_INACTIVITY_TIME_VALID   0x2000
+#define IF_CELL_CONFIG_BACKOFF_TIME_VALID      0x4000
+       u_int32_t link_quality_metric;
+       u_int32_t ul_effective_bandwidth; /* Measured uplink bandwidth based on current activity (bps) */
+       u_int32_t ul_max_bandwidth; /* Maximum supported uplink bandwidth (bps) */
+       u_int32_t ul_min_latency; /* min expected uplink latency for first hop (ms) */
+       u_int32_t ul_effective_latency; /* current expected uplink latency for first hop (ms) */
+       u_int32_t ul_max_latency; /* max expected uplink latency first hop (ms) */
+       u_int32_t ul_retxt_level; /* Retransmission metric */
+#define IF_CELL_UL_RETXT_LEVEL_NONE    1
+#define IF_CELL_UL_RETXT_LEVEL_LOW     2
+#define IF_CELL_UL_RETXT_LEVEL_MEDIUM  3
+#define IF_CELL_UL_RETXT_LEVEL_HIGH    4
+       u_int32_t ul_bytes_lost; /* % of total bytes lost on uplink in Q10 format */
+       u_int32_t ul_min_queue_size; /* minimum bytes in queue */
+       u_int32_t ul_avg_queue_size; /* average bytes in queue */
+       u_int32_t ul_max_queue_size; /* maximum bytes in queue */
+       u_int32_t dl_effective_bandwidth; /* Measured downlink bandwidth based on current activity (bps) */
+       u_int32_t dl_max_bandwidth; /* Maximum supported downlink bandwidth (bps) */
+       u_int32_t config_inactivity_time; /* ms */
+       u_int32_t config_backoff_time; /* new connections backoff time in ms */
+       u_int64_t reserved_1;
+       u_int64_t reserved_2;
+       u_int64_t reserved_3;
+       u_int64_t reserved_4;
+       u_int64_t reserved_5;
+} __attribute__((packed));
+
+struct if_cellular_status {
+       union {
+               struct if_cellular_status_v1 if_status_v1;
+       } if_cell_u;
+};
+
+/*
+ * These statistics will be provided by the Wifi driver periodically.
+ * After sending each report, the driver should start computing again
+ * for the next report duration so that the values represent the link
+ * status for one report duration.
+ */
+
+struct if_wifi_status_v1 {
+       u_int32_t valid_bitmask;
+#define IF_WIFI_LINK_QUALITY_METRIC_VALID      0x1
+#define IF_WIFI_UL_EFFECTIVE_BANDWIDTH_VALID   0x2
+#define IF_WIFI_UL_MAX_BANDWIDTH_VALID         0x4
+#define IF_WIFI_UL_MIN_LATENCY_VALID           0x8
+#define IF_WIFI_UL_EFFECTIVE_LATENCY_VALID     0x10
+#define IF_WIFI_UL_MAX_LATENCY_VALID           0x20
+#define IF_WIFI_UL_RETXT_LEVEL_VALID           0x40
+#define IF_WIFI_UL_ERROR_RATE_VALID            0x80
+#define IF_WIFI_UL_BYTES_LOST_VALID            0x100
+#define IF_WIFI_DL_EFFECTIVE_BANDWIDTH_VALID   0x200
+#define IF_WIFI_DL_MAX_BANDWIDTH_VALID         0x400
+#define IF_WIFI_DL_MIN_LATENCY_VALID           0x800
+#define IF_WIFI_DL_EFFECTIVE_LATENCY_VALID     0x1000
+#define IF_WIFI_DL_MAX_LATENCY_VALID           0x2000
+#define IF_WIFI_DL_ERROR_RATE_VALID            0x4000
+#define IF_WIFI_CONFIG_FREQUENCY_VALID         0x8000
+#define IF_WIFI_CONFIG_MULTICAST_RATE_VALID    0x10000
+#define IF_WIFI_CONFIG_SCAN_COUNT_VALID                0x20000
+#define IF_WIFI_CONFIG_SCAN_DURATION_VALID     0x40000
+       u_int32_t link_quality_metric; /* link quality metric */
+       u_int32_t ul_effective_bandwidth; /* Measured uplink bandwidth based on current activity (bps) */
+       u_int32_t ul_max_bandwidth; /* Maximum supported uplink bandwidth (bps) */
+       u_int32_t ul_min_latency; /* min expected uplink latency for first hop (ms) */
+       u_int32_t ul_effective_latency; /* current expected uplink latency for first hop (ms) */
+       u_int32_t ul_max_latency; /* max expected uplink latency for first hop (ms) */
+       u_int32_t ul_retxt_level; /* Retransmission metric */
+#define IF_WIFI_UL_RETXT_LEVEL_NONE    1
+#define IF_WIFI_UL_RETXT_LEVEL_LOW     2
+#define IF_WIFI_UL_RETXT_LEVEL_MEDIUM  3
+#define IF_WIFI_UL_RETXT_LEVEL_HIGH    4
+       u_int32_t ul_bytes_lost; /* % of total bytes lost on uplink in Q10 format */
+       u_int32_t ul_error_rate; /* % of bytes dropped on uplink after many retransmissions in Q10 format */
+       u_int32_t dl_effective_bandwidth; /* Measured downlink bandwidth based on current activity (bps) */
+       u_int32_t dl_max_bandwidth; /* Maximum supported downlink bandwidth (bps) */
+       /*
+        * The download latency values indicate the time the AP may have to wait for the
+        * driver to receive the packet. These values give the range of expected latency
+        * mainly due to co-existence events and channel hopping where the interface
+        * becomes unavailable.
+        */
+       u_int32_t dl_min_latency; /* min expected latency for first hop in ms */
+       u_int32_t dl_effective_latency; /* current expected latency for first hop in ms */
+       u_int32_t dl_max_latency; /* max expected latency for first hop in ms */
+       u_int32_t dl_error_rate; /* % of CRC or other errors in Q10 format */
+       u_int32_t config_frequency; /* 2.4 or 5 GHz */
+#define IF_WIFI_CONFIG_FREQUENCY_2_4_GHZ       1
+#define IF_WIFI_CONFIG_FREQUENCY_5_0_GHZ       2
+       u_int32_t config_multicast_rate; /* bps */
+       u_int32_t scan_count; /* scan count during the previous period */
+       u_int32_t scan_duration; /* scan duration in ms */
+       u_int64_t reserved_1;
+       u_int64_t reserved_2;
+       u_int64_t reserved_3;
+       u_int64_t reserved_4;
+} __attribute__((packed));
+
+struct if_wifi_status {
+       union {
+               struct if_wifi_status_v1 if_status_v1;
+       } if_wifi_u;
+};
+
+struct if_link_status {
+       u_int32_t       ifsr_version;   /* version of this report */
+       u_int32_t       ifsr_len;       /* length of the following struct */
+       union {
+               struct if_cellular_status ifsr_cell;
+               struct if_wifi_status ifsr_wifi;
+       } ifsr_u;
+};
+
+struct if_interface_state {
+       /*
+        * The bitmask tells which of the fields
+        * to consider:
+        * - When setting, to control which fields
+        *   are being modified;
+        * - When getting, it tells which fields are set.
+        */
+       u_int8_t valid_bitmask;
+#define        IF_INTERFACE_STATE_RRC_STATE_VALID              0x1
+#define        IF_INTERFACE_STATE_LQM_STATE_VALID              0x2
+#define        IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID 0x4
+
+       /*
+        * Valid only for cellular interface
+        */
+       u_int8_t rrc_state;
+#define        IF_INTERFACE_STATE_RRC_STATE_IDLE       0x0
+#define        IF_INTERFACE_STATE_RRC_STATE_CONNECTED  0x1
+
+       /*
+        * Values normalized to the edge of the following thresholds
+        * that are defined in <net/if.h>:
+        *  IFNET_LQM_THRESH_BAD
+        *  IFNET_LQM_THRESH_POOR
+        *  IFNET_LQM_THRESH_GOOD
+        */
+       int8_t lqm_state;
+
+       /*
+        * Indicates whether the underlying link is currently
+        * available
+        */
+       u_int8_t interface_availability;
+#define        IF_INTERFACE_STATE_INTERFACE_AVAILABLE          0x0
+#define        IF_INTERFACE_STATE_INTERFACE_UNAVAILABLE        0x1
+};
+
+struct chain_len_stats {
+       uint64_t        cls_one;
+       uint64_t        cls_two;
+       uint64_t        cls_three;
+       uint64_t        cls_four;
+       uint64_t        cls_five_or_more;
+};
+
 #endif /* PRIVATE */
 
 #pragma pack()
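
To show how the structures above are meant to flow from a driver into the stack, here is a hedged sketch that fills a Wifi report and hands it to the ifnet_link_status_report() KPI added in kpi_interface.c later in this commit; the bandwidth and quality numbers are placeholders, not values taken from the source.

    /* Sketch: a Wifi driver reporting uplink bandwidth and link quality for one interval. */
    static void
    wifi_driver_report_link_status(ifnet_t ifp)
    {
    	struct if_link_status ifsr;
    	struct if_wifi_status_v1 *sr;

    	bzero(&ifsr, sizeof (ifsr));
    	ifsr.ifsr_version = IF_WIFI_STATUS_REPORT_CURRENT_VERSION;
    	ifsr.ifsr_len = sizeof (struct if_wifi_status_v1);

    	sr = &ifsr.ifsr_u.ifsr_wifi.if_wifi_u.if_status_v1;
    	sr->valid_bitmask = IF_WIFI_UL_EFFECTIVE_BANDWIDTH_VALID |
    	    IF_WIFI_LINK_QUALITY_METRIC_VALID;
    	sr->ul_effective_bandwidth = 54 * 1000 * 1000;	/* placeholder: 54 Mbps */
    	sr->link_quality_metric = 80;			/* placeholder metric */

    	(void) ifnet_link_status_report(ifp, &ifsr, sizeof (ifsr));
    }
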
@@ -362,7 +560,7 @@ struct if_data_internal {
        u_int32_t       ifi_mtu;        /* maximum transmission unit */
        u_int32_t       ifi_metric;     /* routing metric (external only) */
        u_int32_t       ifi_baudrate;   /* linespeed */
-       u_int32_t       _pad;
+       u_int32_t       ifi_preamblelen;/* length of the packet preamble */
        /* volatile statistics */
        u_int64_t       ifi_ipackets;   /* packets received on interface */
        u_int64_t       ifi_ierrors;    /* input errors on interface */
@@ -414,6 +612,7 @@ struct if_measured_bw {
 #define if_physical    if_data.ifi_physical
 #define        if_addrlen      if_data.ifi_addrlen
 #define        if_hdrlen       if_data.ifi_hdrlen
+#define        if_preamblelen  if_data.ifi_preamblelen
 #define        if_metric       if_data.ifi_metric
 #define        if_baudrate     if_data.ifi_baudrate
 #define        if_hwassist     if_data.ifi_hwassist
@@ -589,7 +788,13 @@ struct ifnet {
        decl_lck_mtx_data(, if_start_lock);
        u_int32_t               if_start_flags; /* see IFSF flags below */
        u_int32_t               if_start_req;
-       u_int32_t               if_start_active; /* output is active */
+       u_int16_t               if_start_active; /* output is active */
+       u_int16_t               if_start_delayed;
+       u_int16_t               if_start_delay_qlen;
+       u_int16_t               if_start_delay_idle;
+       u_int64_t               if_start_delay_swin;
+       u_int32_t               if_start_delay_cnt;
+       u_int32_t               if_start_delay_timeout; /* nanoseconds */
        struct timespec         if_start_cycle;  /* restart interval */
        struct thread           *if_start_thread;
 
@@ -664,7 +869,6 @@ struct ifnet {
        struct mld_ifinfo       *if_mli;        /* for MLDv2 */
 #endif /* INET6 */
 
-       int                     if_lqm;         /* link quality metric */
 #if MEASURE_BW
        struct if_measured_bw   if_bw;
 #endif /* MEASURE_BW */
@@ -686,13 +890,24 @@ struct ifnet {
                uint32_t        expensive:1;    /* delegated i/f expensive? */
        } if_delegated;
 
+#define        IF_MAXAGENTS    8
+       uuid_t                  if_agentids[IF_MAXAGENTS];
+
        u_int64_t               if_data_threshold;
        u_int32_t               if_fg_sendts;   /* last send on a fg socket in seconds */
+       u_int32_t               if_rt_sendts;   /* last send of a real time packet */
 
+#if INET
+       decl_lck_rw_data(, if_inetdata_lock);
+       void                    *if_inetdata;
+#endif /* INET */
 #if INET6
        decl_lck_rw_data(, if_inet6data_lock);
        void                    *if_inet6data;
 #endif
+       decl_lck_rw_data(, if_link_status_lock);
+       struct if_link_status   *if_link_status;
+       struct if_interface_state       if_interface_state;
 };
 
 #define        IF_TCP_STATINC(_ifp, _s) do {                                   \
@@ -835,6 +1050,7 @@ struct ifaddr {
        void (*ifa_detached)(struct ifaddr *); /* callback fn for detaching */
 };
 
+
 /*
  * Valid values for ifa_flags
  */
@@ -1064,7 +1280,10 @@ extern struct if_clone *if_clone_lookup(const char *, u_int32_t *);
 extern int if_clone_attach(struct if_clone *);
 extern void if_clone_detach(struct if_clone *);
 
+extern u_int32_t if_functional_type(struct ifnet *);
+
 extern errno_t if_mcasts_update(struct ifnet *);
+extern int32_t total_snd_byte_count;
 
 typedef enum {
        IFNET_LCK_ASSERT_EXCLUSIVE,     /* RW: held as writer */
@@ -1081,6 +1300,12 @@ __private_extern__ void ifnet_lock_shared(struct ifnet *ifp);
 __private_extern__ void ifnet_lock_exclusive(struct ifnet *ifp);
 __private_extern__ void ifnet_lock_done(struct ifnet *ifp);
 
+#if INET
+__private_extern__ void if_inetdata_lock_shared(struct ifnet *ifp);
+__private_extern__ void if_inetdata_lock_exclusive(struct ifnet *ifp);
+__private_extern__ void if_inetdata_lock_done(struct ifnet *ifp);
+#endif
+
 #if INET6
 __private_extern__ void if_inet6data_lock_shared(struct ifnet *ifp);
 __private_extern__ void if_inet6data_lock_exclusive(struct ifnet *ifp);
@@ -1176,7 +1401,13 @@ __private_extern__ struct rtentry *ifnet_cached_rtlookup_inet6(struct ifnet *,
     struct in6_addr *);
 #endif /* INET6 */
 
-__private_extern__ void if_lqm_update(struct ifnet *, int32_t);
+__private_extern__ errno_t if_state_update(struct ifnet *,
+    struct if_interface_state *);
+__private_extern__ void if_get_state(struct ifnet *,
+    struct if_interface_state *);
+__private_extern__ errno_t if_probe_connectivity(struct ifnet *ifp,
+    u_int32_t conn_probe);
+__private_extern__ void if_lqm_update(struct ifnet *, int32_t, int);
 __private_extern__ void ifnet_update_sndq(struct ifclassq *, cqev_t);
 __private_extern__ void ifnet_update_rcv(struct ifnet *, cqev_t);
 
@@ -1194,6 +1425,13 @@ __private_extern__ errno_t ifnet_set_input_latencies(struct ifnet *,
 __private_extern__ errno_t ifnet_set_output_latencies(struct ifnet *,
     struct if_latencies *, boolean_t);
 
+__private_extern__ void ifnet_clear_netagent(uuid_t);
+
+__private_extern__ int ifnet_set_netsignature(struct ifnet *, uint8_t,
+    uint8_t, uint16_t, uint8_t *);
+__private_extern__ int ifnet_get_netsignature(struct ifnet *, uint8_t,
+    uint8_t *, uint16_t *, uint8_t *);
+
 __private_extern__ errno_t ifnet_framer_stub(struct ifnet *, struct mbuf **,
     const struct sockaddr *, const char *, const char *, u_int32_t *,
     u_int32_t *);
index 1a45bf4d892fac9133cb5f6e1c43f02769e0d610..ded46afbf2a1cd0f2e9eb43ed43a79465418d941 100644 (file)
@@ -852,11 +852,10 @@ vlan_parent_create(struct ifnet * p, vlan_parent_ref * ret_vlp)
     vlan_parent_ref    vlp;
 
     *ret_vlp = NULL;
-    vlp = _MALLOC(sizeof(*vlp), M_VLAN, M_WAITOK);
+    vlp = _MALLOC(sizeof(*vlp), M_VLAN, M_WAITOK | M_ZERO);
     if (vlp == NULL) {
        return (ENOMEM);
     }
-    bzero(vlp, sizeof(*vlp));
     error = siocgifdevmtu(p, &vlp->vlp_devmtu);
     if (error != 0) {
        printf("vlan_parent_create (%s%d): siocgifdevmtu failed, %d\n",
@@ -978,10 +977,9 @@ vlan_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params)
        if (error != 0) {
                return (error);
        }
-       ifv = _MALLOC(sizeof(struct ifvlan), M_VLAN, M_WAITOK);
+       ifv = _MALLOC(sizeof(struct ifvlan), M_VLAN, M_WAITOK | M_ZERO);
        if (ifv == NULL)
                return ENOBUFS;
-       bzero(ifv, sizeof(struct ifvlan));
        ifv->ifv_retain_count = 1;
        ifv->ifv_signature = IFV_SIGNATURE;
        multicast_list_init(&ifv->ifv_multicast);
@@ -1173,7 +1171,7 @@ vlan_output(struct ifnet * ifp, struct mbuf * m)
        m->m_pkthdr.csum_flags |= CSUM_VLAN_TAG_VALID;
        m->m_pkthdr.vlan_tag = tag;
     } else {
-       M_PREPEND(m, encaplen, M_DONTWAIT);
+       M_PREPEND(m, encaplen, M_DONTWAIT, 1);
        if (m == NULL) {
            printf("%s%d: unable to prepend VLAN header\n", ifnet_name(ifp),
                   ifnet_unit(ifp));
index 22b18df0563b311dbbd4fe2d7bd1ac1da4245785..a2289eca07988f5ccef6ec2ace53f6804296d7c2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -346,10 +346,25 @@ ifnet_allocate_extended(const struct ifnet_init_eparams *einit0,
                        bzero(&ifp->if_broadcast, sizeof (ifp->if_broadcast));
                }
 
+               /*
+                * The output target queue delay is specified in milliseconds;
+                * convert it to nanoseconds.
+                */
                IFCQ_TARGET_QDELAY(&ifp->if_snd) =
-                   einit.output_target_qdelay;
+                   einit.output_target_qdelay * 1000 * 1000;
                IFCQ_MAXLEN(&ifp->if_snd) = einit.sndq_maxlen;
 
+               if (einit.start_delay_qlen > 0 &&
+                   einit.start_delay_timeout > 0) {
+                       ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
+                       ifp->if_start_delay_qlen =
+                           min(100, einit.start_delay_qlen);
+                       ifp->if_start_delay_timeout =
+                           min(20000, einit.start_delay_timeout);
+                       /* convert timeout to nanoseconds */
+                       ifp->if_start_delay_timeout *= 1000;
+               }
+
                if (error == 0) {
                        *interface = ifp;
                        // temporary - this should be done in dlil_if_acquire
@@ -484,7 +499,7 @@ ifnet_flags(ifnet_t interface)
  * If IFEF_AWDL has been set on the interface and the caller attempts
  * to clear one or more of the associated flags in IFEF_AWDL_MASK,
  * return failure.
- * 
+ *
  * If IFEF_AWDL_RESTRICTED is set by the caller, make sure IFEF_AWDL is set
  * on the interface.
  *
@@ -568,7 +583,7 @@ ifnet_set_eflags(ifnet_t interface, u_int32_t new_flags, u_int32_t mask)
                ev_data.if_unit = interface->if_unit;
                ev_msg.dv[0].data_length = sizeof(struct net_event_data);
                ev_msg.dv[0].data_ptr = &ev_data;
-               ev_msg.dv[1].data_length = 0; 
+               ev_msg.dv[1].data_length = 0;
                kev_post_msg(&ev_msg);
        }
 
@@ -658,7 +673,7 @@ ifnet_set_link_quality(ifnet_t ifp, int quality)
                goto done;
        }
 
-       if_lqm_update(ifp, quality);
+       if_lqm_update(ifp, quality, 0);
 
 done:
        return (err);
@@ -673,12 +688,57 @@ ifnet_link_quality(ifnet_t ifp)
                return (IFNET_LQM_THRESH_OFF);
 
        ifnet_lock_shared(ifp);
-       lqm = ifp->if_lqm;
+       lqm = ifp->if_interface_state.lqm_state;
        ifnet_lock_done(ifp);
 
        return (lqm);
 }
 
+errno_t
+ifnet_set_interface_state(ifnet_t ifp,
+    struct if_interface_state *if_interface_state)
+{
+       errno_t err = 0;
+
+       if (ifp == NULL || if_interface_state == NULL) {
+               err = EINVAL;
+               goto done;
+       }
+
+       if (!ifnet_is_attached(ifp, 0)) {
+               err = ENXIO;
+               goto done;
+       }
+
+       if_state_update(ifp, if_interface_state);
+
+done:
+       return (err);
+}
+
+errno_t
+ifnet_get_interface_state(ifnet_t ifp,
+    struct if_interface_state *if_interface_state)
+{
+       errno_t err = 0;
+
+       if (ifp == NULL || if_interface_state == NULL) {
+               err = EINVAL;
+               goto done;
+       }
+
+       if (!ifnet_is_attached(ifp, 0)) {
+               err = ENXIO;
+               goto done;
+       }
+
+       if_get_state(ifp, if_interface_state);
+
+done:
+       return (err);
+}
+
+
 static errno_t
 ifnet_defrouter_llreachinfo(ifnet_t ifp, int af,
     struct ifnet_llreach_info *iflri)
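
A small usage sketch for the interface-state KPI above; the value shown is only an example, and IFNET_LQM_THRESH_GOOD is one of the thresholds referenced by the if_interface_state comments in if_var.h.

    /* Sketch: report that the link quality of an attached interface is now GOOD. */
    static errno_t
    driver_report_good_lqm(ifnet_t ifp)
    {
    	struct if_interface_state state;

    	bzero(&state, sizeof (state));
    	state.valid_bitmask = IF_INTERFACE_STATE_LQM_STATE_VALID;	/* only lqm_state is meaningful */
    	state.lqm_state = IFNET_LQM_THRESH_GOOD;

    	return (ifnet_set_interface_state(ifp, &state));
    }
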
@@ -824,7 +884,7 @@ ifnet_set_offload(ifnet_t interface, ifnet_offload_t offload)
                ifcaps |= IFCAP_VLAN_MTU;
        if ((offload & IFNET_VLAN_TAGGING))
                ifcaps |= IFCAP_VLAN_HWTAGGING;
-       if ((offload & IFNET_TX_STATUS)) 
+       if ((offload & IFNET_TX_STATUS))
                ifcaps |= IFNET_TX_STATUS;
        if (ifcaps != 0) {
                (void) ifnet_set_capabilities_supported(interface, ifcaps,
@@ -1129,6 +1189,25 @@ ifnet_set_bandwidths(struct ifnet *ifp, struct if_bandwidths *output_bw,
        return (0);
 }
 
+static void
+ifnet_set_link_status_outbw(struct ifnet *ifp)
+{
+       struct if_wifi_status_v1 *sr;
+       sr = &ifp->if_link_status->ifsr_u.ifsr_wifi.if_wifi_u.if_status_v1;
+       if (ifp->if_output_bw.eff_bw != 0) {
+               sr->valid_bitmask |=
+                   IF_WIFI_UL_EFFECTIVE_BANDWIDTH_VALID;
+               sr->ul_effective_bandwidth =
+                   ifp->if_output_bw.eff_bw;
+       }
+       if (ifp->if_output_bw.max_bw != 0) {
+               sr->valid_bitmask |=
+                   IF_WIFI_UL_MAX_BANDWIDTH_VALID;
+               sr->ul_max_bandwidth =
+                   ifp->if_output_bw.max_bw;
+       }
+}
+
 errno_t
 ifnet_set_output_bandwidths(struct ifnet *ifp, struct if_bandwidths *bw,
     boolean_t locked)
@@ -1167,9 +1246,38 @@ ifnet_set_output_bandwidths(struct ifnet *ifp, struct if_bandwidths *bw,
        if (!locked)
                IFCQ_UNLOCK(ifq);
 
+       /*
+        * If this is a Wifi interface, update the values in
+        * if_link_status structure also.
+        */
+       if (IFNET_IS_WIFI(ifp) && ifp->if_link_status != NULL) {
+               lck_rw_lock_exclusive(&ifp->if_link_status_lock);
+               ifnet_set_link_status_outbw(ifp);
+               lck_rw_done(&ifp->if_link_status_lock);
+       }
+
        return (0);
 }
 
+static void
+ifnet_set_link_status_inbw(struct ifnet *ifp)
+{
+       struct if_wifi_status_v1 *sr;
+
+       sr = &ifp->if_link_status->ifsr_u.ifsr_wifi.if_wifi_u.if_status_v1;
+       if (ifp->if_input_bw.eff_bw != 0) {
+               sr->valid_bitmask |=
+                   IF_WIFI_DL_EFFECTIVE_BANDWIDTH_VALID;
+               sr->dl_effective_bandwidth =
+                   ifp->if_input_bw.eff_bw;
+       }
+       if (ifp->if_input_bw.max_bw != 0) {
+               sr->valid_bitmask |=
+                   IF_WIFI_DL_MAX_BANDWIDTH_VALID;
+               sr->dl_max_bandwidth = ifp->if_input_bw.max_bw;
+       }
+}
+
 errno_t
 ifnet_set_input_bandwidths(struct ifnet *ifp, struct if_bandwidths *bw)
 {
@@ -1187,6 +1295,12 @@ ifnet_set_input_bandwidths(struct ifnet *ifp, struct if_bandwidths *bw)
        else if (ifp->if_input_bw.eff_bw == 0)
                ifp->if_input_bw.eff_bw = ifp->if_input_bw.max_bw;
 
+       if (IFNET_IS_WIFI(ifp) && ifp->if_link_status != NULL) {
+               lck_rw_lock_exclusive(&ifp->if_link_status_lock);
+               ifnet_set_link_status_inbw(ifp);
+               lck_rw_done(&ifp->if_link_status_lock);
+       }
+
        if (old_bw.eff_bw != ifp->if_input_bw.eff_bw ||
            old_bw.max_bw != ifp->if_input_bw.max_bw)
                ifnet_update_rcv(ifp, CLASSQ_EV_LINK_BANDWIDTH);
@@ -1526,7 +1640,7 @@ errno_t
 ifnet_get_inuse_address_list(ifnet_t interface, ifaddr_t **addresses)
 {
        return (addresses == NULL ? EINVAL :
-               ifnet_get_address_list_family_internal(interface, addresses, 
+               ifnet_get_address_list_family_internal(interface, addresses,
                0, 0, M_NOWAIT, 1));
 }
 
@@ -1625,17 +1739,16 @@ done:
                        if (return_inuse_addrs) {
                                usecount = tcp_find_anypcb_byaddr(ifal->ifal_ifa);
                                usecount += udp_find_anypcb_byaddr(ifal->ifal_ifa);
-                               if (usecount) { 
+                               if (usecount) {
                                        (*addresses)[index] = ifal->ifal_ifa;
                                        index++;
-                               }       
-                               else
+                               } else {
                                        IFA_REMREF(ifal->ifal_ifa);
+                               }
                        } else {
                                (*addresses)[--count] = ifal->ifal_ifa;
                        }
-               }       
-               else {
+               } else {
                        IFA_REMREF(ifal->ifal_ifa);
                }
                FREE(ifal, M_TEMP);
@@ -2093,7 +2206,7 @@ ifnet_transmit_burst_end(ifnet_t ifp, mbuf_t pkt)
        uint64_t oseq, ots, bytes, ts, t;
        uint32_t flags;
 
-       if ( ifp == NULL || !(pkt->m_flags & M_PKTHDR))
+       if (ifp == NULL || !(pkt->m_flags & M_PKTHDR))
                return;
 
        flags = OSBitOrAtomic(IF_MEASURED_BW_CALCULATION, &ifp->if_bw.flags);
@@ -2116,7 +2229,7 @@ ifnet_transmit_burst_end(ifnet_t ifp, mbuf_t pkt)
 
        if (ifp->if_bw.start_seq > 0 && oseq > ifp->if_bw.start_seq) {
                ts = ots - ifp->if_bw.start_ts;
-               if (ts > 0 ) {
+               if (ts > 0) {
                        absolutetime_to_nanoseconds(ts, &t);
                        bytes = oseq - ifp->if_bw.start_seq;
                        ifp->if_bw.bytes = bytes;
@@ -2495,6 +2608,12 @@ ifnet_get_local_ports_extended(ifnet_t ifp, protocol_family_t protocol,
                INPCB_GET_PORTS_USED_WILDCARDOK : 0);
        inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_NOWAKEUPOK) ?
                INPCB_GET_PORTS_USED_NOWAKEUPOK : 0);
+       inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_RECVANYIFONLY) ?
+               INPCB_GET_PORTS_USED_RECVANYIFONLY : 0);
+       inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_EXTBGIDLEONLY) ?
+               INPCB_GET_PORTS_USED_EXTBGIDLEONLY : 0);
+       inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_ACTIVEONLY) ?
+               INPCB_GET_PORTS_USED_ACTIVEONLY : 0);
 
        if (bitfield == NULL)
                return (EINVAL);
@@ -2535,11 +2654,11 @@ ifnet_notice_node_presence(ifnet_t ifp, struct sockaddr* sa, int32_t rssi,
     int lqm, int npm, u_int8_t srvinfo[48])
 {
        if (ifp == NULL || sa == NULL || srvinfo == NULL)
-               return(EINVAL);
+               return (EINVAL);
        if (sa->sa_len > sizeof(struct sockaddr_storage))
-               return(EINVAL);
+               return (EINVAL);
        if (sa->sa_family != AF_LINK && sa->sa_family != AF_INET6)
-               return(EINVAL);
+               return (EINVAL);
 
        dlil_node_present(ifp, sa, rssi, lqm, npm, srvinfo);
        return (0);
@@ -2549,11 +2668,11 @@ errno_t
 ifnet_notice_node_absence(ifnet_t ifp, struct sockaddr* sa)
 {
        if (ifp == NULL || sa == NULL)
-               return(EINVAL);
+               return (EINVAL);
        if (sa->sa_len > sizeof(struct sockaddr_storage))
-               return(EINVAL);
+               return (EINVAL);
        if (sa->sa_family != AF_LINK && sa->sa_family != AF_INET6)
-               return(EINVAL);
+               return (EINVAL);
 
        dlil_node_absent(ifp, sa);
        return (0);
@@ -2563,7 +2682,7 @@ errno_t
 ifnet_notice_master_elected(ifnet_t ifp)
 {
        if (ifp == NULL)
-               return(EINVAL);
+               return (EINVAL);
 
        dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_MASTER_ELECTED, NULL, 0);
        return (0);
@@ -2588,7 +2707,7 @@ ifnet_report_issues(ifnet_t ifp, u_int8_t modid[IFNET_MODIDLEN],
        return (0);
 }
 
-extern errno_t
+errno_t
 ifnet_set_delegate(ifnet_t ifp, ifnet_t delegated_ifp)
 {
        ifnet_t odifp = NULL;
@@ -2612,7 +2731,7 @@ ifnet_set_delegate(ifnet_t ifp, ifnet_t delegated_ifp)
                ifp->if_delegated.type = delegated_ifp->if_type;
                ifp->if_delegated.family = delegated_ifp->if_family;
                ifp->if_delegated.subfamily = delegated_ifp->if_subfamily;
-               ifp->if_delegated.expensive = 
+               ifp->if_delegated.expensive =
                    delegated_ifp->if_eflags & IFEF_EXPENSIVE ? 1 : 0;
                printf("%s: is now delegating %s (type 0x%x, family %u, "
                    "sub-family %u)\n", ifp->if_xname, delegated_ifp->if_xname,
@@ -2639,7 +2758,7 @@ done:
        return (0);
 }
 
-extern errno_t
+errno_t
 ifnet_get_delegate(ifnet_t ifp, ifnet_t *pdelegated_ifp)
 {
        if (ifp == NULL || pdelegated_ifp == NULL)
@@ -2659,28 +2778,207 @@ ifnet_get_delegate(ifnet_t ifp, ifnet_t *pdelegated_ifp)
        return (0);
 }
 
-extern u_int32_t key_fill_offload_frames_for_savs (ifnet_t ifp,
-       struct ipsec_offload_frame *frames_array, u_int32_t frames_array_count,
-       size_t frame_data_offset);
+extern u_int32_t
+key_fill_offload_frames_for_savs(ifnet_t ifp,
+    struct ifnet_keepalive_offload_frame *frames_array,
+    u_int32_t frames_array_count, size_t frame_data_offset);
+
+extern void
+udp_fill_keepalive_offload_frames(ifnet_t ifp,
+    struct ifnet_keepalive_offload_frame *frames_array,
+    u_int32_t frames_array_count, size_t frame_data_offset,
+    u_int32_t *used_frames_count);
 
-extern errno_t
-ifnet_get_ipsec_offload_frames(ifnet_t ifp,
-                                                          struct ipsec_offload_frame *frames_array,
-                                                          u_int32_t frames_array_count,
-                                                          size_t frame_data_offset,
-                                                          u_int32_t *used_frames_count)
+errno_t
+ifnet_get_keepalive_offload_frames(ifnet_t ifp,
+    struct ifnet_keepalive_offload_frame *frames_array,
+    u_int32_t frames_array_count, size_t frame_data_offset,
+    u_int32_t *used_frames_count)
 {
-       if (frames_array == NULL || used_frames_count == NULL) {
+       if (frames_array == NULL || used_frames_count == NULL)
                return (EINVAL);
-       }
 
-       *used_frames_count = 0;
+       /* frame_data_offset should be 32-bit aligned */
+       if (P2ROUNDUP(frame_data_offset, sizeof(u_int32_t))
+           != frame_data_offset)
+               return (EINVAL);
 
-       if (frames_array_count == 0) {
+       *used_frames_count = 0;
+       if (frames_array_count == 0)
                return (0);
-       }
 
+       /* First collect IPSec related keep-alive frames */
        *used_frames_count = key_fill_offload_frames_for_savs(ifp,
-               frames_array, frames_array_count, frame_data_offset);
+           frames_array, frames_array_count, frame_data_offset);
+
+       /* If there is more room, collect other UDP keep-alive frames */
+       if (*used_frames_count < frames_array_count)
+               udp_fill_keepalive_offload_frames(ifp, frames_array,
+                   frames_array_count, frame_data_offset,
+                   used_frames_count);
+
+       VERIFY(*used_frames_count <= frames_array_count);
        return (0);
 }
+
+errno_t
+ifnet_link_status_report(ifnet_t ifp, const void *buffer,
+    size_t buffer_len)
+{
+       struct if_link_status *ifsr;
+       errno_t err = 0;
+
+       if (ifp == NULL || buffer == NULL || buffer_len == 0)
+               return (EINVAL);
+
+       ifnet_lock_shared(ifp);
+
+       /*
+        * Make sure that the interface is attached but there is no need
+        * to take a reference because this call is coming from the driver.
+        */
+       if (!ifnet_is_attached(ifp, 0)) {
+               ifnet_lock_done(ifp);
+               return (ENXIO);
+       }
+
+       lck_rw_lock_exclusive(&ifp->if_link_status_lock);
+
+       /*
+        * If this is the first status report then allocate memory
+        * to store it.
+        */
+       if (ifp->if_link_status == NULL) {
+               MALLOC(ifp->if_link_status, struct if_link_status *,
+                   sizeof(struct if_link_status), M_TEMP, M_ZERO);
+               if (ifp->if_link_status == NULL) {
+                       err = ENOMEM;
+                       goto done;
+               }
+       }
+
+       ifsr = __DECONST(struct if_link_status *, buffer);
+
+       if (ifp->if_type == IFT_CELLULAR) {
+               struct if_cellular_status_v1 *if_cell_sr, *new_cell_sr;
+               /*
+                * Currently we have a single version -- if it does
+                * not match, just return.
+                */
+               if (ifsr->ifsr_version !=
+                   IF_CELLULAR_STATUS_REPORT_CURRENT_VERSION) {
+                       err = ENOTSUP;
+                       goto done;
+               }
+
+               if (ifsr->ifsr_len != sizeof(*if_cell_sr)) {
+                       err = EINVAL;
+                       goto done;
+               }
+
+               if_cell_sr =
+                   &ifp->if_link_status->ifsr_u.ifsr_cell.if_cell_u.if_status_v1;
+               new_cell_sr = &ifsr->ifsr_u.ifsr_cell.if_cell_u.if_status_v1;
+               ifp->if_link_status->ifsr_version = ifsr->ifsr_version;
+               ifp->if_link_status->ifsr_len = ifsr->ifsr_len;
+               if_cell_sr->valid_bitmask = 0;
+               bcopy(new_cell_sr, if_cell_sr, sizeof(*if_cell_sr));
+       } else if (ifp->if_subfamily == IFNET_SUBFAMILY_WIFI) {
+               struct if_wifi_status_v1 *if_wifi_sr, *new_wifi_sr;
+
+               /* Check version */
+               if (ifsr->ifsr_version !=
+                   IF_WIFI_STATUS_REPORT_CURRENT_VERSION) {
+                       err = ENOTSUP;
+                       goto done;
+               }
+
+               if (ifsr->ifsr_len != sizeof(*if_wifi_sr)) {
+                       err = EINVAL;
+                       goto done;
+               }
+
+               if_wifi_sr =
+                   &ifp->if_link_status->ifsr_u.ifsr_wifi.if_wifi_u.if_status_v1;
+               new_wifi_sr =
+                   &ifsr->ifsr_u.ifsr_wifi.if_wifi_u.if_status_v1;
+               ifp->if_link_status->ifsr_version = ifsr->ifsr_version;
+               ifp->if_link_status->ifsr_len = ifsr->ifsr_len;
+               if_wifi_sr->valid_bitmask = 0;
+               bcopy(new_wifi_sr, if_wifi_sr, sizeof(*if_wifi_sr));
+
+               /*
+                * Update the bandwidth values if we got recent values
+                * reported through the other KPI.
+                */
+               if (!(new_wifi_sr->valid_bitmask &
+                   IF_WIFI_UL_MAX_BANDWIDTH_VALID) &&
+                   ifp->if_output_bw.max_bw > 0) {
+                       if_wifi_sr->valid_bitmask |=
+                           IF_WIFI_UL_MAX_BANDWIDTH_VALID;
+                       if_wifi_sr->ul_max_bandwidth =
+                           ifp->if_output_bw.max_bw;
+               }
+               if (!(new_wifi_sr->valid_bitmask &
+                   IF_WIFI_UL_EFFECTIVE_BANDWIDTH_VALID) &&
+                   ifp->if_output_bw.eff_bw > 0) {
+                       if_wifi_sr->valid_bitmask |=
+                           IF_WIFI_UL_EFFECTIVE_BANDWIDTH_VALID;
+                       if_wifi_sr->ul_effective_bandwidth =
+                           ifp->if_output_bw.eff_bw;
+               }
+               if (!(new_wifi_sr->valid_bitmask &
+                   IF_WIFI_DL_MAX_BANDWIDTH_VALID) &&
+                   ifp->if_input_bw.max_bw > 0) {
+                       if_wifi_sr->valid_bitmask |=
+                           IF_WIFI_DL_MAX_BANDWIDTH_VALID;
+                       if_wifi_sr->dl_max_bandwidth =
+                           ifp->if_input_bw.max_bw;
+               }
+               if (!(new_wifi_sr->valid_bitmask &
+                   IF_WIFI_DL_EFFECTIVE_BANDWIDTH_VALID) &&
+                   ifp->if_input_bw.eff_bw > 0) {
+                       if_wifi_sr->valid_bitmask |=
+                           IF_WIFI_DL_EFFECTIVE_BANDWIDTH_VALID;
+                       if_wifi_sr->dl_effective_bandwidth =
+                           ifp->if_input_bw.eff_bw;
+               }
+       }
+
+done:
+       lck_rw_done(&ifp->if_link_status_lock);
+       ifnet_lock_done(ifp);
+       return (err);
+}
+
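
A minimal sketch (not part of this change) of the driver side of the KPI implemented above, assuming the if_link_status layout and IF_WIFI_* constants referenced in the hunk; the function name, header locations, and the bandwidth source are hypothetical:

/*
 * Illustrative only: a hypothetical Wi-Fi driver feeding
 * ifnet_link_status_report().  Field widths and header locations are
 * assumed to match the if_link_status definitions referenced above.
 */
#include <net/if_var.h>		/* struct if_link_status, if_wifi_status_v1 (assumed) */
#include <net/kpi_interface.h>	/* ifnet_link_status_report() */

static errno_t
mydrv_report_wifi_status(ifnet_t ifp, u_int32_t ul_max_bw_bps)
{
	struct if_link_status ls;
	struct if_wifi_status_v1 *wifi;

	bzero(&ls, sizeof (ls));
	ls.ifsr_version = IF_WIFI_STATUS_REPORT_CURRENT_VERSION;
	ls.ifsr_len = sizeof (struct if_wifi_status_v1);

	wifi = &ls.ifsr_u.ifsr_wifi.if_wifi_u.if_status_v1;
	wifi->valid_bitmask = IF_WIFI_UL_MAX_BANDWIDTH_VALID;
	wifi->ul_max_bandwidth = ul_max_bw_bps;

	/* The KPI copies out of the buffer; the caller keeps ownership. */
	return (ifnet_link_status_report(ifp, &ls, sizeof (ls)));
}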
+/*************************************************************************/
+/* Packet preamble                                                       */
+/*************************************************************************/
+
+#define        MAX_IF_PACKET_PREAMBLE_LEN 32
+
+errno_t
+ifnet_set_packetpreamblelen(ifnet_t interface, u_int32_t len)
+{
+       errno_t err = 0;
+
+       if (interface == NULL || len > MAX_IF_PACKET_PREAMBLE_LEN) {
+               err = EINVAL;
+               goto done;
+       }
+       interface->if_data.ifi_preamblelen = len;
+done:
+       return (err);
+}
+
+u_int32_t
+ifnet_packetpreamblelen(ifnet_t interface)
+{
+       return ((interface == NULL) ? 0 : interface->if_data.ifi_preamblelen);
+}
+
+u_int32_t
+ifnet_maxpacketpreamblelen(void)
+{
+       return (MAX_IF_PACKET_PREAMBLE_LEN);
+}
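
A short sketch (not part of this change) of how a driver that owns an interface might use the preamble KPIs above at attach time; the function name and the 16-byte preamble value are hypothetical:

/*
 * Illustrative only: hypothetical driver attach-time code reserving a
 * packet preamble.  Only the driver that owns the interface may call
 * ifnet_set_packetpreamblelen().
 */
#include <net/kpi_interface.h>

static errno_t
mydrv_reserve_preamble(ifnet_t ifp)
{
	const u_int32_t want = 16;	/* hypothetical preamble, in bytes */

	/* The system-wide ceiling is MAX_IF_PACKET_PREAMBLE_LEN (32). */
	if (want > ifnet_maxpacketpreamblelen())
		return (EINVAL);

	return (ifnet_set_packetpreamblelen(ifp, want));
}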
index 1f4a41534722aa64dc3c5419954ed4b9bcbb5af4..2c6e8bbe9d8ee9c980a3863a6380920b9e36b053 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -42,6 +42,7 @@
 #include <sys/kernel_types.h>
 
 #ifdef KERNEL_PRIVATE
+struct if_interface_state;
 #include <sys/kpi_mbuf.h>
 #endif /* KERNEL_PRIVATE */
 
@@ -988,6 +989,14 @@ typedef errno_t (*ifnet_ctl_func)(ifnet_t interface, ifnet_ctl_cmd_t cmd,
        @field output_lt The effective output latency (in nanosecond.)
        @field output_lt_max The maximum theoretical output latency
                (in nanosecond.)
+       @field start_delay_qlen The maximum output queue length used to
+               delay the start callback to the driver. This is an
+               optimization for coalescing output packets.
+       @field start_delay_timeout The timeout in microseconds to delay
+               the start callback. If the output queue holds fewer than
+               start_delay_qlen packets when the timer fires, the start
+               callback is invoked anyway. The maximum allowed value is
+               20ms (expressed in microseconds).
        @field input_poll The poll function for the interface, valid only if
                IFNET_INIT_LEGACY is not set and only if IFNET_INIT_INPUT_POLL
                is set.
@@ -1045,12 +1054,14 @@ struct ifnet_init_eparams {
        ifnet_start_func        start;                  /* required only for new model */
        ifnet_ctl_func          output_ctl;             /* optional, only for new model */
        u_int32_t               output_sched_model;     /* optional, only for new model */
-       u_int32_t               output_target_qdelay;   /* optional, only for new model */
+       u_int32_t               output_target_qdelay;   /* optional, only for new model, value in ms */
        u_int64_t               output_bw;              /* optional */
        u_int64_t               output_bw_max;          /* optional */
        u_int64_t               output_lt;              /* optional */
        u_int64_t               output_lt_max;          /* optional */
-       u_int64_t               _reserved[2];           /* for future use */
+       u_int16_t               start_delay_qlen;       /* optional */
+       u_int16_t               start_delay_timeout;    /* optional */
+       u_int32_t               _reserved[3];           /* for future use */
        ifnet_input_poll_func   input_poll;             /* optional, ignored for legacy model */
        ifnet_ctl_func          input_ctl;              /* required for opportunistic polling */
        u_int32_t               rcvq_maxlen;            /* optional, only for opportunistic polling */
@@ -1846,6 +1857,27 @@ extern errno_t ifnet_set_link_quality(ifnet_t interface, int quality);
 */
 extern int ifnet_link_quality(ifnet_t interface);
 
+/*
+       @function ifnet_set_interface_state
+       @discussion Sets the interface state for the ifnet.
+       @param interface Interface for which the interface state should
+               be set.
+       @param if_interface_state The interface state, as defined in
+               net/if_var.h.
+       @result 0 on success otherwise the errno error.  EINVAL if the
+               interface state is not a valid value.  ENXIO if the
+               interface is not attached.
+*/
+extern errno_t ifnet_set_interface_state(ifnet_t interface,
+    struct if_interface_state *if_interface_state);
+
+/*
+       @function ifnet_get_interface_state
+       @discussion Returns the interface state for the ifnet.
+       @param interface The interface to query.
+       @param if_interface_state Storage for the returned interface
+               state, as defined in net/if_var.h.
+       @result 0 on success, errno otherwise.
+*/
+extern int ifnet_get_interface_state(ifnet_t interface,
+    struct if_interface_state *if_interface_state);
+
 /*
        @struct ifnet_llreach_info
        @discussion This structure is used to describe the link-layer
@@ -3118,10 +3150,13 @@ extern errno_t ifnet_clone_detach(if_clone_t ifcloner);
  */
 extern errno_t ifnet_get_local_ports(ifnet_t ifp, u_int8_t *bitfield);
 
-#define        IFNET_GET_LOCAL_PORTS_WILDCARDOK        0x1
-#define        IFNET_GET_LOCAL_PORTS_NOWAKEUPOK        0x2
-#define        IFNET_GET_LOCAL_PORTS_TCPONLY           0x4
-#define        IFNET_GET_LOCAL_PORTS_UDPONLY           0x8
+#define        IFNET_GET_LOCAL_PORTS_WILDCARDOK        0x01
+#define        IFNET_GET_LOCAL_PORTS_NOWAKEUPOK        0x02
+#define        IFNET_GET_LOCAL_PORTS_TCPONLY           0x04
+#define        IFNET_GET_LOCAL_PORTS_UDPONLY           0x08
+#define        IFNET_GET_LOCAL_PORTS_RECVANYIFONLY     0x10
+#define        IFNET_GET_LOCAL_PORTS_EXTBGIDLEONLY     0x20
+#define        IFNET_GET_LOCAL_PORTS_ACTIVEONLY        0x40
 /*
        @function ifnet_get_local_ports_extended
        @discussion Returns a bitfield indicating which local ports of the
@@ -3137,10 +3172,10 @@ extern errno_t ifnet_get_local_ports(ifnet_t ifp, u_int8_t *bitfield);
        @param protocol The protocol family of the sockets.  PF_UNSPEC (0)
                means all protocols, otherwise PF_INET or PF_INET6.
        @param flags A bitwise OR of the following flags:
-               IFNET_GET_LOCAL_PORTS_EXTENDED_WILDCARDOK: When bit is set,
+               IFNET_GET_LOCAL_PORTS_WILDCARDOK: When bit is set,
                the list of local ports should include those that are 
                used by sockets that aren't bound to any local address.
-               IFNET_GET_LOCAL_PORTS_EXTENDED_NOWAKEUPOK: When bit is
+               IFNET_GET_LOCAL_PORTS_NOWAKEUPOK: When bit is
                set, the list of local ports should return all sockets 
                including the ones that do not need a wakeup from sleep. 
                Sockets that do not want to wake from sleep are marked 
@@ -3150,6 +3185,15 @@ extern errno_t ifnet_get_local_ports(ifnet_t ifp, u_int8_t *bitfield);
                IFNET_GET_LOCAL_PORTS_UDPONLY: When bit is set, the list
                of local ports should return only the ports used by UDP
                sockets.
+               IFNET_GET_LOCAL_PORTS_RECVANYIFONLY: When bit is set, the
+               port is in the list only if the socket has the option
+               SO_RECV_ANYIF set.
+               IFNET_GET_LOCAL_PORTS_EXTBGIDLEONLY: When bit is set, the
+               port is in the list only if the socket has the option
+               SO_EXTENDED_BK_IDLE set.
+               IFNET_GET_LOCAL_PORTS_ACTIVEONLY: When bit is set, the
+               port is in the list only if the socket is not in a final TCP
+               state, or the connection is not idle in a final TCP state.
        @param bitfield A pointer to 8192 bytes.
        @result Returns 0 on success.
  */
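
A sketch (not part of this change) of one way a caller might use these flags, assuming ifnet_get_local_ports_extended() takes exactly the parameters documented above and that the 8192-byte bitfield packs one bit per port number, least-significant bit first; the helper name is hypothetical:

/*
 * Illustrative only: does any local TCP socket own the given port?
 * The bitfield layout (one bit per port, LSB first) is an assumption.
 */
#include <net/kpi_interface.h>
#include <sys/malloc.h>
#include <sys/socket.h>

static bool
example_port_is_local_tcp(ifnet_t ifp, u_int16_t port)
{
	u_int8_t *bitfield = NULL;
	bool found = FALSE;

	MALLOC(bitfield, u_int8_t *, 8192, M_TEMP, M_WAITOK | M_ZERO);
	if (bitfield == NULL)
		return (FALSE);

	if (ifnet_get_local_ports_extended(ifp, PF_UNSPEC,
	    IFNET_GET_LOCAL_PORTS_TCPONLY |
	    IFNET_GET_LOCAL_PORTS_WILDCARDOK, bitfield) == 0)
		found = (bitfield[port >> 3] & (1 << (port & 0x7))) != 0;

	FREE(bitfield, M_TEMP);
	return (found);
}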
@@ -3303,37 +3347,110 @@ ifnet_set_delegate(ifnet_t ifp, ifnet_t delegated_ifp);
 extern errno_t
 ifnet_get_delegate(ifnet_t ifp, ifnet_t *pdelegated_ifp);
 
-/******************************************************************************/
-/* for interface IPSec keepalive offload                                                                         */
-/******************************************************************************/
-
-#define IPSEC_OFFLOAD_FRAME_DATA_SIZE 128
-struct ipsec_offload_frame {
-       u_int8_t data[IPSEC_OFFLOAD_FRAME_DATA_SIZE]; /* Frame bytes */
-       u_int16_t length; /* Number of valid bytes in data, including offset */
-       u_int16_t interval; /* Interval in seconds */
+/*************************************************************************/
+/* for interface keep alive offload support                              */
+/*************************************************************************/
+
+#define        IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE 128
+struct ifnet_keepalive_offload_frame {
+       u_int8_t data[IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE]; /* data bytes */
+#define        IFNET_KEEPALIVE_OFFLOAD_FRAME_IPSEC     0x0
+#define        IFNET_KEEPALIVE_OFFLOAD_FRAME_AIRPLAY   0x1
+       u_int8_t type;  /* type of application */
+       u_int8_t length; /* Number of valid data bytes including offset */
+       u_int16_t interval; /* Keep alive interval in seconds */
+#define        IFNET_KEEPALIVE_OFFLOAD_FRAME_ETHERTYPE_IPV4    0x0
+#define        IFNET_KEEPALIVE_OFFLOAD_FRAME_ETHERTYPE_IPV6    0x1
+       u_int8_t ether_type; /* Ether type IPv4 or IPv6 */
+       u_int8_t __reserved[3]; /* For future */
 };
 
 /*
-       @function ifnet_get_ipsec_offload_frames
-       @discussion Fills out frames_array with IP packets to send at periodic
-               intervals on behalf of IPSec.
-       @param ifp The interface to send the frames out on. This is used to
-               select which IPSec SAs should generate the packets.
-       @param frames_array An array of ipsec_offload_frame structs. This is
-               allocated by the caller, and has frames_array_count frames of valid
-               memory.
-       @param frames_array_count The number of valid frames allocated in
-               frames_array.
-       @param frame_data_offset The offset in bytes into each frame data at
-               which IPSec should write the IP header and payload.
-       @param used_frames_count The returned number of frames that were filled
-               out with valid information.
+       @function ifnet_get_keepalive_offload_frames
+       @discussion Fills out frames_array with IP packets to send at
+               periodic intervals as Keep-alive or heartbeat messages.
+               These are UDP datagrams. This can be used to offload
+               IPSec keep alives.
+       @param ifp The interface to send frames out on. This is used to
+               select which sockets or IPSec SAs should generate the
+               packets.
+       @param frames_array An array of ifnet_keepalive_offload_frame
+               structs. This is allocated by the caller, and has
+               frames_array_count frames of valid memory.
+       @param frames_array_count The number of valid frames allocated
+               by the caller in frames_array.
+       @param frame_data_offset The offset in bytes into each frame data
+               at which the IPv4/IPv6 packet and payload should be written.
+       @param used_frames_count The returned number of frames that were
+               filled out with valid information.
+       @result Returns 0 on success, error number otherwise.
+*/
+extern errno_t ifnet_get_keepalive_offload_frames(ifnet_t ifp,
+    struct ifnet_keepalive_offload_frame *frames_array,
+    u_int32_t frames_array_count, size_t frame_data_offset,
+    u_int32_t *used_frames_count);
+
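
A sketch (not part of this change) of the driver side of this KPI, assuming a small on-chip keepalive table; the table size, data offset, and function name are hypothetical, and the hardware-programming step is left as a comment:

/*
 * Illustrative only: hypothetical driver code pulling keepalive frames
 * for offload.  frame_data_offset must stay 32-bit aligned.
 */
#include <net/kpi_interface.h>
#include <sys/systm.h>

#define	EXAMPLE_MAX_KA_FRAMES		8
#define	EXAMPLE_FRAME_DATA_OFFSET	0

static void
example_refresh_keepalives(ifnet_t ifp)
{
	struct ifnet_keepalive_offload_frame frames[EXAMPLE_MAX_KA_FRAMES];
	u_int32_t used = 0, i;

	if (ifnet_get_keepalive_offload_frames(ifp, frames,
	    EXAMPLE_MAX_KA_FRAMES, EXAMPLE_FRAME_DATA_OFFSET, &used) != 0)
		return;

	for (i = 0; i < used; i++) {
		struct ifnet_keepalive_offload_frame *f = &frames[i];

		/*
		 * f->data holds an IPv4 or IPv6 packet (see f->ether_type)
		 * with f->length valid bytes starting at the data offset
		 * passed above; it should be retransmitted every
		 * f->interval seconds.  A real driver would program it
		 * into the offload hardware here.
		 */
		printf("keepalive %u: type %u, %u bytes, every %u s\n",
		    i, f->type, f->length, f->interval);
	}
}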
+/*************************************************************************/
+/* Link level notifications                                              */
+/*************************************************************************/
+/*
+       @function ifnet_link_status_report
+       @discussion A KPI to let the driver provide link specific
+               status information to the protocol stack. The KPI will
+               copy contents from the buffer based on the version and
+               length provided by the driver. The contents of the buffer
+               will be read but will not be modified.
+       @param ifp The interface that is generating the report
+       @param buffer Buffer containing the link specific information 
+               for this interface. It is the caller's responsibility
+               to free this buffer.
+       @param buffer_len Valid length of the buffer provided by the caller
        @result Returns 0 on success, error number otherwise.
+*/
+extern errno_t ifnet_link_status_report(ifnet_t ifp, const void *buffer,
+       size_t buffer_len);
+
+/*************************************************************************/
+/* Packet preamble                                                       */
+/*************************************************************************/
+/*!
+       @function ifnet_set_packetpreamblelen
+       @discussion
+               Allows a driver to specify a leading space to be
+               reserved in front of the link layer header.
+               The preamble logically adjoins the link layer header, which
+               itself is logically contiguous with the network protocol
+               header (e.g. IP).
+               There is no guarantee that packets being sent to the
+               driver have leading space reserved for the preamble.
+               There is also no guarantee the packet will be laid out in a
+               contiguous block of memory.
+               The network protocol header is 32-bit aligned and this
+               dictates the alignment of the link layer header, which in
+               turn affects the alignment of the packet preamble.
+               This function is intended to be called by the driver. A kext
+               must not call this function on an interface the kext does not
+               own.
+       @param interface The interface.
+       @param len The length of the packet preamble.
+       @result 0 on success otherwise the errno error.
  */
-extern errno_t ifnet_get_ipsec_offload_frames(ifnet_t ifp,
-       struct ipsec_offload_frame *frames_array, u_int32_t frames_array_count,
-       size_t frame_data_offset, u_int32_t *used_frames_count);
+extern errno_t ifnet_set_packetpreamblelen(ifnet_t interface, u_int32_t len);
+
+/*!
+       @function ifnet_packetpreamblelen
+       @param interface The interface.
+       @result The current packet preamble length.
+ */
+extern u_int32_t ifnet_packetpreamblelen(ifnet_t interface);
+
+/*!
+       @function ifnet_maxpacketpreamblelen
+       @result The maximum packet preamble length supported by the system
+ */
+extern u_int32_t ifnet_maxpacketpreamblelen(void);
+
+
 #endif /* KERNEL_PRIVATE */
 
 __END_DECLS
index 170dd25f058a381e31b88e00522d5c3e5451d473..f35b2b10b76976abf2e61b8e5efbced8dc66280a 100644 (file)
@@ -107,11 +107,10 @@ proto_register_input(protocol_family_t protocol, proto_input_handler input,
        struct domain *dp;
        domain_guard_t guard;
 
-       entry = _MALLOC(sizeof (*entry), M_IFADDR, M_WAITOK);
+       entry = _MALLOC(sizeof (*entry), M_IFADDR, M_WAITOK | M_ZERO);
        if (entry == NULL)
                return (ENOMEM);
 
-       bzero(entry, sizeof (*entry));
        entry->protocol = protocol;
        entry->input = input;
        entry->detached = detached;
@@ -365,13 +364,13 @@ proto_register_plumber(protocol_family_t protocol_family,
        }
 
        proto_family = (struct proto_family_str *)
-           _MALLOC(sizeof (struct proto_family_str), M_IFADDR, M_WAITOK);
+           _MALLOC(sizeof (struct proto_family_str), M_IFADDR,
+           M_WAITOK | M_ZERO);
        if (!proto_family) {
                lck_mtx_unlock(proto_family_mutex);
                return (ENOMEM);
        }
 
-       bzero(proto_family, sizeof (struct proto_family_str));
        proto_family->proto_family      = protocol_family;
        proto_family->if_family         = interface_family & 0xffff;
        proto_family->attach_proto      = attach;
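
The two hunks above replace a MALLOC-then-bzero pair with a single zeroing allocation; a standalone sketch (not part of this change) of that pattern, using a hypothetical structure:

/*
 * Illustrative only: requesting zeroed memory at allocation time with
 * M_ZERO instead of a separate bzero().  The structure is hypothetical.
 */
#include <sys/malloc.h>

struct example_entry {
	int	state;
	char	name[32];
};

static struct example_entry *
example_alloc_entry(void)
{
	struct example_entry *entry;

	entry = _MALLOC(sizeof (*entry), M_IFADDR, M_WAITOK | M_ZERO);
	/* entry, if non-NULL, is already zero-filled; no bzero() needed. */
	return (entry);
}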
index 04c81c167feac888cfcd15544e438c65ed32eebb..71ff0d5437da77ec671c2f6330d101100ad204e5 100644 (file)
@@ -273,7 +273,7 @@ static __inline__ uint16_t
 lacp_uint16_get(const uint8_t * field)
 {
     uint16_t tmp_field;
-    memcpy((void *)&tmp_field, (void *)field, sizeof(uint16_t));
+    memcpy((void *)&tmp_field, (const void *)field, sizeof(uint16_t));
     return (ntohs(tmp_field));
 }
 
@@ -301,7 +301,7 @@ static __inline__ uint32_t
 lacp_uint32_get(const uint8_t * field)
 {
     uint32_t tmp_field;
-    memcpy((void *)&tmp_field, (void *)field, sizeof(uint32_t));
+    memcpy((void *)&tmp_field, (const void *)field, sizeof(uint32_t));
     return (ntohl(tmp_field));
 }
 
index 6c589c22171bcc8c919e80fc106c25691e57df05..650e809cd7ff0f1b1bd20ff860d12e112ed31bd3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2013-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
 #include <sys/kpi_mbuf.h>
 #include <sys/proc_uuid_policy.h>
 #include <net/if.h>
+#include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
 #include <netinet/udp.h>
 #include <netinet/in_pcb.h>
+#include <netinet6/esp.h>
 #include <net/flowhash.h>
 #include <net/if_var.h>
 #include <sys/kauth.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/priv.h>
+#include <sys/kern_event.h>
+#include <net/network_agent.h>
 #include <net/necp.h>
 
 /*
@@ -130,13 +135,7 @@ u_int32_t necp_pass_keepalives = 1; // 0=Off, 1=On
 
 u_int32_t necp_debug = 0; // 0=None, 1=Basic, 2=EveryMatch
 
-static int sysctl_handle_necp_level SYSCTL_HANDLER_ARGS;
-
-SYSCTL_NODE(_net, OID_AUTO, necp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "NECP");
-SYSCTL_INT(_net_necp, NECPCTL_PASS_LOOPBACK, pass_loopback, CTLFLAG_LOCKED | CTLFLAG_RW, &necp_pass_loopback, 0, "");
-SYSCTL_INT(_net_necp, NECPCTL_PASS_KEEPALIVES, pass_keepalives, CTLFLAG_LOCKED | CTLFLAG_RW, &necp_pass_keepalives, 0, "");
-SYSCTL_INT(_net_necp, NECPCTL_DEBUG, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &necp_debug, 0, "");
-SYSCTL_PROC(_net_necp, NECPCTL_DROP_ALL_LEVEL, drop_all_level, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW, &necp_drop_all_level, 0, &sysctl_handle_necp_level, "IU", "");
+u_int32_t necp_session_count = 0;
 
 #define        NECPLOG(level, format, ...) do {                                                                                        \
        log((level > LOG_NOTICE ? LOG_NOTICE : level), "%s: " format "\n", __FUNCTION__, __VA_ARGS__);  \
@@ -218,10 +217,11 @@ struct necp_session {
 
        bool                                            proc_locked; // Messages must come from proc_uuid
        uuid_t                                          proc_uuid;
+       int                                                     proc_pid;
 
        bool                                            dirty;
        LIST_HEAD(_policies, necp_session_policy) policies;
-       
+
        LIST_HEAD(_services, necp_service_registration) services;
 };
 
@@ -248,10 +248,17 @@ static    lck_attr_t              *necp_kernel_policy_mtx_attr    = NULL;
 static lck_grp_t               *necp_kernel_policy_mtx_grp             = NULL;
 decl_lck_rw_data(static, necp_kernel_policy_lock);
 
+static lck_grp_attr_t  *necp_route_rule_grp_attr       = NULL;
+static lck_attr_t              *necp_route_rule_mtx_attr       = NULL;
+static lck_grp_t               *necp_route_rule_mtx_grp        = NULL;
+decl_lck_rw_data(static, necp_route_rule_lock);
+
 static necp_policy_id necp_last_policy_id = 0;
 static necp_kernel_policy_id necp_last_kernel_policy_id = 0;
 static u_int32_t necp_last_uuid_id = 0;
 static u_int32_t necp_last_string_id = 0;
+static u_int32_t necp_last_route_rule_id = 0;
+static u_int32_t necp_last_aggregate_route_rule_id = 0;
 
 /*
  * On modification, invalidate cached lookups by bumping the generation count.
@@ -305,20 +312,20 @@ static void necp_handle_lock_session_to_proc(struct necp_session *session, u_int
 static void necp_handle_register_service(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset);
 static void necp_handle_unregister_service(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset);
 
-static struct necp_session_policy *necp_policy_create(struct necp_session *session, necp_policy_order order, u_int8_t *conditions_array, size_t conditions_array_size, u_int8_t *result, size_t result_size);
+static struct necp_session_policy *necp_policy_create(struct necp_session *session, necp_policy_order order, u_int8_t *conditions_array, u_int32_t conditions_array_size, u_int8_t *route_rules_array, u_int32_t route_rules_array_size, u_int8_t *result, u_int32_t result_size);
 static struct necp_session_policy *necp_policy_find(struct necp_session *session, necp_policy_id policy_id);
 static bool necp_policy_mark_for_deletion(struct necp_session *session, struct necp_session_policy *policy);
 static bool necp_policy_mark_all_for_deletion(struct necp_session *session);
 static bool necp_policy_delete(struct necp_session *session, struct necp_session_policy *policy);
 static void necp_policy_apply_all(struct necp_session *session);
 
-static necp_kernel_policy_id necp_kernel_socket_policy_add(necp_policy_id parent_policy_id, necp_policy_order order, u_int32_t session_order, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_app_id cond_app_id, necp_app_id cond_real_app_id, u_int32_t cond_account_id, char *domain, pid_t cond_pid, uid_t cond_uid, ifnet_t cond_bound_interface, struct necp_policy_condition_tc_range cond_traffic_class, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter);
+static necp_kernel_policy_id necp_kernel_socket_policy_add(necp_policy_id parent_policy_id, necp_policy_order order, u_int32_t session_order, int session_pid, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_app_id cond_app_id, necp_app_id cond_real_app_id, u_int32_t cond_account_id, char *domain, pid_t cond_pid, uid_t cond_uid, ifnet_t cond_bound_interface, struct necp_policy_condition_tc_range cond_traffic_class, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter);
 static bool necp_kernel_socket_policy_delete(necp_kernel_policy_id policy_id);
 static bool necp_kernel_socket_policies_reprocess(void);
 static bool necp_kernel_socket_policies_update_uuid_table(void);
-static inline struct necp_kernel_socket_policy *necp_socket_find_policy_match_with_info_locked(struct necp_kernel_socket_policy **policy_search_array, struct necp_socket_info *info, necp_kernel_policy_filter *return_filter, necp_kernel_policy_result *return_service_action, necp_kernel_policy_service *return_service);
+static inline struct necp_kernel_socket_policy *necp_socket_find_policy_match_with_info_locked(struct necp_kernel_socket_policy **policy_search_array, struct necp_socket_info *info, necp_kernel_policy_filter *return_filter, u_int32_t *return_route_rule_id, necp_kernel_policy_result *return_service_action, necp_kernel_policy_service *return_service, u_int32_t *return_netagent_array, size_t netagent_array_count);
 
-static necp_kernel_policy_id necp_kernel_ip_output_policy_add(necp_policy_id parent_policy_id, necp_policy_order order, necp_policy_order suborder, u_int32_t session_order, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_kernel_policy_id cond_policy_id, ifnet_t cond_bound_interface, u_int32_t cond_last_interface_index, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter);
+static necp_kernel_policy_id necp_kernel_ip_output_policy_add(necp_policy_id parent_policy_id, necp_policy_order order, necp_policy_order suborder, u_int32_t session_order, int session_pid, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_kernel_policy_id cond_policy_id, ifnet_t cond_bound_interface, u_int32_t cond_last_interface_index, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter);
 static bool necp_kernel_ip_output_policy_delete(necp_kernel_policy_id policy_id);
 static bool necp_kernel_ip_output_policies_reprocess(void);
 
@@ -366,6 +373,49 @@ static LIST_HEAD(_necp_kernel_service_list, necp_service_registration) necp_regi
 static char *necp_create_trimmed_domain(char *string, size_t length);
 static inline int necp_count_dots(char *string, size_t length);
 
+#define ROUTE_RULE_IS_AGGREGATE(ruleid) (ruleid > UINT16_MAX)
+
+#define MAX_ROUTE_RULE_INTERFACES 10
+struct necp_route_rule {
+       LIST_ENTRY(necp_route_rule) chain;
+       u_int32_t       id;
+       u_int32_t       default_action;
+       u_int8_t        cellular_action;
+       u_int8_t        wifi_action;
+       u_int8_t        wired_action;
+       u_int8_t        expensive_action;
+       u_int           exception_if_indices[MAX_ROUTE_RULE_INTERFACES];
+       u_int8_t        exception_if_actions[MAX_ROUTE_RULE_INTERFACES];
+       u_int32_t       refcount;
+};
+static LIST_HEAD(necp_route_rule_list, necp_route_rule) necp_route_rules;
+static u_int32_t necp_create_route_rule(struct necp_route_rule_list *list, u_int8_t *route_rules_array, u_int32_t route_rules_array_size);
+static bool necp_remove_route_rule(struct necp_route_rule_list *list, u_int32_t route_rule_id);
+static bool necp_route_is_allowed(struct rtentry *route, ifnet_t interface, u_int32_t route_rule_id, bool *cellular_denied);
+static struct necp_route_rule *necp_lookup_route_rule_locked(struct necp_route_rule_list *list, u_int32_t route_rule_id);
+
+#define MAX_AGGREGATE_ROUTE_RULES 16
+struct necp_aggregate_route_rule {
+       LIST_ENTRY(necp_aggregate_route_rule) chain;
+       u_int32_t       id;
+       u_int32_t       rule_ids[MAX_AGGREGATE_ROUTE_RULES];
+};
+static LIST_HEAD(necp_aggregate_route_rule_list, necp_aggregate_route_rule) necp_aggregate_route_rules;
+static u_int32_t necp_create_aggregate_route_rule(u_int32_t *rule_ids);
+
+// Sysctl definitions
+static int sysctl_handle_necp_level SYSCTL_HANDLER_ARGS;
+
+SYSCTL_NODE(_net, OID_AUTO, necp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "NECP");
+SYSCTL_INT(_net_necp, NECPCTL_PASS_LOOPBACK, pass_loopback, CTLFLAG_LOCKED | CTLFLAG_RW, &necp_pass_loopback, 0, "");
+SYSCTL_INT(_net_necp, NECPCTL_PASS_KEEPALIVES, pass_keepalives, CTLFLAG_LOCKED | CTLFLAG_RW, &necp_pass_keepalives, 0, "");
+SYSCTL_INT(_net_necp, NECPCTL_DEBUG, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &necp_debug, 0, "");
+SYSCTL_PROC(_net_necp, NECPCTL_DROP_ALL_LEVEL, drop_all_level, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW, &necp_drop_all_level, 0, &sysctl_handle_necp_level, "IU", "");
+SYSCTL_LONG(_net_necp, NECPCTL_SOCKET_POLICY_COUNT, socket_policy_count, CTLFLAG_LOCKED | CTLFLAG_RD, &necp_kernel_socket_policies_count, "");
+SYSCTL_LONG(_net_necp, NECPCTL_SOCKET_NON_APP_POLICY_COUNT, socket_non_app_policy_count, CTLFLAG_LOCKED | CTLFLAG_RD, &necp_kernel_socket_policies_non_app_count, "");
+SYSCTL_LONG(_net_necp, NECPCTL_IP_POLICY_COUNT, ip_policy_count, CTLFLAG_LOCKED | CTLFLAG_RD, &necp_kernel_ip_output_policies_count, "");
+SYSCTL_INT(_net_necp, NECPCTL_SESSION_COUNT, session_count, CTLFLAG_LOCKED | CTLFLAG_RD, &necp_session_count, 0, "");
+
 // Session order allocation
 static u_int32_t
 necp_allocate_new_session_order(u_int32_t priority, u_int32_t control_unit)
@@ -448,15 +498,41 @@ necp_init(void)
 
        lck_rw_init(&necp_kernel_policy_lock, necp_kernel_policy_mtx_grp, necp_kernel_policy_mtx_attr);
 
+       necp_route_rule_grp_attr = lck_grp_attr_alloc_init();
+       if (necp_route_rule_grp_attr == NULL) {
+               NECPLOG0(LOG_ERR, "lck_grp_attr_alloc_init failed");
+               result = ENOMEM;
+               goto done;
+       }
+
+       necp_route_rule_mtx_grp = lck_grp_alloc_init("necp_route_rule", necp_route_rule_grp_attr);
+       if (necp_route_rule_mtx_grp == NULL) {
+               NECPLOG0(LOG_ERR, "lck_grp_alloc_init failed");
+               result = ENOMEM;
+               goto done;
+       }
+
+       necp_route_rule_mtx_attr = lck_attr_alloc_init();
+       if (necp_route_rule_mtx_attr == NULL) {
+               NECPLOG0(LOG_ERR, "lck_attr_alloc_init failed");
+               result = ENOMEM;
+               goto done;
+       }
+
+       lck_rw_init(&necp_route_rule_lock, necp_route_rule_mtx_grp, necp_route_rule_mtx_attr);
+
        LIST_INIT(&necp_kernel_socket_policies);
        LIST_INIT(&necp_kernel_ip_output_policies);
 
        LIST_INIT(&necp_account_id_list);
 
        LIST_INIT(&necp_uuid_service_id_list);
-       
+
        LIST_INIT(&necp_registered_service_list);
 
+       LIST_INIT(&necp_route_rules);
+       LIST_INIT(&necp_aggregate_route_rules);
+
        necp_uuid_app_id_hashtbl = hashinit(NECP_UUID_APP_ID_HASH_SIZE, M_NECP, &necp_uuid_app_id_hash_mask);
        necp_uuid_app_id_hash_num_buckets = necp_uuid_app_id_hash_mask + 1;
        necp_num_uuid_app_id_mappings = 0;
@@ -474,6 +550,10 @@ necp_init(void)
 
        necp_last_policy_id = 0;
        necp_last_kernel_policy_id = 0;
+       necp_last_uuid_id = 0;
+       necp_last_string_id = 0;
+       necp_last_route_rule_id = 0;
+       necp_last_aggregate_route_rule_id = 0;
 
        necp_kernel_socket_policies_gencount = 1;
 
@@ -495,6 +575,18 @@ done:
                        lck_grp_attr_free(necp_kernel_policy_grp_attr);
                        necp_kernel_policy_grp_attr = NULL;
                }
+               if (necp_route_rule_mtx_attr != NULL) {
+                       lck_attr_free(necp_route_rule_mtx_attr);
+                       necp_route_rule_mtx_attr = NULL;
+               }
+               if (necp_route_rule_mtx_grp != NULL) {
+                       lck_grp_free(necp_route_rule_mtx_grp);
+                       necp_route_rule_mtx_grp = NULL;
+               }
+               if (necp_route_rule_grp_attr != NULL) {
+                       lck_grp_attr_free(necp_route_rule_grp_attr);
+                       necp_route_rule_grp_attr = NULL;
+               }
                if (necp_kctlref != NULL) {
                        ctl_deregister(necp_kctlref);
                        necp_kctlref = NULL;
@@ -541,6 +633,24 @@ necp_register_control(void)
        return (0);
 }
 
+static void
+necp_post_change_event(struct kev_necp_policies_changed_data *necp_event_data)
+{
+       struct kev_msg ev_msg;
+       memset(&ev_msg, 0, sizeof(ev_msg));
+
+       ev_msg.vendor_code      = KEV_VENDOR_APPLE;
+       ev_msg.kev_class        = KEV_NETWORK_CLASS;
+       ev_msg.kev_subclass     = KEV_NECP_SUBCLASS;
+       ev_msg.event_code       = KEV_NECP_POLICIES_CHANGED;
+
+       ev_msg.dv[0].data_ptr    = necp_event_data;
+       ev_msg.dv[0].data_length = sizeof(necp_event_data->changed_count);
+       ev_msg.dv[1].data_length = 0;
+
+       kev_post_msg(&ev_msg);
+}
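
A userland sketch (not part of this change) of how a client might listen for the event posted above over a kernel event socket; the constant and structure names are assumed to come from <sys/kern_event.h> and the private <net/necp.h>:

/*
 * Illustrative only: subscribing to KEV_NECP_POLICIES_CHANGED.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/kern_event.h>
#include <net/necp.h>

int
main(void)
{
	int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
	if (fd < 0)
		return (1);

	struct kev_request req = {
		.vendor_code  = KEV_VENDOR_APPLE,
		.kev_class    = KEV_NETWORK_CLASS,
		.kev_subclass = KEV_NECP_SUBCLASS,
	};
	if (ioctl(fd, SIOCSKEVFILT, &req) < 0)
		return (1);

	for (;;) {
		char buf[1024];
		struct kern_event_msg *msg = (struct kern_event_msg *)buf;

		if (read(fd, buf, sizeof (buf)) <= 0)
			break;
		if (msg->event_code == KEV_NECP_POLICIES_CHANGED)
			printf("NECP policies changed\n");
	}
	close(fd);
	return (0);
}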
+
 static errno_t
 necp_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac, void **unitinfo)
 {
@@ -575,7 +685,7 @@ necp_packet_find_tlv(mbuf_t packet, int offset, u_int8_t type, int *err, int nex
 {
        size_t  cursor                  = offset;
        int             error                   = 0;
-       size_t  curr_length;
+       u_int32_t       curr_length;
        u_int8_t        curr_type;
 
        *err = 0;
@@ -607,10 +717,10 @@ necp_packet_find_tlv(mbuf_t packet, int offset, u_int8_t type, int *err, int nex
 }
 
 static int
-necp_packet_get_tlv_at_offset(mbuf_t packet, int tlv_offset, size_t buff_len, void *buff, size_t *value_size)
+necp_packet_get_tlv_at_offset(mbuf_t packet, int tlv_offset, u_int32_t buff_len, void *buff, u_int32_t *value_size)
 {
-       int             error           = 0;
-       size_t  length;
+       int                     error   = 0;
+       u_int32_t       length;
 
        if (tlv_offset < 0) {
                return (error);
@@ -626,7 +736,7 @@ necp_packet_get_tlv_at_offset(mbuf_t packet, int tlv_offset, size_t buff_len, vo
        }
 
        if (buff != NULL && buff_len > 0) {
-               size_t to_copy = (length < buff_len) ? length : buff_len;
+               u_int32_t to_copy = (length < buff_len) ? length : buff_len;
                error = mbuf_copydata(packet, tlv_offset + sizeof(u_int8_t) + sizeof(length), to_copy, buff);
                if (error) {
                        return (error);
@@ -637,7 +747,7 @@ necp_packet_get_tlv_at_offset(mbuf_t packet, int tlv_offset, size_t buff_len, vo
 }
 
 static int
-necp_packet_get_tlv(mbuf_t packet, int offset, u_int8_t type, size_t buff_len, void *buff, size_t *value_size)
+necp_packet_get_tlv(mbuf_t packet, int offset, u_int8_t type, u_int32_t buff_len, void *buff, u_int32_t *value_size)
 {
        int             error           = 0;
        int             tlv_offset;
@@ -660,10 +770,10 @@ necp_buffer_write_packet_header(u_int8_t *buffer, u_int8_t packet_type, u_int8_t
 }
 
 static u_int8_t *
-necp_buffer_write_tlv(u_int8_t *buffer, u_int8_t type, size_t length, const void *value)
+necp_buffer_write_tlv(u_int8_t *buffer, u_int8_t type, u_int32_t length, const void *value)
 {
        *(u_int8_t *)(buffer) = type;
-       *(size_t *)(void *)(buffer + sizeof(type)) = length;
+       *(u_int32_t *)(void *)(buffer + sizeof(type)) = length;
        if (length > 0) {
                memcpy((u_int8_t *)(buffer + sizeof(type) + sizeof(length)), value, length);
        }
@@ -684,24 +794,24 @@ necp_buffer_get_tlv_type(u_int8_t *buffer, int tlv_offset)
        return (type ? *type : 0);
 }
 
-static size_t
+static u_int32_t
 necp_buffer_get_tlv_length(u_int8_t *buffer, int tlv_offset)
 {
-       size_t *length = NULL;
+       u_int32_t *length = NULL;
 
        if (buffer == NULL) {
                return (0);
        }
 
-       length = (size_t *)(void *)((u_int8_t *)buffer + tlv_offset + sizeof(u_int8_t));
+       length = (u_int32_t *)(void *)((u_int8_t *)buffer + tlv_offset + sizeof(u_int8_t));
        return (length ? *length : 0);
 }
 
 static u_int8_t *
-necp_buffer_get_tlv_value(u_int8_t *buffer, int tlv_offset, size_t *value_size)
+necp_buffer_get_tlv_value(u_int8_t *buffer, int tlv_offset, u_int32_t *value_size)
 {
        u_int8_t *value = NULL;
-       size_t length = necp_buffer_get_tlv_length(buffer, tlv_offset);
+       u_int32_t length = necp_buffer_get_tlv_length(buffer, tlv_offset);
        if (length == 0) {
                return (value);
        }
@@ -710,19 +820,23 @@ necp_buffer_get_tlv_value(u_int8_t *buffer, int tlv_offset, size_t *value_size)
                *value_size = length;
        }
 
-       value = (u_int8_t *)((u_int8_t *)buffer + tlv_offset + sizeof(u_int8_t) + sizeof(size_t));
+       value = (u_int8_t *)((u_int8_t *)buffer + tlv_offset + sizeof(u_int8_t) + sizeof(u_int32_t));
        return (value);
 }
 
 static int
-necp_buffer_find_tlv(u_int8_t *buffer, size_t buffer_length, int offset, u_int8_t type, int next)
+necp_buffer_find_tlv(u_int8_t *buffer, u_int32_t buffer_length, int offset, u_int8_t type, int next)
 {
-       size_t cursor = offset;
-       size_t curr_length;
+       if (offset < 0) {
+               return (-1);
+       }
+       int cursor = offset;
+       int next_cursor;
+       u_int32_t curr_length;
        u_int8_t curr_type;
 
-       do {
-               if (cursor >= buffer_length) {
+       while (TRUE) {
+               if ((((u_int32_t)cursor) + sizeof(curr_type) + sizeof(curr_length)) > buffer_length) {
                        return (-1);
                }
                if (!next) {
@@ -731,13 +845,18 @@ necp_buffer_find_tlv(u_int8_t *buffer, size_t buffer_length, int offset, u_int8_
                        next = 0;
                        curr_type = NECP_TLV_NIL;
                }
-               if (curr_type != type) {
-                       curr_length = necp_buffer_get_tlv_length(buffer, cursor);
-                       cursor += (sizeof(curr_type) + sizeof(curr_length) + curr_length);
+               curr_length = necp_buffer_get_tlv_length(buffer, cursor);
+               next_cursor = (cursor + sizeof(curr_type) + sizeof(curr_length) + curr_length);
+               if (curr_type == type) {
+                       // check if entire TLV fits inside buffer
+                       if (((u_int32_t)next_cursor) <= buffer_length) {
+                               return (cursor);
+                       } else {
+                               return (-1);
+                       }
                }
-       } while (curr_type != type);
-
-       return (cursor);
+               cursor = next_cursor;
+       }
 }
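
The rewritten walker above tightens the bounds handling now that TLV lengths are u_int32_t rather than size_t. For reference, a standalone (userland-style) sketch, not part of this change, of the same wire format -- a packed 1-byte type, 4-byte host-order length, then the value -- and a bounds-checked walk over it, with hypothetical names:

/*
 * Illustrative only: minimal bounds-checked walk over a buffer of
 * (u_int8_t type, u_int32_t length, value) records.
 */
#include <string.h>
#include <sys/types.h>

#define EXAMPLE_TLV_HEADER_SIZE (sizeof (u_int8_t) + sizeof (u_int32_t))

static int
example_find_tlv(const u_int8_t *buffer, u_int32_t buffer_length,
    u_int32_t offset, u_int8_t wanted_type)
{
	u_int32_t cursor = offset;

	while (cursor + EXAMPLE_TLV_HEADER_SIZE <= buffer_length) {
		u_int8_t curr_type = buffer[cursor];
		u_int32_t curr_length;

		memcpy(&curr_length, buffer + cursor + sizeof (u_int8_t),
		    sizeof (curr_length));
		/* Reject TLVs whose declared value runs past the buffer. */
		if (curr_length > buffer_length - cursor -
		    EXAMPLE_TLV_HEADER_SIZE)
			return (-1);
		if (curr_type == wanted_type)
			return ((int)cursor);
		cursor += EXAMPLE_TLV_HEADER_SIZE + curr_length;
	}
	return (-1);
}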
 
 static bool
@@ -760,7 +879,7 @@ necp_send_success_response(struct necp_session *session, u_int8_t packet_type, u
        bool success = TRUE;
        u_int8_t *response = NULL;
        u_int8_t *cursor = NULL;
-       size_t response_size = sizeof(struct necp_packet_header) + sizeof(u_int8_t) + sizeof(size_t);
+       size_t response_size = sizeof(struct necp_packet_header) + sizeof(u_int8_t) + sizeof(u_int32_t);
        MALLOC(response, u_int8_t *, response_size, M_NECP, M_WAITOK);
        if (response == NULL) {
                return (FALSE);
@@ -783,7 +902,7 @@ necp_send_error_response(struct necp_session *session, u_int8_t packet_type, u_i
        bool success = TRUE;
        u_int8_t *response = NULL;
        u_int8_t *cursor = NULL;
-       size_t response_size = sizeof(struct necp_packet_header) + sizeof(u_int8_t) + sizeof(size_t) + sizeof(u_int32_t);
+       size_t response_size = sizeof(struct necp_packet_header) + sizeof(u_int8_t) + sizeof(u_int32_t) + sizeof(u_int32_t);
        MALLOC(response, u_int8_t *, response_size, M_NECP, M_WAITOK);
        if (response == NULL) {
                return (FALSE);
@@ -806,7 +925,7 @@ necp_send_policy_id_response(struct necp_session *session, u_int8_t packet_type,
        bool success = TRUE;
        u_int8_t *response = NULL;
        u_int8_t *cursor = NULL;
-       size_t response_size = sizeof(struct necp_packet_header) + sizeof(u_int8_t) + sizeof(size_t) + sizeof(u_int32_t);
+       size_t response_size = sizeof(struct necp_packet_header) + sizeof(u_int8_t) + sizeof(u_int32_t) + sizeof(u_int32_t);
        MALLOC(response, u_int8_t *, response_size, M_NECP, M_WAITOK);
        if (response == NULL) {
                return (FALSE);
@@ -858,6 +977,10 @@ necp_ctl_send(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, mbuf_t packe
                        necp_send_error_response(session, header.packet_type, header.message_id, NECP_ERROR_INVALID_PROCESS);
                        goto done;
                }
+       } else {
+               // If not locked, update the proc_uuid and proc_pid of the session
+               proc_getexecutableuuid(current_proc(), session->proc_uuid, sizeof(session->proc_uuid));
+               session->proc_pid = proc_pid(current_proc());
        }
 
        switch (header.packet_type) {
@@ -954,6 +1077,10 @@ necp_create_session(u_int32_t control_unit)
        new_session->dirty = FALSE;
        LIST_INIT(&new_session->policies);
 
+       lck_rw_lock_exclusive(&necp_kernel_policy_lock);
+       necp_session_count++;
+       lck_rw_done(&necp_kernel_policy_lock);
+
 done:
        return (new_session);
 }
@@ -975,34 +1102,48 @@ necp_delete_session(struct necp_session *session)
                        NECPLOG0(LOG_DEBUG, "Deleted NECP session");
                }
                FREE(session, M_NECP);
+
+               lck_rw_lock_exclusive(&necp_kernel_policy_lock);
+               necp_session_count--;
+               lck_rw_done(&necp_kernel_policy_lock);
        }
 }
 
 // Session Policy Management
 static inline u_int8_t
-necp_policy_result_get_type_from_buffer(u_int8_t *buffer, size_t length)
+necp_policy_result_get_type_from_buffer(u_int8_t *buffer, u_int32_t length)
 {
        return ((buffer && length >= sizeof(u_int8_t)) ? buffer[0] : 0);
 }
 
-static inline size_t
-necp_policy_result_get_parameter_length_from_buffer(u_int8_t *buffer, size_t length)
+static inline u_int32_t
+necp_policy_result_get_parameter_length_from_buffer(u_int8_t *buffer, u_int32_t length)
 {
        return ((buffer && length > sizeof(u_int8_t)) ? (length - sizeof(u_int8_t)) : 0);
 }
 
 static inline u_int8_t *
-necp_policy_result_get_parameter_pointer_from_buffer(u_int8_t *buffer, size_t length)
+necp_policy_result_get_parameter_pointer_from_buffer(u_int8_t *buffer, u_int32_t length)
 {
        return ((buffer && length > sizeof(u_int8_t)) ? (buffer + sizeof(u_int8_t)) : NULL);
 }
 
 static bool
-necp_policy_result_is_valid(u_int8_t *buffer, size_t length)
+necp_policy_result_requires_route_rules(u_int8_t *buffer, u_int32_t length)
+{
+       u_int8_t type = necp_policy_result_get_type_from_buffer(buffer, length);
+       if (type == NECP_POLICY_RESULT_ROUTE_RULES) {
+               return (TRUE);
+       }
+       return (FALSE);
+}
+
+static bool
+necp_policy_result_is_valid(u_int8_t *buffer, u_int32_t length)
 {
        bool validated = FALSE;
        u_int8_t type = necp_policy_result_get_type_from_buffer(buffer, length);
-       size_t parameter_length = necp_policy_result_get_parameter_length_from_buffer(buffer, length);
+       u_int32_t parameter_length = necp_policy_result_get_parameter_length_from_buffer(buffer, length);
        switch (type) {
                case NECP_POLICY_RESULT_PASS: {
                        validated = TRUE;
@@ -1042,10 +1183,15 @@ necp_policy_result_is_valid(u_int8_t *buffer, size_t length)
                        }
                        break;
                }
+               case NECP_POLICY_RESULT_ROUTE_RULES: {
+                       validated = TRUE;
+                       break;
+               }
                case NECP_POLICY_RESULT_TRIGGER:
                case NECP_POLICY_RESULT_TRIGGER_IF_NEEDED:
                case NECP_POLICY_RESULT_TRIGGER_SCOPED:
-               case NECP_POLICY_RESULT_NO_TRIGGER_SCOPED: {
+               case NECP_POLICY_RESULT_NO_TRIGGER_SCOPED:
+               case NECP_POLICY_RESULT_USE_NETAGENT: {
                        if (parameter_length >= sizeof(uuid_t)) {
                                validated = TRUE;
                        }
@@ -1065,43 +1211,43 @@ necp_policy_result_is_valid(u_int8_t *buffer, size_t length)
 }
 
 static inline u_int8_t
-necp_policy_condition_get_type_from_buffer(u_int8_t *buffer, size_t length)
+necp_policy_condition_get_type_from_buffer(u_int8_t *buffer, u_int32_t length)
 {
        return ((buffer && length >= sizeof(u_int8_t)) ? buffer[0] : 0);
 }
 
 static inline u_int8_t
-necp_policy_condition_get_flags_from_buffer(u_int8_t *buffer, size_t length)
+necp_policy_condition_get_flags_from_buffer(u_int8_t *buffer, u_int32_t length)
 {
        return ((buffer && length >= (2 * sizeof(u_int8_t))) ? buffer[1] : 0);
 }
 
-static inline size_t
-necp_policy_condition_get_value_length_from_buffer(u_int8_t *buffer, size_t length)
+static inline u_int32_t
+necp_policy_condition_get_value_length_from_buffer(u_int8_t *buffer, u_int32_t length)
 {
        return ((buffer && length >= (2 * sizeof(u_int8_t))) ? (length - (2 * sizeof(u_int8_t))) : 0);
 }
 
 static inline u_int8_t *
-necp_policy_condition_get_value_pointer_from_buffer(u_int8_t *buffer, size_t length)
+necp_policy_condition_get_value_pointer_from_buffer(u_int8_t *buffer, u_int32_t length)
 {
        return ((buffer && length > (2 * sizeof(u_int8_t))) ? (buffer + (2 * sizeof(u_int8_t))) : NULL);
 }
 
 static inline bool
-necp_policy_condition_is_default(u_int8_t *buffer, size_t length)
+necp_policy_condition_is_default(u_int8_t *buffer, u_int32_t length)
 {
        return (necp_policy_condition_get_type_from_buffer(buffer, length) == NECP_POLICY_CONDITION_DEFAULT);
 }
 
 static inline bool
-necp_policy_condition_is_application(u_int8_t *buffer, size_t length)
+necp_policy_condition_is_application(u_int8_t *buffer, u_int32_t length)
 {
        return (necp_policy_condition_get_type_from_buffer(buffer, length) == NECP_POLICY_CONDITION_APPLICATION);
 }
 
 static inline bool
-necp_policy_condition_requires_application(u_int8_t *buffer, size_t length)
+necp_policy_condition_requires_application(u_int8_t *buffer, u_int32_t length)
 {
        u_int8_t type = necp_policy_condition_get_type_from_buffer(buffer, length);
        return (type == NECP_POLICY_CONDITION_REAL_APPLICATION ||
@@ -1109,7 +1255,7 @@ necp_policy_condition_requires_application(u_int8_t *buffer, size_t length)
 }
 
 static bool
-necp_policy_condition_is_valid(u_int8_t *buffer, size_t length, u_int8_t policy_result_type)
+necp_policy_condition_is_valid(u_int8_t *buffer, u_int32_t length, u_int8_t policy_result_type)
 {
        bool validated = FALSE;
        bool result_cannot_have_ip_layer = (policy_result_type == NECP_POLICY_RESULT_SOCKET_DIVERT ||
@@ -1118,8 +1264,10 @@ necp_policy_condition_is_valid(u_int8_t *buffer, size_t length, u_int8_t policy_
                                                                                policy_result_type == NECP_POLICY_RESULT_TRIGGER_IF_NEEDED ||
                                                                                policy_result_type == NECP_POLICY_RESULT_TRIGGER_SCOPED ||
                                                                                policy_result_type == NECP_POLICY_RESULT_NO_TRIGGER_SCOPED ||
-                                                                               policy_result_type == NECP_POLICY_RESULT_SOCKET_SCOPED) ? TRUE : FALSE;
-       size_t condition_length = necp_policy_condition_get_value_length_from_buffer(buffer, length);
+                                                                               policy_result_type == NECP_POLICY_RESULT_SOCKET_SCOPED ||
+                                                                               policy_result_type == NECP_POLICY_RESULT_ROUTE_RULES ||
+                                                                               policy_result_type == NECP_POLICY_RESULT_USE_NETAGENT) ? TRUE : FALSE;
+       u_int32_t condition_length = necp_policy_condition_get_value_length_from_buffer(buffer, length);
        u_int8_t *condition_value = necp_policy_condition_get_value_pointer_from_buffer(buffer, length);
        u_int8_t type = necp_policy_condition_get_type_from_buffer(buffer, length);
        u_int8_t flags = necp_policy_condition_get_flags_from_buffer(buffer, length);
@@ -1203,6 +1351,40 @@ necp_policy_condition_is_valid(u_int8_t *buffer, size_t length, u_int8_t policy_
        return (validated);
 }
 
+static bool
+necp_policy_route_rule_is_default(u_int8_t *buffer, u_int32_t length)
+{
+       return (necp_policy_condition_get_value_length_from_buffer(buffer, length) == 0 &&
+                       necp_policy_condition_get_flags_from_buffer(buffer, length) == 0);
+}
+
+static bool
+necp_policy_route_rule_is_valid(u_int8_t *buffer, u_int32_t length)
+{
+       bool validated = FALSE;
+       u_int8_t type = necp_policy_condition_get_type_from_buffer(buffer, length);
+       switch (type) {
+               case NECP_ROUTE_RULE_ALLOW_INTERFACE: {
+                       validated = TRUE;
+                       break;
+               }
+               case NECP_ROUTE_RULE_DENY_INTERFACE: {
+                       validated = TRUE;
+                       break;
+               }
+               default: {
+                       validated = FALSE;
+                       break;
+               }
+       }
+
+       if (necp_debug) {
+               NECPLOG(LOG_DEBUG, "Policy route rule type %d, valid %d", type, validated);
+       }
+
+       return (validated);
+}
+
 static void
 necp_handle_set_session_priority(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset)
 {
@@ -1258,7 +1440,7 @@ static void
 necp_handle_lock_session_to_proc(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset)
 {
 #pragma unused(packet, offset)
-       proc_getexecutableuuid(current_proc(), session->proc_uuid, sizeof(session->proc_uuid));
+       // proc_uuid already filled out
        session->proc_locked = TRUE;
        necp_send_success_response(session, NECP_PACKET_TYPE_LOCK_SESSION_TO_PROC, message_id);
 }
@@ -1353,10 +1535,10 @@ necp_handle_unregister_service(struct necp_session *session, u_int32_t message_i
        }
        lck_rw_done(&necp_kernel_policy_lock);
 
-       necp_send_success_response(session, NECP_PACKET_TYPE_REGISTER_SERVICE, message_id);
+       necp_send_success_response(session, NECP_PACKET_TYPE_UNREGISTER_SERVICE, message_id);
        return;
 fail:
-       necp_send_error_response(session, NECP_PACKET_TYPE_REGISTER_SERVICE, message_id, response_error);
+       necp_send_error_response(session, NECP_PACKET_TYPE_UNREGISTER_SERVICE, message_id, response_error);
 }
 
 static void
@@ -1367,9 +1549,14 @@ necp_handle_policy_add(struct necp_session *session, u_int32_t message_id, mbuf_
        bool has_application_condition = FALSE;
        bool requires_application_condition = FALSE;
        u_int8_t *conditions_array = NULL;
-       size_t conditions_array_size = 0;
+       u_int32_t conditions_array_size = 0;
        int conditions_array_cursor;
 
+       bool has_default_route_rule = FALSE;
+       u_int8_t *route_rules_array = NULL;
+       u_int32_t route_rules_array_size = 0;
+       int route_rules_array_cursor;
+
        int cursor;
        int error = 0;
        u_int32_t response_error = NECP_ERROR_INTERNAL;
@@ -1377,7 +1564,7 @@ necp_handle_policy_add(struct necp_session *session, u_int32_t message_id, mbuf_
        necp_policy_order order = 0;
        struct necp_session_policy *policy = NULL;
        u_int8_t *policy_result = NULL;
-       size_t policy_result_size = 0;
+       u_int32_t policy_result_size = 0;
 
        // Read policy order
        error = necp_packet_get_tlv(packet, offset, NECP_TLV_POLICY_ORDER, sizeof(order), &order, NULL);
@@ -1413,15 +1600,79 @@ necp_handle_policy_add(struct necp_session *session, u_int32_t message_id, mbuf_
                goto fail;
        }
 
+       if (necp_policy_result_requires_route_rules(policy_result, policy_result_size)) {
+               // Read route rules conditions
+               for (cursor = necp_packet_find_tlv(packet, offset, NECP_TLV_ROUTE_RULE, &error, 0);
+                        cursor >= 0;
+                        cursor = necp_packet_find_tlv(packet, cursor, NECP_TLV_ROUTE_RULE, &error, 1)) {
+                       u_int32_t route_rule_size = 0;
+                       necp_packet_get_tlv_at_offset(packet, cursor, 0, NULL, &route_rule_size);
+                       if (route_rule_size > 0) {
+                               route_rules_array_size += (sizeof(u_int8_t) + sizeof(u_int32_t) + route_rule_size);
+                       }
+               }
+
+               if (route_rules_array_size == 0) {
+                       NECPLOG0(LOG_ERR, "Failed to get policy route rules");
+                       response_error = NECP_ERROR_INVALID_TLV;
+                       goto fail;
+               }
+
+               MALLOC(route_rules_array, u_int8_t *, route_rules_array_size, M_NECP, M_WAITOK);
+               if (route_rules_array == NULL) {
+                       NECPLOG(LOG_ERR, "Failed to allocate a policy route rules array (size %d)", route_rules_array_size);
+                       response_error = NECP_ERROR_INTERNAL;
+                       goto fail;
+               }
+
+               route_rules_array_cursor = 0;
+               for (cursor = necp_packet_find_tlv(packet, offset, NECP_TLV_ROUTE_RULE, &error, 0);
+                        cursor >= 0;
+                        cursor = necp_packet_find_tlv(packet, cursor, NECP_TLV_ROUTE_RULE, &error, 1)) {
+                       u_int8_t route_rule_type = NECP_TLV_ROUTE_RULE;
+                       u_int32_t route_rule_size = 0;
+                       necp_packet_get_tlv_at_offset(packet, cursor, 0, NULL, &route_rule_size);
+                       if (route_rule_size > 0 && route_rule_size <= (route_rules_array_size - route_rules_array_cursor)) {
+                               // Add type
+                               memcpy((route_rules_array + route_rules_array_cursor), &route_rule_type, sizeof(route_rule_type));
+                               route_rules_array_cursor += sizeof(route_rule_type);
+
+                               // Add length
+                               memcpy((route_rules_array + route_rules_array_cursor), &route_rule_size, sizeof(route_rule_size));
+                               route_rules_array_cursor += sizeof(route_rule_size);
+
+                               // Add value
+                               necp_packet_get_tlv_at_offset(packet, cursor, route_rule_size, (route_rules_array + route_rules_array_cursor), NULL);
+
+                               if (!necp_policy_route_rule_is_valid((route_rules_array + route_rules_array_cursor), route_rule_size)) {
+                                       NECPLOG0(LOG_ERR, "Failed to validate policy route rule");
+                                       response_error = NECP_ERROR_ROUTE_RULES_INVALID;
+                                       goto fail;
+                               }
+
+                               if (necp_policy_route_rule_is_default((route_rules_array + route_rules_array_cursor), route_rule_size)) {
+                                       if (has_default_route_rule) {
+                                               NECPLOG0(LOG_ERR, "Failed to validate route rule; contained multiple default route rules");
+                                               response_error = NECP_ERROR_ROUTE_RULES_INVALID;
+                                               goto fail;
+                                       }
+                                       has_default_route_rule = TRUE;
+                               }
+
+                               route_rules_array_cursor += route_rule_size;
+                       }
+               }
+       }
+
        // Read policy conditions
        for (cursor = necp_packet_find_tlv(packet, offset, NECP_TLV_POLICY_CONDITION, &error, 0);
                cursor >= 0;
                cursor = necp_packet_find_tlv(packet, cursor, NECP_TLV_POLICY_CONDITION, &error, 1)) {
-               size_t condition_size = 0;
+               u_int32_t condition_size = 0;
                necp_packet_get_tlv_at_offset(packet, cursor, 0, NULL, &condition_size);
 
                if (condition_size > 0) {
-                       conditions_array_size += (sizeof(u_int8_t) + sizeof(size_t) + condition_size);
+                       conditions_array_size += (sizeof(u_int8_t) + sizeof(u_int32_t) + condition_size);
                }
        }
 
@@ -1442,7 +1693,7 @@ necp_handle_policy_add(struct necp_session *session, u_int32_t message_id, mbuf_
                cursor >= 0;
                cursor = necp_packet_find_tlv(packet, cursor, NECP_TLV_POLICY_CONDITION, &error, 1)) {
                u_int8_t condition_type = NECP_TLV_POLICY_CONDITION;
-               size_t condition_size = 0;
+               u_int32_t condition_size = 0;
                necp_packet_get_tlv_at_offset(packet, cursor, 0, NULL, &condition_size);
                if (condition_size > 0 && condition_size <= (conditions_array_size - conditions_array_cursor)) {
                        // Add type
@@ -1490,7 +1741,7 @@ necp_handle_policy_add(struct necp_session *session, u_int32_t message_id, mbuf_
                goto fail;
        }
 
-       if ((policy = necp_policy_create(session, order, conditions_array, conditions_array_size, policy_result, policy_result_size)) == NULL) {
+       if ((policy = necp_policy_create(session, order, conditions_array, conditions_array_size, route_rules_array, route_rules_array_size, policy_result, policy_result_size)) == NULL) {
                response_error = NECP_ERROR_INTERNAL;
                goto fail;
        }
@@ -1505,6 +1756,9 @@ fail:
        if (conditions_array != NULL) {
                FREE(conditions_array, M_NECP);
        }
+       if (route_rules_array != NULL) {
+               FREE(route_rules_array, M_NECP);
+       }
 
        necp_send_error_response(session, NECP_PACKET_TYPE_POLICY_ADD, message_id, response_error);
 }
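
The hunks above switch every TLV length from size_t to u_int32_t and repack each NECP_TLV_ROUTE_RULE and NECP_TLV_POLICY_CONDITION into a flat array of (u_int8_t type, u_int32_t length, value) records, which is why each entry contributes sizeof(u_int8_t) + sizeof(u_int32_t) + size bytes. The stand-alone user-space sketch below only illustrates that record layout; the pack_tlv/walk_tlvs helpers and the EXAMPLE_TLV_ROUTE_RULE value are placeholders and are not part of necp.c.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define EXAMPLE_TLV_ROUTE_RULE 22 /* placeholder type value for the sketch */

/* Append one (type, length, value) record at cursor; returns the new cursor. */
static size_t
pack_tlv(uint8_t *buf, size_t cursor, uint8_t type, const void *value, uint32_t length)
{
	memcpy(buf + cursor, &type, sizeof(type));
	cursor += sizeof(type);
	memcpy(buf + cursor, &length, sizeof(length));
	cursor += sizeof(length);
	memcpy(buf + cursor, value, length);
	return (cursor + length);
}

/* Walk packed records, mirroring the "offset += sizeof(u_int8_t) +
 * sizeof(u_int32_t) + length" stride used when the policy is applied. */
static void
walk_tlvs(const uint8_t *buf, size_t buf_size)
{
	size_t offset = 0;
	while (offset + sizeof(uint8_t) + sizeof(uint32_t) <= buf_size) {
		uint8_t type = buf[offset];
		uint32_t length = 0;
		memcpy(&length, buf + offset + sizeof(uint8_t), sizeof(length));
		printf("type %u, length %u\n", (unsigned)type, (unsigned)length);
		offset += sizeof(uint8_t) + sizeof(uint32_t) + length;
	}
}

int
main(void)
{
	uint8_t buffer[64];
	size_t cursor = 0;
	const char *ifname = "en0";

	cursor = pack_tlv(buffer, cursor, EXAMPLE_TLV_ROUTE_RULE, ifname, (uint32_t)strlen(ifname) + 1);
	walk_tlvs(buffer, cursor);
	return (0);
}
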
@@ -1518,9 +1772,9 @@ necp_handle_policy_get(struct necp_session *session, u_int32_t message_id, mbuf_
        u_int8_t *cursor = NULL;
        u_int32_t response_error = NECP_ERROR_INTERNAL;
        necp_policy_id policy_id = 0;
-       size_t order_tlv_size = 0;
-       size_t result_tlv_size = 0;
-       size_t response_size = 0;
+       u_int32_t order_tlv_size = 0;
+       u_int32_t result_tlv_size = 0;
+       u_int32_t response_size = 0;
 
        struct necp_session_policy *policy = NULL;
 
@@ -1539,8 +1793,8 @@ necp_handle_policy_get(struct necp_session *session, u_int32_t message_id, mbuf_
                goto fail;
        }
 
-       order_tlv_size = sizeof(u_int8_t) + sizeof(size_t) + sizeof(necp_policy_order);
-       result_tlv_size = (policy->result_size ? (sizeof(u_int8_t) + sizeof(size_t) + policy->result_size) : 0);
+       order_tlv_size = sizeof(u_int8_t) + sizeof(u_int32_t) + sizeof(necp_policy_order);
+       result_tlv_size = (policy->result_size ? (sizeof(u_int8_t) + sizeof(u_int32_t) + policy->result_size) : 0);
        response_size = sizeof(struct necp_packet_header) + order_tlv_size + result_tlv_size + policy->conditions_size;
        MALLOC(response, u_int8_t *, response_size, M_NECP, M_WAITOK);
        if (response == NULL) {
@@ -1615,8 +1869,8 @@ static void
 necp_handle_policy_list_all(struct necp_session *session, u_int32_t message_id, mbuf_t packet, int offset)
 {
 #pragma unused(packet, offset)
-       size_t tlv_size = (sizeof(u_int8_t) + sizeof(size_t) + sizeof(u_int32_t));
-       size_t response_size = 0;
+       u_int32_t tlv_size = (sizeof(u_int8_t) + sizeof(u_int32_t) + sizeof(u_int32_t));
+       u_int32_t response_size = 0;
        u_int8_t *response = NULL;
        u_int8_t *cursor = NULL;
        int num_policies = 0;
@@ -1686,7 +1940,7 @@ necp_policy_get_new_id(void)
 }
 
 static struct necp_session_policy *
-necp_policy_create(struct necp_session *session, necp_policy_order order, u_int8_t *conditions_array, size_t conditions_array_size, u_int8_t *result, size_t result_size)
+necp_policy_create(struct necp_session *session, necp_policy_order order, u_int8_t *conditions_array, u_int32_t conditions_array_size, u_int8_t *route_rules_array, u_int32_t route_rules_array_size, u_int8_t *result, u_int32_t result_size)
 {
        struct necp_session_policy *new_policy = NULL;
        struct necp_session_policy *tmp_policy = NULL;
@@ -1707,6 +1961,8 @@ necp_policy_create(struct necp_session *session, necp_policy_order order, u_int8
        new_policy->order = order;
        new_policy->conditions = conditions_array;
        new_policy->conditions_size = conditions_array_size;
+       new_policy->route_rules = route_rules_array;
+       new_policy->route_rules_size = route_rules_array_size;
        new_policy->result = result;
        new_policy->result_size = result_size;
        new_policy->id = necp_policy_get_new_id();
@@ -1745,17 +2001,17 @@ necp_policy_get_result_type(struct necp_session_policy *policy)
        return (policy ? necp_policy_result_get_type_from_buffer(policy->result, policy->result_size) : 0);
 }
 
-static inline size_t
+static inline u_int32_t
 necp_policy_get_result_parameter_length(struct necp_session_policy *policy)
 {
        return (policy ? necp_policy_result_get_parameter_length_from_buffer(policy->result, policy->result_size) : 0);
 }
 
 static bool
-necp_policy_get_result_parameter(struct necp_session_policy *policy, u_int8_t *parameter_buffer, size_t parameter_buffer_length)
+necp_policy_get_result_parameter(struct necp_session_policy *policy, u_int8_t *parameter_buffer, u_int32_t parameter_buffer_length)
 {
        if (policy) {
-               size_t parameter_length = necp_policy_result_get_parameter_length_from_buffer(policy->result, policy->result_size);
+               u_int32_t parameter_length = necp_policy_result_get_parameter_length_from_buffer(policy->result, policy->result_size);
                if (parameter_buffer_length >= parameter_length) {
                        u_int8_t *parameter = necp_policy_result_get_parameter_pointer_from_buffer(policy->result, policy->result_size);
                        if (parameter && parameter_buffer) {
@@ -1847,9 +2103,9 @@ necp_policy_unapply(struct necp_session_policy *policy)
                necp_remove_uuid_app_id_mapping(policy->applied_real_app_uuid, NULL, FALSE);
                uuid_clear(policy->applied_real_app_uuid);
        }
-       if (!uuid_is_null(policy->applied_service_uuid)) {
-               necp_remove_uuid_service_id_mapping(policy->applied_service_uuid);
-               uuid_clear(policy->applied_service_uuid);
+       if (!uuid_is_null(policy->applied_result_uuid)) {
+               necp_remove_uuid_service_id_mapping(policy->applied_result_uuid);
+               uuid_clear(policy->applied_result_uuid);
        }
 
        // Release string mappings
@@ -1859,6 +2115,12 @@ necp_policy_unapply(struct necp_session_policy *policy)
                policy->applied_account = NULL;
        }
 
+       // Release route rule
+       if (policy->applied_route_rules_id != 0) {
+               necp_remove_route_rule(&necp_route_rules, policy->applied_route_rules_id);
+               policy->applied_route_rules_id = 0;
+       }
+
        // Remove socket policies
        for (i = 0; i < MAX_KERNEL_SOCKET_POLICIES; i++) {
                if (policy->kernel_socket_policies[i] != 0) {
@@ -1926,7 +2188,7 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli
        union necp_sockaddr_union cond_remote_start;
        union necp_sockaddr_union cond_remote_end;
        u_int8_t cond_remote_prefix = 0;
-       size_t offset = 0;
+       u_int32_t offset = 0;
        u_int8_t ultimate_result = 0;
        u_int32_t secondary_result = 0;
        necp_kernel_policy_result_parameter secondary_result_parameter;
@@ -1943,13 +2205,13 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli
 
        // Process conditions
        while (offset < policy->conditions_size) {
-               size_t length = 0;
+               u_int32_t length = 0;
                u_int8_t *value = necp_buffer_get_tlv_value(policy->conditions, offset, &length);
 
                u_int8_t condition_type = necp_policy_condition_get_type_from_buffer(value, length);
                u_int8_t condition_flags = necp_policy_condition_get_flags_from_buffer(value, length);
                bool condition_is_negative = condition_flags & NECP_POLICY_CONDITION_FLAGS_NEGATIVE;
-               size_t condition_length = necp_policy_condition_get_value_length_from_buffer(value, length);
+               u_int32_t condition_length = necp_policy_condition_get_value_length_from_buffer(value, length);
                u_int8_t *condition_value = necp_policy_condition_get_value_pointer_from_buffer(value, length);
                switch (condition_type) {
                        case NECP_POLICY_CONDITION_DEFAULT: {
@@ -2158,7 +2420,7 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli
                        }
                }
 
-               offset += sizeof(u_int8_t) + sizeof(size_t) + length;
+               offset += sizeof(u_int8_t) + sizeof(u_int32_t) + length;
        }
 
        // Process result
@@ -2210,7 +2472,7 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli
                }
                case NECP_POLICY_RESULT_IP_TUNNEL: {
                        struct necp_policy_result_ip_tunnel tunnel_parameters;
-                       size_t tunnel_parameters_length = necp_policy_get_result_parameter_length(policy);
+                       u_int32_t tunnel_parameters_length = necp_policy_get_result_parameter_length(policy);
                        if (tunnel_parameters_length > sizeof(u_int32_t) &&
                                tunnel_parameters_length <= sizeof(struct necp_policy_result_ip_tunnel) &&
                                necp_policy_get_result_parameter(policy, (u_int8_t *)&tunnel_parameters, sizeof(tunnel_parameters))) {
@@ -2248,7 +2510,7 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli
                case NECP_POLICY_RESULT_TRIGGER_SCOPED:
                case NECP_POLICY_RESULT_NO_TRIGGER_SCOPED: {
                        struct necp_policy_result_service service_parameters;
-                       size_t service_result_length = necp_policy_get_result_parameter_length(policy);
+                       u_int32_t service_result_length = necp_policy_get_result_parameter_length(policy);
                        bool has_extra_service_data = FALSE;
                        if (service_result_length >= (sizeof(service_parameters))) {
                                has_extra_service_data = TRUE;
@@ -2256,7 +2518,7 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli
                        if (necp_policy_get_result_parameter(policy, (u_int8_t *)&service_parameters, sizeof(service_parameters))) {
                                ultimate_result_parameter.service.identifier = necp_create_uuid_service_id_mapping(service_parameters.identifier);
                                if (ultimate_result_parameter.service.identifier != 0) {
-                                       uuid_copy(policy->applied_service_uuid, service_parameters.identifier);
+                                       uuid_copy(policy->applied_result_uuid, service_parameters.identifier);
                                        socket_layer_non_id_conditions = TRUE;
                                        if (has_extra_service_data) {
                                                ultimate_result_parameter.service.data = service_parameters.data;
@@ -2267,8 +2529,19 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli
                        }
                        break;
                }
+               case NECP_POLICY_RESULT_USE_NETAGENT: {
+                       uuid_t netagent_uuid;
+                       if (necp_policy_get_result_parameter(policy, (u_int8_t *)&netagent_uuid, sizeof(netagent_uuid))) {
+                               ultimate_result_parameter.netagent_id = necp_create_uuid_service_id_mapping(netagent_uuid);
+                               if (ultimate_result_parameter.netagent_id != 0) {
+                                       uuid_copy(policy->applied_result_uuid, netagent_uuid);
+                                       socket_layer_non_id_conditions = TRUE;
+                               }
+                       }
+                       break;
+               }
                case NECP_POLICY_RESULT_SOCKET_SCOPED: {
-                       size_t interface_name_length = necp_policy_get_result_parameter_length(policy);
+                       u_int32_t interface_name_length = necp_policy_get_result_parameter_length(policy);
                        if (interface_name_length <= IFXNAMSIZ && interface_name_length > 0) {
                                char interface_name[IFXNAMSIZ];
                                ifnet_t scope_interface = NULL;
@@ -2280,13 +2553,23 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli
                                }
                        }
                }
+               case NECP_POLICY_RESULT_ROUTE_RULES: {
+                       if (policy->route_rules != NULL && policy->route_rules_size > 0) {
+                               u_int32_t route_rule_id = necp_create_route_rule(&necp_route_rules, policy->route_rules, policy->route_rules_size);
+                               if (route_rule_id > 0) {
+                                       policy->applied_route_rules_id = route_rule_id;
+                                       ultimate_result_parameter.route_rule_id = route_rule_id;
+                                       socket_layer_non_id_conditions = TRUE;
+                               }
+                       }
+               }
                default: {
                        break;
                }
        }
 
        if (socket_layer_non_id_conditions) {
-               necp_kernel_policy_id policy_id = necp_kernel_socket_policy_add(policy->id, policy->order, session->session_order, master_condition_mask, master_condition_negated_mask, cond_app_id, cond_real_app_id, cond_account_id, cond_domain, cond_pid, cond_uid, cond_bound_interface, cond_traffic_class, cond_protocol, &cond_local_start, &cond_local_end, cond_local_prefix, &cond_remote_start, &cond_remote_end, cond_remote_prefix, ultimate_result, ultimate_result_parameter);
+               necp_kernel_policy_id policy_id = necp_kernel_socket_policy_add(policy->id, policy->order, session->session_order, session->proc_pid, master_condition_mask, master_condition_negated_mask, cond_app_id, cond_real_app_id, cond_account_id, cond_domain, cond_pid, cond_uid, cond_bound_interface, cond_traffic_class, cond_protocol, &cond_local_start, &cond_local_end, cond_local_prefix, &cond_remote_start, &cond_remote_end, cond_remote_prefix, ultimate_result, ultimate_result_parameter);
 
                if (policy_id == 0) {
                        NECPLOG0(LOG_DEBUG, "Error applying socket kernel policy");
@@ -2298,7 +2581,7 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli
        }
 
        if (ip_output_layer_non_id_conditions) {
-               necp_kernel_policy_id policy_id = necp_kernel_ip_output_policy_add(policy->id, policy->order, NECP_KERNEL_POLICY_SUBORDER_NON_ID_CONDITIONS, session->session_order, master_condition_mask, master_condition_negated_mask, NECP_KERNEL_POLICY_ID_NONE, cond_bound_interface, 0, cond_protocol, &cond_local_start, &cond_local_end, cond_local_prefix, &cond_remote_start, &cond_remote_end, cond_remote_prefix, ultimate_result, ultimate_result_parameter);
+               necp_kernel_policy_id policy_id = necp_kernel_ip_output_policy_add(policy->id, policy->order, NECP_KERNEL_POLICY_SUBORDER_NON_ID_CONDITIONS, session->session_order, session->proc_pid, master_condition_mask, master_condition_negated_mask, NECP_KERNEL_POLICY_ID_NONE, cond_bound_interface, 0, cond_protocol, &cond_local_start, &cond_local_end, cond_local_prefix, &cond_remote_start, &cond_remote_end, cond_remote_prefix, ultimate_result, ultimate_result_parameter);
 
                if (policy_id == 0) {
                        NECPLOG0(LOG_DEBUG, "Error applying IP output kernel policy");
@@ -2309,7 +2592,7 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli
        }
 
        if (ip_output_layer_id_condition) {
-               necp_kernel_policy_id policy_id = necp_kernel_ip_output_policy_add(policy->id, policy->order, NECP_KERNEL_POLICY_SUBORDER_ID_CONDITION, session->session_order, NECP_KERNEL_CONDITION_POLICY_ID | NECP_KERNEL_CONDITION_ALL_INTERFACES, 0, cond_ip_output_layer_id, NULL, 0, 0, NULL, NULL, 0, NULL, NULL, 0, ultimate_result, ultimate_result_parameter);
+               necp_kernel_policy_id policy_id = necp_kernel_ip_output_policy_add(policy->id, policy->order, NECP_KERNEL_POLICY_SUBORDER_ID_CONDITION, session->session_order, session->proc_pid, NECP_KERNEL_CONDITION_POLICY_ID | NECP_KERNEL_CONDITION_ALL_INTERFACES, 0, cond_ip_output_layer_id, NULL, 0, 0, NULL, NULL, 0, NULL, NULL, 0, ultimate_result, ultimate_result_parameter);
 
                if (policy_id == 0) {
                        NECPLOG0(LOG_DEBUG, "Error applying IP output kernel policy");
@@ -2321,7 +2604,7 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli
 
        // Extra policies for IP Output tunnels for when packets loop back
        if (ip_output_layer_tunnel_condition_from_id) {
-               necp_kernel_policy_id policy_id = necp_kernel_ip_output_policy_add(policy->id, policy->order, NECP_KERNEL_POLICY_SUBORDER_NON_ID_TUNNEL_CONDITION, session->session_order, NECP_KERNEL_CONDITION_POLICY_ID | NECP_KERNEL_CONDITION_LAST_INTERFACE | NECP_KERNEL_CONDITION_ALL_INTERFACES, 0, policy->kernel_ip_output_policies[NECP_KERNEL_POLICY_SUBORDER_NON_ID_CONDITIONS], NULL, cond_last_interface_index, 0, NULL, NULL, 0, NULL, NULL, 0, secondary_result, secondary_result_parameter);
+               necp_kernel_policy_id policy_id = necp_kernel_ip_output_policy_add(policy->id, policy->order, NECP_KERNEL_POLICY_SUBORDER_NON_ID_TUNNEL_CONDITION, session->session_order, session->proc_pid, NECP_KERNEL_CONDITION_POLICY_ID | NECP_KERNEL_CONDITION_LAST_INTERFACE | NECP_KERNEL_CONDITION_ALL_INTERFACES, 0, policy->kernel_ip_output_policies[NECP_KERNEL_POLICY_SUBORDER_NON_ID_CONDITIONS], NULL, cond_last_interface_index, 0, NULL, NULL, 0, NULL, NULL, 0, secondary_result, secondary_result_parameter);
 
                if (policy_id == 0) {
                        NECPLOG0(LOG_DEBUG, "Error applying IP output kernel policy");
@@ -2332,7 +2615,7 @@ necp_policy_apply(struct necp_session *session, struct necp_session_policy *poli
        }
 
        if (ip_output_layer_tunnel_condition_from_id) {
-               necp_kernel_policy_id policy_id = necp_kernel_ip_output_policy_add(policy->id, policy->order, NECP_KERNEL_POLICY_SUBORDER_ID_TUNNEL_CONDITION, session->session_order, NECP_KERNEL_CONDITION_POLICY_ID | NECP_KERNEL_CONDITION_LAST_INTERFACE | NECP_KERNEL_CONDITION_ALL_INTERFACES, 0, policy->kernel_ip_output_policies[NECP_KERNEL_POLICY_SUBORDER_ID_CONDITION], NULL, cond_last_interface_index, 0, NULL, NULL, 0, NULL, NULL, 0, secondary_result, secondary_result_parameter);
+               necp_kernel_policy_id policy_id = necp_kernel_ip_output_policy_add(policy->id, policy->order, NECP_KERNEL_POLICY_SUBORDER_ID_TUNNEL_CONDITION, session->session_order, session->proc_pid, NECP_KERNEL_CONDITION_POLICY_ID | NECP_KERNEL_CONDITION_LAST_INTERFACE | NECP_KERNEL_CONDITION_ALL_INTERFACES, 0, policy->kernel_ip_output_policies[NECP_KERNEL_POLICY_SUBORDER_ID_CONDITION], NULL, cond_last_interface_index, 0, NULL, NULL, 0, NULL, NULL, 0, secondary_result, secondary_result_parameter);
 
                if (policy_id == 0) {
                        NECPLOG0(LOG_DEBUG, "Error applying IP output kernel policy");
@@ -2355,6 +2638,8 @@ necp_policy_apply_all(struct necp_session *session)
 {
        struct necp_session_policy *policy = NULL;
        struct necp_session_policy *temp_policy = NULL;
+       struct kev_necp_policies_changed_data kev_data;
+       kev_data.changed_count = 0;
 
        lck_rw_lock_exclusive(&necp_kernel_policy_lock);
 
@@ -2386,6 +2671,8 @@ necp_policy_apply_all(struct necp_session *session)
 
        lck_rw_done(&necp_kernel_policy_lock);
 
+       necp_post_change_event(&kev_data);
+
        if (necp_debug) {
                NECPLOG0(LOG_DEBUG, "Applied NECP policies");
        }
@@ -2417,7 +2704,7 @@ necp_kernel_policy_get_new_id(void)
 
 #define        NECP_KERNEL_VALID_SOCKET_CONDITIONS (NECP_KERNEL_CONDITION_APP_ID | NECP_KERNEL_CONDITION_REAL_APP_ID | NECP_KERNEL_CONDITION_DOMAIN | NECP_KERNEL_CONDITION_ACCOUNT_ID | NECP_KERNEL_CONDITION_PID | NECP_KERNEL_CONDITION_UID | NECP_KERNEL_CONDITION_ALL_INTERFACES | NECP_KERNEL_CONDITION_BOUND_INTERFACE | NECP_KERNEL_CONDITION_TRAFFIC_CLASS | NECP_KERNEL_CONDITION_PROTOCOL | NECP_KERNEL_CONDITION_LOCAL_START | NECP_KERNEL_CONDITION_LOCAL_END | NECP_KERNEL_CONDITION_LOCAL_PREFIX | NECP_KERNEL_CONDITION_REMOTE_START | NECP_KERNEL_CONDITION_REMOTE_END | NECP_KERNEL_CONDITION_REMOTE_PREFIX | NECP_KERNEL_CONDITION_ENTITLEMENT)
 static necp_kernel_policy_id
-necp_kernel_socket_policy_add(necp_policy_id parent_policy_id, necp_policy_order order, u_int32_t session_order, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_app_id cond_app_id, necp_app_id cond_real_app_id, u_int32_t cond_account_id, char *cond_domain, pid_t cond_pid, uid_t cond_uid, ifnet_t cond_bound_interface, struct necp_policy_condition_tc_range cond_traffic_class, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter)
+necp_kernel_socket_policy_add(necp_policy_id parent_policy_id, necp_policy_order order, u_int32_t session_order, int session_pid, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_app_id cond_app_id, necp_app_id cond_real_app_id, u_int32_t cond_account_id, char *cond_domain, pid_t cond_pid, uid_t cond_uid, ifnet_t cond_bound_interface, struct necp_policy_condition_tc_range cond_traffic_class, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter)
 {
        struct necp_kernel_socket_policy *new_kernel_policy = NULL;
        struct necp_kernel_socket_policy *tmp_kernel_policy = NULL;
@@ -2432,6 +2719,7 @@ necp_kernel_socket_policy_add(necp_policy_id parent_policy_id, necp_policy_order
        new_kernel_policy->id = necp_kernel_policy_get_new_id();
        new_kernel_policy->order = order;
        new_kernel_policy->session_order = session_order;
+       new_kernel_policy->session_pid = session_pid;
 
        // Sanitize condition mask
        new_kernel_policy->condition_mask = (condition_mask & NECP_KERNEL_VALID_SOCKET_CONDITIONS);
@@ -2561,19 +2849,192 @@ necp_kernel_socket_policy_delete(necp_kernel_policy_id policy_id)
        return (FALSE);
 }
 
+#define MAX_RESULT_STRING_LEN 64
+static inline const char *
+necp_get_result_description(char *result_string, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter)
+{
+       uuid_string_t uuid_string;
+       switch (result) {
+               case NECP_KERNEL_POLICY_RESULT_NONE: {
+                       return ("None");
+               }
+               case NECP_KERNEL_POLICY_RESULT_PASS: {
+                       return ("Pass");
+               }
+               case NECP_KERNEL_POLICY_RESULT_SKIP: {
+                       return ("Skip");
+               }
+               case NECP_KERNEL_POLICY_RESULT_DROP: {
+                       return ("Drop");
+               }
+               case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT: {
+                       snprintf(result_string, MAX_RESULT_STRING_LEN, "SocketDivert (%d)", result_parameter.flow_divert_control_unit);
+                       break;
+               }
+               case NECP_KERNEL_POLICY_RESULT_SOCKET_FILTER: {
+                       snprintf(result_string, MAX_RESULT_STRING_LEN, "SocketFilter (%d)", result_parameter.filter_control_unit);
+                       break;
+               }
+               case NECP_KERNEL_POLICY_RESULT_IP_TUNNEL: {
+                       ifnet_t interface = ifindex2ifnet[result_parameter.tunnel_interface_index];
+                       snprintf(result_string, MAX_RESULT_STRING_LEN, "IPTunnel (%s%d)", ifnet_name(interface), ifnet_unit(interface));
+                       break;
+               }
+               case NECP_KERNEL_POLICY_RESULT_IP_FILTER: {
+                       return ("IPFilter");
+               }
+               case NECP_KERNEL_POLICY_RESULT_SOCKET_SCOPED: {
+                       ifnet_t interface = ifindex2ifnet[result_parameter.scoped_interface_index];
+                       snprintf(result_string, MAX_RESULT_STRING_LEN, "SocketScoped (%s%d)", ifnet_name(interface), ifnet_unit(interface));
+                       break;
+               }
+               case NECP_KERNEL_POLICY_RESULT_ROUTE_RULES: {
+                       int index = 0;
+                       char interface_names[MAX_ROUTE_RULE_INTERFACES][IFXNAMSIZ];
+                       struct necp_route_rule *route_rule = necp_lookup_route_rule_locked(&necp_route_rules, result_parameter.route_rule_id);
+                       if (route_rule != NULL) {
+                               bool default_drop = (route_rule->default_action == NECP_ROUTE_RULE_DENY_INTERFACE);
+                               for (index = 0; index < MAX_ROUTE_RULE_INTERFACES; index++) {
+                                       if (route_rule->exception_if_indices[index] != 0) {
+                                               ifnet_t interface = ifindex2ifnet[route_rule->exception_if_indices[index]];
+                                               snprintf(interface_names[index], IFXNAMSIZ, "%s%d", ifnet_name(interface), ifnet_unit(interface));
+                                       } else {
+                                               memset(interface_names[index], 0, IFXNAMSIZ);
+                                       }
+                               }
+                               if (default_drop) {
+                                       snprintf(result_string, MAX_RESULT_STRING_LEN, "RouteRules (Only %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
+                                                        (route_rule->cellular_action == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? "Cell " : "",
+                                                        (route_rule->wifi_action == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? "WiFi " : "",
+                                                        (route_rule->wired_action == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? "Wired " : "",
+                                                        (route_rule->expensive_action == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? "Exp " : "",
+                                                        (route_rule->exception_if_actions[0] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? interface_names[0] : "",
+                                                        (route_rule->exception_if_actions[0] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? " " : "",
+                                                        (route_rule->exception_if_actions[1] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? interface_names[1] : "",
+                                                        (route_rule->exception_if_actions[1] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? " " : "",
+                                                        (route_rule->exception_if_actions[2] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? interface_names[2] : "",
+                                                        (route_rule->exception_if_actions[2] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? " " : "",
+                                                        (route_rule->exception_if_actions[3] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? interface_names[3] : "",
+                                                        (route_rule->exception_if_actions[3] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? " " : "",
+                                                        (route_rule->exception_if_actions[4] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? interface_names[4] : "",
+                                                        (route_rule->exception_if_actions[4] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? " " : "",
+                                                        (route_rule->exception_if_actions[5] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? interface_names[5] : "",
+                                                        (route_rule->exception_if_actions[5] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? " " : "",
+                                                        (route_rule->exception_if_actions[6] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? interface_names[6] : "",
+                                                        (route_rule->exception_if_actions[6] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? " " : "",
+                                                        (route_rule->exception_if_actions[7] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? interface_names[7] : "",
+                                                        (route_rule->exception_if_actions[7] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? " " : "",
+                                                        (route_rule->exception_if_actions[8] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? interface_names[8] : "",
+                                                        (route_rule->exception_if_actions[8] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? " " : "",
+                                                        (route_rule->exception_if_actions[9] == NECP_ROUTE_RULE_ALLOW_INTERFACE) ? interface_names[9] : "");
+                               } else {
+                                       snprintf(result_string, MAX_RESULT_STRING_LEN, "RouteRules (%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
+                                                        (route_rule->cellular_action == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!Cell " : "",
+                                                        (route_rule->wifi_action == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!WiFi " : "",
+                                                        (route_rule->wired_action == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!Wired " : "",
+                                                        (route_rule->expensive_action == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!Exp " : "",
+                                                        (route_rule->exception_if_actions[0] == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!" : "",
+                                                        (route_rule->exception_if_actions[0] == NECP_ROUTE_RULE_DENY_INTERFACE) ? interface_names[0] : "",
+                                                        (route_rule->exception_if_actions[1] == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!" : "",
+                                                        (route_rule->exception_if_actions[1] == NECP_ROUTE_RULE_DENY_INTERFACE) ? interface_names[1] : "",
+                                                        (route_rule->exception_if_actions[2] == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!" : "",
+                                                        (route_rule->exception_if_actions[2] == NECP_ROUTE_RULE_DENY_INTERFACE) ? interface_names[2] : "",
+                                                        (route_rule->exception_if_actions[3] == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!" : "",
+                                                        (route_rule->exception_if_actions[3] == NECP_ROUTE_RULE_DENY_INTERFACE) ? interface_names[3] : "",
+                                                        (route_rule->exception_if_actions[4] == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!" : "",
+                                                        (route_rule->exception_if_actions[4] == NECP_ROUTE_RULE_DENY_INTERFACE) ? interface_names[4] : "",
+                                                        (route_rule->exception_if_actions[5] == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!" : "",
+                                                        (route_rule->exception_if_actions[5] == NECP_ROUTE_RULE_DENY_INTERFACE) ? interface_names[5] : "",
+                                                        (route_rule->exception_if_actions[6] == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!" : "",
+                                                        (route_rule->exception_if_actions[6] == NECP_ROUTE_RULE_DENY_INTERFACE) ? interface_names[6] : "",
+                                                        (route_rule->exception_if_actions[7] == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!" : "",
+                                                        (route_rule->exception_if_actions[7] == NECP_ROUTE_RULE_DENY_INTERFACE) ? interface_names[7] : "",
+                                                        (route_rule->exception_if_actions[8] == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!" : "",
+                                                        (route_rule->exception_if_actions[8] == NECP_ROUTE_RULE_DENY_INTERFACE) ? interface_names[8] : "",
+                                                        (route_rule->exception_if_actions[9] == NECP_ROUTE_RULE_DENY_INTERFACE) ? "!" : "",
+                                                        (route_rule->exception_if_actions[9] == NECP_ROUTE_RULE_DENY_INTERFACE) ? interface_names[9] : "");
+                               }
+                       } else {
+                               snprintf(result_string, MAX_RESULT_STRING_LEN, "RouteRules (Unknown)");
+                       }
+                       break;
+               }
+               case NECP_KERNEL_POLICY_RESULT_USE_NETAGENT: {
+                       bool found_mapping = FALSE;
+                       struct necp_uuid_id_mapping *mapping = necp_uuid_lookup_uuid_with_service_id_locked(result_parameter.netagent_id);
+                       if (mapping != NULL) {
+                               uuid_unparse(mapping->uuid, uuid_string);
+                               found_mapping = TRUE;
+                       }
+                       snprintf(result_string, MAX_RESULT_STRING_LEN, "UseNetAgent (%s)", found_mapping ? uuid_string : "Unknown");
+                       break;
+               }
+               case NECP_POLICY_RESULT_TRIGGER: {
+                       bool found_mapping = FALSE;
+                       struct necp_uuid_id_mapping *mapping = necp_uuid_lookup_uuid_with_service_id_locked(result_parameter.service.identifier);
+                       if (mapping != NULL) {
+                               uuid_unparse(mapping->uuid, uuid_string);
+                               found_mapping = TRUE;
+                       }
+                       snprintf(result_string, MAX_RESULT_STRING_LEN, "Trigger (%s.%d)", found_mapping ? uuid_string : "Unknown", result_parameter.service.data);
+                       break;
+               }
+               case NECP_POLICY_RESULT_TRIGGER_IF_NEEDED: {
+                       bool found_mapping = FALSE;
+                       struct necp_uuid_id_mapping *mapping = necp_uuid_lookup_uuid_with_service_id_locked(result_parameter.service.identifier);
+                       if (mapping != NULL) {
+                               uuid_unparse(mapping->uuid, uuid_string);
+                               found_mapping = TRUE;
+                       }
+                       snprintf(result_string, MAX_RESULT_STRING_LEN, "TriggerIfNeeded (%s.%d)", found_mapping ? uuid_string : "Unknown", result_parameter.service.data);
+                       break;
+               }
+               case NECP_POLICY_RESULT_TRIGGER_SCOPED: {
+                       bool found_mapping = FALSE;
+                       struct necp_uuid_id_mapping *mapping = necp_uuid_lookup_uuid_with_service_id_locked(result_parameter.service.identifier);
+                       if (mapping != NULL) {
+                               uuid_unparse(mapping->uuid, uuid_string);
+                               found_mapping = TRUE;
+                       }
+                       snprintf(result_string, MAX_RESULT_STRING_LEN, "TriggerScoped (%s.%d)", found_mapping ? uuid_string : "Unknown", result_parameter.service.data);
+                       break;
+               }
+               case NECP_POLICY_RESULT_NO_TRIGGER_SCOPED: {
+                       bool found_mapping = FALSE;
+                       struct necp_uuid_id_mapping *mapping = necp_uuid_lookup_uuid_with_service_id_locked(result_parameter.service.identifier);
+                       if (mapping != NULL) {
+                               uuid_unparse(mapping->uuid, uuid_string);
+                               found_mapping = TRUE;
+                       }
+                       snprintf(result_string, MAX_RESULT_STRING_LEN, "NoTriggerScoped (%s.%d)", found_mapping ? uuid_string : "Unknown", result_parameter.service.data);
+                       break;
+               }
+               default: {
+                       snprintf(result_string, MAX_RESULT_STRING_LEN, "Unknown %d (%d)", result, result_parameter.tunnel_interface_index);
+                       break;
+               }
+       }
+       return (result_string);
+}
+
 static void
 necp_kernel_socket_policies_dump_all(void)
 {
-       struct necp_kernel_socket_policy *policy = NULL;
-       int policy_i;
-       int app_i;
-
        if (necp_debug) {
+               struct necp_kernel_socket_policy *policy = NULL;
+               int policy_i;
+               int app_i;
+               char result_string[MAX_RESULT_STRING_LEN];
+               char proc_name_string[MAXCOMLEN + 1];
+               memset(result_string, 0, MAX_RESULT_STRING_LEN);
+               memset(proc_name_string, 0, MAXCOMLEN + 1);
+
                NECPLOG0(LOG_DEBUG, "NECP Application Policies:\n");
                NECPLOG0(LOG_DEBUG, "-----------\n");
                for (policy_i = 0; necp_kernel_socket_policies_app_layer_map != NULL && necp_kernel_socket_policies_app_layer_map[policy_i] != NULL; policy_i++) {
                        policy = necp_kernel_socket_policies_app_layer_map[policy_i];
-                       NECPLOG(LOG_DEBUG, "\t%d. Policy ID: %d, Order: %d.%d, Mask: %x, Result: %d, Parameter: %d\n", policy_i, policy->id, policy->session_order, policy->order, policy->condition_mask, policy->result, policy->result_parameter);
+                       proc_name(policy->session_pid, proc_name_string, MAXCOMLEN);
+                       NECPLOG(LOG_DEBUG, "\t%3d. Policy ID: %5d\tProcess: %10.10s\tOrder: %04d.%04d\tMask: %5x\tResult: %s\n", policy_i, policy->id, proc_name_string, policy->session_order, policy->order, policy->condition_mask, necp_get_result_description(result_string, policy->result, policy->result_parameter));
                }
                if (necp_kernel_socket_policies_app_layer_map[0] != NULL) {
                        NECPLOG0(LOG_DEBUG, "-----------\n");
@@ -2585,7 +3046,8 @@ necp_kernel_socket_policies_dump_all(void)
                        NECPLOG(LOG_DEBUG, "\tApp Bucket: %d\n", app_i);
                        for (policy_i = 0; necp_kernel_socket_policies_map[app_i] != NULL && (necp_kernel_socket_policies_map[app_i])[policy_i] != NULL; policy_i++) {
                                policy = (necp_kernel_socket_policies_map[app_i])[policy_i];
-                               NECPLOG(LOG_DEBUG, "\t%d. Policy ID: %d, Order: %d.%d, Mask: %x, Result: %d, Parameter: %d\n", policy_i, policy->id, policy->session_order, policy->order, policy->condition_mask, policy->result, policy->result_parameter);
+                               proc_name(policy->session_pid, proc_name_string, MAXCOMLEN);
+                               NECPLOG(LOG_DEBUG, "\t%3d. Policy ID: %5d\tProcess: %10.10s\tOrder: %04d.%04d\tMask: %5x\tResult: %s\n", policy_i, policy->id, proc_name_string, policy->session_order, policy->order, policy->condition_mask, necp_get_result_description(result_string, policy->result, policy->result_parameter));
                        }
                        NECPLOG0(LOG_DEBUG, "-----------\n");
                }
@@ -2593,7 +3055,7 @@ necp_kernel_socket_policies_dump_all(void)
 }
 
 static inline bool
-necp_kernel_socket_result_is_service_type(struct necp_kernel_socket_policy *kernel_policy)
+necp_kernel_socket_result_is_trigger_service_type(struct necp_kernel_socket_policy *kernel_policy)
 {
        return (kernel_policy->result >= NECP_KERNEL_POLICY_RESULT_TRIGGER && kernel_policy->result <= NECP_KERNEL_POLICY_RESULT_NO_TRIGGER_SCOPED);
 }
@@ -2604,12 +3066,14 @@ necp_kernel_socket_policy_results_overlap(struct necp_kernel_socket_policy *uppe
        if (upper_policy->result == NECP_KERNEL_POLICY_RESULT_DROP) {
                // Drop always cancels out lower policies
                return (TRUE);
-       } else if (upper_policy->result == NECP_KERNEL_POLICY_RESULT_SOCKET_FILTER) {
-               // Filters never cancel out lower policies
+       } else if (upper_policy->result == NECP_KERNEL_POLICY_RESULT_SOCKET_FILTER ||
+                          upper_policy->result == NECP_KERNEL_POLICY_RESULT_ROUTE_RULES ||
+                          upper_policy->result == NECP_KERNEL_POLICY_RESULT_USE_NETAGENT) {
+               // Filters, route rules, and netagents never cancel out lower policies
                return (FALSE);
-       } else if (necp_kernel_socket_result_is_service_type(upper_policy)) {
+       } else if (necp_kernel_socket_result_is_trigger_service_type(upper_policy)) {
                // Trigger/Scoping policies can overlap one another, but not other results
-               return (necp_kernel_socket_result_is_service_type(lower_policy));
+               return (necp_kernel_socket_result_is_trigger_service_type(lower_policy));
        } else if (upper_policy->result == NECP_KERNEL_POLICY_RESULT_SKIP) {
                if (upper_policy->session_order != lower_policy->session_order) {
                        // A skip cannot override a policy of a different session
@@ -2626,7 +3090,7 @@ necp_kernel_socket_policy_results_overlap(struct necp_kernel_socket_policy *uppe
                }
        }
 
-       // A hard pass, flow divert, or tunnel will currently block out lower policies
+       // A hard pass, flow divert, tunnel, or scope will currently block out lower policies
        return (TRUE);
 }
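
In this overlap check, SOCKET_FILTER, ROUTE_RULES, and USE_NETAGENT results are "transparent": they apply to a matching socket without suppressing lower-order policies, while DROP and the hard results at the end of the function always do. A simplified stand-alone model of that distinction follows; the enum and the helper name are illustrative only, not the kernel's types.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel result codes (illustrative values). */
enum example_result {
	EXAMPLE_RESULT_DROP,
	EXAMPLE_RESULT_SOCKET_FILTER,
	EXAMPLE_RESULT_ROUTE_RULES,
	EXAMPLE_RESULT_USE_NETAGENT,
	EXAMPLE_RESULT_IP_TUNNEL,
};

/* Mirrors the "never cancel out lower policies" branch above: filters,
 * route rules, and netagents are transparent; other results block. */
static bool
result_lets_lower_policies_run(enum example_result upper)
{
	switch (upper) {
	case EXAMPLE_RESULT_SOCKET_FILTER:
	case EXAMPLE_RESULT_ROUTE_RULES:
	case EXAMPLE_RESULT_USE_NETAGENT:
		return (true);
	default:
		return (false);
	}
}

int
main(void)
{
	printf("route rules transparent: %d\n", result_lets_lower_policies_run(EXAMPLE_RESULT_ROUTE_RULES));
	printf("ip tunnel transparent: %d\n", result_lets_lower_policies_run(EXAMPLE_RESULT_IP_TUNNEL));
	return (0);
}
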
 
@@ -2993,36 +3457,56 @@ necp_remove_string_to_id_mapping(struct necp_string_id_mapping_list *list, char
        return (FALSE);
 }
 
-#define NECP_NULL_SERVICE_ID 1
 static u_int32_t
-necp_get_new_uuid_id(void)
+necp_get_new_route_rule_id(void)
 {
        u_int32_t newid = 0;
 
        lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE);
 
-       necp_last_uuid_id++;
-       if (necp_last_uuid_id < (NECP_NULL_SERVICE_ID + 1)) {
-               necp_last_uuid_id = (NECP_NULL_SERVICE_ID + 1);
+       necp_last_route_rule_id++;
+       if (necp_last_route_rule_id < 1 || necp_last_route_rule_id > UINT16_MAX) {
+               necp_last_route_rule_id = 1;
        }
 
-       newid = necp_last_uuid_id;
+       newid = necp_last_route_rule_id;
        if (newid == 0) {
-               NECPLOG0(LOG_DEBUG, "Allocate uuid id failed.\n");
+               NECPLOG0(LOG_DEBUG, "Allocate route rule id failed.\n");
                return (0);
        }
 
        return (newid);
 }
 
-static struct necp_uuid_id_mapping *
-necp_uuid_lookup_app_id_locked(uuid_t uuid)
+static u_int32_t
+necp_get_new_aggregate_route_rule_id(void)
 {
-       struct necp_uuid_id_mapping *searchentry = NULL;
-       struct necp_uuid_id_mapping *foundentry = NULL;
+       u_int32_t newid = 0;
 
-       LIST_FOREACH(searchentry, APPUUIDHASH(uuid), chain) {
-               if (uuid_compare(searchentry->uuid, uuid) == 0) {
+       lck_rw_assert(&necp_route_rule_lock, LCK_RW_ASSERT_EXCLUSIVE);
+
+       necp_last_aggregate_route_rule_id++;
+       if (necp_last_aggregate_route_rule_id <= UINT16_MAX) {
+               necp_last_aggregate_route_rule_id = UINT16_MAX + 1;
+       }
+
+       newid = necp_last_aggregate_route_rule_id;
+       if (newid == 0) {
+               NECPLOG0(LOG_DEBUG, "Allocate aggregate route rule id failed.\n");
+               return (0);
+       }
+
+       return (newid);
+}
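
Taken together, the two allocators split the 32-bit route-rule ID space: per-policy rule IDs wrap within 1..UINT16_MAX, while aggregate IDs always sit above UINT16_MAX, so the two kinds never collide and the ID alone tells a consumer which table to consult. A user-space sketch of that scheme follows; the function names are illustrative and the wrap-around handling is simplified.

#include <stdint.h>
#include <stdio.h>

/* Per-policy route rule IDs live in 1..UINT16_MAX. */
static uint32_t last_route_rule_id = 0;
/* Aggregate route rule IDs live strictly above UINT16_MAX. */
static uint32_t last_aggregate_route_rule_id = 0;

static uint32_t
new_route_rule_id(void)
{
	last_route_rule_id++;
	if (last_route_rule_id < 1 || last_route_rule_id > UINT16_MAX) {
		last_route_rule_id = 1;
	}
	return (last_route_rule_id);
}

static uint32_t
new_aggregate_route_rule_id(void)
{
	last_aggregate_route_rule_id++;
	if (last_aggregate_route_rule_id <= UINT16_MAX) {
		last_aggregate_route_rule_id = UINT16_MAX + 1;
	}
	return (last_aggregate_route_rule_id);
}

int
main(void)
{
	uint32_t rule = new_route_rule_id();
	uint32_t aggregate = new_aggregate_route_rule_id();

	/* An ID above UINT16_MAX must be an aggregate rule. */
	printf("id %u is %s\n", (unsigned)rule, (rule > UINT16_MAX) ? "aggregate" : "per-policy");
	printf("id %u is %s\n", (unsigned)aggregate, (aggregate > UINT16_MAX) ? "aggregate" : "per-policy");
	return (0);
}
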
+
+static struct necp_route_rule *
+necp_lookup_route_rule_locked(struct necp_route_rule_list *list, u_int32_t route_rule_id)
+{
+       struct necp_route_rule *searchentry = NULL;
+       struct necp_route_rule *foundentry = NULL;
+
+       LIST_FOREACH(searchentry, list, chain) {
+               if (searchentry->id == route_rule_id) {
                        foundentry = searchentry;
                        break;
                }
@@ -3031,21 +3515,310 @@ necp_uuid_lookup_app_id_locked(uuid_t uuid)
        return (foundentry);
 }
 
+static struct necp_route_rule *
+necp_lookup_route_rule_by_contents_locked(struct necp_route_rule_list *list, u_int32_t default_action, u_int8_t cellular_action, u_int8_t wifi_action, u_int8_t wired_action, u_int8_t expensive_action, u_int32_t *if_indices, u_int8_t *if_actions)
+{
+       struct necp_route_rule *searchentry = NULL;
+       struct necp_route_rule *foundentry = NULL;
+
+       LIST_FOREACH(searchentry, list, chain) {
+               if (searchentry->default_action == default_action &&
+                       searchentry->cellular_action == cellular_action &&
+                       searchentry->wifi_action == wifi_action &&
+                       searchentry->wired_action == wired_action &&
+                       searchentry->expensive_action == expensive_action) {
+                       bool match_failed = FALSE;
+                       size_t index_a = 0;
+                       size_t index_b = 0;
+                       size_t count_a = 0;
+                       size_t count_b = 0;
+                       for (index_a = 0; index_a < MAX_ROUTE_RULE_INTERFACES; index_a++) {
+                               bool found_index = FALSE;
+                               if (searchentry->exception_if_indices[index_a] == 0) {
+                                       break;
+                               }
+                               count_a++;
+                               for (index_b = 0; index_b < MAX_ROUTE_RULE_INTERFACES; index_b++) {
+                                       if (if_indices[index_b] == 0) {
+                                               break;
+                                       }
+                                       if (index_b >= count_b) {
+                                               count_b = index_b + 1;
+                                       }
+                                       if (searchentry->exception_if_indices[index_a] == if_indices[index_b] &&
+                                               searchentry->exception_if_actions[index_a] == if_actions[index_b]) {
+                                               found_index = TRUE;
+                                               break;
+                                       }
+                               }
+                               if (!found_index) {
+                                       match_failed = TRUE;
+                                       break;
+                               }
+                       }
+                       if (!match_failed && count_a == count_b) {
+                               foundentry = searchentry;
+                               break;
+                       }
+               }
+       }
+
+       return (foundentry);
+}
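
necp_lookup_route_rule_by_contents_locked treats the exception interface list as an unordered set: each (index, action) pair stored in the rule must appear somewhere in the candidate list, and both lists must have the same number of populated slots. The stand-alone sketch below models that set-style comparison with illustrative types; counting the candidate list up front keeps the comparison symmetric.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_IFS 10 /* stands in for MAX_ROUTE_RULE_INTERFACES */

/* Returns true when both zero-terminated (index, action) lists contain the
 * same pairs, regardless of order. */
static bool
exception_lists_match(const uint32_t *idx_a, const uint8_t *act_a,
    const uint32_t *idx_b, const uint8_t *act_b)
{
	size_t count_a = 0;
	size_t count_b = 0;

	for (size_t b = 0; b < MAX_IFS && idx_b[b] != 0; b++) {
		count_b++;
	}
	for (size_t a = 0; a < MAX_IFS && idx_a[a] != 0; a++) {
		bool found = false;
		count_a++;
		for (size_t b = 0; b < count_b; b++) {
			if (idx_a[a] == idx_b[b] && act_a[a] == act_b[b]) {
				found = true;
				break;
			}
		}
		if (!found) {
			return (false);
		}
	}
	return (count_a == count_b);
}

int
main(void)
{
	uint32_t a_idx[MAX_IFS] = { 4, 7 }, b_idx[MAX_IFS] = { 7, 4 };
	uint8_t a_act[MAX_IFS] = { 1, 2 }, b_act[MAX_IFS] = { 2, 1 };

	printf("match: %d\n", exception_lists_match(a_idx, a_act, b_idx, b_act));
	return (0);
}
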
+
 static u_int32_t
-necp_create_uuid_app_id_mapping(uuid_t uuid, bool *allocated_mapping, bool uuid_policy_table)
+necp_create_route_rule(struct necp_route_rule_list *list, u_int8_t *route_rules_array, u_int32_t route_rules_array_size)
 {
-       u_int32_t local_id = 0;
-       struct necp_uuid_id_mapping *existing_mapping = NULL;
+       size_t offset = 0;
+       u_int32_t route_rule_id = 0;
+       struct necp_route_rule *existing_rule = NULL;
+       u_int32_t default_action = NECP_ROUTE_RULE_ALLOW_INTERFACE;
+       u_int8_t cellular_action = NECP_ROUTE_RULE_NONE;
+       u_int8_t wifi_action = NECP_ROUTE_RULE_NONE;
+       u_int8_t wired_action = NECP_ROUTE_RULE_NONE;
+       u_int8_t expensive_action = NECP_ROUTE_RULE_NONE;
+       u_int32_t if_indices[MAX_ROUTE_RULE_INTERFACES];
+       size_t num_valid_indices = 0;
+       memset(&if_indices, 0, sizeof(if_indices));
+       u_int8_t if_actions[MAX_ROUTE_RULE_INTERFACES];
+       memset(&if_actions, 0, sizeof(if_actions));
 
        lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE);
 
-       if (allocated_mapping) {
-               *allocated_mapping = FALSE;
+       if (route_rules_array == NULL || route_rules_array_size == 0) {
+               return (0);
        }
 
-       existing_mapping = necp_uuid_lookup_app_id_locked(uuid);
-       if (existing_mapping != NULL) {
-               local_id = existing_mapping->id;
+       // Process rules
+       while (offset < route_rules_array_size) {
+               ifnet_t rule_interface = NULL;
+               char interface_name[IFXNAMSIZ];
+               u_int32_t length = 0;
+               u_int8_t *value = necp_buffer_get_tlv_value(route_rules_array, offset, &length);
+
+               u_int8_t rule_type = necp_policy_condition_get_type_from_buffer(value, length);
+               u_int8_t rule_flags = necp_policy_condition_get_flags_from_buffer(value, length);
+               u_int32_t rule_length = necp_policy_condition_get_value_length_from_buffer(value, length);
+               u_int8_t *rule_value = necp_policy_condition_get_value_pointer_from_buffer(value, length);
+
+               if (rule_type == NECP_ROUTE_RULE_NONE) {
+                       // Don't allow an explicit rule to be None action
+                       offset += sizeof(u_int8_t) + sizeof(u_int32_t) + length;
+                       continue;
+               }
+
+               if (rule_length == 0) {
+                       if (rule_flags & NECP_ROUTE_RULE_FLAG_CELLULAR) {
+                               cellular_action = rule_type;
+                       }
+                       if (rule_flags & NECP_ROUTE_RULE_FLAG_WIFI) {
+                               wifi_action = rule_type;
+                       }
+                       if (rule_flags & NECP_ROUTE_RULE_FLAG_WIRED) {
+                               wired_action = rule_type;
+                       }
+                       if (rule_flags & NECP_ROUTE_RULE_FLAG_EXPENSIVE) {
+                               expensive_action = rule_type;
+                       }
+                       if (rule_flags == 0) {
+                               default_action = rule_type;
+                       }
+                       offset += sizeof(u_int8_t) + sizeof(u_int32_t) + length;
+                       continue;
+               }
+
+               if (num_valid_indices >= MAX_ROUTE_RULE_INTERFACES) {
+                       offset += sizeof(u_int8_t) + sizeof(u_int32_t) + length;
+                       continue;
+               }
+
+               if (rule_length <= IFXNAMSIZ) {
+                       memcpy(interface_name, rule_value, rule_length);
+                       interface_name[rule_length - 1] = 0; // Make sure the string is NULL terminated
+                       if (ifnet_find_by_name(interface_name, &rule_interface) == 0) {
+                               if_actions[num_valid_indices] = rule_type;
+                               if_indices[num_valid_indices++] = rule_interface->if_index;
+                       }
+               }
+
+               offset += sizeof(u_int8_t) + sizeof(u_int32_t) + length;
+       }
+
+       existing_rule = necp_lookup_route_rule_by_contents_locked(list, default_action, cellular_action, wifi_action, wired_action, expensive_action, if_indices, if_actions);
+       if (existing_rule != NULL) {
+               route_rule_id = existing_rule->id;
+               existing_rule->refcount++;
+       } else {
+               struct necp_route_rule *new_rule = NULL;
+               MALLOC(new_rule, struct necp_route_rule *, sizeof(struct necp_route_rule), M_NECP, M_WAITOK);
+               if (new_rule != NULL) {
+                       memset(new_rule, 0, sizeof(struct necp_route_rule));
+                       route_rule_id = new_rule->id = necp_get_new_route_rule_id();
+                       new_rule->default_action = default_action;
+                       new_rule->cellular_action = cellular_action;
+                       new_rule->wifi_action = wifi_action;
+                       new_rule->wired_action = wired_action;
+                       new_rule->expensive_action = expensive_action;
+                       memcpy(&new_rule->exception_if_indices, &if_indices, sizeof(if_indices));
+                       memcpy(&new_rule->exception_if_actions, &if_actions, sizeof(if_actions));
+                       new_rule->refcount = 1;
+                       LIST_INSERT_HEAD(list, new_rule, chain);
+               }
+       }
+       return (route_rule_id);
+}
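
necp_create_route_rule deduplicates by content: if a rule with identical actions and exception interfaces already exists, its refcount is bumped and its ID reused; otherwise a new entry is allocated and inserted, and necp_remove_route_rule later drops the refcount and frees the entry when it reaches zero. A minimal user-space model of that retain/release pattern follows, using illustrative types and names rather than the kernel's.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy content-addressed, refcounted table keyed by a single action byte. */
struct example_rule {
	uint32_t id;
	uint8_t cellular_action;
	uint32_t refcount;
	struct example_rule *next;
};

static struct example_rule *rules;
static uint32_t last_id;

static uint32_t
rule_create(uint8_t cellular_action)
{
	for (struct example_rule *r = rules; r != NULL; r = r->next) {
		if (r->cellular_action == cellular_action) {
			r->refcount++; /* existing contents: retain and reuse the ID */
			return (r->id);
		}
	}
	struct example_rule *r = calloc(1, sizeof(*r));
	if (r == NULL) {
		return (0);
	}
	r->id = ++last_id;
	r->cellular_action = cellular_action;
	r->refcount = 1;
	r->next = rules;
	rules = r;
	return (r->id);
}

static void
rule_remove(uint32_t id)
{
	for (struct example_rule **rp = &rules; *rp != NULL; rp = &(*rp)->next) {
		if ((*rp)->id != id) {
			continue;
		}
		if (--(*rp)->refcount == 0) {
			struct example_rule *dead = *rp;
			*rp = dead->next; /* last reference gone: unlink and free */
			free(dead);
		}
		return;
	}
}

int
main(void)
{
	uint32_t a = rule_create(1);
	uint32_t b = rule_create(1); /* same contents: same ID, refcount 2 */
	printf("a=%u b=%u\n", (unsigned)a, (unsigned)b);
	rule_remove(a);
	rule_remove(b); /* freed here */
	return (0);
}
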
+
+static void
+necp_remove_aggregate_route_rule_for_id(u_int32_t rule_id)
+{
+       if (rule_id) {
+               lck_rw_lock_exclusive(&necp_route_rule_lock);
+
+               struct necp_aggregate_route_rule *existing_rule = NULL;
+               struct necp_aggregate_route_rule *tmp_rule = NULL;
+
+               LIST_FOREACH_SAFE(existing_rule, &necp_aggregate_route_rules, chain, tmp_rule) {
+                       int index = 0;
+                       for (index = 0; index < MAX_AGGREGATE_ROUTE_RULES; index++) {
+                               u_int32_t route_rule_id = existing_rule->rule_ids[index];
+                               if (route_rule_id == rule_id) {
+                                       LIST_REMOVE(existing_rule, chain);
+                                       FREE(existing_rule, M_NECP);
+                                       break;
+                               }
+                       }
+               }
+
+               lck_rw_done(&necp_route_rule_lock);
+       }
+}
+
+static bool
+necp_remove_route_rule(struct necp_route_rule_list *list, u_int32_t route_rule_id)
+{
+       struct necp_route_rule *existing_rule = NULL;
+
+       lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE);
+
+       existing_rule = necp_lookup_route_rule_locked(list, route_rule_id);
+       if (existing_rule != NULL) {
+               if (--existing_rule->refcount == 0) {
+                       necp_remove_aggregate_route_rule_for_id(existing_rule->id);
+                       LIST_REMOVE(existing_rule, chain);
+                       FREE(existing_rule, M_NECP);
+               }
+               return (TRUE);
+       }
+
+       return (FALSE);
+}
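
Route rules are shared by reference count: the creation path above either reuses an identical existing rule found by necp_lookup_route_rule_by_contents_locked() and bumps its refcount, or inserts a new rule with refcount 1, and necp_remove_route_rule() frees the rule only when the last reference drops. A simplified user-space model of that lifetime, with hypothetical names and without the list linkage or locking:

#include <stdlib.h>

/*
 * Simplified model of the rule refcounting above: creation either reuses an
 * existing identical rule (refcount++) or allocates a new one with refcount
 * set to 1; removal frees the rule only when the last reference drops.
 */
struct rule {
        unsigned int id;
        unsigned int refcount;
};

static struct rule *
rule_retain_or_create(struct rule *existing, unsigned int new_id)
{
        if (existing != NULL) {
                existing->refcount++;
                return (existing);
        }
        struct rule *rule = calloc(1, sizeof(*rule));
        if (rule != NULL) {
                rule->id = new_id;
                rule->refcount = 1;
        }
        return (rule);
}

static int
rule_release(struct rule *rule)
{
        if (rule == NULL) {
                return (0);
        }
        if (--rule->refcount == 0) {
                free(rule);             // the kernel also unlinks the rule from its list here
                return (1);             // last reference dropped
        }
        return (0);
}
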
+
+static struct necp_aggregate_route_rule *
+necp_lookup_aggregate_route_rule_locked(u_int32_t route_rule_id)
+{
+       struct necp_aggregate_route_rule *searchentry = NULL;
+       struct necp_aggregate_route_rule *foundentry = NULL;
+
+       lck_rw_lock_shared(&necp_route_rule_lock);
+
+       LIST_FOREACH(searchentry, &necp_aggregate_route_rules, chain) {
+               if (searchentry->id == route_rule_id) {
+                       foundentry = searchentry;
+                       break;
+               }
+       }
+
+       lck_rw_done(&necp_route_rule_lock);
+
+       return (foundentry);
+}
+
+static u_int32_t
+necp_create_aggregate_route_rule(u_int32_t *rule_ids)
+{
+       u_int32_t aggregate_route_rule_id = 0;
+       struct necp_aggregate_route_rule *new_rule = NULL;
+       struct necp_aggregate_route_rule *existing_rule = NULL;
+
+       LIST_FOREACH(existing_rule, &necp_aggregate_route_rules, chain) {
+               if (memcmp(existing_rule->rule_ids, rule_ids, (sizeof(u_int32_t) * MAX_AGGREGATE_ROUTE_RULES)) == 0) {
+                       return (existing_rule->id);
+               }
+       }
+
+       lck_rw_lock_exclusive(&necp_route_rule_lock);
+
+       LIST_FOREACH(existing_rule, &necp_aggregate_route_rules, chain) {
+               // Re-check, in case something else created the rule while we were waiting for the lock
+               if (memcmp(existing_rule->rule_ids, rule_ids, (sizeof(u_int32_t) * MAX_AGGREGATE_ROUTE_RULES)) == 0) {
+                       lck_rw_done(&necp_route_rule_lock);
+                       return (existing_rule->id);
+               }
+       }
+
+       MALLOC(new_rule, struct necp_aggregate_route_rule *, sizeof(struct necp_aggregate_route_rule), M_NECP, M_WAITOK);
+       if (new_rule != NULL) {
+               memset(new_rule, 0, sizeof(struct necp_aggregate_route_rule));
+               aggregate_route_rule_id = new_rule->id = necp_get_new_aggregate_route_rule_id();
+               memcpy(new_rule->rule_ids, rule_ids, (sizeof(u_int32_t) * MAX_AGGREGATE_ROUTE_RULES));
+               LIST_INSERT_HEAD(&necp_aggregate_route_rules, new_rule, chain);
+       }
+       lck_rw_done(&necp_route_rule_lock);
+
+       return (aggregate_route_rule_id);
+}
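
necp_create_aggregate_route_rule() uses a check, lock, re-check pattern: scan for an existing identical entry, take the writer lock, scan again in case another thread inserted one while waiting, and only then allocate. A user-space sketch of the same pattern with a pthread rwlock follows; the names and the reader-locked first scan are illustrative simplifications, not the kernel code:

#include <pthread.h>
#include <stdlib.h>

struct entry {
        struct entry *next;
        unsigned int key;
        unsigned int id;
};

static pthread_rwlock_t table_lock = PTHREAD_RWLOCK_INITIALIZER;
static struct entry *table_head;        // protected by table_lock
static unsigned int next_id = 1;

static struct entry *
find_locked(unsigned int key)
{
        struct entry *e;
        for (e = table_head; e != NULL; e = e->next) {
                if (e->key == key) {
                        return (e);
                }
        }
        return (NULL);
}

unsigned int
find_or_create(unsigned int key)
{
        unsigned int id = 0;
        struct entry *e;

        // First look without the writer lock
        pthread_rwlock_rdlock(&table_lock);
        e = find_locked(key);
        if (e != NULL) {
                id = e->id;
        }
        pthread_rwlock_unlock(&table_lock);
        if (id != 0) {
                return (id);
        }

        pthread_rwlock_wrlock(&table_lock);
        e = find_locked(key);           // re-check under the writer lock
        if (e == NULL) {
                e = calloc(1, sizeof(*e));
                if (e != NULL) {
                        e->key = key;
                        e->id = next_id++;
                        e->next = table_head;
                        table_head = e;
                }
        }
        if (e != NULL) {
                id = e->id;
        }
        pthread_rwlock_unlock(&table_lock);
        return (id);
}
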
+
+#define NECP_NULL_SERVICE_ID 1
+static u_int32_t
+necp_get_new_uuid_id(void)
+{
+       u_int32_t newid = 0;
+
+       lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE);
+
+       necp_last_uuid_id++;
+       if (necp_last_uuid_id < (NECP_NULL_SERVICE_ID + 1)) {
+               necp_last_uuid_id = (NECP_NULL_SERVICE_ID + 1);
+       }
+
+       newid = necp_last_uuid_id;
+       if (newid == 0) {
+               NECPLOG0(LOG_DEBUG, "Failed to allocate uuid id.\n");
+               return (0);
+       }
+
+       return (newid);
+}
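
necp_get_new_uuid_id() hands out monotonically increasing identifiers while skipping 0 and the reserved NECP_NULL_SERVICE_ID. A minimal user-space sketch of that reservation logic (constant and variable names are illustrative):

#include <stdint.h>

#define RESERVED_NULL_ID        1       // stands in for NECP_NULL_SERVICE_ID

/*
 * Monotonic ID allocator that never returns 0 or the reserved null ID.
 * The caller is assumed to hold whatever lock protects last_id, just as
 * the kernel asserts that its policy lock is held exclusively.
 */
static uint32_t last_id = 0;

static uint32_t
allocate_id(void)
{
        last_id++;
        if (last_id < (RESERVED_NULL_ID + 1)) {
                last_id = RESERVED_NULL_ID + 1;
        }
        return (last_id);
}
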
+
+static struct necp_uuid_id_mapping *
+necp_uuid_lookup_app_id_locked(uuid_t uuid)
+{
+       struct necp_uuid_id_mapping *searchentry = NULL;
+       struct necp_uuid_id_mapping *foundentry = NULL;
+
+       LIST_FOREACH(searchentry, APPUUIDHASH(uuid), chain) {
+               if (uuid_compare(searchentry->uuid, uuid) == 0) {
+                       foundentry = searchentry;
+                       break;
+               }
+       }
+
+       return (foundentry);
+}
+
+static u_int32_t
+necp_create_uuid_app_id_mapping(uuid_t uuid, bool *allocated_mapping, bool uuid_policy_table)
+{
+       u_int32_t local_id = 0;
+       struct necp_uuid_id_mapping *existing_mapping = NULL;
+
+       lck_rw_assert(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE);
+
+       if (allocated_mapping) {
+               *allocated_mapping = FALSE;
+       }
+
+       existing_mapping = necp_uuid_lookup_app_id_locked(uuid);
+       if (existing_mapping != NULL) {
+               local_id = existing_mapping->id;
                existing_mapping->refcount++;
                if (uuid_policy_table) {
                        existing_mapping->table_refcount++;
@@ -3244,7 +4017,7 @@ necp_kernel_socket_policies_update_uuid_table(void)
 
 #define        NECP_KERNEL_VALID_IP_OUTPUT_CONDITIONS (NECP_KERNEL_CONDITION_ALL_INTERFACES | NECP_KERNEL_CONDITION_BOUND_INTERFACE | NECP_KERNEL_CONDITION_PROTOCOL | NECP_KERNEL_CONDITION_LOCAL_START | NECP_KERNEL_CONDITION_LOCAL_END | NECP_KERNEL_CONDITION_LOCAL_PREFIX | NECP_KERNEL_CONDITION_REMOTE_START | NECP_KERNEL_CONDITION_REMOTE_END | NECP_KERNEL_CONDITION_REMOTE_PREFIX | NECP_KERNEL_CONDITION_POLICY_ID | NECP_KERNEL_CONDITION_LAST_INTERFACE)
 static necp_kernel_policy_id
-necp_kernel_ip_output_policy_add(necp_policy_id parent_policy_id, necp_policy_order order, necp_policy_order suborder, u_int32_t session_order, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_kernel_policy_id cond_policy_id, ifnet_t cond_bound_interface, u_int32_t cond_last_interface_index, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter)
+necp_kernel_ip_output_policy_add(necp_policy_id parent_policy_id, necp_policy_order order, necp_policy_order suborder, u_int32_t session_order, int session_pid, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_kernel_policy_id cond_policy_id, ifnet_t cond_bound_interface, u_int32_t cond_last_interface_index, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter)
 {
        struct necp_kernel_ip_output_policy *new_kernel_policy = NULL;
        struct necp_kernel_ip_output_policy *tmp_kernel_policy = NULL;
@@ -3260,6 +4033,7 @@ necp_kernel_ip_output_policy_add(necp_policy_id parent_policy_id, necp_policy_or
        new_kernel_policy->suborder = suborder;
        new_kernel_policy->order = order;
        new_kernel_policy->session_order = session_order;
+       new_kernel_policy->session_pid = session_pid;
 
        // Sanitize condition mask
        new_kernel_policy->condition_mask = (condition_mask & NECP_KERNEL_VALID_IP_OUTPUT_CONDITIONS);
@@ -3365,18 +4139,23 @@ necp_kernel_ip_output_policy_delete(necp_kernel_policy_id policy_id)
 static void
 necp_kernel_ip_output_policies_dump_all(void)
 {
-       struct necp_kernel_ip_output_policy *policy = NULL;
-       int policy_i;
-       int id_i;
-
        if (necp_debug) {
+               struct necp_kernel_ip_output_policy *policy = NULL;
+               int policy_i;
+               int id_i;
+               char result_string[MAX_RESULT_STRING_LEN];
+               char proc_name_string[MAXCOMLEN + 1];
+               memset(result_string, 0, MAX_RESULT_STRING_LEN);
+               memset(proc_name_string, 0, MAXCOMLEN + 1);
+
                NECPLOG0(LOG_DEBUG, "NECP IP Output Policies:\n");
                NECPLOG0(LOG_DEBUG, "-----------\n");
                for (id_i = 0; id_i < NECP_KERNEL_IP_OUTPUT_POLICIES_MAP_NUM_ID_BUCKETS; id_i++) {
                        NECPLOG(LOG_DEBUG, " ID Bucket: %d\n", id_i);
                        for (policy_i = 0; necp_kernel_ip_output_policies_map[id_i] != NULL && (necp_kernel_ip_output_policies_map[id_i])[policy_i] != NULL; policy_i++) {
                                policy = (necp_kernel_ip_output_policies_map[id_i])[policy_i];
-                               NECPLOG(LOG_DEBUG, "\t%d. Policy ID: %d, Order: %d.%d.%d, Mask: %x, Result: %d, Parameter: %d\n", policy_i, policy->id, policy->session_order, policy->order, policy->suborder, policy->condition_mask, policy->result, policy->result_parameter);
+                               proc_name(policy->session_pid, proc_name_string, MAXCOMLEN);
+                               NECPLOG(LOG_DEBUG, "\t%3d. Policy ID: %5d\tProcess: %10.10s\tOrder: %04d.%04d.%d\tMask: %5x\tResult: %s\n", policy_i, policy->id, proc_name_string, policy->session_order, policy->order, policy->suborder, policy->condition_mask, necp_get_result_description(result_string, policy->result, policy->result_parameter));
                        }
                        NECPLOG0(LOG_DEBUG, "-----------\n");
                }
@@ -3697,7 +4476,7 @@ necp_hostname_matches_domain(struct substring hostname_substring, u_int8_t hostn
                        memcmp(hostname_substring.string, domain_substring.string, hostname_substring.length) == 0) {
                        return (TRUE);
                }
-       } else if (domain_dot_count > 0 && domain_dot_count < hostname_dot_count) {
+       } else if (domain_dot_count < hostname_dot_count) {
                if (necp_check_suffix(hostname_substring, domain_substring, TRUE)) {
                        return (TRUE);
                }
@@ -3706,8 +4485,9 @@ necp_hostname_matches_domain(struct substring hostname_substring, u_int8_t hostn
        return (FALSE);
 }
 
+#define        NECP_KERNEL_ADDRESS_TYPE_CONDITIONS (NECP_KERNEL_CONDITION_LOCAL_START | NECP_KERNEL_CONDITION_LOCAL_END | NECP_KERNEL_CONDITION_LOCAL_PREFIX | NECP_KERNEL_CONDITION_REMOTE_START | NECP_KERNEL_CONDITION_REMOTE_END | NECP_KERNEL_CONDITION_REMOTE_PREFIX)
 static void
-necp_application_fillout_info_locked(uuid_t application_uuid, uuid_t real_application_uuid, char *account, char *domain, pid_t pid, uid_t uid, u_int16_t protocol, u_int32_t bound_interface_index, u_int32_t traffic_class, struct necp_socket_info *info)
+necp_application_fillout_info_locked(uuid_t application_uuid, uuid_t real_application_uuid, char *account, char *domain, pid_t pid, uid_t uid, u_int16_t protocol, u_int32_t bound_interface_index, u_int32_t traffic_class, union necp_sockaddr_union *local_addr, union necp_sockaddr_union *remote_addr, struct necp_socket_info *info)
 {
        memset(info, 0, sizeof(struct necp_socket_info));
 
@@ -3746,10 +4526,32 @@ necp_application_fillout_info_locked(uuid_t application_uuid, uuid_t real_applic
        if (necp_kernel_application_policies_condition_mask & NECP_KERNEL_CONDITION_DOMAIN) {
                info->domain = domain;
        }
+
+       if (necp_kernel_application_policies_condition_mask & NECP_KERNEL_ADDRESS_TYPE_CONDITIONS) {
+               if (local_addr && local_addr->sa.sa_len > 0) {
+                       memcpy(&info->local_addr, local_addr, local_addr->sa.sa_len);
+               }
+               if (remote_addr && remote_addr->sa.sa_len > 0) {
+                       memcpy(&info->remote_addr, remote_addr, remote_addr->sa.sa_len);
+               }
+       }
+}
+
+static void
+necp_send_application_cell_denied_event(pid_t pid, uuid_t proc_uuid)
+{
+       struct kev_netpolicy_ifdenied ev_ifdenied;
+
+       bzero(&ev_ifdenied, sizeof(ev_ifdenied));
+
+       ev_ifdenied.ev_data.epid = pid;
+       uuid_copy(ev_ifdenied.ev_data.euuid, proc_uuid);
+
+       netpolicy_post_msg(KEV_NETPOLICY_IFDENIED, &ev_ifdenied.ev_data, sizeof(ev_ifdenied));
 }
 
 static int
-necp_application_find_policy_match_internal(u_int8_t *parameters, size_t parameters_size, struct necp_aggregate_result *returned_result)
+necp_application_find_policy_match_internal(u_int8_t *parameters, u_int32_t parameters_size, struct necp_aggregate_result *returned_result)
 {
        int error = 0;
        size_t offset = 0;
@@ -3757,6 +4559,7 @@ necp_application_find_policy_match_internal(u_int8_t *parameters, size_t paramet
        struct necp_kernel_socket_policy *matched_policy = NULL;
        struct necp_socket_info info;
        necp_kernel_policy_filter filter_control_unit = 0;
+       u_int32_t route_rule_id = 0;
        necp_kernel_policy_result service_action = 0;
        necp_kernel_policy_service service = { 0, 0 };
 
@@ -3765,7 +4568,12 @@ necp_application_find_policy_match_internal(u_int8_t *parameters, size_t paramet
        u_int16_t protocol = 0;
        u_int32_t bound_interface_index = 0;
        u_int32_t traffic_class = 0;
+       union necp_sockaddr_union local_addr;
+       union necp_sockaddr_union remote_addr;
+       bool no_remote_addr = FALSE;
 
+       memset(&local_addr, 0, sizeof(local_addr));
+       memset(&remote_addr, 0, sizeof(remote_addr));
        uuid_t application_uuid;
        uuid_clear(application_uuid);
        uuid_t real_application_uuid;
@@ -3773,6 +4581,10 @@ necp_application_find_policy_match_internal(u_int8_t *parameters, size_t paramet
        char *domain = NULL;
        char *account = NULL;
 
+       u_int32_t netagent_ids[NECP_MAX_NETAGENTS];
+       memset(&netagent_ids, 0, sizeof(netagent_ids));
+       int netagent_cursor;
+
        if (returned_result == NULL) {
                return (EINVAL);
        }
@@ -3789,11 +4601,11 @@ necp_application_find_policy_match_internal(u_int8_t *parameters, size_t paramet
        }
        lck_rw_done(&necp_kernel_policy_lock);
 
-       while (offset < parameters_size) {
+       while ((offset + sizeof(u_int8_t) + sizeof(u_int32_t)) <= parameters_size) {
                u_int8_t type = necp_buffer_get_tlv_type(parameters, offset);
-               size_t length = necp_buffer_get_tlv_length(parameters, offset);
+               u_int32_t length = necp_buffer_get_tlv_length(parameters, offset);
 
-               if (length > 0 && (offset + sizeof(u_int8_t) + sizeof(size_t) + length) <= parameters_size) {
+               if (length > 0 && (offset + sizeof(u_int8_t) + sizeof(u_int32_t) + length) <= parameters_size) {
                        u_int8_t *value = necp_buffer_get_tlv_value(parameters, offset, NULL);
                        if (value != NULL) {
                                switch (type) {
@@ -3855,6 +4667,20 @@ necp_application_find_policy_match_internal(u_int8_t *parameters, size_t paramet
                                                }
                                                break;
                                        }
+                                       case NECP_POLICY_CONDITION_LOCAL_ADDR: {
+                                               if (length >= sizeof(struct necp_policy_condition_addr)) {
+                                                       struct necp_policy_condition_addr *address_struct = (struct necp_policy_condition_addr *)(void *)value;
+                                                       memcpy(&local_addr, &address_struct->address, sizeof(address_struct->address));
+                                               }
+                                               break;
+                                       }
+                                       case NECP_POLICY_CONDITION_REMOTE_ADDR: {
+                                               if (length >= sizeof(struct necp_policy_condition_addr)) {
+                                                       struct necp_policy_condition_addr *address_struct = (struct necp_policy_condition_addr *)(void *)value;
+                                                       memcpy(&remote_addr, &address_struct->address, sizeof(address_struct->address));
+                                               }
+                                               break;
+                                       }
                                        default: {
                                                break;
                                        }
@@ -3862,23 +4688,26 @@ necp_application_find_policy_match_internal(u_int8_t *parameters, size_t paramet
                        }
                }
 
-               offset += sizeof(u_int8_t) + sizeof(size_t) + length;
+               offset += sizeof(u_int8_t) + sizeof(u_int32_t) + length;
        }
 
        // Lock
        lck_rw_lock_shared(&necp_kernel_policy_lock);
 
-       necp_application_fillout_info_locked(application_uuid, real_application_uuid, account, domain, pid, uid, protocol, bound_interface_index, traffic_class, &info);
-       matched_policy = necp_socket_find_policy_match_with_info_locked(necp_kernel_socket_policies_app_layer_map, &info, &filter_control_unit, &service_action, &service);
+       necp_application_fillout_info_locked(application_uuid, real_application_uuid, account, domain, pid, uid, protocol, bound_interface_index, traffic_class, &local_addr, &remote_addr, &info);
+       matched_policy = necp_socket_find_policy_match_with_info_locked(necp_kernel_socket_policies_app_layer_map, &info, &filter_control_unit, &route_rule_id, &service_action, &service, netagent_ids, NECP_MAX_NETAGENTS);
        if (matched_policy) {
+               returned_result->policy_id = matched_policy->id;
                returned_result->routing_result = matched_policy->result;
                memcpy(&returned_result->routing_result_parameter, &matched_policy->result_parameter, sizeof(returned_result->routing_result_parameter));
        } else {
+               returned_result->policy_id = 0;
                returned_result->routing_result = NECP_KERNEL_POLICY_RESULT_NONE;
        }
        returned_result->filter_control_unit = filter_control_unit;
        returned_result->service_action = service_action;
 
+       // Handle trigger service
        if (service.identifier != 0) {
                struct necp_uuid_id_mapping *mapping = necp_uuid_lookup_uuid_with_service_id_locked(service.identifier);
                if (mapping != NULL) {
@@ -3899,6 +4728,103 @@ necp_application_find_policy_match_internal(u_int8_t *parameters, size_t paramet
                }
        }
 
+       // Handle netagents
+       for (netagent_cursor = 0; netagent_cursor < NECP_MAX_NETAGENTS; netagent_cursor++) {
+               struct necp_uuid_id_mapping *mapping = NULL;
+               u_int32_t netagent_id = netagent_ids[netagent_cursor];
+               if (netagent_id == 0) {
+                       break;
+               }
+               mapping = necp_uuid_lookup_uuid_with_service_id_locked(netagent_id);
+               if (mapping != NULL) {
+                       uuid_copy(returned_result->netagents[netagent_cursor], mapping->uuid);
+                       returned_result->netagent_flags[netagent_cursor] = netagent_get_flags(mapping->uuid);
+               }
+       }
+
+       // Do routing evaluation
+       u_int output_bound_interface = bound_interface_index;
+       if (returned_result->routing_result == NECP_KERNEL_POLICY_RESULT_SOCKET_SCOPED) {
+               output_bound_interface = returned_result->routing_result_parameter.scoped_interface_index;
+       } else if (returned_result->routing_result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL) {
+               output_bound_interface = returned_result->routing_result_parameter.tunnel_interface_index;
+       }
+
+       if (remote_addr.sa.sa_len == 0) {
+               no_remote_addr = TRUE;
+               // Default to 0.0.0.0:0
+               remote_addr.sa.sa_family = AF_INET;
+               remote_addr.sa.sa_len = sizeof(struct sockaddr_in);
+       }
+
+       struct rtentry *rt = NULL;
+       rt = rtalloc1_scoped((struct sockaddr *)&remote_addr, 0, 0, output_bound_interface);
+
+       if (no_remote_addr &&
+               (rt == NULL || rt->rt_ifp == NULL)) {
+               // Route lookup for default IPv4 failed, try IPv6
+
+               // Cleanup old route if necessary
+               if (rt != NULL) {
+                       rtfree(rt);
+                       rt = NULL;
+               }
+
+               // Reset address to ::
+               memset(&remote_addr, 0, sizeof(remote_addr));
+               remote_addr.sa.sa_family = AF_INET6;
+               remote_addr.sa.sa_len = sizeof(struct sockaddr_in6);
+
+               // Get route
+               rt = rtalloc1_scoped((struct sockaddr *)&remote_addr, 0, 0, output_bound_interface);
+       }
+
+       returned_result->routed_interface_index = 0;
+       if (rt != NULL &&
+           rt->rt_ifp != NULL) {
+               returned_result->routed_interface_index = rt->rt_ifp->if_index;
+               /*
+                * For local addresses, we allow the interface scope to be
+                * either the loopback interface or the interface hosting the
+                * local address.
+                */
+               if (bound_interface_index != IFSCOPE_NONE &&
+                   rt->rt_ifa != NULL && rt->rt_ifa->ifa_ifp &&
+                   (output_bound_interface == lo_ifp->if_index ||
+                   rt->rt_ifp->if_index == lo_ifp->if_index ||
+                   rt->rt_ifa->ifa_ifp->if_index == bound_interface_index)) {
+                       struct sockaddr_storage dst;
+                       unsigned int ifscope = bound_interface_index;
+
+                       /*
+                        * Transform dst into the internal routing table form
+                        */
+                       (void) sa_copy((struct sockaddr *)&remote_addr,
+                                       &dst, &ifscope);
+
+                       if ((rt->rt_ifp->if_index == lo_ifp->if_index) ||
+                           rt_ifa_is_dst((struct sockaddr *)&dst, rt->rt_ifa))
+                               returned_result->routed_interface_index =
+                                       bound_interface_index;
+               }
+       }
+
+       bool cellular_denied = FALSE;
+       bool route_is_allowed = necp_route_is_allowed(rt, NULL, route_rule_id, &cellular_denied);
+       if (!route_is_allowed) {
+               // If the route is blocked, treat the lookup as a drop
+               returned_result->routing_result = NECP_KERNEL_POLICY_RESULT_DROP;
+               memset(&returned_result->routing_result_parameter, 0, sizeof(returned_result->routing_result_parameter));
+
+               if (cellular_denied) {
+                       necp_send_application_cell_denied_event(pid, application_uuid);
+               }
+       }
+
+       if (rt != NULL) {
+               rtfree(rt);
+               rt = NULL;
+       }
        // Unlock
        lck_rw_done(&necp_kernel_policy_lock);
 
@@ -4159,7 +5085,6 @@ necp_socket_calc_flowhash_locked(struct necp_socket_info *info)
        return (net_flowhash(info, sizeof(*info), necp_kernel_socket_policies_gencount));
 }
 
-#define        NECP_KERNEL_ADDRESS_TYPE_CONDITIONS (NECP_KERNEL_CONDITION_LOCAL_START | NECP_KERNEL_CONDITION_LOCAL_END | NECP_KERNEL_CONDITION_LOCAL_PREFIX | NECP_KERNEL_CONDITION_REMOTE_START | NECP_KERNEL_CONDITION_REMOTE_END | NECP_KERNEL_CONDITION_REMOTE_PREFIX)
 static void
 necp_socket_fillout_info_locked(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int32_t override_bound_interface, struct necp_socket_info *info)
 {
@@ -4270,13 +5195,16 @@ necp_socket_fillout_info_locked(struct inpcb *inp, struct sockaddr *override_loc
 }
 
 static inline struct necp_kernel_socket_policy *
-necp_socket_find_policy_match_with_info_locked(struct necp_kernel_socket_policy **policy_search_array, struct necp_socket_info *info, necp_kernel_policy_filter *return_filter, necp_kernel_policy_result *return_service_action, necp_kernel_policy_service *return_service)
+necp_socket_find_policy_match_with_info_locked(struct necp_kernel_socket_policy **policy_search_array, struct necp_socket_info *info, necp_kernel_policy_filter *return_filter, u_int32_t *return_route_rule_id, necp_kernel_policy_result *return_service_action, necp_kernel_policy_service *return_service, u_int32_t *return_netagent_array, size_t netagent_array_count)
 {
        struct necp_kernel_socket_policy *matched_policy = NULL;
        u_int32_t skip_order = 0;
        u_int32_t skip_session_order = 0;
+       u_int32_t route_rule_id_array[MAX_AGGREGATE_ROUTE_RULES];
+       size_t route_rule_id_count = 0;
        int i;
-       
+       size_t netagent_cursor = 0;
+
        // Pre-process domain for quick matching
        struct substring domain_substring = necp_trim_dots_and_stars(info->domain, info->domain ? strlen(info->domain) : 0);
        u_int8_t domain_dot_count = necp_count_dots(domain_substring.string, domain_substring.length);
@@ -4285,6 +5213,10 @@ necp_socket_find_policy_match_with_info_locked(struct necp_kernel_socket_policy
                *return_filter = 0;
        }
 
+       if (return_route_rule_id) {
+               *return_route_rule_id = 0;
+       }
+
        if (return_service_action) {
                *return_service_action = 0;
        }
@@ -4327,7 +5259,15 @@ necp_socket_find_policy_match_with_info_locked(struct necp_kernel_socket_policy
                                                }
                                        }
                                        continue;
-                               } else if (necp_kernel_socket_result_is_service_type(policy_search_array[i])) {
+                               } else if (policy_search_array[i]->result == NECP_KERNEL_POLICY_RESULT_ROUTE_RULES) {
+                                       if (return_route_rule_id && route_rule_id_count < MAX_AGGREGATE_ROUTE_RULES) {
+                                               route_rule_id_array[route_rule_id_count++] = policy_search_array[i]->result_parameter.route_rule_id;
+                                               if (necp_debug > 1) {
+                                                       NECPLOG(LOG_DEBUG, "Socket Policy: (Application %d Real Application %d BoundInterface %d Proto %d) Route Rule %d", info->application_id, info->real_application_id, info->bound_interface_index, info->protocol, policy_search_array[i]->result_parameter.route_rule_id);
+                                               }
+                                       }
+                                       continue;
+                               } else if (necp_kernel_socket_result_is_trigger_service_type(policy_search_array[i])) {
                                        if (return_service_action && *return_service_action == 0) {
                                                *return_service_action = policy_search_array[i]->result;
                                                if (necp_debug > 1) {
@@ -4342,6 +5282,16 @@ necp_socket_find_policy_match_with_info_locked(struct necp_kernel_socket_policy
                                                }
                                        }
                                        continue;
+                               } else if (policy_search_array[i]->result == NECP_KERNEL_POLICY_RESULT_USE_NETAGENT) {
+                                       if (return_netagent_array != NULL &&
+                                               netagent_cursor < netagent_array_count) {
+                                               return_netagent_array[netagent_cursor] = policy_search_array[i]->result_parameter.netagent_id;
+                                               netagent_cursor++;
+                                               if (necp_debug > 1) {
+                                                       NECPLOG(LOG_DEBUG, "Socket Policy: (Application %d Real Application %d BoundInterface %d Proto %d) Use Netagent %d", info->application_id, info->real_application_id, info->bound_interface_index, info->protocol, policy_search_array[i]->result_parameter.netagent_id);
+                                               }
+                                       }
+                                       continue;
                                }
 
                                // Passed all tests, found a match
@@ -4356,6 +5306,11 @@ necp_socket_find_policy_match_with_info_locked(struct necp_kernel_socket_policy
                }
        }
 
+       if (route_rule_id_count == 1) {
+               *return_route_rule_id = route_rule_id_array[0];
+       } else if (route_rule_id_count > 1) {
+               *return_route_rule_id = necp_create_aggregate_route_rule(route_rule_id_array);
+       }
        return (matched_policy);
 }
 
@@ -4419,11 +5374,16 @@ necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local
 {
        struct socket *so = NULL;
        necp_kernel_policy_filter filter_control_unit = 0;
+       u_int32_t route_rule_id = 0;
        struct necp_kernel_socket_policy *matched_policy = NULL;
        necp_kernel_policy_id matched_policy_id = NECP_KERNEL_POLICY_ID_NONE;
        necp_kernel_policy_result service_action = 0;
        necp_kernel_policy_service service = { 0, 0 };
 
+       u_int32_t netagent_ids[NECP_MAX_NETAGENTS];
+       memset(&netagent_ids, 0, sizeof(netagent_ids));
+       int netagent_cursor;
+
        struct necp_socket_info info;
 
        if (inp == NULL) {
@@ -4440,6 +5400,7 @@ necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local
                        inp->inp_policyresult.policy_gencount = 0;
                        inp->inp_policyresult.flowhash = 0;
                        inp->inp_policyresult.results.filter_control_unit = 0;
+                       inp->inp_policyresult.results.route_rule_id = 0;
                        if (necp_pass_loopback > 0 &&
                                necp_is_loopback(override_local_addr, override_remote_addr, inp, NULL)) {
                                inp->inp_policyresult.results.result = NECP_KERNEL_POLICY_RESULT_PASS;
@@ -4458,6 +5419,7 @@ necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local
                inp->inp_policyresult.policy_gencount = 0;
                inp->inp_policyresult.flowhash = 0;
                inp->inp_policyresult.results.filter_control_unit = 0;
+               inp->inp_policyresult.results.route_rule_id = 0;
                inp->inp_policyresult.results.result = NECP_KERNEL_POLICY_RESULT_PASS;
                return (NECP_KERNEL_POLICY_ID_NONE);
        }
@@ -4481,7 +5443,7 @@ necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local
        }
 
        // Match socket to policy
-       matched_policy = necp_socket_find_policy_match_with_info_locked(necp_kernel_socket_policies_map[NECP_SOCKET_MAP_APP_ID_TO_BUCKET(info.application_id)], &info, &filter_control_unit, &service_action, &service);
+       matched_policy = necp_socket_find_policy_match_with_info_locked(necp_kernel_socket_policies_map[NECP_SOCKET_MAP_APP_ID_TO_BUCKET(info.application_id)], &info, &filter_control_unit, &route_rule_id, &service_action, &service, netagent_ids, NECP_MAX_NETAGENTS);
        // If the socket matched a scoped service policy, mark as Drop if not registered.
        // This covers the cases in which a service is required (on demand) but hasn't started yet.
        if ((service_action == NECP_KERNEL_POLICY_RESULT_TRIGGER_SCOPED ||
@@ -4502,23 +5464,67 @@ necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local
                        inp->inp_policyresult.policy_gencount = necp_kernel_socket_policies_gencount;
                        inp->inp_policyresult.flowhash = flowhash;
                        inp->inp_policyresult.results.filter_control_unit = 0;
+                       inp->inp_policyresult.results.route_rule_id = 0;
                        inp->inp_policyresult.results.result = NECP_KERNEL_POLICY_RESULT_DROP;
-                       
+
                        if (necp_debug > 1) {
                                NECPLOG(LOG_DEBUG, "Socket Policy: (BoundInterface %d Proto %d) Dropping packet because service is not registered", info.bound_interface_index, info.protocol);
                        }
-                       
+
                        // Unlock
                        lck_rw_done(&necp_kernel_policy_lock);
                        return (NECP_KERNEL_POLICY_ID_NONE);
                }
        }
+       // Verify netagents
+       for (netagent_cursor = 0; netagent_cursor < NECP_MAX_NETAGENTS; netagent_cursor++) {
+               struct necp_uuid_id_mapping *mapping = NULL;
+               u_int32_t netagent_id = netagent_ids[netagent_cursor];
+               if (netagent_id == 0) {
+                       break;
+               }
+               mapping = necp_uuid_lookup_uuid_with_service_id_locked(netagent_id);
+               if (mapping != NULL) {
+                       u_int32_t agent_flags = 0;
+                       agent_flags = netagent_get_flags(mapping->uuid);
+                       if (agent_flags & NETAGENT_FLAG_REGISTERED) {
+                               if (agent_flags & NETAGENT_FLAG_ACTIVE) {
+                                       continue;
+                               } else if ((agent_flags & NETAGENT_FLAG_VOLUNTARY) == 0) {
+                                       if (agent_flags & NETAGENT_FLAG_KERNEL_ACTIVATED) {
+                                               int trigger_error = 0;
+                                               trigger_error = netagent_kernel_trigger(mapping->uuid);
+                                               if (necp_debug > 1) {
+                                                       NECPLOG(LOG_DEBUG, "Socket Policy: Triggering inactive agent, error %d", trigger_error);
+                                               }
+                                       }
+
+                                       // Mark socket as a drop if required agent is not active
+                                       inp->inp_policyresult.policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH;
+                                       inp->inp_policyresult.policy_gencount = necp_kernel_socket_policies_gencount;
+                                       inp->inp_policyresult.flowhash = flowhash;
+                                       inp->inp_policyresult.results.filter_control_unit = 0;
+                                       inp->inp_policyresult.results.route_rule_id = 0;
+                                       inp->inp_policyresult.results.result = NECP_KERNEL_POLICY_RESULT_DROP;
+
+                                       if (necp_debug > 1) {
+                                               NECPLOG(LOG_DEBUG, "Socket Policy: (BoundInterface %d Proto %d) Dropping packet because agent is not active", info.bound_interface_index, info.protocol);
+                                       }
+
+                                       // Unlock
+                                       lck_rw_done(&necp_kernel_policy_lock);
+                                       return (NECP_KERNEL_POLICY_ID_NONE);
+                               }
+                       }
+               }
+       }
        if (matched_policy) {
                matched_policy_id = matched_policy->id;
                inp->inp_policyresult.policy_id = matched_policy->id;
                inp->inp_policyresult.policy_gencount = necp_kernel_socket_policies_gencount;
                inp->inp_policyresult.flowhash = flowhash;
                inp->inp_policyresult.results.filter_control_unit = filter_control_unit;
+               inp->inp_policyresult.results.route_rule_id = route_rule_id;
                inp->inp_policyresult.results.result = matched_policy->result;
                memcpy(&inp->inp_policyresult.results.result_parameter, &matched_policy->result_parameter, sizeof(matched_policy->result_parameter));
 
@@ -4529,10 +5535,15 @@ necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local
                                NECPLOG(LOG_DEBUG, "Marking socket in state %d as defunct", so->so_state);
                        }
                        sosetdefunct(current_proc(), so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL, TRUE);
+               } else if (necp_socket_is_connected(inp) &&
+                                  matched_policy->result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL &&
+                                  info.protocol == IPPROTO_TCP) {
+                       // Reset MSS on TCP socket if tunnel policy changes
+                       tcp_mtudisc(inp, 0);
                }
 
                if (necp_debug > 1) {
-                       NECPLOG(LOG_DEBUG, "Socket Policy: (BoundInterface %d Proto %d) Policy %d Result %d Parameter %d", info.bound_interface_index, info.protocol, matched_policy->id, matched_policy->result, matched_policy->result_parameter.tunnel_interface_index);
+                       NECPLOG(LOG_DEBUG, "Socket Policy: %p (BoundInterface %d Proto %d) Policy %d Result %d Parameter %d", inp->inp_socket, info.bound_interface_index, info.protocol, matched_policy->id, matched_policy->result, matched_policy->result_parameter.tunnel_interface_index);
                }
        } else if (necp_drop_all_order > 0) {
                // Mark socket as a drop if set
@@ -4540,6 +5551,7 @@ necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local
                inp->inp_policyresult.policy_gencount = necp_kernel_socket_policies_gencount;
                inp->inp_policyresult.flowhash = flowhash;
                inp->inp_policyresult.results.filter_control_unit = 0;
+               inp->inp_policyresult.results.route_rule_id = 0;
                inp->inp_policyresult.results.result = NECP_KERNEL_POLICY_RESULT_DROP;
        } else {
                // Mark non-matching socket so we don't re-check it
@@ -4547,6 +5559,7 @@ necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local
                inp->inp_policyresult.policy_gencount = necp_kernel_socket_policies_gencount;
                inp->inp_policyresult.flowhash = flowhash;
                inp->inp_policyresult.results.filter_control_unit = filter_control_unit; // We may have matched a filter, so mark it!
+               inp->inp_policyresult.results.route_rule_id = route_rule_id; // We may have matched a route rule, so mark it!
                inp->inp_policyresult.results.result = NECP_KERNEL_POLICY_RESULT_NONE;
        }
 
@@ -5257,11 +6270,22 @@ necp_set_socket_attributes(struct socket *so, struct sockopt *sopt)
 {
        int error = 0;
        u_int8_t *buffer = NULL;
-       struct inpcb *inp = sotoinpcb(so);
+       struct inpcb *inp = NULL;
+
+       if ((SOCK_DOM(so) != PF_INET
+#if INET6
+           && SOCK_DOM(so) != PF_INET6
+#endif
+           )) {
+               error = EINVAL;
+               goto done;
+       }
+
+       inp = sotoinpcb(so);
 
        size_t valsize = sopt->sopt_valsize;
        if (valsize == 0 ||
-               valsize > ((sizeof(u_int8_t) + sizeof(size_t) + NECP_MAX_SOCKET_ATTRIBUTE_STRING_LENGTH) * 2)) {
+               valsize > ((sizeof(u_int8_t) + sizeof(u_int32_t) + NECP_MAX_SOCKET_ATTRIBUTE_STRING_LENGTH) * 2)) {
                goto done;
        }
 
@@ -5308,10 +6332,10 @@ necp_get_socket_attributes(struct socket *so, struct sockopt *sopt)
        struct inpcb *inp = sotoinpcb(so);
        
        if (inp->inp_necp_attributes.inp_domain != NULL) {
-               valsize += sizeof(u_int8_t) + sizeof(size_t) + strlen(inp->inp_necp_attributes.inp_domain);
+               valsize += sizeof(u_int8_t) + sizeof(u_int32_t) + strlen(inp->inp_necp_attributes.inp_domain);
        }
        if (inp->inp_necp_attributes.inp_account != NULL) {
-               valsize += sizeof(u_int8_t) + sizeof(size_t) + strlen(inp->inp_necp_attributes.inp_account);
+               valsize += sizeof(u_int8_t) + sizeof(u_int32_t) + strlen(inp->inp_necp_attributes.inp_account);
        }
        if (valsize == 0) {
                goto done;
@@ -5344,7 +6368,174 @@ done:
 }
 
 static bool
-necp_socket_is_allowed_to_send_recv_internal(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id)
+necp_route_is_allowed_inner(struct rtentry *route, struct ifnet *ifp, u_int32_t route_rule_id, bool *cellular_denied)
+{
+       bool default_is_allowed = TRUE;
+       u_int8_t type_aggregate_action = NECP_ROUTE_RULE_NONE;
+       int exception_index = 0;
+       struct ifnet *delegated_ifp = NULL;
+       struct necp_route_rule *route_rule = NULL;
+
+       route_rule = necp_lookup_route_rule_locked(&necp_route_rules, route_rule_id);
+       if (route_rule == NULL) {
+               return (TRUE);
+       }
+
+       default_is_allowed = (route_rule->default_action == NECP_ROUTE_RULE_DENY_INTERFACE) ? FALSE : TRUE;
+       if (ifp == NULL) {
+               ifp = route->rt_ifp;
+       }
+       if (ifp == NULL) {
+               if (necp_debug > 1 && !default_is_allowed) {
+                       NECPLOG(LOG_DEBUG, "Route Allowed: No interface for route, using default for Rule %d Allowed %d", route_rule_id, default_is_allowed);
+               }
+               return (default_is_allowed);
+       }
+
+       delegated_ifp = ifp->if_delegated.ifp;
+       for (exception_index = 0; exception_index < MAX_ROUTE_RULE_INTERFACES; exception_index++) {
+               if (route_rule->exception_if_indices[exception_index] == 0) {
+                       break;
+               }
+               if (route_rule->exception_if_indices[exception_index] == ifp->if_index ||
+                       (delegated_ifp != NULL && route_rule->exception_if_indices[exception_index] == delegated_ifp->if_index)) {
+                       if (necp_debug > 1) {
+                               NECPLOG(LOG_DEBUG, "Route Allowed: Interface match %d for Rule %d Allowed %d", route_rule->exception_if_indices[exception_index], route_rule_id, ((route_rule->exception_if_actions[exception_index] == NECP_ROUTE_RULE_DENY_INTERFACE) ? FALSE : TRUE));
+                       }
+                       return ((route_rule->exception_if_actions[exception_index] == NECP_ROUTE_RULE_DENY_INTERFACE) ? FALSE : TRUE);
+               }
+       }
+
+       if (route_rule->cellular_action != NECP_ROUTE_RULE_NONE &&
+               IFNET_IS_CELLULAR(ifp)) {
+               if (cellular_denied != NULL) {
+                       // Let clients know that cellular was blocked
+                       *cellular_denied = TRUE;
+               }
+               if (type_aggregate_action == NECP_ROUTE_RULE_NONE ||
+                       (type_aggregate_action == NECP_ROUTE_RULE_ALLOW_INTERFACE &&
+                        route_rule->cellular_action == NECP_ROUTE_RULE_DENY_INTERFACE)) {
+                       // Deny wins if there is a conflict
+                       type_aggregate_action = route_rule->cellular_action;
+               }
+       }
+
+       if (route_rule->wifi_action != NECP_ROUTE_RULE_NONE &&
+               IFNET_IS_WIFI(ifp)) {
+               if (type_aggregate_action == NECP_ROUTE_RULE_NONE ||
+                       (type_aggregate_action == NECP_ROUTE_RULE_ALLOW_INTERFACE &&
+                        route_rule->wifi_action == NECP_ROUTE_RULE_DENY_INTERFACE)) {
+                       // Deny wins if there is a conflict
+                       type_aggregate_action = route_rule->wifi_action;
+               }
+       }
+
+       if (route_rule->wired_action != NECP_ROUTE_RULE_NONE &&
+               IFNET_IS_WIRED(ifp)) {
+               if (type_aggregate_action == NECP_ROUTE_RULE_NONE ||
+                       (type_aggregate_action == NECP_ROUTE_RULE_ALLOW_INTERFACE &&
+                        route_rule->wired_action == NECP_ROUTE_RULE_DENY_INTERFACE)) {
+                       // Deny wins if there is a conflict
+                       type_aggregate_action = route_rule->wired_action;
+               }
+       }
+
+       if (route_rule->expensive_action != NECP_ROUTE_RULE_NONE &&
+               IFNET_IS_EXPENSIVE(ifp)) {
+               if (type_aggregate_action == NECP_ROUTE_RULE_NONE ||
+                       (type_aggregate_action == NECP_ROUTE_RULE_ALLOW_INTERFACE &&
+                        route_rule->expensive_action == NECP_ROUTE_RULE_DENY_INTERFACE)) {
+                       // Deny wins if there is a conflict
+                       type_aggregate_action = route_rule->expensive_action;
+               }
+       }
+
+       if (type_aggregate_action != NECP_ROUTE_RULE_NONE) {
+               if (necp_debug > 1) {
+                       NECPLOG(LOG_DEBUG, "Route Allowed: C:%d WF:%d W:%d E:%d for Rule %d Allowed %d", route_rule->cellular_action, route_rule->wifi_action, route_rule->wired_action, route_rule->expensive_action, route_rule_id, ((type_aggregate_action == NECP_ROUTE_RULE_DENY_INTERFACE) ? FALSE : TRUE));
+               }
+               return ((type_aggregate_action == NECP_ROUTE_RULE_DENY_INTERFACE) ? FALSE : TRUE);
+       }
+
+       if (necp_debug > 1 && !default_is_allowed) {
+               NECPLOG(LOG_DEBUG, "Route Allowed: Using default for Rule %d Allowed %d", route_rule_id, default_is_allowed);
+       }
+       return (default_is_allowed);
+}
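
Each interface-type check in necp_route_is_allowed_inner() (cellular, Wi-Fi, wired, expensive) folds its configured action into a single aggregate action, with deny overriding allow when the checks disagree. The combining rule, isolated into a small helper with stand-in constants rather than the real NECP_ROUTE_RULE_* values:

/*
 * Isolated version of the "deny wins if there is a conflict" folding used by
 * the cellular, Wi-Fi, wired, and expensive checks above.
 */
enum rule_action {
        RULE_NONE = 0,
        RULE_ALLOW_INTERFACE = 1,
        RULE_DENY_INTERFACE = 2,
};

static enum rule_action
fold_action(enum rule_action aggregate, enum rule_action action)
{
        if (action == RULE_NONE) {
                return (aggregate);     // this rule expresses no opinion about the interface type
        }
        if (aggregate == RULE_NONE ||
            (aggregate == RULE_ALLOW_INTERFACE && action == RULE_DENY_INTERFACE)) {
                return (action);        // first opinion, or a deny overriding an allow
        }
        return (aggregate);
}
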
+
+static bool
+necp_route_is_allowed(struct rtentry *route, struct ifnet *interface, u_int32_t route_rule_id, bool *cellular_denied)
+{
+       if ((route == NULL && interface == NULL) || route_rule_id == 0) {
+               if (necp_debug > 1) {
+                       NECPLOG(LOG_DEBUG, "Route Allowed: no route or interface, Rule %d Allowed %d", route_rule_id, TRUE);
+               }
+               return (TRUE);
+       }
+
+       if (ROUTE_RULE_IS_AGGREGATE(route_rule_id)) {
+               struct necp_aggregate_route_rule *aggregate_route_rule = necp_lookup_aggregate_route_rule_locked(route_rule_id);
+               if (aggregate_route_rule != NULL) {
+                       int index = 0;
+                       for (index = 0; index < MAX_AGGREGATE_ROUTE_RULES; index++) {
+                               u_int32_t sub_route_rule_id = aggregate_route_rule->rule_ids[index];
+                               if (sub_route_rule_id == 0) {
+                                       break;
+                               }
+                               if (!necp_route_is_allowed_inner(route, interface, sub_route_rule_id, cellular_denied)) {
+                                       return (FALSE);
+                               }
+                       }
+               }
+       } else {
+               return (necp_route_is_allowed_inner(route, interface, route_rule_id, cellular_denied));
+       }
+
+       return (TRUE);
+}
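
An aggregate route rule is evaluated as the conjunction of its sub-rules: a zero entry terminates the list, and the first sub-rule that denies the route decides the result. A compact sketch of that evaluation, with the per-rule check passed in as a callback standing in for necp_route_is_allowed_inner() and an illustrative constant in place of MAX_AGGREGATE_ROUTE_RULES:

#include <stdbool.h>
#include <stdint.h>

#define MAX_SUB_RULES   16      // stand-in for MAX_AGGREGATE_ROUTE_RULES

/*
 * Aggregate evaluation as above: every listed sub-rule must allow the route,
 * a zero entry terminates the list, and the first denial wins.
 */
static bool
aggregate_allows(const uint32_t sub_rule_ids[MAX_SUB_RULES],
    bool (*single_rule_allows)(uint32_t rule_id))
{
        int i;
        for (i = 0; i < MAX_SUB_RULES; i++) {
                if (sub_rule_ids[i] == 0) {
                        break;
                }
                if (!single_rule_allows(sub_rule_ids[i])) {
                        return (false);
                }
        }
        return (true);
}
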
+
+bool
+necp_packet_is_allowed_over_interface(struct mbuf *packet, struct ifnet *interface)
+{
+       bool is_allowed = TRUE;
+       u_int32_t route_rule_id = necp_get_route_rule_id_from_packet(packet);
+       if (route_rule_id != 0 &&
+               interface != NULL) {
+               lck_rw_lock_shared(&necp_kernel_policy_lock);
+               is_allowed = necp_route_is_allowed(NULL, interface, necp_get_route_rule_id_from_packet(packet), NULL);
+               lck_rw_done(&necp_kernel_policy_lock);
+       }
+       return (is_allowed);
+}
+
+static bool
+necp_netagents_allow_traffic(u_int32_t *netagent_ids, size_t netagent_id_count)
+{
+       size_t netagent_cursor;
+       for (netagent_cursor = 0; netagent_cursor < netagent_id_count; netagent_cursor++) {
+               struct necp_uuid_id_mapping *mapping = NULL;
+               u_int32_t netagent_id = netagent_ids[netagent_cursor];
+               if (netagent_id == 0) {
+                       break;
+               }
+               mapping = necp_uuid_lookup_uuid_with_service_id_locked(netagent_id);
+               if (mapping != NULL) {
+                       u_int32_t agent_flags = 0;
+                       agent_flags = netagent_get_flags(mapping->uuid);
+                       if (agent_flags & NETAGENT_FLAG_REGISTERED) {
+                               if (agent_flags & NETAGENT_FLAG_ACTIVE) {
+                                       continue;
+                               } else if ((agent_flags & NETAGENT_FLAG_VOLUNTARY) == 0) {
+                                       return (FALSE);
+                               }
+                       }
+               }
+       }
+       return (TRUE);
+}
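
necp_netagents_allow_traffic() only vetoes traffic for an agent that is registered, not active, and not voluntary; any other combination defers to the remaining agents. A per-agent version of that decision, using stand-in bits rather than the real NETAGENT_FLAG_* values consulted above:

#include <stdbool.h>
#include <stdint.h>

// Stand-in bits for the registered/active/voluntary flags checked above.
#define FLAG_REGISTERED 0x1
#define FLAG_ACTIVE     0x2
#define FLAG_VOLUNTARY  0x4

/*
 * One agent's verdict, matching the loop body above: a registered agent that
 * is inactive and mandatory (not voluntary) vetoes the traffic; any other
 * combination lets the remaining agents decide.
 */
static bool
agent_allows_traffic(uint32_t agent_flags)
{
        if (agent_flags & FLAG_REGISTERED) {
                if (agent_flags & FLAG_ACTIVE) {
                        return (true);
                }
                if ((agent_flags & FLAG_VOLUNTARY) == 0) {
                        return (false);
                }
        }
        return (true);
}
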
+
+static bool
+necp_socket_is_allowed_to_send_recv_internal(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id)
 {
        u_int32_t verifyifindex = interface ? interface->if_index : 0;
        bool allowed_to_receive = TRUE;
@@ -5352,15 +6543,26 @@ necp_socket_is_allowed_to_send_recv_internal(struct inpcb *inp, struct sockaddr
        u_int32_t flowhash = 0;
        necp_kernel_policy_result service_action = 0;
        necp_kernel_policy_service service = { 0, 0 };
+       u_int32_t route_rule_id = 0;
+       struct rtentry *route = NULL;
+       bool cellular_denied = FALSE;
+
+       u_int32_t netagent_ids[NECP_MAX_NETAGENTS];
+       memset(&netagent_ids, 0, sizeof(netagent_ids));
 
        if (return_policy_id) {
                *return_policy_id = NECP_KERNEL_POLICY_ID_NONE;
        }
+       if (return_route_rule_id) {
+               *return_route_rule_id = 0;
+       }
 
        if (inp == NULL) {
                goto done;
        }
 
+       route = inp->inp_route.ro_rt;
+
        // Don't lock. Possible race condition, but we don't want the performance hit.
        if (necp_kernel_socket_policies_count == 0 ||
                (!(inp->inp_flags2 & INP2_WANT_APP_POLICY) && necp_kernel_socket_policies_non_app_count == 0)) {
@@ -5378,25 +6580,37 @@ necp_socket_is_allowed_to_send_recv_internal(struct inpcb *inp, struct sockaddr
        // If this socket is connected, or we are not taking addresses into account, try to reuse last result
        if ((necp_socket_is_connected(inp) || (override_local_addr == NULL && override_remote_addr == NULL)) && inp->inp_policyresult.policy_id != NECP_KERNEL_POLICY_ID_NONE) {
                bool policies_have_changed = FALSE;
+               bool route_allowed = TRUE;
                lck_rw_lock_shared(&necp_kernel_policy_lock);
                if (inp->inp_policyresult.policy_gencount != necp_kernel_socket_policies_gencount) {
                        policies_have_changed = TRUE;
+               } else {
+                       if (inp->inp_policyresult.results.route_rule_id != 0 &&
+                               !necp_route_is_allowed(route, interface, inp->inp_policyresult.results.route_rule_id, &cellular_denied)) {
+                               route_allowed = FALSE;
+                       }
                }
                lck_rw_done(&necp_kernel_policy_lock);
 
                if (!policies_have_changed) {
-                       if (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_DROP ||
+                       if (!route_allowed ||
+                               inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_DROP ||
                                inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT ||
                                (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL && interface &&
                                inp->inp_policyresult.results.result_parameter.tunnel_interface_index != verifyifindex)) {
-                               allowed_to_receive = FALSE;
-                       } else if (return_policy_id) {
-                               *return_policy_id = inp->inp_policyresult.policy_id;
-                       }
+                                       allowed_to_receive = FALSE;
+                               } else {
+                                       if (return_policy_id) {
+                                               *return_policy_id = inp->inp_policyresult.policy_id;
+                                       }
+                                       if (return_route_rule_id) {
+                                               *return_route_rule_id = inp->inp_policyresult.results.route_rule_id;
+                                       }
+                               }
                        goto done;
                }
        }
-       
+
        // Check for loopback exception
        if (necp_pass_loopback > 0 &&
                necp_is_loopback(override_local_addr, override_remote_addr, inp, NULL)) {
@@ -5415,16 +6629,23 @@ necp_socket_is_allowed_to_send_recv_internal(struct inpcb *inp, struct sockaddr
                if (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_DROP ||
                        inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT ||
                        (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL && interface &&
-                       inp->inp_policyresult.results.result_parameter.tunnel_interface_index != verifyifindex)) {
+                       inp->inp_policyresult.results.result_parameter.tunnel_interface_index != verifyifindex) ||
+                       (inp->inp_policyresult.results.route_rule_id != 0 &&
+                        !necp_route_is_allowed(route, interface, inp->inp_policyresult.results.route_rule_id, &cellular_denied))) {
                        allowed_to_receive = FALSE;
-               } else if (return_policy_id) {
-                       *return_policy_id = inp->inp_policyresult.policy_id;
+               } else {
+                       if (return_policy_id) {
+                               *return_policy_id = inp->inp_policyresult.policy_id;
+                       }
+                       if (return_route_rule_id) {
+                               *return_route_rule_id = inp->inp_policyresult.results.route_rule_id;
+                       }
                }
                lck_rw_done(&necp_kernel_policy_lock);
                goto done;
        }
 
-       struct necp_kernel_socket_policy *matched_policy = necp_socket_find_policy_match_with_info_locked(necp_kernel_socket_policies_map[NECP_SOCKET_MAP_APP_ID_TO_BUCKET(info.application_id)], &info, NULL, &service_action, &service);
+       struct necp_kernel_socket_policy *matched_policy = necp_socket_find_policy_match_with_info_locked(necp_kernel_socket_policies_map[NECP_SOCKET_MAP_APP_ID_TO_BUCKET(info.application_id)], &info, NULL, &route_rule_id, &service_action, &service, netagent_ids, NECP_MAX_NETAGENTS);
        if (matched_policy != NULL) {
                if (matched_policy->result == NECP_KERNEL_POLICY_RESULT_DROP ||
                        matched_policy->result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT ||
@@ -5432,10 +6653,18 @@ necp_socket_is_allowed_to_send_recv_internal(struct inpcb *inp, struct sockaddr
                        matched_policy->result_parameter.tunnel_interface_index != verifyifindex) ||
                        ((service_action == NECP_KERNEL_POLICY_RESULT_TRIGGER_SCOPED ||
                          service_action == NECP_KERNEL_POLICY_RESULT_NO_TRIGGER_SCOPED) &&
-                        service.identifier != 0 && service.identifier != NECP_NULL_SERVICE_ID)) {
+                        service.identifier != 0 && service.identifier != NECP_NULL_SERVICE_ID) ||
+                       (route_rule_id != 0 &&
+                        !necp_route_is_allowed(route, interface, route_rule_id, &cellular_denied)) ||
+                       !necp_netagents_allow_traffic(netagent_ids, NECP_MAX_NETAGENTS)) {
                        allowed_to_receive = FALSE;
-               } else if (return_policy_id) {
-                       *return_policy_id = matched_policy->id;
+               } else {
+                       if (return_policy_id) {
+                               *return_policy_id = matched_policy->id;
+                       }
+                       if (return_route_rule_id) {
+                               *return_route_rule_id = route_rule_id;
+                       }
                }
                lck_rw_done(&necp_kernel_policy_lock);
 
@@ -5450,11 +6679,15 @@ necp_socket_is_allowed_to_send_recv_internal(struct inpcb *inp, struct sockaddr
        lck_rw_done(&necp_kernel_policy_lock);
 
 done:
+       if (!allowed_to_receive && cellular_denied) {
+               soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED));
+       }
+
        return (allowed_to_receive);
 }
 
 bool
-necp_socket_is_allowed_to_send_recv_v4(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in_addr *local_addr, struct in_addr *remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id)
+necp_socket_is_allowed_to_send_recv_v4(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in_addr *local_addr, struct in_addr *remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id)
 {
        struct sockaddr_in local;
        struct sockaddr_in remote;
@@ -5465,11 +6698,11 @@ necp_socket_is_allowed_to_send_recv_v4(struct inpcb *inp, u_int16_t local_port,
        memcpy(&local.sin_addr, local_addr, sizeof(local.sin_addr));
        memcpy(&remote.sin_addr, remote_addr, sizeof(remote.sin_addr));
 
-       return (necp_socket_is_allowed_to_send_recv_internal(inp, (struct sockaddr *)&local, (struct sockaddr *)&remote, interface, return_policy_id));
+       return (necp_socket_is_allowed_to_send_recv_internal(inp, (struct sockaddr *)&local, (struct sockaddr *)&remote, interface, return_policy_id, return_route_rule_id));
 }
 
 bool
-necp_socket_is_allowed_to_send_recv_v6(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in6_addr *local_addr, struct in6_addr *remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id)
+necp_socket_is_allowed_to_send_recv_v6(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in6_addr *local_addr, struct in6_addr *remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id)
 {
        struct sockaddr_in6 local;
        struct sockaddr_in6 remote;
@@ -5480,17 +6713,17 @@ necp_socket_is_allowed_to_send_recv_v6(struct inpcb *inp, u_int16_t local_port,
        memcpy(&local.sin6_addr, local_addr, sizeof(local.sin6_addr));
        memcpy(&remote.sin6_addr, remote_addr, sizeof(remote.sin6_addr));
 
-       return (necp_socket_is_allowed_to_send_recv_internal(inp, (struct sockaddr *)&local, (struct sockaddr *)&remote, interface, return_policy_id));
+       return (necp_socket_is_allowed_to_send_recv_internal(inp, (struct sockaddr *)&local, (struct sockaddr *)&remote, interface, return_policy_id, return_route_rule_id));
 }
 
 bool
-necp_socket_is_allowed_to_send_recv(struct inpcb *inp, necp_kernel_policy_id *return_policy_id)
+necp_socket_is_allowed_to_send_recv(struct inpcb *inp, necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id)
 {
-       return (necp_socket_is_allowed_to_send_recv_internal(inp, NULL, NULL, NULL, return_policy_id));
+       return (necp_socket_is_allowed_to_send_recv_internal(inp, NULL, NULL, NULL, return_policy_id, return_route_rule_id));
 }
 
 int
-necp_mark_packet_from_socket(struct mbuf *packet, struct inpcb *inp, necp_kernel_policy_id policy_id)
+necp_mark_packet_from_socket(struct mbuf *packet, struct inpcb *inp, necp_kernel_policy_id policy_id, u_int32_t route_rule_id)
 {
        if (packet == NULL || inp == NULL) {
                return (EINVAL);
@@ -5506,6 +6739,11 @@ necp_mark_packet_from_socket(struct mbuf *packet, struct inpcb *inp, necp_kernel
                packet->m_pkthdr.necp_mtag.necp_policy_id = NECP_KERNEL_POLICY_ID_NONE;
        }
        packet->m_pkthdr.necp_mtag.necp_last_interface_index = 0;
+       if (route_rule_id != 0) {
+               packet->m_pkthdr.necp_mtag.necp_route_rule_id = route_rule_id;
+       } else {
+               packet->m_pkthdr.necp_mtag.necp_route_rule_id = inp->inp_policyresult.results.route_rule_id;
+       }
 
        return (0);
 }
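
[Editorial note] The hunks above widen the allow/deny test in necp_socket_is_allowed_to_send_recv_internal() with two new clauses: a matched route rule must pass necp_route_is_allowed(), and the matched netagents must pass necp_netagents_allow_traffic(). Below is a minimal user-space sketch of that combined decision; it omits the service-trigger clause, and the struct, the model_* names and the two boolean fields are inventions that merely stand in for the kernel's policy result and helper calls.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins; only the shape of the test mirrors the diff. */
enum model_result { MODEL_PASS, MODEL_DROP, MODEL_SOCKET_DIVERT, MODEL_IP_TUNNEL };

struct model_match {
	enum model_result result;       /* stands in for matched_policy->result */
	uint32_t tunnel_if_index;       /* result_parameter.tunnel_interface_index */
	uint32_t route_rule_id;         /* 0 means no route rule matched */
	bool route_rule_allows;         /* models necp_route_is_allowed() */
	bool netagents_allow;           /* models necp_netagents_allow_traffic() */
};

static bool
model_allowed_to_send_recv(const struct model_match *m, uint32_t verify_if_index)
{
	/* verify_if_index != 0 stands in for the kernel's non-NULL interface check */
	if (m->result == MODEL_DROP ||
	    m->result == MODEL_SOCKET_DIVERT ||
	    (m->result == MODEL_IP_TUNNEL && verify_if_index != 0 &&
	     m->tunnel_if_index != verify_if_index) ||
	    (m->route_rule_id != 0 && !m->route_rule_allows) ||
	    !m->netagents_allow) {
		return (false);
	}
	return (true);
}

int
main(void)
{
	struct model_match m = { MODEL_PASS, 0, 7, false, true };
	printf("route rule denies: %d\n", model_allowed_to_send_recv(&m, 0)); /* 0 */
	m.route_rule_allows = true;
	printf("route rule allows: %d\n", model_allowed_to_send_recv(&m, 0)); /* 1 */
	return (0);
}
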
@@ -5574,10 +6812,20 @@ necp_get_last_interface_index_from_packet(struct mbuf *packet)
        if (packet == NULL) {
                return (0);
        }
-       
+
        return (packet->m_pkthdr.necp_mtag.necp_last_interface_index);
 }
 
+u_int32_t
+necp_get_route_rule_id_from_packet(struct mbuf *packet)
+{
+       if (packet == NULL) {
+               return (0);
+       }
+
+       return (packet->m_pkthdr.necp_mtag.necp_route_rule_id);
+}
+
 bool
 necp_get_is_keepalive_from_packet(struct mbuf *packet)
 {
@@ -5647,6 +6895,58 @@ necp_socket_get_rescope_if_index(struct inpcb *inp)
        return (0);
 }
 
+u_int32_t
+necp_socket_get_effective_mtu(struct inpcb *inp, u_int32_t current_mtu)
+{
+       if (inp == NULL) {
+               return (current_mtu);
+       }
+
+       if (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL &&
+               (inp->inp_flags & INP_BOUND_IF) &&
+               inp->inp_boundifp) {
+
+               u_int bound_interface_index = inp->inp_boundifp->if_index;
+               u_int tunnel_interface_index = inp->inp_policyresult.results.result_parameter.tunnel_interface_index;
+
+               // The result is IP Tunnel, and is rescoping from one interface to another. Recalculate MTU.
+               if (bound_interface_index != tunnel_interface_index) {
+                       ifnet_t tunnel_interface = NULL;
+
+                       ifnet_head_lock_shared();
+                       tunnel_interface = ifindex2ifnet[tunnel_interface_index];
+                       ifnet_head_done();
+
+                       if (tunnel_interface != NULL) {
+                               u_int32_t direct_tunnel_mtu = tunnel_interface->if_mtu;
+                               u_int32_t delegate_tunnel_mtu = (tunnel_interface->if_delegated.ifp != NULL) ? tunnel_interface->if_delegated.ifp->if_mtu : 0;
+                               if (delegate_tunnel_mtu != 0 &&
+                                       strncmp(tunnel_interface->if_name, "ipsec", strlen("ipsec")) == 0) {
+                                               // For ipsec interfaces, calculate the overhead from the delegate interface
+                                               u_int32_t tunnel_overhead = (u_int32_t)(esp_hdrsiz(NULL) + sizeof(struct ip6_hdr));
+                                               if (delegate_tunnel_mtu > tunnel_overhead) {
+                                                       delegate_tunnel_mtu -= tunnel_overhead;
+                                               }
+
+                                               if (delegate_tunnel_mtu < direct_tunnel_mtu) {
+                                                       // If the (delegate - overhead) < direct, return (delegate - overhead)
+                                                       return (delegate_tunnel_mtu);
+                                               } else {
+                                                       // Otherwise return direct
+                                                       return (direct_tunnel_mtu);
+                                               }
+                               } else {
+                                       // For non-ipsec interfaces, just return the tunnel MTU
+                                       return (direct_tunnel_mtu);
+                               }
+                       }
+               }
+       }
+
+       // By default, just return the MTU passed in
+       return (current_mtu);
+}
+
 ifnet_t
 necp_get_ifnet_from_result_parameter(necp_kernel_policy_result_parameter *result_parameter)
 {
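
[Editorial note] necp_socket_get_effective_mtu(), added above, clamps the advertised MTU when an IP-tunnel result rescopes a socket onto an ipsec interface: it subtracts the ESP-plus-IPv6 overhead from the delegate interface's MTU and returns the smaller of that and the tunnel's own MTU. The stand-alone sketch below reproduces just that arithmetic; the fixed overhead value is a hypothetical placeholder for esp_hdrsiz(NULL) + sizeof(struct ip6_hdr), which in the kernel depends on the security association.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* User-space model of the MTU selection; not the kernel API. */
static uint32_t
model_effective_tunnel_mtu(const char *ifname, uint32_t direct_mtu,
    uint32_t delegate_mtu, uint32_t overhead)
{
	if (delegate_mtu != 0 && strncmp(ifname, "ipsec", strlen("ipsec")) == 0) {
		if (delegate_mtu > overhead) {
			delegate_mtu -= overhead;
		}
		/* Return the smaller of (delegate - overhead) and the direct MTU */
		return (delegate_mtu < direct_mtu ? delegate_mtu : direct_mtu);
	}
	/* Non-ipsec tunnels simply report their own MTU */
	return (direct_mtu);
}

int
main(void)
{
	/* e.g. an ipsec interface advertising 65535 over a delegate with MTU 1500 */
	printf("%u\n", (unsigned)model_effective_tunnel_mtu("ipsec0", 65535, 1500, 93));
	/* a non-ipsec tunnel keeps its own MTU */
	printf("%u\n", (unsigned)model_effective_tunnel_mtu("utun1", 1400, 1500, 93));
	return (0);
}
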
index 519995b85b681c5316dfa60d3fb4f4cb56977b97..2aebe9c27d7c41446c808389e14e7317267b60ca 100644
@@ -37,6 +37,8 @@
  */
 #define        NECP_CONTROL_NAME "com.apple.net.necp_control"
 
+#define NECP_TLV_LENGTH_UINT32 1
+
 struct necp_packet_header {
     u_int8_t           packet_type;
        u_int8_t                flags;
@@ -65,6 +67,7 @@ struct necp_packet_header {
 #define        NECP_TLV_ATTRIBUTE_DOMAIN                               7       // char[]
 #define        NECP_TLV_ATTRIBUTE_ACCOUNT                              8       // char[]
 #define        NECP_TLV_SERVICE_UUID                                   9       // uuid_t
+#define        NECP_TLV_ROUTE_RULE                                             10
 
 #define        NECP_POLICY_CONDITION_FLAGS_NEGATIVE    0x01 // Negative
 
@@ -103,8 +106,20 @@ struct necp_packet_header {
 #define        NECP_POLICY_RESULT_TRIGGER_SCOPED               10      // service uuid_t
 #define        NECP_POLICY_RESULT_NO_TRIGGER_SCOPED    11      // service uuid_t
 #define        NECP_POLICY_RESULT_SOCKET_SCOPED                12      // String, interface name
+#define        NECP_POLICY_RESULT_ROUTE_RULES                  13      // N/A, must have route rules defined
+#define        NECP_POLICY_RESULT_USE_NETAGENT                 14      // netagent uuid_t
+
+#define        NECP_POLICY_RESULT_MAX                                  NECP_POLICY_RESULT_USE_NETAGENT
 
-#define        NECP_POLICY_RESULT_MAX                                  NECP_POLICY_RESULT_SOCKET_SCOPED
+// Route rule
+#define        NECP_ROUTE_RULE_NONE                                    0       // N/A
+#define        NECP_ROUTE_RULE_DENY_INTERFACE                  1       // String, or empty to match all
+#define        NECP_ROUTE_RULE_ALLOW_INTERFACE                 2       // String, or empty to match all
+
+#define        NECP_ROUTE_RULE_FLAG_CELLULAR                   0x01
+#define        NECP_ROUTE_RULE_FLAG_WIFI                               0x02
+#define        NECP_ROUTE_RULE_FLAG_WIRED                              0x04
+#define        NECP_ROUTE_RULE_FLAG_EXPENSIVE                  0x08
 
 // Errors
 #define        NECP_ERROR_INTERNAL                                             0
@@ -114,6 +129,7 @@ struct necp_packet_header {
 #define        NECP_ERROR_POLICY_CONDITIONS_INVALID    4
 #define        NECP_ERROR_POLICY_ID_NOT_FOUND                  5
 #define        NECP_ERROR_INVALID_PROCESS                              6
+#define        NECP_ERROR_ROUTE_RULES_INVALID                  7
 
 // Modifiers
 #define        NECP_MASK_USERSPACE_ONLY        0x80000000      // on filter_control_unit value
@@ -168,6 +184,7 @@ typedef union {
 } necp_kernel_policy_routing_result_parameter;
 
 #define        NECP_SERVICE_FLAGS_REGISTERED                   0x01
+#define        NECP_MAX_NETAGENTS                                              8
 struct necp_aggregate_result {
        necp_kernel_policy_result                       routing_result;
        necp_kernel_policy_routing_result_parameter     routing_result_parameter;
@@ -176,6 +193,17 @@ struct necp_aggregate_result {
        uuid_t                                                          service_uuid;
        u_int32_t                                                       service_flags;
        u_int32_t                                                       service_data;
+       u_int                                                           routed_interface_index;
+       u_int32_t                                                       policy_id;
+       uuid_t                                                          netagents[NECP_MAX_NETAGENTS];
+       u_int32_t                                                       netagent_flags[NECP_MAX_NETAGENTS];
+};
+
+#define KEV_NECP_SUBCLASS 8
+#define KEV_NECP_POLICIES_CHANGED 1
+
+struct kev_necp_policies_changed_data {
+       u_int32_t               changed_count;  // Defaults to 0.
 };
 
 #ifdef BSD_KERNEL_PRIVATE
@@ -185,10 +213,14 @@ struct necp_aggregate_result {
 #include <netinet/ip_var.h>
 #include <netinet6/ip6_var.h>
 
-#define        NECPCTL_DROP_ALL_LEVEL                  1       /* Drop all packets if no policy matches above this level */
-#define        NECPCTL_DEBUG                                   2       /* Log all kernel policy matches */
-#define        NECPCTL_PASS_LOOPBACK                   3       /* Pass all loopback traffic */
-#define        NECPCTL_PASS_KEEPALIVES                 4       /* Pass all kernel-generated keepalive traffic */
+#define        NECPCTL_DROP_ALL_LEVEL                          1       /* Drop all packets if no policy matches above this level */
+#define        NECPCTL_DEBUG                                           2       /* Log all kernel policy matches */
+#define        NECPCTL_PASS_LOOPBACK                           3       /* Pass all loopback traffic */
+#define        NECPCTL_PASS_KEEPALIVES                         4       /* Pass all kernel-generated keepalive traffic */
+#define        NECPCTL_SOCKET_POLICY_COUNT                     5       /* Count of all socket-level policies */
+#define        NECPCTL_SOCKET_NON_APP_POLICY_COUNT     6       /* Count of non-per-app socket-level policies */
+#define        NECPCTL_IP_POLICY_COUNT                         7       /* Count of all ip-level policies */
+#define        NECPCTL_SESSION_COUNT                           8       /* Count of NECP sessions */
 
 #define        NECPCTL_NAMES {                                 \
        { 0, 0 },                                                       \
@@ -218,6 +250,8 @@ typedef u_int32_t necp_app_id;
 #define        NECP_KERNEL_POLICY_RESULT_TRIGGER_SCOPED                NECP_POLICY_RESULT_TRIGGER_SCOPED
 #define        NECP_KERNEL_POLICY_RESULT_NO_TRIGGER_SCOPED             NECP_POLICY_RESULT_NO_TRIGGER_SCOPED
 #define        NECP_KERNEL_POLICY_RESULT_SOCKET_SCOPED                 NECP_POLICY_RESULT_SOCKET_SCOPED
+#define        NECP_KERNEL_POLICY_RESULT_ROUTE_RULES                   NECP_POLICY_RESULT_ROUTE_RULES
+#define        NECP_KERNEL_POLICY_RESULT_USE_NETAGENT                  NECP_POLICY_RESULT_USE_NETAGENT
 
 typedef struct {
        u_int32_t identifier;
@@ -230,6 +264,8 @@ typedef union {
        u_int32_t                                       flow_divert_control_unit;
        u_int32_t                                       filter_control_unit;
        u_int32_t                                       skip_policy_order;
+       u_int32_t                                       route_rule_id;
+       u_int32_t                                       netagent_id;
        necp_kernel_policy_service      service;
 } necp_kernel_policy_result_parameter;
 
@@ -245,7 +281,8 @@ struct necp_kernel_socket_policy {
        necp_kernel_policy_id           id;
        necp_policy_order                       order;
        u_int32_t                                       session_order;
-       
+       int                                                     session_pid;
+
        u_int32_t                                       condition_mask;
        u_int32_t                                       condition_negated_mask;
        necp_kernel_policy_id           cond_policy_id;
@@ -265,7 +302,7 @@ struct necp_kernel_socket_policy {
        union necp_sockaddr_union       cond_remote_start;                              // Matches remote IP address (or start)
        union necp_sockaddr_union       cond_remote_end;                                // Matches IP address range
        u_int8_t                                        cond_remote_prefix;                             // Defines subnet
-       
+
        necp_kernel_policy_result       result;
        necp_kernel_policy_result_parameter     result_parameter;
 };
@@ -277,7 +314,8 @@ struct necp_kernel_ip_output_policy {
        necp_policy_order                       suborder;
        necp_policy_order                       order;
        u_int32_t                                       session_order;
-       
+       int                                                     session_pid;
+
        u_int32_t                                       condition_mask;
        u_int32_t                                       condition_negated_mask;
        necp_kernel_policy_id           cond_policy_id;
@@ -290,7 +328,7 @@ struct necp_kernel_ip_output_policy {
        union necp_sockaddr_union       cond_remote_end;                                // Matches IP address range
        u_int8_t                                        cond_remote_prefix;                             // Defines subnet
        u_int32_t                                       cond_last_interface_index;
-       
+
        necp_kernel_policy_result       result;
        necp_kernel_policy_result_parameter     result_parameter;
 };
@@ -305,17 +343,21 @@ struct necp_session_policy {
        necp_policy_id          id;
        necp_policy_order       order;
        u_int8_t                        *result;
-       size_t                          result_size;
-       u_int8_t                        *conditions; // Array of conditions, each with a size_t length at start
-       size_t                          conditions_size;
-       
+       u_int32_t                       result_size;
+       u_int8_t                        *conditions; // Array of conditions, each with a u_int32_t length at start
+       u_int32_t                       conditions_size;
+       u_int8_t                        *route_rules; // Array of route rules, each with a u_int32_t length at start
+       u_int32_t                       route_rules_size;
+
        uuid_t                          applied_app_uuid;
        uuid_t                          applied_real_app_uuid;
        char                            *applied_domain;
        char                            *applied_account;
-       
-       uuid_t                          applied_service_uuid;
-       
+
+       uuid_t                          applied_result_uuid;
+
+       u_int32_t                       applied_route_rules_id;
+
        necp_kernel_policy_id   kernel_socket_policies[MAX_KERNEL_SOCKET_POLICIES];
        necp_kernel_policy_id   kernel_ip_output_policies[MAX_KERNEL_IP_OUTPUT_POLICIES];
 };
@@ -324,6 +366,7 @@ struct necp_aggregate_socket_result {
        necp_kernel_policy_result                       result;
        necp_kernel_policy_result_parameter     result_parameter;
        necp_kernel_policy_filter                       filter_control_unit;
+       u_int32_t                                                       route_rule_id;
 };
 
 struct necp_inpcb_result {
@@ -347,13 +390,15 @@ u_int32_t necp_socket_get_flow_divert_control_unit(struct inpcb *inp);
 
 bool necp_socket_should_rescope(struct inpcb *inp);
 u_int necp_socket_get_rescope_if_index(struct inpcb *inp);
+u_int32_t necp_socket_get_effective_mtu(struct inpcb *inp, u_int32_t current_mtu);
 
-bool necp_socket_is_allowed_to_send_recv(struct inpcb *inp, necp_kernel_policy_id *return_policy_id);
-bool necp_socket_is_allowed_to_send_recv_v4(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in_addr *local_addr, struct in_addr *remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id);
-bool necp_socket_is_allowed_to_send_recv_v6(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in6_addr *local_addr, struct in6_addr *remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id);
-int necp_mark_packet_from_socket(struct mbuf *packet, struct inpcb *inp, necp_kernel_policy_id policy_id);
+bool necp_socket_is_allowed_to_send_recv(struct inpcb *inp, necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id);
+bool necp_socket_is_allowed_to_send_recv_v4(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in_addr *local_addr, struct in_addr *remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id);
+bool necp_socket_is_allowed_to_send_recv_v6(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in6_addr *local_addr, struct in6_addr *remote_addr, ifnet_t interface, necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id);
+int necp_mark_packet_from_socket(struct mbuf *packet, struct inpcb *inp, necp_kernel_policy_id policy_id, u_int32_t route_rule_id);
 necp_kernel_policy_id necp_get_policy_id_from_packet(struct mbuf *packet);
 u_int32_t necp_get_last_interface_index_from_packet(struct mbuf *packet);
+u_int32_t necp_get_route_rule_id_from_packet(struct mbuf *packet);
 
 necp_kernel_policy_id necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int32_t override_bound_interface);
 necp_kernel_policy_id necp_ip_output_find_policy_match(struct mbuf *packet, int flags, struct ip_out_args *ipoa, necp_kernel_policy_result *result, necp_kernel_policy_result_parameter *result_parameter);
@@ -365,6 +410,8 @@ int necp_mark_packet_from_interface(struct mbuf *packet, ifnet_t interface);
 ifnet_t necp_get_ifnet_from_result_parameter(necp_kernel_policy_result_parameter *result_parameter);
 bool necp_packet_can_rebind_to_ifnet(struct mbuf *packet, struct ifnet *interface, struct route *new_route, int family);
 
+bool necp_packet_is_allowed_over_interface(struct mbuf *packet, struct ifnet *interface);
+
 int necp_mark_packet_as_keepalive(struct mbuf *packet, bool is_keepalive);
 bool necp_get_is_keepalive_from_packet(struct mbuf *packet);
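
[Editorial note] With the widened prototypes above, a caller receives both the policy id and the route rule id from necp_socket_is_allowed_to_send_recv*() and hands them to necp_mark_packet_from_socket(), which falls back to the id cached on the inpcb when the caller passes 0. A tiny illustrative model of that fallback, using plain structs rather than the kernel's mbuf and inpcb types:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the mbuf NECP tag and the cached inpcb result. */
struct model_pkt   { uint32_t route_rule_id; };
struct model_inpcb { uint32_t cached_route_rule_id; };

static void
model_mark_packet(struct model_pkt *pkt, const struct model_inpcb *inp,
    uint32_t route_rule_id)
{
	/* Same fallback as necp_mark_packet_from_socket(): a zero argument
	 * means "use whatever the last policy match stored on the socket". */
	pkt->route_rule_id = (route_rule_id != 0) ?
	    route_rule_id : inp->cached_route_rule_id;
}

int
main(void)
{
	struct model_inpcb inp = { .cached_route_rule_id = 42 };
	struct model_pkt pkt;

	model_mark_packet(&pkt, &inp, 7);
	printf("explicit id: %u\n", (unsigned)pkt.route_rule_id);  /* 7 */
	model_mark_packet(&pkt, &inp, 0);
	printf("fallback id: %u\n", (unsigned)pkt.route_rule_id);  /* 42 */
	return (0);
}
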
 
diff --git a/bsd/net/net_perf.c b/bsd/net/net_perf.c
new file mode 100644
index 0000000..b475644
--- /dev/null
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <net/if_var.h>
+#include <net/net_perf.h>
+#include <netinet/in_var.h>
+#include <sys/sysctl.h>
+
+static void ip_perf_record_stats(net_perf_t *npp, struct timeval *tv1,
+       struct timeval *tv2, uint64_t num_pkts);
+static void update_bins(net_perf_t *npp, uint64_t bins);
+
+void net_perf_start_time(net_perf_t *npp, struct timeval *tv)
+{
+#pragma unused(npp)
+       microtime(tv);
+}
+
+void net_perf_measure_time(net_perf_t *npp, struct timeval *start, uint64_t num_pkts)
+{
+       struct timeval stop;
+       microtime(&stop);
+       ip_perf_record_stats(npp, start, &stop, num_pkts);
+}
+
+static void
+ip_perf_record_stats(net_perf_t *npp, struct timeval *tv1, struct timeval *tv2, uint64_t num_pkts)
+{
+       struct timeval tv_diff;
+       uint64_t usecs;
+       timersub(tv2, tv1, &tv_diff);
+       usecs = tv_diff.tv_sec * 1000000ULL + tv_diff.tv_usec;
+       OSAddAtomic64(usecs, &npp->np_total_usecs);
+       OSAddAtomic64(num_pkts, &npp->np_total_pkts);
+}
+
+static void
+update_bins(net_perf_t *npp, uint64_t bins)
+{
+       bzero(&npp->np_hist_bars, sizeof(npp->np_hist_bars));
+
+       for (int i = 1, j = 0; i <= 64 && j < NET_PERF_BARS; i++) {
+               if (bins & 0x1) {
+                       npp->np_hist_bars[j] = i;
+                       j++;
+               }
+               bins >>= 1;
+       }
+}
+
+void
+net_perf_initialize(net_perf_t *npp, uint64_t bins)
+{
+       bzero(npp, sizeof(net_perf_t));
+       /* initialize np_hist_bars array */
+       update_bins(npp, bins);
+}
+
+void
+net_perf_histogram(net_perf_t *npp, uint64_t num_pkts)
+{
+       if (num_pkts <= npp->np_hist_bars[0]) {
+               OSAddAtomic64(num_pkts, &npp->np_hist1);
+       } else if (npp->np_hist_bars[0] < num_pkts && num_pkts <= npp->np_hist_bars[1]) {
+               OSAddAtomic64(num_pkts, &npp->np_hist2);
+       } else if (npp->np_hist_bars[1] < num_pkts && num_pkts <= npp->np_hist_bars[2]) {
+               OSAddAtomic64(num_pkts, &npp->np_hist3);
+       } else if (npp->np_hist_bars[2] < num_pkts && num_pkts <= npp->np_hist_bars[3]) {
+               OSAddAtomic64(num_pkts, &npp->np_hist4);
+       } else if (npp->np_hist_bars[3] < num_pkts) {
+               OSAddAtomic64(num_pkts, &npp->np_hist5);
+       }
+}
+
+boolean_t
+net_perf_validate_bins(uint64_t bins)
+{
+       return (NET_PERF_BARS == __builtin_popcountll(bins));
+}
+
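
[Editorial note] The bin layout in net_perf.c is driven by a 64-bit mask: update_bins() turns each set bit (i-1) into the bar value i, and net_perf_validate_bins() only accepts masks with exactly NET_PERF_BARS bits set. The sketch below reproduces that bucketing in user space; the model_* names and the sample mask are inventions for illustration, and the kernel's microtime/OSAddAtomic64 plumbing is omitted.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MODEL_BARS 4	/* mirrors NET_PERF_BARS */

struct model_perf {
	uint64_t hist[MODEL_BARS + 1];	/* five bins */
	uint8_t  bars[MODEL_BARS];	/* four dividing bars */
};

/* net_perf_validate_bins(): exactly MODEL_BARS bits may be set */
static int
model_validate_bins(uint64_t bins)
{
	return (__builtin_popcountll(bins) == MODEL_BARS);
}

/* update_bins(): bit (i-1) set in 'bins' makes i a bar, lowest bits first */
static void
model_set_bars(struct model_perf *m, uint64_t bins)
{
	memset(m, 0, sizeof(*m));
	for (int i = 1, j = 0; i <= 64 && j < MODEL_BARS; i++) {
		if (bins & 0x1) {
			m->bars[j++] = (uint8_t)i;
		}
		bins >>= 1;
	}
}

/* net_perf_histogram(): a batch is accumulated into the first bin whose
 * bar bounds the batch size from above */
static void
model_histogram(struct model_perf *m, uint64_t num_pkts)
{
	if (num_pkts <= m->bars[0])
		m->hist[0] += num_pkts;
	else if (num_pkts <= m->bars[1])
		m->hist[1] += num_pkts;
	else if (num_pkts <= m->bars[2])
		m->hist[2] += num_pkts;
	else if (num_pkts <= m->bars[3])
		m->hist[3] += num_pkts;
	else
		m->hist[4] += num_pkts;
}

int
main(void)
{
	/* bars 1, 4, 8, 16 -> bins (0,1], (1,4], (4,8], (8,16], (16,inf) */
	uint64_t bins = (1ULL << 0) | (1ULL << 3) | (1ULL << 7) | (1ULL << 15);
	uint64_t batches[] = { 1, 3, 6, 12, 40 };
	struct model_perf m;

	if (!model_validate_bins(bins))
		return (1);
	model_set_bars(&m, bins);
	for (size_t i = 0; i < sizeof(batches) / sizeof(batches[0]); i++)
		model_histogram(&m, batches[i]);
	for (int k = 0; k <= MODEL_BARS; k++)
		printf("bin %d: %llu packets\n", k + 1,
		    (unsigned long long)m.hist[k]);
	return (0);
}
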
diff --git a/bsd/net/net_perf.h b/bsd/net/net_perf.h
new file mode 100644
index 0000000..7c37356
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#ifndef _NET_NET_PERF_H_
+#define _NET_NET_PERF_H_
+
+#include <stdint.h>
+
+#ifdef KERNEL_PRIVATE
+#include <sys/time.h>
+#include <mach/boolean.h>
+#endif /* KERNEL_PRIVATE */
+
+/* five histogram bins are separated by four dividing "bars" */
+#define NET_PERF_BARS 4
+
+typedef struct net_perf {
+       uint64_t np_total_pkts; /* total packets input or output during measurement */
+       uint64_t np_total_usecs;        /* microseconds elapsed during measurement */
+       uint64_t np_hist1;              /* histogram bin 1 */
+       uint64_t np_hist2;              /* histogram bin 2 */
+       uint64_t np_hist3;              /* histogram bin 3 */
+       uint64_t np_hist4;              /* histogram bin 4 */
+       uint64_t np_hist5;              /* histogram bin 5 */
+       uint8_t np_hist_bars[NET_PERF_BARS];
+} net_perf_t;
+
+#ifdef KERNEL_PRIVATE
+void net_perf_initialize(net_perf_t *npp, uint64_t bins);
+void net_perf_start_time(net_perf_t *npp, struct timeval *tv);
+void net_perf_measure_time(net_perf_t *npp, struct timeval *start, uint64_t num_pkts);
+void net_perf_histogram(net_perf_t *npp, uint64_t num_pkts);
+boolean_t net_perf_validate_bins(uint64_t bins);
+
+#endif /* KERNEL_PRIVATE */
+
+#endif /* _NET_NET_PERF_H_ */
+
index 54211eddef20f1536bb568b42df8d24dedbfe94e..052ae2ffe2ea4daf98a1793e0e889a4e8d73d117 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2012-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -330,7 +330,11 @@ STUB(ifnet_tx_compl_status);
 STUB(ifnet_flowid);
 STUB(ifnet_enable_output);
 STUB(ifnet_disable_output);
-STUB(ifnet_get_ipsec_offload_frames);
+STUB(ifnet_get_keepalive_offload_frames);
+STUB(ifnet_link_status_report);
+STUB(ifnet_set_packetpreamblelen);
+STUB(ifnet_packetpreamblelen);
+STUB(ifnet_maxpacketpreamblelen);
 STUB(in6_localaddr);
 STUB(in_localaddr);
 STUB(in6addr_local);
@@ -348,6 +352,7 @@ STUB(m_pullup);
 STUB(m_split);
 STUB(m_trailingspace);
 STUB(mbuf_get_driver_scratch);
+STUB(mbuf_get_unsent_data_bytes);
 STUB(mbuf_get_priority);
 STUB(mbuf_get_service_class);
 STUB(mbuf_get_service_class_index);
diff --git a/bsd/net/network_agent.c b/bsd/net/network_agent.c
new file mode 100644
index 0000000..6854379
--- /dev/null
@@ -0,0 +1,1150 @@
+/*
+ * Copyright (c) 2014 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <string.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/syslog.h>
+#include <sys/queue.h>
+#include <sys/malloc.h>
+#include <libkern/OSMalloc.h>
+#include <sys/kernel.h>
+#include <sys/kern_control.h>
+#include <sys/mbuf.h>
+#include <sys/kpi_mbuf.h>
+#include <sys/sysctl.h>
+#include <sys/priv.h>
+#include <sys/kern_event.h>
+#include <sys/sysproto.h>
+#include <net/network_agent.h>
+#include <net/if_var.h>
+
+u_int32_t netagent_debug = LOG_NOTICE; // 0=None, 1=Basic
+
+SYSCTL_NODE(_net, OID_AUTO, netagent, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "NetworkAgent");
+SYSCTL_INT(_net_netagent, OID_AUTO, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &netagent_debug, 0, "");
+
+static int netagent_registered_count = 0;
+SYSCTL_INT(_net_netagent, OID_AUTO, registered_count , CTLFLAG_RD | CTLFLAG_LOCKED,
+                  &netagent_registered_count, 0, "");
+
+static int netagent_active_count = 0;
+SYSCTL_INT(_net_netagent, OID_AUTO, active_count , CTLFLAG_RD | CTLFLAG_LOCKED,
+                  &netagent_active_count, 0, "");
+
+#define        NETAGENTLOG(level, format, ...) do {                                                                                    \
+       if (level <= netagent_debug)                                    \
+               log((level > LOG_NOTICE ? LOG_NOTICE : level), "%s: " format "\n", __FUNCTION__, __VA_ARGS__);  \
+} while (0)
+
+#define        NETAGENTLOG0(level, msg) do {                                                                                   \
+       if (level <= netagent_debug)                                    \
+               log((level > LOG_NOTICE ? LOG_NOTICE : level), "%s: %s\n", __FUNCTION__, msg);  \
+} while (0)
+
+struct netagent_assertion {
+       LIST_ENTRY(netagent_assertion) assertion_chain;
+       uuid_t asserted_uuid;
+};
+
+struct netagent_wrapper {
+       LIST_ENTRY(netagent_wrapper) master_chain;
+       u_int32_t control_unit;
+       struct netagent netagent;
+};
+
+struct netagent_session {
+       u_int32_t control_unit;
+       struct netagent_wrapper *wrapper;
+       LIST_HEAD(_netagent_assertion_list, netagent_assertion) assertion_list;
+};
+
+static LIST_HEAD(_netagent_list, netagent_wrapper) master_netagent_list;
+
+static kern_ctl_ref    netagent_kctlref;
+static u_int32_t       netagent_family;
+static OSMallocTag     netagent_malloc_tag;
+static lck_grp_attr_t  *netagent_grp_attr      = NULL;
+static lck_attr_t              *netagent_mtx_attr      = NULL;
+static lck_grp_t               *netagent_mtx_grp               = NULL;
+decl_lck_rw_data(static, netagent_lock);
+
+static errno_t netagent_register_control(void);
+static errno_t netagent_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
+                                                                       void **unitinfo);
+static errno_t netagent_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo);
+static errno_t netagent_ctl_send(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
+                                                                mbuf_t m, int flags);
+static void netagent_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int flags);
+static errno_t netagent_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
+                                                                  int opt, void *data, size_t *len);
+static errno_t netagent_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
+                                                                  int opt, void *data, size_t len);
+
+static int netagent_send_ctl_data(u_int32_t control_unit, u_int8_t *buffer, size_t buffer_size);
+
+static struct netagent_session *netagent_create_session(u_int32_t control_unit);
+static void netagent_delete_session(struct netagent_session *session);
+
+static void netagent_handle_register(struct netagent_session *session, u_int32_t message_id,
+                                                                        u_int32_t payload_length, mbuf_t packet, int offset);
+static void netagent_handle_unregister(struct netagent_session *session, u_int32_t message_id,
+                                                                          u_int32_t payload_length, mbuf_t packet, int offset);
+static void netagent_handle_update(struct netagent_session *session, u_int32_t message_id,
+                                                                  u_int32_t payload_length, mbuf_t packet, int offset);
+static void netagent_handle_get(struct netagent_session *session, u_int32_t message_id,
+                                                               u_int32_t payload_length, mbuf_t packet, int offset);
+static void netagent_handle_assert(struct netagent_session *session, u_int32_t message_id,
+                                                                  u_int32_t payload_length, mbuf_t packet, int offset);
+static void netagent_handle_unassert(struct netagent_session *session, u_int32_t message_id,
+                                                                        u_int32_t payload_length, mbuf_t packet, int offset);
+
+static struct netagent_wrapper *netagent_find_agent_with_uuid(uuid_t uuid);
+
+errno_t
+netagent_init(void)
+{
+       errno_t result = 0;
+
+       result = netagent_register_control();
+       if (result != 0) {
+               goto done;
+       }
+
+       netagent_grp_attr = lck_grp_attr_alloc_init();
+       if (netagent_grp_attr == NULL) {
+               NETAGENTLOG0(LOG_ERR, "lck_grp_attr_alloc_init failed");
+               result = ENOMEM;
+               goto done;
+       }
+
+       netagent_mtx_grp = lck_grp_alloc_init(NETAGENT_CONTROL_NAME, netagent_grp_attr);
+       if (netagent_mtx_grp == NULL) {
+               NETAGENTLOG0(LOG_ERR, "lck_grp_alloc_init failed");
+               result = ENOMEM;
+               goto done;
+       }
+
+       netagent_mtx_attr = lck_attr_alloc_init();
+       if (netagent_mtx_attr == NULL) {
+               NETAGENTLOG0(LOG_ERR, "lck_attr_alloc_init failed");
+               result = ENOMEM;
+               goto done;
+       }
+
+       lck_rw_init(&netagent_lock, netagent_mtx_grp, netagent_mtx_attr);
+
+       LIST_INIT(&master_netagent_list);
+
+done:
+       if (result != 0) {
+               if (netagent_mtx_attr != NULL) {
+                       lck_attr_free(netagent_mtx_attr);
+                       netagent_mtx_attr = NULL;
+               }
+               if (netagent_mtx_grp != NULL) {
+                       lck_grp_free(netagent_mtx_grp);
+                       netagent_mtx_grp = NULL;
+               }
+               if (netagent_grp_attr != NULL) {
+                       lck_grp_attr_free(netagent_grp_attr);
+                       netagent_grp_attr = NULL;
+               }
+               if (netagent_kctlref != NULL) {
+                       ctl_deregister(netagent_kctlref);
+                       netagent_kctlref = NULL;
+               }
+       }
+       return (result);
+}
+
+static errno_t
+netagent_register_control(void)
+{
+       struct kern_ctl_reg     kern_ctl;
+       errno_t                         result = 0;
+
+       // Create a tag to allocate memory
+       netagent_malloc_tag = OSMalloc_Tagalloc(NETAGENT_CONTROL_NAME, OSMT_DEFAULT);
+
+       // Find a unique value for our interface family
+       result = mbuf_tag_id_find(NETAGENT_CONTROL_NAME, &netagent_family);
+       if (result != 0) {
+               NETAGENTLOG(LOG_ERR, "mbuf_tag_id_find_internal failed: %d", result);
+               return (result);
+       }
+
+       bzero(&kern_ctl, sizeof(kern_ctl));
+       strlcpy(kern_ctl.ctl_name, NETAGENT_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
+       kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
+       kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED; // Require root
+       kern_ctl.ctl_sendsize = 64 * 1024;
+       kern_ctl.ctl_recvsize = 64 * 1024;
+       kern_ctl.ctl_connect = netagent_ctl_connect;
+       kern_ctl.ctl_disconnect = netagent_ctl_disconnect;
+       kern_ctl.ctl_send = netagent_ctl_send;
+       kern_ctl.ctl_rcvd = netagent_ctl_rcvd;
+       kern_ctl.ctl_setopt = netagent_ctl_setopt;
+       kern_ctl.ctl_getopt = netagent_ctl_getopt;
+
+       result = ctl_register(&kern_ctl, &netagent_kctlref);
+       if (result != 0) {
+               NETAGENTLOG(LOG_ERR, "ctl_register failed: %d", result);
+               return (result);
+       }
+
+       return (0);
+}
+
+static errno_t
+netagent_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac, void **unitinfo)
+{
+#pragma unused(kctlref)
+       *unitinfo = netagent_create_session(sac->sc_unit);
+       if (*unitinfo == NULL) {
+               // Could not allocate session
+               return (ENOBUFS);
+       }
+
+       return (0);
+}
+
+static errno_t
+netagent_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo)
+{
+#pragma unused(kctlref, unit)
+       struct netagent_session *session = (struct netagent_session *)unitinfo;
+       if (session != NULL) {
+               netagent_delete_session(session);
+       }
+
+       return (0);
+}
+
+// Kernel events
+static void
+netagent_post_event(uuid_t agent_uuid, u_int32_t event_code)
+{
+       struct kev_msg ev_msg;
+       memset(&ev_msg, 0, sizeof(ev_msg));
+
+       struct kev_netagent_data event_data;
+
+       ev_msg.vendor_code      = KEV_VENDOR_APPLE;
+       ev_msg.kev_class        = KEV_NETWORK_CLASS;
+       ev_msg.kev_subclass     = KEV_NETAGENT_SUBCLASS;
+       ev_msg.event_code       = event_code;
+
+       uuid_copy(event_data.netagent_uuid, agent_uuid);
+       ev_msg.dv[0].data_ptr    = &event_data;
+       ev_msg.dv[0].data_length = sizeof(event_data);
+
+       kev_post_msg(&ev_msg);
+}
+
+// Message handling
+static u_int8_t *
+netagent_buffer_write_message_header(u_int8_t *buffer, u_int8_t message_type, u_int8_t flags,
+                                                                        u_int32_t message_id, u_int32_t error, u_int32_t payload_length)
+{
+       ((struct netagent_message_header *)(void *)buffer)->message_type = message_type;
+       ((struct netagent_message_header *)(void *)buffer)->message_flags = flags;
+       ((struct netagent_message_header *)(void *)buffer)->message_id = message_id;
+       ((struct netagent_message_header *)(void *)buffer)->message_error = error;
+       ((struct netagent_message_header *)(void *)buffer)->message_payload_length = payload_length;
+       return (buffer + sizeof(struct netagent_message_header));
+}
+
+static int
+netagent_send_ctl_data(u_int32_t control_unit, u_int8_t *buffer, size_t buffer_size)
+{
+       if (netagent_kctlref == NULL || control_unit == 0 || buffer == NULL || buffer_size == 0) {
+               return (EINVAL);
+       }
+
+       return ctl_enqueuedata(netagent_kctlref, control_unit, buffer, buffer_size, CTL_DATA_EOR);
+}
+
+static int
+netagent_send_trigger(struct netagent_wrapper *wrapper, struct proc *p, u_int32_t flags, u_int32_t trigger_type)
+{
+       int error = 0;
+       struct netagent_trigger_message *trigger_message = NULL;
+       u_int8_t *trigger = NULL;
+       size_t trigger_size = sizeof(struct netagent_message_header) + sizeof(struct netagent_trigger_message);
+
+       MALLOC(trigger, u_int8_t *, trigger_size, M_NETAGENT, M_WAITOK);
+       if (trigger == NULL) {
+               return (ENOMEM);
+       }
+
+       (void)netagent_buffer_write_message_header(trigger, trigger_type, 0, 0, 0, sizeof(struct netagent_trigger_message));
+
+       trigger_message = (struct netagent_trigger_message *)(void *)(trigger + sizeof(struct netagent_message_header));
+       trigger_message->trigger_flags = flags;
+       if (p != NULL) {
+               trigger_message->trigger_pid = proc_pid(p);
+               proc_getexecutableuuid(p, trigger_message->trigger_proc_uuid, sizeof(trigger_message->trigger_proc_uuid));
+       } else {
+               trigger_message->trigger_pid = 0;
+               uuid_clear(trigger_message->trigger_proc_uuid);
+       }
+
+       if ((error = netagent_send_ctl_data(wrapper->control_unit, (u_int8_t *)trigger, trigger_size))) {
+               NETAGENTLOG(LOG_ERR, "Failed to send trigger message on control unit %d", wrapper->control_unit);
+       }
+
+       FREE(trigger, M_NETAGENT);
+       return (error);
+}
+
+static int
+netagent_send_success_response(struct netagent_session *session, u_int8_t message_type, u_int32_t message_id)
+{
+       int error = 0;
+       u_int8_t *response = NULL;
+       size_t response_size = sizeof(struct netagent_message_header);
+       MALLOC(response, u_int8_t *, response_size, M_NETAGENT, M_WAITOK);
+       if (response == NULL) {
+               return (ENOMEM);
+       }
+       (void)netagent_buffer_write_message_header(response, message_type, NETAGENT_MESSAGE_FLAGS_RESPONSE, message_id, 0, 0);
+
+       if ((error = netagent_send_ctl_data(session->control_unit, (u_int8_t *)response, response_size))) {
+               NETAGENTLOG0(LOG_ERR, "Failed to send response");
+       }
+
+       FREE(response, M_NETAGENT);
+       return (error);
+}
+
+static int
+netagent_send_error_response(struct netagent_session *session, u_int8_t message_type,
+                                                        u_int32_t message_id, u_int32_t error_code)
+{
+       int error = 0;
+       u_int8_t *response = NULL;
+       size_t response_size = sizeof(struct netagent_message_header);
+       MALLOC(response, u_int8_t *, response_size, M_NETAGENT, M_WAITOK);
+       if (response == NULL) {
+               return (ENOMEM);
+       }
+       (void)netagent_buffer_write_message_header(response, message_type, NETAGENT_MESSAGE_FLAGS_RESPONSE,
+                                                                                          message_id, error_code, 0);
+
+       if ((error = netagent_send_ctl_data(session->control_unit, (u_int8_t *)response, response_size))) {
+               NETAGENTLOG0(LOG_ERR, "Failed to send response");
+       }
+
+       FREE(response, M_NETAGENT);
+       return (error);
+}
+
+static errno_t
+netagent_ctl_send(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, mbuf_t packet, int flags)
+{
+#pragma unused(kctlref, unit, flags)
+       struct netagent_session *session = (struct netagent_session *)unitinfo;
+       struct netagent_message_header header;
+       int error = 0;
+
+       if (session == NULL) {
+               NETAGENTLOG0(LOG_ERR, "Got a NULL session");
+               error = EINVAL;
+               goto done;
+       }
+
+       if (mbuf_pkthdr_len(packet) < sizeof(header)) {
+               NETAGENTLOG(LOG_ERR, "Got a bad packet, length (%lu) < sizeof header (%lu)",
+                                       mbuf_pkthdr_len(packet), sizeof(header));
+               error = EINVAL;
+               goto done;
+       }
+
+       error = mbuf_copydata(packet, 0, sizeof(header), &header);
+       if (error) {
+               NETAGENTLOG(LOG_ERR, "mbuf_copydata failed for the header: %d", error);
+               error = ENOBUFS;
+               goto done;
+       }
+
+       switch (header.message_type) {
+               case NETAGENT_MESSAGE_TYPE_REGISTER: {
+                       netagent_handle_register(session, header.message_id, header.message_payload_length,
+                                                                        packet, sizeof(header));
+                       break;
+               }
+               case NETAGENT_MESSAGE_TYPE_UNREGISTER: {
+                       netagent_handle_unregister(session, header.message_id, header.message_payload_length,
+                                                                          packet, sizeof(header));
+                       break;
+               }
+               case NETAGENT_MESSAGE_TYPE_UPDATE: {
+                       netagent_handle_update(session, header.message_id, header.message_payload_length,
+                                                                  packet, sizeof(header));
+                       break;
+               }
+               case NETAGENT_MESSAGE_TYPE_GET: {
+                       netagent_handle_get(session, header.message_id, header.message_payload_length,
+                                                               packet, sizeof(header));
+                       break;
+               }
+               case NETAGENT_MESSAGE_TYPE_ASSERT: {
+                       netagent_handle_assert(session, header.message_id, header.message_payload_length,
+                                                               packet, sizeof(header));
+                       break;
+               }
+               case NETAGENT_MESSAGE_TYPE_UNASSERT: {
+                       netagent_handle_unassert(session, header.message_id, header.message_payload_length,
+                                                               packet, sizeof(header));
+                       break;
+               }
+               default: {
+                       NETAGENTLOG(LOG_ERR, "Received unknown message type %d", header.message_type);
+                       netagent_send_error_response(session, header.message_type, header.message_id,
+                                                                                NETAGENT_MESSAGE_ERROR_UNKNOWN_TYPE);
+                       break;
+               }
+       }
+
+done:
+       mbuf_freem(packet);
+       return (error);
+}
+
+static void
+netagent_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int flags)
+{
+#pragma unused(kctlref, unit, unitinfo, flags)
+       return;
+}
+
+static errno_t
+netagent_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int opt,
+                                       void *data, size_t *len)
+{
+#pragma unused(kctlref, unit, unitinfo, opt, data, len)
+       return (0);
+}
+
+static errno_t
+netagent_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int opt,
+                                       void *data, size_t len)
+{
+#pragma unused(kctlref, unit, unitinfo, opt, data, len)
+       return (0);
+}
+
+// Session Management
+static struct netagent_session *
+netagent_create_session(u_int32_t control_unit)
+{
+       struct netagent_session *new_session = NULL;
+
+       MALLOC(new_session, struct netagent_session *, sizeof(*new_session), M_NETAGENT, M_WAITOK);
+       if (new_session == NULL) {
+               goto done;
+       }
+       NETAGENTLOG(LOG_DEBUG, "Create agent session, control unit %d", control_unit);
+       memset(new_session, 0, sizeof(*new_session));
+       new_session->control_unit = control_unit;
+       LIST_INIT(&new_session->assertion_list);
+       new_session->wrapper = NULL;
+done:
+       return (new_session);
+}
+
+static void
+netagent_unregister_session_wrapper(struct netagent_session *session)
+{
+       bool unregistered = FALSE;
+       uuid_t unregistered_uuid;
+       struct netagent_wrapper *wrapper = NULL;
+       lck_rw_lock_exclusive(&netagent_lock);
+       if (session != NULL) {
+               wrapper = session->wrapper;
+               if (wrapper != NULL) {
+                       if (netagent_registered_count > 0) {
+                               netagent_registered_count--;
+                       }
+                       if ((session->wrapper->netagent.netagent_flags & NETAGENT_FLAG_ACTIVE) &&
+                               netagent_active_count > 0) {
+                               netagent_active_count--;
+                       }
+
+                       LIST_REMOVE(wrapper, master_chain);
+
+                       unregistered = TRUE;
+                       uuid_copy(unregistered_uuid, session->wrapper->netagent.netagent_uuid);
+
+                       FREE(wrapper, M_NETAGENT);
+                       session->wrapper = NULL;
+                       NETAGENTLOG0(LOG_DEBUG, "Unregistered agent");
+               }
+       }
+       lck_rw_done(&netagent_lock);
+
+       if (unregistered) {
+               netagent_post_event(unregistered_uuid, KEV_NETAGENT_UNREGISTERED);
+               ifnet_clear_netagent(unregistered_uuid);
+       }
+}
+
+static void
+netagent_delete_session(struct netagent_session *session)
+{
+       if (session != NULL) {
+               netagent_unregister_session_wrapper(session);
+
+               // Unassert any pending assertions
+               lck_rw_lock_shared(&netagent_lock);
+               struct netagent_assertion *search_assertion = NULL;
+               struct netagent_assertion *temp_assertion = NULL;
+               LIST_FOREACH_SAFE(search_assertion, &session->assertion_list, assertion_chain, temp_assertion) {
+                       struct netagent_wrapper *wrapper = netagent_find_agent_with_uuid(search_assertion->asserted_uuid);
+                       if (wrapper != NULL) {
+                               netagent_send_trigger(wrapper, current_proc(), NETAGENT_TRIGGER_FLAG_USER, NETAGENT_MESSAGE_TYPE_TRIGGER_UNASSERT);
+                       }
+                       LIST_REMOVE(search_assertion, assertion_chain);
+                       FREE(search_assertion, M_NETAGENT);
+               }
+               lck_rw_done(&netagent_lock);
+
+               FREE(session, M_NETAGENT);
+       }
+}
+
+static int
+netagent_packet_get_netagent_data_size(mbuf_t packet, int offset, int *err)
+{
+       int error = 0;
+
+       struct netagent netagent_peek;
+       memset(&netagent_peek, 0, sizeof(netagent_peek));
+
+       *err = 0;
+
+       error = mbuf_copydata(packet, offset, sizeof(netagent_peek), &netagent_peek);
+       if (error) {
+               *err = ENOENT;
+               return (-1);
+       }
+
+       return (netagent_peek.netagent_data_size);
+}
+
+static void
+netagent_handle_register(struct netagent_session *session, u_int32_t message_id,
+                                                u_int32_t payload_length, mbuf_t packet, int offset)
+{
+       int error;
+       int data_size = 0;
+       struct netagent_wrapper *new_wrapper = NULL;
+       u_int32_t response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+       uuid_t netagent_uuid;
+       uuid_clear(netagent_uuid);
+
+       if (session == NULL) {
+               NETAGENTLOG0(LOG_ERR, "Failed to find session");
+               response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+               goto fail;
+       }
+
+       if (session->wrapper != NULL) {
+               NETAGENTLOG0(LOG_ERR, "Session already has a registered agent");
+               response_error = NETAGENT_MESSAGE_ERROR_ALREADY_REGISTERED;
+               goto fail;
+       }
+
+       if (payload_length < sizeof(struct netagent)) {
+               NETAGENTLOG(LOG_ERR, "Register message size too small for agent: (%d < %d)",
+                                       payload_length, sizeof(struct netagent));
+               response_error = NETAGENT_MESSAGE_ERROR_INVALID_DATA;
+               goto fail;
+       }
+
+       data_size = netagent_packet_get_netagent_data_size(packet, offset, &error);
+       if (error || data_size < 0 || data_size > NETAGENT_MAX_DATA_SIZE) {
+               NETAGENTLOG(LOG_ERR, "Register message size could not be read, error %d data_size %d",
+                                       error, data_size);
+               response_error = NETAGENT_MESSAGE_ERROR_INVALID_DATA;
+               goto fail;
+       }
+
+       MALLOC(new_wrapper, struct netagent_wrapper *, sizeof(*new_wrapper) + data_size, M_NETAGENT, M_WAITOK);
+       if (new_wrapper == NULL) {
+               NETAGENTLOG0(LOG_ERR, "Failed to allocate agent");
+               response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+               goto fail;
+       }
+
+       memset(new_wrapper, 0, sizeof(*new_wrapper) + data_size);
+
+       error = mbuf_copydata(packet, offset, sizeof(struct netagent) + data_size,
+                                                 &new_wrapper->netagent);
+       if (error) {
+               NETAGENTLOG(LOG_ERR, "Failed to read data into agent structure: %d", error);
+               FREE(new_wrapper, M_NETAGENT);
+               response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+               goto fail;
+       }
+
+       lck_rw_lock_exclusive(&netagent_lock);
+
+       new_wrapper->control_unit = session->control_unit;
+
+       session->wrapper = new_wrapper;
+       LIST_INSERT_HEAD(&master_netagent_list, new_wrapper, master_chain);
+
+       new_wrapper->netagent.netagent_flags |= NETAGENT_FLAG_REGISTERED;
+       netagent_registered_count++;
+       if (new_wrapper->netagent.netagent_flags & NETAGENT_FLAG_ACTIVE) {
+               netagent_active_count++;
+       }
+
+       lck_rw_done(&netagent_lock);
+
+       NETAGENTLOG0(LOG_DEBUG, "Registered new agent");
+       netagent_send_success_response(session, NETAGENT_MESSAGE_TYPE_REGISTER, message_id);
+       netagent_post_event(new_wrapper->netagent.netagent_uuid, KEV_NETAGENT_REGISTERED);
+       return;
+fail:
+       netagent_send_error_response(session, NETAGENT_MESSAGE_TYPE_REGISTER, message_id, response_error);
+}
+
+static void
+netagent_handle_unregister(struct netagent_session *session, u_int32_t message_id,
+                                                  u_int32_t payload_length, mbuf_t packet, int offset)
+{
+#pragma unused(payload_length, packet, offset)
+       u_int32_t response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+
+       if (session == NULL) {
+               NETAGENTLOG0(LOG_ERR, "Failed to find session");
+               response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+               goto fail;
+       }
+
+       netagent_unregister_session_wrapper(session);
+
+       netagent_send_success_response(session, NETAGENT_MESSAGE_TYPE_UNREGISTER, message_id);
+       return;
+fail:
+       netagent_send_error_response(session, NETAGENT_MESSAGE_TYPE_UNREGISTER, message_id, response_error);
+}
+
+static void
+netagent_handle_update(struct netagent_session *session, u_int32_t message_id,
+                                          u_int32_t payload_length, mbuf_t packet, int offset)
+{
+       int error;
+       int data_size = 0;
+       struct netagent_wrapper *new_wrapper = NULL;
+       u_int32_t response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+       uuid_t netagent_uuid;
+       uuid_clear(netagent_uuid);
+
+       if (session == NULL) {
+               NETAGENTLOG0(LOG_ERR, "Failed to find session");
+               response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+               goto fail;
+       }
+
+       if (session->wrapper == NULL) {
+               NETAGENTLOG0(LOG_ERR, "Session has no agent to update");
+               response_error = NETAGENT_MESSAGE_ERROR_NOT_REGISTERED;
+               goto fail;
+       }
+
+       if (payload_length < sizeof(struct netagent)) {
+               NETAGENTLOG(LOG_ERR, "Update message size too small for agent: (%d < %d)",
+                                       payload_length, sizeof(struct netagent));
+               response_error = NETAGENT_MESSAGE_ERROR_INVALID_DATA;
+               goto fail;
+       }
+
+       data_size = netagent_packet_get_netagent_data_size(packet, offset, &error);
+       if (error || data_size < 0 || data_size > NETAGENT_MAX_DATA_SIZE) {
+               NETAGENTLOG(LOG_ERR, "Update message size could not be read, error %d data_size %d",
+                                       error, data_size);
+               response_error = NETAGENT_MESSAGE_ERROR_INVALID_DATA;
+               goto fail;
+       }
+
+       MALLOC(new_wrapper, struct netagent_wrapper *, sizeof(*new_wrapper) + data_size, M_NETAGENT, M_WAITOK);
+       if (new_wrapper == NULL) {
+               NETAGENTLOG0(LOG_ERR, "Failed to allocate agent");
+               response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+               goto fail;
+       }
+
+       memset(new_wrapper, 0, sizeof(*new_wrapper) + data_size);
+
+       error = mbuf_copydata(packet, offset, sizeof(struct netagent) + data_size, &new_wrapper->netagent);
+       if (error) {
+               NETAGENTLOG(LOG_ERR, "Failed to read data into agent structure: %d", error);
+               FREE(new_wrapper, M_NETAGENT);
+               response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+               goto fail;
+       }
+
+       lck_rw_lock_exclusive(&netagent_lock);
+
+       if (uuid_compare(session->wrapper->netagent.netagent_uuid, new_wrapper->netagent.netagent_uuid) != 0 ||
+               memcmp(&session->wrapper->netagent.netagent_domain, &new_wrapper->netagent.netagent_domain,
+                          sizeof(new_wrapper->netagent.netagent_domain)) != 0 ||
+               memcmp(&session->wrapper->netagent.netagent_type, &new_wrapper->netagent.netagent_type,
+                          sizeof(new_wrapper->netagent.netagent_type)) != 0) {
+               NETAGENTLOG0(LOG_ERR, "Basic agent parameters do not match, cannot update");
+               FREE(new_wrapper, M_NETAGENT);
+               response_error = NETAGENT_MESSAGE_ERROR_CANNOT_UPDATE;
+               lck_rw_done(&netagent_lock);
+               goto fail;
+       }
+
+       new_wrapper->netagent.netagent_flags |= NETAGENT_FLAG_REGISTERED;
+       if ((new_wrapper->netagent.netagent_flags & NETAGENT_FLAG_ACTIVE) &&
+               !(session->wrapper->netagent.netagent_flags & NETAGENT_FLAG_ACTIVE)) {
+               netagent_active_count++;
+       } else if (!(new_wrapper->netagent.netagent_flags & NETAGENT_FLAG_ACTIVE) &&
+                          (session->wrapper->netagent.netagent_flags & NETAGENT_FLAG_ACTIVE) &&
+                          netagent_active_count > 0) {
+               netagent_active_count--;
+       }
+
+       LIST_REMOVE(session->wrapper, master_chain);
+       FREE(session->wrapper, M_NETAGENT);
+       session->wrapper = new_wrapper;
+       new_wrapper->control_unit = session->control_unit;
+       LIST_INSERT_HEAD(&master_netagent_list, new_wrapper, master_chain);
+
+       lck_rw_done(&netagent_lock);
+
+       NETAGENTLOG0(LOG_DEBUG, "Updated agent");
+       netagent_send_success_response(session, NETAGENT_MESSAGE_TYPE_UPDATE, message_id);
+       netagent_post_event(new_wrapper->netagent.netagent_uuid, KEV_NETAGENT_UPDATED);
+       return;
+fail:
+       netagent_send_error_response(session, NETAGENT_MESSAGE_TYPE_UPDATE, message_id, response_error);
+}
+
+static void
+netagent_handle_get(struct netagent_session *session, u_int32_t message_id,
+                                       u_int32_t payload_length, mbuf_t packet, int offset)
+{
+#pragma unused(payload_length, packet, offset)
+       u_int8_t *response = NULL;
+       u_int8_t *cursor = NULL;
+       u_int32_t response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+
+       if (session == NULL) {
+               NETAGENTLOG0(LOG_ERR, "Failed to find session");
+               response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+               goto fail;
+       }
+
+       if (session->wrapper == NULL) {
+               NETAGENTLOG0(LOG_ERR, "Session has no agent to get");
+               response_error = NETAGENT_MESSAGE_ERROR_NOT_REGISTERED;
+               goto fail;
+       }
+
+       lck_rw_lock_shared(&netagent_lock);
+
+       size_t response_size = sizeof(struct netagent_message_header) + sizeof(session->wrapper->netagent)
+                                                               + session->wrapper->netagent.netagent_data_size;
+       MALLOC(response, u_int8_t *, response_size, M_NETAGENT, M_WAITOK);
+       if (response == NULL) {
+               goto fail;
+       }
+
+       cursor = response;
+       cursor = netagent_buffer_write_message_header(cursor, NETAGENT_MESSAGE_TYPE_GET,
+                                                                                                 NETAGENT_MESSAGE_FLAGS_RESPONSE, message_id, 0,
+                                                                                                 response_size - sizeof(struct netagent_message_header));
+       memcpy(cursor, &session->wrapper->netagent, sizeof(session->wrapper->netagent) +
+                  session->wrapper->netagent.netagent_data_size);
+
+       lck_rw_done(&netagent_lock);
+
+       if (!netagent_send_ctl_data(session->control_unit, (u_int8_t *)response, response_size)) {
+               NETAGENTLOG0(LOG_ERR, "Failed to send response");
+       }
+       FREE(response, M_NETAGENT);
+       return;
+fail:
+       netagent_send_error_response(session, NETAGENT_MESSAGE_TYPE_GET, message_id, response_error);
+}
+
+static void
+netagent_handle_assert(struct netagent_session *session, u_int32_t message_id,
+                                          u_int32_t payload_length, mbuf_t packet, int offset)
+{
+       int error;
+       struct netagent_assertion *new_assertion = NULL;
+       u_int32_t response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+       uuid_t netagent_uuid;
+       uuid_clear(netagent_uuid);
+
+       if (session == NULL) {
+               NETAGENTLOG0(LOG_ERR, "Failed to find session");
+               response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+               goto fail;
+       }
+
+       if (payload_length < sizeof(uuid_t)) {
+               NETAGENTLOG(LOG_ERR, "Assert message size too small for uuid: (%d < %d)",
+                                       payload_length, sizeof(uuid_t));
+               response_error = NETAGENT_MESSAGE_ERROR_INVALID_DATA;
+               goto fail;
+       }
+
+       error = mbuf_copydata(packet, offset, sizeof(uuid_t), &netagent_uuid);
+       if (error) {
+               NETAGENTLOG(LOG_ERR, "Failed to read uuid: %d", error);
+               response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+               goto fail;
+       }
+
+       MALLOC(new_assertion, struct netagent_assertion *, sizeof(*new_assertion), M_NETAGENT, M_WAITOK);
+       if (new_assertion == NULL) {
+               NETAGENTLOG0(LOG_ERR, "Failed to allocate assertion");
+               response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+               goto fail;
+       }
+
+       uuid_copy(new_assertion->asserted_uuid, netagent_uuid);
+
+       lck_rw_lock_shared(&netagent_lock);
+
+       struct netagent_wrapper *wrapper = netagent_find_agent_with_uuid(netagent_uuid);
+       if (wrapper == NULL) {
+               lck_rw_done(&netagent_lock);
+               response_error = NETAGENT_MESSAGE_ERROR_NOT_REGISTERED;
+               FREE(new_assertion, M_NETAGENT);
+               goto fail;
+       }
+
+       error = netagent_send_trigger(wrapper, current_proc(), NETAGENT_TRIGGER_FLAG_USER, NETAGENT_MESSAGE_TYPE_TRIGGER_ASSERT);
+       if (error) {
+               lck_rw_done(&netagent_lock);
+               NETAGENTLOG(LOG_ERR, "Failed to trigger assert agent: %d", error);
+               response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+               FREE(new_assertion, M_NETAGENT);
+               goto fail;
+       }
+
+       LIST_INSERT_HEAD(&session->assertion_list, new_assertion, assertion_chain);
+
+       lck_rw_done(&netagent_lock);
+
+       NETAGENTLOG0(LOG_DEBUG, "Asserted agent");
+       netagent_send_success_response(session, NETAGENT_MESSAGE_TYPE_ASSERT, message_id);
+       return;
+fail:
+       netagent_send_error_response(session, NETAGENT_MESSAGE_TYPE_ASSERT, message_id, response_error);
+}
+
+static void
+netagent_handle_unassert(struct netagent_session *session, u_int32_t message_id,
+                                                u_int32_t payload_length, mbuf_t packet, int offset)
+{
+       int error;
+       u_int32_t response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+       uuid_t netagent_uuid;
+       uuid_clear(netagent_uuid);
+
+       if (session == NULL) {
+               NETAGENTLOG0(LOG_ERR, "Failed to find session");
+               response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+               goto fail;
+       }
+
+       if (payload_length < sizeof(uuid_t)) {
+               NETAGENTLOG(LOG_ERR, "Unassert message size too small for uuid: (%d < %d)",
+                                       payload_length, sizeof(uuid_t));
+               response_error = NETAGENT_MESSAGE_ERROR_INVALID_DATA;
+               goto fail;
+       }
+
+       error = mbuf_copydata(packet, offset, sizeof(uuid_t), &netagent_uuid);
+       if (error) {
+               NETAGENTLOG(LOG_ERR, "Failed to read uuid: %d", error);
+               response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+               goto fail;
+       }
+
+       struct netagent_assertion *found_assertion = NULL;
+       struct netagent_assertion *search_assertion = NULL;
+       LIST_FOREACH(search_assertion, &session->assertion_list, assertion_chain) {
+               if (uuid_compare(search_assertion->asserted_uuid, netagent_uuid) == 0) {
+                       found_assertion = search_assertion;
+                       break;
+               }
+       }
+
+       if (found_assertion == NULL) {
+               NETAGENTLOG0(LOG_ERR, "Netagent uuid not previously asserted");
+               response_error = NETAGENT_MESSAGE_ERROR_INVALID_DATA;
+               goto fail;
+       }
+
+       LIST_REMOVE(found_assertion, assertion_chain);
+       FREE(found_assertion, M_NETAGENT);
+       found_assertion = NULL;
+
+       lck_rw_lock_shared(&netagent_lock);
+
+       struct netagent_wrapper *wrapper = netagent_find_agent_with_uuid(netagent_uuid);
+       if (wrapper == NULL) {
+               lck_rw_done(&netagent_lock);
+               response_error = NETAGENT_MESSAGE_ERROR_NOT_REGISTERED;
+               goto fail;
+       }
+
+       error = netagent_send_trigger(wrapper, current_proc(), NETAGENT_TRIGGER_FLAG_USER, NETAGENT_MESSAGE_TYPE_TRIGGER_UNASSERT);
+       if (error) {
+               lck_rw_done(&netagent_lock);
+               NETAGENTLOG(LOG_ERR, "Failed to trigger unassert agent: %d", error);
+               response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+               goto fail;
+       }
+
+       lck_rw_done(&netagent_lock);
+
+       NETAGENTLOG0(LOG_DEBUG, "Unasserted agent");
+       netagent_send_success_response(session, NETAGENT_MESSAGE_TYPE_UNASSERT, message_id);
+       return;
+fail:
+       netagent_send_error_response(session, NETAGENT_MESSAGE_TYPE_UNASSERT, message_id, response_error);
+}
+
+static struct netagent_wrapper *
+netagent_find_agent_with_uuid(uuid_t uuid)
+{
+       struct netagent_wrapper *search_netagent = NULL;
+
+       LIST_FOREACH(search_netagent, &master_netagent_list, master_chain) {
+               if (uuid_compare(search_netagent->netagent.netagent_uuid, uuid) == 0) {
+                       return (search_netagent);
+               }
+       }
+
+       return (NULL);
+}
+
+void
+netagent_post_updated_interfaces(uuid_t uuid)
+{
+       struct netagent_wrapper *wrapper = NULL;
+       lck_rw_lock_shared(&netagent_lock);
+       wrapper = netagent_find_agent_with_uuid(uuid);
+       lck_rw_done(&netagent_lock);
+
+       if (wrapper != NULL) {
+               netagent_post_event(uuid, KEV_NETAGENT_UPDATED_INTERFACES);
+       } else {
+               NETAGENTLOG0(LOG_DEBUG, "Interface event with no associated agent");
+       }
+
+       return;
+}
+
+int
+netagent_ioctl(u_long cmd, caddr_t data)
+{
+       int error = 0;
+
+       lck_rw_lock_shared(&netagent_lock);
+       switch (cmd) {
+               case SIOCGIFAGENTDATA32: {
+                       struct netagent_req32 *ifsir32 = (struct netagent_req32 *)(void *)data;
+                       struct netagent_wrapper *wrapper = netagent_find_agent_with_uuid(ifsir32->netagent_uuid);
+                       if (wrapper == NULL) {
+                               error = ENOENT;
+                               break;
+                       }
+                       uuid_copy(ifsir32->netagent_uuid, wrapper->netagent.netagent_uuid);
+                       memcpy(ifsir32->netagent_domain, wrapper->netagent.netagent_domain, sizeof(ifsir32->netagent_domain));
+                       memcpy(ifsir32->netagent_type, wrapper->netagent.netagent_type, sizeof(ifsir32->netagent_type));
+                       memcpy(ifsir32->netagent_desc, wrapper->netagent.netagent_desc, sizeof(ifsir32->netagent_desc));
+                       ifsir32->netagent_flags = wrapper->netagent.netagent_flags;
+                       if (ifsir32->netagent_data_size == 0) {
+                               // First pass, client wants data size
+                               ifsir32->netagent_data_size = wrapper->netagent.netagent_data_size;
+                       } else if (ifsir32->netagent_data != USER_ADDR_NULL &&
+                                          ifsir32->netagent_data_size == wrapper->netagent.netagent_data_size) {
+                               // Second pass, client wants data buffer filled out
+                               error = copyout(wrapper->netagent.netagent_data, ifsir32->netagent_data, wrapper->netagent.netagent_data_size);
+                       } else {
+                               error = EINVAL;
+                       }
+                       break;
+               }
+               case SIOCGIFAGENTDATA64: {
+                       struct netagent_req64 *ifsir64 = (struct netagent_req64 *)(void *)data;
+                       struct netagent_wrapper *wrapper = netagent_find_agent_with_uuid(ifsir64->netagent_uuid);
+                       if (wrapper == NULL) {
+                               error = ENOENT;
+                               break;
+                       }
+                       uuid_copy(ifsir64->netagent_uuid, wrapper->netagent.netagent_uuid);
+                       memcpy(ifsir64->netagent_domain, wrapper->netagent.netagent_domain, sizeof(ifsir64->netagent_domain));
+                       memcpy(ifsir64->netagent_type, wrapper->netagent.netagent_type, sizeof(ifsir64->netagent_type));
+                       memcpy(ifsir64->netagent_desc, wrapper->netagent.netagent_desc, sizeof(ifsir64->netagent_desc));
+                       ifsir64->netagent_flags = wrapper->netagent.netagent_flags;
+                       if (ifsir64->netagent_data_size == 0) {
+                               // First pass, client wants data size
+                               ifsir64->netagent_data_size = wrapper->netagent.netagent_data_size;
+                       } else if (ifsir64->netagent_data != USER_ADDR_NULL &&
+                                          ifsir64->netagent_data_size == wrapper->netagent.netagent_data_size) {
+                               // Second pass, client wants data buffer filled out
+                               error = copyout(wrapper->netagent.netagent_data, ifsir64->netagent_data, wrapper->netagent.netagent_data_size);
+                       } else {
+                               error = EINVAL;
+                       }
+                       break;
+               }
+               default: {
+                       error = EINVAL;
+                       break;
+               }
+       }
+       lck_rw_done(&netagent_lock);
+       return (error);
+}
+
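For illustration only (not part of this commit), the two-pass protocol implemented by netagent_ioctl() above could be driven from userspace roughly as sketched here. The SIOCGIFAGENTDATA request name and the choice of socket are assumptions about the userspace side; the handler above only requires netagent_data_size to be zero on the first pass, and a non-null buffer whose size matches the agent's data size on the second.

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/network_agent.h>
#include <uuid/uuid.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>

/* Sketch: fetch an agent's data in two passes; caller frees req->netagent_data. */
static int
example_copy_agent_data(int s, uuid_t agent_uuid, struct netagent_req *req)
{
	memset(req, 0, sizeof(*req));
	uuid_copy(req->netagent_uuid, agent_uuid);

	/* First pass: netagent_data_size == 0 asks the kernel for the size. */
	if (ioctl(s, SIOCGIFAGENTDATA, req) == -1)
		return (errno);
	if (req->netagent_data_size == 0)
		return (0);		/* agent carries no data */

	/* Second pass: supply a buffer of exactly the reported size. */
	req->netagent_data = malloc(req->netagent_data_size);
	if (req->netagent_data == NULL)
		return (ENOMEM);
	if (ioctl(s, SIOCGIFAGENTDATA, req) == -1) {
		free(req->netagent_data);
		req->netagent_data = NULL;
		return (errno);
	}
	return (0);
}

The socket s would be an ordinary datagram socket (for example socket(AF_INET, SOCK_DGRAM, 0)). If the agent's data size changes between the two passes, the handler above rejects the stale size with EINVAL, so a caller can simply retry from the first pass.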
+u_int32_t
+netagent_get_flags(uuid_t uuid)
+{
+       u_int32_t flags = 0;
+       lck_rw_lock_shared(&netagent_lock);
+       struct netagent_wrapper *wrapper = netagent_find_agent_with_uuid(uuid);
+       if (wrapper != NULL) {
+               flags = wrapper->netagent.netagent_flags;
+       } else {
+               NETAGENTLOG0(LOG_DEBUG, "Flags requested for invalid netagent");
+       }
+       lck_rw_done(&netagent_lock);
+
+       return (flags);
+}
+
+int
+netagent_kernel_trigger(uuid_t uuid)
+{
+       int error = 0;
+
+       lck_rw_lock_shared(&netagent_lock);
+       struct netagent_wrapper *wrapper = netagent_find_agent_with_uuid(uuid);
+       if (wrapper == NULL) {
+               NETAGENTLOG0(LOG_ERR, "Requested netagent for kernel trigger could not be found");
+               error = ENOENT;
+               goto done;
+       }
+
+       if ((wrapper->netagent.netagent_flags & NETAGENT_FLAG_KERNEL_ACTIVATED) == 0) {
+               NETAGENTLOG0(LOG_ERR, "Requested netagent for kernel trigger is not kernel activated");
+               // Agent does not accept kernel triggers
+               error = EINVAL;
+               goto done;
+       }
+
+       if ((wrapper->netagent.netagent_flags & NETAGENT_FLAG_ACTIVE)) {
+               // Agent already active
+               NETAGENTLOG0(LOG_INFO, "Requested netagent for kernel trigger is already active");
+               error = 0;
+               goto done;
+       }
+
+       error = netagent_send_trigger(wrapper, current_proc(), NETAGENT_TRIGGER_FLAG_KERNEL, NETAGENT_MESSAGE_TYPE_TRIGGER);
+       NETAGENTLOG((error ? LOG_ERR : LOG_INFO), "Triggered netagent from kernel (error %d)", error);
+done:
+       lck_rw_done(&netagent_lock);
+       return (error);
+}
+
+int
+netagent_trigger(struct proc *p, struct netagent_trigger_args *uap, int32_t *retval)
+{
+#pragma unused(p, retval)
+       uuid_t agent_uuid;
+       int error = 0;
+
+       if (uap == NULL) {
+               NETAGENTLOG0(LOG_ERR, "uap == NULL");
+               return (EINVAL);
+       }
+
+       if (uap->agent_uuid) {
+               if (uap->agent_uuidlen != sizeof(uuid_t)) {
+                       NETAGENTLOG(LOG_ERR, "Incorrect length (got %d, expected %d)",
+                                               uap->agent_uuidlen, sizeof(uuid_t));
+                       return (ERANGE);
+               }
+
+               error = copyin(uap->agent_uuid, agent_uuid, sizeof(uuid_t));
+               if (error) {
+                       NETAGENTLOG(LOG_ERR, "copyin error (%d)", error);
+                       return (error);
+               }
+       }
+
+       if (uuid_is_null(agent_uuid)) {
+               NETAGENTLOG0(LOG_ERR, "Requested netagent UUID is empty");
+               return (EINVAL);
+       }
+
+       lck_rw_lock_shared(&netagent_lock);
+       struct netagent_wrapper *wrapper = netagent_find_agent_with_uuid(agent_uuid);
+       if (wrapper == NULL) {
+               NETAGENTLOG0(LOG_ERR, "Requested netagent UUID is not registered");
+               error = ENOENT;
+               goto done;
+       }
+
+       if ((wrapper->netagent.netagent_flags & NETAGENT_FLAG_USER_ACTIVATED) == 0) {
+               // Agent does not accept triggers
+               NETAGENTLOG0(LOG_ERR, "Requested netagent UUID is not eligible for triggering");
+               error = EINVAL;
+               goto done;
+       }
+
+       if ((wrapper->netagent.netagent_flags & NETAGENT_FLAG_ACTIVE)) {
+               // Agent already active
+               NETAGENTLOG0(LOG_INFO, "Requested netagent UUID is already active");
+               error = 0;
+               goto done;
+       }
+
+       error = netagent_send_trigger(wrapper, p, NETAGENT_TRIGGER_FLAG_USER, NETAGENT_MESSAGE_TYPE_TRIGGER);
+       NETAGENTLOG((error ? LOG_ERR : LOG_INFO), "Triggered netagent (error %d)", error);
+done:
+       lck_rw_done(&netagent_lock);
+       return (error);
+}
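As a hedged userspace sketch (not part of this commit), the trigger path above could be exercised through the netagent_trigger() prototype declared in the header that follows; the -1/errno return convention is an assumption about the userspace stub. The handler insists on agent_uuidlen == sizeof(uuid_t), a non-null UUID, and an agent registered with NETAGENT_FLAG_USER_ACTIVATED, and it treats an already-active agent as success.

#include <errno.h>
#include <uuid/uuid.h>
#include <net/network_agent.h>

/* Sketch: ask the kernel to activate a user-activated agent by UUID string. */
static int
example_trigger_agent(const char *uuid_string)
{
	uuid_t agent_uuid;

	if (uuid_parse(uuid_string, agent_uuid) != 0)
		return (EINVAL);

	if (netagent_trigger(agent_uuid, sizeof(uuid_t)) == -1)
		return (errno);	/* e.g. ENOENT (not registered), EINVAL (not user-activated) */

	return (0);
}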
diff --git a/bsd/net/network_agent.h b/bsd/net/network_agent.h
new file mode 100644 (file)
index 0000000..6fe55b9
--- /dev/null
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2014, 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef        _NETAGENT_H_
+#define        _NETAGENT_H_
+
+#include <netinet/in.h>
+#include <sys/socket.h>
+
+#ifdef BSD_KERNEL_PRIVATE
+#include <stdbool.h>
+
+errno_t netagent_init(void);
+#endif
+#ifdef PRIVATE
+/*
+ * Name registered by the Network Agent kernel control
+ */
+#define        NETAGENT_CONTROL_NAME "com.apple.net.netagent"
+
+struct netagent_message_header {
+       u_int8_t                message_type;
+       u_int8_t                message_flags;
+       u_int32_t               message_id;
+       u_int32_t               message_error;
+       u_int32_t               message_payload_length;
+};
+
+struct netagent_trigger_message {
+       u_int32_t               trigger_flags;
+       pid_t                   trigger_pid;
+       uuid_t                  trigger_proc_uuid;
+};
+
+#define        NETAGENT_MESSAGE_TYPE_REGISTER                  1       // Pass netagent to set, no return value
+#define        NETAGENT_MESSAGE_TYPE_UNREGISTER                2       // No value, no return value
+#define        NETAGENT_MESSAGE_TYPE_UPDATE                    3       // Pass netagent to update, no return value
+#define        NETAGENT_MESSAGE_TYPE_GET                       4       // No value, return netagent
+#define        NETAGENT_MESSAGE_TYPE_TRIGGER                   5       // Kernel initiated, no reply expected
+#define        NETAGENT_MESSAGE_TYPE_ASSERT                    6       // Pass uuid of netagent to assert
+#define        NETAGENT_MESSAGE_TYPE_UNASSERT                  7       // Pass uuid of netagent to unassert
+#define        NETAGENT_MESSAGE_TYPE_TRIGGER_ASSERT    8       // Kernel initiated, no reply expected
+#define        NETAGENT_MESSAGE_TYPE_TRIGGER_UNASSERT  9       // Kernel initiated, no reply expected
+
+#define        NETAGENT_MESSAGE_FLAGS_RESPONSE                 0x01    // Used for acks, errors, and query responses
+
+#define        NETAGENT_MESSAGE_ERROR_NONE                     0
+#define        NETAGENT_MESSAGE_ERROR_INTERNAL                 1
+#define        NETAGENT_MESSAGE_ERROR_UNKNOWN_TYPE             2
+#define        NETAGENT_MESSAGE_ERROR_INVALID_DATA             3
+#define        NETAGENT_MESSAGE_ERROR_NOT_REGISTERED           4
+#define        NETAGENT_MESSAGE_ERROR_ALREADY_REGISTERED       5
+#define        NETAGENT_MESSAGE_ERROR_CANNOT_UPDATE            6
+
+#define NETAGENT_DOMAINSIZE            32
+#define NETAGENT_TYPESIZE              32
+#define NETAGENT_DESCSIZE              128
+
+#define NETAGENT_MAX_DATA_SIZE 1024
+
+#define NETAGENT_FLAG_REGISTERED               0x0001  // Agent is registered
+#define NETAGENT_FLAG_ACTIVE                   0x0002  // Agent is active
+#define NETAGENT_FLAG_KERNEL_ACTIVATED         0x0004  // Agent can be activated by kernel activity
+#define NETAGENT_FLAG_USER_ACTIVATED           0x0008  // Agent can be activated by system call (netagent_trigger)
+#define NETAGENT_FLAG_VOLUNTARY                        0x0010  // Use of agent is optional
+#define NETAGENT_FLAG_SPECIFIC_USE_ONLY                0x0020  // Agent should only be used and activated when specifically required
+
+#define NETAGENT_TRIGGER_FLAG_USER             0x0001  // Userspace triggered agent
+#define NETAGENT_TRIGGER_FLAG_KERNEL           0x0002  // Kernel triggered agent
+
+#define KEV_NETAGENT_SUBCLASS                  9
+#define KEV_NETAGENT_REGISTERED                        1
+#define KEV_NETAGENT_UNREGISTERED              2
+#define KEV_NETAGENT_UPDATED                   3
+#define KEV_NETAGENT_UPDATED_INTERFACES                4
+
+struct kev_netagent_data {
+       uuid_t          netagent_uuid;
+};
+
+// To be used with kernel control socket
+struct netagent {
+       uuid_t          netagent_uuid;
+       char            netagent_domain[NETAGENT_DOMAINSIZE];
+       char            netagent_type[NETAGENT_TYPESIZE];
+       char            netagent_desc[NETAGENT_DESCSIZE];
+       u_int32_t       netagent_flags;
+       u_int32_t       netagent_data_size;
+       u_int8_t        netagent_data[0];
+};
+
+// To be used with SIOCGIFAGENTDATA
+struct netagent_req {
+       uuid_t          netagent_uuid;
+       char            netagent_domain[NETAGENT_DOMAINSIZE];
+       char            netagent_type[NETAGENT_TYPESIZE];
+       char            netagent_desc[NETAGENT_DESCSIZE];
+       u_int32_t       netagent_flags;
+       u_int32_t       netagent_data_size;
+       u_int8_t        *netagent_data;
+};
+#ifdef BSD_KERNEL_PRIVATE
+int netagent_ioctl(u_long cmd, caddr_t data);
+
+struct netagent_req32 {
+       uuid_t          netagent_uuid;
+       char            netagent_domain[NETAGENT_DOMAINSIZE];
+       char            netagent_type[NETAGENT_TYPESIZE];
+       char            netagent_desc[NETAGENT_DESCSIZE];
+       u_int32_t       netagent_flags;
+       u_int32_t       netagent_data_size;
+       user32_addr_t   netagent_data;
+};
+struct netagent_req64 {
+       uuid_t          netagent_uuid;
+       char            netagent_domain[NETAGENT_DOMAINSIZE];
+       char            netagent_type[NETAGENT_TYPESIZE];
+       char            netagent_desc[NETAGENT_DESCSIZE];
+       u_int32_t       netagent_flags;
+       u_int32_t       netagent_data_size;
+       user64_addr_t   netagent_data __attribute__((aligned(8)));
+};
+
+// Kernel accessors
+void netagent_post_updated_interfaces(uuid_t uuid); // To be called from interface ioctls
+
+u_int32_t netagent_get_flags(uuid_t uuid);
+
+int netagent_kernel_trigger(uuid_t uuid);
+#endif /* BSD_KERNEL_PRIVATE */
+
+#endif /* PRIVATE */
+
+#ifndef KERNEL
+int netagent_trigger(uuid_t agent_uuid, size_t agent_uuidlen);
+#endif /* !KERNEL */
+
+#endif /* _NETAGENT_H_ */
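To make the message format above concrete, here is a hedged userspace sketch (not part of this commit) of an agent registering itself over the com.apple.net.netagent kernel control: a netagent_message_header followed by a struct netagent and its variable-length data, sent on a connected control socket. The kern_control lookup/connect boilerplate and the example domain/type strings are assumptions; only the wire layout and the constants come from the header above.

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/kern_control.h>
#include <sys/sys_domain.h>
#include <net/network_agent.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <uuid/uuid.h>

/* Sketch: register an agent and return the control socket, kept open while registered. */
static int
example_register_agent(const void *data, u_int32_t data_size)
{
	struct ctl_info info;
	struct sockaddr_ctl addr;
	struct netagent_message_header *header;
	struct netagent *agent;
	size_t message_size = sizeof(*header) + sizeof(*agent) + data_size;
	u_int8_t *message;
	int fd;

	fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
	if (fd < 0)
		return (-1);

	/* Resolve the control name to an id, then connect to unit 0 (kernel picks one). */
	memset(&info, 0, sizeof(info));
	strlcpy(info.ctl_name, NETAGENT_CONTROL_NAME, sizeof(info.ctl_name));
	if (ioctl(fd, CTLIOCGINFO, &info) == -1) {
		close(fd);
		return (-1);
	}
	memset(&addr, 0, sizeof(addr));
	addr.sc_len = sizeof(addr);
	addr.sc_family = AF_SYSTEM;
	addr.ss_sysaddr = AF_SYS_CONTROL;
	addr.sc_id = info.ctl_id;
	addr.sc_unit = 0;
	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
		close(fd);
		return (-1);
	}

	if ((message = calloc(1, message_size)) == NULL) {
		close(fd);
		return (-1);
	}

	/* Header: REGISTER carries struct netagent plus data_size bytes as its payload. */
	header = (struct netagent_message_header *)message;
	header->message_type = NETAGENT_MESSAGE_TYPE_REGISTER;
	header->message_id = 1;
	header->message_payload_length = sizeof(*agent) + data_size;

	agent = (struct netagent *)(message + sizeof(*header));
	uuid_generate(agent->netagent_uuid);
	strlcpy(agent->netagent_domain, "ExampleDomain", sizeof(agent->netagent_domain));
	strlcpy(agent->netagent_type, "ExampleType", sizeof(agent->netagent_type));
	strlcpy(agent->netagent_desc, "Example agent", sizeof(agent->netagent_desc));
	agent->netagent_flags = NETAGENT_FLAG_USER_ACTIVATED;
	agent->netagent_data_size = data_size;
	memcpy(agent->netagent_data, data, data_size);

	if (send(fd, message, message_size, 0) != (ssize_t)message_size) {
		free(message);
		close(fd);
		return (-1);
	}
	free(message);
	return (fd);	/* the kernel replies with a NETAGENT_MESSAGE_FLAGS_RESPONSE ack or error */
}

A NETAGENT_MESSAGE_TYPE_UNREGISTER message on the same socket tears the registration down again, handled by netagent_handle_unregister() earlier in this commit.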
index 49380d88427d0cc6412b0dc94641be780f0b2466..8a295f887cc76b9980d610ba6dd1a7ff5de25763 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2010-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -87,46 +87,21 @@ static int nstat_recvspace = 8192;
 SYSCTL_INT(_net_stats, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
     &nstat_recvspace, 0, "");
 
-static int nstat_successmsgfailures = 0;
-SYSCTL_INT(_net_stats, OID_AUTO, successmsgfailures, CTLFLAG_RD| CTLFLAG_LOCKED,
-    &nstat_successmsgfailures, 0, "");
-
-static int nstat_sendountfailures = 0;
-SYSCTL_INT(_net_stats, OID_AUTO, sendountfailures, CTLFLAG_RD| CTLFLAG_LOCKED,
-    &nstat_sendountfailures, 0, "");
-
-static int nstat_sysinfofailures = 0;
-SYSCTL_INT(_net_stats, OID_AUTO, sysinfofalures, CTLFLAG_RD| CTLFLAG_LOCKED,
-    &nstat_sysinfofailures, 0, "");
-
-static int nstat_srccountfailures = 0;
-SYSCTL_INT(_net_stats, OID_AUTO, srccountfailures, CTLFLAG_RD| CTLFLAG_LOCKED,
-    &nstat_srccountfailures, 0, "");
-
-static int nstat_descriptionfailures = 0;
-SYSCTL_INT(_net_stats, OID_AUTO, descriptionfailures, CTLFLAG_RD| CTLFLAG_LOCKED,
-    &nstat_descriptionfailures, 0, "");
-
-static int nstat_msgremovedfailures = 0;
-SYSCTL_INT(_net_stats, OID_AUTO, msgremovedfailures , CTLFLAG_RD| CTLFLAG_LOCKED,
-    &nstat_msgremovedfailures, 0, "");
-
-static int nstat_srcaddedfailures = 0;
-SYSCTL_INT(_net_stats, OID_AUTO, srcaddedfailures , CTLFLAG_RD| CTLFLAG_LOCKED,
-    &nstat_srcaddedfailures, 0, "");
-
-static int nstat_msgerrorfailures = 0;
-SYSCTL_INT(_net_stats, OID_AUTO, msgerrorfailures , CTLFLAG_RD| CTLFLAG_LOCKED,
-    &nstat_msgerrorfailures, 0, "");
+static struct nstat_stats nstat_stats;
+SYSCTL_STRUCT(_net_stats, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
+    &nstat_stats, nstat_stats, "");
 
 
 enum
 {
-       NSTAT_FLAG_CLEANUP      = (1 << 0),
-       NSTAT_FLAG_REQCOUNTS    = (1 << 1),
-       NSTAT_FLAG_REQDESCS     = (1 << 2)
+       NSTAT_FLAG_CLEANUP                              = (1 << 0),
+       NSTAT_FLAG_REQCOUNTS                    = (1 << 1),
+       NSTAT_FLAG_SUPPORTS_UPDATES             = (1 << 2),
+       NSTAT_FLAG_SYSINFO_SUBSCRIBED   = (1 << 3),
 };
 
+#define QUERY_CONTINUATION_SRC_COUNT 100
+
 typedef struct nstat_control_state
 {
        struct nstat_control_state      *ncs_next;
@@ -136,7 +111,12 @@ typedef struct nstat_control_state
        u_int32_t                       ncs_unit;
        nstat_src_ref_t                 ncs_next_srcref;
        struct nstat_src                *ncs_srcs;
+       mbuf_t                          ncs_accumulated;
        u_int32_t                       ncs_flags;
+       u_int64_t                       ncs_provider_filters[NSTAT_PROVIDER_COUNT];
+       /* state maintained for partial query requests */
+       u_int64_t                       ncs_context;
+       u_int64_t                       ncs_seq;
 } nstat_control_state;
 
 typedef struct nstat_provider
@@ -151,6 +131,7 @@ typedef struct nstat_provider
        void                                    (*nstat_watcher_remove)(nstat_control_state *state);
        errno_t                                 (*nstat_copy_descriptor)(nstat_provider_cookie_t cookie, void *data, u_int32_t len);
        void                                    (*nstat_release)(nstat_provider_cookie_t cookie, boolean_t locked);
+       bool                                    (*nstat_reporting_allowed)(nstat_provider_cookie_t cookie, uint64_t filter);
 } nstat_provider;
 
 
@@ -161,14 +142,19 @@ typedef struct nstat_src
        nstat_provider                  *provider;
        nstat_provider_cookie_t         cookie;
        uint32_t                        filter;
+       uint64_t                        seq;
 } nstat_src;
 
 static errno_t         nstat_control_send_counts(nstat_control_state *,
-                           nstat_src *, unsigned long long, int *); 
-static int             nstat_control_send_description(nstat_control_state *state, nstat_src *src, u_int64_t context);
+                           nstat_src *, unsigned long long, u_int16_t, int *);
+static int             nstat_control_send_description(nstat_control_state *state, nstat_src *src, u_int64_t context, u_int16_t hdr_flags);
+static int nstat_control_send_update(nstat_control_state *state, nstat_src *src, u_int64_t context, u_int16_t hdr_flags, int *gone);
 static errno_t         nstat_control_send_removed(nstat_control_state *, nstat_src *);
-static void            nstat_control_cleanup_source(nstat_control_state *state, nstat_src *src,
-                               boolean_t);
+static errno_t         nstat_control_send_goodbye(nstat_control_state  *state, nstat_src *src);
+static void            nstat_control_cleanup_source(nstat_control_state *state, nstat_src *src, boolean_t);
+static bool            nstat_control_reporting_allowed(nstat_control_state *state, nstat_src *src);
+static boolean_t       nstat_control_begin_query(nstat_control_state *state, const nstat_msg_hdr *hdrp);
+static u_int16_t       nstat_control_end_query(nstat_control_state *state, nstat_src *last_src, boolean_t partial);
 
 static u_int32_t       nstat_udp_watchers = 0;
 static u_int32_t       nstat_tcp_watchers = 0;
@@ -250,6 +236,57 @@ nstat_ip6_to_sockaddr(
        }
 }
 
+static u_int16_t
+nstat_inpcb_to_flags(
+       const struct inpcb *inp)
+{
+       u_int16_t flags = 0;
+
+       if ((inp != NULL ) && (inp->inp_last_outifp != NULL))
+       {
+               struct ifnet *ifp = inp->inp_last_outifp;
+
+               u_int32_t functional_type = if_functional_type(ifp);
+
+               /* Panic if someone adds a functional type without updating ntstat. */
+               VERIFY(0 <= functional_type && functional_type <= IFRTYPE_FUNCTIONAL_LAST);
+
+               switch (functional_type)
+               {
+               case IFRTYPE_FUNCTIONAL_UNKNOWN:
+                       flags |= NSTAT_IFNET_IS_UNKNOWN_TYPE;
+                       break;
+               case IFRTYPE_FUNCTIONAL_LOOPBACK:
+                       flags |= NSTAT_IFNET_IS_LOOPBACK;
+                       break;
+               case IFRTYPE_FUNCTIONAL_WIRED:
+                       flags |= NSTAT_IFNET_IS_WIRED;
+                       break;
+               case IFRTYPE_FUNCTIONAL_WIFI_INFRA:
+                       flags |= NSTAT_IFNET_IS_WIFI;
+                       break;
+               case IFRTYPE_FUNCTIONAL_WIFI_AWDL:
+                       flags |= NSTAT_IFNET_IS_WIFI;
+                       flags |= NSTAT_IFNET_IS_AWDL;
+                       break;
+               case IFRTYPE_FUNCTIONAL_CELLULAR:
+                       flags |= NSTAT_IFNET_IS_CELLULAR;
+                       break;
+               }
+
+               if (IFNET_IS_EXPENSIVE(ifp))
+               {
+                       flags |= NSTAT_IFNET_IS_EXPENSIVE;
+               }
+       }
+       else
+       {
+               flags = NSTAT_IFNET_IS_UNKNOWN_TYPE;
+       }
+
+       return flags;
+}
+
 #pragma mark -- Network Statistic Providers --
 
 static errno_t nstat_control_source_add(u_int64_t context, nstat_control_state *state, nstat_provider *provider, nstat_provider_cookie_t cookie);
@@ -291,7 +328,6 @@ static void nstat_init_route_provider(void);
 static void nstat_init_tcp_provider(void);
 static void nstat_init_udp_provider(void);
 static void nstat_init_ifnet_provider(void);
-static void nstat_init_sysinfo_provider(void);
 
 __private_extern__ void
 nstat_init(void)
@@ -311,7 +347,6 @@ nstat_init(void)
                nstat_init_tcp_provider();
                nstat_init_udp_provider();
                nstat_init_ifnet_provider();
-               nstat_init_sysinfo_provider();
                nstat_control_register();
        }
 }
@@ -433,9 +468,9 @@ nstat_route_counts(
        struct rtentry          *rt = (struct rtentry*)cookie;
        struct nstat_counts     *rt_stats = rt->rt_stats;
        
-       *out_gone = 0;
+       if (out_gone) *out_gone = 0;
        
-       if ((rt->rt_flags & RTF_UP) == 0) *out_gone = 1;
+       if (out_gone && (rt->rt_flags & RTF_UP) == 0) *out_gone = 1;
        
        if (rt_stats)
        {
@@ -454,7 +489,9 @@ nstat_route_counts(
                out_counts->nstat_cell_rxbytes = out_counts->nstat_cell_txbytes = 0;
        }
        else
+       {
                bzero(out_counts, sizeof(*out_counts));
+       }
        
        return 0;
 }
@@ -839,6 +876,7 @@ struct nstat_tucookie {
                struct sockaddr_in6     v6;
        } remote;
        unsigned int    if_index;
+       uint16_t        ifnet_properties;
 };
 
 static struct nstat_tucookie *
@@ -1037,12 +1075,12 @@ nstat_tcp_counts(
 
        bzero(out_counts, sizeof(*out_counts));
        
-       *out_gone = 0;
+       if (out_gone) *out_gone = 0;
        
        // if the pcb is in the dead state, we should stop using it
        if (nstat_tcp_gone(cookie))
        {
-               *out_gone = 1;
+               if (out_gone) *out_gone = 1;
                if (!(inp = tucookie->inp) || !intotcpcb(inp))
                        return EINVAL;
        } 
@@ -1167,7 +1205,8 @@ nstat_pcb_detach(struct inpcb *inp)
                return;
 
        lck_mtx_lock(&nstat_mtx);
-       for (state = nstat_controls; state; state = state->ncs_next) {
+       for (state = nstat_controls; state; state = state->ncs_next)
+       {
                lck_mtx_lock(&state->mtx);
                for (prevsrc = NULL, src = state->ncs_srcs; src;
                    prevsrc = src, src = src->next) 
@@ -1177,30 +1216,15 @@ nstat_pcb_detach(struct inpcb *inp)
                                break;
                }
 
-               if (src) {
-                       // send one last counts notification
-                       result = nstat_control_send_counts(state, src, 0, NULL);
-                       if (result != 0 && nstat_debug)
-                               printf("%s - nstat_control_send_counts() %d\n",
-                                       __func__, result);
-
-                       // send a last description
-                       result = nstat_control_send_description(state, src, 0);
-                       if (result != 0 && nstat_debug)
-                               printf("%s - nstat_control_send_description() %d\n",
-                                       __func__, result);
-
-                       // send the source removed notification
-                       result = nstat_control_send_removed(state, src);
-                       if (result != 0 && nstat_debug)
-                               printf("%s - nstat_control_send_removed() %d\n",
-                                       __func__, result);
-
+               if (src)
+               {
+                       result = nstat_control_send_goodbye(state, src);
+                       
                        if (prevsrc)
                                prevsrc->next = src->next;
                        else
                                state->ncs_srcs = src->next;
-
+                       
                        src->next = dead_list;
                        dead_list = src;
                }
@@ -1260,6 +1284,8 @@ nstat_pcb_cache(struct inpcb *inp)
                                if (inp->inp_last_outifp)
                                        tucookie->if_index = 
                                            inp->inp_last_outifp->if_index;
+
+                               tucookie->ifnet_properties = nstat_inpcb_to_flags(inp);
                                tucookie->cached = true;
                                break;
                        }
@@ -1357,7 +1383,7 @@ nstat_tcp_copy_descriptor(
                desc->traffic_class = so->so_traffic_class;
                desc->traffic_mgt_flags = so->so_traffic_mgt_flags;
                proc_name(desc->pid, desc->pname, sizeof(desc->pname));
-               if (desc->pname == NULL || desc->pname[0] == 0)
+               if (desc->pname[0] == 0)
                {
                        strlcpy(desc->pname, tucookie->pname,
                            sizeof(desc->pname));
@@ -1384,10 +1410,31 @@ nstat_tcp_copy_descriptor(
                desc->rcvbufsize = so->so_rcv.sb_hiwat;
                desc->rcvbufused = so->so_rcv.sb_cc;
        }
-       
+
+       tcp_get_connectivity_status(tp, &desc->connstatus);
+       desc->ifnet_properties = nstat_inpcb_to_flags(inp);
        return 0;
 }
 
+static bool
+nstat_tcpudp_reporting_allowed(nstat_provider_cookie_t cookie, uint64_t filter)
+{
+       bool retval = true;
+
+       /* Only apply interface filter if at least one is allowed. */
+       if ((filter & NSTAT_FILTER_ACCEPT_ALL) != 0)
+       {
+               struct nstat_tucookie *tucookie = (struct nstat_tucookie *)cookie;
+               struct inpcb    *inp = tucookie->inp;
+
+               uint16_t interface_properties = nstat_inpcb_to_flags(inp);
+
+               /* For now, just check on interface type. */
+               retval = ((filter & interface_properties) != 0);
+       }
+       return retval;
+}
+
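A brief hedged illustration (not from this commit) of the filter semantics just added: the code above only makes sense if the NSTAT_FILTER_ACCEPT_* bits share values with the NSTAT_IFNET_IS_* interface properties, so a client that sets only the cellular and Wi-Fi bits is offered a TCP/UDP source only when nstat_inpcb_to_flags() reports an intersecting property. The definitions are assumed to come from the private <net/ntstat.h>.

#include <net/ntstat.h>
#include <stdbool.h>
#include <stdint.h>

/* Mirror of the nstat_tcpudp_reporting_allowed() predicate above. */
static bool
example_reporting_allowed(uint64_t filter, uint16_t interface_properties)
{
	if ((filter & NSTAT_FILTER_ACCEPT_ALL) == 0)
		return (true);			/* no interface filter set: accept everything */
	return ((filter & interface_properties) != 0);
}

/* A wired, expensive source against a cellular/Wi-Fi filter shares no bits,
 * so it would be skipped:
 * example_reporting_allowed(NSTAT_IFNET_IS_CELLULAR | NSTAT_IFNET_IS_WIFI,
 *                           NSTAT_IFNET_IS_WIRED | NSTAT_IFNET_IS_EXPENSIVE) == false */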
 static void
 nstat_init_tcp_provider(void)
 {
@@ -1401,6 +1448,7 @@ nstat_init_tcp_provider(void)
        nstat_tcp_provider.nstat_watcher_add = nstat_tcp_add_watcher;
        nstat_tcp_provider.nstat_watcher_remove = nstat_tcp_remove_watcher;
        nstat_tcp_provider.nstat_copy_descriptor = nstat_tcp_copy_descriptor;
+       nstat_tcp_provider.nstat_reporting_allowed = nstat_tcpudp_reporting_allowed;
        nstat_tcp_provider.next = nstat_providers;
        nstat_providers = &nstat_tcp_provider;
 }
@@ -1439,12 +1487,12 @@ nstat_udp_counts(
        struct nstat_tucookie *tucookie =
            (struct nstat_tucookie *)cookie;
        
-       *out_gone = 0;
+       if (out_gone) *out_gone = 0;
        
        // if the pcb is in the dead state, we should stop using it
        if (nstat_udp_gone(cookie))
        {
-               *out_gone = 1;
+               if (out_gone) *out_gone = 1;
                if (!tucookie->inp)
                        return EINVAL;
        }
@@ -1570,34 +1618,36 @@ nstat_udp_copy_descriptor(
                if (inp->inp_vflag & INP_IPV6)
                {
                        nstat_ip6_to_sockaddr(&inp->in6p_laddr, inp->inp_lport,
-                               &desc->local.v6, sizeof(desc->local));
+                               &desc->local.v6, sizeof(desc->local.v6));
                        nstat_ip6_to_sockaddr(&inp->in6p_faddr, inp->inp_fport,
-                               &desc->remote.v6, sizeof(desc->remote));
+                               &desc->remote.v6, sizeof(desc->remote.v6));
                }
                else if (inp->inp_vflag & INP_IPV4)
                {
                        nstat_ip_to_sockaddr(&inp->inp_laddr, inp->inp_lport,
-                               &desc->local.v4, sizeof(desc->local));
+                               &desc->local.v4, sizeof(desc->local.v4));
                        nstat_ip_to_sockaddr(&inp->inp_faddr, inp->inp_fport,
-                               &desc->remote.v4, sizeof(desc->remote));
+                               &desc->remote.v4, sizeof(desc->remote.v4));
                }
+               desc->ifnet_properties = nstat_inpcb_to_flags(inp);
        }
        else
        {
                if (inp->inp_vflag & INP_IPV6)
                {
                        memcpy(&desc->local.v6, &tucookie->local.v6,
-                           sizeof(desc->local));
+                           sizeof(desc->local.v6));
                        memcpy(&desc->remote.v6, &tucookie->remote.v6,
-                           sizeof(desc->remote));
+                           sizeof(desc->remote.v6));
                }
                else if (inp->inp_vflag & INP_IPV4)
                {
                        memcpy(&desc->local.v4, &tucookie->local.v4,
-                           sizeof(desc->local));
+                           sizeof(desc->local.v4));
                        memcpy(&desc->remote.v4, &tucookie->remote.v4,
-                           sizeof(desc->remote));
+                           sizeof(desc->remote.v4));
                }
+               desc->ifnet_properties = tucookie->ifnet_properties;
        }
        
        if (inp->inp_last_outifp)
@@ -1613,7 +1663,7 @@ nstat_udp_copy_descriptor(
                desc->upid = so->last_upid;
                desc->pid = so->last_pid;
                proc_name(desc->pid, desc->pname, sizeof(desc->pname));
-               if (desc->pname == NULL || desc->pname[0] == 0)
+               if (desc->pname[0] == 0)
                {
                        strlcpy(desc->pname, tucookie->pname,
                            sizeof(desc->pname));
@@ -1639,7 +1689,7 @@ nstat_udp_copy_descriptor(
                desc->rcvbufused = so->so_rcv.sb_cc;
                desc->traffic_class = so->so_traffic_class;
        }
-       
+
        return 0;
 }
 
@@ -1656,6 +1706,7 @@ nstat_init_udp_provider(void)
        nstat_udp_provider.nstat_watcher_remove = nstat_udp_remove_watcher;
        nstat_udp_provider.nstat_copy_descriptor = nstat_udp_copy_descriptor;
        nstat_udp_provider.nstat_release = nstat_udp_release;
+       nstat_udp_provider.nstat_reporting_allowed = nstat_tcpudp_reporting_allowed;
        nstat_udp_provider.next = nstat_providers;
        nstat_providers = &nstat_udp_provider;
 }
@@ -1680,7 +1731,7 @@ nstat_ifnet_lookup(
        u_int32_t               length,
        nstat_provider_cookie_t *out_cookie)
 {
-       const nstat_ifnet_add_param *param = (nstat_ifnet_add_param *)data;
+       const nstat_ifnet_add_param *param = (const nstat_ifnet_add_param *)data;
        struct ifnet *ifp;
        boolean_t changed = FALSE;
        nstat_control_state *state;
@@ -1739,7 +1790,7 @@ nstat_ifnet_lookup(
                        {
                                if (src->provider != &nstat_ifnet_provider)
                                        continue;
-                               nstat_control_send_description(state, src, 0);
+                               nstat_control_send_description(state, src, 0, 0);
                        }
                        lck_mtx_unlock(&state->mtx);
                }
@@ -1780,12 +1831,12 @@ nstat_ifnet_counts(
            (struct nstat_ifnet_cookie *)cookie;
        struct ifnet *ifp = ifcookie->ifp;
 
-       *out_gone = 0;
+       if (out_gone) *out_gone = 0;
        
        // if the ifnet is gone, we should stop using it
        if (nstat_ifnet_gone(cookie))
        {
-               *out_gone = 1;
+               if (out_gone) *out_gone = 1;
                return EINVAL;
        }
 
@@ -1795,7 +1846,6 @@ nstat_ifnet_counts(
        out_counts->nstat_txpackets = ifp->if_opackets;
        out_counts->nstat_txbytes = ifp->if_obytes;
        out_counts->nstat_cell_rxbytes = out_counts->nstat_cell_txbytes = 0;
-
        return 0;
 }
 
@@ -1849,6 +1899,208 @@ nstat_ifnet_release(
        OSFree(ifcookie, sizeof(*ifcookie), nstat_malloc_tag);
 }
 
+static void
+nstat_ifnet_copy_link_status(
+       struct ifnet                    *ifp,
+       struct nstat_ifnet_descriptor   *desc)
+{
+       struct if_link_status *ifsr = ifp->if_link_status;
+       nstat_ifnet_desc_link_status *link_status = &desc->link_status;
+
+       link_status->link_status_type = NSTAT_IFNET_DESC_LINK_STATUS_TYPE_NONE;
+       if (ifsr == NULL)
+               return;
+
+       lck_rw_lock_shared(&ifp->if_link_status_lock);
+
+       if (ifp->if_type == IFT_CELLULAR) {
+
+               nstat_ifnet_desc_cellular_status *cell_status = &link_status->u.cellular;
+               struct if_cellular_status_v1 *if_cell_sr =
+                       &ifsr->ifsr_u.ifsr_cell.if_cell_u.if_status_v1;
+
+               if (ifsr->ifsr_version != IF_CELLULAR_STATUS_REPORT_VERSION_1)
+                       goto done;
+
+               link_status->link_status_type = NSTAT_IFNET_DESC_LINK_STATUS_TYPE_CELLULAR;
+
+               if (if_cell_sr->valid_bitmask & IF_CELL_LINK_QUALITY_METRIC_VALID) {
+                       cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_LINK_QUALITY_METRIC_VALID;
+                       cell_status->link_quality_metric = if_cell_sr->link_quality_metric;
+               }
+               if (if_cell_sr->valid_bitmask & IF_CELL_UL_EFFECTIVE_BANDWIDTH_VALID) {
+                       cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_UL_EFFECTIVE_BANDWIDTH_VALID;
+                       cell_status->ul_effective_bandwidth = if_cell_sr->ul_effective_bandwidth;
+               }
+               if (if_cell_sr->valid_bitmask & IF_CELL_UL_MAX_BANDWIDTH_VALID) {
+                       cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_UL_MAX_BANDWIDTH_VALID;
+                       cell_status->ul_max_bandwidth = if_cell_sr->ul_max_bandwidth;
+               }
+               if (if_cell_sr->valid_bitmask & IF_CELL_UL_MIN_LATENCY_VALID) {
+                       cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_UL_MIN_LATENCY_VALID;
+                       cell_status->ul_min_latency = if_cell_sr->ul_min_latency;
+               }
+               if (if_cell_sr->valid_bitmask & IF_CELL_UL_EFFECTIVE_LATENCY_VALID) {
+                       cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_UL_EFFECTIVE_LATENCY_VALID;
+                       cell_status->ul_effective_latency = if_cell_sr->ul_effective_latency;
+               }
+               if (if_cell_sr->valid_bitmask & IF_CELL_UL_MAX_LATENCY_VALID) {
+                       cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_UL_MAX_LATENCY_VALID;
+                       cell_status->ul_max_latency = if_cell_sr->ul_max_latency;
+               }
+               if (if_cell_sr->valid_bitmask & IF_CELL_UL_RETXT_LEVEL_VALID) {
+                       cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_VALID;
+                       if (if_cell_sr->ul_retxt_level == IF_CELL_UL_RETXT_LEVEL_NONE)
+                               cell_status->ul_retxt_level = NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_NONE;
+                       else if (if_cell_sr->ul_retxt_level == IF_CELL_UL_RETXT_LEVEL_LOW)
+                               cell_status->ul_retxt_level = NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_LOW;
+                       else if (if_cell_sr->ul_retxt_level == IF_CELL_UL_RETXT_LEVEL_MEDIUM)
+                               cell_status->ul_retxt_level = NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_MEDIUM;
+                       else if (if_cell_sr->ul_retxt_level == IF_CELL_UL_RETXT_LEVEL_HIGH)
+                               cell_status->ul_retxt_level = NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_HIGH;
+                       else
+                               cell_status->valid_bitmask &= ~NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_VALID;
+               }
+               if (if_cell_sr->valid_bitmask & IF_CELL_UL_BYTES_LOST_VALID) {
+                       cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_UL_BYTES_LOST_VALID;
+                       cell_status->ul_bytes_lost = if_cell_sr->ul_bytes_lost;
+               }
+               if (if_cell_sr->valid_bitmask & IF_CELL_UL_MIN_QUEUE_SIZE_VALID) {
+                       cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_UL_MIN_QUEUE_SIZE_VALID;
+                       cell_status->ul_min_queue_size = if_cell_sr->ul_min_queue_size;
+               }
+               if (if_cell_sr->valid_bitmask & IF_CELL_UL_AVG_QUEUE_SIZE_VALID) {
+                       cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_UL_AVG_QUEUE_SIZE_VALID;
+                       cell_status->ul_avg_queue_size = if_cell_sr->ul_avg_queue_size;
+               }
+               if (if_cell_sr->valid_bitmask & IF_CELL_UL_MAX_QUEUE_SIZE_VALID) {
+                       cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_UL_MAX_QUEUE_SIZE_VALID;
+                       cell_status->ul_max_queue_size = if_cell_sr->ul_max_queue_size;
+               }
+               if (if_cell_sr->valid_bitmask & IF_CELL_DL_EFFECTIVE_BANDWIDTH_VALID) {
+                       cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_DL_EFFECTIVE_BANDWIDTH_VALID;
+                       cell_status->dl_effective_bandwidth = if_cell_sr->dl_effective_bandwidth;
+               }
+               if (if_cell_sr->valid_bitmask & IF_CELL_DL_MAX_BANDWIDTH_VALID) {
+                       cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_DL_MAX_BANDWIDTH_VALID;
+                       cell_status->dl_max_bandwidth = if_cell_sr->dl_max_bandwidth;
+               }
+               if (if_cell_sr->valid_bitmask & IF_CELL_CONFIG_INACTIVITY_TIME_VALID) {
+                       cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_CONFIG_INACTIVITY_TIME_VALID;
+                       cell_status->config_inactivity_time = if_cell_sr->config_inactivity_time;
+               }
+               if (if_cell_sr->valid_bitmask & IF_CELL_CONFIG_BACKOFF_TIME_VALID) {
+                       cell_status->valid_bitmask |= NSTAT_IFNET_DESC_CELL_CONFIG_BACKOFF_TIME_VALID;
+                       cell_status->config_backoff_time = if_cell_sr->config_backoff_time;
+               }
+
+       } else if (ifp->if_subfamily == IFNET_SUBFAMILY_WIFI) {
+
+               nstat_ifnet_desc_wifi_status *wifi_status = &link_status->u.wifi;
+               struct if_wifi_status_v1 *if_wifi_sr =
+                       &ifsr->ifsr_u.ifsr_wifi.if_wifi_u.if_status_v1;
+
+               if (ifsr->ifsr_version != IF_WIFI_STATUS_REPORT_VERSION_1)
+                       goto done;
+
+               link_status->link_status_type = NSTAT_IFNET_DESC_LINK_STATUS_TYPE_WIFI;
+
+               if (if_wifi_sr->valid_bitmask & IF_WIFI_LINK_QUALITY_METRIC_VALID) {
+                       wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_LINK_QUALITY_METRIC_VALID;
+                       wifi_status->link_quality_metric = if_wifi_sr->link_quality_metric;
+               }
+               if (if_wifi_sr->valid_bitmask & IF_WIFI_UL_EFFECTIVE_BANDWIDTH_VALID) {
+                       wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_UL_EFFECTIVE_BANDWIDTH_VALID;
+                       wifi_status->ul_effective_bandwidth = if_wifi_sr->ul_effective_bandwidth;
+               }
+               if (if_wifi_sr->valid_bitmask & IF_WIFI_UL_MAX_BANDWIDTH_VALID) {
+                       wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_UL_MAX_BANDWIDTH_VALID;
+                       wifi_status->ul_max_bandwidth = if_wifi_sr->ul_max_bandwidth;
+               }
+               if (if_wifi_sr->valid_bitmask & IF_WIFI_UL_MIN_LATENCY_VALID) {
+                       wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_UL_MIN_LATENCY_VALID;
+                       wifi_status->ul_min_latency = if_wifi_sr->ul_min_latency;
+               }
+               if (if_wifi_sr->valid_bitmask & IF_WIFI_UL_EFFECTIVE_LATENCY_VALID) {
+                       wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_UL_EFFECTIVE_LATENCY_VALID;
+                       wifi_status->ul_effective_latency = if_wifi_sr->ul_effective_latency;
+               }
+               if (if_wifi_sr->valid_bitmask & IF_WIFI_UL_MAX_LATENCY_VALID) {
+                       wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_UL_MAX_LATENCY_VALID;
+                       wifi_status->ul_max_latency = if_wifi_sr->ul_max_latency;
+               }
+               if (if_wifi_sr->valid_bitmask & IF_WIFI_UL_RETXT_LEVEL_VALID) {
+                       wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_VALID;
+                       if (if_wifi_sr->ul_retxt_level == IF_WIFI_UL_RETXT_LEVEL_NONE)
+                               wifi_status->ul_retxt_level = NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_NONE;
+                       else if (if_wifi_sr->ul_retxt_level == IF_WIFI_UL_RETXT_LEVEL_LOW)
+                               wifi_status->ul_retxt_level = NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_LOW;
+                       else if (if_wifi_sr->ul_retxt_level == IF_WIFI_UL_RETXT_LEVEL_MEDIUM)
+                               wifi_status->ul_retxt_level = NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_MEDIUM;
+                       else if (if_wifi_sr->ul_retxt_level == IF_WIFI_UL_RETXT_LEVEL_HIGH)
+                               wifi_status->ul_retxt_level = NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_HIGH;
+                       else
+                               wifi_status->valid_bitmask &= ~NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_VALID;
+               }
+               if (if_wifi_sr->valid_bitmask & IF_WIFI_UL_BYTES_LOST_VALID) {
+                       wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_UL_BYTES_LOST_VALID;
+                       wifi_status->ul_bytes_lost = if_wifi_sr->ul_bytes_lost;
+               }
+               if (if_wifi_sr->valid_bitmask & IF_WIFI_UL_ERROR_RATE_VALID) {
+                       wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_UL_ERROR_RATE_VALID;
+                       wifi_status->ul_error_rate = if_wifi_sr->ul_error_rate;
+               }
+               if (if_wifi_sr->valid_bitmask & IF_WIFI_DL_EFFECTIVE_BANDWIDTH_VALID) {
+                       wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_DL_EFFECTIVE_BANDWIDTH_VALID;
+                       wifi_status->dl_effective_bandwidth = if_wifi_sr->dl_effective_bandwidth;
+               }
+               if (if_wifi_sr->valid_bitmask & IF_WIFI_DL_MAX_BANDWIDTH_VALID) {
+                       wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_DL_MAX_BANDWIDTH_VALID;
+                       wifi_status->dl_max_bandwidth = if_wifi_sr->dl_max_bandwidth;
+               }
+               if (if_wifi_sr->valid_bitmask & IF_WIFI_DL_MIN_LATENCY_VALID) {
+                       wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_DL_MIN_LATENCY_VALID;
+                       wifi_status->dl_min_latency = if_wifi_sr->dl_min_latency;
+               }
+               if (if_wifi_sr->valid_bitmask & IF_WIFI_DL_EFFECTIVE_LATENCY_VALID) {
+                       wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_DL_EFFECTIVE_LATENCY_VALID;
+                       wifi_status->dl_effective_latency = if_wifi_sr->dl_effective_latency;
+               }
+               if (if_wifi_sr->valid_bitmask & IF_WIFI_DL_MAX_LATENCY_VALID) {
+                       wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_DL_MAX_LATENCY_VALID;
+                       wifi_status->dl_max_latency = if_wifi_sr->dl_max_latency;
+               }
+               if (if_wifi_sr->valid_bitmask & IF_WIFI_DL_ERROR_RATE_VALID) {
+                       wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_DL_ERROR_RATE_VALID;
+                       wifi_status->dl_error_rate = if_wifi_sr->dl_error_rate;
+               }
+               if (if_wifi_sr->valid_bitmask & IF_WIFI_CONFIG_FREQUENCY_VALID) {
+                       wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_CONFIG_FREQUENCY_VALID;
+                       if (if_wifi_sr->config_frequency == IF_WIFI_CONFIG_FREQUENCY_2_4_GHZ)
+                               wifi_status->config_frequency = NSTAT_IFNET_DESC_WIFI_CONFIG_FREQUENCY_2_4_GHZ;
+                       else if (if_wifi_sr->config_frequency == IF_WIFI_CONFIG_FREQUENCY_5_0_GHZ)
+                               wifi_status->config_frequency = NSTAT_IFNET_DESC_WIFI_CONFIG_FREQUENCY_5_0_GHZ;
+                       else
+                               wifi_status->valid_bitmask &= ~NSTAT_IFNET_DESC_WIFI_CONFIG_FREQUENCY_VALID;
+               }
+               if (if_wifi_sr->valid_bitmask & IF_WIFI_CONFIG_MULTICAST_RATE_VALID) {
+                       wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_CONFIG_MULTICAST_RATE_VALID;
+                       wifi_status->config_multicast_rate = if_wifi_sr->config_multicast_rate;
+               }
+               if (if_wifi_sr->valid_bitmask & IF_WIFI_CONFIG_SCAN_COUNT_VALID) {
+                       wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_CONFIG_SCAN_COUNT_VALID;
+                       wifi_status->scan_count = if_wifi_sr->scan_count;
+               }
+               if (if_wifi_sr->valid_bitmask & IF_WIFI_CONFIG_SCAN_DURATION_VALID) {
+                       wifi_status->valid_bitmask |= NSTAT_IFNET_DESC_WIFI_CONFIG_SCAN_DURATION_VALID;
+                       wifi_status->scan_duration = if_wifi_sr->scan_duration;
+               }
+       }
+
+done:
+       lck_rw_done(&ifp->if_link_status_lock);
+}
+
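Note on the hunk above: the new wifi branch of nstat_ifnet_copy_link_status() repeats one pattern field by field. A driver-reported value is copied into the exported descriptor only when its IF_WIFI_*_VALID bit is set, the matching NSTAT_IFNET_DESC_WIFI_*_VALID bit is then advertised, and for enumerated fields (retransmit level, configured frequency) an unrecognized value clears the validity bit again. A minimal userspace sketch of that pattern follows; the struct and flag names (drv_report, exported_status, DRV_RSSI_VALID, EXP_RSSI_VALID) are hypothetical stand-ins, not the kernel's types.

    #include <stdint.h>
    #include <stdio.h>

    #define DRV_RSSI_VALID 0x1
    #define EXP_RSSI_VALID 0x1

    struct drv_report      { uint32_t valid_bitmask; int32_t rssi; };
    struct exported_status { uint32_t valid_bitmask; int32_t rssi; };

    static void
    copy_if_valid(const struct drv_report *in, struct exported_status *out)
    {
        /* Copy a field only when the driver marked it valid, and advertise it. */
        if (in->valid_bitmask & DRV_RSSI_VALID) {
            out->valid_bitmask |= EXP_RSSI_VALID;
            out->rssi = in->rssi;
        }
    }

    int
    main(void)
    {
        struct drv_report in = { .valid_bitmask = DRV_RSSI_VALID, .rssi = -54 };
        struct exported_status out = { 0 };

        copy_if_valid(&in, &out);
        printf("valid=0x%x rssi=%d\n", (unsigned)out.valid_bitmask, (int)out.rssi);
        return 0;
    }
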
 static errno_t
 nstat_ifnet_copy_descriptor(
        nstat_provider_cookie_t cookie,
@@ -1875,8 +2127,8 @@ nstat_ifnet_copy_descriptor(
        if (ifp->if_desc.ifd_len < sizeof(desc->description))
                memcpy(desc->description, ifp->if_desc.ifd_desc,
                    sizeof(desc->description));
+       nstat_ifnet_copy_link_status(ifp, desc);
        ifnet_lock_done(ifp);
-       
        return 0;
 }
 
@@ -1917,132 +2169,32 @@ nstat_ifnet_threshold_reached(unsigned int ifindex)
                        ifp = ifcookie->ifp;
                        if (ifp->if_index != ifindex)
                                continue;
-                       nstat_control_send_counts(state, src, 0, NULL);
+                       nstat_control_send_counts(state, src, 0, 0, NULL);
                }
                lck_mtx_unlock(&state->mtx);
        }
        lck_mtx_unlock(&nstat_mtx);
 }
 
-#pragma mark -- Sysinfo Provider --
-
-static nstat_provider nstat_sysinfo_provider;
-
-/* We store the flags requested by the client */
-typedef struct nstat_sysinfo_cookie
-{
-       u_int32_t       flags;
-} nstat_sysinfo_cookie;
-
-static errno_t
-nstat_sysinfo_lookup(
-       const void              *data,
-       u_int32_t               length,
-       nstat_provider_cookie_t *out_cookie)
-{
-       const nstat_sysinfo_add_param *param = (nstat_sysinfo_add_param *)data;
-       nstat_sysinfo_cookie *cookie;
-
-       if (length < sizeof(*param))
-               return (EINVAL);
-
-       if (nstat_privcheck != 0) {
-               errno_t result = priv_check_cred(kauth_cred_get(),
-                   PRIV_NET_PRIVILEGED_NETWORK_STATISTICS, 0);
-               if (result != 0)
-                       return (result);
-       }
-
-       cookie = OSMalloc(sizeof(*cookie), nstat_malloc_tag);
-       if (cookie == NULL)
-               return (ENOMEM);
-       cookie->flags = param->flags;
-       *out_cookie = cookie;
-       return (0);
-}
-
-static int
-nstat_sysinfo_gone(
-       __unused nstat_provider_cookie_t cookie)
-{
-       /* Sysinfo always exists */
-       return (0);
-}
-
-static errno_t
-nstat_sysinfo_copy_descriptor(
-       nstat_provider_cookie_t cookie,
-       void                    *data,
-       u_int32_t               len)
-{
-       nstat_sysinfo_descriptor *desc = (nstat_sysinfo_descriptor *)data;
-       struct nstat_sysinfo_cookie *syscookie =
-               (struct nstat_sysinfo_cookie *)cookie;
-
-       if (len < sizeof(nstat_sysinfo_descriptor))
-               return (EINVAL);
-       desc->flags = syscookie->flags;
-       return (0);
-}
-
+#pragma mark -- Sysinfo --
 static void
-nstat_sysinfo_release(
-       nstat_provider_cookie_t cookie,
-       __unused boolean_t locked)
-{
-       struct nstat_sysinfo_cookie *syscookie =
-               (struct nstat_sysinfo_cookie *)cookie;
-       OSFree(syscookie, sizeof(*syscookie), nstat_malloc_tag);
-}
-
-static errno_t
-nstat_enqueue_success(
-    uint64_t context,
-    nstat_control_state        *state)
+nstat_set_keyval_scalar(nstat_sysinfo_keyval *kv, int key, u_int32_t val)
 {
-       nstat_msg_hdr success;
-       errno_t result;
-
-       bzero(&success, sizeof(success));
-       success.context = context;
-       success.type = NSTAT_MSG_TYPE_SUCCESS;
-       result = ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, &success,
-           sizeof(success), CTL_DATA_EOR | CTL_DATA_CRIT);
-       if (result != 0) {
-               printf("%s: could not enqueue success message %d\n",
-                   __func__, result);
-               nstat_successmsgfailures += 1;
-       }
-       return result;
-}
-
-static void
-nstat_init_sysinfo_provider(void)
-{
-       bzero(&nstat_sysinfo_provider, sizeof(nstat_sysinfo_provider));
-       nstat_sysinfo_provider.nstat_provider_id = NSTAT_PROVIDER_SYSINFO;
-       nstat_sysinfo_provider.nstat_descriptor_length = sizeof(nstat_sysinfo_descriptor);
-       nstat_sysinfo_provider.nstat_lookup = nstat_sysinfo_lookup;
-       nstat_sysinfo_provider.nstat_gone = nstat_sysinfo_gone;
-       nstat_sysinfo_provider.nstat_counts = NULL;
-       nstat_sysinfo_provider.nstat_watcher_add = NULL;
-       nstat_sysinfo_provider.nstat_watcher_remove = NULL;
-       nstat_sysinfo_provider.nstat_copy_descriptor = nstat_sysinfo_copy_descriptor;
-       nstat_sysinfo_provider.nstat_release = nstat_sysinfo_release;
-       nstat_sysinfo_provider.next = nstat_providers;
-       nstat_providers = &nstat_sysinfo_provider;
+       kv->nstat_sysinfo_key = key;
+       kv->nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR;
+       kv->u.nstat_sysinfo_scalar = val;
 }
 
 static void
 nstat_sysinfo_send_data_internal(
        nstat_control_state *control,
-       nstat_src *src,
        nstat_sysinfo_data *data)
 {
        nstat_msg_sysinfo_counts *syscnt = NULL;
        size_t allocsize = 0, countsize = 0, nkeyvals = 0;
        nstat_sysinfo_keyval *kv;
        errno_t result = 0;
+       size_t i = 0;
        
        allocsize = offsetof(nstat_msg_sysinfo_counts, counts);
        countsize = offsetof(nstat_sysinfo_counts, nstat_sysinfo_keyvals);
@@ -2051,10 +2203,12 @@ nstat_sysinfo_send_data_internal(
        switch (data->flags)
        {
                case NSTAT_SYSINFO_MBUF_STATS:
-                       nkeyvals = 5;
+                       nkeyvals = sizeof(struct nstat_sysinfo_mbuf_stats) /
+                           sizeof(u_int32_t);
                        break;
                case NSTAT_SYSINFO_TCP_STATS:
-                       nkeyvals = 6;
+                       nkeyvals = sizeof(struct nstat_sysinfo_tcp_stats) /
+                           sizeof(u_int32_t);
                        break;
                default:
                        return;
@@ -2068,61 +2222,150 @@ nstat_sysinfo_send_data_internal(
        bzero(syscnt, allocsize);
 
        syscnt->hdr.type = NSTAT_MSG_TYPE_SYSINFO_COUNTS;
+       syscnt->hdr.length = allocsize;
        syscnt->counts.nstat_sysinfo_len = countsize;
-       syscnt->srcref = src->srcref;
-
+       
        kv = (nstat_sysinfo_keyval *) &syscnt->counts.nstat_sysinfo_keyvals;
        switch (data->flags)
        {
                case NSTAT_SYSINFO_MBUF_STATS:
                {
-                       kv[0].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_MBUF_256B_TOTAL;
-                       kv[0].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR;
-                       kv[0].u.nstat_sysinfo_scalar = data->u.mb_stats.total_256b;
-
-                       kv[1].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_MBUF_2KB_TOTAL;
-                       kv[1].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR;
-                       kv[1].u.nstat_sysinfo_scalar = data->u.mb_stats.total_2kb;
-
-                       kv[2].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_MBUF_4KB_TOTAL;
-                       kv[2].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR;
-                       kv[2].u.nstat_sysinfo_scalar = data->u.mb_stats.total_4kb;
-
-                       kv[3].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_SOCK_MBCNT;
-                       kv[3].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR;
-                       kv[3].u.nstat_sysinfo_scalar = data->u.mb_stats.sbmb_total;
-
-
-                       kv[4].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_SOCK_ATMBLIMIT;
-                       kv[4].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR;
-                       kv[4].u.nstat_sysinfo_scalar = data->u.mb_stats.sb_atmbuflimit;
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_KEY_MBUF_256B_TOTAL,
+                           data->u.mb_stats.total_256b);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_KEY_MBUF_2KB_TOTAL,
+                           data->u.mb_stats.total_2kb);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_KEY_MBUF_4KB_TOTAL,
+                           data->u.mb_stats.total_4kb);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_MBUF_16KB_TOTAL,
+                           data->u.mb_stats.total_16kb);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_KEY_SOCK_MBCNT,
+                           data->u.mb_stats.sbmb_total);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_KEY_SOCK_ATMBLIMIT,
+                           data->u.mb_stats.sb_atmbuflimit);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_MBUF_DRAIN_CNT,
+                           data->u.mb_stats.draincnt);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_MBUF_MEM_RELEASED,
+                           data->u.mb_stats.memreleased);
+                       VERIFY(i == nkeyvals);
                        break;
                }
                case NSTAT_SYSINFO_TCP_STATS:
                {
-                       kv[0].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_IPV4_AVGRTT;
-                       kv[0].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR;
-                       kv[0].u.nstat_sysinfo_scalar = data->u.tcp_stats.ipv4_avgrtt;
-
-                       kv[1].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_IPV6_AVGRTT;
-                       kv[1].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR;
-                       kv[1].u.nstat_sysinfo_scalar = data->u.tcp_stats.ipv6_avgrtt;
-
-                       kv[2].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_SEND_PLR;
-                       kv[2].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR;
-                       kv[2].u.nstat_sysinfo_scalar = data->u.tcp_stats.send_plr;
-
-                       kv[3].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_RECV_PLR;
-                       kv[3].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR;
-                       kv[3].u.nstat_sysinfo_scalar = data->u.tcp_stats.recv_plr;
-
-                       kv[4].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_SEND_TLRTO;
-                       kv[4].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR;
-                       kv[4].u.nstat_sysinfo_scalar = data->u.tcp_stats.send_tlrto_rate;
-
-                       kv[5].nstat_sysinfo_key = NSTAT_SYSINFO_KEY_SEND_REORDERRATE;
-                       kv[5].nstat_sysinfo_flags = NSTAT_SYSINFO_FLAG_SCALAR;
-                       kv[5].u.nstat_sysinfo_scalar = data->u.tcp_stats.send_reorder_rate;
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_KEY_IPV4_AVGRTT,
+                           data->u.tcp_stats.ipv4_avgrtt);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_KEY_IPV6_AVGRTT,
+                           data->u.tcp_stats.ipv6_avgrtt);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_KEY_SEND_PLR,
+                           data->u.tcp_stats.send_plr);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_KEY_RECV_PLR,
+                           data->u.tcp_stats.recv_plr);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_KEY_SEND_TLRTO,
+                           data->u.tcp_stats.send_tlrto_rate);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_KEY_SEND_REORDERRATE,
+                           data->u.tcp_stats.send_reorder_rate);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_CONNECTION_ATTEMPTS,
+                           data->u.tcp_stats.connection_attempts);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_CONNECTION_ACCEPTS,
+                           data->u.tcp_stats.connection_accepts);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_CLIENT_ENABLED,
+                           data->u.tcp_stats.ecn_client_enabled);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_SERVER_ENABLED,
+                           data->u.tcp_stats.ecn_server_enabled);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_CLIENT_SETUP,
+                           data->u.tcp_stats.ecn_client_setup);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_SERVER_SETUP,
+                           data->u.tcp_stats.ecn_server_setup);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_CLIENT_SUCCESS,
+                           data->u.tcp_stats.ecn_client_success);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_SERVER_SUCCESS,
+                           data->u.tcp_stats.ecn_server_success);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_NOT_SUPPORTED,
+                           data->u.tcp_stats.ecn_not_supported);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_LOST_SYN,
+                           data->u.tcp_stats.ecn_lost_syn);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_LOST_SYNACK,
+                           data->u.tcp_stats.ecn_lost_synack);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_RECV_CE,
+                           data->u.tcp_stats.ecn_recv_ce);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_RECV_ECE,
+                           data->u.tcp_stats.ecn_recv_ece);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_SENT_ECE,
+                           data->u.tcp_stats.ecn_sent_ece);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_CONN_RECV_CE,
+                           data->u.tcp_stats.ecn_conn_recv_ce);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_CONN_RECV_ECE,
+                           data->u.tcp_stats.ecn_conn_recv_ece);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_CONN_PLNOCE,
+                           data->u.tcp_stats.ecn_conn_plnoce);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_CONN_PL_CE,
+                           data->u.tcp_stats.ecn_conn_pl_ce);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_ECN_CONN_NOPL_CE,
+                           data->u.tcp_stats.ecn_conn_nopl_ce);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_TFO_SYN_DATA_RCV,
+                           data->u.tcp_stats.tfo_syn_data_rcv);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_TFO_COOKIE_REQ_RCV,
+                           data->u.tcp_stats.tfo_cookie_req_rcv);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_TFO_COOKIE_SENT,
+                           data->u.tcp_stats.tfo_cookie_sent);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_TFO_COOKIE_INVALID,
+                           data->u.tcp_stats.tfo_cookie_invalid);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_TFO_COOKIE_REQ,
+                           data->u.tcp_stats.tfo_cookie_req);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_TFO_COOKIE_RCV,
+                           data->u.tcp_stats.tfo_cookie_rcv);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_TFO_SYN_DATA_SENT,
+                           data->u.tcp_stats.tfo_syn_data_sent);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_TFO_SYN_DATA_ACKED,
+                           data->u.tcp_stats.tfo_syn_data_acked);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_TFO_SYN_LOSS,
+                           data->u.tcp_stats.tfo_syn_loss);
+                       nstat_set_keyval_scalar(&kv[i++],
+                           NSTAT_SYSINFO_TFO_BLACKHOLE,
+                           data->u.tcp_stats.tfo_blackhole);
+
+                       VERIFY(i == nkeyvals);
                        break;
                }
        }
@@ -2132,7 +2375,9 @@ nstat_sysinfo_send_data_internal(
                result = ctl_enqueuedata(control->ncs_kctl,
                    control->ncs_unit, syscnt, allocsize, CTL_DATA_EOR);
                if (result != 0)
-                       nstat_sysinfofailures += 1;
+               {
+                       nstat_stats.nstat_sysinfofailures += 1;
+               }
                OSFree(syscnt, allocsize, nstat_malloc_tag);
        }
        return;
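Note on the hunks above: replacing the hard-coded key/value counts (5 and 6) with sizeof(struct nstat_sysinfo_*_stats) / sizeof(u_int32_t) assumes those per-category structures consist solely of u_int32_t fields, and the VERIFY(i == nkeyvals) checks then catch any drift between the structure and the switch body that fills the keyvals. A standalone sketch of the sizing arithmetic, with simplified stand-in types rather than the ntstat.h definitions:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for a stats struct made entirely of u_int32_t counters. */
    struct my_mbuf_stats {
        uint32_t total_256b, total_2kb, total_4kb, total_16kb;
    };

    struct my_keyval { uint32_t key; uint32_t flags; uint64_t scalar; };

    struct my_counts_msg {
        uint32_t type;
        uint32_t len;
        struct my_keyval keyvals[];     /* variable-length key/value array */
    };

    int
    main(void)
    {
        /* One keyval per u_int32_t field in the stats struct. */
        size_t nkeyvals = sizeof(struct my_mbuf_stats) / sizeof(uint32_t);
        size_t allocsize = offsetof(struct my_counts_msg, keyvals)
            + nkeyvals * sizeof(struct my_keyval);

        printf("nkeyvals=%zu allocsize=%zu\n", nkeyvals, allocsize);
        return 0;
    }
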
@@ -2148,25 +2393,13 @@ nstat_sysinfo_send_data(
        for (control = nstat_controls; control; control = control->ncs_next)
        {
                lck_mtx_lock(&control->mtx);
-               nstat_src       *src;
-               for (src = control->ncs_srcs; src; src = src->next)
+               if ((control->ncs_flags & NSTAT_FLAG_SYSINFO_SUBSCRIBED) != 0)
                {
-                       if (src->provider->nstat_provider_id == 
-                               NSTAT_PROVIDER_SYSINFO)
-                       {
-                               struct nstat_sysinfo_cookie *syscookie;
-                               syscookie = (struct nstat_sysinfo_cookie *) src->cookie;
-                               if (syscookie->flags & data->flags)
-                               {
-                                       nstat_sysinfo_send_data_internal(control,
-                                               src, data);
-                               }
-                       }
-               }       
+                       nstat_sysinfo_send_data_internal(control, data);
+               }
                lck_mtx_unlock(&control->mtx);
        }
        lck_mtx_unlock(&nstat_mtx);
-
 }
 
 static void
@@ -2185,73 +2418,209 @@ static errno_t nstat_control_connect(kern_ctl_ref kctl, struct sockaddr_ctl *sac
 static errno_t nstat_control_disconnect(kern_ctl_ref kctl, u_int32_t unit, void *uinfo);
 static errno_t nstat_control_send(kern_ctl_ref kctl, u_int32_t unit, void *uinfo, mbuf_t m, int flags);
 
+static errno_t
+nstat_enqueue_success(
+    uint64_t context,
+    nstat_control_state        *state,
+    u_int16_t flags)
+{
+       nstat_msg_hdr success;
+       errno_t result;
 
-static void*
-nstat_idle_check(
-       __unused thread_call_param_t p0,
-       __unused thread_call_param_t p1)
+       bzero(&success, sizeof(success));
+       success.context = context;
+       success.type = NSTAT_MSG_TYPE_SUCCESS;
+       success.length = sizeof(success);
+       success.flags = flags;
+       result = ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, &success,
+           sizeof(success), CTL_DATA_EOR | CTL_DATA_CRIT);
+       if (result != 0) {
+               if (nstat_debug != 0)
+                       printf("%s: could not enqueue success message %d\n",
+                           __func__, result);
+               nstat_stats.nstat_successmsgfailures += 1;
+       }
+       return result;
+}
+
+static errno_t
+nstat_control_send_goodbye(
+       nstat_control_state     *state,
+       nstat_src                       *src)
 {
-       lck_mtx_lock(&nstat_mtx);
-       
-       nstat_idle_time = 0;
-       
-       nstat_control_state *control;
-       nstat_src       *dead = NULL;
-       nstat_src       *dead_list = NULL;
-       for (control = nstat_controls; control; control = control->ncs_next)
+       errno_t result = 0;
+       int failed = 0;
+
+       if (nstat_control_reporting_allowed(state, src))
        {
-               lck_mtx_lock(&control->mtx);
-               nstat_src       **srcpp = &control->ncs_srcs;
-               
-               if (!(control->ncs_flags & NSTAT_FLAG_REQCOUNTS))
+               if ((state->ncs_flags & NSTAT_FLAG_SUPPORTS_UPDATES) != 0)
                {
-                       while(*srcpp != NULL)
+                       result = nstat_control_send_update(state, src, 0, NSTAT_MSG_HDR_FLAG_CLOSING, NULL);
+                       if (result != 0)
                        {
-                               if ((*srcpp)->provider->nstat_gone((*srcpp)->cookie))
-                               {
-                                       errno_t result;
-                                       
-                                       // Pull it off the list
-                                       dead = *srcpp;
-                                       *srcpp = (*srcpp)->next;
-                                       
-                                       // send one last counts notification
-                                       result = nstat_control_send_counts(control, dead,
-                                           0, NULL);
-                                       if (result != 0 && nstat_debug)
-                                               printf("%s - nstat_control_send_counts() %d\n",
-                                                       __func__, result);
-                                               
-                                       // send a last description
-                                       result = nstat_control_send_description(control, dead, 0);
-                                       if (result != 0 && nstat_debug)
-                                               printf("%s - nstat_control_send_description() %d\n",
-                                                       __func__, result);
-                                       
-                                       // send the source removed notification
-                                       result = nstat_control_send_removed(control, dead);
-                                       if (result != 0 && nstat_debug)
-                                               printf("%s - nstat_control_send_removed() %d\n",
-                                                       __func__, result);
-                                       
-                                       // Put this on the list to release later
-                                       dead->next = dead_list;
-                                       dead_list = dead;
-                               }
-                               else
-                               {
-                                       srcpp = &(*srcpp)->next;
-                               }
+                               failed = 1;
+                               if (nstat_debug != 0)
+                                       printf("%s - nstat_control_send_update() %d\n", __func__, result);
                        }
                }
-               control->ncs_flags &= ~NSTAT_FLAG_REQCOUNTS;
-               lck_mtx_unlock(&control->mtx);
-       }
-
-       if (nstat_controls)
-       {
-               clock_interval_to_deadline(60, NSEC_PER_SEC, &nstat_idle_time);
-               thread_call_func_delayed((thread_call_func_t)nstat_idle_check, NULL, nstat_idle_time);
+               else
+               {
+                       // send one last counts notification
+                       result = nstat_control_send_counts(state, src, 0, NSTAT_MSG_HDR_FLAG_CLOSING, NULL);
+                       if (result != 0)
+                       {
+                               failed = 1;
+                               if (nstat_debug != 0)
+                                       printf("%s - nstat_control_send_counts() %d\n", __func__, result);
+                       }
+
+                       // send a last description
+                       result = nstat_control_send_description(state, src, 0, NSTAT_MSG_HDR_FLAG_CLOSING);
+                       if (result != 0)
+                       {
+                               failed = 1;
+                               if (nstat_debug != 0)
+                                       printf("%s - nstat_control_send_description() %d\n", __func__, result);
+                       }
+               }
+       }
+
+       // send the source removed notification
+       result = nstat_control_send_removed(state, src);
+       if (result != 0)
+       {
+               failed = 1;
+               if (nstat_debug != 0)
+                       printf("%s - nstat_control_send_removed() %d\n", __func__, result);
+       }
+
+       if (failed != 0)
+               nstat_stats.nstat_control_send_goodbye_failures++;
+
+       return result;
+}
+
+static errno_t
+nstat_flush_accumulated_msgs(
+       nstat_control_state     *state)
+{
+       errno_t result = 0;
+       if (state->ncs_accumulated && mbuf_len(state->ncs_accumulated))
+       {
+               mbuf_pkthdr_setlen(state->ncs_accumulated, mbuf_len(state->ncs_accumulated));
+               result = ctl_enqueuembuf(state->ncs_kctl, state->ncs_unit, state->ncs_accumulated, CTL_DATA_EOR);
+               if (result != 0)
+               {
+                       nstat_stats.nstat_flush_accumulated_msgs_failures++;
+                       if (nstat_debug != 0)
+                               printf("%s - ctl_enqueuembuf failed: %d\n", __func__, result);
+                       mbuf_freem(state->ncs_accumulated);
+               }
+               state->ncs_accumulated = NULL;
+       }
+       return result;
+}
+
+static errno_t
+nstat_accumulate_msg(
+       nstat_control_state     *state,
+       nstat_msg_hdr           *hdr,
+       size_t                          length)
+{
+       if (state->ncs_accumulated && mbuf_trailingspace(state->ncs_accumulated) < length)
+       {
+               // Will send the current mbuf
+               nstat_flush_accumulated_msgs(state);
+       }
+       
+       errno_t result = 0;
+       
+       if (state->ncs_accumulated == NULL)
+       {
+               unsigned int one = 1;
+               if (mbuf_allocpacket(MBUF_DONTWAIT, NSTAT_MAX_MSG_SIZE, &one, &state->ncs_accumulated) != 0)
+               {
+                       if (nstat_debug != 0)
+                               printf("%s - mbuf_allocpacket failed\n", __func__);
+                       result = ENOMEM;
+               }
+               else
+               {
+                       mbuf_setlen(state->ncs_accumulated, 0);
+               }
+       }
+       
+       if (result == 0)
+       {
+               hdr->length = length;
+               result = mbuf_copyback(state->ncs_accumulated, mbuf_len(state->ncs_accumulated),
+                                                          length, hdr, MBUF_DONTWAIT);
+       }
+       
+       if (result != 0)
+       {
+               nstat_flush_accumulated_msgs(state);
+               if (nstat_debug != 0)
+                       printf("%s - resorting to ctl_enqueuedata\n", __func__);
+               result = ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, hdr, length, CTL_DATA_EOR);
+       }
+       
+       if (result != 0)
+               nstat_stats.nstat_accumulate_msg_failures++;
+
+       return result;
+}
+
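Note on the pair of functions above: nstat_accumulate_msg() batches several fixed-size messages into one mbuf of up to NSTAT_MAX_MSG_SIZE bytes, nstat_flush_accumulated_msgs() enqueues the batch when the next message would not fit, and a failed copyback falls back to a direct ctl_enqueuedata(). A userspace sketch of the same accumulate-then-flush batching; deliver() stands in for the enqueue calls, and the buffer size and record layout are arbitrary.

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    #define BATCH_MAX 1024

    static uint8_t batch[BATCH_MAX];
    static size_t  batch_len;

    static void
    deliver(const void *buf, size_t len)
    {
        (void)buf;
        printf("delivering %zu bytes\n", len);  /* would enqueue to the client */
    }

    static void
    flush_batch(void)
    {
        if (batch_len != 0) {
            deliver(batch, batch_len);
            batch_len = 0;
        }
    }

    static void
    accumulate(const void *msg, size_t len)
    {
        /* Flush first if the new message would not fit in the current batch. */
        if (batch_len + len > sizeof(batch))
            flush_batch();

        if (len > sizeof(batch)) {
            deliver(msg, len);      /* fallback: send an oversized message alone */
            return;
        }
        memcpy(batch + batch_len, msg, len);
        batch_len += len;
    }

    int
    main(void)
    {
        uint8_t record[100] = { 0 };

        for (int i = 0; i < 25; i++)
            accumulate(record, sizeof(record));
        flush_batch();
        return 0;
    }
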
+static void*
+nstat_idle_check(
+       __unused thread_call_param_t p0,
+       __unused thread_call_param_t p1)
+{
+       lck_mtx_lock(&nstat_mtx);
+       
+       nstat_idle_time = 0;
+       
+       nstat_control_state *control;
+       nstat_src       *dead = NULL;
+       nstat_src       *dead_list = NULL;
+       for (control = nstat_controls; control; control = control->ncs_next)
+       {
+               lck_mtx_lock(&control->mtx);
+               nstat_src       **srcpp = &control->ncs_srcs;
+               
+               if (!(control->ncs_flags & NSTAT_FLAG_REQCOUNTS))
+               {
+                       while(*srcpp != NULL)
+                       {
+                               if ((*srcpp)->provider->nstat_gone((*srcpp)->cookie))
+                               {
+                                       errno_t result;
+                                       
+                                       // Pull it off the list
+                                       dead = *srcpp;
+                                       *srcpp = (*srcpp)->next;
+                                       
+                                       result = nstat_control_send_goodbye(control, dead);
+                                       
+                                       // Put this on the list to release later
+                                       dead->next = dead_list;
+                                       dead_list = dead;
+                               }
+                               else
+                               {
+                                       srcpp = &(*srcpp)->next;
+                               }
+                       }
+               }
+               control->ncs_flags &= ~NSTAT_FLAG_REQCOUNTS;
+               lck_mtx_unlock(&control->mtx);
+       }
+
+       if (nstat_controls)
+       {
+               clock_interval_to_deadline(60, NSEC_PER_SEC, &nstat_idle_time);
+               thread_call_func_delayed((thread_call_func_t)nstat_idle_check, NULL, nstat_idle_time);
        }
        
        lck_mtx_unlock(&nstat_mtx);
@@ -2304,17 +2673,38 @@ nstat_control_cleanup_source(
 {
        errno_t result;
        
-       if (state) {
+       if (state)
+       {
                result = nstat_control_send_removed(state, src);
-               if (result != 0 && nstat_debug)
-                       printf("%s - nstat_control_send_removed() %d\n",
-                               __func__, result);
+               if (result != 0)
+               {
+                       nstat_stats.nstat_control_cleanup_source_failures++;
+                       if (nstat_debug != 0)
+                               printf("%s - nstat_control_send_removed() %d\n",
+                                   __func__, result);
+               }
        }
        // Cleanup the source if we found it.
        src->provider->nstat_release(src->cookie, locked);
        OSFree(src, sizeof(*src), nstat_malloc_tag);
 }
 
+
+static bool
+nstat_control_reporting_allowed(
+       nstat_control_state *state,
+       nstat_src *src)
+{
+       if (src->provider->nstat_reporting_allowed == NULL)
+               return TRUE;
+
+       return (
+           src->provider->nstat_reporting_allowed( src->cookie,
+               state->ncs_provider_filters[src->provider->nstat_provider_id])
+       );
+}
+
+
 static errno_t
 nstat_control_connect(
        kern_ctl_ref            kctl,
@@ -2350,7 +2740,7 @@ static errno_t
 nstat_control_disconnect(
        __unused kern_ctl_ref   kctl,
        __unused u_int32_t              unit,
-       void                    *uinfo)
+       void                                    *uinfo)
 {
        u_int32_t       watching;
        nstat_control_state     *state = (nstat_control_state*)uinfo;
@@ -2385,6 +2775,12 @@ nstat_control_disconnect(
        // set cleanup flags
        state->ncs_flags |= NSTAT_FLAG_CLEANUP;
        
+       if (state->ncs_accumulated)
+       {
+               mbuf_freem(state->ncs_accumulated);
+               state->ncs_accumulated = NULL;
+       }
+       
        // Copy out the list of sources
        nstat_src       *srcs = state->ncs_srcs;
        state->ncs_srcs = NULL;
@@ -2441,10 +2837,10 @@ nstat_control_send_counts(
        nstat_control_state     *state,
        nstat_src               *src,
        unsigned long long      context,
+       u_int16_t hdr_flags,
        int *gone)
-{      
+{
        nstat_msg_src_counts counts;
-       int localgone = 0;
        errno_t result = 0;
 
        /* Some providers may not have any counts to send */
@@ -2453,33 +2849,68 @@ nstat_control_send_counts(
 
        bzero(&counts, sizeof(counts));
        counts.hdr.type = NSTAT_MSG_TYPE_SRC_COUNTS;
+       counts.hdr.length = sizeof(counts);
+       counts.hdr.flags = hdr_flags;
        counts.hdr.context = context;
        counts.srcref = src->srcref;
        
-       if (src->provider->nstat_counts(src->cookie, &counts.counts,
-           &localgone) == 0) {
+       if (src->provider->nstat_counts(src->cookie, &counts.counts, gone) == 0)
+       {
                if ((src->filter & NSTAT_FILTER_NOZEROBYTES) &&
                    counts.counts.nstat_rxbytes == 0 && 
-                   counts.counts.nstat_txbytes == 0) {
+                   counts.counts.nstat_txbytes == 0)
+               {
                        result = EAGAIN;
-               } else {
+               }
+               else
+               {
                        result = ctl_enqueuedata(state->ncs_kctl,
                            state->ncs_unit, &counts, sizeof(counts),
                            CTL_DATA_EOR);
                        if (result != 0)
-                               nstat_srccountfailures += 1;
+                               nstat_stats.nstat_sendcountfailures += 1;
                }
        }
-       if (gone)
-               *gone = localgone;
        return result;
 }
 
+static errno_t
+nstat_control_append_counts(
+       nstat_control_state     *state,
+       nstat_src                       *src,
+       int                                     *gone)
+{
+       /* Some providers may not have any counts to send */
+       if (!src->provider->nstat_counts) return 0;
+       
+       nstat_msg_src_counts counts;
+       bzero(&counts, sizeof(counts));
+       counts.hdr.type = NSTAT_MSG_TYPE_SRC_COUNTS;
+       counts.hdr.length = sizeof(counts);
+       counts.srcref = src->srcref;
+       
+       errno_t result = 0;
+       result = src->provider->nstat_counts(src->cookie, &counts.counts, gone);
+       if (result != 0)
+       {
+               return result;
+       }
+       
+       if ((src->filter & NSTAT_FILTER_NOZEROBYTES) == NSTAT_FILTER_NOZEROBYTES &&
+               counts.counts.nstat_rxbytes == 0 && counts.counts.nstat_txbytes == 0)
+       {
+               return EAGAIN;
+       }
+       
+       return nstat_accumulate_msg(state, &counts.hdr, counts.hdr.length);
+}
+
 static int
 nstat_control_send_description(
        nstat_control_state     *state,
        nstat_src                       *src,
-       u_int64_t                       context)
+       u_int64_t                       context,
+       u_int16_t                       hdr_flags)
 {
        // Provider doesn't support getting the descriptor? Done.
        if (src->provider->nstat_descriptor_length == 0 ||
@@ -2513,19 +2944,194 @@ nstat_control_send_description(
 
        desc->hdr.context = context;
        desc->hdr.type = NSTAT_MSG_TYPE_SRC_DESC;
+       desc->hdr.length = size;
+       desc->hdr.flags = hdr_flags;
        desc->srcref = src->srcref;
        desc->provider = src->provider->nstat_provider_id;
 
        result = ctl_enqueuembuf(state->ncs_kctl, state->ncs_unit, msg, CTL_DATA_EOR);
        if (result != 0)
        {
-               nstat_descriptionfailures += 1;
+               nstat_stats.nstat_descriptionfailures += 1;
                mbuf_freem(msg);
        }
 
        return result;
 }
 
+static errno_t
+nstat_control_append_description(
+       nstat_control_state     *state,
+       nstat_src                       *src)
+{
+       size_t  size = offsetof(nstat_msg_src_description, data) + src->provider->nstat_descriptor_length;
+       if (size > 512 || src->provider->nstat_descriptor_length == 0 ||
+               src->provider->nstat_copy_descriptor == NULL)
+       {
+               return EOPNOTSUPP;
+       }
+       
+       // Fill out a buffer on the stack, we will copy to the mbuf later
+       u_int64_t buffer[size/sizeof(u_int64_t)  + 1]; // u_int64_t to ensure alignment
+       bzero(buffer, size);
+       
+       nstat_msg_src_description       *desc = (nstat_msg_src_description*)buffer;
+       desc->hdr.type = NSTAT_MSG_TYPE_SRC_DESC;
+       desc->hdr.length = size;
+       desc->srcref = src->srcref;
+       desc->provider = src->provider->nstat_provider_id;
+       
+       errno_t result = 0;
+       // Fill in the description
+       // Query the provider for the provider specific bits
+       result = src->provider->nstat_copy_descriptor(src->cookie, desc->data,
+                               src->provider->nstat_descriptor_length);
+       if (result != 0)
+       {
+               return result;
+       }
+       
+       return nstat_accumulate_msg(state, &desc->hdr, size);
+}
+
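Note on the hunk above: nstat_control_append_description() (and nstat_control_append_update() below) builds the message in a stack buffer declared as an array of u_int64_t. Using size/sizeof(u_int64_t) + 1 elements always covers `size` bytes, the element type guarantees 8-byte alignment for the header cast, and the 512-byte cap bounds stack usage. A standalone sketch of that sizing; the header layout here is a local assumption, not the ntstat.h definition.

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>
    #include <assert.h>

    struct msg_hdr { uint64_t context; uint32_t type; uint16_t length; uint16_t flags; };

    static void
    build_on_stack(size_t size)
    {
        assert(size >= sizeof(struct msg_hdr) && size <= 512);  /* same cap as above */

        uint64_t buffer[size / sizeof(uint64_t) + 1];   /* 8-byte aligned scratch */
        assert(sizeof(buffer) >= size);

        memset(buffer, 0, size);
        struct msg_hdr *hdr = (struct msg_hdr *)buffer;
        hdr->type = 10004;              /* arbitrary illustrative value */
        hdr->length = (uint16_t)size;
    }

    int
    main(void)
    {
        build_on_stack(sizeof(struct msg_hdr));
        build_on_stack(100);
        build_on_stack(512);
        return 0;
    }
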
+static int
+nstat_control_send_update(
+       nstat_control_state     *state,
+       nstat_src                       *src,
+       u_int64_t                       context,
+       u_int16_t               hdr_flags,
+       int                                     *gone)
+{
+       // Provider doesn't support getting the descriptor or counts? Done.
+       if ((src->provider->nstat_descriptor_length == 0 ||
+                src->provider->nstat_copy_descriptor == NULL) &&
+               src->provider->nstat_counts == NULL)
+       {
+               return EOPNOTSUPP;
+       }
+       
+       // Allocate storage for the descriptor message
+       mbuf_t                  msg;
+       unsigned int    one = 1;
+       u_int32_t               size = offsetof(nstat_msg_src_update, data) +
+                                                  src->provider->nstat_descriptor_length;
+       if (mbuf_allocpacket(MBUF_DONTWAIT, size, &one, &msg) != 0)
+       {
+               return ENOMEM;
+       }
+       
+       nstat_msg_src_update    *desc = (nstat_msg_src_update*)mbuf_data(msg);
+       bzero(desc, size);
+       desc->hdr.context = context;
+       desc->hdr.type = NSTAT_MSG_TYPE_SRC_UPDATE;
+       desc->hdr.length = size;
+       desc->hdr.flags = hdr_flags;
+       desc->srcref = src->srcref;
+       desc->provider = src->provider->nstat_provider_id;
+       
+       mbuf_setlen(msg, size);
+       mbuf_pkthdr_setlen(msg, mbuf_len(msg));
+       
+       errno_t result = 0;
+       if (src->provider->nstat_descriptor_length != 0 && src->provider->nstat_copy_descriptor)
+       {
+               // Query the provider for the provider specific bits
+               result = src->provider->nstat_copy_descriptor(src->cookie, desc->data,
+                                                       src->provider->nstat_descriptor_length);
+               if (result != 0)
+               {
+                       mbuf_freem(msg);
+                       return result;
+               }
+       }
+       
+       if (src->provider->nstat_counts)
+       {
+               result = src->provider->nstat_counts(src->cookie, &desc->counts, gone);
+               if (result == 0)
+               {
+                       if ((src->filter & NSTAT_FILTER_NOZEROBYTES) == NSTAT_FILTER_NOZEROBYTES &&
+                               desc->counts.nstat_rxbytes == 0 && desc->counts.nstat_txbytes == 0)
+                       {
+                               result = EAGAIN;
+                       }
+                       else
+                       {
+                               result = ctl_enqueuembuf(state->ncs_kctl, state->ncs_unit, msg, CTL_DATA_EOR);
+                       }
+               }
+       }
+       
+       if (result != 0)
+       {
+               nstat_stats.nstat_srcupatefailures += 1;
+               mbuf_freem(msg);
+       }
+
+       return result;
+}
+
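Note on a theme of this diff: every message header now carries hdr.length. Together with the aggregation path (NSTAT_MSG_HDR_FLAG_SUPPORTS_AGGREGATE and the append_* helpers), one enqueue can hold several counts, description, or update messages, and a user-level reader can walk them by advancing hdr.length bytes at a time. A hypothetical client-side framing loop follows; the header layout is declared locally as an assumption mirroring the fields set in this file, not a header include.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct hdr { uint64_t context; uint32_t type; uint16_t length; uint16_t flags; };

    static void
    walk_messages(const uint8_t *buf, size_t buflen)
    {
        size_t off = 0;

        while (off + sizeof(struct hdr) <= buflen) {
            struct hdr h;

            memcpy(&h, buf + off, sizeof(h));
            if (h.length < sizeof(h) || off + h.length > buflen)
                break;                  /* malformed or truncated */
            printf("type=%u length=%u\n", (unsigned)h.type, (unsigned)h.length);
            off += h.length;            /* hdr.length frames the next message */
        }
    }

    int
    main(void)
    {
        uint8_t buf[64];
        struct hdr a = { .type = 10004, .length = 24 }; /* 24-byte "counts" message */
        struct hdr b = { .type = 10003, .length = 40 }; /* 40-byte "descriptor" message */

        memset(buf, 0, sizeof(buf));
        memcpy(buf, &a, sizeof(a));
        memcpy(buf + 24, &b, sizeof(b));
        walk_messages(buf, sizeof(buf));
        return 0;
    }
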
+static errno_t
+nstat_control_append_update(
+       nstat_control_state     *state,
+       nstat_src                       *src,
+       int                                     *gone)
+{
+       size_t  size = offsetof(nstat_msg_src_update, data) + src->provider->nstat_descriptor_length;
+       if (size > 512 || ((src->provider->nstat_descriptor_length == 0 ||
+               src->provider->nstat_copy_descriptor == NULL) &&
+               src->provider->nstat_counts == NULL))
+       {
+               return EOPNOTSUPP;
+       }
+       
+       // Fill out a buffer on the stack, we will copy to the mbuf later
+       u_int64_t buffer[size/sizeof(u_int64_t)  + 1]; // u_int64_t to ensure alignment
+       bzero(buffer, size);
+       
+       nstat_msg_src_update    *desc = (nstat_msg_src_update*)buffer;
+       desc->hdr.type = NSTAT_MSG_TYPE_SRC_UPDATE;
+       desc->hdr.length = size;
+       desc->srcref = src->srcref;
+       desc->provider = src->provider->nstat_provider_id;
+       
+       errno_t result = 0;
+       // Fill in the description
+       if (src->provider->nstat_descriptor_length != 0 && src->provider->nstat_copy_descriptor)
+       {
+               // Query the provider for the provider specific bits
+               result = src->provider->nstat_copy_descriptor(src->cookie, desc->data,
+                                       src->provider->nstat_descriptor_length);
+               if (result != 0)
+               {
+                       nstat_stats.nstat_copy_descriptor_failures++;
+                       if (nstat_debug != 0)
+                               printf("%s: src->provider->nstat_copy_descriptor: %d\n", __func__, result);
+                       return result;
+               }
+       }
+       
+       if (src->provider->nstat_counts)
+       {
+               result = src->provider->nstat_counts(src->cookie, &desc->counts, gone);
+               if (result != 0)
+               {
+                       nstat_stats.nstat_provider_counts_failures++;
+                       if (nstat_debug != 0)
+                               printf("%s: src->provider->nstat_counts: %d\n", __func__, result);
+                       return result;
+               }
+               
+               if ((src->filter & NSTAT_FILTER_NOZEROBYTES) == NSTAT_FILTER_NOZEROBYTES &&
+                       desc->counts.nstat_rxbytes == 0 && desc->counts.nstat_txbytes == 0)
+               {
+                       return EAGAIN;
+               }
+       }
+       
+       return nstat_accumulate_msg(state, &desc->hdr, size);
+}
+
 static errno_t
 nstat_control_send_removed(
        nstat_control_state     *state,
@@ -2536,12 +3142,13 @@ nstat_control_send_removed(
 
        bzero(&removed, sizeof(removed));
        removed.hdr.type = NSTAT_MSG_TYPE_SRC_REMOVED;
+       removed.hdr.length = sizeof(removed);
        removed.hdr.context = 0;
        removed.srcref = src->srcref;
        result = ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, &removed,
            sizeof(removed), CTL_DATA_EOR | CTL_DATA_CRIT);
        if (result != 0)
-               nstat_msgremovedfailures += 1;
+               nstat_stats.nstat_msgremovedfailures += 1;
 
        return result;
 }
@@ -2610,9 +3217,13 @@ nstat_control_handle_add_all(
                return EINVAL;
        }
        
+
        nstat_msg_add_all_srcs  *req = mbuf_data(m);
+       if (req->provider > NSTAT_PROVIDER_LAST) return ENOENT;
+
        nstat_provider                  *provider = nstat_find_provider_by_id(req->provider);
-       
+       u_int64_t               filter = req->filter;
+
        if (!provider) return ENOENT;
        if (provider->nstat_watcher_add == NULL) return ENOTSUP;
        
@@ -2631,85 +3242,104 @@ nstat_control_handle_add_all(
        lck_mtx_unlock(&state->mtx);
        if (result != 0) return result;
 
+       state->ncs_provider_filters[req->provider] = filter;
+
        result = provider->nstat_watcher_add(state);
        if (result != 0)
        {
+               state->ncs_provider_filters[req->provider] = 0;
                lck_mtx_lock(&state->mtx);
                state->ncs_watching &= ~(1 << provider->nstat_provider_id);
                lck_mtx_unlock(&state->mtx);
        }
        if (result == 0)
-               nstat_enqueue_success(req->hdr.context, state);
+               nstat_enqueue_success(req->hdr.context, state, 0);
        
        return result;
 }
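Note on the hunk above: nstat_control_handle_add_all() now records req->filter per provider. NSTAT_FILTER_SUPPRESS_SRC_ADDED suppresses the per-source added messages, and NSTAT_FILTER_PROVIDER_NOZEROBYTES is mapped onto each new source's NSTAT_FILTER_NOZEROBYTES filter in nstat_control_source_add() below. A hypothetical sketch of the request a client might build; the struct layout and constant values are illustrative assumptions, not taken from ntstat.h.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct msg_hdr      { uint64_t context; uint32_t type; uint16_t length; uint16_t flags; };
    struct add_all_srcs { struct msg_hdr hdr; uint32_t provider; uint64_t filter; };

    #define MSG_TYPE_ADD_ALL_SRCS       1004u   /* assumed value */
    #define FILTER_SUPPRESS_SRC_ADDED   0x0001u /* assumed value */
    #define FILTER_PROVIDER_NOZEROBYTES 0x0002u /* assumed value */
    #define PROVIDER_TCP                2u      /* assumed value */

    static size_t
    build_add_all_request(void *buf, uint64_t context)
    {
        struct add_all_srcs req;

        memset(&req, 0, sizeof(req));
        req.hdr.type = MSG_TYPE_ADD_ALL_SRCS;
        req.hdr.length = (uint16_t)sizeof(req);
        req.hdr.context = context;
        req.provider = PROVIDER_TCP;
        /* Skip per-source "added" messages and sources that never moved data. */
        req.filter = FILTER_SUPPRESS_SRC_ADDED | FILTER_PROVIDER_NOZEROBYTES;

        memcpy(buf, &req, sizeof(req));
        return sizeof(req);
    }

    int
    main(void)
    {
        uint8_t buf[64];
        size_t len = build_add_all_request(buf, 0x1234);

        printf("request length=%zu\n", len);
        return 0;
    }
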
 
 static errno_t
 nstat_control_source_add(
-       u_int64_t                               context,
+       u_int64_t                       context,
        nstat_control_state             *state,
        nstat_provider                  *provider,
-       nstat_provider_cookie_t cookie)
+       nstat_provider_cookie_t         cookie)
 {
-       // Fill out source added message
-       mbuf_t                                  msg = NULL;
-       unsigned int                    one = 1;
+       // Fill out source added message if appropriate
+       mbuf_t                  msg = NULL;
+       nstat_src_ref_t         *srcrefp = NULL;
+
+       u_int64_t               provider_filters =
+           state->ncs_provider_filters[provider->nstat_provider_id];
+       boolean_t               tell_user =
+           ((provider_filters & NSTAT_FILTER_SUPPRESS_SRC_ADDED) == 0);
+       u_int32_t               src_filter =
+           (provider_filters & NSTAT_FILTER_PROVIDER_NOZEROBYTES)
+               ? NSTAT_FILTER_NOZEROBYTES : 0;
+
+       if (tell_user)
+       {
+               unsigned int one = 1;
        
-       if (mbuf_allocpacket(MBUF_DONTWAIT, sizeof(nstat_msg_src_added), &one,
-           &msg) != 0)
-               return ENOMEM;
+               if (mbuf_allocpacket(MBUF_DONTWAIT, sizeof(nstat_msg_src_added),
+                   &one, &msg) != 0)
+                       return ENOMEM;
        
-       mbuf_setlen(msg, sizeof(nstat_msg_src_added));
-       mbuf_pkthdr_setlen(msg, mbuf_len(msg));
-       nstat_msg_src_added     *add = mbuf_data(msg);
-       bzero(add, sizeof(*add));
-       add->hdr.type = NSTAT_MSG_TYPE_SRC_ADDED;
-       add->hdr.context = context;
-       add->provider = provider->nstat_provider_id;
+               mbuf_setlen(msg, sizeof(nstat_msg_src_added));
+               mbuf_pkthdr_setlen(msg, mbuf_len(msg));
+               nstat_msg_src_added     *add = mbuf_data(msg);
+               bzero(add, sizeof(*add));
+               add->hdr.type = NSTAT_MSG_TYPE_SRC_ADDED;
+               add->hdr.length = mbuf_len(msg);
+               add->hdr.context = context;
+               add->provider = provider->nstat_provider_id;
+               srcrefp = &add->srcref;
+       }
        
        // Allocate storage for the source
        nstat_src       *src = OSMalloc(sizeof(*src), nstat_malloc_tag);
        if (src == NULL)
        {
-               mbuf_freem(msg);
+               if (msg) mbuf_freem(msg);
                return ENOMEM;
        }
        
        // Fill in the source, including picking an unused source ref
        lck_mtx_lock(&state->mtx);
-       
-       add->srcref = src->srcref = nstat_control_next_src_ref(state);
+
+       src->srcref = nstat_control_next_src_ref(state);
+       if (srcrefp)
+               *srcrefp = src->srcref;
+
        if (state->ncs_flags & NSTAT_FLAG_CLEANUP || src->srcref == NSTAT_SRC_REF_INVALID)
        {
                lck_mtx_unlock(&state->mtx);
                OSFree(src, sizeof(*src), nstat_malloc_tag);
-               mbuf_freem(msg);
+               if (msg) mbuf_freem(msg);
                return EINVAL;
        }
        src->provider = provider;
        src->cookie = cookie;
-       src->filter = 0;
-       
-       // send the source added message
-       errno_t result = ctl_enqueuembuf(state->ncs_kctl, state->ncs_unit, msg,
-                                       CTL_DATA_EOR);
-       if (result != 0)
+       src->filter = src_filter;
+
+       if (msg)
        {
-               nstat_srcaddedfailures += 1;
-               lck_mtx_unlock(&state->mtx);
-               OSFree(src, sizeof(*src), nstat_malloc_tag);
-               mbuf_freem(msg);
-               return result;
+               // send the source added message if appropriate
+               errno_t result = ctl_enqueuembuf(state->ncs_kctl, state->ncs_unit, msg,
+                                               CTL_DATA_EOR);
+               if (result != 0)
+               {
+                       nstat_stats.nstat_srcaddedfailures += 1;
+                       lck_mtx_unlock(&state->mtx);
+                       OSFree(src, sizeof(*src), nstat_malloc_tag);
+                       mbuf_freem(msg);
+                       return result;
+               }
        }
-       
-       // Put the      source in the list
+       // Put the source in the list
        src->next = state->ncs_srcs;
        state->ncs_srcs = src;
        
-       // send the description message
-       // not useful as the source is often not complete
-//     nstat_control_send_description(state, src, 0);
-       
        lck_mtx_unlock(&state->mtx);
        
        return 0;
@@ -2770,64 +3400,121 @@ nstat_control_handle_query_request(
        {
                return EINVAL;
        }
+
+       const boolean_t all_srcs = (req.srcref == NSTAT_SRC_REF_ALL);
        
        lck_mtx_lock(&state->mtx);
-       if (req.srcref == NSTAT_SRC_REF_ALL)
+
+       if (all_srcs)
+       {
                state->ncs_flags |= NSTAT_FLAG_REQCOUNTS;
+       }
        nstat_src       **srcpp = &state->ncs_srcs;
-       while (*srcpp != NULL)
-       {
-               int     gone;
+       u_int64_t       src_count = 0;
+       boolean_t       partial = FALSE;
+
+       /*
+        * Error handling policy and sequence number generation is folded into
+        * nstat_control_begin_query.
+        */
+       partial = nstat_control_begin_query(state, &req.hdr);
 
+       while (*srcpp != NULL
+               && (!partial || src_count < QUERY_CONTINUATION_SRC_COUNT))
+       {
+               nstat_src       *src = NULL;
+               int                     gone;
+               
+               src = *srcpp;
                gone = 0;
                // XXX ignore IFACE types?
-               if (req.srcref == NSTAT_SRC_REF_ALL ||
-                   (*srcpp)->srcref == req.srcref)
+               if (all_srcs || src->srcref == req.srcref)
                {
-                       gone = 0;
-
-                       result = nstat_control_send_counts(state, *srcpp,
-                           req.hdr.context, &gone);
-                       
-                       // If the counts message failed to enqueue then we should clear our flag so
-                       // that a client doesn't miss anything on idle cleanup.
-                       if (result != 0)
-                               state->ncs_flags &= ~NSTAT_FLAG_REQCOUNTS;
-                       
-                       if (gone)
+                       if (nstat_control_reporting_allowed(state, src)
+                           && (!partial || !all_srcs || src->seq != state->ncs_seq))
                        {
-                               // send one last descriptor message so client may see last state
-                               // If we can't send the notification now, it
-                               // will be sent in the idle cleanup.
-                               result = nstat_control_send_description(state, *srcpp, 0);
-                               if (result != 0 && nstat_debug)
-                                       printf("%s - nstat_control_send_description() %d\n",
-                                               __func__, result);
-                               if (result != 0) {
+                               if (all_srcs &&
+                                       (req.hdr.flags & NSTAT_MSG_HDR_FLAG_SUPPORTS_AGGREGATE) != 0)
+                               {
+                                       result = nstat_control_append_counts(state, src, &gone);
+                               }
+                               else
+                               {
+                                       result = nstat_control_send_counts(state, src, req.hdr.context, 0, &gone);
+                               }
+
+                               if (ENOMEM == result || ENOBUFS == result)
+                               {
+                                       /*
+                                        * If the counts message failed to
+                                        * enqueue then we should clear our flag so
+                                        * that a client doesn't miss anything on
+                                        * idle cleanup.  We skip the "gone"
+                                        * processing in the hope that we may
+                                        * catch it another time.
+                                        */
                                        state->ncs_flags &= ~NSTAT_FLAG_REQCOUNTS;
                                        break;
-                               }       
-
-                               // pull src out of the list
-                               nstat_src       *src = *srcpp;
-                               *srcpp = src->next;
-                               
-                               src->next = dead_srcs;
-                               dead_srcs = src;
+                               }
+                               if (partial)
+                               {
+                                       /*
+                                        * We skip over hard errors and
+                                        * filtered sources.
+                                        */
+                                       src->seq = state->ncs_seq;
+                                       src_count++;
+                               }
                        }
-                       
-                       if (req.srcref != NSTAT_SRC_REF_ALL)
-                               break;
                }
                
-               if (!gone)
+               if (gone)
+               {
+                       // Send one last descriptor message so the client may see the final state.
+                       // If we can't send the notification now, it will be sent
+                       // in the idle cleanup.
+                       result = nstat_control_send_description(state, *srcpp, 0, 0);
+                       if (result != 0)
+                       {
+                               nstat_stats.nstat_control_send_description_failures++;
+                               if (nstat_debug != 0)
+                                       printf("%s - nstat_control_send_description() %d\n", __func__, result);
+                               state->ncs_flags &= ~NSTAT_FLAG_REQCOUNTS;
+                               break;
+                       }
+                       
+                       // pull src out of the list
+                       *srcpp = src->next;
+                       
+                       src->next = dead_srcs;
+                       dead_srcs = src;
+               }
+               else
+               {
                        srcpp = &(*srcpp)->next;
+               }
+               
+               if (!all_srcs && req.srcref == src->srcref)
+               {
+                       break;
+               }
        }
-       lck_mtx_unlock(&state->mtx);
-       
+       nstat_flush_accumulated_msgs(state);
+
+       u_int16_t flags = 0;
        if (req.srcref == NSTAT_SRC_REF_ALL)
+               flags = nstat_control_end_query(state, *srcpp, partial);
+
+       lck_mtx_unlock(&state->mtx);
+
+       /*
+        * If an error occurred enqueueing data, then allow the error to
+        * propagate to nstat_control_send. This way, the error is sent to
+        * user-level.
+        */
+       if (all_srcs && ENOMEM != result && ENOBUFS != result)
        {
-               nstat_enqueue_success(req.hdr.context, state);
+               nstat_enqueue_success(req.hdr.context, state, flags);
                result = 0;
        }
        
@@ -2848,37 +3535,86 @@ nstat_control_handle_query_request(
 static errno_t
 nstat_control_handle_get_src_description(
        nstat_control_state     *state,
-       mbuf_t                  m)
+       mbuf_t                          m)
 {
        nstat_msg_get_src_description   req;
-       errno_t result = 0;
+       errno_t result = ENOENT;
        nstat_src *src;
 
        if (mbuf_copydata(m, 0, sizeof(req), &req) != 0)
        {
                return EINVAL;
        }
-       
+
        lck_mtx_lock(&state->mtx);
-       if (req.srcref == NSTAT_SRC_REF_ALL)
-               state->ncs_flags |= NSTAT_FLAG_REQDESCS;
-       for (src = state->ncs_srcs; src; src = src->next)
-               if (req.srcref == NSTAT_SRC_REF_ALL ||
-                   src->srcref == req.srcref)
+       u_int64_t src_count = 0;
+       boolean_t partial = FALSE;
+       const boolean_t all_srcs = (req.srcref == NSTAT_SRC_REF_ALL);
+
+       /*
+        * Error handling policy and sequence number generation is folded into
+        * nstat_control_begin_query.
+        */
+       partial = nstat_control_begin_query(state, &req.hdr);
+
+       for (src = state->ncs_srcs;
+            src && (!partial || src_count < QUERY_CONTINUATION_SRC_COUNT);
+            src = src->next)
+       {
+               if (all_srcs || src->srcref == req.srcref)
                {
-                       result = nstat_control_send_description(state, src,
-                           req.hdr.context);
-                       if (result != 0)
-                               state->ncs_flags &= ~NSTAT_FLAG_REQDESCS;
-                       if (req.srcref != NSTAT_SRC_REF_ALL)
+                       if (nstat_control_reporting_allowed(state, src)
+                           && (!all_srcs || !partial ||  src->seq != state->ncs_seq))
+                       {
+                               if ((req.hdr.flags & NSTAT_MSG_HDR_FLAG_SUPPORTS_AGGREGATE) != 0 && all_srcs)
+                               {
+                                       result = nstat_control_append_description(state, src);
+                               }
+                               else
+                               {
+                                       result = nstat_control_send_description(state, src, req.hdr.context, 0);
+                               }
+
+                               if (ENOMEM == result || ENOBUFS == result)
+                               {
+                                       /*
+                                        * If the description message failed to
+                                        * enqueue then we give up for now.
+                                        */
+                                       break;
+                               }
+                               if (partial)
+                               {
+                                       /*
+                                        * Note, we skip over hard errors and
+                                        * filtered sources.
+                                        */
+                                       src->seq = state->ncs_seq;
+                                       src_count++;
+                               }
+                       }
+                       
+                       if (!all_srcs)
+                       {
                                break;
+                       }
                }
+       }
+       nstat_flush_accumulated_msgs(state);
+
+       u_int16_t flags = 0;
+       if (req.srcref == NSTAT_SRC_REF_ALL)
+               flags = nstat_control_end_query(state, src, partial);
+
        lck_mtx_unlock(&state->mtx);
-       if (req.srcref != NSTAT_SRC_REF_ALL && src == NULL)
-               result = ENOENT;
-       else if (req.srcref == NSTAT_SRC_REF_ALL)
+       /*
+        * If an error occurred enqueueing data, then allow the error to
+        * propagate to nstat_control_send. This way, the error is sent to
+        * user-level.
+        */
+       if (all_srcs && ENOMEM != result && ENOBUFS != result)
        {
-               nstat_enqueue_success(req.hdr.context, state);
+               nstat_enqueue_success(req.hdr.context, state, flags);
                result = 0;
        }
        
@@ -2911,7 +3647,231 @@ nstat_control_handle_set_filter(
                return ENOENT;
 
        return 0;
+}
+
+static void
+nstat_send_error(
+    nstat_control_state *state,
+    u_int64_t context,
+    u_int32_t error)
+{
+       errno_t result;
+       struct nstat_msg_error  err;
+
+       bzero(&err, sizeof(err));
+       err.hdr.type = NSTAT_MSG_TYPE_ERROR;
+       err.hdr.length = sizeof(err);
+       err.hdr.context = context;
+       err.error = error;
+
+       result = ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, &err,
+                                   sizeof(err), CTL_DATA_EOR | CTL_DATA_CRIT);
+       if (result != 0)
+               nstat_stats.nstat_msgerrorfailures++;
+}
+
+static boolean_t
+nstat_control_begin_query(
+    nstat_control_state *state,
+    const nstat_msg_hdr *hdrp)
+{
+       boolean_t partial = FALSE;
+
+       if (hdrp->flags & NSTAT_MSG_HDR_FLAG_CONTINUATION)
+       {
+               /* A partial query all has been requested. */
+               partial = TRUE;
+
+               if (state->ncs_context != hdrp->context)
+               {
+                       if (state->ncs_context != 0)
+                               nstat_send_error(state, state->ncs_context, EAGAIN);
+
+                       /* Initialize state for a partial query all. */
+                       state->ncs_context = hdrp->context;
+                       state->ncs_seq++;
+               }
+       }
+       else if (state->ncs_context != 0)
+       {
+               /*
+                * A continuation of a paced query was in progress. Send that
+                * context an error and reset the state. If the same context
+                * has changed its mind, just send the full query results.
+                */
+               if (state->ncs_context != hdrp->context)
+                       nstat_send_error(state, state->ncs_context, EAGAIN);
+       }
+
+       return partial;
+}
+
+static u_int16_t
+nstat_control_end_query(
+    nstat_control_state *state,
+    nstat_src *last_src,
+    boolean_t partial)
+{
+       u_int16_t flags = 0;
+
+       if (last_src == NULL || !partial)
+       {
+               /*
+                * We iterated through the entire srcs list or exited early
+                * from the loop when a partial update was not requested (an
+                * error occurred), so clear context to indicate internally
+                * that the query is finished.
+                */
+               state->ncs_context = 0;
+       }
+       else
+       {
+               /*
+                * Indicate to userlevel to make another partial request as
+                * there are still sources left to be reported.
+                */
+               flags |= NSTAT_MSG_HDR_FLAG_CONTINUATION;
+       }
+
+       return flags;
+}
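
The begin/end pair above implements a paging protocol: a client that sets NSTAT_MSG_HDR_FLAG_CONTINUATION keeps re-issuing the same request (same context) until the kernel's success reply no longer carries the continuation flag. A minimal user-space sketch under assumptions: fd is a connected statistics control socket, replies arrive one message per recv(), and NSTAT_MSG_TYPE_SUCCESS is the success type declared earlier in ntstat.h (not visible in this hunk); drain_replies() and query_all_sources() are hypothetical helper names.

#include <stdint.h>
#include <string.h>
#include <sys/socket.h>

static uint16_t
drain_replies(int fd)
{
	/* Skip counts/descriptor messages; return the header flags from the
	 * final success or error message so the caller can test the
	 * continuation bit. */
	char buf[NSTAT_MAX_MSG_SIZE];
	nstat_msg_hdr hdr;

	for (;;) {
		ssize_t n = recv(fd, buf, sizeof(buf), 0);
		if (n < (ssize_t)sizeof(hdr))
			return 0;
		memcpy(&hdr, buf, sizeof(hdr));
		if (hdr.type == NSTAT_MSG_TYPE_SUCCESS ||
		    hdr.type == NSTAT_MSG_TYPE_ERROR)
			return hdr.flags;
	}
}

static void
query_all_sources(int fd)
{
	nstat_msg_query_src_req req;

	memset(&req, 0, sizeof(req));
	req.hdr.type = NSTAT_MSG_TYPE_QUERY_SRC;
	req.hdr.length = sizeof(req);
	req.hdr.context = 1;		/* echoed back by the kernel */
	req.srcref = NSTAT_SRC_REF_ALL;

	do {
		(void)send(fd, &req, sizeof(req), 0);
		/* Ask for the next page on any follow-up round trip. */
		req.hdr.flags = NSTAT_MSG_HDR_FLAG_CONTINUATION;
	} while (drain_replies(fd) & NSTAT_MSG_HDR_FLAG_CONTINUATION);
}
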
+
+static errno_t
+nstat_control_handle_get_update(
+    nstat_control_state                *state,
+    mbuf_t                                     m)
+{
+       nstat_msg_query_src_req req;
+
+       if (mbuf_copydata(m, 0, sizeof(req), &req) != 0)
+       {
+               return EINVAL;
+       }
+
+       lck_mtx_lock(&state->mtx);
+       
+       state->ncs_flags |= NSTAT_FLAG_SUPPORTS_UPDATES;
+
+       errno_t         result = ENOENT;
+       nstat_src       *src;
+       nstat_src       *dead_srcs = NULL;
+       nstat_src       **srcpp = &state->ncs_srcs;
+       u_int64_t src_count = 0;
+       boolean_t partial = FALSE;
+
+       /*
+        * Error handling policy and sequence number generation is folded into
+        * nstat_control_begin_query.
+        */
+       partial = nstat_control_begin_query(state, &req.hdr);
+
+       while (*srcpp != NULL
+           && (FALSE == partial
+               || src_count < QUERY_CONTINUATION_SRC_COUNT))
+       {
+               int                     gone;
+               
+               gone = 0;
+               src = *srcpp;
+               if (nstat_control_reporting_allowed(state, src))
+               {
+                       /* skip this source if it has the current state
+                        * sequence number as it's already been reported in
+                        * this query-all partial sequence. */
+                       if (req.srcref == NSTAT_SRC_REF_ALL
+                           && (FALSE == partial || src->seq != state->ncs_seq))
+                       {
+                               result = nstat_control_append_update(state, src, &gone);
+                               if (ENOMEM == result || ENOBUFS == result)
+                               {
+                                       /*
+                                        * If the update message failed to
+                                        * enqueue then give up.
+                                        */
+                                       break;
+                               }
+                               if (partial)
+                               {
+                                       /*
+                                        * We skip over hard errors and
+                                        * filtered sources.
+                                        */
+                                       src->seq = state->ncs_seq;
+                                       src_count++;
+                               }
+                       }
+                       else if (src->srcref == req.srcref)
+                       {
+                               result = nstat_control_send_update(state, src, req.hdr.context, 0, &gone);
+                       }
+               }
+               
+               if (gone)
+               {
+                       // pull src out of the list
+                       *srcpp = src->next;
+
+                       src->next = dead_srcs;
+                       dead_srcs = src;
+               }
+               else
+               {
+                       srcpp = &(*srcpp)->next;
+               }
+               
+               if (req.srcref != NSTAT_SRC_REF_ALL && req.srcref == src->srcref)
+               {
+                       break;
+               }
+       }
+
+       nstat_flush_accumulated_msgs(state);
+
+
+       u_int16_t flags = 0;
+       if (req.srcref == NSTAT_SRC_REF_ALL)
+               flags = nstat_control_end_query(state, *srcpp, partial);
+
+       lck_mtx_unlock(&state->mtx);
+       /*
+        * If an error occurred enqueueing data, then allow the error to
+        * propagate to nstat_control_send. This way, the error is sent to
+        * user-level.
+        */
+       if (req.srcref == NSTAT_SRC_REF_ALL && ENOMEM != result && ENOBUFS != result)
+       {
+               nstat_enqueue_success(req.hdr.context, state, flags);
+               result = 0;
+       }
+
+       while (dead_srcs)
+       {
+               src = dead_srcs;
+               dead_srcs = src->next;
+               
+               // release src and send notification
+               nstat_control_cleanup_source(state, src, FALSE);
+       }
+       
+       return result;
+}
 
+static errno_t
+nstat_control_handle_subscribe_sysinfo(
+    nstat_control_state                *state)
+{
+       errno_t result = priv_check_cred(kauth_cred_get(), PRIV_NET_PRIVILEGED_NETWORK_STATISTICS, 0);
+       
+       if (result != 0)
+       {
+               return result;
+       }
+       
+       lck_mtx_lock(&state->mtx);
+       state->ncs_flags |= NSTAT_FLAG_SYSINFO_SUBSCRIBED;
+       lck_mtx_unlock(&state->mtx);
+       
+       return 0;
 }
 
 static errno_t
@@ -2927,7 +3887,7 @@ nstat_control_send(
        struct nstat_msg_hdr    storage;
        errno_t                                 result = 0;
        
-       if (mbuf_pkthdr_len(m) < sizeof(hdr))
+       if (mbuf_pkthdr_len(m) < sizeof(*hdr))
        {
                // Is this the right thing to do?
                mbuf_freem(m);
@@ -2944,6 +3904,19 @@ nstat_control_send(
                hdr = &storage;
        }
        
+       // Legacy clients may not set the length
+       // Those clients are likely not setting the flags either
+       // Fix everything up so old clients continue to work
+       if (hdr->length != mbuf_pkthdr_len(m))
+       {
+               hdr->flags = 0;
+               hdr->length = mbuf_pkthdr_len(m);
+               if (hdr == &storage)
+               {
+                       mbuf_copyback(m, 0, sizeof(*hdr), hdr, MBUF_DONTWAIT);
+               }
+       }
+       
        switch (hdr->type)
        {
                case NSTAT_MSG_TYPE_ADD_SRC:
@@ -2965,11 +3938,19 @@ nstat_control_send(
                case NSTAT_MSG_TYPE_GET_SRC_DESC:
                        result = nstat_control_handle_get_src_description(state, m);
                        break;
-
+               
                case NSTAT_MSG_TYPE_SET_FILTER:
                        result = nstat_control_handle_set_filter(state, m);
                        break;
-
+               
+               case NSTAT_MSG_TYPE_GET_UPDATE:
+                       result = nstat_control_handle_get_update(state, m);
+                       break;
+               
+               case NSTAT_MSG_TYPE_SUBSCRIBE_SYSINFO:
+                       result = nstat_control_handle_subscribe_sysinfo(state);
+                       break;
+               
                default:
                        result = EINVAL;
                        break;
@@ -2981,16 +3962,34 @@ nstat_control_send(
                
                bzero(&err, sizeof(err));
                err.hdr.type = NSTAT_MSG_TYPE_ERROR;
+               err.hdr.length = sizeof(err) + mbuf_pkthdr_len(m);
                err.hdr.context = hdr->context;
                err.error = result;
                
-               result = ctl_enqueuedata(kctl, unit, &err, sizeof(err),
-                                       CTL_DATA_EOR | CTL_DATA_CRIT);
+               if (mbuf_prepend(&m, sizeof(err), MBUF_DONTWAIT) == 0 &&
+                       mbuf_copyback(m, 0, sizeof(err), &err, MBUF_DONTWAIT) == 0)
+               {
+                       result = ctl_enqueuembuf(kctl, unit, m, CTL_DATA_EOR | CTL_DATA_CRIT);
+                       if (result != 0)
+                       {
+                               mbuf_freem(m);
+                       }
+                       m = NULL;
+               }
+               
                if (result != 0)
-                       nstat_descriptionfailures += 1;
+               {
+                       // Unable to prepend the error to the request - just send the error
+                       err.hdr.length = sizeof(err);
+                       result = ctl_enqueuedata(kctl, unit, &err, sizeof(err),
+                                               CTL_DATA_EOR | CTL_DATA_CRIT);
+                       if (result != 0)
+                               nstat_stats.nstat_msgerrorfailures += 1;
+               }
+               nstat_stats.nstat_handle_msg_failures += 1;
        }
        
-       mbuf_freem(m);
+       if (m) mbuf_freem(m);
        
        return result;
 }
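
nstat_control_send() is the kernel-control send handler, so everything above is reached from user space through a PF_SYSTEM/SYSPROTO_CONTROL socket. A hedged sketch of opening that control; the control name string is assumed to match the NET_STAT_CONTROL_NAME value defined elsewhere in ntstat.h, and nstat_open() is a hypothetical helper.

#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sys_domain.h>
#include <sys/kern_control.h>

static int
nstat_open(void)
{
	struct ctl_info info;
	struct sockaddr_ctl sc;
	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);

	if (fd < 0)
		return -1;

	memset(&info, 0, sizeof(info));
	strlcpy(info.ctl_name, "com.apple.network.statistics", sizeof(info.ctl_name));
	if (ioctl(fd, CTLIOCGINFO, &info) != 0) {
		close(fd);
		return -1;
	}

	memset(&sc, 0, sizeof(sc));
	sc.sc_len = sizeof(sc);
	sc.sc_family = AF_SYSTEM;
	sc.ss_sysaddr = AF_SYS_CONTROL;
	sc.sc_id = info.ctl_id;
	sc.sc_unit = 0;			/* let the kernel pick the unit */
	if (connect(fd, (struct sockaddr *)&sc, sizeof(sc)) != 0) {
		close(fd);
		return -1;
	}
	return fd;
}
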
index 2aad07b656dd131f4f9e26d0f702ad96c7691181..a6bcec06b8c7fb902d3dee068875754522732568 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2010-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <netinet/in.h>
 #include <net/if.h>
 #include <net/if_var.h>
+#include <netinet/tcp.h>
 
 #ifdef PRIVATE
 #pragma pack(push, 4)
 #pragma mark -- Common Data Structures --
 
-#define __NSTAT_REVISION__     6
+#define __NSTAT_REVISION__     7
 
 typedef        u_int32_t       nstat_provider_id_t;
 typedef        u_int32_t       nstat_src_ref_t;
@@ -75,18 +76,20 @@ typedef struct nstat_sysinfo_keyval
                        int64_t nstat_sysinfo_scalar;
                        double  nstat_sysinfo_distribution;
        } u;
-} nstat_sysinfo_keyval;
+} __attribute__((packed)) nstat_sysinfo_keyval;
 
 #define        NSTAT_SYSINFO_FLAG_SCALAR       0x0001
 #define        NSTAT_SYSINFO_FLAG_DISTRIBUTION 0x0002
 
+#define NSTAT_MAX_MSG_SIZE     4096
+
 typedef struct nstat_sysinfo_counts
 {
        /* Counters */
        u_int32_t       nstat_sysinfo_len;
        u_int32_t       pad;
        u_int8_t        nstat_sysinfo_keyvals[];
-} nstat_sysinfo_counts;
+} __attribute__((packed)) nstat_sysinfo_counts;
 
 enum
 {
@@ -101,19 +104,66 @@ enum
        ,NSTAT_SYSINFO_KEY_RECV_PLR             = 9
        ,NSTAT_SYSINFO_KEY_SEND_TLRTO           = 10
        ,NSTAT_SYSINFO_KEY_SEND_REORDERRATE     = 11
-
+       ,NSTAT_SYSINFO_CONNECTION_ATTEMPTS      = 12
+       ,NSTAT_SYSINFO_CONNECTION_ACCEPTS       = 13
+       ,NSTAT_SYSINFO_ECN_CLIENT_SETUP         = 14
+       ,NSTAT_SYSINFO_ECN_SERVER_SETUP         = 15
+       ,NSTAT_SYSINFO_ECN_CLIENT_SUCCESS       = 16
+       ,NSTAT_SYSINFO_ECN_SERVER_SUCCESS       = 17
+       ,NSTAT_SYSINFO_ECN_NOT_SUPPORTED        = 18
+       ,NSTAT_SYSINFO_ECN_LOST_SYN             = 19
+       ,NSTAT_SYSINFO_ECN_LOST_SYNACK          = 20
+       ,NSTAT_SYSINFO_ECN_RECV_CE              = 21
+       ,NSTAT_SYSINFO_ECN_RECV_ECE             = 22
+       ,NSTAT_SYSINFO_ECN_SENT_ECE             = 23
+       ,NSTAT_SYSINFO_ECN_CONN_RECV_CE         = 24
+       ,NSTAT_SYSINFO_ECN_CONN_PLNOCE          = 25
+       ,NSTAT_SYSINFO_ECN_CONN_PL_CE           = 26
+       ,NSTAT_SYSINFO_ECN_CONN_NOPL_CE         = 27
+       ,NSTAT_SYSINFO_MBUF_16KB_TOTAL          = 28
+       ,NSTAT_SYSINFO_ECN_CLIENT_ENABLED       = 29
+       ,NSTAT_SYSINFO_ECN_SERVER_ENABLED       = 30
+       ,NSTAT_SYSINFO_ECN_CONN_RECV_ECE        = 31
+       ,NSTAT_SYSINFO_MBUF_MEM_RELEASED        = 32
+       ,NSTAT_SYSINFO_MBUF_DRAIN_CNT           = 33
+       ,NSTAT_SYSINFO_TFO_SYN_DATA_RCV         = 34
+       ,NSTAT_SYSINFO_TFO_COOKIE_REQ_RCV       = 35
+       ,NSTAT_SYSINFO_TFO_COOKIE_SENT          = 36
+       ,NSTAT_SYSINFO_TFO_COOKIE_INVALID       = 37
+       ,NSTAT_SYSINFO_TFO_COOKIE_REQ           = 38
+       ,NSTAT_SYSINFO_TFO_COOKIE_RCV           = 39
+       ,NSTAT_SYSINFO_TFO_SYN_DATA_SENT        = 40
+       ,NSTAT_SYSINFO_TFO_SYN_DATA_ACKED       = 41
+       ,NSTAT_SYSINFO_TFO_SYN_LOSS             = 42
+       ,NSTAT_SYSINFO_TFO_BLACKHOLE            = 43
 };
 
 #pragma mark -- Network Statistics Providers --
 
+
+// Interface properties
+
+#define NSTAT_IFNET_IS_UNKNOWN_TYPE      0x01
+#define NSTAT_IFNET_IS_LOOPBACK          0x02
+#define NSTAT_IFNET_IS_CELLULAR          0x04
+#define NSTAT_IFNET_IS_WIFI              0x08
+#define NSTAT_IFNET_IS_WIRED             0x10
+#define NSTAT_IFNET_IS_AWDL              0x20
+#define NSTAT_IFNET_IS_EXPENSIVE         0x40
+#define NSTAT_IFNET_IS_VPN               0x80
+
+
 enum
 {
-       NSTAT_PROVIDER_ROUTE    = 1
+       NSTAT_PROVIDER_NONE     = 0
+       ,NSTAT_PROVIDER_ROUTE   = 1
        ,NSTAT_PROVIDER_TCP     = 2
        ,NSTAT_PROVIDER_UDP     = 3
        ,NSTAT_PROVIDER_IFNET   = 4
        ,NSTAT_PROVIDER_SYSINFO = 5
 };
+#define NSTAT_PROVIDER_LAST NSTAT_PROVIDER_SYSINFO
+#define NSTAT_PROVIDER_COUNT (NSTAT_PROVIDER_LAST+1)
 
 typedef struct nstat_route_add_param
 {
@@ -182,6 +232,8 @@ typedef struct nstat_tcp_descriptor
        uint8_t         uuid[16];
        uint8_t         euuid[16];
        uint8_t         vuuid[16];
+       struct tcp_conn_status connstatus;
+       uint16_t        ifnet_properties        __attribute__((aligned(4)));
 } nstat_tcp_descriptor;
 
 typedef struct nstat_tcp_add_param     nstat_udp_add_param;
@@ -215,6 +267,7 @@ typedef struct nstat_udp_descriptor
        uint8_t         uuid[16];
        uint8_t         euuid[16];
        uint8_t         vuuid[16];
+       uint16_t        ifnet_properties;
 } nstat_udp_descriptor;
 
 typedef struct nstat_route_descriptor
@@ -255,16 +308,146 @@ typedef struct nstat_ifnet_add_param
        u_int64_t       threshold;
 } nstat_ifnet_add_param;
 
+typedef struct nstat_ifnet_desc_cellular_status
+{
+       u_int32_t valid_bitmask; /* indicates which fields are valid */
+#define NSTAT_IFNET_DESC_CELL_LINK_QUALITY_METRIC_VALID                0x1
+#define NSTAT_IFNET_DESC_CELL_UL_EFFECTIVE_BANDWIDTH_VALID     0x2
+#define NSTAT_IFNET_DESC_CELL_UL_MAX_BANDWIDTH_VALID           0x4
+#define NSTAT_IFNET_DESC_CELL_UL_MIN_LATENCY_VALID             0x8
+#define NSTAT_IFNET_DESC_CELL_UL_EFFECTIVE_LATENCY_VALID       0x10
+#define NSTAT_IFNET_DESC_CELL_UL_MAX_LATENCY_VALID             0x20
+#define NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_VALID             0x40
+#define NSTAT_IFNET_DESC_CELL_UL_BYTES_LOST_VALID              0x80
+#define NSTAT_IFNET_DESC_CELL_UL_MIN_QUEUE_SIZE_VALID          0x100
+#define NSTAT_IFNET_DESC_CELL_UL_AVG_QUEUE_SIZE_VALID          0x200
+#define NSTAT_IFNET_DESC_CELL_UL_MAX_QUEUE_SIZE_VALID          0x400
+#define NSTAT_IFNET_DESC_CELL_DL_EFFECTIVE_BANDWIDTH_VALID     0x800
+#define NSTAT_IFNET_DESC_CELL_DL_MAX_BANDWIDTH_VALID           0x1000
+#define NSTAT_IFNET_DESC_CELL_CONFIG_INACTIVITY_TIME_VALID     0x2000
+#define NSTAT_IFNET_DESC_CELL_CONFIG_BACKOFF_TIME_VALID                0x4000
+       u_int32_t link_quality_metric;
+       u_int32_t ul_effective_bandwidth; /* Measured uplink bandwidth based on
+                                            current activity (bps) */
+       u_int32_t ul_max_bandwidth; /* Maximum supported uplink bandwidth
+                                      (bps) */
+       u_int32_t ul_min_latency; /* min expected uplink latency for first hop
+                                    (ms) */
+       u_int32_t ul_effective_latency; /* current expected uplink latency for
+                                          first hop (ms) */
+       u_int32_t ul_max_latency; /* max expected uplink latency first hop
+                                   (ms) */
+       u_int32_t ul_retxt_level; /* Retransmission metric */
+#define NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_NONE      1
+#define NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_LOW       2
+#define NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_MEDIUM    3
+#define NSTAT_IFNET_DESC_CELL_UL_RETXT_LEVEL_HIGH      4
+
+       u_int32_t ul_bytes_lost; /* % of total bytes lost on uplink in Q10
+                                   format */
+       u_int32_t ul_min_queue_size; /* minimum bytes in queue */
+       u_int32_t ul_avg_queue_size; /* average bytes in queue */
+       u_int32_t ul_max_queue_size; /* maximum bytes in queue */
+       u_int32_t dl_effective_bandwidth; /* Measured downlink bandwidth based
+                                            on current activity (bps) */
+       u_int32_t dl_max_bandwidth; /* Maximum supported downlink bandwidth
+                                      (bps) */
+       u_int32_t config_inactivity_time; /* ms */
+       u_int32_t config_backoff_time; /* new connections backoff time in ms */
+} nstat_ifnet_desc_cellular_status;
+
+typedef struct nstat_ifnet_desc_wifi_status {
+       u_int32_t valid_bitmask;
+#define        NSTAT_IFNET_DESC_WIFI_LINK_QUALITY_METRIC_VALID         0x1
+#define        NSTAT_IFNET_DESC_WIFI_UL_EFFECTIVE_BANDWIDTH_VALID      0x2
+#define        NSTAT_IFNET_DESC_WIFI_UL_MAX_BANDWIDTH_VALID            0x4
+#define        NSTAT_IFNET_DESC_WIFI_UL_MIN_LATENCY_VALID              0x8
+#define        NSTAT_IFNET_DESC_WIFI_UL_EFFECTIVE_LATENCY_VALID        0x10
+#define        NSTAT_IFNET_DESC_WIFI_UL_MAX_LATENCY_VALID              0x20
+#define        NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_VALID              0x40
+#define        NSTAT_IFNET_DESC_WIFI_UL_ERROR_RATE_VALID               0x80
+#define        NSTAT_IFNET_DESC_WIFI_UL_BYTES_LOST_VALID               0x100
+#define        NSTAT_IFNET_DESC_WIFI_DL_EFFECTIVE_BANDWIDTH_VALID      0x200
+#define        NSTAT_IFNET_DESC_WIFI_DL_MAX_BANDWIDTH_VALID            0x400
+#define        NSTAT_IFNET_DESC_WIFI_DL_MIN_LATENCY_VALID              0x800
+#define        NSTAT_IFNET_DESC_WIFI_DL_EFFECTIVE_LATENCY_VALID        0x1000
+#define        NSTAT_IFNET_DESC_WIFI_DL_MAX_LATENCY_VALID              0x2000
+#define        NSTAT_IFNET_DESC_WIFI_DL_ERROR_RATE_VALID               0x4000
+#define        NSTAT_IFNET_DESC_WIFI_CONFIG_FREQUENCY_VALID            0x8000
+#define        NSTAT_IFNET_DESC_WIFI_CONFIG_MULTICAST_RATE_VALID       0x10000
+#define        NSTAT_IFNET_DESC_WIFI_CONFIG_SCAN_COUNT_VALID           0x20000
+#define        NSTAT_IFNET_DESC_WIFI_CONFIG_SCAN_DURATION_VALID        0x40000
+       u_int32_t link_quality_metric; /* link quality metric */
+       u_int32_t ul_effective_bandwidth; /* Measured uplink bandwidth based on
+                                            current activity (bps) */
+       u_int32_t ul_max_bandwidth; /* Maximum supported uplink bandwidth
+                                      (bps) */
+       u_int32_t ul_min_latency; /* min expected uplink latency for first hop
+                                    (ms) */
+       u_int32_t ul_effective_latency; /* current expected uplink latency for
+                                          first hop (ms) */
+       u_int32_t ul_max_latency; /* max expected uplink latency for first hop
+                                    (ms) */
+       u_int32_t ul_retxt_level; /* Retransmission metric */
+#define NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_NONE      1
+#define NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_LOW       2
+#define NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_MEDIUM    3
+#define NSTAT_IFNET_DESC_WIFI_UL_RETXT_LEVEL_HIGH      4
+
+       u_int32_t ul_bytes_lost; /* % of total bytes lost on uplink in Q10
+                                   format */
+       u_int32_t ul_error_rate; /* % of bytes dropped on uplink after many
+                                   retransmissions in Q10 format */
+       u_int32_t dl_effective_bandwidth; /* Measured downlink bandwidth based
+                                            on current activity (bps) */
+       u_int32_t dl_max_bandwidth; /* Maximum supported downlink bandwidth
+                                      (bps) */
+       /*
+        * The download latency values indicate the time AP may have to wait
+        * for the driver to receive the packet. These values give the range
+        * of expected latency mainly due to co-existence events and channel
+        * hopping where the interface becomes unavailable.
+        */
+       u_int32_t dl_min_latency; /* min expected latency for first hop in ms */
+       u_int32_t dl_effective_latency; /* current expected latency for first
+                                          hop in ms */
+       u_int32_t dl_max_latency; /* max expected latency for first hop in ms */
+       u_int32_t dl_error_rate; /* % of CRC or other errors in Q10 format */
+       u_int32_t config_frequency; /* 2.4 or 5 GHz */
+#define        NSTAT_IFNET_DESC_WIFI_CONFIG_FREQUENCY_2_4_GHZ  1
+#define        NSTAT_IFNET_DESC_WIFI_CONFIG_FREQUENCY_5_0_GHZ  2
+       u_int32_t config_multicast_rate; /* bps */
+       u_int32_t scan_count; /* scan count during the previous period */
+       u_int32_t scan_duration; /* scan duration in ms */
+} nstat_ifnet_desc_wifi_status;
+
+enum
+{
+       NSTAT_IFNET_DESC_LINK_STATUS_TYPE_NONE = 0
+       ,NSTAT_IFNET_DESC_LINK_STATUS_TYPE_CELLULAR = 1
+       ,NSTAT_IFNET_DESC_LINK_STATUS_TYPE_WIFI = 2
+};
+
+typedef struct nstat_ifnet_desc_link_status
+{
+       u_int32_t       link_status_type;
+       union {
+               nstat_ifnet_desc_cellular_status        cellular;
+               nstat_ifnet_desc_wifi_status            wifi;
+       } u;
+} nstat_ifnet_desc_link_status;
+
 #ifndef        IF_DESCSIZE
 #define        IF_DESCSIZE 128
 #endif
 typedef struct nstat_ifnet_descriptor
 {
-       char                    name[IFNAMSIZ+1];
-       u_int32_t               ifindex;
-       u_int64_t               threshold;
-       unsigned int            type;
-       char                    description[IF_DESCSIZE];
+       char                            name[IFNAMSIZ+1];
+       u_int32_t                       ifindex;
+       u_int64_t                       threshold;
+       unsigned int                    type;
+       char                            description[IF_DESCSIZE];
+       nstat_ifnet_desc_link_status    link_status;
 } nstat_ifnet_descriptor;
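
The new link_status member is a tagged union: consumers should switch on link_status_type before reading either arm, and on valid_bitmask before trusting an individual field. A small sketch against the definitions above; ifnet_ul_bandwidth() is a hypothetical helper.

/* Return 1 and fill *bps with the measured cellular uplink bandwidth,
 * or 0 if the descriptor is not cellular or the field is not valid. */
static int
ifnet_ul_bandwidth(const nstat_ifnet_descriptor *desc, u_int32_t *bps)
{
	const nstat_ifnet_desc_link_status *ls = &desc->link_status;

	if (ls->link_status_type != NSTAT_IFNET_DESC_LINK_STATUS_TYPE_CELLULAR)
		return 0;
	if (!(ls->u.cellular.valid_bitmask &
	    NSTAT_IFNET_DESC_CELL_UL_EFFECTIVE_BANDWIDTH_VALID))
		return 0;
	*bps = ls->u.cellular.ul_effective_bandwidth;
	return 1;
}
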
 
 typedef struct nstat_sysinfo_descriptor
@@ -292,19 +475,22 @@ enum
        ,NSTAT_MSG_TYPE_ERROR                   = 1
        
        // Requests
-       ,NSTAT_MSG_TYPE_ADD_SRC                 = 1001
+       ,NSTAT_MSG_TYPE_ADD_SRC                         = 1001
        ,NSTAT_MSG_TYPE_ADD_ALL_SRCS            = 1002
-       ,NSTAT_MSG_TYPE_REM_SRC                 = 1003
-       ,NSTAT_MSG_TYPE_QUERY_SRC               = 1004
+       ,NSTAT_MSG_TYPE_REM_SRC                         = 1003
+       ,NSTAT_MSG_TYPE_QUERY_SRC                       = 1004
        ,NSTAT_MSG_TYPE_GET_SRC_DESC            = 1005
-       ,NSTAT_MSG_TYPE_SET_FILTER              = 1006
+       ,NSTAT_MSG_TYPE_SET_FILTER                      = 1006
+       ,NSTAT_MSG_TYPE_GET_UPDATE                      = 1007
+       ,NSTAT_MSG_TYPE_SUBSCRIBE_SYSINFO       = 1008
        
        // Responses/Notifications
-       ,NSTAT_MSG_TYPE_SRC_ADDED               = 10001
-       ,NSTAT_MSG_TYPE_SRC_REMOVED             = 10002
-       ,NSTAT_MSG_TYPE_SRC_DESC                = 10003
-       ,NSTAT_MSG_TYPE_SRC_COUNTS              = 10004
-       ,NSTAT_MSG_TYPE_SYSINFO_COUNTS          = 10005
+       ,NSTAT_MSG_TYPE_SRC_ADDED                               = 10001
+       ,NSTAT_MSG_TYPE_SRC_REMOVED                             = 10002
+       ,NSTAT_MSG_TYPE_SRC_DESC                                = 10003
+       ,NSTAT_MSG_TYPE_SRC_COUNTS                              = 10004
+       ,NSTAT_MSG_TYPE_SYSINFO_COUNTS                  = 10005
+       ,NSTAT_MSG_TYPE_SRC_UPDATE                              = 10006
 };
 
 enum
@@ -313,16 +499,50 @@ enum
        ,NSTAT_SRC_REF_INVALID  = 0
 };
 
+/* Source-level filters */
 enum
 {
-       NSTAT_FILTER_NOZEROBYTES = 0x01,
+       NSTAT_FILTER_NOZEROBYTES             = 0x00000001
+};
+
+/* Provider-level filters */
+enum
+{
+       NSTAT_FILTER_ACCEPT_UNKNOWN          = 0x00000001
+       ,NSTAT_FILTER_ACCEPT_LOOPBACK        = 0x00000002
+       ,NSTAT_FILTER_ACCEPT_CELLULAR        = 0x00000004
+       ,NSTAT_FILTER_ACCEPT_WIFI            = 0x00000008
+       ,NSTAT_FILTER_ACCEPT_WIRED           = 0x00000010
+       ,NSTAT_FILTER_ACCEPT_ALL             = 0x0000001F
+       ,NSTAT_FILTER_IFNET_FLAGS            = 0x000000FF
+
+       ,NSTAT_FILTER_PROVIDER_NOZEROBYTES   = 0x00000100
+
+       ,NSTAT_FILTER_TCP_NO_LISTENER        = 0x00001000
+       ,NSTAT_FILTER_TCP_ONLY_LISTENER      = 0x00002000
+       ,NSTAT_FILTER_TCP_INTERFACE_ATTACH   = 0x00004000
+       ,NSTAT_FILTER_TCP_FLAGS              = 0x0000F000
+
+       ,NSTAT_FILTER_UDP_INTERFACE_ATTACH   = 0x00010000
+       ,NSTAT_FILTER_UDP_FLAGS              = 0x000F0000
+
+       ,NSTAT_FILTER_SUPPRESS_SRC_ADDED     = 0x00100000
+       ,NSTAT_FILTER_REQUIRE_SRC_ADDED      = 0x00200000
+};
+
+enum
+{
+       NSTAT_MSG_HDR_FLAG_SUPPORTS_AGGREGATE   = 1 << 0,
+       NSTAT_MSG_HDR_FLAG_CONTINUATION         = 1 << 1,
+       NSTAT_MSG_HDR_FLAG_CLOSING              = 1 << 2,
 };
 
 typedef struct nstat_msg_hdr
 {
        u_int64_t       context;
        u_int32_t       type;
-       u_int32_t       pad; // unused for now
+       u_int16_t       length;
+       u_int16_t       flags;
 } nstat_msg_hdr;
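
With revision 7 the header's pad word becomes length and flags; nstat_control_send() above fixes up legacy messages that leave length at zero, but new clients should fill both. A sketch of building a GET_UPDATE request that advertises aggregate support; nstat_msg_query_src_req is the layout copied out by the handler above, and build_update_request() is a hypothetical helper.

static void
build_update_request(nstat_msg_query_src_req *req)
{
	memset(req, 0, sizeof(*req));
	req->hdr.type    = NSTAT_MSG_TYPE_GET_UPDATE;
	req->hdr.length  = sizeof(*req);	/* new in revision 7 */
	req->hdr.flags   = NSTAT_MSG_HDR_FLAG_SUPPORTS_AGGREGATE; /* batched replies OK */
	req->hdr.context = 0x1234;		/* echoed back in every reply */
	req->srcref      = NSTAT_SRC_REF_ALL;
}
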
 
 typedef struct nstat_msg_error
@@ -342,6 +562,7 @@ typedef struct nstat_msg_add_all_srcs
 {
        nstat_msg_hdr           hdr;
        nstat_provider_id_t     provider;
+       u_int64_t               filter;
 } nstat_msg_add_all_srcs;
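
The new filter word lets a watcher restrict which sources the kernel reports when adding all sources of a provider; the bits are the provider-level filters defined above. A hedged sketch that watches TCP sources on Wi-Fi or wired interfaces and skips listeners; build_tcp_watch_request() is a hypothetical helper.

static void
build_tcp_watch_request(nstat_msg_add_all_srcs *add)
{
	memset(add, 0, sizeof(*add));
	add->hdr.type    = NSTAT_MSG_TYPE_ADD_ALL_SRCS;
	add->hdr.length  = sizeof(*add);
	add->hdr.context = 2;
	add->provider    = NSTAT_PROVIDER_TCP;
	add->filter      = NSTAT_FILTER_ACCEPT_WIFI | NSTAT_FILTER_ACCEPT_WIRED |
	    NSTAT_FILTER_TCP_NO_LISTENER;
}
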
 
 typedef struct nstat_msg_src_added
@@ -391,6 +612,15 @@ typedef struct nstat_msg_src_counts
        nstat_counts            counts;
 } nstat_msg_src_counts;
 
+typedef struct nstat_msg_src_update
+{
+       nstat_msg_hdr           hdr;
+       nstat_src_ref_t         srcref;
+       nstat_counts            counts;
+       nstat_provider_id_t     provider;
+       u_int8_t                        data[];
+} nstat_msg_src_update;
+
 typedef struct nstat_msg_src_removed
 {
        nstat_msg_hdr           hdr;
@@ -402,27 +632,87 @@ typedef struct nstat_msg_sysinfo_counts
        nstat_msg_hdr           hdr;
        nstat_src_ref_t         srcref;
        nstat_sysinfo_counts    counts;
-} nstat_msg_sysinfo_counts;
+} __attribute__((packed)) nstat_msg_sysinfo_counts;
+
+#pragma pack(pop)
+
+#pragma mark -- Statistics about Network Statistics --
+
+struct nstat_stats {
+       u_int32_t nstat_successmsgfailures;
+       u_int32_t nstat_sendcountfailures;
+       u_int32_t nstat_sysinfofailures;
+       u_int32_t nstat_srcupatefailures;
+       u_int32_t nstat_descriptionfailures;
+       u_int32_t nstat_msgremovedfailures;
+       u_int32_t nstat_srcaddedfailures;
+       u_int32_t nstat_msgerrorfailures;
+       u_int32_t nstat_copy_descriptor_failures;
+       u_int32_t nstat_provider_counts_failures;
+       u_int32_t nstat_control_send_description_failures;
+       u_int32_t nstat_control_send_goodbye_failures;
+       u_int32_t nstat_flush_accumulated_msgs_failures;
+       u_int32_t nstat_accumulate_msg_failures;
+       u_int32_t nstat_control_cleanup_source_failures;
+       u_int32_t nstat_handle_msg_failures;
+};
+
+#endif /* PRIVATE */
+
+#ifdef XNU_KERNEL_PRIVATE
+#include <sys/mcache.h>
+
+#pragma mark -- System Information Internal Support --
 
 typedef struct nstat_sysinfo_mbuf_stats
 {
-       u_int32_t               total_256b;
-       u_int32_t               total_2kb;
-       u_int32_t               total_4kb;
-       u_int32_t               sbmb_total;
-       u_int32_t               sb_atmbuflimit;
-       u_int32_t               draincnt;
-       u_int32_t               memreleased;
+       u_int32_t               total_256b;     /* Peak usage, 256B pool */
+       u_int32_t               total_2kb;      /* Peak usage, 2KB pool */
+       u_int32_t               total_4kb;      /* Peak usage, 4KB pool */
+       u_int32_t               total_16kb;     /* Peak usage, 16KB pool */
+       u_int32_t               sbmb_total;     /* Total mbufs in sock buffer pool */
+       u_int32_t               sb_atmbuflimit; /* Memory limit reached for socket buffer autoscaling */
+       u_int32_t               draincnt;       /* Number of times mbuf pool has been drained under memory pressure */
+       u_int32_t               memreleased;    /* Memory (bytes) released from mbuf pool to VM */
 } nstat_sysinfo_mbuf_stats;
 
 typedef struct nstat_sysinfo_tcp_stats
 {
-       u_int32_t               ipv4_avgrtt;
-       u_int32_t               ipv6_avgrtt;
-       u_int32_t               send_plr;
-       u_int32_t               recv_plr;
-       u_int32_t               send_tlrto_rate;
-       u_int32_t               send_reorder_rate;
+       u_int32_t               ipv4_avgrtt;    /* Average RTT for IPv4 */
+       u_int32_t               ipv6_avgrtt;    /* Average RTT for IPv6 */
+       u_int32_t               send_plr;       /* Average uplink packet loss rate */
+       u_int32_t               recv_plr;       /* Average downlink packet loss rate */
+       u_int32_t               send_tlrto_rate; /* Average rxt timeout after tail loss */
+       u_int32_t               send_reorder_rate; /* Average packet reordering rate */
+       u_int32_t               connection_attempts; /* TCP client connection attempts */
+       u_int32_t               connection_accepts; /* TCP server connection accepts */
+       u_int32_t               ecn_client_enabled; /* Global setting for ECN client side */
+       u_int32_t               ecn_server_enabled; /* Global setting for ECN server side */
+       u_int32_t               ecn_client_setup; /* Attempts to setup TCP client connection with ECN */
+       u_int32_t               ecn_server_setup; /* Attempts to setup TCP server connection with ECN */
+       u_int32_t               ecn_client_success; /* Number of successful negotiations of ECN for a client connection */
+       u_int32_t               ecn_server_success; /* Number of successful negotiations of ECN for a server connection */
+       u_int32_t               ecn_not_supported; /* Number of fallbacks to Non-ECN, no support from peer */
+       u_int32_t               ecn_lost_syn;   /* Number of SYNs lost with ECN bits */
+       u_int32_t               ecn_lost_synack; /* Number of SYN-ACKs lost with ECN bits */
+       u_int32_t               ecn_recv_ce;    /* Number of CEs received from network */
+       u_int32_t               ecn_recv_ece;   /* Number of ECEs received from receiver */
+       u_int32_t               ecn_sent_ece;   /* Number of ECEs sent in response to CE */
+       u_int32_t               ecn_conn_recv_ce; /* Number of connections using ECN received CE at least once */
+       u_int32_t               ecn_conn_recv_ece; /* Number of connections using ECN received ECE at least once */
+       u_int32_t               ecn_conn_plnoce; /* Number of connections using ECN seen packet loss but never received CE */
+       u_int32_t               ecn_conn_pl_ce; /* Number of connections using ECN seen packet loss and CE */
+       u_int32_t               ecn_conn_nopl_ce; /* Number of connections using ECN with no packet loss but received CE */
+       u_int32_t               tfo_syn_data_rcv;       /* Number of SYN+data received with valid cookie */
+       u_int32_t               tfo_cookie_req_rcv;     /* Number of TFO cookie-requests received */
+       u_int32_t               tfo_cookie_sent;        /* Number of TFO-cookies offered to the client */
+       u_int32_t               tfo_cookie_invalid;     /* Number of invalid TFO-cookies received */
+       u_int32_t               tfo_cookie_req; /* Number of SYNs with cookie request received */
+       u_int32_t               tfo_cookie_rcv; /* Number of SYN/ACKs with cookie received */
+       u_int32_t               tfo_syn_data_sent;      /* Number of SYNs+data+cookie sent */
+       u_int32_t               tfo_syn_data_acked;     /* Number of times our SYN+data has been acknowledged */
+       u_int32_t               tfo_syn_loss;   /* Number of times SYN+TFO has been lost and we fall back */
+       u_int32_t               tfo_blackhole;  /* Number of times SYN+TFO has been blackholed and we fall back */
 } nstat_sysinfo_tcp_stats;
 
 typedef struct nstat_sysinfo_data
@@ -434,13 +724,6 @@ typedef struct nstat_sysinfo_data
        } u;
 } nstat_sysinfo_data;
 
-#pragma pack(pop)
-
-#endif /* PRIVATE */
-
-#ifdef XNU_KERNEL_PRIVATE
-#include <sys/mcache.h>
-
 #pragma mark -- Generic Network Statistics Provider --
 
 typedef        void *  nstat_provider_cookie_t;
index b2666ee19156de2039297f32d4ce9662d1b5a303..fbdc502e1000ace0ef757871672b1d3e0258ed50 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  *
@@ -138,6 +138,18 @@ static void pktmnglr_ipfilter_detach(void *cookie);
 
 static void chksm_update(mbuf_t data);
 
+#define TCP_OPT_MULTIPATH_TCP  30
+#define MPTCP_SBT_VER_OFFSET   2
+
+#define MPTCP_SUBTYPE_MPCAPABLE                0x0
+#define MPTCP_SUBTYPE_MPJOIN           0x1
+#define MPTCP_SUBTYPE_DSS              0x2
+#define MPTCP_SUBTYPE_ADD_ADDR         0x3
+#define MPTCP_SUBTYPE_REM_ADDR         0x4
+#define MPTCP_SUBTYPE_MP_PRIO          0x5
+#define MPTCP_SUBTYPE_MP_FAIL          0x6
+#define MPTCP_SUBTYPE_MP_FASTCLOSE     0x7
+
 /*
  * packet filter global read write lock
  */
@@ -755,57 +767,66 @@ pkt_mnglr_init(void)
 static errno_t pktmnglr_ipfilter_output(void *cookie, mbuf_t *data, ipf_pktopts_t options)
 {
        struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)cookie;
-       unsigned char *ptr = (unsigned char *)mbuf_data(*data);
-       struct ip *ip = (struct ip *)(void *)ptr;
-       struct tcphdr *tcp;
+       struct ip ip;
+       struct tcphdr tcp;
        int optlen = 0;
+       errno_t error = 0;
 
 #pragma unused(tcp, optlen, options)
-
        if (p_pkt_mnglr == NULL) {
-               return 0;
+               goto output_done;
        }
 
        if (!p_pkt_mnglr->activate) {
-               return 0;
+               goto output_done;
+       }
+
+       if (p_pkt_mnglr->dir == IN) {
+               goto output_done;
        }
 
        if (data == NULL) {
-               PKT_MNGLR_LOG(LOG_INFO, "%s:%d Data pointer is NULL\n", __FILE__, __LINE__);
-               return 0;
+               PKT_MNGLR_LOG(LOG_ERR, "Data pointer is NULL");
+               goto output_done;
        }
 
-       if (p_pkt_mnglr->dir == IN) {
-               return 0;
+       /* Check for IP filter options */
+       error = mbuf_copydata(*data, 0, sizeof(ip), &ip);
+       if (error) {
+               PKT_MNGLR_LOG(LOG_ERR, "Could not make local IP header copy");
+               goto output_done;
        }
 
-       if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET6) && (ip->ip_v == 4)) {
-               return 0;
+       if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET6) && (ip.ip_v == 4)) {
+               goto output_done;
        }
 
-       if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET) && (ip->ip_v == 6)) {
-               return 0;
+       if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET) && (ip.ip_v == 6)) {
+               goto output_done;
        }
 
        if (p_pkt_mnglr->lsaddr.ss_family == AF_INET) {
                struct sockaddr_in laddr = *(struct sockaddr_in *)(&(p_pkt_mnglr->lsaddr));
-               if (ip->ip_src.s_addr != laddr.sin_addr.s_addr) {
-                       return 0;
+               if (ip.ip_src.s_addr != laddr.sin_addr.s_addr) {
+                       goto output_done;
                }
        }
 
        if (p_pkt_mnglr->rsaddr.ss_family == AF_INET) {
                struct sockaddr_in raddr = *(struct sockaddr_in *)(&(p_pkt_mnglr->rsaddr));
-               if (ip->ip_dst.s_addr != raddr.sin_addr.s_addr) {
-                       return 0;
+               if (ip.ip_dst.s_addr != raddr.sin_addr.s_addr) {
+                       goto output_done;
                }
        }
 
-       if (ip->ip_v != 4) {
-               PKT_MNGLR_LOG(LOG_INFO, "%s:%d Not handling IP version %d\n", __FILE__, __LINE__, ip->ip_v);
-               return 0;
+       if (ip.ip_v != 4) {
+               PKT_MNGLR_LOG(LOG_INFO,
+                   "%s:%d Not handling IP version %d\n",
+                   __FILE__, __LINE__, ip.ip_v);
+               goto output_done;
        }
 
+output_done:
        /* Not handling output flow */
        return 0;
 }
@@ -832,12 +853,12 @@ static errno_t pktmnglr_ipfilter_input(void *cookie, mbuf_t *data, int offset, u
                goto input_done;
        }
 
-       if (data == NULL) {
-               PKT_MNGLR_LOG(LOG_ERR, "Data pointer is NULL");
+       if (p_pkt_mnglr->dir == OUT) {
                goto input_done;
        }
 
-       if (p_pkt_mnglr->dir == OUT) {
+       if (data == NULL) {
+               PKT_MNGLR_LOG(LOG_ERR, "Data pointer is NULL");
                goto input_done;
        }
 
@@ -924,7 +945,7 @@ static errno_t pktmnglr_ipfilter_input(void *cookie, mbuf_t *data, int offset, u
        /* Protocol actions */
        switch (protocol) {
                case IPPROTO_TCP:
-                       if (p_pkt_mnglr->proto_action_mask & PKT_MNGLR_TCP_ACT_NOP_MPTCP) {
+                       if (p_pkt_mnglr->proto_action_mask) {
                                int i = 0;
                                tcp_optlen = (tcp.th_off << 2)-sizeof(struct tcphdr);
                                PKT_MNGLR_LOG(LOG_INFO, "Packet from F5 is TCP\n");
@@ -944,18 +965,34 @@ static errno_t pktmnglr_ipfilter_input(void *cookie, mbuf_t *data, int offset, u
                                                tcp_optlen--;
                                                i++;
                                                continue;
-                                       } else if ((tcp_opt_buf[i] != 0) && (tcp_opt_buf[i] != 0x1e)) {
+                                       } else if ((tcp_opt_buf[i] != 0) && (tcp_opt_buf[i] != TCP_OPT_MULTIPATH_TCP)) {
                                                PKT_MNGLR_LOG(LOG_INFO, "Skipping option %x\n", tcp_opt_buf[i]);
                                                tcp_optlen -= tcp_opt_buf[i+1];
                                                i += tcp_opt_buf[i+1];
                                                continue;
-                                       } else if (tcp_opt_buf[i] == 0x1e) {
+                                       } else if (tcp_opt_buf[i] == TCP_OPT_MULTIPATH_TCP) {
                                                int j = 0;
                                                int mptcpoptlen = tcp_opt_buf[i+1];
+                                               uint8_t sbtver = tcp_opt_buf[i+MPTCP_SBT_VER_OFFSET];
+                                               uint8_t subtype = sbtver >> 4;
+
+                                               PKT_MNGLR_LOG(LOG_INFO, "Got MPTCP option %x\n", tcp_opt_buf[i]);
+                                               PKT_MNGLR_LOG(LOG_INFO, "Got MPTCP subtype %x\n", subtype);
+                                               if (subtype == MPTCP_SUBTYPE_DSS) {
+                                                       PKT_MNGLR_LOG(LOG_INFO, "Got DSS option\n");
+                                                       PKT_MNGLR_LOG(LOG_INFO, "Protocol option mask: %d\n", p_pkt_mnglr->proto_action_mask);
+                                                       if (p_pkt_mnglr->proto_action_mask &
+                                                           PKT_MNGLR_TCP_ACT_DSS_DROP) {
+                                                               goto drop_it;
+                                                       }
+                                               }
+
                                                PKT_MNGLR_LOG(LOG_INFO, "Got MPTCP option %x\n", tcp_opt_buf[i]);
-                                               PKT_MNGLR_LOG(LOG_INFO, "Overwriting with NOP\n");
                                                for (; j < mptcpoptlen; j++) {
-                                                       tcp_opt_buf[i+j] = 0x1;
+                                                       if (p_pkt_mnglr->proto_action_mask &
+                                                           PKT_MNGLR_TCP_ACT_NOP_MPTCP) {
+                                                               tcp_opt_buf[i+j] = 0x1;
+                                                       }
                                                }
                                                tcp_optlen -= mptcpoptlen;
                                                i += mptcpoptlen;
@@ -988,6 +1025,11 @@ static errno_t pktmnglr_ipfilter_input(void *cookie, mbuf_t *data, int offset, u
        chksm_update(*data);
 input_done:
        return 0;
+
+drop_it:
+       PKT_MNGLR_LOG(LOG_INFO, "Dropping packet\n");
+       mbuf_freem(*data);
+       return EJUSTRETURN;
 }
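
The option walk above recognizes MPTCP (TCP option kind 30) and derives the subtype from the high nibble of the byte at MPTCP_SBT_VER_OFFSET. A standalone sketch of that decode, assuming opt points at the option's kind byte as in the loop above; mptcp_option_is_dss() is a hypothetical helper.

/* opt[0] is the option kind, opt[1] the option length, and opt[2] packs
 * the MPTCP subtype (high nibble) and version (low nibble). */
static int
mptcp_option_is_dss(const unsigned char *opt)
{
	if (opt[0] != TCP_OPT_MULTIPATH_TCP)
		return 0;
	unsigned char sbtver = opt[MPTCP_SBT_VER_OFFSET];
	return (sbtver >> 4) == MPTCP_SUBTYPE_DSS;
}
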
 
 static void pktmnglr_ipfilter_detach(void *cookie)
index 7042fe0c44a28f56b694505e97df9e6e16b23bc4..b2384991040b72d59e2cf75c7b07c5100f0e9edd 100644 (file)
@@ -52,37 +52,38 @@ typedef enum {
  * to be set in the sc_id field of sockaddr_ctl for connect(2)
  * Note: the sc_unit is ephemeral
  */
-#define PACKET_MANGLER_CONTROL_NAME "com.apple.packet-mangler"
-
-#define PKT_MNGLR_OPT_PROTO_ACT_MASK   1
-#define PKT_MNGLR_OPT_IP_ACT_MASK       2
-#define PKT_MNGLR_OPT_LOCAL_IP          3
-#define PKT_MNGLR_OPT_REMOTE_IP         4
-#define PKT_MNGLR_OPT_LOCAL_PORT        5
-#define PKT_MNGLR_OPT_REMOTE_PORT              6 
-#define PKT_MNGLR_OPT_DIRECTION         7
-#define PKT_MNGLR_OPT_PROTOCOL          8
-#define PKT_MNGLR_OPT_ACTIVATE         0xFFFFFFFF
+#define        PACKET_MANGLER_CONTROL_NAME "com.apple.packet-mangler"
+
+#define        PKT_MNGLR_OPT_PROTO_ACT_MASK    1
+#define        PKT_MNGLR_OPT_IP_ACT_MASK       2
+#define        PKT_MNGLR_OPT_LOCAL_IP          3
+#define        PKT_MNGLR_OPT_REMOTE_IP         4
+#define        PKT_MNGLR_OPT_LOCAL_PORT        5
+#define        PKT_MNGLR_OPT_REMOTE_PORT       6
+#define        PKT_MNGLR_OPT_DIRECTION         7
+#define        PKT_MNGLR_OPT_PROTOCOL          8
+#define        PKT_MNGLR_OPT_ACTIVATE          0xFFFFFFFF
 
 /* Packet mangler action masks */
 /* Packet Mangler TCP action mask */
-#define PKT_MNGLR_TCP_ACT_NOP_MPTCP    0x00000001
-#define PKT_MNGLR_TCP_ACT_SWAP_L_PORT   0x00000002
-#define PKT_MNGLR_TCP_ACT_SWAP_R_PORT   0x00000004
-#define PKT_MNGLR_TCP_ACT_CHK_EXTENDED  0x80000000
+#define        PKT_MNGLR_TCP_ACT_NOP_MPTCP     0x00000001
+#define        PKT_MNGLR_TCP_ACT_SWAP_L_PORT   0x00000002
+#define        PKT_MNGLR_TCP_ACT_SWAP_R_PORT   0x00000004
+#define        PKT_MNGLR_TCP_ACT_DSS_DROP      0x00000008
+#define        PKT_MNGLR_TCP_ACT_CHK_EXTENDED  0x80000000
 
 /* Packet Mangler IP action mask */
-#define PKT_MNGLR_IP_ACT_FLT_L_IP       0x00000001
-#define PKT_MNGLR_IP_ACT_FLT_R_IP       0x00000002
-#define PKT_MNGLR_IP_ACT_SWAP_L_IP      0x00000004
-#define PKT_MNGLR_IP_ACT_SWAP_R_IP      0x00000008
-#define PKT_MNGLR_IP_ACT_DROP_PACKET   0x00000010
-#define PKT_MNGLR_IP_ACT_CHK_EXTENDED   0x80000000
+#define        PKT_MNGLR_IP_ACT_FLT_L_IP       0x00000001
+#define        PKT_MNGLR_IP_ACT_FLT_R_IP       0x00000002
+#define        PKT_MNGLR_IP_ACT_SWAP_L_IP      0x00000004
+#define        PKT_MNGLR_IP_ACT_SWAP_R_IP      0x00000008
+#define        PKT_MNGLR_IP_ACT_DROP_PACKET    0x00000010
+#define        PKT_MNGLR_IP_ACT_CHK_EXTENDED   0x80000000
 
 /*
  * How many filter may be active simultaneously
  */
-#define        PKT_MNGLR_MAX_FILTER_COUNT      1       
+#define        PKT_MNGLR_MAX_FILTER_COUNT      1
 
 #define        PKT_MNGLR_VERSION_CURRENT 1
 
index 0a74fe5d2ad46386d22a4abb78ccd661471e9f8c..58a20fe4f79e1ff830b11a9993789400c49dcd66 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -31,7 +31,8 @@
 
 /*
  * Copyright (c) 2001 Daniel Hartmeier
- * Copyright (c) 2002,2003 Henning Brauer
+ * Copyright (c) 2002 - 2013 Henning Brauer
+ * NAT64 - Copyright (c) 2010 Viagenie Inc. (http://www.viagenie.ca)
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -219,12 +220,16 @@ static int                 pf_check_threshold(struct pf_threshold *);
 
 static void             pf_change_ap(int, struct mbuf *, struct pf_addr *,
                            u_int16_t *, u_int16_t *, u_int16_t *,
-                           struct pf_addr *, u_int16_t, u_int8_t, sa_family_t);
+                           struct pf_addr *, u_int16_t, u_int8_t, sa_family_t,
+                           sa_family_t, int);
 static int              pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
                            struct tcphdr *, struct pf_state_peer *);
 #if INET6
 static void             pf_change_a6(struct pf_addr *, u_int16_t *,
                            struct pf_addr *, u_int8_t);
+void                    pf_change_addr(struct pf_addr *a, u_int16_t *c,
+                                       struct pf_addr *an, u_int8_t u,
+                                       sa_family_t af, sa_family_t afn);
 #endif /* INET6 */
 static void             pf_change_icmp(struct pf_addr *, u_int16_t *,
                            struct pf_addr *, struct pf_addr *, u_int16_t,
@@ -245,8 +250,7 @@ static struct pf_rule       *pf_get_translation_aux(struct pf_pdesc *,
                            struct mbuf *, int, int, struct pfi_kif *,
                            struct pf_src_node **, struct pf_addr *,
                            union pf_state_xport *, struct pf_addr *,
-                           union pf_state_xport *, struct pf_addr *,
-                           union pf_state_xport *);
+                           union pf_state_xport *, union pf_state_xport *);
 static void             pf_attach_state(struct pf_state_key *,
                            struct pf_state *, int);
 static void             pf_detach_state(struct pf_state *, int);
@@ -302,7 +306,7 @@ static u_int16_t     pf_get_mss(struct mbuf *, int, u_int16_t,
 static u_int16_t        pf_calc_mss(struct pf_addr *, sa_family_t,
                                u_int16_t);
 static void             pf_set_rt_ifp(struct pf_state *,
-                           struct pf_addr *);
+                           struct pf_addr *, sa_family_t af);
 static int              pf_check_proto_cksum(struct mbuf *, int, int,
                            u_int8_t, sa_family_t);
 static int              pf_addr_wrap_neq(struct pf_addr_wrap *,
@@ -421,13 +425,14 @@ pf_state_lookup_aux(struct pf_state **state, struct pfi_kif *kif,
 
 #define        STATE_ADDR_TRANSLATE(sk)                                        \
        (sk)->lan.addr.addr32[0] != (sk)->gwy.addr.addr32[0] ||         \
-       ((sk)->af == AF_INET6 &&                                        \
+       ((sk)->af_lan == AF_INET6 &&                                    \
        ((sk)->lan.addr.addr32[1] != (sk)->gwy.addr.addr32[1] ||        \
        (sk)->lan.addr.addr32[2] != (sk)->gwy.addr.addr32[2] ||         \
        (sk)->lan.addr.addr32[3] != (sk)->gwy.addr.addr32[3]))
 
 #define STATE_TRANSLATE(sk)                                            \
-       (STATE_ADDR_TRANSLATE(sk) ||                                    \
+       ((sk)->af_lan != (sk)->af_gwy ||                                \
+       STATE_ADDR_TRANSLATE(sk) ||                                     \
        (sk)->lan.xport.port != (sk)->gwy.xport.port)
 
 #define STATE_GRE_TRANSLATE(sk)                                                \
@@ -773,42 +778,34 @@ struct pf_esp_hdr {
 };
 
 static __inline int
-pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
+pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
 {
-       int     diff;
-
-       if (a->rule.ptr > b->rule.ptr)
-               return (1);
-       if (a->rule.ptr < b->rule.ptr)
-               return (-1);
-       if ((diff = a->af - b->af) != 0)
-               return (diff);
-       switch (a->af) {
-#if INET
+       switch (af) {
+#ifdef INET
        case AF_INET:
-               if (a->addr.addr32[0] > b->addr.addr32[0])
+               if (a->addr32[0] > b->addr32[0])
                        return (1);
-               if (a->addr.addr32[0] < b->addr.addr32[0])
+               if (a->addr32[0] < b->addr32[0])
                        return (-1);
                break;
 #endif /* INET */
-#if INET6
+#ifdef INET6
        case AF_INET6:
-               if (a->addr.addr32[3] > b->addr.addr32[3])
+               if (a->addr32[3] > b->addr32[3])
                        return (1);
-               if (a->addr.addr32[3] < b->addr.addr32[3])
+               if (a->addr32[3] < b->addr32[3])
                        return (-1);
-               if (a->addr.addr32[2] > b->addr.addr32[2])
+               if (a->addr32[2] > b->addr32[2])
                        return (1);
-               if (a->addr.addr32[2] < b->addr.addr32[2])
+               if (a->addr32[2] < b->addr32[2])
                        return (-1);
-               if (a->addr.addr32[1] > b->addr.addr32[1])
+               if (a->addr32[1] > b->addr32[1])
                        return (1);
-               if (a->addr.addr32[1] < b->addr.addr32[1])
+               if (a->addr32[1] < b->addr32[1])
                        return (-1);
-               if (a->addr.addr32[0] > b->addr.addr32[0])
+               if (a->addr32[0] > b->addr32[0])
                        return (1);
-               if (a->addr.addr32[0] < b->addr.addr32[0])
+               if (a->addr32[0] < b->addr32[0])
                        return (-1);
                break;
 #endif /* INET6 */
@@ -816,6 +813,22 @@ pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
        return (0);
 }
 
+static __inline int
+pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
+{
+       int     diff;
+
+       if (a->rule.ptr > b->rule.ptr)
+               return (1);
+       if (a->rule.ptr < b->rule.ptr)
+               return (-1);
+       if ((diff = a->af - b->af) != 0)
+               return (diff);
+       if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0)
+               return (diff);
+       return (0);
+}
+
 static __inline int
 pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b)
 {
@@ -824,7 +837,7 @@ pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b)
 
        if ((diff = a->proto - b->proto) != 0)
                return (diff);
-       if ((diff = a->af - b->af) != 0)
+       if ((diff = a->af_lan - b->af_lan) != 0)
                return (diff);
 
        extfilter = PF_EXTFILTER_APD;
@@ -839,7 +852,7 @@ pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b)
        case IPPROTO_TCP:
                if ((diff = a->lan.xport.port - b->lan.xport.port) != 0)
                        return (diff);
-               if ((diff = a->ext.xport.port - b->ext.xport.port) != 0)
+               if ((diff = a->ext_lan.xport.port - b->ext_lan.xport.port) != 0)
                        return (diff);
                break;
 
@@ -850,21 +863,21 @@ pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b)
                if ((diff = a->lan.xport.port - b->lan.xport.port) != 0)
                        return (diff);
                if ((extfilter < PF_EXTFILTER_AD) &&
-                   (diff = a->ext.xport.port - b->ext.xport.port) != 0)
+                   (diff = a->ext_lan.xport.port - b->ext_lan.xport.port) != 0)
                        return (diff);
                break;
 
        case IPPROTO_GRE:
                if (a->proto_variant == PF_GRE_PPTP_VARIANT &&
                    a->proto_variant == b->proto_variant) {
-                       if (!!(diff = a->ext.xport.call_id -
-                           b->ext.xport.call_id))
+                       if (!!(diff = a->ext_lan.xport.call_id -
+                           b->ext_lan.xport.call_id))
                                return (diff);
                }
                break;
 
        case IPPROTO_ESP:
-               if (!!(diff = a->ext.xport.spi - b->ext.xport.spi))
+               if (!!(diff = a->ext_lan.xport.spi - b->ext_lan.xport.spi))
                        return (diff);
                break;
 
@@ -872,57 +885,33 @@ pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b)
                break;
        }
 
-       switch (a->af) {
+       switch (a->af_lan) {
 #if INET
        case AF_INET:
-               if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
-                       return (1);
-               if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
-                       return (-1);
+               if ((diff = pf_addr_compare(&a->lan.addr, &b->lan.addr,
+                                           a->af_lan)) != 0)
+                       return (diff);
+
                if (extfilter < PF_EXTFILTER_EI) {
-                       if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
-                               return (1);
-                       if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
-                               return (-1);
+                       if ((diff = pf_addr_compare(&a->ext_lan.addr,
+                                                   &b->ext_lan.addr,
+                                                   a->af_lan)) != 0)
+                               return (diff);
                }
                break;
 #endif /* INET */
 #if INET6
        case AF_INET6:
-               if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3])
-                       return (1);
-               if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3])
-                       return (-1);
-               if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2])
-                       return (1);
-               if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2])
-                       return (-1);
-               if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1])
-                       return (1);
-               if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1])
-                       return (-1);
-               if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
-                       return (1);
-               if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
-                       return (-1);
+               if ((diff = pf_addr_compare(&a->lan.addr, &b->lan.addr,
+                                           a->af_lan)) != 0)
+                       return (diff);
+
                if (extfilter < PF_EXTFILTER_EI ||
-                   !PF_AZERO(&b->ext.addr, AF_INET6)) {
-                       if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
-                               return (1);
-                       if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
-                               return (-1);
-                       if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
-                               return (1);
-                       if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
-                               return (-1);
-                       if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
-                               return (1);
-                       if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
-                               return (-1);
-                       if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
-                               return (1);
-                       if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
-                               return (-1);
+                   !PF_AZERO(&b->ext_lan.addr, AF_INET6)) {
+                       if ((diff = pf_addr_compare(&a->ext_lan.addr,
+                                                   &b->ext_lan.addr,
+                                                   a->af_lan)) != 0)
+                               return (diff);
                }
                break;
 #endif /* INET6 */
@@ -954,7 +943,7 @@ pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b)
        if ((diff = a->proto - b->proto) != 0)
                return (diff);
 
-       if ((diff = a->af - b->af) != 0)
+       if ((diff = a->af_gwy - b->af_gwy) != 0)
                return (diff);
 
        extfilter = PF_EXTFILTER_APD;
@@ -967,7 +956,7 @@ pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b)
                break;
 
        case IPPROTO_TCP:
-               if ((diff = a->ext.xport.port - b->ext.xport.port) != 0)
+               if ((diff = a->ext_gwy.xport.port - b->ext_gwy.xport.port) != 0)
                        return (diff);
                if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0)
                        return (diff);
@@ -980,7 +969,7 @@ pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b)
                if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0)
                        return (diff);
                if ((extfilter < PF_EXTFILTER_AD) &&
-                   (diff = a->ext.xport.port - b->ext.xport.port) != 0)
+                   (diff = a->ext_gwy.xport.port - b->ext_gwy.xport.port) != 0)
                        return (diff);
                break;
 
@@ -1002,57 +991,31 @@ pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b)
                break;
        }
 
-       switch (a->af) {
+       switch (a->af_gwy) {
 #if INET
        case AF_INET:
-               if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
-                       return (1);
-               if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
-                       return (-1);
+               if ((diff = pf_addr_compare(&a->gwy.addr, &b->gwy.addr,
+                                           a->af_gwy)) != 0)
+                       return (diff);
+
                if (extfilter < PF_EXTFILTER_EI) {
-                       if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
-                               return (1);
-                       if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
-                               return (-1);
+                       if ((diff = pf_addr_compare(&a->ext_gwy.addr, &b->ext_gwy.addr,
+                                                   a->af_gwy)) != 0)
+                               return (diff);
                }
                break;
 #endif /* INET */
 #if INET6
        case AF_INET6:
-               if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3])
-                       return (1);
-               if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3])
-                       return (-1);
-               if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2])
-                       return (1);
-               if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2])
-                       return (-1);
-               if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1])
-                       return (1);
-               if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1])
-                       return (-1);
-               if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
-                       return (1);
-               if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
-                       return (-1);
+               if ((diff = pf_addr_compare(&a->gwy.addr, &b->gwy.addr,
+                                           a->af_gwy)) != 0)
+                       return (diff);
+
                if (extfilter < PF_EXTFILTER_EI ||
-                   !PF_AZERO(&b->ext.addr, AF_INET6)) {
-                       if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
-                               return (1);
-                       if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
-                               return (-1);
-                       if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
-                               return (1);
-                       if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
-                               return (-1);
-                       if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
-                               return (1);
-                       if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
-                               return (-1);
-                       if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
-                               return (1);
-                       if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
-                               return (-1);
+                   !PF_AZERO(&b->ext_gwy.addr, AF_INET6)) {
+                       if ((diff = pf_addr_compare(&a->ext_gwy.addr, &b->ext_gwy.addr,
+                                                   a->af_gwy)) != 0)
+                               return (diff);
                }
                break;
 #endif /* INET6 */
@@ -1135,6 +1098,17 @@ pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir)
        case PF_IN:
                sk = RB_FIND(pf_state_tree_ext_gwy, &pf_statetbl_ext_gwy,
                    (struct pf_state_key *)key);
+               /*
+                * NAT64 is done only on input; for packets coming in from
+                * the LAN side, we need to look up the lan_ext tree.
+                */
+               if (sk == NULL) {
+                       sk = RB_FIND(pf_state_tree_lan_ext,
+                                    &pf_statetbl_lan_ext,
+                                    (struct pf_state_key *)key);
+                       if (sk && sk->af_lan == sk->af_gwy)
+                               sk = NULL;
+               }
                break;
        default:
                panic("pf_find_state");
@@ -1165,6 +1139,17 @@ pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
        case PF_IN:
                sk = RB_FIND(pf_state_tree_ext_gwy,
                    &pf_statetbl_ext_gwy, (struct pf_state_key *)key);
+               /*
+                * NAT64 is done only on input; for packets coming in from
+                * the LAN side, we need to look up the lan_ext tree.
+                */
+               if ((sk == NULL) && pf_nat64_configured) {
+                       sk = RB_FIND(pf_state_tree_lan_ext,
+                                       &pf_statetbl_lan_ext,
+                                       (struct pf_state_key *)key);
+                       if (sk && sk->af_lan == sk->af_gwy)
+                               sk = NULL;
+               }
                break;
        default:
                panic("pf_find_state_all");
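Both lookups above apply the same NAT64 fallback: when the ext_gwy tree misses on input, the lan_ext tree is consulted, and the match is kept only if the state really translates between address families. Reduced to a predicate (a sketch using the af_lan/af_gwy fields introduced by this change; the helper name is illustrative):

static __inline int
pf_state_is_af_translated(struct pf_state_key *sk)
{
	/* af_lan differs from af_gwy only for NAT64-style states */
	return (sk != NULL && sk->af_lan != sk->af_gwy);
}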
@@ -1216,7 +1201,6 @@ static int
 pf_src_connlimit(struct pf_state **state)
 {
        int bad = 0;
-
        (*state)->src_node->conn++;
        VERIFY((*state)->src_node->conn != 0);
        (*state)->src.tcp_est = 1;
@@ -1246,12 +1230,12 @@ pf_src_connlimit(struct pf_state **state)
                if (pf_status.debug >= PF_DEBUG_MISC) {
                        printf("pf_src_connlimit: blocking address ");
                        pf_print_host(&(*state)->src_node->addr, 0,
-                           (*state)->state_key->af);
+                                       (*state)->state_key->af_lan);
                }
 
                bzero(&p, sizeof (p));
-               p.pfra_af = (*state)->state_key->af;
-               switch ((*state)->state_key->af) {
+               p.pfra_af = (*state)->state_key->af_lan;
+               switch ((*state)->state_key->af_lan) {
 #if INET
                case AF_INET:
                        p.pfra_net = 32;
@@ -1282,15 +1266,15 @@ pf_src_connlimit(struct pf_state **state)
                                 * from the same rule if PF_FLUSH_GLOBAL is not
                                 * set)
                                 */
-                               if (sk->af ==
-                                   (*state)->state_key->af &&
+                               if (sk->af_lan ==
+                                   (*state)->state_key->af_lan &&
                                    (((*state)->state_key->direction ==
                                        PF_OUT &&
                                    PF_AEQ(&(*state)->src_node->addr,
-                                       &sk->lan.addr, sk->af)) ||
+                                       &sk->lan.addr, sk->af_lan)) ||
                                    ((*state)->state_key->direction == PF_IN &&
                                    PF_AEQ(&(*state)->src_node->addr,
-                                       &sk->ext.addr, sk->af))) &&
+                                       &sk->ext_lan.addr, sk->af_lan))) &&
                                    ((*state)->rule.ptr->flush &
                                    PF_FLUSH_GLOBAL ||
                                    (*state)->rule.ptr == st->rule.ptr)) {
@@ -1402,13 +1386,16 @@ pf_stateins_err(const char *tree, struct pf_state *s, struct pfi_kif *kif)
                        break;
                }
                printf(" lan: ");
-               pf_print_sk_host(&sk->lan, sk->af, sk->proto,
+               pf_print_sk_host(&sk->lan, sk->af_lan, sk->proto,
                    sk->proto_variant);
                printf(" gwy: ");
-               pf_print_sk_host(&sk->gwy, sk->af, sk->proto,
+               pf_print_sk_host(&sk->gwy, sk->af_gwy, sk->proto,
+                   sk->proto_variant);
+               printf(" ext_lan: ");
+               pf_print_sk_host(&sk->ext_lan, sk->af_lan, sk->proto,
                    sk->proto_variant);
-               printf(" ext: ");
-               pf_print_sk_host(&sk->ext, sk->af, sk->proto,
+               printf(" ext_gwy: ");
+               pf_print_sk_host(&sk->ext_gwy, sk->af_gwy, sk->proto,
                    sk->proto_variant);
                if (s->sync_flags & PFSTATE_FROMSYNC)
                        printf(" (from sync)");
@@ -1655,9 +1642,9 @@ pf_unlink_state(struct pf_state *cur)
        lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
 
        if (cur->src.state == PF_TCPS_PROXY_DST) {
-               pf_send_tcp(cur->rule.ptr, cur->state_key->af,
-                   &cur->state_key->ext.addr, &cur->state_key->lan.addr,
-                   cur->state_key->ext.xport.port,
+               pf_send_tcp(cur->rule.ptr, cur->state_key->af_lan,
+                   &cur->state_key->ext_lan.addr, &cur->state_key->lan.addr,
+                   cur->state_key->ext_lan.xport.port,
                    cur->state_key->lan.xport.port,
                    cur->src.seqhi, cur->src.seqlo + 1,
                    TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
@@ -1900,11 +1887,15 @@ pf_print_state(struct pf_state *s)
                printf("%u ", sk->proto);
                break;
        }
-       pf_print_sk_host(&sk->lan, sk->af, sk->proto, sk->proto_variant);
+       pf_print_sk_host(&sk->lan, sk->af_lan, sk->proto, sk->proto_variant);
+       printf(" ");
+       pf_print_sk_host(&sk->gwy, sk->af_gwy, sk->proto, sk->proto_variant);
        printf(" ");
-       pf_print_sk_host(&sk->gwy, sk->af, sk->proto, sk->proto_variant);
+       pf_print_sk_host(&sk->ext_lan, sk->af_lan, sk->proto,
+                        sk->proto_variant);
        printf(" ");
-       pf_print_sk_host(&sk->ext, sk->af, sk->proto, sk->proto_variant);
+       pf_print_sk_host(&sk->ext_gwy, sk->af_gwy, sk->proto,
+                        sk->proto_variant);
        printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
            s->src.seqhi, s->src.max_win, s->src.seqdiff);
        if (s->src.wscale && s->dst.wscale)
@@ -2034,21 +2025,21 @@ pf_calc_state_key_flowhash(struct pf_state_key *sk)
        uint32_t flowhash = 0;
 
        bzero(&fh, sizeof (fh));
-       if (PF_ALEQ(&sk->lan.addr, &sk->ext.addr, sk->af)) {
+       if (PF_ALEQ(&sk->lan.addr, &sk->ext_lan.addr, sk->af_lan)) {
                bcopy(&sk->lan.addr, &fh.ap1.addr, sizeof (fh.ap1.addr));
-               bcopy(&sk->ext.addr, &fh.ap2.addr, sizeof (fh.ap2.addr));
+               bcopy(&sk->ext_lan.addr, &fh.ap2.addr, sizeof (fh.ap2.addr));
        } else {
-               bcopy(&sk->ext.addr, &fh.ap1.addr, sizeof (fh.ap1.addr));
+               bcopy(&sk->ext_lan.addr, &fh.ap1.addr, sizeof (fh.ap1.addr));
                bcopy(&sk->lan.addr, &fh.ap2.addr, sizeof (fh.ap2.addr));
        }
-       if (sk->lan.xport.spi <= sk->ext.xport.spi) {
+       if (sk->lan.xport.spi <= sk->ext_lan.xport.spi) {
                fh.ap1.xport.spi = sk->lan.xport.spi;
-               fh.ap2.xport.spi = sk->ext.xport.spi;
+               fh.ap2.xport.spi = sk->ext_lan.xport.spi;
        } else {
-               fh.ap1.xport.spi = sk->ext.xport.spi;
+               fh.ap1.xport.spi = sk->ext_lan.xport.spi;
                fh.ap2.xport.spi = sk->lan.xport.spi;
        }
-       fh.af = sk->af;
+       fh.af = sk->af_lan;
        fh.proto = sk->proto;
 
 try_again:
@@ -2106,84 +2097,152 @@ pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
        return (l);
 }
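The rewritten pf_change_ap() below leans entirely on pf_cksum_fixup(), an incremental ones'-complement update in the style of RFC 1624: when a 16-bit word of the packet changes, only the difference is folded back into the checksum. A small worked example, assuming the usual incremental-update behaviour:

	/*
	 * A word changes from 0x1234 to 0x5678 while the checksum field
	 * holds 0xabcd; the updated value is 0x6789, with no need to
	 * re-checksum the whole packet:
	 *
	 *	0xabcd + 0x1234 - 0x5678 = 0x6789   (end-around carry folded)
	 */
	u_int16_t sum = pf_cksum_fixup(0xabcd, 0x1234, 0x5678, 0 /* not UDP */);
	/* sum == 0x6789; rewriting a 32-bit address costs two such updates */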
 
+/*
+ * Change an IP address and port, fixing up the affected checksums.
+ * dir : packet direction
+ * a   : address to be changed
+ * p   : port to be changed
+ * ic  : IP header checksum
+ * pc  : protocol (TCP/UDP) checksum
+ * an  : new IP address
+ * pn  : new port
+ * u   : 1 if this is a UDP packet, else 0
+ * af  : address family of the packet
+ * afn : address family of the new address
+ * ua  : 1 if the address itself should be rewritten in the packet;
+ *       otherwise only the checksums are recalculated and updated.
+ */
 static void
 pf_change_ap(int dir, struct mbuf *m, struct pf_addr *a, u_int16_t *p,
     u_int16_t *ic, u_int16_t *pc, struct pf_addr *an, u_int16_t pn,
-    u_int8_t u, sa_family_t af)
+    u_int8_t u, sa_family_t af, sa_family_t afn, int ua)
 {
        struct pf_addr  ao;
        u_int16_t       po = *p;
 
        PF_ACPY(&ao, a, af);
-       PF_ACPY(a, an, af);
+       if (ua)
+               PF_ACPY(a, an, afn);
 
        *p = pn;
 
        switch (af) {
 #if INET
        case AF_INET:
-               *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
-                   ao.addr16[0], an->addr16[0], 0),
-                   ao.addr16[1], an->addr16[1], 0);
-               *p = pn;
+               switch (afn) {
+               case AF_INET:
+                       *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
+                       ao.addr16[0], an->addr16[0], 0),
+                       ao.addr16[1], an->addr16[1], 0);
+                       *p = pn;
                /*
                 * If the packet originated from an ALG on the NAT gateway
                 * (source address is loopback or local), the TCP/UDP
                 * checksum field contains the pseudo-header checksum
-                * that's not yet complemented.
+                * that's not yet complemented. A locally generated packet
+                * will have the UDP/TCP CSUM flag set (it is set in the
+                * protocol output path).
                 */
-               if (dir == PF_OUT && m != NULL &&
-                   (m->m_flags & M_PKTHDR) &&
-                   (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))) {
+                       if (dir == PF_OUT && m != NULL &&
+                       (m->m_flags & M_PKTHDR) &&
+                       (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))) {
                        /* Pseudo-header checksum does not include ports */
-                       *pc = ~pf_cksum_fixup(pf_cksum_fixup(~*pc,
-                           ao.addr16[0], an->addr16[0], u),
-                           ao.addr16[1], an->addr16[1], u);
-               } else {
-                       *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
+                               *pc = ~pf_cksum_fixup(pf_cksum_fixup(~*pc,
+                               ao.addr16[0], an->addr16[0], u),
+                               ao.addr16[1], an->addr16[1], u);
+                       } else {
+                               *pc =
+                               pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+                               *pc, ao.addr16[0], an->addr16[0], u),
+                               ao.addr16[1], an->addr16[1], u),
+                               po, pn, u);
+                       }
+                       break;
+#ifdef INET6
+               case AF_INET6:
+                       *p = pn;
+                       *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+                           pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+                           pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
                            ao.addr16[0], an->addr16[0], u),
                            ao.addr16[1], an->addr16[1], u),
+                           0,            an->addr16[2], u),
+                           0,            an->addr16[3], u),
+                           0,            an->addr16[4], u),
+                           0,            an->addr16[5], u),
+                           0,            an->addr16[6], u),
+                           0,            an->addr16[7], u),
                            po, pn, u);
+                       break;
+#endif /* INET6 */
                }
                break;
 #endif /* INET */
 #if INET6
        case AF_INET6:
+               switch (afn) {
+               case AF_INET6:
                /*
                 * If the packet originated from an ALG on the NAT gateway
                 * (source address is loopback or local), the TCP/UDP
                 * checksum field contains the pseudo-header checksum
                 * that's not yet complemented.
+                * A locally generated packet will have the UDP/TCP CSUM
+                * flag set (it is set in the protocol output path).
                 */
-               if (dir == PF_OUT && m != NULL &&
-                   (m->m_flags & M_PKTHDR) &&
-                   (m->m_pkthdr.csum_flags & (CSUM_TCPIPV6 | CSUM_UDPIPV6))) {
+                       if (dir == PF_OUT && m != NULL &&
+                           (m->m_flags & M_PKTHDR) &&
+                           (m->m_pkthdr.csum_flags & (CSUM_TCPIPV6 |
+                                                  CSUM_UDPIPV6))) {
                        /* Pseudo-header checksum does not include ports */
-                       *pc = ~pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
-                               pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
-                               pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(~*pc,
-                               ao.addr16[0], an->addr16[0], u),
-                               ao.addr16[1], an->addr16[1], u),
-                               ao.addr16[2], an->addr16[2], u),
-                               ao.addr16[3], an->addr16[3], u),
-                               ao.addr16[4], an->addr16[4], u),
-                               ao.addr16[5], an->addr16[5], u),
-                               ao.addr16[6], an->addr16[6], u),
-                               ao.addr16[7], an->addr16[7], u),
-                               po, pn, u);
-               } else {
+                               *pc =
+                               ~pf_cksum_fixup(pf_cksum_fixup(
+                               pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+                               pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+                                       ~*pc,
+                                       ao.addr16[0], an->addr16[0], u),
+                                       ao.addr16[1], an->addr16[1], u),
+                                       ao.addr16[2], an->addr16[2], u),
+                                       ao.addr16[3], an->addr16[3], u),
+                                       ao.addr16[4], an->addr16[4], u),
+                                       ao.addr16[5], an->addr16[5], u),
+                                       ao.addr16[6], an->addr16[6], u),
+                                       ao.addr16[7], an->addr16[7], u);
+                       } else {
+                               *pc =
+                               pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+                               pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+                               pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+                                       *pc,
+                                       ao.addr16[0], an->addr16[0], u),
+                                       ao.addr16[1], an->addr16[1], u),
+                                       ao.addr16[2], an->addr16[2], u),
+                                       ao.addr16[3], an->addr16[3], u),
+                                       ao.addr16[4], an->addr16[4], u),
+                                       ao.addr16[5], an->addr16[5], u),
+                                       ao.addr16[6], an->addr16[6], u),
+                                       ao.addr16[7], an->addr16[7], u),
+                                       po, pn, u);
+                       }
+                       break;
+#ifdef INET
+               case AF_INET:
                        *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
-                               pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
-                               pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
-                               ao.addr16[0], an->addr16[0], u),
-                               ao.addr16[1], an->addr16[1], u),
-                               ao.addr16[2], an->addr16[2], u),
-                               ao.addr16[3], an->addr16[3], u),
-                               ao.addr16[4], an->addr16[4], u),
-                               ao.addr16[5], an->addr16[5], u),
-                               ao.addr16[6], an->addr16[6], u),
-                               ao.addr16[7], an->addr16[7], u),
-                               po, pn, u);
+                           pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+                           pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
+                           ao.addr16[0], an->addr16[0], u),
+                           ao.addr16[1], an->addr16[1], u),
+                           ao.addr16[2], 0,             u),
+                           ao.addr16[3], 0,             u),
+                           ao.addr16[4], 0,             u),
+                           ao.addr16[5], 0,             u),
+                           ao.addr16[6], 0,             u),
+                           ao.addr16[7], 0,             u),
+                           po, pn, u);
+                       break;
+#endif /* INET */
                }
                break;
 #endif /* INET6 */
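With the new af/afn/ua parameters, pf_change_ap() also covers the cross-family case: words that exist only in the new family are folded in against zero, and words that exist only in the old family are folded out against zero, which is what the 0-valued operands in the chains above express. A hypothetical call for the plain same-family case (m, pd, th, new_saddr and new_sport are assumed caller-side names, not taken from this commit):

	/* Rewrite the source address/port of an outbound IPv4 TCP segment,
	 * fixing up both the IP header checksum and the TCP checksum. */
	pf_change_ap(PF_OUT, m, pd->src, &th->th_sport, pd->ip_sum,
	    &th->th_sum, &new_saddr, new_sport, 0 /* not UDP */,
	    AF_INET /* af */, AF_INET /* afn: no family translation */,
	    1 /* ua: rewrite the address in the packet */);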
@@ -2224,6 +2283,60 @@ pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
            ao.addr16[6], an->addr16[6], u),
            ao.addr16[7], an->addr16[7], u);
 }
+
+void
+pf_change_addr(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u,
+              sa_family_t af, sa_family_t afn)
+{
+       struct pf_addr  ao;
+
+       PF_ACPY(&ao, a, af);
+       PF_ACPY(a, an, afn);
+
+       switch (af) {
+       case AF_INET:
+               switch (afn) {
+               case AF_INET:
+                       pf_change_a(a, c, an->v4.s_addr, u);
+                       break;
+               case AF_INET6:
+                       *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+                           pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+                           pf_cksum_fixup(pf_cksum_fixup(*c,
+                           ao.addr16[0], an->addr16[0], u),
+                           ao.addr16[1], an->addr16[1], u),
+                           0,            an->addr16[2], u),
+                           0,            an->addr16[3], u),
+                           0,            an->addr16[4], u),
+                           0,            an->addr16[5], u),
+                           0,            an->addr16[6], u),
+                           0,            an->addr16[7], u);
+                       break;
+               }
+               break;
+       case AF_INET6:
+               switch (afn) {
+               case AF_INET:
+                       *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+                           pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+                           pf_cksum_fixup(pf_cksum_fixup(*c,
+                           ao.addr16[0], an->addr16[0], u),
+                           ao.addr16[1], an->addr16[1], u),
+                           ao.addr16[2], 0,             u),
+                           ao.addr16[3], 0,             u),
+                           ao.addr16[4], 0,             u),
+                           ao.addr16[5], 0,             u),
+                           ao.addr16[6], 0,             u),
+                           ao.addr16[7], 0,             u);
+                       break;
+               case AF_INET6:
+                       pf_change_a6(a, c, an, u);
+                       break;
+               }
+               break;
+       }
+}
+
 #endif /* INET6 */
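pf_change_addr() above is the cross-family counterpart of pf_change_a()/pf_change_a6(): it rewrites an address and repairs a single checksum even when the old and new addresses belong to different families. A minimal, hypothetical use (all variable names are assumptions):

	/* Translate an IPv4 source address into a synthesized IPv6 one
	 * while keeping the transport checksum valid. */
	pf_change_addr(pd->src, &th->th_sum, &synthesized_v6, 0 /* not UDP */,
	    AF_INET /* af: current family */, AF_INET6 /* afn: new family */);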
 
 static void
@@ -3036,13 +3149,13 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
                        k.rule.ptr = NULL;
                pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
                *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
-               if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
-                       PF_ACPY(naddr, &(*sn)->raddr, af);
+               if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, rpool->af)) {
+                       PF_ACPY(naddr, &(*sn)->raddr, rpool->af);
                        if (pf_status.debug >= PF_DEBUG_MISC) {
                                printf("pf_map_addr: src tracking maps ");
                                pf_print_host(&k.addr, 0, af);
                                printf(" to ");
-                               pf_print_host(naddr, 0, af);
+                               pf_print_host(naddr, 0, rpool->af);
                                printf("\n");
                        }
                        return (0);
@@ -3054,7 +3167,7 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
        if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
                if (rpool->cur->addr.p.dyn == NULL)
                        return (1);
-               switch (af) {
+               switch (rpool->af) {
 #if INET
                case AF_INET:
                        if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
@@ -3086,13 +3199,14 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
 
        switch (rpool->opts & PF_POOL_TYPEMASK) {
        case PF_POOL_NONE:
-               PF_ACPY(naddr, raddr, af);
+               PF_ACPY(naddr, raddr, rpool->af);
                break;
        case PF_POOL_BITMASK:
+               ASSERT(af == rpool->af);
                PF_POOLMASK(naddr, raddr, rmask, saddr, af);
                break;
        case PF_POOL_RANDOM:
-               if (init_addr != NULL && PF_AZERO(init_addr, af)) {
+               if (init_addr != NULL && PF_AZERO(init_addr, rpool->af)) {
                        switch (af) {
 #if INET
                        case AF_INET:
@@ -3122,15 +3236,19 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
                                break;
 #endif /* INET6 */
                        }
-                       PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
-                       PF_ACPY(init_addr, naddr, af);
+                       PF_POOLMASK(naddr, raddr, rmask, &rpool->counter,
+                                   rpool->af);
+                       PF_ACPY(init_addr, naddr, rpool->af);
 
                } else {
-                       PF_AINC(&rpool->counter, af);
-                       PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
+                       PF_AINC(&rpool->counter, rpool->af);
+                       PF_POOLMASK(naddr, raddr, rmask, &rpool->counter,
+                                   rpool->af);
                }
                break;
        case PF_POOL_SRCHASH:
+               ASSERT(af == rpool->af);
+               PF_POOLMASK(naddr, raddr, rmask, saddr, af);
                pf_hash(saddr, (struct pf_addr *)(void *)&hash,
                    &rpool->key, af);
                PF_POOLMASK(naddr, raddr, rmask,
@@ -3140,7 +3258,7 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
                if (rpool->cur->addr.type == PF_ADDR_TABLE) {
                        if (!pfr_pool_get(rpool->cur->addr.p.tbl,
                            &rpool->tblidx, &rpool->counter,
-                           &raddr, &rmask, af))
+                           &raddr, &rmask, rpool->af))
                                goto get_addr;
                } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
                        if (rpool->cur->addr.p.dyn != NULL &&
@@ -3148,7 +3266,8 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
                            &rpool->tblidx, &rpool->counter,
                            &raddr, &rmask, af))
                                goto get_addr;
-               } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
+               } else if (pf_match_addr(0, raddr, rmask, &rpool->counter,
+                                        rpool->af))
                        goto get_addr;
 
        try_next:
@@ -3158,8 +3277,9 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
                        rpool->tblidx = -1;
                        if (pfr_pool_get(rpool->cur->addr.p.tbl,
                            &rpool->tblidx, &rpool->counter,
-                           &raddr, &rmask, af)) {
-                               /* table contains no address of type 'af' */
+                           &raddr, &rmask, rpool->af)) {
+                               /* table contains no address of type
+                                * 'rpool->af' */
                                if (rpool->cur != acur)
                                        goto try_next;
                                return (1);
@@ -3170,8 +3290,9 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
                                return (1);
                        if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
                            &rpool->tblidx, &rpool->counter,
-                           &raddr, &rmask, af)) {
-                               /* table contains no address of type 'af' */
+                           &raddr, &rmask, rpool->af)) {
+                               /* table contains no address of type
+                                * 'rpool->af' */
                                if (rpool->cur != acur)
                                        goto try_next;
                                return (1);
@@ -3179,23 +3300,23 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
                } else {
                        raddr = &rpool->cur->addr.v.a.addr;
                        rmask = &rpool->cur->addr.v.a.mask;
-                       PF_ACPY(&rpool->counter, raddr, af);
+                       PF_ACPY(&rpool->counter, raddr, rpool->af);
                }
 
        get_addr:
-               PF_ACPY(naddr, &rpool->counter, af);
-               if (init_addr != NULL && PF_AZERO(init_addr, af))
-                       PF_ACPY(init_addr, naddr, af);
-               PF_AINC(&rpool->counter, af);
+               PF_ACPY(naddr, &rpool->counter, rpool->af);
+               if (init_addr != NULL && PF_AZERO(init_addr, rpool->af))
+                       PF_ACPY(init_addr, naddr, rpool->af);
+               PF_AINC(&rpool->counter, rpool->af);
                break;
        }
        if (*sn != NULL)
-               PF_ACPY(&(*sn)->raddr, naddr, af);
+               PF_ACPY(&(*sn)->raddr, naddr, rpool->af);
 
        if (pf_status.debug >= PF_DEBUG_MISC &&
            (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
                printf("pf_map_addr: selected address ");
-               pf_print_host(naddr, 0, af);
+               pf_print_host(naddr, 0, rpool->af);
                printf("\n");
        }
 
@@ -3248,14 +3369,15 @@ pf_get_sport(struct pf_pdesc *pd, struct pfi_kif *kif, struct pf_rule *r,
                                        continue;
                                if (s->nat_rule.ptr != r)
                                        continue;
-                               if (sk->proto != IPPROTO_UDP || sk->af != af)
+                               if (sk->proto != IPPROTO_UDP ||
+                                   sk->af_lan != af)
                                        continue;
                                if (sk->lan.xport.port != sxport->port)
                                        continue;
                                if (PF_ANEQ(&sk->lan.addr, saddr, af))
                                        continue;
                                if (r->extmap < PF_EXTMAP_EI &&
-                                   PF_ANEQ(&sk->ext.addr, daddr, af))
+                                   PF_ANEQ(&sk->ext_lan.addr, daddr, af))
                                        continue;
 
                                nxport->port = sk->gwy.xport.port;
@@ -3275,7 +3397,7 @@ pf_get_sport(struct pf_pdesc *pd, struct pfi_kif *kif, struct pf_rule *r,
                                continue;
                        if (s->nat_rule.ptr != r)
                                continue;
-                       if (sk->proto != IPPROTO_TCP || sk->af != af)
+                       if (sk->proto != IPPROTO_TCP || sk->af_lan != af)
                                 continue;
                        if (sk->lan.xport.port != sxport->port)
                                continue;
@@ -3286,10 +3408,10 @@ pf_get_sport(struct pf_pdesc *pd, struct pfi_kif *kif, struct pf_rule *r,
                }
        }
        do {
-               key.af = af;
+               key.af_gwy = af;
                key.proto = proto;
-               PF_ACPY(&key.ext.addr, daddr, key.af);
-               PF_ACPY(&key.gwy.addr, naddr, key.af);
+               PF_ACPY(&key.ext_gwy.addr, daddr, key.af_gwy);
+               PF_ACPY(&key.gwy.addr, naddr, key.af_gwy);
                switch (proto) {
                        case IPPROTO_UDP:
                                key.proto_variant = r->extfilter;
@@ -3299,9 +3421,10 @@ pf_get_sport(struct pf_pdesc *pd, struct pfi_kif *kif, struct pf_rule *r,
                                break;
                }
                if (dxport)
-                       key.ext.xport = *dxport;
+                       key.ext_gwy.xport = *dxport;
                else
-                       memset(&key.ext.xport, 0, sizeof (key.ext.xport));
+                       memset(&key.ext_gwy.xport, 0,
+                               sizeof (key.ext_gwy.xport));
                /*
                 * port search; start random, step;
                 * similar 2 portloop in in_pcbbind
@@ -3312,7 +3435,7 @@ pf_get_sport(struct pf_pdesc *pd, struct pfi_kif *kif, struct pf_rule *r,
                                key.gwy.xport = *dxport;
                        else
                                memset(&key.gwy.xport, 0,
-                                   sizeof (key.ext.xport));
+                                   sizeof (key.gwy.xport));
                        if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
                                return (0);
                } else if (low == 0 && high == 0) {
@@ -3462,19 +3585,37 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
        if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid, NULL))
                return (NULL);
        if (rm != NULL && (rm->action == PF_NONAT ||
-           rm->action == PF_NORDR || rm->action == PF_NOBINAT))
+           rm->action == PF_NORDR || rm->action == PF_NOBINAT ||
+           rm->action == PF_NONAT64))
                return (NULL);
        return (rm);
 }
 
+/*
+ * Get address translation information for NAT/BINAT/RDR
+ * pd          : pf packet descriptor
+ * m           : mbuf holding the packet
+ * off         : offset to protocol header
+ * direction   : direction of the packet
+ * kif         : pf interface info obtained from the packet's recv interface
+ * sn          : source node pointer (output)
+ * saddr       : packet source address
+ * sxport      : packet source port
+ * daddr       : packet destination address
+ * dxport      : packet destination port
+ * nsxport     : translated source port (output)
+ *
+ * The translated source and destination addresses are returned in
+ * pd->naddr and pd->ndaddr.
+ */
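A caller-side sketch of the convention described above (nr, nsn, sxport, dxport and nsxport are illustrative names, not from this commit); the translated addresses come back through the packet descriptor rather than an out-parameter:

	if ((nr = pf_get_translation_aux(pd, m, off, direction, kif, &nsn,
	    saddr, &sxport, daddr, &dxport, &nsxport)) != NULL) {
		nsaddr = &pd->naddr;	/* translated source address */
		ndaddr = &pd->ndaddr;	/* translated destination address */
		naf = pd->naf;		/* may differ from pd->af for NAT64 */
	}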
 static struct pf_rule *
 pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off,
     int direction, struct pfi_kif *kif, struct pf_src_node **sn,
     struct pf_addr *saddr, union pf_state_xport *sxport, struct pf_addr *daddr,
-    union pf_state_xport *dxport, struct pf_addr *naddr,
-    union pf_state_xport *nxport)
+    union pf_state_xport *dxport, union pf_state_xport *nsxport)
 {
        struct pf_rule  *r = NULL;
+       pd->naf = pd->af;
 
        if (direction == PF_OUT) {
                r = pf_match_translation(pd, m, off, direction, kif, saddr,
@@ -3494,14 +3635,32 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off,
        }
 
        if (r != NULL) {
+               struct pf_addr *nsaddr = &pd->naddr;
+               struct pf_addr *ndaddr = &pd->ndaddr;
+
+               *nsaddr = *saddr;
+               *ndaddr = *daddr;
+
                switch (r->action) {
                case PF_NONAT:
+               case PF_NONAT64:
                case PF_NOBINAT:
                case PF_NORDR:
                        return (NULL);
                case PF_NAT:
+               case PF_NAT64:
+                       /*
+                        * NAT64 is done on the incoming path, where we call
+                        * ip_input(), which asserts that the receive interface
+                        * is not NULL. The check below prevents the NAT64
+                        * action from being applied to a packet generated by
+                        * a local entity using a synthesized IPv6 address.
+                        */
+                       if ((r->action == PF_NAT64) && (direction == PF_OUT))
+                               return (NULL);
+
                        if (pf_get_sport(pd, kif, r, saddr, sxport, daddr,
-                           dxport, naddr, nxport, sn)) {
+                           dxport, nsaddr, nsxport, sn)) {
                                DPFPRINTF(PF_DEBUG_MISC,
                                    ("pf: NAT proxy port allocation "
                                    "(%u-%u) failed\n",
@@ -3509,6 +3668,14 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off,
                                    r->rpool.proxy_port[1]));
                                return (NULL);
                        }
+                       /*
+                        * For NAT64, the destination IPv4 address is derived
+                        * from the last 32 bits of the synthesized IPv6 address.
+                        */
+                       if (r->action == PF_NAT64) {
+                               ndaddr->v4.s_addr = daddr->addr32[3];
+                               pd->naf = AF_INET;
+                       }
                        break;
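The derivation above matches the usual /96 NAT64 prefix layout (for example the well-known 64:ff9b::/96 of RFC 6052), where the IPv4 destination sits in the low 32 bits of the synthesized IPv6 address; the /96 assumption is not stated by the commit itself:

	/*
	 *	64:ff9b::c000:221  ->  low 32 bits 0xc0000221  ->  192.0.2.33
	 */
	struct in_addr v4dst;
	v4dst.s_addr = daddr->addr32[3];	/* already in network byte order */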
                case PF_BINAT:
                        switch (direction) {
@@ -3523,7 +3690,7 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off,
                                                if (r->rpool.cur->addr.p.dyn->
                                                    pfid_acnt4 < 1)
                                                        return (NULL);
-                                               PF_POOLMASK(naddr,
+                                               PF_POOLMASK(nsaddr,
                                                    &r->rpool.cur->addr.p.dyn->
                                                    pfid_addr4,
                                                    &r->rpool.cur->addr.p.dyn->
@@ -3536,7 +3703,7 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off,
                                                if (r->rpool.cur->addr.p.dyn->
                                                    pfid_acnt6 < 1)
                                                        return (NULL);
-                                               PF_POOLMASK(naddr,
+                                               PF_POOLMASK(nsaddr,
                                                    &r->rpool.cur->addr.p.dyn->
                                                    pfid_addr6,
                                                    &r->rpool.cur->addr.p.dyn->
@@ -3546,7 +3713,7 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off,
 #endif /* INET6 */
                                        }
                                } else {
-                                       PF_POOLMASK(naddr,
+                                       PF_POOLMASK(nsaddr,
                                            &r->rpool.cur->addr.v.a.addr,
                                            &r->rpool.cur->addr.v.a.mask,
                                            saddr, pd->af);
@@ -3562,7 +3729,7 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off,
                                                if (r->src.addr.p.dyn->
                                                    pfid_acnt4 < 1)
                                                        return (NULL);
-                                               PF_POOLMASK(naddr,
+                                               PF_POOLMASK(ndaddr,
                                                    &r->src.addr.p.dyn->
                                                    pfid_addr4,
                                                    &r->src.addr.p.dyn->
@@ -3575,7 +3742,7 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off,
                                                if (r->src.addr.p.dyn->
                                                    pfid_acnt6 < 1)
                                                        return (NULL);
-                                               PF_POOLMASK(naddr,
+                                               PF_POOLMASK(ndaddr,
                                                    &r->src.addr.p.dyn->
                                                    pfid_addr6,
                                                    &r->src.addr.p.dyn->
@@ -3585,7 +3752,7 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off,
 #endif /* INET6 */
                                        }
                                } else
-                                       PF_POOLMASK(naddr,
+                                       PF_POOLMASK(ndaddr,
                                            &r->src.addr.v.a.addr,
                                            &r->src.addr.v.a.mask, daddr,
                                            pd->af);
@@ -3604,7 +3771,7 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off,
                                                if (r->dst.addr.p.dyn->
                                                    pfid_acnt4 < 1)
                                                        return (NULL);
-                                               PF_POOLMASK(naddr,
+                                               PF_POOLMASK(nsaddr,
                                                    &r->dst.addr.p.dyn->
                                                    pfid_addr4,
                                                    &r->dst.addr.p.dyn->
@@ -3617,7 +3784,7 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off,
                                                if (r->dst.addr.p.dyn->
                                                    pfid_acnt6 < 1)
                                                        return (NULL);
-                                               PF_POOLMASK(naddr,
+                                               PF_POOLMASK(nsaddr,
                                                    &r->dst.addr.p.dyn->
                                                    pfid_addr6,
                                                    &r->dst.addr.p.dyn->
@@ -3627,26 +3794,26 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off,
 #endif /* INET6 */
                                        }
                                } else {
-                                       PF_POOLMASK(naddr,
+                                       PF_POOLMASK(nsaddr,
                                            &r->dst.addr.v.a.addr,
                                            &r->dst.addr.v.a.mask,
                                            daddr, pd->af);
                                }
-                               if (nxport && r->dst.xport.range.port[0])
-                                       nxport->port =
+                               if (nsxport && r->dst.xport.range.port[0])
+                                       nsxport->port =
                                            r->dst.xport.range.port[0];
                                break;
                        case PF_IN:
                                if (pf_map_addr(pd->af, r, saddr,
-                                   naddr, NULL, sn))
+                                   ndaddr, NULL, sn))
                                        return (NULL);
                                if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
                                    PF_POOL_BITMASK)
-                                       PF_POOLMASK(naddr, naddr,
+                                       PF_POOLMASK(ndaddr, ndaddr,
                                            &r->rpool.cur->addr.v.a.mask, daddr,
                                            pd->af);
 
-                               if (nxport && dxport) {
+                               if (nsxport && dxport) {
                                        if (r->rpool.proxy_port[1]) {
                                                u_int32_t       tmp_nport;
 
@@ -3661,10 +3828,10 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off,
                                                /* wrap around if necessary */
                                                if (tmp_nport > 65535)
                                                        tmp_nport -= 65535;
-                                               nxport->port =
+                                               nsxport->port =
                                                    htons((u_int16_t)tmp_nport);
                                        } else if (r->rpool.proxy_port[0]) {
-                                               nxport->port = htons(r->rpool.
+                                               nsxport->port = htons(r->rpool.
                                                    proxy_port[0]);
                                        }
                                }
@@ -3925,29 +4092,21 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
 }
 
 static void
-pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
+pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr, sa_family_t af)
 {
        struct pf_rule *r = s->rule.ptr;
 
        s->rt_kif = NULL;
+
        if (!r->rt || r->rt == PF_FASTROUTE)
                return;
-       switch (s->state_key->af) {
-#if INET
-       case AF_INET:
-               pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL,
-                   &s->nat_src_node);
-               s->rt_kif = r->rpool.cur->kif;
-               break;
-#endif /* INET */
-#if INET6
-       case AF_INET6:
-               pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL,
+       if ((af == AF_INET) || (af == AF_INET6)) {
+               pf_map_addr(af, r, saddr, &s->rt_addr, NULL,
                    &s->nat_src_node);
                s->rt_kif = r->rpool.cur->kif;
-               break;
-#endif /* INET6 */
        }
+
+       return;
 }
 
 static void
@@ -4001,8 +4160,10 @@ pf_alloc_state_key(struct pf_state *s, struct pf_state_key *psk)
        if (psk != NULL) {
                bcopy(&psk->lan, &sk->lan, sizeof (sk->lan));
                bcopy(&psk->gwy, &sk->gwy, sizeof (sk->gwy));
-               bcopy(&psk->ext, &sk->ext, sizeof (sk->ext));
-               sk->af = psk->af;
+               bcopy(&psk->ext_lan, &sk->ext_lan, sizeof (sk->ext_lan));
+               bcopy(&psk->ext_gwy, &sk->ext_gwy, sizeof (sk->ext_gwy));
+               sk->af_lan = psk->af_lan;
+               sk->af_gwy = psk->af_gwy;
                sk->proto = psk->proto;
                sk->direction = psk->direction;
                sk->proto_variant = psk->proto_variant;
@@ -4044,6 +4205,383 @@ pf_tcp_iss(struct pf_pdesc *pd)
        return (digest[0] + random() + pf_tcp_iss_off);
 }
 
+/*
+ * This routine is called to perform address family translation on the
+ * inner IP header (that may come as payload) of an ICMP(v4/6) error
+ * response.
+ */
+static int
+pf_change_icmp_af(struct mbuf *m, int off,
+       struct pf_pdesc *pd, struct pf_pdesc *pd2, struct pf_addr *src,
+       struct pf_addr *dst, sa_family_t af, sa_family_t naf)
+{
+       struct mbuf             *n = NULL;
+       struct ip               *ip4 = NULL;
+       struct ip6_hdr          *ip6 = NULL;
+       int                      hlen, olen, mlen;
+
+       if (af == naf || (af != AF_INET && af != AF_INET6) ||
+           (naf != AF_INET && naf != AF_INET6))
+               return (-1);
+
+       /* split the mbuf chain on the inner ip/ip6 header boundary */
+       if ((n = m_split(m, off, M_DONTWAIT)) == NULL)
+               return (-1);
+
+       /* old header */
+       olen = pd2->off - off;
+       /* new header */
+       hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
+
+       /* trim old header */
+       m_adj(n, olen);
+
+       /* prepend a new one */
+       if (M_PREPEND(n, hlen, M_DONTWAIT, 0) == NULL)
+               return (-1);
+
+       /* translate inner ip/ip6 header */
+       switch (naf) {
+       case AF_INET:
+               ip4 = mtod(n, struct ip *);
+               bzero(ip4, sizeof(*ip4));
+               ip4->ip_v   = IPVERSION;
+               ip4->ip_hl  = sizeof(*ip4) >> 2;
+               ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - olen);
+               ip4->ip_id  = htons(ip_randomid());
+               ip4->ip_off = htons(IP_DF);
+               ip4->ip_ttl = pd2->ttl;
+               if (pd2->proto == IPPROTO_ICMPV6)
+                       ip4->ip_p = IPPROTO_ICMP;
+               else
+                       ip4->ip_p = pd2->proto;
+               ip4->ip_src = src->v4;
+               ip4->ip_dst = dst->v4;
+               ip4->ip_sum = in_cksum(n, ip4->ip_hl << 2);
+               break;
+       case AF_INET6:
+               ip6 = mtod(n, struct ip6_hdr *);
+               bzero(ip6, sizeof(*ip6));
+               ip6->ip6_vfc  = IPV6_VERSION;
+               ip6->ip6_plen = htons(pd2->tot_len - olen);
+               if (pd2->proto == IPPROTO_ICMP)
+                       ip6->ip6_nxt = IPPROTO_ICMPV6;
+               else
+                       ip6->ip6_nxt = pd2->proto;
+               if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM)
+                       ip6->ip6_hlim = IPV6_DEFHLIM;
+               else
+                       ip6->ip6_hlim = pd2->ttl;
+               ip6->ip6_src  = src->v6;
+               ip6->ip6_dst  = dst->v6;
+               break;
+       }
+
+       /* adjust payload offset and total packet length */
+       pd2->off += hlen - olen;
+       pd->tot_len += hlen - olen;
+
+       /* merge modified inner packet with the original header */
+       mlen = n->m_pkthdr.len;
+       m_cat(m, n);
+       m->m_pkthdr.len += mlen;
+
+       return (0);
+}
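The routine above is, at bottom, a fixed piece of mbuf surgery: split the chain at the inner header, drop the old inner IP/IPv6 header, prepend room for a header of the new family, and splice the chain back together while keeping m_pkthdr.len honest. A minimal sketch of that pattern, using the same mbuf KPIs called above (m_split, m_adj, the four-argument M_PREPEND, m_cat); the helper name swap_inner_header() is hypothetical:

    static int
    swap_inner_header(struct mbuf *m, int off, int olen, int hlen)
    {
            struct mbuf *n;

            /* detach everything from the inner header onward */
            if ((n = m_split(m, off, M_DONTWAIT)) == NULL)
                    return (-1);

            /* drop the old inner header ... */
            m_adj(n, olen);

            /* ... and make room for the new one; caller fills in mtod(n, ...) */
            if (M_PREPEND(n, hlen, M_DONTWAIT, 0) == NULL)
                    return (-1);

            /*
             * Splice the tail back on.  m_cat() does not update the packet
             * header length, so account for the tail by hand, reading its
             * length before m_cat() can coalesce and free mbufs from n.
             */
            m->m_pkthdr.len += n->m_pkthdr.len;
            m_cat(m, n);
            return (0);
    }

The offset bookkeeping in pf_change_icmp_af() (pd2->off and pd->tot_len both move by hlen - olen) falls directly out of this swap: the inner payload is untouched, only the inner header grew or shrank by that amount.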
+
+#define PTR_IP(field)  ((int32_t)offsetof(struct ip, field))
+#define PTR_IP6(field) ((int32_t)offsetof(struct ip6_hdr, field))
+
+static int
+pf_translate_icmp_af(int af, void *arg)
+{
+       struct icmp             *icmp4;
+       struct icmp6_hdr        *icmp6;
+       u_int32_t                mtu;
+       int32_t                  ptr = -1;
+       u_int8_t                 type;
+       u_int8_t                 code;
+
+       switch (af) {
+       case AF_INET:
+               icmp6 = arg;
+               type  = icmp6->icmp6_type;
+               code  = icmp6->icmp6_code;
+               mtu   = ntohl(icmp6->icmp6_mtu);
+
+               switch (type) {
+               case ICMP6_ECHO_REQUEST:
+                       type = ICMP_ECHO;
+                       break;
+               case ICMP6_ECHO_REPLY:
+                       type = ICMP_ECHOREPLY;
+                       break;
+               case ICMP6_DST_UNREACH:
+                       type = ICMP_UNREACH;
+                       switch (code) {
+                       case ICMP6_DST_UNREACH_NOROUTE:
+                       case ICMP6_DST_UNREACH_BEYONDSCOPE:
+                       case ICMP6_DST_UNREACH_ADDR:
+                               code = ICMP_UNREACH_HOST;
+                               break;
+                       case ICMP6_DST_UNREACH_ADMIN:
+                               code = ICMP_UNREACH_HOST_PROHIB;
+                               break;
+                       case ICMP6_DST_UNREACH_NOPORT:
+                               code = ICMP_UNREACH_PORT;
+                               break;
+                       default:
+                               return (-1);
+                       }
+                       break;
+               case ICMP6_PACKET_TOO_BIG:
+                       type = ICMP_UNREACH;
+                       code = ICMP_UNREACH_NEEDFRAG;
+                       mtu -= 20;
+                       break;
+               case ICMP6_TIME_EXCEEDED:
+                       type = ICMP_TIMXCEED;
+                       break;
+               case ICMP6_PARAM_PROB:
+                       switch (code) {
+                       case ICMP6_PARAMPROB_HEADER:
+                               type = ICMP_PARAMPROB;
+                               code = ICMP_PARAMPROB_ERRATPTR;
+                               ptr  = ntohl(icmp6->icmp6_pptr);
+
+                               if (ptr == PTR_IP6(ip6_vfc))
+                                       ; /* preserve */
+                               else if (ptr == PTR_IP6(ip6_vfc) + 1)
+                                       ptr = PTR_IP(ip_tos);
+                               else if (ptr == PTR_IP6(ip6_plen) ||
+                                   ptr == PTR_IP6(ip6_plen) + 1)
+                                       ptr = PTR_IP(ip_len);
+                               else if (ptr == PTR_IP6(ip6_nxt))
+                                       ptr = PTR_IP(ip_p);
+                               else if (ptr == PTR_IP6(ip6_hlim))
+                                       ptr = PTR_IP(ip_ttl);
+                               else if (ptr >= PTR_IP6(ip6_src) &&
+                                   ptr < PTR_IP6(ip6_dst))
+                                       ptr = PTR_IP(ip_src);
+                               else if (ptr >= PTR_IP6(ip6_dst) &&
+                                   ptr < (int32_t)sizeof(struct ip6_hdr))
+                                       ptr = PTR_IP(ip_dst);
+                               else {
+                                       return (-1);
+                               }
+                               break;
+                       case ICMP6_PARAMPROB_NEXTHEADER:
+                               type = ICMP_UNREACH;
+                               code = ICMP_UNREACH_PROTOCOL;
+                               break;
+                       default:
+                               return (-1);
+                       }
+                       break;
+               default:
+                       return (-1);
+               }
+               icmp6->icmp6_type = type;
+               icmp6->icmp6_code = code;
+               /* aligns well with an icmpv4 nextmtu */
+               icmp6->icmp6_mtu = htonl(mtu);
+               /* icmpv4 pptr uses only the most significant byte */
+               if (ptr >= 0)
+                       icmp6->icmp6_pptr = htonl(ptr << 24);
+               break;
+
+       case AF_INET6:
+               icmp4 = arg;
+               type  = icmp4->icmp_type;
+               code  = icmp4->icmp_code;
+               mtu   = ntohs(icmp4->icmp_nextmtu);
+
+               switch (type) {
+               case ICMP_ECHO:
+                       type = ICMP6_ECHO_REQUEST;
+                       break;
+               case ICMP_ECHOREPLY:
+                       type = ICMP6_ECHO_REPLY;
+                       break;
+               case ICMP_UNREACH:
+                       type = ICMP6_DST_UNREACH;
+                       switch (code) {
+                       case ICMP_UNREACH_NET:
+                       case ICMP_UNREACH_HOST:
+                       case ICMP_UNREACH_NET_UNKNOWN:
+                       case ICMP_UNREACH_HOST_UNKNOWN:
+                       case ICMP_UNREACH_ISOLATED:
+                       case ICMP_UNREACH_TOSNET:
+                       case ICMP_UNREACH_TOSHOST:
+                               code = ICMP6_DST_UNREACH_NOROUTE;
+                               break;
+                       case ICMP_UNREACH_PORT:
+                               code = ICMP6_DST_UNREACH_NOPORT;
+                               break;
+                       case ICMP_UNREACH_NET_PROHIB:
+                       case ICMP_UNREACH_HOST_PROHIB:
+                       case ICMP_UNREACH_FILTER_PROHIB:
+                       case ICMP_UNREACH_PRECEDENCE_CUTOFF:
+                               code = ICMP6_DST_UNREACH_ADMIN;
+                               break;
+                       case ICMP_UNREACH_PROTOCOL:
+                               type = ICMP6_PARAM_PROB;
+                               code = ICMP6_PARAMPROB_NEXTHEADER;
+                               ptr  = offsetof(struct ip6_hdr, ip6_nxt);
+                               break;
+                       case ICMP_UNREACH_NEEDFRAG:
+                               type = ICMP6_PACKET_TOO_BIG;
+                               code = 0;
+                               mtu += 20;
+                               break;
+                       default:
+                               return (-1);
+                       }
+                       break;
+               case ICMP_TIMXCEED:
+                       type = ICMP6_TIME_EXCEEDED;
+                       break;
+               case ICMP_PARAMPROB:
+                       type = ICMP6_PARAM_PROB;
+                       switch (code) {
+                       case ICMP_PARAMPROB_ERRATPTR:
+                               code = ICMP6_PARAMPROB_HEADER;
+                               break;
+                       case ICMP_PARAMPROB_LENGTH:
+                               code = ICMP6_PARAMPROB_HEADER;
+                               break;
+                       default:
+                               return (-1);
+                       }
+
+                       ptr = icmp4->icmp_pptr;
+                       if (ptr == 0 || ptr == PTR_IP(ip_tos))
+                               ; /* preserve */
+                       else if (ptr == PTR_IP(ip_len) ||
+                           ptr == PTR_IP(ip_len) + 1)
+                               ptr = PTR_IP6(ip6_plen);
+                       else if (ptr == PTR_IP(ip_ttl))
+                               ptr = PTR_IP6(ip6_hlim);
+                       else if (ptr == PTR_IP(ip_p))
+                               ptr = PTR_IP6(ip6_nxt);
+                       else if (ptr >= PTR_IP(ip_src) &&
+                           ptr < PTR_IP(ip_dst))
+                               ptr = PTR_IP6(ip6_src);
+                       else if (ptr >= PTR_IP(ip_dst) &&
+                           ptr < (int32_t)sizeof(struct ip))
+                               ptr = PTR_IP6(ip6_dst);
+                       else {
+                               return (-1);
+                       }
+                       break;
+               default:
+                       return (-1);
+               }
+               icmp4->icmp_type = type;
+               icmp4->icmp_code = code;
+               icmp4->icmp_nextmtu = htons(mtu);
+               if (ptr >= 0)
+                       icmp4->icmp_void = htonl(ptr);
+               break;
+       }
+
+       return (0);
+}
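Most of the work above is a direct remapping of type/code pairs between the two ICMP dialects; the only numeric adjustment is the next-hop MTU carried in "packet too big" / "fragmentation needed", which shrinks by 20 bytes going 6-to-4 and grows by 20 going 4-to-6, the difference between a 40-byte IPv6 header and a 20-byte IPv4 header. A small worked example (the local variable is hypothetical) of translating an ICMPv6 "packet too big" toward AF_INET in place:

    struct icmp6_hdr too_big;

    bzero(&too_big, sizeof (too_big));
    too_big.icmp6_type = ICMP6_PACKET_TOO_BIG;
    too_big.icmp6_code = 0;
    too_big.icmp6_mtu  = htonl(1500);       /* path MTU seen on the v6 side */

    /*
     * Rewrites the header in place: the type becomes ICMP_UNREACH, the
     * code ICMP_UNREACH_NEEDFRAG, and the advertised MTU drops to 1480,
     * since the translated IPv4 copy of any given packet is 20 bytes
     * shorter than its IPv6 original.
     */
    if (pf_translate_icmp_af(AF_INET, &too_big) == 0) {
            /* ntohl(too_big.icmp6_mtu) == 1480 here */
    }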
+
+static int
+pf_nat64_ipv6(struct mbuf *m, int off, struct pf_pdesc *pd)
+{
+       struct ip               *ip4;
+
+       /*
+        * ip_input asserts that rcvif is not NULL.
+        * That may not hold in two corner cases:
+        * 1. A local app, for whatever reason, sends a DNS
+        *    AAAA query to the local host.
+        * 2. The in-kernel IPv6 stack internally generates a
+        *    message destined for a synthesized IPv6 end-point.
+        */
+       if (m->m_pkthdr.rcvif == NULL)
+               return (PF_DROP);
+
+       /* trim the old header */
+       m_adj(m, off);
+
+       /* prepend the new one */
+       if (M_PREPEND(m, sizeof(*ip4), M_DONTWAIT, 0) == NULL)
+               return (PF_DROP);
+
+       ip4 = mtod(m, struct ip *);
+       ip4->ip_v   = 4;
+       ip4->ip_hl  = 5;
+       ip4->ip_tos = pd->tos & htonl(0x0ff00000);
+       ip4->ip_len = htons(sizeof(*ip4) + (pd->tot_len - off));
+       ip4->ip_id  = 0;
+       ip4->ip_off = htons(IP_DF);
+       ip4->ip_ttl = pd->ttl;
+       ip4->ip_p   = pd->proto;
+       ip4->ip_sum = 0;
+       ip4->ip_src = pd->naddr.v4;
+       ip4->ip_dst = pd->ndaddr.v4;
+       ip4->ip_sum = in_cksum(m, ip4->ip_hl << 2);
+
+       /* recalculate icmp checksums */
+       if (pd->proto == IPPROTO_ICMP) {
+               struct mbuf *mp;
+               struct icmp *icmp;
+               int moff, hlen = sizeof(*ip4);
+
+               if ((mp = m_pulldown(m, hlen, ICMP_MINLEN, &moff)) == NULL)
+                       return (PF_NAT64);
+
+               icmp = (struct icmp *)(void *)(mtod(mp, char *) + moff);
+               icmp->icmp_cksum = 0;
+               icmp->icmp_cksum = inet_cksum(m, 0, hlen,
+                                               ntohs(ip4->ip_len) - hlen);
+       }
+
+       ip_input(m);
+       return (PF_NAT64);
+}
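Note the return-value discipline: on success the translated packet has already been handed to ip_input(), and even the m_pulldown() failure path has given up the chain (m_pulldown() frees it when it fails), so a PF_NAT64 result means "mbuf consumed" and the caller must not touch m again. That is how pf_test_rule() dispatches it once the new state is in place, condensed from the code further below:

    m_copyback(m, off, hdrlen, pd->hdr.any);
    if (af == AF_INET6 && pd->naf == AF_INET)
            return (pf_nat64_ipv6(m, off, pd));     /* 6 -> 4, mbuf consumed */
    else if (af == AF_INET && pd->naf == AF_INET6)
            return (pf_nat64_ipv4(m, off, pd));     /* 4 -> 6, mbuf consumed */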
+
+static int
+pf_nat64_ipv4(struct mbuf *m, int off, struct pf_pdesc *pd)
+{
+       struct ip6_hdr          *ip6;
+
+       if (m->m_pkthdr.rcvif == NULL)
+               return (PF_DROP);
+
+       m_adj(m, off);
+       if (M_PREPEND(m, sizeof(*ip6), M_DONTWAIT, 0) == NULL)
+               return (PF_DROP);
+
+       ip6 = mtod(m, struct ip6_hdr *);
+       ip6->ip6_vfc  = htonl((6 << 28) | (pd->tos << 20));
+       ip6->ip6_plen = htons(pd->tot_len - off);
+       ip6->ip6_nxt  = pd->proto;
+       ip6->ip6_hlim = pd->ttl;
+       ip6->ip6_src = pd->naddr.v6;
+       ip6->ip6_dst = pd->ndaddr.v6;
+
+       /* recalculate icmp6 checksums */
+       if (pd->proto == IPPROTO_ICMPV6) {
+               struct mbuf *mp;
+               struct icmp6_hdr *icmp6;
+               int moff, hlen = sizeof(*ip6);
+
+               if ((mp = m_pulldown(m, hlen, sizeof(*icmp6), &moff)) == NULL)
+                       return (PF_NAT64);
+
+               icmp6 = (struct icmp6_hdr *)(void *)(mtod(mp, char *) + moff);
+               icmp6->icmp6_cksum = 0;
+               icmp6->icmp6_cksum = inet6_cksum(m, IPPROTO_ICMPV6, hlen,
+                                               ntohs(ip6->ip6_plen));
+       }
+       ip6_input(m);
+       return (PF_NAT64);
+}
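One asymmetry between the two directions is worth spelling out: an ICMPv4 checksum covers only the ICMP message itself, while an ICMPv6 checksum also covers an IPv6 pseudo-header (source, destination, payload length, next header). Hence the 4-bound path above passes 0 as the protocol to inet_cksum(), while the 6-bound path passes IPPROTO_ICMPV6 to inet6_cksum(). Side by side, as used in the two functions:

    /* toward IPv4 (pf_nat64_ipv6): no pseudo-header in the sum */
    icmp->icmp_cksum = 0;
    icmp->icmp_cksum = inet_cksum(m, 0, hlen, ntohs(ip4->ip_len) - hlen);

    /* toward IPv6 (pf_nat64_ipv4): pseudo-header included */
    icmp6->icmp6_cksum = 0;
    icmp6->icmp6_cksum = inet6_cksum(m, IPPROTO_ICMPV6, hlen,
        ntohs(ip6->ip6_plen));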
+
 static int
 pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
     struct pfi_kif *kif, struct mbuf *m, int off, void *h,
@@ -4058,6 +4596,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
        struct pf_ruleset       *ruleset = NULL;
        struct pf_src_node      *nsn = NULL;
        struct tcphdr           *th = pd->hdr.tcp;
+       struct udphdr           *uh = pd->hdr.udp;
        u_short                  reason;
        int                      rewrite = 0, hdrlen = 0;
        int                      tag = -1;
@@ -4069,7 +4608,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
        u_int8_t                 icmptype = 0, icmpcode = 0;
 
        struct pf_grev1_hdr     *grev1 = pd->hdr.grev1;
-       union pf_state_xport bxport, nxport, sxport, dxport;
+       union pf_state_xport bxport, bdxport, nxport, sxport, dxport;
        struct pf_state_key      psk;
 
        lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
@@ -4091,9 +4630,9 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                hdrlen = sizeof (*th);
                break;
        case IPPROTO_UDP:
-               sxport.port = pd->hdr.udp->uh_sport;
-               dxport.port = pd->hdr.udp->uh_dport;
-               hdrlen = sizeof (*pd->hdr.udp);
+               sxport.port = uh->uh_sport;
+               dxport.port = uh->uh_dport;
+               hdrlen = sizeof (*uh);
                break;
 #if INET
        case IPPROTO_ICMP:
@@ -4144,199 +4683,229 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
 
        r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
 
-       if (direction == PF_OUT) {
-               bxport = nxport = sxport;
-               /* check outgoing packet for BINAT/NAT */
-               if ((nr = pf_get_translation_aux(pd, m, off, PF_OUT, kif, &nsn,
-                   saddr, &sxport, daddr, &dxport, &pd->naddr, &nxport)) !=
+       bxport = sxport;
+       bdxport = dxport;
+
+       if (direction == PF_OUT)
+               nxport = sxport;
+       else
+               nxport = dxport;
+
+       /* check packet for BINAT/NAT/RDR */
+       if ((nr = pf_get_translation_aux(pd, m, off, direction, kif, &nsn,
+                   saddr, &sxport, daddr, &dxport, &nxport)) !=
                    NULL) {
-                       PF_ACPY(&pd->baddr, saddr, af);
-                       switch (pd->proto) {
-                       case IPPROTO_TCP:
+               int ua;
+               u_int16_t dport;
+
+               if (pd->af != pd->naf)
+                       ua = 0;
+               else
+                       ua = 1;
+
+               PF_ACPY(&pd->baddr, saddr, af);
+               PF_ACPY(&pd->bdaddr, daddr, af);
+
+               switch (pd->proto) {
+               case IPPROTO_TCP:
+                       if (pd->af != pd->naf ||
+                           PF_ANEQ(saddr, &pd->naddr, pd->af)) {
                                pf_change_ap(direction, pd->mp, saddr,
-                                   &th->th_sport, pd->ip_sum, &th->th_sum,
-                                   &pd->naddr, nxport.port, 0, af);
+                                       &th->th_sport, pd->ip_sum, &th->th_sum,
+                                       &pd->naddr, nxport.port, 0, af,
+                                       pd->naf, ua);
                                sxport.port = th->th_sport;
-                               rewrite++;
-                               break;
-                       case IPPROTO_UDP:
+                       }
+
+                       if (pd->af != pd->naf ||
+                           PF_ANEQ(daddr, &pd->ndaddr, pd->af) ||
+                           (nr && (nr->action == PF_RDR) &&
+                            (th->th_dport != nxport.port))) {
+                               if (nr && nr->action == PF_RDR)
+                                       dport = nxport.port;
+                               else
+                                       dport = th->th_dport;
+                               pf_change_ap(direction, pd->mp, daddr,
+                                       &th->th_dport, pd->ip_sum,
+                                       &th->th_sum, &pd->ndaddr,
+                                       dport, 0, af, pd->naf, ua);
+                               dxport.port = th->th_dport;
+                       }
+                       rewrite++;
+                       break;
+
+               case IPPROTO_UDP:
+                       if (pd->af != pd->naf ||
+                           PF_ANEQ(saddr, &pd->naddr, pd->af)) {
                                pf_change_ap(direction, pd->mp, saddr,
-                                   &pd->hdr.udp->uh_sport, pd->ip_sum,
-                                   &pd->hdr.udp->uh_sum, &pd->naddr,
-                                   nxport.port, 1, af);
-                               sxport.port = pd->hdr.udp->uh_sport;
-                               rewrite++;
-                               break;
-#if INET
-                       case IPPROTO_ICMP:
-                               if (pd->af == AF_INET) {
-                                       pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
-                                           pd->naddr.v4.s_addr, 0);
-                                       pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
-                                           pd->hdr.icmp->icmp_cksum, sxport.port,
-                                           nxport.port, 0);
-                                       pd->hdr.icmp->icmp_id = nxport.port;
-                                       ++rewrite;
-                               }
-                               break;
-#endif /* INET */
-#if INET6
-                       case IPPROTO_ICMPV6:
-                               if (pd->af == AF_INET6) {
-                                       pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
-                                           &pd->naddr, 0);
-                                       rewrite++;
-                               }
-                               break;
-#endif /* INET */
-                       case IPPROTO_GRE:
-                               switch (af) {
-#if INET
-                               case AF_INET:
-                                       pf_change_a(&saddr->v4.s_addr,
-                                           pd->ip_sum, pd->naddr.v4.s_addr, 0);
-                                       break;
-#endif /* INET */
-#if INET6
-                               case AF_INET6:
-                                       PF_ACPY(saddr, &pd->naddr, AF_INET6);
-                                       break;
-#endif /* INET6 */
-                               }
-                               ++rewrite;
-                               break;
-                       case IPPROTO_ESP:
-                               bxport.spi = 0;
-                               switch (af) {
+                                       &uh->uh_sport, pd->ip_sum,
+                                       &uh->uh_sum, &pd->naddr,
+                                       nxport.port, 1, af, pd->naf, ua);
+                               sxport.port = uh->uh_sport;
+                       }
+
+                       if (pd->af != pd->naf ||
+                           PF_ANEQ(daddr, &pd->ndaddr, pd->af) ||
+                           (nr && (nr->action == PF_RDR) &&
+                            (uh->uh_dport != nxport.port))) {
+                               if (nr && nr->action == PF_RDR)
+                                       dport = nxport.port;
+                               else
+                                       dport = uh->uh_dport;
+                               pf_change_ap(direction, pd->mp, daddr,
+                                       &uh->uh_dport, pd->ip_sum,
+                                       &uh->uh_sum, &pd->ndaddr,
+                                       dport, 0, af, pd->naf, ua);
+                               dxport.port = uh->uh_dport;
+                       }
+                       rewrite++;
+                       break;
 #if INET
-                               case AF_INET:
-                                       pf_change_a(&saddr->v4.s_addr,
-                                           pd->ip_sum, pd->naddr.v4.s_addr, 0);
-                                       break;
-#endif /* INET */
-#if INET6
-                               case AF_INET6:
-                                       PF_ACPY(saddr, &pd->naddr, AF_INET6);
-                                       break;
-#endif /* INET6 */
-                               }
+               case IPPROTO_ICMP:
+                       if (pd->af != AF_INET)
                                break;
-                       default:
-                               switch (af) {
-#if INET
-                               case AF_INET:
-                                       pf_change_a(&saddr->v4.s_addr,
-                                           pd->ip_sum, pd->naddr.v4.s_addr, 0);
-                                       break;
+                       /*
+                        * TODO:
+                        * The pd->af != pd->naf case is not handled here yet;
+                        * it would be needed for NAT46, which in turn is needed
+                        * to support XLAT. We'll cross that bridge when we
+                        * come to it.
+                        */
+                       if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
+                               pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
+                                       pd->naddr.v4.s_addr, 0);
+                               pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
+                                   pd->hdr.icmp->icmp_cksum, sxport.port,
+                                   nxport.port, 0);
+                               pd->hdr.icmp->icmp_id = nxport.port;
+                       }
+
+                       if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
+                               pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
+                                           pd->ndaddr.v4.s_addr, 0);
+                       }
+                       ++rewrite;
+                       break;
 #endif /* INET */
 #if INET6
-                               case AF_INET6:
-                                       PF_ACPY(saddr, &pd->naddr, af);
-                                       break;
-#endif /* INET */
-                               }
+               case IPPROTO_ICMPV6:
+                       if (pd->af != AF_INET6)
                                break;
+
+                       if (pd->af != pd->naf ||
+                           PF_ANEQ(saddr, &pd->naddr, pd->af)) {
+                               pf_change_addr(saddr,
+                                       &pd->hdr.icmp6->icmp6_cksum,
+                                       &pd->naddr, 0, pd->af, pd->naf);
                        }
 
-                       if (nr->natpass)
-                               r = NULL;
-                       pd->nat_rule = nr;
-               }
-       } else {
-               bxport.port = nxport.port = dxport.port;
-               /* check incoming packet for BINAT/RDR */
-               if ((nr = pf_get_translation_aux(pd, m, off, PF_IN, kif, &nsn,
-                   saddr, &sxport, daddr, &dxport, &pd->naddr, &nxport)) !=
-                   NULL) {
-                       PF_ACPY(&pd->baddr, daddr, af);
-                       switch (pd->proto) {
-                       case IPPROTO_TCP:
-                               pf_change_ap(direction, pd->mp, daddr,
-                                   &th->th_dport, pd->ip_sum, &th->th_sum,
-                                   &pd->naddr, nxport.port, 0, af);
-                               dxport.port = th->th_dport;
-                               rewrite++;
-                               break;
-                       case IPPROTO_UDP:
-                               pf_change_ap(direction, pd->mp, daddr,
-                                   &pd->hdr.udp->uh_dport, pd->ip_sum,
-                                   &pd->hdr.udp->uh_sum, &pd->naddr,
-                                   nxport.port, 1, af);
-                               dxport.port = pd->hdr.udp->uh_dport;
-                               rewrite++;
-                               break;
+                       if (pd->af != pd->naf ||
+                           PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
+                               pf_change_addr(daddr,
+                                       &pd->hdr.icmp6->icmp6_cksum,
+                                       &pd->ndaddr, 0, pd->af, pd->naf);
+                       }
+
+                       if (pd->af != pd->naf) {
+                               if (pf_translate_icmp_af(AF_INET,
+                                                       pd->hdr.icmp6))
+                                       return (PF_DROP);
+                               pd->proto = IPPROTO_ICMP;
+                       }
+                       rewrite++;
+                       break;
+#endif /* INET6 */
+               case IPPROTO_GRE:
+                       if ((direction == PF_IN) &&
+                           (pd->proto_variant == PF_GRE_PPTP_VARIANT))
+                           grev1->call_id = nxport.call_id;
+
+                       switch (pd->af) {
 #if INET
-                       case IPPROTO_ICMP:
-                               if (pd->af == AF_INET) {
-                                       pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
-                                           pd->naddr.v4.s_addr, 0);
+                       case AF_INET:
+                               if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
+                                       pf_change_a(&saddr->v4.s_addr,
+                                               pd->ip_sum,
+                                               pd->naddr.v4.s_addr, 0);
+                               }
+                               if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
+                                       pf_change_a(&daddr->v4.s_addr,
+                                               pd->ip_sum,
+                                               pd->ndaddr.v4.s_addr, 0);
                                }
                                break;
 #endif /* INET */
 #if INET6
-                       case IPPROTO_ICMPV6:
-                               if (pd->af == AF_INET6) {
-                                       pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
-                                           &pd->naddr, 0);
-                                       rewrite++;
-                               }
+                       case AF_INET6:
+                               if (PF_ANEQ(saddr, &pd->naddr, pd->af))
+                                       PF_ACPY(saddr, &pd->naddr, AF_INET6);
+                               if (PF_ANEQ(daddr, &pd->ndaddr, pd->af))
+                                       PF_ACPY(daddr, &pd->ndaddr, AF_INET6);
                                break;
 #endif /* INET6 */
-                       case IPPROTO_GRE:
-                               if (pd->proto_variant == PF_GRE_PPTP_VARIANT)
-                                       grev1->call_id = nxport.call_id;
+                       }
+                       ++rewrite;
+                       break;
+               case IPPROTO_ESP:
+                       if (direction == PF_OUT)
+                               bxport.spi = 0;
 
-                               switch (af) {
+                       switch (pd->af) {
 #if INET
-                               case AF_INET:
+                       case AF_INET:
+                               if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
+                                       pf_change_a(&saddr->v4.s_addr,
+                                       pd->ip_sum, pd->naddr.v4.s_addr, 0);
+                               }
+                               if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
                                        pf_change_a(&daddr->v4.s_addr,
-                                           pd->ip_sum, pd->naddr.v4.s_addr, 0);
-                                       break;
-#endif /* INET */
-#if INET6
-                               case AF_INET6:
-                                       PF_ACPY(daddr, &pd->naddr, AF_INET6);
-                                       break;
-#endif /* INET6 */
+                                               pd->ip_sum,
+                                               pd->ndaddr.v4.s_addr, 0);
                                }
-                               ++rewrite;
                                break;
-                       case IPPROTO_ESP:
-                               switch (af) {
-#if INET
-                               case AF_INET:
-                                       pf_change_a(&daddr->v4.s_addr,
-                                           pd->ip_sum, pd->naddr.v4.s_addr, 0);
-                                       break;
 #endif /* INET */
 #if INET6
-                               case AF_INET6:
-                                       PF_ACPY(daddr, &pd->naddr, AF_INET6);
-                                       break;
+                       case AF_INET6:
+                               if (PF_ANEQ(saddr, &pd->naddr, pd->af))
+                                       PF_ACPY(saddr, &pd->naddr, AF_INET6);
+                               if (PF_ANEQ(daddr, &pd->ndaddr, pd->af))
+                                       PF_ACPY(daddr, &pd->ndaddr, AF_INET6);
+                               break;
 #endif /* INET6 */
+                       }
+                       break;
+               default:
+                       switch (pd->af) {
+#if INET
+                       case AF_INET:
+                               if ((pd->naf != AF_INET) ||
+                                   (PF_ANEQ(saddr, &pd->naddr, pd->af))) {
+                                       pf_change_addr(saddr, pd->ip_sum,
+                                               &pd->naddr, 0, af, pd->naf);
+                               }
+
+                               if ((pd->naf != AF_INET) ||
+                                   (PF_ANEQ(daddr, &pd->ndaddr, pd->af))) {
+                                       pf_change_addr(daddr, pd->ip_sum,
+                                               &pd->ndaddr, 0, af, pd->naf);
                                }
                                break;
-                       default:
-                               switch (af) {
-#if INET
-                               case AF_INET:
-                                       pf_change_a(&daddr->v4.s_addr,
-                                           pd->ip_sum, pd->naddr.v4.s_addr, 0);
-                                       break;
 #endif /* INET */
 #if INET6
-                               case AF_INET6:
-                                       PF_ACPY(daddr, &pd->naddr, af);
-                                       break;
-#endif /* INET */
-                               }
+                       case AF_INET6:
+                               if (PF_ANEQ(saddr, &pd->naddr, pd->af))
+                                       PF_ACPY(saddr, &pd->naddr, af);
+                               if (PF_ANEQ(daddr, &pd->ndaddr, pd->af))
+                                       PF_ACPY(daddr, &pd->ndaddr, af);
                                break;
+#endif /* INET6 */
                        }
-
-                       if (nr->natpass)
-                               r = NULL;
-                       pd->nat_rule = nr;
+                       break;
                }
+
+               if (nr->natpass)
+                       r = NULL;
+               pd->nat_rule = nr;
+               pd->af = pd->naf;
        }
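Note the last assignment in the translated case above: pd->af is switched to pd->naf, so from here on the descriptor describes the post-translation packet. The rule-evaluation loop that follows compares r->af and the address matches against pd->af rather than the original af, which is what lets a packet arriving as IPv6 be filtered as the IPv4 packet it is about to become (and vice versa). Condensed from the loop in the next hunk:

    else if (r->af && r->af != pd->af)
            r = r->skip[PF_SKIP_AF].ptr;
    else if (PF_MISMATCHAW(&r->src.addr, saddr, pd->af,
        r->src.neg, kif))
            r = r->skip[PF_SKIP_SRC_ADDR].ptr;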
 
        if (nr && nr->tag > 0)
@@ -4348,11 +4917,11 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                        r = r->skip[PF_SKIP_IFP].ptr;
                else if (r->direction && r->direction != direction)
                        r = r->skip[PF_SKIP_DIR].ptr;
-               else if (r->af && r->af != af)
+               else if (r->af && r->af != pd->af)
                        r = r->skip[PF_SKIP_AF].ptr;
                else if (r->proto && r->proto != pd->proto)
                        r = r->skip[PF_SKIP_PROTO].ptr;
-               else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
+               else if (PF_MISMATCHAW(&r->src.addr, saddr, pd->af,
                    r->src.neg, kif))
                        r = r->skip[PF_SKIP_SRC_ADDR].ptr;
                /* tcp/udp only. port_op always 0 in other cases */
@@ -4363,7 +4932,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                    r->src.xport.range.port[0], r->src.xport.range.port[1],
                    th->th_sport))
                        r = r->skip[PF_SKIP_SRC_PORT].ptr;
-               else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
+               else if (PF_MISMATCHAW(&r->dst.addr, daddr, pd->af,
                    r->dst.neg, NULL))
                        r = r->skip[PF_SKIP_DST_ADDR].ptr;
                /* tcp/udp only. port_op always 0 in other cases */
@@ -4455,8 +5024,8 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
 
                        m_copyback(m, off, hdrlen, pd->hdr.any);
                }
-               PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
-                   a, ruleset, pd);
+               PFLOG_PACKET(kif, h, m, pd->af, direction, reason,
+                               r->log ? r : nr, a, ruleset, pd);
        }
 
        if ((r->action == PF_DROP) &&
@@ -4464,14 +5033,16 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
            (r->rule_flag & PFRULE_RETURNICMP) ||
            (r->rule_flag & PFRULE_RETURN))) {
                /* undo NAT changes, if they have taken place */
-               if (nr != NULL) {
+               /* XXX For NAT64 we are not reverting the changes */
+               if (nr != NULL && nr->action != PF_NAT64) {
                        if (direction == PF_OUT) {
+                               pd->af = af;
                                switch (pd->proto) {
                                case IPPROTO_TCP:
                                        pf_change_ap(direction, pd->mp, saddr,
                                            &th->th_sport, pd->ip_sum,
                                            &th->th_sum, &pd->baddr,
-                                           bxport.port, 0, af);
+                                           bxport.port, 0, af, pd->af, 1);
                                        sxport.port = th->th_sport;
                                        rewrite++;
                                        break;
@@ -4479,7 +5050,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                                        pf_change_ap(direction, pd->mp, saddr,
                                            &pd->hdr.udp->uh_sport, pd->ip_sum,
                                            &pd->hdr.udp->uh_sum, &pd->baddr,
-                                           bxport.port, 1, af);
+                                           bxport.port, 1, af, pd->af, 1);
                                        sxport.port = pd->hdr.udp->uh_sport;
                                        rewrite++;
                                        break;
@@ -4543,16 +5114,16 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                                case IPPROTO_TCP:
                                        pf_change_ap(direction, pd->mp, daddr,
                                            &th->th_dport, pd->ip_sum,
-                                           &th->th_sum, &pd->baddr,
-                                           bxport.port, 0, af);
+                                           &th->th_sum, &pd->bdaddr,
+                                           bdxport.port, 0, af, pd->af, 1);
                                        dxport.port = th->th_dport;
                                        rewrite++;
                                        break;
                                case IPPROTO_UDP:
                                        pf_change_ap(direction, pd->mp, daddr,
                                            &pd->hdr.udp->uh_dport, pd->ip_sum,
-                                           &pd->hdr.udp->uh_sum, &pd->baddr,
-                                           bxport.port, 1, af);
+                                           &pd->hdr.udp->uh_sum, &pd->bdaddr,
+                                           bdxport.port, 1, af, pd->af, 1);
                                        dxport.port = pd->hdr.udp->uh_dport;
                                        rewrite++;
                                        break;
@@ -4565,19 +5136,20 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                                case IPPROTO_GRE:
                                        if (pd->proto_variant ==
                                            PF_GRE_PPTP_VARIANT)
-                                               grev1->call_id = bxport.call_id;
+                                               grev1->call_id =
+                                                       bdxport.call_id;
                                        ++rewrite;
                                        switch (af) {
 #if INET
                                        case AF_INET:
                                                pf_change_a(&daddr->v4.s_addr,
                                                    pd->ip_sum,
-                                                   pd->baddr.v4.s_addr, 0);
+                                                   pd->bdaddr.v4.s_addr, 0);
                                                break;
 #endif /* INET */
 #if INET6
                                        case AF_INET6:
-                                               PF_ACPY(daddr, &pd->baddr,
+                                               PF_ACPY(daddr, &pd->bdaddr,
                                                    AF_INET6);
                                                break;
 #endif /* INET6 */
@@ -4589,12 +5161,12 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                                        case AF_INET:
                                                pf_change_a(&daddr->v4.s_addr,
                                                    pd->ip_sum,
-                                                   pd->baddr.v4.s_addr, 0);
+                                                   pd->bdaddr.v4.s_addr, 0);
                                                break;
 #endif /* INET */
 #if INET6
                                        case AF_INET6:
-                                               PF_ACPY(daddr, &pd->baddr,
+                                               PF_ACPY(daddr, &pd->bdaddr,
                                                    AF_INET6);
                                                break;
 #endif /* INET6 */
@@ -4605,11 +5177,11 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                                        case AF_INET:
                                                pf_change_a(&daddr->v4.s_addr,
                                                    pd->ip_sum,
-                                                   pd->baddr.v4.s_addr, 0);
+                                                   pd->bdaddr.v4.s_addr, 0);
                                                break;
 #if INET6
                                        case AF_INET6:
-                                               PF_ACPY(daddr, &pd->baddr, af);
+                                               PF_ACPY(daddr, &pd->bdaddr, af);
                                                break;
 #endif /* INET6 */
                                        }
@@ -4627,7 +5199,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                        struct ip6_hdr  *h6;
 #endif /* INET6 */
 
-                       switch (af) {
+                       switch (pd->af) {
                        case AF_INET:
                                h4 = mtod(m, struct ip *);
                                len = ntohs(h4->ip_len) - off;
@@ -4641,28 +5213,29 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
 #endif /* INET6 */
                        }
 
-                       if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af))
+                       if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP,
+                                                pd->af))
                                REASON_SET(&reason, PFRES_PROTCKSUM);
                        else {
                                if (th->th_flags & TH_SYN)
                                        ack++;
                                if (th->th_flags & TH_FIN)
                                        ack++;
-                               pf_send_tcp(r, af, pd->dst,
+                               pf_send_tcp(r, pd->af, pd->dst,
                                    pd->src, th->th_dport, th->th_sport,
                                    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
                                    r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
                        }
-               } else if (pd->proto != IPPROTO_ICMP && af == AF_INET &&
+               } else if (pd->proto != IPPROTO_ICMP && pd->af == AF_INET &&
                    pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH &&
                    r->return_icmp)
                        pf_send_icmp(m, r->return_icmp >> 8,
-                           r->return_icmp & 255, af, r);
+                           r->return_icmp & 255, pd->af, r);
                else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
                    pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH &&
                    r->return_icmp6)
                        pf_send_icmp(m, r->return_icmp6 >> 8,
-                           r->return_icmp6 & 255, af, r);
+                           r->return_icmp6 & 255, pd->af, r);
        }
 
        if (r->action == PF_DROP)
@@ -4672,7 +5245,6 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
        bzero(&psk, sizeof (psk));
        psk.proto = pd->proto;
        psk.direction = direction;
-       psk.af = af;
        if (pd->proto == IPPROTO_UDP) {
                if (ntohs(pd->hdr.udp->uh_sport) == PF_IKE_PORT &&
                    ntohs(pd->hdr.udp->uh_dport) == PF_IKE_PORT) {
@@ -4686,62 +5258,120 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                psk.proto_variant = pd->proto_variant;
        }
        if (direction == PF_OUT) {
+               psk.af_gwy = af;
                PF_ACPY(&psk.gwy.addr, saddr, af);
-               PF_ACPY(&psk.ext.addr, daddr, af);
+               PF_ACPY(&psk.ext_gwy.addr, daddr, af);
                switch (pd->proto) {
-               case IPPROTO_UDP:
-                       psk.gwy.xport = sxport;
-                       psk.ext.xport = dxport;
-                       break;
                case IPPROTO_ESP:
                        psk.gwy.xport.spi = 0;
-                       psk.ext.xport.spi = pd->hdr.esp->spi;
+                       psk.ext_gwy.xport.spi = pd->hdr.esp->spi;
                        break;
                case IPPROTO_ICMP:
 #if INET6
                case IPPROTO_ICMPV6:
 #endif
+                       /*
+                        * NAT64 requires protocol translation between ICMPv4
+                        * and ICMPv6. TCP and UDP do not require protocol
+                        * translation. To avoid adding complexity just to
+                        * handle ICMP(v4/v6), we always look up
+                        * proto = IPPROTO_ICMP on both the LAN and WAN side.
+                        */
+                       psk.proto = IPPROTO_ICMP;
                        psk.gwy.xport.port = nxport.port;
-                       psk.ext.xport.spi = 0;
+                       psk.ext_gwy.xport.spi = 0;
                        break;
                default:
                        psk.gwy.xport = sxport;
-                       psk.ext.xport = dxport;
+                       psk.ext_gwy.xport = dxport;
                        break;
                }
+               psk.af_lan = af;
                if (nr != NULL) {
                        PF_ACPY(&psk.lan.addr, &pd->baddr, af);
                        psk.lan.xport = bxport;
+                       PF_ACPY(&psk.ext_lan.addr, &pd->bdaddr, af);
+                       psk.ext_lan.xport = bdxport;
                } else {
                        PF_ACPY(&psk.lan.addr, &psk.gwy.addr, af);
                        psk.lan.xport = psk.gwy.xport;
+                       PF_ACPY(&psk.ext_lan.addr, &psk.ext_gwy.addr, af);
+                       psk.ext_lan.xport = psk.ext_gwy.xport;
                }
        } else {
-               PF_ACPY(&psk.lan.addr, daddr, af);
-               PF_ACPY(&psk.ext.addr, saddr, af);
+               psk.af_lan = af;
+               if (nr && nr->action == PF_NAT64) {
+                       PF_ACPY(&psk.lan.addr, &pd->baddr, af);
+                       PF_ACPY(&psk.ext_lan.addr, &pd->bdaddr, af);
+               } else {
+                       PF_ACPY(&psk.lan.addr, daddr, af);
+                       PF_ACPY(&psk.ext_lan.addr, saddr, af);
+               }
                switch (pd->proto) {
                case IPPROTO_ICMP:
 #if INET6
                case IPPROTO_ICMPV6:
 #endif
-                       psk.lan.xport = nxport;
-                       psk.ext.xport.spi = 0;
+                       /*
+                        * NAT64 requires protocol translation between ICMPv4
+                        * and ICMPv6. TCP and UDP do not require protocol
+                        * translation. To avoid adding complexity just to
+                        * handle ICMP(v4/v6), we always look up
+                        * proto = IPPROTO_ICMP on both the LAN and WAN side.
+                        */
+                       psk.proto = IPPROTO_ICMP;
+                       if (nr && nr->action == PF_NAT64) {
+                               psk.lan.xport = bxport;
+                               psk.ext_lan.xport = bxport;
+                       } else {
+                               psk.lan.xport = nxport;
+                               psk.ext_lan.xport.spi = 0;
+                       }
                        break;
                case IPPROTO_ESP:
-                       psk.ext.xport.spi = 0;
+                       psk.ext_lan.xport.spi = 0;
                        psk.lan.xport.spi = pd->hdr.esp->spi;
                        break;
                default:
-                       psk.lan.xport = dxport;
-                       psk.ext.xport = sxport;
+                       if (nr != NULL) {
+                               if (nr->action == PF_NAT64) {
+                                       psk.lan.xport = bxport;
+                                       psk.ext_lan.xport = bdxport;
+                               } else {
+                                       psk.lan.xport = dxport;
+                                       psk.ext_lan.xport = sxport;
+                               }
+                       } else {
+                               psk.lan.xport = dxport;
+                               psk.ext_lan.xport = sxport;
+                       }
                        break;
                }
+               psk.af_gwy = pd->naf;
                if (nr != NULL) {
-                       PF_ACPY(&psk.gwy.addr, &pd->baddr, af);
-                       psk.gwy.xport = bxport;
+                       if (nr->action == PF_NAT64) {
+                               PF_ACPY(&psk.gwy.addr, &pd->naddr, pd->naf);
+                               PF_ACPY(&psk.ext_gwy.addr, &pd->ndaddr,
+                                       pd->naf);
+                               if ((pd->proto == IPPROTO_ICMPV6) ||
+                                   (pd->proto == IPPROTO_ICMP)) {
+                                       psk.gwy.xport = nxport;
+                                       psk.ext_gwy.xport = nxport;
+                               } else {
+                                       psk.gwy.xport = sxport;
+                                       psk.ext_gwy.xport = dxport;
+                               }
+                       } else {
+                               PF_ACPY(&psk.gwy.addr, &pd->bdaddr, af);
+                               psk.gwy.xport = bdxport;
+                               PF_ACPY(&psk.ext_gwy.addr, saddr, af);
+                               psk.ext_gwy.xport = sxport;
+                       }
                } else {
                        PF_ACPY(&psk.gwy.addr, &psk.lan.addr, af);
                        psk.gwy.xport = psk.lan.xport;
+                       PF_ACPY(&psk.ext_gwy.addr, &psk.ext_lan.addr, af);
+                       psk.ext_gwy.xport = psk.ext_lan.xport;
                }
        }
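Because NAT64 translates the ICMP protocol itself (ICMPv6 on one side of the state, ICMPv4 on the other), a state key that stored the on-the-wire protocol number could never match from the opposite family. The comments above record the workaround: the key's protocol is normalized to IPPROTO_ICMP on both halves, so a lookup built from either side lands on the same state. A sketch of such a lookup key, with saddr, daddr and icmp_id standing in for values taken from the packet:

    struct pf_state_key_cmp key;

    bzero(&key, sizeof (key));
    key.af_gwy = AF_INET6;          /* family seen on this side of the NAT */
    key.proto = IPPROTO_ICMP;       /* never IPPROTO_ICMPV6 in the key */
    PF_ACPY(&key.gwy.addr, saddr, key.af_gwy);
    PF_ACPY(&key.ext_gwy.addr, daddr, key.af_gwy);
    key.gwy.xport.port = icmp_id;   /* the echo id plays the role of a port */
    key.ext_gwy.xport.spi = 0;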
        if (pd->pktflags & PKTF_FLOW_ID) {
@@ -4772,7 +5402,6 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                struct pf_ike_hdr ike;
 
                if (pd->proto == IPPROTO_UDP) {
-                       struct udphdr *uh = pd->hdr.udp;
                        size_t plen = m->m_pkthdr.len - off - sizeof (*uh);
 
                        if (ntohs(uh->uh_sport) == PF_IKE_PORT &&
@@ -4797,10 +5426,10 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
                         * partial state is allowed for each external address.
                         */
                        memset(&sk0, 0, sizeof (sk0));
-                       sk0.af = pd->af;
+                       sk0.af_gwy = pd->af;
                        sk0.proto = IPPROTO_ESP;
-                       PF_ACPY(&sk0.gwy.addr, saddr, sk0.af);
-                       PF_ACPY(&sk0.ext.addr, daddr, sk0.af);
+                       PF_ACPY(&sk0.gwy.addr, saddr, sk0.af_gwy);
+                       PF_ACPY(&sk0.ext_gwy.addr, daddr, sk0.af_gwy);
                        s0 = pf_find_state(kif, &sk0, PF_IN);
 
                        if (s0 && PF_ANEQ(&s0->state_key->lan.addr,
@@ -4975,7 +5604,7 @@ cleanup:
                        goto cleanup;
                }
 
-               pf_set_rt_ifp(s, saddr);        /* needs s->state_key set */
+               pf_set_rt_ifp(s, saddr, af);    /* needs s->state_key set */
 
                m = pd->mp;
 
@@ -4983,7 +5612,7 @@ cleanup:
                        switch (pd->proto) {
                        case IPPROTO_TCP: {
                                u_int16_t dport = (direction == PF_OUT) ?
-                                   sk->ext.xport.port : sk->gwy.xport.port;
+                                   sk->ext_gwy.xport.port : sk->gwy.xport.port;
 
                                if (nr != NULL &&
                                    ntohs(dport) == PF_PPTP_PORT) {
@@ -5010,8 +5639,6 @@ cleanup:
                        }
 
                        case IPPROTO_UDP: {
-                               struct udphdr *uh = pd->hdr.udp;
-
                                if (nr != NULL &&
                                    ntohs(uh->uh_sport) == PF_IKE_PORT &&
                                    ntohs(uh->uh_dport) == PF_IKE_PORT) {
@@ -5056,19 +5683,20 @@ cleanup:
                if (pd->proto == IPPROTO_TCP &&
                    (th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
                    r->keep_state == PF_STATE_SYNPROXY) {
+                       int ua = (sk->af_lan == sk->af_gwy) ? 1 : 0;
                        s->src.state = PF_TCPS_PROXY_SRC;
                        if (nr != NULL) {
                                if (direction == PF_OUT) {
                                        pf_change_ap(direction, pd->mp, saddr,
                                            &th->th_sport, pd->ip_sum,
                                            &th->th_sum, &pd->baddr,
-                                           bxport.port, 0, af);
+                                           bxport.port, 0, af, pd->af, ua);
                                        sxport.port = th->th_sport;
                                } else {
                                        pf_change_ap(direction, pd->mp, daddr,
                                            &th->th_dport, pd->ip_sum,
                                            &th->th_sum, &pd->baddr,
-                                           bxport.port, 0, af);
+                                           bxport.port, 0, af, pd->af, ua);
                                        sxport.port = th->th_dport;
                                }
                        }
@@ -5124,6 +5752,11 @@ cleanup:
                }
 
                m_copyback(m, off, hdrlen, pd->hdr.any);
+               if (af == AF_INET6 && pd->naf == AF_INET)
+                       return pf_nat64_ipv6(m, off, pd);
+               else if (af == AF_INET && pd->naf == AF_INET6)
+                       return pf_nat64_ipv4(m, off, pd);
+
        }
 
        return (PF_PASS);
@@ -5375,7 +6008,7 @@ pf_test_dummynet(struct pf_rule **rm, int direction, struct pfi_kif *kif,
                        NTOHS(iphdr->ip_len);
                        NTOHS(iphdr->ip_off);
                }
-               /* 
+               /*
                 * Don't need to unlock pf_lock as NET_THREAD_HELD_PF 
                 * allows for recursive behavior
                 */
@@ -5571,14 +6204,17 @@ pf_pptp_handler(struct pf_state *s, int direction, int off,
 
                memcpy(&gsk->lan, &sk->lan, sizeof (gsk->lan));
                memcpy(&gsk->gwy, &sk->gwy, sizeof (gsk->gwy));
-               memcpy(&gsk->ext, &sk->ext, sizeof (gsk->ext));
-               gsk->af = sk->af;
+               memcpy(&gsk->ext_lan, &sk->ext_lan, sizeof (gsk->ext_lan));
+               memcpy(&gsk->ext_gwy, &sk->ext_gwy, sizeof (gsk->ext_gwy));
+               gsk->af_lan = sk->af_lan;
+               gsk->af_gwy = sk->af_gwy;
                gsk->proto = IPPROTO_GRE;
                gsk->proto_variant = PF_GRE_PPTP_VARIANT;
                gsk->app_state = gas;
                gsk->lan.xport.call_id = 0;
                gsk->gwy.xport.call_id = 0;
-               gsk->ext.xport.call_id = 0;
+               gsk->ext_lan.xport.call_id = 0;
+               gsk->ext_gwy.xport.call_id = 0;
                gsk->flowsrc = FLOWSRC_PF;
                gsk->flowhash = pf_calc_state_key_flowhash(gsk);
                memset(gas, 0, sizeof (*gas));
@@ -5593,7 +6229,7 @@ pf_pptp_handler(struct pf_state *s, int direction, int off,
 
        switch (sk->direction) {
        case PF_IN:
-               pns_call_id = &gsk->ext.xport.call_id;
+               pns_call_id = &gsk->ext_lan.xport.call_id;
                pns_state = &gs->dst.state;
                pac_call_id = &gsk->lan.xport.call_id;
                pac_state = &gs->src.state;
@@ -5602,7 +6238,7 @@ pf_pptp_handler(struct pf_state *s, int direction, int off,
        case PF_OUT:
                pns_call_id = &gsk->lan.xport.call_id;
                pns_state = &gs->src.state;
-               pac_call_id = &gsk->ext.xport.call_id;
+               pac_call_id = &gsk->ext_lan.xport.call_id;
                pac_state = &gs->dst.state;
                break;
 
@@ -5697,13 +6333,13 @@ pf_pptp_handler(struct pf_state *s, int direction, int off,
                        int n = 0;
                        struct pf_state_key_cmp key;
 
-                       key.af = gsk->af;
+                       key.af_gwy = gsk->af_gwy;
                        key.proto = IPPROTO_GRE;
                        key.proto_variant = PF_GRE_PPTP_VARIANT;
-                       PF_ACPY(&key.gwy.addr, &gsk->gwy.addr, key.af);
-                       PF_ACPY(&key.ext.addr, &gsk->ext.addr, key.af);
+                       PF_ACPY(&key.gwy.addr, &gsk->gwy.addr, key.af_gwy);
+                       PF_ACPY(&key.ext_gwy.addr, &gsk->ext_gwy.addr, key.af_gwy);
                        key.gwy.xport.call_id = gsk->gwy.xport.call_id;
-                       key.ext.xport.call_id = gsk->ext.xport.call_id;
+                       key.ext_gwy.xport.call_id = gsk->ext_gwy.xport.call_id;
                        do {
                                call_id = htonl(random());
                        } while (!call_id);
@@ -5758,7 +6394,8 @@ pf_pptp_handler(struct pf_state *s, int direction, int off,
                gs->src.state = gs->dst.state = PFGRE1S_NO_TRAFFIC;
                gsk->lan.xport.call_id = 0;
                gsk->gwy.xport.call_id = 0;
-               gsk->ext.xport.call_id = 0;
+               gsk->ext_lan.xport.call_id = 0;
+               gsk->ext_gwy.xport.call_id = 0;
                gs->id = gs->creatorid = 0;
                break;
 
@@ -5774,7 +6411,7 @@ pf_pptp_handler(struct pf_state *s, int direction, int off,
                        ++gs->nat_src_node->states;
                        VERIFY(gs->nat_src_node->states != 0);
                }
-               pf_set_rt_ifp(gs, &sk->lan.addr);
+               pf_set_rt_ifp(gs, &sk->lan.addr, sk->af_lan);
                if (pf_insert_state(BOUND_IFACE(s->rule.ptr, kif), gs)) {
 
                        /*
@@ -5840,6 +6477,33 @@ pf_ike_compare(struct pf_app_state *a, struct pf_app_state *b)
        return ((d > 0) ? 1 : ((d < 0) ? -1 : 0));
 }
 
+static int
+pf_do_nat64(struct pf_state_key *sk, struct pf_pdesc *pd, struct mbuf *m,
+           int off)
+{
+       if (pd->af == AF_INET) {
+               if (pd->af != sk->af_lan) {
+                       pd->ndaddr = sk->lan.addr;
+                       pd->naddr = sk->ext_lan.addr;
+               } else {
+                       pd->naddr = sk->gwy.addr;
+                       pd->ndaddr = sk->ext_gwy.addr;
+               }
+               return (pf_nat64_ipv4(m, off, pd));
+       }
+       else if (pd->af == AF_INET6) {
+               if (pd->af != sk->af_lan) {
+                       pd->ndaddr = sk->lan.addr;
+                       pd->naddr = sk->ext_lan.addr;
+               } else {
+                       pd->naddr = sk->gwy.addr;
+                       pd->ndaddr = sk->ext_gwy.addr;
+               }
+               return (pf_nat64_ipv6(m, off, pd));
+       }
+       return (PF_DROP);
+}
+
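Once the per-protocol state handlers further down have rewritten the transport header for the other family, they finish a cross-family (NAT64) flow by handing the mbuf to this helper. The pattern below is condensed from the TCP hunk later in this change and is illustrative only (th is the TCP header in that context):

    m_copyback(m, off, sizeof (*th), th);
    if (sk->af_lan != sk->af_gwy)           /* address families differ: NAT64 */
            return (pf_do_nat64(sk, pd, m, off));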
 static int
 pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
     struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
@@ -5854,25 +6518,44 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
        int                      ackskew;
        int                      copyback = 0;
        struct pf_state_peer    *src, *dst;
+       struct pf_state_key     *sk;
 
        key.app_state = 0;
-       key.af = pd->af;
        key.proto = IPPROTO_TCP;
-       if (direction == PF_IN) {
-               PF_ACPY(&key.ext.addr, pd->src, key.af);
-               PF_ACPY(&key.gwy.addr, pd->dst, key.af);
-               key.ext.xport.port = th->th_sport;
-               key.gwy.xport.port = th->th_dport;
-       } else {
-               PF_ACPY(&key.lan.addr, pd->src, key.af);
-               PF_ACPY(&key.ext.addr, pd->dst, key.af);
-               key.lan.xport.port = th->th_sport;
-               key.ext.xport.port = th->th_dport;
-       }
+       key.af_lan = key.af_gwy = pd->af;
+
+       /*
+        * For NAT64, the initial rule search and state creation
+        * are done on the incoming side only.
+        * Once the state is created, NAT64's LAN side (IPv6) will
+        * not be able to find the state in the ext-gwy tree, as that
+        * tree is normally looked up for incoming traffic from the
+        * WAN side.
+        * Therefore, to handle the NAT64 case, we initialize keys
+        * here for both the lan-ext and ext-gwy trees.
+        * In the state lookup we attempt a lookup on both trees if
+        * the first one does not return any result, and return a match
+        * only if the matched state was created by a NAT64 rule.
+        */
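A minimal sketch of the lookup order the comment above describes. The helpers lookup_ext_gwy() and lookup_lan_ext() are hypothetical stand-ins for the two RB-tree searches performed inside pf_find_state(); only the fallback-plus-NAT64-check shape is the point:

    /* lookup_ext_gwy()/lookup_lan_ext() are illustrative names only */
    sk = lookup_ext_gwy(&key);              /* usual WAN-side (ext-gwy) tree */
    if (sk == NULL) {
            sk = lookup_lan_ext(&key);      /* LAN-side (lan-ext) fallback   */
            if (sk != NULL && sk->af_lan == sk->af_gwy)
                    sk = NULL;              /* accept only NAT64-created states */
    }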
+       PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
+       PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
+       key.ext_gwy.xport.port = th->th_sport;
+       key.gwy.xport.port = th->th_dport;
+
+       PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
+       PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
+       key.lan.xport.port = th->th_sport;
+       key.ext_lan.xport.port = th->th_dport;
 
        STATE_LOOKUP();
 
-       if (direction == (*state)->state_key->direction) {
+       sk = (*state)->state_key;
+       /*
+        * In the case of NAT64 the translation is first applied on the
+        * LAN side. Therefore, for the stack's address family comparison,
+        * we use sk->af_lan.
+        */
+       if ((direction == sk->direction) && (pd->af == sk->af_lan)) {
                src = &(*state)->src;
                dst = &(*state)->dst;
        } else {
@@ -5880,26 +6563,26 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                dst = &(*state)->src;
        }
 
-       if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
-               if (direction != (*state)->state_key->direction) {
+       if (src->state == PF_TCPS_PROXY_SRC) {
+               if (direction != sk->direction) {
                        REASON_SET(reason, PFRES_SYNPROXY);
                        return (PF_SYNPROXY_DROP);
                }
                if (th->th_flags & TH_SYN) {
-                       if (ntohl(th->th_seq) != (*state)->src.seqlo) {
+                       if (ntohl(th->th_seq) != src->seqlo) {
                                REASON_SET(reason, PFRES_SYNPROXY);
                                return (PF_DROP);
                        }
                        pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
                            pd->src, th->th_dport, th->th_sport,
-                           (*state)->src.seqhi, ntohl(th->th_seq) + 1,
-                           TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1,
+                           src->seqhi, ntohl(th->th_seq) + 1,
+                           TH_SYN|TH_ACK, 0, src->mss, 0, 1,
                            0, NULL, NULL);
                        REASON_SET(reason, PFRES_SYNPROXY);
                        return (PF_SYNPROXY_DROP);
                } else if (!(th->th_flags & TH_ACK) ||
-                   (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
-                   (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
+                   (ntohl(th->th_ack) != src->seqhi + 1) ||
+                   (ntohl(th->th_seq) != src->seqlo + 1)) {
                        REASON_SET(reason, PFRES_SYNPROXY);
                        return (PF_DROP);
                } else if ((*state)->src_node != NULL &&
@@ -5907,62 +6590,62 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                        REASON_SET(reason, PFRES_SRCLIMIT);
                        return (PF_DROP);
                } else
-                       (*state)->src.state = PF_TCPS_PROXY_DST;
+                       src->state = PF_TCPS_PROXY_DST;
        }
-       if ((*state)->src.state == PF_TCPS_PROXY_DST) {
+       if (src->state == PF_TCPS_PROXY_DST) {
                struct pf_state_host *psrc, *pdst;
 
                if (direction == PF_OUT) {
-                       psrc = &(*state)->state_key->gwy;
-                       pdst = &(*state)->state_key->ext;
+                       psrc = &sk->gwy;
+                       pdst = &sk->ext_gwy;
                } else {
-                       psrc = &(*state)->state_key->ext;
-                       pdst = &(*state)->state_key->lan;
+                       psrc = &sk->ext_lan;
+                       pdst = &sk->lan;
                }
-               if (direction == (*state)->state_key->direction) {
+               if (direction == sk->direction) {
                        if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
-                           (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
-                           (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
+                           (ntohl(th->th_ack) != src->seqhi + 1) ||
+                           (ntohl(th->th_seq) != src->seqlo + 1)) {
                                REASON_SET(reason, PFRES_SYNPROXY);
                                return (PF_DROP);
                        }
-                       (*state)->src.max_win = MAX(ntohs(th->th_win), 1);
-                       if ((*state)->dst.seqhi == 1)
-                               (*state)->dst.seqhi = htonl(random());
+                       src->max_win = MAX(ntohs(th->th_win), 1);
+                       if (dst->seqhi == 1)
+                               dst->seqhi = htonl(random());
                        pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
                            &pdst->addr, psrc->xport.port, pdst->xport.port,
-                           (*state)->dst.seqhi, 0, TH_SYN, 0,
-                           (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL);
+                           dst->seqhi, 0, TH_SYN, 0,
+                           src->mss, 0, 0, (*state)->tag, NULL, NULL);
                        REASON_SET(reason, PFRES_SYNPROXY);
                        return (PF_SYNPROXY_DROP);
                } else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
                    (TH_SYN|TH_ACK)) ||
-                   (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
+                   (ntohl(th->th_ack) != dst->seqhi + 1)) {
                        REASON_SET(reason, PFRES_SYNPROXY);
                        return (PF_DROP);
                } else {
-                       (*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
-                       (*state)->dst.seqlo = ntohl(th->th_seq);
+                       dst->max_win = MAX(ntohs(th->th_win), 1);
+                       dst->seqlo = ntohl(th->th_seq);
                        pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
                            pd->src, th->th_dport, th->th_sport,
                            ntohl(th->th_ack), ntohl(th->th_seq) + 1,
-                           TH_ACK, (*state)->src.max_win, 0, 0, 0,
+                           TH_ACK, src->max_win, 0, 0, 0,
                            (*state)->tag, NULL, NULL);
                        pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
                            &pdst->addr, psrc->xport.port, pdst->xport.port,
-                           (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
-                           TH_ACK, (*state)->dst.max_win, 0, 0, 1,
+                           src->seqhi + 1, src->seqlo + 1,
+                           TH_ACK, dst->max_win, 0, 0, 1,
                            0, NULL, NULL);
-                       (*state)->src.seqdiff = (*state)->dst.seqhi -
-                           (*state)->src.seqlo;
-                       (*state)->dst.seqdiff = (*state)->src.seqhi -
-                           (*state)->dst.seqlo;
-                       (*state)->src.seqhi = (*state)->src.seqlo +
-                           (*state)->dst.max_win;
-                       (*state)->dst.seqhi = (*state)->dst.seqlo +
-                           (*state)->src.max_win;
-                       (*state)->src.wscale = (*state)->dst.wscale = 0;
-                       (*state)->src.state = (*state)->dst.state =
+                       src->seqdiff = dst->seqhi -
+                           src->seqlo;
+                       dst->seqdiff = src->seqhi -
+                           dst->seqlo;
+                       src->seqhi = src->seqlo +
+                           dst->max_win;
+                       dst->seqhi = dst->seqlo +
+                           src->max_win;
+                       src->wscale = dst->wscale = 0;
+                       src->state = dst->state =
                            TCPS_ESTABLISHED;
                        REASON_SET(reason, PFRES_SYNPROXY);
                        return (PF_SYNPROXY_DROP);
@@ -5979,7 +6662,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                        printf("\n");
                }
                /* XXX make sure it's the same direction ?? */
-               (*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
+               src->state = dst->state = TCPS_CLOSED;
                pf_unlink_state(*state);
                *state = NULL;
                return (PF_DROP);
@@ -6252,7 +6935,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                            pd->p_len, ackskew, (*state)->packets[0],
                            (*state)->packets[1],
                            direction == PF_IN ? "in" : "out",
-                           direction == (*state)->state_key->direction ?
+                           direction == sk->direction ?
                            "fwd" : "rev");
                }
 
@@ -6287,8 +6970,8 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                /* Fall through to PASS packet */
 
        } else {
-               if ((*state)->dst.state == TCPS_SYN_SENT &&
-                   (*state)->src.state == TCPS_SYN_SENT) {
+               if (dst->state == TCPS_SYN_SENT &&
+                   src->state == TCPS_SYN_SENT) {
                        /* Send RST for state mismatches during handshake */
                        if (!(th->th_flags & TH_RST))
                                pf_send_tcp((*state)->rule.ptr, pd->af,
@@ -6310,7 +6993,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
                            (unsigned int)sws, (unsigned int)dws,
                            (*state)->packets[0], (*state)->packets[1],
                            direction == PF_IN ? "in" : "out",
-                           direction == (*state)->state_key->direction ?
+                           direction == sk->direction ?
                            "fwd" : "rev");
                        printf("pf: State failure on: %c %c %c %c | %c %c\n",
                            SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
@@ -6328,9 +7011,9 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
 
        /* Any packets which have gotten here are to be passed */
 
-       if ((*state)->state_key->app_state &&
-           (*state)->state_key->app_state->handler) {
-               (*state)->state_key->app_state->handler(*state, direction,
+       if (sk->app_state &&
+           sk->app_state->handler) {
+               sk->app_state->handler(*state, direction,
                    off + (th->th_off << 2), pd, kif);
                if (pd->lmw < 0) {
                        REASON_SET(reason, PFRES_MEMORY);
@@ -6340,17 +7023,50 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
        }
 
        /* translate source/destination address, if necessary */
-       if (STATE_TRANSLATE((*state)->state_key)) {
-               if (direction == PF_OUT)
+       if (STATE_TRANSLATE(sk)) {
+               pd->naf = (pd->af == sk->af_lan) ? sk->af_gwy : sk->af_lan;
+
+               if (direction == PF_OUT) {
                        pf_change_ap(direction, pd->mp, pd->src, &th->th_sport,
-                           pd->ip_sum, &th->th_sum,
-                           &(*state)->state_key->gwy.addr,
-                           (*state)->state_key->gwy.xport.port, 0, pd->af);
-               else
-                       pf_change_ap(direction, pd->mp, pd->dst, &th->th_dport,
-                           pd->ip_sum, &th->th_sum,
-                           &(*state)->state_key->lan.addr,
-                           (*state)->state_key->lan.xport.port, 0, pd->af);
+                                    pd->ip_sum, &th->th_sum, &sk->gwy.addr,
+                                    sk->gwy.xport.port, 0, pd->af, pd->naf, 1);
+               } else {
+                       if (pd->af != pd->naf) {
+                               if (pd->af == sk->af_gwy) {
+                                       pf_change_ap(direction, pd->mp, pd->dst,
+                                               &th->th_dport, pd->ip_sum,
+                                               &th->th_sum, &sk->lan.addr,
+                                               sk->lan.xport.port, 0,
+                                               pd->af, pd->naf, 0);
+
+                                       pf_change_ap(direction, pd->mp, pd->src,
+                                               &th->th_sport, pd->ip_sum,
+                                               &th->th_sum, &sk->ext_lan.addr,
+                                               th->th_sport, 0, pd->af,
+                                               pd->naf, 0);
+
+                               } else {
+                                       pf_change_ap(direction, pd->mp, pd->dst,
+                                               &th->th_dport, pd->ip_sum,
+                                               &th->th_sum, &sk->ext_gwy.addr,
+                                               th->th_dport, 0, pd->af,
+                                               pd->naf, 0);
+
+                                       pf_change_ap(direction, pd->mp, pd->src,
+                                               &th->th_sport, pd->ip_sum,
+                                               &th->th_sum, &sk->gwy.addr,
+                                               sk->gwy.xport.port, 0, pd->af,
+                                               pd->naf, 0);
+                               }
+                       } else {
+                               pf_change_ap(direction, pd->mp, pd->dst,
+                                            &th->th_dport, pd->ip_sum,
+                                            &th->th_sum, &sk->lan.addr,
+                                            sk->lan.xport.port, 0, pd->af,
+                                            pd->naf, 1);
+                       }
+               }
+
                copyback = off + sizeof (*th);
        }
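When the address families differ, the inbound branch above must pick a different address pair depending on which side's family the packet arrived with. A compact summary of what the four pf_change_ap() calls select (mirroring the code above, for orientation only):

    /* Inbound packet, sk->af_lan != sk->af_gwy (NAT64):
     *   arrived in the gateway-side family (pd->af == sk->af_gwy):
     *       dst -> sk->lan.addr + sk->lan.xport.port
     *       src -> sk->ext_lan.addr          (port left as is)
     *   arrived in the LAN-side family (pd->af == sk->af_lan):
     *       dst -> sk->ext_gwy.addr          (port left as is)
     *       src -> sk->gwy.addr + sk->gwy.xport.port
     */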
 
@@ -6363,8 +7079,10 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
 
                /* Copyback sequence modulation or stateful scrub changes */
                m_copyback(m, off, sizeof (*th), th);
-       }
 
+               if (sk->af_lan != sk->af_gwy)
+                       return (pf_do_nat64(sk, pd, m, off));
+       }
        return (PF_PASS);
 }
 
@@ -6375,27 +7093,38 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
 #pragma unused(h)
        struct pf_state_peer    *src, *dst;
        struct pf_state_key_cmp  key;
+       struct pf_state_key     *sk;
        struct udphdr           *uh = pd->hdr.udp;
        struct pf_app_state as;
-       int dx, action, extfilter;
+       int action, extfilter;
        key.app_state = 0;
        key.proto_variant = PF_EXTFILTER_APD;
 
-       key.af = pd->af;
        key.proto = IPPROTO_UDP;
-       if (direction == PF_IN) {
-               PF_ACPY(&key.ext.addr, pd->src, key.af);
-               PF_ACPY(&key.gwy.addr, pd->dst, key.af);
-               key.ext.xport.port = uh->uh_sport;
-               key.gwy.xport.port = uh->uh_dport;
-               dx = PF_IN;
-       } else {
-               PF_ACPY(&key.lan.addr, pd->src, key.af);
-               PF_ACPY(&key.ext.addr, pd->dst, key.af);
-               key.lan.xport.port = uh->uh_sport;
-               key.ext.xport.port = uh->uh_dport;
-               dx = PF_OUT;
-       }
+       key.af_lan = key.af_gwy = pd->af;
+
+       /*
+        * For NAT64, the initial rule search and state creation
+        * are done on the incoming side only.
+        * Once the state is created, NAT64's LAN side (IPv6) will
+        * not be able to find the state in the ext-gwy tree, as that
+        * tree is normally looked up for incoming traffic from the
+        * WAN side.
+        * Therefore, to handle the NAT64 case, we initialize keys
+        * here for both the lan-ext and ext-gwy trees.
+        * In the state lookup we attempt a lookup on both trees if
+        * the first one does not return any result, and return a match
+        * only if the matched state was created by a NAT64 rule.
+        */
+       PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
+       PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
+       key.ext_gwy.xport.port = uh->uh_sport;
+       key.gwy.xport.port = uh->uh_dport;
+
+       PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
+       PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
+       key.lan.xport.port = uh->uh_sport;
+       key.ext_lan.xport.port = uh->uh_dport;
 
        if (ntohs(uh->uh_sport) == PF_IKE_PORT &&
            ntohs(uh->uh_dport) == PF_IKE_PORT) {
@@ -6429,16 +7158,16 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
                }
        }
 
-       *state = pf_find_state(kif, &key, dx);
+       *state = pf_find_state(kif, &key, direction);
 
        if (!key.app_state && *state == 0) {
                key.proto_variant = PF_EXTFILTER_AD;
-               *state = pf_find_state(kif, &key, dx);
+               *state = pf_find_state(kif, &key, direction);
        }
 
        if (!key.app_state && *state == 0) {
                key.proto_variant = PF_EXTFILTER_EI;
-               *state = pf_find_state(kif, &key, dx);
+               *state = pf_find_state(kif, &key, direction);
        }
 
        /* similar to STATE_LOOKUP() */
@@ -6454,7 +7183,14 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
        if (pf_state_lookup_aux(state, kif, direction, &action))
                return (action);
 
-       if (direction == (*state)->state_key->direction) {
+       sk = (*state)->state_key;
+
+       /*
+        * In the case of NAT64 the translation is first applied on the
+        * LAN side. Therefore, for the stack's address family comparison,
+        * we use sk->af_lan.
+        */
+       if ((direction == sk->direction) && (pd->af == sk->af_lan)) {
                src = &(*state)->src;
                dst = &(*state)->dst;
        } else {
@@ -6475,18 +7211,24 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
        else
                (*state)->timeout = PFTM_UDP_SINGLE;
 
-       extfilter = (*state)->state_key->proto_variant;
+       extfilter = sk->proto_variant;
        if (extfilter > PF_EXTFILTER_APD) {
-               (*state)->state_key->ext.xport.port = key.ext.xport.port;
-               if (extfilter > PF_EXTFILTER_AD)
-                       PF_ACPY(&(*state)->state_key->ext.addr,
-                           &key.ext.addr, key.af);
+               if (direction == PF_OUT) {
+                       sk->ext_lan.xport.port = key.ext_lan.xport.port;
+                       if (extfilter > PF_EXTFILTER_AD)
+                               PF_ACPY(&sk->ext_lan.addr, &key.ext_lan.addr,
+                                       key.af_lan);
+               } else {
+                       sk->ext_gwy.xport.port = key.ext_gwy.xport.port;
+                       if (extfilter > PF_EXTFILTER_AD)
+                               PF_ACPY(&sk->ext_gwy.addr, &key.ext_gwy.addr,
+                                       key.af_gwy);
+               }
        }
 
-       if ((*state)->state_key->app_state &&
-           (*state)->state_key->app_state->handler) {
-               (*state)->state_key->app_state->handler(*state, direction,
-                   off + uh->uh_ulen, pd, kif);
+       if (sk->app_state && sk->app_state->handler) {
+               sk->app_state->handler(*state, direction, off + uh->uh_ulen,
+                                       pd, kif);
                if (pd->lmw < 0) {
                        REASON_SET(reason, PFRES_MEMORY);
                        return (PF_DROP);
@@ -6495,26 +7237,61 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
        }
 
        /* translate source/destination address, if necessary */
-       if (STATE_TRANSLATE((*state)->state_key)) {
+       if (STATE_TRANSLATE(sk)) {
                m = pf_lazy_makewritable(pd, m, off + sizeof (*uh));
                if (!m) {
                        REASON_SET(reason, PFRES_MEMORY);
                        return (PF_DROP);
                }
 
-               if (direction == PF_OUT)
+               pd->naf = (pd->af == sk->af_lan) ? sk->af_gwy : sk->af_lan;
+
+               if (direction == PF_OUT) {
                        pf_change_ap(direction, pd->mp, pd->src, &uh->uh_sport,
-                           pd->ip_sum, &uh->uh_sum,
-                           &(*state)->state_key->gwy.addr,
-                           (*state)->state_key->gwy.xport.port, 1, pd->af);
-               else
-                       pf_change_ap(direction, pd->mp, pd->dst, &uh->uh_dport,
-                           pd->ip_sum, &uh->uh_sum,
-                           &(*state)->state_key->lan.addr,
-                           (*state)->state_key->lan.xport.port, 1, pd->af);
+                                    pd->ip_sum, &uh->uh_sum, &sk->gwy.addr,
+                                    sk->gwy.xport.port, 1, pd->af, pd->naf, 1);
+               } else {
+                       if (pd->af != pd->naf) {
+
+                               if (pd->af == sk->af_gwy) {
+                                       pf_change_ap(direction, pd->mp, pd->dst,
+                                               &uh->uh_dport, pd->ip_sum,
+                                               &uh->uh_sum, &sk->lan.addr,
+                                               sk->lan.xport.port, 1,
+                                               pd->af, pd->naf, 0);
+
+                                       pf_change_ap(direction, pd->mp, pd->src,
+                                               &uh->uh_sport, pd->ip_sum,
+                                               &uh->uh_sum, &sk->ext_lan.addr,
+                                               uh->uh_sport, 1, pd->af,
+                                               pd->naf, 0);
+
+                               } else {
+                                       pf_change_ap(direction, pd->mp, pd->dst,
+                                               &uh->uh_dport, pd->ip_sum,
+                                               &uh->uh_sum, &sk->ext_gwy.addr,
+                                               uh->uh_dport, 1, pd->af,
+                                               pd->naf, 0);
+
+                                       pf_change_ap(direction, pd->mp, pd->src,
+                                               &uh->uh_sport, pd->ip_sum,
+                                               &uh->uh_sum, &sk->gwy.addr,
+                                               sk->gwy.xport.port, 1, pd->af,
+                                               pd->naf, 0);
+                               }
+                       } else {
+                               pf_change_ap(direction, pd->mp, pd->dst,
+                                               &uh->uh_dport, pd->ip_sum,
+                                               &uh->uh_sum, &sk->lan.addr,
+                                               sk->lan.xport.port, 1,
+                                               pd->af, pd->naf, 1);
+                       }
+               }
+
                m_copyback(m, off, sizeof (*uh), uh);
+               if (sk->af_lan != sk->af_gwy)
+                       return (pf_do_nat64(sk, pd, m, off));
        }
-
        return (PF_PASS);
 }
 
@@ -6524,14 +7301,18 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
 {
 #pragma unused(h)
        struct pf_addr  *saddr = pd->src, *daddr = pd->dst;
+       struct in_addr  srcv4_inaddr = saddr->v4;
        u_int16_t        icmpid = 0, *icmpsum;
        u_int8_t         icmptype;
        int              state_icmp = 0;
        struct pf_state_key_cmp key;
+       struct pf_state_key     *sk;
 
        struct pf_app_state as;
        key.app_state = 0;
 
+       pd->off = off;
+
        switch (pd->proto) {
 #if INET
        case IPPROTO_ICMP:
@@ -6568,42 +7349,51 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                 * ICMP query/reply message not related to a TCP/UDP packet.
                 * Search for an ICMP state.
                 */
-               key.af = pd->af;
-               key.proto = pd->proto;
-               if (direction == PF_IN) {
-                       PF_ACPY(&key.ext.addr, pd->src, key.af);
-                       PF_ACPY(&key.gwy.addr, pd->dst, key.af);
-                       key.ext.xport.port = 0;
-                       key.gwy.xport.port = icmpid;
-               } else {
-                       PF_ACPY(&key.lan.addr, pd->src, key.af);
-                       PF_ACPY(&key.ext.addr, pd->dst, key.af);
-                       key.lan.xport.port = icmpid;
-                       key.ext.xport.port = 0;
-               }
+               /*
+                * NAT64 requires protocol translation between ICMPv4
+                * and ICMPv6. TCP and UDP do not require protocol
+                * translation. To avoid adding complexity just to
+                * handle ICMP(v4/v6), we always look up
+                * proto = IPPROTO_ICMP on both the LAN and WAN side.
+                */
+               key.proto = IPPROTO_ICMP;
+               key.af_lan = key.af_gwy = pd->af;
+
+               PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
+               PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
+               key.ext_gwy.xport.port = 0;
+               key.gwy.xport.port = icmpid;
+
+               PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
+               PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
+               key.lan.xport.port = icmpid;
+               key.ext_lan.xport.port = 0;
 
                STATE_LOOKUP();
 
+               sk = (*state)->state_key;
                (*state)->expire = pf_time_second();
                (*state)->timeout = PFTM_ICMP_ERROR_REPLY;
 
                /* translate source/destination address, if necessary */
-               if (STATE_TRANSLATE((*state)->state_key)) {
+               if (STATE_TRANSLATE(sk)) {
+                       pd->naf = (pd->af == sk->af_lan) ?
+                                               sk->af_gwy : sk->af_lan;
                        if (direction == PF_OUT) {
                                switch (pd->af) {
 #if INET
                                case AF_INET:
                                        pf_change_a(&saddr->v4.s_addr,
                                            pd->ip_sum,
-                                           (*state)->state_key->gwy.addr.v4.s_addr, 0);
+                                           sk->gwy.addr.v4.s_addr, 0);
                                        pd->hdr.icmp->icmp_cksum =
                                            pf_cksum_fixup(
                                            pd->hdr.icmp->icmp_cksum, icmpid,
-                                           (*state)->state_key->gwy.xport.port, 0);
+                                           sk->gwy.xport.port, 0);
                                        pd->hdr.icmp->icmp_id =
-                                           (*state)->state_key->gwy.xport.port;
+                                                       sk->gwy.xport.port;
                                        m = pf_lazy_makewritable(pd, m,
-                                           off + ICMP_MINLEN);
+                                                       off + ICMP_MINLEN);
                                        if (!m)
                                                return (PF_DROP);
                                        m_copyback(m, off, ICMP_MINLEN,
@@ -6614,7 +7404,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                case AF_INET6:
                                        pf_change_a6(saddr,
                                            &pd->hdr.icmp6->icmp6_cksum,
-                                           &(*state)->state_key->gwy.addr, 0);
+                                           &sk->gwy.addr, 0);
                                        m = pf_lazy_makewritable(pd, m,
                                            off + sizeof (struct icmp6_hdr));
                                        if (!m)
@@ -6629,35 +7419,62 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                switch (pd->af) {
 #if INET
                                case AF_INET:
-                                       pf_change_a(&daddr->v4.s_addr,
-                                           pd->ip_sum,
-                                           (*state)->state_key->lan.addr.v4.s_addr, 0);
-                                       pd->hdr.icmp->icmp_cksum =
-                                           pf_cksum_fixup(
-                                           pd->hdr.icmp->icmp_cksum, icmpid,
-                                           (*state)->state_key->lan.xport.port, 0);
-                                       pd->hdr.icmp->icmp_id =
-                                           (*state)->state_key->lan.xport.port;
+                                       if (pd->naf != AF_INET) {
+                                               if (pf_translate_icmp_af(
+                                                       AF_INET6, pd->hdr.icmp))
+                                                       return (PF_DROP);
+
+                                               pd->proto = IPPROTO_ICMPV6;
+
+                                       } else {
+
+                                               pf_change_a(&daddr->v4.s_addr,
+                                               pd->ip_sum,
+                                               sk->lan.addr.v4.s_addr, 0);
+
+                                               pd->hdr.icmp->icmp_cksum =
+                                               pf_cksum_fixup(
+                                               pd->hdr.icmp->icmp_cksum,
+                                               icmpid, sk->lan.xport.port, 0);
+
+                                               pd->hdr.icmp->icmp_id =
+                                                       sk->lan.xport.port;
+                                       }
+
                                        m = pf_lazy_makewritable(pd, m,
                                            off + ICMP_MINLEN);
                                        if (!m)
                                                return (PF_DROP);
                                        m_copyback(m, off, ICMP_MINLEN,
-                                           pd->hdr.icmp);
+                                                       pd->hdr.icmp);
+                                       if (sk->af_lan != sk->af_gwy)
+                                               return (pf_do_nat64(sk, pd, m,
+                                                                  off));
                                        break;
 #endif /* INET */
 #if INET6
                                case AF_INET6:
-                                       pf_change_a6(daddr,
-                                           &pd->hdr.icmp6->icmp6_cksum,
-                                           &(*state)->state_key->lan.addr, 0);
+                                       if (pd->naf != AF_INET6) {
+                                               if (pf_translate_icmp_af(
+                                                       AF_INET, pd->hdr.icmp6))
+                                                       return (PF_DROP);
+
+                                               pd->proto = IPPROTO_ICMP;
+                                       } else {
+                                               pf_change_a6(daddr,
+                                               &pd->hdr.icmp6->icmp6_cksum,
+                                               &sk->lan.addr, 0);
+                                       }
                                        m = pf_lazy_makewritable(pd, m,
                                            off + sizeof (struct icmp6_hdr));
                                        if (!m)
                                                return (PF_DROP);
                                        m_copyback(m, off,
-                                           sizeof (struct icmp6_hdr),
-                                           pd->hdr.icmp6);
+                                               sizeof (struct icmp6_hdr),
+                                               pd->hdr.icmp6);
+                                       if (sk->af_lan != sk->af_gwy)
+                                               return (pf_do_nat64(sk, pd, m,
+                                                                  off));
                                        break;
 #endif /* INET6 */
                                }
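For reference, the echo query/reply remapping that a helper such as pf_translate_icmp_af() (called in the hunk above) must perform is fixed by the protocols themselves. A standalone sketch using only standard netinet constants; the function name icmp_echo_type_xlate() is invented here, and the real helper additionally rewrites checksums and the error-class types:

    #include <sys/socket.h>         /* AF_INET, AF_INET6 */
    #include <netinet/in.h>
    #include <netinet/ip.h>
    #include <netinet/ip_icmp.h>    /* ICMP_ECHO, ICMP_ECHOREPLY */
    #include <netinet/icmp6.h>      /* ICMP6_ECHO_REQUEST, ICMP6_ECHO_REPLY */

    /* Map an echo type into the other address family; -1 means the type
     * needs the fuller error-class translation not shown here. */
    static int
    icmp_echo_type_xlate(int naf, int type)
    {
            if (naf == AF_INET6) {
                    if (type == ICMP_ECHO)
                            return (ICMP6_ECHO_REQUEST);
                    if (type == ICMP_ECHOREPLY)
                            return (ICMP6_ECHO_REPLY);
            } else {
                    if (type == ICMP6_ECHO_REQUEST)
                            return (ICMP_ECHO);
                    if (type == ICMP6_ECHO_REPLY)
                            return (ICMP_ECHOREPLY);
            }
            return (-1);
    }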
@@ -6671,8 +7488,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                 * ICMP error message in response to a TCP/UDP packet.
                 * Extract the inner TCP/UDP header and search for that state.
                 */
-
-               struct pf_pdesc pd2;
+               struct pf_pdesc pd2; /* For inner (original) header */
 #if INET
                struct ip       h2;
 #endif /* INET */
@@ -6710,6 +7526,8 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
 
                        /* offset of protocol header that follows h2 */
                        off2 = ipoff2 + (h2.ip_hl << 2);
+                       /* TODO */
+                       pd2.off = ipoff2 + (h2.ip_hl << 2);
 
                        pd2.proto = h2.ip_p;
                        pd2.src = (struct pf_addr *)&h2.ip_src;
@@ -6769,6 +7587,8 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                        break;
                                }
                        } while (!terminal);
+                       /* TODO */
+                       pd2.off = ipoff2;
                        break;
 #endif /* INET6 */
                }
@@ -6794,23 +7614,25 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                return (PF_DROP);
                        }
 
-                       key.af = pd2.af;
                        key.proto = IPPROTO_TCP;
-                       if (direction == PF_IN) {
-                               PF_ACPY(&key.ext.addr, pd2.dst, key.af);
-                               PF_ACPY(&key.gwy.addr, pd2.src, key.af);
-                               key.ext.xport.port = th.th_dport;
-                               key.gwy.xport.port = th.th_sport;
-                       } else {
-                               PF_ACPY(&key.lan.addr, pd2.dst, key.af);
-                               PF_ACPY(&key.ext.addr, pd2.src, key.af);
-                               key.lan.xport.port = th.th_dport;
-                               key.ext.xport.port = th.th_sport;
-                       }
+                       key.af_gwy = pd2.af;
+                       PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
+                       PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
+                       key.ext_gwy.xport.port = th.th_dport;
+                       key.gwy.xport.port = th.th_sport;
+
+                       key.af_lan = pd2.af;
+                       PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
+                       PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
+                       key.lan.xport.port = th.th_dport;
+                       key.ext_lan.xport.port = th.th_sport;
 
                        STATE_LOOKUP();
 
-                       if (direction == (*state)->state_key->direction) {
+                       sk = (*state)->state_key;
+                       if ((direction == sk->direction) &&
+                           ((sk->af_lan == sk->af_gwy) ||
+                            (pd2.af == sk->af_lan))) {
                                src = &(*state)->dst;
                                dst = &(*state)->src;
                        } else {
@@ -6848,17 +7670,93 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                return (PF_DROP);
                        }
 
-                       if (STATE_TRANSLATE((*state)->state_key)) {
+                       pd->naf = pd2.naf = (pd2.af == sk->af_lan) ?
+                                               sk->af_gwy : sk->af_lan;
+
+                       if (STATE_TRANSLATE(sk)) {
+                               /* NAT64 case */
+                               if (sk->af_lan != sk->af_gwy) {
+                                       struct pf_state_host *saddr2, *daddr2;
+
+                                       if (pd2.naf == sk->af_lan) {
+                                               saddr2 = &sk->lan;
+                                               daddr2 = &sk->ext_lan;
+                                       } else {
+                                               saddr2 = &sk->ext_gwy;
+                                               daddr2 = &sk->gwy;
+                                       }
+
+                                       /* translate ICMP message types and codes */
+                                       if (pf_translate_icmp_af(pd->naf,
+                                               pd->hdr.icmp))
+                                               return (PF_DROP);
+                                       m = pf_lazy_makewritable(pd, m,
+                                           off2 + 8);
+                                       if (!m)
+                                               return (PF_DROP);
+
+                                       m_copyback(m, pd->off,
+                                               sizeof(struct icmp6_hdr),
+                                               pd->hdr.icmp6);
+
+                                       /*
+                                        * translate inner ip header within the
+                                        * ICMP message
+                                        */
+                                       if (pf_change_icmp_af(m, ipoff2, pd,
+                                           &pd2, &saddr2->addr, &daddr2->addr,
+                                           pd->af, pd->naf))
+                                               return (PF_DROP);
+
+                                       if (pd->naf == AF_INET)
+                                               pd->proto = IPPROTO_ICMP;
+                                       else
+                                               pd->proto = IPPROTO_ICMPV6;
+
+                                       /*
+                                        * translate inner tcp header within
+                                        * the ICMP message
+                                        */
+                                       pf_change_ap(direction, NULL, pd2.src,
+                                               &th.th_sport, pd2.ip_sum,
+                                               &th.th_sum, &daddr2->addr,
+                                               saddr2->xport.port, 0, pd2.af,
+                                               pd2.naf, 0);
+
+                                       pf_change_ap(direction, NULL, pd2.dst,
+                                               &th.th_dport, pd2.ip_sum,
+                                               &th.th_sum, &saddr2->addr,
+                                               daddr2->xport.port, 0, pd2.af,
+                                               pd2.naf, 0);
+
+                                       m_copyback(m, pd2.off, 8, &th);
+
+                                       /* translate outer ip header */
+                                       PF_ACPY(&pd->naddr, &daddr2->addr,
+                                               pd->naf);
+                                       PF_ACPY(&pd->ndaddr, &saddr2->addr,
+                                               pd->naf);
+                                       if (pd->af == AF_INET) {
+                                               memcpy(&pd->naddr.addr32[3],
+                                                   &srcv4_inaddr,
+                                                   sizeof(pd->naddr.addr32[3]));
+                                               return (pf_nat64_ipv4(m, off,
+                                                                       pd));
+                                       } else {
+                                               return (pf_nat64_ipv6(m, off,
+                                                                       pd));
+                                       }
+                               }
                                if (direction == PF_IN) {
                                        pf_change_icmp(pd2.src, &th.th_sport,
-                                           daddr, &(*state)->state_key->lan.addr,
-                                           (*state)->state_key->lan.xport.port, NULL,
+                                           daddr, &sk->lan.addr,
+                                           sk->lan.xport.port, NULL,
                                            pd2.ip_sum, icmpsum,
                                            pd->ip_sum, 0, pd2.af);
                                } else {
                                        pf_change_icmp(pd2.dst, &th.th_dport,
-                                           saddr, &(*state)->state_key->gwy.addr,
-                                           (*state)->state_key->gwy.xport.port, NULL,
+                                           saddr, &sk->gwy.addr,
+                                           sk->gwy.xport.port, NULL,
                                            pd2.ip_sum, icmpsum,
                                            pd->ip_sum, 0, pd2.af);
                                }
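The NAT64 branch in the hunk above rewrites an ICMP error end to end; a compact outline of the order it follows, using the names from the code (descriptive only):

    /* 1. pf_translate_icmp_af()  - convert the outer ICMP header to pd->naf
     * 2. pf_change_icmp_af()     - rebuild the embedded (offending) IP header
     * 3. pf_change_ap() x2       - rewrite the embedded TCP ports/addresses
     * 4. pf_nat64_ipv4()/ipv6()  - emit the packet with the new outer header;
     *    when the error arrived as IPv4 (pd->af == AF_INET), the saved
     *    srcv4_inaddr is first grafted into pd->naddr.addr32[3]
     */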
@@ -6895,7 +7793,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                        break;
                }
                case IPPROTO_UDP: {
-                       struct udphdr           uh;
+                       struct udphdr uh;
                        int dx, action;
                        if (!pf_pull_hdr(m, off2, &uh, sizeof (uh),
                            NULL, reason, pd2.af)) {
@@ -6905,23 +7803,21 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                return (PF_DROP);
                        }
 
-                       key.af = pd2.af;
-                       key.proto = IPPROTO_UDP;
-                       if (direction == PF_IN) {
-                               PF_ACPY(&key.ext.addr, pd2.dst, key.af);
-                               PF_ACPY(&key.gwy.addr, pd2.src, key.af);
-                               key.ext.xport.port = uh.uh_dport;
-                               key.gwy.xport.port = uh.uh_sport;
-                               dx = PF_IN;
-                       } else {
-                               PF_ACPY(&key.lan.addr, pd2.dst, key.af);
-                               PF_ACPY(&key.ext.addr, pd2.src, key.af);
-                               key.lan.xport.port = uh.uh_dport;
-                               key.ext.xport.port = uh.uh_sport;
-                               dx = PF_OUT;
-                       }
+                       key.af_gwy = pd2.af;
+                       PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
+                       PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
+                       key.ext_gwy.xport.port = uh.uh_dport;
+                       key.gwy.xport.port = uh.uh_sport;
 
+                       key.af_lan = pd2.af;
+                       PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
+                       PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
+                       key.lan.xport.port = uh.uh_dport;
+                       key.ext_lan.xport.port = uh.uh_sport;
+
+                       key.proto = IPPROTO_UDP;
                        key.proto_variant = PF_EXTFILTER_APD;
+                       dx = direction;
 
                        if (ntohs(uh.uh_sport) == PF_IKE_PORT &&
                            ntohs(uh.uh_dport) == PF_IKE_PORT) {
@@ -6977,17 +7873,94 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                        if (pf_state_lookup_aux(state, kif, direction, &action))
                                return (action);
 
-                       if (STATE_TRANSLATE((*state)->state_key)) {
+                       sk = (*state)->state_key;
+                       pd->naf = pd2.naf = (pd2.af == sk->af_lan) ?
+                                               sk->af_gwy : sk->af_lan;
+
+                       if (STATE_TRANSLATE(sk)) {
+                               /* NAT64 case */
+                               if (sk->af_lan != sk->af_gwy) {
+                                       struct pf_state_host *saddr2, *daddr2;
+
+                                       if (pd2.naf == sk->af_lan) {
+                                               saddr2 = &sk->lan;
+                                               daddr2 = &sk->ext_lan;
+                                       } else {
+                                               saddr2 = &sk->ext_gwy;
+                                               daddr2 = &sk->gwy;
+                                       }
+
+                                       /* translate ICMP message */
+                                       if (pf_translate_icmp_af(pd->naf,
+                                               pd->hdr.icmp))
+                                               return (PF_DROP);
+                                       m = pf_lazy_makewritable(pd, m,
+                                           off2 + 8);
+                                       if (!m)
+                                               return (PF_DROP);
+
+                                       m_copyback(m, pd->off,
+                                               sizeof(struct icmp6_hdr),
+                                               pd->hdr.icmp6);
+
+                                       /*
+                                        * translate inner ip header within the
+                                        * ICMP message
+                                        */
+                                       if (pf_change_icmp_af(m, ipoff2, pd,
+                                           &pd2, &saddr2->addr, &daddr2->addr,
+                                           pd->af, pd->naf))
+                                               return (PF_DROP);
+
+                                       if (pd->naf == AF_INET)
+                                               pd->proto = IPPROTO_ICMP;
+                                       else
+                                               pd->proto = IPPROTO_ICMPV6;
+
+                                       /*
+                                        * translate inner udp header within
+                                        * the ICMP message
+                                        */
+                                       pf_change_ap(direction, NULL, pd2.src,
+                                               &uh.uh_sport, pd2.ip_sum,
+                                               &uh.uh_sum, &daddr2->addr,
+                                               saddr2->xport.port, 0, pd2.af,
+                                               pd2.naf, 0);
+
+                                       pf_change_ap(direction, NULL, pd2.dst,
+                                               &uh.uh_dport, pd2.ip_sum,
+                                               &uh.uh_sum, &saddr2->addr,
+                                               daddr2->xport.port, 0, pd2.af,
+                                               pd2.naf, 0);
+
+                                       m_copyback(m, pd2.off, sizeof(uh), &uh);
+
+                                       /* translate outer ip header */
+                                       PF_ACPY(&pd->naddr, &daddr2->addr,
+                                               pd->naf);
+                                       PF_ACPY(&pd->ndaddr, &saddr2->addr,
+                                               pd->naf);
+                                       if (pd->af == AF_INET) {
+                                               memcpy(&pd->naddr.addr32[3],
+                                                   &srcv4_inaddr,
+                                                   sizeof(pd->naddr.addr32[3]));
+                                               return (pf_nat64_ipv4(m, off,
+                                                                       pd));
+                                       } else {
+                                               return (pf_nat64_ipv6(m, off,
+                                                                       pd));
+                                       }
+                               }
                                if (direction == PF_IN) {
                                        pf_change_icmp(pd2.src, &uh.uh_sport,
-                                           daddr, &(*state)->state_key->lan.addr,
-                                           (*state)->state_key->lan.xport.port, &uh.uh_sum,
+                                           daddr, &sk->lan.addr,
+                                           sk->lan.xport.port, &uh.uh_sum,
                                            pd2.ip_sum, icmpsum,
                                            pd->ip_sum, 1, pd2.af);
                                } else {
                                        pf_change_icmp(pd2.dst, &uh.uh_dport,
-                                           saddr, &(*state)->state_key->gwy.addr,
-                                           (*state)->state_key->gwy.xport.port, &uh.uh_sum,
+                                           saddr, &sk->gwy.addr,
+                                           sk->gwy.xport.port, &uh.uh_sum,
                                            pd2.ip_sum, icmpsum,
                                            pd->ip_sum, 1, pd2.af);
                                }
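
The block above finishes NAT64 handling for a UDP header quoted inside an ICMP error: the inner addresses and ports are rewritten with pf_change_ap(), the outer addresses are re-synthesized, and the mbuf is handed to pf_nat64_ipv4() or pf_nat64_ipv6(). The copy of srcv4_inaddr into pd->naddr.addr32[3] follows the usual NAT64 convention of embedding the IPv4 address in the low-order 32 bits of a /96 IPv6 prefix (RFC 6052). A minimal sketch of that synthesis step, independent of the pf structures (prefix96 stands for whatever prefix the matching rule supplies):

    #include <string.h>
    #include <netinet/in.h>

    /*
     * Sketch only: combine a NAT64 /96 prefix with an IPv4 address by
     * placing the IPv4 bits in the last four bytes, the same layout the
     * hunk above relies on when it copies srcv4_inaddr into addr32[3].
     */
    static struct in6_addr
    nat64_synthesize(const struct in6_addr *prefix96, struct in_addr v4)
    {
            struct in6_addr out = *prefix96;        /* upper 96 bits kept */

            memcpy(&out.s6_addr[12], &v4.s_addr, sizeof (v4.s_addr));
            return (out);
    }

Extraction in the opposite direction just reads the trailing four bytes back into a struct in_addr before the IPv4 header is rebuilt.
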
@@ -7031,37 +8004,40 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                return (PF_DROP);
                        }
 
-                       key.af = pd2.af;
                        key.proto = IPPROTO_ICMP;
                        if (direction == PF_IN) {
-                               PF_ACPY(&key.ext.addr, pd2.dst, key.af);
-                               PF_ACPY(&key.gwy.addr, pd2.src, key.af);
-                               key.ext.xport.port = 0;
+                               key.af_gwy = pd2.af;
+                               PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
+                               PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
+                               key.ext_gwy.xport.port = 0;
                                key.gwy.xport.port = iih.icmp_id;
                        } else {
-                               PF_ACPY(&key.lan.addr, pd2.dst, key.af);
-                               PF_ACPY(&key.ext.addr, pd2.src, key.af);
+                               key.af_lan = pd2.af;
+                               PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
+                               PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
                                key.lan.xport.port = iih.icmp_id;
-                               key.ext.xport.port = 0;
+                               key.ext_lan.xport.port = 0;
                        }
 
                        STATE_LOOKUP();
 
-                       if (STATE_TRANSLATE((*state)->state_key)) {
+                       sk = (*state)->state_key;
+                       if (STATE_TRANSLATE(sk)) {
                                if (direction == PF_IN) {
                                        pf_change_icmp(pd2.src, &iih.icmp_id,
-                                           daddr, &(*state)->state_key->lan.addr,
-                                           (*state)->state_key->lan.xport.port, NULL,
+                                           daddr, &sk->lan.addr,
+                                           sk->lan.xport.port, NULL,
                                            pd2.ip_sum, icmpsum,
                                            pd->ip_sum, 0, AF_INET);
                                } else {
                                        pf_change_icmp(pd2.dst, &iih.icmp_id,
-                                           saddr, &(*state)->state_key->gwy.addr,
-                                           (*state)->state_key->gwy.xport.port, NULL,
+                                           saddr, &sk->gwy.addr,
+                                           sk->gwy.xport.port, NULL,
                                            pd2.ip_sum, icmpsum,
                                            pd->ip_sum, 0, AF_INET);
                                }
-                               m = pf_lazy_makewritable(pd, m, off2 + ICMP_MINLEN);
+                               m = pf_lazy_makewritable(pd, m,
+                                                        off2 + ICMP_MINLEN);
                                if (!m)
                                        return (PF_DROP);
                                m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp);
@@ -7085,33 +8061,35 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                return (PF_DROP);
                        }
 
-                       key.af = pd2.af;
                        key.proto = IPPROTO_ICMPV6;
                        if (direction == PF_IN) {
-                               PF_ACPY(&key.ext.addr, pd2.dst, key.af);
-                               PF_ACPY(&key.gwy.addr, pd2.src, key.af);
-                               key.ext.xport.port = 0;
+                               key.af_gwy = pd2.af;
+                               PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
+                               PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
+                               key.ext_gwy.xport.port = 0;
                                key.gwy.xport.port = iih.icmp6_id;
                        } else {
-                               PF_ACPY(&key.lan.addr, pd2.dst, key.af);
-                               PF_ACPY(&key.ext.addr, pd2.src, key.af);
+                               key.af_lan = pd2.af;
+                               PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
+                               PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
                                key.lan.xport.port = iih.icmp6_id;
-                               key.ext.xport.port = 0;
+                               key.ext_lan.xport.port = 0;
                        }
 
                        STATE_LOOKUP();
 
-                       if (STATE_TRANSLATE((*state)->state_key)) {
+                       sk = (*state)->state_key;
+                       if (STATE_TRANSLATE(sk)) {
                                if (direction == PF_IN) {
                                        pf_change_icmp(pd2.src, &iih.icmp6_id,
-                                           daddr, &(*state)->state_key->lan.addr,
-                                           (*state)->state_key->lan.xport.port, NULL,
+                                           daddr, &sk->lan.addr,
+                                           sk->lan.xport.port, NULL,
                                            pd2.ip_sum, icmpsum,
                                            pd->ip_sum, 0, AF_INET6);
                                } else {
                                        pf_change_icmp(pd2.dst, &iih.icmp6_id,
-                                           saddr, &(*state)->state_key->gwy.addr,
-                                           (*state)->state_key->gwy.xport.port, NULL,
+                                           saddr, &sk->gwy.addr,
+                                           sk->gwy.xport.port, NULL,
                                            pd2.ip_sum, icmpsum,
                                            pd->ip_sum, 0, AF_INET6);
                                }
@@ -7131,35 +8109,35 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
                }
 #endif /* INET6 */
                default: {
-                       key.af = pd2.af;
                        key.proto = pd2.proto;
                        if (direction == PF_IN) {
-                               PF_ACPY(&key.ext.addr, pd2.dst, key.af);
-                               PF_ACPY(&key.gwy.addr, pd2.src, key.af);
-                               key.ext.xport.port = 0;
+                               key.af_gwy = pd2.af;
+                               PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
+                               PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
+                               key.ext_gwy.xport.port = 0;
                                key.gwy.xport.port = 0;
                        } else {
-                               PF_ACPY(&key.lan.addr, pd2.dst, key.af);
-                               PF_ACPY(&key.ext.addr, pd2.src, key.af);
+                               key.af_lan = pd2.af;
+                               PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
+                               PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
                                key.lan.xport.port = 0;
-                               key.ext.xport.port = 0;
+                               key.ext_lan.xport.port = 0;
                        }
 
                        STATE_LOOKUP();
 
-                       if (STATE_TRANSLATE((*state)->state_key)) {
+                       sk = (*state)->state_key;
+                       if (STATE_TRANSLATE(sk)) {
                                if (direction == PF_IN) {
-                                       pf_change_icmp(pd2.src, NULL,
-                                           daddr, &(*state)->state_key->lan.addr,
-                                           0, NULL,
-                                           pd2.ip_sum, icmpsum,
-                                           pd->ip_sum, 0, pd2.af);
+                                       pf_change_icmp(pd2.src, NULL, daddr,
+                                               &sk->lan.addr, 0, NULL,
+                                               pd2.ip_sum, icmpsum,
+                                               pd->ip_sum, 0, pd2.af);
                                } else {
-                                       pf_change_icmp(pd2.dst, NULL,
-                                           saddr, &(*state)->state_key->gwy.addr,
-                                           0, NULL,
-                                           pd2.ip_sum, icmpsum,
-                                           pd->ip_sum, 0, pd2.af);
+                                       pf_change_icmp(pd2.dst, NULL, saddr,
+                                               &sk->gwy.addr, 0, NULL,
+                                               pd2.ip_sum, icmpsum,
+                                               pd->ip_sum, 0, pd2.af);
                                }
                                switch (pd2.af) {
 #if INET
@@ -7172,14 +8150,14 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
 #if INET6
                                case AF_INET6:
                                        m = pf_lazy_makewritable(pd, m,
-                                           ipoff2 + sizeof (h2_6));
+                                                       ipoff2 + sizeof (h2_6));
                                        if (!m)
                                                return (PF_DROP);
                                        m_copyback(m, off,
-                                           sizeof (struct icmp6_hdr),
-                                           pd->hdr.icmp6);
+                                               sizeof (struct icmp6_hdr),
+                                               pd->hdr.icmp6);
                                        m_copyback(m, ipoff2, sizeof (h2_6),
-                                           &h2_6);
+                                                  &h2_6);
                                        break;
 #endif /* INET6 */
                                }
@@ -7203,17 +8181,18 @@ pf_test_state_grev1(struct pf_state **state, int direction,
        struct mbuf *m;
 
        key.app_state = 0;
-       key.af = pd->af;
        key.proto = IPPROTO_GRE;
        key.proto_variant = PF_GRE_PPTP_VARIANT;
        if (direction == PF_IN) {
-               PF_ACPY(&key.ext.addr, pd->src, key.af);
-               PF_ACPY(&key.gwy.addr, pd->dst, key.af);
+               key.af_gwy = pd->af;
+               PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
+               PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
                key.gwy.xport.call_id = grev1->call_id;
        } else {
-               PF_ACPY(&key.lan.addr, pd->src, key.af);
-               PF_ACPY(&key.ext.addr, pd->dst, key.af);
-               key.ext.xport.call_id = grev1->call_id;
+               key.af_lan = pd->af;
+               PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
+               PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
+               key.ext_lan.xport.call_id = grev1->call_id;
        }
 
        STATE_LOOKUP();
@@ -7305,16 +8284,17 @@ pf_test_state_esp(struct pf_state **state, int direction, struct pfi_kif *kif,
        int action;
 
        memset(&key, 0, sizeof (key));
-       key.af = pd->af;
        key.proto = IPPROTO_ESP;
        if (direction == PF_IN) {
-               PF_ACPY(&key.ext.addr, pd->src, key.af);
-               PF_ACPY(&key.gwy.addr, pd->dst, key.af);
+               key.af_gwy = pd->af;
+               PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
+               PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
                key.gwy.xport.spi = esp->spi;
        } else {
-               PF_ACPY(&key.lan.addr, pd->src, key.af);
-               PF_ACPY(&key.ext.addr, pd->dst, key.af);
-               key.ext.xport.spi = esp->spi;
+               key.af_lan = pd->af;
+               PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
+               PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
+               key.ext_lan.xport.spi = esp->spi;
        }
 
        *state = pf_find_state(kif, &key, direction);
@@ -7347,7 +8327,7 @@ pf_test_state_esp(struct pf_state **state, int direction, struct pfi_kif *kif,
                                        *state = s;
                        }
                } else {
-                       key.ext.xport.spi = 0;
+                       key.ext_lan.xport.spi = 0;
 
                        s = pf_find_state(kif, &key, direction);
                        if (s) {
@@ -7355,7 +8335,7 @@ pf_test_state_esp(struct pf_state **state, int direction, struct pfi_kif *kif,
 
                                RB_REMOVE(pf_state_tree_lan_ext,
                                    &pf_statetbl_lan_ext, sk);
-                               sk->ext.xport.spi = esp->spi;
+                               sk->ext_lan.xport.spi = esp->spi;
 
                                if (RB_INSERT(pf_state_tree_lan_ext,
                                    &pf_statetbl_lan_ext, sk))
@@ -7464,18 +8444,19 @@ pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
        struct pf_state_key_cmp  key;
 
        key.app_state = 0;
-       key.af = pd->af;
        key.proto = pd->proto;
        if (direction == PF_IN) {
-               PF_ACPY(&key.ext.addr, pd->src, key.af);
-               PF_ACPY(&key.gwy.addr, pd->dst, key.af);
-               key.ext.xport.port = 0;
+               key.af_gwy = pd->af;
+               PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
+               PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
+               key.ext_gwy.xport.port = 0;
                key.gwy.xport.port = 0;
        } else {
-               PF_ACPY(&key.lan.addr, pd->src, key.af);
-               PF_ACPY(&key.ext.addr, pd->dst, key.af);
+               key.af_lan = pd->af;
+               PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
+               PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
                key.lan.xport.port = 0;
-               key.ext.xport.port = 0;
+               key.ext_lan.xport.port = 0;
        }
 
        STATE_LOOKUP();
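
Each lookup above (ICMP, ICMPv6, GRE, ESP and the catch-all case) now sets key.af_lan or key.af_gwy instead of one shared key.af, because a NAT64 state carries different address families on its two sides. A toy sketch of the convention (simplified types, not the pf structures; TOY_IN mirrors PF_IN): the direction the packet is seen in decides which family field and which host pair are filled before the state table is searched.

    #include <stdint.h>
    #include <string.h>
    #include <sys/socket.h>

    /* Simplified stand-ins for the pf types; illustration only. */
    struct toy_host { struct sockaddr_storage addr; uint16_t port; };
    struct toy_key  {
            struct toy_host lan, gwy, ext_lan, ext_gwy;
            sa_family_t     af_lan, af_gwy;
    };

    enum { TOY_IN = 1, TOY_OUT = 2 };

    /*
     * Inbound packets are matched on the gwy side (af_gwy); outbound
     * packets on the lan side (af_lan).  For a NAT64 state the two sides
     * may be different address families, which a single af could not say.
     */
    static void
    toy_fill_key(struct toy_key *key, int dir, sa_family_t af,
        const struct sockaddr_storage *src, const struct sockaddr_storage *dst)
    {
            memset(key, 0, sizeof (*key));
            if (dir == TOY_IN) {
                    key->af_gwy = af;
                    key->ext_gwy.addr = *src;
                    key->gwy.addr = *dst;
            } else {
                    key->af_lan = af;
                    key->lan.addr = *src;
                    key->ext_lan.addr = *dst;
            }
    }
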
@@ -8130,12 +9111,14 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
        pd.pf_mtag = pf_get_mtag(m);
        pd.src = (struct pf_addr *)&h->ip_src;
        pd.dst = (struct pf_addr *)&h->ip_dst;
-       PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET);
+       PF_ACPY(&pd.baddr, pd.src, AF_INET);
+       PF_ACPY(&pd.bdaddr, pd.dst, AF_INET);
        pd.ip_sum = &h->ip_sum;
        pd.proto = h->ip_p;
        pd.proto_variant = 0;
        pd.af = AF_INET;
        pd.tos = h->ip_tos;
+       pd.ttl = h->ip_ttl;
        pd.tot_len = ntohs(h->ip_len);
        pd.eh = eh;
 
@@ -8175,7 +9158,8 @@ nonormalize:
 
        pd.src = (struct pf_addr *)&h->ip_src;
        pd.dst = (struct pf_addr *)&h->ip_dst;
-       PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET);
+       PF_ACPY(&pd.baddr, pd.src, AF_INET);
+       PF_ACPY(&pd.bdaddr, pd.dst, AF_INET);
        pd.ip_sum = &h->ip_sum;
        pd.proto = h->ip_p;
        pd.proto_variant = 0;
@@ -8184,6 +9168,7 @@ nonormalize:
        pd.pf_mtag = pf_get_mtag(m);
        pd.af = AF_INET;
        pd.tos = h->ip_tos;
+       pd.ttl = h->ip_ttl;
        pd.sc = MBUF_SCIDX(mbuf_get_service_class(m));
        pd.tot_len = ntohs(h->ip_len);
        pd.eh = eh;
@@ -8239,6 +9224,8 @@ nonormalize:
                        goto done;
                action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
                    &reason);
+               if (action == PF_NAT64)
+                       goto done;
                if (pd.lmw < 0)
                        goto done;
                PF_APPLE_UPDATE_PDESC_IPv4();
@@ -8281,6 +9268,8 @@ nonormalize:
 #endif /* DUMMYNET */
                action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd,
                    &reason);
+               if (action == PF_NAT64)
+                       goto done;
                if (pd.lmw < 0)
                        goto done;
                PF_APPLE_UPDATE_PDESC_IPv4();
@@ -8316,6 +9305,8 @@ nonormalize:
 #endif /* DUMMYNET */
                action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
                    &reason);
+               if (action == PF_NAT64)
+                       goto done;
                if (pd.lmw < 0)
                        goto done;
                PF_APPLE_UPDATE_PDESC_IPv4();
@@ -8326,7 +9317,7 @@ nonormalize:
                        r = s->rule.ptr;
                        a = s->anchor.ptr;
                        log = s->log;
-               } else if (s == NULL)
+                } else if (s == NULL)
                        action = pf_test_rule(&r, &s, dir, kif,
                            m, off, h, &pd, &a, &ruleset, NULL);
                break;
@@ -8440,6 +9431,11 @@ nonormalize:
        }
 
 done:
+       if (action == PF_NAT64) {
+               *m0 = NULL;
+               return (action);
+       }
+
        *m0 = pd.mp;
        PF_APPLE_UPDATE_PDESC_IPv4();
 
@@ -8664,12 +9660,14 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
        pd.pf_mtag = pf_get_mtag(m);
        pd.src = (struct pf_addr *)&h->ip6_src;
        pd.dst = (struct pf_addr *)&h->ip6_dst;
-       PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6);
+       PF_ACPY(&pd.baddr, pd.src, AF_INET6);
+       PF_ACPY(&pd.bdaddr, pd.dst, AF_INET6);
        pd.ip_sum = NULL;
        pd.af = AF_INET6;
        pd.proto = nxt;
        pd.proto_variant = 0;
        pd.tos = 0;
+       pd.ttl = h->ip6_hlim;
        pd.sc = MBUF_SCIDX(mbuf_get_service_class(m));
        pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
        pd.eh = eh;
@@ -8719,10 +9717,12 @@ nonormalize:
 
        pd.src = (struct pf_addr *)&h->ip6_src;
        pd.dst = (struct pf_addr *)&h->ip6_dst;
-       PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6);
+       PF_ACPY(&pd.baddr, pd.src, AF_INET6);
+       PF_ACPY(&pd.bdaddr, pd.dst, AF_INET6);
        pd.ip_sum = NULL;
        pd.af = AF_INET6;
        pd.tos = 0;
+       pd.ttl = h->ip6_hlim;
        pd.tot_len = ntohs(h->ip6_plen) + sizeof (struct ip6_hdr);
        pd.eh = eh;
 
@@ -8829,6 +9829,8 @@ nonormalize:
                        goto done;
                action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
                    &reason);
+               if (action == PF_NAT64)
+                       goto done;
                if (pd.lmw < 0)
                        goto done;
                PF_APPLE_UPDATE_PDESC_IPv6();
@@ -8839,7 +9841,7 @@ nonormalize:
                        r = s->rule.ptr;
                        a = s->anchor.ptr;
                        log = s->log;
-               } else if (s == NULL)
+                } else if (s == NULL)
                        action = pf_test_rule(&r, &s, dir, kif,
                            m, off, h, &pd, &a, &ruleset, NULL);
                break;
@@ -8871,6 +9873,8 @@ nonormalize:
 #endif /* DUMMYNET */
                action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd,
                    &reason);
+               if (action == PF_NAT64)
+                       goto done;
                if (pd.lmw < 0)
                        goto done;
                PF_APPLE_UPDATE_PDESC_IPv6();
@@ -8881,7 +9885,7 @@ nonormalize:
                        r = s->rule.ptr;
                        a = s->anchor.ptr;
                        log = s->log;
-               } else if (s == NULL)
+                } else if (s == NULL)
                        action = pf_test_rule(&r, &s, dir, kif,
                            m, off, h, &pd, &a, &ruleset, NULL);
                break;
@@ -8906,6 +9910,8 @@ nonormalize:
 #endif /* DUMMYNET */
                action = pf_test_state_icmp(&s, dir, kif,
                    m, off, h, &pd, &reason);
+               if (action == PF_NAT64)
+                       goto done;
                if (pd.lmw < 0)
                        goto done;
                PF_APPLE_UPDATE_PDESC_IPv6();
@@ -9031,6 +10037,11 @@ nonormalize:
        }
 
 done:
+       if (action == PF_NAT64) {
+               *m0 = NULL;
+               return (action);
+       }
+
        *m0 = pd.mp;
        PF_APPLE_UPDATE_PDESC_IPv6();
 
index bfad2e2aec14baebaea7c745784e9e287915176b..15bfb072475a04ef7d0e43aac1bc37687c8de412 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -240,6 +240,7 @@ u_int32_t altq_allowed = 0;
 #endif /* PF_ALTQ */
 
 u_int32_t pf_hash_seed;
+int16_t pf_nat64_configured = 0;
 
 /*
  * These are the pf enabled reference counting variables
@@ -1420,12 +1421,15 @@ pf_state_export(struct pfsync_state *sp, struct pf_state_key *sk,
        sp->lan.xport = sk->lan.xport;
        sp->gwy.addr = sk->gwy.addr;
        sp->gwy.xport = sk->gwy.xport;
-       sp->ext.addr = sk->ext.addr;
-       sp->ext.xport = sk->ext.xport;
+       sp->ext_lan.addr = sk->ext_lan.addr;
+       sp->ext_lan.xport = sk->ext_lan.xport;
+       sp->ext_gwy.addr = sk->ext_gwy.addr;
+       sp->ext_gwy.xport = sk->ext_gwy.xport;
        sp->proto_variant = sk->proto_variant;
        sp->tag = s->tag;
        sp->proto = sk->proto;
-       sp->af = sk->af;
+       sp->af_lan = sk->af_lan;
+       sp->af_gwy = sk->af_gwy;
        sp->direction = sk->direction;
        sp->flowhash = sk->flowhash;
 
@@ -1473,12 +1477,15 @@ pf_state_import(struct pfsync_state *sp, struct pf_state_key *sk,
        sk->lan.xport = sp->lan.xport;
        sk->gwy.addr = sp->gwy.addr;
        sk->gwy.xport = sp->gwy.xport;
-       sk->ext.addr = sp->ext.addr;
-       sk->ext.xport = sp->ext.xport;
+       sk->ext_lan.addr = sp->ext_lan.addr;
+       sk->ext_lan.xport = sp->ext_lan.xport;
+       sk->ext_gwy.addr = sp->ext_gwy.addr;
+       sk->ext_gwy.xport = sp->ext_gwy.xport;
        sk->proto_variant = sp->proto_variant;
        s->tag = sp->tag;
        sk->proto = sp->proto;
-       sk->af = sp->af;
+       sk->af_lan = sp->af_lan;
+       sk->af_gwy = sp->af_gwy;
        sk->direction = sp->direction;
        sk->flowhash = pf_calc_state_key_flowhash(sk);
 
@@ -3087,8 +3094,10 @@ pf_rule_setup(struct pfioc_rule *pr, struct pf_rule *rule,
        }
 
        pf_mv_pool(&pf_pabuf, &rule->rpool.list);
+
        if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) ||
-           (rule->action == PF_BINAT)) && rule->anchor == NULL) ||
+           (rule->action == PF_BINAT) || (rule->action == PF_NAT64)) &&
+           rule->anchor == NULL) ||
            (rule->rt > PF_FASTROUTE)) &&
            (TAILQ_FIRST(&rule->rpool.list) == NULL))
                error = EINVAL;
@@ -3097,6 +3106,10 @@ pf_rule_setup(struct pfioc_rule *pr, struct pf_rule *rule,
                pf_rm_rule(NULL, rule);
                return (error);
        }
+       /* For a NAT64 rule, the rule's address family is AF_INET6, whereas
+        * the address pool's family will be AF_INET.
+        */
+       rule->rpool.af = (rule->action == PF_NAT64) ? AF_INET: rule->af;
        rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list);
        rule->evaluations = rule->packets[0] = rule->packets[1] =
            rule->bytes[0] = rule->bytes[1] = 0;
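
A NAT64 rule is written against IPv6 traffic, so rule->af is AF_INET6, while the addresses it translates to are drawn from an IPv4 pool; the assignment above pins rule->rpool.af to AF_INET for exactly that case and leaves every other action's pool in the rule's own family. A simplified sketch of the same decision (toy types; TOY_PF_NAT64 mirrors pf's enum value but is only illustrative here):

    #include <sys/socket.h>

    /* Simplified stand-ins; the real structs live in pfvar.h. */
    struct toy_pool { sa_family_t af; };
    struct toy_rule {
            int             action;        /* e.g. TOY_PF_NAT64 */
            sa_family_t     af;            /* family the rule matches */
            struct toy_pool rpool;         /* addresses translated to */
    };

    #define TOY_PF_NAT64 13                /* illustrative value */

    /* Only NAT64 rules get a pool family that differs from the rule family. */
    static void
    toy_rule_setup(struct toy_rule *r)
    {
            r->rpool.af = (r->action == TOY_PF_NAT64) ? AF_INET : r->af;
    }
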
@@ -3175,6 +3188,9 @@ pfioctl_ioc_rule(u_long cmd, int minordev, struct pfioc_rule *pr, struct proc *p
                ruleset->rules[rs_num].inactive.rcount++;
                if (rule->rule_flag & PFRULE_PFM)
                        pffwrules++;
+
+               if (rule->action == PF_NAT64)
+                       atomic_add_16(&pf_nat64_configured, 1);
                break;
        }
 
@@ -3575,6 +3591,8 @@ pfioctl_ioc_rule(u_long cmd, int minordev, struct pfioc_rule *pr, struct proc *p
                pf_rule_copyout(rule, &pr->rule);
                if (rule->rule_flag & PFRULE_PFM)
                        pffwrules++;
+               if (rule->action == PF_NAT64)
+                       atomic_add_16(&pf_nat64_configured, 1);
                break;
        }
 
@@ -3597,6 +3615,8 @@ pfioctl_ioc_rule(u_long cmd, int minordev, struct pfioc_rule *pr, struct proc *p
                } else
                        pf_delete_rule_by_owner(pr->rule.owner, req_dev);
                pr->nr = pffwrules;
+               if (pr->rule.action == PF_NAT64)
+                       atomic_add_16(&pf_nat64_configured, -1);
                break;
        }
 
@@ -3691,21 +3711,21 @@ pfioctl_ioc_state_kill(u_long cmd, struct pfioc_state_kill *psk, struct proc *p)
 
                        if (sk->direction == PF_OUT) {
                                src = &sk->lan;
-                               dst = &sk->ext;
+                               dst = &sk->ext_lan;
                        } else {
-                               src = &sk->ext;
+                               src = &sk->ext_lan;
                                dst = &sk->lan;
                        }
-                       if ((!psk->psk_af || sk->af == psk->psk_af) &&
+                       if ((!psk->psk_af || sk->af_lan == psk->psk_af) &&
                            (!psk->psk_proto || psk->psk_proto == sk->proto) &&
                            PF_MATCHA(psk->psk_src.neg,
                            &psk->psk_src.addr.v.a.addr,
                            &psk->psk_src.addr.v.a.mask,
-                           &src->addr, sk->af) &&
+                           &src->addr, sk->af_lan) &&
                            PF_MATCHA(psk->psk_dst.neg,
                            &psk->psk_dst.addr.v.a.addr,
                            &psk->psk_dst.addr.v.a.mask,
-                           &dst->addr, sk->af) &&
+                           &dst->addr, sk->af_lan) &&
                            (pf_match_xport(psk->psk_proto,
                            psk->psk_proto_variant, &psk->psk_src.xport,
                            &src->xport)) &&
@@ -3891,7 +3911,6 @@ pfioctl_ioc_natlook(u_long cmd, struct pfioc_natlook *pnl, struct proc *p)
                struct pf_state_key_cmp  key;
                int                      m = 0, direction = pnl->direction;
 
-               key.af = pnl->af;
                key.proto = pnl->proto;
                key.proto_variant = pnl->proto_variant;
 
@@ -3910,20 +3929,24 @@ pfioctl_ioc_natlook(u_long cmd, struct pfioc_natlook *pnl, struct proc *p)
                         * state tree.
                         */
                        if (direction == PF_IN) {
-                               PF_ACPY(&key.ext.addr, &pnl->daddr, pnl->af);
-                               memcpy(&key.ext.xport, &pnl->dxport,
-                                   sizeof (key.ext.xport));
+                               key.af_gwy = pnl->af;
+                               PF_ACPY(&key.ext_gwy.addr, &pnl->daddr,
+                                       pnl->af);
+                               memcpy(&key.ext_gwy.xport, &pnl->dxport,
+                                   sizeof (key.ext_gwy.xport));
                                PF_ACPY(&key.gwy.addr, &pnl->saddr, pnl->af);
                                memcpy(&key.gwy.xport, &pnl->sxport,
                                    sizeof (key.gwy.xport));
                                state = pf_find_state_all(&key, PF_IN, &m);
                        } else {
+                               key.af_lan = pnl->af;
                                PF_ACPY(&key.lan.addr, &pnl->daddr, pnl->af);
                                memcpy(&key.lan.xport, &pnl->dxport,
                                    sizeof (key.lan.xport));
-                               PF_ACPY(&key.ext.addr, &pnl->saddr, pnl->af);
-                               memcpy(&key.ext.xport, &pnl->sxport,
-                                   sizeof (key.ext.xport));
+                               PF_ACPY(&key.ext_lan.addr, &pnl->saddr,
+                                       pnl->af);
+                               memcpy(&key.ext_lan.xport, &pnl->sxport,
+                                   sizeof (key.ext_lan.xport));
                                state = pf_find_state_all(&key, PF_OUT, &m);
                        }
                        if (m > 1)
@@ -3932,7 +3955,7 @@ pfioctl_ioc_natlook(u_long cmd, struct pfioc_natlook *pnl, struct proc *p)
                                sk = state->state_key;
                                if (direction == PF_IN) {
                                        PF_ACPY(&pnl->rsaddr, &sk->lan.addr,
-                                           sk->af);
+                                           sk->af_lan);
                                        memcpy(&pnl->rsxport, &sk->lan.xport,
                                            sizeof (pnl->rsxport));
                                        PF_ACPY(&pnl->rdaddr, &pnl->daddr,
@@ -3941,7 +3964,7 @@ pfioctl_ioc_natlook(u_long cmd, struct pfioc_natlook *pnl, struct proc *p)
                                            sizeof (pnl->rdxport));
                                } else {
                                        PF_ACPY(&pnl->rdaddr, &sk->gwy.addr,
-                                           sk->af);
+                                           sk->af_gwy);
                                        memcpy(&pnl->rdxport, &sk->gwy.xport,
                                            sizeof (pnl->rdxport));
                                        PF_ACPY(&pnl->rsaddr, &pnl->saddr,
index 27121f77902d87ec041ca6ad1e164c80a41b9c1a..dc78423c354292ae08d5c22fd037e159f61bfe6c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -32,6 +32,7 @@
 /*
  * Copyright (c) 2001 Daniel Hartmeier
  * Copyright (c) 2002,2003 Henning Brauer
+ * NAT64 - Copyright (c) 2010 Viagenie Inc. (http://www.viagenie.ca)
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -166,6 +167,8 @@ pf_get_ruleset_number(u_int8_t action)
                break;
        case PF_RDR:
        case PF_NORDR:
+       case PF_NAT64:
+       case PF_NONAT64:
                return (PF_RULESET_RDR);
                break;
 #if DUMMYNET
@@ -237,7 +240,7 @@ pf_find_ruleset_with_owner(const char *path, const char *owner, int is_anchor,
                *error = EINVAL;
                return (NULL);
        } else {
-               if ((owner && anchor->owner && (!strcmp(owner, anchor->owner)))
+               if ((owner && (!strcmp(owner, anchor->owner)))
                    || (is_anchor && !strcmp(anchor->owner, "")))
                        return (&anchor->ruleset);
                *error = EPERM;
index 427cc65670670140ce2ec2c33a4629d11a53117b..17ff0ab1aa24c211c57c90bd6aa1135a2634d326 100644 (file)
@@ -1096,6 +1096,11 @@ pfr_walktree(struct radix_node *rn, void *arg)
 
                        pfr_copyout_addr(&as.pfras_a, ke);
 
+#if !defined(__LP64__)
+                       /* Initialized to avoid potential info leak to
+                        * userspace */
+                       as._pad = 0;
+#endif
                        bcopy(ke->pfrke_packets, as.pfras_packets,
                            sizeof (as.pfras_packets));
                        bcopy(ke->pfrke_bytes, as.pfras_bytes,
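
On 32-bit (!__LP64__) kernels the astats record being assembled here carries an explicit _pad member, and the hunk zeroes it before the structure is copied out so stale kernel memory never reaches userspace. The general pattern, as a small self-contained sketch (toy struct, not the pfr types):

    #include <stdint.h>
    #include <string.h>

    /* Illustrative struct with a padding/alignment hole. */
    struct report {
            uint8_t  kind;
            /* three bytes of implicit padding here on most ABIs */
            uint32_t count;
    };

    /*
     * Sketch: anything destined for userspace should be fully initialized,
     * either by clearing explicit pads field-by-field (as the hunk does
     * with as._pad = 0) or by zeroing the whole object first.
     */
    static void
    fill_report(struct report *r, uint32_t count)
    {
            memset(r, 0, sizeof (*r));      /* clears the padding bytes too */
            r->kind = 1;
            r->count = count;
    }
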
index 880eb1ecb3177aebe47b2a871263ea318042cfc8..97d6280fad34461c6cf4fd85f575486cefe3f61d 100644 (file)
@@ -106,7 +106,8 @@ you leave this credit intact on any copies of this file.
 #define SADB_GETSASTAT    23
 #define SADB_X_SPDENABLE  24   /* by policy id */
 #define SADB_X_SPDDISABLE 25   /* by policy id */
-#define SADB_MAX          25
+#define SADB_MIGRATE      26
+#define SADB_MAX          26
 
 struct sadb_msg {
   u_int8_t sadb_msg_version;
@@ -143,7 +144,11 @@ struct sadb_sa_2 {
                u_int16_t               sadb_reserved0;
                u_int16_t               sadb_sa_natt_interval;
        };
-       u_int32_t               sadb_reserved1;
+
+       union {
+               u_int32_t               sadb_reserved1;
+               u_int16_t               sadb_sa_natt_offload_interval;
+       };
 };
 #endif /* PRIVATE */
 
@@ -376,7 +381,10 @@ struct sadb_sastat {
 #define SADB_X_EXT_ADDR_RANGE_SRC_END   24
 #define SADB_X_EXT_ADDR_RANGE_DST_START 25
 #define SADB_X_EXT_ADDR_RANGE_DST_END   26
-#define SADB_EXT_MAX                  26
+#define SADB_EXT_MIGRATE_ADDRESS_SRC  27
+#define SADB_EXT_MIGRATE_ADDRESS_DST  28
+#define SADB_X_EXT_MIGRATE_IPSECIF    29
+#define SADB_EXT_MAX                  29
 
 #define SADB_SATYPE_UNSPEC     0
 #define SADB_SATYPE_AH         2
@@ -423,6 +431,7 @@ struct sadb_sastat {
 #define SADB_X_EALG_RIJNDAELCBC        12
 #define SADB_X_EALG_AESCBC      12
 #define SADB_X_EALG_AES                12
+#define SADB_X_EALG_AES_GCM     13
 /* private allocations should use 249-255 (RFC2407) */
 
 #if 1  /*nonstandard */
@@ -468,6 +477,10 @@ struct sadb_sastat {
 #define SADB_X_EXT_NATT_KEEPALIVE_OFFLOAD  0x8000
 #endif /* PRIVATE */   
 
+#ifdef PRIVATE
+#define NATT_KEEPALIVE_OFFLOAD_INTERVAL        0x1
+#endif
+
 #if 1
 #define SADB_X_EXT_RAWCPI      0x0080  /* use well known CPI (IPComp) */
 #endif
index 29a7716abb8c56d89e0c1e9431e44ec220fd3468..11e771bf51f9ed3f16607c42c808580cb0f8cc7e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -31,6 +31,7 @@
 
 /*
  * Copyright (c) 2001 Daniel Hartmeier
+ * NAT64 - Copyright (c) 2010 Viagenie Inc. (http://www.viagenie.ca)
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -82,7 +83,7 @@ extern "C" {
 
 #include <net/radix.h>
 #include <netinet/in.h>
-
+#include <net/if_var.h>
 #ifdef KERNEL
 #include <kern/kern_types.h>
 #include <kern/zalloc.h>
@@ -155,7 +156,7 @@ struct pf_esp_hdr;
 enum   { PF_INOUT, PF_IN, PF_OUT };
 enum   { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT,
          PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP,
-         PF_DUMMYNET, PF_NODUMMYNET };
+         PF_DUMMYNET, PF_NODUMMYNET, PF_NAT64, PF_NONAT64 };
 enum   { PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT,
          PF_RULESET_BINAT, PF_RULESET_RDR, PF_RULESET_DUMMYNET, 
          PF_RULESET_MAX };
@@ -543,6 +544,7 @@ struct pf_pool {
        u_int16_t                proxy_port[2];
        u_int8_t                 port_op;
        u_int8_t                 opts;
+       sa_family_t              af;
 };
 
 
@@ -974,8 +976,10 @@ struct pf_app_state {
 struct pf_state_key_cmp {
        struct pf_state_host lan;
        struct pf_state_host gwy;
-       struct pf_state_host ext;
-       sa_family_t      af;
+       struct pf_state_host ext_lan;
+       struct pf_state_host ext_gwy;
+       sa_family_t      af_lan;
+       sa_family_t      af_gwy;
        u_int8_t         proto;
        u_int8_t         direction;
        u_int8_t         proto_variant;
@@ -987,8 +991,10 @@ TAILQ_HEAD(pf_statelist, pf_state);
 struct pf_state_key {
        struct pf_state_host lan;
        struct pf_state_host gwy;
-       struct pf_state_host ext;
-       sa_family_t      af;
+       struct pf_state_host ext_lan;
+       struct pf_state_host ext_gwy;
+       sa_family_t      af_lan;
+       sa_family_t      af_gwy;
        u_int8_t         proto;
        u_int8_t         direction;
        u_int8_t         proto_variant;
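
struct pf_state_key (and its _cmp twin above) now records the external peer twice, ext_lan and ext_gwy, and carries one family per side, af_lan and af_gwy. With NAT64 the same peer is seen as a synthesized IPv6 address on the LAN side and as its real IPv4 address on the gateway side, so a single ext/af pair can no longer describe the state. A toy illustration of such a state (simplified types, not the structures above):

    #include <string.h>
    #include <netinet/in.h>
    #include <sys/socket.h>

    struct toy_host { struct sockaddr_storage addr; in_port_t port; };

    struct toy_state_key {
            struct toy_host lan, gwy, ext_lan, ext_gwy;
            sa_family_t     af_lan;         /* family of lan / ext_lan */
            sa_family_t     af_gwy;         /* family of gwy / ext_gwy */
    };

    /*
     * Sketch of a NAT64 state: the LAN side holds IPv6 sockaddrs (the
     * client and the NAT64-synthesized view of the server); the gateway
     * side holds IPv4 sockaddrs (the translated source and the real
     * server address).
     */
    static void
    toy_nat64_state(struct toy_state_key *k)
    {
            memset(k, 0, sizeof (*k));
            k->af_lan = AF_INET6;
            k->af_gwy = AF_INET;
    }
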
@@ -1097,7 +1103,8 @@ struct pfsync_state {
        char             ifname[IFNAMSIZ];
        struct pfsync_state_host lan;
        struct pfsync_state_host gwy;
-       struct pfsync_state_host ext;
+       struct pfsync_state_host ext_lan;
+       struct pfsync_state_host ext_gwy;
        struct pfsync_state_peer src;
        struct pfsync_state_peer dst;
        struct pf_addr   rt_addr;
@@ -1114,7 +1121,8 @@ struct pfsync_state {
        u_int32_t        bytes[2][2];
        u_int32_t        creatorid;
        u_int16_t        tag;
-       sa_family_t      af;
+       sa_family_t      af_lan;
+       sa_family_t      af_gwy;
        u_int8_t         proto;
        u_int8_t         direction;
        u_int8_t         log;
@@ -1412,8 +1420,12 @@ struct pf_pdesc {
                struct pf_esp_hdr       *esp;
                void                    *any;
        } hdr;
-       struct pf_addr   baddr;         /* address before translation */
-       struct pf_addr   naddr;         /* address after translation */
+
+       /* XXX TODO: Change baddr and naddr to *saddr */
+       struct pf_addr   baddr;         /* src address before translation */
+       struct pf_addr   bdaddr;        /* dst address before translation */
+       struct pf_addr   naddr;         /* src address after translation */
+       struct pf_addr   ndaddr;        /* dst address after translation */
        struct pf_rule  *nat_rule;      /* nat/rdr rule applied to packet */
        struct pf_addr  *src;
        struct pf_addr  *dst;
@@ -1423,6 +1435,8 @@ struct pf_pdesc {
        int             lmw;            /* lazy writable offset */
        struct pf_mtag  *pf_mtag;
        u_int16_t       *ip_sum;
+       u_int32_t        off;           /* protocol header offset */
+       u_int32_t        hdrlen;        /* protocol header length */
        u_int32_t        p_len;         /* total length of payload */
        u_int16_t        flags;         /* Let SCRUB trigger behavior in */
                                        /* state code. Easier than tags */
@@ -1430,8 +1444,10 @@ struct pf_pdesc {
 #define PFDESC_IP_REAS 0x0002          /* IP frags would've been reassembled */
 #define PFDESC_IP_FRAG 0x0004          /* This is a fragment */
        sa_family_t      af;
+       sa_family_t      naf;           /*  address family after translation */
        u_int8_t         proto;
        u_int8_t         tos;
+       u_int8_t         ttl;
        u_int8_t         proto_variant;
        mbuf_svc_class_t sc;            /* mbuf service class (MBUF_SVC) */
        u_int32_t        pktflags;      /* mbuf packet flags (PKTF) */
@@ -2364,6 +2380,7 @@ extern struct pf_anchor pf_main_anchor;
 #define pf_main_ruleset        pf_main_anchor.ruleset
 
 extern int pf_is_enabled;
+extern int16_t pf_nat64_configured;
 #define PF_IS_ENABLED (pf_is_enabled != 0)
 extern u_int32_t pf_hash_seed;
 
index 81d0c35fbd0c0d417a792a065f7be2d5ae7c8c25..e02810ac40af1a870471eecb77f7fc2678392baa 100644 (file)
@@ -531,8 +531,7 @@ pktap_setdrvspec(ifnet_t ifp, struct ifdrv64 *ifd)
                                        break;
 
                                case PKTAP_FILTER_PARAM_IF_NAME:
-                                       if (x_filter->filter_param_if_name == 0 ||
-                                               strncmp(x_filter->filter_param_if_name, PKTAP_IFNAME,
+                                       if (strncmp(x_filter->filter_param_if_name, PKTAP_IFNAME,
                                                        strlen(PKTAP_IFNAME)) == 0) {
                                                error = EINVAL;
                                                break;
@@ -758,12 +757,11 @@ pktap_set_procinfo(struct pktap_header *hdr, struct so_procinfo *soprocinfo)
        /*
         * When not delegated, the effective pid is the same as the real pid
         */
-       if (soprocinfo->spi_epid != soprocinfo->spi_pid) {
+       if (soprocinfo->spi_delegated != 0) {
                hdr->pth_flags |= PTH_FLAG_PROC_DELEGATED;
                hdr->pth_epid = soprocinfo->spi_epid;
                proc_name(soprocinfo->spi_epid, hdr->pth_ecomm, MAXCOMLEN);
-               if (soprocinfo->spi_epid != 0)
-                       uuid_copy(hdr->pth_uuid, soprocinfo->spi_euuid);
+               uuid_copy(hdr->pth_euuid, soprocinfo->spi_euuid);
        }
 }
 
@@ -807,25 +805,19 @@ pktap_fill_proc_info(struct pktap_header *hdr, protocol_family_t proto,
         * For outgoing, do the lookup only if there's an
         * associated socket as indicated by the flowhash
         */
-       if (outgoing != 0 && (m->m_pkthdr.pkt_flags &
-               (PKTF_FLOW_ID|PKTF_FLOW_LOCALSRC)) == (PKTF_FLOW_ID|PKTF_FLOW_LOCALSRC) &&
-               m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
+       if (outgoing != 0 && m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
                /*
                 * To avoid lock ordering issues we delay the process lookup
                 * to the BPF read as we cannot
                 * assume the socket lock is unlocked on output
                 */
-               if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_RAWSOCK) ||
-                   m->m_pkthdr.pkt_proto == IPPROTO_TCP ||
-                   m->m_pkthdr.pkt_proto == IPPROTO_UDP) {
-                       found = 0;
-                       hdr->pth_flags |= PTH_FLAG_DELAY_PKTAP;
-                       hdr->pth_flowid = m->m_pkthdr.pkt_flowid;
-                       if (m->m_pkthdr.pkt_flags & PKTF_FLOW_RAWSOCK)
-                               hdr->pth_ipproto = IPPROTO_RAW;
-                       else            
-                               hdr->pth_ipproto = m->m_pkthdr.pkt_proto;
-               }
+               found = 0;
+               hdr->pth_flags |= PTH_FLAG_DELAY_PKTAP;
+               hdr->pth_flowid = m->m_pkthdr.pkt_flowid;
+               if (m->m_pkthdr.pkt_flags & PKTF_FLOW_RAWSOCK)
+                       hdr->pth_ipproto = IPPROTO_RAW;
+               else            
+                       hdr->pth_ipproto = m->m_pkthdr.pkt_proto;
        } else if (outgoing == 0) {
                struct inpcb *inp = NULL;
 
index a5d9cba8e5b248b0f9c523f147e3ea7dd96c3ddd..f091673a3598156705f01edd115ca976762e524c 100644 (file)
@@ -24,9 +24,9 @@ EXPORT_MI_LIST        = ${INSTALL_MI_LIST} ${KERNELFILES}
 
 EXPORT_MI_DIR = ${INSTALL_MI_DIR}
 
-INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES}
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
 
-INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES}
+INSTALL_KF_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} ${PRIVATE_KERNELFILES}
 
 include $(MakeInc_rule)
 include $(MakeInc_dir)
index eda1ae420257700cbbf1705b5193ea07a1374256..4f7d32a7505537bd885eec21b0b2e412be597ef9 100644 (file)
@@ -231,7 +231,6 @@ pktsched_teardown(struct ifclassq *ifq)
                error = ENXIO;
                break;
        }
-
        return (error);
 }
 
index 0c8663899ba653716d30fcf0ecd606b33c7f96f3..41b1f8ede3276ba4f1501b26dcf5fd8e8707a298 100644 (file)
@@ -570,6 +570,7 @@ cbq_enqueue(cbq_state_t *cbqp, struct rm_class *cl, struct mbuf *m,
        /* successfully queued. */
        ++cbqp->cbq_qlen;
        IFCQ_INC_LEN(ifq);
+       IFCQ_INC_BYTES(ifq, len);
 
        return (ret);
 }
@@ -587,6 +588,7 @@ cbq_dequeue(cbq_state_t *cbqp, cqdq_op_t op)
        if (m && op == CLASSQDQ_REMOVE) {
                --cbqp->cbq_qlen;  /* decrement # of packets in cbq */
                IFCQ_DEC_LEN(ifq);
+               IFCQ_DEC_BYTES(ifq, m_pktlen(m));
                IFCQ_XMIT_ADD(ifq, 1, m_pktlen(m));
 
                /* Update the class. */
index 28d7363e8f4f28a0e2473f50e3cc1582ef8d11af..7e61e04c148c0a5fa520887c0ba2547bbe1fa232 100644 (file)
@@ -709,6 +709,7 @@ fairq_enqueue(struct fairq_if *fif, struct fairq_class *cl, struct mbuf *m,
                }
        }
        IFCQ_INC_LEN(ifq);
+       IFCQ_INC_BYTES(ifq, len);
 
        /* successfully queued. */
        return (ret);
@@ -747,6 +748,7 @@ fairq_dequeue(struct fairq_if *fif, cqdq_op_t op)
                fif->fif_poll_cache = NULL;
                if (m != NULL) {
                        IFCQ_DEC_LEN(ifq);
+                       IFCQ_DEC_BYTES(ifq, m_pktlen(m));
                        IFCQ_XMIT_ADD(ifq, 1, m_pktlen(m));
                        PKTCNTR_ADD(&best_cl->cl_xmitcnt, 1, m_pktlen(m));
                }
@@ -797,6 +799,7 @@ fairq_dequeue(struct fairq_if *fif, cqdq_op_t op)
                        m = fairq_getq(best_cl, cur_time);
                        if (m != NULL) {
                                IFCQ_DEC_LEN(ifq);
+                               IFCQ_DEC_BYTES(ifq, m_pktlen(m));
                                IFCQ_XMIT_ADD(ifq, 1, m_pktlen(m));
                                PKTCNTR_ADD(&best_cl->cl_xmitcnt, 1,
                                    m_pktlen(m));
index 894cf7e286f6d45f6200bd3415771412f7de5c58..365d16f013a7e21f16d62bddd95dfd226309aef7 100644 (file)
@@ -808,6 +808,7 @@ hfsc_enqueue(struct hfsc_if *hif, struct hfsc_class *cl, struct mbuf *m,
                }
        }
        IFCQ_INC_LEN(ifq);
+       IFCQ_INC_BYTES(ifq, len);
        cl->cl_hif->hif_packets++;
 
        /* successfully queued. */
@@ -897,6 +898,7 @@ hfsc_dequeue(struct hfsc_if *hif, cqdq_op_t op)
        len = m_pktlen(m);
        cl->cl_hif->hif_packets--;
        IFCQ_DEC_LEN(ifq);
+       IFCQ_DEC_BYTES(ifq, len);
        IFCQ_XMIT_ADD(ifq, 1, len);
        PKTCNTR_ADD(&cl->cl_stats.xmit_cnt, 1, len);
 
index 23fa87fc2d497ae8846daa7c8ac4faf329e41997..78da2f1b18a1ce6bc0feceb37d003f9565fc6613 100644 (file)
@@ -613,6 +613,7 @@ priq_enqueue(struct priq_if *pif, struct priq_class *cl, struct mbuf *m,
                }
        }
        IFCQ_INC_LEN(ifq);
+       IFCQ_INC_BYTES(ifq, len);
 
        /* class is now active; indicate it as such */
        if (!pktsched_bit_tst(pri, &pif->pif_bitmap))
@@ -657,6 +658,7 @@ priq_dequeue(struct priq_if *pif, cqdq_op_t op)
        len = m_pktlen(m);
 
        IFCQ_DEC_LEN(ifq);
+       IFCQ_DEC_BYTES(ifq, len);
        if (qempty(&cl->cl_q)) {
                cl->cl_period++;
                /* class is now inactive; indicate it as such */
index 2e042823300f951063fb2805005d620d307a6aed..bc7cc221524a004b24af1fff42887119c7ed202e 100644 (file)
@@ -977,6 +977,7 @@ qfq_dequeue(struct qfq_if *qif, cqdq_op_t op)
 #endif /* QFQ_DEBUG */
 
        IFCQ_DEC_LEN(ifq);
+       IFCQ_DEC_BYTES(ifq, len);
        if (qempty(&cl->cl_q))
                cl->cl_period++;
        PKTCNTR_ADD(&cl->cl_xmitcnt, 1, len);
@@ -1122,6 +1123,7 @@ qfq_enqueue(struct qfq_if *qif, struct qfq_class *cl, struct mbuf *m,
                }
        }
        IFCQ_INC_LEN(ifq);
+       IFCQ_INC_BYTES(ifq, len);
 
 #if QFQ_DEBUG
        qif->qif_queued++;
index ecadb8bbd6672202ccb5a3ce42572abaa6b1d291..5a57824e6515befbcdd8ac1306aa0106ed2b697a 100644 (file)
@@ -582,6 +582,7 @@ tcq_enqueue(struct tcq_if *tif, struct tcq_class *cl, struct mbuf *m,
                }
        }
        IFCQ_INC_LEN(ifq);
+       IFCQ_INC_BYTES(ifq, len);
 
        /* successfully queued. */
        return (ret);
@@ -625,6 +626,7 @@ tcq_dequeue_cl(struct tcq_if *tif, struct tcq_class *cl,
        m = tcq_getq(cl);
        if (m != NULL) {
                IFCQ_DEC_LEN(ifq);
+               IFCQ_DEC_BYTES(ifq, m_pktlen(m));
                if (qempty(&cl->cl_q))
                        cl->cl_period++;
                PKTCNTR_ADD(&cl->cl_xmitcnt, 1, m_pktlen(m));
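
Each packet scheduler touched above (cbq, fairq, hfsc, priq, qfq, tcq) now pairs IFCQ_INC_BYTES()/IFCQ_DEC_BYTES() with the existing packet-count updates, so the interface classq tracks queued bytes as well as queued packets. A minimal sketch of keeping the two counters in lockstep (toy counters, not the if_classq macros):

    #include <stdint.h>

    /* Toy queue stats; the real counters live in struct ifclassq. */
    struct toy_q {
            uint32_t pkts;
            uint64_t bytes;
    };

    static void
    toy_enqueue_account(struct toy_q *q, uint32_t pktlen)
    {
            q->pkts++;
            q->bytes += pktlen;     /* keep both counters in lockstep */
    }

    static void
    toy_dequeue_account(struct toy_q *q, uint32_t pktlen)
    {
            q->pkts--;
            q->bytes -= pktlen;
    }
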
index 464ef234902afe2965f6d88247573b75f12dcf7a..d48399aae2b83465abe6b7b9b540f6a147e1dc8d 100644 (file)
@@ -101,6 +101,7 @@ struct radix_node {
        struct radix_node *rn_twin;
        struct radix_node *rn_ybro;
 #endif
+
 };
 
 #define        rn_dupedkey     rn_u.rn_leaf.rn_Dupedkey
index 79dcda95aa84e0ebae3249920bc144429206868c..657f47d0618619c153f80dfc9d96946c86b90495 100644 (file)
@@ -178,7 +178,7 @@ void
 raw_ctlinput(int cmd, __unused struct sockaddr *arg, __unused void *dummy)
 {
 
-       if (cmd < 0 || cmd > PRC_NCMDS)
+       if (cmd < 0 || cmd >= PRC_NCMDS)
                return;
        /* INCOMPLETE */
 }
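
The old check allowed cmd == PRC_NCMDS through, which is one past the last valid index when the value is later used against a table of PRC_NCMDS entries; using >= closes the off-by-one. The same bounds idiom in isolation (N_CMDS is a stand-in for PRC_NCMDS):

    #include <stddef.h>

    #define N_CMDS 28               /* stand-in for PRC_NCMDS */
    static const char *cmd_names[N_CMDS];

    static const char *
    cmd_name(int cmd)
    {
            /* valid indices are 0 .. N_CMDS-1, so reject cmd == N_CMDS too */
            if (cmd < 0 || cmd >= N_CMDS)
                    return (NULL);
            return (cmd_names[cmd]);
    }
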
index 8f077340bc1703881e2a04f3ed53f9b54f663ed0..d13a1994f7225a71c5da77f74d221e6e2e579f31 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -237,6 +237,8 @@ static struct zone *rte_zone;                       /* special zone for rtentry */
 #define        RTD_INUSE               0xFEEDFACE      /* entry is in use */
 #define        RTD_FREED               0xDEADBEEF      /* entry is freed */
 
+#define MAX_SCOPE_ADDR_STR_LEN (MAX_IPv6_STR_LEN + 6)
+
 /* For gdb */
 __private_extern__ unsigned int ctrace_stack_size = CTRACE_STACK_SIZE;
 __private_extern__ unsigned int ctrace_hist_size = CTRACE_HIST_SIZE;
@@ -282,8 +284,8 @@ static inline struct rtentry *rte_alloc_debug(void);
 static inline void rte_free_debug(struct rtentry *);
 static inline void rte_lock_debug(struct rtentry_dbg *);
 static inline void rte_unlock_debug(struct rtentry_dbg *);
-static void rt_maskedcopy(struct sockaddr *,
-           struct sockaddr *, struct sockaddr *);
+static void rt_maskedcopy(const struct sockaddr *,
+           struct sockaddr *, const struct sockaddr *);
 static void rtable_init(void **);
 static inline void rtref_audit(struct rtentry_dbg *);
 static inline void rtunref_audit(struct rtentry_dbg *);
@@ -297,8 +299,6 @@ static void rtalloc_ign_common_locked(struct route *, uint32_t, unsigned int);
 static inline void sin6_set_ifscope(struct sockaddr *, unsigned int);
 static inline void sin6_set_embedded_ifscope(struct sockaddr *, unsigned int);
 static inline unsigned int sin6_get_embedded_ifscope(struct sockaddr *);
-static struct sockaddr *sa_copy(struct sockaddr *, struct sockaddr_storage *,
-    unsigned int *);
 static struct sockaddr *ma_copy(int, struct sockaddr *,
     struct sockaddr_storage *, unsigned int);
 static struct sockaddr *sa_trim(struct sockaddr *, int);
@@ -560,7 +560,7 @@ sin6_get_embedded_ifscope(struct sockaddr *sa)
  * In any case, the effective scope ID value is returned to the caller via
  * pifscope, if it is non-NULL.
  */
-static struct sockaddr *
+struct sockaddr *
 sa_copy(struct sockaddr *src, struct sockaddr_storage *dst,
     unsigned int *pifscope)
 {
@@ -589,7 +589,13 @@ sa_copy(struct sockaddr *src, struct sockaddr_storage *dst,
                        eifscope = sin6_get_embedded_ifscope(SA(dst));
                        if (eifscope != IFSCOPE_NONE && ifscope == IFSCOPE_NONE)
                                ifscope = eifscope;
-                       sin6_set_ifscope(SA(dst), ifscope);
+                       if (ifscope != IFSCOPE_NONE) {
+                               /* Set ifscope from pifscope or eifscope */
+                               sin6_set_ifscope(SA(dst), ifscope);
+                       } else {
+                               /* If sin6_scope_id has a value, use that one */
+                               ifscope = sin6_get_ifscope(SA(dst));
+                       }
                        /*
                         * If sin6_scope_id is set but the address doesn't
                         * contain the equivalent embedded value, set it.
@@ -952,6 +958,38 @@ rtalloc1_common_locked(struct sockaddr *dst, int report, uint32_t ignflags,
                 * reference held during rtrequest.
                 */
                rtfree_locked(rt);
+
+               /*
+                * If the newly created cloned route is a direct host route
+                * then also check if it is to a router or not.
+                * If it is, then set the RTF_ROUTER flag on the host route
+                * for the gateway.
+                *
+                * XXX It is possible for the default route to be created post
+                * cloned route creation of router's IP.
+                * We can handle that corner case by special handing for RTM_ADD
+                * of default route.
+                */
+               if ((newrt->rt_flags & (RTF_HOST | RTF_LLINFO)) ==
+                   (RTF_HOST | RTF_LLINFO)) {
+                       struct rtentry *defrt = NULL;
+                       struct sockaddr_storage def_key;
+
+                       bzero(&def_key, sizeof(def_key));
+                       def_key.ss_len = rt_key(newrt)->sa_len;
+                       def_key.ss_family = rt_key(newrt)->sa_family;
+
+                       defrt = rtalloc1_scoped_locked((struct sockaddr *)&def_key,
+                                       0, 0, newrt->rt_ifp->if_index);
+
+                       if (defrt) {
+                               if (equal(rt_key(newrt), defrt->rt_gateway)) {
+                                       newrt->rt_flags |= RTF_ROUTER;
+                               }
+                               rtfree_locked(defrt);
+                       }
+               }
+
                if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
                        /*
                         * If the new route specifies it be
@@ -2659,23 +2697,23 @@ rt_set_gwroute(struct rtentry *rt, struct sockaddr *dst, struct rtentry *gwrt)
 }
 
 static void
-rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst,
-    struct sockaddr *netmask)
+rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
+    const struct sockaddr *netmask)
 {
-       u_char *cp1 = (u_char *)src;
-       u_char *cp2 = (u_char *)dst;
-       u_char *cp3 = (u_char *)netmask;
-       u_char *cplim = cp2 + *cp3;
-       u_char *cplim2 = cp2 + *cp1;
+       const char *netmaskp = &netmask->sa_data[0];
+       const char *srcp = &src->sa_data[0];
+       char *dstp = &dst->sa_data[0];
+       const char *maskend = (char *)dst
+                                   + MIN(netmask->sa_len, src->sa_len);
+       const char *srcend = (char *)dst + src->sa_len;
+
+       dst->sa_len = src->sa_len;
+       dst->sa_family = src->sa_family;
 
-       *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */
-       cp3 += 2;
-       if (cplim > cplim2)
-               cplim = cplim2;
-       while (cp2 < cplim)
-               *cp2++ = *cp1++ & *cp3++;
-       if (cp2 < cplim2)
-               bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2));
+       while (dstp < maskend)
+               *dstp++ = *srcp++ & *netmaskp++;
+       if (dstp < srcend)
+               memset(dstp, 0, (size_t)(srcend - dstp));
 }
 
 /*
@@ -2734,6 +2772,29 @@ node_lookup_default(int af)
            rnh->rnh_lookup(&sin6_def, NULL, rnh));
 }
 
+boolean_t
+rt_ifa_is_dst(struct sockaddr *dst, struct ifaddr *ifa)
+{
+       boolean_t result = FALSE;
+
+       if (ifa == NULL || ifa->ifa_addr == NULL)
+               return (result);
+
+       IFA_LOCK_SPIN(ifa);
+
+       if (dst->sa_family == ifa->ifa_addr->sa_family &&
+           ((dst->sa_family == AF_INET &&
+           SIN(dst)->sin_addr.s_addr ==
+           SIN(ifa->ifa_addr)->sin_addr.s_addr) ||
+           (dst->sa_family == AF_INET6 &&
+           SA6_ARE_ADDR_EQUAL(SIN6(dst), SIN6(ifa->ifa_addr)))))
+               result = TRUE;
+
+       IFA_UNLOCK(ifa);
+
+       return (result);
+}
+
 /*
  * Common routine to lookup/match a route.  It invokes the lookup/matchaddr
  * callback which could be address family-specific.  The main difference
@@ -2765,6 +2826,8 @@ rt_lookup_common(boolean_t lookup_only, boolean_t coarse, struct sockaddr *dst,
        boolean_t dontcare;
        int af = dst->sa_family;
        struct sockaddr_storage dst_ss, mask_ss;
+       char s_dst[MAX_IPv6_STR_LEN], s_netmask[MAX_IPv6_STR_LEN];
+       char dbuf[MAX_SCOPE_ADDR_STR_LEN], gbuf[MAX_IPv6_STR_LEN];
 
        VERIFY(!coarse || ifscope == IFSCOPE_NONE);
 
@@ -2818,6 +2881,26 @@ rt_lookup_common(boolean_t lookup_only, boolean_t coarse, struct sockaddr *dst,
                netmask = ma_copy(af, netmask, &mask_ss, ifscope);
        dontcare = (ifscope == IFSCOPE_NONE);
 
+       if (rt_verbose) {
+               if (af == AF_INET)
+                       (void) inet_ntop(af, &SIN(dst)->sin_addr.s_addr,
+                           s_dst, sizeof (s_dst));
+               else
+                       (void) inet_ntop(af, &SIN6(dst)->sin6_addr,
+                           s_dst, sizeof (s_dst));
+
+               if (netmask != NULL && af == AF_INET)
+                       (void) inet_ntop(af, &SIN(netmask)->sin_addr.s_addr,
+                           s_netmask, sizeof (s_netmask));
+               else if (netmask != NULL && af == AF_INET6)
+                       (void) inet_ntop(af, &SIN6(netmask)->sin6_addr,
+                           s_netmask, sizeof (s_netmask));
+               else
+                       *s_netmask = '\0';
+               printf("%s (%d, %d, %s, %s, %u)\n",
+                   __func__, lookup_only, coarse, s_dst, s_netmask, ifscope);
+       }
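
The rt_verbose blocks added in this function log each lookup's inputs and results. This diff only shows the kernel-side checks of rt_verbose, not how the knob is exported; assuming it is published as a sysctl named "net.route.verbose" (an assumption, not confirmed by this hunk), it could be toggled from userspace as sketched here.

    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/sysctl.h>

    int
    main(void)
    {
            int new_value = 1;              /* enable verbose route-lookup logging */
            int old_value = 0;
            size_t old_len = sizeof(old_value);

            /* "net.route.verbose" is assumed; adjust to the actual MIB name. */
            if (sysctlbyname("net.route.verbose", &old_value, &old_len,
                &new_value, sizeof(new_value)) == -1) {
                    perror("sysctlbyname");
                    return 1;
            }
            printf("rt_verbose was %d, now %d\n", old_value, new_value);
            return 0;
    }
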
+
        /*
         * Scoped route lookup:
         *
@@ -2852,6 +2935,16 @@ rt_lookup_common(boolean_t lookup_only, boolean_t coarse, struct sockaddr *dst,
         */
        if (rn != NULL) {
                struct rtentry *rt = RT(rn);
+
+               if (rt_verbose) {
+                       rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
+                       printf("%s unscoped search %p to %s->%s->%s ifa_ifp %s\n",
+                           __func__, rt,
+                           dbuf, gbuf,
+                           (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
+                           (rt->rt_ifa->ifa_ifp != NULL) ?
+                           rt->rt_ifa->ifa_ifp->if_xname : "");
+               }
                if (!(rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
                        if (rt->rt_ifp->if_index != ifscope) {
                                /*
@@ -2860,11 +2953,15 @@ rt_lookup_common(boolean_t lookup_only, boolean_t coarse, struct sockaddr *dst,
                                 * and do a more specific scoped search using
                                 * the scope of the found route.  Otherwise,
                                 * start again from scratch.
+                                *
+                                * For loopback scope we keep the unscoped
+                                * route for local addresses
                                 */
                                rn = NULL;
                                if (dontcare)
                                        ifscope = rt->rt_ifp->if_index;
-                               else
+                               else if (ifscope != lo_ifp->if_index ||
+                                   rt_ifa_is_dst(dst, rt->rt_ifa) == FALSE)
                                        rn0 = NULL;
                        } else if (!(rt->rt_flags & RTF_IFSCOPE)) {
                                /*
@@ -2884,9 +2981,21 @@ rt_lookup_common(boolean_t lookup_only, boolean_t coarse, struct sockaddr *dst,
         * interface scope as the one requested.  The following will result
         * in searching for the longest prefix scoped match.
         */
-       if (rn == NULL)
+       if (rn == NULL) {
                rn = node_lookup(dst, netmask, ifscope);
 
+               if (rt_verbose && rn != NULL) {
+                       struct rtentry *rt = RT(rn);
+
+                       rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
+                       printf("%s scoped search %p to %s->%s->%s ifa %s\n",
+                           __func__, rt,
+                           dbuf, gbuf,
+                           (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
+                           (rt->rt_ifa->ifa_ifp != NULL) ?
+                           rt->rt_ifa->ifa_ifp->if_xname : "");
+               }
+       }
        /*
         * Use the original result if either of the following is true:
         *
@@ -2909,8 +3018,9 @@ rt_lookup_common(boolean_t lookup_only, boolean_t coarse, struct sockaddr *dst,
 * route as long as the interface portion satisfies the scope.
         */
        if (rn == NULL && (rn = node_lookup_default(af)) != NULL &&
-           RT(rn)->rt_ifp->if_index != ifscope)
+           RT(rn)->rt_ifp->if_index != ifscope) {
                rn = NULL;
+       }
 
        if (rn != NULL) {
                /*
@@ -2930,6 +3040,23 @@ rt_lookup_common(boolean_t lookup_only, boolean_t coarse, struct sockaddr *dst,
                }
        }
 
+       if (rt_verbose) {
+               if (rn == NULL)
+                       printf("%s %u return NULL\n", __func__, ifscope);
+               else {
+                       struct rtentry *rt = RT(rn);
+
+                       rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
+
+                       printf("%s %u return %p to %s->%s->%s ifa_ifp %s\n",
+                           __func__, ifscope, rt,
+                           dbuf, gbuf,
+                           (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
+                           (rt->rt_ifa->ifa_ifp != NULL) ?
+                           rt->rt_ifa->ifa_ifp->if_xname : "");
+               }
+       }
+
        return (RT(rn));
 }
 
@@ -3418,6 +3545,7 @@ rte_free(struct rtentry *p)
                panic("rte_free: rte=%p refcnt=%d non-zero\n", p, p->rt_refcnt);
                /* NOTREACHED */
        }
+
        zfree(rte_zone, p);
 }
 
@@ -3845,9 +3973,20 @@ rt_str4(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen)
 {
        VERIFY(rt_key(rt)->sa_family == AF_INET);
 
-       if (ds != NULL)
+       if (ds != NULL) {
                (void) inet_ntop(AF_INET,
                    &SIN(rt_key(rt))->sin_addr.s_addr, ds, dslen);
+               if (dslen >= MAX_SCOPE_ADDR_STR_LEN &&
+                   SINIFSCOPE(rt_key(rt))->sin_scope_id != IFSCOPE_NONE) {
+                       char scpstr[16];
+
+                       snprintf(scpstr, sizeof(scpstr), "@%u",
+                           SINIFSCOPE(rt_key(rt))->sin_scope_id);
+
+                       strlcat(ds, scpstr, dslen);
+               }
+       }
+
        if (gs != NULL) {
                if (rt->rt_flags & RTF_GATEWAY) {
                        (void) inet_ntop(AF_INET,
@@ -3866,9 +4005,20 @@ rt_str6(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen)
 {
        VERIFY(rt_key(rt)->sa_family == AF_INET6);
 
-       if (ds != NULL)
+       if (ds != NULL) {
                (void) inet_ntop(AF_INET6,
                    &SIN6(rt_key(rt))->sin6_addr, ds, dslen);
+               if (dslen >= MAX_SCOPE_ADDR_STR_LEN &&
+                   SIN6IFSCOPE(rt_key(rt))->sin6_scope_id != IFSCOPE_NONE) {
+                       char scpstr[16];
+
+                       snprintf(scpstr, sizeof(scpstr), "@%u",
+                           SIN6IFSCOPE(rt_key(rt))->sin6_scope_id);
+
+                       strlcat(ds, scpstr, dslen);
+               }
+       }
+
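
rt_str4() and rt_str6() now append an "@<scope>" suffix to the destination string when the route key embeds an interface scope and the caller's buffer is at least MAX_SCOPE_ADDR_STR_LEN bytes. A plain userspace approximation of that formatting for the IPv4 case (format_scoped_dst is an illustrative name; IFSCOPE_NONE is 0, and strlcat from the BSD/Darwin libc is assumed):

    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    /* Format "a.b.c.d@scope" roughly the way rt_str4() does (sketch). */
    static void
    format_scoped_dst(const struct in_addr *addr, unsigned int scope_id,
        char *ds, size_t dslen)
    {
            char scpstr[16];

            (void) inet_ntop(AF_INET, addr, ds, (socklen_t)dslen);
            if (scope_id != 0) {                    /* 0 == IFSCOPE_NONE */
                    snprintf(scpstr, sizeof(scpstr), "@%u", scope_id);
                    strlcat(ds, scpstr, dslen);     /* e.g. "10.0.1.0@4" */
            }
    }
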
        if (gs != NULL) {
                if (rt->rt_flags & RTF_GATEWAY) {
                        (void) inet_ntop(AF_INET6,
index d382013dad9183952e6848f00a1ac1f3a4c1a0e4..8ddef833d7c46bf49089b58e47f869c4956bedff 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -193,6 +193,7 @@ struct rtentry {
            (struct rtentry *, struct ifnet_llreach_info *);
        void (*rt_llinfo_purge)(struct rtentry *); /* llinfo purge fn */
        void (*rt_llinfo_free)(void *); /* link level info free function */
+       void (*rt_llinfo_refresh) (struct rtentry *); /* expedite llinfo refresh */
        struct rt_metrics rt_rmx;       /* metrics used by rx'ing protocols */
 #define        rt_use rt_rmx.rmx_pksent
        struct rtentry *rt_gwroute;     /* implied entry for gatewayed routes */
@@ -350,7 +351,8 @@ struct rt_msghdr_ext {
 #define        RTM_DELETE      0x2     /* Delete Route */
 #define        RTM_CHANGE      0x3     /* Change Metrics or flags */
 #define        RTM_GET         0x4     /* Report Metrics */
-#define        RTM_LOSING      0x5     /* Kernel Suspects Partitioning */
+#define        RTM_LOSING      0x5     /* RTM_LOSING is no longer generated by xnu
+                                  and is deprecated */
 #define        RTM_REDIRECT    0x6     /* Told to use different route */
 #define        RTM_MISS        0x7     /* Lookup failed on this address */
 #define        RTM_LOCK        0x8     /* fix specified metrics */
@@ -383,6 +385,9 @@ struct rt_msghdr_ext {
 #define        RTV_SSTHRESH    0x20    /* init or lock _ssthresh */
 #define        RTV_RTT         0x40    /* init or lock _rtt */
 #define        RTV_RTTVAR      0x80    /* init or lock _rttvar */
+#ifdef PRIVATE
+#define        RTV_REFRESH_HOST        0x100   /* init host route to expedite refresh */
+#endif
 
 /*
  * Bitmask values for rtm_addrs.
@@ -575,5 +580,8 @@ extern void rt_str(struct rtentry *, char *, uint32_t, char *, uint32_t);
 extern const char *rtm2str(int);
 extern void route_copyin(struct route *, struct route *, size_t);
 extern void route_copyout(struct route *, const struct route *, size_t);
+extern boolean_t rt_ifa_is_dst(struct sockaddr *, struct ifaddr *);
+extern struct sockaddr *sa_copy(struct sockaddr *, struct sockaddr_storage *,
+    unsigned int *);
 #endif /* BSD_KERNEL_PRIVATE */
 #endif /* _NET_ROUTE_H_ */
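
Two user-visible points in this header: struct rtentry gains an rt_llinfo_refresh callback, driven by the new private RTV_REFRESH_HOST flag (see the rt_setmetrics() change in rtsock.c below), and RTM_LOSING is marked deprecated because xnu no longer generates it. A routing-socket listener therefore should not depend on RTM_LOSING; a minimal sketch of such a listener, which only labels message types, follows.

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/types.h>
    #include <sys/socket.h>
    #include <net/route.h>

    int
    main(void)
    {
            int s = socket(PF_ROUTE, SOCK_RAW, 0);
            char buf[2048];
            ssize_t n;

            if (s < 0) {
                    perror("socket(PF_ROUTE)");
                    return 1;
            }
            while ((n = read(s, buf, sizeof(buf))) > 0) {
                    struct rt_msghdr *rtm = (struct rt_msghdr *)(void *)buf;

                    switch (rtm->rtm_type) {
                    case RTM_ADD:           printf("RTM_ADD\n");    break;
                    case RTM_DELETE:        printf("RTM_DELETE\n"); break;
                    case RTM_CHANGE:        printf("RTM_CHANGE\n"); break;
                    case RTM_LOSING:        printf("RTM_LOSING (deprecated, no longer sent)\n"); break;
                    default:                printf("rtm_type %d\n", rtm->rtm_type); break;
                    }
            }
            close(s);
            return 0;
    }
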
index 2c2ae2dcdb1754ff0ba10eae08d7c65f574bbf54..a7a9f16db4c5cbebe22c0c390516fcb525b0a8ca 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -124,7 +124,7 @@ static int rts_shutdown(struct socket *);
 static int rts_sockaddr(struct socket *, struct sockaddr **);
 
 static int route_output(struct mbuf *, struct socket *);
-static void rt_setmetrics(u_int32_t, struct rt_metrics *, struct rtentry *);
+static int rt_setmetrics(u_int32_t, struct rt_metrics *, struct rtentry *);
 static void rt_getmetrics(struct rtentry *, struct rt_metrics *);
 static void rt_setif(struct rtentry *, struct sockaddr *, struct sockaddr *,
     struct sockaddr *, unsigned int);
@@ -481,7 +481,7 @@ route_output(struct mbuf *m, struct socket *so)
                        rt_setif(saved_nrt,
                            info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA],
                            info.rti_info[RTAX_GATEWAY], ifscope);
-                       rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, saved_nrt);
+                       (void)rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, saved_nrt);
                        saved_nrt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
                        saved_nrt->rt_rmx.rmx_locks |=
                            (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
@@ -613,7 +613,12 @@ report:
                            info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA],
                            info.rti_info[RTAX_GATEWAY], ifscope);
 
-                       rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, rt);
+                       if ((error = rt_setmetrics(rtm->rtm_inits,
+                           &rtm->rtm_rmx, rt))) {
+                                int tmp = error;
+                                RT_UNLOCK(rt);
+                                senderr(tmp);
+                       }
                        if (info.rti_info[RTAX_GENMASK])
                                rt->rt_genmask = info.rti_info[RTAX_GENMASK];
                        /* FALLTHRU */
@@ -705,41 +710,54 @@ rt_setexpire(struct rtentry *rt, uint64_t expiry)
        }
 }
 
-static void
+static int
 rt_setmetrics(u_int32_t which, struct rt_metrics *in, struct rtentry *out)
 {
-       struct timeval caltime;
-
-       getmicrotime(&caltime);
-
+       if (!(which & RTV_REFRESH_HOST)) {
+               struct timeval caltime;
+               getmicrotime(&caltime);
 #define        metric(f, e) if (which & (f)) out->rt_rmx.e = in->e;
-       metric(RTV_RPIPE, rmx_recvpipe);
-       metric(RTV_SPIPE, rmx_sendpipe);
-       metric(RTV_SSTHRESH, rmx_ssthresh);
-       metric(RTV_RTT, rmx_rtt);
-       metric(RTV_RTTVAR, rmx_rttvar);
-       metric(RTV_HOPCOUNT, rmx_hopcount);
-       metric(RTV_MTU, rmx_mtu);
-       metric(RTV_EXPIRE, rmx_expire);
+               metric(RTV_RPIPE, rmx_recvpipe);
+               metric(RTV_SPIPE, rmx_sendpipe);
+               metric(RTV_SSTHRESH, rmx_ssthresh);
+               metric(RTV_RTT, rmx_rtt);
+               metric(RTV_RTTVAR, rmx_rttvar);
+               metric(RTV_HOPCOUNT, rmx_hopcount);
+               metric(RTV_MTU, rmx_mtu);
+               metric(RTV_EXPIRE, rmx_expire);
 #undef metric
+               if (out->rt_rmx.rmx_expire > 0) {
+                       /* account for system time change */
+                       getmicrotime(&caltime);
+                       out->base_calendartime +=
+                               NET_CALCULATE_CLOCKSKEW(caltime,
+                                               out->base_calendartime,
+                                               net_uptime(), out->base_uptime);
+                       rt_setexpire(out,
+                                       out->rt_rmx.rmx_expire -
+                                       out->base_calendartime +
+                                       out->base_uptime);
+               } else {
+                       rt_setexpire(out, 0);
+               }
 
-       if (out->rt_rmx.rmx_expire > 0) {
-               /* account for system time change */
-               getmicrotime(&caltime);
-               out->base_calendartime +=
-                   NET_CALCULATE_CLOCKSKEW(caltime,
-                   out->base_calendartime,
-                   net_uptime(), out->base_uptime);
-               rt_setexpire(out,
-                   out->rt_rmx.rmx_expire -
-                   out->base_calendartime +
-                   out->base_uptime);
+               VERIFY(out->rt_expire == 0 || out->rt_rmx.rmx_expire != 0);
+               VERIFY(out->rt_expire != 0 || out->rt_rmx.rmx_expire == 0);
        } else {
-               rt_setexpire(out, 0);
-       }
+               /* Only RTV_REFRESH_HOST must be set */
+               if ((which & ~RTV_REFRESH_HOST) ||
+                   (out->rt_flags & RTF_STATIC) ||
+                   !(out->rt_flags & RTF_LLINFO)) {
+                       return (EINVAL);
+               }
 
-       VERIFY(out->rt_expire == 0 || out->rt_rmx.rmx_expire != 0);
-       VERIFY(out->rt_expire != 0 || out->rt_rmx.rmx_expire == 0);
+               if (out->rt_llinfo_refresh == NULL) {
+                       return (ENOTSUP);
+               }
+
+               out->rt_llinfo_refresh(out);
+       }
+       return (0);
 }
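
rt_setmetrics() now returns an error and special-cases RTV_REFRESH_HOST: when that is the only bit set, no metrics are written; the route must be a non-static RTF_LLINFO host route, and its rt_llinfo_refresh callback is invoked to expedite a link-layer (ARP/ND) refresh, otherwise EINVAL or ENOTSUP comes back to the caller. A hedged userspace sketch of driving this over a routing socket with RTM_CHANGE follows; RTV_REFRESH_HOST is private, so its value is restated from the route.h hunk above, the destination is a placeholder, and error handling is minimal.

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <net/route.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    #ifndef RTV_REFRESH_HOST
    #define RTV_REFRESH_HOST 0x100          /* private; value from the header diff */
    #endif

    int
    main(void)
    {
            struct {
                    struct rt_msghdr hdr;
                    struct sockaddr_in dst;
            } msg;
            int s = socket(PF_ROUTE, SOCK_RAW, 0);

            if (s < 0) {
                    perror("socket(PF_ROUTE)");
                    return 1;
            }
            memset(&msg, 0, sizeof(msg));
            msg.hdr.rtm_msglen = sizeof(msg);
            msg.hdr.rtm_version = RTM_VERSION;
            msg.hdr.rtm_type = RTM_CHANGE;
            msg.hdr.rtm_flags = RTF_HOST | RTF_UP;
            msg.hdr.rtm_addrs = RTA_DST;
            msg.hdr.rtm_seq = 1;
            msg.hdr.rtm_pid = getpid();
            msg.hdr.rtm_inits = RTV_REFRESH_HOST;   /* only this bit may be set */

            msg.dst.sin_len = sizeof(msg.dst);
            msg.dst.sin_family = AF_INET;
            inet_pton(AF_INET, "192.0.2.1", &msg.dst.sin_addr);   /* placeholder host */

            if (write(s, &msg, sizeof(msg)) < 0)
                    perror("write");        /* EINVAL/ENOTSUP per the checks above */
            close(s);
            return 0;
    }
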
 
 static void
@@ -983,7 +1001,7 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo)
        struct rt_msghdr *rtm;
        struct mbuf *m;
        int i;
-       int len, dlen;
+       int len, dlen, off;
 
        switch (type) {
 
@@ -1004,8 +1022,6 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo)
        default:
                len = sizeof (struct rt_msghdr);
        }
-       if (len > MCLBYTES)
-               panic("rt_msg1");
        m = m_gethdr(M_DONTWAIT, MT_DATA);
        if (m && len > MHLEN) {
                MCLGET(m, M_DONTWAIT);
@@ -1020,6 +1036,7 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo)
        m->m_pkthdr.rcvif = NULL;
        rtm = mtod(m, struct rt_msghdr *);
        bzero((caddr_t)rtm, len);
+       off = len;
        for (i = 0; i < RTAX_MAX; i++) {
                struct sockaddr *sa, *hint;
                uint8_t ssbuf[SOCK_MAXADDRLEN + 1];
@@ -1048,9 +1065,10 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo)
                }
 
                rtinfo->rti_addrs |= (1 << i);
-               dlen = ROUNDUP32(sa->sa_len);
-               m_copyback(m, len, dlen, (caddr_t)sa);
-               len += dlen;
+               dlen = sa->sa_len;
+               m_copyback(m, off, dlen, (caddr_t)sa);
+               len = off + dlen;
+               off += ROUNDUP32(dlen);
        }
        if (m->m_pkthdr.len != len) {
                m_freem(m);
@@ -1067,7 +1085,7 @@ rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w,
        kauth_cred_t* credp)
 {
        int i;
-       int len, dlen, second_time = 0;
+       int len, dlen, rlen, second_time = 0;
        caddr_t cp0;
 
        rtinfo->rti_addrs = 0;
@@ -1143,12 +1161,15 @@ again:
                }
 
                rtinfo->rti_addrs |= (1 << i);
-               dlen = ROUNDUP32(sa->sa_len);
+               dlen = sa->sa_len;
+               rlen = ROUNDUP32(dlen);
                if (cp) {
-                       bcopy((caddr_t)sa, cp, (unsigned)dlen);
-                       cp += dlen;
+                       bcopy((caddr_t)sa, cp, (size_t)dlen);
+                       if (dlen != rlen)
+                               bzero(cp + dlen, rlen - dlen);
+                       cp += rlen;
                }
-               len += dlen;
+               len += rlen;
        }
        if (cp == NULL && w != NULL && !second_time) {
                struct walkarg *rw = w;
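
rt_msg1() and rt_msg2() still lay out each sockaddr on a 32-bit boundary (ROUNDUP32), but they now copy only sa_len real bytes and explicitly zero the padding rather than copying whatever followed the source sockaddr. Consumers are unaffected and keep advancing in rounded-up steps; a typical userspace walk over the sockaddrs that follow a struct rt_msghdr looks roughly like the sketch below (walk_route_addrs and SA_ROUNDUP32 are illustrative names).

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/socket.h>
    #include <net/route.h>

    /* Round a sockaddr length up to a 4-byte boundary, as the kernel does. */
    #define SA_ROUNDUP32(len) ((len) == 0 ? sizeof(uint32_t) : (((len) + 3) & ~3u))

    static void
    walk_route_addrs(const struct rt_msghdr *rtm)
    {
            const char *cp = (const char *)(rtm + 1);
            int i;

            for (i = 0; i < RTAX_MAX; i++) {
                    const struct sockaddr *sa;

                    if ((rtm->rtm_addrs & (1 << i)) == 0)
                            continue;               /* slot not present */
                    sa = (const struct sockaddr *)(const void *)cp;
                    printf("addr slot %d: family %d, len %d\n",
                        i, sa->sa_family, sa->sa_len);
                    cp += SA_ROUNDUP32(sa->sa_len); /* data plus zeroed padding */
            }
    }
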
index 03bb3672842a373a760bbdc4503cd0f5bbb43e8a..9ab2a319211b5813cbf51545cfbcc13bb948b552 100644 (file)
@@ -19,27 +19,37 @@ KERNELFILES = \
        kpi_ipfilter.h in_arp.h
 
 PRIVATE_DATAFILES = \
+       flow_divert_proto.h \
+       igmp_var.h \
+       in.h \
+       in_gif.h \
+       in_pcb.h \
+       ip.h \
+       ip_compat.h \
        ip_dummynet.h \
        ip_flowid.h \
-       ip_fw.h ip_fw2.h \
+       ip_fw.h \
+       ip_fw2.h \
+       mptcp_var.h \
+       tcp.h \
        tcp_debug.h \
-       in_gif.h ip_compat.h \
-       flow_divert_proto.h \
-       mptcp_var.h
+       tcp_var.h \
+       tcp_cache.h \
+       udp.h
 
 PRIVATE_KERNELFILES = ${KERNELFILES} \
-       ip_ecn.h ip_encap.h 
+       ip_ecn.h ip_encap.h
 
 INSTALL_MI_LIST        = ${DATAFILES}
 
 INSTALL_MI_DIR = netinet
 
-EXPORT_MI_LIST = ${DATAFILES} ${KERNELFILES}
+EXPORT_MI_LIST = ${DATAFILES} ${KERNELFILES}
 
 EXPORT_MI_DIR = ${INSTALL_MI_DIR}
 
-INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES}
-INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES}
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
+INSTALL_KF_MI_LCL_LIST = $(sort ${DATAFILES} ${PRIVATE_DATAFILES} ${PRIVATE_KERNELFILES})
 
 include $(MakeInc_rule)
 include $(MakeInc_dir)
index 697016c4996a806e54d433e36391091f817c69d4..76e29f8d607a50027fec5e7b0856e9db2c77a13b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2012-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -57,6 +57,7 @@
 #include <netinet/flow_divert.h>
 #include <netinet/flow_divert_proto.h>
 #if INET6
+#include <netinet6/in6_pcb.h>
 #include <netinet6/ip6protosw.h>
 #endif /* INET6 */
 #include <dev/random/randomdev.h>
@@ -149,13 +150,40 @@ static    kern_ctl_ref                            g_flow_divert_kctl_ref                  = NULL;
 
 static struct protosw                          g_flow_divert_in_protosw;
 static struct pr_usrreqs                       g_flow_divert_in_usrreqs;
+static struct protosw                          g_flow_divert_in_udp_protosw;
+static struct pr_usrreqs                       g_flow_divert_in_udp_usrreqs;
 #if INET6
 static struct ip6protosw                       g_flow_divert_in6_protosw;
 static struct pr_usrreqs                       g_flow_divert_in6_usrreqs;
+static struct ip6protosw                       g_flow_divert_in6_udp_protosw;
+static struct pr_usrreqs                       g_flow_divert_in6_udp_usrreqs;
 #endif /* INET6 */
 
 static struct protosw                          *g_tcp_protosw                                  = NULL;
 static struct ip6protosw                       *g_tcp6_protosw                                 = NULL;
+static struct protosw                          *g_udp_protosw                                  = NULL;
+static struct ip6protosw                       *g_udp6_protosw                                 = NULL;
+
+static errno_t
+flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr, struct sockaddr **dup);
+
+static errno_t
+flow_divert_inp_to_sockaddr(const struct inpcb *inp, struct sockaddr **local_socket);
+
+static boolean_t
+flow_divert_is_sockaddr_valid(struct sockaddr *addr);
+
+static int
+flow_divert_append_target_endpoint_tlv(mbuf_t connect_packet, struct sockaddr *toaddr);
+
+struct sockaddr *
+flow_divert_get_buffered_target_address(mbuf_t buffer);
+
+static boolean_t
+flow_divert_has_pcb_local_address(const struct inpcb *inp);
+
+static void
+flow_divert_disconnect_socket(struct socket *so);
 
 static inline int
 flow_divert_pcb_cmp(const struct flow_divert_pcb *pcb_a, const struct flow_divert_pcb *pcb_b)
@@ -210,12 +238,11 @@ flow_divert_pcb_lookup(uint32_t hash, struct flow_divert_group *group)
 static errno_t
 flow_divert_pcb_insert(struct flow_divert_pcb *fd_cb, uint32_t ctl_unit)
 {
-       int                                                     error                                           = 0;
+       errno_t                                                 error                                           = 0;
        struct                                          flow_divert_pcb *exist          = NULL;
        struct flow_divert_group        *group;
        static uint32_t                         g_nextkey                                       = 1;
        static uint32_t                         g_hash_seed                                     = 0;
-       errno_t                                         result                                          = 0;
        int                                                     try_count                                       = 0;
 
        if (ctl_unit == 0 || ctl_unit >= GROUP_COUNT_MAX) {
@@ -277,7 +304,7 @@ flow_divert_pcb_insert(struct flow_divert_pcb *fd_cb, uint32_t ctl_unit)
                FDRETAIN(fd_cb);                /* The group now has a reference */
        } else {
                fd_cb->hash = 0;
-               result = EEXIST;
+               error = EEXIST;
        }
 
        socket_unlock(fd_cb->so, 0);
@@ -286,7 +313,7 @@ done:
        lck_rw_done(&g_flow_divert_group_lck);
        socket_lock(fd_cb->so, 0);
 
-       return result;
+       return error;
 }
 
 static struct flow_divert_pcb *
@@ -371,10 +398,10 @@ flow_divert_packet_init(struct flow_divert_pcb *fd_cb, uint8_t packet_type, mbuf
 }
 
 static int
-flow_divert_packet_append_tlv(mbuf_t packet, uint8_t type, size_t length, const void *value)
+flow_divert_packet_append_tlv(mbuf_t packet, uint8_t type, uint32_t length, const void *value)
 {
-       size_t  net_length      = htonl(length);
-       int             error           = 0;
+       uint32_t        net_length      = htonl(length);
+       int                     error           = 0;
 
        error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(type), &type, MBUF_DONTWAIT);
        if (error) {
@@ -400,10 +427,10 @@ flow_divert_packet_append_tlv(mbuf_t packet, uint8_t type, size_t length, const
 static int
 flow_divert_packet_find_tlv(mbuf_t packet, int offset, uint8_t type, int *err, int next)
 {
-       size_t  cursor                  = offset;
-       int             error                   = 0;
-       size_t  curr_length;
-       uint8_t curr_type;
+       size_t          cursor                  = offset;
+       int                     error                   = 0;
+       uint32_t        curr_length;
+       uint8_t         curr_type;
 
        *err = 0;
 
@@ -435,11 +462,11 @@ flow_divert_packet_find_tlv(mbuf_t packet, int offset, uint8_t type, int *err, i
 }
 
 static int
-flow_divert_packet_get_tlv(mbuf_t packet, int offset, uint8_t type, size_t buff_len, void *buff, size_t *val_size)
+flow_divert_packet_get_tlv(mbuf_t packet, int offset, uint8_t type, size_t buff_len, void *buff, uint32_t *val_size)
 {
-       int             error           = 0;
-       size_t  length;
-       int             tlv_offset;
+       int                     error           = 0;
+       uint32_t        length;
+       int                     tlv_offset;
 
        tlv_offset = flow_divert_packet_find_tlv(packet, offset, type, &error, 0);
        if (tlv_offset < 0) {
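
These changes make the TLV length fields consistently 32-bit (size_t was wider than the on-the-wire field). As the surrounding code shows, a flow-divert TLV is a 1-byte type, a 4-byte network-order length, then the value bytes (the data handler further down skips sizeof(uint8_t) + sizeof(uint32_t) + value when stepping past one). A standalone sketch of encoding and finding such TLVs in a flat byte buffer, without mbufs (tlv_append/tlv_find are illustrative names):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>
    #include <arpa/inet.h>          /* htonl / ntohl */

    /* Append one TLV (type, 4-byte big-endian length, value); -1 if it won't fit. */
    static int
    tlv_append(uint8_t *buf, size_t buflen, size_t *off,
        uint8_t type, uint32_t length, const void *value)
    {
            uint32_t net_length = htonl(length);

            if (*off + 1 + sizeof(net_length) + length > buflen)
                    return -1;
            buf[(*off)++] = type;
            memcpy(buf + *off, &net_length, sizeof(net_length));
            *off += sizeof(net_length);
            memcpy(buf + *off, value, length);
            *off += length;
            return 0;
    }

    /* Return a pointer to the value of the first TLV of the given type, or NULL. */
    static const uint8_t *
    tlv_find(const uint8_t *buf, size_t buflen, uint8_t type, uint32_t *out_len)
    {
            size_t cursor = 0;

            while (cursor + 1 + sizeof(uint32_t) <= buflen) {
                    uint8_t curr_type = buf[cursor];
                    uint32_t curr_length;

                    memcpy(&curr_length, buf + cursor + 1, sizeof(curr_length));
                    curr_length = ntohl(curr_length);
                    if (cursor + 1 + sizeof(uint32_t) + curr_length > buflen)
                            return NULL;            /* truncated */
                    if (curr_type == type) {
                            *out_len = curr_length;
                            return buf + cursor + 1 + sizeof(uint32_t);
                    }
                    cursor += 1 + sizeof(uint32_t) + curr_length;
            }
            return NULL;
    }
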
@@ -778,8 +805,9 @@ flow_divert_trie_insert(struct flow_divert_trie *trie, uint16_t string_start, si
        return current;
 }
 
+#define APPLE_WEBCLIP_ID_PREFIX        "com.apple.webapp"
 static uint16_t
-flow_divert_trie_search(struct flow_divert_trie *trie, const uint8_t *string_bytes)
+flow_divert_trie_search(struct flow_divert_trie *trie, uint8_t *string_bytes)
 {
        uint16_t current = trie->root;
        uint16_t string_idx = 0;
@@ -796,6 +824,10 @@ flow_divert_trie_search(struct flow_divert_trie *trie, const uint8_t *string_byt
                if (node_idx == node_end) {
                        if (string_bytes[string_idx] == '\0') {
                                return current; /* Got an exact match */
+                       } else if (string_idx == strlen(APPLE_WEBCLIP_ID_PREFIX) &&
+                                  0 == strncmp((const char *)string_bytes, APPLE_WEBCLIP_ID_PREFIX, string_idx)) {
+                               string_bytes[string_idx] = '\0'; 
+                               return current; /* Got an apple webclip id prefix match */
                        } else if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
                                next = TRIE_CHILD(trie, current, string_bytes[string_idx]);
                        }
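
The trie search now accepts a prefix match for webclip signing identifiers: when the walk has consumed exactly strlen(APPLE_WEBCLIP_ID_PREFIX) bytes and they equal "com.apple.webapp", the identifier is truncated in place (which is why the parameter lost its const qualifier) and the node reached so far is returned. Outside the trie machinery the check reduces to something like the sketch below (match_webclip_prefix is an illustrative name).

    #include <stdbool.h>
    #include <string.h>

    #define APPLE_WEBCLIP_ID_PREFIX "com.apple.webapp"

    /* Treat any signing ID starting with the webclip prefix as the prefix itself. */
    static bool
    match_webclip_prefix(char *signing_id)
    {
            size_t plen = strlen(APPLE_WEBCLIP_ID_PREFIX);

            if (strncmp(signing_id, APPLE_WEBCLIP_ID_PREFIX, plen) == 0 &&
                signing_id[plen] != '\0') {
                    signing_id[plen] = '\0';        /* keep only the prefix */
                    return true;
            }
            return false;
    }
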
@@ -841,7 +873,7 @@ flow_divert_send_packet(struct flow_divert_pcb *fd_cb, mbuf_t packet, Boolean en
 
        if (fd_cb->group == NULL) {
                fd_cb->so->so_error = ECONNABORTED;
-               soisdisconnected(fd_cb->so);
+               flow_divert_disconnect_socket(fd_cb->so);
                return ECONNABORTED;
        }
 
@@ -873,6 +905,7 @@ static int
 flow_divert_send_connect(struct flow_divert_pcb *fd_cb, struct sockaddr *to, mbuf_t connect_packet)
 {
        int                             error                   = 0;
+       int                             flow_type               = 0;
 
        error = flow_divert_packet_append_tlv(connect_packet,
                                              FLOW_DIVERT_TLV_TRAFFIC_CLASS,
@@ -882,6 +915,23 @@ flow_divert_send_connect(struct flow_divert_pcb *fd_cb, struct sockaddr *to, mbu
                goto done;
        }
 
+       if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
+               flow_type = FLOW_DIVERT_FLOW_TYPE_TCP;
+       } else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
+               flow_type = FLOW_DIVERT_FLOW_TYPE_UDP;
+       } else {
+               error = EINVAL;
+               goto done;
+       }
+       error = flow_divert_packet_append_tlv(connect_packet,
+                                             FLOW_DIVERT_TLV_FLOW_TYPE,
+                                             sizeof(flow_type),
+                                             &flow_type);
+
+       if (error) {
+               goto done;
+       }
+
        if (fd_cb->so->so_flags & SOF_DELEGATED) {
                error = flow_divert_packet_append_tlv(connect_packet,
                                                      FLOW_DIVERT_TLV_PID,
@@ -923,33 +973,27 @@ flow_divert_send_connect(struct flow_divert_pcb *fd_cb, struct sockaddr *to, mbu
                fd_cb->connect_token = NULL;
        } else {
                uint32_t ctl_unit = htonl(fd_cb->control_group_unit);
-               int port;
 
                error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit);
                if (error) {
                        goto done;
                }
 
-               error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_ADDRESS, to->sa_len, to);
-               if (error) {
-                       goto done;
-               }
-
-               if (to->sa_family == AF_INET) {
-                       port = ntohs((satosin(to))->sin_port);
-               }
-#if INET6
-               else {
-                       port = ntohs((satosin6(to))->sin6_port);
-               }
-#endif
-
-               error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_PORT, sizeof(port), &port);
+               error = flow_divert_append_target_endpoint_tlv(connect_packet, to);
                if (error) {
                        goto done;
                }
        }
 
+       if (fd_cb->local_address != NULL) {
+               /* socket is bound. */
+                error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_LOCAL_ADDR,
+                                                       sizeof(struct sockaddr_storage), fd_cb->local_address);
+                if (error) {
+                        goto done;
+                }
+        }
+
        error = flow_divert_send_packet(fd_cb, connect_packet, TRUE);
        if (error) {
                goto done;
@@ -972,7 +1016,7 @@ flow_divert_send_connect_result(struct flow_divert_pcb *fd_cb)
                goto done;
        }
 
-       rbuff_space = sbspace(&fd_cb->so->so_rcv);
+       rbuff_space = fd_cb->so->so_rcv.sb_hiwat;
        if (rbuff_space < 0) {
                rbuff_space = 0;
        }
@@ -992,7 +1036,7 @@ flow_divert_send_connect_result(struct flow_divert_pcb *fd_cb)
 
 done:
        if (error && packet != NULL) {
-               mbuf_free(packet);
+               mbuf_freem(packet);
        }
 
        return error;
@@ -1092,12 +1136,12 @@ flow_divert_send_close_if_needed(struct flow_divert_pcb *fd_cb)
        }
 
        if (flow_divert_tunnel_how_closed(fd_cb) == SHUT_RDWR) {
-               soisdisconnected(fd_cb->so);
+               flow_divert_disconnect_socket(fd_cb->so);
        }
 }
 
 static errno_t
-flow_divert_send_data_packet(struct flow_divert_pcb *fd_cb, mbuf_t data, size_t data_len, Boolean force)
+flow_divert_send_data_packet(struct flow_divert_pcb *fd_cb, mbuf_t data, size_t data_len, struct sockaddr *toaddr, Boolean force)
 {
        mbuf_t  packet;
        mbuf_t  last;
@@ -1109,15 +1153,22 @@ flow_divert_send_data_packet(struct flow_divert_pcb *fd_cb, mbuf_t data, size_t
                return error;
        }
 
+       if (toaddr != NULL) {
+               error = flow_divert_append_target_endpoint_tlv(packet, toaddr);
+               if (error) {
+                       FDLOG(LOG_ERR, fd_cb, "flow_divert_append_target_endpoint_tlv() failed: %d", error);
+                       return error;
+               }
+       }
+
        last = m_last(packet);
        mbuf_setnext(last, data);
        mbuf_pkthdr_adjustlen(packet, data_len);
-
        error = flow_divert_send_packet(fd_cb, packet, force);
 
        if (error) {
                mbuf_setnext(last, NULL);
-               mbuf_free(packet);
+               mbuf_freem(packet);
        } else {
                fd_cb->bytes_sent += data_len;
                flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
@@ -1147,28 +1198,72 @@ flow_divert_send_buffered_data(struct flow_divert_pcb *fd_cb, Boolean force)
                to_send = fd_cb->send_window;
        }
 
-       while (sent < to_send) {
-               mbuf_t  data;
-               size_t  data_len;
+       if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
+               while (sent < to_send) {
+                       mbuf_t  data;
+                       size_t  data_len;
 
-               data_len = to_send - sent;
-               if (data_len > FLOW_DIVERT_CHUNK_SIZE) {
-                       data_len = FLOW_DIVERT_CHUNK_SIZE;
-               }
+                       data_len = to_send - sent;
+                       if (data_len > FLOW_DIVERT_CHUNK_SIZE) {
+                               data_len = FLOW_DIVERT_CHUNK_SIZE;
+                       }
 
-               error = mbuf_copym(buffer, sent, data_len, MBUF_DONTWAIT, &data);
-               if (error) {
-                       FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
-                       break;
-               }
+                       error = mbuf_copym(buffer, sent, data_len, MBUF_DONTWAIT, &data);
+                       if (error) {
+                               FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
+                               break;
+                       }
 
-               error = flow_divert_send_data_packet(fd_cb, data, data_len, force);
-               if (error) {
-                       mbuf_free(data);
-                       break;
-               }
+                       error = flow_divert_send_data_packet(fd_cb, data, data_len, NULL, force);
+                       if (error) {
+                               mbuf_freem(data);
+                               break;
+                       }
 
-               sent += data_len;
+                       sent += data_len;
+               }
+               sbdrop(&fd_cb->so->so_snd, sent);
+               sowwakeup(fd_cb->so);
+       } else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
+               mbuf_t data;
+               mbuf_t m;
+               size_t data_len;
+
+               while(buffer) {
+                       struct sockaddr *toaddr = flow_divert_get_buffered_target_address(buffer);
+
+                       m = buffer;
+                       if (toaddr != NULL) {
+                               /* look for data in the chain */
+                               do {
+                                       m = m->m_next;
+                                       if (m != NULL && m->m_type == MT_DATA) {
+                                               break;
+                                       }
+                               } while(m);
+                               if (m == NULL) {
+                                       /* unexpected */
+                                       FDLOG0(LOG_ERR, fd_cb, "failed to find type MT_DATA in the mbuf chain.");
+                                       goto move_on;
+                               }
+                       }
+                       data_len = mbuf_pkthdr_len(m);
+                       FDLOG(LOG_DEBUG, fd_cb, "mbuf_copym() data_len = %u", data_len);
+                       error = mbuf_copym(m, 0, data_len, MBUF_DONTWAIT, &data);
+                       if (error) {
+                               FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
+                               break;
+                       }
+                       error = flow_divert_send_data_packet(fd_cb, data, data_len, toaddr, force);
+                       if (error) {
+                               mbuf_freem(data);
+                               break;
+                       }
+                       sent += data_len;
+move_on:
+                       buffer = buffer->m_nextpkt;
+                       (void) sbdroprecord(&(fd_cb->so->so_snd));
+               }
        }
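
For SOCK_DGRAM sockets the send buffer is now drained record by record: each record, queued by sbappendaddr()/sbappendrecord() further down, may begin with an MT_SONAME mbuf holding that datagram's own destination, followed by the MT_DATA payload, and each datagram goes out with its own target-endpoint TLV before sbdroprecord() discards the record. A simplified model of that walk, with stand-in types instead of real mbufs (fake_mbuf and friends are illustrative, not kernel structures):

    #include <stddef.h>
    #include <stdio.h>

    enum fake_mtype { FAKE_MT_SONAME, FAKE_MT_DATA };

    struct fake_mbuf {
            struct fake_mbuf *m_next;       /* next buffer within this record */
            struct fake_mbuf *m_nextpkt;    /* next record in the send buffer */
            enum fake_mtype   m_type;
            size_t            m_len;
    };

    /* Walk buffered datagram records: optional address first, then payload. */
    static void
    send_buffered_datagrams(struct fake_mbuf *buffer)
    {
            while (buffer != NULL) {
                    struct fake_mbuf *addr = NULL, *data = buffer;

                    if (buffer->m_type == FAKE_MT_SONAME) {
                            addr = buffer;          /* per-datagram destination */
                            do {
                                    data = data->m_next;
                            } while (data != NULL && data->m_type != FAKE_MT_DATA);
                    }
                    if (data != NULL)
                            printf("send %zu bytes%s\n", data->m_len,
                                addr != NULL ? " with explicit destination" : "");
                    buffer = buffer->m_nextpkt;     /* then sbdroprecord() */
            }
    }
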
 
        if (sent > 0) {
@@ -1178,19 +1273,14 @@ flow_divert_send_buffered_data(struct flow_divert_pcb *fd_cb, Boolean force)
                } else {
                        fd_cb->send_window = 0;
                }
-               sbdrop(&fd_cb->so->so_snd, sent);
-               sowwakeup(fd_cb->so);
        }
 }
 
 static int
-flow_divert_send_app_data(struct flow_divert_pcb *fd_cb, mbuf_t data)
+flow_divert_send_app_data(struct flow_divert_pcb *fd_cb, mbuf_t data, struct sockaddr *toaddr)
 {
        size_t  to_send         = mbuf_pkthdr_len(data);
-       size_t  sent            = 0;
-       int             error           = 0;
-       mbuf_t  remaining_data  = data;
-       mbuf_t  pkt_data        = NULL;
+       int     error           = 0;
 
        if (to_send > fd_cb->send_window) {
                to_send = fd_cb->send_window;
@@ -1200,57 +1290,94 @@ flow_divert_send_app_data(struct flow_divert_pcb *fd_cb, mbuf_t data)
                to_send = 0;    /* If the send buffer is non-empty, then we can't send anything */
        }
 
-       while (sent < to_send) {
-               size_t  pkt_data_len;
+       if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
+               size_t  sent            = 0;
+               mbuf_t  remaining_data  = data;
+               mbuf_t  pkt_data        = NULL;
+               while (sent < to_send) {
+                       size_t  pkt_data_len;
+
+                       pkt_data = remaining_data;
 
-               pkt_data = remaining_data;
+                       if ((to_send - sent) > FLOW_DIVERT_CHUNK_SIZE) {
+                               pkt_data_len = FLOW_DIVERT_CHUNK_SIZE;
+                       } else {
+                               pkt_data_len = to_send - sent;
+                       }
+
+                       if (pkt_data_len < mbuf_pkthdr_len(pkt_data)) {
+                               error = mbuf_split(pkt_data, pkt_data_len, MBUF_DONTWAIT, &remaining_data);
+                               if (error) {
+                                       FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
+                                       pkt_data = NULL;
+                                       break;
+                               }
+                       } else {
+                               remaining_data = NULL;
+                       }
+
+                       error = flow_divert_send_data_packet(fd_cb, pkt_data, pkt_data_len, NULL, FALSE);
 
-               if ((to_send - sent) > FLOW_DIVERT_CHUNK_SIZE) {
-                       pkt_data_len = FLOW_DIVERT_CHUNK_SIZE;
-                       error = mbuf_split(pkt_data, pkt_data_len, MBUF_DONTWAIT, &remaining_data);
                        if (error) {
-                               FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
-                               pkt_data = NULL;
                                break;
                        }
-               } else {
-                       pkt_data_len = to_send - sent;
-                       remaining_data = NULL;
-               }
 
-               error = flow_divert_send_data_packet(fd_cb, pkt_data, pkt_data_len, FALSE);
-
-               if (error) {
-                       break;
+                       pkt_data = NULL;
+                       sent += pkt_data_len;
                }
 
-               pkt_data = NULL;
-               sent += pkt_data_len;
-       }
+               fd_cb->send_window -= sent;
 
-       fd_cb->send_window -= sent;
+               error = 0;
 
-       error = 0;
-
-       if (pkt_data != NULL) {
-               if (sbspace(&fd_cb->so->so_snd) > 0) {
-                       if (!sbappendstream(&fd_cb->so->so_snd, pkt_data)) {
-                               FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with pkt_data, send buffer size = %u, send_window = %u\n",
-                                               fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
+               if (pkt_data != NULL) {
+                       if (sbspace(&fd_cb->so->so_snd) > 0) {
+                               if (!sbappendstream(&fd_cb->so->so_snd, pkt_data)) {
+                                       FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with pkt_data, send buffer size = %u, send_window = %u\n",
+                                                       fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
+                               }
+                       } else {
+                               error = ENOBUFS;
                        }
-               } else {
-                       error = ENOBUFS;
                }
-       }
 
-       if (remaining_data != NULL) {
-               if (sbspace(&fd_cb->so->so_snd) > 0) {
-                       if (!sbappendstream(&fd_cb->so->so_snd, remaining_data)) {
-                               FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with remaining_data, send buffer size = %u, send_window = %u\n",
-                                               fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
+               if (remaining_data != NULL) {
+                       if (sbspace(&fd_cb->so->so_snd) > 0) {
+                               if (!sbappendstream(&fd_cb->so->so_snd, remaining_data)) {
+                                       FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with remaining_data, send buffer size = %u, send_window = %u\n",
+                                                       fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
+                               }
+                       } else {
+                               error = ENOBUFS;
+                       }
+               }
+       } else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
+               if (to_send) {
+                       error = flow_divert_send_data_packet(fd_cb, data, to_send, toaddr, FALSE);
+                       if (error) {
+                               FDLOG(LOG_ERR, fd_cb, "flow_divert_send_data_packet failed. send data size = %u", to_send);
+                       } else {
+                               fd_cb->send_window -= to_send;
                        }
                } else {
-                       error = ENOBUFS;
+                       /* buffer it */
+                       if (sbspace(&fd_cb->so->so_snd) >= (int)mbuf_pkthdr_len(data)) {
+                               if (toaddr != NULL) {
+                                       if (!sbappendaddr(&fd_cb->so->so_snd, toaddr, data, NULL, &error)) {
+                                               FDLOG(LOG_ERR, fd_cb,
+                                                       "sbappendaddr failed. send buffer size = %u, send_window = %u, error = %d\n",
+                                                       fd_cb->so->so_snd.sb_cc, fd_cb->send_window, error);
+                                       }
+                               } else {
+                                       if (!sbappendrecord(&fd_cb->so->so_snd, data)) {
+                                               FDLOG(LOG_ERR, fd_cb,
+                                                       "sbappendrecord failed. send buffer size = %u, send_window = %u, error = %d\n",
+                                                       fd_cb->so->so_snd.sb_cc, fd_cb->send_window, error);
+                                       }
+                               }
+                       } else {
+                               error = ENOBUFS;
+                       }
                }
        }
 
@@ -1408,14 +1535,15 @@ flow_divert_handle_connect_result(struct flow_divert_pcb *fd_cb, mbuf_t packet,
                        goto set_socket_state;
                }
 
-               if (local_address.ss_family != 0) {
+               if (local_address.ss_family == 0 && fd_cb->local_address == NULL) {
+                       error = EINVAL;
+                       goto set_socket_state;
+               }
+               if (local_address.ss_family != 0 && fd_cb->local_address == NULL) {
                        if (local_address.ss_len > sizeof(local_address)) {
                                local_address.ss_len = sizeof(local_address);
                        }
                        fd_cb->local_address = dup_sockaddr((struct sockaddr *)&local_address, 1);
-               } else {
-                       error = EINVAL;
-                       goto set_socket_state;
                }
 
                if (remote_address.ss_family != 0) {
@@ -1482,7 +1610,7 @@ set_socket_state:
                                flow_divert_update_closed_state(fd_cb, SHUT_RDWR, TRUE);
                                fd_cb->so->so_error = connect_error;
                        }
-                       soisdisconnected(fd_cb->so);
+                       flow_divert_disconnect_socket(fd_cb->so);
                } else {
                        soisconnected(fd_cb->so);
                }
@@ -1528,7 +1656,7 @@ flow_divert_handle_close(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offse
                
                how = flow_divert_tunnel_how_closed(fd_cb);
                if (how == SHUT_RDWR) {
-                       soisdisconnected(fd_cb->so);
+                       flow_divert_disconnect_socket(fd_cb->so);
                } else if (how == SHUT_RD) {
                        socantrcvmore(fd_cb->so);
                } else if (how == SHUT_WR) {
@@ -1540,49 +1668,119 @@ flow_divert_handle_close(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offse
        FDUNLOCK(fd_cb);
 }
 
-static void
-flow_divert_handle_data(struct flow_divert_pcb *fd_cb, mbuf_t packet, size_t offset)
+static mbuf_t
+flow_divert_get_control_mbuf(struct flow_divert_pcb *fd_cb)
 {
-       int             error           = 0;
-       mbuf_t  data            = NULL;
-       size_t  data_size;
+       struct inpcb *inp = sotoinpcb(fd_cb->so);
+       if (inp->inp_vflag & INP_IPV4 && inp->inp_flags & INP_RECVDSTADDR) {
+               struct sockaddr_in *sin = (struct sockaddr_in *)(void *)fd_cb->local_address;
 
-       data_size = (mbuf_pkthdr_len(packet) - offset);
+               return sbcreatecontrol((caddr_t) &sin->sin_addr, sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
+       } else if (inp->inp_vflag & INP_IPV6 && (inp->inp_flags & IN6P_PKTINFO) != 0) {
+               struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)fd_cb->local_address;
+               struct in6_pktinfo pi6;
 
-       FDLOG(LOG_DEBUG, fd_cb, "received %lu bytes of data", data_size);
-
-       error = mbuf_split(packet, offset, MBUF_DONTWAIT, &data);
-       if (error || data == NULL) {
-               FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
-               return;
+               bcopy(&sin6->sin6_addr, &pi6.ipi6_addr, sizeof (struct in6_addr));
+               pi6.ipi6_ifindex = 0;
+               return sbcreatecontrol((caddr_t)&pi6, sizeof (struct in6_pktinfo), IPV6_PKTINFO, IPPROTO_IPV6);
        }
+       return (NULL);
+}
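
flow_divert_get_control_mbuf() synthesizes the ancillary data a diverted UDP receiver would otherwise get from the IP stack: the packet's destination address when IP_RECVDSTADDR is set on an IPv4 socket, or an in6_pktinfo when IPV6_PKTINFO is requested on an IPv6 socket. On an ordinary (non-diverted) UDP socket the same information is consumed as in the userspace sketch below (IPv4 case; the port number and buffer sizes are arbitrary).

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    int
    main(void)
    {
            int s = socket(AF_INET, SOCK_DGRAM, 0);
            int on = 1;
            struct sockaddr_in laddr;
            char payload[2048], cbuf[256], dststr[INET_ADDRSTRLEN];
            struct iovec iov = { .iov_base = payload, .iov_len = sizeof(payload) };
            struct msghdr msg;
            struct cmsghdr *cm;

            memset(&laddr, 0, sizeof(laddr));
            laddr.sin_len = sizeof(laddr);
            laddr.sin_family = AF_INET;
            laddr.sin_port = htons(9999);

            /* Ask the stack to attach each datagram's destination address. */
            (void) setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR, &on, sizeof(on));
            (void) bind(s, (struct sockaddr *)&laddr, sizeof(laddr));

            memset(&msg, 0, sizeof(msg));
            msg.msg_iov = &iov;
            msg.msg_iovlen = 1;
            msg.msg_control = cbuf;
            msg.msg_controllen = sizeof(cbuf);

            if (recvmsg(s, &msg, 0) < 0) {
                    perror("recvmsg");
                    return 1;
            }
            for (cm = CMSG_FIRSTHDR(&msg); cm != NULL; cm = CMSG_NXTHDR(&msg, cm)) {
                    if (cm->cmsg_level == IPPROTO_IP &&
                        cm->cmsg_type == IP_RECVDSTADDR) {
                            struct in_addr dst;

                            memcpy(&dst, CMSG_DATA(cm), sizeof(dst));
                            printf("datagram was sent to %s\n",
                                inet_ntop(AF_INET, &dst, dststr, sizeof(dststr)));
                    }
            }
            close(s);
            return 0;
    }
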
 
+static void
+flow_divert_handle_data(struct flow_divert_pcb *fd_cb, mbuf_t packet, size_t offset)
+{
        FDLOCK(fd_cb);
        if (fd_cb->so != NULL) {
+               int             error           = 0;
+               mbuf_t  data            = NULL;
+               size_t  data_size;
+               struct sockaddr_storage remote_address;
+               boolean_t got_remote_sa = FALSE;
+
                socket_lock(fd_cb->so, 0);
-               if (flow_divert_check_no_cellular(fd_cb) || 
-                   flow_divert_check_no_expensive(fd_cb)) {
-                       flow_divert_update_closed_state(fd_cb, SHUT_RDWR, TRUE);
-                       flow_divert_send_close(fd_cb, SHUT_RDWR);
-                       soisdisconnected(fd_cb->so);
-               } else if (!(fd_cb->so->so_state & SS_CANTRCVMORE)) {
-                       if (sbappendstream(&fd_cb->so->so_rcv, data)) {
-                               fd_cb->bytes_received += data_size;
-                               flow_divert_add_data_statistics(fd_cb, data_size, FALSE);
-                               fd_cb->sb_size = fd_cb->so->so_rcv.sb_cc;
-                               sorwakeup(fd_cb->so);
-                               data = NULL;
+
+               if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
+                       uint32_t val_size = 0;
+
+                       /* check if we got remote address with data */
+                       memset(&remote_address, 0, sizeof(remote_address));
+                       error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_address), &remote_address, &val_size);
+                       if (error || val_size > sizeof(remote_address)) {
+                               FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
+                               error = 0;
                        } else {
-                               FDLOG0(LOG_ERR, fd_cb, "received data, but appendstream failed");
+                               /* validate the address */
+                               if (flow_divert_is_sockaddr_valid((struct sockaddr *)&remote_address)) {
+                                       got_remote_sa = TRUE;
+                               }
+                               offset += (sizeof(uint8_t) + sizeof(uint32_t) + val_size);
+                       }
+               }
+
+               data_size = (mbuf_pkthdr_len(packet) - offset);
+
+               FDLOG(LOG_DEBUG, fd_cb, "received %lu bytes of data", data_size);
+
+               error = mbuf_split(packet, offset, MBUF_DONTWAIT, &data);
+               if (error || data == NULL) {
+                       FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
+               } else {
+                       if (flow_divert_check_no_cellular(fd_cb) || 
+                           flow_divert_check_no_expensive(fd_cb))
+                       {
+                               flow_divert_update_closed_state(fd_cb, SHUT_RDWR, TRUE);
+                               flow_divert_send_close(fd_cb, SHUT_RDWR);
+                               flow_divert_disconnect_socket(fd_cb->so);
+                       } else if (!(fd_cb->so->so_state & SS_CANTRCVMORE)) {
+                               if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
+                                       if (sbappendstream(&fd_cb->so->so_rcv, data)) {
+                                               fd_cb->bytes_received += data_size;
+                                               flow_divert_add_data_statistics(fd_cb, data_size, FALSE);
+                                               fd_cb->sb_size = fd_cb->so->so_rcv.sb_cc;
+                                               sorwakeup(fd_cb->so);
+                                               data = NULL;
+                                       } else {
+                                               FDLOG0(LOG_ERR, fd_cb, "received data, but appendstream failed");
+                                       }
+                               } else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
+                                       struct sockaddr *append_sa;
+                                       mbuf_t mctl;
+
+                                       if (got_remote_sa == TRUE) {
+                                               error = flow_divert_dup_addr(fd_cb->so->so_proto->pr_domain->dom_family,
+                                                               (struct sockaddr *)&remote_address, &append_sa);
+                                       } else {
+                                               error = flow_divert_dup_addr(fd_cb->so->so_proto->pr_domain->dom_family,
+                                                               fd_cb->remote_address, &append_sa);
+                                       }
+                                       if (error) {
+                                               FDLOG0(LOG_ERR, fd_cb, "failed to dup the socket address.");
+                                       }
+
+                                       mctl = flow_divert_get_control_mbuf(fd_cb);
+                                       if (sbappendaddr(&fd_cb->so->so_rcv, append_sa, data, mctl, NULL)) {
+                                               fd_cb->bytes_received += data_size;
+                                               flow_divert_add_data_statistics(fd_cb, data_size, FALSE);
+                                               fd_cb->sb_size = fd_cb->so->so_rcv.sb_cc;
+                                               sorwakeup(fd_cb->so);
+                                               data = NULL;
+                                       } else {
+                                               FDLOG0(LOG_ERR, fd_cb, "received data, but sbappendaddr failed");
+                                       }
+                                       if (!error) {
+                                               FREE(append_sa, M_TEMP);
+                                       }
+                               }
                        }
                }
                socket_unlock(fd_cb->so, 0);
-       }
-       FDUNLOCK(fd_cb);
 
-       if (data != NULL) {
-               mbuf_free(data);
+               if (data != NULL) {
+                       mbuf_freem(data);
+               }
        }
+       FDUNLOCK(fd_cb);
 }
 
 static void
@@ -1597,7 +1795,7 @@ flow_divert_handle_read_notification(struct flow_divert_pcb *fd_cb, mbuf_t packe
                return;
        }
 
-       FDLOG(LOG_DEBUG, fd_cb, "received a read notification for %u bytes", read_count);
+       FDLOG(LOG_DEBUG, fd_cb, "received a read notification for %u bytes", ntohl(read_count));
 
        FDLOCK(fd_cb);
        if (fd_cb->so != NULL) {
@@ -1613,7 +1811,7 @@ static void
 flow_divert_handle_group_init(struct flow_divert_group *group, mbuf_t packet, int offset)
 {
        int error = 0;
-       size_t key_size = 0;
+       uint32_t key_size = 0;
        int log_level;
 
        error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, 0, NULL, &key_size);
@@ -1747,7 +1945,7 @@ flow_divert_handle_app_map_create(mbuf_t packet, int offset)
             cursor >= 0;
             cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1))
        {
-               size_t sid_size = 0;
+               uint32_t sid_size = 0;
                flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
                new_trie.bytes_count += sid_size;
                signing_id_count++;
@@ -1795,7 +1993,7 @@ flow_divert_handle_app_map_create(mbuf_t packet, int offset)
             cursor >= 0;
             cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1))
        {
-               size_t sid_size = 0;
+               uint32_t sid_size = 0;
                flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
                if (new_trie.bytes_free_next + sid_size <= new_trie.bytes_count) {
                        boolean_t is_dns;
@@ -1848,7 +2046,7 @@ flow_divert_handle_app_map_update(struct flow_divert_group *group, mbuf_t packet
             cursor >= 0;
             cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1))
        {
-               size_t sid_size = 0;
+               uint32_t sid_size = 0;
                flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
                if (sid_size > max_size) {
                        max_size = sid_size;
@@ -1865,7 +2063,7 @@ flow_divert_handle_app_map_update(struct flow_divert_group *group, mbuf_t packet
             cursor >= 0;
             cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1))
        {
-               size_t signing_id_len = 0;
+               uint32_t signing_id_len = 0;
                uint16_t node;
 
                flow_divert_packet_get_tlv(packet,
@@ -1904,6 +2102,12 @@ flow_divert_input(mbuf_t packet, struct flow_divert_group *group)
                goto done;
        }
 
+       if (mbuf_pkthdr_len(packet) > FD_CTL_RCVBUFF_SIZE) {
+               FDLOG(LOG_ERR, &nil_pcb, "got a bad packet, length (%lu) > %lu", mbuf_pkthdr_len(packet), FD_CTL_RCVBUFF_SIZE);
+               error = EINVAL;
+               goto done;
+       }
+
        error = mbuf_copydata(packet, 0, sizeof(hdr), &hdr);
        if (error) {
                FDLOG(LOG_ERR, &nil_pcb, "mbuf_copydata failed for the header: %d", error);
@@ -1963,7 +2167,7 @@ flow_divert_input(mbuf_t packet, struct flow_divert_group *group)
        FDRELEASE(fd_cb);
 
 done:
-       mbuf_free(packet);
+       mbuf_freem(packet);
        return error;
 }
 
@@ -2018,6 +2222,8 @@ flow_divert_detach(struct socket *so)
                /* Last-ditch effort to send any buffered data */
                flow_divert_send_buffered_data(fd_cb, TRUE);
 
+               flow_divert_update_closed_state(fd_cb, SHUT_RDWR, FALSE);
+               flow_divert_send_close_if_needed(fd_cb);
                /* Remove from the group */
                flow_divert_pcb_remove(fd_cb);
        }
@@ -2040,8 +2246,10 @@ flow_divert_close(struct socket *so)
 
        FDLOG0(LOG_INFO, fd_cb, "Closing");
 
-       soisdisconnecting(so);
-       sbflush(&so->so_rcv);
+       if (SOCK_TYPE(so) == SOCK_STREAM) {
+               soisdisconnecting(so);
+               sbflush(&so->so_rcv);
+       }
 
        flow_divert_send_buffered_data(fd_cb, TRUE);
        flow_divert_update_closed_state(fd_cb, SHUT_RDWR, FALSE);
@@ -2054,9 +2262,10 @@ flow_divert_close(struct socket *so)
 }
 
 static int
-flow_divert_disconnectx(struct socket *so, associd_t aid, connid_t cid __unused)
+flow_divert_disconnectx(struct socket *so, sae_associd_t aid,
+    sae_connid_t cid __unused)
 {
-       if (aid != ASSOCID_ANY && aid != ASSOCID_ALL) {
+       if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
                return (EINVAL);
        }
 
@@ -2108,6 +2317,106 @@ flow_divert_rcvd(struct socket *so, int flags __unused)
        return 0;
 }
 
+static int
+flow_divert_append_target_endpoint_tlv(mbuf_t connect_packet, struct sockaddr *toaddr)
+{
+       int error = 0;
+       int port  = 0;
+
+       error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_ADDRESS, toaddr->sa_len, toaddr);
+       if (error) {
+               goto done;
+       }
+
+       if (toaddr->sa_family == AF_INET) {
+               port = ntohs((satosin(toaddr))->sin_port);
+       }
+#if INET6
+       else {
+               port = ntohs((satosin6(toaddr))->sin6_port);
+       }
+#endif
+
+       error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_PORT, sizeof(port), &port);
+       if (error) {
+               goto done;
+       }
+
+done:
+       return error;
+}
+
+struct sockaddr *
+flow_divert_get_buffered_target_address(mbuf_t buffer)
+{
+       if (buffer != NULL && buffer->m_type == MT_SONAME) {
+               struct sockaddr *toaddr = mtod(buffer, struct sockaddr *);
+               if (toaddr != NULL && flow_divert_is_sockaddr_valid(toaddr)) {
+                       return toaddr;
+               }
+       }
+       return NULL;
+}
+
+static boolean_t
+flow_divert_is_sockaddr_valid(struct sockaddr *addr)
+{
+       switch(addr->sa_family)
+       {
+               case AF_INET:
+                       if (addr->sa_len != sizeof(struct sockaddr_in)) {
+                               return FALSE;
+                       }
+                       break;
+#if INET6
+               case AF_INET6:
+                       if (addr->sa_len != sizeof(struct sockaddr_in6)) {
+                               return FALSE;
+                       }
+                       break;
+#endif /* INET6 */
+               default:
+                       return FALSE;
+       }
+       return TRUE;
+}
+
+static errno_t
+flow_divert_inp_to_sockaddr(const struct inpcb *inp, struct sockaddr **local_socket)
+{
+       int error = 0;
+       union sockaddr_in_4_6 sin46;
+
+       bzero(&sin46, sizeof(sin46));
+       if (inp->inp_vflag & INP_IPV4) {
+               struct sockaddr_in  *sin = &sin46.sin;
+
+               sin->sin_family = AF_INET;
+               sin->sin_len = sizeof(*sin);
+               sin->sin_port = inp->inp_lport;
+               sin->sin_addr = inp->inp_laddr;
+       } else if (inp->inp_vflag & INP_IPV6) {
+               struct sockaddr_in6 *sin6 = &sin46.sin6;
+
+               sin6->sin6_len = sizeof(*sin6);
+               sin6->sin6_family = AF_INET6;
+               sin6->sin6_port = inp->inp_lport;
+               sin6->sin6_addr = inp->in6p_laddr;
+       }
+       *local_socket = dup_sockaddr((struct sockaddr *)&sin46, 1);
+       if (*local_socket == NULL) {
+               error = ENOBUFS;
+       }
+       return (error);
+}
+
+static boolean_t
+flow_divert_has_pcb_local_address(const struct inpcb *inp)
+{
+       return (inp->inp_lport != 0
+               && (inp->inp_laddr.s_addr != INADDR_ANY || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)));
+}
+
 static errno_t
 flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr,
                      struct sockaddr **dup)
@@ -2145,6 +2454,25 @@ flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr,
        return error;
 }
 
+static void
+flow_divert_disconnect_socket(struct socket *so)
+{
+       soisdisconnected(so);
+       if (SOCK_TYPE(so) == SOCK_DGRAM) {
+               struct inpcb *inp = NULL;
+
+               inp = sotoinpcb(so);
+               if (inp != NULL) {
+#if INET6
+                       if (SOCK_CHECK_DOM(so, PF_INET6))
+                               in6_pcbdetach(inp);
+                       else
+#endif /* INET6 */
+                               in_pcbdetach(inp);
+               }
+       }
+}
+
 static errno_t
 flow_divert_getpeername(struct socket *so, struct sockaddr **sa)
 {
@@ -2244,6 +2572,20 @@ flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p)
                }
        }
 
+	if (fd_cb->local_address != NULL) {
+		error = EALREADY;
+		goto done;
+	} else {
+		if (flow_divert_has_pcb_local_address(inp)) {
+			error = flow_divert_inp_to_sockaddr(inp, &fd_cb->local_address);
+			if (error) {
+				FDLOG0(LOG_ERR, fd_cb, "failed to get the local socket address.");
+				goto done;
+			}
+		}
+	}
+
+
        error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT, &connect_packet);
        if (error) {
                goto done;
@@ -2252,7 +2594,7 @@ flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p)
        error = EPERM;
 
        if (fd_cb->connect_token != NULL) {
-               size_t sid_size = 0;
+               uint32_t sid_size = 0;
                int find_error = flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
                if (find_error == 0 && sid_size > 0) {
                        MALLOC(signing_id, char *, sid_size + 1, M_TEMP, M_WAITOK | M_ZERO);
@@ -2274,7 +2616,9 @@ flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p)
                        if (src_proc != PROC_NULL) {
                                proc_lock(src_proc);
                                if (src_proc->p_csflags & CS_VALID) {
-                                       signing_id = (char *)cs_identity_get(src_proc);
+					const char * cs_id;
+					cs_id = cs_identity_get(src_proc);
+					signing_id = __DECONST(char *, cs_id);
                                } else {
                                        FDLOG0(LOG_WARNING, fd_cb, "Signature is invalid");
                                }
@@ -2288,7 +2632,7 @@ flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p)
                if (signing_id != NULL) {
                        uint16_t result = NULL_TRIE_IDX;
                        lck_rw_lock_shared(&g_flow_divert_group_lck);
-                       result = flow_divert_trie_search(&g_signing_id_trie, (const uint8_t *)signing_id);
+                       result = flow_divert_trie_search(&g_signing_id_trie, (uint8_t *)signing_id);
                        lck_rw_done(&g_flow_divert_group_lck);
                        if (result != NULL_TRIE_IDX) {
                                error = 0;
@@ -2350,7 +2694,7 @@ flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p)
 
 done:
        if (error && connect_packet != NULL) {
-               mbuf_free(connect_packet);
+               mbuf_freem(connect_packet);
        }
        return error;
 }
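
flow_divert_is_sockaddr_valid() and flow_divert_inp_to_sockaddr() added above both rely on the BSD convention that a struct sockaddr carries its own length (sa_len) and family (sa_family), with ports stored in network byte order. A small user-space sketch of building an AF_INET address that would pass that length/family check (illustrative only, make_target is not part of this commit):

#include <string.h>
#include <arpa/inet.h>
#include <netinet/in.h>

/* Build a sockaddr_in the way the kernel-side checks expect:
 * sin_len set to the structure size, family AF_INET, port in
 * network byte order. */
static struct sockaddr_in
make_target(const char *ip_str, uint16_t port)
{
	struct sockaddr_in sin;

	memset(&sin, 0, sizeof(sin));
	sin.sin_len = sizeof(sin);	/* compared against sa_len */
	sin.sin_family = AF_INET;	/* compared against sa_family */
	sin.sin_port = htons(port);	/* ntohs() on the kernel side */
	inet_pton(AF_INET, ip_str, &sin.sin_addr);
	return sin;
}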
@@ -2358,8 +2702,8 @@ done:
 static int
 flow_divert_connectx_out_common(struct socket *so, int af,
     struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl,
-    struct proc *p, uint32_t ifscope __unused, associd_t aid __unused,
-    connid_t *pcid, uint32_t flags __unused, void *arg __unused,
+    struct proc *p, uint32_t ifscope __unused, sae_associd_t aid __unused,
+    sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
     uint32_t arglen __unused)
 {
        struct sockaddr_entry *src_se = NULL, *dst_se = NULL;
@@ -2395,9 +2739,10 @@ flow_divert_connectx_out_common(struct socket *so, int af,
 static int
 flow_divert_connectx_out(struct socket *so, struct sockaddr_list **src_sl,
     struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
-    associd_t aid, connid_t *pcid, uint32_t flags, void *arg,
-    uint32_t arglen)
+    sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
+    uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
 {
+#pragma unused(uio, bytes_written)
        return (flow_divert_connectx_out_common(so, AF_INET, src_sl, dst_sl,
            p, ifscope, aid, pcid, flags, arg, arglen));
 }
@@ -2406,16 +2751,17 @@ flow_divert_connectx_out(struct socket *so, struct sockaddr_list **src_sl,
 static int
 flow_divert_connectx6_out(struct socket *so, struct sockaddr_list **src_sl,
     struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
-    associd_t aid, connid_t *pcid, uint32_t flags, void *arg,
-    uint32_t arglen)
+    sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
+    uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
 {
+#pragma unused(uio, bytes_written)
        return (flow_divert_connectx_out_common(so, AF_INET6, src_sl, dst_sl,
            p, ifscope, aid, pcid, flags, arg, arglen));
 }
 #endif /* INET6 */
 
 static int
-flow_divert_getconninfo(struct socket *so, connid_t cid, uint32_t *flags,
+flow_divert_getconninfo(struct socket *so, sae_connid_t cid, uint32_t *flags,
                         uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len,
                         user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type,
                         user_addr_t aux_data __unused, uint32_t *aux_len)
@@ -2432,7 +2778,7 @@ flow_divert_getconninfo(struct socket *so, connid_t cid, uint32_t *flags,
                goto out;
        }
 
-       if (cid != CONNID_ANY && cid != CONNID_ALL && cid != 1) {
+       if (cid != SAE_CONNID_ANY && cid != SAE_CONNID_ALL && cid != 1) {
                error = EINVAL;
                goto out;
        }
@@ -2605,7 +2951,7 @@ flow_divert_data_out(struct socket *so, int flags, mbuf_t data, struct sockaddr
        FDLOG(LOG_DEBUG, fd_cb, "app wrote %lu bytes", mbuf_pkthdr_len(data));
 
        fd_cb->bytes_written_by_app += mbuf_pkthdr_len(data);
-       error = flow_divert_send_app_data(fd_cb, data);
+       error = flow_divert_send_app_data(fd_cb, data, to);
        if (error) {
                goto done;
        }
@@ -2618,7 +2964,7 @@ flow_divert_data_out(struct socket *so, int flags, mbuf_t data, struct sockaddr
 
 done:
        if (data) {
-               mbuf_free(data);
+               mbuf_freem(data);
        }
        if (control) {
                mbuf_free(control);
@@ -2640,6 +2986,20 @@ flow_divert_set_protosw(struct socket *so)
 #endif /* INET6 */
 }
 
+static void
+flow_divert_set_udp_protosw(struct socket *so)
+{
+	so->so_flags |= SOF_FLOW_DIVERT;
+	if (SOCK_DOM(so) == PF_INET) {
+		so->so_proto = &g_flow_divert_in_udp_protosw;
+	}
+#if INET6
+	else {
+		so->so_proto = (struct protosw *)&g_flow_divert_in6_udp_protosw;
+	}
+#endif /* INET6 */
+}
+
 static errno_t
 flow_divert_attach(struct socket *so, uint32_t flow_id, uint32_t ctl_unit)
 {
@@ -2679,10 +3039,14 @@ flow_divert_attach(struct socket *so, uint32_t flow_id, uint32_t ctl_unit)
        VERIFY(inp != NULL);
 
        socket_lock(old_so, 0);
-       soisdisconnected(old_so);
+       flow_divert_disconnect_socket(old_so);
        old_so->so_flags &= ~SOF_FLOW_DIVERT;
        old_so->so_fd_pcb = NULL;
-       old_so->so_proto = pffindproto(SOCK_DOM(old_so), IPPROTO_TCP, SOCK_STREAM);
+       if (SOCK_TYPE(old_so) == SOCK_STREAM) {
+               old_so->so_proto = pffindproto(SOCK_DOM(old_so), IPPROTO_TCP, SOCK_STREAM);
+       } else if (SOCK_TYPE(old_so) == SOCK_DGRAM) {
+               old_so->so_proto = pffindproto(SOCK_DOM(old_so), IPPROTO_UDP, SOCK_DGRAM);
+       }
        fd_cb->so = NULL;
        /* Save the output interface */
        ifp = inp->inp_last_outifp;
@@ -2720,6 +3084,44 @@ done:
        return error;
 }
 
+errno_t
+flow_divert_implicit_data_out(struct socket *so, int flags, mbuf_t data, struct sockaddr *to, mbuf_t control, struct proc *p)
+{
+	struct flow_divert_pcb	*fd_cb	= so->so_fd_pcb;
+	struct inpcb *inp;
+	int error = 0;
+
+	inp = sotoinpcb(so);
+	if (inp == NULL) {
+		return (EINVAL);
+	}
+
+	if (fd_cb == NULL) {
+		uint32_t fd_ctl_unit = necp_socket_get_flow_divert_control_unit(inp);
+		if (fd_ctl_unit > 0) {
+			error = flow_divert_pcb_init(so, fd_ctl_unit);
+			fd_cb = so->so_fd_pcb;
+			if (error != 0 || fd_cb == NULL) {
+				goto done;
+			}
+		} else {
+			error = ENETDOWN;
+			goto done;
+		}
+	}
+	return flow_divert_data_out(so, flags, data, to, control, p);
+
+done:
+	if (data) {
+		mbuf_freem(data);
+	}
+	if (control) {
+		mbuf_free(control);
+	}
+
+	return error;
+}
+
 errno_t
 flow_divert_pcb_init(struct socket *so, uint32_t ctl_unit)
 {
@@ -2737,11 +3139,14 @@ flow_divert_pcb_init(struct socket *so, uint32_t ctl_unit)
                        FDLOG(LOG_ERR, fd_cb, "pcb insert failed: %d", error);
                        FDRELEASE(fd_cb);
                } else {
-                       fd_cb->log_level = LOG_NOTICE;
                        fd_cb->control_group_unit = ctl_unit;
                        so->so_fd_pcb = fd_cb;
 
-                       flow_divert_set_protosw(so);
+                       if (SOCK_TYPE(so) == SOCK_STREAM) {
+                               flow_divert_set_protosw(so);
+                       } else if (SOCK_TYPE(so) == SOCK_DGRAM) {
+                               flow_divert_set_udp_protosw(so);
+                       }
 
                        FDLOG0(LOG_INFO, fd_cb, "Created");
                }
@@ -2772,8 +3177,8 @@ flow_divert_token_set(struct socket *so, struct sockopt *sopt)
                goto done;
        }
 
-       if (SOCK_TYPE(so) != SOCK_STREAM ||
-           SOCK_PROTO(so) != IPPROTO_TCP ||
+       if ((SOCK_TYPE(so) != SOCK_STREAM && SOCK_TYPE(so) != SOCK_DGRAM) ||
+           (SOCK_PROTO(so) != IPPROTO_TCP && SOCK_PROTO(so) != IPPROTO_UDP) ||
            (SOCK_DOM(so) != PF_INET
 #if INET6
             && SOCK_DOM(so) != PF_INET6
@@ -2783,10 +3188,12 @@ flow_divert_token_set(struct socket *so, struct sockopt *sopt)
                error = EINVAL;
                goto done;
        } else {
-               struct tcpcb *tp = sototcpcb(so);
-               if (tp == NULL || tp->t_state != TCPS_CLOSED) {
-                       error = EINVAL;
-                       goto done;
+               if (SOCK_TYPE(so) == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP) {
+                       struct tcpcb *tp = sototcpcb(so);
+                       if (tp == NULL || tp->t_state != TCPS_CLOSED) {
+                               error = EINVAL;
+                               goto done;
+                       }
                }
        }
 
@@ -2957,7 +3364,7 @@ done:
 static errno_t
 flow_divert_kctl_connect(kern_ctl_ref kctlref __unused, struct sockaddr_ctl *sac, void **unitinfo)
 {
-       struct flow_divert_group        *new_group;
+       struct flow_divert_group        *new_group      = NULL;
        int                             error           = 0;
 
        if (sac->sc_unit >= GROUP_COUNT_MAX) {
@@ -3193,6 +3600,39 @@ flow_divert_init(void)
        g_flow_divert_in_protosw.pr_filter_head.tqh_last =
            (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
 
+       /* UDP */
+       g_udp_protosw = pffindproto(AF_INET, IPPROTO_UDP, SOCK_DGRAM);
+       VERIFY(g_udp_protosw != NULL);
+
+       memcpy(&g_flow_divert_in_udp_protosw, g_udp_protosw, sizeof(g_flow_divert_in_udp_protosw));
+       memcpy(&g_flow_divert_in_udp_usrreqs, g_udp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_udp_usrreqs));
+
+       g_flow_divert_in_udp_usrreqs.pru_connect = flow_divert_connect_out;
+       g_flow_divert_in_udp_usrreqs.pru_connectx = flow_divert_connectx_out;
+       g_flow_divert_in_udp_usrreqs.pru_control = flow_divert_in_control;
+       g_flow_divert_in_udp_usrreqs.pru_disconnect = flow_divert_close;
+       g_flow_divert_in_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
+       g_flow_divert_in_udp_usrreqs.pru_peeraddr = flow_divert_getpeername;
+       g_flow_divert_in_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
+       g_flow_divert_in_udp_usrreqs.pru_send = flow_divert_data_out;
+       g_flow_divert_in_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
+       g_flow_divert_in_udp_usrreqs.pru_sockaddr = flow_divert_getsockaddr;
+       g_flow_divert_in_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
+       g_flow_divert_in_udp_usrreqs.pru_soreceive_list = pru_soreceive_list_notsupp;
+
+       g_flow_divert_in_udp_protosw.pr_usrreqs = &g_flow_divert_in_udp_usrreqs;
+       g_flow_divert_in_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
+
+       /*
+        * Socket filters shouldn't attach/detach to/from this protosw
+        * since pr_protosw is to be used instead, which points to the
+        * real protocol; if they do, it is a bug and we should panic.
+        */
+       g_flow_divert_in_udp_protosw.pr_filter_head.tqh_first =
+           (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
+       g_flow_divert_in_udp_protosw.pr_filter_head.tqh_last =
+           (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
+
 #if INET6
        g_tcp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_TCP, SOCK_STREAM);
 
@@ -3223,6 +3663,39 @@ flow_divert_init(void)
            (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
        g_flow_divert_in6_protosw.pr_filter_head.tqh_last =
            (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
+
+       /* UDP6 */
+       g_udp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_UDP, SOCK_DGRAM);
+
+       VERIFY(g_udp6_protosw != NULL);
+
+       memcpy(&g_flow_divert_in6_udp_protosw, g_udp6_protosw, sizeof(g_flow_divert_in6_udp_protosw));
+       memcpy(&g_flow_divert_in6_udp_usrreqs, g_udp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_udp_usrreqs));
+
+       g_flow_divert_in6_udp_usrreqs.pru_connect = flow_divert_connect_out;
+       g_flow_divert_in6_udp_usrreqs.pru_connectx = flow_divert_connectx6_out;
+       g_flow_divert_in6_udp_usrreqs.pru_control = flow_divert_in6_control;
+       g_flow_divert_in6_udp_usrreqs.pru_disconnect = flow_divert_close;
+       g_flow_divert_in6_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
+       g_flow_divert_in6_udp_usrreqs.pru_peeraddr = flow_divert_getpeername;
+       g_flow_divert_in6_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
+       g_flow_divert_in6_udp_usrreqs.pru_send = flow_divert_data_out;
+       g_flow_divert_in6_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
+       g_flow_divert_in6_udp_usrreqs.pru_sockaddr = flow_divert_getsockaddr;
+       g_flow_divert_in6_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
+       g_flow_divert_in6_udp_usrreqs.pru_soreceive_list = pru_soreceive_list_notsupp;
+
+       g_flow_divert_in6_udp_protosw.pr_usrreqs = &g_flow_divert_in6_udp_usrreqs;
+       g_flow_divert_in6_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
+       g_flow_divert_in6_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
+       /*
+        * Socket filters shouldn't attach/detach to/from this protosw
+        * since pr_protosw is to be used instead, which points to the
+        * real protocol; if they do, it is a bug and we should panic.
+        */
+       g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_first =
+           (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
+       g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_last =
+           (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
 #endif /* INET6 */
 
        flow_divert_grp_attr = lck_grp_attr_alloc_init();
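
The UDP support registered in flow_divert_init() above follows the same pattern already used for TCP: copy the stock protosw and usrreqs tables with memcpy(), then override only the entry points flow divert needs to intercept. A rough user-space analogue of that copy-and-override technique (the ops table and handler names here are hypothetical, not kernel code):

#include <stdio.h>
#include <string.h>

/* Hypothetical ops table standing in for struct pr_usrreqs. */
struct ops {
	int (*send)(const char *data);
	int (*shutdown)(void);
};

static int default_send(const char *data) { printf("default send: %s\n", data); return 0; }
static int default_shutdown(void)         { printf("default shutdown\n"); return 0; }
static int diverted_send(const char *data) { printf("diverted send: %s\n", data); return 0; }

int main(void)
{
	struct ops base = { default_send, default_shutdown };
	struct ops diverted;

	/* Clone the stock table, then override only what we intercept,
	 * mirroring the memcpy()-plus-assignment pattern in flow_divert_init(). */
	memcpy(&diverted, &base, sizeof(diverted));
	diverted.send = diverted_send;

	diverted.send("hello");   /* intercepted handler */
	diverted.shutdown();      /* falls through to the default */
	return 0;
}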
index 0d1f6255b6afd5a20946c6cb3da3c8d011b3c199..0968d9ad0e486208f529435c37274cc4454fd3e3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2012-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -73,5 +73,6 @@ errno_t               flow_divert_token_set(struct socket *so, struct sockopt *sopt);
 errno_t                flow_divert_token_get(struct socket *so, struct sockopt *sopt);
 errno_t                flow_divert_pcb_init(struct socket *so, uint32_t ctl_unit);
 errno_t                flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p);
+errno_t                flow_divert_implicit_data_out(struct socket *so, int flags, mbuf_t data, struct sockaddr *to, mbuf_t control, struct proc *p);
 
 #endif /* __FLOW_DIVERT_H__ */
index d3bf02c4f1bba0dd3359459927994feb10303ded..a3b025eb7f555008229b468a38211069b4345793 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2012-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -31,6 +31,8 @@
 
 #define FLOW_DIVERT_CONTROL_NAME               "com.apple.flow-divert"
 
+#define FLOW_DIVERT_TLV_LENGTH_UINT32  1
+
 #define FLOW_DIVERT_PKT_CONNECT                        1
 #define FLOW_DIVERT_PKT_CONNECT_RESULT         2
 #define FLOW_DIVERT_PKT_DATA                   3
 #define FLOW_DIVERT_TLV_UUID                   27
 #define FLOW_DIVERT_TLV_PREFIX_COUNT           28
 #define FLOW_DIVERT_TLV_FLAGS                  29
+#define FLOW_DIVERT_TLV_FLOW_TYPE               30
+
+#define FLOW_DIVERT_FLOW_TYPE_TCP               1
+#define FLOW_DIVERT_FLOW_TYPE_UDP               3
 
 #define FLOW_DIVERT_CHUNK_SIZE                 4096
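
FLOW_DIVERT_TLV_LENGTH_UINT32 and the new FLOW_DIVERT_TLV_FLOW_TYPE / FLOW_DIVERT_FLOW_TYPE_* values above describe type-length-value fields carried in the control-channel packets. As a rough illustration only (the exact on-wire framing is not spelled out in this header, so this sketch assumes a 1-byte type followed by a 4-byte big-endian length and then the value; the real kernel code uses flow_divert_packet_append_tlv() on an mbuf instead):

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

/* Hypothetical sketch: append one TLV (assumed 1-byte type, 4-byte
 * length, value) to buf, returning the new offset or 0 on overflow. */
static size_t
append_tlv(uint8_t *buf, size_t off, size_t cap,
    uint8_t type, uint32_t len, const void *value)
{
	uint32_t be_len = htonl(len);

	if (off + 1 + sizeof(be_len) + len > cap) {
		return 0;
	}
	buf[off++] = type;
	memcpy(buf + off, &be_len, sizeof(be_len));
	off += sizeof(be_len);
	memcpy(buf + off, value, len);
	return off + len;
}

Under that assumed framing, a provider marking a flow as UDP would append a FLOW_DIVERT_TLV_FLOW_TYPE field whose value is FLOW_DIVERT_FLOW_TYPE_UDP.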
 
index da271fa32afe74be6f72a2db9e49b4b966d2e594..0dc3dda2f3b04912503c619c1d2d0ab8586f70ea 100644 (file)
@@ -714,6 +714,10 @@ struct     in6_multi;
 struct ip6protosw;
 void   icmp6_init(struct ip6protosw *, struct domain *);
 void   icmp6_paramerror(struct mbuf *, int);
+
+void   icmp6_error_flag(struct mbuf *, int, int, int, int);
+#define        ICMP6_ERROR_RST_MRCVIF  0x1
+
 void   icmp6_error(struct mbuf *, int, int, int);
 void   icmp6_error2(struct mbuf *, int, int, int, struct ifnet *);
 int    icmp6_input(struct mbuf **, int *, int);
index 572a083ec211a0a928b058fa094bf3111f7a054f..2b185927052173de79e5018e0952fc5d76b56cbf 100644 (file)
@@ -3902,7 +3902,7 @@ igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
        if (m->m_flags & M_IGMPV3_HDR) {
                igmpreclen -= hdrlen;
        } else {
-               M_PREPEND(m, hdrlen, M_DONTWAIT);
+               M_PREPEND(m, hdrlen, M_DONTWAIT, 1);
                if (m == NULL)
                        return (NULL);
                m->m_flags |= M_IGMPV3_HDR;
index 83d2a024a05d192c23a22e0c2b21cfec95f76a07..f25e77c0534e9e21ee0d986fe8425ba3512876a1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -134,8 +134,8 @@ static void in_ifaddr_free(struct ifaddr *);
 static void in_ifaddr_trace(struct ifaddr *, int);
 
 static int in_getassocids(struct socket *, uint32_t *, user_addr_t);
-static int in_getconnids(struct socket *, associd_t, uint32_t *, user_addr_t);
-static int in_getconninfo(struct socket *, connid_t, uint32_t *,
+static int in_getconnids(struct socket *, sae_associd_t, uint32_t *, user_addr_t);
+static int in_getconninfo(struct socket *, sae_connid_t, uint32_t *,
     uint32_t *, int32_t *, user_addr_t, socklen_t *, user_addr_t, socklen_t *,
     uint32_t *, user_addr_t, uint32_t *);
 
@@ -187,6 +187,10 @@ static struct zone *inifa_zone;                    /* zone for in_ifaddr */
 #define        INIFA_ZONE_MAX          64              /* maximum elements in zone */
 #define        INIFA_ZONE_NAME         "in_ifaddr"     /* zone name */
 
+static const unsigned int in_extra_size = sizeof (struct in_ifextra);
+static const unsigned int in_extra_bufsize = in_extra_size +
+    sizeof (void *) + sizeof (uint64_t);
+
 /*
  * Return 1 if the address is
  * - loopback
@@ -316,10 +320,47 @@ in_domifattach(struct ifnet *ifp)
 
        VERIFY(ifp != NULL);
 
-       if ((error = proto_plumb(PF_INET, ifp)) && error != EEXIST)
+       if ((error = proto_plumb(PF_INET, ifp)) && error != EEXIST) {
                log(LOG_ERR, "%s: proto_plumb returned %d if=%s\n",
                    __func__, error, if_name(ifp));
+       } else if (error == 0 && ifp->if_inetdata == NULL) {
+               void **pbuf, *base;
+               struct in_ifextra *ext;
+               int errorx;
+
+               if ((ext = (struct in_ifextra *)_MALLOC(in_extra_bufsize,
+                   M_IFADDR, M_WAITOK|M_ZERO)) == NULL) {
+                       error = ENOMEM;
+                       errorx = proto_unplumb(PF_INET, ifp);
+                       if (errorx != 0) {
+                               log(LOG_ERR,
+                                   "%s: proto_unplumb returned %d if=%s%d\n",
+                                   __func__, errorx, ifp->if_name,
+                                   ifp->if_unit);
+                       }
+                       goto done;
+               }
 
+               /* Align on 64-bit boundary */
+               base = (void *)P2ROUNDUP((intptr_t)ext + sizeof (uint64_t),
+                   sizeof (uint64_t));
+               VERIFY(((intptr_t)base + in_extra_size) <=
+                   ((intptr_t)ext + in_extra_bufsize));
+               pbuf = (void **)((intptr_t)base - sizeof (void *));
+               *pbuf = ext;
+               ifp->if_inetdata = base;
+               VERIFY(IS_P2ALIGNED(ifp->if_inetdata, sizeof (uint64_t)));
+       }
+done:
+       if (error == 0 && ifp->if_inetdata != NULL) {
+               /*
+                * Since the structure is never freed, we need to
+                * zero out its contents to avoid reusing stale data.
+                * A little redundant with allocation above, but it
+                * keeps the code simpler for all cases.
+                */
+               bzero(ifp->if_inetdata, in_extra_size);
+       }
        return (error);
 }
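
The in_ifextra allocation above over-allocates by sizeof (void *) + sizeof (uint64_t), rounds the usable base up to a 64-bit boundary with P2ROUNDUP(), and stashes the original allocation pointer in the slot just below the aligned base so it can be recovered later. A small stand-alone sketch of that alignment technique, using plain calloc()/free() instead of the kernel allocator (the helper names are illustrative):

#include <stdint.h>
#include <stdlib.h>

#define ROUNDUP(x, align)  (((x) + (align) - 1) & ~((uintptr_t)(align) - 1))

/* Allocate `size` zeroed bytes aligned to 8, keeping the original
 * pointer just below the aligned block so aligned_free8() can find it. */
static void *
aligned_alloc8(size_t size)
{
	void *raw = calloc(1, size + sizeof(void *) + sizeof(uint64_t));
	uintptr_t base;
	void **slot;

	if (raw == NULL)
		return NULL;
	base = ROUNDUP((uintptr_t)raw + sizeof(uint64_t), sizeof(uint64_t));
	slot = (void **)(base - sizeof(void *));
	*slot = raw;			/* stash the original pointer */
	return (void *)base;
}

static void
aligned_free8(void *base)
{
	if (base != NULL) {
		void **slot = (void **)((uintptr_t)base - sizeof(void *));
		free(*slot);
	}
}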
 
@@ -2137,13 +2178,13 @@ static int
 in_getassocids(struct socket *so, uint32_t *cnt, user_addr_t aidp)
 {
        struct inpcb *inp = sotoinpcb(so);
-       associd_t aid;
+       sae_associd_t aid;
 
        if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD)
                return (EINVAL);
 
        /* INPCB has no concept of association */
-       aid = ASSOCID_ANY;
+       aid = SAE_ASSOCID_ANY;
        *cnt = 0;
 
        /* just asking how many there are? */
@@ -2157,16 +2198,16 @@ in_getassocids(struct socket *so, uint32_t *cnt, user_addr_t aidp)
  * Handle SIOCGCONNIDS ioctl for PF_INET domain.
  */
 static int
-in_getconnids(struct socket *so, associd_t aid, uint32_t *cnt,
+in_getconnids(struct socket *so, sae_associd_t aid, uint32_t *cnt,
     user_addr_t cidp)
 {
        struct inpcb *inp = sotoinpcb(so);
-       connid_t cid;
+       sae_connid_t cid;
 
        if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD)
                return (EINVAL);
 
-       if (aid != ASSOCID_ANY && aid != ASSOCID_ALL)
+       if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL)
                return (EINVAL);
 
        /* if connected, return 1 connection count */
@@ -2177,7 +2218,7 @@ in_getconnids(struct socket *so, associd_t aid, uint32_t *cnt,
                return (0);
 
        /* if INPCB is connected, assign it connid 1 */
-       cid = ((*cnt != 0) ? 1 : CONNID_ANY);
+       cid = ((*cnt != 0) ? 1 : SAE_CONNID_ANY);
 
        return (copyout(&cid, cidp, sizeof (cid)));
 }
@@ -2186,7 +2227,7 @@ in_getconnids(struct socket *so, associd_t aid, uint32_t *cnt,
  * Handle SIOCGCONNINFO ioctl for PF_INET domain.
  */
 static int
-in_getconninfo(struct socket *so, connid_t cid, uint32_t *flags,
+in_getconninfo(struct socket *so, sae_connid_t cid, uint32_t *flags,
     uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len,
     user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type,
     user_addr_t aux_data, uint32_t *aux_len)
@@ -2207,7 +2248,7 @@ in_getconninfo(struct socket *so, connid_t cid, uint32_t *flags,
                goto out;
        }
 
-       if (cid != CONNID_ANY && cid != CONNID_ALL && cid != 1) {
+       if (cid != SAE_CONNID_ANY && cid != SAE_CONNID_ALL && cid != 1) {
                error = EINVAL;
                goto out;
        }
index 53c43651726006b25d0f6beaede186c61ff376ca..497a03cea1ef9800fc476751291bfe8e8537f083 100644 (file)
@@ -855,17 +855,18 @@ extern uint16_t b_sum16(const void *buf, int len);
 /* exported for ApplicationFirewall */
 extern int in_localaddr(struct in_addr);
 extern int inaddr_local(struct in_addr);
+
+extern char    *inet_ntoa(struct in_addr);
+extern char    *inet_ntoa_r(struct in_addr ina, char *buf,
+    size_t buflen);
+extern int     inet_pton(int af, const char *, void *);
 #endif /* KERNEL_PRIVATE */
 
 #define MAX_IPv4_STR_LEN       16
 #define MAX_IPv6_STR_LEN       64
 
 extern int      inet_aton(const char *, struct in_addr *); /* in libkern */
-extern char    *inet_ntoa(struct in_addr); /* in libkern */
-extern char    *inet_ntoa_r(struct in_addr ina, char *buf,
-    size_t buflen); /* in libkern */
 extern const char *inet_ntop(int, const void *, char *, socklen_t); /* in libkern*/
-extern int     inet_pton(int af, const char *, void *); /* in libkern */
 #endif /* KERNEL */
 
 #ifndef KERNEL
index 418f2d26c7fb0c87690fb8920594bc4850b6674b..b889a8aaf8147b8843b74a9a053cb1083805acf4 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -146,6 +146,7 @@ static void arp_llinfo_free(void *);
 static void arp_llinfo_purge(struct rtentry *);
 static void arp_llinfo_get_ri(struct rtentry *, struct rt_reach_info *);
 static void arp_llinfo_get_iflri(struct rtentry *, struct ifnet_llreach_info *);
+static void arp_llinfo_refresh(struct rtentry *);
 
 static __inline void arp_llreach_use(struct llinfo_arp *);
 static __inline int arp_llreach_reachable(struct llinfo_arp *);
@@ -355,6 +356,27 @@ arp_llinfo_get_iflri(struct rtentry *rt, struct ifnet_llreach_info *iflri)
        }
 }
 
+static void
+arp_llinfo_refresh(struct rtentry *rt)
+{
+       uint64_t timenow = net_uptime();
+       /*
+        * If the route entry is permanent, or if its expiry is already
+        * less than timenow plus the extra time taken for a unicast probe,
+        * we can't expedite the refresh.
+        */
+       if ((rt->rt_expire == 0) ||
+           (rt->rt_flags & RTF_STATIC) ||
+           !(rt->rt_flags & RTF_LLINFO)) {
+               return;
+       }
+
+       if (rt->rt_expire > timenow + arp_unicast_lim) {
+               rt->rt_expire = timenow + arp_unicast_lim;
+       }
+       return;
+}
+
 void
 arp_llreach_set_reachable(struct ifnet *ifp, void *addr, unsigned int alen)
 {
@@ -753,6 +775,7 @@ arp_rtrequest(int req, struct rtentry *rt, struct sockaddr *sa)
                rt->rt_llinfo_get_iflri = arp_llinfo_get_iflri;
                rt->rt_llinfo_purge     = arp_llinfo_purge;
                rt->rt_llinfo_free      = arp_llinfo_free;
+               rt->rt_llinfo_refresh   = arp_llinfo_refresh;
                rt->rt_flags |= RTF_LLINFO;
                la->la_rt = rt;
                LIST_INSERT_HEAD(&llinfo_arp, la, la_le);
@@ -1695,7 +1718,7 @@ match:
            route->rt_flags & RTF_ROUTER && 
            llinfo->la_flags & LLINFO_RTRFAIL_EVTSENT) {
                struct kev_msg ev_msg;
-               struct kev_in_arpfailure in_arpalive;
+               struct kev_in_arpalive in_arpalive;
 
                llinfo->la_flags &= ~LLINFO_RTRFAIL_EVTSENT;
                RT_UNLOCK(route);
index 4e6d453cf434cd64cc2353f53d6a80cfb19a3129..bc302ae30e799d4b08f3ddbc197691f1aeaef390 100644 (file)
@@ -117,7 +117,7 @@ in_cksumdata(const void *buf, int mlen)
 {
        uint32_t sum, partial;
        unsigned int final_acc;
-       uint8_t *data = (void *)buf;
+       const uint8_t *data = (const uint8_t *)buf;
        boolean_t needs_swap, started_on_odd;
 
        VERIFY(mlen >= 0);
@@ -142,22 +142,22 @@ in_cksumdata(const void *buf, int mlen)
        needs_swap = started_on_odd;
        while (mlen >= 32) {
                __builtin_prefetch(data + 32);
-               partial += *(uint16_t *)(void *)data;
-               partial += *(uint16_t *)(void *)(data + 2);
-               partial += *(uint16_t *)(void *)(data + 4);
-               partial += *(uint16_t *)(void *)(data + 6);
-               partial += *(uint16_t *)(void *)(data + 8);
-               partial += *(uint16_t *)(void *)(data + 10);
-               partial += *(uint16_t *)(void *)(data + 12);
-               partial += *(uint16_t *)(void *)(data + 14);
-               partial += *(uint16_t *)(void *)(data + 16);
-               partial += *(uint16_t *)(void *)(data + 18);
-               partial += *(uint16_t *)(void *)(data + 20);
-               partial += *(uint16_t *)(void *)(data + 22);
-               partial += *(uint16_t *)(void *)(data + 24);
-               partial += *(uint16_t *)(void *)(data + 26);
-               partial += *(uint16_t *)(void *)(data + 28);
-               partial += *(uint16_t *)(void *)(data + 30);
+               partial += *(const uint16_t *)(const void *)data;
+               partial += *(const uint16_t *)(const void *)(data + 2);
+               partial += *(const uint16_t *)(const void *)(data + 4);
+               partial += *(const uint16_t *)(const void *)(data + 6);
+               partial += *(const uint16_t *)(const void *)(data + 8);
+               partial += *(const uint16_t *)(const void *)(data + 10);
+               partial += *(const uint16_t *)(const void *)(data + 12);
+               partial += *(const uint16_t *)(const void *)(data + 14);
+               partial += *(const uint16_t *)(const void *)(data + 16);
+               partial += *(const uint16_t *)(const void *)(data + 18);
+               partial += *(const uint16_t *)(const void *)(data + 20);
+               partial += *(const uint16_t *)(const void *)(data + 22);
+               partial += *(const uint16_t *)(const void *)(data + 24);
+               partial += *(const uint16_t *)(const void *)(data + 26);
+               partial += *(const uint16_t *)(const void *)(data + 28);
+               partial += *(const uint16_t *)(const void *)(data + 30);
                data += 32;
                mlen -= 32;
                if (PREDICT_FALSE(partial & 0xc0000000)) {
@@ -170,14 +170,14 @@ in_cksumdata(const void *buf, int mlen)
                }
        }
        if (mlen & 16) {
-               partial += *(uint16_t *)(void *)data;
-               partial += *(uint16_t *)(void *)(data + 2);
-               partial += *(uint16_t *)(void *)(data + 4);
-               partial += *(uint16_t *)(void *)(data + 6);
-               partial += *(uint16_t *)(void *)(data + 8);
-               partial += *(uint16_t *)(void *)(data + 10);
-               partial += *(uint16_t *)(void *)(data + 12);
-               partial += *(uint16_t *)(void *)(data + 14);
+               partial += *(const uint16_t *)(const void *)data;
+               partial += *(const uint16_t *)(const void *)(data + 2);
+               partial += *(const uint16_t *)(const void *)(data + 4);
+               partial += *(const uint16_t *)(const void *)(data + 6);
+               partial += *(const uint16_t *)(const void *)(data + 8);
+               partial += *(const uint16_t *)(const void *)(data + 10);
+               partial += *(const uint16_t *)(const void *)(data + 12);
+               partial += *(const uint16_t *)(const void *)(data + 14);
                data += 16;
                mlen -= 16;
        }
@@ -186,19 +186,19 @@ in_cksumdata(const void *buf, int mlen)
         * are using bit masks, which are not affected.
         */
        if (mlen & 8) {
-               partial += *(uint16_t *)(void *)data;
-               partial += *(uint16_t *)(void *)(data + 2);
-               partial += *(uint16_t *)(void *)(data + 4);
-               partial += *(uint16_t *)(void *)(data + 6);
+               partial += *(const uint16_t *)(const void *)data;
+               partial += *(const uint16_t *)(const void *)(data + 2);
+               partial += *(const uint16_t *)(const void *)(data + 4);
+               partial += *(const uint16_t *)(const void *)(data + 6);
                data += 8;
        }
        if (mlen & 4) {
-               partial += *(uint16_t *)(void *)data;
-               partial += *(uint16_t *)(void *)(data + 2);
+               partial += *(const uint16_t *)(const void *)data;
+               partial += *(const uint16_t *)(const void *)(data + 2);
                data += 4;
        }
        if (mlen & 2) {
-               partial += *(uint16_t *)(void *)data;
+               partial += *(const uint16_t *)(const void *)data;
                data += 2;
        }
        if (mlen & 1) {
@@ -228,7 +228,7 @@ in_cksumdata(const void *buf, int mlen)
 {
        uint64_t sum, partial;
        unsigned int final_acc;
-       uint8_t *data = (void *)buf;
+       const uint8_t *data = (const uint8_t *)buf;
        boolean_t needs_swap, started_on_odd;
 
        VERIFY(mlen >= 0);
@@ -254,29 +254,29 @@ in_cksumdata(const void *buf, int mlen)
        if ((uintptr_t)data & 2) {
                if (mlen < 2)
                        goto trailing_bytes;
-               partial += *(uint16_t *)(void *)data;
+               partial += *(const uint16_t *)(const void *)data;
                data += 2;
                mlen -= 2;
        }
        while (mlen >= 64) {
                __builtin_prefetch(data + 32);
                __builtin_prefetch(data + 64);
-               partial += *(uint32_t *)(void *)data;
-               partial += *(uint32_t *)(void *)(data + 4);
-               partial += *(uint32_t *)(void *)(data + 8);
-               partial += *(uint32_t *)(void *)(data + 12);
-               partial += *(uint32_t *)(void *)(data + 16);
-               partial += *(uint32_t *)(void *)(data + 20);
-               partial += *(uint32_t *)(void *)(data + 24);
-               partial += *(uint32_t *)(void *)(data + 28);
-               partial += *(uint32_t *)(void *)(data + 32);
-               partial += *(uint32_t *)(void *)(data + 36);
-               partial += *(uint32_t *)(void *)(data + 40);
-               partial += *(uint32_t *)(void *)(data + 44);
-               partial += *(uint32_t *)(void *)(data + 48);
-               partial += *(uint32_t *)(void *)(data + 52);
-               partial += *(uint32_t *)(void *)(data + 56);
-               partial += *(uint32_t *)(void *)(data + 60);
+               partial += *(const uint32_t *)(const void *)data;
+               partial += *(const uint32_t *)(const void *)(data + 4);
+               partial += *(const uint32_t *)(const void *)(data + 8);
+               partial += *(const uint32_t *)(const void *)(data + 12);
+               partial += *(const uint32_t *)(const void *)(data + 16);
+               partial += *(const uint32_t *)(const void *)(data + 20);
+               partial += *(const uint32_t *)(const void *)(data + 24);
+               partial += *(const uint32_t *)(const void *)(data + 28);
+               partial += *(const uint32_t *)(const void *)(data + 32);
+               partial += *(const uint32_t *)(const void *)(data + 36);
+               partial += *(const uint32_t *)(const void *)(data + 40);
+               partial += *(const uint32_t *)(const void *)(data + 44);
+               partial += *(const uint32_t *)(const void *)(data + 48);
+               partial += *(const uint32_t *)(const void *)(data + 52);
+               partial += *(const uint32_t *)(const void *)(data + 56);
+               partial += *(const uint32_t *)(const void *)(data + 60);
                data += 64;
                mlen -= 64;
                if (PREDICT_FALSE(partial & (3ULL << 62))) {
@@ -293,34 +293,34 @@ in_cksumdata(const void *buf, int mlen)
         * are using bit masks, which are not affected.
         */
        if (mlen & 32) {
-               partial += *(uint32_t *)(void *)data;
-               partial += *(uint32_t *)(void *)(data + 4);
-               partial += *(uint32_t *)(void *)(data + 8);
-               partial += *(uint32_t *)(void *)(data + 12);
-               partial += *(uint32_t *)(void *)(data + 16);
-               partial += *(uint32_t *)(void *)(data + 20);
-               partial += *(uint32_t *)(void *)(data + 24);
-               partial += *(uint32_t *)(void *)(data + 28);
+               partial += *(const uint32_t *)(const void *)data;
+               partial += *(const uint32_t *)(const void *)(data + 4);
+               partial += *(const uint32_t *)(const void *)(data + 8);
+               partial += *(const uint32_t *)(const void *)(data + 12);
+               partial += *(const uint32_t *)(const void *)(data + 16);
+               partial += *(const uint32_t *)(const void *)(data + 20);
+               partial += *(const uint32_t *)(const void *)(data + 24);
+               partial += *(const uint32_t *)(const void *)(data + 28);
                data += 32;
        }
        if (mlen & 16) {
-               partial += *(uint32_t *)(void *)data;
-               partial += *(uint32_t *)(void *)(data + 4);
-               partial += *(uint32_t *)(void *)(data + 8);
-               partial += *(uint32_t *)(void *)(data + 12);
+               partial += *(const uint32_t *)(const void *)data;
+               partial += *(const uint32_t *)(const void *)(data + 4);
+               partial += *(const uint32_t *)(const void *)(data + 8);
+               partial += *(const uint32_t *)(const void *)(data + 12);
                data += 16;
        }
        if (mlen & 8) {
-               partial += *(uint32_t *)(void *)data;
-               partial += *(uint32_t *)(void *)(data + 4);
+               partial += *(const uint32_t *)(const void *)data;
+               partial += *(const uint32_t *)(const void *)(data + 4);
                data += 8;
        }
        if (mlen & 4) {
-               partial += *(uint32_t *)(void *)data;
+               partial += *(const uint32_t *)(const void *)data;
                data += 4;
        }
        if (mlen & 2) {
-               partial += *(uint16_t *)(void *)data;
+               partial += *(const uint16_t *)(const void *)data;
                data += 2;
        }
 trailing_bytes:
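
The in_cksumdata() hunks above only add const qualifiers; the arithmetic is the standard Internet (one's-complement) checksum, where 16-bit words are summed into a wider accumulator and the carries are periodically folded back into the low bits. A tiny reference implementation of that folding, independent of this file's unrolled loops (internet_cksum is an illustrative name, not an XNU function):

#include <stddef.h>
#include <stdint.h>

/* One's-complement (Internet) checksum over a byte buffer.
 * Sums big-endian 16-bit words into a 32-bit accumulator, folds the
 * carries back into the low 16 bits, and returns the complement. */
static uint16_t
internet_cksum(const uint8_t *data, size_t len)
{
	uint32_t sum = 0;

	while (len > 1) {
		sum += (uint32_t)((data[0] << 8) | data[1]);
		data += 2;
		len -= 2;
	}
	if (len == 1) {
		sum += (uint32_t)data[0] << 8;	/* pad the odd trailing byte */
	}
	while (sum >> 16) {
		sum = (sum & 0xffff) + (sum >> 16);	/* fold carries */
	}
	return (uint16_t)~sum;
}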
diff --git a/bsd/netinet/in_dhcp.c b/bsd/netinet/in_dhcp.c
deleted file mode 100644 (file)
index e7f8ab6..0000000
+++ /dev/null
@@ -1,935 +0,0 @@
-/*
- * Copyright (c) 1988-2013 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-/*
- * in_dhcp.c
- * - use DHCP to allocate an IP address and get the subnet mask and router
- */
-
-/* 
- * Modification History
- *
- * April 17, 2007      Dieter Siegmund (dieter@apple.com)
- * - created based on in_bootp.c
- */
-
-#include <sys/param.h>
-#include <sys/types.h>
-#include <mach/boolean.h>
-#include <sys/kernel.h>
-#include <sys/errno.h>
-#include <sys/file.h>
-#include <sys/uio.h>
-#include <sys/ioctl.h>
-#include <sys/time.h>
-#include <sys/mbuf.h>
-#include <sys/vnode.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/uio_internal.h>
-#include <net/if.h>
-#include <net/if_dl.h>
-#include <net/if_types.h>
-#include <net/route.h>
-#include <net/dlil.h>
-#include <netinet/in.h>
-#include <netinet/in_systm.h>
-#include <netinet/if_ether.h>
-#include <netinet/ip.h>
-#include <netinet/ip_var.h>
-#include <netinet/udp.h>
-#include <netinet/udp_var.h>
-#include <netinet/ip_icmp.h>
-#include <netinet/bootp.h>
-#include <netinet/dhcp.h>
-#include <netinet/in_dhcp.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <netinet/dhcp_options.h>
-
-#include <kern/kern_types.h>
-#include <kern/kalloc.h>
-
-#ifdef DHCP_DEBUG
-#define        dprintf(x) printf x;
-#else  /* !DHCP_DEBUG */
-#define        dprintf(x)
-#endif /* DHCP_DEBUG */
-
-#define INITIAL_WAIT_SECS              2
-#define MAX_WAIT_SECS                  64
-#define GATHER_TIME_SECS               4
-#define RAND_TICKS                     (hz)    /* one second */
-
-const struct sockaddr_in blank_sin = {
-    sizeof(struct sockaddr_in), 
-    AF_INET, 
-    0, 
-    { 0 },
-    { 0, 0, 0, 0, 0, 0, 0, 0 }
-};
-
-__private_extern__ int
-inet_aifaddr(struct socket * so, const char * name,
-            const struct in_addr * addr, 
-            const struct in_addr * mask,
-            const struct in_addr * broadcast)
-{
-    struct ifaliasreq  ifra;
-
-    bzero(&ifra, sizeof(ifra));
-    strlcpy(ifra.ifra_name, name, sizeof(ifra.ifra_name));
-    if (addr) {
-       *((struct sockaddr_in *)(void *)&ifra.ifra_addr) = blank_sin;
-       ((struct sockaddr_in *)(void *)&ifra.ifra_addr)->sin_addr = *addr;
-    }
-    if (mask) {
-       *((struct sockaddr_in *)(void *)&ifra.ifra_mask) = blank_sin;
-       ((struct sockaddr_in *)(void *)&ifra.ifra_mask)->sin_addr = *mask;
-    }
-    if (broadcast) {
-       *((struct sockaddr_in *)(void *)&ifra.ifra_broadaddr) = blank_sin;
-       ((struct sockaddr_in *)(void *)&ifra.ifra_broadaddr)->sin_addr = *broadcast;
-    }
-    return (ifioctl(so, SIOCAIFADDR, (caddr_t)&ifra, current_proc()));
-}
-
-
-struct dhcp_context {
-    struct ifnet *             ifp;
-    struct sockaddr_dl *       dl_p;
-    struct ifreq               ifr;
-    struct socket *            so;
-    uint8_t                    request[DHCP_PACKET_MIN];
-    dhcpoa_t                   request_options;
-    uint8_t                    reply[DHCP_PAYLOAD_MIN];
-    struct timeval             start_time;
-    uint32_t                   xid;
-    int                                max_try;
-    struct in_addr             iaddr;
-    struct in_addr             netmask;
-    struct in_addr             router;
-    struct in_addr             server_id;
-};
-
-static __inline__ struct dhcp_packet *
-dhcp_context_request(struct dhcp_context * context)
-{
-    return ((struct dhcp_packet *)(void *)context->request);
-}
-
-static __inline__ struct dhcp *
-dhcp_context_reply(struct dhcp_context * context)
-{
-    return ((struct dhcp *)(void *)context->reply);
-}
-
-struct mbuf * ip_pkt_to_mbuf(caddr_t pkt, int pktsize);
-
-static int
-receive_packet(struct socket * so, void * pp, int psize,
-              int * actual_size);
-
-/* ip address formatting macros */
-#define IP_FORMAT      "%d.%d.%d.%d"
-#define IP_CH(ip)      ((const uint8_t *)ip)
-#define IP_LIST(ip)    IP_CH(ip)[0],IP_CH(ip)[1],IP_CH(ip)[2],IP_CH(ip)[3]
-
-#define SUGGESTED_LEASE_LENGTH         (60 * 60 * 24 * 30 * 3) /* 3 months */
-
-static const uint8_t dhcp_params[] = {
-    dhcptag_subnet_mask_e, 
-    dhcptag_router_e,
-};
-
-#define        N_DHCP_PARAMS   (sizeof(dhcp_params) / sizeof(dhcp_params[0]))
-
-static __inline__ long
-random_range(long bottom, long top)
-{
-    long number = top - bottom + 1;
-    long range_size = LONG_MAX / number;
-    return (((long)random()) / range_size + bottom);
-}
-
-static void
-init_dhcp_packet_header(struct dhcp_packet * pkt, int pkt_size)
-{
-    bzero(&pkt->ip, sizeof(pkt->ip));
-    bzero(&pkt->udp, sizeof(pkt->udp));
-    pkt->ip.ip_v = IPVERSION;
-    pkt->ip.ip_hl = sizeof(struct ip) >> 2;
-    pkt->ip.ip_ttl = MAXTTL;
-    pkt->ip.ip_p = IPPROTO_UDP;
-    pkt->ip.ip_src.s_addr = 0;
-    pkt->ip.ip_dst.s_addr = htonl(INADDR_BROADCAST);
-    pkt->ip.ip_len = htons(pkt_size);
-    pkt->ip.ip_sum = 0;
-    pkt->udp.uh_sport = htons(IPPORT_BOOTPC);
-    pkt->udp.uh_dport = htons(IPPORT_BOOTPS);
-    pkt->udp.uh_sum = 0;
-    pkt->udp.uh_ulen = htons(pkt_size - sizeof(pkt->ip));
-    return;
-}
-
-/*
- * Function: make_dhcp_request
- * Purpose:
- *   Initialize the DHCP-specific parts of the message.
- */
-static void
-make_dhcp_request(struct dhcp * request, int request_size,
-                 dhcp_msgtype_t msg, 
-                 const uint8_t * hwaddr, uint8_t hwtype, int hwlen,
-                 dhcpoa_t * options_p)
-{
-    uint8_t            cid[ETHER_ADDR_LEN + 1];
-    uint8_t            rfc_magic[RFC_MAGIC_SIZE] = RFC_OPTIONS_MAGIC;
-
-    if (hwlen >= (int)sizeof(cid)) {
-       printf("dhcp: hwlen is %d (> %d), truncating\n", hwlen,
-              (int)sizeof(cid));
-       hwlen = sizeof(cid) - 1;
-    }
-    bzero(request, request_size);
-    request->dp_op = BOOTREQUEST;
-    request->dp_htype = hwtype;
-    request->dp_hlen = hwlen;
-    bcopy(hwaddr, request->dp_chaddr, hwlen);
-    bcopy(rfc_magic, request->dp_options, RFC_MAGIC_SIZE);
-    dhcpoa_init(options_p, request->dp_options + RFC_MAGIC_SIZE,
-               request_size - sizeof(struct dhcp) - RFC_MAGIC_SIZE);
-    /* make the request a dhcp packet */
-    dhcpoa_add_dhcpmsg(options_p, msg);
-
-    /* add the list of required parameters */
-    dhcpoa_add(options_p, dhcptag_parameter_request_list_e,
-              N_DHCP_PARAMS, dhcp_params);
-
-    /* add the DHCP client identifier */
-    cid[0] = hwtype;
-    bcopy(hwaddr, cid + 1, hwlen);
-    dhcpoa_add(options_p, dhcptag_client_identifier_e, hwlen + 1, cid);
-
-    return;
-}
-
-/*
- * Function: ip_pkt_to_mbuf
- * Purpose:
- *   Put the given IP packet into an mbuf, calculate the
- *   IP checksum.
- */
-struct mbuf *
-ip_pkt_to_mbuf(caddr_t pkt, int pktsize)
-{
-    struct ip *                ip;
-    struct mbuf        *       m;
-    
-    m = (struct mbuf *)m_devget(pkt, pktsize, 0, NULL, NULL);
-    if (m == 0) {
-       printf("dhcp: ip_pkt_to_mbuf: m_devget failed\n");
-       return NULL;
-    }
-    m->m_flags |= M_BCAST;
-    /* Compute the checksum */
-    ip = mtod(m, struct ip *);
-    ip->ip_sum = 0;
-    ip->ip_sum = in_cksum(m, sizeof(struct ip));
-    return (m);
-}
-
-static __inline__ u_char *
-link_address(struct sockaddr_dl * dl_p)
-{
-    return (u_char *)(dl_p->sdl_data + dl_p->sdl_nlen);
-}
-
-static __inline__ int
-link_address_length(struct sockaddr_dl * dl_p)
-{
-    return (dl_p->sdl_alen);
-}
-
-static __inline__ void
-link_print(struct sockaddr_dl * dl_p)
-{
-    int i;
-
-    for (i = 0; i < dl_p->sdl_alen; i++) 
-       printf("%s%x", i ? ":" : "", 
-              (link_address(dl_p))[i]);
-    printf("\n");
-    return;
-}
-
-static struct sockaddr_dl *
-link_from_ifnet(struct ifnet * ifp)
-{
-    return ((struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr);
-}
-
-/*
- * Function: send_packet
- * Purpose:
- *     Send the request directly on the interface, bypassing the routing code.
- */
-static int
-send_packet(struct ifnet * ifp, struct dhcp_packet * pkt, int pkt_size)
-{
-    struct mbuf        *       m;
-    struct sockaddr_in dest;
-    
-    dest = blank_sin;
-    dest.sin_port = htons(IPPORT_BOOTPS);
-    dest.sin_addr.s_addr = INADDR_BROADCAST;
-    m = ip_pkt_to_mbuf((caddr_t)pkt, pkt_size);
-    return dlil_output(ifp, PF_INET, m, 0, (struct sockaddr *)&dest, 0, NULL);
-}
-
-/*
- * Function: receive_packet
- * Purpose:
- *   Return a received packet or an error if none available.
- */
-static int
-receive_packet(struct socket * so, void * pp, int psize, int * actual_size)
-{
-    uio_t      auio;
-    int                error;
-    int                rcvflg;
-    char       uio_buf[ UIO_SIZEOF(1) ];
-
-    auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, 
-                               &uio_buf[0], sizeof(uio_buf));
-    uio_addiov(auio, CAST_USER_ADDR_T(pp), psize);
-    rcvflg = MSG_WAITALL;
-    
-    error = soreceive(so, (struct sockaddr **) 0, auio, 0, 0, &rcvflg);
-    *actual_size = psize - uio_resid(auio);
-    return (error);
-}
-
-/*
- * Function: dhcp_timeout
- * Purpose:
- *   Wakeup the process waiting for something on a socket.
- */
-static void
-dhcp_timeout(void * arg)
-{
-    struct socket * * timer_arg = (struct socket * *)arg;
-    struct socket * so = *timer_arg;
-    
-    dprintf(("dhcp: timeout\n"));
-
-    *timer_arg = NULL;
-    socket_lock(so, 1);
-    sowakeup(so, &so->so_rcv);
-    socket_unlock(so, 1);
-    return;
-}
-
-/*
- * Function: rate_packet
- * Purpose:
- *   Return an integer point rating value for the given dhcp packet.
- *   If yiaddr non-zero, the packet gets a rating of 1.
- *   Another point is given if the packet contains the subnet mask,
- *   and another if the router is present.
- */
-#define GOOD_RATING    3
-static __inline__ int 
-rate_packet(dhcpol_t * options_p)
-{
-    int                len;
-    int        rating = 1;
-
-    if (dhcpol_find(options_p, dhcptag_subnet_mask_e, &len, NULL) != NULL) {
-       rating++;
-    }
-    if (dhcpol_find(options_p, dhcptag_router_e, &len, NULL) != NULL) {
-       rating++;
-    }
-    return (rating);
-}
-
-static dhcp_msgtype_t
-get_dhcp_msgtype(dhcpol_t * options_p)
-{
-    int                                len;
-    const uint8_t *            opt;
-
-    opt = dhcpol_find(options_p, dhcptag_dhcp_message_type_e, &len, NULL);
-    if (opt != NULL && len == 1) {
-       return (*opt);
-    }
-    return (dhcp_msgtype_none_e);
-}
-
-static int
-dhcp_get_ack(struct dhcp_context * context, int wait_ticks)
-{
-    int                                error = 0;
-    const struct in_addr *     ip;
-    int                                len;
-    int                                n;
-    struct dhcp *              reply;
-    struct in_addr             server_id;
-    struct socket *            timer_arg;
-
-    timer_arg = context->so;
-    reply = dhcp_context_reply(context);
-    timeout((timeout_fcn_t)dhcp_timeout, &timer_arg, wait_ticks);
-    while (1) {
-       error = receive_packet(context->so, context->reply,
-                              sizeof(context->reply), &n);
-       if (error == 0) {
-           dhcp_msgtype_t      msg;
-           dhcpol_t            options;
-
-           dprintf(("\ndhcp: received packet length %d\n", n));
-           if (n < (int)sizeof(struct dhcp)) {
-               dprintf(("dhcp: packet is too short %d < %d\n",
-                        n, (int)sizeof(struct dhcp)));
-               continue;
-           }
-           if (ntohl(reply->dp_xid) != context->xid
-               || bcmp(reply->dp_chaddr, link_address(context->dl_p), 
-                       link_address_length(context->dl_p)) != 0) {
-               /* not for us */
-               continue;
-           }
-           (void)dhcpol_parse_packet(&options, reply, n);
-           server_id.s_addr = 0;
-           ip = (const struct in_addr *)
-               dhcpol_find(&options, 
-                           dhcptag_server_identifier_e, &len, NULL);
-           if (ip != NULL && len >= (int)sizeof(*ip)) {
-               server_id = *ip;
-           }
-           msg = get_dhcp_msgtype(&options);
-           if (msg == dhcp_msgtype_nak_e
-               && server_id.s_addr == context->server_id.s_addr) {
-               /* server NAK'd us, start over */
-               dhcpol_free(&options);
-               error = EPROTO;
-               untimeout((timeout_fcn_t)dhcp_timeout, &timer_arg);
-               break;
-           }
-           if (msg != dhcp_msgtype_ack_e
-               || reply->dp_yiaddr.s_addr == 0
-               || reply->dp_yiaddr.s_addr == INADDR_BROADCAST) {
-               /* ignore the packet */
-               goto next_packet;
-           }
-           printf("dhcp: received ACK: server " IP_FORMAT
-                  " IP address "  IP_FORMAT "\n",
-                  IP_LIST(&server_id), IP_LIST(&reply->dp_yiaddr));
-           context->iaddr = reply->dp_yiaddr;
-           ip = (const struct in_addr *)
-               dhcpol_find(&options, 
-                           dhcptag_subnet_mask_e, &len, NULL);
-           if (ip != NULL && len >= (int)sizeof(*ip)) {
-               context->netmask = *ip;
-           }
-           ip = (const struct in_addr *)
-               dhcpol_find(&options, dhcptag_router_e, &len, NULL);
-           if (ip != NULL && len >= (int)sizeof(*ip)) {
-               context->router = *ip;
-           }
-           dhcpol_free(&options);
-           untimeout((timeout_fcn_t)dhcp_timeout, &timer_arg);
-           break;
-
-       next_packet:
-           dhcpol_free(&options);
-       }
-       else if ((error != EWOULDBLOCK)) {
-           /* if some other error occurred, we're done */
-           untimeout((timeout_fcn_t)dhcp_timeout, &timer_arg);
-           break;
-       }
-       else if (timer_arg == NULL) { 
-           /* timed out */
-           break;
-       }
-       else {
-           /* wait for a wait to arrive, or a timeout to occur */
-           socket_lock(context->so, 1);
-           error = sbwait(&context->so->so_rcv);
-           socket_unlock(context->so, 1);
-       }
-    }
-    return (error);
-}
-
-static int
-dhcp_select(struct dhcp_context * context)
-{
-    struct timeval             current_time;
-    int                                error = 0;
-    dhcpoa_t *                 options_p;
-    struct dhcp_packet *       request;
-    int                                request_size;
-    int                                retry;
-    int                                wait_ticks;
-
-    /* format a DHCP Request packet */
-    request = dhcp_context_request(context);
-    options_p = &context->request_options;
-
-    make_dhcp_request(&request->dhcp, DHCP_PAYLOAD_MIN,
-                     dhcp_msgtype_request_e,
-                     link_address(context->dl_p), ARPHRD_ETHER,
-                     link_address_length(context->dl_p),
-                     options_p);
-    /* insert server identifier and requested ip address */
-    dhcpoa_add(options_p, dhcptag_requested_ip_address_e,
-              sizeof(context->iaddr), &context->iaddr);
-    dhcpoa_add(options_p, dhcptag_server_identifier_e,
-              sizeof(context->server_id), &context->server_id);
-    dhcpoa_add(options_p, dhcptag_end_e, 0, 0);
-    request_size = sizeof(*request) + RFC_MAGIC_SIZE 
-       + dhcpoa_used(options_p);
-    if (request_size < (int)sizeof(struct bootp_packet)) {
-       /* pad out to BOOTP-sized packet */
-       request_size = sizeof(struct bootp_packet);
-    }
-    init_dhcp_packet_header(request, request_size);
-
-    wait_ticks = INITIAL_WAIT_SECS * hz;
-#define SELECT_RETRY_COUNT     3
-    for (retry = 0; retry < SELECT_RETRY_COUNT; retry++) {
-       /* Send the request */
-       printf("dhcp: sending REQUEST: server " IP_FORMAT 
-              " IP address " IP_FORMAT "\n",
-              IP_LIST(&context->server_id),
-              IP_LIST(&context->iaddr));
-       microtime(&current_time);
-       request->dhcp.dp_secs 
-           = htons((u_short)
-                   (current_time.tv_sec - context->start_time.tv_sec));
-       request->dhcp.dp_xid = htonl(context->xid);
-       request->ip.ip_id = ip_randomid();
-       error = send_packet(context->ifp, request, request_size);
-       if (error != 0) {
-           printf("dhcp: send_packet failed with %d\n", error);
-           goto failed;
-       }
-
-       wait_ticks += random_range(-RAND_TICKS, RAND_TICKS);
-       dprintf(("dhcp: waiting %d ticks\n", wait_ticks));
-       error = dhcp_get_ack(context, wait_ticks);
-       switch (error) {
-       case 0:
-           /* we're done */
-           goto done;
-       case EPROTO:
-           printf("dhcp: server " IP_FORMAT " send us a NAK\n",
-                  IP_LIST(&context->server_id));
-           goto failed;
-       case EWOULDBLOCK:
-           break;
-       default:
-           dprintf(("dhcp: failed to receive packets: %d\n", error));
-           goto failed;
-       }
-       wait_ticks *= 2;
-       if (wait_ticks > (MAX_WAIT_SECS * hz))
-           wait_ticks = MAX_WAIT_SECS * hz;
-       microtime(&current_time);
-    }
-    error = ETIMEDOUT;
-    goto failed;
-    
- done:
-    error = 0;
-
- failed:
-    return (error);
-}
-
-static int
-dhcp_get_offer(struct dhcp_context * context, int wait_ticks)
-{
-    int                                error = 0;
-    int                                gather_count = 0;
-    const struct in_addr *     ip;
-    int                                last_rating = 0;
-    int                                len;
-    int                                n;
-    int                        rating;
-    struct dhcp *              reply;
-    struct in_addr             server_id;
-    struct socket *            timer_arg;
-
-    timer_arg = context->so;
-    reply = dhcp_context_reply(context);
-    timeout((timeout_fcn_t)dhcp_timeout, &timer_arg, wait_ticks);
-    while (1) {
-       error = receive_packet(context->so, context->reply,
-                              sizeof(context->reply), &n);
-       if (error == 0) {
-           dhcpol_t            options;
-
-           dprintf(("\ndhcp: received packet length %d\n", n));
-           if (n < (int)sizeof(struct dhcp)) {
-               dprintf(("dhcp: packet is too short %d < %d\n",
-                        n, (int)sizeof(struct dhcp)));
-               continue;
-           }
-           if (ntohl(reply->dp_xid) != context->xid
-               || reply->dp_yiaddr.s_addr == 0
-               || reply->dp_yiaddr.s_addr == INADDR_BROADCAST
-               || bcmp(reply->dp_chaddr,
-                       link_address(context->dl_p), 
-                       link_address_length(context->dl_p)) != 0) {
-               /* not for us */
-               continue;
-           }
-           (void)dhcpol_parse_packet(&options, reply, n);
-           if (get_dhcp_msgtype(&options) != dhcp_msgtype_offer_e) {
-               /* not an offer */
-               goto next_packet;
-           }
-           ip = (const struct in_addr *)
-               dhcpol_find(&options, 
-                           dhcptag_server_identifier_e, &len, NULL);
-           if (ip == NULL || len < (int)sizeof(*ip)) {
-               /* missing/invalid server identifier */
-               goto next_packet;
-           }
-           printf("dhcp: received OFFER: server " IP_FORMAT
-                  " IP address "  IP_FORMAT "\n",
-                  IP_LIST(ip), IP_LIST(&reply->dp_yiaddr));
-           server_id = *ip;
-           rating = rate_packet(&options);
-           if (rating > last_rating) {
-               context->iaddr = reply->dp_yiaddr;
-               ip = (const struct in_addr *)
-                   dhcpol_find(&options, 
-                               dhcptag_subnet_mask_e, &len, NULL);
-               if (ip != NULL && len >= (int)sizeof(*ip)) {
-                   context->netmask = *ip;
-               }
-               ip = (const struct in_addr *)
-                   dhcpol_find(&options, dhcptag_router_e, &len, NULL);
-               if (ip != NULL && len >= (int)sizeof(*ip)) {
-                   context->router = *ip;
-               }
-               context->server_id = server_id;
-           }
-           if (rating >= GOOD_RATING) {
-               dhcpol_free(&options);
-               /* packet is good enough */
-               untimeout((timeout_fcn_t)dhcp_timeout, &timer_arg);
-               break;
-           }
-           if (gather_count == 0) {
-               untimeout((timeout_fcn_t)dhcp_timeout, &timer_arg);
-               timer_arg = context->so;
-               timeout((timeout_fcn_t)dhcp_timeout, &timer_arg, 
-                       hz * GATHER_TIME_SECS);
-           }
-           gather_count = 1;
-       next_packet:
-           dhcpol_free(&options);
-       }
-       else if ((error != EWOULDBLOCK)) {
-           untimeout((timeout_fcn_t)dhcp_timeout, &timer_arg);
-           break;
-       }
-       else if (timer_arg == NULL) { /* timed out */
-           if (gather_count != 0) {
-               dprintf(("dhcp: gathering time has expired\n"));
-               error = 0;
-           }
-           break;
-       }
-       else {
-           socket_lock(context->so, 1);
-           error = sbwait(&context->so->so_rcv);
-           socket_unlock(context->so, 1);
-       }
-    }
-    return (error);
-}
-
-/*
- * Function: dhcp_init
- * Purpose:
- *   Start in the DHCP INIT state sending DISCOVER's.  When we get OFFER's,
- *   try to select one of them by sending a REQUEST and waiting for an ACK.
- */
-static int
-dhcp_init(struct dhcp_context * context)
-{
-    struct timeval             current_time;
-    int                                error = 0;
-    uint32_t                   lease_option = htonl(SUGGESTED_LEASE_LENGTH);
-    dhcpoa_t *                 options_p;
-    struct dhcp_packet *       request;
-    int                                request_size;
-    int                                retry;
-    int                                wait_ticks;
-
-    /* remember the time we started */
-    microtime(&context->start_time);
-    current_time = context->start_time;
-    
-    request = dhcp_context_request(context);
-    options_p = &context->request_options;
-
- retry:
-    /* format a DHCP DISCOVER packet */
-    make_dhcp_request(&request->dhcp, DHCP_PAYLOAD_MIN,
-                     dhcp_msgtype_discover_e,
-                     link_address(context->dl_p), ARPHRD_ETHER,
-                     link_address_length(context->dl_p), 
-                     options_p);
-    /* add the requested lease time */
-    dhcpoa_add(options_p, dhcptag_lease_time_e,
-              sizeof(lease_option), &lease_option);
-    dhcpoa_add(options_p, dhcptag_end_e, 0, 0);
-    request_size = sizeof(*request) + RFC_MAGIC_SIZE 
-       + dhcpoa_used(options_p);
-    if (request_size < (int)sizeof(struct bootp_packet)) {
-       /* pad out to BOOTP-sized packet */
-       request_size = sizeof(struct bootp_packet);
-    }
-    init_dhcp_packet_header(request, request_size);
-
-    wait_ticks = INITIAL_WAIT_SECS * hz;
-    for (retry = 0; retry < context->max_try; retry++) {
-       /* Send the request */
-       printf("dhcp: sending DISCOVER\n");
-       request->dhcp.dp_secs 
-           = htons((u_short)(current_time.tv_sec 
-                             - context->start_time.tv_sec));
-       request->dhcp.dp_xid = htonl(context->xid);
-       request->ip.ip_id = ip_randomid();
-       error = send_packet(context->ifp, request, request_size);
-       if (error != 0) {
-           printf("dhcp: send_packet failed with %d\n", error);
-           goto failed;
-       }
-       wait_ticks += random_range(-RAND_TICKS, RAND_TICKS);
-       dprintf(("dhcp: waiting %d ticks\n", wait_ticks));
-       error = dhcp_get_offer(context, wait_ticks);
-       if (error == 0) {
-           /* send a REQUEST */
-           error = dhcp_select(context);
-           if (error == 0) {
-               /* we're done !*/
-               goto done;
-           }
-           if (error != EPROTO && error != ETIMEDOUT) {
-               /* fatal error */ 
-               dprintf(("dhcp: dhcp_select failed %d\n", error));
-               goto failed;
-           }
-           /* wait 10 seconds, and try again */
-           printf("dhcp: trying again in 10 seconds\n");
-           tsleep(&error, PRIBIO, "dhcp_init", 10 * hz);
-           context->xid++;
-           goto retry;
-       }
-       else if (error != EWOULDBLOCK) {
-           dprintf(("dhcp: failed to receive packets: %d\n", error));
-           goto failed;
-       }
-       wait_ticks *= 2;
-       if (wait_ticks > (MAX_WAIT_SECS * hz))
-           wait_ticks = MAX_WAIT_SECS * hz;
-       microtime(&current_time);
-    }
-    error = ETIMEDOUT;
-    goto failed;
-    
- done:
-    error = 0;
-
- failed:
-    return (error);
-}
-
-static void
-dhcp_context_free(struct dhcp_context * context, struct proc * procp)
-{
-    if (context == NULL) {
-       return;
-    }
-    if (context->so != NULL) {
-       int             error;
-
-       /* disable reception of DHCP packets before address assignment */
-       context->ifr.ifr_intval = 0;
-       error = ifioctl(context->so, SIOCAUTOADDR,
-                       (caddr_t)&context->ifr, procp);
-       if (error) {
-           printf("dhcp: SIOCAUTOADDR failed: %d\n", error);
-       }
-       soclose(context->so);
-    }
-    kfree(context, sizeof(*context));
-    return;
-}
-
-static struct dhcp_context *
-dhcp_context_create(struct ifnet * ifp, int max_try,
-                   struct proc * procp, int * error_p)
-{
-    struct dhcp_context        *       context = NULL;
-    struct sockaddr_dl *       dl_p;
-    struct in_addr             lo_addr;
-    struct in_addr             lo_mask;
-    int                                error;
-    struct sockaddr_in         sin;
-
-    /* get the hardware address from the interface */
-    dl_p = link_from_ifnet(ifp);
-    if (dl_p == NULL) {
-       printf("dhcp: can't get link address\n");
-       error = ENXIO;
-       goto failed;
-    }
-
-    printf("dhcp: h/w addr ");
-    link_print(dl_p);
-    if (dl_p->sdl_type != IFT_ETHER) {
-       printf("dhcp: hardware type %d not supported\n",
-              dl_p->sdl_type);
-       error = ENXIO;
-       goto failed;
-    }
-
-    context = (struct dhcp_context *)kalloc(sizeof(*context));
-    if (context == NULL) {
-       printf("dhcp: failed to allocate context\n");
-       error = ENOMEM;
-       goto failed;
-    }
-    bzero(context, sizeof(*context));
-
-    /* get a socket */
-    error = socreate(AF_INET, &context->so, SOCK_DGRAM, 0);
-    if (error != 0) {
-       printf("dhcp: socreate failed %d\n", error);
-       goto failed;
-    }
-
-    /* assign 127.0.0.1 to lo0 so that the bind will succeed */
-    lo_addr.s_addr = htonl(INADDR_LOOPBACK);
-    lo_mask.s_addr = htonl(IN_CLASSA_NET);
-    error = inet_aifaddr(context->so, "lo0", &lo_addr, &lo_mask, NULL);
-    if (error != 0) {
-       printf("dhcp: assigning loopback address failed %d\n", error);
-    }
-
-    /* enable reception of DHCP packets before an address is assigned */
-    snprintf(context->ifr.ifr_name, 
-            sizeof(context->ifr.ifr_name), "%s", if_name(ifp));
-    context->ifr.ifr_intval = 1;
-
-    error = ifioctl(context->so, SIOCAUTOADDR, (caddr_t)&context->ifr, procp);
-    if (error) {
-       printf("dhcp: SIOCAUTOADDR failed: %d\n", error);
-       goto failed;
-    }
-    dprintf(("dhcp: SIOCAUTOADDR done\n"));
-
-    error = ifioctl(context->so, SIOCPROTOATTACH, (caddr_t)&context->ifr, 
-                   procp);
-    if (error) {
-       printf("dhcp: SIOCPROTOATTACH failed: %d\n", error);
-       goto failed;
-    }
-    dprintf(("dhcp: SIOCPROTOATTACH done\n"));
-    
-    /* bind the socket */
-    sin.sin_len = sizeof(sin);
-    sin.sin_family = AF_INET;
-    sin.sin_port = htons(IPPORT_BOOTPC);
-    sin.sin_addr.s_addr = INADDR_ANY;
-    error = sobindlock(context->so, (struct sockaddr *)&sin, 1);
-    if (error) {
-       printf("dhcp: sobind failed, %d\n", error);
-       goto failed;
-    }
-
-    /* make it non-blocking I/O */
-    socket_lock(context->so, 1);
-    context->so->so_state |= SS_NBIO;
-    socket_unlock(context->so, 1);
-
-    /* save passed-in information */
-    context->max_try = max_try;
-    context->dl_p = dl_p;
-    context->ifp = ifp;
-
-    /* get a random transaction id */
-    context->xid = random();
-
-    return (context);
-
- failed:
-    dhcp_context_free(context, procp);
-    *error_p = error;
-    return (NULL);
-}
-
-/*
- * Routine: dhcp
- * Function:
- *   Do DHCP over the specified interface to retrieve the IP address,
- *   subnet mask, and router.  
- */
-int 
-dhcp(struct ifnet * ifp, struct in_addr * iaddr_p, int max_try,
-     struct in_addr * netmask_p, struct in_addr * router_p,
-     struct proc * procp)
-{
-    int                                error = 0;
-    struct dhcp_context        *       context;
-
-    context = dhcp_context_create(ifp, max_try, procp, &error);
-    if (context == NULL) {
-       return (error);
-    }
-    /* start DHCP in the INIT state */
-    error = dhcp_init(context);
-    if (error == 0) {
-       *iaddr_p = context->iaddr;
-       *netmask_p = context->netmask;
-       *router_p = context->router;
-    }
-    dhcp_context_free(context, procp);
-    return (error);
-}
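
The deleted in-kernel DHCP client retried its DISCOVER and REQUEST transmissions on a jittered, doubling schedule capped at a maximum wait. Below is a minimal standalone sketch of that backoff; HZ, INITIAL_WAIT_SECS, MAX_WAIT_SECS, RAND_TICKS and MAX_TRY are illustrative stand-ins, since the real constants are defined earlier in in_dhcp.c, above the quoted hunk.

    /*
     * Sketch of the retry schedule used by dhcp_init()/dhcp_select():
     * jitter the wait a little before each attempt, double it after an
     * unanswered attempt, and clamp it at a maximum.  All constants are
     * illustrative, not the kernel's values.
     */
    #include <stdio.h>
    #include <stdlib.h>

    #define HZ                100         /* assumed ticks per second */
    #define INITIAL_WAIT_SECS 4           /* assumed */
    #define MAX_WAIT_SECS     64          /* assumed */
    #define RAND_TICKS        (HZ / 2)    /* assumed: +/- half a second */
    #define MAX_TRY           6           /* assumed */

    static int
    random_range(int lo, int hi)
    {
        return lo + rand() % (hi - lo + 1);
    }

    int
    main(void)
    {
        int wait_ticks = INITIAL_WAIT_SECS * HZ;
        int retry;

        for (retry = 0; retry < MAX_TRY; retry++) {
            /* small random jitter, as in the removed loops */
            wait_ticks += random_range(-RAND_TICKS, RAND_TICKS);
            printf("attempt %d: wait %d ticks (~%.1f s)\n",
                retry + 1, wait_ticks, (double)wait_ticks / HZ);

            /* no reply: exponential backoff with a cap */
            wait_ticks *= 2;
            if (wait_ticks > MAX_WAIT_SECS * HZ)
                wait_ticks = MAX_WAIT_SECS * HZ;
        }
        return 0;
    }
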
diff --git a/bsd/netinet/in_dhcp.h b/bsd/netinet/in_dhcp.h
deleted file mode 100644 (file)
index 3a898af..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-#ifndef _NETINET_IN_DHCP_H
-#define _NETINET_IN_DHCP_H
-#include <sys/appleapiopts.h>
-
-/*
- * Copyright (c) 2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-/*
- * in_dhcp.h
- * - definitions for in_dhcp.c
- */
-
-extern int
-inet_aifaddr(struct socket * so, const char * name,
-            const struct in_addr * addr, 
-            const struct in_addr * mask,
-            const struct in_addr * broadcast);
-
-extern int
-dhcp(struct ifnet * ifp, struct in_addr * iaddr_p, int max_try,
-     struct in_addr * netmask_p, struct in_addr * router_p,
-     struct proc * procp);
-
-#endif /* _NETINET_IN_DHCP_H */
index 66bf01c30ee5c32fd0ff4cc412fba6a6b100f6c7..df11307022f9300888e003242016e4a81237d14d 100644 (file)
@@ -177,12 +177,12 @@ in_gif_output(
        iphdr.ip_ttl = ip_gif_ttl;
        iphdr.ip_len = m->m_pkthdr.len + sizeof (struct ip);
        if (ifp->if_flags & IFF_LINK1)
-               ip_ecn_ingress(ECN_ALLOWED, &iphdr.ip_tos, &tos);
+               ip_ecn_ingress(ECN_NORMAL, &iphdr.ip_tos, &tos);
        else
                ip_ecn_ingress(ECN_NOCARE, &iphdr.ip_tos, &tos);
 
        /* prepend new IP header */
-       M_PREPEND(m, sizeof (struct ip), M_DONTWAIT);
+       M_PREPEND(m, sizeof (struct ip), M_DONTWAIT, 0);
        if (m && mbuf_len(m) < sizeof (struct ip))
                m = m_pullup(m, sizeof (struct ip));
        if (m == NULL) {
@@ -240,6 +240,7 @@ in_gif_input(m, off)
        struct ip *ip;
        int af, proto;
        u_int8_t otos;
+       int egress_success = 0;
 
        ip = mtod(m, struct ip *);
        proto = ip->ip_p;
@@ -268,9 +269,9 @@ in_gif_input(m, off)
                }
                ip = mtod(m, struct ip *);
                if (gifp->if_flags & IFF_LINK1)
-                       ip_ecn_egress(ECN_ALLOWED, &otos, &ip->ip_tos);
+                       egress_success = ip_ecn_egress(ECN_NORMAL, &otos, &ip->ip_tos);
                else
-                       ip_ecn_egress(ECN_NOCARE, &otos, &ip->ip_tos);
+                       egress_success = ip_ecn_egress(ECN_NOCARE, &otos, &ip->ip_tos);
                break;
            }
 #endif
@@ -288,9 +289,9 @@ in_gif_input(m, off)
                ip6 = mtod(m, struct ip6_hdr *);
                itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
                if (gifp->if_flags & IFF_LINK1)
-                       ip_ecn_egress(ECN_ALLOWED, &otos, &itos);
+                       egress_success = ip_ecn_egress(ECN_NORMAL, &otos, &itos);
                else
-                       ip_ecn_egress(ECN_NOCARE, &otos, &itos);
+                       egress_success = ip_ecn_egress(ECN_NOCARE, &otos, &itos);
                ip6->ip6_flow &= ~htonl(0xff << 20);
                ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
                break;
@@ -301,6 +302,13 @@ in_gif_input(m, off)
                m_freem(m);
                return;
        }
+
+       if (egress_success == 0) {
+               OSAddAtomic(1, &ipstat.ips_nogif);
+               m_freem(m);
+               return;
+       }
+
 #ifdef __APPLE__
        /* Replace the rcvif by gifp for dlil to route it correctly */
        if (m->m_pkthdr.rcvif)
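
The gif input path now checks the return value of ip_ecn_egress() and drops the packet, counting it in ips_nogif, when ECN decapsulation fails. A standalone sketch of the rule being enforced follows: an outer header carrying Congestion Experienced cannot be propagated into a Not-ECT inner header. The IPTOS_ECN_* values mirror the ones used elsewhere in this commit; the helper below is an illustration, not the kernel routine.

    #include <stdio.h>
    #include <stdint.h>

    #define IPTOS_ECN_MASK   0x03
    #define IPTOS_ECN_NOTECT 0x00
    #define IPTOS_ECN_CE     0x03

    /* Returns 1 if the inner TOS could be updated, 0 if the packet must drop. */
    static int
    ecn_decap(uint8_t outer_tos, uint8_t *inner_tos)
    {
        if ((outer_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) {
            if ((*inner_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
                return 0;                /* drop: CE cannot be carried inward */
            *inner_tos |= IPTOS_ECN_CE;  /* propagate the congestion mark */
        }
        return 1;
    }

    int
    main(void)
    {
        uint8_t inner = IPTOS_ECN_NOTECT;

        if (ecn_decap(IPTOS_ECN_CE, &inner) == 0)
            printf("outer CE + inner Not-ECT -> drop (ips_nogif++)\n");
        return 0;
    }
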
index 893665adab69e0bd8e931cc7eeccdfe27ce21363..320c7394ad88349248bc8b50f5f552b8aebf97ed 100644 (file)
@@ -1737,7 +1737,7 @@ inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
                if (error)
                        return (error);
                /* we never use msfr.msfr_srcs; */
-               memcpy(&msfr, &msfr64, sizeof(msfr));
+               memcpy(&msfr, &msfr64, sizeof(msfr64));
        } else {
                error = sooptcopyin(sopt, &msfr32,
                    sizeof(struct __msfilterreq32),
@@ -1745,7 +1745,7 @@ inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
                if (error)
                        return (error);
                /* we never use msfr.msfr_srcs; */
-               memcpy(&msfr, &msfr32, sizeof(msfr));
+               memcpy(&msfr, &msfr32, sizeof(msfr32));
        }
 
        ifnet_head_lock_shared();
@@ -1809,7 +1809,6 @@ inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
                        IMO_UNLOCK(imo);
                        return (ENOBUFS);
                }
-               bzero(tss, (size_t) msfr.msfr_nsrcs * sizeof(*tss));
        }
 
        /*
@@ -1858,7 +1857,7 @@ inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
                msfr32.msfr_ifindex = msfr.msfr_ifindex;
                msfr32.msfr_fmode   = msfr.msfr_fmode;
                msfr32.msfr_nsrcs   = msfr.msfr_nsrcs;
-               memcpy(&msfr64.msfr_group, &msfr.msfr_group,
+               memcpy(&msfr32.msfr_group, &msfr.msfr_group,
                    sizeof(struct sockaddr_storage));
                error = sooptcopyout(sopt, &msfr32,
                    sizeof(struct __msfilterreq32));
@@ -2723,7 +2722,7 @@ inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
                if (error)
                        return (error);
                /* we never use msfr.msfr_srcs; */
-               memcpy(&msfr, &msfr64, sizeof(msfr));
+               memcpy(&msfr, &msfr64, sizeof(msfr64));
        } else {
                error = sooptcopyin(sopt, &msfr32,
                    sizeof(struct __msfilterreq32),
@@ -2731,7 +2730,7 @@ inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
                if (error)
                        return (error);
                /* we never use msfr.msfr_srcs; */
-               memcpy(&msfr, &msfr32, sizeof(msfr));
+               memcpy(&msfr, &msfr32, sizeof(msfr32));
        }
 
        if ((size_t) msfr.msfr_nsrcs >
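
The memcpy fixes above bound the copy by the size of the source structure rather than the destination, and the 32-bit copyout now fills msfr32 instead of msfr64. A small standalone sketch of why sizeof(source) is the safe bound when converting between differently sized layouts; the field names are illustrative, and only the leading common fields matter, mirroring the "we never use msfr_srcs" comment.

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    struct req32 {             /* 32-bit userland layout (smaller) */
        uint32_t ifindex;
        uint32_t fmode;
        uint32_t nsrcs;
        uint32_t srcs;         /* 32-bit user pointer, never consumed */
    };

    struct req {               /* native kernel layout (larger) */
        uint32_t ifindex;
        uint32_t fmode;
        uint32_t nsrcs;
        uint64_t srcs;         /* 64-bit pointer, never consumed */
    };

    int
    main(void)
    {
        struct req32 in = { 4, 1, 2, 0 };
        struct req out;

        memset(&out, 0, sizeof(out));
        /* correct: bounded by the source; sizeof(out) would over-read `in` */
        memcpy(&out, &in, sizeof(in));

        printf("ifindex=%u fmode=%u nsrcs=%u\n",
            out.ifindex, out.fmode, out.nsrcs);
        return 0;
    }
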
index e74dccbc302e100934ade4dff30007f19840d6bb..0cbd238ccdc75ea41cc9d45fbf00d508c50cf406 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -534,7 +534,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
        int mac_error;
 #endif /* CONFIG_MACF_NET */
 
-       if (!so->cached_in_sock_layer) {
+       if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
                inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone);
                if (inp == NULL)
                        return (ENOBUFS);
@@ -552,7 +552,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
 #if CONFIG_MACF_NET
        mac_error = mac_inpcb_label_init(inp, M_WAITOK);
        if (mac_error != 0) {
-               if (!so->cached_in_sock_layer)
+               if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0)
                        zfree(pcbinfo->ipi_zone, inp);
                return (mac_error);
        }
@@ -1296,6 +1296,16 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
                inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
                inp->inp_flags |= INP_INADDR_ANY;
        } else {
+               /*
+                * Using IP_PKTINFO without a local port already
+                * specified will cause the kernel to panic,
+                * see rdar://problem/18508185.
+                * For now return an error to avoid a kernel panic.
+                * This routine can be refactored to handle this better
+                * in the future.
+                */
+               if (inp->inp_lport == 0)
+                       return (EINVAL);
                if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
                        /*
                         * Lock inversion issue, mostly with udp
@@ -1369,6 +1379,13 @@ in_pcbdetach(struct inpcb *inp)
        if (nstat_collect && 
            (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP))
                nstat_pcb_detach(inp);
+
+       /* Free memory buffer held for generating keep alives */
+       if (inp->inp_keepalive_data != NULL) {
+               FREE(inp->inp_keepalive_data, M_TEMP);
+               inp->inp_keepalive_data = NULL;
+       }
+
        /* mark socket state as dead */
        if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
                panic("%s: so=%p proto=%d couldn't set to STOPUSING\n",
@@ -1465,7 +1482,7 @@ in_pcbdispose(struct inpcb *inp)
                 * we deallocate the structure.
                 */
                ROUTE_RELEASE(&inp->inp_route);
-               if (!so->cached_in_sock_layer) {
+               if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
                        zfree(ipi->ipi_zone, inp);
                }
                sodealloc(so);
@@ -1618,18 +1635,11 @@ in_losing(struct inpcb *inp)
 {
        boolean_t release = FALSE;
        struct rtentry *rt;
-       struct rt_addrinfo info;
 
        if ((rt = inp->inp_route.ro_rt) != NULL) {
                struct in_ifaddr *ia = NULL;
 
-               bzero((caddr_t)&info, sizeof (info));
                RT_LOCK(rt);
-               info.rti_info[RTAX_DST] =
-                   (struct sockaddr *)&inp->inp_route.ro_dst;
-               info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
-               info.rti_info[RTAX_NETMASK] = rt_mask(rt);
-               rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
                if (rt->rt_flags & RTF_DYNAMIC) {
                        /*
                         * Prevent another thread from modifying rt_key,
@@ -2822,10 +2832,11 @@ inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
         * When not delegated, the effective pid is the same as the real pid
         */
        if (so->so_flags & SOF_DELEGATED) {
+               soprocinfo->spi_delegated = 1;
                soprocinfo->spi_epid = so->e_pid;
-               if (so->e_pid != 0)
-                       uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
+               uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
        } else {
+               soprocinfo->spi_delegated = 0;
                soprocinfo->spi_epid = so->last_pid;
        }
 }
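
in_pcbdetach() now releases the new keepalive-offload buffer and immediately clears the pointer. A standalone sketch of that free-and-NULL idiom, with a hypothetical struct, showing why it keeps repeated teardown paths safe:

    #include <stdlib.h>
    #include <string.h>

    struct conn {
        unsigned char *keepalive_data;
        size_t         keepalive_datalen;
    };

    static void
    conn_clear_keepalive(struct conn *c)
    {
        if (c->keepalive_data != NULL) {
            free(c->keepalive_data);
            c->keepalive_data = NULL;     /* makes the teardown idempotent */
            c->keepalive_datalen = 0;
        }
    }

    int
    main(void)
    {
        struct conn c = { malloc(16), 16 };

        if (c.keepalive_data != NULL)
            memset(c.keepalive_data, 0, c.keepalive_datalen);
        conn_clear_keepalive(&c);
        conn_clear_keepalive(&c);         /* safe: pointer already NULL */
        return 0;
    }
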
index c86c03c6c1dc29afa741e3c8b4501e38aed3e0f1..7ce89307aa027fb252aec700cf74af24f192fedd 100644 (file)
@@ -220,6 +220,11 @@ struct inpcb {
        } inp_necp_attributes;
        struct necp_inpcb_result inp_policyresult;
 #endif
+       u_char *inp_keepalive_data;     /* for keepalive offload */
+       u_int8_t inp_keepalive_datalen; /* keepalive data length */
+       u_int8_t inp_keepalive_type;    /* type of application */
+       u_int16_t inp_keepalive_interval; /* keepalive interval */
+       uint32_t inp_nstat_refcnt __attribute__((aligned(4)));
        struct inp_stat *inp_stat;
        struct inp_stat *inp_cstat;     /* cellular data */
        struct inp_stat *inp_wstat;     /* Wi-Fi data */
@@ -228,7 +233,6 @@ struct inpcb {
        u_int8_t inp_cstat_store[sizeof (struct inp_stat) + sizeof (u_int64_t)];
        u_int8_t inp_wstat_store[sizeof (struct inp_stat) + sizeof (u_int64_t)];
        u_int8_t inp_Wstat_store[sizeof (struct inp_stat) + sizeof (u_int64_t)];
-       uint32_t inp_nstat_refcnt __attribute__((aligned(4)));
 };
 
 #define        INP_ADD_STAT(_inp, _cnt_cellular, _cnt_wifi, _cnt_wired, _a, _n)\
@@ -678,6 +682,7 @@ struct inpcbinfo {
 #define        INP2_NO_IFF_EXPENSIVE   0x00000008 /* do not use expensive interface */
 #define        INP2_INHASHLIST         0x00000010 /* pcb is in inp_hash list */
 #define        INP2_AWDL_UNRESTRICTED  0x00000020 /* AWDL restricted mode allowed */
+#define        INP2_KEEPALIVE_OFFLOAD  0x00000040 /* Enable UDP keepalive offload */
 
 /*
  * Flags passed to in_pcblookup*() functions.
@@ -754,9 +759,14 @@ extern int in_pcb_checkstate(struct inpcb *, int, int);
 extern void in_pcbremlists(struct inpcb *);
 extern void inpcb_to_compat(struct inpcb *, struct inpcb_compat *);
 extern void inpcb_to_xinpcb64(struct inpcb *, struct xinpcb64 *);
+
 extern int get_pcblist_n(short, struct sysctl_req *, struct inpcbinfo *);
-#define        INPCB_GET_PORTS_USED_WILDCARDOK 0x1
-#define        INPCB_GET_PORTS_USED_NOWAKEUPOK 0x2
+#define        INPCB_GET_PORTS_USED_WILDCARDOK 0x01
+#define        INPCB_GET_PORTS_USED_NOWAKEUPOK 0x02
+#define        INPCB_GET_PORTS_USED_RECVANYIFONLY 0x04
+#define        INPCB_GET_PORTS_USED_EXTBGIDLEONLY 0x08
+#define        INPCB_GET_PORTS_USED_ACTIVEONLY 0x10
+
 extern void inpcb_get_ports_used(u_int32_t, int, u_int32_t, bitstr_t *,
     struct inpcbinfo *);
 #define        INPCB_OPPORTUNISTIC_THROTTLEON  0x0001
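
The header grows three new INPCB_GET_PORTS_USED_* selector bits alongside the original two. A standalone sketch of how a caller might compose and decode them before calling inpcb_get_ports_used(); the flag values mirror the header above, while the particular combination shown is hypothetical.

    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    #define GET_PORTS_USED_WILDCARDOK    0x01
    #define GET_PORTS_USED_NOWAKEUPOK    0x02
    #define GET_PORTS_USED_RECVANYIFONLY 0x04
    #define GET_PORTS_USED_EXTBGIDLEONLY 0x08
    #define GET_PORTS_USED_ACTIVEONLY    0x10

    int
    main(void)
    {
        /* e.g. "wildcard binds are fine, but only report active sockets" */
        uint32_t flags = GET_PORTS_USED_WILDCARDOK | GET_PORTS_USED_ACTIVEONLY;

        bool wildcardok = (flags & GET_PORTS_USED_WILDCARDOK) != 0;
        bool activeonly = (flags & GET_PORTS_USED_ACTIVEONLY) != 0;

        printf("wildcardok=%d activeonly=%d\n", wildcardok, activeonly);
        return 0;
    }
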
index 4df416c6aac9558ab00a40df5eb4ca62c5366fbf..3e0facc1c74cd7863b6142a02a5e6a4efa1b0a03 100644 (file)
@@ -401,21 +401,30 @@ inpcb_get_ports_used(uint32_t ifindex, int protocol, uint32_t flags,
        struct socket *so;
        inp_gen_t gencnt;
        bool iswildcard, wildcardok, nowakeok;
+       bool recvanyifonly, extbgidleok;
+       bool activeonly;
 
        wildcardok = ((flags & INPCB_GET_PORTS_USED_WILDCARDOK) != 0);
        nowakeok = ((flags & INPCB_GET_PORTS_USED_NOWAKEUPOK) != 0);
+       recvanyifonly = ((flags & INPCB_GET_PORTS_USED_RECVANYIFONLY) != 0);
+       extbgidleok = ((flags & INPCB_GET_PORTS_USED_EXTBGIDLEONLY) != 0);
+       activeonly = ((flags & INPCB_GET_PORTS_USED_ACTIVEONLY) != 0);
+
        lck_rw_lock_shared(pcbinfo->ipi_lock);
        gencnt = pcbinfo->ipi_gencnt;
+
        for (inp = LIST_FIRST(pcbinfo->ipi_listhead); inp;
            inp = LIST_NEXT(inp, inp_list)) {
                uint16_t port;
 
                if (inp->inp_gencnt > gencnt ||
-                   inp->inp_state == INPCB_STATE_DEAD)
+                   inp->inp_state == INPCB_STATE_DEAD ||
+                   inp->inp_wantcnt == WNT_STOPUSING)
                        continue;
 
                if ((so = inp->inp_socket) == NULL ||
-                   (so->so_state & SS_DEFUNCT))
+                   (so->so_state & SS_DEFUNCT) ||
+                   (so->so_state & SS_ISDISCONNECTED))
                        continue;
 
                if (!(protocol == PF_UNSPEC ||
@@ -435,12 +444,64 @@ inpcb_get_ports_used(uint32_t ifindex, int protocol, uint32_t flags,
                        !nowakeok)
                        continue;
 
+               if (!(inp->inp_flags & INP_RECV_ANYIF) &&
+                       recvanyifonly)
+                       continue;
+
+               if (!(so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) &&
+                       extbgidleok)
+                       continue;
+
                if (!iswildcard &&
                    !(ifindex == 0 || inp->inp_last_outifp == NULL ||
                    ifindex == inp->inp_last_outifp->if_index))
                        continue;
 
+               if (SOCK_PROTO(inp->inp_socket) == IPPROTO_UDP &&
+                   so->so_state & SS_CANTRCVMORE)
+                       continue;
+
+               if (SOCK_PROTO(inp->inp_socket) == IPPROTO_TCP) {
+                       struct  tcpcb *tp = sototcpcb(inp->inp_socket);
+
+                       switch (tp->t_state) {
+                               case TCPS_CLOSED:
+                                       continue;
+                                       /* NOT REACHED */
+                               case TCPS_LISTEN:
+                               case TCPS_SYN_SENT:
+                               case TCPS_SYN_RECEIVED:
+                               case TCPS_ESTABLISHED:
+                               case TCPS_FIN_WAIT_1:
+                                       /*
+                                        * Note: FIN_WAIT_1 is an active state
+                                        * because we need our FIN to be
+                                        * acknowledged
+                                        */
+                                       break;
+                               case TCPS_CLOSE_WAIT:
+                               case TCPS_CLOSING:
+                               case TCPS_LAST_ACK:
+                               case TCPS_FIN_WAIT_2:
+                                       /*
+                                        * In the closing states, the connection
+                                        * is not idle when there is outgoing
+                                        * data having to be acknowledged
+                                        */
+                                       if (activeonly && so->so_snd.sb_cc == 0)
+                                               continue;
+                                       break;
+                               case TCPS_TIME_WAIT:
+                                       continue;
+                                       /* NOT REACHED */
+                       }
+               }
+               /*
+                * Final safeguard to exclude unspecified local port
+                */
                port = ntohs(inp->inp_lport);
+               if (port == 0)
+                       continue;
                bit_set(bitfield, port);
        }
        lck_rw_done(pcbinfo->ipi_lock);
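
inpcb_get_ports_used() now filters TCP sockets by state: CLOSED and TIME_WAIT are skipped, LISTEN through FIN_WAIT_1 always count, and the half-closed states count only while send data is still unacknowledged when INPCB_GET_PORTS_USED_ACTIVEONLY is set. A standalone sketch of that classification; the enum values are illustrative.

    #include <stdio.h>
    #include <stddef.h>
    #include <stdbool.h>

    enum tcp_state {
        CLOSED, LISTEN, SYN_SENT, SYN_RECEIVED, ESTABLISHED,
        FIN_WAIT_1, CLOSE_WAIT, CLOSING, LAST_ACK, FIN_WAIT_2, TIME_WAIT
    };

    static bool
    report_port(enum tcp_state state, bool activeonly, size_t unacked_send_bytes)
    {
        switch (state) {
        case CLOSED:
        case TIME_WAIT:
            return false;                       /* never interesting */
        case CLOSE_WAIT:
        case CLOSING:
        case LAST_ACK:
        case FIN_WAIT_2:
            /* closing states are idle once the send buffer drains */
            return !(activeonly && unacked_send_bytes == 0);
        default:
            /*
             * LISTEN .. FIN_WAIT_1 are treated as active
             * (FIN_WAIT_1 because our FIN still needs acknowledging).
             */
            return true;
        }
    }

    int
    main(void)
    {
        printf("%d %d %d\n",
            report_port(ESTABLISHED, true, 0),
            report_port(LAST_ACK, true, 0),
            report_port(LAST_ACK, true, 128));
        return 0;
    }
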
index 321e7819ec721932fe7c643abba202e0e206e62c..bb0fee86485f050f1dda4824850a2e295a32c33d 100644 (file)
@@ -129,7 +129,7 @@ static struct protosw inetsw[] = {
        .pr_type =              SOCK_DGRAM,
        .pr_protocol =          IPPROTO_UDP,
        .pr_flags =             PR_ATOMIC|PR_ADDR|PR_PROTOLOCK|PR_PCBLOCK|
-                               PR_EVCONNINFO,
+                               PR_EVCONNINFO|PR_PRECONN_WRITE,
        .pr_input =             udp_input,
        .pr_ctlinput =          udp_ctlinput,
        .pr_ctloutput =         udp_ctloutput,
@@ -143,7 +143,8 @@ static struct protosw inetsw[] = {
        .pr_type =              SOCK_STREAM,
        .pr_protocol =          IPPROTO_TCP,
        .pr_flags =             PR_CONNREQUIRED|PR_WANTRCVD|PR_PCBLOCK|
-                               PR_PROTOLOCK|PR_DISPOSE|PR_EVCONNINFO,
+                               PR_PROTOLOCK|PR_DISPOSE|PR_EVCONNINFO|
+                               PR_PRECONN_WRITE|PR_DATA_IDEMPOTENT,
        .pr_input =             tcp_input,
        .pr_ctlinput =          tcp_ctlinput,
        .pr_ctloutput =         tcp_ctloutput,
@@ -325,15 +326,15 @@ static void
 ip_proto_input(protocol_family_t protocol, mbuf_t packet_list)
 {
 #pragma unused(protocol)
-       mbuf_t  packet;
-       int how_many = 0 ;
 
-       /* ip_input should handle a list of packets but does not yet */
-       for (packet = packet_list; packet; packet = packet_list) {
-               how_many++;
-               packet_list = mbuf_nextpkt(packet);
-               mbuf_setnextpkt(packet, NULL);
-               ip_input(packet);
+       if (packet_list->m_nextpkt != NULL) {
+               ip_input_process_list(packet_list);
+       } else {
+               /*
+                * XXX remove this path if ip_input_process_list is proven
+                * to be stable and has minimum overhead on most platforms.
+                */
+               ip_input(packet_list);
        }
 }
 
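
ip_proto_input() now hands a multi-packet chain to the new ip_input_process_list() and keeps the one-at-a-time path only for single packets. A standalone sketch of the per-packet walk the removed loop performed, detaching each packet from the nextpkt chain before processing it, using a toy list type:

    #include <stdio.h>
    #include <stddef.h>

    struct pkt {
        struct pkt *nextpkt;
        int         id;
    };

    static void
    input_one(struct pkt *p)
    {
        printf("processing packet %d\n", p->id);
    }

    static void
    input_list(struct pkt *list)
    {
        struct pkt *p;

        for (p = list; p != NULL; p = list) {
            list = p->nextpkt;      /* remember the rest of the chain */
            p->nextpkt = NULL;      /* detach before processing */
            input_one(p);
        }
    }

    int
    main(void)
    {
        struct pkt c = { NULL, 3 }, b = { &c, 2 }, a = { &b, 1 };

        input_list(&a);
        return 0;
    }
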
index e0972f831c9898718c75d95481cd9dd6d0879923..1eb03c11b525a5731ae0328e9109e8ba0f492a1c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
index e019e43c3b7b554adc9b369c187049a942d2479e..20a37fd9f2ec6f4eeb8c2e8797368fc4243b1234 100644 (file)
@@ -80,6 +80,7 @@ static int flush_pid_tclass(struct so_tcdbg *);
 static int purge_tclass_for_proc(void);
 static int flush_tclass_for_proc(void);
 int get_tclass_for_curr_proc(int *);
+static inline int so_throttle_best_effort(struct socket* ,struct ifnet *);
 
 static lck_grp_attr_t *tclass_lck_grp_attr = NULL; /* mutex group attributes */
 static lck_grp_t *tclass_lck_grp = NULL;       /* mutex group definition */
@@ -92,7 +93,7 @@ static lck_mtx_t *tclass_lock = &tclass_lock_data;
  * seconds, the background connections can switch to foreground TCP
  * congestion control.
  */ 
-#define TCP_BG_SWITCH_TIME 2
+#define TCP_BG_SWITCH_TIME 2 /* seconds */
 
 /*
  * Must be called with tclass_lock held
@@ -787,6 +788,17 @@ so_inc_recv_data_stat(struct socket *so, size_t pkts, size_t bytes, uint32_t tc)
        so->so_tc_stats[tc].rxpackets += pkts;
        so->so_tc_stats[tc].rxbytes +=bytes;
 }
+
+static inline int
+so_throttle_best_effort(struct socket *so, struct ifnet *ifp)
+{
+       u_int32_t uptime = net_uptime();
+       return (soissrcbesteffort(so) &&
+           net_io_policy_throttle_best_effort == 1 &&
+           ifp->if_rt_sendts > 0 &&
+           (int)(uptime - ifp->if_rt_sendts) <= TCP_BG_SWITCH_TIME);
+}
 __private_extern__ void
 set_tcp_stream_priority(struct socket *so)
 {
@@ -795,7 +807,7 @@ set_tcp_stream_priority(struct socket *so)
        struct ifnet *outifp;
        u_char old_cc = tp->tcp_cc_index;
        int recvbg = IS_TCP_RECV_BG(so);
-       bool is_local, fg_active = false;
+       bool is_local = false, fg_active = false;
        u_int32_t uptime;
 
        VERIFY((SOCK_CHECK_DOM(so, PF_INET) 
@@ -817,20 +829,42 @@ set_tcp_stream_priority(struct socket *so)
         * background. The variable sotcdb which can be set with sysctl
         * is used to disable these settings for testing.
         */
-       if (soissrcbackground(so)) {
-               if (outifp == NULL || (outifp->if_flags & IFF_LOOPBACK))
-                       is_local = true;
-               else
-                       is_local = false;
-
-               /* Check if there has been recent foreground activity */
-               if ((outifp != NULL &&
-                   outifp->if_fg_sendts > 0 &&
+       if (outifp == NULL || (outifp->if_flags & IFF_LOOPBACK))
+               is_local = true;
+
+       /* Check if there has been recent foreground activity */
+       if (outifp != NULL) {
+               /*
+                * If the traffic source is background, check if
+                * it can be switched to foreground. This can
+                * happen when there is no indication of foreground
+                * activity.
+                */
+               if (soissrcbackground(so) && 
+                   ((outifp->if_fg_sendts > 0 &&
                    (int)(uptime - outifp->if_fg_sendts) <= 
-                   TCP_BG_SWITCH_TIME) ||
-                   net_io_policy_throttled)
+                   TCP_BG_SWITCH_TIME) || net_io_policy_throttled))
                        fg_active = true;
 
+               /*
+                * The traffic source is best-effort -- check if
+                * the policy to throttle best effort is enabled
+                * and there was realtime activity on this
+                * interface recently. If this is true, enable
+                * algorithms that respond to increased latency
+                * on best-effort traffic.
+                */ 
+               if (so_throttle_best_effort(so, outifp))
+                       fg_active = true;
+       }
+
+       /*
+        * System initiated background traffic like cloud uploads should
+        * always use background delay sensitive algorithms. This will
+        * make the stream more responsive to other streams on the user's
+        * network and it will minimize latency induced.
+        */
+       if (fg_active || IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
                /*
                 * If the interface that the connection is using is
                 * loopback, do not use background congestion
@@ -842,18 +876,9 @@ set_tcp_stream_priority(struct socket *so)
                 * switch the backgroung streams to use background 
                 * congestion control algorithm. Otherwise, even background
                 * flows can move into foreground.
-                *
-                * System initiated background traffic like cloud uploads
-                * should always use background delay sensitive
-                * algorithms. This will make the stream more resposive to
-                * other streams on the user's network and it will
-                * minimize the latency induced.
                 */
-               if (IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class))
-                       fg_active = true;
-
-               if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0 ||
-                       is_local || !fg_active) {
+               if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0 || is_local ||
+                   !IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
                        if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX)
                                tcp_set_foreground_cc(so);
                } else {
@@ -862,11 +887,12 @@ set_tcp_stream_priority(struct socket *so)
                }
 
                /* Set receive side background flags */
-               if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0 ||
-                       is_local || !fg_active)
+               if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0 || is_local ||
+                   !IS_SO_TC_BACKGROUNDSYSTEM(so->so_traffic_class)) {
                        tcp_clear_recv_bg(so);
-               else
+               } else {
                        tcp_set_recv_bg(so);
+               }
        } else {
                tcp_clear_recv_bg(so);
                if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX)
@@ -920,13 +946,21 @@ set_packet_service_class(struct mbuf *m, struct socket *so,
        }
 
        /*
-        * If TRAFFIC_MGT_SO_BACKGROUND is set, depress the priority.
+        * If TRAFFIC_MGT_SO_BACKGROUND is set or policy to throttle
+        * best effort is set, depress the priority.
         */
-       if (soisthrottled(so) && !IS_MBUF_SC_BACKGROUND(msc))
+       if (!IS_MBUF_SC_BACKGROUND(msc) && soisthrottled(so))
+               msc = MBUF_SC_BK;
+
+       if (IS_MBUF_SC_BESTEFFORT(msc) && inp->inp_last_outifp != NULL &&
+           so_throttle_best_effort(so, inp->inp_last_outifp))
                msc = MBUF_SC_BK;
 
        if (soissrcbackground(so))
                m->m_pkthdr.pkt_flags |= PKTF_SO_BACKGROUND;
+
+       if (soissrcrealtime(so) || IS_MBUF_SC_REALTIME(msc))
+               m->m_pkthdr.pkt_flags |= PKTF_SO_REALTIME;
        /*
         * Set the traffic class in the mbuf packet header svc field
         */
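
so_throttle_best_effort() throttles best-effort sockets only while the interface has seen realtime traffic within the last TCP_BG_SWITCH_TIME seconds. A standalone sketch of that recency test; the signed cast mirrors the kernel expression, so a timestamp momentarily ahead of the sampled uptime still counts as recent.

    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    #define TCP_BG_SWITCH_TIME 2    /* seconds */

    static bool
    recent_realtime_activity(uint32_t uptime, uint32_t last_rt_send)
    {
        return last_rt_send > 0 &&
            (int)(uptime - last_rt_send) <= TCP_BG_SWITCH_TIME;
    }

    int
    main(void)
    {
        printf("%d\n", recent_realtime_activity(1000, 999));   /* 1 s ago  -> 1 */
        printf("%d\n", recent_realtime_activity(1000, 990));   /* 10 s ago -> 0 */
        printf("%d\n", recent_realtime_activity(1000, 0));     /* never    -> 0 */
        return 0;
    }
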
index 8a3e7b94c0451dcce5435974b67ba3c6b938c62a..5b047a56164f662d382a8f513bdc02cd303381d7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -163,6 +163,7 @@ struct kev_in_portinuse {
                                              router */
 
 #ifdef BSD_KERNEL_PRIVATE
+#include <net/if.h>
 #include <net/if_var.h>
 #include <kern/locks.h>
 #include <sys/tree.h>
@@ -473,6 +474,15 @@ struct inpcb;
 #define        MCAST_NOTSMEMBER        2       /* This host excluded source */
 #define        MCAST_MUTED             3       /* [deprecated] */
 
+/*
+ * Per-interface IPv4 structures.
+ */
+struct in_ifextra {
+       uint32_t                netsig_len;
+       u_int8_t                netsig[IFNET_SIGNATURELEN];
+};
+#define        IN_IFEXTRA(_ifp)        ((struct in_ifextra *)(_ifp->if_inetdata))
+
 extern u_int32_t ipv4_ll_arp_aware;
 
 extern void in_ifaddr_init(void);
@@ -500,6 +510,7 @@ extern int in_inithead(void **, int);
 extern void in_rtqdrain(void);
 extern struct radix_node *in_validate(struct radix_node *);
 extern void ip_input(struct mbuf *);
+extern void ip_input_process_list(struct mbuf *);
 extern int in_ifadown(struct ifaddr *ifa, int);
 extern void in_ifscrub(struct ifnet *, struct in_ifaddr *, int);
 extern u_int32_t inaddr_hashval(u_int32_t);
index 38338ae58e94e9951179dad83ad3b91942270cf7..c24935fd9c14fb11e0553f79d625b0290657a78e 100644 (file)
@@ -1304,10 +1304,10 @@ find_queue(struct dn_flow_set *fs, struct ip_flow_id *id)
                 ((id->src_ip6.__u6_addr.__u6_addr32[2] << 1) & 0xfffff)^
                 ((id->src_ip6.__u6_addr.__u6_addr32[3] << 1) & 0xfffff)^
 
-                ((id->src_ip6.__u6_addr.__u6_addr32[0] << 16) & 0xffff)^
-                ((id->src_ip6.__u6_addr.__u6_addr32[1] << 16) & 0xffff)^
-                ((id->src_ip6.__u6_addr.__u6_addr32[2] << 16) & 0xffff)^
-                ((id->src_ip6.__u6_addr.__u6_addr32[3] << 16) & 0xffff)^
+                ((id->src_ip6.__u6_addr.__u6_addr32[0] >> 16) & 0xffff)^
+                ((id->src_ip6.__u6_addr.__u6_addr32[1] >> 16) & 0xffff)^
+                ((id->src_ip6.__u6_addr.__u6_addr32[2] >> 16) & 0xffff)^
+                ((id->src_ip6.__u6_addr.__u6_addr32[3] >> 16) & 0xffff)^
 
                 (id->dst_port << 1) ^ (id->src_port) ^
                 (id->proto ) ^
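
The dummynet change fixes the IPv6 flow hash: shifting a 32-bit word left by 16 and then masking with 0xffff always yields zero, so the upper halfword of each source-address word never reached the queue hash. Shifting right by 16 extracts the intended bits, as this short standalone demonstration shows.

    #include <stdio.h>
    #include <stdint.h>

    int
    main(void)
    {
        uint32_t word = 0xfe80abcd;

        printf("old: 0x%04x\n", (word << 16) & 0xffff);   /* always 0x0000 */
        printf("new: 0x%04x\n", (word >> 16) & 0xffff);   /* 0xfe80        */
        return 0;
    }
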
index 28a5589612c2c021945b3c74344a8991662756de..38c7900267e75b4b97ad9dd47f736b0527a0d837 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000,2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000, 2007, 2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -91,11 +91,10 @@ ip_ecn_ingress(mode, outer, inner)
 
        *outer = *inner;
        switch (mode) {
-       case ECN_ALLOWED:               /* ECN allowed */
-               *outer &= ~IPTOS_CE;
+       case ECN_NORMAL:                /* ECN normal mode, copy flags */
                break;
-       case ECN_FORBIDDEN:             /* ECN forbidden */
-               *outer &= ~(IPTOS_ECT | IPTOS_CE);
+       case ECN_COMPATIBILITY:         /* ECN compatibility mode */
+               *outer &= ~IPTOS_ECN_MASK;
                break;
        case ECN_NOCARE:        /* no consideration to ECN */
                break;
@@ -105,7 +104,7 @@ ip_ecn_ingress(mode, outer, inner)
 /*
  * modify inner ECN (TOS) field on egress operation (tunnel decapsulation).
  */
-void
+int
 ip_ecn_egress(mode, outer, inner)
        int mode;
        const u_int8_t *outer;
@@ -115,14 +114,25 @@ ip_ecn_egress(mode, outer, inner)
                panic("NULL pointer passed to ip_ecn_egress");
 
        switch (mode) {
-       case ECN_ALLOWED:
-               if (*outer & IPTOS_CE)
-                       *inner |= IPTOS_CE;
+       /* Process ECN for both normal and compatibility modes */
+       case ECN_NORMAL:
+       case ECN_COMPATIBILITY:
+               if ((*outer & IPTOS_ECN_MASK) == IPTOS_ECN_CE) {
+                       if ((*inner & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) {
+                               /* Drop */
+                               return (0);
+                       } else {
+                               *inner |= IPTOS_ECN_CE;
+                       }
+               } else if ((*outer & IPTOS_ECN_MASK) == IPTOS_ECN_ECT1 &&
+                                  (*inner & IPTOS_ECN_MASK) == IPTOS_ECN_ECT0) {
+                       *inner = *outer;
+               }
                break;
-       case ECN_FORBIDDEN:             /* ECN forbidden */
        case ECN_NOCARE:        /* no consideration to ECN */
                break;
        }
+       return (1);
 }
 
 #if INET6
@@ -143,7 +153,7 @@ ip6_ecn_ingress(mode, outer, inner)
        *outer |= htonl((u_int32_t)outer8 << 20);
 }
 
-void
+int
 ip6_ecn_egress(mode, outer, inner)
        int mode;
        const u_int32_t *outer;
@@ -156,8 +166,95 @@ ip6_ecn_egress(mode, outer, inner)
 
        outer8 = (ntohl(*outer) >> 20) & 0xff;
        inner8 = (ntohl(*inner) >> 20) & 0xff;
-       ip_ecn_egress(mode, &outer8, &inner8);
+       if (ip_ecn_egress(mode, &outer8, &inner8) == 0) {
+               return (0);
+       }
+       *inner &= ~htonl(0xff << 20);
+       *inner |= htonl((u_int32_t)inner8 << 20);
+       return (1);
+}
+
+/*
+ * Modify outer IPv6 ECN (Traffic Class) field according to inner IPv4 TOS field
+ * on ingress operation (tunnel encapsulation).
+ */
+void
+ip46_ecn_ingress(mode, outer, tos)
+       int mode;
+       u_int32_t *outer;
+       const u_int8_t *tos;
+{
+       u_int8_t outer8;
+
+       if (!outer || !tos)
+               panic("NULL pointer passed to ip46_ecn_ingress");
+
+       ip_ecn_ingress(mode, &outer8, tos);
+       *outer &= ~htonl(0xff << 20);
+       *outer |= htonl((u_int32_t)outer8 << 20);
+}
+
+/*
+ * Modify inner IPv4 ECN (TOS) field according to output IPv6 ECN (Traffic Class)
+ * on egress operation (tunnel decapsulation).
+ */
+int
+ip46_ecn_egress(mode, outer, tos)
+       int mode;
+       const u_int32_t *outer;
+       u_int8_t *tos;
+{
+       u_int8_t outer8;
+
+       if (!outer || !tos)
+               panic("NULL pointer passed to ip46_ecn_egress");
+
+       outer8 = (ntohl(*outer) >> 20) & 0xff;
+       return ip_ecn_egress(mode, &outer8, tos);
+}
+
+/*
+ * Modify outer IPv4 TOS field according to inner IPv6 ECN (Traffic Class)
+ * on ingress operation (tunnel encapsulation).
+ */
+void
+ip64_ecn_ingress(mode, outer, inner)
+       int mode;
+       u_int8_t *outer;
+       const u_int32_t *inner;
+{
+       u_int8_t inner8;
+
+       if (!outer || ! inner)
+               panic("NULL pointer passed to ip64_ecn_ingress");
+
+       inner8 = (ntohl(*inner) >> 20) & 0xff;
+       ip_ecn_ingress(mode, outer, &inner8);
+}
+
+/*
+ * Modify inner IPv6 ECN (Traffic Class) according to outer IPv4 TOS field
+ * on egress operation (tunnel decapsulation).
+ */
+int
+ip64_ecn_egress(mode, outer, inner)
+       int mode;
+       const u_int8_t *outer;
+       u_int32_t *inner;
+{
+       u_int8_t inner8;
+
+       if (!outer || !inner)
+               panic("NULL pointer passed to ip64_ecn_egress");
+
+       inner8 = (ntohl(*inner) >> 20) & 0xff;
+       if (ip_ecn_egress(mode, outer, &inner8) == 0) {
+               return (0);
+       }
+
        *inner &= ~htonl(0xff << 20);
        *inner |= htonl((u_int32_t)inner8 << 20);
+       return (1);
 }
+
 #endif
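
The new ip46/ip64 helpers move the 8-bit Traffic Class between an IPv4 TOS byte and bits 20..27 of the IPv6 flow word. A standalone sketch of the extract/insert arithmetic they rely on; the example Traffic Class value is arbitrary.

    #include <stdio.h>
    #include <stdint.h>
    #include <arpa/inet.h>

    static uint8_t
    ip6_get_tclass(uint32_t flow)       /* flow is in network byte order */
    {
        return (ntohl(flow) >> 20) & 0xff;
    }

    static uint32_t
    ip6_set_tclass(uint32_t flow, uint8_t tclass)
    {
        flow &= ~htonl((uint32_t)0xff << 20);      /* clear the old class */
        flow |= htonl((uint32_t)tclass << 20);     /* insert the new one  */
        return flow;
    }

    int
    main(void)
    {
        uint32_t flow = htonl(0x60000000);   /* version 6, TC 0, label 0 */

        flow = ip6_set_tclass(flow, 0xb8);   /* e.g. DSCP EF, Not-ECT */
        printf("tclass = 0x%02x\n", ip6_get_tclass(flow));
        return 0;
    }
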
index ae06c45c098859b5b7529f60ad2577ac85ab4286..959a8e24d495277a512130fd90e2ea051fb33be2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2013, 2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <sys/appleapiopts.h>
 
 #ifdef BSD_KERNEL_PRIVATE
-#define ECN_ALLOWED    1       /* ECN allowed */
-#define ECN_FORBIDDEN  0       /* ECN forbidden */
-#define ECN_NOCARE     (-1)    /* no consideration to ECN */
+#define ECN_NORMAL             1       /* ECN normal mode */
+#define ECN_COMPATIBILITY      0       /* ECN compatibility mode */
+#define ECN_NOCARE             (-1)    /* Ignore ECN. Use caution with this mode. */
 
 extern void ip_ecn_ingress(int, u_int8_t *, const u_int8_t *);
-extern void ip_ecn_egress(int, const u_int8_t *, u_int8_t *);
+extern int ip_ecn_egress(int, const u_int8_t *, u_int8_t *);
 #endif /* BSD_KERNEL_PRIVATE */
index 3c0ee4a481f13339665fae771c6a3774e8fe06da..6c4d33072b487d1204faf025ace058bab8282ec1 100644 (file)
@@ -390,12 +390,11 @@ encap_attach(af, proto, sp, sm, dp, dm, psw, arg)
                goto fail;
        }
 
-       ep = _MALLOC(sizeof(*ep), M_NETADDR, M_WAITOK); /*XXX*/
+       ep = _MALLOC(sizeof(*ep), M_NETADDR, M_WAITOK | M_ZERO); /* XXX */
        if (ep == NULL) {
                error = ENOBUFS;
                goto fail;
        }
-       bzero(ep, sizeof(*ep));
 
        ep->af = af;
        ep->proto = proto;
@@ -432,12 +431,11 @@ encap_attach_func(af, proto, func, psw, arg)
                goto fail;
        }
 
-       ep = _MALLOC(sizeof(*ep), M_NETADDR, M_WAITOK); /*XXX*/
+       ep = _MALLOC(sizeof(*ep), M_NETADDR, M_WAITOK | M_ZERO); /* XXX */
        if (ep == NULL) {
                error = ENOBUFS;
                goto fail;
        }
-       bzero(ep, sizeof(*ep));
 
        ep->af = af;
        ep->proto = proto;
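
encap_attach() and encap_attach_func() now request zeroed memory from the allocator instead of pairing _MALLOC with a separate bzero(); the same change appears in the ipfw code further below. A userland sketch of the idiom, with calloc() standing in for _MALLOC(..., M_WAITOK | M_ZERO) and an illustrative struct:

    #include <stdlib.h>
    #include <string.h>

    struct encaptab_like {
        int   af;
        int   proto;
        void *arg;
    };

    int
    main(void)
    {
        /* old pattern: allocate, then zero in a second step */
        struct encaptab_like *a = malloc(sizeof(*a));
        if (a != NULL)
            memset(a, 0, sizeof(*a));

        /* new pattern: one call returns already-zeroed memory */
        struct encaptab_like *b = calloc(1, sizeof(*b));

        free(a);
        free(b);
        return 0;
    }
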
index aa3ac23c9ec6ec685f4b037bb5b3a810db27d346..8e84eec1f710df45efffea4a1bf714df4bddcec0 100644 (file)
@@ -127,6 +127,14 @@ struct ip_fw_args {
 #define fwa_dst fwa_dst_._fwa_dst
 #define fwa_dst6 fwa_dst_._fwa_dst6
 
+/* Allocate a separate structure for inputs args to save space and bzero time */
+struct ip_fw_in_args {
+       struct sockaddr_in      *fwai_next_hop; /* forward address            */
+       struct ip_fw            *fwai_ipfw_rule;/* matching IPFW rule         */
+       struct pf_rule          *fwai_pf_rule;  /* matching PF rule           */
+       u_int16_t               fwai_divert_rule;/* divert cookie             */
+};
+
 #endif /* BSD_KERNEL_PRIVATE */
 
 #endif /* __IP_FLOWID_H__ */
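
The comment above states the motivation for struct ip_fw_in_args: keep the per-packet input fields in a small struct of their own so the hot path only zeroes those few words instead of the much larger ip_fw_args. A standalone sketch of that layout split; the stand-in field names and sizes are illustrative only.

    #include <stdio.h>
    #include <string.h>

    struct big_args {            /* stand-in for struct ip_fw_args */
        char  scratch[256];      /* many rarely used fields */
        void *next_hop;
        void *ipfw_rule;
    };

    struct in_args {             /* stand-in for struct ip_fw_in_args */
        void          *next_hop;
        void          *ipfw_rule;
        void          *pf_rule;
        unsigned short divert_rule;
    };

    int
    main(void)
    {
        struct in_args ia;

        memset(&ia, 0, sizeof(ia));    /* cheap: a few words per packet */
        printf("zeroed %zu bytes instead of %zu\n",
            sizeof(ia), sizeof(struct big_args));
        return 0;
    }
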
index d9520158e6c4e679d2dd95bf99bf1466712faa33..bfdc619649a90d883566adfd60a4476125513ec3 100644 (file)
@@ -2838,13 +2838,12 @@ add_rule(struct ip_fw **head, struct ip_fw *input_rule)
        if (*head == NULL && input_rule->rulenum != IPFW_DEFAULT_RULE)
                return (EINVAL);
 
-       rule = _MALLOC(l, M_IPFW, M_WAIT);
+       rule = _MALLOC(l, M_IPFW, M_WAIT | M_ZERO);
        if (rule == NULL) {
                printf("ipfw2: add_rule MALLOC failed\n");
                return (ENOSPC);
        }
        
-       bzero(rule, l);
        bcopy(input_rule, rule, l);
 
        rule->next = NULL;
@@ -3539,14 +3538,12 @@ ipfw_ctl(struct sockopt *sopt)
                 * how much room is needed, do not bother filling up the
                 * buffer, just jump to the sooptcopyout.
                 */
-               buf = _MALLOC(size, M_TEMP, M_WAITOK);
+               buf = _MALLOC(size, M_TEMP, M_WAITOK | M_ZERO);
                if (buf == 0) {
                        lck_mtx_unlock(ipfw_mutex);
                        error = ENOBUFS;
                        break;
                }
-               
-               bzero(buf, size);
 
                bp = buf;
                for (rule = layer3_chain; rule ; rule = rule->next) {
@@ -3607,7 +3604,7 @@ ipfw_ctl(struct sockopt *sopt)
                                                ipfw_dyn_dst->ack_rev = p->ack_rev;
                                                ipfw_dyn_dst->dyn_type = p->dyn_type;
                                                ipfw_dyn_dst->count = p->count;
-                                               last = (char*)&ipfw_dyn_dst->next;
+                                               last = (char*)ipfw_dyn_dst;
                                        } else {
                                                ipfw_dyn_rule_32        *ipfw_dyn_dst;
                                                
@@ -3633,11 +3630,16 @@ ipfw_ctl(struct sockopt *sopt)
                                                ipfw_dyn_dst->ack_rev = p->ack_rev;
                                                ipfw_dyn_dst->dyn_type = p->dyn_type;
                                                ipfw_dyn_dst->count = p->count;
-                                               last = (char*)&ipfw_dyn_dst->next;
+                                               last = (char*)ipfw_dyn_dst;
                                        }
                                }
-                       if (last != NULL) /* mark last dynamic rule */
-                               bzero(last, sizeof(last));
+                       /* mark last dynamic rule */
+                       if (last != NULL) {
+                               if (is64user)
+                                       ((ipfw_dyn_rule_64 *)last)->next = 0;
+                               else
+                                       ((ipfw_dyn_rule_32 *)last)->next = 0;
+                       }
                }
                lck_mtx_unlock(ipfw_mutex);
 
@@ -3758,13 +3760,11 @@ ipfw_ctl(struct sockopt *sopt)
        case IP_FW_ADD:
        {
                size_t savedsopt_valsize=0;
-               rule = _MALLOC(RULE_MAXSIZE, M_TEMP, M_WAITOK);
+               rule = _MALLOC(RULE_MAXSIZE, M_TEMP, M_WAITOK | M_ZERO);
                if (rule == 0) {
                        error = ENOBUFS;
                        break;
                }
-               
-               bzero(rule, RULE_MAXSIZE);
 
                if (api_version != IP_FW_CURRENT_API_VERSION) {
                        error = ipfw_convert_to_latest(sopt, rule, api_version, is64user);
index 1022e03f1921c4fb453ad9338c586f8d547767f7..c4f1bf576cd0a599274a68661601cdb339db1555 100644 (file)
@@ -1981,7 +1981,7 @@ ipfw_convert_to_cmds_32(struct ip_fw *curr_rule, struct ip_fw_compat_32 *compat_
        ipfw_insn       *action, *cmd, *src, *dst;
        ipfw_insn       *have_state = NULL;     /* track check-state or keep-state */
        
-       if (!compat_rule || !curr_rule || !(curr_rule->cmd)) {
+       if (!compat_rule || !curr_rule) {
                return;
        }
 
@@ -2525,7 +2525,7 @@ ipfw_convert_to_cmds_64(struct ip_fw *curr_rule, struct ip_fw_compat_64 *compat_
        ipfw_insn       *action, *cmd, *src, *dst;
        ipfw_insn       *have_state = NULL;     /* track check-state or keep-state */
        
-       if (!compat_rule || !curr_rule || !(curr_rule->cmd)) {
+       if (!compat_rule || !curr_rule) {
                return;
        }
 
index 256d54b8fc5e02b7ff8d0cd050ab429d8649bc16..cb0e43f7a30455aa5855624d99cd03103184da30 100644 (file)
@@ -148,7 +148,7 @@ SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect,
     CTLFLAG_RW | CTLFLAG_LOCKED,
     &log_redirect, 0, "");
 
-static int icmp_datalen = 8;
+const static int icmp_datalen = 8;
 
 #if ICMP_BANDLIM 
 
@@ -287,7 +287,7 @@ stdreply:   icmpelen = max(ICMP_MINLEN, min(icmp_datalen,
         */
        if (MHLEN > (sizeof(struct ip) + ICMP_MINLEN + icmplen))
                m = m_gethdr(M_DONTWAIT, MT_HEADER);    /* MAC-OK */
-       else 
+       else
                m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
 
        if (m == NULL)
index bc1bb0f2f0f64846306fcf763e3c625a38d74da7..994bbf8a3736c4f924ff1b71e1749b4d6c03c899 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
 #include <net/ntstat.h>
 #include <net/dlil.h>
 #include <net/classq/classq.h>
+#include <net/net_perf.h>
 #if PF
 #include <net/pfvar.h>
 #endif /* PF */
@@ -159,6 +160,8 @@ static void frag_sched_timeout(void);
 static struct ipq *ipq_alloc(int);
 static void ipq_free(struct ipq *);
 static void ipq_updateparams(void);
+static void ip_input_second_pass(struct mbuf *, struct ifnet *,
+    u_int32_t, int, int, struct ip_fw_in_args *, int);
 
 decl_lck_mtx_data(static, ipqlock);
 static lck_attr_t      *ipqlock_attr;
@@ -183,6 +186,9 @@ static u_int32_t ipq_count;         /* current # of allocated ipq's */
 static int sysctl_ipforwarding SYSCTL_HANDLER_ARGS;
 static int sysctl_maxnipq SYSCTL_HANDLER_ARGS;
 static int sysctl_maxfragsperpacket SYSCTL_HANDLER_ARGS;
+static int sysctl_reset_ip_input_stats SYSCTL_HANDLER_ARGS;
+static int sysctl_ip_input_measure_bins SYSCTL_HANDLER_ARGS;
+static int sysctl_ip_input_getperf SYSCTL_HANDLER_ARGS;
 
 int ipforwarding = 0;
 SYSCTL_PROC(_net_inet_ip, IPCTL_FORWARDING, forwarding,
@@ -251,6 +257,31 @@ static int ip_checkinterface = 0;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW | CTLFLAG_LOCKED,
        &ip_checkinterface, 0, "Verify packet arrives on correct interface");
 
+static int ip_chaining = 1;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, rx_chaining, CTLFLAG_RW | CTLFLAG_LOCKED,
+       &ip_chaining, 1, "Do receive side ip address based chaining");
+
+static int ip_chainsz = 6;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, rx_chainsz, CTLFLAG_RW | CTLFLAG_LOCKED,
+       &ip_chainsz, 1, "IP receive side max chaining");
+
+static int ip_input_measure = 0;
+SYSCTL_PROC(_net_inet_ip, OID_AUTO, input_perf,
+       CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+       &ip_input_measure, 0, sysctl_reset_ip_input_stats, "I", "Do time measurement");
+
+static uint64_t ip_input_measure_bins = 0;
+SYSCTL_PROC(_net_inet_ip, OID_AUTO, input_perf_bins,
+       CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &ip_input_measure_bins, 0,
+       sysctl_ip_input_measure_bins, "I",
+       "bins for chaining performance data histogram");
+
+static net_perf_t net_perf;
+SYSCTL_PROC(_net_inet_ip, OID_AUTO, input_perf_data,
+       CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
+       0, 0, sysctl_ip_input_getperf, "S,net_perf",
+       "IP input performance data (struct net_perf, net/net_perf.h)");
+
 #if DIAGNOSTIC
 static int ipprintfs = 0;
 #endif
@@ -444,198 +475,1270 @@ ip_init(struct protosw *pp, struct domain *dp)
                /* NOTREACHED */
        }
 
-       /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
-       for (i = 0; i < IPPROTO_MAX; i++)
-               ip_protox[i] = pr;
+       /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
+       for (i = 0; i < IPPROTO_MAX; i++)
+               ip_protox[i] = pr;
+       /*
+        * Cycle through IP protocols and put them into the appropriate place
+        * in ip_protox[], skipping protocols IPPROTO_{IP,RAW}.
+        */
+       VERIFY(dp == inetdomain && dp->dom_family == PF_INET);
+       TAILQ_FOREACH(pr, &dp->dom_protosw, pr_entry) {
+               VERIFY(pr->pr_domain == dp);
+               if (pr->pr_protocol != 0 && pr->pr_protocol != IPPROTO_RAW) {
+                       /* Be careful to only index valid IP protocols. */
+                       if (pr->pr_protocol < IPPROTO_MAX)
+                               ip_protox[pr->pr_protocol] = pr;
+               }
+       }
+
+       /* IP fragment reassembly queue lock */
+       ipqlock_grp_attr  = lck_grp_attr_alloc_init();
+       ipqlock_grp = lck_grp_alloc_init("ipqlock", ipqlock_grp_attr);
+       ipqlock_attr = lck_attr_alloc_init();
+       lck_mtx_init(&ipqlock, ipqlock_grp, ipqlock_attr);
+
+       lck_mtx_lock(&ipqlock);
+       /* Initialize IP reassembly queue. */
+       for (i = 0; i < IPREASS_NHASH; i++)
+               TAILQ_INIT(&ipq[i]);
+
+       maxnipq = nmbclusters / 32;
+       maxfragsperpacket = 128; /* enough for 64k in 512 byte fragments */
+       ipq_updateparams();
+       lck_mtx_unlock(&ipqlock);
+
+       getmicrotime(&tv);
+       ip_id = RandomULong() ^ tv.tv_usec;
+       ip_initid();
+
+       ipf_init();
+
+#if IPSEC
+       sadb_stat_mutex_grp_attr = lck_grp_attr_alloc_init();
+       sadb_stat_mutex_grp = lck_grp_alloc_init("sadb_stat",
+           sadb_stat_mutex_grp_attr);
+       sadb_stat_mutex_attr = lck_attr_alloc_init();
+       lck_mtx_init(sadb_stat_mutex, sadb_stat_mutex_grp,
+           sadb_stat_mutex_attr);
+
+#endif
+       arp_init();
+}
+
+/*
+ * Initialize IPv4 source address hash table.
+ */
+static void
+in_ifaddrhashtbl_init(void)
+{
+       int i, k, p;
+
+       if (in_ifaddrhashtbl != NULL)
+               return;
+
+       PE_parse_boot_argn("inaddr_nhash", &inaddr_nhash,
+           sizeof (inaddr_nhash));
+       if (inaddr_nhash == 0)
+               inaddr_nhash = INADDR_NHASH;
+
+       MALLOC(in_ifaddrhashtbl, struct in_ifaddrhashhead *,
+           inaddr_nhash * sizeof (*in_ifaddrhashtbl),
+           M_IFADDR, M_WAITOK | M_ZERO);
+       if (in_ifaddrhashtbl == NULL)
+               panic("in_ifaddrhashtbl_init allocation failed");
+
+       /*
+        * Generate the next largest prime greater than inaddr_nhash.
+        */
+       k = (inaddr_nhash % 2 == 0) ? inaddr_nhash + 1 : inaddr_nhash + 2;
+       for (;;) {
+               p = 1;
+               for (i = 3; i * i <= k; i += 2) {
+                       if (k % i == 0)
+                               p = 0;
+               }
+               if (p == 1)
+                       break;
+               k += 2;
+       }
+       inaddr_hashp = k;
+}
+
+u_int32_t
+inaddr_hashval(u_int32_t key)
+{
+       /*
+        * The hash index is the computed prime times the key modulo
+        * the hash size, as documented in "Introduction to Algorithms"
+        * (Cormen, Leiserson, Rivest).
+        */
+       if (inaddr_nhash > 1)
+               return ((key * inaddr_hashp) % inaddr_nhash);
+       else
+               return (0);
+}
+
+void
+ip_proto_dispatch_in_wrapper(struct mbuf *m, int hlen, u_int8_t proto)
+{
+       ip_proto_dispatch_in(m, hlen, proto, 0);
+}
+
+__private_extern__ void
+ip_proto_dispatch_in(struct mbuf *m, int hlen, u_int8_t proto,
+    ipfilter_t inject_ipfref)
+{
+       struct ipfilter *filter;
+       int seen = (inject_ipfref == NULL);
+       int     changed_header = 0;
+       struct ip *ip;
+       void (*pr_input)(struct mbuf *, int len);
+
+       if (!TAILQ_EMPTY(&ipv4_filters)) {
+               ipf_ref();
+               TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
+                       if (seen == 0) {
+                               if ((struct ipfilter *)inject_ipfref == filter)
+                                       seen = 1;
+                       } else if (filter->ipf_filter.ipf_input) {
+                               errno_t result;
+
+                               if (changed_header == 0) {
+                                       /*
+                                        * Perform IP header alignment fixup,
+                                        * if needed, before passing packet
+                                        * into filter(s).
+                                        */
+                                       IP_HDR_ALIGNMENT_FIXUP(m,
+                                           m->m_pkthdr.rcvif, ipf_unref());
+
+                                       /* ipf_unref() already called */
+                                       if (m == NULL)
+                                               return;
+
+                                       changed_header = 1;
+                                       ip = mtod(m, struct ip *);
+                                       ip->ip_len = htons(ip->ip_len + hlen);
+                                       ip->ip_off = htons(ip->ip_off);
+                                       ip->ip_sum = 0;
+                                       ip->ip_sum = ip_cksum_hdr_in(m, hlen);
+                               }
+                               result = filter->ipf_filter.ipf_input(
+                                   filter->ipf_filter.cookie, (mbuf_t *)&m,
+                                   hlen, proto);
+                               if (result == EJUSTRETURN) {
+                                       ipf_unref();
+                                       return;
+                               }
+                               if (result != 0) {
+                                       ipf_unref();
+                                       m_freem(m);
+                                       return;
+                               }
+                       }
+               }
+               ipf_unref();
+       }
+
+       /* Perform IP header alignment fixup (post-filters), if needed */
+       IP_HDR_ALIGNMENT_FIXUP(m, m->m_pkthdr.rcvif, return);
+
+       /*
+        * If there isn't a specific lock for the protocol
+        * we're about to call, use the generic lock for AF_INET.
+        * otherwise let the protocol deal with its own locking
+        */
+       ip = mtod(m, struct ip *);
+
+       if (changed_header) {
+               ip->ip_len = ntohs(ip->ip_len) - hlen;
+               ip->ip_off = ntohs(ip->ip_off);
+       }
+
+       if ((pr_input = ip_protox[ip->ip_p]->pr_input) == NULL) {
+               m_freem(m);
+       } else if (!(ip_protox[ip->ip_p]->pr_flags & PR_PROTOLOCK)) {
+               lck_mtx_lock(inet_domain_mutex);
+               pr_input(m, hlen);
+               lck_mtx_unlock(inet_domain_mutex);
+       } else {
+               pr_input(m, hlen);
+       }
+}
+
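+/*
+ * One bucket of the on-stack flow table used for receive-side chaining:
+ * head and tail of the mbuf chain plus the (source, destination, protocol)
+ * tuple and the packet/byte counts accumulated for that flow.
+ */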
+struct pktchain_elm {
+       struct mbuf     *pkte_head;
+       struct mbuf     *pkte_tail;
+       struct in_addr  pkte_saddr;
+       struct in_addr  pkte_daddr;
+       uint16_t        pkte_npkts;
+       uint16_t        pkte_proto;
+       uint32_t        pkte_nbytes;
+};
+
+typedef struct pktchain_elm pktchain_elm_t;
+
+/* Store up to PKTTBL_SZ unique flows on the stack */
+#define PKTTBL_SZ      7
+
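+/*
+ * Append a packet to the flow-table bucket selected by a hash of the IP
+ * source address.  A packet joins a bucket only if it matches that bucket's
+ * (src, dst, proto) tuple; otherwise it is handed back to the caller, which
+ * treats it as a collision and stops chaining the current batch.
+ */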
+static struct mbuf *
+ip_chain_insert(struct mbuf *packet, pktchain_elm_t *tbl)
+{
+       struct ip*      ip;
+       int             pkttbl_idx = 0;
+
+       ip = mtod(packet, struct ip*);
+
+       /* reusing the hash function from inaddr_hashval */
+       pkttbl_idx = inaddr_hashval(ntohs(ip->ip_src.s_addr)) % PKTTBL_SZ;
+       if (tbl[pkttbl_idx].pkte_head == NULL) {
+               tbl[pkttbl_idx].pkte_head = packet;
+               tbl[pkttbl_idx].pkte_saddr.s_addr = ip->ip_src.s_addr;
+               tbl[pkttbl_idx].pkte_daddr.s_addr = ip->ip_dst.s_addr;
+               tbl[pkttbl_idx].pkte_proto = ip->ip_p;
+       } else {
+               if ((ip->ip_dst.s_addr == tbl[pkttbl_idx].pkte_daddr.s_addr) &&
+                   (ip->ip_src.s_addr == tbl[pkttbl_idx].pkte_saddr.s_addr) &&
+                   (ip->ip_p == tbl[pkttbl_idx].pkte_proto)) {
+               } else {
+                       return (packet);
+               }
+       }
+       if (tbl[pkttbl_idx].pkte_tail != NULL)
+               mbuf_setnextpkt(tbl[pkttbl_idx].pkte_tail, packet);
+
+       tbl[pkttbl_idx].pkte_tail = packet;
+       tbl[pkttbl_idx].pkte_npkts += 1;
+       tbl[pkttbl_idx].pkte_nbytes += packet->m_pkthdr.len;
+       return (NULL);
+}
+
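+/*
+ * Run the second pass over every non-empty bucket of the flow table, update
+ * the chaining statistics and histogram, and reset the bucket for reuse.
+ */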
+/* args is a dummy variable here for backward compatibility */
+static void
+ip_input_second_pass_loop_tbl(pktchain_elm_t *tbl, struct ip_fw_in_args *args)
+{
+       int i = 0;
+
+       for (i = 0; i < PKTTBL_SZ; i++) {
+               if (tbl[i].pkte_head != NULL) {
+                       struct mbuf *m = tbl[i].pkte_head;
+                       ip_input_second_pass(m, m->m_pkthdr.rcvif, 0,
+                           tbl[i].pkte_npkts, tbl[i].pkte_nbytes, args, 0);
+
+                       if (tbl[i].pkte_npkts > 2)
+                               ipstat.ips_rxc_chainsz_gt2++;
+                       if (tbl[i].pkte_npkts > 4)
+                               ipstat.ips_rxc_chainsz_gt4++;
+
+                       if (ip_input_measure)
+                               net_perf_histogram(&net_perf, tbl[i].pkte_npkts);
+
+                       tbl[i].pkte_head = tbl[i].pkte_tail = NULL;
+                       tbl[i].pkte_npkts = 0;
+                       tbl[i].pkte_nbytes = 0;
+                       /* no need to initialize address and protocol in tbl */
+               }
+       }
+}
+
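+/*
+ * Translate between the slim ip_fw_in_args used while chaining and the full
+ * ip_fw_args expected by PF/ipfw: ip_input_cpout_args() fills in a (lazily
+ * zeroed) ip_fw_args before the call, ip_input_cpin_args() copies any
+ * updates back afterwards.
+ */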
+static void
+ip_input_cpout_args(struct ip_fw_in_args *args, struct ip_fw_args *args1,
+    boolean_t *done_init)
+{
+       if (*done_init == FALSE) {
+               bzero(args1, sizeof(struct ip_fw_args));
+               *done_init = TRUE;
+       }
+       args1->fwa_next_hop = args->fwai_next_hop;
+       args1->fwa_ipfw_rule = args->fwai_ipfw_rule;
+       args1->fwa_pf_rule = args->fwai_pf_rule;
+       args1->fwa_divert_rule = args->fwai_divert_rule;
+}
+
+static void
+ip_input_cpin_args(struct ip_fw_args *args1, struct ip_fw_in_args *args)
+{
+       args->fwai_next_hop = args1->fwa_next_hop;
+       args->fwai_ipfw_rule = args1->fwa_ipfw_rule;
+       args->fwai_pf_rule = args1->fwa_pf_rule;
+       args->fwai_divert_rule = args1->fwa_divert_rule;
+}
+
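+/*
+ * Outcomes of ip_input_first_pass(): DOCHAIN - the packet may join a flow
+ * chain; DONTCHAIN - the packet is valid but must be delivered on its own;
+ * FREED - the packet was dropped or otherwise consumed; DONE - the packet
+ * was already dispatched to a protocol or filter.
+ */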
+typedef enum {
+       IPINPUT_DOCHAIN = 0,
+       IPINPUT_DONTCHAIN,
+       IPINPUT_FREED,
+       IPINPUT_DONE
+} ipinput_chain_ret_t;
+
+static void
+ip_input_update_nstat(struct ifnet *ifp, struct in_addr src_ip,
+    u_int32_t packets, u_int32_t bytes)
+{
+       if (nstat_collect) {
+               struct rtentry *rt = ifnet_cached_rtlookup_inet(ifp,
+                   src_ip);
+               if (rt != NULL) {
+                       nstat_route_rx(rt, packets, bytes, 0);
+                       rtfree(rt);
+               }
+       }
+}
+
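+/*
+ * Hand every packet on an mbuf chain to its protocol input routine, passing
+ * TCP packets through LRO when enabled.  The first packet already has ip_len
+ * adjusted by the caller; subsequent packets have the header length
+ * subtracted here before dispatch.
+ */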
+static void
+ip_input_dispatch_chain(struct mbuf *m)
+{
+       struct mbuf *tmp_mbuf = m;
+       struct mbuf *nxt_mbuf = NULL;
+       struct ip *ip = NULL;
+       unsigned int hlen;
+
+       ip = mtod(tmp_mbuf, struct ip *);
+       hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+       while(tmp_mbuf) {
+               nxt_mbuf = mbuf_nextpkt(tmp_mbuf);
+               mbuf_setnextpkt(tmp_mbuf, NULL);
+
+               if ((sw_lro) && (ip->ip_p == IPPROTO_TCP))
+                       tmp_mbuf = tcp_lro(tmp_mbuf, hlen);
+               if (tmp_mbuf)
+                       ip_proto_dispatch_in(tmp_mbuf, hlen, ip->ip_p, 0);
+               tmp_mbuf = nxt_mbuf;
+               if (tmp_mbuf) {
+                       ip = mtod(tmp_mbuf, struct ip *);
+                       /* first mbuf of chain already has adjusted ip_len */
+                       hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+                       ip->ip_len -= hlen;
+               }
+       }
+}
+
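+/* Record destination interface-address info on every packet of a chain. */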
+static void
+ip_input_setdst_chain(struct mbuf *m, uint32_t ifindex, struct in_ifaddr *ia)
+{
+       struct mbuf *tmp_mbuf = m;
+
+       while (tmp_mbuf) {
+               ip_setdstifaddr_info(tmp_mbuf, ifindex, ia);
+               tmp_mbuf = mbuf_nextpkt(tmp_mbuf);
+       }
+}
+
+/*
+ * First pass does all essential packet validation and places on a per flow
+ * queue for doing operations that have same outcome for all packets of a flow.
+ * div_info is packet divert/tee info
+ */
+static ipinput_chain_ret_t
+ip_input_first_pass(struct mbuf *m, u_int32_t *div_info,
+    struct ip_fw_in_args *args, int *ours, struct mbuf **modm)
+{
+       struct ip       *ip;
+       struct ifnet    *inifp;
+       unsigned int    hlen;
+       int             retval = IPINPUT_DOCHAIN;
+       int             len = 0;
+       struct in_addr  src_ip;
+#if IPFIREWALL
+       int             i;
+#endif
+#if IPFIREWALL || DUMMYNET
+       struct m_tag            *copy;
+       struct m_tag            *p;
+       boolean_t               delete = FALSE;
+       struct ip_fw_args       args1;
+       boolean_t               init = FALSE;
+#endif
+       ipfilter_t inject_filter_ref = NULL;
+
+#if !IPFIREWALL
+#pragma unused (args)
+#endif
+
+#if !IPDIVERT
+#pragma unused (div_info)
+#pragma unused (ours)
+#endif
+
+#if !IPFIREWALL_FORWARD
+#pragma unused (ours)
+#endif
+
+       /* Check if the mbuf is still valid after interface filter processing */
+       MBUF_INPUT_CHECK(m, m->m_pkthdr.rcvif);
+       inifp = mbuf_pkthdr_rcvif(m);
+       VERIFY(inifp != NULL);
+
+       /* Perform IP header alignment fixup, if needed */
+       IP_HDR_ALIGNMENT_FIXUP(m, inifp, goto bad);
+
+       m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
+
+#if IPFIREWALL || DUMMYNET
+
+       /*
+        * Don't bother searching for tag(s) if there's none.
+        */
+       if (SLIST_EMPTY(&m->m_pkthdr.tags))
+               goto ipfw_tags_done;
+
+       /* Grab info from mtags prepended to the chain */
+       p = m_tag_first(m);
+       while (p) {
+               if (p->m_tag_id == KERNEL_MODULE_TAG_ID) {
+#if DUMMYNET
+                       if (p->m_tag_type == KERNEL_TAG_TYPE_DUMMYNET) {
+                               struct dn_pkt_tag *dn_tag;
+
+                               dn_tag = (struct dn_pkt_tag *)(p+1);
+                               args->fwai_ipfw_rule = dn_tag->dn_ipfw_rule;
+                               args->fwai_pf_rule = dn_tag->dn_pf_rule;
+                               delete = TRUE;
+                       }
+#endif
+
+#if IPDIVERT
+                       if (p->m_tag_type == KERNEL_TAG_TYPE_DIVERT) {
+                               struct divert_tag *div_tag;
+
+                               div_tag = (struct divert_tag *)(p+1);
+                               args->fwai_divert_rule = div_tag->cookie;
+                               delete = TRUE;
+                       }
+#endif
+
+                       if (p->m_tag_type == KERNEL_TAG_TYPE_IPFORWARD) {
+                               struct ip_fwd_tag *ipfwd_tag;
+
+                               ipfwd_tag = (struct ip_fwd_tag *)(p+1);
+                               args->fwai_next_hop = ipfwd_tag->next_hop;
+                               delete = TRUE;
+                       }
+
+                       if (delete) {
+                               copy = p;
+                               p = m_tag_next(m, p);
+                               m_tag_delete(m, copy);
+                       } else  {
+                               p = m_tag_next(m, p);
+                       }
+               } else {
+                       p = m_tag_next(m, p);
+               }
+       }
+
+#if DIAGNOSTIC
+       if (m == NULL || !(m->m_flags & M_PKTHDR))
+               panic("ip_input no HDR");
+#endif
+
+#if DUMMYNET
+       if (args->fwai_ipfw_rule || args->fwai_pf_rule) {
+               /* dummynet already filtered us */
+               ip = mtod(m, struct ip *);
+               hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+               inject_filter_ref = ipf_get_inject_filter(m);
+#if IPFIREWALL
+               if (args->fwai_ipfw_rule)
+                       goto iphack;
+#endif /* IPFIREWALL */
+               if (args->fwai_pf_rule)
+                       goto check_with_pf;
+       }
+#endif /* DUMMYNET */
+ipfw_tags_done:
+#endif /* IPFIREWALL || DUMMYNET */
+
+       /*
+        * No need to process packet twice if we've already seen it.
+        */
+       if (!SLIST_EMPTY(&m->m_pkthdr.tags))
+               inject_filter_ref = ipf_get_inject_filter(m);
+       if (inject_filter_ref != NULL) {
+               ip = mtod(m, struct ip *);
+               hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+
+               DTRACE_IP6(receive, struct mbuf *, m, struct inpcb *, NULL,
+                   struct ip *, ip, struct ifnet *, inifp,
+                   struct ip *, ip, struct ip6_hdr *, NULL);
+
+               ip->ip_len = ntohs(ip->ip_len) - hlen;
+               ip->ip_off = ntohs(ip->ip_off);
+               ip_proto_dispatch_in(m, hlen, ip->ip_p, inject_filter_ref);
+               return (IPINPUT_DONE);
+       }
+
+       if (m->m_pkthdr.len < sizeof (struct ip)) {
+               OSAddAtomic(1, &ipstat.ips_total);
+               OSAddAtomic(1, &ipstat.ips_tooshort);
+               m_freem(m);
+               return (IPINPUT_FREED);
+       }
+
+       if (m->m_len < sizeof (struct ip) &&
+           (m = m_pullup(m, sizeof (struct ip))) == NULL) {
+               OSAddAtomic(1, &ipstat.ips_total);
+               OSAddAtomic(1, &ipstat.ips_toosmall);
+               return (IPINPUT_FREED);
+       }
+
+       ip = mtod(m, struct ip *);
+       *modm = m;
+
+       KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr, ip->ip_src.s_addr,
+           ip->ip_p, ip->ip_off, ip->ip_len);
+
+       if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
+               OSAddAtomic(1, &ipstat.ips_total);
+               OSAddAtomic(1, &ipstat.ips_badvers);
+               KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0);
+               m_freem(m);
+               return (IPINPUT_FREED);
+       }
+
+       hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+       if (hlen < sizeof (struct ip)) {
+               OSAddAtomic(1, &ipstat.ips_total);
+               OSAddAtomic(1, &ipstat.ips_badhlen);
+               KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0);
+               m_freem(m);
+               return (IPINPUT_FREED);
+       }
+
+       if (hlen > m->m_len) {
+               if ((m = m_pullup(m, hlen)) == NULL) {
+                       OSAddAtomic(1, &ipstat.ips_total);
+                       OSAddAtomic(1, &ipstat.ips_badhlen);
+                       KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0);
+                       return (IPINPUT_FREED);
+               }
+               ip = mtod(m, struct ip *);
+               *modm = m;
+       }
+
+       /* 127/8 must not appear on wire - RFC1122 */
+       if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
+           (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
+               /*
+                * Allow for the following exceptions:
+                *
+                *   1. If the packet was sent to loopback (i.e. rcvif
+                *      would have been set earlier at output time.)
+                *
+                *   2. If the packet was sent out on loopback from a local
+                *      source address which belongs to a non-loopback
+                *      interface (i.e. rcvif may not necessarily be a
+                *      loopback interface, hence the test for PKTF_LOOP.)
+                *      Unlike IPv6, there is no interface scope ID, and
+                *      therefore we don't care so much about PKTF_IFINFO.
+                */
+               if (!(inifp->if_flags & IFF_LOOPBACK) &&
+                    !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
+                       OSAddAtomic(1, &ipstat.ips_total);
+                       OSAddAtomic(1, &ipstat.ips_badaddr);
+                       KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0);
+                       m_freem(m);
+                       return (IPINPUT_FREED);
+               }
+       }
+
+       /* IPv4 Link-Local Addresses as defined in RFC3927 */
+       if ((IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) ||
+           IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)))) {
+               ip_linklocal_stat.iplls_in_total++;
+               if (ip->ip_ttl != MAXTTL) {
+                       OSAddAtomic(1, &ip_linklocal_stat.iplls_in_badttl);
+                       /* Silently drop link local traffic with bad TTL */
+                       if (!ip_linklocal_in_allowbadttl) {
+                               OSAddAtomic(1, &ipstat.ips_total);
+                               KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0);
+                               m_freem(m);
+                               return (IPINPUT_FREED);
+                       }
+               }
+       }
+
+       if (ip_cksum(m, hlen)) {
+               OSAddAtomic(1, &ipstat.ips_total);
+               KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0);
+               m_freem(m);
+               return (IPINPUT_FREED);
+       }
+
+       DTRACE_IP6(receive, struct mbuf *, m, struct inpcb *, NULL,
+           struct ip *, ip, struct ifnet *, inifp,
+           struct ip *, ip, struct ip6_hdr *, NULL);
+
+       /*
+        * Convert fields to host representation.
+        */
+#if BYTE_ORDER != BIG_ENDIAN
+       NTOHS(ip->ip_len);
+#endif
+
+       if (ip->ip_len < hlen) {
+               OSAddAtomic(1, &ipstat.ips_total);
+               OSAddAtomic(1, &ipstat.ips_badlen);
+               KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0);
+               m_freem(m);
+               return (IPINPUT_FREED);
+       }
+
+#if BYTE_ORDER != BIG_ENDIAN
+       NTOHS(ip->ip_off);
+#endif
+
+       /*
+        * Check that the amount of data in the buffers
+        * is at least as much as the IP header would have us expect.
+        * Trim mbufs if longer than we expect.
+        * Drop packet if shorter than we expect.
+        */
+       if (m->m_pkthdr.len < ip->ip_len) {
+               OSAddAtomic(1, &ipstat.ips_total);
+               OSAddAtomic(1, &ipstat.ips_tooshort);
+               KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0);
+               m_freem(m);
+               return (IPINPUT_FREED);
+       }
+
+       if (m->m_pkthdr.len > ip->ip_len) {
+               /*
+                * Invalidate hardware checksum info if ip_adj_clear_hwcksum
+                * is set; useful to handle buggy drivers.  Note that this
+                * should not be enabled by default, as we may get here due
+                * to link-layer padding.
+                */
+               if (ip_adj_clear_hwcksum &&
+                   (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) &&
+                   !(inifp->if_flags & IFF_LOOPBACK) &&
+                   !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
+                       m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
+                       m->m_pkthdr.csum_data = 0;
+                       ipstat.ips_adj_hwcsum_clr++;
+               }
+
+               ipstat.ips_adj++;
+               if (m->m_len == m->m_pkthdr.len) {
+                       m->m_len = ip->ip_len;
+                       m->m_pkthdr.len = ip->ip_len;
+               } else
+                       m_adj(m, ip->ip_len - m->m_pkthdr.len);
+       }
+
+       /* for consistency */
+       m->m_pkthdr.pkt_proto = ip->ip_p;
+
+       /* for netstat route statistics */
+       src_ip = ip->ip_src;
+       len = m->m_pkthdr.len;
+
+#if DUMMYNET
+check_with_pf:
+#endif
+#if PF
+       /* Invoke inbound packet filter */
+       if (PF_IS_ENABLED) {
+               int error;
+               ip_input_cpout_args(args, &args1, &init);
+
+#if DUMMYNET
+               error = pf_af_hook(inifp, NULL, &m, AF_INET, TRUE, &args1);
+#else
+               error = pf_af_hook(inifp, NULL, &m, AF_INET, TRUE, NULL);
+#endif /* DUMMYNET */
+               if (error != 0 || m == NULL) {
+                       if (m != NULL) {
+                               panic("%s: unexpected packet %p\n",
+                                   __func__, m);
+                               /* NOTREACHED */
+                       }
+                       /* Already freed by callee */
+                       ip_input_update_nstat(inifp, src_ip, 1, len);
+                       KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0);
+                       OSAddAtomic(1, &ipstat.ips_total);
+                       return (IPINPUT_FREED);
+               }
+               ip = mtod(m, struct ip *);
+               hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+               *modm = m;
+               ip_input_cpin_args(&args1, args);
+       }
+#endif /* PF */
+
+#if IPSEC
+       if (ipsec_bypass == 0 && ipsec_gethist(m, NULL)) {
+               retval = IPINPUT_DONTCHAIN; /* XXX scope for chaining here? */
+               goto pass;
+       }
+#endif
+
+#if IPFIREWALL
+#if DUMMYNET
+iphack:
+#endif /* DUMMYNET */
+       /*
+        * Check if we want to allow this packet to be processed.
+        * Consider it to be bad if not.
+        */
+       if (fw_enable && IPFW_LOADED) {
+#if IPFIREWALL_FORWARD
+               /*
+                * If we've been forwarded from the output side, then
+                * skip the firewall a second time
+                */
+               if (args->fwai_next_hop) {
+                       *ours = 1;
+                       return (IPINPUT_DONTCHAIN);
+               }
+#endif /* IPFIREWALL_FORWARD */
+               ip_input_cpout_args(args, &args1, &init);
+               args1.fwa_m = m;
+
+               i = ip_fw_chk_ptr(&args1);
+               m = args1.fwa_m;
+
+               if ((i & IP_FW_PORT_DENY_FLAG) || m == NULL) { /* drop */
+                       if (m)
+                               m_freem(m);
+                       ip_input_update_nstat(inifp, src_ip, 1, len);
+                       KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0);
+                       OSAddAtomic(1, &ipstat.ips_total);
+                       return (IPINPUT_FREED);
+               }
+               ip = mtod(m, struct ip *); /* just in case m changed */
+               *modm = m;
+               ip_input_cpin_args(&args1, args);
+
+               if (i == 0 && args->fwai_next_hop == NULL) { /* common case */
+                       goto pass;
+               }
+#if DUMMYNET
+               if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG) != 0) {
+                       /* Send packet to the appropriate pipe */
+                       ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args1,
+                           DN_CLIENT_IPFW);
+                       ip_input_update_nstat(inifp, src_ip, 1, len);
+                       KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0);
+                       OSAddAtomic(1, &ipstat.ips_total);
+                       return (IPINPUT_FREED);
+               }
+#endif /* DUMMYNET */
+#if IPDIVERT
+               if (i != 0 && (i & IP_FW_PORT_DYNT_FLAG) == 0) {
+                       /* Divert or tee packet */
+                       *div_info = i;
+                       *ours = 1;
+                       return (IPINPUT_DONTCHAIN);
+               }
+#endif
+#if IPFIREWALL_FORWARD
+               if (i == 0 && args->fwai_next_hop != NULL) {
+                       retval = IPINPUT_DONTCHAIN;
+                       goto pass;
+               }
+#endif
+               /*
+                * if we get here, the packet must be dropped
+                */
+               ip_input_update_nstat(inifp, src_ip, 1, len);
+               KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0);
+               m_freem(m);
+               OSAddAtomic(1, &ipstat.ips_total);
+               return (IPINPUT_FREED);
+       }
+#endif /* IPFIREWALL */
+#if IPSEC | IPFIREWALL
+pass:
+#endif
+       /*
+        * Process options and, if not destined for us,
+        * ship it on.  ip_dooptions returns 1 when an
+        * error was detected (causing an icmp message
+        * to be sent and the original packet to be freed).
+        */
+       ip_nhops = 0;           /* for source routed packets */
+#if IPFIREWALL
+       if (hlen > sizeof (struct ip) &&
+           ip_dooptions(m, 0, args->fwai_next_hop)) {
+#else /* !IPFIREWALL */
+       if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, NULL)) {
+#endif /* !IPFIREWALL */
+               ip_input_update_nstat(inifp, src_ip, 1, len);
+               KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0);
+               OSAddAtomic(1, &ipstat.ips_total);
+               return (IPINPUT_FREED);
+       }
+
+       /*
+        * Don't chain fragmented packets as the process of determining
+        * if it is our fragment or someone else's plus the complexity of
+        * divert and fw args makes it harder to do chaining.
+        */
+       if (ip->ip_off & ~(IP_DF | IP_RF))
+               return (IPINPUT_DONTCHAIN);
+
+       /* Allow DHCP/BootP responses through */
+       if ((inifp->if_eflags & IFEF_AUTOCONFIGURING) &&
+           hlen == sizeof (struct ip) && ip->ip_p == IPPROTO_UDP) {
+               struct udpiphdr *ui;
+
+               if (m->m_len < sizeof (struct udpiphdr) &&
+                   (m = m_pullup(m, sizeof (struct udpiphdr))) == NULL) {
+                       OSAddAtomic(1, &udpstat.udps_hdrops);
+                       KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0);
+                       OSAddAtomic(1, &ipstat.ips_total);
+                       return (IPINPUT_FREED);
+               }
+               *modm = m;
+               ui = mtod(m, struct udpiphdr *);
+               if (ntohs(ui->ui_dport) == IPPORT_BOOTPC) {
+                       ip_setdstifaddr_info(m, inifp->if_index, NULL);
+                       return (IPINPUT_DONTCHAIN);
+               }
+       }
+
+       /* Avoid chaining raw sockets as ipsec checks occur later for them */
+       if (ip_protox[ip->ip_p]->pr_flags & PR_LASTHDR)
+               return (IPINPUT_DONTCHAIN);
+
+       return (retval);
+#if !defined(__i386__) && !defined(__x86_64__)
+bad:
+       m_freem(m);
+       return (IPINPUT_FREED);
+#endif
+}
+
+static void
+ip_input_second_pass(struct mbuf *m, struct ifnet *inifp, u_int32_t div_info,
+    int npkts_in_chain, int bytes_in_chain, struct ip_fw_in_args *args, int ours)
+{
+       unsigned int            checkif;
+       struct mbuf             *tmp_mbuf = NULL;
+       struct in_ifaddr        *ia = NULL;
+       struct in_addr          pkt_dst;
+       unsigned int            hlen;
+
+#if !IPFIREWALL
+#pragma unused (args)
+#endif
+
+#if !IPDIVERT
+#pragma unused (div_info)
+#endif
+
+       struct ip *ip = mtod(m, struct ip *);
+       hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+
+       OSAddAtomic(npkts_in_chain, &ipstat.ips_total);
+
+       /*
+        * Naively assume we can attribute inbound data to the route we would
+        * use to send to this destination. Asymmetric routing breaks this
+        * assumption, but it still allows us to account for traffic from
+        * a remote node in the routing table.
+        * this has a very significant performance impact so we bypass
+        * if nstat_collect is disabled. We may also bypass if the
+        * protocol is tcp in the future because tcp will have a route that
+        * we can use to attribute the data to. That does mean we would not
+        * account for forwarded tcp traffic.
+        */
+       ip_input_update_nstat(inifp, ip->ip_src, npkts_in_chain,
+           bytes_in_chain);
+
+       if (ours)
+               goto ours;
+
+       /*
+        * Check our list of addresses, to see if the packet is for us.
+        * If we don't have any addresses, assume any unicast packet
+        * we receive might be for us (and let the upper layers deal
+        * with it).
+        */
+       tmp_mbuf = m;
+       if (TAILQ_EMPTY(&in_ifaddrhead)) {
+               while (tmp_mbuf) {
+                       if (!(tmp_mbuf->m_flags & (M_MCAST|M_BCAST))) {
+                               ip_setdstifaddr_info(tmp_mbuf, inifp->if_index,
+                                   NULL);
+                       }
+                       tmp_mbuf = mbuf_nextpkt(tmp_mbuf);
+               }
+               goto ours;
+       }
+       /*
+        * Cache the destination address of the packet; this may be
+        * changed by use of 'ipfw fwd'.
+        */
+#if IPFIREWALL
+       pkt_dst = args->fwai_next_hop == NULL ?
+           ip->ip_dst : args->fwai_next_hop->sin_addr;
+#else /* !IPFIREWALL */
+       pkt_dst = ip->ip_dst;
+#endif /* !IPFIREWALL */
+
+       /*
+        * Enable a consistency check between the destination address
+        * and the arrival interface for a unicast packet (the RFC 1122
+        * strong ES model) if IP forwarding is disabled and the packet
+        * is not locally generated and the packet is not subject to
+        * 'ipfw fwd'.
+        *
+        * XXX - Checking also should be disabled if the destination
+        * address is ipnat'ed to a different interface.
+        *
+        * XXX - Checking is incompatible with IP aliases added
+        * to the loopback interface instead of the interface where
+        * the packets are received.
+        */
+       checkif = ip_checkinterface && (ipforwarding == 0) &&
+           !(inifp->if_flags & IFF_LOOPBACK) &&
+           !(m->m_pkthdr.pkt_flags & PKTF_LOOP)
+#if IPFIREWALL
+           && (args->fwai_next_hop == NULL);
+#else /* !IPFIREWALL */
+               ;
+#endif /* !IPFIREWALL */
+
+       /*
+        * Check for exact addresses in the hash bucket.
+        */
+       lck_rw_lock_shared(in_ifaddr_rwlock);
+       TAILQ_FOREACH(ia, INADDR_HASH(pkt_dst.s_addr), ia_hash) {
+               /*
+                * If the address matches, verify that the packet
+                * arrived via the correct interface if checking is
+                * enabled.
+                */
+               if (IA_SIN(ia)->sin_addr.s_addr == pkt_dst.s_addr &&
+                   (!checkif || ia->ia_ifp == inifp)) {
+                       ip_input_setdst_chain(m, 0, ia);
+                       lck_rw_done(in_ifaddr_rwlock);
+                       goto ours;
+               }
+       }
+       lck_rw_done(in_ifaddr_rwlock);
+
+       /*
+        * Check for broadcast addresses.
+        *
+        * Only accept broadcast packets that arrive via the matching
+        * interface.  Reception of forwarded directed broadcasts would be
+        * handled via ip_forward() and ether_frameout() with the loopback
+        * into the stack for SIMPLEX interfaces handled by ether_frameout().
+        */
+       if (inifp->if_flags & IFF_BROADCAST) {
+               struct ifaddr *ifa;
+
+               ifnet_lock_shared(inifp);
+               TAILQ_FOREACH(ifa, &inifp->if_addrhead, ifa_link) {
+                       if (ifa->ifa_addr->sa_family != AF_INET) {
+                               continue;
+                       }
+                       ia = ifatoia(ifa);
+                       if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
+                           pkt_dst.s_addr || ia->ia_netbroadcast.s_addr ==
+                           pkt_dst.s_addr) {
+                               ip_input_setdst_chain(m, 0, ia);
+                               ifnet_lock_done(inifp);
+                               goto ours;
+                       }
+               }
+               ifnet_lock_done(inifp);
+       }
+
+       if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
+               struct in_multi *inm;
+               /*
+                * See if we belong to the destination multicast group on the
+                * arrival interface.
+                */
+               in_multihead_lock_shared();
+               IN_LOOKUP_MULTI(&ip->ip_dst, inifp, inm);
+               in_multihead_lock_done();
+               if (inm == NULL) {
+                       OSAddAtomic(npkts_in_chain, &ipstat.ips_notmember);
+                       m_freem_list(m);
+                       KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0);
+                       return;
+               }
+               ip_input_setdst_chain(m, inifp->if_index, NULL);
+               INM_REMREF(inm);
+               goto ours;
+       }
+
+       if (ip->ip_dst.s_addr == (u_int32_t)INADDR_BROADCAST ||
+           ip->ip_dst.s_addr == INADDR_ANY) {
+               ip_input_setdst_chain(m, inifp->if_index, NULL);
+               goto ours;
+       }
+
+       if (ip->ip_p == IPPROTO_UDP) {
+               struct udpiphdr *ui;
+               ui = mtod(m, struct udpiphdr *);
+               if (ntohs(ui->ui_dport) == IPPORT_BOOTPC) {
+                       goto ours;
+               }
+       }
+
+       tmp_mbuf = m;
+       struct mbuf *nxt_mbuf = NULL;
+       while (tmp_mbuf) {
+               nxt_mbuf = mbuf_nextpkt(tmp_mbuf);
+               /*
+                * Not for us; forward if possible and desirable.
+                */
+               mbuf_setnextpkt(tmp_mbuf, NULL);
+               if (ipforwarding == 0) {
+                       OSAddAtomic(1, &ipstat.ips_cantforward);
+                       m_freem(tmp_mbuf);
+               } else {
+#if IPFIREWALL
+                       ip_forward(tmp_mbuf, 0, args->fwai_next_hop);
+#else
+                       ip_forward(tmp_mbuf, 0, NULL);
+#endif
+               }
+               tmp_mbuf = nxt_mbuf;
+       }
+       KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0);
+       return;
+ours:
+       /*
+        * If offset or IP_MF are set, must reassemble.
+        */
+       if (ip->ip_off & ~(IP_DF | IP_RF)) {
+               VERIFY(npkts_in_chain == 1);
+               /*
+                * ip_reass() will return a different mbuf, and update
+                * the divert info in div_info and args->fwai_divert_rule.
+                */
+#if IPDIVERT
+               m = ip_reass(m, (u_int16_t *)&div_info, &args->fwai_divert_rule);
+#else
+               m = ip_reass(m);
+#endif
+               if (m == NULL)
+                       return;
+               ip = mtod(m, struct ip *);
+               /* Get the header length of the reassembled packet */
+               hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+#if IPDIVERT
+               /* Restore original checksum before diverting packet */
+               if (div_info != 0) {
+                       VERIFY(npkts_in_chain == 1);
+#if BYTE_ORDER != BIG_ENDIAN
+                       HTONS(ip->ip_len);
+                       HTONS(ip->ip_off);
+#endif
+                       ip->ip_sum = 0;
+                       ip->ip_sum = ip_cksum_hdr_in(m, hlen);
+#if BYTE_ORDER != BIG_ENDIAN
+                       NTOHS(ip->ip_off);
+                       NTOHS(ip->ip_len);
+#endif
+               }
+#endif
+       }
+
+       /*
+        * Further protocols expect the packet length to be w/o the
+        * IP header.
+        */
+       ip->ip_len -= hlen;
+
+#if IPDIVERT
+       /*
+        * Divert or tee packet to the divert protocol if required.
+        *
+        * If div_info is zero then cookie should be too, so we shouldn't
+        * need to clear them here.  Assume divert_packet() does so also.
+        */
+       if (div_info != 0) {
+               struct mbuf *clone = NULL;
+               VERIFY(npkts_in_chain == 1);
+
+               /* Clone packet if we're doing a 'tee' */
+               if (div_info & IP_FW_PORT_TEE_FLAG)
+                       clone = m_dup(m, M_DONTWAIT);
+
+               /* Restore packet header fields to original values */
+               ip->ip_len += hlen;
+
+#if BYTE_ORDER != BIG_ENDIAN
+               HTONS(ip->ip_len);
+               HTONS(ip->ip_off);
+#endif
+               /* Deliver packet to divert input routine */
+               OSAddAtomic(1, &ipstat.ips_delivered);
+               divert_packet(m, 1, div_info & 0xffff, args->fwai_divert_rule);
+
+               /* If 'tee', continue with original packet */
+               if (clone == NULL) {
+                       return;
+               }
+               m = clone;
+               ip = mtod(m, struct ip *);
+       }
+#endif
+
+#if IPSEC
        /*
-        * Cycle through IP protocols and put them into the appropriate place
-        * in ip_protox[], skipping protocols IPPROTO_{IP,RAW}.
+        * enforce IPsec policy checking if we are seeing last header.
+        * note that we do not visit this with protocols with pcb layer
+        * code - like udp/tcp/raw ip.
         */
-       VERIFY(dp == inetdomain && dp->dom_family == PF_INET);
-       TAILQ_FOREACH(pr, &dp->dom_protosw, pr_entry) {
-               VERIFY(pr->pr_domain == dp);
-               if (pr->pr_protocol != 0 && pr->pr_protocol != IPPROTO_RAW) {
-                       /* Be careful to only index valid IP protocols. */
-                       if (pr->pr_protocol < IPPROTO_MAX)
-                               ip_protox[pr->pr_protocol] = pr;
+       if (ipsec_bypass == 0 && (ip_protox[ip->ip_p]->pr_flags & PR_LASTHDR)) {
+               VERIFY(npkts_in_chain == 1);
+               if (ipsec4_in_reject(m, NULL)) {
+                       IPSEC_STAT_INCREMENT(ipsecstat.in_polvio);
+                       goto bad;
                }
        }
+#endif /* IPSEC */
 
-       /* IP fragment reassembly queue lock */
-       ipqlock_grp_attr  = lck_grp_attr_alloc_init();
-       ipqlock_grp = lck_grp_alloc_init("ipqlock", ipqlock_grp_attr);
-       ipqlock_attr = lck_attr_alloc_init();
-       lck_mtx_init(&ipqlock, ipqlock_grp, ipqlock_attr);
-
-       lck_mtx_lock(&ipqlock);
-       /* Initialize IP reassembly queue. */
-       for (i = 0; i < IPREASS_NHASH; i++)
-               TAILQ_INIT(&ipq[i]);
-
-       maxnipq = nmbclusters / 32;
-       maxfragsperpacket = 128; /* enough for 64k in 512 byte fragments */
-       ipq_updateparams();
-       lck_mtx_unlock(&ipqlock);
-
-       getmicrotime(&tv);
-       ip_id = RandomULong() ^ tv.tv_usec;
-       ip_initid();
+       /*
+        * Switch out to protocol's input routine.
+        */
+       OSAddAtomic(npkts_in_chain, &ipstat.ips_delivered);
 
-       ipf_init();
+#if IPFIREWALL
+       if (args->fwai_next_hop && ip->ip_p == IPPROTO_TCP) {
+               /* TCP needs IPFORWARD info if available */
+               struct m_tag *fwd_tag;
+               struct ip_fwd_tag *ipfwd_tag;
 
-#if IPSEC
-       sadb_stat_mutex_grp_attr = lck_grp_attr_alloc_init();
-       sadb_stat_mutex_grp = lck_grp_alloc_init("sadb_stat",
-           sadb_stat_mutex_grp_attr);
-       sadb_stat_mutex_attr = lck_attr_alloc_init();
-       lck_mtx_init(sadb_stat_mutex, sadb_stat_mutex_grp,
-           sadb_stat_mutex_attr);
+               VERIFY(npkts_in_chain == 1);
+               fwd_tag = m_tag_create(KERNEL_MODULE_TAG_ID,
+                   KERNEL_TAG_TYPE_IPFORWARD, sizeof (*ipfwd_tag),
+                   M_NOWAIT, m);
+               if (fwd_tag == NULL)
+                       goto bad;
 
-#endif
-       arp_init();
-}
+               ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1);
+               ipfwd_tag->next_hop = args->fwai_next_hop;
 
-/*
- * Initialize IPv4 source address hash table.
- */
-static void
-in_ifaddrhashtbl_init(void)
-{
-       int i, k, p;
+               m_tag_prepend(m, fwd_tag);
 
-       if (in_ifaddrhashtbl != NULL)
-               return;
+               KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr,
+                   ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
 
-       PE_parse_boot_argn("inaddr_nhash", &inaddr_nhash,
-           sizeof (inaddr_nhash));
-       if (inaddr_nhash == 0)
-               inaddr_nhash = INADDR_NHASH;
+               /* TCP deals with its own locking */
+               ip_proto_dispatch_in(m, hlen, ip->ip_p, 0);
+       } else {
+               KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr,
+                   ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
 
-       MALLOC(in_ifaddrhashtbl, struct in_ifaddrhashhead *,
-           inaddr_nhash * sizeof (*in_ifaddrhashtbl),
-           M_IFADDR, M_WAITOK | M_ZERO);
-       if (in_ifaddrhashtbl == NULL)
-               panic("in_ifaddrhashtbl_init allocation failed");
+               ip_input_dispatch_chain(m);
 
-       /*
-        * Generate the next largest prime greater than inaddr_nhash.
-        */
-       k = (inaddr_nhash % 2 == 0) ? inaddr_nhash + 1 : inaddr_nhash + 2;
-       for (;;) {
-               p = 1;
-               for (i = 3; i * i <= k; i += 2) {
-                       if (k % i == 0)
-                               p = 0;
-               }
-               if (p == 1)
-                       break;
-               k += 2;
        }
-       inaddr_hashp = k;
-}
+#else /* !IPFIREWALL */
+       ip_input_dispatch_chain(m);
 
-u_int32_t
-inaddr_hashval(u_int32_t key)
-{
-       /*
-        * The hash index is the computed prime times the key modulo
-        * the hash size, as documented in "Introduction to Algorithms"
-        * (Cormen, Leiserson, Rivest).
-        */
-       if (inaddr_nhash > 1)
-               return ((key * inaddr_hashp) % inaddr_nhash);
-       else
-               return (0);
+#endif /* !IPFIREWALL */
+       KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0);
+       return;
+bad:
+       KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0);
+       m_freem(m);
 }
 
 void
-ip_proto_dispatch_in_wrapper(struct mbuf *m, int hlen, u_int8_t proto)
-{
-       ip_proto_dispatch_in(m, hlen, proto, 0);
-}
-
-__private_extern__ void
-ip_proto_dispatch_in(struct mbuf *m, int hlen, u_int8_t proto,
-    ipfilter_t inject_ipfref)
+ip_input_process_list(struct mbuf *packet_list)
 {
-       struct ipfilter *filter;
-       int seen = (inject_ipfref == NULL);
-       int     changed_header = 0;
-       struct ip *ip;
-       void (*pr_input)(struct mbuf *, int len);
-
-       if (!TAILQ_EMPTY(&ipv4_filters)) {
-               ipf_ref();
-               TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
-                       if (seen == 0) {
-                               if ((struct ipfilter *)inject_ipfref == filter)
-                                       seen = 1;
-                       } else if (filter->ipf_filter.ipf_input) {
-                               errno_t result;
-
-                               if (changed_header == 0) {
-                                       /*
-                                        * Perform IP header alignment fixup,
-                                        * if needed, before passing packet
-                                        * into filter(s).
-                                        */
-                                       IP_HDR_ALIGNMENT_FIXUP(m,
-                                           m->m_pkthdr.rcvif, ipf_unref());
-
-                                       /* ipf_unref() already called */
-                                       if (m == NULL)
-                                               return;
-
-                                       changed_header = 1;
-                                       ip = mtod(m, struct ip *);
-                                       ip->ip_len = htons(ip->ip_len + hlen);
-                                       ip->ip_off = htons(ip->ip_off);
-                                       ip->ip_sum = 0;
-                                       ip->ip_sum = ip_cksum_hdr_in(m, hlen);
-                               }
-                               result = filter->ipf_filter.ipf_input(
-                                   filter->ipf_filter.cookie, (mbuf_t *)&m,
-                                   hlen, proto);
-                               if (result == EJUSTRETURN) {
-                                       ipf_unref();
-                                       return;
-                               }
-                               if (result != 0) {
-                                       ipf_unref();
-                                       m_freem(m);
-                                       return;
-                               }
+       pktchain_elm_t  pktchain_tbl[PKTTBL_SZ];
+
+       struct mbuf     *packet = NULL;
+       struct mbuf     *modm = NULL; /* modified mbuf */
+       int             retval = 0;
+       u_int32_t       div_info = 0;
+       int             ours = 0;
+       struct timeval start_tv;
+       int     num_pkts = 0;
+       int chain = 0;
+       struct ip_fw_in_args       args;
+
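+       /*
+        * Two passes over the input list: ip_input_first_pass() validates
+        * each packet and, when possible, links it into a per-flow chain in
+        * pktchain_tbl[]; ip_input_second_pass() then performs the address
+        * lookup and delivery once per chain.  Chaining stops early on a
+        * table collision, on a packet that must not be chained, or after
+        * ip_chainsz packets, and the loop restarts with the remaining list.
+        * When rx chaining is disabled, each packet goes to ip_input().
+        */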
+       if (ip_chaining == 0) {
+               struct mbuf *m = packet_list;
+               if (ip_input_measure)
+                       net_perf_start_time(&net_perf, &start_tv);
+               while (m) {
+                       packet_list = mbuf_nextpkt(m);
+                       mbuf_setnextpkt(m, NULL);
+                       ip_input(m);
+                       m = packet_list;
+                       num_pkts++;
+               }
+               if (ip_input_measure)
+                       net_perf_measure_time(&net_perf, &start_tv, num_pkts);
+               return;
+       }
+       if (ip_input_measure)
+               net_perf_start_time(&net_perf, &start_tv);
+
+       bzero(&pktchain_tbl, sizeof(pktchain_tbl));
+restart_list_process:
+       chain = 0;
+       for (packet = packet_list; packet; packet = packet_list) {
+               packet_list = mbuf_nextpkt(packet);
+               mbuf_setnextpkt(packet, NULL);
+
+               num_pkts++;
+               modm = NULL;
+               div_info = 0;
+               bzero(&args, sizeof (args));
+
+               retval = ip_input_first_pass(packet, &div_info, &args,
+                   &ours, &modm);
+
+               if (retval == IPINPUT_DOCHAIN) {
+                       if (modm)
+                               packet = modm;
+                       packet = ip_chain_insert(packet, &pktchain_tbl[0]);
+                       if (packet == NULL) {
+                               ipstat.ips_rxc_chained++;
+                               chain++;
+                               if (chain > ip_chainsz)
+                                       break;
+                       } else {
+                               ipstat.ips_rxc_collisions++;
+                               break;
                        }
+               } else if (retval == IPINPUT_DONTCHAIN) {
+                       /* in order to preserve order, exit from chaining */
+                       if (modm)
+                               packet = modm;
+                       ipstat.ips_rxc_notchain++;
+                       break;
+               } else {
+                       /* packet was freed or delivered, do nothing. */
                }
-               ipf_unref();
        }
 
-       /* Perform IP header alignment fixup (post-filters), if needed */
-       IP_HDR_ALIGNMENT_FIXUP(m, m->m_pkthdr.rcvif, return);
+       /* do second pass here for pktchain_tbl */
+       if (chain)
+               ip_input_second_pass_loop_tbl(&pktchain_tbl[0], &args);
 
-       /*
-        * If there isn't a specific lock for the protocol
-        * we're about to call, use the generic lock for AF_INET.
-        * otherwise let the protocol deal with its own locking
-        */
-       ip = mtod(m, struct ip *);
+       if (packet) {
+               /*
+                * The equivalent update for the chaining case is performed
+                * in ip_input_second_pass_loop_tbl().
+                */
+               if (ip_input_measure)
+                       net_perf_histogram(&net_perf, 1);
 
-       if (changed_header) {
-               ip->ip_len = ntohs(ip->ip_len) - hlen;
-               ip->ip_off = ntohs(ip->ip_off);
+               ip_input_second_pass(packet, packet->m_pkthdr.rcvif, div_info,
+                   1, packet->m_pkthdr.len, &args, ours);
        }
 
-       if ((pr_input = ip_protox[ip->ip_p]->pr_input) == NULL) {
-               m_freem(m);
-       } else if (!(ip_protox[ip->ip_p]->pr_flags & PR_PROTOLOCK)) {
-               lck_mtx_lock(inet_domain_mutex);
-               pr_input(m, hlen);
-               lck_mtx_unlock(inet_domain_mutex);
-       } else {
-               pr_input(m, hlen);
-       }
-}
+       if (packet_list)
+               goto restart_list_process;
 
+       if (ip_input_measure)
+               net_perf_measure_time(&net_perf, &start_tv, num_pkts);
+}
 /*
  * Ip input routine.  Checksum and byte swap header.  If fragmented
  * try to reassemble.  Process options.  Pass to next level.
@@ -664,6 +1767,8 @@ ip_input(struct mbuf *m)
        inifp = m->m_pkthdr.rcvif;
        VERIFY(inifp != NULL);
 
+       ipstat.ips_rxc_notlist++;
+
        /* Perform IP header alignment fixup, if needed */
        IP_HDR_ALIGNMENT_FIXUP(m, inifp, goto bad);
 
@@ -833,7 +1938,7 @@ ipfw_tags_done:
 
        /*
         * Naively assume we can attribute inbound data to the route we would
-        * use to send to this destination. Asymetric routing breaks this
+        * use to send to this destination. Asymmetric routing breaks this
         * assumption, but it still allows us to account for traffic from
         * a remote node in the routing table.
         * this has a very significant performance impact so we bypass
@@ -3249,3 +4354,58 @@ ip_gre_register_input(gre_input_func_t fn)
 
        return (0);
 }
+
+static int
+sysctl_reset_ip_input_stats SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       int error, i;
+
+       i = ip_input_measure;
+       error = sysctl_handle_int(oidp, &i, 0, req);
+       if (error || req->newptr == USER_ADDR_NULL)
+               goto done;
+       /* impose bounds */
+       if (i < 0 || i > 1) {
+               error = EINVAL;
+               goto done;
+       }
+       if (ip_input_measure != i && i == 1) {
+               net_perf_initialize(&net_perf, ip_input_measure_bins);
+       }
+       ip_input_measure = i;
+done:
+       return (error);
+}
+
+static int
+sysctl_ip_input_measure_bins SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       int error;
+       uint64_t i;
+
+       i = ip_input_measure_bins;
+       error = sysctl_handle_quad(oidp, &i, 0, req);
+       if (error || req->newptr == USER_ADDR_NULL)
+               goto done;
+       /* validate data */
+       if (!net_perf_validate_bins(i)) {
+               error = EINVAL;
+               goto done;
+       }
+       ip_input_measure_bins = i;
+done:
+       return (error);
+}
+
+static int
+sysctl_ip_input_getperf SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       if (req->oldptr == USER_ADDR_NULL)
+               req->oldlen = (size_t)sizeof (struct ipstat);
+
+       return (SYSCTL_OUT(req, &net_perf, MIN(sizeof (net_perf), req->oldlen)));
+}
+
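
The added ip_input_process_list() path above handles receive lists in two passes: ip_input_first_pass() classifies each mbuf, and when it returns IPINPUT_DOCHAIN the packet is inserted into pktchain_tbl via ip_chain_insert(); once a chain grows past ip_chainsz, or a collision or non-chainable packet shows up, ip_input_second_pass_loop_tbl() delivers the accumulated chains in bulk. Below is a minimal userland sketch of the same group-then-process idea; the bucketing key is invented for illustration, whereas the kernel chains mbufs by flow.

/*
 * Group-then-process sketch: the first pass sorts "packets" into chains,
 * the second pass hands each chain to the next stage in one call.
 * Hypothetical userland code, not the kernel's pktchain_tbl.
 */
#include <stdio.h>

#define NBUCKETS 4
#define MAXCHAIN 8

int
main(void)
{
	int pkts[] = { 12, 7, 20, 3, 16, 9, 5, 8 };
	int tbl[NBUCKETS][MAXCHAIN];
	int cnt[NBUCKETS] = { 0 };

	/* first pass: classify each packet and append it to its chain */
	for (unsigned i = 0; i < sizeof (pkts) / sizeof (pkts[0]); i++) {
		int b = pkts[i] % NBUCKETS;	/* invented key */
		if (cnt[b] < MAXCHAIN)
			tbl[b][cnt[b]++] = pkts[i];
	}

	/* second pass: deliver each chain as a unit */
	for (int b = 0; b < NBUCKETS; b++) {
		if (cnt[b] == 0)
			continue;
		printf("chain %d:", b);
		for (int j = 0; j < cnt[b]; j++)
			printf(" %d", tbl[b][j]);
		printf("\n");
	}
	return (0);
}

Batching is what lets the second pass amortize per-flow work across every packet in a chain, which the new ips_rxc_* counters added to struct ipstat further down account for.
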
index f59d299a91141ee7a9093b58eada14ed7a40f239..383751d4d07d559537beb48acd059ab80a1b5e6a 100644 (file)
@@ -95,6 +95,7 @@
 #include <net/ntstat.h>
 #include <net/net_osdep.h>
 #include <net/dlil.h>
+#include <net/net_perf.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 
 u_short ip_id;
 
+static int sysctl_reset_ip_output_stats SYSCTL_HANDLER_ARGS;
+static int sysctl_ip_output_measure_bins SYSCTL_HANDLER_ARGS;
+static int sysctl_ip_output_getperf SYSCTL_HANDLER_ARGS;
 static void ip_out_cksum_stats(int, u_int32_t);
 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
 static int ip_optcopy(struct ip *, struct ip *);
@@ -184,6 +188,24 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, select_srcif_debug,
        CTLFLAG_RW | CTLFLAG_LOCKED, &ip_select_srcif_debug, 0,
        "log source interface selection debug info");
 
+static int ip_output_measure = 0;
+SYSCTL_PROC(_net_inet_ip, OID_AUTO, output_perf,
+       CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+       &ip_output_measure, 0, sysctl_reset_ip_output_stats, "I",
+       "Do time measurement");
+
+static uint64_t ip_output_measure_bins = 0;
+SYSCTL_PROC(_net_inet_ip, OID_AUTO, output_perf_bins,
+       CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &ip_output_measure_bins, 0,
+       sysctl_ip_output_measure_bins, "I",
+       "bins for chaining performance data histogram");
+
+static net_perf_t net_perf;
+SYSCTL_PROC(_net_inet_ip, OID_AUTO, output_perf_data,
+       CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
+       0, 0, sysctl_ip_output_getperf, "S,net_perf",
+       "IP output performance data (struct net_perf, net/net_perf.h)");
+
 #define        IMO_TRACE_HIST_SIZE     32      /* size of trace history */
 
 /* For gdb */
@@ -259,8 +281,10 @@ ip_output_list(struct mbuf *m0, int packetchain, struct mbuf *opt,
        ipfilter_t inject_filter_ref = NULL;
        struct mbuf *packetlist;
        uint32_t sw_csum, pktcnt = 0, scnt = 0, bytecnt = 0;
+       uint32_t packets_processed = 0;
        unsigned int ifscope = IFSCOPE_NONE;
        struct flowadv *adv = NULL;
+       struct timeval start_tv;
 #if IPSEC
        struct socket *so = NULL;
        struct secpolicy *sp = NULL;
@@ -326,6 +350,8 @@ ip_output_list(struct mbuf *m0, int packetchain, struct mbuf *opt,
         ((_ipobf).noexpensive && IFNET_IS_EXPENSIVE(_ifp)) ||          \
         (!(_ipobf).awdl_unrestricted && IFNET_IS_AWDL_RESTRICTED(_ifp)))
 
+       if (ip_output_measure)
+               net_perf_start_time(&net_perf, &start_tv);
        KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
 
        VERIFY(m0->m_flags & M_PKTHDR);
@@ -495,6 +521,7 @@ ipfw_tags_done:
 #endif /* DUMMYNET */
 
 loopit:
+       packets_processed++;
        ipobf.isbroadcast = FALSE;
        ipobf.didfilter = FALSE;
 #if IPFIREWALL_FORWARD
@@ -1172,6 +1199,11 @@ sendit:
                necp_mark_packet_from_ip(m, necp_matched_policy_id);
                switch (necp_result) {
                        case NECP_KERNEL_POLICY_RESULT_PASS:
+                               /* Check if the interface is allowed */
+                               if (!necp_packet_is_allowed_over_interface(m, ifp)) {
+                                       error = EHOSTUNREACH;
+                                       goto bad;
+                               }
                                goto skip_ipsec;
                        case NECP_KERNEL_POLICY_RESULT_DROP:
                        case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT:
@@ -1182,9 +1214,20 @@ sendit:
                                /* Verify that the packet is being routed to the tunnel */
                                struct ifnet *policy_ifp = necp_get_ifnet_from_result_parameter(&necp_result_parameter);
                                if (policy_ifp == ifp) {
+                                       /* Check if the interface is allowed */
+                                       if (!necp_packet_is_allowed_over_interface(m, ifp)) {
+                                               error = EHOSTUNREACH;
+                                               goto bad;
+                                       }
                                        goto skip_ipsec;
                                } else {
                                        if (necp_packet_can_rebind_to_ifnet(m, policy_ifp, &necp_route, AF_INET)) {
+                                               /* Check if the interface is allowed */
+                                               if (!necp_packet_is_allowed_over_interface(m, policy_ifp)) {
+                                                       error = EHOSTUNREACH;
+                                                       goto bad;
+                                               }
+
                                                /* Set ifp to the tunnel interface, since it is compatible with the packet */
                                                ifp = policy_ifp;
                                                ro = &necp_route;
@@ -1200,8 +1243,13 @@ sendit:
                                break;
                }
        }
+       /* Catch-all to check if the interface is allowed */
+       if (!necp_packet_is_allowed_over_interface(m, ifp)) {
+               error = EHOSTUNREACH;
+               goto bad;
+       }
 #endif /* NECP */
-       
+
 #if IPSEC
        if (ipsec_bypass != 0 || (flags & IP_NOIPSEC))
                goto skip_ipsec;
@@ -1896,6 +1944,10 @@ done:
 #endif /* IPFIREWALL_FORWARD */
 
        KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error, 0, 0, 0, 0);
+       if (ip_output_measure) {
+               net_perf_measure_time(&net_perf, &start_tv, packets_processed);
+               net_perf_histogram(&net_perf, packets_processed);
+       }
        return (error);
 bad:
        if (pktcnt > 0)
@@ -3466,3 +3518,58 @@ ip_gre_output(struct mbuf *m)
 
        return (error);
 }
+
+static int
+sysctl_reset_ip_output_stats SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       int error, i;
+
+       i = ip_output_measure;
+       error = sysctl_handle_int(oidp, &i, 0, req);
+       if (error || req->newptr == USER_ADDR_NULL)
+               goto done;
+       /* impose bounds */
+       if (i < 0 || i > 1) {
+               error = EINVAL;
+               goto done;
+       }
+       if (ip_output_measure != i && i == 1) {
+               net_perf_initialize(&net_perf, ip_output_measure_bins);
+       }
+       ip_output_measure = i;
+done:
+       return (error);
+}
+
+static int
+sysctl_ip_output_measure_bins SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       int error;
+       uint64_t i;
+
+       i = ip_output_measure_bins;
+       error = sysctl_handle_quad(oidp, &i, 0, req);
+       if (error || req->newptr == USER_ADDR_NULL)
+               goto done;
+       /* validate data */
+       if (!net_perf_validate_bins(i)) {
+               error = EINVAL;
+               goto done;
+       }
+       ip_output_measure_bins = i;
+done:
+       return (error);
+}
+
+static int
+sysctl_ip_output_getperf SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       if (req->oldptr == USER_ADDR_NULL)
+               req->oldlen = (size_t)sizeof (struct ipstat);
+
+       return (SYSCTL_OUT(req, &net_perf, MIN(sizeof (net_perf), req->oldlen)));
+}
+
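
The three SYSCTL_PROC entries above surface the measurement knobs as net.inet.ip.output_perf (on/off), net.inet.ip.output_perf_bins (histogram bins, checked by net_perf_validate_bins()), and net.inet.ip.output_perf_data (read-only struct net_perf snapshot). A small userland sketch that reads the snapshot follows; it treats the result as raw bytes so it does not depend on the net/net_perf.h layout.

/*
 * Read net.inet.ip.output_perf_data from userland.  The sizing call may
 * over-report (the handler answers with sizeof (struct ipstat) when oldptr
 * is NULL), but the read itself is clamped to sizeof (net_perf) by SYSCTL_OUT.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/sysctl.h>

int
main(void)
{
	size_t len = 0;
	unsigned char *buf;

	if (sysctlbyname("net.inet.ip.output_perf_data", NULL, &len,
	    NULL, 0) != 0) {
		perror("sysctlbyname(size)");
		return (1);
	}
	if ((buf = malloc(len)) == NULL)
		return (1);
	if (sysctlbyname("net.inet.ip.output_perf_data", buf, &len,
	    NULL, 0) != 0) {
		perror("sysctlbyname(read)");
		free(buf);
		return (1);
	}
	printf("net.inet.ip.output_perf_data: %zu bytes\n", len);
	free(buf);
	return (0);
}

Measurement has to be enabled first by writing 1 to net.inet.ip.output_perf as root, which is also what triggers net_perf_initialize() in sysctl_reset_ip_output_stats().
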
index 9440d9ad383537d7543a7638c24d33033a6106c3..99982111d18a2d82a2ed81670211630a53b0c6ed 100644 (file)
@@ -221,6 +221,13 @@ struct     ipstat {
        u_int32_t ips_snd_swcsum_bytes; /* ip hdr swcksum (outbound), bytes */
        u_int32_t ips_adj;              /* total packets trimmed/adjusted */
        u_int32_t ips_adj_hwcsum_clr;   /* hwcksum discarded during adj */
+       u_int32_t ips_rxc_collisions;   /* rx chaining collisions */
+       u_int32_t ips_rxc_chained;      /* rx chains */
+       u_int32_t ips_rxc_notchain;     /* rx bypassed chaining */
+       u_int32_t ips_rxc_chainsz_gt2;  /* rx chain size greater than 2 */
+       u_int32_t ips_rxc_chainsz_gt4;  /* rx chain size greater than 4 */
+       u_int32_t ips_rxc_notlist;      /* count of pkts through ip_input */
+
 };
 
 struct ip_linklocal_stat {
index 9dfc68ed3ac6dc2503b5467297c64503751ecfb7..304c4c05ca15986d13de9c652c9ef59b3cc45e13 100644 (file)
@@ -198,21 +198,23 @@ mp_pcbinfo_detach(struct mppcbinfo *mppi)
 int
 mp_pcballoc(struct socket *so, struct mppcbinfo *mppi)
 {
-       struct mppcb *mpp;
+       struct mppcb *mpp = NULL;
 
        VERIFY(sotomppcb(so) == NULL);
 
        lck_mtx_lock(&mppi->mppi_lock);
        if (mppi->mppi_count >= mptcp_socket_limit) {
                lck_mtx_unlock(&mppi->mppi_lock);
-               mptcplog((LOG_ERR, "Reached MPTCP socket limit."));
+               mptcplog((LOG_ERR, "MPTCP Socket: Reached MPTCP socket limit."),
+                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
                return (ENOBUFS);
        }
        lck_mtx_unlock(&mppi->mppi_lock);
 
        mpp = zalloc(mppi->mppi_zone);
-       if (mpp == NULL)
+       if (mpp == NULL) {
                return (ENOBUFS);
+       }
 
        bzero(mpp, mppi->mppi_size);
        lck_mtx_init(&mpp->mpp_lock, mppi->mppi_lock_grp, mppi->mppi_lock_attr);
@@ -221,6 +223,12 @@ mp_pcballoc(struct socket *so, struct mppcbinfo *mppi)
        mpp->mpp_socket = so;
        so->so_pcb = mpp;
 
+       if (NULL == mppi->mppi_pcbe_create(so, mpp)) {
+               lck_mtx_destroy(&mpp->mpp_lock, mppi->mppi_lock_grp);
+               zfree(mppi->mppi_zone, mpp);
+               return (ENOBUFS);
+       }
+
        lck_mtx_lock(&mppi->mppi_lock);
        mpp->mpp_flags |= MPP_ATTACHED;
        TAILQ_INSERT_TAIL(&mppi->mppi_pcbs, mpp, mpp_entry);
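
mp_pcballoc() now calls the new mppi_pcbe_create hook right after zeroing the PCB, and unwinds the lock and zone allocation when the extended create fails. Below is a hedged userland sketch of that allocate/extend/roll-back shape, with hypothetical types standing in for the mppcb machinery.

/*
 * Allocate a control block, run an "extended create" hook, and roll the
 * allocation back if the hook fails -- mirroring the mp_pcballoc() change.
 */
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

struct pcb {
	void *extension;
};

/* stand-in for mppi_pcbe_create: returns NULL on failure */
static void *
pcb_extension_create(struct pcb *pcb)
{
	pcb->extension = malloc(32);
	return (pcb->extension);
}

static int
pcb_alloc(struct pcb **out)
{
	struct pcb *pcb = calloc(1, sizeof (*pcb));

	if (pcb == NULL)
		return (ENOBUFS);
	if (pcb_extension_create(pcb) == NULL) {
		/* undo everything done so far, exactly once */
		free(pcb);
		return (ENOBUFS);
	}
	*out = pcb;
	return (0);
}

int
main(void)
{
	struct pcb *pcb;

	if (pcb_alloc(&pcb) == 0) {
		printf("pcb allocated with extension %p\n", pcb->extension);
		free(pcb->extension);
		free(pcb);
	}
	return (0);
}
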
index 3c317a6926eef5aff1f65dcc9cdd280eecc22aad..eba202b85a4b32494251a88461c7649d763d61ca 100644 (file)
@@ -74,6 +74,8 @@ struct mppcbinfo {
        decl_lck_mtx_data(, mppi_lock);         /* global PCB lock */
        uint32_t (*mppi_gc)(struct mppcbinfo *); /* garbage collector func */
        uint32_t (*mppi_timer)(struct mppcbinfo *); /* timer func */
+       /* Extended pcb create func */
+       void *(*mppi_pcbe_create) (struct socket *mp_so, struct mppcb *mpp);
 };
 
 __BEGIN_DECLS
index 1945ecfcfeb9e6660277a4ef25e7dd752a02c619..d218931be567b8f8d4a0f89c21db93404325690d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2012-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -61,10 +61,6 @@ int mptcp_enable = 1;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
        &mptcp_enable, 0, "Enable Multipath TCP Support");
 
-int mptcp_dbg = 0;
-SYSCTL_INT(_net_inet_mptcp, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mptcp_dbg, 0, "Enable Multipath TCP Debugging");
-
 /* Number of times to try negotiating MPTCP on SYN retransmissions */
 int mptcp_mpcap_retries = MPTCP_CAPABLE_RETRIES;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mptcp_cap_retr,
@@ -130,6 +126,61 @@ int mptcp_rwnotify = 0;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rwnotify, CTLFLAG_RW | CTLFLAG_LOCKED,
        &mptcp_rwnotify, 0, "Enable RW notify on resume");
 
+/*
+ * Using RTT history for sending new data
+ */
+int mptcp_use_rtthist = 1;
+SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rtthist, CTLFLAG_RW | CTLFLAG_LOCKED,
+       &mptcp_use_rtthist, 0, "Disable RTT History");
+
+#define MPTCP_RTTHIST_MINTHRESH 500
+int mptcp_rtthist_rtthresh = 600;
+SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rtthist_thresh, CTLFLAG_RW | CTLFLAG_LOCKED,
+       &mptcp_rtthist_rtthresh, 0, "Rtt threshold");
+
+/*
+ * Use RTO history for sending new data
+ */
+int mptcp_use_rto = 1;
+SYSCTL_INT(_net_inet_mptcp, OID_AUTO, userto, CTLFLAG_RW | CTLFLAG_LOCKED,
+       &mptcp_use_rto, 0, "Disable RTO for subflow selection");
+
+#define MPTCP_RTO_MINTHRESH 1000
+int mptcp_rtothresh = 1500;
+SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rto_thresh, CTLFLAG_RW | CTLFLAG_LOCKED,
+       &mptcp_rtothresh, 0, "RTO threshold");
+
+/*
+ * Use server's chosen path for sending new data
+ */
+int mptcp_peerswitch = 1;
+SYSCTL_INT(_net_inet_mptcp, OID_AUTO, use_peer, CTLFLAG_RW | CTLFLAG_LOCKED,
+       &mptcp_peerswitch, 0, "Use peer");
+
+#define MPTCP_PEERSWITCH_CNTMIN 3
+uint32_t mptcp_peerswitch_cnt = 3;
+SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, peerswitchno, CTLFLAG_RW | CTLFLAG_LOCKED,
+       &mptcp_peerswitch_cnt, 0, "Set threshold based on peer's data arrival");
+
+/*
+ * Probe the preferred path, when it is not in use
+ */
+#define MPTCP_PROBETO_MIN 500
+uint32_t mptcp_probeto = 1000;
+SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probeto, CTLFLAG_RW | CTLFLAG_LOCKED,
+       &mptcp_probeto, 0, "Disable probing by setting to 0");
+
+#define MPTCP_PROBE_MX 15
+uint32_t mptcp_probecnt = 5;
+SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probecnt, CTLFLAG_RW | CTLFLAG_LOCKED,
+       &mptcp_probecnt, 0, "Number of probe writes");
+
+/*
+ * Static declarations
+ */
+static int mptcp_validate_csum(struct tcpcb *, struct mbuf *, int);
+static uint16_t mptcp_input_csum(struct tcpcb *, struct mbuf *, int);
+
 /*
  * MPTCP input, called when data has been read from a subflow socket.
  */
@@ -184,7 +235,8 @@ fallback:
                    struct sockbuf *, &mp_so->so_snd,
                    struct mptses *, mpte);
                count = mp_so->so_rcv.sb_cc - count;
-               mptcplog3((LOG_DEBUG, "%s: fread %d bytes\n", __func__, count));
+               mptcplog((LOG_DEBUG, "MPTCP Receiver: Fallback read %d bytes\n",
+                   count), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
                return;
        }
 
@@ -255,8 +307,9 @@ fallback:
                        } else {
                                m_adj(m, (mp_tp->mpt_rcvatmark - mb_dsn));
                        }
-                       mptcplog((LOG_INFO, "%s: %llu %d 2 \n", __func__,
-                           mp_tp->mpt_rcvatmark, m->m_pkthdr.len));
+                       mptcplog((LOG_INFO, "MPTCP Receiver: Left Edge %llu\n",
+                           mp_tp->mpt_rcvatmark),
+                           MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
                }
 
                MPT_UNLOCK(mp_tp);
@@ -272,7 +325,9 @@ fallback:
                count = mp_so->so_rcv.sb_cc - count;
                tcpstat.tcps_mp_rcvtotal++;
                tcpstat.tcps_mp_rcvbytes += count;
-               mptcplog3((LOG_DEBUG, "%s: read %d bytes\n", __func__, count));
+               mptcplog((LOG_DEBUG, "MPTCP Receiver: Read %d bytes\n", count),
+                   MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
+
                /*
                 * The data received at the MPTCP layer will never exceed the
                 * receive window because anything to the right of the
@@ -299,25 +354,30 @@ mptcp_output(struct mptses *mpte)
        struct mptsub *mpts;
        struct mptsub *mpts_tried = NULL;
        struct socket *mp_so;
+       struct mptsub *preferred_mpts = NULL;
        int error = 0;
 
        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        mp_so = mpte->mpte_mppcb->mpp_socket;
        if (mp_so->so_state & SS_CANTSENDMORE) {
+               mptcplog((LOG_DEBUG, "MPTCP Sender: cantsendmore\n"),
+                   MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
                return (EPIPE);
        }
 
 try_again:
        /* get the "best" subflow to be used for transmission */
-       mpts = mptcp_get_subflow(mpte, NULL);
+       mpts = mptcp_get_subflow(mpte, NULL, &preferred_mpts);
        if (mpts == NULL) {
-               mptcplog((LOG_ERR, "%s: mp_so 0x%llx has no usable subflow\n",
-                   __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)));
+               mptcplog((LOG_ERR, "MPTCP Sender: mp_so 0x%llx no subflow\n",
+                   (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
+                   MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
                goto out;
        }
 
-       mptcplog3((LOG_INFO, "%s: mp_so 0x%llx cid %d \n", __func__,
-           (uint64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid));
+       mptcplog((LOG_DEBUG, "MPTCP Sender: mp_so 0x%llx using cid %d \n",
+           (uint64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid),
+           MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
 
        /* In case there's just one flow, we reattempt later */
        MPTS_LOCK(mpts);
@@ -328,11 +388,10 @@ try_again:
                mpts_tried->mpts_flags &= ~MPTSF_FAILINGOVER;
                mpts_tried->mpts_flags |= MPTSF_ACTIVE;
                MPTS_UNLOCK(mpts_tried);
-               MPT_LOCK(mpte->mpte_mptcb);
-               mptcp_start_timer(mpte->mpte_mptcb, MPTT_REXMT);
-               MPT_UNLOCK(mpte->mpte_mptcb);
-               mptcplog((LOG_INFO, "%s: mp_so 0x%llx retry later\n",
-                   __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)));
+               mptcp_start_timer(mpte, MPTT_REXMT);
+               mptcplog((LOG_DEBUG, "MPTCP Sender: mp_so 0x%llx retry later\n",
+                   (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
+                   MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
                goto out;
        }
 
@@ -345,19 +404,53 @@ try_again:
                mpts->mpts_flags &= ~MPTSF_ACTIVE;
                mpts_tried = mpts;
                MPTS_UNLOCK(mpts);
-               mptcplog((LOG_INFO, "%s: error = %d \n", __func__, error));
+               mptcplog((LOG_INFO, "MPTCP Sender: Error = %d \n", error),
+                   MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
                goto try_again;
        }
        /* The model is to have only one active flow at a time */
        mpts->mpts_flags |= MPTSF_ACTIVE;
+       mpts->mpts_probesoon = mpts->mpts_probecnt = 0;
        MPTS_UNLOCK(mpts);
+
+       /* Allows us to update the smoothed rtt */
+       if ((mptcp_probeto) && (mptcp_probeto >= MPTCP_PROBETO_MIN) &&
+           (mpts != preferred_mpts) && (preferred_mpts != NULL)) {
+               MPTS_LOCK(preferred_mpts);
+               if (preferred_mpts->mpts_probesoon) {
+                       if ((tcp_now - preferred_mpts->mpts_probesoon) >
+                           mptcp_probeto) {
+                               (void) mptcp_subflow_output(mpte, preferred_mpts);
+                               if (preferred_mpts->mpts_probecnt >=
+                                   MIN(mptcp_probecnt, MPTCP_PROBE_MX)) {
+                                       preferred_mpts->mpts_probesoon = 0;
+                                       preferred_mpts->mpts_probecnt = 0;
+                               }
+                       }
+               } else {
+                       preferred_mpts->mpts_probesoon = tcp_now;
+                       preferred_mpts->mpts_probecnt = 0;
+               }
+               MPTS_UNLOCK(preferred_mpts);
+       }
+
        if (mpte->mpte_active_sub == NULL) {
                mpte->mpte_active_sub = mpts;
        } else if (mpte->mpte_active_sub != mpts) {
+               mptcplog((LOG_DEBUG, "MPTCP Sender: switch [cid %d, srtt %d] "
+                   "to [cid %d, srtt %d]\n",
+                   mpte->mpte_active_sub->mpts_connid,
+                   mpte->mpte_active_sub->mpts_srtt >> 5,
+                   mpts->mpts_connid,
+                   mpts->mpts_srtt >> 5),
+                   MPTCP_SENDER_DBG | MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
+
                MPTS_LOCK(mpte->mpte_active_sub);
                mpte->mpte_active_sub->mpts_flags &= ~MPTSF_ACTIVE;
+               mpts->mpts_peerswitch = 0;
                MPTS_UNLOCK(mpte->mpte_active_sub);
                mpte->mpte_active_sub = mpts;
+               tcpstat.tcps_mp_switches++;
        }
 out:
        /* subflow errors should not be percolated back up */
@@ -367,13 +460,17 @@ out:
 /*
  * Return the most eligible subflow to be used for sending data.
  * This function also serves to check if any alternate subflow is available
- * or not.
+ * or not. best and second_best flows are chosen by their priority. third_best
+ * could be best or second_best but is under loss at the time of evaluation.
  */
 struct mptsub *
-mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore)
+mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **preferred)
 {
        struct mptsub *mpts;
-       struct mptsub *fallback = NULL;
+       struct mptsub *best = NULL;
+       struct mptsub *second_best = NULL;
+       struct mptsub *third_best = NULL;
+       struct mptsub *symptoms_best = NULL;
        struct socket *so = NULL;
 
        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
@@ -389,6 +486,7 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore)
                /* There can only be one subflow in degraded state */
                if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
                        MPTS_UNLOCK(mpts);
+                       best = mpts;
                        break;
                }
 
@@ -421,10 +519,14 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore)
                                    (mptcp_no_rto_spike(so))) {
                                        mpts->mpts_flags &= ~MPTSF_FAILINGOVER;
                                        so->so_flags &= ~SOF_MP_TRYFAILOVER;
-                                       fallback = mpts;
                                        socket_unlock(so, 1);
                                } else {
-                                       fallback = mpts;
+                                       third_best = mpts;
+                                       mptcplog((LOG_DEBUG, "MPTCP Sender: "
+                                           "%s cid %d in failover\n",
+                                           __func__, third_best->mpts_connid),
+                                           MPTCP_SENDER_DBG,
+                                           MPTCP_LOGLVL_VERBOSE);
                                        socket_unlock(so, 1);
                                        MPTS_UNLOCK(mpts);
                                        continue;
@@ -435,25 +537,82 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore)
                        }
                }
 
+               /* When there are no preferred flows, use first one in list */
+               if ((!second_best) && !(mpts->mpts_flags & MPTSF_PREFERRED))
+                       second_best = mpts;
+
                if (mpts->mpts_flags & MPTSF_PREFERRED) {
-                       MPTS_UNLOCK(mpts);
-                       break;
+                       best = mpts;
                }
 
-               /* When there are no preferred flows, use first one in list */
-               fallback = mpts;
-
                MPTS_UNLOCK(mpts);
        }
+
        /*
         * If there is no preferred or backup subflow, and there is no active
         * subflow use the last usable subflow.
         */
-       if (mpts == NULL) {
-               return (fallback);
+       if (best == NULL) {
+               return (second_best ? second_best : third_best);
        }
 
-       return (mpts);
+       if (second_best == NULL) {
+               return (best ? best : third_best);
+       }
+
+       if (preferred != NULL)
+               *preferred = best;
+
+       /* Use a hint from symptomsd if it exists */
+       symptoms_best = mptcp_use_symptoms_hints(best, second_best);
+       if (symptoms_best != NULL)
+               return (symptoms_best);
+
+       /* Compare RTTs, select second_best if best's rtt exceeds rttthresh */
+       if ((mptcp_use_rtthist) &&
+           (best->mpts_srtt) && (second_best->mpts_srtt) &&
+           (best->mpts_srtt > second_best->mpts_srtt) &&
+           (best->mpts_srtt >= MAX((MPTCP_RTTHIST_MINTHRESH << 5),
+           (mptcp_rtthist_rtthresh << 5)))) {
+               tcpstat.tcps_mp_sel_rtt++;
+               mptcplog((LOG_DEBUG, "MPTCP Sender: %s best cid %d"
+                   " at rtt %d,  second cid %d at rtt %d\n", __func__,
+                   best->mpts_connid, best->mpts_srtt >> 5,
+                   second_best->mpts_connid,
+                   second_best->mpts_srtt >> 5),
+                   MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
+               return (second_best);
+       }
+
+       /* Compare RTOs, select second_best if best's rto exceeds rtothresh */
+       if ((mptcp_use_rto) &&
+           (best->mpts_rxtcur) && (second_best->mpts_rxtcur) &&
+           (best->mpts_rxtcur > second_best->mpts_rxtcur) &&
+           (best->mpts_rxtcur >=
+           MAX(MPTCP_RTO_MINTHRESH, mptcp_rtothresh))) {
+               tcpstat.tcps_mp_sel_rto++;
+               mptcplog((LOG_DEBUG, "MPTCP Sender: %s best cid %d"
+                   " at rto %d, second cid %d at rto %d\n", __func__,
+                   best->mpts_connid, best->mpts_rxtcur,
+                   second_best->mpts_connid, second_best->mpts_rxtcur),
+                   MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
+
+               return (second_best);
+       }
+
+       /* If second_best received data, use second_best */
+       if (mptcp_peerswitch &&
+           (second_best->mpts_peerswitch >
+           MAX(MPTCP_PEERSWITCH_CNTMIN, mptcp_peerswitch_cnt))) {
+               tcpstat.tcps_mp_sel_peer++;
+               mptcplog((LOG_DEBUG, "MPTCP Sender: %s: best cid %d"
+                   " but using cid %d after receiving %d segments\n",
+                   __func__, best->mpts_connid, second_best->mpts_connid,
+                   second_best->mpts_peerswitch), MPTCP_SENDER_DBG,
+                   MPTCP_LOGLVL_LOG);
+               return (second_best);
+       }
+       return (best);
 }
 
 struct mptsub *
@@ -481,10 +640,71 @@ mptcp_get_pending_subflow(struct mptses *mpte, struct mptsub *ignore)
        return (mpts);
 }
 
+static const char *
+mptcp_event_to_str(uint32_t event)
+{
+       const char *c = "UNDEFINED";
+       switch (event) {
+       case MPCE_CLOSE:
+               c = "MPCE_CLOSE";
+               break;
+       case MPCE_RECV_DATA_ACK:
+               c = "MPCE_RECV_DATA_ACK";
+               break;
+       case MPCE_RECV_DATA_FIN:
+               c = "MPCE_RECV_DATA_FIN";
+               break;
+       }
+       return (c);
+}
+
+static const char *
+mptcp_state_to_str(mptcp_state_t state)
+{
+        const char *c = "UNDEFINED";
+       switch (state) {
+       case MPTCPS_CLOSED:
+               c = "MPTCPS_CLOSED";
+               break;
+       case MPTCPS_LISTEN:
+               c = "MPTCPS_LISTEN";
+               break;
+       case MPTCPS_ESTABLISHED:
+               c = "MPTCPS_ESTABLISHED";
+               break;
+       case MPTCPS_CLOSE_WAIT:
+               c = "MPTCPS_CLOSE_WAIT";
+               break;
+       case MPTCPS_FIN_WAIT_1:
+               c = "MPTCPS_FIN_WAIT_1";
+               break;
+       case MPTCPS_CLOSING:
+               c = "MPTCPS_CLOSING";
+               break;
+       case MPTCPS_LAST_ACK:
+               c = "MPTCPS_LAST_ACK";
+               break;
+       case MPTCPS_FIN_WAIT_2:
+               c = "MPTCPS_FIN_WAIT_2";
+               break;
+       case MPTCPS_TIME_WAIT:
+               c = "MPTCPS_TIME_WAIT";
+               break;
+       case MPTCPS_FASTCLOSE_WAIT:
+               c = "MPTCPS_FASTCLOSE_WAIT";
+               break;
+       case MPTCPS_TERMINATE:
+               c = "MPTCPS_TERMINATE";
+               break;
+       }
+       return (c);
+}
+
 void
 mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event)
 {
        MPT_LOCK_ASSERT_HELD(mp_tp);
+       mptcp_state_t old_state = mp_tp->mpt_state;
 
        DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, 
            uint32_t, event);
@@ -556,8 +776,11 @@ mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event)
        }
        DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, 
            uint32_t, event);
-       mptcplog((LOG_INFO, "%s: state = %d\n",
-           __func__, mp_tp->mpt_state));
+       mptcplog((LOG_INFO, "MPTCP State: %s to %s on event %s\n",
+           mptcp_state_to_str(old_state),
+           mptcp_state_to_str(mp_tp->mpt_state),
+           mptcp_event_to_str(event)),
+           MPTCP_STATE_DBG, MPTCP_LOGLVL_LOG);
 }
 
 /*
@@ -619,25 +842,28 @@ mptcp_update_rcv_state_meat(struct mptcb *mp_tp, struct tcpcb *tp,
     uint16_t csum)
 {
        if (mdss_data_len == 0) {
-               mptcplog((LOG_INFO, "%s: Received infinite mapping.",
-                   __func__));
+               mptcplog((LOG_INFO, "MPTCP Receiver: Infinite Mapping.\n"),
+                   MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG);
+
                if ((mp_tp->mpt_flags & MPTCPF_CHECKSUM) && (csum != 0)) {
-                       mptcplog((LOG_ERR, "%s: Bad checksum value %x \n",
-                           __func__, csum));
+                       mptcplog((LOG_ERR, "MPTCP Receiver: Bad checksum %x \n",
+                           csum), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR);
                }
                mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
                return;
        }
        MPT_LOCK(mp_tp);
-       if (mptcp_dbg >= MP_VERBOSE_DEBUG_1)
-               printf("%s: seqn = %x len = %x full = %llx rcvnxt = %llu \n",
-                   __func__, seqn, mdss_data_len, full_dsn,
-                   mp_tp->mpt_rcvnxt);
+               mptcplog((LOG_DEBUG,
+                   "MPTCP Receiver: seqn = %x len = %x full = %llx "
+                   "rcvnxt = %llu \n",
+                   seqn, mdss_data_len, full_dsn, mp_tp->mpt_rcvnxt),
+                   MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
 
        /* Process a Data FIN packet , handled in mptcp_do_fin_opt */
        if ((seqn == 0) && (mdss_data_len == 1)) {
-               mptcplog((LOG_INFO, "%s: Data FIN DSS opt state = %d \n",
-                   __func__, mp_tp->mpt_state));
+               mptcplog((LOG_INFO, "MPTCP Receiver: Data FIN in %s state \n",
+                   mptcp_state_to_str(mp_tp->mpt_state)),
+                   MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG);
                MPT_UNLOCK(mp_tp);
                return;
        }
@@ -687,6 +913,52 @@ mptcp_update_rcv_state_g(struct mptcp_dss64_ack32_opt *dss_info,
            csum);
 }
 
+static int
+mptcp_validate_dss_map(struct socket *so, struct tcpcb *tp, struct mbuf *m,
+    int hdrlen)
+{
+       u_int32_t sseq, datalen;
+
+       if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP))
+               return 0;
+
+       sseq = m->m_pkthdr.mp_rseq + tp->irs;
+       datalen = m->m_pkthdr.mp_rlen;
+
+#if 0
+       /* enable this to test TCP fallback post connection establishment */
+       if (SEQ_GT(sseq, (tp->irs+1)))
+               datalen = m->m_pkthdr.len - hdrlen - 1;
+#endif
+
+       /* unacceptable DSS option, fallback to TCP */
+       if (m->m_pkthdr.len > ((int) datalen + hdrlen)) {
+               mptcplog((LOG_ERR, "MPTCP Receiver: "
+                   "%s: mbuf len %d, MPTCP expected %d",
+                   __func__, m->m_pkthdr.len, datalen),
+                   MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG);
+       } else {
+               return 0;
+       }
+       tp->t_mpflags |= TMPF_SND_MPFAIL;
+       mptcp_notify_mpfail(so);
+       m_freem(m);
+       return -1;
+}
+
+int
+mptcp_input_preproc(struct tcpcb *tp, struct mbuf *m, int drop_hdrlen)
+{
+       if (mptcp_validate_csum(tp, m, drop_hdrlen) != 0)
+               return -1;
+
+       mptcp_insert_rmap(tp, m);
+       if (mptcp_validate_dss_map(tp->t_inpcb->inp_socket, tp, m,
+           drop_hdrlen) != 0)
+               return -1;
+       return 0;
+}
+
 /*
  * MPTCP Checksum support
  * The checksum is calculated whenever the MPTCP DSS option is included
@@ -695,7 +967,23 @@ mptcp_update_rcv_state_g(struct mptcp_dss64_ack32_opt *dss_info,
  * DSS option.
  */
 
-uint16_t
+static int
+mptcp_validate_csum(struct tcpcb *tp, struct mbuf *m, int drop_hdrlen)
+{
+       uint16_t mptcp_csum = 0;
+       mptcp_csum = mptcp_input_csum(tp, m, drop_hdrlen);
+       if (mptcp_csum) {
+               tp->t_mpflags |= TMPF_SND_MPFAIL;
+               tp->t_mpflags &= ~TMPF_EMBED_DSN;
+               mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
+               m_freem(m);
+               tcpstat.tcps_mp_badcsum++;
+               return -1;
+       }
+       return 0;
+}
+
+static uint16_t
 mptcp_input_csum(struct tcpcb *tp, struct mbuf *m, int off)
 {
        struct mptcb *mp_tp = tptomptp(tp);
@@ -735,7 +1023,8 @@ mptcp_input_csum(struct tcpcb *tp, struct mbuf *m, int off)
        ADDCARRY(sum);
        DTRACE_MPTCP3(checksum__result, struct tcpcb *, tp, struct mbuf *, m,
            uint32_t, sum);
-       mptcplog((LOG_INFO, "%s: sum = %x \n", __func__, sum));
+       mptcplog((LOG_DEBUG, "MPTCP Receiver: sum = %x \n", sum),
+           MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
        return (~sum & 0xffff);
 }
 
@@ -774,5 +1063,35 @@ mptcp_output_csum(struct tcpcb *tp, struct mbuf *m, int32_t len,
        DTRACE_MPTCP3(checksum__result, struct tcpcb *, tp, struct mbuf *, m,
            uint32_t, sum);
        *csump = sum;
-       mptcplog3((LOG_INFO, "%s: sum = %x \n", __func__, sum));
+       mptcplog((LOG_DEBUG, "MPTCP Sender: sum = %x \n", sum),
+           MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
+}
+
+/*
+ * When WiFi signal starts fading, there's more loss and RTT spikes.
+ * Check if there has been a large spike by comparing against
+ * a tolerable RTT spike threshold.
+ */
+boolean_t
+mptcp_no_rto_spike(struct socket *so)
+{
+       struct tcpcb *tp = intotcpcb(sotoinpcb(so));
+       int32_t spike = 0;
+
+       if (tp->t_rxtcur > MAX(mptcp_rtothresh, MPTCP_RTO_MINTHRESH)) {
+               spike = tp->t_rxtcur - mptcp_rtothresh;
+
+               mptcplog((LOG_DEBUG, "MPTCP Socket: %s: spike = %d rto = %d "
+                   "best = %d cur = %d\n", __func__, spike,
+                   tp->t_rxtcur, tp->t_rttbest >> TCP_RTT_SHIFT,
+                   tp->t_rttcur),
+                   (MPTCP_SOCKET_DBG|MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG);
+
+       }
+
+       if (spike > 0 ) {
+               return (FALSE);
+       } else {
+               return (TRUE);
+       }
 }
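
In mptcp_get_subflow() above, second_best is picked over best only when best's smoothed RTT both exceeds second_best's and crosses MAX(MPTCP_RTTHIST_MINTHRESH << 5, mptcp_rtthist_rtthresh << 5); the shift matches the 5-bit scaling of mpts_srtt. Here is a standalone sketch of just that comparison, with hypothetical inputs but the same arithmetic.

/*
 * RTT-history comparison from mptcp_get_subflow(), lifted into a
 * standalone predicate.  Values are hypothetical; the srtt arguments are
 * expected in the same <<5 fixed-point form the kernel uses.
 */
#include <stdio.h>

#define MPTCP_RTTHIST_MINTHRESH	500

static unsigned int mptcp_rtthist_rtthresh = 600; /* net.inet.mptcp.rtthist_thresh */

static int
prefer_second(unsigned int best_srtt, unsigned int second_srtt)
{
	unsigned int thresh = MPTCP_RTTHIST_MINTHRESH << 5;

	if ((mptcp_rtthist_rtthresh << 5) > thresh)
		thresh = mptcp_rtthist_rtthresh << 5;

	return (best_srtt && second_srtt &&
	    best_srtt > second_srtt && best_srtt >= thresh);
}

int
main(void)
{
	/* best subflow srtt 700, alternate 80 (pre-shift values) */
	printf("switch to second_best: %d\n",
	    prefer_second(700 << 5, 80 << 5));
	return (0);
}

The RTO check in the same function follows the same pattern with MAX(MPTCP_RTO_MINTHRESH, mptcp_rtothresh), without the shift, since mpts_rxtcur is not scaled.
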
index 0dc8c9c61257a10f5d22e2b3522d942d0a80b3ce..3ea265ebceb08490d7ab7ac594ca4d8015a3beeb 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2012-2014 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -54,7 +54,7 @@
 #define        MPO_FASTCLOSE   0x7
 
 /* MPTCP Protocol version */
-#define        MP_DRAFT_VERSION_12     0x0
+#define        MPTCP_STD_VERSION_0     0x0
 
 /*
  * MPTCP MP_CAPABLE TCP Option definitions
index 173e56075bd7f96d25cf80e77e93c87ac1b556dd..414e76c5f1e9344ecf2bb7479374eb51fd1bb67b 100644 (file)
@@ -30,7 +30,7 @@
 #include <netinet/in_systm.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
-
+#include <sys/syslog.h>
 #include <net/route.h>
 #include <netinet/in.h>
 #include <net/if.h>
@@ -112,10 +112,6 @@ mptcp_setup_first_subflow_syn_opts(struct socket *so, int flags, u_char *opt,
                memcpy(opt + optlen, &mptcp_opt,
                    mptcp_opt.mmc_common.mmco_len);
                optlen += mptcp_opt.mmc_common.mmco_len;
-               if (mptcp_dbg >= MP_VERBOSE_DEBUG_2) {
-                       printf("%s: SYN_ACK localkey = %llx \n",
-                           __func__, mp_localkey);
-               }
        } else {
                /* Only the SYN flag is set */
                struct mptcp_mpcapable_opt_common mptcp_opt;
@@ -205,8 +201,9 @@ mptcp_setup_join_subflow_syn_opts(struct socket *so, int flags, u_char *opt,
                mpjoin_req.mmjo_addr_id = tp->t_local_aid;
                mpjoin_req.mmjo_peer_token = mptcp_get_remotetoken(tp->t_mptcb);
                if (mpjoin_req.mmjo_peer_token == 0) {
-                       if (mptcp_dbg >= MP_ERR_DEBUG)
-                               printf("%s: zero peer token \n", __func__);
+                       mptcplog((LOG_DEBUG, "MPTCP Socket: %s: peer token 0",
+                               __func__),
+                               MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
                }       
                mptcp_get_rands(tp->t_local_aid, tptomptp(tp),
                    &mpjoin_req.mmjo_rand, NULL);
@@ -217,8 +214,6 @@ mptcp_setup_join_subflow_syn_opts(struct socket *so, int flags, u_char *opt,
                    (so->so_flags & SOF_MPTCP_FASTJOIN)) {
                        soevent(so,
                            (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPFASTJ));
-                       if (mptcp_dbg >= MP_ERR_DEBUG)
-                               printf("%s: fast join request\n", __func__);
                }
        }
        return (optlen);
@@ -306,8 +301,9 @@ mptcp_send_mpfail(struct tcpcb *tp, u_char *opt, unsigned int optlen)
        memcpy(opt + optlen, &fail_opt, len);
        optlen += len;
        tp->t_mpflags &= ~TMPF_SND_MPFAIL;
-       if (mptcp_dbg >= MP_ERR_DEBUG)
-               printf("%s: %d \n", __func__, tp->t_local_aid);
+       mptcplog((LOG_DEBUG, "MPTCP Socket: %s: %d \n", __func__,
+           tp->t_local_aid), (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG), 
+           MPTCP_LOGLVL_LOG);
        return (optlen);
 }
 
@@ -370,12 +366,13 @@ mptcp_send_infinite_mapping(struct tcpcb *tp, u_char *opt, unsigned int optlen)
                optlen += csum_len;
        }
 
-       if (mptcp_dbg == MP_VERBOSE_DEBUG_1) {
-               printf("%s: dsn = %x, seq = %x len = %x\n", __func__,
-                   ntohl(infin_opt.mdss_dsn),
-                   ntohl(infin_opt.mdss_subflow_seqn),
-                   ntohs(infin_opt.mdss_data_len));
-       }
+       mptcplog((LOG_DEBUG, "MPTCP Socket: %s: dsn = %x, seq = %x len = %x\n", 
+           __func__,
+           ntohl(infin_opt.mdss_dsn),
+           ntohl(infin_opt.mdss_subflow_seqn),
+           ntohs(infin_opt.mdss_data_len)),
+           (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG),
+           MPTCP_LOGLVL_LOG);
 
        /* so->so_flags &= ~SOF_MPTCP_CLIENT; */
        tp->t_mpflags |= TMPF_INFIN_SENT;
@@ -444,7 +441,8 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt,
        boolean_t send_64bit_dsn = FALSE;
        boolean_t send_64bit_ack = FALSE;
        u_int32_t old_mpt_flags = tp->t_mpflags &
-               (TMPF_SND_MPPRIO | TMPF_SND_REM_ADDR | TMPF_SND_MPFAIL);
+           (TMPF_SND_MPPRIO | TMPF_SND_REM_ADDR | TMPF_SND_MPFAIL |
+           TMPF_MPCAP_RETRANSMIT);
 
        if ((mptcp_enable == 0) ||
            (mp_tp == NULL) ||
@@ -465,12 +463,11 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt,
 
        if ((MAX_TCPOPTLEN - optlen) <
            sizeof (struct mptcp_mpcapable_opt_common)) {
-               if (mptcp_dbg >= MP_ERR_DEBUG) {
-                       printf("MPTCP ERROR %s: no space left %d flags %x "
-                           "tp->t_mpflags %x"
-                           "len %d\n", __func__, optlen, flags, tp->t_mpflags,
-                           datalen);
-               }
+               mptcplog((LOG_ERR, "MPTCP Socket:  "
+                   "%s: no space left %d flags %x "
+                   "tp->t_mpflags %x "
+                   "len %d\n", __func__, optlen, flags, tp->t_mpflags,
+                   datalen), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
                goto ret_optlen;
        }
 
@@ -501,9 +498,10 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt,
                goto fastjoin_send;
        }
 
-       if ((tp->t_mpflags & TMPF_PREESTABLISHED) &&
+       if (((tp->t_mpflags & TMPF_PREESTABLISHED) &&
            (!(tp->t_mpflags & TMPF_SENT_KEYS)) &&
-           (!(tp->t_mpflags & TMPF_JOINED_FLOW))) {
+           (!(tp->t_mpflags & TMPF_JOINED_FLOW))) ||
+           (tp->t_mpflags & TMPF_MPCAP_RETRANSMIT)) {
                struct mptcp_mpcapable_opt_rsp1 mptcp_opt;
                if ((MAX_TCPOPTLEN - optlen) <
                    sizeof (struct mptcp_mpcapable_opt_rsp1))
@@ -513,7 +511,7 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt,
                mptcp_opt.mmc_common.mmco_len =
                    sizeof (struct mptcp_mpcapable_opt_rsp1);
                mptcp_opt.mmc_common.mmco_subtype = MPO_CAPABLE;
-               mptcp_opt.mmc_common.mmco_version = MP_DRAFT_VERSION_12;
+               mptcp_opt.mmc_common.mmco_version = mp_tp->mpt_version;
                /* HMAC-SHA1 is the proposal */
                mptcp_opt.mmc_common.mmco_flags |= MPCAP_PROPOSAL_SBIT;
                MPT_LOCK(mp_tp);
@@ -524,19 +522,16 @@ mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt,
                MPT_UNLOCK(mp_tp);
                memcpy(opt + optlen, &mptcp_opt, mptcp_opt.mmc_common.mmco_len);
                optlen += mptcp_opt.mmc_common.mmco_len;
-               tp->t_mpflags |= TMPF_SENT_KEYS;
+               tp->t_mpflags |= TMPF_SENT_KEYS | TMPF_MPTCP_TRUE;
                so->so_flags |= SOF_MPTCP_TRUE;
                tp->t_mpflags &= ~TMPF_PREESTABLISHED;
-               tp->t_mpflags |= TMPF_MPTCP_TRUE;
+               tp->t_mpflags &= ~TMPF_MPCAP_RETRANSMIT;
 
                if (!tp->t_mpuna) {
                        tp->t_mpuna = tp->snd_una;
                } else {
                        /* it's a retransmission of the MP_CAPABLE ACK */
                }
-               if (mptcp_dbg >= MP_ERR_DEBUG) {
-                       printf("MPTCP SUCCESS %s: established.\n", __func__);
-               }
                goto ret_optlen;
        } else if (tp->t_mpflags & TMPF_MPTCP_TRUE) {
                if (tp->t_mpflags & TMPF_SND_REM_ADDR) {
@@ -593,11 +588,9 @@ fastjoin_send:
 
 #define        CHECK_OPTLEN    {                                               \
        if ((MAX_TCPOPTLEN - optlen) < len) {                           \
-               if (mptcp_dbg >= MP_ERR_DEBUG) {                        \
-                       printf("MPTCP ERROR %s: len %d optlen %d \n",   \
-                           __func__,                                   \
-                           len, optlen);                               \
-               }                                                       \
+               mptcplog((LOG_ERR, "MPTCP Socket:  "                    \
+                   "%s: len %d optlen %d \n", __func__, len, optlen),  \
+                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);                \
                goto ret_optlen;                                        \
        }                                                               \
 }
@@ -616,10 +609,11 @@ fastjoin_send:
 #define        CHECK_DATALEN {                                                 \
        /* MPTCP socket does not support IP options */                  \
        if ((datalen + optlen + len) > tp->t_maxopd) {                  \
-               if (mptcp_dbg >= MP_VERBOSE_DEBUG_2)                    \
-                       printf("%s: nosp %d len %d opt %d %d %d\n",     \
-                           __func__, datalen, len, optlen,             \
-                           tp->t_maxseg, tp->t_maxopd);                \
+               mptcplog((LOG_ERR, "MPTCP Socket:  "                    \
+                   "%s: nosp %d len %d opt %d %d %d\n",                \
+                   __func__, datalen, len, optlen,                     \
+                   tp->t_maxseg, tp->t_maxopd),                        \
+                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);                \
                /* remove option length from payload len */             \
                datalen = tp->t_maxopd - optlen - len;                  \
        }                                                               \
@@ -691,12 +685,13 @@ fastjoin_send:
                            mdss_subflow_seqn));
                }
                optlen += len;
-               if (mptcp_dbg == MP_VERBOSE_DEBUG_2) {
-                       printf("%s: long DSS = %llx ACK = %llx \n",
-                           __func__,
-                           mptcp_ntoh64(dsn_ack_opt.mdss_dsn),
-                           mptcp_ntoh64(dsn_ack_opt.mdss_ack));
-               }
+               mptcplog((LOG_DEBUG,"MPTCP Socket: "
+                   "%s: long DSS = %llx ACK = %llx \n",
+                   __func__,
+                   mptcp_ntoh64(dsn_ack_opt.mdss_dsn),
+                   mptcp_ntoh64(dsn_ack_opt.mdss_ack)),
+                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
+               
                tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
                goto ret_optlen;
        }
@@ -746,13 +741,6 @@ fastjoin_send:
                            offsetof(struct mptcp_dsn_opt, mdss_subflow_seqn));
                }
                optlen += len;
-               if (mptcp_dbg == MP_VERBOSE_DEBUG_2) {
-                       printf("%s: DSS option. dsn = %x, seq = %x len = %x\n",
-                           __func__,
-                           ntohl(dsn_opt.mdss_dsn),
-                           ntohl(dsn_opt.mdss_subflow_seqn),
-                           ntohs(dsn_opt.mdss_data_len));
-               }
                tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
                goto ret_optlen;
        }
@@ -957,10 +945,6 @@ do_ack64_only:
                if (((mp_tp->mpt_sndnxt + 1) != mp_tp->mpt_sndmax) ||
                    (mp_tp->mpt_snduna == mp_tp->mpt_sndmax)) {
                        MPT_UNLOCK(mp_tp);
-                       if (mptcp_dbg == MP_VERBOSE_DEBUG_2)
-                               printf("%s: Fin state %d %llu %llu\n", __func__,
-                                   mp_tp->mpt_state, mp_tp->mpt_sndnxt,
-                                   mp_tp->mpt_sndmax);
                        goto ret_optlen;
                }
 
@@ -990,7 +974,8 @@ ret_optlen:
        if (TRUE == *p_mptcp_acknow ) {
                VERIFY(old_mpt_flags != 0);
                u_int32_t new_mpt_flags = tp->t_mpflags &
-                   (TMPF_SND_MPPRIO | TMPF_SND_REM_ADDR | TMPF_SND_MPFAIL);
+                   (TMPF_SND_MPPRIO | TMPF_SND_REM_ADDR | TMPF_SND_MPFAIL |
+                   TMPF_MPCAP_RETRANSMIT);
 
                /*
                 * If none of the above mpflags were acted on by
@@ -1006,10 +991,18 @@ ret_optlen:
                 * we haven't modified the logic in tcp_output to avoid
                 * that.
                 */
-               if (old_mpt_flags == new_mpt_flags) { 
+               if ((old_mpt_flags == new_mpt_flags) || (new_mpt_flags == 0)) {
                        tp->t_mpflags &= ~(TMPF_SND_MPPRIO
-                           | TMPF_SND_REM_ADDR | TMPF_SND_MPFAIL);
+                           | TMPF_SND_REM_ADDR | TMPF_SND_MPFAIL |
+                           TMPF_MPCAP_RETRANSMIT);
                        *p_mptcp_acknow = FALSE;
+                       mptcplog((LOG_DEBUG, "MPTCP Sender: %s: no action \n",
+                           __func__), MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
+               } else {
+                       mptcplog((LOG_DEBUG, "MPTCP Sender: acknow set, "
+                           "old flags %x new flags %x \n",
+                           old_mpt_flags, new_mpt_flags),
+                           MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
                }
        }
 
@@ -1020,19 +1013,50 @@ ret_optlen:
  * MPTCP Options Input Processing
  */
 
+static int
+mptcp_sanitize_option(struct tcpcb *tp, int mptcp_subtype)
+{
+       struct mptcb *mp_tp = tptomptp(tp);
+       int ret = 1;
+
+       if (mp_tp == NULL) {
+               mptcplog((LOG_ERR, "MPTCP Socket: %s: NULL mpsocket \n",
+                   __func__), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
+               return (0);
+       }
+
+       switch (mptcp_subtype) {
+               case MPO_CAPABLE:
+                       break;
+               case MPO_JOIN:          /* fall through */
+               case MPO_DSS:           /* fall through */
+               case MPO_FASTCLOSE:     /* fall through */
+               case MPO_FAIL:          /* fall through */
+               case MPO_REMOVE_ADDR:   /* fall through */
+               case MPO_ADD_ADDR:      /* fall through */
+               case MPO_PRIO:          /* fall through */
+                       if (mp_tp->mpt_state < MPTCPS_ESTABLISHED)
+                               ret = 0;
+                       break;
+               default:
+                       ret = 0;
+                       mptcplog((LOG_ERR, "MPTCP Socket: "
+                           "%s: type = %d \n", __func__,
+                           mptcp_subtype),
+                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
+                       break;
+       }
+       return (ret);
+}
 
 static int
-mptcp_valid_mpcapable_common_opt(u_char *cp, u_int32_t mptcp_version)
+mptcp_valid_mpcapable_common_opt(u_char *cp)
 {
        struct mptcp_mpcapable_opt_common *rsp =
            (struct mptcp_mpcapable_opt_common *)cp;
 
        /* mmco_kind, mmco_len and mmco_subtype are validated before */
 
-       /* In future, there can be more than one version supported */
-       if (rsp->mmco_version != mptcp_version)
-               return (0);
-
        if (!(rsp->mmco_flags & MPCAP_PROPOSAL_SBIT))
                return (0);
 
@@ -1061,15 +1085,8 @@ mptcp_do_mpcapable_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th,
        }                                                               \
 }
 
-       if (mp_tp == NULL) {
-               if (mptcp_dbg == MP_ERR_DEBUG)
-                       printf("MPTCP ERROR %s: NULL mpsocket \n", __func__);
-               tcpstat.tcps_invalid_mpcap++;
-               return;
-       }
-
        /* Validate the kind, len, flags */
-       if (mptcp_valid_mpcapable_common_opt(cp, mp_tp->mpt_version) != 1) {
+       if (mptcp_valid_mpcapable_common_opt(cp) != 1) {
                tcpstat.tcps_invalid_mpcap++;
                return;
        }
@@ -1080,14 +1097,24 @@ mptcp_do_mpcapable_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th,
                return;
        } else if ((th->th_flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) {
 
+               /* Handle old duplicate SYN/ACK retransmission */
+               if (SEQ_GT(tp->rcv_nxt, (tp->irs + 1)))
+                       return;
+
+               /* handle SYN/ACK retransmission by acknowledging with ACK */
+               if (mp_tp->mpt_state >= MPTCPS_ESTABLISHED) {
+                       tp->t_mpflags |= TMPF_MPCAP_RETRANSMIT;
+                       return;
+               }
+
                /* A SYN/ACK contains peer's key and flags */
                if (optlen != sizeof (struct mptcp_mpcapable_opt_rsp)) {
                        /* complain */
-                       if (mptcp_dbg == MP_ERR_DEBUG) {
-                               printf("%s: SYN_ACK optlen = %d, sizeof mp opt \
-                                   = %lu \n", __func__, optlen,
-                                   sizeof (struct mptcp_mpcapable_opt_rsp));
-                       }
+                       mptcplog((LOG_ERR, "MPTCP Socket: "
+                           "%s: SYN_ACK optlen = %d, sizeof mp opt = %lu \n",
+                           __func__, optlen,
+                           sizeof (struct mptcp_mpcapable_opt_rsp)),
+                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
                        tcpstat.tcps_invalid_mpcap++;
                        return;
                }
@@ -1103,16 +1130,15 @@ mptcp_do_mpcapable_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th,
                rsp = (struct mptcp_mpcapable_opt_rsp *)cp;
                MPT_LOCK_SPIN(mp_tp);
                mp_tp->mpt_remotekey = rsp->mmc_localkey;
+               /* For now just downgrade to the peer's version */
+               mp_tp->mpt_peer_version = rsp->mmc_common.mmco_version;
+               if (rsp->mmc_common.mmco_version < mp_tp->mpt_version) {
+                       mp_tp->mpt_version = rsp->mmc_common.mmco_version;
+                       tcpstat.tcps_mp_verdowngrade++;
+               }
                MPT_UNLOCK(mp_tp);
                tp->t_mpflags |= TMPF_PREESTABLISHED;
 
-               if (mptcp_dbg > MP_VERBOSE_DEBUG_1) {
-                       printf("SYN_ACK pre established, optlen = %d, tp \
-                           state = %d  sport = %x dport = %x key = %llx \n",
-                           optlen, tp->t_state, th->th_sport, th->th_dport,
-                           mp_tp->mpt_remotekey);
-               }
-
        } else if ((th->th_flags & TH_ACK) &&
                (tp->t_mpflags & TMPF_PREESTABLISHED)) {
 
@@ -1123,10 +1149,9 @@ mptcp_do_mpcapable_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th,
                if ((mp_tp->mpt_flags & MPTCPF_CHECKSUM) &&
                    !(((struct mptcp_mpcapable_opt_common *)cp)->mmco_flags &
                    MPCAP_CHECKSUM_CBIT)) {
-                       if (mptcp_dbg == MP_ERR_DEBUG) {
-                               printf("%s: checksum negotiation failure \n",
-                                   __func__);
-                       }
+                       mptcplog((LOG_ERR, "MPTCP Socket: "     
+                           "%s: checksum negotiation failure \n", __func__),
+                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
                        MPTCP_OPT_ERROR_PATH(tp);
                        return;
                }
@@ -1134,10 +1159,9 @@ mptcp_do_mpcapable_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th,
                if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM) &&
                    (((struct mptcp_mpcapable_opt_common *)cp)->mmco_flags &
                    MPCAP_CHECKSUM_CBIT)) {
-                       if (mptcp_dbg == MP_ERR_DEBUG) {
-                               printf("%s: checksum negotiation failure 2.\n",
-                                   __func__);
-                       }
+                       mptcplog((LOG_ERR, "MPTCP Socket: "     
+                           "%s: checksum negotiation failure 2.\n", __func__),
+                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
                        MPTCP_OPT_ERROR_PATH(tp);
                        return;
                }
@@ -1148,38 +1172,33 @@ mptcp_do_mpcapable_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th,
                 */
                if (optlen != sizeof (struct mptcp_mpcapable_opt_rsp1)) {
                        /* complain */
-                       if (mptcp_dbg == MP_ERR_DEBUG) {
-                               printf("%s: ACK optlen = %d , sizeof mp option \
-                               = %lu, state = %d \n",
-                               __func__,
-                               optlen,
+                       mptcplog((LOG_ERR, "MPTCP Socket: "     
+                           "%s: ACK optlen = %d , sizeof mp option = %lu, "
+                           " state = %d \n", __func__, optlen,
                                sizeof (struct mptcp_mpcapable_opt_rsp1),
-                               tp->t_state);
-                       }
+                               tp->t_state), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
                        MPTCP_OPT_ERROR_PATH(tp);
                        return;
                }
 
                rsp1 = (struct mptcp_mpcapable_opt_rsp1 *)cp;
+
                /* Skipping MPT_LOCK for invariant key */
                if (rsp1->mmc_remotekey != *mp_tp->mpt_localkey) {
-                       if (mptcp_dbg == MP_ERR_DEBUG) {
-                               printf("MPTCP ERROR %s: key mismatch locally "
-                                   "stored key. rsp = %llx local = %llx \n",
-                                   __func__, rsp1->mmc_remotekey,
-                                   *mp_tp->mpt_localkey);
-                       }
-                       tp->t_mpflags &= ~TMPF_PREESTABLISHED;
+                       mptcplog((LOG_ERR, "MPTCP Socket: "     
+                           "%s: key mismatch locally stored key. "
+                           "rsp = %llx local = %llx \n", __func__, 
+                           rsp1->mmc_remotekey, *mp_tp->mpt_localkey),
+                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
                        MPTCP_OPT_ERROR_PATH(tp);
                        return;
                } else {
                        /* We received both keys. Almost an MPTCP connection */
                        /* Skipping MPT_LOCK for invariant key */
                        if (mp_tp->mpt_remotekey != rsp1->mmc_localkey) {
-                               if (mptcp_dbg == MP_ERR_DEBUG) {
-                                       printf("MPTCP ERROR %s: keys don't"
-                                           " match\n", __func__);
-                               }
+                               mptcplog((LOG_ERR, "MPTCP Socket: "
+                                   "%s: keys don't match\n", __func__),
+                                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
                                tp->t_mpflags &= ~TMPF_PREESTABLISHED;
                                MPTCP_OPT_ERROR_PATH(tp);
                                return;
@@ -1191,14 +1210,12 @@ mptcp_do_mpcapable_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th,
                        MPT_LOCK(mp_tp);
                        DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, 
                            uint32_t, 0 /* event */);
+                       mptcplog((LOG_DEBUG, "MPTCP State: "
+                                   "MPTCPS_ESTABLISHED \n"),
+                                   MPTCP_STATE_DBG, MPTCP_LOGLVL_LOG);
+
                        mp_tp->mpt_state = MPTCPS_ESTABLISHED;
                        MPT_UNLOCK(mp_tp);
-                       if (mptcp_dbg >= MP_VERBOSE_DEBUG_2) {
-                               printf("MPTCP SUCCESS %s: rem key = %llx local \
-                               key = %llx \n",
-                               __func__, mp_tp->mpt_remotekey,
-                               *mp_tp->mpt_localkey);
-                       }
                }
                if (tp->t_mpuna) {
                        tp->t_mpuna = 0;
@@ -1219,7 +1236,6 @@ mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen)
        }                                                               \
 }
        int error = 0;
-       struct mptcb *mp_tp = tptomptp(tp);
 
        if ((th->th_flags & (TH_SYN | TH_ACK)) == TH_SYN) {
                /* We won't accept join requests as an active opener */
@@ -1229,12 +1245,11 @@ mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen)
                }
 
                if (optlen != sizeof (struct mptcp_mpjoin_opt_req)) {
-                       if (mptcp_dbg == MP_ERR_DEBUG) {
-                               printf("SYN: unexpected optlen = %d, mp option"
-                                   "= %lu\n",
-                                   optlen,
-                                   sizeof (struct mptcp_mpjoin_opt_req));
-                       }
+                       mptcplog((LOG_ERR, "MPTCP Socket: "     
+                           "%s: SYN: unexpected optlen = %d, mp option"
+                           "= %lu\n", __func__, optlen,
+                           sizeof (struct mptcp_mpjoin_opt_req)),
+                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
                        /* send RST and close */
                        MPTCP_JOPT_ERROR_PATH(tp);
                        return;
@@ -1246,9 +1261,10 @@ mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen)
                    (struct mptcp_mpjoin_opt_req *)cp;
                mp_so = mptcp_find_mpso(join_req->mmjo_peer_token);
                if (!mp_so) {
-                       if (mptcp_dbg >= MP_ERR_DEBUG)
-                               printf("%s: cannot find mp_so token = %x\n",
-                                   __func__, join_req->mmjo_peer_token);
+                       mptcplog((LOG_ERR, "MPTCP Socket: "     
+                           "%s: cannot find mp_so token = %x\n",
+                           __func__, join_req->mmjo_peer_token),
+                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
                        /* send RST */
                        MPTCP_JOPT_ERROR_PATH(tp);
                        return;
@@ -1270,21 +1286,11 @@ mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen)
                    (struct mptcp_mpjoin_opt_rsp *)cp;
 
                if (optlen != sizeof (struct mptcp_mpjoin_opt_rsp)) {
-                       if (mptcp_dbg >= MP_ERR_DEBUG) {
-                               printf("SYN_ACK: unexpected optlen = %d mp "
-                                   "option = %lu\n", optlen,
-                                   sizeof (struct mptcp_mpjoin_opt_rsp));
-                       }
-                       tp->t_mpflags &= ~TMPF_PREESTABLISHED;
-                       /* send RST and close */
-                       MPTCP_JOPT_ERROR_PATH(tp);
-                       return;
-               }
-
-               if (mp_tp == NULL) {
-                       if (mptcp_dbg >= MP_ERR_DEBUG)
-                               printf("%s: cannot find mp_tp in SYN_ACK\n",
-                                   __func__);
+                       mptcplog((LOG_ERR, "MPTCP Socket: "     
+                           "SYN_ACK: unexpected optlen = %d mp "
+                           "option = %lu\n", optlen,
+                           sizeof (struct mptcp_mpjoin_opt_rsp)),
+                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
                        tp->t_mpflags &= ~TMPF_PREESTABLISHED;
                        /* send RST and close */
                        MPTCP_JOPT_ERROR_PATH(tp);
@@ -1297,10 +1303,9 @@ mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen)
                error = mptcp_validate_join_hmac(tp,
                    (u_char*)&join_rsp->mmjo_mac, SHA1_TRUNCATED);
                if (error) {
-                       if (mptcp_dbg >= MP_ERR_DEBUG) {
-                               printf("%s: SYN_ACK error = %d \n", __func__,
-                                   error);
-                       }
+                       mptcplog((LOG_ERR, "MPTCP Socket: %s: "
+                           "SYN_ACK error = %d \n", __func__, error),
+                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
                        tp->t_mpflags &= ~TMPF_PREESTABLISHED;
                        /* send RST and close */
                        MPTCP_JOPT_ERROR_PATH(tp);
@@ -1313,19 +1318,14 @@ mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen)
                    (struct mptcp_mpjoin_opt_rsp2 *)cp;
                
                if (optlen != sizeof (struct mptcp_mpjoin_opt_rsp2)) {
-                       if (mptcp_dbg >= MP_ERR_DEBUG) {
-                               printf("ACK: unexpected optlen = %d mp option "
-                                   "= %lu \n", optlen,
-                                   sizeof (struct mptcp_mpjoin_opt_rsp2));
-                       }
-                       tp->t_mpflags &= ~TMPF_PREESTABLISHED;
-                       /* send RST and close */
-                       MPTCP_JOPT_ERROR_PATH(tp);
-                       return;
-               }
+                       mptcplog((LOG_ERR, "MPTCP Socket: "
+                           "ACK: unexpected optlen = %d mp option "
+                           "= %lu \n", optlen,
+                           sizeof (struct mptcp_mpjoin_opt_rsp2)),
+                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
 
-               if (mp_tp == NULL) {
                        tp->t_mpflags &= ~TMPF_PREESTABLISHED;
+                       /* send RST and close */
                        MPTCP_JOPT_ERROR_PATH(tp);
                        return;
                }
@@ -1333,10 +1333,9 @@ mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen)
                error = mptcp_validate_join_hmac(tp, join_rsp2->mmjo_mac,
                    SHA1_RESULTLEN);
                if (error) {
-                       if (mptcp_dbg >= MP_ERR_DEBUG) {
-                               printf("%s: ACK error = %d\n", __func__,
-                                   error);
-                       }
+                       mptcplog((LOG_ERR, "MPTCP Socket: "
+                           "%s: ACK error = %d\n", __func__, error),
+                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
                        tp->t_mpflags &= ~TMPF_PREESTABLISHED;
                        MPTCP_JOPT_ERROR_PATH(tp);
                        return;
@@ -1346,9 +1345,6 @@ mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen)
                tp->t_flags |= TF_ACKNOW;
                tp->t_mpflags |= TMPF_MPTCP_ACKNOW;
                tp->t_inpcb->inp_socket->so_flags |= SOF_MPTCP_TRUE;
-               if (mptcp_dbg >= MP_ERR_DEBUG) {
-                       printf("MPTCP SUCCESS %s: join \n", __func__);
-               }
        }
 }
 
@@ -1361,8 +1357,6 @@ mptcp_validate_join_hmac(struct tcpcb *tp, u_char* hmac, int mac_len)
        u_int32_t rem_rand, loc_rand;
 
        mp_tp = tp->t_mptcb;
-       if (mp_tp == NULL)
-               return (-1);
 
        rem_rand = loc_rand = 0;
 
@@ -1394,9 +1388,6 @@ mptcp_do_dss_opt_ack_meat(u_int64_t full_dack, struct tcpcb *tp)
        struct mptcb *mp_tp = tptomptp(tp);
        int close_notify = 0;
 
-       if (mp_tp == NULL)
-               return;
-
        MPT_LOCK(mp_tp);
        if (MPTCP_SEQ_LEQ(full_dack, mp_tp->mpt_sndmax) &&
            MPTCP_SEQ_GEQ(full_dack, mp_tp->mpt_snduna)) {
@@ -1413,15 +1404,12 @@ mptcp_do_dss_opt_ack_meat(u_int64_t full_dack, struct tcpcb *tp)
                }
        } else {
                MPT_UNLOCK(mp_tp);
-               if (mptcp_dbg == MP_VERBOSE_DEBUG_2) {
-                       printf("%s: unexpected dack %llx snduna %llx "
-                           "sndmax %llx\n", __func__, full_dack,
-                           mp_tp->mpt_snduna, mp_tp->mpt_sndmax);
-               }
-       }
-
-       if (mptcp_dbg == MP_VERBOSE_DEBUG_2) {
-               printf("%s: full_dack = %llu \n", __func__, full_dack);
+               mptcplog((LOG_ERR,"MPTCP Socket: "
+                   "%s: unexpected dack %llx snduna %llx "
+                   "sndmax %llx\n", __func__, full_dack,
+                   mp_tp->mpt_snduna, mp_tp->mpt_sndmax),
+                   (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG),
+                   MPTCP_LOGLVL_LOG);
        }
 }
 
@@ -1435,17 +1423,14 @@ mptcp_do_dss_opt_meat(u_char *cp, struct tcpcb *tp)
 
 #define        MPTCP_DSS_OPT_SZ_CHK(len, expected_len) {               \
        if (len != expected_len) {                              \
-               if (mptcp_dbg >= MP_ERR_DEBUG) {                \
-                       printf("MPTCP ERROR %s: bad len = %d"   \
-                           "dss: %x \n", __func__,             \
-                           len,                                \
-                           dss_rsp->mdss_flags);               \
-               }                                               \
+               mptcplog((LOG_ERR, "MPTCP Socket: "             \
+                   "%s: bad len = %d dss: %x \n", __func__,    \
+                   len, dss_rsp->mdss_flags),                  \
+                   (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG),      \
+                   MPTCP_LOGLVL_LOG);                          \
                return;                                         \
        }                                                       \
 }
-       if (mp_tp == NULL)
-               return;
 
        if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)
                csum_len = 2;
@@ -1519,9 +1504,10 @@ mptcp_do_dss_opt_meat(u_char *cp, struct tcpcb *tp)
                        MPTCP_DSS_OPT_SZ_CHK(dsn64->mdss_copt.mdss_len,
                            sizeof (struct mptcp_dsn64_opt) + csum_len);
 
-                       if (mptcp_dbg == MP_VERBOSE_DEBUG_4) {
-                               printf("%s: 64-bit M present.\n", __func__);
-                       }
+                       mptcplog((LOG_DEBUG,"MPTCP Socket: "
+                           "%s: 64-bit M present.\n", __func__),
+                           (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG),
+                           MPTCP_LOGLVL_LOG);
 
                        MPT_LOCK_SPIN(mp_tp);
                        mp_tp->mpt_flags |= MPTCPF_SND_64BITACK;
@@ -1552,10 +1538,10 @@ mptcp_do_dss_opt_meat(u_char *cp, struct tcpcb *tp)
                        MPTCP_DSS_OPT_SZ_CHK(dack64->mdss_copt.mdss_len,
                            sizeof (struct mptcp_data_ack64_opt));
 
-
-                       if (mptcp_dbg == MP_VERBOSE_DEBUG_4) {
-                               printf("%s: 64-bit A present. \n", __func__);
-                       }
+                       mptcplog((LOG_DEBUG,"MPTCP Socket: "
+                           "%s: 64-bit A present. \n", __func__),
+                           (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG),
+                           MPTCP_LOGLVL_LOG);
 
                        MPT_LOCK_SPIN(mp_tp);
                        mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK;
@@ -1574,10 +1560,10 @@ mptcp_do_dss_opt_meat(u_char *cp, struct tcpcb *tp)
                        MPTCP_DSS_OPT_SZ_CHK(dss_ack_rsp->mdss_copt.mdss_len,
                            sizeof (struct mptcp_dss64_ack32_opt) + csum_len);
 
-                       if (mptcp_dbg == MP_VERBOSE_DEBUG_4) {
-                               printf("%s: 64-bit M and 32-bit A present.\n",
-                                       __func__);
-                       }
+                       mptcplog((LOG_DEBUG,"MPTCP Socket: "
+                           "%s: 64-bit M and 32-bit A present.\n", __func__),
+                           (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG),
+                           MPTCP_LOGLVL_LOG);
 
                        u_int32_t dack = dss_ack_rsp->mdss_ack;
                        NTOHL(dack);
@@ -1606,10 +1592,11 @@ mptcp_do_dss_opt_meat(u_char *cp, struct tcpcb *tp)
                            dss32_ack64_opt->mdss_copt.mdss_len,
                            sizeof (struct mptcp_dss32_ack64_opt) + csum_len);
 
-                       if (mptcp_dbg == MP_VERBOSE_DEBUG_4) {
-                               printf("%s: 32-bit M and 64-bit A present.\n",
-                                       __func__);
-                       }
+                       mptcplog((LOG_DEBUG,"MPTCP Socket: "
+                           "%s: 32-bit M and 64-bit A present.\n", __func__),
+                           (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG),
+                           MPTCP_LOGLVL_LOG);
+                       
                        full_dack = mptcp_ntoh64(dss32_ack64_opt->mdss_ack);
                        mptcp_do_dss_opt_ack_meat(full_dack, tp);
                        NTOHL(dss32_ack64_opt->mdss_dsn);
@@ -1643,10 +1630,11 @@ mptcp_do_dss_opt_meat(u_char *cp, struct tcpcb *tp)
                        MPTCP_DSS_OPT_SZ_CHK(dss64_ack64->mdss_copt.mdss_len,
                            sizeof (struct mptcp_dss64_ack64_opt) + csum_len);
 
-                       if (mptcp_dbg == MP_VERBOSE_DEBUG_4) {
-                               printf("%s: 64-bit M and 64-bit A present.\n",
-                                       __func__);
-                       }
+                       mptcplog((LOG_DEBUG,"MPTCP Socket: "
+                           "%s: 64-bit M and 64-bit A present.\n", __func__),
+                           (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG),
+                           MPTCP_LOGLVL_LOG);
+
                        MPT_LOCK_SPIN(mp_tp);
                        mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK;
                        mp_tp->mpt_flags |= MPTCPF_SND_64BITACK;
@@ -1670,10 +1658,11 @@ mptcp_do_dss_opt_meat(u_char *cp, struct tcpcb *tp)
                        break;
                }
                default:
-                       if (mptcp_dbg >= MP_ERR_DEBUG) {
-                               printf("MPTCP ERROR %s: File bug, DSS flags = %x\n",
-                                       __func__, dss_rsp->mdss_flags);
-                       }
+                       mptcplog((LOG_DEBUG,"MPTCP Socket: "
+                           "%s: File bug, DSS flags = %x\n", __func__,
+                           dss_rsp->mdss_flags),
+                           (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG),
+                           MPTCP_LOGLVL_LOG);
                        break;
        }
 }
@@ -1684,6 +1673,10 @@ mptcp_do_fin_opt(struct tcpcb *tp)
 {
        struct mptcb *mp_tp = (struct mptcb *)tp->t_mptcb;
 
+       mptcplog((LOG_DEBUG,"MPTCP Socket: %s \n", __func__),
+           (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG),
+           MPTCP_LOGLVL_LOG);
+
        if (!(tp->t_mpflags & TMPF_RECV_DFIN)) {
                if (mp_tp != NULL) {
                        MPT_LOCK(mp_tp);
@@ -1723,13 +1716,7 @@ mptcp_do_dss_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen)
                struct mptcp_dss_copt *dss_rsp = (struct mptcp_dss_copt *)cp;
 
                if (dss_rsp->mdss_subtype == MPO_DSS) {
-                       if (mptcp_dbg > MP_VERBOSE_DEBUG_4) {
-                               printf("%s: DSS option received: %d ",
-                                   __func__, dss_rsp->mdss_flags);
-                       }
                        if (dss_rsp->mdss_flags & MDSS_F) {
-                               if (mptcp_dbg >= MP_VERBOSE_DEBUG_1)
-                                       printf("%s: received FIN\n", __func__);
                                mptcp_do_fin_opt(tp);
                        }
 
@@ -1747,8 +1734,9 @@ mptcp_do_fastclose_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th)
        if (th->th_flags != TH_ACK)
                return;
 
-       if (mptcp_dbg > MP_VERBOSE_DEBUG_2)
-               printf("%s: received \n", __func__);
+       mptcplog((LOG_DEBUG,"MPTCP Socket: %s: \n", __func__),
+           (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG),
+           MPTCP_LOGLVL_LOG);
 
        if (fc_opt->mfast_len != sizeof (struct mptcp_fastclose_opt)) {
                tcpstat.tcps_invalid_opt++;
@@ -1773,15 +1761,6 @@ mptcp_do_fastclose_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th)
                return;
        }
 
-       MPT_LOCK(mp_tp);
-       if (mp_tp->mpt_state != MPTCPS_FASTCLOSE_WAIT) {
-               mp_tp->mpt_state = MPTCPS_FASTCLOSE_WAIT;
-               DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, 
-                   uint32_t, 0 /* event */);
-               mptcp_start_timer(mp_tp, MPTT_FASTCLOSE);
-       }
-       MPT_UNLOCK(mp_tp);
-
        /* Reset this flow */
        tp->t_mpflags |= TMPF_RESET;
 
@@ -1813,12 +1792,13 @@ mptcp_do_mpfail_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th)
        if ((th->th_flags != TH_ACK) && (th->th_flags != TH_RST))
                return;
 
+       mptcplog((LOG_DEBUG, "MPTCP Socket: %s: \n", __func__),
+           (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG), MPTCP_LOGLVL_LOG);
+
        if (fail_opt->mfail_len != sizeof (struct mptcp_mpfail_opt))
                return;
 
        mp_tp = (struct mptcb *)tp->t_mptcb;
-       if (mp_tp == NULL)
-               return;
        MPT_LOCK(mp_tp);
        mp_tp->mpt_flags |= MPTCPF_RECVD_MPFAIL;
        mp_tp->mpt_dsn_at_csum_fail = mptcp_hton64(fail_opt->mfail_dsn);
@@ -1832,7 +1812,7 @@ mptcp_do_mpfail_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th)
        mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
 }
 
-int
+void
 tcp_do_mptcp_options(struct tcpcb *tp, u_char *cp, struct tcphdr *th,
     struct tcpopt *to, int optlen)
 {
@@ -1840,10 +1820,13 @@ tcp_do_mptcp_options(struct tcpcb *tp, u_char *cp, struct tcphdr *th,
 
        /* All MPTCP options have at least 4 bytes */
        if (optlen < 4)
-               return (0);
+               return;
 
        mptcp_subtype = (cp[2] >> 4);
 
+       if (mptcp_sanitize_option(tp, mptcp_subtype) == 0)
+               return;
+
        switch (mptcp_subtype) {
                case MPO_CAPABLE:
                        mptcp_do_mpcapable_opt(tp, cp, th, optlen);
@@ -1866,10 +1849,9 @@ tcp_do_mptcp_options(struct tcpcb *tp, u_char *cp, struct tcphdr *th,
                        to->to_flags |= TOF_MPTCP;
                        break;
                default:
-                       printf("%s: type = %d\n", __func__, mptcp_subtype);
                        break;
        }
-       return (0);
+       return;
 }
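
tcp_do_mptcp_options() now vets the option subtype through mptcp_sanitize_option() before dispatching, which is why the per-handler if (mp_tp == NULL) checks are dropped throughout this file and why the function's int return value becomes void. A rough sketch of what such a gatekeeper might look like, consistent with those removals (the actual body is elsewhere in this commit; the details here are assumptions):

    static int
    mptcp_sanitize_option(struct tcpcb *tp, int mptcp_subtype)
    {
            struct mptcb *mp_tp = tptomptp(tp);

            /*
             * Every handler dispatched below dereferences the MPTCP
             * control block, so refuse the option when it is missing;
             * the real routine may also validate the subtype range.
             */
            if (mp_tp == NULL)
                    return (0);     /* caller drops the option */

            return (1);             /* safe to dispatch */
    }
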
 
 /*
@@ -1914,9 +1896,9 @@ mptcp_send_addaddr_opt(struct tcpcb *tp, struct mptcp_addaddr_opt *opt)
 void
 mptcp_send_remaddr_opt(struct tcpcb *tp, struct mptcp_remaddr_opt *opt)
 {
-       if (mptcp_dbg >= MP_ERR_DEBUG)
-               printf("%s: local id %d remove id %d \n", __func__,
-                   tp->t_local_aid, tp->t_rem_aid);
+       mptcplog((LOG_DEBUG,"MPTCP Socket: %s: local id %d remove id %d \n",
+           __func__, tp->t_local_aid, tp->t_rem_aid),
+           (MPTCP_SOCKET_DBG|MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG);
 
        bzero(opt, sizeof (*opt));
        opt->mr_kind = TCPOPT_MULTIPATH;
@@ -1983,7 +1965,8 @@ mptcp_snd_mpprio(struct tcpcb *tp, u_char *cp, int optlen)
        memcpy(cp + optlen, &mpprio, sizeof (mpprio));
        optlen += sizeof (mpprio);
        tp->t_mpflags &= ~TMPF_SND_MPPRIO;
-       if (mptcp_dbg >= MP_ERR_DEBUG)
-               printf("%s: aid = %d \n", __func__, tp->t_local_aid);
+       mptcplog((LOG_DEBUG, "MPTCP Socket: %s: aid = %d \n", __func__,
+           tp->t_local_aid), 
+           (MPTCP_SOCKET_DBG|MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG);
        return (optlen);
 }
index 8c925b9b9b3f6c0658d1cedc933a19b2ebbf969a..a9450dee9d0f17ec3a852f80eab325498093033a 100644 (file)
@@ -41,7 +41,7 @@
 #define        MPTCP_CAPABLE_RETRIES   (2)
 
 __BEGIN_DECLS
-extern int tcp_do_mptcp_options(struct tcpcb *, u_char *, struct tcphdr *,
+extern void tcp_do_mptcp_options(struct tcpcb *, u_char *, struct tcphdr *,
     struct tcpopt *, int);
 extern unsigned mptcp_setup_syn_opts(struct socket *, int, u_char*, unsigned);
 extern unsigned mptcp_setup_join_ack_opts(struct tcpcb *, u_char*, unsigned);
index 6537a1c5f189a47fd05a3c17c67e3a1d26a287a8..b4ecb3ff09e4985aab8be9dfc55aa0f6f3b800f3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2012-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -47,6 +47,7 @@
 #include <mach/sdt.h>
 
 #include <net/if.h>
+#include <net/if_var.h>
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
@@ -146,6 +147,7 @@ static void mptcp_subflow_wupcall(struct socket *, void *, int);
 static void mptcp_subflow_eupcall(struct socket *, void *, uint32_t);
 static void mptcp_update_last_owner(struct mptsub *, struct socket *);
 static void mptcp_output_needed(struct mptses *mpte, struct mptsub *to_mpts);
+static void mptcp_get_rtt_measurement(struct mptsub *, struct mptses *);
 
 /*
  * Possible return values for subflow event handlers.  Note that success
@@ -159,26 +161,25 @@ typedef enum {
        MPTS_EVRET_OK                   = 2,    /* OK */
        MPTS_EVRET_CONNECT_PENDING      = 3,    /* resume pended connects */
        MPTS_EVRET_DISCONNECT_FALLBACK  = 4,    /* abort all but preferred */
-       MPTS_EVRET_OK_UPDATE            = 5,    /* OK with conninfo update */
 } ev_ret_t;
 
-static ev_ret_t mptcp_subflow_events(struct mptses *, struct mptsub *);
-static ev_ret_t mptcp_subflow_connreset_ev(struct mptses *, struct mptsub *);
-static ev_ret_t mptcp_subflow_cantrcvmore_ev(struct mptses *, struct mptsub *);
-static ev_ret_t mptcp_subflow_cantsendmore_ev(struct mptses *, struct mptsub *);
-static ev_ret_t mptcp_subflow_timeout_ev(struct mptses *, struct mptsub *);
-static ev_ret_t mptcp_subflow_nosrcaddr_ev(struct mptses *, struct mptsub *);
-static ev_ret_t mptcp_subflow_failover_ev(struct mptses *, struct mptsub *);
-static ev_ret_t mptcp_subflow_ifdenied_ev(struct mptses *, struct mptsub *);
-static ev_ret_t mptcp_subflow_suspend_ev(struct mptses *, struct mptsub *);
-static ev_ret_t mptcp_subflow_resume_ev(struct mptses *, struct mptsub *);
-static ev_ret_t mptcp_subflow_connected_ev(struct mptses *, struct mptsub *);
-static ev_ret_t mptcp_subflow_disconnected_ev(struct mptses *, struct mptsub *);
-static ev_ret_t mptcp_subflow_mpstatus_ev(struct mptses *, struct mptsub *);
-static ev_ret_t mptcp_subflow_mustrst_ev(struct mptses *, struct mptsub *);
-static ev_ret_t mptcp_fastjoin_ev(struct mptses *, struct mptsub *);
-static ev_ret_t mptcp_deleteok_ev(struct mptses *, struct mptsub *);
-static ev_ret_t mptcp_subflow_mpcantrcvmore_ev(struct mptses *, struct mptsub *);
+static ev_ret_t mptcp_subflow_events(struct mptses *, struct mptsub *, uint64_t *);
+static ev_ret_t mptcp_subflow_connreset_ev(struct mptses *, struct mptsub *, uint64_t *);
+static ev_ret_t mptcp_subflow_cantrcvmore_ev(struct mptses *, struct mptsub *, uint64_t *);
+static ev_ret_t mptcp_subflow_cantsendmore_ev(struct mptses *, struct mptsub *, uint64_t *);
+static ev_ret_t mptcp_subflow_timeout_ev(struct mptses *, struct mptsub *, uint64_t *);
+static ev_ret_t mptcp_subflow_nosrcaddr_ev(struct mptses *, struct mptsub *, uint64_t *);
+static ev_ret_t mptcp_subflow_failover_ev(struct mptses *, struct mptsub *, uint64_t *);
+static ev_ret_t mptcp_subflow_ifdenied_ev(struct mptses *, struct mptsub *, uint64_t *);
+static ev_ret_t mptcp_subflow_suspend_ev(struct mptses *, struct mptsub *, uint64_t *);
+static ev_ret_t mptcp_subflow_resume_ev(struct mptses *, struct mptsub *, uint64_t *);
+static ev_ret_t mptcp_subflow_connected_ev(struct mptses *, struct mptsub *, uint64_t *);
+static ev_ret_t mptcp_subflow_disconnected_ev(struct mptses *, struct mptsub *, uint64_t *);
+static ev_ret_t mptcp_subflow_mpstatus_ev(struct mptses *, struct mptsub *, uint64_t *);
+static ev_ret_t mptcp_subflow_mustrst_ev(struct mptses *, struct mptsub *, uint64_t *);
+static ev_ret_t mptcp_fastjoin_ev(struct mptses *, struct mptsub *, uint64_t *);
+static ev_ret_t mptcp_deleteok_ev(struct mptses *, struct mptsub *, uint64_t *);
+static ev_ret_t mptcp_subflow_mpcantrcvmore_ev(struct mptses *, struct mptsub *, uint64_t *);
 
 static const char *mptcp_evret2str(ev_ret_t);
 
@@ -206,9 +207,14 @@ SYSCTL_DECL(_net_inet);
 
 SYSCTL_NODE(_net_inet, OID_AUTO, mptcp, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "MPTCP");
 
-uint32_t mptcp_verbose = 0;            /* more noise if greater than 1 */
-SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, verbose, CTLFLAG_RW|CTLFLAG_LOCKED,
-       &mptcp_verbose, 0, "MPTCP verbosity level");
+uint32_t mptcp_dbg_area = 0;           /* more noise if greater than 1 */
+SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, dbg_area, CTLFLAG_RW|CTLFLAG_LOCKED,
+       &mptcp_dbg_area, 0, "MPTCP debug area");
+
+uint32_t mptcp_dbg_level = 0;
+SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dbg_level, CTLFLAG_RW | CTLFLAG_LOCKED,
+       &mptcp_dbg_level, 0, "MPTCP debug level");
+
 
 SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, pcbcount, CTLFLAG_RD|CTLFLAG_LOCKED,
        &mtcbinfo.mppi_count, 0, "Number of active PCBs");
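
The single net.inet.mptcp.verbose knob is thus split into an area bitmask and a severity level, mirroring the two extra arguments mptcplog() now takes. Assuming the OID names declared above, user space could enable, say, socket-area logging at LOG severity roughly as follows (the numeric bit and level values are placeholders, not the kernel's actual definitions):

    /* user-space sketch; the values below are assumed, not authoritative */
    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
            uint32_t area  = 0x01;  /* e.g. the socket debug-area bit */
            uint32_t level = 2;     /* e.g. MPTCP_LOGLVL_LOG */

            if (sysctlbyname("net.inet.mptcp.dbg_area", NULL, NULL,
                &area, sizeof (area)) == -1)
                    perror("dbg_area");
            if (sysctlbyname("net.inet.mptcp.dbg_level", NULL, NULL,
                &level, sizeof (level)) == -1)
                    perror("dbg_level");
            return (0);
    }
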
@@ -229,12 +235,11 @@ SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, delayed, CTLFLAG_RW|CTLFLAG_LOCKED,
        &mptcp_delayed_subf_start, 0, "MPTCP Delayed Subflow start");
 
 /*
- * SYSCTL for RTT spike measurement threshold in msecs.
+ * sysctl to use network status hints from symptomsd
  */
-int32_t mptcp_rto_spike_thresh = 3000;
-SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, rto_spikethresh,
-       CTLFLAG_RW|CTLFLAG_LOCKED, &mptcp_rto_spike_thresh, 0,
-       "MPTCP RTT spike thresh");
+uint32_t mptcp_use_symptomsd = 1;
+SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, usesymptoms, CTLFLAG_RW|CTLFLAG_LOCKED,
+       &mptcp_use_symptomsd, 0, "MPTCP Use SymptomsD");
 
 static struct protosw mptcp_subflow_protosw;
 static struct pr_usrreqs mptcp_subflow_usrreqs;
@@ -243,6 +248,80 @@ static struct ip6protosw mptcp_subflow_protosw6;
 static struct pr_usrreqs mptcp_subflow_usrreqs6;
 #endif /* INET6 */
 
+typedef struct mptcp_subflow_event_entry {
+       uint64_t        sofilt_hint_mask;
+       ev_ret_t        (*sofilt_hint_ev_hdlr)(
+                           struct mptses *mpte,
+                           struct mptsub *mpts,
+                           uint64_t *p_mpsofilt_hint);
+} mptsub_ev_entry_t;
+
+static mptsub_ev_entry_t mpsub_ev_entry_tbl [] = {
+       {
+               .sofilt_hint_mask = SO_FILT_HINT_MPCANTRCVMORE,
+               .sofilt_hint_ev_hdlr =  mptcp_subflow_mpcantrcvmore_ev,
+       },
+       {
+               .sofilt_hint_mask = SO_FILT_HINT_MPFAILOVER,
+               .sofilt_hint_ev_hdlr = mptcp_subflow_failover_ev,
+       },
+       {
+               .sofilt_hint_mask = SO_FILT_HINT_CONNRESET,
+               .sofilt_hint_ev_hdlr = mptcp_subflow_connreset_ev,
+       },
+       {
+               .sofilt_hint_mask = SO_FILT_HINT_MUSTRST,
+               .sofilt_hint_ev_hdlr = mptcp_subflow_mustrst_ev,
+       },
+       {
+               .sofilt_hint_mask = SO_FILT_HINT_CANTRCVMORE,
+               .sofilt_hint_ev_hdlr = mptcp_subflow_cantrcvmore_ev,
+       },
+       {       .sofilt_hint_mask = SO_FILT_HINT_CANTSENDMORE,
+               .sofilt_hint_ev_hdlr = mptcp_subflow_cantsendmore_ev,
+       },
+       {
+               .sofilt_hint_mask = SO_FILT_HINT_TIMEOUT,
+               .sofilt_hint_ev_hdlr = mptcp_subflow_timeout_ev,
+       },
+       {
+               .sofilt_hint_mask = SO_FILT_HINT_NOSRCADDR,
+               .sofilt_hint_ev_hdlr = mptcp_subflow_nosrcaddr_ev,
+       },
+       {
+               .sofilt_hint_mask = SO_FILT_HINT_IFDENIED,
+               .sofilt_hint_ev_hdlr = mptcp_subflow_ifdenied_ev,
+       },
+       {
+               .sofilt_hint_mask = SO_FILT_HINT_SUSPEND,
+               .sofilt_hint_ev_hdlr = mptcp_subflow_suspend_ev,
+       },
+       {
+               .sofilt_hint_mask = SO_FILT_HINT_RESUME,
+               .sofilt_hint_ev_hdlr = mptcp_subflow_resume_ev,
+       },
+       {
+               .sofilt_hint_mask = SO_FILT_HINT_CONNECTED,
+               .sofilt_hint_ev_hdlr = mptcp_subflow_connected_ev,
+       },
+       {
+               .sofilt_hint_mask = SO_FILT_HINT_MPSTATUS,
+               .sofilt_hint_ev_hdlr = mptcp_subflow_mpstatus_ev,
+       },
+       {
+               .sofilt_hint_mask = SO_FILT_HINT_DELETEOK,
+               .sofilt_hint_ev_hdlr = mptcp_deleteok_ev,
+       },
+       {
+               .sofilt_hint_mask = SO_FILT_HINT_DISCONNECTED,
+               .sofilt_hint_ev_hdlr = mptcp_subflow_disconnected_ev,
+       },
+       {
+               .sofilt_hint_mask = SO_FILT_HINT_MPFASTJ,
+               .sofilt_hint_ev_hdlr = mptcp_fastjoin_ev,
+       }
+};
+
 /*
  * Protocol pr_init callback.
  */
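
The mpsub_ev_entry_tbl added above turns the long chain of per-event if blocks being deleted from mptcp_subflow_events() further down into a data table: each entry binds one SO_FILT_HINT_* bit to its handler, and every handler now also receives a uint64_t *p_mpsofilt_hint out-parameter (hence the extra argument on all the prototypes earlier in this commit). The dispatch side presumably collapses into a loop over the table, along these lines (a sketch consistent with the removed blocks, not the verbatim replacement):

    /* inside mptcp_subflow_events(), after the pending events are snapshotted */
    for (i = 0; i < mpsub_ev_entry_count && events != 0; i++) {
            if ((events & mpsub_ev_entry_tbl[i].sofilt_hint_mask) &&
                (ret >= MPTS_EVRET_OK)) {
                    ev_ret_t error =
                        mpsub_ev_entry_tbl[i].sofilt_hint_ev_hdlr(mpte, mpts,
                            p_mpsofilt_hint);
                    events &= ~mpsub_ev_entry_tbl[i].sofilt_hint_mask;
                    ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
            }
    }

Table-driven dispatch keeps the ret-merging and bit-clearing logic in one place, so supporting a new subflow event only requires a new table entry.
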
@@ -327,9 +406,10 @@ mptcp_init(struct protosw *pp, struct domain *dp)
        mtcbinfo.mppi_lock_attr = lck_attr_alloc_init();
        lck_mtx_init(&mtcbinfo.mppi_lock, mtcbinfo.mppi_lock_grp,
            mtcbinfo.mppi_lock_attr);
-       mtcbinfo.mppi_gc = mptcp_gc;
 
+       mtcbinfo.mppi_gc = mptcp_gc;
        mtcbinfo.mppi_timer = mptcp_timer;
+       mtcbinfo.mppi_pcbe_create = mptcp_sescreate;
 
        /* attach to MP domain for garbage collection to take place */
        mp_pcbinfo_attach(&mtcbinfo);
@@ -364,13 +444,12 @@ mptcp_init(struct protosw *pp, struct domain *dp)
 
        /* Set up a list of unique keys */
        mptcp_key_pool_init();
-
 }
 
 /*
  * Create an MPTCP session, called as a result of opening a MPTCP socket.
  */
-struct mptses *
+void *
 mptcp_sescreate(struct socket *mp_so, struct mppcb *mpp)
 {
        struct mppcbinfo *mppi;
@@ -382,8 +461,8 @@ mptcp_sescreate(struct socket *mp_so, struct mppcb *mpp)
        mppi = mpp->mpp_pcbinfo;
        VERIFY(mppi != NULL);
 
-       mpte = &((struct mpp_mtp *)mpp)->mpp_ses;
-       mp_tp = &((struct mpp_mtp *)mpp)->mtcb;
+       __IGNORE_WCASTALIGN(mpte = &((struct mpp_mtp *)mpp)->mpp_ses);
+       __IGNORE_WCASTALIGN(mp_tp = &((struct mpp_mtp *)mpp)->mtcb);
 
        /* MPTCP Multipath PCB Extension */
        bzero(mpte, sizeof (*mpte));
@@ -394,8 +473,8 @@ mptcp_sescreate(struct socket *mp_so, struct mppcb *mpp)
 
        TAILQ_INIT(&mpte->mpte_sopts);
        TAILQ_INIT(&mpte->mpte_subflows);
-       mpte->mpte_associd = ASSOCID_ANY;
-       mpte->mpte_connid_last = CONNID_ANY;
+       mpte->mpte_associd = SAE_ASSOCID_ANY;
+       mpte->mpte_connid_last = SAE_CONNID_ANY;
 
        lck_mtx_init(&mpte->mpte_thread_lock, mppi->mppi_lock_grp,
            mppi->mppi_lock_attr);
@@ -420,7 +499,7 @@ mptcp_sescreate(struct socket *mp_so, struct mppcb *mpp)
        lck_mtx_init(&mp_tp->mpt_lock, mppi->mppi_lock_grp,
            mppi->mppi_lock_attr);
        mp_tp->mpt_mpte = mpte;
-
+       mp_tp->mpt_state = MPTCPS_CLOSED;
 out:
        if (error != 0)
                lck_mtx_destroy(&mpte->mpte_thread_lock, mppi->mppi_lock_grp);
@@ -625,9 +704,10 @@ mptcp_subflow_socreate(struct mptses *mpte, struct mptsub *mpts, int dom,
         */
        if ((error = socreate_internal(dom, so, SOCK_STREAM,
            IPPROTO_TCP, p, SOCF_ASYNC | SOCF_MP_SUBFLOW, PROC_NULL)) != 0) {
-               mptcplog((LOG_ERR, "MPTCP ERROR %s: mp_so 0x%llx unable to "
-                   "create subflow socket error %d\n", __func__,
-                   (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), error));
+               mptcplog((LOG_ERR, "MPTCP Socket: subflow socreate mp_so 0x%llx"
+                   " unable to create subflow socket error %d\n",
+                   (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), error),
+                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
                return (error);
        }
 
@@ -705,11 +785,13 @@ mptcp_subflow_socreate(struct mptses *mpte, struct mptsub *mpts, int dom,
                interim = (mpo->mpo_flags & MPOF_INTERIM);
                if (mptcp_subflow_sosetopt(mpte, *so, mpo) != 0 && interim) {
                        char buf[32];
-                       mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s val %d "
-                           "interim record removed\n", __func__,
+                       mptcplog((LOG_ERR, "MPTCP Socket: subflow socreate"
+                           " mp_so 0x%llx"
+                           " sopt %s val %d interim record removed\n",
                            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                            mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name,
-                           buf, sizeof (buf)), mpo->mpo_intval));
+                           buf, sizeof (buf)), mpo->mpo_intval),
+                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
                        mptcp_sopt_remove(mpte, mpo);
                        mptcp_sopt_free(mpo);
                        continue;
@@ -807,8 +889,8 @@ mptcp_subflow_soconnectx(struct mptses *mpte, struct mptsub *mpts)
                dst_se = TAILQ_FIRST(&mpts->mpts_dst_sl->sl_head);
                VERIFY(dst_se != NULL);
 
-               mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx dst %s[%d] cid %d "
-                   "[pended %s]\n", __func__,
+               mptcplog((LOG_DEBUG, "MPTCP Socket: connectx mp_so 0x%llx "
+                   "dst %s[%d] cid %d [pended %s]\n",
                    (u_int64_t)VM_KERNEL_ADDRPERM(mpte->mpte_mppcb->mpp_socket),
                    inet_ntop(af, ((af == AF_INET) ?
                    (void *)&SIN(dst_se->se_addr)->sin_addr.s_addr :
@@ -818,7 +900,8 @@ mptcp_subflow_soconnectx(struct mptses *mpte, struct mptsub *mpts)
                    ntohs(SIN6(dst_se->se_addr)->sin6_port)),
                    mpts->mpts_connid,
                    ((mpts->mpts_flags & MPTSF_CONNECT_PENDING) ?
-                   "YES" : "NO")));
+                   "YES" : "NO")),
+                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
        }
 
        mpts->mpts_flags &= ~MPTSF_CONNECT_PENDING;
@@ -829,8 +912,8 @@ mptcp_subflow_soconnectx(struct mptses *mpte, struct mptsub *mpts)
        /* connect the subflow socket */
        error = soconnectxlocked(so, &mpts->mpts_src_sl, &mpts->mpts_dst_sl,
            mpts->mpts_mpcr.mpcr_proc, mpts->mpts_mpcr.mpcr_ifscope,
-           mpte->mpte_associd, NULL, TCP_CONNREQF_MPTCP,
-           &mpts->mpts_mpcr, sizeof (mpts->mpts_mpcr));
+           mpte->mpte_associd, NULL, CONNREQF_MPTCP,
+           &mpts->mpts_mpcr, sizeof (mpts->mpts_mpcr), NULL, NULL);
        socket_unlock(so, 0);
 
        /* Allocate a unique address id per subflow */
@@ -1147,7 +1230,7 @@ mptcp_subflow_add(struct mptses *mpte, struct mptsub *mpts,
        VERIFY(mpts->mpts_mpte == NULL);
        VERIFY(mpts->mpts_socket == NULL);
        VERIFY(mpts->mpts_dst_sl != NULL);
-       VERIFY(mpts->mpts_connid == CONNID_ANY);
+       VERIFY(mpts->mpts_connid == SAE_CONNID_ANY);
 
        /* select source (if specified) and destination addresses */
        if ((error = in_selectaddrs(AF_UNSPEC, &mpts->mpts_src_sl, &src_se,
@@ -1197,17 +1280,17 @@ mptcp_subflow_add(struct mptses *mpte, struct mptsub *mpts,
        }
 
        /*
-        * Increment the counter, while avoiding 0 (CONNID_ANY) and
-        * -1 (CONNID_ALL).
+        * Increment the counter, while avoiding 0 (SAE_CONNID_ANY) and
+        * -1 (SAE_CONNID_ALL).
         */
        mpte->mpte_connid_last++;
-       if (mpte->mpte_connid_last == CONNID_ALL ||
-           mpte->mpte_connid_last == CONNID_ANY)
+       if (mpte->mpte_connid_last == SAE_CONNID_ALL ||
+           mpte->mpte_connid_last == SAE_CONNID_ANY)
                mpte->mpte_connid_last++;
 
        mpts->mpts_connid = mpte->mpte_connid_last;
-       VERIFY(mpts->mpts_connid != CONNID_ANY &&
-           mpts->mpts_connid != CONNID_ALL);
+       VERIFY(mpts->mpts_connid != SAE_CONNID_ANY &&
+           mpts->mpts_connid != SAE_CONNID_ALL);
        
        /* Allocate a unique address id per subflow */
        mpte->mpte_addrid_last++;
@@ -1226,11 +1309,12 @@ mptcp_subflow_add(struct mptses *mpte, struct mptsub *mpts,
                VERIFY(mpts->mpts_outif != NULL);
                mpts->mpts_flags |= MPTSF_BOUND_IF;
 
-               mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx bindif %s[%d] "
-                   "cid %d\n", __func__,
+               mptcplog((LOG_DEBUG, "MPTCP Socket: subflow_add mp_so 0x%llx "
+                   "bindif %s[%d] cid %d\n",
                    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                    mpts->mpts_outif->if_xname,
-                   ifscope, mpts->mpts_connid));
+                   ifscope, mpts->mpts_connid),
+                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
                socket_unlock(so, 0);
        }
 
@@ -1268,13 +1352,14 @@ mptcp_subflow_add(struct mptses *mpte, struct mptsub *mpts,
                if (af == AF_INET || af == AF_INET6) {
                        char sbuf[MAX_IPv6_STR_LEN];
 
-                       mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx bindip %s[%d] "
-                           "cid %d\n", __func__,
+                       mptcplog((LOG_DEBUG, "MPTCP Socket: subflow_add "
+                           "mp_so 0x%llx bindip %s[%d] cid %d\n",
                            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                            inet_ntop(af, ((af == AF_INET) ?
                            (void *)&SIN(sa)->sin_addr.s_addr :
                            (void *)&SIN6(sa)->sin6_addr), sbuf, sizeof (sbuf)),
-                           ntohs(lport), mpts->mpts_connid));
+                           ntohs(lport), mpts->mpts_connid),
+                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
                }
        }
 
@@ -1353,7 +1438,8 @@ mptcp_subflow_add(struct mptses *mpte, struct mptsub *mpts,
        if (af == AF_INET || af == AF_INET6) {
                char dbuf[MAX_IPv6_STR_LEN];
 
-               mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx dst %s[%d] cid %d "
+               mptcplog((LOG_DEBUG, "MPTCP Socket: %s "
+                   "mp_so 0x%llx dst %s[%d] cid %d "
                    "[pending %s]\n", __func__,
                    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                    inet_ntop(af, ((af == AF_INET) ?
@@ -1364,7 +1450,8 @@ mptcp_subflow_add(struct mptses *mpte, struct mptsub *mpts,
                    ntohs(SIN6(dst_se->se_addr)->sin6_port)),
                    mpts->mpts_connid,
                    ((mpts->mpts_flags & MPTSF_CONNECT_PENDING) ?
-                   "YES" : "NO")));
+                   "YES" : "NO")),
+                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
        }
 
        /* connect right away if first attempt, or if join can be done now */
@@ -1401,22 +1488,26 @@ mptcp_subflow_del(struct mptses *mpte, struct mptsub *mpts, boolean_t close)
        if (close && !((mpts->mpts_flags & MPTSF_DELETEOK) &&
            (mpts->mpts_flags & MPTSF_USER_DISCONNECT))) {
                MPTS_UNLOCK(mpts);
-               mptcplog((LOG_DEBUG, "%s: %d %x\n", __func__,
-                   mpts->mpts_soerror, mpts->mpts_flags));
+               mptcplog((LOG_DEBUG, "MPTCP Socket: subflow_del returning"
+                   " mp_so 0x%llx flags %x\n",
+                   (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_flags),
+                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
                return;
        }
 
-       mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx [u=%d,r=%d] cid %d "
-           "[close %s] %d %x\n", __func__,
+       mptcplog((LOG_DEBUG, "MPTCP Socket: subflow_del mp_so 0x%llx "
+           "[u=%d,r=%d] cid %d [close %s] %d %x error %d\n",
            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
            mp_so->so_usecount,
            mp_so->so_retaincnt, mpts->mpts_connid,
            (close ? "YES" : "NO"), mpts->mpts_soerror,
-           mpts->mpts_flags));
+           mpts->mpts_flags,
+           mp_so->so_error),
+           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
 
        VERIFY(mpts->mpts_mpte == mpte);
-       VERIFY(mpts->mpts_connid != CONNID_ANY &&
-           mpts->mpts_connid != CONNID_ALL);
+       VERIFY(mpts->mpts_connid != SAE_CONNID_ANY &&
+           mpts->mpts_connid != SAE_CONNID_ALL);
 
        VERIFY(mpts->mpts_flags & MPTSF_ATTACHED);
        atomic_bitclear_32(&mpts->mpts_flags, MPTSF_ATTACHED);
@@ -1466,8 +1557,8 @@ mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts,
 
        VERIFY(mpts->mpts_mpte == mpte);
        VERIFY(mpts->mpts_socket != NULL);
-       VERIFY(mpts->mpts_connid != CONNID_ANY &&
-           mpts->mpts_connid != CONNID_ALL);
+       VERIFY(mpts->mpts_connid != SAE_CONNID_ANY &&
+           mpts->mpts_connid != SAE_CONNID_ALL);
 
        if (mpts->mpts_flags & (MPTSF_DISCONNECTING|MPTSF_DISCONNECTED))
                return;
@@ -1493,9 +1584,10 @@ mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts,
        socket_lock(so, 0);
        if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
            (so->so_state & SS_ISCONNECTED)) {
-               mptcplog((LOG_DEBUG, "%s: cid %d fin %d [linger %s]\n",
-                   __func__, mpts->mpts_connid, send_dfin,
-                   (deleteok ? "NO" : "YES")));
+               mptcplog((LOG_DEBUG, "MPTCP Socket %s: cid %d fin %d "
+                   "[linger %s]\n", __func__, mpts->mpts_connid, send_dfin,
+                   (deleteok ? "NO" : "YES")),
+                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
 
                if (send_dfin)
                        mptcp_send_dfin(so);
@@ -1570,34 +1662,38 @@ mptcp_subflow_input(struct mptses *mpte, struct mptsub *mpts)
 
        error = sock_receive_internal(so, NULL, &m, 0, NULL);
        if (error != 0 && error != EWOULDBLOCK) {
-               mptcplog((LOG_ERR, "%s: cid %d error %d\n",
-                   __func__, mpts->mpts_connid, error));
+               mptcplog((LOG_ERR, "MPTCP Receiver: %s cid %d error %d\n",
+                   __func__, mpts->mpts_connid, error),
+                   MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR);
                MPTS_UNLOCK(mpts);
-               mpts_alt = mptcp_get_subflow(mpte, mpts);
+               mpts_alt = mptcp_get_subflow(mpte, mpts, NULL);
                if (mpts_alt == NULL) {
                        if (mptcp_delayed_subf_start) {
                                mpts_alt = mptcp_get_pending_subflow(mpte,
                                    mpts);
                                if (mpts_alt) {
-                                       mptcplog((LOG_INFO,"%s: pending %d\n",
-                                           __func__, mpts_alt->mpts_connid));
+                                       mptcplog((LOG_DEBUG,"MPTCP Receiver:"
+                                       " %s: pending %d\n",
+                                       __func__, mpts_alt->mpts_connid),
+                                       MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR);
                                } else {
-                                       mptcplog((LOG_ERR, "%s: no pending",
-                                           "%d\n", __func__,
-                                           mpts->mpts_connid));
-                                       mpte->mpte_mppcb->mpp_socket->so_error =
-                                           error;
+                                       mptcplog((LOG_ERR, "MPTCP Receiver:"
+                                           " %s: no pending flow for cid %d",
+                                           __func__, mpts->mpts_connid),
+                                           MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR);
                                }
                        } else {
-                               mptcplog((LOG_ERR, "%s: no alt path cid %d\n",
-                                   __func__, mpts->mpts_connid));
-                               mpte->mpte_mppcb->mpp_socket->so_error = error;
+                               mptcplog((LOG_ERR, "MPTCP Receiver: %s: no alt"
+                                   " path for cid %d\n", __func__, 
+                                   mpts->mpts_connid),
+                                   MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR);
                        }
                }
                MPTS_LOCK(mpts);
        } else if (error == 0) {
-               mptcplog3((LOG_DEBUG, "%s: cid %d \n",
-                   __func__, mpts->mpts_connid));
+               mptcplog((LOG_DEBUG, "MPTCP Receiver: %s: cid %d \n",
+                   __func__, mpts->mpts_connid),
+                   MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
        }
 
        /* In fallback, make sure to accept data on all but one subflow */
@@ -1608,6 +1704,13 @@ mptcp_subflow_input(struct mptses *mpte, struct mptsub *mpts)
        }
 
        if (m != NULL) {
+
+               /* Did we receive data on the backup subflow? */
+               if (!(mpts->mpts_flags & MPTSF_ACTIVE))
+                       mpts->mpts_peerswitch++;
+               else
+                       mpts->mpts_peerswitch = 0;
+
                /*
                 * Release subflow lock since this may trigger MPTCP to send,
                 * possibly on a different subflow.  An extra reference has
@@ -1680,9 +1783,10 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts)
 
        /* subflow socket is suspended? */
        if (mpts->mpts_flags & MPTSF_SUSPENDED) {
-               mptcplog((LOG_ERR, "%s: mp_so 0x%llx cid %d is flow "
-                   "controlled\n", __func__,
-                   (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid));
+               mptcplog((LOG_ERR, "MPTCP Sender: %s mp_so 0x%llx cid %d is "
+                   "flow controlled\n", __func__,
+                   (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid),
+                   MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
                goto out;
        }
 
@@ -1690,9 +1794,10 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts)
        if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE) &&
            !(mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
            !(mpts->mpts_flags & MPTSF_FASTJ_SEND)) {
-               mptcplog((LOG_ERR, "%s: mp_so 0x%llx cid %d not "
+               mptcplog((LOG_ERR, "MPTCP Sender: %s mp_so 0x%llx cid %d not "
                    "MPTCP capable\n", __func__,
-                   (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid));
+                   (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid),
+                   MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
                goto out;
        }
 
@@ -1741,8 +1846,9 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts)
        if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) {
                u_int64_t len = 0;
                len = mp_tp->mpt_snduna - mpt_dsn;
+               MPT_UNLOCK(mp_tp);
                sbdrop(&mp_so->so_snd, (int)len);
-
+               MPT_LOCK(mp_tp);
        }
 
        /*
@@ -1820,12 +1926,13 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts)
                mpt_mbuf = mpt_mbuf->m_next;
                mpt_dsn = mpt_mbuf->m_pkthdr.mp_dsn;
        }
-       if ((mpts->mpts_connid == 2) || (mpts->mpts_flags & MPTSF_MP_DEGRADED))
-               mptcplog2((LOG_INFO, "%s: snduna = %llu off = %lld id = %d"
-                   " %llu \n",
-                   __func__,
-                   mp_tp->mpt_snduna, off, mpts->mpts_connid,
-                   mpts->mpts_sndnxt));
+       if (mpts->mpts_flags & MPTSF_MP_DEGRADED)
+               mptcplog((LOG_DEBUG, "MPTCP Sender: %s cid = %d "
+                   "snduna = %llu sndnxt = %llu probe %d\n",
+                   __func__, mpts->mpts_connid,
+                   mp_tp->mpt_snduna, mpts->mpts_sndnxt,
+                   mpts->mpts_probecnt),
+                   MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
 
        VERIFY(mpt_mbuf && (mpt_mbuf->m_pkthdr.pkt_flags & PKTF_MPTCP));
 
@@ -1870,19 +1977,6 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts)
                        tail = m;
                }
 
-               /* last contiguous mapping is stored for error cases */
-               if (mpts->mpts_lastmap.mptsl_dsn +
-                   mpts->mpts_lastmap.mptsl_len == mpt_dsn) {
-                       mpts->mpts_lastmap.mptsl_len += tot_sent;
-               } else if (MPTCP_SEQ_LT((mpts->mpts_lastmap.mptsl_dsn +
-                   mpts->mpts_lastmap.mptsl_len), mpt_dsn)) {
-                       if (m->m_pkthdr.mp_dsn == 0)
-                               panic("%s %llu", __func__, mpt_dsn);
-                       mpts->mpts_lastmap.mptsl_dsn = m->m_pkthdr.mp_dsn;
-                       mpts->mpts_lastmap.mptsl_sseq = m->m_pkthdr.mp_rseq;
-                       mpts->mpts_lastmap.mptsl_len = m->m_pkthdr.mp_rlen;
-               }
-
                tot_sent += mlen;
                off = 0;
                mpt_mbuf = mpt_mbuf->m_next;
@@ -1906,7 +2000,18 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts)
 
        if (error == 0) {
                mpts->mpts_sndnxt += tot_sent;
+
+               if (mpts->mpts_probesoon && mpts->mpts_maxseg && tot_sent) {
+                       tcpstat.tcps_mp_num_probes++;
+                       if (tot_sent < mpts->mpts_maxseg)
+                               mpts->mpts_probecnt += 1;
+                       else
+                               mpts->mpts_probecnt +=
+                                   tot_sent/mpts->mpts_maxseg;
+               }
+
                MPT_LOCK(mp_tp);
+
                if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mpts->mpts_sndnxt)) {
                        if (MPTCP_DATASEQ_HIGH32(mpts->mpts_sndnxt) >
                            MPTCP_DATASEQ_HIGH32(mp_tp->mpt_sndnxt))
@@ -1922,14 +2027,18 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts)
                        mpts->mpts_flags &= ~MPTSF_FASTJ_SEND;
                }
 
-               if ((mpts->mpts_connid >= 2) ||
-                   (mpts->mpts_flags & MPTSF_MP_DEGRADED))
-                       mptcplog2((LOG_DEBUG, "%s: cid %d wrote %d %d\n",
+               if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) ||
+                   (mpts->mpts_probesoon != 0))
+                       mptcplog((LOG_DEBUG, "MPTCP Sender: %s cid %d "
+                           "wrote %d %d probe %d probedelta %d\n",
                            __func__, mpts->mpts_connid, (int)tot_sent,
-                           (int) sb_cc));
+                           (int) sb_cc, mpts->mpts_probecnt,
+                           (tcp_now - mpts->mpts_probesoon)),
+                           MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
        } else {
-               mptcplog((LOG_ERR, "MPTCP ERROR %s: cid %d error %d len %zd\n",
-                   __func__, mpts->mpts_connid, error, tot_sent));
+               mptcplog((LOG_ERR, "MPTCP Sender: %s cid %d error %d len %zd\n",
+                   __func__, mpts->mpts_connid, error, tot_sent),
+                   MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
        }
 out:
        return (error);
@@ -1966,11 +2075,14 @@ mptcp_subflow_eupcall(struct socket *so, void *arg, uint32_t events)
  * Called for handling events related to the underlying subflow socket.
  */
 static ev_ret_t
-mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts)
+mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts,
+       uint64_t *p_mpsofilt_hint)
 {
        uint32_t events, save_events;
        ev_ret_t ret = MPTS_EVRET_OK;
-
+       int i = 0;
+       int mpsub_ev_entry_count = sizeof(mpsub_ev_entry_tbl)/
+               sizeof(mpsub_ev_entry_tbl[0]);
        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        MPTS_LOCK_ASSERT_HELD(mpts);
 
@@ -1991,88 +2103,22 @@ mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts)
        DTRACE_MPTCP3(subflow__events, struct mptses *, mpte,
            struct mptsub *, mpts, uint32_t, events);
 
-       mptcplog2((LOG_DEBUG, "%s: cid %d events=%b\n", __func__,
-           mpts->mpts_connid, events, SO_FILT_HINT_BITS));
-
-       if ((events & SO_FILT_HINT_MPCANTRCVMORE) && (ret >= MPTS_EVRET_OK)) {
-               ev_ret_t error = mptcp_subflow_mpcantrcvmore_ev(mpte, mpts);
-               events &= ~SO_FILT_HINT_MPCANTRCVMORE;
-               ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
-       }
-       if ((events & SO_FILT_HINT_MPFAILOVER) && (ret >= MPTS_EVRET_OK)) {
-               ev_ret_t error = mptcp_subflow_failover_ev(mpte, mpts);
-               events &= ~SO_FILT_HINT_MPFAILOVER;
-               ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
-       }
-       if ((events & SO_FILT_HINT_CONNRESET) && (ret >= MPTS_EVRET_OK)) {
-               ev_ret_t error = mptcp_subflow_connreset_ev(mpte, mpts);
-               events &= ~SO_FILT_HINT_CONNRESET;
-               ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
-       }
-       if ((events & SO_FILT_HINT_MUSTRST) && (ret >= MPTS_EVRET_OK)) {
-               ev_ret_t error = mptcp_subflow_mustrst_ev(mpte, mpts);
-               events &= ~SO_FILT_HINT_MUSTRST;
-               ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
-       }
-       if ((events & SO_FILT_HINT_CANTRCVMORE) && (ret >= MPTS_EVRET_OK)) {
-               ev_ret_t error = mptcp_subflow_cantrcvmore_ev(mpte, mpts);
-               events &= ~SO_FILT_HINT_CANTRCVMORE;
-               ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
-       }
-       if ((events & SO_FILT_HINT_CANTSENDMORE) && (ret >= MPTS_EVRET_OK)) {
-               ev_ret_t error = mptcp_subflow_cantsendmore_ev(mpte, mpts);
-               events &= ~SO_FILT_HINT_CANTSENDMORE;
-               ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
-       }
-       if ((events & SO_FILT_HINT_TIMEOUT) && (ret >= MPTS_EVRET_OK)) {
-               ev_ret_t error = mptcp_subflow_timeout_ev(mpte, mpts);
-               events &= ~SO_FILT_HINT_TIMEOUT;
-               ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
-       }
-       if ((events & SO_FILT_HINT_NOSRCADDR) && (ret >= MPTS_EVRET_OK)) {
-               ev_ret_t error = mptcp_subflow_nosrcaddr_ev(mpte, mpts);
-               events &= ~SO_FILT_HINT_NOSRCADDR;
-               ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
-       }
-       if ((events & SO_FILT_HINT_IFDENIED) && (ret >= MPTS_EVRET_OK)) {
-               ev_ret_t error = mptcp_subflow_ifdenied_ev(mpte, mpts);
-               events &= ~SO_FILT_HINT_IFDENIED;
-               ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
-       }
-       if ((events & SO_FILT_HINT_SUSPEND) && (ret >= MPTS_EVRET_OK)) {
-               ev_ret_t error = mptcp_subflow_suspend_ev(mpte, mpts);
-               events &= ~SO_FILT_HINT_SUSPEND;
-               ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
-       }
-       if ((events & SO_FILT_HINT_RESUME) && (ret >= MPTS_EVRET_OK)) {
-               ev_ret_t error = mptcp_subflow_resume_ev(mpte, mpts);
-               events &= ~SO_FILT_HINT_RESUME;
-               ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
-       }
-       if ((events & SO_FILT_HINT_CONNECTED) && (ret >= MPTS_EVRET_OK)) {
-               ev_ret_t error = mptcp_subflow_connected_ev(mpte, mpts);
-               events &= ~SO_FILT_HINT_CONNECTED;
-               ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
-       }
-       if ((events & SO_FILT_HINT_MPSTATUS) && (ret >= MPTS_EVRET_OK)) {
-               ev_ret_t error = mptcp_subflow_mpstatus_ev(mpte, mpts);
-               events &= ~SO_FILT_HINT_MPSTATUS;
-               ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
-       }
-       if ((events & SO_FILT_HINT_DELETEOK) && (ret >= MPTS_EVRET_OK)) {
-               ev_ret_t error = mptcp_deleteok_ev(mpte, mpts);
-               events &= ~SO_FILT_HINT_DELETEOK;
-               ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
-       }
-       if ((events & SO_FILT_HINT_DISCONNECTED) && (ret >= MPTS_EVRET_OK)) {
-               ev_ret_t error = mptcp_subflow_disconnected_ev(mpte, mpts);
-               events &= ~SO_FILT_HINT_DISCONNECTED;
-               ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
-       }
-       if ((events & SO_FILT_HINT_MPFASTJ) && (ret >= MPTS_EVRET_OK)) {
-               ev_ret_t error = mptcp_fastjoin_ev(mpte, mpts);
-               events &= ~SO_FILT_HINT_MPFASTJ;
-               ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
+       mptcplog((LOG_DEBUG, "MPTCP Events: %s cid %d events=%b\n", __func__,
+           mpts->mpts_connid, events, SO_FILT_HINT_BITS),
+           MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);
+
+       /*
+        * Process all the socket filter hints and reset the hint
+        * once it is handled
+        */
+       for (i = 0; (i < mpsub_ev_entry_count) && events; i++) {
+               if ((events & mpsub_ev_entry_tbl[i].sofilt_hint_mask) &&
+                   (ret >= MPTS_EVRET_OK)) {
+                       ev_ret_t error =
+                               mpsub_ev_entry_tbl[i].sofilt_hint_ev_hdlr(mpte, mpts, p_mpsofilt_hint);
+                       events &= ~mpsub_ev_entry_tbl[i].sofilt_hint_mask;
+                       ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
+               }
        }
 
        /*
@@ -2080,16 +2126,16 @@ mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts)
         * so loudly complain if we have any unprocessed one(s).
         */
        if (events != 0 || ret < MPTS_EVRET_OK) {
-               mptcplog((LOG_ERR, "%s%s: cid %d evret %s (%d)"
+               mptcplog((LOG_ERR, "MPTCP Events %s%s: cid %d evret %s (%d)"
                    " unhandled events=%b\n",
-                   (events != 0) ? "MPTCP_ERROR " : "", 
+                   (events != 0) && (ret == MPTS_EVRET_OK) ? "MPTCP_ERROR " : "", 
                    __func__, mpts->mpts_connid,
-                   mptcp_evret2str(ret), ret, events, SO_FILT_HINT_BITS));
+                   mptcp_evret2str(ret), ret, events, SO_FILT_HINT_BITS),
+                   MPTCP_EVENTS_DBG, MPTCP_LOGLVL_ERR);
        }
 
        /* clear the ones we've processed */
        atomic_bitclear_32(&mpts->mpts_evctl, save_events);
-       
        return (ret);
 }
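
The loop above replaces the long if/else chain of per-hint handlers with a table walk: mpsub_ev_entry_tbl pairs each SO_FILT_HINT_* mask with its handler, each handled bit is cleared from events, and the walk stops once nothing is pending or a handler returns a fatal value. A minimal user-space sketch of the same dispatch pattern follows; the names (ev_entry, EV_*, HINT_*, handle_events) are stand-ins for illustration, not the kernel's types.

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define EV_CONNECTED   0x1u
    #define EV_RESET       0x2u
    #define EV_DISCONNECT  0x4u
    #define HINT_CONNRESET 0x1u   /* deferred notification bit, separate namespace */

    typedef int (*ev_handler_t)(uint64_t *deferred_hint);

    /* Stand-ins for the real handlers; one of them posts a deferred hint. */
    static int on_connected(uint64_t *h)  { (void)h; puts("connected");  return 0; }
    static int on_disconnect(uint64_t *h) { (void)h; puts("disconnect"); return 0; }
    static int on_reset(uint64_t *h)      { *h |= HINT_CONNRESET; puts("reset"); return 0; }

    /* One row per event: which bit it consumes and who handles it. */
    static const struct ev_entry {
        uint32_t     mask;
        ev_handler_t handler;
    } ev_tbl[] = {
        { EV_RESET,      on_reset      },
        { EV_CONNECTED,  on_connected  },
        { EV_DISCONNECT, on_disconnect },
    };

    static int
    handle_events(uint32_t events, uint64_t *deferred_hint)
    {
        int ret = 0;
        size_t n = sizeof(ev_tbl) / sizeof(ev_tbl[0]);

        /* Walk the table, clearing each bit as it is handled. */
        for (size_t i = 0; i < n && events != 0 && ret >= 0; i++) {
            if (events & ev_tbl[i].mask) {
                ret = ev_tbl[i].handler(deferred_hint);
                events &= ~ev_tbl[i].mask;
            }
        }
        return (events != 0) ? -1 : ret;   /* leftover bits mean an unhandled event */
    }

    int
    main(void)
    {
        uint64_t hint = 0;
        int rc = handle_events(EV_CONNECTED | EV_RESET, &hint);
        printf("rc=%d deferred=0x%llx\n", rc, (unsigned long long)hint);
        return 0;
    }
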
 
@@ -2097,7 +2143,8 @@ mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts)
  * Handle SO_FILT_HINT_CONNRESET subflow socket event.
  */
 static ev_ret_t
-mptcp_subflow_connreset_ev(struct mptses *mpte, struct mptsub *mpts)
+mptcp_subflow_connreset_ev(struct mptses *mpte, struct mptsub *mpts,
+       uint64_t *p_mpsofilt_hint)
 {
        struct socket *mp_so, *so;
        struct mptcb *mp_tp;
@@ -2113,8 +2160,10 @@ mptcp_subflow_connreset_ev(struct mptses *mpte, struct mptsub *mpts)
        linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) &&
            !(mp_so->so_flags & SOF_PCBCLEARING));
 
-       mptcplog((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__,
-           mpts->mpts_connid, (linger ? "YES" : "NO")));
+       mptcplog((LOG_DEBUG, "MPTCP Events: "
+           "%s: cid %d [linger %s]\n", __func__,
+           mpts->mpts_connid, (linger ? "YES" : "NO")),
+           MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
 
        /*
         * We got a TCP RST for this subflow connection.
@@ -2131,11 +2180,7 @@ mptcp_subflow_connreset_ev(struct mptses *mpte, struct mptsub *mpts)
                mpts->mpts_soerror = mp_so->so_error = ECONNREFUSED;
        } else if (mpte->mpte_nummpcapflows < 1) {
                mpts->mpts_soerror = mp_so->so_error = ECONNRESET;
-               MPT_UNLOCK(mp_tp);
-               MPTS_UNLOCK(mpts);
-               soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNRESET);
-               MPTS_LOCK(mpts);
-               MPT_LOCK(mp_tp);
+               *p_mpsofilt_hint |= SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNRESET;
        }
        MPT_UNLOCK(mp_tp);
 
@@ -2151,8 +2196,10 @@ mptcp_subflow_connreset_ev(struct mptses *mpte, struct mptsub *mpts)
  * Handle SO_FILT_HINT_CANTRCVMORE subflow socket event.
  */
 static ev_ret_t
-mptcp_subflow_cantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts)
+mptcp_subflow_cantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts,
+       uint64_t *p_mpsofilt_hint)
 {
+#pragma unused(p_mpsofilt_hint)
        struct socket *so;
 
        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
@@ -2160,7 +2207,9 @@ mptcp_subflow_cantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts)
 
        so = mpts->mpts_socket;
 
-       mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid));
+       mptcplog((LOG_DEBUG, "MPTCP Events: "
+           "%s: cid %d\n", __func__, mpts->mpts_connid),
+           MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
 
        /*
         * We got a FIN for this subflow connection.  This subflow socket
@@ -2176,8 +2225,10 @@ mptcp_subflow_cantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts)
  * Handle SO_FILT_HINT_CANTSENDMORE subflow socket event.
  */
 static ev_ret_t
-mptcp_subflow_cantsendmore_ev(struct mptses *mpte, struct mptsub *mpts)
+mptcp_subflow_cantsendmore_ev(struct mptses *mpte, struct mptsub *mpts,
+       uint64_t *p_mpsofilt_hint)
 {
+#pragma unused(p_mpsofilt_hint)
        struct socket *so;
 
        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
@@ -2185,7 +2236,10 @@ mptcp_subflow_cantsendmore_ev(struct mptses *mpte, struct mptsub *mpts)
 
        so = mpts->mpts_socket;
 
-       mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid));
+       mptcplog((LOG_DEBUG, "MPTCP Events: "
+           "%s: cid %d\n", __func__, mpts->mpts_connid),
+           MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
+
        return (MPTS_EVRET_OK); /* keep the subflow socket around */
 }
 
@@ -2193,8 +2247,10 @@ mptcp_subflow_cantsendmore_ev(struct mptses *mpte, struct mptsub *mpts)
  * Handle SO_FILT_HINT_TIMEOUT subflow socket event.
  */
 static ev_ret_t
-mptcp_subflow_timeout_ev(struct mptses *mpte, struct mptsub *mpts)
+mptcp_subflow_timeout_ev(struct mptses *mpte, struct mptsub *mpts,
+       uint64_t *p_mpsofilt_hint)
 {
+#pragma unused(p_mpsofilt_hint)
        struct socket *mp_so, *so;
        struct mptcb *mp_tp;
        boolean_t linger;
@@ -2209,8 +2265,10 @@ mptcp_subflow_timeout_ev(struct mptses *mpte, struct mptsub *mpts)
        linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) &&
            !(mp_so->so_flags & SOF_PCBCLEARING));
 
-       mptcplog((LOG_NOTICE, "%s: cid %d [linger %s]\n", __func__,
-           mpts->mpts_connid, (linger ? "YES" : "NO")));
+       mptcplog((LOG_NOTICE, "MPTCP Events: "
+           "%s: cid %d [linger %s]\n", __func__,
+           mpts->mpts_connid, (linger ? "YES" : "NO")),
+           MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
 
        if (mpts->mpts_soerror == 0)
                mpts->mpts_soerror = ETIMEDOUT;
@@ -2242,8 +2300,10 @@ mptcp_subflow_timeout_ev(struct mptses *mpte, struct mptsub *mpts)
  * Handle SO_FILT_HINT_NOSRCADDR subflow socket event.
  */
 static ev_ret_t
-mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts)
+mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts,
+       uint64_t *p_mpsofilt_hint)
 {
+#pragma unused(p_mpsofilt_hint)
        struct socket *mp_so, *so;
        struct mptcb *mp_tp;
        boolean_t linger;
@@ -2269,8 +2329,10 @@ mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts)
        linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) &&
            !(mp_so->so_flags & SOF_PCBCLEARING));
 
-       mptcplog((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__,
-           mpts->mpts_connid, (linger ? "YES" : "NO")));
+       mptcplog((LOG_DEBUG, "MPTCP Events: "
+           "%s cid %d [linger %s]\n", __func__,
+           mpts->mpts_connid, (linger ? "YES" : "NO")),
+           MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
 
        if (mpts->mpts_soerror == 0)
                mpts->mpts_soerror = EADDRNOTAVAIL;
@@ -2307,7 +2369,8 @@ mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts)
  * indicates that the remote side sent a Data FIN
  */
 static ev_ret_t
-mptcp_subflow_mpcantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts)
+mptcp_subflow_mpcantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts,
+       uint64_t *p_mpsofilt_hint)
 {
        struct socket *so, *mp_so;
        struct mptcb *mp_tp;
@@ -2318,7 +2381,9 @@ mptcp_subflow_mpcantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts)
        so = mpts->mpts_socket;
        mp_tp = mpte->mpte_mptcb;
 
-       mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid));
+       mptcplog((LOG_DEBUG, "MPTCP Events: "
+           "%s: cid %d\n", __func__, mpts->mpts_connid),
+           MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
        
        /*
        * We got a Data FIN for the MPTCP connection.  
@@ -2328,11 +2393,7 @@ mptcp_subflow_mpcantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts)
        */
        MPT_LOCK(mp_tp);
        if (mp_tp->mpt_state == MPTCPS_CLOSE_WAIT) {
-               MPT_UNLOCK(mp_tp);
-               MPTS_UNLOCK(mpts);
-               soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CANTRCVMORE);
-               MPTS_LOCK(mpts);
-               MPT_LOCK(mp_tp);
+               *p_mpsofilt_hint |= SO_FILT_HINT_LOCKED | SO_FILT_HINT_CANTRCVMORE;
        }
        MPT_UNLOCK(mp_tp);
        return (MPTS_EVRET_OK); /* keep the subflow socket around */
@@ -2342,7 +2403,8 @@ mptcp_subflow_mpcantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts)
  * Handle SO_FILT_HINT_MPFAILOVER subflow socket event
  */
 static ev_ret_t
-mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts)
+mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts,
+       uint64_t *p_mpsofilt_hint)
 {
        struct mptsub *mpts_alt = NULL;
        struct socket *so = NULL;
@@ -2352,18 +2414,23 @@ mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts)
        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        MPTS_LOCK_ASSERT_HELD(mpts);
        mp_so = mpte->mpte_mppcb->mpp_socket;
-       mptcplog2((LOG_NOTICE, "%s: mp_so 0x%llx\n", __func__,
-           (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)));
+       mptcplog((LOG_NOTICE, "MPTCP Events: "
+           "%s: mp_so 0x%llx\n", __func__,
+           (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
+           MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
 
        MPTS_UNLOCK(mpts);
-       mpts_alt = mptcp_get_subflow(mpte, mpts);
+       mpts_alt = mptcp_get_subflow(mpte, mpts, NULL);
 
        /*
         * If there is no alternate eligible subflow, ignore the
         * failover hint.
         */
        if (mpts_alt == NULL) {
-               mptcplog2((LOG_WARNING, "%s: no alternate path\n", __func__));
+               mptcplog((LOG_WARNING, "MPTCP Events: "
+                   "%s: no alternate path\n", __func__),
+                   MPTCP_EVENTS_DBG, MPTCP_LOGLVL_ERR);
+
                if (mptcp_delayed_subf_start) {
                        mpts_alt = mptcp_get_pending_subflow(mpte, mpts);
                        if (mpts_alt != NULL) {
@@ -2393,9 +2460,12 @@ mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts)
                socket_unlock(so, 1);
        }
        if (altpath_exists) {
-               mptcplog2((LOG_INFO, "%s: cid = %d\n",
-                   __func__, mpts_alt->mpts_connid));
+               mptcplog((LOG_INFO, "MPTCP Events: "
+                   "%s: cid = %d\n",
+                   __func__, mpts_alt->mpts_connid),
+                   MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
                mpts_alt->mpts_flags |= MPTSF_ACTIVE;
+               mpts_alt->mpts_peerswitch = 0;
                struct mptcb *mp_tp = mpte->mpte_mptcb;
                /* Bring the subflow's notion of snd_nxt into the send window */
                MPT_LOCK(mp_tp);
@@ -2409,12 +2479,13 @@ mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts)
        MPTS_UNLOCK(mpts_alt);
 
        if (altpath_exists) {
-               soevent(mp_so,
-                   SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED);
-               mptcplog((LOG_NOTICE, "%s: mp_so 0x%llx switched from "
+               *p_mpsofilt_hint |= SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED;
+               mptcplog((LOG_NOTICE, "MPTCP Events: "
+                   "%s: mp_so 0x%llx switched from "
                    "%d to %d\n", __func__,
                    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
-                   mpts->mpts_connid, mpts_alt->mpts_connid));
+                   mpts->mpts_connid, mpts_alt->mpts_connid),
+                   MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
                tcpstat.tcps_mp_switches++;
        }
 
@@ -2423,8 +2494,9 @@ mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts)
                mpts->mpts_flags |= MPTSF_FAILINGOVER;
                mpts->mpts_flags &= ~MPTSF_ACTIVE;
        } else {
-               mptcplog2((LOG_INFO, "%s: no alt cid = %d\n",
-                   __func__, mpts->mpts_connid));
+               mptcplog((LOG_DEBUG, "MPTCP Events %s: no alt cid = %d\n",
+                   __func__, mpts->mpts_connid),
+                   MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
 done:
                so = mpts->mpts_socket;
                socket_lock(so, 1);
@@ -2439,7 +2511,8 @@ done:
  * Handle SO_FILT_HINT_IFDENIED subflow socket event.
  */
 static ev_ret_t
-mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts)
+mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts,
+       uint64_t *p_mpsofilt_hint)
 {
        struct socket *mp_so, *so;
        struct mptcb *mp_tp;
@@ -2455,8 +2528,10 @@ mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts)
        linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) &&
            !(mp_so->so_flags & SOF_PCBCLEARING));
 
-       mptcplog((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__,
-           mpts->mpts_connid, (linger ? "YES" : "NO")));
+       mptcplog((LOG_DEBUG, "MPTCP Events: "
+           "%s: cid %d [linger %s]\n", __func__,
+           mpts->mpts_connid, (linger ? "YES" : "NO")),
+           MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
 
        if (mpts->mpts_soerror == 0)
                mpts->mpts_soerror = EHOSTUNREACH;
@@ -2469,9 +2544,7 @@ mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts)
         * has been established, let the upper layer call disconnectx.
         */
        mptcp_subflow_disconnect(mpte, mpts, !linger);
-       MPTS_UNLOCK(mpts);
-
-       soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED);
+       *p_mpsofilt_hint |= SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED;
 
        MPT_LOCK(mp_tp);
        if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
@@ -2479,7 +2552,6 @@ mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts)
        }
        MPT_UNLOCK(mp_tp);
 
-       MPTS_LOCK(mpts);
        /*
         * Keep the subflow socket around, unless the MPTCP socket has
         * been detached or the subflow has been disconnected explicitly,
@@ -2492,8 +2564,10 @@ mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts)
  * Handle SO_FILT_HINT_SUSPEND subflow socket event.
  */
 static ev_ret_t
-mptcp_subflow_suspend_ev(struct mptses *mpte, struct mptsub *mpts)
+mptcp_subflow_suspend_ev(struct mptses *mpte, struct mptsub *mpts,
+       uint64_t *p_mpsofilt_hint)
 {
+#pragma unused(p_mpsofilt_hint)
        struct socket *so;
 
        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
@@ -2504,8 +2578,9 @@ mptcp_subflow_suspend_ev(struct mptses *mpte, struct mptsub *mpts)
        /* the subflow connection is being flow controlled */
        mpts->mpts_flags |= MPTSF_SUSPENDED;
 
-       mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__,
-           mpts->mpts_connid));
+       mptcplog((LOG_DEBUG, "MPTCP Events: "
+           "%s: cid %d\n", __func__,
+           mpts->mpts_connid), MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
 
        return (MPTS_EVRET_OK); /* keep the subflow socket around */
 }
@@ -2514,8 +2589,10 @@ mptcp_subflow_suspend_ev(struct mptses *mpte, struct mptsub *mpts)
  * Handle SO_FILT_HINT_RESUME subflow socket event.
  */
 static ev_ret_t
-mptcp_subflow_resume_ev(struct mptses *mpte, struct mptsub *mpts)
+mptcp_subflow_resume_ev(struct mptses *mpte, struct mptsub *mpts,
+       uint64_t *p_mpsofilt_hint)
 {
+#pragma unused(p_mpsofilt_hint)
        struct socket *so;
 
        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
@@ -2526,7 +2603,9 @@ mptcp_subflow_resume_ev(struct mptses *mpte, struct mptsub *mpts)
        /* the subflow connection is no longer flow controlled */
        mpts->mpts_flags &= ~MPTSF_SUSPENDED;
 
-       mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid));
+       mptcplog((LOG_DEBUG, "MPTCP Events: "
+           "%s: cid %d\n", __func__, mpts->mpts_connid),
+           MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
 
        return (MPTS_EVRET_OK); /* keep the subflow socket around */
 }
@@ -2535,7 +2614,8 @@ mptcp_subflow_resume_ev(struct mptses *mpte, struct mptsub *mpts)
  * Handle SO_FILT_HINT_CONNECTED subflow socket event.
  */
 static ev_ret_t
-mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts)
+mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts,
+       uint64_t *p_mpsofilt_hint)
 {
        char buf0[MAX_IPv6_STR_LEN], buf1[MAX_IPv6_STR_LEN];
        struct sockaddr_entry *src_se, *dst_se;
@@ -2545,6 +2625,9 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts)
        struct ifnet *outifp;
        int af, error = 0;
        boolean_t mpok = FALSE;
+       boolean_t cell = FALSE;
+       boolean_t wifi = FALSE;
+       boolean_t wired = FALSE;
 
        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        VERIFY(mpte->mpte_mppcb != NULL);
@@ -2563,8 +2646,10 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts)
                socket_lock(so, 0);
                if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
                    (so->so_state & SS_ISCONNECTED)) {
-                   mptcplog((LOG_DEBUG, "%s: cid %d disconnect before tcp connect\n",
-                       __func__, mpts->mpts_connid));
+                   mptcplog((LOG_DEBUG, "MPTCP Events: "
+                       "%s: cid %d disconnect before tcp connect\n",
+                       __func__, mpts->mpts_connid),
+                       MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
                        (void) soshutdownlock(so, SHUT_RD);
                        (void) soshutdownlock(so, SHUT_WR);
                        (void) sodisconnectlocked(so);
@@ -2635,13 +2720,15 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts)
                        if ((mpts->mpts_flags & MPTSF_BOUND_IP) &&
                            bcmp(&ms->sin_addr, &s->sin_addr,
                            sizeof (ms->sin_addr)) != 0) {
-                               mptcplog((LOG_ERR, "%s: cid %d local "
+                               mptcplog((LOG_ERR, "MPTCP Events: "
+                                   "%s: cid %d local "
                                    "address %s (expected %s)\n", __func__,
                                    mpts->mpts_connid, inet_ntop(AF_INET,
                                    (void *)&s->sin_addr.s_addr, buf0,
                                    sizeof (buf0)), inet_ntop(AF_INET,
                                    (void *)&ms->sin_addr.s_addr, buf1,
-                                   sizeof (buf1))));
+                                   sizeof (buf1))),
+                                   MPTCP_EVENTS_DBG, MPTCP_LOGLVL_ERR);
                        }
                        bcopy(s, ms, sizeof (*s));
                }
@@ -2660,13 +2747,15 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts)
                        if ((mpts->mpts_flags & MPTSF_BOUND_IP) &&
                            bcmp(&ms->sin6_addr, &s->sin6_addr,
                            sizeof (ms->sin6_addr)) != 0) {
-                               mptcplog((LOG_ERR, "%s: cid %d local "
+                               mptcplog((LOG_ERR, "MPTCP Events: "
+                                   "%s: cid %d local "
                                    "address %s (expected %s)\n", __func__,
                                    mpts->mpts_connid, inet_ntop(AF_INET6,
                                    (void *)&s->sin6_addr, buf0,
                                    sizeof (buf0)), inet_ntop(AF_INET6,
                                    (void *)&ms->sin6_addr, buf1,
-                                   sizeof (buf1))));
+                                   sizeof (buf1))),
+                                   MPTCP_EVENTS_DBG, MPTCP_LOGLVL_ERR);
                        }
                        bcopy(s, ms, sizeof (*s));
                }
@@ -2679,8 +2768,10 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts)
        }
 
        if (error != 0) {
-               mptcplog((LOG_ERR, "%s: cid %d getsockaddr failed (%d)\n",
-                   __func__, mpts->mpts_connid, error));
+               mptcplog((LOG_ERR, "MPTCP Events "
+                   "%s: cid %d getsockaddr failed (%d)\n",
+                   __func__, mpts->mpts_connid, error),
+                   MPTCP_EVENTS_DBG, MPTCP_LOGLVL_ERR);
        }
 
        /* get/verify the outbound interface */
@@ -2688,10 +2779,12 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts)
        if (mpts->mpts_flags & MPTSF_BOUND_IF) {
                VERIFY(mpts->mpts_outif != NULL);
                if (mpts->mpts_outif != outifp) {
-                       mptcplog((LOG_ERR, "%s: cid %d outif %s "
+                       mptcplog((LOG_ERR, "MPTCP Events: %s: cid %d outif %s "
                            "(expected %s)\n", __func__, mpts->mpts_connid,
                            ((outifp != NULL) ? outifp->if_xname : "NULL"),
-                           mpts->mpts_outif->if_xname));
+                           mpts->mpts_outif->if_xname),
+                           MPTCP_EVENTS_DBG, MPTCP_LOGLVL_ERR);
+
                        if (outifp == NULL)
                                outifp = mpts->mpts_outif;
                }
@@ -2699,9 +2792,31 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts)
                mpts->mpts_outif = outifp;
        }
 
+       mpts->mpts_srtt = (intotcpcb(sotoinpcb(so)))->t_srtt;
+       mpts->mpts_rxtcur = (intotcpcb(sotoinpcb(so)))->t_rxtcur;
+       mpts->mpts_maxseg = (intotcpcb(sotoinpcb(so)))->t_maxseg;
+
+       cell = IFNET_IS_CELLULAR(mpts->mpts_outif);
+       wifi = (!cell && IFNET_IS_WIFI(mpts->mpts_outif));
+       wired = (!wifi && IFNET_IS_WIRED(mpts->mpts_outif));
+
+       if (cell)
+               mpts->mpts_linktype |= MPTSL_CELL;
+       else if (wifi)
+               mpts->mpts_linktype |= MPTSL_WIFI;
+       else if (wired)
+               mpts->mpts_linktype |= MPTSL_WIRED;
+
        socket_unlock(so, 0);
 
-       mptcplog((LOG_DEBUG, "%s: cid %d outif %s %s[%d] -> %s[%d] "
+       mptcplog((LOG_DEBUG, "MPTCP Sender: %s: cid %d "
+           "establishment srtt %d \n", __func__,
+           mpts->mpts_connid, (mpts->mpts_srtt >> 5)),
+           MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
+
+
+       mptcplog((LOG_DEBUG, "MPTCP Socket: "
+           "%s: cid %d outif %s %s[%d] -> %s[%d] "
            "is %s\n", __func__, mpts->mpts_connid, ((outifp != NULL) ?
            outifp->if_xname : "NULL"), inet_ntop(af, (af == AF_INET) ?
            (void *)&SIN(src_se->se_addr)->sin_addr.s_addr :
@@ -2714,12 +2829,13 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts)
            ((af == AF_INET) ? ntohs(SIN(dst_se->se_addr)->sin_port) :
            ntohs(SIN6(dst_se->se_addr)->sin6_port)),
            ((mpts->mpts_flags & MPTSF_MP_CAPABLE) ?
-           "MPTCP capable" : "a regular TCP")));
+           "MPTCP capable" : "a regular TCP")),
+           (MPTCP_SOCKET_DBG | MPTCP_EVENTS_DBG), MPTCP_LOGLVL_LOG);
 
        mpok = (mpts->mpts_flags & MPTSF_MP_CAPABLE);
        MPTS_UNLOCK(mpts);
 
-       soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED);
+       *p_mpsofilt_hint |= SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED;
 
        MPT_LOCK(mp_tp);
        if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
@@ -2735,6 +2851,10 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts)
                                MPT_UNLOCK(mp_tp);
                                mpok = FALSE;
                        } else {
+                               mptcplog((LOG_DEBUG, "MPTCP State: "
+                                   "MPTCPS_ESTABLISHED for mp_so 0x%llx \n",
+                                   (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
+                                   MPTCP_STATE_DBG, MPTCP_LOGLVL_LOG);
                                mp_tp->mpt_state = MPTCPS_ESTABLISHED;
                                mpte->mpte_associd = mpts->mpts_connid;
                                DTRACE_MPTCP2(state__change, 
@@ -2800,7 +2920,8 @@ mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts)
  * Handle SO_FILT_HINT_DISCONNECTED subflow socket event.
  */
 static ev_ret_t
-mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts)
+mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts,
+       uint64_t *p_mpsofilt_hint)
 {
        struct socket *mp_so, *so;
        struct mptcb *mp_tp;
@@ -2816,8 +2937,10 @@ mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts)
        linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) &&
            !(mp_so->so_flags & SOF_PCBCLEARING));
 
-       mptcplog2((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__,
-           mpts->mpts_connid, (linger ? "YES" : "NO")));
+       mptcplog((LOG_DEBUG, "MPTCP Events: "
+           "%s: cid %d [linger %s]\n", __func__,
+           mpts->mpts_connid, (linger ? "YES" : "NO")),
+           MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
 
        if (mpts->mpts_flags & MPTSF_DISCONNECTED)
                return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE);
@@ -2838,16 +2961,15 @@ mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts)
         * Right now, we simply unblock any waiters at the MPTCP socket layer
         * if the MPTCP connection has not been established.
         */
-       MPTS_UNLOCK(mpts);
-
-       soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED);
+       *p_mpsofilt_hint |= SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED;
 
        if (mpts->mpts_flags & MPTSF_MPCAP_CTRSET) {
                mpte->mpte_nummpcapflows--;
                if (mpte->mpte_active_sub == mpts) {
                        mpte->mpte_active_sub = NULL;
-                       mptcplog((LOG_DEBUG, "%s: resetting active subflow \n",
-                           __func__));
+                       mptcplog((LOG_DEBUG, "MPTCP Events: "
+                           "%s: resetting active subflow \n",
+                           __func__), MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
                }
                mpts->mpts_flags &= ~MPTSF_MPCAP_CTRSET;
        }
@@ -2855,12 +2977,13 @@ mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts)
        MPT_LOCK(mp_tp);
        if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
                MPT_UNLOCK(mp_tp);
+               MPTS_UNLOCK(mpts);
                soisdisconnected(mp_so);
+               MPTS_LOCK(mpts);
        } else {
                MPT_UNLOCK(mp_tp);
        }
 
-       MPTS_LOCK(mpts);
        /*
         * The underlying subflow socket has been disconnected;
         * it is no longer useful to us.  Keep the subflow socket
@@ -2875,11 +2998,12 @@ mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts)
  * Handle SO_FILT_HINT_MPSTATUS subflow socket event
  */
 static ev_ret_t
-mptcp_subflow_mpstatus_ev(struct mptses *mpte, struct mptsub *mpts)
+mptcp_subflow_mpstatus_ev(struct mptses *mpte, struct mptsub *mpts,
+               uint64_t *p_mpsofilt_hint)
 {
        struct socket *mp_so, *so;
        struct mptcb *mp_tp;
-       ev_ret_t ret = MPTS_EVRET_OK_UPDATE;
+       ev_ret_t ret = MPTS_EVRET_OK;
 
        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        VERIFY(mpte->mpte_mppcb != NULL);
@@ -2918,16 +3042,24 @@ mptcp_subflow_mpstatus_ev(struct mptses *mpte, struct mptsub *mpts)
        if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
                VERIFY(!(mp_tp->mpt_flags & MPTCPF_JOIN_READY));
                ret = MPTS_EVRET_DISCONNECT_FALLBACK;
+               *p_mpsofilt_hint |= SO_FILT_HINT_LOCKED |
+                       SO_FILT_HINT_CONNINFO_UPDATED;
        } else if (mpts->mpts_flags & MPTSF_MP_READY) {
                mp_tp->mpt_flags |= MPTCPF_JOIN_READY;
                ret = MPTS_EVRET_CONNECT_PENDING;
+       } else {
+               *p_mpsofilt_hint |= SO_FILT_HINT_LOCKED |
+                       SO_FILT_HINT_CONNINFO_UPDATED;
        }
 
-       mptcplog2((LOG_DEBUG, "%s: mp_so 0x%llx mpt_flags=%b cid %d "
+       mptcplog((LOG_DEBUG, "MPTCP Events: "
+           "%s: mp_so 0x%llx mpt_flags=%b cid %d "
            "mptsf=%b\n", __func__,
            (u_int64_t)VM_KERNEL_ADDRPERM(mpte->mpte_mppcb->mpp_socket),
            mp_tp->mpt_flags, MPTCPF_BITS, mpts->mpts_connid,
-           mpts->mpts_flags, MPTSF_BITS));
+           mpts->mpts_flags, MPTSF_BITS),
+           MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
+
 done:
        MPT_UNLOCK(mp_tp);
        socket_unlock(so, 0);
@@ -2938,7 +3070,8 @@ done:
  * Handle SO_FILT_HINT_MUSTRST subflow socket event
  */
 static ev_ret_t
-mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts)
+mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts,
+       uint64_t *p_mpsofilt_hint)
 {
        struct socket *mp_so, *so;
        struct mptcb *mp_tp;
@@ -2982,25 +3115,35 @@ mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts)
                    &t_template->tt_t, (struct mbuf *)NULL,
                    tp->rcv_nxt, tp->snd_una, TH_RST, &tra);
                (void) m_free(dtom(t_template));
-               mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx cid %d \n",
+               mptcplog((LOG_DEBUG, "MPTCP Events: "
+                   "%s: mp_so 0x%llx cid %d \n",
                    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
-                   so, mpts->mpts_connid));
+                   so, mpts->mpts_connid),
+                   MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
        }
        socket_unlock(so, 0);
        mptcp_subflow_disconnect(mpte, mpts, !linger);
-       MPTS_UNLOCK(mpts);
 
-       soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED |
-           SO_FILT_HINT_CONNRESET);
+       *p_mpsofilt_hint |=  (SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED);
+
+       if (!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP))
+               *p_mpsofilt_hint |= SO_FILT_HINT_CONNRESET;
 
        MPT_LOCK(mp_tp);
        if ((mp_tp->mpt_state < MPTCPS_ESTABLISHED) ||
            (mp_tp->mpt_state == MPTCPS_FASTCLOSE_WAIT)) {
                mp_so->so_error = ECONNABORTED;
        }
+       /*
+        * Ideally there should be a state transition for when a FASTCLOSE
+        * is received. Right now we keep the connection in MPTCPS_ESTABLISHED
+        * state and only go to terminal state when the user level code calls
+        * close after processing the SO_FILT_HINT_CONNRESET event.
+        */
+       if (mp_tp->mpt_gc_ticks == MPT_GC_TICKS)
+               mp_tp->mpt_gc_ticks = MPT_GC_TICKS_FAST;
        MPT_UNLOCK(mp_tp);
 
-       MPTS_LOCK(mpts);
        /*
         * Keep the subflow socket around unless the subflow has been
         * disconnected explicitly.
@@ -3009,16 +3152,20 @@ mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts)
 }
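
Per the comment above, a FASTCLOSE-style reset leaves the connection in MPTCPS_ESTABLISHED and instead shortens the garbage-collector countdown from MPT_GC_TICKS to MPT_GC_TICKS_FAST, presumably so the PCB is torn down sooner once userland processes the reset and closes. A toy sketch of that accelerated-teardown idea; the tick values and the unconditional countdown below are invented for the example, not the kernel's constants or GC policy.

    #include <stdio.h>

    #define GC_TICKS       30   /* hypothetical full grace period, in GC passes */
    #define GC_TICKS_FAST   1   /* hypothetical accelerated value after a fastclose */

    struct conn {
        int gc_ticks;
        int fastclose_rcvd;
    };

    /* On a fastclose-style reset, shorten the countdown if it is still at full value. */
    static void
    note_fastclose(struct conn *c)
    {
        c->fastclose_rcvd = 1;
        if (c->gc_ticks == GC_TICKS)
            c->gc_ticks = GC_TICKS_FAST;
    }

    /* One GC pass over this connection: count down, report when it can be reaped. */
    static int
    gc_pass(struct conn *c)
    {
        if (c->gc_ticks > 0)
            c->gc_ticks--;
        return (c->gc_ticks == 0);   /* 1 = ready to reap */
    }

    int
    main(void)
    {
        struct conn c = { .gc_ticks = GC_TICKS, .fastclose_rcvd = 0 };
        note_fastclose(&c);
        printf("reap=%d ticks=%d\n", gc_pass(&c), c.gc_ticks);
        return 0;
    }
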
 
 static ev_ret_t
-mptcp_fastjoin_ev(struct mptses *mpte, struct mptsub *mpts)
+mptcp_fastjoin_ev(struct mptses *mpte, struct mptsub *mpts,
+       uint64_t *p_mpsofilt_hint)
 {
+#pragma unused(p_mpsofilt_hint)
        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        MPTS_LOCK_ASSERT_HELD(mpts);
        VERIFY(mpte->mpte_mppcb != NULL);
        
        if (mpte->mpte_nummpcapflows == 0) {
                struct mptcb *mp_tp = mpte->mpte_mptcb;
-               mptcplog((LOG_DEBUG,"%s %llx %llx \n",
-                   __func__, mp_tp->mpt_snduna, mpts->mpts_sndnxt));
+               mptcplog((LOG_DEBUG,"MPTCP Events: %s: %llx %llx \n",
+                   __func__, mp_tp->mpt_snduna, mpts->mpts_sndnxt),
+                   MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
+
                mpte->mpte_active_sub = mpts;
                mpts->mpts_flags |= (MPTSF_FASTJ_SEND | MPTSF_ACTIVE);
                MPT_LOCK(mp_tp);
@@ -3038,12 +3185,17 @@ mptcp_fastjoin_ev(struct mptses *mpte, struct mptsub *mpts)
 }
 
 static ev_ret_t
-mptcp_deleteok_ev(struct mptses *mpte, struct mptsub *mpts)
+mptcp_deleteok_ev(struct mptses *mpte, struct mptsub *mpts,
+       uint64_t *p_mpsofilt_hint)
 {
+#pragma unused(p_mpsofilt_hint)
        MPTE_LOCK_ASSERT_HELD(mpte);
        MPTS_LOCK_ASSERT_HELD(mpts);
        VERIFY(mpte->mpte_mppcb != NULL);
-       mptcplog((LOG_DEBUG, "%s cid %d\n", __func__, mpts->mpts_connid));
+
+       mptcplog((LOG_DEBUG, "MPTCP Events: "
+           "%s cid %d\n", __func__, mpts->mpts_connid),
+           MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
 
        mpts->mpts_flags |= MPTSF_DELETEOK;
        if (mpts->mpts_flags & MPTSF_DISCONNECTED)
@@ -3070,8 +3222,7 @@ mptcp_evret2str(ev_ret_t ret)
        case MPTS_EVRET_OK:
                c = "MPTS_EVRET_OK";
                break;
-       case MPTS_EVRET_OK_UPDATE:
-               c = "MPTS_EVRET_OK_UPDATE";
+       default:
                break;
        }
        return (c);
@@ -3145,17 +3296,21 @@ mptcp_subflow_sosetopt(struct mptses *mpte, struct socket *so,
 
        error = sosetoptlock(so, &sopt, 0);     /* already locked */
        if (error == 0) {
-               mptcplog2((LOG_DEBUG, "%s: mp_so 0x%llx sopt %s "
+               mptcplog((LOG_DEBUG, "MPTCP Socket: "
+                   "%s: mp_so 0x%llx sopt %s "
                    "val %d set successful\n", __func__,
                    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name,
-                   buf, sizeof (buf)), mpo->mpo_intval));
+                   buf, sizeof (buf)), mpo->mpo_intval),
+                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
        } else {
-               mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s "
+               mptcplog((LOG_ERR, "MPTCP Socket: "
+                   "%s: mp_so 0x%llx sopt %s "
                    "val %d set error %d\n", __func__,
                    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name,
-                   buf, sizeof (buf)), mpo->mpo_intval, error));
+                   buf, sizeof (buf)), mpo->mpo_intval, error),
+                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
        }
        return (error);
 }
@@ -3188,16 +3343,20 @@ mptcp_subflow_sogetopt(struct mptses *mpte, struct socket *so,
 
        error = sogetoptlock(so, &sopt, 0);     /* already locked */
        if (error == 0) {
-               mptcplog2((LOG_DEBUG, "%s: mp_so 0x%llx sopt %s "
+               mptcplog((LOG_DEBUG, "MPTCP Socket: "
+                   "%s: mp_so 0x%llx sopt %s "
                    "val %d get successful\n", __func__,
                    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name,
-                   buf, sizeof (buf)), mpo->mpo_intval));
+                   buf, sizeof (buf)), mpo->mpo_intval),
+                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
        } else {
-               mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s get error %d\n",
+               mptcplog((LOG_ERR, "MPTCP Socket: "
+                   "%s: mp_so 0x%llx sopt %s get error %d\n",
                    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                    mptcp_sopt2str(mpo->mpo_level,
-                   mpo->mpo_name, buf, sizeof (buf)), error));
+                   mpo->mpo_name, buf, sizeof (buf)), error),
+                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
        }
        return (error);
 }
@@ -3218,8 +3377,6 @@ mptcp_gc(struct mppcbinfo *mppi)
 
        lck_mtx_assert(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED);
 
-       mptcplog3((LOG_DEBUG, "%s: running\n", __func__));
-
        TAILQ_FOREACH_SAFE(mpp, &mppi->mppi_pcbs, mpp_entry, tmpp) {
                struct socket *mp_so;
                struct mptses *mpte;
@@ -3233,16 +3390,20 @@ mptcp_gc(struct mppcbinfo *mppi)
                mp_tp = mpte->mpte_mptcb;
                VERIFY(mp_tp != NULL);
 
-               mptcplog3((LOG_DEBUG, "%s: mp_so 0x%llx found "
+               mptcplog((LOG_DEBUG, "MPTCP Socket: "
+                   "%s: mp_so 0x%llx found "
                    "(u=%d,r=%d,s=%d)\n", __func__,
                    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mp_so->so_usecount,
-                   mp_so->so_retaincnt, mpp->mpp_state));
+                   mp_so->so_retaincnt, mpp->mpp_state),
+                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
 
                if (!lck_mtx_try_lock(&mpp->mpp_lock)) {
-                       mptcplog3((LOG_DEBUG, "%s: mp_so 0x%llx skipped "
+                       mptcplog((LOG_DEBUG, "MPTCP Socket: "
+                           "%s: mp_so 0x%llx skipped "
                            "(u=%d,r=%d)\n", __func__,
                            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
-                           mp_so->so_usecount, mp_so->so_retaincnt));
+                           mp_so->so_usecount, mp_so->so_retaincnt),
+                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
                        active++;
                        continue;
                }
@@ -3252,12 +3413,15 @@ mptcp_gc(struct mppcbinfo *mppi)
                        boolean_t wakeup = FALSE;
                        struct mptsub *mpts, *tmpts;
 
-                       mptcplog3((LOG_DEBUG, "%s: mp_so 0x%llx skipped "
+                       mptcplog((LOG_DEBUG, "MPTCP Socket: "
+                           "%s: mp_so 0x%llx skipped "
                            "[u=%d,r=%d] %d %d\n", __func__,
                            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                            mp_so->so_usecount, mp_so->so_retaincnt,
                            mp_tp->mpt_gc_ticks,
-                           mp_tp->mpt_state));
+                           mp_tp->mpt_state),
+                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
+
                        MPT_LOCK(mp_tp);
                        if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1) {
                                if (mp_tp->mpt_gc_ticks > 0)
@@ -3290,11 +3454,13 @@ mptcp_gc(struct mppcbinfo *mppi)
                }
 
                if (mpp->mpp_state != MPPCB_STATE_DEAD) {
-                       mptcplog3((LOG_DEBUG, "%s: mp_so 0x%llx skipped "
+                       mptcplog((LOG_DEBUG, "MPTCP Socket: "
+                           "%s: mp_so 0x%llx skipped "
                            "[u=%d,r=%d,s=%d]\n", __func__,
                            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                            mp_so->so_usecount, mp_so->so_retaincnt,
-                           mpp->mpp_state));
+                           mpp->mpp_state),
+                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
                        lck_mtx_unlock(&mpp->mpp_lock);
                        active++;
                        continue;
@@ -3307,10 +3473,13 @@ mptcp_gc(struct mppcbinfo *mppi)
                 * allow it to be destroyed below during the next round.
                 */
                if (mp_so->so_usecount == 1) {
-                       mptcplog2((LOG_DEBUG, "%s: mp_so 0x%llx scheduled for "
+                       mptcplog((LOG_DEBUG, "MPTCP Socket: "
+                           "%s: mp_so 0x%llx scheduled for "
                            "termination [u=%d,r=%d]\n", __func__,
                            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
-                           mp_so->so_usecount, mp_so->so_retaincnt));
+                           mp_so->so_usecount, mp_so->so_retaincnt),
+                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
+
                        /* signal MPTCP thread to terminate */
                        mptcp_thread_terminate_signal(mpte);
                        lck_mtx_unlock(&mpp->mpp_lock);
@@ -3318,9 +3487,12 @@ mptcp_gc(struct mppcbinfo *mppi)
                        continue;
                }
 
-               mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx destroyed [u=%d,r=%d]\n",
+               mptcplog((LOG_DEBUG, "MPTCP Socket: "
+                   "%s: mp_so 0x%llx destroyed [u=%d,r=%d]\n",
                    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
-                   mp_so->so_usecount, mp_so->so_retaincnt));
+                   mp_so->so_usecount, mp_so->so_retaincnt),
+                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
+
                DTRACE_MPTCP4(dispose, struct socket *, mp_so, 
                    struct sockbuf *, &mp_so->so_rcv,
                    struct sockbuf *, &mp_so->so_snd,
@@ -3362,8 +3534,8 @@ mptcp_drop(struct mptses *mpte, struct mptcb *mp_tp, int errno)
 struct mptses *
 mptcp_close(struct mptses *mpte, struct mptcb *mp_tp)
 {
-       struct socket *mp_so;
-       struct mptsub *mpts, *tmpts;
+       struct socket *mp_so = NULL;
+       struct mptsub *mpts = NULL, *tmpts = NULL;
 
        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        MPT_LOCK_ASSERT_HELD(mp_tp);
@@ -3451,7 +3623,7 @@ mptcp_thread_dowork(struct mptses *mpte)
        struct socket *mp_so;
        struct mptsub *mpts, *tmpts;
        boolean_t connect_pending = FALSE, disconnect_fallback = FALSE;
-       boolean_t conninfo_update = FALSE;
+       uint64_t mpsofilt_hint_mask = 0;
 
        MPTE_LOCK(mpte);                /* same as MP socket lock */
        VERIFY(mpte->mpte_mppcb != NULL);
@@ -3468,11 +3640,16 @@ mptcp_thread_dowork(struct mptses *mpte)
                mptcp_update_last_owner(mpts, mp_so);
                
                mptcp_subflow_input(mpte, mpts);
-               ret = mptcp_subflow_events(mpte, mpts);
+
+               mptcp_get_rtt_measurement(mpts, mpte);
+
+               ret = mptcp_subflow_events(mpte, mpts, &mpsofilt_hint_mask);
 
                if (mpts->mpts_flags & MPTSF_ACTIVE) {
-                       mptcplog3((LOG_INFO, "%s: cid %d \n", __func__,
-                           mpts->mpts_connid));
+                       mptcplog((LOG_DEBUG, "MPTCP Socket: "
+                           "%s: cid %d \n", __func__,
+                           mpts->mpts_connid),
+                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
                        (void) mptcp_subflow_output(mpte, mpts);
                }
 
@@ -3487,9 +3664,6 @@ mptcp_thread_dowork(struct mptses *mpte)
                MPTS_UNLOCK(mpts);
 
                switch (ret) {
-               case MPTS_EVRET_OK_UPDATE:
-                       conninfo_update = TRUE;
-                       break;
                case MPTS_EVRET_OK:
                        /* nothing to do */
                        break;
@@ -3502,13 +3676,19 @@ mptcp_thread_dowork(struct mptses *mpte)
                case MPTS_EVRET_DISCONNECT_FALLBACK:
                        disconnect_fallback = TRUE;
                        break;
+               default:
+                       mptcplog((LOG_DEBUG,
+                           "MPTCP Socket: %s: mptcp_subflow_events "
+                           "returned invalid value: %d\n",  __func__,
+                           ret),
+                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
+                       break;
                }
                MPTS_REMREF(mpts);              /* ours */
        }
 
-       if (conninfo_update) {
-               soevent(mp_so, SO_FILT_HINT_LOCKED |
-                   SO_FILT_HINT_CONNINFO_UPDATED);
+       if (mpsofilt_hint_mask) {
+               soevent(mp_so, mpsofilt_hint_mask);
        }
 
        if (!connect_pending && !disconnect_fallback) {
@@ -3531,7 +3711,7 @@ mptcp_thread_dowork(struct mptses *mpte)
                        mpts->mpts_flags |= MPTSF_MP_DEGRADED;
 
                        if (mpts->mpts_flags & (MPTSF_DISCONNECTING|
-                           MPTSF_DISCONNECTED)) {
+                           MPTSF_DISCONNECTED|MPTSF_CONNECT_PENDING)) {
                                MPTS_UNLOCK(mpts);
                                continue;
                        }
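
A recurring change in the handlers above is that they no longer drop the MPTS/MPT locks just to call soevent() on the MP socket; instead each handler ORs its SO_FILT_HINT_* bits into *p_mpsofilt_hint, and mptcp_thread_dowork() delivers a single soevent() with the accumulated mask once the subflow loop is done. A compact user-space sketch of that accumulate-then-notify shape, with invented hint names standing in for the kernel's:

    #include <stdint.h>
    #include <stdio.h>

    #define HINT_CONNINFO_UPDATED  0x01u
    #define HINT_CONNRESET         0x02u
    #define HINT_CANTRCVMORE       0x04u

    /* Each per-subflow step reports its wakeup bits instead of waking immediately. */
    static void
    step_reset(uint64_t *hint_mask)
    {
        *hint_mask |= HINT_CONNINFO_UPDATED | HINT_CONNRESET;
    }

    static void
    step_datafin(uint64_t *hint_mask)
    {
        *hint_mask |= HINT_CANTRCVMORE;
    }

    /* Stand-in for soevent(): deliver all pending hints in one notification. */
    static void
    notify(uint64_t hint_mask)
    {
        printf("soevent(mask=0x%llx)\n", (unsigned long long)hint_mask);
    }

    int
    main(void)
    {
        uint64_t hint_mask = 0;

        step_reset(&hint_mask);
        step_datafin(&hint_mask);

        if (hint_mask != 0)        /* single wakeup after the work loop */
            notify(hint_mask);
        return 0;
    }
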
@@ -3843,8 +4023,6 @@ mptcp_free_key(mptcp_key_t *key)
        struct mptcp_key_entry *key_elm;
        int pt = RandomULong();
 
-       mptcplog((LOG_INFO, "%s\n", __func__));
-
        lck_mtx_lock(&mptcp_keys_pool.mkph_lock);
        key_holder = (struct mptcp_key_entry *)(void*)((caddr_t)key -
            offsetof(struct mptcp_key_entry, mkey_value));
@@ -3974,10 +4152,12 @@ mptcp_detach_mptcb_from_subf(struct mptcb *mp_tp, struct socket *so)
        }
        if (found) {
                LIST_REMOVE(sauth_entry, msae_next);
-               zfree(mpt_subauth_zone, sauth_entry);
        }
        MPT_UNLOCK(mp_tp);
 
+       if (found)
+               zfree(mpt_subauth_zone, sauth_entry);
+
        tp->t_mptcb = NULL;
        socket_unlock(so, 0);
 }
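
The hunk above keeps the LIST_REMOVE() under MPT_LOCK but moves the zfree() to after MPT_UNLOCK, so the zone free no longer runs with the protocol lock held (the usual motive for this reordering is to avoid calling into the allocator under a lock). A small pthreads sketch of the unlink-under-lock, free-after-unlock idiom; the list, mutex, and free() here are stand-ins, not the kernel's zone allocator:

    #include <pthread.h>
    #include <stdlib.h>

    struct entry {
        struct entry *next;
        int           id;
    };

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct entry   *head;

    /* Unlink the matching entry under the lock, but free it only after unlocking. */
    static void
    remove_entry(int id)
    {
        struct entry **pp, *found = NULL;

        pthread_mutex_lock(&list_lock);
        for (pp = &head; *pp != NULL; pp = &(*pp)->next) {
            if ((*pp)->id == id) {
                found = *pp;
                *pp = found->next;   /* unlink while protected */
                break;
            }
        }
        pthread_mutex_unlock(&list_lock);

        if (found != NULL)
            free(found);             /* may block or schedule; lock not held */
    }

    int
    main(void)
    {
        struct entry *e = calloc(1, sizeof(*e));
        if (e == NULL)
            return 1;
        e->id = 7;
        head = e;
        remove_entry(7);
        return 0;
    }
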
@@ -4014,18 +4194,21 @@ mptcp_set_raddr_rand(mptcp_addr_id laddr_id, struct mptcb *mp_tp,
                if (sauth_entry->msae_laddr_id == laddr_id) {
                        if ((sauth_entry->msae_raddr_id != 0) &&
                            (sauth_entry->msae_raddr_id != raddr_id)) {
-                               mptcplog((LOG_ERR, "MPTCP ERROR %s: mismatched"
+                               mptcplog((LOG_ERR, "MPTCP Socket: %s mismatched"
                                    " address ids %d %d \n", __func__, raddr_id,
-                                   sauth_entry->msae_raddr_id));
+                                   sauth_entry->msae_raddr_id),
+                                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
                                MPT_UNLOCK(mp_tp);
                                return;
                        }
                        sauth_entry->msae_raddr_id = raddr_id;
                        if ((sauth_entry->msae_raddr_rand != 0) &&
                            (sauth_entry->msae_raddr_rand != raddr_rand)) {
-                               mptcplog((LOG_ERR, "%s: dup SYN_ACK %d %d \n",
+                               mptcplog((LOG_ERR, "MPTCP Socket: "
+                                   "%s: dup SYN_ACK %d %d \n",
                                    __func__, raddr_rand,
-                                   sauth_entry->msae_raddr_rand));
+                                   sauth_entry->msae_raddr_rand),
+                                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
                                MPT_UNLOCK(mp_tp);
                                return;
                        }
@@ -4185,7 +4368,7 @@ mptcp_init_authparms(struct mptcb *mp_tp)
        MPT_LOCK_ASSERT_HELD(mp_tp);
 
        /* Only Version 0 is supported for auth purposes */
-       if (mp_tp->mpt_version != MP_DRAFT_VERSION_12)
+       if (mp_tp->mpt_version != MPTCP_STD_VERSION_0)
                return (-1);
 
        /* Setup local and remote tokens and Initial DSNs */
@@ -4197,8 +4380,8 @@ mptcp_init_authparms(struct mptcb *mp_tp)
 
        if (!mptcp_do_sha1(&mp_tp->mpt_remotekey, remote_digest,
            SHA1_RESULTLEN)) {
-               mptcplog((LOG_ERR, "MPTCP ERROR %s: unexpected failure",
-                   __func__));
+               mptcplog((LOG_ERR, "MPTCP Socket: %s: unexpected failure",
+                   __func__), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
                return (-1);
        }
        mptcp_generate_token(remote_digest, SHA1_RESULTLEN,
@@ -4224,7 +4407,7 @@ static void
 mptcp_conn_properties(struct mptcb *mp_tp)
 {
        /* There is only Version 0 at this time */
-       mp_tp->mpt_version = MP_DRAFT_VERSION_12;
+       mp_tp->mpt_version = MPTCP_STD_VERSION_0;
 
        /* Set DSS checksum flag */
        if (mptcp_dss_csum)
@@ -4300,7 +4483,7 @@ mptcp_insert_dsn(struct mppcb *mpp, struct mbuf *m)
        if (m == NULL)
                return;
 
-       mp_tp = &((struct mpp_mtp *)mpp)->mtcb;
+       __IGNORE_WCASTALIGN(mp_tp = &((struct mpp_mtp *)mpp)->mtcb);
        MPT_LOCK(mp_tp);
        if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
                MPT_UNLOCK(mp_tp);
@@ -4344,10 +4527,12 @@ mptcp_preproc_sbdrop(struct mbuf *m, unsigned int len)
                                if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
                                        m->m_pkthdr.mp_rseq += len;
                                }
-                               mptcplog3((LOG_INFO,
-                                   "%s: %llu %u %d %d\n", __func__,
+                               mptcplog((LOG_DEBUG, "MPTCP Sender: "
+                                   "%s: dsn 0x%llu ssn %u len %d %d\n",
+                                   __func__,
                                    m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rseq,
-                                   m->m_pkthdr.mp_rlen, len));
+                                   m->m_pkthdr.mp_rlen, len),
+                                   MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
                                m->m_pkthdr.mp_rlen -= len;
                                return;
                        }
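
In the sb-drop path above, trimming len bytes off the front of a mapped mbuf advances the relative subflow sequence by len (unless the PKTF_MPSO flag is set) and shrinks the mapped length by len. A stand-alone sketch of that bookkeeping on an invented (rseq, rlen) pair; the field names are illustrative only:

    #include <stdint.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <assert.h>

    /* Illustrative stand-in for the mapping carried in a packet header. */
    struct dsn_map {
        uint32_t rseq;    /* relative subflow sequence of the first mapped byte */
        uint16_t rlen;    /* number of bytes covered by this mapping */
        bool     mp_only; /* stand-in for PKTF_MPSO */
    };

    /* Drop len bytes from the front of the mapping. */
    static void
    map_drop_front(struct dsn_map *map, uint16_t len)
    {
        assert(len <= map->rlen);
        if (!map->mp_only)
            map->rseq += len;
        map->rlen -= len;
    }

    int
    main(void)
    {
        struct dsn_map map = { .rseq = 500, .rlen = 1448, .mp_only = false };

        map_drop_front(&map, 100);
        printf("rseq=%u rlen=%u\n", map.rseq, (unsigned)map.rlen);
        return 0;
    }
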
@@ -4445,21 +4630,24 @@ mptcp_output_getm_dsnmap64(struct socket *so, int off, uint32_t datalen,
                        /* case A */
                        runlen += mnext->m_pkthdr.mp_rlen;
                        contig_len += mnext->m_pkthdr.mp_rlen;
-                       mptcplog3((LOG_INFO, "%s: contig \n",
-                           __func__));
+                       mptcplog((LOG_DEBUG, "MPTCP Sender: %s: contig \n",
+                           __func__), MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
                } else {
                        /* case B */
-                       mptcplog((LOG_INFO, 
+                       mptcplog((LOG_DEBUG, "MPTCP Sender: "
                            "%s: discontig datalen %d contig_len %d cc %d \n",
-                           __func__, datalen, contig_len, so->so_snd.sb_cc));
+                           __func__, datalen, contig_len, so->so_snd.sb_cc),
+                           MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
                        break;
                }
                mnext = mnext->m_next;
        }
        datalen = min(datalen, UINT16_MAX);
        *data_len = min(datalen, contig_len);
-       mptcplog3((LOG_INFO, "%s: %llu %u %d %d \n", __func__,
-           *dsn, *relseq, *data_len, off));
+       mptcplog((LOG_DEBUG, "MPTCP Sender: "
+           "%s: %llu %u %d %d \n", __func__,
+           *dsn, *relseq, *data_len, off),
+           MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
 }
 
 /*
@@ -4485,13 +4673,13 @@ mptcp_adj_rcvnxt(struct tcpcb *tp, struct mbuf *m)
 }
 
 /*
- * Note that this is called only from tcp_input() which may trim data
- * after the dsn mapping is inserted into the mbuf. When it trims data
- * tcp_input calls m_adj() which does not remove the m_pkthdr even if the
- * m_len becomes 0 as a result of trimming the mbuf. The dsn map insertion
- * cannot be delayed after trim, because data can be in the reassembly
- * queue for a while and the DSN option info in tp will be overwritten for
- * every new packet received.
+ * Note that this is called only from tcp_input() via mptcp_input_preproc()
+ * tcp_input() may trim data after the dsn mapping is inserted into the mbuf.
+ * When it trims data tcp_input calls m_adj() which does not remove the
+ * m_pkthdr even if the m_len becomes 0 as a result of trimming the mbuf.
+ * The dsn map insertion cannot be delayed after trim, because data can be in
+ * the reassembly queue for a while and the DSN option info in tp will be
+ * overwritten for every new packet received.
  * The dsn map will be adjusted just prior to appending to subflow sockbuf
  * with mptcp_adj_rmap()
  */
@@ -4542,11 +4730,6 @@ mptcp_adj_rmap(struct socket *so, struct mbuf *m)
                return 0;
        }
 
-       if (m->m_pkthdr.len > (int)datalen) {
-               panic("%s: mbuf len = %d expected = %d", __func__,
-                   m->m_pkthdr.len, datalen);
-       }
-
        old_rcvnxt = tp->rcv_nxt - m->m_pkthdr.len;
        if (SEQ_GT(old_rcvnxt, sseq)) {
                /* data trimmed from the left */
@@ -4556,15 +4739,12 @@ mptcp_adj_rmap(struct socket *so, struct mbuf *m)
                m->m_pkthdr.mp_rlen = m->m_pkthdr.len;
        } else if (old_rcvnxt == sseq) {
                /*
-                * Data was trimmed from the right
+                * data was trimmed from the right
                 */
                m->m_pkthdr.mp_rlen = m->m_pkthdr.len;
        } else {
-               /* handle gracefully with reass or fallback */
                mptcp_notify_mpfail(so);
-               m->m_pkthdr.pkt_flags &= ~PKTF_MPTCP;
-               m_freem(m);
-               return -1;
+               return (-1);
        }
        mptcp_adj_rcvnxt(tp, m);
        return 0;
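
mptcp_adj_rmap() compares the pre-append rcv_nxt (rcv_nxt minus the surviving mbuf length) against the mapped subflow sequence to decide whether tcp_input() trimmed from the left or only from the right, and the mismatch case now simply calls mptcp_notify_mpfail() and returns -1 rather than panicking or freeing the mbuf. A small sketch of that classification over plain integers; 32-bit sequence wraparound (SEQ_GT in the kernel) is ignored, and the field updates are the sketch's own simplification of the elided kernel lines:

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative mapping state for one received segment. */
    struct rmap {
        uint32_t sseq;   /* subflow sequence the mapping starts at */
        uint32_t rlen;   /* bytes the mapping covers */
    };

    enum trim { TRIM_LEFT, TRIM_RIGHT_OR_NONE, TRIM_MISMATCH };

    /*
     * Classify what the TCP input path trimmed, given the segment length that
     * survived (seg_len) and the receiver's rcv_nxt after appending it.
     */
    static enum trim
    classify_trim(struct rmap *map, uint32_t rcv_nxt, uint32_t seg_len)
    {
        uint32_t old_rcvnxt = rcv_nxt - seg_len;

        if (old_rcvnxt > map->sseq) {
            /* bytes were cut from the left: advance and shrink the mapping */
            uint32_t cut = old_rcvnxt - map->sseq;
            map->sseq += cut;
            map->rlen  = seg_len;
            return TRIM_LEFT;
        }
        if (old_rcvnxt == map->sseq) {
            /* only the right edge (if anything) was cut */
            map->rlen = seg_len;
            return TRIM_RIGHT_OR_NONE;
        }
        return TRIM_MISMATCH;    /* caller would signal MP failure */
    }

    int
    main(void)
    {
        struct rmap map = { .sseq = 1000, .rlen = 1448 };
        enum trim t = classify_trim(&map, 2448, 1400);   /* 48 bytes cut from the left */
        printf("trim=%d sseq=%u rlen=%u\n", (int)t, map.sseq, map.rlen);
        return 0;
    }
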
@@ -4587,13 +4767,6 @@ mptcp_act_on_txfail(struct socket *so)
        if (tp == NULL)
                return;
 
-       if (tp->t_state != TCPS_ESTABLISHED)
-               mptcplog((LOG_INFO, "%s: state = %d \n", __func__,
-                   tp->t_state));
-       
-       mptcplog((LOG_INFO, "%s: Failover = %d \n", __func__,
-           (so->so_flags & SOF_MP_TRYFAILOVER) ? 1 : 0));
-
        if (so->so_flags & SOF_MP_TRYFAILOVER) {
                return;
        }
@@ -4625,8 +4798,9 @@ mptcp_get_map_for_dsn(struct socket *so, u_int64_t dsn_fail, u_int32_t *tcp_seq)
                    (MPTCP_SEQ_GEQ(dsn + datalen, dsn_fail))) {
                        off = dsn_fail - dsn;
                        *tcp_seq = m->m_pkthdr.mp_rseq + off;
-                       mptcplog((LOG_INFO, "%s: %llu %llu \n",
-                           __func__, dsn, dsn_fail));
+                       mptcplog((LOG_DEBUG, "MPTCP Sender: %s: %llu %llu \n",
+                           __func__, dsn, dsn_fail),
+                           MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
                        return (0);
                }
 
@@ -4638,7 +4812,9 @@ mptcp_get_map_for_dsn(struct socket *so, u_int64_t dsn_fail, u_int32_t *tcp_seq)
         * not much else to do.
         */
 
-       mptcplog((LOG_ERR, "%s: %llu not found \n", __func__, dsn_fail));
+       mptcplog((LOG_ERR, "MPTCP Sender: "
+           "%s: %llu not found \n", __func__, dsn_fail),
+           MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
        return (-1);
 }
 
@@ -4858,7 +5034,7 @@ fill_mptcp_subflow(struct socket *so, mptcp_flow_t *flow, struct mptsub *mpts)
                SIN6(&flow->flow_dst)->sin6_addr = inp->in6p_faddr;
        } else 
 #endif
-       {
+       if ((inp->inp_vflag & INP_IPV4) != 0) {
                flow->flow_src.ss_family = AF_INET;
                flow->flow_dst.ss_family = AF_INET;
                flow->flow_src.ss_len = sizeof(struct sockaddr_in);
@@ -4868,8 +5044,15 @@ fill_mptcp_subflow(struct socket *so, mptcp_flow_t *flow, struct mptsub *mpts)
                SIN(&flow->flow_src)->sin_addr = inp->inp_laddr;
                SIN(&flow->flow_dst)->sin_addr = inp->inp_faddr;
        }
+       flow->flow_len = sizeof(*flow);
+       flow->flow_tcpci_offset = offsetof(mptcp_flow_t, flow_ci);
        flow->flow_flags = mpts->mpts_flags;
        flow->flow_cid = mpts->mpts_connid;
+       flow->flow_sndnxt = mpts->mpts_sndnxt;
+       flow->flow_relseq = mpts->mpts_rel_seq;
+       flow->flow_soerror = mpts->mpts_soerror;
+       flow->flow_probecnt = mpts->mpts_probecnt;
+       flow->flow_peerswitch = mpts->mpts_peerswitch;
 }
 
 static int
@@ -4899,16 +5082,41 @@ mptcp_pcblist SYSCTL_HANDLER_ARGS
        }
        TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
                flows = NULL;
-               bzero(&mptcpci, sizeof(mptcpci));
                lck_mtx_lock(&mpp->mpp_lock);
                VERIFY(mpp->mpp_flags & MPP_ATTACHED);
+               if (mpp->mpp_flags & MPP_DEFUNCT) {
+                       lck_mtx_unlock(&mpp->mpp_lock);
+                       continue;
+               }
                mpte = mptompte(mpp);
                VERIFY(mpte != NULL);
                mp_tp = mpte->mpte_mptcb;
                VERIFY(mp_tp != NULL);
-               /* N.B. we don't take the mpt_lock just for the state. */
+
+               bzero(&mptcpci, sizeof(mptcpci));
+               MPT_LOCK(mp_tp);
                mptcpci.mptcpci_state = mp_tp->mpt_state;
+               mptcpci.mptcpci_flags = mp_tp->mpt_flags;
+               mptcpci.mptcpci_ltoken = mp_tp->mpt_localtoken;
+               mptcpci.mptcpci_rtoken = mp_tp->mpt_remotetoken;
+               mptcpci.mptcpci_notsent_lowat = mp_tp->mpt_notsent_lowat;
+               mptcpci.mptcpci_snduna = mp_tp->mpt_snduna;
+               mptcpci.mptcpci_sndnxt = mp_tp->mpt_sndnxt;
+               mptcpci.mptcpci_sndmax = mp_tp->mpt_sndmax;
+               mptcpci.mptcpci_lidsn = mp_tp->mpt_local_idsn;
+               mptcpci.mptcpci_sndwnd = mp_tp->mpt_sndwnd;
+               mptcpci.mptcpci_rcvnxt = mp_tp->mpt_rcvnxt;
+               mptcpci.mptcpci_rcvatmark = mp_tp->mpt_rcvatmark;
+               mptcpci.mptcpci_ridsn = mp_tp->mpt_remote_idsn;
+               mptcpci.mptcpci_rcvwnd = mp_tp->mpt_rcvwnd;
+               MPT_UNLOCK(mp_tp);
+
                mptcpci.mptcpci_nflows = mpte->mpte_numflows;
+               mptcpci.mptcpci_mpte_flags = mpte->mpte_flags;
+               mptcpci.mptcpci_mpte_addrid = mpte->mpte_addrid_last;
+               mptcpci.mptcpci_flow_offset =
+                   offsetof(conninfo_mptcp_t, mptcpci_flows);
+
                len = sizeof(*flows) * mpte->mpte_numflows;
                if (mpte->mpte_numflows != 0) {
                        flows = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
@@ -4922,8 +5130,7 @@ mptcp_pcblist SYSCTL_HANDLER_ARGS
                            sizeof(mptcpci) - sizeof(mptcp_flow_t));
                } else {
                        mptcpci.mptcpci_len = sizeof(mptcpci);
-                       error = SYSCTL_OUT(req, &mptcpci,
-                           sizeof(mptcpci));
+                       error = SYSCTL_OUT(req, &mptcpci, sizeof(mptcpci));
                }   
                if (error) {
                        lck_mtx_unlock(&mpp->mpp_lock);
@@ -4993,35 +5200,6 @@ output_needed:
        MPTS_LOCK(to_mpts);
 }
 
-
-/*
- * When WiFi signal starts fading, there's more loss and RTT spikes.
- * Check if there has been a large spike by comparing against
- * a tolerable RTT spike threshold.
- */
-boolean_t 
-mptcp_no_rto_spike(struct socket *so)
-{
-       struct tcpcb *tp = intotcpcb(sotoinpcb(so));
-       int32_t spike = 0;
-
-       if (tp->t_rxtcur > mptcp_rto_spike_thresh) {
-               spike = tp->t_rxtcur - mptcp_rto_spike_thresh;
-       
-               mptcplog2((LOG_INFO, "%s: spike = %d rto = %d",
-                   "best = %d cur = %d\n", __func__, spike,
-                   tp->t_rxtcur, tp->t_rttbest >> TCP_RTT_SHIFT,
-                   tp->t_rttcur));
-       
-       }
-
-       if (spike > 0 ) {
-               return (FALSE);
-       } else { 
-               return (TRUE);
-       }       
-}
-
 /*
  * Set notsent lowat mark on the MPTCB
  */
@@ -5079,9 +5257,11 @@ mptcp_notsent_lowat_check(struct socket *so) {
        if ((notsent == 0) ||
            ((notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)) <=
            mp_tp->mpt_notsent_lowat)) {
-               mptcplog3((LOG_INFO, "%s: lowat %d notsent %d actual %d \n",
-                   __func__, mp_tp->mpt_notsent_lowat, notsent,
-                   notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)));
+               mptcplog((LOG_DEBUG, "MPTCP Sender: "
+                   "lowat %d notsent %d actual %d \n",
+                   mp_tp->mpt_notsent_lowat, notsent,
+                   notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)),
+                   MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
                MPT_UNLOCK(mp_tp);
                return (1);
        }
@@ -5106,9 +5286,10 @@ mptcp_notsent_lowat_check(struct socket *so) {
                            notsent > 0 && (notsent <= (int)tp->t_maxseg)) {
                                retval = 1;
                        }
-                       mptcplog3((LOG_INFO, "%s: lowat %d notsent %d"
+                       mptcplog((LOG_DEBUG, "MPTCP Sender: lowat %d notsent %d"
                            " nodelay false \n",
-                           __func__, mp_tp->mpt_notsent_lowat, notsent));
+                           mp_tp->mpt_notsent_lowat, notsent),
+                           MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
                        socket_unlock(subf_so, 0);
                        MPTS_UNLOCK(mpts);
                        return (retval);
@@ -5118,3 +5299,220 @@ mptcp_notsent_lowat_check(struct socket *so) {
        return (0);
 }
 
+static void
+mptcp_get_rtt_measurement(struct mptsub *mpts, struct mptses *mpte)
+{
+       MPTE_LOCK_ASSERT_HELD(mpte);
+       MPTS_LOCK_ASSERT_HELD(mpts);
+
+       struct socket *subflow_so = mpts->mpts_socket;
+       socket_lock(subflow_so, 0);
+       mpts->mpts_srtt = (intotcpcb(sotoinpcb(subflow_so)))->t_srtt;
+       mpts->mpts_rxtcur = (intotcpcb(sotoinpcb(subflow_so)))->t_rxtcur;
+       socket_unlock(subflow_so, 0);
+}
+
+/* Using Symptoms Advisory to detect poor WiFi or poor Cell */
+static kern_ctl_ref mptcp_kern_ctrl_ref = NULL;
+static uint32_t mptcp_kern_skt_inuse = 0;
+symptoms_advisory_t mptcp_advisory;
+
+static errno_t
+mptcp_symptoms_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
+       void **unitinfo)
+{
+#pragma unused(kctlref, sac, unitinfo)
+       /*
+        * We don't need to do anything here, but we can at least ensure
+        * that only one user opens the MPTCP_KERN_CTL_NAME control socket.
+        */
+       if (OSCompareAndSwap(0, 1, &mptcp_kern_skt_inuse))
+               return (0);
+       else
+               return (EALREADY);
+}
+
+static errno_t
+mptcp_symptoms_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit,
+       void *unitinfo)
+{
+#pragma unused(kctlref, kcunit, unitinfo)
+       if (OSCompareAndSwap(1, 0, &mptcp_kern_skt_inuse)) {
+               /* TBD: needs locking if the size grows beyond an int */
+               bzero(&mptcp_advisory, sizeof(mptcp_advisory));
+               return (0);
+       }
+       else {
+               return (EINVAL);
+       }
+}
+
+static errno_t
+mptcp_symptoms_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
+       mbuf_t m, int flags)
+{
+#pragma unused(kctlref, kcunit, unitinfo, flags)
+       symptoms_advisory_t     *sa = NULL;
+
+       if (mbuf_pkthdr_len(m) < sizeof(*sa)) {
+               mbuf_freem(m);
+               return (EINVAL);
+       }
+
+       if (mbuf_len(m) >= sizeof(*sa))
+               sa = mbuf_data(m);
+       else
+               return (EINVAL);
+
+       if (mptcp_advisory.sa_nwk_status_int != sa->sa_nwk_status_int) {
+               /*
+                * We could use this notification to notify all MPTCP PCBs
+                * of the change in network status. But it's difficult to
+                * decide whether sending REMOVE_ADDR or MP_PRIO is appropriate,
+                * given that these are only soft indicators of the network
+                * state. Leaving this as TBD for now.
+                */
+       }
+
+       if (sa->sa_nwk_status != SYMPTOMS_ADVISORY_NOCOMMENT) {
+               mptcplog((LOG_DEBUG, "MPTCP Events: %s wifi %d,%d cell %d,%d\n",
+                   __func__, sa->sa_wifi_status, mptcp_advisory.sa_wifi_status,
+                   sa->sa_cell_status, mptcp_advisory.sa_cell_status),
+                   MPTCP_SOCKET_DBG | MPTCP_EVENTS_DBG,
+                   MPTCP_LOGLVL_LOG);
+
+               if ((sa->sa_wifi_status &
+                   (SYMPTOMS_ADVISORY_WIFI_BAD | SYMPTOMS_ADVISORY_WIFI_OK)) !=
+                   (SYMPTOMS_ADVISORY_WIFI_BAD | SYMPTOMS_ADVISORY_WIFI_OK)) {
+                       mptcp_advisory.sa_wifi_status = sa->sa_wifi_status;
+               }
+
+               if ((sa->sa_cell_status &
+                   (SYMPTOMS_ADVISORY_CELL_BAD | SYMPTOMS_ADVISORY_CELL_OK)) !=
+                   (SYMPTOMS_ADVISORY_CELL_BAD | SYMPTOMS_ADVISORY_CELL_OK)) {
+                       mptcp_advisory.sa_cell_status = sa->sa_cell_status;
+               }
+       } else {
+               mptcplog((LOG_DEBUG, "MPTCP Events: %s NOCOMMENT "
+                   "wifi %d cell %d\n", __func__,
+                   mptcp_advisory.sa_wifi_status,
+                   mptcp_advisory.sa_cell_status),
+                   MPTCP_SOCKET_DBG | MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
+       }
+       return (0);
+}
+
+void
+mptcp_control_register(void)
+{
+       /* Set up the advisory control socket */
+       struct kern_ctl_reg mptcp_kern_ctl;
+
+       bzero(&mptcp_kern_ctl, sizeof(mptcp_kern_ctl));
+       strlcpy(mptcp_kern_ctl.ctl_name, MPTCP_KERN_CTL_NAME,
+           sizeof(mptcp_kern_ctl.ctl_name));
+       mptcp_kern_ctl.ctl_connect = mptcp_symptoms_ctl_connect;
+       mptcp_kern_ctl.ctl_disconnect = mptcp_symptoms_ctl_disconnect;
+       mptcp_kern_ctl.ctl_send = mptcp_symptoms_ctl_send;
+       mptcp_kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED;
+
+       (void)ctl_register(&mptcp_kern_ctl, &mptcp_kern_ctrl_ref);
+}
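
A user-space agent (SymptomsD, in Apple's case) reaches the handlers above through the standard kernel-control interface; the control is registered with CTL_FLAG_PRIVILEGED, so only a privileged client may connect, and mptcp_symptoms_ctl_connect() admits one client at a time. The following is a hypothetical client sketch; the local struct mirrors the private symptoms_advisory_t layout defined later in this change, and the control name is the MPTCP_KERN_CTL_NAME string:

    #include <sys/socket.h>
    #include <sys/sys_domain.h>
    #include <sys/kern_control.h>
    #include <sys/ioctl.h>
    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>

    /* Local mirror of the private symptoms_advisory_t / flag values. */
    struct advisory {
            uint8_t         wifi_status;    /* SYMPTOMS_ADVISORY_WIFI_BAD = 0x01, _OK = 0x02 */
            uint8_t         cell_status;    /* SYMPTOMS_ADVISORY_CELL_BAD = 0x01, _OK = 0x02 */
            uint16_t        unused;
    };

    static int
    send_advisory(uint8_t wifi_status, uint8_t cell_status)
    {
            struct ctl_info info;
            struct sockaddr_ctl addr;
            struct advisory sa;
            int fd;

            /* Kernel-control sockets live in the PF_SYSTEM domain. */
            fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
            if (fd < 0)
                    return (-1);

            /* Resolve "com.apple.network.advisory" to a control id. */
            memset(&info, 0, sizeof(info));
            strlcpy(info.ctl_name, "com.apple.network.advisory", sizeof(info.ctl_name));
            if (ioctl(fd, CTLIOCGINFO, &info) == -1)
                    goto fail;

            memset(&addr, 0, sizeof(addr));
            addr.sc_len = sizeof(addr);
            addr.sc_family = AF_SYSTEM;
            addr.ss_sysaddr = AF_SYS_CONTROL;
            addr.sc_id = info.ctl_id;
            addr.sc_unit = 0;               /* only one client is admitted (see ctl_connect above) */
            if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) == -1)
                    goto fail;

            sa.wifi_status = wifi_status;
            sa.cell_status = cell_status;
            sa.unused = 0;
            /* Delivered to mptcp_symptoms_ctl_send(); must be at least sizeof(sa). */
            if (send(fd, &sa, sizeof(sa), 0) == -1)
                    goto fail;

            close(fd);
            return (0);
    fail:
            close(fd);
            return (-1);
    }
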
+
+int
+mptcp_is_wifi_unusable(void)
+{
+       /* a false return val indicates there is no info or wifi is ok */
+       return (mptcp_advisory.sa_wifi_status & SYMPTOMS_ADVISORY_WIFI_BAD);
+}
+
+int
+mptcp_is_cell_unusable(void)
+{
+       /* a false return val indicates there is no info or cell is ok */
+       return (mptcp_advisory.sa_cell_status & SYMPTOMS_ADVISORY_CELL_BAD);
+}
+
+struct mptsub*
+mptcp_use_symptoms_hints(struct mptsub* best, struct mptsub *second_best)
+{
+       struct mptsub *cellsub = NULL;
+       struct mptsub *wifisub = NULL;
+       struct mptsub *wiredsub = NULL;
+
+       VERIFY((best != NULL) && (second_best != NULL));
+
+       if (!mptcp_use_symptomsd)
+               return (NULL);
+
+       if (!mptcp_kern_skt_inuse)
+               return (NULL);
+
+       /*
+        * There could be devices with more than one wifi interface or
+        * more than one wired or cell interface.
+        * TBD: SymptomsD is unavailable on such platforms as of now.
+        * In general, try to prefer the best subflow when possible.
+        * Also, SymptomsD sends notifications about wifi only when it
+        * is primary.
+        */
+       if (best->mpts_linktype & MPTSL_WIFI)
+               wifisub = best;
+       else if (best->mpts_linktype & MPTSL_CELL)
+               cellsub = best;
+       else if (best->mpts_linktype & MPTSL_WIRED)
+               wiredsub = best;
+
+       /*
+        * On platforms with wired paths, don't use hints about wifi or cell.
+        * Currently, SymptomsD is not available on platforms with wired paths.
+        */
+       if (wiredsub)
+               return (NULL);
+
+       if ((wifisub == NULL) && (second_best->mpts_linktype & MPTSL_WIFI))
+               wifisub = second_best;
+
+       if ((cellsub == NULL) && (second_best->mpts_linktype & MPTSL_CELL))
+               cellsub = second_best;
+
+       if ((wiredsub == NULL) && (second_best->mpts_linktype & MPTSL_WIRED))
+               wiredsub = second_best;
+
+       if ((wifisub == best) && mptcp_is_wifi_unusable()) {
+               tcpstat.tcps_mp_sel_symtomsd++;
+               if (mptcp_is_cell_unusable()) {
+                       mptcplog((LOG_DEBUG, "MPTCP Sender: SymptomsD hint"
+                           " suggests both Wifi and Cell are bad. Wired %s.",
+                           (wiredsub == NULL) ? "none" : "present"),
+                           MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
+                       return (wiredsub);
+               } else {
+                       mptcplog((LOG_DEBUG, "MPTCP Sender: SymptomsD hint"
+                           " suggests Wifi bad, Cell good. Wired %s.",
+                           (wiredsub == NULL) ? "none" : "present"),
+                           MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
+                       return ((wiredsub != NULL) ? wiredsub : cellsub);
+               }
+       }
+
+       if ((cellsub == best) && (mptcp_is_cell_unusable())) {
+               tcpstat.tcps_mp_sel_symtomsd++;
+               if (mptcp_is_wifi_unusable()) {
+                       mptcplog((LOG_DEBUG, "MPTCP Sender: SymptomsD hint"
+                           " suggests both Cell and Wifi are bad. Wired %s.",
+                           (wiredsub == NULL) ? "none" : "present"),
+                           MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
+                       return (wiredsub);
+               } else {
+                       mptcplog((LOG_DEBUG, "MPTCP Sender: SymptomsD hint"
+                           " suggests Cell bad, Wifi good. Wired %s.",
+                           (wiredsub == NULL) ? "none" : "present"),
+                           MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
+                       return ((wiredsub != NULL) ? wiredsub : wifisub);
+               }
+       }
+
+       /* little is known about the state of the network, or wifi is good */
+       return (NULL);
+}
index 322aba51497c27245e5c527ddac1a473ff919ed7..5469b8323cec331633682ab940b258fb620b9f58 100644 (file)
@@ -74,8 +74,6 @@ mptcp_timer_demux(struct mptses *mpte, uint32_t now_msecs)
        int resched_timer = 0;
 
        DTRACE_MPTCP2(timer, struct mptses *, mpte, struct mptcb *, mp_tp);
-       mptcplog2((LOG_DEBUG, "%s: running %d\n", __func__,
-           mp_tp->mpt_timer_vals));
 
        MPTE_LOCK_ASSERT_HELD(mpte);
        MPT_LOCK(mp_tp);
@@ -98,8 +96,10 @@ mptcp_timer_demux(struct mptses *mpte, uint32_t now_msecs)
                        } else {
                                mp_tp->mpt_sndnxt = mp_tp->mpt_rtseq;
                                MPT_UNLOCK(mp_tp);
-                               mptcplog((LOG_DEBUG, "%s: REXMT %d times.\n",
-                                   __func__, mp_tp->mpt_rxtshift));
+                               mptcplog((LOG_DEBUG, "MPTCP Socket: "
+                                  "%s: REXMT %d times.\n",
+                                   __func__, mp_tp->mpt_rxtshift),
+                                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
                                mptcp_output(mpte);
                                MPT_LOCK(mp_tp);
                        }
@@ -167,27 +167,32 @@ mptcp_timer(struct mppcbinfo *mppi)
 }
 
 void
-mptcp_start_timer(struct mptcb *mp_tp, int timer_type)
+mptcp_start_timer(struct mptses *mpte, int timer_type)
 {
        struct timeval now;
+       struct mptcb *mp_tp = mpte->mpte_mptcb;
 
        microuptime(&now);
 
-       MPT_LOCK_ASSERT_HELD(mp_tp);
-
        DTRACE_MPTCP2(start__timer, struct mptcb *, mp_tp, int, timer_type);
-       mptcplog((LOG_DEBUG, "%s %d\n", __func__, timer_type));
+       mptcplog((LOG_DEBUG, "MPTCP Socket: %s: %d\n", __func__, timer_type),
+           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
 
        switch (timer_type) {
        case MPTT_REXMT:
+               MPT_LOCK(mp_tp);
                mp_tp->mpt_timer_vals |= MPTT_REXMT;
                mp_tp->mpt_rxtstart = TIMEVAL_TO_HZ(now);
                mp_tp->mpt_rxtshift = 0;
                mp_tp->mpt_rtseq = mp_tp->mpt_sndnxt;
+               MPT_UNLOCK(mp_tp);
                break;
        case MPTT_TW:
+               /* XXX: Not implemented yet */
+               MPT_LOCK(mp_tp);
                mp_tp->mpt_timer_vals |= MPTT_TW;
                mp_tp->mpt_timewait = TIMEVAL_TO_HZ(now);
+               MPT_UNLOCK(mp_tp);
                break;
        case MPTT_FASTCLOSE:
                /* NO-OP */
@@ -203,9 +208,7 @@ void
 mptcp_cancel_timer(struct mptcb *mp_tp, int timer_type)
 {
        MPT_LOCK_ASSERT_HELD(mp_tp);
-
        DTRACE_MPTCP2(cancel__timer, struct mptcb *, mp_tp, int, timer_type);
-       mptcplog3((LOG_DEBUG, "%s %d\n", __func__, timer_type));
 
        switch (timer_type) {
        case MPTT_REXMT:
index b5dac77a662c0bac9557341bf019974137ad3d7e..94da71dc562558f02a3372856429c28f52b45c56 100644 (file)
@@ -36,7 +36,7 @@
 
 __BEGIN_DECLS
 extern uint32_t mptcp_timer(struct mppcbinfo *);
-extern void mptcp_start_timer(struct mptcb *, int);
+extern void mptcp_start_timer(struct mptses *, int);
 extern void mptcp_cancel_timer(struct mptcb *, int);
 extern void mptcp_cancel_all_timers(struct mptcb *);
 __END_DECLS
index 268c7284f9684d3f39920495d1059eacf2d6c194..d61ad1fc3c6e3d964c6d128991410648545df9ae 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2012-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -56,25 +56,25 @@ static int mptcp_usr_detach(struct socket *);
 static int mptcp_attach(struct socket *, struct proc *);
 static int mptcp_detach(struct socket *, struct mppcb *);
 static int mptcp_connectx(struct mptses *, struct sockaddr_list **,
-    struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *,
-    uint32_t, void *, uint32_t);
+    struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t,
+    sae_connid_t *, uint32_t, void *, uint32_t);
 static int mptcp_usr_connectx(struct socket *, struct sockaddr_list **,
-    struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *,
-    uint32_t, void *, uint32_t);
+    struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t,
+    sae_connid_t *, uint32_t, void *, uint32_t, struct uio *, user_ssize_t *);
 static int mptcp_getassocids(struct mptses *, uint32_t *, user_addr_t);
-static int mptcp_getconnids(struct mptses *, associd_t, uint32_t *,
+static int mptcp_getconnids(struct mptses *, sae_associd_t, uint32_t *,
     user_addr_t);
-static int mptcp_getconninfo(struct mptses *, connid_t *, uint32_t *,
+static int mptcp_getconninfo(struct mptses *, sae_connid_t *, uint32_t *,
     uint32_t *, int32_t *, user_addr_t, socklen_t *, user_addr_t, socklen_t *,
     uint32_t *, user_addr_t, uint32_t *);
 static int mptcp_usr_control(struct socket *, u_long, caddr_t, struct ifnet *,
     struct proc *);
-static int mptcp_disconnectx(struct mptses *, associd_t, connid_t);
+static int mptcp_disconnectx(struct mptses *, sae_associd_t, sae_connid_t);
 static int mptcp_usr_disconnect(struct socket *);
-static int mptcp_usr_disconnectx(struct socket *, associd_t, connid_t);
+static int mptcp_usr_disconnectx(struct socket *, sae_associd_t, sae_connid_t);
 static struct mptses *mptcp_usrclosed(struct mptses *);
-static int mptcp_usr_peeloff(struct socket *, associd_t, struct socket **);
-static int mptcp_peeloff(struct mptses *, associd_t, struct socket **);
+static int mptcp_usr_peeloff(struct socket *, sae_associd_t, struct socket **);
+static int mptcp_peeloff(struct mptses *, sae_associd_t, struct socket **);
 static int mptcp_usr_rcvd(struct socket *, int);
 static int mptcp_usr_send(struct socket *, int, struct mbuf *,
     struct sockaddr *, struct mbuf *, struct proc *);
@@ -154,9 +154,9 @@ static int
 mptcp_attach(struct socket *mp_so, struct proc *p)
 {
 #pragma unused(p)
-       struct mptses *mpte;
-       struct mptcb *mp_tp;
-       struct mppcb *mpp;
+       struct mptses *mpte = NULL;
+       struct mptcb *mp_tp = NULL;
+       struct mppcb *mpp = NULL;
        int error = 0;
 
        if (mp_so->so_snd.sb_hiwat == 0 || mp_so->so_rcv.sb_hiwat == 0) {
@@ -177,25 +177,16 @@ mptcp_attach(struct socket *mp_so, struct proc *p)
        mp_so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
        mp_so->so_snd.sb_flags &= ~SB_AUTOSIZE;
 
-       if ((error = mp_pcballoc(mp_so, &mtcbinfo)) != 0)
+       if ((error = mp_pcballoc(mp_so, &mtcbinfo)) != 0) {
                goto out;
+       }
 
        mpp = sotomppcb(mp_so);
        VERIFY(mpp != NULL);
-
-       mpte = mptcp_sescreate(mp_so, mpp);
-       if (mpte == NULL) {
-               mp_pcbdetach(mpp);
-               error = ENOBUFS;
-               goto out;
-       }
+       mpte = (struct mptses *)mpp->mpp_pcbe;
+       VERIFY(mpte != NULL);
        mp_tp = mpte->mpte_mptcb;
        VERIFY(mp_tp != NULL);
-
-       MPT_LOCK(mp_tp);
-       mp_tp->mpt_state = MPTCPS_CLOSED;
-       MPT_UNLOCK(mp_tp);
-
 out:
        return (error);
 }
@@ -217,7 +208,7 @@ mptcp_detach(struct socket *mp_so, struct mppcb *mpp)
        mppi = mpp->mpp_pcbinfo;
        VERIFY(mppi != NULL);
 
-       mpte = &((struct mpp_mtp *)mpp)->mpp_ses;
+       __IGNORE_WCASTALIGN(mpte = &((struct mpp_mtp *)mpp)->mpp_ses);
        VERIFY(mpte->mpte_mppcb == mpp);
 
        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
@@ -230,7 +221,7 @@ mptcp_detach(struct socket *mp_so, struct mppcb *mpp)
         */
        mp_pcbdetach(mpp);
 
-       (void) mptcp_disconnectx(mpte, ASSOCID_ALL, CONNID_ALL);
+       (void) mptcp_disconnectx(mpte, SAE_ASSOCID_ALL, SAE_CONNID_ALL);
 
        /*
         * XXX: adi@apple.com
@@ -250,7 +241,7 @@ mptcp_detach(struct socket *mp_so, struct mppcb *mpp)
 static int
 mptcp_connectx(struct mptses *mpte, struct sockaddr_list **src_sl,
     struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
-    associd_t aid, connid_t *pcid, uint32_t flags, void *arg,
+    sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
     uint32_t arglen)
 {
 #pragma unused(p, aid, flags, arg, arglen)
@@ -264,10 +255,12 @@ mptcp_connectx(struct mptses *mpte, struct sockaddr_list **src_sl,
        VERIFY(dst_sl != NULL && *dst_sl != NULL);
        VERIFY(pcid != NULL);
 
-       mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx\n", __func__,
-           (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)));
+       mptcplog((LOG_DEBUG, "MPTCP Socket: "
+           "%s: mp_so 0x%llx\n", __func__,
+           (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
+           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
 
-       DTRACE_MPTCP3(connectx, struct mptses *, mpte, associd_t, aid,
+       DTRACE_MPTCP3(connectx, struct mptses *, mpte, sae_associd_t, aid,
            struct socket *, mp_so);
 
        mpts = mptcp_subflow_alloc(M_WAITOK);
@@ -312,12 +305,14 @@ out:
 static int
 mptcp_usr_connectx(struct socket *mp_so, struct sockaddr_list **src_sl,
     struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
-    associd_t aid, connid_t *pcid, uint32_t flags, void *arg,
-    uint32_t arglen)
+    sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
+    uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
 {
-#pragma unused(arg, arglen)
+#pragma unused(arg, arglen, uio, bytes_written)
        struct mppcb *mpp = sotomppcb(mp_so);
-       struct mptses *mpte;
+       struct mptses *mpte = NULL;
+       struct mptcb *mp_tp = NULL;
+
        int error = 0;
 
        if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
@@ -327,6 +322,14 @@ mptcp_usr_connectx(struct socket *mp_so, struct sockaddr_list **src_sl,
        mpte = mptompte(mpp);
        VERIFY(mpte != NULL);
 
+       mp_tp = mpte->mpte_mptcb;
+       VERIFY(mp_tp != NULL);
+
+       if (mp_tp->mpt_flags &  MPTCPF_FALLBACK_TO_TCP) {
+               error = EINVAL;
+               goto out;
+       }
+
        error = mptcp_connectx(mpte, src_sl, dst_sl, p, ifscope,
            aid, pcid, flags, arg, arglen);
 out:
@@ -342,7 +345,7 @@ mptcp_getassocids(struct mptses *mpte, uint32_t *cnt, user_addr_t aidp)
        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
 
        /* MPTCP has at most 1 association */
-       *cnt = (mpte->mpte_associd != ASSOCID_ANY) ? 1 : 0;
+       *cnt = (mpte->mpte_associd != SAE_ASSOCID_ANY) ? 1 : 0;
 
        /* just asking how many there are? */
        if (aidp == USER_ADDR_NULL)
@@ -356,7 +359,7 @@ mptcp_getassocids(struct mptses *mpte, uint32_t *cnt, user_addr_t aidp)
  * Handle SIOCGCONNIDS ioctl for PF_MULTIPATH domain.
  */
 static int
-mptcp_getconnids(struct mptses *mpte, associd_t aid, uint32_t *cnt,
+mptcp_getconnids(struct mptses *mpte, sae_associd_t aid, uint32_t *cnt,
     user_addr_t cidp)
 {
        struct mptsub *mpts;
@@ -364,7 +367,7 @@ mptcp_getconnids(struct mptses *mpte, associd_t aid, uint32_t *cnt,
 
        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
 
-       if (aid != ASSOCID_ANY && aid != ASSOCID_ALL &&
+       if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL &&
            aid != mpte->mpte_associd)
                return (EINVAL);
 
@@ -389,7 +392,7 @@ mptcp_getconnids(struct mptses *mpte, associd_t aid, uint32_t *cnt,
  * Handle SIOCGCONNINFO ioctl for PF_MULTIPATH domain.
  */
 static int
-mptcp_getconninfo(struct mptses *mpte, connid_t *cid, uint32_t *flags,
+mptcp_getconninfo(struct mptses *mpte, sae_connid_t *cid, uint32_t *flags,
     uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len,
     user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type,
     user_addr_t aux_data, uint32_t *aux_len)
@@ -402,15 +405,15 @@ mptcp_getconninfo(struct mptses *mpte, connid_t *cid, uint32_t *flags,
 
        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
 
-       if (*cid == CONNID_ALL)
+       if (*cid == SAE_CONNID_ALL)
                return (EINVAL);
 
        TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
-               if (mpts->mpts_connid == *cid || *cid == CONNID_ANY)
+               if (mpts->mpts_connid == *cid || *cid == SAE_CONNID_ANY)
                        break;
        }
        if (mpts == NULL)
-               return ((*cid == CONNID_ANY) ? ENXIO : EINVAL);
+               return ((*cid == SAE_CONNID_ANY) ? ENXIO : EINVAL);
 
        MPTS_LOCK(mpts);
        ifp = mpts->mpts_outif;
@@ -484,8 +487,11 @@ mptcp_getconninfo(struct mptses *mpte, connid_t *cid, uint32_t *flags,
                                goto out;
                }
        }
-       mptcplog2((LOG_INFO, "%s: cid %d flags %x \n",
-           __func__, mpts->mpts_connid, mpts->mpts_flags));
+       mptcplog((LOG_DEBUG, "MPTCP Socket: "
+           "%s: cid %d flags %x \n",
+           __func__, mpts->mpts_connid, mpts->mpts_flags),
+           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
+
 out:
        MPTS_UNLOCK(mpts);
        return (error);
@@ -495,15 +501,17 @@ out:
  * Handle SIOCSCONNORDER
  */
 int
-mptcp_setconnorder(struct mptses *mpte, connid_t cid, uint32_t rank)
+mptcp_setconnorder(struct mptses *mpte, sae_connid_t cid, uint32_t rank)
 {
        struct mptsub *mpts, *mpts1;
        int error = 0;
 
        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
-       mptcplog((LOG_DEBUG, "%s: cid %d rank %d \n", __func__, cid, rank));
+       mptcplog((LOG_DEBUG, "MPTCP Socket: "
+           "%s: cid %d rank %d \n", __func__, cid, rank),
+           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
 
-       if (cid == CONNID_ANY || cid == CONNID_ALL) {
+       if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) {
                error = EINVAL;
                goto out;
        }
@@ -588,8 +596,7 @@ mptcp_connorder_helper(struct mptsub *mpts)
                tp->t_mpflags &= ~TMPF_BACKUP_PATH;
        else
                tp->t_mpflags |= TMPF_BACKUP_PATH;
-       mptcplog((LOG_DEBUG, "%s cid %d flags %x", __func__,
-           mpts->mpts_connid, mpts->mpts_flags));      
+
        socket_unlock(so, 0);
 
 }
@@ -598,7 +605,7 @@ mptcp_connorder_helper(struct mptsub *mpts)
  * Handle SIOCSGONNORDER
  */
 int
-mptcp_getconnorder(struct mptses *mpte, connid_t cid, uint32_t *rank)
+mptcp_getconnorder(struct mptses *mpte, sae_connid_t cid, uint32_t *rank)
 {
        struct mptsub *mpts;
        int error = 0;
@@ -607,7 +614,7 @@ mptcp_getconnorder(struct mptses *mpte, connid_t cid, uint32_t *rank)
        VERIFY(rank != NULL);
        *rank = 0;
 
-       if (cid == CONNID_ANY || cid == CONNID_ALL) {
+       if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) {
                error = EINVAL;
                goto out;
        }
@@ -748,7 +755,7 @@ out:
  * connection while keeping the MPTCP-level connection (association).
  */
 static int
-mptcp_disconnectx(struct mptses *mpte, associd_t aid, connid_t cid)
+mptcp_disconnectx(struct mptses *mpte, sae_associd_t aid, sae_connid_t cid)
 {
        struct mptsub *mpts;
        struct socket *mp_so;
@@ -760,16 +767,19 @@ mptcp_disconnectx(struct mptses *mpte, associd_t aid, connid_t cid)
        mp_so = mpte->mpte_mppcb->mpp_socket;
        mp_tp = mpte->mpte_mptcb;
 
-       mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx aid %d cid %d %d\n", __func__,
-           (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), aid, cid, mp_so->so_error));
-       DTRACE_MPTCP5(disconnectx, struct mptses *, mpte, associd_t, aid,
-           connid_t, cid, struct socket *, mp_so, struct mptcb *, mp_tp);
+       mptcplog((LOG_DEBUG, "MPTCP Socket: "
+           "%s: mp_so 0x%llx aid %d cid %d %d\n", __func__,
+           (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), aid, cid, mp_so->so_error),
+           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
 
-       VERIFY(aid == ASSOCID_ANY || aid == ASSOCID_ALL ||
+       DTRACE_MPTCP5(disconnectx, struct mptses *, mpte, sae_associd_t, aid,
+           sae_connid_t, cid, struct socket *, mp_so, struct mptcb *, mp_tp);
+
+       VERIFY(aid == SAE_ASSOCID_ANY || aid == SAE_ASSOCID_ALL ||
            aid == mpte->mpte_associd);
 
        /* terminate the association? */
-       if (cid == CONNID_ANY || cid == CONNID_ALL) {
+       if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) {
                /* if we're not detached, go thru socket state checks */
                if (!(mp_so->so_flags & SOF_PCBCLEARING)) {
                        if (!(mp_so->so_state & (SS_ISCONNECTED|
@@ -799,10 +809,26 @@ mptcp_disconnectx(struct mptses *mpte, associd_t aid, connid_t cid)
                                (void) mptcp_output(mpte);
                }
        } else {
+               bool disconnect_embryonic_subflows = false;
+               struct socket *so = NULL;
+               
                TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
                        if (mpts->mpts_connid != cid)
                                continue;
+
                        MPTS_LOCK(mpts);
+                       /*
+                        * Check if the disconnected subflow is the one used
+                        * to initiate the MPTCP connection.
+                        * If it is, and the connection is not yet join-ready,
+                        * disconnect all other subflows.
+                        */
+                       so = mpts->mpts_socket;
+                       if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY) && 
+                           so && !(so->so_flags & SOF_MP_SEC_SUBFLOW)) {
+                               disconnect_embryonic_subflows = true;
+                       }
+
                        mpts->mpts_flags |= MPTSF_USER_DISCONNECT;
                        mptcp_subflow_disconnect(mpte, mpts, FALSE);
                        MPTS_UNLOCK(mpts);
@@ -813,6 +839,16 @@ mptcp_disconnectx(struct mptses *mpte, associd_t aid, connid_t cid)
                        error = EINVAL;
                        goto out;
                }
+               
+               if (disconnect_embryonic_subflows) {
+                       TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
+                               if (mpts->mpts_connid == cid)
+                                       continue;
+                               MPTS_LOCK(mpts);
+                               mptcp_subflow_disconnect(mpte, mpts, TRUE);
+                               MPTS_UNLOCK(mpts);
+                       }
+               }
        }
 
        if (error == 0)
@@ -836,7 +872,7 @@ mptcp_usr_disconnect(struct socket *mp_so)
 {
        int error = 0;
 
-       error = mptcp_usr_disconnectx(mp_so, ASSOCID_ALL, CONNID_ALL);
+       error = mptcp_usr_disconnectx(mp_so, SAE_ASSOCID_ALL, SAE_CONNID_ALL);
        return (error);
 }
 
@@ -844,7 +880,7 @@ mptcp_usr_disconnect(struct socket *mp_so)
  * User-protocol pru_disconnectx callback.
  */
 static int
-mptcp_usr_disconnectx(struct socket *mp_so, associd_t aid, connid_t cid)
+mptcp_usr_disconnectx(struct socket *mp_so, sae_associd_t aid, sae_connid_t cid)
 {
        struct mppcb *mpp = sotomppcb(mp_so);
        struct mptses *mpte;
@@ -858,7 +894,7 @@ mptcp_usr_disconnectx(struct socket *mp_so, associd_t aid, connid_t cid)
        VERIFY(mpte != NULL);
        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
 
-       if (aid != ASSOCID_ANY && aid != ASSOCID_ALL &&
+       if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL &&
            aid != mpte->mpte_associd) {
                error = EINVAL;
                goto out;
@@ -915,7 +951,7 @@ mptcp_usrclosed(struct mptses *mpte)
  * User-protocol pru_peeloff callback.
  */
 static int
-mptcp_usr_peeloff(struct socket *mp_so, associd_t aid, struct socket **psop)
+mptcp_usr_peeloff(struct socket *mp_so, sae_associd_t aid, struct socket **psop)
 {
        struct mppcb *mpp = sotomppcb(mp_so);
        struct mptses *mpte;
@@ -942,7 +978,7 @@ out:
  * yet associated (MPTCP-level connection has not been established.)
  */
 static int
-mptcp_peeloff(struct mptses *mpte, associd_t aid, struct socket **psop)
+mptcp_peeloff(struct mptses *mpte, sae_associd_t aid, struct socket **psop)
 {
        struct socket *so = NULL, *mp_so;
        struct mptsub *mpts;
@@ -954,16 +990,16 @@ mptcp_peeloff(struct mptses *mpte, associd_t aid, struct socket **psop)
        VERIFY(psop != NULL);
        *psop = NULL;
 
-       DTRACE_MPTCP3(peeloff, struct mptses *, mpte, associd_t, aid,
+       DTRACE_MPTCP3(peeloff, struct mptses *, mpte, sae_associd_t, aid,
            struct socket *, mp_so);
 
        /* peeloff cannot happen after an association is established */
-       if (mpte->mpte_associd != ASSOCID_ANY) {
+       if (mpte->mpte_associd != SAE_ASSOCID_ANY) {
                error = EINVAL;
                goto out;
        }
 
-       if (aid != ASSOCID_ANY && aid != ASSOCID_ALL) {
+       if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
                error = EINVAL;
                goto out;
        }
@@ -1006,8 +1042,11 @@ mptcp_peeloff(struct mptses *mpte, associd_t aid, struct socket **psop)
        }
        *psop = so;
 
-       mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx\n", __func__,
-           (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)));
+       mptcplog((LOG_DEBUG, "MPTCP Socket: "
+           "%s: mp_so 0x%llx\n", __func__,
+           (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
+           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
+
 out:
        return (error);
 }
@@ -1653,13 +1692,15 @@ mptcp_setopt(struct mptses *mpte, struct sockopt *sopt)
                if (mpo == NULL) {
                        error = ENOBUFS;
                } else {
-                       mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx sopt %s "
+                       mptcplog((LOG_DEBUG, "MPTCP Socket: "
+                           "%s: mp_so 0x%llx sopt %s "
                            "val %d %s\n", __func__,
                            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                            mptcp_sopt2str(level, optname, buf,
                            sizeof (buf)), optval,
                            (mpo->mpo_flags & MPOF_ATTACHED) ?
-                           "updated" : "recorded"));
+                           "updated" : "recorded"),
+                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
 
                        /* initialize or update, as needed */
                        mpo->mpo_intval = optval;
@@ -1695,16 +1736,20 @@ mptcp_setopt(struct mptses *mpte, struct sockopt *sopt)
        }
 out:
        if (error == 0 && mpo != NULL) {
-               mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s val %d set %s\n",
+               mptcplog((LOG_ERR, "MPTCP Socket: "
+                   "%s: mp_so 0x%llx sopt %s val %d set %s\n",
                    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                    mptcp_sopt2str(level, optname, buf,
                    sizeof (buf)), optval, (mpo->mpo_flags & MPOF_INTERIM) ?
-                   "pending" : "successful"));
+                   "pending" : "successful"),
+                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
        } else if (error != 0) {
-               mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s can't be issued "
+               mptcplog((LOG_ERR, "MPTCP Socket: "
+                   "%s: mp_so 0x%llx sopt %s can't be issued "
                    "error %d\n", __func__,
                    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mptcp_sopt2str(level,
-                   optname, buf, sizeof (buf)), error));
+                   optname, buf, sizeof (buf)), error),
+                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
        }
        return (error);
 }
@@ -1841,10 +1886,12 @@ mptcp_ctloutput(struct socket *mp_so, struct sockopt *sopt)
        /* we only handle socket and TCP-level socket options for MPTCP */
        if (sopt->sopt_level != SOL_SOCKET && sopt->sopt_level != IPPROTO_TCP) {
                char buf[32];
-               mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx sopt %s level not "
+               mptcplog((LOG_DEBUG, "MPTCP Socket: "
+                   "%s: mp_so 0x%llx sopt %s level not "
                    "handled\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                    mptcp_sopt2str(sopt->sopt_level,
-                   sopt->sopt_name, buf, sizeof (buf))));
+                   sopt->sopt_name, buf, sizeof (buf))),
+                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
                error = EINVAL;
                goto out;
        }
index 9faaba48ff06ecb236562cd3a11191f463c0db04..905ab934a815cc68dce73b76dd59823c4f6bc857 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2012-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -55,8 +55,8 @@ struct mptses {
        TAILQ_HEAD(, mptsub) mpte_subflows;     /* list of subflows */
        uint16_t        mpte_numflows;          /* # of subflows in list */
        uint16_t        mpte_nummpcapflows;     /* # of MP_CAP subflows */
-       associd_t       mpte_associd;           /* MPTCP association ID */
-       connid_t        mpte_connid_last;       /* last used connection ID */
+       sae_associd_t   mpte_associd;           /* MPTCP association ID */
+       sae_connid_t    mpte_connid_last;       /* last used connection ID */
        /*
         * Threading (protected by mpte_thread_lock)
         */
@@ -139,7 +139,7 @@ struct mptsub {
        uint32_t                mpts_flags;     /* see flags below */
        uint32_t                mpts_evctl;     /* subflow control events */
        uint32_t                mpts_family;    /* address family */
-       connid_t                mpts_connid;    /* subflow connection ID */
+       sae_connid_t            mpts_connid;    /* subflow connection ID */
        int                     mpts_oldintval; /* sopt_val before sosetopt  */
        uint32_t                mpts_rank;      /* subflow priority/rank */
        int32_t                 mpts_soerror;   /* most recent subflow error */
@@ -150,13 +150,18 @@ struct mptsub {
        struct ifnet            *mpts_outif;    /* outbound interface */
        u_int64_t               mpts_sndnxt;    /* next byte to send in mp so */
        u_int32_t               mpts_rel_seq;   /* running count of subflow # */
-       struct {
-               u_int64_t       mptsl_dsn;      /* Data Sequence Number */
-               u_int32_t       mptsl_sseq;     /* Corresponding Data Seq */
-               u_int32_t       mptsl_len;      /* length of mapping */
-       } mpts_lastmap;
        struct protosw          *mpts_oprotosw; /* original protosw */
        struct mptsub_connreq   mpts_mpcr;      /* connection request */
+       int32_t                 mpts_srtt;      /* tcp's rtt estimate */
+       int32_t                 mpts_rxtcur;    /* tcp's rto estimate */
+       uint32_t                mpts_probesoon; /* send probe after probeto */
+       uint32_t                mpts_probecnt;  /* number of probes sent */
+       uint32_t                mpts_maxseg;    /* cached value of t_maxseg */
+       uint32_t                mpts_peerswitch;/* no of uses of backup so */
+#define MPTSL_WIRED            0x01
+#define MPTSL_WIFI             0x02
+#define MPTSL_CELL             0x04
+       uint32_t                mpts_linktype;  /* wired, wifi, cell */
 };
 
 /*
@@ -327,10 +332,12 @@ struct mptcb {
        /*
         * Zombie handling
         */
-#define        MPT_GC_TICKS    (60)
+#define        MPT_GC_TICKS            (30)
+#define MPT_GC_TICKS_FAST      (10)
        int32_t         mpt_gc_ticks;           /* Used for zombie deletion */
 
        u_int32_t       mpt_notsent_lowat;      /* TCP_NOTSENT_LOWAT support */
+       u_int32_t       mpt_peer_version;       /* Version from peer */
 };
 
 /* valid values for mpt_flags (see also notes on mpts_flags above) */
@@ -349,9 +356,10 @@ struct mptcb {
        "\6SND_64BITDSN\7SND_64BITACK\10RCVD_64BITACK\11POST_FALLBACK_SYNC"
 
 /* valid values for mpt_timer_vals */
-#define        MPTT_REXMT      0x01    /* Starting Retransmit Timer */
-#define        MPTT_TW         0x02    /* Starting Timewait Timer */
-#define        MPTT_FASTCLOSE  0x04    /* Starting Fastclose wait timer */
+#define        MPTT_REXMT              0x01    /* Starting Retransmit Timer */
+#define        MPTT_TW                 0x02    /* Starting Timewait Timer */
+#define        MPTT_FASTCLOSE          0x04    /* Starting Fastclose wait timer */
+//#define MPTT_PROBE_TIMER     0x08    /* Timer for probing preferred path */
 
 #define        MPT_LOCK_ASSERT_HELD(_mpt)                                      \
        lck_mtx_assert(&(_mpt)->mpt_lock, LCK_MTX_ASSERT_OWNED)
@@ -430,12 +438,20 @@ struct mptcp_keys_pool_head {
 #define        MPTCP_RWIN_MAX  (1<<16)
 
 /* MPTCP Debugging Levels */
-#define        MP_NODEBUG              0x0
-#define        MP_ERR_DEBUG            0x1
-#define        MP_VERBOSE_DEBUG_1      0x2
-#define        MP_VERBOSE_DEBUG_2      0x3
-#define        MP_VERBOSE_DEBUG_3      0x4
-#define        MP_VERBOSE_DEBUG_4      0x5     /* output path debugging */
+#define        MPTCP_LOGLVL_NONE       0x0     /* No debug logging */
+#define        MPTCP_LOGLVL_ERR        0x1     /* Errors in execution are logged */
+#define        MPTCP_LOGLVL_LOG        0x2     /* Important logs */
+#define        MPTCP_LOGLVL_VERBOSE    0x3     /* Verbose logs */
+
+/* MPTCP sub-components for debug logging */
+#define MPTCP_NO_DBG           0x00    /* No areas are logged */
+#define MPTCP_STATE_DBG                0x01    /* State machine logging */
+#define MPTCP_SOCKET_DBG       0x02    /* Socket call logging */
+#define MPTCP_SENDER_DBG       0x04    /* Sender side logging */
+#define MPTCP_RECEIVER_DBG     0x08    /* Receiver logging */
+#define MPTCP_EVENTS_DBG       0x10    /* Subflow events logging */
+#define MPTCP_ALL_DBG          (MPTCP_STATE_DBG | MPTCP_SOCKET_DBG | \
+    MPTCP_SENDER_DBG | MPTCP_RECEIVER_DBG | MPTCP_EVENTS_DBG)
 
 /* Mask to obtain 32-bit portion of data sequence number */
 #define        MPTCP_DATASEQ_LOW32_MASK        (0xffffffff)
@@ -490,12 +506,13 @@ struct mptcp_keys_pool_head {
        }                                                               \
 }
 
-#define        mptcplog(x)     do { if (mptcp_verbose >= 1) log x; } while (0)
-#define        mptcplog2(x)    do { if (mptcp_verbose >= 2) log x; } while (0)
-#define        mptcplog3(x)    do { if (mptcp_verbose >= 3) log x; } while (0)
+#define        mptcplog(x, y, z)       do {                                    \
+       if ((mptcp_dbg_area & y) &&                                     \
+           (mptcp_dbg_level >= z))                                     \
+               log x;                                                  \
+} while (0)
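
A call site now names both the component and the severity. For example (dsn and len stand in for whatever the caller wants to report), the message below is emitted only when mptcp_dbg_area has MPTCP_SENDER_DBG set and mptcp_dbg_level is at least MPTCP_LOGLVL_VERBOSE, matching the updated call sites earlier in this change:

    mptcplog((LOG_DEBUG, "MPTCP Sender: %s: dsn %llu len %d\n",
        __func__, dsn, len),
        MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
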
 
 extern int mptcp_enable;       /* Multipath TCP */
-extern int mptcp_dbg;          /* Multipath TCP DBG */
 extern int mptcp_mpcap_retries;        /* Multipath TCP retries */
 extern int mptcp_join_retries; /* Multipath TCP Join retries */
 extern int mptcp_dss_csum;     /* Multipath DSS Option checksum */
@@ -506,7 +523,9 @@ extern int mptcp_remaddr_enable;/* REMOVE_ADDR option enable/disable */
 extern int mptcp_fastjoin;     /* Enable FastJoin */
 extern int mptcp_zerortt_fastjoin; /* Enable Data after SYN Fast Join */
 extern int mptcp_rwnotify;     /* Enable RW notification on resume */
-extern uint32_t mptcp_verbose; /* verbose and mptcp_dbg must be unified */
+extern uint32_t mptcp_dbg_level;       /* Multipath TCP debugging level */
+extern uint32_t mptcp_dbg_area;        /* Multipath TCP debugging area */
+
 #define MPPCB_LIMIT    16
 extern uint32_t mptcp_socket_limit; /* max number of mptcp sockets allowed */
 extern uint32_t mptcp_delayed_subf_start; /* delayed cellular subflow start */ 
@@ -515,7 +534,7 @@ extern int tcp_jack_rxmt;   /* Join ACK retransmission value in msecs */
 __BEGIN_DECLS
 extern void mptcp_init(struct protosw *, struct domain *);
 extern int mptcp_ctloutput(struct socket *, struct sockopt *);
-extern struct mptses *mptcp_sescreate(struct socket *, struct mppcb *);
+extern void *mptcp_sescreate(struct socket *, struct mppcb *);
 extern void mptcp_drain(void);
 extern struct mptses *mptcp_drop(struct mptses *, struct mptcb *, int);
 extern struct mptses *mptcp_close(struct mptses *, struct mptcb *);
@@ -524,8 +543,8 @@ extern int mptcp_unlock(struct socket *, int, void *);
 extern lck_mtx_t *mptcp_getlock(struct socket *, int);
 extern void mptcp_thread_signal(struct mptses *);
 extern void mptcp_flush_sopts(struct mptses *);
-extern int mptcp_setconnorder(struct mptses *, connid_t, uint32_t);
-extern int mptcp_getconnorder(struct mptses *, connid_t, uint32_t *);
+extern int mptcp_setconnorder(struct mptses *, sae_connid_t, uint32_t);
+extern int mptcp_getconnorder(struct mptses *, sae_connid_t, uint32_t *);
 
 extern struct mptopt *mptcp_sopt_alloc(int);
 extern const char *mptcp_sopt2str(int, int, char *, int);
@@ -580,9 +599,12 @@ extern void  mptcp_output_getm_dsnmap64(struct socket *, int, uint32_t,
     u_int64_t *, u_int32_t *, u_int16_t *);
 extern void mptcp_send_dfin(struct socket *);
 extern void mptcp_act_on_txfail(struct socket *);
-extern struct mptsub *mptcp_get_subflow(struct mptses *, struct mptsub *);
+extern struct mptsub *mptcp_get_subflow(struct mptses *, struct mptsub *,
+    struct mptsub **);
 extern struct mptsub *mptcp_get_pending_subflow(struct mptses *,
     struct mptsub *);
+extern struct mptsub* mptcp_use_symptoms_hints(struct mptsub*,
+    struct mptsub *);
 extern int mptcp_get_map_for_dsn(struct socket *, u_int64_t, u_int32_t *);
 extern int32_t mptcp_adj_sendlen(struct socket *so, int32_t off, int32_t len);
 extern int32_t mptcp_sbspace(struct mptcb *);
@@ -593,25 +615,81 @@ extern boolean_t mptcp_no_rto_spike(struct socket*);
 extern int mptcp_set_notsent_lowat(struct mptses *mpte, int optval);
 extern u_int32_t mptcp_get_notsent_lowat(struct mptses *mpte);
 extern int mptcp_notsent_lowat_check(struct socket *so);
-
+extern void mptcp_control_register(void);
+extern int mptcp_is_wifi_unusable(void);
+extern int mptcp_is_cell_unusable(void);
 __END_DECLS
 
 #endif /* BSD_KERNEL_PRIVATE */
 #ifdef PRIVATE
+
 typedef struct mptcp_flow {
+       size_t                  flow_len;
+       size_t                  flow_tcpci_offset;
        uint32_t                flow_flags;
-       connid_t                flow_cid;
+       sae_connid_t            flow_cid;
        struct sockaddr_storage flow_src;
        struct sockaddr_storage flow_dst;
-       conninfo_tcp_t          flow_ci;
+       uint64_t                flow_sndnxt;    /* subflow's sndnxt snapshot */
+       uint32_t                flow_relseq;    /* last subflow rel seq# */
+       int32_t                 flow_soerror;   /* subflow level error */
+       uint32_t                flow_probecnt;  /* number of probes sent */
+       uint32_t                flow_peerswitch;/* did peer switch */
+       conninfo_tcp_t          flow_ci;        /* must be the last field */
 } mptcp_flow_t;
 
 typedef struct conninfo_mptcp {
        size_t          mptcpci_len;
-       size_t          mptcpci_nflows;
-       uint32_t        mptcpci_state;
+       size_t          mptcpci_flow_offset;    /* offsetof first flow */
+       size_t          mptcpci_nflows;         /* number of subflows */
+       uint32_t        mptcpci_state;          /* MPTCP level state */
+       uint32_t        mptcpci_mpte_flags;     /* Session flags */
+       uint32_t        mptcpci_flags;          /* MPTCB flags */
+       uint32_t        mptcpci_ltoken;         /* local token */
+       uint32_t        mptcpci_rtoken;         /* remote token */
+       uint32_t        mptcpci_notsent_lowat;  /* NOTSENT_LOWAT */
+
+       /* Send side */
+       uint64_t        mptcpci_snduna;         /* DSN of last unacked byte */
+       uint64_t        mptcpci_sndnxt;         /* DSN of next byte to send */
+       uint64_t        mptcpci_sndmax;         /* DSN of max byte sent */
+       uint64_t        mptcpci_lidsn;          /* Local IDSN */
+       uint32_t        mptcpci_sndwnd;         /* Send window snapshot */
+
+       /* Receive side */
+       uint64_t        mptcpci_rcvnxt;         /* Next expected DSN */
+       uint64_t        mptcpci_rcvatmark;      /* Session level rcvnxt */
+       uint64_t        mptcpci_ridsn;          /* Peer's IDSN */
+       uint32_t        mptcpci_rcvwnd;         /* Receive window */
+
+       uint8_t         mptcpci_mpte_addrid;    /* last addr id */
+
        mptcp_flow_t    mptcpci_flows[1];
 } conninfo_mptcp_t;
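
The new flow_len, flow_tcpci_offset and mptcpci_flow_offset fields let a consumer of the pcblist sysctl walk these variable-length records without hard-coding structure sizes. A hypothetical parser (the sysctl node name is not shown in this hunk, the helper is not part of the change, and it assumes the private definitions above are visible) might advance through the buffer like this:

    #include <stddef.h>
    #include <stdint.h>

    static void
    walk_mptcp_records(const char *buf, size_t buflen)
    {
            size_t off = 0;

            while (off + sizeof(conninfo_mptcp_t) <= buflen) {
                    const conninfo_mptcp_t *ci =
                        (const conninfo_mptcp_t *)(const void *)(buf + off);
                    const char *fp = (const char *)ci + ci->mptcpci_flow_offset;
                    size_t i;

                    for (i = 0; i < ci->mptcpci_nflows; i++) {
                            const mptcp_flow_t *flow =
                                (const mptcp_flow_t *)(const void *)fp;
                            const conninfo_tcp_t *tci = (const conninfo_tcp_t *)
                                (const void *)((const char *)flow + flow->flow_tcpci_offset);
                            /* ... inspect flow_cid, flow_flags, tci, etc. ... */
                            (void)tci;
                            fp += flow->flow_len;           /* next subflow record */
                    }

                    if (ci->mptcpci_len == 0)
                            break;                          /* defensive: avoid looping forever */
                    off += ci->mptcpci_len;                 /* next MPTCP connection record */
            }
    }
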
 
+/* Use SymptomsD notifications of wifi and cell status in subflow selection */
+#define MPTCP_KERN_CTL_NAME    "com.apple.network.advisory"
+typedef struct symptoms_advisory {
+       union {
+               uint32_t        sa_nwk_status_int;
+               struct {
+                       union {
+#define SYMPTOMS_ADVISORY_NOCOMMENT    0x00
+                               uint16_t        sa_nwk_status;
+                               struct {
+#define SYMPTOMS_ADVISORY_WIFI_BAD     0x01
+#define SYMPTOMS_ADVISORY_WIFI_OK      0x02
+                                       uint8_t sa_wifi_status;
+#define SYMPTOMS_ADVISORY_CELL_BAD     0x01
+#define SYMPTOMS_ADVISORY_CELL_OK      0x02
+                                       uint8_t sa_cell_status;
+                               };
+                       };
+                       uint16_t        sa_unused;
+               };
+       };
+} symptoms_advisory_t;
+
+
 #endif /* PRIVATE */
 #endif /* _NETINET_MPTCP_VAR_H_ */
index 173b506a6f3fc2cae1a52db7d0f2dd9edf4b2db0..c2b41a36509d2e90ffe3b9377db459c98f2e376a 100644 (file)
@@ -230,11 +230,12 @@ rip_input(m, iphlen)
                        continue;
                if (last) {
                        struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);
-               
+
                        skipit = 0;
-                       
+
 #if NECP
-                       if (n && !necp_socket_is_allowed_to_send_recv_v4(last, 0, 0, &ip->ip_dst, &ip->ip_src, ifp, NULL)) {
+                       if (n && !necp_socket_is_allowed_to_send_recv_v4(last, 0, 0,
+                               &ip->ip_dst, &ip->ip_src, ifp, NULL, NULL)) {
                                m_freem(n);
                                /* do not inject data to pcb */
                                skipit = 1;
@@ -286,7 +287,8 @@ rip_input(m, iphlen)
 
        skipit = 0;
 #if NECP
-       if (last && !necp_socket_is_allowed_to_send_recv_v4(last, 0, 0, &ip->ip_dst, &ip->ip_src, ifp, NULL)) {
+       if (last && !necp_socket_is_allowed_to_send_recv_v4(last, 0, 0,
+               &ip->ip_dst, &ip->ip_src, ifp, NULL, NULL)) {
                m_freem(m);
                OSAddAtomic(1, &ipstat.ips_delivered);
                /* do not inject data to pcb */
@@ -402,7 +404,7 @@ rip_output(
                        m_freem(m);
                        return(EMSGSIZE);
                }
-               M_PREPEND(m, sizeof(struct ip), M_WAIT);
+               M_PREPEND(m, sizeof(struct ip), M_WAIT, 1);
                if (m == NULL)
                        return ENOBUFS;
                ip = mtod(m, struct ip *);
@@ -437,19 +439,21 @@ rip_output(
 
        if (inp->inp_laddr.s_addr != INADDR_ANY)
                ipoa.ipoa_flags |= IPOAF_BOUND_SRCADDR;
-       
+
 #if NECP
        {
                necp_kernel_policy_id policy_id;
-               if (!necp_socket_is_allowed_to_send_recv_v4(inp, 0, 0, &ip->ip_src, &ip->ip_dst, NULL, &policy_id)) {
+               u_int32_t route_rule_id;
+               if (!necp_socket_is_allowed_to_send_recv_v4(inp, 0, 0,
+                       &ip->ip_src, &ip->ip_dst, NULL, &policy_id, &route_rule_id)) {
                        m_freem(m);
                        return(EHOSTUNREACH);
                }
 
-               necp_mark_packet_from_socket(m, inp, policy_id);
+               necp_mark_packet_from_socket(m, inp, policy_id, route_rule_id);
        }
 #endif /* NECP */
-       
+
 #if IPSEC
        if (inp->inp_sp != NULL && ipsec_setsocket(m, so) != 0) {
                m_freem(m);
@@ -479,6 +483,7 @@ rip_output(
         * to pass the PCB cached route pointer directly to IP and
         * the modules beneath it.
         */
+       // TODO: PASS DOWN ROUTE RULE ID
        error = ip_output(m, inp->inp_options, &inp->inp_route, flags,
            imo, &ipoa);
 
index c4ecae9d89c44dfa415998e05efcf465fa3391fa..134a615355dde9a55db2005336b7f747cf385ec0 100644 (file)
@@ -63,8 +63,8 @@
 
 #ifndef _NETINET_TCP_H_
 #define _NETINET_TCP_H_
+#include <sys/types.h>
 #include <sys/appleapiopts.h>
-#include <sys/_types.h>
 #include <machine/endian.h>
 
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
@@ -138,6 +138,9 @@ struct tcphdr {
 #define        TCPOPT_MULTIPATH                30
 #endif
 
+#define        TCPOPT_FASTOPEN                 34
+#define        TCPOLEN_FASTOPEN_REQ            2
+
 /* Option definitions */
 #define TCPOPT_SACK_PERMIT_HDR \
 (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_SACK_PERMITTED<<8|TCPOLEN_SACK_PERMITTED)
@@ -217,6 +220,8 @@ struct tcphdr {
 #define        TCP_KEEPCNT             0x102   /* number of keepalives before close */
 #define        TCP_SENDMOREACKS        0x103   /* always ack every other packet */
 #define        TCP_ENABLE_ECN          0x104   /* Enable ECN on a connection */
+#define        TCP_FASTOPEN            0x105   /* Enable/Disable TCP Fastopen on this socket */
+#define        TCP_CONNECTION_INFO     0x106   /* State of TCP connection */
 
 #ifdef PRIVATE
 #define        TCP_INFO                0x200   /* retrieve tcp_info structure */
@@ -251,6 +256,13 @@ struct tcphdr {
 
 #define TCPI_FLAG_LOSSRECOVERY 0x01    /* Currently in loss recovery */
 
+struct tcp_conn_status {
+       unsigned int    probe_activated : 1;
+       unsigned int    write_probe_failed : 1;
+       unsigned int    read_probe_failed : 1;
+       unsigned int    conn_probe_failed : 1;
+};
+
 /*
  * Add new fields to this structure at the end only. This will preserve
  * binary compatibility.
@@ -314,6 +326,19 @@ struct tcp_info {
        u_int64_t       tcpi_wired_rxbytes __attribute((aligned(8)));   /* bytes received over Wired */
        u_int64_t       tcpi_wired_txpackets __attribute((aligned(8))); /* packets transmitted over Wired */
        u_int64_t       tcpi_wired_txbytes __attribute((aligned(8)));   /* bytes transmitted over Wired */
+       struct tcp_conn_status  tcpi_connstatus; /* status of connection probes */
+
+       u_int16_t       /* Client-side information */
+               tcpi_tfo_cookie_req:1, /* Cookie requested? */
+               tcpi_tfo_cookie_rcv:1, /* Cookie received? */
+               tcpi_tfo_syn_loss:1,   /* Fallback to reg. TCP after SYN-loss */
+               tcpi_tfo_syn_data_sent:1, /* SYN+data has been sent out */
+               tcpi_tfo_syn_data_acked:1, /* SYN+data has been fully acknowledged */
+               /* And the following are for server-side information (must be set on the listener socket) */
+               tcpi_tfo_syn_data_rcv:1, /* Server received SYN+data with a valid cookie */
+               tcpi_tfo_cookie_req_rcv:1, /* Server received cookie-request */
+               tcpi_tfo_cookie_sent:1, /* Server announced cookie */
+               tcpi_tfo_cookie_invalid:1; /* Server received an invalid cookie */
 };
 
 struct tcp_measure_bw_burst {
@@ -360,6 +385,50 @@ typedef struct conninfo_tcp {
 #pragma pack()
 
 #endif /* PRIVATE */
+
+struct tcp_connection_info {
+        u_int8_t       tcpi_state;     /* connection state */
+        u_int8_t       tcpi_snd_wscale; /* Window scale for send window */
+        u_int8_t       tcpi_rcv_wscale; /* Window scale for receive window */
+        u_int8_t       __pad1;
+        u_int32_t      tcpi_options;   /* TCP options supported */
+#define TCPCI_OPT_TIMESTAMPS   0x00000001 /* Timestamps enabled */
+#define TCPCI_OPT_SACK         0x00000002 /* SACK enabled */
+#define TCPCI_OPT_WSCALE       0x00000004 /* Window scaling enabled */
+#define TCPCI_OPT_ECN          0x00000008 /* ECN enabled */
+        u_int32_t      tcpi_flags;     /* flags */
+#define TCPCI_FLAG_LOSSRECOVERY 0x00000001
+#define TCPCI_FLAG_REORDERING_DETECTED  0x00000002
+        u_int32_t      tcpi_rto;       /* retransmit timeout in ms */
+        u_int32_t      tcpi_maxseg;    /* maximum segment size supported */
+        u_int32_t      tcpi_snd_ssthresh; /* slow start threshold in bytes */
+        u_int32_t      tcpi_snd_cwnd;  /* send congestion window in bytes */
+        u_int32_t      tcpi_snd_wnd;   /* send window in bytes */
+        u_int32_t      tcpi_snd_sbbytes; /* bytes in send socket buffer, including in-flight data */
+        u_int32_t      tcpi_rcv_wnd;   /* receive window in bytes */
+        u_int32_t      tcpi_rttcur;    /* most recent RTT in ms */
+        u_int32_t      tcpi_srtt;      /* average RTT in ms */
+        u_int32_t      tcpi_rttvar;    /* RTT variance */
+       u_int32_t
+                       /* Client-side information */
+                       tcpi_tfo_cookie_req:1, /* Cookie requested? */
+                       tcpi_tfo_cookie_rcv:1, /* Cookie received? */
+                       tcpi_tfo_syn_loss:1,   /* Fallback to reg. TCP after SYN-loss */
+                       tcpi_tfo_syn_data_sent:1, /* SYN+data has been sent out */
+                       tcpi_tfo_syn_data_acked:1, /* SYN+data has been fully acknowledged */
+                       /* And the following are for server-side information (must be set on the listener socket) */
+                       tcpi_tfo_syn_data_rcv:1, /* Server received SYN+data with a valid cookie */
+                       tcpi_tfo_cookie_req_rcv:1, /* Server received cookie-request */
+                       tcpi_tfo_cookie_sent:1, /* Server announced cookie */
+                       tcpi_tfo_cookie_invalid:1, /* Server received an invalid cookie */
+                       __pad2:23;
+        u_int64_t      tcpi_txpackets __attribute__((aligned(8)));
+        u_int64_t      tcpi_txbytes __attribute__((aligned(8)));
+        u_int64_t      tcpi_txretransmitbytes __attribute__((aligned(8)));
+        u_int64_t      tcpi_rxpackets __attribute__((aligned(8)));
+        u_int64_t      tcpi_rxbytes __attribute__((aligned(8)));
+        u_int64_t      tcpi_rxoutoforderbytes __attribute__((aligned(8)));
+};
 #endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
 
 #endif
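
For illustration, a minimal userspace sketch (not part of this change) of querying the new TCP_CONNECTION_INFO option defined above; it assumes an already-connected TCP socket and that this header ships as <netinet/tcp.h>:

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>

/* Print a few fields of struct tcp_connection_info for a connected socket. */
static void print_tcp_conn_info(int fd)
{
	struct tcp_connection_info tci;
	socklen_t len = sizeof(tci);

	if (getsockopt(fd, IPPROTO_TCP, TCP_CONNECTION_INFO, &tci, &len) != 0) {
		perror("getsockopt(TCP_CONNECTION_INFO)");
		return;
	}
	printf("state=%u srtt=%ums rttvar=%u cwnd=%u rwnd=%u\n",
	    tci.tcpi_state, tci.tcpi_srtt, tci.tcpi_rttvar,
	    tci.tcpi_snd_cwnd, tci.tcpi_rcv_wnd);
}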
diff --git a/bsd/netinet/tcp_cache.c b/bsd/netinet/tcp_cache.c
new file mode 100644 (file)
index 0000000..cb3b86d
--- /dev/null
@@ -0,0 +1,743 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/* TCP-cache to store and retrieve TCP-related information */
+
+#include <net/flowhash.h>
+#include <net/route.h>
+#include <netinet/in_pcb.h>
+#include <netinet/tcp_cache.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_var.h>
+#include <kern/locks.h>
+#include <sys/queue.h>
+#include <dev/random/randomdev.h>
+
+struct tcp_heuristic_key {
+       union {
+               uint8_t thk_net_signature[IFNET_SIGNATURELEN];
+               union {
+                       struct in_addr addr;
+                       struct in6_addr addr6;
+               } thk_ip;
+       };
+       sa_family_t     thk_family;
+};
+
+struct tcp_heuristic {
+       SLIST_ENTRY(tcp_heuristic) list;
+
+       u_int32_t       th_last_access;
+
+       struct tcp_heuristic_key        th_key;
+
+       /*
+        * If tfo_cookie_loss is changed to a smaller type, it might be worth
+        * checking for integer-overflow in tcp_heuristic_tfo_inc_loss
+        */
+       u_int32_t       th_tfo_cookie_loss; /* The number of times a SYN+cookie has been lost */
+       u_int32_t       th_tfo_fallback_trials; /* Number of times we did not try out TFO due to SYN-loss */
+       u_int32_t       th_tfo_cookie_backoff; /* Time until when we should not try out TFO */
+
+       u_int8_t        th_tfo_in_backoff:1, /* Are we doing TFO due to the backoff timer? */
+                       th_tfo_aggressive_fallback:1, /* Aggressive fallback due to nasty middlebox */
+                       th_tfo_snd_middlebox_supp:1, /* We are sure that the network supports TFO in upstream direction */
+                       th_tfo_rcv_middlebox_supp:1; /* We are sure that the network supports TFO in downstream direction */
+};
+
+struct tcp_heuristics_head {
+       SLIST_HEAD(tcp_heur_bucket, tcp_heuristic) tcp_heuristics;
+
+       /* Per-hashbucket lock to avoid lock-contention */
+       lck_mtx_t       thh_mtx;
+};
+
+struct tcp_cache_key {
+       sa_family_t     tck_family;
+
+       struct tcp_heuristic_key tck_src;
+       union {
+               struct in_addr addr;
+               struct in6_addr addr6;
+       } tck_dst;
+};
+
+struct tcp_cache {
+       SLIST_ENTRY(tcp_cache) list;
+
+       u_int32_t       tc_last_access;
+
+       struct tcp_cache_key tc_key;
+
+       u_int8_t        tc_tfo_cookie[TFO_COOKIE_LEN_MAX];
+       u_int8_t        tc_tfo_cookie_len;
+};
+
+struct tcp_cache_head {
+       SLIST_HEAD(tcp_cache_bucket, tcp_cache) tcp_caches;
+
+       /* Per-hashbucket lock to avoid lock-contention */
+       lck_mtx_t       tch_mtx;
+};
+
+static u_int32_t tcp_cache_hash_seed;
+
+size_t tcp_cache_size;
+
+/*
+ * The maximum depth of the hash-bucket. This way we limit the tcp_cache to
+ * TCP_CACHE_BUCKET_SIZE * tcp_cache_size and have "natural" garbage collection
+ */
+#define        TCP_CACHE_BUCKET_SIZE 5
+
+static struct tcp_cache_head *tcp_cache;
+
+decl_lck_mtx_data(, tcp_cache_mtx);
+
+static lck_attr_t      *tcp_cache_mtx_attr;
+static lck_grp_t       *tcp_cache_mtx_grp;
+static lck_grp_attr_t  *tcp_cache_mtx_grp_attr;
+
+static struct tcp_heuristics_head *tcp_heuristics;
+
+decl_lck_mtx_data(, tcp_heuristics_mtx);
+
+static lck_attr_t      *tcp_heuristic_mtx_attr;
+static lck_grp_t       *tcp_heuristic_mtx_grp;
+static lck_grp_attr_t  *tcp_heuristic_mtx_grp_attr;
+
+/* Number of SYN-losses we accept */
+#define TFO_MAX_COOKIE_LOSS    2
+
+/*
+ * Round up to the next higher power of 2.  See "Bit Twiddling Hacks".
+ *
+ * Might be worth moving this to a library so that others
+ * (e.g., scale_to_powerof2()) can use this as well instead of a while-loop.
+ */
+static u_int32_t tcp_cache_roundup2(u_int32_t a)
+{
+       a--;
+       a |= a >> 1;
+       a |= a >> 2;
+       a |= a >> 4;
+       a |= a >> 8;
+       a |= a >> 16;
+       a++;
+
+       return a;
+}
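
As a quick check of the bit twiddling above: an input of 33 drops to 32 after the decrement, the OR cascade fills in the low bits to give 63, and the final increment returns 64; an exact power of two such as 32 maps to itself because of the initial decrement.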
+
+static void tcp_cache_hash_src(struct inpcb *inp, struct tcp_heuristic_key *key)
+{
+       struct ifnet *ifn = inp->inp_last_outifp;
+       uint8_t len = sizeof(key->thk_net_signature);
+       uint16_t flags;
+
+       if (inp->inp_vflag & INP_IPV6) {
+               int ret;
+
+               key->thk_family = AF_INET6;
+               ret = ifnet_get_netsignature(ifn, AF_INET6, &len, &flags,
+                   key->thk_net_signature);
+
+               /*
+                * ifnet_get_netsignature only returns EINVAL if ifn is NULL
+                * (we made sure that in the other cases it does not). So,
+                * in this case we should take the connection's address.
+                */
+               if (ret == ENOENT || ret == EINVAL)
+                       memcpy(&key->thk_ip.addr6, &inp->in6p_laddr, sizeof(struct in6_addr));
+       } else {
+               int ret;
+
+               key->thk_family = AF_INET;
+               ret = ifnet_get_netsignature(ifn, AF_INET, &len, &flags,
+                   key->thk_net_signature);
+
+               /*
+                * ifnet_get_netsignature only returns EINVAL if ifn is NULL
+                * (we made sure that in the other cases it does not). So,
+                * in this case we should take the connection's address.
+                */
+               if (ret == ENOENT || ret == EINVAL)
+                       memcpy(&key->thk_ip.addr, &inp->inp_laddr, sizeof(struct in_addr));
+       }
+}
+
+static u_int16_t tcp_cache_hash(struct inpcb *inp, struct tcp_cache_key *key)
+{
+       u_int32_t hash;
+
+       bzero(key, sizeof(struct tcp_cache_key));
+
+       tcp_cache_hash_src(inp, &key->tck_src);
+
+       if (inp->inp_vflag & INP_IPV6) {
+               key->tck_family = AF_INET6;
+               memcpy(&key->tck_dst.addr6, &inp->in6p_faddr,
+                   sizeof(struct in6_addr));
+       } else {
+               key->tck_family = AF_INET;
+               memcpy(&key->tck_dst.addr, &inp->inp_faddr,
+                   sizeof(struct in_addr));
+       }
+
+       hash = net_flowhash(key, sizeof(struct tcp_cache_key),
+           tcp_cache_hash_seed);
+
+       return (hash & (tcp_cache_size - 1));
+}
+
+static void tcp_cache_unlock(struct tcp_cache_head *head)
+{
+       lck_mtx_unlock(&head->tch_mtx);
+}
+
+/*
+ * Make sure that everything that happens after tcp_getcache_with_lock()
+ * is short enough to justify that you hold the per-bucket lock!!!
+ *
+ * Otherwise, it is better to build another lookup function that does not
+ * hold the lock and copies out the bits and bytes.
+ *
+ * That's why we provide the head as a "return"-pointer so that the caller
+ * can pass it back to us for tcp_cache_unlock().
+ */
+static struct tcp_cache *tcp_getcache_with_lock(struct tcpcb *tp, int create,
+    struct tcp_cache_head **headarg)
+{
+       struct inpcb *inp = tp->t_inpcb;
+       struct tcp_cache *tpcache = NULL;
+       struct tcp_cache_head *head;
+       struct tcp_cache_key key;
+       u_int16_t hash;
+       int i = 0;
+
+       hash = tcp_cache_hash(inp, &key);
+       head = &tcp_cache[hash];
+
+       lck_mtx_lock(&head->tch_mtx);
+
+       /*** First step: Look for the tcp_cache in our bucket ***/
+       SLIST_FOREACH(tpcache, &head->tcp_caches, list) {
+               if (memcmp(&tpcache->tc_key, &key, sizeof(key)) == 0)
+                       break;
+
+               i++;
+       }
+
+       /*** Second step: If it's not there, create/recycle it ***/
+       if ((tpcache == NULL) && create) {
+               if (i >= TCP_CACHE_BUCKET_SIZE) {
+                       struct tcp_cache *oldest_cache = NULL;
+                       u_int32_t max_age = 0;
+
+                       /* Look for the oldest tcp_cache in the bucket */
+                       SLIST_FOREACH(tpcache, &head->tcp_caches, list) {
+                               u_int32_t age = tcp_now - tpcache->tc_last_access;
+                               if (age > max_age) {
+                                       max_age = age;
+                                       oldest_cache = tpcache;
+                               }
+                       }
+                       VERIFY(oldest_cache != NULL);
+
+                       tpcache = oldest_cache;
+
+                       /* We recycle, thus let's indicate that there is no cookie */
+                       tpcache->tc_tfo_cookie_len = 0;
+               } else {
+                       /* Create a new cache and add it to the list */
+                       tpcache = _MALLOC(sizeof(struct tcp_cache), M_TEMP,
+                           M_NOWAIT | M_ZERO);
+                       if (tpcache == NULL)
+                               goto out_null;
+
+                       SLIST_INSERT_HEAD(&head->tcp_caches, tpcache, list);
+               }
+
+               memcpy(&tpcache->tc_key, &key, sizeof(key));
+       }
+
+       if (tpcache == NULL)
+               goto out_null;
+
+       /* Update timestamp for garbage collection purposes */
+       tpcache->tc_last_access = tcp_now;
+       *headarg = head;
+
+       return (tpcache);
+
+out_null:
+       tcp_cache_unlock(head);
+       return (NULL);
+}
+
+void tcp_cache_set_cookie(struct tcpcb *tp, u_char *cookie, u_int8_t len)
+{
+       struct tcp_cache_head *head;
+       struct tcp_cache *tpcache;
+
+       /* Call lookup/create function */
+       tpcache = tcp_getcache_with_lock(tp, 1, &head);
+       if (tpcache == NULL)
+               return;
+
+       tpcache->tc_tfo_cookie_len = len;
+       memcpy(tpcache->tc_tfo_cookie, cookie, len);
+
+       tcp_cache_unlock(head);
+}
+
+/*
+ * Get the cookie related to 'tp', and copy it into 'cookie', provided that len
+ * is big enough (len designates the available memory).
+ * Upon return, 'len' is set to the cookie's length.
+ *
+ * Returns 0 if we should request a cookie.
+ * Returns 1 if the cookie has been found and written.
+ */
+int tcp_cache_get_cookie(struct tcpcb *tp, u_char *cookie, u_int8_t *len)
+{
+       struct tcp_cache_head *head;
+       struct tcp_cache *tpcache;
+
+       /* Call lookup/create function */
+       tpcache = tcp_getcache_with_lock(tp, 1, &head);
+       if (tpcache == NULL)
+               return (0);
+
+       if (tpcache->tc_tfo_cookie_len == 0) {
+               tcp_cache_unlock(head);
+               return (0);
+       }
+
+       /*
+        * Not enough space - this should never happen as it has been checked
+        * in tcp_tfo_check. So, fail here!
+        */
+       VERIFY(tpcache->tc_tfo_cookie_len <= *len);
+
+       memcpy(cookie, tpcache->tc_tfo_cookie, tpcache->tc_tfo_cookie_len);
+       *len = tpcache->tc_tfo_cookie_len;
+
+       tcp_cache_unlock(head);
+
+       return (1);
+}
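
A hypothetical caller sketch (the helper name is illustrative, not from this diff) of the request-versus-send decision implied by the return value above:

/* Hypothetical helper: decide whether the SYN carries a cookie request
 * or the cached cookie.  'tp' is the connection's tcpcb. */
static void tfo_prepare_syn(struct tcpcb *tp)
{
	u_char cookie[TFO_COOKIE_LEN_MAX];
	u_int8_t len = sizeof(cookie);

	if (tcp_cache_get_cookie(tp, cookie, &len) == 0) {
		/* Nothing cached yet: place a cookie request in the SYN's TFO option */
	} else {
		/* Send the cached 'len'-byte cookie (plus data) in the SYN's TFO option */
	}
}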
+
+unsigned int tcp_cache_get_cookie_len(struct tcpcb *tp)
+{
+       struct tcp_cache_head *head;
+       struct tcp_cache *tpcache;
+       unsigned int cookie_len;
+
+       /* Call lookup/create function */
+       tpcache = tcp_getcache_with_lock(tp, 1, &head);
+       if (tpcache == NULL)
+               return (0);
+
+       cookie_len = tpcache->tc_tfo_cookie_len;
+
+       tcp_cache_unlock(head);
+
+       return cookie_len;
+}
+
+static u_int16_t tcp_heuristics_hash(struct inpcb *inp,
+                                    struct tcp_heuristic_key *key)
+{
+       u_int32_t hash;
+
+       bzero(key, sizeof(struct tcp_heuristic_key));
+
+       tcp_cache_hash_src(inp, key);
+
+       hash = net_flowhash(key, sizeof(struct tcp_heuristic_key),
+           tcp_cache_hash_seed);
+
+       return (hash & (tcp_cache_size - 1));
+}
+
+static void tcp_heuristic_unlock(struct tcp_heuristics_head *head)
+{
+       lck_mtx_unlock(&head->thh_mtx);
+}
+
+/*
+ * Make sure that everything that happens after tcp_getheuristic_with_lock()
+ * is short enough to justify that you hold the per-bucket lock!!!
+ *
+ * Otherwise, it is better to build another lookup function that does not
+ * hold the lock and copies out the bits and bytes.
+ *
+ * That's why we provide the head as a "return"-pointer so that the caller
+ * can pass it back to us for tcp_heuristic_unlock().
+ *
+ *
+ * ToDo - way too much code-duplication. We should create an interface to handle
+ * bucketized hashtables with recycling of the oldest element.
+ */
+static struct tcp_heuristic *tcp_getheuristic_with_lock(struct tcpcb *tp,
+    int create, struct tcp_heuristics_head **headarg)
+{
+       struct inpcb *inp = tp->t_inpcb;
+       struct tcp_heuristic *tpheur = NULL;
+       struct tcp_heuristics_head *head;
+       struct tcp_heuristic_key key;
+       u_int16_t hash;
+       int i = 0;
+
+       hash = tcp_heuristics_hash(inp, &key);
+       head = &tcp_heuristics[hash];
+
+       lck_mtx_lock(&head->thh_mtx);
+
+       /*** First step: Look for the tcp_heur in our bucket ***/
+       SLIST_FOREACH(tpheur, &head->tcp_heuristics, list) {
+               if (memcmp(&tpheur->th_key, &key, sizeof(key)) == 0)
+                       break;
+
+               i++;
+       }
+
+       /*** Second step: If it's not there, create/recycle it ***/
+       if ((tpheur == NULL) && create) {
+               if (i >= TCP_CACHE_BUCKET_SIZE) {
+                       struct tcp_heuristic *oldest_heur = NULL;
+                       u_int32_t max_age = 0;
+
+                       /* Look for the oldest tcp_heur in the bucket */
+                       SLIST_FOREACH(tpheur, &head->tcp_heuristics, list) {
+                               u_int32_t age = tcp_now - tpheur->th_last_access;
+                               if (age > max_age) {
+                                       max_age = age;
+                                       oldest_heur = tpheur;
+                               }
+                       }
+                       VERIFY(oldest_heur != NULL);
+
+                       tpheur = oldest_heur;
+
+                       /* We recycle - set everything to 0 */
+                       tpheur->th_tfo_cookie_loss = 0;
+                       tpheur->th_tfo_fallback_trials = 0;
+                       tpheur->th_tfo_cookie_backoff = 0;
+                       tpheur->th_tfo_in_backoff = 0;
+                       tpheur->th_tfo_aggressive_fallback = 0;
+                       tpheur->th_tfo_snd_middlebox_supp = 0;
+                       tpheur->th_tfo_rcv_middlebox_supp = 0;
+               } else {
+                       /* Create a new heuristic and add it to the list */
+                       tpheur = _MALLOC(sizeof(struct tcp_heuristic), M_TEMP,
+                           M_NOWAIT | M_ZERO);
+                       if (tpheur == NULL)
+                               goto out_null;
+
+                       SLIST_INSERT_HEAD(&head->tcp_heuristics, tpheur, list);
+               }
+
+               memcpy(&tpheur->th_key, &key, sizeof(key));
+       }
+
+       if (tpheur == NULL)
+               goto out_null;
+
+       /* Update timestamp for garbage collection purposes */
+       tpheur->th_last_access = tcp_now;
+       *headarg = head;
+
+       return (tpheur);
+
+out_null:
+       tcp_heuristic_unlock(head);
+       return (NULL);
+}
+
+void tcp_heuristic_tfo_success(struct tcpcb *tp)
+{
+       struct tcp_heuristics_head *head;
+
+       struct tcp_heuristic *tpheur = tcp_getheuristic_with_lock(tp, 1, &head);
+       if (tpheur == NULL)
+               return;
+
+       tpheur->th_tfo_cookie_loss = 0;
+
+       tcp_heuristic_unlock(head);
+}
+
+void tcp_heuristic_tfo_rcv_good(struct tcpcb *tp)
+{
+       struct tcp_heuristics_head *head;
+
+       struct tcp_heuristic *tpheur = tcp_getheuristic_with_lock(tp, 1, &head);
+       if (tpheur == NULL)
+               return;
+
+       tpheur->th_tfo_rcv_middlebox_supp = 1;
+
+       tcp_heuristic_unlock(head);
+
+       tp->t_tfo_flags |= TFO_F_NO_RCVPROBING;
+}
+
+void tcp_heuristic_tfo_snd_good(struct tcpcb *tp)
+{
+       struct tcp_heuristics_head *head;
+
+       struct tcp_heuristic *tpheur = tcp_getheuristic_with_lock(tp, 1, &head);
+       if (tpheur == NULL)
+               return;
+
+       tpheur->th_tfo_snd_middlebox_supp = 1;
+
+       tcp_heuristic_unlock(head);
+
+       tp->t_tfo_flags |= TFO_F_NO_SNDPROBING;
+}
+
+void tcp_heuristic_tfo_inc_loss(struct tcpcb *tp)
+{
+       struct tcp_heuristics_head *head;
+       struct tcp_heuristic *tpheur;
+
+       tpheur = tcp_getheuristic_with_lock(tp, 1, &head);
+       if (tpheur == NULL)
+               return;
+
+       /* Potential integer overflow, but tfo_cookie_loss is 32-bits */
+       tpheur->th_tfo_cookie_loss++;
+
+       tcp_heuristic_unlock(head);
+}
+
+void tcp_heuristic_tfo_middlebox(struct tcpcb *tp)
+{
+       struct tcp_heuristics_head *head;
+       struct tcp_heuristic *tpheur;
+
+       tpheur = tcp_getheuristic_with_lock(tp, 1, &head);
+       if (tpheur == NULL)
+               return;
+
+       tpheur->th_tfo_aggressive_fallback = 1;
+
+       tcp_heuristic_unlock(head);
+}
+
+void tcp_heuristic_tfo_reset_loss(struct tcpcb *tp)
+{
+       struct tcp_heuristics_head *head;
+       struct tcp_heuristic *tpheur;
+
+       /*
+        * Don't attempt to create it! Keep the heuristics clean if the
+        * server does not support TFO. This reduces the lookup-cost on
+        * our side.
+        */
+       tpheur = tcp_getheuristic_with_lock(tp, 0, &head);
+       if (tpheur == NULL)
+               return;
+
+       tpheur->th_tfo_cookie_loss = 0;
+       tpheur->th_tfo_aggressive_fallback = 0;
+
+       tcp_heuristic_unlock(head);
+}
+
+boolean_t tcp_heuristic_do_tfo(struct tcpcb *tp)
+{
+       struct tcp_heuristics_head *head;
+       struct tcp_heuristic *tpheur;
+
+       /* Get the tcp-heuristic. */
+       tpheur = tcp_getheuristic_with_lock(tp, 0, &head);
+       if (tpheur == NULL)
+               return (true);
+
+       if (tpheur->th_tfo_aggressive_fallback) {
+               /* Aggressive fallback - don't do TFO anymore... :'( */
+               tcp_heuristic_unlock(head);
+               return (false);
+       }
+
+       if (tpheur->th_tfo_cookie_loss >= TFO_MAX_COOKIE_LOSS &&
+           (tpheur->th_tfo_fallback_trials < tcp_tfo_fallback_min ||
+            TSTMP_GT(tpheur->th_tfo_cookie_backoff, tcp_now))) {
+               /*
+                * So, when we are in SYN-loss mode we try to stop using TFO
+                * for the next 'tcp_tfo_fallback_min' connections. That way,
+                * we are sure that never more than 1 out of tcp_tfo_fallback_min
+                * connections will suffer from our nice little middlebox.
+                *
+                * After that we first wait for 2 minutes. If we fail again,
+                * we wait for yet another 60 minutes.
+                */
+               tpheur->th_tfo_fallback_trials++;
+               if (tpheur->th_tfo_fallback_trials >= tcp_tfo_fallback_min &&
+                   !tpheur->th_tfo_in_backoff) {
+                       if (tpheur->th_tfo_cookie_loss == TFO_MAX_COOKIE_LOSS)
+                               /* Backoff for 2 minutes */
+                               tpheur->th_tfo_cookie_backoff = tcp_now + (60 * 2 * TCP_RETRANSHZ);
+                       else
+                               /* Backoff for 60 minutes */
+                               tpheur->th_tfo_cookie_backoff = tcp_now + (60 * 60 * TCP_RETRANSHZ);
+
+                       tpheur->th_tfo_in_backoff = 1;
+               }
+
+               tcp_heuristic_unlock(head);
+               return (false);
+       }
+
+       /*
+        * We give it a new shot and set trials back to 0. This allows us to
+        * start counting again from zero in case we get yet another SYN-loss.
+        */
+       tpheur->th_tfo_fallback_trials = 0;
+       tpheur->th_tfo_in_backoff = 0;
+
+       if (tpheur->th_tfo_rcv_middlebox_supp)
+               tp->t_tfo_flags |= TFO_F_NO_RCVPROBING;
+       if (tpheur->th_tfo_snd_middlebox_supp)
+               tp->t_tfo_flags |= TFO_F_NO_SNDPROBING;
+
+       tcp_heuristic_unlock(head);
+
+       return (true);
+}
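
In concrete terms: after TFO_MAX_COOKIE_LOSS (2) SYN+cookie losses, the next tcp_tfo_fallback_min connections skip TFO entirely; once that many trials have passed, the backoff timestamp is set two minutes ahead (60 * 2 * TCP_RETRANSHZ ticks) while exactly TFO_MAX_COOKIE_LOSS losses have been seen, and sixty minutes ahead once more have accumulated, and TFO is attempted again only when tcp_now moves past that timestamp.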
+
+static void sysctl_cleartfocache(void)
+{
+       int i;
+
+       for (i = 0; i < tcp_cache_size; i++) {
+               struct tcp_cache_head *head = &tcp_cache[i];
+               struct tcp_cache *tpcache, *tmp;
+               struct tcp_heuristics_head *hhead = &tcp_heuristics[i];
+               struct tcp_heuristic *tpheur, *htmp;
+
+               lck_mtx_lock(&head->tch_mtx);
+               SLIST_FOREACH_SAFE(tpcache, &head->tcp_caches, list, tmp) {
+                       SLIST_REMOVE(&head->tcp_caches, tpcache, tcp_cache, list);
+                       _FREE(tpcache, M_TEMP);
+               }
+               lck_mtx_unlock(&head->tch_mtx);
+
+               lck_mtx_lock(&hhead->thh_mtx);
+               SLIST_FOREACH_SAFE(tpheur, &hhead->tcp_heuristics, list, htmp) {
+                       SLIST_REMOVE(&hhead->tcp_heuristics, tpheur, tcp_heuristic, list);
+                       _FREE(tpheur, M_TEMP);
+               }
+               lck_mtx_unlock(&hhead->thh_mtx);
+       }
+}
+
+/* This sysctl is useful for testing purposes only */
+static int tcpcleartfo = 0;
+
+static int sysctl_cleartfo SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       int error = 0, val, oldval = tcpcleartfo;
+
+       val = oldval;
+       error = sysctl_handle_int(oidp, &val, 0, req);
+       if (error || !req->newptr)
+               return (error);
+
+       /*
+        * The actual value does not matter. If the value is set, it triggers
+        * the clearing of the TFO cache. If a future implementation does not
+        * use the route entry to hold the TFO cache, replace the route sysctl.
+        */
+
+       if (val != oldval)
+               sysctl_cleartfocache();
+
+       tcpcleartfo = val;
+
+       return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, clear_tfocache, CTLTYPE_INT | CTLFLAG_RW |
+       CTLFLAG_LOCKED, &tcpcleartfo, 0, &sysctl_cleartfo, "I",
+       "Toggle to clear the TFO destination based heuristic cache");
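
For testing, the cache can be flushed from userspace by writing a value different from the current one to this OID (assuming the conventional net.inet.tcp prefix):

	sysctl -w net.inet.tcp.clear_tfocache=1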
+
+void tcp_cache_init(void)
+{
+       uint64_t sane_size_meg = sane_size / 1024 / 1024;
+       int i;
+
+       /*
+        * On machines with <100MB of memory this will result in a (full) cache-size
+        * of 32 entries, thus 32 * 5 * 64 bytes = 10KB (about 0.01 %).
+        * On machines with > 4GB of memory, we have a cache-size of 1024 entries,
+        * thus about 327KB.
+        *
+        * Side-note: we convert to u_int32_t. If sane_size is more than
+        * 16000 TB, we lose precision. But, who cares? :)
+        */
+       tcp_cache_size = tcp_cache_roundup2((u_int32_t)(sane_size_meg >> 2));
+       if (tcp_cache_size < 32)
+               tcp_cache_size = 32;
+       else if (tcp_cache_size > 1024)
+               tcp_cache_size = 1024;
+
+       tcp_cache = _MALLOC(sizeof(struct tcp_cache_head) * tcp_cache_size,
+           M_TEMP, M_ZERO);
+       if (tcp_cache == NULL)
+               panic("Allocating tcp_cache failed at boot-time!");
+
+       tcp_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
+       tcp_cache_mtx_grp = lck_grp_alloc_init("tcpcache", tcp_cache_mtx_grp_attr);
+       tcp_cache_mtx_attr = lck_attr_alloc_init();
+
+       tcp_heuristics = _MALLOC(sizeof(struct tcp_heuristics_head) * tcp_cache_size,
+           M_TEMP, M_ZERO);
+       if (tcp_heuristics == NULL)
+               panic("Allocating tcp_heuristic failed at boot-time!");
+
+       tcp_heuristic_mtx_grp_attr = lck_grp_attr_alloc_init();
+       tcp_heuristic_mtx_grp = lck_grp_alloc_init("tcpheuristic", tcp_heuristic_mtx_grp_attr);
+       tcp_heuristic_mtx_attr = lck_attr_alloc_init();
+
+       for (i = 0; i < tcp_cache_size; i++) {
+               lck_mtx_init(&tcp_cache[i].tch_mtx, tcp_cache_mtx_grp,
+                   tcp_cache_mtx_attr);
+               SLIST_INIT(&tcp_cache[i].tcp_caches);
+
+               lck_mtx_init(&tcp_heuristics[i].thh_mtx, tcp_heuristic_mtx_grp,
+                   tcp_heuristic_mtx_attr);
+               SLIST_INIT(&tcp_heuristics[i].tcp_heuristics);
+       }
+
+       tcp_cache_hash_seed = RandomULong();
+}
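
A worked example of the sizing above: with 8 GB of RAM, sane_size_meg is 8192, the right shift by two gives 2048, tcp_cache_roundup2() keeps it at 2048, and the clamp brings it down to the 1024-bucket maximum; each bucket then holds at most TCP_CACHE_BUCKET_SIZE (5) entries before the oldest one is recycled.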
diff --git a/bsd/netinet/tcp_cache.h b/bsd/netinet/tcp_cache.h
new file mode 100644 (file)
index 0000000..601aec8
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/* TCP-cache to store and retrieve TCP-related information */
+
+#ifndef _NETINET_TCP_CACHE_H
+#define _NETINET_TCP_CACHE_H
+
+#include <netinet/tcp_var.h>
+#include <netinet/in.h>
+
+extern void tcp_cache_set_cookie(struct tcpcb *tp, u_char *cookie, u_int8_t len);
+extern int tcp_cache_get_cookie(struct tcpcb *tp, u_char *cookie, u_int8_t *len);
+extern unsigned int tcp_cache_get_cookie_len(struct tcpcb *tp);
+
+extern void tcp_heuristic_tfo_inc_loss(struct tcpcb *tp);
+extern void tcp_heuristic_tfo_snd_good(struct tcpcb *tp);
+extern void tcp_heuristic_tfo_rcv_good(struct tcpcb *tp);
+extern void tcp_heuristic_tfo_middlebox(struct tcpcb *tp);
+extern void tcp_heuristic_tfo_reset_loss(struct tcpcb *tp);
+extern void tcp_heuristic_tfo_success(struct tcpcb *tp);
+extern boolean_t tcp_heuristic_do_tfo(struct tcpcb *tp);
+
+extern void tcp_cache_init(void);
+
+#endif /* _NETINET_TCP_CACHE_H */
+
index fe20ea9a958dcd59a69672b75e2e9f2e2b1dcb6c..fdb4f8fbf9dc65d41a183050fdfb9954ec250265 100644 (file)
@@ -51,6 +51,7 @@ struct tcp_cc_debug_state {
        uint32_t ccd_snd_cwnd;
        uint32_t ccd_snd_wnd;
        uint32_t ccd_snd_ssthresh;
+       uint32_t ccd_pipeack;
        uint32_t ccd_rttcur;
        uint32_t ccd_rxtcur;
        uint32_t ccd_srtt;
@@ -93,6 +94,13 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, use_newreno,
        CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_use_newreno, 0, 
        "Use TCP NewReno by default");
 
+static int tcp_check_cwnd_nonvalidated = 1;
+#if (DEBUG || DEVELOPMENT)
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, cwnd_nonvalidated,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_check_cwnd_nonvalidated, 0,
+    "Check if congestion window is non-validated");
+#endif /* (DEBUG || DEVELOPMENT) */
+
  #define SET_SNDSB_IDEAL_SIZE(sndsb, size) \
        sndsb->sb_idealsize = min(max(tcp_sendspace, tp->snd_ssthresh), \
        tcp_autosndbuf_max);
@@ -224,6 +232,7 @@ tcp_ccdbg_trace(struct tcpcb *tp, struct tcphdr *th, int32_t event)
                dbg_state.ccd_snd_cwnd = tp->snd_cwnd;
                dbg_state.ccd_snd_wnd = tp->snd_wnd;
                dbg_state.ccd_snd_ssthresh = tp->snd_ssthresh;
+               dbg_state.ccd_pipeack = tp->t_pipeack;
                dbg_state.ccd_rttcur = tp->t_rttcur;
                dbg_state.ccd_rxtcur = tp->t_rxtcur;
                dbg_state.ccd_srtt = tp->t_srtt >> TCP_RTT_SHIFT;
@@ -403,3 +412,62 @@ tcp_cc_after_idle_stretchack(struct tcpcb *tp)
                tcp_reset_stretch_ack(tp);
        }
 }
+
+/*
+ * Detect if the congestion window is non-validated according to
+ * draft-ietf-tcpm-newcwv-07
+ */
+
+inline uint32_t
+tcp_cc_is_cwnd_nonvalidated(struct tcpcb *tp)
+{
+       if (tp->t_pipeack == 0 || tcp_check_cwnd_nonvalidated == 0) {
+               tp->t_flagsext &= ~TF_CWND_NONVALIDATED;
+               return (0);
+       }
+       if (tp->t_pipeack >= (tp->snd_cwnd) >> 1)
+               tp->t_flagsext &= ~TF_CWND_NONVALIDATED;
+       else
+               tp->t_flagsext |= TF_CWND_NONVALIDATED;
+       return (tp->t_flagsext & TF_CWND_NONVALIDATED);
+}
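
For example, with snd_cwnd at 64 KB the window counts as validated only while t_pipeack stays at or above 32 KB; if the sender goes application-limited and pipeack falls below half the window, TF_CWND_NONVALIDATED is set and the congestion-control modules below stop growing the window.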
+
+/*
+ * Adjust congestion window in response to congestion in non-validated
+ * phase.
+ */
+inline void
+tcp_cc_adjust_nonvalidated_cwnd(struct tcpcb *tp)
+{
+       tp->t_pipeack = tcp_get_max_pipeack(tp);
+       tcp_clear_pipeack_state(tp);
+       tp->snd_cwnd = (max(tp->t_pipeack, tp->t_lossflightsize) >> 1);
+       tp->snd_cwnd = max(tp->snd_cwnd, TCP_CC_CWND_INIT_BYTES);
+       tp->snd_cwnd += tp->t_maxseg * tcprexmtthresh;
+       tp->t_flagsext &= ~TF_CWND_NONVALIDATED;
+}
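
A numeric walk-through of the adjustment above (illustrative values): with a maximum pipeack sample of 30000 bytes, t_lossflightsize of 50000 bytes and a 1460-byte maximum segment size, snd_cwnd becomes max(30000, 50000) >> 1 = 25000, stays above TCP_CC_CWND_INIT_BYTES (4380), and ends at 25000 + 3 * 1460 = 29380 bytes once tcprexmtthresh segments are added back.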
+
+/*
+ * Return maximum of all the pipeack samples. Since the number of samples
+ * TCP_PIPEACK_SAMPLE_COUNT is 3 at this time, it is simpler to do
+ * a comparison. We should change this if the number of samples increases.
+ */
+inline u_int32_t
+tcp_get_max_pipeack(struct tcpcb *tp)
+{
+       u_int32_t max_pipeack = 0;
+       max_pipeack = (tp->t_pipeack_sample[0] > tp->t_pipeack_sample[1]) ?
+           tp->t_pipeack_sample[0] : tp->t_pipeack_sample[1];
+       max_pipeack = (tp->t_pipeack_sample[2] > max_pipeack) ?
+           tp->t_pipeack_sample[2] : max_pipeack;
+
+       return (max_pipeack);
+}
+
+inline void
+tcp_clear_pipeack_state(struct tcpcb *tp)
+{
+       bzero(tp->t_pipeack_sample, sizeof(tp->t_pipeack_sample));
+       tp->t_pipeack_ind = 0;
+       tp->t_lossflightsize = 0;
+}
index 6ee5567a6d99ef7b71ed0f5ba01827fa67ba4f5d..e9df6b45192eb0775aa29c178ae377590751f244 100644 (file)
@@ -132,6 +132,11 @@ extern struct tcp_cc_algo* tcp_cc_algo_list[TCP_CC_ALGO_COUNT];
 
 #define CC_ALGO(tp) (tcp_cc_algo_list[tp->tcp_cc_index])
 #define        TCP_CC_CWND_INIT_BYTES  4380
+/*
+ * The congestion window will have to be reset after a
+ * non-validated period -- currently set to 3 minutes
+ */
+#define        TCP_CC_CWND_NONVALIDATED_PERIOD (3 * 60 * TCP_RETRANSHZ)
 
 extern void    tcp_cc_init(void);
 extern void tcp_cc_resize_sndbuf(struct tcpcb *tp);
@@ -142,6 +147,10 @@ extern void tcp_ccdbg_trace(struct tcpcb *tp, struct tcphdr *th,
        int32_t event);
 extern void tcp_cc_allocate_state(struct tcpcb *tp);
 extern void tcp_cc_after_idle_stretchack(struct tcpcb *tp);
+extern uint32_t tcp_cc_is_cwnd_nonvalidated(struct tcpcb *tp);
+extern void tcp_cc_adjust_nonvalidated_cwnd(struct tcpcb *tp);
+extern u_int32_t tcp_get_max_pipeack(struct tcpcb *tp);
+extern void tcp_clear_pipeack_state(struct tcpcb *tp);
 
 #endif /* KERNEL */
 #endif /* _NETINET_CC_H_ */
index 2eb86f1a91e8645e4f21ff5ac4f5fad74b26bfa1..29a3aed786581eeeba6250f09a710f7143065a1c 100644 (file)
@@ -130,6 +130,11 @@ static void tcp_cubic_cwnd_init_or_reset(struct tcpcb *tp)
 
        tcp_cubic_clear_state(tp);
        tcp_cc_cwnd_init_or_reset(tp);
+       tp->t_pipeack = 0;
+       tcp_clear_pipeack_state(tp);
+
+       /* Start counting bytes for RFC 3465 again */
+       tp->t_bytes_acked = 0;
 
        /*
         * slow start threshold could get initialized to a lower value
@@ -144,9 +149,6 @@ static void tcp_cubic_cwnd_init_or_reset(struct tcpcb *tp)
 
        /* Initialize cubic last max to be same as ssthresh */
        tp->t_ccstate->cub_last_max = tp->snd_ssthresh;
-
-       /* If stretch ack was auto-disabled, re-evaluate it */
-       tcp_cc_after_idle_stretchack(tp);
 }
 
 /*
@@ -273,6 +275,10 @@ tcp_cubic_congestion_avd(struct tcpcb *tp, struct tcphdr *th)
 {
        u_int32_t cubic_target_win, tcp_win, rtt;
 
+       /* Do not increase congestion window in non-validated phase */
+       if (tcp_cc_is_cwnd_nonvalidated(tp) != 0)
+               return;
+
        tp->t_bytes_acked += BYTES_ACKED(th, tp);
 
        rtt = get_base_rtt(tp);
@@ -320,6 +326,10 @@ tcp_cubic_congestion_avd(struct tcpcb *tp, struct tcphdr *th)
 static void
 tcp_cubic_ack_rcvd(struct tcpcb *tp, struct tcphdr *th)
 {
+       /* Do not increase the congestion window in non-validated phase */
+       if (tcp_cc_is_cwnd_nonvalidated(tp) != 0)
+               return;
+
        if (tp->snd_cwnd >= tp->snd_ssthresh) {
                /* Congestion avoidance phase */
                tcp_cubic_congestion_avd(tp, th);
@@ -329,6 +339,7 @@ tcp_cubic_ack_rcvd(struct tcpcb *tp, struct tcphdr *th)
                 * by RFC 3465 section 2.3
                 */
                uint32_t acked, abc_lim, incr;
+
                acked = BYTES_ACKED(th, tp);
                abc_lim = (tcp_do_rfc3465_lim2 && 
                        tp->snd_nxt == tp->snd_max) ?
@@ -352,6 +363,12 @@ tcp_cubic_pre_fr(struct tcpcb *tp)
        tp->t_ccstate->cub_tcp_bytes_acked = 0;
 
        win = min(tp->snd_cwnd, tp->snd_wnd);
+       if (tp->t_flagsext & TF_CWND_NONVALIDATED) {
+               tp->t_lossflightsize = tp->snd_max - tp->snd_una;
+               win = (max(tp->t_pipeack, tp->t_lossflightsize)) >> 1;
+       } else {
+               tp->t_lossflightsize = 0;
+       }
        /*
         * Note the congestion window at which packet loss occurred as
         * cub_last_max.
@@ -427,6 +444,27 @@ tcp_cubic_post_fr(struct tcpcb *tp, struct tcphdr *th)
 
        if (SEQ_LEQ(th->th_ack, tp->snd_max))
                flight_size = tp->snd_max - th->th_ack;
+
+       if (SACK_ENABLED(tp) && tp->t_lossflightsize > 0) {
+               u_int32_t total_rxt_size = 0, ncwnd;
+               /*
+                * When SACK is enabled, the number of retransmitted bytes
+                * can be counted more accurately.
+                */
+               total_rxt_size = tcp_rxtseg_total_size(tp);
+               ncwnd = max(tp->t_pipeack, tp->t_lossflightsize);
+               if (total_rxt_size <= ncwnd) {
+                       ncwnd = ncwnd - total_rxt_size;
+               }
+
+               /*
+                * To avoid sending a large burst at the end of recovery
+                * set a max limit on ncwnd
+                */
+               ncwnd = min(ncwnd, (tp->t_maxseg << 6));
+               ncwnd = ncwnd >> 1;
+               flight_size = max(ncwnd, flight_size);
+       }
        /*
         * Complete ack. The current window was inflated for fast recovery.
         * It has to be deflated post recovery.
@@ -450,6 +488,16 @@ static void
 tcp_cubic_after_timeout(struct tcpcb *tp)
 {
        VERIFY(tp->t_ccstate != NULL);
+
+       /*
+        * Avoid adjusting congestion window due to SYN retransmissions.
+        * If more than one byte (SYN) is outstanding then it is still
+        * needed to adjust the window.
+        */
+       if (tp->t_state < TCPS_ESTABLISHED &&
+           ((int)(tp->snd_max - tp->snd_una) <= 1))
+               return;
+
        if (!IN_FASTRECOVERY(tp)) {
                tcp_cubic_clear_state(tp);
                tcp_cubic_pre_fr(tp);
@@ -479,8 +527,6 @@ tcp_cubic_switch_cc(struct tcpcb *tp, uint16_t old_cc_index)
 {
 #pragma unused(old_cc_index)
        tcp_cubic_cwnd_init_or_reset(tp);
-       /* Start counting bytes for RFC 3465 again */
-       tp->t_bytes_acked = 0;
 
        OSIncrementAtomic((volatile SInt32 *)&tcp_cc_cubic.num_sockets);
 }
index d7a7130a518139f432ed09f330f65f167e0b8c5d..89dc86651569c94f57f07fed54d5be1e91600b4c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
index ab5242853b749bf90c5e8be48a8b954adb43f5a7..8f2a92cc8a4b197c7365a4e9a22fbf6c26738e03 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <netinet6/in6_pcb.h>
 #endif
 #include <netinet/tcp.h>
+#include <netinet/tcp_cache.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
@@ -144,6 +145,8 @@ struct tcphdr tcp_savetcp;
 #include <netinet/mptcp_opt.h> 
 #endif /* MPTCP */
 
+#include <corecrypto/ccaes.h>
+
 #define DBG_LAYER_BEG          NETDBG_CODE(DBG_NETTCP, 0)
 #define DBG_LAYER_END          NETDBG_CODE(DBG_NETTCP, 2)
 #define DBG_FNC_TCP_INPUT       NETDBG_CODE(DBG_NETTCP, (3 << 8))
@@ -154,119 +157,141 @@ tcp_cc  tcp_ccgen;
 struct tcpstat tcpstat;
 
 static int log_in_vain = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW | CTLFLAG_LOCKED,
-    &log_in_vain, 0, "Log all incoming TCP connections");
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &log_in_vain, 0,
+    "Log all incoming TCP connections");
 
 static int blackhole = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &blackhole, 0, "Do not send RST when dropping refused connections");
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &blackhole, 0,
+    "Do not send RST when dropping refused connections");
 
 int tcp_delack_enabled = 3;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_RW | CTLFLAG_LOCKED,
-    &tcp_delack_enabled, 0, 
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_delack_enabled, 0,
     "Delay ACK to try and piggyback it onto a data packet");
 
 int tcp_lq_overflow = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_lq_overflow, CTLFLAG_RW | CTLFLAG_LOCKED,
-    &tcp_lq_overflow, 0, 
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_lq_overflow,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_lq_overflow, 0,
     "Listen Queue Overflow");
 
 int tcp_recv_bg = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbg, CTLFLAG_RW | CTLFLAG_LOCKED,
-    &tcp_recv_bg, 0, 
-    "Receive background");
+    &tcp_recv_bg, 0, "Receive background");
 
 #if TCP_DROP_SYNFIN
 static int drop_synfin = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_RW | CTLFLAG_LOCKED,
-    &drop_synfin, 0, "Drop TCP packets with SYN+FIN set");
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &drop_synfin, 0,
+    "Drop TCP packets with SYN+FIN set");
 #endif
 
 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
     "TCP Segment Reassembly Queue");
 
 static int tcp_reass_overflows = 0;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD | CTLFLAG_LOCKED,
-    &tcp_reass_overflows, 0,
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows,
+    CTLFLAG_RD | CTLFLAG_LOCKED, &tcp_reass_overflows, 0,
     "Global number of TCP Segment Reassembly Queue Overflows");
 
 
 __private_extern__ int slowlink_wsize = 8192;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, slowlink_wsize, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &slowlink_wsize, 0, "Maximum advertised window size for slowlink");
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, slowlink_wsize,
+    CTLFLAG_RW | CTLFLAG_LOCKED,
+    &slowlink_wsize, 0, "Maximum advertised window size for slowlink");
 
 int maxseg_unacked = 8;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, maxseg_unacked, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &maxseg_unacked, 0, "Maximum number of outstanding segments left unacked");
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, maxseg_unacked,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &maxseg_unacked, 0,
+    "Maximum number of outstanding segments left unacked");
 
-int    tcp_do_rfc3465 = 1;
+int tcp_do_rfc3465 = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &tcp_do_rfc3465, 0, "");
-
-int    tcp_do_rfc3465_lim2 = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465_lim2, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &tcp_do_rfc3465_lim2, 0, "Appropriate bytes counting w/ L=2*SMSS");
+    &tcp_do_rfc3465, 0, "");
 
-int    rtt_samples_per_slot = 20;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, rtt_samples_per_slot, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &rtt_samples_per_slot, 0, "Number of RTT samples stored for rtt history");
+int tcp_do_rfc3465_lim2 = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465_lim2,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_do_rfc3465_lim2, 0,
+    "Appropriate bytes counting w/ L=2*SMSS");
 
-int    tcp_allowed_iaj = ALLOWED_IAJ;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, recv_allowed_iaj, CTLFLAG_RW | CTLFLAG_LOCKED,
-        &tcp_allowed_iaj, 0, "Allowed inter-packet arrival jiter");
+int rtt_samples_per_slot = 20;
 
-int    tcp_acc_iaj_high_thresh = ACC_IAJ_HIGH_THRESH;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, acc_iaj_high_thresh, CTLFLAG_RW | CTLFLAG_LOCKED,
-        &tcp_acc_iaj_high_thresh, 0, "Used in calculating maximum accumulated IAJ");
+int tcp_allowed_iaj = ALLOWED_IAJ;
+int tcp_acc_iaj_high_thresh = ACC_IAJ_HIGH_THRESH;
+u_int32_t tcp_autorcvbuf_inc_shift = 3;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, recv_allowed_iaj,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_allowed_iaj, 0,
+    "Allowed inter-packet arrival jitter");
+#if (DEVELOPMENT || DEBUG)
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, acc_iaj_high_thresh,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_acc_iaj_high_thresh, 0,
+    "Used in calculating maximum accumulated IAJ");
+
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, autorcvbufincshift,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_autorcvbuf_inc_shift, 0,
+    "Shift for increment in receive socket buffer size");
+#endif /* (DEVELOPMENT || DEBUG) */
 
 u_int32_t tcp_do_autorcvbuf = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, doautorcvbuf, CTLFLAG_RW | CTLFLAG_LOCKED,
-        &tcp_do_autorcvbuf, 0, "Enable automatic socket buffer tuning");
-
-u_int32_t tcp_autorcvbuf_inc_shift = 3;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, autorcvbufincshift, CTLFLAG_RW | CTLFLAG_LOCKED,
-        &tcp_autorcvbuf_inc_shift, 0, "Shift for increment in receive socket buffer size");
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, doautorcvbuf,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_do_autorcvbuf, 0,
+    "Enable automatic socket buffer tuning");
 
 u_int32_t tcp_autorcvbuf_max = 512 * 1024;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, autorcvbufmax, CTLFLAG_RW | CTLFLAG_LOCKED,
-        &tcp_autorcvbuf_max, 0, "Maximum receive socket buffer size");
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, autorcvbufmax,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_autorcvbuf_max, 0,
+    "Maximum receive socket buffer size");
 
 int sw_lro = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_LOCKED,
         &sw_lro, 0, "Used to coalesce TCP packets");
 
 int lrodebug = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, lrodbg, CTLFLAG_RW | CTLFLAG_LOCKED,
-        &lrodebug, 0, "Used to debug SW LRO");
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, lrodbg,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &lrodebug, 0,
+    "Used to debug SW LRO");
 
 int lro_start = 4;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, lro_startcnt, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &lro_start, 0, "Segments for starting LRO computed as power of 2");
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, lro_startcnt,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &lro_start, 0,
+    "Segments for starting LRO computed as power of 2");
 
 extern int tcp_do_autosendbuf;
 
 int limited_txmt = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, limited_transmit, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &limited_txmt, 0, "Enable limited transmit");
-
 int early_rexmt = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, early_rexmt, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &early_rexmt, 0, "Enable Early Retransmit");
-
 int sack_ackadv = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_ackadv, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &sack_ackadv, 0, "Use SACK with cumulative ack advancement as a dupack");
+int tcp_dsack_enable = 1;
+
+#if (DEVELOPMENT || DEBUG)
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, limited_transmit,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &limited_txmt, 0,
+    "Enable limited transmit");
+
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, early_rexmt,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &early_rexmt, 0,
+    "Enable Early Retransmit");
+
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_ackadv,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &sack_ackadv, 0,
+    "Use SACK with cumulative ack advancement as a dupack");
+
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, dsack_enable,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_dsack_enable, 0,
+    "use DSACK TCP option to report duplicate segments");
+#endif /* (DEVELOPMENT || DEBUG) */
 
 #if CONFIG_IFEF_NOWINDOWSCALE
 int tcp_obey_ifef_nowindowscale = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, obey_ifef_nowindowscale, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &tcp_obey_ifef_nowindowscale, 0, "");
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, obey_ifef_nowindowscale,
+    CTLFLAG_RW | CTLFLAG_LOCKED,
+    &tcp_obey_ifef_nowindowscale, 0, "");
 #endif
 
 extern int tcp_TCPTV_MIN;
 extern int tcp_acc_iaj_high;
 extern int tcp_acc_iaj_react_limit;
-extern struct zone *tcp_reass_zone;
 
 int tcprexmtthresh = 3;
 
@@ -279,19 +304,20 @@ struct inpcbhead tcb;
 struct inpcbinfo tcbinfo;
 
 static void tcp_dooptions(struct tcpcb *, u_char *, int, struct tcphdr *,
-    struct tcpopt *, unsigned int);
-static void     tcp_pulloutofband(struct socket *,
-           struct tcphdr *, struct mbuf *, int);
+    struct tcpopt *);
+static void tcp_finalize_options(struct tcpcb *, struct tcpopt *, unsigned int);
+static void tcp_pulloutofband(struct socket *,
+    struct tcphdr *, struct mbuf *, int);
 static int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *,
     struct ifnet *);
-static void    tcp_xmit_timer(struct tcpcb *, int, u_int32_t, tcp_seq);
+static void tcp_xmit_timer(struct tcpcb *, int, u_int32_t, tcp_seq);
 static inline unsigned int tcp_maxmtu(struct rtentry *);
 static inline int tcp_stretch_ack_enable(struct tcpcb *tp);
 static inline void tcp_adaptive_rwtimo_check(struct tcpcb *, int);
 
 #if TRAFFIC_MGT
 static inline void update_iaj_state(struct tcpcb *tp, uint32_t tlen,
-       int reset_size);
+    int reset_size);
 void compute_iaj(struct tcpcb *tp, int nlropkts, int lro_delay_factor);
 static void compute_iaj_meat(struct tcpcb *tp, uint32_t cur_iaj);
 #endif /* TRAFFIC_MGT */
@@ -301,19 +327,19 @@ static inline unsigned int tcp_maxmtu6(struct rtentry *);
 #endif
 
 static void tcp_sbrcv_grow(struct tcpcb *tp, struct sockbuf *sb, 
-       struct tcpopt *to, u_int32_t tlen);
+    struct tcpopt *to, u_int32_t tlen);
 
 void tcp_sbrcv_trim(struct tcpcb *tp, struct sockbuf *sb);
 static void tcp_sbsnd_trim(struct sockbuf *sbsnd);
 static inline void tcp_sbrcv_tstmp_check(struct tcpcb *tp);
 static inline void tcp_sbrcv_reserve(struct tcpcb *tp, struct sockbuf *sb,
-       u_int32_t newsize, u_int32_t idealsize);
+    u_int32_t newsize, u_int32_t idealsize);
 static void tcp_bad_rexmt_restore_state(struct tcpcb *tp, struct tcphdr *th);
 static void tcp_compute_rtt(struct tcpcb *tp, struct tcpopt *to, 
-       struct tcphdr *th);
+    struct tcphdr *th);
 static void tcp_early_rexmt_check(struct tcpcb *tp, struct tcphdr *th);
 static void tcp_bad_rexmt_check(struct tcpcb *tp, struct tcphdr *th,
-       struct tcpopt *to);
+    struct tcpopt *to);
 /*
  * Constants used for resizing receive socket buffer 
  * when timestamps are not supported 
@@ -328,7 +354,7 @@ static void tcp_bad_rexmt_check(struct tcpcb *tp, struct tcphdr *th,
 #define TCP_EARLY_REXMT_WIN (60 * TCP_RETRANSHZ) /* 60 seconds */
 #define TCP_EARLY_REXMT_LIMIT 10
 
-extern  void    ipfwsyslog( int level, const char *format,...);
+extern void ipfwsyslog( int level, const char *format,...);
 extern int fw_verbose;
 
 #if IPFIREWALL
@@ -357,7 +383,6 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, rcvsspktcnt, CTLFLAG_RW | CTLFLAG_LOCKED,
 
 static int tcp_dropdropablreq(struct socket *head);
 static void tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th);
-
 static void update_base_rtt(struct tcpcb *tp, uint32_t rtt);
 void tcp_set_background_cc(struct socket *so);
 void tcp_set_foreground_cc(struct socket *so);
@@ -570,6 +595,7 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m,
        boolean_t cell = IFNET_IS_CELLULAR(ifp);
        boolean_t wifi = (!cell && IFNET_IS_WIFI(ifp));
        boolean_t wired = (!wifi && IFNET_IS_WIRED(ifp));
+       boolean_t dsack_set = FALSE;
 
        /*
         * Call with th==0 after become established to
@@ -632,10 +658,25 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m,
         * segment.  If it provides all of our data, drop us.
         */
        if (p != NULL) {
-               register int i;
+               int i;
                /* conversion to int (in i) handles seq wraparound */
                i = p->tqe_th->th_seq + p->tqe_len - th->th_seq;
                if (i > 0) {
+                       if (TCP_DSACK_ENABLED(tp) && i > 1) {
+                               /*
+                                * Note duplicate data sequence numbers
+                                * to report in DSACK option
+                                */
+                               tp->t_dsack_lseq = th->th_seq;
+                               tp->t_dsack_rseq = th->th_seq +
+                                   min(i, *tlenp);
+
+                               /*
+                                * Report only the first part of partial/
+                                * non-contiguous duplicate sequence space
+                                */
+                               dsack_set = TRUE;
+                       }
                        if (i >= *tlenp) {
                                tcpstat.tcps_rcvduppack++;
                                tcpstat.tcps_rcvdupbyte += *tlenp;
@@ -681,9 +722,31 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m,
         * if they are completely covered, dequeue them.
         */
        while (q) {
-               register int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq;
+               int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq;
                if (i <= 0)
                        break;
+
+               /*
+                * Report only the first part of a partial/non-contiguous
+                * duplicate segment in the DSACK option. The variable
+                * dsack_set will be true if a previous entry has some of
+                * the duplicate sequence space.
+                */
+               if (TCP_DSACK_ENABLED(tp) && i > 1 && !dsack_set) {
+                       if (tp->t_dsack_lseq == 0) {
+                               tp->t_dsack_lseq = q->tqe_th->th_seq;
+                               tp->t_dsack_rseq =
+                                   tp->t_dsack_lseq + min(i, q->tqe_len);
+                       } else {
+                               /*
+                                * This segment overlaps data in multiple
+                                * entries in the reassembly queue; move the
+                                * right sequence number further along.
+                                */
+                               tp->t_dsack_rseq =
+                                   tp->t_dsack_rseq + min(i, q->tqe_len);
+                       }
+               }
                if (i < q->tqe_len) {
                        q->tqe_th->th_seq += i;
                        q->tqe_len -= i;
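The DSACK bookkeeping added above reports only the first chunk of duplicated data, clipped to the length of the arriving segment. Below is a minimal, self-contained sketch of that edge computation; the struct and function names are illustrative stand-ins, not the kernel's own fields.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for the per-connection DSACK edges. */
struct dsack_state {
    uint32_t lseq;  /* left edge of the duplicate space */
    uint32_t rseq;  /* right edge of the duplicate space */
};

/*
 * A new segment [seq, seq + tlen) overlaps a queued segment by 'overlap'
 * bytes at its start.  Only the first part of the duplicate space is
 * reported, mirroring the min(i, *tlenp) clipping in the diff above.
 */
static void
dsack_note_duplicate(struct dsack_state *ds, uint32_t seq,
    uint32_t tlen, uint32_t overlap)
{
    uint32_t dup = overlap < tlen ? overlap : tlen;

    ds->lseq = seq;
    ds->rseq = seq + dup;
}

int
main(void)
{
    struct dsack_state ds = { 0, 0 };

    /* Segment starting at 1000, 500 bytes, first 200 already received. */
    dsack_note_duplicate(&ds, 1000, 500, 200);
    printf("DSACK block: [%u, %u)\n", (unsigned)ds.lseq, (unsigned)ds.rseq);
    return 0;
}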
@@ -828,7 +891,8 @@ msg_unordered_delivery:
 }
 
 /*
- * Reduce congestion window.
+ * Reduce congestion window -- used when ECN is seen or when a tail loss
+ * probe recovers the last packet.
  */
 static void
 tcp_reduce_congestion_window(
@@ -842,25 +906,43 @@ tcp_reduce_congestion_window(
        if (CC_ALGO(tp)->pre_fr != NULL)
                CC_ALGO(tp)->pre_fr(tp);
        ENTER_FASTRECOVERY(tp);
-       tp->snd_recover = tp->snd_max;
+       if (tp->t_flags & TF_SENTFIN)
+               tp->snd_recover = tp->snd_max - 1;
+       else
+               tp->snd_recover = tp->snd_max;
        tp->t_timer[TCPT_REXMT] = 0;
        tp->t_timer[TCPT_PTO] = 0;
        tp->t_rtttime = 0;
-       tp->snd_cwnd = tp->snd_ssthresh +
-                tp->t_maxseg * tcprexmtthresh;
+       if (tp->t_flagsext & TF_CWND_NONVALIDATED) {
+               tcp_cc_adjust_nonvalidated_cwnd(tp);
+       } else {
+               tp->snd_cwnd = tp->snd_ssthresh +
+                   tp->t_maxseg * tcprexmtthresh;
+       }
 }
 
 /*
- * The application wants to get an event if there
- * is a stall during read. Set the initial keepalive
- * timeout to be equal to twice RTO.
+ * This function is called upon reception of data on a socket. Its purpose is
+ * to handle the adaptive keepalive timers that monitor whether the connection
+ * is making progress. First the adaptive read-timer, second the TFO probe-timer.
+ *
+ * The application wants to get an event if there is a stall during read.
+ * Set the initial keepalive timeout to be equal to twice RTO.
+ *
+ * If the outgoing interface is in marginal conditions, we need to
+ * enable read probes for that too.
  */
 static inline void
-tcp_adaptive_rwtimo_check(struct tcpcb *tp, int tlen) 
+tcp_adaptive_rwtimo_check(struct tcpcb *tp, int tlen)
 {
-       if (tp->t_adaptive_rtimo > 0 && tlen > 0 &&
-               tp->t_state == TCPS_ESTABLISHED) {
-               tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, 
+       struct ifnet *outifp = tp->t_inpcb->inp_last_outifp;
+
+       if ((tp->t_adaptive_rtimo > 0 ||
+           (outifp != NULL &&
+           (outifp->if_eflags & IFEF_PROBE_CONNECTIVITY)))
+           && tlen > 0 &&
+           tp->t_state == TCPS_ESTABLISHED) {
+               tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
                        (TCP_REXMTVAL(tp) << 1));
                tp->t_flagsext |= TF_DETECT_READSTALL;
                tp->t_rtimo_probes = 0;
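The adaptive read-stall logic above arms the keepalive timer at twice the retransmission timeout once data arrives on an established connection, either because the application opted in or because the outgoing interface asked for connectivity probing. A rough, self-contained sketch of that trigger follows, with the timer wheel and TCP_REXMTVAL reduced to plain integers; all names are illustrative.

#include <stdint.h>
#include <stdio.h>

/* Simplified connection state; field names are illustrative only. */
struct conn {
    uint32_t rto_ms;           /* current retransmission timeout */
    uint32_t keep_timer_ms;    /* read-probe timer, 0 == disarmed */
    int detect_readstall;      /* corresponds to TF_DETECT_READSTALL */
    int probe_connectivity;    /* interface wants connectivity probes */
    int adaptive_rtimo;        /* application asked for read-stall events */
};

/*
 * Arm the read-probe timer when data is received, using the same two
 * triggers as the diff above: app opt-in or a marginal interface.
 */
static void
adaptive_rwtimo_check(struct conn *c, int tlen)
{
    if ((c->adaptive_rtimo || c->probe_connectivity) && tlen > 0) {
        c->keep_timer_ms = c->rto_ms << 1;  /* twice RTO */
        c->detect_readstall = 1;
    }
}

int
main(void)
{
    struct conn c = { .rto_ms = 300, .adaptive_rtimo = 1 };

    adaptive_rwtimo_check(&c, 1448);
    printf("probe timer armed at %u ms\n", (unsigned)c.keep_timer_ms);
    return 0;
}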
@@ -982,7 +1064,9 @@ tcp_sbrcv_reserve(struct tcpcb *tp, struct sockbuf *sbrcv,
  */
 static void
 tcp_sbrcv_grow(struct tcpcb *tp, struct sockbuf *sbrcv, 
-       struct tcpopt *to, u_int32_t pktlen) {
+       struct tcpopt *to, u_int32_t pktlen) 
+{
+       struct socket *so = sbrcv->sb_so;
        
        /*
         * Do not grow the receive socket buffer if
@@ -1000,6 +1084,7 @@ tcp_sbrcv_grow(struct tcpcb *tp, struct sockbuf *sbrcv,
                tcp_cansbgrow(sbrcv) == 0 ||
                sbrcv->sb_hiwat >= tcp_autorcvbuf_max ||
                (tp->t_flagsext & TF_RECV_THROTTLE) ||
+               (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ||
                !LIST_EMPTY(&tp->t_segq)) {
                /* Can not resize the socket buffer, just return */
                goto out;
@@ -1247,8 +1332,7 @@ tcp_detect_bad_rexmt(struct tcpcb *tp, struct tcphdr *th,
        bad_rexmt_win = (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
 
        /* If the ack has ECN CE bit, then cwnd has to be adjusted */
-       if ((tp->ecn_flags & (TE_ECN_ON)) == (TE_ECN_ON)
-           && (th->th_flags & TH_ECE))
+       if (TCP_ECN_ENABLED(tp) && (th->th_flags & TH_ECE))
                return (0);
        if (TSTMP_SUPPORTED(tp)) {
                if (rxtime > 0 && (to->to_flags & TOF_TS)
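tcp_detect_bad_rexmt() above prefers timestamps when they are available: an echoed timestamp older than the time of the retransmission means the ACK was generated by the original transmission, so the retransmit was likely spurious; without timestamps it falls back to a window of roughly half the smoothed RTT. A hedged sketch of that decision, with illustrative helper names and wraparound handling omitted:

#include <stdint.h>
#include <stdio.h>

/*
 * Eifel-style check: if the timestamp echoed by the peer (tsecr) predates
 * the retransmission time, the original packet must have been delivered,
 * so the retransmission was spurious.  Without timestamps, an ACK that
 * arrives within ~srtt/2 of the retransmission is treated the same way.
 */
static int
rexmt_was_spurious(uint32_t ack_tsecr, uint32_t rexmt_time,
    uint32_t now, uint32_t srtt_ms, int have_timestamps)
{
    if (have_timestamps)
        return (ack_tsecr != 0 && ack_tsecr < rexmt_time);

    return ((now - rexmt_time) < (srtt_ms >> 1));
}

int
main(void)
{
    /* Echoed timestamp 1000 predates the retransmit at 1200: spurious. */
    printf("%d\n", rexmt_was_spurious(1000, 1200, 1250, 160, 1));
    return 0;
}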
@@ -1297,6 +1381,9 @@ tcp_bad_rexmt_restore_state(struct tcpcb *tp, struct tcphdr *th)
                tp->snd_ssthresh = tp->snd_ssthresh_prev;
                if (tp->t_flags & TF_WASFRECOVERY)
                        ENTER_FASTRECOVERY(tp);
+
+               /* Do not use the loss flight size in this case */
+               tp->t_lossflightsize = 0;
        }
        tp->snd_cwnd = max(tp->snd_cwnd, TCP_CC_CWND_INIT_BYTES);
        tp->snd_recover = tp->snd_recover_prev;
@@ -1340,6 +1427,19 @@ tcp_bad_rexmt_check(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to)
            && tp->t_tlphighrxt > 0
            && SEQ_GEQ(th->th_ack, tp->t_tlphighrxt)
            && !tcp_detect_bad_rexmt(tp, th, to, tp->t_tlpstart)) {
+               /*
+                * check DSACK information also to make sure that
+                * the TLP was indeed needed
+                */
+               if (tcp_rxtseg_dsack_for_tlp(tp)) {
+                       /*
+                        * received a DSACK to indicate that TLP was
+                        * not needed
+                        */
+                       tcp_rxtseg_clean(tp);
+                       goto out;
+               }
+
                /*
                 * The tail loss probe recovered the last packet and 
                 * we need to adjust the congestion window to take
@@ -1351,8 +1451,17 @@ tcp_bad_rexmt_check(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to)
                        EXIT_FASTRECOVERY(tp);
                }
                tcp_ccdbg_trace(tp, th, TCP_CC_TLP_RECOVER_LASTPACKET);
+       } else if (tcp_rxtseg_detect_bad_rexmt(tp, th->th_ack)) {
+               /*
+                * All of the retransmitted segments were duplicated, this
+                * can be an indication of bad fast retransmit.
+                */
+               tcpstat.tcps_dsack_badrexmt++;
+               tcp_bad_rexmt_restore_state(tp, th);
+               tcp_ccdbg_trace(tp, th, TCP_CC_DSACK_BAD_REXMT);
+               tcp_rxtseg_clean(tp);
        }
-
+out:
        tp->t_flagsext &= ~(TF_SENT_TLPROBE);
        tp->t_tlphighrxt = 0;
        tp->t_tlpstart = 0;
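The tail-loss-probe branch above now consults DSACK information first: a DSACK covering the probed sequence means the probe merely duplicated data that had already arrived, so no congestion response is needed; otherwise the probe repaired a genuine tail loss and the congestion window is reduced. A small illustrative decision function, not the kernel's helpers:

#include <stdio.h>

/* Outcome of an ACK that covers a tail-loss-probe retransmission. */
enum tlp_outcome {
    TLP_PROBE_WAS_DUPLICATE,    /* DSACK says the data had already arrived */
    TLP_RECOVERED_TAIL_LOSS     /* probe repaired a genuine loss */
};

static enum tlp_outcome
classify_tlp_ack(int dsack_covers_probe)
{
    return dsack_covers_probe ? TLP_PROBE_WAS_DUPLICATE
                              : TLP_RECOVERED_TAIL_LOSS;
}

int
main(void)
{
    printf("%d\n", classify_tlp_ack(1));    /* 0: skip the cwnd reduction */
    printf("%d\n", classify_tlp_ack(0));    /* 1: reduce cwnd once */
    return 0;
}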
@@ -1482,6 +1591,135 @@ tcp_early_rexmt_check (struct tcpcb *tp, struct tcphdr *th)
        tp->t_tlpstart = 0;
 }
 
+static boolean_t
+tcp_tfo_syn(tp, to)
+       struct tcpcb *tp;
+       struct tcpopt *to;
+{
+       u_char out[CCAES_BLOCK_SIZE];
+       unsigned char len;
+
+       if (!(to->to_flags & (TOF_TFO | TOF_TFOREQ)) ||
+           !(tcp_fastopen & TCP_FASTOPEN_SERVER))
+               return (FALSE);
+
+       if ((to->to_flags & TOF_TFOREQ)) {
+               tp->t_tfo_flags |= TFO_F_OFFER_COOKIE;
+
+               tp->t_tfo_stats |= TFO_S_COOKIEREQ_RECV;
+               tcpstat.tcps_tfo_cookie_req_rcv++;
+               return (FALSE);
+       }
+
+       /* Ok, then it must be an offered cookie. We need to check that ... */
+       tcp_tfo_gen_cookie(tp->t_inpcb, out, sizeof(out));
+
+       len = *to->to_tfo - TCPOLEN_FASTOPEN_REQ;
+       to->to_tfo++;
+       if (memcmp(out, to->to_tfo, len)) {
+               /* Cookies are different! Let's return and offer a new cookie */
+               tp->t_tfo_flags |= TFO_F_OFFER_COOKIE;
+
+               tp->t_tfo_stats |= TFO_S_COOKIE_INVALID;
+               tcpstat.tcps_tfo_cookie_invalid++;
+               return (FALSE);
+       }
+
+       if (OSIncrementAtomic(&tcp_tfo_halfcnt) >= tcp_tfo_backlog) {
+               /* Need to decrement again as we just increased it... */
+               OSDecrementAtomic(&tcp_tfo_halfcnt);
+               return (FALSE);
+       }
+
+       tp->t_tfo_flags |= TFO_F_COOKIE_VALID;
+
+       tp->t_tfo_stats |= TFO_S_SYNDATA_RCV;
+       tcpstat.tcps_tfo_syn_data_rcv++;
+
+       return (TRUE);
+}
+
+static void
+tcp_tfo_synack(tp, to)
+       struct tcpcb *tp;
+       struct tcpopt *to;
+{
+       if (to->to_flags & TOF_TFO) {
+               unsigned char len = *to->to_tfo - TCPOLEN_FASTOPEN_REQ;
+
+               /*
+                * If this happens, things have gone terribly wrong. len should
+                * have been checked in tcp_dooptions.
+                */
+               VERIFY(len <= TFO_COOKIE_LEN_MAX);
+
+               to->to_tfo++;
+
+               tcp_cache_set_cookie(tp, to->to_tfo, len);
+               tcp_heuristic_tfo_success(tp);
+
+               tp->t_tfo_stats |= TFO_S_COOKIE_RCV;
+               tcpstat.tcps_tfo_cookie_rcv++;
+       } else {
+               /*
+                * Thus, no cookie in the response, but we either asked for one
+                * or sent SYN+DATA. Now, we need to check whether we had to
+                * rexmit the SYN. If that's the case, it's better to start
+                * backing off TFO-cookie requests.
+                */
+               if (tp->t_tfo_flags & TFO_F_SYN_LOSS)
+                       tcp_heuristic_tfo_inc_loss(tp);
+               else
+                       tcp_heuristic_tfo_reset_loss(tp);
+       }
+}
+
+static void
+tcp_tfo_rcv_probe(struct tcpcb *tp, int tlen)
+{
+       if (tlen == 0) {
+               tp->t_tfo_probe_state = TFO_PROBE_PROBING;
+
+               /*
+                * We send the probe out rather quickly (after one RTO). It does not
+                * really hurt that much, it's only one additional segment on the wire.
+                */
+               tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, (TCP_REXMTVAL(tp)));
+       } else {
+               /* If SYN/ACK+data, don't probe. We got the data! */
+               tcp_heuristic_tfo_rcv_good(tp);
+       }
+}
+
+static void
+tcp_tfo_rcv_data(struct tcpcb *tp)
+{
+       /* Transition from PROBING to NONE as data has been received */
+       if (tp->t_tfo_probe_state >= TFO_PROBE_PROBING) {
+               tp->t_tfo_probe_state = TFO_PROBE_NONE;
+
+               /* Data has been received - we are good to go! */
+               tcp_heuristic_tfo_rcv_good(tp);
+       }
+}
+
+static void
+tcp_tfo_rcv_ack(struct tcpcb *tp, struct tcphdr *th)
+{
+       if (tp->t_tfo_probe_state == TFO_PROBE_PROBING &&
+           tp->t_tfo_probes > 0) {
+               if (th->th_seq == tp->rcv_nxt) {
+                       /* No hole, so stop probing */
+                       tp->t_tfo_probe_state = TFO_PROBE_NONE;
+               } else if (SEQ_GT(th->th_seq, tp->rcv_nxt)) {
+                       /* There is a hole! Wait a bit for data... */
+                       tp->t_tfo_probe_state = TFO_PROBE_WAIT_DATA;
+                       tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
+                           TCP_REXMTVAL(tp));
+               }
+       }
+}
+
 void
 tcp_input(m, off0)
        struct mbuf *m;
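tcp_tfo_syn() above gates acceptance of SYN+data on two checks: the offered cookie must match one freshly derived for the peer, and the number of half-open TFO connections must stay under the backlog. The following self-contained sketch works under those assumptions; the cookie derivation, length, and limits here are purely illustrative and not the kernel's.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define COOKIE_LEN 8                    /* illustrative fixed length */

static int tfo_half_open;               /* pending SYN+data connections */
static const int tfo_backlog = 10;      /* illustrative cap */

/* Stand-in for tcp_tfo_gen_cookie(): derive the expected cookie. */
static void
gen_cookie(uint32_t peer_addr, uint8_t out[COOKIE_LEN])
{
    for (int i = 0; i < COOKIE_LEN; i++)
        out[i] = (uint8_t)((peer_addr >> ((i % 4) * 8)) ^ 0x5a);
}

/*
 * Accept SYN+data only if the offered cookie matches what we would have
 * handed out and the half-open budget is not exhausted -- the same two
 * gates as tcp_tfo_syn() above, simplified.
 */
static int
tfo_accept_syn_data(uint32_t peer_addr, const uint8_t *cookie, size_t len)
{
    uint8_t expected[COOKIE_LEN];

    gen_cookie(peer_addr, expected);
    if (len != COOKIE_LEN || memcmp(cookie, expected, len) != 0)
        return 0;                       /* re-offer a fresh cookie instead */
    if (tfo_half_open >= tfo_backlog)
        return 0;                       /* fall back to a normal handshake */
    tfo_half_open++;
    return 1;
}

int
main(void)
{
    uint8_t c[COOKIE_LEN];

    gen_cookie(0x0a000001, c);
    printf("accepted: %d\n", tfo_accept_syn_data(0x0a000001, c, COOKIE_LEN));
    return 0;
}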
@@ -1506,11 +1744,13 @@ tcp_input(m, off0)
        int iss = 0, nosock = 0; 
        u_int32_t tiwin, sack_bytes_acked = 0;
        struct tcpopt to;               /* options in this segment */
-       struct sockaddr_in *next_hop = NULL;
 #if TCPDEBUG
        short ostate = 0;
 #endif
+#if IPFIREWALL
+       struct sockaddr_in *next_hop = NULL;
        struct m_tag *fwd_tag;
+#endif /* IPFIREWALL */
        u_char ip_ecn = IPTOS_ECN_NOTECT;
        unsigned int ifscope;
        uint8_t isconnected, isdisconnected;
@@ -1520,11 +1760,11 @@ tcp_input(m, off0)
        int turnoff_lro = 0, win;
 #if MPTCP
        struct mptcb *mp_tp = NULL;
-       uint16_t mptcp_csum = 0;
 #endif /* MPTCP */
        boolean_t cell = IFNET_IS_CELLULAR(ifp);
        boolean_t wifi = (!cell && IFNET_IS_WIFI(ifp));
        boolean_t wired = (!wifi && IFNET_IS_WIRED(ifp));
+       boolean_t recvd_dsack = FALSE;
        struct tcp_respond_args tra;
 
 #define TCP_INC_VAR(stat, npkts) do {                  \
@@ -1532,7 +1772,7 @@ tcp_input(m, off0)
 } while (0)
 
        TCP_INC_VAR(tcpstat.tcps_rcvtotal, nlropkts);
-
+#if IPFIREWALL
        /* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */
        if (!SLIST_EMPTY(&m->m_pkthdr.tags)) {
                fwd_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID,
@@ -1547,6 +1787,7 @@ tcp_input(m, off0)
                next_hop = ipfwd_tag->next_hop;
                m_tag_delete(m, fwd_tag);
        }
+#endif /* IPFIREWALL */
 
 #if INET6
        struct ip6_hdr *ip6 = NULL;
@@ -1803,24 +2044,6 @@ findpcb:
         */
        if (inp != NULL && (inp->inp_flags & INP_BOUND_IF))
                ifscope = inp->inp_boundifp->if_index;
-#if NECP
-       if (inp != NULL && (
-#if INET6
-               isipv6 ? !necp_socket_is_allowed_to_send_recv_v6(inp,
-                       th->th_dport, th->th_sport, &ip6->ip6_dst,
-                       &ip6->ip6_src, ifp, NULL) :
-#endif
-               !necp_socket_is_allowed_to_send_recv_v4(inp, th->th_dport,
-                       th->th_sport, &ip->ip_dst, &ip->ip_src,
-                       ifp, NULL))) {
-               if (in_pcb_checkstate(inp, WNT_RELEASE, 0)
-                   == WNT_STOPUSING) {
-                       inp = NULL;     /* pretend we didn't find it */
-               }
-               IF_TCP_STATINC(ifp, badformatipsec);
-               goto dropnosock;
-       }
-#endif /* NECP */
 
        /*
         * If the state is CLOSED (i.e., TCB does not exist) then
@@ -1917,10 +2140,35 @@ findpcb:
        tcp_lock(so, 1, 0);
        if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
                tcp_unlock(so, 1, (void *)2);
-               inp = NULL;     // pretend we didn't find it 
+               inp = NULL;     // pretend we didn't find it
                goto dropnosock;
        }
 
+#if NECP
+#if INET6
+       if (isipv6) {
+               if (!necp_socket_is_allowed_to_send_recv_v6(inp, th->th_dport,
+                                                           th->th_sport,
+                                                           &ip6->ip6_dst,
+                                                           &ip6->ip6_src,
+                                                           ifp, NULL, NULL)) {
+                       IF_TCP_STATINC(ifp, badformatipsec);
+                       goto drop;
+               }
+       } else
+#endif
+       {
+               if (!necp_socket_is_allowed_to_send_recv_v4(inp, th->th_dport,
+                                                           th->th_sport,
+                                                           &ip->ip_dst,
+                                                           &ip->ip_src,
+                                                           ifp, NULL, NULL)) {
+                       IF_TCP_STATINC(ifp, badformatipsec);
+                       goto drop;
+               }
+       }
+#endif /* NECP */
+
        tp = intotcpcb(inp);
        if (tp == 0) {
                rstreason = BANDLIM_RST_CLOSEDPORT;
@@ -2210,7 +2458,10 @@ findpcb:
                                                                M_NOWAIT);
                        } else
 #endif /* INET6 */
+                       {
                                inp->inp_options = ip_srcroute();
+                               inp->inp_ip_tos = oinp->inp_ip_tos;
+                       }
                        tcp_lock(oso, 0, 0);
 #if IPSEC
                        /* copy old policy into new socket's */
@@ -2229,7 +2480,7 @@ findpcb:
                                struct tcpcb *, tp, int32_t, TCPS_LISTEN);
                        tp->t_state = TCPS_LISTEN;
                        tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT|TF_NODELAY);
-                       tp->t_flagsext |= (tp0->t_flagsext & (TF_RXTFINDROP|TF_NOTIMEWAIT));
+                       tp->t_flagsext |= (tp0->t_flagsext & (TF_RXTFINDROP|TF_NOTIMEWAIT|TF_FASTOPEN));
                        tp->t_keepinit = tp0->t_keepinit;
                        tp->t_keepcnt = tp0->t_keepcnt;
                        tp->t_keepintvl = tp0->t_keepintvl;
@@ -2292,9 +2543,12 @@ findpcb:
         *      TE_SENDECE will be cleared when we receive a packet with TH_CWR set.
         */
        if (ip_ecn == IPTOS_ECN_CE && tp->t_state == TCPS_ESTABLISHED &&
-               ((tp->ecn_flags & (TE_ECN_ON)) == (TE_ECN_ON)) && tlen > 0 &&
-               SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
-               SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) {
+           TCP_ECN_ENABLED(tp) && tlen > 0 &&
+           SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
+           SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) {
+               tcpstat.tcps_ecn_recv_ce++;
+               /* Mark that this connection received CE from the network */
+               tp->ecn_flags |= TE_RECV_ECN_CE;
                tp->ecn_flags |= TE_SENDECE;
        }
        
@@ -2302,7 +2556,7 @@ findpcb:
         * Clear TE_SENDECE if TH_CWR is set. This is harmless, so we don't
         * bother doing extensive checks for state and whatnot.
         */
-       if ((thflags & TH_CWR) == TH_CWR) {
+       if (thflags & TH_CWR) {
                tp->ecn_flags &= ~TE_SENDECE;
        }
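The two changes above implement the receiver half of ECN: a CE-marked, in-window data segment turns on ECE echoing (and is now counted and remembered), and a CWR from the peer turns it back off. A compact illustrative model of that state change:

#include <stdio.h>

/* Illustrative receiver-side ECN echo state. */
struct ecn_rcv {
    int send_ece;   /* echo ECE on outgoing ACKs */
    int saw_ce;     /* connection has seen at least one CE mark */
};

/*
 * A CE-marked, in-window data segment arms ECE echoing; a CWR from the
 * sender disarms it -- mirroring the logic in the hunks above.
 */
static void
ecn_on_segment(struct ecn_rcv *e, int ip_ecn_ce, int th_cwr, int in_window,
    int tlen)
{
    if (ip_ecn_ce && in_window && tlen > 0) {
        e->saw_ce = 1;
        e->send_ece = 1;
    }
    if (th_cwr)
        e->send_ece = 0;
}

int
main(void)
{
    struct ecn_rcv e = { 0, 0 };

    ecn_on_segment(&e, 1, 0, 1, 1448);  /* CE arrives */
    printf("echo ECE: %d\n", e.send_ece);
    ecn_on_segment(&e, 0, 1, 1, 1448);  /* sender answers with CWR */
    printf("echo ECE: %d\n", e.send_ece);
    return 0;
}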
 
@@ -2314,8 +2568,10 @@ findpcb:
         */
        if (tp->t_state == TCPS_ESTABLISHED
            && (tp->ecn_flags & TE_SETUPSENT)
-           && (ip_ecn == IPTOS_ECN_CE || (thflags & TH_CWR)))
+           && (ip_ecn == IPTOS_ECN_CE || (thflags & TH_CWR))) {
                tcp_reset_stretch_ack(tp);
+               CLEAR_IAJ_STATE(tp);
+       }
 
        /* 
         * Try to determine if we are receiving a packet after a long time.
@@ -2344,48 +2600,36 @@ findpcb:
         * else do it below (after getting remote address).
         */
        if (tp->t_state != TCPS_LISTEN && optp) {
-               tcp_dooptions(tp, optp, optlen, th, &to, ifscope);
+               tcp_dooptions(tp, optp, optlen, th, &to);
 #if MPTCP
-               mptcp_csum = mptcp_input_csum(tp, m, drop_hdrlen);
-               if (mptcp_csum) {
-                       tp->t_mpflags |= TMPF_SND_MPFAIL;
-                       tp->t_mpflags &= ~TMPF_EMBED_DSN;
-                       mptcp_notify_mpfail(so);
-                       m_freem(m);
-                       tcpstat.tcps_mp_badcsum++;
+               if (mptcp_input_preproc(tp, m, drop_hdrlen) != 0) {
+                       tp->t_flags |= TF_ACKNOW;
+                       (void) tcp_output(tp);
                        tcp_check_timer_state(tp);
                        tcp_unlock(so, 1, 0);
                        KERNEL_DEBUG(DBG_FNC_TCP_INPUT |
                            DBG_FUNC_END,0,0,0,0,0);
-                       return;     
+                       return;
                }
-               mptcp_insert_rmap(tp, m);
 #endif /* MPTCP */
        }
        if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
-               if (to.to_flags & TOF_TS) {
-                       tp->t_flags |= TF_RCVD_TSTMP;
-                       tp->ts_recent = to.to_tsval;
-                       tp->ts_recent_age = tcp_now;
-               }
-               if (to.to_flags & TOF_MSS)
-                       tcp_mss(tp, to.to_mss, ifscope);
-               if (SACK_ENABLED(tp)) {
-                       if (!(to.to_flags & TOF_SACK))
-                               tp->t_flagsext &= ~(TF_SACK_ENABLE);
-                       else
-                               tp->t_flags |= TF_SACK_PERMIT;
-               }
+               if (!(thflags & TH_ACK) ||
+                   (SEQ_GT(th->th_ack, tp->iss) &&
+                   SEQ_LEQ(th->th_ack, tp->snd_max)))
+                       tcp_finalize_options(tp, &to, ifscope);
        }
 
 #if TRAFFIC_MGT
-       /* Compute inter-packet arrival jitter. According to RFC 3550, inter-packet 
-        * arrival jitter is defined as the difference in packet spacing at the 
-        * receiver compared to the sender for a pair of packets. When two packets 
-        * of maximum segment size come one after the other with consecutive 
-        * sequence numbers, we consider them as packets sent together at the 
-        * sender and use them as a pair to compute inter-packet arrival jitter.
-        * This metric indicates the delay induced by the network components due
+       /*
+        * Compute inter-packet arrival jitter. According to RFC 3550,
+        * inter-packet arrival jitter is defined as the difference in
+        * packet spacing at the receiver compared to the sender for a
+        * pair of packets. When two packets of maximum segment size come
+        * one after the other with consecutive sequence numbers, we
+        * consider them as packets sent together at the sender and use
+        * them as a pair to compute inter-packet arrival jitter. This
+        * metric indicates the delay induced by the network components due
         * to queuing in edge/access routers.
         */
        if (tp->t_state == TCPS_ESTABLISHED &&
@@ -2405,15 +2649,17 @@ findpcb:
                }
                if ( tp->iaj_size == 0 || seg_size > tp->iaj_size ||
                        (seg_size == tp->iaj_size && tp->iaj_rcv_ts == 0)) {
-                       /* State related to inter-arrival jitter is uninitialized 
-                        * or we are trying to find a good first packet to start 
-                        * computing the metric
+                       /*
+                        * State related to inter-arrival jitter is
+                        * uninitialized or we are trying to find a good
+                        * first packet to start computing the metric
                         */
                        update_iaj_state(tp, seg_size, 0);
                } else {
                        if (seg_size == tp->iaj_size) {
-                               /* Compute inter-arrival jitter taking this packet 
-                                * as the second packet
+                               /*
+                                * Compute inter-arrival jitter taking
+                                * this packet as the second packet
                                 */
                                if (pktf_sw_lro_pkt)
                                        compute_iaj(tp, nlropkts,
@@ -2422,12 +2668,15 @@ findpcb:
                                        compute_iaj(tp, 1, 0);
                        } 
                        if (seg_size  < tp->iaj_size) {
-                               /* There is a smaller packet in the stream.
-                                * Some times the maximum size supported on a path can 
-                                * change if there is a new link with smaller MTU. 
-                                * The receiver will not know about this change. 
-                                * If there are too many packets smaller than iaj_size, 
-                                * we try to learn the iaj_size again.
+                               /*
+                                * There is a smaller packet in the stream.
+                                * Sometimes the maximum size supported
+                                * on a path can change if there is a new
+                                * link with smaller MTU. The receiver will
+                                * not know about this change. If there
+                                * are too many packets smaller than
+                                * iaj_size, we try to learn the iaj_size
+                                * again.
                                 */
                                TCP_INC_VAR(tp->iaj_small_pkt, nlropkts); 
                                if (tp->iaj_small_pkt > RESET_IAJ_SIZE_THRESH) {
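For background on the jitter comment above: RFC 3550 smooths the difference in packet spacing with a gain of 1/16, and for back-to-back full-sized segments the sender spacing is treated as zero, so the estimate tracks the spacing seen at the receiver. A sketch of that estimator follows; the kernel's compute_iaj() keeps more state than this, so treat it only as an illustration.

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

struct iaj {
    uint32_t last_rcv_ts;   /* receive time of the previous packet */
    int32_t jitter;         /* smoothed inter-arrival jitter */
};

static void
iaj_update(struct iaj *j, uint32_t rcv_ts)
{
    if (j->last_rcv_ts != 0) {
        /* D = receiver spacing - sender spacing; sender spacing ~ 0 */
        int32_t d = (int32_t)(rcv_ts - j->last_rcv_ts);

        /* J += (|D| - J) / 16, per RFC 3550 section 6.4.1 */
        j->jitter += (abs(d) - j->jitter) / 16;
    }
    j->last_rcv_ts = rcv_ts;
}

int
main(void)
{
    struct iaj j = { 0, 0 };
    uint32_t t[] = { 5000, 15000, 27000, 36000, 50000 };  /* usec */

    for (size_t i = 0; i < sizeof(t) / sizeof(t[0]); i++)
        iaj_update(&j, t[i]);
    printf("smoothed jitter: %d usec\n", (int)j.jitter);
    return 0;
}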
@@ -2506,12 +2755,15 @@ findpcb:
                                /* Recalculate the RTT */
                                tcp_compute_rtt(tp, &to, th);
 
+                               VERIFY(SEQ_GEQ(th->th_ack, tp->snd_una));
                                acked = BYTES_ACKED(th, tp);
                                tcpstat.tcps_rcvackpack++;
                                tcpstat.tcps_rcvackbyte += acked;
                                
-                               /* Handle an ack that is in sequence during congestion
-                                * avoidance phase. The calculations in this function 
+                               /*
+                                * Handle an ack that is in sequence during
+                                * congestion avoidance phase. The
+                                * calculations in this function
                                 * assume that snd_una is not updated yet. 
                                 */
                                if (CC_ALGO(tp)->congestion_avd != NULL)
@@ -2559,6 +2811,10 @@ findpcb:
                                            OFFSET_FROM_START(tp,
                                            tp->t_rxtcur);
                                }
+                               if (!SLIST_EMPTY(&tp->t_rxt_segments) &&
+                                   !TCP_DSACK_SEQ_IN_WINDOW(tp,
+                                   tp->t_dsack_lastuna, tp->snd_una))
+                                       tcp_rxtseg_clean(tp);
 
                                if ((tp->t_flagsext & TF_MEASURESNDBW) != 0 &&
                                        tp->t_bwmeas != NULL)
@@ -2568,6 +2824,8 @@ findpcb:
                                        (void) tcp_output(tp);
                                }
 
+                               tcp_tfo_rcv_ack(tp, th);
+
                                tcp_check_timer_state(tp);
                                tcp_unlock(so, 1, 0);
                                KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0);
@@ -2687,6 +2945,9 @@ findpcb:
 
                        tcp_adaptive_rwtimo_check(tp, tlen);
 
+                       if (tlen > 0)
+                               tcp_tfo_rcv_data(tp);
+
                        tcp_check_timer_state(tp);
                        tcp_unlock(so, 1, 0);
                        KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0);
@@ -2769,7 +3030,9 @@ findpcb:
                } else
 #endif
            {
-                       lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
+                       lck_mtx_assert(
+                           &((struct inpcb *)so->so_pcb)->inpcb_mtx,
+                           LCK_MTX_ASSERT_OWNED);
                        MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,
                       M_NOWAIT);
                        if (sin == NULL)
@@ -2791,15 +3054,12 @@ findpcb:
                        FREE(sin, M_SONAME);
                }
 
-               tcp_dooptions(tp, optp, optlen, th, &to, ifscope);
+               tcp_dooptions(tp, optp, optlen, th, &to);
+               tcp_finalize_options(tp, &to, ifscope);
+
+               if (tfo_enabled(tp) && tcp_tfo_syn(tp, &to))
+                       isconnected = TRUE;
 
-               if (SACK_ENABLED(tp)) {
-                       if (!(to.to_flags & TOF_SACK))
-                               tp->t_flagsext &= ~(TF_SACK_ENABLE);
-                       else
-                               tp->t_flags |= TF_SACK_PERMIT;
-               }
-               
                if (iss)
                        tp->iss = iss;
                else {
@@ -2855,8 +3115,8 @@ findpcb:
                }
 
        /*
-        * If the state is SYN_RECEIVED:
-        *      if seg contains an ACK, but not for our SYN/ACK, send a RST.
+        * If the state is SYN_RECEIVED and the seg contains an ACK,
+        * but not for our SYN/ACK, send a RST.
         */
        case TCPS_SYN_RECEIVED:
                if ((thflags & TH_ACK) &&
@@ -2874,6 +3134,7 @@ findpcb:
                 * lower if we assume scaling and the other end does not.
                 */
                if ((thflags & TH_SYN) &&
+                   (tp->irs == th->th_seq) &&
                    !(to.to_flags & TOF_SCALE))
                        tp->t_flags &= ~TF_RCVD_SCALE;
                break;
@@ -2928,9 +3189,12 @@ findpcb:
                        if ((thflags & (TH_ECE | TH_CWR)) == (TH_ECE)) {
                                /* ECN-setup SYN-ACK */
                                tp->ecn_flags |= TE_SETUPRECEIVED;
-                               tcpstat.tcps_ecn_setup++;
-                       }
-                       else {
+                               if (TCP_ECN_ENABLED(tp))
+                                       tcpstat.tcps_ecn_client_success++;
+                       } else {
+                               if (tp->ecn_flags & TE_SETUPSENT &&
+                                   tp->t_rxtshift == 0)
+                                       tcpstat.tcps_ecn_not_supported++;
                                /* non-ECN-setup SYN-ACK */
                                tp->ecn_flags &= ~TE_SENDIPECT;
                        }
@@ -2941,13 +3205,25 @@ findpcb:
                        /* XXXMAC: SOCK_UNLOCK(so); */
 #endif
                        /* Do window scaling on this connection? */
-                       if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
-                               (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+                       if (TCP_WINDOW_SCALE_ENABLED(tp)) {
                                tp->snd_scale = tp->requested_s_scale;
                                tp->rcv_scale = tp->request_r_scale;
                        }
+
                        tp->rcv_adv += min(tp->rcv_wnd, TCP_MAXWIN << tp->rcv_scale);
                        tp->snd_una++;          /* SYN is acked */
+                       if (SEQ_LT(tp->snd_nxt, tp->snd_una))
+                               tp->snd_nxt = tp->snd_una;
+
+                       /*
+                        * We have sent more in the SYN than what is being
+                        * acked (e.g., with TFO). We should restart sending
+                        * from what the receiver has acknowledged
+                        * immediately.
+                        */
+                       if (SEQ_GT(tp->snd_nxt, th->th_ack))
+                               tp->snd_nxt = th->th_ack;
+
                        /*
                         * If there's data, delay ACK; if there's also a FIN
                         * ACKNOW will be turned on later.
@@ -2971,19 +3247,24 @@ findpcb:
                        tp->t_starttime = tcp_now;
                        tcp_sbrcv_tstmp_check(tp);
                        if (tp->t_flags & TF_NEEDFIN) {
-                               DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
-                                       struct tcpcb *, tp, int32_t, TCPS_FIN_WAIT_1);
+                               DTRACE_TCP4(state__change, void, NULL,
+                                   struct inpcb *, inp,
+                                   struct tcpcb *, tp, int32_t,
+                                   TCPS_FIN_WAIT_1);
                                tp->t_state = TCPS_FIN_WAIT_1;
                                tp->t_flags &= ~TF_NEEDFIN;
                                thflags &= ~TH_SYN;
                        } else {
-                               DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
-                                       struct tcpcb *, tp, int32_t, TCPS_ESTABLISHED);
+                               DTRACE_TCP4(state__change, void, NULL,
+                                   struct inpcb *, inp, struct tcpcb *,
+                                   tp, int32_t, TCPS_ESTABLISHED);
                                tp->t_state = TCPS_ESTABLISHED;
-                               tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
-                                       TCP_CONN_KEEPIDLE(tp));
+                               tp->t_timer[TCPT_KEEP] =
+                                   OFFSET_FROM_START(tp,
+                                   TCP_CONN_KEEPIDLE(tp));
                                if (nstat_collect)
-                                       nstat_route_connect_success(tp->t_inpcb->inp_route.ro_rt);
+                                       nstat_route_connect_success(
+                                           tp->t_inpcb->inp_route.ro_rt);
                        }
 #if MPTCP
                        /*
@@ -3001,6 +3282,19 @@ findpcb:
                        } else
 #endif /* MPTCP */
                                isconnected = TRUE;
+
+                       if (tp->t_tfo_flags & (TFO_F_COOKIE_REQ | TFO_F_COOKIE_SENT)) {
+                               tcp_tfo_synack(tp, &to);
+
+                               if ((tp->t_tfo_stats & TFO_S_SYN_DATA_SENT) &&
+                                   SEQ_LT(tp->snd_una, th->th_ack)) {
+                                       tp->t_tfo_stats |= TFO_S_SYN_DATA_ACKED;
+                                       tcpstat.tcps_tfo_syn_data_acked++;
+
+                                       if (!(tp->t_tfo_flags & TFO_F_NO_RCVPROBING))
+                                               tcp_tfo_rcv_probe(tp, tlen);
+                               }
+                       }
                } else {
                        /*
                         *  Received initial SYN in SYN-SENT[*] state => simul-
@@ -3016,6 +3310,12 @@ findpcb:
                                struct tcpcb *, tp, int32_t, TCPS_SYN_RECEIVED);
                        tp->t_state = TCPS_SYN_RECEIVED;
 
+                       /*
+                        * During simultaneous open, TFO should not be used.
+                        * So, we disable it here to prevent data from being
+                        * sent on the SYN/ACK.
+                        */
+                       tcp_disable_tfo(tp);
                }
 
 trimthenstep6:
@@ -3062,8 +3362,8 @@ trimthenstep6:
          * or recovers by adjusting its sequence numbering
         */
        case TCPS_ESTABLISHED:
-               if (thflags & TH_SYN)  
-                       goto dropafterack; 
+               if (thflags & TH_SYN)
+                       goto dropafterack;
                break;
        }
 
@@ -3216,7 +3516,7 @@ trimthenstep6:
                                    rxbytes, tlen);
                                tp->t_stat.rxduplicatebytes += tlen;
                        }
-                       if (tlen)
+                       if (tlen > 0)
                                goto dropafterack;
                        goto drop;
                }
@@ -3275,6 +3575,16 @@ trimthenstep6:
                        tcpstat.tcps_rcvpartduppack++;
                        tcpstat.tcps_rcvpartdupbyte += todrop;
                }
+
+               if (TCP_DSACK_ENABLED(tp) && todrop > 1) {
+                       /*
+                        * Note the duplicate data sequence space so that
+                        * it can be reported in DSACK option.
+                        */
+                       tp->t_dsack_lseq = th->th_seq;
+                       tp->t_dsack_rseq = th->th_seq + todrop;
+                       tp->t_flags |= TF_ACKNOW;
+               }
                if (nstat_collect) {
                        nstat_route_rx(tp->t_inpcb->inp_route.ro_rt, 1, 
                                todrop, NSTAT_RX_FLAG_DUPLICATE);
@@ -3294,15 +3604,19 @@ trimthenstep6:
        }
 
        /*
-        * If new data are received on a connection after the user processes
-        * are gone, then RST the other end.  Note that an MPTCP subflow socket
-        * would have SS_NOFDREF set by default, so check to make sure that
-        * we test for SOF_MP_SUBFLOW socket flag (which would be cleared when
-        * the socket is closed.)
+        * If new data are received on a connection after the user
+        * processes are gone, then RST the other end.
+        * Send also a RST when we received a data segment after we've
+        * Also send an RST when we receive a data segment after we've
+        * sent our FIN and the socket is defunct.
+        * by default so check to make sure that we test for SOF_MP_SUBFLOW
+        * socket flag (which would be cleared when the socket is closed.)
         */
-       if (!(so->so_flags & SOF_MP_SUBFLOW) && 
-           (so->so_state & SS_NOFDREF) &&
-           tp->t_state > TCPS_CLOSE_WAIT && tlen) {
+       if (!(so->so_flags & SOF_MP_SUBFLOW) && tlen &&
+           (((so->so_state & SS_NOFDREF) &&
+           tp->t_state > TCPS_CLOSE_WAIT) ||
+           ((so->so_flags & SOF_DEFUNCT) &&
+           tp->t_state > TCPS_FIN_WAIT_1))) {
                tp = tcp_close(tp);
                tcpstat.tcps_rcvafterclose++;
                rstreason = BANDLIM_UNLIMITED;
@@ -3397,9 +3711,34 @@ trimthenstep6:
         */
        if ((thflags & TH_ACK) == 0) {
                if (tp->t_state == TCPS_SYN_RECEIVED ||
-                   (tp->t_flags & TF_NEEDSYN))
+                   (tp->t_flags & TF_NEEDSYN)) {
+                       if ((tfo_enabled(tp))) {
+                               /*
+                                * So, we received a valid segment while in
+                                * SYN-RECEIVED (TF_NEEDSYN is actually never
+                                * set, so this is dead code).
+                                * As this cannot be an RST (that case is
+                                * handled a bit higher up), and it does not
+                                * have the ACK-flag set, we want to
+                                * retransmit the SYN/ACK.
+                                * Thus, we have to reset snd_nxt to snd_una to
+                                * trigger the going back to sending of the
+                                * SYN/ACK. This is more consistent with the
+                                * behavior of tcp_output(), which expects
+                                * to send the segment that is pointed to by
+                                * snd_nxt.
+                                */
+                               tp->snd_nxt = tp->snd_una;
+
+                               /*
+                                * We need to make absolutely sure that we are
+                                * going to reply upon a duplicate SYN-segment.
+                                */
+                               if (th->th_flags & TH_SYN)
+                                       needoutput = 1;
+                       }
+
                        goto step6;
-               else if (tp->t_flags & TF_ACKNOW)
+               } else if (tp->t_flags & TF_ACKNOW)
                        goto dropafterack;
                else
                        goto drop;
@@ -3421,8 +3760,7 @@ trimthenstep6:
                tcpstat.tcps_connects++;
 
                /* Do window scaling? */
-               if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
-                       (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+               if (TCP_WINDOW_SCALE_ENABLED(tp)) {
                        tp->snd_scale = tp->requested_s_scale;
                        tp->rcv_scale = tp->request_r_scale;
                        tp->snd_wnd = th->th_win << tp->snd_scale;
@@ -3436,18 +3774,21 @@ trimthenstep6:
                tp->t_starttime = tcp_now;
                tcp_sbrcv_tstmp_check(tp);
                if (tp->t_flags & TF_NEEDFIN) {
-                       DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
-                               struct tcpcb *, tp, int32_t, TCPS_FIN_WAIT_1);
+                       DTRACE_TCP4(state__change, void, NULL,
+                           struct inpcb *, inp,
+                           struct tcpcb *, tp, int32_t, TCPS_FIN_WAIT_1);
                        tp->t_state = TCPS_FIN_WAIT_1;
                        tp->t_flags &= ~TF_NEEDFIN;
                } else {
-                       DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
-                               struct tcpcb *, tp, int32_t, TCPS_ESTABLISHED);
+                       DTRACE_TCP4(state__change, void, NULL,
+                           struct inpcb *, inp,
+                           struct tcpcb *, tp, int32_t, TCPS_ESTABLISHED);
                        tp->t_state = TCPS_ESTABLISHED;
                        tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
                                TCP_CONN_KEEPIDLE(tp));
                        if (nstat_collect)
-                               nstat_route_connect_success(tp->t_inpcb->inp_route.ro_rt);
+                               nstat_route_connect_success(
+                                   tp->t_inpcb->inp_route.ro_rt);
                }
                /*
                 * If segment contains data or ACK, will call tcp_reass()
@@ -3458,7 +3799,6 @@ trimthenstep6:
                            NULL, ifp);
                tp->snd_wl1 = th->th_seq - 1;
 
-               /* FALLTHROUGH */
 #if MPTCP
                /*
                 * Do not send the connect notification for additional subflows
@@ -3470,6 +3810,55 @@ trimthenstep6:
                } else
 #endif /* MPTCP */
                        isconnected = TRUE;
+               if ((tp->t_tfo_flags & TFO_F_COOKIE_VALID)) {
+                       /* Done this when receiving the SYN */
+                       isconnected = FALSE;
+
+                       OSDecrementAtomic(&tcp_tfo_halfcnt);
+
+                       /* Panic if something has gone terribly wrong. */
+                       VERIFY(tcp_tfo_halfcnt >= 0);
+
+                       tp->t_tfo_flags &= ~TFO_F_COOKIE_VALID;
+               }
+
+               /*
+                * In case there is data in the send-queue (e.g., TFO is being
+                * used, or connectx+data has been done), then if we would
+                * "FALLTHROUGH", we would handle this ACK as if data has been
+                * acknowledged. We prevent this by increasing snd_una by 1,
+                * so that the SYN is not counted as data (snd_una++ is also
+                * done in SYN_SENT-state as part of the regular TCP stack).
+                *
+                * In case there is data on this ack as well, the data will be
+                * handled by the label "dodata" right after step6.
+                */
+               if (so->so_snd.sb_cc) {
+                       tp->snd_una++;  /* SYN is acked */
+                       if (SEQ_LT(tp->snd_nxt, tp->snd_una))
+                               tp->snd_nxt = tp->snd_una;
+
+                       /*
+                        * No duplicate-ACK handling is needed. So, we
+                        * directly advance to processing the ACK (aka,
+                        * updating the RTT estimation,...)
+                        *
+                        * But, we first need to handle eventual SACKs,
+                        * because TFO will start sending data with the
+                        * SYN/ACK, so it might be that the client
+                        * includes a SACK with its ACK.
+                        */
+                       if (SACK_ENABLED(tp) &&
+                           (to.to_nsacks > 0 ||
+                            !TAILQ_EMPTY(&tp->snd_holes)))
+                               tcp_sack_doack(tp, &to, th,
+                                   &sack_bytes_acked);
+
+                       goto process_ACK;
+               }
+
+               /* FALLTHROUGH */
 
        /*
         * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
@@ -3490,6 +3879,21 @@ trimthenstep6:
                        tcpstat.tcps_rcvacktoomuch++;
                        goto dropafterack;
                }
+               if (SACK_ENABLED(tp) && to.to_nsacks > 0) {
+                       recvd_dsack = tcp_sack_process_dsack(tp, &to, th);
+                       /*
+                        * If DSACK is received and this packet has no
+                        * other SACK information, it can be dropped.
+                        * We do not want to treat it as a duplicate ack.
+                        */
+                       if (recvd_dsack &&
+                           SEQ_LEQ(th->th_ack, tp->snd_una) &&
+                           to.to_nsacks == 0) {
+                               tcp_bad_rexmt_check(tp, th, &to);
+                               goto drop;
+                       }
+               }
+
                if (SACK_ENABLED(tp) &&
                    (to.to_nsacks > 0 || !TAILQ_EMPTY(&tp->snd_holes)))
                        tcp_sack_doack(tp, &to, th, &sack_bytes_acked);
@@ -3506,9 +3910,11 @@ trimthenstep6:
                                                tp->t_mpflags |=
                                                    TMPF_MPTCP_TRUE;
                                                so->so_flags |= SOF_MPTCP_TRUE;
-                                               if (mptcp_dbg >= MP_ERR_DEBUG)
-                                                       printf("MPTCP SUCCESS"
-                                                           " %s \n",__func__);
+                                               mptcplog((LOG_DEBUG, "MPTCP "
+                                                   "Sockets: %s \n",__func__),
+                                                   MPTCP_SOCKET_DBG,
+                                                   MPTCP_LOGLVL_LOG);
+
                                                tp->t_timer[TCPT_JACK_RXMT] = 0;
                                                tp->t_mprxtshift = 0;
                                                isconnected = TRUE;
@@ -3522,6 +3928,9 @@ trimthenstep6:
                        }
                }
 #endif /* MPTCP */
+
+               tcp_tfo_rcv_ack(tp, th);
+
                /*
                 * If we have outstanding data (other than
                 * a window probe), this is a completely
@@ -3538,11 +3947,10 @@ trimthenstep6:
                                 * instead of the dupack
                                 */ 
                                if ((thflags & TH_FIN) &&
-                                       (tp->t_flags & TF_SENTFIN) &&
-                                       !TCPS_HAVERCVDFIN(tp->t_state) &&
-                                       (th->th_ack + 1) == tp->snd_max) {
+                                   (tp->t_flags & TF_SENTFIN) &&
+                                   !TCPS_HAVERCVDFIN(tp->t_state) &&
+                                   (th->th_ack + 1) == tp->snd_max)
                                        break;
-                               }
 process_dupack:
 #if MPTCP
                                /*
@@ -3554,8 +3962,10 @@ process_dupack:
                                }
 
                                if ((isconnected) && (tp->t_mpflags & TMPF_JOINED_FLOW)) {
-                                       if (mptcp_dbg >= MP_ERR_DEBUG)
-                                               printf("%s:  bypass ack recovery\n",__func__);
+                                       mptcplog((LOG_DEBUG, "MPTCP "
+                                           "Sockets: bypass ack recovery\n"),
+                                           MPTCP_SOCKET_DBG, 
+                                           MPTCP_LOGLVL_VERBOSE);
                                        break;
                                }
 #endif /* MPTCP */
@@ -3683,8 +4093,10 @@ process_dupack:
                                                        break;
                                                }
                                        }
-                                       
-                                       tp->snd_recover = tp->snd_max;
+                                       if (tp->t_flags & TF_SENTFIN)
+                                               tp->snd_recover = tp->snd_max - 1;
+                                       else
+                                               tp->snd_recover = tp->snd_max;
                                        tp->t_timer[TCPT_PTO] = 0;
                                        tp->t_rtttime = 0;
 
@@ -3700,7 +4112,8 @@ process_dupack:
                                            == TF_PKTS_REORDERED &&
                                            !IN_FASTRECOVERY(tp) &&
                                            tp->t_reorderwin > 0 &&
-                                           tp->t_state == TCPS_ESTABLISHED) {
+                                           (tp->t_state == TCPS_ESTABLISHED ||
+                                           tp->t_state == TCPS_FIN_WAIT_1)) {
                                                tp->t_timer[TCPT_DELAYFR] =
                                                    OFFSET_FROM_START(tp,
                                                    tp->t_reorderwin);
@@ -3711,6 +4124,7 @@ process_dupack:
                                                break;
                                        }
 
+                                       tcp_rexmt_save_state(tp);
                                        /*
                                         * If the current tcp cc module has 
                                         * defined a hook for tasks to run
@@ -3720,35 +4134,29 @@ process_dupack:
                                                CC_ALGO(tp)->pre_fr(tp);
                                        ENTER_FASTRECOVERY(tp);
                                        tp->t_timer[TCPT_REXMT] = 0;
-                                       if ((tp->ecn_flags & TE_ECN_ON)
-                                           == TE_ECN_ON) 
+                                       if (TCP_ECN_ENABLED(tp))
                                                tp->ecn_flags |= TE_SENDCWR;
 
                                        if (SACK_ENABLED(tp)) {
                                                tcpstat.tcps_sack_recovery_episode++;
                                                tp->sack_newdata = tp->snd_nxt;
                                                tp->snd_cwnd = tp->t_maxseg;
-
-                                               /*
-                                                * Enable probe timeout to detect 
-                                                * a tail loss in the recovery
-                                                * window.
-                                                */
-                                               tp->t_timer[TCPT_PTO] =
-                                                   OFFSET_FROM_START(tp,
-                                                   max(10, (tp->t_srtt >> TCP_RTT_SHIFT)));
-
+                                               tp->t_flagsext &=
+                                                   ~TF_CWND_NONVALIDATED;
                                                tcp_ccdbg_trace(tp, th,
                                                    TCP_CC_ENTER_FASTRECOVERY);
-
                                                (void) tcp_output(tp);
                                                goto drop;
                                        }
                                        tp->snd_nxt = th->th_ack;
                                        tp->snd_cwnd = tp->t_maxseg;
                                        (void) tcp_output(tp);
-                                       tp->snd_cwnd = tp->snd_ssthresh +
-                                            tp->t_maxseg * tp->t_dupacks;
+                                       if (tp->t_flagsext & TF_CWND_NONVALIDATED) {
+                                               tcp_cc_adjust_nonvalidated_cwnd(tp);
+                                       } else {
+                                               tp->snd_cwnd = tp->snd_ssthresh +
+                                                    tp->t_maxseg * tp->t_dupacks;
+                                       }
                                        if (SEQ_GT(onxt, tp->snd_nxt))
                                                tp->snd_nxt = onxt;
                                        tcp_ccdbg_trace(tp, th,
@@ -3801,7 +4209,8 @@ process_dupack:
                                EXIT_FASTRECOVERY(tp);
                                if (CC_ALGO(tp)->post_fr != NULL)
                                        CC_ALGO(tp)->post_fr(tp, th);
-
+                               tp->t_pipeack = 0;
+                               tcp_clear_pipeack_state(tp);
                                tcp_ccdbg_trace(tp, th,
                                    TCP_CC_EXIT_FASTRECOVERY);
                        }
@@ -3849,14 +4258,14 @@ process_dupack:
                        tp->t_flags &= ~TF_NEEDSYN;
                        tp->snd_una++;
                        /* Do window scaling? */
-                       if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
-                               (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+                       if (TCP_WINDOW_SCALE_ENABLED(tp)) {
                                tp->snd_scale = tp->requested_s_scale;
                                tp->rcv_scale = tp->request_r_scale;
                        }
                }
 
 process_ACK:
+               VERIFY(SEQ_GEQ(th->th_ack, tp->snd_una));
                acked = BYTES_ACKED(th, tp);
                tcpstat.tcps_rcvackpack++;
                tcpstat.tcps_rcvackbyte += acked;
@@ -3895,9 +4304,21 @@ process_ACK:
                if (acked == 0)
                        goto step6;
 
+               /*
+                * When outgoing data has been acked (except the SYN+data), we
+                * mark this connection as "sending good" for TFO.
+                */
+               if ((tp->t_tfo_stats & TFO_S_SYN_DATA_SENT) &&
+                   !(tp->t_tfo_flags & TFO_F_NO_SNDPROBING) &&
+                   !(th->th_flags & TH_SYN))
+                       tcp_heuristic_tfo_snd_good(tp);
 
-               if ((thflags & TH_ECE) != 0 &&
-                       ((tp->ecn_flags & TE_ECN_ON) == TE_ECN_ON)) {
+               /*
+                * If TH_ECE is received, make sure that ECN is enabled
+                * on that connection and we have sent ECT on data packets.
+                */
+               if ((thflags & TH_ECE) != 0 && TCP_ECN_ENABLED(tp) &&
+                   (tp->ecn_flags & TE_SENDIPECT)) {
                        /*
                         * Reduce the congestion window if we haven't
                         * done so.
@@ -3905,6 +4326,12 @@ process_ACK:
                        if (!IN_FASTRECOVERY(tp)) {
                                tcp_reduce_congestion_window(tp);
                                tp->ecn_flags |= (TE_INRECOVERY|TE_SENDCWR);
+                               /*
+                                * Also note that the connection received
+                                * ECE atleast once
+                                * ECE at least once
+                               tp->ecn_flags |= TE_RECV_ECN_ECE;
+                               tcpstat.tcps_ecn_recv_ece++;
                                tcp_ccdbg_trace(tp, th, TCP_CC_ECN_RCVD);
                        }
                }
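On the sender side, the hunk above reacts to an ECE-marked ACK at most once per recovery episode: reduce the congestion window, remember to acknowledge the signal with CWR, and ignore further ECEs until recovery ends. An illustrative sketch of that once-per-episode reaction, not the kernel's congestion-control module:

#include <stdio.h>

/* Illustrative sender-side reaction to an ECE-marked ACK. */
struct ecn_snd {
    unsigned cwnd;
    unsigned ssthresh;
    int in_recovery;    /* corresponds to IN_FASTRECOVERY/TE_INRECOVERY */
    int send_cwr;       /* set CWR on the next data segment */
};

static void
ecn_on_ece(struct ecn_snd *s)
{
    if (s->in_recovery)
        return;                     /* already reacted this episode */
    s->ssthresh = s->cwnd / 2;      /* halve the window */
    s->cwnd = s->ssthresh;
    s->in_recovery = 1;
    s->send_cwr = 1;                /* tell the peer we reacted */
}

int
main(void)
{
    struct ecn_snd s = { .cwnd = 20 * 1448 };

    ecn_on_ece(&s);
    ecn_on_ece(&s);     /* second ECE in the same episode: no-op */
    printf("cwnd now %u bytes, CWR pending: %d\n", s.cwnd, s.send_cwr);
    return 0;
}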
@@ -3957,6 +4384,10 @@ process_ACK:
                }
                if (SEQ_LT(tp->snd_nxt, tp->snd_una))
                        tp->snd_nxt = tp->snd_una;
+               if (!SLIST_EMPTY(&tp->t_rxt_segments) &&
+                   !TCP_DSACK_SEQ_IN_WINDOW(tp, tp->t_dsack_lastuna,
+                   tp->snd_una))
+                       tcp_rxtseg_clean(tp);
                if ((tp->t_flagsext & TF_MEASURESNDBW) != 0 &&
                        tp->t_bwmeas != NULL)
                        tcp_bwmeas_check(tp);
@@ -4181,9 +4612,15 @@ dodata:
         * is presented to the user (this happens in tcp_usrreq.c,
         * case PRU_RCVD).  If a FIN has already been received on this
         * connection then we just ignore the text.
+        *
+        * If we are in SYN-received state and got a valid TFO cookie, we want
+        * to process the data.
         */
        if ((tlen || (thflags & TH_FIN)) &&
-           TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+           TCPS_HAVERCVDFIN(tp->t_state) == 0 &&
+           (TCPS_HAVEESTABLISHED(tp->t_state) ||
+            (tp->t_state == TCPS_SYN_RECEIVED &&
+            (tp->t_tfo_flags & TFO_F_COOKIE_VALID)))) {
                tcp_seq save_start = th->th_seq;
                tcp_seq save_end = th->th_seq + tlen;
                m_adj(m, drop_hdrlen);  /* delayed header drop */
@@ -4199,9 +4636,7 @@ dodata:
                 * immediately when segments are out of order (so
                 * fast retransmit can work).
                 */
-               if (th->th_seq == tp->rcv_nxt &&
-                   LIST_EMPTY(&tp->t_segq) &&
-                   TCPS_HAVEESTABLISHED(tp->t_state)) {
+               if (th->th_seq == tp->rcv_nxt && LIST_EMPTY(&tp->t_segq)) {
                        TCP_INC_VAR(tp->t_unacksegs, nlropkts);
                        /*
                         * Calculate the RTT on the receiver only if the 
@@ -4255,6 +4690,9 @@ dodata:
 
                tcp_adaptive_rwtimo_check(tp, tlen);
 
+               if (tlen > 0)
+                       tcp_tfo_rcv_data(tp);
+
                if (tp->t_flags & TF_DELACK) 
                {
 #if INET6
@@ -4497,17 +4935,12 @@ drop:
        return;
 }
 
-static void
-tcp_dooptions(tp, cp, cnt, th, to, input_ifscope)
 /*
  * Parse TCP options and place in tcpopt.
  */
-       struct tcpcb *tp;
-       u_char *cp;
-       int cnt;
-       struct tcphdr *th;
-       struct tcpopt *to;
-       unsigned int input_ifscope;
+static void
+tcp_dooptions(struct tcpcb *tp, u_char *cp, int cnt, struct tcphdr *th,
+    struct tcpopt *to)
 {
        u_short mss = 0;
        int opt, optlen;
@@ -4537,6 +4970,8 @@ tcp_dooptions(tp, cp, cnt, th, to, input_ifscope)
                                continue;
                        bcopy((char *) cp + 2, (char *) &mss, sizeof(mss));
                        NTOHS(mss);
+                       to->to_mss = mss;
+                       to->to_flags |= TOF_MSS;
                        break;
 
                case TCPOPT_WINDOW:
@@ -4545,8 +4980,7 @@ tcp_dooptions(tp, cp, cnt, th, to, input_ifscope)
                        if (!(th->th_flags & TH_SYN))
                                continue;
                        to->to_flags |= TOF_SCALE;
-                       tp->t_flags |= TF_RCVD_SCALE;
-                       tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
+                       to->to_requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
                        break;
 
                case TCPOPT_TIMESTAMP:
@@ -4559,15 +4993,10 @@ tcp_dooptions(tp, cp, cnt, th, to, input_ifscope)
                        bcopy((char *)cp + 6,
                            (char *)&to->to_tsecr, sizeof(to->to_tsecr));
                        NTOHL(to->to_tsecr);
-                       /*
-                        * A timestamp received in a SYN makes
-                        * it ok to send timestamp requests and replies.
-                        */
-                       if (th->th_flags & TH_SYN) {
-                               tp->t_flags |= TF_RCVD_TSTMP;
-                               tp->ts_recent = to->to_tsval;
-                               tp->ts_recent_age = tcp_now;
-                       }
+                       /* Re-enable sending Timestamps if we received them */
+                       if (!(tp->t_flags & TF_REQ_TSTMP) &&
+                           tcp_do_rfc1323 == 1)
+                               tp->t_flags |= TF_REQ_TSTMP;
                        break;
                case TCPOPT_SACK_PERMITTED:
                        if (!tcp_do_sack ||
@@ -4584,7 +5013,26 @@ tcp_dooptions(tp, cp, cnt, th, to, input_ifscope)
                        tcpstat.tcps_sack_rcv_blocks++;
 
                        break;
-               
+               case TCPOPT_FASTOPEN:
+                       if (optlen == TCPOLEN_FASTOPEN_REQ) {
+                               if (tp->t_state != TCPS_LISTEN)
+                                       continue;
+
+                               to->to_flags |= TOF_TFOREQ;
+                       } else {
+                               if (optlen < TCPOLEN_FASTOPEN_REQ ||
+                                   (optlen - TCPOLEN_FASTOPEN_REQ) > TFO_COOKIE_LEN_MAX ||
+                                   (optlen - TCPOLEN_FASTOPEN_REQ) < TFO_COOKIE_LEN_MIN)
+                                       continue;
+                               if (tp->t_state != TCPS_LISTEN &&
+                                   tp->t_state != TCPS_SYN_SENT)
+                                       continue;
+
+                               to->to_flags |= TOF_TFO;
+                               to->to_tfo = cp + 1;
+                       }
+
+                       break;
 #if MPTCP
                case TCPOPT_MULTIPATH:
                        tcp_do_mptcp_options(tp, cp, th, to, optlen);
@@ -4592,8 +5040,33 @@ tcp_dooptions(tp, cp, cnt, th, to, input_ifscope)
 #endif /* MPTCP */
                }
        }
-       if (th->th_flags & TH_SYN)
-               tcp_mss(tp, mss, input_ifscope);        /* sets t_maxseg */
+}
+
+static void
+tcp_finalize_options(struct tcpcb *tp, struct tcpopt *to, unsigned int ifscope)
+{
+       if (to->to_flags & TOF_TS) {
+               tp->t_flags |= TF_RCVD_TSTMP;
+               tp->ts_recent = to->to_tsval;
+               tp->ts_recent_age = tcp_now;
+
+       }
+       if (to->to_flags & TOF_MSS)
+               tcp_mss(tp, to->to_mss, ifscope);
+       if (SACK_ENABLED(tp)) {
+               if (!(to->to_flags & TOF_SACK))
+                       tp->t_flagsext &= ~(TF_SACK_ENABLE);
+               else
+                       tp->t_flags |= TF_SACK_PERMIT;
+       }
+       if (to->to_flags & TOF_SCALE) {
+               tp->t_flags |= TF_RCVD_SCALE;
+               tp->requested_s_scale = to->to_requested_s_scale;
+
+               /* Re-enable window scaling, if the option is received */
+               if (tp->request_r_scale > 0)
+                       tp->t_flags |= TF_REQ_SCALE;
+       }
 }
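[Editor's note] The rewritten tcp_dooptions() no longer takes the interface scope and, apart from the timestamp re-enable above, no longer mutates the tcpcb; applying the parsed options is deferred to the new tcp_finalize_options(). A minimal sketch of the intended two-step call pattern on the input path (the caller shown here is an assumption, not part of this hunk; optp, optlen, thflags and ifscope come from that caller):

        struct tcpopt to;

        bzero(&to, sizeof(to));
        tcp_dooptions(tp, optp, optlen, th, &to);        /* only fills struct tcpopt */
        /* ... segment and connection state are validated here ... */
        if (thflags & TH_SYN)
                tcp_finalize_options(tp, &to, ifscope);  /* now commit MSS, scaling, etc. */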
 
 /*
@@ -4693,15 +5166,38 @@ update_base_rtt(struct tcpcb *tp, uint32_t rtt)
 static void
 tcp_compute_rtt(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th)
 {
+       int rtt = 0;
        VERIFY(to != NULL && th != NULL);
+       if (tp->t_rtttime != 0 && SEQ_GT(th->th_ack, tp->t_rtseq)) {
+               u_int32_t pipe_ack_val;
+               rtt = tcp_now - tp->t_rtttime;
+               /*
+                * Compute pipe ack -- the amount of data acknowledged
+                * in the last RTT
+                */
+               if (SEQ_GT(th->th_ack, tp->t_pipeack_lastuna)) {
+                       pipe_ack_val = th->th_ack - tp->t_pipeack_lastuna;
+                       /* Update the sample */
+                       tp->t_pipeack_sample[tp->t_pipeack_ind++] =
+                           pipe_ack_val;
+                       tp->t_pipeack_ind %= TCP_PIPEACK_SAMPLE_COUNT;
+
+                       /* Compute the max of the pipeack samples */
+                       pipe_ack_val = tcp_get_max_pipeack(tp);
+                       tp->t_pipeack = (pipe_ack_val >
+                                   TCP_CC_CWND_INIT_BYTES) ?
+                                   pipe_ack_val : 0;
+               }
+               /* start another measurement */
+               tp->t_rtttime = 0;
+       }
        if (((to->to_flags & TOF_TS) != 0) && 
                (to->to_tsecr != 0) &&
                TSTMP_GEQ(tcp_now, to->to_tsecr)) {
-               tcp_xmit_timer(tp, tcp_now - to->to_tsecr, 
+               tcp_xmit_timer(tp, (tcp_now - to->to_tsecr),
                        to->to_tsecr, th->th_ack);
-       } else if (tp->t_rtttime != 0 && SEQ_GT(th->th_ack, tp->t_rtseq)) {
-               tcp_xmit_timer(tp, tcp_now - tp->t_rtttime, 0,
-                       th->th_ack);
+       } else if (rtt > 0) {
+               tcp_xmit_timer(tp, rtt, 0, th->th_ack);
        }
 }
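[Editor's note] tcp_get_max_pipeack() is defined elsewhere in this file and not shown in the hunk; given how the sample ring is filled above, it presumably just returns the largest of the TCP_PIPEACK_SAMPLE_COUNT entries, roughly:

        /* Assumed helper: maximum over the pipe-ack sample ring (not part of this hunk) */
        static u_int32_t
        tcp_get_max_pipeack(struct tcpcb *tp)
        {
                u_int32_t max_pipeack = 0;
                int i;

                for (i = 0; i < TCP_PIPEACK_SAMPLE_COUNT; i++) {
                        if (tp->t_pipeack_sample[i] > max_pipeack)
                                max_pipeack = tp->t_pipeack_sample[i];
                }
                return (max_pipeack);
        }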
 
@@ -4800,7 +5296,6 @@ tcp_xmit_timer(register struct tcpcb *tp, int rtt,
 compute_rto:
        nstat_route_rtt(tp->t_inpcb->inp_route.ro_rt, tp->t_srtt, 
                tp->t_rttvar);
-       tp->t_rtttime = 0;
        tp->t_rxtshift = 0;
        tp->t_rxtstart = 0;
 
@@ -4848,10 +5343,9 @@ static inline unsigned int
 tcp_maxmtu6(struct rtentry *rt)
 {
        unsigned int maxmtu;
-       struct nd_ifinfo *ndi;
+       struct nd_ifinfo *ndi = NULL;
 
        RT_LOCK_ASSERT_HELD(rt);
-       lck_rw_lock_shared(nd_if_rwlock);
        if ((ndi = ND_IFINFO(rt->rt_ifp)) != NULL && !ndi->initialized)
                ndi = NULL;
        if (ndi != NULL)
@@ -4862,7 +5356,6 @@ tcp_maxmtu6(struct rtentry *rt)
                maxmtu = MIN(rt->rt_rmx.rmx_mtu, IN6_LINKMTU(rt->rt_ifp));
        if (ndi != NULL)
                lck_mtx_unlock(&ndi->lock);
-       lck_rw_done(nd_if_rwlock);
 
        return (maxmtu);
 }
@@ -5005,6 +5498,12 @@ tcp_mss(tp, offer, input_ifscope)
 #else
        mss = tcp_maxmtu(rt);
 #endif
+
+#if NECP
+       // At this point, the mss is just the MTU. Adjust if necessary.
+       mss = necp_socket_get_effective_mtu(inp, mss);
+#endif /* NECP */
+
        mss -= min_protoh;
 
        if (rt->rt_rmx.rmx_mtu == 0) {
@@ -5163,6 +5662,12 @@ tcp_mssopt(tp)
 #endif
        /* Route locked during lookup above */
        RT_UNLOCK(rt);
+
+#if NECP
+       // At this point, the mss is just the MTU. Adjust if necessary.
+       mss = necp_socket_get_effective_mtu(tp->t_inpcb, mss);
+#endif /* NECP */
+
        return (mss - min_protoh);
 }
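[Editor's note] As a concrete example of the arithmetic (numbers illustrative): on a 1500-byte MTU path, min_protoh is 40 for IPv4 (20-byte IP plus 20-byte TCP header) and 60 for IPv6, so the advertised MSS works out to 1460 or 1440. If an NECP policy reports a smaller effective MTU, say 1400, the same subtraction yields 1360 and 1340 respectively.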
 
@@ -5477,7 +5982,7 @@ inp_fc_unthrottle_tcp(struct inpcb *inp)
                CC_ALGO(tp)->pre_fr(tp);
 
        tp->snd_cwnd = tp->snd_ssthresh;
-
+       tp->t_flagsext &= ~TF_CWND_NONVALIDATED;
        /*
         * Restart counting for ABC as we changed the
         * congestion window just now.
@@ -5490,6 +5995,7 @@ inp_fc_unthrottle_tcp(struct inpcb *inp)
         * to backoff retransmit timer.
         */
        tp->t_rxtshift = 0;
+       tp->t_rtttime = 0;
 
        /*
         * Start the output stream again. Since we are
@@ -5699,8 +6205,8 @@ tcp_input_checksum(int af, struct mbuf *m, struct tcphdr *th, int off, int tlen)
 }
 
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_STATS, stats,
-    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
-    tcp_getstat, "S,tcpstat", "TCP statistics (struct tcpstat, netinet/tcp_var.h)");
+    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, tcp_getstat,
+    "S,tcpstat", "TCP statistics (struct tcpstat, netinet/tcp_var.h)");
 
 static int
 sysctl_rexmtthresh SYSCTL_HANDLER_ARGS
@@ -5727,5 +6233,7 @@ sysctl_rexmtthresh SYSCTL_HANDLER_ARGS
        return (0);
 }
 
-SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmt_thresh, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
-       &tcprexmtthresh, 0, &sysctl_rexmtthresh, "I", "Duplicate ACK Threshold for Fast Retransmit");
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmt_thresh, CTLTYPE_INT | CTLFLAG_RW |
+       CTLFLAG_LOCKED, &tcprexmtthresh, 0, &sysctl_rexmtthresh, "I",
+       "Duplicate ACK Threshold for Fast Retransmit");
+
index 09f594baf8b4a9028cfa394921feea8d739aa0e9..aa2d32dbc711dad9cf37be7f59df4d4c09367823 100644 (file)
@@ -358,9 +358,6 @@ tcp_ledbat_after_idle(struct tcpcb *tp) {
        
        /* Reset the congestion window */
        tp->snd_cwnd = tp->t_maxseg * bg_ss_fltsz;
-
-       /* If stretch ack was auto disabled, re-evaluate the situation */
-       tcp_cc_after_idle_stretchack(tp);
 }
 
 /* Function to change the congestion window when the retransmit 
index a1e590a0a718bfd3c0fe21d4a35c0169b6b0e37d..f2de1c010877404ab7391cdff64797c5aaa51f0b 100644 (file)
@@ -134,9 +134,6 @@ int tcp_newreno_cleanup(struct tcpcb *tp) {
 void
 tcp_newreno_cwnd_init_or_reset(struct tcpcb *tp) {
        tcp_cc_cwnd_init_or_reset(tp);
-
-       /* If stretch ack was auto disabled, re-evaluate the situation */
-       tcp_cc_after_idle_stretchack(tp);
 }
 
 
index b693e0512eccdda6f47f5e0711c2d228892b4685..e348fadde93e7ef07cb1d2e952c14a39d704f06f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #endif
 #include <netinet/tcp.h>
 #define        TCPOUTFLAGS
+#include <netinet/tcp_cache.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/mptcp_opt.h>
 #endif
 
+#include <corecrypto/ccaes.h>
+
 #define DBG_LAYER_BEG          NETDBG_CODE(DBG_NETTCP, 1)
 #define DBG_LAYER_END          NETDBG_CODE(DBG_NETTCP, 3)
 #define DBG_FNC_TCP_OUTPUT     NETDBG_CODE(DBG_NETTCP, (4 << 8) | 1)
@@ -245,6 +248,137 @@ static int tcp_ip_output(struct socket *, struct tcpcb *, struct mbuf *, int,
 static struct mbuf* tcp_send_lroacks(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th);
 static int tcp_recv_throttle(struct tcpcb *tp);
 
+static int32_t tcp_tfo_check(struct tcpcb *tp, int32_t len)
+{
+       struct socket *so = tp->t_inpcb->inp_socket;
+       unsigned int optlen = 0;
+       unsigned int cookie_len;
+
+       if (tp->t_flags & TF_NOOPT)
+               goto fallback;
+
+       if (!tcp_heuristic_do_tfo(tp))
+               goto fallback;
+
+       optlen += TCPOLEN_MAXSEG;
+
+       if (tp->t_flags & TF_REQ_SCALE)
+               optlen += 4;
+
+#if MPTCP
+       if ((so->so_flags & SOF_MP_SUBFLOW) && mptcp_enable &&
+           tp->t_rxtshift <= mptcp_mpcap_retries)
+               optlen += sizeof(struct mptcp_mpcapable_opt_common) + sizeof(mptcp_key_t);
+#endif /* MPTCP */
+
+       if (tp->t_flags & TF_REQ_TSTMP)
+               optlen += TCPOLEN_TSTAMP_APPA;
+
+       if (SACK_ENABLED(tp))
+               optlen += TCPOLEN_SACK_PERMITTED;
+
+       /* Now, decide whether to use TFO or not */
+
+       /* Don't even bother trying if there is no space at all... */
+       if (MAX_TCPOPTLEN - optlen < TCPOLEN_FASTOPEN_REQ)
+               goto fallback;
+
+       cookie_len = tcp_cache_get_cookie_len(tp);
+       if (cookie_len == 0)
+               /* No cookie, so we request one */
+               return (0);
+
+       /* Do not send SYN+data if there is more in the queue than MSS */
+       if (so->so_snd.sb_cc > (tp->t_maxopd - MAX_TCPOPTLEN))
+               goto fallback;
+
+       /* Ok, everything looks good. We can go on and do TFO */
+       return (len);
+
+fallback:
+       tp->t_flagsext &= ~TF_FASTOPEN;
+       return (0);
+}
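[Editor's note] To make the space check above concrete (a worked example, assuming the standard option encodings): MSS costs 4 bytes, a padded window-scale option 4, the timestamp pair 12 (TCPOLEN_TSTAMP_APPA) and SACK-permitted 2, so a SYN carrying all of them consumes 22 of the 40-byte MAX_TCPOPTLEN. The remaining 18 bytes are comfortably above TCPOLEN_FASTOPEN_REQ (2) and also fit a full cookie option of 2 plus TFO_COOKIE_LEN_DEFAULT (8, assuming the usual definition) bytes; only when the MPTCP MP_CAPABLE option is also reserved does the budget become tight.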
+
+/* Returns the number of bytes written to the TCP option-space */
+static unsigned
+tcp_tfo_write_cookie_rep(struct tcpcb *tp, unsigned optlen, u_char *opt)
+{
+       u_char out[CCAES_BLOCK_SIZE];
+       unsigned ret = 0;
+       u_char *bp;
+
+       if ((MAX_TCPOPTLEN - optlen) <
+           (TCPOLEN_FASTOPEN_REQ + TFO_COOKIE_LEN_DEFAULT))
+               return (ret);
+
+       tcp_tfo_gen_cookie(tp->t_inpcb, out, sizeof(out));
+
+       bp = opt + optlen;
+
+       *bp++ = TCPOPT_FASTOPEN;
+       *bp++ = 2 + TFO_COOKIE_LEN_DEFAULT;
+       memcpy(bp, out, TFO_COOKIE_LEN_DEFAULT);
+       ret += 2 + TFO_COOKIE_LEN_DEFAULT;
+
+       tp->t_tfo_stats |= TFO_S_COOKIE_SENT;
+       tcpstat.tcps_tfo_cookie_sent++;
+
+       return (ret);
+}
+
+static unsigned
+tcp_tfo_write_cookie(struct tcpcb *tp, unsigned optlen, int32_t *len,
+                    u_char *opt)
+{
+       u_int8_t tfo_len = MAX_TCPOPTLEN - optlen - TCPOLEN_FASTOPEN_REQ;
+       unsigned ret = 0;
+       int res;
+       u_char *bp;
+
+       bp = opt + optlen;
+
+       /*
+        * The cookie will be copied in the appropriate place within the
+        * TCP-option space. That way we avoid the need for an intermediate
+        * variable.
+        */
+       res = tcp_cache_get_cookie(tp, bp + TCPOLEN_FASTOPEN_REQ, &tfo_len);
+       if (res == 0) {
+               *bp++ = TCPOPT_FASTOPEN;
+               *bp++ = TCPOLEN_FASTOPEN_REQ;
+               ret += TCPOLEN_FASTOPEN_REQ;
+
+               tp->t_tfo_flags |= TFO_F_COOKIE_REQ;
+
+               tp->t_tfo_stats |= TFO_S_COOKIE_REQ;
+               tcpstat.tcps_tfo_cookie_req++;
+       } else {
+               *bp++ = TCPOPT_FASTOPEN;
+               *bp++ = TCPOLEN_FASTOPEN_REQ + tfo_len;
+
+               ret += TCPOLEN_FASTOPEN_REQ + tfo_len;
+
+               tp->t_tfo_flags |= TFO_F_COOKIE_SENT;
+
+               /* If there is some data, let's track it */
+               if (*len) {
+                       tp->t_tfo_stats |= TFO_S_SYN_DATA_SENT;
+                       tcpstat.tcps_tfo_syn_data_sent++;
+               }
+       }
+
+       return (ret);
+}
+
+static inline bool
+tcp_send_ecn_flags_on_syn(struct tcpcb *tp, struct socket *so)
+{
+       return(!((tp->ecn_flags & TE_SETUPSENT) ||
+           (so->so_flags & SOF_MP_SUBFLOW) ||
+           (tp->t_flagsext & TF_FASTOPEN)));
+}
+
 /*
  * Tcp output routine: figure out what should be sent and send it.
  *
@@ -291,6 +425,7 @@ tcp_output(struct tcpcb *tp)
        int i, sack_rxmit;
        int tso = 0;
        int sack_bytes_rxmt;
+       tcp_seq old_snd_nxt = 0;
        struct sackhole *p;
 #if IPSEC
        unsigned ipsec_optlen = 0;
@@ -319,6 +454,7 @@ tcp_output(struct tcpcb *tp)
        boolean_t cell = FALSE;
        boolean_t wifi = FALSE;
        boolean_t wired = FALSE;
+       boolean_t sack_rescue_rxt = FALSE;
 
        /*
         * Determine length of data that should be transmitted,
@@ -333,9 +469,22 @@ tcp_output(struct tcpcb *tp)
         */
        idle_time = tcp_now - tp->t_rcvtime;
        if (idle && idle_time >= TCP_IDLETIMEOUT(tp)) {
-               if (CC_ALGO(tp)->after_idle != NULL) 
+               if (CC_ALGO(tp)->after_idle != NULL &&
+                   (tp->tcp_cc_index != TCP_CC_ALGO_CUBIC_INDEX ||
+                   idle_time >= TCP_CC_CWND_NONVALIDATED_PERIOD)) {
                        CC_ALGO(tp)->after_idle(tp);
-               tcp_ccdbg_trace(tp, NULL, TCP_CC_IDLE_TIMEOUT);
+                       tcp_ccdbg_trace(tp, NULL, TCP_CC_IDLE_TIMEOUT);
+               }
+
+               /*
+                * Do some other tasks that need to be done after
+                * idle time
+                */
+               if (!SLIST_EMPTY(&tp->t_rxt_segments))
+                       tcp_rxtseg_clean(tp);
+
+               /* If stretch ack was auto-disabled, re-evaluate it */
+               tcp_cc_after_idle_stretchack(tp);
        }
        tp->t_flags &= ~TF_LASTIDLE;
        if (idle) {
@@ -631,11 +780,16 @@ after_sack_rexmit:
         * in which case len is already set.
         */
        if (sack_rxmit == 0) {
-               if (sack_bytes_rxmt == 0)
+               if (sack_bytes_rxmt == 0) {
                        len = min(so->so_snd.sb_cc, sendwin) - off;
-               else {
+               } else {
                        int32_t cwin;
 
+                       cwin = tp->snd_cwnd -
+                           (tp->snd_nxt - tp->sack_newdata) -
+                           sack_bytes_rxmt;
+                       if (cwin < 0)
+                               cwin = 0;
                         /*
                         * We are inside of a SACK recovery episode and are
                         * sending new data, having retransmitted all the
@@ -652,15 +806,37 @@ after_sack_rexmit:
                         * of len is bungled by the optimizer.
                         */
                        if (len > 0) {
-                               cwin = tp->snd_cwnd - 
-                                       (tp->snd_nxt - tp->sack_newdata) -
-                                       sack_bytes_rxmt;
-                               if (cwin < 0)
-                                       cwin = 0;
                                len = imin(len, cwin);
-                       }
-                       else 
+                       } else {
                                len = 0;
+                       }
+                       /*
+                        * At this point SACK recovery can not send any
+                        * data from scoreboard or any new data. Check
+                        * if we can do a rescue retransmit towards the
+                        * tail end of recovery window.
+                        */
+                       if (len == 0 && cwin > 0 &&
+                           SEQ_LT(tp->snd_fack, tp->snd_recover) &&
+                           !(tp->t_flagsext & TF_RESCUE_RXT)) {
+                               len = min((tp->snd_recover - tp->snd_fack),
+                                   tp->t_maxseg);
+                               len = imin(len, cwin);
+                               old_snd_nxt = tp->snd_nxt;
+                               sack_rescue_rxt = TRUE;
+                               tp->snd_nxt = tp->snd_recover - len;
+                               /*
+                                * If FIN has been sent, snd_max
+                                * must have been advanced to cover it.
+                                */
+                               if ((tp->t_flags & TF_SENTFIN) &&
+                                   tp->snd_max == tp->snd_recover)
+                                       tp->snd_nxt--;
+
+                               off = tp->snd_nxt - tp->snd_una;
+                               sendalot = 0;
+                               tp->t_flagsext |= TF_RESCUE_RXT;
+                       }
                }
        }
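[Editor's note] A worked example of the rescue retransmission above (numbers illustrative): with t_maxseg = 1448, snd_fack = 50000 and snd_recover = 51000, an empty scoreboard and cwin still positive, len becomes min(51000 - 50000, 1448) = 1000, snd_nxt is pulled back to 51000 - 1000 = 50000, and a single segment covering the tail of the recovery window is retransmitted; snd_nxt is later restored from old_snd_nxt once sack_rescue_rxt is seen set.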
 
@@ -686,7 +862,7 @@ after_sack_rexmit:
         * know that foreign host supports TAO, suppress sending segment.
         */
        if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
-               if (tp->t_state != TCPS_SYN_RECEIVED)
+               if (tp->t_state != TCPS_SYN_RECEIVED || tfo_enabled(tp))
                        flags &= ~TH_SYN;
                off--, len++;
                if (len > 0 && tp->t_state == TCPS_SYN_SENT) {
@@ -731,12 +907,19 @@ after_sack_rexmit:
         * Be careful not to send data and/or FIN on SYN segments.
         * This measure is needed to prevent interoperability problems
         * with not fully conformant TCP implementations.
+        *
+        * In case of TFO, we handle the setting of the len in
+        * tcp_tfo_check. In case TFO is not enabled, never ever send
+        * SYN+data.
         */
-       if ((flags & TH_SYN) && (tp->t_flags & TF_NOOPT)) {
+       if ((flags & TH_SYN) && !tfo_enabled(tp)) {
                len = 0;
                flags &= ~TH_FIN;
        }
 
+       if ((flags & TH_SYN) && tp->t_state <= TCPS_SYN_SENT && tfo_enabled(tp))
+               len = tcp_tfo_check(tp, len);
+
        /*
         * The check here used to be (len < 0). Some times len is zero
         * when the congestion window is closed and we need to check
@@ -872,7 +1055,8 @@ after_sack_rexmit:
                    (tp->t_state > TCPS_CLOSED) &&
                    ((tp->t_mpflags & TMPF_SND_MPPRIO) ||
                    (tp->t_mpflags & TMPF_SND_REM_ADDR) ||
-                   (tp->t_mpflags & TMPF_SND_MPFAIL))) {
+                   (tp->t_mpflags & TMPF_SND_MPFAIL) ||
+                   (tp->t_mpflags & TMPF_MPCAP_RETRANSMIT))) {
                        if (len > 0) {
                                len = 0;
                        }
@@ -1125,7 +1309,7 @@ just_return:
        return (0);
 
 send:
-       /* 
+       /*
         * Set TF_MAXSEGSNT flag if the segment size is greater than
         * the max segment size.
         */
@@ -1178,101 +1362,9 @@ send:
                        }
 #endif /* MPTCP */
                }
-       }
-       
-       /*
-        * RFC 3168 states that:
-        * - If you ever sent an ECN-setup SYN/SYN-ACK you must be prepared
-        * to handle the TCP ECE flag, even if you also later send a
-        * non-ECN-setup SYN/SYN-ACK.
-        * - If you ever send a non-ECN-setup SYN/SYN-ACK, you must not set
-        * the ip ECT flag.
-        * 
-        * It is not clear how the ECE flag would ever be set if you never
-        * set the IP ECT flag on outbound packets. All the same, we use
-        * the TE_SETUPSENT to indicate that we have committed to handling
-        * the TCP ECE flag correctly. We use the TE_SENDIPECT to indicate
-        * whether or not we should set the IP ECT flag on outbound packet
-        *
-        * For a SYN-ACK, send an ECN setup SYN-ACK
-        */
-       if ((tcp_ecn_inbound || (tp->t_flags & TF_ENABLE_ECN))
-           && (flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) {
-               if ((tp->ecn_flags & TE_SETUPRECEIVED) != 0) {
-                       if ((tp->ecn_flags & TE_SETUPSENT) == 0) {
-                               /* Setting TH_ECE makes this an ECN-setup SYN-ACK */
-                               flags |= TH_ECE;
-                               
-                               /*
-                                * Record that we sent the ECN-setup and
-                                * default to setting IP ECT.
-                                */
-                               tp->ecn_flags |= (TE_SETUPSENT|TE_SENDIPECT);
-                               tcpstat.tcps_ecn_setup++;
-                       } else {
-                               /*
-                                * We sent an ECN-setup SYN-ACK but it was
-                                * dropped. Fallback to non-ECN-setup
-                                * SYN-ACK and clear flag to indicate that
-                                * we should not send data with IP ECT set
-                                *
-                                * Pretend we didn't receive an 
-                                * ECN-setup SYN.
-                                */
-                               tp->ecn_flags &= ~TE_SETUPRECEIVED;
-                               /*
-                                * We already incremented the counter
-                                * assuming that the ECN setup will
-                                * succeed. Decrementing here to
-                                * correct it.
-                                */
-                               tcpstat.tcps_ecn_setup--;
-                       }
-               }
-       } else if ((tcp_ecn_outbound || (tp->t_flags & TF_ENABLE_ECN))
-           && (flags & (TH_SYN | TH_ACK)) == TH_SYN) {
-               if ((tp->ecn_flags & TE_SETUPSENT) == 0) {
-                       /* Setting TH_ECE and TH_CWR makes this an ECN-setup SYN */
-                       flags |= (TH_ECE | TH_CWR);
-                       
-                       /*
-                        * Record that we sent the ECN-setup and default to
-                        * setting IP ECT.
-                        */
-                       tp->ecn_flags |= (TE_SETUPSENT | TE_SENDIPECT);
-               } else {
-                       /*
-                        * We sent an ECN-setup SYN but it was dropped.
-                        * Fall back to no ECN and clear flag indicating
-                        * we should send data with IP ECT set.
-                        */
-                       tp->ecn_flags &= ~TE_SENDIPECT;
-               }
-       }
-       
-       /*
-        * Check if we should set the TCP CWR flag.
-        * CWR flag is sent when we reduced the congestion window because
-        * we received a TCP ECE or we performed a fast retransmit. We
-        * never set the CWR flag on retransmitted packets. We only set
-        * the CWR flag on data packets. Pure acks don't have this set.
-        */
-       if ((tp->ecn_flags & TE_SENDCWR) != 0 && len != 0 &&
-               !SEQ_LT(tp->snd_nxt, tp->snd_max) && !sack_rxmit) {
-               flags |= TH_CWR;
-               tp->ecn_flags &= ~TE_SENDCWR;
-               tcpstat.tcps_sent_cwr++;
-       }
-       
-       /*
-        * Check if we should set the TCP ECE flag.
-        */
-       if ((tp->ecn_flags & TE_SENDECE) != 0 && len == 0) {
-               flags |= TH_ECE;
-               tcpstat.tcps_sent_ece++;
        }
 
-       /*
+       /*
         * Send a timestamp and echo-reply if this is a SYN and our side
         * wants to use timestamps (TF_REQ_TSTMP is set) or both our side
         * and our peer have sent timestamps in our SYN's.
@@ -1339,6 +1431,15 @@ send:
        }
 #endif /* MPTCP */
 
+       if (tfo_enabled(tp) && !(tp->t_flags & TF_NOOPT) &&
+           (flags & (TH_SYN | TH_ACK)) == TH_SYN)
+               optlen += tcp_tfo_write_cookie(tp, optlen, &len, opt);
+
+       if (tfo_enabled(tp) &&
+           (flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK) &&
+           (tp->t_tfo_flags & TFO_F_OFFER_COOKIE))
+               optlen += tcp_tfo_write_cookie_rep(tp, optlen, opt);
+
        if (SACK_ENABLED(tp) && ((tp->t_flags & TF_NOOPT) == 0)) {
                /*
                 * Send SACKs if necessary.  This should be the last
@@ -1354,14 +1455,16 @@ send:
                 * 10 bytes for SACK options 40 - (12 + 18).
                 */
                if (TCPS_HAVEESTABLISHED(tp->t_state) &&
-                   (tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0 &&
+                   (tp->t_flags & TF_SACK_PERMIT) &&
+                   (tp->rcv_numsacks > 0 || TCP_SEND_DSACK_OPT(tp)) &&
                    MAX_TCPOPTLEN - optlen - 2 >= TCPOLEN_SACK) {
                        int nsack, padlen;
                        u_char *bp = (u_char *)opt + optlen;
                        u_int32_t *lp;
 
                        nsack = (MAX_TCPOPTLEN - optlen - 2) / TCPOLEN_SACK;
-                       nsack = min(nsack, tp->rcv_numsacks);
+                       nsack = min(nsack, (tp->rcv_numsacks +
+                           (TCP_SEND_DSACK_OPT(tp) ? 1 : 0)));
                        sackoptlen = (2 + nsack * TCPOLEN_SACK);
 
                        /*
@@ -1378,6 +1481,21 @@ send:
                        *bp++ = TCPOPT_SACK;
                        *bp++ = sackoptlen;
                        lp = (u_int32_t *)(void *)bp;
+
+                       /*
+                        * First block of SACK option should represent
+                        * DSACK. Prefer to send SACK information if there
+                        * is space for only one SACK block. This will
+                        * allow for faster recovery.
+                        */
+                       if (TCP_SEND_DSACK_OPT(tp) && nsack > 0 &&
+                           (tp->rcv_numsacks == 0 || nsack > 1)) {
+                               *lp++ = htonl(tp->t_dsack_lseq);
+                               *lp++ = htonl(tp->t_dsack_rseq);
+                               tcpstat.tcps_dsack_sent++;
+                               nsack--;
+                       }
+                       VERIFY(nsack == 0 || tp->rcv_numsacks >= nsack);
                        for (i = 0; i < nsack; i++) {
                                struct sackblk sack = tp->sackblks[i];
                                *lp++ = htonl(sack.start);
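[Editor's note] With a DSACK pending, the SACK option emitted above therefore carries the duplicate range in its first block, as RFC 2883 requires; an illustrative wire layout (not code from this diff):

        /*
         * kind = TCPOPT_SACK (5), len = 2 + nsack * TCPOLEN_SACK
         *   block 0:    t_dsack_lseq .. t_dsack_rseq    (the duplicate/DSACK range)
         *   block 1..n: tp->sackblks[] start .. end     (ordinary SACK blocks)
         */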
@@ -1399,8 +1517,119 @@ send:
                }
        }
 
+       /*
+        * RFC 3168 states that:
+        * - If you ever sent an ECN-setup SYN/SYN-ACK you must be prepared
+        * to handle the TCP ECE flag, even if you also later send a
+        * non-ECN-setup SYN/SYN-ACK.
+        * - If you ever send a non-ECN-setup SYN/SYN-ACK, you must not set
+        * the ip ECT flag.
+        *
+        * It is not clear how the ECE flag would ever be set if you never
+        * set the IP ECT flag on outbound packets. All the same, we use
+        * the TE_SETUPSENT to indicate that we have committed to handling
+        * the TCP ECE flag correctly. We use the TE_SENDIPECT to indicate
+        * whether or not we should set the IP ECT flag on outbound packet
+        *
+        * For a SYN-ACK, send an ECN setup SYN-ACK
+        */
+       if ((tcp_ecn_inbound || (tp->t_flags & TF_ENABLE_ECN))
+           && (flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) {
+               if (tp->ecn_flags & TE_SETUPRECEIVED) {
+                       if (tcp_send_ecn_flags_on_syn(tp, so)) {
+                               /*
+                                * Setting TH_ECE makes this an ECN-setup
+                                * SYN-ACK
+                                */
+                               flags |= TH_ECE;
+
+                               /*
+                                * Record that we sent the ECN-setup and
+                                * default to setting IP ECT.
+                                */
+                               tp->ecn_flags |= (TE_SETUPSENT|TE_SENDIPECT);
+                               tcpstat.tcps_ecn_server_setup++;
+                               tcpstat.tcps_ecn_server_success++;
+                       } else {
+                               /*
+                                * We sent an ECN-setup SYN-ACK but it was
+                                * dropped. Fallback to non-ECN-setup
+                                * SYN-ACK and clear flag to indicate that
+                                * we should not send data with IP ECT set
+                                *
+                                * Pretend we didn't receive an
+                                * ECN-setup SYN.
+                                *
+                                * We already incremented the counter
+                                * assuming that the ECN setup will
+                                * succeed. Decrementing here
+                                * tcps_ecn_server_success to correct it.
+                                */
+                               if (tp->ecn_flags & TE_SETUPSENT) {
+                                       tcpstat.tcps_ecn_lost_synack++;
+                                       tcpstat.tcps_ecn_server_success--;
+                               }
+
+                               tp->ecn_flags &=
+                                   ~(TE_SETUPRECEIVED | TE_SENDIPECT |
+                                   TE_SENDCWR);
+                       }
+               }
+       } else if ((tcp_ecn_outbound || (tp->t_flags & TF_ENABLE_ECN))
+           && (flags & (TH_SYN | TH_ACK)) == TH_SYN) {
+               if (tcp_send_ecn_flags_on_syn(tp, so)) {
+                       /*
+                        * Setting TH_ECE and TH_CWR makes this an
+                        * ECN-setup SYN
+                        */
+                       flags |= (TH_ECE | TH_CWR);
+                       tcpstat.tcps_ecn_client_setup++;
+
+                       /*
+                        * Record that we sent the ECN-setup and default to
+                        * setting IP ECT.
+                        */
+                       tp->ecn_flags |= (TE_SETUPSENT | TE_SENDIPECT);
+               } else {
+                       /*
+                        * We sent an ECN-setup SYN but it was dropped.
+                        * Fall back to non-ECN and clear flag indicating
+                        * we should send data with IP ECT set.
+                        */
+                       if (tp->ecn_flags & TE_SETUPSENT)
+                               tcpstat.tcps_ecn_lost_syn++;
+                       tp->ecn_flags &= ~TE_SENDIPECT;
+               }
+       }
+
+       /*
+        * Check if we should set the TCP CWR flag.
+        * CWR flag is sent when we reduced the congestion window because
+        * we received a TCP ECE or we performed a fast retransmit. We
+        * never set the CWR flag on retransmitted packets. We only set
+        * the CWR flag on data packets. Pure acks don't have this set.
+        */
+       if ((tp->ecn_flags & TE_SENDCWR) != 0 && len != 0 &&
+           !SEQ_LT(tp->snd_nxt, tp->snd_max) && !sack_rxmit) {
+               flags |= TH_CWR;
+               tp->ecn_flags &= ~TE_SENDCWR;
+       }
+
+       /*
+        * Check if we should set the TCP ECE flag.
+        */
+       if ((tp->ecn_flags & TE_SENDECE) != 0 && len == 0) {
+               flags |= TH_ECE;
+               tcpstat.tcps_ecn_sent_ece++;
+       }
+
+
        hdrlen += optlen;
 
+       /* Reset DSACK sequence numbers */
+       tp->t_dsack_lseq = 0;
+       tp->t_dsack_rseq = 0;
+
 #if INET6
        if (isipv6)
                ipoptlen = ip6_optlen(inp);
@@ -1501,6 +1730,7 @@ send:
         * the template for sends on this connection.
         */
        if (len) {
+               tp->t_pmtud_lastseg_size = len + optlen + ipoptlen;
                if ((tp->t_flagsext & TF_FORCE) && len == 1)
                        tcpstat.tcps_sndprobe++;
                else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) {
@@ -1636,8 +1866,10 @@ send:
                 * (This will keep happy those implementations which only
                 * give data to the user when a buffer fills or
                 * a PUSH comes in.)
+                *
+                * On SYN-segments we should not add the PUSH-flag.
                 */
-               if (off + len == so->so_snd.sb_cc)
+               if (off + len == so->so_snd.sb_cc && !(flags & TH_SYN))
                        flags |= TH_PUSH;
        } else {
                if (tp->t_flags & TF_ACKNOW)
@@ -1696,8 +1928,9 @@ send:
                /* this picks up the pseudo header (w/o the length) */
                tcp_fillheaders(tp, ip, th);
                if ((tp->ecn_flags & TE_SENDIPECT) != 0 && len &&
-                       !SEQ_LT(tp->snd_nxt, tp->snd_max) && !sack_rxmit) {
-                       ip->ip_tos = IPTOS_ECN_ECT0;
+                   !SEQ_LT(tp->snd_nxt, tp->snd_max) &&
+                   !sack_rxmit && !(flags & TH_SYN)) {
+                       ip->ip_tos |= IPTOS_ECN_ECT0;
                }
 #if PF_ECN
                m->m_pkthdr.pf_mtag.pftag_hdr = (void *)ip;
@@ -1710,7 +1943,7 @@ send:
         * window for use in delaying messages about window sizes.
         * If resending a FIN, be sure not to use a new sequence number.
         */
-       if (flags & TH_FIN && (tp->t_flags & TF_SENTFIN) &&
+       if ((flags & TH_FIN) && (tp->t_flags & TF_SENTFIN) &&
            tp->snd_nxt == tp->snd_max)
                tp->snd_nxt--;
        /*
@@ -1725,16 +1958,30 @@ send:
         * right edge of the window, so use snd_nxt in that
         * case, since we know we aren't doing a retransmission.
         * (retransmit and persist are mutually exclusive...)
+        *
+        * Note the state of this retransmit segment to detect spurious
+        * retransmissions.
         */
        if (sack_rxmit == 0) {
-               if (len || (flags & (TH_SYN|TH_FIN)) || tp->t_timer[TCPT_PERSIST])
+               if (len || (flags & (TH_SYN|TH_FIN)) ||
+                   tp->t_timer[TCPT_PERSIST]) {
                        th->th_seq = htonl(tp->snd_nxt);
-               else
+                       if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+                               if (SACK_ENABLED(tp) && len > 1) {
+                                       tcp_rxtseg_insert(tp, tp->snd_nxt,
+                                           (tp->snd_nxt + len - 1));
+                               }
+                               m->m_pkthdr.pkt_flags |= PKTF_TCP_REXMT;
+                       }
+               } else {
                        th->th_seq = htonl(tp->snd_max);
+               }
        } else {
                th->th_seq = htonl(p->rxmit);
+               tcp_rxtseg_insert(tp, p->rxmit, (p->rxmit + len - 1));
                p->rxmit += len;
                tp->sackhint.sack_bytes_rexmit += len;
+               m->m_pkthdr.pkt_flags |= PKTF_TCP_REXMT;
        }
        th->th_ack = htonl(tp->rcv_nxt);
        tp->last_ack_sent = tp->rcv_nxt;
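[Editor's note] tcp_rxtseg_insert() and its companions are added elsewhere in this commit and are not shown here; conceptually each retransmitted range is remembered in a small record, sketched below under that assumption, so that a later DSACK covering the same range can mark the retransmission as spurious (or as the echo of a tail-loss probe).

        /* Assumed shape of the per-connection retransmit record (declared elsewhere) */
        struct tcp_rxt_seg {
                tcp_seq         rx_start;       /* first sequence number of the range */
                tcp_seq         rx_end;         /* last sequence number of the range */
                u_int16_t       rx_count;       /* number of times it was retransmitted */
                u_int16_t       rx_flags;       /* e.g. TCP_RXT_SPURIOUS, TCP_RXT_DSACK_FOR_TLP */
                SLIST_ENTRY(tcp_rxt_seg) rx_link;
        };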
@@ -1873,7 +2120,13 @@ send:
                }
                if (sack_rxmit)
                        goto timer;
-               tp->snd_nxt += len;
+               if (sack_rescue_rxt == TRUE) {
+                       tp->snd_nxt = old_snd_nxt;
+                       sack_rescue_rxt = FALSE;
+                       tcpstat.tcps_pto_in_recovery++;
+               } else {
+                       tp->snd_nxt += len;
+               }
                if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
                        tp->snd_max = tp->snd_nxt;
                        /*
@@ -1884,6 +2137,9 @@ send:
                                tp->t_rtttime = tcp_now;
                                tp->t_rtseq = startseq;
                                tcpstat.tcps_segstimed++;
+
+                               /* update variables related to pipe ack */
+                               tp->t_pipeack_lastuna = tp->snd_una;
                        }
                }
 
@@ -2031,20 +2287,21 @@ timer:
 #endif /* INET6 */
                if (path_mtu_discovery && (tp->t_flags & TF_PMTUD))
                        ip->ip_off |= IP_DF;
-       
+
 #if NECP
        {
                necp_kernel_policy_id policy_id;
-               if (!necp_socket_is_allowed_to_send_recv(inp, &policy_id)) {
+               u_int32_t route_rule_id;
+               if (!necp_socket_is_allowed_to_send_recv(inp, &policy_id, &route_rule_id)) {
                        m_freem(m);
                        error = EHOSTUNREACH;
                        goto out;
                }
 
-               necp_mark_packet_from_socket(m, inp, policy_id);
+               necp_mark_packet_from_socket(m, inp, policy_id, route_rule_id);
        }
 #endif /* NECP */
-       
+
 #if IPSEC
        if (inp->inp_sp != NULL)
                ipsec_setsocket(m, so);
index e3b339360618c65a85a8604f40933508b74e03c0..7d8b715ed65db24ea46544dfa1f7fdee9f806a0a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -122,8 +122,29 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_globalholes, CTLFLAG_RD | CTLFLAG_LOCKE
     &tcp_sack_globalholes, 0,
     "Global number of TCP SACK holes currently allocated");
 
+static int tcp_detect_reordering = 1;
+static int tcp_dsack_ignore_hw_duplicates = 0;
+
+#if (DEVELOPMENT || DEBUG)
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, detect_reordering,
+    CTLFLAG_RW | CTLFLAG_LOCKED,
+    &tcp_detect_reordering, 0, "");
+
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, ignore_hw_duplicates,
+    CTLFLAG_RW | CTLFLAG_LOCKED,
+    &tcp_dsack_ignore_hw_duplicates, 0, "");
+#endif /* (DEVELOPMENT || DEBUG) */
+
 extern struct zone *sack_hole_zone;
 
+#define        TCP_VALIDATE_SACK_SEQ_NUMBERS(_tp_, _sb_, _ack_) \
+    (SEQ_GT((_sb_)->end, (_sb_)->start) && \
+    SEQ_GT((_sb_)->start, (_tp_)->snd_una) && \
+    SEQ_GT((_sb_)->start, (_ack_)) && \
+    SEQ_LT((_sb_)->start, (_tp_)->snd_max) && \
+    SEQ_GT((_sb_)->end, (_tp_)->snd_una) && \
+    SEQ_LEQ((_sb_)->end, (_tp_)->snd_max))
+
 /*
  * This function is called upon receipt of new valid data (while not in header
  * prediction mode), and it updates the ordered list of sacks.
@@ -294,7 +315,7 @@ tcp_sackhole_insert(struct tcpcb *tp, tcp_seq start, tcp_seq end,
        if (tp->sackhint.nexthole == NULL)
                tp->sackhint.nexthole = hole;
 
-       return hole;
+       return(hole);
 }
 
 /*
@@ -349,7 +370,8 @@ tcp_sack_detect_reordering(struct tcpcb *tp, struct sackhole *s,
        }
 
        if (reordered) {
-               if (!(tp->t_flagsext & TF_PKTS_REORDERED)) {
+               if (tcp_detect_reordering == 1 &&
+                   !(tp->t_flagsext & TF_PKTS_REORDERED)) {
                        tp->t_flagsext |= TF_PKTS_REORDERED;
                        tcpstat.tcps_detect_reordering++;
                }
@@ -415,12 +437,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th,
                    &sack, sizeof(sack));
                sack.start = ntohl(sack.start);
                sack.end = ntohl(sack.end);
-               if (SEQ_GT(sack.end, sack.start) &&
-                   SEQ_GT(sack.start, tp->snd_una) &&
-                   SEQ_GT(sack.start, th_ack) &&
-                   SEQ_LT(sack.start, tp->snd_max) &&
-                   SEQ_GT(sack.end, tp->snd_una) &&
-                   SEQ_LEQ(sack.end, tp->snd_max))
+               if (TCP_VALIDATE_SACK_SEQ_NUMBERS(tp, &sack, th_ack))
                        sack_blocks[num_sack_blks++] = sack;
        }
 
@@ -651,7 +668,20 @@ tcp_sack_partialack(tp, th)
                num_segs * tp->t_maxseg);
        if (tp->snd_cwnd > tp->snd_ssthresh)
                tp->snd_cwnd = tp->snd_ssthresh;
-       tp->t_flags |= TF_ACKNOW;
+       if (SEQ_LT(tp->snd_fack, tp->snd_recover) &&
+           tp->snd_fack == th->th_ack && TAILQ_EMPTY(&tp->snd_holes)) {
+               struct sackhole *temp;
+               /*
+                * we received a partial ack but there is no sack_hole
+                * that will cover the remaining seq space. In this case,
+                * create a hole from snd_fack to snd_recover so that
+                * the sack recovery will continue.
+                */
+               temp = tcp_sackhole_insert(tp, tp->snd_fack,
+                   tp->snd_recover, NULL);
+               if (temp != NULL)
+                       tp->snd_fack = tp->snd_recover;
+       }
        (void) tcp_output(tp);
 }
 
@@ -762,7 +792,7 @@ tcp_sack_adjust(struct tcpcb *tp)
 }
 
 /*
- * This function returns true if more than (tcprexmtthresh - 1) * SMSS
+ * This function returns TRUE if more than (tcprexmtthresh - 1) * SMSS
  * bytes with sequence numbers greater than snd_una have been SACKed. 
  */
 boolean_t
@@ -785,3 +815,131 @@ tcp_sack_byte_islost(struct tcpcb *tp)
        return ((unacked_bytes - sndhole_bytes) >
            ((tcprexmtthresh - 1) * tp->t_maxseg));
 }
+
+/*
+ * Process any DSACK options that might be present on an input packet
+ */
+
+boolean_t
+tcp_sack_process_dsack(struct tcpcb *tp, struct tcpopt *to,
+    struct tcphdr *th)
+{
+       struct sackblk first_sack, second_sack;
+       struct tcp_rxt_seg *rxseg;
+
+       bcopy(to->to_sacks, &first_sack, sizeof(first_sack));
+       first_sack.start = ntohl(first_sack.start);
+       first_sack.end = ntohl(first_sack.end);
+
+       if (to->to_nsacks > 1) {
+               bcopy((to->to_sacks + TCPOLEN_SACK), &second_sack,
+                   sizeof(second_sack));
+               second_sack.start = ntohl(second_sack.start);
+               second_sack.end = ntohl(second_sack.end);
+       }
+
+       if (SEQ_LT(first_sack.start, th->th_ack) &&
+           SEQ_LEQ(first_sack.end, th->th_ack)) {
+               /*
+                * There is a dsack option reporting a duplicate segment
+                * also covered by cumulative acknowledgement.
+                *
+                * Validate the sequence numbers before looking at dsack
+                * option. The duplicate notification can come after
+                * snd_una moves forward. In order to set a window of valid
+                * sequence numbers to look for, we set a maximum send
+                * window within which the DSACK option will be processed.
+                */
+               if (!(TCP_DSACK_SEQ_IN_WINDOW(tp, first_sack.start, th->th_ack) &&
+                   TCP_DSACK_SEQ_IN_WINDOW(tp, first_sack.end, th->th_ack))) {
+                       to->to_nsacks--;
+                       to->to_sacks += TCPOLEN_SACK;
+                       tcpstat.tcps_dsack_recvd_old++;
+
+                       /*
+                        * returning true here so that the ack will not be
+                        * treated as duplicate ack.
+                        */
+                       return (TRUE);
+               }
+       } else if (to->to_nsacks > 1 &&
+           SEQ_LEQ(second_sack.start, first_sack.start) &&
+           SEQ_GEQ(second_sack.end, first_sack.end)) {
+               /*
+                * there is a dsack option in the first block not
+                * covered by the cumulative acknowledgement but covered
+                * by the second sack block.
+                *
+                * verify the sequence numbers on the second sack block
+                * before processing the DSACK option. Returning false
+                * here will treat the ack as a duplicate ack.
+                */
+               if (!TCP_VALIDATE_SACK_SEQ_NUMBERS(tp, &second_sack,
+                   th->th_ack)) {
+                       to->to_nsacks--;
+                       to->to_sacks += TCPOLEN_SACK;
+                       tcpstat.tcps_dsack_recvd_old++;
+                       return (TRUE);
+               }
+       } else {
+               /* no dsack options, proceed with processing the sack */
+               return (FALSE);
+       }
+
+       /* Update the tcpopt pointer to exclude dsack block */
+       to->to_nsacks--;
+       to->to_sacks += TCPOLEN_SACK;
+       tcpstat.tcps_dsack_recvd++;
+
+       /* ignore DSACK option, if DSACK is disabled */
+       if (tp->t_flagsext & TF_DISABLE_DSACK)
+               return (TRUE);
+
+       /* If the DSACK is for TLP mark it as such */
+       if ((tp->t_flagsext & TF_SENT_TLPROBE) &&
+           first_sack.end == tp->t_tlphighrxt) {
+               if ((rxseg = tcp_rxtseg_find(tp, first_sack.start,
+                   (first_sack.end - 1))) != NULL)
+                       rxseg->rx_flags |= TCP_RXT_DSACK_FOR_TLP;
+       }
+       /* Update the sender's retransmit segment state */
+       if (((tp->t_rxtshift == 1 && first_sack.start == tp->snd_una) ||
+           ((tp->t_flagsext & TF_SENT_TLPROBE) &&
+           first_sack.end == tp->t_tlphighrxt)) &&
+           TAILQ_EMPTY(&tp->snd_holes) &&
+           SEQ_GT(th->th_ack, tp->snd_una)) {
+               /*
+                * If the dsack is for a retransmitted packet and one of
+                * the two cases is true, it indicates ack loss:
+                * - retransmit timeout and first_sack.start == snd_una
+                * - TLP probe and first_sack.end == tlphighrxt
+                *
+                * Ignore dsack and do not update state when there is
+                * ack loss
+                */
+               tcpstat.tcps_dsack_ackloss++;
+
+               return (TRUE);
+       } else if ((rxseg = tcp_rxtseg_find(tp, first_sack.start,
+           (first_sack.end - 1))) == NULL) {
+               /*
+                * Duplicate notification was not triggered by a
+                * retransmission. This might be due to network duplication,
+                * disable further DSACK processing.
+                */
+               if (!tcp_dsack_ignore_hw_duplicates) {
+                       tp->t_flagsext |= TF_DISABLE_DSACK;
+                       tcpstat.tcps_dsack_disable++;
+               }
+       } else {
+               /*
+                * If the segment was retransmitted only once, mark it as
+                * spurious. Otherwise ignore the duplicate notification.
+                */
+               if (rxseg->rx_count == 1)
+                       rxseg->rx_flags |= TCP_RXT_SPURIOUS;
+               else
+                       rxseg->rx_flags &= ~TCP_RXT_SPURIOUS;
+       }
+       return (TRUE);
+}
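[Editor's note] Two concrete (illustrative) cases of the classification above: with a cumulative ACK of 5000, a first SACK block of 3000-4000 lies entirely below the ACK and is taken as a DSACK for already-acknowledged data; a first block of 7000-8000 that is enclosed by a second block of 6000-9000 is taken as a DSACK for duplicate data above the ACK. Any other first block is ordinary SACK information and the function returns FALSE so normal SACK processing continues.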
index e68e181dac3f59fc2a577638d7b5d361b84f6976..6fafa0f5f853b481cabd900abf4f5ac82c9e3767 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_cc.h>
+#include <netinet/tcp_cache.h>
 #include <kern/thread_call.h>
 
 #if INET6
 #include <security/mac_framework.h>
 #endif /* MAC_NET */
 
+#include <corecrypto/ccaes.h>
+#include <libkern/crypto/aes.h>
 #include <libkern/crypto/md5.h>
 #include <sys/kdebug.h>
 #include <mach/sdt.h>
@@ -171,6 +174,28 @@ SYSCTL_INT(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
 
 extern int tcp_do_autorcvbuf;
 
+int tcp_sysctl_fastopenkey(struct sysctl_oid *, void *, int ,
+    struct sysctl_req *);
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, fastopen_key,
+    CTLTYPE_STRING | CTLFLAG_WR,
+    0 , 0, tcp_sysctl_fastopenkey, "S", "TCP Fastopen key");
+
+/* Current count of half-open TFO connections */
+int    tcp_tfo_halfcnt = 0;
+
+/* Maximum of half-open TFO connection backlog */
+int    tcp_tfo_backlog = 10;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, fastopen_backlog, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &tcp_tfo_backlog, 0, "Backlog queue for half-open TFO connections");
+
+int    tcp_fastopen = TCP_FASTOPEN_CLIENT | TCP_FASTOPEN_SERVER;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, fastopen, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &tcp_fastopen, 0, "Enable TCP Fastopen (RFC 7413)");
+
+int    tcp_tfo_fallback_min = 10;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, fastopen_fallback_min, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &tcp_tfo_fallback_min, 0, "Minimum number of trials without TFO when in fallback mode");
+
 /*
  * Minimum MSS we accept and use. This prevents DoS attacks where
  * we are forced to a ridiculous low MSS like 20 and send hundreds
@@ -182,10 +207,12 @@ extern int tcp_do_autorcvbuf;
 int    tcp_minmss = TCP_MINMSS;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_minmss , 0, "Minmum TCP Maximum Segment Size");
-
-static int     tcp_do_rfc1323 = 1;
-SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW | CTLFLAG_LOCKED,
-    &tcp_do_rfc1323 , 0, "Enable rfc1323 (high performance TCP) extensions");
+int tcp_do_rfc1323 = 1;
+#if (DEVELOPMENT || DEBUG)
+SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_do_rfc1323 , 0,
+    "Enable rfc1323 (high performance TCP) extensions");
+#endif /* (DEVELOPMENT || DEBUG) */
 
 // Not used
 static int     tcp_do_rfc1644 = 0;
@@ -208,15 +235,16 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_RW | CTLFLAG_LOCKED, &
     "Certain ICMP unreachable messages may abort connections in SYN_SENT");
 
 static int     tcp_strict_rfc1948 = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, strict_rfc1948, CTLFLAG_RW | CTLFLAG_LOCKED,
+static int     tcp_isn_reseed_interval = 0;
+#if (DEVELOPMENT || DEBUG)
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, strict_rfc1948,
+    CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_strict_rfc1948, 0, "Determines if RFC1948 is followed exactly");
 
-static int     tcp_isn_reseed_interval = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_RW | CTLFLAG_LOCKED,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval,
+    CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_isn_reseed_interval, 0, "Seconds between reseeding of ISN secret");
-static int     tcp_background_io_enabled = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, background_io_enabled, CTLFLAG_RW | CTLFLAG_LOCKED,
-    &tcp_background_io_enabled, 0, "Background IO Enabled");
+#endif /* (DEVELOPMENT || DEBUG) */
 
 int    tcp_TCPTV_MIN = 100;    /* 100ms minimum RTT */
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rtt_min, CTLFLAG_RW | CTLFLAG_LOCKED,
@@ -231,7 +259,8 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, randomize_ports, CTLFLAG_RW | CTLFLAG_LOCKED
     &tcp_use_randomport, 0, "Randomize TCP port numbers");
 
 __private_extern__ int tcp_win_scale = 3;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, win_scale_factor, CTLFLAG_RW | CTLFLAG_LOCKED,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, win_scale_factor,
+    CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_win_scale, 0, "Window scaling factor");
 
 static void    tcp_cleartaocache(void);
@@ -240,6 +269,7 @@ static void tcp_notify(struct inpcb *, int);
 struct zone    *sack_hole_zone;
 struct zone    *tcp_reass_zone;
 struct zone    *tcp_bwmeas_zone;
+struct zone    *tcp_rxt_seg_zone;
 
 extern int slowlink_wsize;     /* window correction for slow links */
 extern int path_mtu_discovery;
@@ -292,6 +322,71 @@ static lck_grp_t *tcp_uptime_mtx_grp = NULL;               /* mutex group definition */
 static lck_grp_attr_t *tcp_uptime_mtx_grp_attr = NULL; /* mutex group attributes */
 int tcp_notsent_lowat_check(struct socket *so);
 
+static aes_encrypt_ctx tfo_ctx; /* Crypto-context for TFO */
+
+void
+tcp_tfo_gen_cookie(struct inpcb *inp, u_char *out, size_t blk_size)
+{
+       u_char in[CCAES_BLOCK_SIZE];
+#if INET6
+       int isipv6 = inp->inp_vflag & INP_IPV6;
+#endif
+
+       VERIFY(blk_size == CCAES_BLOCK_SIZE);
+
+       bzero(&in[0], CCAES_BLOCK_SIZE);
+       bzero(&out[0], CCAES_BLOCK_SIZE);
+
+#if INET6
+       if (isipv6)
+               memcpy(in, &inp->in6p_faddr, sizeof(struct in6_addr));
+       else
+#endif /* INET6 */
+               memcpy(in, &inp->inp_faddr, sizeof(struct in_addr));
+
+       aes_encrypt_cbc(in, NULL, 1, out, &tfo_ctx);
+}
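[Editor's note] tcp_tfo_gen_cookie() only derives the 16-byte AES block from the peer address; the comparison against a cookie echoed by the client happens on the input path, which is not part of this excerpt. A minimal sketch of such a check under that assumption (tcp_tfo_cookie_ok is a hypothetical name, not an XNU function):

        static boolean_t
        tcp_tfo_cookie_ok(struct inpcb *inp, u_char *rcv_cookie, unsigned int rcv_len)
        {
                u_char expect[CCAES_BLOCK_SIZE];

                if (rcv_len != TFO_COOKIE_LEN_DEFAULT)
                        return (FALSE);
                /* regenerate the cookie for this peer and compare */
                tcp_tfo_gen_cookie(inp, expect, sizeof(expect));
                return (memcmp(expect, rcv_cookie, rcv_len) == 0 ? TRUE : FALSE);
        }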
+
+__private_extern__ int
+tcp_sysctl_fastopenkey(__unused struct sysctl_oid *oidp, __unused void *arg1,
+    __unused int arg2, struct sysctl_req *req)
+{
+       int error = 0;
+       /* TFO-key is expressed as a string in hex format (+1 to account for \0 char) */
+       char keystring[TCP_FASTOPEN_KEYLEN * 2 + 1];
+       u_int32_t key[TCP_FASTOPEN_KEYLEN / sizeof(u_int32_t)];
+       int i;
+
+       /* -1, because newlen is len without the terminating \0 character */
+       if (req->newlen != (sizeof(keystring) - 1)) {
+               error = EINVAL;
+               goto exit;
+       }
+
+       /* sysctl_io_string copies keystring into the oldptr of the sysctl_req.
+        * Make sure everything is zero, to avoid putting garbage in there or
+        * leaking the stack.
+        */
+       bzero(keystring, sizeof(keystring));
+
+       error = sysctl_io_string(req, keystring, sizeof(keystring), 0, NULL);
+       if (error)
+               goto exit;
+
+       for (i = 0; i < (TCP_FASTOPEN_KEYLEN / sizeof(u_int32_t)); i++) {
+               /* Step through the keystring 8 hex characters (4 bytes) at a time */
+               if (sscanf(&keystring[i * 8], "%8x", &key[i]) != 1) {
+                       error = EINVAL;
+                       goto exit;
+               }
+       }
+
+       aes_encrypt_key128((u_char *)key, &tfo_ctx);
+
+exit:
+       return (error);
+}
+
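The parsing loop above consumes the key 8 hex characters (one 32-bit word) at a time; a minimal standalone sketch of the same stride, using a hypothetical key string:

#include <stdio.h>

int
main(void)
{
	/* 16-byte key written as 32 hex characters, the format the handler expects */
	const char keystring[] = "00112233445566778899aabbccddeeff";
	unsigned int key[4];
	int i;

	for (i = 0; i < 4; i++) {
		/* consume 8 hex characters (4 bytes) per 32-bit word */
		if (sscanf(&keystring[i * 8], "%8x", &key[i]) != 1)
			return 1;
		printf("key[%d] = 0x%08x\n", i, key[i]);
	}
	return 0;
}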
 int  get_inpcb_str_size(void)
 {
        return sizeof(struct inpcb);
@@ -342,6 +437,15 @@ static int scale_to_powerof2(int size) {
        return ret;
 }
 
+static void
+tcp_tfo_init()
+{
+       u_char key[TCP_FASTOPEN_KEYLEN];
+
+       read_random(key, sizeof(key));
+       aes_encrypt_key128(key, &tfo_ctx);
+}
+
 /*
  * Tcp initialization
  */
@@ -373,6 +477,8 @@ tcp_init(struct protosw *pp, struct domain *dp)
        read_random(&tcp_now, sizeof(tcp_now));
        tcp_now = tcp_now & 0x3fffffff; /* Starts tcp internal clock at a random value */
 
+       tcp_tfo_init();
+
        LIST_INIT(&tcb);
        tcbinfo.ipi_listhead = &tcb;
 
@@ -421,6 +527,7 @@ tcp_init(struct protosw *pp, struct domain *dp)
        zone_change(tcbinfo.ipi_zone, Z_EXPAND, TRUE);
 
        tcbinfo.ipi_gc = tcp_gc;
+       tcbinfo.ipi_timer = tcp_itimer;
        in_pcbinfo_attach(&tcbinfo);
 
        str_size = P2ROUNDUP(sizeof(struct sackhole), sizeof(u_int64_t));
@@ -452,6 +559,12 @@ tcp_init(struct protosw *pp, struct domain *dp)
        zone_change(tcp_cc_zone, Z_CALLERACCT, FALSE);
        zone_change(tcp_cc_zone, Z_EXPAND, TRUE);
 
+       str_size = P2ROUNDUP(sizeof(struct tcp_rxt_seg), sizeof(u_int64_t));
+       tcp_rxt_seg_zone = zinit(str_size, 10000 * str_size, 0,
+           "tcp_rxt_seg_zone");
+       zone_change(tcp_rxt_seg_zone, Z_CALLERACCT, FALSE);
+       zone_change(tcp_rxt_seg_zone, Z_EXPAND, TRUE);
+
 #if INET6
 #define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
 #else /* INET6 */
@@ -494,6 +607,9 @@ tcp_init(struct protosw *pp, struct domain *dp)
        /* Initialize TCP LRO data structures */
        tcp_lro_init();
 
+       /* Initialize TCP Cache */
+       tcp_cache_init();
+
        /*
         * If more than 60 MB of mbuf pool is available, increase the
         * maximum allowed receive and send socket buffer size.
@@ -776,9 +892,9 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
 #endif
 
 #if NECP
-       necp_mark_packet_from_socket(m, tp ? tp->t_inpcb : NULL, 0);
+       necp_mark_packet_from_socket(m, tp ? tp->t_inpcb : NULL, 0, 0);
 #endif /* NECP */
-       
+
 #if IPSEC
        if (tp != NULL && tp->t_inpcb->inp_sp != NULL &&
                ipsec_setsocket(m, tp ? tp->t_inpcb->inp_socket : NULL) != 0) {
@@ -888,7 +1004,7 @@ tcp_newtcpcb(inp)
 
        calculate_tcp_clock();
 
-       if (!so->cached_in_sock_layer) {
+       if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
             it = (struct inp_tp *)(void *)inp;
             tp = &it->tcb;
        } else {
@@ -909,6 +1025,7 @@ tcp_newtcpcb(inp)
                tp->t_flagsext |= TF_SACK_ENABLE;
 
        TAILQ_INIT(&tp->snd_holes);
+       SLIST_INIT(&tp->t_rxt_segments);
        tp->t_inpcb = inp;      /* XXX */
        /*
         * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
@@ -1199,12 +1316,29 @@ no_valid_rt:
 
        /* free the reassembly queue, if any */
        (void) tcp_freeq(tp);
+       if (TCP_ECN_ENABLED(tp)) {
+               if (tp->ecn_flags & TE_RECV_ECN_CE)
+                       tcpstat.tcps_ecn_conn_recv_ce++;
+               if (tp->ecn_flags & TE_RECV_ECN_ECE)
+                       tcpstat.tcps_ecn_conn_recv_ece++;
+               if (tp->ecn_flags & (TE_RECV_ECN_CE | TE_RECV_ECN_ECE)) {
+                       if (tp->t_stat.txretransmitbytes > 0 ||
+                           tp->t_stat.rxoutoforderbytes > 0)
+                               tcpstat.tcps_ecn_conn_pl_ce++;
+                       else
+                               tcpstat.tcps_ecn_conn_nopl_ce++;
+               } else {
+                       if (tp->t_stat.txretransmitbytes > 0 ||
+                           tp->t_stat.rxoutoforderbytes > 0)
+                               tcpstat.tcps_ecn_conn_plnoce++;
+               }
+       }
 
        tcp_free_sackholes(tp);
        if (tp->t_bwmeas != NULL) {
                tcp_bwmeas_free(tp);
        }
-
+       tcp_rxtseg_clean(tp);
        /* Free the packet list */
        if (tp->t_pktlist_head != NULL)
                m_freem_list(tp->t_pktlist_head);
@@ -1220,7 +1354,7 @@ no_valid_rt:
        tp->t_mptcb = NULL;
 #endif /* MPTCP */
 
-       if (so->cached_in_sock_layer)
+       if (so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER)
            inp->inp_saved_ppcb = (caddr_t) tp;
 
        tp->t_state = TCPS_CLOSED;
@@ -1259,6 +1393,16 @@ no_valid_rt:
        }
        tp->tcp_cc_index = TCP_CC_ALGO_NONE;
 
+       /* Can happen if we close the socket before receiving the third ACK */
+       if ((tp->t_tfo_flags & TFO_F_COOKIE_VALID)) {
+               OSDecrementAtomic(&tcp_tfo_halfcnt);
+
+               /* Panic if something has gone terribly wrong. */
+               VERIFY(tcp_tfo_halfcnt >= 0);
+
+               tp->t_tfo_flags &= ~TFO_F_COOKIE_VALID;
+       }
+
 #if INET6
        if (SOCK_CHECK_DOM(so, PF_INET6))
                in6_pcbdetach(inp);
@@ -1292,39 +1436,44 @@ tcp_freeq(tp)
        return (rv);
 }
 
+
+/*
+ * Walk the tcpcbs, if any, and flush the reassembly queue of each,
+ * if there is one, when do_tcpdrain is enabled.
+ * Also defunct the extended background idle sockets.
+ * Do it next time if the pcbinfo lock is in use.
+ */
 void
 tcp_drain()
 {
-       if (do_tcpdrain)
-       {
-               struct inpcb *inp;
-               struct tcpcb *tp;
-       /*
-        * Walk the tcpbs, if existing, and flush the reassembly queue,
-        * if there is one...
-        * Do it next time if the pcbinfo lock is in use
-        */
-               if (!lck_rw_try_lock_exclusive(tcbinfo.ipi_lock)) 
-                       return;
+       struct inpcb *inp;
+       struct tcpcb *tp;
 
-               LIST_FOREACH(inp, tcbinfo.ipi_listhead, inp_list) {
-                       if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
-                               WNT_STOPUSING) {
-                               tcp_lock(inp->inp_socket, 1, 0);
-                               if (in_pcb_checkstate(inp, WNT_RELEASE, 1)
-                                       == WNT_STOPUSING) {
-                                       /* lost a race, try the next one */
-                                       tcp_unlock(inp->inp_socket, 1, 0);
-                                       continue;
-                               } 
-                               tp = intotcpcb(inp);
-                               tcp_freeq(tp);
+       if (!lck_rw_try_lock_exclusive(tcbinfo.ipi_lock)) 
+               return;
+
+       LIST_FOREACH(inp, tcbinfo.ipi_listhead, inp_list) {
+               if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
+                       WNT_STOPUSING) {
+                       tcp_lock(inp->inp_socket, 1, 0);
+                       if (in_pcb_checkstate(inp, WNT_RELEASE, 1)
+                               == WNT_STOPUSING) {
+                               /* lost a race, try the next one */
                                tcp_unlock(inp->inp_socket, 1, 0);
-                       }
-               }
-               lck_rw_done(tcbinfo.ipi_lock);
+                               continue;
+                       } 
+                       tp = intotcpcb(inp);
+
+                       if (do_tcpdrain)        
+                               tcp_freeq(tp);
 
+                       so_drain_extended_bk_idle(inp->inp_socket);
+
+                       tcp_unlock(inp->inp_socket, 1, 0);
+               }
        }
+       lck_rw_done(tcbinfo.ipi_lock);
+
 }
 
 /*
@@ -1813,6 +1962,9 @@ tcp_ctlinput(cmd, sa, vip)
        if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
                return;
 
+       if ((unsigned)cmd >= PRC_NCMDS)
+               return;
+
        if (cmd == PRC_MSGSIZE)
                notify = tcp_mtudisc;
        else if (icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB ||
@@ -1826,7 +1978,7 @@ tcp_ctlinput(cmd, sa, vip)
        /* Source quench is deprecated */
        else if (cmd == PRC_QUENCH) 
                return;
-       else if ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0)
+       else if (inetctlerrmap[cmd] == 0)
                return;
        if (ip) {
                struct tcphdr th;
@@ -1931,10 +2083,12 @@ tcp6_ctlinput(cmd, sa, d)
            sa->sa_len != sizeof(struct sockaddr_in6))
                return;
 
+       if ((unsigned)cmd >= PRC_NCMDS)
+               return;
+
        if (cmd == PRC_MSGSIZE)
                notify = tcp_mtudisc;
-       else if (!PRC_IS_REDIRECT(cmd) &&
-                ((unsigned)cmd > PRC_NCMDS || inet6ctlerrmap[cmd] == 0))
+       else if (!PRC_IS_REDIRECT(cmd) && (inet6ctlerrmap[cmd] == 0))
                return;
        /* Source quench is deprecated */
        else if (cmd == PRC_QUENCH) 
@@ -2117,6 +2271,7 @@ tcp_mtudisc(
        struct socket *so = inp->inp_socket;
        int offered;
        int mss;
+       u_int32_t mtu;
 #if INET6
        int isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0;
 #endif /* INET6 */
@@ -2142,7 +2297,17 @@ tcp_mtudisc(
                }
                taop = rmx_taop(rt->rt_rmx);
                offered = taop->tao_mssopt;
-               mss = rt->rt_rmx.rmx_mtu -
+               mtu = rt->rt_rmx.rmx_mtu;
+
+               /* Route locked during lookup above */
+               RT_UNLOCK(rt);
+
+#if NECP
+               // Adjust MTU if necessary.
+               mtu = necp_socket_get_effective_mtu(inp, mtu);
+#endif /* NECP */
+
+               mss = mtu -
 #if INET6
                        (isipv6 ?
                         sizeof(struct ip6_hdr) + sizeof(struct tcphdr) :
@@ -2153,9 +2318,6 @@ tcp_mtudisc(
 #endif /* INET6 */
                        ;
 
-               /* Route locked during lookup above */
-               RT_UNLOCK(rt);
-
                if (offered)
                        mss = min(mss, offered);
                /*
@@ -2592,14 +2754,16 @@ tcp_getlock(
        }
 }
 
-/* Determine if we can grow the recieve socket buffer to avoid sending
+/* 
+ * Determine if we can grow the receive socket buffer to avoid sending
  * a zero window update to the peer. We allow even socket buffers that 
  * have fixed size (set by the application) to grow if the resource
  * constraints are met. They will also be trimmed after the application
  * reads data.
  */
 static void
-tcp_sbrcv_grow_rwin(struct tcpcb *tp, struct sockbuf *sb) {
+tcp_sbrcv_grow_rwin(struct tcpcb *tp, struct sockbuf *sb)
+{
        u_int32_t rcvbufinc = tp->t_maxseg << 4;
        u_int32_t rcvbuf = sb->sb_hiwat;
        struct socket *so = tp->t_inpcb->inp_socket;
@@ -2614,6 +2778,7 @@ tcp_sbrcv_grow_rwin(struct tcpcb *tp, struct sockbuf *sb) {
        if (tcp_do_autorcvbuf == 1 &&
                tcp_cansbgrow(sb) &&
                (tp->t_flags & TF_SLOWLINK) == 0 &&
+               (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) == 0 &&
                (rcvbuf - sb->sb_cc) < rcvbufinc &&
                rcvbuf < tcp_autorcvbuf_max &&
                (sb->sb_idealsize > 0 &&
@@ -2783,6 +2948,10 @@ calculate_tcp_clock()
 void
 tcp_set_max_rwinscale(struct tcpcb *tp, struct socket *so) {
        u_int32_t maxsockbufsize;
+       if (!tcp_do_rfc1323) {
+               tp->request_r_scale = 0;
+               return;
+       }
 
        tp->request_r_scale = max(tcp_win_scale, tp->request_r_scale);
        maxsockbufsize = ((so->so_rcv.sb_flags & SB_USRSIZE) != 0) ?
@@ -2827,5 +2996,197 @@ tcp_notsent_lowat_check(struct socket *so) {
        return(0);
 }
 
+void
+tcp_rxtseg_insert(struct tcpcb *tp, tcp_seq start, tcp_seq end) {
+       struct tcp_rxt_seg *rxseg = NULL, *prev = NULL, *next = NULL;
+       u_int32_t rxcount = 0;
+
+       if (SLIST_EMPTY(&tp->t_rxt_segments))
+               tp->t_dsack_lastuna = tp->snd_una;
+       /*
+        * First check if there is a segment already existing for this
+        * sequence space.
+        */
+
+       SLIST_FOREACH(rxseg, &tp->t_rxt_segments, rx_link) {
+               if (SEQ_GT(rxseg->rx_start, start))
+                       break;
+               prev = rxseg;
+       }
+       next = rxseg;
+
+       /* check if prev seg is for this sequence */
+       if (prev != NULL && SEQ_LEQ(prev->rx_start, start) &&
+           SEQ_GEQ(prev->rx_end, end)) {
+               prev->rx_count++;
+               return;
+       }
+
+       /*
+        * There are a couple of possibilities at this point.
+        * 1. prev overlaps with the beginning of this sequence
+        * 2. next overlaps with the end of this sequence
+        * 3. there is no overlap.
+        */
+
+       if (prev != NULL && SEQ_GT(prev->rx_end, start)) {
+               if (prev->rx_start == start && SEQ_GT(end, prev->rx_end)) {
+                       start = prev->rx_end + 1;
+                       prev->rx_count++;
+               } else {
+                       prev->rx_end = (start - 1);
+                       rxcount = prev->rx_count;
+               }
+       }
+
+       if (next != NULL && SEQ_LT(next->rx_start, end)) {
+               if (SEQ_LEQ(next->rx_end, end)) {
+                       end = next->rx_start - 1;
+                       next->rx_count++;
+               } else {
+                       next->rx_start = end + 1;
+                       rxcount = next->rx_count;
+               }
+       }
+       if (!SEQ_LT(start, end))
+               return;
+
+       rxseg = (struct tcp_rxt_seg *) zalloc(tcp_rxt_seg_zone);
+       if (rxseg == NULL) {
+               return;
+       }
+       bzero(rxseg, sizeof(*rxseg));
+       rxseg->rx_start = start;
+       rxseg->rx_end = end;
+       rxseg->rx_count = rxcount + 1;
+
+       if (prev != NULL) {
+               SLIST_INSERT_AFTER(prev, rxseg, rx_link);
+       } else {
+               SLIST_INSERT_HEAD(&tp->t_rxt_segments, rxseg, rx_link);
+       }
+       return;
+}
+
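To make the overlap handling above concrete: with existing segments [100,199] and [300,399] and a newly retransmitted range [150,349], prev is trimmed to [100,149], next is trimmed to [350,399], and [150,349] is inserted between them. A simplified standalone sketch of just that trimming, with plain integers (the kernel uses the wrap-safe SEQ_* macros instead):

#include <stdio.h>

int
main(void)
{
	unsigned int ps = 100, pe = 199;   /* prev segment */
	unsigned int ns = 300, ne = 399;   /* next segment */
	unsigned int s = 150, e = 349;     /* newly retransmitted range */

	if (pe > s)
		pe = s - 1;                /* prev overlaps the head: trim to [100,149] */
	if (ns < e)
		ns = e + 1;                /* next overlaps the tail: trim to [350,399] */

	printf("prev [%u,%u]  new [%u,%u]  next [%u,%u]\n",
	    ps, pe, s, e, ns, ne);
	return 0;
}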
+struct tcp_rxt_seg *
+tcp_rxtseg_find(struct tcpcb *tp, tcp_seq start, tcp_seq end)
+{
+       struct tcp_rxt_seg *rxseg;
+       if (SLIST_EMPTY(&tp->t_rxt_segments))
+               return (NULL);
+
+       SLIST_FOREACH(rxseg, &tp->t_rxt_segments, rx_link) {
+               if (SEQ_LEQ(rxseg->rx_start, start) &&
+                   SEQ_GEQ(rxseg->rx_end, end))
+                       return (rxseg);
+               if (SEQ_GT(rxseg->rx_start, start))
+                       break;
+       }
+       return (NULL);
+}
+
+void
+tcp_rxtseg_clean(struct tcpcb *tp)
+{
+       struct tcp_rxt_seg *rxseg, *next;
+
+       SLIST_FOREACH_SAFE(rxseg, &tp->t_rxt_segments, rx_link, next) {
+               SLIST_REMOVE(&tp->t_rxt_segments, rxseg,
+                   tcp_rxt_seg, rx_link);
+               zfree(tcp_rxt_seg_zone, rxseg);
+       }
+       tp->t_dsack_lastuna = tp->snd_max;
+}
+
+boolean_t
+tcp_rxtseg_detect_bad_rexmt(struct tcpcb *tp, tcp_seq th_ack)
+{
+       boolean_t bad_rexmt;
+       struct tcp_rxt_seg *rxseg;
+
+       if (SLIST_EMPTY(&tp->t_rxt_segments))
+               return (FALSE);
+
+       /*
+        * If not all of the segments in this window have been cumulatively
+        * acknowledged, there can still be undetected packet loss.
+        * Do not restore the congestion window in that case.
+        */
+       if (SEQ_LT(th_ack, tp->snd_recover))
+               return (FALSE);
+
+       bad_rexmt = TRUE;
+       SLIST_FOREACH(rxseg, &tp->t_rxt_segments, rx_link) {
+               if (rxseg->rx_count > 1 ||
+                   !(rxseg->rx_flags & TCP_RXT_SPURIOUS)) {
+                       bad_rexmt = FALSE;
+                       break;
+               }
+       }
+       return (bad_rexmt);
+}
+
+boolean_t
+tcp_rxtseg_dsack_for_tlp(struct tcpcb *tp)
+{
+       boolean_t dsack_for_tlp = FALSE;
+       struct tcp_rxt_seg *rxseg;
+       if (SLIST_EMPTY(&tp->t_rxt_segments))
+               return (FALSE);
+
+       SLIST_FOREACH(rxseg, &tp->t_rxt_segments, rx_link) {
+               if (rxseg->rx_count == 1 &&
+                   SLIST_NEXT(rxseg,rx_link) == NULL &&
+                   (rxseg->rx_flags & TCP_RXT_DSACK_FOR_TLP)) {
+                       dsack_for_tlp = TRUE;
+                       break;
+               }
+       }
+       return (dsack_for_tlp);
+}
+
+u_int32_t
+tcp_rxtseg_total_size(struct tcpcb *tp) {
+       struct tcp_rxt_seg *rxseg;
+       u_int32_t total_size = 0;
+
+       SLIST_FOREACH(rxseg, &tp->t_rxt_segments, rx_link) {
+               total_size += (rxseg->rx_end - rxseg->rx_start) + 1;
+       }
+       return (total_size);
+}
+
+void
+tcp_get_connectivity_status(struct tcpcb *tp,
+       struct tcp_conn_status *connstatus)
+{
+       if (tp == NULL || connstatus == NULL)
+               return;
+       bzero(connstatus, sizeof(*connstatus));
+       if (tp->t_rxtshift >= TCP_CONNECTIVITY_PROBES_MAX) {
+               if (TCPS_HAVEESTABLISHED(tp->t_state)) {
+                       connstatus->write_probe_failed = 1;
+               } else {
+                       connstatus->conn_probe_failed = 1;
+               }
+       }
+       if (tp->t_rtimo_probes >= TCP_CONNECTIVITY_PROBES_MAX)
+               connstatus->read_probe_failed = 1;
+       if (tp->t_inpcb != NULL && tp->t_inpcb->inp_last_outifp != NULL
+           && (tp->t_inpcb->inp_last_outifp->if_eflags & IFEF_PROBE_CONNECTIVITY))
+               connstatus->probe_activated = 1;
+       return;
+}
+
+boolean_t
+tfo_enabled(const struct tcpcb *tp)
+{
+       return !!(tp->t_flagsext & TF_FASTOPEN);
+}
+
+void
+tcp_disable_tfo(struct tcpcb *tp)
+{
+       tp->t_flagsext &= ~TF_FASTOPEN;
+}
 
-/* DSEP Review Done pl-20051213-v02 @3253,@3391,@3400 */
index aa2317164e82fc8f0e50e31efcc4526f88dda69f..046163f7bb9df624b0f35301c1c14012a14a693f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -89,6 +89,7 @@
 #endif
 #include <netinet/ip_var.h>
 #include <netinet/tcp.h>
+#include <netinet/tcp_cache.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 /* Max number of times a stretch ack can be delayed on a connection */
 #define        TCP_STRETCHACK_DELAY_THRESHOLD  5
 
+/*
+ * If the host processor has been sleeping for too long, this is the threshold
+ * used to avoid sending stale retransmissions.
+ */
+#define        TCP_SLEEP_TOO_LONG      (10 * 60 * 1000) /* 10 minutes in ms */
+
 /* tcp timer list */
 struct tcptimerlist tcp_timer_list;
 
@@ -220,17 +227,9 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_fastmode_idlemax,
  * SYN retransmits.  Setting it to 0 disables the dropping off of those
  * two options.
  */
-static int tcp_broken_peer_syn_rxmit_thres = 7;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, broken_peer_syn_rxmit_thres,
-    CTLFLAG_RW | CTLFLAG_LOCKED,
-    &tcp_broken_peer_syn_rxmit_thres, 0, 
-    "Number of retransmitted SYNs before "
-    "TCP disables rfc1323 and rfc1644 during the rest of attempts");
-
-/* A higher threshold on local connections for disabling RFC 1323 options */
-static int tcp_broken_peer_syn_rxmit_thres_local = 10;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, broken_peer_syn_rexmit_thres_local, 
-    CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_broken_peer_syn_rxmit_thres_local, 0,
+static int tcp_broken_peer_syn_rxmit_thres = 10;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, broken_peer_syn_rexmit_thres,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_broken_peer_syn_rxmit_thres, 0,
     "Number of retransmitted SYNs before disabling RFC 1323 "
     "options on local connections");
 
@@ -254,6 +253,14 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss,
     CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_pmtud_black_hole_mss, 0,
     "Path MTU Discovery Black Hole Detection lowered MSS");
 
+#define        TCP_REPORT_STATS_INTERVAL       43200 /* 12 hours, in seconds */
+int tcp_report_stats_interval = TCP_REPORT_STATS_INTERVAL;
+#if (DEVELOPMENT || DEBUG)
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, report_stats_interval,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_report_stats_interval, 0,
+    "Report stats interval");
+#endif /* (DEVELOPMENT || DEBUG) */
+
 /* performed garbage collection of "used" sockets */
 static boolean_t tcp_gc_done = FALSE;
 
@@ -274,28 +281,66 @@ int       tcp_delack = TCP_RETRANSHZ / 10;
 int    tcp_jack_rxmt = TCP_RETRANSHZ / 2;
 #endif /* MPTCP */
 
+static boolean_t tcp_itimer_done = FALSE;
+
 static void tcp_remove_timer(struct tcpcb *tp);
 static void tcp_sched_timerlist(uint32_t offset);
-static u_int32_t tcp_run_conn_timer(struct tcpcb *tp, u_int16_t *mode);
+static u_int32_t tcp_run_conn_timer(struct tcpcb *tp, u_int16_t *mode,
+    u_int16_t probe_if_index);
 static void tcp_sched_timers(struct tcpcb *tp);
 static inline void tcp_set_lotimer_index(struct tcpcb *);
-static void tcp_rexmt_save_state(struct tcpcb *tp);
 __private_extern__ void tcp_remove_from_time_wait(struct inpcb *inp);
 __private_extern__ void tcp_report_stats(void);
 
 /*
- * Macro to compare two timers. If there is a reset of the sign bit, 
- * it is safe to assume that the timer has wrapped around. By doing 
- * signed comparision, we take care of wrap around such that the value 
+ * Macro to compare two timers. If there is a reset of the sign bit,
+ * it is safe to assume that the timer has wrapped around. By doing
+ * signed comparison, we take care of wrap-around such that the value
  * with the sign bit reset is actually ahead of the other.
  */
 inline int32_t
-timer_diff(uint32_t t1, uint32_t toff1, uint32_t t2, uint32_t toff2) { 
+timer_diff(uint32_t t1, uint32_t toff1, uint32_t t2, uint32_t toff2) {
        return (int32_t)((t1 + toff1) - (t2 + toff2));
 };
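To see the signed-comparison trick in action, a small standalone check with one clock value just before and one just after the 32-bit wrap (hypothetical values, not from the commit):

#include <stdio.h>
#include <stdint.h>

static int32_t
timer_diff(uint32_t t1, uint32_t toff1, uint32_t t2, uint32_t toff2)
{
	return (int32_t)((t1 + toff1) - (t2 + toff2));
}

int
main(void)
{
	/* t1 is just before the 32-bit clock wraps, t2 just after */
	uint32_t t1 = 0xfffffff0U, t2 = 0x00000010U;

	/* unsigned subtraction would claim t1 is far ahead; the signed cast
	 * reports the true relationship: t1 is 32 ticks behind t2 */
	printf("%d\n", timer_diff(t1, 0, t2, 0));   /* prints -32 */
	return 0;
}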
 
 static u_int64_t tcp_last_report_time;
-#define        TCP_REPORT_STATS_INTERVAL       345600 /* 4 days, in seconds */
+
+/*
+ * Structure to store previously reported stats so that we can send
+ * incremental changes in each report interval.
+ */
+struct tcp_last_report_stats {
+       u_int32_t       tcps_connattempt;
+       u_int32_t       tcps_accepts;
+       u_int32_t       tcps_ecn_client_setup;
+       u_int32_t       tcps_ecn_server_setup;
+       u_int32_t       tcps_ecn_client_success;
+       u_int32_t       tcps_ecn_server_success;
+       u_int32_t       tcps_ecn_not_supported;
+       u_int32_t       tcps_ecn_lost_syn;
+       u_int32_t       tcps_ecn_lost_synack;
+       u_int32_t       tcps_ecn_recv_ce;
+       u_int32_t       tcps_ecn_recv_ece;
+       u_int32_t       tcps_ecn_sent_ece;
+       u_int32_t       tcps_ecn_conn_recv_ce;
+       u_int32_t       tcps_ecn_conn_recv_ece;
+       u_int32_t       tcps_ecn_conn_plnoce;
+       u_int32_t       tcps_ecn_conn_pl_ce;
+       u_int32_t       tcps_ecn_conn_nopl_ce;
+
+       /* TFO-related statistics */
+       u_int32_t       tcps_tfo_syn_data_rcv;
+       u_int32_t       tcps_tfo_cookie_req_rcv;
+       u_int32_t       tcps_tfo_cookie_sent;
+       u_int32_t       tcps_tfo_cookie_invalid;
+       u_int32_t       tcps_tfo_cookie_req;
+       u_int32_t       tcps_tfo_cookie_rcv;
+       u_int32_t       tcps_tfo_syn_data_sent;
+       u_int32_t       tcps_tfo_syn_data_acked;
+       u_int32_t       tcps_tfo_syn_loss;
+       u_int32_t       tcps_tfo_blackhole;
+};
+
 
 /* Returns true if the timer is on the timer list */
 #define TIMER_IS_ON_LIST(tp) ((tp)->t_flags & TF_TIMER_ONLIST)
@@ -350,6 +395,9 @@ add_to_time_wait(struct tcpcb *tp, uint32_t delay)
        if (tp->t_inpcb->inp_socket->so_options & SO_NOWAKEFROMSLEEP)
                socket_post_kev_msg_closed(tp->t_inpcb->inp_socket);
 
+       /* 19182803: Notify nstat that connection is closing before waiting. */
+       nstat_pcb_detach(tp->t_inpcb);
+
        if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) {
                tcp_unlock(tp->t_inpcb->inp_socket, 0, 0);
                lck_rw_lock_exclusive(pcbinfo->ipi_lock);
@@ -592,7 +640,7 @@ int tcp_backoff[TCP_MAXRXTSHIFT + 1] =
 
 static int tcp_totbackoff = 511;       /* sum of tcp_backoff[] */
 
-static void tcp_rexmt_save_state(struct tcpcb *tp)
+void tcp_rexmt_save_state(struct tcpcb *tp)
 {
        u_int32_t fsize;
        if (TSTMP_SUPPORTED(tp)) {
@@ -669,6 +717,8 @@ tcp_timers(tp, timer)
 #if INET6
        int isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV4) == 0;
 #endif /* INET6 */
+       u_int64_t accsleep_ms;
+       u_int32_t last_sleep_ms = 0;
 
        so = tp->t_inpcb->inp_socket;
        idle_time = tcp_now - tp->t_rcvtime;
@@ -702,6 +752,9 @@ tcp_timers(tp, timer)
         * to a longer retransmit interval and retransmit one segment.
         */
        case TCPT_REXMT:
+               accsleep_ms = mach_absolutetime_asleep / 1000000UL;
+               if (accsleep_ms > tp->t_accsleep_ms)
+                       last_sleep_ms = accsleep_ms - tp->t_accsleep_ms;
                /*
                 * Drop a connection in the retransmit timer
                 * 1. If we have retransmitted more than TCP_MAXRXTSHIFT
@@ -714,14 +767,15 @@ tcp_timers(tp, timer)
                 * receiving an ack
                 */
                if (++tp->t_rxtshift > TCP_MAXRXTSHIFT ||
-                   (tp->t_rxt_conndroptime > 0 
-                   && tp->t_rxtstart > 0 && 
-                   (tcp_now - tp->t_rxtstart) >= tp->t_rxt_conndroptime)
-                   || ((tp->t_flagsext & TF_RXTFINDROP) != 0 &&
-                       (tp->t_flags & TF_SENTFIN) != 0 &&
-                       tp->t_rxtshift >= 4)) {
+                   (tp->t_rxt_conndroptime > 0 && tp->t_rxtstart > 0 &&
+                   (tcp_now - tp->t_rxtstart) >= tp->t_rxt_conndroptime) ||
+                   ((tp->t_flagsext & TF_RXTFINDROP) != 0 &&
+                   (tp->t_flags & TF_SENTFIN) != 0 && tp->t_rxtshift >= 4) ||
+                   (tp->t_rxtshift > 4 && last_sleep_ms >= TCP_SLEEP_TOO_LONG)) {
                        if ((tp->t_flagsext & TF_RXTFINDROP) != 0) {
                                tcpstat.tcps_rxtfindrop++;
+                       } else if (last_sleep_ms >= TCP_SLEEP_TOO_LONG) {
+                               tcpstat.tcps_drop_after_sleep++;
                        } else {
                                tcpstat.tcps_timeoutdrop++;
                        }
@@ -736,6 +790,7 @@ tcp_timers(tp, timer)
                }
 
                tcpstat.tcps_rexmttimeo++;
+               tp->t_accsleep_ms = accsleep_ms;
 
                if (tp->t_rxtshift == 1 && 
                        tp->t_state == TCPS_ESTABLISHED) {
@@ -788,9 +843,41 @@ tcp_timers(tp, timer)
                        tp->t_flagsext &= ~(TF_DELAY_RECOVERY);
                }
 
+               if (tp->t_state == TCPS_SYN_RECEIVED)
+                       tcp_disable_tfo(tp);
+
+               if ((tp->t_tfo_stats & TFO_S_SYN_DATA_SENT) &&
+                   !(tp->t_tfo_flags & TFO_F_NO_SNDPROBING) &&
+                   ((tp->t_state != TCPS_SYN_SENT && tp->t_rxtshift > 1) ||
+                    tp->t_rxtshift > 2)) {
+                       /*
+                        * For regular retransmissions, the first one has
+                        * already been sent as a tail-loss probe.
+                        * Thus, if rxtshift > 1, the segment has been sent
+                        * a total of 3 times.
+                        *
+                        * In SYN-SENT state there is no tail-loss probe,
+                        * so we have to let rxtshift go up to 3.
+                        */
+                       tcp_heuristic_tfo_middlebox(tp);
+
+                       so->so_error = ENODATA;
+                       sorwakeup(so);
+                       sowwakeup(so);
+               }
+
                if (tp->t_state == TCPS_SYN_SENT) {
                        rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
                        tp->t_stat.synrxtshift = tp->t_rxtshift;
+
+                       /* When retransmitting, disable TFO */
+                       if (tfo_enabled(tp)) {
+                               tp->t_flagsext &= ~TF_FASTOPEN;
+                               tp->t_tfo_flags |= TFO_F_SYN_LOSS;
+
+                               tp->t_tfo_stats |= TFO_S_SYN_LOSS;
+                               tcpstat.tcps_tfo_syn_loss++;
+                       }
                } else {
                        rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
                }
@@ -810,9 +897,10 @@ tcp_timers(tp, timer)
                if (tcp_pmtud_black_hole_detect &&
                        !(tp->t_flagsext & TF_NOBLACKHOLE_DETECTION) &&
                        (tp->t_state == TCPS_ESTABLISHED)) {
-                       if (((tp->t_flags & (TF_PMTUD|TF_MAXSEGSNT))
-                           == (TF_PMTUD|TF_MAXSEGSNT)) &&
-                                (tp->t_rxtshift == 2)) {
+                       if ((tp->t_flags & TF_PMTUD) &&
+                           ((tp->t_flags & TF_MAXSEGSNT)
+                           || tp->t_pmtud_lastseg_size > tcp_pmtud_black_hole_mss) &&
+                           tp->t_rxtshift == 2) {
                                /* 
                                 * Enter Path MTU Black-hole Detection mechanism:
                                 * - Disable Path MTU Discovery (IP "DF" bit).
@@ -874,10 +962,7 @@ tcp_timers(tp, timer)
                 * Do this only on non-local connections.
                 */
                if (tp->t_state == TCPS_SYN_SENT &&
-                   ((!(tp->t_flags & TF_LOCAL) &&
-                   tp->t_rxtshift == tcp_broken_peer_syn_rxmit_thres) ||
-                   ((tp->t_flags & TF_LOCAL) && 
-                   tp->t_rxtshift == tcp_broken_peer_syn_rxmit_thres_local)))
+                   tp->t_rxtshift == tcp_broken_peer_syn_rxmit_thres)
                        tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_REQ_CC);
 
                /*
@@ -923,17 +1008,23 @@ tcp_timers(tp, timer)
                 * least once, the value of ssthresh is held constant
                 */
                if (tp->t_rxtshift == 1 && 
-                       CC_ALGO(tp)->after_timeout != NULL)
+                   CC_ALGO(tp)->after_timeout != NULL) {
                        CC_ALGO(tp)->after_timeout(tp);
+                       /*
+                        * CWR notifications are to be sent on new data
+                        * right after Fast Retransmits and ECE
+                        * notification receipts.
+                        */
+                       if (TCP_ECN_ENABLED(tp))
+                               tp->ecn_flags |= TE_SENDCWR;
+               }
 
                EXIT_FASTRECOVERY(tp);
 
-               /* CWR notifications are to be sent on new data right after
-                * RTOs, Fast Retransmits and ECE notification receipts.
-                */
-               if ((tp->ecn_flags & TE_ECN_ON) == TE_ECN_ON) {
-                       tp->ecn_flags |= TE_SENDCWR;
-               }
+               /* Exit cwnd non validated phase */
+               tp->t_flagsext &= ~TF_CWND_NONVALIDATED;
+
+
 fc_output:
                tcp_ccdbg_trace(tp, NULL, TCP_CC_REXMT_TIMEOUT);
 
@@ -999,7 +1090,8 @@ fc_output:
                        goto dropit;
                if ((always_keepalive ||
                    (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) ||
-                   (tp->t_flagsext & TF_DETECT_READSTALL)) &&
+                   (tp->t_flagsext & TF_DETECT_READSTALL) ||
+                   (tp->t_tfo_probe_state == TFO_PROBE_PROBING)) &&
                    (tp->t_state <= TCPS_CLOSING || tp->t_state == TCPS_FIN_WAIT_2)) {
                        if (idle_time >= TCP_CONN_KEEPIDLE(tp) + TCP_CONN_MAXIDLE(tp))
                                goto dropit;
@@ -1037,12 +1129,14 @@ fc_output:
                                        tp->t_rtimo_probes++;
                        }
                        tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
-                               TCP_CONN_KEEPINTVL(tp));
+                           TCP_CONN_KEEPINTVL(tp));
                } else {
                        tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
-                               TCP_CONN_KEEPIDLE(tp));
+                           TCP_CONN_KEEPIDLE(tp));
                }
                if (tp->t_flagsext & TF_DETECT_READSTALL) {
+                       struct ifnet *outifp = tp->t_inpcb->inp_last_outifp;
+                       bool reenable_probe = false;
                        /* 
                         * The keep alive packets sent to detect a read
                         * stall did not get a response from the 
@@ -1050,17 +1144,54 @@ fc_output:
                         * If the number of probes sent reaches the limit,
                         * generate an event.
                         */
-                       if (tp->t_rtimo_probes > tp->t_adaptive_rtimo) {
-                               /* Generate an event */
-                               soevent(so,
-                                       (SO_FILT_HINT_LOCKED|
-                                       SO_FILT_HINT_ADAPTIVE_RTIMO));
-                               tcp_keepalive_reset(tp);
+                       if (tp->t_adaptive_rtimo > 0) {
+                               if (tp->t_rtimo_probes > tp->t_adaptive_rtimo) {
+                                       /* Generate an event */
+                                       soevent(so,
+                                           (SO_FILT_HINT_LOCKED |
+                                           SO_FILT_HINT_ADAPTIVE_RTIMO));
+                                       tcp_keepalive_reset(tp);
+                               } else {
+                                       reenable_probe = true;
+                               }
+                       } else if (outifp != NULL &&
+                           (outifp->if_eflags & IFEF_PROBE_CONNECTIVITY) &&
+                           tp->t_rtimo_probes <= TCP_CONNECTIVITY_PROBES_MAX) {
+                               reenable_probe = true;
                        } else {
+                               tp->t_flagsext &= ~TF_DETECT_READSTALL;
+                       }
+                       if (reenable_probe) {
+                               int ind = min(tp->t_rtimo_probes,
+                                   TCP_MAXRXTSHIFT);
                                tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(
-                                       tp, TCP_REXMTVAL(tp));
+                                   tp, tcp_backoff[ind] * TCP_REXMTVAL(tp));
                        }
                }
+               if (tp->t_tfo_probe_state == TFO_PROBE_PROBING) {
+                       int ind;
+
+                       tp->t_tfo_probes++;
+                       ind = min(tp->t_tfo_probes, TCP_MAXRXTSHIFT);
+
+                       /*
+                        * We take the minimum of the timeout set by true
+                        * keepalive (see above) and the backed-off RTO. That
+                        * way we back off in case of packet loss but never
+                        * time out more slowly than regular keepalive would,
+                        * despite the backing off.
+                        */
+                       tp->t_timer[TCPT_KEEP] = min(OFFSET_FROM_START(
+                           tp, tcp_backoff[ind] * TCP_REXMTVAL(tp)),
+                           tp->t_timer[TCPT_KEEP]);
+               } else if (tp->t_tfo_probe_state == TFO_PROBE_WAIT_DATA) {
+                       /* Still no data! Let's assume a TFO-error and err out... */
+                       tcp_heuristic_tfo_middlebox(tp);
+
+                       so->so_error = ENODATA;
+                       sorwakeup(so);
+                       tcpstat.tcps_tfo_blackhole++;
+               }
                break;
        case TCPT_DELACK:
                if (tcp_delack_enabled && (tp->t_flags & TF_DELACK)) {
@@ -1138,10 +1269,7 @@ fc_output:
 
        case TCPT_PTO:
        {
-               tcp_seq old_snd_nxt;
                int32_t snd_len;
-               boolean_t rescue_rxt = FALSE;
-
                tp->t_flagsext &= ~(TF_SENT_TLPROBE);
 
                /*
@@ -1149,50 +1277,34 @@ fc_output:
                 * send a probe
                 */
                if (tp->t_state != TCPS_ESTABLISHED ||
-                   tp->t_rxtshift > 0 || tp->snd_max == tp->snd_una ||
-                   !SACK_ENABLED(tp) || TAILQ_EMPTY(&tp->snd_holes) ||
-                   (IN_FASTRECOVERY(tp) &&
-                   (SEQ_GEQ(tp->snd_fack, tp->snd_recover) ||
-                   SEQ_GT(tp->snd_nxt, tp->sack_newdata))))
+                   (tp->t_rxtshift > 0 && !(tp->t_flagsext & TF_PROBING))
+                   || tp->snd_max == tp->snd_una ||
+                   !SACK_ENABLED(tp) || !TAILQ_EMPTY(&tp->snd_holes) ||
+                   IN_FASTRECOVERY(tp))
                        break;
 
+               /*
+                * If there is no new data to send or if the
+                * connection is limited by receive window then
+                * retransmit the last segment, otherwise send
+                * new data.
+                */
+               snd_len = min(so->so_snd.sb_cc, tp->snd_wnd)
+                   - (tp->snd_max - tp->snd_una);
+               if (snd_len > 0) {
+                       tp->snd_nxt = tp->snd_max;
+               } else {
+                       snd_len = min((tp->snd_max - tp->snd_una),
+                           tp->t_maxseg);
+                       tp->snd_nxt = tp->snd_max - snd_len;
+               }
+
                tcpstat.tcps_pto++;
+               if (tp->t_flagsext & TF_PROBING)
+                       tcpstat.tcps_probe_if++;
 
                /* If timing a segment in this window, stop the timer */
                tp->t_rtttime = 0;
-
-               if (IN_FASTRECOVERY(tp)) {
-                       /*
-                        * Send a probe to detect tail loss in a
-                        * recovery window when the connection is in
-                        * fast_recovery.
-                        */
-                       old_snd_nxt = tp->snd_nxt;
-                       rescue_rxt = TRUE;
-                       VERIFY(SEQ_GEQ(tp->snd_fack, tp->snd_una));
-                       snd_len = min((tp->snd_recover - tp->snd_fack),
-                           tp->t_maxseg);
-                       tp->snd_nxt = tp->snd_recover - snd_len;
-                       tcpstat.tcps_pto_in_recovery++;
-                       tcp_ccdbg_trace(tp, NULL, TCP_CC_TLP_IN_FASTRECOVERY);
-               } else {
-                       /*
-                        * If there is no new data to send or if the
-                        * connection is limited by receive window then
-                        * retransmit the last segment, otherwise send
-                        * new data.
-                        */
-                       snd_len = min(so->so_snd.sb_cc, tp->snd_wnd)
-                           - (tp->snd_max - tp->snd_una);
-                       if (snd_len > 0) {
-                               tp->snd_nxt = tp->snd_max;
-                       } else {
-                               snd_len = min((tp->snd_max - tp->snd_una),
-                                   tp->t_maxseg);
-                               tp->snd_nxt = tp->snd_max - snd_len;
-                       }
-               }
-
                /* Note that tail loss probe is being sent */
                tp->t_flagsext |= TF_SENT_TLPROBE;
                tp->t_tlpstart = tcp_now;
@@ -1202,14 +1314,6 @@ fc_output:
                tp->snd_cwnd -= tp->t_maxseg;
 
                tp->t_tlphighrxt = tp->snd_nxt;
-
-               /*
-                * If a tail loss probe was sent after entering recovery,
-                * restore the old snd_nxt value so that other packets
-                * will get retransmitted correctly.
-                */
-               if (rescue_rxt)
-                       tp->snd_nxt = old_snd_nxt;
                break;
        }
        case TCPT_DELAYFR:
@@ -1227,11 +1331,13 @@ fc_output:
                        break;
 
                VERIFY(SACK_ENABLED(tp));
-               if (CC_ALGO(tp)->pre_fr != NULL)
+               tcp_rexmt_save_state(tp);
+               if (CC_ALGO(tp)->pre_fr != NULL) {
                        CC_ALGO(tp)->pre_fr(tp);
+                       if (TCP_ECN_ENABLED(tp))
+                               tp->ecn_flags |= TE_SENDCWR;
+               }
                ENTER_FASTRECOVERY(tp);
-               if ((tp->ecn_flags & TE_ECN_ON) == TE_ECN_ON)
-                       tp->ecn_flags |= TE_SENDCWR;
 
                tp->t_timer[TCPT_REXMT] = 0;
                tcpstat.tcps_sack_recovery_episode++;
@@ -1332,7 +1438,6 @@ need_to_resched_timerlist(u_int32_t runtime, u_int16_t mode)
 void
 tcp_sched_timerlist(uint32_t offset) 
 {
-
        uint64_t deadline = 0;
        struct tcptimerlist *listp = &tcp_timer_list;
 
@@ -1361,8 +1466,9 @@ tcp_sched_timerlist(uint32_t offset)
  * timers for this connection.
  */
 u_int32_t
-tcp_run_conn_timer(struct tcpcb *tp, u_int16_t *te_mode) {
-
+tcp_run_conn_timer(struct tcpcb *tp, u_int16_t *te_mode,
+       u_int16_t probe_if_index)
+{
        struct socket *so;
        u_int16_t i = 0, index = TCPT_NONE, lo_index = TCPT_NONE;
        u_int32_t timer_val, offset = 0, lo_timer = 0;
@@ -1390,6 +1496,18 @@ tcp_run_conn_timer(struct tcpcb *tp, u_int16_t *te_mode) {
                goto done;
        }
 
+       /*
+        * If this connection is over an interface that needs to
+        * be probed, send probe packets to reinitiate communication.
+        */
+       if (probe_if_index > 0 && tp->t_inpcb->inp_last_outifp != NULL &&
+           tp->t_inpcb->inp_last_outifp->if_index == probe_if_index) {
+               tp->t_flagsext |= TF_PROBING;
+               tcp_timers(tp, TCPT_PTO);
+               tp->t_timer[TCPT_PTO] = 0;
+               tp->t_flagsext &= ~TF_PROBING;
+       }
+
        /*
         * Since the timer thread needs to wait for tcp lock, it may race
         * with another thread that can cancel or reschedule the timer
@@ -1551,7 +1669,8 @@ tcp_run_timerlist(void * arg1, void * arg2) {
 
                lck_mtx_unlock(listp->mtx);
 
-               offset = tcp_run_conn_timer(tp, &te_mode);
+               offset = tcp_run_conn_timer(tp, &te_mode,
+                   listp->probe_if_index);
                
                lck_mtx_lock(listp->mtx);
 
@@ -1617,12 +1736,13 @@ tcp_run_timerlist(void * arg1, void * arg2) {
        listp->running = FALSE;
        listp->pref_mode = 0;
        listp->pref_offset = 0;
+       listp->probe_if_index = 0;
 
        lck_mtx_unlock(listp->mtx);
 }
 
 /*
- * Function to check if the timerlist needs to be reschduled to run this
+ * Function to check if the timerlist needs to be rescheduled to run this
  * connection's timers correctly.
  */
 void 
@@ -1745,7 +1865,8 @@ done:
 }
                
 static inline void
-tcp_set_lotimer_index(struct tcpcb *tp) {
+tcp_set_lotimer_index(struct tcpcb *tp)
+{
        uint16_t i, lo_index = TCPT_NONE, mode = 0;
        uint32_t lo_timer = 0;
        for (i = 0; i < TCPT_NTIMERS; ++i) {
@@ -1770,8 +1891,8 @@ tcp_set_lotimer_index(struct tcpcb *tp) {
 }
 
 void
-tcp_check_timer_state(struct tcpcb *tp) {
-
+tcp_check_timer_state(struct tcpcb *tp)
+{
        lck_mtx_assert(&tp->t_inpcb->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
 
        if (tp->t_inpcb->inp_flags2 & INP2_TIMEWAIT)
@@ -1783,6 +1904,19 @@ tcp_check_timer_state(struct tcpcb *tp) {
        return;
 }
 
+static inline void
+tcp_cumulative_stat(u_int32_t cur, u_int32_t *prev, u_int32_t *dest)
+{
+       /* handle wrap around */
+       int32_t diff = (int32_t) (cur - *prev);
+       if (diff > 0)
+               *dest = diff;
+       else
+               *dest = 0;
+       *prev = cur;
+       return;
+}
+
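A quick standalone check of the delta computation above, using a counter that has just wrapped past 2^32 (hypothetical values, not from the commit):

#include <stdio.h>
#include <stdint.h>

static void
cumulative_stat(uint32_t cur, uint32_t *prev, uint32_t *dest)
{
	/* unsigned subtraction yields the right small delta even across a
	 * 32-bit wrap; a negative signed result (e.g. a reset counter) is
	 * reported as 0 */
	int32_t diff = (int32_t)(cur - *prev);

	*dest = (diff > 0) ? (uint32_t)diff : 0;
	*prev = cur;
}

int
main(void)
{
	uint32_t prev = 0xfffffff0U, delta = 0;

	cumulative_stat(0x00000020U, &prev, &delta);   /* counter wrapped around */
	printf("%u\n", delta);                         /* prints 48 */
	return 0;
}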
 __private_extern__ void
 tcp_report_stats(void)
 {
@@ -1790,11 +1924,12 @@ tcp_report_stats(void)
        struct sockaddr_in dst;
        struct sockaddr_in6 dst6;
        struct rtentry *rt = NULL;
+       static struct tcp_last_report_stats prev;
        u_int64_t var, uptime;  
 
 #define        stat    data.u.tcp_stats
        if (((uptime = net_uptime()) - tcp_last_report_time) <
-               TCP_REPORT_STATS_INTERVAL)
+               tcp_report_stats_interval)
                return;
 
        tcp_last_report_time = uptime;
@@ -1869,7 +2004,274 @@ tcp_report_stats(void)
                        (var * 100) / tcpstat.tcps_sndpack;
        }
 
+       if (tcp_ecn_outbound == 1)
+               stat.ecn_client_enabled = 1;
+       if (tcp_ecn_inbound == 1)
+               stat.ecn_server_enabled = 1;
+       tcp_cumulative_stat(tcpstat.tcps_connattempt,
+           &prev.tcps_connattempt, &stat.connection_attempts);
+       tcp_cumulative_stat(tcpstat.tcps_accepts,
+           &prev.tcps_accepts, &stat.connection_accepts);
+       tcp_cumulative_stat(tcpstat.tcps_ecn_client_setup,
+           &prev.tcps_ecn_client_setup, &stat.ecn_client_setup);
+       tcp_cumulative_stat(tcpstat.tcps_ecn_server_setup,
+           &prev.tcps_ecn_server_setup, &stat.ecn_server_setup);
+       tcp_cumulative_stat(tcpstat.tcps_ecn_client_success,
+           &prev.tcps_ecn_client_success, &stat.ecn_client_success);
+       tcp_cumulative_stat(tcpstat.tcps_ecn_server_success,
+           &prev.tcps_ecn_server_success, &stat.ecn_server_success);
+       tcp_cumulative_stat(tcpstat.tcps_ecn_not_supported,
+           &prev.tcps_ecn_not_supported, &stat.ecn_not_supported);
+       tcp_cumulative_stat(tcpstat.tcps_ecn_lost_syn,
+           &prev.tcps_ecn_lost_syn, &stat.ecn_lost_syn);
+       tcp_cumulative_stat(tcpstat.tcps_ecn_lost_synack,
+           &prev.tcps_ecn_lost_synack, &stat.ecn_lost_synack);
+       tcp_cumulative_stat(tcpstat.tcps_ecn_recv_ce,
+           &prev.tcps_ecn_recv_ce, &stat.ecn_recv_ce);
+       tcp_cumulative_stat(tcpstat.tcps_ecn_recv_ece,
+           &prev.tcps_ecn_recv_ece, &stat.ecn_recv_ece);
+       tcp_cumulative_stat(tcpstat.tcps_ecn_recv_ece,
+           &prev.tcps_ecn_recv_ece, &stat.ecn_recv_ece);
+       tcp_cumulative_stat(tcpstat.tcps_ecn_sent_ece,
+           &prev.tcps_ecn_sent_ece, &stat.ecn_sent_ece);
+       tcp_cumulative_stat(tcpstat.tcps_ecn_sent_ece,
+           &prev.tcps_ecn_sent_ece, &stat.ecn_sent_ece);
+       tcp_cumulative_stat(tcpstat.tcps_ecn_conn_recv_ce,
+           &prev.tcps_ecn_conn_recv_ce, &stat.ecn_conn_recv_ce);
+       tcp_cumulative_stat(tcpstat.tcps_ecn_conn_recv_ece,
+           &prev.tcps_ecn_conn_recv_ece, &stat.ecn_conn_recv_ece);
+       tcp_cumulative_stat(tcpstat.tcps_ecn_conn_plnoce,
+           &prev.tcps_ecn_conn_plnoce, &stat.ecn_conn_plnoce);
+       tcp_cumulative_stat(tcpstat.tcps_ecn_conn_pl_ce,
+           &prev.tcps_ecn_conn_pl_ce, &stat.ecn_conn_pl_ce);
+       tcp_cumulative_stat(tcpstat.tcps_ecn_conn_nopl_ce,
+           &prev.tcps_ecn_conn_nopl_ce, &stat.ecn_conn_nopl_ce);
+       tcp_cumulative_stat(tcpstat.tcps_tfo_syn_data_rcv,
+           &prev.tcps_tfo_syn_data_rcv, &stat.tfo_syn_data_rcv);
+       tcp_cumulative_stat(tcpstat.tcps_tfo_cookie_req_rcv,
+           &prev.tcps_tfo_cookie_req_rcv, &stat.tfo_cookie_req_rcv);
+       tcp_cumulative_stat(tcpstat.tcps_tfo_cookie_sent,
+           &prev.tcps_tfo_cookie_sent, &stat.tfo_cookie_sent);
+       tcp_cumulative_stat(tcpstat.tcps_tfo_cookie_invalid,
+           &prev.tcps_tfo_cookie_invalid, &stat.tfo_cookie_invalid);
+       tcp_cumulative_stat(tcpstat.tcps_tfo_cookie_req,
+           &prev.tcps_tfo_cookie_req, &stat.tfo_cookie_req);
+       tcp_cumulative_stat(tcpstat.tcps_tfo_cookie_rcv,
+           &prev.tcps_tfo_cookie_rcv, &stat.tfo_cookie_rcv);
+       tcp_cumulative_stat(tcpstat.tcps_tfo_syn_data_sent,
+           &prev.tcps_tfo_syn_data_sent, &stat.tfo_syn_data_sent);
+       tcp_cumulative_stat(tcpstat.tcps_tfo_syn_data_acked,
+           &prev.tcps_tfo_syn_data_acked, &stat.tfo_syn_data_acked);
+       tcp_cumulative_stat(tcpstat.tcps_tfo_syn_loss,
+           &prev.tcps_tfo_syn_loss, &stat.tfo_syn_loss);
+       tcp_cumulative_stat(tcpstat.tcps_tfo_blackhole,
+           &prev.tcps_tfo_blackhole, &stat.tfo_blackhole);
+
        nstat_sysinfo_send_data(&data);
 
 #undef stat
 }
+
+void
+tcp_interface_send_probe(u_int16_t probe_if_index)
+{
+       int32_t offset = 0;
+       struct tcptimerlist *listp = &tcp_timer_list;
+
+       /* Make sure TCP clock is up to date */
+       calculate_tcp_clock();
+
+       lck_mtx_lock(listp->mtx);
+       if (listp->probe_if_index > 0) {
+               tcpstat.tcps_probe_if_conflict++;
+               goto done;
+       }
+
+       listp->probe_if_index = probe_if_index;
+       if (listp->running)
+               goto done;
+
+       /*
+        * Reschedule the timerlist to run within the next 10ms, which is
+        * the fastest that we can do.
+        */
+       offset = TCP_TIMER_10MS_QUANTUM;
+       if (listp->scheduled) {
+               int32_t diff;
+               diff = timer_diff(listp->runtime, 0, tcp_now, offset);
+               if (diff <= 0) {
+                       /* The timer will fire sooner than what's needed */
+                       goto done;
+               }
+       }
+       listp->mode = TCP_TIMERLIST_10MS_MODE;
+       listp->idleruns = 0;
+
+       tcp_sched_timerlist(offset);
+
+done:
+       lck_mtx_unlock(listp->mtx);
+       return;
+}
+
+/*
+ * Enable read probes on this connection, if:
+ * - it is in established state
+ * - doesn't have any data outstanding
+ * - the outgoing ifp matches
+ * - we have not already sent any read probes
+ */
+static void
+tcp_enable_read_probe(struct tcpcb *tp, struct ifnet *ifp)
+{
+       if (tp->t_state == TCPS_ESTABLISHED &&
+           tp->snd_max == tp->snd_una &&
+           tp->t_inpcb->inp_last_outifp == ifp &&
+           !(tp->t_flagsext & TF_DETECT_READSTALL) &&
+           tp->t_rtimo_probes == 0) {
+               tp->t_flagsext |= TF_DETECT_READSTALL;
+               tp->t_rtimo_probes = 0;
+               tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
+                   TCP_TIMER_10MS_QUANTUM);
+               if (tp->tentry.index == TCPT_NONE) {
+                       tp->tentry.index = TCPT_KEEP;
+                       tp->tentry.runtime = tcp_now +
+                           TCP_TIMER_10MS_QUANTUM;
+               } else {
+                       int32_t diff = 0;
+
+                       /* Reset runtime to be in next 10ms */
+                       diff = timer_diff(tp->tentry.runtime, 0,
+                           tcp_now, TCP_TIMER_10MS_QUANTUM);
+                       if (diff > 0) {
+                               tp->tentry.index = TCPT_KEEP;
+                               tp->tentry.runtime = tcp_now +
+                                   TCP_TIMER_10MS_QUANTUM;
+                               if (tp->tentry.runtime == 0)
+                                       tp->tentry.runtime++;
+                       }
+               }
+       }
+}
+
+/*
+ * Disable read probe and reset the keep alive timer
+ */
+static void
+tcp_disable_read_probe(struct tcpcb *tp)
+{
+       if (tp->t_adaptive_rtimo == 0 &&
+           ((tp->t_flagsext & TF_DETECT_READSTALL) ||
+           tp->t_rtimo_probes > 0)) {
+               tcp_keepalive_reset(tp);
+       }
+}
+
+/*
+ * Reschedule the tcp timerlist in the next 10ms to re-enable read/write
+ * probes on connections going over a particular interface.
+ */
+void
+tcp_probe_connectivity(struct ifnet *ifp, u_int32_t enable)
+{
+       int32_t offset;
+       struct tcptimerlist *listp = &tcp_timer_list;
+       struct inpcbinfo *pcbinfo = &tcbinfo;
+       struct inpcb *inp, *nxt;
+
+       if (ifp == NULL)
+               return;
+
+       /* update clock */
+       calculate_tcp_clock();
+
+       /*
+        * Enable keep alive timer on all connections that are
+        * active/established on this interface.
+        */
+       lck_rw_lock_shared(pcbinfo->ipi_lock);
+
+       LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, nxt) {
+               struct tcpcb *tp = NULL;
+               if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) ==
+                   WNT_STOPUSING)
+                       continue;
+
+               /* Acquire lock to look at the state of the connection */
+               tcp_lock(inp->inp_socket, 1, 0);
+
+               /* Release the want count */
+               if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
+                       tcp_unlock(inp->inp_socket, 1, 0);
+                       continue;
+               }
+
+               tp = intotcpcb(inp);
+               if (enable)
+                       tcp_enable_read_probe(tp, ifp);
+               else
+                       tcp_disable_read_probe(tp);
+
+               tcp_unlock(inp->inp_socket, 1, 0);
+       }
+       lck_rw_done(pcbinfo->ipi_lock);
+
+       lck_mtx_lock(listp->mtx);
+       if (listp->running) {
+               listp->pref_mode |= TCP_TIMERLIST_10MS_MODE;
+               goto done;
+       }
+
+       /* Reschedule within the next 10ms */
+       offset = TCP_TIMER_10MS_QUANTUM;
+       if (listp->scheduled) {
+               int32_t diff;
+               diff = timer_diff(listp->runtime, 0, tcp_now, offset);
+               if (diff <= 0) {
+                       /* The timer will fire sooner than what's needed */
+                       goto done;
+               }
+       }
+       listp->mode = TCP_TIMERLIST_10MS_MODE;
+       listp->idleruns = 0;
+
+       tcp_sched_timerlist(offset);
+done:
+       lck_mtx_unlock(listp->mtx);
+       return;
+}
+
+void
+tcp_itimer(struct inpcbinfo *ipi)
+{
+       struct inpcb *inp, *nxt;
+
+       if (lck_rw_try_lock_exclusive(ipi->ipi_lock) == FALSE) {
+               if (tcp_itimer_done == TRUE) {
+                       tcp_itimer_done = FALSE;
+                       atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1);
+                       return;
+               }
+               /* Upgrade failed, lost the lock; now take it again exclusive */
+               lck_rw_lock_exclusive(ipi->ipi_lock);
+       }
+       tcp_itimer_done = TRUE;
+
+       LIST_FOREACH_SAFE(inp, &tcb, inp_list, nxt) {
+               struct socket *so;
+
+               if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
+                       continue;
+               so = inp->inp_socket;
+               tcp_lock(so, 1, 0);
+               if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
+                       tcp_unlock(so, 1, 0);
+                       continue;
+               }
+               so_check_extended_bk_idle_time(so);
+               tcp_unlock(so, 1, 0);
+       }
+
+       lck_rw_done(ipi->ipi_lock);
+}
+
index 0e7a43f119bc939d6fbe4d1ae02a3b0dd6612a86..177cd162c29e99645918bf5db0587d472d037af3 100644 (file)
@@ -262,6 +262,7 @@ struct tcptimerlist {
        uint32_t pref_offset;   /* Preferred offset set by a connection */
        uint32_t idleruns;      /* Number of times the list has been idle in fast mode */
        struct tcptimerentry *next_te;  /* next timer entry pointer to process */
+       u_int16_t probe_if_index; /* Interface index that needs to send probes */
 
 };
 
index 350884ae144e7e95fd86a5692c0c583e7667c37c..96b17ba5afc36dcc5d686091895d01c405f08f98 100644 (file)
@@ -121,6 +121,8 @@ void        tcp_fill_info(struct tcpcb *, struct tcp_info *);
 errno_t tcp_fill_info_for_info_tuple(struct info_tuple *, struct tcp_info *);
 
 int tcp_sysctl_info(struct sysctl_oid *, void *, int , struct sysctl_req *);
+static void tcp_connection_fill_info(struct tcpcb *tp,
+    struct tcp_connection_info *tci);
 
 /*
  * TCP protocol interface to socket abstraction.
@@ -395,6 +397,27 @@ tcp6_usr_listen(struct socket *so, struct proc *p)
 }
 #endif /* INET6 */
 
+static int
+tcp_connect_complete(struct socket *so)
+{
+       struct tcpcb *tp = sototcpcb(so);
+       int error = 0;
+
+       /* TFO delays the tcp_output until later, when the app calls write() */
+       if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
+               if (!necp_socket_is_allowed_to_send_recv(sotoinpcb(so), NULL, NULL))
+                       return (EHOSTUNREACH);
+
+               /* Initialize enough state so that we can actually send data */
+               tcp_mss(tp, -1, IFSCOPE_NONE);
+               tp->snd_wnd = tp->t_maxseg;
+       } else {
+               error = tcp_output(tp);
+       }
+
+       return (error);
+}
+
 /*
  * Initiate connection to peer.
  * Create a template for use in transmissions on this connection.
@@ -433,6 +456,9 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
                } else {
                        error = ENETDOWN;
                }
+
+               /* Disable PRECONNECT_DATA, as we don't need to send a SYN anymore. */
+               so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
                return error;
        }
 #endif /* FLOW_DIVERT */
@@ -463,15 +489,18 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
 
        if ((error = tcp_connect(tp, nam, p)) != 0)
                goto out;
-       error = tcp_output(tp);
+
+       error = tcp_connect_complete(so);
+
        COMMON_END(PRU_CONNECT);
 }
 
 static int
 tcp_usr_connectx_common(struct socket *so, int af,
     struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl,
-    struct proc *p, uint32_t ifscope, associd_t aid, connid_t *pcid,
-    uint32_t flags, void *arg, uint32_t arglen)
+    struct proc *p, uint32_t ifscope, sae_associd_t aid, sae_connid_t *pcid,
+    uint32_t flags, void *arg, uint32_t arglen, struct uio *auio,
+    user_ssize_t *bytes_written)
 {
 #pragma unused(aid)
 #if !MPTCP
@@ -480,6 +509,7 @@ tcp_usr_connectx_common(struct socket *so, int af,
        struct sockaddr_entry *src_se = NULL, *dst_se = NULL;
        struct inpcb *inp = sotoinpcb(so);
        int error;
+       user_ssize_t datalen = 0;
 
        if (inp == NULL)
                return (EINVAL);
@@ -499,7 +529,11 @@ tcp_usr_connectx_common(struct socket *so, int af,
 #if NECP
        inp_update_necp_policy(inp, src_se ? src_se->se_addr : NULL, dst_se ? dst_se->se_addr : NULL, ifscope);
 #endif /* NECP */
-       
+
+       if ((so->so_flags1 & SOF1_DATA_IDEMPOTENT) &&
+           (tcp_fastopen & TCP_FASTOPEN_CLIENT))
+               sototcpcb(so)->t_flagsext |= TF_FASTOPEN;
+
        /*
         * We get here for 2 cases:
         *
@@ -513,7 +547,7 @@ tcp_usr_connectx_common(struct socket *so, int af,
         *      bind to source address and/or interface as necessary.
         */
 #if MPTCP
-       if (flags & TCP_CONNREQF_MPTCP) {
+       if (flags & CONNREQF_MPTCP) {
                struct mptsub_connreq *mpcr = arg;
 
                /* Check to make sure this came down from MPTCP */
@@ -559,8 +593,37 @@ tcp_usr_connectx_common(struct socket *so, int af,
                /* NOTREACHED */
        }
 
+       if (error != 0)
+               return (error);
+
+       /* if there is data, copy it */
+       if (auio != NULL) {
+               socket_unlock(so, 0);
+
+               VERIFY(bytes_written != NULL);
+
+               datalen = uio_resid(auio);
+               error = so->so_proto->pr_usrreqs->pru_sosend(so, NULL,
+                   (uio_t)auio, NULL, NULL, 0);
+               socket_lock(so, 0);
+
+               if (error == 0 || error == EWOULDBLOCK)
+                       *bytes_written = datalen - uio_resid(auio);
+
+               /*
+                * sosend returns EWOULDBLOCK if the socket is non-blocking
+                * or a timeout occurred (this lets sendit() report the
+                * amount of queued data that was accepted).
+                *
+                * However, connectx() must return EINPROGRESS for a
+                * blocking socket, so the return value is changed here.
+                */
+               if (error == EWOULDBLOCK)
+                       error = EINPROGRESS;
+       }
+
        if (error == 0 && pcid != NULL)
-               *pcid = 1;      /* there is only 1 connection for a TCP */
+               *pcid = 1; /* there is only one connection in regular TCP */
 
        return (error);
 }
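From user space this path is driven by connectx(2) with an iovec attached; a minimal sketch, assuming the sa_endpoints_t/connectx(2) interface and the CONNECT_DATA_IDEMPOTENT flag exported by <sys/socket.h> in this release (the helper name and error handling are illustrative only):

#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <string.h>

static int
tfo_connect_with_data(int s, const struct sockaddr_in *dst, void *buf,
    size_t buflen, size_t *bytes_sent)
{
	sa_endpoints_t sae;
	struct iovec iov;
	sae_connid_t cid;

	memset(&sae, 0, sizeof(sae));
	sae.sae_dstaddr = (const struct sockaddr *)dst;
	sae.sae_dstaddrlen = sizeof(*dst);

	iov.iov_base = buf;
	iov.iov_len = buflen;

	/*
	 * CONNECT_DATA_IDEMPOTENT sets SOF1_DATA_IDEMPOTENT, which (with the
	 * client bit in net.inet.tcp.fastopen) turns on TF_FASTOPEN above.
	 * On a blocking socket a queued-but-unsent payload is reported as
	 * EINPROGRESS rather than EWOULDBLOCK, per tcp_usr_connectx_common().
	 */
	return (connectx(s, &sae, SAE_ASSOCID_ANY, CONNECT_DATA_IDEMPOTENT,
	    &iov, 1, bytes_sent, &cid));
}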
@@ -568,11 +631,12 @@ tcp_usr_connectx_common(struct socket *so, int af,
 static int
 tcp_usr_connectx(struct socket *so, struct sockaddr_list **src_sl,
     struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
-    associd_t aid, connid_t *pcid, uint32_t flags, void *arg,
-    uint32_t arglen)
+    sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
+    uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
 {
        return (tcp_usr_connectx_common(so, AF_INET, src_sl, dst_sl,
-           p, ifscope, aid, pcid, flags, arg, arglen));
+           p, ifscope, aid, pcid, flags, arg, arglen, uio,
+           bytes_written));
 }
 
 #if INET6
@@ -648,27 +712,28 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
                inp->inp_vflag &= ~INP_IPV6;
                if ((error = tcp_connect(tp, (struct sockaddr *)&sin, p)) != 0)
                        goto out;
-               error = tcp_output(tp);
+
+               error = tcp_connect_complete(so);
                goto out;
        }
        inp->inp_vflag &= ~INP_IPV4;
        inp->inp_vflag |= INP_IPV6;
        if ((error = tcp6_connect(tp, nam, p)) != 0)
                goto out;
-       error = tcp_output(tp);
-       if (error)
-               goto out;
+
+       error = tcp_connect_complete(so);
        COMMON_END(PRU_CONNECT);
 }
 
 static int
 tcp6_usr_connectx(struct socket *so, struct sockaddr_list **src_sl,
     struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
-    associd_t aid, connid_t *pcid, uint32_t flags, void *arg,
-    uint32_t arglen)
+    sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
+    uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
 {
        return (tcp_usr_connectx_common(so, AF_INET6, src_sl, dst_sl,
-           p, ifscope, aid, pcid, flags, arg, arglen));
+           p, ifscope, aid, pcid, flags, arg, arglen, uio,
+           bytes_written));
 }
 #endif /* INET6 */
 
@@ -704,10 +769,10 @@ tcp_usr_disconnect(struct socket *so)
  * User-protocol pru_disconnectx callback.
  */
 static int
-tcp_usr_disconnectx(struct socket *so, associd_t aid, connid_t cid)
+tcp_usr_disconnectx(struct socket *so, sae_associd_t aid, sae_connid_t cid)
 {
 #pragma unused(cid)
-       if (aid != ASSOCID_ANY && aid != ASSOCID_ALL)
+       if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL)
                return (EINVAL);
 
        return (tcp_usr_disconnect(so));
@@ -871,7 +936,13 @@ tcp_usr_rcvd(struct socket *so, __unused int flags)
                goto out;
        tcp_sbrcv_trim(tp, &so->so_rcv);
 
-       tcp_output(tp);
+       /*
+        * This tcp_output is solely there to trigger window-updates.
+        * However, we really do not want these window-updates while we
+        * are still in SYN_SENT or SYN_RECEIVED.
+        */
+       if (TCPS_HAVEESTABLISHED(tp->t_state))
+               tcp_output(tp);
 
 #if CONTENT_FILTER
        cfil_sock_buf_update(&so->so_rcv);
@@ -995,7 +1066,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
        VERIFY(!(so->so_flags & SOF_MP_SUBFLOW) ||
            (so->so_snd.sb_flags & SB_NOCOMPRESS));
 
-       if(!(flags & PRUS_OOB)) {
+       if(!(flags & PRUS_OOB) || (so->so_flags1 & SOF1_PRECONNECT_DATA)) {
                /* Call msg send if message delivery is enabled */
                if (so->so_flags & SOF_ENABLE_MSGS)
                        sbappendmsg_snd(&so->so_snd, m);
@@ -1003,6 +1074,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
                        sbappendstream(&so->so_snd, m);
 
                if (nam && tp->t_state < TCPS_SYN_SENT) {
+
                        /*
                         * Do implied connect if not yet connected,
                         * initialize window to default value, and
@@ -1076,6 +1148,19 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
                error = tcp_output(tp);
                tp->t_flagsext &= ~TF_FORCE;
        }
+
+
+       /*
+        * We wait for the socket to successfully connect before returning.
+        * This allows us to signal a timeout to the application.
+        */
+       if (so->so_state & SS_ISCONNECTING) {
+               if (so->so_state & SS_NBIO)
+                       error = EWOULDBLOCK;
+               else
+                       error = sbwait(&so->so_snd);
+       }
+
        COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB : 
                   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
 }
@@ -1133,6 +1218,17 @@ tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
        COMMON_END(PRU_RCVOOB);
 }
 
+static int
+tcp_usr_preconnect(struct socket *so)
+{
+       int error = tcp_output(sototcpcb(so));
+
+       /* One read has been done. This was enough. Get back to "normal" behavior. */
+       so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
+
+       COMMON_END(PRU_PRECONNECT);
+}
+
 /* xxx - should be const */
 struct pr_usrreqs tcp_usrreqs = {
        .pru_abort =            tcp_usr_abort,
@@ -1154,6 +1250,7 @@ struct pr_usrreqs tcp_usrreqs = {
        .pru_sockaddr =         in_getsockaddr,
        .pru_sosend =           sosend,
        .pru_soreceive =        soreceive,
+       .pru_preconnect =       tcp_usr_preconnect,
 };
 
 #if INET6
@@ -1177,6 +1274,7 @@ struct pr_usrreqs tcp6_usrreqs = {
        .pru_sockaddr =         in6_mapped_sockaddr,
        .pru_sosend =           sosend,
        .pru_soreceive =        soreceive,
+       .pru_preconnect =       tcp_usr_preconnect,
 };
 #endif /* INET6 */
 
@@ -1462,21 +1560,19 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
        ti->tcpi_state = tp->t_state;
        
        if (tp->t_state > TCPS_LISTEN) {
-               if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
+               if (TSTMP_SUPPORTED(tp))
                        ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
-               if (tp->t_flags & TF_SACK_PERMIT)
+               if (SACK_ENABLED(tp))
                        ti->tcpi_options |= TCPI_OPT_SACK;
-               if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
+               if (TCP_WINDOW_SCALE_ENABLED(tp)) {
                        ti->tcpi_options |= TCPI_OPT_WSCALE;
                        ti->tcpi_snd_wscale = tp->snd_scale;
                        ti->tcpi_rcv_wscale = tp->rcv_scale;
                }
 
                /* Are we in a retransmission episode? */
-               if (tp->snd_max != tp->snd_nxt)
+               if (IN_FASTRECOVERY(tp) || tp->t_rxtshift > 0)
                        ti->tcpi_flags |= TCPI_FLAG_LOSSRECOVERY;
-               else
-                       ti->tcpi_flags &= ~TCPI_FLAG_LOSSRECOVERY;
 
                ti->tcpi_rto = tp->t_timer[TCPT_REXMT] ? tp->t_rxtcur : 0;
                ti->tcpi_snd_mss = tp->t_maxseg;
@@ -1489,7 +1585,7 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
 
                ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
                ti->tcpi_snd_cwnd = tp->snd_cwnd;
-               ti->tcpi_snd_sbbytes = tp->t_inpcb->inp_socket->so_snd.sb_cc;
+               ti->tcpi_snd_sbbytes = inp->inp_socket->so_snd.sb_cc;
        
                ti->tcpi_rcv_space = tp->rcv_wnd;
 
@@ -1535,6 +1631,18 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
                ti->tcpi_wired_rxbytes = inp->inp_Wstat->rxbytes;
                ti->tcpi_wired_txpackets = inp->inp_Wstat->txpackets;
                ti->tcpi_wired_txbytes = inp->inp_Wstat->txbytes;
+               tcp_get_connectivity_status(tp, &ti->tcpi_connstatus);
+
+               ti->tcpi_tfo_syn_data_rcv = !!(tp->t_tfo_stats & TFO_S_SYNDATA_RCV);
+               ti->tcpi_tfo_cookie_req_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIEREQ_RECV);
+               ti->tcpi_tfo_cookie_sent = !!(tp->t_tfo_stats & TFO_S_COOKIE_SENT);
+               ti->tcpi_tfo_cookie_invalid = !!(tp->t_tfo_stats & TFO_S_COOKIE_INVALID);
+
+               ti->tcpi_tfo_cookie_req = !!(tp->t_tfo_stats & TFO_S_COOKIE_REQ);
+               ti->tcpi_tfo_cookie_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIE_RCV);
+               ti->tcpi_tfo_syn_data_sent = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_SENT);
+               ti->tcpi_tfo_syn_data_acked = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED);
+               ti->tcpi_tfo_syn_loss = !!(tp->t_tfo_stats & TFO_S_SYN_LOSS);
        }
 }
 
@@ -1599,6 +1707,59 @@ tcp_fill_info_for_info_tuple(struct info_tuple *itpl, struct tcp_info *ti)
        return 0;
 }
 
+static void
+tcp_connection_fill_info(struct tcpcb *tp, struct tcp_connection_info *tci)
+{
+       struct inpcb *inp = tp->t_inpcb;
+
+       bzero(tci, sizeof(*tci));
+       tci->tcpi_state = tp->t_state;
+       if (tp->t_state > TCPS_LISTEN) {
+               if (TSTMP_SUPPORTED(tp))
+                       tci->tcpi_options |= TCPCI_OPT_TIMESTAMPS;
+               if (SACK_ENABLED(tp))
+                       tci->tcpi_options |= TCPCI_OPT_SACK;
+               if (TCP_WINDOW_SCALE_ENABLED(tp)) {
+                       tci->tcpi_options |= TCPCI_OPT_WSCALE;
+                       tci->tcpi_snd_wscale = tp->snd_scale;
+                       tci->tcpi_rcv_wscale = tp->rcv_scale;
+               }
+               if (TCP_ECN_ENABLED(tp))
+                       tci->tcpi_options |= TCPCI_OPT_ECN;
+               if (IN_FASTRECOVERY(tp) || tp->t_rxtshift > 0)
+                       tci->tcpi_flags |= TCPCI_FLAG_LOSSRECOVERY;
+               if (tp->t_flagsext & TF_PKTS_REORDERED)
+                       tci->tcpi_flags |= TCPCI_FLAG_REORDERING_DETECTED;
+               tci->tcpi_rto = (tp->t_timer[TCPT_REXMT] > 0) ?
+                       tp->t_rxtcur : 0;
+               tci->tcpi_maxseg = tp->t_maxseg;
+               tci->tcpi_snd_ssthresh = tp->snd_ssthresh;
+               tci->tcpi_snd_cwnd = tp->snd_cwnd;
+               tci->tcpi_snd_wnd = tp->snd_wnd;
+               tci->tcpi_snd_sbbytes = inp->inp_socket->so_snd.sb_cc;
+               tci->tcpi_rcv_wnd = tp->rcv_wnd;
+               tci->tcpi_rttcur = tp->t_rttcur;
+               tci->tcpi_srtt = (tp->t_srtt >> TCP_RTT_SHIFT);
+               tci->tcpi_rttvar = (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
+               tci->tcpi_txpackets = inp->inp_stat->txpackets;
+               tci->tcpi_txbytes = inp->inp_stat->txbytes;
+               tci->tcpi_txretransmitbytes = tp->t_stat.txretransmitbytes;
+               tci->tcpi_rxpackets = inp->inp_stat->rxpackets;
+               tci->tcpi_rxbytes = inp->inp_stat->rxbytes;
+               tci->tcpi_rxoutoforderbytes = tp->t_stat.rxoutoforderbytes;
+
+               tci->tcpi_tfo_syn_data_rcv = !!(tp->t_tfo_stats & TFO_S_SYNDATA_RCV);
+               tci->tcpi_tfo_cookie_req_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIEREQ_RECV);
+               tci->tcpi_tfo_cookie_sent = !!(tp->t_tfo_stats & TFO_S_COOKIE_SENT);
+               tci->tcpi_tfo_cookie_invalid = !!(tp->t_tfo_stats & TFO_S_COOKIE_INVALID);
+               tci->tcpi_tfo_cookie_req = !!(tp->t_tfo_stats & TFO_S_COOKIE_REQ);
+               tci->tcpi_tfo_cookie_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIE_RCV);
+               tci->tcpi_tfo_syn_data_sent = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_SENT);
+               tci->tcpi_tfo_syn_data_acked = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED);
+               tci->tcpi_tfo_syn_loss = !!(tp->t_tfo_stats & TFO_S_SYN_LOSS);
+       }
+}
+
 
 __private_extern__ int 
 tcp_sysctl_info(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
@@ -1978,11 +2139,11 @@ tcp_ctloutput(so, sopt)
                        break;
                case TCP_ADAPTIVE_READ_TIMEOUT:
                        error = sooptcopyin(sopt, &optval, sizeof (optval),
-                               sizeof(optval));
+                           sizeof(optval));
                        if (error)
                                break;
                        if (optval < 0 || 
-                               optval > TCP_ADAPTIVE_TIMEOUT_MAX) {
+                           optval > TCP_ADAPTIVE_TIMEOUT_MAX) {
                                error = EINVAL;
                                break;
                        } else if (optval == 0) {
@@ -1994,11 +2155,11 @@ tcp_ctloutput(so, sopt)
                        break;
                case TCP_ADAPTIVE_WRITE_TIMEOUT:
                        error = sooptcopyin(sopt, &optval, sizeof (optval),
-                               sizeof (optval));
+                           sizeof (optval));
                        if (error)
                                break;
                        if (optval < 0 || 
-                               optval > TCP_ADAPTIVE_TIMEOUT_MAX) {
+                           optval > TCP_ADAPTIVE_TIMEOUT_MAX) {
                                error = EINVAL;
                                break;
                        } else {
@@ -2076,6 +2237,29 @@ tcp_ctloutput(so, sopt)
                                        tcp_pmtud_revert_segment_size(tp);
                        }
                        break;
+               case TCP_FASTOPEN:
+                       if (!(tcp_fastopen & TCP_FASTOPEN_SERVER)) {
+                               error = ENOTSUP;
+                               break;
+                       }
+
+                       error = sooptcopyin(sopt, &optval, sizeof(optval),
+                               sizeof(optval));
+                       if (error)
+                               break;
+                       if (optval < 0 || optval > 1) {
+                               error = EINVAL;
+                               break;
+                       }
+                       if (tp->t_state != TCPS_LISTEN) {
+                               error =  EINVAL;
+                               break;
+                       }
+                       if (optval)
+                               tp->t_flagsext |= TF_FASTOPEN;
+                       else
+                               tcp_disable_tfo(tp);
+                       break;
                case SO_FLUSH:
                        if ((error = sooptcopyin(sopt, &optval, sizeof (optval),
                            sizeof (optval))) != 0)
@@ -2145,6 +2329,14 @@ tcp_ctloutput(so, sopt)
                case TCP_NOTIMEWAIT:
                        optval = (tp->t_flagsext & TF_NOTIMEWAIT) ? 1 : 0;
                        break;
+               case TCP_FASTOPEN:
+                       if (tp->t_state != TCPS_LISTEN ||
+                           !(tcp_fastopen & TCP_FASTOPEN_SERVER)) {
+                               error = ENOTSUP;
+                               break;
+                       }
+                       optval = tfo_enabled(tp);
+                       break;
                case TCP_MEASURE_SND_BW:
                        optval = tp->t_flagsext & TF_MEASURESNDBW;
                        break;
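Both halves of the new TCP_FASTOPEN option above are exercised with plain socket calls; a minimal sketch for the listener side, assuming TCP_FASTOPEN is exported through <netinet/tcp.h> in this release (the helper name is illustrative):

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

static int
listen_with_tfo(int s, int backlog)
{
	int on = 1;

	if (listen(s, backlog) == -1)
		return (-1);
	/*
	 * The setsockopt() must come after listen(): the handler above
	 * returns EINVAL unless the tcpcb is in TCPS_LISTEN, and ENOTSUP
	 * unless net.inet.tcp.fastopen has TCP_FASTOPEN_SERVER set.
	 */
	return (setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &on, sizeof(on)));
}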
@@ -2156,6 +2348,13 @@ tcp_ctloutput(so, sopt)
                        goto done;
                        /* NOT REACHED */
                }
+               case TCP_CONNECTION_INFO: {
+                       struct tcp_connection_info tci;
+                       tcp_connection_fill_info(tp, &tci);
+                       error = sooptcopyout(sopt, &tci,
+                           sizeof(struct tcp_connection_info));
+                       goto done;
+               }
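The new getsockopt case above pairs with tcp_connection_fill_info(); a user-space sketch, assuming TCP_CONNECTION_INFO and struct tcp_connection_info are published through <netinet/tcp.h> with the field names used in this diff:

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>

static void
print_conn_info(int s)
{
	struct tcp_connection_info tci;
	socklen_t len = sizeof(tci);

	if (getsockopt(s, IPPROTO_TCP, TCP_CONNECTION_INFO, &tci, &len) != 0)
		return;
	/* srtt/rttvar are smoothed values; the TFO fields are 0/1 flags */
	printf("state=%u srtt=%u rttvar=%u cwnd=%u tfo_syn_data_acked=%u\n",
	    (unsigned)tci.tcpi_state, (unsigned)tci.tcpi_srtt,
	    (unsigned)tci.tcpi_rttvar, (unsigned)tci.tcpi_snd_cwnd,
	    (unsigned)tci.tcpi_tfo_syn_data_acked);
}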
                case TCP_MEASURE_BW_BURST: {
                        struct tcp_measure_bw_burst out;
                        if ((tp->t_flagsext & TF_MEASURESNDBW) == 0 ||
@@ -2271,6 +2470,14 @@ SYSCTL_PROC(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLTYPE_INT | CTLFLAG_RW
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_recvspace , 0, &sysctl_tcp_sospace, "IU", "Maximum incoming TCP datagram size");
 
+/* Sysctl for testing and tuning the connectx-with-data API */
+#define TCP_PRECONNECT_SBSZ_MAX 1460
+#define TCP_PRECONNECT_SBSZ_MIN (TCP_MSS)
+#define TCP_PRECONNECT_SBSZ_DEF        (TCP6_MSS)
+static int tcp_preconnect_sbspace = TCP_PRECONNECT_SBSZ_DEF;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, preconn_sbsz, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &tcp_preconnect_sbspace, 0, "Maximum preconnect space");
+
 
 /*
  * Attach TCP protocol to socket, allocating
@@ -2306,6 +2513,12 @@ tcp_attach(so, p)
                if (error)
                        return (error);
        }
+
+       if (so->so_snd.sb_preconn_hiwat == 0) {
+               soreserve_preconnect(so, imin(TCP_PRECONNECT_SBSZ_MAX,
+                   imax(tcp_preconnect_sbspace, TCP_PRECONNECT_SBSZ_MIN)));
+       }
+
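The sysctl added above can be adjusted at run time; a small sketch using sysctlbyname(3) (the helper is illustrative, and the per-socket reservation is still clamped by tcp_attach() to the MIN/MAX bounds defined earlier):

#include <sys/types.h>
#include <sys/sysctl.h>

static int
set_preconn_sbsz(int bytes)
{
	/* net.inet.tcp.preconn_sbsz bounds how much preconnect data a
	 * socket may buffer before the connection is established */
	return (sysctlbyname("net.inet.tcp.preconn_sbsz", NULL, NULL,
	    &bytes, sizeof(bytes)));
}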
        if ((so->so_rcv.sb_flags & SB_USRSIZE) == 0)
                so->so_rcv.sb_flags |= SB_AUTOSIZE;
        if ((so->so_snd.sb_flags & SB_USRSIZE) == 0)
index bf9fb3f20cf4f5d3a99586945e05a6dd64e8cd34..1ec0559eeaf32ee6c9b577f05a6a93cc1fe10789 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 
 #ifndef _NETINET_TCP_VAR_H_
 #define _NETINET_TCP_VAR_H_
+#include <sys/types.h>
 #include <sys/appleapiopts.h>
 #include <sys/queue.h>
 #include <netinet/in_pcb.h>
+#include <netinet/tcp.h>
 #include <netinet/tcp_timer.h>
 
 #if defined(__LP64__)
@@ -145,7 +147,9 @@ struct name {                               \
  * The maximum value of the adaptive timeout is set to 10, which allows
  * transmission of a sufficient number of probes to the peer.
  */
-#define TCP_ADAPTIVE_TIMEOUT_MAX 10
+#define        TCP_ADAPTIVE_TIMEOUT_MAX        10
+
+#define        TCP_CONNECTIVITY_PROBES_MAX     5
 
 /*
  * Kernel variables for tcp.
@@ -178,6 +182,16 @@ struct sackhint {
        int     sack_bytes_rexmit;
 };
 
+struct tcp_rxt_seg {
+       tcp_seq rx_start;
+       tcp_seq rx_end;
+       u_int16_t rx_count;
+       u_int16_t rx_flags;
+#define        TCP_RXT_SPURIOUS        0x1     /* received DSACK notification */
+#define        TCP_RXT_DSACK_FOR_TLP   0x2
+       SLIST_ENTRY(tcp_rxt_seg) rx_link;
+};
+
 struct tcptemp {
        u_char  tt_ipgen[40]; /* the size must be of max ip header, now IPv6 */
        struct  tcphdr tt_t;
@@ -235,11 +249,11 @@ struct tcp_ccstate {
 struct tcpcb {
        struct  tsegqe_head t_segq;
        int     t_dupacks;              /* consecutive dup acks recd */
+       int     t_state;                /* state of this connection */
        uint32_t t_timer[TCPT_NTIMERS]; /* tcp timers */
        struct tcptimerentry tentry;    /* entry in timer list */
 
        struct  inpcb *t_inpcb;         /* back pointer to internet pcb */
-       int     t_state;                /* state of this connection */
        uint32_t        t_flags;
 #define        TF_ACKNOW       0x00001         /* ack peer immediately */
 #define        TF_DELACK       0x00002         /* ack, but try to delay it */
@@ -291,9 +305,9 @@ struct tcpcb {
        u_int32_t       rcv_wnd;                /* receive window */
        tcp_seq rcv_up;                 /* receive urgent pointer */
 
-       u_int32_t       snd_wnd;                /* send window */
-       u_int32_t       snd_cwnd;               /* congestion-controlled window */
-       u_int32_t       snd_ssthresh;           /* snd_cwnd size threshold for
+       u_int32_t       snd_wnd;        /* send window */
+       u_int32_t       snd_cwnd;       /* congestion-controlled window */
+       u_int32_t       snd_ssthresh;   /* snd_cwnd size threshold for
                                         * slow start exponential to
                                         * linear switch
                                         */
@@ -314,15 +328,16 @@ struct tcpcb {
        int     t_srtt;                 /* smoothed round-trip time */
        int     t_rttvar;               /* variance in round-trip time */
 
+       u_int64_t       t_accsleep_ms;  /* accumulated sleep time since last boot */
        u_int16_t       t_reassqlen;    /* length of reassembly queue */
        u_int16_t       t_rxtshift;     /* log(2) of rexmt exp. backoff */
-       u_int   t_rttmin;               /* minimum rtt allowed */
-       u_int   t_rttbest;              /* best rtt we've seen */
-       u_int   t_rttcur;               /* most recent value of rtt */
-       u_int32_t       t_rttupdated;           /* number of times rtt sampled */
+       u_int32_t       t_rttmin;       /* minimum rtt allowed */
+       u_int32_t       t_rttbest;      /* best rtt we've seen */
+       u_int32_t       t_rttcur;       /* most recent value of rtt */
+       u_int32_t       t_rttupdated;   /* number of times rtt sampled */
        u_int32_t       t_rxt_conndroptime;     /* retxmt conn gets dropped after this time, when set */
-       u_int32_t       t_rxtstart;             /* time at which retransmission started */
-       u_int32_t       max_sndwnd;             /* largest window peer has offered */
+       u_int32_t       t_rxtstart;     /* time at which retransmission started */
+       u_int32_t       max_sndwnd;     /* largest window peer has offered */
 
        int     t_softerror;            /* possible error not yet reported */
 /* out-of-band data */
@@ -331,9 +346,9 @@ struct tcpcb {
 #define        TCPOOB_HAVEDATA 0x01
 #define        TCPOOB_HADDATA  0x02
 /* RFC 1323 variables */
-       u_int8_t        snd_scale;              /* window scaling for send window */
-       u_int8_t        rcv_scale;              /* window scaling for recv window */
-       u_int8_t        request_r_scale;        /* pending window scaling */
+       u_int8_t        snd_scale;      /* window scaling for send window */
+       u_int8_t        rcv_scale;      /* window scaling for recv window */
+       u_int8_t        request_r_scale; /* pending window scaling */
        u_int8_t        requested_s_scale;
        u_int8_t        tcp_cc_index;   /* index of congestion control algorithm */
        u_int8_t        t_adaptive_rtimo;       /* Read timeout used as a multiple of RTT */
@@ -344,21 +359,21 @@ struct tcpcb {
        u_int16_t       t_early_rexmt_count; /* count of early rexmts */
        u_int32_t       t_early_rexmt_win; /* window for limiting early rexmts */ 
 
-       u_int32_t       ts_recent;              /* timestamp echo data */
+       u_int32_t       ts_recent;      /* timestamp echo data */
 
-       u_int32_t       ts_recent_age;          /* when last updated */
+       u_int32_t       ts_recent_age;  /* when last updated */
        tcp_seq last_ack_sent;
 /* RFC 1644 variables */
        tcp_cc  cc_send;                /* send connection count */
        tcp_cc  cc_recv;                /* receive connection count */
 /* RFC 3465 variables */
-       u_int32_t       t_bytes_acked;          /* ABC "bytes_acked" parameter */
+       u_int32_t       t_bytes_acked;  /* ABC "bytes_acked" parameter */
 
        int     t_lastchain;            /* amount of packets chained last time around */
        u_int16_t       t_unacksegs;    /* received but unacked segments for delaying acks */
        u_int8_t        t_rexmtthresh;  /* duplicate ack threshold for entering fast recovery */
        u_int8_t        t_rtimo_probes; /* number of adaptive rtimo probes sent */
-       u_int32_t       t_persist_timeout;      /* ZWP persistence limit as set by PERSIST_TIMEOUT */
+       u_int32_t       t_persist_timeout; /* ZWP persistence limit as set by PERSIST_TIMEOUT */
        u_int32_t       t_persist_stop;         /* persistence limit deadline if triggered by ZWP */
        u_int32_t       t_notsent_lowat;        /* Low water for not sent data */
 
@@ -374,6 +389,8 @@ struct tcpcb {
 #define TE_SENDCWR             0x08    /* Indicate that the next non-retransmit should have the TCP CWR flag set */
 #define TE_SENDECE             0x10    /* Indicate that the next packet should have the TCP ECE flag set */
 #define TE_INRECOVERY          0x20    /* connection entered recovery after receiving ECE */
+#define TE_RECV_ECN_CE         0x40    /* Received IPTOS_ECN_CE marking at least once */
+#define TE_RECV_ECN_ECE        0x80    /* Received ECE marking at least once */
 #define TE_ECN_ON              (TE_SETUPSENT | TE_SETUPRECEIVED) /* Indicate ECN was successfully negotiated on a connection) */
 
 /* state for bad retransmit recovery */
@@ -389,18 +406,18 @@ struct tcpcb {
 
 /* SACK related state */
        int16_t snd_numholes;           /* number of holes seen by sender */
+       tcp_seq sack_newdata;           /* New data xmitted in this recovery
+                                          episode starts at this seq number */
        TAILQ_HEAD(sackhole_head, sackhole) snd_holes;
                                                /* SACK scoreboard (sorted) */
        tcp_seq snd_fack;               /* last seq number(+1) sack'd by rcv'r*/
        int     rcv_numsacks;           /* # distinct sack blks present */
        struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */
-       tcp_seq sack_newdata;           /* New data xmitted in this recovery
-                                          episode starts at this seq number */
        struct sackhint sackhint;       /* SACK scoreboard hint */
        
-       u_int32_t       t_pktlist_sentlen; /* total bytes in transmit chain */
        struct mbuf     *t_pktlist_head; /* First packet in transmit chain */
        struct mbuf     *t_pktlist_tail; /* Last packet in transmit chain */
+       u_int32_t       t_pktlist_sentlen; /* total bytes in transmit chain */
 
        u_int32_t       t_keepidle;     /* keepalive idle timer (override global if > 0) */
        u_int32_t       t_keepinit;     /* connection timeout, i.e. idle time 
@@ -409,7 +426,8 @@ struct tcpcb {
        u_int32_t       t_keepcnt;      /* number of keepalives before close */
 
        u_int32_t       tso_max_segment_size;   /* TSO maximum segment unit for NIC */
-       u_int32_t       t_pmtud_saved_maxopd;   /* MSS saved before performing PMTU-D BlackHole detection */
+       u_int16_t       t_pmtud_lastseg_size;   /* size of the last sent segment */
+       u_int16_t       t_pmtud_saved_maxopd;   /* MSS saved before performing PMTU-D BlackHole detection */
        u_int32_t       t_pmtud_start_ts;       /* Time of PMTUD blackhole detection */
        
        struct
@@ -445,12 +463,18 @@ struct tcpcb {
 #define TF_FORCE               0x8000          /* force 1 byte out */
 #define        TF_DISABLE_STRETCHACK   0x10000         /* auto-disable stretch ack */
 #define        TF_NOBLACKHOLE_DETECTION 0x20000        /* Disable PMTU blackhole detection */
+#define        TF_DISABLE_DSACK        0x40000         /* Ignore DSACK due to n/w duplication */
+#define        TF_RESCUE_RXT           0x80000         /* SACK rescue retransmit */
+#define        TF_CWND_NONVALIDATED    0x100000        /* cwnd non validated */
+#define        TF_PROBING              0x200000        /* Trigger probe timeout */
+#define        TF_FASTOPEN             0x400000        /* TCP Fastopen is enabled */
 
 #if TRAFFIC_MGT
        /* Inter-arrival jitter related state */
        uint32_t        iaj_rcv_ts;             /* tcp clock when the first packet was received */
        uint16_t        iaj_size;               /* Size of packet for iaj measurement */
-       uint16_t        iaj_small_pkt;          /* Count of packets smaller than iaj_size */
+       uint8_t         iaj_small_pkt;          /* Count of packets smaller than iaj_size */
+       uint8_t         t_pipeack_ind;          /* index for next pipeack sample */
        uint16_t        iaj_pktcnt;             /* packet count, to avoid throttling initially */
        uint16_t        acc_iaj;                /* Accumulated iaj */
        tcp_seq         iaj_rwintop;            /* recent max advertised window */
@@ -465,6 +489,19 @@ struct tcpcb {
 /* Tail loss probe related state */
        tcp_seq         t_tlphighrxt;           /* snd_nxt after PTO */
        u_int32_t       t_tlpstart;             /* timestamp at PTO */
+/* DSACK data receiver state */
+       tcp_seq         t_dsack_lseq;           /* DSACK left sequence */
+       tcp_seq         t_dsack_rseq;           /* DSACK right sequence */
+/* DSACK data sender state */
+       SLIST_HEAD(tcp_rxt_seghead, tcp_rxt_seg) t_rxt_segments;
+       tcp_seq         t_dsack_lastuna;        /* snd_una when last recovery episode started */
+/* state for congestion window validation (draft-ietf-tcpm-newcwv-07) */
+#define        TCP_PIPEACK_SAMPLE_COUNT        3
+       u_int32_t       t_pipeack_sample[TCP_PIPEACK_SAMPLE_COUNT];     /* pipeack, bytes acked within RTT */
+       tcp_seq         t_pipeack_lastuna; /* una when pipeack measurement started */
+       u_int32_t       t_pipeack;
+       u_int32_t       t_lossflightsize;
+
 #if MPTCP
        u_int32_t       t_mpflags;              /* flags for multipath TCP */
 
@@ -492,14 +529,64 @@ struct tcpcb {
 #define TMPF_SND_MPFAIL                0x00200000 /* Received mapping csum failure */
 #define TMPF_FASTJOIN_SEND     0x00400000 /* Fast join early data send */
 #define TMPF_FASTJOINBY2_SEND  0x00800000 /* Fast join send after 3 WHS */
+#define TMPF_MPCAP_RETRANSMIT  0x01000000 /* Retransmission of 3rd ACK */
 
-       void                    *t_mptcb;       /* pointer to MPTCP TCB */
        tcp_seq                 t_mpuna;        /* unacknowledged sequence */
+       void                    *t_mptcb;       /* pointer to MPTCP TCB */
        struct mpt_dsn_map      t_rcv_map;      /* Receive mapping list */
        u_int8_t                t_local_aid;    /* Addr Id for authentication */
        u_int8_t                t_rem_aid;      /* Addr ID of another subflow */
        u_int8_t                t_mprxtshift;   /* join retransmission */
 #endif /* MPTCP */
+
+#define        TFO_F_OFFER_COOKIE      0x01 /* We will offer a cookie */
+#define        TFO_F_COOKIE_VALID      0x02 /* The received cookie is valid */
+#define        TFO_F_COOKIE_REQ        0x04 /* Client requested a new cookie */
+#define        TFO_F_COOKIE_SENT       0x08 /* Client did send a cookie in the SYN */
+#define        TFO_F_SYN_LOSS          0x10 /* A SYN-loss triggered a fallback to regular TCP on the client-side */
+#define        TFO_F_NO_RCVPROBING     0x20 /* This network is guaranteed to support TFO in the downstream direction */
+#define        TFO_F_NO_SNDPROBING     0x40 /* This network is guaranteed to support TFO in the upstream direction */
+       u_int8_t                t_tfo_flags;
+#define        TFO_S_SYNDATA_RCV       0x01 /* SYN+data has been received */
+#define        TFO_S_COOKIEREQ_RECV    0x02 /* TFO-cookie request received */
+#define        TFO_S_COOKIE_SENT       0x04 /* TFO-cookie announced in SYN/ACK */
+#define        TFO_S_COOKIE_INVALID    0x08 /* Received TFO-cookie is invalid */
+#define        TFO_S_COOKIE_REQ        0x10 /* TFO-cookie requested within the SYN */
+#define        TFO_S_COOKIE_RCV        0x20 /* TFO-cookie received in SYN/ACK */
+#define        TFO_S_SYN_DATA_SENT     0x40 /* SYN+data sent */
+#define        TFO_S_SYN_DATA_ACKED    0x80 /* SYN+data has been acknowledged in SYN/ACK */
+#define        TFO_S_SYN_LOSS          0x0100 /* SYN+TFO has been lost - fallback to regular TCP */
+       u_int16_t               t_tfo_stats;
+
+       u_int8_t                t_tfo_probes; /* TFO-probes we did send */
+/*
+ * This is the TFO-probing state machine. Transitions are as follows:
+ *
+ * Current state: PROBE_NONE
+ *               Event: SYN+DATA acknowledged
+ *                      Action: Transition to PROBE_PROBING and set keepalive-timer
+ *
+ * Current state: PROBE_PROBING (initial state)
+ *               Event: Receive data
+ *                      Action: Transition to PROBE_NONE and cancel keepalive-timer
+ *               Event: Receive ACK that does not indicate a hole
+ *                      Action: Transition to PROBE_NONE and cancel keepalive-timer
+ *               Event: Receive ACK that indicates a hole
+ *                      Action: Transition to PROBE_WAIT_DATA and set a short timer
+ *                              to wait for the final segment.
+ *               Event: Keepalive-timeout (did not receive any segment)
+ *                      Action: Signal ETIMEDOUT as with regular keepalive-timers
+ *
+ * Current state: PROBE_WAIT_DATA
+ *               Event: Receive data
+ *                      Action: Transition to PROBE_NONE and cancel keepalive-timer
+ *               Event: Data-timeout (did not receive the expected data)
+ *                      Action: Signal ENODATA up to the app and close everything.
+ */
+#define        TFO_PROBE_NONE          0 /* Not probing now */
+#define        TFO_PROBE_PROBING       1 /* Sending out TCP-keepalives waiting for reply */
+#define        TFO_PROBE_WAIT_DATA     2 /* Received reply, waiting for data */
+       u_int8_t                t_tfo_probe_state;
 };
 
 #define IN_FASTRECOVERY(tp)    (tp->t_flags & TF_FASTRECOVERY)
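The transitions documented in the state-machine comment above reduce to a small dispatch; the sketch below only illustrates the documented behaviour (the event enum and the function are invented for the example, not part of the stack):

/* Illustration only: the documented TFO-probe transitions as a switch. */
enum tfo_probe_ev { EV_DATA_RCVD, EV_ACK_NO_HOLE, EV_ACK_HOLE, EV_TIMEOUT };

static void
tfo_probe_transition(struct tcpcb *tp, enum tfo_probe_ev ev)
{
	switch (tp->t_tfo_probe_state) {
	case TFO_PROBE_NONE:
		/* moves to TFO_PROBE_PROBING once SYN+data is acknowledged */
		break;
	case TFO_PROBE_PROBING:
		if (ev == EV_DATA_RCVD || ev == EV_ACK_NO_HOLE)
			tp->t_tfo_probe_state = TFO_PROBE_NONE;	/* cancel keepalive */
		else if (ev == EV_ACK_HOLE)
			tp->t_tfo_probe_state = TFO_PROBE_WAIT_DATA; /* arm short timer */
		else if (ev == EV_TIMEOUT)
			;	/* signal ETIMEDOUT as a regular keepalive would */
		break;
	case TFO_PROBE_WAIT_DATA:
		if (ev == EV_DATA_RCVD)
			tp->t_tfo_probe_state = TFO_PROBE_NONE;
		else if (ev == EV_TIMEOUT)
			;	/* signal ENODATA and tear the connection down */
		break;
	}
}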
@@ -516,6 +603,8 @@ struct tcpcb {
        (_tp_)->t_flags |= TF_FASTRECOVERY;                     \
        if (INP_IS_FLOW_CONTROLLED((_tp_)->t_inpcb))            \
                inp_reset_fc_state((_tp_)->t_inpcb);            \
+       if (!SLIST_EMPTY(&tp->t_rxt_segments))                  \
+               tcp_rxtseg_clean(tp);                           \
 } while(0)
 
 #define EXIT_FASTRECOVERY(_tp_) do {           \
@@ -525,6 +614,8 @@ struct tcpcb {
        (_tp_)->t_bytes_acked = 0;              \
        (_tp_)->ecn_flags &= ~TE_INRECOVERY;    \
        (_tp_)->t_timer[TCPT_PTO] = 0;          \
+       (_tp_)->t_flagsext &= ~TF_RESCUE_RXT;   \
+       (_tp_)->t_lossflightsize = 0;           \
 } while(0)
 
 /*
@@ -546,34 +637,72 @@ extern int tcprexmtthresh;
        (((_tp_)->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP)) == \
                (TF_REQ_TSTMP|TF_RCVD_TSTMP))
 
+/*
+ * This condition is true if window scale option is supported
+ * on a connection
+ */
+#define        TCP_WINDOW_SCALE_ENABLED(_tp_) \
+       (((_tp_)->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == \
+               (TF_RCVD_SCALE|TF_REQ_SCALE))
+
+/* Is ECN enabled end-to-end */
+#define        TCP_ECN_ENABLED(_tp_) \
+       (((_tp_)->ecn_flags & (TE_ECN_ON)) == (TE_ECN_ON))
+
 /*
  * Gives number of bytes acked by this ack
  */
 #define BYTES_ACKED(_th_, _tp_) \
        ((_th_)->th_ack - (_tp_)->snd_una)
 
+/* Returns true if a DSACK option should be sent */
+#define        TCP_SEND_DSACK_OPT(_tp_) \
+       ((_tp_)->t_dsack_lseq > 0 && (_tp_)->t_dsack_rseq > 0)
+
+/* Check if DSACK option should be processed */
+#define        TCP_DSACK_ENABLED(tp)   (tcp_dsack_enable == 1 && \
+    !(tp->t_flagsext & TF_DISABLE_DSACK))
+
+/*
+ * Returns true if a DSACK sequence is within the max send window that will
+ * be accepted. In order to set a window to validate sequence numbers, the
+ * max send window within which a DSACK option is processed is limited.
+ *
+ * We need to choose a maximum window to check if the sequence number is
+ * within the window. One arbitrary choice is 256 * MSS because if the
+ * window is as large as 256 segments it might be big enough to ignore the
+ * DSACK option. Choosing a much larger limit means that the memory for
+ * retransmit segments can be held for a longer time.
+ */
+#define        TCP_DSACK_MAX_SEND_WINDOW(_tp_) ((_tp_)->t_maxseg << 8)
+#define TCP_DSACK_SEQ_IN_WINDOW(_tp_, _seq_, _una_) \
+    (SEQ_LEQ((_seq_), (_tp_)->snd_max) && \
+    SEQ_GEQ((_seq_), ((_una_) - TCP_DSACK_MAX_SEND_WINDOW(_tp_))))
+
+
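For a connection using a 1460-byte MSS, TCP_DSACK_MAX_SEND_WINDOW() evaluates to 1460 << 8 = 373,760 bytes, so TCP_DSACK_SEQ_IN_WINDOW() only accepts a DSACK block whose sequence numbers lie at most about 365 KB below snd_una and do not run past snd_max.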
 enum tcp_cc_event {
-       TCP_CC_CWND_INIT,
-       TCP_CC_INSEQ_ACK_RCVD,
-       TCP_CC_ACK_RCVD,
-       TCP_CC_ENTER_FASTRECOVERY,
-       TCP_CC_IN_FASTRECOVERY,
-       TCP_CC_EXIT_FASTRECOVERY,
-       TCP_CC_PARTIAL_ACK,
-       TCP_CC_IDLE_TIMEOUT,
-       TCP_CC_REXMT_TIMEOUT,
-       TCP_CC_ECN_RCVD,
-       TCP_CC_BAD_REXMT_RECOVERY,
-       TCP_CC_OUTPUT_ERROR,
-       TCP_CC_CHANGE_ALGO,
-       TCP_CC_FLOW_CONTROL,
-       TCP_CC_SUSPEND,
-       TCP_CC_LIMITED_TRANSMIT,
-       TCP_CC_EARLY_RETRANSMIT,
-       TCP_CC_TLP_RECOVERY,
-       TCP_CC_TLP_RECOVER_LASTPACKET,
-       TCP_CC_DELAY_FASTRECOVERY,
-       TCP_CC_TLP_IN_FASTRECOVERY
+       TCP_CC_CWND_INIT,       /* 0 */
+       TCP_CC_INSEQ_ACK_RCVD,  /* 1 */
+       TCP_CC_ACK_RCVD,        /* 2 */
+       TCP_CC_ENTER_FASTRECOVERY, /* 3 */
+       TCP_CC_IN_FASTRECOVERY, /* 4 */
+       TCP_CC_EXIT_FASTRECOVERY,  /* 5 */
+       TCP_CC_PARTIAL_ACK,     /* 6 */
+       TCP_CC_IDLE_TIMEOUT,    /* 7 */
+       TCP_CC_REXMT_TIMEOUT,   /* 8 */
+       TCP_CC_ECN_RCVD,        /* 9 */
+       TCP_CC_BAD_REXMT_RECOVERY, /* 10 */
+       TCP_CC_OUTPUT_ERROR,    /* 11 */
+       TCP_CC_CHANGE_ALGO,     /* 12 */
+       TCP_CC_FLOW_CONTROL,    /* 13 */
+       TCP_CC_SUSPEND,         /* 14 */
+       TCP_CC_LIMITED_TRANSMIT, /* 15 */
+       TCP_CC_EARLY_RETRANSMIT, /* 16 */
+       TCP_CC_TLP_RECOVERY,    /* 17 */
+       TCP_CC_TLP_RECOVER_LASTPACKET, /* 18 */
+       TCP_CC_DELAY_FASTRECOVERY, /* 19 */
+       TCP_CC_TLP_IN_FASTRECOVERY, /* 20 */
+       TCP_CC_DSACK_BAD_REXMT  /* 21 */
 };
 
 /*
@@ -591,12 +720,15 @@ struct tcpopt {
 #define        TOF_SIGLEN      0x0080  /* signature length valid (RFC2385) */
 #define        TOF_SACK        0x0100          /* Peer sent SACK option */
 #define        TOF_MPTCP       0x0200  /* MPTCP options to be dropped */
+#define        TOF_TFO         0x0400  /* TFO cookie option present */
+#define TOF_TFOREQ     0x0800  /* TFO cookie request present */
        u_int32_t               to_tsval;
        u_int32_t               to_tsecr;
        u_int16_t       to_mss;
        u_int8_t        to_requested_s_scale;
        u_int8_t        to_nsacks;      /* number of SACK blocks */
        u_char          *to_sacks;      /* pointer to the first SACK blocks */
+       u_char          *to_tfo;        /* pointer to the TFO cookie */
 };
 
 /*
@@ -635,6 +767,11 @@ struct rmxp_tao {
 #define        TCP_RTTVAR_SHIFT        4       /* shift for rttvar; 4 bits */
 #define        TCP_DELTA_SHIFT         2       /* see tcp_input.c */
 
+/* TFO-specific defines */
+#define        TFO_COOKIE_LEN_MIN      4
+#define        TFO_COOKIE_LEN_DEFAULT  8
+#define        TFO_COOKIE_LEN_MAX      16
+
 /*
  * The initial retransmission should happen at rtt + 4 * rttvar.
  * Because of the way we do the smoothing, srtt and rttvar
@@ -767,6 +904,9 @@ struct tcpcb {
        u_int32_t t_badrxtwin;          /* window for retransmit recovery */
 };
 
+#define        tcps_ecn_setup  tcps_ecn_client_success
+#define        tcps_sent_cwr   tcps_ecn_recv_ece
+#define        tcps_sent_ece   tcps_ecn_sent_ece
 
 /*
  * TCP statistics.
@@ -924,17 +1064,61 @@ struct   tcpstat {
        u_int32_t       tcps_rto_after_pto;     /* RTO after a probe */
        u_int32_t       tcps_tlp_recovery;      /* TLP induced fast recovery */
       u_int32_t       tcps_tlp_recoverlastpkt; /* TLP recovered last pkt */
-       u_int32_t       tcps_ecn_setup;         /* connection negotiated ECN */
-       u_int32_t       tcps_sent_cwr;          /* Sent CWR, ECE received */
-       u_int32_t       tcps_sent_ece;          /* Sent ECE notification */
+       u_int32_t       tcps_ecn_client_success; /* client-side connection negotiated ECN */
+       u_int32_t       tcps_ecn_recv_ece;      /* ECE received, sent CWR */
+       u_int32_t       tcps_ecn_sent_ece;      /* Sent ECE notification */
        u_int32_t       tcps_detect_reordering; /* Detect pkt reordering */
        u_int32_t       tcps_delay_recovery;    /* Delay fast recovery */
        u_int32_t       tcps_avoid_rxmt;        /* Retransmission was avoided */
        u_int32_t       tcps_unnecessary_rxmt;  /* Retransmission was not needed */
        u_int32_t       tcps_nostretchack;      /* disabled stretch ack algorithm on a connection */
        u_int32_t       tcps_rescue_rxmt;       /* SACK rescue retransmit */
-       u_int32_t       tcps_pto_in_recovery;   /* PTO during fast recovery */
-       u_int32_t       tcps_pmtudbh_reverted;  /* PMTU Blackhole detection, segement size reverted */
+       u_int32_t       tcps_pto_in_recovery;   /* rescue retransmit in fast recovery */
+       u_int32_t       tcps_pmtudbh_reverted;  /* PMTU Blackhole detection, segment size reverted */
+
+       /* DSACK related statistics */
+       u_int32_t       tcps_dsack_disable;     /* DSACK disabled due to n/w duplication */
+       u_int32_t       tcps_dsack_ackloss;     /* ignore DSACK due to ack loss */
+       u_int32_t       tcps_dsack_badrexmt;    /* DSACK based bad rexmt recovery */
+       u_int32_t       tcps_dsack_sent;        /* Sent DSACK notification */
+       u_int32_t       tcps_dsack_recvd;       /* Received a valid DSACK option */
+       u_int32_t       tcps_dsack_recvd_old;   /* Received an out of window DSACK option */
+
+       /* MPTCP Subflow selection stats */
+       u_int32_t       tcps_mp_sel_symtomsd;   /* By symptomsd */
+       u_int32_t       tcps_mp_sel_rtt;        /* By RTT comparison */
+       u_int32_t       tcps_mp_sel_rto;        /* By RTO comparison */
+       u_int32_t       tcps_mp_sel_peer;       /* By peer's output pattern */
+       u_int32_t       tcps_mp_num_probes;     /* Number of probes sent */
+       u_int32_t       tcps_mp_verdowngrade;   /* MPTCP version downgrade */
+       u_int32_t       tcps_drop_after_sleep;  /* drop after long AP sleep */
+       u_int32_t       tcps_probe_if;          /* probe packets after interface availability */
+       u_int32_t       tcps_probe_if_conflict; /* Can't send probe packets for interface */
+
+       u_int32_t       tcps_ecn_client_setup;  /* Attempted ECN setup from client side */
+       u_int32_t       tcps_ecn_server_setup;  /* Attempted ECN setup from server side */
+       u_int32_t       tcps_ecn_server_success; /* server-side connection negotiated ECN */
+       u_int32_t       tcps_ecn_lost_synack;   /* Lost SYN-ACK with ECN setup */
+       u_int32_t       tcps_ecn_lost_syn;      /* Lost SYN with ECN setup */
+       u_int32_t       tcps_ecn_not_supported; /* Server did not support ECN setup */
+       u_int32_t       tcps_ecn_recv_ce;       /* Received CE from the network */
+       u_int32_t       tcps_ecn_conn_recv_ce;  /* Number of connections that received CE at least once */
+       u_int32_t       tcps_ecn_conn_recv_ece; /* Number of connections that received ECE at least once */
+       u_int32_t       tcps_ecn_conn_plnoce;   /* Number of connections that received no CE and suffered packet loss */
+       u_int32_t       tcps_ecn_conn_pl_ce;    /* Number of connections that received CE and suffered packet loss */
+       u_int32_t       tcps_ecn_conn_nopl_ce;  /* Number of connections that received CE and suffered no packet loss */
+
+       /* TFO-related statistics */
+       u_int32_t       tcps_tfo_syn_data_rcv;  /* Received a SYN+data with valid cookie */
+       u_int32_t       tcps_tfo_cookie_req_rcv;/* Received a TFO cookie-request */
+       u_int32_t       tcps_tfo_cookie_sent;   /* Offered a TFO-cookie to the client */
+       u_int32_t       tcps_tfo_cookie_invalid;/* Received an invalid TFO-cookie */
+       u_int32_t       tcps_tfo_cookie_req;    /* Cookie requested with the SYN */
+       u_int32_t       tcps_tfo_cookie_rcv;    /* Cookie received in a SYN/ACK */
+       u_int32_t       tcps_tfo_syn_data_sent; /* SYN+data+cookie sent */
+       u_int32_t       tcps_tfo_syn_data_acked;/* SYN+data has been acknowledged */
+       u_int32_t       tcps_tfo_syn_loss;      /* SYN+TFO has been lost and we fallback */
+       u_int32_t       tcps_tfo_blackhole;     /* TFO got blackholed by a middlebox. */
 };
 
 struct tcpstat_local {
@@ -1188,21 +1372,22 @@ struct  xtcpcb_n {
 SYSCTL_DECL(_net_inet_tcp);
 #endif /* SYSCTL_DECL */
 
-/*
- * Flags for TCP's connectx(2) user-protocol request routine.
- */
-#if MPTCP
-#define        TCP_CONNREQF_MPTCP      0x1     /* called internally by MPTCP */
-#endif /* MPTCP */
-
 extern struct inpcbhead tcb;           /* head of queue of active tcpcb's */
 extern struct inpcbinfo tcbinfo;
 extern struct tcpstat tcpstat; /* tcp statistics */
 extern int tcp_mssdflt;        /* XXX */
 extern int tcp_minmss;
+#define        TCP_FASTOPEN_SERVER 0x01
+#define        TCP_FASTOPEN_CLIENT 0x02
+
+extern int tcp_tfo_halfcnt;
+extern int tcp_tfo_backlog;
+extern int tcp_fastopen;
+extern int tcp_tfo_fallback_min;
 extern int ss_fltsz;
 extern int ss_fltsz_local;
 extern         int tcp_do_rfc3390;             /* Calculate ss_fltsz according to RFC 3390 */
+extern int tcp_do_rfc1323;
 extern int target_qdelay;
 extern u_int32_t tcp_now;              /* for RFC 1323 timestamps */ 
 extern struct timeval tcp_uptime;
@@ -1213,6 +1398,10 @@ extern int tcp_do_rfc3465;
 extern int tcp_do_rfc3465_lim2;
 extern int maxseg_unacked;
 extern int tcp_use_newreno;
+extern struct zone *tcp_reass_zone;
+extern struct zone *tcp_rxt_seg_zone;
+extern int tcp_ecn_outbound;
+extern int tcp_ecn_inbound;
 
 
 #if CONFIG_IFEF_NOWINDOWSCALE
@@ -1255,6 +1444,7 @@ struct rtentry *
         tcp_rtlookup(struct inpcb *, unsigned int);
 void    tcp_setpersist(struct tcpcb *);
 void    tcp_gc(struct inpcbinfo *);
+void     tcp_itimer(struct inpcbinfo *ipi);
 void    tcp_check_timer_state(struct tcpcb *tp);
 void    tcp_run_timerlist(void *arg1, void *arg2);
 
@@ -1265,6 +1455,8 @@ void       tcp_trace(int, int, struct tcpcb *, void *, struct tcphdr *, int);
 
 void tcp_sack_doack(struct tcpcb *, struct tcpopt *, struct tcphdr *,
     u_int32_t *);
+extern boolean_t tcp_sack_process_dsack(struct tcpcb *, struct tcpopt *,
+    struct tcphdr *);
 int tcp_detect_bad_rexmt(struct tcpcb *, struct tcphdr *, struct tcpopt *,
     u_int32_t rxtime);
 void    tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
@@ -1320,8 +1512,25 @@ extern int tcp_input_checksum(int, struct mbuf *, struct tcphdr *, int, int);
 extern void tcp_getconninfo(struct socket *, struct conninfo_tcp *);
 extern void add_to_time_wait(struct tcpcb *, uint32_t delay);
 extern void tcp_pmtud_revert_segment_size(struct tcpcb *tp);
+extern void tcp_rxtseg_insert(struct tcpcb *, tcp_seq, tcp_seq);
+extern struct tcp_rxt_seg *tcp_rxtseg_find(struct tcpcb *, tcp_seq, tcp_seq);
+extern void tcp_rxtseg_clean(struct tcpcb *);
+extern boolean_t tcp_rxtseg_detect_bad_rexmt(struct tcpcb *, tcp_seq);
+extern boolean_t tcp_rxtseg_dsack_for_tlp(struct tcpcb *);
+extern u_int32_t tcp_rxtseg_total_size(struct tcpcb *tp);
+extern void tcp_rexmt_save_state(struct tcpcb *tp);
+extern void tcp_interface_send_probe(u_int16_t if_index_available);
+extern void tcp_probe_connectivity(struct ifnet *ifp, u_int32_t enable);
+extern void tcp_get_connectivity_status(struct tcpcb *,
+    struct tcp_conn_status *);
+
+extern boolean_t tfo_enabled(const struct tcpcb *tp);
+extern void tcp_disable_tfo(struct tcpcb *tp);
+extern void tcp_tfo_gen_cookie(struct inpcb *inp, u_char *out, size_t blk_size);
+#define        TCP_FASTOPEN_KEYLEN 16
+
 #if MPTCP
-extern uint16_t mptcp_input_csum(struct tcpcb *, struct mbuf *, int);
+extern int mptcp_input_preproc(struct tcpcb *, struct mbuf *, int);
 extern void mptcp_output_csum(struct tcpcb *, struct mbuf *, int32_t, unsigned, 
     u_int64_t, u_int32_t *);
 extern int mptcp_adj_mss(struct tcpcb *, boolean_t);
index ef82a6f1546ceff5a49fabbe8ce7b2bd15ff89c0..3f4a473469b2a5b538a8fe3fd30eb87ba6f52f8a 100644 (file)
@@ -80,4 +80,36 @@ struct udphdr {
  * User-settable options (used with setsockopt).
  */
 #define        UDP_NOCKSUM     0x01    /* don't checksum outbound payloads */
-#endif
+#ifdef PRIVATE
+#define UDP_KEEPALIVE_OFFLOAD  0x02 /* Send keep-alive at a given interval */
+#endif /* PRIVATE */
+
+#ifdef PRIVATE
+/*
+ * This is a mechanism to offload keep-alive or heartbeat messages
+ * to the Wi-Fi driver when the host processor is sleeping. The application
+ * provides a small amount of data that can be placed in the message. The
+ * application will also specify an interval at which these messages
+ * should be sent.
+ *
+ * The purpose of these messages is to detect loss of connectivity in
+ * peer-to-peer communication without keeping the host processor awake.
+ *
+ * The application will pass this data to the kernel using setsockopt. It
+ * can set the interval to 0 to disable keepalive offload. 
+ */
+#define        UDP_KEEPALIVE_OFFLOAD_DATA_SIZE 32
+
+/* Maximum keep alive interval in seconds */
+#define UDP_KEEPALIVE_INTERVAL_MAX_SECONDS     65536
+
+struct udp_keepalive_offload {
+       u_char ka_data[UDP_KEEPALIVE_OFFLOAD_DATA_SIZE];
+       u_int16_t ka_interval;          /* interval in seconds */
+       u_int8_t ka_data_len;           /* valid length of ka_data */
+       u_int8_t ka_type;               /* type of application */
+#define        UDP_KEEPALIVE_OFFLOAD_TYPE_AIRPLAY      0x1
+};
+
+#endif /* PRIVATE */
+#endif /* _NETINET_UDP_H */
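Arming the offload from a (private) client is a single setsockopt() on an already-connected socket; a sketch assuming the option is consumed at the IPPROTO_UDP level, as the udp_ctloutput() change further down suggests, with an illustrative helper:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/udp.h>
#include <string.h>

static int
arm_udp_keepalive(int s, const void *data, u_int8_t datalen, u_int16_t secs)
{
	struct udp_keepalive_offload ka;

	memset(&ka, 0, sizeof(ka));
	if (datalen > UDP_KEEPALIVE_OFFLOAD_DATA_SIZE)
		datalen = UDP_KEEPALIVE_OFFLOAD_DATA_SIZE;
	memcpy(ka.ka_data, data, datalen);
	ka.ka_data_len = datalen;
	ka.ka_interval = secs;		/* 0 disables the offload again */
	ka.ka_type = UDP_KEEPALIVE_OFFLOAD_TYPE_AIRPLAY;

	/* The socket must already be connect()ed; the kernel also insists
	 * on the exact structure size and a non-zero ka_type. */
	return (setsockopt(s, IPPROTO_UDP, UDP_KEEPALIVE_OFFLOAD,
	    &ka, sizeof(ka)));
}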
index af468dc312e799616c8a9a664682118f3c2082e7..09a5a3631e86f7cc0287a33c21e77c210de6c9d7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -114,6 +114,10 @@ extern int esp_udp_encap_port;
 #include <net/necp.h>
 #endif /* NECP */
 
+#if FLOW_DIVERT
+#include <netinet/flow_divert.h>
+#endif /* FLOW_DIVERT */
+
 #define        DBG_LAYER_IN_BEG        NETDBG_CODE(DBG_NETUDP, 0)
 #define        DBG_LAYER_IN_END        NETDBG_CODE(DBG_NETUDP, 2)
 #define        DBG_LAYER_OUT_BEG       NETDBG_CODE(DBG_NETUDP, 1)
@@ -202,11 +206,11 @@ static int udp_attach(struct socket *, int, struct proc *);
 static int udp_bind(struct socket *, struct sockaddr *, struct proc *);
 static int udp_connect(struct socket *, struct sockaddr *, struct proc *);
 static int udp_connectx(struct socket *, struct sockaddr_list **,
-    struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *,
-    uint32_t, void *, uint32_t);
+    struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t,
+    sae_connid_t *, uint32_t, void *, uint32_t, struct uio *, user_ssize_t *);
 static int udp_detach(struct socket *);
 static int udp_disconnect(struct socket *);
-static int udp_disconnectx(struct socket *, associd_t, connid_t);
+static int udp_disconnectx(struct socket *, sae_associd_t, sae_connid_t);
 static int udp_send(struct socket *, int, struct mbuf *, struct sockaddr *,
     struct mbuf *, struct proc *);
 static void udp_append(struct inpcb *, struct ip *, struct mbuf *, int,
@@ -237,6 +241,7 @@ struct pr_usrreqs udp_usrreqs = {
        .pru_sockaddr =         in_getsockaddr,
        .pru_sosend =           sosend,
        .pru_soreceive =        soreceive,
+       .pru_soreceive_list =   soreceive_list,
 };
 
 void
@@ -509,7 +514,9 @@ udp_input(struct mbuf *m, int iphlen)
 
 #if NECP
                        skipit = 0;
-                       if (!necp_socket_is_allowed_to_send_recv_v4(inp, uh->uh_dport, uh->uh_sport, &ip->ip_dst, &ip->ip_src, ifp, NULL)) {
+                       if (!necp_socket_is_allowed_to_send_recv_v4(inp, 
+                           uh->uh_dport, uh->uh_sport, &ip->ip_dst,
+                           &ip->ip_src, ifp, NULL, NULL)) {
                                /* do not inject data to pcb */
                                skipit = 1;
                        }
@@ -683,7 +690,8 @@ udp_input(struct mbuf *m, int iphlen)
                goto bad;
        }
 #if NECP
-       if (!necp_socket_is_allowed_to_send_recv_v4(inp, uh->uh_dport, uh->uh_sport, &ip->ip_dst, &ip->ip_src, ifp, NULL)) {
+       if (!necp_socket_is_allowed_to_send_recv_v4(inp, uh->uh_dport,
+           uh->uh_sport, &ip->ip_dst, &ip->ip_src, ifp, NULL, NULL)) {
                udp_unlock(inp->inp_socket, 1, 0);
                IF_UDP_STATINC(ifp, badipsec);
                goto bad;
@@ -944,7 +952,78 @@ udp_ctloutput(struct socket *so, struct sockopt *sopt)
                        else
                                inp->inp_flags &= ~INP_UDP_NOCKSUM;
                        break;
+               case UDP_KEEPALIVE_OFFLOAD:
+               {
+                       struct udp_keepalive_offload ka;
+                       /*
+                        * If the socket is not connected, the stack will
+                        * not know the destination address to put in the
+                        * keepalive datagram. Return an error now instead
+                        * of failing later.
+                        */
+                       if (!(so->so_state & SS_ISCONNECTED)) {
+                               error = EINVAL;
+                               break;
+                       }
+                       if (sopt->sopt_valsize != sizeof(ka)) {
+                               error = EINVAL;
+                               break;
+                       }
+                       if ((error = sooptcopyin(sopt, &ka, sizeof(ka),
+                           sizeof(ka))) != 0)
+                               break;
+
+                       /* application should specify the type */
+                       if (ka.ka_type == 0)
+                               return (EINVAL);
+
+                       if (ka.ka_interval == 0) {
+                               /*
+                                * if interval is 0, disable the offload
+                                * mechanism
+                                */
+                               if (inp->inp_keepalive_data != NULL)
+                                       FREE(inp->inp_keepalive_data,
+                                           M_TEMP);
+                               inp->inp_keepalive_data = NULL;
+                               inp->inp_keepalive_datalen = 0;
+                               inp->inp_keepalive_interval = 0;
+                               inp->inp_keepalive_type = 0;
+                               inp->inp_flags2 &= ~INP2_KEEPALIVE_OFFLOAD;
+                       } else {
+                               if (inp->inp_keepalive_data != NULL) {
+                                       FREE(inp->inp_keepalive_data,
+                                           M_TEMP);
+                                       inp->inp_keepalive_data = NULL;
+                               }
 
+                               inp->inp_keepalive_datalen = min(
+                                   ka.ka_data_len,
+                                   UDP_KEEPALIVE_OFFLOAD_DATA_SIZE);
+                               if (inp->inp_keepalive_datalen > 0) {
+                                       MALLOC(inp->inp_keepalive_data,
+                                           u_int8_t *, 
+                                           inp->inp_keepalive_datalen,
+                                           M_TEMP, M_WAITOK);
+                                       if (inp->inp_keepalive_data == NULL) {
+                                               inp->inp_keepalive_datalen = 0;
+                                               error = ENOMEM;
+                                               break;
+                                       }
+                                       bcopy(ka.ka_data,
+                                           inp->inp_keepalive_data,
+                                           inp->inp_keepalive_datalen);
+                               } else {
+                                       inp->inp_keepalive_datalen = 0;
+                               }
+                               inp->inp_keepalive_interval =
+                                   min(UDP_KEEPALIVE_INTERVAL_MAX_SECONDS,
+                                   ka.ka_interval);
+                               inp->inp_keepalive_type = ka.ka_type;
+                               inp->inp_flags2 |= INP2_KEEPALIVE_OFFLOAD;
+                       }
+                       break;
+               }
                case SO_FLUSH:
                        if ((error = sooptcopyin(sopt, &optval, sizeof (optval),
                            sizeof (optval))) != 0)
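
The UDP_KEEPALIVE_OFFLOAD case added above lets an application hand the kernel a keepalive template for a connected UDP socket: a non-zero ka_type, an interval in seconds (zero tears the offload down again), and up to UDP_KEEPALIVE_OFFLOAD_DATA_SIZE bytes of payload. A minimal userspace sketch, not part of the commit, assuming the option level is IPPROTO_UDP and that struct udp_keepalive_offload is declared by the private netinet/udp.h header:

#include <string.h>
#include <stdint.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/udp.h>        /* private header; struct layout is an assumption */

/* Illustrative helper; field names mirror the handler above
 * (ka_type, ka_interval, ka_data_len, ka_data). */
static int
set_udp_keepalive_offload(int fd, const void *payload, size_t len)
{
        struct udp_keepalive_offload ka;

        if (len > sizeof(ka.ka_data))
                return (-1);            /* kernel clips to UDP_KEEPALIVE_OFFLOAD_DATA_SIZE */
        memset(&ka, 0, sizeof(ka));
        ka.ka_type = UDP_KEEPALIVE_OFFLOAD_TYPE_AIRPLAY;   /* must be non-zero */
        ka.ka_interval = 30;            /* seconds; 0 disables the offload */
        ka.ka_data_len = (uint32_t)len;
        memcpy(ka.ka_data, payload, len);

        /* The socket must already be connected, or the handler returns EINVAL. */
        return (setsockopt(fd, IPPROTO_UDP, UDP_KEEPALIVE_OFFLOAD, &ka, sizeof(ka)));
}

The frames themselves are generated later by udp_fill_keepalive_offload_frames(), added further down in this file.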
@@ -1516,7 +1595,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
         * Calculate data length and get a mbuf
         * for UDP and IP headers.
         */
-       M_PREPEND(m, sizeof (struct udpiphdr), M_DONTWAIT);
+       M_PREPEND(m, sizeof (struct udpiphdr), M_DONTWAIT, 1);
        if (m == 0) {
                error = ENOBUFS;
                goto abort;
@@ -1553,19 +1632,21 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
 
        KERNEL_DEBUG(DBG_LAYER_OUT_END, ui->ui_dport, ui->ui_sport,
                     ui->ui_src.s_addr, ui->ui_dst.s_addr, ui->ui_ulen);
-       
+
 #if NECP
        {
                necp_kernel_policy_id policy_id;
-               if (!necp_socket_is_allowed_to_send_recv_v4(inp, lport, fport, &laddr, &faddr, NULL, &policy_id)) {
+               u_int32_t route_rule_id;
+               if (!necp_socket_is_allowed_to_send_recv_v4(inp, lport, fport,
+                   &laddr, &faddr, NULL, &policy_id, &route_rule_id)) {
                        error = EHOSTUNREACH;
                        goto abort;
                }
 
-               necp_mark_packet_from_socket(m, inp, policy_id);
+               necp_mark_packet_from_socket(m, inp, policy_id, route_rule_id);
        }
 #endif /* NECP */
-       
+
 #if IPSEC
        if (inp->inp_sp != NULL && ipsec_setsocket(m, inp->inp_socket) != 0) {
                error = ENOBUFS;
@@ -1667,8 +1748,17 @@ abort:
                 * If the destination route is unicast, update outifp with
                 * that of the route interface used by IP.
                 */
-               if (rt != NULL && (outifp = rt->rt_ifp) != inp->inp_last_outifp)
-                       inp->inp_last_outifp = outifp;  /* no reference needed */
+               if (rt != NULL &&
+                   (outifp = rt->rt_ifp) != inp->inp_last_outifp) {
+                       inp->inp_last_outifp = outifp; /* no reference needed */
+
+                       so->so_pktheadroom = P2ROUNDUP(
+                           sizeof(struct udphdr) +
+                           sizeof(struct ip) +
+                           ifnet_hdrlen(outifp) +
+                           ifnet_packetpreamblelen(outifp),
+                           sizeof(u_int32_t));
+               }
        } else {
                ROUTE_RELEASE(&inp->inp_route);
        }
@@ -1794,12 +1884,8 @@ udp_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
                return (EAFNOSUPPORT);
 
        inp = sotoinpcb(so);
-       if (inp == NULL
-#if NECP
-               || (necp_socket_should_use_flow_divert(inp))
-#endif /* NECP */
-               )
-               return (inp == NULL ? EINVAL : EPROTOTYPE);
+       if (inp == NULL)
+               return (EINVAL);
        error = in_pcbbind(inp, nam, p);
        return (error);
 }
@@ -1811,14 +1897,29 @@ udp_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
        int error;
 
        inp = sotoinpcb(so);
-       if (inp == NULL
-#if NECP
-               || (necp_socket_should_use_flow_divert(inp))
-#endif /* NECP */
-               )
-               return (inp == NULL ? EINVAL : EPROTOTYPE);
+       if (inp == NULL)
+               return (EINVAL);
        if (inp->inp_faddr.s_addr != INADDR_ANY)
                return (EISCONN);
+
+#if NECP
+#if FLOW_DIVERT
+       if (necp_socket_should_use_flow_divert(inp)) {
+               uint32_t fd_ctl_unit =
+                   necp_socket_get_flow_divert_control_unit(inp);
+               if (fd_ctl_unit > 0) {
+                       error = flow_divert_pcb_init(so, fd_ctl_unit);
+                       if (error == 0) {
+                               error = flow_divert_connect_out(so, nam, p);
+                       }
+               } else {
+                       error = ENETDOWN;
+               }
+               return (error);
+       }
+#endif /* FLOW_DIVERT */
+#endif /* NECP */
+
        error = in_pcbconnect(inp, nam, p, IFSCOPE_NONE, NULL);
        if (error == 0) {
                soisconnected(so);
@@ -1831,13 +1932,15 @@ udp_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
 int
 udp_connectx_common(struct socket *so, int af,
     struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl,
-    struct proc *p, uint32_t ifscope, associd_t aid, connid_t *pcid,
-    uint32_t flags, void *arg, uint32_t arglen)
+    struct proc *p, uint32_t ifscope, sae_associd_t aid, sae_connid_t *pcid,
+    uint32_t flags, void *arg, uint32_t arglen,
+    struct uio *uio, user_ssize_t *bytes_written)
 {
 #pragma unused(aid, flags, arg, arglen)
        struct sockaddr_entry *src_se = NULL, *dst_se = NULL;
        struct inpcb *inp = sotoinpcb(so);
        int error;
+       user_ssize_t datalen = 0;
 
        if (inp == NULL)
                return (EINVAL);
@@ -1855,7 +1958,8 @@ udp_connectx_common(struct socket *so, int af,
        VERIFY(src_se == NULL || src_se->se_addr->sa_family == af);
 
 #if NECP
-       inp_update_necp_policy(inp, src_se ? src_se->se_addr : NULL, dst_se ? dst_se->se_addr : NULL, ifscope);
+       inp_update_necp_policy(inp, src_se ? src_se->se_addr : NULL,
+           dst_se ? dst_se->se_addr : NULL, ifscope);
 #endif /* NECP */
        
        /* bind socket to the specified interface, if requested */
@@ -1885,8 +1989,39 @@ udp_connectx_common(struct socket *so, int af,
                /* NOTREACHED */
        }
 
+       if (error != 0)
+               return (error);
+
+       /*
+        * If there is data, copy it. DATA_IDEMPOTENT is ignored.
+        * CONNECT_RESUME_ON_READ_WRITE is ignored. 
+        */
+       if (uio != NULL) {
+               socket_unlock(so, 0);
+
+               VERIFY(bytes_written != NULL);
+
+               datalen = uio_resid(uio);
+               error = so->so_proto->pr_usrreqs->pru_sosend(so, NULL,
+                   (uio_t)uio, NULL, NULL, 0);
+               socket_lock(so, 0);
+
+               /* If error returned is EMSGSIZE, for example, disconnect */
+               if (error == 0 || error == EWOULDBLOCK)
+                       *bytes_written = datalen - uio_resid(uio);
+               else
+                       (void)so->so_proto->pr_usrreqs->pru_disconnectx(so,
+                           SAE_ASSOCID_ANY, SAE_CONNID_ANY);
+               /*
+                * mask the EWOULDBLOCK error so that the caller
+                * knows that at least the connect was successful.
+                */
+               if (error == EWOULDBLOCK)
+                       error = 0;
+       }
+
        if (error == 0 && pcid != NULL)
-               *pcid = 1;      /* there is only 1 connection for UDP */
+               *pcid = 1;      /* there is only 1 connection for UDP */
 
        return (error);
 }
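
With the uio and bytes_written arguments added above, udp_connectx_common() can queue the first datagram as part of the connect itself and report how much was accepted; EWOULDBLOCK from the send is masked so the caller still learns that the connect succeeded. A hedged userspace sketch of how that surfaces through connectx(2); the wrapper prototype and the sa_endpoints_t fields are assumptions about the matching SDK, not taken from this diff:

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

/* Illustrative helper, not from the commit. */
static ssize_t
udp_connect_with_data(int fd, const struct sockaddr *dst, socklen_t dstlen,
    void *buf, size_t len)
{
        sa_endpoints_t ep;
        struct iovec iov = { .iov_base = buf, .iov_len = len };
        size_t sent = 0;
        sae_connid_t cid;

        memset(&ep, 0, sizeof(ep));
        ep.sae_dstaddr = dst;
        ep.sae_dstaddrlen = dstlen;

        /* For UDP there is a single connection, so cid comes back as 1. */
        if (connectx(fd, &ep, SAE_ASSOCID_ANY, 0, &iov, 1, &sent, &cid) == -1)
                return (-1);
        return ((ssize_t)sent);
}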
@@ -1894,11 +2029,11 @@ udp_connectx_common(struct socket *so, int af,
 static int
 udp_connectx(struct socket *so, struct sockaddr_list **src_sl,
     struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
-    associd_t aid, connid_t *pcid, uint32_t flags, void *arg,
-    uint32_t arglen)
+    sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
+    uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
 {
        return (udp_connectx_common(so, AF_INET, src_sl, dst_sl,
-           p, ifscope, aid, pcid, flags, arg, arglen));
+           p, ifscope, aid, pcid, flags, arg, arglen, uio, bytes_written));
 }
 
 static int
@@ -1953,10 +2088,10 @@ udp_disconnect(struct socket *so)
 }
 
 static int
-udp_disconnectx(struct socket *so, associd_t aid, connid_t cid)
+udp_disconnectx(struct socket *so, sae_associd_t aid, sae_connid_t cid)
 {
 #pragma unused(cid)
-       if (aid != ASSOCID_ANY && aid != ASSOCID_ALL)
+       if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL)
                return (EINVAL);
 
        return (udp_disconnect(so));
@@ -1966,22 +2101,29 @@ static int
 udp_send(struct socket *so, int flags, struct mbuf *m,
     struct sockaddr *addr, struct mbuf *control, struct proc *p)
 {
+#ifndef FLOW_DIVERT
 #pragma unused(flags)
+#endif /* !(FLOW_DIVERT) */
        struct inpcb *inp;
 
        inp = sotoinpcb(so);
-       if (inp == NULL
-#if NECP
-               || (necp_socket_should_use_flow_divert(inp))
-#endif /* NECP */
-               ) {
+       if (inp == NULL) {
                if (m != NULL)
                        m_freem(m);
                if (control != NULL)
                        m_freem(control);
-               return (inp == NULL ? EINVAL : EPROTOTYPE);
+               return (EINVAL);
        }
 
+#if NECP
+#if FLOW_DIVERT
+       if (necp_socket_should_use_flow_divert(inp)) {
+               /* Implicit connect */
+               return (flow_divert_implicit_data_out(so, flags, m, addr, control, p));
+       }
+#endif /* FLOW_DIVERT */
+#endif /* NECP */
+
        return (udp_output(inp, m, addr, control, p));
 }
 
@@ -2251,3 +2393,258 @@ udp_input_checksum(struct mbuf *m, struct udphdr *uh, int off, int ulen)
 
        return (0);
 }
+
+extern void
+udp_fill_keepalive_offload_frames(ifnet_t ifp,
+     struct ifnet_keepalive_offload_frame *frames_array,
+     u_int32_t frames_array_count, size_t frame_data_offset,
+     u_int32_t *used_frames_count);
+
+void
+udp_fill_keepalive_offload_frames(ifnet_t ifp,
+    struct ifnet_keepalive_offload_frame *frames_array,
+    u_int32_t frames_array_count, size_t frame_data_offset,
+    u_int32_t *used_frames_count)
+{
+       struct inpcb *inp;
+       inp_gen_t gencnt;
+       u_int32_t frame_index = *used_frames_count;
+
+       if (ifp == NULL || frames_array == NULL ||
+           frames_array_count == 0 ||
+           frame_index >= frames_array_count ||
+           frame_data_offset >= IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE)
+               return;
+
+       lck_rw_lock_shared(udbinfo.ipi_lock);
+       gencnt = udbinfo.ipi_gencnt;
+       LIST_FOREACH(inp, udbinfo.ipi_listhead, inp_list) {
+               struct socket *so;
+               u_int8_t *data;
+               struct ifnet_keepalive_offload_frame *frame;
+               struct mbuf *m = NULL;
+
+               if (frame_index >= frames_array_count)
+                       break;
+
+               if (inp->inp_gencnt > gencnt ||
+                   inp->inp_state == INPCB_STATE_DEAD)
+                       continue;
+
+               if ((so = inp->inp_socket) == NULL ||
+                   (so->so_state & SS_DEFUNCT))
+                       continue;
+               /*
+                * check for keepalive offload flag without socket
+                * lock to avoid a deadlock
+                */
+               if (!(inp->inp_flags2 & INP2_KEEPALIVE_OFFLOAD)) {
+                       continue;
+               }
+
+               udp_lock(so, 1, 0);
+               if (!(inp->inp_vflag & (INP_IPV4 | INP_IPV6))) {
+                       udp_unlock(so, 1, 0);
+                       continue;
+               }
+               if ((inp->inp_vflag & INP_IPV4) &&
+                   (inp->inp_laddr.s_addr == INADDR_ANY ||
+                   inp->inp_faddr.s_addr == INADDR_ANY)) {
+                       udp_unlock(so, 1, 0);
+                       continue;
+               }
+               if ((inp->inp_vflag & INP_IPV6) &&
+                   (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ||
+                   IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))) {
+                       udp_unlock(so, 1, 0);
+                       continue;
+               }
+               if (inp->inp_lport == 0 || inp->inp_fport == 0) {
+                       udp_unlock(so, 1, 0);
+                       continue;
+               }
+               if (inp->inp_last_outifp == NULL ||
+                   inp->inp_last_outifp->if_index != ifp->if_index) {
+                       udp_unlock(so, 1, 0);
+                       continue;
+               }
+               if ((inp->inp_vflag & INP_IPV4)) {
+                       if ((frame_data_offset + sizeof(struct udpiphdr) + 
+                           inp->inp_keepalive_datalen) >
+                           IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE) {
+                               udp_unlock(so, 1, 0);
+                               continue;
+                       }
+                       if ((sizeof(struct udpiphdr) +
+                           inp->inp_keepalive_datalen) > _MHLEN) {
+                               udp_unlock(so, 1, 0);
+                               continue;
+                       }
+               } else {
+                       if ((frame_data_offset + sizeof(struct ip6_hdr) +
+                           sizeof(struct udphdr) +
+                           inp->inp_keepalive_datalen) >
+                           IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE) {
+                               udp_unlock(so, 1, 0);
+                               continue;
+                       }
+                       if ((sizeof(struct ip6_hdr) + sizeof(struct udphdr) +
+                           inp->inp_keepalive_datalen) > _MHLEN) {
+                               udp_unlock(so, 1, 0);
+                               continue;
+                       }
+               }
+               MGETHDR(m, M_WAIT, MT_HEADER);
+               if (m == NULL) {
+                       udp_unlock(so, 1, 0);
+                       continue;
+               }
+               /*
+                * This inp has all the information that is needed to
+                * generate an offload frame.
+                */
+               if (inp->inp_vflag & INP_IPV4) {
+                       struct ip *ip;
+                       struct udphdr *udp;
+
+                       frame = &frames_array[frame_index];
+                       frame->length = frame_data_offset +
+                           sizeof(struct udpiphdr) +
+                           inp->inp_keepalive_datalen;
+                       frame->ether_type =
+                           IFNET_KEEPALIVE_OFFLOAD_FRAME_ETHERTYPE_IPV4;
+                       frame->interval = inp->inp_keepalive_interval;
+                       switch (inp->inp_keepalive_type) {
+                       case UDP_KEEPALIVE_OFFLOAD_TYPE_AIRPLAY:
+                               frame->type =
+                                   IFNET_KEEPALIVE_OFFLOAD_FRAME_AIRPLAY;
+                               break;
+                       default:
+                               break;
+                       }
+                       data = mtod(m, u_int8_t *);
+                       bzero(data, sizeof(struct udpiphdr));
+                       ip = (__typeof__(ip))(void *)data;
+                       udp = (__typeof__(udp))(void *) (data +
+                           sizeof(struct ip));
+                       m->m_len = sizeof(struct udpiphdr);
+                       data = data + sizeof(struct udpiphdr);
+                       if (inp->inp_keepalive_datalen > 0 &&
+                           inp->inp_keepalive_data != NULL) {
+                               bcopy(inp->inp_keepalive_data, data,
+                                   inp->inp_keepalive_datalen);
+                               m->m_len += inp->inp_keepalive_datalen;
+                       }
+                       m->m_pkthdr.len = m->m_len;
+
+                       ip->ip_v = IPVERSION;
+                       ip->ip_hl = (sizeof(struct ip) >> 2);
+                       ip->ip_p = IPPROTO_UDP;
+                       ip->ip_len = htons(sizeof(struct udpiphdr) +
+                           (u_short)inp->inp_keepalive_datalen);
+                       ip->ip_ttl = inp->inp_ip_ttl;
+                       ip->ip_tos = inp->inp_ip_tos;
+                       ip->ip_src = inp->inp_laddr;
+                       ip->ip_dst = inp->inp_faddr;
+                       ip->ip_sum = in_cksum_hdr_opt(ip);
+
+                       udp->uh_sport = inp->inp_lport;
+                       udp->uh_dport = inp->inp_fport;
+                       udp->uh_ulen = htons(sizeof(struct udphdr) +
+                           (u_short)inp->inp_keepalive_datalen);
+
+                       if (!(inp->inp_flags & INP_UDP_NOCKSUM)) {
+                               udp->uh_sum = in_pseudo(ip->ip_src.s_addr,
+                                   ip->ip_dst.s_addr,
+                                   htons(sizeof(struct udphdr) +
+                                   (u_short)inp->inp_keepalive_datalen +
+                                   IPPROTO_UDP));
+                               m->m_pkthdr.csum_flags = CSUM_UDP;
+                               m->m_pkthdr.csum_data = offsetof(struct udphdr,
+                                   uh_sum);
+                       }
+                       m->m_pkthdr.pkt_proto = IPPROTO_UDP;
+                       in_delayed_cksum(m);
+                       bcopy(m->m_data, frame->data + frame_data_offset,
+                           m->m_len);
+               } else {
+                       struct ip6_hdr *ip6;
+                       struct udphdr *udp6;
+
+                       VERIFY(inp->inp_vflag & INP_IPV6);
+                       frame = &frames_array[frame_index];
+                       frame->length = frame_data_offset +
+                           sizeof(struct ip6_hdr) +
+                           sizeof(struct udphdr) +
+                           inp->inp_keepalive_datalen;
+                       frame->ether_type =
+                           IFNET_KEEPALIVE_OFFLOAD_FRAME_ETHERTYPE_IPV6;
+                       frame->interval = inp->inp_keepalive_interval;
+                       switch (inp->inp_keepalive_type) {
+                       case UDP_KEEPALIVE_OFFLOAD_TYPE_AIRPLAY:
+                               frame->type =
+                                   IFNET_KEEPALIVE_OFFLOAD_FRAME_AIRPLAY;
+                               break;
+                       default:
+                               break;
+                       }
+                       data = mtod(m, u_int8_t *);
+                       bzero(data, sizeof(struct ip6_hdr) + sizeof(struct udphdr));
+                       ip6 = (__typeof__(ip6))(void *)data;
+                       udp6 = (__typeof__(udp6))(void *)(data +
+                           sizeof(struct ip6_hdr));
+                       m->m_len = sizeof(struct ip6_hdr) +
+                           sizeof(struct udphdr);
+                       data = data + (sizeof(struct ip6_hdr) +
+                           sizeof(struct udphdr));
+                       if (inp->inp_keepalive_datalen > 0 &&
+                           inp->inp_keepalive_data != NULL) {
+                               bcopy(inp->inp_keepalive_data, data,
+                                   inp->inp_keepalive_datalen);
+                               m->m_len += inp->inp_keepalive_datalen;
+                       }
+                       m->m_pkthdr.len = m->m_len;
+                       ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK;
+                       ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
+                       ip6->ip6_vfc |= IPV6_VERSION;
+                       ip6->ip6_nxt = IPPROTO_UDP;
+                       ip6->ip6_hlim = ip6_defhlim;
+                       ip6->ip6_plen = htons(sizeof(struct udphdr) +
+                           (u_short)inp->inp_keepalive_datalen);
+                       ip6->ip6_src = inp->in6p_laddr;
+                       if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src))
+                               ip6->ip6_src.s6_addr16[1] = 0;
+
+                       ip6->ip6_dst = inp->in6p_faddr;
+                       if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst))
+                               ip6->ip6_dst.s6_addr16[1] = 0;
+
+                       udp6->uh_sport = inp->in6p_lport;
+                       udp6->uh_dport = inp->in6p_fport;
+                       udp6->uh_ulen = htons(sizeof(struct udphdr) +
+                           (u_short)inp->inp_keepalive_datalen);
+                       if (!(inp->inp_flags & INP_UDP_NOCKSUM)) {
+                               udp6->uh_sum = in6_pseudo(&ip6->ip6_src,
+                                   &ip6->ip6_dst,
+                                   htonl(sizeof(struct udphdr) +
+                                   (u_short)inp->inp_keepalive_datalen +
+                                   IPPROTO_UDP));
+                               m->m_pkthdr.csum_flags = CSUM_UDPIPV6;
+                               m->m_pkthdr.csum_data = offsetof(struct udphdr,
+                                   uh_sum);
+                       }
+                       m->m_pkthdr.pkt_proto = IPPROTO_UDP;
+                       in6_delayed_cksum(m);
+                       bcopy(m->m_data, frame->data + frame_data_offset,
+                           m->m_len);
+               }
+               if (m != NULL) {
+                       m_freem(m);
+                       m = NULL;
+               }
+               frame_index++;
+               udp_unlock(so, 1, 0);
+       }
+       lck_rw_done(udbinfo.ipi_lock);
+       *used_frames_count = frame_index;
+}
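
udp_fill_keepalive_offload_frames(), added above, walks the UDP PCB list and pre-builds one complete keepalive datagram per eligible socket into the driver-supplied frame slots, finishing the IPv4 header checksum with in_cksum_hdr_opt() and seeding the UDP checksum for in_delayed_cksum()/in6_delayed_cksum() to complete. For reference, a standalone sketch of the ones'-complement header checksum per RFC 1071; the helper name here is illustrative, not the kernel's:

#include <stddef.h>
#include <stdint.h>

/* hlen is the header length in bytes (a multiple of 2); the checksum field
 * inside the header must be zero before this is called. */
static uint16_t
ipv4_header_checksum(const void *hdr, size_t hlen)
{
        const uint16_t *p = hdr;
        uint32_t sum = 0;

        while (hlen > 1) {
                sum += *p++;
                hlen -= 2;
        }
        /* fold the carries back into 16 bits, then complement */
        while (sum >> 16)
                sum = (sum & 0xffff) + (sum >> 16);
        return ((uint16_t)~sum);
}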
index 42d56a3c825dadb22104c68a2aec60c4eaaa92f2..c82931b50b50b61043e94c56457279013355e5c7 100644 (file)
@@ -168,8 +168,8 @@ extern int udp_ctloutput(struct socket *, struct sockopt *);
 extern void udp_init(struct protosw *, struct domain *);
 extern void udp_input(struct mbuf *, int);
 extern int udp_connectx_common(struct socket *, int, struct sockaddr_list **,
-    struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *,
-    uint32_t, void *, uint32_t);
+    struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t,
+    sae_connid_t *, uint32_t, void *, uint32_t, struct uio*, user_ssize_t *);
 extern void udp_notify(struct inpcb *inp, int errno);
 extern int udp_shutdown(struct socket *so);
 extern int udp_lock(struct socket *, int, void *);
index ae89bdbdad93df1bd8d5d1280d342337fa5d0cc3..2e6677f6c3c92319a9858019cd67510d7d3ca42e 100644 (file)
@@ -10,27 +10,34 @@ include $(MakeInc_def)
 DATAFILES = \
        ah.h ipsec.h \
        esp.h in6.h ipcomp.h raw_ip6.h \
-       in6_var.h nd6.h
+       scope6_var.h in6_var.h nd6.h
 
 PRIVATE_DATAFILES = \
-       in6_pcb.h ip6_var.h mld6_var.h ip6_fw.h
+       in6.h \
+       in6_pcb.h \
+       in6_var.h \
+       ip6_fw.h \
+       ip6_var.h \
+       mld6_var.h \
+       nd6.h \
+       scope6_var.h
 
 PRIVATE_KERNELFILES = \
        ah6.h esp6.h esp_rijndael.h in6_gif.h in6_ifattach.h \
        ip6_ecn.h ip6protosw.h ipcomp6.h ipsec6.h \
-       scope6_var.h tcp6_var.h udp6_var.h
+       tcp6_var.h udp6_var.h
 
 INSTALL_MI_LIST        = ${DATAFILES}
 
 INSTALL_MI_DIR = netinet6
 
-EXPORT_MI_LIST = ${DATAFILES}
+EXPORT_MI_LIST = ${DATAFILES}
 
 EXPORT_MI_DIR = ${INSTALL_MI_DIR}
 
-INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES}
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
 
-INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES}
+INSTALL_KF_MI_LCL_LIST = $(sort ${DATAFILES} ${PRIVATE_DATAFILES} ${PRIVATE_KERNELFILES})
 
 include $(MakeInc_rule)
 include $(MakeInc_dir)
index 417c67360f28079d6f158c7a4626b29afa582d1d..60a6e5b691d8db47d92060d96b720997c548ca1c 100644 (file)
@@ -961,10 +961,9 @@ ah_hmac_sha2_384_init(state, sav)
 
        state->sav = sav;
        state->foo = (void *)_MALLOC(128 + 128 + sizeof(SHA384_CTX),
-           M_TEMP, M_NOWAIT);
+           M_TEMP, M_NOWAIT | M_ZERO);
        if (!state->foo)
                return ENOBUFS;
-       bzero(state->foo, 128 + 128 + sizeof(SHA384_CTX));
 
        ipad = (u_char *)state->foo;
        opad = (u_char *)(ipad + 128);
@@ -1104,10 +1103,9 @@ ah_hmac_sha2_512_init(state, sav)
 
        state->sav = sav;
        state->foo = (void *)_MALLOC(128 + 128 + sizeof(SHA512_CTX),
-           M_TEMP, M_NOWAIT);
+           M_TEMP, M_NOWAIT | M_ZERO);
        if (!state->foo)
                return ENOBUFS;
-       bzero(state->foo, 128 + 128 + sizeof(SHA512_CTX));
 
        ipad = (u_char *)state->foo;
        opad = (u_char *)(ipad + 128);
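
Both hunks above only fold the explicit bzero() into the M_ZERO allocation flag; the 128 + 128 + sizeof(ctx) sizing is unchanged because HMAC with SHA-384/512 uses a 128-byte block, so ipad and opad each occupy 128 bytes ahead of the hash context. A simplified sketch of the RFC 2104 pad setup those buffers receive (pre-hashing of over-long keys omitted):

#include <string.h>
#include <stdint.h>

#define HMAC_SHA512_BLOCK 128

static void
hmac_pads_init(uint8_t ipad[HMAC_SHA512_BLOCK], uint8_t opad[HMAC_SHA512_BLOCK],
    const uint8_t *key, size_t keylen)
{
        size_t i;

        memset(ipad, 0, HMAC_SHA512_BLOCK);
        if (keylen > HMAC_SHA512_BLOCK)
                keylen = HMAC_SHA512_BLOCK;     /* real code hashes longer keys first */
        memcpy(ipad, key, keylen);
        memcpy(opad, ipad, HMAC_SHA512_BLOCK);
        for (i = 0; i < HMAC_SHA512_BLOCK; i++) {
                ipad[i] ^= 0x36;                /* RFC 2104 inner pad */
                opad[i] ^= 0x5c;                /* RFC 2104 outer pad */
        }
}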
index 00967821bec229e10c605f82d770d2eaa7f8116e..28f53d5cc3d236141488a08423bb5acdc6e42bd7 100644 (file)
@@ -430,7 +430,10 @@ ah4_input(struct mbuf *m, int off)
                }
                ip = mtod(m, struct ip *);
                /* ECN consideration. */
-               ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos);
+               if (ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos) == 0) {
+                       IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
+                       goto fail;
+               }
                if (!key_checktunnelsanity(sav, AF_INET,
                            (caddr_t)&ip->ip_src, (caddr_t)&ip->ip_dst)) {
                        ipseclog((LOG_NOTICE, "ipsec tunnel address mismatch "
@@ -607,7 +610,7 @@ ah6_input(struct mbuf **mp, int *offp, int proto)
        struct secasvar *sav = NULL;
        u_int16_t nxt;
        size_t stripsiz = 0;
-
+       sa_family_t ifamily;
 
        IP6_EXTHDR_CHECK(m, off, sizeof(struct ah), {return IPPROTO_DONE;});
        ah = (struct ah *)(void *)(mtod(m, caddr_t) + off);
@@ -816,7 +819,7 @@ ah6_input(struct mbuf **mp, int *offp, int proto)
                /* RFC 2402 */
                stripsiz = sizeof(struct newah) + siz1;
        }
-       if (ipsec6_tunnel_validate(m, off + stripsiz, nxt, sav)) {
+       if (ipsec6_tunnel_validate(m, off + stripsiz, nxt, sav, &ifamily)) {
                ifaddr_t ifa;
                struct sockaddr_storage addr;
 
@@ -829,6 +832,12 @@ ah6_input(struct mbuf **mp, int *offp, int proto)
                 */
                u_int32_t flowinfo;     /*net endian*/
 
+               if (ifamily == AF_INET) {
+                       ipseclog((LOG_NOTICE, "ipsec tunnel protocol mismatch "
+                           "in IPv6 AH input: %s\n", ipsec_logsastr(sav)));
+                       goto fail;
+               }
+
                flowinfo = ip6->ip6_flow;
                m_adj(m, off + stripsiz);
                if (m->m_len < sizeof(*ip6)) {
@@ -844,7 +853,10 @@ ah6_input(struct mbuf **mp, int *offp, int proto)
                }
                ip6 = mtod(m, struct ip6_hdr *);
                /* ECN consideration. */
-               ip6_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip6->ip6_flow);
+               if (ip6_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip6->ip6_flow) == 0) {
+                       IPSEC_STAT_INCREMENT(ipsec6stat.in_inval);
+                       goto fail;
+               }
                if (!key_checktunnelsanity(sav, AF_INET6,
                            (caddr_t)&ip6->ip6_src, (caddr_t)&ip6->ip6_dst)) {
                        ipseclog((LOG_NOTICE, "ipsec tunnel address mismatch "
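
Among the ah_input.c changes above, the ip_ecn_egress()/ip6_ecn_egress() return values are now checked during tunnel decapsulation, and a failure drops the packet instead of being ignored. A simplified sketch of the rule being enforced, in the spirit of RFC 6040; the kernel helpers additionally take an ECN_ALLOWED/ECN_FORBIDDEN mode argument that is omitted here:

#include <stdint.h>

#define ECN_NOT_ECT     0x00    /* low two TOS bits */
#define ECN_CE          0x03

/* Returns 0 when the packet must be dropped, mirroring the "== 0" checks
 * added above; otherwise propagates the CE mark to the inner header. */
static int
ecn_egress_ok(uint8_t outer_tos, uint8_t *inner_tos)
{
        if ((outer_tos & ECN_CE) == ECN_CE) {
                if ((*inner_tos & ECN_CE) == ECN_NOT_ECT)
                        return (0);             /* CE outer, Not-ECT inner: invalid */
                *inner_tos |= ECN_CE;           /* mark congestion on the inner header */
        }
        return (1);
}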
index 048a792cbcf5b9aa1a40cba9aca7f6a2583bd754..b2deaa2f3f95d29c99403ef459537935923247c1 100644 (file)
@@ -117,6 +117,10 @@ struct esp_algorithm {
                struct secasvar *, u_int8_t *, u_int8_t *);
        int (*blockencrypt)(const struct esp_algorithm *,
                struct secasvar *, u_int8_t *, u_int8_t *);
+       /* For Authenticated Encryption Methods */
+       size_t icvlen;
+       int (*finalizedecrypt)(struct secasvar *, u_int8_t *, uint);
+       int (*finalizeencrypt)(struct secasvar *, u_int8_t *, uint);
 };
 
 extern const struct esp_algorithm *esp_algorithm_lookup(int);
index b5236fdd7db9c6bdb978f1298d68b20c36471fb1..90c01fe194f91235c06badddd102b90cd1fd8534 100644 (file)
@@ -142,37 +142,54 @@ static int esp_cbc_decrypt(struct mbuf *, size_t,
        struct secasvar *, const struct esp_algorithm *, int);
 static int esp_cbc_encrypt(struct mbuf *, size_t, size_t,
        struct secasvar *, const struct esp_algorithm *, int);
+static int esp_gcm_mature(struct secasvar *);
 
 #define MAXIVLEN       16
 
+#define ESP_AESGCM_KEYLEN128 160 // 16-byte key + 4-byte salt
+#define ESP_AESGCM_KEYLEN192 224 // 24-byte key + 4-byte salt
+#define ESP_AESGCM_KEYLEN256 288 // 32-byte key + 4-byte salt
+
 static const struct esp_algorithm des_cbc =
        { 8, -1, esp_descbc_mature, 64, 64, esp_des_schedlen,
                "des-cbc",
                esp_descbc_ivlen, esp_cbc_decrypt,
                esp_cbc_encrypt, esp_des_schedule,
-               esp_des_blockdecrypt, esp_des_blockencrypt, };
+               esp_des_blockdecrypt, esp_des_blockencrypt,
+               0, 0, 0 };
 static const struct esp_algorithm des3_cbc =
        { 8, 8, esp_cbc_mature, 192, 192, esp_3des_schedlen,
                "3des-cbc",
                esp_common_ivlen, esp_cbc_decrypt,
                esp_cbc_encrypt, esp_3des_schedule,
-               esp_3des_blockdecrypt, esp_3des_blockencrypt, };
+               esp_3des_blockdecrypt, esp_3des_blockencrypt,
+               0, 0, 0 };
 static const struct esp_algorithm null_esp =
        { 1, 0, esp_null_mature, 0, 2048, 0, "null",
                esp_common_ivlen, esp_null_decrypt,
-               esp_null_encrypt, NULL, NULL, NULL };
+               esp_null_encrypt, NULL, NULL, NULL,
+               0, 0, 0 };
 static const struct esp_algorithm aes_cbc =
        { 16, 16, esp_cbc_mature, 128, 256, esp_aes_schedlen,
                "aes-cbc",
                esp_common_ivlen, esp_cbc_decrypt_aes,
                esp_cbc_encrypt_aes, esp_aes_schedule,
-               0, 0 };
+               0, 0,
+               0, 0, 0 };
+static const struct esp_algorithm aes_gcm =
+       { 4, 8, esp_gcm_mature, ESP_AESGCM_KEYLEN128, ESP_AESGCM_KEYLEN256, esp_gcm_schedlen,
+               "aes-gcm",
+               esp_common_ivlen, esp_gcm_decrypt_aes,
+               esp_gcm_encrypt_aes, esp_gcm_schedule,
+               0, 0,
+               16, esp_gcm_decrypt_finalize, esp_gcm_encrypt_finalize};
 
 static const struct esp_algorithm *esp_algorithms[] = {
        &des_cbc,
        &des3_cbc,
        &null_esp,
-       &aes_cbc
+       &aes_cbc,
+       &aes_gcm,
 };
 
 const struct esp_algorithm *
@@ -188,6 +205,8 @@ esp_algorithm_lookup(idx)
                return &null_esp;
        case SADB_X_EALG_RIJNDAELCBC:
                return &aes_cbc;
+       case SADB_X_EALG_AES_GCM:
+               return &aes_gcm;
        default:
                return NULL;
        }
@@ -468,6 +487,62 @@ esp_cbc_mature(sav)
        return 0;
 }
 
+static int
+esp_gcm_mature(sav)
+       struct secasvar *sav;
+{
+       int keylen;
+       const struct esp_algorithm *algo;
+
+       if (sav->flags & SADB_X_EXT_OLD) {
+               ipseclog((LOG_ERR,
+                   "esp_gcm_mature: algorithm incompatible with esp-old\n"));
+               return 1;
+       }
+       if (sav->flags & SADB_X_EXT_DERIV) {
+               ipseclog((LOG_ERR,
+                   "esp_gcm_mature: algorithm incompatible with derived\n"));
+               return 1;
+       }
+
+       if (!sav->key_enc) {
+               ipseclog((LOG_ERR, "esp_gcm_mature: no key is given.\n"));
+               return 1;
+       }
+
+       algo = esp_algorithm_lookup(sav->alg_enc);
+       if (!algo) {
+               ipseclog((LOG_ERR,
+                   "esp_gcm_mature: unsupported algorithm.\n"));
+               return 1;
+       }
+
+       keylen = sav->key_enc->sadb_key_bits;
+       if (keylen < algo->keymin || algo->keymax < keylen) {
+               ipseclog((LOG_ERR,
+                   "esp_gcm_mature %s: invalid key length %d.\n",
+                   algo->name, sav->key_enc->sadb_key_bits));
+               return 1;
+       }
+       switch (sav->alg_enc) {
+       case SADB_X_EALG_AES_GCM:
+               /* allows specific key sizes only */
+               if (!(keylen == ESP_AESGCM_KEYLEN128 || keylen == ESP_AESGCM_KEYLEN192 || keylen == ESP_AESGCM_KEYLEN256)) {
+                       ipseclog((LOG_ERR,
+                           "esp_gcm_mature %s: invalid key length %d.\n",
+                           algo->name, keylen));
+                       return 1;
+               }
+               break;
+       default:
+               ipseclog((LOG_ERR,
+                         "esp_gcm_mature %s: invalid algo %d.\n", algo->name, sav->alg_enc));
+               return 1;
+       }
+
+       return 0;
+}
+
 static int
 esp_3des_schedlen(
        __unused const struct esp_algorithm *algo)
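
The aes-gcm entry and esp_gcm_mature() above size the SADB key blob to include the 4-byte RFC 4106 salt alongside the AES key, which is why the accepted lengths are 160, 224 and 288 bits rather than the bare 128/192/256. A trivial check of that arithmetic:

#include <assert.h>

int
main(void)
{
        const int salt_bits = 4 * 8;            /* ESP_GCM_SALT_LEN in bits */

        assert(128 + salt_bits == 160);         /* ESP_AESGCM_KEYLEN128 */
        assert(192 + salt_bits == 224);         /* ESP_AESGCM_KEYLEN192 */
        assert(256 + salt_bits == 288);         /* ESP_AESGCM_KEYLEN256 */
        return (0);
}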
index c8a809490c863549cfd8ba5a469e9f0f45cbc516..277e6963ed5bae230ed785569387999cedbc438d 100644 (file)
@@ -138,7 +138,7 @@ extern lck_mtx_t  *sadb_mutex;
                ? sizeof(struct newesp) : sizeof(struct esp))
 
 static struct ip *
-esp4_input_strip_UDP_encap (struct mbuf *m, int iphlen)
+esp4_input_strip_udp_encap (struct mbuf *m, int iphlen)
 {
        // strip the udp header that's encapsulating ESP
        struct ip *ip;
@@ -155,6 +155,24 @@ esp4_input_strip_UDP_encap (struct mbuf *m, int iphlen)
        return ip;
 }
 
+static struct ip6_hdr *
+esp6_input_strip_udp_encap (struct mbuf *m, int ip6hlen)
+{
+       // strip the udp header that's encapsulating ESP
+       struct ip6_hdr *ip6;
+       size_t     stripsiz = sizeof(struct udphdr);
+
+       ip6 = mtod(m, __typeof__(ip6));
+       ovbcopy((caddr_t)ip6, (caddr_t)(((u_char *)ip6) + stripsiz), ip6hlen);
+       m->m_data += stripsiz;
+       m->m_len -= stripsiz;
+       m->m_pkthdr.len -= stripsiz;
+       ip6 = mtod(m, __typeof__(ip6));
+       ip6->ip6_plen = ip6->ip6_plen - stripsiz;
+       ip6->ip6_nxt = IPPROTO_ESP;
+       return ip6;
+}
+
 void
 esp4_input(m, off)
        struct mbuf *m;
@@ -257,6 +275,16 @@ esp4_input(m, off)
        }
 
        seq = ntohl(((struct newesp *)esp)->esp_seq);
+
+       /* Save ICV from packet for verification later */
+       size_t siz = 0;
+       unsigned char saved_icv[AH_MAXSUMSIZE];
+       if (algo->finalizedecrypt) {
+               siz = algo->icvlen;
+               m_copydata(m, m->m_pkthdr.len - siz, siz, (caddr_t) saved_icv);
+               goto delay_icv;
+       }
+
        if (!((sav->flags & SADB_X_EXT_OLD) == 0 && sav->replay
         && (sav->alg_auth && sav->key_auth)))
                goto noreplaycheck;
@@ -283,7 +311,6 @@ esp4_input(m, off)
        u_char sum0[AH_MAXSUMSIZE] __attribute__((aligned(4)));
        u_char sum[AH_MAXSUMSIZE] __attribute__((aligned(4)));
        const struct ah_algorithm *sumalgo;
-       size_t siz;
 
        sumalgo = ah_algorithm_lookup(sav->alg_auth);
        if (!sumalgo)
@@ -317,6 +344,8 @@ esp4_input(m, off)
                goto bad;
        }
 
+delay_icv:
+
        /* strip off the authentication data */
        m_adj(m, -siz);
        ip = mtod(m, struct ip *);
@@ -398,6 +427,23 @@ noreplaycheck:
 
        m->m_flags |= M_DECRYPTED;
 
+       if (algo->finalizedecrypt)
+        {
+           unsigned char tag[algo->icvlen];
+           if ((*algo->finalizedecrypt)(sav, tag, algo->icvlen)) {
+               ipseclog((LOG_ERR, "packet decryption ICV failure\n"));
+               IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
+               KERNEL_DEBUG(DBG_FNC_DECRYPT | DBG_FUNC_END, 1,0,0,0,0);
+               goto bad;
+           }     
+           if (memcmp(saved_icv, tag, algo->icvlen)) {
+               ipseclog((LOG_ERR, "packet decryption ICV mismatch\n"));
+               IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
+               KERNEL_DEBUG(DBG_FNC_DECRYPT | DBG_FUNC_END, 1,0,0,0,0);
+               goto bad;
+           }
+       }
+
        /*
         * find the trailer of the ESP.
         */
@@ -448,7 +494,7 @@ noreplaycheck:
                                sav->remote_ike_port = ntohs(encap_uh->uh_sport);
                        }
                }
-               ip = esp4_input_strip_UDP_encap(m, off);
+               ip = esp4_input_strip_udp_encap(m, off);
                esp = (struct esp *)(void *)(((u_int8_t *)ip) + off);
        }
 
@@ -491,7 +537,10 @@ noreplaycheck:
                        }
                        ip = mtod(m, struct ip *);
                        /* ECN consideration. */
-                       ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos);
+                       if (ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos) == 0) {
+                               IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
+                               goto bad;
+                       }
                        if (!key_checktunnelsanity(sav, AF_INET,
                            (caddr_t)&ip->ip_src, (caddr_t)&ip->ip_dst)) {
                                ipseclog((LOG_ERR, "ipsec tunnel address mismatch "
@@ -533,8 +582,10 @@ noreplaycheck:
                        ip6 = mtod(m, struct ip6_hdr *);
 
                        /* ECN consideration. */
-                       /* XXX To be fixed later if needed */
-                       //  ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos);
+                       if (ip64_ecn_egress(ip4_ipsec_ecn, &tos, &ip6->ip6_flow) == 0) {
+                               IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
+                               goto bad;
+                       }
 
                        if (!key_checktunnelsanity(sav, AF_INET6,
                            (caddr_t)&ip6->ip6_src, (caddr_t)&ip6->ip6_dst)) {
@@ -747,6 +798,7 @@ esp6_input(struct mbuf **mp, int *offp, int proto)
 #pragma unused(proto)
        struct mbuf *m = *mp;
        int off = *offp;
+       struct ip *ip;
        struct ip6_hdr *ip6;
        struct esp *esp;
        struct esptail esptail;
@@ -755,9 +807,11 @@ esp6_input(struct mbuf **mp, int *offp, int proto)
        struct secasvar *sav = NULL;
        size_t taillen;
        u_int16_t nxt;
+       char *nproto;
        const struct esp_algorithm *algo;
        int ivlen;
        size_t esplen;
+       sa_family_t ifamily;
 
        /* sanity check for alignment. */
        if (off % 4 != 0 || m->m_pkthdr.len % 4 != 0) {
@@ -789,6 +843,14 @@ esp6_input(struct mbuf **mp, int *offp, int proto)
                goto bad;
        }
 
+       nproto = ip6_get_prevhdr(m, off);
+       if (nproto == NULL || (*nproto != IPPROTO_ESP &&
+           !(*nproto == IPPROTO_UDP && off >= sizeof(struct udphdr)))) {
+               ipseclog((LOG_DEBUG, "IPv6 ESP input: invalid protocol type\n"));
+               IPSEC_STAT_INCREMENT(ipsec6stat.in_inval);
+               goto bad;
+       }
+
        /* find the sassoc. */
        spi = esp->esp_spi;
 
@@ -832,6 +894,15 @@ esp6_input(struct mbuf **mp, int *offp, int proto)
 
        seq = ntohl(((struct newesp *)esp)->esp_seq);
 
+       /* Save ICV from packet for verification later */
+       size_t siz = 0;
+       unsigned char saved_icv[AH_MAXSUMSIZE];
+       if (algo->finalizedecrypt) {
+               siz = algo->icvlen;
+               m_copydata(m, m->m_pkthdr.len - siz, siz, (caddr_t) saved_icv);
+               goto delay_icv;
+       }
+
        if (!((sav->flags & SADB_X_EXT_OLD) == 0 && sav->replay
         && (sav->alg_auth && sav->key_auth)))
                goto noreplaycheck;
@@ -858,7 +929,6 @@ esp6_input(struct mbuf **mp, int *offp, int proto)
        u_char sum0[AH_MAXSUMSIZE] __attribute__((aligned(4)));
        u_char sum[AH_MAXSUMSIZE] __attribute__((aligned(4)));
        const struct ah_algorithm *sumalgo;
-       size_t siz;
 
        sumalgo = ah_algorithm_lookup(sav->alg_auth);
        if (!sumalgo)
@@ -892,6 +962,8 @@ esp6_input(struct mbuf **mp, int *offp, int proto)
                goto bad;
        }
 
+delay_icv:
+
        /* strip off the authentication data */
        m_adj(m, -siz);
        ip6 = mtod(m, struct ip6_hdr *);
@@ -969,6 +1041,23 @@ noreplaycheck:
 
        m->m_flags |= M_DECRYPTED;
 
+       if (algo->finalizedecrypt)
+        {
+           unsigned char tag[algo->icvlen];
+           if ((*algo->finalizedecrypt)(sav, tag, algo->icvlen)) {
+               ipseclog((LOG_ERR, "packet decryption ICV failure\n"));
+               IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
+               KERNEL_DEBUG(DBG_FNC_DECRYPT | DBG_FUNC_END, 1,0,0,0,0);
+               goto bad;
+           }     
+           if (memcmp(saved_icv, tag, algo->icvlen)) {
+               ipseclog((LOG_ERR, "packet decryption ICV mismatch\n"));
+               IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
+               KERNEL_DEBUG(DBG_FNC_DECRYPT | DBG_FUNC_END, 1,0,0,0,0);
+               goto bad;
+           }
+       }
+
        /*
         * find the trailer of the ESP.
         */
@@ -1002,8 +1091,38 @@ noreplaycheck:
                }
        }
 
+       if (*nproto == IPPROTO_UDP) {
+               // offset includes the outer ip and udp header lengths.
+               if (m->m_len < off) {
+                       m = m_pullup(m,  off);
+                       if (!m) {
+                               ipseclog((LOG_DEBUG,
+                                       "IPv6 ESP input: invalid udp encapsulated ESP packet length\n"));
+                               IPSEC_STAT_INCREMENT(ipsec6stat.in_inval);
+                               goto bad;
+                       }
+               }
+
+               // check the UDP encap header to detect changes in the source port, and then strip the header
+               off -= sizeof(struct udphdr); // off no longer includes the udphdr's size
+               // if peer is behind nat and this is the latest esp packet
+               if ((sav->flags & SADB_X_EXT_NATT_DETECTED_PEER) != 0 &&
+                   (sav->flags & SADB_X_EXT_OLD) == 0 &&
+                   seq && sav->replay &&
+                   seq >= sav->replay->lastseq)  {
+                       struct udphdr *encap_uh = (__typeof__(encap_uh))(void *)((caddr_t)ip6 + off);
+                       if (encap_uh->uh_sport &&
+                           ntohs(encap_uh->uh_sport) != sav->remote_ike_port) {
+                               sav->remote_ike_port = ntohs(encap_uh->uh_sport);
+                       }
+               }
+               ip6 = esp6_input_strip_udp_encap(m, off);
+               esp = (struct esp *)(void *)(((u_int8_t *)ip6) + off);
+       }
+
+
        /* was it transmitted over the IPsec tunnel SA? */
-       if (ipsec6_tunnel_validate(m, off + esplen + ivlen, nxt, sav)) {
+       if (ipsec6_tunnel_validate(m, off + esplen + ivlen, nxt, sav, &ifamily)) {
                ifaddr_t ifa;
                struct sockaddr_storage addr;
 
@@ -1017,32 +1136,79 @@ noreplaycheck:
                u_int32_t flowinfo;     /*net endian*/
                flowinfo = ip6->ip6_flow;
                m_adj(m, off + esplen + ivlen);
-               if (m->m_len < sizeof(*ip6)) {
+               if (ifamily == AF_INET6) {
+                       if (m->m_len < sizeof(*ip6)) {
 #ifndef PULLDOWN_TEST
-                       /*
-                        * m_pullup is prohibited in KAME IPv6 input processing
-                        * but there's no other way!
-                        */
+                               /*
+                                * m_pullup is prohibited in KAME IPv6 input processing
+                                * but there's no other way!
+                                */
 #else
-                       /* okay to pullup in m_pulldown style */
+                               /* okay to pullup in m_pulldown style */
 #endif
-                       m = m_pullup(m, sizeof(*ip6));
-                       if (!m) {
+                               m = m_pullup(m, sizeof(*ip6));
+                               if (!m) {
+                                       IPSEC_STAT_INCREMENT(ipsec6stat.in_inval);
+                                       goto bad;
+                               }
+                       }
+                       ip6 = mtod(m, struct ip6_hdr *);
+                       /* ECN consideration. */
+                       if (ip6_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip6->ip6_flow) == 0) {
                                IPSEC_STAT_INCREMENT(ipsec6stat.in_inval);
                                goto bad;
                        }
-               }
-               ip6 = mtod(m, struct ip6_hdr *);
-               /* ECN consideration. */
-               ip6_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip6->ip6_flow);
-               if (!key_checktunnelsanity(sav, AF_INET6,
-                           (caddr_t)&ip6->ip6_src, (caddr_t)&ip6->ip6_dst)) {
-                       ipseclog((LOG_ERR, "ipsec tunnel address mismatch "
-                           "in IPv6 ESP input: %s %s\n",
-                           ipsec6_logpacketstr(ip6, spi),
-                           ipsec_logsastr(sav)));
-                       IPSEC_STAT_INCREMENT(ipsec6stat.in_inval);
-                       goto bad;
+                       if (!key_checktunnelsanity(sav, AF_INET6,
+                                   (caddr_t)&ip6->ip6_src, (caddr_t)&ip6->ip6_dst)) {
+                               ipseclog((LOG_ERR, "ipsec tunnel address mismatch "
+                                   "in IPv6 ESP input: %s %s\n",
+                                   ipsec6_logpacketstr(ip6, spi),
+                                   ipsec_logsastr(sav)));
+                               IPSEC_STAT_INCREMENT(ipsec6stat.in_inval);
+                               goto bad;
+                       }
+
+                       if (ip6_doscopedroute) {
+                               struct sockaddr_in6 *ip6addr;
+
+                               bzero(&addr, sizeof(addr));
+                               ip6addr = (__typeof__(ip6addr))&addr;
+                               ip6addr->sin6_family = AF_INET6;
+                               ip6addr->sin6_len = sizeof(*ip6addr);
+                               ip6addr->sin6_addr = ip6->ip6_dst;
+                       }
+               } else if (ifamily == AF_INET) {
+                       struct sockaddr_in *ipaddr;
+
+                       if (m->m_len < sizeof(*ip)) {
+                               m = m_pullup(m, sizeof(*ip));
+                               if (!m) {
+                                       IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
+                                       goto bad;
+                               }
+                       }
+                       ip = mtod(m, struct ip *);
+                       /* ECN consideration. */
+                       if (ip46_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip->ip_tos) == 0) {
+                               IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
+                               goto bad;
+                       }
+                       if (!key_checktunnelsanity(sav, AF_INET,
+                           (caddr_t)&ip->ip_src, (caddr_t)&ip->ip_dst)) {
+                               ipseclog((LOG_ERR, "ipsec tunnel address mismatch "
+                           "in ESP input: %s %s\n",
+                           ipsec4_logpacketstr(ip, spi), ipsec_logsastr(sav)));
+                               IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
+                               goto bad;
+                       }
+
+                       if (ip_doscopedroute) {
+                               bzero(&addr, sizeof(addr));
+                               ipaddr = (__typeof__(ipaddr))&addr;
+                               ipaddr->sin_family = AF_INET;
+                               ipaddr->sin_len = sizeof(*ipaddr);
+                               ipaddr->sin_addr = ip->ip_dst;
+                       }
                }
 
                key_sa_recordxfer(sav, m);
@@ -1052,15 +1218,7 @@ noreplaycheck:
                        goto bad;
                }
 
-               if (ip6_doscopedroute) {
-                       struct sockaddr_in6 *ip6addr;
-
-                       bzero(&addr, sizeof(addr));
-                       ip6addr = (__typeof__(ip6addr))&addr;
-                       ip6addr->sin6_family = AF_INET6;
-                       ip6addr->sin6_len = sizeof(*ip6addr);
-                       ip6addr->sin6_addr = ip6->ip6_dst;
-
+               if (ip_doscopedroute || ip6_doscopedroute) {
                        // update the receiving interface address based on the inner address
                        ifa = ifa_ifwithaddr((struct sockaddr *)&addr);
                        if (ifa) {
@@ -1318,7 +1476,7 @@ esp6_ctlinput(cmd, sa, d)
                                if (sav->state == SADB_SASTATE_MATURE ||
                                    sav->state == SADB_SASTATE_DYING)
                                        valid++;
-                               key_freesav(sav, KEY_SADB_LOCKED);
+                               key_freesav(sav, KEY_SADB_UNLOCKED);
                        }
 
                        /* XXX Further validation? */
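
For AEAD ciphers the esp4_input()/esp6_input() changes above defer integrity checking: the trailing algo->icvlen bytes are copied aside and stripped before decryption, and only afterwards does finalizedecrypt produce the tag that is compared against the saved copy. A self-contained sketch of that pattern; the structure and callback below are stand-ins for the mbuf handling and (*algo->finalizedecrypt)(), not kernel interfaces:

#include <stdint.h>
#include <string.h>

#define ICV_MAX 16                      /* AES-GCM tag length used above */

struct aead_pkt {
        uint8_t *buf;                   /* encrypted payload followed by the received ICV */
        size_t   len;                   /* total length, ICV included */
        size_t   icvlen;                /* algo->icvlen */
};

static int
verify_trailing_icv(struct aead_pkt *pkt,
    int (*decrypt_and_finalize)(uint8_t *tag, size_t icvlen))
{
        uint8_t saved[ICV_MAX], computed[ICV_MAX];

        if (pkt->icvlen > ICV_MAX || pkt->len < pkt->icvlen)
                return (-1);
        /* 1. save the ICV that arrived at the tail of the packet */
        memcpy(saved, pkt->buf + pkt->len - pkt->icvlen, pkt->icvlen);
        /* 2. strip it (m_adj(m, -siz) in the kernel code) */
        pkt->len -= pkt->icvlen;
        /* 3. decrypt, then ask the cipher for the tag it computed */
        if (decrypt_and_finalize(computed, pkt->icvlen) != 0)
                return (-1);
        /* 4. both tags must match or the packet is dropped */
        return (memcmp(saved, computed, pkt->icvlen) == 0 ? 0 : -1);
}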
index 9f6c0e0f0cbe0dbee378120e718a99d8c1f9997c..e17336346bbb2585c5ba8abed492ba064730b187 100644 (file)
@@ -127,16 +127,15 @@ extern lck_mtx_t *sadb_mutex;
  * compute ESP header size.
  */
 size_t
-esp_hdrsiz(isr)
-       struct ipsecrequest *isr;
+esp_hdrsiz(__unused struct ipsecrequest *isr)
 {
 
+#if 0
        /* sanity check */
        if (isr == NULL)
                panic("esp_hdrsiz: NULL was passed.\n");
 
 
-#if 0
        lck_mtx_lock(sadb_mutex);
        {
                struct secasvar *sav;
@@ -247,13 +246,14 @@ esp_output(m, nexthdrp, md, af, sav)
        u_int8_t nxt = 0;
        size_t plen;    /*payload length to be encrypted*/
        size_t espoff;
+       size_t esphlen; /* sizeof(struct esp/newesp) + ivlen */
        int ivlen;
        int afnumber;
        size_t extendsiz;
        int error = 0;
        struct ipsecstat *stat;
        struct udphdr *udp = NULL;
-       int     udp_encapsulate = (sav->flags & SADB_X_EXT_NATT && af == AF_INET &&
+       int     udp_encapsulate = (sav->flags & SADB_X_EXT_NATT && (af == AF_INET || af == AF_INET6) &&
                        (esp_udp_encap_port & 0xFFFF) != 0);
 
        KERNEL_DEBUG(DBG_FNC_ESPOUT | DBG_FUNC_START, sav->ivlen,0,0,0,0);
@@ -339,7 +339,6 @@ esp_output(m, nexthdrp, md, af, sav)
        struct ip6_hdr *ip6 = NULL;
 #endif
        size_t esplen;  /* sizeof(struct esp/newesp) */
-       size_t esphlen; /* sizeof(struct esp/newesp) + ivlen */
        size_t hlen = 0;        /* ip header len */
 
        if (sav->flags & SADB_X_EXT_OLD) {
@@ -717,6 +716,21 @@ esp_output(m, nexthdrp, md, af, sav)
        /*
         * calculate ICV if required.
         */
+       size_t siz = 0;
+       u_char authbuf[AH_MAXSUMSIZE] __attribute__((aligned(4)));
+
+        if (algo->finalizeencrypt) {
+               siz = algo->icvlen;
+               if ((*algo->finalizeencrypt)(sav, authbuf, siz)) {
+                       ipseclog((LOG_ERR, "packet encryption ICV failure\n"));
+                       IPSEC_STAT_INCREMENT(stat->out_inval);
+                       error = EINVAL;
+                       KERNEL_DEBUG(DBG_FNC_ENCRYPT | DBG_FUNC_END, 1,error,0,0,0);
+                       goto fail;
+               }
+               goto fill_icv;
+       }
+
        if (!sav->replay)
                goto noantireplay;
        if (!sav->key_auth)
@@ -726,12 +740,6 @@ esp_output(m, nexthdrp, md, af, sav)
 
     {
                const struct ah_algorithm *aalgo;
-               u_char authbuf[AH_MAXSUMSIZE] __attribute__((aligned(4)));
-               u_char *p;
-               size_t siz;
-       #if INET
-               struct ip *ip;
-       #endif
        
                aalgo = ah_algorithm_lookup(sav->alg_auth);
                if (!aalgo)
@@ -747,7 +755,13 @@ esp_output(m, nexthdrp, md, af, sav)
                        IPSEC_STAT_INCREMENT(stat->out_inval);
                        goto fail;
                }
-       
+    }
+
+ fill_icv:
+    {
+               struct ip *ip;
+               u_char *p;
+
                n = m;
                while (n->m_next)
                        n = n->m_next;
@@ -803,10 +817,22 @@ esp_output(m, nexthdrp, md, af, sav)
     
        if (udp_encapsulate) {
                struct ip *ip;
-               ip = mtod(m, struct ip *);
-               udp->uh_ulen = htons(ntohs(ip->ip_len) - (IP_VHL_HL(ip->ip_vhl) << 2));
-       }
+               struct ip6_hdr *ip6;
 
+               switch (af) {
+               case AF_INET:
+                   ip = mtod(m, struct ip *);
+                   udp->uh_ulen = htons(ntohs(ip->ip_len) - (IP_VHL_HL(ip->ip_vhl) << 2));
+                   break;
+               case AF_INET6:
+                   ip6 = mtod(m, struct ip6_hdr *);
+                   udp->uh_ulen = htons(plen + siz + extendsiz + esphlen);
+                   udp->uh_sum = in6_pseudo(&ip6->ip6_src, &ip6->ip6_dst, htonl(ntohs(udp->uh_ulen) + IPPROTO_UDP));
+                   m->m_pkthdr.csum_flags = CSUM_UDPIPV6;
+                   m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+                   break;
+               }
+       }
 
 noantireplay:
        lck_mtx_lock(sadb_mutex);
index 4ca425239e2127d35c8c82494d2b87b9f1daa10c..d05f8bf7e583b748a00162526533d5b16964cac7 100644 (file)
 
 #define MAX_REALIGN_LEN 2000
 #define AES_BLOCKLEN 16
+#define ESP_GCM_SALT_LEN 4   // RFC 4106 Section 4
+#define ESP_GCM_IVLEN 8
+#define ESP_GCM_ALIGN 16
 
 extern lck_mtx_t *sadb_mutex;
 
+typedef struct {
+        ccgcm_ctx *decrypt;
+        ccgcm_ctx *encrypt;
+        ccgcm_ctx ctxt[0];
+} aes_gcm_ctx;
+
 int
 esp_aes_schedlen(
        __unused const struct esp_algorithm *algo)
@@ -535,3 +544,448 @@ esp_cbc_encrypt_aes(
 
        return 0;
 }
+
+int
+esp_gcm_schedlen(
+       __unused const struct esp_algorithm *algo)
+{
+        return (sizeof(aes_gcm_ctx) + aes_decrypt_get_ctx_size_gcm() + aes_encrypt_get_ctx_size_gcm() + ESP_GCM_ALIGN);
+}
+
+int
+esp_gcm_schedule( __unused const struct esp_algorithm *algo,
+                struct secasvar *sav)
+{
+       lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED);
+       aes_gcm_ctx *ctx = (aes_gcm_ctx*)P2ROUNDUP(sav->sched, ESP_GCM_ALIGN);
+       int rc;
+
+       ctx->decrypt = &ctx->ctxt[0];
+       ctx->encrypt = &ctx->ctxt[aes_decrypt_get_ctx_size_gcm() / sizeof(ccgcm_ctx)];
+
+       rc = aes_decrypt_key_gcm((const unsigned char *) _KEYBUF(sav->key_enc), _KEYLEN(sav->key_enc)-ESP_GCM_SALT_LEN, ctx->decrypt);
+       if (rc) {
+               return (rc);
+       }
+
+       rc = aes_encrypt_key_gcm((const unsigned char *) _KEYBUF(sav->key_enc), _KEYLEN(sav->key_enc)-ESP_GCM_SALT_LEN, ctx->encrypt);
+       if (rc) {
+               return (rc);
+       }
+       return (rc);
+}
+
+int
+esp_gcm_encrypt_finalize(struct secasvar *sav,
+                        unsigned char *tag, unsigned int tag_bytes)
+{
+       aes_gcm_ctx *ctx = (aes_gcm_ctx*)P2ROUNDUP(sav->sched, ESP_GCM_ALIGN);
+       return (aes_encrypt_finalize_gcm(tag, tag_bytes, ctx->encrypt));
+}
+
+int
+esp_gcm_decrypt_finalize(struct secasvar *sav,
+                        unsigned char *tag, unsigned int tag_bytes)
+{
+       aes_gcm_ctx *ctx = (aes_gcm_ctx*)P2ROUNDUP(sav->sched, ESP_GCM_ALIGN);
+       return (aes_decrypt_finalize_gcm(tag, tag_bytes, ctx->decrypt));
+}
+
+int
+esp_gcm_encrypt_aes(
+       struct mbuf *m,
+       size_t off,
+       __unused size_t plen,
+       struct secasvar *sav,
+       const struct esp_algorithm *algo __unused,
+       int ivlen)
+{
+       struct mbuf *s;
+       struct mbuf *d, *d0, *dp;
+       int soff;       /* offset from the head of chain, to head of this mbuf */
+       int sn, dn;     /* offset from the head of the mbuf, to meat */
+       size_t ivoff, bodyoff;
+       u_int8_t *dptr, *sp, *sp_unaligned, *sp_aligned = NULL;
+       aes_gcm_ctx *ctx;
+       struct mbuf *scut;
+       int scutoff;
+       int i, len;
+       unsigned char nonce[ESP_GCM_SALT_LEN+ivlen];
+       
+       if (ivlen != ESP_GCM_IVLEN) {
+               ipseclog((LOG_ERR, "%s: unsupported ivlen %d\n", __FUNCTION__, ivlen));
+               m_freem(m);
+               return EINVAL;
+       }
+
+       if (sav->flags & SADB_X_EXT_OLD) {
+               /* RFC 1827 */
+               ivoff = off + sizeof(struct esp);
+               bodyoff = off + sizeof(struct esp) + ivlen;
+       } else {
+               ivoff = off + sizeof(struct newesp);
+               bodyoff = off + sizeof(struct newesp) + ivlen;
+       }
+
+       m_copyback(m, ivoff, ivlen, sav->iv);
+
+       if (m->m_pkthdr.len < bodyoff) {
+               ipseclog((LOG_ERR, "%s: bad len %d/%lu\n", __FUNCTION__,
+                   m->m_pkthdr.len, (u_int32_t)bodyoff));
+               m_freem(m);
+               return EINVAL;
+       }
+
+       /* Set IV */
+       memcpy(nonce, _KEYBUF(sav->key_enc)+_KEYLEN(sav->key_enc)-ESP_GCM_SALT_LEN, ESP_GCM_SALT_LEN);
+       memcpy(nonce+ESP_GCM_SALT_LEN, sav->iv, ivlen);
+
+       ctx = (aes_gcm_ctx *)P2ROUNDUP(sav->sched, ESP_GCM_ALIGN);
+       if (aes_encrypt_set_iv_gcm(nonce, sizeof(nonce), ctx->encrypt)) {
+               ipseclog((LOG_ERR, "%s: failed to set IV\n", __FUNCTION__));
+               m_freem(m);
+               bzero(nonce, sizeof(nonce));
+               return EINVAL;
+       }
+       bzero(nonce, sizeof(nonce));
+
+       /* Set Additional Authentication Data */
+       if (!(sav->flags & SADB_X_EXT_OLD)) {
+               struct newesp esp;
+               m_copydata(m, off, sizeof(esp), (caddr_t) &esp);
+               if (aes_encrypt_aad_gcm((unsigned char*)&esp, sizeof(esp), ctx->encrypt)) {
+                       ipseclog((LOG_ERR, "%s: packet encryption AAD failure\n", __FUNCTION__));
+                       m_freem(m);
+                       return EINVAL;
+               }
+       }
+
+       s = m;
+       soff = sn = dn = 0;
+       d = d0 = dp = NULL;
+       sp = dptr = NULL;
+       
+       /* skip headers/IV */
+       while (soff < bodyoff) {
+               if (soff + s->m_len > bodyoff) {
+                       sn = bodyoff - soff;
+                       break;
+               }
+
+               soff += s->m_len;
+               s = s->m_next;
+       }
+       scut = s;
+       scutoff = sn;
+
+       /* skip over empty mbuf */
+       while (s && s->m_len == 0)
+               s = s->m_next;
+       
+       while (soff < m->m_pkthdr.len) {
+               /* source */
+               sp = mtod(s, u_int8_t *) + sn;
+               len = s->m_len - sn;
+
+               /* destination */
+               if (!d || (dn + len > d->m_len)) {
+                       if (d)
+                               dp = d;
+                       MGET(d, M_DONTWAIT, MT_DATA);
+                       i = m->m_pkthdr.len - (soff + sn);
+                       if (d && i > MLEN) {
+                               MCLGET(d, M_DONTWAIT);
+                               if ((d->m_flags & M_EXT) == 0) {
+                                       d = m_mbigget(d, M_DONTWAIT);
+                                       if ((d->m_flags & M_EXT) == 0) {
+                                               m_free(d);
+                                               d = NULL;
+                                       }
+                               }
+                       }
+                       if (!d) {
+                               m_freem(m);
+                               if (d0)
+                                       m_freem(d0);
+                               return ENOBUFS;
+                       }
+                       if (!d0)
+                               d0 = d;
+                       if (dp)
+                               dp->m_next = d;
+
+                       // try to make mbuf data aligned
+                       if (!IPSEC_IS_P2ALIGNED(d->m_data)) {
+                               m_adj(d, IPSEC_GET_P2UNALIGNED_OFS(d->m_data));
+                       }
+
+                       d->m_len = M_TRAILINGSPACE(d);
+
+                       if (d->m_len > i)
+                               d->m_len = i;
+
+                       dptr = mtod(d, u_int8_t *);
+                       dn = 0;
+               }
+               
+               /* adjust len if greater than space available */
+               if (len > d->m_len - dn)
+                       len = d->m_len - dn;
+               
+               /* encrypt */
+               // check input pointer alignment and use a separate aligned buffer (if sp is not aligned on 4-byte boundary).
+               if (IPSEC_IS_P2ALIGNED(sp)) {
+                       sp_unaligned = NULL;
+               } else {
+                       sp_unaligned = sp;
+                       if (len > MAX_REALIGN_LEN) {
+                               return ENOBUFS;
+                       }
+                       if (sp_aligned == NULL) {
+                               sp_aligned = (u_int8_t *)_MALLOC(MAX_REALIGN_LEN, M_SECA, M_DONTWAIT);
+                               if (sp_aligned == NULL)
+                                       return ENOMEM;
+                       }
+                       sp = sp_aligned;
+                       memcpy(sp, sp_unaligned, len);
+               }
+
+               if (aes_encrypt_gcm(sp, len, dptr+dn, ctx->encrypt)) {
+                       ipseclog((LOG_ERR, "%s: failed to encrypt\n", __FUNCTION__));
+                       m_freem(m);
+                       return EINVAL;
+               }
+
+               // update unaligned pointers
+               if (!IPSEC_IS_P2ALIGNED(sp_unaligned)) {
+                       sp = sp_unaligned;
+               }
+
+               /* update offsets */
+               sn += len;
+               dn += len;
+
+               /* find the next source block and skip empty mbufs */
+               while (s && sn >= s->m_len) {
+                       sn -= s->m_len;
+                       soff += s->m_len;
+                       s = s->m_next;
+               }
+       }
+
+       /* free un-needed source mbufs and add dest mbufs to chain */
+       m_freem(scut->m_next);
+       scut->m_len = scutoff;
+       scut->m_next = d0;
+       
+       // free memory
+       if (sp_aligned != NULL) {
+               FREE(sp_aligned, M_SECA);
+               sp_aligned = NULL;
+       }
+
+       /* generate new iv */
+       key_sa_stir_iv(sav);
+
+       return 0;
+}
+
+int
+esp_gcm_decrypt_aes(m, off, sav, algo, ivlen)
+       struct mbuf *m;
+       size_t off;
+       struct secasvar *sav;
+       const struct esp_algorithm *algo __unused;
+       int ivlen;
+{
+       struct mbuf *s;
+       struct mbuf *d, *d0, *dp;
+       int soff;       /* offset from the head of chain, to head of this mbuf */
+       int sn, dn;     /* offset from the head of the mbuf, to meat */
+       size_t ivoff, bodyoff;
+       u_int8_t iv[ESP_GCM_IVLEN] __attribute__((aligned(4))), *dptr;
+       u_int8_t *sp, *sp_unaligned, *sp_aligned = NULL;
+       aes_gcm_ctx *ctx;
+       struct mbuf *scut;
+       int scutoff;
+       int     i, len;
+       unsigned char nonce[ESP_GCM_SALT_LEN+ivlen];
+
+       if (ivlen != ESP_GCM_IVLEN) {
+               ipseclog((LOG_ERR, "%s: unsupported ivlen %d\n", __FUNCTION__, ivlen));
+               m_freem(m);
+               return EINVAL;
+       }
+
+       if (sav->flags & SADB_X_EXT_OLD) {
+               /* RFC 1827 */
+               ivoff = off + sizeof(struct esp);
+               bodyoff = off + sizeof(struct esp) + ivlen;
+       } else {
+               ivoff = off + sizeof(struct newesp);
+               bodyoff = off + sizeof(struct newesp) + ivlen;
+       }
+
+       if (m->m_pkthdr.len < bodyoff) {
+               ipseclog((LOG_ERR, "%s: bad len %d/%lu\n", __FUNCTION__, 
+                   m->m_pkthdr.len, (u_int32_t)bodyoff));
+               m_freem(m);
+               return EINVAL;
+       }
+
+       /* grab iv */
+       m_copydata(m, ivoff, ivlen, (caddr_t) iv);
+
+       /* Set IV */
+       memcpy(nonce, _KEYBUF(sav->key_enc)+_KEYLEN(sav->key_enc)-ESP_GCM_SALT_LEN, ESP_GCM_SALT_LEN);
+       memcpy(nonce+ESP_GCM_SALT_LEN, iv, ivlen);
+
+       ctx = (aes_gcm_ctx *)P2ROUNDUP(sav->sched, ESP_GCM_ALIGN);
+       if (aes_decrypt_set_iv_gcm(nonce, sizeof(nonce), ctx->decrypt)) {
+               ipseclog((LOG_ERR, "%s: failed to set IV\n", __FUNCTION__));
+               m_freem(m);
+               bzero(nonce, sizeof(nonce));
+               return EINVAL;
+       }
+       bzero(nonce, sizeof(nonce));
+
+       /* Set Additional Authentication Data */
+       if (!(sav->flags & SADB_X_EXT_OLD)) {
+               struct newesp esp;
+               m_copydata(m, off, sizeof(esp), (caddr_t) &esp);
+               if (aes_decrypt_aad_gcm((unsigned char*)&esp, sizeof(esp), ctx->decrypt)) {
+                       ipseclog((LOG_ERR, "%s: packet decryption AAD failure\n", __FUNCTION__));
+                       return EINVAL;
+               }
+       }
+
+       s = m;
+       soff = sn = dn = 0;
+       d = d0 = dp = NULL;
+       sp = dptr = NULL;
+       
+       /* skip header/IV offset */
+       while (soff < bodyoff) {
+               if (soff + s->m_len > bodyoff) {
+                       sn = bodyoff - soff;
+                       break;
+               }
+
+               soff += s->m_len;
+               s = s->m_next;
+       }
+       scut = s;
+       scutoff = sn;
+
+       /* skip over empty mbuf */
+       while (s && s->m_len == 0)
+               s = s->m_next;
+       
+       while (soff < m->m_pkthdr.len) {
+               /* source */
+               sp = mtod(s, u_int8_t *) + sn;
+               len = s->m_len - sn;
+
+               /* destination */
+               if (!d || (dn + len > d->m_len)) {
+                       if (d)
+                               dp = d;
+                       MGET(d, M_DONTWAIT, MT_DATA);
+                       i = m->m_pkthdr.len - (soff + sn);
+                       if (d && i > MLEN) {
+                               MCLGET(d, M_DONTWAIT);
+                               if ((d->m_flags & M_EXT) == 0) {
+                                       d = m_mbigget(d, M_DONTWAIT);
+                                       if ((d->m_flags & M_EXT) == 0) {
+                                               m_free(d);
+                                               d = NULL;
+                                       }
+                               }
+                       }
+                       if (!d) {
+                               m_freem(m);
+                               if (d0)
+                                       m_freem(d0);
+                               return ENOBUFS;
+                       }
+                       if (!d0)
+                               d0 = d;
+                       if (dp)
+                               dp->m_next = d;
+
+                       // try to make mbuf data aligned
+                       if (!IPSEC_IS_P2ALIGNED(d->m_data)) {
+                               m_adj(d, IPSEC_GET_P2UNALIGNED_OFS(d->m_data));
+                       }
+
+                       d->m_len = M_TRAILINGSPACE(d);
+
+                       if (d->m_len > i)
+                               d->m_len = i;
+
+                       dptr = mtod(d, u_int8_t *);     
+                       dn = 0;
+               }
+
+               /* adjust len if greater than space available in dest */
+               if (len > d->m_len - dn)
+                       len = d->m_len - dn;
+
+               /* Decrypt */
+               // check input pointer alignment and use a separate aligned buffer (if sp is unaligned on 4-byte boundary).
+               if (IPSEC_IS_P2ALIGNED(sp)) {
+                       sp_unaligned = NULL;
+               } else {
+                       sp_unaligned = sp;
+                       if (len > MAX_REALIGN_LEN) {
+                               return ENOBUFS;
+                       }
+                       if (sp_aligned == NULL) {
+                               sp_aligned = (u_int8_t *)_MALLOC(MAX_REALIGN_LEN, M_SECA, M_DONTWAIT);
+                               if (sp_aligned == NULL)
+                                       return ENOMEM;
+                       }
+                       sp = sp_aligned;
+                       memcpy(sp, sp_unaligned, len);
+               }
+               // no need to check output pointer alignment
+
+               if (aes_decrypt_gcm(sp, len, dptr + dn, ctx->decrypt)) {
+                       ipseclog((LOG_ERR, "%s: failed to decrypt\n", __FUNCTION__));
+                       m_freem(m);
+                       return EINVAL;
+               }
+               
+               // update unaligned pointers
+               if (!IPSEC_IS_P2ALIGNED(sp_unaligned)) {
+                       sp = sp_unaligned;
+               }
+
+               /* update offsets */
+               sn += len;
+               dn += len;
+               
+               /* find the next source block */
+               while (s && sn >= s->m_len) {
+                       sn -= s->m_len;
+                       soff += s->m_len;
+                       s = s->m_next;
+               }
+       }
+
+       /* free un-needed source mbufs and add dest mbufs to chain */
+       m_freem(scut->m_next);
+       scut->m_len = scutoff;
+       scut->m_next = d0;
+
+       // free memory
+       if (sp_aligned != NULL) {
+               FREE(sp_aligned, M_SECA);
+               sp_aligned = NULL;
+       }
+       
+       /* just in case */
+       bzero(iv, sizeof(iv));
+
+       return 0;
+}
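A small, self-contained illustration of the nonce handling shared by both GCM paths above: per RFC 4106 the last ESP_GCM_SALT_LEN bytes of the negotiated key act as an implicit salt, and the 8-byte explicit IV carried in the packet is appended to it to form the 12-byte GCM nonce. This mirrors the two memcpy() calls in the functions above; it is a sketch, not the kernel routine:

#include <string.h>

#define ESP_GCM_SALT_LEN 4   /* RFC 4106 Section 4 */
#define ESP_GCM_IVLEN    8

static void
esp_gcm_nonce_sketch(const unsigned char *key, size_t keylen,
    const unsigned char iv[ESP_GCM_IVLEN],
    unsigned char nonce[ESP_GCM_SALT_LEN + ESP_GCM_IVLEN])
{
	/* salt: trailing ESP_GCM_SALT_LEN bytes of the key material */
	memcpy(nonce, key + keylen - ESP_GCM_SALT_LEN, ESP_GCM_SALT_LEN);
	/* explicit per-packet IV follows the salt */
	memcpy(nonce + ESP_GCM_SALT_LEN, iv, ESP_GCM_IVLEN);
}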
index 098d13321973d584d59a5fb52e445483ba08a30f..75d92c6e824f31fc5ba4eaa9c609fd895560ed41 100644 (file)
@@ -68,4 +68,10 @@ int
 esp_cbc_encrypt_aes(struct mbuf *, size_t, size_t, struct secasvar *, 
        const struct esp_algorithm *, int);
 
+int esp_gcm_schedlen(const struct esp_algorithm *);
+int esp_gcm_schedule(const struct esp_algorithm *, struct secasvar *);
+int esp_gcm_encrypt_aes(struct mbuf *, size_t, size_t, struct secasvar *, const struct esp_algorithm *, int);
+int esp_gcm_decrypt_aes(struct mbuf *, size_t, struct secasvar *, const struct esp_algorithm *, int);
+int esp_gcm_encrypt_finalize(struct secasvar *, unsigned char *, unsigned int);
+int esp_gcm_decrypt_finalize(struct secasvar *, unsigned char *, unsigned int);
 #endif /* BSD_KERNEL_PRIVATE */
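The entry points declared above all locate their cipher state via P2ROUNDUP(sav->sched, ESP_GCM_ALIGN): esp_gcm_schedlen() pads the allocation by ESP_GCM_ALIGN so the working pointer can be rounded up to a 16-byte boundary, after which the decrypt and encrypt ccgcm contexts sit back to back. A user-space sketch of that rounding; the power-of-two idiom below is assumed to match the kernel macro, and the local buffer is only a stand-in for sav->sched:

#include <stdint.h>
#include <stdio.h>

#define ESP_GCM_ALIGN 16
/* round x up to the next multiple of a power-of-two alignment */
#define P2ROUNDUP(x, align) (-(-(uintptr_t)(x) & -(uintptr_t)(align)))

int
main(void)
{
	unsigned char sched[256];                 /* stand-in for sav->sched */
	uintptr_t ctx = P2ROUNDUP(sched, ESP_GCM_ALIGN);

	printf("sched %p -> aligned ctx %p (slack %zu bytes)\n",
	    (void *)sched, (void *)ctx, (size_t)(ctx - (uintptr_t)sched));
	return 0;
}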
index abe6b2e217822e21bbfd0d7f12d3606cb16b4793..1a071841590b85a7f4ae63ce3cfd5bcc34e6456a 100644 (file)
@@ -234,8 +234,8 @@ frag6_icmp6_timeex_error(struct fq6_head *diq6)
                MBUFQ_FOREACH_SAFE(m, diq6, m_tmp) {
                        MBUFQ_REMOVE(diq6, m);
                        MBUFQ_NEXT(m) = NULL;
-                       icmp6_error(m, ICMP6_TIME_EXCEEDED,
-                           ICMP6_TIME_EXCEED_REASSEMBLY, 0);
+                       icmp6_error_flag(m, ICMP6_TIME_EXCEEDED,
+                           ICMP6_TIME_EXCEED_REASSEMBLY, 0, 0);
                }
        }
 }
index 99d92784cd6d19ef5bf9bb2b8f83c335fe5bef97..fb8d179bfa7dbfd4b7baf7a60ff5eeb96c603705 100644 (file)
@@ -284,7 +284,11 @@ icmp6_error2(struct mbuf *m, int type, int code, int param,
  * Generate an error packet of type error in response to bad IP6 packet.
  */
 void
-icmp6_error(struct mbuf *m, int type, int code, int param)
+icmp6_error(struct mbuf *m, int type, int code, int param) {
+       icmp6_error_flag(m, type, code, param, ICMP6_ERROR_RST_MRCVIF);
+}
+
+void icmp6_error_flag (struct mbuf *m, int type, int code, int param, int flags)
 {
        struct ip6_hdr *oip6, *nip6;
        struct icmp6_hdr *icmp6;
@@ -393,7 +397,7 @@ icmp6_error(struct mbuf *m, int type, int code, int param)
                m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);
 
        preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
-       M_PREPEND(m, preplen, M_DONTWAIT);
+       M_PREPEND(m, preplen, M_DONTWAIT, 1);
        if (m && m->m_len < preplen)
                m = m_pullup(m, preplen);
        if (m == NULL) {
@@ -420,7 +424,9 @@ icmp6_error(struct mbuf *m, int type, int code, int param)
         * clear m->m_pkthdr.rcvif for safety, we should have enough scope
         * information in ip header (nip6).
         */
-       m->m_pkthdr.rcvif = NULL;
+       if (flags & ICMP6_ERROR_RST_MRCVIF) {
+               m->m_pkthdr.rcvif = NULL;
+       }
 
        icmp6stat.icp6s_outhist[type]++;
        icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */
@@ -2140,7 +2146,7 @@ icmp6_reflect(m, off)
        int type, code;
        struct ifnet *outif = NULL;
        struct sockaddr_in6 sa6_src, sa6_dst;
-       struct nd_ifinfo *ndi;
+       struct nd_ifinfo *ndi = NULL;
        u_int32_t oflow;
        struct ip6_out_args ip6oa = { IFSCOPE_NONE, { 0 },
            IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR, 0 };
@@ -2291,7 +2297,6 @@ icmp6_reflect(m, off)
                ip6->ip6_flow |= (oflow & htonl(0x0ff00000));
        }
        ip6->ip6_nxt = IPPROTO_ICMPV6;
-       lck_rw_lock_shared(nd_if_rwlock);
        if (outif != NULL && (ndi = ND_IFINFO(outif)) != NULL &&
            ndi->initialized) {
                lck_mtx_lock(&ndi->lock);
@@ -2308,23 +2313,21 @@ icmp6_reflect(m, off)
        } else {
                ip6->ip6_hlim = ip6_defhlim;
        }
-       lck_rw_done(nd_if_rwlock);
        /* Use the same traffic class as in the request to match IPv4 */
        icmp6->icmp6_cksum = 0;
        icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6,
-                                       sizeof(struct ip6_hdr), plen);
+           sizeof(struct ip6_hdr), plen);
 
        /*
         * XXX option handling
         */
-
        m->m_flags &= ~(M_BCAST|M_MCAST);
 
        if (outif != NULL) {
                ifnet_release(outif);
                outif = NULL;
        }
-       m->m_pkthdr.rcvif = NULL;
+
        m->m_pkthdr.csum_data = 0;
        m->m_pkthdr.csum_flags = 0;
        ip6_output(m, NULL, NULL, IPV6_OUTARGS, NULL, &outif, &ip6oa);
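To summarize the new entry point above: icmp6_error() is now a thin wrapper that always passes ICMP6_ERROR_RST_MRCVIF, preserving the old behavior of clearing the packet's receive interface before the error is reflected, while callers such as the frag6 reassembly-timeout path earlier in this commit pass 0 so rcvif is left intact. A hedged, user-space sketch of that flag-gated behavior; the types and the flag value are simplified for illustration:

#include <stddef.h>

#define ICMP6_ERROR_RST_MRCVIF 0x1            /* illustrative value */

struct pkt_sketch {
	void *rcvif;                          /* stand-in for m->m_pkthdr.rcvif */
};

static void
icmp6_error_flag_sketch(struct pkt_sketch *m, int flags)
{
	if (flags & ICMP6_ERROR_RST_MRCVIF) {
		/* scope information already lives in the IPv6 header */
		m->rcvif = NULL;
	}
	/* ... build the ICMPv6 error and reflect it here ... */
}

static void
icmp6_error_sketch(struct pkt_sketch *m)
{
	/* legacy wrapper: keep the historical "always reset" behavior */
	icmp6_error_flag_sketch(m, ICMP6_ERROR_RST_MRCVIF);
}

int
main(void)
{
	struct pkt_sketch m = { .rcvif = &m };
	icmp6_error_sketch(&m);               /* m.rcvif is now NULL */
	return 0;
}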
index e0db9a422f20246c3d5468a6018ec2e38587fed9..40ece5dfc200eb56b8e833ad5b235787969c0c85 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -210,15 +210,15 @@ static int in6_to_kamescope(struct sockaddr_in6 *, struct ifnet *);
 static void in6_ifaddr_set_dadprogress(struct in6_ifaddr *);
 
 static int in6_getassocids(struct socket *, uint32_t *, user_addr_t);
-static int in6_getconnids(struct socket *, associd_t, uint32_t *, user_addr_t);
-static int in6_getconninfo(struct socket *, connid_t, uint32_t *,
+static int in6_getconnids(struct socket *, sae_associd_t, uint32_t *,
+    user_addr_t);
+static int in6_getconninfo(struct socket *, sae_connid_t, uint32_t *,
     uint32_t *, int32_t *, user_addr_t, socklen_t *, user_addr_t, socklen_t *,
     uint32_t *, user_addr_t, uint32_t *);
 
 static void in6_if_up_dad_start(struct ifnet *);
 
 extern lck_mtx_t *nd6_mutex;
-extern int in6_init2done;
 
 #define        IN6IFA_TRACE_HIST_SIZE  32      /* size of trace history */
 
@@ -799,7 +799,7 @@ in6ctl_llstop(struct ifnet *ifp)
        pr0.ndpr_ifp = ifp;
        pr0.ndpr_prefix.sin6_addr.s6_addr16[0] = IPV6_ADDR_INT16_ULL;
        in6_setscope(&pr0.ndpr_prefix.sin6_addr, ifp, NULL);
-       pr = nd6_prefix_lookup(&pr0);
+       pr = nd6_prefix_lookup(&pr0, ND6_PREFIX_EXPIRY_UNSPEC);
        if (pr) {
                lck_mtx_lock(nd6_mutex);
                NDPR_LOCK(pr);
@@ -1144,64 +1144,85 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
        if (ifp == NULL)
                return (ENXIO);
 
+       /*
+        * Unlock the socket since ifnet_ioctl() may be invoked by
+        * one of the ioctl handlers below.  Socket will be re-locked
+        * prior to returning.
+        */
+       if (so != NULL) {
+               socket_unlock(so, 0);
+               so_unlocked = TRUE;
+       }
+
        /*
         * ioctls which require ifp but not interface address.
         */
        switch (cmd) {
        case SIOCAUTOCONF_START:        /* struct in6_ifreq */
-               if (!privileged)
-                       return (EPERM);
-               return (in6_autoconf(ifp, TRUE));
-               /* NOTREACHED */
+               if (!privileged) {
+                       error = EPERM;
+                       goto done;
+               }
+               error = in6_autoconf(ifp, TRUE);
+               goto done;
 
        case SIOCAUTOCONF_STOP:         /* struct in6_ifreq */
-               if (!privileged)
-                       return (EPERM);
-               return (in6_autoconf(ifp, FALSE));
-               /* NOTREACHED */
+               if (!privileged) {
+                       error = EPERM;
+                       goto done;
+               }
+               error = in6_autoconf(ifp, FALSE);
+               goto done;
 
        case SIOCLL_START_32:           /* struct in6_aliasreq_32 */
        case SIOCLL_START_64:           /* struct in6_aliasreq_64 */
-               if (!privileged)
-                       return (EPERM);
-               return (in6ctl_llstart(ifp, cmd, data));
-               /* NOTREACHED */
+               if (!privileged) {
+                       error = EPERM;
+                       goto done;
+               }
+               error = in6ctl_llstart(ifp, cmd, data);
+               goto done;
 
        case SIOCLL_STOP:               /* struct in6_ifreq */
-               if (!privileged)
-                       return (EPERM);
-               return (in6ctl_llstop(ifp));
-               /* NOTREACHED */
+               if (!privileged) {
+                       error = EPERM;
+                       goto done;
+               }
+               error = in6ctl_llstop(ifp);
+               goto done;
 
        case SIOCSETROUTERMODE_IN6:     /* struct in6_ifreq */
-               if (!privileged)
-                       return (EPERM);
-
+               if (!privileged) {
+                       error = EPERM;
+                       goto done;
+               }
                bcopy(&((struct in6_ifreq *)(void *)data)->ifr_intval,
                    &intval, sizeof (intval));
 
-               return (in6_setrouter(ifp, intval));
-               /* NOTREACHED */
+               error = in6_setrouter(ifp, intval);
+               goto done;
 
        case SIOCPROTOATTACH_IN6_32:    /* struct in6_aliasreq_32 */
        case SIOCPROTOATTACH_IN6_64:    /* struct in6_aliasreq_64 */
-               if (!privileged)
-                       return (EPERM);
-               return (in6_domifattach(ifp));
-               /* NOTREACHED */
+               if (!privileged) {
+                       error = EPERM;
+                       goto done;
+               }
+               error = in6_domifattach(ifp);
+               goto done;
 
        case SIOCPROTODETACH_IN6:       /* struct in6_ifreq */
-               if (!privileged)
-                       return (EPERM);
-
+               if (!privileged) {
+                       error = EPERM;
+                       goto done;
+               }
                /* Cleanup interface routes and addresses */
                in6_purgeif(ifp);
 
                if ((error = proto_unplumb(PF_INET6, ifp)))
                        log(LOG_ERR, "SIOCPROTODETACH_IN6: %s error=%d\n",
                            if_name(ifp), error);
-               return (error);
-               /* NOTREACHED */
+               goto done;
 
        case SIOCSNDFLUSH_IN6:          /* struct in6_ifreq */
        case SIOCSPFXFLUSH_IN6:         /* struct in6_ifreq */
@@ -1209,8 +1230,10 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
        case SIOCSDEFIFACE_IN6_32:      /* struct in6_ndifreq_32 */
        case SIOCSDEFIFACE_IN6_64:      /* struct in6_ndifreq_64 */
        case SIOCSIFINFO_FLAGS:         /* struct in6_ndireq */
-               if (!privileged)
-                       return (EPERM);
+               if (!privileged) {
+                       error = EPERM;
+                       goto done;
+               }
                /* FALLTHRU */
        case OSIOCGIFINFO_IN6:          /* struct in6_ondireq */
        case SIOCGIFINFO_IN6:           /* struct in6_ondireq */
@@ -1222,8 +1245,8 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
        case SIOCGNBRINFO_IN6_64:       /* struct in6_nbrinfo_64 */
        case SIOCGDEFIFACE_IN6_32:      /* struct in6_ndifreq_32 */
        case SIOCGDEFIFACE_IN6_64:      /* struct in6_ndifreq_64 */
-               return (nd6_ioctl(cmd, data, ifp));
-               /* NOTREACHED */
+               error = nd6_ioctl(cmd, data, ifp);
+               goto done;
 
        case SIOCSIFPREFIX_IN6:         /* struct in6_prefixreq (deprecated) */
        case SIOCDIFPREFIX_IN6:         /* struct in6_prefixreq (deprecated) */
@@ -1234,26 +1257,27 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                log(LOG_NOTICE,
                    "prefix ioctls are now invalidated. "
                    "please use ifconfig.\n");
-               return (EOPNOTSUPP);
-               /* NOTREACHED */
+               error = EOPNOTSUPP;
+               goto done;
 
        case SIOCSSCOPE6:               /* struct in6_ifreq (deprecated) */
        case SIOCGSCOPE6:               /* struct in6_ifreq (deprecated) */
        case SIOCGSCOPE6DEF:            /* struct in6_ifreq (deprecated) */
-               return (EOPNOTSUPP);
-               /* NOTREACHED */
+               error = EOPNOTSUPP;
+               goto done;
        
        case SIOCLL_CGASTART_32:        /* struct in6_llstartreq_32 */
        case SIOCLL_CGASTART_64:        /* struct in6_llstartreq_64 */
                if (!privileged)
-                       return (EPERM);
-               return (in6ctl_cgastart(ifp, cmd, data));
-               /* NOTREACHED */
+                       error = EPERM;
+               else
+                       error = in6ctl_cgastart(ifp, cmd, data);
+               goto done;
 
        case SIOCGIFSTAT_IN6:           /* struct in6_ifreq */
        case SIOCGIFSTAT_ICMP6:         /* struct in6_ifreq */
-               return (in6ctl_gifstat(ifp, cmd, ifr));
-               /* NOTREACHED */
+               error = in6ctl_gifstat(ifp, cmd, ifr);
+               goto done;
        }
 
        /*
@@ -1268,13 +1292,15 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                 * on a single interface, SIOCSIFxxx ioctls are deprecated.
                 */
                /* we decided to obsolete this command (20000704) */
-               return (EOPNOTSUPP);
-               /* NOTREACHED */
+               error = EOPNOTSUPP;
+               goto done;
 
        case SIOCAIFADDR_IN6_32:        /* struct in6_aliasreq_32 */
        case SIOCAIFADDR_IN6_64:        /* struct in6_aliasreq_64 */
-               if (!privileged)
-                       return (EPERM);
+               if (!privileged) {
+                       error = EPERM;
+                       goto done;
+               } 
                /*
                 * Convert user ifra to the kernel form, when appropriate.
                 * This allows the conversion between different data models
@@ -1289,8 +1315,10 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 
        case SIOCDIFADDR_IN6:           /* struct in6_ifreq */
        case SIOCSIFALIFETIME_IN6:      /* struct in6_ifreq */
-               if (!privileged)
-                       return (EPERM);
+               if (!privileged) {
+                       error = EPERM;
+                       goto done;
+               }
                /* FALLTHRU */
        case SIOCGIFADDR_IN6:           /* struct in6_ifreq */
        case SIOCGIFDSTADDR_IN6:        /* struct in6_ifreq */
@@ -1323,12 +1351,15 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                                    htons(ifp->if_index);
                        } else if (sa6->sin6_addr.s6_addr16[1] !=
                            htons(ifp->if_index)) {
-                               return (EINVAL); /* link ID contradicts */
+                               error = EINVAL; /* link ID contradicts */
+                               goto done;
                        }
                        if (sa6->sin6_scope_id) {
                                if (sa6->sin6_scope_id !=
-                                   (u_int32_t)ifp->if_index)
-                                       return (EINVAL);
+                                   (u_int32_t)ifp->if_index) {
+                                       error = EINVAL;
+                                       goto done;
+                               }
                                sa6->sin6_scope_id = 0; /* XXX: good way? */
                        }
                }
@@ -1346,8 +1377,10 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
         */
        switch (cmd) {
        case SIOCDIFADDR_IN6:           /* struct in6_ifreq */
-               if (ia == NULL)
-                       return (EADDRNOTAVAIL);
+               if (ia == NULL) {
+                       error = EADDRNOTAVAIL;
+                       goto done;
+               }
                /* FALLTHROUGH */
        case SIOCAIFADDR_IN6_32:        /* struct in6_aliasreq_32 */
        case SIOCAIFADDR_IN6_64:        /* struct in6_aliasreq_64 */
@@ -1365,16 +1398,6 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                break;
        }
 
-       /*
-        * Unlock the socket since ifnet_ioctl() may be invoked by
-        * one of the ioctl handlers below.  Socket will be re-locked
-        * prior to returning.
-        */
-       if (so != NULL) {
-               socket_unlock(so, 0);
-               so_unlocked = TRUE;
-       }
-
        /*
         * And finally process address-related ioctls.
         */
@@ -1508,8 +1531,8 @@ in6ctl_aifaddr(struct ifnet *ifp, struct in6_aliasreq *ifra)
        pr0.ndpr_stateflags |= NDPRF_STATIC;
        lck_mtx_init(&pr0.ndpr_lock, ifa_mtx_grp, ifa_mtx_attr);
 
-       /* add the prefix if there's one. */
-       if ((pr = nd6_prefix_lookup(&pr0)) == NULL) {
+       /* add the prefix if there's none. */
+       if ((pr = nd6_prefix_lookup(&pr0, ND6_PREFIX_EXPIRY_NEVER)) == NULL) {
                /*
                 * nd6_prelist_add will install the corresponding interface
                 * route.
@@ -1530,7 +1553,7 @@ in6ctl_aifaddr(struct ifnet *ifp, struct in6_aliasreq *ifra)
 
        /* if this is a new autoconfed addr */
        addtmp = FALSE;
-       if ((ia->ia6_flags & IN6_IFF_AUTOCONF) != 0 && ia->ia6_ndpr == NULL) {
+       if (ia->ia6_ndpr == NULL) {
                NDPR_LOCK(pr);
                ++pr->ndpr_addrcnt;
                VERIFY(pr->ndpr_addrcnt != 0);
@@ -1541,7 +1564,11 @@ in6ctl_aifaddr(struct ifnet *ifp, struct in6_aliasreq *ifra)
                 * If this is the first autoconf address from the prefix,
                 * create a temporary address as well (when specified).
                 */
-               addtmp = (ip6_use_tempaddr && pr->ndpr_addrcnt == 1);
+               if ((ia->ia6_flags & IN6_IFF_AUTOCONF) != 0 &&
+                   ip6_use_tempaddr &&
+                   pr->ndpr_addrcnt == 1) {
+                       addtmp = true;
+               }
                NDPR_UNLOCK(pr);
        }
 
@@ -1606,21 +1633,11 @@ in6ctl_difaddr(struct ifnet *ifp, struct in6_ifaddr *ia)
                    ia->ia_prefixmask.sin6_addr.s6_addr32[i];
        }
        IFA_UNLOCK(&ia->ia_ifa);
-       /*
-        * The logic of the following condition is a bit complicated.
-        * We expire the prefix when
-        * 1. the address obeys autoconfiguration and it is the
-        *    only owner of the associated prefix, or
-        * 2. the address does not obey autoconf and there is no
-        *    other owner of the prefix.
-        */
-       if ((pr = nd6_prefix_lookup(&pr0)) != NULL) {
+
+       if ((pr = nd6_prefix_lookup(&pr0, ND6_PREFIX_EXPIRY_UNSPEC)) != NULL) {
                IFA_LOCK(&ia->ia_ifa);
                NDPR_LOCK(pr);
-               if (((ia->ia6_flags & IN6_IFF_AUTOCONF) != 0 &&
-                   pr->ndpr_addrcnt == 1) ||
-                   ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0 &&
-                   pr->ndpr_addrcnt == 0)) {
+               if (pr->ndpr_addrcnt == 1) {
                        /* XXX: just for expiration */
                        pr->ndpr_expire = 1;
                }
@@ -1714,9 +1731,8 @@ in6_setrouter(struct ifnet *ifp, int enable)
                return (ENODEV);
 
        if (enable) {
-               struct nd_ifinfo *ndi;
+               struct nd_ifinfo *ndi = NULL;
 
-               lck_rw_lock_shared(nd_if_rwlock);
                ndi = ND_IFINFO(ifp);
                if (ndi != NULL && ndi->initialized) {
                        lck_mtx_lock(&ndi->lock);
@@ -1724,14 +1740,10 @@ in6_setrouter(struct ifnet *ifp, int enable)
                                /* No proxy if we are an advertising router */
                                ndi->flags &= ~ND6_IFF_PROXY_PREFIXES;
                                lck_mtx_unlock(&ndi->lock);
-                               lck_rw_done(nd_if_rwlock);
                                (void) nd6_if_prproxy(ifp, FALSE);
                        } else {
                                lck_mtx_unlock(&ndi->lock);
-                               lck_rw_done(nd_if_rwlock);
                        }
-               } else {
-                       lck_rw_done(nd_if_rwlock);
                }
        }
 
@@ -1789,7 +1801,7 @@ in6_ifaupdate_aux(struct in6_ifaddr *ia, struct ifnet *ifp, int ifaupflags)
        struct in6_multi *in6m_sol;
        struct in6_multi_mship *imm;
        struct rtentry *rt;
-       int delay, error;
+       int delay, error = 0;
 
        VERIFY(ifp != NULL && ia != NULL);
        ifa = &ia->ia_ifa;
@@ -1985,15 +1997,6 @@ in6_ifaupdate_aux(struct in6_ifaddr *ia, struct ifnet *ifp, int ifaupflags)
        }
 #undef MLTMASK_LEN
 
-       /*
-        * Make sure to initialize ND6 information.  this is to workaround
-        * issues with interfaces with IPv6 addresses, which have never brought
-        * up.  We are assuming that it is safe to nd6_ifattach multiple times.
-        * NOTE: this is how stf0 gets initialized
-        */
-       if ((error = nd6_ifattach(ifp)) != 0)
-               goto unwind;
-
        /* Ensure nd6_service() is scheduled as soon as it's convenient */
        ++nd6_sched_timeout_want;
 
@@ -2430,39 +2433,36 @@ in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp)
        }
 
        /*
-        * When an autoconfigured address is being removed, release the
-        * reference to the base prefix.  Also, since the release might
-        * affect the status of other (detached) addresses, call
+        * When an IPv6 address is being removed, release the
+        * reference to the base prefix.
+        * Also, since the release might affect the status
+        * of other (detached) addresses, call
         * pfxlist_onlink_check().
         */
        ifa = &oia->ia_ifa;
        IFA_LOCK(ifa);
+       if (oia->ia6_ndpr == NULL) {
+               log(LOG_NOTICE, "in6_unlink_ifa: IPv6 address "
+                   "0x%llx has no prefix\n",
+                   (uint64_t)VM_KERNEL_ADDRPERM(oia));
+       } else {
+               struct nd_prefix *pr = oia->ia6_ndpr;
+               oia->ia6_flags &= ~IN6_IFF_AUTOCONF;
+               oia->ia6_ndpr = NULL;
+               NDPR_LOCK(pr);
+               VERIFY(pr->ndpr_addrcnt != 0);
+               pr->ndpr_addrcnt--;
+               NDPR_UNLOCK(pr);
+               NDPR_REMREF(pr);        /* release addr reference */
+       }
+       IFA_UNLOCK(ifa);
+       lck_rw_done(&in6_ifaddr_rwlock);
+
        if ((oia->ia6_flags & IN6_IFF_AUTOCONF) != 0) {
-               if (oia->ia6_ndpr == NULL) {
-                       log(LOG_NOTICE, "in6_unlink_ifa: autoconf'ed address "
-                           "0x%llx has no prefix\n",
-                           (uint64_t)VM_KERNEL_ADDRPERM(oia));
-               } else {
-                       struct nd_prefix *pr = oia->ia6_ndpr;
-
-                       oia->ia6_flags &= ~IN6_IFF_AUTOCONF;
-                       oia->ia6_ndpr = NULL;
-                       NDPR_LOCK(pr);
-                       VERIFY(pr->ndpr_addrcnt != 0);
-                       pr->ndpr_addrcnt--;
-                       NDPR_UNLOCK(pr);
-                       NDPR_REMREF(pr);        /* release addr reference */
-               }
-               IFA_UNLOCK(ifa);
-               lck_rw_done(&in6_ifaddr_rwlock);
                lck_mtx_lock(nd6_mutex);
                pfxlist_onlink_check();
                lck_mtx_unlock(nd6_mutex);
-       } else {
-               IFA_UNLOCK(ifa);
-               lck_rw_done(&in6_ifaddr_rwlock);
        }
-
        /*
         * release another refcnt for the link from in6_ifaddrs.
         * Do this only if it's not already unlinked in the event that we lost
@@ -3481,9 +3481,8 @@ in6_setmaxmtu(void)
 
        ifnet_head_lock_shared();
        TAILQ_FOREACH(ifp, &ifnet_head, if_list) {
-               struct nd_ifinfo *ndi;
+               struct nd_ifinfo *ndi = NULL;
 
-               lck_rw_lock_shared(nd_if_rwlock);
                if ((ndi = ND_IFINFO(ifp)) != NULL && !ndi->initialized)
                        ndi = NULL;
                if (ndi != NULL)
@@ -3493,7 +3492,6 @@ in6_setmaxmtu(void)
                        maxmtu = IN6_LINKMTU(ifp);
                if (ndi != NULL)
                        lck_mtx_unlock(&ndi->lock);
-               lck_rw_done(nd_if_rwlock);
        }
        ifnet_head_done();
        if (maxmtu)     /* update only when maxmtu is positive */
@@ -3854,9 +3852,8 @@ in6_ifaddr_set_dadprogress(struct in6_ifaddr *ia)
                if ((ifp->if_eflags & IFEF_IPV6_ROUTER) != 0) {
                        optdad = 0;
                } else {
-                       struct nd_ifinfo *ndi;
+                       struct nd_ifinfo *ndi = NULL;
 
-                       lck_rw_lock_shared(nd_if_rwlock);
                        ndi = ND_IFINFO(ifp);
                        VERIFY (ndi != NULL && ndi->initialized);
                        lck_mtx_lock(&ndi->lock);
@@ -3864,7 +3861,6 @@ in6_ifaddr_set_dadprogress(struct in6_ifaddr *ia)
                                optdad = 0;
                        }
                        lck_mtx_unlock(&ndi->lock);
-                       lck_rw_done(nd_if_rwlock);
                }
        }
 
@@ -3880,6 +3876,19 @@ in6_ifaddr_set_dadprogress(struct in6_ifaddr *ia)
                        } else if (ia->ia6_flags & IN6_IFF_SECURED) {
                                if (optdad & ND6_OPTIMISTIC_DAD_SECURED)
                                        flags = IN6_IFF_OPTIMISTIC;
+                       } else {
+                               /*
+                                * Keeping the behavior for temp and CGA
+                                * SLAAC addresses to have a knob for optimistic
+                                * DAD.
+                                * Other than that if ND6_OPTIMISTIC_DAD_AUTOCONF
+                                * is set, we should default to optimistic
+                                * DAD.
+                                * For now this means SLAAC addresses with interface
+                                * identifier derived from modified EUI-64 bit
+                                * identifiers.
+                                */
+                               flags = IN6_IFF_OPTIMISTIC;
                        }
                } else if ((optdad & ND6_OPTIMISTIC_DAD_DYNAMIC) &&
                    (ia->ia6_flags & IN6_IFF_DYNAMIC)) {
@@ -3920,13 +3929,13 @@ static int
 in6_getassocids(struct socket *so, uint32_t *cnt, user_addr_t aidp)
 {
        struct in6pcb *in6p = sotoin6pcb(so);
-       associd_t aid;
+       sae_associd_t aid;
 
        if (in6p == NULL || in6p->inp_state == INPCB_STATE_DEAD)
                return (EINVAL);
 
        /* IN6PCB has no concept of association */
-       aid = ASSOCID_ANY;
+       aid = SAE_ASSOCID_ANY;
        *cnt = 0;
 
        /* just asking how many there are? */
@@ -3940,16 +3949,16 @@ in6_getassocids(struct socket *so, uint32_t *cnt, user_addr_t aidp)
  * Handle SIOCGCONNIDS ioctl for PF_INET6 domain.
  */
 static int
-in6_getconnids(struct socket *so, associd_t aid, uint32_t *cnt,
+in6_getconnids(struct socket *so, sae_associd_t aid, uint32_t *cnt,
     user_addr_t cidp)
 {
        struct in6pcb *in6p = sotoin6pcb(so);
-       connid_t cid;
+       sae_connid_t cid;
 
        if (in6p == NULL || in6p->inp_state == INPCB_STATE_DEAD)
                return (EINVAL);
 
-       if (aid != ASSOCID_ANY && aid != ASSOCID_ALL)
+       if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL)
                return (EINVAL);
 
        /* if connected, return 1 connection count */
@@ -3960,7 +3969,7 @@ in6_getconnids(struct socket *so, associd_t aid, uint32_t *cnt,
                return (0);
 
        /* if IN6PCB is connected, assign it connid 1 */
-       cid = ((*cnt != 0) ? 1 : CONNID_ANY);
+       cid = ((*cnt != 0) ? 1 : SAE_CONNID_ANY);
 
        return (copyout(&cid, cidp, sizeof (cid)));
 }
@@ -3969,7 +3978,7 @@ in6_getconnids(struct socket *so, associd_t aid, uint32_t *cnt,
  * Handle SIOCGCONNINFO ioctl for PF_INET6 domain.
  */
 static int
-in6_getconninfo(struct socket *so, connid_t cid, uint32_t *flags,
+in6_getconninfo(struct socket *so, sae_connid_t cid, uint32_t *flags,
     uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len,
     user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type,
     user_addr_t aux_data, uint32_t *aux_len)
@@ -3990,7 +3999,7 @@ in6_getconninfo(struct socket *so, connid_t cid, uint32_t *flags,
                goto out;
        }
 
-       if (cid != CONNID_ANY && cid != CONNID_ALL && cid != 1) {
+       if (cid != SAE_CONNID_ANY && cid != SAE_CONNID_ALL && cid != 1) {
                error = EINVAL;
                goto out;
        }
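The large in6_control() rework above follows one pattern: drop the socket lock up front (some of the ioctl handlers may call ifnet_ioctl()), turn every early return into error = ...; goto done;, and rely on a common done: label, outside the quoted hunks, to re-lock the socket when so_unlocked was set, matching the "Socket will be re-locked prior to returning" comment. A compact sketch of that control flow under those assumptions; the lock helpers stand in for socket_lock()/socket_unlock():

#include <errno.h>
#include <stdbool.h>

static void socket_lock_sketch(void)   { /* socket_lock(so, 0)   */ }
static void socket_unlock_sketch(void) { /* socket_unlock(so, 0) */ }

static int
in6_control_sketch(bool have_socket, bool privileged)
{
	int error = 0;
	bool so_unlocked = false;

	if (have_socket) {
		/* unlock early: handlers below may re-enter ifnet_ioctl() */
		socket_unlock_sketch();
		so_unlocked = true;
	}

	if (!privileged) {
		error = EPERM;        /* no early return: funnel through done */
		goto done;
	}

	/* ... per-ioctl handler work goes here ... */

done:
	if (so_unlocked)
		socket_lock_sketch(); /* re-lock prior to returning */
	return error;
}

int
main(void)
{
	return in6_control_sketch(true, false) == EPERM ? 0 : 1;
}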
index d8d71fe3e9b95a141783e59eb1ef66a72819ef29..bdda501035d6486cd7f402ebbea93310053625bd 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
index 6142092c518d1adb8ac279c08a8a5587f0934699..7058b39768ceb08b6e98c873c8d4e9783f791ea5 100644 (file)
@@ -162,7 +162,7 @@ in6_gif_output(
        }
 
        /* prepend new IP header */
-       M_PREPEND(m, sizeof (struct ip6_hdr), M_DONTWAIT);
+       M_PREPEND(m, sizeof (struct ip6_hdr), M_DONTWAIT, 1);
        if (m && mbuf_len(m) < sizeof (struct ip6_hdr))
                m = m_pullup(m, sizeof (struct ip6_hdr));
        if (m == NULL) {
@@ -185,7 +185,7 @@ in6_gif_output(
                m_freem(m);
                return (ENETUNREACH);
        }
-       ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED : ECN_NOCARE,
+       ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_NORMAL : ECN_NOCARE,
            &otos, &itos);
        ip6->ip6_flow &= ~htonl(0xff << 20);
        ip6->ip6_flow |= htonl((u_int32_t)otos << 20);
@@ -245,6 +245,7 @@ in6_gif_input(struct mbuf **mp, int *offp, int proto)
        struct ip6_hdr *ip6;
        int af = 0;
        u_int32_t otos;
+       int egress_success = 0;
 
        ip6 = mtod(m, struct ip6_hdr *);
 
@@ -274,9 +275,9 @@ in6_gif_input(struct mbuf **mp, int *offp, int proto)
                }
                ip = mtod(m, struct ip *);
                if (gifp->if_flags & IFF_LINK1)
-                       ip_ecn_egress(ECN_ALLOWED, &otos8, &ip->ip_tos);
+                       egress_success = ip_ecn_egress(ECN_NORMAL, &otos8, &ip->ip_tos);
                else
-                       ip_ecn_egress(ECN_NOCARE, &otos8, &ip->ip_tos);
+                       egress_success = ip_ecn_egress(ECN_NOCARE, &otos8, &ip->ip_tos);
                break;
            }
 #endif /* INET */
@@ -291,9 +292,9 @@ in6_gif_input(struct mbuf **mp, int *offp, int proto)
                }
                ip6 = mtod(m, struct ip6_hdr *);
                if (gifp->if_flags & IFF_LINK1)
-                       ip6_ecn_egress(ECN_ALLOWED, &otos, &ip6->ip6_flow);
+                       egress_success = ip6_ecn_egress(ECN_NORMAL, &otos, &ip6->ip6_flow);
                else
-                       ip6_ecn_egress(ECN_NOCARE, &otos, &ip6->ip6_flow);
+                       egress_success = ip6_ecn_egress(ECN_NOCARE, &otos, &ip6->ip6_flow);
                break;
            }
 #endif
@@ -303,6 +304,12 @@ in6_gif_input(struct mbuf **mp, int *offp, int proto)
                return (IPPROTO_DONE);
        }
 
+       if (egress_success == 0) {
+               ip6stat.ip6s_nogif++;
+               m_freem(m);
+               return (IPPROTO_DONE);
+       }
+
        /* Replace the rcvif by gifp for ifnet_input to route it correctly */
        if (m->m_pkthdr.rcvif)
                m->m_pkthdr.rcvif = gifp;
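The tunnel input change above makes the result of the ECN egress helpers matter: ip_ecn_egress()/ip6_ecn_egress() now report whether the outer and inner ECN markings can be reconciled, and when they cannot the packet is dropped and counted in ip6s_nogif. A rough RFC 6040-style sketch of the one combination normal (ECN_NORMAL) mode must reject, congestion-experienced outside with a not-ECT inner packet; the constants follow the IP ECN field and this is an assumption-labeled illustration, not the kernel helper:

#include <stdbool.h>
#include <stdio.h>

#define ECN_NOT_ECT 0x00    /* inner packet does not support ECN */
#define ECN_CE      0x03    /* congestion experienced */

/* false => the decapsulated packet should be dropped (RFC 6040 style) */
static bool
ecn_egress_ok_sketch(unsigned char outer_tos, unsigned char inner_tos)
{
	if ((outer_tos & ECN_CE) == ECN_CE &&
	    (inner_tos & ECN_CE) == ECN_NOT_ECT) {
		/* congestion was signalled outside but the inner
		 * packet cannot carry the CE mark */
		return false;
	}
	return true;
}

int
main(void)
{
	printf("CE over not-ECT permitted? %d\n",
	    ecn_egress_ok_sketch(ECN_CE, ECN_NOT_ECT));
	return 0;
}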
index e2a232a903a67e97a7800e83e860b67a854d55d3..8b5379f53b71d56528924b8cb897d1a2d682a739 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -518,7 +518,7 @@ in6_ifattach_linklocal(struct ifnet *ifp, struct in6_aliasreq *ifra)
         * address, and then reconfigure another one, the prefix is still
         * valid with referring to the old link-local address.
         */
-       if ((pr = nd6_prefix_lookup(&pr0)) == NULL) {
+       if ((pr = nd6_prefix_lookup(&pr0, ND6_PREFIX_EXPIRY_UNSPEC)) == NULL) {
                if ((error = nd6_prelist_add(&pr0, NULL, &pr, TRUE)) != 0) {
                        IFA_REMREF(&ia->ia_ifa);
                        lck_mtx_destroy(&pr0.ndpr_lock, ifa_mtx_grp);
@@ -685,6 +685,7 @@ in6_ifattach_prelim(struct ifnet *ifp)
        struct in6_ifextra *ext;
        void **pbuf, *base;
        int error = 0;
+       struct in6_ifaddr *ia6 = NULL;
 
        VERIFY(ifp != NULL);
 
@@ -720,13 +721,13 @@ skipmcast:
 #endif
 
        if (ifp->if_inet6data == NULL) {
-               ext = (struct in6_ifextra *)_MALLOC(in6_extra_size, M_IFADDR,
+               ext = (struct in6_ifextra *)_MALLOC(in6_extra_bufsize, M_IFADDR,
                    M_WAITOK|M_ZERO);
                if (!ext)
                        return (ENOMEM);
                base = (void *)P2ROUNDUP((intptr_t)ext + sizeof(uint64_t),
                    sizeof(uint64_t));
-               VERIFY(((intptr_t)base + in6_extra_size) <= 
+               VERIFY(((intptr_t)base + in6_extra_size) <=
                    ((intptr_t)ext + in6_extra_bufsize));
                pbuf = (void **)((intptr_t)base - sizeof(void *));
                *pbuf = ext;
@@ -734,7 +735,7 @@ skipmcast:
                VERIFY(IS_P2ALIGNED(ifp->if_inet6data, sizeof(uint64_t)));
        } else {
                /*
-                * Since the structure is never freed, we need to zero out 
+                * Since the structure is never freed, we need to zero out
                 * some of its members. We avoid zeroing out the scope6
                 * structure on purpose because other threads might be
                 * using its contents.
@@ -743,12 +744,28 @@ skipmcast:
                    sizeof(IN6_IFEXTRA(ifp)->icmp6_ifstat));
                bzero(&IN6_IFEXTRA(ifp)->in6_ifstat,
                    sizeof(IN6_IFEXTRA(ifp)->in6_ifstat));
+               /*
+                * XXX When recycling, nd_ifinfo gets initialized, other
+                * than the lock, inside nd6_ifattach
+                */
        }
 
-       /* initialize NDP variables */
-       if ((error = nd6_ifattach(ifp)) != 0)
-               return (error);
-
+       /*
+        * XXX Only initialize NDP ifinfo for the interface
+        * if interface has not yet been configured with
+        * link local IPv6 address.
+        * Could possibly be optimized with an interface flag if need
+        * be. For now using in6ifa_ifpforlinklocal.
+        */
+       ia6 = in6ifa_ifpforlinklocal(ifp, 0);
+       if (ia6 == NULL) {
+               /* initialize NDP variables */
+               nd6_ifattach(ifp);
+       } else {
+               VERIFY(ND_IFINFO(ifp)->initialized);
+               IFA_REMREF(&ia6->ia_ifa);
+               ia6 = NULL;
+       }
        scope6_ifattach(ifp);
 
        /* initialize loopback interface address */
@@ -873,8 +890,8 @@ int
 in6_ifattach_llstartreq(struct ifnet *ifp, struct in6_llstartreq *llsr)
 {
        struct in6_aliasreq ifra;
-       struct in6_ifaddr *ia6;
-       struct nd_ifinfo *ndi;
+       struct in6_ifaddr *ia6 = NULL;
+       struct nd_ifinfo *ndi = NULL;
        int error;
 
        VERIFY(llsr != NULL);
@@ -889,14 +906,11 @@ in6_ifattach_llstartreq(struct ifnet *ifp, struct in6_llstartreq *llsr)
        if (nd6_send_opstate == ND6_SEND_OPMODE_DISABLED)
                return (ENXIO);
 
-       lck_rw_lock_shared(nd_if_rwlock);
        ndi = ND_IFINFO(ifp);
        VERIFY(ndi != NULL && ndi->initialized);
        if ((ndi->flags & ND6_IFF_INSECURE) != 0) {
-               lck_rw_done(nd_if_rwlock);
                return (ENXIO);
        }
-       lck_rw_done(nd_if_rwlock);
 
        /* assign a link-local address, only if there isn't one here already. */
        ia6 = in6ifa_ifpforlinklocal(ifp, 0);
@@ -1142,10 +1156,8 @@ in6_iid_mktmp(struct ifnet *ifp, u_int8_t *retbuf, const u_int8_t *baseid,
     int generate)
 {
        u_int8_t nullbuf[8];
-       struct nd_ifinfo *ndi;
+       struct nd_ifinfo *ndi = ND_IFINFO(ifp);
 
-       lck_rw_lock_shared(nd_if_rwlock);
-       ndi = ND_IFINFO(ifp);
        VERIFY(ndi != NULL && ndi->initialized);
        lck_mtx_lock(&ndi->lock);
        bzero(nullbuf, sizeof (nullbuf));
@@ -1164,28 +1176,27 @@ in6_iid_mktmp(struct ifnet *ifp, u_int8_t *retbuf, const u_int8_t *baseid,
 
        bcopy(ndi->randomid, retbuf, 8);
        lck_mtx_unlock(&ndi->lock);
-       lck_rw_done(nd_if_rwlock);
 }
 
 void
 in6_tmpaddrtimer(void *arg)
 {
 #pragma unused(arg)
-       int i;
-       struct nd_ifinfo *ndi;
+       struct ifnet *ifp = NULL;
+       struct nd_ifinfo *ndi = NULL;
        u_int8_t nullbuf[8];
 
        timeout(in6_tmpaddrtimer, (caddr_t)0, (ip6_temp_preferred_lifetime -
            ip6_desync_factor - ip6_temp_regen_advance) * hz);
 
-       lck_rw_lock_shared(nd_if_rwlock);
        bzero(nullbuf, sizeof (nullbuf));
-       for (i = 1; i < if_index + 1; i++) {
-               if (!nd_ifinfo || i >= nd_ifinfo_indexlim)
-                       break;
-               ndi = &nd_ifinfo[i];
-               if (!ndi->initialized)
+       ifnet_head_lock_shared();
+       for (ifp = ifnet_head.tqh_first; ifp;
+           ifp = ifp->if_link.tqe_next) {
+               ndi = ND_IFINFO(ifp);
+               if ((NULL == ndi) || (FALSE == ndi->initialized)) {
                        continue;
+               }
                lck_mtx_lock(&ndi->lock);
                if (bcmp(ndi->randomid, nullbuf, sizeof (nullbuf)) != 0) {
                        /*
@@ -1197,5 +1208,5 @@ in6_tmpaddrtimer(void *arg)
                }
                lck_mtx_unlock(&ndi->lock);
        }
-       lck_rw_done(nd_if_rwlock);
+       ifnet_head_done();
 }
index a76ce4fe0b311fdcc92ea93abdb01b83ce54df90..74dd6496ce988d26dbe759c405023af3ec60509a 100644 (file)
@@ -1655,7 +1655,7 @@ in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
                if (error)
                        return (error);
                /* we never use msfr.msfr_srcs; */
-               memcpy(&msfr, &msfr64, sizeof(msfr));
+               memcpy(&msfr, &msfr64, sizeof(msfr64));
        } else {
                error = sooptcopyin(sopt, &msfr32,
                    sizeof(struct __msfilterreq32),
@@ -1663,7 +1663,7 @@ in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
                if (error)
                        return (error);
                /* we never use msfr.msfr_srcs; */
-               memcpy(&msfr, &msfr32, sizeof(msfr));
+               memcpy(&msfr, &msfr32, sizeof(msfr32));
        }
 
        if (msfr.msfr_group.ss_family != AF_INET6 ||
@@ -1735,7 +1735,6 @@ in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
                        IM6O_UNLOCK(imo);
                        return (ENOBUFS);
                }
-               bzero(tss, (size_t) msfr.msfr_nsrcs * sizeof(*tss));
        }
 
        /*
@@ -1784,7 +1783,7 @@ in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
                msfr32.msfr_ifindex = msfr.msfr_ifindex;
                msfr32.msfr_fmode   = msfr.msfr_fmode;
                msfr32.msfr_nsrcs   = msfr.msfr_nsrcs;
-               memcpy(&msfr64.msfr_group, &msfr.msfr_group,
+               memcpy(&msfr32.msfr_group, &msfr.msfr_group,
                    sizeof(struct sockaddr_storage));
                error = sooptcopyout(sopt, &msfr32,
                    sizeof(struct __msfilterreq32));
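Two small but real fixes sit in the in6p_get_source_filters() hunks above: when importing the 32- or 64-bit __msfilterreq from userland, only sizeof the source structure is now copied into the kernel's common msfr (the old code copied sizeof(msfr), over-reading the smaller buffer), and the 32-bit reply now fills msfr32.msfr_group instead of msfr64. A reduced sketch of the import side, with placeholder structure sizes:

#include <string.h>

struct req32_sketch { char data[40]; };   /* placeholder user layout */
struct req_sketch   { char data[48]; };   /* kernel's common layout   */

static void
import_req32_sketch(struct req_sketch *dst, const struct req32_sketch *src)
{
	memset(dst, 0, sizeof(*dst));
	/* copy only as many bytes as the source actually holds */
	memcpy(dst, src, sizeof(*src));
}

int
main(void)
{
	struct req32_sketch in = { { 0 } };
	struct req_sketch out;

	import_req32_sketch(&out, &in);
	return 0;
}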
index 63beb9a91d689c698a4ddb88644748bc29c93b97..660f8da4f03200dd5f4bdd1e611015adfa52655e 100644 (file)
@@ -857,7 +857,7 @@ in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst, u_int fport_arg,
        u_int32_t flowinfo;
        int errno;
 
-       if ((unsigned)cmd > PRC_NCMDS || dst->sa_family != AF_INET6)
+       if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6)
                return;
 
        sa6_dst = (struct sockaddr_in6 *)(void *)dst;
@@ -1041,16 +1041,9 @@ void
 in6_losing(struct inpcb *in6p)
 {
        struct rtentry *rt;
-       struct rt_addrinfo info;
 
        if ((rt = in6p->in6p_route.ro_rt) != NULL) {
                RT_LOCK(rt);
-               bzero((caddr_t)&info, sizeof (info));
-               info.rti_info[RTAX_DST] =
-                   (struct sockaddr *)&in6p->in6p_route.ro_dst;
-               info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
-               info.rti_info[RTAX_NETMASK] = rt_mask(rt);
-               rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
                if (rt->rt_flags & RTF_DYNAMIC) {
                        /*
                         * Prevent another thread from modifying rt_key,
index 53e362c89e16b1d5f86f5cce4fc2863054464bb3..3aedbea2f0b2e06ec506db51957b2d778f3deff8 100644 (file)
@@ -178,7 +178,7 @@ struct ip6protosw inet6sw[] = {
        .pr_type =              SOCK_DGRAM,
        .pr_protocol =          IPPROTO_UDP,
        .pr_flags =             PR_ATOMIC|PR_ADDR|PR_PROTOLOCK|PR_PCBLOCK|
-                               PR_EVCONNINFO,
+                               PR_EVCONNINFO|PR_PRECONN_WRITE,
        .pr_input =             udp6_input,
        .pr_ctlinput =          udp6_ctlinput,
        .pr_ctloutput =         ip6_ctloutput,
@@ -194,7 +194,8 @@ struct ip6protosw inet6sw[] = {
        .pr_type =              SOCK_STREAM,
        .pr_protocol =          IPPROTO_TCP,
        .pr_flags =             PR_CONNREQUIRED|PR_WANTRCVD|PR_PCBLOCK|
-                               PR_PROTOLOCK|PR_DISPOSE|PR_EVCONNINFO,
+                               PR_PROTOLOCK|PR_DISPOSE|PR_EVCONNINFO|
+                               PR_PRECONN_WRITE|PR_DATA_IDEMPOTENT,
        .pr_input =             tcp6_input,
        .pr_ctlinput =          tcp6_ctlinput,
        .pr_ctloutput =         tcp_ctloutput,
index 86b703bf5c64d4b45bf6c6db316786e8b8771460..fda321bb7fd7bac3e3a4d13a4db789b925c61458 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -133,6 +133,16 @@ SYSCTL_INT(_net_inet6_ip6, OID_AUTO, select_srcif_debug,
        CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_select_srcif_debug, 0,
        "log source interface selection debug info");
 
+static int ip6_select_srcaddr_debug = 0;
+SYSCTL_INT(_net_inet6_ip6, OID_AUTO, select_srcaddr_debug,
+       CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_select_srcaddr_debug, 0,
+       "log source address selection debug info");
+
+static int ip6_select_src_expensive_secondary_if = 0;
+SYSCTL_INT(_net_inet6_ip6, OID_AUTO, select_src_expensive_secondary_if,
+       CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_select_src_expensive_secondary_if, 0,
+       "allow source interface selection to use expensive secondaries");
+
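A user-space sketch (hypothetical tool, not part of this commit) showing how the two knobs added above can be flipped with sysctlbyname(3); the MIB names follow directly from the SYSCTL_INT declarations:

	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <stdio.h>

	int
	main(void)
	{
		int on = 1;

		/* Log source-address selection for sockets that also have SO_DEBUG set. */
		if (sysctlbyname("net.inet6.ip6.select_srcaddr_debug",
		    NULL, NULL, &on, sizeof (on)) == -1)
			perror("select_srcaddr_debug");

		/* Allow source selection to use addresses on expensive secondary
		 * interfaces (off by default, per the hunk above). */
		if (sysctlbyname("net.inet6.ip6.select_src_expensive_secondary_if",
		    NULL, NULL, &on, sizeof (on)) == -1)
			perror("select_src_expensive_secondary_if");

		return (0);
	}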
 #define        ADDR_LABEL_NOTAPP (-1)
 struct in6_addrpolicy defaultaddrpolicy;
 
@@ -164,6 +174,18 @@ static int dump_addrsel_policyent(const struct in6_addrpolicy *, void *);
 static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
 void addrsel_policy_init(void);
 
+#define        SASEL_DO_DBG(inp) \
+       (ip6_select_srcaddr_debug && (inp) != NULL && \
+           (inp)->inp_socket != NULL && \
+           ((inp)->inp_socket->so_options & SO_DEBUG))
+
+#define SASEL_LOG(fmt, ...) \
+do { \
+       if (SASEL_DO_DBG(inp)) \
+               printf("%s:%d " fmt "\n",\
+                   __FUNCTION__, __LINE__, ##__VA_ARGS__); \
+} while (0); \
+
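SASEL_DO_DBG() only fires when the sysctl above is enabled and the socket also has SO_DEBUG set, so logging can be scoped to a single flow rather than the whole system. A user-space sketch (hypothetical helper, not part of this commit) of opting one socket in:

	#include <sys/socket.h>
	#include <stdio.h>

	/* Opt one IPv6 socket into source-address-selection logging. */
	static int
	enable_sasel_debug(int fd)
	{
		int on = 1;

		/* Also requires: sysctl net.inet6.ip6.select_srcaddr_debug=1 */
		if (setsockopt(fd, SOL_SOCKET, SO_DEBUG, &on, sizeof (on)) == -1) {
			perror("setsockopt(SO_DEBUG)");
			return (-1);
		}
		return (0);
	}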
 /*
  * Return an IPv6 address, which is the most appropriate for a given
  * destination and user specified options.
@@ -171,21 +193,22 @@ void addrsel_policy_init(void);
  * an entry to the caller for later use.
  */
 #define        REPLACE(r) do {\
-       if ((r) < sizeof (ip6stat.ip6s_sources_rule) / \
-               sizeof (ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
-               ip6stat.ip6s_sources_rule[(r)]++; \
+       SASEL_LOG("REPLACE r %d ia %s ifp1 %s\n", \
+           (r), s_src, ifp1->if_xname); \
+       srcrule = (r); \
        goto replace; \
 } while (0)
+
 #define        NEXTSRC(r) do {\
-       if ((r) < sizeof (ip6stat.ip6s_sources_rule) / \
-               sizeof (ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
-               ip6stat.ip6s_sources_rule[(r)]++; \
+       SASEL_LOG("NEXTSRC r %d ia %s ifp1 %s\n", \
+           (r), s_src, ifp1->if_xname); \
        goto next;              /* XXX: we can't use 'continue' here */ \
 } while (0)
+
 #define        BREAK(r) do { \
-       if ((r) < sizeof (ip6stat.ip6s_sources_rule) / \
-               sizeof (ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
-               ip6stat.ip6s_sources_rule[(r)]++; \
+       SASEL_LOG("BREAK r %d ia %s ifp1 %s\n", \
+           (r), s_src, ifp1->if_xname); \
+       srcrule = (r); \
        goto out;               /* XXX: we can't use 'break' here */ \
 } while (0)
 
@@ -212,6 +235,9 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
        struct ip6_out_args ip6oa = { ifscope, { 0 }, IP6OAF_SELECT_SRCIF, 0 };
        boolean_t islocal = FALSE;
        uint64_t secs = net_uptime();
+       char s_src[MAX_IPv6_STR_LEN], s_dst[MAX_IPv6_STR_LEN];
+       const struct in6_addr *tmp;
+       int bestrule = IP6S_SRCRULE_0;
 
        dst = dstsock->sin6_addr; /* make a copy for local operation */
        *errorp = 0;
@@ -313,6 +339,17 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
                goto done;
        }
 
+       if (SASEL_DO_DBG(inp)) {
+               (void) inet_ntop(AF_INET6, &dst, s_dst, sizeof (s_src));
+
+               tmp = &in6addr_any;
+               (void) inet_ntop(AF_INET6, tmp, s_src, sizeof (s_src));
+
+               printf("%s out src %s dst %s ifscope %d ifp %s\n", 
+                   __func__, s_src, s_dst, ifscope,
+                   ifp ? ifp->if_xname : "NULL");
+       }
+
        *errorp = in6_setscope(&dst, ifp, &odstzone);
        if (*errorp != 0) {
                src_storage = NULL;
@@ -326,6 +363,11 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
                u_int32_t srczone, osrczone, dstzone;
                struct in6_addr src;
                struct ifnet *ifp1 = ia->ia_ifp;
+               int srcrule;
+
+               if (SASEL_DO_DBG(inp))
+                       (void) inet_ntop(AF_INET6, &ia->ia_addr.sin6_addr,
+                            s_src, sizeof (s_src));
 
                IFA_LOCK(&ia->ia_ifa);
                /*
@@ -335,27 +377,37 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
                 * XXX: we should probably use sin6_scope_id here.
                 */
                if (in6_setscope(&dst, ifp1, &dstzone) ||
-                   odstzone != dstzone)
+                   odstzone != dstzone) {
+                       SASEL_LOG("NEXT ia %s ifp1 %s odstzone %d != dstzone %d\n",
+                           s_src, ifp1->if_xname, odstzone, dstzone);
                        goto next;
-
+               }
                src = ia->ia_addr.sin6_addr;
                if (in6_setscope(&src, ifp, &osrczone) ||
                    in6_setscope(&src, ifp1, &srczone) ||
-                   osrczone != srczone)
+                   osrczone != srczone) {
+                       SASEL_LOG("NEXT ia %s ifp1 %s osrczone %d != srczone %d\n",
+                           s_src, ifp1->if_xname, osrczone, srczone);
                        goto next;
-
+               }
                /* avoid unusable addresses */
                if ((ia->ia6_flags &
-                   (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED)))
+                   (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) {
+                       SASEL_LOG("NEXT ia %s ifp1 %s ia6_flags 0x%x\n",
+                           s_src, ifp1->if_xname, ia->ia6_flags);
                        goto next;
-
-               if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia, secs))
+               }
+               if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia, secs)) {
+                       SASEL_LOG("NEXT ia %s ifp1 %s IFA6_IS_DEPRECATED\n",
+                           s_src, ifp1->if_xname);
                        goto next;
-
+               }
                if (!nd6_optimistic_dad &&
-                   (ia->ia6_flags & IN6_IFF_OPTIMISTIC) != 0)
+                   (ia->ia6_flags & IN6_IFF_OPTIMISTIC) != 0) {
+                       SASEL_LOG("NEXT ia %s ifp1 %s IN6_IFF_OPTIMISTIC\n",
+                           s_src, ifp1->if_xname);
                        goto next;
-
+               }
                /* Rule 1: Prefer same address */
                if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr))
                        BREAK(IP6S_SRCRULE_1); /* there should be no better candidate */
@@ -530,16 +582,39 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
                 * Last resort: just keep the current candidate.
                 * Or, do we need more rules?
                 */
+               if (ifp1 != ifp && (ifp1->if_eflags & IFEF_EXPENSIVE) &&
+                   ip6_select_src_expensive_secondary_if == 0) {
+                       SASEL_LOG("NEXT ia %s ifp1 %s IFEF_EXPENSIVE\n",
+                           s_src, ifp1->if_xname);
+                       ip6stat.ip6s_sources_skip_expensive_secondary_if++;
+                       goto next;
+               }
+               SASEL_LOG("NEXT ia %s ifp1 %s last resort\n",
+                   s_src, ifp1->if_xname);
                IFA_UNLOCK(&ia->ia_ifa);
                continue;
 
 replace:
+               /*
+                * Ignore addresses on secondary interfaces that are marked
+                * expensive
+                */
+               if (ifp1 != ifp && (ifp1->if_eflags & IFEF_EXPENSIVE) &&
+                   ip6_select_src_expensive_secondary_if == 0) {
+                       SASEL_LOG("NEXT ia %s ifp1 %s IFEF_EXPENSIVE\n",
+                           s_src, ifp1->if_xname);
+                       ip6stat.ip6s_sources_skip_expensive_secondary_if++;
+                       goto next;
+               }
+               bestrule = srcrule;
                best_scope = (new_scope >= 0 ? new_scope :
                    in6_addrscope(&ia->ia_addr.sin6_addr));
                best_policy = (new_policy ? new_policy :
                    in6_addrsel_lookup_policy(&ia->ia_addr));
                best_matchlen = (new_matchlen >= 0 ? new_matchlen :
                    in6_matchlen(&ia->ia_addr.sin6_addr, &dst));
+               SASEL_LOG("NEXT ia %s ifp1 %s best_scope %d new_scope %d dst_scope %d\n",
+                   s_src, ifp1->if_xname, best_scope, new_scope, dst_scope);
                IFA_ADDREF_LOCKED(&ia->ia_ifa); /* for ia_best */
                IFA_UNLOCK(&ia->ia_ifa);
                if (ia_best != NULL)
@@ -577,10 +652,21 @@ out:
        }
 
        IFA_LOCK_SPIN(&ia->ia_ifa);
+       if (bestrule < IP6S_SRCRULE_COUNT)
+               ip6stat.ip6s_sources_rule[bestrule]++;
        *src_storage = satosin6(&ia->ia_addr)->sin6_addr;
        IFA_UNLOCK(&ia->ia_ifa);
        IFA_REMREF(&ia->ia_ifa);
 done:
+       if (SASEL_DO_DBG(inp)) {
+               (void) inet_ntop(AF_INET6, &dst, s_dst, sizeof (s_src));
+
+               tmp = (src_storage != NULL) ? src_storage : &in6addr_any;
+               (void) inet_ntop(AF_INET6, tmp, s_src, sizeof (s_src));
+           
+               printf("%s out src %s dst %s ifscope %d dst_scope %d best_scope %d\n", 
+                   __func__, s_src, s_dst, ifscope, dst_scope, best_scope);
+       }
        if (ifpp != NULL) {
                /* if ifp is non-NULL, refcnt held in in6_selectif() */
                *ifpp = ifp;
@@ -668,6 +754,10 @@ selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock,
        select_srcif = (ip6_doscopedroute && srcsock != NULL &&
            !IN6_IS_ADDR_UNSPECIFIED(&srcsock->sin6_addr));
 
+       if (ip6_select_srcif_debug) {
+               printf("%s src %s dst %s ifscope %d select_srcif %d\n", 
+                   __func__, s_src, s_dst, ifscope, select_srcif);
+       }
        /*
         * If Scoped Routing is disabled, ignore the given ifscope.
         * Otherwise even if source selection won't be performed,
@@ -792,12 +882,15 @@ getsrcif:
 
                if (ip6_select_srcif_debug && ifa != NULL) {
                        if (ro->ro_rt != NULL) {
-                               printf("%s->%s ifscope %d->%d ifa_if %s "
-                                   "ro_if %s\n", s_src, s_dst, ifscope,
+                               printf("%s %s->%s ifscope %d->%d ifa_if %s "
+                                   "ro_if %s\n",
+                                   __func__, 
+                                   s_src, s_dst, ifscope,
                                    scope, if_name(ifa->ifa_ifp),
                                    if_name(rt_ifp));
                        } else {
-                               printf("%s->%s ifscope %d->%d ifa_if %s\n",
+                               printf("%s %s->%s ifscope %d->%d ifa_if %s\n",
+                                   __func__, 
                                    s_src, s_dst, ifscope, scope,
                                    if_name(ifa->ifa_ifp));
                        }
@@ -827,10 +920,14 @@ getsrcif:
                ifa = (struct ifaddr *)ifa_foraddr6(&srcsock->sin6_addr);
 
                if (ip6_select_srcif_debug && ifa != NULL) {
-                       printf("%s->%s ifscope %d ifa_if %s\n",
+                       printf("%s %s->%s ifscope %d ifa_if %s\n",
+                           __func__,
                            s_src, s_dst, ifscope, if_name(ifa->ifa_ifp));
+               } else if (ip6_select_srcif_debug) {
+                       printf("%s %s->%s ifscope %d ifa_if NULL\n",
+                           __func__,
+                           s_src, s_dst, ifscope);
                }
-
        }
 
 getroute:
@@ -1119,8 +1216,10 @@ done:
        if (error == 0) {
                if (retrt != NULL && route != NULL)
                        *retrt = route->ro_rt;  /* ro_rt may be NULL */
-       } else if (select_srcif && ip6_select_srcif_debug) {
-               printf("%s->%s ifscope %d ifa_if %s ro_if %s (error=%d)\n",
+       }  
+       if (ip6_select_srcif_debug) {
+               printf("%s %s->%s ifscope %d ifa_if %s ro_if %s (error=%d)\n",
+                   __func__,
                    s_src, s_dst, ifscope,
                    (ifa != NULL) ? if_name(ifa->ifa_ifp) : "NONE",
                    (ifp != NULL) ? if_name(ifp) : "NONE", error);
@@ -1137,7 +1236,7 @@ done:
  * caller provides a non-NULL retifp.  The caller is responsible for checking
  * if the returned ifp is valid and release its reference at all times.
  */
-static int
+int
 in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
     struct ip6_moptions *mopts, struct route_in6 *ro,
     struct ip6_out_args *ip6oa, struct ifnet **retifp)
@@ -1235,25 +1334,19 @@ in6_selecthlim(struct in6pcb *in6p, struct ifnet *ifp)
 {
        if (in6p && in6p->in6p_hops >= 0) {
                return (in6p->in6p_hops);
-       } else {
-               lck_rw_lock_shared(nd_if_rwlock);
-               if (ifp && ifp->if_index < nd_ifinfo_indexlim) {
-                       u_int8_t chlim;
-                       struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index];
-
-                       if (ndi->initialized) {
-                               /* access chlim without lock, for performance */
-                               chlim = ndi->chlim;
-                       } else {
-                               chlim = ip6_defhlim;
-                       }
-                       lck_rw_done(nd_if_rwlock);
-                       return (chlim);
+       } else if (NULL != ifp) {
+               u_int8_t chlim;
+               struct nd_ifinfo *ndi = ND_IFINFO(ifp);
+               if (ndi && ndi->initialized) {
+                       /* access chlim without lock, for performance */
+                       chlim = ndi->chlim;
                } else {
-                       lck_rw_done(nd_if_rwlock);
-                       return (ip6_defhlim);
+                       chlim = ip6_defhlim;
                }
+               return (chlim);
        }
+
+       return (ip6_defhlim);
 }
 
 /*
index 7157b4c042cbb702396c9b05d7c68dd9e2191de2..07cc9e16f551f8319d54f60633d9dcd8e21fdf82 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -97,8 +97,8 @@
 #ifdef BSD_KERNEL_PRIVATE
 #include <sys/tree.h>
 #include <sys/mcache.h>
-#include <netinet6/scope6_var.h>
 #endif /* BSD_KERNEL_PRIVATE */
+#include <netinet6/scope6_var.h>
 #include <sys/kern_event.h>
 #include <net/ethernet.h>
 
@@ -311,18 +311,6 @@ struct icmp6_ifstat {
        u_quad_t ifs6_out_mlddone;
 };
 
-#ifdef BSD_KERNEL_PRIVATE
-/*
- * Per-interface IPv6 structures.
- */
-struct in6_ifextra {
-       struct scope6_id scope6_id;
-       struct in6_ifstat in6_ifstat;
-       struct icmp6_ifstat icmp6_ifstat;
-};
-#define        IN6_IFEXTRA(_ifp)       ((struct in6_ifextra *)(_ifp->if_inet6data))
-#endif /* BSD_KERNEL_PRIVATE */
-
 struct in6_ifreq {
        char    ifr_name[IFNAMSIZ];
        union {
@@ -336,7 +324,7 @@ struct in6_ifreq {
                struct in6_addrlifetime ifru_lifetime;
                struct in6_ifstat ifru_stat;
                struct icmp6_ifstat ifru_icmp6stat;
-               u_int32_t ifru_scope_id[16];
+               u_int32_t ifru_scope_id[SCOPE6_ID_MAX];
        } ifr_ifru;
 };
 
@@ -849,6 +837,22 @@ struct in6_multi_mship {
        LIST_ENTRY(in6_multi_mship) i6mm_chain;  /* multicast options chain */
 };
 
+#ifdef BSD_KERNEL_PRIVATE
+#include <netinet6/nd6_var.h>
+/*
+ * Per-interface IPv6 structures.
+ */
+struct in6_ifextra {
+       struct scope6_id        scope6_id;
+       struct in6_ifstat       in6_ifstat;
+       struct icmp6_ifstat     icmp6_ifstat;
+       struct nd_ifinfo        nd_ifinfo;
+       uint32_t                netsig_len;
+       u_int8_t                netsig[IFNET_SIGNATURELEN];
+};
+#define IN6_IFEXTRA(_ifp)       ((struct in6_ifextra *)(_ifp->if_inet6data))
+#endif /* BSD_KERNEL_PRIVATE */
+
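With struct nd_ifinfo now embedded in the per-interface in6_ifextra, accessors such as ND_IFINFO(ifp), used earlier in this diff, presumably resolve through IN6_IFEXTRA() instead of the old global nd_ifinfo[] array. A plausible sketch of that accessor (the real definition lives in nd6.h/nd6_var.h and is not shown in this diff, so details may differ):

	/* Sketch only: NULL-safe lookup of the embedded per-ifnet ND state. */
	#define ND_IFINFO(_ifp)                                         \
		(((_ifp) == NULL || IN6_IFEXTRA(_ifp) == NULL) ?        \
		    NULL : &IN6_IFEXTRA(_ifp)->nd_ifinfo)

This also matches the NULL/initialized checks the callers above perform before using the returned pointer.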
 struct mld_ifinfo;
 
 /*
index 2506dd3af2c5a2cc06cee90e70ac62c1bcc4ccde..cfad473a6aca69ca511cf262b81f6693d8059b13 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2013, 2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -66,5 +66,9 @@
 
 #ifdef BSD_KERNEL_PRIVATE
 extern void ip6_ecn_ingress(int, u_int32_t *, const u_int32_t *);
-extern void ip6_ecn_egress(int, const u_int32_t *, u_int32_t *);
+extern int ip6_ecn_egress(int, const u_int32_t *, u_int32_t *);
+extern void ip46_ecn_ingress(int, u_int32_t *, const u_int8_t *);
+extern int ip46_ecn_egress(int, const u_int32_t *, u_int8_t *);
+extern void ip64_ecn_ingress(int, u_int8_t *, const u_int32_t *);
+extern int ip64_ecn_egress(int, const u_int8_t *, u_int32_t *);
 #endif /* BSD_KERNEL_PRIVATE */
index 4f3c61ee01b50eb14166ca0b6b85c9029a69cac4..2f5ab8fbfbddbf4c3142b0453725b3e68f25a10f 100644 (file)
@@ -223,8 +223,8 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
 
        if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
                /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */
-               icmp6_error(m, ICMP6_TIME_EXCEEDED,
-                               ICMP6_TIME_EXCEED_TRANSIT, 0);
+               icmp6_error_flag(m, ICMP6_TIME_EXCEEDED,
+                               ICMP6_TIME_EXCEED_TRANSIT, 0, 0);
                return (NULL);
        }
 
index 4cc199ca8bc0b53ee9ac2c6225b599d2199a3ec7..e9521ba3c14e5a400264314426a8293c075f95d0 100644 (file)
 #include <net/ntstat.h>
 #include <net/init.h>
 #include <net/net_osdep.h>
+#include <net/net_perf.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
@@ -204,6 +205,9 @@ static lck_grp_attr_t       *ip6_mutex_grp_attr;
 extern int loopattach_done;
 extern void addrsel_policy_init(void);
 
+static int sysctl_reset_ip6_input_stats SYSCTL_HANDLER_ARGS;
+static int sysctl_ip6_input_measure_bins SYSCTL_HANDLER_ARGS;
+static int sysctl_ip6_input_getperf SYSCTL_HANDLER_ARGS;
 static void ip6_init_delayed(void);
 static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
 
@@ -223,6 +227,23 @@ SYSCTL_UINT(_net_inet6_ip6, OID_AUTO, adj_clear_hwcksum,
        CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_adj_clear_hwcksum, 0,
        "Invalidate hwcksum info when adjusting length");
 
+static int ip6_input_measure = 0;
+SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, input_perf,
+       CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+       &ip6_input_measure, 0, sysctl_reset_ip6_input_stats, "I", "Do time measurement");
+
+static uint64_t ip6_input_measure_bins = 0;
+SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, input_perf_bins,
+       CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_input_measure_bins, 0,
+       sysctl_ip6_input_measure_bins, "I",
+       "bins for chaining performance data histogram");
+
+static net_perf_t net_perf;
+SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, input_perf_data,
+       CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
+       0, 0, sysctl_ip6_input_getperf, "S,net_perf",
+       "IP6 input performance data (struct net_perf, net/net_perf.h)");
+
 /*
  * On platforms which require strict alignment (currently for anything but
  * i386 or x86_64), check if the IP header pointer is 32-bit aligned; if not,
@@ -263,7 +284,18 @@ static void
 ip6_proto_input(protocol_family_t protocol, mbuf_t packet)
 {
 #pragma unused(protocol)
+#if INET
+       struct timeval start_tv;
+       if (ip6_input_measure)
+               net_perf_start_time(&net_perf, &start_tv);
+#endif /* INET */
        ip6_input(packet);
+#if INET
+       if (ip6_input_measure) {
+               net_perf_measure_time(&net_perf, &start_tv, 1);
+               net_perf_histogram(&net_perf, 1);
+       }
+#endif /* INET */
 }
 
 /*
@@ -605,6 +637,7 @@ ip6_input(struct mbuf *m)
        }
 
        ip6stat.ip6s_nxthist[ip6->ip6_nxt]++;
+
        /*
         * Check against address spoofing/corruption.
         */
@@ -670,20 +703,20 @@ ip6_input(struct mbuf *m)
        }
 #endif
 #if IPFW2
-        /*
-         * Check with the firewall...
-         */
-        if (ip6_fw_enable && ip6_fw_chk_ptr) {
-                u_short port = 0;
-                /* If ipfw says divert, we have to just drop packet */
-                /* use port as a dummy argument */
-                if ((*ip6_fw_chk_ptr)(&ip6, NULL, &port, &m)) {
-                        m_freem(m);
-                        m = NULL;
-                }
-                if (!m)
-                        goto done;
-        }
+       /*
+        * Check with the firewall...
+        */
+       if (ip6_fw_enable && ip6_fw_chk_ptr) {
+               u_short port = 0;
+               /* If ipfw says divert, we have to just drop packet */
+               /* use port as a dummy argument */
+               if ((*ip6_fw_chk_ptr)(&ip6, NULL, &port, &m)) {
+                       m_freem(m);
+                       m = NULL;
+               }
+               if (!m)
+                       goto done;
+       }
 #endif /* IPFW2 */
 
        /*
@@ -1697,7 +1730,7 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
                                }
                                break;
                        case IPPROTO_ROUTING:
-                               if (!in6p->inp_flags & IN6P_RTHDR)
+                               if (!(in6p->inp_flags & IN6P_RTHDR))
                                        break;
 
                                mp = sbcreatecontrol_mbuf((caddr_t)ip6e, elen,
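The one-character change above fixes a precedence bug: ! binds tighter than &, so the old expression negated the whole flags word before masking. Assuming IN6P_RTHDR is a high-order flag bit (stand-in value below), the old test was always false and the break was never taken. A small, runnable illustration:

	#include <stdio.h>

	#define IN6P_RTHDR	0x00100000	/* hypothetical stand-in value */

	int
	main(void)
	{
		unsigned int set = IN6P_RTHDR;	/* option IS set   */
		unsigned int clear = 0;		/* option NOT set  */

		/* Old form parses as (!flags) & IN6P_RTHDR and is 0 in both cases,
		 * because !flags is 0 or 1 and IN6P_RTHDR is a high bit.
		 * (Modern compilers warn about this pattern.) */
		printf("old: set=%d clear=%d\n",
		    !set & IN6P_RTHDR, !clear & IN6P_RTHDR);	/* 0 0 */

		/* New form distinguishes the two cases as intended. */
		printf("new: set=%d clear=%d\n",
		    !(set & IN6P_RTHDR), !(clear & IN6P_RTHDR));	/* 0 1 */

		return (0);
	}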
@@ -1994,3 +2027,57 @@ u_char   inet6ctlerrmap[PRC_NCMDS] = {
        0,              0,              0,              0,
        ENOPROTOOPT
 };
+
+static int
+sysctl_reset_ip6_input_stats SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       int error, i;
+
+       i = ip6_input_measure;
+       error = sysctl_handle_int(oidp, &i, 0, req);
+       if (error || req->newptr == USER_ADDR_NULL)
+               goto done;
+       /* impose bounds */
+       if (i < 0 || i > 1) {
+               error = EINVAL;
+               goto done;
+       }
+       if (ip6_input_measure != i && i == 1) {
+               net_perf_initialize(&net_perf, ip6_input_measure_bins);
+       }
+       ip6_input_measure = i;
+done:
+       return (error);
+}
+
+static int
+sysctl_ip6_input_measure_bins SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       int error;
+       uint64_t i;
+
+       i = ip6_input_measure_bins;
+       error = sysctl_handle_quad(oidp, &i, 0, req);
+       if (error || req->newptr == USER_ADDR_NULL)
+               goto done;
+       /* validate data */
+       if (!net_perf_validate_bins(i)) {
+               error = EINVAL;
+               goto done;
+       }
+       ip6_input_measure_bins = i;
+done:
+       return (error);
+}
+
+static int
+sysctl_ip6_input_getperf SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       if (req->oldptr == USER_ADDR_NULL)
+               req->oldlen = (size_t)sizeof (struct ipstat);
+
+       return (SYSCTL_OUT(req, &net_perf, MIN(sizeof (net_perf), req->oldlen)));
+}
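The three handlers above expose the new measurement machinery via sysctl. A user-space sketch (hypothetical tool, not part of this commit) that sets the histogram bin count, turns measurement on, and later reads back the accumulated counters; the struct net_perf layout lives in net/net_perf.h and is not shown in this diff, so a raw buffer is used here:

	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		int on = 1;
		uint64_t bins = 1;              /* must satisfy net_perf_validate_bins() */
		unsigned char buf[512];         /* assumed >= sizeof (struct net_perf) */
		size_t len = sizeof (buf);

		if (sysctlbyname("net.inet6.ip6.input_perf_bins",
		    NULL, NULL, &bins, sizeof (bins)) == -1)
			perror("input_perf_bins");

		if (sysctlbyname("net.inet6.ip6.input_perf",
		    NULL, NULL, &on, sizeof (on)) == -1)
			perror("input_perf");

		/* ... receive some IPv6 traffic ... */

		if (sysctlbyname("net.inet6.ip6.input_perf_data",
		    buf, &len, NULL, 0) == -1)
			perror("input_perf_data");
		else
			printf("read %zu bytes of struct net_perf\n", len);

		return (0);
	}

The equivalent output_perf, output_perf_bins and output_perf_data knobs are added to ip6_output.c later in this diff.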
index 812bf2b3ac2d7563a8b4fd377b29b2387fbdb595..7767822d7403cfce884d6b46752e7d831697f485 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
 #include <net/route.h>
 #include <net/dlil.h>
 #include <net/net_osdep.h>
+#include <net/net_perf.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
@@ -158,6 +159,9 @@ extern int ipsec_bypass;
 #include <net/pfvar.h>
 #endif /* PF */
 
+static int sysctl_reset_ip6_output_stats SYSCTL_HANDLER_ARGS;
+static int sysctl_ip6_output_measure_bins SYSCTL_HANDLER_ARGS;
+static int sysctl_ip6_output_getperf SYSCTL_HANDLER_ARGS;
 static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
 static void ip6_out_cksum_stats(int, u_int32_t);
 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
@@ -176,6 +180,34 @@ static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, int,
 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
 static void ip6_output_checksum(struct ifnet *, uint32_t, struct mbuf *,
     int, uint32_t, uint32_t);
+extern int udp_ctloutput(struct socket *, struct sockopt *);
+static int ip6_do_fragmentation(struct mbuf **morig,
+    uint32_t optlen, struct ifnet *ifp, uint32_t unfragpartlen,
+    struct ip6_hdr *ip6, struct ip6_exthdrs *exthdrsp, uint32_t mtu,
+    int nxt0);
+static int ip6_fragment_packet(struct mbuf **m,
+    struct ip6_pktopts *opt, struct ip6_exthdrs *exthdrsp, struct ifnet *ifp,
+    uint32_t mtu, boolean_t alwaysfrag, uint32_t unfragpartlen,
+    struct route_in6 *ro_pmtu, int nxt0, uint32_t optlen);
+
+SYSCTL_DECL(_net_inet6_ip6);
+
+static int ip6_output_measure = 0;
+SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf,
+       CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+       &ip6_output_measure, 0, sysctl_reset_ip6_output_stats, "I", "Do time measurement");
+
+static uint64_t ip6_output_measure_bins = 0;
+SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf_bins,
+       CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_output_measure_bins, 0,
+       sysctl_ip6_output_measure_bins, "I",
+       "bins for chaining performance data histogram");
+
+static net_perf_t net_perf;
+SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf_data,
+       CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
+       0, 0, sysctl_ip6_output_getperf, "S,net_perf",
+       "IP6 output performance data (struct net_perf, net/net_perf.h)");
 
 #define        IM6O_TRACE_HIST_SIZE    32      /* size of trace history */
 
@@ -210,47 +242,19 @@ static struct zone *im6o_zone;            /* zone for ip6_moptions */
 #define        IM6O_ZONE_MAX           64              /* maximum elements in zone */
 #define        IM6O_ZONE_NAME          "ip6_moptions"  /* zone name */
 
-SYSCTL_DECL(_net_inet6_ip6);
-
-static int ip6_maxchainsent = 0;
-SYSCTL_INT(_net_inet6_ip6, OID_AUTO, maxchainsent,
-       CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxchainsent, 0,
-       "use dlil_output_list");
-
 /*
- * XXX we don't handle mbuf chains yet in nd6_output() so ip6_output_list() only
- * walks through the packet chain and sends each mbuf separately.
+ * ip6_output() calls ip6_output_list() to do the work
  */
 int
-ip6_output_list(struct mbuf *m0, int packetlist, struct ip6_pktopts *opt,
+ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
     struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
     struct ifnet **ifpp, struct ip6_out_args *ip6oa)
 {
-#pragma unused(packetlist)
-       struct mbuf *m = m0, *nextpkt;
-       int error = 0;
-
-       while (m != NULL) {
-               /*
-                * Break the chain before calling ip6_output() and free the
-                * mbufs if there was an error.
-                */
-               nextpkt = m->m_nextpkt;
-               m->m_nextpkt = NULL;
-               error = ip6_output(m, opt, ro, flags, im6o, ifpp, ip6oa);
-               if (error != 0) {
-                       if (nextpkt != NULL)
-                               m_freem_list(nextpkt);
-                       return (error);
-               }
-               m = nextpkt;
-       }
-
-       return (error);
+       return ip6_output_list(m0, 0, opt, ro, flags, im6o, ifpp, ip6oa);
 }
 
 /*
- * IP6 output. The packet in mbuf chain m contains a skeletal IP6
+ * IP6 output. Each packet in mbuf chain m contains a skeletal IP6
  * header (with pri, len, nxt, hlim, src, dst).
  * This function may modify ver and hlim only.
  * The mbuf chain containing the packet will be freed.
@@ -265,15 +269,18 @@ ip6_output_list(struct mbuf *m0, int packetlist, struct ip6_pktopts *opt,
  * which is rt_rmx.rmx_mtu.
  */
 int
-ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro,
-    int flags, struct ip6_moptions *im6o, struct ifnet **ifpp,
-    struct ip6_out_args *ip6oa)
+ip6_output_list(struct mbuf *m0, int packetchain, struct ip6_pktopts *opt,
+    struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
+    struct ifnet **ifpp, struct ip6_out_args *ip6oa)
 {
        struct ip6_hdr *ip6;
        u_char *nexthdrp;
        struct ifnet *ifp = NULL, *origifp = NULL;      /* refcnt'd */
+       struct ifnet **ifpp_save = ifpp;
        struct mbuf *m, *mprev;
-       int hlen, tlen, len, off, nxt0;
+       struct mbuf *sendchain = NULL, *sendchain_last = NULL;
+       struct mbuf *inputchain = NULL;
+       int nxt0;
        struct route_in6 *ro_pmtu = NULL;
        struct rtentry *rt = NULL;
        struct sockaddr_in6 *dst, src_sa, dst_sa;
@@ -287,6 +294,9 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro,
        ipfilter_t inject_filter_ref;
        struct ipf_pktopts *ippo = NULL;
        struct flowadv *adv = NULL;
+       uint32_t pktcnt = 0;
+       uint32_t packets_processed = 0;
+       struct timeval start_tv;
 #if DUMMYNET
        struct m_tag *tag;
        struct ip6_out_args saved_ip6oa;
@@ -331,6 +341,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro,
                struct {
                        boolean_t select_srcif : 1;
                        boolean_t hdrsplit : 1;
+                       boolean_t route_selected : 1;
                        boolean_t dontfrag : 1;
 #if IPSEC
                        boolean_t needipsec : 1;
@@ -340,6 +351,9 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro,
                uint32_t raw;
        } ip6obf = { .raw = 0 };
 
+       if (ip6_output_measure)
+               net_perf_start_time(&net_perf, &start_tv);
+
        VERIFY(m0->m_flags & M_PKTHDR);
 
        /* zero out {saved_route, saved_ro_pmtu, ip6route, exthdrs, args} */
@@ -354,6 +368,13 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro,
            KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
                struct dn_pkt_tag       *dn_tag;
 
+               /*
+                * ip6_output_list() cannot handle chains of packets reinjected
+                * by dummynet. The same restriction applies to
+                * ip_output_list().
+                */
+               VERIFY(0 == packetchain);
+
                dn_tag = (struct dn_pkt_tag *)(tag+1);
                args.fwa_pf_rule = dn_tag->dn_pf_rule;
 
@@ -388,7 +409,6 @@ tags_done:
 #endif /* DUMMYNET */
 
        m = m0;
-       m->m_pkthdr.pkt_flags &= ~(PKTF_LOOP|PKTF_IFAINFO);
 
 #if IPSEC
        if (ipsec_bypass == 0) {
@@ -412,10 +432,6 @@ tags_done:
        }
 #endif /* IPSEC */
        
-       ip6 = mtod(m, struct ip6_hdr *);
-       nxt0 = ip6->ip6_nxt;
-       finaldst = ip6->ip6_dst;
-       inject_filter_ref = ipf_get_inject_filter(m);
        ippo = &ipf_pktopts;
 
        if (ip6_doscopedroute && (flags & IPV6_OUTARGS)) {
@@ -455,6 +471,14 @@ tags_done:
                ip6oa->ip6oa_retflags = 0;
        }
 
+       /*
+        * Clear out ifpp so it can be filled in once the route is determined.
+        * ifpp_save keeps the old value so that its reference can be released
+        * properly and so dtrace can observe ipsec tunnel traffic correctly.
+        */
+       if (ifpp != NULL && *ifpp != NULL)
+               *ifpp = NULL;
+
 #if DUMMYNET
        if (args.fwa_pf_rule) {
                ip6 = mtod(m, struct ip6_hdr *);
@@ -463,6 +487,43 @@ tags_done:
        }
 #endif /* DUMMYNET */
 
+#if NECP
+       /*
+        * Since all packets are assumed to come from same socket, necp lookup
+        * only needs to happen once per function entry.
+        */
+       necp_matched_policy_id = necp_ip6_output_find_policy_match(m, flags,
+           (flags & IPV6_OUTARGS) ? ip6oa : NULL, &necp_result,
+           &necp_result_parameter);
+#endif /* NECP */
+
+       /*
+        * If a chain was passed in, prepare for the first iteration. For all
+        * other iterations, this work will be done at the evaluateloop: label.
+        */
+       if (packetchain) {
+               /*
+                * Remove m from the chain during processing to avoid
+                * accidentally freeing the entire list.
+                */
+               inputchain = m->m_nextpkt;
+               m->m_nextpkt = NULL;
+       }
+
+loopit:
+       packets_processed++;
+       m->m_pkthdr.pkt_flags &= ~(PKTF_LOOP|PKTF_IFAINFO);
+       ip6 = mtod(m, struct ip6_hdr *);
+       nxt0 = ip6->ip6_nxt;
+       finaldst = ip6->ip6_dst;
+       ip6obf.hdrsplit = FALSE;
+       ro_pmtu = NULL;
+
+       if (!SLIST_EMPTY(&m->m_pkthdr.tags))
+               inject_filter_ref = ipf_get_inject_filter(m);
+       else
+               inject_filter_ref = NULL;
+
 #define        MAKE_EXTHDR(hp, mp) do {                                        \
        if (hp != NULL) {                                               \
                struct ip6_ext *eh = (struct ip6_ext *)(hp);            \
@@ -499,46 +560,65 @@ tags_done:
 #undef MAKE_EXTHDR
 
 #if NECP
-       necp_matched_policy_id = necp_ip6_output_find_policy_match (m, flags, (flags & IPV6_OUTARGS) ? ip6oa : NULL,
-                                                                                  &necp_result, &necp_result_parameter);
        if (necp_matched_policy_id) {
                necp_mark_packet_from_ip(m, necp_matched_policy_id);
+
                switch (necp_result) {
-                       case NECP_KERNEL_POLICY_RESULT_PASS:
+               case NECP_KERNEL_POLICY_RESULT_PASS:
+                       goto skip_ipsec;
+               case NECP_KERNEL_POLICY_RESULT_DROP:
+               case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT:
+                       /*
+                        * Flow divert packets should be blocked at the IP
+                        * layer.
+                        */
+                       error = EHOSTUNREACH;
+                       goto freehdrs;
+               case NECP_KERNEL_POLICY_RESULT_IP_TUNNEL: {
+                       /*
+                        * Verify that the packet is being routed to the tunnel
+                        */
+                       struct ifnet *policy_ifp =
+                           necp_get_ifnet_from_result_parameter(
+                               &necp_result_parameter);
+
+                       if (policy_ifp == ifp) {
                                goto skip_ipsec;
-                       case NECP_KERNEL_POLICY_RESULT_DROP:
-                       case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT:
-                               /* Flow divert packets should be blocked at the IP layer */
-                               error = EHOSTUNREACH;
-                               goto bad;
-                       case NECP_KERNEL_POLICY_RESULT_IP_TUNNEL: {
-                               /* Verify that the packet is being routed to the tunnel */
-                               struct ifnet *policy_ifp = necp_get_ifnet_from_result_parameter(&necp_result_parameter);
-                               if (policy_ifp == ifp) {
+                       } else {
+                               if (necp_packet_can_rebind_to_ifnet(m,
+                                   policy_ifp, (struct route *)&necp_route,
+                                   AF_INET6)) {
+                                       /*
+                                        * Set scoped index to the tunnel
+                                        * interface, since it is compatible
+                                        * with the packet. This will only work
+                                        * for callers who pass IPV6_OUTARGS,
+                                        * but that covers all of the clients
+                                        * we care about today.
+                                        */
+                                       if (flags & IPV6_OUTARGS) {
+                                               ip6oa->ip6oa_boundif =
+                                                   policy_ifp->if_index;
+                                               ip6oa->ip6oa_flags |=
+                                                   IP6OAF_BOUND_IF;
+                                       }
+                                       if (opt != NULL
+                                           && opt->ip6po_pktinfo != NULL) {
+                                               opt->ip6po_pktinfo->
+                                                   ipi6_ifindex =
+                                                       policy_ifp->if_index;
+                                       }
+                                       ro = &necp_route;
                                        goto skip_ipsec;
                                } else {
-                                       if (necp_packet_can_rebind_to_ifnet(m, policy_ifp, (struct route *)&necp_route, AF_INET6)) {
-                                               /* Set scoped index to the tunnel interface, since it is compatible with the packet */
-                                               /* This will only work for callers who pass IPV6_OUTARGS, but that covers all of the
-                                                  clients we care about today */
-                                               if (flags & IPV6_OUTARGS) {
-                                                       ip6oa->ip6oa_boundif = policy_ifp->if_index;
-                                                       ip6oa->ip6oa_flags |= IP6OAF_BOUND_IF;
-                                               }
-                                               if (opt != NULL && opt->ip6po_pktinfo != NULL) {
-                                                       opt->ip6po_pktinfo->ipi6_ifindex = policy_ifp->if_index;
-                                               }
-                                               ro = &necp_route;
-                                               goto skip_ipsec;
-                                       } else {
-                                               error = ENETUNREACH;
-                                               goto bad;
-                                       }
+                                       error = ENETUNREACH;
+                                       goto freehdrs;
                                }
-                               break;
                        }
-                       default:
-                               break;
+                       break;
+               }
+               default:
+                       break;
                }
        }
 #endif /* NECP */
@@ -715,6 +795,9 @@ skip_ipsec:
        MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS);
        MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING);
 
+       /* It is no longer safe to free the pointers in exthdrs. */
+       exthdrs.merged = TRUE;
+
 #undef MAKE_CHAIN
 
 #if IPSEC
@@ -761,7 +844,10 @@ skip_ipsec:
                                    (mbuf_t *)&m, ippo);
                                if (result == EJUSTRETURN) {
                                        ipf_unref();
-                                       goto done;
+                                       if (m != NULL)
+                                               m_freem(m);
+                                       m = NULL;
+                                       goto evaluateloop;
                                }
                                if (result != 0) {
                                        ipf_unref();
@@ -898,7 +984,6 @@ skip_ipsec:
                ro = &ip6route;
                bzero((caddr_t)ro, sizeof (*ro));
        }
-       VERIFY(ro_pmtu == NULL);        /* must not get here if dummynet */
        ro_pmtu = ro;
        if (opt != NULL && opt->ip6po_rthdr)
                ro = &opt->ip6po_route;
@@ -958,7 +1043,7 @@ skip_ipsec:
 #if IPSEC
        if (ip6obf.needipsec && needipsectun) {
 #if CONFIG_DTRACE
-               struct ifnet *trace_ifp = (ifpp != NULL) ? (*ifpp) : NULL;
+               struct ifnet *trace_ifp = (ifpp_save != NULL) ? (*ifpp_save) : NULL;
 #endif /* CONFIG_DTRACE */
                /*
                 * All the extension headers will become inaccessible
@@ -983,15 +1068,16 @@ skip_ipsec:
 
                error = ipsec6_output_tunnel(&ipsec_state, sp, flags);
                /* tunneled in IPv4? packet is gone */
-               if (ipsec_state.tunneled == 4)
-                       goto done;
+               if (ipsec_state.tunneled == 4) {
+                       m = NULL;
+                       goto evaluateloop;
+               }
                m = ipsec_state.m;
                ipsec_saved_route = ro;
                ro = (struct route_in6 *)&ipsec_state.ro;
                dst = SIN6(ipsec_state.dst);
                if (error) {
                        /* mbuf is already reclaimed in ipsec6_output_tunnel. */
-                       m0 = m = NULL;
                        m = NULL;
                        switch (error) {
                        case EHOSTUNREACH:
@@ -1028,10 +1114,12 @@ skip_ipsec:
        }
 #endif /* IPSEC */
 
-       /* for safety */
+       /*
+        * ifp should only be filled in for dummy net packets which will jump
+        * to check_with_pf label.
+        */
        if (ifp != NULL) {
-               ifnet_release(ifp);
-               ifp = NULL;
+               VERIFY(ip6obf.route_selected);
        }
 
        /* adjust pointer */
@@ -1049,24 +1137,32 @@ skip_ipsec:
        dst_sa.sin6_addr = ip6->ip6_dst;
 
        /*
+        * Only call in6_selectroute() on first iteration to avoid taking
+        * multiple references on ifp and rt.
+        *
         * in6_selectroute() might return an ifp with its reference held
         * even in the error case, so make sure to release its reference.
         * ip6oa may be NULL if IPV6_OUTARGS isn't set.
         */
-       if ((error = in6_selectroute(ip6obf.select_srcif ? &src_sa : NULL,
-           &dst_sa, opt, im6o, &src_ia, ro, &ifp, &rt, 0, ip6oa)) != 0) {
-               switch (error) {
-               case EHOSTUNREACH:
-                       ip6stat.ip6s_noroute++;
-                       break;
-               case EADDRNOTAVAIL:
-               default:
-                       break; /* XXX statistics? */
+       if (!ip6obf.route_selected) {
+               error = in6_selectroute( ip6obf.select_srcif ? &src_sa : NULL,
+                   &dst_sa, opt, im6o, &src_ia, ro, &ifp, &rt, 0, ip6oa);
+
+               if (error != 0) {
+                       switch (error) {
+                       case EHOSTUNREACH:
+                               ip6stat.ip6s_noroute++;
+                               break;
+                       case EADDRNOTAVAIL:
+                       default:
+                               break; /* XXX statistics? */
+                       }
+                       if (ifp != NULL)
+                               in6_ifstat_inc(ifp, ifs6_out_discard);
+                       /* ifp (if non-NULL) will be released at the end */
+                       goto bad;
                }
-               if (ifp != NULL)
-                       in6_ifstat_inc(ifp, ifs6_out_discard);
-               /* ifp (if non-NULL) will be released at the end */
-               goto bad;
+               ip6obf.route_selected = TRUE;
        }
        if (rt == NULL) {
                /*
@@ -1076,6 +1172,14 @@ skip_ipsec:
                *dst = dst_sa;  /* XXX */
        }
 
+#if NECP
+       /* Catch-all to check if the interface is allowed */
+       if (!necp_packet_is_allowed_over_interface(m, ifp)) {
+               error = EHOSTUNREACH;
+               goto bad;
+       }
+#endif /* NECP */
+
        /*
         * then rt (for unicast) and ifp must be non-NULL valid values.
         */
@@ -1084,9 +1188,11 @@ skip_ipsec:
        }
        if (rt != NULL) {
                RT_LOCK(rt);
-               ia = (struct in6_ifaddr *)(rt->rt_ifa);
-               if (ia != NULL)
-                       IFA_ADDREF(&ia->ia_ifa);
+               if (ia == NULL) {
+                       ia = (struct in6_ifaddr *)(rt->rt_ifa);
+                       if (ia != NULL)
+                               IFA_ADDREF(&ia->ia_ifa);
+               }
                rt->rt_use++;
                RT_UNLOCK(rt);
        }
@@ -1229,8 +1335,11 @@ routefound:
                 */
                if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
                    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
-                       m_freem(m);
-                       goto done;
+                       /* remove m from the packetchain and continue looping */
+                       if (m != NULL)
+                               m_freem(m);
+                       m = NULL;
+                       goto evaluateloop;
                }
        }
 
@@ -1238,10 +1347,8 @@ routefound:
         * Fill the outgoing interface to tell the upper layer
         * to increment per-interface statistics.
         */
-       if (ifpp != NULL) {
+       if (ifpp != NULL && *ifpp == NULL) {
                ifnet_reference(ifp);   /* for caller */
-               if (*ifpp != NULL)
-                       ifnet_release(*ifpp);
                *ifpp = ifp;
        }
 
@@ -1289,13 +1396,15 @@ routefound:
                u_short port = 0;
                m->m_pkthdr.rcvif = NULL;       /* XXX */
                /* If ipfw says divert, we have to just drop packet */
-               if (ip6_fw_chk_ptr(&ip6, ifp, &port, &m)) {
-                       m_freem(m);
-                       goto done;
-               }
-               if (m == NULL) {
-                       error = EACCES;
-                       goto done;
+               if (ip6_fw_chk_ptr(&ip6, ifp, &port, &m) || m == NULL) {
+                       if (m != NULL) {
+                               m_freem(m);
+                               m = NULL;
+                               goto evaluateloop;
+                       } else {
+                               error = EACCES;
+                               goto bad;
+                       }
                }
        }
 #endif /* IPFW2 */
@@ -1324,9 +1433,13 @@ routefound:
                if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
                    ((hbh->ip6h_len + 1) << 3) - sizeof (struct ip6_hbh),
                    &dummy, &oplen) < 0) {
-                       /* m was already freed at this point */
+                       /*
+                        * m was already freed at this point. Set to NULL so it
+                        * is not re-freed at end of ip6_output_list.
+                        */
+                       m = NULL;
                        error = EINVAL; /* better error? */
-                       goto done;
+                       goto bad;
                }
                m->m_flags &= ~M_LOOP; /* XXX */
                m->m_pkthdr.rcvif = NULL;
@@ -1338,6 +1451,7 @@ check_with_pf:
 #if PF
        if (PF_IS_ENABLED) {
 #if DUMMYNET
+
                /*
                 * TODO: Need to save opt->ip6po_flags for reinjection
                 * rdar://10434993
@@ -1362,58 +1476,206 @@ check_with_pf:
 #endif /* !DUMMYNET */
 
                if (error != 0 || m == NULL) {
-                       /*
-                        * Note that if we ever handle packet chain, we will
-                        * have to restore the linkage from the previous
-                        * packet to the next like in ip_outout_list()
-                        */
                        if (m != NULL) {
                                panic("%s: unexpected packet %p\n",
                                    __func__, m);
                                /* NOTREACHED */
                        }
-                       /* Already freed by callee */
-                       goto done;
+                       /* m was already freed by callee and is now NULL.  */
+                       goto evaluateloop;
                }
                ip6 = mtod(m, struct ip6_hdr *);
        }
 #endif /* PF */
 
+#ifdef IPSEC
+       /* clean ipsec history before fragmentation */
+       ipsec_delaux(m);
+#endif /* IPSEC */
+
        /*
-        * Send the packet to the outgoing interface.
-        * If necessary, do IPv6 fragmentation before sending.
-        *
-        * the logic here is rather complex:
-        * 1: normal case (dontfrag == 0, alwaysfrag == 0)
-        * 1-a: send as is if tlen <= path mtu
-        * 1-b: fragment if tlen > path mtu
-        *
-        * 2: if user asks us not to fragment (dontfrag == 1)
-        * 2-a: send as is if tlen <= interface mtu
-        * 2-b: error if tlen > interface mtu
-        *
-        * 3: if we always need to attach fragment header (alwaysfrag == 1)
-        *      always fragment
-        *
-        * 4: if dontfrag == 1 && alwaysfrag == 1
-        *      error, as we cannot handle this conflicting request
+        * Determine whether fragmentation is necessary. If so, m is passed
+        * back as a chain of packets and the original mbuf is freed. Otherwise,
+        * m is unchanged.
         */
-       tlen = m->m_pkthdr.len;
+       error = ip6_fragment_packet(&m, opt,
+           &exthdrs, ifp, mtu, alwaysfrag, unfragpartlen, ro_pmtu, nxt0,
+           optlen);
 
-       if (opt != NULL && (opt->ip6po_flags & IP6PO_DONTFRAG))
-               ip6obf.dontfrag = TRUE;
-       else
-               ip6obf.dontfrag = FALSE;
-       if (ip6obf.dontfrag && alwaysfrag) {    /* case 4 */
-               /* conflicting request - can't transmit */
-               error = EMSGSIZE;
+       if (error)
                goto bad;
+
+/*
+ * The evaluateloop label is where we decide whether to continue looping over
+ * packets or call into nd code to send.
+ */
+evaluateloop:
+
+       /*
+        * m may be NULL when we jump to the evaluateloop label from PF or
+        * other code that can drop packets.
+        */
+       if (m != NULL) {
+               /*
+                * If we already have a chain to send, tack m onto the end.
+                * Otherwise make m the start and end of the to-be-sent chain.
+                */
+               if (sendchain != NULL) {
+                       sendchain_last->m_nextpkt = m;
+               } else {
+                       sendchain = m;
+               }
+
+               /* Fragmentation may mean m is a chain. Find the last packet. */
+               while (m->m_nextpkt)
+                       m = m->m_nextpkt;
+               sendchain_last = m;
+               pktcnt++;
+       }
+
+       /* Fill in next m from inputchain as appropriate. */
+       m = inputchain;
+       if (m != NULL) {
+               /* Isolate m from rest of input chain. */
+               inputchain = m->m_nextpkt;
+               m->m_nextpkt = NULL;
+
+               /*
+                * Clear exthdrs and ipsec_state so stale contents are not
+                * reused. Note this also clears the exthdrs.merged flag.
+                */
+               bzero(&exthdrs, sizeof(exthdrs));
+               bzero(&ipsec_state, sizeof(ipsec_state));
+
+               /* Continue looping. */
+               goto loopit;
+       }
+
+       /*
+        * If we get here, there's no more mbufs in inputchain, so send the
+        * sendchain if there is one.
+        */
+       if (pktcnt > 0) {
+               error = nd6_output_list(ifp, origifp, sendchain, dst,
+                   ro->ro_rt, adv);
+               /*
+                * Fall through to done label even in error case because
+                * nd6_output_list frees packetchain in both success and
+                * failure cases.
+                */
+       }
+
+done:
+       if (ifpp_save != NULL && *ifpp_save != NULL) {
+               ifnet_release(*ifpp_save);
+               *ifpp_save = NULL;
+       }
+       ROUTE_RELEASE(&ip6route);
+#if IPSEC
+       ROUTE_RELEASE(&ipsec_state.ro);
+       if (sp != NULL)
+               key_freesp(sp, KEY_SADB_UNLOCKED);
+#endif /* IPSEC */
+#if NECP
+       ROUTE_RELEASE(&necp_route);
+#endif /* NECP */
+#if DUMMYNET
+       ROUTE_RELEASE(&saved_route);
+       ROUTE_RELEASE(&saved_ro_pmtu);
+#endif /* DUMMYNET */
+
+       if (ia != NULL)
+               IFA_REMREF(&ia->ia_ifa);
+       if (src_ia != NULL)
+               IFA_REMREF(&src_ia->ia_ifa);
+       if (ifp != NULL)
+               ifnet_release(ifp);
+       if (origifp != NULL)
+               ifnet_release(origifp);
+       if (ip6_output_measure) {
+               net_perf_measure_time(&net_perf, &start_tv, packets_processed);
+               net_perf_histogram(&net_perf, packets_processed);
+       }
+       return (error);
+
+freehdrs:
+       if (exthdrs.ip6e_hbh != NULL) {
+               if (exthdrs.merged)
+                       panic("Double free of ip6e_hbh");
+               m_freem(exthdrs.ip6e_hbh);
+       }
+       if (exthdrs.ip6e_dest1 != NULL) {
+               if (exthdrs.merged)
+                       panic("Double free of ip6e_dest1");
+               m_freem(exthdrs.ip6e_dest1);
+       }
+       if (exthdrs.ip6e_rthdr != NULL) {
+               if (exthdrs.merged)
+                       panic("Double free of ip6e_rthdr");
+               m_freem(exthdrs.ip6e_rthdr);
+       }
+       if (exthdrs.ip6e_dest2 != NULL) {
+               if (exthdrs.merged)
+                       panic("Double free of ip6e_dest2");
+               m_freem(exthdrs.ip6e_dest2);
+       }
+       /* FALLTHRU */
+bad:
+       if (inputchain != NULL)
+               m_freem_list(inputchain);
+       if (sendchain != NULL)
+               m_freem_list(sendchain);
+       if (m != NULL)
+               m_freem(m);
+
+       goto done;
+
+#undef ipf_pktopts
+#undef exthdrs
+#undef ip6route
+#undef ipsec_state
+#undef saved_route
+#undef saved_ro_pmtu
+#undef args
+}
+
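Stripped of error handling, reference counting and the per-packet protocol work, the loopit/evaluateloop structure introduced above follows this chain-batching pattern (kernel-context sketch, not standalone code):

	struct mbuf *m = m0;
	struct mbuf *inputchain = NULL;
	struct mbuf *sendchain = NULL, *sendchain_last = NULL;

	if (packetchain) {
		inputchain = m->m_nextpkt;      /* detach the head before processing */
		m->m_nextpkt = NULL;
	}

	for (;;) {
		/* ... per-packet work: m may become a fragment chain, or NULL
		 *     if the packet was consumed or dropped ... */

		if (m != NULL) {                /* append m (or its fragments) */
			if (sendchain != NULL)
				sendchain_last->m_nextpkt = m;
			else
				sendchain = m;
			while (m->m_nextpkt != NULL)
				m = m->m_nextpkt;
			sendchain_last = m;
		}

		if ((m = inputchain) == NULL)   /* no more input packets */
			break;
		inputchain = m->m_nextpkt;      /* isolate the next one */
		m->m_nextpkt = NULL;
	}

	if (sendchain != NULL)                  /* one batched hand-off to ND */
		error = nd6_output_list(ifp, origifp, sendchain, dst, ro->ro_rt, adv);

The single nd6_output_list() call at the end replaces the per-packet nd6_output() call the old code made for each mbuf.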
+/* ip6_fragment_packet
+ *
+ * The fragmentation logic is rather complex:
+ * 1: normal case (dontfrag == 0, alwaysfrag == 0)
+ * 1-a:        send as is if tlen <= path mtu
+ * 1-b:        fragment if tlen > path mtu
+ *
+ * 2: if user asks us not to fragment (dontfrag == 1)
+ * 2-a:        send as is if tlen <= interface mtu
+ * 2-b:        error if tlen > interface mtu
+ *
+ * 3: if we always need to attach fragment header (alwaysfrag == 1)
+ *     always fragment
+ *
+ * 4: if dontfrag == 1 && alwaysfrag == 1
+ *     error, as we cannot handle this conflicting request
+ */
+
+static int
+ip6_fragment_packet(struct mbuf **mptr, struct ip6_pktopts *opt,
+     struct ip6_exthdrs *exthdrsp, struct ifnet *ifp, uint32_t mtu,
+     boolean_t alwaysfrag, uint32_t unfragpartlen, struct route_in6 *ro_pmtu,
+     int nxt0, uint32_t optlen)
+{
+       VERIFY(NULL != mptr);
+       struct mbuf *m = *mptr;
+       int error = 0;
+       size_t tlen = m->m_pkthdr.len;
+       boolean_t dontfrag = (opt != NULL && (opt->ip6po_flags & IP6PO_DONTFRAG));
+
+       if (dontfrag && alwaysfrag) {   /* case 4 */
+               /* conflicting request - can't transmit */
+               return EMSGSIZE;
        }
 
-       lck_rw_lock_shared(nd_if_rwlock);
        /* Access without acquiring nd_ifinfo lock for performance */
-       if (ip6obf.dontfrag && tlen > IN6_LINKMTU(ifp)) {       /* case 2-b */
-               lck_rw_done(nd_if_rwlock);
+       if (dontfrag && tlen > IN6_LINKMTU(ifp)) {      /* case 2-b */
                /*
                 * Even if the DONTFRAG option is specified, we cannot send the
                 * packet when the data length is larger than the MTU of the
@@ -1429,51 +1691,71 @@ check_with_pf:
                bzero(&ip6cp, sizeof (ip6cp));
                ip6cp.ip6c_cmdarg = (void *)&mtu32;
                pfctlinput2(PRC_MSGSIZE, SA(&ro_pmtu->ro_dst), (void *)&ip6cp);
-               error = EMSGSIZE;
-               goto bad;
-       } else {
-               lck_rw_done(nd_if_rwlock);
+               return EMSGSIZE;
        }
 
        /*
         * transmit packet without fragmentation
         */
-       if (ip6obf.dontfrag || (!alwaysfrag &&          /* case 1-a and 2-a */
+       if (dontfrag || (!alwaysfrag &&         /* case 1-a and 2-a */
            (tlen <= mtu || TSO_IPV6_OK(ifp, m) ||
            (ifp->if_hwassist & CSUM_FRAGMENT_IPV6)))) {
-#ifdef IPSEC
-               /* clean ipsec history once it goes out of the node */
-               ipsec_delaux(m);
-#endif /* IPSEC */
-
+               /*
+                * mppn not updated in this case because no new chain is formed
+                * and inserted
+                */
                ip6_output_checksum(ifp, mtu, m, nxt0, tlen, optlen);
-
-               if (ro->ro_rt)
-                       RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
-               error = nd6_output(ifp, origifp, m, dst, ro->ro_rt, adv);
-               goto done;
+       } else {
+               /*
+                * time to fragment - cases 1-b and 3 are handled inside
+                * ip6_do_fragmentation().
+                * mppn is passed down to be updated to point at fragment chain.
+                */
+               error = ip6_do_fragmentation(mptr, optlen, ifp,
+                   unfragpartlen, mtod(m, struct ip6_hdr *), exthdrsp, mtu, nxt0);
        }
 
+       return error;
+}
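
The four cases documented above ip6_fragment_packet() reduce to a small decision table. The stand-alone sketch below (user-space, illustrative names only, not kernel code) mirrors that table; it deliberately omits the TSO and hardware fragmentation-offload escape hatches that the real check also allows.

#include <stddef.h>
#include <stdio.h>

enum frag_action { SEND_AS_IS, FRAGMENT, FAIL_EMSGSIZE };

/*
 * Mirror of the case analysis documented above ip6_fragment_packet():
 *   dontfrag && alwaysfrag        -> case 4, error
 *   dontfrag && tlen > link MTU   -> case 2-b, error
 *   dontfrag, or tlen <= path MTU -> cases 1-a / 2-a, send as is
 *   otherwise                     -> cases 1-b / 3, fragment
 */
static enum frag_action
frag_decision(int dontfrag, int alwaysfrag, size_t tlen,
    size_t path_mtu, size_t link_mtu)
{
        if (dontfrag && alwaysfrag)
                return FAIL_EMSGSIZE;                   /* case 4 */
        if (dontfrag && tlen > link_mtu)
                return FAIL_EMSGSIZE;                   /* case 2-b */
        if (dontfrag || (!alwaysfrag && tlen <= path_mtu))
                return SEND_AS_IS;                      /* cases 1-a, 2-a */
        return FRAGMENT;                                /* cases 1-b, 3 */
}

int
main(void)
{
        printf("%d\n", frag_decision(0, 0, 1200, 1500, 1500)); /* 0: send as is */
        printf("%d\n", frag_decision(0, 0, 2000, 1500, 1500)); /* 1: fragment */
        printf("%d\n", frag_decision(1, 1, 1200, 1500, 1500)); /* 2: error */
        return 0;
}
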
+
+/*
+ * ip6_do_fragmentation() is called by ip6_fragment_packet() after determining
+ * the packet needs to be fragmented. On success, morig is freed and a chain
+ * of fragments is linked into the packet chain where morig existed. Otherwise,
+ * an errno is returned.
+ */
+static int
+ip6_do_fragmentation(struct mbuf **mptr, uint32_t optlen, struct ifnet *ifp,
+    uint32_t unfragpartlen, struct ip6_hdr *ip6, struct ip6_exthdrs *exthdrsp,
+    uint32_t mtu, int nxt0)
+{
+       VERIFY(NULL != mptr);
+       int error = 0;
+
+       struct mbuf *morig = *mptr;
+       struct mbuf *first_mbufp = NULL;
+       struct mbuf *last_mbufp = NULL;
+
+       size_t tlen = morig->m_pkthdr.len;
+
        /*
         * try to fragment the packet.  case 1-b and 3
         */
-       if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV6)) {
+       if ((morig->m_pkthdr.csum_flags & CSUM_TSO_IPV6)) {
                /* TSO and fragment aren't compatible */
-               error = EMSGSIZE;
                in6_ifstat_inc(ifp, ifs6_out_fragfail);
-               goto bad;
+               return EMSGSIZE;
        } else if (mtu < IPV6_MMTU) {
                /* path MTU cannot be less than IPV6_MMTU */
-               error = EMSGSIZE;
                in6_ifstat_inc(ifp, ifs6_out_fragfail);
-               goto bad;
+               return EMSGSIZE;
        } else if (ip6->ip6_plen == 0) {
                /* jumbo payload cannot be fragmented */
-               error = EMSGSIZE;
                in6_ifstat_inc(ifp, ifs6_out_fragfail);
-               goto bad;
+               return EMSGSIZE;
        } else {
-               struct mbuf **mnext, *m_frgpart;
+               size_t hlen, len, off;
+               struct mbuf **mnext = NULL;
                struct ip6_frag *ip6f;
                u_int32_t id = htonl(ip6_randomid());
                u_char nextproto;
@@ -1489,84 +1771,95 @@ check_with_pf:
 
                len = (mtu - hlen - sizeof (struct ip6_frag)) & ~7;
                if (len < 8) {
-                       error = EMSGSIZE;
                        in6_ifstat_inc(ifp, ifs6_out_fragfail);
-                       goto bad;
+                       return EMSGSIZE;
                }
 
-               mnext = &m->m_nextpkt;
-
                /*
                 * Change the next header field of the last header in the
                 * unfragmentable part.
                 */
-               if (exthdrs.ip6e_rthdr != NULL) {
-                       nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
-                       *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
-               } else if (exthdrs.ip6e_dest1 != NULL) {
-                       nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
-                       *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
-               } else if (exthdrs.ip6e_hbh != NULL) {
-                       nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
-                       *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
+               if (exthdrsp->ip6e_rthdr != NULL) {
+                       nextproto = *mtod(exthdrsp->ip6e_rthdr, u_char *);
+                       *mtod(exthdrsp->ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
+               } else if (exthdrsp->ip6e_dest1 != NULL) {
+                       nextproto = *mtod(exthdrsp->ip6e_dest1, u_char *);
+                       *mtod(exthdrsp->ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
+               } else if (exthdrsp->ip6e_hbh != NULL) {
+                       nextproto = *mtod(exthdrsp->ip6e_hbh, u_char *);
+                       *mtod(exthdrsp->ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
                } else {
                        nextproto = ip6->ip6_nxt;
                        ip6->ip6_nxt = IPPROTO_FRAGMENT;
                }
 
-               if (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)
-                       in6_delayed_cksum_offset(m, 0, optlen, nxt0);
+               if (morig->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)
+                       in6_delayed_cksum_offset(morig, 0, optlen, nxt0);
 
                /*
                 * Loop through length of segment after first fragment,
                 * make new header and copy data of each part and link onto
                 * chain.
                 */
-               m0 = m;
                for (off = hlen; off < tlen; off += len) {
-                       struct ip6_hdr *mhip6;
+                       struct ip6_hdr *new_mhip6;
+                       struct mbuf *new_m;
+                       struct mbuf *m_frgpart;
 
-                       MGETHDR(m, M_DONTWAIT, MT_HEADER);      /* MAC-OK */
-                       if (m == NULL) {
+                       MGETHDR(new_m, M_DONTWAIT, MT_HEADER);  /* MAC-OK */
+                       if (new_m == NULL) {
                                error = ENOBUFS;
                                ip6stat.ip6s_odropped++;
-                               goto sendorfree;
+                               break;
+                       }
+                       new_m->m_pkthdr.rcvif = NULL;
+                       new_m->m_flags = morig->m_flags & M_COPYFLAGS;
+
+                       if (first_mbufp != NULL) {
+                               /* Every pass through loop but first */
+                               *mnext = new_m;
+                               last_mbufp = new_m;
+                       } else {
+                               /* This is the first element of the fragment chain */
+                               first_mbufp = new_m;
+                               last_mbufp = new_m;
                        }
-                       m->m_pkthdr.rcvif = NULL;
-                       m->m_flags = m0->m_flags & M_COPYFLAGS;
-                       *mnext = m;
-                       mnext = &m->m_nextpkt;
-                       m->m_data += max_linkhdr;
-                       mhip6 = mtod(m, struct ip6_hdr *);
-                       *mhip6 = *ip6;
-                       m->m_len = sizeof (*mhip6);
-                       error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
+                       mnext = &new_m->m_nextpkt;
+
+                       new_m->m_data += max_linkhdr;
+                       new_mhip6 = mtod(new_m, struct ip6_hdr *);
+                       *new_mhip6 = *ip6;
+                       new_m->m_len = sizeof (*new_mhip6);
+
+                       error = ip6_insertfraghdr(morig, new_m, hlen, &ip6f);
                        if (error) {
                                ip6stat.ip6s_odropped++;
-                               goto sendorfree;
+                               break;
                        }
+
                        ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
                        if (off + len >= tlen)
                                len = tlen - off;
                        else
                                ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
-                       mhip6->ip6_plen = htons((u_short)(len + hlen +
+                       new_mhip6->ip6_plen = htons((u_short)(len + hlen +
                            sizeof (*ip6f) - sizeof (struct ip6_hdr)));
-                       if ((m_frgpart = m_copy(m0, off, len)) == NULL) {
+
+                       if ((m_frgpart = m_copy(morig, off, len)) == NULL) {
                                error = ENOBUFS;
                                ip6stat.ip6s_odropped++;
-                               goto sendorfree;
+                               break;
                        }
-                       m_cat(m, m_frgpart);
-                       m->m_pkthdr.len = len + hlen + sizeof (*ip6f);
-                       m->m_pkthdr.rcvif = NULL;
+                       m_cat(new_m, m_frgpart);
+                       new_m->m_pkthdr.len = len + hlen + sizeof (*ip6f);
+                       new_m->m_pkthdr.rcvif = NULL;
 
-                       M_COPY_CLASSIFIER(m, m0);
-                       M_COPY_PFTAG(m, m0);
+                       M_COPY_CLASSIFIER(new_m, morig);
+                       M_COPY_PFTAG(new_m, morig);
 
 #ifdef notyet
 #if CONFIG_MACF_NET
-                       mac_create_fragment(m0, m);
+                       mac_create_fragment(morig, new_m);
 #endif /* CONFIG_MACF_NET */
 #endif /* notyet */
 
@@ -1577,81 +1870,23 @@ check_with_pf:
                        in6_ifstat_inc(ifp, ifs6_out_fragcreat);
                }
 
-               in6_ifstat_inc(ifp, ifs6_out_fragok);
-       }
-
-       /*
-        * Remove leading garbages.
-        */
-sendorfree:
-       m = m0->m_nextpkt;
-       m0->m_nextpkt = NULL;
-       m_freem(m0);
-       for (m0 = m; m != NULL; m = m0) {
-               m0 = m->m_nextpkt;
-               m->m_nextpkt = NULL;
-               if (error == 0) {
-#if IPSEC
-                       /* clean ipsec history once it goes out of the node */
-                       ipsec_delaux(m);
-#endif /* IPSEC */
-                       error = nd6_output(ifp, origifp, m, dst, ro->ro_rt,
-                           adv);
+               if (error) {
+                       /* free all the fragments created */
+                       if (first_mbufp != NULL) {
+                               m_freem_list(first_mbufp);
+                               first_mbufp = NULL;
+                       }
+                       last_mbufp = NULL;
                } else {
-                       m_freem(m);
+                       /* successful fragmenting */
+                       m_freem(morig);
+                       *mptr = first_mbufp;
+                       last_mbufp->m_nextpkt = NULL;
+                       ip6stat.ip6s_fragmented++;
+                       in6_ifstat_inc(ifp, ifs6_out_fragok);
                }
        }
-
-       if (error == 0)
-               ip6stat.ip6s_fragmented++;
-
-done:
-       ROUTE_RELEASE(&ip6route);
-#if IPSEC
-       ROUTE_RELEASE(&ipsec_state.ro);
-       if (sp != NULL)
-               key_freesp(sp, KEY_SADB_UNLOCKED);
-#endif /* IPSEC */
-#if NECP
-       ROUTE_RELEASE(&necp_route);
-#endif /* NECP */
-#if DUMMYNET
-       ROUTE_RELEASE(&saved_route);
-       ROUTE_RELEASE(&saved_ro_pmtu);
-#endif /* DUMMYNET */
-
-       if (ia != NULL)
-               IFA_REMREF(&ia->ia_ifa);
-       if (src_ia != NULL)
-               IFA_REMREF(&src_ia->ia_ifa);
-       if (ifp != NULL)
-               ifnet_release(ifp);
-       if (origifp != NULL)
-               ifnet_release(origifp);
-       return (error);
-
-freehdrs:
-       if (exthdrs.ip6e_hbh != NULL)
-               m_freem(exthdrs.ip6e_hbh);
-       if (exthdrs.ip6e_dest1 != NULL)
-               m_freem(exthdrs.ip6e_dest1);
-       if (exthdrs.ip6e_rthdr != NULL)
-               m_freem(exthdrs.ip6e_rthdr);
-       if (exthdrs.ip6e_dest2 != NULL)
-               m_freem(exthdrs.ip6e_dest2);
-       /* FALLTHRU */
-bad:
-       if (m != NULL)
-               m_freem(m);
-       goto done;
-
-#undef ipf_pktopts
-#undef exthdrs
-#undef ip6route
-#undef ipsec_state
-#undef saved_route
-#undef saved_ro_pmtu
-#undef args
+       return error;
 }
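
ip6_do_fragmentation() builds the fragment list with a head pointer (first_mbufp), a tail pointer (last_mbufp) and an append slot (mnext), and unwinds the partial chain if any allocation or copy fails. The following user-space sketch (struct frag, build_chain and FRAG_DATA_MAX are made-up names) shows the same head/tail/unwind pattern on a plain byte buffer.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define FRAG_DATA_MAX 64

/* Illustrative stand-in for an mbuf: one piece of the original payload. */
struct frag {
        struct frag *next;              /* analogous to m_nextpkt */
        size_t off;                     /* offset of this piece in the buffer */
        size_t len;
        char data[FRAG_DATA_MAX];
};

/*
 * Split buf[0..tlen) into pieces of at most 'step' bytes, mirroring the
 * chain-building pattern in ip6_do_fragmentation(): keep head and tail
 * pointers, append each new piece at the tail, and on any failure free
 * the partial chain instead of handing it back.
 */
static int
build_chain(const char *buf, size_t tlen, size_t step, struct frag **out)
{
        struct frag *head = NULL, *tail = NULL;
        size_t off;

        if (step == 0 || step > FRAG_DATA_MAX)
                return -1;
        for (off = 0; off < tlen; off += step) {
                struct frag *f = calloc(1, sizeof(*f));
                if (f == NULL)
                        goto fail;
                f->off = off;
                f->len = (tlen - off < step) ? tlen - off : step;
                memcpy(f->data, buf + off, f->len);
                if (head == NULL)
                        head = f;               /* first fragment */
                else
                        tail->next = f;         /* append to the chain */
                tail = f;
        }
        *out = head;
        return 0;
fail:
        while (head != NULL) {
                struct frag *n = head->next;
                free(head);
                head = n;
        }
        return -1;
}

int
main(void)
{
        const char msg[] = "a small payload split into pieces";
        struct frag *chain = NULL, *f;

        if (build_chain(msg, sizeof(msg) - 1, 8, &chain) != 0)
                return 1;
        for (f = chain; f != NULL; f = f->next)
                printf("off=%zu len=%zu\n", f->off, f->len);
        while (chain != NULL) {
                f = chain->next;
                free(chain);
                chain = f;
        }
        return 0;
}
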
 
 static int
@@ -2010,10 +2245,8 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
 
                if (ifp == NULL)
                        ifp = ro_pmtu->ro_rt->rt_ifp;
-               lck_rw_lock_shared(nd_if_rwlock);
                /* Access without acquiring nd_ifinfo lock for performance */
                ifmtu = IN6_LINKMTU(ifp);
-               lck_rw_done(nd_if_rwlock);
 
                /*
                 * Access rmx_mtu without holding the route entry lock,
@@ -2050,10 +2283,8 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
                }
        } else {
                if (ifp) {
-                       lck_rw_lock_shared(nd_if_rwlock);
                        /* Don't hold nd_ifinfo lock for performance */
                        mtu = IN6_LINKMTU(ifp);
-                       lck_rw_done(nd_if_rwlock);
                } else {
                        error = EHOSTUNREACH; /* XXX */
                }
@@ -2722,6 +2953,8 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
                        }
                        break;
                }
+       } else if (level == IPPROTO_UDP) {
+               error = udp_ctloutput(so, sopt);
        } else {
                error = EINVAL;
        }
@@ -3894,3 +4127,58 @@ ip6_optlen(struct in6pcb *in6p)
        return (len);
 #undef elen
 }
+
+static int
+sysctl_reset_ip6_output_stats SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       int error, i;
+
+       i = ip6_output_measure;
+       error = sysctl_handle_int(oidp, &i, 0, req);
+       if (error || req->newptr == USER_ADDR_NULL)
+               goto done;
+       /* impose bounds */
+       if (i < 0 || i > 1) {
+               error = EINVAL;
+               goto done;
+       }
+       if (ip6_output_measure != i && i == 1) {
+               net_perf_initialize(&net_perf, ip6_output_measure_bins);
+       }
+       ip6_output_measure = i;
+done:
+       return (error);
+}
+
+static int
+sysctl_ip6_output_measure_bins SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       int error;
+       uint64_t i;
+
+       i = ip6_output_measure_bins;
+       error = sysctl_handle_quad(oidp, &i, 0, req);
+       if (error || req->newptr == USER_ADDR_NULL)
+               goto done;
+       /* validate data */
+       if (!net_perf_validate_bins(i)) {
+               error = EINVAL;
+               goto done;
+       }
+       ip6_output_measure_bins = i;
+done:
+       return (error);
+}
+
+static int
+sysctl_ip6_output_getperf SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+       if (req->oldptr == USER_ADDR_NULL)
+               req->oldlen = (size_t)sizeof (struct ipstat);
+
+       return (SYSCTL_OUT(req, &net_perf, MIN(sizeof (net_perf), req->oldlen)));
+}
+
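
The handlers above follow a validate-then-apply shape for tunables; in particular the measurement toggle re-initializes its state only on a 0 -> 1 transition. A minimal sketch of that toggle semantics, with invented names standing in for the net_perf machinery:

#include <stdio.h>

/* Hypothetical stand-ins for the kernel's net_perf bookkeeping. */
struct perf { unsigned long long bins; unsigned long long samples; };

static struct perf g_perf;
static int g_measure;                   /* analogous to ip6_output_measure */
static unsigned long long g_bins = 10;  /* analogous to ip6_output_measure_bins */

/*
 * Same shape as sysctl_reset_ip6_output_stats: clamp the new value to 0/1
 * and reset the measurement state only when the feature is being turned on.
 */
static int
set_measure(int newval)
{
        if (newval < 0 || newval > 1)
                return -1;                      /* EINVAL in the kernel */
        if (g_measure != newval && newval == 1) {
                g_perf.bins = g_bins;           /* net_perf_initialize() */
                g_perf.samples = 0;
        }
        g_measure = newval;
        return 0;
}

int
main(void)
{
        printf("%d\n", set_measure(1));   /* 0: enabled, state reset */
        printf("%d\n", set_measure(2));   /* -1: out of bounds */
        return 0;
}
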
index 30926d2dc8f65f2610f723cee3066418d0c947eb..dc2b4399d263f21d25b786612e939638e7b010ad 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -182,6 +182,7 @@ struct ip6_exthdrs {
        struct mbuf *ip6e_dest1;
        struct mbuf *ip6e_rthdr;
        struct mbuf *ip6e_dest2;
+       boolean_t merged;
 };
 
 /*
@@ -253,6 +254,9 @@ struct      ip6_pktopts {
  */
 #endif /* BSD_KERNEL_PRIVATE */
 
+#define        IP6S_SRCRULE_COUNT 16
+#include <netinet6/scope6_var.h>
+
 struct ip6stat {
        u_quad_t ip6s_total;            /* total packets received */
        u_quad_t ip6s_tooshort;         /* packet too short */
@@ -289,32 +293,35 @@ struct    ip6stat {
        /*
         * statistics for improvement of the source address selection
         * algorithm:
-        * XXX: hardcoded 16 = # of ip6 multicast scope types + 1
         */
        /* number of times that address selection fails */
        u_quad_t ip6s_sources_none;
        /* number of times that an address on the outgoing I/F is chosen */
-       u_quad_t ip6s_sources_sameif[16];
+       u_quad_t ip6s_sources_sameif[SCOPE6_ID_MAX];
        /* number of times that an address on a non-outgoing I/F is chosen */
-       u_quad_t ip6s_sources_otherif[16];
+       u_quad_t ip6s_sources_otherif[SCOPE6_ID_MAX];
        /*
         * number of times that an address that has the same scope
         * from the destination is chosen.
         */
-       u_quad_t ip6s_sources_samescope[16];
+       u_quad_t ip6s_sources_samescope[SCOPE6_ID_MAX];
        /*
         * number of times that an address that has a different scope
         * from the destination is chosen.
         */
-       u_quad_t ip6s_sources_otherscope[16];
+       u_quad_t ip6s_sources_otherscope[SCOPE6_ID_MAX];
        /* number of times that a deprecated address is chosen */
-       u_quad_t ip6s_sources_deprecated[16];
+       u_quad_t ip6s_sources_deprecated[SCOPE6_ID_MAX];
 
        u_quad_t ip6s_forward_cachehit;
        u_quad_t ip6s_forward_cachemiss;
 
        /* number of times that each rule of source selection is applied. */
-       u_quad_t ip6s_sources_rule[16];
+       u_quad_t ip6s_sources_rule[IP6S_SRCRULE_COUNT];
+
+       /* number of times we ignored address on expensive secondary interfaces */
+       u_quad_t ip6s_sources_skip_expensive_secondary_if;
+
        /* pkt dropped, no mbufs for control data */
        u_quad_t ip6s_pktdropcntrl;
 
index 90920d436861ce5a8e078ee95d87c18979b9c9a8..43259eea97cfef6487f415363436beb4474ffef8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -155,7 +155,7 @@ int ip4_esp_net_deflev = IPSEC_LEVEL_USE;
 int ip4_ah_trans_deflev = IPSEC_LEVEL_USE;
 int ip4_ah_net_deflev = IPSEC_LEVEL_USE;
 struct secpolicy ip4_def_policy;
-int ip4_ipsec_ecn = 0;         /* ECN ignore(-1)/forbidden(0)/allowed(1) */
+int ip4_ipsec_ecn = ECN_COMPATIBILITY;         /* ECN ignore(-1)/compatibility(0)/normal(1) */
 int ip4_esp_randpad = -1;
 int    esp_udp_encap_port = 0;
 static int sysctl_def_policy SYSCTL_HANDLER_ARGS;
@@ -214,7 +214,7 @@ int ip6_esp_net_deflev = IPSEC_LEVEL_USE;
 int ip6_ah_trans_deflev = IPSEC_LEVEL_USE;
 int ip6_ah_net_deflev = IPSEC_LEVEL_USE;
 struct secpolicy ip6_def_policy;
-int ip6_ipsec_ecn = 0;         /* ECN ignore(-1)/forbidden(0)/allowed(1) */
+int ip6_ipsec_ecn = ECN_COMPATIBILITY;         /* ECN ignore(-1)/compatibility(0)/normal(1) */
 int ip6_esp_randpad = -1;
 
 /* net.inet6.ipsec6 */
@@ -262,12 +262,14 @@ static void vshiftl(unsigned char *, int, int);
 static int ipsec_in_reject(struct secpolicy *, struct mbuf *);
 #if INET6
 static int ipsec64_encapsulate(struct mbuf *, struct secasvar *);
+static int ipsec6_update_routecache_and_output(struct ipsec_output_state *state, struct secasvar *sav);
+static int ipsec46_encapsulate(struct ipsec_output_state *state, struct secasvar *sav);
 #endif
 static struct ipsec_tag *ipsec_addaux(struct mbuf *);
 static struct ipsec_tag *ipsec_findaux(struct mbuf *);
 static void ipsec_optaux(struct mbuf *, struct ipsec_tag *);
 int ipsec_send_natt_keepalive(struct secasvar *sav);
-bool ipsec_fill_offload_frame(ifnet_t ifp, struct secasvar *sav, struct ipsec_offload_frame *frame, size_t frame_data_offset);
+bool ipsec_fill_offload_frame(ifnet_t ifp, struct secasvar *sav, struct ifnet_keepalive_offload_frame *frame, size_t frame_data_offset);
 
 static int
 sysctl_def_policy SYSCTL_HANDLER_ARGS
@@ -1505,10 +1507,9 @@ ipsec_deepcopy_policy(struct secpolicy *src)
        q = &newchain;
        for (p = src->req; p; p = p->next) {
                *q = (struct ipsecrequest *)_MALLOC(sizeof(struct ipsecrequest),
-                                                                                       M_SECA, M_WAITOK);
+                   M_SECA, M_WAITOK | M_ZERO);
                if (*q == NULL)
                        goto fail;
-               bzero(*q, sizeof(**q));
                (*q)->next = NULL;
                
                (*q)->saidx.proto = p->saidx.proto;
@@ -2573,10 +2574,6 @@ ipsec64_encapsulate(m, sav)
                m->m_pkthdr.len += sizeof(struct ip);
                ip6i = mtod(m->m_next, struct ip6_hdr *);
        }
-       /* construct new IPv4 header. see RFC 2401 5.1.2.1 */
-       /* ECN consideration. */
-       /* XXX To be fixed later if needed */
-       // ip_ecn_ingress(ip4_ipsec_ecn, &ip->ip_tos, &oip->ip_tos);    
 
        bcopy(ip6, ip6i, sizeof(struct ip6_hdr));
        ip = mtod(m, struct ip *);
@@ -2593,6 +2590,11 @@ ipsec64_encapsulate(m, sav)
        ip->ip_off = 0;
        ip->ip_ttl = hlim;
        ip->ip_p = IPPROTO_IPV6;
+
+       /* construct new IPv4 header. see RFC 2401 5.1.2.1 */
+       /* ECN consideration. */
+       ip64_ecn_ingress(ip4_ipsec_ecn, &ip->ip_tos, &ip6->ip6_flow);
+
        if (plen + sizeof(struct ip) < IP_MAXPACKET)
                ip->ip_len = htons(plen + sizeof(struct ip));
        else {
@@ -2671,6 +2673,281 @@ ipsec6_encapsulate_utun_esp_keepalive(m_ptr, sav)
 
        return 0;
 }
+
+int
+ipsec6_update_routecache_and_output(state, sav)
+       struct ipsec_output_state *state;
+       struct secasvar *sav;
+{
+       struct sockaddr_in6* dst6;
+       struct route *ro6;
+       struct ip6_hdr *ip6;
+       errno_t error = 0;
+
+       int plen;
+       struct ip6_out_args ip6oa;
+       struct route_in6 ro6_new;
+       struct flowadv *adv = NULL;
+
+       if (!state->m) {
+               return EINVAL;
+       }
+       ip6 = mtod(state->m, struct ip6_hdr *);
+
+       // grab sadb_mutex, before updating sah's route cache
+       lck_mtx_lock(sadb_mutex);
+       ro6 = &sav->sah->sa_route;
+       dst6 = (struct sockaddr_in6 *)(void *)&ro6->ro_dst;
+       if (ro6->ro_rt) {
+               RT_LOCK(ro6->ro_rt);
+       }
+       if (ROUTE_UNUSABLE(ro6) ||
+           !IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr, &ip6->ip6_dst)) {
+               if (ro6->ro_rt != NULL)
+                       RT_UNLOCK(ro6->ro_rt);
+               ROUTE_RELEASE(ro6);
+       }
+       if (ro6->ro_rt == 0) {
+               bzero(dst6, sizeof(*dst6));
+               dst6->sin6_family = AF_INET6;
+               dst6->sin6_len = sizeof(*dst6);
+               dst6->sin6_addr = ip6->ip6_dst;
+               rtalloc(ro6);
+               if (ro6->ro_rt) {
+                       RT_LOCK(ro6->ro_rt);
+               }
+       }
+       if (ro6->ro_rt == 0) {
+               ip6stat.ip6s_noroute++;
+               IPSEC_STAT_INCREMENT(ipsec6stat.out_noroute);
+               error = EHOSTUNREACH;
+               // release sadb_mutex, after updating sah's route cache
+               lck_mtx_unlock(sadb_mutex);
+               return error;
+       }
+
+       /*
+        * adjust state->dst if tunnel endpoint is offlink
+        *
+        * XXX: caching rt_gateway value in the state is
+        * not really good, since it may point elsewhere
+        * when the gateway gets modified to a larger
+        * sockaddr via rt_setgate().  This is currently
+        * addressed by SA_SIZE roundup in that routine.
+        */
+       if (ro6->ro_rt->rt_flags & RTF_GATEWAY)
+               dst6 = (struct sockaddr_in6 *)(void *)ro6->ro_rt->rt_gateway;
+       RT_UNLOCK(ro6->ro_rt);
+       ROUTE_RELEASE(&state->ro);
+       route_copyout(&state->ro, ro6, sizeof(state->ro));
+       state->dst = (struct sockaddr *)dst6;
+       state->tunneled = 6;
+       // release sadb_mutex, after updating sah's route cache                                                                                                                          
+       lck_mtx_unlock(sadb_mutex);
+
+       state->m = ipsec6_splithdr(state->m);
+       if (!state->m) {
+               IPSEC_STAT_INCREMENT(ipsec6stat.out_nomem);
+               error = ENOMEM;
+               return error;
+       }
+
+       ip6 = mtod(state->m, struct ip6_hdr *);
+       switch (sav->sah->saidx.proto) {
+       case IPPROTO_ESP:
+#if IPSEC_ESP
+               error = esp6_output(state->m, &ip6->ip6_nxt, state->m->m_next, sav);
+#else
+               m_freem(state->m);
+               error = EINVAL;
+#endif
+               break;
+       case IPPROTO_AH:
+               error = ah6_output(state->m, &ip6->ip6_nxt, state->m->m_next, sav);
+               break;
+       case IPPROTO_IPCOMP:
+               /* XXX code should be here */
+               /*FALLTHROUGH*/
+       default:
+               ipseclog((LOG_ERR, "%s: unknown ipsec protocol %d\n", __FUNCTION__, sav->sah->saidx.proto));
+               m_freem(state->m);
+               IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
+               error = EINVAL;
+               break;
+       }
+       if (error) {
+               // If error, packet already freed by above output routines
+               state->m = NULL;
+               return error;
+       }
+
+       plen = state->m->m_pkthdr.len - sizeof(struct ip6_hdr);
+       if (plen > IPV6_MAXPACKET) {
+               ipseclog((LOG_ERR, "%s: IPsec with IPv6 jumbogram is not supported\n", __FUNCTION__));
+               IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
+               error = EINVAL;/*XXX*/
+               return error;
+       }
+       ip6 = mtod(state->m, struct ip6_hdr *);
+       ip6->ip6_plen = htons(plen);
+
+       ipsec_set_pkthdr_for_interface(sav->sah->ipsec_if, state->m, AF_INET6);
+
+       /* Increment statistics */
+       ifnet_stat_increment_out(sav->sah->ipsec_if, 1, mbuf_pkthdr_len(state->m), 0);
+
+       /* Send to ip6_output */
+       bzero(&ro6_new, sizeof(ro6_new));
+       bzero(&ip6oa, sizeof(ip6oa));
+       ip6oa.ip6oa_flowadv.code = 0;
+       ip6oa.ip6oa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR;
+       if (state->outgoing_if) {
+               ip6oa.ip6oa_boundif = state->outgoing_if;
+               ip6oa.ip6oa_flags |= IPOAF_BOUND_IF;
+       }
+
+       adv = &ip6oa.ip6oa_flowadv;
+       (void) ip6_output(state->m, NULL, &ro6_new, IPV6_OUTARGS, NULL, NULL, &ip6oa);
+
+       if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
+               error = ENOBUFS;
+               ifnet_disable_output(sav->sah->ipsec_if);
+               return error;
+       }
+
+       return 0;
+}
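
The first half of ipsec6_update_routecache_and_output() is a cached-route check: reuse the SA's cached route only while it is still usable and still points at the packet's destination, otherwise release it and look up a fresh one. A toy user-space model of that check (struct cached_route, lookup_route and get_route are invented names; locking and reference counting are omitted):

#include <stdio.h>
#include <string.h>

struct cached_route {
        char dst[64];
        int  usable;
};

/* Stand-in for rtalloc(): pretend every lookup succeeds. */
static int
lookup_route(struct cached_route *ro, const char *dst)
{
        snprintf(ro->dst, sizeof(ro->dst), "%s", dst);
        ro->usable = 1;
        return 0;
}

/* Reuse the cache only if usable and aimed at the same destination. */
static int
get_route(struct cached_route *ro, const char *dst)
{
        if (!ro->usable || strcmp(ro->dst, dst) != 0) {
                memset(ro, 0, sizeof(*ro));          /* ROUTE_RELEASE() */
                if (lookup_route(ro, dst) != 0)
                        return -1;                   /* EHOSTUNREACH */
        }
        return 0;
}

int
main(void)
{
        struct cached_route ro = { "2001:db8::2", 1 };

        get_route(&ro, "2001:db8::2");   /* cache hit, reused */
        get_route(&ro, "2001:db8::9");   /* mismatch: released and refilled */
        printf("dst=%s\n", ro.dst);
        return 0;
}
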
+
+int
+ipsec46_encapsulate(state, sav)
+       struct secasvar *sav;
+       struct ipsec_output_state *state;
+{
+       struct mbuf *m;
+       struct ip6_hdr *ip6;
+       struct ip *oip;
+       struct ip *ip;
+       size_t hlen;
+       size_t plen;
+
+       m = state->m;
+       if (!m) {
+               return EINVAL;
+       }
+
+       /* can't tunnel between different AFs */
+       if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family
+           != ((struct sockaddr *)&sav->sah->saidx.dst)->sa_family
+           || ((struct sockaddr *)&sav->sah->saidx.src)->sa_family != AF_INET6) {
+               m_freem(m);
+               return EINVAL;
+       }
+#if 0
+       /* XXX if the dst is myself, perform nothing. */
+       if (key_ismyaddr((struct sockaddr *)&sav->sah->saidx.dst)) {
+               m_freem(m);
+               return EINVAL;
+       }
+#endif
+
+       if (m->m_len < sizeof(*ip)) {
+               panic("ipsec46_encapsulate: assumption failed (first mbuf length)");
+               return EINVAL;
+       }
+
+       ip = mtod(m, struct ip *);
+#ifdef _IP_VHL
+       hlen = _IP_VHL_HL(ip->ip_vhl) << 2;
+#else
+       hlen = ip->ip_hl << 2;
+#endif
+
+       if (m->m_len != hlen) {
+               panic("ipsec46_encapsulate: assumption failed (first mbuf length)");
+               return EINVAL;
+       }
+
+       /* generate header checksum */
+       ip->ip_sum = 0;
+#ifdef _IP_VHL
+       ip->ip_sum = in_cksum(m, hlen);
+#else
+       ip->ip_sum = in_cksum(m, hlen);
+#endif
+
+       plen = m->m_pkthdr.len; // save original IPv4 packet len, this will be ipv6 payload len
+
+       /*
+        * First move the IPv4 header to the second mbuf in the chain
+        */
+       if (M_LEADINGSPACE(m->m_next) < hlen) {
+               struct mbuf *n;
+               MGET(n, M_DONTWAIT, MT_DATA);
+               if (!n) {
+                       m_freem(m);
+                       return ENOBUFS;
+               }
+               n->m_len = hlen;
+               n->m_next = m->m_next;
+               m->m_next = n;
+               m->m_pkthdr.len += sizeof(struct ip6_hdr);
+               oip = mtod(n, struct ip *);
+       } else {
+               m->m_next->m_len += hlen;
+               m->m_next->m_data -= hlen;
+               m->m_pkthdr.len += sizeof(struct ip6_hdr);
+               oip = mtod(m->m_next, struct ip *);
+       }
+       ip = mtod(m, struct ip *);
+       ovbcopy((caddr_t)ip, (caddr_t)oip, hlen);
+
+       /*
+        * Grow the first mbuf to accommodate the new IPv6 header.
+        */
+       if (M_LEADINGSPACE(m) < sizeof(struct ip6_hdr) - hlen) {
+               struct mbuf *n;
+               MGETHDR(n, M_DONTWAIT, MT_HEADER);
+               if (!n) {
+                       m_freem(m);
+                       return ENOBUFS;
+               }
+               M_COPY_PKTHDR(n, m);
+               MH_ALIGN(n, sizeof(struct ip6_hdr));
+               n->m_len = sizeof(struct ip6_hdr);
+               n->m_next = m->m_next;
+               m->m_next = NULL;
+               m_freem(m);
+               state->m = n;
+               m = state->m;
+       } else {         
+               m->m_len += (sizeof(struct ip6_hdr) - hlen);
+               m->m_data -= (sizeof(struct ip6_hdr) - hlen);
+       }
+       ip6 = mtod(m, struct ip6_hdr *);
+       ip6->ip6_flow = 0;
+       ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
+       ip6->ip6_vfc |= IPV6_VERSION;
+
+       /* construct new IPv6 header. see RFC 2401 5.1.2.2 */
+       /* ECN consideration. */
+       ip46_ecn_ingress(ip6_ipsec_ecn, &ip6->ip6_flow, &ip->ip_tos);
+       if (plen < IPV6_MAXPACKET - sizeof(struct ip6_hdr))
+               ip6->ip6_plen = htons(plen);
+       else {
+               /* ip6->ip6_plen will be updated in ip6_output() */
+       }
+
+       ip6->ip6_nxt = IPPROTO_IPV4;
+       ip6->ip6_hlim = IPV6_DEFHLIM;
+
+       bcopy(&((struct sockaddr_in6 *)&sav->sah->saidx.src)->sin6_addr,
+             &ip6->ip6_src, sizeof(ip6->ip6_src));
+       bcopy(&((struct sockaddr_in6 *)&sav->sah->saidx.dst)->sin6_addr,
+             &ip6->ip6_dst, sizeof(ip6->ip6_dst));
+
+       return 0;
+}
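
For the 4-in-6 tunnel case, the outer header that ipsec46_encapsulate() prepends carries the whole IPv4 packet as its payload, next header IPPROTO_IPV4, and the SA endpoints as source and destination. A stand-alone sketch of just the header construction, assuming the standard <netinet/ip6.h> struct ip6_hdr; OUTER_NEXT_IPV4 is defined locally, and the mbuf surgery and ECN ingress handling done by the kernel code are omitted:

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/ip6.h>

#define OUTER_NEXT_IPV4 4      /* IPPROTO_IPV4: inner packet is IPv4 */
#define OUTER_HLIM      64

/*
 * Build an outer IPv6 header for a 4-in-6 tunnel, following the same field
 * assignments ipsec46_encapsulate() makes: version bits, payload length,
 * next header, hop limit and the tunnel endpoints.
 */
static void
build_outer_ip6(struct ip6_hdr *ip6, const char *src, const char *dst,
    unsigned short inner_len)
{
        memset(ip6, 0, sizeof(*ip6));
        ip6->ip6_flow = 0;
        ip6->ip6_vfc = 0x60;                 /* IPv6, no traffic class bits */
        ip6->ip6_plen = htons(inner_len);    /* whole IPv4 packet as payload */
        ip6->ip6_nxt = OUTER_NEXT_IPV4;
        ip6->ip6_hlim = OUTER_HLIM;
        inet_pton(AF_INET6, src, &ip6->ip6_src);
        inet_pton(AF_INET6, dst, &ip6->ip6_dst);
}

int
main(void)
{
        struct ip6_hdr h;
        char buf[INET6_ADDRSTRLEN];

        build_outer_ip6(&h, "2001:db8::1", "2001:db8::2", 1400);
        inet_ntop(AF_INET6, &h.ip6_dst, buf, sizeof(buf));
        printf("nxt=%u plen=%u dst=%s\n", h.ip6_nxt, ntohs(h.ip6_plen), buf);
        return 0;
}
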
+
 #endif /*INET6*/
 
 /*
@@ -3052,73 +3329,83 @@ ipsec4_output_internal(struct ipsec_output_state *state, struct secasvar *sav)
                /*
                 * build IPsec tunnel.
                 */
-               /* XXX should be processed with other familiy */
-               if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family != AF_INET) {
-                       ipseclog((LOG_ERR, "ipsec4_output: "
-                               "family mismatched between inner and outer spi=%u\n",
-                               (u_int32_t)ntohl(sav->spi)));
-                       error = EAFNOSUPPORT;
-                       goto bad;
-               }
-
                state->m = ipsec4_splithdr(state->m);
                if (!state->m) {
                        error = ENOMEM;
                        goto bad;
                }
-               error = ipsec4_encapsulate(state->m, sav);
-               if (error) {
-                       state->m = NULL;
-                       goto bad;
-               }
-               ip = mtod(state->m, struct ip *);
 
-               // grab sadb_mutex, before updating sah's route cache
-               lck_mtx_lock(sadb_mutex);
-               ro4= &sav->sah->sa_route;
-               dst4 = (struct sockaddr_in *)(void *)&ro4->ro_dst;
-               if (ro4->ro_rt != NULL) {
-                       RT_LOCK(ro4->ro_rt);
-               }
-               if (ROUTE_UNUSABLE(ro4) ||
-                       dst4->sin_addr.s_addr != ip->ip_dst.s_addr) {
-                       if (ro4->ro_rt != NULL)
-                               RT_UNLOCK(ro4->ro_rt);
-                       ROUTE_RELEASE(ro4);
-               }
-               if (ro4->ro_rt == 0) {
-                       dst4->sin_family = AF_INET;
-                       dst4->sin_len = sizeof(*dst4);
-                       dst4->sin_addr = ip->ip_dst;
-                       rtalloc(ro4);
-                       if (ro4->ro_rt == 0) {
-                               OSAddAtomic(1, &ipstat.ips_noroute);
-                               error = EHOSTUNREACH;
-                               // release sadb_mutex, after updating sah's route cache
-                               lck_mtx_unlock(sadb_mutex);
+               if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family == AF_INET6) {
+                       error = ipsec46_encapsulate(state, sav);
+                       if (error) {
+                               // packet already freed by encapsulation error handling
+                               state->m = NULL;
+                               return error;
+                       }
+
+                       error = ipsec6_update_routecache_and_output(state, sav);
+                       return error;
+
+               } else if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family == AF_INET) {
+                       error = ipsec4_encapsulate(state->m, sav);
+                       if (error) {
+                               state->m = NULL;
                                goto bad;
                        }
-                       RT_LOCK(ro4->ro_rt);
-               }
+                       ip = mtod(state->m, struct ip *);
 
-               /*
-                * adjust state->dst if tunnel endpoint is offlink
-                *
-                * XXX: caching rt_gateway value in the state is
-                * not really good, since it may point elsewhere
-                * when the gateway gets modified to a larger
-                * sockaddr via rt_setgate().  This is currently
-                * addressed by SA_SIZE roundup in that routine.
-                */
-               if (ro4->ro_rt->rt_flags & RTF_GATEWAY)
-                       dst4 = (struct sockaddr_in *)(void *)ro4->ro_rt->rt_gateway;
-               RT_UNLOCK(ro4->ro_rt);
-               ROUTE_RELEASE(&state->ro);
-               route_copyout(&state->ro, ro4, sizeof(state->ro));
-               state->dst = (struct sockaddr *)dst4;
-               state->tunneled = 4;
-               // release sadb_mutex, after updating sah's route cache
-               lck_mtx_unlock(sadb_mutex);
+                       // grab sadb_mutex, before updating sah's route cache
+                       lck_mtx_lock(sadb_mutex);
+                       ro4= &sav->sah->sa_route;
+                       dst4 = (struct sockaddr_in *)(void *)&ro4->ro_dst;
+                       if (ro4->ro_rt != NULL) {
+                               RT_LOCK(ro4->ro_rt);
+                       }
+                       if (ROUTE_UNUSABLE(ro4) ||
+                           dst4->sin_addr.s_addr != ip->ip_dst.s_addr) {
+                               if (ro4->ro_rt != NULL)
+                                       RT_UNLOCK(ro4->ro_rt);
+                               ROUTE_RELEASE(ro4);
+                       }
+                       if (ro4->ro_rt == 0) {
+                               dst4->sin_family = AF_INET;
+                               dst4->sin_len = sizeof(*dst4);
+                               dst4->sin_addr = ip->ip_dst;
+                               rtalloc(ro4);
+                               if (ro4->ro_rt == 0) {
+                                       OSAddAtomic(1, &ipstat.ips_noroute);
+                                       error = EHOSTUNREACH;
+                                       // release sadb_mutex, after updating sah's route cache
+                                       lck_mtx_unlock(sadb_mutex);
+                                       goto bad;
+                               }
+                               RT_LOCK(ro4->ro_rt);
+                       }
+
+                       /*
+                        * adjust state->dst if tunnel endpoint is offlink
+                        *
+                        * XXX: caching rt_gateway value in the state is
+                        * not really good, since it may point elsewhere
+                        * when the gateway gets modified to a larger
+                        * sockaddr via rt_setgate().  This is currently
+                        * addressed by SA_SIZE roundup in that routine.
+                        */
+                       if (ro4->ro_rt->rt_flags & RTF_GATEWAY)
+                               dst4 = (struct sockaddr_in *)(void *)ro4->ro_rt->rt_gateway;
+                       RT_UNLOCK(ro4->ro_rt);
+                       ROUTE_RELEASE(&state->ro);
+                       route_copyout(&state->ro, ro4, sizeof(state->ro));
+                       state->dst = (struct sockaddr *)dst4;
+                       state->tunneled = 4;
+                       // release sadb_mutex, after updating sah's route cache                                                                                                    
+                       lck_mtx_unlock(sadb_mutex);
+               } else {
+                       ipseclog((LOG_ERR, "%s: family mismatched between inner and outer spi=%u\n",
+                                 __FUNCTION__, (u_int32_t)ntohl(sav->spi)));
+                       error = EAFNOSUPPORT;
+                       goto bad;
+               }
        }
 
        state->m = ipsec4_splithdr(state->m);
@@ -4201,11 +4488,12 @@ ipsec4_tunnel_validate(m, off, nxt0, sav, ifamily)
 #if INET6
 /* validate inbound IPsec tunnel packet. */
 int
-ipsec6_tunnel_validate(m, off, nxt0, sav)
+ipsec6_tunnel_validate(m, off, nxt0, sav, ifamily)
        struct mbuf *m;         /* no pullup permitted, m->m_len >= ip */
        int off;
        u_int nxt0;
        struct secasvar *sav;
+       sa_family_t *ifamily;
 {
        u_int8_t nxt = nxt0 & 0xff;
        struct sockaddr_in6 *sin6;
@@ -4219,8 +4507,9 @@ ipsec6_tunnel_validate(m, off, nxt0, sav)
        if (m->m_len < sizeof(struct ip6_hdr))
                panic("too short mbuf on ipsec6_tunnel_validate");
 #endif
-       if (nxt != IPPROTO_IPV6)
+       if (nxt != IPPROTO_IPV4 && nxt != IPPROTO_IPV6)
                return 0;
+
        if (m->m_pkthdr.len < off + sizeof(struct ip6_hdr))
                return 0;
        /* do not decapsulate if the SA is for transport mode only */
@@ -4235,8 +4524,16 @@ ipsec6_tunnel_validate(m, off, nxt0, sav)
        if (!IN6_ARE_ADDR_EQUAL(&oip6->ip6_dst, &sin6->sin6_addr))
                return 0;
 
-       if (sav->utun_in_fn) {
-               // the utun SAs don't have a policy (yet).
+       if (sav->utun_in_fn ||
+               sav->sah->ipsec_if != NULL) {
+               // the ipsec/utun interface SAs don't have policies.
+               if (nxt == IPPROTO_IPV4) {
+                       *ifamily = AF_INET;
+               } else if (nxt == IPPROTO_IPV6) {
+                       *ifamily = AF_INET6;
+               } else {
+                       return 0;
+               }
                return 1;
        }
        
@@ -4246,7 +4543,7 @@ ipsec6_tunnel_validate(m, off, nxt0, sav)
        bzero(&isrc, sizeof(isrc));
        bzero(&idst, sizeof(idst));
        osrc.sin6_family = odst.sin6_family = isrc.sin6_family =
-           idst.sin6_family = AF_INET6;
+           idst.sin6_family = *ifamily = AF_INET6;
        osrc.sin6_len = odst.sin6_len = isrc.sin6_len = idst.sin6_len = 
            sizeof(struct sockaddr_in6);
        osrc.sin6_addr = oip6->ip6_src;
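
The widened check above now admits both IPv4-in-IPv6 and IPv6-in-IPv6 tunnel payloads and reports the inner family through the new ifamily argument. A minimal user-space sketch of that next-header to address-family mapping (PROTO_IPV4 is defined locally; the rest of the validation is not modeled):

#include <stdio.h>
#include <netinet/in.h>   /* AF_INET, AF_INET6, IPPROTO_IPV6 */

#define PROTO_IPV4 4      /* IPPROTO_IPV4 on BSD: IPv4-in-IPv6 payload */

/* Map the decapsulated next-header value to an inner address family. */
static int
tunnel_inner_family(unsigned char nxt, int *fam)
{
        if (nxt == PROTO_IPV4) {
                *fam = AF_INET;
                return 1;
        }
        if (nxt == IPPROTO_IPV6) {
                *fam = AF_INET6;
                return 1;
        }
        return 0;         /* not a tunneled packet we handle */
}

int
main(void)
{
        int fam;

        if (tunnel_inner_family(4, &fam))
                printf("inner family %d\n", fam);
        if (!tunnel_inner_family(6, &fam))   /* 6 is TCP, not a tunnel payload */
                printf("rejected\n");
        return 0;
}
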
@@ -4640,7 +4937,7 @@ ipsec_send_natt_keepalive(
 __private_extern__ bool
 ipsec_fill_offload_frame(ifnet_t ifp,
                                                 struct secasvar *sav,
-                                                struct ipsec_offload_frame *frame,
+                                                struct ifnet_keepalive_offload_frame *frame,
                                                 size_t frame_data_offset)
 {
        u_int8_t *data = NULL;
@@ -4656,12 +4953,13 @@ ipsec_fill_offload_frame(ifnet_t ifp,
                sav->flags & SADB_X_EXT_ESP_KEEPALIVE ||
                (esp_udp_encap_port & 0xFFFF) == 0 ||
                sav->remote_ike_port == 0 ||
-               (natt_keepalive_interval == 0 && sav->natt_interval == 0)) {
+               (natt_keepalive_interval == 0 && sav->natt_interval == 0 && sav->natt_offload_interval == 0)) {
                /* SA is not eligible for keepalive offload on this interface */
                return (FALSE);
        }
 
-       if (frame_data_offset + sizeof(struct udpiphdr) + 1 > IPSEC_OFFLOAD_FRAME_DATA_SIZE) {
+       if (frame_data_offset + sizeof(struct udpiphdr) + 1 >
+           IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE) {
                /* Not enough room in this data frame */
                return (FALSE);
        }
@@ -4671,7 +4969,10 @@ ipsec_fill_offload_frame(ifnet_t ifp,
        uh = (__typeof__(uh))(void *)(data + frame_data_offset + sizeof(*ip));
 
        frame->length = frame_data_offset + sizeof(struct udpiphdr) + 1;
-       bzero(data, IPSEC_OFFLOAD_FRAME_DATA_SIZE);
+       frame->type = IFNET_KEEPALIVE_OFFLOAD_FRAME_IPSEC;
+       frame->ether_type = IFNET_KEEPALIVE_OFFLOAD_FRAME_ETHERTYPE_IPV4;
+
+       bzero(data, IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE);
 
        ip->ip_v = IPVERSION;
        ip->ip_hl = sizeof(struct ip) >> 2;
@@ -4706,7 +5007,9 @@ ipsec_fill_offload_frame(ifnet_t ifp,
        uh->uh_sum = 0;
        *(u_int8_t*)(data + frame_data_offset + sizeof(*ip) + sizeof(*uh)) = 0xFF;
 
-       if (sav->natt_interval != 0) {
+       if (sav->natt_offload_interval != 0) {
+               frame->interval = sav->natt_offload_interval;
+       } else if (sav->natt_interval != 0) {
                frame->interval = sav->natt_interval;
        } else {
                frame->interval = natt_keepalive_interval;
index 9c452d26fc6c086870576441ebcddea16778f7c7..7a422a69051fa468b315102e0c08abe886a10376 100644 (file)
@@ -52,7 +52,7 @@ extern lck_mtx_t         *sadb_stat_mutex;
 
 
 #define IPSEC_STAT_INCREMENT(x)        \
-       {lck_mtx_lock(sadb_stat_mutex); (x)++; lck_mtx_unlock(sadb_stat_mutex);}
+       OSIncrementAtomic64((SInt64 *)&x)
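
The new IPSEC_STAT_INCREMENT trades a global-mutex-protected bump for a single lock-free 64-bit atomic add, which is also why the counters in struct ipsecstat below gain 8-byte alignment attributes. A small C11 illustration of the two approaches, using pthread and <stdatomic.h> in place of the kernel primitives (compile with -std=c11 -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Locked counter: every increment takes and drops a shared mutex. */
static pthread_mutex_t stat_mutex = PTHREAD_MUTEX_INITIALIZER;
static uint64_t locked_counter;

static void
locked_increment(void)
{
        pthread_mutex_lock(&stat_mutex);
        locked_counter++;
        pthread_mutex_unlock(&stat_mutex);
}

/* Lock-free counter: one atomic add, as OSIncrementAtomic64 does. */
static _Atomic uint64_t atomic_counter;

static void
atomic_increment(void)
{
        atomic_fetch_add_explicit(&atomic_counter, 1, memory_order_relaxed);
}

int
main(void)
{
        for (int i = 0; i < 1000; i++) {
                locked_increment();
                atomic_increment();
        }
        printf("%llu %llu\n",
            (unsigned long long)locked_counter,
            (unsigned long long)atomic_counter);
        return 0;
}
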
 
 struct secpolicyaddrrange {
        struct sockaddr_storage start;  /* Start (low values) of address range */
@@ -204,32 +204,32 @@ struct secspacq {
 
 /* statistics for ipsec processing */
 struct ipsecstat {
-       u_quad_t in_success;  /* succeeded inbound process */
-       u_quad_t in_polvio;
+       u_quad_t in_success __attribute__ ((aligned (8))); /* succeeded inbound process */
+       u_quad_t in_polvio __attribute__ ((aligned (8)));
                        /* security policy violation for inbound process */
-       u_quad_t in_nosa;     /* inbound SA is unavailable */
-       u_quad_t in_inval;    /* inbound processing failed due to EINVAL */
-       u_quad_t in_nomem;    /* inbound processing failed due to ENOBUFS */
-       u_quad_t in_badspi;   /* failed getting a SPI */
-       u_quad_t in_ahreplay; /* AH replay check failed */
-       u_quad_t in_espreplay; /* ESP replay check failed */
-       u_quad_t in_ahauthsucc; /* AH authentication success */
-       u_quad_t in_ahauthfail; /* AH authentication failure */
-       u_quad_t in_espauthsucc; /* ESP authentication success */
-       u_quad_t in_espauthfail; /* ESP authentication failure */
-       u_quad_t in_esphist[256];
-       u_quad_t in_ahhist[256];
-       u_quad_t in_comphist[256];
-       u_quad_t out_success; /* succeeded outbound process */
-       u_quad_t out_polvio;
+       u_quad_t in_nosa __attribute__ ((aligned (8)));     /* inbound SA is unavailable */
+       u_quad_t in_inval __attribute__ ((aligned (8)));    /* inbound processing failed due to EINVAL */
+       u_quad_t in_nomem __attribute__ ((aligned (8)));    /* inbound processing failed due to ENOBUFS */
+       u_quad_t in_badspi __attribute__ ((aligned (8)));   /* failed getting a SPI */
+       u_quad_t in_ahreplay __attribute__ ((aligned (8))); /* AH replay check failed */
+       u_quad_t in_espreplay __attribute__ ((aligned (8))); /* ESP replay check failed */
+       u_quad_t in_ahauthsucc __attribute__ ((aligned (8))); /* AH authentication success */
+       u_quad_t in_ahauthfail __attribute__ ((aligned (8))); /* AH authentication failure */
+       u_quad_t in_espauthsucc __attribute__ ((aligned (8))); /* ESP authentication success */
+       u_quad_t in_espauthfail __attribute__ ((aligned (8))); /* ESP authentication failure */
+       u_quad_t in_esphist[256] __attribute__ ((aligned (8)));
+       u_quad_t in_ahhist[256] __attribute__ ((aligned (8)));
+       u_quad_t in_comphist[256] __attribute__ ((aligned (8)));
+       u_quad_t out_success __attribute__ ((aligned (8))); /* succeeded outbound process */
+       u_quad_t out_polvio __attribute__ ((aligned (8)));
                        /* security policy violation for outbound process */
-       u_quad_t out_nosa;    /* outbound SA is unavailable */
-       u_quad_t out_inval;   /* outbound process failed due to EINVAL */
-       u_quad_t out_nomem;    /* inbound processing failed due to ENOBUFS */
-       u_quad_t out_noroute; /* there is no route */
-       u_quad_t out_esphist[256];
-       u_quad_t out_ahhist[256];
-       u_quad_t out_comphist[256];
+       u_quad_t out_nosa __attribute__ ((aligned (8)));    /* outbound SA is unavailable */
+       u_quad_t out_inval __attribute__ ((aligned (8)));   /* outbound process failed due to EINVAL */
+       u_quad_t out_nomem __attribute__ ((aligned (8)));    /* inbound processing failed due to ENOBUFS */
+       u_quad_t out_noroute __attribute__ ((aligned (8))); /* there is no route */
+       u_quad_t out_esphist[256] __attribute__ ((aligned (8)));
+       u_quad_t out_ahhist[256] __attribute__ ((aligned (8)));
+       u_quad_t out_comphist[256] __attribute__ ((aligned (8)));
 };
 
 #ifdef BSD_KERNEL_PRIVATE
index b5b065526f78877812fcf1a0188032c3f85c1eca..018afa4d7c59e42d7ca1e3876072daa41e9eabae 100644 (file)
@@ -81,6 +81,6 @@ extern int ipsec6_output_trans(struct ipsec_output_state *, u_char *,
 extern int ipsec6_output_tunnel(struct ipsec_output_state *,
                                struct secpolicy *, int);
 extern int ipsec6_tunnel_validate(struct mbuf *, int, u_int,
-       struct secasvar *);
+       struct secasvar *, sa_family_t *);
 #endif /* BSD_KERNEL_PRIVATE */
 #endif /* _NETINET6_IPSEC6_H_ */
index 228767199452c2d0559175ebd7ecc7b7ded93664..4dda3d82ef72084eff817624eb321b6e97d3bee3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -2312,16 +2312,20 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
        VERIFY(mli->mli_ifp == ifp);
 
        /*
-        * Groups joined on loopback or marked as 'not reported',
-        * enter the MLD_SILENT_MEMBER state and
-        * are never reported in any protocol exchanges.
+        * Avoid MLD if the group is:
+        * 1. Joined on loopback, OR
+        * 2. On a link that is marked MLIF_SILENT, OR
+        * 3. rdar://problem/19227650 Is link-local scoped and
+        *    on a cellular interface, OR
+        * 4. Is a type that should not be reported (node-local
+        *    or all-nodes link-local multicast).
         * All other groups enter the appropriate state machine
         * for the version in use on this link.
-        * A link marked as MLIF_SILENT causes MLD to be completely
-        * disabled for the link.
         */
        if ((ifp->if_flags & IFF_LOOPBACK) ||
            (mli->mli_flags & MLIF_SILENT) ||
+           (IFNET_IS_CELLULAR(ifp) &&
+            IN6_IS_ADDR_MC_LINKLOCAL(&inm->in6m_addr)) ||
            !mld_is_addr_reported(&inm->in6m_addr)) {
                MLD_PRINTF(("%s: not kicking state machine for silent group\n",
                    __func__));
index 15faf131622b6668cfcb0950639e42d354b0b4f2..8fe0d4d9ee86679db650a85012fcf0e92d3a2ca3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -168,15 +168,9 @@ struct llinfo_nd6 llinfo_nd6 = {
        .ln_prev = &llinfo_nd6,
 };
 
-/* Protected by nd_if_rwlock */
-size_t nd_ifinfo_indexlim = 32; /* increased for 5589193 */
-struct nd_ifinfo *nd_ifinfo = NULL;
-
-static lck_grp_attr_t  *nd_if_lock_grp_attr;
-static lck_grp_t       *nd_if_lock_grp;
-static lck_attr_t      *nd_if_lock_attr;
-decl_lck_rw_data(, nd_if_rwlock_data);
-lck_rw_t               *nd_if_rwlock = &nd_if_rwlock_data;
+static lck_grp_attr_t  *nd_if_lock_grp_attr = NULL;
+static lck_grp_t       *nd_if_lock_grp = NULL;
+static lck_attr_t      *nd_if_lock_attr = NULL;
 
 /* Protected by nd6_mutex */
 struct nd_drhead nd_defrouter;
@@ -216,6 +210,7 @@ static void nd6_llinfo_free(void *);
 static void nd6_llinfo_purge(struct rtentry *);
 static void nd6_llinfo_get_ri(struct rtentry *, struct rt_reach_info *);
 static void nd6_llinfo_get_iflri(struct rtentry *, struct ifnet_llreach_info *);
+static void nd6_llinfo_refresh(struct rtentry *);
 static uint64_t ln_getexpire(struct llinfo_nd6 *);
 
 static void nd6_service(void *);
@@ -267,6 +262,13 @@ SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
        CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
        nd6_sysctl_prlist, "S,in6_defrouter", "");
 
+SYSCTL_DECL(_net_inet6_ip6);
+
+static int ip6_maxchainsent = 0;
+SYSCTL_INT(_net_inet6_ip6, OID_AUTO, maxchainsent,
+       CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxchainsent, 0,
+       "use dlil_output_list");
+
 void
 nd6_init(void)
 {
@@ -285,7 +287,6 @@ nd6_init(void)
        nd_if_lock_grp_attr = lck_grp_attr_alloc_init();
        nd_if_lock_grp = lck_grp_alloc_init("nd_if_lock", nd_if_lock_grp_attr);
        nd_if_lock_attr = lck_attr_alloc_init();
-       lck_rw_init(nd_if_rwlock, nd_if_lock_grp, nd_if_lock_attr);
 
        llinfo_nd6_zone = zinit(sizeof (struct llinfo_nd6),
            LLINFO_ND6_ZONE_MAX * sizeof (struct llinfo_nd6), 0,
@@ -331,7 +332,7 @@ nd6_llinfo_free(void *arg)
 
        /* Just in case there's anything there, free it */
        if (ln->ln_hold != NULL) {
-               m_freem(ln->ln_hold);
+               m_freem_list(ln->ln_hold);
                ln->ln_hold = NULL;
        }
 
@@ -403,6 +404,31 @@ nd6_llinfo_get_iflri(struct rtentry *rt, struct ifnet_llreach_info *iflri)
        }
 }
 
+static void
+nd6_llinfo_refresh(struct rtentry *rt)
+{
+       struct llinfo_nd6 *ln = rt->rt_llinfo;
+       uint64_t timenow = net_uptime();
+       /*
+        * Can't refresh permanent, static or entries that are
+        * not direct host entries
+        */
+       if (!ln || ln->ln_expire == 0 ||
+           (rt->rt_flags & RTF_STATIC) ||
+           !(rt->rt_flags & RTF_LLINFO)) {
+               return;
+       }
+
+       if ((ln->ln_state > ND6_LLINFO_INCOMPLETE) &&
+           (ln->ln_state < ND6_LLINFO_PROBE)) {
+               if (ln->ln_expire > timenow) {
+                       ln->ln_expire = timenow;
+                       ln->ln_state = ND6_LLINFO_PROBE;
+               }
+       }
+       return;
+}
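
nd6_llinfo_refresh() skips permanent, static and non-LLINFO entries, and for anything between INCOMPLETE and PROBE it pulls the expiry forward so the next timer pass probes the neighbor immediately. A stand-alone sketch of that state check (enum nc_state and struct nc_entry are invented stand-ins for the llinfo_nd6 fields):

#include <stdio.h>
#include <stdint.h>

/* Simplified neighbor-cache states, same ordering as the ND6_LLINFO_* values. */
enum nc_state { NC_INCOMPLETE, NC_REACHABLE, NC_STALE, NC_DELAY, NC_PROBE };

struct nc_entry {
        enum nc_state state;
        uint64_t expire;        /* 0 means a permanent entry */
        int is_static;
        int is_llinfo;
};

/*
 * Same shape as nd6_llinfo_refresh(): leave permanent/static/non-host
 * entries alone; otherwise, if the entry is past INCOMPLETE but not yet
 * probing, force an immediate probe by expiring it now.
 */
static void
refresh(struct nc_entry *e, uint64_t now)
{
        if (e == NULL || e->expire == 0 || e->is_static || !e->is_llinfo)
                return;
        if (e->state > NC_INCOMPLETE && e->state < NC_PROBE &&
            e->expire > now) {
                e->expire = now;
                e->state = NC_PROBE;
        }
}

int
main(void)
{
        struct nc_entry e = { NC_REACHABLE, 1000, 0, 1 };

        refresh(&e, 500);
        printf("state=%d expire=%llu\n", e.state,
            (unsigned long long)e.expire);   /* probes immediately */
        return 0;
}
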
+
 void
 ln_setexpire(struct llinfo_nd6 *ln, uint64_t expiry)
 {
@@ -437,13 +463,10 @@ ln_getexpire(struct llinfo_nd6 *ln)
 void
 nd6_ifreset(struct ifnet *ifp)
 {
-       struct nd_ifinfo *ndi;
-
-       lck_rw_assert(nd_if_rwlock, LCK_RW_ASSERT_HELD);
-       VERIFY(ifp != NULL && ifp->if_index < nd_ifinfo_indexlim);
-       ndi = &nd_ifinfo[ifp->if_index];
-
+       struct nd_ifinfo *ndi = ND_IFINFO(ifp);
+       VERIFY(NULL != ndi);
        VERIFY(ndi->initialized);
+
        lck_mtx_assert(&ndi->lock, LCK_MTX_ASSERT_OWNED);
        ndi->linkmtu = ifp->if_mtu;
        ndi->chlim = IPV6_DEFHLIM;
@@ -452,54 +475,12 @@ nd6_ifreset(struct ifnet *ifp)
        ndi->retrans = RETRANS_TIMER;
 }
 
-int
+void
 nd6_ifattach(struct ifnet *ifp)
 {
-       size_t newlim;
-       struct nd_ifinfo *ndi;
-
-       /*
-        * We have some arrays that should be indexed by if_index.
-        * since if_index will grow dynamically, they should grow too.
-        */
-       lck_rw_lock_shared(nd_if_rwlock);
-       newlim = nd_ifinfo_indexlim;
-       if (nd_ifinfo == NULL || if_index >= newlim) {
-               if (!lck_rw_lock_shared_to_exclusive(nd_if_rwlock))
-                       lck_rw_lock_exclusive(nd_if_rwlock);
-               lck_rw_assert(nd_if_rwlock, LCK_RW_ASSERT_EXCLUSIVE);
-
-               newlim = nd_ifinfo_indexlim;
-               if (nd_ifinfo == NULL || if_index >= newlim) {
-                       size_t n;
-                       caddr_t q;
-
-                       while (if_index >= newlim)
-                               newlim <<= 1;
-
-                       /* grow nd_ifinfo */
-                       n = newlim * sizeof (struct nd_ifinfo);
-                       q = (caddr_t)_MALLOC(n, M_IP6NDP, M_WAITOK);
-                       if (q == NULL) {
-                               lck_rw_done(nd_if_rwlock);
-                               return (ENOBUFS);
-                       }
-                       bzero(q, n);
-                       if (nd_ifinfo != NULL) {
-                               bcopy((caddr_t)nd_ifinfo, q, n/2);
-                               /*
-                                * We might want to pattern fill the old
-                                * array to catch use-after-free cases.
-                                */
-                               FREE((caddr_t)nd_ifinfo, M_IP6NDP);
-                       }
-                       nd_ifinfo = (struct nd_ifinfo *)(void *)q;
-                       nd_ifinfo_indexlim = newlim;
-               }
-       }
+       struct nd_ifinfo *ndi = ND_IFINFO(ifp);
 
-       VERIFY(ifp != NULL);
-       ndi = &nd_ifinfo[ifp->if_index];
+       VERIFY(NULL != ndi);
        if (!ndi->initialized) {
                lck_mtx_init(&ndi->lock, nd_if_lock_grp, nd_if_lock_attr);
                ndi->flags = ND6_IFF_PERFORMNUD;
@@ -508,42 +489,39 @@ nd6_ifattach(struct ifnet *ifp)
 
        lck_mtx_lock(&ndi->lock);
 
-       if (!(ifp->if_flags & IFF_MULTICAST))
+       if (!(ifp->if_flags & IFF_MULTICAST)) {
                ndi->flags |= ND6_IFF_IFDISABLED;
+       }
 
        nd6_ifreset(ifp);
        lck_mtx_unlock(&ndi->lock);
-
-       lck_rw_done(nd_if_rwlock);
-
        nd6_setmtu(ifp);
-
-       return (0);
+       return;
 }
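
The rewritten attach path above drops the globally grown nd_ifinfo[] array and its rwlock; ND state is now reached per interface via ND_IFINFO(ifp) and only needs its own mutex. A toy sketch of the difference in shape (struct iface and struct nd_state are invented; the real ndi carries many more fields):

#include <stdio.h>
#include <string.h>

/* Per-interface ND state embedded in (or hung off) the interface itself. */
struct nd_state {
        int initialized;
        unsigned int linkmtu;
};

struct iface {
        char name[16];
        unsigned int mtu;
        struct nd_state nd;     /* what ND_IFINFO(ifp) resolves to */
};

/*
 * With the state embedded per interface there is no global array to grow
 * under a rwlock when if_index exceeds the old limit; attach just
 * initializes the interface's own record.
 */
static void
nd_attach(struct iface *ifp)
{
        if (!ifp->nd.initialized)
                ifp->nd.initialized = 1;
        ifp->nd.linkmtu = ifp->mtu;
}

int
main(void)
{
        struct iface en0;

        memset(&en0, 0, sizeof(en0));
        snprintf(en0.name, sizeof(en0.name), "en0");
        en0.mtu = 1500;
        nd_attach(&en0);
        printf("%s linkmtu=%u\n", en0.name, en0.nd.linkmtu);
        return 0;
}
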
 
+#if 0
 /*
- * Reset ND level link MTU. This function is called when the physical MTU
- * changes, which means we might have to adjust the ND level MTU.
+ * XXX Look more into this. Especially since we recycle ifnets and do delayed
+ * cleanup
  */
+void
+nd6_ifdetach(struct nd_ifinfo *nd)
+{
+       /* XXX destroy nd's lock? */
+       FREE(nd, M_IP6NDP);
+}
+#endif
+
 void
 nd6_setmtu(struct ifnet *ifp)
 {
-       struct nd_ifinfo *ndi;
+       struct nd_ifinfo *ndi = ND_IFINFO(ifp);
        u_int32_t oldmaxmtu, maxmtu;
 
-       /*
-        * Make sure IPv6 is enabled for the interface first,
-        * because this can be called directly from SIOCSIFMTU for IPv4
-        */
-       lck_rw_lock_shared(nd_if_rwlock);
-       if (ifp->if_index >= nd_ifinfo_indexlim ||
-           !nd_ifinfo[ifp->if_index].initialized) {
-               lck_rw_done(nd_if_rwlock);
-               return; /* nd_ifinfo out of bound, or not yet initialized */
+       if ((NULL == ndi) || (FALSE == ndi->initialized)) {
+               return;
        }
 
-       ndi = &nd_ifinfo[ifp->if_index];
-       VERIFY(ndi->initialized);
        lck_mtx_lock(&ndi->lock);
        oldmaxmtu = ndi->maxmtu;
 
@@ -573,11 +551,11 @@ nd6_setmtu(struct ifnet *ifp)
        }
        ndi->linkmtu = ifp->if_mtu;
        lck_mtx_unlock(&ndi->lock);
-       lck_rw_done(nd_if_rwlock);
 
        /* also adjust in6_maxmtu if necessary. */
-       if (maxmtu > in6_maxmtu)
+       if (maxmtu > in6_maxmtu) {
                in6_setmaxmtu();
+       }
 }
 
 void
@@ -749,6 +727,7 @@ nd6_service(void *arg)
        struct ifnet *ifp = NULL;
        struct in6_ifaddr *ia6, *nia6;
        uint64_t timenow;
+       bool send_nc_failure_kev = false;
 
        lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
        /*
@@ -771,6 +750,50 @@ nd6_service(void *arg)
        net_update_uptime();
        timenow = net_uptime();
 again:
+       /*
+        * send_nc_failure_kev gets set when a default router's IPv6 address
+        * can't be resolved.
+        * That can happen in either of two ways:
+        * 1. The entry resolved once but can't be resolved later, and the
+        * neighbor cache entry for the gateway is deleted after the maximum
+        * number of probe attempts.
+        *
+        * 2. The entry is in ND6_LLINFO_INCOMPLETE and cannot be resolved
+        * after the maximum number of neighbor address resolution attempts.
+        *
+        * Both cases set send_nc_failure_kev to true. ifp is also set to the
+        * previous neighbor cache entry's route's ifp.
+        * Once we are done sending the notification, set send_nc_failure_kev
+        * back to false to stop sending spurious notifications for
+        * non-default-router neighbors.
+        *
+        * We may want to send more information, like the gateway's IP address
+        * that could not be resolved; however, right now we do not install
+        * more than one default route per interface in the routing table.
+        */
+       if (send_nc_failure_kev && ifp->if_addrlen == IF_LLREACH_MAXLEN) {
+               struct kev_msg ev_msg;
+               struct kev_nd6_ndfailure nd6_ndfailure;
+               bzero(&ev_msg, sizeof(ev_msg));
+               bzero(&nd6_ndfailure, sizeof(nd6_ndfailure));
+               ev_msg.vendor_code      = KEV_VENDOR_APPLE;
+               ev_msg.kev_class        = KEV_NETWORK_CLASS;
+               ev_msg.kev_subclass     = KEV_ND6_SUBCLASS;
+               ev_msg.event_code       = KEV_ND6_NDFAILURE;
+
+               nd6_ndfailure.link_data.if_family = ifp->if_family;
+               nd6_ndfailure.link_data.if_unit = ifp->if_unit;
+               strlcpy(nd6_ndfailure.link_data.if_name,
+                   ifp->if_name,
+                   sizeof(nd6_ndfailure.link_data.if_name));
+               ev_msg.dv[0].data_ptr = &nd6_ndfailure;
+               ev_msg.dv[0].data_length =
+                       sizeof(nd6_ndfailure);
+               kev_post_msg(&ev_msg);
+       }
+
+       send_nc_failure_kev = false;
+       ifp = NULL;
        /*
         * The global list llinfo_nd6 is modified by nd6_request() and is
         * therefore protected by rnh_lock.  For obvious reasons, we cannot
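The block added above posts a standard network kernel event whenever a default router's neighbor-cache entry fails to resolve (the matching KEV_ND6_NDALIVE event is posted from nd6_na_input further down). A minimal userspace listener sketch for these events follows; it uses the stock Darwin kernel-event socket API, the subclass and event-code values are the ones defined by this patch, and error handling is omitted for brevity.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sys_domain.h>
#include <sys/kern_event.h>

int
main(void)
{
	int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
	struct kev_request req;
	union {
		struct kern_event_msg ev;
		char buf[1024];
	} u;

	memset(&req, 0, sizeof (req));
	req.vendor_code  = KEV_VENDOR_APPLE;
	req.kev_class    = KEV_NETWORK_CLASS;
	req.kev_subclass = 7;	/* KEV_ND6_SUBCLASS; the macro itself is kernel-private */
	ioctl(fd, SIOCSKEVFILT, &req);

	while (read(fd, &u, sizeof (u)) > 0) {
		/* per this patch: 2 == KEV_ND6_NDFAILURE, 3 == KEV_ND6_NDALIVE */
		printf("nd6 event code %u\n", u.ev.event_code);
	}
	close(fd);
	return (0);
}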
@@ -791,6 +814,7 @@ again:
                struct sockaddr_in6 *dst;
                struct llinfo_nd6 *next;
                u_int32_t retrans, flags;
+               struct nd_ifinfo *ndi = NULL;
 
                /* ln_next/prev/rt is protected by rnh_lock */
                next = ln->ln_next;
@@ -864,37 +888,10 @@ again:
                        continue;
                }
 
-               lck_rw_lock_shared(nd_if_rwlock);
-               if (ifp->if_index >= nd_ifinfo_indexlim) {
-                       /*
-                        * In the event the nd_ifinfo[] array is not in synch
-                        * by now, we don't want to hold on to the llinfo entry
-                        * forever; just purge it rather than have it consume
-                        * resources.  That's better than transmitting out of
-                        * the interface as the rest of the layers may not be
-                        * ready as well.
-                        *
-                        * We can retire this logic once we get rid of the
-                        * separate array and utilize a per-ifnet structure.
-                        */
-                       retrans = RETRANS_TIMER;
-                       flags = ND6_IFF_PERFORMNUD;
-                       if (ln->ln_expire != 0) {
-                               ln->ln_state = ND6_LLINFO_PURGE;
-                               log (LOG_ERR, "%s: purging rt(0x%llx) "
-                                   "ln(0x%llx) dst %s, if_index %d >= %d\n",
-                                   __func__, (uint64_t)VM_KERNEL_ADDRPERM(rt),
-                                   (uint64_t)VM_KERNEL_ADDRPERM(ln),
-                                   ip6_sprintf(&dst->sin6_addr), ifp->if_index,
-                                   nd_ifinfo_indexlim);
-                       }
-               } else {
-                       struct nd_ifinfo *ndi = ND_IFINFO(ifp);
-                       VERIFY(ndi->initialized);
-                       retrans = ndi->retrans;
-                       flags = ndi->flags;
-               }
-               lck_rw_done(nd_if_rwlock);
+               ndi = ND_IFINFO(ifp);
+               VERIFY(ndi->initialized);
+               retrans = ndi->retrans;
+               flags = ndi->flags;
 
                RT_LOCK_ASSERT_HELD(rt);
 
@@ -920,20 +917,21 @@ again:
                        } else {
                                struct mbuf *m = ln->ln_hold;
                                ln->ln_hold = NULL;
+                               send_nc_failure_kev = (rt->rt_flags & RTF_ROUTER) ? true : false;
                                if (m != NULL) {
-                                       /*
-                                        * Fake rcvif to make ICMP error
-                                        * more helpful in diagnosing
-                                        * for the receiver.
-                                        * XXX: should we consider
-                                        * older rcvif?
-                                        */
-                                       m->m_pkthdr.rcvif = ifp;
                                        RT_ADDREF_LOCKED(rt);
                                        RT_UNLOCK(rt);
                                        lck_mtx_unlock(rnh_lock);
-                                       icmp6_error(m, ICMP6_DST_UNREACH,
-                                           ICMP6_DST_UNREACH_ADDR, 0);
+
+                                       struct mbuf *mnext;
+                                       while (m) {
+                                               mnext = m->m_nextpkt;
+                                               m->m_nextpkt = NULL;
+                                               m->m_pkthdr.rcvif = ifp;
+                                               icmp6_error_flag(m, ICMP6_DST_UNREACH,
+                                                   ICMP6_DST_UNREACH_ADDR, 0, 0);
+                                               m = mnext;
+                                       }
                                } else {
                                        RT_ADDREF_LOCKED(rt);
                                        RT_UNLOCK(rt);
@@ -1008,6 +1006,7 @@ again:
                                ap->aging++;
                                lck_mtx_lock(rnh_lock);
                        } else {
+                               send_nc_failure_kev = (rt->rt_flags & RTF_ROUTER) ? true : false;
                                RT_ADDREF_LOCKED(rt);
                                RT_UNLOCK(rt);
                                lck_mtx_unlock(rnh_lock);
@@ -2184,6 +2183,7 @@ nd6_rtrequest(int req, struct rtentry *rt, struct sockaddr *sa)
                rt->rt_llinfo_get_iflri = nd6_llinfo_get_iflri;
                rt->rt_llinfo_purge     = nd6_llinfo_purge;
                rt->rt_llinfo_free      = nd6_llinfo_free;
+               rt->rt_llinfo_refresh   = nd6_llinfo_refresh;
                rt->rt_flags |= RTF_LLINFO;
                ln->ln_rt = rt;
                /* this is required for "ndp" command. - shin */
@@ -2368,7 +2368,7 @@ nd6_rtrequest(int req, struct rtentry *rt, struct sockaddr *sa)
 
                rt->rt_flags &= ~RTF_LLINFO;
                if (ln->ln_hold != NULL) {
-                       m_freem(ln->ln_hold);
+                       m_freem_list(ln->ln_hold);
                        ln->ln_hold = NULL;
                }
        }
@@ -2586,10 +2586,9 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
        struct nd_defrouter *dr;
        struct nd_prefix *pr;
        struct rtentry *rt;
-       int i, error = 0;
+       int error = 0;
 
        VERIFY(ifp != NULL);
-       i = ifp->if_index;
 
        switch (cmd) {
        case SIOCGDRLST_IN6_32:         /* struct in6_drlist_32 */
@@ -2621,59 +2620,58 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
                 * SIOCGIFINFO_IN6 ioctl is encoded with in6_ondireq
                 * instead of in6_ndireq, so we treat it as such.
                 */
-               lck_rw_lock_shared(nd_if_rwlock);
                ndi = ND_IFINFO(ifp);
-               if (!nd_ifinfo || i >= nd_ifinfo_indexlim ||
-                   !ndi->initialized) {
-                       lck_rw_done(nd_if_rwlock);
+               if ((NULL == ndi) || (FALSE == ndi->initialized)) {
                        error = EINVAL;
                        break;
                }
                lck_mtx_lock(&ndi->lock);
                linkmtu = IN6_LINKMTU(ifp);
                bcopy(&linkmtu, &ondi->ndi.linkmtu, sizeof (linkmtu));
-               bcopy(&nd_ifinfo[i].maxmtu, &ondi->ndi.maxmtu,
+               bcopy(&ndi->maxmtu, &ondi->ndi.maxmtu,
                    sizeof (u_int32_t));
-               bcopy(&nd_ifinfo[i].basereachable, &ondi->ndi.basereachable,
+               bcopy(&ndi->basereachable, &ondi->ndi.basereachable,
                    sizeof (u_int32_t));
-               bcopy(&nd_ifinfo[i].reachable, &ondi->ndi.reachable,
+               bcopy(&ndi->reachable, &ondi->ndi.reachable,
                    sizeof (u_int32_t));
-               bcopy(&nd_ifinfo[i].retrans, &ondi->ndi.retrans,
+               bcopy(&ndi->retrans, &ondi->ndi.retrans,
                    sizeof (u_int32_t));
-               bcopy(&nd_ifinfo[i].flags, &ondi->ndi.flags,
+               bcopy(&ndi->flags, &ondi->ndi.flags,
                    sizeof (u_int32_t));
-               bcopy(&nd_ifinfo[i].recalctm, &ondi->ndi.recalctm,
+               bcopy(&ndi->recalctm, &ondi->ndi.recalctm,
                    sizeof (int));
-               ondi->ndi.chlim = nd_ifinfo[i].chlim;
+               ondi->ndi.chlim = ndi->chlim;
                ondi->ndi.receivedra = 0;
                lck_mtx_unlock(&ndi->lock);
-               lck_rw_done(nd_if_rwlock);
                break;
        }
 
        case SIOCSIFINFO_FLAGS: {       /* struct in6_ndireq */
+               /*
+                * XXX BSD has a bunch of checks here to ensure
+                * that the interface-disabled flag is not reset if
+                * the link-local address has failed DAD.
+                * Investigate that part.
+                */
                struct in6_ndireq *cndi = (struct in6_ndireq *)(void *)data;
                u_int32_t oflags, flags;
-               struct nd_ifinfo *ndi;
+               struct nd_ifinfo *ndi = ND_IFINFO(ifp);
 
                /* XXX: almost all other fields of cndi->ndi is unused */
-               lck_rw_lock_shared(nd_if_rwlock);
-               ndi = ND_IFINFO(ifp);
-               if (!nd_ifinfo || i >= nd_ifinfo_indexlim ||
-                   !ndi->initialized) {
-                       lck_rw_done(nd_if_rwlock);
+               if ((NULL == ndi) || !ndi->initialized) {
                        error = EINVAL;
                        break;
                }
+
                lck_mtx_lock(&ndi->lock);
-               oflags = nd_ifinfo[i].flags;
-               bcopy(&cndi->ndi.flags, &nd_ifinfo[i].flags, sizeof (flags));
-               flags = nd_ifinfo[i].flags;
+               oflags = ndi->flags;
+               bcopy(&cndi->ndi.flags, &(ndi->flags), sizeof (flags));
+               flags = ndi->flags;
                lck_mtx_unlock(&ndi->lock);
-               lck_rw_done(nd_if_rwlock);
 
-               if (oflags == flags)
+               if (oflags == flags) {
                        break;
+               }
 
                error = nd6_setifinfo(ifp, oflags, flags);
                break;
@@ -3052,7 +3050,7 @@ fail:
                                 * set the 2nd argument as the 1st one.
                                 */
                                RT_UNLOCK(rt);
-                               nd6_output(ifp, ifp, m, &sin6, rt, NULL);
+                               nd6_output_list(ifp, ifp, m, &sin6, rt, NULL);
                                RT_LOCK(rt);
                        }
                } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
@@ -3159,16 +3157,17 @@ static void
 nd6_slowtimo(void *arg)
 {
 #pragma unused(arg)
-       int i;
-       struct nd_ifinfo *nd6if;
+       struct nd_ifinfo *nd6if = NULL;
+       struct ifnet *ifp = NULL;
+
+       ifnet_head_lock_shared();
+       for (ifp = ifnet_head.tqh_first; ifp;
+           ifp = ifp->if_link.tqe_next) {
+               nd6if = ND_IFINFO(ifp);
+               if ((NULL == nd6if) || (FALSE == nd6if->initialized)) {
+                       continue;
+               }
 
-       lck_rw_lock_shared(nd_if_rwlock);
-       for (i = 1; i < if_index + 1; i++) {
-               if (!nd_ifinfo || i >= nd_ifinfo_indexlim)
-                       break;
-               nd6if = &nd_ifinfo[i];
-               if (!nd6if->initialized)
-                       break;
                lck_mtx_lock(&nd6if->lock);
                if (nd6if->basereachable && /* already initialized */
                    (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
@@ -3184,22 +3183,34 @@ nd6_slowtimo(void *arg)
                }
                lck_mtx_unlock(&nd6if->lock);
        }
-       lck_rw_done(nd_if_rwlock);
+       ifnet_head_done();
        timeout(nd6_slowtimo, NULL, ND6_SLOWTIMER_INTERVAL * hz);
 }
 
-#define        senderr(e) { error = (e); goto bad; }
 int
 nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
     struct sockaddr_in6 *dst, struct rtentry *hint0, struct flowadv *adv)
 {
-       struct mbuf *m = m0;
+       return nd6_output_list(ifp, origifp, m0, dst, hint0, adv);
+}
+
+/*
+ * nd6_output_list()
+ *
+ * Assumption: the route determination made for the first packet can be
+ * correctly applied to all packets in the chain.
+ */
+#define        senderr(e) { error = (e); goto bad; }
+int
+nd6_output_list(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
+    struct sockaddr_in6 *dst, struct rtentry *hint0, struct flowadv *adv)
+{
        struct rtentry *rt = hint0, *hint = hint0;
        struct llinfo_nd6 *ln = NULL;
        int error = 0;
        uint64_t timenow;
        struct rtentry *rtrele = NULL;
-       struct nd_ifinfo *ndi;
+       struct nd_ifinfo *ndi = NULL;
 
        if (rt != NULL) {
                RT_LOCK_SPIN(rt);
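A hedged sketch of the intended calling convention for the new entry point: callers link packets bound for the same destination through m_nextpkt, so route and neighbor state are resolved once for the whole chain rather than per packet. The packets and surrounding variables below are hypothetical; only nd6_output_list() and its argument order come from this patch.

	struct mbuf *chain = pkt_a;		/* hypothetical pre-built packets */
	int error;

	pkt_a->m_nextpkt = pkt_b;		/* link the packets into a chain */
	pkt_b->m_nextpkt = NULL;

	/* one route/llinfo lookup now covers both packets */
	error = nd6_output_list(ifp, ifp, chain, &dst_sin6, rt, NULL);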
@@ -3243,7 +3254,7 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
                                if (rt->rt_ifp != ifp) {
                                        /* XXX: loop care? */
                                        RT_UNLOCK(rt);
-                                       error = nd6_output(ifp, origifp, m0,
+                                       error = nd6_output_list(ifp, origifp, m0,
                                            dst, rt, adv);
                                        rtfree(rt);
                                        return (error);
@@ -3444,16 +3455,15 @@ lookup:
        }
 
        if (!ln || !rt) {
-               if (rt != NULL)
+               if (rt != NULL) {
                        RT_UNLOCK(rt);
-               lck_rw_lock_shared(nd_if_rwlock);
+               }
                ndi = ND_IFINFO(ifp);
                VERIFY(ndi != NULL && ndi->initialized);
                lck_mtx_lock(&ndi->lock);
                if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
                    !(ndi->flags & ND6_IFF_PERFORMNUD)) {
                        lck_mtx_unlock(&ndi->lock);
-                       lck_rw_done(nd_if_rwlock);
                        log(LOG_DEBUG,
                            "nd6_output: can't allocate llinfo for %s "
                            "(ln=0x%llx, rt=0x%llx)\n",
@@ -3463,7 +3473,6 @@ lookup:
                        senderr(EIO);   /* XXX: good error? */
                }
                lck_mtx_unlock(&ndi->lock);
-               lck_rw_done(nd_if_rwlock);
 
                goto sendpkt;   /* send anyway */
        }
@@ -3548,18 +3557,16 @@ lookup:
        if (ln->ln_state == ND6_LLINFO_NOSTATE)
                ln->ln_state = ND6_LLINFO_INCOMPLETE;
        if (ln->ln_hold)
-               m_freem(ln->ln_hold);
-       ln->ln_hold = m;
+               m_freem_list(ln->ln_hold);
+       ln->ln_hold = m0;
        if (ln->ln_expire != 0 && ln->ln_asked < nd6_mmaxtries &&
            ln->ln_expire <= timenow) {
                ln->ln_asked++;
-               lck_rw_lock_shared(nd_if_rwlock);
                ndi = ND_IFINFO(ifp);
                VERIFY(ndi != NULL && ndi->initialized);
                lck_mtx_lock(&ndi->lock);
                ln_setexpire(ln, timenow + ndi->retrans / 1000);
                lck_mtx_unlock(&ndi->lock);
-               lck_rw_done(nd_if_rwlock);
                RT_UNLOCK(rt);
                /* We still have a reference on rt (for ln) */
                if (ip6_forwarding)
@@ -3571,6 +3578,9 @@ lookup:
                nd6_sched_timeout(NULL, NULL);
                lck_mtx_unlock(rnh_lock);
        } else {
+               if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
+                       ln->ln_expire = timenow;
+               }
                RT_UNLOCK(rt);
        }
        /*
@@ -3615,13 +3625,13 @@ sendpkt:
 
        if (ifp->if_flags & IFF_LOOPBACK) {
                /* forwarding rules require the original scope_id */
-               m->m_pkthdr.rcvif = origifp;
-               error = dlil_output(origifp, PF_INET6, m, (caddr_t)rt,
+               m0->m_pkthdr.rcvif = origifp;
+               error = dlil_output(origifp, PF_INET6, m0, (caddr_t)rt,
                    SA(dst), 0, adv);
                goto release;
        } else {
                /* Do not allow loopback address to wind up on a wire */
-               struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+               struct ip6_hdr *ip6 = mtod(m0, struct ip6_hdr *);
 
                if ((IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) ||
                    IN6_IS_ADDR_LOOPBACK(&ip6->ip6_dst))) {
@@ -3639,25 +3649,34 @@ sendpkt:
                RT_UNLOCK(rt);
        }
 
-       if (hint != NULL && nstat_collect) {
-               int scnt;
+       struct mbuf *mcur = m0;
+       uint32_t pktcnt = 0;
 
-               if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) &&
-                   (m->m_pkthdr.tso_segsz > 0))
-                       scnt = m->m_pkthdr.len / m->m_pkthdr.tso_segsz;
-               else
-                       scnt = 1;
+       while (mcur) {
+               if (hint != NULL && nstat_collect) {
+                       int scnt;
 
-               nstat_route_tx(hint, scnt, m->m_pkthdr.len, 0);
-       }
+                       if ((mcur->m_pkthdr.csum_flags & CSUM_TSO_IPV6) &&
+                                       (mcur->m_pkthdr.tso_segsz > 0))
+                               scnt = mcur->m_pkthdr.len / mcur->m_pkthdr.tso_segsz;
+                       else
+                               scnt = 1;
 
-       m->m_pkthdr.rcvif = NULL;
-       error = dlil_output(ifp, PF_INET6, m, (caddr_t)rt, SA(dst), 0, adv);
+                       nstat_route_tx(hint, scnt, mcur->m_pkthdr.len, 0);
+               }
+               pktcnt++;
+
+               mcur->m_pkthdr.rcvif = NULL;
+               mcur = mcur->m_nextpkt;
+       }
+       if (pktcnt > ip6_maxchainsent)
+               ip6_maxchainsent = pktcnt;
+       error = dlil_output(ifp, PF_INET6, m0, (caddr_t)rt, SA(dst), 0, adv);
        goto release;
 
 bad:
-       if (m != NULL)
-               m_freem(m);
+       if (m0 != NULL)
+               m_freem_list(m0);
 
 release:
        /* Clean up "rt" unless it's already been done */
index 0b1c36bff802cc18db629be8b2a390530e25a962..08b52d26a2a6da5e49be01beaed165ade701ff77 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -69,6 +69,7 @@
 #include <net/flowadv.h>
 #include <kern/locks.h>
 #include <sys/tree.h>
+#include <netinet6/nd6_var.h>
 
 struct llinfo_nd6 {
        /*
@@ -135,6 +136,7 @@ struct      llinfo_nd6 {
 struct nd_ifinfo {
 #else
 /* For binary compatibility, this structure must not change */
+/* NOTE: nd_ifinfo is defined in nd6_var.h */
 struct nd_ifinfo_compat {
 #endif /* !BSD_KERNEL_PRIVATE */
        u_int32_t linkmtu;              /* LinkMTU */
@@ -152,30 +154,6 @@ struct nd_ifinfo_compat {
        u_int8_t randomid[8];   /* current random ID */
 };
 
-#if defined(BSD_KERNEL_PRIVATE)
-struct nd_ifinfo {
-       decl_lck_mtx_data(, lock);
-       boolean_t initialized; /* Flag to see the entry is initialized */
-       u_int32_t linkmtu;              /* LinkMTU */
-       u_int32_t maxmtu;               /* Upper bound of LinkMTU */
-       u_int32_t basereachable;        /* BaseReachableTime */
-       u_int32_t reachable;            /* Reachable Time */
-       u_int32_t retrans;              /* Retrans Timer */
-       u_int32_t flags;                /* Flags */
-       int recalctm;                   /* BaseReacable re-calculation timer */
-       u_int8_t chlim;                 /* CurHopLimit */
-       u_int8_t _pad[3];
-       /* the following 3 members are for privacy extension for addrconf */
-       u_int8_t randomseed0[8]; /* upper 64 bits of SHA1 digest */
-       u_int8_t randomseed1[8]; /* lower 64 bits (usually the EUI64 IFID) */
-       u_int8_t randomid[8];   /* current random ID */
-       /* keep track of routers and prefixes on this link */
-       int32_t nprefixes;
-       int32_t ndefrouters;
-       struct in6_cga_modifier local_cga_modifier;
-};
-#endif /* BSD_KERNEL_PRIVATE */
-
 #define        ND6_IFF_PERFORMNUD              0x1
 #if defined(PRIVATE)
 
@@ -470,14 +448,10 @@ struct    in6_ndifreq_64 {
 #define        ND6_MAX_LIFETIME                0x7fffffff
 
 #ifdef BSD_KERNEL_PRIVATE
-/*
- * Protects nd_ifinfo[]
- */
-extern lck_rw_t *nd_if_rwlock;
-
-#define        ND_IFINFO(ifp) \
-       ((ifp)->if_index < nd_ifinfo_indexlim ? &nd_ifinfo[(ifp)->if_index] : \
-       NULL)
+#define ND_IFINFO(ifp)                         \
+    ((ifp == NULL) ? NULL :                    \
+     ((IN6_IFEXTRA(ifp) == NULL) ? NULL :      \
+      (&IN6_IFEXTRA(ifp)->nd_ifinfo)))
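With the per-ifnet structure, the caller pattern used throughout this patch reduces to the condensed sketch below (an illustration, not a new API): the NULL and initialized checks replace the old bounds check against nd_ifinfo_indexlim, and nd_if_rwlock disappears entirely.

	struct nd_ifinfo *ndi = ND_IFINFO(ifp);	/* NULL if ifp or its IPv6 data is gone */

	if (ndi == NULL || !ndi->initialized)
		return;

	lck_mtx_lock(&ndi->lock);
	/* read or update retrans, reachable, flags, ... */
	lck_mtx_unlock(&ndi->lock);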
 
 /*
  * In a more readable form, we derive linkmtu based on:
@@ -515,6 +489,10 @@ extern lck_rw_t *nd_if_rwlock;
                (((MIN_RANDOM_FACTOR * (x >> 10)) + (RandomULong() & \
                ((MAX_RANDOM_FACTOR - MIN_RANDOM_FACTOR) * (x >> 10)))) /1000)
 
+/* prefix expiry times */
+#define        ND6_PREFIX_EXPIRY_UNSPEC        -1
+#define ND6_PREFIX_EXPIRY_NEVER                0
+
 TAILQ_HEAD(nd_drhead, nd_defrouter);
 struct nd_defrouter {
        decl_lck_mtx_data(, nddr_lock);
@@ -689,10 +667,22 @@ struct nd_prefix_list {
 #endif /* BSD_KERNEL_PRIVATE */
 
 #if defined(PRIVATE)
+struct kev_nd6_ndfailure {
+       struct net_event_data link_data;
+};
+
+struct kev_nd6_ndalive {
+       struct net_event_data link_data;
+};
+
 /* ND6 kernel event subclass value */
-#define        KEV_ND6_SUBCLASS                7
+#define        KEV_ND6_SUBCLASS                7
+
 /* ND6 kernel event action type */
-#define        KEV_ND6_RA                      1
+#define        KEV_ND6_RA                      1
+#define        KEV_ND6_NDFAILURE               2 /* IPv6 neighbor cache entry expiry */
+#define        KEV_ND6_NDALIVE                 3 /* IPv6 neighbor reachable */
+
 /* ND6 RA L2 source address length */
 #define        ND6_ROUTER_LL_SIZE              64
 
@@ -738,11 +728,9 @@ extern int nd6_accept_6to4;
 extern int nd6_maxnudhint;
 extern int nd6_gctimer;
 extern struct llinfo_nd6 llinfo_nd6;
-extern struct nd_ifinfo *nd_ifinfo;
 extern struct nd_drhead nd_defrouter;
 extern struct nd_prhead nd_prefix;
 extern int nd6_debug;
-extern size_t nd_ifinfo_indexlim;
 extern int nd6_onlink_ns_rfc4861;
 extern int nd6_optimistic_dad;
 
@@ -794,8 +782,8 @@ union nd_opts {
 extern int nd6_sched_timeout_want;
 extern void nd6_sched_timeout(struct timeval *, struct timeval *);
 extern void nd6_init(void);
-extern void nd6_ifreset(struct ifnet *);
-extern int nd6_ifattach(struct ifnet *);
+extern void nd6_ifreset(struct ifnet *ifp);
+extern void nd6_ifattach(struct ifnet *);
 extern int nd6_is_addr_neighbor(struct sockaddr_in6 *, struct ifnet *, int);
 extern void nd6_option_init(void *, int, union nd_opts *);
 extern struct nd_opt_hdr *nd6_option(union nd_opts *);
@@ -811,6 +799,8 @@ extern void nd6_rtrequest(int, struct rtentry *, struct sockaddr *);
 extern int nd6_ioctl(u_long, caddr_t, struct ifnet *);
 extern void nd6_cache_lladdr(struct ifnet *, struct in6_addr *,
     char *, int, int, int);
+extern int nd6_output_list(struct ifnet *, struct ifnet *, struct mbuf *,
+    struct sockaddr_in6 *, struct rtentry *, struct flowadv *);
 extern int nd6_output(struct ifnet *, struct ifnet *, struct mbuf *,
     struct sockaddr_in6 *, struct rtentry *, struct flowadv *);
 extern int nd6_storelladdr(struct ifnet *, struct rtentry *, struct mbuf *,
@@ -865,7 +855,7 @@ extern int nd6_prefix_onlink_scoped(struct nd_prefix *, unsigned int);
 extern int nd6_prefix_offlink(struct nd_prefix *);
 extern void pfxlist_onlink_check(void);
 extern struct nd_defrouter *defrouter_lookup(struct in6_addr *, struct ifnet *);
-extern struct nd_prefix *nd6_prefix_lookup(struct nd_prefix *);
+extern struct nd_prefix *nd6_prefix_lookup(struct nd_prefix *, int);
 extern int in6_init_prefix_ltimes(struct nd_prefix *ndpr);
 extern void rt6_flush(struct in6_addr *, struct ifnet *);
 extern int nd6_setdefaultiface(int);
index 5de3a3b4f5777b82d20b8f7416ca158fb31442ed..64c4720e35525e5c8414041cc8aeba33ec15a860 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -263,6 +263,7 @@ nd6_ns_input(
        struct sockaddr_dl proxydl;
        boolean_t advrouter;
        boolean_t is_dad_probe;
+       int oflgclr = 0;
 
        if ((ifp->if_eflags & IFEF_IPV6_ND6ALT) != 0) {
                nd6log((LOG_INFO, "nd6_ns_input: on ND6ALT interface!\n"));
@@ -456,7 +457,7 @@ nd6_ns_input(
         * src addr     how to process?
         * ---          ---
         * multicast    of course, invalid (rejected in ip6_input)
-        * unicast      somebody is doing address resolution -> ignore
+        * unicast      somebody is doing address resolution
         * unspec       dup address detection
         *
         * The processing is defined in the "draft standard" RFC 4862 (and by
@@ -470,12 +471,19 @@ nd6_ns_input(
                 * duplicate address detection.
                 *
                 * If not, the packet is for addess resolution;
-                * silently ignore it.
+                * silently ignore it when not optimistic.
+                *
+                * Per RFC 4429, the reply for an optimistic address must
+                * have the Override flag cleared.
                 */
-               if (is_dad_probe)
-                       nd6_dad_ns_input(m, ifa, lladdr, lladdrlen);
+               if (!is_dad_probe && (dadprogress & IN6_IFF_OPTIMISTIC) != 0) {
+                       oflgclr = 1;
+               } else {
+                       if (is_dad_probe)
+                               nd6_dad_ns_input(m, ifa, lladdr, lladdrlen);
 
-               goto freeit;
+                       goto freeit;
+               }
        }
 
        /* Are we an advertising router on this interface? */
@@ -507,7 +515,7 @@ nd6_ns_input(
            ND_NEIGHBOR_SOLICIT, 0);
 
        nd6_na_output(ifp, &saddr6, &taddr6,
-           ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) |
+           ((anycast || proxy || !tlladdr || oflgclr) ? 0 : ND_NA_FLAG_OVERRIDE) |
            (advrouter ? ND_NA_FLAG_ROUTER : 0) | ND_NA_FLAG_SOLICITED,
            tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL);
  freeit:
@@ -850,6 +858,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
        struct sockaddr_dl *sdl;
        union nd_opts ndopts;
        uint64_t timenow;
+       bool send_nc_alive_kev = false;
 
        if ((ifp->if_eflags & IFEF_IPV6_ND6ALT) != 0) {
                nd6log((LOG_INFO, "nd6_na_input: on ND6ALT interface!\n"));
@@ -990,17 +999,16 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
                sdl->sdl_alen = ifp->if_addrlen;
                bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen);
                if (is_solicited) {
+                       send_nc_alive_kev = (rt->rt_flags & RTF_ROUTER) ? true : false;
                        ln->ln_state = ND6_LLINFO_REACHABLE;
                        if (ln->ln_expire != 0) {
-                               struct nd_ifinfo *ndi;
+                               struct nd_ifinfo *ndi = NULL;
 
-                               lck_rw_lock_shared(nd_if_rwlock);
                                ndi = ND_IFINFO(rt->rt_ifp);
                                VERIFY(ndi != NULL && ndi->initialized);
                                lck_mtx_lock(&ndi->lock);
                                ln_setexpire(ln, timenow + ndi->reachable);
                                lck_mtx_unlock(&ndi->lock);
-                               lck_rw_done(nd_if_rwlock);
                                RT_UNLOCK(rt);
                                lck_mtx_lock(rnh_lock);
                                nd6_sched_timeout(NULL, NULL);
@@ -1091,16 +1099,14 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
                        if (is_solicited) {
                                ln->ln_state = ND6_LLINFO_REACHABLE;
                                if (ln->ln_expire != 0) {
-                                       struct nd_ifinfo *ndi;
+                                       struct nd_ifinfo *ndi = NULL;
 
-                                       lck_rw_lock_shared(nd_if_rwlock);
                                        ndi = ND_IFINFO(ifp);
                                        VERIFY(ndi != NULL && ndi->initialized);
                                        lck_mtx_lock(&ndi->lock);
                                        ln_setexpire(ln,
                                            timenow + ndi->reachable);
                                        lck_mtx_unlock(&ndi->lock);
-                                       lck_rw_done(nd_if_rwlock);
                                        RT_UNLOCK(rt);
                                        lck_mtx_lock(rnh_lock);
                                        nd6_sched_timeout(NULL, NULL);
@@ -1152,6 +1158,28 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
                }
                ln->ln_router = is_router;
        }
+
+       if (send_nc_alive_kev && (ifp->if_addrlen == IF_LLREACH_MAXLEN)) {
+               struct kev_msg ev_msg;
+               struct kev_nd6_ndalive nd6_ndalive;
+               bzero(&ev_msg, sizeof(ev_msg));
+               bzero(&nd6_ndalive, sizeof(nd6_ndalive));
+               ev_msg.vendor_code      = KEV_VENDOR_APPLE;
+               ev_msg.kev_class        = KEV_NETWORK_CLASS;
+               ev_msg.kev_subclass     = KEV_ND6_SUBCLASS;
+               ev_msg.event_code       = KEV_ND6_NDALIVE;
+
+               nd6_ndalive.link_data.if_family = ifp->if_family;
+               nd6_ndalive.link_data.if_unit = ifp->if_unit;
+               strlcpy(nd6_ndalive.link_data.if_name,
+                   ifp->if_name,
+                   sizeof(nd6_ndalive.link_data.if_name));
+               ev_msg.dv[0].data_ptr = &nd6_ndalive;
+               ev_msg.dv[0].data_length =
+                       sizeof(nd6_ndalive);
+               kev_post_msg(&ev_msg);
+       }
+
        RT_LOCK_ASSERT_HELD(rt);
        rt->rt_flags &= ~RTF_REJECT;
 
@@ -1170,8 +1198,9 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
                 * prevent a ln_hold lookup in nd6_output()
                 * (wouldn't happen, though...)
                 */
-               for (m_hold = ln->ln_hold;
-                   m_hold; m_hold = m_hold_next) {
+               m_hold = ln->ln_hold;
+               ln->ln_hold = NULL;
+               for ( ; m_hold; m_hold = m_hold_next) {
                        m_hold_next = m_hold->m_nextpkt;
                        m_hold->m_nextpkt = NULL;
                        /*
@@ -1182,8 +1211,6 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
                        nd6_output(ifp, ifp, m_hold, &sin6, rt, NULL);
                        RT_LOCK_SPIN(rt);
                }
-               ln->ln_hold = NULL;
-
        }
        RT_REMREF_LOCKED(rt);
        RT_UNLOCK(rt);
@@ -1583,16 +1610,14 @@ nd6_dad_start(
         */
        if (tick_delay == NULL) {
                u_int32_t retrans;
-               struct nd_ifinfo *ndi;
+               struct nd_ifinfo *ndi = NULL;
 
                nd6_dad_ns_output(dp, ifa);
-               lck_rw_lock_shared(nd_if_rwlock);
                ndi = ND_IFINFO(ifa->ifa_ifp);
                VERIFY(ndi != NULL && ndi->initialized);
                lck_mtx_lock(&ndi->lock);
                retrans = ndi->retrans * hz / 1000;
                lck_mtx_unlock(&ndi->lock);
-               lck_rw_done(nd_if_rwlock);
                timeout((void (*)(void *))nd6_dad_timer, (void *)ifa, retrans);
        } else {
                int ntick;
@@ -1704,7 +1729,7 @@ nd6_dad_timer(struct ifaddr *ifa)
 {
        struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
        struct dadq *dp = NULL;
-       struct nd_ifinfo *ndi;
+       struct nd_ifinfo *ndi = NULL;
 
        /* Sanity check */
        if (ia == NULL) {
@@ -1762,13 +1787,11 @@ nd6_dad_timer(struct ifaddr *ifa)
                 * We have more NS to go.  Send NS packet for DAD.
                 */
                nd6_dad_ns_output(dp, ifa);
-               lck_rw_lock_shared(nd_if_rwlock);
                ndi = ND_IFINFO(ifa->ifa_ifp);
                VERIFY(ndi != NULL && ndi->initialized);
                lck_mtx_lock(&ndi->lock);
                retrans = ndi->retrans * hz / 1000;
                lck_mtx_unlock(&ndi->lock);
-               lck_rw_done(nd_if_rwlock);
                timeout((void (*)(void *))nd6_dad_timer, (void *)ifa, retrans);
        } else {
                /*
@@ -1813,13 +1836,11 @@ nd6_dad_timer(struct ifaddr *ifa)
                        ia->ia6_flags &= ~IN6_IFF_DADPROGRESS;
                        IFA_UNLOCK(&ia->ia_ifa);
 
-                       lck_rw_lock_shared(nd_if_rwlock);
                        ndi = ND_IFINFO(ifa->ifa_ifp);
                        VERIFY(ndi != NULL && ndi->initialized);
                        lck_mtx_lock(&ndi->lock);
                        txunsolna = (ndi->flags & ND6_IFF_REPLICATED) != 0;
                        lck_mtx_unlock(&ndi->lock);
-                       lck_rw_done(nd_if_rwlock);
 
                        if (txunsolna) {
                                nd6_unsol_na_output(ifa);
@@ -1877,12 +1898,12 @@ nd6_dad_duplicated(struct ifaddr *ifa)
        IFA_UNLOCK(&ia->ia_ifa);
 
        if (disable) {
+               struct nd_ifinfo *ndi = ND_IFINFO(ifp);
                log(LOG_ERR, "%s: possible hardware address duplication "
                    "detected, disabling IPv6 for interface.\n", if_name(ifp));
 
-               lck_rw_lock_shared(nd_if_rwlock);
-               nd_ifinfo[ifp->if_index].flags |= ND6_IFF_IFDISABLED;
-               lck_rw_done(nd_if_rwlock);
+               VERIFY((NULL != ndi) && (TRUE == ndi->initialized));
+               ndi->flags |= ND6_IFF_IFDISABLED;
                /* Make sure to set IFEF_IPV6_DISABLED too */
                nd6_if_disable(ifp, TRUE);
        }
@@ -2013,10 +2034,10 @@ static struct mbuf *
 nd6_dad_na_input(struct mbuf *m, struct ifnet *ifp, struct in6_addr *taddr,
     caddr_t lladdr, int lladdrlen)
 {
-       struct ifaddr *ifa;
-       struct in6_ifaddr *ia;
-       struct dadq *dp;
-       struct nd_ifinfo *ndi;
+       struct ifaddr *ifa = NULL;
+       struct in6_ifaddr *ia = NULL;
+       struct dadq *dp = NULL;
+       struct nd_ifinfo *ndi = NULL;
        boolean_t candisable, replicated;
 
        ifa = (struct ifaddr *) in6ifa_ifpwithaddr(ifp, taddr);
@@ -2027,14 +2048,13 @@ nd6_dad_na_input(struct mbuf *m, struct ifnet *ifp, struct in6_addr *taddr,
        replicated = FALSE;
 
        /* Get the ND6_IFF_REPLICATED flag. */
-       lck_rw_lock_shared(nd_if_rwlock);
        ndi = ND_IFINFO(ifp);
        if (ndi != NULL && ndi->initialized) {
                lck_mtx_lock(&ndi->lock);
                replicated = !!(ndi->flags & ND6_IFF_REPLICATED);
                lck_mtx_unlock(&ndi->lock);
        }
-       lck_rw_done(nd_if_rwlock);
+
        if (replicated) {
                nd6log((LOG_INFO, "%s: ignoring duplicate NA on "
                    "replicated interface %s\n", __func__, if_name(ifp)));
index ceb0f7d9c7c029b8a22d682e3b90f9000a99f7af..6227ff00321c2901c47fccfbaf2e235be65f254d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -439,13 +439,8 @@ nd6_ra_input(
        if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
                mcast = 1;
 
-       lck_rw_lock_shared(nd_if_rwlock);
-       if (ifp->if_index >= nd_ifinfo_indexlim) {
-               lck_rw_done(nd_if_rwlock);
-               goto freeit;
-       }
-       ndi = &nd_ifinfo[ifp->if_index];
-       VERIFY(ndi->initialized);
+       ndi = ND_IFINFO(ifp);
+       VERIFY((NULL != ndi) && (TRUE == ndi->initialized));
        lck_mtx_lock(&ndi->lock);
        bzero(&dr0, sizeof (dr0));
        dr0.rtaddr = saddr6;
@@ -465,11 +460,19 @@ nd6_ra_input(
        }
        if (nd_ra->nd_ra_retransmit)
                ndi->retrans = ntohl(nd_ra->nd_ra_retransmit);
-       if (nd_ra->nd_ra_curhoplimit)
-               ndi->chlim = nd_ra->nd_ra_curhoplimit;
+       if (nd_ra->nd_ra_curhoplimit) {
+               if (ndi->chlim < nd_ra->nd_ra_curhoplimit) {
+                       ndi->chlim = nd_ra->nd_ra_curhoplimit;
+               } else if (ndi->chlim != nd_ra->nd_ra_curhoplimit) {
+                       nd6log((LOG_ERR,
+                           "RA with a lower CurHopLimit sent from "
+                           "%s on %s (current = %d, received = %d). "
+                           "Ignored.\n", ip6_sprintf(&ip6->ip6_src),
+                           if_name(ifp), ndi->chlim,
+                           nd_ra->nd_ra_curhoplimit));
+               }
+       }
        lck_mtx_unlock(&ndi->lock);
-       lck_rw_done(nd_if_rwlock);
-       ndi = NULL;
        lck_mtx_lock(nd6_mutex);
        dr = defrtrlist_update(&dr0);
        lck_mtx_unlock(nd6_mutex);
@@ -615,13 +618,6 @@ nd6_ra_input(
                        goto skip;
                }
 
-               lck_rw_lock_shared(nd_if_rwlock);
-               if (ifp->if_index >= nd_ifinfo_indexlim) {
-                       lck_rw_done(nd_if_rwlock);
-                       goto freeit;
-               }
-               ndi = &nd_ifinfo[ifp->if_index];
-               VERIFY(ndi->initialized);
                lck_mtx_lock(&ndi->lock);
                /* upper bound */
                if (ndi->maxmtu) {
@@ -630,7 +626,6 @@ nd6_ra_input(
 
                                ndi->linkmtu = mtu;
                                lck_mtx_unlock(&ndi->lock);
-                               lck_rw_done(nd_if_rwlock);
                                if (change) /* in6_maxmtu may change */
                                        in6_setmaxmtu();
                        } else {
@@ -640,17 +635,14 @@ nd6_ra_input(
                                    mtu, ip6_sprintf(&ip6->ip6_src),
                                    ndi->maxmtu));
                                lck_mtx_unlock(&ndi->lock);
-                               lck_rw_done(nd_if_rwlock);
                        }
                } else {
                        lck_mtx_unlock(&ndi->lock);
-                       lck_rw_done(nd_if_rwlock);
                        nd6log((LOG_INFO, "nd6_ra_input: mtu option "
                            "mtu=%d sent from %s; maxmtu unknown, "
                            "ignoring\n",
                            mtu, ip6_sprintf(&ip6->ip6_src)));
                }
-               ndi = NULL;
        }
 
 skip:
@@ -1022,6 +1014,7 @@ defrtrlist_del(struct nd_defrouter *dr)
        struct nd_defrouter *deldr = NULL;
        struct nd_prefix *pr;
        struct ifnet *ifp = dr->ifp;
+       struct nd_ifinfo *ndi = NULL;
        boolean_t resetmtu;
 
        lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
@@ -1077,19 +1070,15 @@ defrtrlist_del(struct nd_defrouter *dr)
                defrouter_select(ifp);
 
        resetmtu = FALSE;
-       lck_rw_lock_shared(nd_if_rwlock);
-       if (ifp->if_index < nd_ifinfo_indexlim) {
-               struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index];
-               VERIFY(ndi->initialized);
-               lck_mtx_lock(&ndi->lock);
-               VERIFY(ndi->ndefrouters >= 0);
-               if (ndi->ndefrouters > 0 && --ndi->ndefrouters == 0) {
-                       nd6_ifreset(ifp);
-                       resetmtu = TRUE;
-               }
-               lck_mtx_unlock(&ndi->lock);
+       ndi = ND_IFINFO(ifp);
+       VERIFY((NULL != ndi) && (TRUE == ndi->initialized));
+       lck_mtx_lock(&ndi->lock);
+       VERIFY(ndi->ndefrouters >= 0);
+       if (ndi->ndefrouters > 0 && --ndi->ndefrouters == 0) {
+               nd6_ifreset(ifp);
+               resetmtu = TRUE;
        }
-       lck_rw_done(nd_if_rwlock);
+       lck_mtx_unlock(&ndi->lock);
 
        if (resetmtu)
                nd6_setmtu(ifp);
@@ -1792,17 +1781,12 @@ defrtrlist_update_common(struct nd_defrouter *new, boolean_t scoped)
                return (NULL);
        }
 
-       lck_rw_lock_shared(nd_if_rwlock);
-       if (ifp->if_index >= nd_ifinfo_indexlim)
-               goto freeit;
-       ndi = &nd_ifinfo[ifp->if_index];
-       VERIFY(ndi->initialized);
+       ndi = ND_IFINFO(ifp);
+       VERIFY((NULL != ndi) && (TRUE == ndi->initialized));
        lck_mtx_lock(&ndi->lock);
        if (ip6_maxifdefrouters >= 0 &&
            ndi->ndefrouters >= ip6_maxifdefrouters) {
                lck_mtx_unlock(&ndi->lock);
-freeit:
-               lck_rw_done(nd_if_rwlock);
                nddr_free(n);
                return (NULL);
        }
@@ -1814,7 +1798,6 @@ freeit:
        ndi->ndefrouters++;
        VERIFY(ndi->ndefrouters != 0);
        lck_mtx_unlock(&ndi->lock);
-       lck_rw_done(nd_if_rwlock);
 
        nd6log2((LOG_INFO, "%s: allocating defrouter %s\n", if_name(ifp),
            ip6_sprintf(&new->rtaddr)));
@@ -1964,8 +1947,20 @@ pfxrtr_del(struct nd_pfxrouter *pfr, struct nd_prefix *pr)
        zfree(ndprtr_zone, pfr);
 }
 
+/*
+ * The routine has been modified to atomically refresh the expiry
+ * time of an nd6 prefix as part of the lookup.
+ * rdar://20339655 explains the corner case where a system going to
+ * sleep gets rid of the manually configured addresses and then
+ * schedules the prefix for deletion.
+ * However, if the system comes out of sleep and configures the same
+ * address again before the prefix gets deleted, the later prefix
+ * deletion will remove the prefix route and the system will not be
+ * able to communicate with other IPv6 neighbor nodes in the same
+ * subnet.
+ */
 struct nd_prefix *
-nd6_prefix_lookup(struct nd_prefix *pr)
+nd6_prefix_lookup(struct nd_prefix *pr, int nd6_prefix_expiry)
 {
        struct nd_prefix *search;
 
@@ -1976,6 +1971,9 @@ nd6_prefix_lookup(struct nd_prefix *pr)
                    pr->ndpr_plen == search->ndpr_plen &&
                    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
                    &search->ndpr_prefix.sin6_addr, pr->ndpr_plen)) {
+                       if (nd6_prefix_expiry != ND6_PREFIX_EXPIRY_UNSPEC) {
+                               search->ndpr_expire = nd6_prefix_expiry;
+                       }
                        NDPR_ADDREF_LOCKED(search);
                        NDPR_UNLOCK(search);
                        break;
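A short usage sketch for the two-argument lookup: passing ND6_PREFIX_EXPIRY_UNSPEC keeps the old lookup-only behavior, while passing a concrete value such as ND6_PREFIX_EXPIRY_NEVER refreshes ndpr_expire under the prefix lock as part of the same lookup, closing the sleep/wake race described in the comment above. The caller and the target variable are hypothetical; nd6_prefix_lookup(), the ND6_PREFIX_EXPIRY_* constants and the reference taken via NDPR_ADDREF_LOCKED() are from this patch.

	struct nd_prefix target;	/* hypothetical, pre-filled prefix/plen/ifp */
	struct nd_prefix *pr;

	/* lookup only; expiry left untouched */
	pr = nd6_prefix_lookup(&target, ND6_PREFIX_EXPIRY_UNSPEC);
	if (pr != NULL)
		NDPR_REMREF(pr);	/* drop the reference the lookup took */

	/* lookup that also marks the matching prefix as never expiring */
	pr = nd6_prefix_lookup(&target, ND6_PREFIX_EXPIRY_NEVER);
	if (pr != NULL)
		NDPR_REMREF(pr);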
@@ -2075,34 +2073,19 @@ nd6_prelist_add(struct nd_prefix *pr, struct nd_defrouter *dr,
        int i, error;
 
        if (ip6_maxifprefixes >= 0) {
-               lck_rw_lock_shared(nd_if_rwlock);
-               if (ifp->if_index >= nd_ifinfo_indexlim) {
-                       lck_rw_done(nd_if_rwlock);
-                       return (EINVAL);
-               }
-               ndi = &nd_ifinfo[ifp->if_index];
-               VERIFY(ndi->initialized);
+               ndi = ND_IFINFO(ifp);
+               VERIFY((NULL != ndi) && (TRUE == ndi->initialized));
                lck_mtx_lock(&ndi->lock);
                if (ndi->nprefixes >= ip6_maxifprefixes / 2) {
                        lck_mtx_unlock(&ndi->lock);
-                       lck_rw_done(nd_if_rwlock);
                        purge_detached(ifp);
-                       lck_rw_lock_shared(nd_if_rwlock);
-                       /*
-                        * Refresh pointer since nd_ifinfo[] may have grown;
-                        * repeating the bounds check against nd_ifinfo_indexlim
-                        * isn't necessary since the array never shrinks.
-                        */
-                       ndi = &nd_ifinfo[ifp->if_index];
                        lck_mtx_lock(&ndi->lock);
                }
                if (ndi->nprefixes >= ip6_maxifprefixes) {
                        lck_mtx_unlock(&ndi->lock);
-                       lck_rw_done(nd_if_rwlock);
                        return (ENOMEM);
                }
                lck_mtx_unlock(&ndi->lock);
-               lck_rw_done(nd_if_rwlock);
        }
 
        new = ndpr_alloc(M_WAITOK);
@@ -2168,19 +2151,10 @@ nd6_prelist_add(struct nd_prefix *pr, struct nd_defrouter *dr,
                pfxrtr_add(new, dr);
        }
 
-       lck_rw_lock_shared(nd_if_rwlock);
-       /*
-        * Refresh pointer since nd_ifinfo[] may have grown;
-        * repeating the bounds check against nd_ifinfo_indexlim
-        * isn't necessary since the array never shrinks.
-        */
-       ndi = &nd_ifinfo[ifp->if_index];
-       VERIFY(ndi->initialized);
        lck_mtx_lock(&ndi->lock);
        ndi->nprefixes++;
        VERIFY(ndi->nprefixes != 0);
        lck_mtx_unlock(&ndi->lock);
-       lck_rw_done(nd_if_rwlock);
 
        lck_mtx_unlock(nd6_mutex);
 
@@ -2196,6 +2170,7 @@ prelist_remove(struct nd_prefix *pr)
        struct nd_pfxrouter *pfr, *next;
        struct ifnet *ifp = pr->ndpr_ifp;
        int e;
+       struct nd_ifinfo *ndi = NULL;
 
        lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
        NDPR_LOCK_ASSERT_HELD(pr);
@@ -2257,16 +2232,12 @@ prelist_remove(struct nd_prefix *pr)
                pfxrtr_del(pfr, pr);
        }
 
-       lck_rw_lock_shared(nd_if_rwlock);
-       if (ifp->if_index < nd_ifinfo_indexlim) {
-               struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index];
-               VERIFY(ndi->initialized);
-               lck_mtx_lock(&ndi->lock);
-               VERIFY(ndi->nprefixes > 0);
-               ndi->nprefixes--;
-               lck_mtx_unlock(&ndi->lock);
-       }
-       lck_rw_done(nd_if_rwlock);
+       ndi = ND_IFINFO(ifp);
+       VERIFY((NULL != ndi) && (TRUE == ndi->initialized));
+       lck_mtx_lock(&ndi->lock);
+       VERIFY(ndi->nprefixes > 0);
+       ndi->nprefixes--;
+       lck_mtx_unlock(&ndi->lock);
 
        /* This must not be the last reference to the nd_prefix */
        if (NDPR_REMREF_LOCKED(pr) == NULL) {
@@ -2312,7 +2283,7 @@ prelist_update(
 #endif
        }
 
-       if ((pr = nd6_prefix_lookup(new)) != NULL) {
+       if ((pr = nd6_prefix_lookup(new, ND6_PREFIX_EXPIRY_UNSPEC)) != NULL) {
                /*
                 * nd6_prefix_lookup() ensures that pr and new have the same
                 * prefix on a same interface.
@@ -3596,14 +3567,13 @@ nd6_prefix_onlink_common(struct nd_prefix *pr, boolean_t force_scoped,
         */
        if (!(pr->ndpr_stateflags & NDPRF_DEFUNCT) &&
            (rt != NULL || error == EEXIST)) {
-               struct nd_ifinfo *ndi;
+               struct nd_ifinfo *ndi = NULL;
 
                VERIFY(pr->ndpr_prproxy_sols_cnt == 0);
                VERIFY(RB_EMPTY(&pr->ndpr_prproxy_sols));
 
-               lck_rw_lock_shared(nd_if_rwlock);
                ndi = ND_IFINFO(ifp);
-               VERIFY(ndi != NULL && ndi->initialized);
+               VERIFY((NULL != ndi) && (TRUE == ndi->initialized));
                lck_mtx_lock(&ndi->lock);
 
                pr->ndpr_rt = rt;       /* keep reference from rtrequest */
@@ -3625,7 +3595,6 @@ nd6_prefix_onlink_common(struct nd_prefix *pr, boolean_t force_scoped,
                }
 
                lck_mtx_unlock(&ndi->lock);
-               lck_rw_done(nd_if_rwlock);
        } else if (rt != NULL && pr->ndpr_stateflags & NDPRF_DEFUNCT)
                rtfree(rt);
 
@@ -3818,9 +3787,9 @@ nd6_prefix_offlink(struct nd_prefix *pr)
 static struct in6_ifaddr *
 in6_pfx_newpersistaddr(struct nd_prefix *pr, int mcast, int *errorp)
 {
-       struct in6_ifaddr *ia6;
-       struct ifnet *ifp;
-       struct nd_ifinfo *ndi;
+       struct in6_ifaddr *ia6 = NULL;
+       struct ifnet *ifp = NULL;
+       struct nd_ifinfo *ndi = NULL;
        struct in6_addr mask;
        struct in6_aliasreq ifra;
        int error, ifaupdate, iidlen, notcga;
@@ -3867,15 +3836,6 @@ in6_pfx_newpersistaddr(struct nd_prefix *pr, int mcast, int *errorp)
                goto unlock1;
        }
 
-       lck_rw_lock_shared(nd_if_rwlock);
-       if (ifp->if_index >= nd_ifinfo_indexlim) {
-               error = EADDRNOTAVAIL;
-               nd6log((LOG_INFO,
-                   "%s: invalid prefix length %d for %s, ignored\n",
-                   __func__, pr->ndpr_plen, if_name(ifp)));
-               goto unlock2;
-       }
-
        bzero(&ifra, sizeof (ifra));
        strlcpy(ifra.ifra_name, if_name(ifp), sizeof (ifra.ifra_name));
        ifra.ifra_addr.sin6_family = AF_INET6;
@@ -3890,7 +3850,7 @@ in6_pfx_newpersistaddr(struct nd_prefix *pr, int mcast, int *errorp)
        ifra.ifra_addr.sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
        ifra.ifra_addr.sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
 
-       ndi = &nd_ifinfo[ifp->if_index];
+       ndi = ND_IFINFO(ifp);
        VERIFY(ndi->initialized);
        lck_mtx_lock(&ndi->lock);
 
@@ -3898,7 +3858,6 @@ in6_pfx_newpersistaddr(struct nd_prefix *pr, int mcast, int *errorp)
            (ndi->flags & ND6_IFF_INSECURE) != 0;
 
        lck_mtx_unlock(&ndi->lock);
-       lck_rw_done(nd_if_rwlock);
        NDPR_UNLOCK(pr);
 
        if (notcga) {
@@ -3990,9 +3949,6 @@ in6_pfx_newpersistaddr(struct nd_prefix *pr, int mcast, int *errorp)
        in6_post_msg(ifp, KEV_INET6_NEW_RTADV_ADDR, ia6, NULL);
        goto done;
 
-unlock2:
-       lck_rw_done(nd_if_rwlock);
-
 unlock1:
        NDPR_UNLOCK(pr);
 
diff --git a/bsd/netinet6/nd6_var.h b/bsd/netinet6/nd6_var.h
new file mode 100644 (file)
index 0000000..0743c0a
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _NETINET6_ND6_VAR_H_
+#define        _NETINET6_ND6_VAR_H_
+
+#ifdef BSD_KERNEL_PRIVATE
+struct nd_ifinfo {
+       decl_lck_mtx_data(, lock);
+       boolean_t initialized;          /* Flag to see the entry is initialized */
+       u_int32_t linkmtu;              /* LinkMTU */
+       u_int32_t maxmtu;               /* Upper bound of LinkMTU */
+       u_int32_t basereachable;        /* BaseReachableTime */
+       u_int32_t reachable;            /* Reachable Time */
+       u_int32_t retrans;              /* Retrans Timer */
+       u_int32_t flags;                /* Flags */
+       int recalctm;                   /* BaseReacable re-calculation timer */
+       u_int8_t chlim;                 /* CurHopLimit */
+       u_int8_t _pad[3];
+       /* the following 3 members are for privacy extension for addrconf */
+       u_int8_t randomseed0[8]; /* upper 64 bits of SHA1 digest */
+       u_int8_t randomseed1[8]; /* lower 64 bits (usually the EUI64 IFID) */
+       u_int8_t randomid[8];   /* current random ID */
+       /* keep track of routers and prefixes on this link */
+       int32_t nprefixes;
+       int32_t ndefrouters;
+       struct in6_cga_modifier local_cga_modifier;
+};
+#endif /* BSD_KERNEL_PRIVATE */
+#endif /* _NETINET6_ND6_VAR_H_ */
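Why the definition moved into its own header is not stated in the patch; a plausible reading, based on the new ND_IFINFO() macro, is that struct nd_ifinfo is now embedded in the per-interface in6_ifextra reached through IN6_IFEXTRA(ifp), and a small standalone header lets in6_var.h embed it without pulling in all of nd6.h. A sketch of the assumed layout, with the other in6_ifextra fields elided:

#include <netinet6/nd6_var.h>

struct in6_ifextra {
	/* ... existing per-ifnet IPv6 state ... */
	struct nd_ifinfo	nd_ifinfo;	/* ND_IFINFO(ifp) yields &...->nd_ifinfo */
};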
index 3ac3106bd519e6b99699b998e5dbc6993feb2a86..3cee2fb217bf84bcd55da20cb615538530243d5c 100644 (file)
@@ -195,7 +195,8 @@ rip6_input(
                        struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);
 
 #if NECP
-                       if (n && !necp_socket_is_allowed_to_send_recv_v6(in6p, 0, 0, &ip6->ip6_dst, &ip6->ip6_src, ifp, NULL)) {
+                       if (n && !necp_socket_is_allowed_to_send_recv_v6(in6p, 0, 0,
+                               &ip6->ip6_dst, &ip6->ip6_src, ifp, NULL, NULL)) {
                                m_freem(n);
                                /* do not inject data into pcb */
                        } else
@@ -226,9 +227,10 @@ rip6_input(
                }
                last = in6p;
        }
-       
+
 #if NECP
-       if (last && !necp_socket_is_allowed_to_send_recv_v6(in6p, 0, 0, &ip6->ip6_dst, &ip6->ip6_src, ifp, NULL)) {
+       if (last && !necp_socket_is_allowed_to_send_recv_v6(in6p, 0, 0,
+               &ip6->ip6_dst, &ip6->ip6_src, ifp, NULL, NULL)) {
                m_freem(m);
                ip6stat.ip6s_delivered--;
                /* do not inject data into pcb */
@@ -412,7 +414,7 @@ rip6_output(
                    (htonl(in6p->inp_flowhash) & IPV6_FLOWLABEL_MASK);
        }
 
-       M_PREPEND(m, sizeof(*ip6), M_WAIT);
+       M_PREPEND(m, sizeof(*ip6), M_WAIT, 1);
        if (m == NULL) {
                error = ENOBUFS;
                goto bad;
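
M_PREPEND grows a fourth argument here and in every file below. In this release the macro forwards to m_prepend_2(), and the trailing 1 appears to ask for the prepended data to stay 32-bit aligned if a fresh mbuf has to be allocated; the expansion below is a paraphrase of that reading, not a quote of bsd/sys/mbuf.h.

/* Approximate shape of the updated macro (paraphrased; "align" is the new
 * trailing argument threaded through to m_prepend_2()). */
#define M_PREPEND(m, plen, how, align) \
	((m) = m_prepend_2((m), (plen), (how), (align)))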
@@ -547,19 +549,21 @@ rip6_output(
                *p = 0;
                *p = in6_cksum(m, ip6->ip6_nxt, sizeof(*ip6), plen);
        }
-       
+
 #if NECP
        {
                necp_kernel_policy_id policy_id;
-               if (!necp_socket_is_allowed_to_send_recv_v6(in6p, 0, 0, &ip6->ip6_src, &ip6->ip6_dst, NULL, &policy_id)) {
+               u_int32_t route_rule_id;
+               if (!necp_socket_is_allowed_to_send_recv_v6(in6p, 0, 0,
+                       &ip6->ip6_src, &ip6->ip6_dst, NULL, &policy_id, &route_rule_id)) {
                        error = EHOSTUNREACH;
                        goto bad;
                }
 
-               necp_mark_packet_from_socket(m, in6p, policy_id);
+               necp_mark_packet_from_socket(m, in6p, policy_id, route_rule_id);
        }
 #endif /* NECP */
-       
+
 #if IPSEC
        if (in6p->in6p_sp != NULL && ipsec_setsocket(m, so) != 0) {
                error = ENOBUFS;
index 22270e5846dc4635a9c97c6448f138b50235884b..58e9b34f4d32e763b3024f10e98833119c60210c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2009-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
 #define _NETINET6_SCOPE6_VAR_H_
 #include <sys/appleapiopts.h>
 
-#ifdef BSD_KERNEL_PRIVATE
 /*
  * 16 corresponds to the 4-bit multicast scope field,
  * i.e. from node-local to global, with some reserved/unassigned types.
  */
-#define        SCOPE6_ID_MAX   16
+#define        SCOPE6_ID_MAX   16
+
+#ifdef BSD_KERNEL_PRIVATE
 
 struct scope6_id {
        u_int32_t s6id_list[SCOPE6_ID_MAX];
index 2d64d6b940b20d8d1ff49fafa13413358a4f4a27..a8d0e2fb38462f15b4acba51bbbf722ca85234a3 100644 (file)
  * UDP protocol implementation.
  * Per RFC 768, August, 1980.
  */
+extern int soreserveheadroom;
 
 int
 udp6_output(struct in6pcb *in6p, struct mbuf *m, struct sockaddr *addr6,
@@ -306,7 +307,7 @@ udp6_output(struct in6pcb *in6p, struct mbuf *m, struct sockaddr *addr6,
         * Calculate data length and get a mbuf
         * for UDP and IP6 headers.
         */
-       M_PREPEND(m, hlen + sizeof (struct udphdr), M_DONTWAIT);
+       M_PREPEND(m, hlen + sizeof (struct udphdr), M_DONTWAIT, 1);
        if (m == 0) {
                error = ENOBUFS;
                goto release;
@@ -350,19 +351,20 @@ udp6_output(struct in6pcb *in6p, struct mbuf *m, struct sockaddr *addr6,
                flags = IPV6_OUTARGS;
 
                udp6stat.udp6s_opackets++;
-                       
+
 #if NECP
                {
                        necp_kernel_policy_id policy_id;
-                       if (!necp_socket_is_allowed_to_send_recv_v6(in6p, in6p->in6p_lport, fport, laddr, faddr, NULL, &policy_id)) {
+                       u_int32_t route_rule_id;
+                       if (!necp_socket_is_allowed_to_send_recv_v6(in6p, in6p->in6p_lport, fport, laddr, faddr, NULL, &policy_id, &route_rule_id)) {
                                error = EHOSTUNREACH;
                                goto release;
                        }
 
-                       necp_mark_packet_from_socket(m, in6p, policy_id);
+                       necp_mark_packet_from_socket(m, in6p, policy_id, route_rule_id);
                }
 #endif /* NECP */
-                       
+
 #if IPSEC
                if (in6p->in6p_sp != NULL && ipsec_setsocket(m, so) != 0) {
                        error = ENOBUFS;
@@ -440,6 +442,17 @@ udp6_output(struct in6pcb *in6p, struct mbuf *m, struct sockaddr *addr6,
                if ( --in6p->inp_sndinprog_cnt == 0)
                        in6p->inp_flags &= ~(INP_FC_FEEDBACK);
 
+               if (ro.ro_rt != NULL) {
+                       struct ifnet *outif = ro.ro_rt->rt_ifp;
+
+                       so->so_pktheadroom = P2ROUNDUP(
+                           sizeof(struct udphdr) +
+                           hlen +
+                           ifnet_hdrlen(outif) +
+                           ifnet_packetpreamblelen(outif),
+                           sizeof(u_int32_t));
+               }
+
                /* Synchronize PCB cached route */
                in6p_route_copyin(in6p, &ro);
 
@@ -462,8 +475,16 @@ udp6_output(struct in6pcb *in6p, struct mbuf *m, struct sockaddr *addr6,
                         * with that of the route interface used by IP.
                         */
                        if (rt != NULL &&
-                           (outif = rt->rt_ifp) != in6p->in6p_last_outifp)
+                           (outif = rt->rt_ifp) != in6p->in6p_last_outifp) {
                                in6p->in6p_last_outifp = outif;
+
+                               so->so_pktheadroom = P2ROUNDUP(
+                                   sizeof(struct udphdr) +
+                                   hlen +
+                                   ifnet_hdrlen(outif) +
+                                   ifnet_packetpreamblelen(outif),
+                                   sizeof(u_int32_t));
+                       }                               
                } else {
                        ROUTE_RELEASE(&in6p->in6p_route);
                }
index 29d037b58e6dc2147cf71561a6c4b000184120d0..1e0f9eb37d52770d0a7eeb34dc1f3b8c1b83a0bb 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #if IPSEC
 #include <netinet6/ipsec.h>
 #include <netinet6/ipsec6.h>
+#include <netinet6/esp6.h>
+extern int ipsec_bypass;
+extern int esp_udp_encap_port;
 #endif /* IPSEC */
 
 #if NECP
 #include <net/necp.h>
 #endif /* NECP */
 
+#if FLOW_DIVERT
+#include <netinet/flow_divert.h>
+#endif /* FLOW_DIVERT */
+
 /*
  * UDP protocol implementation.
  * Per RFC 768, August, 1980.
@@ -147,11 +154,11 @@ static int udp6_abort(struct socket *);
 static int udp6_attach(struct socket *, int, struct proc *);
 static int udp6_bind(struct socket *, struct sockaddr *, struct proc *);
 static int udp6_connectx(struct socket *, struct sockaddr_list **,
-    struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *,
-    uint32_t, void *, uint32_t);
+    struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t,
+    sae_connid_t *, uint32_t, void *, uint32_t, struct uio *, user_ssize_t *);
 static int udp6_detach(struct socket *);
 static int udp6_disconnect(struct socket *);
-static int udp6_disconnectx(struct socket *, associd_t, connid_t);
+static int udp6_disconnectx(struct socket *, sae_associd_t, sae_connid_t);
 static int udp6_send(struct socket *, int, struct mbuf *, struct sockaddr *,
     struct mbuf *, struct proc *);
 static void udp6_append(struct inpcb *, struct ip6_hdr *,
@@ -193,6 +200,7 @@ struct pr_usrreqs udp6_usrreqs = {
        .pru_sockaddr =         in6_mapped_sockaddr,
        .pru_sosend =           sosend,
        .pru_soreceive =        soreceive,
+       .pru_soreceive_list =   soreceive_list,
 };
 
 /*
@@ -411,7 +419,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
                        skipit = 0;
                        if (!necp_socket_is_allowed_to_send_recv_v6(in6p,
                            uh->uh_dport, uh->uh_sport, &ip6->ip6_dst,
-                           &ip6->ip6_src, ifp, NULL)) {
+                           &ip6->ip6_src, ifp, NULL, NULL)) {
                                /* do not inject data to pcb */
                                skipit = 1;
                        }
@@ -477,6 +485,49 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
                        m_freem(m);
                return (IPPROTO_DONE);
        }
+
+#if IPSEC
+       /*
+        * UDP to port 4500 with a payload where the first four bytes are
+        * not zero is a UDP encapsulated IPSec packet. Packets where
+        * the payload is one byte and that byte is 0xFF are NAT keepalive
+        * packets. Decapsulate the ESP packet and carry on with IPSec input
+        * or discard the NAT keep-alive.
+        */
+       if (ipsec_bypass == 0 && (esp_udp_encap_port & 0xFFFF) != 0 &&
+           uh->uh_dport == ntohs((u_short)esp_udp_encap_port)) {
+               int payload_len = ulen - sizeof (struct udphdr) > 4 ? 4 :
+                   ulen - sizeof (struct udphdr);
+
+               if (m->m_len < off + sizeof (struct udphdr) + payload_len) {
+                       if ((m = m_pullup(m, off + sizeof (struct udphdr) +
+                           payload_len)) == NULL) {
+                               udpstat.udps_hdrops++;
+                               goto bad;
+                       }
+                       /*
+                        * Expect 32-bit aligned data pointer on strict-align
+                        * platforms.
+                        */
+                       MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
+
+                       ip6 = mtod(m, struct ip6_hdr *);
+                       uh = (struct udphdr *)(void *)((caddr_t)ip6 + off);
+               }
+               /* Check for NAT keepalive packet */
+               if (payload_len == 1 && *(u_int8_t*)
+                   ((caddr_t)uh + sizeof (struct udphdr)) == 0xFF) {
+                       goto bad;
+               } else if (payload_len == 4 && *(u_int32_t*)(void *)
+                   ((caddr_t)uh + sizeof (struct udphdr)) != 0) {
+                       /* UDP encapsulated IPSec packet to pass through NAT */
+                       /* preserve the udp header */
+                       *offp = off + sizeof (struct udphdr);
+                       return (esp6_input(mp, offp, IPPROTO_UDP));
+               }
+       }
+#endif /* IPSEC */
+
        /*
         * Locate pcb for datagram.
         */
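
The block above teaches udp6_input() NAT-traversal demultiplexing on the configured ESP UDP-encapsulation port (esp_udp_encap_port): a one-byte 0xFF payload is a NAT keep-alive and is dropped, a payload whose first four bytes are non-zero is an ESP packet wrapped in UDP and is handed to esp6_input(), and a payload starting with the four-byte zero marker falls through to normal UDP delivery (i.e. IKE). The same decision, restated as a stand-alone sketch rather than kernel code:

#include <stddef.h>
#include <sys/types.h>

enum natt_kind { NATT_KEEPALIVE, NATT_ESP, NATT_OTHER };

/* Classify the first bytes of a UDP payload arriving on the NAT-T port. */
static enum natt_kind
classify_natt_payload(const u_int8_t *p, size_t len)
{
	if (len == 1 && p[0] == 0xFF)
		return NATT_KEEPALIVE;          /* drop: NAT keep-alive */
	if (len >= 4 && (p[0] | p[1] | p[2] | p[3]) != 0)
		return NATT_ESP;                /* non-zero SPI: ESP-in-UDP */
	return NATT_OTHER;                      /* zero marker etc.: normal UDP */
}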
@@ -516,7 +567,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
        }
 #if NECP
        if (!necp_socket_is_allowed_to_send_recv_v6(in6p, uh->uh_dport,
-           uh->uh_sport, &ip6->ip6_dst, &ip6->ip6_src, ifp, NULL)) {
+           uh->uh_sport, &ip6->ip6_dst, &ip6->ip6_src, ifp, NULL, NULL)) {
                in_pcb_checkstate(in6p, WNT_RELEASE, 0);
                IF_UDP_STATINC(ifp, badipsec);
                goto bad;
@@ -694,12 +745,8 @@ udp6_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
        int error;
 
        inp = sotoinpcb(so);
-       if (inp == NULL
-#if NECP
-               || (necp_socket_should_use_flow_divert(inp))
-#endif /* NECP */
-               )
-               return (inp == NULL ? EINVAL : EPROTOTYPE);
+       if (inp == NULL)
+               return (EINVAL);
 
        inp->inp_vflag &= ~INP_IPV4;
        inp->inp_vflag |= INP_IPV6;
@@ -730,14 +777,17 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
 {
        struct inpcb *inp;
        int error;
+#if defined(NECP) && defined(FLOW_DIVERT)
+       int should_use_flow_divert = 0;
+#endif /* defined(NECP) && defined(FLOW_DIVERT) */
 
        inp = sotoinpcb(so);
-       if (inp == NULL
-#if NECP
-               || (necp_socket_should_use_flow_divert(inp))
-#endif /* NECP */
-               )
-               return (inp == NULL ? EINVAL : EPROTOTYPE);
+       if (inp == NULL)
+               return (EINVAL);
+
+#if defined(NECP) && defined(FLOW_DIVERT)
+       should_use_flow_divert = necp_socket_should_use_flow_divert(inp);
+#endif /* defined(NECP) && defined(FLOW_DIVERT) */
 
        if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
                struct sockaddr_in6 *sin6_p;
@@ -749,6 +799,11 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
                        if (inp->inp_faddr.s_addr != INADDR_ANY)
                                return (EISCONN);
                        in6_sin6_2_sin(&sin, sin6_p);
+#if defined(NECP) && defined(FLOW_DIVERT)
+                       if (should_use_flow_divert) {
+                               goto do_flow_divert;
+                       }
+#endif /* defined(NECP) && defined(FLOW_DIVERT) */
                        error = in_pcbconnect(inp, (struct sockaddr *)&sin,
                            p, IFSCOPE_NONE, NULL);
                        if (error == 0) {
@@ -762,6 +817,23 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
 
        if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))
                return (EISCONN);
+
+#if defined(NECP) && defined(FLOW_DIVERT)
+do_flow_divert:
+       if (should_use_flow_divert) {
+               uint32_t fd_ctl_unit = necp_socket_get_flow_divert_control_unit(inp);
+               if (fd_ctl_unit > 0) {
+                       error = flow_divert_pcb_init(so, fd_ctl_unit);
+                       if (error == 0) {
+                               error = flow_divert_connect_out(so, nam, p);
+                       }
+               } else {
+                       error = ENETDOWN;
+               }
+               return (error);
+       }
+#endif /* defined(NECP) && defined(FLOW_DIVERT) */
+
        error = in6_pcbconnect(inp, nam, p);
        if (error == 0) {
                /* should be non mapped addr */
@@ -787,11 +859,11 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
 static int
 udp6_connectx(struct socket *so, struct sockaddr_list **src_sl,
     struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
-    associd_t aid, connid_t *pcid, uint32_t flags, void *arg,
-    uint32_t arglen)
+    sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
+    uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
 {
        return (udp_connectx_common(so, AF_INET6, src_sl, dst_sl,
-           p, ifscope, aid, pcid, flags, arg, arglen));
+           p, ifscope, aid, pcid, flags, arg, arglen, uio, bytes_written));
 }
 
 static int
@@ -841,10 +913,10 @@ udp6_disconnect(struct socket *so)
 }
 
 static int
-udp6_disconnectx(struct socket *so, associd_t aid, connid_t cid)
+udp6_disconnectx(struct socket *so, sae_associd_t aid, sae_connid_t cid)
 {
 #pragma unused(cid)
-       if (aid != ASSOCID_ANY && aid != ASSOCID_ALL)
+       if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL)
                return (EINVAL);
 
        return (udp6_disconnect(so));
@@ -856,20 +928,20 @@ udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
 {
        struct inpcb *inp;
        int error = 0;
+#if defined(NECP) && defined(FLOW_DIVERT)
+       int should_use_flow_divert = 0;
+#endif /* defined(NECP) && defined(FLOW_DIVERT) */
 
        inp = sotoinpcb(so);
-       if (inp == NULL
-#if NECP
-               || (necp_socket_should_use_flow_divert(inp))
-#endif /* NECP */
-               ) {
-               if (inp == NULL)
-                       error = EINVAL;
-               else
-                       error = EPROTOTYPE;
+       if (inp == NULL) {
+               error = EINVAL;
                goto bad;
        }
 
+#if defined(NECP) && defined(FLOW_DIVERT)
+       should_use_flow_divert = necp_socket_should_use_flow_divert(inp);
+#endif /* defined(NECP) && defined(FLOW_DIVERT) */
+
        if (addr != NULL) {
                if (addr->sa_len != sizeof (struct sockaddr_in6)) {
                        error = EINVAL;
@@ -897,6 +969,11 @@ udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
 
                        if (sin6 != NULL)
                                in6_sin6_2_sin_in_sock(addr);
+#if defined(NECP) && defined(FLOW_DIVERT)
+                       if (should_use_flow_divert) {
+                               goto do_flow_divert;
+                       }
+#endif /* defined(NECP) && defined(FLOW_DIVERT) */
                        pru = ip_protox[IPPROTO_UDP]->pr_usrreqs;
                        error = ((*pru->pru_send)(so, flags, m, addr,
                            control, p));
@@ -904,6 +981,15 @@ udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
                        return (error);
                }
        }
+
+#if defined(NECP) && defined(FLOW_DIVERT)
+do_flow_divert:
+       if (should_use_flow_divert) {
+               /* Implicit connect */
+               return (flow_divert_implicit_data_out(so, flags, m, addr, control, p));
+       }
+#endif /* defined(NECP) && defined(FLOW_DIVERT) */
+
        return (udp6_output(inp, m, addr, control, p));
 
 bad:
index 882e7b5e3f454428fff50ab2f8fa575601e17dae..6152b0760ca45abd41e80622811f50c8d7f5299b 100644 (file)
@@ -24,7 +24,7 @@ EXPORT_MI_LIST        = ${DATAFILES}
 
 EXPORT_MI_DIR = netkey
 
-INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES}
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
 
 INSTALL_KF_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} ${PRIVATE_KERNELFILES}
 
index 91b6ba0401d54d4fda30f42f8483b6e0b7a58436..0e2e7df2e3cf1632016ee7abca6a4f9c38493128 100644 (file)
@@ -234,6 +234,9 @@ static const int minsize[] = {
        sizeof(struct sadb_address),    /* SADB_X_EXT_ADDR_RANGE_SRC_END */
     sizeof(struct sadb_address),       /* SADB_X_EXT_ADDR_RANGE_DST_START */
        sizeof(struct sadb_address),    /* SADB_X_EXT_ADDR_RANGE_DST_END */
+       sizeof(struct sadb_address),    /* SADB_EXT_MIGRATE_ADDRESS_SRC */
+       sizeof(struct sadb_address),    /* SADB_EXT_MIGRATE_ADDRESS_DST */
+       sizeof(struct sadb_x_ipsecif),  /* SADB_X_EXT_MIGRATE_IPSECIF */
 };
 static const int maxsize[] = {
        sizeof(struct sadb_msg),        /* SADB_EXT_RESERVED */
@@ -263,6 +266,9 @@ static const int maxsize[] = {
        0,              /* SADB_X_EXT_ADDR_RANGE_SRC_END */
     0,              /* SADB_X_EXT_ADDR_RANGE_DST_START */
        0,              /* SADB_X_EXT_ADDR_RANGE_DST_END */
+       0,              /* SADB_EXT_MIGRATE_ADDRESS_SRC */
+       0,              /* SADB_EXT_MIGRATE_ADDRESS_DST */
+       sizeof(struct sadb_x_ipsecif), /* SADB_X_EXT_MIGRATE_IPSECIF */
 };
 
 static int ipsec_esp_keymin = 256;
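
These three rows keep the per-extension length tables in step with the MIGRATE extensions added elsewhere in the commit: the PF_KEY parser checks every extension present in a message against minsize[] as a hard lower bound and, when the matching maxsize[] entry is non-zero, against that upper bound as well (zero meaning variable length). Roughly, the check amounts to the following sketch:

/* Simplified sketch of the length validation driven by these tables. */
static int
ext_len_ok(u_int16_t ext_type, size_t extlen)
{
	if (extlen < (size_t)minsize[ext_type])
		return 0;                       /* shorter than the header itself */
	if (maxsize[ext_type] != 0 && extlen > (size_t)maxsize[ext_type])
		return 0;                       /* fixed-size extension, too long */
	return 1;
}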
@@ -565,6 +571,7 @@ static int key_validate_ext(const struct sadb_ext *, int);
 static int key_align(struct mbuf *, struct sadb_msghdr *);
 static struct mbuf *key_alloc_mbuf(int);
 static int key_getsastat (struct socket *, struct mbuf *, const struct sadb_msghdr *);
+static int key_migrate (struct socket *, struct mbuf *, const struct sadb_msghdr *);
 static int key_setsaval2(struct secasvar      *sav,
                                                 u_int8_t              satype,
                                                 u_int8_t              alg_auth,
@@ -585,8 +592,8 @@ static int key_setsaval2(struct secasvar      *sav,
 extern int ipsec_bypass;
 extern int esp_udp_encap_port;
 int ipsec_send_natt_keepalive(struct secasvar *sav);
-bool ipsec_fill_offload_frame(ifnet_t ifp, struct secasvar *sav, struct ipsec_offload_frame *frame, size_t frame_data_offset);
-u_int32_t key_fill_offload_frames_for_savs (ifnet_t ifp, struct ipsec_offload_frame *frames_array, u_int32_t frames_array_count, size_t frame_data_offset);
+bool ipsec_fill_offload_frame(ifnet_t ifp, struct secasvar *sav, struct ifnet_keepalive_offload_frame *frame, size_t frame_data_offset);
+u_int32_t key_fill_offload_frames_for_savs (ifnet_t ifp, struct ifnet_keepalive_offload_frame *frames_array, u_int32_t frames_array_count, size_t frame_data_offset);
 
 void key_init(struct protosw *, struct domain *);
 
@@ -850,7 +857,7 @@ struct secasvar *key_alloc_outbound_sav_for_interface(ifnet_t interface, int fam
        
        LIST_FOREACH(sah, &sahtree, chain) {
                if (sah->ipsec_if == interface &&
-                       (family == AF_INET6 || sah->saidx.dst.ss_family == family) && /* IPv6 can go over IPv4 */
+                       (family == AF_INET6 || family == AF_INET) &&
                        sah->dir == IPSEC_DIR_OUTBOUND) {
                        /* This SAH is linked to the IPSec interface, and the right family. We found it! */
                        if (key_preferred_oldsa) {
@@ -2251,13 +2258,13 @@ key_spdadd(
     
     /* Process interfaces */
     if (ipsecifopts != NULL) {
-        if (ipsecifopts->sadb_x_ipsecif_internal_if) {
+        if (ipsecifopts->sadb_x_ipsecif_internal_if[0]) {
             ifnet_find_by_name(ipsecifopts->sadb_x_ipsecif_internal_if, &internal_if);
         }
-        if (ipsecifopts->sadb_x_ipsecif_outgoing_if) {
+        if (ipsecifopts->sadb_x_ipsecif_outgoing_if[0]) {
             outgoing_if = ipsecifopts->sadb_x_ipsecif_outgoing_if;
         }
-        if (ipsecifopts->sadb_x_ipsecif_ipsec_if) {
+        if (ipsecifopts->sadb_x_ipsecif_ipsec_if[0]) {
             ipsec_if = ipsecifopts->sadb_x_ipsecif_ipsec_if;
         }
                init_disabled = ipsecifopts->sadb_x_ipsecif_init_disabled;
@@ -2596,7 +2603,7 @@ key_spddelete(
     
     /* Process interfaces */
     if (ipsecifopts != NULL) {
-        if (ipsecifopts->sadb_x_ipsecif_internal_if) {
+        if (ipsecifopts->sadb_x_ipsecif_internal_if[0]) {
             ifnet_find_by_name(ipsecifopts->sadb_x_ipsecif_internal_if, &internal_if);
         }
     }
@@ -3830,6 +3837,23 @@ key_newsav2(struct secashead     *sah,
        return newsav;
 }
 
+static int
+key_migratesav(struct secasvar *sav,
+                          struct secashead *newsah)
+{
+       if (sav == NULL || newsah == NULL || sav->state != SADB_SASTATE_MATURE) {
+               return EINVAL;
+       }
+       
+       /* remove from SA header */
+       if (__LIST_CHAINED(sav))
+               LIST_REMOVE(sav, chain);
+       
+       sav->sah = newsah;
+       LIST_INSERT_TAIL(&newsah->savtree[SADB_SASTATE_MATURE], sav, secasvar, chain);
+       return 0;
+}
+
 /*
  * free() SA variable entry.
  */
@@ -4090,6 +4114,7 @@ key_setsaval(
                        }
                        sav->remote_ike_port = ((const struct sadb_sa_2*)(sa0))->sadb_sa_natt_port;
                        sav->natt_interval = ((const struct sadb_sa_2*)(sa0))->sadb_sa_natt_interval;
+                       sav->natt_offload_interval = ((const struct sadb_sa_2*)(sa0))->sadb_sa_natt_offload_interval;
                }
                
                /*
@@ -4856,7 +4881,7 @@ key_setdumpsa(
                if ((!m && !p) || (m && p))
                        goto fail;
                if (p && tres) {
-                       M_PREPEND(tres, l, M_WAITOK);
+                       M_PREPEND(tres, l, M_WAITOK, 1);
                        if (!tres)
                                goto fail;
                        bcopy(p, mtod(tres, caddr_t), l);
@@ -6364,14 +6389,14 @@ key_proto2satype(
 }
 
 static ifnet_t
-key_get_ipsec_if_from_message (const struct sadb_msghdr *mhp)
+key_get_ipsec_if_from_message (const struct sadb_msghdr *mhp, int message_type)
 {
        struct sadb_x_ipsecif *ipsecifopts = NULL;
        ifnet_t ipsec_if = NULL;
        
-       ipsecifopts = (struct sadb_x_ipsecif *)(void *)mhp->ext[SADB_X_EXT_IPSECIF];
+       ipsecifopts = (struct sadb_x_ipsecif *)(void *)mhp->ext[message_type];
        if (ipsecifopts != NULL) {
-               if (ipsecifopts->sadb_x_ipsecif_internal_if) {
+               if (ipsecifopts->sadb_x_ipsecif_ipsec_if[0]) {
                        ifnet_find_by_name(ipsecifopts->sadb_x_ipsecif_ipsec_if, &ipsec_if);
                }
        }
@@ -6380,14 +6405,14 @@ key_get_ipsec_if_from_message (const struct sadb_msghdr *mhp)
 }
 
 static u_int
-key_get_outgoing_ifindex_from_message (const struct sadb_msghdr *mhp)
+key_get_outgoing_ifindex_from_message (const struct sadb_msghdr *mhp, int message_type)
 {
        struct sadb_x_ipsecif *ipsecifopts = NULL;
        ifnet_t outgoing_if = NULL;
        
-       ipsecifopts = (struct sadb_x_ipsecif *)(void *)mhp->ext[SADB_X_EXT_IPSECIF];
+       ipsecifopts = (struct sadb_x_ipsecif *)(void *)mhp->ext[message_type];
        if (ipsecifopts != NULL) {
-               if (ipsecifopts->sadb_x_ipsecif_outgoing_if) {
+               if (ipsecifopts->sadb_x_ipsecif_outgoing_if[0]) {
                        ifnet_find_by_name(ipsecifopts->sadb_x_ipsecif_outgoing_if, &outgoing_if);
         }
     }
@@ -6454,7 +6479,7 @@ key_getspi(
        src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]);
        dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]);
        
-       ipsec_if = key_get_ipsec_if_from_message(mhp);
+       ipsec_if = key_get_ipsec_if_from_message(mhp, SADB_X_EXT_IPSECIF);
        
        /* map satype to proto */
        if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) {
@@ -6512,7 +6537,7 @@ key_getspi(
        /* get a SA index */
        if ((newsah = key_getsah(&saidx)) == NULL) {
                /* create a new SA index: key_addspi is always used for inbound spi */
-               if ((newsah = key_newsah(&saidx, ipsec_if, key_get_outgoing_ifindex_from_message(mhp), IPSEC_DIR_INBOUND)) == NULL) {
+               if ((newsah = key_newsah(&saidx, ipsec_if, key_get_outgoing_ifindex_from_message(mhp, SADB_X_EXT_IPSECIF), IPSEC_DIR_INBOUND)) == NULL) {
                        lck_mtx_unlock(sadb_mutex);
                        ipseclog((LOG_DEBUG, "key_getspi: No more memory.\n"));
                        return key_senderror(so, m, ENOBUFS);
@@ -6829,7 +6854,7 @@ key_update(
        sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA];
        src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]);
        dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]);
-       ipsec_if = key_get_ipsec_if_from_message(mhp);
+       ipsec_if = key_get_ipsec_if_from_message(mhp, SADB_X_EXT_IPSECIF);
        
        /* XXX boundary check against sa_len */
        KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, ipsec_if ? ipsec_if->if_index : 0, &saidx);
@@ -6943,6 +6968,152 @@ key_update(
     }
 }
 
+static int
+key_migrate(struct socket *so,
+                       struct mbuf *m,
+                       const struct sadb_msghdr *mhp)
+{
+       struct sadb_sa *sa0 = NULL;
+       struct sadb_address *src0 = NULL;
+       struct sadb_address *dst0 = NULL;
+       struct sadb_address *src1 = NULL;
+       struct sadb_address *dst1 = NULL;
+       ifnet_t ipsec_if0 = NULL;
+       ifnet_t ipsec_if1 = NULL;
+       struct secasindex saidx0;
+       struct secasindex saidx1;
+       struct secashead *sah = NULL;
+       struct secashead *newsah = NULL;
+       struct secasvar *sav = NULL;
+       u_int16_t proto;
+       
+       lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED);
+       
+       /* sanity check */
+       if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL)
+               panic("key_migrate: NULL pointer is passed.\n");
+       
+       /* map satype to proto */
+       if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) {
+               ipseclog((LOG_DEBUG, "key_migrate: invalid satype is passed.\n"));
+               return key_senderror(so, m, EINVAL);
+       }
+       
+       if (mhp->ext[SADB_EXT_SA] == NULL ||
+               mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL ||
+               mhp->ext[SADB_EXT_ADDRESS_DST] == NULL ||
+               mhp->ext[SADB_EXT_MIGRATE_ADDRESS_SRC] == NULL ||
+               mhp->ext[SADB_EXT_MIGRATE_ADDRESS_DST] == NULL) {
+               ipseclog((LOG_DEBUG, "key_migrate: invalid message is passed.\n"));
+               return key_senderror(so, m, EINVAL);
+       }
+       
+       if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa) ||
+               mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) ||
+               mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address) ||
+               mhp->extlen[SADB_EXT_MIGRATE_ADDRESS_SRC] < sizeof(struct sadb_address) ||
+               mhp->extlen[SADB_EXT_MIGRATE_ADDRESS_DST] < sizeof(struct sadb_address)) {
+               ipseclog((LOG_DEBUG, "key_migrate: invalid message is passed.\n"));
+               return key_senderror(so, m, EINVAL);
+       }
+       
+       lck_mtx_lock(sadb_mutex);
+       
+       sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA];
+       src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]);
+       dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]);
+       src1 = (struct sadb_address *)(mhp->ext[SADB_EXT_MIGRATE_ADDRESS_SRC]);
+       dst1 = (struct sadb_address *)(mhp->ext[SADB_EXT_MIGRATE_ADDRESS_DST]);
+       ipsec_if0 = key_get_ipsec_if_from_message(mhp, SADB_X_EXT_IPSECIF);
+       ipsec_if1 = key_get_ipsec_if_from_message(mhp, SADB_X_EXT_MIGRATE_IPSECIF);
+       
+       /* Find existing SAH and SAV */
+       KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, ipsec_if0 ? ipsec_if0->if_index : 0, &saidx0);
+       
+       LIST_FOREACH(sah, &sahtree, chain) {
+               if (sah->state != SADB_SASTATE_MATURE)
+                       continue;
+               if (key_cmpsaidx(&sah->saidx, &saidx0, CMP_HEAD) == 0)
+                       continue;
+               
+               sav = key_getsavbyspi(sah, sa0->sadb_sa_spi);
+               if (sav && sav->state == SADB_SASTATE_MATURE)
+                       break;
+       }
+       if (sah == NULL) {
+               lck_mtx_unlock(sadb_mutex);
+               ipseclog((LOG_DEBUG, "key_migrate: no mature SAH found.\n"));
+               return key_senderror(so, m, ENOENT);
+       }
+       
+       if (sav == NULL) {
+               lck_mtx_unlock(sadb_mutex);
+               ipseclog((LOG_DEBUG, "key_migrate: no SA found.\n"));
+               return key_senderror(so, m, ENOENT);
+       }
+       
+       /* Find or create new SAH */
+       KEY_SETSECASIDX(proto, sah->saidx.mode, sah->saidx.reqid, src1 + 1, dst1 + 1, ipsec_if1 ? ipsec_if1->if_index : 0, &saidx1);
+       
+       if ((newsah = key_getsah(&saidx1)) == NULL) {
+               if ((newsah = key_newsah(&saidx1, ipsec_if1, key_get_outgoing_ifindex_from_message(mhp, SADB_X_EXT_MIGRATE_IPSECIF), sah->dir)) == NULL) {
+                       lck_mtx_unlock(sadb_mutex);
+                       ipseclog((LOG_DEBUG, "key_migrate: No more memory.\n"));
+                       return key_senderror(so, m, ENOBUFS);
+               }
+       }
+       
+       /* Migrate SAV in to new SAH */
+       if (key_migratesav(sav, newsah) != 0) {
+               lck_mtx_unlock(sadb_mutex);
+               ipseclog((LOG_DEBUG, "key_migrate: Failed to migrate SA to new SAH.\n"));
+               return key_senderror(so, m, EINVAL);
+       }
+       
+       /* Reset NAT values */
+       sav->flags = sa0->sadb_sa_flags;
+       sav->remote_ike_port = ((const struct sadb_sa_2*)(sa0))->sadb_sa_natt_port;
+       sav->natt_interval = ((const struct sadb_sa_2*)(sa0))->sadb_sa_natt_interval;
+       sav->natt_offload_interval = ((const struct sadb_sa_2*)(sa0))->sadb_sa_natt_offload_interval;
+       sav->natt_last_activity = natt_now;
+       
+       /*
+        * If SADB_X_EXT_NATT_MULTIPLEUSERS is set, verify that SADB_X_EXT_NATT
+        * is also set and that SADB_X_EXT_NATT_KEEPALIVE is not (i.e. we are
+        * not the peer behind the NAT); otherwise clear the flag.
+        */
+       if ((sav->flags & SADB_X_EXT_NATT_MULTIPLEUSERS) != 0)
+               if ((sav->flags & SADB_X_EXT_NATT) == 0 ||
+                       (sav->flags & SADB_X_EXT_NATT_KEEPALIVE) != 0)
+                       sav->flags &= ~SADB_X_EXT_NATT_MULTIPLEUSERS;
+       
+       lck_mtx_unlock(sadb_mutex);
+       {
+               struct mbuf *n;
+               struct sadb_msg *newmsg;
+               int mbufItems[] = {SADB_EXT_RESERVED, SADB_EXT_SA,
+                       SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST, SADB_X_EXT_IPSECIF,
+                       SADB_EXT_MIGRATE_ADDRESS_SRC, SADB_EXT_MIGRATE_ADDRESS_DST, SADB_X_EXT_MIGRATE_IPSECIF};
+               
+               /* create new sadb_msg to reply. */
+               n = key_gather_mbuf(m, mhp, 1, sizeof(mbufItems)/sizeof(int), mbufItems);
+               if (!n)
+                       return key_senderror(so, m, ENOBUFS);
+               
+               if (n->m_len < sizeof(struct sadb_msg)) {
+                       n = m_pullup(n, sizeof(struct sadb_msg));
+                       if (n == NULL)
+                               return key_senderror(so, m, ENOBUFS);
+               }
+               newmsg = mtod(n, struct sadb_msg *);
+               newmsg->sadb_msg_errno = 0;
+               newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len);
+               
+               m_freem(m);
+               return key_sendup_mbuf(so, n, KEY_SENDUP_ALL);
+       }
+}
+
 /*
  * search SAD with sequence for a SA which state is SADB_SASTATE_LARVAL.
  * only called by key_update().
@@ -7060,7 +7231,7 @@ key_add(
        sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA];
        src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC];
        dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST];
-       ipsec_if = key_get_ipsec_if_from_message(mhp);
+       ipsec_if = key_get_ipsec_if_from_message(mhp, SADB_X_EXT_IPSECIF);
        
        /* XXX boundary check against sa_len */
        KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, ipsec_if ? ipsec_if->if_index : 0, &saidx);
@@ -7070,7 +7241,7 @@ key_add(
        /* get a SA header */
        if ((newsah = key_getsah(&saidx)) == NULL) {
                /* create a new SA header: key_addspi is always used for outbound spi */
-               if ((newsah = key_newsah(&saidx, ipsec_if, key_get_outgoing_ifindex_from_message(mhp), IPSEC_DIR_OUTBOUND)) == NULL) {
+               if ((newsah = key_newsah(&saidx, ipsec_if, key_get_outgoing_ifindex_from_message(mhp, SADB_X_EXT_IPSECIF), IPSEC_DIR_OUTBOUND)) == NULL) {
                        lck_mtx_unlock(sadb_mutex);
                        ipseclog((LOG_DEBUG, "key_add: No more memory.\n"));
                        return key_senderror(so, m, ENOBUFS);
@@ -7328,7 +7499,7 @@ key_delete(
        sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA];
        src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]);
        dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]);
-       ipsec_if = key_get_ipsec_if_from_message(mhp);
+       ipsec_if = key_get_ipsec_if_from_message(mhp, SADB_X_EXT_IPSECIF);
        
        /* XXX boundary check against sa_len */
        KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, ipsec_if ? ipsec_if->if_index : 0, &saidx);
@@ -7403,7 +7574,7 @@ key_delete_all(
        
        src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]);
        dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]);
-       ipsec_if = key_get_ipsec_if_from_message(mhp);
+       ipsec_if = key_get_ipsec_if_from_message(mhp, SADB_X_EXT_IPSECIF);
        
        /* XXX boundary check against sa_len */
        KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, ipsec_if ? ipsec_if->if_index : 0, &saidx);
@@ -7519,7 +7690,7 @@ key_get(
        sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA];
        src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC];
        dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST];
-       ipsec_if = key_get_ipsec_if_from_message(mhp);
+       ipsec_if = key_get_ipsec_if_from_message(mhp, SADB_X_EXT_IPSECIF);
        
        /* XXX boundary check against sa_len */
        KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, ipsec_if ? ipsec_if->if_index : 0, &saidx);
@@ -7790,7 +7961,7 @@ key_getcomb_ah(void)
                                m->m_next = NULL;
                        }
                } else
-                       M_PREPEND(m, l, M_WAITOK);
+                       M_PREPEND(m, l, M_WAITOK, 1);
                if (!m)
                        return NULL;
                
@@ -7836,7 +8007,7 @@ key_getcomb_ipcomp(void)
                                m->m_next = NULL;
                        }
                } else
-                       M_PREPEND(m, l, M_WAITOK);
+                       M_PREPEND(m, l, M_WAITOK, 1);
                if (!m)
                        return NULL;
                
@@ -7882,7 +8053,7 @@ key_getprop(
        
        if (!m)
                return NULL;
-       M_PREPEND(m, l, M_WAITOK);
+       M_PREPEND(m, l, M_WAITOK, 1);
        if (!m)
                return NULL;
        
@@ -8316,7 +8487,7 @@ key_acquire2(
        
        src0 = (const struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC];
        dst0 = (const struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST];
-       ipsec_if = key_get_ipsec_if_from_message(mhp);
+       ipsec_if = key_get_ipsec_if_from_message(mhp, SADB_X_EXT_IPSECIF);
        
        /* XXX boundary check against sa_len */
        /* cast warnings */
@@ -9014,8 +9185,69 @@ static int (*key_typesw[])(struct socket *, struct mbuf *,
        key_getsastat,   /* SADB_GETSASTAT */
        key_spdenable,   /* SADB_X_SPDENABLE */
        key_spddisable,   /* SADB_X_SPDDISABLE */
+       key_migrate,   /* SADB_MIGRATE */
 };
 
+static void
+bzero_mbuf(struct mbuf *m)
+{
+       struct mbuf *mptr  = m;
+       struct sadb_msg *msg = NULL;
+       int offset = 0;
+
+       if (!mptr) {
+               return;
+       }
+
+       if (mptr->m_len >= sizeof(struct sadb_msg)) {
+               msg = mtod(mptr, struct sadb_msg *);
+               if (msg->sadb_msg_type != SADB_ADD &&
+                   msg->sadb_msg_type != SADB_UPDATE) {
+                       return;
+               }
+               offset = sizeof(struct sadb_msg);
+       }
+       bzero(mptr->m_data+offset, mptr->m_len-offset);
+       mptr = mptr->m_next;
+       while (mptr != NULL) {
+               bzero(mptr->m_data, mptr->m_len);
+               mptr = mptr->m_next;
+       }
+}
+
+static void
+bzero_keys(struct sadb_msghdr *mh)
+{
+       int extlen = 0;
+       int offset = 0;
+
+       if (!mh) {
+               return;
+       }
+       offset = sizeof(struct sadb_key);
+
+       if (mh->ext[SADB_EXT_KEY_ENCRYPT]) {
+               struct sadb_key *key = (struct sadb_key*)mh->ext[SADB_EXT_KEY_ENCRYPT];
+               extlen = key->sadb_key_bits >> 3;
+
+               if (mh->extlen[SADB_EXT_KEY_ENCRYPT] >= offset + extlen) {
+                       bzero((uint8_t *)mh->ext[SADB_EXT_KEY_ENCRYPT]+offset, extlen);
+               } else {
+                       bzero(mh->ext[SADB_EXT_KEY_ENCRYPT], mh->extlen[SADB_EXT_KEY_ENCRYPT]);
+               }
+       }
+       if (mh->ext[SADB_EXT_KEY_AUTH]) {
+               struct sadb_key *key = (struct sadb_key*)mh->ext[SADB_EXT_KEY_AUTH];
+               extlen = key->sadb_key_bits >> 3;
+
+               if (mh->extlen[SADB_EXT_KEY_AUTH] >= offset + extlen) {
+                       bzero((uint8_t *)mh->ext[SADB_EXT_KEY_AUTH]+offset, extlen);
+               } else {
+                       bzero(mh->ext[SADB_EXT_KEY_AUTH], mh->extlen[SADB_EXT_KEY_AUTH]);
+               }
+       }
+}
+
 /*
  * parse sadb_msg buffer to process PFKEYv2,
  * and create a data to response if needed.
@@ -9037,7 +9269,8 @@ key_parse(
        u_int orglen;
        int error;
        int target;
-       
+       Boolean keyAligned = FALSE;
+
        lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED);
        
        /* sanity check */
@@ -9101,12 +9334,14 @@ key_parse(
                        }
                }
                if (!n) {
+                       bzero_mbuf(m);
                        m_freem(m);
                        return ENOBUFS;
                }
                m_copydata(m, 0, m->m_pkthdr.len, mtod(n, caddr_t));
                n->m_pkthdr.len = n->m_len = m->m_pkthdr.len;
                n->m_next = NULL;
+               bzero_mbuf(m);
                m_freem(m);
                m = n;
        }
@@ -9117,10 +9352,12 @@ key_parse(
                return error;
        
        if (m->m_next) {        /*XXX*/
+               bzero_mbuf(m);
                m_freem(m);
                return ENOBUFS;
        }
        
+       keyAligned = TRUE;
        msg = mh.msg;
        
        /* check SA type */
@@ -9277,9 +9514,20 @@ key_parse(
                goto senderror;
        }
        
-       return (*key_typesw[msg->sadb_msg_type])(so, m, &mh);
+       error = (*key_typesw[msg->sadb_msg_type])(so, m, &mh);
+
+       // mh.ext points to the mbuf content.
+       // Zero out Encryption and Integrity keys if present.
+       bzero_keys(&mh);
        
+       return error;
+
 senderror:
+       if (keyAligned) {
+               bzero_keys(&mh);
+       } else {
+               bzero_mbuf(m);
+       }
        msg->sadb_msg_errno = error;
        return key_sendup_mbuf(so, m, target);
 }
@@ -9367,6 +9615,9 @@ key_align(
                        case SADB_X_EXT_ADDR_RANGE_SRC_END:
                        case SADB_X_EXT_ADDR_RANGE_DST_START:
                        case SADB_X_EXT_ADDR_RANGE_DST_END:
+                       case SADB_EXT_MIGRATE_ADDRESS_SRC:
+                       case SADB_EXT_MIGRATE_ADDRESS_DST:
+                       case SADB_X_EXT_MIGRATE_IPSECIF:
                                /* duplicate check */
                                /*
                                 * XXX Are there duplication payloads of either
@@ -9376,6 +9627,7 @@ key_align(
                                        ipseclog((LOG_DEBUG,
                                                          "key_align: duplicate ext_type %u "
                                                          "is passed.\n", ext->sadb_ext_type));
+                                       bzero_mbuf(m);
                                        m_freem(m);
                                        PFKEY_STAT_INCREMENT(pfkeystat.out_dupext);
                                        return EINVAL;
@@ -9385,6 +9637,7 @@ key_align(
                                ipseclog((LOG_DEBUG,
                                                  "key_align: invalid ext_type %u is passed.\n",
                                                  ext->sadb_ext_type));
+                               bzero_mbuf(m);
                                m_freem(m);
                                PFKEY_STAT_INCREMENT(pfkeystat.out_invexttype);
                                return EINVAL;
@@ -9393,6 +9646,7 @@ key_align(
                extlen = PFKEY_UNUNIT64(ext->sadb_ext_len);
                
                if (key_validate_ext(ext, extlen)) {
+                       bzero_mbuf(m);
                        m_freem(m);
                        PFKEY_STAT_INCREMENT(pfkeystat.out_invlen);
                        return EINVAL;
@@ -9411,6 +9665,7 @@ key_align(
        }
        
        if (off != end) {
+               bzero_mbuf(m);
                m_freem(m);
                PFKEY_STAT_INCREMENT(pfkeystat.out_invlen);
                return EINVAL;
@@ -9450,6 +9705,8 @@ key_validate_ext(
                case SADB_X_EXT_ADDR_RANGE_SRC_END:
                case SADB_X_EXT_ADDR_RANGE_DST_START:
                case SADB_X_EXT_ADDR_RANGE_DST_END:
+               case SADB_EXT_MIGRATE_ADDRESS_SRC:
+               case SADB_EXT_MIGRATE_ADDRESS_DST:
                        baselen = PFKEY_ALIGN8(sizeof(struct sadb_address));
                        checktype = ADDR;
                        break;
@@ -10007,13 +10264,13 @@ key_delsp_for_ipsec_if (ifnet_t ipsec_if)
 
 __private_extern__ u_int32_t
 key_fill_offload_frames_for_savs (ifnet_t ifp,
-                                                                 struct ipsec_offload_frame *frames_array,
-                                                                 u_int32_t frames_array_count,
-                                                                 size_t frame_data_offset)
+    struct ifnet_keepalive_offload_frame *frames_array,
+    u_int32_t frames_array_count,
+    size_t frame_data_offset)
 {
        struct secashead *sah = NULL;
        struct secasvar *sav = NULL;
-       struct ipsec_offload_frame *frame = frames_array;
+       struct ifnet_keepalive_offload_frame *frame = frames_array;
        u_int32_t frame_index = 0;
 
        if (frame == NULL || frames_array_count == 0) {
index 362ce530fd0ff2a574e4e3d73d3f66d77f4f9953..f1d5c830b9f06499caeb696cf7867a2b129e14b6 100644 (file)
@@ -65,11 +65,8 @@ keydb_newsecpolicy()
 
        lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED);
 
-       p = (struct secpolicy *)_MALLOC(sizeof(*p), M_SECA, M_WAITOK);
-       if (!p)
-               return p;
-       bzero(p, sizeof(*p));
-       return p;
+       return (struct secpolicy *)_MALLOC(sizeof(*p), M_SECA,
+           M_WAITOK | M_ZERO);
 }
 
 void
@@ -91,15 +88,15 @@ keydb_newsecashead()
 
        lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED);
 
-       p = (struct secashead *)_MALLOC(sizeof(*p), M_SECA, M_NOWAIT);
+       p = (struct secashead *)_MALLOC(sizeof(*p), M_SECA, M_NOWAIT | M_ZERO);
        if (!p) {
                lck_mtx_unlock(sadb_mutex);
-               p = (struct secashead *)_MALLOC(sizeof(*p), M_SECA, M_WAITOK);
+               p = (struct secashead *)_MALLOC(sizeof(*p), M_SECA,
+                   M_WAITOK | M_ZERO);
                lck_mtx_lock(sadb_mutex);
        }
        if (!p) 
                return p;
-       bzero(p, sizeof(*p));
        for (i = 0; i < sizeof(p->savtree)/sizeof(p->savtree[0]); i++)
                LIST_INIT(&p->savtree[i]);
        return p;
@@ -180,28 +177,28 @@ keydb_newsecreplay(wsize)
        
        lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED);
 
-       p = (struct secreplay *)_MALLOC(sizeof(*p), M_SECA, M_NOWAIT);
+       p = (struct secreplay *)_MALLOC(sizeof(*p), M_SECA, M_NOWAIT | M_ZERO);
        if (!p) {
                lck_mtx_unlock(sadb_mutex);
-               p = (struct secreplay *)_MALLOC(sizeof(*p), M_SECA, M_WAITOK);
+               p = (struct secreplay *)_MALLOC(sizeof(*p), M_SECA,
+                   M_WAITOK | M_ZERO);
                lck_mtx_lock(sadb_mutex);
        }
        if (!p)
                return p;
 
-       bzero(p, sizeof(*p));
        if (wsize != 0) {
-               p->bitmap = (caddr_t)_MALLOC(wsize, M_SECA, M_NOWAIT);
+               p->bitmap = (caddr_t)_MALLOC(wsize, M_SECA, M_NOWAIT | M_ZERO);
                if (!p->bitmap) {
                        lck_mtx_unlock(sadb_mutex);
-                       p->bitmap = (caddr_t)_MALLOC(wsize, M_SECA, M_WAITOK);
+                       p->bitmap = (caddr_t)_MALLOC(wsize, M_SECA,
+                           M_WAITOK | M_ZERO);
                        lck_mtx_lock(sadb_mutex);
                        if (!p->bitmap) {
                                _FREE(p, M_SECA);
                                return NULL;
                        }
                }
-               bzero(p->bitmap, wsize);
        }
        p->wsize = wsize;
        return p;
index c2e4630738e4e2cb9c512284c458cf0ad92a836f..715e5e6fbce1664074690313e5ce80bce4543601 100644 (file)
@@ -113,6 +113,7 @@ struct secasvar {
        u_int16_t       remote_ike_port;
        u_int16_t       natt_encapsulated_src_port;     /* network byte order */
        u_int16_t       natt_interval; /* Interval in seconds */
+       u_int16_t       natt_offload_interval; /* Hardware Offload Interval in seconds */
        
        u_int8_t        always_expire; /* Send expire/delete messages even if unused */
 
index 69c1e92c83041998b2d7bb64f94667fba8e5d788..dd7e2da45af54c1e2d5a576da35847977ee29d34 100644 (file)
@@ -171,7 +171,7 @@ key_sendup0(rp, m, promisc)
        if (promisc) {
                struct sadb_msg *pmsg;
 
-               M_PREPEND(m, sizeof(struct sadb_msg), M_NOWAIT);
+               M_PREPEND(m, sizeof(struct sadb_msg), M_NOWAIT, 1);
                if (m && m->m_len < sizeof(struct sadb_msg))
                        m = m_pullup(m, sizeof(struct sadb_msg));
                if (!m) {
@@ -358,10 +358,10 @@ key_attach(struct socket *so, int proto, struct proc *p)
 
        if (sotorawcb(so) != 0)
                return EISCONN; /* XXX panic? */
-       kp = (struct keycb *)_MALLOC(sizeof *kp, M_PCB, M_WAITOK); /* XXX */
+       kp = (struct keycb *)_MALLOC(sizeof (*kp), M_PCB,
+           M_WAITOK | M_ZERO); /* XXX */
        if (kp == 0)
                return ENOBUFS;
-       bzero(kp, sizeof *kp);
 
        so->so_pcb = (caddr_t)kp;
        kp->kp_promisc = kp->kp_registered = 0;
index 370ddaa832451120255b3d98017eab728a6c2cdf..3bc6a641c24f6bf872d510ee29aab9e03d7ccdaa 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -106,7 +106,6 @@ extern int nfs_ticks;
 #define        NFS_ASYNCTHREADMAXIDLE  60      /* Seconds before idle nfsiods are reaped */
 #define        NFS_DEFSTATFSRATELIMIT  10      /* Def. max # statfs RPCs per second */
 #define NFS_REQUESTDELAY       10      /* ms interval to check request queue */
-#define NFSRV_DEADSOCKDELAY    5       /* Seconds before dead sockets are reaped */
 #define NFSRV_MAXWGATHERDELAY  100     /* Max. write gather delay (msec) */
 #ifndef NFSRV_WGATHERDELAY
 #define NFSRV_WGATHERDELAY     1       /* Default write gather delay (msec) */
@@ -186,6 +185,7 @@ extern int nfs_ticks;
 #define NFS_MATTR_REALM                        24      /* Realm to authenticate with */
 #define NFS_MATTR_PRINCIPAL            25      /* GSS principal to authenticate with */
 #define NFS_MATTR_SVCPRINCIPAL         26      /* GSS principal to authenticate to, the server principal */
+#define NFS_MATTR_NFS_VERSION_RANGE    27      /* Packed version range to try */
 
 /* NFS mount flags */
 #define NFS_MFLAG_SOFT                 0       /* soft mount (requests fail if unresponsive) */
@@ -207,6 +207,11 @@ extern int nfs_ticks;
 #define NFS_MFLAG_MNTUDP               16      /* MOUNT protocol should use UDP */
 #define NFS_MFLAG_MNTQUICK             17      /* use short timeouts while mounting */
 
+/* Macros for packing and unpacking packed versions */
+#define PVER2MAJOR(M) ((uint32_t)(((M) >> 16) & 0xffff))
+#define PVER2MINOR(m) ((uint32_t)((m) & 0xffff))
+#define VER2PVER(M, m) ((uint32_t)((M) << 16) | ((m) & 0xffff))
+
 /* NFS advisory file locking modes */
 #define NFS_LOCK_MODE_ENABLED          0       /* advisory file locking enabled */
 #define NFS_LOCK_MODE_DISABLED         1       /* do not support advisory file locking */
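
NFS_MATTR_NFS_VERSION_RANGE packs an NFS version as the major number in the upper 16 bits and the minor in the lower 16 bits of a 32-bit word; the three macros above convert between the packed and unpacked forms. With illustrative values:

#include <stdint.h>

/* Round-trip a version through the packing macros (illustrative only). */
static void
packed_version_example(void)
{
	uint32_t pv    = VER2PVER(4, 0);   /* NFSv4.0 -> 0x00040000 */
	uint32_t major = PVER2MAJOR(pv);   /* 4 */
	uint32_t minor = PVER2MINOR(pv);   /* 0 */

	(void)major; (void)minor;
}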
@@ -931,8 +936,6 @@ extern lck_grp_t *nfs_request_grp;
 
 #define NFSNOLIST      ((void *)0x0badcafe)    /* sentinel value for nfs lists */
 #define NFSREQNOLIST   NFSNOLIST               /* sentinel value for nfsreq lists */
-#define NFSIODCOMPLETING       ((void *)0x10d) /* sentinel value for iod processing
-                                                  async I/O w/callback being completed */
 
 /* Flag values for r_flags */
 #define R_TIMING       0x00000001      /* timing request (in mntp) */
@@ -952,6 +955,7 @@ extern lck_grp_t *nfs_request_grp;
 #define R_RESENDQ      0x00004000      /* async request currently on resendq */
 #define R_SENDING      0x00008000      /* request currently being sent */
 #define R_SOFT         0x00010000      /* request is soft - don't retry or reconnect */
+#define R_IOD          0x00020000      /* request is being managed by an IOD */
 
 #define R_NOINTR       0x20000000      /* request should not be interrupted by a signal */
 #define R_RECOVER      0x40000000      /* a state recovery RPC - during NFSSTA_RECOVER */
@@ -970,7 +974,7 @@ extern int nfs_lockd_mounts, nfs_lockd_request_sent, nfs_single_des;
 extern int nfs_tprintf_initial_delay, nfs_tprintf_delay;
 extern int nfsiod_thread_count, nfsiod_thread_max, nfs_max_async_writes;
 extern int nfs_idmap_ctrl, nfs_callback_port;
-extern int nfs_is_mobile, nfs_readlink_nocache;
+extern int nfs_is_mobile, nfs_readlink_nocache, nfs_root_steals_ctx;
 extern uint32_t nfs_squishy_flags;
 extern uint32_t nfs_debug_ctl;
 
@@ -1050,8 +1054,8 @@ extern struct nfsrv_sock *nfsrv_udpsock, *nfsrv_udp6sock;
  * nfsrv_sockwork - sockets being worked on which may have more work to do (ns_svcq)
  * nfsrv_sockwg - sockets with pending write gather input (ns_wgq)
  */
-extern TAILQ_HEAD(nfsrv_sockhead, nfsrv_sock) nfsrv_socklist, nfsrv_deadsocklist,
-                                               nfsrv_sockwg, nfsrv_sockwait, nfsrv_sockwork;
+extern TAILQ_HEAD(nfsrv_sockhead, nfsrv_sock) nfsrv_socklist, nfsrv_sockwg,
+                                               nfsrv_sockwait, nfsrv_sockwork;
 
 /* lock groups for nfsrv_sock's */
 extern lck_grp_t *nfsrv_slp_rwlock_group;
@@ -1123,7 +1127,7 @@ extern in_port_t nfs4_cb_port, nfs4_cb_port6;
 extern thread_call_t   nfs_request_timer_call;
 extern thread_call_t   nfs_buf_timer_call;
 extern thread_call_t   nfs4_callback_timer_call;
-extern thread_call_t   nfsrv_deadsock_timer_call;
+extern thread_call_t   nfsrv_idlesock_timer_call;
 #if CONFIG_FSE
 extern thread_call_t   nfsrv_fmod_timer_call;
 #endif
@@ -1392,13 +1396,14 @@ void    nfsrv_cleancache(void);
 void   nfsrv_cleanup(void);
 int    nfsrv_credcheck(struct nfsrv_descript *, vfs_context_t, struct nfs_export *,
                        struct nfs_export_options *);
-void   nfsrv_deadsock_timer(void *, void *);
+void   nfsrv_idlesock_timer(void *, void *);
 int    nfsrv_dorec(struct nfsrv_sock *, struct nfsd *, struct nfsrv_descript **);
 int    nfsrv_errmap(struct nfsrv_descript *, int);
 int    nfsrv_export(struct user_nfs_export_args *, vfs_context_t);
 int    nfsrv_fhmatch(struct nfs_filehandle *, struct nfs_filehandle *);
 int    nfsrv_fhtovp(struct nfs_filehandle *, struct nfsrv_descript *, vnode_t *,
                        struct nfs_export **, struct nfs_export_options **);
+int    nfsrv_check_exports_allow_address(mbuf_t);
 #if CONFIG_FSE
 void   nfsrv_fmod_timer(void *, void *);
 #endif
index 376ae34434f40c4a469843aeedb4bcffc26eb044..60a52d8679ce358d02b1a3ca8993c8b65990076e 100644 (file)
@@ -236,7 +236,7 @@ nfs4_setclientid(struct nfsmount *nmp)
        // SETCLIENTID
        numops = 1;
        nfsm_chain_build_alloc_init(error, &nmreq, 14 * NFSX_UNSIGNED + nmp->nm_longid->nci_idlen);
-       nfsm_chain_add_compound_header(error, &nmreq, "setclid", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "setclid", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_SETCLIENTID);
        /* nfs_client_id4  client; */
@@ -300,7 +300,7 @@ nfs4_setclientid(struct nfsmount *nmp)
        // SETCLIENTID_CONFIRM
        numops = 1;
        nfsm_chain_build_alloc_init(error, &nmreq, 15 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "setclid_conf", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "setclid_conf", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_SETCLIENTID_CONFIRM);
        nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid);
@@ -325,7 +325,7 @@ nfs4_setclientid(struct nfsmount *nmp)
        // PUTFH, GETATTR(FS)
        numops = 2;
        nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "setclid_attr", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "setclid_attr", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, nmp->nm_dnp->n_fhp, nmp->nm_dnp->n_fhsize);
@@ -377,7 +377,7 @@ nfs4_renew(struct nfsmount *nmp, int rpcflag)
        // RENEW
        numops = 1;
        nfsm_chain_build_alloc_init(error, &nmreq, 8 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "renew", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "renew", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_RENEW);
        nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid);
@@ -533,7 +533,7 @@ gotargs:
        numops = 2;
        nfsm_chain_build_alloc_init(error, &nmreq,
                4 * NFSX_UNSIGNED + NFSX_FH(nfsvers) + nfsm_rndup(namelen));
-       nfsm_chain_add_compound_header(error, &nmreq, "secinfo", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "secinfo", nmp->nm_minor_vers, numops);
        numops--;
        if (fhp) {
                nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
@@ -665,7 +665,7 @@ nfs4_get_fs_locations(
        NFSREQ_SECINFO_SET(&si, NULL, fhp, fhsize, name, 0);
        numops = 3;
        nfsm_chain_build_alloc_init(error, &nmreq, 18 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "fs_locations", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "fs_locations", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, NFS_VER4, fhp, fhsize);
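A note on the repeated change in these hunks: every call site now threads nmp->nm_minor_vers into nfsm_chain_add_compound_header, because the NFSv4 COMPOUND argument stream opens with the tag, the minorversion word, and the operation count. Below is a minimal sketch of a header builder in that wire order; it is illustrative only, not the actual macro from nfsm_subs.h, and it assumes an nfsm_chain_add_string helper that marshals an XDR string the way the other nfsm_chain_add_* helpers above marshal their types.

static int
nfs4_compound_header_sketch(struct nfsm_chain *nmc, const char *tag,
	uint32_t minor_vers, uint32_t numops)
{
	int error = 0;

	/* COMPOUND4args: utf8str_cs tag, uint32 minorversion, uint32 numops */
	nfsm_chain_add_string(error, nmc, tag, strlen(tag));
	nfsm_chain_add_32(error, nmc, minor_vers);
	nfsm_chain_add_32(error, nmc, numops);
	return (error);
}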
index 2682d94be706efbdcbae313e7942881f8a04bf86..a018cdaa77dc760fcf23cd33f75b33dbae2732ff 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2006-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -102,7 +102,7 @@ nfs4_access_rpc(nfsnode_t np, u_int32_t *access, int rpcflags, vfs_context_t ctx
        // PUTFH, ACCESS, GETATTR
        numops = 3;
        nfsm_chain_build_alloc_init(error, &nmreq, 17 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "access", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "access", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
@@ -214,7 +214,7 @@ nfs4_getattr_rpc(
        // PUTFH, GETATTR
        numops = 2;
        nfsm_chain_build_alloc_init(error, &nmreq, 15 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "getattr", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "getattr", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nfsvers, fhp, fhsize);
@@ -271,7 +271,7 @@ nfs4_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx)
        // PUTFH, GETATTR, READLINK
        numops = 3;
        nfsm_chain_build_alloc_init(error, &nmreq, 16 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "readlink", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "readlink", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize);
@@ -341,7 +341,7 @@ nfs4_read_rpc_async(
        // PUTFH, READ, GETATTR
        numops = 3;
        nfsm_chain_build_alloc_init(error, &nmreq, 22 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "read", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "read", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
@@ -453,7 +453,7 @@ nfs4_write_rpc_async(
        // PUTFH, WRITE, GETATTR
        numops = 3;
        nfsm_chain_build_alloc_init(error, &nmreq, 25 * NFSX_UNSIGNED + len);
-       nfsm_chain_add_compound_header(error, &nmreq, "write", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "write", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
@@ -578,7 +578,7 @@ restart:
        // PUTFH, REMOVE, GETATTR
        numops = 3;
        nfsm_chain_build_alloc_init(error, &nmreq, 17 * NFSX_UNSIGNED + namelen);
-       nfsm_chain_add_compound_header(error, &nmreq, "remove", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "remove", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
@@ -654,7 +654,7 @@ nfs4_rename_rpc(
        // PUTFH(FROM), SAVEFH, PUTFH(TO), RENAME, GETATTR(TO), RESTOREFH, GETATTR(FROM)
        numops = 7;
        nfsm_chain_build_alloc_init(error, &nmreq, 30 * NFSX_UNSIGNED + fnamelen + tnamelen);
-       nfsm_chain_add_compound_header(error, &nmreq, "rename", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "rename", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nfsvers, fdnp->n_fhp, fdnp->n_fhsize);
@@ -854,7 +854,7 @@ nfs4_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx)
                // PUTFH, GETATTR, READDIR
                numops = 3;
                nfsm_chain_build_alloc_init(error, &nmreq, 26 * NFSX_UNSIGNED);
-               nfsm_chain_add_compound_header(error, &nmreq, tag, numops);
+               nfsm_chain_add_compound_header(error, &nmreq, tag, nmp->nm_minor_vers, numops);
                numops--;
                nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
                nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
@@ -1104,7 +1104,7 @@ nfs4_lookup_rpc_async(
        // PUTFH, GETATTR, LOOKUP(P), GETFH, GETATTR (FH)
        numops = 5;
        nfsm_chain_build_alloc_init(error, &nmreq, 20 * NFSX_UNSIGNED + namelen);
-       nfsm_chain_add_compound_header(error, &nmreq, "lookup", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "lookup", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
@@ -1160,6 +1160,8 @@ nfs4_lookup_rpc_async_finish(
        struct nfsm_chain nmrep;
 
        nmp = NFSTONMP(dnp);
+       if (nmp == NULL)
+               return (ENXIO);
        nfsvers = nmp->nm_vers;
        if ((name[0] == '.') && (name[1] == '.') && (namelen == 2))
                isdotdot = 1;
@@ -1255,7 +1257,7 @@ nfs4_commit_rpc(
        // PUTFH, COMMIT, GETATTR
        numops = 3;
        nfsm_chain_build_alloc_init(error, &nmreq, 19 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "commit", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "commit", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
@@ -1325,7 +1327,7 @@ nfs4_pathconf_rpc(
        // PUTFH, GETATTR
        numops = 2;
        nfsm_chain_build_alloc_init(error, &nmreq, 16 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "pathconf", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "pathconf", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
@@ -1540,7 +1542,7 @@ tryagain:
        // PUTFH, SETATTR, GETATTR
        numops = 3;
        nfsm_chain_build_alloc_init(error, &nmreq, 40 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "setattr", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "setattr", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
@@ -2735,10 +2737,25 @@ restart:
         * So grab another open count matching the accessMode passed in.
         * If we already had an mmap open, prefer read/write without deny mode.
         * This means we may have to drop the current mmap open first.
+        *
+        * N.B. We should have an open for the mmap, because mmap was
+        * called on an open descriptor, or we've created an open for read
+        * from reading the first page for execve. However, if we
+        * piggybacked on an existing NFS_OPEN_SHARE_ACCESS_READ/NFS_OPEN_SHARE_DENY_NONE
+        * open, that open may have closed.
         */
 
-       if (!nofp->nof_access) {
-               if (accessMode != NFS_OPEN_SHARE_ACCESS_READ) {
+       if (!(nofp->nof_access & NFS_OPEN_SHARE_ACCESS_READ)) {
+               if (nofp->nof_flags & NFS_OPEN_FILE_NEEDCLOSE) {
+                       /* We shouldn't get here. We've already opened the file for execve */
+                       NP(np, "nfs_vnop_mmap: File already needs close access: 0x%x, cred: %d thread: %lld",
+                          nofp->nof_access, kauth_cred_getuid(nofp->nof_owner->noo_cred), thread_tid(vfs_context_thread(ctx)));
+               }
+               /*
+                * mmappings for execve are read only. Get out with EPERM if the accessMode is not ACCESS_READ
+                * or the access would be denied. Other accesses should have an open descriptor for the mapping.
+                */
+               if (accessMode != NFS_OPEN_SHARE_ACCESS_READ || (accessMode & nofp->nof_deny)) {
                        /* not asking for just read access -> fail */
                        error = EPERM;
                        goto out;
@@ -2795,6 +2812,29 @@ restart:
                                denyMode = NFS_OPEN_SHARE_DENY_WRITE;
                        else if (nofp->nof_r_drw)
                                denyMode = NFS_OPEN_SHARE_DENY_BOTH;
+               } else if (nofp->nof_d_rw || nofp->nof_d_rw_dw || nofp->nof_d_rw_drw) {
+                       /*
+                        * This clause and the one below are to co-opt a read/write access
+                        * for a read-only mmapping. We probably got here because an
+                        * existing rw open for an executable file already exists.
+                        */
+                       delegated = 1;
+                       accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
+                       if (nofp->nof_d_rw)
+                               denyMode = NFS_OPEN_SHARE_DENY_NONE;
+                       else if (nofp->nof_d_rw_dw)
+                               denyMode = NFS_OPEN_SHARE_DENY_WRITE;
+                       else if (nofp->nof_d_rw_drw)
+                               denyMode = NFS_OPEN_SHARE_DENY_BOTH;
+               } else if (nofp->nof_rw || nofp->nof_rw_dw || nofp->nof_rw_drw) {
+                       delegated = 0;
+                       accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
+                       if (nofp->nof_rw)
+                               denyMode = NFS_OPEN_SHARE_DENY_NONE;
+                       else if (nofp->nof_rw_dw)
+                               denyMode = NFS_OPEN_SHARE_DENY_WRITE;
+                       else if (nofp->nof_rw_drw)
+                               denyMode = NFS_OPEN_SHARE_DENY_BOTH;
                } else {
                        error = EPERM;
                }
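The new clauses above let a read-only mapping co-opt an existing read/write open, preferring a delegated open over an undelegated one and deriving the matching deny mode from whichever open count is populated. The same selection is sketched below factored into a helper, using the field names from the hunk; this helper is not in the sources and is only meant to make the decision table explicit.

static int
nfs_mmap_coopt_rw_open_sketch(struct nfs_open_file *nofp, int *delegated,
	uint32_t *accessMode, uint32_t *denyMode)
{
	if (nofp->nof_d_rw || nofp->nof_d_rw_dw || nofp->nof_d_rw_drw)
		*delegated = 1;		/* prefer a delegated r/w open */
	else if (nofp->nof_rw || nofp->nof_rw_dw || nofp->nof_rw_drw)
		*delegated = 0;		/* fall back to an undelegated r/w open */
	else
		return (EPERM);		/* nothing suitable to co-opt */

	*accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
	if (*delegated ? nofp->nof_d_rw : nofp->nof_rw)
		*denyMode = NFS_OPEN_SHARE_DENY_NONE;
	else if (*delegated ? nofp->nof_d_rw_dw : nofp->nof_rw_dw)
		*denyMode = NFS_OPEN_SHARE_DENY_WRITE;
	else
		*denyMode = NFS_OPEN_SHARE_DENY_BOTH;
	return (0);
}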
@@ -3164,7 +3204,7 @@ nfs_file_lock_destroy(struct nfs_file_lock *nflp)
                FREE(nflp, M_TEMP);
        } else {
                lck_mtx_lock(&nlop->nlo_lock);
-               bzero(nflp, sizeof(nflp));
+               bzero(nflp, sizeof(*nflp));
                lck_mtx_unlock(&nlop->nlo_lock);
        }
        nfs_lock_owner_rele(nlop);
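The one-character change above, sizeof(nflp) to sizeof(*nflp), fixes the classic sizeof-of-a-pointer bug: the old code zeroed only pointer-sized bytes of the lock structure before reusing it. A self-contained userland illustration of the pitfall follows; the structure below is just a stand-in, not the real struct nfs_file_lock.

#include <stdio.h>
#include <string.h>

struct file_lock_like { char pad[96]; };	/* stand-in for struct nfs_file_lock */

int
main(void)
{
	struct file_lock_like lock, *nflp = &lock;

	memset(nflp, 0xff, sizeof(lock));
	memset(nflp, 0, sizeof(nflp));		/* wrong: clears only 8 bytes on LP64 */
	memset(nflp, 0, sizeof(*nflp));		/* right: clears the whole structure */
	printf("%zu vs %zu bytes cleared\n", sizeof(nflp), sizeof(*nflp));
	return (0);
}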
@@ -3264,7 +3304,7 @@ nfs4_setlock_rpc(
        // PUTFH, GETATTR, LOCK
        numops = 3;
        nfsm_chain_build_alloc_init(error, &nmreq, 33 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "lock", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "lock", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize);
@@ -3360,7 +3400,7 @@ nfs4_unlock_rpc(
        // PUTFH, GETATTR, LOCKU
        numops = 3;
        nfsm_chain_build_alloc_init(error, &nmreq, 26 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "unlock", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "unlock", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize);
@@ -3434,7 +3474,7 @@ nfs4_getlock_rpc(
        // PUTFH, GETATTR, LOCKT
        numops = 3;
        nfsm_chain_build_alloc_init(error, &nmreq, 26 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "locktest", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "locktest", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize);
@@ -4582,7 +4622,7 @@ nfs4_open_confirm_rpc(
        // PUTFH, OPEN_CONFIRM, GETATTR
        numops = 3;
        nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "open_confirm", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "open_confirm", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, fhp, fhlen);
@@ -4700,7 +4740,7 @@ again:
        // PUTFH, SAVEFH, OPEN(CREATE?), GETATTR(FH), RESTOREFH, GETATTR
        numops = 6;
        nfsm_chain_build_alloc_init(error, &nmreq, 53 * NFSX_UNSIGNED + cnp->cn_namelen);
-       nfsm_chain_add_compound_header(error, &nmreq, create ? "create" : "open", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, create ? "create" : "open", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
@@ -5040,6 +5080,7 @@ nfs4_claim_delegated_open_rpc(
                MALLOC(filename, char *, namelen+1, M_TEMP, M_WAITOK);
                if (!filename) {
                        error = ENOMEM;
+                       nfs_node_unlock(np);
                        goto out;
                }
                snprintf(filename, namelen+1, "%s", name);
@@ -5047,8 +5088,7 @@ nfs4_claim_delegated_open_rpc(
        nfs_node_unlock(np);
 
        if ((error = nfs_open_owner_set_busy(noop, NULL)))
-               return (error);
-
+               goto out;
        NVATTR_INIT(&nvattr);
        delegation = NFS_OPEN_DELEGATE_NONE;
        dstateid = np->n_dstateid;
@@ -5060,7 +5100,7 @@ nfs4_claim_delegated_open_rpc(
        // PUTFH, OPEN, GETATTR(FH)
        numops = 3;
        nfsm_chain_build_alloc_init(error, &nmreq, 48 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "open_claim_d", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "open_claim_d", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nfsvers, VTONFS(dvp)->n_fhp, VTONFS(dvp)->n_fhsize);
@@ -5266,7 +5306,7 @@ nfs4_open_reclaim_rpc(
        // PUTFH, OPEN, GETATTR(FH)
        numops = 3;
        nfsm_chain_build_alloc_init(error, &nmreq, 48 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "open_reclaim", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "open_reclaim", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
@@ -5449,7 +5489,7 @@ nfs4_open_downgrade_rpc(
        // PUTFH, OPEN_DOWNGRADE, GETATTR
        numops = 3;
        nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "open_downgrd", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "open_downgrd", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
@@ -5519,7 +5559,7 @@ nfs4_close_rpc(
        // PUTFH, CLOSE, GETATTR
        numops = 3;
        nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "close", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "close", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
@@ -5989,7 +6029,7 @@ nfs4_delegreturn_rpc(struct nfsmount *nmp, u_char *fhp, int fhlen, struct nfs_st
        // PUTFH, DELEGRETURN
        numops = 2;
        nfsm_chain_build_alloc_init(error, &nmreq, 16 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "delegreturn", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "delegreturn", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, fhp, fhlen);
@@ -6065,42 +6105,76 @@ restart:
                nfs_open_owner_rele(noop);
                return (error);
        }
-       if (!nofp->nof_access) {
-               /* we don't have the file open, so open it for read access */
-               error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
-               if (error) {
+       /*
+        * Since the read path is a hot path, if we already have
+        * read access, let's go and try to do the read without
+        * busying the mount and open file node for this open owner.
+        *
+        * N.B. This is inherently racy w.r.t. an execve using
+        * an already open file, in that the read at the end of
+        * this routine will be racing with a potential close.
+        * The code below ultimately has the same problem. In practice
+        * this does not seem to be an issue.
+        */
+       if (nofp->nof_access & NFS_OPEN_SHARE_ACCESS_READ) {
+               nfs_open_owner_rele(noop);
+               goto do_read;
+       }
+       error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
+       if (error) {
+               nfs_open_owner_rele(noop);
+               return (error);
+       }
+       /*
+        * If we don't have a file already open with the access we need (read) then
+        * we need to open one. Otherwise we just co-opt an open. We might not already
+        * have access because we're trying to read the first page of the
+        * file for execve.
+        */
+       error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
+       if (error) {
+               nfs_mount_state_in_use_end(nmp, 0);
+               nfs_open_owner_rele(noop);
+               return (error);
+       }
+       if (!(nofp->nof_access & NFS_OPEN_SHARE_ACCESS_READ)) {
+               /* we don't have the file open, so open it for read access if we're not denied */
+               if (nofp->nof_flags & NFS_OPEN_FILE_NEEDCLOSE) {
+                       NP(np, "nfs_vnop_read: File already needs close access: 0x%x, cred: %d thread: %lld",
+                          nofp->nof_access, kauth_cred_getuid(nofp->nof_owner->noo_cred), thread_tid(vfs_context_thread(ctx)));
+               }
+               if (nofp->nof_deny & NFS_OPEN_SHARE_DENY_READ) {
+                       nfs_open_file_clear_busy(nofp);
+                       nfs_mount_state_in_use_end(nmp, 0);
                        nfs_open_owner_rele(noop);
-                       return (error);
+                       return (EPERM);
                }
                if (np->n_flag & NREVOKE) {
                        error = EIO;
+                       nfs_open_file_clear_busy(nofp);
                        nfs_mount_state_in_use_end(nmp, 0);
                        nfs_open_owner_rele(noop);
                        return (error);
                }
-               error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
-               if (error)
-                       nofp = NULL;
-               if (!error) {
-                       if (nmp->nm_vers < NFS_VER4) {
-                               /* NFS v2/v3 opens are always allowed - so just add it. */
-                               nfs_open_file_add_open(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, 0);
-                       } else {
-                               error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, ctx);
-                       }
+               if (nmp->nm_vers < NFS_VER4) {
+                       /* NFS v2/v3 opens are always allowed - so just add it. */
+                       nfs_open_file_add_open(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, 0);
+               } else {
+                       error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, ctx);
                }
                if (!error)
                        nofp->nof_flags |= NFS_OPEN_FILE_NEEDCLOSE;
-               if (nofp)
-                       nfs_open_file_clear_busy(nofp);
-               if (nfs_mount_state_in_use_end(nmp, error)) {
-                       nofp = NULL;
-                       goto restart;
-               }
+       }
+       if (nofp)
+               nfs_open_file_clear_busy(nofp);
+       if (nfs_mount_state_in_use_end(nmp, error)) {
+               nofp = NULL;
+               goto restart;
        }
        nfs_open_owner_rele(noop);
        if (error)
                return (error);
+do_read:
        return (nfs_bioread(VTONFS(ap->a_vp), ap->a_uio, ap->a_ioflag, ap->a_context));
 }
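The rewritten read path above boils down to a fast path and a slow path; it is shown again in isolation as a sketch, with the same identifiers as the hunk, trimmed to the control flow.

	/* Fast path: an existing open already grants read access, so skip
	 * busying the mount and the open file (racy vs. a concurrent close,
	 * as the comment in the hunk notes). */
	if (nofp->nof_access & NFS_OPEN_SHARE_ACCESS_READ) {
		nfs_open_owner_rele(noop);
		goto do_read;
	}
	/* Slow path: busy the mount and open file, open for read if we are
	 * not denied, mark the open NEEDCLOSE, then fall through to do_read. */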
 
@@ -6348,7 +6422,7 @@ nfs4_create_rpc(
        // PUTFH, SAVEFH, CREATE, GETATTR(FH), RESTOREFH, GETATTR
        numops = 6;
        nfsm_chain_build_alloc_init(error, &nmreq, 66 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, tag, numops);
+       nfsm_chain_add_compound_header(error, &nmreq, tag, nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
@@ -6606,7 +6680,7 @@ nfs4_vnop_link(
        // PUTFH(SOURCE), SAVEFH, PUTFH(DIR), LINK, GETATTR(DIR), RESTOREFH, GETATTR
        numops = 7;
        nfsm_chain_build_alloc_init(error, &nmreq, 29 * NFSX_UNSIGNED + cnp->cn_namelen);
-       nfsm_chain_add_compound_header(error, &nmreq, "link", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "link", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
@@ -6811,7 +6885,7 @@ nfs4_named_attr_dir_get(nfsnode_t np, int fetch, vfs_context_t ctx)
        // PUTFH, OPENATTR, GETATTR
        numops = 3;
        nfsm_chain_build_alloc_init(error, &nmreq, 22 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "openattr", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "openattr", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, np->n_fhp, np->n_fhsize);
@@ -7141,7 +7215,7 @@ restart:
        if (prefetch)
                numops += 4;    // also sending: SAVEFH, RESTOREFH, NVERIFY, READ
        nfsm_chain_build_alloc_init(error, &nmreq, 64 * NFSX_UNSIGNED + cnp->cn_namelen);
-       nfsm_chain_add_compound_header(error, &nmreq, "getnamedattr", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "getnamedattr", nmp->nm_minor_vers, numops);
        if (hadattrdir) {
                numops--;
                nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
index c0715a2b0267b0e962183eeffc569ceea733cda3..926cc0ad7777cea9fc4983a7a181115e7e434771 100644 (file)
@@ -3797,7 +3797,7 @@ again:
                }
        }
 
-       if (req->r_achain.tqe_next == NFSREQNOLIST || req->r_achain.tqe_next == NFSIODCOMPLETING)
+       if (req->r_achain.tqe_next == NFSREQNOLIST)
                TAILQ_INSERT_TAIL(&nmp->nm_iodq, req, r_achain);
 
        /* If this mount doesn't already have an nfsiod working on it... */
@@ -3835,11 +3835,17 @@ nfs_asyncio_resend(struct nfsreq *req)
 
        if (nfs_mount_gone(nmp))
                return;
+
        nfs_gss_clnt_rpcdone(req);
        lck_mtx_lock(&nmp->nm_lock);
        if (!(req->r_flags & R_RESENDQ)) {
                TAILQ_INSERT_TAIL(&nmp->nm_resendq, req, r_rchain);
                req->r_flags |= R_RESENDQ;
+               /*
+                * We take a reference on this request so that it can't be
+                * destroyed while a resend is queued or in progress.
+                */
+               nfs_request_ref(req, 1);
        }
        nfs_mount_sock_thread_wake(nmp);
        lck_mtx_unlock(&nmp->nm_lock);
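The hunk above makes the resend queue hold its own reference on the request, so the request cannot be destroyed while a resend is queued or in progress. Below is a generic sketch of this queued-object-holds-a-reference pattern; the names are illustrative, not from the sources.

#include <sys/queue.h>

struct req_like {
	int refcnt;				/* protected by the owner's lock */
	TAILQ_ENTRY(req_like) r_rchain;
};
TAILQ_HEAD(resendq_head, req_like);

static void
resendq_enqueue(struct resendq_head *q, struct req_like *req)
{
	/* Caller holds the queue lock. The queue now owns one reference,
	 * dropped by whoever dequeues the request and finishes the resend. */
	req->refcnt++;
	TAILQ_INSERT_TAIL(q, req, r_rchain);
}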
index 9f98a9a506e5af60a24966ab3ae738b5ca5e9069..3414db3477d5e79bb3603d47b08d82e9f30ac4cc 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <nfs/xdr_subs.h>
 #include <nfs/nfsm_subs.h>
 #include <nfs/nfs_gss.h>
-
 #include "nfs_gss_crypto.h"
+#include <mach_assert.h>
+#include <kern/assert.h>
+
+#define ASSERT(EX) assert(EX)
 
 #define NFS_GSS_MACH_MAX_RETRIES 3
 
@@ -192,8 +195,9 @@ static int  nfs_gss_clnt_ctx_init_retry(struct nfsreq *, struct nfs_gss_clnt_ctx
 static int     nfs_gss_clnt_ctx_callserver(struct nfsreq *, struct nfs_gss_clnt_ctx *);
 static uint8_t *nfs_gss_clnt_svcname(struct nfsmount *, gssd_nametype *, uint32_t *);
 static int     nfs_gss_clnt_gssd_upcall(struct nfsreq *, struct nfs_gss_clnt_ctx *);
-void   nfs_gss_clnt_ctx_neg_cache_enter(struct nfs_gss_clnt_ctx *, struct nfsmount *);
+void           nfs_gss_clnt_ctx_neg_cache_reap(struct nfsmount *);
 static void    nfs_gss_clnt_ctx_clean(struct nfs_gss_clnt_ctx *);
+static int     nfs_gss_clnt_ctx_copy(struct nfs_gss_clnt_ctx *, struct nfs_gss_clnt_ctx **, gss_key_info *);
 static void    nfs_gss_clnt_ctx_destroy(struct nfs_gss_clnt_ctx *);
 static void    nfs_gss_clnt_log_error(struct nfsreq *, struct nfs_gss_clnt_ctx *, uint32_t, uint32_t);
 #endif /* NFSCLIENT */
@@ -276,6 +280,16 @@ nfs_gss_init(void)
 #define kauth_cred_getasid(cred) ((cred)->cr_audit.as_aia_p->ai_asid)
 #define kauth_cred_getauid(cred) ((cred)->cr_audit.as_aia_p->ai_auid)
 
+#define SAFE_CAST_INTTYPE( type, intval ) \
+       ( (type)(intval)/(sizeof(type) < sizeof(intval) ? 0 : 1) )
+
+uid_t
+nfs_cred_getasid2uid(kauth_cred_t cred)
+{
+       uid_t result = SAFE_CAST_INTTYPE(uid_t, kauth_cred_getasid(cred));
+       return (result);
+}
+
 /*
  * Debugging
  */
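nfs_cred_getasid2uid above folds an audit session id into a uid_t, and SAFE_CAST_INTTYPE makes any narrowing cast fail loudly: when the destination type is smaller than the source value, the expression divides by zero instead of silently truncating. A self-contained illustration of the same macro follows; the asid_like_t typedef is a stand-in, not the kernel's audit session type.

#include <stdint.h>
#include <stdio.h>

#define SAFE_CAST_INTTYPE(type, intval) \
	((type)(intval) / (sizeof(type) < sizeof(intval) ? 0 : 1))

typedef uint32_t asid_like_t;	/* stand-in for an audit session id */

int
main(void)
{
	asid_like_t asid = 5000;

	/* The destination is as wide as the source, so this is a plain cast. */
	uint32_t uid = SAFE_CAST_INTTYPE(uint32_t, asid);

	/* Narrowing a 64-bit value into uint16_t would divide by zero instead,
	 * which compilers flag and which traps if it ever runs:
	 *   uint16_t bad = SAFE_CAST_INTTYPE(uint16_t, (uint64_t)asid);
	 */
	printf("uid = %u\n", uid);
	return (0);
}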
@@ -285,7 +299,7 @@ nfs_gss_clnt_ctx_dump(struct nfsmount *nmp)
        struct nfs_gss_clnt_ctx *cp;
 
        lck_mtx_lock(&nmp->nm_lock);
-       NFS_GSS_DBG("Enter");
+       NFS_GSS_DBG("Enter\n");
        TAILQ_FOREACH(cp, &nmp->nm_gsscl, gss_clnt_entries) {
                lck_mtx_lock(cp->gss_clnt_mtx);
                printf("context %d/%d: refcnt = %d, flags = %x\n",
@@ -294,19 +308,47 @@ nfs_gss_clnt_ctx_dump(struct nfsmount *nmp)
                       cp->gss_clnt_refcnt, cp->gss_clnt_flags);
                lck_mtx_unlock(cp->gss_clnt_mtx);
        }
-       
-       TAILQ_FOREACH(cp, &nmp->nm_gssnccl, gss_clnt_entries) {
-               lck_mtx_lock(cp->gss_clnt_mtx);
-               printf("context %d/%d: refcnt = %d, flags = %x\n",
-                      kauth_cred_getasid(cp->gss_clnt_cred),
-                      kauth_cred_getauid(cp->gss_clnt_cred),
-                      cp->gss_clnt_refcnt, cp->gss_clnt_flags);
-               lck_mtx_unlock(cp->gss_clnt_mtx);
-       }
-       NFS_GSS_DBG("Exit");
+       NFS_GSS_DBG("Exit\n");
        lck_mtx_unlock(&nmp->nm_lock);
 }
 
+static char *
+nfs_gss_clnt_ctx_name(struct nfsmount *nmp, struct nfs_gss_clnt_ctx *cp, char *buf, int len)
+{
+       char *np;
+       int nlen;
+       const char *server = "";
+
+       if (nmp && nmp->nm_mountp)
+               server = vfs_statfs(nmp->nm_mountp)->f_mntfromname;
+
+       if (cp == NULL) {
+               snprintf(buf, len, "[%s] NULL context", server);
+               return (buf);
+       }
+
+       if (cp->gss_clnt_principal && !cp->gss_clnt_display) {
+               np = (char *)cp->gss_clnt_principal;
+               nlen = cp->gss_clnt_prinlen;
+       } else {
+               np = cp->gss_clnt_display;
+               nlen = np ? strlen(cp->gss_clnt_display) : 0;
+       }
+       if (nlen)
+               snprintf(buf, len, "[%s] %.*s %d/%d %s", server, nlen, np,
+                        kauth_cred_getasid(cp->gss_clnt_cred),
+                        kauth_cred_getuid(cp->gss_clnt_cred),
+                        cp->gss_clnt_principal ? "" : "[from default cred] ");
+       else
+               snprintf(buf, len, "[%s] using default %d/%d ", server,
+                        kauth_cred_getasid(cp->gss_clnt_cred),
+                        kauth_cred_getuid(cp->gss_clnt_cred));
+       return (buf);
+}
+
+#define NFS_CTXBUFSZ 80
+#define NFS_GSS_CTX(req, cp) nfs_gss_clnt_ctx_name((req)->r_nmp, cp ? cp : (req)->r_gss_ctx, CTXBUF, sizeof(CTXBUF))
+
 #define NFS_GSS_CLNT_CTX_DUMP(nmp)             \
        do {                  \
                if (NFS_GSS_ISDBG && (NFS_DEBUG_FLAGS & 0x2))   \
@@ -321,23 +363,79 @@ nfs_gss_clnt_ctx_cred_match(kauth_cred_t cred1, kauth_cred_t cred2)
        return (0);
 }
 
-                       
+/*
+ * Busy the mount for each principal set on the mount
+ * so that the automounter will not unmount the file
+ * system underneath us. Without this, if an unmount
+ * occurs, the principal that is set for an audit session
+ * will be lost and we may end up with a different identity.
+ *
+ * Note that setting principals on the mount is a bad idea. This
+ * really should be handled by KIM (Kerberos Identity Management)
+ * so that defaults can be set by service identities.
+ */
+
+static void
+nfs_gss_clnt_mnt_ref(struct nfsmount *nmp)
+{
+       int error;
+       vnode_t rvp;
+
+       if (nmp == NULL ||
+           !(vfs_flags(nmp->nm_mountp) & MNT_AUTOMOUNTED))
+               return;
+
+       error = VFS_ROOT(nmp->nm_mountp, &rvp, NULL);
+       if (!error) {
+               vnode_ref(rvp);
+               vnode_put(rvp);
+       }
+}
+
+/*
+ * Unbusy the mount. See the comment above.
+ */
+
+static void
+nfs_gss_clnt_mnt_rele(struct nfsmount *nmp)
+{
+       int error;
+       vnode_t rvp;
+
+       if (nmp == NULL ||
+           !(vfs_flags(nmp->nm_mountp) & MNT_AUTOMOUNTED))
+               return;
+
+       error = VFS_ROOT(nmp->nm_mountp, &rvp, NULL);
+       if (!error) {
+               vnode_rele(rvp);
+               vnode_put(rvp);
+       }
+}
+
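nfs_gss_clnt_mnt_ref and nfs_gss_clnt_mnt_rele busy an automounted file system by taking, and later dropping, a vnode reference on its root vnode, so the automounter cannot unmount it while a principal is pinned to the mount. Condensed from the later hunks in this file, the pair is used around the lifetime of a sticky (principal-bearing) context roughly as sketched below.

	/* in nfs_gss_clnt_ctx_find_principal(), when a principal is supplied: */
	if (principal) {
		cp->gss_clnt_flags |= GSS_CTX_STICKY;
		nfs_gss_clnt_mnt_ref(nmp);	/* root vnode ref keeps the automounter away */
	}

	/* in nfs_gss_clnt_ctx_unref(), once the sticky context is destroyed: */
	if (cp->gss_clnt_flags & GSS_CTX_STICKY)
		nfs_gss_clnt_mnt_rele(nmp);	/* drop the root vnode ref */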
+int nfs_root_steals_ctx = 1;
+
 static int
-nfs_gss_clnt_ctx_find(struct nfsreq *req)
+nfs_gss_clnt_ctx_find_principal(struct nfsreq *req, uint8_t *principal, uint32_t plen, uint32_t nt)
 {
        struct nfsmount *nmp = req->r_nmp;
        struct nfs_gss_clnt_ctx *cp;
+       struct nfsreq treq;
        int error = 0;
        struct timeval now;
-       
+       gss_key_info *ki;
+       char CTXBUF[NFS_CTXBUFSZ];
+
+       bzero(&treq, sizeof (struct nfsreq));
+       treq.r_nmp = nmp;
+
        microuptime(&now);
        lck_mtx_lock(&nmp->nm_lock);
        TAILQ_FOREACH(cp, &nmp->nm_gsscl, gss_clnt_entries) {
                lck_mtx_lock(cp->gss_clnt_mtx);
                if (cp->gss_clnt_flags & GSS_CTX_DESTROY) {
-                       NFS_GSS_DBG("Found destroyed context %d/%d. refcnt = %d continuing\n",
-                                   kauth_cred_getasid(cp->gss_clnt_cred),
-                                   kauth_cred_getauid(cp->gss_clnt_cred),
+                       NFS_GSS_DBG("Found destroyed context %s refcnt = %d continuing\n",
+                                   NFS_GSS_CTX(req, cp),
                                    cp->gss_clnt_refcnt);
                        lck_mtx_unlock(cp->gss_clnt_mtx);
                        continue;
@@ -347,27 +445,88 @@ nfs_gss_clnt_ctx_find(struct nfsreq *req)
                                TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries);
                                TAILQ_INSERT_HEAD(&nmp->nm_gsscl, cp, gss_clnt_entries);
                        }
+                       if (principal) {
+                               /*
+                                * If we have a principal, but it does not match the current cred
+                                * mark it for removal
+                                */
+                               if (cp->gss_clnt_prinlen != plen || cp->gss_clnt_prinnt != nt ||
+                                   bcmp(cp->gss_clnt_principal, principal, plen) != 0) {
+                                       cp->gss_clnt_flags |= (GSS_CTX_INVAL | GSS_CTX_DESTROY);
+                                       cp->gss_clnt_refcnt++;
+                                       lck_mtx_unlock(cp->gss_clnt_mtx);
+                                       NFS_GSS_DBG("Marking %s for deletion because %s does not match\n",
+                                                   NFS_GSS_CTX(req, cp), principal);
+                                       NFS_GSS_DBG("len = (%d,%d), nt = (%d,%d)\n", cp->gss_clnt_prinlen, plen,
+                                                   cp->gss_clnt_prinnt, nt);
+                                       treq.r_gss_ctx  = cp;
+                                       cp = NULL;
+                                       break;
+                               }
+                       }
                        if (cp->gss_clnt_flags & GSS_CTX_INVAL) {
-                       /* 
-                        * We haven't been moved to the neg cache list
-                        * but we're about to be, finding an entry on 
-                        * the negative cache list will result in an
-                        * NFSERR_EAUTH for GSS_NEG_CACHE_TO so we just
-                        * return that now.
-                        */
-                               lck_mtx_unlock(cp->gss_clnt_mtx);
-                               lck_mtx_unlock(&nmp->nm_lock);
-                               return (NFSERR_EAUTH);
+                               /*
+                                * If we're still being used and we're not expired
+                                * just return and don't bother gssd again. Note if
+                                * gss_clnt_nctime is zero it is about to be set to now.
+                                */
+                               if (cp->gss_clnt_nctime + GSS_NEG_CACHE_TO >= now.tv_sec || cp->gss_clnt_nctime == 0) {
+                                       NFS_GSS_DBG("Context %s (refcnt = %d) not expired returning EAUTH nctime = %ld now = %ld\n",
+                                                   NFS_GSS_CTX(req, cp), cp->gss_clnt_refcnt, cp->gss_clnt_nctime, now.tv_sec);
+                                       lck_mtx_unlock(cp->gss_clnt_mtx);
+                                       lck_mtx_unlock(&nmp->nm_lock);
+                                       return (NFSERR_EAUTH);
+                               }
+                               if (cp->gss_clnt_refcnt) {
+                                       struct nfs_gss_clnt_ctx *ncp;
+                                       /*
+                                        * If this context has references, we can't use it, so we mark it for
+                                        * destruction and create a new context based on this one in the
+                                        * same manner as renewing one.
+                                        */
+                                       cp->gss_clnt_flags |= GSS_CTX_DESTROY;
+                                       NFS_GSS_DBG("Context %s has expired but we still have %d references\n",
+                                                   NFS_GSS_CTX(req, cp), cp->gss_clnt_refcnt);
+                                       error = nfs_gss_clnt_ctx_copy(cp, &ncp, NULL);
+                                       lck_mtx_unlock(cp->gss_clnt_mtx);
+                                       if (error) {
+                                               lck_mtx_unlock(&nmp->nm_lock);
+                                               return (error);
+                                       }
+                                       cp = ncp;
+                                       break;
+                               } else {
+                                       /* cp->gss_clnt_kinfo should be NULL here */
+                                       if (cp->gss_clnt_kinfo) {
+                                               FREE(cp->gss_clnt_kinfo, M_TEMP);
+                                               cp->gss_clnt_kinfo = NULL;
+                                       }
+                                       if (cp->gss_clnt_nctime)
+                                               nmp->nm_ncentries--;
+                                       lck_mtx_unlock(cp->gss_clnt_mtx);
+                                       TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries);
+                                       break;
+                               }
                        }
+                       /* Found a valid context to return */
+                       cp->gss_clnt_refcnt++;
+                       req->r_gss_ctx = cp;
                        lck_mtx_unlock(cp->gss_clnt_mtx);
                        lck_mtx_unlock(&nmp->nm_lock);
-                       nfs_gss_clnt_ctx_ref(req, cp);
                        return (0);
                }
                lck_mtx_unlock(cp->gss_clnt_mtx);
        }
 
-       if (kauth_cred_getuid(req->r_cred) == 0) {
+       MALLOC(ki, gss_key_info *, sizeof (gss_key_info), M_TEMP, M_WAITOK|M_ZERO);
+       if (ki == NULL) {
+               lck_mtx_unlock(&nmp->nm_lock);
+               return (ENOMEM);
+       }
+
+       if (cp) {
+               cp->gss_clnt_kinfo = ki;
+       } else if (nfs_root_steals_ctx && principal == NULL && kauth_cred_getuid(req->r_cred) == 0) {
                /*
                 * If superuser is trying to get access, then co-opt
                 * the first valid context in the list.
@@ -379,66 +538,17 @@ nfs_gss_clnt_ctx_find(struct nfsreq *req)
                        if (!(cp->gss_clnt_flags & (GSS_CTX_INVAL|GSS_CTX_DESTROY))) {
                                nfs_gss_clnt_ctx_ref(req, cp);
                                lck_mtx_unlock(&nmp->nm_lock);
-                               NFS_GSS_DBG("Root stole context %d/%d\n",
-                                           kauth_cred_getasid(cp->gss_clnt_cred), kauth_cred_getauid(cp->gss_clnt_cred));
+                               NFS_GSS_DBG("Root stole context %s\n", NFS_GSS_CTX(req, NULL));
                                return (0);
                        }
                }
        }
 
-       /*
-        * Check negative context cache 
-        * If found and the cache has not expired
-        * return NFSERR_EAUTH, else remove
-        * from the cache and try to create a new context 
-        */
-       TAILQ_FOREACH(cp, &nmp->nm_gssnccl, gss_clnt_entries) {
-               lck_mtx_lock(cp->gss_clnt_mtx);
-               if (cp->gss_clnt_flags & GSS_CTX_DESTROY) {
-                       NFS_GSS_DBG("Found destroyed context %d/%d. refcnt = %d continuing\n",
-                                   kauth_cred_getasid(cp->gss_clnt_cred),
-                                   kauth_cred_getauid(cp->gss_clnt_cred), cp->gss_clnt_refcnt);
-                       lck_mtx_unlock(cp->gss_clnt_mtx);
-                       continue;
-               }
-               if (nfs_gss_clnt_ctx_cred_match(cp->gss_clnt_cred, req->r_cred)) {
-                       /*
-                        * If we're still being used and invalid or we're not expired 
-                        * just return and don't bother gssd again.
-                        */
-                       if (cp->gss_clnt_nctime + GSS_NEG_CACHE_TO >= now.tv_sec) {
-                               NFS_GSS_DBG("Context %d/%d (refcnt = %d) not expired returning EAUTH nctime = %ld now = %ld\n",
-                                           kauth_cred_getasid(cp->gss_clnt_cred),
-                                           kauth_cred_getauid(cp->gss_clnt_cred),
-                                           cp->gss_clnt_refcnt, cp->gss_clnt_nctime, now.tv_sec);
-                               lck_mtx_unlock(cp->gss_clnt_mtx);
-                               lck_mtx_unlock(&nmp->nm_lock);
-                               return (NFSERR_EAUTH);
-                       }
-                       if (cp->gss_clnt_refcnt && (cp->gss_clnt_flags & GSS_CTX_INVAL)) {
-                               NFS_GSS_DBG("Context %d/%d has expired but we still have %d references\n",
-                                           kauth_cred_getasid(cp->gss_clnt_cred),
-                                           kauth_cred_getauid(cp->gss_clnt_cred),
-                                           cp->gss_clnt_refcnt);
-                               lck_mtx_unlock(cp->gss_clnt_mtx);
-                               lck_mtx_unlock(&nmp->nm_lock);
-                               return (NFSERR_EAUTH);
-                       }
-                       TAILQ_REMOVE(&nmp->nm_gssnccl, cp, gss_clnt_entries);
-                       lck_mtx_unlock(cp->gss_clnt_mtx);
-                       nmp->nm_ncentries--;
-                       break;
-               }
-               lck_mtx_unlock(cp->gss_clnt_mtx);
-       }
-
-
-       NFS_GSS_DBG("Context %d/%d %sfound in Neg Cache @  %ld\n",
-                   kauth_cred_getasid(req->r_cred),
-                   kauth_cred_getauid(req->r_cred),
-                   cp == NULL ? "not " : "",
+       NFS_GSS_DBG("Context %s%sfound in Neg Cache @  %ld\n",
+                   NFS_GSS_CTX(req, cp),
+                   cp == NULL ? " not " : "",
                    cp == NULL ? 0L : cp->gss_clnt_nctime);
-       
+
        /*
         * Not found - create a new context
         */
@@ -449,26 +559,61 @@ nfs_gss_clnt_ctx_find(struct nfsreq *req)
                        lck_mtx_unlock(&nmp->nm_lock);
                        return (ENOMEM);
                }
+               cp->gss_clnt_kinfo = ki;
                cp->gss_clnt_cred = req->r_cred;
                kauth_cred_ref(cp->gss_clnt_cred);
                cp->gss_clnt_mtx = lck_mtx_alloc_init(nfs_gss_clnt_grp, LCK_ATTR_NULL);
                cp->gss_clnt_ptime = now.tv_sec - GSS_PRINT_DELAY;
+               if (principal) {
+                       MALLOC(cp->gss_clnt_principal, uint8_t *, plen+1, M_TEMP, M_WAITOK|M_ZERO);
+                       memcpy(cp->gss_clnt_principal, principal, plen);
+                       cp->gss_clnt_prinlen = plen;
+                       cp->gss_clnt_prinnt = nt;
+                       cp->gss_clnt_flags |= GSS_CTX_STICKY;
+                       nfs_gss_clnt_mnt_ref(nmp);
+               }
        } else {
                nfs_gss_clnt_ctx_clean(cp);
+               if (principal) {
+                       /*
+                        * If we have a principal and we found a matching audit
+                        * session, then to get here, the principal had to match.
+                        * While walking the context list, if a context has a
+                        * principal that does not match or the principal is not set,
+                        * we mark that context for destruction, set cp to NULL, and
+                        * fall into the if clause above. If the context still has
+                        * references we instead copy the context, which preserves the
+                        * principal, and we end up here with the correct principal set.
+                        * If we don't have references, the principal must have
+                        * matched and we will fall through here.
+                        */
+                       cp->gss_clnt_flags |= GSS_CTX_STICKY;
+               }
        }
-       
+
        cp->gss_clnt_thread = current_thread();
        nfs_gss_clnt_ctx_ref(req, cp);
        TAILQ_INSERT_HEAD(&nmp->nm_gsscl, cp, gss_clnt_entries);
        lck_mtx_unlock(&nmp->nm_lock);
 
        error = nfs_gss_clnt_ctx_init_retry(req, cp); // Initialize new context
-       if (error) 
+       if (error) {
+               NFS_GSS_DBG("nfs_gss_clnt_ctx_init_retry returned %d for %s\n", error, NFS_GSS_CTX(req, cp));
                nfs_gss_clnt_ctx_unref(req);
+       }
+
+       /* Remove any old matching context that had a different principal */
+       nfs_gss_clnt_ctx_unref(&treq);
 
        return (error);
 }
 
+static int
+nfs_gss_clnt_ctx_find(struct nfsreq *req)
+{
+       return (nfs_gss_clnt_ctx_find_principal(req, NULL, 0, 0));
+}
+
 /*
  * Inserts an RPCSEC_GSS credential into an RPC header.
  * After the credential is inserted, the code continues
@@ -489,12 +634,13 @@ nfs_gss_clnt_cred_put(struct nfsreq *req, struct nfsm_chain *nmc, mbuf_t args)
        u_char tokbuf[KRB5_SZ_TOKMAX(MAX_DIGEST)];
        u_char cksum[MAX_DIGEST];
        gss_key_info *ki;
-       
+
        slpflag = (PZERO-1);
        if (req->r_nmp) {
                slpflag |= (NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR)) ? PCATCH : 0;
                recordmark = (req->r_nmp->nm_sotype == SOCK_STREAM);
        }
+
 retry:
        if (req->r_gss_ctx == NULL) {
                /*
@@ -526,7 +672,7 @@ retry:
        }
        lck_mtx_unlock(cp->gss_clnt_mtx);
 
-       ki = &cp->gss_clnt_kinfo;
+       ki = cp->gss_clnt_kinfo;
        if (cp->gss_clnt_flags & GSS_CTX_COMPLETE) {
                /*
                 * Get a sequence number for this request.
@@ -721,7 +867,7 @@ nfs_gss_clnt_verf_get(
        struct gss_seq *gsp;
        uint32_t reslen, start, cksumlen, toklen;
        int error = 0;
-       gss_key_info *ki = &cp->gss_clnt_kinfo;
+       gss_key_info *ki = cp->gss_clnt_kinfo;
 
        reslen = cksumlen = 0;
        *accepted_statusp = 0;
@@ -939,7 +1085,7 @@ nfsmout:
  * The location and length of the args is marked by two fields
  * in the request structure: r_gss_argoff and r_gss_arglen,
  * which are stashed when the NFS request is built.
- */ 
+ */
 int
 nfs_gss_clnt_args_restore(struct nfsreq *req)
 {
@@ -947,7 +1093,7 @@ nfs_gss_clnt_args_restore(struct nfsreq *req)
        struct nfsm_chain mchain, *nmc = &mchain;
        int len, error = 0;
 
-       if (cp == NULL) 
+       if (cp == NULL)
                return (NFSERR_EAUTH);
 
        if ((cp->gss_clnt_flags & GSS_CTX_COMPLETE) == 0)
@@ -986,7 +1132,7 @@ nfs_gss_clnt_args_restore(struct nfsreq *req)
                 */
                len = req->r_gss_arglen;
                len += len % 8 > 0 ? 4 : 8;                     // add DES padding length
-               nfs_gss_encrypt_chain(&cp->gss_clnt_kinfo, nmc,
+               nfs_gss_encrypt_chain(cp->gss_clnt_kinfo, nmc,
                                        req->r_gss_argoff, len, DES_DECRYPT);
                nfsm_chain_adv(error, nmc, req->r_gss_arglen);
                if (error)
@@ -1017,10 +1163,9 @@ nfs_gss_clnt_ctx_init(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp)
        int server_complete = 0;
        u_char cksum1[MAX_DIGEST], cksum2[MAX_DIGEST];
        int error = 0;
-       gss_key_info *ki = &cp->gss_clnt_kinfo;
+       gss_key_info *ki = cp->gss_clnt_kinfo;
 
        /* Initialize a new client context */
-       
 
        if (cp->gss_clnt_svcname == NULL) {
                cp->gss_clnt_svcname = nfs_gss_clnt_svcname(nmp, &cp->gss_clnt_svcnt, &cp->gss_clnt_svcnamlen);
@@ -1131,12 +1276,12 @@ retry:
        if (cp->gss_clnt_seqbits == NULL)
                error = NFSERR_EAUTH;
 nfsmout:
-       /*
+       /*
         * If the error is ENEEDAUTH we're not done, so no need
         * to wake up other threads again. This thread will retry in
         * the find or renew routines.
         */
-       if (error == ENEEDAUTH) 
+       if (error == ENEEDAUTH)
                return (error);
 
        /*
@@ -1407,6 +1552,7 @@ nfs_gss_clnt_get_upcall_port(kauth_cred_t credp)
        kr = mach_gss_lookup(gssd_host_port, uid, asid, &uc_port);
        if (kr != KERN_SUCCESS)
                printf("nfs_gss_clnt_get_upcall_port: mach_gssd_lookup failed: status %x (%d)\n", kr, kr);
+       host_release_special_port(gssd_host_port);
 
        return (uc_port);
 }
@@ -1628,13 +1774,13 @@ skip:
                        goto out;
                }
                error = nfs_gss_mach_vmcopyout((vm_map_copy_t) okey, skeylen, 
-                               cp->gss_clnt_kinfo.skey);
+                               cp->gss_clnt_kinfo->skey);
                if (error) {
                        vm_map_copy_discard((vm_map_copy_t) otoken);
                        goto out;
                }
                
-               error = gss_key_init(&cp->gss_clnt_kinfo, skeylen);
+               error = gss_key_init(cp->gss_clnt_kinfo, skeylen);
                if (error)
                        goto out;
        }
@@ -1752,9 +1898,11 @@ nfs_gss_clnt_ctx_unref(struct nfsreq *req)
 {
        struct nfsmount *nmp = req->r_nmp;
        struct nfs_gss_clnt_ctx *cp = req->r_gss_ctx;
-       int neg_cache = 0;
        int on_neg_cache = 0;
+       int neg_cache = 0;
        int destroy = 0;
+       struct timeval now;
+       char CTXBUF[NFS_CTXBUFSZ];
 
        if (cp == NULL)
                return;
@@ -1765,84 +1913,94 @@ nfs_gss_clnt_ctx_unref(struct nfsreq *req)
        if (--cp->gss_clnt_refcnt < 0)
                panic("Over release of gss context!\n");
 
-       if (cp->gss_clnt_refcnt == 0 && (cp->gss_clnt_flags & GSS_CTX_DESTROY)) {
-               destroy = 1;
-               if (cp->gss_clnt_flags & GSS_CTX_NC)
-                       on_neg_cache = 1;
-       } else if ((cp->gss_clnt_flags & (GSS_CTX_INVAL | GSS_CTX_NC)) == GSS_CTX_INVAL) {
+       if (cp->gss_clnt_refcnt == 0) {
+               if ((cp->gss_clnt_flags & GSS_CTX_INVAL) &&
+                   cp->gss_clnt_kinfo) {
+                       FREE(cp->gss_clnt_kinfo, M_TEMP);
+                       cp->gss_clnt_kinfo = NULL;
+               }
+               if (cp->gss_clnt_flags & GSS_CTX_DESTROY) {
+                       destroy = 1;
+                       if (cp->gss_clnt_flags & GSS_CTX_STICKY)
+                               nfs_gss_clnt_mnt_rele(nmp);
+                       if (cp->gss_clnt_nctime)
+                               on_neg_cache = 1;
+               }
+       }
+       if (!destroy && cp->gss_clnt_nctime == 0 &&
+           (cp->gss_clnt_flags & GSS_CTX_INVAL)) {
+               microuptime(&now);
+               cp->gss_clnt_nctime = now.tv_sec;
                neg_cache = 1;
        }
        lck_mtx_unlock(cp->gss_clnt_mtx);
        if (destroy) {
+               NFS_GSS_DBG("Destroying context %s\n", NFS_GSS_CTX(req, cp));
                if (nmp) {
                        lck_mtx_lock(&nmp->nm_lock);
                        if (cp->gss_clnt_entries.tqe_next != NFSNOLIST) {
-                               if (on_neg_cache)
-                                       TAILQ_REMOVE(&nmp->nm_gssnccl, cp, gss_clnt_entries);
-                               else
-                                       TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries);
+                               TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries);
+                       }
+                       if (on_neg_cache) {
+                               nmp->nm_ncentries--;
                        }
                        lck_mtx_unlock(&nmp->nm_lock);
                }
                nfs_gss_clnt_ctx_destroy(cp);
-       } else if (neg_cache)
-               nfs_gss_clnt_ctx_neg_cache_enter(cp, nmp);
+       } else if (neg_cache) {
+               NFS_GSS_DBG("Entering context %s into negative cache\n", NFS_GSS_CTX(req, cp));
+               if (nmp) {
+                       lck_mtx_lock(&nmp->nm_lock);
+                       nmp->nm_ncentries++;
+                       nfs_gss_clnt_ctx_neg_cache_reap(nmp);
+                       lck_mtx_unlock(&nmp->nm_lock);
+               }
+       }
        NFS_GSS_CLNT_CTX_DUMP(nmp);
 }
 
 /*
- * Enter the gss context associated with req on to the neg context
+ * Try and reap any old negative cache entries.
  * cache queue.
  */
 void
-nfs_gss_clnt_ctx_neg_cache_enter(struct nfs_gss_clnt_ctx *cp, struct nfsmount *nmp)
+nfs_gss_clnt_ctx_neg_cache_reap(struct nfsmount *nmp)
 {
-       struct nfs_gss_clnt_ctx *nccp, *tcp;
+       struct nfs_gss_clnt_ctx *cp, *tcp;
        struct timeval now;
        int reaped = 0;
 
-       if (nmp == NULL)
-               return;
-       
-       microuptime(&now);
-       lck_mtx_lock(&nmp->nm_lock);
-
-       lck_mtx_lock(cp->gss_clnt_mtx);
-       if (cp->gss_clnt_entries.tqe_next != NFSNOLIST)
-               TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries);
-
-       cp->gss_clnt_flags |= GSS_CTX_NC;
-       cp->gss_clnt_nctime = now.tv_sec;
-       lck_mtx_unlock(cp->gss_clnt_mtx);
-
-       TAILQ_INSERT_TAIL(&nmp->nm_gssnccl, cp, gss_clnt_entries);
-       nmp->nm_ncentries++;
-
        NFS_GSS_DBG("Reaping contexts ncentries = %d\n", nmp->nm_ncentries);
        /* Try and reap old, unreferenced, expired contexts */
-       TAILQ_FOREACH_SAFE(nccp, &nmp->nm_gssnccl, gss_clnt_entries, tcp) {
+
+       TAILQ_FOREACH_SAFE(cp, &nmp->nm_gsscl, gss_clnt_entries, tcp) {
                int destroy = 0;
 
+               /* Don't reap STICKY contexts */
+               if ((cp->gss_clnt_flags & GSS_CTX_STICKY) ||
+                   !(cp->gss_clnt_flags & GSS_CTX_INVAL))
+                       continue;
                /* Keep up to GSS_MAX_NEG_CACHE_ENTRIES */
                if (nmp->nm_ncentries <= GSS_MAX_NEG_CACHE_ENTRIES)
                        break;
-               /* Contexts to young */
-               if (nccp->gss_clnt_nctime + GSS_NEG_CACHE_TO >= now.tv_sec)
-                       break;
+               /* Contexts too young */
+               if (cp->gss_clnt_nctime + GSS_NEG_CACHE_TO >= now.tv_sec)
+                       continue;
                /* Not referenced, remove it. */
-               lck_mtx_lock(nccp->gss_clnt_mtx);
-               if (nccp->gss_clnt_refcnt == 0) {
-                       TAILQ_REMOVE(&nmp->nm_gssnccl, nccp, gss_clnt_entries);
-                       reaped++;
+               lck_mtx_lock(cp->gss_clnt_mtx);
+               if (cp->gss_clnt_refcnt == 0) {
+                       cp->gss_clnt_flags |= GSS_CTX_DESTROY;
                        destroy = 1;
                }
-               lck_mtx_unlock(nccp->gss_clnt_mtx);
-               if (destroy)
-                       nfs_gss_clnt_ctx_destroy(nccp);
-               nmp->nm_ncentries--;
+               lck_mtx_unlock(cp->gss_clnt_mtx);
+               if (destroy) {
+                       TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries);
+                       nmp->nm_ncentries++;
+                       reaped++;
+                       nfs_gss_clnt_ctx_destroy(cp);
+               }
        }
        NFS_GSS_DBG("Reaped %d contexts ncentries = %d\n", reaped, nmp->nm_ncentries);
-       lck_mtx_unlock(&nmp->nm_lock);
 }
 
 /*
@@ -1851,26 +2009,116 @@ nfs_gss_clnt_ctx_neg_cache_enter(struct nfs_gss_clnt_ctx *cp, struct nfsmount *n
 static void
 nfs_gss_clnt_ctx_clean(struct nfs_gss_clnt_ctx *cp)
 {
+       /* Preserve gss_clnt_mtx */
+       assert(cp->gss_clnt_thread == NULL);  /* Will be set to this thread */
+       /* gss_clnt_entries  we should not be on any list at this point */
        cp->gss_clnt_flags = 0;
+       /* gss_clnt_refcnt should be zero */
+       assert(cp->gss_clnt_refcnt == 0);
+       /*
+        * We are who we are, so preserve:
+        * gss_clnt_cred
+        * gss_clnt_principal
+        * gss_clnt_prinlen
+        * gss_clnt_prinnt
+        * gss_clnt_display
+        */
+       /* gss_clnt_proc will be set in nfs_gss_clnt_ctx_init */
+       cp->gss_clnt_seqnum = 0;
+       /* Preserve gss_clnt_service, we're not changing flavors */
        if (cp->gss_clnt_handle) {
                FREE(cp->gss_clnt_handle, M_TEMP);
                cp->gss_clnt_handle = NULL;
        }
+       cp->gss_clnt_handle_len = 0;
+       cp->gss_clnt_nctime = 0;
+       cp->gss_clnt_seqwin = 0;
        if (cp->gss_clnt_seqbits) {
                FREE(cp->gss_clnt_seqbits, M_TEMP);
                cp->gss_clnt_seqbits = NULL;
        }
-       if (cp->gss_clnt_token) {
-               FREE(cp->gss_clnt_token, M_TEMP);
-               cp->gss_clnt_token = NULL;
+       /* Preserve gss_clnt_mport. Still talking to the same gssd */
+       if (cp->gss_clnt_verf) {
+               FREE(cp->gss_clnt_verf, M_TEMP);
+               cp->gss_clnt_verf = NULL;
        }
+       /* Service name might change on failover, so reset it */
        if (cp->gss_clnt_svcname) {
                FREE(cp->gss_clnt_svcname, M_TEMP);
                cp->gss_clnt_svcname = NULL;
+               cp->gss_clnt_svcnt = 0;
        }
-       cp->gss_clnt_flags = 0;
-       cp->gss_clnt_seqwin = 0;
-       cp->gss_clnt_seqnum = 0;
+       cp->gss_clnt_svcnamlen = 0;
+       cp->gss_clnt_cred_handle = 0;
+       cp->gss_clnt_context = 0;
+       if (cp->gss_clnt_token) {
+               FREE(cp->gss_clnt_token, M_TEMP);
+               cp->gss_clnt_token = NULL;
+       }
+       cp->gss_clnt_tokenlen = 0;
+       if (cp->gss_clnt_kinfo)
+               bzero(cp->gss_clnt_kinfo, sizeof(gss_key_info));
+       /*
+        * Preserve:
+        * gss_clnt_gssd_flags
+        * gss_clnt_major
+        * gss_clnt_minor
+        * gss_clnt_ptime
+        */
+}
+
+/*
+ * Copy a source context to a new context. This is used to create a new context
+ * with the identity of the old context for renewal. The old context is invalid
+ * at this point but may still have references to it, so it is not safe to use that
+ * context.
+ */
+static int
+nfs_gss_clnt_ctx_copy(struct nfs_gss_clnt_ctx *scp, struct nfs_gss_clnt_ctx **dcpp, gss_key_info *ki)
+{
+       struct nfs_gss_clnt_ctx *dcp;
+
+       *dcpp = (struct nfs_gss_clnt_ctx *)NULL;
+       MALLOC(dcp, struct nfs_gss_clnt_ctx *, sizeof (struct nfs_gss_clnt_ctx), M_TEMP, M_WAITOK);
+       if (dcp == NULL)
+               return (ENOMEM);
+       bzero(dcp, sizeof (struct nfs_gss_clnt_ctx));
+       if (ki == NULL) {
+               MALLOC(dcp->gss_clnt_kinfo, gss_key_info *, sizeof (gss_key_info), M_TEMP, M_WAITOK);
+               if (dcp->gss_clnt_kinfo == NULL) {
+                       FREE(dcp, M_TEMP);
+                       return (ENOMEM);
+               }
+       } else {
+               dcp->gss_clnt_kinfo = ki;
+       }
+       bzero(dcp->gss_clnt_kinfo, sizeof (gss_key_info));
+       dcp->gss_clnt_mtx = lck_mtx_alloc_init(nfs_gss_clnt_grp, LCK_ATTR_NULL);
+       dcp->gss_clnt_cred = scp->gss_clnt_cred;
+       kauth_cred_ref(dcp->gss_clnt_cred);
+       dcp->gss_clnt_prinlen = scp->gss_clnt_prinlen;
+       dcp->gss_clnt_prinnt = scp->gss_clnt_prinnt;
+       if (scp->gss_clnt_principal) {
+               MALLOC(dcp->gss_clnt_principal, uint8_t *, dcp->gss_clnt_prinlen, M_TEMP, M_WAITOK | M_ZERO);
+               if (dcp->gss_clnt_principal == NULL) {
+                       FREE(dcp->gss_clnt_kinfo, M_TEMP);
+                       FREE(dcp, M_TEMP);
+                       return (ENOMEM);
+               }
+               bcopy(scp->gss_clnt_principal, dcp->gss_clnt_principal, dcp->gss_clnt_prinlen);
+       }
+       /* Note we don't preserve the display name; that will be set by a successful up call */
+       dcp->gss_clnt_service = scp->gss_clnt_service;
+       dcp->gss_clnt_mport = host_copy_special_port(scp->gss_clnt_mport);
+       /*  gss_clnt_kinfo allocated above */
+       dcp->gss_clnt_gssd_flags = scp->gss_clnt_gssd_flags;
+       dcp->gss_clnt_major = scp->gss_clnt_major;
+       dcp->gss_clnt_minor = scp->gss_clnt_minor;
+       dcp->gss_clnt_ptime = scp->gss_clnt_ptime;
+
+       *dcpp = dcp;
+
+       return (0);
 }
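
The copy routine above duplicates only the identity of the source context and unwinds its partial allocations if anything fails. A hedged userspace illustration of that allocate/deep-copy/clean-up-on-error pattern follows; struct ident and ident_copy are invented names, not the kernel types.

    #include <errno.h>
    #include <stdlib.h>
    #include <string.h>

    struct ident {                       /* invented stand-in for the identity fields */
        unsigned char *principal;
        size_t         prinlen;
        int            nametype;
    };

    /* Deep-copy an identity; on failure, free whatever was allocated so far. */
    static int
    ident_copy(const struct ident *src, struct ident **dstp)
    {
        struct ident *dst;

        *dstp = NULL;
        dst = calloc(1, sizeof(*dst));
        if (dst == NULL)
            return ENOMEM;
        dst->prinlen = src->prinlen;
        dst->nametype = src->nametype;
        if (src->principal != NULL) {
            dst->principal = malloc(dst->prinlen);
            if (dst->principal == NULL) {
                free(dst);
                return ENOMEM;
            }
            memcpy(dst->principal, src->principal, dst->prinlen);
        }
        *dstp = dst;
        return 0;
    }

    int
    main(void)
    {
        struct ident src = { (unsigned char *)"nfs/server@REALM", 17, 0 }, *dst;
        int error = ident_copy(&src, &dst);

        if (error == 0) {
            free(dst->principal);
            free(dst);
        }
        return error;
    }
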
 
 /*
@@ -1885,7 +2133,7 @@ nfs_gss_clnt_ctx_destroy(struct nfs_gss_clnt_ctx *cp)
 
        host_release_special_port(cp->gss_clnt_mport);
        cp->gss_clnt_mport = IPC_PORT_NULL;
-       
+
        if (cp->gss_clnt_mtx) {
                lck_mtx_destroy(cp->gss_clnt_mtx, nfs_gss_clnt_grp);
                cp->gss_clnt_mtx = (lck_mtx_t *)NULL;
@@ -1902,9 +2150,13 @@ nfs_gss_clnt_ctx_destroy(struct nfs_gss_clnt_ctx *cp)
                FREE(cp->gss_clnt_display, M_TEMP);
                cp->gss_clnt_display = NULL;
        }
-       
+       if (cp->gss_clnt_kinfo) {
+               FREE(cp->gss_clnt_kinfo, M_TEMP);
+               cp->gss_clnt_kinfo = NULL;
+       }
+
        nfs_gss_clnt_ctx_clean(cp);
-       
+
        FREE(cp, M_TEMP);
 }
 
@@ -1917,15 +2169,18 @@ int
 nfs_gss_clnt_ctx_renew(struct nfsreq *req)
 {
        struct nfs_gss_clnt_ctx *cp = req->r_gss_ctx;
-       struct nfsmount *nmp = req->r_nmp;
-       struct nfs_gss_clnt_ctx tmp;
        struct nfs_gss_clnt_ctx *ncp;
-
+       struct nfsmount *nmp;
        int error = 0;
+       char CTXBUF[NFS_CTXBUFSZ];
 
        if (cp == NULL)
                return (0);
 
+       if (req->r_nmp == NULL)
+               return (ENXIO);
+       nmp = req->r_nmp;
+
        lck_mtx_lock(cp->gss_clnt_mtx);
        if (cp->gss_clnt_flags & GSS_CTX_INVAL) {
                lck_mtx_unlock(cp->gss_clnt_mtx);
@@ -1933,59 +2188,34 @@ nfs_gss_clnt_ctx_renew(struct nfsreq *req)
                return (0);     // already being renewed
        }
 
-       bzero(&tmp, sizeof(tmp));
-       tmp.gss_clnt_cred = cp->gss_clnt_cred;
-       kauth_cred_ref(tmp.gss_clnt_cred);
-       tmp.gss_clnt_mport = host_copy_special_port(cp->gss_clnt_mport);
-       tmp.gss_clnt_principal = cp->gss_clnt_principal;
-       cp->gss_clnt_principal = NULL;
-       tmp.gss_clnt_prinlen = cp->gss_clnt_prinlen;
-       tmp.gss_clnt_prinnt = cp->gss_clnt_prinnt;
-       tmp.gss_clnt_major = cp->gss_clnt_major;
-       tmp.gss_clnt_minor = cp->gss_clnt_minor;
-       tmp.gss_clnt_ptime = cp->gss_clnt_ptime;
-
-       NFS_GSS_DBG("Renewing context %d/%d\n",
-                   kauth_cred_getasid(tmp.gss_clnt_cred),
-                   kauth_cred_getauid(tmp.gss_clnt_cred));
        cp->gss_clnt_flags |= (GSS_CTX_INVAL | GSS_CTX_DESTROY);
 
-       /*
-        * If there's a thread waiting
-        * in the old context, wake it up.
-        */
        if (cp->gss_clnt_flags & (GSS_NEEDCTX | GSS_NEEDSEQ)) {
                cp->gss_clnt_flags &= ~GSS_NEEDSEQ;
                wakeup(cp);
        }
        lck_mtx_unlock(cp->gss_clnt_mtx);
 
+       error =  nfs_gss_clnt_ctx_copy(cp, &ncp, NULL);
+       NFS_GSS_DBG("Renewing context %s\n", NFS_GSS_CTX(req, ncp));
+       nfs_gss_clnt_ctx_unref(req);
+       if (error)
+               return (error);
+
+       lck_mtx_lock(&nmp->nm_lock);
        /*
-        * Create a new context
+        * Note we don't bother taking the new context mutex as it is
+        * not findable at the moment.
         */
-       MALLOC(ncp, struct nfs_gss_clnt_ctx *, sizeof(*ncp),
-               M_TEMP, M_WAITOK|M_ZERO);
-       if (ncp == NULL) {
-               error = ENOMEM;
-               goto out;
-       }
-
-       *ncp = tmp;
-       ncp->gss_clnt_mtx = lck_mtx_alloc_init(nfs_gss_clnt_grp, LCK_ATTR_NULL);
        ncp->gss_clnt_thread = current_thread();
-
-       lck_mtx_lock(&nmp->nm_lock);
-       TAILQ_INSERT_TAIL(&nmp->nm_gsscl, ncp, gss_clnt_entries);
-       lck_mtx_unlock(&nmp->nm_lock);
-
-       /* Adjust reference counts to new and old context */
-       nfs_gss_clnt_ctx_unref(req);
        nfs_gss_clnt_ctx_ref(req, ncp);
+       TAILQ_INSERT_HEAD(&nmp->nm_gsscl, ncp, gss_clnt_entries);
+       lck_mtx_unlock(&nmp->nm_lock);
 
-       error = nfs_gss_clnt_ctx_init_retry(req, ncp);
-out:
+       error = nfs_gss_clnt_ctx_init_retry(req, ncp); // Initialize new context
        if (error)
                nfs_gss_clnt_ctx_unref(req);
+
        return (error);
 }
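
Read together with nfs_gss_clnt_ctx_copy above, the renewal path now amounts to: retire the old context so no new requests use it, clone its identity into a fresh context, move the request's reference from the old context to the new one, and initialize the clone (the real code also wakes any waiters and inserts the clone at the head of the mount's context list). A compressed userspace outline of that sequence, with ctx_copy and ctx_init as stand-ins for the kernel helpers:

    struct ctx { unsigned flags; int refcnt; };     /* illustrative only */
    #define CTX_INVAL   0x02
    #define CTX_DESTROY 0x20

    /* Stubs standing in for nfs_gss_clnt_ctx_copy()/..._init_retry(). */
    static int ctx_copy(struct ctx *ocp, struct ctx **ncpp)
    { static struct ctx n; (void)ocp; n.flags = 0; n.refcnt = 0; *ncpp = &n; return 0; }
    static int ctx_init(struct ctx *cp) { (void)cp; return 0; }

    /* Renew: invalidate the old context and swap in a freshly cloned one. */
    static int
    ctx_renew(struct ctx **reqctx)
    {
        struct ctx *ocp = *reqctx, *ncp;
        int error;

        ocp->flags |= CTX_INVAL | CTX_DESTROY;      /* no new users of the old context */
        error = ctx_copy(ocp, &ncp);
        ocp->refcnt--;                              /* request drops its old reference */
        if (error)
            return error;
        ncp->refcnt++;                              /* request now references the clone */
        *reqctx = ncp;
        return ctx_init(ncp);
    }

    int
    main(void)
    {
        struct ctx old = { 0, 1 };                  /* request holds one reference */
        struct ctx *cur = &old;

        return ctx_renew(&cur);                     /* 0 on success; cur is the clone */
    }
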
 
@@ -2006,26 +2236,27 @@ nfs_gss_clnt_ctx_unmount(struct nfsmount *nmp)
        if (!nmp)
                return;
 
-       for (;;) {
-               lck_mtx_lock(&nmp->nm_lock);
-               cp = TAILQ_FIRST(&nmp->nm_gsscl);
-               if (cp == NULL) {
-                       lck_mtx_unlock(&nmp->nm_lock);
-                       goto remove_neg_cache;
-               }
-               
+
+       lck_mtx_lock(&nmp->nm_lock);
+       while((cp = TAILQ_FIRST(&nmp->nm_gsscl))) {
+               TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries);
+               cp->gss_clnt_entries.tqe_next = NFSNOLIST;
                lck_mtx_lock(cp->gss_clnt_mtx);
+               if (cp->gss_clnt_flags & GSS_CTX_DESTROY) {
+                       lck_mtx_unlock(cp->gss_clnt_mtx);
+                       continue;
+               }
                cp->gss_clnt_refcnt++;
                lck_mtx_unlock(cp->gss_clnt_mtx);
                req.r_gss_ctx = cp;
 
                lck_mtx_unlock(&nmp->nm_lock);
-
                /*
                 * Tell the server to destroy its context.
                 * But don't bother if it's a forced unmount.
                 */
-               if (!nfs_mount_gone(nmp)) {
+               if (!nfs_mount_gone(nmp) &&
+                   (cp->gss_clnt_flags & (GSS_CTX_INVAL | GSS_CTX_DESTROY | GSS_CTX_COMPLETE)) == GSS_CTX_COMPLETE) {
                        cp->gss_clnt_proc = RPCSEC_GSS_DESTROY;
 
                        error = 0;
@@ -2049,37 +2280,13 @@ nfs_gss_clnt_ctx_unmount(struct nfsmount *nmp)
                cp->gss_clnt_flags |= (GSS_CTX_INVAL | GSS_CTX_DESTROY);
                lck_mtx_unlock(cp->gss_clnt_mtx);
                nfs_gss_clnt_ctx_unref(&req);
-       }
-
-       /* Now all the remaining contexts should be on the negative cache list */
-remove_neg_cache:
-       for (;;) {
                lck_mtx_lock(&nmp->nm_lock);
-               cp = TAILQ_FIRST(&nmp->nm_gssnccl);
-               if (cp == NULL) {
-                       lck_mtx_unlock(&nmp->nm_lock);
-                       return;
-               }
-               req.r_gss_ctx = cp;
-               TAILQ_REMOVE(&nmp->nm_gssnccl, cp, gss_clnt_entries);
-               cp->gss_clnt_entries.tqe_next = NFSNOLIST;
-               
-               lck_mtx_lock(cp->gss_clnt_mtx);
-               if (cp->gss_clnt_refcnt)
-                       NFS_GSS_DBG("Context %d/%d found with %d references\n",
-                                   kauth_cred_getasid(cp->gss_clnt_cred),
-                                   kauth_cred_getauid(cp->gss_clnt_cred),
-                                   cp->gss_clnt_refcnt);
-               cp->gss_clnt_refcnt++;
-               cp->gss_clnt_flags |= GSS_CTX_DESTROY;
-               lck_mtx_unlock(cp->gss_clnt_mtx);
-               lck_mtx_unlock(&nmp->nm_lock);
-
-               nfs_gss_clnt_ctx_unref(&req);
        }
-       NFS_GSS_CLNT_CTX_DUMP(nmp);
+       lck_mtx_unlock(&nmp->nm_lock);
+       assert(TAILQ_EMPTY(&nmp->nm_gsscl));
 }
 
+
 /*
  * Removes a mounts context for a credential
  */
@@ -2124,43 +2331,115 @@ nfs_gss_clnt_ctx_remove(struct nfsmount *nmp, kauth_cred_t cred)
                lck_mtx_unlock(cp->gss_clnt_mtx);
        }
 
-       TAILQ_FOREACH(cp, &nmp->nm_gssnccl, gss_clnt_entries) {
+       lck_mtx_unlock(&nmp->nm_lock);
+       
+       NFS_GSS_DBG("Returning ENOENT\n");
+       return (ENOENT);
+}
+
+/*
+ * Sets a mount's principal for a session associated with cred.
+ */
+int
+nfs_gss_clnt_ctx_set_principal(struct nfsmount *nmp, vfs_context_t ctx,
+                              uint8_t *principal, uint32_t princlen, uint32_t nametype)
+
+{
+       struct nfsreq req;
+       int error;
+
+       NFS_GSS_DBG("Enter:\n");
+
+       bzero(&req, sizeof(struct nfsreq));
+       req.r_nmp = nmp;
+       req.r_gss_ctx = NULL;
+       req.r_auth = nmp->nm_auth;
+       req.r_thread = vfs_context_thread(ctx);
+       req.r_cred = vfs_context_ucred(ctx);
+
+       error = nfs_gss_clnt_ctx_find_principal(&req, principal, princlen, nametype);
+       NFS_GSS_DBG("nfs_gss_clnt_ctx_find_principal returned %d\n", error);
+       /*
+        * We don't care about auth errors. Those would indicate that the context is in the
+        * negative cache, and if and when the user has credentials for the principal,
+        * we should be good to go in that we will then select those credentials for this principal.
+        */
+       if (error == EACCES || error == EAUTH || error == ENEEDAUTH)
+               error = 0;
+
+       /* We're done with this request */
+       nfs_gss_clnt_ctx_unref(&req);
+
+       return (error);
+}
+
+/*
+ * Gets a mount's principal from a session associated with cred.
+ */
+int
+nfs_gss_clnt_ctx_get_principal(struct nfsmount *nmp, vfs_context_t ctx,
+                              struct user_nfs_gss_principal *p)
+{
+       struct nfsreq req;
+       int error = 0;
+       struct nfs_gss_clnt_ctx *cp;
+       kauth_cred_t cred = vfs_context_ucred(ctx);
+       const char *princ;
+       char CTXBUF[NFS_CTXBUFSZ];
+
+       req.r_nmp = nmp;
+       lck_mtx_lock(&nmp->nm_lock);
+       TAILQ_FOREACH(cp, &nmp->nm_gsscl, gss_clnt_entries) {
                lck_mtx_lock(cp->gss_clnt_mtx);
+               if (cp->gss_clnt_flags & GSS_CTX_DESTROY) {
+                       NFS_GSS_DBG("Found destroyed context %s refcnt = %d continuing\n",
+                                   NFS_GSS_CTX(&req, cp),
+                                   cp->gss_clnt_refcnt);
+                       lck_mtx_unlock(cp->gss_clnt_mtx);
+                       continue;
+               }
                if (nfs_gss_clnt_ctx_cred_match(cp->gss_clnt_cred, cred)) {
-                       if (cp->gss_clnt_flags & GSS_CTX_DESTROY) {
-                               NFS_GSS_DBG("Found destroyed context %d/%d refcnt = %d continuing\n",
-                                           kauth_cred_getasid(cp->gss_clnt_cred),
-                                           kauth_cred_getuid(cp->gss_clnt_cred),
-                                           cp->gss_clnt_refcnt);
-                               lck_mtx_unlock(cp->gss_clnt_mtx);
-                               continue;
-                       }
                        cp->gss_clnt_refcnt++;
-                       cp->gss_clnt_flags |= (GSS_CTX_INVAL | GSS_CTX_DESTROY);
                        lck_mtx_unlock(cp->gss_clnt_mtx);
-                       req.r_gss_ctx = cp;
-                       lck_mtx_unlock(&nmp->nm_lock);
-                       /*
-                        * Drop the reference to remove it if its
-                        * refcount is zero.
-                        */
-                       NFS_GSS_DBG("Removed context from neg cache %d/%d refcnt = %d\n",
-                                   kauth_cred_getasid(cp->gss_clnt_cred),
-                                   kauth_cred_getuid(cp->gss_clnt_cred),
-                                   cp->gss_clnt_refcnt);
-                       nfs_gss_clnt_ctx_unref(&req);
-                       return (0);
+                       goto out;
                }
                lck_mtx_unlock(cp->gss_clnt_mtx);
        }
 
-       lck_mtx_unlock(&nmp->nm_lock);
-       
-       NFS_GSS_DBG("Returning ENOENT\n");
-       return (ENOENT);
-}
+out:
+       if (cp == NULL) {
+               lck_mtx_unlock(&nmp->nm_lock);
+               p->princlen = 0;
+               p->principal = USER_ADDR_NULL;
+               p->nametype = GSSD_STRING_NAME;
+               p->flags |= NFS_IOC_NO_CRED_FLAG;
+               NFS_GSS_DBG("No context found for session %d by uid %d\n",
+                           kauth_cred_getasid(cred), kauth_cred_getuid(cred));
+               return (0);
+       }
 
+       princ = cp->gss_clnt_principal ? (char *)cp->gss_clnt_principal : cp->gss_clnt_display;
+       p->princlen = cp->gss_clnt_principal ? cp->gss_clnt_prinlen :
+               (cp->gss_clnt_display ? strlen(cp->gss_clnt_display) : 0);
+       p->nametype = cp->gss_clnt_prinnt;
+       if (princ) {
+               char *pp;
+
+               MALLOC(pp, char *, p->princlen, M_TEMP, M_WAITOK);
+               if (pp) {
+                       bcopy(princ, pp, p->princlen);
+                       p->principal = CAST_USER_ADDR_T(pp);
+               }
+               else
+                       error = ENOMEM;
+       }
+       lck_mtx_unlock(&nmp->nm_lock);
 
+       req.r_gss_ctx = cp;
+       NFS_GSS_DBG("Found context %s\n", NFS_GSS_CTX(&req, NULL));
+       nfs_gss_clnt_ctx_unref(&req);
+       return (error);
+}
 #endif /* NFSCLIENT */
 
 /*************
@@ -3243,7 +3522,7 @@ nfs_gss_mach_alloc_buffer(u_char *buf, uint32_t buflen, vm_map_copy_t *addr)
 
        tbuflen = vm_map_round_page(buflen,
                                    vm_map_page_mask(ipc_kernel_map));
-       kr = vm_allocate(ipc_kernel_map, &kmem_buf, tbuflen, VM_FLAGS_ANYWHERE);
+       kr = vm_allocate(ipc_kernel_map, &kmem_buf, tbuflen, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_FILE));
        if (kr != 0) {
                printf("nfs_gss_mach_alloc_buffer: vm_allocate failed\n");
                return;
@@ -3254,7 +3533,7 @@ nfs_gss_mach_alloc_buffer(u_char *buf, uint32_t buflen, vm_map_copy_t *addr)
                                           vm_map_page_mask(ipc_kernel_map)),
                         vm_map_round_page(kmem_buf + tbuflen,
                                           vm_map_page_mask(ipc_kernel_map)),
-               VM_PROT_READ|VM_PROT_WRITE, FALSE);
+               VM_PROT_READ|VM_PROT_WRITE|VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_FILE), FALSE);
        if (kr != 0) {
                printf("nfs_gss_mach_alloc_buffer: vm_map_wire failed\n");
                return;
index 1588eba028cf40583fbeb6ef001eee39730010f0..e590eb1bf853e8647681ba2c1178390b98af5c60 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -154,7 +154,7 @@ struct nfs_gss_clnt_ctx {
        gssd_ctx                gss_clnt_context;       // Opaque context handle from gssd
        uint8_t                 *gss_clnt_token;        // GSS token exchanged via gssd & server
        uint32_t                gss_clnt_tokenlen;      // Length of token
-       gss_key_info            gss_clnt_kinfo;         // GSS key info
+       gss_key_info            *gss_clnt_kinfo;                // GSS key info
        uint32_t                gss_clnt_gssd_flags;    // Special flag bits to gssd
        uint32_t                gss_clnt_major;         // GSS major result from gssd or server
        uint32_t                gss_clnt_minor;         // GSS minor result from gssd or server
@@ -166,11 +166,10 @@ struct nfs_gss_clnt_ctx {
  */
 #define GSS_CTX_COMPLETE       0x00000001      // Context is complete
 #define GSS_CTX_INVAL          0x00000002      // Context is invalid
-#define GSS_CTX_INCOMPLETE     0x00000004      // Context needs to be inited
+#define GSS_CTX_STICKY         0x00000004      // Context has been set by user
 #define GSS_NEEDSEQ            0x00000008      // Need a sequence number
 #define GSS_NEEDCTX            0x00000010      // Need the context
-#define GSS_CTX_NC             0x00000020      // Context is in negative cache
-#define GSS_CTX_DESTROY                0x00000040      // Context is being destroyed, don't cache
+#define GSS_CTX_DESTROY                0x00000020      // Context is being destroyed, don't cache
 
 /*
  * The server's RPCSEC_GSS context information
@@ -217,9 +216,15 @@ LIST_HEAD(nfs_gss_svc_ctx_hashhead, nfs_gss_svc_ctx);
 #define GSS_TIMER_PERIOD       300             // seconds
 #define MSECS_PER_SEC          1000
 
+#define auth_is_kerberized(auth) \
+       (auth == RPCAUTH_KRB5 || \
+        auth == RPCAUTH_KRB5I || \
+        auth == RPCAUTH_KRB5P)
+
 __BEGIN_DECLS
 
 void   nfs_gss_init(void);
+uid_t  nfs_cred_getasid2uid(kauth_cred_t);
 int    nfs_gss_clnt_cred_put(struct nfsreq *, struct nfsm_chain *, mbuf_t);
 int    nfs_gss_clnt_verf_get(struct nfsreq *, struct nfsm_chain *,
                uint32_t, uint32_t, uint32_t *);
@@ -229,7 +234,9 @@ int nfs_gss_clnt_ctx_renew(struct nfsreq *);
 void   nfs_gss_clnt_ctx_ref(struct nfsreq *, struct nfs_gss_clnt_ctx *);
 void   nfs_gss_clnt_ctx_unref(struct nfsreq *);
 void   nfs_gss_clnt_ctx_unmount(struct nfsmount *);
-int    nfs_gss_clnt_ctx_remove(struct nfsmount *, kauth_cred_t cred);
+int    nfs_gss_clnt_ctx_remove(struct nfsmount *, kauth_cred_t);
+int    nfs_gss_clnt_ctx_set_principal(struct nfsmount *, vfs_context_t, uint8_t *, uint32_t, uint32_t);
+int    nfs_gss_clnt_ctx_get_principal(struct nfsmount *, vfs_context_t, struct user_nfs_gss_principal *);
 int    nfs_gss_svc_cred_get(struct nfsrv_descript *, struct nfsm_chain *);
 int    nfs_gss_svc_verf_put(struct nfsrv_descript *, struct nfsm_chain *);
 int    nfs_gss_svc_ctx_init(struct nfsrv_descript *, struct nfsrv_sock *, mbuf_t *);
index 31e27f56d158a67c538ead21b1f0ac4087ab16ed..9b2cbb5ff61fa5d909db8ff5665d21b237cfd12c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2012,2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  */
 #define NFS_IOC_DESTROY_CRED           _IO('n', 1)
 #define NFS_FSCTL_DESTROY_CRED         IOCBASECMD(NFS_IOC_DESTROY_CRED)
+/*
+ * fsctl (vnop_ioctl) to set the caller's credentials associated with the vnode's mount
+ */
+
+
+struct nfs_gss_principal
+{
+       uint32_t        princlen;       /* length of data */
+       uint32_t        nametype;       /* nametype of data */
+#ifdef KERNEL
+       user32_addr_t   principal;      /* principal data in userspace */
+#else
+       uint8_t         *principal;
+#endif
+       uint32_t        flags;          /* Return flags */
+};
+
+#ifdef KERNEL
+/* LP64 version of nfs_gss_principal */
+struct user_nfs_gss_principal
+{
+       uint32_t        princlen;       /* length of data */
+       uint32_t        nametype;       /* nametype of data */
+       user_addr_t     principal;      /* principal data in userspace */
+       uint32_t        flags;          /* Returned flags */
+};
+#endif
+
+/* If no credential was found, NFS_IOC_NO_CRED_FLAG is returned in the flags field. */
+#define NFS_IOC_NO_CRED_FLAG           1       /* No credential was found */
+
+#define NFS_IOC_SET_CRED               _IOW('n', 2, struct nfs_gss_principal)
+#define NFS_FSCTL_SET_CRED             IOCBASECMD(NFS_IOC_SET_CRED)
 
+#define NFS_IOC_GET_CRED               _IOWR('n', 3, struct nfs_gss_principal)
+#define NFS_FSCTL_GET_CRED             IOCBASECMD(NFS_IOC_GET_CRED)
 #endif
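
Taken together, these commands give userspace a way to manage the GSS credential bound to an NFS mount through fsctl(2). The sketch below is a hedged usage example: it assumes the fsctl() interface from <sys/fsctl.h>, that the caller passes the NFS_IOC_* commands (the kernel matches on the corresponding NFS_FSCTL_* base commands), and it does not cover the exact buffer contract for the returned principal; the mount path and the quoted header name are placeholders.

    #include <stdio.h>
    #include <string.h>
    #include <sys/fsctl.h>
    #include "nfs_gss_ioctl.h"      /* placeholder for wherever NFS_IOC_* is picked up */

    int
    main(void)
    {
        const char *path = "/Volumes/nfsmount/somefile";    /* any path on the NFS mount */
        struct nfs_gss_principal p;

        /* Drop the calling session's credential for this mount. */
        if (fsctl(path, NFS_IOC_DESTROY_CRED, NULL, 0) == -1)
            perror("NFS_IOC_DESTROY_CRED");

        /* Query which principal, if any, is currently associated with the session. */
        memset(&p, 0, sizeof(p));
        if (fsctl(path, NFS_IOC_GET_CRED, &p, 0) == -1)
            perror("NFS_IOC_GET_CRED");
        else if (p.flags & NFS_IOC_NO_CRED_FLAG)
            printf("no credential for this session\n");
        else
            printf("principal length %u, name type %u\n", p.princlen, p.nametype);
        return 0;
    }
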
index 8cc717b8eb779834c179490a43921d957447c5f6..7b9df74b41eb65901e4b539ba99967e9e90e5f66 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc.  All rights reserved.
+ * Copyright (c) 2000-2014 Apple Inc.  All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -113,7 +113,7 @@ struct nfsd_head nfsd_head, nfsd_queue;
 
 lck_grp_t *nfsrv_slp_rwlock_group;
 lck_grp_t *nfsrv_slp_mutex_group;
-struct nfsrv_sockhead nfsrv_socklist, nfsrv_deadsocklist, nfsrv_sockwg,
+struct nfsrv_sockhead nfsrv_socklist, nfsrv_sockwg,
                        nfsrv_sockwait, nfsrv_sockwork;
 struct nfsrv_sock *nfsrv_udpsock = NULL;
 struct nfsrv_sock *nfsrv_udp6sock = NULL;
@@ -140,7 +140,7 @@ int nfsrv_fsevents_enabled = 1;
 #if CONFIG_FSE
 thread_call_t  nfsrv_fmod_timer_call;
 #endif
-thread_call_t  nfsrv_deadsock_timer_call;
+thread_call_t  nfsrv_idlesock_timer_call;
 thread_call_t  nfsrv_wg_timer_call;
 int nfsrv_wg_timer_on;
 
@@ -223,14 +223,13 @@ nfsrv_init(void)
 #if CONFIG_FSE
        nfsrv_fmod_timer_call = thread_call_allocate(nfsrv_fmod_timer, NULL);
 #endif
-       nfsrv_deadsock_timer_call = thread_call_allocate(nfsrv_deadsock_timer, NULL);
+       nfsrv_idlesock_timer_call = thread_call_allocate(nfsrv_idlesock_timer, NULL);
        nfsrv_wg_timer_call = thread_call_allocate(nfsrv_wg_timer, NULL);
 
        /* Init server data structures */
        TAILQ_INIT(&nfsrv_socklist);
        TAILQ_INIT(&nfsrv_sockwait);
        TAILQ_INIT(&nfsrv_sockwork);
-       TAILQ_INIT(&nfsrv_deadsocklist);
        TAILQ_INIT(&nfsrv_sockwg);
        TAILQ_INIT(&nfsd_head);
        TAILQ_INIT(&nfsd_queue);
index ab4cdbe8fd8c23e28ff3871ec04b7cf2d143ca97..191a8fa29ce2785c29b5a482d135ca78cbe8cd18 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -389,14 +389,23 @@ nfs_connect_upcall(socket_t so, void *arg, __unused int waitflag)
                                                        nso->nso_version = RPCBVERS3;
                                        }
                                } else if (nso->nso_protocol == NFS_PROG) {
-                                       if ((minvers > NFS_VER4) || (maxvers < NFS_VER2))
-                                               error = EPROGMISMATCH;
-                                       else if ((NFS_VER3 >= minvers) && (NFS_VER3 <= maxvers))
-                                               nso->nso_version = NFS_VER3;
-                                       else if ((NFS_VER2 >= minvers) && (NFS_VER2 <= maxvers))
-                                               nso->nso_version = NFS_VER2;
-                                       else if ((NFS_VER4 >= minvers) && (NFS_VER4 <= maxvers))
-                                               nso->nso_version = NFS_VER4;
+                                       int vers;
+
+                                       /*
+                                        * N.B. Both the portmapper and rpcbind V3 are happy to return
+                                        * addresses for versions other than the one you ask for (getport
+                                        * or getaddr), and thus we may have fallen into this code path.
+                                        * So if we get a version that we support, use the highest
+                                        * supported version.  This assumes that the server supports all
+                                        * versions between minvers and maxvers.  Note that for IPv6 we
+                                        * will try to use rpcbind V4, which has getversaddr, and we
+                                        * should not get here if that was successful.
+                                        */
+                                       for (vers = nso->nso_nfs_max_vers; vers >= (int)nso->nso_nfs_min_vers; vers--) {
+                                               if (vers >= (int)minvers && vers <= (int)maxvers)
+                                                               break;
+                                       }
+                                       nso->nso_version = (vers < (int)nso->nso_nfs_min_vers) ? 0 : vers;
                                }
                                if (!error && nso->nso_version)
                                        accepted_status = RPC_SUCCESS;
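
The selection the new loop performs reduces to picking the highest NFS version both sides support, or reporting no overlap. A small standalone model of just that arithmetic (the function and variable names are illustrative, not kernel identifiers):

    #include <stdio.h>

    /* Highest version in [cli_min, cli_max] that also lies in [srv_min, srv_max]; 0 if none. */
    static int
    pick_nfs_version(int cli_min, int cli_max, int srv_min, int srv_max)
    {
        int vers;

        for (vers = cli_max; vers >= cli_min; vers--)
            if (vers >= srv_min && vers <= srv_max)
                return vers;
        return 0;
    }

    int
    main(void)
    {
        /* A client built for v2..v4 against a server advertising v2..v3 picks NFSv3. */
        printf("%d\n", pick_nfs_version(2, 4, 2, 3));
        return 0;
    }
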
@@ -456,7 +465,7 @@ nfsmout:
  */
 int
 nfs_socket_create(
-       __unused struct nfsmount *nmp,
+       struct nfsmount *nmp,
        struct sockaddr *sa,
        int sotype,
        in_port_t port,
@@ -506,6 +515,8 @@ nfs_socket_create(
                ((struct sockaddr_in6*)nso->nso_saddr)->sin6_port = htons(port);
        nso->nso_protocol = protocol;
        nso->nso_version = vers;
+       nso->nso_nfs_min_vers = PVER2MAJOR(nmp->nm_min_vers);
+       nso->nso_nfs_max_vers = PVER2MAJOR(nmp->nm_max_vers);
 
        error = sock_socket(sa->sa_family, nso->nso_sotype, 0, NULL, NULL, &nso->nso_so);
 
@@ -844,7 +855,7 @@ nfs_connect_search_ping(struct nfsmount *nmp, struct nfs_socket *nso, struct tim
                if (nso->nso_protocol == PMAPPROG)
                        vers = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4;
                else if (nso->nso_protocol == NFS_PROG)
-                       vers = NFS_VER3;
+                       vers = PVER2MAJOR(nmp->nm_max_vers);
        }
        lck_mtx_unlock(&nso->nso_lock);
        error = nfsm_rpchead2(nmp, nso->nso_sotype, nso->nso_protocol, vers, 0, RPCAUTH_SYS,
@@ -883,7 +894,7 @@ nfs_connect_search_ping(struct nfsmount *nmp, struct nfs_socket *nso, struct tim
  *                                     Set the nfs socket protocol and version if needed. 
  */
 void
-nfs_connect_search_socket_found(struct nfsmount *nmp __unused, struct nfs_socket_search *nss, struct nfs_socket *nso)
+nfs_connect_search_socket_found(struct nfsmount *nmp, struct nfs_socket_search *nss, struct nfs_socket *nso)
 {
        NFS_SOCK_DBG("nfs connect %s socket %p verified\n",
                      vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
@@ -892,7 +903,7 @@ nfs_connect_search_socket_found(struct nfsmount *nmp __unused, struct nfs_socket
                if (nso->nso_protocol == PMAPPROG)
                        nso->nso_version = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4;
                if (nso->nso_protocol == NFS_PROG)
-                       nso->nso_version = NFS_VER3;
+                       nso->nso_version = PVER2MAJOR(nmp->nm_max_vers);
        }
        TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
        nss->nss_sockcnt--;
@@ -1046,6 +1057,7 @@ loop:
  * A mount's initial connection may require negotiating some parameters such
  * as socket type and NFS version.
  */
+
 int
 nfs_connect(struct nfsmount *nmp, int verbose, int timeo)
 {
@@ -1056,6 +1068,7 @@ nfs_connect(struct nfsmount *nmp, int verbose, int timeo)
        sock_upcall upcall;
        struct timeval now, start;
        int error, savederror, nfsvers;
+       int tryv4 = 1;
        uint8_t sotype = nmp->nm_sotype ? nmp->nm_sotype : SOCK_STREAM;
        fhandle_t *fh = NULL;
        char *path = NULL;
@@ -1107,10 +1120,17 @@ tryagain:
                if (!nmp->nm_vers) {
                        /* No NFS version specified... */
                        if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) {
-                               /* ...connect to portmapper first if we (may) need any ports. */
-                               nss.nss_port = PMAPPORT;
-                               nss.nss_protocol = PMAPPROG;
-                               nss.nss_version = 0;
+                               if (PVER2MAJOR(nmp->nm_max_vers) >= NFS_VER4 && tryv4) {
+                                       nss.nss_port = NFS_PORT;
+                                       nss.nss_protocol = NFS_PROG;
+                                       nss.nss_version = 4;
+                                       nss.nss_flags |= NSS_FALLBACK2PMAP;
+                               } else {
+                                       /* ...connect to portmapper first if we (may) need any ports. */
+                                       nss.nss_port = PMAPPORT;
+                                       nss.nss_protocol = PMAPPROG;
+                                       nss.nss_version = 0;
+                               }
                        } else {
                                /* ...connect to NFS port first. */
                                nss.nss_port = nmp->nm_nfsport;
@@ -1118,10 +1138,23 @@ tryagain:
                                nss.nss_version = 0;
                        }
                } else if (nmp->nm_vers >= NFS_VER4) {
-                       /* For NFSv4, we use the given (or default) port. */
-                       nss.nss_port = nmp->nm_nfsport ? nmp->nm_nfsport : NFS_PORT;
-                       nss.nss_protocol = NFS_PROG;
-                       nss.nss_version = 4;
+                       if (tryv4) {
+                               /* For NFSv4, we use the given (or default) port. */
+                               nss.nss_port = nmp->nm_nfsport ? nmp->nm_nfsport : NFS_PORT;
+                               nss.nss_protocol = NFS_PROG;
+                               nss.nss_version = 4;
+                               /*
+                                * Set NSS_FALLBACK2PMAP here to pick up any non-standard port
+                                * if no port is specified on the mount.
+                                * Note nm_vers is set, so we will only try NFS_VER4.
+                                */
+                               if (!nmp->nm_nfsport)
+                                       nss.nss_flags |= NSS_FALLBACK2PMAP;
+                       } else {
+                               nss.nss_port = PMAPPORT;
+                               nss.nss_protocol = PMAPPROG;
+                               nss.nss_version = 0;
+                       }
                } else {
                        /* For NFSv3/v2... */
                        if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) {
@@ -1176,6 +1209,13 @@ keepsearching:
        if (error || !nss.nss_sock) {
                /* search failed */
                nfs_socket_search_cleanup(&nss);
+               if (nss.nss_flags & NSS_FALLBACK2PMAP) {
+                       tryv4 = 0;
+                       NFS_SOCK_DBG("nfs connect %s TCP failed for V4 %d %d, trying PORTMAP\n",
+                               vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nss.nss_error);
+                       goto tryagain;
+               }
+
                if (!error && (nss.nss_sotype == SOCK_STREAM) && !nmp->nm_sotype && (nmp->nm_vers < NFS_VER4)) {
                        /* Try using UDP */
                        sotype = SOCK_DGRAM;
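
The tryv4 flag added in these hunks implements a simple two-phase probe: attempt NFSv4 on the well-known NFS port first and, only if that socket search fails with NSS_FALLBACK2PMAP set, retry through the portmapper to discover ports for whatever versions remain. A hedged outline of that control flow, with probe_nfsv4_port() and probe_via_portmapper() as invented stand-ins for the socket search:

    #include <stdio.h>

    /* Invented probes; the real code drives the nfs_socket_search machinery instead. */
    static int probe_nfsv4_port(void)     { return -1; }   /* pretend direct v4 fails */
    static int probe_via_portmapper(void) { return 0; }    /* pretend portmap succeeds */

    static int
    connect_with_fallback(void)
    {
        int tryv4 = 1;

        if (tryv4 && probe_nfsv4_port() == 0)
            return 0;               /* connected directly over NFSv4 */
        tryv4 = 0;                  /* NSS_FALLBACK2PMAP path: go ask the portmapper */
        return probe_via_portmapper();
    }

    int
    main(void)
    {
        printf("connect %s\n", connect_with_fallback() == 0 ? "ok" : "failed");
        return 0;
    }
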
@@ -1222,30 +1262,21 @@ keepsearching:
                /* Set up socket address and port for NFS socket. */
                bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len);
 
-               /* If NFS version not set, try NFSv3 then NFSv2. */
-               nfsvers = nmp->nm_vers ? nmp->nm_vers : NFS_VER3;
-
+               /* If NFS version not set, try nm_max_vers down to nm_min_vers */
+               nfsvers = nmp->nm_vers ? nmp->nm_vers : PVER2MAJOR(nmp->nm_max_vers);
                if (!(port = nmp->nm_nfsport)) {
                        if (ss.ss_family == AF_INET)
                                ((struct sockaddr_in*)&ss)->sin_port = htons(0);
                        else if (ss.ss_family == AF_INET6)
                                ((struct sockaddr_in6*)&ss)->sin6_port = htons(0);
-                       error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
-                                       nso->nso_so, NFS_PROG, nfsvers,
-                                       (nso->nso_sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP, timeo);
-                       if (!error) {
-                               if (ss.ss_family == AF_INET)
-                                       port = ntohs(((struct sockaddr_in*)&ss)->sin_port);
-                               else if (ss.ss_family == AF_INET6)
-                                       port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port);
-                               if (!port)
-                                       error = EPROGUNAVAIL;
-                       }
-                       if (error && !nmp->nm_vers) {
-                               nfsvers = NFS_VER2;
+                       for (; nfsvers >= (int)PVER2MAJOR(nmp->nm_min_vers); nfsvers--) {
+                               if (nmp->nm_vers && nmp->nm_vers != nfsvers)
+                                       continue; /* Wrong version */
+                               if (nfsvers == NFS_VER4 && nso->nso_sotype == SOCK_DGRAM)
+                                       continue; /* NFSv4 does not do UDP */
                                error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
-                                               nso->nso_so, NFS_PROG, nfsvers,
-                                               (nso->nso_sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP, timeo);
+                                                          nso->nso_so, NFS_PROG, nfsvers,
+                                                          (nso->nso_sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP, timeo);
                                if (!error) {
                                        if (ss.ss_family == AF_INET)
                                                port = ntohs(((struct sockaddr_in*)&ss)->sin_port);
@@ -1253,8 +1284,14 @@ keepsearching:
                                                port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port);
                                        if (!port)
                                                error = EPROGUNAVAIL;
+                                       if (port == NFS_PORT && nfsvers == NFS_VER4 && tryv4 == 0)
+                                               continue; /* We already tried this */
                                }
+                               if (!error)
+                                       break;
                        }
+                       if (nfsvers < (int)PVER2MAJOR(nmp->nm_min_vers) && error == 0)
+                               error = EPROGUNAVAIL;
                        if (error) {
                                nfs_socket_search_update_error(&nss, error);
                                nfs_socket_destroy(nso);
@@ -1262,6 +1299,7 @@ keepsearching:
                        }
                }
                /* Create NFS protocol socket and add it to the list of sockets. */
+               /* N.B. If nfsvers is NFS_VER4 at this point then we're on a non-standard port */
                error = nfs_socket_create(nmp, (struct sockaddr*)&ss, nso->nso_sotype, port,
                                NFS_PROG, nfsvers, NMFLAG(nmp, RESVPORT), &nsonfs);
                if (error) {
@@ -1680,7 +1718,7 @@ nfs_reconnect(struct nfsmount *nmp)
                                rq->r_flags |= R_MUSTRESEND;
                                rq->r_rtt = -1;
                                wakeup(rq);
-                               if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
+                               if ((rq->r_flags & (R_IOD|R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
                                        nfs_asyncio_resend(rq);
                        }
                        lck_mtx_unlock(&rq->r_mtx);
@@ -1751,7 +1789,7 @@ nfs_need_reconnect(struct nfsmount *nmp)
                                rq->r_flags |= R_MUSTRESEND;
                                rq->r_rtt = -1;
                                wakeup(rq);
-                               if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
+                               if ((rq->r_flags & (R_IOD|R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
                                        nfs_asyncio_resend(rq);
                        }
                        lck_mtx_unlock(&rq->r_mtx);
@@ -1846,6 +1884,7 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
                        req->r_rchain.tqe_next = NFSREQNOLIST;
                        lck_mtx_unlock(&nmp->nm_lock);
                        lck_mtx_lock(&req->r_mtx);
+                       /* Note that we have a reference on the request that was taken in nfs_asyncio_resend */
                        if (req->r_error || req->r_nmrep.nmc_mhead) {
                                dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
                                req->r_flags &= ~R_RESENDQ;
@@ -1853,6 +1892,7 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
                                lck_mtx_unlock(&req->r_mtx);
                                if (dofinish)
                                        nfs_asyncio_finish(req);
+                               nfs_request_rele(req);
                                lck_mtx_lock(&nmp->nm_lock);
                                continue;
                        }
@@ -1886,6 +1926,7 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
                                lck_mtx_unlock(&req->r_mtx);
                                if (dofinish)
                                        nfs_asyncio_finish(req);
+                               nfs_request_rele(req);
                                lck_mtx_lock(&nmp->nm_lock);
                                error = 0;
                                continue;
@@ -1903,6 +1944,7 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
                                                req->r_flags &= ~R_RESENDQ;
                                        wakeup(req);
                                        lck_mtx_unlock(&req->r_mtx);
+                                       nfs_request_rele(req);
                                        lck_mtx_lock(&nmp->nm_lock);
                                        continue;
                                }
@@ -1915,6 +1957,7 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
                        lck_mtx_unlock(&req->r_mtx);
                        if (dofinish)
                                nfs_asyncio_finish(req);
+                       nfs_request_rele(req);
                        lck_mtx_lock(&nmp->nm_lock);
                }
                if (nfs_mount_check_dead_timeout(nmp)) {
@@ -3214,6 +3257,9 @@ again:
 
        nfs_sndunlock(req);
 
+       if (nfs_is_dead(error, nmp))
+               error = EIO;
+
        /*
         * Don't log some errors:
         * EPIPE errors may be common with servers that drop idle connections.
@@ -3227,9 +3273,6 @@ again:
                        !req->r_nmp ? "<unmounted>" :
                        vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname);
 
-       if (nfs_is_dead(error, nmp))
-               error = EIO;
-
        /* prefer request termination error over other errors */
        error2 = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
        if (error2)
@@ -3678,26 +3721,24 @@ nfs_request_create(
 void
 nfs_request_destroy(struct nfsreq *req)
 {
-       struct nfsmount *nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
+       struct nfsmount *nmp;
        struct gss_seq *gsp, *ngsp;
        int clearjbtimeo = 0;
-       struct timespec ts = { 1, 0 };
 
        if (!req || !(req->r_flags & R_INITTED))
                return;
+       nmp  = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
        req->r_flags &= ~R_INITTED;
        if (req->r_lflags & RL_QUEUED)
                nfs_reqdequeue(req);
 
-       if (req->r_achain.tqe_next != NFSREQNOLIST &&
-           req->r_achain.tqe_next != NFSIODCOMPLETING) {
+       if (req->r_achain.tqe_next != NFSREQNOLIST) {
                /* 
                 * Still on an async I/O queue?
                 * %%% But which one, we may be on a local iod.
                 */
                lck_mtx_lock(nfsiod_mutex);
-               if (nmp && req->r_achain.tqe_next != NFSREQNOLIST &&
-                   req->r_achain.tqe_next != NFSIODCOMPLETING) {
+               if (nmp && req->r_achain.tqe_next != NFSREQNOLIST) {
                        TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain);
                        req->r_achain.tqe_next = NFSREQNOLIST;
                }
@@ -3719,6 +3760,8 @@ nfs_request_destroy(struct nfsreq *req)
                                wakeup(req2);
                        }
                }
+               assert((req->r_flags & R_RESENDQ) == 0);
+               /* XXX should we just remove this conditional, we should have a reference if we're resending */
                if (req->r_rchain.tqe_next != NFSREQNOLIST) {
                        TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
                        req->r_rchain.tqe_next = NFSREQNOLIST;
@@ -3736,9 +3779,6 @@ nfs_request_destroy(struct nfsreq *req)
                }
                lck_mtx_unlock(&nmp->nm_lock);
        }
-       /* Wait for the mount_sock_thread to finish with the resend */
-       while (req->r_flags & R_RESENDQ)
-               msleep(req, &req->r_mtx, (PZERO - 1), "nfsresendqwait", &ts);
        lck_mtx_unlock(&req->r_mtx);
 
        if (clearjbtimeo)
@@ -4480,6 +4520,8 @@ nfs_request_async(
                                                req->r_flags |= R_SENDING;
                                                lck_mtx_unlock(&req->r_mtx);
                                                error = nfs_send(req, 1);
+                                               /* Remove the R_RESENDQ reference */
+                                               nfs_request_rele(req);
                                                lck_mtx_lock(&req->r_mtx);
                                                if (error)
                                                        break;
@@ -4537,6 +4579,9 @@ nfs_request_async_finish(
                                req->r_rchain.tqe_next = NFSREQNOLIST;
                                if (req->r_flags & R_RESENDQ)
                                        req->r_flags &= ~R_RESENDQ;
+                               /* Remove the R_RESENDQ reference */
+                               assert(req->r_refs > 0);
+                               req->r_refs--;
                                lck_mtx_unlock(&nmp->nm_lock);
                                break;
                        }
@@ -4554,11 +4599,16 @@ nfs_request_async_finish(
        }
 
        while (!error && (req->r_flags & R_RESTART)) {
-               if (asyncio && req->r_resendtime) {  /* send later */
+               if (asyncio) {
+                       assert(req->r_achain.tqe_next == NFSREQNOLIST);
                        lck_mtx_lock(&req->r_mtx);
-                       nfs_asyncio_resend(req);
+                       req->r_flags &= ~R_IOD;
+                       if (req->r_resendtime) {  /* send later */
+                               nfs_asyncio_resend(req);
+                               lck_mtx_unlock(&req->r_mtx);
+                               return (EINPROGRESS);
+                       }
                        lck_mtx_unlock(&req->r_mtx);
-                       return (EINPROGRESS);
                }
                req->r_error = 0;
                req->r_flags &= ~R_RESTART;
@@ -4912,7 +4962,7 @@ restart:
                req->r_flags |= R_MUSTRESEND;
                req->r_rtt = -1;
                wakeup(req);
-               if ((req->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
+               if ((req->r_flags & (R_IOD|R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
                        nfs_asyncio_resend(req);
                lck_mtx_unlock(&req->r_mtx);
        }
index 3872e7ff0a11c2d34e2a37b18dd8f268e6711cd1..8496093a9fac2230ec36d2b81f5772ba6ac027ea 100644 (file)
@@ -2527,6 +2527,12 @@ nfsrv_hang_addrlist(struct nfs_export *nx, struct user_nfs_export_args *unxa)
                if (error)
                        return (error);
 
+               if (nxna.nxna_addr.ss_len > sizeof(struct sockaddr_storage) ||
+                   nxna.nxna_mask.ss_len > sizeof(struct sockaddr_storage) ||
+                   nxna.nxna_addr.ss_family > AF_MAX ||
+                   nxna.nxna_mask.ss_family > AF_MAX)
+                       return (EINVAL);
+
                if (nxna.nxna_flags & (NX_MAPROOT|NX_MAPALL)) {
                        struct posix_cred temp_pcred;
                        bzero(&temp_pcred, sizeof(temp_pcred));
@@ -3221,6 +3227,38 @@ unlock_out:
        return (error);
 }
 
+/*
+ * Check if there is at least one export that will allow this address.
+ *
+ * Return 0, if there is an export that will allow this address,
+ * else return EACCES
+ */
+int
+nfsrv_check_exports_allow_address(mbuf_t nam)
+{
+       struct nfs_exportfs             *nxfs;
+       struct nfs_export               *nx;
+       struct nfs_export_options       *nxo = NULL;
+
+       if (nam == NULL)
+               return (EACCES);
+
+       lck_rw_lock_shared(&nfsrv_export_rwlock);
+       LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
+               LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
+                       /* A little optimization: check the default export first */
+                       if (nx->nx_flags & NX_DEFAULTEXPORT)
+                               nxo = &nx->nx_defopt;
+                       if (nxo || (nxo = nfsrv_export_lookup(nx, nam)))
+                               goto found;
+               }
+       }
+found:
+       lck_rw_done(&nfsrv_export_rwlock);
+
+       return (nxo ? 0 : EACCES);
+}
+
 struct nfs_export_options *
 nfsrv_export_lookup(struct nfs_export *nx, mbuf_t nam)
 {
index 5fa063cea66d671d0edbf09d5b8b579e3d4a9d80..12daa55889dcfeb91ed97bdbad8088320de2b23f 100644 (file)
@@ -131,7 +131,10 @@ extern int nfsrv_wg_delay;
 extern int nfsrv_wg_delay_v3;
 
 static int nfsrv_require_resv_port = 0;
-static int nfsrv_deadsock_timer_on = 0;
+static time_t  nfsrv_idlesock_timer_on = 0;
+static int nfsrv_sock_tcp_cnt = 0;
+#define NFSD_MIN_IDLE_TIMEOUT 30
+static int nfsrv_sock_idle_timeout = 3600; /* One hour */
 
 int    nfssvc_export(user_addr_t argp);
 int    nfssvc_nfsd(void);
@@ -170,7 +173,7 @@ SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LO
 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, "");
 SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, "");
 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, readlink_nocache, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_readlink_nocache, 0, "");
-
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, root_steals_gss_context, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_root_steals_ctx, 0, "");
 #endif /* NFSCLIENT */
 
 #if NFSSERVER
@@ -189,11 +192,13 @@ SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOC
 #endif
 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, "");
 #ifdef NFS_UC_Q_DEBUG
 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, (int *)&nfsrv_uc_queue_count, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, "");
 #endif
 #endif /* NFSSERVER */
 
@@ -421,14 +426,19 @@ worktodo:
                /* grab the current contents of the queue */
                TAILQ_INIT(&iodq);
                TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
+               /* Mark each iod request as being managed by an iod */
+               TAILQ_FOREACH(req, &iodq, r_achain) {
+                       lck_mtx_lock(&req->r_mtx);
+                       assert(!(req->r_flags & R_IOD));
+                       req->r_flags |= R_IOD;
+                       lck_mtx_unlock(&req->r_mtx);
+               }
                lck_mtx_unlock(nfsiod_mutex);
 
                /* process the queue */
                TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) {
                        TAILQ_REMOVE(&iodq, req, r_achain);
-                       lck_mtx_lock(nfsiod_mutex);
-                       req->r_achain.tqe_next = NFSIODCOMPLETING;
-                       lck_mtx_unlock(nfsiod_mutex);
+                       req->r_achain.tqe_next = NFSREQNOLIST;
                        req->r_callback.rcb_func(req);
                }
 
@@ -831,8 +841,12 @@ nfssvc_addsock(socket_t so, mbuf_t mynam)
        }
 
        /* Set protocol options and reserve some space (for UDP). */
-       if (sotype == SOCK_STREAM)
+       if (sotype == SOCK_STREAM) {
+               error = nfsrv_check_exports_allow_address(mynam);
+               if (error)
+                       return (error);
                sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
+       }
        if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP))
                sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
        if (sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
@@ -898,6 +912,58 @@ nfssvc_addsock(socket_t so, mbuf_t mynam)
        /* add the socket to the list */
        first = TAILQ_EMPTY(&nfsrv_socklist);
        TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
+       if (soprotocol == IPPROTO_TCP) {
+               nfsrv_sock_tcp_cnt++;
+               if (nfsrv_sock_idle_timeout < 0)
+                       nfsrv_sock_idle_timeout = 0;
+               if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT))
+                       nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT;
+               /*
+                * Possibly start or stop the idle timer. We only start the idle timer when
+                * we have more than 2 * nfsd_thread_max connections. If the idle timer is
+                * on then we may need to turn it off based on the nfsrv_sock_idle_timeout or
+                * the number of connections.
+                */
+               if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) {
+                       if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
+                               if (nfsrv_idlesock_timer_on) {
+                                       thread_call_cancel(nfsrv_idlesock_timer_call);
+                                       nfsrv_idlesock_timer_on = 0;
+                               }
+                       } else {
+                               struct nfsrv_sock *old_slp;
+                               struct timeval now;
+                               time_t time_to_wait = nfsrv_sock_idle_timeout;
+                               /*
+                                * Get the oldest tcp socket and calculate the
+                                * earliest time for the next idle timer to fire
+                                * based on the possibly updated nfsrv_sock_idle_timeout
+                                */
+                               TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) {
+                                       if (old_slp->ns_sotype == SOCK_STREAM) {
+                                               microuptime(&now);
+                                               time_to_wait -= now.tv_sec - old_slp->ns_timestamp;
+                                               if (time_to_wait < 1)
+                                                       time_to_wait = 1;
+                                               break;
+                                       }
+                               }
+                               /*
+                                * If we have a timer scheduled, but it's going to fire too late,
+                                * turn it off.
+                                */
+                               if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) {
+                                       thread_call_cancel(nfsrv_idlesock_timer_call);
+                                       nfsrv_idlesock_timer_on = 0;
+                               }
+                               /* Schedule the idle thread if it isn't already */
+                               if (!nfsrv_idlesock_timer_on) {
+                                       nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
+                                       nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
+                               }
+                       }
+               }
+       }
 
        sock_retain(so); /* grab a retain count on the socket */
        slp->ns_so = so;
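
The scheduling arithmetic above boils down to: fire the idle reaper when the oldest TCP socket would reach the configured idle timeout, but never sooner than one second from now. A standalone sketch of that computation; next_idle_fire is a hypothetical helper, not part of the kernel.

    #include <stdio.h>
    #include <time.h>

    /* Seconds to wait before the next idle-socket sweep. */
    static time_t
    next_idle_fire(time_t idle_timeout, time_t oldest_timestamp, time_t now)
    {
        time_t wait = idle_timeout - (now - oldest_timestamp);

        return wait < 1 ? 1 : wait;
    }

    int
    main(void)
    {
        /* Oldest socket is 3500s old with a 3600s timeout: sweep again in ~100s. */
        printf("%ld\n", (long)next_idle_fire(3600, 0, 3500));
        return 0;
    }
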
@@ -909,7 +975,7 @@ nfssvc_addsock(socket_t so, mbuf_t mynam)
 
        /* mark that the socket is not in the nfsrv_sockwg list */
        slp->ns_wgq.tqe_next = SLPNOLIST;
-       
+
        slp->ns_flag = SLP_VALID | SLP_NEEDQ;
 
        nfsrv_wakenfsd(slp);
@@ -1057,6 +1123,11 @@ nfssvc_nfsd(void)
                        if (!nfsd->nfsd_slp && slp) {
                                /* we found a socket to work on, grab a reference */
                                slp->ns_sref++;
+                               microuptime(&now);
+                               slp->ns_timestamp = now.tv_sec;
+                               /* We keep the socket list in least recently used order for reaping idle sockets */
+                               TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
+                               TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
                                nfsd->nfsd_slp = slp;
                                opcnt = 0;
                                /* and put it at the back of the work queue */
@@ -1376,15 +1447,7 @@ nfsrv_zapsock(struct nfsrv_sock *slp)
        if (so == NULL)
                return;
 
-       /*
-        * Attempt to deter future up-calls, but leave the
-        * up-call info in place to avoid a race with the
-        * networking code.
-        */
-       socket_lock(so, 1);
-       so->so_rcv.sb_flags &= ~SB_UPCALL;
-       socket_unlock(so, 1);
-
+       sock_setupcall(so, NULL, NULL);
        sock_shutdown(so, SHUT_RDWR);
 
        /*
@@ -1416,9 +1479,6 @@ nfsrv_slpfree(struct nfsrv_sock *slp)
        slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL;
        slp->ns_reccnt = 0;
 
-       if (slp->ns_ua)
-               FREE(slp->ns_ua, M_NFSSVC);
-
        for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
                nnwp = nwp->nd_tq.le_next;
                LIST_REMOVE(nwp, nd_tq);
@@ -1444,12 +1504,9 @@ nfsrv_slpfree(struct nfsrv_sock *slp)
  * Derefence a server socket structure. If it has no more references and
  * is no longer valid, you can throw it away.
  */
-void
-nfsrv_slpderef(struct nfsrv_sock *slp)
+static void
+nfsrv_slpderef_locked(struct nfsrv_sock *slp)
 {
-       struct timeval now;
-
-       lck_mtx_lock(nfsd_mutex);
        lck_rw_lock_exclusive(&slp->ns_rwlock);
        slp->ns_sref--;
 
@@ -1463,7 +1520,6 @@ nfsrv_slpderef(struct nfsrv_sock *slp)
                        slp->ns_flag &= ~SLP_QUEUED;
                }
                lck_rw_done(&slp->ns_rwlock);
-               lck_mtx_unlock(nfsd_mutex);
                return;
        }
 
@@ -1476,66 +1532,88 @@ nfsrv_slpderef(struct nfsrv_sock *slp)
                        TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
                slp->ns_flag &= ~SLP_QUEUED;
        }
+       lck_rw_done(&slp->ns_rwlock);
 
-       /*
-        * Queue the socket up for deletion
-        * and start the timer to delete it
-        * after it has been in limbo for
-        * a while.
-        */
-       microuptime(&now);
-       slp->ns_timestamp = now.tv_sec;
        TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
-       TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
-       if (!nfsrv_deadsock_timer_on) {
-               nfsrv_deadsock_timer_on = 1;
-               nfs_interval_timer_start(nfsrv_deadsock_timer_call,
-                               NFSRV_DEADSOCKDELAY * 1000);
-       }
+       if (slp->ns_sotype == SOCK_STREAM)
+               nfsrv_sock_tcp_cnt--;
 
-       lck_rw_done(&slp->ns_rwlock);
        /* now remove from the write gather socket list */ 
        if (slp->ns_wgq.tqe_next != SLPNOLIST) {
                TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
                slp->ns_wgq.tqe_next = SLPNOLIST;
        }
+       nfsrv_slpfree(slp);
+}
+
+void
+nfsrv_slpderef(struct nfsrv_sock *slp)
+{
+       lck_mtx_lock(nfsd_mutex);
+       nfsrv_slpderef_locked(slp);
        lck_mtx_unlock(nfsd_mutex);
 }
 
 /*
- * Check periodically for dead sockets pending delete.
- * If a socket has been dead for more than NFSRV_DEADSOCKDELAY
- * seconds then we assume it's safe to free.
+ * Check periodically for idle sockets if needed and
+ * zap them.
  */
 void
-nfsrv_deadsock_timer(__unused void *param0, __unused void *param1)
+nfsrv_idlesock_timer(__unused void *param0, __unused void *param1)
 {
-       struct nfsrv_sock *slp;
+       struct nfsrv_sock *slp, *tslp;
        struct timeval now;
-       time_t time_to_wait;
+       time_t time_to_wait = nfsrv_sock_idle_timeout;
 
        microuptime(&now);
        lck_mtx_lock(nfsd_mutex);
 
-       while ((slp = TAILQ_FIRST(&nfsrv_deadsocklist))) {
-               if ((slp->ns_timestamp + NFSRV_DEADSOCKDELAY) > now.tv_sec)
-                       break;
-               TAILQ_REMOVE(&nfsrv_deadsocklist, slp, ns_chain);
-               nfsrv_slpfree(slp);
-       }
-       if (TAILQ_EMPTY(&nfsrv_deadsocklist)) {
-               nfsrv_deadsock_timer_on = 0;
+       /* Turn off the timer if we're supposed to and get out */
+       if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)
+           nfsrv_sock_idle_timeout = 0;
+       if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) {
+               nfsrv_idlesock_timer_on = 0;
                lck_mtx_unlock(nfsd_mutex);
                return;
        }
-       time_to_wait = (slp->ns_timestamp + NFSRV_DEADSOCKDELAY) - now.tv_sec;
-       if (time_to_wait < 1)
-               time_to_wait = 1;
 
-       lck_mtx_unlock(nfsd_mutex);
+       TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) {
+               lck_rw_lock_exclusive(&slp->ns_rwlock);
+               /* Skip udp and referenced sockets */
+               if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) {
+                       lck_rw_done(&slp->ns_rwlock);
+                       continue;
+               }
+               /*
+                * If this is the first non-referenced socket that hasn't idled out,
+                * use its time stamp to calculate the earliest time in the future
+                * to start the next invocation of the timer, since nfsrv_socklist
+                * is sorted from oldest access to newest. Once we find the first one,
+                * we're done and break out of the loop.
+                */
+               if (((slp->ns_timestamp + nfsrv_sock_idle_timeout)  >  now.tv_sec) ||
+                       nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
+                       time_to_wait -= now.tv_sec - slp->ns_timestamp;
+                       if (time_to_wait < 1)
+                               time_to_wait = 1;
+                       lck_rw_done(&slp->ns_rwlock);
+                       break;
+               }
+               /*
+                * Bump the ref count. nfsrv_slpderef below will destroy
+                * the socket, since nfsrv_zapsock has closed it.
+                */
+               slp->ns_sref++;
+               nfsrv_zapsock(slp);
+               lck_rw_done(&slp->ns_rwlock);
+               nfsrv_slpderef_locked(slp);
+       }
 
-       nfs_interval_timer_start(nfsrv_deadsock_timer_call,
-               time_to_wait * 1000);
+       /* Start ourself back up */
+       nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
+       /* Remember when the next timer will fire for nfssvc_addsock. */
+       nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
+       lck_mtx_unlock(nfsd_mutex);
 }
 
 /*
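The reaping loop above relies on nfsrv_socklist being kept in least-recently-used order (nfssvc_nfsd moves a socket to the tail whenever it picks one up), so the walk can stop at the first entry that has not yet aged out. A user-space sketch of that shape, assuming nothing beyond <sys/queue.h>; the struct and the destroy callback are stand-ins, not the kernel's types:

    #include <sys/queue.h>
    #include <stdbool.h>
    #include <time.h>

    struct conn {
            TAILQ_ENTRY(conn) link;
            time_t            last_used;   /* uptime of last use */
            bool              busy;        /* still referenced; skip it */
    };
    TAILQ_HEAD(connlist, conn);

    /*
     * Reap idle entries from a list kept in LRU order and return the delay,
     * in seconds, until the next entry would expire (used to rearm a timer).
     */
    static time_t
    reap_idle(struct connlist *list, time_t now, time_t idle_timeout,
        void (*destroy)(struct conn *))
    {
            struct conn *c, *tmp;
            time_t next = idle_timeout;

            TAILQ_FOREACH_SAFE(c, list, link, tmp) {
                    if (c->busy)
                            continue;
                    if (c->last_used + idle_timeout > now) {
                            /* First entry that hasn't expired: the rest are newer. */
                            next = c->last_used + idle_timeout - now;
                            break;
                    }
                    TAILQ_REMOVE(list, c, link);
                    destroy(c);
            }
            return next < 1 ? 1 : next;
    }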
@@ -1554,33 +1632,14 @@ nfsrv_cleanup(void)
        microuptime(&now);
        for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
                nslp = TAILQ_NEXT(slp, ns_chain);
-               if (slp->ns_flag & SLP_VALID) {
-                       lck_rw_lock_exclusive(&slp->ns_rwlock);
+               lck_rw_lock_exclusive(&slp->ns_rwlock);
+               slp->ns_sref++;
+               if (slp->ns_flag & SLP_VALID)
                        nfsrv_zapsock(slp);
-                       lck_rw_done(&slp->ns_rwlock);
-               }
-               if (slp->ns_flag & SLP_QUEUED) {
-                       if (slp->ns_flag & SLP_WAITQ)
-                               TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
-                       else
-                               TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
-                       slp->ns_flag &= ~SLP_QUEUED;
-               }
-               if (slp->ns_wgq.tqe_next != SLPNOLIST) {
-                       TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
-                       slp->ns_wgq.tqe_next = SLPNOLIST;
-               }
-               /* queue the socket up for deletion */
-               slp->ns_timestamp = now.tv_sec;
-               TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
-               TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
-               if (!nfsrv_deadsock_timer_on) {
-                       nfsrv_deadsock_timer_on = 1;
-                       nfs_interval_timer_start(nfsrv_deadsock_timer_call,
-                               NFSRV_DEADSOCKDELAY * 1000);
-               }
+               lck_rw_done(&slp->ns_rwlock);
+               nfsrv_slpderef_locked(slp);
        }
-
+#
 #if CONFIG_FSE
        /*
         * Flush pending file write fsevents
index 7d6f85f532cafd307e63093f155506255bb9a144..bc71aa0a948b95a71f1ba0533d064e4d9bd6a85f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 Apple Inc.  All rights reserved.
+ * Copyright (c) 2011-2014 Apple Inc.  All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -164,6 +164,8 @@ nfsrv_uc_dequeue(struct nfsrv_sock *slp)
                OSDecrementAtomic(&nfsrv_uc_queue_count);
 #endif         
        }
+       FREE(slp->ns_ua, M_TEMP);
+       slp->ns_ua = NULL;
        lck_mtx_unlock(myqueue->ucq_lock);
 }
 
@@ -315,9 +317,9 @@ nfsrv_uc_proxy(socket_t so, void *arg, int waitflag)
        lck_mtx_lock(myqueue->ucq_lock);
        DPRINT("nfsrv_uc_proxy called for %p (%p)\n", uap, uap->nua_slp);
        DPRINT("\tUp-call queued on %d for wakeup of %p\n", qi, myqueue);
-       if (uap->nua_flags & NFS_UC_QUEUED) {
+       if (uap == NULL || uap->nua_flags & NFS_UC_QUEUED) {
                lck_mtx_unlock(myqueue->ucq_lock);
-               return;  /* Already queued */
+               return;  /* Already queued or freed */
        }
 
        uap->nua_so = so;
@@ -366,7 +368,7 @@ nfsrv_uc_addsock(struct nfsrv_sock *slp, int start)
         * generate up-calls.
         */
        if (nfsrv_uc_thread_count) {
-               MALLOC(arg, struct nfsrv_uc_arg *, sizeof (struct nfsrv_uc_arg), M_NFSSVC, M_WAITOK | M_ZERO);
+               MALLOC(arg, struct nfsrv_uc_arg *, sizeof (struct nfsrv_uc_arg), M_TEMP, M_WAITOK | M_ZERO);
                if (arg == NULL)
                        goto direct;
 
index 49d487f53d438338a5131fdb13fc60447392758a..90e20e774614a6f8703838e82ac1f39935368b0e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -455,7 +455,7 @@ nfs4_update_statfs(struct nfsmount *nmp, vfs_context_t ctx)
        // PUTFH + GETATTR
        numops = 2;
        nfsm_chain_build_alloc_init(error, &nmreq, 15 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "statfs", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "statfs", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
@@ -1986,7 +1986,7 @@ nfs4_mount_update_path_with_symlink(struct nfsmount *nmp, struct nfs_fs_path *nf
        // PUTFH, READLINK
        numops = 2;
        nfsm_chain_build_alloc_init(error, &nmreq, 12 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "readlink", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "readlink", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, NFS_VER4, fhp->fh_data, fhp->fh_len);
@@ -2180,7 +2180,7 @@ nocomponents:
                NFSREQ_SECINFO_SET(&si, NULL, NULL, 0, NULL, 0);
                numops = 2;
                nfsm_chain_build_alloc_init(error, &nmreq, 9 * NFSX_UNSIGNED);
-               nfsm_chain_add_compound_header(error, &nmreq, "mount", numops);
+               nfsm_chain_add_compound_header(error, &nmreq, "mount", nmp->nm_minor_vers, numops);
                numops--;
                nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTROOTFH);
                numops--;
@@ -2240,7 +2240,7 @@ nocomponents:
                        NFSREQ_SECINFO_SET(&si, NULL, dirfh.fh_data, dirfh.fh_len, isdotdot ? NULL : fspath.np_components[comp], 0);
                numops = 4;
                nfsm_chain_build_alloc_init(error, &nmreq, 18 * NFSX_UNSIGNED);
-               nfsm_chain_add_compound_header(error, &nmreq, "mount", numops);
+               nfsm_chain_add_compound_header(error, &nmreq, "mount", nmp->nm_minor_vers, numops);
                numops--;
                if (dirfh.fh_len) {
                        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
@@ -2403,7 +2403,7 @@ gotfh:
        /* get attrs for mount point root */
        numops = NMFLAG(nmp, NONAMEDATTR) ? 2 : 3; // PUTFH + GETATTR + OPENATTR
        nfsm_chain_build_alloc_init(error, &nmreq, 25 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "mount", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "mount", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, NFS_VER4, dirfh.fh_data, dirfh.fh_len);
@@ -2631,6 +2631,21 @@ nfs_mount_connect(struct nfsmount *nmp)
        return (error);
 }
 
+/* Table of maximum minor version for a given version */
+uint32_t maxminorverstab[] = {
+       0, /* Version 0 (does not exist) */
+       0, /* Version 1 (does not exist) */
+       0, /* Version 2 */
+       0, /* Version 3 */
+       0, /* Version 4 */
+};
+
+#define NFS_MAX_SUPPORTED_VERSION  ((long)(sizeof (maxminorverstab) / sizeof (uint32_t) - 1))
+#define NFS_MAX_SUPPORTED_MINOR_VERSION(v) ((long)(maxminorverstab[(v)]))
+
+#define DEFAULT_NFS_MIN_VERS VER2PVER(2, 0)
+#define DEFAULT_NFS_MAX_VERS VER2PVER(3, 0)
+
 /*
  * Common code to mount an NFS file system.
  */
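The range checks that follow compare "packed" version numbers (nm_min_vers, nm_max_vers) that carry a major and a minor version in one 32-bit value, so ordering comparisons only work if the major version occupies the more significant bits. A self-contained sketch under that assumption; the packing shown here is illustrative, and the authoritative VER2PVER/PVER2MAJOR/PVER2MINOR definitions live in the NFS headers:

    #include <assert.h>
    #include <stdint.h>

    /* Assumed layout: major version in the high 16 bits, minor in the low 16. */
    #define VER2PVER(maj, min)  (((uint32_t)(maj) << 16) | ((uint32_t)(min) & 0xffff))
    #define PVER2MAJOR(p)       ((uint32_t)(p) >> 16)
    #define PVER2MINOR(p)       ((uint32_t)(p) & 0xffff)

    int
    main(void)
    {
            uint32_t minv = VER2PVER(2, 0);         /* cf. DEFAULT_NFS_MIN_VERS */
            uint32_t maxv = VER2PVER(3, 0);         /* cf. DEFAULT_NFS_MAX_VERS */

            /* Packed values order correctly, so a range check is a plain compare. */
            assert(minv <= VER2PVER(2, 0) && VER2PVER(3, 0) <= maxv);
            assert(PVER2MAJOR(maxv) == 3 && PVER2MINOR(maxv) == 0);
            return 0;
    }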
@@ -2646,7 +2661,7 @@ mountnfs(
        int error = 0;
        struct vfsstatfs *sbp;
        struct xdrbuf xb;
-       uint32_t i, val, vers = 0, minorvers, maxio, iosize, len;
+       uint32_t i, val, maxio, iosize, len;
        uint32_t *mattrs;
        uint32_t *mflags_mask;
        uint32_t *mflags;
@@ -2675,7 +2690,6 @@ mountnfs(
                TAILQ_INIT(&nmp->nm_resendq);
                TAILQ_INIT(&nmp->nm_iodq);
                TAILQ_INIT(&nmp->nm_gsscl);
-               TAILQ_INIT(&nmp->nm_gssnccl);
                LIST_INIT(&nmp->nm_monlist);
                vfs_setfsprivate(mp, nmp);
                vfs_getnewfsid(mp);
@@ -2689,6 +2703,8 @@ mountnfs(
                /* set up defaults */
                nmp->nm_ref = 0;
                nmp->nm_vers = 0;
+               nmp->nm_min_vers = DEFAULT_NFS_MIN_VERS;
+               nmp->nm_max_vers = DEFAULT_NFS_MAX_VERS;
                nmp->nm_timeo = NFS_TIMEO;
                nmp->nm_retry = NFS_RETRANS;
                nmp->nm_sotype = 0;
@@ -2753,37 +2769,36 @@ mountnfs(
                }
        }
        if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION)) {
-               xb_get_32(error, &xb, vers);
+               /* Can't specify a single version and a range */
+               if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION_RANGE))
+                       error = EINVAL;
+               xb_get_32(error, &xb, nmp->nm_vers);
+               if (nmp->nm_vers > NFS_MAX_SUPPORTED_VERSION ||
+                   nmp->nm_vers < NFS_VER2)
+                       error = EINVAL;
                if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_MINOR_VERSION))
-                       xb_get_32(error, &xb, minorvers);
+                       xb_get_32(error, &xb, nmp->nm_minor_vers);
                else
-                       minorvers = 0;
-               nfsmerr_if(error);
-               switch (vers) {
-               case 2:
-                       nmp->nm_vers = NFS_VER2;
-                       break;
-               case 3:
-                       nmp->nm_vers = NFS_VER3;
-                       break;
-               case 4:
-                       switch (minorvers) {
-                       case 0:
-                               nmp->nm_vers = NFS_VER4;
-                               break;
-                       default:
-                               error = EINVAL;
-                       }
-                       break;
-               default:
+                       nmp->nm_minor_vers = maxminorverstab[nmp->nm_vers];
+               if (nmp->nm_minor_vers > maxminorverstab[nmp->nm_vers])
                        error = EINVAL;
-               }
-       }
+               nmp->nm_max_vers = nmp->nm_min_vers = 
+                       VER2PVER(nmp->nm_vers, nmp->nm_minor_vers);
+       } 
        if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_MINOR_VERSION)) {
-               /* should have also gotten NFS version (and already gotten minorvers) */
+               /* should have also gotten NFS version (and already gotten minor version) */
                if (!NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION))
                        error = EINVAL;
        }
+       if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION_RANGE)) {
+               xb_get_32(error, &xb, nmp->nm_min_vers);
+               xb_get_32(error, &xb, nmp->nm_max_vers);
+               if ((nmp->nm_min_vers > nmp->nm_max_vers) ||
+                   (PVER2MAJOR(nmp->nm_max_vers) > NFS_MAX_SUPPORTED_VERSION) ||
+                   (PVER2MINOR(nmp->nm_min_vers) > maxminorverstab[PVER2MAJOR(nmp->nm_min_vers)]) ||
+                   (PVER2MINOR(nmp->nm_max_vers) > maxminorverstab[PVER2MAJOR(nmp->nm_max_vers)]))
+                       error = EINVAL;
+       }
        if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READ_SIZE))
                xb_get_32(error, &xb, nmp->nm_rsize);
        if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_WRITE_SIZE))
@@ -3461,6 +3476,10 @@ nfs_mirror_mount_domount(vnode_t dvp, vnode_t vp, vfs_context_t ctx)
                xb_copy_32(error, &xb, &xbnew, val);
        if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_MINOR_VERSION))
                xb_copy_32(error, &xb, &xbnew, val);
+       if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION_RANGE)) {
+               xb_copy_32(error, &xb, &xbnew, val);
+               xb_copy_32(error, &xb, &xbnew, val);
+       }
        if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READ_SIZE))
                xb_copy_32(error, &xb, &xbnew, val);
        if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_WRITE_SIZE))
@@ -4286,7 +4305,7 @@ void
 nfs_mount_zombie(struct nfsmount *nmp, int nm_state_flags)
 {
        struct nfsreq *req, *treq;
-       struct nfs_reqqhead iodq;
+       struct nfs_reqqhead iodq, resendq;
        struct timespec ts = { 1, 0 };
        struct nfs_open_owner *noop, *nextnoop;
        nfsnode_t np;
@@ -4366,39 +4385,83 @@ nfs_mount_zombie(struct nfsmount *nmp, int nm_state_flags)
        }
 
        /*
-        * Loop through outstanding request list and remove dangling
-        * references to defunct nfsmount struct
+        * Be sure all requests for this mount are completed
+        * and removed from the resend queue.
+        */
+       TAILQ_INIT(&resendq);
+       lck_mtx_lock(nfs_request_mutex);
+       TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
+               if (req->r_nmp == nmp) {
+                       lck_mtx_lock(&req->r_mtx);
+                       if (!req->r_error && req->r_nmrep.nmc_mhead == NULL)
+                               req->r_error = EIO;
+                       if (req->r_flags & R_RESENDQ) {
+                               lck_mtx_lock(&nmp->nm_lock);
+                               req->r_flags &= ~R_RESENDQ;
+                               if (req->r_rchain.tqe_next != NFSREQNOLIST) {
+                                       TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
+                                       /*
+                                        * Queue up the request so that we can unreference it
+                                        * without holding nfs_request_mutex
+                                        */
+                                       TAILQ_INSERT_TAIL(&resendq, req, r_rchain);
+                               }
+                               lck_mtx_unlock(&nmp->nm_lock);
+                       }
+                       wakeup(req);
+                       lck_mtx_unlock(&req->r_mtx);
+               }
+       }
+       lck_mtx_unlock(nfs_request_mutex);
+
+       /* Since we've dropped the request mutex we can now safely unreference the requests */
+       TAILQ_FOREACH_SAFE(req, &resendq, r_rchain, treq) {
+               TAILQ_REMOVE(&resendq, req, r_rchain);
+               nfs_request_rele(req);
+       }
+
+       /*
+        * Now handle any outstanding async requests. We need to walk the
+        * request queue again, this time with the nfsiod_mutex held. No
+        * other iods can grab our requests until we've put them on our own
+        * local iod queue for processing.
         */
        TAILQ_INIT(&iodq);
        lck_mtx_lock(nfs_request_mutex);
+       lck_mtx_lock(nfsiod_mutex);
        TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
                if (req->r_nmp == nmp) {
-                       if (req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT)) {
-                               /* async I/O RPC needs to be finished */
-                               lck_mtx_lock(nfsiod_mutex);
+                       lck_mtx_lock(&req->r_mtx);
+                       if (req->r_callback.rcb_func
+                           && !(req->r_flags & R_WAITSENT) && !(req->r_flags & R_IOD)) {
+                                * Since R_IOD is not set, we need to handle it. If
+                                * we're not on a list, add it to our iod queue. Otherwise
+                                * we're not on a list add it to our iod queue. Otherwise
+                                * we must already be on nm_iodq which is added to our
+                                * local queue below.
+                                * %%% We should really keep a back pointer to our iod queue
+                                * that we're on.
+                                */
+                               req->r_flags |= R_IOD;
                                if (req->r_achain.tqe_next == NFSREQNOLIST) {
                                        TAILQ_INSERT_TAIL(&iodq, req, r_achain);
                                }
-                               lck_mtx_unlock(nfsiod_mutex);
                        }
-                       wakeup(req);
+                       lck_mtx_unlock(&req->r_mtx);
                }
        }
-       lck_mtx_unlock(nfs_request_mutex);
 
        /* finish any async I/O RPCs queued up */
-       lck_mtx_lock(nfsiod_mutex);
        if (nmp->nm_iodlink.tqe_next != NFSNOLIST)
                TAILQ_REMOVE(&nfsiodmounts, nmp, nm_iodlink);
        TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
        lck_mtx_unlock(nfsiod_mutex);
+       lck_mtx_unlock(nfs_request_mutex);
+
        TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) {
                TAILQ_REMOVE(&iodq, req, r_achain);
-               lck_mtx_lock(nfsiod_mutex);
-               req->r_achain.tqe_next = NFSIODCOMPLETING;
-               lck_mtx_unlock(nfsiod_mutex);
+               req->r_achain.tqe_next = NFSREQNOLIST;
                lck_mtx_lock(&req->r_mtx);
-               req->r_error = ENXIO;
                docallback = !(req->r_flags & R_WAITSENT);
                lck_mtx_unlock(&req->r_mtx);
                if (docallback)
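The rewritten teardown follows a drain-then-process shape in two places: requests belonging to the dying mount are moved onto a private list (resendq, iodq) while the relevant mutexes are held, the locks are dropped, and only then are references released or callbacks run. A minimal user-space sketch of that pattern; the request struct, the pthread mutex, and finish() are stand-ins for nfs_reqq, nfs_request_mutex, and the real completion work:

    #include <sys/queue.h>
    #include <pthread.h>

    struct req {
            TAILQ_ENTRY(req) link;
            const void       *owner;    /* stands in for req->r_nmp */
    };
    TAILQ_HEAD(reqlist, req);

    static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct reqlist  global_q   = TAILQ_HEAD_INITIALIZER(global_q);

    static void
    drain_owner(const void *owner, void (*finish)(struct req *))
    {
            struct reqlist local = TAILQ_HEAD_INITIALIZER(local);
            struct req *r, *tmp;

            /* Phase 1: move matching requests to a private list under the lock. */
            pthread_mutex_lock(&queue_lock);
            TAILQ_FOREACH_SAFE(r, &global_q, link, tmp) {
                    if (r->owner == owner) {
                            TAILQ_REMOVE(&global_q, r, link);
                            TAILQ_INSERT_TAIL(&local, r, link);
                    }
            }
            pthread_mutex_unlock(&queue_lock);

            /* Phase 2: finish them with no global lock held, so finish() may block. */
            TAILQ_FOREACH_SAFE(r, &local, link, tmp) {
                    TAILQ_REMOVE(&local, r, link);
                    finish(r);
            }
    }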
@@ -4697,7 +4760,7 @@ nfs4_getquota(struct nfsmount *nmp, vfs_context_t ctx, uid_t id, int type, struc
        // PUTFH + GETATTR
        numops = 2;
        nfsm_chain_build_alloc_init(error, &nmreq, 15 * NFSX_UNSIGNED);
-       nfsm_chain_add_compound_header(error, &nmreq, "quota", numops);
+       nfsm_chain_add_compound_header(error, &nmreq, "quota", nmp->nm_minor_vers, numops);
        numops--;
        nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
        nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
@@ -5069,7 +5132,7 @@ nfs_mountinfo_assemble(struct nfsmount *nmp, struct xdrbuf *xb)
        xb_add_bitmap(error, &xbinfo, mflags, NFS_MFLAG_BITMAP_LEN);
        xb_add_32(error, &xbinfo, nmp->nm_vers);                /* NFS_VERSION */
        if (nmp->nm_vers >= NFS_VER4)
-               xb_add_32(error, &xbinfo, 0);                   /* NFS_MINOR_VERSION */
+               xb_add_32(error, &xbinfo, nmp->nm_minor_vers);  /* NFS_MINOR_VERSION */
        xb_add_32(error, &xbinfo, nmp->nm_rsize);               /* READ_SIZE */
        xb_add_32(error, &xbinfo, nmp->nm_wsize);               /* WRITE_SIZE */
        xb_add_32(error, &xbinfo, nmp->nm_readdirsize);         /* READDIR_SIZE */
index 4f155940f6e0cec57daf5483fb4dd37ed1367bb4..ae1906aed738f44f0c0a02c3afe047e95ffb7ab2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -636,7 +636,10 @@ nfs_vnop_access(
         * Does our cached result allow us to give a definite yes to
         * this request?
         */
-       uid = kauth_cred_getuid(vfs_context_ucred(ctx));
+       if (auth_is_kerberized(np->n_auth) || auth_is_kerberized(nmp->nm_auth))
+               uid = nfs_cred_getasid2uid(vfs_context_ucred(ctx));
+       else
+               uid = kauth_cred_getuid(vfs_context_ucred(ctx));
        slot = nfs_node_access_slot(np, uid, 0);
        dorpc = 1;
        if (access == 0) {
@@ -6065,6 +6068,8 @@ nfs3_lookup_rpc_async_finish(
        struct nfsm_chain nmrep;
 
        nmp = NFSTONMP(dnp);
+       if (nmp == NULL)
+               return (ENXIO);
        nfsvers = nmp->nm_vers;
 
        nfsm_chain_null(&nmrep);
@@ -6906,6 +6911,8 @@ nfs_vnop_ioctl(
        vfs_context_t ctx = ap->a_context;
        vnode_t vp = ap->a_vp;
        struct nfsmount *mp = VTONMP(vp);
+       struct user_nfs_gss_principal gprinc;
+       uint32_t len;
        int error = ENOTTY;
 
        if (mp == NULL)
@@ -6919,8 +6926,78 @@ nfs_vnop_ioctl(
                error = nfs_flush(VTONFS(vp), MNT_WAIT, vfs_context_thread(ctx), 0);
                break;
        case NFS_FSCTL_DESTROY_CRED:
+               if (!auth_is_kerberized(mp->nm_auth))
+                       return (ENOTSUP);
                error = nfs_gss_clnt_ctx_remove(mp, vfs_context_ucred(ctx));
                break;
+       case NFS_FSCTL_SET_CRED:
+               if (!auth_is_kerberized(mp->nm_auth))
+                       return (ENOTSUP);
+               NFS_DBG(NFS_FAC_GSS, 7, "Enter NFS_FSCTL_SET_CRED (proc %d) data = %p\n", vfs_context_is64bit(ctx), (void *)ap->a_data);
+               if (vfs_context_is64bit(ctx)) {
+                       gprinc = *(struct user_nfs_gss_principal *)ap->a_data;
+               } else {
+                       struct nfs_gss_principal *tp;
+                       tp = (struct nfs_gss_principal *)ap->a_data;
+                       gprinc.princlen = tp->princlen;
+                       gprinc.nametype = tp->nametype;
+                       gprinc.principal = CAST_USER_ADDR_T(tp->principal);
+               }
+               if (gprinc.princlen > MAXPATHLEN)
+                       return (EINVAL);
+               NFS_DBG(NFS_FAC_GSS, 7, "Received principal length %d name type = %d\n", gprinc.princlen, gprinc.nametype);
+               uint8_t *p;
+               MALLOC(p, uint8_t *, gprinc.princlen+1, M_TEMP, M_WAITOK|M_ZERO);
+               if (p == NULL)
+                       return (ENOMEM);
+               error = copyin(gprinc.principal, p, gprinc.princlen);
+               if (error) {
+                       NFS_DBG(NFS_FAC_GSS, 7, "NFS_FSCTL_SET_CRED could not copy in principal data of len %d: %d\n",
+                               gprinc.princlen, error);
+                       FREE(p, M_TEMP);
+                       return (error);
+               }
+               NFS_DBG(NFS_FAC_GSS, 7, "Setting credential to principal %s\n", p);
+               error = nfs_gss_clnt_ctx_set_principal(mp, ctx, p, gprinc.princlen, gprinc.nametype);
+               NFS_DBG(NFS_FAC_GSS, 7, "Setting credential to principal %s returned %d\n", p, error);
+               FREE(p, M_TEMP);
+               break;
+       case NFS_FSCTL_GET_CRED:
+               if (!auth_is_kerberized(mp->nm_auth))
+                       return (ENOTSUP);
+               error = nfs_gss_clnt_ctx_get_principal(mp, ctx, &gprinc);
+               if (error)
+                       break;
+               if (vfs_context_is64bit(ctx)) {
+                       struct user_nfs_gss_principal *upp = (struct user_nfs_gss_principal *)ap->a_data;
+                       len = upp->princlen;
+                       if (gprinc.princlen < len)
+                               len = gprinc.princlen;
+                       upp->princlen = gprinc.princlen;
+                       upp->nametype = gprinc.nametype;
+                       upp->flags = gprinc.flags;
+                       if (gprinc.principal)
+                               error = copyout((void *)gprinc.principal, upp->principal, len);
+                       else
+                               upp->principal = USER_ADDR_NULL;
+               } else {
+                       struct nfs_gss_principal *u32pp = (struct nfs_gss_principal *)ap->a_data;
+                       len = u32pp->princlen;
+                       if (gprinc.princlen < len)
+                               len = gprinc.princlen;
+                       u32pp->princlen = gprinc.princlen;
+                       u32pp->nametype = gprinc.nametype;
+                       u32pp->flags = gprinc.flags;
+                       if (gprinc.principal)
+                               error = copyout((void *)gprinc.principal, u32pp->principal, len);
+                       else
+                               u32pp->principal = (user32_addr_t)0;
+               }
+               if (error) {
+                       NFS_DBG(NFS_FAC_GSS, 7, "NFS_FSCTL_GET_CRED could not copy out principal data of len %d: %d\n",
+                               gprinc.princlen, error);
+               }
+               FREE(gprinc.principal, M_TEMP);
        }
 
        return (error);
index 434d4f57a54b5a8ab4d4a19a88754a4b0f42cf89..69d0f78654c2e414b9b1dc119121e87567c49820 100644 (file)
@@ -433,7 +433,7 @@ int nfsm_chain_trim_data(struct nfsm_chain *, int, int *);
 
 /* add NFSv4 COMPOUND header */
 #define NFS4_TAG_LENGTH        12
-#define nfsm_chain_add_compound_header(E, NMC, TAG, NUMOPS) \
+#define nfsm_chain_add_compound_header(E, NMC, TAG, MINOR, NUMOPS) \
        do { \
                if ((TAG) && strlen(TAG)) { \
                        /* put tags into a fixed-length space-padded field */ \
@@ -444,7 +444,7 @@ int nfsm_chain_trim_data(struct nfsm_chain *, int, int *);
                } else { \
                        nfsm_chain_add_32((E), (NMC), 0); \
                } \
-               nfsm_chain_add_32((E), (NMC), 0); /*minorversion*/ \
+               nfsm_chain_add_32((E), (NMC), (MINOR)); /*minorversion*/ \
                nfsm_chain_add_32((E), (NMC), (NUMOPS)); \
        } while (0)
 
index 4d28a97749b5a90cb6efa983f5e66f9af54c25bd..7721e633661f7fa22f06f69a8b92b39734a96a51 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -172,6 +172,8 @@ struct nfs_socket {
        uint32_t                nso_protocol;           /* RPC protocol */
        uint32_t                nso_version;            /* RPC protocol version */
        uint32_t                nso_pingxid;            /* RPC XID of NULL ping request */
+       uint32_t                nso_nfs_min_vers;       /* minimum nfs version for connecting sockets */
+       uint32_t                nso_nfs_max_vers;       /* maximum nfs version for connecting sockets */
        int                     nso_error;              /* saved error/status */
        struct nfs_rpc_record_state nso_rrs;            /* RPC record parsing state (TCP) */
 };
@@ -206,6 +208,7 @@ struct nfs_socket_search {
 /* nss_flags */
 #define NSS_VERBOSE            0x00000001              /* OK to log info about socket search */
 #define NSS_WARNED             0x00000002              /* logged warning about socket search taking a while */
+#define NSS_FALLBACK2PMAP      0x00000004              /* Try V4 on NFS_PORT first, if that fails fall back to portmapper */
 
 /*
  * function table for calling version-specific NFS functions
@@ -261,6 +264,9 @@ struct nfsmount {
        int     nm_ref;                 /* Reference count on this mount */
        int     nm_state;               /* Internal state flags */
        int     nm_vers;                /* NFS version */
+       uint32_t nm_minor_vers;         /* minor version of above */
+       uint32_t nm_min_vers;           /* minimum packed version to try */
+       uint32_t nm_max_vers;           /* maximum packed version to try */
        struct nfs_funcs *nm_funcs;     /* version-specific functions */
        kauth_cred_t nm_mcred;          /* credential used for the mount */
        mount_t nm_mountp;              /* VFS structure for this filesystem */
@@ -268,7 +274,6 @@ struct nfsmount {
        struct nfs_fs_locations nm_locations; /* file system locations */
        uint32_t nm_numgrps;            /* Max. size of groupslist */
        TAILQ_HEAD(, nfs_gss_clnt_ctx) nm_gsscl;        /* GSS user contexts */
-       TAILQ_HEAD(, nfs_gss_clnt_ctx) nm_gssnccl;      /* GSS neg cache contexts */
        uint32_t nm_ncentries;          /* GSS expired negative cache entries */
        int     nm_timeo;               /* Init timer for NFSMNT_DUMBTIMR */
        int     nm_retry;               /* Max retries */
diff --git a/bsd/pgo/profile_runtime.c b/bsd/pgo/profile_runtime.c
new file mode 100644 (file)
index 0000000..ac308b6
--- /dev/null
@@ -0,0 +1,284 @@
+/*
+ * Copyright (c) 2014 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <sys/sysproto.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#include <sys/pgo.h>
+#include <sys/kauth.h>
+#include <security/mac_framework.h>
+#include <libkern/OSKextLib.h>
+
+
+/*
+ * This tells compiler_rt not to include userspace-specific stuff writing
+ * profile data to a file.
+ */
+int __llvm_profile_runtime = 0;
+
+
+#ifdef PROFILE
+
+/* These __llvm functions are defined in InstrProfiling.h in compiler_rt.  That
+ * is an internal header, so we need to re-prototype them here.  */
+
+uint64_t __llvm_profile_get_size_for_buffer(void);
+int __llvm_profile_write_buffer(char *Buffer);
+uint64_t __llvm_profile_get_size_for_buffer_internal(const char *DataBegin,
+                                                     const char *DataEnd,
+                                                     const char *CountersBegin,
+                                                     const char *CountersEnd ,
+                                                     const char *NamesBegin,
+                                                     const char *NamesEnd);
+int __llvm_profile_write_buffer_internal(char *Buffer,
+                                         const char *DataBegin,
+                                         const char *DataEnd,
+                                         const char *CountersBegin,
+                                         const char *CountersEnd ,
+                                         const char *NamesBegin,
+                                         const char *NamesEnd);
+
+extern char __pgo_hib_DataStart __asm("section$start$__HIB$__llvm_prf_data");
+extern char __pgo_hib_DataEnd   __asm("section$end$__HIB$__llvm_prf_data");
+extern char __pgo_hib_NamesStart __asm("section$start$__HIB$__llvm_prf_names");
+extern char __pgo_hib_NamesEnd   __asm("section$end$__HIB$__llvm_prf_names");
+extern char __pgo_hib_CountersStart __asm("section$start$__HIB$__llvm_prf_cnts");
+extern char __pgo_hib_CountersEnd   __asm("section$end$__HIB$__llvm_prf_cnts");
+
+
+static uint64_t get_size_for_buffer(int flags)
+{
+        if (flags & PGO_HIB) {
+                return __llvm_profile_get_size_for_buffer_internal(
+                        &__pgo_hib_DataStart, &__pgo_hib_DataEnd,
+                        &__pgo_hib_CountersStart, &__pgo_hib_CountersEnd,
+                        &__pgo_hib_NamesStart, &__pgo_hib_NamesEnd);
+        } else {
+                return __llvm_profile_get_size_for_buffer();
+        }
+}
+
+
+static int write_buffer(int flags, char *buffer)
+{
+        if (flags & PGO_HIB) {
+                return __llvm_profile_write_buffer_internal(
+                        buffer,
+                        &__pgo_hib_DataStart, &__pgo_hib_DataEnd,
+                        &__pgo_hib_CountersStart, &__pgo_hib_CountersEnd,
+                        &__pgo_hib_NamesStart, &__pgo_hib_NamesEnd);
+        } else {
+                return __llvm_profile_write_buffer(buffer);
+        }
+}
+
+
+#endif
+
+
+
+/*
+ * returns:
+ *   EPERM  unless you are root
+ *   EINVAL for invalid args.
+ *   ENOSYS for not implemented
+ *   ERANGE for integer overflow
+ *   ENOENT if kext not found
+ *   ENOTSUP kext does not support PGO
+ *   EIO llvm returned an error.  shouldn't ever happen.
+ */
+
+int grab_pgo_data(struct proc *p,
+                  struct grab_pgo_data_args *uap,
+                  register_t *retval)
+{
+        char *buffer = NULL;
+        int err = 0;
+
+        (void) p;
+
+        if (!kauth_cred_issuser(kauth_cred_get())) {
+                err = EPERM;
+                goto out;
+        }
+
+#if CONFIG_MACF
+        err = mac_system_check_info(kauth_cred_get(), "kern.profiling_data");
+        if (err) {
+                goto out;
+        }
+#endif
+
+        if ( uap->flags & ~PGO_ALL_FLAGS ||
+             uap->size < 0 ||
+             (uap->size > 0 && uap->buffer == 0))
+        {
+                err = EINVAL;
+                goto out;
+        }
+
+        *retval = 0;
+
+        if (uap->uuid) {
+                uuid_t uuid;
+                err = copyin(uap->uuid, &uuid, sizeof(uuid));
+                if (err) {
+                        goto out;
+                }
+
+                if (uap->buffer == 0 && uap->size == 0) {
+                    uint64_t size64;
+
+                    if (uap->flags & PGO_WAIT_FOR_UNLOAD) {
+                        err = EINVAL;
+                        goto out;
+                    }
+
+                    err = OSKextGrabPgoData(uuid, &size64, NULL, 0, 0, !!(uap->flags & PGO_METADATA));
+                    if (err) {
+                        goto out;
+                    }
+
+                    ssize_t size = size64;
+                    if ( ((uint64_t) size) != size64  ||
+                         size < 0 )
+                    {
+                        err = ERANGE;
+                        goto out;
+                    }
+
+                    *retval = size;
+                    err = 0;
+                    goto out;
+
+                } else if (!uap->buffer || uap->size <= 0) {
+
+                    err = EINVAL;
+                    goto out;
+
+                } else {
+
+                    MALLOC(buffer, char *, uap->size, M_TEMP, M_WAITOK);
+                    if (!buffer) {
+                        err = ENOMEM;
+                        goto out;
+                    }
+
+                    uint64_t size64;
+
+                    err = OSKextGrabPgoData(uuid, &size64, buffer, uap->size,
+                                            !!(uap->flags & PGO_WAIT_FOR_UNLOAD),
+                                            !!(uap->flags & PGO_METADATA));
+                    if (err) {
+                        goto out;
+                    }
+
+                    ssize_t size = size64;
+                    if ( ((uint64_t) size) != size64  ||
+                         size < 0 )
+                    {
+                        err = ERANGE;
+                        goto out;
+                    }
+
+                    err = copyout(buffer, uap->buffer, size);
+                    if (err) {
+                        goto out;
+                    }
+
+                    *retval = size;
+                    goto out;
+                }
+        }
+
+
+#ifdef PROFILE
+
+        uint64_t size64 = get_size_for_buffer(uap->flags);
+        ssize_t size = size64;
+
+        if (uap->flags & (PGO_WAIT_FOR_UNLOAD | PGO_METADATA)) {
+            err = EINVAL;
+            goto out;
+        }
+
+        if ( ((uint64_t) size) != size64  ||
+             size < 0 )
+        {
+                err = ERANGE;
+                goto out;
+        }
+
+
+        if (uap->buffer == 0 && uap->size == 0) {
+                *retval = size;
+                err = 0;
+                goto out;
+        } else if (uap->size < size) {
+                err = EINVAL;
+                goto out;
+        } else {
+                MALLOC(buffer, char *, size, M_TEMP, M_WAITOK);
+                if (!buffer) {
+                        err = ENOMEM;
+                        goto out;
+                }
+
+                err = write_buffer(uap->flags, buffer);
+                if (err)
+                {
+                    err = EIO;
+                    goto out;
+                }
+
+                err = copyout(buffer, uap->buffer, size);
+                if (err) {
+                        goto out;
+                }
+
+                *retval = size;
+                goto out;
+        }
+
+#else
+
+        *retval = -1;
+        err = ENOSYS;
+        goto out;
+
+#endif
+
+out:
+        if (buffer) {
+                FREE(buffer, M_TEMP);
+        }
+        if (err) {
+                *retval = -1;
+        }
+        return err;
+}
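grab_pgo_data() uses a common two-step calling convention: a call with a null buffer and zero size reports (via *retval) the number of bytes a follow-up call would need, and the second call copies the data out into a caller-supplied buffer of at least that size. A small user-space sketch of that convention with a stand-in provider; it does not show the real syscall entry point or its argument order, which would have to be taken from syscalls.master:

    #include <stdlib.h>
    #include <string.h>
    #include <sys/types.h>

    /* Stand-in for the kernel service: size query when buf == NULL, copy otherwise. */
    static ssize_t
    provider(void *buf, size_t bufsize)
    {
            static const char data[] = "profile-bytes";

            if (buf == NULL && bufsize == 0)
                    return (ssize_t)sizeof(data);       /* size query */
            if (bufsize < sizeof(data))
                    return -1;                          /* buffer too small */
            memcpy(buf, data, sizeof(data));
            return (ssize_t)sizeof(data);
    }

    static void *
    fetch_all(size_t *lenp)
    {
            ssize_t need = provider(NULL, 0);
            void *buf;

            if (need <= 0 || (buf = malloc((size_t)need)) == NULL)
                    return NULL;
            if (provider(buf, (size_t)need) != need) {
                    free(buf);
                    return NULL;
            }
            *lenp = (size_t)need;
            return buf;
    }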
index 8fffb654b004946f1d711663c78387b98eca3013..c5f18ebe4d8c74ee8d918294a947864b85150aa9 100644 (file)
@@ -76,7 +76,6 @@
 #include <kern/host.h>
 #include <kern/kalloc.h>
 #include <kern/zalloc.h>
-#include <kern/wait_queue.h>
 #include <kern/sched_prim.h>
 
 #include <net/route.h>
index 4b16e76b63dd8ce50c949d20d91e39f338717689..20733790997f80e4c75fb3daeda914b547f82a58 100644 (file)
@@ -75,7 +75,6 @@
 #include <kern/host.h>
 #include <kern/kalloc.h>
 #include <kern/zalloc.h>
-#include <kern/wait_queue.h>
 #include <kern/sched_prim.h>
 
 #if CONFIG_MACF
index d63c131ea4247e32b230e7ffc67a5b253659b9a4..7ca2771d41db60ba2cf9cd782e388f138c7b5bc1 100644 (file)
@@ -1911,8 +1911,6 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau)
 
        case AUE_MAC_GET_PROC:
        case AUE_MAC_SET_PROC:
-       case AUE_MAC_GET_LCTX:
-       case AUE_MAC_SET_LCTX:
                PROCESS_MAC_TOKENS;
                break;
 #endif
index c741986bf868976617526ba82df0a92d38ba0002..e62a0b9e3326789c4eb708e7b5c185aedddb9f62 100644 (file)
@@ -217,6 +217,9 @@ static const bsm_fcntl_cmd_t bsm_fcntl_cmdtab[] = {
 #ifdef F_MARKDEPENDENCY
        { BSM_F_MARKDEPENDENCY, F_MARKDEPENDENCY },
 #endif
+#ifdef F_BARRIERFSYNC
+       { BSM_F_BARRIERFSYNC,   F_BARRIERFSYNC },
+#endif
 
 #ifdef FCNTL_FS_SPECIFIC_BASE
        { BSM_F_FS_SPECIFIC_0,  FCNTL_FS_SPECIFIC_BASE},
index f1f0655619d53751e78df7a7db5725713a016019..f80c948ba3ccc67037e14113f729feb5968f3541 100644 (file)
@@ -57,7 +57,6 @@
 #include <kern/host.h>
 #include <kern/kalloc.h>
 #include <kern/zalloc.h>
-#include <kern/wait_queue.h>
 #include <kern/sched_prim.h>
 
 #if CONFIG_AUDIT
index 67c4bab7ab7e60ab139b6f49937079135556485a..2a46a579d4898fbeea6372c53338792f426c0204 100644 (file)
@@ -73,7 +73,6 @@
 #include <kern/host.h>
 #include <kern/kalloc.h>
 #include <kern/zalloc.h>
-#include <kern/wait_queue.h>
 #include <kern/sched_prim.h>
 
 #if CONFIG_MACF
index 9a7a992811b75be22464662ca882aebf6490b319..aa44fa446973f68b1eee0ed39766057edee9f064 100644 (file)
@@ -70,7 +70,6 @@
 #include <kern/zalloc.h>
 #include <kern/sched_prim.h>
 #include <kern/task.h>
-#include <kern/wait_queue.h>
 
 #include <net/route.h>
 
index 30a5166b8993667706adaad2cd9b65119f3290d1..4b4072516e57a176c8ae46a9c26b6505ba0fe625 100644 (file)
@@ -22,8 +22,8 @@ DATAFILES = \
        dir.h dirent.h disk.h dkstat.h dtrace.h dtrace_glue.h dtrace_impl.h \
        errno.h ev.h event.h fasttrap.h fasttrap_isa.h fcntl.h file.h filedesc.h filio.h gmon.h \
        ioccom.h ioctl.h \
-       ioctl_compat.h ipc.h kernel.h kernel_types.h kern_event.h loadable_fs.h lock.h lockf.h \
-       kauth.h kdebug.h kern_control.h lctx.h lockstat.h malloc.h \
+       ioctl_compat.h ipc.h kernel.h kernel_types.h kern_event.h lctx.h loadable_fs.h lock.h lockf.h \
+       kauth.h kdebug.h kern_control.h lockstat.h malloc.h \
        mbuf.h mman.h mount.h msg.h msgbuf.h netport.h param.h paths.h pipe.h poll.h \
        proc.h  proc_info.h ptrace.h queue.h quota.h random.h reboot.h resource.h resourcevar.h \
        sbuf.h posix_sem.h posix_shm.h sdt.h \
@@ -39,30 +39,58 @@ DATAFILES = \
 # Installs header file for Apple internal use in user level - 
 #        $(DSTROOT)/System/Library/Frameworks/System.framework/PrivateHeaders
 PRIVATE_DATAFILES = \
+       attr.h \
+       cdefs.h \
        coalition.h \
        codesign.h \
        content_protection.h \
        csr.h \
        decmpfs.h \
+       disk.h \
        disklabel.h \
+       domain.h \
+       event.h \
+       fcntl.h \
        fileport.h \
        fsctl.h \
+       fsevents.h \
        fsgetpath.h \
        fslog.h \
        guarded.h \
        imgsrc.h \
        ipcs.h \
        kas_info.h \
+       kdebug.h \
+       kern_control.h \
+       kern_event.h \
        kern_memorystatus.h \
        kern_overrides.h \
+       mbuf.h \
+       mman.h \
+       priv.h \
+       proc.h \
+       proc_info.h \
+       proc_uuid_policy.h \
+       process_policy.h \
+       resource.h \
        sfi.h \
        shm_internal.h \
+       socket.h \
+       socketvar.h \
+       sockio.h \
+       spawn.h \
        spawn_internal.h \
+       stackshot.h \
+       sys_domain.h \
        tree.h \
+       unpcb.h \
        ux_exception.h \
+       work_interval.h \
        process_policy.h \
        proc_uuid_policy.h \
-       priv.h
+       priv.h \
+       pgo.h \
+       memory_maintenance.h
 
 # Installs header file for kernel extensions - 
 #        $(DSTROOT)/System/Library/Frameworks/Kernel.framework/Headers
@@ -73,7 +101,7 @@ KERNELFILES = \
        dir.h dirent.h disk.h disklabel.h dkstat.h  \
        errno.h ev.h event.h fcntl.h file.h filio.h \
        ioccom.h ioctl.h ipc.h \
-       ioctl_compat.h  kernel.h kernel_types.h kern_event.h lctx.h lock.h lockf.h \
+       ioctl_compat.h  kernel.h kernel_types.h kern_event.h lock.h lockf.h \
        kauth.h kdebug.h  md5.h kern_control.h imgact.h malloc.h namei.h \
        mman.h mbuf.h mount.h netport.h param.h paths.h \
        proc.h  queue.h random.h resource.h \
@@ -106,6 +134,7 @@ PRIVATE_KERNELFILES = \
        fslog.h \
        kasl.h \
        kern_memorystatus.h \
+       kpi_private.h \
        mach_swapon.h \
        msgbuf.h \
        eventvar.h \
@@ -115,16 +144,19 @@ PRIVATE_KERNELFILES = \
        sem_internal.h \
        shm_internal.h \
        signalvar.h \
+       stackshot.h \
        tty.h  ttychars.h \
        ttydefaults.h ttydev.h \
        user.h \
        vfs_context.h \
        vmmeter.h \
        spawn_internal.h \
-       priv.h
+       priv.h \
+       pgo.h \
+       memory_maintenance.h
 
 
-# /System/Library/Frameworks/System.framework/Headers and /usr/include
+# /usr/include
 INSTALL_MI_LIST        = ${DATAFILES}
 
 INSTALL_MI_GEN_LIST = syscall.h _posix_availability.h _symbol_aliasing.h
@@ -133,14 +165,14 @@ INSTALL_MI_DIR = sys
 
 EXPORT_MI_LIST = ${KERNELFILES} ${PRIVATE_KERNELFILES} linker_set.h bsdtask_info.h pthread_internal.h filedesc.h pipe.h resourcevar.h semaphore.h \
                                                                vnode_internal.h proc_internal.h file_internal.h mount_internal.h \
-                                                               uio_internal.h tree.h munge.h kern_tests.h
+                                                               uio_internal.h tree.h munge.h
 
 EXPORT_MI_GEN_LIST = syscall.h sysproto.h kdebugevents.h
 
 EXPORT_MI_DIR = sys
 
 # /System/Library/Frameworks/System.framework/PrivateHeaders
-INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES}
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
 
 # /System/Library/Frameworks/Kernel.framework/PrivateHeaders
 INSTALL_KF_MI_LCL_LIST =  ${KERNELFILES} ${PRIVATE_KERNELFILES}
@@ -169,8 +201,8 @@ sysproto.h: $(SRCROOT)/bsd/kern/syscalls.master $(MAKESYSCALLS) $(OBJROOT)/cscop
        $(_v)$(MAKESYSCALLS) $< proto > /dev/null
 
 kdebugevents.h:  $(SRCROOT)/bsd/kern/trace.codes $(MAKEKDEBUGEVENTS) $(OBJROOT)/cscope.genhdrs
-       @echo "Generating bsd/kern/$@ from $<";
-       @echo "$(OBJPATH)/bsd/kern/$@" > $(OBJROOT)/cscope.genhdrs/$@.path
+       @echo "Generating bsd/sys/$@ from $<";
+       @echo "$(OBJPATH)/bsd/sys/$@" > $(OBJROOT)/cscope.genhdrs/$@.path
        $(_v)$(MAKEKDEBUGEVENTS) $< > "$(OBJPATH)/bsd/sys/$@"
 
 MAKE_POSIX_AVAILABILITY = $(SRCROOT)/bsd/sys/make_posix_availability.sh
index 318b9dce897d7a2187e5dd36c4d7516edfa4a790..1cc149aa8fa61626e8c84d9a34c7e226fe321aac 100644 (file)
@@ -75,6 +75,7 @@ DATAFILES = \
        _timespec.h \
        _timeval.h \
        _timeval32.h \
+       _timeval64.h \
        _ucontext.h \
        _ucontext64.h \
        _uid_t.h \
@@ -126,7 +127,7 @@ EXPORT_MI_GEN_LIST =
 EXPORT_MI_DIR = sys/_types
 
 # /System/Library/Frameworks/System.framework/PrivateHeaders
-INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES}
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
 
 # /System/Library/Frameworks/Kernel.framework/PrivateHeaders
 INSTALL_KF_MI_LCL_LIST =  ${KERNELFILES} ${PRIVATE_KERNELFILES}
diff --git a/bsd/sys/_types/_timeval64.h b/bsd/sys/_types/_timeval64.h
new file mode 100644 (file)
index 0000000..c14f833
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _STRUCT_TIMEVAL64
+#define _STRUCT_TIMEVAL64
+struct timeval64
+{
+       __int64_t               tv_sec;         /* seconds */
+       __int64_t               tv_usec;        /* and microseconds */
+};
+#endif /* _STRUCT_TIMEVAL64 */
index 8dc62498d2a0d57ecec70ac94104e665eced9377..ebfeb60912de4831e04b8a61c0a1d89c0f73fa21 100644 (file)
@@ -373,6 +373,27 @@ typedef struct vol_attributes_attr {
 #define ATTR_CMN_RETURNED_ATTRS                0x80000000      
 
 #define ATTR_CMN_VALIDMASK                     0xFFFFFFFF
+/*
+ * The settable ATTR_CMN_* attributes include the following:
+ * ATTR_CMN_SCRIPT
+ * ATTR_CMN_CRTIME
+ * ATTR_CMN_MODTIME
+ * ATTR_CMN_CHGTIME
+ * 
+ * ATTR_CMN_ACCTIME
+ * ATTR_CMN_BKUPTIME
+ * ATTR_CMN_FNDRINFO
+ * ATTR_CMN_OWNERID
+ * 
+ * ATTR_CMN_GRPID
+ * ATTR_CMN_ACCESSMASK
+ * ATTR_CMN_FLAGS
+ * 
+ * ATTR_CMN_EXTENDED_SECURITY
+ * ATTR_CMN_UUID
+ * 
+ * ATTR_CMN_GRPUUID
+ */
 #define ATTR_CMN_SETMASK                       0x01C7FF00
 #define ATTR_CMN_VOLSETMASK                    0x00006700
 
@@ -399,6 +420,12 @@ typedef struct vol_attributes_attr {
 #define ATTR_VOL_INFO                          0x80000000
 
 #define ATTR_VOL_VALIDMASK                     0xC007FFFF
+
+/*
+ * The list of settable ATTR_VOL_* attributes includes the following:
+ * ATTR_VOL_NAME
+ * ATTR_VOL_INFO
+ */
 #define ATTR_VOL_SETMASK                       0x80002000
 
 
@@ -426,6 +453,10 @@ typedef struct vol_attributes_attr {
 #define ATTR_FILE_RSRCALLOCSIZE                        0x00002000
 
 #define ATTR_FILE_VALIDMASK                    0x000037FF
+/* 
+ * Settable ATTR_FILE_* attributes include:
+ * ATTR_FILE_DEVTYPE
+ */
 #define ATTR_FILE_SETMASK                      0x00000020
 
 #define ATTR_FORK_TOTALSIZE                    0x00000001
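The SETMASK comments above spell out which attributes setattrlist(2) will accept. As a minimal sketch, setting one of the listed common attributes (ATTR_CMN_FLAGS, the BSD file flags) looks like the following; for setattrlist the attribute buffer carries only the values, in attribute order, with no leading length word (the exact layout should be checked against the getattrlist/setattrlist man pages):

    #include <stdint.h>
    #include <sys/attr.h>
    #include <unistd.h>

    /* Set the BSD file flags on a path; returns 0, or -1 with errno set. */
    static int
    set_bsd_flags(const char *path, uint32_t flags)
    {
            struct attrlist al = {
                    .bitmapcount = ATTR_BIT_MAP_COUNT,
                    .commonattr  = ATTR_CMN_FLAGS,
            };
            uint32_t buf = flags;           /* value for ATTR_CMN_FLAGS */

            return setattrlist(path, &al, &buf, sizeof(buf), 0);
    }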
index 0bebc2c7fddd1bdb102020355ed9371ad96c6fe5..1f5fb1cc75b3d4f18d6677f28dae20f3c75fb4d1 100644 (file)
@@ -1,6 +1,6 @@
 
 /*
- * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2005, 2015 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -30,6 +30,8 @@
 #ifndef _SYS_BSDTASK_INFO_H
 #define _SYS_BSDTASK_INFO_H
 
+#include <vm/vm_map.h>
+
 struct proc_taskinfo_internal {          
         uint64_t                pti_virtual_size;   /* virtual memory size (bytes) */
         uint64_t                pti_resident_size;  /* resident memory size (bytes) */
index 6db9a69379d6755c4c004fcafe982ee5660ba564..3763a223a57d3d68014b7f98c993127b11858bcd 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2014 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -1060,12 +1060,20 @@ bufattr_t bufattr_dup (bufattr_t bap);
 void bufattr_free(bufattr_t bap);
 
 /*!
- @function bufattr_cpaddr
- @abstract Get the address of cp_entry on a buffer.
- @param bap Buffer Attribute whose cp_entry to get.
- @return int.
+ @function bufattr_cpx
+ @abstract Returns a pointer to a cpx_t structure.
+ @param bap Buffer Attribute whose cpx_t structure you wish to get.
+ @return Returns a cpx_t structure, or NULL if not valid
  */
-void *bufattr_cpaddr(bufattr_t);
+struct cpx *bufattr_cpx(bufattr_t);
+
+/*!
+ @function bufattr_setcpx
+ @abstract Set the cp_ctx on a buffer attribute.
+ @param bap Buffer Attribute that you wish to change
+ @return void
+ */
+void bufattr_setcpx(bufattr_t, struct cpx *cpx);
 
 /*!
  @function bufattr_cpoff
@@ -1075,15 +1083,6 @@ void *bufattr_cpaddr(bufattr_t);
  */
 uint64_t bufattr_cpoff(bufattr_t);
 
-
-/*!
- @function bufattr_setcpaddr
- @abstract Set the address of cp_entry on a buffer attribute.
- @param bap Buffer Attribute whose cp entry value has to be set
- @return void.
- */
-void bufattr_setcpaddr(bufattr_t, void *);
-
 /*!
  @function bufattr_setcpoff
  @abstract Set the file offset for a content protected I/O on 
index e0674869f6309be33699b44c3f521dab4a6dbafc..6ff3284bc2b7256550522183c48854b612e1780b 100644 (file)
 #include <sys/buf.h>
 #include <sys/lock.h>
 
+#if CONFIG_PROTECT
+#include <sys/cprotect.h>
+#endif
+
 #define NOLIST ((struct buf *)0x87654321)
 
 /*
@@ -86,8 +90,8 @@
  */
 struct bufattr {
 #if CONFIG_PROTECT
-       struct cprotect *ba_cpentry;    /* address of cp_entry */
-       uint64_t ba_cp_file_off;        /* rounded file offset. See buf_setcpoff() for more comments */
+       struct cpx *ba_cpx;
+       uint64_t ba_cp_file_off;
 #endif
        uint64_t ba_flags;      /* flags. Some are only in-use on embedded devices */
 };
@@ -333,7 +337,6 @@ errno_t     buf_make_private(buf_t bp);
 #endif
 
 #ifdef CONFIG_PROTECT
-void buf_setcpaddr(buf_t, struct cprotect *);
 void buf_setcpoff (buf_t, uint64_t);
 #endif
 
index 7c729026f904ec543af282b1f2b84a62a5f96d66..a8d95eb3a9e43db6ac7ce7f1ee02450442bba938 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #warning "Unsupported compiler detected"
 #endif
 
+/*
+ * Compatibility with compilers and environments that don't support compiler
+ * feature checking function-like macros.
+ */
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
+#ifndef __has_include
+#define __has_include(x) 0
+#endif
+#ifndef __has_feature
+#define __has_feature(x) 0
+#endif
+#ifndef __has_attribute
+#define __has_attribute(x) 0
+#endif
+#ifndef __has_extension
+#define __has_extension(x) 0
+#endif
+
 /*
  * The __CONCAT macro is used to concatenate parts of symbol names, e.g.
  * with "#define OLD(foo) __CONCAT(old,foo)", OLD(foo) produces oldfoo.
  */
 #define __deprecated   __attribute__((deprecated))
 
-#ifdef __has_extension
-    #if __has_extension(attribute_deprecated_with_message)
-        #define __deprecated_msg(_msg) __attribute__((deprecated(_msg)))
-    #else
-        #define __deprecated_msg(_msg) __attribute__((deprecated))
-    #endif
-#elif defined(__GNUC__) && ((__GNUC__ >= 5) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)))
-    #define __deprecated_msg(_msg) __attribute__((deprecated(_msg)))
+#if __has_extension(attribute_deprecated_with_message) || \
+               (defined(__GNUC__) && ((__GNUC__ >= 5) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5))))
+       #define __deprecated_msg(_msg) __attribute__((deprecated(_msg)))
 #else
-    #define __deprecated_msg(_msg) __attribute__((deprecated))
+       #define __deprecated_msg(_msg) __attribute__((deprecated))
 #endif
 
-#ifdef __has_extension
-    #if __has_extension(enumerator_attributes)
-        #define __deprecated_enum_msg(_msg) __deprecated_msg(_msg)
-    #else
-        #define __deprecated_enum_msg(_msg)
-    #endif
+#if __has_extension(enumerator_attributes)
+       #define __deprecated_enum_msg(_msg) __deprecated_msg(_msg)
 #else
-    #define __deprecated_enum_msg(_msg)
+       #define __deprecated_enum_msg(_msg)
 #endif
 
 /* __unavailable causes the compiler to error out when encountering
 #define __restrict     restrict
 #endif
 
+/* Compatibility with compilers and environments that don't support the
+ * nullability feature.
+ */
+
+#if !__has_feature(nullability)
+#ifndef __nullable
+#define __nullable
+#endif
+#ifndef __nonnull
+#define __nonnull
+#endif
+#ifndef __null_unspecified
+#define __null_unspecified
+#endif
+#endif
+
 /* Declaring inline functions within headers is error-prone due to differences
  * across various versions of the C language and extensions.  __header_inline
  * can be used to declare inline functions within system headers.  In cases
 #define __DARWIN_ONLY_UNIX_CONFORMANCE 1
 #define __DARWIN_ONLY_VERS_1050                1
 #endif /* PLATFORM_iPhoneSimulator */
+#ifdef PLATFORM_tvOS
+/* Platform: tvOS */
+#define __DARWIN_ONLY_64_BIT_INO_T     1
+#define __DARWIN_ONLY_UNIX_CONFORMANCE 1
+#define __DARWIN_ONLY_VERS_1050                1
+#endif /* PLATFORM_tvOS */
+#ifdef PLATFORM_AppleTVOS
+/* Platform: AppleTVOS */
+#define __DARWIN_ONLY_64_BIT_INO_T     1
+#define __DARWIN_ONLY_UNIX_CONFORMANCE 1
+#define __DARWIN_ONLY_VERS_1050                1
+#endif /* PLATFORM_AppleTVOS */
+#ifdef PLATFORM_tvSimulator
+/* Platform: tvSimulator */
+#define __DARWIN_ONLY_64_BIT_INO_T     1
+#define __DARWIN_ONLY_UNIX_CONFORMANCE 1
+#define __DARWIN_ONLY_VERS_1050                1
+#endif /* PLATFORM_tvSimulator */
+#ifdef PLATFORM_AppleTVSimulator
+/* Platform: AppleTVSimulator */
+#define __DARWIN_ONLY_64_BIT_INO_T     1
+#define __DARWIN_ONLY_UNIX_CONFORMANCE 1
+#define __DARWIN_ONLY_VERS_1050                1
+#endif /* PLATFORM_AppleTVSimulator */
 #ifdef PLATFORM_iPhoneOSNano
 /* Platform: iPhoneOSNano */
 #define __DARWIN_ONLY_64_BIT_INO_T     1
 #define __DARWIN_ONLY_UNIX_CONFORMANCE 1
 #define __DARWIN_ONLY_VERS_1050                1
 #endif /* PLATFORM_iPhoneNanoSimulator */
+#ifdef PLATFORM_WatchOS
+/* Platform: WatchOS */
+#define __DARWIN_ONLY_64_BIT_INO_T     1
+#define __DARWIN_ONLY_UNIX_CONFORMANCE 1
+#define __DARWIN_ONLY_VERS_1050                1
+#endif /* PLATFORM_WatchOS */
+#ifdef PLATFORM_WatchSimulator
+/* Platform: WatchSimulator */
+#define __DARWIN_ONLY_64_BIT_INO_T     1
+#define __DARWIN_ONLY_UNIX_CONFORMANCE 1
+#define __DARWIN_ONLY_VERS_1050                1
+#endif /* PLATFORM_WatchSimulator */
 #ifdef PLATFORM_MacOSX
 /* Platform: MacOSX */
 #define __DARWIN_ONLY_64_BIT_INO_T     0
 #elif defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__)
 #define __DARWIN_ALIAS_STARTING(_mac, _iphone, x)   __DARWIN_ALIAS_STARTING_MAC_##_mac(x)
 #else
-#define __DARWIN_ALIAS_STARTING(_mac, _iphone, x)
+#define __DARWIN_ALIAS_STARTING(_mac, _iphone, x)   x
 #endif
 #endif /* KERNEL */
 
 #error Unsupported architecture
 #endif
 
+#ifdef XNU_KERNEL_PRIVATE
+/*
+ * Selectively ignore cast alignment warnings
+ */
+#define __IGNORE_WCASTALIGN(x) _Pragma("clang diagnostic push")                     \
+                               _Pragma("clang diagnostic ignored \"-Wcast-align\"") \
+                               x;                                                   \
+                               _Pragma("clang diagnostic pop")
+#endif
+
 #endif /* !_CDEFS_H_ */
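
The block added above makes __has_builtin/__has_include/__has_feature/__has_attribute/__has_extension safe to use unconditionally, which is what lets the __deprecated_msg and nullability sections collapse into single #if tests. A small sketch of the same pattern in client code; MY_WARN_UNUSED and parse_flags are illustrative names, not part of the header.

    /* Define the probe macro to 0 when the compiler lacks it, then use it freely. */
    #ifndef __has_attribute
    #define __has_attribute(x) 0
    #endif

    #if __has_attribute(warn_unused_result)
    #define MY_WARN_UNUSED __attribute__((warn_unused_result))
    #else
    #define MY_WARN_UNUSED
    #endif

    MY_WARN_UNUSED static int parse_flags(const char *arg);
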
index 73eca26a815c812081852954ba16a82f1d5057ee..62b205eb79d13a33a775bf273703240c9a737a0c 100644 (file)
 #ifndef _SYS_COALITION_H_
 #define _SYS_COALITION_H_
 
-#include <sys/cdefs.h>
-#include <Availability.h>
 #include <stdint.h>
+#include <sys/cdefs.h>
 #include <sys/types.h>
 
+#include <mach/coalition.h>
+
 __BEGIN_DECLS
 
 #ifndef KERNEL
@@ -44,42 +45,175 @@ int coalition_create(uint64_t *cid_out, uint32_t flags);
 int coalition_terminate(uint64_t cid, uint32_t flags);
 int coalition_reap(uint64_t cid, uint32_t flags);
 
-/* This struct is also defined in osfmk/kern/coalition.h. Keep in sync. */
-struct coalition_resource_usage {
-       uint64_t tasks_started;
-       uint64_t tasks_exited;
-       uint64_t time_nonempty;
-       uint64_t cpu_time;
-       uint64_t interrupt_wakeups;
-       uint64_t platform_idle_wakeups;
-       uint64_t bytesread;
-       uint64_t byteswritten;
-       uint64_t gpu_time;
-};
-
 /* Wrappers around __coalition_info syscall (with proper struct types) */
 int coalition_info_resource_usage(uint64_t cid, struct coalition_resource_usage *cru, size_t sz);
 
-#endif /* KERNEL */
+#else /* KERNEL */
 
-/* Flags shared by userspace and xnu */
+#if CONFIG_COALITIONS
+/* in-kernel BSD interfaces */
 
-#define COALITION_CREATE_FLAG_PRIVILEGED ((uint32_t)0x1)
+/*
+ * coalition_id:
+ * Get the unique 64-bit identifier associated with the given coalition
+ */
+uint64_t coalition_id(coalition_t coal);
 
-#define COALITION_CREATE_FLAG_MASK ((uint32_t)0x1)
 
-#ifdef PRIVATE
-/* Flavors shared by only xnu + Libsyscall */
+/*
+ * coalitions_get_list:
+ * Get a list of coalitions as procinfo_coalinfo structures
+ *
+ * This interface is primarily to support libproc.
+ *
+ * Parameters:
+ *     type      : The COALITION_TYPE of the coalitions to investigate.
+ *                 Valid types can be found in <mach/coalition.h>
+ *     coal_list : Pointer to an array of procinfo_coalinfo structures
+ *                 that will be filled with information about each
+ *                 coalition whose type matches 'type'
+ *                 NOTE: This can be NULL to perform a simple query of
+ *                 the total number of coalitions.
+ *     list_sz   : The size (in number of structures) of 'coal_list'
+ *
+ * Returns: 0 if no coalitions matching 'type' are found
+ *          Otherwise: the number of coalitions whose type matches
+ *                     the 'type' parameter (all coalitions if type == -1)
+ */
+extern int coalitions_get_list(int type, struct procinfo_coalinfo *coal_list, int list_sz);
 
-/* Syscall flavors */
-#define COALITION_OP_CREATE 1
-#define COALITION_OP_TERMINATE 2
-#define COALITION_OP_REAP 3
 
-/* coalition_info flavors */
-#define COALITION_INFO_RESOURCE_USAGE 1
+/*
+ * coalition_is_leader:
+ * Determine if a task is a coalition leader.
+ *
+ * Parameters:
+ *     task      : The task to investigate
+ *     coal_type : The COALITION_TYPE of the coalition to investigate.
+ *                 Valid types can be found in <mach/coalition.h>
+ *     coal      : If 'task' is a valid task, and is a member of a coalition
+ *                 of type 'coal_type', then 'coal' will be filled in with
+ *                 the corresponding coalition_t object.
+ *                 NOTE: This will be filled in whether or not the 'task' is
+ *                       a leader in the coalition. However, if 'task' is
+ *                       not a member of a coalition of type 'coal_type' then
+ *                       'coal' will be filled in with COALITION_NULL.
+ *                 NOTE: This can be NULL
+ *
+ * Returns: TRUE if 'task' is a coalition leader, FALSE otherwise.
+ */
+extern boolean_t coalition_is_leader(task_t task, int coal_type, coalition_t *coal);
 
-#endif /* PRIVATE */
+/*
+ * coalition_get_task_count:
+ * Sum up the number of tasks in the given coalition
+ *
+ * Parameters:
+ *     coal     : The coalition to investigate
+ *
+ * Returns: The number of tasks in the coalition
+ */
+extern int coalition_get_task_count(coalition_t coal);
+
+/*
+ * coalition_get_page_count:
+ * Sum up the page count for each task in the coalition specified by 'coal'
+ *
+ * Parameters:
+ *     coal     : The coalition to investigate
+ *     ntasks   : If non-NULL, this will be filled in with the number of
+ *                tasks in the coalition.
+ *
+ * Returns: The sum of all pages used by all members of the coalition
+ */
+extern uint64_t coalition_get_page_count(coalition_t coal, int *ntasks);
+
+/*
+ * coalition_get_pid_list:
+ * Gather a list of constituent PIDs of tasks within a coalition playing a
+ * given role.
+ *
+ * Parameters:
+ *     coal       : The coalition to investigate
+ *     rolemask   : The set of coalition task roles used to filter the list
+ *                  of PIDs returned in 'pid_list'. Roles can be combined
+ *                  using the COALITION_ROLEMASK_* tokens found in
+ *                  <mach/coalition.h>. Each PID returned is guaranteed to
+ *                  be tagged with one of the task roles specified by this
+ *                  mask.
+ *     sort_order : The order in which the returned PIDs should be sorted;
+ *                  by default this is descending page-count order.
+ *     pid_list   : Pointer to an array of PIDs that will be filled with
+ *                  members of the coalition tagged with the given 'taskrole'
+ *     list_sz    : The size (in number of PIDs) of 'pid_list'
+ *
+ * Note:
+ * This function will return the list of PIDs in a sorted order. By default
+ * the PIDs will be sorted by task page count in descending order. In the
+ * future it may be possible for user space to specify a level of importance
+ * for each coalition member. If there is a user space specified importance,
+ * then the list of PIDs returned will be sorted in _ascending_ importance,
+ * i.e., pid_list[0] will be the least important task (or the largest consumer
+ * of memory). The desired sort order can be specified using the
+ * COALITION_SORT_* definitions in osfmk/mach/coalition.h
+ *
+ * It is also possible to return an unsorted list of PIDs using the special
+ * sort type 'COALITION_SORT_NOSORT'
+ *
+ * Returns: < 0 on ERROR
+ *          0 if 'coal' contains no tasks whose role is 'taskrole'
+ *              (or if the coalition is being deallocated)
+ *          Otherwise: the number of PIDs in the coalition whose role is
+ *                     'taskrole'. NOTE: This may be larger or smaller than
+ *                     the 'pid_list' array.
+ *
+ */
+extern int coalition_get_pid_list(coalition_t coal, uint32_t rolemask,
+                                 int sort_order, int *pid_list, int list_sz);
+
+#else /* !CONFIG_COALITIONS */
+static inline uint64_t coalition_id(__unused coalition_t coal)
+{
+       return 0;
+}
+
+static inline int coalitions_get_list(__unused int type,
+                                     __unused struct procinfo_coalinfo *coal_list,
+                                     __unused int list_sz)
+{
+       return 0;
+}
+
+static inline boolean_t coalition_is_leader(__unused task_t task,
+                                           __unused int coal_type,
+                                           coalition_t *coal)
+{
+       *coal = COALITION_NULL;
+       return FALSE;
+}
+
+static inline int coalition_get_task_count(__unused coalition_t coal)
+{
+       return 0;
+}
+
+static inline uint64_t coalition_get_page_count(__unused coalition_t coal,
+                                               __unused int *ntasks)
+{
+       return 0;
+}
+
+static inline int coalition_get_pid_list(__unused coalition_t coal,
+                                        __unused uint32_t rolemask,
+                                        __unused int sort_order,
+                                        __unused int *pid_list,
+                                        __unused int list_sz)
+{
+       return 0;
+}
+#endif
+
+#endif /* KERNEL */
 
 __END_DECLS
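
The userspace side above now takes struct coalition_resource_usage from <mach/coalition.h> instead of defining it locally. A hedged sketch of the Libsyscall wrapper in use; the header is private, the coalition id is assumed to come from elsewhere (for example libproc), and the usual 0-on-success / -1-with-errno convention is an assumption.

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/coalition.h>    /* private header; struct now comes via <mach/coalition.h> */

    int print_coalition_usage(uint64_t cid)
    {
        struct coalition_resource_usage cru;

        if (coalition_info_resource_usage(cid, &cru, sizeof(cru)) != 0) {
            perror("coalition_info_resource_usage");
            return -1;
        }
        printf("cpu_time=%llu ns, tasks_started=%llu\n",
               (unsigned long long)cru.cpu_time,
               (unsigned long long)cru.tasks_started);
        return 0;
    }
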
 
index e0070c5cad26cee407e5903ba1880810b696c7bd..ffda243263399cab69c146320e46a900939cdbb8 100644 (file)
@@ -41,7 +41,7 @@
 const 
 CS_CodeDirectory *findCodeDirectory(
        const CS_SuperBlob *embedded,
-       char *lower_bound,
-       char *upper_bound);
+       const char *lower_bound,
+       const char *upper_bound);
 
 #endif
index 1a23c3d0a4fbb4e5d4173877d488c79f0dc5f381..1d8ee6516bbf531203c7f861b8f425ac6e088d80 100644 (file)
@@ -52,6 +52,8 @@
 
 #define CS_KILLED              0x1000000       /* was killed by kernel for invalidity */
 #define CS_DYLD_PLATFORM       0x2000000       /* dyld used to load this is a platform binary */
+#define CS_PLATFORM_BINARY     0x4000000       /* this is a platform binary */
+#define CS_PLATFORM_PATH       0x8000000       /* platform binary by the fact of path (osx only) */
 
 #define CS_ENTITLEMENT_FLAGS   (CS_GET_TASK_ALLOW | CS_INSTALLER)
 
 #define CS_OPS_BLOB            10      /* get codesign blob */
 #define CS_OPS_IDENTITY                11      /* get codesign identity */
 
-/* SigPUP */
-#define CS_OPS_SIGPUP_INSTALL  20
-#define CS_OPS_SIGPUP_DROP     21
-#define CS_OPS_SIGPUP_VALIDATE 22
-
-struct sigpup_install_table {
-       uint64_t data;
-       uint64_t length;
-       uint64_t path;
-};
-
-
 /*
  * Magic numbers used by Code Signing
  */
@@ -114,11 +104,20 @@ enum {
        CSTYPE_INDEX_REQUIREMENTS = 0x00000002,         /* compat with amfi */
        CSTYPE_INDEX_ENTITLEMENTS = 0x00000005,         /* compat with amfi */
 
-       CS_HASHTYPE_SHA1 = 1
+       CS_HASHTYPE_SHA1 = 1,
+       CS_HASHTYPE_SHA256 = 2,
+       CS_HASHTYPE_SHA256_TRUNCATED = 3,
+
+       CS_SHA1_LEN = 20,
+       CS_SHA256_TRUNCATED_LEN = 20,
+
+       CS_CDHASH_LEN = 20,
+       CS_HASH_MAX_SIZE = 32, /* max size of the hash we'll support */
 };
 
 
 #define KERNEL_HAVE_CS_CODEDIRECTORY 1
+#define KERNEL_CS_CODEDIRECTORY_HAVE_PLATFORM 1
 
 /*
  * C form of a CodeDirectory.
@@ -135,7 +134,7 @@ typedef struct __CodeDirectory {
        uint32_t codeLimit;                             /* limit to main image signature range */
        uint8_t hashSize;                               /* size of each hash in bytes */
        uint8_t hashType;                               /* type of hash (cdHashType* constants) */
-       uint8_t spare1;                                 /* unused (must be zero) */
+       uint8_t platform;                               /* platform identifier; zero if not platform binary */
        uint8_t pageSize;                               /* log2(page size in bytes); 0 => infinite */
        uint32_t spare2;                                /* unused (must be zero) */
        /* Version 0x20100 */
@@ -162,6 +161,7 @@ typedef struct __SC_SuperBlob {
        /* followed by Blobs in no particular order as indicated by offsets in index */
 } CS_SuperBlob;
 
+#define KERNEL_HAVE_CS_GENERICBLOB 1
 typedef struct __SC_GenericBlob {
        uint32_t magic;                         /* magic number */
        uint32_t length;                        /* total length of blob */
@@ -196,43 +196,50 @@ struct vnode;
 struct cs_blob;
 struct fileglob;
 
-struct cscsr_functions  {
-       int             csr_version;
-#define CSCSR_VERSION 1
-       int             (*csr_validate_header)(const uint8_t *, size_t);
-       const void*     (*csr_find_file_codedirectory)(struct vnode *, const uint8_t *, size_t, size_t *);
-};
-
 __BEGIN_DECLS
 int    cs_enforcement(struct proc *);
 int    cs_require_lv(struct proc *);
 uint32_t cs_entitlement_flags(struct proc *p);
 int    cs_entitlements_blob_get(struct proc *, void **, size_t *);
+int    cs_restricted(struct proc *);
 uint8_t * cs_get_cdhash(struct proc *);
-void   cs_register_cscsr(struct cscsr_functions *);
 
-const  CS_GenericBlob *
-       cs_find_blob(struct cs_blob *, uint32_t, uint32_t);
+struct cs_blob * csproc_get_blob(struct proc *);
+struct cs_blob * csvnode_get_blob(struct vnode *, off_t);
+void            csvnode_print_debug(struct vnode *);
+
+const char *   csblob_get_teamid(struct cs_blob *);
+const char *   csblob_get_identity(struct cs_blob *);
+const uint8_t *        csblob_get_cdhash(struct cs_blob *);
+int            csblob_get_platform_binary(struct cs_blob *);
+unsigned int   csblob_get_flags(struct cs_blob *blob);
+int            csblob_get_entitlements(struct cs_blob *, void **, size_t *);
+const CS_GenericBlob *
+               csblob_find_blob(struct cs_blob *, uint32_t, uint32_t);
+const CS_GenericBlob *
+               csblob_find_blob_bytes(const uint8_t *, size_t, uint32_t, uint32_t);
+
+/*
+ * Mostly convenience functions below
+ */
 
-const  char * csblob_get_teamid(struct cs_blob *);
 const  char * csproc_get_teamid(struct proc *);
 const  char * csvnode_get_teamid(struct vnode *, off_t);
 int    csproc_get_platform_binary(struct proc *);
 const  char * csfg_get_teamid(struct fileglob *);
 int    csfg_get_path(struct fileglob *, char *, int *);
 int    csfg_get_platform_binary(struct fileglob *);
+uint8_t * csfg_get_cdhash(struct fileglob *, uint64_t, size_t *);
 
-__END_DECLS
+extern int cs_debug;
 
 #ifdef XNU_KERNEL_PRIVATE
 
 void   cs_init(void);
 int    cs_allow_invalid(struct proc *);
 int    cs_invalid_page(addr64_t);
-int    sigpup_install(user_addr_t);
-int    sigpup_drop(void);
+int    csproc_get_platform_path(struct proc *);
 
-extern int cs_debug;
 extern int cs_validation;
 #if !SECURE_KERNEL
 extern int cs_enforcement_panic;
@@ -240,6 +247,11 @@ extern int cs_enforcement_panic;
 
 #endif /* XNU_KERNEL_PRIVATE */
 
+
+__END_DECLS
+
+
+
 #endif /* KERNEL */
 
 #endif /* _SYS_CODESIGN_H_ */
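
Kernel-side sketch of the new csblob accessor family above: instead of reaching into struct cs_blob (or using the removed cs_find_blob()), callers get the blob from the proc and query it. log_signing_info is a hypothetical helper and assumes 'p' is a valid, referenced proc_t.

    #include <sys/codesign.h>

    static void log_signing_info(struct proc *p)
    {
        struct cs_blob *blob = csproc_get_blob(p);

        if (blob == NULL)
            return;     /* process is unsigned */

        const char *teamid = csblob_get_teamid(blob);
        int platform = csblob_get_platform_binary(blob);

        printf("teamid=%s platform=%d\n", teamid ? teamid : "<none>", platform);
    }
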
index d35884f1bbcb7ee06c18637af6b53cfce1ee5cd5..20eae8b310c83e765fd4219d66796d2269eb9417 100644 (file)
 #define PROTECTION_CLASS_E 5
 #define PROTECTION_CLASS_F 6
 
+/*
+ * This forces open_dprotected_np to behave as though the file were created with
+ * the traditional open(2) semantics.
+ */
+#define PROTECTION_CLASS_DEFAULT  (-1)
+
 #endif /* PRIVATE */
 
 #endif /* _SYS_CONTENT_PROTECTION_H_ */
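
PROTECTION_CLASS_DEFAULT above is consumed by the private open_dprotected_np() SPI; passing it asks for plain open(2) semantics rather than assigning a specific protection class. A hedged sketch follows; the path and mode are placeholders, and the SPI prototype is assumed to be the usual path/flags/class/dpflags/mode form.

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/content_protection.h>
    #include <unistd.h>

    int create_unclassed_file(const char *path)
    {
        int fd = open_dprotected_np(path, O_CREAT | O_RDWR,
                                    PROTECTION_CLASS_DEFAULT,
                                    0,          /* dpflags */
                                    0644);
        if (fd < 0) {
            perror("open_dprotected_np");
            return -1;
        }
        return fd;
    }
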
index 67cdd1e5754a936eb792b2f0153f0e18362cb82d..64285892155b96ed4875303d5201ae14ffa4b7f2 100644 (file)
 #ifndef _SYS_CPROTECT_H_
 #define        _SYS_CPROTECT_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 #if KERNEL_PRIVATE
 
 #include <sys/cdefs.h>
-#include <sys/content_protection.h>
-#include <sys/kernel_types.h>
+#include <sys/param.h>
+#include <sys/buf.h>
+#include <sys/kdebug.h>
 #include <crypto/aes.h>
+#include <stdbool.h>
 
-#include <sys/kdebug.h>
+__BEGIN_DECLS
 
 #define CP_CODE(code) FSDBG_CODE(DBG_CONTENT_PROT, code)
 /* 
@@ -67,16 +65,7 @@ enum {
 
 #endif
 
-
-
-#define CP_IV_KEYSIZE             20   /* 16x8 = 128, but SHA1 pushes 20 bytes so keep space for that */
-#define CP_MAX_KEYSIZE            32   /* 8x4 = 32, 32x8 = 256 */
-#define CP_MAX_CACHEBUFLEN        64   /* Maximum size of cp cache buffer/array */
-
 #define CP_MAX_WRAPPEDKEYSIZE     128  /* The size of the largest allowed key */
-#define CP_INITIAL_WRAPPEDKEYSIZE 40
-#define CP_V2_WRAPPEDKEYSIZE      40   /* Size of the wrapped key in a v2 EA */
-#define CP_V4_RESERVEDBYTES       20   /* Number of reserved bytes in EA still present */
 
 /* lock events from AppleKeyStore */
 #define CP_LOCKED_STATE           0    /* Device is locked */
@@ -84,48 +73,9 @@ enum {
 
 #define CP_MAX_STATE                     1     /* uint8_t ; maximum # of states is 255 */
 
-#define CP_LOCKED_KEYCHAIN        0
-#define CP_UNLOCKED_KEYCHAIN      1
-
-/* For struct cprotect: cp_flags */
-#define CP_NEEDS_KEYS             0x01 /* File needs persistent keys */
-#define CP_KEY_FLUSHED            0x02 /* File's unwrapped key has been purged from memory */
-#define CP_NO_XATTR               0x04 /* Key info has not been saved as EA to the FS */
-#define CP_OFF_IV_ENABLED         0x08 /* Only go down relative IV route if this flag is set */
-#define CP_RELOCATION_INFLIGHT    0x10 /* File with offset IVs is in the process of being relocated. */
-#define CP_SEP_WRAPPEDKEY                0x20  /* Wrapped key delivered from keybag */
-
-
-
-/* Content Protection VNOP Operation flags */
-#define CP_READ_ACCESS            0x1
-#define CP_WRITE_ACCESS           0x2
-
-/*
- * Check for this version when deciding to enable features
- * For iOS 4, CP_CURRENT_MAJOR_VERS = 2.0
- * For iOS 5, CP_CURRENT_MAJOR_VERS = 4.0
- */
-#define CONTENT_PROTECTION_XATTR_NAME  "com.apple.system.cprotect"
-#define CP_NEW_MAJOR_VERS         4
-#define CP_PREV_MAJOR_VERS        2
-#define CP_MINOR_VERS             0
-
-/* the class occupies the lowest 5 bits, so there are 32 values (0-31) */
-#define CP_EFFECTIVE_CLASSMASK 0x0000001f
-
-/* macros for quick access/typing to mask out the classmask */
-#define CP_CLASS(x) ((uint32_t)(CP_EFFECTIVE_CLASSMASK & (x)))
-
-#define CP_CRYPTO_G1   0x00000020
-
 typedef struct cprotect *cprotect_t;
 typedef struct cp_wrap_func *cp_wrap_func_t;
-typedef struct cp_xattr *cp_xattr_t;
-
-typedef struct cnode * cnode_ptr_t;
-//forward declare the struct.
-struct hfsmount;
+typedef struct cpx *cpx_t;
 
 /* Structures passed between HFS and AKS kext */
 typedef struct {
@@ -146,11 +96,14 @@ typedef struct {
 
 typedef cp_wrapped_key_s* cp_wrapped_key_t;
 
+typedef uint16_t cp_key_revision_t;
+
 typedef struct {
-       ino64_t  inode;
-       uint32_t volume;
-       pid_t    pid;
-       uid_t    uid;
+       ino64_t                         inode;
+       uint32_t                        volume;
+       pid_t                           pid;
+       uid_t                           uid;
+       cp_key_revision_t       key_revision;
 } cp_cred_s;
 
 typedef cp_cred_s* cp_cred_t;
@@ -170,41 +123,22 @@ typedef int backup_key_t(cp_cred_t access, const cp_wrapped_key_t wrapped_key_in
  */
 #define CP_RAW_KEY_WRAPPEDKEY  0x00000001
 
-
-/* 
- * Flags for Key Generation Behavior 
- *
- * These are passed to cp_generate_keys() and cp_new() in the 
- * flags arguments
- */
-#define CP_KEYWRAP_DIFFCLASS    0x00000001 /* wrapping with a different class bag is OK */
-
-
 /*
- * Runtime-only structure containing the content protection status
- * for the given file.  This is contained within the cnode
- * This is passed down to IOStorageFamily via the bufattr struct
- *
- ******************************************************
- * Some Key calculation information for offset based IV
- ******************************************************
- * Kf  = original 256 bit per file key
- * Kiv = SHA1(Kf), use full Kf, but truncate Kiv to 128 bits
- * Kiv can be cached in the cprotect, so it only has to be calculated once for the file init
- *
- * IVb = Encrypt(Kiv, offset)
- *
+ * Function prototypes for kexts to interface with our internal cprotect
+ * fields;  cpx provides opacity and allows us to modify behavior internally
+ * without requiring kext changes.
  */
-struct cprotect {
-       uint32_t        cp_flags;
-       uint32_t        cp_pclass;  /* persistent class stored on-disk */
-       aes_encrypt_ctx cp_cache_iv_ctx;
-       uint32_t        cp_cache_key_len;
-       uint8_t         cp_cache_key[CP_MAX_CACHEBUFLEN];
-       uint32_t        cp_persistent_key_len;
-       void*           cp_backing_cnode;
-       uint8_t         cp_persistent_key[];
-};
+cpx_t cpx_alloc(size_t key_size);
+void cpx_free(cpx_t);
+__attribute__((const)) size_t cpx_size(size_t key_size);
+__attribute__((pure)) bool cpx_is_sep_wrapped_key(const struct cpx *);
+void cpx_set_is_sep_wrapped_key(struct cpx *, bool);
+__attribute__((pure)) bool cpx_use_offset_for_iv(const struct cpx *);
+void cpx_set_use_offset_for_iv(struct cpx *, bool);
+__attribute__((pure)) uint16_t cpx_key_len(const struct cpx *);
+void cpx_set_key_len(struct cpx *, uint16_t key_len);
+__attribute__((pure)) void *cpx_key(const struct cpx *);
+aes_encrypt_ctx *cpx_iv_aes_ctx(struct cpx *);
 
 /* Structure to store pointers for AKS functions */
 struct cp_wrap_func {
@@ -215,100 +149,32 @@ struct cp_wrap_func {
        backup_key_t    *backup_key;
 };
 
-/*
- * On-disk structure written as the per-file EA payload
- * All on-disk multi-byte fields for the CP XATTR must be stored
- * little-endian on-disk.  This means they must be endian swapped to
- * L.E on getxattr() and converted to LE on setxattr().
- *
- * This structure is a fixed length and is tightly packed.
- * 56 bytes total.
- */
-struct cp_xattr_v2 {
-       u_int16_t xattr_major_version;
-       u_int16_t xattr_minor_version;
-       u_int32_t flags;
-       u_int32_t persistent_class;
-       u_int32_t key_size;
-       uint8_t   persistent_key[CP_V2_WRAPPEDKEYSIZE];
-} __attribute__((aligned(2), packed));
-
-
-/*
- * V4 Content Protection EA On-Disk Layout.
- *
- * This structure must be tightly packed, but the *size can vary*
- * depending on the length of the key.  At MOST, the key length will be
- * CP_MAX_WRAPPEDKEYSIZE, but the length is defined by the key_size field.
- *
- * Either way, the packing must be applied to ensure that the key data is
- * retrievable in the right location relative to the start of the struct.
- *
- * Fully packed, this structure can range from :
- *             MIN: 36 bytes (no key -- used with directories)
- *             MAX: 164 bytes (with 128 byte key)
- *
- * During runtime we always allocate with the full 128 byte key, but only
- * use as much of the key buffer as needed. It must be tightly packed, though.
- */
-
-struct cp_xattr_v4 {
-       u_int16_t xattr_major_version;
-       u_int16_t xattr_minor_version;
-       u_int32_t flags;
-       u_int32_t persistent_class;
-       u_int32_t key_size;
-       /* CP V4 Reserved Bytes == 20 */
-       u_int8_t reserved[CP_V4_RESERVEDBYTES];
-       /* All above fields are fixed regardless of key length (36 bytes) */
-       /* Max Wrapped Size == 128 */
-       uint8_t   persistent_key[CP_MAX_WRAPPEDKEYSIZE];
-} __attribute__((aligned(2), packed));
+int cp_key_store_action(int);
+int cp_register_wraps(cp_wrap_func_t);
 
+#ifdef BSD_KERNEL_PRIVATE
 
 /*
- * The Root Directory's EA (fileid 1) is special; it defines information about
- * what capabilities the filesystem is using.
- *
- * The data is still stored little endian.
- *
- * Note that this structure is tightly packed: 28 bytes total.
+ * Declarations that are not exported from the kernel but are used by
+ * VFS to call into the implementation (i.e. HFS) should be here.
  */
- struct cp_root_xattr {
-       u_int16_t major_version;
-       u_int16_t minor_version;
-       u_int64_t flags;
-       u_int8_t reserved[16];
-} __attribute__((aligned(2), packed));
 
+/* Content Protection VNOP Operation flags */
+#define CP_READ_ACCESS            0x1
+#define CP_WRITE_ACCESS           0x2
 
 /*
  * Functions to check the status of a CP and to query
  * the containing filesystem to see if it is supported.
  */
-int cp_vnode_getclass(vnode_t, int *);
-int cp_vnode_setclass(vnode_t, uint32_t);
-int cp_vnode_transcode(vnode_t vp, void *key, unsigned *len);
+struct vnode;
+struct hfsmount;
 
-int cp_key_store_action(int);
-int cp_register_wraps(cp_wrap_func_t);
+int cp_vnode_getclass(struct vnode *, int *);
+int cp_vnode_setclass(struct vnode *, uint32_t);
+int cp_vnode_transcode(struct vnode * vp, void *key, unsigned *len);
 
-int cp_entry_init(cnode_ptr_t, struct mount *);
-int cp_entry_gentempkeys(struct cprotect **entry_ptr, struct hfsmount *hfsmp);
-int cp_needs_tempkeys (struct hfsmount *hfsmp, int* needs);
-void cp_entry_destroy(struct cprotect *entry_ptr);
-void cp_replace_entry (struct cnode *cp, struct cprotect *newentry);
-cnode_ptr_t cp_get_protected_cnode(vnode_t);
-int cp_handle_vnop(vnode_t, int, int);
-int cp_fs_protected (mount_t);
-int cp_getrootxattr (struct hfsmount *hfsmp, struct cp_root_xattr *outxattr);
-int cp_setrootxattr (struct hfsmount *hfsmp, struct cp_root_xattr *newxattr);
-int cp_setxattr(struct cnode *cp, struct cprotect *entry, struct hfsmount *hfsmp, uint32_t fileid, int options);
-int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, int targetclass, 
-               uint32_t flags, struct cprotect **newentry);
-int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp, int32_t suppliedclass, 
-               mode_t cmode, struct cprotect **tmpentry);
-int cp_handle_relocate (cnode_ptr_t cp, struct hfsmount *hfsmp);
+int cp_handle_vnop(struct vnode *, int, int);
 int cp_handle_open(struct vnode *vp, int mode);
 int cp_get_root_major_vers (struct vnode *vp, uint32_t *level);
 int cp_get_default_level (struct vnode *vp, uint32_t *level);
@@ -317,11 +183,9 @@ int cp_set_trimmed(struct hfsmount *hfsmp);
 int cp_set_rewrapped(struct hfsmount *hfsmp);
 int cp_flop_generation (struct hfsmount *hfsmp);
 
+#endif /* BSD_KERNEL_PRIVATE */
 
-#endif /* KERNEL_PRIVATE */
-
-#ifdef __cplusplus
-};
-#endif
+__END_DECLS
 
+#endif /* KERNEL_PRIVATE */
 #endif /* !_SYS_CPROTECT_H_ */
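
Sketch of the new opaque cpx accessors above from the point of view of a filesystem that used to populate struct cprotect fields directly. make_cpx, raw_key and key_len are hypothetical, and the assumption that cpx_alloc() can fail and return NULL is not confirmed by the header.

    #include <string.h>
    #include <sys/cprotect.h>

    static cpx_t make_cpx(const void *raw_key, uint16_t key_len)
    {
        cpx_t cpx = cpx_alloc(key_len);

        if (cpx == NULL)            /* assumption: allocation may fail */
            return NULL;

        cpx_set_key_len(cpx, key_len);
        memcpy(cpx_key(cpx), raw_key, key_len);
        cpx_set_use_offset_for_iv(cpx, true);   /* replaces the old CP_OFF_IV_ENABLED flag */
        return cpx;
    }
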
index f418eef6821511051a103dc2b0355b746153c55c..b2f59f1c3104e4ef70cecc736258dab399f3b8db 100644 (file)
@@ -47,6 +47,7 @@ typedef uint32_t csr_op_t;
 #define CSR_ALLOW_DESTRUCTIVE_DTRACE   (1 << 5) /* name deprecated */
 #define CSR_ALLOW_UNRESTRICTED_DTRACE  (1 << 5)
 #define CSR_ALLOW_UNRESTRICTED_NVRAM   (1 << 6)
+#define CSR_ALLOW_DEVICE_CONFIGURATION (1 << 7)
 
 #define CSR_VALID_FLAGS (CSR_ALLOW_UNTRUSTED_KEXTS | \
                          CSR_ALLOW_UNRESTRICTED_FS | \
@@ -54,15 +55,25 @@ typedef uint32_t csr_op_t;
                          CSR_ALLOW_KERNEL_DEBUGGER | \
                          CSR_ALLOW_APPLE_INTERNAL | \
                          CSR_ALLOW_UNRESTRICTED_DTRACE | \
-                         CSR_ALLOW_UNRESTRICTED_NVRAM)
+                         CSR_ALLOW_UNRESTRICTED_NVRAM | \
+                         CSR_ALLOW_DEVICE_CONFIGURATION)
+
+
+/* CSR capabilities that a booter can give to the system */
+#define CSR_CAPABILITY_UNLIMITED                               (1 << 0)
+#define CSR_CAPABILITY_CONFIG                                  (1 << 1)
+#define CSR_CAPABILITY_APPLE_INTERNAL                  (1 << 2)
+
+#define CSR_VALID_CAPABILITIES (CSR_CAPABILITY_UNLIMITED | CSR_CAPABILITY_CONFIG | CSR_CAPABILITY_APPLE_INTERNAL)
 
 #ifdef PRIVATE
 /* Private system call interface between Libsyscall and xnu */
 
 /* Syscall flavors */
-#define CSR_OP_CHECK 0
-#define CSR_OP_GET_ACTIVE_CONFIG 1
-#define CSR_OP_GET_PENDING_CONFIG 2
+enum csr_syscalls {
+       CSR_SYSCALL_CHECK,
+       CSR_SYSCALL_GET_ACTIVE_CONFIG,
+};
 
 #endif /* PRIVATE */
 
@@ -79,7 +90,6 @@ void csr_set_allow_all(int value);
 /* Syscalls */
 int csr_check(csr_config_t mask);
 int csr_get_active_config(csr_config_t *config);
-int csr_get_pending_config(csr_config_t *config);
 
 __END_DECLS
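
Sketch of csr_check() against one of the flags listed above; treating a zero return as "this relaxation is currently allowed" is an assumption, and untrusted_kexts_allowed is an illustrative name.

    #include <stdbool.h>
    #include <sys/csr.h>

    static bool untrusted_kexts_allowed(void)
    {
        /* Zero is assumed to mean the active configuration permits the request. */
        return csr_check(CSR_ALLOW_UNTRUSTED_KEXTS) == 0;
    }
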
 
index 48c1ab9cc773cf97d4a3ecae825acd26b4bccfdc..6a47180167792148f8a4da970c34918a2c281d23 100644 (file)
@@ -42,7 +42,7 @@
  * ioctl                                 description
  * ------------------------------------- ---------------------------------------
  * DKIOCEJECT                            eject media
- * DKIOCSYNCHRONIZECACHE                 flush media
+ * DKIOCSYNCHRONIZE                      flush media
  *
  * DKIOCFORMAT                           format media
  * DKIOCGETFORMATCAPACITIES              get media's formattable capacities
  * DKIOCGETCOMMANDPOOLSIZE               get device's queue depth
  */
 
+#define DK_FEATURE_BARRIER                    0x00000002
 #define DK_FEATURE_PRIORITY                   0x00000004
 #define DK_FEATURE_UNMAP                      0x00000010
 
+#define DK_SYNCHRONIZE_OPTION_BARRIER         0x00000002
+
 typedef struct
 {
     uint64_t               offset;
@@ -109,6 +112,16 @@ typedef struct
 #endif /* !__LP64__ */
 } dk_format_capacities_t;
 
+typedef struct
+{
+    uint64_t               offset;
+    uint64_t               length;
+
+    uint32_t               options;
+
+    uint8_t                reserved0160[4];        /* reserved, clear to zero */
+} dk_synchronize_t;
+
 typedef struct
 {
     dk_extent_t *          extents;
@@ -122,6 +135,21 @@ typedef struct
 } dk_unmap_t;
 
 
+typedef struct
+{
+       uint64_t           flags;
+       uint64_t           hotfile_size;           /* in bytes */
+       uint64_t           hibernate_minsize;
+       uint64_t           swapfile_pinning;
+
+       uint64_t           padding[4];
+} dk_corestorage_info_t;
+
+#define DK_CORESTORAGE_PIN_YOUR_METADATA        0x00000001
+#define DK_CORESTORAGE_ENABLE_HOTFILES          0x00000002
+#define DK_CORESTORAGE_PIN_YOUR_SWAPFILE        0x00000004
+
+
 #ifdef KERNEL
 #ifdef PRIVATE
 
@@ -132,7 +160,7 @@ typedef struct
 #endif /* KERNEL */
 
 #define DKIOCEJECT                            _IO('d', 21)
-#define DKIOCSYNCHRONIZECACHE                 _IO('d', 22)
+#define DKIOCSYNCHRONIZE                      _IOW('d', 22, dk_synchronize_t)
 
 #define DKIOCFORMAT                           _IOW('d', 26, dk_format_capacity_t)
 #define DKIOCGETFORMATCAPACITIES              _IOWR('d', 26, dk_format_capacities_t)
@@ -146,7 +174,7 @@ typedef struct
 
 #define DKIOCREQUESTIDLE                      _IO('d', 30)
 #define DKIOCUNMAP                            _IOW('d', 31, dk_unmap_t)
-#define _DKIOCCORESTORAGE                    _IO('d', 32)
+#define DKIOCCORESTORAGE                      _IOR('d', 32, dk_corestorage_info_t)
 
 #define DKIOCGETMAXBLOCKCOUNTREAD             _IOR('d', 64, uint64_t)
 #define DKIOCGETMAXBLOCKCOUNTWRITE            _IOR('d', 65, uint64_t)
@@ -165,6 +193,8 @@ typedef struct
 #define DKIOCGETPHYSICALBLOCKSIZE             _IOR('d', 77, uint32_t)
 #define DKIOCGETCOMMANDPOOLSIZE               _IOR('d', 78, uint32_t)
 
+#define DKIOCSYNCHRONIZECACHE                 _IO('d', 22)
+
 #ifdef KERNEL
 #define DK_FEATURE_FORCE_UNIT_ACCESS          0x00000001
 
@@ -202,7 +232,6 @@ typedef struct
 #endif /* !__LP64__ */
 } dk_set_tier_t;
 
-#define DKIOCGETBLOCKCOUNT32                  _IOR('d', 25, uint32_t)
 #define DKIOCSETBLOCKSIZE                     _IOW('d', 24, uint32_t)
 #define DKIOCGETBSDUNIT                       _IOR('d', 27, uint32_t)
 #define DKIOCISSOLIDSTATE                     _IOR('d', 79, uint32_t)
@@ -234,8 +263,16 @@ typedef struct _dk_cs_pin {
        dk_extent_t     cp_extent;
        int64_t         cp_flags;
 } _dk_cs_pin_t;
-#define _DKIOCCSPINFORHIBERNATION       (1 << 0)
-#define _DKIOCCSPINDISCARDBLACKLIST     (1 << 1)
+/* The following are modifiers to _DKIOCCSPINEXTENT/cp_flags operation */
+#define _DKIOCCSPINTOFASTMEDIA          (0)                    /* Pin extent to the fast (SSD) media             */
+#define _DKIOCCSPINFORHIBERNATION       (1 << 0)       /* Pin of hibernation file, content not preserved */
+#define _DKIOCCSPINDISCARDBLACKLIST     (1 << 1)       /* Hibernation complete/error, stop blacklisting  */
+#define _DKIOCCSPINTOSLOWMEDIA          (1 << 2)       /* Pin extent to the slow (HDD) media             */
+#define _DKIOCCSTEMPORARYPIN            (1 << 3)       /* Relocate, but do not pin, to indicated media   */
+#define _DKIOCCSHIBERNATEIMGSIZE        (1 << 4)       /* Anticipate/Max size of the upcoming hibernate  */
+#define _DKIOCCSPINFORSWAPFILE          (1 << 5)       /* Pin of swap file, content not preserved        */
+
+#define _DKIOCCSSETLVNAME                     _IOW('d', 198, char[256])
 #define _DKIOCCSPINEXTENT                     _IOW('d', 199, _dk_cs_pin_t)
 #define _DKIOCCSUNPINEXTENT                   _IOW('d', 200, _dk_cs_pin_t)
 #define _DKIOCGETMIGRATIONUNITBYTESIZE        _IOR('d', 201, uint32_t)
@@ -252,8 +289,7 @@ typedef struct _dk_cs_unmap {
 } _dk_cs_unmap_t;
 
 #define _DKIOCCSMAP                           _IOWR('d', 202, _dk_cs_map_t)
-#define _DKIOCCSSETFSVNODE                    _IOW('d', 203, vnode_t)
-#define _DKIOCCSGETFREEBYTES                  _IOR('d', 204, uint64_t)
+// No longer used: _DKIOCCSSETFSVNODE (203) & _DKIOCCSGETFREEBYTES (204)
 #define        _DKIOCCSUNMAP                         _IOWR('d', 205, _dk_cs_unmap_t)
 #endif /* PRIVATE */
 #endif /* KERNEL */
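
The disk.h hunk replaces the argument-less DKIOCSYNCHRONIZECACHE with DKIOCSYNCHRONIZE plus a dk_synchronize_t, and adds a barrier option. A hedged sketch of issuing it from user space; opening /dev/rdisk0 requires privileges, and treating offset/length of zero as "whole device" is an assumption.

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/disk.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = open("/dev/rdisk0", O_RDWR);
        if (fd < 0) {
            perror("open");
            return 1;
        }

        dk_synchronize_t sync;
        memset(&sync, 0, sizeof(sync));
        sync.options = DK_SYNCHRONIZE_OPTION_BARRIER;   /* barrier rather than full flush */

        if (ioctl(fd, DKIOCSYNCHRONIZE, &sync) == -1)
            perror("DKIOCSYNCHRONIZE");

        close(fd);
        return 0;
    }
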
index 3fe847cdf041dc493908fb2bf955b6e2abd4176e..fa77f963b722c2e6a03f6027f9e1c46dc350a054 100644 (file)
@@ -175,11 +175,11 @@ extern void net_drain_domains(void);
 extern void domain_proto_mtx_lock_assert_held(void);
 extern void domain_proto_mtx_lock_assert_notheld(void);
 struct domain_guard;
-typedef struct domain_guard *domain_guard_t;
+typedef const struct domain_guard *domain_guard_t;
 extern domain_guard_t domain_guard_deploy(void);
 extern void domain_guard_release(domain_guard_t);
 struct domain_unguard;
-typedef struct domain_unguard *domain_unguard_t;
+typedef const struct domain_unguard *domain_unguard_t;
 extern domain_unguard_t domain_unguard_deploy(void);
 extern void domain_unguard_release(domain_unguard_t);
 extern struct domain_old *pffinddomain_old(int);
index 3e39fca6a4be23023d72ba0f48d64d1db1746795..debf2f76764089b574bb27684d0ee5813a1a0265 100644 (file)
@@ -20,7 +20,8 @@
  */
 
 /*
- * Portions copyright (c) 2011, Joyent, Inc. All rights reserved.
+ * Portions copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Portions Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 /*
@@ -340,6 +341,7 @@ typedef enum dtrace_probespec {
 #define DIF_VAR_PTHREAD_SELF   0x0200  /* Apple specific PTHREAD_SELF (Not currently supported!) */
 #define DIF_VAR_DISPATCHQADDR  0x0201  /* Apple specific dispatch queue addr */
 #define DIF_VAR_MACHTIMESTAMP  0x0202  /* mach_absolute_timestamp() */
+#define DIF_VAR_CPU            0x0203  /* cpu number */
 #endif /* __APPLE __ */
 
 #define        DIF_SUBR_RAND                   0
@@ -388,13 +390,14 @@ typedef enum dtrace_probespec {
 #define        DIF_SUBR_INET_NTOA6             43
 #define        DIF_SUBR_TOUPPER                44
 #define        DIF_SUBR_TOLOWER                45
+#define        DIF_SUBR_VM_KERNEL_ADDRPERM     46
 #if !defined(__APPLE__)
 
-#define DIF_SUBR_MAX                   45      /* max subroutine value */
+#define DIF_SUBR_MAX                   46      /* max subroutine value */
 #else
-#define DIF_SUBR_COREPROFILE           46
+#define DIF_SUBR_COREPROFILE           47
 
-#define DIF_SUBR_MAX                   46      /* max subroutine value */
+#define DIF_SUBR_MAX                   47      /* max subroutine value */
 #endif /* __APPLE__ */
 
 typedef uint32_t dif_instr_t;
@@ -457,6 +460,7 @@ typedef struct dtrace_diftype {
 #define DIF_TYPE_STRING         1       /* type is a D string */
 
 #define DIF_TF_BYREF            0x1     /* type is passed by reference */
+#define DIF_TF_BYUREF           0x2     /* user type is passed by reference */
 
 /*
  * A DTrace Intermediate Format variable record is used to describe each of the
@@ -1447,7 +1451,10 @@ typedef struct dtrace_module_uuids_list {
 #define DTRACE_MODULE_UUIDS_LIST_SIZE(count) (sizeof(dtrace_module_uuids_list_t) + ((count - 1) * sizeof(UUID)))
 
 typedef struct dtrace_procdesc {
-       char            p_comm[MAXCOMLEN+1];
+       /* Must be specified by user-space */
+       char            p_name[128];
+       /* Set or modified by the Kernel */
+       int             p_name_length;
        pid_t           p_pid;
 } dtrace_procdesc_t;
 
index cbb14c0abd651fcade830c0dbb5fff44da1acf20..02065c15aad99aaee7dac4032c25b1fea2bd7bf1 100644 (file)
@@ -1357,7 +1357,10 @@ extern void dtrace_copystr(uintptr_t, uintptr_t, size_t, volatile uint16_t *);
 /*
  * DTrace restriction checks
  */
+extern void dtrace_restriction_policy_load(void);
 extern boolean_t dtrace_is_restricted(void);
+extern boolean_t dtrace_is_running_apple_internal(void);
+extern boolean_t dtrace_fbt_probes_restricted(void);
 extern boolean_t dtrace_can_attach_to_proc(proc_t);
 
 /*
index 44cef5438af4fb00afefeb29a56aabd303fd1b32..00635c1b46695b9f390596e272bef60712f304d6 100644 (file)
@@ -112,6 +112,16 @@ struct user32_kevent {
        user32_addr_t   udata;          /* opaque user data identifier */
 };
 
+struct kevent_internal_s {
+       uint64_t        ident;          /* identifier for this event */
+       int16_t         filter;         /* filter for event */
+       uint16_t        flags;          /* general flags */
+       uint32_t        fflags;         /* filter-specific flags */
+       int64_t         data;           /* filter-specific data */
+       uint64_t        udata;          /* opaque user data identifier */
+       uint64_t        ext[2];         /* filter-specific extensions */
+};
+
 #endif
 
 #pragma pack()
@@ -126,6 +136,20 @@ struct kevent64_s {
        uint64_t        ext[2];         /* filter-specific extensions */
 };
 
+#ifdef PRIVATE
+struct kevent_qos_s {
+       uint64_t        ident;          /* identifier for this event */
+       int16_t         filter;         /* filter for event */
+       uint16_t        flags;          /* general flags */
+       int32_t         qos;            /* quality of service */
+       uint64_t        udata;          /* opaque user data identifier */
+       uint32_t        fflags;         /* filter-specific flags */
+       uint32_t        xflags;         /* extra filter-specific flags */
+       int64_t         data;           /* filter-specific data */
+       uint64_t        ext[4];         /* filter-specific extensions */
+};
+#endif /* PRIVATE */
+
 #define EV_SET(kevp, a, b, c, d, e, f) do {    \
        struct kevent *__kevp__ = (kevp);       \
        __kevp__->ident = (a);                  \
@@ -148,34 +172,73 @@ struct kevent64_s {
        __kevp__->ext[1] = (h);                         \
 } while(0)
 
+
+/* kevent system call flags */
+#define KEVENT_FLAG_NONE               0x00    /* no flag value */
+#define KEVENT_FLAG_IMMEDIATE          0x01    /* immediate timeout */
+#define KEVENT_FLAG_ERROR_EVENTS       0x02    /* output events only include change errors */
+
+#ifdef PRIVATE
+
+#define EV_SET_QOS 0
+/*
+ * Rather than provide an EV_SET_QOS macro for kevent_qos_t structure
+ * initialization, we encourage use of named field initialization support
+ * instead.
+ */
+
+#define KEVENT_FLAG_STACK_EVENTS       0x04    /* output events treated as stack (grows down) */
+#define KEVENT_FLAG_STACK_DATA         0x08    /* output data allocated as stack (grows down) */
+#define KEVENT_FLAG_WORKQ               0x20   /* interact with the default workq kq */
+
+#ifdef XNU_KERNEL_PRIVATE
+
+#define KEVENT_FLAG_LEGACY32            0x40   /* event data in legacy 32-bit format */
+#define KEVENT_FLAG_LEGACY64            0x80   /* event data in legacy 64-bit format */
+
+#define KEVENT_FLAG_USER       (KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS | \
+                                KEVENT_FLAG_STACK_EVENTS | KEVENT_FLAG_STACK_DATA | \
+                                KEVENT_FLAG_WORKQ)
+#endif /* XNU_KERNEL_PRIVATE */
+#endif /* PRIVATE */
+
 /* actions */
-#define EV_ADD         0x0001          /* add event to kq (implies enable) */
-#define EV_DELETE      0x0002          /* delete event from kq */
-#define EV_ENABLE      0x0004          /* enable event */
-#define EV_DISABLE     0x0008          /* disable event (not reported) */
-#define EV_RECEIPT     0x0040          /* force EV_ERROR on success, data == 0 */
+#define EV_ADD                 0x0001          /* add event to kq (implies enable) */
+#define EV_DELETE              0x0002          /* delete event from kq */
+#define EV_ENABLE              0x0004          /* enable event */
+#define EV_DISABLE             0x0008          /* disable event (not reported) */
 
 /* flags */
-#define EV_ONESHOT     0x0010          /* only report one occurrence */
-#define EV_CLEAR       0x0020          /* clear event state after reporting */
-#define EV_DISPATCH     0x0080          /* disable event after reporting */
+#define EV_ONESHOT             0x0010          /* only report one occurrence */
+#define EV_CLEAR               0x0020          /* clear event state after reporting */
+#define EV_RECEIPT             0x0040          /* force EV_ERROR on success, data == 0 */
+#define EV_DISPATCH     0x0080      /* disable event after reporting */
+
+#define EV_UDATA_SPECIFIC      0x0100          /* unique kevent per udata value */
+                                            /* ... in combination with EV_DELETE */
+                                            /* will defer delete until udata-specific */
+                                            /* event enabled. EINPROGRESS will be */
+                                            /* returned to indicate the deferral */
 
-#define EV_SYSFLAGS    0xF000          /* reserved by system */
-#define EV_FLAG0       0x1000          /* filter-specific flag */
-#define EV_FLAG1       0x2000          /* filter-specific flag */
+#define EV_DISPATCH2           (EV_DISPATCH | EV_UDATA_SPECIFIC)
+
+#define EV_SYSFLAGS            0xF000          /* reserved by system */
+#define EV_FLAG0               0x1000          /* filter-specific flag */
+#define EV_FLAG1               0x2000          /* filter-specific flag */
 
 /* returned values */
-#define EV_EOF         0x8000          /* EOF detected */
-#define EV_ERROR       0x4000          /* error, data contains errno */
+#define EV_EOF                 0x8000          /* EOF detected */
+#define EV_ERROR               0x4000          /* error, data contains errno */
 
 /*
  * Filter specific flags for EVFILT_READ
  *
  * The default behavior for EVFILT_READ is to make the "read" determination
- * relative to the current file descriptor read pointer. The EV_POLL
- * flag indicates the determination should be made via poll(2) semantics
- * (which always returns true for regular files - regardless of the amount
- * of unread data in the file).
+ * relative to the current file descriptor read pointer. 
+ *
+ * The EV_POLL flag indicates the determination should be made via poll(2)
+ * semantics. These semantics dictate always returning true for regular files,
+ * regardless of the amount of unread data in the file.  
  *
  * On input, EV_OOBAND specifies that filter should actively return in the
  * presence of OOB on the descriptor. It implies that filter will return
@@ -192,7 +255,7 @@ struct kevent64_s {
  * number of bytes before the current OOB marker, else data count is the number
  * of bytes beyond OOB marker.
  */
-#define EV_POLL        EV_FLAG0
+#define EV_POLL                EV_FLAG0
 #define EV_OOBAND      EV_FLAG1
 
 /*
@@ -225,6 +288,7 @@ struct kevent64_s {
  * relative to the current file descriptor read pointer.
  */
 #define NOTE_LOWAT     0x00000001              /* low water mark */
+
 /*
  * data/hint fflags for EVFILT_VNODE, shared with userspace
  */
@@ -356,6 +420,12 @@ typedef enum vm_pressure_level {
 #define        NOTE_DISCONNECTED       0x00001000 /* socket is disconnected */
 #define        NOTE_CONNINFO_UPDATED   0x00002000 /* connection info was updated */
 
+#define        EVFILT_SOCK_LEVEL_TRIGGER_MASK \
+    (NOTE_READCLOSED | NOTE_WRITECLOSED | NOTE_SUSPEND | NOTE_RESUME | NOTE_CONNECTED | NOTE_DISCONNECTED)
+
+#define EVFILT_SOCK_ALL_MASK \
+    (NOTE_CONNRESET | NOTE_READCLOSED | NOTE_WRITECLOSED | NOTE_TIMEOUT | NOTE_NOSRCADDR | NOTE_IFDENIED | NOTE_SUSPEND | NOTE_RESUME | NOTE_KEEPALIVE | NOTE_ADAPTIVE_WTIMO | NOTE_ADAPTIVE_RTIMO | NOTE_CONNECTED | NOTE_DISCONNECTED | NOTE_CONNINFO_UPDATED)
+
 #endif /* PRIVATE */
 
 /*
@@ -373,6 +443,19 @@ typedef enum vm_pressure_level {
  * receive the message and the requested (or default) message trailers.  In addition,
  * the fflags field contains the return code normally returned by mach_msg().
  *
+ * If MACH_RCV_MSG is specified, and the ext[1] field specifies a zero length, the
+ * system call argument specifying an output area (kevent_qos) will be consulted. If
+ * the system call specified an output data area, the user-space address
+ * of the received message is carved from that provided output data area (if enough
+ * space remains there). The address and length of each received message is 
+ * returned in the ext[0] and ext[1] fields (respectively) of the corresponding kevent.
+ *
+ * If MACH_RCV_VOUCHER_CONTENT is specified, the contents of the message voucher are
+ * extracted (as specified in the xflags field) and stored in ext[2] up to ext[3]
+ * length.  If the input length is zero, and the system call provided a data area,
+ * the space for the voucher content is carved from the provided space and its
+ * address and length is returned in ext[2] and ext[3] respectively.
+ *
  * If no message receipt options were provided in the fflags field on setup, no
  * message is received by this call. Instead, on output, the data field simply
  * contains the name of the actual port detected with a message waiting.
@@ -410,9 +493,10 @@ TAILQ_HEAD(kqtailq, knote);        /* a list of "queued" events */
 
 struct knote {
        int             kn_inuse;       /* inuse count */
-       struct kqtailq  *kn_tq;         /* pointer to tail queue */
+       int             kn_hookid;
        TAILQ_ENTRY(knote)      kn_tqe;         /* linkage for tail queue */
-       struct kqueue   *kn_kq; /* which kqueue we are on */
+       struct kqtailq          *kn_tq;         /* pointer to tail queue */
+       struct kqueue           *kn_kq;         /* which kqueue we are on */
        SLIST_ENTRY(knote)      kn_link;        /* linkage for search list */
        SLIST_ENTRY(knote)      kn_selnext;     /* klist element chain */
        union {
@@ -423,10 +507,12 @@ struct knote {
        struct                  filterops *kn_fop;
        int                     kn_status;      /* status bits */
        int                     kn_sfflags;     /* saved filter flags */
-       struct                  kevent64_s kn_kevent;
-       void                    *kn_hook;
-       int                     kn_hookid;
+       union {
+               void            *kn_hook;
+               uint64_t        kn_hook_data;
+       };
        int64_t                 kn_sdata;       /* saved data field */
+       struct                  kevent_internal_s kn_kevent;
 
 #define KN_ACTIVE      0x01                    /* event has been triggered */
 #define KN_QUEUED      0x02                    /* event is on queue */
@@ -435,13 +521,17 @@ struct knote {
 #define KN_USEWAIT     0x10                    /* wait for knote use */
 #define KN_ATTACHING   0x20                    /* event is pending attach */
 #define KN_STAYQUEUED  0x40                    /* force event to stay on queue */
+#define KN_DEFERDROP   0x80                    /* defer drop until re-enabled */
+#define KN_TOUCH       0x100                   /* Always call f_touch callback */
 
 #define kn_id          kn_kevent.ident
 #define kn_filter      kn_kevent.filter
 #define kn_flags       kn_kevent.flags
+#define kn_qos         kn_kevent.qos
+#define kn_udata       kn_kevent.udata
 #define kn_fflags      kn_kevent.fflags
+#define kn_xflags      kn_kevent.xflags
 #define kn_data                kn_kevent.data
-#define kn_udata       kn_kevent.udata
 #define kn_ext         kn_kevent.ext
 #define kn_fp          kn_ptr.p_fp
 };
@@ -456,13 +546,13 @@ struct filterops {
        void    (*f_detach)(struct knote *kn);
        int     (*f_event)(struct knote *kn, long hint);
        /* Optional f_touch operation, called only if !f_isfd && non-NULL */
-       void    (*f_touch)(struct knote *kn, struct kevent64_s *kev, long type);
+       void    (*f_touch)(struct knote *kn, struct kevent_internal_s *kev, long type);
        /* Optional f_peek operation, called only if KN_STAYQUEUED is set */
        unsigned (*f_peek)(struct knote *kn);
 };
 
 struct proc;
-struct wait_queue;
+struct waitq;
 
 SLIST_HEAD(klist, knote);
 extern void    knote_init(void);
@@ -476,27 +566,45 @@ extern void       klist_init(struct klist *list);
 extern void    knote(struct klist *list, long hint);
 extern int     knote_attach(struct klist *list, struct knote *kn);
 extern int     knote_detach(struct klist *list, struct knote *kn);
-extern int     knote_link_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t wql);  
-extern int     knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t *wqlp);
+extern int     knote_link_waitq(struct knote *kn, struct waitq *wq, uint64_t *reserved_link);
+extern int     knote_unlink_waitq(struct knote *kn, struct waitq *wq);
 extern void    knote_fdclose(struct proc *p, int fd);
 extern void    knote_markstayqueued(struct knote *kn);
 extern void    knote_clearstayqueued(struct knote *kn);
+
+extern int     kevent_qos_internal(struct proc *p, int fd, 
+                           user_addr_t changelist, int nchanges,
+                           user_addr_t eventlist, int nevents,
+                           user_addr_t data_out, user_size_t *data_available,
+                           unsigned int flags, int32_t *retval);
 #endif /* !KERNEL_PRIVATE */
 
 #else  /* KERNEL */
 
+#include <sys/types.h>
 
 struct timespec;
 
 __BEGIN_DECLS
 int     kqueue(void);
-int     kevent(int kq, const struct kevent *changelist, int nchanges,
-                   struct kevent *eventlist, int nevents,
-                   const struct timespec *timeout);
-int     kevent64(int kq, const struct kevent64_s *changelist, 
-                   int nchanges, struct kevent64_s *eventlist, 
-                   int nevents, unsigned int flags, 
-                   const struct timespec *timeout);
+int     kevent(int kq, 
+              const struct kevent *changelist, int nchanges,
+              struct kevent *eventlist, int nevents,
+              const struct timespec *timeout);
+int     kevent64(int kq, 
+                const struct kevent64_s *changelist, int nchanges,
+                struct kevent64_s *eventlist, int nevents,
+                unsigned int flags, 
+                const struct timespec *timeout);
+
+#ifdef PRIVATE
+int     kevent_qos(int kq, 
+                  const struct kevent_qos_s *changelist, int nchanges,
+                  struct kevent_qos_s *eventlist, int nevents,
+                  void *data_out, size_t *data_available,
+                  unsigned int flags);
+#endif /* PRIVATE */
+
 __END_DECLS
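
For context against the kevent changes above, a plain kqueue/kevent(2) sketch; the kevent_qos() variant and the new KEVENT_FLAG_ and EV_UDATA_SPECIFIC values are private and are not exercised here. The file path is a placeholder.

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/event.h>
    #include <sys/types.h>
    #include <unistd.h>

    int main(void)
    {
        int kq = kqueue();
        int fd = open("/etc/hosts", O_RDONLY);
        struct kevent change, event;

        if (kq < 0 || fd < 0) {
            perror("setup");
            return 1;
        }

        /* Register interest in readability of fd. */
        EV_SET(&change, fd, EVFILT_READ, EV_ADD | EV_CLEAR, 0, 0, NULL);
        if (kevent(kq, &change, 1, NULL, 0, NULL) == -1) {
            perror("kevent register");
            return 1;
        }

        /* Wait for one event; for a regular file, data is the bytes remaining. */
        if (kevent(kq, NULL, 0, &event, 1, NULL) > 0)
            printf("fd %d readable, %lld bytes\n", (int)event.ident, (long long)event.data);

        close(fd);
        close(kq);
        return 0;
    }
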
 
 
index 29adde75d66279bfbae1fd9954d0de583ca8f31f..6ce00103af1464ffae7be54842168dfe4c28cc4e 100644 (file)
 #include <sys/event.h>
 #include <sys/select.h>
 #include <kern/kern_types.h>
+#include <kern/waitq.h>
 
 #define KQ_NEVENTS     16              /* minimize copy{in,out} calls */
 #define KQEXTENT       256             /* linear growth by this amount */
 
 struct kqueue {
-       wait_queue_set_t kq_wqs;        /* private wait queue set */
+       struct waitq_set *kq_wqs;       /* private waitq set */
        decl_lck_spin_data( ,kq_lock)   /* kqueue lock */
        int             kq_state;
        int             kq_count;       /* number of queued events */
@@ -79,15 +80,17 @@ struct kqueue {
 #define KQ_PROCWAIT    0x04
 #define KQ_KEV32       0x08
 #define KQ_KEV64       0x10
+#define KQ_KEV_QOS     0x20
+#define KQ_WORKQ       0x40
 };
 
 extern struct kqueue *kqueue_alloc(struct proc *);
 extern void kqueue_dealloc(struct kqueue *);
 
-typedef int (*kevent_callback_t)(struct kqueue *, struct kevent64_s *, void *);
+typedef int (*kevent_callback_t)(struct kqueue *, struct kevent_internal_s *, void *);
 typedef void (*kqueue_continue_t)(struct kqueue *, void *, int);
 
-extern int kevent_register(struct kqueue *, struct kevent64_s *, struct proc *);
+extern int kevent_register(struct kqueue *, struct kevent_internal_s *, struct proc *);
 extern int kqueue_scan(struct kqueue *, kevent_callback_t, kqueue_continue_t,
                       void *, struct timeval *, struct proc *);
 extern int kqueue_stat(struct kqueue *, void *, int, proc_t);
index e8dcd03fd5fa75320532755fcd79bfd0414e5fbb..08ad4e546f54a13b2bc0e5bc9e4e0590c44402b5 100644 (file)
 #define O_CLOFORK      0x8000000       /* implicitly set FD_CLOFORK */
 #endif
 
+#ifdef KERNEL
+#define FUNENCRYPTED   0x10000000
+#endif
+
 /* Data Protection Flags */
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
 #define O_DP_GETRAWENCRYPTED   0x0001
+#define O_DP_GETRAWUNENCRYPTED 0x0002
 #endif
 
 
  */
 
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define CPF_OVERWRITE 1
-#define CPF_IGNORE_MODE 2
+#define CPF_OVERWRITE    0x0001
+#define CPF_IGNORE_MODE  0x0002
 #define CPF_MASK (CPF_OVERWRITE|CPF_IGNORE_MODE)
 #endif
 
 
 #define F_ADDFILESIGS_FOR_DYLD_SIM 83  /* Add signature from same file, only if it is signed by Apple (used by dyld for simulator) */
 
+#ifdef PRIVATE
+#define F_RECYCLE                      84      /* Recycle vnode; debug/development builds only */
+#endif
+
+#define F_BARRIERFSYNC         85      /* fsync + issue barrier to drive */
+
+#ifdef PRIVATE
+#define F_OFD_SETLK            90      /* Acquire or release open file description lock */
+#define F_OFD_SETLKW           91      /* (as F_OFD_SETLK but blocking if conflicting lock) */
+#define F_OFD_GETLK            92      /* Examine OFD lock */
+
+#define F_OFD_SETLKWTIMEOUT    93      /* (as F_OFD_SETLKW but return if timeout) */
+#define F_OFD_GETLKPID         94      /* get record locking information */
+
+#define F_SETCONFINED          95      /* "confine" OFD to process */
+#define F_GETCONFINED          96      /* is-fd-confined? */
+#endif
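A hedged userland sketch of the new OFD (open file description) lock commands; it assumes F_OFD_SETLK takes a struct flock just like F_SETLK, which this header does not state explicitly:

#include <fcntl.h>
#include <unistd.h>

/* Try to write-lock an entire file with "OFD" semantics: the lock is tied to
 * the open file description rather than to the locking process. */
static int
lock_file_ofd(int fd)
{
	struct flock fl = {
		.l_type   = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start  = 0,
		.l_len    = 0,		/* 0 means "to end of file" */
	};
	return fcntl(fd, F_OFD_SETLK, &fl);	/* PRIVATE command; sketch only */
}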
+
+#define F_ADDFILESIGS_RETURN   97      /* Add signature from same file, return end offset in structure on success */
+
 
 // FS-specific fcntl()'s numbers begin at 0x00010000 and go up
 #define FCNTL_FS_SPECIFIC_BASE  0x00010000
 #define        F_PROV          0x080           /* Non-coalesced provisional lock */
 #define F_WAKE1_SAFE    0x100           /* it's safe to only wake one waiter */
 #define        F_ABORT         0x200           /* lock attempt aborted (force umount) */
+#define        F_OFD_LOCK      0x400           /* Use "OFD" semantics for lock */
 #endif
 
 #if PRIVATE
index aaf2485549f2f5af29cb11420c39da5dc289a183..172aa3d04db81ffa722ecfab7bc87a1bc4837f1c 100644 (file)
@@ -92,7 +92,7 @@ struct fileproc {
        unsigned int f_flags;
        int32_t f_iocount;
        struct fileglob * f_fglob;
-       void *  f_waddr;
+       void *f_wset;
 };
 
 #define FILEPROC_NULL (struct fileproc *)0
@@ -157,6 +157,8 @@ typedef enum {
 #define FG_NOSIGPIPE   0x40    /* don't deliver SIGPIPE with EPIPE return */
 #define FG_OFF_LOCKED  0x80    /* Used as a mutex for offset changes (for vnodes) */
 #define FG_OFF_LOCKWANT 0x100  /* Somebody's waiting for the lock */
+#define FG_CONFINED    0x200   /* fileglob confined to process, immutably */
+#define FG_HAS_OFDLOCK 0x400   /* Has or has had an OFD lock */
 
 struct fileglob {
        LIST_ENTRY(fileglob) f_msglist;/* list of active files */
@@ -243,14 +245,16 @@ int open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
     int32_t *retval);
 int kqueue_body(struct proc *p, fp_allocfn_t, void *cra, int32_t *retval);
 void fg_insertuipc(struct fileglob * fg);
+boolean_t fg_insertuipc_mark(struct fileglob * fg);
 void fg_removeuipc(struct fileglob * fg);
+boolean_t fg_removeuipc_mark(struct fileglob * fg);
 void unp_gc_wait(void);
 void procfdtbl_reservefd(struct proc * p, int fd);
 void procfdtbl_markclosefd(struct proc * p, int fd);
 void procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp);
 void procfdtbl_waitfd(struct proc * p, int fd);
 void procfdtbl_clearfd(struct proc * p, int fd);
-boolean_t filetype_issendable(file_type_t type);
+boolean_t file_issendable(struct proc * p, struct fileproc *fp);
 extern int fdgetf_noref(proc_t, int, struct fileproc **);
 extern struct fileproc *fileproc_alloc_init(void *crargs);
 extern void fileproc_free(struct fileproc *fp);
index b5b80a6bc6814d8de000decc4c6b2580343701b1..41dc190b5a3d4808119a660eb4488a51618894c0 100644 (file)
@@ -145,6 +145,16 @@ extern int fdavail(proc_t p, int n);
                        (&(p)->p_fd->fd_ofiles[(fd)])
 #define                fdflags(p, fd)                                  \
                        (&(p)->p_fd->fd_ofileflags[(fd)])
+
+/*
+ * Accessor macros for fd flags
+ */
+#define FDFLAGS_GET(p, fd) (*fdflags(p, fd) & (UF_EXCLOSE|UF_FORKCLOSE))
+#define FDFLAGS_SET(p, fd, bits) \
+          (*fdflags(p, fd) |= ((bits) & (UF_EXCLOSE|UF_FORKCLOSE)))
+#define FDFLAGS_CLR(p, fd, bits) \
+          (*fdflags(p, fd) &= ~((bits) & (UF_EXCLOSE|UF_FORKCLOSE)))
+
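A small kernel-internal sketch of the new accessor macros; `p`, `old_fd`, and `new_fd` are hypothetical, and the snippet assumes the caller already holds the process fd lock:

/* Copy the close-on-exec/close-on-fork flags from one descriptor to another
 * (only UF_EXCLOSE and UF_FORKCLOSE are visible through these macros). */
int saved = FDFLAGS_GET(p, old_fd);
FDFLAGS_CLR(p, new_fd, UF_EXCLOSE | UF_FORKCLOSE);
FDFLAGS_SET(p, new_fd, saved);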
 extern int     falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx);
 
 #ifdef __APPLE_API_PRIVATE
index f1208ffc3f17a7a9fb354e6633bfbbeacaabd02b..16fb2242585c584ba8d691e1ecde4c8304badca0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #define FSE_TRUNCATED_PATH     (1 << 28)    // the path for this item had to be truncated
 
 // ioctl's on /dev/fsevents
-#if __LP64__
 typedef struct fsevent_clone_args {
     int8_t  *event_list;
     int32_t  num_events;
     int32_t  event_queue_depth;
     int32_t *fd;
 } fsevent_clone_args;
-#else
-typedef struct fsevent_clone_args {
-    int8_t  *event_list;
-    int32_t  pad1;
-    int32_t  num_events;
-    int32_t  event_queue_depth;
-    int32_t *fd;
-    int32_t  pad2;
-} fsevent_clone_args;
-#endif
 
 #define        FSEVENTS_CLONE          _IOW('s', 1, fsevent_clone_args)
 
 
 // ioctl's on the cloned fd
-#if __LP64__
 #pragma pack(push, 4)
 typedef struct fsevent_dev_filter_args {
     uint32_t  num_devices;
     dev_t    *devices;
 } fsevent_dev_filter_args;
 #pragma pack(pop)
-#else
-typedef struct fsevent_dev_filter_args {
-    uint32_t  num_devices;
-    dev_t    *devices;
-    int32_t   pad1;
-} fsevent_dev_filter_args;
-#endif
 
 #define        FSEVENTS_DEVICE_FILTER          _IOW('s', 100, fsevent_dev_filter_args)
 #define        FSEVENTS_WANT_COMPACT_EVENTS    _IO('s', 101)
index dc55c7a59f922e39c46d0e0f258cda5fc95cedc2..6fa6a7752b1a724ce830764a8fe7b4c133065847 100644 (file)
@@ -54,9 +54,9 @@ extern int guarded_kqueue_np(const guardid_t *guard, u_int guardflags);
 extern int guarded_close_np(int fd, const guardid_t *guard);
 extern int change_fdguard_np(int fd, const guardid_t *guard, u_int guardflags,
        const guardid_t *nguard, u_int nguardflags, int *fdflagsp);
-extern user_ssize_t guarded_write_np(int fd, const guardid_t *guard, user_addr_t cbuf, user_size_t nbyte);
-extern user_ssize_t guarded_pwrite_np(int fd, const guardid_t *guard, user_addr_t buf, user_size_t nbyte, off_t offset);
-extern user_ssize_t guarded_writev_np(int fd, const guardid_t *guard, struct iovec *iovp, u_int iovcnt);
+extern ssize_t guarded_write_np(int fd, const guardid_t *guard, const void *buf, size_t nbyte);
+extern ssize_t guarded_pwrite_np(int fd, const guardid_t *guard, const void *buf, size_t nbyte, off_t offset);
+extern ssize_t guarded_writev_np(int fd, const guardid_t *guard, const struct iovec *iovp, int iovcnt);
 #endif /* KERNEL */
 
 /*
@@ -70,7 +70,8 @@ extern user_ssize_t guarded_writev_np(int fd, const guardid_t *guard, struct iov
  * File descriptor guard flavors.
  */
 
-/* Forbid close(2), and the implicit close() that a dup2(2) may do.
+/*
+ * Forbid close(2), and the implicit close() that a dup2(2) may do.
  * Forces close-on-fork to be set immutably too.
  */
 #define GUARD_CLOSE            (1u << 0)
index 945da513d99a6ac70ed27a1c07bf7e8c38237333..03db89c613ad3c3a12e826f75440f21326044e4e 100644 (file)
@@ -104,7 +104,6 @@ struct image_params {
        int             ip_interp_sugid_fd;             /* fd for sugid script */
 
        /* Next two fields are for support of architecture translation... */
-       char            *ip_p_comm;             /* optional alt p->p_comm */
        struct vfs_context      *ip_vfs_context;        /* VFS context */
        struct nameidata *ip_ndp;               /* current nameidata */
        thread_t        ip_new_thread;          /* thread for spawn/vfork */
@@ -118,6 +117,7 @@ struct image_params {
        void            *ip_px_sfa;
        void            *ip_px_spa;
        void            *ip_px_smpx;            /* MAC-specific spawn attrs. */
+       void            *ip_reserved;
 };
 
 /*
index af75e23a1033a424531bd244e3692e95d98757a2..48dfac84a5faeed54d986ffb161fb0bdf13e1ec4 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
+ *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
  * unlawful or unlicensed copies of an Apple operating system, or to
  * circumvent, violate, or enable the circumvention or violation of, any
  * terms of an Apple operating system software license agreement.
- * 
+ *
  * Please obtain a copy of the License at
  * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
+ *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- * 
+ *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
-/*     Copyright (c) 1997 Apple Computer, Inc.  All rights reserved. 
+/*     Copyright (c) 1997 Apple Computer, Inc.  All rights reserved.
  *
  * kdebug.h -   kernel_debug definitions
  *
@@ -53,6 +53,63 @@ __BEGIN_DECLS
 #include <mach/branch_predicates.h>
 #endif
 
+/*
+ * Kdebug is a facility for tracing events occurring on a system.
+ *
+ * All events are tagged with a debugid, consisting of the following:
+ *
+ * +----------------+----------------+----------------------------+----+
+ * |   Class (8)    |  Subclass (8)  |          Code (14)         |Func|
+ * |                |                |                            |(2) |
+ * +----------------+----------------+----------------------------+----+
+ * \______________________________________________________________/
+ *                            Eventid
+ * \___________________________________________________________________/
+ *                                 Debugid
+ *
+ * The eventid is a hierarchical ID, indicating which components an event is
+ * referring to.  The debugid includes an eventid and two function qualifier
+ * bits, to determine the structural significance of an event (whether it
+ * starts or ends a series of grouped events).
+ */
+
+#define KDBG_CLASS_MASK   (0xff000000)
+#define KDBG_CLASS_OFFSET (24)
+#define KDBG_CLASS_MAX    (0xff)
+
+#define KDBG_SUBCLASS_MASK   (0x00ff0000)
+#define KDBG_SUBCLASS_OFFSET (16)
+#define KDBG_SUBCLASS_MAX    (0xff)
+
+/* class and subclass mask */
+#define KDBG_CSC_MASK   (0xffff0000)
+#define KDBG_CSC_OFFSET (KDBG_SUBCLASS_OFFSET)
+
+#define KDBG_CODE_MASK   (0x0000fffc)
+#define KDBG_CODE_OFFSET (2)
+#define KDBG_CODE_MAX    (0x3fff)
+
+#define KDBG_EVENTID_MASK (0xfffffffc)
+
+/* Generate an eventid corresponding to Class, SubClass, and Code. */
+#define KDBG_EVENTID(Class, SubClass, Code)                \
+        ((((Class)    &   0xff) << KDBG_CLASS_OFFSET)    | \
+         (((SubClass) &   0xff) << KDBG_SUBCLASS_OFFSET) | \
+         (((Code)     & 0x3fff) << KDBG_CODE_OFFSET))
+/* Deprecated macro using old naming convention. */
+#define KDBG_CODE(Class, SubClass, Code) \
+        KDBG_EVENTID(Class, SubClass, Code)
+
+/* Extract pieces of the debug code. */
+#define KDBG_EXTRACT_CLASS(Debugid) \
+        ((uint8_t)(((Debugid) & KDBG_CLASS_MASK) >> KDBG_CLASS_OFFSET))
+#define KDBG_EXTRACT_SUBCLASS(Debugid) \
+        ((uint8_t)(((Debugid) & KDBG_SUBCLASS_MASK) >> KDBG_SUBCLASS_OFFSET))
+#define KDBG_EXTRACT_CSC(Debugid) \
+        ((uint16_t)(((Debugid) & KDBG_CSC_MASK) >> KDBG_CSC_OFFSET))
+#define KDBG_EXTRACT_CODE(Debugid) \
+        ((uint16_t)(((Debugid) & KDBG_CODE_MASK) >> KDBG_CODE_OFFSET))
+
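A quick sketch of composing and decomposing a debugid with the new macros (assuming these definitions are visible via <sys/kdebug.h>; DBG_FSYSTEM and DBG_FSRW are defined later in this header):

#include <assert.h>
#include <stdint.h>
#include <sys/kdebug.h>

int
main(void)
{
	uint32_t eventid = KDBG_EVENTID(DBG_FSYSTEM, DBG_FSRW, 42);

	assert(KDBG_EXTRACT_CLASS(eventid)    == DBG_FSYSTEM);
	assert(KDBG_EXTRACT_SUBCLASS(eventid) == DBG_FSRW);
	assert(KDBG_EXTRACT_CODE(eventid)     == 42);

	/* the two low-order bits remain free for DBG_FUNC_START/END */
	assert((eventid & ~KDBG_EVENTID_MASK) == 0);
	return 0;
}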
 #ifdef KERNEL_PRIVATE
 
 typedef enum
@@ -73,22 +130,22 @@ struct kd_callback {
 typedef struct kd_callback kd_callback_t;
 
 /*
- * Registers an IOP for participation in tracing. 
- *  
- * The registered callback function will be called with the 
- * supplied context as the first argument, followed by a 
- * kd_callback_type and an associated void* argument. 
- *  
- * The return value is a nonzero coreid that shall be used in 
- * kernel_debug_enter() to refer to your IOP. If the allocation 
- * failed, then 0 will be returned. 
- *  
- *  
- * Caveats: 
- * Note that not all callback calls will indicate a change in 
- * state (e.g. disabling trace twice would send two disable 
- * notifications). 
- *  
+ * Registers an IOP for participation in tracing.
+ *
+ * The registered callback function will be called with the
+ * supplied context as the first argument, followed by a
+ * kd_callback_type and an associated void* argument.
+ *
+ * The return value is a nonzero coreid that shall be used in
+ * kernel_debug_enter() to refer to your IOP. If the allocation
+ * failed, then 0 will be returned.
+ *
+ *
+ * Caveats:
+ * Note that not all callback calls will indicate a change in
+ * state (e.g. disabling trace twice would send two disable
+ * notifications).
+ *
  */
 extern int kernel_debug_register_callback(kd_callback_t callback);
 
@@ -105,24 +162,14 @@ extern void kernel_debug_enter(
 
 #endif /* KERNEL_PRIVATE */
 
-/* The debug code consists of the following 
-*
-* ----------------------------------------------------------------------
-*|              |               |                               |Func   |
-*| Class (8)    | SubClass (8)  |          Code (14)            |Qual(2)|
-* ----------------------------------------------------------------------
-* The class specifies the higher level 
-*/
-
 /* The Function qualifiers  */
 #define DBG_FUNC_START         1
 #define DBG_FUNC_END           2
 #define DBG_FUNC_NONE          0
 
-
 /* The Kernel Debug Classes  */
 #define DBG_MACH               1
-#define DBG_NETWORK            2       
+#define DBG_NETWORK            2
 #define DBG_FSYSTEM            3
 #define DBG_BSD                        4
 #define DBG_IOKIT              5
@@ -144,6 +191,8 @@ extern void kernel_debug_enter(
 #define DBG_XPC                 41
 #define DBG_ATM                 42
 #define DBG_ARIADNE             43
+#define DBG_DAEMON              44
+#define DBG_ENERGYTRACE         45
 
 
 #define DBG_MIG                        255
@@ -167,10 +216,85 @@ extern void kernel_debug_enter(
  * On error, -1 will be returned and errno will indicate the error.
  */
 #ifndef KERNEL
-extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4) __OSX_AVAILABLE_STARTING(__MAC_10_10_2, __IPHONE_NA);
+extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4) __OSX_AVAILABLE_STARTING(__MAC_10_10_2, __IPHONE_8_2);
+#endif
+
+/*!
+ * @function kdebug_trace_string
+ *
+ * @discussion
+ * This function emits strings to kdebug trace along with an ID and allows
+ * for previously-traced strings to be overwritten and invalidated.
+ *
+ * To start tracing a string and generate an ID to use to refer to it:
+ *
+ *      string_id = kdebug_trace_string(debugid, 0, "string");
+ *
+ * To replace a string previously traced:
+ *
+ *      string_id = kdebug_trace_string(debugid, string_id, "new string");
+ *
+ * To invalidate a string ID:
+ *
+ *      string_id = kdebug_trace_string(debugid, string_id, NULL);
+ *
+ * To check for errors:
+ *
+ *      if ((int64_t)string_id == -1) { perror("string error"); }
+ *
+ * @param debugid
+ * The `debugid` to check if its enabled before tracing and include as
+ * an argument in the event containing the string.
+ *
+ * Some classes or subclasses are reserved for specific uses and are not
+ * allowed to be used with this function.  No function qualifiers are
+ * allowed on `debugid`.
+ *
+ * @param str_id
+ * When 0, a new ID will be generated and returned if tracing is
+ * enabled.
+ *
+ * Otherwise `str_id` must contain an ID that was previously generated
+ * with this function.  Clients should pass NULL in `str` if `str_id`
+ * is no longer in use.  Otherwise, the string previously mapped to
+ * `str_id` will be overwritten with the contents of `str`.
+ *
+ * @param str
+ * A NUL-terminated 'C' string containing the characters that should be
+ * traced alongside `str_id`.
+ *
+ * If necessary, the string will be truncated at an
+ * implementation-defined length.  The string must not be the empty
+ * string, but can be NULL if a valid `str_id` is provided.
+ *
+ * @return
+ * 0 if tracing is disabled or `debugid` is being filtered out of trace.
+ * It can also return (int64_t)-1 if an error occurred. Otherwise,
+ * it returns the ID to use to refer to the string in future
+ * kdebug_trace(2) calls.
+ *
+ * The errors that can occur are:
+ *
+ * EINVAL
+ *      There are function qualifiers on `debugid`, `str` is empty, or
+ *      `str_id` was not generated by this function.
+ * EPERM
+ *      The `debugid`'s class or subclass is reserved for internal use.
+ * EFAULT
+ *      `str` is an invalid address or NULL when `str_id` is 0.
+ */
+#ifndef KERNEL
+extern uint64_t kdebug_trace_string(uint32_t debugid, uint64_t str_id,
+                                    const char *str)
+__OSX_AVAILABLE_STARTING(__MAC_10_11, __IPHONE_9_0);
 #endif
 #endif /* PRIVATE */
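Pulling the usage fragments above together into one hedged sketch (the APPSDBG_CODE debugid and code 0x1 are arbitrary illustration choices; availability of the prototype depends on the PRIVATE gating above):

#include <stdint.h>
#include <stdio.h>
#include <sys/kdebug.h>

int
main(void)
{
	uint32_t debugid = APPSDBG_CODE(DBG_APP_SAMBA, 0x1);	/* no function qualifiers */

	uint64_t sid = kdebug_trace_string(debugid, 0, "request started");
	if ((int64_t)sid == -1) {
		perror("kdebug_trace_string");
		return 1;
	}

	/* ... trace events that refer to sid ... */

	if (sid != 0)	/* 0 means tracing was disabled or filtered */
		kdebug_trace_string(debugid, sid, NULL);	/* invalidate the ID */
	return 0;
}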
 
+#ifdef XNU_KERNEL_PRIVATE
+/* Used in early boot to log strings spanning only a single tracepoint. */
+extern void kernel_debug_string_simple(const char *message);
+#endif /* XNU_KERNEL_PRIVATE */
+
 /* **** The Kernel Debug Sub Classes for Mach (DBG_MACH) **** */
 #define        DBG_MACH_EXCP_KTRAP_x86 0x02    /* Kernel Traps on x86 */
 #define        DBG_MACH_EXCP_DFLT      0x03    /* Data Translation Fault */
@@ -192,14 +316,15 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar
 #define        DBG_MACH_MSGID_INVALID  0x50    /* Messages - invalid */
 #define DBG_MACH_LOCKS         0x60    /* new lock APIs */
 #define DBG_MACH_PMAP          0x70    /* pmap */
-/* #define unused              0x80    */
+#define DBG_MACH_CLOCK         0x80    /* clock */
 #define DBG_MACH_MP            0x90    /* MP related */
 #define DBG_MACH_VM_PRESSURE   0xA0    /* Memory Pressure Events */
 #define DBG_MACH_STACKSHOT             0xA1    /* Stackshot/Microstackshot subsystem */
 #define DBG_MACH_SFI                   0xA2    /* Selective Forced Idle (SFI) */
 #define DBG_MACH_ENERGY_PERF   0xA3 /* Energy/performance resource stats */
+#define DBG_MACH_SYSDIAGNOSE   0xA4    /* sysdiagnose keychord */
 
-/* Codes for Scheduler (DBG_MACH_SCHED) */     
+/* Codes for Scheduler (DBG_MACH_SCHED) */
 #define MACH_SCHED              0x0     /* Scheduler */
 #define MACH_STACK_ATTACH       0x1     /* stack_attach() */
 #define MACH_STACK_HANDOFF      0x2     /* stack_handoff() */
@@ -212,8 +337,8 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar
 #define MACH_IDLE               0x9    /* processor idling */
 #define MACH_STACK_DEPTH        0xa    /* stack depth at switch */
 #define MACH_MOVED              0xb    /* did not use original scheduling decision */
-#define MACH_FAIRSHARE_ENTER    0xc    /* move to fairshare band */
-#define MACH_FAIRSHARE_EXIT     0xd    /* exit fairshare band */
+/* unused                       0xc    */
+/* unused                       0xd    */
 #define MACH_FAILSAFE           0xe    /* tripped fixed-pri/RT failsafe */
 #define MACH_BLOCK              0xf    /* thread block */
 #define MACH_WAIT              0x10    /* thread wait assertion */
@@ -223,10 +348,9 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar
                                         */
 #define        MACH_REDISPATCH         0x16    /* "next thread" thread redispatched */
 #define        MACH_REMOTE_AST         0x17    /* AST signal issued to remote processor */
-
 #define        MACH_SCHED_CHOOSE_PROCESSOR     0x18    /* Result of choose_processor */
 #define MACH_DEEP_IDLE          0x19   /* deep idle on master processor */
-#define MACH_SCHED_DECAY_PRIORITY      0x1a    /* timeshare thread priority decayed/restored */
+/* unused                       0x1a    was MACH_SCHED_DECAY_PRIORITY */
 #define MACH_CPU_THROTTLE_DISABLE      0x1b    /* Global CPU Throttle Disable */
 #define MACH_RW_PROMOTE            0x1c        /* promoted due to RW lock promotion */
 #define MACH_RW_DEMOTE             0x1d        /* promotion due to RW lock undone */
@@ -236,6 +360,14 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar
 #define MACH_MULTIQ_DEQUEUE        0x22 /* Result of multiq dequeue */
 #define MACH_SCHED_THREAD_SWITCH   0x23 /* attempt direct context switch to hinted thread */
 #define MACH_SCHED_SMT_BALANCE     0x24 /* SMT load balancing ASTs */
+#define MACH_REMOTE_DEFERRED_AST   0x25 /* Deferred AST started against remote processor */
+#define MACH_REMOTE_CANCEL_AST     0x26 /* Canceled deferred AST for remote processor */
+#define MACH_SCHED_CHANGE_PRIORITY 0x27 /* thread sched priority changed */
+#define MACH_SCHED_UPDATE_REC_CORES    0x28    /* Change to recommended processor bitmask */
+#define MACH_STACK_WAIT            0x29 /* Thread could not be switched-to because of kernel stack shortage */
+#define MACH_THREAD_BIND           0x2a /* Thread was bound (or unbound) to a processor */
+#define MACH_WAITQ_PROMOTE         0x2b /* Thread promoted by waitq boost */
+#define MACH_WAITQ_DEMOTE          0x2c /* Thread demoted from waitq boost */
 
 /* Variants for MACH_MULTIQ_DEQUEUE */
 #define MACH_MULTIQ_BOUND     1
@@ -248,7 +380,7 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar
 #define DBG_COW_FAULT         3
 #define DBG_CACHE_HIT_FAULT   4
 #define DBG_NZF_PAGE_FAULT    5
-#define DBG_GUARD_FAULT              6 
+#define DBG_GUARD_FAULT              6
 #define DBG_PAGEINV_FAULT     7
 #define DBG_PAGEIND_FAULT     8
 #define DBG_COMPRESSOR_FAULT  9
@@ -266,7 +398,7 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar
 #define MACH_IPC_VOUCHER_CREATE_ATTR_DATA      0x8     /* Attr data for newly created voucher */
 #define MACH_IPC_VOUCHER_DESTROY               0x9     /* Voucher removed from global voucher hashtable */
 
-/* Codes for pmap (DBG_MACH_PMAP) */     
+/* Codes for pmap (DBG_MACH_PMAP) */
 #define PMAP__CREATE           0x0
 #define PMAP__DESTROY          0x1
 #define PMAP__PROTECT          0x2
@@ -283,11 +415,19 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar
 #define PMAP__FLUSH_KERN_TLBS  0xd
 #define PMAP__FLUSH_DELAYED_TLBS       0xe
 #define PMAP__FLUSH_TLBS_TO    0xf
+#define PMAP__FLUSH_EPT        0x10
+
+/* Codes for clock (DBG_MACH_CLOCK) */
+#define        MACH_EPOCH_CHANGE       0x0     /* wake epoch change */
+
 
 /* Codes for Stackshot/Microstackshot (DBG_MACH_STACKSHOT) */
 #define MICROSTACKSHOT_RECORD  0x0
 #define MICROSTACKSHOT_GATHER  0x1
 
+/* Codes for sysdiagnose */
+#define SYSDIAGNOSE_NOTIFY_USER        0x0
+
 /* Codes for Selective Forced Idle (DBG_MACH_SFI) */
 #define SFI_SET_WINDOW                 0x0
 #define SFI_CANCEL_WINDOW              0x1
@@ -330,14 +470,14 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar
 
 /* **** The Kernel Debug Sub Classes for IOKIT (DBG_IOKIT) **** */
 #define DBG_IOINTC                     0       /* Interrupt controller */
-#define DBG_IOWORKLOOP         1       /* Work from work loop */
+#define DBG_IOWORKLOOP                 1       /* Work from work loop */
 #define DBG_IOINTES                    2       /* Interrupt event source */
 #define DBG_IOCLKES                    3       /* Clock event source */
 #define DBG_IOCMDQ                     4       /* Command queue latencies */
 #define DBG_IOMCURS                    5       /* Memory Cursor */
 #define DBG_IOMDESC                    6       /* Memory Descriptors */
 #define DBG_IOPOWER                    7       /* Power Management */
-#define DBG_IOSERVICE          8       /* Matching etc. */
+#define DBG_IOSERVICE                  8       /* Matching etc. */
 
 /* **** 9-32 reserved for internal IOKit usage **** */
 
@@ -372,28 +512,28 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar
 #define DBG_DRVSTORAGE         1       /* Storage layers */
 #define        DBG_DRVNETWORK          2       /* Network layers */
 #define        DBG_DRVKEYBOARD         3       /* Keyboard */
-#define        DBG_DRVHID                      4       /* HID Devices */
+#define        DBG_DRVHID              4       /* HID Devices */
 #define        DBG_DRVAUDIO            5       /* Audio */
 #define        DBG_DRVSERIAL           7       /* Serial */
-#define DBG_DRVSAM                     8       /* SCSI Architecture Model layers */
-#define DBG_DRVPARALLELATA  9  /* Parallel ATA */
+#define DBG_DRVSAM             8       /* SCSI Architecture Model layers */
+#define DBG_DRVPARALLELATA     9       /* Parallel ATA */
 #define DBG_DRVPARALLELSCSI    10      /* Parallel SCSI */
-#define DBG_DRVSATA                    11      /* Serial ATA */
-#define DBG_DRVSAS                     12      /* SAS */
+#define DBG_DRVSATA            11      /* Serial ATA */
+#define DBG_DRVSAS             12      /* SAS */
 #define DBG_DRVFIBRECHANNEL    13      /* FiberChannel */
-#define DBG_DRVUSB                     14      /* USB */
+#define DBG_DRVUSB             14      /* USB */
 #define DBG_DRVBLUETOOTH       15      /* Bluetooth */
 #define DBG_DRVFIREWIRE                16      /* FireWire */
 #define DBG_DRVINFINIBAND      17      /* Infiniband */
-#define DBG_DRVGRAPHICS                18  /* Graphics */
+#define DBG_DRVGRAPHICS                18      /* Graphics */
 #define DBG_DRVSD              19      /* Secure Digital */
 #define DBG_DRVNAND            20      /* NAND drivers and layers */
 #define DBG_SSD                        21      /* SSD */
 #define DBG_DRVSPI             22      /* SPI */
 
 /* Backwards compatibility */
-#define        DBG_DRVPOINTING         DBG_DRVHID              /* OBSOLETE: Use DBG_DRVHID instead */
-#define DBG_DRVDISK                    DBG_DRVSTORAGE  /* OBSOLETE: Use DBG_DRVSTORAGE instead */
+#define        DBG_DRVPOINTING         DBG_DRVHID      /* OBSOLETE: Use DBG_DRVHID instead */
+#define DBG_DRVDISK            DBG_DRVSTORAGE  /* OBSOLETE: Use DBG_DRVSTORAGE instead */
 
 /* **** The Kernel Debug Sub Classes for the DLIL Layer (DBG_DLIL) **** */
 #define DBG_DLIL_STATIC 1       /* Static DLIL code */
@@ -414,7 +554,7 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar
 #define DBG_EXFAT     0xE     /* ExFAT-specific events; see the exfat project */
 #define DBG_MSDOS     0xF     /* FAT-specific events; see the msdosfs project */
 #define DBG_ACFS      0x10    /* Xsan-specific events; see the XsanFS project */
-#define DBG_THROTTLE  0x11    /* I/O Throttling events */      
+#define DBG_THROTTLE  0x11    /* I/O Throttling events */
 #define DBG_CONTENT_PROT 0xCF /* Content Protection Events: see bsd/sys/cprotect.h */
 
 /*
@@ -424,8 +564,10 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar
 #define DBG_HFS_UPDATE_MODTIME  0x02
 #define DBG_HFS_UPDATE_CHGTIME  0x04
 #define DBG_HFS_UPDATE_MODIFIED         0x08
-#define DBG_HFS_UPDATE_FORCE    0x10
+#define DBG_HFS_UPDATE_FORCE     0x10
 #define DBG_HFS_UPDATE_DATEADDED 0x20
+#define DBG_HFS_UPDATE_MINOR     0x40
+#define DBG_HFS_UPDATE_SKIPPED  0x80
 
 /* The Kernel Debug Sub Classes for BSD */
 #define DBG_BSD_PROC           0x01    /* process/signals related */
@@ -446,7 +588,7 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar
 #define BSD_MEMSTAT_JETSAM_HIWAT     3  /* highwater jetsam */
 #define BSD_MEMSTAT_FREEZE           4  /* freeze process */
 #define BSD_MEMSTAT_LATENCY_COALESCE 5  /* delay imposed to coalesce jetsam reports */
-#define BSD_MEMSTAT_UPDATE           6  /* priority update */  
+#define BSD_MEMSTAT_UPDATE           6  /* priority update */
 #define BSD_MEMSTAT_IDLE_DEMOTE      7  /* idle demotion fired */
 #define BSD_MEMSTAT_CLEAR_ERRORS     8  /* reset termination error state */
 #define BSD_MEMSTAT_DIRTY_TRACK      9  /* track the process state */
@@ -466,6 +608,7 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar
 #define        TRACE_DATA_NEWTHREAD            (TRACEDBG_CODE(DBG_TRACE_DATA, 1))
 #define        TRACE_DATA_EXEC                 (TRACEDBG_CODE(DBG_TRACE_DATA, 2))
 #define        TRACE_DATA_THREAD_TERMINATE     (TRACEDBG_CODE(DBG_TRACE_DATA, 3))
+#define TRACE_STRING_GLOBAL            (TRACEDBG_CODE(DBG_TRACE_STRING, 0))
 #define        TRACE_STRING_NEWTHREAD          (TRACEDBG_CODE(DBG_TRACE_STRING, 1))
 #define        TRACE_STRING_EXEC               (TRACEDBG_CODE(DBG_TRACE_STRING, 2))
 #define        TRACE_PANIC                     (TRACEDBG_CODE(DBG_TRACE_INFO, 0))
@@ -504,11 +647,13 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar
 /* Kernel Debug Sub Classes for Applications (DBG_APPS) */
 #define DBG_APP_LOGINWINDOW     0x03
 #define DBG_APP_AUDIO           0x04
+#define DBG_APP_SIGPOST         0x0A
+#define DBG_APP_APPKIT          0x0C
 #define DBG_APP_SAMBA           0x80
 
 /* Kernel Debug codes for Throttling (DBG_THROTTLE) */
 #define OPEN_THROTTLE_WINDOW   0x1
-#define PROCESS_THROTTLED      0x2     
+#define PROCESS_THROTTLED      0x2
 #define IO_THROTTLE_DISABLE    0x3
 
 
@@ -555,16 +700,16 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar
 #define BANK_TASK_INFO                 0x11    /* Trace points related to bank task struct */
 
 /* Subclasses for MACH ATM Voucher Attribute Manager (ATM) */
-#define ATM_SUBAID_INFO                        0x10    
-#define ATM_GETVALUE_INFO              0x20    
-#define ATM_UNREGISTER_INFO            0x30    
+#define ATM_SUBAID_INFO                        0x10
+#define ATM_GETVALUE_INFO              0x20
+#define ATM_UNREGISTER_INFO            0x30
 
 /* Codes for BANK_ACCOUNT_INFO */
 #define BANK_SETTLE_CPU_TIME           0x1     /* Bank ledger(chit) rolled up to tasks. */
 
 /* Codes for ATM_SUBAID_INFO */
 #define ATM_MIN_CALLED                         0x1
-#define ATM_MIN_LINK_LIST                      0x2
+#define ATM_LINK_LIST_TRIM                     0x2
 
 /* Codes for ATM_GETVALUE_INFO */
 #define ATM_VALUE_REPLACED                     0x1
@@ -574,11 +719,13 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar
 #define ATM_VALUE_UNREGISTERED                 0x1
 #define ATM_VALUE_DIFF_MAILBOX                 0x2
 
-/**********************************************************************/
+/* Kernel Debug Sub Classes for daemons (DBG_DAEMON) */
+#define DBG_DAEMON_COREDUET                    0x1
 
-#define KDBG_CODE(Class, SubClass, code) (((Class & 0xff) << 24) | ((SubClass & 0xff) << 16) | ((code & 0x3fff)  << 2))
+/**********************************************************************/
 
-#define KDBG_MIGCODE(msgid) ((DBG_MIG << 24) | (((msgid) & 0x3fffff)  << 2))
+#define KDBG_MIGCODE(msgid) ((DBG_MIG << KDBG_CLASS_OFFSET) | \
+                             (((msgid) & 0x3fffff) << KDBG_CODE_OFFSET))
 
 #define MACHDBG_CODE(SubClass, code) KDBG_CODE(DBG_MACH, SubClass, code)
 #define NETDBG_CODE(SubClass, code) KDBG_CODE(DBG_NETWORK, SubClass, code)
@@ -594,6 +741,7 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar
 #define QTDBG_CODE(SubClass,code) KDBG_CODE(DBG_QT, SubClass, code)
 #define APPSDBG_CODE(SubClass,code) KDBG_CODE(DBG_APPS, SubClass, code)
 #define ARIADNEDBG_CODE(SubClass, code) KDBG_CODE(DBG_ARIADNE, SubClass, code)
+#define DAEMONDBG_CODE(SubClass, code) KDBG_CODE(DBG_DAEMON, SubClass, code)
 #define CPUPM_CODE(code) IOKDBG_CODE(DBG_IOCPUPM, code)
 
 #define KMEM_ALLOC_CODE MACHDBG_CODE(DBG_MACH_LEAKS, 0)
@@ -612,23 +760,26 @@ extern int kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t ar
 #define BANK_CODE(SubClass, code) KDBG_CODE(DBG_BANK, (SubClass), (code))
 #define ATM_CODE(SubClass, code) KDBG_CODE(DBG_ATM, (SubClass), (code))
 
+/* Kernel Debug Macros for specific daemons */
+#define COREDUETDBG_CODE(code) DAEMONDBG_CODE(DBG_DAEMON_COREDUET, code)
+
 /*   Usage:
-* kernel_debug((KDBG_CODE(DBG_NETWORK, DNET_PROTOCOL, 51) | DBG_FUNC_START), 
-*      offset, 0, 0, 0,0) 
-* 
-* For ex, 
-* 
+* kernel_debug((KDBG_CODE(DBG_NETWORK, DNET_PROTOCOL, 51) | DBG_FUNC_START),
+*      offset, 0, 0, 0,0)
+*
+* For ex,
+*
 * #include <sys/kdebug.h>
-* 
+*
 * #define DBG_NETIPINIT NETDBG_CODE(DBG_NETIP,1)
-* 
-* 
+*
+*
 * void
 * ip_init()
 * {
 *      register struct protosw *pr;
 *      register int i;
-*      
+*
 *      KERNEL_DEBUG(DBG_NETIPINIT | DBG_FUNC_START, 0,0,0,0,0)
 *      --------
 *      KERNEL_DEBUG(DBG_NETIPINIT, 0,0,0,0,0)
@@ -649,7 +800,7 @@ extern unsigned int kdebug_enable;
 /*
  * Infer the supported kernel debug event level from config option.
  * Use (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD) as a guard to protect
- * unaudited debug code. 
+ * unaudited debug code.
  */
 #define KDEBUG_LEVEL_NONE     0
 #define KDEBUG_LEVEL_IST      1
@@ -657,13 +808,18 @@ extern unsigned int kdebug_enable;
 #define KDEBUG_LEVEL_FULL     3
 
 #if NO_KDEBUG
-#define KDEBUG_LEVEL KDEBUG_LEVEL_NONE    
+#define KDEBUG_LEVEL KDEBUG_LEVEL_NONE
 #elif IST_KDEBUG
 #define KDEBUG_LEVEL KDEBUG_LEVEL_IST
+       // currently configured for the iOS release kernel
 #elif KDEBUG
 #define KDEBUG_LEVEL KDEBUG_LEVEL_FULL
 #else
 #define KDEBUG_LEVEL KDEBUG_LEVEL_STANDARD
+/* Currently, all other kernel configurations (development, etc.)
+   build with KDEBUG_LEVEL_STANDARD.  As a result, KERNEL_DEBUG_CONSTANT*()
+   are on by default but KERNEL_DEBUG*() are not.
+*/
 #endif
 
 #if (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD)
@@ -714,7 +870,21 @@ do {                                                                       \
 #define KERNEL_DEBUG_EARLY(x,a,b,c,d) do { } while(0)
 #endif /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD) */
 
-/* 
+#ifdef KERNEL_PRIVATE
+
+// Abbreviated version of above
+#define KDBG(x, ...) KDBG_(x, ## __VA_ARGS__, 5, 4, 3, 2, 1, 0)
+#define KDBG_(x, a, b, c, d, e, n, ...) KDBG##n(x, a, b, c, d, e)
+#define KDBG0(x, a, b, c, d, e) KERNEL_DEBUG_CONSTANT(x, 0, 0, 0, 0, 0)
+#define KDBG1(x, a, b, c, d, e) KERNEL_DEBUG_CONSTANT(x, a, 0, 0, 0, 0)
+#define KDBG2(x, a, b, c, d, e) KERNEL_DEBUG_CONSTANT(x, a, b, 0, 0, 0)
+#define KDBG3(x, a, b, c, d, e) KERNEL_DEBUG_CONSTANT(x, a, b, c, 0, 0)
+#define KDBG4(x, a, b, c, d, e) KERNEL_DEBUG_CONSTANT(x, a, b, c, d, 0)
+#define KDBG5(x, a, b, c, d, e) KERNEL_DEBUG_CONSTANT(x, a, b, c, d, e)
+
+#endif // KERNEL_PRIVATE
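A kernel-private sketch of the abbreviated form (the `thread` variable is hypothetical); unused trailing arguments are padded with zero, so the two calls below emit equivalent tracepoints:

/* one argument supplied, four padded with 0 via KDBG1() */
KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_BLOCK) | DBG_FUNC_START, thread_tid(thread));

/* equivalent long-hand form */
KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_BLOCK) | DBG_FUNC_START,
    thread_tid(thread), 0, 0, 0, 0);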
+
+/*
  * Specify KDEBUG_PPT to indicate that the event belongs to the
  * limited PPT set.
  */
@@ -723,27 +893,83 @@ do {                                                                      \
 #define KDEBUG_PPT    (KDEBUG_ENABLE_PPT)
 
 /*
- * KERNEL_DEBUG_CONSTANT_IST events provide an audited subset of
- * tracepoints for userland system tracing tools.
+   KERNEL_DEBUG_CONSTANT_IST events provide an audited subset of
+   tracepoints for userland system tracing tools.  This tracing level was
+   created by 8857227 to protect fairplayd and other PT_DENY_ATTACH
+   processes.  It has two effects: only KERNEL_DEBUG_CONSTANT_IST() traces
+   are emitted and any PT_DENY_ATTACH processes will only emit basic
+   traces as defined by the kernel_debug_filter() routine.
  */
 #if (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST)
 #ifdef XNU_KERNEL_PRIVATE
-#define KERNEL_DEBUG_CONSTANT_IST(type,x,a,b,c,d,e)                            \
+#define KERNEL_DEBUG_CONSTANT_IST(type,x,a,b,c,d,e)                    \
 do {                                                                   \
-       if (__improbable(kdebug_enable & type))                                 \
+       if (__improbable(kdebug_enable & type))                         \
         kernel_debug(x,(uintptr_t)a,(uintptr_t)b,(uintptr_t)c,         \
                        (uintptr_t)d,(uintptr_t)e);                     \
 } while(0)
 #else /* XNU_KERNEL_PRIVATE */
-#define KERNEL_DEBUG_CONSTANT_IST(type,x,a,b,c,d,e)                            \
+#define KERNEL_DEBUG_CONSTANT_IST(type,x,a,b,c,d,e)                    \
 do {                                                                   \
-       if (kdebug_enable & type)                                               \
+       if (kdebug_enable & type)                                       \
         kernel_debug(x,(uintptr_t)a,(uintptr_t)b,(uintptr_t)c,         \
                        (uintptr_t)d,(uintptr_t)e);                     \
 } while(0)
 #endif /* XNU_KERNEL_PRIVATE */
+
+// whether to bother calculating EnergyTracing inputs
+// could change in the future to check whether DBG_ENERGYTRACE is active
+#define ENTR_SHOULDTRACE kdebug_enable
+// encode logical EnergyTracing into 32/64 KDebug trace
+#define ENTR_KDTRACE(component, opcode, lifespan, id, quality, value)  \
+do {                                                                   \
+    uint32_t kdcode__;                                                 \
+    uintptr_t highval__, lowval__, mask__ = 0xffffffff;                        \
+    kdcode__ = KDBG_CODE(DBG_ENERGYTRACE,component,opcode)|(lifespan);         \
+    highval__ = ((value) >> 32) & mask__;                              \
+    lowval__ = (value) & mask__;                                       \
+    ENTR_KDTRACEFUNC(kdcode__, id, quality, highval__, lowval__);      \
+} while(0)
+
+/*
+    Trace the association of two existing activations.
+
+    An association is traced as a modification to the parent activation.
+    In order to fit the sub-activation's component, activation code, and
+    activation ID into a kdebug tracepoint, the arguments that would hold
+    the value are left separate, and one stores the component and opcode
+    of the sub-activation, while the other stores the pointer-sized
+    activation ID.
+
+           arg2                   arg3               arg4
+    +-----------------+  +~+----+----+--------+   +----------+
+    |kEnTrModAssociate|  | |    |    |        |   |          |
+    +-----------------+  +~+----+----+--------+   +----------+
+                           8-bits unused       sub-activation ID
+                                8-bit sub-component
+                                     16-bit sub-opcode
+
+*/
+#define kEnTrModAssociate (1 << 28)
+#define ENTR_KDASSOCIATE(par_comp, par_opcode, par_act_id,             \
+                         sub_comp, sub_opcode, sub_act_id)              \
+do {                                                                   \
+    unsigned sub_compcode = ((unsigned)sub_comp << 16) | sub_opcode;   \
+    ENTR_KDTRACEFUNC(KDBG_CODE(DBG_ENERGYTRACE,par_comp,par_opcode),   \
+                    par_act_id, kEnTrModAssociate, sub_compcode,       \
+                    sub_act_id);                                       \
+} while(0)
+
 #else /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) */
+
 #define KERNEL_DEBUG_CONSTANT_IST(type,x,a,b,c,d,e) do { } while(0)
+#define ENTR_SHOULDTRACE FALSE
+#define ENTR_KDTRACE(component, opcode, lifespan, id, quality, value)  \
+                                   do {} while (0)
+#define ENTR_KDASSOCIATE(par_comp, par_opcode, par_act_id,             \
+                        sub_comp, sub_opcode, sub_act_id)              \
+                                   do {} while (0)
+
 #endif /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) */
 
 #if NO_KDEBUG
@@ -773,8 +999,19 @@ extern void kernel_debug_early(
                uintptr_t arg3,
                uintptr_t arg4);
 
-extern void kernel_debug_string(
-               const char *message);
+#ifdef KERNEL_PRIVATE
+/*
+ * kernel_debug_string provides the same functionality as the
+ * kdebug_trace_string syscall as a KPI.  str_id is an in/out
+ * parameter that, if it's pointing to a string ID of 0, will
+ * receive a generated ID.  If it provides a value in str_id,
+ * then that will be used, instead.
+ *
+ * Returns an errno indicating the type of failure.
+ */
+extern int
+kernel_debug_string(uint32_t debugid, uint64_t *str_id, const char *str);
+#endif
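A kernel-private sketch of the KPI (the debugid, code 0x1, and the cached ID are hypothetical; a str_id of 0 asks the implementation to generate a new one):

static uint64_t g_volume_name_id;	/* hypothetical cached string ID */

static void
trace_volume_name(const char *name)
{
	uint64_t str_id = g_volume_name_id;	/* 0 on first use => generate an ID */
	int err = kernel_debug_string(FSDBG_CODE(DBG_FSRW, 0x1), &str_id, name);
	if (err == 0)
		g_volume_name_id = str_id;	/* reuse to overwrite the string later */
}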
 
 #if (KDEBUG_LEVEL >= KDEBUG_LEVEL_FULL)
 #ifdef XNU_KERNEL_PRIVATE
@@ -808,14 +1045,66 @@ do {                                                                     \
                      (uintptr_t)c, (uintptr_t)d, (uintptr_t)e);        \
 } while(0)
 #endif /* XNU_KERNEL_PRIVATE */
+
 #else /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_FULL) */
+
 #define KERNEL_DEBUG(x,a,b,c,d,e) do {} while (0)
 #define KERNEL_DEBUG1(x,a,b,c,d,e) do {} while (0)
 
 #define __kdebug_only __unused
 #endif /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_FULL) */
 
+
+// for EnergyTracing user space & clients
+#define kEnTrCompKernel            2
+
+/*
+    EnergyTracing opcodes
+
+    Activations use DBG_FUNC_START/END.
+    Events are DBG_FUNC_NONE.
+ */
+
+/* Socket reads and writes are uniquely identified by the (sanitized)
+   pointer to the socket struct in question.  To associate this address
+   with the user space file descriptor, we have a socket activation with
+   the FD as its identifier and the socket struct pointer as its value.
+*/
+#define kEnTrActKernSocket     1
+#define kEnTrActKernSockRead   2
+#define kEnTrActKernSockWrite  3
+
+#define kEnTrActKernPoll       10
+#define kEnTrActKernSelect     11
+#define kEnTrActKernKQWait     12
+
+// events
+#define kEnTrEvUnblocked       256
+
+// EnergyTracing flags (the low-order 16 bits of 'quality')
+#define kEnTrFlagNonBlocking   1 << 0
+#define kEnTrFlagNoWork                1 << 1
+
+// and now the internal mechanism
 #ifdef KERNEL_PRIVATE
+
+// 20452597 requests that the trace macros not take an argument it throws away
+#define KERNEL_DBG_IST_SANE(x, a, b, c, d)                             \
+        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, x, a, b, c, d,          \
+                                  0 /*__unused in kernel_debug()*/)
+#define ENTR_KDTRACEFUNC KERNEL_DBG_IST_SANE
+
+// value is int64_t, quality is uint32_t
+#define KERNEL_ENERGYTRACE(opcode, lifespan, id, quality, value)        \
+            ENTR_KDTRACE(kEnTrCompKernel, opcode, lifespan, id,         \
+                         quality, value)
+#define KERNEL_ENTR_ASSOCIATE(par_opcode, par_act_id, sub_opcode, sub_act_id) \
+           ENTR_KDASSOCIATE(kEnTrCompKernel, par_opcode, par_act_id,   \
+                            kEnTrCompKernel, sub_opcode, sub_act_id)
+
+// end EnergyTracing
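A kernel-private sketch of an EnergyTracing activation around a (hypothetical) socket read; `fd` and `bytes_requested` are placeholders, and the 64-bit value is split into two 32-bit trace arguments by ENTR_KDTRACE():

if (ENTR_SHOULDTRACE) {
	KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_START,
	    (uintptr_t)fd,		/* id: the user-visible descriptor */
	    kEnTrFlagNonBlocking,	/* quality: low 16 bits carry flags */
	    (int64_t)bytes_requested);	/* value: split into high/low words */
}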
+
+
 #include <mach/boolean.h>
 
 #define NUMPARMS 23
@@ -837,8 +1126,8 @@ void enable_wrap(uint32_t old_slowcheck, boolean_t lostevents);
 void release_storage_unit(int cpu,  uint32_t storage_unit);
 int allocate_storage_unit(int cpu);
 
-#define KDBG_CLASS_ENCODE(Class, SubClass) (((Class & 0xff) << 24) | ((SubClass & 0xff) << 16))
-#define KDBG_CLASS_DECODE(Debugid) (Debugid & 0xFFFF0000)
+#define KDBG_CLASS_ENCODE(Class, SubClass) KDBG_EVENTID(Class, SubClass, 0)
+#define KDBG_CLASS_DECODE(Debugid)         (Debugid & KDBG_CSC_MASK)
 
 
 #endif  /* KERNEL_PRIVATE */
@@ -875,7 +1164,7 @@ typedef struct {
 static inline void
 kdbg_set_cpu(kd_buf *kp, int cpu)
 {
-       kp->timestamp = (kp->timestamp & KDBG_TIMESTAMP_MASK) | 
+       kp->timestamp = (kp->timestamp & KDBG_TIMESTAMP_MASK) |
                                (((uint64_t) cpu) << KDBG_CPU_SHIFT);
 }
 static inline int
@@ -896,7 +1185,7 @@ kdbg_get_timestamp(kd_buf *kp)
 static inline void
 kdbg_set_timestamp_and_cpu(kd_buf *kp, uint64_t thetime, int cpu)
 {
-       kp->timestamp = (thetime & KDBG_TIMESTAMP_MASK) | 
+       kp->timestamp = (thetime & KDBG_TIMESTAMP_MASK) |
                                (((uint64_t) cpu) << KDBG_CPU_SHIFT);
 }
 #else
@@ -950,7 +1239,7 @@ typedef struct {
        unsigned int    value2;
        unsigned int    value3;
        unsigned int    value4;
-       
+
 } kd_regtype;
 
 typedef struct
@@ -1041,8 +1330,79 @@ typedef struct {
        uint32_t        TOD_usecs;
 } RAW_header;
 
+// Version 3 header
+// The header chunk has the tag 0x00001000 which also serves as a magic word
+// that identifies the file as a version 3 trace file. The header payload is
+// a set of fixed fields followed by a variable number of sub-chunks:
+/*
+ ____________________________________________________________________________
+ | Offset | Size | Field                                                    |
+ ----------------------------------------------------------------------------
+ |    0   |  4   | Tag (0x00001000)                                         |
+ |    4   |  4   | Sub-tag. Represents the version of the header.           |
+ |    8   |  8   | Length of header payload (40+8x)                         |
+ |   16   |  8   | Time base info. Two 32-bit numbers, numer/denom,         |
+ |        |      | for converting timestamps to nanoseconds.                |
+ |   24   |  8   | Timestamp of trace start.                                |
+ |   32   |  8   | Wall time seconds since Unix epoch.                      |
+ |        |      | As returned by gettimeofday().                           |
+ |   40   |  4   | Wall time microseconds. As returned by gettimeofday().   |
+ |   44   |  4   | Local time zone offset in minutes. ( " )                 |
+ |   48   |  4   | Type of daylight savings time correction to apply. ( " ) |
+ |   52   |  4   | Flags. 1 = 64-bit. Remaining bits should be written      |
+ |        |      | as 0 and ignored when reading.                           |
+ |   56   |  8x  | Variable number of sub-chunks. None are required.        |
+ |        |      | Ignore unknown chunks.                                   |
+ ----------------------------------------------------------------------------
+*/
+// NOTE: The header sub-chunks are considered part of the header chunk,
+// so they must be included in the header chunk’s length field.
+// The CPU map is an optional sub-chunk of the header chunk. It provides
+// information about the CPUs that are referenced from the trace events.
+typedef struct {
+       uint32_t        tag;
+       uint32_t        sub_tag;
+       uint64_t        length;
+       uint32_t        timebase_numer;
+       uint32_t        timebase_denom;
+       uint64_t        timestamp;
+       uint64_t        walltime_secs;
+       uint32_t        walltime_usecs;
+       uint32_t        timezone_minuteswest;
+       uint32_t        timezone_dst;
+       uint32_t        flags;
+} kd_header_v3;
+
+typedef struct {
+       uint32_t tag;
+       uint32_t sub_tag;
+       uint64_t length;
+} kd_chunk_header_v3;
+
 #define RAW_VERSION0   0x55aa0000
 #define RAW_VERSION1   0x55aa0101
+#define RAW_VERSION2    0x55aa0200 /* Only used by kperf and Instruments */
+#define RAW_VERSION3   0x00001000
+
+#define V3_CONFIG      0x00001b00
+#define V3_CPU_MAP     0x00001c00
+#define V3_THREAD_MAP  0x00001d00
+#define V3_RAW_EVENTS  0x00001e00
+#define V3_NULL_CHUNK  0x00002000
+
+// The current version of all kernel managed chunks is 1. The
+// V3_CURRENT_CHUNK_VERSION is added to ease the simple case
+// when most/all the kernel managed chunks have the same version.
+
+#define V3_CURRENT_CHUNK_VERSION 1
+#define V3_HEADER_VERSION     V3_CURRENT_CHUNK_VERSION
+#define V3_CPUMAP_VERSION     V3_CURRENT_CHUNK_VERSION
+#define V3_THRMAP_VERSION     V3_CURRENT_CHUNK_VERSION
+#define V3_EVENT_DATA_VERSION V3_CURRENT_CHUNK_VERSION
+
+// Apis to support writing v3 chunks in the kernel
+int kdbg_write_v3_chunk_header_to_buffer(void *buffer, uint32_t tag, uint32_t sub_tag, uint64_t length);
+int kdbg_write_v3_chunk_to_fd(uint32_t tag, uint32_t sub_tag, uint64_t length, void *payload, uint64_t payload_size, int fd);
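A hedged sketch of laying out a v3 chunk header by hand, mirroring what kdbg_write_v3_chunk_header_to_buffer() is expected to produce (it assumes the kd_chunk_header_v3 definition above is visible; the buffer handling is illustrative only):

#include <stdint.h>
#include <string.h>

static size_t
fill_raw_events_chunk_header(void *buf, uint64_t payload_length)
{
	kd_chunk_header_v3 hdr = {
		.tag     = V3_RAW_EVENTS,
		.sub_tag = V3_EVENT_DATA_VERSION,
		.length  = payload_length,	/* length of the payload that follows */
	};
	memcpy(buf, &hdr, sizeof(hdr));
	return sizeof(hdr);
}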
 
 #define        KDBG_CLASSTYPE          0x10000
 #define        KDBG_SUBCLSTYPE         0x20000
@@ -1053,7 +1413,7 @@ typedef struct {
 #define        KDBG_RANGECHECK 0x100000
 #define        KDBG_VALCHECK   0x200000        /* Check up to 4 individual values */
 
-#define        KDBG_TYPEFILTER_CHECK   ((uint32_t) 0x400000)        /* Check class and subclass against a bitmap */ 
+#define        KDBG_TYPEFILTER_CHECK   ((uint32_t) 0x400000)        /* Check class and subclass against a bitmap */
 
 #define        KDBG_BUFINIT    0x80000000
 
@@ -1064,9 +1424,11 @@ typedef struct {
 #define VFS_LOOKUP     (FSDBG_CODE(DBG_FSRW,36))
 #define VFS_LOOKUP_DONE        (FSDBG_CODE(DBG_FSRW,39))
 
+#ifdef XNU_KERNEL_PRIVATE
 #if (DEVELOPMENT || DEBUG)
 #define KDEBUG_MOJO_TRACE 1
 #endif
+#endif
 
 #endif /* __APPLE_API_PRIVATE */
 #endif /* PRIVATE */
index 6ac130dd7066affdf1e3806a6c03c98da226286a..3d87bce89d65be9a94e1827a1c7b66208d63c964 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004, 2012-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2004, 2012-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -187,7 +187,7 @@ struct kctlstat {
        u_int64_t       kcs_send_list_fail __attribute__((aligned(8)));
        u_int64_t       kcs_enqueue_fail __attribute__((aligned(8)));
        u_int64_t       kcs_enqueue_fullsock __attribute__((aligned(8)));
-       
+       u_int64_t       kcs_bad_kctlref __attribute__((aligned(8)));
 };
 
 #endif /* PRIVATE */
@@ -560,8 +560,20 @@ errno_t
 ctl_enqueuembuf_list(kern_ctl_ref kctlref, u_int32_t unit, mbuf_t m_list,
        u_int32_t flags, mbuf_t *m_remain);
 
+/*!
+       @function ctl_getenqueuepacketcount
+       @discussion Retrieve the number of packets in the socket
+               receive buffer.
+       @param kctlref The control reference of the kernel control.
+       @param unit The unit number of the kernel control instance.
+       @param pcnt The address at which to return the current count.
+       @result 0 - Success; the packet count is returned to caller.
+               EINVAL - Invalid parameters.
+ */
+errno_t
+ctl_getenqueuepacketcount(kern_ctl_ref kctlref, u_int32_t unit, u_int32_t *pcnt);
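A kext-side sketch of the new call (kctlref, unit, and the back-off threshold of 64 are hypothetical):

u_int32_t pending = 0;
errno_t err = ctl_getenqueuepacketcount(kctlref, unit, &pending);
if (err == 0 && pending > 64) {
	/* the client is not draining its receive buffer; defer further enqueues */
}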
 
-#endif
+#endif /* PRIVATE */
 
 /*!
        @function ctl_getenqueuespace
@@ -601,43 +613,11 @@ ctl_getenqueuereadable(kern_ctl_ref kctlref, u_int32_t unit, u_int32_t *differen
  * internal structure maintained for each register controller
  */
 struct ctl_cb;
+struct kctl;
 struct socket;
+struct socket_info;
 
-struct kctl {
-       TAILQ_ENTRY(kctl)               next;           /* controller chain */
-
-       /* controller information provided when registering */
-       char                            name[MAX_KCTL_NAME];    /* unique nke identifier, provided by DTS */
-       u_int32_t                       id;
-       u_int32_t                       reg_unit;
-
-       /* misc communication information */
-       u_int32_t                       flags;          /* support flags */
-       u_int32_t                       recvbufsize;    /* request more than the default buffer size */
-       u_int32_t                       sendbufsize;    /* request more than the default buffer size */
-
-       /* Dispatch functions */
-       ctl_connect_func                connect;        /* Make contact */
-       ctl_disconnect_func             disconnect;     /* Break contact */
-       ctl_send_func                   send;           /* Send data to nke */
-       ctl_send_list_func              send_list;      /* Send list of packets */
-       ctl_setopt_func                 setopt;         /* set kctl configuration */
-       ctl_getopt_func                 getopt;         /* get kctl configuration */
-       ctl_rcvd_func                   rcvd;           /* Notify nke when client reads data */
-
-       TAILQ_HEAD(, ctl_cb)            kcb_head;
-       u_int32_t                       lastunit;
-};
-
-struct ctl_cb {
-       TAILQ_ENTRY(ctl_cb)             next;           /* controller chain */
-       lck_mtx_t                       *mtx;
-       struct socket                   *so;            /* controlling socket */
-       struct kctl                     *kctl;          /* back pointer to controller */
-       void                            *userdata;
-       u_int32_t                       unit;
-       u_int32_t                       usecount;
-};
+void kctl_fill_socketinfo(struct socket *, struct socket_info *);
 
 u_int32_t ctl_id_by_name(const char *name);
 errno_t ctl_name_by_id(u_int32_t id, char *out_name, size_t maxsize);
index 4e8d01c3eeb4309a889853e295a6e64966e42538..c740032913d340d90c32aff9361370e9278b4d55 100644 (file)
@@ -184,17 +184,21 @@ int memorystatus_control(uint32_t command, int32_t pid, uint32_t flags, void *bu
 #define MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES      2
 #define MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT          3
 #define MEMORYSTATUS_CMD_GET_PRESSURE_STATUS          4
-#define MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK   5
-#define MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT       6
-
-/* Group Commands */
-#define MEMORYSTATUS_CMD_GRP_SET_PROPERTIES           7
+#define MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK   5    /* Set active memory limit = inactive memory limit, both non-fatal  */
+#define MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT       6    /* Set active memory limit = inactive memory limit, both fatal       */
+#define MEMORYSTATUS_CMD_SET_MEMLIMIT_PROPERTIES      7    /* Set memory limits plus attributes independently                  */
+#define MEMORYSTATUS_CMD_GET_MEMLIMIT_PROPERTIES      8    /* Get memory limits plus attributes                                        */
+#define MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_ENABLE   9    /* Set the task's status as a privileged listener w.r.t. memory notifications  */
+#define MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_DISABLE  10   /* Reset the task's status as a privileged listener w.r.t. memory notifications  */
+/* Commands that act on a group of processes */
+#define MEMORYSTATUS_CMD_GRP_SET_PROPERTIES           100
 
 #if PRIVATE
 /* Test commands */
 
 /* Trigger forced jetsam */
-#define MEMORYSTATUS_CMD_TEST_JETSAM                  1000
+#define MEMORYSTATUS_CMD_TEST_JETSAM           1000
+#define MEMORYSTATUS_CMD_TEST_JETSAM_SORT      1001
 
 /* Panic on jetsam options */
 typedef struct memorystatus_jetsam_panic_options {
@@ -202,17 +206,100 @@ typedef struct memorystatus_jetsam_panic_options {
        uint32_t mask;
 } memorystatus_jetsam_panic_options_t;
 
-#define MEMORYSTATUS_CMD_SET_JETSAM_PANIC_BITS        1001
+#define MEMORYSTATUS_CMD_SET_JETSAM_PANIC_BITS        1002
+
+/* Select priority band sort order */
+#define JETSAM_SORT_NOSORT     0
+#define JETSAM_SORT_DEFAULT    1
+
 #endif /* PRIVATE */
 
+/*
+ * For use with memorystatus_control:
+ * MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT
+ *
+ * A jetsam snapshot is initialized when a non-idle
+ * jetsam event occurs.  The data is held in the
+ * buffer until it is reaped. This is the default
+ * behavior.
+ *
+ * Flags change the default behavior:
+ *     Demand mode - this is an on_demand snapshot,
+ *     meaning data is populated upon request.
+ *
+ *     Boot mode - this is a snapshot of
+ *     memstats collected before loading the
+ *     init program.  Once collected, these
+ *     stats do not change.  In this mode,
+ *     the snapshot entry_count is always 0.
+ *
+ * Snapshots are inherently racy between the request
+ * for the buffer size and the actual data compilation.
+*/
+
+/* Flags */
+#define MEMORYSTATUS_SNAPSHOT_ON_DEMAND                0x1     /* A populated snapshot buffer is returned on demand */
+#define MEMORYSTATUS_SNAPSHOT_AT_BOOT          0x2     /* Returns a snapshot with memstats collected at boot */
+
+
+/*
+ * For use with memorystatus_control:
+ * MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES
+ */
 typedef struct memorystatus_priority_properties {
        int32_t  priority;
        uint64_t user_data;
 } memorystatus_priority_properties_t;
 
+/*
+ * For use with memorystatus_control:
+ * MEMORYSTATUS_CMD_SET_MEMLIMIT_PROPERTIES
+ * MEMORYSTATUS_CMD_GET_MEMLIMIT_PROPERTIES
+ */
+typedef struct memorystatus_memlimit_properties {
+       int32_t memlimit_active;                /* jetsam memory limit (in MB) when process is active */
+       uint32_t memlimit_active_attr;
+       int32_t memlimit_inactive;              /* jetsam memory limit (in MB) when process is inactive */
+       uint32_t memlimit_inactive_attr;
+} memorystatus_memlimit_properties_t;
+
+#define MEMORYSTATUS_MEMLIMIT_ATTR_FATAL       0x1     /* if set, exceeding the memlimit is fatal */
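A minimal sketch of driving MEMORYSTATUS_CMD_SET_MEMLIMIT_PROPERTIES with the structure above. The memorystatus_control() prototype, the target pid, and the limit values are assumptions for illustration; limits are in MB per the field comments.

#include <string.h>
#include <sys/types.h>

/* Sketch only: memorystatus_control() is a private interface; prototype assumed. */
static int
set_limits(pid_t pid)
{
	memorystatus_memlimit_properties_t props;

	memset(&props, 0, sizeof(props));
	props.memlimit_active        = 300;   /* 300 MB while active, non-fatal */
	props.memlimit_active_attr   = 0;
	props.memlimit_inactive      = 150;   /* 150 MB while inactive, fatal */
	props.memlimit_inactive_attr = MEMORYSTATUS_MEMLIMIT_ATTR_FATAL;

	return memorystatus_control(MEMORYSTATUS_CMD_SET_MEMLIMIT_PROPERTIES,
	    pid, 0, &props, sizeof(props));
}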
+
+
 #ifdef XNU_KERNEL_PRIVATE
 
-/* p_memstat_state flags */
+/*
+ * A process will be killed immediately if it crosses a memory limit marked as fatal.
+ * Fatal limit types are the
+ *     - default system-wide task limit
+ *     - per-task custom memory limit
+ *
+ * A process with a non-fatal memory limit can exceed that limit, but becomes an early
+ * candidate for jetsam when the device is under memory pressure.
+ * Non-fatal limit types are the
+ *     - high-water-mark limit
+ *
+ * P_MEMSTAT_MEMLIMIT_BACKGROUND is translated in posix_spawn as
+ *     the fatal system-wide task limit when active, and
+ *     a non-fatal inactive limit based on the limit provided.
+ *     This is necessary for backward compatibility until
+ *     the flag can be considered obsolete.
+ *
+ * Processes that opt into dirty tracking are evaluated
+ * based on clean vs dirty state.
+ *      dirty ==> active
+ *      clean ==> inactive
+ *
+ * Processes that do not opt into dirty tracking are
+ * evaluated based on priority level.
+ *      Foreground or above ==> active
+ *      Below Foreground    ==> inactive
+ */
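The rules above boil down to a small decision: select the active or inactive limit from dirty state (for dirty-tracking processes) or priority band (for everyone else), then consult the matching fatal attribute. The helper below is an illustrative paraphrase of that text, not the kernel's implementation; its boolean inputs stand in for the p_memstat state the kernel would actually consult.

/* Illustrative paraphrase of the comment above; not kernel code. */
struct memlimit_pick {
	int32_t   limit_mb;   /* which cached limit applies right now */
	boolean_t is_fatal;   /* whether crossing it kills the process */
};

static struct memlimit_pick
pick_memlimit(boolean_t tracks_dirty, boolean_t is_dirty, boolean_t is_foreground,
    int32_t active_mb, boolean_t active_fatal,
    int32_t inactive_mb, boolean_t inactive_fatal)
{
	/* dirty ==> active, clean ==> inactive; otherwise use the priority band. */
	boolean_t active = tracks_dirty ? is_dirty : is_foreground;
	struct memlimit_pick pick;

	pick.limit_mb = active ? active_mb : inactive_mb;
	pick.is_fatal = active ? active_fatal : inactive_fatal;
	return pick;
}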
+
+/*
+ * p_memstat_state flag holds
+ *     - in kernel process state and memlimit state
+ */
 
 #define P_MEMSTAT_SUSPENDED            0x00000001
 #define P_MEMSTAT_FROZEN               0x00000002
@@ -227,12 +314,21 @@ typedef struct memorystatus_priority_properties {
 #define P_MEMSTAT_PRIOR_THAW           0x00000400
 #define P_MEMSTAT_MEMLIMIT_BACKGROUND  0x00000800 /* Task has a memory limit for when it's in the background. Used for a process' "high water mark".*/
 #define P_MEMSTAT_INTERNAL             0x00001000
-#define P_MEMSTAT_FATAL_MEMLIMIT       0x00002000 /* cross this limit and the process is killed. Types: system-wide default task memory limit and per-task custom memory limit. */
+#define P_MEMSTAT_FATAL_MEMLIMIT                  0x00002000   /* current fatal state of the process's memlimit */
+#define P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL           0x00004000   /* if set, exceeding limit is fatal when the process is active   */
+#define P_MEMSTAT_MEMLIMIT_ACTIVE_EXC_TRIGGERED   0x00008000   /* if set, suppresses high-water-mark EXC_RESOURCE, allows one hit per active limit */
+#define P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL         0x00010000   /* if set, exceeding limit is fatal when the process is inactive */
+#define P_MEMSTAT_MEMLIMIT_INACTIVE_EXC_TRIGGERED 0x00020000   /* if set, suppresses high-water-mark EXC_RESOURCE, allows one hit per inactive limit */
 
 extern void memorystatus_init(void) __attribute__((section("__TEXT, initcode")));
 
+extern void memorystatus_init_at_boot_snapshot(void);
+
 extern int memorystatus_add(proc_t p, boolean_t locked);
-extern int memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective, boolean_t update_memlimit, int32_t memlimit, boolean_t memlimit_background, boolean_t is_fatal_limit);
+extern int memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective,
+                              boolean_t update_memlimit, int32_t memlimit_active, boolean_t memlimit_active_is_fatal,
+                              int32_t memlimit_inactive, boolean_t memlimit_inactive_is_fatal, boolean_t memlimit_background);
+
 extern int memorystatus_remove(proc_t p, boolean_t locked);
 
 extern int memorystatus_dirty_track(proc_t p, uint32_t pcontrol);
@@ -257,6 +353,8 @@ void memorystatus_knote_unregister(struct knote *kn);
 
 #if CONFIG_JETSAM
 
+int memorystatus_get_pressure_status_kdp(void);
+
 typedef enum memorystatus_policy {
        kPolicyDefault        = 0x0, 
        kPolicyMoreFree       = 0x1,
@@ -274,6 +372,7 @@ boolean_t memorystatus_kill_on_FC_thrashing(boolean_t async);
 boolean_t memorystatus_kill_on_vnode_limit(void);
 
 void memorystatus_on_ledger_footprint_exceeded(int warning, const int max_footprint_mb);
+void proc_memstat_terminated(proc_t p, boolean_t set);
 void jetsam_on_ledger_cpulimit_exceeded(void);
 
 void memorystatus_pages_update(unsigned int pages_avail);
@@ -292,8 +391,7 @@ boolean_t memorystatus_idle_exit_from_VM(void);
 #define FREEZE_SUSPENDED_THRESHOLD_LOW     2
 #define FREEZE_SUSPENDED_THRESHOLD_DEFAULT 4
 
-#define FREEZE_DAILY_MB_MAX      1024
-#define FREEZE_DAILY_PAGEOUTS_MAX (FREEZE_DAILY_MB_MAX * (1024 * 1024 / PAGE_SIZE))
+#define FREEZE_DAILY_MB_MAX_DEFAULT      1024
 
 typedef struct throttle_interval_t {
        uint32_t mins;
@@ -308,7 +406,7 @@ extern boolean_t memorystatus_freeze_enabled;
 extern int memorystatus_freeze_wakeup;
 
 extern void memorystatus_freeze_init(void) __attribute__((section("__TEXT, initcode")));
-
+extern int  memorystatus_freeze_process_sync(proc_t p);
 #endif /* CONFIG_FREEZE */
 
 #if VM_PRESSURE_EVENTS
@@ -316,6 +414,8 @@ extern void memorystatus_freeze_init(void) __attribute__((section("__TEXT, initc
 extern kern_return_t memorystatus_update_vm_pressure(boolean_t);
 
 #if CONFIG_MEMORYSTATUS
+/* Flags */
+extern int memorystatus_low_mem_privileged_listener(uint32_t op_flags);
 extern int memorystatus_send_pressure_note(int pid);
 extern boolean_t memorystatus_is_foreground_locked(proc_t p);
 extern boolean_t memorystatus_bg_pressure_eligible(proc_t p);
diff --git a/bsd/sys/kern_tests.h b/bsd/sys/kern_tests.h
deleted file mode 100644 (file)
index df71d9e..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifndef _KERN_TESTS_H
-#define _KERN_TESTS_H
-
-
-#endif /* !defined(_KERN_TESTS_H) */
index e4ac6702c85d4d15ae0df767e2546564cafd5f31..50cda45db34aeac225b4d814a95eb53a9243eac8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -1628,6 +1628,22 @@ extern errno_t mbuf_pkthdr_aux_flags(mbuf_t mbuf,
 */
 extern errno_t mbuf_get_driver_scratch(mbuf_t m, u_int8_t **area,
     size_t *area_ln);
+
+/*
+       @function mbuf_get_unsent_data_bytes
+       @discussion Returns the amount of data that is waiting to be sent
+               on this interface. This is a private SPI used by cellular
+               interfaces as an indication of future activity on that
+               interface.
+       @param mbuf The mbuf containing the packet header
+       @param unsent_data A pointer to an integer where the value of
+               unsent data will be set.
+       @result 0 upon success otherwise the errno error. If the mbuf
+               packet header does not have valid data bytes, the error
+               code will be EINVAL
+ */
+extern errno_t mbuf_get_unsent_data_bytes(const mbuf_t m,
+    u_int32_t *unsent_data);
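A short usage sketch for the KPI documented above, as it might appear in a network kext. The include and the kernel printf are the only assumptions beyond the declaration itself; EINVAL is the documented result when the packet header carries no valid unsent-data count.

#include <sys/kpi_mbuf.h>

/* Sketch: caller already holds a packet-header mbuf obtained from the stack. */
static void
log_unsent_bytes(mbuf_t m)
{
	u_int32_t unsent = 0;
	errno_t err = mbuf_get_unsent_data_bytes(m, &unsent);

	if (err == 0)
		printf("unsent data queued on this interface: %u bytes\n", unsent);
	/* err == EINVAL: the packet header has no valid unsent-data bytes. */
}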
 #endif /* KERNEL_PRIVATE */
 
 #ifdef XNU_KERNEL_PRIVATE
diff --git a/bsd/sys/kpi_private.h b/bsd/sys/kpi_private.h
new file mode 100644 (file)
index 0000000..bbd67a2
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _SYS_KPI_PRIVATE_H
+#define        _SYS_KPI_PRIVATE_H
+
+/*
+ * Assorted odds and ends for exported private KPI (internal use only) 
+ */
+
+#ifdef KERNEL
+#include <sys/types.h>
+
+__BEGIN_DECLS
+
+#ifdef KERNEL_PRIVATE
+
+/* kernel-exported qsort */
+void kx_qsort (void* array, size_t nm, size_t member_size, int (*)(const void * , const void *));
+
+#endif  /* KERNEL_PRIVATE */
+
+__END_DECLS
+
+
+#endif /* KERNEL  */
+#endif /* !_SYS_KPI_PRIVATE_H */
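The new header's only export is kx_qsort(), a kernel-exported qsort with the familiar libc shape. A minimal caller, assuming nothing beyond the declaration above:

/* qsort-style comparator for an array of int. */
static int
cmp_int(const void *a, const void *b)
{
	int x = *(const int *)a;
	int y = *(const int *)b;
	return (x > y) - (x < y);
}

static void
sort_example(void)
{
	int vals[] = { 42, 7, 19, 3 };

	/* array, number of members, member size, comparator */
	kx_qsort(vals, sizeof(vals) / sizeof(vals[0]), sizeof(vals[0]), cmp_int);
}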
index 8a8f186b35aa9df8c7c4eaf41be39b854c49d5ba..ff2475e98eb251cb9a2aedd132bfbcd78c8d6308 100644 (file)
@@ -248,12 +248,12 @@ extern errno_t sock_settclassopt(socket_t so, const void* optval, size_t optlen)
 extern errno_t sock_gettclassopt(socket_t so, void* optval, size_t* optlen);
 
 #ifdef XNU_KERNEL_PRIVATE
-extern void socket_set_traffic_mgt_flags_locked(socket_t so, u_int32_t flags);
-extern void socket_clear_traffic_mgt_flags_locked(socket_t so, u_int32_t flags);
+extern void socket_set_traffic_mgt_flags_locked(socket_t so, u_int8_t flags);
+extern void socket_clear_traffic_mgt_flags_locked(socket_t so, u_int8_t flags);
 #endif /* XNU_KERNEL_PRIVATE */
 #ifdef BSD_KERNEL_PRIVATE
-extern void socket_set_traffic_mgt_flags(socket_t so, u_int32_t flags);
-extern void socket_clear_traffic_mgt_flags(socket_t so, u_int32_t flags);
+extern void socket_set_traffic_mgt_flags(socket_t so, u_int8_t flags);
+extern void socket_clear_traffic_mgt_flags(socket_t so, u_int8_t flags);
 extern errno_t socket_defunct(struct proc *, socket_t so, int);
 extern errno_t sock_receive_internal(socket_t, struct msghdr *, mbuf_t *,
     int, size_t *);
index c52153117c22b9f372566c4da5edfedbbb32a3ba..14fef1c2a96267054e5a236066c86bac60594c09 100644 (file)
@@ -564,6 +564,7 @@ struct sflt_filter {
                description of domain, type, and protocol.
        @param filter A structure describing the filter.
        @param domain The protocol domain these filters will be attached to.
+               Only PF_INET & PF_INET6 domains are supported.
        @param type The socket type these filters will be attached to.
        @param protocol The protocol these filters will be attached to.
        @result 0 on success otherwise the errno error.
index fa374a0e8de73d1febde53ea6e3c46cfa593424f..554176c4a1c719947c517e76463a7c295cc7b8e9 100644 (file)
@@ -1,23 +1,22 @@
 /*
- * $Id: lctx.h,v 1.1.6.2 2006/03/03 23:20:46 msteil Exp $
+ * TODO: remove this file
  */
-
 #ifndef _SYS_LCTX_H_
 #define _SYS_LCTX_H_
 
-#include <sys/syscall.h>
-
 #ifndef KERNEL
 static __inline pid_t
 getlcid(pid_t pid)
 {
-        return (syscall(SYS_getlcid, pid));
+       errno = ENOSYS;
+       return -1;
 }
 
 static __inline int
 setlcid(pid_t pid, pid_t lcid)
 {
-        return (syscall(SYS_setlcid, pid, lcid));
+       errno = ENOSYS;
+       return -1;
 }
 #endif
 
index 123e742fbcef7e0921d99cfe112d9370bce777e3..a5e736101c189b680b4f92b219b61a9b70994a53 100644 (file)
 #define FS_DIR_LOCATION                "/System/Library/Filesystems"
 #define FS_DIR_SUFFIX          ".fs"
 #define FS_UTIL_SUFFIX         ".util"
-#define FS_OPEN_SUFFIX         ".openfs.tiff"
-#define FS_CLOSED_SUFFIX       ".fs.tiff"
-#define FS_NAME_SUFFIX         ".name"
-#define FS_LABEL_SUFFIX                ".label"
 
 /*
  * .util program commands - all sent in the form "-p" or "-m" ... as argv[1].
@@ -79,9 +75,6 @@
 #define FSUC_REPAIR            'r'     /* repair ('fsck') FS */ 
        /* example usage: foo.util -r fd0 removable */
 
-#define        FSUC_INITIALIZE         'i'     /* initialize FS */
-       /* example usage: foo.util -i fd0 removable */ 
-
 #define FSUC_UNMOUNT           'u'     /* unmount FS */
        /* example usage: foo.util -u fd0 /bar */ 
 
 #define        DEVICE_REMOVABLE        "removable"
 #define        DEVICE_FIXED            "fixed"
 
-/*
- *     Additional parameters to the mount command - used by WSM when they
- *     appear in the /etc/mtab file.
- */
-#define        MNTOPT_FS               "filesystem=" /* e.g. "filesystem=DOS" */
-#define        MNTOPT_REMOVABLE        "removable"
-
 #endif /* _SYS_LOADABLE_FS_ */
index c12abebf00612ce0cd6f03ac7b83a08111fb9645..e20bf87335bbd6b2c3222a4ea9bcdf388de6eef4 100644 (file)
@@ -102,7 +102,8 @@ struct lockf {
        TAILQ_ENTRY(lockf) lf_block;/* A request waiting for a lock */
 #if IMPORTANCE_INHERITANCE
        int     lf_boosted;         /* Is the owner of the lock boosted */
-#endif /* IMPORTANCE_INHERITANCE */
+#endif
+       struct proc *lf_owner;      /* The proc that did the SETLK, if known */
 };
 
 #pragma pack()
index ef423f5310ed2878350e8796d05ec66848ef3664..713210e1d6d9ccd0cd7a6c8b0aa41efeaf65ed41 100644 (file)
 #define M_MACTEMP      104     /* MAC framework */
 #define M_SBUF         105     /* string buffers */
 #define M_EXTATTR      106     /* extended attribute */
-#define M_LCTX         107     /* process login context */
-/* M_TRAFFIC_MGT 108 */
+#define M_SELECT       107     /* per-thread select memory */
+/* M_TRAFFIC_MGT       108 */
 #if HFS_COMPRESSION
 #define M_DECMPFS_CNODE        109     /* decmpfs cnode structures */
 #endif /* HFS_COMPRESSION */
 #define M_NECP_IP_POLICY 121 /* NECP IP-level policies */
 #define M_FD_VN_DATA   122     /* Per fd vnode data */
 #define M_FD_DIRBUF    123     /* Directory entries' buffer */
+#define M_NETAGENT     124     /* Network Agents */
 
-#define        M_LAST          124     /* Must be last type + 1 */
+#define        M_LAST          125     /* Must be last type + 1 */
 
 #else /* BSD_KERNEL_PRIVATE */
 
@@ -259,6 +260,67 @@ extern struct kmemstats kmemstats[];
  * The malloc/free primitives used
  * by the BSD kernel code.
  */
+#if XNU_KERNEL_PRIVATE
+
+#include <mach/vm_types.h>
+
+#define        MALLOC(space, cast, size, type, flags) \
+       ({ static vm_allocation_site_t site __attribute__((section("__DATA, __data"))); \
+       (space) = (cast)__MALLOC(size, type, flags, &site); })
+#define        REALLOC(space, cast, addr, size, type, flags) \
+       ({ static vm_allocation_site_t site __attribute__((section("__DATA, __data"))); \
+       (space) = (cast)__REALLOC(addr, size, type, flags, &site); })
+
+#define        _MALLOC(size, type, flags) \
+       ({ static vm_allocation_site_t site __attribute__((section("__DATA, __data"))); \
+       __MALLOC(size, type, flags, &site); })
+#define        _REALLOC(addr, size, type, flags) \
+       ({ static vm_allocation_site_t site __attribute__((section("__DATA, __data"))); \
+       __REALLOC(addr, size, type, flags, &site); })
+
+#define        _MALLOC_ZONE(size, type, flags) \
+       ({ static vm_allocation_site_t site __attribute__((section("__DATA, __data"))); \
+       __MALLOC_ZONE(size, type, flags, &site); })
+
+#define FREE(addr, type) \
+       _FREE((void *)addr, type)
+
+#define MALLOC_ZONE(space, cast, size, type, flags) \
+       (space) = (cast)_MALLOC_ZONE(size, type, flags)
+
+#define FREE_ZONE(addr, size, type) \
+       _FREE_ZONE((void *)addr, size, type)
+
+extern void    *__MALLOC(
+                       size_t                size,
+                       int                   type,
+                       int                   flags,
+                       vm_allocation_site_t *site);
+
+extern void    _FREE(
+                       void            *addr,
+                       int             type);
+
+extern void    *__REALLOC(
+                       void                 *addr,
+                       size_t                size,
+                       int                   type,
+                       int                   flags,
+                       vm_allocation_site_t *site);
+
+extern void    *__MALLOC_ZONE(
+                       size_t          size,
+                       int             type,
+                       int             flags,
+                       vm_allocation_site_t *site);
+
+extern void    _FREE_ZONE(
+                       void            *elem,
+                       size_t          size,
+                       int             type);
+
+#else /* XNU_KERNEL_PRIVATE */
+
 #define        MALLOC(space, cast, size, type, flags) \
        (space) = (cast)_MALLOC(size, type, flags)
 
@@ -299,6 +361,9 @@ extern void _FREE_ZONE(
                        size_t          size,
                        int             type);
 
+
+#endif /* !XNU_KERNEL_PRIVATE */
+
 #endif /* KERNEL */
 
 #endif /* _SYS_MALLOC_H_ */
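For code built inside XNU, the statement-expression forms above plant a static vm_allocation_site_t at each call site so allocations can be attributed, while the caller-visible shape of MALLOC/FREE is unchanged. A hedged sketch of that unchanged usage (M_TEMP, M_WAITOK, and M_ZERO are the pre-existing malloc type and flags from this header family):

/* Sketch of XNU-internal usage; MALLOC expands to __MALLOC with a per-site
 * vm_allocation_site_t, but callers write the same thing they always did. */
static int
make_scratch(size_t len, char **out)
{
	char *buf;

	MALLOC(buf, char *, len, M_TEMP, M_WAITOK | M_ZERO);
	if (buf == NULL)
		return ENOMEM;

	*out = buf;           /* caller eventually does FREE(buf, M_TEMP) */
	return 0;
}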
index 5a7913ea89417220c8a5b2a56b7035677bb0e0ea..b5b7ee8020ab6b8043c31381dbc8bd4d70d6e146 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
 #define        _MLEN           (MSIZE - sizeof(struct m_hdr))  /* normal data len */
 #define        _MHLEN          (_MLEN - sizeof(struct pkthdr)) /* data len w/pkthdr */
 
-#define        NMBPBGSHIFT     (MBIGCLSHIFT - MSIZESHIFT)
-#define        NMBPBG          (1 << NMBPBGSHIFT)      /* # of mbufs per big cl */
+#define        NMBPGSHIFT      (PAGE_SHIFT - MSIZESHIFT)
+#define        NMBPG           (1 << NMBPGSHIFT)       /* # of mbufs per page */
 
-#define        NCLPBGSHIFT     (MBIGCLSHIFT - MCLSHIFT)
-#define        NCLPBG          (1 << NCLPBGSHIFT)      /* # of cl per big cl */
+#define        NCLPGSHIFT      (PAGE_SHIFT - MCLSHIFT)
+#define        NCLPG           (1 << NCLPGSHIFT)       /* # of cl per page */
 
-#define        NMBPCLSHIFT     (NMBPBGSHIFT - NCLPBGSHIFT)
+#define        NBCLPGSHIFT     (PAGE_SHIFT - MBIGCLSHIFT)
+#define NBCLPG         (1 << NBCLPGSHIFT)      /* # of big cl per page */
+
+#define        NMBPCLSHIFT     (MCLSHIFT - MSIZESHIFT)
 #define        NMBPCL          (1 << NMBPCLSHIFT)      /* # of mbufs per cl */
 
-#define        NCLPJCLSHIFT    ((M16KCLSHIFT - MBIGCLSHIFT) + NCLPBGSHIFT)
+#define        NCLPJCLSHIFT    (M16KCLSHIFT - MCLSHIFT)
 #define        NCLPJCL         (1 << NCLPJCLSHIFT)     /* # of cl per jumbo cl */
 
+#define        NCLPBGSHIFT     (MBIGCLSHIFT - MCLSHIFT)
+#define        NCLPBG          (1 << NCLPBGSHIFT)      /* # of cl per big cl */
+
+#define        NMBPBGSHIFT     (MBIGCLSHIFT - MSIZESHIFT)
+#define        NMBPBG          (1 << NMBPBGSHIFT)      /* # of mbufs per big cl */
+
 /*
  * Macros for type conversion
  * mtod(m,t) - convert mbuf pointer to data pointer of correct type
@@ -273,8 +282,9 @@ struct proto_mtag {
  * NECP specific mbuf tag.
  */
 struct necp_mtag {
-       uint32_t        necp_policy_id;
-       uint32_t        necp_last_interface_index;
+       u_int32_t       necp_policy_id;
+       u_int32_t       necp_last_interface_index;
+       u_int32_t       necp_route_rule_id;
 };
 
 /*
@@ -350,11 +360,13 @@ struct    pkthdr {
 #define        dst_ifindex     _pkt_iaif.dst
 #define        dst_iff         _pkt_iaif.dst_flags
                u_int64_t pkt_ifainfo;  /* data field used by ifainfo */
+               u_int32_t pkt_unsent_databytes; /* unsent data */
        };
 #if MEASURE_BW
        u_int64_t pkt_bwseq;            /* sequence # */
 #endif /* MEASURE_BW */
        u_int64_t pkt_enqueue_ts;       /* enqueue time */
+
        /*
         * Tags (external and built-in)
         */
@@ -436,6 +448,10 @@ struct     pkthdr {
 #define        PKTF_FORWARDED          0x10000 /* pkt was forwarded from another i/f */
 #define        PKTF_PRIV_GUARDED       0x20000 /* pkt_mpriv area guard enabled */
 #define        PKTF_KEEPALIVE          0x40000 /* pkt is kernel-generated keepalive */
+#define        PKTF_SO_REALTIME        0x80000 /* data is realtime traffic */
+#define        PKTF_VALID_UNSENT_DATA  0x100000 /* unsent data is valid */
+#define        PKTF_TCP_REXMT          0x200000 /* packet is TCP retransmission */
+
 /* flags related to flow control/advisory and identification */
 #define        PKTF_FLOW_MASK  \
        (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC | PKTF_FLOW_RAWSOCK)
@@ -721,7 +737,8 @@ do {                                                                        \
  * If how is M_DONTWAIT and allocation fails, the original mbuf chain
  * is freed and m is set to NULL.
  */
-#define        M_PREPEND(m, plen, how) ((m) = m_prepend_2((m), (plen), (how)))
+#define        M_PREPEND(m, plen, how, align)  \
+    ((m) = m_prepend_2((m), (plen), (how), (align)))
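M_PREPEND now forwards an explicit alignment for the prepended data to m_prepend_2(), so every caller supplies a fourth argument. A hedged sketch of the updated call shape (the struct ip header and the alignment value of 0 are illustrative only):

static int
prepend_ip_header(struct mbuf **mp)
{
	/* On M_DONTWAIT failure the chain is freed and the pointer becomes
	 * NULL, exactly as the comment above describes. */
	M_PREPEND(*mp, sizeof(struct ip), M_DONTWAIT, 0);
	if (*mp == NULL)
		return ENOBUFS;
	return 0;
}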
 
 /* change mbuf to new type */
 #define        MCHTYPE(m, t)           m_mchtype(m, t)
@@ -1114,7 +1131,7 @@ extern struct mbuf *m_getpacket(void);
 extern struct mbuf *m_getpackets(int, int, int);
 extern struct mbuf *m_mclget(struct mbuf *, int);
 extern void *m_mtod(struct mbuf *);
-extern struct mbuf *m_prepend_2(struct mbuf *, int, int);
+extern struct mbuf *m_prepend_2(struct mbuf *, int, int, int);
 extern struct mbuf *m_pullup(struct mbuf *, int);
 extern struct mbuf *m_split(struct mbuf *, int, int);
 extern void m_mclfree(caddr_t p);
@@ -1180,6 +1197,9 @@ extern void m_mclfree(caddr_t p);
 #define        MBUF_TC2SCVAL(_tc)      ((_tc) << 7)
 #define IS_MBUF_SC_BACKGROUND(_sc) (((_sc) == MBUF_SC_BK_SYS) || \
        ((_sc) == MBUF_SC_BK))
+#define        IS_MBUF_SC_REALTIME(_sc)        ((_sc) >= MBUF_SC_AV && (_sc) <= MBUF_SC_VO)
+#define IS_MBUF_SC_BESTEFFORT(_sc)     ((_sc) == MBUF_SC_BE || \
+    (_sc) == MBUF_SC_RD || (_sc) == MBUF_SC_OAM)
 
 #define        SCIDX_BK_SYS            MBUF_SCIDX(MBUF_SC_BK_SYS)
 #define        SCIDX_BK                MBUF_SCIDX(MBUF_SC_BK)
@@ -1221,8 +1241,8 @@ extern void m_mclfree(caddr_t p);
        c == SCVAL_RV || c == SCVAL_VI || c == SCVAL_VO ||              \
        c == SCVAL_CTL)
 
-extern union mbigcluster *mbutl;       /* start VA of mbuf pool */
-extern union mbigcluster *embutl;      /* end VA of mbuf pool */
+extern unsigned char *mbutl;   /* start VA of mbuf pool */
+extern unsigned char *embutl;  /* end VA of mbuf pool */
 extern unsigned int nmbclusters;       /* number of mapped clusters */
 extern int njcl;               /* # of jumbo clusters  */
 extern int njclbytes;  /* size of a jumbo cluster */
diff --git a/bsd/sys/memory_maintenance.h b/bsd/sys/memory_maintenance.h
new file mode 100644 (file)
index 0000000..1de00c6
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _SYS_MEMORY_MAINTENANCE_H_
+#define _SYS_MEMORY_MAINTENANCE_H_
+
+/*
+ * File:       sys/memory_maintenance.h
+ * Author:     Samuel Gosselin [sgosselin@apple.com]
+ *
+ * Header file for Memory Maintenance support.
+ */
+
+/*
+ * The kern.darkboot sysctl can be controlled from kexts or userspace. If
+ * processes want to change the sysctl value, they require the
+ * 'com.apple.kern.darkboot' entitlement.
+ *
+ * Operating the kern.darkboot sysctl is done using the commands below:
+ *
+ *     - MEMORY_MAINTENANCE_DARK_BOOT_UNSET
+ *             Unset the kern.darkboot sysctl (kern.darkboot=0).
+ *     - MEMORY_MAINTENANCE_DARK_BOOT_SET
+ *             Set the kern.darkboot sysctl (kern.darkboot=1).
+ *     - MEMORY_MAINTENANCE_DARK_BOOT_SET_PERSISTENT
+ *             Set the kern.darkboot sysctl (kern.darkboot=1) and save its
+ *             value into the 'darkboot' NVRAM variable.
+ *
+ * Example:
+ *     sysctl kern.darkboot=2
+ */
+#define        MEMORY_MAINTENANCE_DARK_BOOT_UNSET              (0)
+#define        MEMORY_MAINTENANCE_DARK_BOOT_SET                (1)
+#define        MEMORY_MAINTENANCE_DARK_BOOT_SET_PERSISTENT     (2)
+
+#define MEMORY_MAINTENANCE_DARK_BOOT_NVRAM_NAME                "darkboot"
+
+#endif /* _SYS_MEMORY_MAINTENANCE_H_ */
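From an entitled userspace process the same commands can be issued through sysctlbyname(); this is a sketch only, and it assumes the caller holds the com.apple.kern.darkboot entitlement and can see this private header.

#include <sys/sysctl.h>
#include <sys/memory_maintenance.h>   /* private header; assumed visible */
#include <stdio.h>

static int
set_darkboot_persistent(void)
{
	int cmd = MEMORY_MAINTENANCE_DARK_BOOT_SET_PERSISTENT;

	/* Assumed to fail (e.g. EPERM) when the entitlement is missing. */
	if (sysctlbyname("kern.darkboot", NULL, NULL, &cmd, sizeof(cmd)) != 0) {
		perror("sysctl kern.darkboot");
		return -1;
	}
	return 0;
}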
+
index acdbeb59fa270b6b46417c5ad9945e45753d3023..06c76abf5133ae3e161a24b51c71e0cfd1066a15 100644 (file)
 #define        MAP_HASSEMAPHORE 0x0200 /* region may contain semaphores */
 #define MAP_NOCACHE     0x0400 /* don't cache pages for this mapping */
 #define MAP_JIT                 0x0800 /* Allocate a region that will be used for JIT purposes */
+
+/*
+ * Mapping type
+ */
+#define        MAP_FILE        0x0000  /* map from file (default) */
+#define        MAP_ANON        0x1000  /* allocated from memory, swap space */
+#define        MAP_ANONYMOUS   MAP_ANON
+
+/*
+ * The MAP_RESILIENT_* flags can be used when the caller wants to map some
+ * possibly unreliable memory and be able to access it safely, possibly
+ * getting the wrong contents rather than raising any exception.
+ * For safety reasons, such mappings have to be read-only (PROT_READ access
+ * only).
+ *
+ * MAP_RESILIENT_CODESIGN:
+ *     accessing this mapping will not generate code-signing violations,
+ *     even if the contents are tainted.
+ * MAP_RESILIENT_MEDIA:
+ *     accessing this mapping will not generate an exception if the contents
+ *     are not available (unreachable removable or remote media, access beyond
+ *     end-of-file, ...).  Missing contents will be replaced with zeroes.
+ */
+#define MAP_RESILIENT_CODESIGN 0x2000 /* no code-signing failures */
+#define MAP_RESILIENT_MEDIA    0x4000 /* no backing-store failures */
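A hedged sketch of mapping possibly-unreliable media with the new flag; per the comment above the mapping must be read-only, and missing contents are read back as zeroes instead of faulting.

#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <stddef.h>

/* Sketch: map a file on removable/remote media so reads never raise an
 * exception; MAP_RESILIENT_MEDIA requires PROT_READ-only access. */
static void *
map_media_file(const char *path, size_t len)
{
	int fd = open(path, O_RDONLY);
	if (fd < 0)
		return NULL;

	void *p = mmap(NULL, len, PROT_READ,
	    MAP_FILE | MAP_SHARED | MAP_RESILIENT_MEDIA, fd, 0);
	close(fd);                          /* the mapping survives the close */
	return (p == MAP_FAILED) ? NULL : p;
}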
+
 #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
 
 /*
 #define MS_KILLPAGES    0x0004  /* invalidate pages, leave mapped */
 #define MS_DEACTIVATE   0x0008  /* deactivate pages, leave mapped */
 
-/*
- * Mapping type
- */
-#define        MAP_FILE        0x0000  /* map from file (default) */
-#define        MAP_ANON        0x1000  /* allocated from memory, swap space */
 #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
 
 
 #define MADV_FREE_REUSABLE     7       /* pages can be reused (by anyone) */
 #define MADV_FREE_REUSE                8       /* caller wants to reuse those pages */
 #define MADV_CAN_REUSE         9
+#define MADV_PAGEOUT           10      /* page out now (internal only) */
 
 /*
  * Return bits from mincore
index 965d0a6309c9d8f82f3f2c1caa12463a857d6ced..e73e4b9af554a610e9f96911d73ccbcae7415bad 100644 (file)
@@ -137,6 +137,7 @@ struct mount {
        pending_io_t    mnt_pending_read_size  __attribute__((aligned(sizeof(pending_io_t))));  /* byte count of pending reads */
        struct timeval  mnt_last_write_issued_timestamp;
        struct timeval  mnt_last_write_completed_timestamp;
+       int64_t         mnt_max_swappin_available;
        
        lck_rw_t        mnt_rwlock;             /* mutex readwrite lock */
        lck_mtx_t       mnt_renamelock;         /* mutex that serializes renames that change shape of tree */
@@ -206,6 +207,8 @@ struct mount {
 #define MNT_IOFLAGS_UNMAP_SUPPORTED    0x00000002
 #define MNT_IOFLAGS_IOSCHED_SUPPORTED  0x00000004
 #define MNT_IOFLAGS_CSUNMAP_SUPPORTED  0x00000008
+#define MNT_IOFLAGS_SWAPPIN_SUPPORTED  0x00000010
+#define MNT_IOFLAGS_FUSION_DRIVE       0x00000020
 
 /*
  * ioqueue depth for devices that don't report one
@@ -413,6 +416,7 @@ struct user32_statfs {
 
 __BEGIN_DECLS
 
+extern boolean_t root_is_CF_drive;
 extern uint32_t mount_generation;
 extern TAILQ_HEAD(mntlist, mount) mountlist;
 void mount_list_lock(void);
index 122e3c527efada23a0be4dff5cc35541cd6a5b9c..d1ab960965d03df49725ca862bb6a64f99b24a29 100644 (file)
@@ -85,6 +85,7 @@ void munge_wwlw(void *args);
 void munge_wwlll(void *args);
 void munge_wwllww(void *args);
 void munge_wlw(void *args);
+void munge_wlww(void *args);
 void munge_wlwwwll(void *args);
 void munge_wlwwwllw(void *args);
 void munge_wlwwlwlw(void *args);
@@ -107,6 +108,7 @@ void munge_wwwwwwlw(void *args);
 void munge_wwwwwwll(void *args);
 void munge_wsw(void *args);
 void munge_wws(void *args);
+void munge_wwws(void *args);
 void munge_wwwsw(void *args);
 void munge_llllll(void *args);
 void munge_l(void *args);
@@ -114,5 +116,4 @@ void munge_ll(void *args);
 void munge_lw(void *args);
 void munge_lwww(void *args);
 void munge_wwlwww(void *args);
-
 #endif /* __MUNGE_H__ */
diff --git a/bsd/sys/pgo.h b/bsd/sys/pgo.h
new file mode 100644 (file)
index 0000000..8f7909b
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2014 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _SYS_PGO_H_
+#define _SYS_PGO_H_
+
+#include <sys/_types.h>
+#include <sys/_types/_ssize_t.h>
+#include <stdint.h>
+#include <uuid/uuid.h>
+
+#define PGO_HIB (1)
+#define PGO_WAIT_FOR_UNLOAD (2)
+#define PGO_METADATA (4)
+
+#define PGO_ALL_FLAGS (PGO_HIB | PGO_WAIT_FOR_UNLOAD | PGO_METADATA)
+
+
+/**
+ * This is a serialization format for metadata related to a profile data buffer.
+ *
+ * If metadata is present, this footer will appear at the end of the file, so
+ * the last four bytes of the file will be the ASCII string "meta".
+ *
+ * The metadata is stored in an environment-string style buffer.  The buffer
+ * consists of key-value pairs, which are delimited by null bytes.  Each
+ * key-value pair is a string of the form "FOO=bar".  Everything before the
+ * first equal sign is the key, everything after is the value.
+ *
+ * All members are in network byte order.
+ */
+struct pgo_metadata_footer {
+    /**
+     * number of pairs.
+     *
+     * This should be htonl(n), where n is the number of key-value pairs in the
+     * metadata buffer
+     */
+    uint32_t number_of_pairs;
+
+    /**
+     * pointer to the metadata buffer
+     *
+     * This should be htonl(offset), where offset is the backwards offset from
+     * the end of the file to the metadata buffer.
+     */
+    uint32_t  offset_to_pairs;
+
+    /**
+     * magic number
+     *
+     * This should be  htonl(0x6d657461);
+     */
+    uint32_t magic;
+};
+
+#ifndef KERNEL
+
+ssize_t grab_pgo_data(
+       uuid_t *uuid,
+       int flags,
+       unsigned char *buffer,
+       ssize_t size);
+
+
+#endif
+
+#endif
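Given a raw profile buffer, the footer can be located from the end: the last four bytes are the magic ("meta") and every field is network byte order. A sketch of walking back to the key-value pairs, assuming only the layout described in the comments above:

#include <arpa/inet.h>    /* ntohl */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Sketch: validate the trailing footer and return the start of the key-value
 * pair area, or NULL if the metadata is absent or malformed. */
static const char *
find_pgo_metadata(const unsigned char *buf, size_t len, uint32_t *npairs)
{
	struct pgo_metadata_footer f;

	if (len < sizeof(f))
		return NULL;
	memcpy(&f, buf + len - sizeof(f), sizeof(f));

	if (ntohl(f.magic) != 0x6d657461)         /* "meta" */
		return NULL;

	uint32_t back = ntohl(f.offset_to_pairs); /* backwards offset from EOF */
	if (back > len)
		return NULL;

	*npairs = ntohl(f.number_of_pairs);
	return (const char *)(buf + len - back);
}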
index 27d7eb34e069f7b854bc93ed6d0fe895162a8398..ff38da81d10db0ea64d851fbf09de3ae638764de 100644 (file)
@@ -84,6 +84,8 @@
 #define PRIV_HW_DEBUG_DATA         1004        /* Extract hw-specific debug data (e.g. ECC data) */
 #define PRIV_SELECTIVE_FORCED_IDLE     1005    /* Configure and control Selective Forced Idle (SFI) subsystem */
 #define PRIV_PROC_TRACE_INSPECT 1006   /* Request trace memory of arbitrary process to be inspected */
+#define PRIV_DARKBOOT          1007    /* Manipulate the darkboot flag */
+#define PRIV_WORK_INTERVAL     1008    /* Express details about a work interval */
 
 /*
  * Virtual memory privileges.
index 8c4ebb790270b5fa64b75ab5e07c32d394441574..4d3b2cbd3c0457fcce929d4cc5f279213eb3906f 100644 (file)
 #endif
 #include <mach/boolean.h>
 
+#ifdef XNU_KERNEL_PRIVATE
+#include <mach/coalition.h>            /* COALITION_NUM_TYPES */
+#endif
+
 #if defined(XNU_KERNEL_PRIVATE) || !defined(KERNEL) 
 
 struct session;
@@ -178,10 +182,6 @@ struct extern_proc {
 #define        P_AFFINITY      0x00010000      /* xxx */
 #define        P_TRANSLATED    0x00020000      /* xxx */
 #define        P_CLASSIC       P_TRANSLATED    /* xxx */
-/*
-#define        P_FSTRACE       0x10000 / * tracing via file system (elsewhere?) * /
-#define        P_SSTEP         0x20000 / * process needs single-step fixup ??? * /
-*/
 
 #define        P_DELAYIDLESLEEP 0x00040000     /* Process is marked to delay idle sleep on disk IO */
 #define        P_CHECKOPENEVT  0x00080000      /* check if a vnode has the OPENEVT flag set on open */
@@ -193,7 +193,7 @@ struct extern_proc {
 
 #define        P_THCWD         0x01000000      /* process has thread cwd  */
 #define        P_RESV9         0x02000000      /* (P_VFORK)process has vfork children */
-#define        P_RESV10        0x04000000      /* used to be P_NOATTACH */
+#define        P_RESV10        0x04000000      /* reserved flag */
 #define        P_RESV11        0x08000000      /* (P_INVFORK) proc in vfork */
 
 #define        P_NOSHLIB       0x10000000      /* no shared libs are in use for proc */
@@ -269,6 +269,8 @@ extern int proc_ppid(proc_t);
 extern int proc_noremotehang(proc_t);
 /* returns 1 if the process is marked for force quota */
 extern int proc_forcequota(proc_t);
+/* returns 1 if the process is chrooted */
+extern int proc_chrooted(proc_t);
 
 /* this routine returns 1 if the process is running with 64bit address space, else 0 */
 extern int proc_is64bit(proc_t);
@@ -329,6 +331,8 @@ extern int proc_pidbackgrounded(pid_t pid, uint32_t* state);
  */
 extern uint64_t proc_uniqueid(proc_t);
 
+extern void proc_set_responsible_pid(proc_t target_proc, pid_t responsible_pid);
+
 #endif /* KERNEL_PRIVATE */
 
 #ifdef XNU_KERNEL_PRIVATE
@@ -345,7 +349,7 @@ extern int proc_pidoriginatoruuid(uuid_t uuid_buf, uint32_t buffersize);
 extern uint64_t proc_was_throttled(proc_t);
 extern uint64_t proc_did_throttle(proc_t);
 
-extern uint64_t proc_coalitionid(proc_t);
+extern void proc_coalitionids(proc_t, uint64_t [COALITION_NUM_TYPES]);
 
 #endif /* XNU_KERNEL_PRIVATE*/
 
index e8ca29c4f11aeee50789cc6b967918f06041c3de..443861ddc22f4a9b48569d6ea5d53dcf6292d7bd 100644 (file)
 #include <sys/socket.h>
 #include <sys/un.h>
 #include <sys/kern_control.h>
+#include <sys/event.h>
 #include <net/if.h>
 #include <net/route.h>
 #include <netinet/in.h>
 #include <netinet/tcp.h>
 #include <mach/machine.h>
 
+#ifdef PRIVATE
+#include <mach/coalition.h> /* COALITION_NUM_TYPES */
+#endif
+
 __BEGIN_DECLS
 
 
@@ -118,12 +123,20 @@ struct proc_archinfo {
 };
 
 struct proc_pidcoalitioninfo {
-       uint64_t coalition_id;
+       uint64_t coalition_id[COALITION_NUM_TYPES];
        uint64_t reserved1;
        uint64_t reserved2;
        uint64_t reserved3;
 };
 
+struct proc_originatorinfo {
+       uuid_t                  originator_uuid;        /* UUID of the originator process */
+       pid_t                   originator_pid;         /* pid of the originator process */
+       uint64_t                p_reserve2;
+       uint64_t                p_reserve3;
+       uint64_t                p_reserve4;
+};
+
 #endif
 
 
@@ -286,6 +299,7 @@ struct proc_fileinfo {
 #define PROC_FP_SHARED 1       /* shared by more than one fd */
 #define PROC_FP_CLEXEC 2       /* close on exec */
 #define PROC_FP_GUARDED        4       /* guarded fd */
+#define PROC_FP_CLFORK 8       /* close on fork */
 
 #define PROC_FI_GUARD_CLOSE            (1u << 0)
 #define PROC_FI_GUARD_DUP              (1u << 1)
@@ -605,8 +619,23 @@ struct kqueue_info {
        uint32_t                kq_state;
        uint32_t                rfu_1;  /* reserved */
 };
-#define PROC_KQUEUE_SELECT     1
-#define PROC_KQUEUE_SLEEP      2
+
+/* keep in sync with KQ_* in sys/eventvar.h */
+#define PROC_KQUEUE_SELECT     0x01
+#define PROC_KQUEUE_SLEEP      0x02
+#define PROC_KQUEUE_32         0x08
+#define PROC_KQUEUE_64         0x10
+#define PROC_KQUEUE_QOS                0x20
+
+#ifdef PRIVATE
+struct kevent_extinfo {
+       struct kevent_qos_s kqext_kev;
+       uint64_t kqext_sdata;
+       int kqext_status;
+       int kqext_sfflags;
+       uint64_t kqext_reserved[2];
+};
+#endif /* PRIVATE */
 
 struct kqueue_fdinfo {
        struct proc_fileinfo    pfi;
@@ -748,6 +777,11 @@ struct proc_fileportinfo {
 #define PROC_PIDFDATALKINFO            8
 #define PROC_PIDFDATALKINFO_SIZE       (sizeof(struct appletalk_fdinfo))
 
+#ifdef PRIVATE
+#define PROC_PIDFDKQUEUE_EXTINFO       9
+#define PROC_PIDFDKQUEUE_EXTINFO_SIZE  (sizeof(struct kevent_extinfo))
+#endif /* PRIVATE */
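The new flavor pairs with libproc's proc_pidfdinfo(); the sketch below assumes that prototype from <libproc.h> and that, like other flavors, the call returns the number of bytes copied out.

#include <libproc.h>
#include <stdio.h>
#include <stdlib.h>

/* Sketch: fetch up to `max` kevent_extinfo records for kqueue fd `fd` of `pid`. */
static void
dump_kqueue_extinfo(pid_t pid, int fd, int max)
{
	struct kevent_extinfo *info = calloc((size_t)max, sizeof(*info));
	if (info == NULL)
		return;

	int bytes = proc_pidfdinfo((int)pid, fd, PROC_PIDFDKQUEUE_EXTINFO,
	    info, max * (int)sizeof(*info));
	if (bytes > 0) {
		int n = bytes / (int)sizeof(*info);
		for (int i = 0; i < n; i++)
			printf("knote %d: status %d, sfflags 0x%x\n",
			    i, info[i].kqext_status, (unsigned)info[i].kqext_sfflags);
	}
	free(info);
}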
+
 /* Flavors for proc_pidfileportinfo */
 
 #define PROC_PIDFILEPORTVNODEPATHINFO  2       /* out: vnode_fdinfowithpath */
@@ -800,6 +834,16 @@ struct proc_fileportinfo {
 #define PROC_PIDORIGINATOR_BGSTATE     0x2
 #define PROC_PIDORIGINATOR_BGSTATE_SIZE (sizeof(uint32_t))
 
+#define PROC_PIDORIGINATOR_PID_UUID     0x3
+#define PROC_PIDORIGINATOR_PID_UUID_SIZE (sizeof(struct proc_originatorinfo))
+
+/* Flavors for proc_listcoalitions */
+#define LISTCOALITIONS_ALL_COALS       1
+#define LISTCOALITIONS_ALL_COALS_SIZE   (sizeof(struct procinfo_coalinfo))
+
+#define LISTCOALITIONS_SINGLE_TYPE     2
+#define LISTCOALITIONS_SINGLE_TYPE_SIZE (sizeof(struct procinfo_coalinfo))
+
 /* __proc_info() call numbers */
 #define PROC_INFO_CALL_LISTPIDS         0x1
 #define PROC_INFO_CALL_PIDINFO          0x2
@@ -811,6 +855,7 @@ struct proc_fileportinfo {
 #define PROC_INFO_CALL_DIRTYCONTROL     0x8
 #define PROC_INFO_CALL_PIDRUSAGE        0x9
 #define PROC_INFO_CALL_PIDORIGINATORINFO 0xa
+#define PROC_INFO_CALL_LISTCOALITIONS   0xb
 
 #endif /* PRIVATE */
 
@@ -832,6 +877,7 @@ extern int fill_pshminfo(struct pshmnode * pshm, struct pshm_info * pinfo);
 extern int fill_pseminfo(struct psemnode * psem, struct psem_info * pinfo);
 extern int fill_pipeinfo(struct pipe * cpipe, struct pipe_info * pinfo);
 extern int fill_kqueueinfo(struct kqueue * kq, struct kqueue_info * kinfo);
+extern int pid_kqueue_extinfo(proc_t, struct kqueue * kq, user_addr_t buffer, uint32_t buffersize, int32_t * retval);
 extern int fill_procworkqueue(proc_t, struct proc_workqueueinfo *);
 #endif /* XNU_KERNEL_PRIVATE */
 
index 96c7c33925cac12f44658780ac4ad380a0e2e8a0..a3d8487e5bb89d45a1d4da76756e279dccb885ec 100644 (file)
@@ -100,22 +100,6 @@ __END_DECLS
 */
 struct label;
 
-/*
- * Added by SPARTA, Inc.
- */
-/*
- * Login context.
- */
-struct lctx {
-       LIST_ENTRY(lctx) lc_list;       /* List of all login contexts. */
-       LIST_HEAD(, proc) lc_members;   /* Pointer to lc members. */
-       int             lc_mc;          /* Member Count. */
-       pid_t           lc_id;          /* Login context ID. */
-       lck_mtx_t       lc_mtx;         /* Mutex to protect members */
-
-       struct label    *lc_label;      /* Login context MAC label. */
-};
-
 /*
  * One structure allocated per session.
  */
@@ -362,8 +346,6 @@ struct      proc {
        u_short p_acflag;       /* Accounting flags. */
        volatile u_short p_vfs_iopolicy;        /* VFS iopolicy flags. (atomic bit ops) */
 
-       struct lctx *p_lctx;            /* Pointer to login context. */
-       LIST_ENTRY(proc) p_lclist;      /* List of processes in lctx. */
        user_addr_t     p_threadstart;          /* pthread start fn */
        user_addr_t     p_wqthread;             /* pthread workqueue fn */
        int     p_pthsize;                      /* pthread size */
@@ -374,6 +356,8 @@ struct      proc {
        int     p_wqsize;                       /* allocated size */
        boolean_t       p_wqiniting;            /* semaphore to serialize wq_open */
        lck_spin_t      p_wqlock;               /* lock to protect work queue */
+       struct kqueue * p_wqkqueue;             /* private workq kqueue */
+
        struct  timeval p_start;                /* starting time */
        void *  p_rcall;
        int             p_ractive;
@@ -406,12 +390,17 @@ struct    proc {
        uint64_t          p_memstat_userdata;           /* user state */
        uint64_t          p_memstat_idledeadline;       /* time at which process became clean */
 #if CONFIG_JETSAM
-       int32_t           p_memstat_memlimit;           /* cached memory limit */
+       int32_t           p_memstat_memlimit;           /* cached memory limit, toggles between active and inactive limits */
+       int32_t           p_memstat_memlimit_active;    /* memory limit enforced when process is in active jetsam state */
+       int32_t           p_memstat_memlimit_inactive;  /* memory limit enforced when process is in inactive jetsam state */
 #endif
 #if CONFIG_FREEZE
        uint32_t          p_memstat_suspendedfootprint; /* footprint at time of suspensions */
 #endif /* CONFIG_FREEZE */
 #endif /* CONFIG_MEMORYSTATUS */
+
+       /* cached proc-specific data required for corpse inspection */
+       pid_t             p_responsible_pid;    /* pid responsible for this process */
 };
 
 #define PGRPID_DEAD 0xdeaddead
@@ -460,11 +449,11 @@ struct    proc {
 #define        P_LLIMWAIT      0x00040000
 #define P_LWAITED      0x00080000 
 #define P_LINSIGNAL            0x00100000 
-#define P_UNUSED       0x00200000      /* Unused */
+#define P_LRETURNWAIT          0x00200000      /* process is completing spawn/vfork-exec/fork */
 #define P_LRAGE_VNODES 0x00400000
 #define P_LREGISTER    0x00800000      /* thread start fns registered  */
 #define P_LVMRSRCOWNER 0x01000000      /* can handle the resource ownership of  */
-/* old P_LPTERMINATE    0x02000000 */
+#define P_LRETURNWAITER 0x02000000     /* thread is waiting on P_LRETURNWAIT being cleared */
 #define P_LTERM_DECRYPTFAIL    0x04000000      /* process terminating due to key failure to decrypt */
 #define        P_LTERM_JETSAM          0x08000000      /* process is being jetsam'd */
 #define P_JETSAM_VMPAGESHORTAGE        0x00000000      /* jetsam: lowest jetsam priority proc, killed due to vm page shortage */
@@ -491,6 +480,7 @@ struct      proc {
 
 /* additional process flags */
 #define P_LADVLOCK             0x01
+#define P_LXBKIDLEINPROG       0x02
 
 /* p_vfs_iopolicy flags */
 #define P_VFS_IOPOLICY_FORCE_HFS_CASE_SENSITIVITY 0x0001
@@ -525,6 +515,9 @@ struct      proc {
 #ifdef KERNEL
 #include <sys/time.h>  /* user_timeval, user_itimerval */
 
+/* This packing breaks symmetry with the userspace side (struct extern_proc
+ * of proc.h) for the ARMV7K ABI, where 64-bit types are 64-bit aligned.
+ */
 #pragma pack(4)
 struct user32_extern_proc {
        union {
@@ -653,18 +646,6 @@ extern vm_offset_t * execargs_cache;
 
 #define SESS_LEADER(p, sessp)  ((sessp)->s_leader == (p))
 
-/* Lock and unlock a login context. */
-#define LCTX_LOCK(lc)  lck_mtx_lock(&(lc)->lc_mtx)
-#define LCTX_UNLOCK(lc)        lck_mtx_unlock(&(lc)->lc_mtx)
-#define LCTX_LOCKED(lc)
-#define LCTX_LOCK_ASSERT(lc, type)
-#define ALLLCTX_LOCK   lck_mtx_lock(&alllctx_lock)
-#define ALLLCTX_UNLOCK lck_mtx_unlock(&alllctx_lock)
-extern lck_mtx_t alllctx_lock;
-extern lck_grp_t * lctx_lck_grp;
-extern lck_grp_attr_t * lctx_lck_grp_attr;
-extern lck_attr_t * lctx_lck_attr;
-
 #define        PIDHASH(pid)    (&pidhashtbl[(pid) & pidhash])
 extern LIST_HEAD(pidhashhead, proc) *pidhashtbl;
 extern u_long pidhash;
@@ -710,17 +691,12 @@ __private_extern__ struct proc *proc_find_zombref(pid_t); /* Find zombie by id.
 __private_extern__ void proc_drop_zombref(struct proc * p);    /* Find zombie by id. */
 
 
-extern struct  lctx *lcfind(pid_t);            /* Find a login context by id */
-extern struct  lctx *lccreate(void);           /* Create a new login context */
-
 extern int     chgproccnt(uid_t uid, int diff);
-extern void    enterlctx(struct proc *p, struct lctx *l, int create);
 extern void    pinsertchild(struct proc *parent, struct proc *child);
 extern int     enterpgrp(struct proc *p, pid_t pgid, int mksess);
 extern void    fixjobc(struct proc *p, struct pgrp *pgrp, int entering);
 extern int     inferior(struct proc *p);
 extern int     leavepgrp(struct proc *p);
-extern void    leavelctx(struct proc *p);
 extern void    resetpriority(struct proc *);
 extern void    setrunnable(struct proc *);
 extern void    setrunqueue(struct proc *);
@@ -731,7 +707,7 @@ extern int  msleep0(void *chan, lck_mtx_t *mtx, int pri, const char *wmesg, int t
 extern void    vfork_return(struct proc *child, int32_t *retval, int rval);
 extern int     exit1(struct proc *, int, int *);
 extern int     exit1_internal(struct proc *, int, int *, boolean_t, boolean_t, int);
-extern int     fork1(proc_t, thread_t *, int, coalition_t);
+extern int     fork1(proc_t, thread_t *, int, coalition_t *);
 extern void vfork_exit_internal(struct proc *p, int rv, int forced);
 extern void proc_reparentlocked(struct proc *child, struct proc * newparent, int cansignal, int locked);
 extern int pgrp_iterate(struct pgrp * pgrp, int flags, int (*callout)(proc_t , void *), void *arg, int (*filterfn)(proc_t , void *), void *filterarg);
@@ -795,6 +771,10 @@ extern lck_mtx_t * pthread_list_mlock;
 #endif /* PSYNCH */
 struct uthread * current_uthread(void);
 
+void proc_set_return_wait(struct proc *);
+void proc_clear_return_wait(proc_t p, thread_t child_thread);
+void proc_wait_to_return(void);
+
 /* return 1 if process is forcing case-sensitive HFS+ access, 0 for default */
 extern int proc_is_forcing_hfs_case_sensitivity(proc_t);
 
index b7d319d1811a0db7b25c4a978420e7e3a37c8410..ac2b4f8c578f5cd2ae42ef72dd2fca7138159505 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -85,6 +85,7 @@ struct sockaddr;
 struct socket;
 struct sockopt;
 struct socket_filter;
+struct uio;
 #ifdef XNU_KERNEL_PRIVATE
 struct domain_old;
 #endif /* XNU_KERNEL_PRIVATE */
@@ -256,6 +257,8 @@ struct protosw {
 #define        PR_ATTACHED     0x800   /* protocol is attached to a domain */
 #define        PR_MULTICONN    0x1000  /* supports multiple connect calls */
 #define        PR_EVCONNINFO   0x2000  /* protocol generates conninfo event */
+#define        PR_PRECONN_WRITE        0x4000  /* protocol supports preconnect write */
+#define        PR_DATA_IDEMPOTENT      0x8000  /* protocol supports idempotent data at connectx-time */
 #define        PR_OLD          0x10000000 /* added via net_add_proto */
 
 /* pseudo-public domain flags */
@@ -384,6 +387,7 @@ struct ifnet;
 struct stat;
 struct ucred;
 struct uio;
+struct recv_msg_elem;
 
 #ifdef XNU_KERNEL_PRIVATE
 /*
@@ -450,14 +454,17 @@ struct pr_usrreqs {
        int     (*pru_connect2)(struct socket *, struct socket *);
        int     (*pru_connectx)(struct socket *, struct sockaddr_list **,
                    struct sockaddr_list **, struct proc *, uint32_t,
-                   associd_t, connid_t *, uint32_t, void *, uint32_t);
+                   sae_associd_t, sae_connid_t *, uint32_t, void *, uint32_t,
+                   struct uio *, user_ssize_t *);
        int     (*pru_control)(struct socket *, u_long, caddr_t,
                    struct ifnet *, struct proc *);
        int     (*pru_detach)(struct socket *);
        int     (*pru_disconnect)(struct socket *);
-       int     (*pru_disconnectx)(struct socket *, associd_t, connid_t);
+       int     (*pru_disconnectx)(struct socket *,
+                   sae_associd_t, sae_connid_t);
        int     (*pru_listen)(struct socket *, struct proc *);
-       int     (*pru_peeloff)(struct socket *, associd_t, struct socket **);
+       int     (*pru_peeloff)(struct socket *,
+                   sae_associd_t, struct socket **);
        int     (*pru_peeraddr)(struct socket *, struct sockaddr **);
        int     (*pru_rcvd)(struct socket *, int);
        int     (*pru_rcvoob)(struct socket *, struct mbuf *, int);
@@ -474,13 +481,13 @@ struct pr_usrreqs {
        int     (*pru_sopoll)(struct socket *, int, struct ucred *, void *);
        int     (*pru_soreceive)(struct socket *, struct sockaddr **,
                    struct uio *, struct mbuf **, struct mbuf **, int *);
-       int     (*pru_soreceive_list)(struct socket *, struct sockaddr **,
-                   struct uio **, u_int, struct mbuf **, struct mbuf **, int *);
+       int     (*pru_soreceive_list)(struct socket *, struct recv_msg_elem *, u_int,
+                   int *);
        int     (*pru_sosend)(struct socket *, struct sockaddr *,
                    struct uio *, struct mbuf *, struct mbuf *, int);
-       int     (*pru_sosend_list)(struct socket *, struct sockaddr *,
-                   struct uio **, u_int, struct mbuf *, struct mbuf *, int);
+       int     (*pru_sosend_list)(struct socket *, struct uio **, u_int, int);
        int     (*pru_socheckopt)(struct socket *, struct sockopt *);
+       int     (*pru_preconnect)(struct socket *so);
 };
 
 /* Values for pru_flags  */
@@ -499,11 +506,12 @@ extern int pru_connect_notsupp(struct socket *so, struct sockaddr *nam,
 extern int pru_connect2_notsupp(struct socket *so1, struct socket *so2);
 #ifdef XNU_KERNEL_PRIVATE
 extern int pru_connectx_notsupp(struct socket *, struct sockaddr_list **,
-    struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *,
-    uint32_t, void *, uint32_t);
-extern int pru_disconnectx_notsupp(struct socket *, associd_t, connid_t);
+    struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t,
+    sae_connid_t *, uint32_t, void *, uint32_t, struct uio *, user_ssize_t *);
+extern int pru_disconnectx_notsupp(struct socket *, sae_associd_t,
+    sae_connid_t);
 extern int pru_socheckopt_null(struct socket *, struct sockopt *);
-extern int pru_peeloff_notsupp(struct socket *, associd_t, struct socket **);
+extern int pru_peeloff_notsupp(struct socket *, sae_associd_t, struct socket **);
 #endif /* XNU_KERNEL_PRIVATE */
 extern int pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
     struct ifnet *ifp, struct proc *p);
@@ -522,14 +530,13 @@ extern int pru_shutdown_notsupp(struct socket *so);
 extern int pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam);
 extern int pru_sosend_notsupp(struct socket *so, struct sockaddr *addr,
     struct uio *uio,  struct mbuf *top, struct mbuf *control, int flags);
-extern int pru_sosend_list_notsupp(struct socket *so, struct sockaddr *addr,
-    struct uio **uio, u_int, struct mbuf *top, struct mbuf *control, int flags);
+extern int pru_sosend_list_notsupp(struct socket *so, struct uio **uio,
+    u_int, int flags);
 extern int pru_soreceive_notsupp(struct socket *so,
     struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0,
     struct mbuf **controlp, int *flagsp);
-extern int pru_soreceive_list_notsupp(struct socket *so,
-    struct sockaddr **paddr, struct uio **uio, u_int, struct mbuf **mp0,
-    struct mbuf **controlp, int *flagsp);
+extern int pru_soreceive_list_notsupp(struct socket *, struct recv_msg_elem *, u_int,
+    int *);
 extern int pru_sopoll_notsupp(struct socket *so, int events,
     struct ucred *cred, void *);
 #ifdef XNU_KERNEL_PRIVATE
index d0050e4163b006aae20d782c5737598da75c63ea..872173b093d09f0ae52e5bff6f1062c8bf4cebd0 100644 (file)
@@ -50,6 +50,8 @@ struct uthread;
 typedef void (*sched_call_t)(int type, thread_t thread);
 #endif
 
+typedef struct workq_reqthreads_req_s {unsigned long priority; int count;} *workq_reqthreads_req_t;
+
 /*
  * Increment each time new reserved slots are used. When the pthread
  * kext registers this table, it will include the version of the xnu
@@ -100,8 +102,14 @@ typedef struct pthread_functions_s {
        /* New pthreadctl system. */
        int (*bsdthread_ctl)(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval);
 
+    /* Request threads to deliver kevents */
+    thread_t (*workq_reqthreads)(struct proc *p, int requests_count, workq_reqthreads_req_t requests);
+
+    /* Resolve a pthread_priority_t to a QoS/relative pri */
+    integer_t (*thread_qos_from_pthread_priority)(unsigned long pthread_priority, unsigned long *flags);
+
        /* padding for future */
-       void* _pad[97];
+       void* _pad[95];
 } *pthread_functions_t;
 
 typedef struct pthread_callbacks_s {
index fda2515a5fa8cb9095adcf75933c4e3f63dce624..cf589929b5015b5974933e5c15863bb0d3d15e40 100644 (file)
 #include <sys/appleapiopts.h>
 #include <sys/cdefs.h>
 
+enum {
+       ePtAttachDeprecated __deprecated_enum_msg("PT_ATTACH is deprecated. See PT_ATTACHEXC") = 10
+};
+
+
 #define        PT_TRACE_ME     0       /* child declares it's being traced */
 #define        PT_READ_I       1       /* read word in child's I space */
 #define        PT_READ_D       2       /* read word in child's D space */
@@ -77,7 +82,7 @@
 #define        PT_CONTINUE     7       /* continue the child */
 #define        PT_KILL         8       /* kill the child process */
 #define        PT_STEP         9       /* single step the child */
-#define        PT_ATTACH       10      /* trace some running process */
+#define        PT_ATTACH       ePtAttachDeprecated     /* trace some running process */
 #define        PT_DETACH       11      /* stop tracing a process */
 #define        PT_SIGEXC       12      /* signals as exceptions for current_proc */
 #define PT_THUPDATE    13      /* signal for thread# */
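With this header, a plain PT_ATTACH still compiles but now carries the deprecation message above, which points at PT_ATTACHEXC (attach with exception-based delivery). A hedged sketch of the updated call, assuming the standard ptrace() prototype:

#include <sys/ptrace.h>
#include <sys/types.h>

/* Sketch: attach to a running process the non-deprecated way. */
static int
attach(pid_t pid)
{
	/* PT_ATTACH would warn via ePtAttachDeprecated; use PT_ATTACHEXC. */
	return ptrace(PT_ATTACHEXC, pid, (caddr_t)0, 0);
}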
index 18e3662e945faa249e928fa9d6fdbbb75d173101..5f51c9952983e92d547e58c02b3c452e62aed493 100644 (file)
@@ -84,8 +84,7 @@
 #define RB_SAFEBOOT    0x100   /* booting safe */
 #define RB_UPSDELAY 0x200   /* Delays restart by 5 minutes */
 #define RB_QUICK       0x400   /* quick and ungraceful reboot with file system caches flushed*/
-#define RB_PANIC       0       /* reboot due to panic */
-#define RB_BOOT                1       /* reboot due to boot() */
+#define RB_PANIC       0x800   /* panic the kernel */
 
 #endif /* __APPLE_API_PRIVATE */
 
 #include <machine/reboot.h>
 
 __BEGIN_DECLS
-int    boot(int, int, char *);
+int    reboot_kernel(int, char *);
 __END_DECLS
 
 #define PROC_SHUTDOWN_LOG "/var/log/kernel-shutdown.log"
index 6a4164fa8215352c3b8415270ed5c384a45c91f3..993907e548e6116884267d61042b8794c4ba48dd 100644 (file)
@@ -120,10 +120,12 @@ typedef __uint64_t        rlim_t;
 
 #define PRIO_DARWIN_ROLE        6               /* Second argument is a PID */
 
-#define PRIO_DARWIN_ROLE_DEFAULT        0x0     /* Default state */
+#define PRIO_DARWIN_ROLE_DEFAULT        0x0     /* Reset to default state */
 #define PRIO_DARWIN_ROLE_UI_FOCAL       0x1     /* On  screen,     focal UI */
-#define PRIO_DARWIN_ROLE_UI             0x2     /* On  screen, non-focal UI */
+#define PRIO_DARWIN_ROLE_UI             0x2     /* On  screen UI,  focal unknown */
 #define PRIO_DARWIN_ROLE_NON_UI         0x3     /* Off screen, non-focal UI */
+#define PRIO_DARWIN_ROLE_UI_NON_FOCAL   0x4     /* On  screen, non-focal UI */
+#define PRIO_DARWIN_ROLE_TAL_LAUNCH     0x5     /* Throttled-launch (for OS X TAL resume) */
 
 #endif /* PRIVATE */
 
index a4c33d0c891e6e2df5bffdcd6c2f6bb65d8df25a..1fa3f7605e2176567ceeb50dbc8cafe63569192f 100644 (file)
 #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
 
 #ifdef KERNEL
-#ifdef KERNEL_PRIVATE
-#include <kern/wait_queue.h>
-#endif
 #include <sys/kernel_types.h>
-
+#include <kern/waitq.h>
 #include <sys/event.h>
 
 /*
  */
 #ifdef KERNEL_PRIVATE
 struct selinfo {
-       struct  wait_queue si_wait_queue;       /* wait_queue for wait/wakeup */
-       struct klist si_note;           /* JMM - temporary separation */
+       struct  waitq si_waitq;         /* waitq for wait/wakeup */
+       struct  klist si_note;          /* JMM - temporary separation */
        u_int   si_flags;               /* see below */
 };
 
index 2ff00560983fd1861542e9d478f4a4dba04f6a08..2d0fc43e8822664b12b76daad3afb1f098865dfb 100644 (file)
@@ -71,6 +71,7 @@
 
 #include <sys/cdefs.h>
 #include <sys/appleapiopts.h>
+#include <Availability.h>
 
 #define __DARWIN_NSIG  32      /* counting 0; could be 33 (mask is 1-32) */
 
index cd5c2d133e9b3daa2a8c3239a82ab570c753b713..6d8488807f6c75a8d2858d99b5488c4203cee1fe 100644 (file)
@@ -188,10 +188,16 @@ int sigprop[NSIG + 1] = {
 
 #define        sigcantmask     (sigmask(SIGKILL) | sigmask(SIGSTOP))
 
+#define SIGRESTRICTMASK (sigmask(SIGILL) | sigmask(SIGTRAP) | sigmask(SIGABRT) | \
+                         sigmask(SIGFPE) | sigmask(SIGBUS)  | sigmask(SIGSEGV) | \
+                         sigmask(SIGSYS))
+
+extern unsigned sigrestrict_arg;
+
 /*
  * Machine-independent functions:
  */
-int    coredump(struct proc *p, uint32_t reserve_mb, int ignore_ulimit);
+
 void   execsigs(struct proc *p, thread_t thread);
 void   gsignal(int pgid, int sig);
 int    issignal_locked(struct proc *p);
@@ -203,6 +209,7 @@ void        siginit(struct proc *p);
 void   trapsignal(struct proc *p, int sig, unsigned code);
 void   pt_setrunnable(struct proc *p);
 int    hassigprop(int sig, int prop);
+int setsigvec(proc_t, thread_t, int signum, struct __kern_sigaction *, boolean_t in_sigstart);
 
 /*
  * Machine-dependent functions:
@@ -219,9 +226,6 @@ void        threadsignal(thread_t sig_actthread, int signum,
 int    thread_issignal(proc_t p, thread_t th, sigset_t mask);
 void   psignal_vfork(struct proc *p, task_t new_task, thread_t thread,
                int signum);
-void   psignal_vtalarm(struct proc *);
-void   psignal_xcpu(struct proc *);
-void   psignal_sigprof(struct proc *);
 void   signal_setast(thread_t sig_actthread);
 void   pgsigio(pid_t pgid, int signalnum);
 
@@ -230,4 +234,17 @@ int sig_try_locked(struct proc *p);
 
 #endif /* BSD_KERNEL_PRIVATE */
 
+
+#ifdef XNU_KERNEL_PRIVATE
+
+/* Functions exported to Mach as well */
+
+#define COREDUMP_IGNORE_ULIMIT  0x0001 /* Ignore the process's core file ulimit. */
+#define COREDUMP_FULLFSYNC      0x0002 /* Run F_FULLFSYNC on the core file's vnode */
+
+int    coredump(struct proc *p, uint32_t reserve_mb, int coredump_flags);
+
+#endif  /* XNU_KERNEL_PRIVATE */
+
+
 #endif /* !_SYS_SIGNALVAR_H_ */
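coredump() now takes a flags word in place of the old ignore_ulimit boolean, and the flag bits are visible to Mach-side callers. A hedged kernel-side sketch of the new calling convention:

/* Illustrative only: dump core for a process, ignoring its core-file
 * ulimit and forcing the dump to stable storage.  A reserve_mb of 0 (no
 * space reservation) is an assumption for the example. */
static int dump_core_now(struct proc *p)
{
	return coredump(p, 0, COREDUMP_IGNORE_ULIMIT | COREDUMP_FULLFSYNC);
}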
index 3b6197bf67012e2a75fb01f7c17270ddf04b3f48..8afe6c4daa073bcb7c698b965d6045a28289372d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -329,12 +329,33 @@ struct so_tcdbg {
 #if MPTCP
 #define SO_MPTCP_FASTJOIN      0x1111  /* fast join MPTCP */
 #endif /* MPTCP */
-
-#define        SO_AWDL_UNRESTRICTED    0x1113  /* try to use AWDL in restricted mode */
 #endif /* PRIVATE */
-
 #define SO_NUMRCVPKT           0x1112  /* number of datagrams in receive socket buffer */
+#ifdef PRIVATE
+#define        SO_AWDL_UNRESTRICTED    0x1113  /* try to use AWDL in restricted mode */
+#define SO_EXTENDED_BK_IDLE    0x1114  /* extended time to keep socket idle after app is suspended (int) */
+#endif /* PRIVATE */
 
+typedef __uint32_t sae_associd_t;
+#define        SAE_ASSOCID_ANY 0
+#define        SAE_ASSOCID_ALL ((sae_associd_t)(-1ULL))
+
+typedef __uint32_t sae_connid_t;
+#define        SAE_CONNID_ANY  0
+#define        SAE_CONNID_ALL  ((sae_connid_t)(-1ULL))
+
+/* connectx() flag parameters */
+#define CONNECT_RESUME_ON_READ_WRITE   0x1 /* resume connect() on read/write */
+#define CONNECT_DATA_IDEMPOTENT                0x2 /* data is idempotent */
+
+/* sockaddr endpoints */
+typedef struct sa_endpoints {
+       unsigned int    sae_srcif;      /* optional source interface */
+       struct sockaddr *sae_srcaddr;   /* optional source address */
+       socklen_t       sae_srcaddrlen; /* size of source address */
+       struct sockaddr *sae_dstaddr;   /* destination address */
+       socklen_t       sae_dstaddrlen; /* size of destination address */
+} sa_endpoints_t;
 #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
 
 /*
@@ -792,6 +813,46 @@ struct user32_msghdr_x {
        user32_size_t   msg_datalen;    /* byte length of buffer in msg_iov */
 };
 
+/*
+ * In-kernel representation of "struct sa_endpoints" from
+ * userspace. Has enough precision for 32-bit or
+ * 64-bit clients, but does not need to be packed.
+ */
+
+struct user_sa_endpoints {
+       unsigned int    sae_srcif;      /* optional source interface */
+       user_addr_t     sae_srcaddr;    /* optional source address */
+       socklen_t       sae_srcaddrlen; /* size of source address */
+       user_addr_t     sae_dstaddr;    /* destination address */
+       socklen_t       sae_dstaddrlen; /* size of destination address */
+};
+
+/*
+ * LP64 user version of struct sa_endpoints
+ * WARNING - keep in sync with struct sa_endpoints
+ */
+
+struct user64_sa_endpoints {
+       unsigned int    sae_srcif;      /* optional source interface */
+       user64_addr_t   sae_srcaddr;    /* optional source address */
+       socklen_t       sae_srcaddrlen; /* size of source address */
+       user64_addr_t   sae_dstaddr;    /* destination address */
+       socklen_t       sae_dstaddrlen; /* size of destination address */
+};
+
+/*
+ * ILP32 user version of struct sa_endpoints
+ * WARNING - keep in sync with struct sa_endpoints
+ */
+
+struct user32_sa_endpoints {
+       unsigned int    sae_srcif;      /* optional source interface */
+       user32_addr_t   sae_srcaddr;    /* optional source address */
+       socklen_t       sae_srcaddrlen; /* size of source address */
+       user32_addr_t   sae_dstaddr;    /* destination address */
+       socklen_t       sae_dstaddrlen; /* size of destination address */
+};
+
 #endif /* XNU_KERNEL_PRIVATE */
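A hedged sketch of how the kernel side might fill struct user_sa_endpoints from a user pointer, selecting the 32-bit or 64-bit layout based on the calling process. proc_is64bit() and copyin() are existing XNU primitives; the helper itself is illustrative, not the function the commit actually adds.

/* Illustrative only: copy a user-supplied sa_endpoints structure into its
 * kernel representation, honoring the caller's pointer size. */
static int
copyin_sa_endpoints(proc_t p, user_addr_t uaddr, struct user_sa_endpoints *ep)
{
	int error;

	if (proc_is64bit(p)) {
		struct user64_sa_endpoints ep64;

		if ((error = copyin(uaddr, &ep64, sizeof(ep64))) != 0)
			return (error);
		ep->sae_srcif      = ep64.sae_srcif;
		ep->sae_srcaddr    = ep64.sae_srcaddr;
		ep->sae_srcaddrlen = ep64.sae_srcaddrlen;
		ep->sae_dstaddr    = ep64.sae_dstaddr;
		ep->sae_dstaddrlen = ep64.sae_dstaddrlen;
	} else {
		struct user32_sa_endpoints ep32;

		if ((error = copyin(uaddr, &ep32, sizeof(ep32))) != 0)
			return (error);
		ep->sae_srcif      = ep32.sae_srcif;
		ep->sae_srcaddr    = ep32.sae_srcaddr;
		ep->sae_srcaddrlen = ep32.sae_srcaddrlen;
		ep->sae_dstaddr    = ep32.sae_dstaddr;
		ep->sae_dstaddrlen = ep32.sae_dstaddrlen;
	}
	return (0);
}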
 
 #define        MSG_OOB         0x1             /* process out-of-band data */
@@ -997,20 +1058,13 @@ struct user32_sf_hdtr {
 
 #ifdef PRIVATE
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-typedef __uint32_t associd_t;
-#define        ASSOCID_ANY     0
-#define        ASSOCID_ALL     ((associd_t)(-1ULL))
-
-typedef __uint32_t connid_t;
-#define        CONNID_ANY      0
-#define        CONNID_ALL      ((connid_t)(-1ULL))
 
 /*
  * Structure for SIOCGASSOCIDS
  */
 struct so_aidreq {
        __uint32_t      sar_cnt;        /* number of associations */
-       associd_t       *sar_aidp;      /* array of association IDs */
+       sae_associd_t   *sar_aidp;      /* array of association IDs */
 };
 
 #ifdef BSD_KERNEL_PRIVATE
@@ -1029,20 +1083,20 @@ struct so_aidreq64 {
  * Structure for SIOCGCONNIDS
  */
 struct so_cidreq {
-       associd_t       scr_aid;        /* association ID */
+       sae_associd_t   scr_aid;        /* association ID */
        __uint32_t      scr_cnt;        /* number of connections */
-       connid_t        *scr_cidp;      /* array of connection IDs */
+       sae_connid_t    *scr_cidp;      /* array of connection IDs */
 };
 
 #ifdef BSD_KERNEL_PRIVATE
 struct so_cidreq32 {
-       associd_t       scr_aid;
+       sae_associd_t   scr_aid;
        __uint32_t      scr_cnt;
        user32_addr_t   scr_cidp;
 };
 
 struct so_cidreq64 {
-       associd_t       scr_aid;
+       sae_associd_t   scr_aid;
        __uint32_t      scr_cnt;
        user64_addr_t   scr_cidp __attribute__((aligned(8)));
 };
@@ -1052,7 +1106,7 @@ struct so_cidreq64 {
  * Structure for SIOCGCONNINFO
  */
 struct so_cinforeq {
-       connid_t        scir_cid;               /* connection ID */
+       sae_connid_t    scir_cid;               /* connection ID */
        __uint32_t      scir_flags;             /* see flags below */
        __uint32_t      scir_ifindex;           /* (last) outbound interface */
        __int32_t       scir_error;             /* most recent error */
@@ -1067,7 +1121,7 @@ struct so_cinforeq {
 
 #ifdef BSD_KERNEL_PRIVATE
 struct so_cinforeq32 {
-       connid_t        scir_cid;
+       sae_connid_t    scir_cid;
        __uint32_t      scir_flags;
        __uint32_t      scir_ifindex;
        __int32_t       scir_error;
@@ -1081,7 +1135,7 @@ struct so_cinforeq32 {
 };
 
 struct so_cinforeq64 {
-       connid_t        scir_cid;
+       sae_connid_t    scir_cid;
        __uint32_t      scir_flags;
        __uint32_t      scir_ifindex;
        __int32_t       scir_error;
@@ -1116,7 +1170,7 @@ struct so_cinforeq64 {
  * Structure for SIOC{S,G}CONNORDER
  */
 struct so_cordreq {
-       connid_t        sco_cid;                /* connection ID */
+       sae_connid_t    sco_cid;                /* connection ID */
        __uint32_t      sco_rank;               /* rank (0 means unspecified) */
 };
 
@@ -1170,10 +1224,8 @@ struct kev_socket_closed {
 
 #ifndef        KERNEL
 __BEGIN_DECLS
-extern int connectx(int s, struct sockaddr *, socklen_t, struct sockaddr *,
-    socklen_t, __uint32_t, associd_t, connid_t *);
-extern int disconnectx(int s, associd_t, connid_t);
-extern int peeloff(int s, associd_t);
+
+extern int peeloff(int s, sae_associd_t);
 extern int socket_delegate(int, int, int, pid_t);
 
 /*
@@ -1181,7 +1233,7 @@ extern int socket_delegate(int, int, int, pid_t);
  * several datagrams at once in the array of message headers "msgp".
  *
  * recvmsg_x() can be used only with protocol handlers that have been specially
- * modified to handle sending and receiving several datagrams at once.
+ * modified to support sending and receiving several datagrams at once.
  * 
  * The size of the array "msgp" is given by the argument "cnt".
  *
@@ -1201,11 +1253,7 @@ extern int socket_delegate(int, int, int, pid_t);
  * recvmsg_x() may return with less than "cnt" datagrams received based on
  * the low water mark and the amount of data pending in the socket buffer.
  *
- * Address and ancillary data are not supported so the following fields
- * must be set to zero on input:
- *   "msg_name", "msg_namelen", "msg_control" and "msg_controllen".
- *
- * recvmsg_x() returns the number of datagrams that have been received ,
+ * recvmsg_x() returns the number of datagrams that have been received,
  * or -1 if an error occurred. 
  *
  * NOTE: This is a private system call, the API is subject to change.
@@ -1217,7 +1265,7 @@ ssize_t recvmsg_x(int s, const struct msghdr_x *msgp, u_int cnt, int flags);
  * several datagrams at once in the array of message headers "msgp".
  *
  * sendmsg_x() can be used only with protocol handlers that have been specially
- * modified to support to handle sending and receiving several datagrams at once.
+ * modified to support sending and receiving several datagrams at once.
  * 
  * The size of the array "msgp" is given by the argument "cnt".
  *
@@ -1250,6 +1298,7 @@ __END_DECLS
 
 #ifndef        KERNEL
 __BEGIN_DECLS
+
 int    accept(int, struct sockaddr * __restrict, socklen_t * __restrict)
                __DARWIN_ALIAS_C(accept);
 int    bind(int, const struct sockaddr *, socklen_t) __DARWIN_ALIAS(bind);
@@ -1280,6 +1329,9 @@ int       sendfile(int, int, off_t, off_t *, struct sf_hdtr *, int);
 
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
 void   pfctlinput(int, struct sockaddr *);
+int connectx(int , const sa_endpoints_t *, sae_associd_t, unsigned int,
+    const struct iovec *, unsigned int, size_t *, sae_connid_t *);
+int disconnectx(int , sae_associd_t, sae_connid_t);
 #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
 __END_DECLS
 #endif /* !KERNEL */
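A hedged userspace sketch of the new connectx() calling convention declared above: endpoints travel in a sa_endpoints_t and optional idempotent data (for TCP Fast Open-style handshakes) rides along in an iovec. The destination address setup is illustrative.

#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <string.h>
#include <stdio.h>

/* Connect and hand the kernel idempotent data to send during connection
 * establishment.  SAE_ASSOCID_ANY lets the kernel pick the association;
 * the resulting connection ID comes back through 'cid'. */
static int connect_with_data(int s, struct sockaddr_in *dst,
    void *buf, size_t buflen)
{
	sa_endpoints_t sae;
	struct iovec iov;
	size_t bytes_sent = 0;
	sae_connid_t cid = SAE_CONNID_ANY;

	memset(&sae, 0, sizeof(sae));
	sae.sae_dstaddr = (struct sockaddr *)dst;
	sae.sae_dstaddrlen = sizeof(*dst);

	iov.iov_base = buf;
	iov.iov_len = buflen;

	if (connectx(s, &sae, SAE_ASSOCID_ANY, CONNECT_DATA_IDEMPOTENT,
	    &iov, 1, &bytes_sent, &cid) == -1) {
		perror("connectx");
		return -1;
	}
	return 0;
}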
index 33ddf98e24e45a90d5b1a39719e5ad66060ccf99..e0b810b0c10bfdafebb0827ea598d1d221b90d42 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -150,6 +150,7 @@ struct msg_state {
 struct socket {
        int     so_zone;                /* zone we were allocated from */
        short   so_type;                /* generic type, see socket.h */
+       u_short so_error;               /* error affecting connection */
        u_int32_t so_options;           /* from socket call, see socket.h */
        short   so_linger;              /* time to linger while closing */
        short   so_state;               /* internal state flags SS_*, below */
@@ -175,7 +176,6 @@ struct socket {
                                           connections */
        short   so_qlimit;              /* max number queued connections */
        short   so_timeo;               /* connection timeout */
-       u_short so_error;               /* error affecting connection */
        pid_t   so_pgid;                /* pgid for signals */
        u_int32_t so_oobmark;           /* chars to oob mark */
        /*
@@ -193,8 +193,8 @@ struct socket {
                struct mbuf     *sb_lastrecord; /* first mbuf of last record */
                struct socket   *sb_so;         /* socket back ptr for kexts */
                struct selinfo  sb_sel;         /* process selecting rd/wr */
-               u_int32_t       sb_flags;       /* flags, see below */
                struct timeval  sb_timeo;       /* timeout for read/write */
+               u_int32_t       sb_flags;       /* flags, see below */
                u_int32_t       sb_idealsize;   /* Ideal size for the sb based
                                                   on bandwidth and delay */
                void    (*sb_upcall)(struct socket *, void *arg, int waitf);
@@ -203,6 +203,7 @@ struct socket {
                u_int32_t       sb_waiters;     /* # of data/space waiters */
                thread_t        sb_cfil_thread; /* content filter thread */
                u_int32_t       sb_cfil_refs;   /* # of nested calls */
+               u_int32_t       sb_preconn_hiwat;/* preconnect hiwat mark */
        } so_rcv, so_snd;
 #define        SB_MAX          (8192*1024)     /* default for max chars in sockbuf */
 #define LOW_SB_MAX     (2*9*1024)      /* lower limit on max socket buffer
@@ -220,7 +221,6 @@ struct socket {
 #define        SB_AUTOSIZE     0x400           /* automatically size socket buffer */
 #define        SB_TRIM         0x800           /* Trim the socket buffer */
 #define        SB_NOCOMPRESS   0x1000          /* do not compress socket buffer */
-
        caddr_t so_tpcb;                /* Misc. protocol control block, used
                                           by some kexts */
 
@@ -230,28 +230,33 @@ struct socket {
        /* NB: generation count must not be first; easiest to make it last. */
        so_gen_t so_gencnt;             /* generation count */
        TAILQ_HEAD(, eventqelt) so_evlist;
-       boolean_t       cached_in_sock_layer; /* bundled with inpcb and tcpcb */
        STAILQ_ENTRY(socket) so_cache_ent;      /* socache entry */
-       u_int32_t       cache_timestamp;        /* time socket was cached */
        caddr_t         so_saved_pcb;   /* Saved pcb when cacheing */
+       u_int32_t       cache_timestamp;        /* time socket was cached */
+
+       pid_t           last_pid;       /* pid of most recent accessor */
+       u_int64_t       last_upid;      /* upid of most recent accessor */
+
        struct mbuf     *so_temp;       /* Holding area for outbound frags */
        /* Plug-in support - make the socket interface overridable */
        struct mbuf     *so_tail;
        struct socket_filter_entry *so_filt;    /* NKE hook */
        u_int32_t       so_flags;       /* Flags */
 #define        SOF_NOSIGPIPE           0x00000001
-#define        SOF_NOADDRAVAIL         0x00000002      /* EADDRNOTAVAIL if src addr is gone */
-#define        SOF_PCBCLEARING         0x00000004      /* pru_disconnect done; don't call pru_detach */
-#define        SOF_DEFUNCT             0x00000008      /* socket marked as inactive */
-#define        SOF_CLOSEWAIT           0x00000010      /* blocked in close awaiting some events */
-#define SOF_REUSESHAREUID      0x00000040      /* Allows SO_REUSEADDR/SO_REUSEPORT
-                                               for multiple so_uid */
-#define        SOF_MULTIPAGES          0x00000080      /* jumbo clusters may be used for sosend */
-#define SOF_ABORTED            0x00000100      /* soabort was already called once */
-#define SOF_OVERFLOW           0x00000200      /* socket was dropped as overflow of listen q */
-#define SOF_NOTIFYCONFLICT     0x00000400      /* notify that a bind was done on a
-                                          port already in use */
-#define        SOF_UPCALLCLOSEWAIT     0x00000800 /* block close until upcall returns  */
+#define        SOF_NOADDRAVAIL         0x00000002 /* EADDRNOTAVAIL if src addr is gone */
+#define        SOF_PCBCLEARING         0x00000004 /* pru_disconnect done; don't
+                                             call pru_detach */
+#define        SOF_DEFUNCT             0x00000008 /* socket marked as inactive */
+#define        SOF_CLOSEWAIT           0x00000010 /* blocked in close awaiting some events */
+#define SOF_REUSESHAREUID      0x00000040 /* Allows SO_REUSEADDR/SO_REUSEPORT
+                                             for multiple so_uid */
+#define        SOF_MULTIPAGES          0x00000080 /* jumbo clusters may be used for sosend */
+#define SOF_ABORTED            0x00000100 /* soabort was already called once */
+#define SOF_OVERFLOW           0x00000200 /* socket was dropped as overflow of
+                                             listen q */
+#define SOF_NOTIFYCONFLICT     0x00000400 /* notify that a bind was done on a
+                                             port already in use */
+#define        SOF_UPCALLCLOSEWAIT     0x00000800 /* block close until upcall returns */
 #define SOF_BINDRANDOMPORT     0x00001000 /* Randomized port number for bind */
 #define SOF_NPX_SETOPTSHUT     0x00002000 /* Non POSIX extension to allow
                                           setsockopt(2) after shut down */
@@ -261,7 +266,7 @@ struct socket {
 #define SOF_SUSPENDED          0x00020000 /* i/f output queue is suspended */
 #define SOF_INCOMP_INPROGRESS  0x00040000 /* incomp socket is being processed */
 #define        SOF_NOTSENT_LOWAT       0x00080000 /* A different lowat on not sent
-                                          data has been set */
+                                             data has been set */
 #define SOF_KNOTE              0x00100000 /* socket is on the EV_SOCK klist */
 #define SOF_USELRO             0x00200000 /* TCP must use LRO on these sockets */
 #define SOF_ENABLE_MSGS                0x00400000 /* TCP must enable message delivery */
@@ -279,25 +284,26 @@ struct socket {
        int             so_usecount;    /* refcounting of socket use */;
        int             so_retaincnt;
        u_int32_t       so_filteruse;   /* usecount for the socket filters */
-       u_int32_t       so_traffic_mgt_flags;   /* traffic_mgt socket config */
+       u_int16_t       so_traffic_class;
+       u_int8_t        so_traffic_mgt_flags;   /* traffic_mgt socket config */
+       u_int8_t        so_restrictions;
        thread_t        so_send_filt_thread;
-       u_int32_t       so_restrictions;
 
        /* for debug purposes */
 #define        SO_LCKDBG_MAX 4 /* number of debug locking Link Registers recorded */
        void    *lock_lr[SO_LCKDBG_MAX];        /* locking calling history */
-       int     next_lock_lr;
        void    *unlock_lr[SO_LCKDBG_MAX];      /* unlocking caller history */
-       int     next_unlock_lr;
+       u_int8_t        next_lock_lr;
+       u_int8_t        next_unlock_lr;
+
+       u_int16_t       so_pktheadroom; /* headroom before packet payload */
+
+       u_int32_t       so_ifdenied_notifies; /* # of notifications generated */
 
        struct label    *so_label;      /* MAC label for socket */
        struct label    *so_peerlabel;  /* cached MAC label for socket peer */
        thread_t        so_background_thread;   /* thread that marked
                                                   this socket background */
-       int             so_traffic_class;
-
-       u_int64_t       last_upid;      /* upid of most recent accessor */
-       pid_t           last_pid;       /* pid of most recent accessor */
        struct data_stats so_tc_stats[SO_TC_STATS_MAX];
        struct klist    so_klist;               /* klist for EV_SOCK events */
 
@@ -308,21 +314,28 @@ struct socket {
 
        u_int32_t       so_eventmask;           /* event mask */
 
-       u_int64_t       e_upid;         /* upid of the effective owner */
        pid_t           e_pid;          /* pid of the effective owner */
+       u_int64_t       e_upid;         /* upid of the effective owner */
 
        uuid_t          last_uuid;      /* uuid of most recent accessor */
        uuid_t          e_uuid;         /* uuid of effective owner */
        uuid_t          so_vuuid;       /* UUID of the Voucher originator */
 
        int32_t         so_policy_gencnt; /* UUID policy gencnt */
-       u_int32_t       so_ifdenied_notifies; /* # of notifications generated */
 
        u_int32_t       so_flags1;
 #define SOF1_POST_FALLBACK_SYNC        0x00000001 /* fallback to TCP */
 #define        SOF1_AWDL_PRIVILEGED    0x00000002
 #define        SOF1_IF_2KCL            0x00000004 /* interface prefers 2 KB clusters */
 #define        SOF1_DEFUNCTINPROG      0x00000008
+#define SOF1_DATA_IDEMPOTENT   0x00000010 /* idempotent data for TFO */
+#define SOF1_PRECONNECT_DATA   0x00000020 /* request for preconnect data */
+#define        SOF1_EXTEND_BK_IDLE_WANTED      0x00000040 /* option set */
+#define        SOF1_EXTEND_BK_IDLE_INPROG      0x00000080 /* socket */
+#define        SOF1_CACHED_IN_SOCK_LAYER       0x00000100 /* bundled with inpcb and
+                                                     tcpcb */
+
+       u_int64_t       so_extended_bk_start;
 };
 
 /* Control message accessor in mbufs */
@@ -492,6 +505,27 @@ struct xsockstat_n {
        u_int32_t               xst_kind;       /* XSO_STATS */
        struct data_stats       xst_tc_stats[SO_TC_STATS_MAX];
 };
+
+/*
+ * Global socket statistics
+ */
+struct soextbkidlestat {
+       u_int32_t       so_xbkidle_maxperproc;
+       u_int32_t       so_xbkidle_time;
+       u_int32_t       so_xbkidle_rcvhiwat;
+       int32_t         so_xbkidle_notsupp;
+       int32_t         so_xbkidle_toomany;
+       int32_t         so_xbkidle_wantok;
+       int32_t         so_xbkidle_active;
+       int32_t         so_xbkidle_nocell;
+       int32_t         so_xbkidle_notime;
+       int32_t         so_xbkidle_forced;
+       int32_t         so_xbkidle_resumed;
+       int32_t         so_xbkidle_expired;
+       int32_t         so_xbkidle_resched;
+       int32_t         so_xbkidle_nodlgtd;
+       int32_t         so_xbkidle_drained;
+};
 #endif /* PRIVATE */
 
 #pragma pack()
@@ -636,6 +670,7 @@ struct so_procinfo {
        pid_t           spi_epid;
        uuid_t          spi_uuid;
        uuid_t          spi_euuid;
+       int             spi_delegated;
 };
 
 extern u_int32_t sb_max;
@@ -652,9 +687,13 @@ extern uint32_t tcp_autosndbuf_max;
 extern u_int32_t sotcdb;
 extern u_int32_t net_io_policy_throttled;
 extern u_int32_t net_io_policy_log;
+extern u_int32_t net_io_policy_throttle_best_effort;
 #if CONFIG_PROC_UUID_POLICY
 extern u_int32_t net_io_policy_uuid;
 #endif /* CONFIG_PROC_UUID_POLICY */
+
+extern struct soextbkidlestat soextbkidlestat;
+
 #endif /* BSD_KERNEL_PRIVATE */
 
 struct mbuf;
@@ -662,6 +701,18 @@ struct sockaddr;
 struct ucred;
 struct uio;
 
+#define        SOCK_MSG_SA 0x01
+#define        SOCK_MSG_CONTROL 0x02
+#define        SOCK_MSG_DATA 0x04
+
+struct recv_msg_elem {
+       struct uio *uio;
+       struct sockaddr *psa;
+       struct mbuf *controlp;
+       int which;
+       int flags;
+};
+
 /*
  * From uipc_socket and friends
  */
@@ -679,6 +730,7 @@ extern int sodisconnect(struct socket *so);
 extern void sofree(struct socket *so);
 extern void sofreelastref(struct socket *, int);
 extern void soisconnected(struct socket *so);
+extern boolean_t socanwrite(struct socket *so);
 extern void soisconnecting(struct socket *so);
 extern void soisdisconnected(struct socket *so);
 extern void soisdisconnecting(struct socket *so);
@@ -691,13 +743,13 @@ extern int sooptcopyout(struct sockopt *sopt, void *data, size_t len);
 extern int soreceive(struct socket *so, struct sockaddr **paddr,
     struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp);
 extern int soreserve(struct socket *so, u_int32_t sndcc, u_int32_t rcvcc);
+extern void soreserve_preconnect(struct socket *so, unsigned int pre_cc);
 extern void sorwakeup(struct socket *so);
 extern int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *control, int flags);
-extern int sosend_list(struct socket *so, struct sockaddr *addr, struct uio **uio,
-    u_int uiocnt, struct mbuf *top, struct mbuf *control, int flags);
-extern int soreceive_list(struct socket *so, struct sockaddr **psa, struct uio **uio,
-               u_int uiocnt, struct mbuf **mp0, struct mbuf **controlp, int *flagsp);
+extern int sosend_list(struct socket *so, struct uio **uio, u_int uiocnt, int flags);
+extern int soreceive_list(struct socket *so, struct recv_msg_elem *msgarray, u_int msgcnt,
+    int *flags);
 extern void sonullevent(struct socket *so, void *arg, uint32_t hint);
 __END_DECLS
 
@@ -771,11 +823,11 @@ extern int soconnect(struct socket *so, struct sockaddr *nam);
 extern int soconnectlock(struct socket *so, struct sockaddr *nam, int dolock);
 extern int soconnect2(struct socket *so1, struct socket *so2);
 extern int soconnectxlocked(struct socket *so, struct sockaddr_list **src_sl,
-    struct sockaddr_list **dst_sl, struct proc *, uint32_t, associd_t,
-    connid_t *, uint32_t, void *, u_int32_t);
-extern int sodisconnectx(struct socket *so, associd_t, connid_t);
-extern int sodisconnectxlocked(struct socket *so, associd_t, connid_t);
-extern int sopeelofflocked(struct socket *so, associd_t, struct socket **);
+    struct sockaddr_list **dst_sl, struct proc *, uint32_t, sae_associd_t,
+    sae_connid_t *, uint32_t, void *, u_int32_t, uio_t, user_ssize_t*);
+extern int sodisconnectx(struct socket *so, sae_associd_t, sae_connid_t);
+extern int sodisconnectxlocked(struct socket *so, sae_associd_t, sae_connid_t);
+extern int sopeelofflocked(struct socket *so, sae_associd_t, struct socket **);
 extern void soevupcall(struct socket *, u_int32_t);
 /* flags for socreate_internal */
 #define        SOCF_ASYNC      0x1     /* non-blocking socket */
@@ -792,12 +844,17 @@ extern void somultipages(struct socket *, boolean_t);
 extern void soif2kcl(struct socket *, boolean_t);
 extern int sosetdefunct(struct proc *, struct socket *, int level, boolean_t);
 extern int sodefunct(struct proc *, struct socket *, int level);
+extern int soresume(struct proc *, struct socket *, int);
+extern void resume_proc_sockets(proc_t);
+extern int so_check_extended_bk_idle_time(struct socket *);
+extern void so_drain_extended_bk_idle(struct socket *);
 extern void sohasoutofband(struct socket *so);
 extern void sodisconnectwakeup(struct socket *so);
 extern int soisthrottled(struct socket *so);
 extern int soisprivilegedtraffic(struct socket *so);
 extern int soissrcbackground(struct socket *so);
 extern int soissrcrealtime(struct socket *so);
+extern int soissrcbesteffort(struct socket *so);
 extern int solisten(struct socket *so, int backlog);
 extern struct socket *sodropablereq(struct socket *head);
 extern int socket_lock(struct socket *so, int refcount);
@@ -845,6 +902,14 @@ extern struct sockaddr_list *sockaddrlist_dup(const struct sockaddr_list *,
 #define PKT_SCF_IPV6           0x00000001      /* IPv6 packet */
 #define PKT_SCF_TCP_ACK                0x00000002      /* Pure TCP ACK */
 
+/*
+ * Flags for connectx(2) user-protocol request routine.
+ */
+
+#define        CONNREQF_MPTCP  0x1     /* called internally by MPTCP */
+#define CONNREQF_UIO   0x2     /* there's data */
+#define CONNREQF_IDEM  0x4     /* data is idempotent */
+
 extern void set_packet_service_class(struct mbuf *, struct socket *,
     mbuf_svc_class_t, u_int32_t);
 extern void so_tc_update_stats(struct mbuf *, struct socket *,
@@ -905,6 +970,7 @@ extern void evsofree(struct socket *);
 extern int tcp_notsent_lowat_check(struct socket *so);
 
 extern user_ssize_t uio_array_resid(struct uio **, u_int);
+extern user_ssize_t recv_msg_array_resid(struct recv_msg_elem *, u_int);
 
 void sotoxsocket_n(struct socket *, struct xsocket_n *);
 void sbtoxsockbuf_n(struct sockbuf *, struct xsockbuf_n *);
index d020b9573bcdabcbd954f592c14513d9c1dea654..96f2519e556ecdc3385975211f7eda389ad4508c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #define        SIOCGIFQUEUESTATS _IOWR('i', 147, struct if_qstatsreq)
 #define        SIOCSIFTHROTTLE _IOWR('i', 148, struct if_throttlereq)
 #define        SIOCGIFTHROTTLE _IOWR('i', 149, struct if_throttlereq)
-#endif /* PRIVATE */
 
-#ifdef PRIVATE
 #define        SIOCGASSOCIDS   _IOWR('s', 150, struct so_aidreq) /* get associds */
 #define        SIOCGCONNIDS    _IOWR('s', 151, struct so_cidreq) /* get connids */
 #define        SIOCGCONNINFO   _IOWR('s', 152, struct so_cinforeq) /* get conninfo */
 #endif /* BSD_KERNEL_PRIVATE */
 #define        SIOCSCONNORDER  _IOWR('s', 153, struct so_cordreq) /* set conn order */
 #define        SIOCGCONNORDER  _IOWR('s', 154, struct so_cordreq) /* get conn order */
-#endif /* PRIVATE */
 
-#ifdef PRIVATE
 #define        SIOCSIFLOG      _IOWR('i', 155, struct ifreq)
 #define        SIOCGIFLOG      _IOWR('i', 156, struct ifreq)
 #define        SIOCGIFDELEGATE _IOWR('i', 157, struct ifreq)
 #define        SIOCSIFEXPENSIVE _IOWR('i', 161, struct ifreq) /* mark interface expensive */
 #define        SIOCGIF2KCL     _IOWR('i', 162, struct ifreq)   /* interface prefers 2 KB clusters */
 #define        SIOCSIF2KCL     _IOWR('i', 163, struct ifreq)
+#define        SIOCGSTARTDELAY _IOWR('i', 164, struct ifreq)
+
+#define        SIOCAIFAGENTID  _IOWR('i', 165, struct if_agentidreq) /* Add netagent id */
+#define        SIOCDIFAGENTID  _IOWR('i', 166, struct if_agentidreq) /* Delete netagent id */
+#define        SIOCGIFAGENTIDS _IOWR('i', 167, struct if_agentidsreq) /* Get netagent ids */
+#define        SIOCGIFAGENTDATA        _IOWR('i', 168, struct netagent_req) /* Get netagent data */
+#ifdef BSD_KERNEL_PRIVATE
+#define        SIOCGIFAGENTIDS32       _IOWR('i', 167, struct if_agentidsreq32)
+#define        SIOCGIFAGENTIDS64       _IOWR('i', 167, struct if_agentidsreq64)
+#define        SIOCGIFAGENTDATA32              _IOWR('i', 168, struct netagent_req32)
+#define        SIOCGIFAGENTDATA64              _IOWR('i', 168, struct netagent_req64)
+#endif /* BSD_KERNEL_PRIVATE */
+
+#define        SIOCSIFINTERFACESTATE   _IOWR('i', 169, struct ifreq) /* set interface state */
+#define        SIOCGIFINTERFACESTATE   _IOWR('i', 170, struct ifreq) /* get interface state */
+#define        SIOCSIFPROBECONNECTIVITY _IOWR('i', 171, struct ifreq) /* Start/Stop probes to check connectivity */
+#define        SIOCGIFPROBECONNECTIVITY        _IOWR('i', 172, struct ifreq)   /* check if connectivity probes are enabled */
+
+#define        SIOCGIFFUNCTIONALTYPE   _IOWR('i', 173, struct ifreq) /* get interface functional type */
+#define        SIOCSIFNETSIGNATURE     _IOWR('i', 174, struct if_nsreq)
+#define        SIOCGIFNETSIGNATURE     _IOWR('i', 175, struct if_nsreq)
 #endif /* PRIVATE */
+
 #endif /* !_SYS_SOCKIO_H_ */
index 7054f6bb966cacb9aed4adf69fbbbd22850cbc9d..e794747db6ab98917fc1efc0903ba7b480211c60 100644 (file)
 #define        _SYS_SPAWN_INTERNAL_H_
 
 #include <sys/_types.h>                /* __offsetof(), __darwin_size_t */
+#include <sys/param.h>
 #include <sys/syslimits.h>     /* PATH_MAX */
 #include <sys/spawn.h>
 #include <mach/machine.h>
 #include <mach/port.h>
 #include <mach/exception_types.h>
+#include <mach/coalition.h>    /* COALITION_NUM_TYPES */
 
 /*
  * Allowable posix_spawn() port action types
@@ -117,6 +119,17 @@ typedef struct _posix_spawn_mac_policy_extensions {
 
 #define PS_MAC_EXTENSIONS_INIT_COUNT   2
 
+/*
+ * Coalition posix spawn attributes
+ */
+struct _posix_spawn_coalition_info {
+       struct {
+               uint64_t psci_id;
+               uint32_t psci_role;
+               uint32_t psci_reserved1;
+               uint64_t psci_reserved2;
+       } psci_info[COALITION_NUM_TYPES];
+};
 
 /*
  * A posix_spawnattr structure contains all of the attribute elements that
@@ -124,6 +137,7 @@ typedef struct _posix_spawn_mac_policy_extensions {
  * presence of a bit in the flags field.  All fields are initialized to the
  * appropriate default values by posix_spawnattr_init().
  */
+
 typedef struct _posix_spawnattr {
        short           psa_flags;              /* spawn attribute flags */
        short           flags_padding;  /* get the flags to be int aligned */
@@ -135,15 +149,16 @@ typedef struct _posix_spawnattr {
        int             psa_apptype;            /* app type and process spec behav */
        uint64_t        psa_cpumonitor_percent; /* CPU usage monitor percentage */
        uint64_t        psa_cpumonitor_interval; /* CPU usage monitor interval, in seconds */
-       uint64_t        psa_coalitionid;        /* coalition to spawn into */
+       uint64_t        psa_reserved;
 
-       short       psa_jetsam_flags; /* jetsam flags */
-       short           short_padding;  /* Padding for alignment issues */
-       int         psa_priority;   /* jetsam relative importance */
-       int         psa_high_water_mark; /* jetsam resident page count limit */
-       int             int_padding;    /* Padding for alignment issues */
+       short       psa_jetsam_flags;           /* jetsam flags */
+       short           short_padding;          /* Padding for alignment issues */
+       int         psa_priority;               /* jetsam relative importance */
+       int         psa_memlimit_active;        /* jetsam memory limit (in MB) when process is active */
+       int         psa_memlimit_inactive;      /* jetsam memory limit (in MB) when process is inactive */
 
        uint64_t        psa_qos_clamp;          /* QoS Clamp to set on the new process */
+       uint64_t        psa_darwin_role;           /* PRIO_DARWIN_ROLE to set on the new process */
 
        /*
         * NOTE: Extensions array pointers must stay at the end so that
@@ -152,16 +167,25 @@ typedef struct _posix_spawnattr {
         */
         _posix_spawn_port_actions_t    psa_ports; /* special/exception ports */
        _posix_spawn_mac_policy_extensions_t psa_mac_extensions; /* MAC policy-specific extensions. */
+       struct _posix_spawn_coalition_info *psa_coalition_info;  /* coalition info */
+       void            *reserved;
 } *_posix_spawnattr_t;
 
 /*
- * Jetsam flags
+ * Jetsam flags  eg: psa_jetsam_flags
  */
 #define        POSIX_SPAWN_JETSAM_SET                      0x8000
 
-#define        POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY   0x1
-#define        POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND       0x2
-#define        POSIX_SPAWN_JETSAM_MEMLIMIT_FATAL           0x4
+#define        POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY       0x01
+#define        POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND           0x02
+#define        POSIX_SPAWN_JETSAM_MEMLIMIT_FATAL               0x04  /* to be deprecated */
+
+/*
+ * Additional flags available for use with
+ * the posix_spawnattr_setjetsam_ext() call
+ */
+#define        POSIX_SPAWN_JETSAM_MEMLIMIT_ACTIVE_FATAL        0x04  /* if set, limit is fatal when the process is active   */
+#define        POSIX_SPAWN_JETSAM_MEMLIMIT_INACTIVE_FATAL      0x08  /* if set, limit is fatal when the process is inactive */
 
 /*
  * Deprecated posix_spawn psa_flags values
@@ -215,6 +239,10 @@ typedef struct _posix_spawnattr {
 #define POSIX_SPAWN_PROC_CLAMP_MAINTENANCE          0x00000003
 #define POSIX_SPAWN_PROC_CLAMP_LAST                 0x00000004
 
+/* Setting to indicate no change to darwin role */
+#define POSIX_SPAWN_DARWIN_ROLE_NONE                0x00000000
+/* Other possible values are specified by PRIO_DARWIN_ROLE in sys/resource.h */
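A hedged, libsyscall-level sketch of how the new attribute fields fit together before posix_spawn() consumes the args descriptor. Real callers would go through posix_spawnattr_* wrappers such as the posix_spawnattr_setjetsam_ext() mentioned above; direct field access and the concrete limits are purely illustrative.

/* Illustrative only: populate the new jetsam and role fields of a
 * _posix_spawnattr structure with example values. */
static void fill_spawn_attrs(struct _posix_spawnattr *psa)
{
	psa->psa_jetsam_flags = POSIX_SPAWN_JETSAM_SET |
	    POSIX_SPAWN_JETSAM_MEMLIMIT_ACTIVE_FATAL;
	psa->psa_priority = 0;			/* jetsam relative importance */
	psa->psa_memlimit_active = 300;		/* MB while active */
	psa->psa_memlimit_inactive = 150;	/* MB while inactive */
	psa->psa_darwin_role = POSIX_SPAWN_DARWIN_ROLE_NONE;	/* leave role unchanged */
}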
+
 /*
  * Allowable posix_spawn() file actions
  */
@@ -308,7 +336,11 @@ struct _posix_spawn_args_desc {
        _posix_spawn_mac_policy_extensions_t
                                mac_extensions; /* pointer to policy-specific
                                                 * attributes */
+       __darwin_size_t coal_info_size;
+       struct _posix_spawn_coalition_info *coal_info;  /* pointer to coalition info */
 
+       __darwin_size_t reserved_size;
+       void *reserved;
 };
 
 #ifdef KERNEL
@@ -328,6 +360,10 @@ struct user32__posix_spawn_args_desc {
        uint32_t                port_actions;   /* pointer to block */
        uint32_t        mac_extensions_size;
        uint32_t        mac_extensions;
+       uint32_t        coal_info_size;
+       uint32_t        coal_info;
+       uint32_t        reserved_size;
+       uint32_t        reserved;
 };
 
 struct user__posix_spawn_args_desc {
@@ -339,6 +375,10 @@ struct user__posix_spawn_args_desc {
        user_addr_t             port_actions;   /* pointer to block */
        user_size_t     mac_extensions_size;    /* size of MAC-specific attrs. */
        user_addr_t     mac_extensions;         /* pointer to block */
+       user_size_t     coal_info_size;
+       user_addr_t     coal_info;
+       user_size_t     reserved_size;
+       user_addr_t     reserved;
 };
 
 
diff --git a/bsd/sys/stackshot.h b/bsd/sys/stackshot.h
new file mode 100644 (file)
index 0000000..50a1eb5
--- /dev/null
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2014 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _SYS_STACKSHOT_H
+#define _SYS_STACKSHOT_H
+
+#include <stdint.h>
+
+#define STACKSHOT_CONFIG_TYPE 1
+
+typedef struct stackshot_config {
+       /* Input options */
+       int             sc_pid;                 /* PID to trace, or -1 for the entire system */
+       uint32_t        sc_flags;               /* Stackshot flags */
+       uint64_t        sc_since_timestamp;     /* Get traces of threads that have run since this time (NOT YET SUPPORTED) */
+
+       /* Stackshot results */
+       uint64_t        sc_buffer;              /* Pointer to stackshot buffer */
+       uint32_t        sc_size;                /* Length of the stackshot buffer */
+
+       /* Internals */
+       uint64_t        sc_out_buffer_addr;     /* Location where the kernel should copy the address of the newly mapped buffer in user space */
+       uint64_t        sc_out_size_addr;       /* Location where the kernel should copy the size of the stackshot buffer */
+} stackshot_config_t;
+
+#ifndef KERNEL
+
+#if !LIBSYSCALL_INTERFACE
+typedef struct stackshot_config stackshot_config_t;
+#endif
+
+stackshot_config_t * stackshot_config_create(void);
+int stackshot_config_set_pid(stackshot_config_t * stackshot_config, int pid);
+int stackshot_config_set_flags(stackshot_config_t * stackshot_config, uint32_t flags);
+int stackshot_capture_with_config(stackshot_config_t * stackshot_config);
+void * stackshot_config_get_stackshot_buffer(stackshot_config_t * stackshot_config);
+uint32_t stackshot_config_get_stackshot_size(stackshot_config_t * stackshot_config);
+int stackshot_config_set_size_hint(stackshot_config_t * stackshot_config, uint32_t suggested_size);
+int stackshot_config_dealloc_buffer(stackshot_config_t * stackshot_config);
+int stackshot_config_dealloc(stackshot_config_t * stackshot_config);
+
+#endif /* KERNEL */
+
+#endif /* _SYS_STACKSHOT_H */
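The new header also declares a small userspace wrapper API around stackshot_config_t. A hedged usage sketch built only from the prototypes above; the flag bits are defined elsewhere, so 0 is used here:

#include <sys/stackshot.h>
#include <stdio.h>

/* Capture a stackshot of a single process using the config-object API
 * declared above, then report where the kernel mapped the buffer. */
static int capture_stackshot(int pid)
{
	stackshot_config_t *cfg = stackshot_config_create();
	if (cfg == NULL)
		return -1;

	int err = stackshot_config_set_pid(cfg, pid);
	if (err == 0)
		err = stackshot_config_set_flags(cfg, 0);
	if (err == 0)
		err = stackshot_capture_with_config(cfg);

	if (err == 0) {
		void *buf = stackshot_config_get_stackshot_buffer(cfg);
		uint32_t size = stackshot_config_get_stackshot_size(cfg);
		printf("stackshot: %u bytes at %p\n", size, buf);
	} else {
		fprintf(stderr, "stackshot failed: %d\n", err);
	}

	stackshot_config_dealloc(cfg);
	return err;
}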
index 3614aaac202c8bdd45f424e8a094ee767ea33b26..c9fac7e25f6bdfe702ea4f75ff63029f6c966139 100644 (file)
@@ -480,17 +480,18 @@ extern void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp);
 /*
  * Super-user changeable flags.
  */
+#define        SF_SUPPORTED    0x001f0000      /* mask of superuser supported flags */
 #define        SF_SETTABLE     0xffff0000      /* mask of superuser changeable flags */
 #define        SF_ARCHIVED     0x00010000      /* file is archived */
 #define        SF_IMMUTABLE    0x00020000      /* file may not be changed */
 #define        SF_APPEND       0x00040000      /* writes to file may only append */
 #define SF_RESTRICTED  0x00080000      /* restricted access */
+#define SF_NOUNLINK    0x00100000      /* Item may not be removed, renamed or mounted on */
 
 /*
  * The following two bits are reserved for FreeBSD.  They are not
  * implemented in Mac OS X.
  */
-/* #define SF_NOUNLINK 0x00100000 */   /* file may not be removed or renamed */
 /* #define SF_SNAPSHOT 0x00200000 */   /* snapshot inode */
 /* NOTE: There is no SF_HIDDEN bit. */
 
index f224e999acb64d113cba2556d8e73207d65ddf10..d597067de3330c28baea256965f6d59294bb49ba 100644 (file)
@@ -521,7 +521,7 @@ SYSCTL_DECL(_user);
 #define        KERN_SPECULATIVE_READS  64      /* int: whether speculative reads are disabled */
 #define        KERN_OSVERSION          65      /* for build number i.e. 9A127 */
 #define        KERN_SAFEBOOT           66      /* are we booted safe? */
-#define        KERN_LCTX               67      /* node: login context */
+                       /*      67 was KERN_LCTX (login context) */
 #define KERN_RAGEVNODE         68
 #define KERN_TTY               69      /* node: tty settings */
 #define KERN_CHECKOPENEVT       70      /* spi: check the VOPENEVT flag on vnodes at open time */
@@ -586,6 +586,10 @@ SYSCTL_DECL(_user);
 #define KERN_KDSET_TYPEFILTER   22
 #define KERN_KDBUFWAIT         23
 #define KERN_KDCPUMAP          24
+#define KERN_KDWAIT_BG_TRACE_RESET 25
+#define KERN_KDSET_BG_TYPEFILTER   26
+#define KERN_KDWRITEMAP_V3     27
+#define KERN_KDWRITETR_V3      28
 
 #define CTL_KERN_NAMES { \
        { 0, 0 }, \
@@ -655,7 +659,7 @@ SYSCTL_DECL(_user);
        { "speculative_reads_disabled", CTLTYPE_INT }, \
        { "osversion", CTLTYPE_STRING }, \
        { "safeboot", CTLTYPE_INT }, \
-       { "lctx", CTLTYPE_NODE }, \
+       { "dummy", CTLTYPE_INT },               /* deprecated: lctx */ \
        { "rage_vnode", CTLTYPE_INT }, \
        { "tty", CTLTYPE_NODE },        \
        { "check_openevt", CTLTYPE_INT }, \
@@ -681,13 +685,6 @@ SYSCTL_DECL(_user);
 #define        KERN_PROC_RUID          6       /* by real uid */
 #define        KERN_PROC_LCID          7       /* by login context id */
 
-/*
- * KERN_LCTX subtypes
- */
-#define        KERN_LCTX_ALL           0       /* everything */
-#define        KERN_LCTX_LCID          1       /* by login context id */
-
-
 #if defined(XNU_KERNEL_PRIVATE) || !defined(KERNEL) 
 /* 
  * KERN_PROC subtype ops return arrays of augmented proc structures:
@@ -735,20 +732,10 @@ struct kinfo_proc {
 #define        EPROC_SLEADER   0x02    /* session leader */
 #define        COMAPT_MAXLOGNAME       12
                char    e_login[COMAPT_MAXLOGNAME];     /* short setlogin() name */
-#if CONFIG_LCTX
-               pid_t   e_lcid;
-               int32_t e_spare[3];
-#else
                int32_t e_spare[4];
-#endif
        } kp_eproc;
 };
 
-struct kinfo_lctx {
-       pid_t   id;     /* Login Context ID */
-       int     mc;     /* Member Count */
-};
-
 #endif /* defined(XNU_KERNEL_PRIVATE) || !defined(KERNEL) */
 
 #ifdef BSD_KERNEL_PRIVATE
@@ -803,12 +790,7 @@ struct user32_kinfo_proc {
                short   e_xswrss;
                int32_t e_flag;
                char    e_login[COMAPT_MAXLOGNAME];     /* short setlogin() name */
-#if CONFIG_LCTX
-               pid_t   e_lcid;
-               int32_t e_spare[3];
-#else
                int32_t e_spare[4];
-#endif
        } kp_eproc;
 };
 struct user64_kinfo_proc {
@@ -832,12 +814,7 @@ struct user64_kinfo_proc {
                short   e_xswrss;
                int32_t e_flag;
                char    e_login[COMAPT_MAXLOGNAME];     /* short setlogin() name */
-#if CONFIG_LCTX
-               pid_t   e_lcid;
-               int32_t e_spare[3];
-#else
                int32_t e_spare[4];
-#endif
        } kp_eproc;
 };
 
index e69e93b2b415e82bf8997c1ca12836b799b5bf46..297de2cc265991d7ef9eb510a678a218d3e73e29 100644 (file)
@@ -42,7 +42,7 @@ typedef       void    sy_munge_t(void *);
 
 struct sysent {                /* system call table */
        sy_call_t       *sy_call;       /* implementing function */
-#if CONFIG_REQUIRES_U32_MUNGING
+#if CONFIG_REQUIRES_U32_MUNGING || (__arm__ && (__BIGGEST_ALIGNMENT__ > 4))
        sy_munge_t      *sy_arg_munge32; /* system call arguments munger for 32-bit process */
 #endif
        int32_t         sy_return_type; /* system call return types */
@@ -57,7 +57,7 @@ extern struct sysent sysent[];
 #endif /* __INIT_SYSENT_C__ */
 
 extern int nsysent;
-#define NUM_SYSENT     490     /* Current number of defined syscalls */
+#define NUM_SYSENT     500     /* Current number of defined syscalls */
 
 /* 
  * Valid values for sy_cancel
index df33790537e803563aa0ed8fde4daaac52710a41..2cbbba27fa15c68bf95baf555479469d76ab938b 100644 (file)
@@ -123,6 +123,7 @@ extern const char copyright[];              /* system copyright */
 
 extern int     boothowto;      /* reboot flags, from console subsystem */
 extern int     show_space;
+extern int     minimalboot;
 
 extern int nblkdev;            /* number of entries in bdevsw */
 extern int nchrdev;            /* number of entries in cdevsw */
@@ -228,7 +229,7 @@ typedef struct __throttle_info_handle *throttle_info_handle_t;
 int    throttle_info_ref_by_mask(uint64_t throttle_mask, throttle_info_handle_t *throttle_info_handle);
 void   throttle_info_rel_by_mask(throttle_info_handle_t throttle_info_handle);
 void   throttle_info_update_by_mask(void *throttle_info_handle, int flags);
-void   throttle_info_disable_throttle(int devno);
+void   throttle_info_disable_throttle(int devno, boolean_t isfusion);
 /*
  * 'throttle_info_handle' acquired via 'throttle_info_ref_by_mask'
  * 'policy' should be specified as either IOPOL_UTILITY or IPOL_THROTTLE,
index fdf7776170a0fde389d0f78973b64e209eced8bc..da5e4d784a1a057f43de3ec9376f44b5ca1da769 100644 (file)
 #include <sys/_types/_fd_def.h>
 #include <sys/_types/_timespec.h>
 #include <sys/_types/_timeval.h>
+
+#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
+#include <sys/_types/_timeval64.h>
+#endif /* !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) */
+
 #ifdef KERNEL
 #include <sys/_types/_user_timespec.h>
 #include <sys/_types/_user32_timespec.h>
index b3389d4b03d1b842ae259b36912944e17a830c07..720ae9818af53f6a2d9c9b2cc325fec05889ff0e 100644 (file)
@@ -89,6 +89,7 @@ int ubc_cs_generation_check(vnode_t);
 int cs_entitlements_blob_get(proc_t, void **, size_t *);
 int cs_blob_get(proc_t, void **, size_t *);
 const char *cs_identity_get(proc_t);
+
 #endif
 
 /* cluster IO routines */
@@ -118,6 +119,9 @@ void        cluster_zero(upl_t, upl_offset_t, int, buf_t);
 int    cluster_copy_upl_data(uio_t, upl_t, int, int *);
 int    cluster_copy_ubc_data(vnode_t, uio_t, int *, int);
 
+typedef struct cl_direct_read_lock cl_direct_read_lock_t;
+cl_direct_read_lock_t *cluster_lock_direct_read(vnode_t vp, lck_rw_type_t exclusive);
+void cluster_unlock_direct_read(cl_direct_read_lock_t *lck);
 
 /* UPL routines */
 int    ubc_create_upl(vnode_t, off_t, int, upl_t *, upl_page_info_t **, int);
@@ -134,6 +138,8 @@ upl_size_t ubc_upl_maxbufsize(void);
 
 int    is_file_clean(vnode_t, off_t);
 
+errno_t mach_to_bsd_errno(kern_return_t mach_err);
+
 __END_DECLS
 
 #endif /* _SYS_UBC_H_ */
index 90424d745b26b5dbaf4d9403d23a91080ae8da7c..f5b04763a43ffd7c26010b8694dc53959834f8af 100644 (file)
 #include <sys/vnode.h>
 #include <sys/ubc.h>
 #include <sys/mman.h>
+#include <sys/codesign.h>
 
 #include <sys/cdefs.h>
 
 #include <kern/locks.h>
 #include <mach/memory_object_types.h>
 
-#include <libkern/crypto/sha1.h>
-
 
 #define UBC_INFO_NULL  ((struct ubc_info *) 0)
 
@@ -95,6 +94,7 @@ struct cl_writebehind {
        struct cl_wextent cl_clusters[MAX_CLUSTERS];    /* packed write behind clusters */
 };
 
+struct cs_hash;
 
 struct cs_blob {
        struct cs_blob  *csb_next;
@@ -107,10 +107,11 @@ struct cs_blob {
        vm_size_t       csb_mem_size;
        vm_offset_t     csb_mem_offset;
        vm_address_t    csb_mem_kaddr;
-       unsigned char   csb_sha1[SHA1_RESULTLEN];
-       unsigned int    csb_sigpup;
+       unsigned char   csb_cdhash[CS_CDHASH_LEN];
+        struct cs_hash  *csb_hashtype;
        const char      *csb_teamid;
-       unsigned int    csb_platform_binary; 
+       unsigned int    csb_platform_binary:1;
+       unsigned int    csb_platform_path:1;
 };
 
 /*
@@ -195,7 +196,7 @@ int UBCINFOEXISTS(const struct vnode *);
 
 /* code signing */
 struct cs_blob;
-int    ubc_cs_blob_add(vnode_t, cpu_type_t, off_t, vm_address_t, vm_size_t, int);
+int    ubc_cs_blob_add(vnode_t, cpu_type_t, off_t, vm_address_t, vm_size_t, int, struct cs_blob **);
 int    ubc_cs_sigpup_add(vnode_t, vm_address_t, vm_size_t);
 struct cs_blob *ubc_get_cs_blobs(vnode_t);
 void   ubc_get_cs_mtime(vnode_t, struct timespec *);
index 959eb501c2776fc8e5ca8911e6bf500443e160c3..e1535ae82d4352bbc362cf4af26390345f5a662a 100644 (file)
@@ -144,7 +144,7 @@ struct uio {
        off_t                   uio_offset;
        enum uio_seg    uio_segflg;
        enum uio_rw     uio_rw;
-       user_ssize_t    uio_resid_64;
+       user_size_t     uio_resid_64;
        int                             uio_size;               /* size for use with kfree */
        int                             uio_max_iovs;   /* max number of iovecs this uio_t can hold */
        u_int32_t               uio_flags;              
index 8fc6c578ac2b2fda11908dcbe6851f0b56a211cb..4e52357544b07fb9adfa8916a26c2a7df851bb33 100644 (file)
@@ -65,6 +65,7 @@
 #define        _SYS_USER_H_
 
 #include <sys/appleapiopts.h>
+struct waitq_set;
 #ifndef KERNEL
 /* stuff that *used* to be included by user.h, or is now needed */
 #include <errno.h>
@@ -144,7 +145,7 @@ struct uthread {
        union {
                struct _select_data {
                        u_int64_t abstime;
-                       char * wql;
+                       uint64_t *wqp;
                        int count;
                        struct select_nocancel_args *args;      /* original syscall arguments */
                        int32_t *retval;                                        /* place to store return val */
@@ -158,12 +159,12 @@ struct uthread {
                struct _kevent {
                        struct _kqueue_scan scan;/* space for the generic data */
                        struct fileproc *fp;     /* fileproc we hold iocount on */
-                       int fd;                  /* filedescriptor for kq */
-                       int32_t *retval;         /* place to store return val */
+                       int fd;                          /* filedescriptor for kq */
+                       unsigned int eventflags; /* flags to determine kevent size/direction */
+                       int eventcount;              /* user-level event count */
+                       int eventout;                /* number of events output */
+                       int32_t *retval;             /* place to store return val */
                        user_addr_t eventlist;   /* user-level event list address */
-                       size_t eventsize;       /* kevent or kevent64_s */
-                       int eventcount;         /* user-level event count */
-                       int eventout;            /* number of events output */
                } ss_kevent;                     /* saved state for kevent() */
 
                struct _kauth {
@@ -198,8 +199,8 @@ struct uthread {
        struct proc * uu_proc;
        thread_t uu_thread;
        void * uu_userstate;
-       wait_queue_set_t uu_wqset;                      /* cached across select calls */
-       size_t uu_allocsize;                            /* ...size of select cache */
+       struct waitq_set *uu_wqset;             /* waitq state cached across select calls */
+       size_t uu_wqstate_sz;                   /* ...size of uu_wqset buffer */
        int uu_flag;
        sigset_t uu_siglist;                            /* signals pending for the thread */
        sigset_t  uu_sigwait;                           /*  sigwait on this thread*/
@@ -235,6 +236,16 @@ struct uthread {
         void   *       uu_vps[32];
         void    *       uu_pcs[32][10];
 #endif
+
+#if PROC_REF_DEBUG
+#define NUM_PROC_REFS_TO_TRACK 32
+#define PROC_REF_STACK_DEPTH 10
+       int             uu_proc_refcount;
+       int             uu_pindex;
+       void    *       uu_proc_ps[NUM_PROC_REFS_TO_TRACK];
+       uintptr_t       uu_proc_pcs[NUM_PROC_REFS_TO_TRACK][PROC_REF_STACK_DEPTH];
+#endif
+
 #if CONFIG_DTRACE
        uint32_t        t_dtrace_errno; /* Most recent errno */
        siginfo_t       t_dtrace_siginfo;
@@ -277,7 +288,6 @@ struct uthread {
 #endif /* CONFIG_DTRACE */
        void *          uu_threadlist;
        char *          pth_name;
-       struct label *  uu_label;       /* MAC label */
 
        /* Document Tracking struct used to track a "tombstone" for a document */
        struct doc_tombstone *t_tombstone;
index 309842aee55a45ffc46aae8dd6de08619f64ef90..e243a7a2f59f041c7ee1397e02d9c089e16a74dd 100644 (file)
@@ -170,6 +170,7 @@ enum vtagtype       {
 #define IO_SYSCALL_DISPATCH            0x100000        /* I/O was originated from a file table syscall */
 #define IO_SWAP_DISPATCH               0x200000        /* I/O was originated from the swap layer */
 #define IO_SKIP_ENCRYPTION             0x400000        /* Skips en(de)cryption on the IO. Must be initiated from kernel */
+#define IO_EVTONLY                      0x800000        /* the i/o is being done on an fd that's marked O_EVTONLY */
 
 /*
  * Component Name: this structure describes the pathname
@@ -247,6 +248,14 @@ struct vnode_fsparam {
 
 #define VNCREATE_FLAVOR        0
 #define VCREATESIZE sizeof(struct vnode_fsparam)
+#ifdef KERNEL_PRIVATE
+/*
+ * For use with SPI to create trigger vnodes.
+ */
+struct vnode_trigger_param;
+#define VNCREATE_TRIGGER       (('T' << 8) + ('V'))
+#define VNCREATE_TRIGGER_SIZE  sizeof(struct vnode_trigger_param)
+#endif /* KERNEL_PRIVATE */
 
 
 #ifdef KERNEL_PRIVATE
@@ -450,9 +459,6 @@ struct vnode_trigger_param {
        uint32_t                                vnt_flags;  /* optional flags (see below) */
 };
 
-#define VNCREATE_TRIGGER       (('T' << 8) + ('V'))
-#define VNCREATE_TRIGGER_SIZE  sizeof(struct vnode_trigger_param)
-
 /*
  * vnode trigger flags (vnt_flags)
  *
@@ -683,7 +689,8 @@ struct vnode_attr {
 /* 
  * Flags for va_dataprotect_flags
  */
-#define VA_DP_RAWENCRYPTED 0x0001
+#define VA_DP_RAWENCRYPTED   0x0001
+#define VA_DP_RAWUNENCRYPTED 0x0002
 
 #endif
 
@@ -801,6 +808,37 @@ __BEGIN_DECLS
  */
 errno_t        vnode_create(uint32_t, uint32_t, void  *, vnode_t *);
 
+#if KERNEL_PRIVATE
+/*!
+ @function vnode_create_empty
+ @abstract Create an empty, uninitialized vnode.
+ @discussion Returns with an iocount held on the vnode which must eventually be
+ dropped with vnode_put(). The next operation performed on the vnode must be
+ vnode_initialize (or vnode_put if the vnode is not needed anymore).
+ This interface is provided as a mechanism to pre-flight the acquisition of a vnode
+ for certain filesystem operations which may need to obtain a vnode without
+ filesystem locks held. It is imperative that nothing be done with the vnode until
+ the succeeding vnode_initialize (or vnode_put, as the case may be) call.
+ @param vpp  Pointer to a vnode pointer, to be filled in with newly created vnode.
+ @return 0 for success, error code otherwise.
+ */
+errno_t        vnode_create_empty(vnode_t *);
+
+/*!
+ @function vnode_initialize
+ @abstract Initialize a vnode obtained by vnode_create_empty
+ @discussion Does not drop the iocount held on the vnode, which must eventually be
+ dropped with vnode_put().  In case of an error, however, the vnode's iocount is
+ dropped and the vnode must not be referenced again by the caller.
+ @param flavor Should be VNCREATE_FLAVOR.
+ @param size  Size of the struct vnode_fsparam in "data".
+ @param data  Pointer to a struct vnode_fsparam containing initialization information.
+ @param vpp  Pointer to a vnode pointer, to be filled in with newly created vnode.
+ @return 0 for success, error code otherwise.
+ */
+errno_t        vnode_initialize(uint32_t, uint32_t, void  *, vnode_t *);
+#endif /* KERNEL_PRIVATE */
+
 /*!
  @function vnode_addfsref
  @abstract Mark a vnode as being stored in a filesystem hash.
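
The vnode_create_empty/vnode_initialize pair above is meant to be used as a two-step sequence. A minimal sketch of that sequence follows; it is illustrative only, and my_fs_get_vnode, my_fs_lock/my_fs_unlock and my_fs_vnodeop_p are hypothetical names, not part of this change.

/*
 * Hedged sketch of the two-step path: pre-flight the vnode while no
 * filesystem locks are held, then initialize it under the fs lock.
 */
static int
my_fs_get_vnode(mount_t mp, void *fs_node, vnode_t *vpp)
{
	struct vnode_fsparam vfsp;
	vnode_t vp = NULLVP;
	int error;

	error = vnode_create_empty(&vp);	/* no filesystem locks held here */
	if (error)
		return (error);

	my_fs_lock(fs_node);			/* hypothetical per-node lock */

	bzero(&vfsp, sizeof(vfsp));
	vfsp.vnfs_mp = mp;
	vfsp.vnfs_vtype = VREG;
	vfsp.vnfs_str = "my_fs";
	vfsp.vnfs_fsnode = fs_node;
	vfsp.vnfs_vops = my_fs_vnodeop_p;	/* hypothetical vnode op vector */

	error = vnode_initialize(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp);

	my_fs_unlock(fs_node);

	if (error)
		return (error);	/* iocount already dropped by vnode_initialize */

	*vpp = vp;		/* caller is responsible for vnode_put() */
	return (0);
}
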
@@ -1116,6 +1154,58 @@ void     vnode_setnoreadahead(vnode_t);
  */
 void   vnode_clearnoreadahead(vnode_t);
 
+/*!
+ @function vnode_isfastdevicecandidate
+ @abstract Check if a vnode is a candidate to store on the fast device of a composite disk system
+ @param vp The vnode which you want to test.
+ @return Nonzero if the vnode is marked as a fast-device candidate, 0 otherwise.
+ */
+int    vnode_isfastdevicecandidate(vnode_t);
+
+/*!
+ @function vnode_setfastdevicecandidate
+ @abstract Mark a vnode as a candidate to store on the fast device of a composite disk system.
+ @discussion If the vnode is a directory, all its children will inherit this bit.
+ @param vp The vnode which you want marked.
+ @return void.
+ */
+void   vnode_setfastdevicecandidate(vnode_t);
+
+/*!
+ @function vnode_clearfastdevicecandidate
+ @abstract Clear the status of a vnode being a candidate to store on the fast device of a composite disk system.
+ @param vp The vnode whose flag to clear.
+ @return void.
+ */
+void   vnode_clearfastdevicecandidate(vnode_t);
+
+/*!
+ @function vnode_isautocandidate
+ @abstract Check if a vnode was automatically selected to be a fast-dev candidate (see vnode_setfastdevicecandidate)
+ @param vp The vnode which you want to test.
+ @return Nonzero if the vnode was automatically marked as a fast-device candidate, 0 otherwise.
+ */
+int    vnode_isautocandidate(vnode_t);
+
+/*!
+ @function vnode_setautocandidate
+ @abstract Mark a vnode as an automatically selected candidate for storing on the fast device of a composite disk system.
+ @discussion If the vnode is a directory, all its children will inherit this bit.
+ @param vp The vnode which you want marked.
+ @return void.
+ */
+void   vnode_setautocandidate(vnode_t);
+
+/*!
+ @function vnode_clearautocandidate
+ @abstract Clear the status of a vnode being an automatic candidate (see above)
+ @param vp The vnode whose flag to clear.
+ @return void.
+ */
+void   vnode_clearautocandidate(vnode_t);
+
 /* left only for compat reasons as User code depends on this from getattrlist, for ex */
 
 /*!
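
A hedged sketch of how a filesystem might drive the fast-device candidate KPI documented above; the hot-file policy implied by the is_hot argument is an assumption for illustration, not part of this change.

/*
 * Hypothetical policy hook: mark frequently accessed files as candidates
 * for the fast device of a composite disk, and clear only bits we set
 * ourselves (never the automatically selected ones).
 */
static void
my_fs_tag_for_fast_device(vnode_t vp, int is_hot)
{
	if (is_hot) {
		if (!vnode_isfastdevicecandidate(vp))
			vnode_setfastdevicecandidate(vp);
	} else if (vnode_isfastdevicecandidate(vp) && !vnode_isautocandidate(vp)) {
		vnode_clearfastdevicecandidate(vp);
	}
}
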
@@ -1481,20 +1571,6 @@ int      vnode_recycle(vnode_t);
 
 #endif /* BSD_KERNEL_PRIVATE  */
 
-/*!
- @function vnode_notify
- @abstract Send a notification up to VFS.  
- @param vp Vnode for which to provide notification.
- @param vap Attributes for that vnode, to be passed to fsevents.
- @discussion Filesystem determines which attributes to pass up using 
- vfs_get_notify_attributes(&vap).  The most specific events possible should be passed,
- e.g. VNODE_EVENT_FILE_CREATED on a directory rather than just VNODE_EVENT_WRITE, but
- a less specific event can be passed up if more specific information is not available.
- Will not reenter the filesystem.
- @return 0 for success, else an error code.
- */ 
-int    vnode_notify(vnode_t, uint32_t, struct vnode_attr*);
-
 /*!
  @function vnode_ismonitored
  @abstract Check whether a file has watchers that would make it useful to query a server
@@ -1516,15 +1592,6 @@ int      vnode_ismonitored(vnode_t);
 int    vnode_isdyldsharedcache(vnode_t);
 
 
-/*!
- @function vfs_get_notify_attributes
- @abstract Determine what attributes are required to send up a notification with vnode_notify().
- @param vap Structure to initialize and activate required attributes on.
- @discussion Will not reenter the filesystem.
- @return 0 for success, nonzero for error (currently always succeeds).
- */ 
-int    vfs_get_notify_attributes(struct vnode_attr *vap);
-
 /*!
  @function vn_getpath_fsenter
  @abstract Attempt to get a vnode's path, willing to enter the filesystem.
@@ -1637,11 +1704,35 @@ errno_t vnode_close(vnode_t, int, vfs_context_t);
  */
 int vn_getpath(struct vnode *vp, char *pathbuf, int *len);
 
+/*!
+ @function vnode_notify
+ @abstract Send a notification up to VFS.  
+ @param vp Vnode for which to provide notification.
+ @param vap Attributes for that vnode, to be passed to fsevents.
+ @discussion Filesystem determines which attributes to pass up using 
+ vfs_get_notify_attributes(&vap).  The most specific events possible should be passed,
+ e.g. VNODE_EVENT_FILE_CREATED on a directory rather than just VNODE_EVENT_WRITE, but
+ a less specific event can be passed up if more specific information is not available.
+ Will not reenter the filesystem.
+ @return 0 for success, else an error code.
+ */ 
+int    vnode_notify(vnode_t, uint32_t, struct vnode_attr*);
+
+/*!
+ @function vfs_get_notify_attributes
+ @abstract Determine what attributes are required to send up a notification with vnode_notify().
+ @param vap Structure to initialize and activate required attributes on.
+ @discussion Will not reenter the filesystem.
+ @return 0 for success, nonzero for error (currently always succeeds).
+ */ 
+int    vfs_get_notify_attributes(struct vnode_attr *vap);
+
 /*
  * Flags for the vnode_lookup and vnode_open
  */
 #define VNODE_LOOKUP_NOFOLLOW          0x01
 #define        VNODE_LOOKUP_NOCROSSMOUNT       0x02
+#define        VNODE_LOOKUP_CROSSMOUNTNOWAIT   0x04
 /*!
  @function vnode_lookup
  @abstract Convert a path into a vnode.
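
The vnode_notify/vfs_get_notify_attributes pair documented above is used as a two-call pattern: ask VFS which attributes it wants, fill them in, then post the event. A minimal sketch follows, assuming a hypothetical my_fs_fill_attrs helper.

/* Sketch: tell VFS (and fsevents) that a file was created in directory dvp. */
static void
my_fs_post_create_event(vnode_t dvp)
{
	struct vnode_attr va;

	VATTR_INIT(&va);
	if (vfs_get_notify_attributes(&va) == 0) {
		my_fs_fill_attrs(dvp, &va);	/* hypothetical: supply the requested attributes */
		vnode_notify(dvp, VNODE_EVENT_FILE_CREATED, &va);
	}
}
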
index abc1566a45e2db046bc3c151858750efb4c2b281..73722ba638d22eef2fa01859110ad4583d0d6e7c 100644 (file)
@@ -260,6 +260,13 @@ struct vnode {
 #define VNEEDSSNAPSHOT 0x1000000
 #define VNOCS         0x2000000        /* is there no code signature available */
 #define VISDIRTY       0x4000000        /* vnode will need IO if reclaimed */  
+#define VFASTDEVCANDIDATE  0x8000000        /* vnode is a candidate to store on a fast device */
+#define VAUTOCANDIDATE 0x10000000       /* vnode was automatically marked as a fast-dev candidate */
+/*
+  0x20000000 not used
+  0x40000000 not used
+  0x80000000 not used.
+*/
 
 /*
  * This structure describes vnode data which is specific to a file descriptor.
@@ -552,6 +559,22 @@ void       vfsinit(void);
 void vnode_lock(vnode_t);
 void vnode_unlock(vnode_t);
 
+void vn_print_state(vnode_t /* vp */, const char * /* fmt */, ...)
+    __printflike(2,3);
+
+#if DEVELOPMENT || DEBUG
+#define VNASSERT(exp, vp, msg)                                         \
+do {                                                                   \
+       if (__improbable(!(exp))) {                                     \
+               vn_print_state(vp, "VNASSERT failed %s:%d\n", __FILE__, \
+                   __LINE__);                                          \
+               panic msg;                                              \
+       }                                                               \
+} while (0)
+#else
+#define VNASSERT(exp, vp, msg)
+#endif /* DEVELOPMENT || DEBUG */
+
 /*
  * XXX exported symbols; should be static
  */
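
A short usage sketch for the VNASSERT macro above; my_fs_check_vp and the invariant chosen are illustrative assumptions.

/* Sketch only: assert that an iocount is held before touching vnode internals. */
static void
my_fs_check_vp(vnode_t vp)
{
	VNASSERT(vp->v_iocount > 0, vp,
	    ("my_fs_check_vp: no iocount held on vnode %p", vp));
}
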
@@ -570,6 +593,7 @@ vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
                 int *numdirent, vfs_context_t ctxp);
 
 void vnode_setswapmount(vnode_t);
+int64_t        vnode_getswappin_avail(vnode_t);
 
 #if CONFIG_TRIGGERS
 /* VFS Internal Vnode Trigger Interfaces (Private) */
diff --git a/bsd/sys/work_interval.h b/bsd/sys/work_interval.h
new file mode 100644 (file)
index 0000000..cc9ba9f
--- /dev/null
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _SYS_WORK_INTERVAL_H
+#define _SYS_WORK_INTERVAL_H
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+
+/*
+ * Trusted clients with deadline-sensitive work may report information
+ * about the execution of their work using the work interval facility.
+ * This is intended to be a higher-level semantic than realtime scheduling,
+ * which operates at the level of thread block/unblock. A high-level
+ * operation may have many blocking points, including IPC to other tasks,
+ * and this metric will capture the overall time to complete a unit of
+ * work.
+ *
+ * A work interval is defined by several timestamps, namely (S)tart,
+ * (F)inish, (D)eadline, and (N)ext start.
+ *
+ *   ... ----+==================+--------+--+==== ...
+ *           |                  |        |  |
+ *           S                  F        D  N
+ *
+ *           \__________________/
+ *                  Active
+ *           \___________________________/
+ *                   Work Interval
+ *
+ *                               \_________/
+ *                                    |
+ *   report information here ---------+
+ *
+ * Definitions:
+ *
+ *   Start: Absolute time when the current deadline-oriented work began. Due
+ *          to scheduling latency, preemption, and blocking points, the
+ *          thread controlling the work interval may actually begin
+ *          executing after this ideal time (which may be the previous work
+ *          interval's "next start")
+ *   Finish: Absolute time when the current deadline-oriented work finished.
+ *          This will typically be a timestamp taken before reporting using
+ *          the work interval interface.
+ *   Deadline: Absolute time by which the current work was expected to finish.
+ *          In cases where the amount of computation (or preemption, or time
+ *          spent blocked) causes the active period to take longer than
+ *          expected, F may be greater than D.
+ *   Next start: Absolute time when the next deadline-oriented work is
+ *          expected to begin. This is typically the same as Deadline.
+ *   Active: The fraction of the work interval spent completing the work. In
+ *          cases where the Finish time exceeded the Deadline, this fraction
+ *          will be >1.0.
+ *
+ * Basic Use:
+ *
+ *   Clients should report information for a work interval after finishing
+ *   work for the current interval but before the next work interval begins.
+ *
+ *   If Finish far exceeds the previously expected Deadline, the
+ *   caller may adjust Next Start to align to a multiple of the period
+ *   (and skip over several work intervals that could not be
+ *   executed).
+ *
+ * Caution (!):
+ *
+ *   Because the information supplied via this facility directly influences power
+ *   management decisions, clients should strive to be as accurate as possible.
+ *   Failure to do so will adversely impact system power and performance.
+ *
+ */
+#ifndef KERNEL
+
+typedef struct work_interval *work_interval_t;
+
+/* Create a new work interval handle (currently for the current thread only). Flags is unused */
+int            work_interval_create(work_interval_t *interval_handle, uint32_t flags);
+
+/* Notify the power management subsystem that the work for a current interval has completed */
+int            work_interval_notify(work_interval_t interval_handle, uint64_t start, uint64_t finish, uint64_t deadline, uint64_t next_start, uint32_t flags);
+
+/* Notify, with "finish" implicitly set to the current time */
+int            work_interval_notify_simple(work_interval_t interval_handle, uint64_t start, uint64_t deadline, uint64_t next_start);
+
+/* Deallocate work interval (currently for the current thread only) */
+int            work_interval_destroy(work_interval_t interval_handle);
+
+#endif /* KERNEL */
+
+#if PRIVATE
+
+/* Private interface between Libsyscall and xnu */
+#define WORK_INTERVAL_OPERATION_CREATE 0x00000001      /* arg is a uint64_t * that accepts work interval ID as an OUT param */
+#define WORK_INTERVAL_OPERATION_DESTROY        0x00000002
+#define WORK_INTERVAL_OPERATION_NOTIFY 0x00000003      /* arg is a work_interval_notification_t */
+
+struct work_interval_notification {
+       uint64_t        start;
+       uint64_t        finish;
+       uint64_t        deadline;
+       uint64_t        next_start;
+       uint32_t        flags;
+       uint32_t        unused1;
+};
+typedef struct work_interval_notification *work_interval_notification_t;
+
+int            __work_interval_ctl(uint32_t operation, uint64_t work_interval_id, void *arg, size_t len);
+
+#endif /* PRIVATE */
+
+__END_DECLS
+
+#endif /* _SYS_WORK_INTERVAL_H */
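
A minimal user-space sketch of the reporting pattern the header describes, assuming a hypothetical render_one_frame work unit and a period already converted to mach absolute time units; error handling is elided.

#include <sys/work_interval.h>
#include <mach/mach_time.h>

extern void render_one_frame(void);		/* hypothetical work unit */

static void
render_loop(uint64_t period_abs)		/* period in mach absolute time units */
{
	work_interval_t wi;

	if (work_interval_create(&wi, 0) != 0)
		return;

	uint64_t start = mach_absolute_time();
	for (int frame = 0; frame < 1000; frame++) {
		uint64_t deadline = start + period_abs;

		render_one_frame();

		uint64_t finish = mach_absolute_time();
		uint64_t next_start = deadline;	/* typically the same as the deadline */

		/* Report after finishing this interval, before the next one begins. */
		work_interval_notify(wi, start, finish, deadline, next_start, 0);
		start = next_start;
	}
	work_interval_destroy(wi);
}
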
index 10f9cecaa932a089adeba4177b5c221d0426cb46..1e5f59ecc8ad72ea77085fd5fe60d1ea3c9c06e3 100644 (file)
@@ -24,9 +24,6 @@ EXPORT_MI_LIST        = ${KERNELFILES}
 
 EXPORT_MI_DIR = uuid
 
-# /System/Library/Frameworks/System.framework/PrivateHeaders
-INSTALL_MI_LCL_LIST = ${DATAFILES}
-
 # /System/Library/Frameworks/Kernel.framework/PrivateHeaders
 
 INSTALL_KF_MI_LCL_LIST =  ${KERNELFILES}
index c3313018cbb7907503b055c33de19d1c586915e0..795a408087a6360737c02e5d675bfbafac197bc0 100644 (file)
@@ -76,8 +76,7 @@ extern mach_msg_return_t mach_msg_send(mach_msg_header_t *msg,
                mach_msg_option_t option, mach_msg_size_t send_size,
                mach_msg_timeout_t send_timeout, mach_port_name_t notify);
 extern thread_t convert_port_to_thread(ipc_port_t port);
-extern void ipc_port_release(ipc_port_t);
-
+extern void ipc_port_release_send(ipc_port_t port);
 
 
 
@@ -256,7 +255,7 @@ catch_mach_exception_raise(
                       (void *) &thread_port) == MACH_MSG_SUCCESS)) {
         if (IPC_PORT_VALID(thread_port)) {
           th_act = convert_port_to_thread(thread_port);
-          ipc_port_release(thread_port);
+          ipc_port_release_send(thread_port);
        } else {
           th_act = THREAD_NULL;
        }
index 1a71f6b69b1a943c425e4d5051a839ab44598cda..19a4be3d1c9ada9db2a89ac7a0c9cc3893d2ebbf 100644 (file)
@@ -1754,6 +1754,52 @@ vnode_clearnoreadahead(vnode_t vp)
        vnode_unlock(vp);
 }
 
+int
+vnode_isfastdevicecandidate(vnode_t vp)
+{
+       return ((vp->v_flag & VFASTDEVCANDIDATE)? 1 : 0);
+}
+
+void
+vnode_setfastdevicecandidate(vnode_t vp)
+{
+       vnode_lock_spin(vp);
+       vp->v_flag |= VFASTDEVCANDIDATE;
+       vnode_unlock(vp);
+}
+
+void
+vnode_clearfastdevicecandidate(vnode_t vp)
+{
+       vnode_lock_spin(vp);
+       vp->v_flag &= ~VFASTDEVCANDIDATE;
+       vnode_unlock(vp);
+}
+
+int
+vnode_isautocandidate(vnode_t vp)
+{
+       return ((vp->v_flag & VAUTOCANDIDATE)? 1 : 0);
+}
+
+void
+vnode_setautocandidate(vnode_t vp)
+{
+       vnode_lock_spin(vp);
+       vp->v_flag |= VAUTOCANDIDATE;
+       vnode_unlock(vp);
+}
+
+void
+vnode_clearautocandidate(vnode_t vp)
+{
+       vnode_lock_spin(vp);
+       vp->v_flag &= ~VAUTOCANDIDATE;
+       vnode_unlock(vp);
+}
+
+
+
 
 /* mark vnode_t to skip vflush() is SKIPSYSTEM */
 void 
@@ -1833,7 +1879,7 @@ vnode_setname(vnode_t vp, char * name)
 void 
 vnode_vfsname(vnode_t vp, char * buf)
 {
-        strncpy(buf, vp->v_mount->mnt_vtable->vfc_name, MFSNAMELEN);
+        strlcpy(buf, vp->v_mount->mnt_vtable->vfc_name, MFSNAMELEN);
 }
 
 /* return the FS type number */
@@ -2459,6 +2505,11 @@ vnode_setattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx)
                goto out;
        }
 
+       /* Never allow the setting of any unsupported superuser flags. */
+       if (VATTR_IS_ACTIVE(vap, va_flags)) {
+           vap->va_flags &= (SF_SUPPORTED | UF_SETTABLE);
+       }
+
        error = VNOP_SETATTR(vp, vap, ctx);
 
        if ((error == 0) && !VATTR_ALL_SUPPORTED(vap))
@@ -3326,7 +3377,7 @@ VNOP_IOCTL(vnode_t vp, u_long command, caddr_t data, int fflag, vfs_context_t ct
         * We have to be able to use the root filesystem's device vnode even when
         * devfs isn't mounted (yet/anymore), so we can't go looking at its mount
         * structure.  If there is no data pointer, it doesn't matter whether
-        * the device is 64-bit ready.  Any command (like DKIOCSYNCHRONIZECACHE)
+        * the device is 64-bit ready.  Any command (like DKIOCSYNCHRONIZE)
         * which passes NULL for its data pointer can therefore be used during
         * mount or unmount of the root filesystem.
         *
@@ -3826,11 +3877,6 @@ vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, s
        } else {
                _err = VNOP_RENAME(fdvp, *fvpp, fcnp, tdvp, *tvpp, tcnp, ctx);
        }
-#if CONFIG_MACF
-       if (_err == 0) {
-               mac_vnode_notify_rename(ctx, *fvpp, tdvp, tcnp);
-       }
-#endif
 
        /*
         * If moved to a new directory that is restricted,
@@ -3850,6 +3896,12 @@ vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, s
                }
        }
 
+#if CONFIG_MACF
+       if (_err == 0) {
+               mac_vnode_notify_rename(ctx, *fvpp, tdvp, tcnp);
+       }
+#endif
+
 #if CONFIG_APPLEDOUBLE
        /* 
         * Rename any associated extended attribute file (._ AppleDouble file).
@@ -4891,6 +4943,9 @@ VNOP_ADVLOCK(struct vnode *vp, caddr_t id, int op, struct flock *fl, int flags,
                if ((vp->v_flag & VLOCKLOCAL)) {
                        /* Advisory locking done at this layer */
                        _err = lf_advlock(&a);
+               } else if (flags & F_OFD_LOCK) {
+                       /* Non-local locking doesn't work for OFD locks */
+                       _err = err_advlock(&a);
                } else {
                        /* Advisory locking done by underlying filesystem */
                        _err = (*vp->v_op[vnop_advlock_desc.vdesc_offset])(&a);
index 54d0323f84a0273ecd2f5f0683df6c45c733e7a6..38d7a3f2453d63833981b85e0de6266694f0cfd6 100644 (file)
@@ -473,7 +473,7 @@ struct getattrlist_attrtab {
 static struct getattrlist_attrtab getattrlist_common_tab[] = {
        {ATTR_CMN_NAME,         VATTR_BIT(va_name),             sizeof(struct attrreference),   KAUTH_VNODE_READ_ATTRIBUTES},
        {ATTR_CMN_DEVID,        0,                              sizeof(dev_t),                  KAUTH_VNODE_READ_ATTRIBUTES},
-       {ATTR_CMN_FSID,         VATTR_BIT(va_fsid),             sizeof(fsid_t),                 KAUTH_VNODE_READ_ATTRIBUTES},
+       {ATTR_CMN_FSID,         0,                              sizeof(fsid_t),                 KAUTH_VNODE_READ_ATTRIBUTES},
        {ATTR_CMN_OBJTYPE,      0,                              sizeof(fsobj_type_t),           KAUTH_VNODE_READ_ATTRIBUTES},
        {ATTR_CMN_OBJTAG,       0,                              sizeof(fsobj_tag_t),            KAUTH_VNODE_READ_ATTRIBUTES},
        {ATTR_CMN_OBJID,        VATTR_BIT(va_fileid) | VATTR_BIT(va_linkid), sizeof(fsobj_id_t), KAUTH_VNODE_READ_ATTRIBUTES},
@@ -1446,14 +1446,6 @@ attr_pack_common(vfs_context_t ctx, struct vnode *vp,  struct attrlist *alp,
                } else if (VATTR_IS_SUPPORTED(vap, va_fsid64)) {
                        ATTR_PACK8((*abp), vap->va_fsid64);
                        abp->actual.commonattr |= ATTR_CMN_FSID;
-               } else if (VATTR_IS_SUPPORTED(vap, va_fsid)) {
-                       fsid_t fsid;
-
-                       /* va_fsid is 32 bits */
-                       fsid.val[0] = vap->va_fsid;
-                       fsid.val[1] = 0;
-                       ATTR_PACK8((*abp), fsid);
-                       abp->actual.commonattr |= ATTR_CMN_FSID;
                } else if (!return_valid || pack_invalid) {
                        fsid_t fsid = {{0}};
 
@@ -2493,17 +2485,6 @@ vfs_attr_pack(vnode_t vp, uio_t uio, struct attrlist *alp, uint64_t options,
        error = getattrlist_setupvattr_all(alp, vap, v_type, &fixedsize,
            proc_is64bit(vfs_context_proc(ctx)));
 
-       /*
-        * Ugly hack to correctly report fsids. vs_fsid is 32 bits and
-        * there is va_fsid64 as well but filesystems have to say that
-        * both are supported so that the value can be used correctly.
-        * So we set va_fsid if the filesystem has only set va_fsid64.
-        */
-
-       if ((alp->commonattr & ATTR_CMN_FSID) &&
-           VATTR_IS_SUPPORTED(vap, va_fsid64))
-               VATTR_SET_SUPPORTED(vap, va_fsid);
-
        if (error) {
                VFS_DEBUG(ctx, vp,
                    "ATTRLIST - ERROR: setup for request failed");
@@ -3238,9 +3219,18 @@ readdirattr(vnode_t dvp, struct fd_vn_data *fvd, uio_t auio,
                }
 
                /*
-                * We have an iocount on the directory already
+                * We have an iocount on the directory already.
+                * 
+                * Note that we supply NOCROSSMOUNT to the namei call as we attempt to acquire
+                * a vnode for this particular entry.  This is because the native call will
+                * (likely) attempt to emit attributes based on its own metadata in order to avoid
+                * creating vnodes where possible.  If the native call is not going to walk
+                * up the vnode mounted-on chain in order to find the top-most mount point, then we
+                * should not either in this emulated readdir+getattrlist() approach.  We
+                * will be responsible for setting DIR_MNTSTATUS_MNTPOINT on the directory that
+                * contains a mount point.
                 */
-               NDINIT(&nd, LOOKUP, OP_GETATTR, AUDITVNPATH1 | USEDVP,
+               NDINIT(&nd, LOOKUP, OP_GETATTR, (AUDITVNPATH1 | USEDVP | NOCROSSMOUNT), 
                    UIO_SYSSPACE, CAST_USER_ADDR_T(name_buffer), ctx);
 
                nd.ni_dvp = dvp;
@@ -3738,7 +3728,8 @@ setattrlist_internal(vnode_t vp, struct setattrlist_args *uap, proc_t p, vfs_con
        }
        if (al.commonattr & ATTR_CMN_CHGTIME) {
                ATTR_UNPACK_TIME(va.va_change_time, proc_is64);
-               VATTR_SET_ACTIVE(&va, va_change_time);
+               al.commonattr &= ~ATTR_CMN_CHGTIME;
+               /* quietly ignore change time; advisory in man page */
        }
        if (al.commonattr & ATTR_CMN_ACCTIME) {
                ATTR_UNPACK_TIME(va.va_access_time, proc_is64);
@@ -3772,6 +3763,10 @@ setattrlist_internal(vnode_t vp, struct setattrlist_args *uap, proc_t p, vfs_con
        if (al.commonattr & ATTR_CMN_FLAGS) {
                ATTR_UNPACK(va.va_flags);
                VATTR_SET_ACTIVE(&va, va_flags);
+#if CONFIG_MACF
+               if ((error = mac_vnode_check_setflags(ctx, vp, va.va_flags)) != 0)
+                       goto out;
+#endif
        }
        if (al.commonattr & ATTR_CMN_EXTENDED_SECURITY) {
 
@@ -3834,18 +3829,20 @@ setattrlist_internal(vnode_t vp, struct setattrlist_args *uap, proc_t p, vfs_con
                        volname = cursor;
                        ATTR_UNPACK(ar);        
                        /* attr_length cannot be 0! */
-                       if ((ar.attr_dataoffset < 0) || (ar.attr_length == 0)) {
+                       if ((ar.attr_dataoffset < 0) || (ar.attr_length == 0) ||
+                               (ar.attr_length > uap->bufferSize) ||
+                               (uap->bufferSize - ar.attr_length < (unsigned)ar.attr_dataoffset)) {
                                VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: bad offset supplied (2) ", ar.attr_dataoffset);
                                error = EINVAL;
                                goto out;
                        }
 
-                       volname += ar.attr_dataoffset;
-                       if ((volname + ar.attr_length) > bufend) {
+                       if (volname >= bufend - ar.attr_dataoffset - ar.attr_length) {
                                error = EINVAL;
                                VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: volume name too big for caller buffer");
                                goto out;
                        }
+                       volname += ar.attr_dataoffset;
                        /* guarantee NUL termination */
                        volname[ar.attr_length - 1] = 0;
                }
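
The tightened check above validates a caller-supplied attrreference before it is dereferenced. The shape of that validation can be shown as a small self-contained sketch; struct attrref and attrref_is_valid are stand-in names, kept only to mirror the offset/length arithmetic.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct attrref {			/* stand-in for struct attrreference */
	int32_t		attr_dataoffset;
	uint32_t	attr_length;
};

/*
 * Accept the variable-length payload only if it lies entirely inside the
 * caller-supplied buffer: non-negative offset, non-zero length, and
 * offset + length within bounds, checked without integer overflow.
 */
static bool
attrref_is_valid(const struct attrref *ar, size_t bufsize)
{
	if (ar->attr_dataoffset < 0 || ar->attr_length == 0)
		return false;
	if (ar->attr_length > bufsize)
		return false;
	if (bufsize - ar->attr_length < (size_t)ar->attr_dataoffset)
		return false;
	return true;
}
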
index c6e919d9e806e19ca306a9c397a5da52a2207649..9c4b20a0f999adbb770721066ce156e952e7d387 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <sys/ubc_internal.h>
 
 #include <sys/sdt.h>
-#include <sys/cprotect.h>
 
 int    bcleanbuf(buf_t bp, boolean_t discard);
 static int     brecover_data(buf_t bp);
@@ -366,9 +365,14 @@ buf_markfua(buf_t bp) {
 }
 
 #if CONFIG_PROTECT
-void
-buf_setcpaddr(buf_t bp, struct cprotect *entry) {
-       bp->b_attr.ba_cpentry = entry;
+cpx_t bufattr_cpx(bufattr_t bap)
+{
+       return bap->ba_cpx;
+}
+
+void bufattr_setcpx(bufattr_t bap, cpx_t cpx)
+{
+       bap->ba_cpx = cpx;
 }
 
 void
@@ -376,46 +380,38 @@ buf_setcpoff (buf_t bp, uint64_t foffset) {
        bp->b_attr.ba_cp_file_off = foffset;
 }
 
-void *
-bufattr_cpaddr(bufattr_t bap) {
-       return (bap->ba_cpentry);
-}
-
 uint64_t
 bufattr_cpoff(bufattr_t bap) {
-       return (bap->ba_cp_file_off);
-}
-
-void
-bufattr_setcpaddr(bufattr_t bap, void *cp_entry_addr) {
-        bap->ba_cpentry = cp_entry_addr;
+       return bap->ba_cp_file_off;
 }
 
 void
 bufattr_setcpoff(bufattr_t bap, uint64_t foffset) {
-        bap->ba_cp_file_off = foffset;
+       bap->ba_cp_file_off = foffset;
 }
 
-#else
-void *
-bufattr_cpaddr(bufattr_t bap __unused) {
-        return NULL;
-}
+#else // !CONFIG_PROTECT
 
 uint64_t
 bufattr_cpoff(bufattr_t bap __unused) {
        return 0;
 }
 
-void
-bufattr_setcpaddr(bufattr_t bap __unused, void *cp_entry_addr __unused) {
-}
-
 void
 bufattr_setcpoff(__unused bufattr_t bap, __unused uint64_t foffset) {
        return;
 }
-#endif /* CONFIG_PROTECT */
+
+struct cpx *bufattr_cpx(__unused bufattr_t bap)
+{
+       return NULL;
+}
+
+void bufattr_setcpx(__unused bufattr_t bap, __unused struct cpx *cpx)
+{
+}
+
+#endif /* !CONFIG_PROTECT */
 
 bufattr_t
 bufattr_alloc() {
@@ -685,6 +681,8 @@ buf_callback(buf_t bp)
 errno_t
 buf_setcallback(buf_t bp, void (*callback)(buf_t, void *), void *transaction)
 {
+       assert(!ISSET(bp->b_flags, B_FILTER) && ISSET(bp->b_lflags, BL_BUSY));
+
        if (callback)
                bp->b_flags |= (B_CALL | B_ASYNC);
        else
@@ -920,6 +918,8 @@ void
 buf_setfilter(buf_t bp, void (*filter)(buf_t, void *), void *transaction,
                          void (**old_iodone)(buf_t, void *), void **old_transaction)
 {
+       assert(ISSET(bp->b_lflags, BL_BUSY));
+
        if (old_iodone)
                *old_iodone = bp->b_iodone;
        if (old_transaction)
@@ -1317,9 +1317,10 @@ buf_strategy(vnode_t devvp, void *ap)
        
 #if CONFIG_PROTECT
        /* Capture f_offset in the bufattr*/
-       if (bp->b_attr.ba_cpentry != 0) {
+       cpx_t cpx = bufattr_cpx(buf_attr(bp));
+       if (cpx) {
                /* No need to go here for older EAs */
-               if(bp->b_attr.ba_cpentry->cp_flags & CP_OFF_IV_ENABLED) {
+               if(cpx_use_offset_for_iv(cpx)) {
                        off_t f_offset;
                        if ((error = VNOP_BLKTOOFF(bp->b_vp, bp->b_lblkno, &f_offset)))
                                return error;
@@ -1337,7 +1338,7 @@ buf_strategy(vnode_t devvp, void *ap)
                         * each I/O to IOFlashStorage.  But from our perspective
                         * we have only issued a single I/O.
                         */
-                       bufattr_setcpoff (&(bp->b_attr), (u_int64_t)f_offset);
+                       buf_setcpoff(bp, f_offset);
                        CP_DEBUG((CPDBG_OFFSET_IO | DBG_FUNC_NONE), (uint32_t) f_offset, (uint32_t) bp->b_lblkno, (uint32_t) bp->b_blkno, (uint32_t) bp->b_bcount, 0);
                }
        }
@@ -2447,7 +2448,7 @@ buf_brelse_shadow(buf_t bp)
 
        lck_mtx_lock_spin(buf_mtxp);
 
-       bp_head = (buf_t)bp->b_orig;
+       __IGNORE_WCASTALIGN(bp_head = (buf_t)bp->b_orig);
 
        if (bp_head->b_whichq != -1)
                panic("buf_brelse_shadow: bp_head on freelist %d\n", bp_head->b_whichq);
@@ -3104,6 +3105,25 @@ start:
                        size_t  contig_bytes;
                        int     bmap_flags;
 
+#if DEVELOPMENT || DEBUG
+                       /*
+                        * Apple-implemented file systems use UBC exclusively; they should
+                        * not call in here.
+                        */
+                       const char* excldfs[] = {"hfs", "afpfs", "smbfs", "acfs",
+                                                "exfat", "msdos", "webdav", NULL};
+
+                       for (int i = 0; excldfs[i] != NULL; i++) {
+                               if (vp->v_mount &&
+                                   !strcmp(vp->v_mount->mnt_vfsstat.f_fstypename,
+                                               excldfs[i])) {
+                                       panic("%s %s calls buf_getblk",
+                                               excldfs[i],
+                                               operation == BLK_READ ? "BLK_READ" : "BLK_WRITE");
+                               }
+                       }
+#endif
+
                        if ( (bp->b_upl) )
                                panic("bp already has UPL: %p",bp);
 
@@ -3355,7 +3375,7 @@ allocbuf(buf_t bp, int size)
                                                *(void **)(&bp->b_datap) = grab_memory_for_meta_buf(nsize);
                                        } else {
                                                bp->b_datap = (uintptr_t)NULL;
-                                               kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size);
+                                               kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size, VM_KERN_MEMORY_FILE);
                                                CLR(bp->b_flags, B_ZALLOC);
                                        }
                                        bcopy((void *)elem, (caddr_t)bp->b_datap, bp->b_bufsize);
@@ -3368,7 +3388,7 @@ allocbuf(buf_t bp, int size)
                                if ((vm_size_t)bp->b_bufsize < desired_size) {
                                        /* reallocate to a bigger size */
                                        bp->b_datap = (uintptr_t)NULL;
-                                       kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size);
+                                       kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size, VM_KERN_MEMORY_FILE);
                                        bcopy((const void *)elem, (caddr_t)bp->b_datap, bp->b_bufsize);
                                        kmem_free(kernel_map, elem, bp->b_bufsize); 
                                } else {
@@ -3384,7 +3404,7 @@ allocbuf(buf_t bp, int size)
                                *(void **)(&bp->b_datap) = grab_memory_for_meta_buf(nsize);
                                SET(bp->b_flags, B_ZALLOC);
                        } else
-                               kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size);
+                               kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size, VM_KERN_MEMORY_FILE);
                }
 
                if (bp->b_datap == 0)
@@ -3660,8 +3680,6 @@ bcleanbuf(buf_t bp, boolean_t discard)
 
        buf_release_credentials(bp);
        
-       bp->b_redundancy_flags = 0;
-
        /* If discarding, just move to the empty queue */
        if (discard) {
                lck_mtx_lock_spin(buf_mtxp);
@@ -3676,6 +3694,7 @@ bcleanbuf(buf_t bp, boolean_t discard)
                bp->b_bufsize = 0;
                bp->b_datap = (uintptr_t)NULL;
                bp->b_upl = (void *)NULL;
+               bp->b_fsprivate = (void *)NULL;
                /*
                 * preserve the state of whether this buffer
                 * was allocated on the fly or not...
@@ -3688,6 +3707,7 @@ bcleanbuf(buf_t bp, boolean_t discard)
 #endif
                bp->b_lflags = BL_BUSY;
                bp->b_flags = (bp->b_flags & B_HDRALLOC);
+               bp->b_redundancy_flags = 0;
                bp->b_dev = NODEV;
                bp->b_blkno = bp->b_lblkno = 0;
                bp->b_iodone = NULL;
@@ -4160,6 +4180,7 @@ alloc_io_buf(vnode_t vp, int priv)
        bp->b_bcount = 0;
        bp->b_bufsize = 0;
        bp->b_upl = NULL;
+       bp->b_fsprivate = (void *)NULL;
        bp->b_vp = vp;
        bzero(&bp->b_attr, sizeof(struct bufattr));
 
index 1575aafea967f29df2c31facbde3e41d31f9cea4..36b1d24e6c88b01ab3ff35949855980cb2d05c31 100644 (file)
@@ -1252,11 +1252,11 @@ skiprsrcfork:
                }
 
                if ( (mp = vp->v_mountedhere) && ((cnp->cn_flags & NOCROSSMOUNT) == 0)) {
-
-                       if (mp->mnt_realrootvp == NULLVP || mp->mnt_generation != mount_generation ||
-                               mp->mnt_realrootvp_vid != mp->mnt_realrootvp->v_id)
-                               break;
-                       vp = mp->mnt_realrootvp;
+                       vnode_t tmp_vp = mp->mnt_realrootvp;
+                       if (tmp_vp == NULLVP || mp->mnt_generation != mount_generation ||
+                               mp->mnt_realrootvp_vid != tmp_vp->v_id)
+                               break;
+                       vp = tmp_vp;
                }
 
 #if CONFIG_TRIGGERS
@@ -1265,10 +1265,8 @@ skiprsrcfork:
                 * trigger in hand, resolve it.  Note that we don't need to 
                 * leave the fast path if the mount has already happened.
                 */
-               if ((vp->v_resolve != NULL) && 
-                               (vp->v_resolve->vr_resolve_func != NULL)) {
+               if (vp->v_resolve)
                        break;
-               } 
 #endif /* CONFIG_TRIGGERS */
 
 
@@ -1711,6 +1709,25 @@ cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cn
                ncp->nc_name = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);
        else
                ncp->nc_name = strname;
+
+       //
+       // If the bytes of the cached name differ from the name associated
+       // with the vnode, use the vnode's name, since the file system
+       // may have set that explicitly in the case of a lookup on a
+       // case-insensitive file system where the case of the looked up
+       // name differs from what is on disk.  For more details, see:
+       //   <rdar://problem/8044697> FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories
+       // 
+       const char *vn_name = vp ? vp->v_name : NULL;
+       unsigned int len = vn_name ? strlen(vn_name) : 0;
+       if (vn_name && ncp && ncp->nc_name && strncmp(ncp->nc_name, vn_name, len) != 0) {
+               unsigned int hash = hash_string(vn_name, len);
+               
+               vfs_removename(ncp->nc_name);
+               ncp->nc_name = add_name_internal(vn_name, len, hash, FALSE, 0);
+               ncp->nc_hashval = hash;
+       }
+
        /*
         * make us the newest entry in the cache
         * i.e. we'll be the last to be stolen
index 621825949af81b7e77935bd7fed4feb5a507ab6f..57fe3431adde89bb9ef6d1907863361220ff1461 100644 (file)
@@ -95,6 +95,8 @@
 
 #include <sys/sdt.h>
 
+#include <stdbool.h>
+
 #if 0
 #undef KERNEL_DEBUG
 #define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
@@ -138,12 +140,25 @@ struct clios {
         int    io_wanted;          /* someone is sleeping waiting for a change in state */
 };
 
+struct cl_direct_read_lock {
+       LIST_ENTRY(cl_direct_read_lock)         chain;
+       int32_t                                                         ref_count;
+       vnode_t                                                         vp;
+       lck_rw_t                                                        rw_lock;
+};
+
+#define CL_DIRECT_READ_LOCK_BUCKETS 61
+
+static LIST_HEAD(cl_direct_read_locks, cl_direct_read_lock)
+       cl_direct_read_locks[CL_DIRECT_READ_LOCK_BUCKETS];
+
+static lck_spin_t cl_direct_read_spin_lock;
+
 static lck_grp_t       *cl_mtx_grp;
 static lck_attr_t      *cl_mtx_attr;
 static lck_grp_attr_t   *cl_mtx_grp_attr;
 static lck_mtx_t       *cl_transaction_mtxp;
 
-
 #define        IO_UNKNOWN      0
 #define        IO_DIRECT       1
 #define IO_CONTIG      2
@@ -236,7 +251,12 @@ int (*bootcache_contains_block)(dev_t device, u_int64_t blkno) = NULL;
 #define MAX_IO_REQUEST_SIZE    (1024 * 1024 * 512)
 #define MAX_IO_CONTIG_SIZE     MAX_UPL_SIZE_BYTES
 #define MAX_VECTS              16
-#define MIN_DIRECT_WRITE_SIZE  (4 * PAGE_SIZE)
+/*
+ * MIN_DIRECT_WRITE_SIZE governs how large an I/O must be before we consider
+ * allowing the caller to bypass the buffer cache.  For small I/Os (less than 16k),
+ * we have not historically allowed the write to bypass the UBC.
+ */
+#define MIN_DIRECT_WRITE_SIZE  (16384)
 
 #define WRITE_THROTTLE         6
 #define WRITE_THROTTLE_SSD     2
@@ -287,6 +307,11 @@ cluster_init(void) {
 
        if (cl_transaction_mtxp == NULL)
                panic("cluster_init: failed to allocate cl_transaction_mtxp");
+
+       lck_spin_init(&cl_direct_read_spin_lock, cl_mtx_grp, cl_mtx_attr);
+
+       for (int i = 0; i < CL_DIRECT_READ_LOCK_BUCKETS; ++i)
+               LIST_INIT(&cl_direct_read_locks[i]);
 }
 
 
@@ -507,6 +532,142 @@ cluster_iostate_wait(struct clios *iostate, u_int target, const char *wait_name)
        lck_mtx_unlock(&iostate->io_mtxp);
 }
 
+static void cluster_handle_associated_upl(struct clios *iostate, upl_t upl,
+                                                                                 upl_offset_t upl_offset, upl_size_t size)
+{
+       if (!size)
+               return;
+
+       upl_t associated_upl = upl_associated_upl(upl);
+
+       if (!associated_upl)
+               return;
+
+#if 0
+       printf("1: %d %d\n", upl_offset, upl_offset + size);
+#endif
+
+       /*
+        * The associated UPL is page aligned to file offsets whereas the
+        * UPL it's attached to has different alignment requirements.  The
+        * upl_offset that we have refers to @upl.  The code that follows
+        * has to deal with the first and last pages in this transaction
+        * which might straddle pages in the associated UPL.  To keep
+        * track of these pages, we use the mark bits: if the mark bit is
+        * set, we know another transaction has completed its part of that
+        * page and so we can unlock that page here.
+        *
+        * The following illustrates what we have to deal with:
+        *
+        *    MEM u <------------ 1 PAGE ------------> e
+        *        +-------------+----------------------+-----------------
+        *        |             |######################|#################
+        *        +-------------+----------------------+-----------------
+        *   FILE | <--- a ---> o <------------ 1 PAGE ------------>
+        *
+        * So here we show a write to offset @o.  The data that is to be
+        * written is in a buffer that is not page aligned; it has offset
+        * @a in the page.  The upl that carries the data starts in memory
+        * at @u.  The associated upl starts in the file at offset @o.  A
+        * transaction will always end on a page boundary (like @e above)
+        * except for the very last transaction in the group.  We cannot
+        * unlock the page at @o in the associated upl until both the
+        * transaction ending at @e and the following transaction (that
+        * starts at @e) has completed.
+        */
+
+       /*
+        * We record whether or not the two UPLs are aligned as the mark
+        * bit in the first page of @upl.
+        */
+       upl_page_info_t *pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
+       bool is_unaligned = upl_page_get_mark(pl, 0);
+
+       if (is_unaligned) {
+               upl_page_info_t *assoc_pl = UPL_GET_INTERNAL_PAGE_LIST(associated_upl);
+
+               upl_offset_t upl_end = upl_offset + size;
+               assert(upl_end >= PAGE_SIZE);
+
+               upl_size_t assoc_upl_size = upl_get_size(associated_upl);
+
+               /*
+                * In the very first transaction in the group, upl_offset will
+                * not be page aligned, but after that it will be and in that
+                * case we want the preceding page in the associated UPL hence
+                * the minus one.
+                */
+               assert(upl_offset);
+               if (upl_offset)
+                       upl_offset = trunc_page_32(upl_offset - 1);
+
+               lck_mtx_lock_spin(&iostate->io_mtxp);
+
+               // Look at the first page...
+               if (upl_offset
+                       && !upl_page_get_mark(assoc_pl, upl_offset >> PAGE_SHIFT)) {
+                       /*
+                        * The first page isn't marked so let another transaction
+                        * completion handle it.
+                        */
+                       upl_page_set_mark(assoc_pl, upl_offset >> PAGE_SHIFT, true);
+                       upl_offset += PAGE_SIZE;
+               }
+
+               // And now the last page...
+
+               /*
+                * This needs to be > rather than >= because if it's equal, it
+                * means there's another transaction that is sharing the last
+                * page.
+                */
+               if (upl_end > assoc_upl_size)
+                       upl_end = assoc_upl_size;
+               else {
+                       upl_end = trunc_page_32(upl_end);
+                       const int last_pg = (upl_end >> PAGE_SHIFT) - 1;
+
+                       if (!upl_page_get_mark(assoc_pl, last_pg)) {
+                               /*
+                                * The last page isn't marked so mark the page and let another
+                                * transaction completion handle it.
+                                */
+                               upl_page_set_mark(assoc_pl, last_pg, true);
+                               upl_end -= PAGE_SIZE;
+                       }
+               }
+
+               lck_mtx_unlock(&iostate->io_mtxp);
+
+#if 0
+               printf("2: %d %d\n", upl_offset, upl_end);
+#endif
+
+               if (upl_end <= upl_offset)
+                       return;
+
+               size = upl_end - upl_offset;
+       } else {
+               assert(!(upl_offset & PAGE_MASK));
+               assert(!(size & PAGE_MASK));
+       }
+
+       boolean_t empty;
+
+       /*
+        * We can unlock these pages now and as this is for a
+        * direct/uncached write, we want to dump the pages too.
+        */
+       kern_return_t kr = upl_abort_range(associated_upl, upl_offset, size,
+                                                                          UPL_ABORT_DUMP_PAGES, &empty);
+
+       assert(!kr);
+
+       if (!kr && empty) {
+               upl_set_associated_upl(upl, NULL);
+               upl_deallocate(associated_upl);
+       }
+}
 
 static int
 cluster_ioerror(upl_t upl, int upl_offset, int abort_size, int error, int io_flags, vnode_t vp)
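
The comment inside cluster_handle_associated_upl describes a handshake on a page shared by two transactions: the first completion to reach the page sets its mark bit and leaves it alone, the second sees the mark and releases the page. A stripped-down, self-contained sketch of that handshake, with ordinary memory and a pthread mutex standing in for the UPL mark bits and iostate lock, might look like this:

#include <stdbool.h>
#include <pthread.h>

struct boundary_page {
	pthread_mutex_t	lock;		/* stands in for iostate->io_mtxp */
	bool		marked;		/* stands in for the UPL page mark bit */
};

/*
 * Called from each of the two transactions that straddle the page.
 * Returns true for exactly one caller: the one that should release
 * (unlock/abort) the shared page.
 */
static bool
boundary_page_complete(struct boundary_page *pg)
{
	bool release;

	pthread_mutex_lock(&pg->lock);
	if (!pg->marked) {
		pg->marked = true;	/* first finisher: just leave a mark */
		release = false;
	} else {
		release = true;		/* second finisher: safe to release */
	}
	pthread_mutex_unlock(&pg->lock);
	return release;
}
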
@@ -570,7 +731,7 @@ cluster_iodone(buf_t bp, void *callback_arg)
        struct  clios *iostate;
        boolean_t       transaction_complete = FALSE;
 
-       cbp_head = (buf_t)(bp->b_trans_head);
+       __IGNORE_WCASTALIGN(cbp_head = (buf_t)(bp->b_trans_head));
 
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_START,
                     cbp_head, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0);
@@ -581,7 +742,7 @@ cluster_iodone(buf_t bp, void *callback_arg)
                lck_mtx_lock_spin(cl_transaction_mtxp);
 
                bp->b_flags |= B_TDONE;
-               
+
                if (bp->b_flags & B_TWANTED) {
                        CLR(bp->b_flags, B_TWANTED);
                        need_wakeup = TRUE;
@@ -656,6 +817,14 @@ cluster_iodone(buf_t bp, void *callback_arg)
 
                cbp = cbp_next;
        }
+
+       if (ISSET(b_flags, B_COMMIT_UPL)) {
+               cluster_handle_associated_upl(iostate,
+                                                                         cbp_head->b_upl,
+                                                                         upl_offset,
+                                                                         transaction_size);
+       }
+
        if (error == 0 && total_resid)
                error = EIO;
 
@@ -702,14 +871,13 @@ cluster_iodone(buf_t bp, void *callback_arg)
        }
 
        if (b_flags & B_COMMIT_UPL) {
-
-               pg_offset   = upl_offset & PAGE_MASK;
+               pg_offset   = upl_offset & PAGE_MASK;
                commit_size = (pg_offset + transaction_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
 
                if (error)
                        upl_flags = cluster_ioerror(upl, upl_offset - pg_offset, commit_size, error, b_flags, vp);
                else {
-                       upl_flags = UPL_COMMIT_FREE_ON_EMPTY;
+                       upl_flags = UPL_COMMIT_FREE_ON_EMPTY;
 
                        if ((b_flags & B_PHYS) && (b_flags & B_READ)) 
                                upl_flags |= UPL_COMMIT_SET_DIRTY;
@@ -1037,6 +1205,7 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
                io_flags |= B_PASSIVE;
        if (flags & CL_ENCRYPTED)
                io_flags |= B_ENCRYPTED_IO;     
+
        if (vp->v_flag & VSYSTEM)
                io_flags |= B_META;
 
@@ -1049,7 +1218,37 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
                 * read in from the file
                 */
                zero_offset = upl_offset + non_rounded_size;
+       } else if (!ISSET(flags, CL_READ) && ISSET(flags, CL_DIRECT_IO)) {
+               assert(ISSET(flags, CL_COMMIT));
+
+               // For a direct/uncached write, we need to lock pages...
+
+               upl_t cached_upl;
+
+               /*
+                * Create a UPL to lock the pages in the cache whilst the
+                * write is in progress.
+                */
+               ubc_create_upl(vp, f_offset, non_rounded_size, &cached_upl,
+                                          NULL, UPL_SET_LITE);
+
+               /*
+                * Attach this UPL to the other UPL so that we can find it
+                * later.
+                */
+               upl_set_associated_upl(upl, cached_upl);
+
+               if (upl_offset & PAGE_MASK) {
+                       /*
+                        * The two UPLs are not aligned, so mark the first page in
+                        * @upl so that cluster_handle_associated_upl can handle
+                        * it accordingly.
+                        */
+                       upl_page_info_t *pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
+                       upl_page_set_mark(pl, 0, true);
+               }
        }
+
        while (size) {
                daddr64_t blkno;
                daddr64_t lblkno;
@@ -1330,6 +1529,10 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
 
                                commit_offset = upl_offset & ~PAGE_MASK;
                        }
+
+                       // Associated UPL is currently only used in the direct write path
+                       assert(!upl_associated_upl(upl));
+
                        if ( (flags & CL_COMMIT) && pg_count) {
                                ubc_upl_commit_range(upl, commit_offset, pg_count * PAGE_SIZE,
                                                     UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
@@ -1426,9 +1629,13 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
                if (flags & CL_PAGEOUT) {
                        u_int i;
 
-                       for (i = 0; i < pg_count; i++) {
-                               if (buf_invalblkno(vp, lblkno + i, 0) == EBUSY)
-                                       panic("BUSY bp found in cluster_io");
+                       /*
+                        * since blocks are 0x1000 (4K) bytes in size, scale the
+                        * iteration count to (PAGE_SIZE * pg_count) / 0x1000 blocks.
+                        */
+                       for (i = 0; i < (PAGE_SIZE * pg_count)/0x1000; i++) {
+                               if (buf_invalblkno(vp, lblkno + i, 0) == EBUSY)
+                                       panic("BUSY bp found in cluster_io");
                        }
                }
                if (flags & CL_ASYNC) {
@@ -1553,34 +1760,41 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
                }
         }
        if (error) {
-               int abort_size;
+               int abort_size;
 
                io_size = 0;
-               
+
                if (cbp_head) {
-                        /*
-                         * first wait until all of the outstanding I/O
-                         * for this partial transaction has completed
-                         */
-                       cluster_wait_IO(cbp_head, (flags & CL_ASYNC));
+                       /*
+                        * Wait until all of the outstanding I/O
+                        * for this partial transaction has completed
+                        */
+                       cluster_wait_IO(cbp_head, (flags & CL_ASYNC));
 
                        /*
                         * Rewind the upl offset to the beginning of the
                         * transaction.
                         */
                        upl_offset = cbp_head->b_uploffset;
+               }
 
-                       for (cbp = cbp_head; cbp;) {
-                               buf_t   cbp_next;
-        
-                               size       += cbp->b_bcount;
-                               io_size    += cbp->b_bcount;
+               if (ISSET(flags, CL_COMMIT)) {
+                       cluster_handle_associated_upl(iostate, upl, upl_offset,
+                                                                                 upl_end_offset - upl_offset);
+               }
 
-                               cbp_next = cbp->b_trans_next;
-                               free_io_buf(cbp);
-                               cbp = cbp_next;
-                       }
+               // Free all the IO buffers in this transaction
+               for (cbp = cbp_head; cbp;) {
+                       buf_t   cbp_next;
+                       size       += cbp->b_bcount;
+                       io_size    += cbp->b_bcount;
+
+                       cbp_next = cbp->b_trans_next;
+                       free_io_buf(cbp);
+                       cbp = cbp_next;
                }
+
                if (iostate) {
                        int need_wakeup = 0;
 
@@ -1608,12 +1822,13 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
                        if (need_wakeup)
                                wakeup((caddr_t)&iostate->io_wanted);
                }
+
                if (flags & CL_COMMIT) {
                        int     upl_flags;
 
-                       pg_offset  = upl_offset & PAGE_MASK;
+                       pg_offset  = upl_offset & PAGE_MASK;
                        abort_size = (upl_end_offset - upl_offset + PAGE_MASK) & ~PAGE_MASK;
-                       
+
                        upl_flags = cluster_ioerror(upl, upl_offset - pg_offset, abort_size, error, io_flags, vp);
                        
                        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 28)) | DBG_FUNC_NONE,
@@ -2101,7 +2316,7 @@ cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, in
        upl_size_t       upl_size, vector_upl_size = 0;
        vm_size_t        upl_needed_size;
        mach_msg_type_number_t  pages_in_pl;
-       int              upl_flags;
+       upl_control_flags_t upl_flags;
        kern_return_t    kret;
        mach_msg_type_number_t  i;
        int              force_data_sync;
@@ -2252,13 +2467,15 @@ next_dwrite:
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_START,
                             (int)upl_offset, upl_needed_size, (int)iov_base, io_size, 0);
 
+               vm_map_t map = UIO_SEG_IS_USER_SPACE(uio->uio_segflg) ? current_map() : kernel_map;
                for (force_data_sync = 0; force_data_sync < 3; force_data_sync++) {
                        pages_in_pl = 0;
                        upl_size = upl_needed_size;
                        upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC |
-                                   UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE;
+                                   UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE
+                                   | UPL_MEMORY_TAG_MAKE(VM_KERN_MEMORY_FILE);
 
-                       kret = vm_map_get_upl(current_map(),
+                       kret = vm_map_get_upl(map,
                                              (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)),
                                              &upl_size,
                                              &upl, 
@@ -2343,14 +2560,6 @@ next_dwrite:
                         */
                }
 
-               /*
-                * Now look for pages already in the cache
-                * and throw them away.
-                * uio->uio_offset is page aligned within the file
-                * io_size is a multiple of PAGE_SIZE
-                */
-               ubc_range_op(vp, uio->uio_offset, uio->uio_offset + io_size, UPL_ROP_DUMP, NULL);
-
                /*
                 * we want push out these writes asynchronously so that we can overlap
                 * the preparation of the next I/O
@@ -2492,7 +2701,7 @@ cluster_write_contig(vnode_t vp, struct uio *uio, off_t newEOF, int *write_type,
        upl_size_t       upl_size;
        vm_size_t        upl_needed_size;
        mach_msg_type_number_t  pages_in_pl;
-       int              upl_flags;
+       upl_control_flags_t upl_flags;
        kern_return_t    kret;
         struct clios     iostate;
        int              error  = 0;
@@ -2531,9 +2740,11 @@ next_cwrite:
        pages_in_pl = 0;
        upl_size = upl_needed_size;
        upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC | 
-                   UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE;
+                   UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE
+                   | UPL_MEMORY_TAG_MAKE(VM_KERN_MEMORY_FILE);
 
-       kret = vm_map_get_upl(current_map(),
+       vm_map_t map = UIO_SEG_IS_USER_SPACE(uio->uio_segflg) ? current_map() : kernel_map;
+       kret = vm_map_get_upl(map,
                              (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)),
                              &upl_size, &upl[cur_upl], NULL, &pages_in_pl, &upl_flags, 0);
 
@@ -3378,7 +3589,7 @@ cluster_read_ext(vnode_t vp, struct uio *uio, off_t filesize, int xflags, int (*
                flags |= IO_NOCACHE;
        if ((vp->v_flag & VRAOFF) || speculative_reads_disabled)
                flags |= IO_RAOFF;
-       
+
        if (flags & IO_SKIP_ENCRYPTION)
                flags |= IO_ENCRYPTED;
        /* 
@@ -3991,6 +4202,73 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
        return (retval);
 }
 
+/*
+ * We don't want another read/write lock for every vnode in the system
+ * so we keep a hash of them here.  There should never be very many of
+ * these around at any point in time.
+ */
+cl_direct_read_lock_t *cluster_lock_direct_read(vnode_t vp, lck_rw_type_t type)
+{
+       struct cl_direct_read_locks *head
+               = &cl_direct_read_locks[(uintptr_t)vp / sizeof(*vp)
+                                                               % CL_DIRECT_READ_LOCK_BUCKETS];
+
+       struct cl_direct_read_lock *lck, *new_lck = NULL;
+
+       for (;;) {
+               lck_spin_lock(&cl_direct_read_spin_lock);
+
+               LIST_FOREACH(lck, head, chain) {
+                       if (lck->vp == vp) {
+                               ++lck->ref_count;
+                               lck_spin_unlock(&cl_direct_read_spin_lock);
+                               if (new_lck) {
+                                       // Someone beat us to it, ditch the allocation
+                                       lck_rw_destroy(&new_lck->rw_lock, cl_mtx_grp);
+                                       FREE(new_lck, M_TEMP);
+                               }
+                               lck_rw_lock(&lck->rw_lock, type);
+                               return lck;
+                       }
+               }
+
+               if (new_lck) {
+                       // Use the lock we allocated
+                       LIST_INSERT_HEAD(head, new_lck, chain);
+                       lck_spin_unlock(&cl_direct_read_spin_lock);
+                       lck_rw_lock(&new_lck->rw_lock, type);
+                       return new_lck;
+               }
+
+               lck_spin_unlock(&cl_direct_read_spin_lock);
+
+               // Allocate a new lock
+               MALLOC(new_lck, cl_direct_read_lock_t *, sizeof(*new_lck),
+                          M_TEMP, M_WAITOK);
+               lck_rw_init(&new_lck->rw_lock, cl_mtx_grp, cl_mtx_attr);
+               new_lck->vp = vp;
+               new_lck->ref_count = 1;
+
+               // Got to go round again
+       }
+}
+
+void cluster_unlock_direct_read(cl_direct_read_lock_t *lck)
+{
+       lck_rw_done(&lck->rw_lock);
+
+       lck_spin_lock(&cl_direct_read_spin_lock);
+       if (lck->ref_count == 1) {
+               LIST_REMOVE(lck, chain);
+               lck_spin_unlock(&cl_direct_read_spin_lock);
+               lck_rw_destroy(&lck->rw_lock, cl_mtx_grp);
+               FREE(lck, M_TEMP);
+       } else {
+               --lck->ref_count;
+               lck_spin_unlock(&cl_direct_read_spin_lock);
+       }
+}
+
 static int
 cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, u_int32_t *read_length,
                    int flags, int (*callback)(buf_t, void *), void *callback_arg)
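
The two functions added above keep a reference-counted rw-lock per vnode in a small hash of buckets instead of adding yet another lock to every vnode. A hypothetical caller looks like the direct-read path further down: take the lock shared around the cache probe and the I/O issue, exclusive only if the caller needs the cached pages to stay stable.

		cl_direct_read_lock_t *lock;

		lock = cluster_lock_direct_read(vp, LCK_RW_TYPE_SHARED);
		/* ... probe the UBC and issue the direct I/O ... */
		cluster_unlock_direct_read(lock);	/* drops the rw-lock and the hash reference */
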
@@ -4002,7 +4280,7 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type,
        upl_size_t       upl_size, vector_upl_size = 0;
        vm_size_t        upl_needed_size;
        unsigned int     pages_in_pl;
-       int              upl_flags;
+       upl_control_flags_t upl_flags;
        kern_return_t    kret;
        unsigned int     i;
        int              force_data_sync;
@@ -4032,6 +4310,7 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type,
        off_t            v_upl_uio_offset = 0;
        int              vector_upl_index=0;
        upl_t            vector_upl = NULL;
+       cl_direct_read_lock_t *lock = NULL;
 
        user_addr_t      orig_iov_base = 0;
        user_addr_t      last_iov_base = 0;
@@ -4267,7 +4546,7 @@ next_dread:
                        goto wait_for_dreads;
                }
 
-               /* 
+               /*
                 * Don't re-check the UBC data if we are looking for uncached IO
                 * or asking for encrypted blocks.
                 */
@@ -4278,6 +4557,19 @@ next_dread:
 
                        io_size = 0;
 
+                       if (!lock) {
+                               /*
+                                * We hold a lock here between the time we check the
+                                * cache and the time we issue I/O.  This saves us
+                                * from having to lock the pages in the cache.  Not
+                                * all clients will care about this lock but some
+                                * clients may want to guarantee stability between
+                                * here and when the I/O is issued in which case they
+                                * will take the lock exclusively.
+                                */
+                               lock = cluster_lock_direct_read(vp, LCK_RW_TYPE_SHARED);
+                       }
+
                        ubc_range_op(vp, uio->uio_offset, uio->uio_offset + xsize, UPL_ROP_ABSENT, (int *)&io_size);
 
                        if (io_size == 0) {
@@ -4322,17 +4614,18 @@ next_dread:
                else
                        no_zero_fill = 0;
 
+               vm_map_t map = UIO_SEG_IS_USER_SPACE(uio->uio_segflg) ? current_map() : kernel_map;
                for (force_data_sync = 0; force_data_sync < 3; force_data_sync++) {
                        pages_in_pl = 0;
                        upl_size = upl_needed_size;
-                       upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE;
-
+                       upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE
+                                 | UPL_MEMORY_TAG_MAKE(VM_KERN_MEMORY_FILE);
                        if (no_zero_fill)
                                upl_flags |= UPL_NOZEROFILL;
                        if (force_data_sync)
                                upl_flags |= UPL_FORCE_DATA_SYNC;
 
-                       kret = vm_map_create_upl(current_map(),
+                       kret = vm_map_create_upl(map,
                                                 (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)),
                                                 &upl_size, &upl, NULL, &pages_in_pl, &upl_flags);
 
@@ -4417,7 +4710,6 @@ next_dread:
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_START,
                             upl, (int)upl_offset, (int)uio->uio_offset, io_size, 0);
 
-
                if(!useVectorUPL) {
                        if (no_zero_fill)
                                io_flag &= ~CL_PRESERVE;
@@ -4447,6 +4739,12 @@ next_dread:
                }
                last_iov_base = iov_base + io_size;
 
+               if (lock) {
+                       // We don't need to wait for the I/O to complete
+                       cluster_unlock_direct_read(lock);
+                       lock = NULL;
+               }
+
                /*
                 * update the uio structure
                 */
@@ -4493,6 +4791,11 @@ wait_for_dreads:
                retval = vector_cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize,  io_flag, (buf_t)NULL, &iostate, callback, callback_arg);
                reset_vector_run_state();
        }
+
+       // We don't need to wait for the I/O to complete
+       if (lock)
+               cluster_unlock_direct_read(lock);
+
        /*
         * make sure all async reads that are part of this stream
         * have completed before we return
@@ -4545,7 +4848,7 @@ cluster_read_contig(vnode_t vp, struct uio *uio, off_t filesize, int *read_type,
        upl_size_t       upl_size;
        vm_size_t        upl_needed_size;
        mach_msg_type_number_t  pages_in_pl;
-       int              upl_flags;
+       upl_control_flags_t upl_flags;
        kern_return_t    kret;
        struct clios     iostate;
        int              error= 0;
@@ -4599,13 +4902,15 @@ next_cread:
 
        pages_in_pl = 0;
        upl_size = upl_needed_size;
-       upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE;
+       upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE
+                  | UPL_MEMORY_TAG_MAKE(VM_KERN_MEMORY_FILE);
 
 
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 92)) | DBG_FUNC_START,
                     (int)upl_offset, (int)upl_size, (int)iov_base, io_size, 0);
 
-       kret = vm_map_get_upl(current_map(),
+       vm_map_t map = UIO_SEG_IS_USER_SPACE(uio->uio_segflg) ? current_map() : kernel_map;
+       kret = vm_map_get_upl(map,
                              (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)),
                              &upl_size, &upl[cur_upl], NULL, &pages_in_pl, &upl_flags, 0);
 
@@ -4751,7 +5056,7 @@ cluster_io_type(struct uio *uio, int *io_type, u_int32_t *io_length, u_int32_t m
        user_addr_t      iov_base = 0;
        upl_t            upl;
        upl_size_t       upl_size;
-       int              upl_flags;
+       upl_control_flags_t upl_flags;
        int              retval = 0;
 
         /*
@@ -4775,9 +5080,10 @@ cluster_io_type(struct uio *uio, int *io_type, u_int32_t *io_length, u_int32_t m
                else
                        upl_size = (u_int32_t)iov_len;
 
-               upl_flags = UPL_QUERY_OBJECT_TYPE;
-  
-               if ((vm_map_get_upl(current_map(),
+               upl_flags = UPL_QUERY_OBJECT_TYPE | UPL_MEMORY_TAG_MAKE(VM_KERN_MEMORY_FILE);
+
+               vm_map_t map = UIO_SEG_IS_USER_SPACE(uio->uio_segflg) ? current_map() : kernel_map;
+               if ((vm_map_get_upl(map,
                                    (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)),
                                    &upl_size, &upl, NULL, NULL, &upl_flags, 0)) != KERN_SUCCESS) {
                        /*
@@ -5875,15 +6181,15 @@ is_file_clean(vnode_t vp, off_t filesize)
  * single hashtable entry.  Each hashtable entry is aligned to this
  * size within the file.
  */
-#define DRT_BITVECTOR_PAGES            256
+#define DRT_BITVECTOR_PAGES            ((1024 * 1024) / PAGE_SIZE)
 
 /*
  * File offset handling.
  *
- * DRT_ADDRESS_MASK is dependent on DRT_BITVECTOR_PAGES;
- * the correct formula is  (~(DRT_BITVECTOR_PAGES * PAGE_SIZE) - 1)
+ * DRT_ADDRESS_MASK is dependent on DRT_BITVECTOR_PAGES;
+ * the correct formula is  (~((DRT_BITVECTOR_PAGES * PAGE_SIZE) - 1))
  */
-#define DRT_ADDRESS_MASK               (~((1 << 20) - 1))
+#define DRT_ADDRESS_MASK               (~((DRT_BITVECTOR_PAGES * PAGE_SIZE) - 1))
 #define DRT_ALIGN_ADDRESS(addr)                ((addr) & DRT_ADDRESS_MASK)
 
 /*
@@ -5981,7 +6287,15 @@ is_file_clean(vnode_t vp, off_t filesize)
  */
 struct vfs_drt_hashentry {
        u_int64_t       dhe_control;
-       u_int32_t       dhe_bitvector[DRT_BITVECTOR_PAGES / 32];
+/*
+* dhe_bitvector was declared as dhe_bitvector[DRT_BITVECTOR_PAGES / 32];
+* DRT_BITVECTOR_PAGES is defined as ((1024 * 1024) / PAGE_SIZE)
+* Since PAGE_SIZE is only known at boot time, 
+*      -define MAX_DRT_BITVECTOR_PAGES for smallest supported page size (4k) 
+*      -declare dhe_bitvector array for largest possible length
+*/
+#define MAX_DRT_BITVECTOR_PAGES (1024 * 1024)/( 4 * 1024)
+       u_int32_t       dhe_bitvector[MAX_DRT_BITVECTOR_PAGES/32];
 };
 
 /*
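
Worked example of the new definitions, assuming the smallest supported PAGE_SIZE of 4 KiB (16 KiB pages scale the same way):

	/*
	 *   DRT_BITVECTOR_PAGES     = (1024 * 1024) / 4096       = 256 pages per hash entry
	 *   DRT_ADDRESS_MASK        = ~((256 * 4096) - 1)        = ~0xFFFFF   (1 MiB windows)
	 *   DRT_ALIGN_ADDRESS(0x123456)                           = 0x100000
	 *   MAX_DRT_BITVECTOR_PAGES = (1024 * 1024) / (4 * 1024)  = 256
	 *   dhe_bitvector           = 256 / 32 = 8 u_int32_t words = 32 bytes per entry
	 */
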
@@ -6117,7 +6431,7 @@ vfs_drt_alloc_map(struct vfs_drt_clustermap **cmapp)
         */
 
        kret = kmem_alloc(kernel_map, (vm_offset_t *)&cmap,
-           (nsize == DRT_HASH_SMALL_MODULUS) ? DRT_SMALL_ALLOCATION : DRT_LARGE_ALLOCATION);
+           (nsize == DRT_HASH_SMALL_MODULUS) ? DRT_SMALL_ALLOCATION : DRT_LARGE_ALLOCATION, VM_KERN_MEMORY_FILE);
        if (kret != KERN_SUCCESS)
                return(kret);
        cmap->scm_magic = DRT_SCM_MAGIC;
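
The extra VM_KERN_MEMORY_FILE argument recurs throughout this commit: the newer kmem_alloc/kmem_alloc_kobject interfaces take a vm_tag_t so the wired memory is attributed to the file-I/O site in kernel memory accounting. The same convention in isolation (the size here is illustrative):

	vm_offset_t buf = 0;

	if (kmem_alloc(kernel_map, &buf, PAGE_SIZE, VM_KERN_MEMORY_FILE) != KERN_SUCCESS)
		return ENOMEM;
	/* ... use the buffer ... */
	kmem_free(kernel_map, buf, PAGE_SIZE);
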
index 6a90031c6798426385818b020a232d2eaf75d9e5..d2f32bd7489478e3af593ded542aacb3ed933dea 100644 (file)
@@ -172,14 +172,14 @@ is_ignored_directory(const char *path) {
       return 0;
     }
 
-#define IS_TLD(x) strnstr((char *) path, x, MAXPATHLEN) 
+#define IS_TLD(x) strnstr(__DECONST(char *, path), x, MAXPATHLEN)
     if (IS_TLD("/.Spotlight-V100/") ||
         IS_TLD("/.MobileBackups/") || 
         IS_TLD("/Backups.backupdb/")) {
         return 1;
     }
 #undef IS_TLD
-    
+       
     return 0;
 }
 
@@ -782,7 +782,8 @@ done_with_args:
            continue;
        }
        
-       if (   watcher->event_list[type] == FSE_REPORT
+       if (   type < watcher->num_events
+           && watcher->event_list[type] == FSE_REPORT
            && watcher_cares_about_dev(watcher, dev)) {
            
            if (watcher_add_event(watcher, kfse) != 0) {
@@ -1534,7 +1535,7 @@ fmod_watch(fs_event_watcher *watcher, struct uio *uio)
        // its type or which device it is for)
        //
        kfse = watcher->event_queue[watcher->rd];
-       if (!kfse || kfse->type == FSE_INVALID || kfse->refcount < 1) {
+       if (!kfse || kfse->type == FSE_INVALID || kfse->type >= watcher->num_events || kfse->refcount < 1) {
          break;
        }
 
@@ -1641,62 +1642,25 @@ fseventsf_write(__unused struct fileproc *fp, __unused struct uio *uio,
 }
 
 #pragma pack(push, 4)
-typedef struct ext_fsevent_dev_filter_args {
-    uint32_t    num_devices;
-    user_addr_t devices;
-} ext_fsevent_dev_filter_args;
-#pragma pack(pop)
-
-#define NEW_FSEVENTS_DEVICE_FILTER      _IOW('s', 100, ext_fsevent_dev_filter_args)
-
-typedef struct old_fsevent_dev_filter_args {
-    uint32_t  num_devices;
-    int32_t   devices;
-} old_fsevent_dev_filter_args;
-
-#define        OLD_FSEVENTS_DEVICE_FILTER      _IOW('s', 100, old_fsevent_dev_filter_args)
-
-#if __LP64__
-/* need this in spite of the padding due to alignment of devices */
 typedef struct fsevent_dev_filter_args32 {
-    uint32_t  num_devices;
-    uint32_t  devices;
-    int32_t   pad1;
+    uint32_t            num_devices;
+    user32_addr_t       devices;
 } fsevent_dev_filter_args32;
-#endif
+typedef struct fsevent_dev_filter_args64 {
+    uint32_t            num_devices;
+    user64_addr_t       devices;
+} fsevent_dev_filter_args64;
+#pragma pack(pop)
+
+#define        FSEVENTS_DEVICE_FILTER_32       _IOW('s', 100, fsevent_dev_filter_args32)
+#define        FSEVENTS_DEVICE_FILTER_64       _IOW('s', 100, fsevent_dev_filter_args64)
 
 static int
 fseventsf_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx)
 {
     fsevent_handle *fseh = (struct fsevent_handle *)fp->f_fglob->fg_data;
     int ret = 0;
-    ext_fsevent_dev_filter_args *devfilt_args, _devfilt_args;
-
-    if (proc_is64bit(vfs_context_proc(ctx))) {
-       devfilt_args = (ext_fsevent_dev_filter_args *)data;
-    }
-    else if (cmd == OLD_FSEVENTS_DEVICE_FILTER) {
-       old_fsevent_dev_filter_args *udev_filt_args = (old_fsevent_dev_filter_args *)data;
-       
-       devfilt_args = &_devfilt_args;
-       memset(devfilt_args, 0, sizeof(ext_fsevent_dev_filter_args));
-
-       devfilt_args->num_devices = udev_filt_args->num_devices;
-       devfilt_args->devices     = CAST_USER_ADDR_T(udev_filt_args->devices);
-    }
-    else {
-#if __LP64__
-       fsevent_dev_filter_args32 *udev_filt_args = (fsevent_dev_filter_args32 *)data;
-#else
-       fsevent_dev_filter_args *udev_filt_args = (fsevent_dev_filter_args *)data;
-#endif
-       
-       devfilt_args = &_devfilt_args;
-       memset(devfilt_args, 0, sizeof(ext_fsevent_dev_filter_args));
-
-       devfilt_args->num_devices = udev_filt_args->num_devices;
-       devfilt_args->devices     = CAST_USER_ADDR_T(udev_filt_args->devices);
-    }
+    fsevent_dev_filter_args64 *devfilt_args, _devfilt_args;
 
     OSAddAtomic(1, &fseh->active);
     if (fseh->flags & FSEH_CLOSING) {
@@ -1725,8 +1689,29 @@ fseventsf_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx
                break;
        }
 
-       case OLD_FSEVENTS_DEVICE_FILTER:
-       case NEW_FSEVENTS_DEVICE_FILTER: {
+       case FSEVENTS_DEVICE_FILTER_32: {
+           if (proc_is64bit(vfs_context_proc(ctx))) {
+                   ret = EINVAL;
+                   break;
+           }
+           fsevent_dev_filter_args32 *devfilt_args32 = (fsevent_dev_filter_args32 *)data;
+
+           devfilt_args = &_devfilt_args;
+           memset(devfilt_args, 0, sizeof(fsevent_dev_filter_args64));
+           devfilt_args->num_devices = devfilt_args32->num_devices;
+           devfilt_args->devices     = CAST_USER_ADDR_T(devfilt_args32->devices);
+           goto handle_dev_filter;
+       }
+
+       case FSEVENTS_DEVICE_FILTER_64:
+           if (!proc_is64bit(vfs_context_proc(ctx))) {
+                   ret = EINVAL;
+                   break;
+           }
+           devfilt_args = (fsevent_dev_filter_args64 *)data;
+
+       handle_dev_filter:
+       {
            int new_num_devices;
            dev_t *devices_not_to_watch, *tmp=NULL;
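
The two cases above replace the old/new ioctl pair with explicit 32-bit and 64-bit argument layouts, each rejected with EINVAL when it does not match the calling process's ABI; the 32-bit form is then widened into the 64-bit struct that the shared handler consumes. The thunk in isolation (field names from the diff):

	fsevent_dev_filter_args32 *args32 = (fsevent_dev_filter_args32 *)data;
	fsevent_dev_filter_args64  args64;

	memset(&args64, 0, sizeof(args64));
	args64.num_devices = args32->num_devices;
	args64.devices     = CAST_USER_ADDR_T(args32->devices);	/* widen the 32-bit user pointer */
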
            
@@ -2107,7 +2092,7 @@ fseventswrite(__unused dev_t dev, struct uio *uio, __unused int ioflag)
     lck_mtx_lock(&event_writer_lock);
 
     if (write_buffer == NULL) {
-       if (kmem_alloc(kernel_map, (vm_offset_t *)&write_buffer, WRITE_BUFFER_SIZE)) {
+       if (kmem_alloc(kernel_map, (vm_offset_t *)&write_buffer, WRITE_BUFFER_SIZE, VM_KERN_MEMORY_FILE)) {
            lck_mtx_unlock(&event_writer_lock);
            return ENOMEM;
        }
@@ -2172,21 +2157,22 @@ static const struct fileops fsevents_fops = {
     fseventsf_drain
 };
 
-typedef struct ext_fsevent_clone_args {
-    user_addr_t  event_list;
-    int32_t      num_events;
-    int32_t      event_queue_depth;
-    user_addr_t  fd;
-} ext_fsevent_clone_args;
+typedef struct fsevent_clone_args32 {
+    user32_addr_t       event_list;
+    int32_t             num_events;
+    int32_t             event_queue_depth;
+    user32_addr_t       fd;
+} fsevent_clone_args32;
 
-typedef struct old_fsevent_clone_args {
-    uint32_t  event_list;
-    int32_t  num_events;
-    int32_t  event_queue_depth;
-    uint32_t  fd;
-} old_fsevent_clone_args;
+typedef struct fsevent_clone_args64 {
+    user64_addr_t       event_list;
+    int32_t             num_events;
+    int32_t             event_queue_depth;
+    user64_addr_t       fd;
+} fsevent_clone_args64;
 
-#define        OLD_FSEVENTS_CLONE      _IOW('s', 1, old_fsevent_clone_args)
+#define        FSEVENTS_CLONE_32       _IOW('s', 1, fsevent_clone_args32)
+#define        FSEVENTS_CLONE_64       _IOW('s', 1, fsevent_clone_args64)
 
 static int
 fseventsioctl(__unused dev_t dev, u_long cmd, caddr_t data, __unused int flag, struct proc *p)
@@ -2194,38 +2180,32 @@ fseventsioctl(__unused dev_t dev, u_long cmd, caddr_t data, __unused int flag, s
     struct fileproc *f;
     int fd, error;
     fsevent_handle *fseh = NULL;
-    ext_fsevent_clone_args *fse_clone_args, _fse_clone;
+    fsevent_clone_args64 *fse_clone_args, _fse_clone;
     int8_t *event_list;
     int is64bit = proc_is64bit(p);
 
     switch (cmd) {
-       case OLD_FSEVENTS_CLONE: {
-           old_fsevent_clone_args *old_args = (old_fsevent_clone_args *)data;
+       case FSEVENTS_CLONE_32: {
+           if (is64bit) {
+                   return EINVAL;
+           }
+           fsevent_clone_args32 *args32 = (fsevent_clone_args32 *)data;
 
            fse_clone_args = &_fse_clone;
-           memset(fse_clone_args, 0, sizeof(ext_fsevent_clone_args));
+           memset(fse_clone_args, 0, sizeof(fsevent_clone_args64));
 
-           fse_clone_args->event_list        = CAST_USER_ADDR_T(old_args->event_list);
-           fse_clone_args->num_events        = old_args->num_events;
-           fse_clone_args->event_queue_depth = old_args->event_queue_depth;
-           fse_clone_args->fd                = CAST_USER_ADDR_T(old_args->fd);
+           fse_clone_args->event_list        = CAST_USER_ADDR_T(args32->event_list);
+           fse_clone_args->num_events        = args32->num_events;
+           fse_clone_args->event_queue_depth = args32->event_queue_depth;
+           fse_clone_args->fd                = CAST_USER_ADDR_T(args32->fd);
            goto handle_clone;
        }
-           
-       case FSEVENTS_CLONE:
-           if (is64bit) {
-               fse_clone_args = (ext_fsevent_clone_args *)data;
-           } else {
-               fsevent_clone_args *ufse_clone = (fsevent_clone_args *)data;
-               
-               fse_clone_args = &_fse_clone;
-               memset(fse_clone_args, 0, sizeof(ext_fsevent_clone_args));
 
-               fse_clone_args->event_list        = CAST_USER_ADDR_T(ufse_clone->event_list);
-               fse_clone_args->num_events        = ufse_clone->num_events;
-               fse_clone_args->event_queue_depth = ufse_clone->event_queue_depth;
-               fse_clone_args->fd                = CAST_USER_ADDR_T(ufse_clone->fd);
+       case FSEVENTS_CLONE_64:
+           if (!is64bit) {
+                   return EINVAL;
            }
+           fse_clone_args = (fsevent_clone_args64 *)data;
 
        handle_clone:
            if (fse_clone_args->num_events < 0 || fse_clone_args->num_events > 4096) {
index 628e5e7dcc7401ae3f86496271f065e86c2ef3c6..23b21860a87cfbf937411cb70cc33056ac963765 100644 (file)
@@ -339,6 +339,13 @@ do_journal_io(journal *jnl, off_t *offset, void *data, size_t len, int direction
        buf_t   bp;
        off_t   max_iosize;
        struct bufattr *bap;
+       boolean_t was_vm_privileged = FALSE;
+       boolean_t need_vm_privilege = FALSE;
+
+       if (jnl->fsmount) {
+               if (jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT)
+                       need_vm_privilege = TRUE;
+       }
 
        if (*offset < 0 || *offset > jnl->jhdr->size) {
                panic("jnl: do_jnl_io: bad offset 0x%llx (max 0x%llx)\n", *offset, jnl->jhdr->size);
@@ -406,12 +413,26 @@ again:
                buf_markfua(bp);
        }
 
+       if (need_vm_privilege == TRUE) {
+               /*
+                * if we block waiting for memory, and there is enough pressure to
+                * cause us to try and create a new swap file, we may end up deadlocking
+                * due to waiting for the journal on the swap file creation path...
+                * by making ourselves vm_privileged, we give ourselves the best chance
+                * of not blocking
+                */
+               was_vm_privileged = set_vm_privilege(TRUE);
+       }
        DTRACE_IO1(journal__start, buf_t, bp);
        err = VNOP_STRATEGY(bp);
        if (!err) {
                err = (int)buf_biowait(bp);
        }
        DTRACE_IO1(journal__done, buf_t, bp);
+
+       if (need_vm_privilege == TRUE && was_vm_privileged == FALSE)
+               set_vm_privilege(FALSE);
+
        free_io_buf(bp);
 
        if (err) {
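
set_vm_privilege(TRUE) returns the thread's previous privilege state, so the pattern introduced above (and repeated in the later journal hunks) is: raise the privilege only when the journal sits on the swap mount, and drop it afterwards only if this thread did not already hold it. A minimal sketch:

	boolean_t was_vm_privileged = FALSE;

	if (jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT)
		was_vm_privileged = set_vm_privilege(TRUE);

	/* ... allocation or I/O that must not deadlock against the journal ... */

	if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && was_vm_privileged == FALSE)
		set_vm_privilege(FALSE);
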
@@ -471,7 +492,21 @@ write_journal_header(journal *jnl, int updating_start, uint32_t sequence_num)
        // writes.
        //
        if (!updating_start && (jnl->flags & JOURNAL_DO_FUA_WRITES) == 0) {
-               ret = VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, &context);
+
+               dk_synchronize_t sync_request = {
+                       .options                        = DK_SYNCHRONIZE_OPTION_BARRIER,
+               };
+
+               /*
+                * If device doesn't support barrier-only flush, or
+                * the journal is on a different device, use full flush.
+                */
+               if (!(jnl->flags & JOURNAL_FEATURE_BARRIER) || (jnl->jdev != jnl->fsdev)) {
+                       sync_request.options = 0;
+                       jnl->flush_counter++;
+               }
+
+               ret = VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZE, (caddr_t)&sync_request, FWRITE, &context);
        }
        if (ret != 0) {
                //
@@ -513,7 +548,21 @@ write_journal_header(journal *jnl, int updating_start, uint32_t sequence_num)
        // may seem obscure, it's not.
        //
        if (updating_start && (jnl->flags & JOURNAL_DO_FUA_WRITES) == 0) {
-               VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, &context);
+
+               dk_synchronize_t sync_request = {
+                       .options                        = DK_SYNCHRONIZE_OPTION_BARRIER,
+               };
+
+               /*
+                * If device doesn't support barrier-only flush, or
+                * the journal is on a different device, use full flush.
+                */
+               if (!(jnl->flags & JOURNAL_FEATURE_BARRIER) || (jnl->jdev != jnl->fsdev)) {
+                       sync_request.options = 0;
+                       jnl->flush_counter++;
+               }
+
+               VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZE, (caddr_t)&sync_request, FWRITE, &context);
        }
 
        return 0;
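
Both header writes above now make the same decision: request a barrier-only synchronize, and fall back to a full cache flush (bumping flush_counter so journal_flush can tell one already happened) when the device did not advertise DK_FEATURE_BARRIER or the journal lives on a different device than the filesystem. The decision in isolation:

	dk_synchronize_t sync_request = {
		.options = DK_SYNCHRONIZE_OPTION_BARRIER,
	};

	if (!(jnl->flags & JOURNAL_FEATURE_BARRIER) || (jnl->jdev != jnl->fsdev)) {
		sync_request.options = 0;	/* full track-cache flush */
		jnl->flush_counter++;
	}
	ret = VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZE, (caddr_t)&sync_request, FWRITE, &context);
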
@@ -785,6 +834,8 @@ update_fs_block(journal *jnl, void *block_ptr, off_t fs_block, size_t bsize)
 {
        int             ret;
        struct buf *oblock_bp=NULL;
+       boolean_t was_vm_privileged = FALSE;
+
     
        // first read the block we want.
        ret = buf_meta_bread(jnl->fsdev, (daddr64_t)fs_block, bsize, NOCRED, &oblock_bp);
@@ -813,11 +864,25 @@ update_fs_block(journal *jnl, void *block_ptr, off_t fs_block, size_t bsize)
        // copy the journal data over top of it
        memcpy((char *)buf_dataptr(oblock_bp), block_ptr, bsize);
 
-       if ((ret = VNOP_BWRITE(oblock_bp)) != 0) {
+       if (jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) {
+               /*
+                * if we block waiting for memory, and there is enough pressure to
+                * cause us to try and create a new swap file, we may end up deadlocking
+                * due to waiting for the journal on the swap file creation path...
+                * by making ourselves vm_privileged, we give ourselves the best chance
+                * of not blocking
+                */
+               was_vm_privileged = set_vm_privilege(TRUE);
+       }
+       ret = VNOP_BWRITE(oblock_bp);
+
+       if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && (was_vm_privileged == FALSE))
+               set_vm_privilege(FALSE);
+
+       if (ret != 0) {
                printf("jnl: %s: update_fs_block: failed to update block %lld (ret %d)\n", jnl->jdev_name, fs_block,ret);
                return ret;
        }
-
        // and now invalidate it so that if someone else wants to read
        // it in a different size they'll be able to do it.
        ret = buf_meta_bread(jnl->fsdev, (daddr64_t)fs_block, bsize, NOCRED, &oblock_bp);
@@ -1138,7 +1203,7 @@ replay_journal(journal *jnl)
        orig_jnl_start = jnl->jhdr->start;
 
        // allocate memory for the header_block.  we'll read each blhdr into this
-       if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&buff, jnl->jhdr->blhdr_size)) {
+       if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&buff, jnl->jhdr->blhdr_size, VM_KERN_MEMORY_FILE)) {
                printf("jnl: %s: replay_journal: no memory for block buffer! (%d bytes)\n",
                       jnl->jdev_name, jnl->jhdr->blhdr_size);
                return -1;
@@ -1273,7 +1338,7 @@ restart_replay:
 
                if (blhdr->flags & BLHDR_CHECK_CHECKSUMS) {
                        check_block_checksums = 1;
-                       if (kmem_alloc(kernel_map, (vm_offset_t *)&block_ptr, max_bsize)) {
+                       if (kmem_alloc(kernel_map, (vm_offset_t *)&block_ptr, max_bsize, VM_KERN_MEMORY_FILE)) {
                                goto bad_replay;
                        }
                } else {
@@ -1423,7 +1488,7 @@ bad_txn_handling:
                max_bsize = (max_bsize + PAGE_SIZE) & ~(PAGE_SIZE - 1);
        }
 
-       if (kmem_alloc(kernel_map, (vm_offset_t *)&block_ptr, max_bsize)) {
+       if (kmem_alloc(kernel_map, (vm_offset_t *)&block_ptr, max_bsize, VM_KERN_MEMORY_FILE)) {
                goto bad_replay;
        }
     
@@ -1573,6 +1638,10 @@ get_io_info(struct vnode *devvp, size_t phys_blksz, journal *jnl, struct vfs_con
                if (features & DK_FEATURE_UNMAP) {
                        jnl->flags |= JOURNAL_USE_UNMAP;
                }
+
+               if (features & DK_FEATURE_BARRIER) {
+                       jnl->flags |= JOURNAL_FEATURE_BARRIER;
+               }
        }
 
        //
@@ -1715,7 +1784,7 @@ journal_create(struct vnode *jvp,
 
        get_io_info(jvp, phys_blksz, jnl, &context);
        
-       if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&jnl->header_buf, phys_blksz)) {
+       if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&jnl->header_buf, phys_blksz, VM_KERN_MEMORY_FILE)) {
                printf("jnl: %s: create: could not allocate space for header buffer (%u bytes)\n", jdev_name, phys_blksz);
                goto bad_kmem_alloc;
        }
@@ -1893,7 +1962,7 @@ journal_open(struct vnode *jvp,
 
        get_io_info(jvp, phys_blksz, jnl, &context);
 
-       if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&jnl->header_buf, phys_blksz)) {
+       if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&jnl->header_buf, phys_blksz, VM_KERN_MEMORY_FILE)) {
                printf("jnl: %s: create: could not allocate space for header buffer (%u bytes)\n", jdev_name, phys_blksz);
                goto bad_kmem_alloc;
        }
@@ -2006,8 +2075,8 @@ journal_open(struct vnode *jvp,
 
        // take care of replaying the journal if necessary
        if (flags & JOURNAL_RESET) {
-               printf("jnl: %s: journal start/end pointers reset! (jnl %p; s 0x%llx e 0x%llx)\n",
-                      jdev_name, jnl, jnl->jhdr->start, jnl->jhdr->end);
+               printf("jnl: %s: journal start/end pointers reset! (s 0x%llx e 0x%llx)\n",
+                      jdev_name, jnl->jhdr->start, jnl->jhdr->end);
                jnl->jhdr->start = jnl->jhdr->end;
        } else if (replay_journal(jnl) != 0) {
                printf("jnl: %s: journal_open: Error replaying the journal!\n", jdev_name);
@@ -2129,7 +2198,7 @@ journal_is_clean(struct vnode *jvp,
 
        memset(&jnl, 0, sizeof(jnl));
 
-       if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&jnl.header_buf, phys_blksz)) {
+       if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&jnl.header_buf, phys_blksz, VM_KERN_MEMORY_FILE)) {
                printf("jnl: %s: is_clean: could not allocate space for header buffer (%d bytes)\n", jdev_name, phys_blksz);
                ret = ENOMEM;
                goto cleanup_jdev_name;
@@ -2272,8 +2341,7 @@ journal_close(journal *jnl)
        } else {
                // if we're here the journal isn't valid any more.
                // so make sure we don't leave any locked blocks lying around
-               printf("jnl: %s: close: journal %p, is invalid.  aborting outstanding transactions\n", jnl->jdev_name, jnl);
-
+               printf("jnl: %s: close: journal is invalid.  aborting outstanding transactions\n", jnl->jdev_name);
                if (jnl->active_tr || jnl->cur_tr) {
                        transaction *tr;
 
@@ -2478,7 +2546,8 @@ static errno_t
 journal_allocate_transaction(journal *jnl)
 {
        transaction *tr;
-       boolean_t was_vm_privileged;
+       boolean_t was_vm_privileged = FALSE;
+       kern_return_t retval;
        
        if (jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) {
                /*
@@ -2496,13 +2565,16 @@ journal_allocate_transaction(journal *jnl)
 
        tr->tbuffer_size = jnl->tbuffer_size;
 
-       if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&tr->tbuffer, tr->tbuffer_size)) {
+       retval = kmem_alloc_kobject(kernel_map, (vm_offset_t *)&tr->tbuffer, tr->tbuffer_size, VM_KERN_MEMORY_FILE);
+
+       if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && (was_vm_privileged == FALSE))
+               set_vm_privilege(FALSE);
+
+       if (retval) {
                FREE_ZONE(tr, sizeof(transaction), M_JNL_TR);
                jnl->active_tr = NULL;
                return ENOMEM;
        }
-       if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && (was_vm_privileged == FALSE))
-               set_vm_privilege(FALSE);
 
        // journal replay code checksum check depends on this.
        memset(tr->tbuffer, 0, BLHDR_CHECKSUM_SIZE);
@@ -2601,6 +2673,7 @@ int
 journal_modify_block_start(journal *jnl, struct buf *bp)
 {
        transaction *tr;
+       boolean_t was_vm_privileged = FALSE;
     
        CHECK_JOURNAL(jnl);
 
@@ -2611,6 +2684,17 @@ journal_modify_block_start(journal *jnl, struct buf *bp)
                return EINVAL;
        }
 
+       if (jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) {
+               /*
+                * if we block waiting for memory, and there is enough pressure to
+                * cause us to try and create a new swap file, we may end up deadlocking
+                * due to waiting for the journal on the swap file creation path...
+                * by making ourselves vm_privileged, we give ourselves the best chance
+                * of not blocking
+                */
+               was_vm_privileged = set_vm_privilege(TRUE);
+       }
+
        // XXXdbg - for debugging I want this to be true.  later it may
        //          not be necessary.
        if ((buf_flags(bp) & B_META) == 0) {
@@ -2651,7 +2735,7 @@ journal_modify_block_start(journal *jnl, struct buf *bp)
 
                                printf("jnl: %s: phys blksz got bigger (was: %d/%d now %d)\n",
                                       jnl->jdev_name, jnl->header_buf_size, jnl->jhdr->jhdr_size, phys_blksz);
-                               if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&new_header_buf, phys_blksz)) {
+                               if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&new_header_buf, phys_blksz, VM_KERN_MEMORY_FILE)) {
                                        printf("jnl: modify_block_start: %s: create: phys blksz change (was %d, now %d) but could not allocate space for new header\n",
                                               jnl->jdev_name, jnl->jhdr->jhdr_size, phys_blksz);
                                        bad = 1;
@@ -2673,6 +2757,9 @@ journal_modify_block_start(journal *jnl, struct buf *bp)
                if (bad) {
                        panic("jnl: mod block start: bufsize %d not a multiple of block size %d\n",
                              buf_size(bp), jnl->jhdr->jhdr_size);
+
+                       if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && (was_vm_privileged == FALSE))
+                               set_vm_privilege(FALSE);
                        return -1;
                }
        }
@@ -2681,6 +2768,9 @@ journal_modify_block_start(journal *jnl, struct buf *bp)
        if (tr->total_bytes+buf_size(bp) >= (jnl->jhdr->size - jnl->jhdr->jhdr_size)) {
                panic("jnl: transaction too big (%d >= %lld bytes, bufsize %d, tr %p bp %p)\n",
                      tr->total_bytes, (tr->jnl->jhdr->size - jnl->jhdr->jhdr_size), buf_size(bp), tr, bp);
+
+               if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && (was_vm_privileged == FALSE))
+                       set_vm_privilege(FALSE);
                return -1;
        }
 
@@ -2702,6 +2792,9 @@ journal_modify_block_start(journal *jnl, struct buf *bp)
        }
        buf_setflags(bp, B_LOCKED);
 
+       if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && (was_vm_privileged == FALSE))
+               set_vm_privilege(FALSE);
+
        return 0;
 }
 
@@ -2844,7 +2937,7 @@ journal_modify_block_end(journal *jnl, struct buf *bp, void (*func)(buf_t bp, vo
                // through prev->binfo[0].bnum.  that's a skanky way to do things but
                // avoids having yet another linked list of small data structures to manage.
 
-               if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&nblhdr, tr->tbuffer_size)) {
+               if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&nblhdr, tr->tbuffer_size, VM_KERN_MEMORY_FILE)) {
                        panic("jnl: end_tr: no space for new block tr @ %p (total bytes: %d)!\n",
                              tr, tr->total_bytes);
                }
@@ -2883,7 +2976,16 @@ journal_modify_block_end(journal *jnl, struct buf *bp, void (*func)(buf_t bp, vo
                vnode_t vp;
 
                vp = buf_vnode(bp);
-               vnode_ref(vp);
+               if (vnode_ref(vp)) {
+                       // Nobody checks the return values, so...
+                       jnl->flags |= JOURNAL_INVALID;
+
+                       buf_brelse(bp);
+
+                       // We're probably here due to a force unmount, so EIO is appropriate
+                       return EIO;
+               }
+
                bsize = buf_size(bp);
 
                blhdr->binfo[i].bnum = (off_t)(buf_blkno(bp));
@@ -2923,7 +3025,8 @@ journal_kill_block(journal *jnl, struct buf *bp)
        free_old_stuff(jnl);
 
        if (jnl->flags & JOURNAL_INVALID) {
-               return EINVAL;
+               buf_brelse(bp);
+               return 0;
        }
 
        tr = jnl->active_tr;
@@ -2972,15 +3075,17 @@ journal_kill_block(journal *jnl, struct buf *bp)
                                buf_markinvalid(bp);
                                buf_brelse(bp);
 
-                               break;
+                               return 0;
                        }
                }
-
-               if (i < blhdr->num_blocks) {
-                       break;
-               }
        }
 
+       /*
+        * We did not find the block in any transaction buffer but we still
+        * need to release it or else it will be left locked forever.
+        */
+       buf_brelse(bp);
+
        return 0;
 }
 
@@ -3041,7 +3146,7 @@ trim_realloc(journal *jnl, struct jnl_trim_list *trim)
 {
        void *new_extents;
        uint32_t new_allocated_count;
-       boolean_t was_vm_privileged;
+       boolean_t was_vm_privileged = FALSE;
        
        if (jnl_kdebug)
                KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REALLOC | DBG_FUNC_START, VM_KERNEL_ADDRPERM(trim), 0, trim->allocated_count, trim->extent_count, 0);
@@ -3590,7 +3695,7 @@ static int
 journal_trim_flush(journal *jnl, transaction *tr)
 {
        int errno = 0;
-       boolean_t was_vm_privileged;
+       boolean_t was_vm_privileged = FALSE;
        
        if (jnl_kdebug)
                KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_FLUSH | DBG_FUNC_START, VM_KERNEL_ADDRPERM(jnl), tr, 0, tr->trim.extent_count, 0);
@@ -4039,13 +4144,23 @@ finish_end_transaction(transaction *tr, errno_t (*callback)(void*), void *callba
        size_t          tbuffer_offset;
        int             bufs_written = 0;
        int             ret_val = 0;
+       boolean_t       was_vm_privileged = FALSE;
 
        KERNEL_DEBUG(0xbbbbc028|DBG_FUNC_START, jnl, tr, 0, 0, 0);
 
+       if (jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) {
+               /*
+                * if we block waiting for memory, and there is enough pressure to
+                * cause us to try and create a new swap file, we may end up deadlocking
+                * due to waiting for the journal on the swap file creation path...
+                * by making ourselves vm_privileged, we give ourselves the best chance
+                * of not blocking
+                */
+               was_vm_privileged = set_vm_privilege(TRUE);
+       }
        end  = jnl->jhdr->end;
 
        for (blhdr = tr->blhdr; blhdr; blhdr = (block_list_header *)((long)blhdr->binfo[0].bnum)) {
-               boolean_t was_vm_privileged;
 
                amt = blhdr->bytes_used;
 
@@ -4054,22 +4169,9 @@ finish_end_transaction(transaction *tr, errno_t (*callback)(void*), void *callba
                blhdr->checksum = 0;
                blhdr->checksum = calc_checksum((char *)blhdr, BLHDR_CHECKSUM_SIZE);
 
-               if (jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) {
-                       /*
-                        * if we block waiting for memory, and there is enough pressure to
-                        * cause us to try and create a new swap file, we may end up deadlocking
-                        * due to waiting for the journal on the swap file creation path...
-                        * by making ourselves vm_privileged, we give ourselves the best chance
-                        * of not blocking
-                        */
-                       was_vm_privileged = set_vm_privilege(TRUE);
-               }
-               if (kmem_alloc(kernel_map, (vm_offset_t *)&bparray, blhdr->num_blocks * sizeof(struct buf *))) {
+               if (kmem_alloc(kernel_map, (vm_offset_t *)&bparray, blhdr->num_blocks * sizeof(struct buf *), VM_KERN_MEMORY_FILE)) {
                        panic("can't allocate %zd bytes for bparray\n", blhdr->num_blocks * sizeof(struct buf *));
                }
-               if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && (was_vm_privileged == FALSE))
-                       set_vm_privilege(FALSE);
-
                tbuffer_offset = jnl->jhdr->blhdr_size;
 
                for (i = 1; i < blhdr->num_blocks; i++) {
@@ -4092,8 +4194,8 @@ finish_end_transaction(transaction *tr, errno_t (*callback)(void*), void *callba
                                lblkno = buf_lblkno(bp);
 
                                if (vp == NULL && lblkno == blkno) {
-                                       printf("jnl: %s: end_tr: bad news! bp @ %p w/null vp and l/blkno = %qd/%qd.  aborting the transaction (tr %p jnl %p).\n",
-                                              jnl->jdev_name, bp, lblkno, blkno, tr, jnl);
+                                       printf("jnl: %s: end_tr: bad news! buffer w/null vp and l/blkno = %qd/%qd.  aborting the transaction.\n",
+                                              jnl->jdev_name, lblkno, blkno);
                                        ret_val = -1;
                                        goto bad_journal;
                                }
@@ -4107,17 +4209,17 @@ finish_end_transaction(transaction *tr, errno_t (*callback)(void*), void *callba
                                        size_t  contig_bytes;
 
                                        if (VNOP_BLKTOOFF(vp, lblkno, &f_offset)) {
-                                               printf("jnl: %s: end_tr: vnop_blktooff failed @ %p, jnl %p\n", jnl->jdev_name, bp, jnl);
+                                               printf("jnl: %s: end_tr: vnop_blktooff failed\n", jnl->jdev_name);
                                                ret_val = -1;
                                                goto bad_journal;
                                        }
                                        if (VNOP_BLOCKMAP(vp, f_offset, buf_count(bp), &blkno, &contig_bytes, NULL, 0, NULL)) {
-                                               printf("jnl: %s: end_tr: can't blockmap the bp @ %p, jnl %p\n", jnl->jdev_name, bp, jnl);
+                                               printf("jnl: %s: end_tr: can't blockmap the buffer\n", jnl->jdev_name);
                                                ret_val = -1;
                                                goto bad_journal;
                                        }
                                        if ((uint32_t)contig_bytes < buf_count(bp)) {
-                                               printf("jnl: %s: end_tr: blk not physically contiguous on disk@ %p, jnl %p\n", jnl->jdev_name, bp, jnl);
+                                               printf("jnl: %s: end_tr: blk not physically contiguous on disk\n", jnl->jdev_name);
                                                ret_val = -1;
                                                goto bad_journal;
                                        }
@@ -4287,6 +4389,8 @@ finish_end_transaction(transaction *tr, errno_t (*callback)(void*), void *callba
 
 bad_journal:
        if (ret_val == -1) {
+               abort_transaction(jnl, tr);             // cleans up list of extents to be trimmed
+
                /*
                 * 'flush_aborted' is protected by the flushing condition... we need to
                 * set it before dropping the condition so that it will be
@@ -4304,12 +4408,14 @@ bad_journal:
 
                jnl->flags |= JOURNAL_INVALID;
                jnl->old_start[sizeof(jnl->old_start)/sizeof(jnl->old_start[0]) - 1] &= ~0x8000000000000000LL;
-               abort_transaction(jnl, tr);             // cleans up list of extents to be trimmed
 
                journal_unlock(jnl);
        } else
                unlock_condition(jnl, &jnl->flushing);
 
+       if ((jnl->fsmount->mnt_kern_flag & MNTK_SWAP_MOUNT) && (was_vm_privileged == FALSE))
+               set_vm_privilege(FALSE);
+
        KERNEL_DEBUG(0xbbbbc028|DBG_FUNC_END, jnl, tr, bufs_written, ret_val, 0);
 
        return (ret_val);
@@ -4395,13 +4501,24 @@ abort_transaction(journal *jnl, transaction *tr)
 
                        bp_vp = buf_vnode(tbp);
 
-                       buf_setfilter(tbp, NULL, NULL, NULL, NULL);
-
-                       if (buf_shadow(tbp))
+                       if (buf_shadow(tbp)) {
                                sbp = tbp;
-                       else
+                               buf_setfilter(tbp, NULL, NULL, NULL, NULL);
+                       } else {
+                               assert(ISSET(buf_flags(tbp), B_LOCKED));
+
                                sbp = NULL;
 
+                               do {
+                                       errno = buf_acquire(tbp, BAC_REMOVE, 0, 0);
+                               } while (errno == EAGAIN);
+
+                               if (!errno) {
+                                       buf_setfilter(tbp, NULL, NULL, NULL, NULL);
+                                       buf_brelse(tbp);
+                               }
+                       }
+
                        if (bp_vp) {
                                errno = buf_meta_bread(bp_vp,
                                                       buf_lblkno(tbp),
@@ -4430,8 +4547,8 @@ abort_transaction(journal *jnl, transaction *tr)
                                         */
                                        vnode_rele_ext(bp_vp, 0, 1);
                                } else {
-                                       printf("jnl: %s: abort_tr: could not find block %lld vp %p!\n",
-                                              jnl->jdev_name, blhdr->binfo[i].bnum, tbp);
+                                       printf("jnl: %s: abort_tr: could not find block %lld for vnode!\n",
+                                              jnl->jdev_name, blhdr->binfo[i].bnum);
                                        if (bp) {
                                                buf_brelse(bp);
                                        }
@@ -4569,9 +4686,11 @@ journal_end_transaction(journal *jnl)
  *     guarantees consistent journal content on the disk.
  */
 int
-journal_flush(journal *jnl, boolean_t wait_for_IO)
+journal_flush(journal *jnl, journal_flush_options_t options)
 {
        boolean_t drop_lock = FALSE;
+       errno_t error = 0;
+       uint32_t flush_count;
     
        CHECK_JOURNAL(jnl);
     
@@ -4588,13 +4707,16 @@ journal_flush(journal *jnl, boolean_t wait_for_IO)
                drop_lock = TRUE;
        }
 
+       if (ISSET(options, JOURNAL_FLUSH_FULL))
+               flush_count = jnl->flush_counter;
+
        // if we're not active, flush any buffered transactions
        if (jnl->active_tr == NULL && jnl->cur_tr) {
                transaction *tr = jnl->cur_tr;
 
                jnl->cur_tr = NULL;
 
-               if (wait_for_IO) {
+               if (ISSET(options, JOURNAL_WAIT_FOR_IO)) {
                        wait_condition(jnl, &jnl->flushing, "journal_flush");
                        wait_condition(jnl, &jnl->asyncIO, "journal_flush");
                }
@@ -4620,10 +4742,26 @@ journal_flush(journal *jnl, boolean_t wait_for_IO)
                 */
                wait_condition(jnl, &jnl->flushing, "journal_flush");
        }
-       if (wait_for_IO) {
+       if (ISSET(options, JOURNAL_WAIT_FOR_IO)) {
                wait_condition(jnl, &jnl->asyncIO, "journal_flush");
        }
 
+       if (ISSET(options, JOURNAL_FLUSH_FULL)) {
+
+               dk_synchronize_t sync_request = {
+                       .options                        = 0,
+               };
+
+               // We need a full cache flush. If it has not been done, do it here.
+               if (flush_count == jnl->flush_counter)
+                       error = VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZE, (caddr_t)&sync_request, FWRITE, vfs_context_kernel());
+
+               // If external journal partition is enabled, flush filesystem data partition.
+               if (jnl->jdev != jnl->fsdev)
+                       error = VNOP_IOCTL(jnl->fsdev, DKIOCSYNCHRONIZE, (caddr_t)&sync_request, FWRITE, vfs_context_kernel());
+
+       }
+
        KERNEL_DEBUG(DBG_JOURNAL_FLUSH | DBG_FUNC_END, jnl, 0, 0, 0, 0);
 
        return 0;
@@ -4752,7 +4890,7 @@ int journal_relocate(journal *jnl, off_t offset, off_t journal_size, int32_t tbu
        tr = jnl->active_tr;
        CHECK_TRANSACTION(tr);
        jnl->active_tr = NULL;
-       ret = journal_flush(jnl, TRUE);
+       ret = journal_flush(jnl, JOURNAL_WAIT_FOR_IO);
        jnl->active_tr = tr;
 
        if (ret) {
@@ -4812,6 +4950,10 @@ bad_journal:
        return ret;
 }
 
+uint32_t journal_current_txn(journal *jnl)
+{
+       return jnl->sequence_num + (jnl->active_tr || jnl->cur_tr ? 0 : 1);
+}
 
 #else   // !JOURNALING - so provide stub functions
 
@@ -4900,7 +5042,7 @@ journal_end_transaction(__unused journal *jnl)
 }
 
 int
-journal_flush(__unused journal *jnl, __unused boolean_t wait_for_IO)
+journal_flush(__unused journal *jnl, __unused journal_flush_options_t options)
 {
        return EINVAL;
 }
index 5b9578b378d01408949fa242cbed801994ab9114..42fd81e5c1f6505ce7774fe52a7d3b6a76852c89 100644 (file)
@@ -187,6 +187,7 @@ typedef struct journal {
     volatile off_t      old_start[16];     // this is how we do lazy start update
 
     int                 last_flush_err;    // last error from flushing the cache
+    uint32_t            flush_counter;     // a monotonically increasing value assigned on track cache flush
 } journal;
 
 /* internal-only journal flags (top 16 bits) */
@@ -196,6 +197,7 @@ typedef struct journal {
 #define JOURNAL_NEED_SWAP         0x00080000   // swap any data read from disk
 #define JOURNAL_DO_FUA_WRITES     0x00100000   // do force-unit-access writes
 #define JOURNAL_USE_UNMAP         0x00200000   // device supports UNMAP (TRIM)
+#define JOURNAL_FEATURE_BARRIER   0x00400000   // device supports barrier-only flush
 
 
 /* journal_open/create options are always in the low-16 bits */
@@ -338,7 +340,13 @@ int   journal_request_immediate_flush (journal *jnl);
 int   journal_end_transaction(journal *jnl);
 
 int   journal_active(journal *jnl);
-int   journal_flush(journal *jnl, boolean_t wait_for_IO);
+
+typedef enum journal_flush_options {
+       JOURNAL_WAIT_FOR_IO       = 0x01,   // Flush journal and metadata blocks, wait for async IO to complete.
+       JOURNAL_FLUSH_FULL        = 0x02,   // Flush track cache to media
+} journal_flush_options_t;
+
+int   journal_flush(journal *jnl, journal_flush_options_t options);
 void *journal_owner(journal *jnl);    // compare against current_thread()
 int   journal_uses_fua(journal *jnl);
 void  journal_lock(journal *jnl);
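
A hypothetical caller combining the new options (they are bit flags, so they OR together) to wait for the metadata I/O and force the track cache to media; journal_relocate in the previous file now passes JOURNAL_WAIT_FOR_IO where it previously passed TRUE:

	error = journal_flush(jnl, JOURNAL_WAIT_FOR_IO | JOURNAL_FLUSH_FULL);
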
@@ -365,6 +373,8 @@ void  journal_unlock(journal *jnl);
 int journal_relocate(journal *jnl, off_t offset, off_t journal_size, int32_t tbuffer_size,
        errno_t (*callback)(void *), void *callback_arg);
 
+uint32_t journal_current_txn(journal *jnl);
+
 __END_DECLS
 
 #endif /* __APPLE_API_UNSTABLE */
index 4beff12a6e9c4364e61e38158991e94e3b74d066..09bd470a99a30aa63493011100713210062a801a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -90,7 +90,7 @@
 #include <sys/kauth.h>
 #include <kern/kalloc.h>
 #include <security/audit/audit.h>
-
+#include <sys/dtrace.h>        /* to get the prototype for strstr() in sys/dtrace_glue.h */
 #if CONFIG_MACF
 #include <security/mac_framework.h>
 #endif
@@ -371,6 +371,7 @@ retry_copy:
                if ( (error = lookup(ndp)) ) {
                        goto error_out;
                }
+
                /*
                 * Check for symbolic link
                 */
@@ -633,15 +634,6 @@ lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int
                goto nextname;
        }
 
-#if CONFIG_TRIGGERS
-       if (dp->v_resolve) {
-               error = vnode_trigger_resolve(dp, ndp, ctx);
-               if (error) {
-                       goto out;
-               }
-       }
-#endif /* CONFIG_TRIGGERS */
-
        /*
         * Take into account any additional components consumed by
         * the underlying filesystem.
@@ -1315,73 +1307,90 @@ lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vn
        uint32_t depth = 0;
        vnode_t mounted_on_dp;
        int current_mount_generation = 0;
+#if CONFIG_TRIGGERS
+       vnode_t triggered_dp = NULLVP;
+       int retry_cnt = 0;
+#define MAX_TRIGGER_RETRIES 1
+#endif
        
-       mounted_on_dp = dp;
-       current_mount_generation = mount_generation;
-
-       while ((dp->v_type == VDIR) && dp->v_mountedhere &&
-                       ((cnp->cn_flags & NOCROSSMOUNT) == 0)) {
+       if (dp->v_type != VDIR || cnp->cn_flags & NOCROSSMOUNT)
+               return 0;
 
-               if (dp->v_mountedhere->mnt_lflag & MNT_LFORCE) {
-                       break;  // don't traverse into a forced unmount
-               }
+       mounted_on_dp = dp;
 #if CONFIG_TRIGGERS
-               /*
-                * For a trigger vnode, call its resolver when crossing its mount (if requested)
-                */
-               if (dp->v_resolve) {
-                       (void) vnode_trigger_resolve(dp, ndp, ctx);
-               }
+restart:
 #endif
-               vnode_lock(dp);
-
-               if ((dp->v_type == VDIR) && (mp = dp->v_mountedhere)) {
+       current_mount_generation = mount_generation;
 
+       while (dp->v_mountedhere) {
+               vnode_lock_spin(dp);
+               if ((mp = dp->v_mountedhere)) {
                        mp->mnt_crossref++;
                        vnode_unlock(dp);
+               } else {
+                       vnode_unlock(dp);
+                       break;
+               }
 
+               if (ISSET(mp->mnt_lflag, MNT_LFORCE)) {
+                       mount_dropcrossref(mp, dp, 0);
+                       break;  // don't traverse into a forced unmount
+               }
 
-                       if (vfs_busy(mp, vbusyflags)) {
-                               mount_dropcrossref(mp, dp, 0);
-                               if (vbusyflags == LK_NOWAIT) {
-                                       error = ENOENT;
-                                       goto out;
-                               }
-
-                               continue;
-                       }
-
-                       error = VFS_ROOT(mp, &tdp, ctx);
 
+               if (vfs_busy(mp, vbusyflags)) {
                        mount_dropcrossref(mp, dp, 0);
-                       vfs_unbusy(mp);
-
-                       if (error) {
+                       if (vbusyflags == LK_NOWAIT) {
+                               error = ENOENT;
                                goto out;
                        }
 
-                       vnode_put(dp);
-                       ndp->ni_vp = dp = tdp;
-                       depth++;
+                       continue;
+               }
+
+               error = VFS_ROOT(mp, &tdp, ctx);
 
-#if CONFIG_TRIGGERS
-                       /*
-                        * Check if root dir is a trigger vnode
-                        */
-                       if (dp->v_resolve) {
-                               error = vnode_trigger_resolve(dp, ndp, ctx);
-                               if (error) {
-                                       goto out;
-                               }
-                       }
-#endif                 
+               mount_dropcrossref(mp, dp, 0);
+               vfs_unbusy(mp);
 
-               } else { 
-                       vnode_unlock(dp);
+               if (error) {
+                       goto out;
+               }
+
+               vnode_put(dp);
+               ndp->ni_vp = dp = tdp;
+               if (dp->v_type != VDIR) {
+#if DEVELOPMENT || DEBUG
+                       panic("%s : Root of filesystem not a directory\n",
+                           __FUNCTION__);
+#else
                        break;
+#endif
                }
+               depth++;
        }
 
+#if CONFIG_TRIGGERS
+       /*
+        * The triggered_dp check here is required but is susceptible to an
+        * (unlikely) race in which a trigger mount is done from here and is
+        * unmounted before we get past vfs_busy above. We retry to deal with
+        * that case but it has the side effect of unwanted retries for
+        * "special" processes which don't want to trigger mounts.
+        */
+       if (dp->v_resolve && retry_cnt < MAX_TRIGGER_RETRIES) {
+               error = vnode_trigger_resolve(dp, ndp, ctx);
+               if (error)
+                       goto out;
+               if (dp == triggered_dp)
+                       retry_cnt += 1;
+               else
+                       retry_cnt = 0;
+               triggered_dp = dp;
+               goto restart;
+       }
+#endif /* CONFIG_TRIGGERS */
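The restructured crossing loop above is easier to follow outside diff form. Below is an editorial sketch of the new control flow, not code from the commit; cross_mounts() and resolve_trigger() are hypothetical stand-ins for the v_mountedhere walk and vnode_trigger_resolve():

/*
 * Editorial sketch of the new lookup_traverse_mountpoints() shape.
 * cross_mounts() and resolve_trigger() are hypothetical stand-ins.
 */
static int
traverse_sketch(vnode_t *dpp)
{
	vnode_t triggered_dp = NULLVP;
	int retry_cnt = 0;
	int error;

restart:
	/* walk the v_mountedhere chain down to the covering filesystem's root */
	if ((error = cross_mounts(dpp)))
		return error;

	/* resolve a trigger at the final vnode, then re-walk a bounded number of times */
	if ((*dpp)->v_resolve && retry_cnt < MAX_TRIGGER_RETRIES) {
		if ((error = resolve_trigger(*dpp)))
			return error;
		retry_cnt = (*dpp == triggered_dp) ? retry_cnt + 1 : 0;
		triggered_dp = *dpp;
		goto restart;
	}
	return 0;
}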
+
        if (depth) {
                mp = mounted_on_dp->v_mountedhere;
 
index 898319ab0a6fabf1fd127cdc62b996a293ba8628..9fec68cd43b416d124bd3681c35b48bc5cda19e1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <security/mac_framework.h>
 #endif
 
-#define PANIC_PRINTS_VNODES
-
 extern lck_grp_t *vnode_lck_grp;
 extern lck_attr_t *vnode_lck_attr;
 
@@ -230,6 +228,12 @@ errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
 static void record_vp(vnode_t vp, int count);
 #endif
 
+#if CONFIG_JETSAM && (DEVELOPMENT || DEBUG)
+extern int bootarg_no_vnode_jetsam;    /* from bsd_init.c; default value is 0 */
+#endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */
+
+boolean_t root_is_CF_drive = FALSE;
+
 #if CONFIG_TRIGGERS
 static int vnode_resolver_create(mount_t, vnode_t, struct vnode_trigger_param *, boolean_t external);
 static void vnode_resolver_detach(vnode_t);
@@ -651,7 +655,7 @@ static void vnode_iterate_panic_hook(panic_hook_t *hook_)
 
        if (panic_phys_range_before(hook->vp, &phys, &range)) {
                kdb_log("vp = %p, phys = %p, prev (%p: %p-%p)\n", 
-                               hook->mp, phys, range.type, range.phys_start,
+                               hook->vp, phys, range.type, range.phys_start,
                                range.phys_start + range.len);
        } else {
                kdb_log("vp = %p, phys = %p, prev (!)\n", hook->vp, phys);
@@ -1122,6 +1126,13 @@ vfs_mountroot(void)
                         */
                        vfs_init_io_attributes(rootvp, mp);
 
+                       if ((mp->mnt_ioflags & MNT_IOFLAGS_FUSION_DRIVE) && 
+                           (mp->mnt_ioflags & MNT_IOFLAGS_IOSCHED_SUPPORTED)) {
+                               /*
+                                * only for CF (CoreStorage Fusion) drives
+                                */
+                               root_is_CF_drive = TRUE;
+                       }
                        /*
                         * Shadow the VFC_VFSNATIVEXATTR flag to MNTK_EXTENDED_ATTRS.
                         */
@@ -1683,12 +1694,33 @@ vnode_list_add(vnode_t vp)
 #if DIAGNOSTIC
        lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED);
 #endif
+
+again:
+
        /*
         * if it is already on a list or non zero references return 
         */
        if (VONLIST(vp) || (vp->v_usecount != 0) || (vp->v_iocount != 0) || (vp->v_lflag & VL_TERMINATE))
                return;
 
+       /*
+        * In vclean, we might have deferred ditching locked buffers
+        * because something was still referencing them (indicated by
+        * usecount).  We can ditch them now.
+        */
+       if (ISSET(vp->v_lflag, VL_DEAD)
+               && (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))) {
+               ++vp->v_iocount;        // Probably not necessary, but harmless
+#ifdef JOE_DEBUG
+               record_vp(vp, 1);
+#endif
+               vnode_unlock(vp);
+               buf_invalidateblks(vp, BUF_INVALIDATE_LOCKED, 0, 0);
+               vnode_lock(vp);
+               vnode_dropiocount(vp);
+               goto again;
+       }
+
        vnode_list_lock();
 
        if ((vp->v_flag & VRAGE) && !(vp->v_lflag & VL_DEAD)) {
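The new VL_DEAD block just above uses a common xnu idiom: take an iocount so the vnode cannot be reclaimed, drop the vnode lock around the blocking buf_invalidateblks() call, then re-lock and restart from the top so every check is re-evaluated against possibly changed state. A condensed editorial sketch of the idiom, with work_pending() and do_blocking_work() as hypothetical placeholders:

again:
	if (!work_pending(vp))          /* hypothetical: re-checked on every pass */
		return;
	++vp->v_iocount;                /* pin the vnode across the unlock */
	vnode_unlock(vp);
	do_blocking_work(vp);           /* e.g. buf_invalidateblks() */
	vnode_lock(vp);
	vnode_dropiocount(vp);
	goto again;                     /* re-validate everything under the lock */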
@@ -2002,7 +2034,13 @@ loop:
 
                vnode_lock_spin(vp);
 
-               if ((vp->v_id != vid) || ((vp->v_lflag & (VL_DEAD | VL_TERMINATE)))) {
+               // If vnode is already terminating, wait for it...
+               while (vp->v_id == vid && ISSET(vp->v_lflag, VL_TERMINATE)) {
+                       vp->v_lflag |= VL_TERMWANT;
+                       msleep(&vp->v_lflag, &vp->v_lock, PVFS, "vflush", NULL);
+               }
+
+               if ((vp->v_id != vid) || ISSET(vp->v_lflag, VL_DEAD)) {
                                vnode_unlock(vp);
                                mount_lock(mp);
                                continue;
@@ -2165,12 +2203,6 @@ vclean(vnode_t vp, int flags)
 
        vp->v_lflag |= VL_TERMINATE;
 
-       /*
-        * remove the vnode from any mount list
-        * it might be on...
-        */
-       insmntque(vp, (struct mount *)0);
-
 #if NAMEDSTREAMS
        is_namedstream = vnode_isnamedstream(vp);
 #endif
@@ -2197,8 +2229,16 @@ vclean(vnode_t vp, int flags)
                else
 #endif
                {
-                       VNOP_FSYNC(vp, MNT_WAIT, ctx);
-                       buf_invalidateblks(vp, BUF_WRITE_DATA | BUF_INVALIDATE_LOCKED, 0, 0);
+                       VNOP_FSYNC(vp, MNT_WAIT, ctx);
+
+                       /*
+                        * If the vnode is still in use (by the journal for
+                        * example) we don't want to invalidate locked buffers
+                        * here.  In that case, either the journal will tidy them
+                        * up, or we will deal with it when the usecount is
+                        * finally released in vnode_rele_internal.
+                        */
+                       buf_invalidateblks(vp, BUF_WRITE_DATA | (active ? 0 : BUF_INVALIDATE_LOCKED), 0, 0);
                }
                if (UBCINFOEXISTS(vp))
                        /*
@@ -2260,6 +2300,14 @@ vclean(vnode_t vp, int flags)
 
        vnode_lock(vp);
 
+       /*
+        * Remove the vnode from any mount list it might be on.  It is not
+        * safe to do this any earlier because unmount needs to wait for
+        * any vnodes to terminate and it cannot do that if it cannot find
+        * them.
+        */
+       insmntque(vp, (struct mount *)0);
+
        vp->v_mount = dead_mountp;
        vp->v_op = dead_vnodeop_p;
        vp->v_tag = VT_NON;
@@ -3071,6 +3119,8 @@ vfs_init_io_attributes(vnode_t devvp, mount_t mp)
        u_int64_t temp;
        u_int32_t features;
        vfs_context_t ctx = vfs_context_current();
+       dk_corestorage_info_t cs_info;
+       boolean_t cs_present = FALSE;
        int isssd = 0;
        int isvirtual = 0;
 
@@ -3243,19 +3293,31 @@ vfs_init_io_attributes(vnode_t devvp, mount_t mp)
 
        if (features & DK_FEATURE_FORCE_UNIT_ACCESS)
                mp->mnt_ioflags |= MNT_IOFLAGS_FUA_SUPPORTED;
-       
+
+       if (VNOP_IOCTL(devvp, DKIOCCORESTORAGE, (caddr_t)&cs_info, 0, ctx) == 0)
+               cs_present = TRUE;
+
        if (features & DK_FEATURE_UNMAP) {
                mp->mnt_ioflags |= MNT_IOFLAGS_UNMAP_SUPPORTED;
 
-               if (VNOP_IOCTL(devvp, _DKIOCCORESTORAGE, NULL, 0, ctx) == 0)
+               if (cs_present == TRUE)
                        mp->mnt_ioflags |= MNT_IOFLAGS_CSUNMAP_SUPPORTED;
        }
+       if (cs_present == TRUE) {
+               /*
+                * for now we'll use the following test as a proxy for
+                * the underlying drive being FUSION in nature
+                */
+               if ((cs_info.flags & DK_CORESTORAGE_PIN_YOUR_METADATA))
+                       mp->mnt_ioflags |= MNT_IOFLAGS_FUSION_DRIVE;
+       }
+
 #if CONFIG_IOSCHED
         if (iosched_enabled && (features & DK_FEATURE_PRIORITY)) {
                 mp->mnt_ioflags |= MNT_IOFLAGS_IOSCHED_SUPPORTED;
-               throttle_info_disable_throttle(mp->mnt_devbsdunit);
+               throttle_info_disable_throttle(mp->mnt_devbsdunit, (mp->mnt_ioflags & MNT_IOFLAGS_FUSION_DRIVE) != 0);
        }
-#endif /* CONFIG_IOSCHED */    
+#endif /* CONFIG_IOSCHED */
        return (error);
 }
 
@@ -3751,6 +3813,44 @@ SYSCTL_NODE(_vfs_generic, VFS_CONF, conf,
                   CTLFLAG_RD | CTLFLAG_LOCKED,
                   sysctl_vfs_generic_conf, "");
 
+/*
+ * Print vnode state.
+ */
+void
+vn_print_state(struct vnode *vp, const char *fmt, ...)
+{
+       va_list ap;
+       char perm_str[] = "(VM_KERNEL_ADDRPERM pointer)";
+       char fs_name[MFSNAMELEN];
+
+       va_start(ap, fmt);
+       vprintf(fmt, ap);
+       va_end(ap);
+       printf("vp 0x%0llx %s: ", (uint64_t)VM_KERNEL_ADDRPERM(vp), perm_str);
+       printf("tag %d, type %d\n", vp->v_tag, vp->v_type);
+       /* Counts .. */
+       printf("    iocount %d, usecount %d, kusecount %d, references %d\n",
+           vp->v_iocount, vp->v_usecount, vp->v_kusecount, vp->v_references);
+       printf("    writecount %d, numoutput %d\n", vp->v_writecount,
+           vp->v_numoutput);
+       /* Flags */
+       printf("    flag 0x%x, lflag 0x%x, listflag 0x%x\n", vp->v_flag,
+           vp->v_lflag, vp->v_listflag);
+
+       if (vp->v_mount == NULL || vp->v_mount == dead_mountp) {
+               strlcpy(fs_name, "deadfs", MFSNAMELEN);
+       } else {
+               vfs_name(vp->v_mount, fs_name);
+       }
+
+       printf("    v_data 0x%0llx %s\n",
+           (vp->v_data ? (uint64_t)VM_KERNEL_ADDRPERM(vp->v_data) : 0),
+           perm_str);
+       printf("    v_mount 0x%0llx %s vfs_name %s\n",
+           (vp->v_mount ? (uint64_t)VM_KERNEL_ADDRPERM(vp->v_mount) : 0),
+           perm_str, fs_name);
+}
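vn_print_state() gives debugging code a one-call dump of a vnode's counts, flags, and mount, prefixed by a printf-style message. A hedged usage example (the message text is illustrative, not from the commit):

/* Illustrative call: prefix the dump with the caller's context. */
vn_print_state(vp, "%s: draining vnode still has users, ", __FUNCTION__);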
+
 long num_reusedvnodes = 0;
 
 
@@ -4050,9 +4150,15 @@ retry:
                vnode_list_unlock();
                tablefull("vnode");
                log(LOG_EMERG, "%d desired, %d numvnodes, "
-                       "%d free, %d dead, %d rage\n",
-                       desiredvnodes, numvnodes, freevnodes, deadvnodes, ragevnodes);
+                       "%d free, %d dead, %d async, %d rage\n",
+                       desiredvnodes, numvnodes, freevnodes, deadvnodes, async_work_vnodes, ragevnodes);
 #if CONFIG_JETSAM
+
+#if DEVELOPMENT || DEBUG
+               if (bootarg_no_vnode_jetsam)
+                       panic("vnode table is full\n");
+#endif /* DEVELOPMENT || DEBUG */
+
                /*
                 * Running out of vnodes tends to make a system unusable. Start killing
                 * processes that jetsam knows are killable.
@@ -4265,6 +4371,17 @@ vnode_put(vnode_t vp)
        return(retval);
 }
 
+static inline void
+vn_set_dead(vnode_t vp)
+{
+       vp->v_mount = NULL;
+       vp->v_op = dead_vnodeop_p;
+       vp->v_tag = VT_NON;
+       vp->v_data = NULL;
+       vp->v_type = VBAD;
+       vp->v_lflag |= VL_DEAD;
+}
+
 int
 vnode_put_locked(vnode_t vp)
 {
@@ -4444,6 +4561,8 @@ vnode_getiocount(vnode_t vp, unsigned int vid, int vflags)
        int withvid = vflags & VNODE_WITHID;
 
        for (;;) {
+               int sleepflg = 0;
+
                /*
                 * if it is a dead vnode with deadfs
                 */
@@ -4476,7 +4595,8 @@ vnode_getiocount(vnode_t vp, unsigned int vid, int vflags)
 
                /*
                 * If this vnode is getting drained, there are some cases where
-                * we can't block.
+                * we can't block or, in the case of tty vnodes, want to be
+                * interruptible.
                 */
                if (vp->v_lflag & VL_DRAIN) {
                        /*
@@ -4498,15 +4618,24 @@ vnode_getiocount(vnode_t vp, unsigned int vid, int vflags)
                         * failed because an unmount is in progress.
                         */
                        if (withvid && (vp->v_mount) && vfs_isunmount(vp->v_mount))
-                               return(ENODEV);
+                               return (ENODEV);
+
+                       if (vnode_istty(vp)) {
+                               sleepflg = PCATCH;
+                       }
                }
 
                vnode_lock_convert(vp);
 
                if (vp->v_lflag & VL_TERMINATE) {
+                       int error;
+
                        vp->v_lflag |= VL_TERMWANT;
 
-                       msleep(&vp->v_lflag,   &vp->v_lock, PVFS, "vnode getiocount", NULL);
+                       error = msleep(&vp->v_lflag,   &vp->v_lock,
+                          (PVFS | sleepflg), "vnode getiocount", NULL);
+                       if (error)
+                               return (error);
                } else
                        msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_getiocount", NULL);
        }
@@ -4637,16 +4766,13 @@ vnode_reclaim_internal(struct vnode * vp, int locked, int reuse, int flags)
                vnode_unlock(vp);
 }
 
-/* USAGE:
- * The following api creates a vnode and associates all the parameter specified in vnode_fsparam
- * structure and returns a vnode handle with a reference. device aliasing is handled here so checkalias
- * is obsoleted by this.
- */
-int  
-vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp)
+static int
+vnode_create_internal(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp,
+    int init_vnode)
 {
        int error;
        int insert = 1;
+       int existing_vnode;
        vnode_t vp;
        vnode_t nvp;
        vnode_t dvp;
@@ -4656,34 +4782,68 @@ vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp)
 #if CONFIG_TRIGGERS
        struct vnode_trigger_param *tinfo = NULL;
 #endif
-       if (param == NULL)
-               return (EINVAL);
-
-       /* Do quick sanity check on the parameters. */
-       if (param->vnfs_vtype == VBAD) {
-               return EINVAL;
+       if (*vpp) {
+               vp = *vpp;
+               *vpp = NULLVP;
+               existing_vnode = 1;
+       } else {
+               existing_vnode = 0;
        }
 
-#if CONFIG_TRIGGERS
-       if ((flavor == VNCREATE_TRIGGER) && (size == VNCREATE_TRIGGER_SIZE)) {
-               tinfo = (struct vnode_trigger_param *)data;
+       if (init_vnode) {
+               /* Do quick sanity check on the parameters. */
+               if ((param == NULL) || (param->vnfs_vtype == VBAD)) {
+                       error = EINVAL;
+                       goto error_out;
+               }
 
-               /* Validate trigger vnode input */
-               if ((param->vnfs_vtype != VDIR) ||
-                   (tinfo->vnt_resolve_func == NULL) ||
-                   (tinfo->vnt_flags & ~VNT_VALID_MASK)) {
-                       return (EINVAL);
+#if CONFIG_TRIGGERS
+               if ((flavor == VNCREATE_TRIGGER) && (size == VNCREATE_TRIGGER_SIZE)) {
+                       tinfo = (struct vnode_trigger_param *)data;
+
+                       /* Validate trigger vnode input */
+                       if ((param->vnfs_vtype != VDIR) ||
+                           (tinfo->vnt_resolve_func == NULL) ||
+                           (tinfo->vnt_flags & ~VNT_VALID_MASK)) {
+                               error = EINVAL;
+                               goto error_out;
+                       }
+                       /* Fall through a normal create (params will be the same) */
+                       flavor = VNCREATE_FLAVOR;
+                       size = VCREATESIZE;
                }
-               /* Fall through a normal create (params will be the same) */
-               flavor = VNCREATE_FLAVOR;
-               size = VCREATESIZE;
-       }
 #endif
-       if ((flavor != VNCREATE_FLAVOR) || (size != VCREATESIZE))
-               return (EINVAL);
+               if ((flavor != VNCREATE_FLAVOR) || (size != VCREATESIZE)) {
+                       error = EINVAL;
+                       goto error_out;
+               }
+       }
 
-       if ( (error = new_vnode(&vp)) )
-               return(error);
+       if (!existing_vnode) {
+               if ((error = new_vnode(&vp)) ) {
+                       return (error);
+               }
+               if (!init_vnode) {
+                       /* Make it so that it can be released by a vnode_put() */
+                       vn_set_dead(vp);
+                       *vpp = vp;
+                       return (0);
+               }
+       } else {
+               /*
+                * A vnode obtained by vnode_create_empty has been passed to
+                * vnode_initialize - Unset VL_DEAD set by vn_set_dead. After
+                * this point, it is set back on any error.
+                *
+                * N.B. vnode locking - We make the same assumptions as the
+                * "unsplit" vnode_create did - i.e. it is safe to update the
+                * vnode's fields without the vnode lock. This vnode has been
+                * out and about with the filesystem and hopefully nothing
+                * was done to the vnode between the vnode_create_empty and
+                * now when it has come in through vnode_initialize.
+                */
+               vp->v_lflag &= ~VL_DEAD;
+       }
 
        dvp = param->vnfs_dvp;
        cnp = param->vnfs_cnp;
@@ -4702,12 +4862,7 @@ vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp)
 #ifdef JOE_DEBUG
                        record_vp(vp, 1);
 #endif
-                       vp->v_mount = NULL;
-                       vp->v_op = dead_vnodeop_p;
-                       vp->v_tag = VT_NON;
-                       vp->v_data = NULL;
-                       vp->v_type = VBAD;
-                       vp->v_lflag |= VL_DEAD;
+                       vn_set_dead(vp);
 
                        vnode_put(vp);
                        return(error);
@@ -4735,12 +4890,7 @@ vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp)
                error = vnode_resolver_create(param->vnfs_mp, vp, tinfo, FALSE);
                if (error) {
                        printf("vnode_create: vnode_resolver_create() err %d\n", error);
-                       vp->v_mount = NULL;
-                       vp->v_op = dead_vnodeop_p;
-                       vp->v_tag = VT_NON;
-                       vp->v_data = NULL;
-                       vp->v_type = VBAD;
-                       vp->v_lflag |= VL_DEAD;
+                       vn_set_dead(vp);
 #ifdef JOE_DEBUG
                        record_vp(vp, 1);
 #endif
@@ -4862,6 +5012,58 @@ vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp)
                vp->v_flag |= VRAGE;
        }
        return (0);
+
+error_out:
+       if (existing_vnode) {
+               vnode_put(vp);
+       }
+       return (error);
+}
+
+/* USAGE:
+ * The following API creates a vnode, associates all the parameters specified in the vnode_fsparam
+ * structure with it, and returns a vnode handle with a reference. Device aliasing is handled here,
+ * so checkalias is obsoleted by this.
+ */
+int
+vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp)
+{
+       *vpp = NULLVP;
+       return (vnode_create_internal(flavor, size, data, vpp, 1));
+}
+
+int
+vnode_create_empty(vnode_t *vpp)
+{
+       *vpp = NULLVP;
+       return (vnode_create_internal(VNCREATE_FLAVOR, VCREATESIZE, NULL,
+           vpp, 0));
+}
+
+int
+vnode_initialize(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp)
+{
+       if (*vpp == NULLVP) {
+               panic("NULL vnode passed to vnode_initialize");
+       }
+#if DEVELOPMENT || DEBUG
+       /*
+        * We lock to check that the vnode is fit for unlocked use in
+        * vnode_create_internal.
+        */
+       vnode_lock_spin(*vpp);
+       VNASSERT(((*vpp)->v_iocount == 1), *vpp,
+           ("vnode_initialize : iocount not 1, is %d", (*vpp)->v_iocount));
+       VNASSERT(((*vpp)->v_usecount == 0), *vpp,
+           ("vnode_initialize : usecount not 0, is %d", (*vpp)->v_usecount));
+       VNASSERT(((*vpp)->v_lflag & VL_DEAD), *vpp,
+           ("vnode_initialize : v_lflag does not have VL_DEAD, is 0x%x",
+           (*vpp)->v_lflag));
+       VNASSERT(((*vpp)->v_data == NULL), *vpp,
+           ("vnode_initialize : v_data not NULL"));
+       vnode_unlock(*vpp);
+#endif
+       return (vnode_create_internal(flavor, size, data, vpp, 1));
 }
 
 int
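Splitting vnode_create() into vnode_create_empty() and vnode_initialize() lets a filesystem obtain the vnode first and fill in the vnode_fsparam later, once it holds whatever locks it needs. The following is a minimal editorial sketch of the intended calling pattern, with error handling trimmed; if initialization is abandoned, the empty vnode can simply be released with vnode_put(), since it stays marked dead until vnode_initialize() succeeds, and the error paths shown above release it when vnode_initialize() fails:

vnode_t vp = NULLVP;
struct vnode_fsparam param;
int error;

error = vnode_create_empty(&vp);	/* dead placeholder with one iocount */
if (error)
	return error;

/* ... take filesystem locks, fill in 'param' ... */

error = vnode_initialize(VNCREATE_FLAVOR, VCREATESIZE, &param, &vp);
if (error) {
	/* error paths above mark vp dead and drop the iocount; do not vnode_put() again */
	return error;
}
/* vp is now fully initialized, as if it had come from vnode_create() */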
@@ -5167,6 +5369,9 @@ vnode_lookup(const char *path, int flags, vnode_t *vpp, vfs_context_t ctx)
        if (flags & VNODE_LOOKUP_NOCROSSMOUNT)
                ndflags |= NOCROSSMOUNT;
 
+       if (flags & VNODE_LOOKUP_CROSSMOUNTNOWAIT)
+               ndflags |= CN_NBMOUNTLOOK;
+
        /* XXX AUDITVNPATH1 needed ? */
        NDINIT(&nd, LOOKUP, OP_LOOKUP, ndflags, UIO_SYSSPACE,
               CAST_USER_ADDR_T(path), ctx);
@@ -5202,6 +5407,9 @@ vnode_open(const char *path, int fmode, int cmode, int flags, vnode_t *vpp, vfs_
        if (lflags & VNODE_LOOKUP_NOCROSSMOUNT)
                ndflags |= NOCROSSMOUNT;
        
+       if (lflags & VNODE_LOOKUP_CROSSMOUNTNOWAIT)
+               ndflags |= CN_NBMOUNTLOOK;
+
        /* XXX AUDITVNPATH1 needed ? */
        NDINIT(&nd, LOOKUP, OP_OPEN, ndflags, UIO_SYSSPACE,
               CAST_USER_ADDR_T(path), ctx);
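The new VNODE_LOOKUP_CROSSMOUNTNOWAIT flag maps to CN_NBMOUNTLOOK in both vnode_lookup() and vnode_open(), so a caller can avoid blocking on a mount point that is busy or in transition while the path is traversed. A hedged example against the vnode_lookup() signature above (the path is illustrative):

vnode_t vp = NULLVP;
int error;

/* Return an error rather than wait if a covered mount is busy. */
error = vnode_lookup("/Volumes/Data/some/file",
    VNODE_LOOKUP_CROSSMOUNTNOWAIT, &vp, vfs_context_current());
if (error == 0) {
	/* ... use vp ... */
	vnode_put(vp);
}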
@@ -8065,6 +8273,20 @@ vnode_setswapmount(vnode_t vp)
 }
 
 
+int64_t
+vnode_getswappin_avail(vnode_t vp)
+{
+       int64_t max_swappin_avail = 0;
+
+       mount_lock(vp->v_mount);
+       if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_SWAPPIN_SUPPORTED)
+               max_swappin_avail = vp->v_mount->mnt_max_swappin_available;
+       mount_unlock(vp->v_mount);
+
+       return (max_swappin_avail);
+}
+
+
 void
 vn_setunionwait(vnode_t vp)
 {
@@ -8130,7 +8352,7 @@ errno_t rmdir_remove_orphaned_appleDouble(vnode_t vp , vfs_context_t ctx, int *
        if (error == EBUSY)
                *restart_flag = 1;
        if (error != 0)
-               goto outsc;
+               return (error);
 
        /*
         * set up UIO
@@ -8298,7 +8520,8 @@ outsc:
        if (open_flag)
                VNOP_CLOSE(vp, FREAD, ctx);
 
-       uio_free(auio);
+       if (auio)
+               uio_free(auio);
        FREE(rbuf, M_TEMP);
 
        vnode_resume(vp);
@@ -8320,10 +8543,9 @@ lock_vnode_and_post(vnode_t vp, int kevent_num)
        }
 }
 
-
-#ifdef PANIC_PRINTS_VNODES
-
 void panic_print_vnodes(void);
+/* define PANIC_PRINTS_VNODES only if investigation is required. */
+#ifdef PANIC_PRINTS_VNODES
 
 static const char *__vtype(uint16_t vtype)
 {
@@ -8360,7 +8582,8 @@ static const char *__vtype(uint16_t vtype)
 static char *__vpath(vnode_t vp, char *str, int len, int depth)
 {
        int vnm_len;
-       char *dst, *src;
+       const char *src;
+       char *dst;
 
        if (len <= 0)
                return str;
@@ -8371,15 +8594,13 @@ static char *__vpath(vnode_t vp, char *str, int len, int depth)
        /* follow mount vnodes to get the full path */
        if ((vp->v_flag & VROOT)) {
                if (vp->v_mount != NULL && vp->v_mount->mnt_vnodecovered) {
-                       if (len < 1)
-                               return str + len;
                        return __vpath(vp->v_mount->mnt_vnodecovered,
                                       str, len, depth+1);
                }
                return str + len;
        }
 
-       src = (char *)vp->v_name;
+       src = vp->v_name;
        vnm_len = strlen(src);
        if (vnm_len > len) {
                /* truncate the name to fit in the string */
index e2b135a7bfc2f9d567bbae06f35ee652c6ceff46..a949a717d8753b67a5fdf51f7c589e42883feeec 100644 (file)
@@ -99,6 +99,7 @@
 #include <sys/fsctl.h>
 #include <sys/ubc_internal.h>
 #include <sys/disk.h>
+#include <sys/content_protection.h>
 #include <machine/cons.h>
 #include <machine/limits.h>
 #include <miscfs/specfs/specdev.h>
 
 #include <libkern/OSAtomic.h>
 #include <pexpert/pexpert.h>
+#include <IOKit/IOBSD.h>
 
 #if CONFIG_MACF
 #include <security/mac.h>
@@ -738,7 +740,7 @@ update:
                        if ( (error = namei(&nd)) )
                                goto out1;
 
-                       strncpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
+                       strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
                        devvp = nd.ni_vp;
 
                        nameidone(&nd);
@@ -989,6 +991,8 @@ update:
 
                /* Now that mount is setup, notify the listeners */
                vfs_notify_mount(pvp);
+               IOBSDMountChange(mp, kIOMountChangeMount);
+
        } else {
                /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
                if (mp->mnt_vnodelist.tqh_first != NULL) {
@@ -1524,8 +1528,8 @@ relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
 
        placed = TRUE;
 
-       strncpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
-       strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
+       strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
+       strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
 
        /* Forbid future moves */
        mount_lock(mp);
@@ -1550,7 +1554,7 @@ relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
 
        return 0;
 out3:
-       strncpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
+       strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
 
        mount_lock(mp);
        mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
@@ -1920,6 +1924,8 @@ dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
                }
        }
 
+       IOBSDMountChange(mp, kIOMountChangeUnmount);
+
 #if CONFIG_TRIGGERS
        vfs_nested_trigger_unmounts(mp, flags, ctx);
        did_vflush = 1;
@@ -3295,15 +3301,16 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
        int flags, oflags;
        int type, indx, error;
        struct flock lf;
-       int no_controlling_tty = 0;
-       int deny_controlling_tty = 0;
-       struct session *sessp = SESSION_NULL;
+       struct vfs_context context;
 
        oflags = uflags;
 
        if ((oflags & O_ACCMODE) == O_ACCMODE)
                return(EINVAL);
+
        flags = FFLAGS(uflags);
+       CLR(flags, FENCRYPTED);
+       CLR(flags, FUNENCRYPTED);
 
        AUDIT_ARG(fflags, oflags);
        AUDIT_ARG(mode, vap->va_mode);
@@ -3314,68 +3321,26 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
        }
        uu->uu_dupfd = -indx - 1;
 
-       if (!(p->p_flag & P_CONTROLT)) {
-               sessp = proc_session(p);
-               no_controlling_tty = 1;
-               /*
-                * If conditions would warrant getting a controlling tty if
-                * the device being opened is a tty (see ttyopen in tty.c),
-                * but the open flags deny it, set a flag in the session to
-                * prevent it.
-                */
-               if (SESS_LEADER(p, sessp) &&
-                   sessp->s_ttyvp == NULL &&
-                   (flags & O_NOCTTY)) {
-                       session_lock(sessp);
-                       sessp->s_flags |= S_NOCTTY;
-                       session_unlock(sessp);
-                       deny_controlling_tty = 1;
-               }
-       }
-
        if ((error = vn_open_auth(ndp, &flags, vap))) {
                if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){        /* XXX from fdopen */
                        if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
                                fp_drop(p, indx, NULL, 0);
                                *retval = indx;
-                               if (deny_controlling_tty) {
-                                       session_lock(sessp);
-                                       sessp->s_flags &= ~S_NOCTTY;
-                                       session_unlock(sessp);
-                               }
-                               if (sessp != SESSION_NULL)
-                                       session_rele(sessp);
                                return (0);
                        }
                }
                if (error == ERESTART)
                        error = EINTR;
                fp_free(p, indx, fp);
-
-               if (deny_controlling_tty) {
-                       session_lock(sessp);
-                       sessp->s_flags &= ~S_NOCTTY;
-                       session_unlock(sessp);
-               }
-               if (sessp != SESSION_NULL)
-                       session_rele(sessp);
                return (error);
        }
        uu->uu_dupfd = 0;
        vp = ndp->ni_vp;
 
-       fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
+       fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
        fp->f_fglob->fg_ops = &vnops;
        fp->f_fglob->fg_data = (caddr_t)vp;
 
-#if CONFIG_PROTECT
-       if (VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) {
-               if (vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) {
-                       fp->f_fglob->fg_flag |= FENCRYPTED;
-               }
-       }
-#endif
-
        if (flags & (O_EXLOCK | O_SHLOCK)) {
                lf.l_whence = SEEK_SET;
                lf.l_start = 0;
@@ -3402,33 +3367,6 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
        if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
                goto bad;
 
-       /*
-        * If the open flags denied the acquisition of a controlling tty,
-        * clear the flag in the session structure that prevented the lower
-        * level code from assigning one.
-        */
-       if (deny_controlling_tty) {
-               session_lock(sessp);
-               sessp->s_flags &= ~S_NOCTTY;
-               session_unlock(sessp);
-       }
-
-       /*
-        * If a controlling tty was set by the tty line discipline, then we
-        * want to set the vp of the tty into the session structure.  We have
-        * a race here because we can't get to the vp for the tp in ttyopen,
-        * because it's not passed as a parameter in the open path.
-        */
-       if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
-               vnode_t ttyvp;
-
-               session_lock(sessp);
-               ttyvp = sessp->s_ttyvp;
-               sessp->s_ttyvp = vp;
-               sessp->s_ttyvid = vnode_vid(vp);
-               session_unlock(sessp);
-       }
-
        /*
         * For directories we hold some additional information in the fd.
         */
@@ -3440,6 +3378,18 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
 
        vnode_put(vp);
 
+       /*
+        * The first terminal open (without O_NOCTTY) by a session leader
+        * results in that terminal becoming the controlling terminal.
+        */
+       if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
+           !(flags & O_NOCTTY)) {
+               int tmp = 0;
+
+               (void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
+                   (caddr_t)&tmp, ctx);
+       }
+
        proc_fdlock(p);
        if (flags & O_CLOEXEC)
                *fdflags(p, indx) |= UF_EXCLOSE;
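With the session bookkeeping removed from open1(), controlling-terminal assignment now happens explicitly: after a successful open of a tty, open1() issues a TIOCSCTTY ioctl on behalf of a session leader that has no controlling terminal and did not pass O_NOCTTY. From user space the visible behaviour should be unchanged; a small illustrative program (device path and error handling simplified):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	/*
	 * Become a session leader with no controlling terminal.
	 * (A real program would fork() first, since setsid() fails
	 * if the caller is already a process-group leader.)
	 */
	if (setsid() == -1)
		perror("setsid");

	/* First tty opened without O_NOCTTY becomes the controlling terminal. */
	int fd = open("/dev/ttys001", O_RDWR);	/* illustrative device path */
	if (fd == -1) {
		perror("open tty");
		return 1;
	}

	/* /dev/tty should now refer to the terminal we just opened. */
	int ctty = open("/dev/tty", O_RDWR | O_NOCTTY);
	if (ctty == -1)
		perror("open /dev/tty");
	else
		printf("acquired controlling terminal (fd %d)\n", ctty);
	return 0;
}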
@@ -3451,19 +3401,9 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
 
        *retval = indx;
 
-       if (sessp != SESSION_NULL)
-               session_rele(sessp);
        return (0);
 bad:
-       if (deny_controlling_tty) {
-               session_lock(sessp);
-               sessp->s_flags &= ~S_NOCTTY;
-               session_unlock(sessp);
-       }
-       if (sessp != SESSION_NULL)
-               session_rele(sessp);
-
-       struct vfs_context context = *vfs_context_current();
+       context = *vfs_context_current();
        context.vc_ucred = fp->f_fglob->fg_cred;
     
        if ((fp->f_fglob->fg_flag & FHASLOCK) &&
@@ -3629,15 +3569,27 @@ int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap,
         * 2. set a flag to mark it as requiring open-raw-encrypted semantics. 
         */ 
        if (flags & O_CREAT) {  
-               VATTR_SET(&va, va_dataprotect_class, class);
+               /* Lower-level kernel code validates that the class is valid before applying it. */
+               if (class != PROTECTION_CLASS_DEFAULT) {
+                       /*
+                        * PROTECTION_CLASS_DEFAULT implies that we make the class for this
+                        * file behave the same as open(2).
+                        */
+                       VATTR_SET(&va, va_dataprotect_class, class);
+               }
        }
        
-       if (dpflags & O_DP_GETRAWENCRYPTED) {
+       if (dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) {
                if ( flags & (O_RDWR | O_WRONLY)) {
                        /* Not allowed to write raw encrypted bytes */
                        return EINVAL;          
                }                       
-               VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
+               if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
+                   VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
+               }
+               if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
+                   VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
+               }
        }
 
        error = open1(vfs_context_current(), &nd, uap->flags, &va,
@@ -3816,9 +3768,6 @@ mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
        }
 
        switch (uap->mode & S_IFMT) {
-       case S_IFMT:    /* used by badsect to flag bad sectors */
-               VATTR_SET(&va, va_type, VBAD);
-               break;
        case S_IFCHR:
                VATTR_SET(&va, va_type, VCHR);
                break;
@@ -4353,17 +4302,18 @@ symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
                error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
 
 #if CONFIG_MACF
-       if (error == 0)
+       if (error == 0 && vp)
                error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
 #endif
 
        /* do fallback attribute handling */
-       if (error == 0)
+       if (error == 0 && vp)
                error = vnode_setattr_fallback(vp, &va, ctx);
 
        if (error == 0) {
                int     update_flags = 0;
 
+               /* check if a new vnode was created, else try to get one */
                if (vp == NULL) {
                        nd.ni_cnd.cn_nameiop = LOOKUP;
 #if CONFIG_TRIGGERS
@@ -4544,10 +4494,12 @@ lookup_continue:
                if (!batched) {
                        error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
                        if (error) {
-                               if (error == ENOENT &&
-                                   retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
-                                       do_retry = 1;
-                                       retry_count++;
+                               if (error == ENOENT) {
+                                       assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+                                       if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+                                               do_retry = 1;
+                                               retry_count++;
+                                       }
                                }
                                goto out;
                        }
@@ -4612,16 +4564,18 @@ lookup_continue:
                                goto out;
                        }
                        goto lookup_continue;
-               } else if (error == ENOENT && batched &&
-                   retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
-                       /*
-                        * For compound VNOPs, the authorization callback may
-                        * return ENOENT in case of racing hardlink lookups
-                        * hitting the name  cache, redrive the lookup.
-                        */
-                       do_retry = 1;
-                       retry_count += 1;
-                       goto out;
+               } else if (error == ENOENT && batched) {
+                       assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+                       if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+                               /*
+                                * For compound VNOPs, the authorization callback may
+                                * return ENOENT in case of racing hardlink lookups
+                                * hitting the name  cache, redrive the lookup.
+                                */
+                               do_retry = 1;
+                               retry_count += 1;
+                               goto out;
+                       }
                }
        }
 
@@ -6713,15 +6667,17 @@ continue_lookup:
        if (!batched) {
                error = vn_authorize_rename(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, NULL);
                if (error) {
-                       if (error == ENOENT &&
-                           retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
-                               /*
-                                * We encountered a race where after doing the namei, tvp stops
-                                * being valid. If so, simply re-drive the rename call from the
-                                * top.
-                                */
-                               do_retry = 1;
-                               retry_count += 1;
+                       if (error == ENOENT) {
+                               assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+                               if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+                                       /*
+                                        * We encountered a race where after doing the namei, tvp stops
+                                        * being valid. If so, simply re-drive the rename call from the
+                                        * top.
+                                        */
+                                       do_retry = 1;
+                                       retry_count += 1;
+                               }
                        }
                        goto out1;
                }
@@ -6994,10 +6950,12 @@ skipped_lookup:
                 * ENOENT in case of racing hardlink lookups hitting the name
                 * cache, redrive the lookup.
                 */
-               if (batched && error == ENOENT &&
-                   retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
-                       do_retry = 1;
-                       retry_count += 1;
+               if (batched && error == ENOENT) {
+                       assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+                       if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+                               do_retry = 1;
+                               retry_count += 1;
+                       }
                }
 
                goto out1;
@@ -7424,10 +7382,12 @@ continue_lookup:
                        if (!batched) {
                                error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
                                if (error) {
-                                       if (error == ENOENT &&
-                                           restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
-                                               restart_flag = 1;
-                                               restart_count += 1;
+                                       if (error == ENOENT) {
+                                               assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+                                               if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+                                                       restart_flag = 1;
+                                                       restart_count += 1;
+                                               }
                                        }
                                        goto out;
                                }
@@ -7484,16 +7444,18 @@ continue_lookup:
 
                if (error == EKEEPLOOKING) {
                        goto continue_lookup;
-               } else if (batched && error == ENOENT &&
-                   restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
-                       /*
-                        * For compound VNOPs, the authorization callback
-                        * may return ENOENT in case of racing hard link lookups
-                        * redrive the lookup.
-                        */
-                       restart_flag = 1;
-                       restart_count += 1;
-                       goto out;
+               } else if (batched && error == ENOENT) {
+                       assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+                       if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+                               /*
+                                * For compound VNOPs, the authorization callback
+                                * may return ENOENT in case of racing hard link lookups
+                                * redrive the lookup.
+                                */
+                               restart_flag = 1;
+                               restart_count += 1;
+                               goto out;
+                       }
                }
 #if CONFIG_APPLEDOUBLE
                /*
@@ -9308,11 +9270,12 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long
 
        memp = NULL;
 
+
        /*
         * ensure the buffer is large enough for underlying calls
         */
 #ifndef HFSIOC_GETPATH
-typedef char pn_t[MAXPATHLEN];
+       typedef char pn_t[MAXPATHLEN];
 #define HFSIOC_GETPATH  _IOWR('h', 13, pn_t)
 #endif
 
@@ -9324,7 +9287,6 @@ typedef char pn_t[MAXPATHLEN];
                size = MAXPATHLEN;
        }
 
-
        if (size > sizeof (stkbuf)) {
                if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
                data = memp;
@@ -9719,27 +9681,30 @@ ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
        
        /* Get the vnode for the file we are getting info on:  */
        if ((error = file_vnode(uap->fd, &vp)))
-               goto done;
+               return error;
        fd = uap->fd;
        if ((error = vnode_getwithref(vp))) {
-               goto done;
+               file_drop(fd);
+               return error;
        }
 
 #if CONFIG_MACF
-       error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
-       if (error) {
-               goto done;
+       if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
+               file_drop(fd);
+               vnode_put(vp);
+               return error;
        }
 #endif
 
        error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
 
-done:
-       if (fd != -1)
-               file_drop(fd);
+       file_drop(fd);
 
-       if (vp)
+       /* validate vp; fsctl_internal() can drop iocount and reset vp to NULL */
+       if (vp) {
                vnode_put(vp);
+       }
+
        return error;
 }
 /* end of fsctl system call */
@@ -9959,7 +9924,12 @@ fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
                return (EINVAL);
 
        if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
-               return (error);
+               if (error == EPERM) {
+                       /* if the string won't fit in attrname, copyinstr emits EPERM */
+                       return (ENAMETOOLONG);
+               }
+               /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
+               return error;
        }
        if (xattr_protected(attrname))
                return(EPERM);
index f785b0d8c017f3f1534e795cdb25978a8897011e..8639edc99c5e992abd418e6fd02a0f0a56ae637e 100644 (file)
 
 /* Surrogate Pair Constants */
 #define SP_HALF_SHIFT  10
-#define SP_HALF_BASE   0x0010000UL
-#define SP_HALF_MASK   0x3FFUL
+#define SP_HALF_BASE   0x0010000u
+#define SP_HALF_MASK   0x3FFu
 
-#define SP_HIGH_FIRST  0xD800UL
-#define SP_HIGH_LAST   0xDBFFUL
-#define SP_LOW_FIRST   0xDC00UL
-#define SP_LOW_LAST    0xDFFFUL
+#define SP_HIGH_FIRST  0xD800u
+#define SP_HIGH_LAST   0xDBFFu
+#define SP_LOW_FIRST   0xDC00u
+#define SP_LOW_LAST    0xDFFFu
 
 
 #include "vfs_utfconvdata.h"
@@ -148,7 +148,7 @@ static int unicode_decompose(u_int16_t character, u_int16_t *convertedChars);
 
 static u_int16_t unicode_combine(u_int16_t base, u_int16_t combining);
 
-static void priortysort(u_int16_t* characters, int count);
+static void prioritysort(u_int16_t* characters, int count);
 
 static u_int16_t  ucs_to_sfm(u_int16_t ucs_ch, int lastchar);
 
@@ -196,7 +196,7 @@ utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash, int fl
        u_int16_t * chp = NULL;
        u_int16_t sequence[8];
        int extra = 0;
-       int charcnt;
+       size_t charcnt;
        int swapbytes = (flags & UTF_REVERSE_ENDIAN);
        int decompose = (flags & UTF_DECOMPOSED);
        size_t len;
@@ -266,7 +266,7 @@ utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p,
        u_int16_t * chp = NULL;
        u_int16_t sequence[8];
        int extra = 0;
-       int charcnt;
+       size_t charcnt;
        int swapbytes = (flags & UTF_REVERSE_ENDIAN);
        int nullterm  = ((flags & UTF_NO_NULL_TERM) == 0);
        int decompose = (flags & UTF_DECOMPOSED);
@@ -378,6 +378,23 @@ utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p,
        return (result);
 }
 
+// Pushes a character taking account of combining character sequences
+static void push(uint16_t ucs_ch, int *combcharcnt, uint16_t **ucsp)
+{
+       /*
+        * Make multiple combining character sequences canonical
+        */
+       if (unicode_combinable(ucs_ch)) {
+               ++*combcharcnt;         /* start tracking a run */
+       } else if (*combcharcnt) {
+               if (*combcharcnt > 1) {
+                       prioritysort(*ucsp - *combcharcnt, *combcharcnt);
+               }
+               *combcharcnt = 0;       /* start over */
+       }
+
+       *(*ucsp)++ = ucs_ch;
+}
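push() centralizes the combining-character bookkeeping that the decode loop used to repeat: it tracks a run of combining marks and, when the run ends, hands it to prioritysort() so equivalent input sequences come out in one canonical order. Under Unicode canonical ordering, marks in a run are ordered by ascending combining class (stable for equal classes); for example 'd' U+0307 (dot above, class 230) U+0323 (dot below, class 220) reorders to 'd' U+0323 U+0307. The commit's prioritysort() does the real work; the toy sketch below only illustrates the reordering idea, with ccc() as a hypothetical combining-class lookup:

/*
 * Editorial sketch of the canonical reordering push()/prioritysort() provide.
 * ccc() is a hypothetical combining-class lookup, not an xnu function.
 */
static void
canonical_order(uint16_t *run, int count)
{
	for (int i = 1; i < count; i++) {
		/* stable insertion sort by ascending combining class */
		for (int j = i; j > 0 && ccc(run[j]) < ccc(run[j - 1]); j--) {
			uint16_t tmp = run[j];
			run[j] = run[j - 1];
			run[j - 1] = tmp;
		}
	}
}

/* e.g. {0x0307 (ccc 230), 0x0323 (ccc 220)} reorders to {0x0323, 0x0307} */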
 
 /*
  * utf8_decodestr - Decodes a UTF-8 string back to Unicode
@@ -417,13 +434,12 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
        unsigned int byte;
        int combcharcnt = 0;
        int result = 0;
-       int decompose, precompose, swapbytes, escaping;
+       int decompose, precompose, escaping;
        int sfmconv;
        int extrabytes;
 
        decompose  = (flags & UTF_DECOMPOSED);
        precompose = (flags & UTF_PRECOMPOSED);
-       swapbytes  = (flags & UTF_REVERSE_ENDIAN);
        escaping   = (flags & UTF_ESCAPE_ILLEGAL);
        sfmconv    = (flags & UTF_SFM_CONVERSIONS);
 
@@ -497,7 +513,7 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
                                ucs_ch = (ch >> SP_HALF_SHIFT) + SP_HIGH_FIRST;
                                if (ucs_ch < SP_HIGH_FIRST || ucs_ch > SP_HIGH_LAST)
                                        goto escape4;
-                               *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : (u_int16_t)ucs_ch;
+                               push(ucs_ch, &combcharcnt, &ucsp);
                                if (ucsp >= bufend)
                                        goto toolong;
                                ucs_ch = (ch & SP_HALF_MASK) + SP_LOW_FIRST;
@@ -505,7 +521,7 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
                                        --ucsp;
                                        goto escape4;
                                }
-                               *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : (u_int16_t)ucs_ch;
+                               *ucsp++ = ucs_ch;
                                continue;
                        default:
                                result = EINVAL;
@@ -516,30 +532,22 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
                                        u_int16_t sequence[8];
                                        int count, i;
 
-                                       /* Before decomposing a new unicode character, sort 
-                                        * previous combining characters, if any, and reset
-                                        * the counter.
-                                        */
-                                       if (combcharcnt > 1) {
-                                               priortysort(ucsp - combcharcnt, combcharcnt);
-                                       }
-                                       combcharcnt = 0;
-
                                        count = unicode_decompose(ucs_ch, sequence);
+
                                        for (i = 0; i < count; ++i) {
-                                               ucs_ch = sequence[i];
-                                               *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : (u_int16_t)ucs_ch;
                                                if (ucsp >= bufend)
                                                        goto toolong;
+
+                                               push(sequence[i], &combcharcnt, &ucsp);
                                        }
-                                       combcharcnt += count - 1;
-                                       continue;                       
+
+                                       continue;
                                }
                        } else if (precompose && (ucsp != bufstart)) {
                                u_int16_t composite, base;
 
                                if (unicode_combinable(ucs_ch)) {
-                                       base = swapbytes ? OSSwapInt16(*(ucsp - 1)) : *(ucsp - 1);
+                                       base = ucsp[-1];
                                        composite = unicode_combine(base, ucs_ch);
                                        if (composite) {
                                                --ucsp;
@@ -553,19 +561,7 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
                if (ucs_ch == altslash)
                        ucs_ch = '/';
 
-               /*
-                * Make multiple combining character sequences canonical
-                */
-               if (unicode_combinable(ucs_ch)) {
-                       ++combcharcnt;   /* start tracking a run */
-               } else if (combcharcnt) {
-                       if (combcharcnt > 1) {
-                               priortysort(ucsp - combcharcnt, combcharcnt);
-                       }
-                       combcharcnt = 0;  /* start over */
-               }
-
-               *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : (u_int16_t)ucs_ch;
+               push(ucs_ch, &combcharcnt, &ucsp);
                continue;
 
                /* 
@@ -593,23 +589,32 @@ escape:
 
                /* Make a previous combining sequence canonical. */
                if (combcharcnt > 1) {
-                       priortysort(ucsp - combcharcnt, combcharcnt);
+                       prioritysort(ucsp - combcharcnt, combcharcnt);
                }
                combcharcnt = 0;
                
                ucs_ch = '%';
-               *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : (u_int16_t)ucs_ch;
+               *ucsp++ = ucs_ch;
                ucs_ch =  hexdigits[byte >> 4];
-               *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : (u_int16_t)ucs_ch;
+               *ucsp++ = ucs_ch;
                ucs_ch =  hexdigits[byte & 0x0F];
-               *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : (u_int16_t)ucs_ch;
+               *ucsp++ = ucs_ch;
        }
        /*
         * Make a previous combining sequence canonical
         */
        if (combcharcnt > 1) {
-               priortysort(ucsp - combcharcnt, combcharcnt);
+               prioritysort(ucsp - combcharcnt, combcharcnt);
        }
+
+       if (flags & UTF_REVERSE_ENDIAN) {
+               uint16_t *p = bufstart;
+               while (p < ucsp) {
+                       *p = OSSwapInt16(*p);
+                       ++p;
+               }
+       }
+
 exit:
        *ucslen = (u_int8_t*)ucsp - (u_int8_t*)bufstart;
 
@@ -804,7 +809,7 @@ nonASCII:
        if (unicode_bytes <= sizeof(unicodebuf))
                unistr = &unicodebuf[0];
        else
-               MALLOC(unistr, u_int16_t *, unicode_bytes, M_TEMP, M_WAITOK);
+               MALLOC(unistr, uint16_t *, unicode_bytes, M_TEMP, M_WAITOK);
 
        /* Normalize the string. */
        result = utf8_decodestr(inbufstart, inbuflen, unistr, &unicode_bytes,
@@ -1014,12 +1019,12 @@ unicode_combine(u_int16_t base, u_int16_t combining)
 
 
 /*
- * priortysort - order combining chars into canonical order
+ * prioritysort - order combining chars into canonical order
  *
  * Similar to CFUniCharPrioritySort
  */
 static void
-priortysort(u_int16_t* characters, int count)
+prioritysort(u_int16_t* characters, int count)
 {
        u_int32_t p1, p2;
        u_int16_t *ch1, *ch2;
index 9b431080fc494d6bc088d6589a377615b9260b0f..ca14ddec6c884d23b67135ad9935beabc98c1d5d 100644 (file)
@@ -115,7 +115,7 @@ int ubc_setcred(struct vnode *, struct proc *);
 #include <sys/cprotect.h>
 #endif
 
-extern void    sigpup_attach_vnode(vnode_t); /* XXX */
+#include <IOKit/IOBSD.h>
 
 static int vn_closefile(struct fileglob *fp, vfs_context_t ctx);
 static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data,
@@ -195,8 +195,6 @@ vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx)
        kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN, 
                                                   (uintptr_t)vp, 0);
 
-       sigpup_attach_vnode(vp);
-
        return 0;
 
 bad:
@@ -308,6 +306,12 @@ out:
        return error;
 }
 
+/*
+ * This is the number of times we'll loop in vn_open_auth without explicitly
+ * yielding the CPU when we determine we have to retry.
+ */
+#define RETRY_NO_YIELD_COUNT   5
+
 /*
  * Open a file with authorization, updating the contents of the structures
  * pointed to by ndp, fmodep, and vap as necessary to perform the requested
@@ -367,6 +371,7 @@ vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap)
        boolean_t need_vnop_open;
        boolean_t batched;
        boolean_t ref_failed;
+       int nretries = 0;
 
 again:
        vp = NULL;
@@ -446,10 +451,9 @@ continue_create_lookup:
 
                        if (error) {
                                /*
-                                * Check for a creation or unlink race.
+                                * Check for a create race.
                                 */
-                               if (((error == EEXIST) && !(fmode & O_EXCL)) ||
-                                               ((error == ENOENT) && (fmode & O_CREAT))){
+                               if ((error == EEXIST) && !(fmode & O_EXCL)){
                                        if (vp) 
                                                vnode_put(vp);
                                        goto again;
@@ -571,21 +575,32 @@ continue_create_lookup:
                }
 
 #if CONFIG_PROTECT
-               /* 
-                * Perform any content protection access checks prior to calling 
-                * into the filesystem, if the raw encrypted mode was not 
-                * requested.  
-                * 
-                * If the va_dataprotect_flags are NOT active, or if they are,
-                * but they do not have the VA_DP_RAWENCRYPTED bit set, then we need 
-                * to perform the checks.
-                */
-               if (!(VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) ||
-                               ((vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) == 0)) {
-                       error = cp_handle_open (vp, fmode);     
-                       if (error) {
+               // If raw encrypted mode is requested, handle that here
+               if (VATTR_IS_ACTIVE (vap, va_dataprotect_flags)
+                       && ISSET(vap->va_dataprotect_flags, VA_DP_RAWENCRYPTED)) {
+                       fmode |= FENCRYPTED;
+               }
+               if (VATTR_IS_ACTIVE (vap, va_dataprotect_flags)
+                       && ISSET(vap->va_dataprotect_flags, VA_DP_RAWUNENCRYPTED)) {
+                       /* Don't allow unencrypted I/O requests from user space unless entitled */
+                       boolean_t entitled = FALSE;
+#if !SECURE_KERNEL
+                       entitled = IOTaskHasEntitlement(current_task(), "com.apple.private.security.file-unencrypt-access");
+#endif
+                       if (!entitled) {
+                               error = EPERM;
                                goto bad;
                        }
+                       fmode |= FUNENCRYPTED;
+               }
+
+               /*
+                * Perform any content protection access checks prior to calling 
+                * into the filesystem.
+                */
+               error = cp_handle_open (vp, fmode);
+               if (error) {
+                       goto bad;
                }
 #endif
 
@@ -649,6 +664,27 @@ bad:
                 * EREDRIVEOPEN: means that we were hit by the tty allocation race.
                 */
                if (((error == ENOENT) && (*fmodep & O_CREAT)) || (error == EREDRIVEOPEN) || ref_failed) {
+                       /*
+                        * We'll retry here, but it is possible that we get
+                        * into a retry "spin" inside the kernel and starve
+                        * the threads that need to run for the retry loop
+                        * to end. An example is an open of a terminal which
+                        * is getting revoked: we spin here without yielding
+                        * because namei and VNOP_OPEN are successful but
+                        * vnode_ref fails. The revoke needs threads with an
+                        * iocount to run, but if we spin here we may be
+                        * blocking other threads from running.
+                        *
+                        * We start yielding the CPU after some number of
+                        * retries for increasing durations. Note that this is
+                        * still a loop without an exit condition.
+                        */
+                       nretries += 1;
+                       if (nretries > RETRY_NO_YIELD_COUNT) {
+                               /* Every hz/100 ticks is ~10 msecs ... */
+                               tsleep(&nretries, PVFS, "vn_open_auth_retry",
+                                   MIN((nretries * (hz/100)), hz));
+                       }
                        goto again;
                }
        }
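
To make the new backoff concrete, here is a small userland sketch (not kernel code) of the tsleep() timeout computed above, assuming hz == 100 so that hz/100 is one tick, roughly 10 ms: the first RETRY_NO_YIELD_COUNT retries loop immediately, then the sleep grows linearly with the retry count and caps out at hz ticks, i.e. one second.

#include <stdio.h>

#define RETRY_NO_YIELD_COUNT   5

/* Returns the number of ticks the retry path above would sleep. */
static int retry_sleep_ticks(int nretries, int hz)
{
        if (nretries <= RETRY_NO_YIELD_COUNT)
                return 0;                       /* early retries just loop */
        int ticks = nretries * (hz / 100);      /* hz/100 ticks is ~10 ms */
        return ticks < hz ? ticks : hz;         /* never sleep longer than 1 s */
}

int main(void)
{
        for (int n = 1; n <= 128; n *= 2)
                printf("retry %3d -> sleep %3d ticks\n", n, retry_sleep_ticks(n, 100));
        return 0;
}
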
@@ -968,6 +1004,12 @@ vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
        if (fp->f_fglob->fg_flag & FENCRYPTED) {
                ioflag |= IO_ENCRYPTED;
        }
+       if (fp->f_fglob->fg_flag & FUNENCRYPTED) {
+               ioflag |= IO_SKIP_ENCRYPTION;
+       }
+       if (fp->f_fglob->fg_flag & O_EVTONLY) {
+               ioflag |= IO_EVTONLY;
+       }
        if (fp->f_fglob->fg_flag & FNORDAHEAD)
            ioflag |= IO_RAOFF;
 
@@ -980,7 +1022,7 @@ vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
        }
        count = uio_resid(uio);
 
-       if (vnode_isswap(vp)) {
+       if (vnode_isswap(vp) && !(IO_SKIP_ENCRYPTION & ioflag)) {
                /* special case for swap files */
                error = vn_read_swapfile(vp, uio);
        } else {
@@ -1044,6 +1086,8 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
                ioflag |= IO_NODIRECT;
        if (fp->f_fglob->fg_flag & FSINGLE_WRITER)
                ioflag |= IO_SINGLE_WRITER;
+       if (fp->f_fglob->fg_flag & O_EVTONLY)
+               ioflag |= IO_EVTONLY;
 
        /*
         * Treat synchronous mounts and O_FSYNC on the fd as equivalent.
@@ -1254,8 +1298,11 @@ vn_stat_noauth(struct vnode *vp, void *sbptr, kauth_filesec_t *xsec, int isstat6
                sb64->st_atimespec = va.va_access_time;
                sb64->st_mtimespec = va.va_modify_time;
                sb64->st_ctimespec = va.va_change_time;
-               sb64->st_birthtimespec = 
-                               VATTR_IS_SUPPORTED(&va, va_create_time) ? va.va_create_time : va.va_change_time;
+               if (VATTR_IS_SUPPORTED(&va, va_create_time)) {
+                       sb64->st_birthtimespec =  va.va_create_time;
+               } else {
+                       sb64->st_birthtimespec.tv_sec = sb64->st_birthtimespec.tv_nsec = 0;
+               }
                sb64->st_blksize = va.va_iosize;
                sb64->st_flags = va.va_flags;
                sb64->st_blocks = roundup(va.va_total_alloc, 512) / 512;
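
One user-visible consequence of the hunk above: when the filesystem does not report a creation time, stat() now returns a zeroed st_birthtimespec instead of echoing the change time, so callers can tell "unknown" apart from a real birth time. A minimal userland check (assuming the 64-bit-inode struct stat, which is where st_birthtimespec lives):

#include <stdio.h>
#include <sys/stat.h>

int main(int argc, char **argv)
{
        struct stat st;

        if (argc < 2) {
                fprintf(stderr, "usage: %s path\n", argv[0]);
                return 1;
        }
        if (stat(argv[1], &st) != 0) {
                perror("stat");
                return 1;
        }
        if (st.st_birthtimespec.tv_sec == 0 && st.st_birthtimespec.tv_nsec == 0)
                printf("%s: filesystem does not report a creation time\n", argv[1]);
        else
                printf("%s: created at %ld\n", argv[1], (long)st.st_birthtimespec.tv_sec);
        return 0;
}
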
@@ -1476,26 +1523,32 @@ vn_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx)
 static int
 vn_closefile(struct fileglob *fg, vfs_context_t ctx)
 {
-       struct vnode *vp = (struct vnode *)fg->fg_data;
+       struct vnode *vp = fg->fg_data;
        int error;
-       struct flock lf;
 
        if ( (error = vnode_getwithref(vp)) == 0 ) {
-
-               if ((fg->fg_flag & FHASLOCK) &&
-                   FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
-                       lf.l_whence = SEEK_SET;
-                       lf.l_start = 0;
-                       lf.l_len = 0;
-                       lf.l_type = F_UNLCK;
-
-                       (void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
+               if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE &&
+                   ((fg->fg_flag & FHASLOCK) != 0 || 
+                   (fg->fg_lflags & FG_HAS_OFDLOCK) != 0)) {
+                       struct flock lf = {
+                               .l_whence = SEEK_SET,
+                               .l_start = 0,
+                               .l_len = 0,
+                               .l_type = F_UNLCK
+                       };
+
+                       if ((fg->fg_flag & FHASLOCK) != 0)
+                               (void) VNOP_ADVLOCK(vp, (caddr_t)fg,
+                                   F_UNLCK, &lf, F_FLOCK, ctx, NULL);
+
+                       if ((fg->fg_lflags & FG_HAS_OFDLOCK) != 0)
+                               (void) VNOP_ADVLOCK(vp, (caddr_t)fg,
+                                   F_UNLCK, &lf, F_OFD_LOCK, ctx, NULL);
                }
                error = vn_close(vp, fg->fg_flag, ctx);
-
-               (void)vnode_put(vp);
+               (void) vnode_put(vp);
        }
-       return(error);
+       return (error);
 }
 
 /*
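
For the FHASLOCK half of the change above, the userland contract being honored is the usual flock() one: the lock belongs to the open file description and is released when its last descriptor goes away, which is exactly where vn_closefile() runs; the new FG_HAS_OFDLOCK branch does the same for OFD-style fcntl locks. A trivial illustration of the flock() case:

#include <fcntl.h>
#include <stdio.h>
#include <sys/file.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/tmp/flock-demo", O_CREAT | O_RDWR, 0644);

        if (fd < 0 || flock(fd, LOCK_EX) != 0) {
                perror("flock-demo");
                return 1;
        }
        puts("holding an exclusive flock(); the last close releases it");
        close(fd);      /* kernel issues the F_UNLCK seen in vn_closefile() */
        return 0;
}
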
index a6fc322516fa6c90aad8207682a2de7f14c242d5..be1898b454b5e38b9b6cd5859061783c99c8f884 100644 (file)
@@ -598,7 +598,7 @@ vnode_flushnamedstream(vnode_t vp, vnode_t svp, vfs_context_t context)
        }
 
        iosize = bufsize = MIN(datasize, NS_IOBUFSIZE);
-       if (kmem_alloc(kernel_map, (vm_offset_t *)&bufptr, bufsize)) {
+       if (kmem_alloc(kernel_map, (vm_offset_t *)&bufptr, bufsize, VM_KERN_MEMORY_FILE)) {
                return (ENOMEM);
        }
        auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
@@ -941,7 +941,7 @@ retry:
                size_t  iosize;
 
                iosize = bufsize = MIN(datasize, NS_IOBUFSIZE);
-               if (kmem_alloc(kernel_map, (vm_offset_t *)&bufptr, bufsize)) {
+               if (kmem_alloc(kernel_map, (vm_offset_t *)&bufptr, bufsize, VM_KERN_MEMORY_FILE)) {
                        error = ENOMEM;
                        goto out;
                }
@@ -3215,7 +3215,7 @@ shift_data_down(vnode_t xvp, off_t start, size_t len, off_t delta, vfs_context_t
        }
        orig_chunk = chunk;
 
-       if (kmem_alloc(kernel_map, (vm_offset_t *)&buff, chunk)) {
+       if (kmem_alloc(kernel_map, (vm_offset_t *)&buff, chunk, VM_KERN_MEMORY_FILE)) {
                return ENOMEM;
        }
 
@@ -3270,7 +3270,7 @@ shift_data_up(vnode_t xvp, off_t start, size_t len, off_t delta, vfs_context_t c
        orig_chunk = chunk;
        end = start + len;
 
-       if (kmem_alloc(kernel_map, (vm_offset_t *)&buff, chunk)) {
+       if (kmem_alloc(kernel_map, (vm_offset_t *)&buff, chunk, VM_KERN_MEMORY_FILE)) {
                return ENOMEM;
        }
 
index 31b87e8a0caa7cea44c6fee8a3fd3ce1b42a91c9..e17287ff74e9cadba49a580d23aa67cf3d12e310 100644 (file)
@@ -602,7 +602,7 @@ macx_swapinfo(
        kern_return_t           kr;
 
        error = 0;
-       if (COMPRESSED_PAGER_IS_ACTIVE) {
+       if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
 
                if (vm_swap_up == TRUE) {
 
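
The practical, userland-visible side of this hunk is swap accounting: vm.swapusage is filled in from macx_swapinfo() (an assumption of this sketch, though that is how OS X routes it), so swap statistics are now reported whenever either the compressed pager or the freezer-backed compressed pager is active.

#include <stdio.h>
#include <sys/sysctl.h>

int main(void)
{
        struct xsw_usage xsu;
        size_t len = sizeof(xsu);

        if (sysctlbyname("vm.swapusage", &xsu, &len, NULL, 0) != 0) {
                perror("vm.swapusage");
                return 1;
        }
        printf("swap total=%llu used=%llu free=%llu encrypted=%s\n",
            xsu.xsu_total, xsu.xsu_used, xsu.xsu_avail,
            xsu.xsu_encrypted ? "yes" : "no");
        return 0;
}
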
index 9a663e9e848e7e5f821f778cff6dec569a1afba1..7ec5873dbc65a598ff8c745f2ce5da291f52b5fe 100644 (file)
 #include <sys/disk.h>
 #include <vm/vm_protos.h>
 #include <vm/vm_pageout.h>
+#include <hfs/hfs.h>
 
 void vm_swapfile_open(const char *path, vnode_t *vp);
 void vm_swapfile_close(uint64_t path, vnode_t vp);
-int vm_swapfile_preallocate(vnode_t vp, uint64_t *size);
+int vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin);
 uint64_t vm_swapfile_get_blksize(vnode_t vp);
 uint64_t vm_swapfile_get_transfer_size(vnode_t vp);
 int vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags);
+int vm_record_file_write(struct vnode *vp, uint64_t offset, char *buf, int size);
+
 
 void
 vm_swapfile_open(const char *path, vnode_t *vp)
@@ -96,7 +99,7 @@ vm_swapfile_close(uint64_t path_addr, vnode_t vp)
 }
 
 int
-vm_swapfile_preallocate(vnode_t vp, uint64_t *size)
+vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin)
 {
        int             error = 0;
        uint64_t        file_size = 0;
@@ -126,7 +129,6 @@ vm_swapfile_preallocate(vnode_t vp, uint64_t *size)
                }
        }
 #endif
-
        error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx);
 
        if (error) {
@@ -138,10 +140,24 @@ vm_swapfile_preallocate(vnode_t vp, uint64_t *size)
 
        if (error) {
                printf("vnode_size (new file) for swap file failed: %d\n", error);
+               goto done;
        }       
-
        assert(file_size == *size);
        
+       if (pin != NULL && *pin != FALSE) {
+
+               assert(vnode_tag(vp) == VT_HFS);
+
+               error = hfs_pin_vnode(VTOHFS(vp), vp, HFS_PIN_IT | HFS_DATALESS_PIN, NULL, ctx);
+
+               if (error) {
+                       printf("hfs_pin_vnode for swap files failed: %d\n", error);
+                       /* this is not fatal, carry on with files wherever they landed */
+                       *pin = FALSE;
+                       error = 0;
+               }
+       }
+
        vnode_lock_spin(vp);
        SET(vp->v_flag, VSWAP);
        vnode_unlock(vp);
@@ -149,6 +165,23 @@ done:
        return error;
 }
 
+
+int
+vm_record_file_write(vnode_t vp, uint64_t offset, char *buf, int size)
+{
+       int error = 0;
+       vfs_context_t ctx;
+
+       ctx = vfs_context_kernel();
+               
+       error = vn_rdwr(UIO_WRITE, vp, (caddr_t)buf, size, offset,
+               UIO_SYSSPACE, IO_NODELOCKED, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
+
+       return (error);
+}
+
+
+
 int
 vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags)
 {
@@ -158,10 +191,12 @@ vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flag
        kern_return_t   kr = KERN_SUCCESS;
        upl_t           upl = NULL;
        unsigned int    count = 0;
-       int             upl_create_flags = 0, upl_control_flags = 0;
+       upl_control_flags_t upl_create_flags = 0;
+       int             upl_control_flags = 0;
        upl_size_t      upl_size = 0;
 
-       upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE;
+       upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE
+                       | UPL_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK);
 
 #if ENCRYPTED_SWAP
        upl_control_flags = UPL_IOSYNC | UPL_PAGING_ENCRYPTED;
index 06b5d4e1b14f48658142b053ee38500d2d291611..099a70fb612edb91629ecec103ed899c3cf2d03b 100644 (file)
 int _shared_region_map_and_slide(struct proc*, int, unsigned int, struct shared_file_mapping_np*, uint32_t, user_addr_t, user_addr_t);
 int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *);
 
+#if DEVELOPMENT || DEBUG
+extern int radar_20146450;
+SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");
+
+extern int macho_printf;
+SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");
+
+extern int apple_protect_pager_data_request_debug;
+SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");
+
+#endif /* DEVELOPMENT || DEBUG */
+
 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
 SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
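
These debug knobs live under the vm sysctl namespace and are only compiled into DEVELOPMENT and DEBUG kernels, so they can be read (and, being CTLFLAG_RW, written) with the ordinary sysctl interfaces there, while the lookup simply fails on RELEASE kernels. A minimal reader:

#include <stdio.h>
#include <sys/sysctl.h>

int main(void)
{
        int value = 0;
        size_t len = sizeof(value);

        if (sysctlbyname("vm.macho_printf", &value, &len, NULL, 0) != 0) {
                perror("vm.macho_printf");      /* expected on RELEASE kernels */
                return 1;
        }
        printf("vm.macho_printf = %d\n", value);
        return 0;
}
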
@@ -147,6 +159,7 @@ extern int allow_stack_exec, allow_data_exec;
 
 SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
 SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
+
 #endif /* !SECURE_KERNEL */
 
 static const char *prot_values[] = {
@@ -192,9 +205,11 @@ SYSCTL_INT(_vm, OID_AUTO, enforce_shared_cache_dir, CTLFLAG_RW | CTLFLAG_LOCKED,
 static int64_t last_unnest_log_time = 0; 
 static int shared_region_unnest_log_count = 0;
 
-void log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e) {
-       struct timeval tv;
-       const char *pcommstr;
+void log_unnest_badness(
+       vm_map_t        m,
+       vm_map_offset_t s,
+       vm_map_offset_t e) {
+       struct timeval  tv;
 
        if (shared_region_unnest_logging == 0)
                return;
@@ -211,9 +226,7 @@ void log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e) {
                }
        }
 
-       pcommstr = current_proc()->p_comm;
-
-       printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, get_task_map(current_proc()->task), m, (uint64_t)s, (uint64_t)e);
+       printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, current_proc()->p_pid, (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
 }
 
 int
@@ -248,7 +261,7 @@ vslock(
                                             vm_map_page_mask(map)),
                           vm_map_round_page(addr+len,
                                             vm_map_page_mask(map)), 
-                          VM_PROT_READ | VM_PROT_WRITE,
+                          VM_PROT_READ | VM_PROT_WRITE | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_BSD),
                           FALSE);
 
        switch (kret) {
@@ -947,6 +960,7 @@ pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
                }
        }
 
+
        task_reference(target);
 
 #if CONFIG_MEMORYSTATUS
@@ -1234,8 +1248,10 @@ _shared_region_map_and_slide(
        }
 
 #if CONFIG_MACF
+       /* pass in 0 for the offset argument because AMFI does not need the offset
+               of the shared cache */
        error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
-                       fp->f_fglob, VM_PROT_ALL, MAP_FILE, &maxprot);
+                       fp->f_fglob, VM_PROT_ALL, MAP_FILE, 0, &maxprot);
        if (error) {
                goto done;
        }
@@ -1562,6 +1578,10 @@ extern unsigned int      vm_page_purgeable_wired_count;
 SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
           &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");
 
+extern unsigned int    vm_pageout_purged_objects;
+SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &vm_pageout_purged_objects, 0, "System purged object count");
+
 extern int madvise_free_debug;
 SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
           &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");
index 86f81524193259fee7c9293d8bdb5c0a209fcbaa..f858726ab46de544fe3ca95bc79eeadb549e5edc 100644 (file)
@@ -6,8 +6,8 @@ _VNOP_STRATEGY
 _VNOP_WRITE
 __FREE
 __FREE_ZONE
-__MALLOC
-__MALLOC_ZONE
+__MALLOC:__MALLOC_external
+__MALLOC_ZONE:__MALLOC_ZONE_external
 _advisory_read
 _advisory_read_ext
 _bcd2bin_data
@@ -468,6 +468,7 @@ _nop_write
 _nulldev
 _nullop
 _physio
+_proc_chrooted
 _proc_exiting
 _proc_find
 _proc_forcequota
@@ -650,6 +651,7 @@ _vfs_flags
 _vfs_fsadd
 _vfs_fsprivate
 _vfs_fsremove
+_vfs_get_notify_attributes
 _vfs_getnewfsid
 _vfs_getvfs
 _vfs_init_io_attributes
@@ -728,6 +730,7 @@ _vnode_iterate
 _vnode_lookup
 _vnode_mount
 _vnode_mountedhere
+_vnode_notify
 _vnode_open
 _vnode_put
 _vnode_putname
index 0a112a032f412c4fd5164bb4d95db8465fa13df2..29c12c6f5efcf09ed367863ce48ed12ecca52b7c 100644 (file)
@@ -19,7 +19,6 @@ _IOFreePageable
 _IOGetTime
 _IOIteratePageableMaps
 _IOKitBSDInit
-_IOKitResetTime
 _IOLibInit
 _IOLockAlloc
 _IOLockFree
@@ -45,11 +44,6 @@ _IOMappedWrite16
 _IOMappedWrite32
 _IOMappedWrite64
 _IOMappedWrite8
-_IOMapperIOVMAlloc
-_IOMapperIOVMFree
-_IOMapperInsertPPNPages
-_IOMapperInsertPage
-_IOMapperInsertUPLPages
 _IONDRVLibrariesInitialize
 _IONetworkNamePrefixMatching
 _IOPageableMapForAddress
@@ -82,6 +76,7 @@ _IOSimpleLockTryLock:_lck_spin_try_lock
 _IOSimpleLockUnlock:_lck_spin_unlock
 _IOSizeToAlignment
 _IOSleep
+_IOSleepWithLeeway
 _IOSystemShutdownNotification
 _IOZeroTvalspec
 _OSKernelStackRemaining
@@ -235,13 +230,18 @@ __ZN11IOResourcesD0Ev
 __ZN11IOResourcesD2Ev
 __ZN12IODMACommand10gMetaClassE
 __ZN12IODMACommand10superClassE
+__ZN12IODMACommand10withRefConEPv
 __ZN12IODMACommand10writeBytesEyPKvy
 __ZN12IODMACommand12cloneCommandEPv
 __ZN12IODMACommand12getAlignmentEv
+__ZN12IODMACommand14initWithRefConEPv
 __ZN12IODMACommand17getNumAddressBitsEv
+__ZN12IODMACommand18getAlignmentLengthEv
 __ZN12IODMACommand19setMemoryDescriptorEPK18IOMemoryDescriptorb
 __ZN12IODMACommand21clearMemoryDescriptorEb
+__ZNK12IODMACommand21getIOMemoryDescriptorEv
 __ZN12IODMACommand26getPreparedOffsetAndLengthEPyS0_
+__ZN12IODMACommand28getAlignmentInternalSegmentsEv
 __ZN12IODMACommand4freeEv
 __ZN12IODMACommand7prepareEyybb
 __ZN12IODMACommand8completeEbb
@@ -596,18 +596,6 @@ __ZN16IODMAEventSourceC2EPK11OSMetaClass
 __ZN16IODMAEventSourceC2Ev
 __ZN16IODMAEventSourceD0Ev
 __ZN16IODMAEventSourceD2Ev
-__ZN16IOKitDiagnostics10gMetaClassE
-__ZN16IOKitDiagnostics10superClassE
-__ZN16IOKitDiagnostics11diagnosticsEv
-__ZN16IOKitDiagnostics9MetaClassC1Ev
-__ZN16IOKitDiagnostics9MetaClassC2Ev
-__ZN16IOKitDiagnostics9metaClassE
-__ZN16IOKitDiagnosticsC1EPK11OSMetaClass
-__ZN16IOKitDiagnosticsC1Ev
-__ZN16IOKitDiagnosticsC2EPK11OSMetaClass
-__ZN16IOKitDiagnosticsC2Ev
-__ZN16IOKitDiagnosticsD0Ev
-__ZN16IOKitDiagnosticsD2Ev
 __ZN16IOPMinformeeList10gMetaClassE
 __ZN16IOPMinformeeList10initializeEv
 __ZN16IOPMinformeeList10nextInListEP12IOPMinformee
@@ -654,7 +642,6 @@ __ZN17IOBigMemoryCursorC2Ev
 __ZN17IOBigMemoryCursorD0Ev
 __ZN17IOBigMemoryCursorD2Ev
 __ZN17IOPolledInterface10gMetaClassE
-__ZN17IOPolledInterface15checkAllForWorkEv
 __ZN17IOPolledInterfaceC2EPK11OSMetaClass
 __ZN17IOPolledInterfaceD2Ev
 __ZN17IOPowerConnection10gMetaClassE
@@ -1012,7 +999,6 @@ __ZN8IOMapper17setMapperRequiredEb
 __ZN8IOMapper19copyMapperForDeviceEP9IOService
 __ZN8IOMapper28copyMapperForDeviceWithIndexEP9IOServicej
 __ZN8IOMapper19waitForSystemMapperEv
-__ZN8IOMapper13iovmMapMemoryEP8OSObjectjjjP13upl_page_infoPK21IODMAMapSpecification
 __ZN8IOMapper4freeEv
 __ZN8IOMapper5startEP9IOService
 __ZN8IOMapper7gSystemE
@@ -1256,9 +1242,6 @@ __ZNK15IORegistryPlane9MetaClass5allocEv
 __ZNK15IORegistryPlane9serializeEP11OSSerialize
 __ZNK16IODMAEventSource12getMetaClassEv
 __ZNK16IODMAEventSource9MetaClass5allocEv
-__ZNK16IOKitDiagnostics12getMetaClassEv
-__ZNK16IOKitDiagnostics9MetaClass5allocEv
-__ZNK16IOKitDiagnostics9serializeEP11OSSerialize
 __ZNK16IOPMinformeeList12getMetaClassEv
 __ZNK16IOPMinformeeList9MetaClass5allocEv
 __ZNK16IORangeAllocator12getMetaClassEv
@@ -1319,7 +1302,6 @@ __ZNK28IOFilterInterruptEventSource9MetaClass5allocEv
 __ZNK29IOInterleavedMemoryDescriptor12getMetaClassEv
 __ZNK29IOInterleavedMemoryDescriptor9MetaClass5allocEv
 __ZNK8IOMapper12getMetaClassEv
-__ZNK8IOMapper13getBypassMaskEPy
 __ZNK8IOMapper9MetaClass5allocEv
 __ZNK9IOCommand12getMetaClassEv
 __ZNK9IOCommand9MetaClass5allocEv
@@ -1360,7 +1342,6 @@ __ZTV15IOPMPowerSource
 __ZTV15IORegistryEntry
 __ZTV15IORegistryPlane
 __ZTV16IODMAEventSource
-__ZTV16IOKitDiagnostics
 __ZTV16IOPMinformeeList
 __ZTV16IORangeAllocator
 __ZTV17IOBigMemoryCursor
@@ -1413,7 +1394,6 @@ __ZTVN15IOPMPowerSource9MetaClassE
 __ZTVN15IORegistryEntry9MetaClassE
 __ZTVN15IORegistryPlane9MetaClassE
 __ZTVN16IODMAEventSource9MetaClassE
-__ZTVN16IOKitDiagnostics9MetaClassE
 __ZTVN16IOPMinformeeList9MetaClassE
 __ZTVN16IORangeAllocator9MetaClassE
 __ZTVN17IOBigMemoryCursor9MetaClassE
@@ -1499,6 +1479,7 @@ _gIOParentMatchKey
 _gIOPathMatchKey
 _gIOPlatformActiveActionKey
 _gIOPlatformHaltRestartActionKey
+_gIOPlatformPanicActionKey
 _gIOPlatformQuiesceActionKey
 _gIOPlatformSleepActionKey
 _gIOPlatformWakeActionKey
index f053710c8a22b605320a1bef961b3b4ca7408558..3aadfffa4c49e50222191e70107282364726c580 100644 (file)
@@ -47,12 +47,12 @@ __ZN12IODMACommand14OutputLittle32EPS_NS_9Segment64EPvj
 __ZN12IODMACommand14OutputLittle64EPS_NS_9Segment64EPvj
 __ZN12IODMACommand15genIOVMSegmentsEPFbPS_NS_9Segment64EPvjEPyS2_Pj
 __ZN12IODMACommand15genIOVMSegmentsEPyPvPj
+__ZN12IODMACommand16createCopyBufferEjy
+__ZN12IODMACommand17withSpecificationEPFbPS_NS_9Segment64EPvjEPKNS_14SegmentOptionsEjP8IOMapperS2_
+__ZN12IODMACommand21initWithSpecificationEPFbPS_NS_9Segment64EPvjEPKNS_14SegmentOptionsEjP8IOMapperS2_
+__ZN12IODMACommand24prepareWithSpecificationEPFbPS_NS_9Segment64EPvjEPKNS_14SegmentOptionsEjP8IOMapperyybb
 __ZN12IODMACommand17withSpecificationEPFbPS_NS_9Segment64EPvjEhyNS_14MappingOptionsEyjP8IOMapperS2_
 __ZN12IODMACommand21initWithSpecificationEPFbPS_NS_9Segment64EPvjEhyNS_14MappingOptionsEyjP8IOMapperS2_
-__ZN12IODMACommand22_RESERVEDIODMACommand3Ev
-__ZN12IODMACommand22_RESERVEDIODMACommand4Ev
-__ZN12IODMACommand22_RESERVEDIODMACommand5Ev
-__ZN12IODMACommand22_RESERVEDIODMACommand6Ev
 __ZN12IODMACommand22_RESERVEDIODMACommand7Ev
 __ZN12IODMACommand22_RESERVEDIODMACommand8Ev
 __ZN12IODMACommand22_RESERVEDIODMACommand9Ev
@@ -179,7 +179,6 @@ __ZN16IODMAEventSource14dmaEventSourceEP8OSObjectP9IOServicePFvS1_PS_P12IODMACom
 __ZN16IODMAEventSource15startDMACommandEP12IODMACommandjyy
 __ZN16IODMAEventSource16notifyDMACommandEP12IODMACommandiyy
 __ZN16IODMAEventSource4initEP8OSObjectP9IOServicePFvS1_PS_P12IODMACommandiyyES8_j
-__ZN16IOKitDiagnostics12updateOffsetEP12OSDictionaryjPKc
 __ZN16IORangeAllocator10deallocateEyy
 __ZN16IORangeAllocator12allocElementEj
 __ZN16IORangeAllocator13allocateRangeEyy
@@ -366,25 +365,22 @@ __ZN29IOInterleavedMemoryDescriptor19setMemoryDescriptorEP18IOMemoryDescriptoryy
 __ZN29IOInterleavedMemoryDescriptor22clearMemoryDescriptorsEj
 __ZN29IOInterleavedMemoryDescriptor7prepareEj
 __ZN29IOInterleavedMemoryDescriptor8completeEj
-__ZN8IOMapper10allocTableEy
-__ZN8IOMapper10iovmInsertEjjP13upl_page_infoj
-__ZN8IOMapper10iovmInsertEjjPjj
-__ZN8IOMapper11NewARTTableEyPPvPj
-__ZN8IOMapper12FreeARTTableEP6OSDatay
+__ZN8IOMapper18_RESERVEDIOMapper0Ev
+__ZN8IOMapper18_RESERVEDIOMapper1Ev
+__ZN8IOMapper18_RESERVEDIOMapper2Ev
+__ZN8IOMapper18_RESERVEDIOMapper3Ev
 __ZN8IOMapper18_RESERVEDIOMapper4Ev
 __ZN8IOMapper18_RESERVEDIOMapper5Ev
 __ZN8IOMapper18_RESERVEDIOMapper6Ev
 __ZN8IOMapper18_RESERVEDIOMapper7Ev
 __ZN8IOMapper18_RESERVEDIOMapper8Ev
 __ZN8IOMapper18_RESERVEDIOMapper9Ev
-__ZN8IOMapper18iovmFreeDMACommandEP12IODMACommandjj
 __ZN8IOMapper19_RESERVEDIOMapper10Ev
 __ZN8IOMapper19_RESERVEDIOMapper11Ev
 __ZN8IOMapper19_RESERVEDIOMapper12Ev
 __ZN8IOMapper19_RESERVEDIOMapper13Ev
 __ZN8IOMapper19_RESERVEDIOMapper14Ev
 __ZN8IOMapper19_RESERVEDIOMapper15Ev
-__ZN8IOMapper19iovmAllocDMACommandEP12IODMACommandj
 __ZN8IOSyncer10gMetaClassE
 __ZN8IOSyncer10superClassE
 __ZN8IOSyncer13privateSignalEv
index 04d952f8f265ab733a915d5d875677f4265e31e6..6f67de3506f952d30d4d123f0170c01d57473bb5 100644 (file)
@@ -84,6 +84,7 @@ __ZN11OSSerialize12withCapacityEj
 __ZN11OSSerialize14addXMLStartTagEPK15OSMetaClassBasePKc
 __ZN11OSSerialize14ensureCapacityEj
 __ZN11OSSerialize16initWithCapacityEj
+__ZN11OSSerialize18binaryWithCapacityEjPvS0_:__ZN11OSSerialize18binaryWithCapacityEjPFPK15OSMetaClassBasePvPS_P12OSCollectionPK8OSSymbolS2_ES3_
 __ZN11OSSerialize20previouslySerializedEPK15OSMetaClassBase
 __ZN11OSSerialize20setCapacityIncrementEj
 __ZN11OSSerialize4freeEv
@@ -390,6 +391,7 @@ __ZN9OSBooleanD2Ev
 __ZNK10OSIterator12getMetaClassEv
 __ZNK10OSIterator9MetaClass5allocEv
 __ZNK11OSMetaClass12getClassNameEv
+__ZNK11OSMetaClass18getClassNameSymbolEv
 __ZNK11OSMetaClass12getClassSizeEv
 __ZNK11OSMetaClass12getMetaClassEv
 __ZNK11OSMetaClass12taggedRetainEPKv
@@ -727,3 +729,4 @@ _vsnprintf
 _vsscanf
 _zError
 _zlibVersion
+___llvm_profile_runtime
index aa8401775569a20b4b38d453aa286c641f96746f..d6f7ad04df8a742c7e78f8ec2456b3c11f33d2c9 100644 (file)
@@ -9,9 +9,9 @@ _mac_label_set
 _mac_audit_text
 
 _mac_iokit_check_hid_control
-
-_mac_thread_get_threadlabel
-_mac_thread_get_uthreadlabel
+_mac_iokit_check_nvram_delete
+_mac_iokit_check_nvram_get
+_mac_iokit_check_nvram_set
 
 _sbuf_cat
 _sbuf_data
index 7e0b5f60c0be286b3d31f2b3e9de7b75bbc460dd..d612020b7a8a0358d31249c20430a0b07a01fcad 100644 (file)
@@ -53,7 +53,7 @@
 #######################################################################
 #
 
-options                INET                    #                               # <inet>
+options                INET            #                               # <inet>
 options                HW_AST          # Hardware ast support          # <hw_ast>
 options        HW_FOOTPRINT    # Cache footprint support       # <hw_foot>
 
@@ -75,7 +75,7 @@ options               AH_ALL_CRYPTO   # AH all crypto algs            # <ah_all_crypto>
 options                IPCOMP_ZLIB     # IP compression using zlib     # <ipcomp_zlib>
 options                PF              # Packet Filter                 # <pf>
 options                PF_ALTQ         # PF ALTQ (Alternate Queueing)  # <pf_altq>
-options                PF_ECN          # PF use ECN marking    # <pf_ecn>
+options                PF_ECN          # PF use ECN marking            # <pf_ecn>
 options                PFLOG           # PF log interface              # <pflog>
 options                PKTSCHED_CBQ    # CBQ packet scheduler          # <pktsched_cbq>
 options                PKTSCHED_HFSC   # H-FSC packet scheduler        # <pktsched_hfsc>
@@ -99,10 +99,10 @@ options             ICMP_BANDLIM    # ICMP bandwidth limiting sysctl
 options                IFNET_INPUT_SANITY_CHK  # allow dlil/ifnet input sanity check # <ifnet_input_chk>
 options                MULTIPATH       # Multipath domain              # <multipath>
 options                MPTCP           # Multipath TCP                 # <mptcp>
-options                SYSV_SEM        # SVID semaphores                       # <sysv_sem>
-options                SYSV_MSG        # SVID messages                         # <sysv_msg>
-options                SYSV_SHM        # SVID shared mem                       # <sysv_shm>
-options                PSYNCH          # pthread synch                         # <psynch>
+options                SYSV_SEM        # SVID semaphores               # <sysv_sem>
+options                SYSV_MSG        # SVID messages                 # <sysv_msg>
+options                SYSV_SHM        # SVID shared mem               # <sysv_shm>
+options                PSYNCH          # pthread synch                 # <psynch>
 options                FLOW_DIVERT                                                             # <flow_divert>
 options                NECP                                                                    # <necp>
 options                CONTENT_FILTER  #                                               # <content_filter>
@@ -115,12 +115,12 @@ options     OLD_SEMWAIT_SIGNAL  # old semwait_signal handler
 #
 #      4.4 general kernel 
 #
-options                SOCKETS         # socket support                        # <inet, inet6>
-options        DIAGNOSTIC      # diagnostics                           # <diagnostic>
-options                GPROF           # build profiling                       # <profile>
+options                SOCKETS         # socket support                # <inet, inet6>
+options        DIAGNOSTIC      # diagnostics                   # <diagnostic>
+options                GPROF           # build profiling               # <profile>
 options                PROFILE         # kernel profiling              # <profile>
 options                SENDFILE        # sendfile                                      # <sendfile>
-options                NETWORKING      # networking layer                      # <inet, inet6>
+options                NETWORKING      # networking layer              # <inet, inet6>
 options                CONFIG_FSE      # file system events            # <config_fse>
 options                CONFIG_IMAGEBOOT        # local image boot      # <config_imageboot>
 options                CONFIG_MBUF_JUMBO       # jumbo cluster pool    # <config_mbuf_jumbo>
@@ -132,18 +132,18 @@ options           CONFIG_WORKQUEUE        # <config_workqueue>
 #
 #      4.4 filesystems 
 #
-options                HFS                     # HFS/HFS+ support              # <hfs>
-options                MOCKFS                  # Boot from an executable       # <mockfs>
+options                HFS             # HFS/HFS+ support              # <hfs>
+options                MOCKFS          # Boot from an executable       # <mockfs>
 options                FIFO            # fifo support                  # <fifo>
 options                FDESC           # fdesc_fs support              # <fdesc>
 options                DEVFS           # devfs support                 # <devfs>
-options                JOURNALING      # journaling support    # <journaling>
-options                HFS_COMPRESSION # hfs compression       # <hfs_compression>
-options                CONFIG_HFS_STD  # hfs standard support  # <config_hfs_std>
-options                CONFIG_HFS_TRIM # hfs trims unused blocks       # <config_hfs_trim>
-options                CONFIG_HFS_MOUNT_UNMAP  #hfs trims blocks at mount      # <config_hfs_mount_unmap>
-options                CONFIG_HFS_DIRLINK              #allow directory hardlink creation # <config_hfs_dirlink>
-options                CONFIG_DEV_KMEM # /dev/kmem device for reading KVA      # <config_dev_kmem>
+options                JOURNALING      # journaling support            # <journaling>
+options                HFS_COMPRESSION # hfs compression           # <hfs_compression>
+options                CONFIG_HFS_STD  # hfs standard support      # <config_hfs_std>
+options                CONFIG_HFS_TRIM # hfs trims unused blocks   # <config_hfs_trim>
+options                CONFIG_HFS_MOUNT_UNMAP  # hfs trims blocks at mount # <config_hfs_mount_unmap>
+options                CONFIG_HFS_DIRLINK      # allow directory hardlink creation # <config_hfs_dirlink>
+options                CONFIG_DEV_KMEM     # /dev/kmem device for reading KVA  # <config_dev_kmem>
 
 #
 #      file system features
@@ -167,30 +167,30 @@ options           NFSSERVER       # Be an NFS server              # <nfsserver>
 #
 # Machine Independent Apple Features
 #
-profile                                                # build a profiling kernel      # <profile>
+profile                                # build a profiling kernel      # <profile>
 
 #       
 # IPv6 Support
 #       
-options         "INET6"                        # kernel IPv6 Support           # <inet6>
-options         IPV6SEND                       # Secure Neighbor Discovery     # <ipv6send>
-options         IPSEC                                  # IP security                   # <ipsec>
-options         IPSEC_ESP                              # IP security                   # <ipsec>
-options         "IPV6FIREWALL"                         # IPv6 Firewall Feature         # <ipv6firewall>
+options         "INET6"         # kernel IPv6 Support           # <inet6>
+options         IPV6SEND       # Secure Neighbor Discovery     # <ipv6send>
+options         IPSEC           # IP security                  # <ipsec>
+options         IPSEC_ESP       # IP security                  # <ipsec>
+options         "IPV6FIREWALL"  # IPv6 Firewall Feature        # <ipv6firewall>
 options         "IPV6FIREWALL_DEFAULT_TO_ACCEPT"       #IPv6 Firewall Feature          # <ipv6firewall>
 #options         "IPV6FIREWALL_VERBOSE"                #IPv6 Firewall Feature          # <ipv6firewall>
 
-pseudo-device   gif     1              # <gif>
-pseudo-device   dummy   2              # <dummy>
-pseudo-device   stf    1               # <stf>
+pseudo-device   gif     1                              # <gif>
+pseudo-device   dummy   2                              # <dummy>
+pseudo-device   stf    1                               # <stf>
 
-options                        CRYPTO                  # <ipsec,crypto>
-options                        CRYPTO_SHA2             # <crypto_sha2>
-options                        ENCRYPTED_SWAP          # <encrypted_swap>
+options                        CRYPTO                          # <ipsec,crypto>
+options                        CRYPTO_SHA2                     # <crypto_sha2>
+options                        ENCRYPTED_SWAP                  # <encrypted_swap>
 
-options                ZLIB            # inflate/deflate support       # <zlib>
+options                ZLIB    # inflate/deflate support       # <zlib>
 
-options                IF_BRIDGE                       # <if_bridge>
+options                IF_BRIDGE                               # <if_bridge>
 
 #
 #  configurable kernel event related resources 
@@ -215,13 +215,13 @@ options   CONFIG_KN_HASHSIZE=20           # <bsmall>
 options   CONFIG_VNODES=263168         # <large,xlarge>
 options   CONFIG_VNODES=263168         # <medium>
 options   CONFIG_VNODES=10240          # <small>
-options   CONFIG_VNODES=750                    # <bsmall>
+options   CONFIG_VNODES=750            # <bsmall>
 
-options   CONFIG_VNODE_FREE_MIN=500            # <large,xlarge>
-options   CONFIG_VNODE_FREE_MIN=300            # <medium>
-options   CONFIG_VNODE_FREE_MIN=200            # <small>
-options   CONFIG_VNODE_FREE_MIN=100            # <xsmall>
-options   CONFIG_VNODE_FREE_MIN=75             # <bsmall>
+options   CONFIG_VNODE_FREE_MIN=500    # <large,xlarge>
+options   CONFIG_VNODE_FREE_MIN=300    # <medium>
+options   CONFIG_VNODE_FREE_MIN=200    # <small>
+options   CONFIG_VNODE_FREE_MIN=100    # <xsmall>
+options   CONFIG_VNODE_FREE_MIN=75     # <bsmall>
 
 options   CONFIG_NC_HASH=5120          # <large,xlarge>
 options   CONFIG_NC_HASH=4096          # <medium>
@@ -236,15 +236,6 @@ options   CONFIG_VFS_NAMES=2048            # <bsmall>
 options   CONFIG_MAX_CLUSTERS=8                # <xlarge,large,medium>
 options   CONFIG_MAX_CLUSTERS=4                # <small,xsmall,bsmall>
 
-#
-#  configurable kauth credential related resources 
-#
-options   KAUTH_CRED_PRIMES_COUNT=7            # <medium,large,xlarge>
-options   KAUTH_CRED_PRIMES_COUNT=3            # <bsmall,xsmall,small>
-
-options   KAUTH_CRED_PRIMES="{97, 241, 397, 743, 1499, 3989, 7499}"            # <medium,large,xlarge>
-options   KAUTH_CRED_PRIMES="{5, 17, 97}"                                                              # <bsmall,xsmall,small>
-
 #
 #  configurable options for minimum number of buffers for kernel memory 
 #
@@ -260,9 +251,9 @@ options   CONFIG_MIN_NIOBUF=32              # <bsmall>
 #
 # set maximum space used for packet buffers
 #
-options        CONFIG_NMBCLUSTERS="((1024 * 1024) / MCLBYTES)"         # <large,xlarge>
-options        CONFIG_NMBCLUSTERS="((1024 * 512) / MCLBYTES)"          # <medium>
-options        CONFIG_NMBCLUSTERS="((1024 * 256) / MCLBYTES)"          # <bsmall,xsmall,small>
+options        CONFIG_NMBCLUSTERS="((1024 * 1024) / MCLBYTES)" # <large,xlarge>
+options        CONFIG_NMBCLUSTERS="((1024 * 512) / MCLBYTES)"  # <medium>
+options        CONFIG_NMBCLUSTERS="((1024 * 256) / MCLBYTES)"  # <bsmall,xsmall,small>
 
 #
 # Configure size of TCP hash table
@@ -282,8 +273,8 @@ options CONFIG_ICMP_BANDLIM=50              # <xsmall,small,bsmall>
 #  CONFIG_AIO_PROCESS_MAX - process limit of async IO requests.
 #  CONFIG_AIO_THREAD_COUNT - number of async IO worker threads created.
 #
-options   CONFIG_AIO_MAX=360           # <xlarge>
-options   CONFIG_AIO_MAX=180           # <large>
+options   CONFIG_AIO_MAX=360                   # <xlarge>
+options   CONFIG_AIO_MAX=180                   # <large>
 options   CONFIG_AIO_MAX=90                    # <medium>
 options   CONFIG_AIO_MAX=45                    # <small>
 options   CONFIG_AIO_MAX=20                    # <xsmall>
@@ -296,15 +287,15 @@ options   CONFIG_AIO_PROCESS_MAX=12               # <small>
 options   CONFIG_AIO_PROCESS_MAX=8             # <xsmall>
 options   CONFIG_AIO_PROCESS_MAX=4             # <bsmall>
 
-options   CONFIG_AIO_THREAD_COUNT=16   # <xlarge>
+options   CONFIG_AIO_THREAD_COUNT=16           # <xlarge>
 options   CONFIG_AIO_THREAD_COUNT=8            # <large>
 options   CONFIG_AIO_THREAD_COUNT=4            # <medium>
 options   CONFIG_AIO_THREAD_COUNT=3            # <small>
 options   CONFIG_AIO_THREAD_COUNT=2            # <xsmall,bsmall>
 
-options   CONFIG_MAXVIFS=32                            # <medium,large,xlarge>
-options   CONFIG_MAXVIFS=16                            # <small,xsmall>
-options   CONFIG_MAXVIFS=2                             # <bsmall>
+options   CONFIG_MAXVIFS=32                    # <medium,large,xlarge>
+options   CONFIG_MAXVIFS=16                    # <small,xsmall>
+options   CONFIG_MAXVIFS=2                     # <bsmall>
 
 options   CONFIG_MFCTBLSIZ=256                 # <medium,large,xlarge>
 options   CONFIG_MFCTBLSIZ=128                 # <small,xsmall>
@@ -316,6 +307,12 @@ options   CONFIG_MFCTBLSIZ=16                      # <bsmall>
 options   CONFIG_MSG_BSIZE=4096                        # <bsmall,small,xsmall>
 options   CONFIG_MSG_BSIZE=16384               # <medium,large,xlarge>
 
+#
+# maximum size of the per-process Mach IPC table
+#
+options   CONFIG_IPC_TABLE_ENTRIES_STEPS=64    # 137898 entries        # <bsmall,small,xsmall>
+options   CONFIG_IPC_TABLE_ENTRIES_STEPS=256   # 300714 entries        # <medium,large,xlarge>
+
 #
 #  configurable kernel - use these options to strip strings from panic
 #  and printf calls.
@@ -343,35 +340,41 @@ options           CONFIG_ENFORCE_LIBRARY_VALIDATION  # <config_library_validation>
 #
 # code decryption... used on embedded for app protection, DSMOS on desktop
 #
-options                CONFIG_CODE_DECRYPTION  # <config_code_decryption>
+options                CONFIG_CODE_DECRYPTION          # <config_code_decryption>
 
 #
 # User Content Protection, used on embedded
 #
-options                CONFIG_PROTECT  # <config_protect>
+options                CONFIG_PROTECT                  # <config_protect>
 
 #
 # enable per-process memory priority tracking
 #
-options                CONFIG_MEMORYSTATUS                             # <memorystatus>
+options                CONFIG_MEMORYSTATUS             # <memorystatus>
 
 #
 # enable jetsam - used on embedded
 #
-options                CONFIG_JETSAM                           # <jetsam>
+options                CONFIG_JETSAM                   # <jetsam>
+
+#
+# enable new wait queue implementation stats / debugging
+#
+options                CONFIG_WAITQ_STATS                      # <config_waitq_stats>
+options                CONFIG_WAITQ_DEBUG                      # <config_waitq_debug>
 
 #
 # enable freezing of suspended processes - used on embedded
 #
-options                CONFIG_FREEZE                                   # <freeze>
+options                CONFIG_FREEZE                   # <freeze>
 
-options                CHECK_CS_VALIDATION_BITMAP                      # <config_cs_validation_bitmap>
+options                CHECK_CS_VALIDATION_BITMAP      # <config_cs_validation_bitmap>
 
 #
 # enable detection of file cache thrashing - used on platforms with
 # dynamic VM compression enabled
 #
-options                CONFIG_PHANTOM_CACHE                    # <phantom_cache>
+options                CONFIG_PHANTOM_CACHE            # <phantom_cache>
 
 #
 # memory pressure event support
@@ -390,28 +393,23 @@ options           CONFIG_IOSCHED                  # <config_iosched>
 options                IMPORTANCE_INHERITANCE          # <importance_inheritance>
 options                IMPORTANCE_DEBUG                # <importance_inheritance>
 
-options                CONFIG_TELEMETRY                        # <config_telemetry>
+options                CONFIG_TELEMETRY                # <config_telemetry>
 
 options                CONFIG_PROC_UUID_POLICY         # <config_proc_uuid_policy>
 
-#
-# In-kernel tests
-#
-options        CONFIG_IN_KERNEL_TESTS                  # <in_kernel_tests>
-
 #
 # ECC data logging
 # 
-options                CONFIG_ECC_LOGGING                      # <config_ecc_logging>
+options                CONFIG_ECC_LOGGING              # <config_ecc_logging>
 
 #
 #  Ethernet (ARP)
 #
-pseudo-device  ether                   # <networking,inet,inet6>
+pseudo-device  ether                           # <networking,inet,inet6>
 #
 #  Network loopback device
 #
-pseudo-device  loop                    # <networking,inet,inet6>
+pseudo-device  loop                            # <networking,inet,inet6>
 #
 #  UCB pseudo terminal service
 #
@@ -459,12 +457,13 @@ pseudo-device     profile_prvd    1       init    profile_init    # <config_dtrace>
 #
 
 options                HIBERNATION     # system hibernation    # <hibernation>
-options                IOKITCPP        # C++ implementation            # <iokitcpp>
-options                IOKITSTATS      # IOKit statistics              # <iokitstats>
-options                CONFIG_SLEEP    #                               # <config_sleep>
+options                IOKITCPP        # C++ implementation    # <iokitcpp>
+options                IOKITSTATS      # IOKit statistics      # <iokitstats>
+options                IOTRACKING      # IOKit tracking        # <iotracking>
+options                CONFIG_SLEEP    #                       # <config_sleep>
 options   CONFIG_MAX_THREADS=64        # IOConfigThread threads
-options         NO_KEXTD                # <no_kextd>
-options         NO_KERNEL_HID           # <no_kernel_hid>
+options         NO_KEXTD                               # <no_kextd>
+options         NO_KERNEL_HID                          # <no_kernel_hid>
 
 #
 # Libkern configuration options
@@ -485,21 +484,19 @@ options         CONFIG_STATIC_CPPINIT   # Static library initializes kext cpp ru
 
 # CONFIG_KEXT_BASEMENT - alloc post boot loaded kexts after prelinked kexts
 #
-options                CONFIG_KEXT_BASEMENT            #               # <config_kext_basement>
+options                CONFIG_KEXT_BASEMENT            #       # <config_kext_basement>
 
 #
 # security configuration options
 #
 
-options                CONFIG_LCTX     # Login Context
-
-options                CONFIG_MACF                     # Mandatory Access Control Framework    # <config_macf>
+options                CONFIG_MACF     # Mandatory Access Control Framework    # <config_macf>
 options                CONFIG_MACF_SOCKET_SUBSET       # MAC socket subset (no labels) # <config_macf>
-#options       CONFIG_MACF_SOCKET              # MAC socket labels     # <config_macf>
-#options       CONFIG_MACF_NET                 # mbuf                  # <config_macf>
-#options       CONFIG_MACF_DEBUG               # debug                 # <config_macf>
+#options       CONFIG_MACF_SOCKET  # MAC socket labels     # <config_macf>
+#options       CONFIG_MACF_NET     # mbuf                  # <config_macf>
+#options       CONFIG_MACF_DEBUG   # debug                 # <config_macf>
 
-options                CONFIG_AUDIT                    # Kernel auditing       # <config_audit>
+options                CONFIG_AUDIT        # Kernel auditing       # <config_audit>
 
 
 #
@@ -534,13 +531,13 @@ options           MACH_ASSERT     #               # <mach_assert>
 #
 options                MACH_DEBUG      # IPC debugging interface       # <mdebug>
 options                MACH_IPC_DEBUG  # Enable IPC debugging calls    # <ipc_debug>
-options                MACH_VM_DEBUG   #               # <debug>
+options                MACH_VM_DEBUG   #                               # <debug>
 #
 # MACH_MP_DEBUG control the possible dead locks that may occur by controlling
 #      that IPL level has been raised down to SPL0 after some calls to
 #      hardclock device driver.
 #
-options                MACH_MP_DEBUG   #               # <debug>
+options                MACH_MP_DEBUG   #                               # <debug>
 #
 # ZONE_DEBUG keeps track of all zalloc()ed elements to perform further
 #      operations on each element.
@@ -584,13 +581,13 @@ options           MACH_LDEBUG     #               # <debug>
 #
 options                KDEBUG                  # kernel tracing        # <kdebug>
 options                IST_KDEBUG              # limited tracing       # <ist_kdebug>
-options                NO_KDEBUG       # no kernel tracing # <no_kdebug>
+options                NO_KDEBUG               # no kernel tracing     # <no_kdebug>
 
 #
 # CONFIG_DTRACE enables code needed to support DTrace. Currently this is
 # only used for delivery of traps/interrupts to DTrace.
 #
-options                CONFIG_DTRACE   #               # <config_dtrace>
+options                CONFIG_DTRACE           #                   # <config_dtrace>
 
 # kernel performance tracing
 options     KPERF                  # <kperf>
@@ -598,13 +595,13 @@ options     KPC                    # <kpc>
 
 # MACH_COUNTERS enables code that handles various counters in the system.
 # 
-options                MACH_COUNTERS   #               # <debug>
+options                MACH_COUNTERS   #                           # <debug>
 
 # DEVELOPMENT define for development builds
-options                DEVELOPMENT             # dev kernel            # <development>
+options                DEVELOPMENT     # dev kernel                # <development>
 
 # DEBUG kernel
-options                DEBUG           # general debugging code        # <debug>
+options                DEBUG           # general debugging code    # <debug>
 
 ##########################################################
 #
@@ -654,11 +651,6 @@ options   CONFIG_ZLEAK_TRACE_MAP_NUM=4096 #<small,xsmall,bsmall>
 # vc_progress_white - make the progress gear white instead of black
 options          CONFIG_VC_PROGRESS_WHITE              # <vc_progress_white>
 
-#
-# Context switched counters 
-#
-options                CONFIG_COUNTERS                 # <config_counters>
-
 #
 # Timeshare scheduler implementations
 #
@@ -668,41 +660,55 @@ options           CONFIG_SCHED_GRRR               # <config_sched_grrr>
 options                CONFIG_SCHED_GRRR_CORE          # <config_sched_grrr>
 options                CONFIG_SCHED_MULTIQ             # <config_sched_multiq>
 options                CONFIG_SCHED_TIMESHARE_CORE     # <config_sched_traditional,config_sched_multiq>
-options                CONFIG_SCHED_FAIRSHARE_CORE     # <config_sched_traditional,config_sched_multiq,config_sched_proto>
 
-options                CONFIG_SCHED_IDLE_IN_PLACE              # <config_sched_idle_in_place>
+options                CONFIG_SCHED_IDLE_IN_PLACE      # <config_sched_idle_in_place>
+options                CONFIG_SCHED_SFI                # <config_sched_sfi>
 options                CONFIG_GZALLOC                  # <config_gzalloc>
+options                CONFIG_SCHED_DEFERRED_AST       # <config_sched_deferred_ast>
 
 # Enable allocation of contiguous physical memory through vm_map_enter_cpm()
-options                VM_CPM          # <vm_cpm>
+options                VM_CPM                          # <vm_cpm>
 
-options            CONFIG_SKIP_PRECISE_USER_KERNEL_TIME        # <config_skip_precise_user_kernel_time>
+options            CONFIG_SKIP_PRECISE_USER_KERNEL_TIME    # <config_skip_precise_user_kernel_time>
 
 #
 # Switch to disable cpu, wakeup and high memory watermark monitors
 #
-options        CONFIG_NOMONITORS                               # <config_nomonitors>
+options        CONFIG_NOMONITORS                       # <config_nomonitors>
 
-options                MACH_KDP        # KDP                           # <mach_kdp>
-options                CONFIG_SERIAL_KDP       # KDP over serial                               # <config_serial_kdp>
+options                MACH_KDP            # KDP               # <mach_kdp>
+options                CONFIG_SERIAL_KDP   # KDP over serial   # <config_serial_kdp>
 options                CONFIG_KDP_INTERACTIVE_DEBUGGING        # <kdp_interactive_debugging>
 
+#
+# Kernel Power On Self Tests
+#
+options                CONFIG_XNUPOST                          # <config_xnupost>
+
+#
+# Kernel proc reference instrumentation
+#
+options PROC_REF_DEBUG                                 # <proc_ref_debug>
+
 #
 # Kernel Voucher Attr Manager for Activity Trace
 #
-options        CONFIG_ATM                      # <config_atm>
+options        CONFIG_ATM                              # <config_atm>
 
 #
 # Kernel Voucher Attr Manager for BANK
 #
-options        CONFIG_BANK                     # <config_bank>
+options        CONFIG_BANK                             # <config_bank>
 
 
 # Group related tasks together into coalitions
 options                CONFIG_COALITIONS                       # <config_coalitions>
 
+# Enable support for sysdiagnose notifications
+options                CONFIG_SYSDIAGNOSE                      # <config_sysdiagnose>
+
 # Configurable Security Restrictions
-options                CONFIG_CSR                      # <config_csr>
+options                CONFIG_CSR                              # <config_csr>
 
 #
 # Console options
index 6e356ba91f41cba69d107e4249eeb7a5f95c49fe..3baa999632e24a676c91fbfe9f35512c15d996b0 100644 (file)
@@ -18,8 +18,8 @@
 #
 #  KERNEL_BASE =    [ intel medium config_requires_u32_munging ]
 #  KERNEL_RELEASE = [ KERNEL_BASE ]
-#  KERNEL_DEV =     [ KERNEL_BASE development mach_assert ]
-#  KERNEL_DEBUG =   [ KERNEL_BASE debug mach_assert ]
+#  KERNEL_DEV =     [ KERNEL_BASE development mach_assert config_xnupost proc_ref_debug]
+#  KERNEL_DEBUG =   [ KERNEL_BASE debug mach_assert config_waitq_stats config_waitq_debug ]
 #  BSD =            [ mach_bsd sysv_sem sysv_msg sysv_shm config_imageboot config_workqueue psynch config_proc_uuid_policy ]
 #  FILESYS =        [ devfs hfs journaling fdesc config_dev_kmem config_fse quota namedstreams fifo config_volfs hfs_compression config_hfs_std config_hfs_alloc_rbtree config_hfs_trim config_imgsrc_access config_triggers config_ext_resolver config_searchfs config_hfs_dirlink config_appledouble ]
 #  NFS =            [ nfsclient nfsserver ]
 #  PKTSCHED =       [ pktsched_cbq pktsched_fairq pktsched_hfsc pktsched_priq ]
 #  CLASSQ =         [ classq_blue classq_red classq_rio ]
 #  MULTIPATH =      [ multipath mptcp ]
-#  IOKIT =          [ iokit iokitcpp hibernation config_sleep iokitstats hypervisor ]
-#  LIBKERN =        [ libkerncpp config_kxld config_kec_fips zlib crypto_sha2 ]
-#  PERF_DBG =       [ config_dtrace mach_kdp config_serial_kdp kdp_interactive_debugging kperf kpc config_counters zleaks config_gzalloc ]
-#  MACH_BASE =      [ mach config_kext_basement mdebug ipc_debug config_mca config_vmx config_mtrr config_lapic   config_telemetry importance_inheritance config_atm config_bank config_coalitions hypervisor config_iosched ]
+#  IOKIT_BASE =     [ iokit iokitcpp hibernation config_sleep iokitstats hypervisor ]
+#  IOKIT_RELEASE =  [ IOKIT_BASE ]
+#  IOKIT_DEV =      [ IOKIT_BASE iotracking ]
+#  IOKIT_DEBUG =    [ IOKIT_BASE iotracking ]
+#  LIBKERN_BASE =   [ libkerncpp config_kxld config_kec_fips zlib crypto_sha2 ]
+#  LIBKERN_RELEASE =[ LIBKERN_BASE ]
+#  LIBKERN_DEV =    [ LIBKERN_BASE iotracking ]
+#  LIBKERN_DEBUG =  [ LIBKERN_BASE iotracking ]
+#  PERF_DBG =       [ config_dtrace mach_kdp config_serial_kdp kdp_interactive_debugging kperf kpc zleaks config_gzalloc ]
+#  MACH_BASE =      [ mach config_kext_basement mdebug ipc_debug config_mca config_vmx config_mtrr config_lapic   config_telemetry importance_inheritance config_atm config_bank config_coalitions hypervisor config_iosched config_sysdiagnose ]
 #  MACH_RELEASE =   [ MACH_BASE ]
 #  MACH_DEV =       [ MACH_BASE task_zone_info ]
 #  MACH_DEBUG =     [ MACH_BASE task_zone_info ]
-#  SCHED_BASE =     [ config_sched_traditional config_sched_multiq ]
+#  SCHED_BASE =     [ config_sched_traditional config_sched_multiq config_sched_sfi ]
 #  SCHED_RELEASE =  [ SCHED_BASE ]
 #  SCHED_DEV =      [ SCHED_BASE ]
 #  SCHED_DEBUG =    [ SCHED_BASE config_sched_grrr config_sched_proto ]
 #  VM =             [ vm_pressure_events memorystatus dynamic_codesigning config_code_decryption encrypted_swap phantom_cache]
 #  SECURITY =       [ config_macf config_audit config_csr ]
-#  RELEASE =        [ KERNEL_RELEASE BSD FILESYS NFS NETWORKING PF VPN IOKIT LIBKERN PERF_DBG MACH_RELEASE SCHED_RELEASE VM SECURITY ]
-#  DEVELOPMENT =    [ KERNEL_DEV     BSD FILESYS NFS NETWORKING PF VPN IOKIT LIBKERN PERF_DBG MACH_DEV     SCHED_DEV     VM SECURITY ]
-#  DEBUG =          [ KERNEL_DEBUG   BSD FILESYS NFS NETWORKING PF VPN IOKIT LIBKERN PERF_DBG MACH_DEBUG   SCHED_DEBUG   VM SECURITY ]
+#  RELEASE =        [ KERNEL_RELEASE BSD FILESYS NFS NETWORKING PF VPN IOKIT_RELEASE LIBKERN_RELEASE PERF_DBG MACH_RELEASE SCHED_RELEASE VM SECURITY ]
+#  DEVELOPMENT =    [ KERNEL_DEV     BSD FILESYS NFS NETWORKING PF VPN IOKIT_DEV     LIBKERN_DEV     PERF_DBG MACH_DEV     SCHED_DEV     VM SECURITY ]
+#  DEBUG =          [ KERNEL_DEBUG   BSD FILESYS NFS NETWORKING PF VPN IOKIT_DEBUG   LIBKERN_DEBUG   PERF_DBG MACH_DEBUG   SCHED_DEBUG   VM SECURITY ]
 #
 ######################################################################
 #
index 1cca2a9b7722347fd6399a2f43a178c050177689..1ea2e20308c2f2c1e9e7d2303b8367a7461c6b4a 100644 (file)
@@ -55,3 +55,5 @@ _thread_policy_set
 _thread_reference
 _thread_terminate
 _thread_wakeup_prim
+_vm_kernel_addrperm_external
+_vm_kernel_unslide_or_perm_external
index a2049c6773aed57711c57b618f6214debdb96982..3aded3442183b6fb599447d5db522cb1730bee5c 100644 (file)
@@ -1,4 +1,4 @@
-14.5.0
+15.0.0
 
 # The first line of this file contains the master version number for the kernel.
 # All other instances of the kernel version in xnu are derived from this file.
index d7995f81e141003d5fdd9158a433591ae30f0c26..e04f8d26b834fa114b3eab349bf8f5b0eb1189e1 100644 (file)
@@ -1,3 +1,4 @@
+_PE_i_can_has_debugger
 __ZN16IOPlatformExpert*
 __ZNK16IOPlatformExpert*
 __ZTV16IOPlatformExpert
@@ -10,12 +11,12 @@ __ZTV5IOCPU
 __ZN24IOCPUInterruptController*
 __ZNK24IOCPUInterruptController*
 __ZTV24IOCPUInterruptController
+_PE_i_can_has_kernel_configuration
 _assert_wait_deadline_with_leeway
 _assert_wait_timeout_with_leeway
 _audio_active
 _b_to_q
 _bdevsw
-_boot
 _bootcache_contains_block
 _bsd_hostname
 _bsd_set_dependency_capable
@@ -52,20 +53,32 @@ _convert_task_to_port
 _cp_key_store_action
 _cp_register_wraps
 _cpu_to_processor
-_cs_enforcement
 _cs_blob_reset_cache
-_cs_require_lv
+_cs_debug
+_cs_enforcement
 _cs_entitlement_flags
 _cs_entitlements_blob_get
 _cs_get_cdhash
 _cs_identity_get
-_cs_register_cscsr
-_csfg_get_teamid
+_cs_require_lv
+_cs_restricted
+_csblob_find_blob_bytes
+_csblob_get_cdhash
+_csblob_get_entitlements
+_csblob_get_identity
+_csblob_get_platform_binary
+_csblob_get_flags
+_csblob_get_teamid
+_csfg_get_cdhash
 _csfg_get_path
 _csfg_get_platform_binary
-_csproc_get_teamid
+_csfg_get_teamid
+_csproc_get_blob
 _csproc_get_platform_binary
+_csproc_get_teamid
+_csvnode_get_blob
 _csvnode_get_teamid
+_csvnode_print_debug
 _ctl_enqueuembuf_list
 _ctl_id_by_name
 _ctl_name_by_id
@@ -74,6 +87,8 @@ _fd_rdwr
 _get_aiotask
 _gpu_accumulate_time
 _gpu_describe
+_gpu_fceiling_cb_register
+_gpu_submission_telemetry
 _hz
 _ifnet_allocate_extended
 _ifnet_bandwidths
@@ -89,7 +104,7 @@ _ifnet_enqueue
 _ifnet_flowid
 _ifnet_get_delegate
 _ifnet_get_inuse_address_list
-_ifnet_get_ipsec_offload_frames
+_ifnet_get_keepalive_offload_frames
 _ifnet_get_local_ports
 _ifnet_get_local_ports_extended
 _ifnet_get_rcvq_maxlen
@@ -102,6 +117,7 @@ _ifnet_inet_defrouter_llreachinfo
 _ifnet_input_extended
 _ifnet_latencies
 _ifnet_link_quality
+_ifnet_link_status_report
 _ifnet_notice_master_elected
 _ifnet_notice_node_absence
 _ifnet_notice_node_presence
@@ -143,8 +159,12 @@ _kdp_unregister_link
 _kdp_unregister_send_receive
 _kern_asl_msg
 _kern_asl_msg_va
-_kmem_alloc_kobject
-_kmem_alloc_pageable
+_kern_stack_snapshot_with_reason
+_kernel_debug_string
+_kevent_qos_internal
+_kmem_alloc_kobject:_kmem_alloc_kobject_external
+_kmem_alloc_pageable:_kmem_alloc_pageable_external
+_kx_qsort
 _linesw
 _log
 _logwakeup
@@ -176,6 +196,7 @@ _mbuf_get_traffic_class_index
 _mbuf_get_traffic_class_max_count
 _mbuf_is_service_class_privileged:_mbuf_is_traffic_class_privileged
 _mbuf_pkthdr_aux_flags
+_mbuf_get_unsent_data_bytes
 _mcl_to_paddr
 _mountroot_post_hook
 _net_add_domain:_net_add_domain_old
@@ -183,29 +204,8 @@ _net_add_proto:_net_add_proto_old
 _net_del_domain:_net_del_domain_old
 _net_del_proto:_net_del_proto_old
 _netboot_root
-_perf_monitor_register_*
-_perf_monitor_unregister
 _pffinddomain:_pffinddomain_old
 _pffindproto:_pffindproto_old
-_pmc_config_set_interrupt_threshold
-_pmc_config_set_value
-_pmc_create_config
-_pmc_find_by_name
-_pmc_free_config
-_pmc_free_pmc_list
-_pmc_get_accessible_core_list
-_pmc_get_name
-_pmc_get_pmc_list
-_pmc_register
-_pmc_reservation_free
-_pmc_reservation_read
-_pmc_reservation_start
-_pmc_reservation_stop
-_pmc_reservation_write
-_pmc_reserve
-_pmc_reserve_task
-_pmc_reserve_thread
-_pmc_unregister
 _port_name_to_task
 _port_name_to_thread
 _post_sys_powersource
@@ -213,6 +213,7 @@ _prng_factory_register
 _proc_getexecutablevnode
 _proc_pidbackgrounded
 _proc_pidversion
+_proc_set_responsible_pid
 _proc_task
 _proc_uniqueid
 _pru_abort_notsupp
@@ -268,6 +269,7 @@ _soreserve
 _sorwakeup
 _sosend
 _strnstr
+_sysdiagnose_notify_user
 _termioschars
 _thread_call_allocate_with_priority
 _thread_call_cancel_wait
@@ -282,6 +284,7 @@ _throttle_info_mount_rel
 _throttle_info_ref_by_mask
 _throttle_info_rel_by_mask
 _throttle_info_release
+_throttle_info_reset_window
 _throttle_info_update
 _throttle_info_update_by_mask
 _throttle_lowpri_io
@@ -317,7 +320,6 @@ _vfs_context_bind
 _vfs_context_get_special_port
 _vfs_context_set_special_port
 _vfs_devvp
-_vfs_get_notify_attributes
 _vfs_getattr
 _vfs_getbyid
 _vfs_mntlabel
@@ -337,15 +339,16 @@ _vm_map_page_shift
 _vm_map_page_size
 _vm_map_round_page_mask
 _vm_map_trunc_page_mask
-_vm_map_wire_and_extract
+_vm_map_wire_and_extract:_vm_map_wire_and_extract_external
 _vm_page_wire_count
 _vn_getpath_fsenter
 _vn_searchfs_inappropriate_name
+_vnode_create_empty
+_vnode_initialize
 _vnode_isdyldsharedcache
 _vnode_ismonitored
 _vnode_istty
 _vnode_lookup_continue_needed
-_vnode_notify
 _vnop_compound_mkdir_desc
 _vnop_compound_open_desc
 _vnop_compound_remove_desc
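Several of the entries above use the colon syntax handled by SETUP/kextsymboltool: the name on the left is the symbol kexts link against, and the name on the right is the kernel symbol it is aliased to, so existing kexts keep their familiar entry points while the kernel routes them to argument-checking external wrappers. One of the new entries, shown outside the diff for clarity:

    _kmem_alloc_kobject:_kmem_alloc_kobject_external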
index 8df705eb687f3ec84c0733bd9c7bbd287004285e..35ecccdf54a2778398df90ab61bce7744352cb22 100644 (file)
@@ -14,11 +14,15 @@ _apply_func_phys
 _bufattr_delayidlesleep
 _cpu_to_lapic
 _cpuid_features
+_cpuid_leaf7_features
 _cpuid_info
 _csr_check
 _csr_get_active_config
-_csr_get_pending_config
 _csr_set_allow_all
+_hv_ept_pmap_create
+_hv_get*
+_hv_release*
+_hv_set*
 _lapic_end_of_interrupt
 _lapic_get_cmci_vector
 _lapic_unmask_perfcnt_interrupt
@@ -46,6 +50,4 @@ _xts_encrypt
 _xts_start
 _aes_decrypt
 _PE_reboot_on_panic
-_hv_set*
-_hv_get*
-_hv_release*
+
index 7d3ed425191d46a449e857282ced74368f1a596a..6cea97213edffe3ead4fd7cf10dc1b1b1600395e 100644 (file)
@@ -1,3 +1,4 @@
+_PE_i_can_has_debugger
 _Debugger
 _FastUnicodeCompare
 _KUNCExecute
@@ -7,7 +8,6 @@ _KUNCUserNotificationDisplayFromBundle
 _KUNCUserNotificationDisplayNotice
 _NDR_record
 _PE_kputc
-__Z22OSFlushObjectTrackListv
 __ZN11IOMemoryMap9wireRangeEjyy
 __ZN15IOWatchDogTimer10gMetaClassE
 __ZN15IOWatchDogTimer10superClassE
@@ -50,6 +50,7 @@ __ZTV16IOPlatformDevice
 __ZTV9IODTNVRAM
 __ZTVN15IOWatchDogTimer9MetaClassE
 __doprnt
+__doprnt_log
 __dtrace_register_anon_DOF
 _aes_decrypt_cbc
 _aes_decrypt_key
@@ -60,7 +61,6 @@ _aes_encrypt_key
 _aes_encrypt_key128
 _aes_encrypt_key256
 _appleClut8
-_boot
 _cons_cinput
 _cons_ops
 _conslog_putc
@@ -72,8 +72,6 @@ _delay_for_interval
 _des_ecb_encrypt
 _des_ecb_key_sched
 _gIODTSharedInterrupts
-_gOSObjectTrackList
-_gOSObjectTrackThread
 _gPEClockFrequencyInfo
 _gPESerialBaud
 _get_bsdtask_info
@@ -88,7 +86,7 @@ _host_get_exception_ports
 _host_priv_self
 _hz
 _ipc_kernel_map
-_kalloc
+_kalloc:_kalloc_external
 _kauth_cred_issuser
 _kauth_cred_label_update
 _kauth_guid_equal
@@ -100,7 +98,7 @@ _kernel_map
 _kernel_pmap
 _kev_post_msg
 _kfree
-_kmem_alloc
+_kmem_alloc:_kmem_alloc_external
 _kmem_free
 _kmputc
 _lck_mtx_assert
@@ -174,7 +172,7 @@ _vm_deallocate
 _vm_map
 _vm_map_deallocate
 _vm_map_unwire
-_vm_map_wire
+_vm_map_wire:_vm_map_wire_external
 _set_vm_privilege
 _vm_protect
 _vm_region
index 4eb17cafaf8666610553f7b425e84134f4138735..0f3ed92d1fac8496b04a99bdedb6ab35093491fa 100644 (file)
@@ -42,3 +42,4 @@ _sock_retain
 _tmrCvt
 _tsc_get_info
 _PE_state
+
index e4a00cd8a890f82cef23058b006f55a4babe68af..4acf84e355e64357bf7c2ce200953dbdadd38af4 100644 (file)
@@ -6,3 +6,4 @@ _atm_mana*
 _bank_mana*
 _ipc_importance_mana*
 _user_data_mana*
+_arm_hardware_page_size
diff --git a/iokit/.clang-format b/iokit/.clang-format
new file mode 100644 (file)
index 0000000..cd99c24
--- /dev/null
@@ -0,0 +1,30 @@
+# See top level .clang-format for explanation of options
+AlignEscapedNewlinesLeft: true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: true
+AllowShortCaseLabelsOnASingleLine: true
+AllowShortFunctionsOnASingleLine: None
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: false
+AlwaysBreakBeforeMultilineStrings: true
+BinPackArguments: true
+BinPackParameters: false
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Allman
+ColumnLimit: 132
+IndentCaseLabels: false
+IndentWidth: 4
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: false
+PointerAlignment: Middle
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+TabWidth: 4
+UseTab: Never
index 7a866c5af69d6b1e008017cd45541716be67a765..4d8829507acf96d8f25d7d1098e82d3e4c18badc 100644 (file)
@@ -38,13 +38,11 @@ OSDefineAbstractStructors(IONVRAMController, IOService);
 // init
 //
 // ****************************************************************************
-bool IONVRAMController::start(IOService *provider)
+void IONVRAMController::registerService(IOOptionBits options)
 {
-  if(!super::start(provider)) return false;
-  
-  getPlatform()->registerNVRAMController(this);
-  
-  return true;
+       super::registerService(options);
+
+       getPlatform()->registerNVRAMController(this);
 }
 
 // ****************************************************************************
index 8938656ca78e847ddb381e1751bb1768643901cc..f2aadfed8f7c75fb89c307f31d1d60986f62c73b 100644 (file)
 #define kIOBSDMinorKey "BSD Minor" // (an OSNumber)
 #define kIOBSDUnitKey  "BSD Unit"  // (an OSNumber)
 
+
+#ifdef XNU_KERNEL_PRIVATE
+
+#include <stdint.h>
+#include <kern/task.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct IOPolledFileIOVars;
+struct mount;
+
+enum 
+{
+    kIOMountChangeMount      = 0x00000101,
+    kIOMountChangeUnmount    = 0x00000102,
+    kIOMountChangeWillResize = 0x00000201,
+    kIOMountChangeDidResize  = 0x00000202,
+};
+extern void IOBSDMountChange(struct mount * mp, uint32_t op);
+extern boolean_t IOTaskHasEntitlement(task_t task, const char * entitlement);
+
+extern struct IOPolledFileIOVars * gIOPolledCoreFileVars;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* XNU_KERNEL_PRIVATE */
+
 #endif /* !_IOBSD_H */
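The new XNU_KERNEL_PRIVATE block above lets BSD-side code notify IOKit of mount lifecycle events and lets kernel subsystems test a task for an entitlement. A sketch of both calls using the declarations above (the entitlement string is illustrative only):

    #include <IOKit/IOBSD.h>

    /* From a VFS mount path: tell IOKit that 'mp' was just mounted. */
    IOBSDMountChange(mp, kIOMountChangeMount);

    /* Gate an operation on an entitlement held by the calling task. */
    if (!IOTaskHasEntitlement(current_task(), "com.example.hypothetical-entitlement")) {
        return EPERM;
    }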
index fdb0a398cf89b8bfad55e08b838b3283d7934c48..486ce4e13dfabb7f917a6edd737f6b8ad6f75c4d 100644 (file)
@@ -120,7 +120,7 @@ public:
     OSMetaClassDeclareReservedUnused(IOBufferMemoryDescriptor, 15);
 
 protected:
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
 public:
 
@@ -270,7 +270,7 @@ public:
 
 #ifndef __LP64__
     virtual void * getVirtualSegment(IOByteCount offset,
-                                       IOByteCount * length) APPLE_KEXT_DEPRECATED; /* use getBytesNoCopy() instead */
+                                       IOByteCount * length) APPLE_KEXT_OVERRIDE APPLE_KEXT_DEPRECATED; /* use getBytesNoCopy() instead */
 #endif /* !__LP64__ */
 };
 
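A large share of this commit annotates virtual overrides with APPLE_KEXT_OVERRIDE, the libkern macro that expands to the C++11 override specifier on compilers that support it, so a signature that drifts from the base class becomes a compile-time error instead of a silent new virtual. The pattern, sketched for a hypothetical subclass:

    class MyBufferMD : public IOBufferMemoryDescriptor    // hypothetical subclass
    {
        OSDeclareDefaultStructors(MyBufferMD)
    protected:
        // Fails to compile if IOBufferMemoryDescriptor::free() ever changes shape.
        virtual void free() APPLE_KEXT_OVERRIDE;
    };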
index 615d103afdb92af3efcc27f9ca7c04eb2439fc24..a9ae2e60590bfecf4c5b723f86ad6fb3c873e4e8 100644 (file)
@@ -78,17 +78,19 @@ protected:
 public:
   static  void           initCPUs(void);
   
-  virtual bool           start(IOService *provider);
-  virtual OSObject       *getProperty(const OSSymbol *aKey) const;
-  virtual bool           setProperty(const OSSymbol *aKey, OSObject *anObject);
-  virtual bool           serializeProperties(OSSerialize *serialize) const;
-  virtual IOReturn       setProperties(OSObject *properties);
+  virtual bool           start(IOService *provider) APPLE_KEXT_OVERRIDE;
+  virtual OSObject       *getProperty(const OSSymbol *aKey) const APPLE_KEXT_OVERRIDE;
+  virtual bool           setProperty(const OSSymbol *aKey, OSObject *anObject) APPLE_KEXT_OVERRIDE;
+  virtual bool           serializeProperties(OSSerialize *serialize) const APPLE_KEXT_OVERRIDE;
+  virtual IOReturn       setProperties(OSObject *properties) APPLE_KEXT_OVERRIDE;
   virtual void           initCPU(bool boot) = 0;
   virtual void           quiesceCPU(void) = 0;
   virtual kern_return_t  startCPU(vm_offset_t start_paddr,
                                  vm_offset_t arg_paddr) = 0;
   virtual void           haltCPU(void) = 0;
   virtual void           signalCPU(IOCPU *target);
+  virtual void           signalCPUDeferred(IOCPU * target);
+  virtual void           signalCPUCancel(IOCPU * target);
   virtual void           enableCPUTimeBase(bool enable);
   
   virtual UInt32         getCPUNumber(void);
@@ -113,6 +115,7 @@ void IOCPUSleepKernel(void);
 extern "C" kern_return_t IOCPURunPlatformQuiesceActions(void);
 extern "C" kern_return_t IOCPURunPlatformActiveActions(void);
 extern "C" kern_return_t IOCPURunPlatformHaltRestartActions(uint32_t message);
+extern "C" kern_return_t IOCPURunPlatformPanicActions(uint32_t message);
 
 class IOCPUInterruptController : public IOInterruptController
 {
@@ -137,17 +140,17 @@ public:
   virtual IOReturn registerInterrupt(IOService *nub, int source,
                                     void *target,
                                     IOInterruptHandler handler,
-                                    void *refCon);
+                                    void *refCon) APPLE_KEXT_OVERRIDE;
   
   virtual IOReturn getInterruptType(IOService *nub, int source,
-                                   int *interruptType);
+                                   int *interruptType) APPLE_KEXT_OVERRIDE;
   
-  virtual IOReturn enableInterrupt(IOService *nub, int source);
-  virtual IOReturn disableInterrupt(IOService *nub, int source);
-  virtual IOReturn causeInterrupt(IOService *nub, int source);
+  virtual IOReturn enableInterrupt(IOService *nub, int source) APPLE_KEXT_OVERRIDE;
+  virtual IOReturn disableInterrupt(IOService *nub, int source) APPLE_KEXT_OVERRIDE;
+  virtual IOReturn causeInterrupt(IOService *nub, int source) APPLE_KEXT_OVERRIDE;
   
   virtual IOReturn handleInterrupt(void *refCon, IOService *nub,
-                                  int source);
+                                  int source) APPLE_KEXT_OVERRIDE;
 
   OSMetaClassDeclareReservedUnused(IOCPUInterruptController, 0);
   OSMetaClassDeclareReservedUnused(IOCPUInterruptController, 1);
index 63781fc759770bd18a76ffc8a3e16c3634d4b927..693e0ef7e2f77abc79f4d6d18f95fa4631f69c00 100644 (file)
@@ -81,7 +81,7 @@ public:
         @function free
         @abstract Cleans up the database and deallocates memory allocated at initialization.  This is never called in normal operation of the system.
     */
-    void free( void );
+    void free( void ) APPLE_KEXT_OVERRIDE;
     
     /*!
         @function findDrivers
@@ -215,7 +215,7 @@ public:
         @param s The serializer object.
         @result Returns false if unable to serialize database, most likely due to memory shortage.
      */
-    virtual bool serialize(OSSerialize * s) const;
+    virtual bool serialize(OSSerialize * s) const APPLE_KEXT_OVERRIDE;
 
     bool serializeData(IOOptionBits kind, OSSerialize * s) const;
 
index 136c41bd5d86c4b111174b302f1f87ab5513872c..fc77b627f20cdad3d98e1fe49642efd19f655e3b 100644 (file)
@@ -71,7 +71,7 @@ class IOCommand : public OSObject
     OSDeclareDefaultStructors(IOCommand)
     
 public:
-    virtual bool init(void);
+    virtual bool init(void) APPLE_KEXT_OVERRIDE;
     
 /*! @var fCommandChain
     This variable is used by the current 'owner' to queue the command.  During the life cycle of a command it moves through a series of queues.  This is the queue pointer for it.  Only valid while 'ownership' is clear.  For instance a IOCommandPool uses this pointer to maintain its list of free commands.  May be manipulated using the kern/queue.h macros */
index d38c88670f020b78219f7338f52c9cb4dab214a3..26624116f956cf254e6a42e2287bdf6c6c7ebcc3 100644 (file)
 
 /*!
     @class IOCommandGate : public IOEventSource
-    @abstract Single-threaded work-loop client request mechanism.
-    @discussion An IOCommandGate instance is an extremely light way mechanism
-that executes an action on the driver's work-loop.  'On the work-loop' is
-actually a lie but the work-loop single threaded semantic is maintained for this
-event source.  Using the work-loop gate rather than execution by the workloop.
-The command gate tests for a potential self dead lock by checking if the
-runCommand request is made from the work-loop's thread, it doesn't check for a
-mutual dead lock though where a pair of work loop's dead lock each other.
+    @abstract Single-threaded work loop client request mechanism.
+    @discussion An IOCommandGate instance is an extremely lightweight mechanism
+that executes an action on the driver's work loop.  Although the code does not
+technically execute on the work loop itself, a single-threaded work loop semantic
+is maintained for this event source using the work loop gate.  The command gate
+tests for a potential self dead lock by checking if the runCommand request is
+made from the work loop's thread, it doesn't check for a mutual dead lock though
+where a pair of work loop's dead lock each other.
 <br><br>
        The IOCommandGate is a lighter weight version of the IOCommandQueue and
 should be used in preference.  Generally use a command queue whenever you need a
@@ -113,17 +113,17 @@ compiler warning.  Defaults to zero, see $link IOEventSource::setAction.
     virtual bool init(OSObject *owner, Action action = 0);
 
     // Superclass overrides
-    virtual void free();
-    virtual void setWorkLoop(IOWorkLoop *inWorkLoop);
+    virtual void free() APPLE_KEXT_OVERRIDE;
+    virtual void setWorkLoop(IOWorkLoop *inWorkLoop) APPLE_KEXT_OVERRIDE;
 
 /*! @function runCommand
-    @abstract Single thread a command with the target work-loop.
+    @abstract Single thread a command with the target work loop.
     @discussion Client function that causes the current action to be called in
-a single threaded manner.  Beware the work-loop's gate is recursive and command
+a single threaded manner.  Beware the work loop's gate is recursive and command
 gates can cause direct or indirect re-entrancy.         When the executing on a
-client's thread runCommand will sleep until the work-loop's gate opens for
+client's thread runCommand will sleep until the work loop's gate opens for
 execution of client actions, the action is single threaded against all other
-work-loop event sources.  If the command is disabled the attempt to run a command will be stalled until enable is called.
+work loop event sources.  If the command is disabled the attempt to run a command will be stalled until enable is called.
     @param arg0 Parameter for action of command gate, defaults to 0.
     @param arg1 Parameter for action of command gate, defaults to 0.
     @param arg2 Parameter for action of command gate, defaults to 0.
@@ -134,28 +134,28 @@ work-loop event sources.  If the command is disabled the attempt to run a comman
                                void *arg2 = 0, void *arg3 = 0);
 
 /*! @function runAction
-    @abstract Single thread a call to an action with the target work-loop.
+    @abstract Single thread a call to an action with the target work loop.
     @discussion Client function that causes the given action to be called in
-a single threaded manner.  Beware the work-loop's gate is recursive and command
+a single threaded manner.  Beware the work loop's gate is recursive and command
 gates can cause direct or indirect re-entrancy.         When the executing on a
-client's thread runAction will sleep until the work-loop's gate opens for
+client's thread runAction will sleep until the work loop's gate opens for
 execution of client actions, the action is single threaded against all other
-work-loop event sources.  If the command is disabled the attempt to run a command will be stalled until enable is called.
-    @param action Pointer to function to be executed in work-loop context.
+work loop event sources.  If the command is disabled the attempt to run a command will be stalled until enable is called.
+    @param action Pointer to function to be executed in the context of the work loop.
     @param arg0 Parameter for action parameter, defaults to 0.
     @param arg1 Parameter for action parameter, defaults to 0.
     @param arg2 Parameter for action parameter, defaults to 0.
     @param arg3 Parameter for action parameter, defaults to 0.
-    @result kIOReturnSuccess if successful. kIOReturnBadArgument if action is not defined, kIOReturnAborted if a disabled command gate is free()ed before being reenabled.
+    @result The return value of action if it was called, kIOReturnBadArgument if action is not defined, kIOReturnAborted if a disabled command gate is free()ed before being reenabled.
 */
     virtual IOReturn runAction(Action action,
                               void *arg0 = 0, void *arg1 = 0,
                               void *arg2 = 0, void *arg3 = 0);
 
 /*! @function attemptCommand
-    @abstract Single thread a command with the target work-loop.
+    @abstract Single thread a command with the target work loop.
     @discussion Client function that causes the current action to be called in
-a single threaded manner.  When the executing on a client's thread attemptCommand will fail if the work-loop's gate is closed.
+a single threaded manner.  When the executing on a client's thread attemptCommand will fail if the work loop's gate is closed.
     @param arg0 Parameter for action of command gate, defaults to 0.
     @param arg1 Parameter for action of command gate, defaults to 0.
     @param arg2 Parameter for action of command gate, defaults to 0.
@@ -166,12 +166,12 @@ a single threaded manner.  When the executing on a client's thread attemptComman
                                     void *arg2 = 0, void *arg3 = 0);
 
 /*! @function attemptAction
-    @abstract Single thread a call to an action with the target work-loop.
+    @abstract Single thread a call to an action with the target work loop.
     @discussion Client function that causes the given action to be called in
-a single threaded manner.  Beware the work-loop's gate is recursive and command
+a single threaded manner.  Beware the work loop's gate is recursive and command
 gates can cause direct or indirect re-entrancy.         When the executing on a
-client's thread attemptCommand will fail if the work-loop's gate is closed.
-    @param action Pointer to function to be executed in work-loop context.
+client's thread attemptCommand will fail if the work loop's gate is closed.
+    @param action Pointer to function to be executed in context of the work loop.
     @param arg0 Parameter for action parameter, defaults to 0.
     @param arg1 Parameter for action parameter, defaults to 0.
     @param arg2 Parameter for action parameter, defaults to 0.
@@ -201,12 +201,12 @@ client's thread attemptCommand will fail if the work-loop's gate is closed.
 /*! @function disable
     @abstract Disable the command gate
     @discussion When a command gate is disabled all future calls to runAction and runCommand will stall until the gate is enable()d later.  This can be used to block client threads when a system sleep is requested.  The IOWorkLoop thread itself will never stall, even when making runAction/runCommand calls.  This call must be made from a gated context, to clear potential race conditions.  */
-    virtual void disable();
+    virtual void disable() APPLE_KEXT_OVERRIDE;
 
 /*! @function enable
     @abstract Enable command gate, this will unblock any blocked Commands and Actions.
     @discussion  Enable the command gate.  The attemptAction/attemptCommand calls will now be enabled and can succeeed.  Stalled runCommand/runAction calls will be woken up. */
-    virtual void enable();
+    virtual void enable() APPLE_KEXT_OVERRIDE;
 
 /*! @function commandSleep  
     @abstract Put a thread that is currently holding the command gate to sleep.
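The reworked runAction/runCommand documentation above spells out the single-threading contract. A sketch of the usual calling idiom from a driver, assuming a hypothetical MyDriver class holding an fCommandGate created against its work loop:

    // Gated routine: runs single-threaded against all other work-loop event sources.
    IOReturn MyDriver::setLinkStateGated(void * arg0, void *, void *, void *)
    {
        uint32_t linkState = *(uint32_t *) arg0;
        /* ... touch state that the work loop protects ... */
        return kIOReturnSuccess;
    }

    IOReturn MyDriver::setLinkState(uint32_t linkState)
    {
        // Sleeps until the gate opens, then runs the gated routine on behalf of this thread.
        return fCommandGate->runAction(
            OSMemberFunctionCast(IOCommandGate::Action, this, &MyDriver::setLinkStateGated),
            &linkState);
    }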
index 91069f3d309ea8dbab4ccaf3ef972f887b769c8d..4428157616627c4c89b17c654cb54f3c962d77a3 100644 (file)
@@ -108,7 +108,7 @@ protected:
      * Free all of this object's outstanding resources.
      */
 
-    virtual void free(void);
+    virtual void free(void) APPLE_KEXT_OVERRIDE;
     
     
 public:
index 1dfc5270eb5bb460009496ccb9c0487410b8480b..124369d419ddb0b31e6bbeaac561d9937e8e09da 100644 (file)
@@ -59,9 +59,9 @@ protected:
     int producerIndex, consumerIndex;
     int size;
 
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
-    virtual bool checkForWork();
+    virtual bool checkForWork() APPLE_KEXT_OVERRIDE;
 
 public:
     static IOCommandQueue *commandQueue(OSObject *inOwner,
index f628feebc6235e8542244e84e95c02834842341d..a3a0440699e69cee864711b8c29c8a970003dde8 100644 (file)
@@ -54,7 +54,7 @@ private:
 public:
     static IOConditionLock *withCondition(int condition, bool inIntr = true);
     virtual bool initWithCondition(int condition, bool inIntr = true);
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
     virtual bool tryLock();    // acquire lock, no waiting
     virtual int  lock();       // acquire lock (enter critical section)
index a680aec74bacc96e4d85c3075e73811dbad2bd85..54e0815bb7f68bdfdd22ce17d7e377d156bf673f 100644 (file)
@@ -31,6 +31,7 @@
 #include <IOKit/IOCommand.h>
 #include <IOKit/IOMemoryDescriptor.h>
 class IOMapper;
+class IOBufferMemoryDescriptor;
 
 /**************************** class IODMACommand ***************************/
 
@@ -46,6 +47,20 @@ class IOMapper;
     The IODMACommand can be used in a 'weak-linked' manner.  To do this you must avoid using any static member functions.  Use the, much slower but safe, weakWithSpecification function.  On success a dma command instance will be returned.  This instance can then be used to clone as many commands as is needed.  Remember deriving from this class can not be done weakly, that is no weak subclassing!
 */
 
+
+enum 
+{
+    kIODMAMapOptionMapped       = 0x00000000,
+    kIODMAMapOptionBypassed     = 0x00000001,
+    kIODMAMapOptionNonCoherent  = 0x00000002,
+    kIODMAMapOptionUnmapped     = 0x00000003,
+    kIODMAMapOptionTypeMask     = 0x0000000f,
+
+    kIODMAMapOptionNoCacheStore = 0x00000010,  // Memory in descriptor 
+    kIODMAMapOptionOnChip       = 0x00000020,  // Indicates DMA is on South Bridge
+    kIODMAMapOptionIterateOnly  = 0x00000040   // DMACommand will be used as a cursor only
+};
+
 class IODMACommand : public IOCommand
 {
     OSDeclareDefaultStructors(IODMACommand);
@@ -78,14 +93,25 @@ public:
     @constant kMaxMappingOptions       Internal use only
 */
     enum MappingOptions {
-       kMapped       = 0x00000000,
-       kBypassed     = 0x00000001,
-       kNonCoherent  = 0x00000002,
-       kTypeMask     = 0x0000000f,
-
-       kNoCacheStore = 0x00000010,     // Memory in descriptor 
-       kOnChip       = 0x00000020,     // Indicates DMA is on South Bridge
-       kIterateOnly  = 0x00000040      // DMACommand will be used as a cursor only
+       kMapped       = kIODMAMapOptionMapped,
+       kBypassed     = kIODMAMapOptionBypassed,
+       kNonCoherent  = kIODMAMapOptionNonCoherent,
+       kUnmapped     = kIODMAMapOptionUnmapped,
+       kTypeMask     = kIODMAMapOptionTypeMask,
+
+       kNoCacheStore = kIODMAMapOptionNoCacheStore,    // Memory in descriptor 
+       kOnChip       = kIODMAMapOptionOnChip,          // Indicates DMA is on South Bridge
+       kIterateOnly  = kIODMAMapOptionIterateOnly      // DMACommand will be used as a cursor only
+    };
+
+    struct SegmentOptions {
+       uint8_t  fStructSize;
+       uint8_t  fNumAddressBits;
+       uint64_t fMaxSegmentSize;
+       uint64_t fMaxTransferSize;
+       uint32_t fAlignment;
+       uint32_t fAlignmentLength;
+       uint32_t fAlignmentInternalSegments;
     };
 
 /*! @enum SynchronizeOptions
@@ -187,7 +213,8 @@ public:
     @param maxTransferSize Maximum size of an entire transfer. Defaults to 0 indicating no maximum.
     @param alignment Alignment restriction, in bytes, on I/O bus addresses.  Defaults to single byte alignment.
     @param mapper For mapping types kMapped & kBypassed mapper is used to define the hardware that will perform the mapping, defaults to the system mapper.
-    @result Returns a new memory cursor if successfully created and initialized, 0 otherwise.
+    @param refCon Reference Constant
+    @result Returns a new IODMACommand if successfully created and initialized, 0 otherwise.
 */
     static IODMACommand *
        withSpecification(SegmentFunction  outSegFunc,
@@ -210,6 +237,7 @@ public:
     @param maxTransferSize Maximum size of an entire transfer.  Defaults to 0 indicating no maximum.
     @param alignment Alignment restriction, in bytes, on I/O bus addresses.  Defaults to single byte alignment.
     @param mapper For mapping types kMapped & kBypassed mapper is used to define the hardware that will perform the mapping, defaults to the system mapper.
+    @param refCon Reference Constant
     @result kIOReturnSuccess if everything is OK, otherwise kIOReturnBadArgument if newCommand is NULL, kIOReturnUnsupported if the kernel doesn't export IODMACommand or IOReturnError if the new command fails to init, q.v. initWithSpecification.
 */
     // Note that the function has the attribute always_inline.
@@ -228,11 +256,27 @@ public:
             IOMapper       *mapper = 0,
             void           *refCon = 0) __attribute__((always_inline));
 
+    static IODMACommand *
+       withSpecification(SegmentFunction        outSegFunc,
+                         const SegmentOptions * segmentOptions,
+                         uint32_t               mappingOptions,
+                         IOMapper             * mapper,
+                         void                 * refCon);
+
+
+/*! @function withRefCon
+    @abstract Creates and initializes an unspecified IODMACommand.
+    @discussion Factory function to create and initialize an unspecified IODMACommand. prepareWithSpecification() must be used to prepare the IODMACommand before use.
+    @param refCon Reference Constant
+    @result Returns a new IODMACommand if successfully created and initialized, 0 otherwise.
+*/
+    static IODMACommand * withRefCon(void * refCon);
+
 /*!
     @function cloneCommand
     @abstract Creates a new command based on the specification of the current one.
     @discussion Factory function to create and initialise an IODMACommand in one operation.  The current command's specification will be duplicated in the new object, but however none of its state will be duplicated.  This means that it is safe to clone a command even if it is currently active and running, however you must be certain that the command to be duplicated does have a valid reference for the duration.
-    @result Returns a new memory cursor if successfully created and initialised, 0 otherwise.
+    @result Returns a new IODMACommand if successfully created and initialised, 0 otherwise.
 */
     virtual IODMACommand *cloneCommand(void *refCon = 0);
 
@@ -245,6 +289,7 @@ public:
     @param maxTransferSize Maximum size of an entire transfer. Defaults to 0 indicating no maximum.
     @param alignment Alignment restriction, in bytes, on I/O bus addresses.  Defaults to single byte alignment.
     @param mapper For mapping types kMapped & kBypassed mapper is used to define the hardware that will perform the mapping, defaults to the system mapper.
+    @param refCon Reference Constant
     @result Can fail if the mapping type is not recognised, if one of the 3 mandatory parameters are set to 0, if a 32 bit output function is selected when more than 32 bits of address is required or, if kBypassed is requested on a machine that doesn't support bypassing.  Returns true otherwise.
 */
     virtual bool initWithSpecification( SegmentFunction  outSegFunc,
@@ -278,6 +323,11 @@ public:
 */
     virtual const IOMemoryDescriptor *getMemoryDescriptor() const;
 
+/*! @function getIOMemoryDescriptor
+    @abstract Get the memory descriptor to be used for DMA
+*/
+    IOMemoryDescriptor * getIOMemoryDescriptor() const;
+
 /*! @function prepare
     @abstract Prepare the memory for an I/O transfer.
     @discussion Allocate the mapping resources neccessary for this transfer, specifying a sub range of the IOMemoryDescriptor that will be the target of the I/O.  The complete() method frees these resources.  Data may be copied to buffers for kIODirectionOut memory descriptors, depending on hardware mapping resource availabilty or alignment restrictions.  It should be noted that the this function may block and should only be called on the clients context, i.e never call this routine while gated; also the call itself is not thread safe though this should be an issue as each IODMACommand is independant.
@@ -362,15 +412,20 @@ public:
                                      UInt32    *numSegments)
     { return genIOVMSegments(offset, segments, numSegments); };
 
-       IOReturn
-       genIOVMSegments(SegmentFunction segmentFunction,
-                                                       UInt64 *offsetP,
-                                                       void   *segmentsP,
-                                                       UInt32 *numSegmentsP);
-       
-    virtual void free();
+    IOReturn
+    genIOVMSegments(SegmentFunction segmentFunction,
+                   UInt64 *offsetP,
+                   void   *segmentsP,
+                   UInt32 *numSegmentsP);
+    
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
 private:
+    IOReturn setSpecification(SegmentFunction        outSegFunc,
+                             const SegmentOptions * segmentOptions,
+                             uint32_t               mappingOptions,
+                             IOMapper             * mapper);
+
     typedef IOReturn (*InternalSegmentFunction)(
                                    void         *reference,
                                    IODMACommand *target,
@@ -428,12 +483,11 @@ public:
                                              bool              flushCache = true,
                                              bool              synchronize = true);
 
-    static IOReturn transferSegment(
-                       void         *reference,
-                       IODMACommand *target,
-                       Segment64     segment,
-                       void         *segments,
-                       UInt32        segmentIndex);
+    static IOReturn transferSegment(void         *reference,
+                                   IODMACommand *target,
+                                   Segment64     segment,
+                                   void         *segments,
+                                   UInt32        segmentIndex);
 
 /*! @function getPreparedOffsetAndLength
     @abstract Returns the offset and length into the target IOMemoryDescriptor of a prepared IODDMACommand.
@@ -444,17 +498,49 @@ public:
 
     virtual IOReturn getPreparedOffsetAndLength(UInt64 * offset, UInt64 * length);
 
-       UInt8    getNumAddressBits(void);
-       UInt32   getAlignment(void);
-       
+    UInt8    getNumAddressBits(void);
+    UInt32   getAlignment(void);
+    uint32_t getAlignmentLength(void);
+    uint32_t getAlignmentInternalSegments(void);
+
+
+/*! @function initWithRefCon
+    @abstract Secondary initializer for the IODMACommand class. 
+    @param refCon Reference Constant
+    @result Can fail if super init fails.  Returns true otherwise.
+*/
+
+    virtual
+    bool initWithRefCon(void * refCon = 0);
+
+    virtual
+    bool initWithSpecification(SegmentFunction       outSegFunc,
+                              const SegmentOptions * segmentOptions,
+                              uint32_t               mappingOptions,
+                              IOMapper             * mapper,
+                              void                 * refCon);
+
+    virtual
+    IOReturn prepareWithSpecification(SegmentFunction       outSegFunc,
+                                     const SegmentOptions * segmentOptions,
+                                     uint32_t               mappingOptions,
+                                     IOMapper             * mapper,
+                                     uint64_t               offset,
+                                     uint64_t               length,
+                                     bool                   flushCache = true,
+                                     bool                   synchronize = true);
+
+    virtual
+    IOBufferMemoryDescriptor * createCopyBuffer(IODirection direction, UInt64 length);
+       
 private:
     OSMetaClassDeclareReservedUsed(IODMACommand,  0);
     OSMetaClassDeclareReservedUsed(IODMACommand,  1);
     OSMetaClassDeclareReservedUsed(IODMACommand,  2);
-    OSMetaClassDeclareReservedUnused(IODMACommand,  3);
-    OSMetaClassDeclareReservedUnused(IODMACommand,  4);
-    OSMetaClassDeclareReservedUnused(IODMACommand,  5);
-    OSMetaClassDeclareReservedUnused(IODMACommand,  6);
+    OSMetaClassDeclareReservedUsed(IODMACommand,  3);
+    OSMetaClassDeclareReservedUsed(IODMACommand,  4);
+    OSMetaClassDeclareReservedUsed(IODMACommand,  5);
+    OSMetaClassDeclareReservedUsed(IODMACommand,  6);
     OSMetaClassDeclareReservedUnused(IODMACommand,  7);
     OSMetaClassDeclareReservedUnused(IODMACommand,  8);
     OSMetaClassDeclareReservedUnused(IODMACommand,  9);
@@ -478,9 +564,8 @@ protected:
     Maximum size of a transfer that this memory cursor is allowed to generate */
     UInt64  fMaxTransferSize;
 
-/*! @var fBypassMask
-    Mask to be ored into the address to bypass the given iommu's mapping. */
-    UInt64  fBypassMask;
+    UInt32  fAlignMaskLength;
+    UInt32  fAlignMaskInternalSegments;
 
 /*! @var fMapper
     Client defined mapper. */
@@ -507,7 +592,7 @@ protected:
 
 /*! @var fMappingOptions
     What type of I/O virtual address mapping is required for this command */
-    MappingOptions  fMappingOptions;
+    uint32_t  fMappingOptions;
 
 /*! @var fActive
     fActive indicates that this DMA command is currently prepared and ready to go */
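The SegmentOptions structure and the SegmentOptions-based withSpecification/prepareWithSpecification overloads above replace the long positional argument lists. A sketch of how a driver might adopt them, assuming a 32-bit, byte-aligned DMA engine behind the system mapper and an IOMemoryDescriptor 'md' prepared elsewhere in the hypothetical driver:

    IODMACommand::SegmentOptions segmentOptions;
    bzero(&segmentOptions, sizeof(segmentOptions));
    segmentOptions.fStructSize                = sizeof(segmentOptions);
    segmentOptions.fNumAddressBits            = 32;   // engine drives 32 address bits
    segmentOptions.fMaxSegmentSize            = 0;    // no per-segment limit
    segmentOptions.fMaxTransferSize           = 0;    // no per-transfer limit
    segmentOptions.fAlignment                 = 1;    // byte-aligned addresses
    segmentOptions.fAlignmentLength           = 1;
    segmentOptions.fAlignmentInternalSegments = 1;

    IODMACommand * dmaCommand = IODMACommand::withSpecification(
        kIODMACommandOutputHost64,       // stock host-endian 64-bit segment function
        &segmentOptions,
        kIODMAMapOptionMapped,           // from the new kIODMAMapOption* enum above
        NULL,                            // default (system) mapper
        NULL);                           // refCon
    if (dmaCommand && (kIOReturnSuccess == dmaCommand->setMemoryDescriptor(md))) {
        /* ... genIOVMSegments() / transfer ..., then clearMemoryDescriptor() and release(). */
    }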
index 4fab6e0801efd2bfefd767780344831604346ed5..e5b8f106d1e76fddba383f4e4d03279dec8e1593 100644 (file)
@@ -57,6 +57,7 @@ class IODMAController : public IOService
   virtual IOByteCount getFIFODepth(UInt32 dmaIndex, IODirection direction) = 0;
   virtual IOReturn setFIFODepth(UInt32 dmaIndex, IOByteCount depth) = 0;
   virtual IOByteCount validFIFODepth(UInt32 dmaIndex, IOByteCount depth, IODirection direction) = 0;
+  virtual IOReturn setFrameSize(UInt32 dmaIndex, UInt8 byteCount) = 0;
   virtual IOReturn setDMAConfig(UInt32 dmaIndex, IOService *provider, UInt32 reqIndex) = 0;
   virtual bool validDMAConfig(UInt32 dmaIndex, IOService *provider, UInt32 reqIndex) = 0;
   
@@ -64,7 +65,7 @@ class IODMAController : public IOService
   static const OSSymbol *createControllerName(UInt32 phandle);
   static IODMAController *getController(IOService *provider, UInt32 dmaIndex);
   
-  virtual bool start(IOService *provider);
+  virtual bool start(IOService *provider) APPLE_KEXT_OVERRIDE;
 };
 
 
index 58d851bf90b89a2411a2554cf58dfe24e76d3e56..9aa34a93f7d43ca8ea5fec1fd9c39259d341796d 100644 (file)
@@ -66,6 +66,8 @@ class IODMAEventSource : public IOEventSource
   virtual IOReturn setFIFODepth(IOByteCount depth);
   virtual IOByteCount validFIFODepth(IOByteCount depth, IODirection direction);
 
+  virtual IOReturn setFrameSize(UInt8 byteCount);
+
   virtual IOReturn setDMAConfig(UInt32 dmaIndex);
   virtual bool validDMAConfig(UInt32 dmaIndex);
   
@@ -84,8 +86,8 @@ class IODMAEventSource : public IOEventSource
                    Action completion = 0,
                    Action notification = 0,
                    UInt32 dmaIndex = 0);
-  virtual bool checkForWork(void);
-  virtual void free(void);
+  virtual bool checkForWork(void) APPLE_KEXT_OVERRIDE;
+  virtual void free(void) APPLE_KEXT_OVERRIDE;
 };
 
 #endif /* _IOKIT_IODMAEVENTSOURCE_H */
index ecb92459bda61c374a06522dea160d27261bbb30..8af46e5b3187725b92385514f5800ad8915a2771 100644 (file)
@@ -72,7 +72,7 @@ protected:
 
     void *             notifyMsg;
 
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
     /*!
      * @function sendDataAvailableNotification
index 9b39a2dc2b85e9dae454023666a716bf8c561f75..531202f4162191ba7400aa6bd201bda20ca8c6a6 100644 (file)
@@ -48,6 +48,7 @@ extern const OSSymbol *               gIODTPHandleKey;
 extern const OSSymbol *                gIODTCompatibleKey;
 extern const OSSymbol *        gIODTTypeKey;
 extern const OSSymbol *        gIODTModelKey;
+extern const OSSymbol *        gIODTTargetTypeKey;
 
 extern const OSSymbol *                gIODTAAPLInterruptsKey;
 extern const OSSymbol *                gIODTDefaultInterruptController;
index 10a392afccd2415353958a7f62681f9ddf9347e9..66ee9054a3d47d9b106e3465cd0b7a1241859e1d 100644 (file)
@@ -161,7 +161,7 @@ successfully.
 */
     virtual bool init(OSObject *owner, IOEventSource::Action action = 0);
 
-    virtual void free( void );
+    virtual void free( void ) APPLE_KEXT_OVERRIDE;
 
 /*! @function checkForWork
     @abstract Virtual member function used by IOWorkLoop for work
index de05c90d5d49ce208bb54c3afe7b825fb57ff9d0..60154944beae339cf5e2f1690e2631861c9c7c9c 100644 (file)
@@ -71,7 +71,7 @@ private:
     virtual bool init(OSObject *inOwner,
                      IOInterruptEventSource::Action inAction = 0,
                      IOService *inProvider = 0,
-                     int inIntIndex = 0);
+                     int inIntIndex = 0) APPLE_KEXT_OVERRIDE;
 
     static IOInterruptEventSource *
        interruptEventSource(OSObject *inOwner,
@@ -136,11 +136,11 @@ successfully.  */
 
 /*! @function normalInterruptOccurred
     @abstract Override $link IOInterruptEventSource::normalInterruptOccured to make a filter callout. */
-    virtual void normalInterruptOccurred(void *self, IOService *prov, int ind);
+    virtual void normalInterruptOccurred(void *self, IOService *prov, int ind) APPLE_KEXT_OVERRIDE;
 
 /*! @function disableInterruptOccurred
     @abstract Override $link IOInterruptEventSource::disableInterruptOccurred to make a filter callout. */
-    virtual void disableInterruptOccurred(void *self, IOService *prov, int ind);
+    virtual void disableInterruptOccurred(void *self, IOService *prov, int ind) APPLE_KEXT_OVERRIDE;
 
 private:
     OSMetaClassDeclareReservedUnused(IOFilterInterruptEventSource, 0);
index caf03f0748a8504f223e6e575329f06363f8f66b..a9d25fa982f93ef41d2efe2e6da6579f431efae8 100644 (file)
@@ -303,7 +303,7 @@ typedef struct hibernate_statistics_t hibernate_statistics_t;
 void     IOHibernateSystemInit(IOPMrootDomain * rootDomain);
 
 IOReturn IOHibernateSystemSleep(void);
-IOReturn IOHibernateOpenForDebugData(void);
+void     IOOpenDebugDataFile(const char *fname, uint64_t size);
 IOReturn IOHibernateIOKitSleep(void);
 IOReturn IOHibernateSystemHasSlept(void);
 IOReturn IOHibernateSystemWake(void);
@@ -315,34 +315,6 @@ void     IOHibernateSystemRestart(void);
 
 #endif /* __cplusplus */
 
-#ifdef _SYS_CONF_H_
-typedef void (*kern_get_file_extents_callback_t)(void * ref, uint64_t start, uint64_t size);
-
-struct kern_direct_file_io_ref_t *
-kern_open_file_for_direct_io(const char * name, boolean_t create_file,
-                            kern_get_file_extents_callback_t callback, 
-                            void * callback_ref,
-
-                             off_t set_file_size,
-
-                             off_t write_file_offset,
-                             caddr_t write_file_addr,
-                             vm_size_t write_file_len,
-
-                            dev_t * partition_device_result,
-                            dev_t * image_device_result,
-                             uint64_t * partitionbase_result,
-                             uint64_t * maxiocount_result,
-                             uint32_t * oflags);
-int
-kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len, int ioflag);
-void
-kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref,
-                             off_t write_offset, caddr_t addr, vm_size_t write_length,
-                             off_t discard_offset, off_t discard_end);
-#endif /* _SYS_CONF_H_ */
-
-
 void
 vm_compressor_do_warmup(void);
 
@@ -358,8 +330,6 @@ hibernate_alloc_page_lists(
 
 kern_return_t 
 hibernate_setup(IOHibernateImageHeader * header,
-                        uint32_t  free_page_ratio,
-                        uint32_t  free_page_time,
                         boolean_t vmflush,
                        hibernate_page_list_t * page_list,
                        hibernate_page_list_t * page_list_wired,
index e5c2a943fd0817ee4a020e210ad104ee02b23962..e1c122aef50560d34c6e405988cf37cb113fd171 100644 (file)
@@ -48,7 +48,7 @@ protected:
     IOByteCount         * _descriptorLengths;
     bool                  _descriptorPrepared;
 
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
 public:
 
@@ -100,7 +100,7 @@ public:
 
     virtual addr64_t getPhysicalSegment( IOByteCount   offset,
                                          IOByteCount * length,
-                                         IOOptionBits  options = 0 );
+                                         IOOptionBits  options = 0 ) APPLE_KEXT_OVERRIDE;
 
 /*! @function prepare
     @abstract Prepare the memory for an I/O transfer.
@@ -108,7 +108,7 @@ public:
     @param forDirection The direction of the I/O to be performed, or kIODirectionNone for the direction specified by the memory descriptor.
     @result An IOReturn code. */
 
-    virtual IOReturn prepare(IODirection forDirection = kIODirectionNone);
+    virtual IOReturn prepare(IODirection forDirection = kIODirectionNone) APPLE_KEXT_OVERRIDE;
 
 /*! @function complete
     @abstract Complete processing of the memory after an I/O transfer finishes.
@@ -116,7 +116,7 @@ public:
     @param forDirection The direction of the I/O just completed, or kIODirectionNone for the direction specified by the memory descriptor.
     @result An IOReturn code. */
 
-    virtual IOReturn complete(IODirection forDirection = kIODirectionNone);
+    virtual IOReturn complete(IODirection forDirection = kIODirectionNone) APPLE_KEXT_OVERRIDE;
 };
 
 #endif /* !_IOINTERLEAVEDMEMORYDESCRIPTOR_H */
index fdea6295cc51a5eaf78db8dfab437aff58ace630..7e03f6bd543746ddfed96b413f64f5b3d4345c2f 100644 (file)
@@ -144,5 +144,12 @@ enum {
     kInterruptAccountingInvalidStatisticIndex /* Sentinel value for checking for a nonsensical index */
 };
 
+/*
+ * IOReporting group name; exposed publicly for the purpose of getting channels by group
+ * name; other strings (subgroup names, statistic names) are not exposed, as we may want
+ * to change them in the future.
+ */
+#define kInterruptAccountingGroupName "Interrupt Statistics (by index)"
+
 #endif /* __IOKIT_IOINTERRUPTACCOUNTING_PRIVATE_H */
 
index ef50297a7f27d2eeeb0ddf95cfa832973c0ba81d..5f37136e1ce8325fd846af4b8c5b396f7c6fe3b1 100644 (file)
@@ -118,28 +118,6 @@ static const char * const kInterruptAccountingStatisticNameArray[IA_NUM_INTERRUP
     [kInterruptAccountingIdleExitsIndex] = kInterruptAccountingChannelNameIdleExits,
 };
 
-/*
- * IOReporting group names.
- */
-static const char * const kInterruptAccountingGroupName = "Interrupt Statistics (by index)";
-
-/*
- * TODO: Generate the subgroup name strings?
- */
-#define IA_MAX_SUBGROUP_NAME (32)
-
-static const char * const kInterruptAccountingSubgroupNames[IA_MAX_SUBGROUP_NAME] = {
-    "0", "1", "2" , "3", "4", "5", "6", "7",
-    "8", "9", "10", "11", "12", "13", "14", "15",
-    "16", "17", "18", "19", "20", "21", "22", "23",
-    "24", "25", "26", "27", "28", "29", "30", "31"};
-
-/*
- * As long as we use a lookup table, we may be out of bounds for a valid index.  In this case, fall
- * back on a generic subgroup name that indicates we have screwed up.
- */
-static const char * const kInterruptAccountingGenericSubgroupName = "(Index > 31)";
-
 /*
  * For updating the statistics in the data structure.  We cannot guarantee all of our platforms will be
  * able to do a 64-bit store in a single transaction.  So, for new platforms, call out to the hardware
index 71f55e549117978a3a60fed54964b875d060cf8e..d389a79e3eeead3e78e3e1169fac48b5236cbbd2 100644 (file)
@@ -133,17 +133,17 @@ public:
   virtual IOReturn registerInterrupt(IOService *nub, int source,
                                      void *target,
                                      IOInterruptHandler handler,
-                                     void *refCon);
-  virtual IOReturn unregisterInterrupt(IOService *nub, int source);
+                                     void *refCon) APPLE_KEXT_OVERRIDE;
+  virtual IOReturn unregisterInterrupt(IOService *nub, int source) APPLE_KEXT_OVERRIDE;
   
   virtual IOReturn getInterruptType(IOService *nub, int source,
-                                   int *interruptType);
+                                   int *interruptType) APPLE_KEXT_OVERRIDE;
   
-  virtual IOReturn enableInterrupt(IOService *nub, int source);
-  virtual IOReturn disableInterrupt(IOService *nub, int source);
+  virtual IOReturn enableInterrupt(IOService *nub, int source) APPLE_KEXT_OVERRIDE;
+  virtual IOReturn disableInterrupt(IOService *nub, int source) APPLE_KEXT_OVERRIDE;
   
-  virtual IOInterruptAction getInterruptHandlerAddress(void);
-  virtual IOReturn handleInterrupt(void *refCon, IOService *nub, int source);
+  virtual IOInterruptAction getInterruptHandlerAddress(void) APPLE_KEXT_OVERRIDE;
+  virtual IOReturn handleInterrupt(void *refCon, IOService *nub, int source) APPLE_KEXT_OVERRIDE;
 
   OSMetaClassDeclareReservedUnused(IOSharedInterruptController, 0);
   OSMetaClassDeclareReservedUnused(IOSharedInterruptController, 1);
index 553eb4104a8e5fbe56bc8371fd7869f7e7a21de3..074af793096d210899576e84605535427d776271 100644 (file)
@@ -108,17 +108,17 @@ protected:
 
 /*! @function free
     @abstract Sub-class implementation of free method, disconnects from the interrupt source. */
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
 /*! @function checkForWork
     @abstract Pure Virtual member function used by IOWorkLoop for issueing a client calls.
     @discussion This function called when the work-loop is ready to check for any work to do and then to call out the owner/action.
     @result Return true if this function needs to be called again before all its outstanding events have been processed. */
-    virtual bool checkForWork();
+    virtual bool checkForWork() APPLE_KEXT_OVERRIDE;
 
 /*! @function setWorkLoop
     @abstract Sub-class implementation of setWorkLoop method. */
-    virtual void setWorkLoop(IOWorkLoop *inWorkLoop);
+    virtual void setWorkLoop(IOWorkLoop *inWorkLoop) APPLE_KEXT_OVERRIDE;
 
 public:
 
@@ -153,13 +153,13 @@ successfully.  */
     @discussion A subclass implementation is expected to respect the enabled
 state when checkForWork is called.  Calling this function will cause the
 work-loop to be signalled so that a checkForWork is performed. */
-    virtual void enable();
+    virtual void enable() APPLE_KEXT_OVERRIDE;
 
 /*! @function disable
     @abstract Disable event source.
     @discussion A subclass implementation is expected to respect the enabled
 state when checkForWork is called. */
-    virtual void disable();
+    virtual void disable() APPLE_KEXT_OVERRIDE;
 
 /*! @function getProvider
     @abstract Get'ter for $link provider variable.
index de529f40501547abfdd2e37557b67fa5ffdb4966..58475afd92c3d7d8a511d3eca8133739b7a0582f 100644 (file)
@@ -277,7 +277,7 @@ public:
 
     Locking: same-instance concurrency UNSAFE
 */
-    virtual void free(void);
+    virtual void free(void) APPLE_KEXT_OVERRIDE;
     
 
 /*********************************/
@@ -1214,7 +1214,7 @@ public:
      
     Locking: same-instance concurrency UNSAFE
 */
-    virtual void free(void);
+    virtual void free(void) APPLE_KEXT_OVERRIDE;
     
 protected:
 
@@ -1240,10 +1240,10 @@ protected:
      
     [see IOReporter::handle*Swap* for more info]
 */
-    virtual IOReturn handleSwapPrepare(int newNChannels);
+    virtual IOReturn handleSwapPrepare(int newNChannels) APPLE_KEXT_OVERRIDE;
     virtual IOReturn handleAddChannelSwap(uint64_t channel_id,
-                                          const OSSymbol *symChannelName);
-    virtual void handleSwapCleanup(int swapNChannels);
+                                          const OSSymbol *symChannelName) APPLE_KEXT_OVERRIDE;
+    virtual void handleSwapCleanup(int swapNChannels) APPLE_KEXT_OVERRIDE;
     
 /*! @function   IOStateReporter::updateChannelValues
     @abstract   Update accounting of time spent in current state
@@ -1258,7 +1258,7 @@ protected:
 
     Locking: Caller must ensure that the reporter (data) lock is held.
 */
-    virtual IOReturn updateChannelValues(int channel_index);
+    virtual IOReturn updateChannelValues(int channel_index) APPLE_KEXT_OVERRIDE;
 
 /*! @function   IOStateReporter::setStateByIndices
     @abstract   update a channel state without validating channel_id
@@ -1383,7 +1383,7 @@ public:
 /*! @function   IOHistogramReporter::with
     @abstract   Initializes the IOHistogramReporter instance variables and data structures
 
-    @param  reportingService - IOService instanciator and data provider into the reporter object
+    @param  reportingService - The I/O Kit service for this reporter's channels
     @param  categories - The categories in which the report should be classified
     @param  channelID - uint64_t channel identifier
     @param  channelName - rich channel name as char*
@@ -1443,7 +1443,7 @@ FIXME: need more explanation of the config
      
     Locking: same-instance concurrency UNSAFE
 */
-    virtual void free(void);
+    virtual void free(void) APPLE_KEXT_OVERRIDE;
 
 protected:
 
@@ -1473,7 +1473,7 @@ protected:
      
     Locking: same-instance concurrency SAFE, MAY BLOCK
 */
-    IOReportLegendEntry* handleCreateLegend(void);
+    IOReportLegendEntry* handleCreateLegend(void) APPLE_KEXT_OVERRIDE;
     
     
 private:
@@ -1578,11 +1578,11 @@ public:
         temporary reporter objects for the purpose of creating their
         legend entries.  User-space legends are tracked by 12836893.
 
-        The static version of addReporterLegend adds the reporter's
-        legend directly to reportingService's kIOReportLegendKey.  This
-        will result in serialized getProperty() and setProperty() calls
-        on reportingService and should be avoided when many reporters
-        objects are in use.
+        The static version of addReporterLegend adds the reporter's legend
+        directly to reportingService's kIOReportLegendKey.  It is not
+        possible to safely update kIOReportLegendKey from multiple threads.
+
+        Locking: same-reportingService and same-IORLegend concurrency UNSAFE
 */
     IOReturn addReporterLegend(IOReporter *reporter,
                                const char *groupName,
@@ -1614,7 +1614,7 @@ public:
         in the I/O Kit registry, its ownership will now be with the
         registry.
 */
-    void free(void);
+    void free(void) APPLE_KEXT_OVERRIDE;
     
 
     
index 3e9c1de3b2062910f92a303db1306296397421dd..86f34c995c15613c164f5bf07b25532944cb9d95 100644 (file)
@@ -44,10 +44,10 @@ class IOKitDiagnostics : public OSObject
 
 public:
     static OSObject * diagnostics( void );
-    virtual bool serialize(OSSerialize *s) const;
+    virtual bool serialize(OSSerialize *s) const APPLE_KEXT_OVERRIDE;
 private:
     static void updateOffset( OSDictionary * dict,
-            UInt32 value, const char * name );
+            UInt64 value, const char * name );
 };
 
 #endif /* __cplusplus */
@@ -77,13 +77,15 @@ enum {
     kIOLogHibernate     =         0x00100000ULL,
     kIOStatistics       =         0x04000000ULL,
     kIOSleepWakeWdogOff =         0x40000000ULL,
+    kIOKextSpinDump     =         0x80000000ULL,
 
     // debug aids - change behaviour
     kIONoFreeObjects    =         0x00100000ULL,
     kIOLogSynchronous   =         0x00200000ULL,  // IOLog completes synchronously
-    kOSTraceObjectAlloc =         0x00400000ULL,
+    kIOTracking         =         0x00400000ULL,
     kIOWaitQuietPanics  =         0x00800000ULL,
-    kIOWaitQuietBeforeRoot  =     0x01000000ULL,
+    kIOWaitQuietBeforeRoot =      0x01000000ULL,
+    kIOTrackingBoot     =         0x02000000ULL,
 
     _kIODebugTopFlag    = 0x8000000000000000ULL   // force enum to be 64 bits
 };
@@ -126,6 +128,102 @@ extern void    OSPrintMemory( void );
 #endif
 #define IOPrintMemory OSPrintMemory
 
+
+
+#define kIOKitDiagnosticsClientClassName "IOKitDiagnosticsClient"
+
+enum
+{
+    kIOKitDiagnosticsClientType = 0x99000002
+};
+
+
+struct IOKitDiagnosticsParameters
+{
+    size_t    size;
+    uint64_t  value;
+    uint32_t  options;
+    uint32_t  reserved[3];
+};
+typedef struct IOKitDiagnosticsParameters IOKitDiagnosticsParameters;
+
+enum
+{ 
+    kIOTrackingCallSiteBTs = 16,
+};
+
+struct IOTrackingCallSiteInfo
+{
+    uint32_t      count;
+    size_t        size[2];
+    uintptr_t     bt[kIOTrackingCallSiteBTs];
+};
+
+#define kIOMallocTrackingName  "IOMalloc"
+#define kIOWireTrackingName    "IOWire"
+#define kIOMapTrackingName     "IOMap"
+
+#if KERNEL && IOTRACKING
+
+struct IOTrackingQueue;
+struct IOTrackingCallSite;
+
+struct IOTracking
+{
+    queue_chain_t        link;
+    IOTrackingCallSite * site;
+#if !defined(__LP64__)
+    uint32_t             flags;
+#endif
+};
+
+struct IOTrackingAddress
+{
+    IOTracking    tracking;
+    uintptr_t     address;
+    size_t        size;
+#if defined(__LP64__)
+    uint32_t      flags;
+#endif
+};
+
+void              IOTrackingInit(void);
+IOTrackingQueue * IOTrackingQueueAlloc(const char * name, size_t allocSize, size_t minCaptureSize, bool isAlloc);
+void              IOTrackingQueueFree(IOTrackingQueue * head);
+void              IOTrackingAdd(IOTrackingQueue * head, IOTracking * mem, size_t size, bool address);
+void              IOTrackingRemove(IOTrackingQueue * head, IOTracking * mem, size_t size);
+void              IOTrackingAlloc(IOTrackingQueue * head, uintptr_t address, size_t size);
+void              IOTrackingFree(IOTrackingQueue * head, uintptr_t address, size_t size);
+void              IOTrackingReset(IOTrackingQueue * head);
+void              IOTrackingAccumSize(IOTrackingQueue * head, IOTracking * mem, size_t size);
+kern_return_t     IOTrackingDebug(uint32_t selector, uint32_t options,
+                                 const char * names, size_t namesLen, 
+                                 size_t size, OSObject ** result);
+
+extern IOTrackingQueue * gIOMallocTracking;
+extern IOTrackingQueue * gIOWireTracking;
+extern IOTrackingQueue * gIOMapTracking;
+
+#endif /* KERNEL && IOTRACKING */
+
+enum
+{
+    kIOTrackingExcludeNames      = 0x00000001,
+};
+
+enum
+{
+    kIOTrackingGetTracking       = 0x00000001,
+    kIOTrackingPrintTracking     = 0x00000002,
+    kIOTrackingResetTracking     = 0x00000003,
+    kIOTrackingStartCapture      = 0x00000004,
+    kIOTrackingStopCapture       = 0x00000005,
+    kIOTrackingSetMinCaptureSize = 0x00000006,
+    kIOTrackingLeaks             = 0x00000007,
+    kIOTrackingInvalid           = 0xFFFFFFFE,
+};
+
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif /* __cplusplus */
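
A minimal sketch of how a kernel-internal allocator could hook into the new IOTracking API above, using only the signatures declared in this hunk; the queue name, MyRecord, and the helper function names are illustrative, not part of the header:

#if KERNEL && IOTRACKING
static IOTrackingQueue * gMyQueue;     // hypothetical queue for one allocation family

struct MyRecord
{
    IOTracking tracking;               // embedded link consumed by the tracking queue
    size_t     payloadSize;            // allocator-specific payload would follow
};

static void
MyTrackingInit(void)
{
    // allocSize describes each tracked record; minCaptureSize 0 captures every call site
    gMyQueue = IOTrackingQueueAlloc("MyAlloc", sizeof(MyRecord), 0, false);
}

static void
MyTrackingNoteAlloc(MyRecord * rec, size_t size)
{
    // records the call site and size so a later kIOTrackingGetTracking /
    // kIOTrackingLeaks query can report it
    IOTrackingAdd(gMyQueue, &rec->tracking, size, false);
}

static void
MyTrackingNoteFree(MyRecord * rec, size_t size)
{
    IOTrackingRemove(gMyQueue, &rec->tracking, size);
}
#endif /* KERNEL && IOTRACKING */
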
diff --git a/iokit/IOKit/IOKitDiagnosticsUserClient.h b/iokit/IOKit/IOKitDiagnosticsUserClient.h
new file mode 100644 (file)
index 0000000..8637039
--- /dev/null
@@ -0,0 +1,17 @@
+
+#include <IOKit/IOService.h>
+#include <IOKit/IOUserClient.h>
+
+
+class IOKitDiagnosticsClient : public IOUserClient
+{
+    OSDeclareDefaultStructors(IOKitDiagnosticsClient)
+
+public:
+    static  IOUserClient * withTask(task_t owningTask);
+    virtual IOReturn       clientClose(void) APPLE_KEXT_OVERRIDE;
+    virtual IOReturn       setProperties(OSObject * properties) APPLE_KEXT_OVERRIDE;
+    virtual IOReturn       externalMethod(uint32_t selector, IOExternalMethodArguments * args,
+                                          IOExternalMethodDispatch * dispatch, OSObject * target, void * reference) APPLE_KEXT_OVERRIDE;
+};
+
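
This user client appears to be vended by IOPlatformExpertDevice::newUserClient (added further down in this commit) for connection type kIOKitDiagnosticsClientType. A rough user-space sketch under that assumption; the externalMethod argument layout is not visible in these headers, so passing IOKitDiagnosticsParameters as the structure input, and choosing kIOTrackingPrintTracking as a selector that needs no output buffer, are guesses:

#include <IOKit/IOKitLib.h>
#include <mach/mach.h>
// kIOKitDiagnosticsClientType, kIOTrackingPrintTracking and
// IOKitDiagnosticsParameters come from the kernel-private IOKitDebug.h hunk
// above and would have to be replicated in a user-space tool.

static kern_return_t printIOTracking(void)
{
    io_service_t pe = IOServiceGetMatchingService(kIOMasterPortDefault,
                          IOServiceMatching("IOPlatformExpertDevice"));
    if (!pe) return KERN_FAILURE;

    io_connect_t conn = MACH_PORT_NULL;
    kern_return_t kr = IOServiceOpen(pe, mach_task_self(), kIOKitDiagnosticsClientType, &conn);
    IOObjectRelease(pe);
    if (kr != KERN_SUCCESS) return kr;

    IOKitDiagnosticsParameters params = { 0 };        // assumed structure input
    kr = IOConnectCallMethod(conn, kIOTrackingPrintTracking,
                             NULL, 0,                 // no scalar input
                             &params, sizeof(params), // structure input (assumption)
                             NULL, NULL, NULL, NULL); // no output expected
    IOServiceClose(conn);
    return kr;
}
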
index cae0be4f24615cd57196fc8c37056abd3c27424f..bea0d10f06164c262f65320705d90bd16be39b95 100644 (file)
 #define kIOClientPrivilegeSecureConsoleProcess  "secureprocess"
 #define kIOClientPrivilegeConsoleSession        "consolesession"
 
+
+// Embedded still throttles NVRAM commits via kIONVRAMSyncNowPropertyKey, but
+// some clients still need a stricter NVRAM commit contract. Please use this with
+// care.
+#define kIONVRAMForceSyncNowPropertyKey                "IONVRAM-FORCESYNCNOW-PROPERTY"
+
+
 // clientHasPrivilege security token for kIOClientPrivilegeSecureConsoleProcess
 typedef struct _IOUCProcessToken {
     void *  token;
@@ -71,11 +78,12 @@ typedef struct _IOUCProcessToken {
 
 #define kIOKernelHasSafeSleep        1
 
-#define kIOPlatformSleepActionKey                    "IOPlatformSleepAction"        /* value is OSNumber (priority) */
-#define kIOPlatformWakeActionKey                     "IOPlatformWakeAction"         /* value is OSNumber (priority) */
-#define kIOPlatformQuiesceActionKey                  "IOPlatformQuiesceAction"      /* value is OSNumber (priority) */
-#define kIOPlatformActiveActionKey                   "IOPlatformActiveAction"       /* value is OSNumber (priority) */
-#define kIOPlatformHaltRestartActionKey              "IOPlatformHaltRestartAction"  /* value is OSNumber (priority) */
+#define kIOPlatformSleepActionKey                    "IOPlatformSleepAction"         /* value is OSNumber (priority) */
+#define kIOPlatformWakeActionKey                     "IOPlatformWakeAction"          /* value is OSNumber (priority) */
+#define kIOPlatformQuiesceActionKey                  "IOPlatformQuiesceAction"       /* value is OSNumber (priority) */
+#define kIOPlatformActiveActionKey                   "IOPlatformActiveAction"        /* value is OSNumber (priority) */
+#define kIOPlatformHaltRestartActionKey              "IOPlatformHaltRestartAction"   /* value is OSNumber (priority) */
+#define kIOPlatformPanicActionKey                    "IOPlatformPanicAction"         /* value is OSNumber (priority) */
 
 #define kIOPlatformFunctionHandlerSet                "IOPlatformFunctionHandlerSet"
 #if defined(__i386__) || defined(__x86_64__)
@@ -96,4 +104,11 @@ enum {
     kIOServiceTerminateNeedWillTerminate = 0x00000100,
 };
 
+#define kIOClassNameOverrideKey "IOClassNameOverride"
+
+enum {
+    kIOClassNameOverrideNone = 0x00000001,
+};
+
+
 #endif /* ! _IOKIT_IOKITKEYSPRIVATE_H */
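
The new kIOPlatformPanicActionKey slots into the same platform-action convention as the sleep/wake/quiesce keys above: a driver publishes the key as an OSNumber priority and later receives a callPlatformFunction() call carrying the same symbol. A sketch under that assumption (MyDriver is illustrative; OSDefineMetaClassAndStructors and the usual driver boilerplate are omitted):

class MyDriver : public IOService
{
    OSDeclareDefaultStructors(MyDriver)
public:
    virtual bool start(IOService * provider) APPLE_KEXT_OVERRIDE;
    virtual IOReturn callPlatformFunction(const OSSymbol * functionName, bool waitForFunction,
                                          void * param1, void * param2,
                                          void * param3, void * param4) APPLE_KEXT_OVERRIDE;
};

bool MyDriver::start(IOService * provider)
{
    if (!IOService::start(provider)) return false;
    // priority 0; the platform expert walks registered services carrying this property
    setProperty(kIOPlatformPanicActionKey, (unsigned long long) 0, 32);
    return true;
}

IOReturn MyDriver::callPlatformFunction(const OSSymbol * functionName, bool waitForFunction,
                                        void * param1, void * param2,
                                        void * param3, void * param4)
{
    if (functionName->isEqualTo(kIOPlatformPanicActionKey)) {
        // keep this minimal: it may run in panic context
        return kIOReturnSuccess;
    }
    return IOService::callPlatformFunction(functionName, waitForFunction,
                                           param1, param2, param3, param4);
}
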
index a290e4d911278e1e95a3e01fd42bd91344df716f..7bf9ad8abd9d79c64b8ce29d34a602a5a70bb571 100644 (file)
@@ -150,9 +150,10 @@ void IOFreePageable(void * address, vm_size_t size);
  * Typed memory allocation macros. Both may block.
  */
 #define IONew(type,number) \
-( ((number) != 0 && ((vm_size_t) ((sizeof(type) * (number) / (number))) != sizeof(type)) /* overflow check 21532969 */ \
-? 0 \
-: ((type*)IOMalloc(sizeof(type) * (number)))) )
+( ((number) != 0 && ((vm_size_t) ((sizeof(type) * (number) / (number))) != sizeof(type)) /* overflow check 20847256 */ \
+  ? 0 \
+  : ((type*)IOMalloc(sizeof(type) * (number)))) )
+
 #define IODelete(ptr,type,number) IOFree( (ptr) , sizeof(type) * (number) )
 
 /////////////////////////////////////////////////////////////////////////////
@@ -271,6 +272,14 @@ void IOExitThread(void) __attribute__((deprecated));
 
 void IOSleep(unsigned milliseconds);
 
+/*! @function IOSleepWithLeeway
+    @abstract Sleep the calling thread for a number of milliseconds, with a specified leeway the kernel may use for timer coalescing.
+    @discussion This function blocks the calling thread for at least the number of specified milliseconds, giving time to other processes.  The kernel may also coalesce any timers involved in the delay, using the leeway given as a guideline.
+    @param intervalMilliseconds The integer number of milliseconds to wait.
+    @param leewayMilliseconds The integer number of milliseconds to use as a timer coalescing guideline. */
+
+void IOSleepWithLeeway(unsigned intervalMilliseconds, unsigned leewayMilliseconds);
+
 /*! @function IODelay
     @abstract Spin delay for a number of microseconds.
     @discussion This function spins to delay for at least the number of specified microseconds. Since the CPU is busy spinning no time is made available to other processes; this method of delay should be used only for short periods. Also, the AbsoluteTime based APIs of kern/clock.h provide finer grained and lower cost delays.
@@ -407,6 +416,11 @@ extern mach_timespec_t IOZeroTvalspec;
 
 #endif /* __APPLE_API_OBSOLETE */
 
+#if XNU_KERNEL_PRIVATE
+vm_tag_t
+IOMemoryTag(vm_map_t map);
+#endif
+
 __END_DECLS
 
 #endif /* !__IOKIT_IOLIB_H */
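
The IONew() hunk above only re-indents the existing multiplication-overflow guard (the allocation is refused when sizeof(type) * number wraps), and IOSleepWithLeeway() is the genuinely new entry point. A small illustrative use; MyEntry, the element count, and the 100/50 ms values are placeholders:

#include <IOKit/IOLib.h>

struct MyEntry { uint64_t key; uint64_t value; };

static void exampleAllocateAndWait(uint32_t count)
{
    // NULL both on allocation failure and when sizeof(MyEntry) * count would
    // overflow vm_size_t (the check shown above).
    MyEntry * entries = IONew(MyEntry, count);
    if (!entries) return;

    // Block roughly 100 ms, allowing the kernel up to 50 ms of leeway so the
    // wakeup can be coalesced with other pending timers.
    IOSleepWithLeeway(100, 50);

    IODelete(entries, MyEntry, count);
}
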
index 784077d9c740109733ece69fb5b1b3b2aec5cf32..fc4f07dbe16b2462ec1f66ba8c66dbbeb38b4c07 100644 (file)
@@ -38,16 +38,7 @@ __BEGIN_DECLS
 // These are C accessors to the system mapper for non-IOKit clients
 ppnum_t IOMapperIOVMAlloc(unsigned pages);
 void IOMapperIOVMFree(ppnum_t addr, unsigned pages);
-
 ppnum_t IOMapperInsertPage(ppnum_t addr, unsigned offset, ppnum_t page);
-void IOMapperInsertPPNPages(ppnum_t addr, unsigned offset,
-                            ppnum_t *pageList, unsigned pageCount);
-void IOMapperInsertUPLPages(ppnum_t addr, unsigned offset,
-                            upl_page_info_t *pageList, unsigned pageCount);
-
-mach_vm_address_t IOMallocPhysical(mach_vm_size_t size, mach_vm_address_t mask);
-
-void IOFreePhysical(mach_vm_address_t address, mach_vm_size_t size);
 
 __END_DECLS
 
@@ -67,6 +58,7 @@ class IOMapper : public IOService
 
     // Give the platform expert access to setMapperRequired();
     friend class IOPlatformExpert;
+    friend class IOMemoryDescriptor;
 
 private:
     enum SystemMapperState {
@@ -76,74 +68,67 @@ private:
         kWaitMask  = 3,
     };
 protected:
-    void *fTable;
-    ppnum_t fTablePhys;
-    IOItemCount fTableSize;
-    OSData *fTableHandle;
+#ifdef XNU_KERNEL_PRIVATE
+    uint64_t   __reservedA[7];
+    uint32_t   __reservedB;
+    uint32_t   fPageSize;
+#else
+    uint64_t __reserved[8];
+#endif
     bool fIsSystem;
 
-
     static void setMapperRequired(bool hasMapper);
     static void waitForSystemMapper();
 
     virtual bool initHardware(IOService *provider) = 0;
 
-    virtual bool allocTable(IOByteCount size);
-
 public:
-    virtual bool start(IOService *provider);
-    virtual void free();
-
-    // Static routines capable of allocating tables that are physically
-    // contiguous in real memory space.
-    static OSData * NewARTTable(IOByteCount size,
-                                void ** virtAddrP, ppnum_t *physAddrP);
-    static void FreeARTTable(OSData *handle, IOByteCount size);
-
+    virtual bool start(IOService *provider) APPLE_KEXT_OVERRIDE;
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
     // To get access to the system mapper IOMapper::gSystem 
     static IOMapper *gSystem;
 
-    virtual ppnum_t iovmAlloc(IOItemCount pages) = 0;
-    virtual void iovmFree(ppnum_t addr, IOItemCount pages) = 0;
-
-    virtual void iovmInsert(ppnum_t addr, IOItemCount offset, ppnum_t page) = 0;
-    virtual void iovmInsert(ppnum_t addr, IOItemCount offset,
-                            ppnum_t *pageList, IOItemCount pageCount);
-    virtual void iovmInsert(ppnum_t addr, IOItemCount offset,
-                            upl_page_info_t *pageList, IOItemCount pageCount);
-
     static void checkForSystemMapper()
         { if ((uintptr_t) gSystem & kWaitMask) waitForSystemMapper(); };
 
     static IOMapper * copyMapperForDevice(IOService * device);
     static IOMapper * copyMapperForDeviceWithIndex(IOService * device, unsigned int index);
 
-       
-    // Function will panic if the given address is not found in a valid
-    // iovm mapping.
-    virtual addr64_t mapAddr(IOPhysicalAddress addr) = 0;
-
-    // Get the address mask to or into an address to bypass this mapper
-    virtual bool getBypassMask(addr64_t *maskP) const;
-
-    virtual ppnum_t iovmAllocDMACommand(IODMACommand * command, IOItemCount pageCount);
-    virtual void iovmFreeDMACommand(IODMACommand * command, ppnum_t addr, IOItemCount pageCount);
-    
-    virtual ppnum_t iovmMapMemory(
-                         OSObject                    * memory,   // dma command or iomd
-                         ppnum_t                       offsetPage,
-                         ppnum_t                       pageCount,
-                         uint32_t                      options,
-                         upl_page_info_t             * pageList,
-                         const IODMAMapSpecification * mapSpecification);
-
-    OSMetaClassDeclareReservedUsed(IOMapper, 0);
-    OSMetaClassDeclareReservedUsed(IOMapper, 1);
-    OSMetaClassDeclareReservedUsed(IOMapper, 2);
-    OSMetaClassDeclareReservedUsed(IOMapper, 3);
+    // { subclasses
+
+    virtual uint64_t getPageSize(void) const = 0;
+
+    virtual IOReturn iovmMapMemory(IOMemoryDescriptor          * memory,
+                                  uint64_t                      descriptorOffset,
+                                  uint64_t                      length,
+                                  uint32_t                      mapOptions,
+                                  const IODMAMapSpecification * mapSpecification,
+                                  IODMACommand                * dmaCommand,
+                                  const IODMAMapPageList      * pageList,
+                                  uint64_t                    * mapAddress,
+                                  uint64_t                    * mapLength) = 0;
+
+    virtual IOReturn iovmUnmapMemory(IOMemoryDescriptor * memory, 
+                                    IODMACommand       * dmaCommand, 
+                                    uint64_t             mapAddress,
+                                    uint64_t             mapLength) = 0;
+
+    virtual IOReturn iovmInsert(uint32_t options,
+                               uint64_t mapAddress,
+                               uint64_t offset, 
+                               uint64_t physicalAddress, 
+                               uint64_t length) = 0;
+
+    virtual uint64_t mapToPhysicalAddress(uint64_t mappedAddress) = 0;
+
+    // }
 
 private:
+    OSMetaClassDeclareReservedUnused(IOMapper, 0);
+    OSMetaClassDeclareReservedUnused(IOMapper, 1);
+    OSMetaClassDeclareReservedUnused(IOMapper, 2);
+    OSMetaClassDeclareReservedUnused(IOMapper, 3);
     OSMetaClassDeclareReservedUnused(IOMapper, 4);
     OSMetaClassDeclareReservedUnused(IOMapper, 5);
     OSMetaClassDeclareReservedUnused(IOMapper, 6);
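
With the table-management and iovmAlloc-style entry points removed, a concrete mapper now implements the pure virtuals listed in the "{ subclasses" block. A skeleton declaration built only from those signatures (MyMapper is illustrative; the method bodies and OSDefineMetaClassAndStructors belong in the .cpp):

class MyMapper : public IOMapper
{
    OSDeclareDefaultStructors(MyMapper)

protected:
    virtual bool initHardware(IOService * provider) APPLE_KEXT_OVERRIDE;

public:
    virtual uint64_t getPageSize(void) const APPLE_KEXT_OVERRIDE;

    virtual IOReturn iovmMapMemory(IOMemoryDescriptor          * memory,
                                   uint64_t                      descriptorOffset,
                                   uint64_t                      length,
                                   uint32_t                      mapOptions,
                                   const IODMAMapSpecification * mapSpecification,
                                   IODMACommand                * dmaCommand,
                                   const IODMAMapPageList      * pageList,
                                   uint64_t                    * mapAddress,
                                   uint64_t                    * mapLength) APPLE_KEXT_OVERRIDE;

    virtual IOReturn iovmUnmapMemory(IOMemoryDescriptor * memory,
                                     IODMACommand       * dmaCommand,
                                     uint64_t             mapAddress,
                                     uint64_t             mapLength) APPLE_KEXT_OVERRIDE;

    virtual IOReturn iovmInsert(uint32_t options,
                                uint64_t mapAddress,
                                uint64_t offset,
                                uint64_t physicalAddress,
                                uint64_t length) APPLE_KEXT_OVERRIDE;

    virtual uint64_t mapToPhysicalAddress(uint64_t mappedAddress) APPLE_KEXT_OVERRIDE;
};
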
index 242581bcb4a5c6d342d4696879213bfe2bf55116..7b193afc0edb683e7f7e5e6b681f75eb13a78ce1 100644 (file)
 #include <IOKit/IOTypes.h>
 #include <IOKit/IOLocks.h>
 #include <libkern/c++/OSContainers.h>
+#ifdef XNU_KERNEL_PRIVATE
+#include <IOKit/IOKitDebug.h>
+#endif
 
 #include <mach/memory_object_types.h>
 
 class IOMemoryMap;
 class IOMapper;
 class IOService;
+class IODMACommand;
 
 /*
  * Direction of transfer, with respect to the described memory.
@@ -162,15 +166,72 @@ struct IODMAMapSpecification
        uint32_t    resvB[4];
 };
 
+struct IODMAMapPageList
+{
+    uint32_t                pageOffset;
+    uint32_t                pageListCount;
+    const upl_page_info_t * pageList;
+};
+
+// mapOptions for iovmMapMemory
 enum
 {
+    kIODMAMapReadAccess           = 0x00000001,
     kIODMAMapWriteAccess          = 0x00000002,
     kIODMAMapPhysicallyContiguous = 0x00000010,
     kIODMAMapDeviceMemory         = 0x00000020,
     kIODMAMapPagingPath           = 0x00000040,
     kIODMAMapIdentityMap          = 0x00000080,
+
+    kIODMAMapPageListFullyOccupied = 0x00000100,
+    kIODMAMapFixedAddress          = 0x00000200,
+};
+
+#ifdef KERNEL_PRIVATE
+
+// Used for dmaCommandOperation communications for IODMACommand and mappers
+
+enum  {
+    kIOMDWalkSegments             = 0x01000000,
+    kIOMDFirstSegment            = 1 | kIOMDWalkSegments,
+    kIOMDGetCharacteristics       = 0x02000000,
+    kIOMDGetCharacteristicsMapped = 1 | kIOMDGetCharacteristics,
+    kIOMDDMAActive                = 0x03000000,
+    kIOMDSetDMAActive             = 1 | kIOMDDMAActive,
+    kIOMDSetDMAInactive           = kIOMDDMAActive,
+    kIOMDAddDMAMapSpec            = 0x04000000,
+    kIOMDDMAMap                   = 0x05000000,
+    kIOMDDMACommandOperationMask  = 0xFF000000,
+};
+struct IOMDDMACharacteristics {
+    UInt64 fLength;
+    UInt32 fSGCount;
+    UInt32 fPages;
+    UInt32 fPageAlign;
+    ppnum_t fHighestPage;
+    IODirection fDirection;
+    UInt8 fIsPrepared;
+};
+
+struct IOMDDMAMapArgs {
+    IOMapper            * fMapper;
+    IODMACommand        * fCommand;
+    IODMAMapSpecification fMapSpec;
+    uint64_t              fOffset;
+    uint64_t              fLength;
+    uint64_t              fAlloc;
+    uint64_t              fAllocLength;
+    uint8_t               fMapContig;
 };
 
+struct IOMDDMAWalkSegmentArgs {
+    UInt64 fOffset;                    // Input/Output offset
+    UInt64 fIOVMAddr, fLength;         // Output variables
+    UInt8 fMapped;                     // Input Variable, Require mapped IOVMA
+};
+typedef UInt8 IOMDDMAWalkSegmentState[128];
+
+#endif /* KERNEL_PRIVATE */
 
 enum 
 {
@@ -191,6 +252,7 @@ struct IOMemoryReference;
 class IOMemoryDescriptor : public OSObject
 {
     friend class IOMemoryMap;
+    friend class IOMultiMemoryDescriptor;
 
     OSDeclareDefaultStructors(IOMemoryDescriptor);
 
@@ -315,11 +377,12 @@ typedef IOOptionBits DMACommandOps;
     IOMemoryDescriptorReserved * getKernelReserved( void );
     IOReturn dmaMap(
        IOMapper                    * mapper,
+       IODMACommand                * command,
        const IODMAMapSpecification * mapSpec,
        uint64_t                      offset,
        uint64_t                      length,
-       uint64_t                    * address,
-       ppnum_t                     * mapPages);
+       uint64_t                    * mapAddress,
+       uint64_t                    * mapLength);
 #endif
        
 private:
@@ -351,7 +414,7 @@ private:
     OSMetaClassDeclareReservedUnused(IOMemoryDescriptor, 15);
 
 protected:
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 public:
     static void initialize( void );
 
@@ -720,11 +783,14 @@ public:
     ipc_port_t          fRedirEntry;
     IOMemoryDescriptor * fOwner;
     uint8_t             fUserClientUnmap;
+#if IOTRACKING
+    IOTracking           fTracking;
+#endif
 #endif /* XNU_KERNEL_PRIVATE */
 
 protected:
-    virtual void taggedRelease(const void *tag = 0) const;
-    virtual void free();
+    virtual void taggedRelease(const void *tag = 0) const APPLE_KEXT_OVERRIDE;
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
 public:
 /*! @function getVirtualAddress
@@ -922,22 +988,23 @@ protected:
     bool               _initialized;      /* has superclass been initialized? */
 
 public:
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
-    virtual IOReturn dmaCommandOperation(DMACommandOps op, void *vData, UInt dataSize) const;
+    virtual IOReturn dmaCommandOperation(DMACommandOps op, void *vData, UInt dataSize) const APPLE_KEXT_OVERRIDE;
 
-    virtual uint64_t getPreparationID( void );
+    virtual uint64_t getPreparationID( void ) APPLE_KEXT_OVERRIDE;
 
 #ifdef XNU_KERNEL_PRIVATE
     // Internal APIs may be made virtual at some time in the future.
     IOReturn wireVirtual(IODirection forDirection);
     IOReturn dmaMap(
        IOMapper                    * mapper,
+       IODMACommand                * command,
        const IODMAMapSpecification * mapSpec,
        uint64_t                      offset,
        uint64_t                      length,
-       uint64_t                    * address,
-       ppnum_t                     * mapPages);
+       uint64_t                    * mapAddress,
+       uint64_t                    * mapLength);
     bool initMemoryEntries(size_t size, IOMapper * mapper);
 
     IOMemoryReference * memoryReferenceAlloc(uint32_t capacity, 
@@ -994,76 +1061,76 @@ public:
                                  UInt32                offset,
                                  task_t                task,
                                  IOOptionBits  options,
-                                 IOMapper *    mapper = kIOMapperSystem);
+                                 IOMapper *    mapper = kIOMapperSystem) APPLE_KEXT_OVERRIDE;
 
 #ifndef __LP64__
     // Secondary initialisers
     virtual bool initWithAddress(void *                address,
                                  IOByteCount   withLength,
-                                 IODirection   withDirection) APPLE_KEXT_DEPRECATED;
+                                 IODirection   withDirection) APPLE_KEXT_OVERRIDE APPLE_KEXT_DEPRECATED;
 
     virtual bool initWithAddress(IOVirtualAddress address,
                                  IOByteCount    withLength,
                                  IODirection   withDirection,
-                                 task_t                withTask) APPLE_KEXT_DEPRECATED;
+                                 task_t                withTask) APPLE_KEXT_OVERRIDE APPLE_KEXT_DEPRECATED;
 
     virtual bool initWithPhysicalAddress(
                                 IOPhysicalAddress      address,
                                 IOByteCount            withLength,
-                                IODirection            withDirection ) APPLE_KEXT_DEPRECATED;
+                                IODirection            withDirection ) APPLE_KEXT_OVERRIDE APPLE_KEXT_DEPRECATED;
 
     virtual bool initWithRanges(        IOVirtualRange * ranges,
                                         UInt32           withCount,
                                         IODirection      withDirection,
                                         task_t           withTask,
-                                        bool             asReference = false) APPLE_KEXT_DEPRECATED;
+                                        bool             asReference = false) APPLE_KEXT_OVERRIDE APPLE_KEXT_DEPRECATED;
 
     virtual bool initWithPhysicalRanges(IOPhysicalRange * ranges,
                                         UInt32           withCount,
                                         IODirection      withDirection,
-                                        bool             asReference = false) APPLE_KEXT_DEPRECATED;
+                                        bool             asReference = false) APPLE_KEXT_OVERRIDE APPLE_KEXT_DEPRECATED;
 
     virtual addr64_t getPhysicalSegment64( IOByteCount offset,
-                                            IOByteCount * length ) APPLE_KEXT_DEPRECATED;
+                                            IOByteCount * length ) APPLE_KEXT_OVERRIDE APPLE_KEXT_DEPRECATED;
 
     virtual IOPhysicalAddress getPhysicalSegment(IOByteCount offset,
-                                                IOByteCount * length);
+                                                IOByteCount * length) APPLE_KEXT_OVERRIDE;
 
     virtual IOPhysicalAddress getSourceSegment(IOByteCount offset,
-                                               IOByteCount * length) APPLE_KEXT_DEPRECATED;
+                                               IOByteCount * length) APPLE_KEXT_OVERRIDE APPLE_KEXT_DEPRECATED;
 
     virtual void * getVirtualSegment(IOByteCount offset,
-                                       IOByteCount * length) APPLE_KEXT_DEPRECATED;
+                                       IOByteCount * length) APPLE_KEXT_OVERRIDE APPLE_KEXT_DEPRECATED;
 #endif /* !__LP64__ */
 
     virtual IOReturn setPurgeable( IOOptionBits newState,
-                                    IOOptionBits * oldState );
+                                    IOOptionBits * oldState ) APPLE_KEXT_OVERRIDE;
     
     virtual addr64_t getPhysicalSegment( IOByteCount   offset,
                                          IOByteCount * length,
 #ifdef __LP64__
-                                         IOOptionBits  options = 0 );
+                                         IOOptionBits  options = 0 ) APPLE_KEXT_OVERRIDE;
 #else /* !__LP64__ */
-                                         IOOptionBits  options );
+                                         IOOptionBits  options ) APPLE_KEXT_OVERRIDE;
 #endif /* !__LP64__ */
 
-    virtual IOReturn prepare(IODirection forDirection = kIODirectionNone);
+    virtual IOReturn prepare(IODirection forDirection = kIODirectionNone) APPLE_KEXT_OVERRIDE;
 
-    virtual IOReturn complete(IODirection forDirection = kIODirectionNone);
+    virtual IOReturn complete(IODirection forDirection = kIODirectionNone) APPLE_KEXT_OVERRIDE;
 
     virtual IOReturn doMap(
        vm_map_t                addressMap,
        IOVirtualAddress *      atAddress,
        IOOptionBits            options,
        IOByteCount             sourceOffset = 0,
-       IOByteCount             length = 0 );
+       IOByteCount             length = 0 ) APPLE_KEXT_OVERRIDE;
 
     virtual IOReturn doUnmap(
        vm_map_t                addressMap,
        IOVirtualAddress        logical,
-       IOByteCount             length );
+       IOByteCount             length ) APPLE_KEXT_OVERRIDE;
 
-    virtual bool serialize(OSSerialize *s) const;
+    virtual bool serialize(OSSerialize *s) const APPLE_KEXT_OVERRIDE;
 
     // Factory method for cloning a persistent IOMD, see IOMemoryDescriptor
     static IOMemoryDescriptor *
index 42b19a46886b52d7197c28f39fe17fbe6217022f..1a5883abd8fa0ef0d0047b689f20fe16730ce8c0 100644 (file)
@@ -45,7 +45,7 @@ protected:
     UInt32                _descriptorsCount;
     bool                  _descriptorsIsAllocated;
 
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
 public:
 
@@ -88,7 +88,7 @@ public:
 
     virtual addr64_t getPhysicalSegment( IOByteCount   offset,
                                          IOByteCount * length,
-                                         IOOptionBits  options = 0 );
+                                         IOOptionBits  options = 0 ) APPLE_KEXT_OVERRIDE;
 
 /*! @function prepare
     @abstract Prepare the memory for an I/O transfer.
@@ -96,7 +96,7 @@ public:
     @param forDirection The direction of the I/O just completed, or kIODirectionNone for the direction specified by the memory descriptor.
     @result An IOReturn code. */
 
-    virtual IOReturn prepare(IODirection forDirection = kIODirectionNone);
+    virtual IOReturn prepare(IODirection forDirection = kIODirectionNone) APPLE_KEXT_OVERRIDE;
 
 /*! @function complete
     @abstract Complete processing of the memory after an I/O transfer finishes.
@@ -104,7 +104,28 @@ public:
     @param forDirection The direction of the I/O just completed, or kIODirectionNone for the direction specified by the memory descriptor.
     @result An IOReturn code. */
 
-    virtual IOReturn complete(IODirection forDirection = kIODirectionNone);
+    virtual IOReturn complete(IODirection forDirection = kIODirectionNone) APPLE_KEXT_OVERRIDE;
+
+    virtual IOReturn setPurgeable(IOOptionBits newState, IOOptionBits * oldState) APPLE_KEXT_OVERRIDE;
+
+/*! @function getPageCounts
+    @abstract Retrieve the number of resident and/or dirty pages encompassed by an IOMemoryDescriptor.
+    @discussion This method returns the number of resident and/or dirty pages encompassed by an IOMemoryDescriptor.
+    @param residentPageCount - If non-null, a pointer to a byte count that will return the number of resident pages encompassed by this IOMemoryDescriptor.
+    @param dirtyPageCount - If non-null, a pointer to a byte count that will return the number of dirty pages encompassed by this IOMemoryDescriptor.
+    @result An IOReturn code. */
+
+    IOReturn getPageCounts(IOByteCount * residentPageCount,
+                           IOByteCount * dirtyPageCount);
+
+#define IOMULTIMEMORYDESCRIPTOR_SUPPORTS_GETPAGECOUNTS 1
+
+private:
+    virtual IOReturn doMap(vm_map_t           addressMap,
+                           IOVirtualAddress * atAddress,
+                           IOOptionBits       options,
+                           IOByteCount        sourceOffset = 0,
+                           IOByteCount        length = 0 ) APPLE_KEXT_OVERRIDE;
 };
 
 #endif /* !_IOMULTIMEMORYDESCRIPTOR_H */
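
getPageCounts() is the substantive addition here; the IOMULTIMEMORYDESCRIPTOR_SUPPORTS_GETPAGECOUNTS define lets clients probe for it at compile time. A minimal caller (despite the IOByteCount out-parameter type, the values are page counts per the description above):

#if IOMULTIMEMORYDESCRIPTOR_SUPPORTS_GETPAGECOUNTS
static void logPageCounts(IOMultiMemoryDescriptor * md)
{
    IOByteCount resident = 0, dirty = 0;
    if (kIOReturnSuccess == md->getPageCounts(&resident, &dirty)) {
        IOLog("resident pages: %lu, dirty pages: %lu\n",
              (unsigned long) resident, (unsigned long) dirty);
    }
}
#endif
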
index 37d4ef4d3d1b6dad129c7e5ef2afac24e556e950..31d5e09671d5065e8d58f46261bf5e959447c4ac 100644 (file)
@@ -74,11 +74,11 @@ private:
   IONVRAMController *_nvramController;
   const OSSymbol    *_registryPropertiesKey;
   UInt8             *_nvramImage;
-  bool              _nvramImageDirty;
+  __unused bool     _nvramImageDirty;
   UInt32            _ofPartitionOffset;
   UInt32            _ofPartitionSize;
   UInt8             *_ofImage;
-  bool              _ofImageDirty;
+  __unused bool     _ofImageDirty;
   OSDictionary      *_ofDict;
   OSDictionary      *_nvramPartitionOffsets;
   OSDictionary      *_nvramPartitionLengths;
@@ -138,20 +138,20 @@ private:
   void initProxyData(void);
   
 public:
-  virtual bool init(IORegistryEntry *old, const IORegistryPlane *plane);
+  virtual bool init(IORegistryEntry *old, const IORegistryPlane *plane) APPLE_KEXT_OVERRIDE;
   
   virtual void registerNVRAMController(IONVRAMController *nvram);
   
   virtual void sync(void);
   
-  virtual bool serializeProperties(OSSerialize *s) const;
-  virtual OSObject *copyProperty(const OSSymbol *aKey) const;
-  virtual OSObject *copyProperty(const char *aKey) const;
-  virtual OSObject *getProperty(const OSSymbol *aKey) const;
-  virtual OSObject *getProperty(const char *aKey) const;
-  virtual bool setProperty(const OSSymbol *aKey, OSObject *anObject);
-  virtual void removeProperty(const OSSymbol *aKey);
-  virtual IOReturn setProperties(OSObject *properties);
+  virtual bool serializeProperties(OSSerialize *s) const APPLE_KEXT_OVERRIDE;
+  virtual OSObject *copyProperty(const OSSymbol *aKey) const APPLE_KEXT_OVERRIDE;
+  virtual OSObject *copyProperty(const char *aKey) const APPLE_KEXT_OVERRIDE;
+  virtual OSObject *getProperty(const OSSymbol *aKey) const APPLE_KEXT_OVERRIDE;
+  virtual OSObject *getProperty(const char *aKey) const APPLE_KEXT_OVERRIDE;
+  virtual bool setProperty(const OSSymbol *aKey, OSObject *anObject) APPLE_KEXT_OVERRIDE;
+  virtual void removeProperty(const OSSymbol *aKey) APPLE_KEXT_OVERRIDE;
+  virtual IOReturn setProperties(OSObject *properties) APPLE_KEXT_OVERRIDE;
   
   virtual IOReturn readXPRAM(IOByteCount offset, UInt8 *buffer,
                             IOByteCount length);
@@ -177,6 +177,7 @@ public:
   
   virtual IOByteCount savePanicInfo(UInt8 *buffer, IOByteCount length);
   virtual bool safeToSync(void);
+  void syncInternal(bool rateLimit);
 };
 
 #endif /* __cplusplus */
index c60affed9f9223f730ba2257f20cfa5100a22864..52e1c366baf63f747f31a4136c1015075ca37d57 100644 (file)
@@ -73,6 +73,7 @@ extern void PESetUTCTimeOfDay( clock_sec_t secs, clock_usec_t usecs );
 
 /* unless it's a "well-known" property, these will read/write out the value as raw data */
 
+extern boolean_t PEWriteNVRAMBooleanProperty(const char *symbol, boolean_t value);
 extern boolean_t PEWriteNVRAMProperty(const char *symbol, const void *value, const unsigned int len);
 
 extern boolean_t PEReadNVRAMProperty(const char *symbol, void *value, unsigned int *len);
@@ -92,6 +93,7 @@ extern const OSSymbol *               gIOPlatformWakeActionKey;
 extern const OSSymbol *                gIOPlatformQuiesceActionKey;
 extern const OSSymbol *                gIOPlatformActiveActionKey;
 extern const OSSymbol *                gIOPlatformHaltRestartActionKey;
+extern const OSSymbol *                gIOPlatformPanicActionKey;
 
 class IORangeAllocator;
 class IONVRAMController;
@@ -130,8 +132,8 @@ protected:
     virtual void PMInstantiatePowerDomains ( void );
 
 public:
-    virtual bool attach( IOService * provider );
-    virtual bool start( IOService * provider );
+    virtual bool attach( IOService * provider ) APPLE_KEXT_OVERRIDE;
+    virtual bool start( IOService * provider ) APPLE_KEXT_OVERRIDE;
     virtual bool configure( IOService * provider );
     virtual IOService * createNub( OSDictionary * from );
 
@@ -165,7 +167,7 @@ public:
     virtual IOReturn callPlatformFunction(const OSSymbol *functionName,
                                          bool waitForFunction,
                                          void *param1, void *param2,
-                                         void *param3, void *param4);
+                                         void *param3, void *param4) APPLE_KEXT_OVERRIDE;
 
     virtual IORangeAllocator * getPhysicalRangeAllocator(void);
 
@@ -217,8 +219,8 @@ private:
 
 public:
     virtual IOService * probe( IOService *     provider,
-                               SInt32    *     score );
-    virtual bool configure( IOService * provider );
+                               SInt32    *     score ) APPLE_KEXT_OVERRIDE;
+    virtual bool configure( IOService * provider ) APPLE_KEXT_OVERRIDE;
 
     virtual void processTopLevel( IORegistryEntry * root );
     virtual const char * deleteList( void ) = 0;
@@ -227,16 +229,16 @@ public:
     virtual bool createNubs( IOService * parent, OSIterator * iter );
 
     virtual bool compareNubName( const IOService * nub, OSString * name,
-                                OSString ** matched = 0 ) const;
+                                OSString ** matched = 0 ) const APPLE_KEXT_OVERRIDE;
 
-    virtual IOReturn getNubResources( IOService * nub );
+    virtual IOReturn getNubResources( IOService * nub ) APPLE_KEXT_OVERRIDE;
 
-    virtual bool getModelName( char * name, int maxLength );
-    virtual bool getMachineName( char * name, int maxLength );
+    virtual bool getModelName( char * name, int maxLength ) APPLE_KEXT_OVERRIDE;
+    virtual bool getMachineName( char * name, int maxLength ) APPLE_KEXT_OVERRIDE;
     
-    virtual void registerNVRAMController( IONVRAMController * nvram );
+    virtual void registerNVRAMController( IONVRAMController * nvram ) APPLE_KEXT_OVERRIDE;
 
-    virtual int haltRestart(unsigned int type);
+    virtual int haltRestart(unsigned int type) APPLE_KEXT_OVERRIDE;
 
     /* virtual */ IOReturn readXPRAM(IOByteCount offset, UInt8 * buffer,
                                     IOByteCount length);
@@ -265,8 +267,8 @@ public:
                                               IOByteCount offset, UInt8 * buffer,
                                               IOByteCount length);
 
-    virtual IOByteCount savePanicInfo(UInt8 *buffer, IOByteCount length);
-    virtual OSString* createSystemSerialNumberString(OSData* myProperty);
+    virtual IOByteCount savePanicInfo(UInt8 *buffer, IOByteCount length) APPLE_KEXT_OVERRIDE;
+    virtual OSString* createSystemSerialNumberString(OSData* myProperty) APPLE_KEXT_OVERRIDE;
 
     OSMetaClassDeclareReservedUnused(IODTPlatformExpert,  0);
     OSMetaClassDeclareReservedUnused(IODTPlatformExpert,  1);
@@ -295,12 +297,17 @@ private:
 public:
     virtual bool initWithArgs( void * p1, void * p2,
                                        void * p3, void *p4 );
-    virtual bool compareName( OSString * name, OSString ** matched = 0 ) const;
+    virtual bool compareName( OSString * name, OSString ** matched = 0 ) const APPLE_KEXT_OVERRIDE;
+
+    virtual IOWorkLoop *getWorkLoop() const APPLE_KEXT_OVERRIDE;
+    virtual IOReturn setProperties( OSObject * properties ) APPLE_KEXT_OVERRIDE;
+
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
-    virtual IOWorkLoop *getWorkLoop() const;
-    virtual IOReturn setProperties( OSObject * properties );
+    virtual IOReturn newUserClient( task_t owningTask, void * securityID,
+                                    UInt32 type,  OSDictionary * properties,
+                                    IOUserClient ** handler) APPLE_KEXT_OVERRIDE;
 
-    virtual void free();
 
     OSMetaClassDeclareReservedUnused(IOPlatformExpertDevice,  0);
     OSMetaClassDeclareReservedUnused(IOPlatformExpertDevice,  1);
@@ -320,9 +327,9 @@ class IOPlatformDevice : public IOService
     ExpansionData *reserved;
 
 public:
-    virtual bool compareName( OSString * name, OSString ** matched = 0 ) const;
-    virtual IOService * matchLocation( IOService * client );
-    virtual IOReturn getResources( void );
+    virtual bool compareName( OSString * name, OSString ** matched = 0 ) const APPLE_KEXT_OVERRIDE;
+    virtual IOService * matchLocation( IOService * client ) APPLE_KEXT_OVERRIDE;
+    virtual IOReturn getResources( void ) APPLE_KEXT_OVERRIDE;
 
     OSMetaClassDeclareReservedUnused(IOPlatformDevice,  0);
     OSMetaClassDeclareReservedUnused(IOPlatformDevice,  1);
index 740ab11c57f1efbfd019931ec656ab2b5322e2fb..584484eabd7c521b3fe42574cf692603ce99ba13 100644 (file)
 #ifndef _IOPOLLEDINTERFACE_H_
 #define _IOPOLLEDINTERFACE_H_
 
-#include <libkern/c++/OSObject.h>
-#include <IOKit/IOMemoryDescriptor.h>
-
-#define kIOPolledInterfaceSupportKey "IOPolledInterface"
-#define kIOPolledInterfaceActiveKey  "IOPolledInterfaceActive"
-
 enum
 {
     kIOPolledPreflightState   = 1,
     kIOPolledBeforeSleepState = 2,
     kIOPolledAfterSleepState  = 3,
-    kIOPolledPostflightState  = 4
+    kIOPolledPostflightState  = 4,
+
+    kIOPolledPreflightCoreDumpState   = 5,
 };
 
+#if defined(__cplusplus)
+
+#include <libkern/c++/OSObject.h>
+#include <IOKit/IOMemoryDescriptor.h>
+
+#define kIOPolledInterfaceSupportKey "IOPolledInterface"
+#define kIOPolledInterfaceActiveKey  "IOPolledInterfaceActive"
+#define kIOPolledInterfaceStackKey   "IOPolledInterfaceStack"
+
 enum
 {
     kIOPolledWrite = 1,
@@ -82,8 +87,6 @@ public:
 
     virtual IOReturn checkForWork(void) = 0;
 
-    static IOReturn checkAllForWork(void);
-
     OSMetaClassDeclareReservedUnused(IOPolledInterface, 0);
     OSMetaClassDeclareReservedUnused(IOPolledInterface, 1);
     OSMetaClassDeclareReservedUnused(IOPolledInterface, 2);
@@ -102,4 +105,152 @@ public:
     OSMetaClassDeclareReservedUnused(IOPolledInterface, 15);
 };
 
+#endif /* defined(__cplusplus) */
+
+#ifdef XNU_KERNEL_PRIVATE
+
+#include <libkern/crypto/aes.h>
+#include <IOKit/IOTypes.h>
+#include <IOKit/IOHibernatePrivate.h>
+
+enum
+{
+    kIOPolledFileSSD = 0x00000001
+};
+
+#if !defined(__cplusplus)
+typedef struct IORegistryEntry IORegistryEntry;
+typedef struct OSData OSData;
+typedef struct OSArray OSArray;
+typedef struct IOMemoryDescriptor IOMemoryDescriptor;
+typedef struct IOPolledFilePollers IOPolledFilePollers;
+#else
+class IOPolledFilePollers;
+#endif
+
+struct IOPolledFileIOVars
+{
+    IOPolledFilePollers              *  pollers;
+    struct kern_direct_file_io_ref_t * fileRef;
+    OSData *                           fileExtents;
+    uint64_t                           block0;
+    IOByteCount                                blockSize;
+    uint64_t                           maxiobytes;
+    IOByteCount                        bufferLimit;
+    uint8_t *                                  buffer;
+    IOByteCount                        bufferSize;
+    IOByteCount                        bufferOffset;
+    IOByteCount                        bufferHalf;
+    IOByteCount                                extentRemaining;
+    IOByteCount                                lastRead;
+    IOByteCount                                readEnd;
+    uint32_t                            flags;
+    uint64_t                           fileSize;
+    uint64_t                           position;
+    uint64_t                           extentPosition;
+    uint64_t                           encryptStart;
+    uint64_t                           encryptEnd;
+    uint64_t                            cryptBytes;
+    AbsoluteTime                        cryptTime;
+    IOPolledFileExtent *               extentMap;
+    IOPolledFileExtent *               currentExtent;
+    bool                               allocated;
+};
+
+typedef struct IOPolledFileIOVars IOPolledFileIOVars;
+
+struct IOPolledFileCryptVars
+{
+    uint8_t aes_iv[AES_BLOCK_SIZE];
+    aes_ctx ctx;
+};
+typedef struct IOPolledFileCryptVars IOPolledFileCryptVars;
+
+#if defined(__cplusplus)
+
+IOReturn IOPolledFileOpen(const char * filename, 
+                         uint64_t setFileSize, uint64_t fsFreeSize,
+                         void * write_file_addr, size_t write_file_len,
+                         IOPolledFileIOVars ** fileVars,
+                         OSData ** imagePath,
+                         uint8_t * volumeCryptKey, size_t keySize);
+
+IOReturn IOPolledFileClose(IOPolledFileIOVars ** pVars,
+                          off_t write_offset, void * addr, size_t write_length,
+                          off_t discard_offset, off_t discard_end);
+
+IOReturn IOPolledFilePollersSetup(IOPolledFileIOVars * vars, uint32_t openState);
+
+IOMemoryDescriptor * IOPolledFileGetIOBuffer(IOPolledFileIOVars * vars);
+
+#endif /* defined(__cplusplus) */
+
+#if defined(__cplusplus)
+#define __C    "C"
+#else
+#define __C
+#endif
+
+extern __C IOReturn IOPolledFileSeek(IOPolledFileIOVars * vars, uint64_t position);
+
+extern __C IOReturn IOPolledFileWrite(IOPolledFileIOVars * vars,
+                          const uint8_t * bytes, IOByteCount size,
+                          IOPolledFileCryptVars * cryptvars);
+extern __C IOReturn IOPolledFileRead(IOPolledFileIOVars * vars,
+                         uint8_t * bytes, IOByteCount size,
+                         IOPolledFileCryptVars * cryptvars);
+
+extern __C IOReturn IOPolledFilePollersOpen(IOPolledFileIOVars * vars, uint32_t state, bool abortable);
+
+extern __C IOReturn IOPolledFilePollersClose(IOPolledFileIOVars * vars, uint32_t state);
+
+extern __C IOPolledFileIOVars * gCoreFileVars;
+
+#ifdef _SYS_CONF_H_
+
+__BEGIN_DECLS
+
+typedef void (*kern_get_file_extents_callback_t)(void * ref, uint64_t start, uint64_t size);
+
+struct kern_direct_file_io_ref_t *
+kern_open_file_for_direct_io(const char * name, boolean_t create_file,
+                            kern_get_file_extents_callback_t callback, 
+                            void * callback_ref,
+                             off_t set_file_size,
+                             off_t fs_free_size,
+                             off_t write_file_offset,
+                             void * write_file_addr,
+                             size_t write_file_len,
+                            dev_t * partition_device_result,
+                            dev_t * image_device_result,
+                             uint64_t * partitionbase_result,
+                             uint64_t * maxiocount_result,
+                             uint32_t * oflags);
+void
+kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref,
+                             off_t write_offset, void * addr, size_t write_length,
+                             off_t discard_offset, off_t discard_end);
+int
+kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, void * addr, size_t len, int ioflag);
+int
+kern_read_file(struct kern_direct_file_io_ref_t * ref, off_t offset, void * addr, size_t len, int ioflag);
+
+struct mount *
+kern_file_mount(struct kern_direct_file_io_ref_t * ref);
+
+enum 
+{
+    kIOPolledFileMountChangeMount = 0x00000101,
+    kIOPolledFileMountChangeUnmount = 0x00000102,
+    kIOPolledFileMountChangeWillResize = 0x00000201,
+    kIOPolledFileMountChangeDidResize = 0x00000202,
+};
+extern void IOPolledFileMountChange(struct mount * mp, uint32_t op);
+
+__END_DECLS
+
+#endif /* _SYS_CONF_H_ */
+
+#endif /* XNU_KERNEL_PRIVATE */
+
 #endif /* _IOPOLLEDINTERFACE_H_ */
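
Taken together, the XNU_KERNEL_PRIVATE block describes the polled file I/O path shared by hibernation and, per kIOPolledPreflightCoreDumpState and gCoreFileVars, core dumps. A rough write-path sketch assembled from the declarations above; the exact call ordering used by the hibernate/core-dump code, the NULL/0 write as a buffer flush, and the file name and sizes are assumptions:

static IOReturn polledWriteExample(void)
{
    IOPolledFileIOVars * vars = NULL;
    OSData *             path = NULL;
    uint8_t              volKey[32];                    // placeholder key buffer

    IOReturn err = IOPolledFileOpen("/var/vm/example",  // placeholder path
                                    64ULL << 20,        // requested file size
                                    0,                  // fsFreeSize
                                    NULL, 0,            // no initial payload
                                    &vars, &path,
                                    volKey, sizeof(volKey));
    if (kIOReturnSuccess != err) return err;

    err = IOPolledFilePollersSetup(vars, kIOPolledPreflightState);
    if (kIOReturnSuccess == err) {
        err = IOPolledFilePollersOpen(vars, kIOPolledBeforeSleepState, false);
    }
    if (kIOReturnSuccess == err) {
        static const uint8_t data[] = "example payload";
        err = IOPolledFileSeek(vars, 0);
        if (kIOReturnSuccess == err) {
            err = IOPolledFileWrite(vars, data, sizeof(data), NULL);
        }
        if (kIOReturnSuccess == err) {
            err = IOPolledFileWrite(vars, NULL, 0, NULL);   // flush (assumption)
        }
        IOPolledFilePollersClose(vars, kIOPolledBeforeSleepState);
    }
    IOPolledFileClose(&vars, 0, NULL, 0, 0, 0);
    return err;
}
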
index d81bbefc2d42249da44189b6caf4d2c06a641078..290194bb4da3ebe1ac7bcd0d5702bb735b5f9701 100644 (file)
@@ -98,8 +98,8 @@ public:
                                        UInt32 capacity = 0,
                                        IOOptionBits options = 0 );
 
-    virtual void free();
-    virtual bool serialize(OSSerialize *s) const;
+    virtual void free() APPLE_KEXT_OVERRIDE;
+    virtual bool serialize(OSSerialize *s) const APPLE_KEXT_OVERRIDE;
 
 /*! @function getFragmentCount
     @abstract Accessor to return the number of free fragments in the range.
index dd34e176484d6a249cab17b863dfaab323a85738..906baaa9f15e9876e71569a42ca17406d0444eef 100644 (file)
@@ -261,7 +261,7 @@ public:
     @abstract Standard free method for all IORegistryEntry subclasses.
     @discussion This method will release any resources of the entry, in particular its property table. Note that the registry entry must always be detached from the registry before free may be called, and subclasses (namely IOService) will have additional protocols for removing registry entries. free should never need be called directly. */
 
-    virtual void free( void );
+    virtual void free( void ) APPLE_KEXT_OVERRIDE;
 
 /*! @function setPropertyTable
     @abstract Replace a registry entry's property table.
@@ -854,7 +854,7 @@ private:
     const IORegistryPlane *    plane;
     IOOptionBits               options;
 
-    virtual void free( void );
+    virtual void free( void ) APPLE_KEXT_OVERRIDE;
 
 public:
 /*! @function iterateOver
@@ -884,7 +884,7 @@ public:
     @discussion This method calls either getNextObjectFlat or getNextObjectRecursive depending on the options the iterator was created with. This implements the OSIterator defined getNextObject method. The object returned is retained while the iterator is pointing at it (its the current entry), or recursing into it. The caller should not release it.
     @result The next registry entry in the iteration (the current entry), or zero if the iteration has finished at this level of recursion. The entry returned is retained while the iterator is pointing at it (its the current entry), or recursing into it. The caller should not release it. */
 
-    virtual IORegistryEntry * getNextObject( void );
+    virtual IORegistryEntry * getNextObject( void ) APPLE_KEXT_OVERRIDE;
 
 /*! @function getNextObjectFlat
     @abstract Return the next object in the registry iteration, ignoring the kIORegistryIterateRecursively option.
@@ -931,18 +931,18 @@ public:
     @abstract Exits all levels of recursion, restoring the iterator to its state at creation.
     @discussion This method exits all levels of recursion, and restores the iterator to its state at creation. */
 
-    virtual void reset( void );
+    virtual void reset( void ) APPLE_KEXT_OVERRIDE;
 
 /*! @function isValid
     @abstract Checks that no registry changes have invalidated the iteration.
     @discussion If a registry iteration is invalidated by changes to the registry, it will be made invalid, the currentEntry will be considered zero, and further calls to getNextObject et al. will return zero. The iterator should be reset to restart the iteration when this happens.
     @result false if the iterator has been invalidated by changes to the registry, true otherwise. */
 
-    virtual bool isValid( void );
+    virtual bool isValid( void ) APPLE_KEXT_OVERRIDE;
 
 /*! @function iterateAll
     @abstract Iterates all entries (with getNextObject) and returns a set of all returned entries.
-    @discussion This method will reset, then iterate all entries in the iteration (with getNextObject) until successful (ie. the iterator is valid at the end of the iteration).
+    @discussion This method will reset, then iterate all entries in the iteration (with getNextObject).
     @result A set of entries returned by the iteration. The caller should release the set when it has finished with it. Zero is returned on a resource failure. */
 
     virtual OSOrderedSet * iterateAll( void );
index af1a1c27a53a807f33c77964ce44efee0daa55a3..b8c6a423987fbf6a42405e5bb78277ace74abc5a 100644 (file)
@@ -556,6 +556,148 @@ do {  \
             ->simple_values[(idx) % IOR_VALUES_PER_ELEMENT])
 
 
+/* ----- Histogram Reporting (HistogramReport) ----- */
+
+// Internal struct for HistogramReport
+typedef struct {
+    int             bucketWidth;
+    IOReportElement elem[]; // Array of elements
+} IOHistReportInfo;
+
+/*
+ * Determine the size required for a HistogramReport buffer.
+ *
+ * int nbuckets - number of buckets in the histogram
+ */
+#define HISTREPORT_BUFSIZE(nbuckets)  \
+    (sizeof(IOHistReportInfo) + ((nbuckets) * sizeof(IOReportElement)))
+
+/*
+ * Initialize a HistogramReport buffer. Supports only linear scale histogram.
+ *
+ *                   int nbuckets - number of buckets data is combined into
+ *           uint32_t bucketWidth - size of each bucket
+ *                  void* buffer - ptr to HISTREPORT_BUFSIZE(nbuckets) bytes
+ *                size_t bufSize - sanity check of buffer's size
+ *           uint64_t providerID - registry Entry ID of the reporting service
+ *            uint64_t channelID - ID of this channel, see IOREPORT_MAKEID()
+ * IOReportCategories categories - categories of this channel
+ *
+ * If the buffer is not of sufficient size, the macro invokes IOREPORT_ABORT.
+ * If that returns, the buffer is filled with 0xbadcafe.
+ */
+#define HISTREPORT_INIT(nbuckets, bktSize, buf, bufSize, providerID, channelID, cats) \
+do {  \
+    IOHistReportInfo   *__info = (IOHistReportInfo *)(buf);  \
+    IOReportElement         *__elem;  \
+    IOHistogramReportValues *__rep;  \
+    if ((bufSize) >= HISTREPORT_BUFSIZE(nbuckets)) {  \
+        __info->bucketWidth = (bktSize);  \
+        for (unsigned __no = 0; __no < (nbuckets); __no++) {  \
+            __elem =  &(__info->elem[__no]);  \
+            __rep = (IOHistogramReportValues *) &(__elem->values);  \
+            __elem->channel_id = (channelID);  \
+            __elem->provider_id = (providerID);  \
+            __elem->channel_type.report_format = kIOReportFormatHistogram;  \
+            __elem->channel_type.reserved = 0;  \
+            __elem->channel_type.categories = (cats);  \
+            __elem->channel_type.nelements = (nbuckets);  \
+            __elem->channel_type.element_idx = __no;  \
+            __elem->timestamp = 0;  \
+            bzero(__rep, sizeof(IOHistogramReportValues)); \
+        }  \
+    }  \
+    else {  \
+        IOREPORT_ABORT("bufSize is smaller than the required size\n");  \
+        __POLLUTE_BUF((buf), (bufSize));  \
+    }  \
+} while (0)
+
+/*
+ * Update histogram with a new value.
+ *
+ *
+ *      void* hist_buf - pointer to memory initialized by HISTREPORT_INIT()
+ *        int64_t value - new value to add to the histogram
+ */
+#define HISTREPORT_TALLYVALUE(hist_buf, value) \
+do {  \
+    IOHistReportInfo   *__info = (IOHistReportInfo *)(hist_buf);  \
+    IOReportElement         *__elem;  \
+    IOHistogramReportValues *__rep;  \
+    for (unsigned __no = 0; __no < __info->elem[0].channel_type.nelements; __no++) {  \
+        if ((value) <= __info->bucketWidth * (__no+1)) {  \
+            __elem =  &(__info->elem[__no]);  \
+            __rep = (IOHistogramReportValues *) &(__elem->values);  \
+            if (__rep->bucket_hits == 0) {  \
+                __rep->bucket_min = __rep->bucket_max = (value);  \
+            }  \
+            else if ((value) < __rep->bucket_min) {  \
+                __rep->bucket_min = (value);  \
+            }  \
+            else if ((value) > __rep->bucket_max) {  \
+                __rep->bucket_max = (value);  \
+            }  \
+            __rep->bucket_sum += (value);  \
+            __rep->bucket_hits++;  \
+            break;  \
+        }  \
+    }  \
+} while (0)
+
+/*
+ * Prepare a HistogramReport for
+ * IOService::updateReport(kIOReportCopyChannelData...)
+ *
+ *      void* array_buf - ptr to memory initialized by HISTREPORT_INIT()
+ *        void* ptr2cpy - filled in with pointer to buffer to be copied out
+ *      size_t size2cpy - filled in with the size of the buffer to copy out
+ */
+
+#define HISTREPORT_UPDATEPREP(hist_buf, ptr2cpy, size2cpy) \
+do {  \
+    IOHistReportInfo   *__info = (IOHistReportInfo *)(hist_buf);  \
+    (size2cpy) = __info->elem[0].channel_type.nelements * sizeof(IOReportElement);  \
+    (ptr2cpy) =  (void *) &__info->elem[0];  \
+} while(0)
+
+
+/*
+ * Update the result field received as a parameter for kIOReportGetDimensions &
+ * kIOReportCopyChannelData actions.
+ *
+ *                void* array_buf - memory initialized by HISTREPORT_INIT()
+ * IOReportConfigureAction action - configure/updateReport() 'action'
+ *                   void* result - configure/updateReport() 'result'
+ */
+
+#define HISTREPORT_UPDATERES(hist_buf, action, result) \
+do {  \
+    IOHistReportInfo   *__info = (IOHistReportInfo *)(hist_buf);  \
+    int *__nElements = (int *)(result);  \
+    if (((action) == kIOReportGetDimensions) || ((action) == kIOReportCopyChannelData)) {  \
+        *__nElements += __info->elem[0].channel_type.nelements;  \
+    }  \
+} while (0)
+
+/*
+ * Get the 64-bit channel ID of a HistogramReport.
+ *
+ * void* hist_buf - ptr to memory initialized by HISTREPORT_INIT()
+ */
+#define HISTREPORT_GETCHID(hist_buf)  \
+    (((IOHistReportInfo *)(hist_buf))->elem[0].channel_id)
+
+/*
+ * Get the IOReportChannelType of a HistogramReport.
+ *
+ * void* hist_buf - ptr to memory initialized by HISTREPORT_INIT()
+ */
+#define HISTREPORT_GETCHTYPE(hist_buf)  \
+    (*(uint64_t*)&(((IOHistReportInfo *)(hist_buf))->elem[0].channel_type))
+
+
+
 /* generic utilities */
 
     #define __POLLUTE_BUF(buf, bufSize)  \
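
A compact usage sketch of the new histogram macros, assembled from the definitions above; the bucket count and width, the channel ID, and the use of kIOReportCategoryPerformance (assumed to be one of the standard IOReportTypes.h categories) are placeholder choices:

#define MY_NBUCKETS 4
static void * sHistBuf;   // HISTREPORT_BUFSIZE(MY_NBUCKETS) bytes, heap-allocated for alignment

static void histExampleInit(uint64_t providerID)
{
    // IOMalloc comes from <IOKit/IOLib.h>
    sHistBuf = IOMalloc(HISTREPORT_BUFSIZE(MY_NBUCKETS));
    if (!sHistBuf) return;
    // 4 buckets of width 25 covering values 0..100
    HISTREPORT_INIT(MY_NBUCKETS, 25, sHistBuf, HISTREPORT_BUFSIZE(MY_NBUCKETS),
                    providerID,
                    IOREPORT_MAKEID('E','x','H','i','s','t','0','1'),
                    kIOReportCategoryPerformance);
}

static void histExampleTally(int64_t value)
{
    // values above the last bucket boundary fall through the loop and are dropped
    HISTREPORT_TALLYVALUE(sHistBuf, value);
}

static void histExampleUpdate(IOReportConfigureAction action, void * result,
                              void ** ptr2cpy, size_t * size2cpy)
{
    // bump the caller's element count for get-dimensions / copy-channel-data...
    HISTREPORT_UPDATERES(sHistBuf, action, result);
    // ...and expose the element array for copy-out
    HISTREPORT_UPDATEPREP(sHistBuf, *ptr2cpy, *size2cpy);
}
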
index d4071b9aade1d3724bf1b8bd4cb02d70d2c1021b..83d05ce0a7e798d7f786cf0c38a230e1ae8c5854 100644 (file)
@@ -61,13 +61,16 @@ typedef     kern_return_t           IOReturn;
 #define sub_iokit_powermanagement         err_sub(13)
 #define sub_iokit_hidsystem             err_sub(14)
 #define sub_iokit_scsi                    err_sub(16)
+#define sub_iokit_usbaudio                err_sub(17)
 //#define sub_iokit_pccard                err_sub(21)
 #ifdef PRIVATE
 #define sub_iokit_nvme                    err_sub(28)
 #endif
 #define sub_iokit_thunderbolt             err_sub(29)
-    
+#define sub_iokit_platform                               err_sub(0x2A)
 #define sub_iokit_audio_video             err_sub(0x45)
+#define sub_iokit_baseband                err_sub(0x80)
+#define sub_iokit_HDA                     err_sub(254)
 #define sub_iokit_hsic                    err_sub(0x147)
 #define sub_iokit_sdio                    err_sub(0x174)
 #define sub_iokit_wlan                    err_sub(0x208)
index 5d5c093bcf80b3e4d8b6f363416921dbbda60d01..e369da9b4d5f63bb0e121a65008b0f34956194ae 100644 (file)
@@ -607,17 +607,17 @@ public:
 
 /*! @function init
     @abstract Initializes generic IOService data structures (expansion data, etc). */
-    virtual bool init( OSDictionary * dictionary = 0 );
+    virtual bool init( OSDictionary * dictionary = 0 ) APPLE_KEXT_OVERRIDE;
 
 /*! @function init
     @abstract Initializes generic IOService data structures (expansion data, etc). */
     virtual bool init( IORegistryEntry * from,
-                       const IORegistryPlane * inPlane );
+                       const IORegistryPlane * inPlane ) APPLE_KEXT_OVERRIDE;
 
 /*! @function free
     @abstract Frees data structures that were allocated when power management was initialized on this service. */
     
-    virtual void free( void );
+    virtual void free( void ) APPLE_KEXT_OVERRIDE;
 
 /*! @function lockForArbitration
     @abstract Locks an IOService object against changes in state or ownership.
@@ -1258,7 +1258,7 @@ public:
     IOInterruptSource *_interruptSources;
 
     /* overrides */
-    virtual bool serializeProperties( OSSerialize * s ) const;
+    virtual bool serializeProperties( OSSerialize * s ) const APPLE_KEXT_OVERRIDE;
 
 #ifdef KERNEL_PRIVATE
     /* Apple only SPI to control CPU low power modes */
@@ -1285,6 +1285,7 @@ public:
     void setTerminateDefer(IOService * provider, bool defer);
     uint64_t getAuthorizationID( void );
     IOReturn setAuthorizationID( uint64_t authorizationID );
+    void cpusRunning(void);
 
 private:
     static IOReturn waitMatchIdle( UInt32 ms );
@@ -1813,6 +1814,7 @@ public:
     IOReturn changePowerStateWithOverrideTo( IOPMPowerStateIndex ordinal, IOPMRequestTag tag );
     IOReturn changePowerStateForRootDomain( IOPMPowerStateIndex ordinal );
     IOReturn setIgnoreIdleTimer( bool ignore );
+    IOReturn quiescePowerTree( void * target, IOPMCompletionAction action, void * param );
     uint32_t getPowerStateForClient( const OSSymbol * client );
     static const char * getIOMessageString( uint32_t msg );
     static void setAdvisoryTickleEnable( bool enable );
@@ -1879,6 +1881,8 @@ private:
     void stop_ack_timer ( void );
     void start_ack_timer( UInt32 value, UInt32 scale );
     void startSettleTimer( void );
+    void start_spindump_timer( const char * delay_type );
+    void stop_spindump_timer( void );
     bool checkForDone ( void );
     bool responseValid ( uint32_t x, int pid );
     void computeDesiredState( unsigned long tempDesire, bool computeOnly );
@@ -1888,8 +1892,10 @@ private:
 
     static void ack_timer_expired( thread_call_param_t, thread_call_param_t );
     static void watchdog_timer_expired ( thread_call_param_t arg0, thread_call_param_t arg1 );
+    static void spindump_timer_expired( thread_call_param_t arg0, thread_call_param_t arg1 );
     static IOReturn actionAckTimerExpired(OSObject *, void *, void *, void *, void * );
     static IOReturn watchdog_timer_expired ( OSObject *, void *, void *, void *, void * );
+    static IOReturn actionSpinDumpTimerExpired(OSObject *, void *, void *, void *, void * );
 
     static IOReturn actionDriverCalloutDone(OSObject *, void *, void *, void *, void * );
     static IOPMRequest * acquirePMRequest( IOService * target, IOOptionBits type, IOPMRequest * active = 0 );
@@ -1899,6 +1905,8 @@ private:
     static void pmTellClientWithResponse( OSObject * object, void * context );
     static void pmTellCapabilityAppWithResponse ( OSObject * object, void * arg );
     static void pmTellCapabilityClientWithResponse( OSObject * object, void * arg );
+    static void submitPMRequest( IOPMRequest * request );
+    static void submitPMRequests( IOPMRequest ** request, IOItemCount count );
     bool ackTimerTick( void );
     void addPowerChild1( IOPMRequest * request );
     void addPowerChild2( IOPMRequest * request );
@@ -1914,14 +1922,12 @@ private:
     void handleActivityTickle( IOPMRequest * request );
     void handleInterestChanged( IOPMRequest * request );
     void handleSynchronizePowerTree( IOPMRequest * request );
-    void submitPMRequest( IOPMRequest * request );
-    void submitPMRequest( IOPMRequest ** request, IOItemCount count );
     void executePMRequest( IOPMRequest * request );
-    bool servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue  );
-    bool retirePMRequest(  IOPMRequest * request, IOPMWorkQueue * queue );
-    bool servicePMRequestQueue( IOPMRequest * request, IOPMRequestQueue * queue );
-    bool servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * queue );
-    bool servicePMFreeQueue( IOPMRequest * request, IOPMCompletionQueue * queue );
+    bool actionPMWorkQueueInvoke( IOPMRequest * request, IOPMWorkQueue * queue );
+    bool actionPMWorkQueueRetire( IOPMRequest * request, IOPMWorkQueue * queue );
+    bool actionPMRequestQueue( IOPMRequest * request, IOPMRequestQueue * queue );
+    bool actionPMReplyQueue( IOPMRequest * request, IOPMRequestQueue * queue );
+    bool actionPMCompletionQueue( IOPMRequest * request, IOPMCompletionQueue * queue );
     bool notifyInterestedDrivers( void );
     void notifyInterestedDriversDone( void );
     bool notifyControllingDriver( void );
index 27d871234588df5c93c5677d5c41126f921c6c29..17662cb4e081a4e2b673800bc13fcb1fbd1cbb17 100644 (file)
@@ -47,6 +47,8 @@ class IOPMRequest;
 class IOPMRequestQueue;
 class IOPMCompletionQueue;
 
+typedef void (*IOPMCompletionAction)(void * target, void * param);
+
 // PM channels for IOReporting
 #ifndef kPMPowerStatesChID
 #define kPMPowerStatesChID  IOREPORT_MAKEID('P','M','S','t','H','i','s','t')
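
A completion callback matching the IOPMCompletionAction typedef added above might look like the following sketch; the name myQuiesceDone and the log message are illustrative, not part of this commit, and the pairing with quiescePowerTree() (declared earlier in this diff) is assumed from the matching parameter list.

    // Hypothetical callback for: typedef void (*IOPMCompletionAction)(void * target, void * param);
    static void myQuiesceDone(void * target, void * param)
    {
        // 'target' and 'param' are handed back exactly as supplied by the caller.
        IOLog("PM: power tree quiesce complete\n");
    }

    // Illustrative call site:
    //   quiescePowerTree(this, &myQuiesceDone, NULL);
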
index 09c5dc17e41f010ef64f21ff9af6197b741c8e2b..16e7cdde27a529b5b6d6f78ef20bf6319d2938b1 100644 (file)
@@ -64,7 +64,7 @@ class IOSharedDataQueue : public IODataQueue
     ExpansionData * _reserved;
 
 protected:
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
     /*!
      * @function getQueueSize
@@ -110,7 +110,7 @@ public:
      * @param size The size of the data queue memory region.
      * @result Returns true on success and false on failure.
      */
-    virtual Boolean initWithCapacity(UInt32 size);
+    virtual Boolean initWithCapacity(UInt32 size) APPLE_KEXT_OVERRIDE;
 
     /*!
      * @function getMemoryDescriptor
@@ -118,7 +118,7 @@ public:
      * @discussion The IOMemoryDescriptor instance returned by this method is intended to be mapped into a user process.  This is the memory region that the IODataQueueClient code operates on.
      * @result Returns a newly allocated IOMemoryDescriptor for the IODataQueueMemory region.  Returns zero on failure.
      */
-    virtual IOMemoryDescriptor *getMemoryDescriptor();
+    virtual IOMemoryDescriptor *getMemoryDescriptor() APPLE_KEXT_OVERRIDE;
 
     /*!
      * @function peek
@@ -146,7 +146,7 @@ public:
      * @param dataSize Size of the data pointed to by data.
      * @result Returns true on success and false on failure.  Typically failure means that the queue is full.
      */
-    virtual Boolean enqueue(void *data, UInt32 dataSize);
+    virtual Boolean enqueue(void *data, UInt32 dataSize) APPLE_KEXT_OVERRIDE;
 
     OSMetaClassDeclareReservedUnused(IOSharedDataQueue, 0);
     OSMetaClassDeclareReservedUnused(IOSharedDataQueue, 1);
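
The APPLE_KEXT_OVERRIDE annotations added throughout this commit expand to the C++11 override specifier on compilers that support it, so a mismatched virtual signature fails at compile time instead of silently declaring a new method. A minimal sketch of the effect in a hypothetical subclass (MyQueue is illustrative):

    class MyQueue : public IOSharedDataQueue
    {
        OSDeclareDefaultStructors(MyQueue)
    public:
        // Matches the base declaration, so 'override' is satisfied.
        virtual Boolean enqueue(void *data, UInt32 dataSize) APPLE_KEXT_OVERRIDE;

        // Would no longer compile: no base virtual takes a UInt64 size,
        // so the annotation flags the typo rather than hiding it.
        // virtual Boolean enqueue(void *data, UInt64 dataSize) APPLE_KEXT_OVERRIDE;
    };
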
index 0093ea32fea70f9b243d8be14863010d77135bdf..03f1850af2fde9256c8204596831a5fb363eb587 100644 (file)
@@ -43,7 +43,7 @@ protected:
     IOMemoryDescriptor * _parent;
     IOByteCount         _start;
 
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
 public:
 /*! @function withSubRange
@@ -79,19 +79,20 @@ public:
 
     virtual addr64_t getPhysicalSegment( IOByteCount   offset,
                                          IOByteCount * length,
-                                         IOOptionBits  options = 0 );
+                                         IOOptionBits  options = 0 ) APPLE_KEXT_OVERRIDE;
 
-    virtual IOReturn prepare(IODirection forDirection = kIODirectionNone);
+    virtual IOReturn prepare(IODirection forDirection = kIODirectionNone) APPLE_KEXT_OVERRIDE;
 
-    virtual IOReturn complete(IODirection forDirection = kIODirectionNone);
+    virtual IOReturn complete(IODirection forDirection = kIODirectionNone) APPLE_KEXT_OVERRIDE;
 
 #ifdef __LP64__
-    virtual
-#endif /* __LP64__ */
+    virtual IOReturn redirect( task_t safeTask, bool redirect ) APPLE_KEXT_OVERRIDE;
+#else
     IOReturn redirect( task_t safeTask, bool redirect );
+#endif /* __LP64__ */
 
     virtual IOReturn setPurgeable( IOOptionBits newState,
-                                    IOOptionBits * oldState );
+                                    IOOptionBits * oldState ) APPLE_KEXT_OVERRIDE;
 
     // support map() on kIOMemoryTypeVirtual without prepare()
     virtual IOMemoryMap *      makeMapping(
@@ -100,10 +101,19 @@ public:
        IOVirtualAddress        atAddress,
        IOOptionBits            options,
        IOByteCount             offset,
-       IOByteCount             length );
+       IOByteCount             length ) APPLE_KEXT_OVERRIDE;
+
+    virtual uint64_t getPreparationID( void ) APPLE_KEXT_OVERRIDE;
 
-       virtual uint64_t getPreparationID( void );
+/*! @function getPageCounts
+    @abstract Retrieve the number of resident and/or dirty pages encompassed by an IOMemoryDescriptor.
+    @discussion This method returns the number of resident and/or dirty pages encompassed by an IOMemoryDescriptor.
+    @param residentPageCount - If non-null, a pointer to a byte count that will return the number of resident pages encompassed by this IOMemoryDescriptor.
+    @param dirtyPageCount - If non-null, a pointer to a byte count that will return the number of dirty pages encompassed by this IOMemoryDescriptor.
+    @result An IOReturn code. */
 
+    IOReturn getPageCounts(IOByteCount * residentPageCount,
+                           IOByteCount * dirtyPageCount);
 };
 
 #endif /* !_IOSUBMEMORYDESCRIPTOR_H */
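
A hedged usage sketch of the getPageCounts() addition above; the descriptor pointer 'md' and the logging are placeholders, and either out-parameter may be passed as NULL per the headerdoc.

    IOByteCount resident = 0, dirty = 0;
    IOReturn ret = md->getPageCounts(&resident, &dirty);    // md: an IOSubMemoryDescriptor *
    if (kIOReturnSuccess == ret) {
        IOLog("resident=%llu dirty=%llu\n", (uint64_t) resident, (uint64_t) dirty);
    }
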
index f6dfce383a269a36f3470747c38cae931ce5b8c5..dbdb443c10f1fab460e8ee28dcea58e3c98a457b 100644 (file)
@@ -41,7 +41,7 @@ private:
     IOSimpleLock *guardLock;
     volatile bool threadMustStop;
     IOReturn fResult;
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
     virtual void privateSignal();
 
 public:
index b063778a363af20ea903fb45efb233878f6c726a..ab589f0d0daed31a40499cc9dd5bea9cd1bf8390 100644 (file)
@@ -190,8 +190,16 @@ IOTimeStamp(uintptr_t csc,
 #define IOSERVICE_TERMINATE_STOP_DEFER         16      /* 0x05080040 */
 #define IOSERVICE_TERMINATE_DONE               17      /* 0x05080044 */
 
-#define IOSERVICE_KEXTD_ALIVE          18      /* 0x05080048 */
-#define IOSERVICE_KEXTD_READY          19      /* 0x0508004C */
+#define IOSERVICE_KEXTD_ALIVE                  18      /* 0x05080048 */
+#define IOSERVICE_KEXTD_READY                  19      /* 0x0508004C */
 #define IOSERVICE_REGISTRY_QUIET               20      /* 0x05080050 */
 
+#define IOSERVICE_TERM_SET_INACTIVE            21      /* 0x05080054 */
+#define IOSERVICE_TERM_SCHED_PHASE2            22      /* 0x05080058 */
+#define IOSERVICE_TERM_START_PHASE2            23      /* 0x0508005C */
+#define IOSERVICE_TERM_TRY_PHASE2              24      /* 0x05080060 */
+#define IOSERVICE_TERM_UC_DEFER                        25      /* 0x05080064 */
+#define IOSERVICE_DETACH                       26      /* 0x05080068 */
+
+
 #endif /* ! IOKIT_IOTIMESTAMP_H */
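
The new IOSERVICE_TERM_* and IOSERVICE_DETACH codes follow the same encoding as their neighbours: the 0x0508xxxx trace-class prefix plus the ordinal shifted left by two, since kdebug reserves the low two bits for function start/end flags. A quick consistency check (the name kIOServiceTraceBase is illustrative; only the arithmetic is asserted):

    enum { kIOServiceTraceBase = 0x05080000 };

    static_assert((kIOServiceTraceBase | (21 << 2)) == 0x05080054,   // IOSERVICE_TERM_SET_INACTIVE
                  "matches the comment above");
    static_assert((kIOServiceTraceBase | (26 << 2)) == 0x05080068,   // IOSERVICE_DETACH
                  "matches the comment above");
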
index bbbeaf964104664bf80f003369295b4d1802ed6b..f5accffa3f5d4f8f3a4f34b6449a568b70da3d00 100644 (file)
@@ -93,9 +93,9 @@ protected:
 
 /*! @function free
     @abstract Sub-class implementation of free method, frees calloutEntry */
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
-    virtual void setWorkLoop(IOWorkLoop *workLoop);
+    virtual void setWorkLoop(IOWorkLoop *workLoop) APPLE_KEXT_OVERRIDE;
 
 public:
 
@@ -121,12 +121,12 @@ public:
 /*! @function enable
     @abstract Enables a call to the action.
     @discussion Allows the action function to be called.  If the timer event source was disabled while a call was outstanding and the call wasn't cancelled then it will be rescheduled.  So a disable/enable pair will disable calls from this event source. */
-    virtual void enable();
+    virtual void enable() APPLE_KEXT_OVERRIDE;
 
 /*! @function disable
     @abstract Disable a timed callout.
     @discussion When disable returns the action will not be called until the next time enable(qv) is called. */
-    virtual void disable();
+    virtual void disable() APPLE_KEXT_OVERRIDE;
 
 
 /*! @function setTimeoutTicks
index aa8afd0573db6647c3eab32a67e2925ac96b0ab7..6d21a129446b711c836c122c9d5e4d018e1e53f9 100644 (file)
@@ -219,6 +219,7 @@ enum {
     kIOMap64Bit                        = 0x08000000,
 #endif
     kIOMapPrefault             = 0x10000000,
+    kIOMapOverwrite     = 0x20000000
 };
 
 /*! @enum Scale Factors
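
kIOMapOverwrite joins the existing kIOMap* option bits and, like them, is OR-ed into the IOOptionBits for a mapping request; by analogy with the neighbouring bits it plausibly allows replacing an existing mapping at a caller-chosen address, though this hunk does not document the semantics. A hedged sketch (descriptor and address are placeholders):

    mach_vm_address_t atAddress = 0;   // placeholder: address of an existing mapping
    IOMemoryMap * map = md->createMappingInTask(kernel_task, atAddress,
                                                kIOMapOverwrite);   // md: an IOMemoryDescriptor *
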
index c50e3ead9305ab1cc93a24ddcb02164ae261f63f..6286b853584802eec8688259c682ed2d8c712ae7 100644 (file)
@@ -309,8 +309,8 @@ public:
     */
     static IOReturn releaseNotificationPort(mach_port_t port);
 
-    virtual bool init();
-    virtual bool init( OSDictionary * dictionary );
+    virtual bool init() APPLE_KEXT_OVERRIDE;
+    virtual bool init( OSDictionary * dictionary ) APPLE_KEXT_OVERRIDE;
     // Currently ignores all the args, just passes up to IOService::init()
     virtual bool initWithTask(
                     task_t owningTask, void * securityToken, UInt32 type,
@@ -319,7 +319,7 @@ public:
     virtual bool initWithTask(
                     task_t owningTask, void * securityToken, UInt32 type);
 
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
     virtual IOReturn clientClose( void );
     virtual IOReturn clientDied( void );
index e248a9b3bd7cabf0fb78de278045c88f1c556d2f..2db7b17ef7f19bb273ca660e1aa59ba9ae879201 100644 (file)
@@ -169,7 +169,7 @@ protected:
 <br><br>
        If the client has some outstanding requests on an event, they will never be informed of completion.  If an external thread is blocked on any of the event sources, they will be awakened with a KERN_INTERRUPTED status. 
 */
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
 /*! @function threadMain
     @discussion Work loop thread's main function.  This function consists of 3
@@ -201,7 +201,7 @@ public:
     @discussion Initializes an instance of the workloop.  This method creates and initializes the signaling semaphore, the controller gate lock, and spawns the thread that will continue executing.
     @result Returns true if initialized successfully, false otherwise. 
 */
-    virtual bool init();
+    virtual bool init() APPLE_KEXT_OVERRIDE;
 
 /*! @function getThread
     @abstract Gets the workThread.
@@ -264,6 +264,7 @@ protected:
     // Internal APIs used by event sources to control the thread
     friend class IOEventSource;
     friend class IOTimerEventSource;
+    friend class IOCommandGate;
 #if IOKITSTATS
     friend class IOStatistics;
 #endif
index 307d666a6d91058546ff420902f613d956ea94c2..29b2b722e60844f552419945bd01aefb652cc14e 100644 (file)
@@ -36,7 +36,7 @@ class IONVRAMController: public IOService
   OSDeclareAbstractStructors(IONVRAMController);
   
 public:
-  virtual bool start(IOService *provider);
+  virtual void registerService(IOOptionBits options = 0) APPLE_KEXT_OVERRIDE;
   
   virtual void sync(void);
   
index ae12eca13e53e37e47edc71fa5bf7ebc0b65ecc6..2c72d58c54af8f56fb8eedc4d03f1786cb1d4010 100644 (file)
@@ -54,7 +54,7 @@ protected:
     virtual bool selfTest( void );
 
 public:
-    virtual bool start(        IOService * provider );
+    virtual bool start(        IOService * provider ) APPLE_KEXT_OVERRIDE;
 
     virtual IOService * createNub( IORegistryEntry * from );
 
index 8d033fd2541f8051b389e6c87e1d3337c8e55bb5..7a676e116d79a9225c96e096e13fd86bf70b1e59 100644 (file)
@@ -47,9 +47,9 @@ private:
   ExpansionData *reserved;
 
 public:
-  virtual bool compareName( OSString * name, OSString ** matched = 0 ) const;
-  virtual IOService *matchLocation(IOService *client);
-  virtual IOReturn getResources( void );
+  virtual bool compareName( OSString * name, OSString ** matched = 0 ) const APPLE_KEXT_OVERRIDE;
+  virtual IOService *matchLocation(IOService *client) APPLE_KEXT_OVERRIDE;
+  virtual IOReturn getResources( void ) APPLE_KEXT_OVERRIDE;
     
   OSMetaClassDeclareReservedUnused(AppleMacIODevice,  0);
   OSMetaClassDeclareReservedUnused(AppleMacIODevice,  1);
index 14cd1ed10260dcd75329c3289d75fb78c1517890..216f22074bd814aa373a971665e3ea4c817d3ca9 100644 (file)
@@ -60,12 +60,12 @@ private:
 
 public:
   IOService *rootDomain;
-  virtual bool start(IOService *provider);
+  virtual bool start(IOService *provider) APPLE_KEXT_OVERRIDE;
   virtual IOReturn initNMI(IOInterruptController *parentController, OSData *parentSource);
   virtual IOReturn handleInterrupt(void *refCon, IOService *nub, int source);
 
   // Power handling methods:
-  virtual IOReturn powerStateWillChangeTo(IOPMPowerFlags, unsigned long, IOService*);
+  virtual IOReturn powerStateWillChangeTo(IOPMPowerFlags, unsigned long, IOService*) APPLE_KEXT_OVERRIDE;
 
   OSMetaClassDeclareReservedUnused(AppleNMI,  0);
   OSMetaClassDeclareReservedUnused(AppleNMI,  1);
index 59728d23d53167bcf401f67945389d1f412f8c78..0f75950d3ab89f14719a0cc89cb5f5abfbf52f55 100644 (file)
@@ -69,17 +69,17 @@ private:
   ExpansionData *reserved;
 
 public:
-  virtual bool start( IOService * provider );
-  virtual bool configure( IOService * provider );
-  virtual const char * deleteList( void );
-  virtual const char * excludeList( void );
+  virtual bool start( IOService * provider ) APPLE_KEXT_OVERRIDE;
+  virtual bool configure( IOService * provider ) APPLE_KEXT_OVERRIDE;
+  virtual const char * deleteList( void ) APPLE_KEXT_OVERRIDE;
+  virtual const char * excludeList( void ) APPLE_KEXT_OVERRIDE;
   
-  virtual void registerNVRAMController( IONVRAMController * nvram );
+  virtual void registerNVRAMController( IONVRAMController * nvram ) APPLE_KEXT_OVERRIDE;
   
-  virtual long getGMTTimeOfDay(void);
-  virtual void setGMTTimeOfDay(long secs);
+  virtual long getGMTTimeOfDay(void) APPLE_KEXT_OVERRIDE;
+  virtual void setGMTTimeOfDay(long secs) APPLE_KEXT_OVERRIDE;
   
-  virtual bool getMachineName(char *name, int maxLength);
+  virtual bool getMachineName(char *name, int maxLength) APPLE_KEXT_OVERRIDE;
 
   OSMetaClassDeclareReservedUnused(ApplePlatformExpert,  0);
   OSMetaClassDeclareReservedUnused(ApplePlatformExpert,  1);
index cb1b62744efa0027de79f0346240fc7162d54764..cd0db25bf743613106d120db491f2a7640138c10 100644 (file)
@@ -218,9 +218,9 @@ class IOPMPowerSource : public IOService
 */
     static IOPMPowerSource *powerSource(void);
 
-    virtual bool init(void);
+    virtual bool init(void) APPLE_KEXT_OVERRIDE;
     
-    virtual void free(void);
+    virtual void free(void) APPLE_KEXT_OVERRIDE;
 
 /*! @function updateStatus
     @abstract Must be called by physical battery controller when battery state
index cb1c8ea37e387081e2a1b93c2707d91b3237cc0f..cede5e137414eb372d2f9ac56bd79f3dd5e9d2a7 100644 (file)
@@ -42,7 +42,7 @@ class IOPMPowerSourceList : public OSObject
 
   public:
     void initialize(void);
-    void free(void);
+    void free(void) APPLE_KEXT_OVERRIDE;
 
     unsigned long numberOfItems(void);
     IOReturn addToList(IOPMPowerSource *newPowerSource);
index 0163072dd6d49124e475791ec3796950048b90d3..25226da746b218b28690b561482e99625b0aa971 100644 (file)
@@ -97,6 +97,12 @@ enum {
 #define kIOPMMessageLastCallBeforeSleep \
                 iokit_family_msg(sub_iokit_powermanagement, 0x410)
 
+#define kIOPMMessageIdleSleepPreventers \
+                iokit_family_msg(sub_iokit_powermanagement, 0x420)
+
+#define kIOPMMessageSystemSleepPreventers \
+                iokit_family_msg(sub_iokit_powermanagement, 0x430)
+
 /* @enum SystemSleepReasons
  * @abstract The potential causes for system sleep as logged in the system event record.
  */
@@ -258,6 +264,8 @@ enum {
 // Keys for IOPMrootDomain registry properties
 #define kIOPMSleepStatisticsKey                 "SleepStatistics"
 #define kIOPMSleepStatisticsAppsKey             "AppStatistics"
+#define kIOPMIdleSleepPreventersKey             "IdleSleepPreventers"
+#define kIOPMSystemSleepPreventersKey           "SystemSleepPreventers"
 
 // Application response statistics
 #define kIOPMStatsNameKey                       "Name"
@@ -800,6 +808,9 @@ typedef struct {
 #define SWD_BUF_SIZE            (40*PAGE_SIZE)
 #define SWD_INITIAL_STACK_SIZE  ((SWD_BUF_SIZE/2)-sizeof(swd_hdr))
 
+#define SWD_SPINDUMP_SIZE          (256*1024)
+#define SWD_INITIAL_SPINDUMP_SIZE  ((SWD_SPINDUMP_SIZE/2)-sizeof(swd_hdr))
+
 /* Bits in swd_flags */
 #define SWD_WDOG_ENABLED        0x01
 #define SWD_BOOT_BY_SW_WDOG     0x02
@@ -809,6 +820,7 @@ typedef struct {
 #define SWD_LOGS_IN_MEM         0x20
 
 /* Filenames associated with the stackshots/logs generated by the SWD */
+#define kSleepWakeStackBinFilename          "/var/log/SleepWakeStacks.bin"
 #define kSleepWakeStackFilename             "/var/log/SleepWakeStacks.dump"
 #define kSleepWakeLogFilename               "/var/log/SleepWakeLog.dump"
 #define kAppleOSXWatchdogStackFilename      "/var/log/AppleOSXWatchdogStacks.dump"
@@ -833,6 +845,19 @@ inline char const* getDumpLogFilename(swd_hdr *hdr)
 #define kDarkWkCntChID IOREPORT_MAKEID('G','U','I','W','k','C','n','t')
 #define kUserWkCntChID IOREPORT_MAKEID('D','r','k','W','k','C','n','t')
 
+/*
+ * kAssertDelayChID - Histogram of time elapsed before assertion after wake.
+ */
+#define kAssertDelayBcktCnt     11
+#define kAssertDelayBcktSize    3
+#define kAssertDelayChID IOREPORT_MAKEID('r','d','A','s','r','t','D','l')
+
+/*
+ * kSleepDelaysChID - Histogram of time taken to put system to sleep
+ */
+#define kSleepDelaysBcktCnt     13
+#define kSleepDelaysBcktSize    10
+#define kSleepDelaysChID IOREPORT_MAKEID('r','d','S','l','p','D','l','y')
 
 /* Sleep Options/settings */
 #define kSleepOptionDisplayCapturedModeKey         "DisplayCapturedMode"
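
The new kIOPMMessageIdleSleepPreventers / kIOPMMessageSystemSleepPreventers messages and the matching registry keys indicate the root domain now publishes its preventer lists. A kext could observe the messages with a general-interest handler, sketched below; the handler body and the meaning of messageArgument are assumptions, not documented in this header.

    static IOReturn
    sleepPreventersChanged(void * target, void * refCon, UInt32 messageType,
                           IOService * provider, void * messageArgument, vm_size_t argSize)
    {
        if ((kIOPMMessageIdleSleepPreventers == messageType) ||
            (kIOPMMessageSystemSleepPreventers == messageType))
        {
            // messageArgument plausibly carries the current preventer count; illustrative only.
            IOLog("sleep preventer set changed (msg 0x%x)\n", (unsigned) messageType);
        }
        return kIOReturnSuccess;
    }

    // Registration against the root domain, using the registerInterest() signature
    // shown later in this commit:
    //   rootDomain->registerInterest(gIOGeneralInterest, &sleepPreventersChanged, NULL);
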
index a06eb788c58d48b0e1199e726794a4d54d184cf5..9d4e0c3b91ca07e4a1379c23164aca3e61129b36 100644 (file)
@@ -41,7 +41,7 @@ public:
 
     void initialize( IOService * theObject );
 
-    void free( void );
+    void free( void ) APPLE_KEXT_OVERRIDE;
 
 public:
     IOService *     whatObject;     // interested driver  
index 8efd4a65448d11799000451821c90d0460f9a7f5..f06689def51066efa74cf312da92835099333da8 100644 (file)
@@ -31,6 +31,7 @@
 
 class IOPMinformee;
 class IOService;
+extern uint32_t gCanSleepTimeout;
 
 class IOPMinformeeList : public OSObject
 {
@@ -45,7 +46,7 @@ private:
 
 public:
     void initialize ( void );
-    void free ( void );
+    void free ( void ) APPLE_KEXT_OVERRIDE;
 
     unsigned long numberOfItems ( void );
 
index 41727cdcbfd626944ff62d652a5f0c00d048875c..b950816e0b82bd97e98ba7c688077c840837aa42 100644 (file)
@@ -80,5 +80,6 @@ enum PMLogEnum {
     kPMLogSetPinGroup,              // 52      0x050700d0 - NOT USED
     kPMLogIdleCancel,               // 53      0x050700d4 - device unidle during change
     kPMLogSleepWakeTracePoint,      // 54   0x050700d8 - kIOPMTracePoint markers
+    kPMLogQuiescePowerTree,         // 55   0x050700dc
     kIOPMlogLastEvent
 };
index dd945a43fae1bc938bbc17d199d4d8390eb973d4..f4f8dd0bf4d75cc34ee07aa79f631f01ca50f13a 100644 (file)
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
+
+#ifndef _IOKIT_IOPMPOWERSTATE_H
+#define _IOKIT_IOPMPOWERSTATE_H
+
 #include <IOKit/pwr_mgt/IOPM.h>
 
 /*! @header IOPMpowerState.h
@@ -71,3 +75,5 @@ enum {
     kIOPMPowerStateVersion1 = 1,
     kIOPMPowerStateVersion2 = 2
 };
+
+#endif /* _IOKIT_IOPMPOWERSTATE_H */
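
IOPMpowerState.h gains a proper include guard; the IOPMPowerState struct it declares feeds IOService::registerPowerDriver(). The conventional two-state (off / on) table below is a hedged sketch, with all timing fields zeroed:

    #include <IOKit/pwr_mgt/IOPMpowerState.h>

    // Field order: version, capabilityFlags, outputPowerCharacter, inputPowerRequirement,
    // followed by the static-power and settle/attain timing fields (all zero here).
    static IOPMPowerState gPowerStates[2] =
    {
        { kIOPMPowerStateVersion1, 0,            0,            0,            0, 0, 0, 0, 0, 0, 0, 0 },
        { kIOPMPowerStateVersion1, kIOPMPowerOn, kIOPMPowerOn, kIOPMPowerOn, 0, 0, 0, 0, 0, 0, 0, 0 }
    };

    // Typical driver start(): PMinit(); provider->joinPMtree(this);
    //                         registerPowerDriver(this, gPowerStates, 2);
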
index 071575abfa6170f9b7ef8013a693e5c90f939e25..21ca1a97ae19652454c178ad18031825b129fa3f 100644 (file)
@@ -152,16 +152,16 @@ class IOPMrootDomain: public IOService
 public:
     static IOPMrootDomain * construct( void );
 
-    virtual bool        start( IOService * provider );
-    virtual IOReturn    setAggressiveness( unsigned long, unsigned long );
-    virtual IOReturn    getAggressiveness( unsigned long, unsigned long * );
+    virtual bool        start( IOService * provider ) APPLE_KEXT_OVERRIDE;
+    virtual IOReturn    setAggressiveness( unsigned long, unsigned long ) APPLE_KEXT_OVERRIDE;
+    virtual IOReturn    getAggressiveness( unsigned long, unsigned long * ) APPLE_KEXT_OVERRIDE;
 
     virtual IOReturn    sleepSystem( void );
     IOReturn            sleepSystemOptions( OSDictionary *options );
 
-    virtual IOReturn    setProperties( OSObject * );
-    virtual bool        serializeProperties( OSSerialize * s ) const;
-    virtual OSObject *  copyProperty( const char * aKey ) const;
+    virtual IOReturn    setProperties( OSObject * ) APPLE_KEXT_OVERRIDE;
+    virtual bool        serializeProperties( OSSerialize * s ) const APPLE_KEXT_OVERRIDE;
+    virtual OSObject *  copyProperty( const char * aKey ) const APPLE_KEXT_OVERRIDE;
 
 /*! @function systemPowerEventOccurred
     @abstract Other drivers may inform IOPMrootDomain of system PM events
@@ -329,13 +329,13 @@ public:
     virtual IONotifier * registerInterest(
                                 const OSSymbol * typeOfInterest,
                                 IOServiceInterestHandler handler,
-                                void * target, void * ref = 0 );
+                                void * target, void * ref = 0 ) APPLE_KEXT_OVERRIDE;
 
     virtual IOReturn    callPlatformFunction(
                                 const OSSymbol *functionName,
                                 bool waitForFunction,
                                 void *param1, void *param2,
-                                void *param3, void *param4 );
+                                void *param3, void *param4 ) APPLE_KEXT_OVERRIDE;
 
 /*! @function createPMAssertion
     @abstract Creates an assertion to influence system power behavior.
@@ -392,22 +392,29 @@ public:
     IOReturn restartWithStackshot();
 
 private:
-    virtual IOReturn    changePowerStateTo( unsigned long ordinal );
+    virtual IOReturn    changePowerStateTo( unsigned long ordinal ) APPLE_KEXT_COMPATIBILITY_OVERRIDE;
     virtual IOReturn    changePowerStateToPriv( unsigned long ordinal );
-    virtual IOReturn    requestPowerDomainState( IOPMPowerFlags, IOPowerConnection *, unsigned long );
-    virtual void        powerChangeDone( unsigned long );
-    virtual bool        tellChangeDown( unsigned long );
-    virtual bool        askChangeDown( unsigned long );
-    virtual void        tellChangeUp( unsigned long );
-    virtual void        tellNoChangeDown( unsigned long );
+    virtual IOReturn    requestPowerDomainState( IOPMPowerFlags, IOPowerConnection *, unsigned long ) APPLE_KEXT_OVERRIDE;
+    virtual void        powerChangeDone( unsigned long ) APPLE_KEXT_OVERRIDE;
+    virtual bool        tellChangeDown( unsigned long ) APPLE_KEXT_OVERRIDE;
+    virtual bool        askChangeDown( unsigned long ) APPLE_KEXT_OVERRIDE;
+    virtual void        tellChangeUp( unsigned long ) APPLE_KEXT_OVERRIDE;
+    virtual void        tellNoChangeDown( unsigned long ) APPLE_KEXT_OVERRIDE;
     virtual IOReturn configureReport(IOReportChannelList   *channels,
                                     IOReportConfigureAction action,
                                     void                    *result,
-                                    void                    *destination);
+                                    void                    *destination) APPLE_KEXT_OVERRIDE;
     virtual IOReturn updateReport(IOReportChannelList      *channels,
                                   IOReportUpdateAction     action,
                                   void                     *result,
-                                  void                     *destination);
+                                  void                     *destination) APPLE_KEXT_OVERRIDE;
+
+    void             configureReportGated(uint64_t channel_id,
+                                          uint64_t action,
+                                          void     *result);
+    IOReturn         updateReportGated(uint64_t ch_id, 
+                                       void *result, 
+                                       IOBufferMemoryDescriptor *dest);
 
 #ifdef XNU_KERNEL_PRIVATE
     /* Root Domain internals */
@@ -479,6 +486,8 @@ public:
     void        handleQueueSleepWakeUUID(
                     OSObject *obj);
 
+    void        handleDisplayPowerOn( );
+
     void        willNotifyPowerChildren( IOPMPowerStateIndex newPowerState );
 
     IOReturn    setMaintenanceWakeCalendar(
@@ -538,11 +547,12 @@ public:
                     uint32_t *  hibernateFreeRatio,
                     uint32_t *  hibernateFreeTime );
 #endif
-    void        takeStackshot(bool restart, bool isOSXWatchdog);
+    void        takeStackshot(bool restart, bool isOSXWatchdog, bool isSpinDump);
     void        sleepWakeDebugTrig(bool restart);
     void        sleepWakeDebugEnableWdog();
     bool        sleepWakeDebugIsWdogEnabled();
     static void saveTimeoutAppStackShot(void *p0, void *p1);
+    void        sleepWakeDebugSaveSpinDumpFile();
 
 private:
     friend class PMSettingObject;
@@ -612,6 +622,16 @@ private:
     OSArray                 *pmStatsAppResponses;
     IOLock                  *pmStatsLock;   // guards pmStatsAppResponses
 
+    void                    *sleepDelaysReport;     // report to track time taken to go to sleep
+    uint32_t                sleepDelaysClientCnt;   // Number of interested clients in sleepDelaysReport
+    uint64_t                ts_sleepStart;
+    uint64_t                wake2DarkwakeDelay;      // Time taken to change from full wake -> Dark wake
+
+
+    void                    *assertOnWakeReport;    // report to track time spent without any assertions held after wake
+    uint32_t                assertOnWakeClientCnt;  // Number of clients interested in assertOnWakeReport
+    clock_sec_t             assertOnWakeSecs;       // Num of secs after wake for first assertion
+
     bool                    uuidPublished;
 
     // Pref: idle time before idle sleep
@@ -628,6 +648,7 @@ private:
     thread_call_t           diskSyncCalloutEntry;
     thread_call_t           fullWakeThreadCall;
     thread_call_t           hibDebugSetupEntry;
+    thread_call_t           updateConsoleUsersEntry;
 
     // Track system capabilities.
     uint32_t                _desiredCapability;
@@ -694,6 +715,8 @@ private:
     unsigned int            displayIdleForDemandSleep :1;
     unsigned int            darkWakeHibernateError  :1;
     unsigned int            thermalWarningState:1;
+    unsigned int            toldPowerdCapWillChange :1;
+    unsigned int            displayPowerOnRequested:1;
 
     uint32_t                hibernateMode;
     AbsoluteTime            userActivityTime;
@@ -754,6 +777,7 @@ private:
     volatile uint32_t   swd_lock;    /* Lock to access swd_buffer & and its header */
     void  *             swd_buffer;  /* Memory allocated for dumping sleep/wake logs */
     uint8_t             swd_flags;   /* Flags defined in IOPMPrivate.h */
+    void  *             swd_spindump_buffer;
 
     IOMemoryMap  *      swd_logBufMap; /* Memory with sleep/wake logs from previous boot */
 
@@ -819,12 +843,15 @@ private:
 
     void        deregisterPMSettingObject( PMSettingObject * pmso );
 
+    void        checkForValidDebugData(const char *fname, vfs_context_t *ctx, 
+                                            void *tmpBuf, struct vnode **vp);
     void        sleepWakeDebugMemAlloc( );
+    void        sleepWakeDebugSpinDumpMemAlloc( );
     void        sleepWakeDebugDumpFromMem(IOMemoryMap *logBufMap);
     void        sleepWakeDebugDumpFromFile( );
     IOMemoryMap *sleepWakeDebugRetrieve();
     errno_t     sleepWakeDebugSaveFile(const char *name, char *buf, int len);
-    errno_t sleepWakeDebugCopyFile( struct vnode *srcVp, 
+    errno_t     sleepWakeDebugCopyFile( struct vnode *srcVp,
                                vfs_context_t srcCtx,
                                char *tmpBuf, uint64_t tmpBufSize,
                                uint64_t srcOffset, 
@@ -848,6 +875,7 @@ private:
     void        systemDidNotSleep( void );
     void        preventTransitionToUserActive( bool prevent );
     void        setThermalState(OSObject *value);
+    void        copySleepPreventersList(OSArray  **idleSleepList, OSArray  **systemSleepList);
 #endif /* XNU_KERNEL_PRIVATE */
 };
 
@@ -858,8 +886,8 @@ class IORootParent: public IOService
 
 public:
     static void initialize( void );
-    virtual OSObject * copyProperty( const char * aKey ) const;
-    bool start( IOService * nub );
+    virtual OSObject * copyProperty( const char * aKey ) const APPLE_KEXT_OVERRIDE;
+    bool start( IOService * nub ) APPLE_KEXT_OVERRIDE;
     void shutDownSystem( void );
     void restartSystem( void );
     void sleepSystem( void );
index a89b95bd1f3db0737abf0f9e618586f3d97f971e..ef5db4353b7d5cff6ccfb5e2d1e4556e77b16b44 100644 (file)
@@ -41,9 +41,9 @@ protected:
   ExpansionData *reserved;
   
 public:
-  virtual bool start(IOService *provider);
-  virtual void stop(IOService *provider);
-  virtual IOReturn setProperties(OSObject *properties);
+  virtual bool start(IOService *provider) APPLE_KEXT_OVERRIDE;
+  virtual void stop(IOService *provider) APPLE_KEXT_OVERRIDE;
+  virtual IOReturn setProperties(OSObject *properties) APPLE_KEXT_OVERRIDE;
   virtual void setWatchDogTimer(UInt32 timeOut) = 0;
   
   OSMetaClassDeclareReservedUnused(IOWatchDogTimer,  0);
index 1c486fa0f7ad6ed916d073830d4a5786327193ba..9f190260568723c5ebda960eb7401c2a3521c0e8 100644 (file)
@@ -84,7 +84,7 @@ static uintptr_t IOBMDPageProc(iopa_t * a)
     int           options = 0; // KMA_LOMEM;
 
     kr = kernel_memory_allocate(kernel_map, &vmaddr,
-                               page_size, 0, options);
+                               page_size, 0, options, VM_KERN_MEMORY_IOKIT);
 
     if (KERN_SUCCESS != kr) vmaddr = 0;
     else                   bzero((void *) vmaddr, page_size);
@@ -167,6 +167,8 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask(
 
     _alignment = alignment;
 
+    if ((capacity + alignment) < _capacity) return (false);
+
     if ((inTask != kernel_task) && !(options & kIOMemoryPageable))
        return false;
 
@@ -232,7 +234,7 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask(
            {
                IOStatisticsAlloc(kIOStatisticsMallocAligned, capacity);
 #if IOALLOCDEBUG
-               debug_iomalloc_size += capacity;
+               OSAddAtomic(capacity, &debug_iomalloc_size);
 #endif
            }
        }
@@ -259,7 +261,7 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask(
 
        if( options & kIOMemoryPageable) {
 #if IOALLOCDEBUG
-           debug_iomallocpageable_size += size;
+           OSAddAtomicLong(size, &debug_iomallocpageable_size);
 #endif
            mapTask = inTask;
            if (NULL == inTask)
@@ -492,7 +494,7 @@ void IOBufferMemoryDescriptor::free()
     if (options & kIOMemoryPageable)
     {
 #if IOALLOCDEBUG
-       debug_iomallocpageable_size -= round_page(size);
+       OSAddAtomicLong(-(round_page(size)), &debug_iomallocpageable_size);
 #endif
     }
     else if (buffer)
@@ -512,7 +514,7 @@ void IOBufferMemoryDescriptor::free()
                kmem_free(kernel_map, page, page_size);
            }
 #if IOALLOCDEBUG
-           debug_iomalloc_size -= size;
+               OSAddAtomic(-size, &debug_iomalloc_size);
 #endif
            IOStatisticsAlloc(kIOStatisticsFreeAligned, size);
        }
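
The new (capacity + alignment) < _capacity test in initWithPhysicalMask() is an unsigned-overflow guard: if the sum wraps, the rounded-up allocation would be smaller than the caller asked for. A worked example with illustrative values:

    vm_size_t capacity  = 0xFFFFFFFFFFFFF000ULL;   // near the top of a 64-bit vm_size_t
    vm_size_t alignment = 0x2000;
    bool rejected = ((capacity + alignment) < capacity);   // true: the sum wrapped to 0x1000
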
index 00504e9ebaf1da9f9ed2e964c5ffdf509a083e93..47a17b5a110a396aa9a1fb5ae2723c8d80312e36 100644 (file)
@@ -66,80 +66,35 @@ struct iocpu_platform_action_entry
 };
 typedef struct iocpu_platform_action_entry iocpu_platform_action_entry_t;
 
-queue_head_t * 
-iocpu_get_platform_quiesce_queue(void);
-
-queue_head_t * 
-iocpu_get_platform_active_queue(void);
-
-void
-iocpu_platform_cpu_action_init(queue_head_t * quiesce_queue, queue_head_t * init_queue);
-
-void
-iocpu_add_platform_action(queue_head_t * queue, iocpu_platform_action_entry_t * entry);
-
-void
-iocpu_remove_platform_action(iocpu_platform_action_entry_t * entry);
-
-kern_return_t
-iocpu_run_platform_actions(queue_head_t * queue, uint32_t first_priority, uint32_t last_priority,
-                                       void * param1, void * param2, void * param3);
-
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 #define kBootCPUNumber  0
 
-static iocpu_platform_action_entry_t * gIOAllActionsQueue;
-static queue_head_t gIOSleepActionQueue;
-static queue_head_t gIOWakeActionQueue;
-
-static queue_head_t iocpu_quiesce_queue;
-static queue_head_t iocpu_active_queue;
-
-static queue_head_t gIOHaltRestartActionQueue;
-
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-
-void 
-iocpu_platform_cpu_action_init(queue_head_t * quiesce_queue, __unused queue_head_t * init_queue)
+enum
 {
-#if 0
-    enum { kNumQuiesceActions = 2 };
-    static iocpu_platform_action_entry_t quiesce_actions[kNumQuiesceActions] = 
-    {
-       { { NULL, NULL }, (iocpu_platform_action_t) &clean_mmu_dcache, 97000, 0, 0, NULL },    
-       { { NULL, NULL }, (iocpu_platform_action_t) &arm_sleep, 99000, 0, 0, NULL },    
-    };
-    unsigned int idx;
+    kQueueSleep       = 0,
+    kQueueWake        = 1,
+    kQueueQuiesce     = 2,
+    kQueueActive      = 3,
+    kQueueHaltRestart = 4,
+    kQueuePanic       = 5,
+    kQueueCount       = 6
+};
 
-    for (idx = 0; idx < kNumQuiesceActions; idx++)
-       iocpu_add_platform_action(quiesce_queue, &quiesce_actions[idx]);
-#endif
-}
+const OSSymbol *               gIOPlatformSleepActionKey;
+const OSSymbol *               gIOPlatformWakeActionKey;
+const OSSymbol *               gIOPlatformQuiesceActionKey;
+const OSSymbol *               gIOPlatformActiveActionKey;
+const OSSymbol *               gIOPlatformHaltRestartActionKey;
+const OSSymbol *               gIOPlatformPanicActionKey;
 
-queue_head_t * iocpu_get_platform_quiesce_queue(void)
-{
-    if (!iocpu_quiesce_queue.next)
-    {
-       queue_init(&iocpu_quiesce_queue);
-       queue_init(&iocpu_active_queue);
-       iocpu_platform_cpu_action_init(&iocpu_quiesce_queue, &iocpu_active_queue);
-    }
-    return (&iocpu_quiesce_queue);
-}
+static queue_head_t            gActionQueues[kQueueCount];
+static const OSSymbol *                gActionSymbols[kQueueCount];
 
-queue_head_t * iocpu_get_platform_active_queue(void)
-{
-    if (!iocpu_active_queue.next)
-    {
-       queue_init(&iocpu_quiesce_queue);
-       queue_init(&iocpu_active_queue);
-       iocpu_platform_cpu_action_init(&iocpu_quiesce_queue, &iocpu_active_queue);
-    }
-    return (&iocpu_active_queue);
-}
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
-void iocpu_add_platform_action(queue_head_t * queue, iocpu_platform_action_entry_t * entry)
+static void
+iocpu_add_platform_action(queue_head_t * queue, iocpu_platform_action_entry_t * entry)
 {
     iocpu_platform_action_entry_t * next;
 
@@ -154,12 +109,13 @@ void iocpu_add_platform_action(queue_head_t * queue, iocpu_platform_action_entry
     queue_enter(queue, entry, iocpu_platform_action_entry_t *, link);  // at tail
 }
 
-void iocpu_remove_platform_action(iocpu_platform_action_entry_t * entry)
+static void
+iocpu_remove_platform_action(iocpu_platform_action_entry_t * entry)
 {
     remque(&entry->link);
 }
 
-kern_return_t
+static kern_return_t
 iocpu_run_platform_actions(queue_head_t * queue, uint32_t first_priority, uint32_t last_priority,
                                        void * param1, void * param2, void * param3)
 {
@@ -186,17 +142,33 @@ iocpu_run_platform_actions(queue_head_t * queue, uint32_t first_priority, uint32
 extern "C" kern_return_t 
 IOCPURunPlatformQuiesceActions(void)
 {
-    return (iocpu_run_platform_actions(iocpu_get_platform_quiesce_queue(), 0, 0U-1,
+    return (iocpu_run_platform_actions(&gActionQueues[kQueueQuiesce], 0, 0U-1,
                                    NULL, NULL, NULL));
 }
 
 extern "C" kern_return_t 
 IOCPURunPlatformActiveActions(void)
 {
-    return (iocpu_run_platform_actions(iocpu_get_platform_active_queue(), 0, 0U-1,
+    return (iocpu_run_platform_actions(&gActionQueues[kQueueActive], 0, 0U-1,
                                    NULL, NULL, NULL));
 }
 
+extern "C" kern_return_t 
+IOCPURunPlatformHaltRestartActions(uint32_t message)
+{
+    return (iocpu_run_platform_actions(&gActionQueues[kQueueHaltRestart], 0, 0U-1,
+                                    (void *)(uintptr_t) message, NULL, NULL));
+}
+
+extern "C" kern_return_t 
+IOCPURunPlatformPanicActions(uint32_t message)
+{
+    return (iocpu_run_platform_actions(&gActionQueues[kQueuePanic], 0, 0U-1,
+                                    (void *)(uintptr_t) message, NULL, NULL));
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
 static kern_return_t 
 IOServicePlatformAction(void * refcon0, void * refcon1, uint32_t priority,
                          void * param1, void * param2, void * param3,
@@ -215,17 +187,39 @@ IOServicePlatformAction(void * refcon0, void * refcon1, uint32_t priority,
 }
 
 static void
-IOInstallServicePlatformAction(IOService * service, 
-                               const OSSymbol * key, queue_head_t * queue,
-                               bool reverse)
+IOInstallServicePlatformAction(IOService * service, uint32_t qidx)
 {
-    OSNumber * num;
     iocpu_platform_action_entry_t * entry;
-    uint32_t priority;
+    OSNumber *       num;
+    uint32_t         priority;
+    const OSSymbol * key = gActionSymbols[qidx]; 
+    queue_head_t *   queue = &gActionQueues[qidx];
+    bool             reverse;
+    bool             uniq;
 
     num = OSDynamicCast(OSNumber, service->getProperty(key));
-    if (!num)
-       return;
+    if (!num) return;
+
+    reverse = false;
+    uniq    = false;
+    switch (qidx)
+    {
+       case kQueueWake:
+       case kQueueActive:
+           reverse = true;
+           break;
+       case kQueueHaltRestart:
+       case kQueuePanic:
+           uniq = true;
+           break;
+    }
+    if (uniq)
+    {
+       queue_iterate(queue, entry, iocpu_platform_action_entry_t *, link)
+       {
+           if (service == entry->refcon0) return;
+       }
+    }
 
     entry = IONew(iocpu_platform_action_entry_t, 1);
     entry->action = &IOServicePlatformAction;
@@ -239,49 +233,66 @@ IOInstallServicePlatformAction(IOService * service,
     entry->refcon1 = (void *) key;
 
     iocpu_add_platform_action(queue, entry);
-    entry->alloc_list = gIOAllActionsQueue;
-    gIOAllActionsQueue = entry;
 }
 
-extern "C" kern_return_t 
-IOCPURunPlatformHaltRestartActions(uint32_t message)
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+void
+IOCPUInitialize(void)
 {
-    kern_return_t       ret;
-    IORegistryIterator * iter;
-    OSOrderedSet *       all;
-    IOService *          service;
+    for (uint32_t qidx = kQueueSleep; qidx < kQueueCount; qidx++)
+    {
+       queue_init(&gActionQueues[qidx]);
+    }
+
+    gIOPlatformSleepActionKey       = gActionSymbols[kQueueSleep]
+       = OSSymbol::withCStringNoCopy(kIOPlatformSleepActionKey);
+    gIOPlatformWakeActionKey        = gActionSymbols[kQueueWake]
+       = OSSymbol::withCStringNoCopy(kIOPlatformWakeActionKey);
+    gIOPlatformQuiesceActionKey             = gActionSymbols[kQueueQuiesce]
+       = OSSymbol::withCStringNoCopy(kIOPlatformQuiesceActionKey);
+    gIOPlatformActiveActionKey      = gActionSymbols[kQueueActive]
+       = OSSymbol::withCStringNoCopy(kIOPlatformActiveActionKey);
+    gIOPlatformHaltRestartActionKey  = gActionSymbols[kQueueHaltRestart]
+       = OSSymbol::withCStringNoCopy(kIOPlatformHaltRestartActionKey);
+    gIOPlatformPanicActionKey = gActionSymbols[kQueuePanic]
+       = OSSymbol::withCStringNoCopy(kIOPlatformPanicActionKey);
+}
+
+IOReturn
+IOInstallServicePlatformActions(IOService * service)
+{
+    IOInstallServicePlatformAction(service, kQueueHaltRestart);
+    IOInstallServicePlatformAction(service, kQueuePanic);
+
+    return (kIOReturnSuccess);
+}
 
-    if (!gIOHaltRestartActionQueue.next)
+IOReturn
+IORemoveServicePlatformActions(IOService * service)
+{
+    iocpu_platform_action_entry_t * entry;
+    iocpu_platform_action_entry_t * next;
+
+    for (uint32_t qidx = kQueueSleep; qidx < kQueueCount; qidx++)
     {
-       queue_init(&gIOHaltRestartActionQueue);
-       iter = IORegistryIterator::iterateOver(gIOServicePlane,
-                                               kIORegistryIterateRecursively);
-       if (iter)
+       next = (typeof(entry)) queue_first(&gActionQueues[qidx]);
+       while (!queue_end(&gActionQueues[qidx], &next->link))
        {
-           all = 0;
-           do 
+           entry = next;
+           next = (typeof(entry)) queue_next(&entry->link);
+           if (service == entry->refcon0)
            {
-               if (all) all->release();
-               all = iter->iterateAll();
+               iocpu_remove_platform_action(entry);
+               IODelete(entry, iocpu_platform_action_entry_t, 1);
            }
-           while (!iter->isValid());
-           iter->release();
-           if (all)
-           {
-               while((service = (IOService *) all->getFirstObject()))
-               {
-                   IOInstallServicePlatformAction(service, gIOPlatformHaltRestartActionKey, &gIOHaltRestartActionQueue, false);
-                   all->removeObject(service);
-               }
-               all->release();
-           }   
        }
     }
-    ret = iocpu_run_platform_actions(&gIOHaltRestartActionQueue, 0, 0U-1,
-                                    (void *)(uintptr_t) message, NULL, NULL);
-    return (ret);
+
+    return (kIOReturnSuccess);
 }
 
+
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 kern_return_t PE_cpu_start(cpu_id_t target,
@@ -308,6 +319,22 @@ void PE_cpu_signal(cpu_id_t source, cpu_id_t target)
   if (sourceCPU && targetCPU) sourceCPU->signalCPU(targetCPU);
 }
 
+void PE_cpu_signal_deferred(cpu_id_t source, cpu_id_t target)
+{
+  IOCPU *sourceCPU = OSDynamicCast(IOCPU, (OSObject *)source);
+  IOCPU *targetCPU = OSDynamicCast(IOCPU, (OSObject *)target);
+
+  if (sourceCPU && targetCPU) sourceCPU->signalCPUDeferred(targetCPU);
+}
+
+void PE_cpu_signal_cancel(cpu_id_t source, cpu_id_t target)
+{
+  IOCPU *sourceCPU = OSDynamicCast(IOCPU, (OSObject *)source);
+  IOCPU *targetCPU = OSDynamicCast(IOCPU, (OSObject *)target);
+
+  if (sourceCPU && targetCPU) sourceCPU->signalCPUCancel(targetCPU);
+}
+
 void PE_cpu_machine_init(cpu_id_t target, boolean_t bootb)
 {
   IOCPU *targetCPU = OSDynamicCast(IOCPU, (OSObject *)target);
@@ -358,10 +385,6 @@ void IOCPUSleepKernel(void)
 
     rootDomain->tracePoint( kIOPMTracePointSleepPlatformActions );
 
-    queue_init(&gIOSleepActionQueue);
-    queue_init(&gIOWakeActionQueue);
-    queue_init(&gIOHaltRestartActionQueue);
-
     iter = IORegistryIterator::iterateOver( gIOServicePlane,
                                            kIORegistryIterateRecursively );
     if( iter)
@@ -380,18 +403,17 @@ void IOCPUSleepKernel(void)
        {
            while((service = (IOService *) all->getFirstObject()))
            {
-               IOInstallServicePlatformAction(service, gIOPlatformSleepActionKey,   &gIOSleepActionQueue,               false);
-               IOInstallServicePlatformAction(service, gIOPlatformWakeActionKey,    &gIOWakeActionQueue,                true);
-               IOInstallServicePlatformAction(service, gIOPlatformQuiesceActionKey, iocpu_get_platform_quiesce_queue(), false);
-               IOInstallServicePlatformAction(service, gIOPlatformActiveActionKey,  iocpu_get_platform_active_queue(),  true);
-               IOInstallServicePlatformAction(service, gIOPlatformHaltRestartActionKey, &gIOHaltRestartActionQueue,     false);
+               for (uint32_t qidx = kQueueSleep; qidx <= kQueueActive; qidx++)
+               {
+                   IOInstallServicePlatformAction(service, qidx);
+               }
                all->removeObject(service);
            }
            all->release();
        }       
     }
 
-    iocpu_run_platform_actions(&gIOSleepActionQueue, 0, 0U-1,
+    iocpu_run_platform_actions(&gActionQueues[kQueueSleep], 0, 0U-1,
                                NULL, NULL, NULL);
 
     rootDomain->tracePoint( kIOPMTracePointSleepCPUs );
@@ -423,22 +445,20 @@ void IOCPUSleepKernel(void)
 
     rootDomain->tracePoint( kIOPMTracePointWakePlatformActions );
 
-    iocpu_run_platform_actions(&gIOWakeActionQueue, 0, 0U-1,
+    iocpu_run_platform_actions(&gActionQueues[kQueueWake], 0, 0U-1,
                                    NULL, NULL, NULL);
 
     iocpu_platform_action_entry_t * entry;
-    while ((entry = gIOAllActionsQueue))
+    for (uint32_t qidx = kQueueSleep; qidx <= kQueueActive; qidx++)
     {
-       gIOAllActionsQueue = entry->alloc_list;
-       iocpu_remove_platform_action(entry);
-       IODelete(entry, iocpu_platform_action_entry_t, 1);
+       while (!(queue_empty(&gActionQueues[qidx])))
+       {
+           entry = (typeof(entry)) queue_first(&gActionQueues[qidx]);
+           iocpu_remove_platform_action(entry);
+           IODelete(entry, iocpu_platform_action_entry_t, 1);
+       }
     }
 
-    if (!queue_empty(&gIOSleepActionQueue))       panic("gIOSleepActionQueue");
-    if (!queue_empty(&gIOWakeActionQueue))       panic("gIOWakeActionQueue");
-    if (!queue_empty(&gIOHaltRestartActionQueue)) panic("gIOHaltRestartActionQueue");
-    gIOHaltRestartActionQueue.next = 0;
-  
     rootDomain->tracePoint( kIOPMTracePointWakeCPUs );
 
     // Wake the other CPUs.
@@ -556,6 +576,7 @@ bool IOCPU::serializeProperties(OSSerialize *serialize) const
 {
        bool result;
        OSDictionary *dict = dictionaryWithProperties();
+       if (!dict) return false;
        dict->setObject(gIOCPUStateKey, gIOCPUStateNames[_cpuState]);
        result = dict->serialize(serialize);
        dict->release();  
@@ -587,6 +608,20 @@ void IOCPU::signalCPU(IOCPU */*target*/)
 {
 }
 
+void IOCPU::signalCPUDeferred(IOCPU *target)
+{
+  // Our CPU may not support deferred IPIs,
+  // so send a regular IPI by default
+  signalCPU(target);
+}
+
+void IOCPU::signalCPUCancel(IOCPU */*target*/)
+{
+  // Meant to cancel signals sent by
+  // signalCPUDeferred; unsupported
+  // by default
+}
+
 void IOCPU::enableCPUTimeBase(bool /*enable*/)
 {
 }
@@ -734,7 +769,10 @@ void IOCPUInterruptController::enableCPUInterrupt(IOCPU *cpu)
        // Ensure that the increment is seen by all processors
        OSIncrementAtomic(&enabledCPUs);
 
-       if (enabledCPUs == numCPUs) thread_wakeup(this);
+       if (enabledCPUs == numCPUs) {
+    IOService::cpusRunning();
+    thread_wakeup(this);
+  }
 }
 
 IOReturn IOCPUInterruptController::registerInterrupt(IOService *nub,
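
With the queue rework above, a service opts into the halt/restart or panic action queues by publishing the corresponding key as an OSNumber priority; IOInstallServicePlatformActions() reads it via getProperty() and skips duplicates for those two queues. A hedged driver-side sketch (the priority value is arbitrary, and the dispatch path back through callPlatformFunction() is inferred from IOServicePlatformAction()'s use of the key, whose body is not shown in this hunk):

    // In the driver's start(): publish a halt/restart action priority.
    setProperty(kIOPlatformHaltRestartActionKey, (unsigned long long) 100, 32);

    // When IOCPURunPlatformHaltRestartActions() runs, the queued entry presumably
    // dispatches back into this service via callPlatformFunction() with the same key.
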
index 9b19d70eefeb5e7dd63bfb88997f44401ed9c654..ae767744e97ed5ded1d4e38fe49df7b079d082e7 100644 (file)
@@ -111,24 +111,41 @@ IOCommandGate::commandGate(OSObject *inOwner, Action inAction)
 
 /* virtual */ void IOCommandGate::free()
 {
-    setWorkLoop(0);
+    if (workLoop) setWorkLoop(0);
     super::free();
 }
 
+enum
+{
+    kSleepersRemoved     = 0x00000001,
+    kSleepersWaitEnabled = 0x00000002,
+    kSleepersActions     = 0x00000100,
+    kSleepersActionsMask = 0xffffff00,
+};
+
 /* virtual */ void IOCommandGate::setWorkLoop(IOWorkLoop *inWorkLoop)
 {
-    uintptr_t *sleepersP = (uintptr_t *) &reserved;
-    if (!inWorkLoop && workLoop) {             // tearing down
-       closeGate();
-       *sleepersP |= 1;
-       while (*sleepersP >> 1) {
+    IOWorkLoop * wl;
+    uintptr_t  * sleepersP = (uintptr_t *) &reserved;
+    bool         defer;
+
+    if (!inWorkLoop && (wl = workLoop)) {              // tearing down
+       wl->closeGate();
+       *sleepersP |= kSleepersRemoved;
+       while (*sleepersP & kSleepersWaitEnabled) {
            thread_wakeup_with_result(&enabled, THREAD_INTERRUPTED);
            sleepGate(sleepersP, THREAD_UNINT);
        }
-       *sleepersP = 0;
-       openGate();
+       *sleepersP &= ~kSleepersWaitEnabled;
+       defer = (0 != (kSleepersActionsMask & *sleepersP));
+       if (!defer)
+       {
+           super::setWorkLoop(0);
+           *sleepersP &= ~kSleepersRemoved;
+       }
+       wl->openGate();
+       return;
     }
-    else
 
     super::setWorkLoop(inWorkLoop);
 }
@@ -149,29 +166,38 @@ IOReturn IOCommandGate::runAction(Action inAction,
                                   void *arg0, void *arg1,
                                   void *arg2, void *arg3)
 {
+    IOWorkLoop * wl;
+    uintptr_t  * sleepersP;
+
     if (!inAction)
         return kIOReturnBadArgument;
+    if (!(wl = workLoop))
+        return kIOReturnNotReady;
 
     // closeGate is recursive; we needn't worry if we already hold the lock.
-    closeGate();
+    wl->closeGate();
+    sleepersP = (uintptr_t *) &reserved;
 
     // If the command gate is disabled and we aren't on the workloop thread
     // itself then sleep until we get enabled.
     IOReturn res;
-    if (!workLoop->onThread()) {
-       while (!enabled) {
-           uintptr_t *sleepersP = (uintptr_t *) &reserved;
-
-           *sleepersP += 2;
-           IOReturn res = sleepGate(&enabled, THREAD_ABORTSAFE);
-           *sleepersP -= 2;
-
-           bool wakeupTearDown = (*sleepersP & 1);
-           if (res || wakeupTearDown) {
-               openGate();
+    if (!wl->onThread())
+    {
+       while (!enabled)
+       {
+            IOReturn sleepResult = kIOReturnSuccess;
+           if (workLoop)
+           {
+               *sleepersP |= kSleepersWaitEnabled;
+               sleepResult = wl->sleepGate(&enabled, THREAD_ABORTSAFE);
+               *sleepersP &= ~kSleepersWaitEnabled;
+           }
+           bool wakeupTearDown = (!workLoop || (0 != (*sleepersP & kSleepersRemoved)));
+           if ((kIOReturnSuccess != sleepResult) || wakeupTearDown) {
+               wl->openGate();
 
                 if (wakeupTearDown)
-                    commandWakeup(sleepersP);  // No further resources used
+                    wl->wakeupGate(sleepersP, false);  // No further resources used
 
                return kIOReturnAborted;
            }
@@ -180,20 +206,28 @@ IOReturn IOCommandGate::runAction(Action inAction,
 
     bool trace = ( gIOKitTrace & kIOTraceCommandGates ) ? true : false;
        
-       if (trace)
-               IOTimeStampStartConstant(IODBG_CMDQ(IOCMDQ_ACTION),
+    if (trace) IOTimeStampStartConstant(IODBG_CMDQ(IOCMDQ_ACTION),
                                         VM_KERNEL_UNSLIDE(inAction), (uintptr_t) owner);
        
     IOStatisticsActionCall();
        
     // Must be gated and on the work loop or enabled
+
+    *sleepersP += kSleepersActions;
     res = (*inAction)(owner, arg0, arg1, arg2, arg3);
-       
-       if (trace)
-               IOTimeStampEndConstant(IODBG_CMDQ(IOCMDQ_ACTION),
+    *sleepersP -= kSleepersActions;
+
+    if (trace) IOTimeStampEndConstant(IODBG_CMDQ(IOCMDQ_ACTION),
                                       VM_KERNEL_UNSLIDE(inAction), (uintptr_t) owner);
+
+    if (kSleepersRemoved == ((kSleepersActionsMask|kSleepersRemoved) & *sleepersP))
+    {
+        // no actions outstanding
+       *sleepersP &= ~kSleepersRemoved;
+       super::setWorkLoop(0);
+    }
     
-    openGate();
+    wl->openGate();
        
     return res;
 }
@@ -203,16 +237,19 @@ IOReturn IOCommandGate::attemptAction(Action inAction,
                                       void *arg2, void *arg3)
 {
     IOReturn res;
+    IOWorkLoop * wl;
 
     if (!inAction)
         return kIOReturnBadArgument;
+    if (!(wl = workLoop))
+        return kIOReturnNotReady;
 
     // Try to close the gate; if we can't get it, return immediately.
-    if (!tryCloseGate())
+    if (!wl->tryCloseGate())
         return kIOReturnCannotLock;
 
     // If the command gate is disabled then sleep until we get a wakeup
-    if (!workLoop->onThread() && !enabled)
+    if (!wl->onThread() && !enabled)
         res = kIOReturnNotPermitted;
     else {
                
@@ -231,7 +268,7 @@ IOReturn IOCommandGate::attemptAction(Action inAction,
                                   VM_KERNEL_UNSLIDE(inAction), (uintptr_t) owner);
     }
 
-    openGate();
+    wl->openGate();
 
     return res;
 }
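
The reworked teardown replaces the old pointer-sized counter tricks with named bits: kSleepersRemoved marks a gate being detached, kSleepersWaitEnabled marks a thread sleeping until enable(), and the count of in-flight runAction() calls lives in the bits above kSleepersActions. A purely illustrative walk-through of the packing:

    uintptr_t sleepers = 0;

    sleepers += kSleepersActions;      // one runAction() currently executing
    sleepers |= kSleepersRemoved;      // setWorkLoop(0) arrived mid-action
    bool outstanding = (0 != (sleepers & kSleepersActionsMask));   // true, so teardown defers

    sleepers -= kSleepersActions;      // the action returns
    // Now ((kSleepersActionsMask | kSleepersRemoved) & sleepers) == kSleepersRemoved,
    // which is exactly the condition runAction() uses to finish the deferred super::setWorkLoop(0).
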
index 3b3c0ee3a0159a227306b9ee1972ec939402934f..c8477aaca5f52f536911f54fcb950b319bf220e4 100644 (file)
@@ -42,8 +42,6 @@
 #include "IOKitKernelInternal.h"
 
 #define MAPTYPE(type)          ((UInt) (type) & kTypeMask)
-#define IS_MAPPED(type)                (MAPTYPE(type) != kBypassed)
-#define IS_BYPASSED(type)      (MAPTYPE(type) == kBypassed)
 #define IS_NONCOHERENT(type)   (MAPTYPE(type) == kNonCoherent)
 
 enum 
@@ -91,10 +89,10 @@ OSDefineMetaClassAndStructors(IODMACommand, IOCommand);
 OSMetaClassDefineReservedUsed(IODMACommand,  0);
 OSMetaClassDefineReservedUsed(IODMACommand,  1);
 OSMetaClassDefineReservedUsed(IODMACommand,  2);
-OSMetaClassDefineReservedUnused(IODMACommand,  3);
-OSMetaClassDefineReservedUnused(IODMACommand,  4);
-OSMetaClassDefineReservedUnused(IODMACommand,  5);
-OSMetaClassDefineReservedUnused(IODMACommand,  6);
+OSMetaClassDefineReservedUsed(IODMACommand,  3);
+OSMetaClassDefineReservedUsed(IODMACommand,  4);
+OSMetaClassDefineReservedUsed(IODMACommand,  5);
+OSMetaClassDefineReservedUsed(IODMACommand,  6);
 OSMetaClassDefineReservedUnused(IODMACommand,  7);
 OSMetaClassDefineReservedUnused(IODMACommand,  8);
 OSMetaClassDefineReservedUnused(IODMACommand,  9);
@@ -105,6 +103,39 @@ OSMetaClassDefineReservedUnused(IODMACommand, 13);
 OSMetaClassDefineReservedUnused(IODMACommand, 14);
 OSMetaClassDefineReservedUnused(IODMACommand, 15);
 
+IODMACommand *
+IODMACommand::withRefCon(void * refCon)
+{
+    IODMACommand * me = new IODMACommand;
+
+    if (me && !me->initWithRefCon(refCon))
+    {
+        me->release();
+        return 0;
+    }
+
+    return me;
+}
+
+IODMACommand *
+IODMACommand::withSpecification(SegmentFunction  outSegFunc,
+                         const SegmentOptions * segmentOptions,
+                         uint32_t               mappingOptions,
+                         IOMapper             * mapper,
+                         void                 * refCon)
+{
+    IODMACommand * me = new IODMACommand;
+
+    if (me && !me->initWithSpecification(outSegFunc, segmentOptions, mappingOptions, 
+                                        mapper, refCon))
+    {
+        me->release();
+        return 0;
+    }
+
+    return me;
+}
+
 IODMACommand *
 IODMACommand::withSpecification(SegmentFunction outSegFunc,
                                UInt8           numAddressBits,
@@ -124,7 +155,7 @@ IODMACommand::withSpecification(SegmentFunction outSegFunc,
     {
         me->release();
         return 0;
-    };
+    }
 
     return me;
 }
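
The SegmentOptions-based factory added above, together with initWithRefCon()/setSpecification(), lets callers build the command first and attach the DMA specification separately. A hedged caller-side sketch: the scoping of SegmentOptions inside IODMACommand and the spellings OutputHost64 and kMapped are the customary IODMACommand names and should be treated as assumptions, and the numeric choices are arbitrary.

    IODMACommand::SegmentOptions opts =
    {
        .fStructSize                = sizeof(opts),
        .fNumAddressBits            = 64,
        .fMaxSegmentSize            = 65536,
        .fMaxTransferSize           = 0,      // 0 becomes "unlimited" (decremented to -1 in setSpecification below)
        .fAlignment                 = 4096,
        .fAlignmentLength           = 1,
        .fAlignmentInternalSegments = 4096
    };

    IODMACommand * cmd = IODMACommand::withSpecification(IODMACommand::OutputHost64, &opts,
                                                         IODMACommand::kMapped,
                                                         /* mapper */ NULL, /* refCon */ NULL);
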
@@ -132,12 +163,54 @@ IODMACommand::withSpecification(SegmentFunction outSegFunc,
 IODMACommand *
 IODMACommand::cloneCommand(void *refCon)
 {
-    return withSpecification(fOutSeg, fNumAddressBits, fMaxSegmentSize,
-           fMappingOptions, fMaxTransferSize, fAlignMask + 1, fMapper, refCon);
+    SegmentOptions segmentOptions =
+    {
+       .fStructSize                = sizeof(segmentOptions),
+       .fNumAddressBits            = fNumAddressBits,
+       .fMaxSegmentSize            = fMaxSegmentSize,
+       .fMaxTransferSize           = fMaxTransferSize,
+       .fAlignment                 = fAlignMask + 1,
+       .fAlignmentLength           = fAlignMaskInternalSegments + 1,
+       .fAlignmentInternalSegments = fAlignMaskLength + 1
+    };
+
+    return (IODMACommand::withSpecification(fOutSeg, &segmentOptions,
+                                           fMappingOptions, fMapper, refCon));
 }
 
 #define kLastOutputFunction ((SegmentFunction) kLastOutputFunction)
 
+bool
+IODMACommand::initWithRefCon(void * refCon)
+{
+    if (!super::init()) return (false);
+
+    if (!reserved)
+    {
+       reserved = IONew(IODMACommandInternal, 1);
+       if (!reserved) return false;
+    }
+    bzero(reserved, sizeof(IODMACommandInternal));
+    fRefCon = refCon;
+
+    return (true);
+}
+
+bool
+IODMACommand::initWithSpecification(SegmentFunction       outSegFunc,
+                                   const SegmentOptions * segmentOptions,
+                                   uint32_t               mappingOptions,
+                                   IOMapper             * mapper,
+                                   void                 * refCon)
+{
+    if (!initWithRefCon(refCon)) return false;
+
+    if (kIOReturnSuccess != setSpecification(outSegFunc, segmentOptions, 
+                                            mappingOptions, mapper))      return false;
+
+    return (true);
+}
+
 bool
 IODMACommand::initWithSpecification(SegmentFunction outSegFunc,
                                    UInt8           numAddressBits,
@@ -147,89 +220,119 @@ IODMACommand::initWithSpecification(SegmentFunction outSegFunc,
                                    UInt32          alignment,
                                    IOMapper       *mapper,
                                    void           *refCon)
+{
+    SegmentOptions segmentOptions =
+    {
+       .fStructSize                = sizeof(segmentOptions),
+       .fNumAddressBits            = numAddressBits,
+       .fMaxSegmentSize            = maxSegmentSize,
+       .fMaxTransferSize           = maxTransferSize,
+       .fAlignment                 = alignment,
+       .fAlignmentLength           = 1,
+       .fAlignmentInternalSegments = alignment
+    };
+
+    return (initWithSpecification(outSegFunc, &segmentOptions, mappingOptions, mapper, refCon));
+}
+
+IOReturn
+IODMACommand::setSpecification(SegmentFunction        outSegFunc,
+                              const SegmentOptions * segmentOptions,
+                              uint32_t               mappingOptions,
+                              IOMapper             * mapper)
 {
     IOService * device = 0;
+    UInt8       numAddressBits;
+    UInt64      maxSegmentSize;
+    UInt64      maxTransferSize;
+    UInt32      alignment;
+
+    bool        is32Bit;
 
-    if (!super::init() || !outSegFunc)
-        return false;
+    if (!outSegFunc || !segmentOptions) return (kIOReturnBadArgument);
 
-    bool is32Bit = (OutputHost32   == outSegFunc || OutputBig32 == outSegFunc
-                 || OutputLittle32 == outSegFunc);
+    is32Bit = ((OutputHost32 == outSegFunc) 
+               || (OutputBig32 == outSegFunc)
+                || (OutputLittle32 == outSegFunc));
+
+    numAddressBits = segmentOptions->fNumAddressBits;
+    maxSegmentSize = segmentOptions->fMaxSegmentSize;
+    maxTransferSize = segmentOptions->fMaxTransferSize;
+    alignment = segmentOptions->fAlignment;
     if (is32Bit)
     {
        if (!numAddressBits)
            numAddressBits = 32;
        else if (numAddressBits > 32)
-           return false;               // Wrong output function for bits
+           return (kIOReturnBadArgument);              // Wrong output function for bits
     }
 
-    if (numAddressBits && (numAddressBits < PAGE_SHIFT))
-       return false;
-
-    if (!maxSegmentSize)
-       maxSegmentSize--;       // Set Max segment to -1
-    if (!maxTransferSize)
-       maxTransferSize--;      // Set Max transfer to -1
+    if (numAddressBits && (numAddressBits < PAGE_SHIFT)) return (kIOReturnBadArgument);
 
+    if (!maxSegmentSize)  maxSegmentSize--;    // Set Max segment to -1
+    if (!maxTransferSize) maxTransferSize--;   // Set Max transfer to -1
 
     if (mapper && !OSDynamicCast(IOMapper, mapper))
     {
        device = mapper;
        mapper = 0;
     }
-    if (!mapper)
+    if (!mapper && (kUnmapped != MAPTYPE(mappingOptions)))
     {
         IOMapper::checkForSystemMapper();
        mapper = IOMapper::gSystem;
     }
 
     fNumSegments     = 0;
-    fBypassMask      = 0;
     fOutSeg         = outSegFunc;
     fNumAddressBits  = numAddressBits;
     fMaxSegmentSize  = maxSegmentSize;
     fMappingOptions  = mappingOptions;
     fMaxTransferSize = maxTransferSize;
-    if (!alignment)
-       alignment = 1;
+    if (!alignment)    alignment = 1;
     fAlignMask      = alignment - 1;
-    fMapper          = mapper;
-    fRefCon          = refCon;
+
+    alignment = segmentOptions->fAlignmentLength;
+    if (!alignment) alignment = 1;
+    fAlignMaskLength = alignment - 1;
+
+    alignment = segmentOptions->fAlignmentInternalSegments;
+    if (!alignment) alignment = (fAlignMask + 1);
+    fAlignMaskInternalSegments = alignment - 1;
 
     switch (MAPTYPE(mappingOptions))
     {
-    case kMapped:                   break;
-    case kNonCoherent: /*fMapper = 0;*/ break;
+    case kMapped:              break;
+    case kUnmapped:     break;
+    case kNonCoherent:         break;
+
     case kBypassed:
-       if (mapper && !mapper->getBypassMask(&fBypassMask))
-           return false;
-       break;
+       if (!mapper)    break;
+       return (kIOReturnBadArgument);
+
     default:
-       return false;
+       return (kIOReturnBadArgument);
     };
 
-    if (fMapper)
-       fMapper->retain();
-
-    reserved = IONew(IODMACommandInternal, 1);
-    if (!reserved)
-       return false;
-    bzero(reserved, sizeof(IODMACommandInternal));
+    if (mapper != fMapper)
+    {
+       if (mapper)  mapper->retain();
+       if (fMapper) fMapper->release();
+       fMapper = mapper;
+    }
 
     fInternalState->fIterateOnly = (0 != (kIterateOnly & mappingOptions));
     fInternalState->fDevice = device;
 
-    return true;
+    return (kIOReturnSuccess);
 }
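Each alignment in the specification is stored as a mask (alignment - 1, with 0 treated as 1), so the misalignment tests in segmentOp() and genIOVMSegments() reduce to a single AND. A minimal sketch of that convention; the helper name is hypothetical:

// Illustrative only: mirrors the mask handling in setSpecification() above.
static inline bool
ExampleIsMisaligned(uint64_t value, uint32_t alignment)  // alignment: power of two, 0 == none
{
    if (!alignment) alignment = 1;      // same fallback setSpecification() applies
    uint32_t mask = alignment - 1;      // e.g. 4096 -> 0xFFF, 1 -> 0 (no constraint)
    return (0 != (value & mask));       // any low bits set => misaligned
}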
 
 void
 IODMACommand::free()
 {
-    if (reserved)
-       IODelete(reserved, IODMACommandInternal, 1);
+    if (reserved) IODelete(reserved, IODMACommandInternal, 1);
 
-    if (fMapper)
-       fMapper->release();
+    if (fMapper) fMapper->release();
 
     super::free();
 }
@@ -237,7 +340,7 @@ IODMACommand::free()
 IOReturn
 IODMACommand::setMemoryDescriptor(const IOMemoryDescriptor *mem, bool autoPrepare)
 {
-       IOReturn err = kIOReturnSuccess;
+    IOReturn err = kIOReturnSuccess;
        
     if (mem == fMemory)
     {
@@ -312,6 +415,16 @@ IODMACommand::getMemoryDescriptor() const
     return fMemory;
 }
 
+IOMemoryDescriptor *
+IODMACommand::getIOMemoryDescriptor() const
+{
+    IOMemoryDescriptor * mem;
+
+    mem = reserved->fCopyMD;
+    if (!mem) mem = __IODEQUALIFY(IOMemoryDescriptor *, fMemory);
+
+    return (mem);
+}
 
 IOReturn
 IODMACommand::segmentOp(
@@ -325,10 +438,11 @@ IODMACommand::segmentOp(
     addr64_t     maxPhys, address;
     uint64_t     length;
     uint32_t     numPages;
+    uint32_t     mask;
 
     IODMACommandInternal * state = target->reserved;
 
-    if (target->fNumAddressBits && (target->fNumAddressBits < 64) && (state->fLocalMapperPageAlloc || !target->fMapper))
+    if (target->fNumAddressBits && (target->fNumAddressBits < 64) && (state->fLocalMapperAlloc || !target->fMapper))
        maxPhys = (1ULL << target->fNumAddressBits);
     else
        maxPhys = 0;
@@ -342,8 +456,15 @@ IODMACommand::segmentOp(
 
     if (!state->fMisaligned)
     {
-       state->fMisaligned |= (0 != (state->fSourceAlignMask & address));
-       if (state->fMisaligned) DEBG("misaligned %qx:%qx, %lx\n", address, length, state->fSourceAlignMask);
+       mask = (segmentIndex ? target->fAlignMaskInternalSegments : state->fSourceAlignMask);
+       state->fMisaligned |= (0 != (mask & address));
+       if (state->fMisaligned) DEBG("misaligned address %qx:%qx, %x\n", address, length, mask);
+    }
+    if (!state->fMisaligned)
+    {
+       mask = target->fAlignMaskLength;
+       state->fMisaligned |= (0 != (mask & length));
+       if (state->fMisaligned) DEBG("misaligned length %qx:%qx, %x\n", address, length, mask);
     }
 
     if (state->fMisaligned && (kWalkPreflight & op))
@@ -401,7 +522,7 @@ IODMACommand::segmentOp(
                    if ((kMapped == MAPTYPE(target->fMappingOptions))
                        && target->fMapper)
                    {
-                       cpuAddr = target->fMapper->mapAddr(address);
+                       cpuAddr = target->fMapper->mapToPhysicalAddress(address);
                    }
        
                    remapAddr = ptoa_64(vm_page_get_phys_page(lastPage));
@@ -439,6 +560,14 @@ IODMACommand::segmentOp(
     return kIOReturnSuccess;
 }
 
+IOBufferMemoryDescriptor * 
+IODMACommand::createCopyBuffer(IODirection direction, UInt64 length)
+{
+    mach_vm_address_t mask = 0xFFFFF000;       //state->fSourceAlignMask
+    return (IOBufferMemoryDescriptor::inTaskWithPhysicalMask(kernel_task, 
+                                                       direction, length, mask));
+}
+
 IOReturn
 IODMACommand::walkAll(UInt8 op)
 {
@@ -478,7 +607,7 @@ IODMACommand::walkAll(UInt8 op)
 
            DEBG("preflight fCopyPageCount %d\n", state->fCopyPageCount);
 
-           if (!state->fDoubleBuffer)
+           if (!fMapper && !state->fDoubleBuffer)
            {
                kern_return_t kr;
 
@@ -506,9 +635,7 @@ IODMACommand::walkAll(UInt8 op)
            else
            {
                DEBG("alloc IOBMD\n");
-               mach_vm_address_t mask = 0xFFFFF000; //state->fSourceAlignMask
-               state->fCopyMD = IOBufferMemoryDescriptor::inTaskWithPhysicalMask(kernel_task,
-                                   fMDSummary.fDirection, state->fPreparedLength, mask);
+               state->fCopyMD = createCopyBuffer(fMDSummary.fDirection, state->fPreparedLength);
 
                if (state->fCopyMD)
                {
@@ -595,6 +722,40 @@ IODMACommand::getAlignment(void)
     return (fAlignMask + 1);
 }
 
+uint32_t
+IODMACommand::getAlignmentLength(void)
+{
+    return (fAlignMaskLength + 1);
+}
+
+uint32_t
+IODMACommand::getAlignmentInternalSegments(void)
+{
+    return (fAlignMaskInternalSegments + 1);
+}
+
+IOReturn
+IODMACommand::prepareWithSpecification(SegmentFunction       outSegFunc,
+                                      const SegmentOptions * segmentOptions,
+                                      uint32_t               mappingOptions,
+                                      IOMapper             * mapper,
+                                      UInt64                 offset,
+                                      UInt64                 length,
+                                      bool                   flushCache,
+                                      bool                   synchronize)
+{
+    IOReturn ret;
+
+    if (fActive) return kIOReturnNotPermitted;
+
+    ret = setSpecification(outSegFunc, segmentOptions, mappingOptions, mapper);
+    if (kIOReturnSuccess != ret) return (ret);
+
+    ret = prepare(offset, length, flushCache, synchronize);
+
+    return (ret);
+}
+
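A hedged usage sketch for the SegmentOptions overload of prepareWithSpecification() above; the function, descriptor handling and option values are illustrative, and OutputHost64 is assumed from IODMACommand.h (only the 32-bit output functions appear in this diff).

static IOReturn
ExamplePrepare(IODMACommand * dma, IOMemoryDescriptor * md, UInt64 offset, UInt64 length)
{
    IODMACommand::SegmentOptions options =
    {
        .fStructSize                = sizeof(options),
        .fNumAddressBits            = 64,
        .fMaxSegmentSize            = 0,    // 0 means "no limit" (decremented to -1 above)
        .fMaxTransferSize           = 0,    // likewise
        .fAlignment                 = 1,
        .fAlignmentLength           = 1,
        .fAlignmentInternalSegments = 1
    };

    IOReturn ret = dma->setMemoryDescriptor(md, false);
    if (kIOReturnSuccess != ret) return ret;

    // Returns kIOReturnNotPermitted while the command is still active, per the guard above.
    return dma->prepareWithSpecification(IODMACommand::OutputHost64, &options,
                                         IODMACommand::kMapped, NULL,
                                         offset, length, true, true);
}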
 IOReturn
 IODMACommand::prepareWithSpecification(SegmentFunction outSegFunc,
                                       UInt8            numAddressBits,
@@ -608,94 +769,36 @@ IODMACommand::prepareWithSpecification(SegmentFunction    outSegFunc,
                                       bool             flushCache,
                                       bool             synchronize)
 {
-    if (fActive)
-        return kIOReturnNotPermitted;
-
-    if (!outSegFunc)
-        return kIOReturnBadArgument;
-
-    bool is32Bit = (OutputHost32   == outSegFunc || OutputBig32 == outSegFunc
-                 || OutputLittle32 == outSegFunc);
-    if (is32Bit)
+    SegmentOptions segmentOptions =
     {
-       if (!numAddressBits)
-           numAddressBits = 32;
-       else if (numAddressBits > 32)
-           return kIOReturnBadArgument;                // Wrong output function for bits
-    }
-
-    if (numAddressBits && (numAddressBits < PAGE_SHIFT))
-       return kIOReturnBadArgument;
-
-    if (!maxSegmentSize)
-       maxSegmentSize--;       // Set Max segment to -1
-    if (!maxTransferSize)
-       maxTransferSize--;      // Set Max transfer to -1
-
-    if (mapper && !OSDynamicCast(IOMapper, mapper))
-    {
-       fInternalState->fDevice = mapper;
-       mapper = 0;
-    }
-    if (!mapper)
-    {
-        IOMapper::checkForSystemMapper();
-       mapper = IOMapper::gSystem;
-    }
-
-    switch (MAPTYPE(mappingOptions))
-    {
-    case kMapped:                   break;
-    case kNonCoherent:              break;
-    case kBypassed:
-       if (mapper && !mapper->getBypassMask(&fBypassMask))
-           return kIOReturnBadArgument;
-       break;
-    default:
-       return kIOReturnBadArgument;
+       .fStructSize                = sizeof(segmentOptions),
+       .fNumAddressBits            = numAddressBits,
+       .fMaxSegmentSize            = maxSegmentSize,
+       .fMaxTransferSize           = maxTransferSize,
+       .fAlignment                 = alignment,
+       .fAlignmentLength           = 1,
+       .fAlignmentInternalSegments = alignment
     };
 
-    fNumSegments     = 0;
-    fBypassMask      = 0;
-    fOutSeg         = outSegFunc;
-    fNumAddressBits  = numAddressBits;
-    fMaxSegmentSize  = maxSegmentSize;
-    fMappingOptions  = mappingOptions;
-    fMaxTransferSize = maxTransferSize;
-    if (!alignment)
-       alignment = 1;
-    fAlignMask      = alignment - 1;
-    if (mapper != fMapper)
-    {
-       mapper->retain();
-       fMapper->release();
-       fMapper = mapper;
-    }
-
-    fInternalState->fIterateOnly = (0 != (kIterateOnly & mappingOptions));
-
-    return prepare(offset, length, flushCache, synchronize);
+    return (prepareWithSpecification(outSegFunc, &segmentOptions, mappingOptions, mapper,
+                                       offset, length, flushCache, synchronize));
 }
 
 
 IOReturn 
 IODMACommand::prepare(UInt64 offset, UInt64 length, bool flushCache, bool synchronize)
 {
-    IODMACommandInternal * state = fInternalState;
-    IOReturn               ret   = kIOReturnSuccess;
-    MappingOptions mappingOptions    = fMappingOptions;
+    IODMACommandInternal *  state = fInternalState;
+    IOReturn                  ret = kIOReturnSuccess;
+    uint32_t       mappingOptions = fMappingOptions;
 
-    if (!length)
-       length = fMDSummary.fLength;
+    // check specification has been set
+    if (!fOutSeg) return (kIOReturnNotReady);
 
-    if (length > fMaxTransferSize)
-       return kIOReturnNoSpace;
+    if (!length) length = fMDSummary.fLength;
 
-    if (IS_NONCOHERENT(mappingOptions) && flushCache) {
-       IOMemoryDescriptor *poMD = const_cast<IOMemoryDescriptor *>(fMemory);
+    if (length > fMaxTransferSize) return kIOReturnNoSpace;
 
-       poMD->performOperation(kIOMemoryIncoherentIOStore, offset, length);
-    }
     if (fActive++)
     {
        if ((state->fPreparedOffset != offset)
@@ -704,6 +807,8 @@ IODMACommand::prepare(UInt64 offset, UInt64 length, bool flushCache, bool synchr
     }
     else
     {
+       if (fAlignMaskLength & length) return (kIOReturnNotAligned);
+
        state->fPreparedOffset = offset;
        state->fPreparedLength = length;
 
@@ -716,8 +821,8 @@ IODMACommand::prepare(UInt64 offset, UInt64 length, bool flushCache, bool synchr
        state->fCopyPageCount  = 0;
        state->fNextRemapPage  = NULL;
        state->fCopyMD         = 0;
-       state->fLocalMapperPageAlloc = 0;
-       state->fLocalMapperPageCount = 0;
+       state->fLocalMapperAlloc       = 0;
+       state->fLocalMapperAllocLength = 0;
 
        state->fLocalMapper    = (fMapper && (fMapper != IOMapper::gSystem));
 
@@ -738,46 +843,48 @@ IODMACommand::prepare(UInt64 offset, UInt64 length, bool flushCache, bool synchr
            ret = walkAll(op);
        }
 
-       if (fMapper)
+       if (IS_NONCOHERENT(mappingOptions) && flushCache) 
        {
-           if (state->fLocalMapper)
+           if (state->fCopyMD)
            {
-               state->fLocalMapperPageCount = atop_64(round_page(
-                       state->fPreparedLength + ((state->fPreparedOffset + fMDSummary.fPageAlign) & page_mask)));
-               state->fLocalMapperPageAlloc = ptoa_64(fMapper->iovmAllocDMACommand(this, state->fLocalMapperPageCount));
-               if (!state->fLocalMapperPageAlloc)
-               {
-                   DEBG("IODMACommand !iovmAlloc");
-                   return (kIOReturnNoResources);
-               }
-               state->fMapContig = true;
+               state->fCopyMD->performOperation(kIOMemoryIncoherentIOStore, 0, length);
            }
            else
            {
-               IOMDDMAMapArgs mapArgs;
-               bzero(&mapArgs, sizeof(mapArgs));
-               mapArgs.fMapper = fMapper;
-               mapArgs.fMapSpec.device         = state->fDevice;
-               mapArgs.fMapSpec.alignment      = fAlignMask + 1;
-               mapArgs.fMapSpec.numAddressBits = fNumAddressBits ? fNumAddressBits : 64;
-               mapArgs.fOffset = state->fPreparedOffset;
-               mapArgs.fLength = state->fPreparedLength;
-               const IOMemoryDescriptor * md = state->fCopyMD;
-               if (!md) md = fMemory;
-               ret = md->dmaCommandOperation(kIOMDDMAMap | state->fIterateOnly, &mapArgs, sizeof(mapArgs));
-               if (kIOReturnSuccess == ret)
-               {
-                   state->fLocalMapperPageAlloc = mapArgs.fAlloc;
-                   state->fLocalMapperPageCount = mapArgs.fAllocCount;
-                   state->fMapContig = mapArgs.fMapContig;
-               }
-               ret = kIOReturnSuccess;
+               IOMemoryDescriptor * md = const_cast<IOMemoryDescriptor *>(fMemory);
+               md->performOperation(kIOMemoryIncoherentIOStore, offset, length);
            }
        }
 
+       if (fMapper)
+       {
+           IOMDDMAMapArgs mapArgs;
+           bzero(&mapArgs, sizeof(mapArgs));
+           mapArgs.fMapper = fMapper;
+           mapArgs.fCommand = this;
+           mapArgs.fMapSpec.device         = state->fDevice;
+           mapArgs.fMapSpec.alignment      = fAlignMask + 1;
+           mapArgs.fMapSpec.numAddressBits = fNumAddressBits ? fNumAddressBits : 64;
+           mapArgs.fLength = state->fPreparedLength;
+           const IOMemoryDescriptor * md = state->fCopyMD;
+           if (md) { mapArgs.fOffset = 0; }
+           else
+           {
+               md = fMemory;
+               mapArgs.fOffset = state->fPreparedOffset;
+           }
+           ret = md->dmaCommandOperation(kIOMDDMAMap | state->fIterateOnly, &mapArgs, sizeof(mapArgs));
+//IOLog("dma %p 0x%x 0x%qx-0x%qx 0x%qx-0x%qx\n", this, ret, state->fPreparedOffset, state->fPreparedLength, mapArgs.fAlloc, mapArgs.fAllocLength);
 
-       if (kIOReturnSuccess == ret)
-           state->fPrepared = true;
+           if (kIOReturnSuccess == ret)
+           {
+               state->fLocalMapperAlloc       = mapArgs.fAlloc;
+               state->fLocalMapperAllocLength = mapArgs.fAllocLength;
+               state->fMapContig = mapArgs.fMapContig;
+           }
+           if (NULL != IOMapper::gSystem) ret = kIOReturnSuccess;
+       }
+       if (kIOReturnSuccess == ret) state->fPrepared = true;
     }
     return ret;
 }
@@ -793,6 +900,19 @@ IODMACommand::complete(bool invalidateCache, bool synchronize)
 
     if (!--fActive)
     {
+       if (IS_NONCOHERENT(fMappingOptions) && invalidateCache) 
+       {
+           if (state->fCopyMD)
+           {
+               state->fCopyMD->performOperation(kIOMemoryIncoherentIOFlush, 0, state->fPreparedLength);
+           }
+           else
+           {
+               IOMemoryDescriptor * md = const_cast<IOMemoryDescriptor *>(fMemory);
+               md->performOperation(kIOMemoryIncoherentIOFlush, state->fPreparedOffset, state->fPreparedLength);
+           }
+       }
+
        if (!state->fCursor)
        {
                IOOptionBits op = kWalkComplete;
@@ -800,28 +920,18 @@ IODMACommand::complete(bool invalidateCache, bool synchronize)
                        op |= kWalkSyncIn;
                ret = walkAll(op);
        }
-       if (state->fLocalMapperPageAlloc)
+       if (state->fLocalMapperAlloc)
        {
-           if (state->fLocalMapper)
-           {
-               fMapper->iovmFreeDMACommand(this, atop_64(state->fLocalMapperPageAlloc), state->fLocalMapperPageCount);
-           }
-           else if (state->fLocalMapperPageCount)
+           if (state->fLocalMapperAllocLength)
            {
-               fMapper->iovmFree(atop_64(state->fLocalMapperPageAlloc), state->fLocalMapperPageCount);
+               fMapper->iovmUnmapMemory(getIOMemoryDescriptor(), this, 
+                                               state->fLocalMapperAlloc, state->fLocalMapperAllocLength);
            }
-           state->fLocalMapperPageAlloc = 0;
-           state->fLocalMapperPageCount = 0;
+           state->fLocalMapperAlloc       = 0;
+           state->fLocalMapperAllocLength = 0;
        }
 
        state->fPrepared = false;
-
-       if (IS_NONCOHERENT(fMappingOptions) && invalidateCache)
-       { 
-           IOMemoryDescriptor *poMD = const_cast<IOMemoryDescriptor *>(fMemory);
-
-           poMD->performOperation(kIOMemoryIncoherentIOFlush, state->fPreparedOffset, state->fPreparedLength);
-       }
     }
 
     return ret;
@@ -913,7 +1023,7 @@ IODMACommand::transferSegment(void   *reference,
        if ((kMapped == MAPTYPE(target->fMappingOptions))
            && target->fMapper)
        {
-           cpuAddr = target->fMapper->mapAddr(ioAddr);
+           cpuAddr = target->fMapper->mapToPhysicalAddress(ioAddr);
            copyLen = min(copyLen, page_size - (ioAddr & (page_size - 1)));
            ioAddr += copyLen;
        }
@@ -1013,11 +1123,10 @@ IODMACommand::genIOVMSegments(uint32_t op,
        state->fIOVMAddr               = 0;
        internalState->fNextRemapPage  = NULL;
        internalState->fNewMD          = false;
-       state->fMapped                 = (IS_MAPPED(fMappingOptions) && fMapper);
+       state->fMapped                 = (0 != fMapper);
        mdOp                           = kIOMDFirstSegment;
     };
        
-    UInt64    bypassMask = fBypassMask;
     UInt32    segIndex = 0;
     UInt32    numSegments = *numSegmentsP;
     Segment64 curSeg = { 0, 0 };
@@ -1039,9 +1148,9 @@ IODMACommand::genIOVMSegments(uint32_t op,
            state->fOffset = offset;
            state->fLength = memLength - offset;
 
-           if (internalState->fMapContig && internalState->fLocalMapperPageAlloc)
+           if (internalState->fMapContig && internalState->fLocalMapperAlloc)
            {
-               state->fIOVMAddr = internalState->fLocalMapperPageAlloc + offset;
+               state->fIOVMAddr = internalState->fLocalMapperAlloc + offset;
                rtn = kIOReturnSuccess;
 #if 0
                {
@@ -1091,13 +1200,14 @@ IODMACommand::genIOVMSegments(uint32_t op,
        {
            UInt64 length = state->fLength;
            offset          += length;
-           curSeg.fIOVMAddr = state->fIOVMAddr | bypassMask;
+           curSeg.fIOVMAddr = state->fIOVMAddr;
            curSeg.fLength   = length;
            state->fIOVMAddr = 0;
        }
 
         if (!state->fIOVMAddr)
        {
+           // maxPhys
            if ((kWalkClient & op) && (curSeg.fIOVMAddr + curSeg.fLength - 1) > maxPhys)
            {
                if (internalState->fCursor)
@@ -1156,23 +1266,67 @@ IODMACommand::genIOVMSegments(uint32_t op,
                }
            }
 
+           // reduce size of output segment
+           uint64_t reduce, leftover = 0;
+
+           // fMaxSegmentSize
            if (curSeg.fLength > fMaxSegmentSize)
            {
-               UInt64 remain = curSeg.fLength - fMaxSegmentSize;
+               leftover      += curSeg.fLength - fMaxSegmentSize;
+               curSeg.fLength = fMaxSegmentSize;
+               state->fIOVMAddr = curSeg.fLength + curSeg.fIOVMAddr;
+           }
+
+           // align the current segment length
+
+           reduce = (curSeg.fLength & fAlignMaskLength);
+           if (reduce && (curSeg.fLength > reduce)) 
+           {
+               leftover       += reduce;
+               curSeg.fLength -= reduce;
+               state->fIOVMAddr = curSeg.fLength + curSeg.fIOVMAddr;
+           }
 
-               state->fIOVMAddr = fMaxSegmentSize + curSeg.fIOVMAddr;
-               curSeg.fLength   = fMaxSegmentSize;
+           // align the next segment start address
 
-               state->fLength   = remain;
-               offset          -= remain;
+           reduce = (state->fIOVMAddr & fAlignMaskInternalSegments);
+           if (reduce && (curSeg.fLength > reduce))
+           {
+               leftover       += reduce;
+               curSeg.fLength -= reduce;
+               state->fIOVMAddr = curSeg.fLength + curSeg.fIOVMAddr;
            }
 
-           if (internalState->fCursor
-               && (0 != (internalState->fSourceAlignMask & curSeg.fIOVMAddr)))
+           if (leftover)
            {
-               curSeg.fIOVMAddr = 0;
-               ret = kIOReturnNotAligned;
-               break;
+               DEBG("reduce seg by 0x%llx @ 0x%llx [0x%llx, 0x%llx]\n", 
+                     leftover, offset,
+                     curSeg.fIOVMAddr, curSeg.fLength);
+               state->fLength   = leftover;
+               offset          -= leftover;
+           }
+
+           // 
+
+           if (internalState->fCursor)
+           {
+               bool misaligned;
+               uint32_t mask;
+
+               mask = (segIndex ? fAlignMaskInternalSegments : internalState->fSourceAlignMask);
+               misaligned = (0 != (mask & curSeg.fIOVMAddr));
+               if (!misaligned)
+               {
+                   mask = fAlignMaskLength;
+                   misaligned |= (0 != (mask &  curSeg.fLength));
+               }
+               if (misaligned)
+               {
+                   DEBG("cursor misaligned %qx:%qx\n", curSeg.fIOVMAddr, curSeg.fLength);
+                   curSeg.fIOVMAddr = 0;
+                   ret = kIOReturnNotAligned;
+                   break;
+               }
            }
 
            if (offset >= memLength)
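A worked numeric example (values invented for illustration) of the three-pass output-segment reduction added to genIOVMSegments() above:

// Candidate segment:  fIOVMAddr = 0x100200, fLength = 0xE743
// Specification:      fMaxSegmentSize = 0x10000, fAlignMaskLength = 0x1FF (512-byte lengths),
//                     fAlignMaskInternalSegments = 0xFFF (4 KB interior starts)
//
// pass 1, fMaxSegmentSize:   0xE743 <= 0x10000                   -> no change
// pass 2, length alignment:  0xE743 & 0x1FF = 0x143              -> fLength 0xE600, leftover 0x143
// pass 3, next-start align:  (0x100200 + 0xE600) & 0xFFF = 0x800 -> fLength 0xDE00, leftover 0x943
//
// The emitted segment is [0x100200, 0xDE00]; the 0x943 leftover bytes are pushed back
// (state->fLength = leftover, offset -= leftover) and start the next segment at 0x10E000.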
@@ -1218,7 +1372,7 @@ IODMACommand::clientOutputSegment(
 
     if (target->fNumAddressBits && (target->fNumAddressBits < 64) 
        && ((segment.fIOVMAddr + segment.fLength - 1) >> target->fNumAddressBits)
-       && (target->reserved->fLocalMapperPageAlloc || !target->fMapper))
+       && (target->reserved->fLocalMapperAlloc || !target->fMapper))
     {
        DEBG("kIOReturnMessageTooLarge(fNumAddressBits) %qx, %qx\n", segment.fIOVMAddr, segment.fLength);
        ret = kIOReturnMessageTooLarge;
index 1b53b0b0b155c46be0da75e3d59fe3538c8c74a2..6875e0c429107c4637b993defa0c82f70536d5c1 100644 (file)
@@ -149,6 +149,13 @@ IOByteCount IODMAEventSource::validFIFODepth(IOByteCount depth, IODirection dire
 }
 
 
+IOReturn IODMAEventSource::setFrameSize(UInt8 byteCount)
+{
+  if ((dmaController == 0) || (dmaIndex == 0xFFFFFFFF)) return kIOReturnError;
+  
+  return dmaController->setFrameSize(dmaIndex, byteCount);
+}
+
 // protected
 
 bool IODMAEventSource::checkForWork(void)
index 79c97e1af1145073b5acdbfe01cdeb5eb94aed2c..e3afbdcf3a73d444de06075b910ffffbdfbdf9a3 100644 (file)
 #include <IOKit/IOMemoryDescriptor.h>
 #include <libkern/OSAtomic.h>
 
+struct IODataQueueInternal
+{
+    mach_msg_header_t msg;
+    UInt32            queueSize;
+};
+
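With this struct, notifyMsg carries both the notification message and the queue size in kernel-private memory, where earlier code read the size from the user-mappable dataQueue header. A minimal sketch of the cast the methods below rely on; the helper name is hypothetical:

static inline UInt32
ExampleQueueSize(void * notifyMsg)
{
    return ((IODataQueueInternal *) notifyMsg)->queueSize;
}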
 #ifdef enqueue
 #undef enqueue
 #endif
@@ -95,6 +101,14 @@ Boolean IODataQueue::initWithCapacity(UInt32 size)
         return false;
     }
 
+    assert(!notifyMsg);
+    notifyMsg = IONew(IODataQueueInternal, 1);
+    if (!notifyMsg) {
+        return false;
+    }
+    bzero(notifyMsg, sizeof(IODataQueueInternal));
+    ((IODataQueueInternal *)notifyMsg)->queueSize = size;
+
     dataQueue = (IODataQueueMemory *)IOMallocAligned(allocSize, PAGE_SIZE);
     if (dataQueue == 0) {
         return false;
@@ -105,13 +119,6 @@ Boolean IODataQueue::initWithCapacity(UInt32 size)
 //  dataQueue->head         = 0;
 //  dataQueue->tail         = 0;
 
-    if (!notifyMsg) {
-        notifyMsg = IOMalloc(sizeof(mach_msg_header_t));
-        if (!notifyMsg)
-            return false;
-    }
-    bzero(notifyMsg, sizeof(mach_msg_header_t));
-
     return true;
 }
 
@@ -132,14 +139,14 @@ Boolean IODataQueue::initWithEntries(UInt32 numEntries, UInt32 entrySize)
 
 void IODataQueue::free()
 {
-    if (dataQueue) {
-        IOFreeAligned(dataQueue, round_page(dataQueue->queueSize + DATA_QUEUE_MEMORY_HEADER_SIZE));
-        dataQueue = NULL;
-
-        if (notifyMsg) {
-            IOFree(notifyMsg, sizeof(mach_msg_header_t));
-            notifyMsg = NULL;
-        }
+       if (notifyMsg) {
+               if (dataQueue) {
+                       IOFreeAligned(dataQueue, round_page(((IODataQueueInternal *)notifyMsg)->queueSize + DATA_QUEUE_MEMORY_HEADER_SIZE));
+                       dataQueue = NULL;
+               }
+
+               IODelete(notifyMsg, IODataQueueInternal, 1);
+               notifyMsg = NULL;
     }
 
     super::free();
@@ -152,14 +159,17 @@ Boolean IODataQueue::enqueue(void * data, UInt32 dataSize)
     const UInt32       head      = dataQueue->head;  // volatile
     const UInt32       tail      = dataQueue->tail;
     const UInt32       entrySize = dataSize + DATA_QUEUE_ENTRY_HEADER_SIZE;
+    UInt32             queueSize;
     IODataQueueEntry * entry;
 
     // Check for overflow of entrySize
     if (dataSize > UINT32_MAX - DATA_QUEUE_ENTRY_HEADER_SIZE) {
         return false;
     }
+
     // Check for underflow of (dataQueue->queueSize - tail)
-    if (dataQueue->queueSize < tail) {
+    queueSize = ((IODataQueueInternal *) notifyMsg)->queueSize;
+    if ((queueSize < tail) || (queueSize < head)) {
         return false;
     }
 
@@ -167,7 +177,7 @@ Boolean IODataQueue::enqueue(void * data, UInt32 dataSize)
     {
         // Is there enough room at the end for the entry?
         if ((entrySize <= UINT32_MAX - tail) &&
-            ((tail + entrySize) <= dataQueue->queueSize) )
+            ((tail + entrySize) <= queueSize) )
         {
             entry = (IODataQueueEntry *)((UInt8 *)dataQueue->queue + tail);
 
@@ -191,7 +201,7 @@ Boolean IODataQueue::enqueue(void * data, UInt32 dataSize)
             // doing this. The user client checks for this and will look for the size
             // at the beginning if there isn't room for it at the end.
 
-            if ( ( dataQueue->queueSize - tail ) >= DATA_QUEUE_ENTRY_HEADER_SIZE )
+            if ( ( queueSize - tail ) >= DATA_QUEUE_ENTRY_HEADER_SIZE )
             {
                 ((IODataQueueEntry *)((UInt8 *)dataQueue->queue + tail))->size = dataSize;
             }
@@ -236,14 +246,13 @@ Boolean IODataQueue::enqueue(void * data, UInt32 dataSize)
 
 void IODataQueue::setNotificationPort(mach_port_t port)
 {
-    mach_msg_header_t * msgh = (mach_msg_header_t *) notifyMsg;
+    mach_msg_header_t * msgh;
 
-    if (msgh) {
-        bzero(msgh, sizeof(mach_msg_header_t));
-        msgh->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0);
-        msgh->msgh_size = sizeof(mach_msg_header_t);
-        msgh->msgh_remote_port = port;
-    }
+    msgh = &((IODataQueueInternal *) notifyMsg)->msg;
+    bzero(msgh, sizeof(mach_msg_header_t));
+    msgh->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0);
+    msgh->msgh_size = sizeof(mach_msg_header_t);
+    msgh->msgh_remote_port = port;
 }
 
 void IODataQueue::sendDataAvailableNotification()
@@ -251,8 +260,8 @@ void IODataQueue::sendDataAvailableNotification()
     kern_return_t       kr;
     mach_msg_header_t * msgh;
 
-    msgh = (mach_msg_header_t *) notifyMsg;
-    if (msgh && msgh->msgh_remote_port) {
+    msgh = &((IODataQueueInternal *) notifyMsg)->msg;
+    if (msgh->msgh_remote_port) {
         kr = mach_msg_send_from_kernel_with_options(msgh, msgh->msgh_size, MACH_SEND_TIMEOUT, MACH_MSG_TIMEOUT_NONE);
         switch(kr) {
             case MACH_SEND_TIMED_OUT:    // Notification already sent
@@ -269,9 +278,11 @@ void IODataQueue::sendDataAvailableNotification()
 IOMemoryDescriptor *IODataQueue::getMemoryDescriptor()
 {
     IOMemoryDescriptor *descriptor = 0;
+    UInt32              queueSize;
 
+    queueSize = ((IODataQueueInternal *) notifyMsg)->queueSize;
     if (dataQueue != 0) {
-        descriptor = IOMemoryDescriptor::withAddress(dataQueue, dataQueue->queueSize + DATA_QUEUE_MEMORY_HEADER_SIZE, kIODirectionOutIn);
+        descriptor = IOMemoryDescriptor::withAddress(dataQueue, queueSize + DATA_QUEUE_MEMORY_HEADER_SIZE, kIODirectionOutIn);
     }
 
     return descriptor;
index 6533ed937dbdd034862c7b474b28d4313a7eb894..965670f373a9bbe132264421a7b64aef93efd1c4 100644 (file)
@@ -61,6 +61,7 @@ const OSSymbol *      gIODTUnitKey;
 const OSSymbol *       gIODTCompatibleKey;
 const OSSymbol *       gIODTTypeKey;
 const OSSymbol *       gIODTModelKey;
+const OSSymbol *       gIODTTargetTypeKey;
 
 const OSSymbol *       gIODTSizeCellKey;
 const OSSymbol *       gIODTAddressCellKey;
@@ -106,6 +107,7 @@ IODeviceTreeAlloc( void * dtTop )
     gIODTCompatibleKey         = OSSymbol::withCStringNoCopy( "compatible" );
     gIODTTypeKey               = OSSymbol::withCStringNoCopy( "device_type" );
     gIODTModelKey              = OSSymbol::withCStringNoCopy( "model" );
+    gIODTTargetTypeKey         = OSSymbol::withCStringNoCopy( "target-type" );
     gIODTSizeCellKey   = OSSymbol::withCStringNoCopy( "#size-cells" );
     gIODTAddressCellKey = OSSymbol::withCStringNoCopy( "#address-cells" );
     gIODTRangeKey              = OSSymbol::withCStringNoCopy( "ranges" );
@@ -898,7 +900,7 @@ OSCollectionIterator * IODTFindMatchingEntries( IORegistryEntry * from,
     }
 
     cIter = OSCollectionIterator::withCollection( result);
-    result->release();
+    if (result) result->release();
 
     return( cIter);
 }
index 95046dacd2d7eed49047077ac535a105e7955fd8..3393993e0a286123748daf7a2e514cafb39f13d9 100644 (file)
@@ -114,7 +114,7 @@ bool IOEventSource::tryCloseGate()
 
 int IOEventSource::sleepGate(void *event, UInt32 type)
 { 
-       bool res; 
+       int res; 
        IOStatisticsOpenGate(); 
        res = workLoop->sleepGate(event, type); 
        IOStatisticsCloseGate(); 
@@ -123,7 +123,7 @@ int IOEventSource::sleepGate(void *event, UInt32 type)
 
 int IOEventSource::sleepGate(void *event, AbsoluteTime deadline, UInt32 type)
 { 
-       bool res; 
+       int res; 
        IOStatisticsOpenGate(); 
        res = workLoop->sleepGate(event, deadline, type);
        IOStatisticsCloseGate(); 
index f85f2ab0e65d5e75b7419cfc4f45f97d858f41ee..867251b27f3fe04de23a8ab15d9c93c48576025a 100644 (file)
@@ -182,6 +182,7 @@ extern "C" ppnum_t          pmap_find_phys(pmap_t pmap, addr64_t va);
 #define        DISABLE_TRIM            0
 #define TRIM_DELAY             5000
 
+extern boolean_t               root_is_CF_drive;
 extern unsigned int            save_kdebug_enable;
 extern uint32_t                gIOHibernateState;
 uint32_t                       gIOHibernateMode;
@@ -193,23 +194,28 @@ static uint64_t                   gIOHibernateCompression = 0x80;  // default compression 50%
 
 static IODTNVRAM *             gIOOptionsEntry;
 static IORegistryEntry *       gIOChosenEntry;
+
+static const OSSymbol *        gIOHibernateBootImageKey;
+
 #if defined(__i386__) || defined(__x86_64__)
-static const OSSymbol *         gIOCreateEFIDevicePathSymbol;
+
 static const OSSymbol *        gIOHibernateRTCVariablesKey;
 static const OSSymbol *         gIOHibernateBoot0082Key;
 static const OSSymbol *         gIOHibernateBootNextKey;
 static OSData *                        gIOHibernateBoot0082Data;
 static OSData *                        gIOHibernateBootNextData;
 static OSObject *              gIOHibernateBootNextSave;
-static struct kern_direct_file_io_ref_t * gDebugImageFileRef;
-#endif
+
+static IOPolledFileIOVars *     gDebugImageFileVars;
+static IOLock             *     gDebugImageLock;
+
+#endif /* defined(__i386__) || defined(__x86_64__) */
 
 static IOLock *                           gFSLock;
 static uint32_t                           gFSState;
 static IOPolledFileIOVars                gFileVars;
 static IOHibernateVars                   gIOHibernateVars;
-static struct kern_direct_file_io_ref_t * gIOHibernateFileRef;
-static hibernate_cryptvars_t             gIOHibernateCryptWakeContext;
+static IOPolledFileCryptVars             gIOHibernateCryptWakeContext;
 static hibernate_graphics_t              _hibernateGraphics;
 static hibernate_graphics_t *            gIOHibernateGraphicsInfo = &_hibernateGraphics;
 static hibernate_statistics_t            _hibernateStats;
@@ -224,24 +230,14 @@ enum
 };
 
 static IOReturn IOHibernateDone(IOHibernateVars * vars);
+static IOReturn IOWriteExtentsToFile(IOPolledFileIOVars * vars, uint32_t signature);
+static void     IOSetBootImageNVRAM(OSData * data);
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
-enum { kXPRamAudioVolume = 8 };
 enum { kDefaultIOSize = 128 * 1024 };
 enum { kVideoMapSize  = 80 * 1024 * 1024 };
 
-#ifndef kIOMediaPreferredBlockSizeKey
-#define kIOMediaPreferredBlockSizeKey  "Preferred Block Size"
-#endif
-
-#ifndef kIOBootPathKey 
-#define kIOBootPathKey                 "bootpath"
-#endif
-#ifndef kIOSelectedBootDeviceKey       
-#define kIOSelectedBootDeviceKey       "boot-device"
-#endif
-
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 // copy from phys addr to MD
@@ -358,1020 +354,31 @@ hibernate_set_page_state(hibernate_page_list_t * page_list, hibernate_page_list_
     }
 }
 
-static vm_offset_t
-hibernate_page_list_iterate(hibernate_page_list_t * list, vm_offset_t * pPage)
-{
-    uint32_t            page = *pPage;
-    uint32_t            count;
-    hibernate_bitmap_t * bitmap;
-
-    while ((bitmap = hibernate_page_bitmap_pin(list, &page)))
-    {
-       count = hibernate_page_bitmap_count(bitmap, TRUE, page);
-       if (!count)
-           break;
-       page += count;
-       if (page <= bitmap->last_page)
-           break;
-    }
-
-    *pPage = page;
-    if (bitmap)
-       count = hibernate_page_bitmap_count(bitmap, FALSE, page);
-    else
-       count = 0;
-
-    return (count);
-}
-
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-
-static IOReturn
-IOHibernatePollerProbe(IOPolledFileIOVars * vars, IOService * target)
-{
-    IOReturn            err = kIOReturnError;
-    int32_t            idx;
-    IOPolledInterface * poller;
-
-    for (idx = vars->pollers->getCount() - 1; idx >= 0; idx--)
-    {
-        poller = (IOPolledInterface *) vars->pollers->getObject(idx);
-        err = poller->probe(target);
-        if (err)
-        {
-            HIBLOG("IOPolledInterface::probe[%d] 0x%x\n", idx, err);
-            break;
-        }
-    }
-
-    return (err);
-}
-
-static IOReturn
-IOHibernatePollerOpen(IOPolledFileIOVars * vars, uint32_t state, IOMemoryDescriptor * md)
-{
-    IOReturn            err = kIOReturnError;
-    int32_t            idx;
-    IOPolledInterface * poller;
-
-    for (idx = vars->pollers->getCount() - 1; idx >= 0; idx--)
-    {
-        poller = (IOPolledInterface *) vars->pollers->getObject(idx);
-        err = poller->open(state, md);
-        if (err)
-        {
-            HIBLOG("IOPolledInterface::open[%d] 0x%x\n", idx, err);
-            break;
-        }
-    }
-
-    return (err);
-}
-
-static IOReturn
-IOHibernatePollerClose(IOPolledFileIOVars * vars, uint32_t state)
-{
-    IOReturn            err = kIOReturnError;
-    int32_t            idx;
-    IOPolledInterface * poller;
-
-    for (idx = 0;
-         (poller = (IOPolledInterface *) vars->pollers->getObject(idx));
-         idx++)
-    {
-        err = poller->close(state);
-        if (err)
-            HIBLOG("IOPolledInterface::close[%d] 0x%x\n", idx, err);
-    }
-
-    return (err);
-}
-
-static void
-IOHibernatePollerIOComplete(void *   target,
-                            void *   parameter,
-                            IOReturn status,
-                            UInt64   actualByteCount)
-{
-    IOPolledFileIOVars * vars = (IOPolledFileIOVars *) parameter;
-
-    vars->ioStatus = status;
-}
-
-static IOReturn
-IOHibernatePollerIO(IOPolledFileIOVars * vars, 
-                    uint32_t operation, uint32_t bufferOffset, 
-                   uint64_t deviceOffset, uint64_t length)
-{
-
-    IOReturn            err = kIOReturnError;
-    IOPolledInterface * poller;
-    IOPolledCompletion  completion;
-
-    completion.target    = 0;
-    completion.action    = &IOHibernatePollerIOComplete;
-    completion.parameter = vars;
-
-    vars->ioStatus = -1;
-
-    poller = (IOPolledInterface *) vars->pollers->getObject(0);
-    err = poller->startIO(operation, bufferOffset, deviceOffset + vars->block0, length, completion);
-    if (err)
-        HIBLOG("IOPolledInterface::startIO[%d] 0x%x\n", 0, err);
-
-    return (err);
-}
-
-static IOReturn
-IOHibernatePollerIODone(IOPolledFileIOVars * vars, bool abortable)
-{
-    IOReturn            err = kIOReturnSuccess;
-    int32_t            idx = 0;
-    IOPolledInterface * poller;
-
-    while (-1 == vars->ioStatus)
-    {
-        for (idx = 0; 
-           (poller = (IOPolledInterface *) vars->pollers->getObject(idx));
-             idx++)
-        {
-           IOReturn newErr;
-            newErr = poller->checkForWork();
-           if ((newErr == kIOReturnAborted) && !abortable)
-               newErr = kIOReturnSuccess;
-           if (kIOReturnSuccess == err)
-               err = newErr;
-        }
-    }
-
-    if ((kIOReturnSuccess == err) && abortable && hibernate_should_abort())
-    {
-        err = kIOReturnAborted;
-       HIBLOG("IOPolledInterface::checkForWork sw abort\n");
-    }
-
-    if (err)
-    {
-       HIBLOG("IOPolledInterface::checkForWork[%d] 0x%x\n", idx, err);
-    }
-    else 
-    {
-       err = vars->ioStatus;
-       if (kIOReturnSuccess != err)
-           HIBLOG("IOPolledInterface::ioStatus 0x%x\n", err);
-    }
-
-    return (err);
-}
-
-IOReturn
-IOPolledInterface::checkAllForWork(void)
-{
-    IOReturn            err = kIOReturnNotReady;
-    int32_t            idx;
-    IOPolledInterface * poller;
-
-    IOHibernateVars * vars  = &gIOHibernateVars;
-
-    if (!vars->fileVars || !vars->fileVars->pollers)
-       return (err);
-
-    for (idx = 0;
-            (poller = (IOPolledInterface *) vars->fileVars->pollers->getObject(idx));
-            idx++)
-    {
-        err = poller->checkForWork();
-        if (err)
-            HIBLOG("IOPolledInterface::checkAllForWork[%d] 0x%x\n", idx, err);
-    }
-
-    return (err);
-}
-
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-
-struct _OpenFileContext
-{
-    OSData * extents;
-    uint64_t size;
-};
-
-static void
-file_extent_callback(void * ref, uint64_t start, uint64_t length)
-{
-    _OpenFileContext * ctx = (_OpenFileContext *) ref;
-    IOPolledFileExtent extent;
-
-    extent.start  = start;
-    extent.length = length;
-
-    HIBLOG("[0x%qx, 0x%qx]\n", start, length);
-
-    ctx->extents->appendBytes(&extent, sizeof(extent));
-    ctx->size += length;
-}
-
-static IOService * 
-IOCopyMediaForDev(dev_t device)
-{
-    OSDictionary * matching;
-    OSNumber *     num;
-    OSIterator *   iter;
-    IOService *    result = 0;
-
-    matching = IOService::serviceMatching("IOMedia");
-    if (!matching)
-        return (0);
-    do
-    {
-        num = OSNumber::withNumber(major(device), 32);
-        if (!num)
-            break;
-        matching->setObject(kIOBSDMajorKey, num);
-        num->release();
-        num = OSNumber::withNumber(minor(device), 32);
-        if (!num)
-            break;
-        matching->setObject(kIOBSDMinorKey, num);
-        num->release();
-        if (!num)
-            break;
-        iter = IOService::getMatchingServices(matching);
-        if (iter)
-        {
-            result = (IOService *) iter->getNextObject();
-            result->retain();
-            iter->release();
-        }
-    }
-    while (false);
-    matching->release();
-
-    return (result);
-}
-
-/* 
- * Writes header to disk with signature, block size and file extents data.
- * If there are more than 2 extents, then they are written on second block.
- */
-static IOReturn
-WriteExtentsToFile(struct kern_direct_file_io_ref_t * fileRef,
-                   uint32_t signature, uint32_t blockSize,
-                   IOPolledFileExtent *fileExtents,
-                   IOByteCount size)
-{
-    IOHibernateImageHeader  hdr;
-    IOItemCount  count;
-    IOReturn     err = kIOReturnSuccess;
-    int         rc;
-
-    memset(&hdr, 0, sizeof(IOHibernateImageHeader));
-    count = size;
-    if (count > sizeof(hdr.fileExtentMap))
-    {
-        hdr.fileExtentMapSize = count;
-        count = sizeof(hdr.fileExtentMap);
-    }
-    else
-        hdr.fileExtentMapSize = sizeof(hdr.fileExtentMap);
-
-    bcopy(fileExtents, &hdr.fileExtentMap[0], count);
-
-    // copy file block extent list if larger than header
-    if (hdr.fileExtentMapSize > sizeof(hdr.fileExtentMap))
-    {
-            count = hdr.fileExtentMapSize - sizeof(hdr.fileExtentMap);
-            rc = kern_write_file(fileRef, blockSize, 
-                                 (caddr_t)(((uint8_t *)fileExtents) + sizeof(hdr.fileExtentMap)), 
-                                 count, IO_SKIP_ENCRYPTION);
-            if (rc != 0) {
-                HIBLOG("kern_write_file returned %d\n", rc);
-                err = kIOReturnIOError;
-                goto exit;
-            }    
-    }
-    hdr.signature = signature;
-    hdr.deviceBlockSize = blockSize;
-
-    rc = kern_write_file(fileRef, 0, (char *)&hdr, sizeof(hdr), IO_SKIP_ENCRYPTION);
-    if (rc != 0) {
-        HIBLOG("kern_write_file returned %d\n", rc);
-        err = kIOReturnIOError;
-        goto exit;
-    }
-
-exit:
-    return err;
-}
-
-static IOReturn
-GetImageBlockSize(IOService *part, OSArray *pollers, IOByteCount *blockSize)
-{
-    IOService       * service;
-    IORegistryEntry * next;
-    IORegistryEntry * child;
-
-    IOReturn        err = kIOReturnSuccess;
-
-
-    next = part;
-    do
-    {
-        IOPolledInterface * poller;
-        OSObject *          obj;
-        OSNumber        * num;
-
-        obj = next->getProperty(kIOPolledInterfaceSupportKey);
-        if (kOSBooleanFalse == obj)
-        {
-            pollers->flushCollection();
-            break;
-        }
-        else if ((poller = OSDynamicCast(IOPolledInterface, obj)))
-            pollers->setObject(poller);
-
-        if ((service = OSDynamicCast(IOService, next)) 
-            && service->getDeviceMemory()
-            && !pollers->getCount())   break;
-
-        if ((num = OSDynamicCast(OSNumber, next->getProperty(kIOMediaPreferredBlockSizeKey))))
-            *blockSize = num->unsigned32BitValue();
-        child = next;
-    }
-    while ((next = child->getParentEntry(gIOServicePlane)) 
-           && child->isParent(next, gIOServicePlane, true));
-
-    if (*blockSize < 4096) *blockSize = 4096;
-
-    if (!pollers->getCount())
-        err = kIOReturnUnsupported;
-
-    return err;
-}
-
-IOReturn
-IOPolledFileOpen( const char * filename, uint64_t setFileSize,
-                 IOBufferMemoryDescriptor * ioBuffer, 
-                 IOPolledFileIOVars ** fileVars, OSData ** fileExtents,
-                 OSData ** imagePath, uint8_t * volumeCryptKey)
-{
-    IOReturn                   err = kIOReturnSuccess;
-    IOPolledFileIOVars *       vars;
-    _OpenFileContext           ctx;
-    OSData *                   extentsData;
-    IOService *                 part = 0;
-    OSString *                  keyUUID = 0;
-    OSString *                  keyStoreUUID = 0;
-    dev_t                      block_dev;
-    dev_t                      hibernate_image_dev;
-    uint64_t                   maxiobytes;
-    AbsoluteTime                startTime, endTime;
-    uint64_t                    nsec;
-    caddr_t         write_file_addr = NULL;
-    vm_size_t       write_file_len = 0;
-
-    vars = IONew(IOPolledFileIOVars, 1);
-    if (!vars) return (kIOReturnNoMemory);
-    bzero(vars, sizeof(*vars));
-
-    do
-    {
-       vars->io           = false;
-       vars->buffer       = (uint8_t *) ioBuffer->getBytesNoCopy();
-       vars->bufferHalf   = 0;
-       vars->bufferOffset = 0;
-       vars->bufferSize   = ioBuffer->getLength() >> 1;
-    
-       extentsData = OSData::withCapacity(32);
-       ctx.extents = extentsData;
-       ctx.size    = 0;
-       clock_get_uptime(&startTime);
-    if (!gDebugImageFileRef) 
-    {
-        // Avoid writing the header if it is written when file is prep'd for debug data
-        // Image is locked during prep for debug data. So, write may fail.
-        write_file_addr = (caddr_t)gIOHibernateCurrentHeader;
-        write_file_len = sizeof(IOHibernateImageHeader);
-    }
-       vars->fileRef = kern_open_file_for_direct_io(filename, 
-                                                 true,
-                                                   &file_extent_callback, &ctx, 
-                                                   setFileSize,
-                                                   // write file:
-                                                    0, write_file_addr,
-                                                    write_file_len,
-                                                    // results
-                                                   &block_dev,
-                                                   &hibernate_image_dev,
-                                                    &vars->block0,
-                                                    &maxiobytes,
-                                                    &vars->flags);
-#if 0
-       uint32_t msDelay = (131071 & random());
-       HIBLOG("sleep %d\n", msDelay);
-       IOSleep(msDelay);
-#endif
-        clock_get_uptime(&endTime);
-        SUB_ABSOLUTETIME(&endTime, &startTime);
-        absolutetime_to_nanoseconds(endTime, &nsec);
-
-       if (!vars->fileRef) err = kIOReturnNoSpace;
-
-       IOLockLock(gFSLock);
-       if (kFSOpening != gFSState) err = kIOReturnTimeout;
-       IOLockUnlock(gFSLock);
-
-        HIBLOG("kern_open_file_for_direct_io(%d) took %qd ms\n", err, nsec / 1000000ULL);
-       if (kIOReturnSuccess != err) break;
-
-        if (kIOHibernateModeSSDInvert & gIOHibernateMode)
-            vars->flags ^= kIOHibernateOptionSSD;
-
-       HIBLOG("Opened file %s, size %qd, partition base 0x%qx, maxio %qx ssd %d\n", filename, ctx.size, 
-                    vars->block0, maxiobytes, kIOHibernateOptionSSD & vars->flags);
-       if (ctx.size < 1*1024*1024)             // check against image size estimate!
-       {
-           err = kIOReturnNoSpace;
-           break;
-       }
-
-       vars->fileSize = ctx.size;
-        if (maxiobytes < vars->bufferSize) vars->bufferSize = maxiobytes;
-    
-       vars->extentMap = (IOPolledFileExtent *) extentsData->getBytesNoCopy();
-
-        part = IOCopyMediaForDev(block_dev);
-        if (!part)
-        {
-            err = kIOReturnNotFound;
-            break;
-       }
-        err = part->callPlatformFunction(PLATFORM_FUNCTION_GET_MEDIA_ENCRYPTION_KEY_UUID, false, 
-                                         (void *) &keyUUID, (void *) &keyStoreUUID, NULL, NULL);
-        if ((kIOReturnSuccess == err) && keyUUID && keyStoreUUID)
-        {
-//            IOLog("got volume key %s\n", keyStoreUUID->getCStringNoCopy());
-            uuid_t                  volumeKeyUUID;
-            aks_volume_key_t        vek;
-            static IOService *      sKeyStore;
-            static const OSSymbol * sAKSGetKey;
-
-            if (!sAKSGetKey)
-                sAKSGetKey = OSSymbol::withCStringNoCopy(AKS_PLATFORM_FUNCTION_GETKEY);
-            if (!sKeyStore)
-                sKeyStore = (IOService *) IORegistryEntry::fromPath(AKS_SERVICE_PATH, gIOServicePlane);
-            if (sKeyStore)
-                err = uuid_parse(keyStoreUUID->getCStringNoCopy(), volumeKeyUUID);
-            else
-                err = kIOReturnNoResources;
-            if (kIOReturnSuccess == err)    
-                err = sKeyStore->callPlatformFunction(sAKSGetKey, true, volumeKeyUUID, &vek, NULL, NULL);
-            if (kIOReturnSuccess != err)    
-                IOLog("volume key err 0x%x\n", err);
-            else
-            {
-                size_t bytes = (kIOHibernateAESKeySize / 8);
-                if (vek.key.keybytecount < bytes)
-                     bytes = vek.key.keybytecount;
-                bcopy(&vek.key.keybytes[0], volumeCryptKey, bytes);
-            }
-            bzero(&vek, sizeof(vek));
-        }
-        part->release();
-
-        part = IOCopyMediaForDev(hibernate_image_dev);
-        if (!part)
-        {
-            err = kIOReturnNotFound;
-            break;
-       }
-
-        vars->pollers = OSArray::withCapacity(4);
-        if (!vars->pollers)
-        {
-            err = kIOReturnNoMemory;
-            break;
-        }
-
-        err = GetImageBlockSize(part, vars->pollers, &vars->blockSize);
-
-        HIBLOG("hibernate image major %d, minor %d, blocksize %ld, pollers %d\n",
-               major(hibernate_image_dev), minor(hibernate_image_dev), (long)vars->blockSize, 
-               vars->pollers->getCount());
-
-        if (err != kIOReturnSuccess)
-            break;
-
-       IORegistryEntry * next;
-    OSData          * data;
-       if (vars->blockSize < sizeof(IOHibernateImageHeader))
-       {
-           err = kIOReturnError;
-           continue;
-       }
-
-       err = IOHibernatePollerProbe(vars, (IOService *) part);
-       if (kIOReturnSuccess != err) break;
-
-       err = IOHibernatePollerOpen(vars, kIOPolledPreflightState, ioBuffer);
-       if (kIOReturnSuccess != err) break;
-
-       vars->media = part;
-        next = part;
-       while (next)
-       {
-           next->setProperty(kIOPolledInterfaceActiveKey, kOSBooleanTrue);
-           next = next->getParentEntry(gIOServicePlane);
-       }
-
-       *fileVars    = vars;
-       *fileExtents = extentsData;
-    
-       // make imagePath
-
-       if ((extentsData->getLength() >= sizeof(IOPolledFileExtent)))
-       {
-           char str2[24 + sizeof(uuid_string_t) + 2];
-
-#if defined(__i386__) || defined(__x86_64__)
-           if (!gIOCreateEFIDevicePathSymbol)
-               gIOCreateEFIDevicePathSymbol = OSSymbol::withCString("CreateEFIDevicePath");
-
-            if (keyUUID)
-                snprintf(str2, sizeof(str2), "%qx:%s", 
-                                vars->extentMap[0].start, keyUUID->getCStringNoCopy());
-            else
-                snprintf(str2, sizeof(str2), "%qx", vars->extentMap[0].start);
-
-           err = IOService::getPlatform()->callPlatformFunction(
-                                               gIOCreateEFIDevicePathSymbol, false,
-                                               (void *) part, (void *) str2,
-                                               (void *) (uintptr_t) true, (void *) &data);
-#else
-           char str1[256];
-           int len = sizeof(str1);
-
-           if (!part->getPath(str1, &len, gIODTPlane))
-               err = kIOReturnNotFound;
-           else
-           {
-               snprintf(str2, sizeof(str2), ",%qx", vars->extentMap[0].start);
-               // (strip the plane name)
-               char * tail = strchr(str1, ':');
-               if (!tail)
-                   tail = str1 - 1;
-               data = OSData::withBytes(tail + 1, strlen(tail + 1));
-               data->appendBytes(str2, strlen(str2));
-           }
-#endif
-       if (kIOReturnSuccess == err)
-           *imagePath = data;
-       else
-           HIBLOG("error 0x%x getting path\n", err);
-       }
-    }
-    while (false);
-
-    if (kIOReturnSuccess != err)
-    {
-        HIBLOG("error 0x%x opening hibernation file\n", err);
-       if (vars->fileRef)
-       {
-           kern_close_file_for_direct_io(vars->fileRef, 0, 0, 0, 0, 0);
-           vars->fileRef = NULL;
-       }
-    }
-    else
-    {
-        WriteExtentsToFile(vars->fileRef, kIOHibernateHeaderOpenSignature, vars->blockSize, 
-                           (IOPolledFileExtent *)extentsData->getBytesNoCopy(),
-                           extentsData->getLength());
-    }
-
-    if (part)
-       part->release();
-
-    return (err);
-}
-
-IOReturn
-IOPolledFileClose( IOPolledFileIOVars * vars )
-{
-    if (vars->pollers)
-    {
-       IOHibernatePollerClose(vars, kIOPolledPostflightState);
-        vars->pollers->release();
-    }
-
-    bzero(vars, sizeof(IOPolledFileIOVars));
-
-    return (kIOReturnSuccess);
-}
-
-static IOReturn
-IOPolledFileSeek(IOPolledFileIOVars * vars, uint64_t position)
-{
-    IOPolledFileExtent * extentMap;
-
-    extentMap = vars->extentMap;
-
-    vars->position = position;
-
-    while (position >= extentMap->length)
-    {
-       position -= extentMap->length;
-       extentMap++;
-    }
-
-    vars->currentExtent   = extentMap;
-    vars->extentRemaining = extentMap->length - position;
-    vars->extentPosition  = vars->position - position;
-
-    if (vars->bufferSize <= vars->extentRemaining)
-       vars->bufferLimit = vars->bufferSize;
-    else
-       vars->bufferLimit = vars->extentRemaining;
-
-    return (kIOReturnSuccess);
-}
-
-static IOReturn
-IOPolledFileWrite(IOPolledFileIOVars * vars,
-                    const uint8_t * bytes, IOByteCount size,
-                    hibernate_cryptvars_t * cryptvars)
-{
-    IOReturn    err = kIOReturnSuccess;
-    IOByteCount copy;
-    bool       flush = false;
-
-    do
-    {
-       if (!bytes && !size)
-       {
-           // seek to end of block & flush
-           size = vars->position & (vars->blockSize - 1);
-           if (size)
-               size = vars->blockSize - size;
-           flush = true;
-            // use some garbage for the fill
-            bytes = vars->buffer + vars->bufferOffset;
-       }
-
-       copy = vars->bufferLimit - vars->bufferOffset;
-       if (copy > size)
-           copy = size;
-       else
-           flush = true;
-
-       if (bytes)
-       {
-           bcopy(bytes, vars->buffer + vars->bufferHalf + vars->bufferOffset, copy);
-           bytes += copy;
-       }
-        else
-           bzero(vars->buffer + vars->bufferHalf + vars->bufferOffset, copy);
-        
-       size -= copy;
-       vars->bufferOffset += copy;
-       vars->position += copy;
-
-       if (flush && vars->bufferOffset)
-       {
-           uint64_t offset = (vars->position - vars->bufferOffset 
-                               - vars->extentPosition + vars->currentExtent->start);
-           uint32_t length = (vars->bufferOffset);
-
-#if CRYPTO
-            if (cryptvars && vars->encryptStart
-                && (vars->position > vars->encryptStart)
-                && ((vars->position - length) < vars->encryptEnd))
-            {
-                AbsoluteTime startTime, endTime;
-
-                uint64_t encryptLen, encryptStart;
-                encryptLen = vars->position - vars->encryptStart;
-                if (encryptLen > length)
-                    encryptLen = length;
-                encryptStart = length - encryptLen;
-                if (vars->position > vars->encryptEnd)
-                    encryptLen -= (vars->position - vars->encryptEnd);
-
-                clock_get_uptime(&startTime);
-
-                // encrypt the buffer
-                aes_encrypt_cbc(vars->buffer + vars->bufferHalf + encryptStart,
-                                &cryptvars->aes_iv[0],
-                                encryptLen / AES_BLOCK_SIZE,
-                                vars->buffer + vars->bufferHalf + encryptStart,
-                                &cryptvars->ctx.encrypt);
-    
-                clock_get_uptime(&endTime);
-                ADD_ABSOLUTETIME(&vars->cryptTime, &endTime);
-                SUB_ABSOLUTETIME(&vars->cryptTime, &startTime);
-                vars->cryptBytes += encryptLen;
-
-                // save initial vector for following encrypts
-                bcopy(vars->buffer + vars->bufferHalf + encryptStart + encryptLen - AES_BLOCK_SIZE,
-                        &cryptvars->aes_iv[0],
-                        AES_BLOCK_SIZE);
-            }
-#endif /* CRYPTO */
-
-           if (vars->io)
-            {
-               err = IOHibernatePollerIODone(vars, true);
-                if (kIOReturnSuccess != err)
-                    break;
-            }
-
-if (vars->position & (vars->blockSize - 1)) HIBLOG("misaligned file pos %qx\n", vars->position);
-//if (length != vars->bufferSize) HIBLOG("short write of %qx ends@ %qx\n", length, offset + length);
-
-           err = IOHibernatePollerIO(vars, kIOPolledWrite, vars->bufferHalf, offset, length);
-            if (kIOReturnSuccess != err)
-                break;
-           vars->io = true;
-
-           vars->extentRemaining -= vars->bufferOffset;
-           if (!vars->extentRemaining)
-           {
-               vars->currentExtent++;
-               vars->extentRemaining = vars->currentExtent->length;
-               vars->extentPosition  = vars->position;
-                if (!vars->extentRemaining)
-                {
-                    err = kIOReturnOverrun;
-                    break;
-                }
-           }
-
-           vars->bufferHalf = vars->bufferHalf ? 0 : vars->bufferSize;
-           vars->bufferOffset = 0;
-           if (vars->bufferSize <= vars->extentRemaining)
-               vars->bufferLimit = vars->bufferSize;
-           else
-               vars->bufferLimit = vars->extentRemaining;
-
-           flush = false;
-       }
-    }
-    while (size);
-
-    return (err);
-}
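
IOPolledFileWrite accumulates data in one half of a double buffer; when called with no bytes and no size it pads the current position out to the next device block boundary and flushes, and after every flush it toggles to the other buffer half so filling can overlap the in-flight I/O. A small sketch of the two computations it relies on, assuming a power-of-two block size as the kernel code does:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Bytes of padding needed to reach the next block boundary; blockSize must be
    // a power of two (the kernel code makes the same assumption with its mask).
    static uint64_t padToBlock(uint64_t position, uint64_t blockSize)
    {
        assert((blockSize & (blockSize - 1)) == 0);
        uint64_t partial = position & (blockSize - 1);
        return partial ? (blockSize - partial) : 0;
    }

    // Toggle between the two halves of a double buffer of size 2 * bufferSize,
    // mirroring "bufferHalf = bufferHalf ? 0 : bufferSize".
    static uint64_t toggleHalf(uint64_t bufferHalf, uint64_t bufferSize)
    {
        return bufferHalf ? 0 : bufferSize;
    }

    int main()
    {
        std::printf("pad %llu\n",  (unsigned long long) padToBlock(0x1234, 0x1000)); // 3532 (0xDCC)
        std::printf("half %llu\n", (unsigned long long) toggleHalf(0, 0x100000));    // 1048576
        return 0;
    }
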
-
-static IOReturn
-IOPolledFileRead(IOPolledFileIOVars * vars,
-                    uint8_t * bytes, IOByteCount size,
-                    hibernate_cryptvars_t * cryptvars)
-{
-    IOReturn    err = kIOReturnSuccess;
-    IOByteCount copy;
-
-//    bytesWritten += size;
-
-    do
-    {
-       copy = vars->bufferLimit - vars->bufferOffset;
-       if (copy > size)
-           copy = size;
-
-       if (bytes)
-       {
-           bcopy(vars->buffer + vars->bufferHalf + vars->bufferOffset, bytes, copy);
-           bytes += copy;
-       }
-       size -= copy;
-       vars->bufferOffset += copy;
-//     vars->position += copy;
-
-       if ((vars->bufferOffset == vars->bufferLimit) && (vars->position < vars->readEnd))
-       {
-           if (vars->io)
-            {
-               err = IOHibernatePollerIODone(vars, false);
-                if (kIOReturnSuccess != err)
-                    break;
-            }
-            else
-                cryptvars = 0;
-
-if (vars->position & (vars->blockSize - 1)) HIBLOG("misaligned file pos %qx\n", vars->position);
-
-           vars->position        += vars->lastRead;
-           vars->extentRemaining -= vars->lastRead;
-           vars->bufferLimit      = vars->lastRead;
-
-           if (!vars->extentRemaining)
-           {
-               vars->currentExtent++;
-               vars->extentRemaining = vars->currentExtent->length;
-               vars->extentPosition  = vars->position;
-                if (!vars->extentRemaining)
-                {
-                    err = kIOReturnOverrun;
-                    break;
-                }
-           }
-
-           uint64_t length;
-           uint64_t lastReadLength = vars->lastRead;
-           uint64_t offset = (vars->position 
-                               - vars->extentPosition + vars->currentExtent->start);
-           if (vars->extentRemaining <= vars->bufferSize)
-               length = vars->extentRemaining;
-           else
-               length = vars->bufferSize;
-           if ((length + vars->position) > vars->readEnd)
-               length = vars->readEnd - vars->position;
-
-           vars->lastRead = length;
-           if (length)
-           {
-//if (length != vars->bufferSize) HIBLOG("short read of %qx ends@ %qx\n", length, offset + length);
-               err = IOHibernatePollerIO(vars, kIOPolledRead, vars->bufferHalf, offset, length);
-               if (kIOReturnSuccess != err)
-                   break;
-               vars->io = true;
-           }
-
-           vars->bufferHalf = vars->bufferHalf ? 0 : vars->bufferSize;
-           vars->bufferOffset = 0;
-
-#if CRYPTO
-            if (cryptvars)
-            {
-                uint8_t thisVector[AES_BLOCK_SIZE];
-                AbsoluteTime startTime, endTime;
-
-                // save initial vector for following decrypts
-                bcopy(&cryptvars->aes_iv[0], &thisVector[0], AES_BLOCK_SIZE);
-                bcopy(vars->buffer + vars->bufferHalf + lastReadLength - AES_BLOCK_SIZE, 
-                        &cryptvars->aes_iv[0], AES_BLOCK_SIZE);
-
-                // decrypt the buffer
-                clock_get_uptime(&startTime);
-
-                aes_decrypt_cbc(vars->buffer + vars->bufferHalf,
-                                &thisVector[0],
-                                lastReadLength / AES_BLOCK_SIZE,
-                                vars->buffer + vars->bufferHalf,
-                                &cryptvars->ctx.decrypt);
-
-                clock_get_uptime(&endTime);
-                ADD_ABSOLUTETIME(&vars->cryptTime, &endTime);
-                SUB_ABSOLUTETIME(&vars->cryptTime, &startTime);
-                vars->cryptBytes += lastReadLength;
-            }
-#endif /* CRYPTO */
-       }
-    }
-    while (size);
-
-    return (err);
-}
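
Both paths chain AES-CBC across buffer-sized chunks by saving the last ciphertext block of each chunk as the IV for the next; on the read side that block must be captured before the in-place decrypt overwrites it. The sketch below shows only that chaining bookkeeping, with a trivial XOR stand-in for the real aes_decrypt_cbc so it stays self-contained (it is not a cipher and not the kernel routine).

    #include <cstdint>
    #include <cstring>
    #include <vector>

    constexpr size_t kBlockSize = 16;   // same block size as AES

    // Placeholder "decrypt": XOR each block with the previous ciphertext block.
    // Stands in for aes_decrypt_cbc purely to show the IV handling; the caller's
    // iv argument is not updated, just as in the kernel code.
    static void fakeDecryptCBC(uint8_t* buf, size_t len, const uint8_t iv[kBlockSize])
    {
        uint8_t chain[kBlockSize];
        std::memcpy(chain, iv, kBlockSize);
        for (size_t off = 0; off < len; off += kBlockSize) {
            uint8_t cipherCopy[kBlockSize];
            std::memcpy(cipherCopy, buf + off, kBlockSize);   // ciphertext feeds the next block
            for (size_t i = 0; i < kBlockSize; ++i)
                buf[off + i] ^= chain[i];                     // "decrypt" against the chain value
            std::memcpy(chain, cipherCopy, kBlockSize);
        }
    }

    // Decrypt a stream that arrives in chunks: before decrypting a chunk in place,
    // save its last ciphertext block as the IV for the following chunk, the way
    // IOPolledFileRead refreshes cryptvars->aes_iv.
    static void decryptChunks(std::vector<std::vector<uint8_t>>& chunks)
    {
        uint8_t iv[kBlockSize] = { 0 };                       // initial vector for chunk 0
        for (auto& chunk : chunks) {
            uint8_t nextIV[kBlockSize];
            std::memcpy(nextIV, chunk.data() + chunk.size() - kBlockSize, kBlockSize);
            fakeDecryptCBC(chunk.data(), chunk.size(), iv);
            std::memcpy(iv, nextIV, kBlockSize);
        }
    }

    int main()
    {
        std::vector<std::vector<uint8_t>> chunks(2, std::vector<uint8_t>(64, 0xAB));
        decryptChunks(chunks);   // chunk sizes must be multiples of kBlockSize
        return 0;
    }
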
-
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-
-#if HIBERNATION
-IOReturn
-IOHibernateOpenForDebugData( )
-{
-    dev_t       image_dev;
-    OSData      *extentsData = NULL;
-    OSObject    *obj;
-    OSString    *str;
-    IOByteCount blockSize = 0;
-    IOByteCount size;
-    IOService *                 part = 0;
-    OSData *   data = NULL;
-
-    IOPolledFileExtent *     fileExtents;
-    IOReturn            err = kIOReturnSuccess;
-    IORegistryEntry * regEntry;
-    OSArray     * pollers = NULL;
-
-    _OpenFileContext                       ctx;
-
-    if (gDebugImageFileRef != NULL)
-        return kIOReturnError;
-
-    if ((obj = IOService::getPMRootDomain()->copyProperty(kIOHibernateFileKey)))
-    {
-        if ((str = OSDynamicCast(OSString, obj)))
-            strlcpy(gIOHibernateFilename, str->getCStringNoCopy(),
-                    sizeof(gIOHibernateFilename));
-        obj->release();
-    }
-
-    if (!gIOHibernateFilename[0]) {
-        HIBLOG("Failed to get hibernate image filename\n");
-        return (kIOReturnUnsupported);
-    }
-
-    extentsData = OSData::withCapacity(32);
-    ctx.extents = extentsData;
-    ctx.size    = 0;
-
-       bzero(gIOHibernateCurrentHeader, sizeof(IOHibernateImageHeader));
-       gIOHibernateCurrentHeader->debugFlags = gIOHibernateDebugFlags;
-       gIOHibernateCurrentHeader->signature = kIOHibernateHeaderInvalidSignature;
-
-    gDebugImageFileRef = kern_open_file_for_direct_io(gIOHibernateFilename, 
-                                                      false,
-                                                      &file_extent_callback, &ctx, 
-                                                      0, 0, 
-                                                      (caddr_t)gIOHibernateCurrentHeader, 
-                                                      sizeof(IOHibernateImageHeader),
-                                                      NULL, &image_dev, NULL, NULL, NULL);
-
-    if (gDebugImageFileRef == NULL) 
-    {
-        HIBLOG("Failed to open the file \n");
-        err = kIOReturnError;
-        goto exit;
-    }
-    fileExtents = (IOPolledFileExtent *)extentsData->getBytesNoCopy();
-    size = extentsData->getLength();
-
-    part = IOCopyMediaForDev(image_dev);
-    if (!part)
-    {
-        HIBLOG("Failed to get the media device\n");
-        err = kIOReturnNotFound;
-        goto exit;
-    }
-
-
-    pollers = OSArray::withCapacity(4);
-    if (!pollers)
-    {
-        err = kIOReturnNoMemory;
-        goto exit;
-    }
-
-    err = GetImageBlockSize(part, pollers, &blockSize);
-    if (err != kIOReturnSuccess)
-    {
-        HIBLOG("Failed to get block size\n");
-        goto exit;
-    }
-    if (blockSize < sizeof(IOHibernateImageHeader))
-    {
-        HIBLOG("block size %llu is less than the size of the header\n", blockSize);
-        err = kIOReturnError;
-        goto exit;
-    }
-
-    WriteExtentsToFile(gDebugImageFileRef, kIOHibernateHeaderOpenSignature,
-                       blockSize, fileExtents, size);
-
-    char str2[24 + sizeof(uuid_string_t) + 2];
-
-    if (!gIOCreateEFIDevicePathSymbol)
-        gIOCreateEFIDevicePathSymbol = OSSymbol::withCString("CreateEFIDevicePath");
-
-    snprintf(str2, sizeof(str2), "%qx", fileExtents[0].start);
-
-    err = IOService::getPlatform()->callPlatformFunction(
-                                                         gIOCreateEFIDevicePathSymbol, false,
-                                                         (void *) part, (void *) str2,
-                                                         (void *) (uintptr_t) true, (void *) &data);
-
-    if (!gIOOptionsEntry)
-    {
-        regEntry = IORegistryEntry::fromPath("/options", gIODTPlane);
-        gIOOptionsEntry = OSDynamicCast(IODTNVRAM, regEntry);
-        if (regEntry && !gIOOptionsEntry)
-            regEntry->release();
-    }
-    if (gIOOptionsEntry)
-    {
-        const OSSymbol *  sym;
-
-        sym = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKey);
-        if (sym)
-        {
-            gIOOptionsEntry->setProperty(sym, data);
-            sym->release();
-        }
-    }
-
-
-exit:
+static vm_offset_t
+hibernate_page_list_iterate(hibernate_page_list_t * list, vm_offset_t * pPage)
+{
+    uint32_t            page = *pPage;
+    uint32_t            count;
+    hibernate_bitmap_t * bitmap;
 
-    if ( (err != kIOReturnSuccess) && gDebugImageFileRef) {
-        kern_close_file_for_direct_io(gDebugImageFileRef, 0, 0, 0, 0, 0);
-        gDebugImageFileRef = NULL;
+    while ((bitmap = hibernate_page_bitmap_pin(list, &page)))
+    {
+       count = hibernate_page_bitmap_count(bitmap, TRUE, page);
+       if (!count)
+           break;
+       page += count;
+       if (page <= bitmap->last_page)
+           break;
     }
-    if (extentsData) extentsData->release();
-    if (part) part->release();
-    if (pollers) pollers->release();
-    if (data) data->release();
 
-    return err;
+    *pPage = page;
+    if (bitmap)
+       count = hibernate_page_bitmap_count(bitmap, FALSE, page);
+    else
+       count = 0;
+
+    return (count);
 }
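
The new hibernate_page_list_iterate pins the bitmap bank containing *pPage, skips the run of pages marked set, and returns how many clear pages follow. A self-contained sketch of the same run-length walk over a flat bitmap (no banks), with the bit sense chosen arbitrarily:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Count consecutive bits equal to 'value' starting at 'page' in a flat bitmap.
    // Plays the role hibernate_page_bitmap_count plays within one bank.
    static uint32_t countRun(const std::vector<uint32_t>& bits, uint32_t page, bool value)
    {
        uint32_t count = 0;
        uint32_t total = (uint32_t) bits.size() * 32;
        while (page + count < total) {
            uint32_t idx = page + count;
            bool bit = (bits[idx / 32] >> (idx % 32)) & 1;
            if (bit != value) break;
            ++count;
        }
        return count;
    }

    // Advance past the run of set pages at *pPage, then report how many clear
    // pages follow - the shape of hibernate_page_list_iterate without the banks.
    static uint32_t iterate(const std::vector<uint32_t>& bits, uint32_t* pPage)
    {
        *pPage += countRun(bits, *pPage, true);      // skip pages marked "set"
        return countRun(bits, *pPage, false);        // length of the clear run that follows
    }

    int main()
    {
        std::vector<uint32_t> bits = { 0x47 };       // pages 0..2 set, 3..5 clear, 6 set
        uint32_t page = 0;
        uint32_t clearRun = iterate(bits, &page);
        std::printf("first clear page %u, run of %u\n", page, clearRun);  // 3, 3
        return 0;
    }
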
-#endif
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
@@ -1379,18 +386,16 @@ IOReturn
 IOHibernateSystemSleep(void)
 {
     IOReturn   err;
-    OSData *   data;
+    OSData *   nvramData;
     OSObject * obj;
     OSString * str;
     OSNumber * num;
     bool       dsSSD, vmflush;
     IOHibernateVars * vars;
+    uint64_t   setFileSize = 0;
 
     gIOHibernateState = kIOHibernateStateInactive;
 
-    if (!gIOChosenEntry)
-       gIOChosenEntry = IORegistryEntry::fromPath("/chosen", gIODTPlane);
-
     gIOHibernateDebugFlags = 0;
     if (kIOLogHibernate & gIOKitDebug)
        gIOHibernateDebugFlags |= kIOHibernateDebugRestoreLogs;
@@ -1436,13 +441,11 @@ IOHibernateSystemSleep(void)
     {
         vars->srcBuffer = IOBufferMemoryDescriptor::withOptions(kIODirectionOutIn,
                                    2 * page_size + WKdm_SCRATCH_BUF_SIZE, page_size);
-        vars->ioBuffer  = IOBufferMemoryDescriptor::withOptions(kIODirectionOutIn, 
-                                   2 * kDefaultIOSize, page_size);
 
        vars->handoffBuffer = IOBufferMemoryDescriptor::withOptions(kIODirectionOutIn, 
                                    ptoa_64(gIOHibernateHandoffPageCount), page_size);
 
-        if (!vars->srcBuffer || !vars->ioBuffer || !vars->handoffBuffer)
+        if (!vars->srcBuffer || !vars->handoffBuffer)
         {
             err = kIOReturnNoMemory;
             break;
@@ -1468,8 +471,7 @@ IOHibernateSystemSleep(void)
        gIOHibernateCurrentHeader->debugFlags = gIOHibernateDebugFlags;
        gIOHibernateCurrentHeader->signature = kIOHibernateHeaderInvalidSignature;
 
-       vmflush = (kOSBooleanTrue == IOService::getPMRootDomain()->getProperty(kIOPMDeepSleepEnabledKey));
-       uint64_t setFileSize = 0;
+       vmflush = ((kOSBooleanTrue == IOService::getPMRootDomain()->getProperty(kIOPMDeepSleepEnabledKey)) && root_is_CF_drive == FALSE);
         err = hibernate_alloc_page_lists(&vars->page_list, 
                                         &vars->page_list_wired,
                                         &vars->page_list_pal);
@@ -1507,16 +509,28 @@ IOHibernateSystemSleep(void)
            }
        }
     
-       // open & invalidate the image file
+       // Invalidate the image file
+    if (gDebugImageLock) {
+        IOLockLock(gDebugImageLock);
+        if (gDebugImageFileVars != 0) {
+            kprintf("IOHIBSystemSleep: Closing debugdata file\n");
+            IOSetBootImageNVRAM(0);
+            IOPolledFileClose(&gDebugImageFileVars, 0, 0, 0, 0, 0);
+        }
+        IOLockUnlock(gDebugImageLock);
+    }
 
-       if (gDebugImageFileRef) {
-           kern_close_file_for_direct_io(gDebugImageFileRef, 0, 0, 0, 0, 0);
-           gDebugImageFileRef = NULL;
-       }
+        err = IOPolledFileOpen(gIOHibernateFilename, setFileSize, 0,
+                               gIOHibernateCurrentHeader, sizeof(gIOHibernateCurrentHeader),
+                                &vars->fileVars, &nvramData, 
+                                &vars->volumeCryptKey[0], sizeof(vars->volumeCryptKey));
 
-        err = IOPolledFileOpen(gIOHibernateFilename, setFileSize, vars->ioBuffer,
-                                &vars->fileVars, &vars->fileExtents, &data, 
-                                &vars->volumeCryptKey[0]);
+        if (KERN_SUCCESS != err)
+        {
+           IOLockLock(gFSLock);
+           if (kFSOpening != gFSState) err = kIOReturnTimeout;
+           IOLockUnlock(gFSLock);
+       }
 
         if (KERN_SUCCESS != err)
         {
@@ -1524,53 +538,50 @@ IOHibernateSystemSleep(void)
             break;
         }
 
+       // write extents for debug data usage in EFI
+        IOWriteExtentsToFile(vars->fileVars, kIOHibernateHeaderOpenSignature);
+
+        err = IOPolledFilePollersSetup(vars->fileVars, kIOPolledPreflightState);
+        if (KERN_SUCCESS != err) break;
+
         clock_get_uptime(&startTime);
         err = hibernate_setup(gIOHibernateCurrentHeader, 
-                                gIOHibernateFreeRatio, gIOHibernateFreeTime,
                                 vmflush,
                                 vars->page_list, vars->page_list_wired, vars->page_list_pal);
         clock_get_uptime(&endTime);
         SUB_ABSOLUTETIME(&endTime, &startTime);
         absolutetime_to_nanoseconds(endTime, &nsec);
         HIBLOG("hibernate_setup(%d) took %qd ms\n", err, nsec / 1000000ULL);
+        if (KERN_SUCCESS != err) break;
 
-        dsSSD = ((0 != (kIOHibernateOptionSSD & vars->fileVars->flags))
+        dsSSD = ((0 != (kIOPolledFileSSD & vars->fileVars->flags))
                 && (kOSBooleanTrue == IOService::getPMRootDomain()->getProperty(kIOPMDeepSleepEnabledKey)));
-        if (dsSSD)
-        {
-            gIOHibernateCurrentHeader->options |= 
-                                                kIOHibernateOptionSSD
-                                              | kIOHibernateOptionColor;
+
+        if (dsSSD) gIOHibernateCurrentHeader->options |= kIOHibernateOptionSSD | kIOHibernateOptionColor;
+        else       gIOHibernateCurrentHeader->options |= kIOHibernateOptionProgress;
+
 
 #if defined(__i386__) || defined(__x86_64__)
-            if (!uuid_is_null(vars->volumeCryptKey) &&
-                  (kOSBooleanTrue != IOService::getPMRootDomain()->getProperty(kIOPMDestroyFVKeyOnStandbyKey)))
-            {
-                uintptr_t smcVars[2];
-                smcVars[0] = sizeof(vars->volumeCryptKey);
-                smcVars[1] = (uintptr_t)(void *) &gIOHibernateVars.volumeCryptKey[0];
+       if (!uuid_is_null(vars->volumeCryptKey) &&
+             (kOSBooleanTrue != IOService::getPMRootDomain()->getProperty(kIOPMDestroyFVKeyOnStandbyKey)))
+       {
+           uintptr_t smcVars[2];
+           smcVars[0] = sizeof(vars->volumeCryptKey);
+           smcVars[1] = (uintptr_t)(void *) &gIOHibernateVars.volumeCryptKey[0];
 
-                IOService::getPMRootDomain()->setProperty(kIOHibernateSMCVariablesKey, smcVars, sizeof(smcVars));
-                bzero(smcVars, sizeof(smcVars));
-            }
+           IOService::getPMRootDomain()->setProperty(kIOHibernateSMCVariablesKey, smcVars, sizeof(smcVars));
+           bzero(smcVars, sizeof(smcVars));
+       }
 #endif
-        }
-        else
-        {
-            gIOHibernateCurrentHeader->options |= kIOHibernateOptionProgress;
-        }
 
 
-        if (KERN_SUCCESS != err)
-            break;
-
         if (encryptedswap || !uuid_is_null(vars->volumeCryptKey))
             gIOHibernateMode ^= kIOHibernateModeEncrypt; 
 
         if (kIOHibernateOptionProgress & gIOHibernateCurrentHeader->options)
         {
             vars->videoAllocSize = kVideoMapSize;
-            if (KERN_SUCCESS != kmem_alloc_pageable(kernel_map, &vars->videoMapping, vars->videoAllocSize))
+            if (KERN_SUCCESS != kmem_alloc_pageable(kernel_map, &vars->videoMapping, vars->videoAllocSize, VM_KERN_MEMORY_IOKIT))
                 vars->videoMapping = 0;
         }
 
@@ -1582,28 +593,11 @@ IOHibernateSystemSleep(void)
 
        // set nvram
 
-        IORegistryEntry * regEntry;
-        if (!gIOOptionsEntry)
-        {
-            regEntry = IORegistryEntry::fromPath("/options", gIODTPlane);
-            gIOOptionsEntry = OSDynamicCast(IODTNVRAM, regEntry);
-            if (regEntry && !gIOOptionsEntry)
-                regEntry->release();
-        }
-
-       if (gIOOptionsEntry)
-       {
-            const OSSymbol *  sym;
-
-            sym = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKey);
-            if (sym)
-            {
-                gIOOptionsEntry->setProperty(sym, data);
-                sym->release();
-            }
-            data->release();
+       IOSetBootImageNVRAM(nvramData);
+        nvramData->release();
 
 #if defined(__i386__) || defined(__x86_64__)
+       {
            struct AppleRTCHibernateVars
            {
                uint8_t     signature[4];
@@ -1612,6 +606,7 @@ IOHibernateSystemSleep(void)
                uint8_t     wiredCryptKey[16];
            };
            AppleRTCHibernateVars rtcVars;
+           OSData * data;
 
            rtcVars.signature[0] = 'A';
            rtcVars.signature[1] = 'A';
@@ -1627,167 +622,92 @@ IOHibernateSystemSleep(void)
                    (c = gIOHibernateBootSignature[i]) && (i < (sizeof(rtcVars.booterSignature) << 1));
                    i++)
                {
-                   if (c >= 'a')
-                       c -= 'a' - 10;
-                   else if (c >= 'A')
-                       c -= 'A' - 10;
-                   else if (c >= '0')
-                       c -= '0';
-                   else
-                       continue;
+                   if (c >= 'a')      c -= 'a' - 10;
+                   else if (c >= 'A') c -= 'A' - 10;
+                   else if (c >= '0') c -= '0';
+                   else               continue;
                    value = (value << 4) | c;
-                   if (i & 1)
-                       rtcVars.booterSignature[i >> 1] = value;
+                   if (i & 1) rtcVars.booterSignature[i >> 1] = value;
                }
            }
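
The loop above turns the ASCII boot-signature string into raw bytes two nibbles at a time, skipping any non-hex characters. The same conversion as a standalone helper (hexToBytes is an illustrative name, not a kernel function):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Convert an ASCII hex string into bytes, ignoring non-hex characters,
    // mirroring the nibble loop that fills rtcVars.booterSignature.
    static size_t hexToBytes(const char* str, uint8_t* out, size_t outLen)
    {
        size_t nibbles = 0;
        uint8_t value = 0;
        for (size_t i = 0; str[i] && (nibbles / 2) < outLen; ++i) {
            char c = str[i];
            if (c >= 'a' && c <= 'f')      c -= 'a' - 10;
            else if (c >= 'A' && c <= 'F') c -= 'A' - 10;
            else if (c >= '0' && c <= '9') c -= '0';
            else                           continue;        // skip separators etc.
            value = (uint8_t)((value << 4) | c);
            if (nibbles & 1) out[nibbles >> 1] = value;      // every second nibble completes a byte
            ++nibbles;
        }
        return nibbles / 2;                                  // bytes written
    }

    int main()
    {
        uint8_t sig[4] = {};
        size_t n = hexToBytes("dead:beef", sig, sizeof(sig));
        std::printf("%zu bytes: %02x %02x %02x %02x\n", n, sig[0], sig[1], sig[2], sig[3]);
        return 0;
    }
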
            data = OSData::withBytes(&rtcVars, sizeof(rtcVars));
            if (data)
            { 
-               if (!gIOHibernateRTCVariablesKey)
-                   gIOHibernateRTCVariablesKey = OSSymbol::withCStringNoCopy(kIOHibernateRTCVariablesKey);
                if (gIOHibernateRTCVariablesKey)
                    IOService::getPMRootDomain()->setProperty(gIOHibernateRTCVariablesKey, data);
-       
-               if( gIOOptionsEntry )
-               {
-                   if( gIOHibernateMode & kIOHibernateModeSwitch )
-                   {
-                       const OSSymbol *sym;
-                       sym = OSSymbol::withCStringNoCopy(kIOHibernateBootSwitchVarsKey);
-                       if( sym )
-                       {
-                           gIOOptionsEntry->setProperty(sym, data); /* intentional insecure backup of rtc boot vars */
-                           sym->release();
-                       }
-                   }   
-               }
-
                data->release();
            }
             if (gIOChosenEntry)
             {
                 data = OSDynamicCast(OSData, gIOChosenEntry->getProperty(kIOHibernateMachineSignatureKey));
-                if (data)
-                    gIOHibernateCurrentHeader->machineSignature = *((UInt32 *)data->getBytesNoCopy());
+                if (data) gIOHibernateCurrentHeader->machineSignature = *((UInt32 *)data->getBytesNoCopy());
+               // set BootNext
+               if (!gIOHibernateBoot0082Data)
                {
-                   // set BootNext
-
-                   if (!gIOHibernateBoot0082Data)
+                   data = OSDynamicCast(OSData, gIOChosenEntry->getProperty("boot-device-path"));
+                   if (data)
                    {
-                       data = OSDynamicCast(OSData, gIOChosenEntry->getProperty("boot-device-path"));
-                       if (data)
+                       // AppleNVRAM_EFI_LOAD_OPTION
+                       struct {
+                           uint32_t Attributes;
+                           uint16_t FilePathLength;
+                           uint16_t Desc;
+                       } loadOptionHeader;
+                       loadOptionHeader.Attributes     = 1;
+                       loadOptionHeader.FilePathLength = data->getLength();
+                       loadOptionHeader.Desc           = 0;
+                       gIOHibernateBoot0082Data = OSData::withCapacity(sizeof(loadOptionHeader) + loadOptionHeader.FilePathLength);
+                       if (gIOHibernateBoot0082Data)
                        {
-                           // AppleNVRAM_EFI_LOAD_OPTION
-                           struct {
-                               uint32_t Attributes;
-                               uint16_t FilePathLength;
-                               uint16_t Desc;
-                           } loadOptionHeader;
-                           loadOptionHeader.Attributes     = 1;
-                           loadOptionHeader.FilePathLength = data->getLength();
-                           loadOptionHeader.Desc           = 0;
-                           gIOHibernateBoot0082Data = OSData::withCapacity(sizeof(loadOptionHeader) + loadOptionHeader.FilePathLength);
-                           if (gIOHibernateBoot0082Data)
-                           {
-                               gIOHibernateBoot0082Data->appendBytes(&loadOptionHeader, sizeof(loadOptionHeader));
-                               gIOHibernateBoot0082Data->appendBytes(data);
-                           }
+                           gIOHibernateBoot0082Data->appendBytes(&loadOptionHeader, sizeof(loadOptionHeader));
+                           gIOHibernateBoot0082Data->appendBytes(data);
                        }
                    }
-                   if (!gIOHibernateBoot0082Key)
-                       gIOHibernateBoot0082Key = OSSymbol::withCString("8BE4DF61-93CA-11D2-AA0D-00E098032B8C:Boot0082");
-                   if (!gIOHibernateBootNextKey)
-                       gIOHibernateBootNextKey = OSSymbol::withCString("8BE4DF61-93CA-11D2-AA0D-00E098032B8C:BootNext");
-                   if (!gIOHibernateBootNextData)
-                   {
-                       uint16_t bits = 0x0082;
-                       gIOHibernateBootNextData = OSData::withBytes(&bits, sizeof(bits));
-                   }
-                   if (gIOHibernateBoot0082Key && gIOHibernateBoot0082Data && gIOHibernateBootNextKey && gIOHibernateBootNextData)
-                   {
-                       gIOHibernateBootNextSave = gIOOptionsEntry->copyProperty(gIOHibernateBootNextKey);
-                       gIOOptionsEntry->setProperty(gIOHibernateBoot0082Key, gIOHibernateBoot0082Data);
-                       gIOOptionsEntry->setProperty(gIOHibernateBootNextKey, gIOHibernateBootNextData);
-                   }
-               }
-            }
-#else /* !i386 && !x86_64 */
-            if (kIOHibernateModeEncrypt & gIOHibernateMode)
-            {
-                data = OSData::withBytes(&vars->wiredCryptKey[0], sizeof(vars->wiredCryptKey));
-                sym = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKeyKey);
-                if (sym && data)
-                    gIOOptionsEntry->setProperty(sym, data);
-                if (sym)
-                    sym->release();
-                if (data)
-                    data->release();
-                if (false && gIOHibernateBootSignature[0])
-                {
-                    data = OSData::withCapacity(16);
-                    sym = OSSymbol::withCStringNoCopy(kIOHibernateBootSignatureKey);
-                    if (sym && data)
-                    {
-                        char c;
-                        uint8_t value = 0;
-                        for (uint32_t i = 0; (c = gIOHibernateBootSignature[i]); i++)
-                        {
-                            if (c >= 'a')
-                                c -= 'a' - 10;
-                            else if (c >= 'A')
-                                c -= 'A' - 10;
-                            else if (c >= '0')
-                                c -= '0';
-                            else
-                                continue;
-                            value = (value << 4) | c;
-                            if (i & 1)
-                                data->appendBytes(&value, sizeof(value));
-                        }
-                        gIOOptionsEntry->setProperty(sym, data);
-                    }
-                    if (sym)
-                        sym->release();
-                    if (data)
-                        data->release();
-                }
-            }
-            if (!vars->haveFastBoot)
-            {
-                // set boot volume to zero
-                IODTPlatformExpert * platform = OSDynamicCast(IODTPlatformExpert, IOService::getPlatform());
-                if (platform && (kIOReturnSuccess == platform->readXPRAM(kXPRamAudioVolume, 
-                                            &vars->saveBootAudioVolume, sizeof(vars->saveBootAudioVolume))))
-                {
-                    uint8_t newVolume;
-                    newVolume = vars->saveBootAudioVolume & 0xf8;
-                    platform->writeXPRAM(kXPRamAudioVolume, 
-                                            &newVolume, sizeof(newVolume));
-                }
+               }
+               if (!gIOHibernateBootNextData)
+               {
+                   uint16_t bits = 0x0082;
+                   gIOHibernateBootNextData = OSData::withBytes(&bits, sizeof(bits));
+               }
+               if (gIOHibernateBoot0082Key && gIOHibernateBoot0082Data && gIOHibernateBootNextKey && gIOHibernateBootNextData)
+               {
+                   gIOHibernateBootNextSave = gIOOptionsEntry->copyProperty(gIOHibernateBootNextKey);
+                   gIOOptionsEntry->setProperty(gIOHibernateBoot0082Key, gIOHibernateBoot0082Data);
+                   gIOOptionsEntry->setProperty(gIOHibernateBootNextKey, gIOHibernateBootNextData);
+               }
+               // BootNext
             }
-#endif /* !i386 && !x86_64 */
        }
-       // --
-
+#endif /* !i386 && !x86_64 */
     }
     while (false);
 
     IOLockLock(gFSLock);
-    if ((kIOReturnSuccess == err) && (kFSOpening == gFSState))
+    if ((kIOReturnSuccess == err) && (kFSOpening != gFSState))
+    {
+       HIBLOG("hibernate file close due timeout\n");
+       err = kIOReturnTimeout;
+    }
+    if (kIOReturnSuccess == err)
     {
        gFSState = kFSOpened;
        gIOHibernateVars = *vars;
        gFileVars = *vars->fileVars;
+       gFileVars.allocated = false;
        gIOHibernateVars.fileVars = &gFileVars;
-       gIOHibernateFileRef = gFileVars.fileRef;
        gIOHibernateCurrentHeader->signature = kIOHibernateHeaderSignature;
        gIOHibernateState = kIOHibernateStateHibernating;
     }
     else
     {
-       HIBLOG("hibernate file close due timeout\n");
-       if (vars->fileVars && vars->fileVars->fileRef) kern_close_file_for_direct_io(vars->fileVars->fileRef, 0, 0, 0, 0, 0);
+       IOPolledFileIOVars * fileVars = vars->fileVars;
        IOHibernateDone(vars);
+    IOPolledFileClose(&fileVars,
+#if DISABLE_TRIM
+                      0, NULL, 0, 0, 0);
+#else
+                      0, NULL, 0, sizeof(IOHibernateImageHeader), setFileSize);
+#endif
        gFSState = kFSIdle;
     }
     IOLockUnlock(gFSLock);
@@ -1800,6 +720,136 @@ IOHibernateSystemSleep(void)
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
+static void
+IOSetBootImageNVRAM(OSData * data)
+{
+    IORegistryEntry * regEntry;
+
+    if (!gIOOptionsEntry)
+    {
+        regEntry = IORegistryEntry::fromPath("/options", gIODTPlane);
+        gIOOptionsEntry = OSDynamicCast(IODTNVRAM, regEntry);
+        if (regEntry && !gIOOptionsEntry)
+            regEntry->release();
+    }
+    if (gIOOptionsEntry && gIOHibernateBootImageKey)
+    {
+       if (data) gIOOptionsEntry->setProperty(gIOHibernateBootImageKey, data);
+       else
+       {
+           gIOOptionsEntry->removeProperty(gIOHibernateBootImageKey);
+           gIOOptionsEntry->sync();
+       }
+    }
+}
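
IOSetBootImageNVRAM publishes (or clears) the boot-image device path under the IODTNVRAM /options node so the firmware can locate the image at boot. From userspace the same property can be inspected through the I/O Registry; a minimal sketch, assuming the standard IOKit framework APIs and that the key named by kIOHibernateBootImageKey ("boot-image") is visible on the running system:

    // Userspace sketch: read the "boot-image" property from IODeviceTree:/options.
    // Build with: clang++ read_boot_image.cpp -framework IOKit -framework CoreFoundation
    #include <IOKit/IOKitLib.h>
    #include <CoreFoundation/CoreFoundation.h>
    #include <cstdio>

    int main()
    {
        io_registry_entry_t options =
            IORegistryEntryFromPath(kIOMasterPortDefault, "IODeviceTree:/options");
        if (options == MACH_PORT_NULL) {
            std::fprintf(stderr, "no /options node\n");
            return 1;
        }

        CFTypeRef prop = IORegistryEntryCreateCFProperty(options,
                                                         CFSTR("boot-image"),
                                                         kCFAllocatorDefault, 0);
        if (prop) {
            // For an OSData-backed property this is a CFData holding the EFI device path.
            if (CFGetTypeID(prop) == CFDataGetTypeID())
                std::printf("boot-image present, %ld bytes\n",
                            (long) CFDataGetLength((CFDataRef) prop));
            CFRelease(prop);
        } else {
            std::printf("boot-image not set\n");
        }

        IOObjectRelease(options);
        return 0;
    }

Whether the property is present depends on the platform and on a hibernate or debug-data file having been configured; the sketch only reads, it does not attempt to set NVRAM.
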
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+/* 
+ * Writes the header to disk with the signature, block size and file extents data.
+ * If there are more than 2 extents, the overflow is written to the second block.
+ */
+static IOReturn
+IOWriteExtentsToFile(IOPolledFileIOVars * vars, uint32_t signature)
+{
+    IOHibernateImageHeader hdr;
+    IOItemCount            count;
+    IOReturn               err = kIOReturnSuccess;
+    int                    rc;
+    IOPolledFileExtent *   fileExtents;
+
+    fileExtents = (typeof(fileExtents)) vars->fileExtents->getBytesNoCopy(),
+
+    memset(&hdr, 0, sizeof(IOHibernateImageHeader));
+    count = vars->fileExtents->getLength();
+    if (count > sizeof(hdr.fileExtentMap))
+    {
+        hdr.fileExtentMapSize = count;
+        count = sizeof(hdr.fileExtentMap);
+    }
+    else
+        hdr.fileExtentMapSize = sizeof(hdr.fileExtentMap);
+
+    bcopy(fileExtents, &hdr.fileExtentMap[0], count);
+
+    // copy file block extent list if larger than header
+    if (hdr.fileExtentMapSize > sizeof(hdr.fileExtentMap))
+    {
+            count = hdr.fileExtentMapSize - sizeof(hdr.fileExtentMap);
+            rc = kern_write_file(vars->fileRef, vars->blockSize, 
+                                 (caddr_t)(((uint8_t *)fileExtents) + sizeof(hdr.fileExtentMap)), 
+                                 count, IO_SKIP_ENCRYPTION);
+            if (rc != 0) {
+                HIBLOG("kern_write_file returned %d\n", rc);
+                err = kIOReturnIOError;
+                goto exit;
+            }    
+    }
+    hdr.signature = signature;
+    hdr.deviceBlockSize = vars->blockSize;
+
+    rc = kern_write_file(vars->fileRef, 0, (char *)&hdr, sizeof(hdr), IO_SKIP_ENCRYPTION);
+    if (rc != 0) {
+        HIBLOG("kern_write_file returned %d\n", rc);
+        err = kIOReturnIOError;
+        goto exit;
+    }
+
+exit:
+    return err;
+}
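
IOWriteExtentsToFile packs as many extents as fit into the header's fixed fileExtentMap and writes any overflow to the block after the header, recording the full size in fileExtentMapSize. A small sketch of that split, using a hypothetical header with an inline array of two extents (the real layout lives in IOHibernateImageHeader):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <vector>

    struct Extent { uint64_t start; uint64_t length; };

    // Hypothetical header: room for two extents inline, like hdr.fileExtentMap.
    struct Header {
        uint32_t extentMapSize;   // total bytes of extent data (inline + spilled)
        Extent   extentMap[2];    // inline portion
    };

    // Split an extent list into the header's inline array and an overflow buffer
    // that a caller would write to the block following the header.
    static std::vector<uint8_t> packExtents(const std::vector<Extent>& extents, Header& hdr)
    {
        size_t total   = extents.size() * sizeof(Extent);
        size_t inBytes = total > sizeof(hdr.extentMap) ? sizeof(hdr.extentMap) : total;

        std::memset(&hdr, 0, sizeof(hdr));
        hdr.extentMapSize = (uint32_t)(total > sizeof(hdr.extentMap) ? total : sizeof(hdr.extentMap));
        std::memcpy(hdr.extentMap, extents.data(), inBytes);

        std::vector<uint8_t> overflow;                       // bytes that did not fit inline
        if (total > inBytes) {
            overflow.resize(total - inBytes);
            std::memcpy(overflow.data(), (const uint8_t*) extents.data() + inBytes, overflow.size());
        }
        return overflow;
    }

    int main()
    {
        std::vector<Extent> extents = { {0x1000, 0x2000}, {0x8000, 0x1000}, {0x20000, 0x4000} };
        Header hdr;
        std::vector<uint8_t> spill = packExtents(extents, hdr);
        std::printf("map size %u, spilled %zu bytes\n", hdr.extentMapSize, spill.size());
        return 0;
    }
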
+
+void
+IOOpenDebugDataFile(const char *fname, uint64_t size)
+{
+    IOReturn   err;
+    OSData *   imagePath = NULL;
+    uint64_t   padding;
+
+    if (!gDebugImageLock) {
+        gDebugImageLock = IOLockAlloc();
+    }
+
+    // Try to get a lock, but don't block for getting lock
+    if (!IOLockTryLock(gDebugImageLock)) {
+        HIBLOG("IOOpenDebugDataFile: Failed to get lock\n");
+        return;
+    }
+
+    if (gDebugImageFileVars ||  !fname || !size) {
+        HIBLOG("IOOpenDebugDataFile: conditions failed\n");
+        goto exit;
+    }
+
+    padding = (PAGE_SIZE*2);  // allocate a couple more pages for the header and file extents
+    err = IOPolledFileOpen(fname, size+padding, 32ULL*1024*1024*1024,
+                           NULL, 0,
+                           &gDebugImageFileVars, &imagePath, NULL, 0);
+
+    if ((kIOReturnSuccess == err) && imagePath)
+    {
+        if ((gDebugImageFileVars->fileSize < (size+padding)) ||
+            (gDebugImageFileVars->fileExtents->getLength() > PAGE_SIZE)) {
+            // Can't use the file
+            IOPolledFileClose(&gDebugImageFileVars, 0, 0, 0, 0, 0);
+            HIBLOG("IOOpenDebugDataFile: too many file extents\n");
+            goto exit;
+        }
+
+        // write extents for debug data usage in EFI
+        IOWriteExtentsToFile(gDebugImageFileVars, kIOHibernateHeaderOpenSignature);
+        IOSetBootImageNVRAM(imagePath);
+        kprintf("IOOpenDebugDataFile: opened debugdata file\n");
+    }
+
+exit:
+    IOLockUnlock(gDebugImageLock);
+
+    if (imagePath) imagePath->release();
+    return;
+}
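
IOOpenDebugDataFile deliberately uses IOLockTryLock so a caller on a latency-sensitive path never blocks if the debug-image lock is already held; it just logs and returns. The same non-blocking acquire pattern with std::mutex, purely as an illustration:

    #include <cstdio>
    #include <mutex>

    static std::mutex gDebugImageLock;   // stands in for the IOLock

    // Returns without doing any work if someone else holds the lock,
    // mirroring the IOLockTryLock early-out in IOOpenDebugDataFile.
    static void openDebugDataFile(const char* name, unsigned long long size)
    {
        if (!gDebugImageLock.try_lock()) {
            std::printf("openDebugDataFile: failed to get lock\n");
            return;
        }
        if (name && size) {
            std::printf("openDebugDataFile: would open %s (%llu bytes)\n", name, size);
            // ... open the file, write extents, publish the NVRAM entry ...
        }
        gDebugImageLock.unlock();
    }

    int main()
    {
        openDebugDataFile("SleepImageDebugData", 4ull * 1024 * 1024);
        return 0;
    }
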
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
 DECLARE_IOHIBERNATEPROGRESSALPHA
 
 static void
@@ -2040,6 +1090,7 @@ IOHibernateSystemWake(void)
 {
     if (kFSOpened == gFSState)
     {
+       IOPolledFilePollersClose(gIOHibernateVars.fileVars, kIOPolledPostflightState);
        IOHibernateDone(&gIOHibernateVars);
     }
     else
@@ -2053,8 +1104,6 @@ IOHibernateSystemWake(void)
 static IOReturn
 IOHibernateDone(IOHibernateVars * vars)
 {
-    IORegistryEntry * next;
-
     hibernate_teardown(vars->page_list, vars->page_list_wired, vars->page_list_pal);
 
     if (vars->videoMapping)
@@ -2095,17 +1144,6 @@ IOHibernateDone(IOHibernateVars * vars)
         IOService::getPMRootDomain()->removeProperty(kIOHibernateGfxStatusKey);
     }
 
-    if (vars->fileVars)
-    {
-       if ((next = vars->fileVars->media)) do
-       {
-           next->removeProperty(kIOPolledInterfaceActiveKey);
-           next = next->getParentEntry(gIOServicePlane);
-       }
-       while (next);
-       IOPolledFileClose(vars->fileVars);
-    }
-
     // invalidate nvram properties - (gIOOptionsEntry != 0) => nvram was touched
 
 #if defined(__i386__) || defined(__x86_64__)
@@ -2139,10 +1177,7 @@ IOHibernateDone(IOHibernateVars * vars)
        }
 #endif
 
-    if (vars->srcBuffer)
-       vars->srcBuffer->release();
-    if (vars->ioBuffer)
-       vars->ioBuffer->release();
+    if (vars->srcBuffer) vars->srcBuffer->release();
     bzero(&gIOHibernateHandoffPages[0], gIOHibernateHandoffPageCount * sizeof(gIOHibernateHandoffPages[0]));
     if (vars->handoffBuffer)
     {
@@ -2187,8 +1222,6 @@ IOHibernateDone(IOHibernateVars * vars)
        }
        vars->handoffBuffer->release();
     }
-    if (vars->fileExtents)
-       vars->fileExtents->release();
 
     bzero(vars, sizeof(*vars));
 
@@ -2200,53 +1233,31 @@ IOHibernateDone(IOHibernateVars * vars)
 IOReturn
 IOHibernateSystemPostWake(void)
 {
-    struct kern_direct_file_io_ref_t * fileRef;
-
+    gIOHibernateCurrentHeader->signature = kIOHibernateHeaderInvalidSignature;
     if (kFSOpened == gFSState)
     {
        // invalidate & close the image file
-       gIOHibernateCurrentHeader->signature = kIOHibernateHeaderInvalidSignature;
-       if ((fileRef = gIOHibernateFileRef))
-       {
-           gIOHibernateFileRef = 0;
-           IOSleep(TRIM_DELAY);
-           kern_close_file_for_direct_io(fileRef,
+       IOSleep(TRIM_DELAY);
+       IOPolledFileIOVars * vars = &gFileVars;
+       IOPolledFileClose(&vars,
 #if DISABLE_TRIM
-                                      0, 0, 0, 0, 0);
+                                      0, NULL, 0, 0, 0);
 #else
-                                      0, (caddr_t) gIOHibernateCurrentHeader, 
-                                      sizeof(IOHibernateImageHeader),
-                                      0,
-                                      gIOHibernateCurrentHeader->imageSize);
+                                      0, (caddr_t)gIOHibernateCurrentHeader, sizeof(IOHibernateImageHeader),
+                                      sizeof(IOHibernateImageHeader), gIOHibernateCurrentHeader->imageSize);
 #endif
-       }
-       gFSState = kFSIdle;
-    }
-
-    if (gDebugImageFileRef) {
-        kern_close_file_for_direct_io(gDebugImageFileRef, 0, 0, 0, 0, 0);
-        gDebugImageFileRef = NULL;
     }
+    gFSState = kFSIdle;
 
-    if (!gIOOptionsEntry)
-    {
-        IORegistryEntry * regEntry;
-        regEntry = IORegistryEntry::fromPath("/options", gIODTPlane);
-        gIOOptionsEntry = OSDynamicCast(IODTNVRAM, regEntry);
-        if (regEntry && !gIOOptionsEntry)
-            regEntry->release();
-    }
-    if (gIOOptionsEntry)
-    {
-        const OSSymbol *  sym;
+    IOSetBootImageNVRAM(0);
 
-        sym = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKey);
-        if (sym)
-        {
-            gIOOptionsEntry->removeProperty(sym);
-            gIOOptionsEntry->sync();
-            sym->release();
+    if (gDebugImageLock) {
+        IOLockLock(gDebugImageLock);
+        if (gDebugImageFileVars != 0) {
+            kprintf("IOHibernateSystemPostWake: Closing debugdata file\n");
+            IOPolledFileClose(&gDebugImageFileVars, 0, 0, 0, 0, 0);
         }
+        IOLockUnlock(gDebugImageLock);
     }
 
     return (kIOReturnSuccess);
@@ -2307,6 +1318,15 @@ SYSCTL_UINT(_kern, OID_AUTO, hibernatehidready,
 void
 IOHibernateSystemInit(IOPMrootDomain * rootDomain)
 {
+    gIOHibernateBootImageKey = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKey);
+
+#if defined(__i386__) || defined(__x86_64__)
+    gIOHibernateRTCVariablesKey = OSSymbol::withCStringNoCopy(kIOHibernateRTCVariablesKey);
+    gIOHibernateBoot0082Key     = OSSymbol::withCString("8BE4DF61-93CA-11D2-AA0D-00E098032B8C:Boot0082");
+    gIOHibernateBootNextKey     = OSSymbol::withCString("8BE4DF61-93CA-11D2-AA0D-00E098032B8C:BootNext");
+    gIOHibernateRTCVariablesKey = OSSymbol::withCStringNoCopy(kIOHibernateRTCVariablesKey);
+#endif /* defined(__i386__) || defined(__x86_64__) */
+
     OSData * data = OSData::withBytesNoCopy(&gIOHibernateState, sizeof(gIOHibernateState));
     if (data)
     {
@@ -2328,6 +1348,8 @@ IOHibernateSystemInit(IOPMrootDomain * rootDomain)
     sysctl_register_oid(&sysctl__kern_hibernatelockscreenready);
     sysctl_register_oid(&sysctl__kern_hibernatehidready);
 
+    gIOChosenEntry = IORegistryEntry::fromPath("/chosen", gIODTPlane);
+
     gFSLock = IOLockAlloc();
 }
 
@@ -2341,45 +1363,20 @@ hibernate_setup_for_wake(void)
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
-#define C_ASSERT(e) typedef char    __C_ASSERT__[(e) ? 1 : -1]
-
-static bool
-no_encrypt_page(vm_offset_t ppnum)
-{
-    if (pmap_is_noencrypt((ppnum_t)ppnum) == TRUE)
-    {
-        return true;
-    }
-    return false;
-}
-
-static void
-hibernate_pal_callback(void *vars_arg, vm_offset_t addr)
-{
-       IOHibernateVars *vars = (IOHibernateVars *)vars_arg;
-       /* Make sure it's not in either of the save lists */
-       hibernate_set_page_state(vars->page_list, vars->page_list_wired, atop_64(addr), 1, kIOHibernatePageStateFree);
-
-       /* Set it in the bitmap of pages owned by the PAL */
-       hibernate_page_bitset(vars->page_list_pal, TRUE, atop_64(addr));
-}
-
-static struct hibernate_cryptvars_t *local_cryptvars;
-
-extern "C" int
-hibernate_pal_write(void *buffer, size_t size)
+static IOReturn 
+IOHibernatePolledFileWrite(IOPolledFileIOVars * vars,
+                          const uint8_t * bytes, IOByteCount size,
+                          IOPolledFileCryptVars * cryptvars)
 {
-    IOHibernateVars * vars  = &gIOHibernateVars;
+    IOReturn err;
 
-       IOReturn err = IOPolledFileWrite(vars->fileVars, (const uint8_t *)buffer, size, local_cryptvars);
-       if (kIOReturnSuccess != err) {
-               kprintf("epic hibernate fail! %d\n", err);
-               return err;
-       }
+    err = IOPolledFileWrite(vars, bytes, size, cryptvars);
+    if ((kIOReturnSuccess == err) && hibernate_should_abort()) err = kIOReturnAborted;
 
-       return 0;
+    return (err);
 }
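
IOHibernatePolledFileWrite is a thin wrapper: after every successful write it also polls hibernate_should_abort, so a user-initiated cancel surfaces as kIOReturnAborted at the next write boundary rather than only at the end of the image. A generic sketch of that wrapper shape (writeFn and shouldAbort are placeholders, not kernel APIs):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <functional>

    enum Status { kSuccess = 0, kAborted = 1, kIOError = 2 };

    // Wrap a write so that an external abort request is noticed after every
    // successful chunk, the way the wrapper above checks hibernate_should_abort().
    static Status checkedWrite(const std::function<Status(const uint8_t*, size_t)>& writeFn,
                               const std::function<bool()>& shouldAbort,
                               const uint8_t* bytes, size_t size)
    {
        Status err = writeFn(bytes, size);
        if (err == kSuccess && shouldAbort())
            err = kAborted;
        return err;
    }

    int main()
    {
        bool abortRequested = false;
        auto writeFn = [](const uint8_t*, size_t n) { std::printf("wrote %zu bytes\n", n); return kSuccess; };
        auto shouldAbort = [&]() { return abortRequested; };

        uint8_t buf[64] = {};
        Status s = checkedWrite(writeFn, shouldAbort, buf, sizeof(buf));
        std::printf("status %d\n", s);
        return 0;
    }
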
 
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 extern "C" uint32_t
 hibernate_write_image(void)
@@ -2388,7 +1385,7 @@ hibernate_write_image(void)
     IOHibernateVars *        vars  = &gIOHibernateVars;
     IOPolledFileExtent *     fileExtents;
 
-    C_ASSERT(sizeof(IOHibernateImageHeader) == 512);
+    assert_static(sizeof(IOHibernateImageHeader) == 512);
 
     uint32_t    pageCount, pagesDone;
     IOReturn     err;
@@ -2398,8 +1395,7 @@ hibernate_write_image(void)
     uint8_t *   data;
     uint8_t *   compressed;
     uint8_t *   scratch;
-    void *       zerosCompressed;
-    IOByteCount  pageCompressedSize, zerosCompressedLen;
+    IOByteCount  pageCompressedSize;
     uint64_t    compressedSize, uncompressedSize;
     uint64_t    image1Size = 0;
     uint32_t    bitmap_size;
@@ -2411,6 +1407,8 @@ hibernate_write_image(void)
     uint32_t    pageAndCount[2];
     addr64_t     phys64;
     IOByteCount  segLen;
+    uintptr_t    hibernateBase;
+    uintptr_t    hibernateEnd;
 
     AbsoluteTime startTime, endTime;
     AbsoluteTime allTime, compTime;
@@ -2423,17 +1421,19 @@ hibernate_write_image(void)
     uint32_t    wiredPagesEncrypted;
     uint32_t    dirtyPagesEncrypted;
     uint32_t    wiredPagesClear;
-    uint32_t    zeroPageCount;
+    uint32_t    svPageCount;
+    uint32_t    zvPageCount;
 
-    hibernate_cryptvars_t _cryptvars;
-    hibernate_cryptvars_t * cryptvars = 0;
+    IOPolledFileCryptVars _cryptvars;
+    IOPolledFileCryptVars * cryptvars = 0;
 
     wiredPagesEncrypted = 0;
     dirtyPagesEncrypted = 0;
     wiredPagesClear     = 0;
-    zeroPageCount       = 0;
+    svPageCount         = 0;
+    zvPageCount         = 0;
 
-    if (!vars->fileVars || !vars->fileVars->pollers || !vars->fileExtents)
+    if (!vars->fileVars || !vars->fileVars->pollers)
         return (false /* sleep */ );
 
     if (kIOHibernateModeSleep & gIOHibernateMode)
@@ -2444,8 +1444,6 @@ hibernate_write_image(void)
 
     restore1Sum = sum1 = sum2 = 0;
 
-    hibernate_pal_prepare();
-
 #if CRYPTO
     // encryption data. "iv" is the "initial vector".
     if (kIOHibernateModeEncrypt & gIOHibernateMode)
@@ -2455,7 +1453,7 @@ hibernate_write_image(void)
              0xdf, 0x9e, 0x5f, 0x32, 0xd7, 0x61, 0x63, 0xda };
     
         cryptvars = &gIOHibernateCryptWakeContext;
-        bzero(cryptvars, sizeof(hibernate_cryptvars_t));
+        bzero(cryptvars, sizeof(IOPolledFileCryptVars));
         aes_encrypt_key(vars->cryptKey,
                         kIOHibernateAESKeySize,
                         &cryptvars->ctx.encrypt);
@@ -2464,7 +1462,7 @@ hibernate_write_image(void)
                         &cryptvars->ctx.decrypt);
 
         cryptvars = &_cryptvars;
-        bzero(cryptvars, sizeof(hibernate_cryptvars_t));
+        bzero(cryptvars, sizeof(IOPolledFileCryptVars));
         for (pageCount = 0; pageCount < sizeof(vars->wiredCryptKey); pageCount++)
             vars->wiredCryptKey[pageCount] ^= vars->volumeCryptKey[pageCount];
         bzero(&vars->volumeCryptKey[0], sizeof(vars->volumeCryptKey));
@@ -2475,8 +1473,6 @@ hibernate_write_image(void)
         bcopy(&first_iv[0], &cryptvars->aes_iv[0], AES_BLOCK_SIZE);
         bzero(&vars->wiredCryptKey[0], sizeof(vars->wiredCryptKey));
         bzero(&vars->cryptKey[0], sizeof(vars->cryptKey));
-
-        local_cryptvars = cryptvars;
     }
 #endif /* CRYPTO */
 
@@ -2493,7 +1489,7 @@ hibernate_write_image(void)
 
     HIBLOG("hibernate_page_list_setall found pageCount %d\n", pageCount);
 
-    fileExtents = (IOPolledFileExtent *) vars->fileExtents->getBytesNoCopy();
+    fileExtents = (IOPolledFileExtent *) vars->fileVars->fileExtents->getBytesNoCopy();
 
 #if 0
     count = vars->fileExtents->getLength() / sizeof(IOPolledFileExtent);
@@ -2516,13 +1512,14 @@ hibernate_write_image(void)
     {
         compressedSize   = 0;
         uncompressedSize = 0;
-        zeroPageCount    = 0;
+        svPageCount      = 0;
+        zvPageCount      = 0;
 
         IOPolledFileSeek(vars->fileVars, vars->fileVars->blockSize);
     
         HIBLOG("IOHibernatePollerOpen, ml_get_interrupts_enabled %d\n", 
                 ml_get_interrupts_enabled());
-        err = IOHibernatePollerOpen(vars->fileVars, kIOPolledBeforeSleepState, vars->ioBuffer);
+        err = IOPolledFilePollersOpen(vars->fileVars, kIOPolledBeforeSleepState, true);
         HIBLOG("IOHibernatePollerOpen(%x)\n", err);
         pollerOpen = (kIOReturnSuccess == err);
         if (!pollerOpen)
@@ -2530,21 +1527,17 @@ hibernate_write_image(void)
     
         // copy file block extent list if larger than header
     
-        count = vars->fileExtents->getLength();
+        count = vars->fileVars->fileExtents->getLength();
         if (count > sizeof(header->fileExtentMap))
         {
             count -= sizeof(header->fileExtentMap);
-            err = IOPolledFileWrite(vars->fileVars,
+            err = IOHibernatePolledFileWrite(vars->fileVars,
                                     ((uint8_t *) &fileExtents[0]) + sizeof(header->fileExtentMap), count, cryptvars);
             if (kIOReturnSuccess != err)
                 break;
         }
 
-        uintptr_t hibernateBase;
-        uintptr_t hibernateEnd;
-
         hibernateBase = HIB_BASE; /* Defined in PAL headers */
-
         hibernateEnd = (segHIBB + segSizeHIB);
 
         // copy out restore1 code
@@ -2585,11 +1578,11 @@ hibernate_write_image(void)
         count = ((uintptr_t) &gIOHibernateRestoreStack[0]) - trunc_page(hibernateBase);
         if (count)
         {
-            err = IOPolledFileWrite(vars->fileVars, src, count, cryptvars);
+            err = IOHibernatePolledFileWrite(vars->fileVars, src, count, cryptvars);
             if (kIOReturnSuccess != err)
                 break;
         }
-        err = IOPolledFileWrite(vars->fileVars, 
+        err = IOHibernatePolledFileWrite(vars->fileVars, 
                                         (uint8_t *) 0,
                                         &gIOHibernateRestoreStackEnd[0] - &gIOHibernateRestoreStack[0],
                                         cryptvars);
@@ -2599,7 +1592,7 @@ hibernate_write_image(void)
         count = round_page(hibernateEnd) - ((uintptr_t) src);
         if (count)
         {
-            err = IOPolledFileWrite(vars->fileVars, src, count, cryptvars);
+            err = IOHibernatePolledFileWrite(vars->fileVars, src, count, cryptvars);
             if (kIOReturnSuccess != err)
                 break;
         }
@@ -2622,7 +1615,7 @@ hibernate_write_image(void)
                 phys64 = vars->previewBuffer->getPhysicalSegment(count, &segLen, kIOMemoryMapperNone);
                 pageAndCount[0] = atop_64(phys64);
                 pageAndCount[1] = atop_32(segLen);
-                err = IOPolledFileWrite(vars->fileVars, 
+                err = IOHibernatePolledFileWrite(vars->fileVars, 
                                         (const uint8_t *) &pageAndCount, sizeof(pageAndCount), 
                                         cryptvars);
                 if (kIOReturnSuccess != err)
@@ -2648,15 +1641,16 @@ hibernate_write_image(void)
                 phys64 = vars->previewBuffer->getPhysicalSegment(page, NULL, kIOMemoryMapperNone);
                 sum1 += hibernate_sum_page(src + page, atop_64(phys64));
             }
-            err = IOPolledFileWrite(vars->fileVars, src, count, cryptvars);
+            err = IOHibernatePolledFileWrite(vars->fileVars, src, count, cryptvars);
             if (kIOReturnSuccess != err)
                 break;
         }
 
         // mark areas for no save
-    
+        IOMemoryDescriptor * ioBuffer;
+        ioBuffer = IOPolledFileGetIOBuffer(vars->fileVars);
         for (count = 0;
-            (phys64 = vars->ioBuffer->getPhysicalSegment(count, &segLen, kIOMemoryMapperNone));
+            (phys64 = ioBuffer->getPhysicalSegment(count, &segLen, kIOMemoryMapperNone));
             count += segLen)
         {
             hibernate_set_page_state(vars->page_list, vars->page_list_wired, 
@@ -2679,7 +1673,7 @@ hibernate_write_image(void)
     
         bitmap_size = vars->page_list_wired->list_size;
         src = (uint8_t *) vars->page_list_wired;
-        err = IOPolledFileWrite(vars->fileVars, src, bitmap_size, cryptvars);
+        err = IOHibernatePolledFileWrite(vars->fileVars, src, bitmap_size, cryptvars);
         if (kIOReturnSuccess != err)
             break;
 
@@ -2716,20 +1710,10 @@ hibernate_write_image(void)
             pageCount -= atop_32(segLen);
         }
 
-               (void)hibernate_pal_callback;
-
         src = (uint8_t *) vars->srcBuffer->getBytesNoCopy();
        compressed = src + page_size;
         scratch    = compressed + page_size;
 
-       // compress a zero page
-       bzero(src, page_size);
-       zerosCompressed    = vars->handoffBuffer->getBytesNoCopy();
-       zerosCompressedLen = WKdm_compress_new((WK_word*) src,
-                                              (WK_word*) zerosCompressed, 
-                                              (WK_word*) scratch,
-                                              page_size - 4);
-
         pagesDone  = 0;
         lastBlob   = 0;
     
@@ -2776,7 +1760,7 @@ hibernate_write_image(void)
                     uint32_t checkIndex;
                     for (checkIndex = 0;
                             (checkIndex < count) 
-                                && (((kEncrypt & pageType) == 0) == no_encrypt_page(ppnum + checkIndex)); 
+                                && (((kEncrypt & pageType) == 0) == pmap_is_noencrypt(ppnum + checkIndex)); 
                             checkIndex++)
                     {}
                     if (!checkIndex)
@@ -2799,7 +1783,7 @@ hibernate_write_image(void)
                 {
                     pageAndCount[0] = ppnum;
                     pageAndCount[1] = count;
-                    err = IOPolledFileWrite(vars->fileVars, 
+                    err = IOHibernatePolledFileWrite(vars->fileVars, 
                                             (const uint8_t *) &pageAndCount, sizeof(pageAndCount), 
                                             cryptvars);
                     if (kIOReturnSuccess != err)
@@ -2822,7 +1806,7 @@ hibernate_write_image(void)
                         sum2 += sum;
        
                     clock_get_uptime(&startTime);
-                    wkresult = WKdm_compress_new((WK_word*) src,
+                    wkresult = WKdm_compress_new((const WK_word*) src,
                                                 (WK_word*) compressed, 
                                                 (WK_word*) scratch,
                                                 page_size - 4);
@@ -2834,27 +1818,30 @@ hibernate_write_image(void)
                     compBytes += page_size;
                     pageCompressedSize = (-1 == wkresult) ? page_size : wkresult;
 
-                   if ((pageCompressedSize == zerosCompressedLen) 
-                    && !bcmp(compressed, zerosCompressed, zerosCompressedLen))
+                   if (pageCompressedSize == 0) 
                    {
-                       pageCompressedSize = 0;
-                       zeroPageCount++;
-                   }
-
-                    if (kIOHibernateModeEncrypt & gIOHibernateMode)
-                        pageCompressedSize = (pageCompressedSize + AES_BLOCK_SIZE - 1) & ~(AES_BLOCK_SIZE - 1);
-
-                    if (pageCompressedSize != page_size)
-                        data = compressed;
-                    else
+                       pageCompressedSize = 4;
                         data = src;
+
+                       if (*(uint32_t *)src)
+                               svPageCount++;
+                       else
+                               zvPageCount++;
+                   }
+                   else 
+                   {
+                       if (pageCompressedSize != page_size)
+                           data = compressed;
+                       else
+                           data = src;
+                   }
     
                     tag = pageCompressedSize | kIOHibernateTagSignature;
-                    err = IOPolledFileWrite(vars->fileVars, (const uint8_t *) &tag, sizeof(tag), cryptvars);
+                    err = IOHibernatePolledFileWrite(vars->fileVars, (const uint8_t *) &tag, sizeof(tag), cryptvars);
                     if (kIOReturnSuccess != err)
                         break;
     
-                    err = IOPolledFileWrite(vars->fileVars, data, (pageCompressedSize + 3) & ~3, cryptvars);
+                    err = IOHibernatePolledFileWrite(vars->fileVars, data, (pageCompressedSize + 3) & ~3, cryptvars);
                     if (kIOReturnSuccess != err)
                         break;
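
Each page written to the image is preceded by a 32-bit tag that packs the compressed length into the low bits alongside kIOHibernateTagSignature, and the payload is rounded up to a 4-byte boundary. A sketch of packing and unpacking such a tag; the signature/length split shown here is illustrative, the real constants are defined in IOHibernatePrivate.h:

    #include <cstdint>
    #include <cstdio>

    // Illustrative constants: length in the low bits, a signature in the high bits.
    constexpr uint32_t kTagSignature  = 0x53530000;
    constexpr uint32_t kTagLengthMask = 0x00001fff;   // page_size (4096) fits

    static uint32_t packTag(uint32_t compressedLen)
    {
        return compressedLen | kTagSignature;          // tag = pageCompressedSize | signature
    }

    static bool unpackTag(uint32_t tag, uint32_t* lenOut)
    {
        if ((tag & ~kTagLengthMask) != kTagSignature)  // signature check on restore
            return false;
        *lenOut = tag & kTagLengthMask;
        return true;
    }

    // Payloads go to disk rounded up to 4 bytes: (len + 3) & ~3.
    static uint32_t roundTo4(uint32_t len) { return (len + 3) & ~3u; }

    int main()
    {
        uint32_t tag = packTag(1337);
        uint32_t len = 0;
        if (unpackTag(tag, &len))
            std::printf("len %u, on-disk bytes %u\n", len, roundTo4(len));  // 1337, 1340
        return 0;
    }
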
     
@@ -2901,14 +1888,14 @@ hibernate_write_image(void)
             if (kWiredEncrypt != pageType)
             {
                 // end of image1/2 - fill to next block
-                err = IOPolledFileWrite(vars->fileVars, 0, 0, cryptvars);
+                err = IOHibernatePolledFileWrite(vars->fileVars, 0, 0, cryptvars);
                 if (kIOReturnSuccess != err)
                     break;
             }
             if (kWiredClear == pageType)
             {
                // enlarge wired image for test
-//              err = IOPolledFileWrite(vars->fileVars, 0, 0x60000000, cryptvars);
+//              err = IOHibernatePolledFileWrite(vars->fileVars, 0, 0x60000000, cryptvars);
 
                 // end wired image
                 header->encryptStart = vars->fileVars->encryptStart;
@@ -2922,9 +1909,12 @@ hibernate_write_image(void)
         {
             if (kIOReturnOverrun == err)
             {
-               // update actual compression ratio on not enough space
+                // update actual compression ratio on not enough space (for retry)
                 gIOHibernateCompression = (compressedSize << 8) / uncompressedSize;
             }
+
+            // update partial amount written (for IOPolledFileClose cleanup/unmap)
+            header->imageSize = vars->fileVars->position;
             break;
         }
 
@@ -2943,7 +1933,7 @@ hibernate_write_image(void)
        header->compression     = (compressedSize << 8) / uncompressedSize;
        gIOHibernateCompression = header->compression;
     
-        count = vars->fileExtents->getLength();
+        count = vars->fileVars->fileExtents->getLength();
         if (count > sizeof(header->fileExtentMap))
         {
             header->fileExtentMapSize = count;
@@ -2957,17 +1947,12 @@ hibernate_write_image(void)
         header->deviceBlockSize = vars->fileVars->blockSize;
     
         IOPolledFileSeek(vars->fileVars, 0);
-        err = IOPolledFileWrite(vars->fileVars,
+        err = IOHibernatePolledFileWrite(vars->fileVars,
                                     (uint8_t *) header, sizeof(IOHibernateImageHeader), 
                                     cryptvars);
         if (kIOReturnSuccess != err)
             break;
-        err = IOPolledFileWrite(vars->fileVars, 0, 0, cryptvars);
-        if (kIOReturnSuccess != err)
-            break;
-        err = IOHibernatePollerIODone(vars->fileVars, true);
-        if (kIOReturnSuccess != err)
-            break;
+        err = IOHibernatePolledFileWrite(vars->fileVars, 0, 0, cryptvars);
     }
     while (false);
     
@@ -2998,14 +1983,11 @@ hibernate_write_image(void)
                uncompressedSize ? ((int) ((compressedSize * 100ULL) / uncompressedSize)) : 0,
                sum1, sum2);
 
-    HIBLOG("zeroPageCount %d, wiredPagesEncrypted %d, wiredPagesClear %d, dirtyPagesEncrypted %d\n", 
-             zeroPageCount, wiredPagesEncrypted, wiredPagesClear, dirtyPagesEncrypted);
-
-    if (vars->fileVars->io)
-        (void) IOHibernatePollerIODone(vars->fileVars, false);
+    HIBLOG("svPageCount %d, zvPageCount %d, wiredPagesEncrypted %d, wiredPagesClear %d, dirtyPagesEncrypted %d\n", 
+          svPageCount, zvPageCount, wiredPagesEncrypted, wiredPagesClear, dirtyPagesEncrypted);
 
     if (pollerOpen)
-        IOHibernatePollerClose(vars->fileVars, kIOPolledBeforeSleepState);
+        IOPolledFilePollersClose(vars->fileVars, kIOPolledBeforeSleepState);
 
     if (vars->consoleMapping)
         ProgressUpdate(gIOHibernateGraphicsInfo, 
@@ -3062,12 +2044,12 @@ hibernate_machine_init(void)
     uint64_t     compBytes;
     uint32_t     lastProgressStamp = 0;
     uint32_t     progressStamp;
-    hibernate_cryptvars_t * cryptvars = 0;
+    IOPolledFileCryptVars * cryptvars = 0;
 
     IOHibernateVars * vars  = &gIOHibernateVars;
     bzero(gIOHibernateStats, sizeof(hibernate_statistics_t));
 
-    if (!vars->fileVars || !vars->fileVars->pollers || !vars->fileExtents)
+    if (!vars->fileVars || !vars->fileVars->pollers)
        return;
 
     sum = gIOHibernateCurrentHeader->actualImage1Sum;
@@ -3217,18 +2199,17 @@ hibernate_machine_init(void)
     AbsoluteTime_to_scalar(&compTime) = 0;
     compBytes = 0;
 
-    HIBLOG("IOHibernatePollerOpen(), ml_get_interrupts_enabled %d\n", ml_get_interrupts_enabled());
-    err = IOHibernatePollerOpen(vars->fileVars, kIOPolledAfterSleepState, 0);
+    HIBLOG("IOPolledFilePollersOpen(), ml_get_interrupts_enabled %d\n", ml_get_interrupts_enabled());
+    err = IOPolledFilePollersOpen(vars->fileVars, kIOPolledAfterSleepState, false);
     clock_get_uptime(&startIOTime);
     endTime = startIOTime;
     SUB_ABSOLUTETIME(&endTime, &allTime);
     absolutetime_to_nanoseconds(endTime, &nsec);
-    HIBLOG("IOHibernatePollerOpen(%x) %qd ms\n", err, nsec / 1000000ULL);
+    HIBLOG("IOPolledFilePollersOpen(%x) %qd ms\n", err, nsec / 1000000ULL);
 
     IOPolledFileSeek(vars->fileVars, gIOHibernateCurrentHeader->image1Size);
 
     // kick off the read ahead
-    vars->fileVars->io          = false;
     vars->fileVars->bufferHalf   = 0;
     vars->fileVars->bufferLimit  = 0;
     vars->fileVars->lastRead     = 0;
@@ -3278,30 +2259,39 @@ hibernate_machine_init(void)
                break;
            }
 
-           if (!compressedSize) bzero_phys(ptoa_64(ppnum), page_size);
-           else
+           err = IOPolledFileRead(vars->fileVars, src, (compressedSize + 3) & ~3, cryptvars);
+           if (kIOReturnSuccess != err) break;
+
+           if (compressedSize < page_size)
            {
-               err = IOPolledFileRead(vars->fileVars, src, (compressedSize + 3) & ~3, cryptvars);
-               if (kIOReturnSuccess != err) break;
-               if (compressedSize < page_size)
-               {
-                   decoOffset = page_size;
-                   clock_get_uptime(&startTime);
-                   WKdm_decompress_new((WK_word*) src, (WK_word*) compressed, (WK_word*) scratch, page_size);
-                   clock_get_uptime(&endTime);
-                   ADD_ABSOLUTETIME(&compTime, &endTime);
-                   SUB_ABSOLUTETIME(&compTime, &startTime);
-                   compBytes += page_size;
+               decoOffset = page_size;
+               clock_get_uptime(&startTime);
+
+               if (compressedSize == 4) {
+                   int i;
+                   uint32_t *s, *d;
+                       
+                   s = (uint32_t *)src;
+                   d = (uint32_t *)(uintptr_t)compressed;
+
+                   for (i = 0; i < (int)(PAGE_SIZE / sizeof(int32_t)); i++)
+                       *d++ = *s;
                }
-               else decoOffset = 0;
+               else 
+                   WKdm_decompress_new((WK_word*) src, (WK_word*) compressed, (WK_word*) scratch, compressedSize);
+               clock_get_uptime(&endTime);
+               ADD_ABSOLUTETIME(&compTime, &endTime);
+               SUB_ABSOLUTETIME(&compTime, &startTime);
+               compBytes += page_size;
+           }
+           else decoOffset = 0;
 
-               sum += hibernate_sum_page((src + decoOffset), ppnum);
-               err = IOMemoryDescriptorReadToPhysical(vars->srcBuffer, decoOffset, ptoa_64(ppnum), page_size);
-               if (err)
-               {
+           sum += hibernate_sum_page((src + decoOffset), ppnum);
+           err = IOMemoryDescriptorReadToPhysical(vars->srcBuffer, decoOffset, ptoa_64(ppnum), page_size);
+           if (err)
+           {
                    HIBLOG("IOMemoryDescriptorReadToPhysical [%ld] %x\n", (long)ppnum, err);
                    break;
-               }
            }
 
            ppnum++;
@@ -3332,12 +2322,9 @@ hibernate_machine_init(void)
     gIOHibernateCurrentHeader->actualImage2Sum = sum;
     gIOHibernateCompression = gIOHibernateCurrentHeader->compression;
 
-    if (vars->fileVars->io)
-        (void) IOHibernatePollerIODone(vars->fileVars, false);
-
     clock_get_uptime(&endIOTime);
 
-    err = IOHibernatePollerClose(vars->fileVars, kIOPolledAfterSleepState);
+    err = IOPolledFilePollersClose(vars->fileVars, kIOPolledAfterSleepState);
 
     clock_get_uptime(&endTime);
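The hunks above replace the old "compare the compressor output against a canned zero page" test: a compressed size of 0 now means the page is one 32-bit value repeated, so the writer stores a 4-byte payload (the first word of the page) under the tag, bumping svPageCount for non-zero values and zvPageCount for zero pages. A minimal detection sketch, assuming a 4 KB page of 32-bit words; the helper name is illustrative, and the kernel itself relies on the compressor's return value rather than an explicit scan:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define HIB_PAGE_SIZE 4096      /* assumed page size for the sketch */

/* True when every 32-bit word of the page equals the first one, i.e. the
 * page can be written as a single 4-byte payload in the hibernate image. */
static bool
page_is_single_value(const uint32_t *page, uint32_t *value_out)
{
    uint32_t first = page[0];
    for (size_t i = 1; i < HIB_PAGE_SIZE / sizeof(uint32_t); i++) {
        if (page[i] != first) return false;
    }
    *value_out = first;         /* 0 -> zvPageCount, non-zero -> svPageCount */
    return true;
}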
 
index 23f875d18e5428b467ef326ec3205f67f3eb1074..0c800aec59b7861ce7066a37b4cbbf7efb6b3a68 100644 (file)
@@ -42,7 +42,6 @@ struct IOHibernateVars
     class IOBufferMemoryDescriptor *    handoffBuffer;
     class IOMemoryDescriptor *          previewBuffer;
     OSData *                           previewData;
-    OSData *                           fileExtents;
     OSObject *                         saveBootDevice;
 
     struct IOPolledFileIOVars *                fileVars;
@@ -60,37 +59,6 @@ struct IOHibernateVars
 };
 typedef struct IOHibernateVars IOHibernateVars;
 
-
-struct IOPolledFileIOVars
-{
-    struct kern_direct_file_io_ref_t * fileRef;
-    IORegistryEntry *                   media;
-    class OSArray *                    pollers;
-    IOByteCount                                blockSize;
-    uint8_t *                                  buffer;
-    IOByteCount                        bufferSize;
-    IOByteCount                        bufferLimit;
-    IOByteCount                        bufferOffset;
-    IOByteCount                        bufferHalf;
-    IOByteCount                                extentRemaining;
-    IOByteCount                                lastRead;
-    IOByteCount                                readEnd;
-    uint32_t                            flags;
-    uint64_t                           fileSize;
-    uint64_t                           block0;
-    uint64_t                           position;
-    uint64_t                           extentPosition;
-    uint64_t                           encryptStart;
-    uint64_t                           encryptEnd;
-    uint64_t                            cryptBytes;
-    AbsoluteTime                        cryptTime;
-    IOPolledFileExtent *               extentMap;
-    IOPolledFileExtent *               currentExtent;
-    bool                               io;
-    IOReturn                           ioStatus;
-};
-typedef struct IOPolledFileIOVars IOPolledFileIOVars;
-
 #endif         /* __cplusplus */
 
 enum
index daf5d2804b2405acee3cda41ab3f9c7b48a6a06a..141a280a54d5cb8bb4afbf3c2489e62fa50372f1 100644 (file)
@@ -401,8 +401,17 @@ store_one_page(uint32_t procFlags, uint32_t * src, uint32_t compressedSize,
        if (compressedSize != PAGE_SIZE)
        {
                dst = pal_hib_map(DEST_COPY_AREA, dst);
-               if (compressedSize) WKdm_decompress_new((WK_word*) src, (WK_word*)(uintptr_t)dst, (WK_word*) &scratch[0], PAGE_SIZE);
-               else bzero((void *) dst, PAGE_SIZE);
+               if (compressedSize != 4) WKdm_decompress_new((WK_word*) src, (WK_word*)(uintptr_t)dst, (WK_word*) &scratch[0], compressedSize);
+               else {
+                       int i;
+                       uint32_t *s, *d;
+                       
+                       s = src;
+                       d = (uint32_t *)(uintptr_t)dst;
+
+                       for (i = 0; i < (int)(PAGE_SIZE / sizeof(int32_t)); i++)
+                               *d++ = *s;
+               }
        }
        else
        {
@@ -412,8 +421,6 @@ store_one_page(uint32_t procFlags, uint32_t * src, uint32_t compressedSize,
        return hibernate_sum_page((uint8_t *)(uintptr_t)dst, ppnum);
 }
 
-#define C_ASSERT(e) typedef char    __C_ASSERT__[(e) ? 1 : -1]
-
 long 
 hibernate_kernel_entrypoint(uint32_t p1, 
                             uint32_t p2, uint32_t p3, uint32_t p4)
@@ -449,7 +456,7 @@ hibernate_kernel_entrypoint(uint32_t p1,
     uint64_t timeStart;
     timeStart = rdtsc64();
 
-    C_ASSERT(sizeof(IOHibernateImageHeader) == 512);
+    assert_static(sizeof(IOHibernateImageHeader) == 512);
 
     headerPhys = ptoa_64(p1);
 
@@ -697,6 +704,8 @@ hibernate_kernel_entrypoint(uint32_t p1,
 
     gIOHibernateCurrentHeader->trampolineTime = (((rdtsc64() - timeStart)) >> 8);
 
+//  debug_code('done', 0);
+
 #if CONFIG_SLEEP
 #if defined(__i386__) || defined(__x86_64__)
     typedef void (*ResetProc)(void);
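Both restore paths handle the same record: hibernate_machine_init (earlier) and store_one_page here expand a 4-byte record by copying that one word across the destination page instead of calling WKdm_decompress_new. The old code wrote no payload for zero pages and simply zeroed the destination, so the new format spends 4 payload bytes per zero page but covers non-zero single-value pages with the same record type. Restated as a standalone helper, assuming a 4 KB page; the function name is illustrative:

#include <stddef.h>
#include <stdint.h>

#define HIB_PAGE_SIZE 4096      /* assumed page size for the sketch */

/* Expand a single-value record: the 4-byte payload holds one 32-bit word
 * that is replicated into every word of the destination page. */
static void
expand_single_value_page(uint32_t value, uint32_t *dst)
{
    for (size_t i = 0; i < HIB_PAGE_SIZE / sizeof(uint32_t); i++) {
        dst[i] = value;
    }
}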
index f51a1c936c7a0c01282efb0b6e20ffe49c7e746a..21e92e9350ecd108825ed23e2aeccc8e07d6df9f 100644 (file)
@@ -349,7 +349,8 @@ IOHistogramReporter::tallyValue(int64_t value)
     hist_values.bucket_sum += value;
     hist_values.bucket_hits++;
     
-    if (setElementValues(element_index, (IOReportElementValues *)&hist_values) == kIOReturnSuccess) {
+    if (setElementValues(element_index, (IOReportElementValues *)&hist_values)
+                != kIOReturnSuccess) {
         goto finish;
     }
 
index 2560a0687e664d5a0a0696a75dccfa21972159d2..d99e9399e00361fc7091f8a632b960effb9abe02 100644 (file)
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
+
 #include <sys/sysctl.h>
+extern "C" {
+#include <vm/vm_kern.h>
+}
 
 #include <libkern/c++/OSContainers.h>
+#include <libkern/OSDebug.h>
 #include <libkern/c++/OSCPPDebug.h>
 
 #include <IOKit/IOKitDebug.h>
 #define DEBUG_INIT_VALUE 0
 #endif
 
-SInt64         gIOKitDebug = DEBUG_INIT_VALUE;
-SInt64         gIOKitTrace = 0;
+SInt64          gIOKitDebug = DEBUG_INIT_VALUE;
+SInt64          gIOKitTrace = 0;
 
 #if DEVELOPMENT || DEBUG
-#define IODEBUG_CTLFLAGS       CTLFLAG_RW
+#define IODEBUG_CTLFLAGS        CTLFLAG_RW
 #else
-#define IODEBUG_CTLFLAGS       CTLFLAG_RD
+#define IODEBUG_CTLFLAGS        CTLFLAG_RD
 #endif
 
 SYSCTL_QUAD(_debug, OID_AUTO, iokit, IODEBUG_CTLFLAGS | CTLFLAG_LOCKED, &gIOKitDebug, "boot_arg io");
 SYSCTL_QUAD(_debug, OID_AUTO, iotrace, CTLFLAG_RW | CTLFLAG_LOCKED, &gIOKitTrace, "trace io");
 
 
-int            debug_malloc_size;
-int            debug_iomalloc_size;
+int             debug_malloc_size;
+int             debug_iomalloc_size;
 
-vm_size_t      debug_iomallocpageable_size;
-int            debug_container_malloc_size;
-// int                 debug_ivars_size; // in OSObject.cpp
+vm_size_t       debug_iomallocpageable_size;
+int             debug_container_malloc_size;
+// int          debug_ivars_size; // in OSObject.cpp
 
 extern "C" {
 
@@ -73,11 +78,11 @@ extern "C" {
 
 void IOPrintPlane( const IORegistryPlane * plane )
 {
-    IORegistryEntry *          next;
-    IORegistryIterator *       iter;
-    OSOrderedSet *             all;
-    char                       format[] = "%xxxs";
-    IOService *                        service;
+    IORegistryEntry *           next;
+    IORegistryIterator *        iter;
+    OSOrderedSet *              all;
+    char                        format[] = "%xxxs";
+    IOService *                 service;
 
     iter = IORegistryIterator::iterateOver( plane );
     assert( iter );
@@ -86,20 +91,20 @@ void IOPrintPlane( const IORegistryPlane * plane )
         DEBG("Count %d\n", all->getCount() );
         all->release();
     } else
-       DEBG("Empty\n");
+        DEBG("Empty\n");
 
     iter->reset();
     while( (next = iter->getNextObjectRecursive())) {
-       snprintf(format + 1, sizeof(format) - 1, "%ds", 2 * next->getDepth( plane ));
-       DEBG( format, "");
-       DEBG( "\033[33m%s", next->getName( plane ));
-       if( (next->getLocation( plane )))
+        snprintf(format + 1, sizeof(format) - 1, "%ds", 2 * next->getDepth( plane ));
+        DEBG( format, "");
+        DEBG( "\033[33m%s", next->getName( plane ));
+        if( (next->getLocation( plane )))
             DEBG("@%s", next->getLocation( plane ));
-       DEBG("\033[0m <class %s", next->getMetaClass()->getClassName());
+        DEBG("\033[0m <class %s", next->getMetaClass()->getClassName());
         if( (service = OSDynamicCast(IOService, next)))
             DEBG(", busy %ld", (long) service->getBusyState());
-       DEBG( ">\n");
-//     IOSleep(250);
+        DEBG( ">\n");
+//      IOSleep(250);
     }
     iter->release();
 }
@@ -118,12 +123,12 @@ void IOPrintMemory( void )
 //    OSMetaClass::printInstanceCounts();
 
     IOLog("\n"
-           "ivar kalloc()       0x%08x\n"
-           "malloc()            0x%08x\n"
+            "ivar kalloc()       0x%08x\n"
+            "malloc()            0x%08x\n"
             "containers kalloc() 0x%08x\n"
-           "IOMalloc()          0x%08x\n"
+            "IOMalloc()          0x%08x\n"
             "----------------------------------------\n",
-           debug_ivars_size,
+            debug_ivars_size,
             debug_malloc_size,
             debug_container_malloc_size,
             debug_iomalloc_size
@@ -132,12 +137,12 @@ void IOPrintMemory( void )
 
 } /* extern "C" */
 
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 #define super OSObject
 OSDefineMetaClassAndStructors(IOKitDiagnostics, OSObject)
 
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 OSObject * IOKitDiagnostics::diagnostics( void )
 {
@@ -145,21 +150,21 @@ OSObject * IOKitDiagnostics::diagnostics( void )
 
     diags = new IOKitDiagnostics;
     if( diags && !diags->init()) {
-       diags->release();
-       diags = 0;
+        diags->release();
+        diags = 0;
     }
 
     return( diags );
 }
 
 void IOKitDiagnostics::updateOffset( OSDictionary * dict,
-                       UInt32 value, const char * name )
+                        UInt64 value, const char * name )
 {
     OSNumber * off;
 
-    off = OSNumber::withNumber( value, 32 );
+    off = OSNumber::withNumber( value, 64 );
     if( !off)
-       return;
+        return;
 
     dict->setObject( name, off );
     off->release();
@@ -167,12 +172,12 @@ void IOKitDiagnostics::updateOffset( OSDictionary * dict,
 
 bool IOKitDiagnostics::serialize(OSSerialize *s) const
 {
-    OSDictionary *     dict;
-    bool               ok;
+    OSDictionary *      dict;
+    bool                ok;
 
     dict = OSDictionary::withCapacity( 5 );
     if( !dict)
-       return( false );
+        return( false );
 
     updateOffset( dict, debug_ivars_size, "Instance allocation" );
     updateOffset( dict, debug_container_malloc_size, "Container allocation" );
@@ -188,4 +193,863 @@ bool IOKitDiagnostics::serialize(OSSerialize *s) const
     return( ok );
 }
 
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#if IOTRACKING
+
+#include <libkern/c++/OSCPPDebug.h>
+#include <libkern/c++/OSKext.h>
+#include <kern/zalloc.h>
+
+__private_extern__ "C" void qsort(
+    void * array,
+    size_t nmembers,
+    size_t member_size,
+    int (*)(const void *, const void *));
+
+extern "C" ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va);
+extern "C" ppnum_t pmap_valid_page(ppnum_t pn);
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+struct IOTRecursiveLock
+{
+    lck_mtx_t * mutex;
+    thread_t    thread;
+    UInt32      count;
+};
+
+struct IOTrackingQueue
+{
+    queue_chain_t     link;
+    IOTRecursiveLock  lock;
+    queue_head_t      sites;
+    const char *      name;
+    size_t            allocSize;
+    size_t            minCaptureSize;
+    uint32_t          siteCount;
+    uint8_t           captureOn;
+    uint8_t           isAlloc;
+};
+
+struct IOTrackingCallSite
+{
+    queue_chain_t          link;
+    IOTrackingQueue *      queue;
+    uint32_t               crc;
+    IOTrackingCallSiteInfo info; 
+    queue_chain_t          instances;
+    IOTracking *           addresses;
+};
+
+struct IOTrackingLeaksRef
+{
+    uintptr_t * instances;
+    uint32_t    count;
+    uint32_t    found;
+    size_t      bytes;
+};
+
+enum
+{
+    kInstanceFlagAddress    = 0x01UL,
+    kInstanceFlagReferenced = 0x02UL,
+    kInstanceFlags          = 0x03UL
+};
+
+lck_mtx_t *  gIOTrackingLock;
+queue_head_t gIOTrackingQ;
+
+enum
+{
+    kTrackingAddressFlagAllocated    = 0x00000001
+};
+
+#if defined(__LP64__)
+#define IOTrackingAddressFlags(ptr)    (ptr->flags)
+#else
+#define IOTrackingAddressFlags(ptr)    (ptr->tracking.flags)
+#endif
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+static void 
+IOTRecursiveLockLock(IOTRecursiveLock * lock)
+{
+    if (lock->thread == current_thread()) lock->count++;
+    else
+    {
+        lck_mtx_lock(lock->mutex);
+        assert(lock->thread == 0);
+        assert(lock->count == 0);
+        lock->thread = current_thread();
+        lock->count = 1;
+    }
+}
+
+static void 
+IOTRecursiveLockUnlock(IOTRecursiveLock * lock)
+{
+    assert(lock->thread == current_thread());
+    if (0 == (--lock->count))
+    {
+        lock->thread = 0;
+        lck_mtx_unlock(lock->mutex);
+    }
+}
+
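IOTRecursiveLock exists because the tracking paths re-enter the queue lock on the same thread; for example, IOTrackingFree (below) already holds queue->lock when it calls IOTrackingRemove. A usage sketch, assuming a queue whose lock.mutex was set up by IOTrackingQueueAlloc; the function name is illustrative:

/* Sketch: nested locking from one thread is legal; the mutex is only
 * released when the outermost unlock drops the count back to zero. */
static void
touch_queue(IOTrackingQueue * queue)
{
    IOTRecursiveLockLock(&queue->lock);     /* count 0 -> 1, takes the mutex  */
    IOTRecursiveLockLock(&queue->lock);     /* same thread, count 1 -> 2      */
    /* ... walk or modify queue->sites ... */
    IOTRecursiveLockUnlock(&queue->lock);   /* count 2 -> 1, mutex still held */
    IOTRecursiveLockUnlock(&queue->lock);   /* count 1 -> 0, mutex released   */
}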
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+void
+IOTrackingInit(void)
+{
+    queue_init(&gIOTrackingQ);
+    gIOTrackingLock = lck_mtx_alloc_init(IOLockGroup, LCK_ATTR_NULL);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+IOTrackingQueue *
+IOTrackingQueueAlloc(const char * name, size_t allocSize, size_t minCaptureSize, bool isAlloc)
+{
+    IOTrackingQueue * queue;
+    queue = (typeof(queue)) kalloc(sizeof(IOTrackingQueue));
+    bzero(queue, sizeof(IOTrackingQueue));
+
+    queue->name           = name;
+    queue->allocSize      = allocSize;
+    queue->minCaptureSize = minCaptureSize;
+    queue->lock.mutex     = lck_mtx_alloc_init(IOLockGroup, LCK_ATTR_NULL);
+    queue_init(&queue->sites);
+
+    queue->captureOn = (0 != (kIOTrackingBoot & gIOKitDebug));
+    queue->isAlloc   = isAlloc;
+
+    lck_mtx_lock(gIOTrackingLock);
+    queue_enter(&gIOTrackingQ, queue, IOTrackingQueue *, link);
+    lck_mtx_unlock(gIOTrackingLock);
+
+    return (queue);
+};
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+void
+IOTrackingQueueFree(IOTrackingQueue * queue)
+{
+    lck_mtx_lock(gIOTrackingLock);
+    IOTrackingReset(queue);
+    remque(&queue->link);
+    lck_mtx_unlock(gIOTrackingLock);
+
+    lck_mtx_free(queue->lock.mutex, IOLockGroup);
+
+    kfree(queue, sizeof(IOTrackingQueue));
+};
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/* fasthash
+   The MIT License
+
+   Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
+
+   Permission is hereby granted, free of charge, to any person
+   obtaining a copy of this software and associated documentation
+   files (the "Software"), to deal in the Software without
+   restriction, including without limitation the rights to use, copy,
+   modify, merge, publish, distribute, sublicense, and/or sell copies
+   of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+
+// Compression function for Merkle-Damgard construction.
+// This function is generated using the framework provided.
+#define mix(h) ({                               \
+                  (h) ^= (h) >> 23;             \
+                  (h) *= 0x2127599bf4325c37ULL; \
+                  (h) ^= (h) >> 47; })
+
+static uint64_t
+fasthash64(const void *buf, size_t len, uint64_t seed)
+{
+    const uint64_t    m = 0x880355f21e6d1965ULL;
+    const uint64_t *pos = (const uint64_t *)buf;
+    const uint64_t *end = pos + (len / 8);
+    const unsigned char *pos2;
+    uint64_t h = seed ^ (len * m);
+    uint64_t v;
+
+    while (pos != end) {
+        v  = *pos++;
+        h ^= mix(v);
+        h *= m;
+    }
+
+    pos2 = (const unsigned char*)pos;
+    v = 0;
+
+    switch (len & 7) {
+    case 7: v ^= (uint64_t)pos2[6] << 48;
+    case 6: v ^= (uint64_t)pos2[5] << 40;
+    case 5: v ^= (uint64_t)pos2[4] << 32;
+    case 4: v ^= (uint64_t)pos2[3] << 24;
+    case 3: v ^= (uint64_t)pos2[2] << 16;
+    case 2: v ^= (uint64_t)pos2[1] << 8;
+    case 1: v ^= (uint64_t)pos2[0];
+            h ^= mix(v);
+            h *= m;
+    }
+
+    return mix(h);
+} 
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+static uint32_t
+fasthash32(const void *buf, size_t len, uint32_t seed)
+{
+    // the following trick converts the 64-bit hashcode to Fermat
+    // residue, which shall retain information from both the higher
+    // and lower parts of hashcode.
+    uint64_t h = fasthash64(buf, len, seed);
+    return h - (h >> 32);
+}
+
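IOTrackingAdd (below) collapses a captured backtrace into a 32-bit call-site key with fasthash32, seeding it with the CRC-32 polynomial 0x04C11DB7. A small demo of the same call, assuming fasthash64/fasthash32 above are compiled into the same translation unit (they are static to this file, so this is illustration only):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uintptr_t bt[4] = { 0x1000, 0x2040, 0x30c0, 0x4f00 };  /* stand-in backtrace */
    uint32_t  key   = fasthash32(bt, sizeof(bt), 0x04C11DB7);

    printf("call-site key: 0x%08x\n", key);     /* compared against site->crc */
    return 0;
}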
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+void
+IOTrackingAdd(IOTrackingQueue * queue, IOTracking * mem, size_t size, bool address)
+{
+    IOTrackingCallSite * site;
+    uint32_t             crc, num;
+    uintptr_t            bt[kIOTrackingCallSiteBTs + 1];
+
+    if (mem->site)                    return;
+    if (!queue->captureOn)            return;
+    if (size < queue->minCaptureSize) return;
+
+    assert(!mem->link.next);
+
+    num  = fastbacktrace(&bt[0], kIOTrackingCallSiteBTs + 1);
+    num--;
+    crc = fasthash32(&bt[1], num * sizeof(bt[0]), 0x04C11DB7);
+
+    IOTRecursiveLockLock(&queue->lock);
+    queue_iterate(&queue->sites, site, IOTrackingCallSite *, link)
+    {
+        if (crc == site->crc) break;
+    }
+
+    if (queue_end(&queue->sites, (queue_entry_t) site))
+    {
+        site = (typeof(site)) kalloc(sizeof(IOTrackingCallSite));
+
+        queue_init(&site->instances);
+        site->addresses  = (IOTracking *) &site->instances;
+        site->queue      = queue;
+        site->crc        = crc;
+        site->info.count = 0;
+        memset(&site->info.size[0], 0, sizeof(site->info.size));
+        bcopy(&bt[1], &site->info.bt[0], num * sizeof(site->info.bt[0]));
+        assert(num <= kIOTrackingCallSiteBTs);
+        bzero(&site->info.bt[num], (kIOTrackingCallSiteBTs - num) * sizeof(site->info.bt[0]));
+
+        queue_enter_first(&queue->sites, site, IOTrackingCallSite *, link);
+        queue->siteCount++;
+    }
+
+    if (address)
+    {
+        queue_enter/*last*/(&site->instances, mem, IOTrackingCallSite *, link);
+        if (queue_end(&site->instances, (queue_entry_t)site->addresses)) site->addresses = mem;
+    }
+    else queue_enter_first(&site->instances, mem, IOTrackingCallSite *, link);
+
+    mem->site = site;
+    site->info.size[0] += size;
+    site->info.count++;
+
+    IOTRecursiveLockUnlock(&queue->lock);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+void
+IOTrackingRemove(IOTrackingQueue * queue, IOTracking * mem, size_t size)
+{
+    if (!mem->link.next) return;
+
+    IOTRecursiveLockLock(&queue->lock);
+
+    assert(mem->site);
+
+    if (mem == mem->site->addresses) mem->site->addresses = (IOTracking *) queue_next(&mem->link);
+    remque(&mem->link);
+
+    assert(mem->site->info.count);
+    mem->site->info.count--;
+    assert(mem->site->info.size[0] >= size);
+    mem->site->info.size[0] -= size;
+    if (!mem->site->info.count)
+    {
+        assert(queue_empty(&mem->site->instances));
+        assert(!mem->site->info.size[0]);
+        assert(!mem->site->info.size[1]);
+
+        remque(&mem->site->link);
+        assert(queue->siteCount);
+        queue->siteCount--;
+        kfree(mem->site, sizeof(IOTrackingCallSite));
+    }
+    IOTRecursiveLockUnlock(&queue->lock);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+void
+IOTrackingAlloc(IOTrackingQueue * queue, uintptr_t address, size_t size)
+{
+    IOTrackingAddress * tracking;
+    
+    if (!queue->captureOn)            return;
+    if (size < queue->minCaptureSize) return;
+
+    address = ~address;
+    tracking = (typeof(tracking)) kalloc(sizeof(IOTrackingAddress));
+    bzero(tracking, sizeof(IOTrackingAddress));
+    IOTrackingAddressFlags(tracking) |= kTrackingAddressFlagAllocated;
+    tracking->address = address;
+    tracking->size    = size;
+
+    IOTrackingAdd(queue, &tracking->tracking, size, true);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+void
+IOTrackingFree(IOTrackingQueue * queue, uintptr_t address, size_t size)
+{
+    IOTrackingCallSite * site;
+    IOTrackingAddress  * tracking;
+    bool                 done;
+
+    address = ~address;
+    IOTRecursiveLockLock(&queue->lock);
+    done = false;
+    queue_iterate(&queue->sites, site, IOTrackingCallSite *, link)
+    {
+        for (tracking = (IOTrackingAddress *) site->addresses; 
+                !done && !queue_end(&site->instances, (queue_entry_t) tracking);
+                tracking = (IOTrackingAddress *) queue_next(&tracking->tracking.link))
+        {
+            if ((done = (address == tracking->address)))
+            {
+                IOTrackingRemove(queue, &tracking->tracking, size);
+                kfree(tracking, sizeof(IOTrackingAddress));
+            }
+        }
+        if (done) break;
+    }
+
+    IOTRecursiveLockUnlock(&queue->lock);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+void
+IOTrackingAccumSize(IOTrackingQueue * queue, IOTracking * mem, size_t size)
+{
+    IOTRecursiveLockLock(&queue->lock);
+    if (mem->link.next)
+    {
+        assert(mem->site);
+        assert((size > 0) || (mem->site->info.size[1] >= -size));
+        mem->site->info.size[1] += size;    
+    };
+    IOTRecursiveLockUnlock(&queue->lock);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+void
+IOTrackingReset(IOTrackingQueue * queue)
+{
+    IOTrackingCallSite * site;
+    IOTracking         * tracking;
+    IOTrackingAddress  * trackingAddress;
+    bool                 addresses;
+
+    IOTRecursiveLockLock(&queue->lock);
+    while (!queue_empty(&queue->sites))
+    {
+        queue_remove_first(&queue->sites, site, IOTrackingCallSite *, link);
+        addresses = false;
+        while (!queue_empty(&site->instances))
+        {
+            queue_remove_first(&site->instances, tracking, IOTracking *, link);
+            tracking->link.next = 0;
+            if (tracking == site->addresses) addresses = true;
+            if (addresses)
+            {
+                trackingAddress = (typeof(trackingAddress)) tracking;
+                if (kTrackingAddressFlagAllocated & IOTrackingAddressFlags(trackingAddress))
+                {
+                   kfree(tracking, sizeof(IOTrackingAddress));
+               }
+           }
+        }
+        kfree(site, sizeof(IOTrackingCallSite));
+    }
+    queue->siteCount = 0;
+    IOTRecursiveLockUnlock(&queue->lock);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+static int
+IOTrackingCallSiteInfoCompare(const void * left, const void * right)
+{
+    IOTrackingCallSiteInfo * l = (typeof(l)) left;
+    IOTrackingCallSiteInfo * r = (typeof(r)) right;
+    size_t                   lsize, rsize;
+
+    rsize = r->size[0] + r->size[1];
+    lsize = l->size[0] + l->size[1];
+
+    return ((rsize > lsize) ? 1 : ((rsize == lsize) ? 0 : -1));
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+static int
+IOTrackingAddressCompare(const void * left, const void * right)
+{
+    IOTracking * instance;
+    uintptr_t    inst, laddr, raddr;
+
+    inst = ((typeof(inst) *) left)[0];
+    instance = (typeof(instance)) (inst & ~kInstanceFlags);
+    if (kInstanceFlagAddress & inst) laddr = ~((IOTrackingAddress *)instance)->address;
+    else                             laddr = (uintptr_t) (instance + 1);
+
+    inst = ((typeof(inst) *) right)[0];
+    instance = (typeof(instance)) (inst & ~kInstanceFlags);
+    if (kInstanceFlagAddress & inst) raddr = ~((IOTrackingAddress *)instance)->address;
+    else                             raddr = (uintptr_t) (instance + 1);
+
+    return ((laddr > raddr) ? 1 : ((laddr == raddr) ? 0 : -1));
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+static void
+IOTrackingLeakScan(void * refcon)
+{
+    IOTrackingLeaksRef * ref = (typeof(ref)) refcon;
+    uintptr_t          * instances;
+    IOTracking         * instance;
+    uint64_t             vaddr, vincr;
+    ppnum_t              ppn;
+    uintptr_t            ptr, addr, inst;
+    size_t               size;
+    uint32_t             baseIdx, lim, ptrIdx, count;
+    boolean_t            is;
+
+//    if (cpu_number()) return;
+
+    instances = ref->instances;
+    count     = ref->count;
+
+    for (vaddr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
+         vaddr < VM_MAX_KERNEL_ADDRESS;
+         ml_set_interrupts_enabled(is), vaddr += vincr)
+    {
+#if !defined(__LP64__)
+        thread_block(NULL);
+#endif
+        is = ml_set_interrupts_enabled(false);
+
+        ppn = kernel_pmap_present_mapping(vaddr, &vincr);
+        // check noencrypt to avoid VM structs (map entries) with pointers
+        if (ppn && (!pmap_valid_page(ppn) || pmap_is_noencrypt(ppn))) ppn = 0;
+        if (!ppn) continue;
+
+        for (ptrIdx = 0; ptrIdx < (page_size / sizeof(uintptr_t)); ptrIdx++)
+        {
+            ptr = ((uintptr_t *)vaddr)[ptrIdx];
+
+            for (lim = count, baseIdx = 0; lim; lim >>= 1)
+            {
+                inst = instances[baseIdx + (lim >> 1)];
+                instance = (typeof(instance)) (inst & ~kInstanceFlags);
+                if (kInstanceFlagAddress & inst)
+                {
+                    addr = ~((IOTrackingAddress *)instance)->address;
+                    size = ((IOTrackingAddress *)instance)->size;
+                }
+                else
+                {
+                    addr = (uintptr_t) (instance + 1);
+                    size = instance->site->queue->allocSize;
+                }
+                if ((ptr >= addr) && (ptr < (addr + size)))
+                {
+                    if (!(kInstanceFlagReferenced & inst))
+                    {
+                        inst |= kInstanceFlagReferenced;
+                        instances[baseIdx + (lim >> 1)] = inst;
+                        ref->found++;
+                    }
+                    break;
+                }
+                if (ptr > addr) 
+                {       
+                    // move right
+                    baseIdx += (lim >> 1) + 1;
+                    lim--;
+                }
+                // else move left
+            }
+        }
+        ref->bytes += page_size;    
+    }
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+static OSData *
+IOTrackingLeaks(OSData * data)
+{
+    IOTrackingLeaksRef       ref;
+    IOTrackingCallSiteInfo   unslideInfo;
+    IOTrackingCallSite     * site;
+    OSData                 * leakData;
+    uintptr_t              * instances;
+    IOTracking             * instance;
+    uintptr_t                inst;
+    uint32_t                 count, idx, numSites, dups, siteCount;
+
+    instances = (typeof(instances)) data->getBytesNoCopy();
+    count = (data->getLength() / sizeof(*instances));
+    qsort(instances, count, sizeof(*instances), &IOTrackingAddressCompare);
+    
+    bzero(&ref, sizeof(ref));
+    ref.instances = instances;
+    ref.count = count;
+
+    IOTrackingLeakScan(&ref);
+    
+    IOLog("leaks scanned %ld MB, instance count %d, found %d\n", ref.bytes / 1024 / 1024, count, ref.found);
+
+    leakData = OSData::withCapacity(128 * sizeof(IOTrackingCallSiteInfo));
+
+    for (numSites = 0, idx = 0; idx < count; idx++)
+    {
+        inst = instances[idx];
+        if (kInstanceFlagReferenced & inst) continue;
+        instance = (typeof(instance)) (inst & ~kInstanceFlags);
+        site = instance->site;
+       instances[numSites] = (uintptr_t) site;
+       numSites++;
+    }
+
+    for (idx = 0; idx < numSites; idx++)
+    {
+        inst = instances[idx];
+        if (!inst) continue;
+        site = (typeof(site)) inst;
+       for (siteCount = 1, dups = (idx + 1); dups < numSites; dups++)
+       {
+           if (instances[dups] == (uintptr_t) site)
+           {
+               siteCount++;
+               instances[dups] = 0;
+           }
+       }
+        unslideInfo.count   = siteCount;
+        unslideInfo.size[0] = (site->info.size[0] * site->info.count) / siteCount;
+        unslideInfo.size[1] = (site->info.size[1] * site->info.count) / siteCount;

+        for (uint32_t j = 0; j < kIOTrackingCallSiteBTs; j++)
+        {
+            unslideInfo.bt[j] = VM_KERNEL_UNSLIDE(site->info.bt[j]);
+        }
+        leakData->appendBytes(&unslideInfo, sizeof(unslideInfo));
+    }
+    data->release();
+
+    return (leakData);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+static bool
+SkipName(uint32_t options, const char * name, size_t namesLen, const char * names)
+{
+    const char * scan;
+    const char * next;
+    bool         exclude, found;
+    size_t       qLen, sLen;
+
+    if (!namesLen || !names) return (false);
+    // <len><name>...<len><name><0>
+    exclude = (0 != (kIOTrackingExcludeNames & options));
+    qLen    = strlen(name);
+    scan    = names;
+    found   = false;
+    do
+    {
+        sLen = scan[0];
+        scan++;
+        next = scan + sLen;
+        if (next >= (names + namesLen)) break;
+        found = ((sLen == qLen) && !strncmp(scan, name, sLen));
+        scan = next;
+    }
+    while (!found && (scan < (names + namesLen)));
+
+    return (!(exclude ^ found));
+}
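SkipName walks a packed list of length-prefixed names (the <len><name>...<0> layout noted in the comment above); kIOTrackingExcludeNames inverts the result so the caller filters out, rather than selects, the named queues. A sketch of such a buffer; the queue-name strings are assumed for illustration, and namesLen must include the terminating zero byte so the last entry is scanned:

/* Two-entry name list: 0x08 "IOMalloc", 0x06 "IOWire", then a trailing 0. */
static const char   queue_names[]   =
    "\x08" "IOMalloc"
    "\x06" "IOWire";
static const size_t queue_names_len = sizeof(queue_names); /* literal's NUL is the trailing 0 */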
+
+#endif /* IOTRACKING */
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+kern_return_t
+IOTrackingDebug(uint32_t selector, uint32_t options,
+                const char * names, size_t namesLen, 
+                size_t size, OSObject ** result)
+{
+    kern_return_t            ret;
+    OSData                 * data;
+
+    if (result) *result = 0;
+    data = 0;
+    ret = kIOReturnNotReady;
+
+#if IOTRACKING
+
+    IOTrackingQueue        * queue;
+    IOTracking             * instance;
+    IOTrackingCallSite     * site;
+    IOTrackingCallSiteInfo * siteInfos;
+    IOTrackingCallSiteInfo * siteInfo;
+    bool                     addresses;
+    uint32_t                 num, idx;
+    uintptr_t                instFlags;
+
+    if (!(kIOTracking & gIOKitDebug)) return (kIOReturnNotReady);
+    ret = kIOReturnNotFound;
+
+    lck_mtx_lock(gIOTrackingLock);
+    queue_iterate(&gIOTrackingQ, queue, IOTrackingQueue *, link)
+    {
+        if (SkipName(options, queue->name, namesLen, names)) continue;
+
+        switch (selector)
+        {
+            case kIOTrackingResetTracking:
+            {
+                IOTrackingReset(queue);
+                ret = kIOReturnSuccess;
+                break;
+            }
+
+            case kIOTrackingStartCapture:
+            case kIOTrackingStopCapture:
+            {
+                queue->captureOn = (kIOTrackingStartCapture == selector);
+                ret = kIOReturnSuccess;
+                break;
+            }
+
+            case kIOTrackingSetMinCaptureSize:
+            {
+                queue->minCaptureSize = size;
+                ret = kIOReturnSuccess;
+                break;
+            }
+
+            case kIOTrackingLeaks:
+            {
+                if (!queue->isAlloc) break;
+
+                if (!data) data = OSData::withCapacity(1024 * sizeof(uintptr_t));
+
+                IOTRecursiveLockLock(&queue->lock);
+                queue_iterate(&queue->sites, site, IOTrackingCallSite *, link)
+                {
+                    addresses = false;
+                    queue_iterate(&site->instances, instance, IOTracking *, link)
+                    {
+                        if (instance == site->addresses) addresses = true;
+                        instFlags = (typeof(instFlags)) instance; 
+                        if (addresses) instFlags |= kInstanceFlagAddress;
+                        data->appendBytes(&instFlags, sizeof(instFlags));
+                    }
+                }
+                // queue is locked
+                ret = kIOReturnSuccess;
+                break;
+            }
+
+            case kIOTrackingGetTracking:
+            case kIOTrackingPrintTracking:
+            {
+                if (!data) data = OSData::withCapacity(128 * sizeof(IOTrackingCallSiteInfo));
+
+                IOTRecursiveLockLock(&queue->lock);
+                num = queue->siteCount;
+                idx = 0;
+                queue_iterate(&queue->sites, site, IOTrackingCallSite *, link)
+                {
+                    assert(idx < num);
+                    idx++;
+
+                    if (size && ((site->info.size[0] + site->info.size[1]) < size)) continue;
+
+                    IOTrackingCallSiteInfo unslideInfo;
+                    unslideInfo.count = site->info.count;
+                    memcpy(&unslideInfo.size[0], &site->info.size[0], sizeof(unslideInfo.size));
+
+                    for (uint32_t j = 0; j < kIOTrackingCallSiteBTs; j++)
+                    {
+                        unslideInfo.bt[j] = VM_KERNEL_UNSLIDE(site->info.bt[j]);
+                    }
+                    data->appendBytes(&unslideInfo, sizeof(unslideInfo));
+                }
+                assert(idx == num);
+                IOTRecursiveLockUnlock(&queue->lock);
+                ret = kIOReturnSuccess;
+                break;
+            }
+            default:
+                ret = kIOReturnUnsupported;
+                break;
+        }
+    }
+
+    if ((kIOTrackingLeaks == selector) && data)
+    {
+        data = IOTrackingLeaks(data);
+        queue_iterate(&gIOTrackingQ, queue, IOTrackingQueue *, link)
+        {
+            if (SkipName(options, queue->name, namesLen, names)) continue;
+            if (!queue->isAlloc)                                 continue;
+            IOTRecursiveLockUnlock(&queue->lock);
+        }
+    }
+
+    lck_mtx_unlock(gIOTrackingLock);
+
+    if (data)
+    {
+        siteInfos = (typeof(siteInfos)) data->getBytesNoCopy();
+        num = (data->getLength() / sizeof(IOTrackingCallSiteInfo));
+        qsort(siteInfos, num, sizeof(*siteInfos), &IOTrackingCallSiteInfoCompare);
+
+        if (kIOTrackingPrintTracking == selector)
+        {
+            for (idx = 0; idx < num; idx++)
+            {
+                siteInfo = &siteInfos[idx];
+                printf("\n0x%lx bytes (0x%lx + 0x%lx), %d call%s, [%d]\n",
+                    siteInfo->size[0] + siteInfo->size[1], 
+                    siteInfo->size[0], siteInfo->size[1], 
+                    siteInfo->count, (siteInfo->count != 1) ? "s" : "", idx);
+                uintptr_t * bt = &siteInfo->bt[0];
+                printf("      Backtrace 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", 
+                        bt[0], bt[1], bt[2], bt[3], bt[4], bt[5], bt[6], bt[7], 
+                        bt[8], bt[9], bt[10], bt[11], bt[12], bt[13], bt[14], bt[15]);
+                kmod_dump_log((vm_offset_t *) &bt[0], kIOTrackingCallSiteBTs, FALSE);
+            }
+            data->release();
+            data = 0;
+        }
+    }
+
+    *result = data;
+
+#endif /* IOTRACKING */
+
+    return (ret);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#include <IOKit/IOKitDiagnosticsUserClient.h>
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#undef super
+#define super IOUserClient
+
+OSDefineMetaClassAndStructors(IOKitDiagnosticsClient, IOUserClient)
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+IOUserClient * IOKitDiagnosticsClient::withTask(task_t owningTask)
+{
+    IOKitDiagnosticsClient * inst;
+
+    inst = new IOKitDiagnosticsClient;
+    if (inst && !inst->init())
+    {
+        inst->release();
+        inst = 0;
+    }
+
+    return (inst);
+}
+
+IOReturn IOKitDiagnosticsClient::clientClose(void)
+{
+    terminate();
+    return (kIOReturnSuccess);
+}
+
+IOReturn IOKitDiagnosticsClient::setProperties(OSObject * properties)
+{
+    IOReturn kr = kIOReturnUnsupported;
+    return (kr);
+}
+
+IOReturn IOKitDiagnosticsClient::externalMethod(uint32_t selector, IOExternalMethodArguments * args,
+                                                IOExternalMethodDispatch * dispatch, OSObject * target, void * reference)
+{
+    IOReturn                           ret = kIOReturnBadArgument;
+    const IOKitDiagnosticsParameters * params;
+    const char * names;
+    size_t       namesLen;
+    OSObject   * result;
+
+    if (args->structureInputSize < sizeof(IOKitDiagnosticsParameters)) return (kIOReturnBadArgument);
+    params = (typeof(params)) args->structureInput;
+    if (!params) return (kIOReturnBadArgument);
+
+    names = 0;
+    namesLen = args->structureInputSize - sizeof(IOKitDiagnosticsParameters);
+    if (namesLen) names = (typeof(names))(params + 1);
+
+    ret = IOTrackingDebug(selector, params->options, names, namesLen, params->size, &result);
+
+    if ((kIOReturnSuccess == ret) && args->structureVariableOutputData) *args->structureVariableOutputData = result;
+    else if (result) result->release();
+
+    return (ret);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
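IOKitDiagnosticsClient::externalMethod expects its structure input to start with an IOKitDiagnosticsParameters header and hands anything appended after it to IOTrackingDebug as the packed name list. Inside the kernel the same entry point can be driven directly; a sketch, noting that IOTrackingDebug returns kIOReturnNotReady unless the kIOTracking bit is set in gIOKitDebug:

/* Sketch: log every capture-enabled queue whose call sites hold >= 64 KB. */
OSObject      * result = NULL;
kern_return_t   kr;

kr = IOTrackingDebug(kIOTrackingPrintTracking,
                     0,                 /* options: no include/exclude names */
                     NULL, 0,           /* names, namesLen                   */
                     64 * 1024,         /* per-site size threshold           */
                     &result);
if ((kIOReturnSuccess == kr) && result) result->release();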
index a22bd8c60f97836377de1573432ee76a71ef941e..37e6f9416936de06a4357291e08461ca3b88adb7 100644 (file)
@@ -37,6 +37,7 @@ __BEGIN_DECLS
 #include <vm/vm_pageout.h>
 #include <mach/memory_object_types.h>
 #include <device/device_port.h>
+#include <IOKit/IODMACommand.h>
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
@@ -68,14 +69,19 @@ kern_return_t IOIteratePageableMaps(vm_size_t size,
                     IOIteratePageableMapsCallback callback, void * ref);
 vm_map_t IOPageableMapForAddress(uintptr_t address);
 
+struct IOMemoryDescriptorMapAllocRef
+{
+    vm_map_t          map;
+    mach_vm_address_t mapped;
+    mach_vm_size_t    size;
+    vm_prot_t         prot;
+    vm_tag_t          tag;
+    IOOptionBits      options;
+};
+
 kern_return_t 
-IOMemoryDescriptorMapMemEntry(vm_map_t * map, ipc_port_t entry, IOOptionBits options, bool pageable,
-                               mach_vm_size_t offset, mach_vm_address_t * address, mach_vm_size_t length);
-kern_return_t 
-IOMemoryDescriptorMapCopy(vm_map_t * map, 
-                               IOOptionBits options,
-                               mach_vm_size_t offset, 
-                               mach_vm_address_t * address, mach_vm_size_t length);
+IOMemoryDescriptorMapAlloc(vm_map_t map, void * ref);
+
 
 mach_vm_address_t
 IOKernelAllocateWithPhysicalRestrict(mach_vm_size_t size, mach_vm_address_t maxPhys, 
@@ -83,7 +89,6 @@ IOKernelAllocateWithPhysicalRestrict(mach_vm_size_t size, mach_vm_address_t maxP
 void
 IOKernelFreePhysical(mach_vm_address_t address, mach_vm_size_t size);
 
-
 extern vm_size_t debug_iomallocpageable_size;
 
 // osfmk/device/iokit_rpc.c
@@ -106,49 +111,15 @@ extern void bcopy_phys(addr64_t from, addr64_t to, vm_size_t size);
 
 __END_DECLS
 
-// Used for dedicated communications for IODMACommand
-enum  {
-    kIOMDWalkSegments             = 0x01000000,
-    kIOMDFirstSegment            = 1 | kIOMDWalkSegments,
-    kIOMDGetCharacteristics       = 0x02000000,
-    kIOMDGetCharacteristicsMapped = 1 | kIOMDGetCharacteristics,
-    kIOMDDMAActive                = 0x03000000,
-    kIOMDSetDMAActive             = 1 | kIOMDDMAActive,
-    kIOMDSetDMAInactive           = kIOMDDMAActive,
-    kIOMDAddDMAMapSpec            = 0x04000000,
-    kIOMDDMAMap                   = 0x05000000,
-    kIOMDDMACommandOperationMask  = 0xFF000000,
-};
-struct IOMDDMACharacteristics {
-    UInt64 fLength;
-    UInt32 fSGCount;
-    UInt32 fPages;
-    UInt32 fPageAlign;
-    ppnum_t fHighestPage;
-    IODirection fDirection;
-    UInt8 fIsPrepared;
-};
-struct IOMDDMAWalkSegmentArgs {
-    UInt64 fOffset;                    // Input/Output offset
-    UInt64 fIOVMAddr, fLength;         // Output variables
-    UInt8 fMapped;                     // Input Variable, Require mapped IOVMA
-};
-typedef UInt8 IOMDDMAWalkSegmentState[128];
-
-struct IOMDDMAMapArgs {
-    IOMapper *            fMapper;
-    IODMAMapSpecification fMapSpec;
-    uint64_t              fOffset;
-    uint64_t              fLength;
-    uint64_t              fAlloc;
-    ppnum_t               fAllocCount;
-    uint8_t               fMapContig;
-};
+#define __IODEQUALIFY(type, expr)                              \
+   ({ typeof(expr) expr_ = (type)(uintptr_t)(expr);            \
+       (type)(uintptr_t)(expr_); })
+
 
 struct IODMACommandInternal
 {
-    IOMDDMAWalkSegmentState fState;
-    IOMDDMACharacteristics  fMDSummary;
+    IOMDDMAWalkSegmentState      fState;
+    IOMDDMACharacteristics       fMDSummary;
 
     UInt64 fPreparedOffset;
     UInt64 fPreparedLength;
@@ -171,8 +142,8 @@ struct IODMACommandInternal
 
     ppnum_t  fCopyPageCount;
 
-    addr64_t  fLocalMapperPageAlloc;
-    ppnum_t  fLocalMapperPageCount;
+    uint64_t  fLocalMapperAlloc;
+    uint64_t  fLocalMapperAllocLength;
 
     class IOBufferMemoryDescriptor * fCopyMD;
 
@@ -235,16 +206,24 @@ extern clock_sec_t gIOConsoleLockTime;
 
 extern OSSet * gIORemoveOnReadProperties;
 
-extern "C" void IOKitResetTime( void );
 extern "C" void IOKitInitializeTime( void );
 
 extern "C" OSString * IOCopyLogNameForPID(int pid);
 
 #if defined(__i386__) || defined(__x86_64__)
+#ifndef __cplusplus
+#error xx
+#endif
+
+extern const OSSymbol * gIOCreateEFIDevicePathSymbol;
 extern "C" void IOSetKeyStoreData(IOMemoryDescriptor * data);
 #endif
+extern const  OSSymbol * gAKSGetKey;
 
 void IOScreenLockTimeUpdate(clock_sec_t secs);
 
+void     IOCPUInitialize(void);
+IOReturn IOInstallServicePlatformActions(IOService * service);
+IOReturn IORemoveServicePlatformActions(IOService * service);
 
 #endif /* ! _IOKIT_KERNELINTERNAL_H */
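__IODEQUALIFY (added above) funnels qualifier-stripping casts through one macro: the value is bounced through uintptr_t, while the typeof() temporary appears intended to keep some type checking between the source and destination pointer types. A minimal usage sketch; the variable names are illustrative:

const char * label         = "iokit";                          /* qualified source  */
char       * mutable_label = __IODEQUALIFY(char *, label);     /* const cast away   */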
index 3714d1d4fb9018868dd6f59bd91733572a6ed4c8..44a436346612d99a6441fcab26d86e26379af1fe 100644 (file)
@@ -1,4 +1,4 @@
-/*
+/* 
  * Copyright (c) 1998-2006 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
@@ -73,6 +73,10 @@ do { \
 
 #endif /* IOKITSTATS */
 
+
+#define TRACK_ALLOC    (IOTRACKING && (kIOTracking & gIOKitDebug))
+
+
 extern "C"
 {
 
@@ -87,7 +91,8 @@ __doprnt(
        va_list                 argp,
        void                    (*putc)(int, void *),
        void                    *arg,
-       int                     radix);
+       int                     radix,
+       int                     is_log);
 
 extern void cons_putc_locked(char);
 extern void bsd_log_lock(void);
@@ -146,6 +151,12 @@ static iopa_t gIOPageablePageAllocator;
 
 uint32_t  gIOPageAllocChunkBytes;
 
+#if IOTRACKING
+IOTrackingQueue * gIOMallocTracking;
+IOTrackingQueue * gIOWireTracking;
+IOTrackingQueue * gIOMapTracking;
+#endif /* IOTRACKING */
+
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 void IOLibInit(void)
@@ -157,18 +168,25 @@ void IOLibInit(void)
     if(libInitialized)
         return;        
 
+    IOLockGroup = lck_grp_alloc_init("IOKit", LCK_GRP_ATTR_NULL);
+
+#if IOTRACKING
+    IOTrackingInit();
+    gIOMallocTracking = IOTrackingQueueAlloc(kIOMallocTrackingName, 0, 0,         true);
+    gIOWireTracking   = IOTrackingQueueAlloc(kIOWireTrackingName,   0, page_size, false);
+    gIOMapTracking    = IOTrackingQueueAlloc(kIOMapTrackingName,    0, page_size, false);
+#endif
+
     gIOKitPageableSpace.maps[0].address = 0;
     ret = kmem_suballoc(kernel_map,
                     &gIOKitPageableSpace.maps[0].address,
                     kIOPageableMapSize,
                     TRUE,
-                    VM_FLAGS_ANYWHERE,
+                    VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_IOKIT),
                     &gIOKitPageableSpace.maps[0].map);
     if (ret != KERN_SUCCESS)
         panic("failed to allocate iokit pageable map\n");
 
-    IOLockGroup = lck_grp_alloc_init("IOKit", LCK_GRP_ATTR_NULL);
-
     gIOKitPageableSpace.lock           = lck_mtx_alloc_init(IOLockGroup, LCK_ATTR_NULL);
     gIOKitPageableSpace.maps[0].end    = gIOKitPageableSpace.maps[0].address + kIOPageableMapSize;
     gIOKitPageableSpace.hint           = 0;
@@ -182,11 +200,22 @@ void IOLibInit(void)
     iopa_init(&gIOBMDPageAllocator);
     iopa_init(&gIOPageablePageAllocator);
 
+
     libInitialized = true;
 }
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
+static uint32_t 
+log2up(uint32_t size)
+{
+    if (size <= 1) size = 0;
+    else size = 32 - __builtin_clz(size - 1);
+    return (size);
+}
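log2up returns the exponent of the next power of two at or above its argument; IOMallocAligned (below) now does alignment = (1UL << log2up(alignment)), so alignment 0 and 1 behave identically and odd alignments are rounded up. A self-contained check that mirrors the helper above; the demo function name is illustrative:

#include <assert.h>
#include <stdint.h>

static uint32_t log2up_demo(uint32_t size)      /* same body as log2up above */
{
    return (size <= 1) ? 0 : (uint32_t)(32 - __builtin_clz(size - 1));
}

int main(void)
{
    assert((1UL << log2up_demo(0))    == 1);    /* alignment 0 acts like 1   */
    assert((1UL << log2up_demo(3))    == 4);    /* non-powers rounded up     */
    assert((1UL << log2up_demo(4096)) == 4096); /* powers of two unchanged   */
    return 0;
}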
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
 IOThread IOCreateThread(IOThreadFunc fcn, void *arg)
 {
        kern_return_t   result;
@@ -209,17 +238,49 @@ void IOExitThread(void)
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
+#if IOTRACKING
+struct IOLibMallocHeader
+{
+    IOTrackingAddress tracking;
+};
+#endif
+
+#if IOTRACKING
+#define sizeofIOLibMallocHeader        (sizeof(IOLibMallocHeader) - (TRACK_ALLOC ? 0 : sizeof(IOTrackingAddress)))
+#else
+#define sizeofIOLibMallocHeader        (0)
+#endif
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 void * IOMalloc(vm_size_t size)
 {
     void * address;
+    vm_size_t allocSize;
+
+    allocSize = size + sizeofIOLibMallocHeader;
+#if IOTRACKING
+    if (sizeofIOLibMallocHeader && (allocSize <= size)) return (NULL); // overflow
+#endif
+    address = kalloc_tag_bt(allocSize, VM_KERN_MEMORY_IOKIT);
 
-    address = (void *)kalloc(size);
     if ( address ) {
+#if IOTRACKING
+       if (TRACK_ALLOC) {
+           IOLibMallocHeader * hdr;
+           hdr = (typeof(hdr)) address;
+           bzero(&hdr->tracking, sizeof(hdr->tracking));
+           hdr->tracking.address = ~(((uintptr_t) address) + sizeofIOLibMallocHeader);
+           hdr->tracking.size    = size;
+           IOTrackingAdd(gIOMallocTracking, &hdr->tracking.tracking, size, true);
+       }
+#endif
+       address = (typeof(address)) (((uintptr_t) address) + sizeofIOLibMallocHeader);
+
 #if IOALLOCDEBUG
-               debug_iomalloc_size += size;
+    OSAddAtomic(size, &debug_iomalloc_size);
 #endif
-               IOStatisticsAlloc(kIOStatisticsMalloc, size);
+       IOStatisticsAlloc(kIOStatisticsMalloc, size);
     }
 
     return address;
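With tracking enabled, IOMalloc now over-allocates by sizeofIOLibMallocHeader, records the block in gIOMallocTracking, and hands back the address just past the header; IOFree (next hunk) steps the pointer back and cross-checks the caller-supplied size against the recorded one. Sketched as a layout, assuming TRACK_ALLOC is true:

/*  kalloc_tag_bt() block                       pointer returned to the caller
 *  |                                           |
 *  v                                           v
 *  +-------------------------------------------+---------------------------+
 *  | IOLibMallocHeader { IOTrackingAddress }   | size bytes for the caller |
 *  +-------------------------------------------+---------------------------+
 *  <---------- sizeofIOLibMallocHeader -------->
 *
 *  hdr->tracking.address = ~(returned pointer)     (stored complemented, as
 *                                                   IOTrackingAlloc also does)
 *  IOFree(p, size): hdr = p - sizeofIOLibMallocHeader;
 *                   kfree(hdr, size + sizeofIOLibMallocHeader);
 */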
@@ -228,31 +289,79 @@ void * IOMalloc(vm_size_t size)
 void IOFree(void * address, vm_size_t size)
 {
     if (address) {
-               kfree(address, size);
+
+       address = (typeof(address)) (((uintptr_t) address) - sizeofIOLibMallocHeader);
+       
+#if IOTRACKING
+       if (TRACK_ALLOC) {
+           IOLibMallocHeader * hdr;
+           hdr = (typeof(hdr)) address;
+            if (size != hdr->tracking.size)
+           {
+               OSReportWithBacktrace("bad IOFree size 0x%lx should be 0x%lx", size, hdr->tracking.size);
+               size = hdr->tracking.size;
+           }
+           IOTrackingRemove(gIOMallocTracking, &hdr->tracking.tracking, size);
+       }
+#endif
+
+       kfree(address, size + sizeofIOLibMallocHeader);
 #if IOALLOCDEBUG
-               debug_iomalloc_size -= size;
+    OSAddAtomic(-size, &debug_iomalloc_size);
 #endif
-               IOStatisticsAlloc(kIOStatisticsFree, size);
+       IOStatisticsAlloc(kIOStatisticsFree, size);
     }
 }
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
+vm_tag_t 
+IOMemoryTag(vm_map_t map)
+{
+    vm_tag_t tag;
+
+    if (!vm_kernel_map_is_kernel(map)) return (VM_MEMORY_IOKIT);
+
+    tag = vm_tag_bt();
+    if (tag == VM_KERN_MEMORY_NONE) tag = VM_KERN_MEMORY_IOKIT;
+
+    return (tag);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+struct IOLibPageMallocHeader
+{
+    mach_vm_size_t    allocationSize;
+    mach_vm_address_t allocationAddress;
+#if IOTRACKING
+    IOTrackingAddress tracking;
+#endif
+};
+
+#if IOTRACKING
+#define sizeofIOLibPageMallocHeader    (sizeof(IOLibPageMallocHeader) - (TRACK_ALLOC ? 0 : sizeof(IOTrackingAddress)))
+#else
+#define sizeofIOLibPageMallocHeader    (sizeof(IOLibPageMallocHeader))
+#endif
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
 void * IOMallocAligned(vm_size_t size, vm_size_t alignment)
 {
-    kern_return_t      kr;
-    vm_offset_t                address;
-    vm_offset_t                allocationAddress;
-    vm_size_t          adjustedSize;
-    uintptr_t          alignMask;
+    kern_return_t          kr;
+    vm_offset_t                    address;
+    vm_offset_t                    allocationAddress;
+    vm_size_t              adjustedSize;
+    uintptr_t              alignMask;
+    IOLibPageMallocHeader * hdr;
 
     if (size == 0)
         return 0;
-    if (alignment == 0) 
-        alignment = 1;
 
+    alignment = (1UL << log2up(alignment));
     alignMask = alignment - 1;
-    adjustedSize = size + sizeof(vm_size_t) + sizeof(vm_address_t);
+    adjustedSize = size + sizeofIOLibPageMallocHeader;
 
     if (size > adjustedSize) {
            address = 0;    /* overflow detected */
@@ -260,9 +369,11 @@ void * IOMallocAligned(vm_size_t size, vm_size_t alignment)
     else if (adjustedSize >= page_size) {
 
         kr = kernel_memory_allocate(kernel_map, &address,
-                                       size, alignMask, 0);
-       if (KERN_SUCCESS != kr)
-           address = 0;
+                                       size, alignMask, 0, IOMemoryTag(kernel_map));
+       if (KERN_SUCCESS != kr) address = 0;
+#if IOTRACKING
+       else if (TRACK_ALLOC) IOTrackingAlloc(gIOMallocTracking, address, size);
+#endif
 
     } else {
 
@@ -271,22 +382,27 @@ void * IOMallocAligned(vm_size_t size, vm_size_t alignment)
        if (adjustedSize >= page_size) {
 
            kr = kernel_memory_allocate(kernel_map, &allocationAddress,
-                                           adjustedSize, 0, 0);
-           if (KERN_SUCCESS != kr)
-               allocationAddress = 0;
+                                           adjustedSize, 0, 0, IOMemoryTag(kernel_map));
+           if (KERN_SUCCESS != kr) allocationAddress = 0;
 
        } else
-           allocationAddress = (vm_address_t) kalloc(adjustedSize);
+           allocationAddress = (vm_address_t) kalloc_tag_bt(adjustedSize, VM_KERN_MEMORY_IOKIT);
 
         if (allocationAddress) {
-            address = (allocationAddress + alignMask
-                    + (sizeof(vm_size_t) + sizeof(vm_address_t)))
+            address = (allocationAddress + alignMask + sizeofIOLibPageMallocHeader)
                     & (~alignMask);
 
-            *((vm_size_t *)(address - sizeof(vm_size_t) - sizeof(vm_address_t))) 
-                           = adjustedSize;
-            *((vm_address_t *)(address - sizeof(vm_address_t)))
-                            = allocationAddress;
+           hdr = (typeof(hdr))(address - sizeofIOLibPageMallocHeader);
+           hdr->allocationSize    = adjustedSize;
+           hdr->allocationAddress = allocationAddress;
+#if IOTRACKING
+           if (TRACK_ALLOC) {
+               bzero(&hdr->tracking, sizeof(hdr->tracking));
+               hdr->tracking.address = ~address;
+               hdr->tracking.size = size;
+               IOTrackingAdd(gIOMallocTracking, &hdr->tracking.tracking, size, true);
+           }
+#endif
        } else
            address = 0;
     }
@@ -295,7 +411,7 @@ void * IOMallocAligned(vm_size_t size, vm_size_t alignment)
 
     if( address) {
 #if IOALLOCDEBUG
-               debug_iomalloc_size += size;
+               OSAddAtomic(size, &debug_iomalloc_size);
 #endif
        IOStatisticsAlloc(kIOStatisticsMallocAligned, size);
        }
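
Several hunks in this file replace plain `debug_iomalloc_size += size` / `-= size` updates with OSAddAtomic / OSAddAtomicLong, because these debug counters are bumped from many threads without a lock. A userspace analogue of that change, using std::atomic in place of the libkern primitive (names are illustrative):

#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

// Stand-in for debug_iomalloc_size: updated concurrently, so the
// read-modify-write must be atomic or updates can be lost.
static std::atomic<long> debugAllocBytes{0};

static void trackAlloc(long size) { debugAllocBytes.fetch_add(size); }
static void trackFree(long size)  { debugAllocBytes.fetch_add(-size); }

int main()
{
    std::vector<std::thread> threads;
    for (int i = 0; i < 8; ++i)
        threads.emplace_back([] {
            for (int n = 0; n < 10000; ++n) { trackAlloc(32); trackFree(32); }
        });
    for (auto &t : threads) t.join();
    std::printf("outstanding bytes: %ld\n", debugAllocBytes.load()); // always 0
    return 0;
}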
@@ -305,33 +421,47 @@ void * IOMallocAligned(vm_size_t size, vm_size_t alignment)
 
 void IOFreeAligned(void * address, vm_size_t size)
 {
-    vm_address_t       allocationAddress;
-    vm_size_t  adjustedSize;
+    vm_address_t           allocationAddress;
+    vm_size_t              adjustedSize;
+    IOLibPageMallocHeader * hdr;
 
     if( !address)
        return;
 
     assert(size);
 
-    adjustedSize = size + sizeof(vm_size_t) + sizeof(vm_address_t);
+    adjustedSize = size + sizeofIOLibPageMallocHeader;
     if (adjustedSize >= page_size) {
-
+#if IOTRACKING
+       if (TRACK_ALLOC) IOTrackingFree(gIOMallocTracking, (uintptr_t) address, size);
+#endif
         kmem_free( kernel_map, (vm_offset_t) address, size);
 
     } else {
-       adjustedSize = *((vm_size_t *)( (vm_address_t) address
-                                - sizeof(vm_address_t) - sizeof(vm_size_t)));
-        allocationAddress = *((vm_address_t *)( (vm_address_t) address
-                               - sizeof(vm_address_t) ));
+        hdr = (typeof(hdr)) (((uintptr_t)address) - sizeofIOLibPageMallocHeader);
+       adjustedSize = hdr->allocationSize;
+        allocationAddress = hdr->allocationAddress;
 
-       if (adjustedSize >= page_size)
+#if IOTRACKING
+       if (TRACK_ALLOC)
+       {
+            if (size != hdr->tracking.size)
+           {
+               OSReportWithBacktrace("bad IOFreeAligned size 0x%lx should be 0x%lx", size, hdr->tracking.size);
+               size = hdr->tracking.size;
+           }
+           IOTrackingRemove(gIOMallocTracking, &hdr->tracking.tracking, size);
+       }
+#endif
+       if (adjustedSize >= page_size) {
            kmem_free( kernel_map, allocationAddress, adjustedSize);
-       else
-         kfree((void *)allocationAddress, adjustedSize);
+       } else {
+           kfree((void *)allocationAddress, adjustedSize);
+       }
     }
 
 #if IOALLOCDEBUG
-    debug_iomalloc_size -= size;
+    OSAddAtomic(-size, &debug_iomalloc_size);
 #endif
 
     IOStatisticsAlloc(kIOStatisticsFreeAligned, size);
@@ -342,31 +472,36 @@ void IOFreeAligned(void * address, vm_size_t size)
 void
 IOKernelFreePhysical(mach_vm_address_t address, mach_vm_size_t size)
 {
-    mach_vm_address_t allocationAddress;
-    mach_vm_size_t    adjustedSize;
+    mach_vm_address_t       allocationAddress;
+    mach_vm_size_t          adjustedSize;
+    IOLibPageMallocHeader * hdr;
 
     if (!address)
        return;
 
     assert(size);
 
-    adjustedSize = (2 * size) + sizeof(mach_vm_size_t) + sizeof(mach_vm_address_t);
+    adjustedSize = (2 * size) + sizeofIOLibPageMallocHeader;
     if (adjustedSize >= page_size) {
-
+#if IOTRACKING
+       if (TRACK_ALLOC) IOTrackingFree(gIOMallocTracking, address, size);
+#endif
        kmem_free( kernel_map, (vm_offset_t) address, size);
 
     } else {
 
-       adjustedSize = *((mach_vm_size_t *)
-                       (address - sizeof(mach_vm_address_t) - sizeof(mach_vm_size_t)));
-       allocationAddress = *((mach_vm_address_t *)
-                       (address - sizeof(mach_vm_address_t) ));
+        hdr = (typeof(hdr)) (((uintptr_t)address) - sizeofIOLibPageMallocHeader);
+       adjustedSize = hdr->allocationSize;
+        allocationAddress = hdr->allocationAddress;
+#if IOTRACKING
+       if (TRACK_ALLOC) IOTrackingRemove(gIOMallocTracking, &hdr->tracking.tracking, size);
+#endif
        kfree((void *)allocationAddress, adjustedSize);
     }
 
     IOStatisticsAlloc(kIOStatisticsFreeContiguous, size);
 #if IOALLOCDEBUG
-    debug_iomalloc_size -= size;
+    OSAddAtomic(-size, &debug_iomalloc_size);
 #endif
 }
 
@@ -375,11 +510,12 @@ mach_vm_address_t
 IOKernelAllocateWithPhysicalRestrict(mach_vm_size_t size, mach_vm_address_t maxPhys, 
                                        mach_vm_size_t alignment, bool contiguous)
 {
-    kern_return_t      kr;
-    mach_vm_address_t  address;
-    mach_vm_address_t  allocationAddress;
-    mach_vm_size_t     adjustedSize;
-    mach_vm_address_t  alignMask;
+    kern_return_t          kr;
+    mach_vm_address_t      address;
+    mach_vm_address_t      allocationAddress;
+    mach_vm_size_t         adjustedSize;
+    mach_vm_address_t      alignMask;
+    IOLibPageMallocHeader * hdr;
 
     if (size == 0)
        return (0);
@@ -387,7 +523,8 @@ IOKernelAllocateWithPhysicalRestrict(mach_vm_size_t size, mach_vm_address_t maxP
         alignment = 1;
 
     alignMask = alignment - 1;
-    adjustedSize = (2 * size) + sizeof(mach_vm_size_t) + sizeof(mach_vm_address_t);
+    adjustedSize = (2 * size) + sizeofIOLibPageMallocHeader;
+    if (adjustedSize < size) return (0);
 
     contiguous = (contiguous && (adjustedSize > page_size))
                    || (alignment > page_size);
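
IOKernelAllocateWithPhysicalRestrict now bails out when `adjustedSize = (2 * size) + header` wraps (`adjustedSize < size`), and checks again after adding the alignment mask. The same guard in isolation, as a small standalone sketch with a hypothetical helper name:

#include <cstdint>
#include <cstdio>

// Returns true and writes *out only if a + b does not wrap around;
// the same idiom as "if (adjustedSize < size) return (0);".
static bool addNoOverflow(uint64_t a, uint64_t b, uint64_t *out)
{
    uint64_t sum = a + b;
    if (sum < a) return false;      // wrapped past 2^64
    *out = sum;
    return true;
}

int main()
{
    uint64_t adjusted;
    std::printf("%d\n", addNoOverflow(100, 64, &adjusted));              // 1: fits
    std::printf("%d\n", addNoOverflow(UINT64_MAX - 8, 64, &adjusted));   // 0: wraps
    return 0;
}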
@@ -416,36 +553,49 @@ IOKernelAllocateWithPhysicalRestrict(mach_vm_size_t size, mach_vm_address_t maxP
        if (contiguous || maxPhys)
        {
            kr = kmem_alloc_contig(kernel_map, &virt, size,
-                                  alignMask, atop(maxPhys), atop(alignMask), 0);
+                                  alignMask, atop(maxPhys), atop(alignMask), 0, IOMemoryTag(kernel_map));
        }
        else
        {
            kr = kernel_memory_allocate(kernel_map, &virt,
-                                       size, alignMask, options);
+                                       size, alignMask, options, IOMemoryTag(kernel_map));
        }
        if (KERN_SUCCESS == kr)
+       {
            address = virt;
+#if IOTRACKING
+           if (TRACK_ALLOC) IOTrackingAlloc(gIOMallocTracking, address, size);
+#endif
+       }
        else
            address = 0;
     }
     else
     {
        adjustedSize += alignMask;
-        allocationAddress = (mach_vm_address_t) kalloc(adjustedSize);
+        if (adjustedSize < size) return (0);
+        allocationAddress = (mach_vm_address_t) kalloc_tag_bt(adjustedSize, VM_KERN_MEMORY_IOKIT);
 
         if (allocationAddress) {
 
-            address = (allocationAddress + alignMask
-                    + (sizeof(mach_vm_size_t) + sizeof(mach_vm_address_t)))
+
+            address = (allocationAddress + alignMask + sizeofIOLibPageMallocHeader)
                     & (~alignMask);
 
             if (atop_32(address) != atop_32(address + size - 1))
                 address = round_page(address);
 
-            *((mach_vm_size_t *)(address - sizeof(mach_vm_size_t)
-                            - sizeof(mach_vm_address_t))) = adjustedSize;
-            *((mach_vm_address_t *)(address - sizeof(mach_vm_address_t)))
-                            = allocationAddress;
+           hdr = (typeof(hdr))(address - sizeofIOLibPageMallocHeader);
+           hdr->allocationSize    = adjustedSize;
+           hdr->allocationAddress = allocationAddress;
+#if IOTRACKING
+           if (TRACK_ALLOC) {
+               bzero(&hdr->tracking, sizeof(hdr->tracking));
+               hdr->tracking.address = ~address;
+               hdr->tracking.size    = size;
+               IOTrackingAdd(gIOMallocTracking, &hdr->tracking.tracking, size, true);
+           }
+#endif
        } else
            address = 0;
     }
@@ -453,7 +603,7 @@ IOKernelAllocateWithPhysicalRestrict(mach_vm_size_t size, mach_vm_address_t maxP
     if (address) {
     IOStatisticsAlloc(kIOStatisticsMallocContiguous, size);
 #if IOALLOCDEBUG
-       debug_iomalloc_size += size;
+    OSAddAtomic(size, &debug_iomalloc_size);
 #endif
     }
 
@@ -608,7 +758,7 @@ kern_return_t IOIteratePageableMaps(vm_size_t size,
                     &min,
                     segSize,
                     TRUE,
-                    VM_FLAGS_ANYWHERE,
+                    VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_IOKIT),
                     &map);
         if( KERN_SUCCESS != kr) {
             lck_mtx_unlock( gIOKitPageableSpace.lock );
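
Throughout this commit a vm_tag_t is threaded into the allocation paths so kernel VM use can be attributed (VM_KERN_MEMORY_IOKIT by default, or a backtrace-derived tag from IOMemoryTag). Where a flags word is built, as in the kmem_suballoc call above, the tag is OR'd in with VM_MAKE_TAG. A hedged sketch of that flags-plus-tag packing; the constants and bit layout here are illustrative only, the real definitions live in mach/vm_statistics.h:

#include <cstdint>
#include <cstdio>

// Assumed layout for illustration: tag in the top 8 bits of a 32-bit flags word.
constexpr uint32_t kTagShift      = 24;
constexpr uint32_t kFlagsAnywhere = 0x0001;   // stand-in for VM_FLAGS_ANYWHERE
constexpr uint32_t kTagIOKit      = 6;        // stand-in for VM_KERN_MEMORY_IOKIT

constexpr uint32_t makeTag(uint32_t tag) { return tag << kTagShift; }

int main()
{
    uint32_t flags = kFlagsAnywhere | makeTag(kTagIOKit);
    std::printf("flags=0x%08x tag=%u\n", flags, flags >> kTagShift);
    return 0;
}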
@@ -631,7 +781,8 @@ kern_return_t IOIteratePageableMaps(vm_size_t size,
 struct IOMallocPageableRef
 {
     vm_offset_t address;
-    vm_size_t   size;
+    vm_size_t  size;
+    vm_tag_t    tag;
 };
 
 static kern_return_t IOMallocPageableCallback(vm_map_t map, void * _ref)
@@ -639,12 +790,12 @@ static kern_return_t IOMallocPageableCallback(vm_map_t map, void * _ref)
     struct IOMallocPageableRef * ref = (struct IOMallocPageableRef *) _ref;
     kern_return_t               kr;
 
-    kr = kmem_alloc_pageable( map, &ref->address, ref->size );
+    kr = kmem_alloc_pageable( map, &ref->address, ref->size, ref->tag );
 
     return( kr );
 }
 
-static void * IOMallocPageablePages(vm_size_t size, vm_size_t alignment)
+static void * IOMallocPageablePages(vm_size_t size, vm_size_t alignment, vm_tag_t tag)
 {
     kern_return_t             kr = kIOReturnNotReady;
     struct IOMallocPageableRef ref;
@@ -655,6 +806,7 @@ static void * IOMallocPageablePages(vm_size_t size, vm_size_t alignment)
         return( 0 );
 
     ref.size = size;
+    ref.tag  = tag;
     kr = IOIteratePageableMaps( size, &IOMallocPageableCallback, &ref );
     if( kIOReturnSuccess != kr)
         ref.address = 0;
@@ -691,19 +843,19 @@ static void IOFreePageablePages(void * address, vm_size_t size)
 
 static uintptr_t IOMallocOnePageablePage(iopa_t * a)
 {
-    return ((uintptr_t) IOMallocPageablePages(page_size, page_size));
+    return ((uintptr_t) IOMallocPageablePages(page_size, page_size, VM_KERN_MEMORY_IOKIT));
 }
 
 void * IOMallocPageable(vm_size_t size, vm_size_t alignment)
 {
     void * addr;
 
-    if (size >= (page_size - 4*gIOPageAllocChunkBytes)) addr = IOMallocPageablePages(size, alignment);
+    if (size >= (page_size - 4*gIOPageAllocChunkBytes)) addr = IOMallocPageablePages(size, alignment, IOMemoryTag(kernel_map));
     else                   addr = ((void * ) iopa_alloc(&gIOPageablePageAllocator, &IOMallocOnePageablePage, size, alignment));
 
     if (addr) {
 #if IOALLOCDEBUG
-       debug_iomallocpageable_size += size;
+          OSAddAtomicLong(size, &debug_iomallocpageable_size);
 #endif
        IOStatisticsAlloc(kIOStatisticsMallocPageable, size);
     }
@@ -714,7 +866,7 @@ void * IOMallocPageable(vm_size_t size, vm_size_t alignment)
 void IOFreePageable(void * address, vm_size_t size)
 {
 #if IOALLOCDEBUG
-    debug_iomallocpageable_size -= size;
+       OSAddAtomicLong(-size, &debug_iomallocpageable_size);
 #endif
     IOStatisticsAlloc(kIOStatisticsFreePageable, size);
 
@@ -768,14 +920,6 @@ iopa_allocinpage(iopa_page_t * pa, uint32_t count, uint64_t align)
     return (0);
 }
 
-static uint32_t 
-log2up(uint32_t size)
-{
-    if (size <= 1) size = 0;
-    else size = 32 - __builtin_clz(size - 1);
-    return (size);
-}
-
 uintptr_t 
 iopa_alloc(iopa_t * a, iopa_proc_t alloc, vm_size_t bytes, uint32_t balign)
 {
@@ -798,7 +942,7 @@ iopa_alloc(iopa_t * a, iopa_proc_t alloc, vm_size_t bytes, uint32_t balign)
     align = align_masks[log2up((balign + gIOPageAllocChunkBytes - 1) / gIOPageAllocChunkBytes)];
 
     IOLockLock(a->lock);
-    pa = (typeof(pa)) queue_first(&a->list);
+    __IGNORE_WCASTALIGN(pa = (typeof(pa)) queue_first(&a->list));
     while (!queue_end(&a->list, &pa->link))
     {
        addr = iopa_allocinpage(pa, count, align);
@@ -807,7 +951,7 @@ iopa_alloc(iopa_t * a, iopa_proc_t alloc, vm_size_t bytes, uint32_t balign)
            a->bytecount += bytes;
            break;
        }
-       pa = (typeof(pa)) queue_next(&pa->link);
+       __IGNORE_WCASTALIGN(pa = (typeof(pa)) queue_next(&pa->link));
     }
     IOLockUnlock(a->lock);
 
@@ -941,6 +1085,15 @@ void IOSleep(unsigned milliseconds)
     delay_for_interval(milliseconds, kMillisecondScale);
 }
 
+/*
+ * Spin for indicated number of milliseconds, and potentially an
+ * additional number of milliseconds up to the leeway values.
+ */
+void IOSleepWithLeeway(unsigned intervalMilliseconds, unsigned leewayMilliseconds)
+{
+    delay_for_interval_with_leeway(intervalMilliseconds, leewayMilliseconds, kMillisecondScale);
+}
+
 /*
  * Spin for indicated number of microseconds.
  */
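
This hunk adds IOSleepWithLeeway, which blocks the calling thread (despite the "Spin" wording copied from the neighbouring comment) via delay_for_interval_with_leeway, letting the scheduler defer the wakeup anywhere inside the leeway window so timers can be coalesced. A usage sketch, assuming kernel/kext context with <IOKit/IOLib.h>; the polling loop and helper name are illustrative:

// Builds only in a kernel/kext target.
#include <IOKit/IOLib.h>

// Hypothetical helper: poll a device ready bit, sleeping between attempts.
static bool waitForReady(volatile uint32_t *statusReg)
{
    for (int attempt = 0; attempt < 100; ++attempt) {
        if (*statusReg & 0x1) return true;   // device signalled ready
        // Block ~10 ms; the scheduler may add up to another 5 ms so the
        // wakeup can be coalesced with other timers and save power.
        IOSleepWithLeeway(10, 5);
    }
    return false;
}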
@@ -985,11 +1138,12 @@ void IOLogv(const char *format, va_list ap)
     va_copy(ap2, ap);
 
     bsd_log_lock();
-    __doprnt(format, ap, _iolog_logputc, NULL, 16);
+    __doprnt(format, ap, _iolog_logputc, NULL, 16, TRUE);
     bsd_log_unlock();
     logwakeup();
 
-    __doprnt(format, ap2, _iolog_consputc, NULL, 16);
+    __doprnt(format, ap2, _iolog_consputc, NULL, 16, TRUE);
+    va_end(ap2);
 }
 
 #if !__LP64__
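
IOLogv consumes its argument list twice (once for the log buffer, once for the console), so it va_copy's the list; this hunk adds the previously missing va_end for that copy and passes the extra argument now taken by __doprnt. The balanced va_copy/va_end pattern in isolation, with illustrative names:

#include <cstdarg>
#include <cstdio>

// A variadic wrapper that formats the same arguments twice must copy the
// va_list: each list is traversed once and closed with its own va_end.
static void logTwice(const char *format, ...)
{
    va_list ap, ap2;
    va_start(ap, format);
    va_copy(ap2, ap);              // second traversal needs its own copy

    vfprintf(stdout, format, ap);  // first consumer (e.g. the log buffer)
    vfprintf(stderr, format, ap2); // second consumer (e.g. the console)

    va_end(ap2);                   // the va_end this commit adds for the copy
    va_end(ap);
}

int main()
{
    logTwice("value=%d name=%s\n", 42, "iokit");
    return 0;
}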
@@ -1043,7 +1197,7 @@ OSString * IOCopyLogNameForPID(int pid)
 
 IOAlignment IOSizeToAlignment(unsigned int size)
 {
-    register int shift;
+    int shift;
     const int intsize = sizeof(unsigned int) * 8;
     
     for (shift = 1; shift < intsize; shift++) {
index 294e1382205fced90097059ddf53f1bfa51d9f47..8f2b35992597f371206caa88bfc37301aac0e0b8 100644 (file)
@@ -30,6 +30,7 @@
 #include <IOKit/IODMACommand.h>
 #include <libkern/c++/OSData.h>
 #include <libkern/OSDebug.h>
+#include "IOKitKernelInternal.h"
 
 __BEGIN_DECLS
 extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va);
@@ -38,10 +39,10 @@ __END_DECLS
 #define super IOService
 OSDefineMetaClassAndAbstractStructors(IOMapper, IOService);
 
-OSMetaClassDefineReservedUsed(IOMapper, 0);
-OSMetaClassDefineReservedUsed(IOMapper, 1);
-OSMetaClassDefineReservedUsed(IOMapper, 2);
-OSMetaClassDefineReservedUsed(IOMapper, 3);
+OSMetaClassDefineReservedUnused(IOMapper, 0);
+OSMetaClassDefineReservedUnused(IOMapper, 1);
+OSMetaClassDefineReservedUnused(IOMapper, 2);
+OSMetaClassDefineReservedUnused(IOMapper, 3);
 OSMetaClassDefineReservedUnused(IOMapper, 4);
 OSMetaClassDefineReservedUnused(IOMapper, 5);
 OSMetaClassDefineReservedUnused(IOMapper, 6);
@@ -80,6 +81,8 @@ bool IOMapper::start(IOService *provider)
     if (!initHardware(provider))
         return false;
 
+    fPageSize = getPageSize();
+
     if (fIsSystem) { 
         sMapperLock.lock();
         IOMapper::gSystem = this;
@@ -98,22 +101,8 @@ bool IOMapper::start(IOService *provider)
     return true;
 }
 
-bool IOMapper::allocTable(IOByteCount size)
-{
-    assert(!fTable);
-
-    fTableSize = size;
-    fTableHandle = NewARTTable(size, &fTable, &fTablePhys);
-    return fTableHandle != 0;
-}
-
 void IOMapper::free()
 {
-    if (fTableHandle) {
-        FreeARTTable(fTableHandle, fTableSize);
-        fTableHandle = 0;
-    }
-
     super::free();
 }
 
@@ -186,135 +175,45 @@ done:
     return (mapper);
 }
 
-ppnum_t IOMapper::iovmAllocDMACommand(IODMACommand * command, IOItemCount pageCount)
-{
-    return (0);
-}
-
-void IOMapper::iovmFreeDMACommand(IODMACommand * command,
-                                 ppnum_t addr, IOItemCount pageCount)
-{
-}
-
-ppnum_t IOMapper::iovmMapMemory(
-                         OSObject                    * memory,   // dma command or iomd
-                         ppnum_t                       offsetPage,
-                         ppnum_t                       pageCount,
-                         uint32_t                      options,
-                         upl_page_info_t             * pageList,
-                         const IODMAMapSpecification * mapSpecification)
-{
-    return (0);
-}
-
-void IOMapper::iovmInsert(ppnum_t addr, IOItemCount offset,
-                            ppnum_t *pageList, IOItemCount pageCount)
-{
-    while (pageCount--)
-        iovmInsert(addr, offset++, *pageList++);
-}
-
-void IOMapper::iovmInsert(ppnum_t addr, IOItemCount offset,
-                            upl_page_info_t *pageList, IOItemCount pageCount)
-{
-    for (IOItemCount i = 0; i < pageCount; i++)
-        iovmInsert(addr, offset + i, pageList[i].phys_addr);
-}
-
-OSData * IOMapper::
-NewARTTable(IOByteCount size, void ** virtAddrP, ppnum_t *physAddrP)
-{
-    if (!virtAddrP || !physAddrP)
-       return 0;
-
-    kern_return_t kr;
-    vm_address_t address;
-
-    size = round_page(size);
-    kr = kmem_alloc_contig(kernel_map, &address, size, PAGE_MASK, 0 /*max_pnum*/, 0 /*pnum_mask*/, false);
-    if (kr)
-        return 0;
-
-    ppnum_t pagenum = pmap_find_phys(kernel_pmap, (addr64_t) address);
-    if (pagenum)
-       *physAddrP = pagenum;
-    else {
-       FreeARTTable((OSData *) address, size);
-       address = 0;
-    }
-
-    *virtAddrP = (void *) address;
-
-    return (OSData *) address;
-}
-
-void IOMapper::FreeARTTable(OSData *artHandle, IOByteCount size)
-{
-    vm_address_t address = (vm_address_t) artHandle;
-
-    size = round_page(size);
-    kmem_free(kernel_map, address, size);      // Just panic if address is 0
-}
-
-bool IOMapper::getBypassMask(addr64_t *maskP) const
-{
-    return false;
-}
-
 __BEGIN_DECLS
 
 // These are C accessors to the system mapper for non-IOKit clients
 ppnum_t IOMapperIOVMAlloc(unsigned pages)
 {
+    IOReturn ret;
+    uint64_t dmaAddress, dmaLength;
+
     IOMapper::checkForSystemMapper();
 
+    ret = kIOReturnUnsupported;
     if (IOMapper::gSystem)
-        return IOMapper::gSystem->iovmAlloc((IOItemCount) pages);
-    else
-        return 0;
+    {
+        ret = IOMapper::gSystem->iovmMapMemory(
+                       NULL, 0, ptoa_64(pages), 
+                       (kIODMAMapReadAccess | kIODMAMapWriteAccess),
+                       NULL, NULL, NULL,
+                       &dmaAddress, &dmaLength);
+    }
+
+    if (kIOReturnSuccess == ret) return (atop_64(dmaAddress));
+    return (0);
 }
 
 void IOMapperIOVMFree(ppnum_t addr, unsigned pages)
 {
     if (IOMapper::gSystem)
-        IOMapper::gSystem->iovmFree(addr, (IOItemCount) pages);
-}
-
-ppnum_t IOMapperInsertPage(ppnum_t addr, unsigned offset, ppnum_t page)
-{
-    if (IOMapper::gSystem) {
-               if (!addr) panic("!addr");
-        IOMapper::gSystem->iovmInsert(addr, (IOItemCount) offset, page);
-        return addr + offset;
+    {
+        IOMapper::gSystem->iovmUnmapMemory(NULL, NULL, ptoa_64(addr), ptoa_64(pages));
     }
-    else
-        return page;
 }
 
-void IOMapperInsertPPNPages(ppnum_t addr, unsigned offset,
-                            ppnum_t *pageList, unsigned pageCount)
-{
-    if (!IOMapper::gSystem)
-        panic("IOMapperInsertPPNPages no system mapper");
-    else
-        assert(!((vm_address_t) IOMapper::gSystem & 3));
-
-    IOMapper::gSystem->
-        iovmInsert(addr, (IOItemCount) offset, pageList, pageCount);
-}
-
-void IOMapperInsertUPLPages(ppnum_t addr, unsigned offset,
-                            upl_page_info_t *pageList, unsigned pageCount)
+ppnum_t IOMapperInsertPage(ppnum_t addr, unsigned offset, ppnum_t page)
 {
-    if (!IOMapper::gSystem)
-        panic("IOMapperInsertUPLPages no system mapper");
-    else
-        assert(!((vm_address_t) IOMapper::gSystem & 3));
-
-    IOMapper::gSystem->iovmInsert(addr,
-                                 (IOItemCount) offset,
-                                  pageList,
-                                  (IOItemCount) pageCount);
+    if (!IOMapper::gSystem) return (page);
+    if (!addr) panic("!addr");
+    IOMapper::gSystem->iovmInsert((kIODMAMapReadAccess | kIODMAMapWriteAccess),
+                                 ptoa_64(addr), ptoa_64(offset), ptoa_64(page), ptoa_64(1));
+    return (addr + offset);
 }
 
 /////////////////////////////////////////////////////////////////////////////
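
In what appears to be IOMapper.cpp, the legacy page-granular C accessors (IOMapperIOVMAlloc, IOMapperIOVMFree, IOMapperInsertPage) are rewritten on top of the byte-granular iovmMapMemory / iovmUnmapMemory / iovmInsert interfaces, converting between pages and bytes with ptoa_64 and atop_64. A tiny standalone sketch of that page/byte arithmetic, assuming a 4 KiB page purely for illustration:

#include <cstdint>
#include <cstdio>

// Illustrative analogues of the kernel's ptoa_64 / atop_64 macros; the
// real PAGE_SHIFT comes from the platform, 12 is assumed here.
constexpr unsigned kPageShift = 12;
constexpr uint64_t ptoa64(uint64_t pages) { return pages << kPageShift; }
constexpr uint64_t atop64(uint64_t bytes) { return bytes >> kPageShift; }

int main()
{
    uint64_t pages = 3;
    uint64_t bytes = ptoa64(pages);   // 3 pages -> 12288 bytes
    std::printf("%llu pages = %llu bytes, back to %llu pages\n",
                (unsigned long long)pages,
                (unsigned long long)bytes,
                (unsigned long long)atop64(bytes));
    return 0;
}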
@@ -332,7 +231,7 @@ UInt8 IOMappedRead8(IOPhysicalAddress address)
     IOMapper::checkForSystemMapper();
 
     if (IOMapper::gSystem) {
-        addr64_t addr = IOMapper::gSystem->mapAddr(address);
+        addr64_t addr = IOMapper::gSystem->mapToPhysicalAddress(address);
         return (UInt8) ml_phys_read_byte_64(addr);
     }
     else
@@ -344,7 +243,7 @@ UInt16 IOMappedRead16(IOPhysicalAddress address)
     IOMapper::checkForSystemMapper();
 
     if (IOMapper::gSystem) {
-        addr64_t addr = IOMapper::gSystem->mapAddr(address);
+        addr64_t addr = IOMapper::gSystem->mapToPhysicalAddress(address);
         return (UInt16) ml_phys_read_half_64(addr);
     }
     else
@@ -356,7 +255,7 @@ UInt32 IOMappedRead32(IOPhysicalAddress address)
     IOMapper::checkForSystemMapper();
 
     if (IOMapper::gSystem) {
-        addr64_t addr = IOMapper::gSystem->mapAddr(address);
+        addr64_t addr = IOMapper::gSystem->mapToPhysicalAddress(address);
        return (UInt32) ml_phys_read_word_64(addr);
     }
     else
@@ -368,7 +267,7 @@ UInt64 IOMappedRead64(IOPhysicalAddress address)
     IOMapper::checkForSystemMapper();
 
     if (IOMapper::gSystem) {
-        addr64_t addr = IOMapper::gSystem->mapAddr(address);
+        addr64_t addr = IOMapper::gSystem->mapToPhysicalAddress(address);
         return (UInt64) ml_phys_read_double_64(addr);
     }
     else
@@ -380,7 +279,7 @@ void IOMappedWrite8(IOPhysicalAddress address, UInt8 value)
     IOMapper::checkForSystemMapper();
 
     if (IOMapper::gSystem) {
-        addr64_t addr = IOMapper::gSystem->mapAddr(address);
+        addr64_t addr = IOMapper::gSystem->mapToPhysicalAddress(address);
         ml_phys_write_byte_64(addr, value);
     }
     else
@@ -392,7 +291,7 @@ void IOMappedWrite16(IOPhysicalAddress address, UInt16 value)
     IOMapper::checkForSystemMapper();
 
     if (IOMapper::gSystem) {
-        addr64_t addr = IOMapper::gSystem->mapAddr(address);
+        addr64_t addr = IOMapper::gSystem->mapToPhysicalAddress(address);
         ml_phys_write_half_64(addr, value);
     }
     else
@@ -404,7 +303,7 @@ void IOMappedWrite32(IOPhysicalAddress address, UInt32 value)
     IOMapper::checkForSystemMapper();
 
     if (IOMapper::gSystem) {
-        addr64_t addr = IOMapper::gSystem->mapAddr(address);
+        addr64_t addr = IOMapper::gSystem->mapToPhysicalAddress(address);
         ml_phys_write_word_64(addr, value);
     }
     else
@@ -416,7 +315,7 @@ void IOMappedWrite64(IOPhysicalAddress address, UInt64 value)
     IOMapper::checkForSystemMapper();
 
     if (IOMapper::gSystem) {
-        addr64_t addr = IOMapper::gSystem->mapAddr(address);
+        addr64_t addr = IOMapper::gSystem->mapToPhysicalAddress(address);
         ml_phys_write_double_64(addr, value);
     }
     else
index 0c77443861c05e2b02c8ac46dd5ffafc0181b486..4bd9659e79e523b4793c713f88f7e8788b5ec963 100644 (file)
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
- * Copyright (c) 1998 Apple Computer, Inc.  All rights reserved. 
- *
- * HISTORY
- *
- */
 
 
 #include <sys/cdefs.h>
@@ -43,9 +37,8 @@
 #include <IOKit/IODMACommand.h>
 #include <IOKit/IOKitKeysPrivate.h>
 
-#ifndef __LP64__
 #include <IOKit/IOSubMemoryDescriptor.h>
-#endif /* !__LP64__ */
+#include <IOKit/IOMultiMemoryDescriptor.h>
 
 #include <IOKit/IOKitDebug.h>
 #include <libkern/OSDebug.h>
@@ -74,16 +67,6 @@ __BEGIN_DECLS
 extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va);
 extern void ipc_port_release_send(ipc_port_t port);
 
-kern_return_t
-memory_object_iopl_request(
-       ipc_port_t              port,
-       memory_object_offset_t  offset,
-       vm_size_t               *upl_size,
-       upl_t                   *upl_ptr,
-       upl_page_info_array_t   user_page_list,
-       unsigned int            *page_list_count,
-       int                     *flags);
-
 // osfmk/device/iokit_rpc.c
 unsigned int IODefaultCacheBits(addr64_t pa);
 unsigned int  IOTranslateCacheBits(struct phys_entry *pp);
@@ -151,8 +134,12 @@ struct ioGMDData {
     IOMapper *  fMapper;
     uint8_t    fDMAMapNumAddressBits;
     uint64_t    fDMAMapAlignment;
-    addr64_t    fMappedBase;
-    uint64_t fPreparationID;
+    uint64_t    fMappedBase;
+    uint64_t    fMappedLength;
+    uint64_t    fPreparationID;
+#if IOTRACKING
+    IOTracking  fWireTracking;
+#endif
     unsigned int fPageCnt;
     unsigned char fDiscontig:1;
     unsigned char fCompletionError:1;
@@ -484,9 +471,12 @@ IOGeneralMemoryDescriptor::memoryReferenceCreate(
     IOOptionBits         type = (_flags & kIOMemoryTypeMask);
     IOOptionBits         cacheMode;
     unsigned int        pagerFlags;
+    vm_tag_t             tag;
 
     ref = memoryReferenceAlloc(kCapacity, NULL);
     if (!ref) return (kIOReturnNoMemory);
+
+    tag = IOMemoryTag(kernel_map);
     entries = &ref->entries[0];
     count = 0;
 
@@ -497,6 +487,7 @@ IOGeneralMemoryDescriptor::memoryReferenceCreate(
     {
         nextAddr = getPhysicalSegment(offset, &physLen, kIOMemoryMapperNone);
         nextLen = physLen;
+
        // default cache mode for physical
        if (kIODefaultCache == ((_flags & kIOMemoryBufferCacheMask) >> kIOMemoryBufferCacheShift))
        {
@@ -607,7 +598,7 @@ IOGeneralMemoryDescriptor::memoryReferenceCreate(
     }
     else
     {
-       // _task == 0, physical
+        // _task == 0, physical or kIOMemoryTypeUPL
        memory_object_t pager;
         vm_size_t       size = ptoa_32(_pages);
 
@@ -667,16 +658,7 @@ IOGeneralMemoryDescriptor::memoryReferenceCreate(
     return (err);
 }
 
-struct IOMemoryDescriptorMapAllocRef
-{
-    vm_map_t          map;
-    mach_vm_address_t mapped;
-    mach_vm_size_t    size;
-    vm_prot_t         prot;
-    IOOptionBits      options;
-};
-
-static kern_return_t 
+kern_return_t 
 IOMemoryDescriptorMapAlloc(vm_map_t map, void * _ref)
 {
     IOMemoryDescriptorMapAllocRef * ref = (typeof(ref))_ref;
@@ -684,12 +666,13 @@ IOMemoryDescriptorMapAlloc(vm_map_t map, void * _ref)
     vm_map_offset_t                addr;
 
     addr = ref->mapped;
+
     err = vm_map_enter_mem_object(map, &addr, ref->size,
                                  (vm_map_offset_t) 0,
                                  (((ref->options & kIOMapAnywhere)
                                    ? VM_FLAGS_ANYWHERE
                                    : VM_FLAGS_FIXED)
-                                  | VM_MAKE_TAG(VM_MEMORY_IOKIT)
+                                  | VM_MAKE_TAG(ref->tag)
                                   | VM_FLAGS_IOKIT_ACCT), /* iokit accounting */
                                  IPC_PORT_NULL,
                                  (memory_object_offset_t) 0,
@@ -721,13 +704,14 @@ IOGeneralMemoryDescriptor::memoryReferenceMap(
     vm_map_offset_t addr, mapAddr;
     vm_map_offset_t pageOffset, entryOffset, remain, chunk;
 
-    mach_vm_address_t srcAddr, nextAddr;
-    mach_vm_size_t    srcLen, nextLen;
+    mach_vm_address_t nextAddr;
+    mach_vm_size_t    nextLen;
     IOByteCount       physLen;
     IOMemoryEntry   * entry;
     vm_prot_t         prot, memEntryCacheMode;
     IOOptionBits      type;
     IOOptionBits      cacheMode;
+    vm_tag_t          tag;
 
     /*
      * For the kIOMapPrefault option.
@@ -747,6 +731,8 @@ IOGeneralMemoryDescriptor::memoryReferenceMap(
        memEntryCacheMode = (MAP_MEM_ONLY | VM_PROT_WRITE | prot | vmProtForCacheMode(cacheMode));
     }
 
+    tag = IOMemoryTag(map);
+
     if (_task)
     {
        // Find first range for offset
@@ -788,9 +774,19 @@ IOGeneralMemoryDescriptor::memoryReferenceMap(
 
     // allocate VM
     size = round_page_64(size + pageOffset);
+    if (kIOMapOverwrite & options)
+    {
+        if ((map == kernel_map) && (kIOMemoryBufferPageable & _flags))
+        {
+            map = IOPageableMapForAddress(addr);
+        }
+        err = KERN_SUCCESS;
+    }
+    else
     {
        IOMemoryDescriptorMapAllocRef ref;
        ref.map     = map;
+       ref.tag     = tag;
        ref.options = options;
        ref.size    = size;
        ref.prot    = prot;
@@ -799,7 +795,6 @@ IOGeneralMemoryDescriptor::memoryReferenceMap(
            ref.mapped = 0;
        else
            ref.mapped = addr;
-
        if ((ref.map == kernel_map) && (kIOMemoryBufferPageable & _flags))
            err = IOIteratePageableMaps( ref.size, &IOMemoryDescriptorMapAlloc, &ref );
        else
@@ -815,7 +810,8 @@ IOGeneralMemoryDescriptor::memoryReferenceMap(
      * Prefaulting is only possible if we wired the memory earlier. Check the
      * memory type, and the underlying data.
      */
-    if (options & kIOMapPrefault) {
+    if (options & kIOMapPrefault)
+    {
         /*
          * The memory must have been wired by calling ::prepare(), otherwise
          * we don't have the UPL. Without UPLs, pages cannot be pre-faulted
@@ -829,7 +825,7 @@ IOGeneralMemoryDescriptor::memoryReferenceMap(
         {
             return kIOReturnBadArgument;
         }
-        
+
         // Get the page list.
         ioGMDData* dataP = getDataP(_memoryEntries);
         ioPLBlock const* ioplList = getIOPLList(dataP);
@@ -871,22 +867,9 @@ IOGeneralMemoryDescriptor::memoryReferenceMap(
     remain  = size;
     mapAddr = addr;
     addr    += pageOffset;
-    while (remain && nextLen && (KERN_SUCCESS == err))
-    {
-       srcAddr  = nextAddr;
-       srcLen   = nextLen;
-       nextAddr = 0;
-       nextLen  = 0;
-       // coalesce addr range
-       for (++rangeIdx; rangeIdx < _rangesCount; rangeIdx++)
-       {
-           getAddrLenForInd(nextAddr, nextLen, type, _ranges, rangeIdx);
-           if ((srcAddr + srcLen) != nextAddr) break;
-           srcLen += nextLen;
-       }
 
-        while (srcLen && (KERN_SUCCESS == err))
-        {
+    while (remain && (KERN_SUCCESS == err))
+    {
             entryOffset = offset - entry->offset;
             if ((page_mask & entryOffset) != pageOffset) 
             {
@@ -908,15 +891,15 @@ IOGeneralMemoryDescriptor::memoryReferenceMap(
             if (chunk)
             {
                 if (chunk > remain) chunk = remain;
-
-                if (options & kIOMapPrefault) {
+               if (options & kIOMapPrefault) 
+               {
                     UInt nb_pages = round_page(chunk) / PAGE_SIZE;
                     err = vm_map_enter_mem_object_prefault(map,
                                                            &mapAddr,
                                                            chunk, 0 /* mask */, 
                                                             (VM_FLAGS_FIXED
                                                            | VM_FLAGS_OVERWRITE
-                                                           | VM_MAKE_TAG(VM_MEMORY_IOKIT)
+                                                           | VM_MAKE_TAG(tag)
                                                            | VM_FLAGS_IOKIT_ACCT), /* iokit accounting */
                                                            entry->entry,
                                                            entryOffset,
@@ -928,13 +911,15 @@ IOGeneralMemoryDescriptor::memoryReferenceMap(
                     // Compute the next index in the page list.
                     currentPageIndex += nb_pages;
                     assert(currentPageIndex <= _pages);
-                } else {
+               } 
+               else 
+               {
                     err = vm_map_enter_mem_object(map,
                                                   &mapAddr,
                                                   chunk, 0 /* mask */, 
                                                    (VM_FLAGS_FIXED
                                                   | VM_FLAGS_OVERWRITE
-                                                  | VM_MAKE_TAG(VM_MEMORY_IOKIT)
+                                                  | VM_MAKE_TAG(tag)
                                                   | VM_FLAGS_IOKIT_ACCT), /* iokit accounting */
                                                   entry->entry,
                                                   entryOffset,
@@ -943,7 +928,6 @@ IOGeneralMemoryDescriptor::memoryReferenceMap(
                                                   prot, // max
                                                   VM_INHERIT_NONE);
                 }
-
                 if (KERN_SUCCESS != err) break;
                 remain -= chunk;
                 if (!remain) break;
@@ -959,9 +943,8 @@ IOGeneralMemoryDescriptor::memoryReferenceMap(
                 break;
             }
         }
-    }
 
-    if ((KERN_SUCCESS != err) && addr)
+    if ((KERN_SUCCESS != err) && addr && !(kIOMapOverwrite & options))
     {
         (void) mach_vm_deallocate(map, trunc_page_64(addr), size);
         addr = 0;
@@ -1188,7 +1171,7 @@ IOMemoryDescriptor::withSubRange(IOMemoryDescriptor *     of,
                                IOByteCount             length,
                                IODirection             direction)
 {
-    return (IOSubMemoryDescriptor::withSubRange(of, offset, length, direction | kIOMemoryThreadSafe));
+    return (IOSubMemoryDescriptor::withSubRange(of, offset, length, direction));
 }
 #endif /* !__LP64__ */
 
@@ -1660,7 +1643,7 @@ void IOGeneralMemoryDescriptor::free()
        ioGMDData * dataP;
        if (_memoryEntries && (dataP = getDataP(_memoryEntries)) && dataP->fMappedBase)
        {
-           dataP->fMapper->iovmFree(atop_64(dataP->fMappedBase), _pages);
+           dataP->fMapper->iovmUnmapMemory(this, NULL, dataP->fMappedBase, dataP->fMappedLength);
            dataP->fMappedBase = 0;
        }
     }
@@ -1952,31 +1935,35 @@ IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void *
 
        if (_memoryEntries && data->fMapper)
        {
-           bool remap;
-           bool whole = ((data->fOffset == 0) && (data->fLength == _length));
+           bool remap, keepMap;
            dataP = getDataP(_memoryEntries);
 
            if (data->fMapSpec.numAddressBits < dataP->fDMAMapNumAddressBits) dataP->fDMAMapNumAddressBits = data->fMapSpec.numAddressBits;
            if (data->fMapSpec.alignment      > dataP->fDMAMapAlignment)      dataP->fDMAMapAlignment      = data->fMapSpec.alignment;
 
-           remap = (dataP->fDMAMapNumAddressBits < 64)
-                && ((dataP->fMappedBase + _length) > (1ULL << dataP->fDMAMapNumAddressBits));
+           keepMap = (data->fMapper == gIOSystemMapper);
+           keepMap &= ((data->fOffset == 0) && (data->fLength == _length));
+
+           remap = (!keepMap);
+           remap |= (dataP->fDMAMapNumAddressBits < 64)
+                 && ((dataP->fMappedBase + _length) > (1ULL << dataP->fDMAMapNumAddressBits));
            remap |= (dataP->fDMAMapAlignment > page_size);
-           remap |= (!whole);
+
            if (remap || !dataP->fMappedBase)
            {
 //             if (dataP->fMappedBase) OSReportWithBacktrace("kIOMDDMAMap whole %d remap %d params %d\n", whole, remap, params);
-               err = md->dmaMap(data->fMapper, &data->fMapSpec, data->fOffset, data->fLength, &data->fAlloc, &data->fAllocCount);
-               if ((kIOReturnSuccess == err) && whole && !dataP->fMappedBase)
+               err = md->dmaMap(data->fMapper, data->fCommand, &data->fMapSpec, data->fOffset, data->fLength, &data->fAlloc, &data->fAllocLength);
+               if (keepMap && (kIOReturnSuccess == err) && !dataP->fMappedBase)
                {
-                   dataP->fMappedBase = data->fAlloc;
-                   data->fAllocCount = 0;                      // IOMD owns the alloc now
+                   dataP->fMappedBase   = data->fAlloc;
+                   dataP->fMappedLength = data->fAllocLength;
+                   data->fAllocLength   = 0;                   // IOMD owns the alloc now
                }
            }
            else
            {
                data->fAlloc = dataP->fMappedBase;
-               data->fAllocCount = 0;                          // IOMD owns the alloc
+               data->fAllocLength = 0;                         // give out IOMD map
            }
            data->fMapContig = !dataP->fDiscontig;
        }
@@ -2077,7 +2064,7 @@ IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void *
            bzero(&mapSpec, sizeof(mapSpec));
            mapSpec.numAddressBits = dataP->fDMAMapNumAddressBits;
            mapSpec.alignment = dataP->fDMAMapAlignment;
-           err = md->dmaMap(dataP->fMapper, &mapSpec, 0, _length, &dataP->fMappedBase, NULL);
+           err = md->dmaMap(dataP->fMapper, NULL, &mapSpec, 0, _length, &dataP->fMappedBase, &dataP->fMappedLength);
            if (kIOReturnSuccess != err) return (err);
        }
     }
@@ -2350,10 +2337,10 @@ IOGeneralMemoryDescriptor::getPhysicalSegment(IOByteCount offset, IOByteCount *l
                addr64_t    origAddr = address;
                IOByteCount origLen  = length;
 
-               address = mapper->mapAddr(origAddr);
+               address = mapper->mapToPhysicalAddress(origAddr);
                length = page_size - (address & (page_size - 1));
                while ((length < origLen)
-                   && ((address + length) == mapper->mapAddr(origAddr + length)))
+                   && ((address + length) == mapper->mapToPhysicalAddress(origAddr + length)))
                    length += page_size;
                if (length > origLen)
                    length = origLen;
@@ -2437,11 +2424,11 @@ IOMemoryDescriptor::getPhysicalSegment64(IOByteCount offset, IOByteCount *length
     {
        IOByteCount origLen;
 
-       phys64 = mapper->mapAddr(phys32);
+       phys64 = mapper->mapToPhysicalAddress(phys32);
        origLen = *lengthOfSegment;
        length = page_size - (phys64 & (page_size - 1));
        while ((length < origLen)
-           && ((phys64 + length) == mapper->mapAddr(phys32 + length)))
+           && ((phys64 + length) == mapper->mapToPhysicalAddress(phys32 + length)))
            length += page_size;
        if (length > origLen)
            length = origLen;
@@ -2522,7 +2509,7 @@ IOMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void *vData, UInt data
        if (params) panic("class %s does not support IODMACommand::kIterateOnly", getMetaClass()->getClassName());
 
        data->fMapContig = true;
-       err = md->dmaMap(data->fMapper, &data->fMapSpec, data->fOffset, data->fLength, &data->fAlloc, &data->fAllocCount);
+       err = md->dmaMap(data->fMapper, data->fCommand, &data->fMapSpec, data->fOffset, data->fLength, &data->fAlloc, &data->fAllocLength);
        return (err);                           
     }
     else return kIOReturnBadArgument;
@@ -2607,10 +2594,23 @@ IOReturn IOMemoryDescriptor::setPurgeable( IOOptionBits newState,
 IOReturn IOMemoryDescriptor::getPageCounts( IOByteCount * residentPageCount,
                                            IOByteCount * dirtyPageCount )
 {
-   IOReturn err = kIOReturnNotReady;
+    IOReturn err = kIOReturnNotReady;
 
     if (kIOMemoryThreadSafe & _flags) LOCK;
     if (_memRef) err = IOGeneralMemoryDescriptor::memoryReferenceGetPageCounts(_memRef, residentPageCount, dirtyPageCount);
+    else
+    {
+       IOMultiMemoryDescriptor * mmd;
+       IOSubMemoryDescriptor   * smd;
+       if ((smd = OSDynamicCast(IOSubMemoryDescriptor, this)))
+       {
+           err = smd->getPageCounts(residentPageCount, dirtyPageCount);
+       }
+       else if ((mmd = OSDynamicCast(IOMultiMemoryDescriptor, this)))
+       {
+           err = mmd->getPageCounts(residentPageCount, dirtyPageCount);
+       }
+    }
     if (kIOMemoryThreadSafe & _flags) UNLOCK;
 
     return (err);
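
getPageCounts now handles descriptors without a _memRef by downcasting with OSDynamicCast and forwarding to the IOSubMemoryDescriptor or IOMultiMemoryDescriptor implementation. A userspace analogue of that safe-downcast dispatch using dynamic_cast (libkern's OSDynamicCast plays the same role for OSObject subclasses); classes and values here are illustrative:

#include <cstdio>

// Userspace stand-in for the OSObject hierarchy: a polymorphic base and
// two subclasses, each reporting its own count.
struct Descriptor                    { virtual ~Descriptor() = default; };
struct SubDescriptor   : Descriptor  { int pageCount() const { return 4; } };
struct MultiDescriptor : Descriptor  { int pageCount() const { return 16; } };

// Dispatch like the new getPageCounts fallback: try each concrete type,
// returning -1 (a stand-in for kIOReturnNotReady) if neither matches.
static int pageCountFor(Descriptor *d)
{
    if (auto *s = dynamic_cast<SubDescriptor *>(d))   return s->pageCount();
    if (auto *m = dynamic_cast<MultiDescriptor *>(d)) return m->pageCount();
    return -1;
}

int main()
{
    SubDescriptor sub;
    MultiDescriptor multi;
    std::printf("%d %d\n", pageCountFor(&sub), pageCountFor(&multi)); // 4 16
    return 0;
}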
@@ -2703,8 +2703,10 @@ IOReturn IOMemoryDescriptor::performOperation( IOOptionBits options,
 }
 
 #if defined(__i386__) || defined(__x86_64__)
-extern vm_offset_t             first_avail;
-#define io_kernel_static_end   first_avail
+
+#define io_kernel_static_start vm_kernel_stext
+#define io_kernel_static_end   vm_kernel_etext
+
 #else
 #error io_kernel_static_end is undefined for this architecture
 #endif
@@ -2713,7 +2715,7 @@ static kern_return_t
 io_get_kernel_static_upl(
        vm_map_t                /* map */,
        uintptr_t               offset,
-       vm_size_t               *upl_size,
+       upl_size_t              *upl_size,
        upl_t                   *upl,
        upl_page_info_array_t   page_list,
        unsigned int            *count,
@@ -2762,7 +2764,7 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection)
     if ((kIODirectionOutIn & forDirection) == kIODirectionNone)
         forDirection = (IODirection) (forDirection | getDirection());
 
-    int uplFlags;    // This Mem Desc's default flags for upl creation
+    upl_control_flags_t uplFlags;    // This Mem Desc's default flags for upl creation
     switch (kIODirectionOutIn & forDirection)
     {
     case kIODirectionOut:
@@ -2793,6 +2795,8 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection)
     dataP->fMappedBase = 0;
 
     uplFlags |= UPL_SET_IO_WIRE | UPL_SET_LITE;
+    uplFlags |= UPL_MEMORY_TAG_MAKE(IOMemoryTag(kernel_map));
+
     if (kIODirectionPrepareToPhys32 & forDirection) 
     {
        if (!mapper) uplFlags |= UPL_NEED_32BIT_ADDR;
@@ -2857,15 +2861,18 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection)
                theMap = IOPageableMapForAddress(kernelStart);
            }
 
-            int ioplFlags = uplFlags;
+           // ioplFlags is an in/out parameter
+            upl_control_flags_t ioplFlags = uplFlags;
            dataP = getDataP(_memoryEntries);
            pageInfo = getPageList(dataP);
             upl_page_list_ptr_t baseInfo = &pageInfo[pageIndex];
 
-            vm_size_t ioplSize = round_page(numBytes);
+            upl_size_t ioplSize = round_page(numBytes);
             unsigned int numPageInfo = atop_32(ioplSize);
 
-           if ((theMap == kernel_map) && (kernelStart < io_kernel_static_end)) {
+           if ((theMap == kernel_map) 
+            && (kernelStart >= io_kernel_static_start) 
+            && (kernelStart <  io_kernel_static_end)) {
                error = io_get_kernel_static_upl(theMap, 
                                                kernelStart,
                                                &ioplSize,
@@ -2877,7 +2884,8 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection)
            else if (_memRef) {
                memory_object_offset_t entryOffset;
 
-               entryOffset = (mdOffset - iopl.fPageOffset - memRefEntry->offset);
+                entryOffset = mdOffset;
+                entryOffset = (entryOffset - iopl.fPageOffset - memRefEntry->offset);
                if (entryOffset >= memRefEntry->size) {
                    memRefEntry++;
                    if (memRefEntry >= &_memRef->entries[_memRef->count]) panic("memRefEntry");
@@ -2968,6 +2976,16 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection)
 
     if (UPL_COPYOUT_FROM & uplFlags) _flags |= kIOMemoryPreparedReadOnly;
 
+    if ((kIOTracking & gIOKitDebug) 
+     //&& !(_flags & kIOMemoryAutoPrepare)
+     )
+    {
+        dataP = getDataP(_memoryEntries);
+#if IOTRACKING
+        IOTrackingAdd(gIOWireTracking, &dataP->fWireTracking, ptoa(_pages), false);
+#endif
+    }
+
     return kIOReturnSuccess;
 
 abortExit:
@@ -3028,106 +3046,52 @@ bool IOGeneralMemoryDescriptor::initMemoryEntries(size_t size, IOMapper * mapper
 
 IOReturn IOMemoryDescriptor::dmaMap(
     IOMapper                    * mapper,
+    IODMACommand                * command,
     const IODMAMapSpecification * mapSpec,
     uint64_t                      offset,
     uint64_t                      length,
-    uint64_t                    * address,
-    ppnum_t                     * mapPages)
+    uint64_t                    * mapAddress,
+    uint64_t                    * mapLength)
 {
-    IOMDDMAWalkSegmentState  walkState;
-    IOMDDMAWalkSegmentArgs * walkArgs = (IOMDDMAWalkSegmentArgs *) (void *)&walkState;
-    IOOptionBits             mdOp;
-    IOReturn                 ret;
-    IOPhysicalLength         segLen;
-    addr64_t                 phys, align, pageOffset;
-    ppnum_t                  base, pageIndex, pageCount;
-    uint64_t                 index;
-    uint32_t                 mapOptions = 0;
+    IOReturn ret;
+    uint32_t mapOptions;
 
+    mapOptions = 0;
+    mapOptions |= kIODMAMapReadAccess;
     if (!(kIOMemoryPreparedReadOnly & _flags)) mapOptions |= kIODMAMapWriteAccess;
 
-    walkArgs->fMapped = false;
-    mdOp = kIOMDFirstSegment;
-    pageCount = 0;
-    for (index = 0; index < length; )
-    {
-       if (index && (page_mask & (index + pageOffset))) break;
-
-       walkArgs->fOffset = offset + index;
-       ret = dmaCommandOperation(mdOp, &walkState, sizeof(walkState));
-       mdOp = kIOMDWalkSegments;
-       if (ret != kIOReturnSuccess) break;
-       phys = walkArgs->fIOVMAddr;
-       segLen = walkArgs->fLength;
-
-       align = (phys & page_mask);
-       if (!index) pageOffset = align;
-       else if (align) break;
-       pageCount += atop_64(round_page_64(align + segLen));
-       index += segLen;
-    }
-
-    if (index < length) return (kIOReturnVMError);
+    ret = mapper->iovmMapMemory(this, offset, length, mapOptions, 
+                               mapSpec, command, NULL, mapAddress, mapLength);
 
-    base = mapper->iovmMapMemory(this, offset, pageCount, 
-                                mapOptions, NULL, mapSpec);
-
-    if (!base) return (kIOReturnNoResources);
-
-    mdOp = kIOMDFirstSegment;
-    for (pageIndex = 0, index = 0; index < length; )
-    {
-       walkArgs->fOffset = offset + index;
-       ret = dmaCommandOperation(mdOp, &walkState, sizeof(walkState));
-       mdOp = kIOMDWalkSegments;
-       if (ret != kIOReturnSuccess) break;
-       phys = walkArgs->fIOVMAddr;
-       segLen = walkArgs->fLength;
-
-       ppnum_t page = atop_64(phys);
-       ppnum_t count = atop_64(round_page_64(phys + segLen)) - page;
-       while (count--)
-       {
-           mapper->iovmInsert(base, pageIndex, page);
-           page++;
-           pageIndex++;
-       }
-       index += segLen;
-    }
-    if (pageIndex != pageCount) panic("pageIndex");
-
-    *address = ptoa_64(base) + pageOffset;
-    if (mapPages) *mapPages = pageCount;
-
-    return (kIOReturnSuccess);
+    return (ret);
 }
 
 IOReturn IOGeneralMemoryDescriptor::dmaMap(
     IOMapper                    * mapper,
+    IODMACommand                * command,
     const IODMAMapSpecification * mapSpec,
     uint64_t                      offset,
     uint64_t                      length,
-    uint64_t                    * address,
-    ppnum_t                     * mapPages)
+    uint64_t                    * mapAddress,
+    uint64_t                    * mapLength)
 {
     IOReturn          err = kIOReturnSuccess;
     ioGMDData *       dataP;
     IOOptionBits      type = _flags & kIOMemoryTypeMask;
 
-    *address = 0;
+    *mapAddress = 0;
     if (kIOMemoryHostOnly & _flags) return (kIOReturnSuccess);
 
     if ((type == kIOMemoryTypePhysical) || (type == kIOMemoryTypePhysical64)
      || offset || (length != _length))
     {
-       err = super::dmaMap(mapper, mapSpec, offset, length, address, mapPages);
+       err = super::dmaMap(mapper, command, mapSpec, offset, length, mapAddress, mapLength);
     }
     else if (_memoryEntries && _pages && (dataP = getDataP(_memoryEntries)))
     {
        const ioPLBlock * ioplList = getIOPLList(dataP);
        upl_page_info_t * pageList;
        uint32_t          mapOptions = 0;
-       ppnum_t           base;
 
        IODMAMapSpecification mapSpec;
        bzero(&mapSpec, sizeof(mapSpec));
@@ -3141,18 +3105,27 @@ IOReturn IOGeneralMemoryDescriptor::dmaMap(
            pageList = (upl_page_info_t *) ioplList->fPageInfo;
            mapOptions |= kIODMAMapPagingPath;
        }
-       else
-           pageList = getPageList(dataP);
+       else pageList = getPageList(dataP);
 
-    if (!(kIOMemoryPreparedReadOnly & _flags)) mapOptions |= kIODMAMapWriteAccess;
+       if ((_length == ptoa_64(_pages)) && !(page_mask & ioplList->fPageOffset))
+       {
+           mapOptions |= kIODMAMapPageListFullyOccupied;
+       }
+
+       mapOptions |= kIODMAMapReadAccess;
+       if (!(kIOMemoryPreparedReadOnly & _flags)) mapOptions |= kIODMAMapWriteAccess;
 
        // Check for direct device non-paged memory
        if (ioplList->fFlags & kIOPLOnDevice) mapOptions |= kIODMAMapPhysicallyContiguous;
 
-       base = mapper->iovmMapMemory(
-                       this, offset, _pages, mapOptions, &pageList[0], &mapSpec);
-       *address = ptoa_64(base) + (ioplList->fPageOffset & PAGE_MASK);
-       if (mapPages) *mapPages = _pages;
+       IODMAMapPageList dmaPageList =
+       {
+               .pageOffset    = ioplList->fPageOffset & page_mask,
+               .pageListCount = _pages,
+               .pageList      = &pageList[0]
+       };
+       err = mapper->iovmMapMemory(this, offset, length, mapOptions, &mapSpec, 
+                                   command, &dmaPageList, mapAddress, mapLength);
     }
 
     return (err);
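
Both dmaMap variants are rewritten to hand the whole job to the mapper: instead of walking segments and calling iovmInsert page by page, they assemble the access options (and, for the general descriptor, an IODMAMapPageList) and make a single iovmMapMemory call that returns a byte-granular mapAddress/mapLength pair. A hedged sketch of the option-mask construction only, with stand-in constants rather than the real kIODMAMap* values:

#include <cstdint>
#include <cstdio>

// Stand-ins for the kIODMAMap* option bits (illustrative values only).
constexpr uint32_t kMapReadAccess            = 1u << 0;
constexpr uint32_t kMapWriteAccess           = 1u << 1;
constexpr uint32_t kMapPhysicallyContiguous  = 1u << 2;
constexpr uint32_t kMapPageListFullyOccupied = 1u << 3;

// Mirrors the flag logic in the rewritten dmaMap: reads are always allowed,
// writes only when the memory was not prepared read-only.
static uint32_t dmaMapOptions(bool preparedReadOnly, bool onDevice, bool fullPages)
{
    uint32_t options = kMapReadAccess;
    if (!preparedReadOnly) options |= kMapWriteAccess;
    if (onDevice)          options |= kMapPhysicallyContiguous;
    if (fullPages)         options |= kMapPageListFullyOccupied;
    return options;
}

int main()
{
    std::printf("0x%x\n", dmaMapOptions(false, false, true)); // read|write|fully occupied
    std::printf("0x%x\n", dmaMapOptions(true,  true,  false)); // read|contiguous
    return 0;
}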
@@ -3260,11 +3233,19 @@ IOReturn IOGeneralMemoryDescriptor::complete(IODirection forDirection)
 #endif /* IOMD_DEBUG_DMAACTIVE */
 
                if (dataP->fMappedBase) {
-                   dataP->fMapper->iovmFree(atop_64(dataP->fMappedBase), _pages);
+                   dataP->fMapper->iovmUnmapMemory(this, NULL, dataP->fMappedBase, dataP->fMappedLength);
                    dataP->fMappedBase = 0;
                }
                // Only complete iopls that we created which are for TypeVirtual
                if (kIOMemoryTypeVirtual == type || kIOMemoryTypeVirtual64 == type || kIOMemoryTypeUIO == type) {
+#if IOTRACKING
+                   if ((kIOTracking & gIOKitDebug) 
+                    //&& !(_flags & kIOMemoryAutoPrepare)
+                    )
+                   {
+                       IOTrackingRemove(gIOWireTracking, &dataP->fWireTracking, ptoa(_pages));
+                   }
+#endif
                    for (ind = 0; ind < count; ind++)
                        if (ioplList[ind].fIOPL) {
                            if (dataP->fCompletionError) 
@@ -3296,7 +3277,6 @@ IOReturn IOGeneralMemoryDescriptor::doMap(
        IOOptionBits            options,
        IOByteCount             __offset,
        IOByteCount             __length )
-
 {
 #ifndef __LP64__
     if (!(kIOMap64Bit & options)) panic("IOGeneralMemoryDescriptor::doMap !64bit");
@@ -3361,10 +3341,10 @@ IOReturn IOGeneralMemoryDescriptor::doMap(
     {
         do
        {
-           upl_t          redirUPL2;
-           vm_size_t      size;
-           int            flags;
-           unsigned int   lock_count;
+           upl_t               redirUPL2;
+           upl_size_t          size;
+           upl_control_flags_t flags;
+           unsigned int        lock_count;
 
            if (!_memRef || (1 != _memRef->count))
            {
@@ -3374,7 +3354,8 @@ IOReturn IOGeneralMemoryDescriptor::doMap(
 
            size = round_page(mapping->fLength);
            flags = UPL_COPYOUT_FROM | UPL_SET_INTERNAL 
-                       | UPL_SET_LITE | UPL_SET_IO_WIRE | UPL_BLOCK_ACCESS;
+                       | UPL_SET_LITE | UPL_SET_IO_WIRE | UPL_BLOCK_ACCESS
+                       | UPL_MEMORY_TAG_MAKE(IOMemoryTag(kernel_map));
 
            if (KERN_SUCCESS != memory_object_iopl_request(_memRef->entries[0].entry, 0, &size, &redirUPL2,
                                            NULL, NULL,
@@ -3420,14 +3401,14 @@ IOReturn IOGeneralMemoryDescriptor::doMap(
     else
     {
        err = memoryReferenceMap(_memRef, mapping->fAddressMap, offset, length, options, &mapping->fAddress);
-
+#if IOTRACKING
+        if (err == KERN_SUCCESS) IOTrackingAdd(gIOMapTracking, &mapping->fTracking, length, false);
+#endif
        if ((err == KERN_SUCCESS) && pager)
        {
            err = populateDevicePager(pager, mapping->fAddressMap, mapping->fAddress, offset, length, options);
-           if (err != KERN_SUCCESS)
-           {
-               doUnmap(mapping->fAddressMap, (IOVirtualAddress) mapping, 0);
-           }
+
+           if (err != KERN_SUCCESS) doUnmap(mapping->fAddressMap, (IOVirtualAddress) mapping, 0);
            else if (kIOMapDefaultCache == (options & kIOMapCacheMask))
            {
                mapping->fOptions |= ((_flags & kIOMemoryBufferCacheMask) >> kIOMemoryBufferCacheShift);
@@ -3561,8 +3542,17 @@ IOReturn IOMemoryDescriptor::populateDevicePager(
     mach_vm_size_t     page;
     mach_vm_size_t     pageOffset;
     mach_vm_size_t     pagerOffset;
-    IOPhysicalLength   segLen;
+    IOPhysicalLength   segLen, chunk;
     addr64_t           physAddr;
+    IOOptionBits        type;
+
+    type = _flags & kIOMemoryTypeMask;
+
+    if (reserved->dp.pagerContig)
+    {
+        sourceOffset = 0;
+        pagerOffset  = 0;
+    }
 
     physAddr = getPhysicalSegment( sourceOffset, &segLen, kIOMemoryMapperNone );
     assert( physAddr );
@@ -3583,26 +3573,24 @@ IOReturn IOMemoryDescriptor::populateDevicePager(
 
        if (kIOReturnSuccess != err) break;
 
-       if (reserved && reserved->dp.pagerContig)
+#if DEBUG || DEVELOPMENT
+        if ((kIOMemoryTypeUPL != type) 
+            && pmap_has_managed_page(atop_64(physAddr), atop_64(physAddr + segLen - 1))) 
        {
-           IOPhysicalLength    allLen;
-           addr64_t            allPhys;
+            OSReportWithBacktrace("IOMemoryDescriptor physical with managed page 0x%qx:0x%qx", physAddr, segLen);
+        }
+#endif /* DEBUG || DEVELOPMENT */
+
+        chunk = (reserved->dp.pagerContig ? round_page(segLen) : page_size);
+        for (page = 0;
+            (page < segLen) && (KERN_SUCCESS == err);
+            page += chunk)
+        {
+            err = device_pager_populate_object(pager, pagerOffset,
+                (ppnum_t)(atop_64(physAddr + page)), chunk);
+            pagerOffset += chunk;
+        }
 
-           allPhys = getPhysicalSegment( 0, &allLen, kIOMemoryMapperNone );
-           assert( allPhys );
-           err = device_pager_populate_object( pager, 0, atop_64(allPhys), round_page(allLen) );
-       }
-       else
-       {
-           for( page = 0;
-                (page < segLen) && (KERN_SUCCESS == err);
-                page += page_size)
-           {
-               err = device_pager_populate_object(pager, pagerOffset,
-                       (ppnum_t)(atop_64(physAddr + page)), page_size);
-               pagerOffset += page_size;
-           }
-       }
        assert (KERN_SUCCESS == err);
        if (err) break;
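
populateDevicePager now covers each physical segment with one chunked loop: the chunk is the whole rounded segment when the device pager is contiguous, otherwise a single page, so a single loop replaces the old special-cased branch. A standalone sketch of that chunked traversal with a hypothetical populate callback standing in for device_pager_populate_object:

#include <cstdint>
#include <cstdio>

constexpr uint64_t kPageSize = 4096;   // illustrative; the kernel uses page_size

static uint64_t roundPage(uint64_t n) { return (n + kPageSize - 1) & ~(kPageSize - 1); }

// Hypothetical stand-in for device_pager_populate_object().
static int populate(uint64_t pagerOffset, uint64_t length)
{
    std::printf("populate offset=%llu length=%llu\n",
                (unsigned long long)pagerOffset, (unsigned long long)length);
    return 0;   // KERN_SUCCESS stand-in
}

// One loop handles both cases: a contiguous pager takes the whole rounded
// segment in a single chunk, otherwise we walk it page by page.
static int populateSegment(uint64_t segLen, bool pagerContig)
{
    uint64_t chunk = pagerContig ? roundPage(segLen) : kPageSize;
    uint64_t pagerOffset = 0;
    int err = 0;
    for (uint64_t page = 0; (page < segLen) && (err == 0); page += chunk) {
        err = populate(pagerOffset, chunk);
        pagerOffset += chunk;
    }
    return err;
}

int main()
{
    populateSegment(3 * kPageSize + 100, false);  // page-by-page
    populateSegment(3 * kPageSize + 100, true);   // one rounded chunk
    return 0;
}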
 
@@ -3637,31 +3625,32 @@ IOReturn IOMemoryDescriptor::doUnmap(
        IOByteCount             __length )
 {
     IOReturn         err;
+    IOMemoryMap *     mapping;
     mach_vm_address_t address;
     mach_vm_size_t    length;
 
-    if (__length)
-    {
-       address = __address;
-       length  = __length;
-    }
-    else
-    {
-       addressMap = ((IOMemoryMap *) __address)->fAddressMap;
-       address    = ((IOMemoryMap *) __address)->fAddress;
-       length     = ((IOMemoryMap *) __address)->fLength;
-    }
+    if (__length) panic("doUnmap");
 
-    if ((addressMap == kernel_map) && (kIOMemoryBufferPageable & _flags))
-       addressMap = IOPageableMapForAddress( address );
+    mapping = (IOMemoryMap *) __address;
+    addressMap = mapping->fAddressMap;
+    address    = mapping->fAddress;
+    length     = mapping->fLength;
 
+    if (kIOMapOverwrite & mapping->fOptions) err = KERN_SUCCESS;
+    else
+    {
+        if ((addressMap == kernel_map) && (kIOMemoryBufferPageable & _flags))
+            addressMap = IOPageableMapForAddress( address );
 #if DEBUG
-    if( kIOLogMapping & gIOKitDebug)
-       IOLog("IOMemoryDescriptor::doUnmap map %p, 0x%qx:0x%qx\n",
-               addressMap, address, length );
+        if( kIOLogMapping & gIOKitDebug) IOLog("IOMemoryDescriptor::doUnmap map %p, 0x%qx:0x%qx\n",
+                                               addressMap, address, length );
 #endif
+        err = mach_vm_deallocate( addressMap, address, length );
+    }
 
-    err = mach_vm_deallocate( addressMap, address, length );
+#if IOTRACKING
+    IOTrackingRemove(gIOMapTracking, &mapping->fTracking, length);
+#endif
 
     return (err);
 }
@@ -3774,7 +3763,7 @@ IOReturn IOMemoryMap::unmap( void )
     LOCK;
 
     if( fAddress && fAddressMap && (0 == fSuperMap) && fMemory
-       && (0 == (fOptions & kIOMapStatic))) {
+        && (0 == (kIOMapStatic & fOptions))) {
 
         err = fMemory->doUnmap(fAddressMap, (IOVirtualAddress) this, 0);
 
@@ -3797,8 +3786,11 @@ IOReturn IOMemoryMap::unmap( void )
 void IOMemoryMap::taskDied( void )
 {
     LOCK;
-    if (fUserClientUnmap)
-       unmap();
+    if (fUserClientUnmap) unmap();
+#if IOTRACKING
+    else                  IOTrackingRemove(gIOMapTracking, &fTracking, fLength);
+#endif
+
     if( fAddressMap) {
         vm_map_deallocate(fAddressMap);
         fAddressMap = 0;
@@ -3958,10 +3950,13 @@ IOReturn IOMemoryMap::wireRange(
     IOReturn kr;
     mach_vm_address_t start = trunc_page_64(fAddress + offset);
     mach_vm_address_t end   = round_page_64(fAddress + offset + length);
-    
-    if (kIODirectionOutIn & options)
+    vm_prot_t prot;
+
+    prot = (kIODirectionOutIn & options);
+    if (prot)
     {
-       kr = vm_map_wire(fAddressMap, start, end, (kIODirectionOutIn & options), FALSE);
+       prot |= VM_PROT_MEMORY_TAG_MAKE(IOMemoryTag(kernel_map));
+       kr = vm_map_wire(fAddressMap, start, end, prot, FALSE);
     }
     else
     {
@@ -4009,9 +4004,13 @@ void IOMemoryDescriptor::initialize( void )
 
 void IOMemoryDescriptor::free( void )
 {
-    if( _mappings)
-       _mappings->release();
+    if( _mappings) _mappings->release();
 
+    if (reserved)
+    {
+       IODelete(reserved, IOMemoryDescriptorReserved, 1);
+       reserved = NULL;
+    }
     super::free();
 }
 
@@ -4117,9 +4116,10 @@ IOReturn IOMemoryMap::redirect(IOMemoryDescriptor * newBackingMemory,
 
        if (!fRedirUPL && fMemory->_memRef && (1 == fMemory->_memRef->count))
        {
-           vm_size_t size = round_page(fLength);
-           int flags = UPL_COPYOUT_FROM | UPL_SET_INTERNAL 
-                       | UPL_SET_LITE | UPL_SET_IO_WIRE | UPL_BLOCK_ACCESS;
+           upl_size_t          size = round_page(fLength);
+           upl_control_flags_t flags = UPL_COPYOUT_FROM | UPL_SET_INTERNAL 
+                                       | UPL_SET_LITE | UPL_SET_IO_WIRE | UPL_BLOCK_ACCESS
+                                       | UPL_MEMORY_TAG_MAKE(IOMemoryTag(kernel_map));
            if (KERN_SUCCESS != memory_object_iopl_request(fMemory->_memRef->entries[0].entry, 0, &size, &fRedirUPL,
                                            NULL, NULL,
                                            &flags))
@@ -4451,144 +4451,6 @@ bool IOGeneralMemoryDescriptor::serialize(OSSerialize * s) const
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
-#if DEVELOPMENT || DEBUG
-
-extern "C" void IOMemoryDescriptorTest(int x)
-{
-    IOGeneralMemoryDescriptor * md;
-
-    vm_offset_t data[2];
-    vm_size_t  bsize = 16*1024*1024;
-
-    vm_size_t  srcsize, srcoffset, mapoffset, size;
-    
-    kern_return_t kr;
-
-    kr = vm_allocate(kernel_map, &data[0], bsize, VM_FLAGS_ANYWHERE);
-    vm_inherit(kernel_map, data[0] + 1*4096, 4096, VM_INHERIT_NONE);
-    vm_inherit(kernel_map, data[0] + 16*4096, 4096, VM_INHERIT_NONE);
-
-    kprintf("data 0x%lx, 0x%lx\n", (long)data[0], (long)data[1]);
-
-    uint32_t idx, offidx;
-    for (idx = 0; idx < (bsize / sizeof(uint32_t)); idx++)
-    {
-       ((uint32_t*)data[0])[idx] = idx;    
-    }
-
-    for (srcoffset = 0; srcoffset < bsize; srcoffset = ((srcoffset << 1) + 0x40c))
-    {
-       for (srcsize = 4; srcsize < (bsize - srcoffset - 1); srcsize = ((srcsize << 1) + 0x3fc))
-       {
-           IOAddressRange ranges[3];
-           uint32_t rangeCount = 1;
-
-           bzero(&ranges[0], sizeof(ranges));
-           ranges[0].address = data[0] + srcoffset;
-           ranges[0].length  = srcsize;
-
-           if (srcsize > 5*page_size)
-           {
-               ranges[0].length  = 7634;
-               ranges[1].length  = 9870;
-               ranges[2].length  = srcsize - ranges[0].length - ranges[1].length;
-               ranges[1].address = ranges[0].address + ranges[0].length;
-               ranges[2].address = ranges[1].address + ranges[1].length;
-               rangeCount = 3;     
-           }
-           else if ((srcsize > 2*page_size) && !(page_mask & srcoffset))
-           {
-               ranges[0].length  = 4096;
-               ranges[1].length  = 4096;
-               ranges[2].length  = srcsize - ranges[0].length - ranges[1].length;
-               ranges[0].address = data[0] + srcoffset + 4096;
-               ranges[1].address = data[0] + srcoffset;
-               ranges[2].address = ranges[0].address + ranges[0].length;
-               rangeCount = 3;     
-           }
-
-           md = OSDynamicCast(IOGeneralMemoryDescriptor, 
-               IOMemoryDescriptor::withAddressRanges(&ranges[0], rangeCount, kIODirectionInOut, kernel_task));
-           assert(md);
-
-           kprintf("IOMemoryReferenceCreate [0x%lx @ 0x%lx]\n[0x%llx, 0x%llx],\n[0x%llx, 0x%llx],\n[0x%llx, 0x%llx]\n", 
-                   (long) srcsize, (long) srcoffset,
-                   (long long) ranges[0].address - data[0], (long long) ranges[0].length,
-                   (long long) ranges[1].address - data[0], (long long) ranges[1].length,
-                   (long long) ranges[2].address - data[0], (long long) ranges[2].length);
-
-           if (kIOReturnSuccess == kr)
-           {
-               for (mapoffset = 0; mapoffset < srcsize; mapoffset = ((mapoffset << 1) + 0xf00))
-               {
-                   for (size = 4; size < (srcsize - mapoffset - 1); size = ((size << 1) + 0x20))
-                   {
-                       IOMemoryMap     * map;
-                       mach_vm_address_t addr = 0;
-                       uint32_t          data;
-
-                       kprintf("<mapRef [0x%lx @ 0x%lx]\n", (long) size, (long) mapoffset);
-
-                       map = md->createMappingInTask(kernel_task, 0, kIOMapAnywhere, mapoffset, size);
-                       if (map) addr = map->getAddress();
-                       else kr = kIOReturnError;
-
-                       kprintf(">mapRef 0x%x %llx\n", kr, addr);
-
-                       if (kIOReturnSuccess != kr) break;
-                       kr = md->prepare();
-                       if (kIOReturnSuccess != kr)
-                       {
-                           kprintf("prepare() fail 0x%x\n", kr);
-                           break;
-                       }
-                       for (idx = 0; idx < size; idx += sizeof(uint32_t))
-                       {
-                           offidx = (idx + mapoffset + srcoffset);
-                           if ((srcsize <= 5*page_size) && (srcsize > 2*page_size) && !(page_mask & srcoffset))
-                           {
-                               if (offidx < 8192) offidx ^= 0x1000;
-                           }
-                           offidx /= sizeof(uint32_t);
-
-                           if (offidx != ((uint32_t*)addr)[idx/sizeof(uint32_t)]) 
-                           {
-                               kprintf("vm mismatch @ 0x%x, 0x%lx, 0x%lx, \n", idx, (long) srcoffset, (long) mapoffset);
-                               kr = kIOReturnBadMedia;
-                           }
-                           else
-                           {
-                               if (sizeof(data) != md->readBytes(mapoffset + idx, &data, sizeof(data))) data = 0;
-                               if (offidx != data) 
-                               {
-                                   kprintf("phys mismatch @ 0x%x, 0x%lx, 0x%lx, \n", idx, (long) srcoffset, (long) mapoffset);
-                                   kr = kIOReturnBadMedia;
-                               }
-                           }
-                       }
-                       md->complete();
-                       map->release();
-                       kprintf("unmapRef %llx\n", addr);
-                   }
-                   if (kIOReturnSuccess != kr) break;
-               }
-           }
-            if (kIOReturnSuccess != kr) break;
-       }
-       if (kIOReturnSuccess != kr) break;
-    }
-
-    if (kIOReturnSuccess != kr) kprintf("FAIL: src 0x%lx @ 0x%lx, map 0x%lx @ 0x%lx\n", 
-                                       (long) srcsize, (long) srcoffset, (long) size, (long) mapoffset);
-
-    vm_deallocate(kernel_map, data[0], bsize);
-//    vm_deallocate(kernel_map, data[1], size);
-}
-
-#endif  /* DEVELOPMENT || DEBUG */
-
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-
 OSMetaClassDefineReservedUsed(IOMemoryDescriptor, 0);
 #ifdef __LP64__
 OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 1);
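
The populateDevicePager() rewrite above collapses the old contiguous/non-contiguous split into one loop that walks a segment in chunks of either page_size or round_page(segLen). A minimal stand-alone sketch of that shape, with populate() standing in for device_pager_populate_object() and a power-of-two round-up standing in for round_page():

#include <cstdint>

typedef uint64_t addr64_t;
typedef int      kern_return_t;
enum { KERN_SUCCESS = 0 };

// Walk one physical segment and hand it to the pager either page by page or,
// for a contiguous pager, as a single rounded-up chunk -- mirroring the new
// loop in IOMemoryDescriptor::populateDevicePager().  populate() is a
// stand-in callable: (pagerOffset, pageFrameNumber, byteCount) -> kern_return_t.
template <typename PopulateFn>
kern_return_t populateSegment(addr64_t physAddr, uint64_t segLen,
                              uint64_t &pagerOffset, bool pagerContig,
                              uint64_t pageSize, PopulateFn populate)
{
    kern_return_t err   = KERN_SUCCESS;
    uint64_t      chunk = pagerContig
        ? ((segLen + pageSize - 1) & ~(pageSize - 1))   // assumes pageSize is a power of two
        : pageSize;

    for (uint64_t page = 0; (page < segLen) && (KERN_SUCCESS == err); page += chunk)
    {
        err = populate(pagerOffset, (physAddr + page) / pageSize, chunk);
        pagerOffset += chunk;
    }
    return err;
}
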
index 6d209ab1f9c878f2b81e4d90daf632a2dac880c3..fd17233c14897bed9df9003ae22761d62684bab6 100644 (file)
@@ -67,6 +67,8 @@ bool IOMultiMemoryDescriptor::initWithDescriptors(
                                   IODirection           withDirection,
                                   bool                  asReference )
 {
+    unsigned index;
+    IOOptionBits copyFlags;
     //
     // Initialize an IOMultiMemoryDescriptor. The "buffer" is made up of several
     // memory descriptors, that are to be chained end-to-end to make up a single
@@ -117,7 +119,7 @@ bool IOMultiMemoryDescriptor::initWithDescriptors(
                /* bytes */ withCount * sizeof(IOMemoryDescriptor *) );
     }
 
-    for ( unsigned index = 0; index < withCount; index++ ) 
+    for ( index = 0; index < withCount; index++ )
     {
         descriptors[index]->retain();
         _length += descriptors[index]->getLength();
@@ -126,6 +128,16 @@ bool IOMultiMemoryDescriptor::initWithDescriptors(
               (withDirection & kIOMemoryDirectionMask));
     }
 
+    enum { kCopyFlags = kIOMemoryBufferPageable };
+    copyFlags = 0;
+    for ( index = 0; index < withCount; index++ )
+    {
+       if (!index)  copyFlags =  (kCopyFlags & descriptors[index]->_flags);
+       else if     (copyFlags != (kCopyFlags & descriptors[index]->_flags)) break;
+    }
+    if (index < withCount) return (false);
+    _flags |= copyFlags;
+
     return true;
 }
 
@@ -174,9 +186,9 @@ IOReturn IOMultiMemoryDescriptor::prepare(IODirection forDirection)
 
     if ( status != kIOReturnSuccess )
     {
-        for ( unsigned indexUndo = 0; indexUndo <= index; indexUndo++ )
+        for ( unsigned indexUndo = 0; indexUndo < index; indexUndo++ )
         {
-            statusUndo = _descriptors[index]->complete(forDirection);
+            statusUndo = _descriptors[indexUndo]->complete(forDirection);
             assert(statusUndo == kIOReturnSuccess);
         }
     }
@@ -212,10 +224,9 @@ IOReturn IOMultiMemoryDescriptor::complete(IODirection forDirection)
     return statusFinal;
 }
 
-addr64_t IOMultiMemoryDescriptor::getPhysicalSegment(
-                                                       IOByteCount   offset,
-                                                       IOByteCount * length,
-                                                       IOOptionBits  options )
+addr64_t IOMultiMemoryDescriptor::getPhysicalSegment(IOByteCount   offset,
+                                                     IOByteCount * length,
+                                                     IOOptionBits  options)
 {
     //
     // This method returns the physical address of the byte at the given offset
@@ -238,3 +249,140 @@ addr64_t IOMultiMemoryDescriptor::getPhysicalSegment(
 
     return 0;
 }
+
+#include "IOKitKernelInternal.h"
+
+IOReturn IOMultiMemoryDescriptor::doMap(vm_map_t           __addressMap,
+                                        IOVirtualAddress *  __address,
+                                        IOOptionBits       options,
+                                        IOByteCount        __offset,
+                                        IOByteCount        __length)
+{
+    IOMemoryMap *     mapping = (IOMemoryMap *) *__address;
+    vm_map_t          map     = mapping->fAddressMap;
+    mach_vm_size_t    offset  = mapping->fOffset;
+    mach_vm_size_t    length  = mapping->fLength;
+    mach_vm_address_t address = mapping->fAddress;
+
+    kern_return_t     err;
+    IOOptionBits      subOptions;
+    mach_vm_size_t    mapOffset;
+    mach_vm_size_t    bytesRemaining, chunk;
+    mach_vm_address_t nextAddress;
+    IOMemoryDescriptorMapAllocRef ref;
+    vm_prot_t                     prot;
+
+    do
+    {
+        prot = VM_PROT_READ;
+        if (!(kIOMapReadOnly & options)) prot |= VM_PROT_WRITE;
+        ref.map     = map;
+       ref.tag     = IOMemoryTag(map);
+        ref.options = options;
+        ref.size    = length;
+        ref.prot    = prot;
+        if (options & kIOMapAnywhere)
+            // vm_map looks for addresses above here, even when VM_FLAGS_ANYWHERE
+            ref.mapped = 0;
+        else
+            ref.mapped = mapping->fAddress;
+
+        if ((ref.map == kernel_map) && (kIOMemoryBufferPageable & _flags))
+            err = IOIteratePageableMaps(ref.size, &IOMemoryDescriptorMapAlloc, &ref);
+        else
+            err = IOMemoryDescriptorMapAlloc(ref.map, &ref);
+
+        if (KERN_SUCCESS != err) break;
+
+        address = ref.mapped;
+        mapping->fAddress = address;
+
+        mapOffset = offset;
+        bytesRemaining = length;
+        nextAddress = address;
+        assert(mapOffset <= _length);
+        subOptions = (options & ~kIOMapAnywhere) | kIOMapOverwrite;
+
+        for (unsigned index = 0; bytesRemaining && (index < _descriptorsCount); index++) 
+        {
+            chunk = _descriptors[index]->getLength();
+            if (mapOffset >= chunk)
+            {
+                mapOffset -= chunk;
+                continue;
+            }
+            chunk -= mapOffset;
+            if (chunk > bytesRemaining) chunk = bytesRemaining;
+            IOMemoryMap * subMap;
+            subMap = _descriptors[index]->createMappingInTask(mapping->fAddressTask, nextAddress, subOptions, mapOffset, chunk );
+            if (!subMap) break;
+            subMap->release();          // kIOMapOverwrite means it will not deallocate
+
+            bytesRemaining -= chunk;
+            nextAddress += chunk;
+            mapOffset = 0;
+        }
+        if (bytesRemaining) err = kIOReturnUnderrun;
+    }
+    while (false);
+
+    if (kIOReturnSuccess == err)
+    {
+#if IOTRACKING
+        IOTrackingAdd(gIOMapTracking, &mapping->fTracking, length, false);
+#endif
+    }
+    else
+    {
+        mapping->release();
+        mapping = 0;
+    }
+
+    return (err);
+}
+
+IOReturn IOMultiMemoryDescriptor::setPurgeable( IOOptionBits newState,
+                                                IOOptionBits * oldState )
+{
+    IOReturn     err;
+    IOOptionBits totalState, state;
+
+    totalState = kIOMemoryPurgeableNonVolatile;
+    for (unsigned index = 0; index < _descriptorsCount; index++) 
+    {
+        err = _descriptors[index]->setPurgeable(newState, &state);
+        if (kIOReturnSuccess != err) break;
+
+        if (kIOMemoryPurgeableEmpty == state)              totalState = kIOMemoryPurgeableEmpty;
+        else if (kIOMemoryPurgeableEmpty == totalState)    continue;
+        else if (kIOMemoryPurgeableVolatile == totalState) continue;
+        else if (kIOMemoryPurgeableVolatile == state)      totalState = kIOMemoryPurgeableVolatile;
+        else totalState = kIOMemoryPurgeableNonVolatile;
+    }
+    if (oldState) *oldState = totalState;
+
+    return (err);
+}
+
+IOReturn IOMultiMemoryDescriptor::getPageCounts(IOByteCount * pResidentPageCount,
+                                               IOByteCount * pDirtyPageCount)
+{
+    IOReturn    err;
+    IOByteCount totalResidentPageCount, totalDirtyPageCount;
+    IOByteCount residentPageCount, dirtyPageCount;
+
+    err = kIOReturnSuccess;
+    totalResidentPageCount = totalDirtyPageCount = 0;
+    for (unsigned index = 0; index < _descriptorsCount; index++) 
+    {
+        err = _descriptors[index]->getPageCounts(&residentPageCount, &dirtyPageCount);
+        if (kIOReturnSuccess != err) break;
+        totalResidentPageCount += residentPageCount;
+        totalDirtyPageCount    += dirtyPageCount;
+    }
+
+    if (pResidentPageCount) *pResidentPageCount = totalResidentPageCount;
+    if (pDirtyPageCount)    *pDirtyPageCount = totalDirtyPageCount;
+
+    return (err);
+}
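
The setPurgeable() override added above folds the per-descriptor states into a single answer with the precedence Empty > Volatile > NonVolatile. A small user-space sketch of that aggregation rule, using stand-in enum values for the kIOMemoryPurgeable* constants:

#include <cstdint>
#include <vector>

// Stand-ins for kIOMemoryPurgeableNonVolatile / Volatile / Empty.
enum PurgeableState : uint32_t { kNonVolatile, kVolatile, kEmpty };

// Aggregate the states of the sub-descriptors the way the new
// IOMultiMemoryDescriptor::setPurgeable() does: any Empty descriptor makes
// the whole buffer Empty, otherwise any Volatile one makes it Volatile,
// otherwise the result stays NonVolatile.
static PurgeableState aggregatePurgeable(const std::vector<PurgeableState> &states)
{
    PurgeableState total = kNonVolatile;
    for (PurgeableState s : states)
    {
        if (s == kEmpty)             total = kEmpty;
        else if (total == kEmpty)    continue;
        else if (total == kVolatile) continue;
        else if (s == kVolatile)     total = kVolatile;
    }
    return total;
}
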
index 5af96b290baff8718f1668b570c9884cc4edc6bf..17cd841bcefa9f0776f5d9487e0c3ae9f5c0cc50 100644 (file)
 #include <IOKit/IOPlatformExpert.h>
 #include <IOKit/IOUserClient.h>
 #include <IOKit/IOKitKeys.h>
+#include <IOKit/IOKitKeysPrivate.h>
 #include <kern/debug.h>
 #include <pexpert/pexpert.h>
 
+#if CONFIG_MACF
+extern "C" {
+#include <security/mac.h>
+#include <security/mac_framework.h>
+};
+#endif /* MAC */
+
 #define super IOService
 
 #define kIONVRAMPrivilege      kIOClientPrivilegeAdministrator
@@ -67,7 +75,7 @@ bool IODTNVRAM::init(IORegistryEntry *old, const IORegistryPlane *plane)
   // <rdar://problem/9529235> race condition possible between
   // IODTNVRAM and IONVRAMController (restore loses boot-args)
   initProxyData();
-  
+
   return true;
 }
 
@@ -109,6 +117,8 @@ void IODTNVRAM::registerNVRAMController(IONVRAMController *nvram)
   if (!_isProxied) {
     _nvramController->read(0, _nvramImage, kIODTNVRAMImageSize);
     initNVRAMImage();
+  } else {
+    syncOFVariables();
   }
 }
 
@@ -217,9 +227,10 @@ void IODTNVRAM::initNVRAMImage(void)
       // Set the partition checksum.
       _nvramImage[freePartitionOffset + 1] =
        calculatePartitionChecksum(_nvramImage + freePartitionOffset);
-      
-      // Set the nvram image as dirty.
-      _nvramImageDirty = true;
+
+      if (_nvramController != 0) {
+        _nvramController->write(0, _nvramImage, kIODTNVRAMImageSize);
+      }
     }
   } else {
     _piImage = _nvramImage + _piPartitionOffset;
@@ -231,20 +242,21 @@ void IODTNVRAM::initNVRAMImage(void)
   initOFVariables();
 }
 
-void IODTNVRAM::sync(void)
+void IODTNVRAM::syncInternal(bool rateLimit)
 {
-  if (!_nvramImageDirty && !_ofImageDirty) return;
-  
-  // Don't try to sync OF Variables if the system has already paniced.
-  if (!_systemPaniced) syncOFVariables();
-  
   // Don't try to perform controller operations if none has been registered.  
   if (_nvramController == 0) return;
+
+  // Rate limit requests to sync. Drivers that need this rate limiting will
+  // shadow the data and only write to flash when they get a sync call
+  if (rateLimit && !safeToSync()) return;
   
-  _nvramController->write(0, _nvramImage, kIODTNVRAMImageSize);
   _nvramController->sync();
-  
-  _nvramImageDirty = false;
+}
+
+void IODTNVRAM::sync(void)
+{
+  syncInternal(false);
 }
 
 bool IODTNVRAM::serializeProperties(OSSerialize *s) const
@@ -280,7 +292,11 @@ bool IODTNVRAM::serializeProperties(OSSerialize *s) const
       
       variablePerm = getOFVariablePerm(key);
       if ((hasPrivilege || (variablePerm != kOFVariablePermRootOnly)) &&
-         ( ! (variablePerm == kOFVariablePermKernelOnly && current_task() != kernel_task) )) {}
+         ( ! (variablePerm == kOFVariablePermKernelOnly && current_task() != kernel_task) )
+#if CONFIG_MACF
+          && (current_task() == kernel_task || mac_iokit_check_nvram_get(kauth_cred_get(), key->getCStringNoCopy()) == 0)
+#endif
+         ) { }
       else dict->removeObject(key);
     }
   }
@@ -309,6 +325,12 @@ OSObject *IODTNVRAM::copyProperty(const OSSymbol *aKey) const
   }
   if (variablePerm == kOFVariablePermKernelOnly && current_task() != kernel_task) return 0;
 
+#if CONFIG_MACF
+  if (current_task() != kernel_task &&
+      mac_iokit_check_nvram_get(kauth_cred_get(), aKey->getCStringNoCopy()) != 0)
+    return 0;
+#endif
+
   IOLockLock(_ofLock);
   theObject = _ofDict->getObject(aKey);
   if (theObject) theObject->retain();
@@ -370,6 +392,12 @@ bool IODTNVRAM::setProperty(const OSSymbol *aKey, OSObject *anObject)
 
   // Don't allow change of 'aapl,panic-info'.
   if (aKey->isEqualTo(kIODTNVRAMPanicInfoKey)) return false;
+
+#if CONFIG_MACF
+  if (current_task() != kernel_task &&
+      mac_iokit_check_nvram_set(kauth_cred_get(), aKey->getCStringNoCopy(), anObject) != 0)
+    return false;
+#endif
   
   // Make sure the object is of the correct type.
   propType = getOFVariableType(aKey);
@@ -403,9 +431,9 @@ bool IODTNVRAM::setProperty(const OSSymbol *aKey, OSObject *anObject)
   IOLockLock(_ofLock);
   result = _ofDict->setObject(aKey, propObject);
   IOLockUnlock(_ofLock);
-  
+
   if (result) {
-    _ofImageDirty = true;
+    syncOFVariables();
   }
   
   return result;
@@ -429,15 +457,24 @@ void IODTNVRAM::removeProperty(const OSSymbol *aKey)
   // Don't allow change of 'aapl,panic-info'.
   if (aKey->isEqualTo(kIODTNVRAMPanicInfoKey)) return;
   
+#if CONFIG_MACF
+  if (current_task() != kernel_task &&
+      mac_iokit_check_nvram_delete(kauth_cred_get(), aKey->getCStringNoCopy()) != 0)
+    return;
+#endif
+
   // If the object exists, remove it from the dictionary.
 
   IOLockLock(_ofLock);
   result = _ofDict->getObject(aKey) != 0;
   if (result) {
     _ofDict->removeObject(aKey);
-    _ofImageDirty = true;
   }
   IOLockUnlock(_ofLock);
+
+  if (result) {
+    syncOFVariables();
+  }
 }
 
 IOReturn IODTNVRAM::setProperties(OSObject *properties)
@@ -472,14 +509,15 @@ IOReturn IODTNVRAM::setProperties(OSObject *properties)
                } else {
                        result = false;
                }
-    } else if(key->isEqualTo(kIONVRAMSyncNowPropertyKey)) {
+    } else if(key->isEqualTo(kIONVRAMSyncNowPropertyKey) || key->isEqualTo(kIONVRAMForceSyncNowPropertyKey)) {
                tmpStr = OSDynamicCast(OSString, object);
                if (tmpStr != 0) {
 
-                       result = true; // We are not going to gaurantee sync, this is best effort
+                       result = true;
+
+      // We still want to throttle NVRAM commit rate for SyncNow. ForceSyncNow is provided as a really big hammer.
 
-                       if(safeToSync())
-                               sync();
+                       syncInternal(key->isEqualTo(kIONVRAMSyncNowPropertyKey));
 
                } else {
                        result = false;
@@ -587,7 +625,9 @@ IOReturn IODTNVRAM::writeNVRAMPartition(const OSSymbol *partitionID,
   
   bcopy(buffer, _nvramImage + partitionOffset + offset, length);
   
-  _nvramImageDirty = true;
+  if (_nvramController != 0) {
+    _nvramController->write(0, _nvramImage, kIODTNVRAMImageSize);
+  }
   
   return kIOReturnSuccess;
 }
@@ -605,7 +645,9 @@ IOByteCount IODTNVRAM::savePanicInfo(UInt8 *buffer, IOByteCount length)
   // Save the Panic Info length.
   *(UInt32 *)_piImage = length;
   
-  _nvramImageDirty = true;
+  if (_nvramController != 0) {
+    _nvramController->write(0, _nvramImage, kIODTNVRAMImageSize);
+  }
   /* 
    * This prevents OF variables from being committed if the system has panicked
    */
@@ -701,7 +743,9 @@ IOReturn IODTNVRAM::initOFVariables(void)
       
       // Clear the length from _piImage and mark dirty.
       *(UInt32 *)_piImage = 0;
-      _nvramImageDirty = true;
+      if (_nvramController != 0) {
+        _nvramController->write(0, _nvramImage, kIODTNVRAMImageSize);
+      }
     }
   }
 
@@ -717,9 +761,7 @@ IOReturn IODTNVRAM::syncOFVariables(void)
   OSObject             *tmpObject;
   OSCollectionIterator *iter;
   
-  if ((_ofImage == 0) || (_ofDict == 0)) return kIOReturnNotReady;
-  
-  if (!_ofImageDirty) return kIOReturnSuccess;
+  if ((_ofImage == 0) || (_ofDict == 0) || _systemPaniced) return kIOReturnNotReady;
   
   buffer = tmpBuffer = IONew(UInt8, _ofPartitionSize);
   if (buffer == 0) return kIOReturnNoMemory;
@@ -759,8 +801,9 @@ IOReturn IODTNVRAM::syncOFVariables(void)
   
   if (!ok) return kIOReturnBadArgument;
   
-  _ofImageDirty = false;
-  _nvramImageDirty = true;
+  if (_nvramController != 0) {
+    _nvramController->write(0, _nvramImage, kIODTNVRAMImageSize);
+  }
   
   return kIOReturnSuccess;
 }
@@ -1427,12 +1470,13 @@ IOReturn IODTNVRAM::writeNVRAMPropertyType1(IORegistryEntry *entry,
 
   if (ok) {
     ok = _ofDict->setObject(_registryPropertiesKey, data);
-    if (ok) _ofImageDirty = true;
   }
 
   IOLockUnlock(_ofLock);
   if (data) data->release();
 
+  if (ok) syncOFVariables();
+
   return ok ? kIOReturnSuccess : kIOReturnNoMemory;
 }
 
index 2b34e768b034b503ec56fd7c1e80a0e162dbe3f2..bccf6f45f1df53e8f00064fa1fd82f34b3760ad9 100644 (file)
@@ -51,8 +51,8 @@ private:
     IOLock *        queueLock;
 
 protected:
-    virtual bool checkForWork( void );
-    virtual bool init( OSObject * owner, Action action );
+    virtual bool checkForWork( void ) APPLE_KEXT_OVERRIDE;
+    virtual bool init( OSObject * owner, Action action ) APPLE_KEXT_OVERRIDE;
 
 public:
     static IOPMPowerStateQueue * PMPowerStateQueue( OSObject * owner, Action action );
index 73738c14ba9eaa6915d2230c0617caec22f3288a..a2762212863f3bb29974ac0314f860bbeaa61131 100644 (file)
@@ -47,6 +47,7 @@
 #include "IOPMPowerStateQueue.h"
 #include <IOKit/IOCatalogue.h>
 #include <IOKit/IOReportMacros.h>
+#include "IOKitKernelInternal.h"
 #if HIBERNATION
 #include <IOKit/IOHibernatePrivate.h>
 #endif
@@ -63,6 +64,7 @@
 
 __BEGIN_DECLS
 #include <mach/shared_region.h>
+#include <kern/clock.h>
 __END_DECLS
 
 #if defined(__i386__) || defined(__x86_64__)
@@ -172,7 +174,7 @@ extern "C" addr64_t     kvtophys(vm_offset_t va);
 extern "C" int  stack_snapshot_from_kernel(pid_t pid, void *buf, uint32_t size, uint32_t flags, unsigned *bytesTraced);
 
 static void idleSleepTimerExpired( thread_call_param_t, thread_call_param_t );
-static void notifySystemShutdown( IOService * root, unsigned long event );
+static void notifySystemShutdown( IOService * root, uint32_t messageType );
 static void handleAggressivesFunction( thread_call_param_t, thread_call_param_t );
 static void pmEventTimeStamp(uint64_t *recordTS);
 
@@ -192,13 +194,6 @@ static const OSSymbol *sleepMessagePEFunction   = NULL;
                            | kIOPMSupportedOnBatt \
                            | kIOPMSupportedOnUPS)
 
-enum
-{
-    // not idle around autowake time, secs
-    kAutoWakePreWindow  = 45,
-    kAutoWakePostWindow = 15
-};
-
 #define kLocalEvalClamshellCommand  (1 << 15)
 #define kIdleSleepRetryInterval     (3 * 60)
 
@@ -316,6 +311,7 @@ static uint32_t         gAggressivesState = 0;
 uuid_string_t bootsessionuuid_string;
 
 static uint32_t         gDarkWakeFlags = kDarkWakeFlagHIDTickleNone;
+static uint32_t         gNoIdleFlag = 0;
 static PMStatsStruct    gPMStats;
 
 #if HIBERNATION
@@ -330,6 +326,19 @@ struct timeval gIOLastWakeTime;
 static char gWakeReasonString[128];
 static bool gWakeReasonSysctlRegistered = false;
 
+#if defined(__i386__) || defined(__x86_64__)
+static bool gSpinDumpBufferFull = false;
+#endif
+
+static unsigned int     gPMHaltBusyCount;
+static unsigned int     gPMHaltIdleCount;
+static int              gPMHaltDepth;
+static uint32_t         gPMHaltMessageType;
+static IOLock *         gPMHaltLock  = 0;
+static OSArray *        gPMHaltArray = 0;
+static const OSSymbol * gPMHaltClientAcknowledgeKey = 0;
+static bool             gPMQuiesced;
+
 // Constants used as arguments to IOPMrootDomain::informCPUStateChange
 #define kCPUUnknownIndex    9999999
 enum {
@@ -357,7 +366,7 @@ class PMSettingHandle : public OSObject
 
 private:
     PMSettingObject *pmso;
-    void free(void);
+    void free(void) APPLE_KEXT_OVERRIDE;
 };
 
 /*
@@ -381,7 +390,7 @@ private:
     uint32_t                        settingCount;
     bool                            disabled;
 
-    void free(void);
+    void free(void) APPLE_KEXT_OVERRIDE;
 
 public:
     static PMSettingObject *pmSettingObject(
@@ -431,7 +440,7 @@ public:
     void                        traceLoginWindowPhase(uint8_t phase);
     int                         recordTopLevelPCIDevice(IOService *);
     void                        RTC_TRACE(void);
-    virtual bool                serialize(OSSerialize *s) const;
+    virtual bool                serialize(OSSerialize *s) const APPLE_KEXT_OVERRIDE;
 
     IOPMTracePointHandler       tracePointHandler;
     void *                      tracePointTarget;
@@ -525,7 +534,7 @@ public:
     static  void main( void * arg, wait_result_t waitResult );
     static  void work( PMHaltWorker * me );
     static  void checkTimeout( PMHaltWorker * me, AbsoluteTime * now );
-    virtual void free( void );
+    virtual void free( void ) APPLE_KEXT_OVERRIDE;
 };
 
 OSDefineMetaClassAndFinalStructors( PMHaltWorker, OSObject )
@@ -582,6 +591,9 @@ extern "C"
     void IOSystemShutdownNotification(void)
     {
         IOPMRootDomainWillShutdown();
+#if HIBERNATION
+        IOHibernateSystemPostWake();
+#endif
         if (OSCompareAndSwap(0, 1, &gPagingOff))
         {
             gRootDomain->handlePlatformHaltRestart(kPEPagingOff);
@@ -646,6 +658,16 @@ IOPMrootDomain * IOPMrootDomain::construct( void )
     return( root );
 }
 
+//******************************************************************************
+// updateConsoleUsersCallout
+//
+//******************************************************************************
+
+static void updateConsoleUsersCallout(thread_call_param_t p0, thread_call_param_t p1)
+{
+    IOService::updateConsoleUsers(NULL, kIOMessageSystemHasPoweredOn);
+}
+
 //******************************************************************************
 
 static void disk_sync_callout( thread_call_param_t p0, thread_call_param_t p1 )
@@ -664,6 +686,9 @@ static void disk_sync_callout( thread_call_param_t p0, thread_call_param_t p1 )
     else
     {
         IOHibernateSystemPostWake();
+
+        if (gRootDomain)
+            gRootDomain->sleepWakeDebugSaveSpinDumpFile();
     }
 #endif
 
@@ -679,7 +704,7 @@ static void hib_debugSetup_callout( thread_call_param_t p0, thread_call_param_t
     uint32_t    notifyRef  = (uint32_t)(uintptr_t) p1;
 
 #if    HIBERNATION
-    IOHibernateOpenForDebugData();
+    IOOpenDebugDataFile(kSleepWakeStackBinFilename, SWD_BUF_SIZE);
 #endif
 
     rootDomain->allowPowerChange(notifyRef);
@@ -753,6 +778,8 @@ static SYSCTL_PROC(_kern, OID_AUTO, willshutdown,
         CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
         0, 0, sysctl_willshutdown, "I", "");
 
+extern struct sysctl_oid sysctl__kern_iokittest;
+
 
 static int
 sysctl_progressmeterenable
@@ -791,6 +818,26 @@ static SYSCTL_PROC(_kern, OID_AUTO, progressmeter,
         0, 0, sysctl_progressmeter, "I", "");
 
 
+
+
+static int
+sysctl_consoleoptions
+(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+    int error;
+    int new_value, changed;
+
+    error = sysctl_io_number(req, vc_user_options, sizeof(int), &new_value, &changed);
+
+    if (changed) vc_set_options(new_value);
+
+    return (error);
+}
+
+static SYSCTL_PROC(_kern, OID_AUTO, consoleoptions,
+        CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+        0, 0, sysctl_consoleoptions, "I", "");
+
 static int
 sysctl_wakereason SYSCTL_HANDLER_ARGS
 {
@@ -807,7 +854,33 @@ SYSCTL_PROC(_kern, OID_AUTO, wakereason,
     CTLTYPE_STRING| CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
     NULL, 0, sysctl_wakereason, "A", "wakereason");
 
+static int
+sysctl_targettype SYSCTL_HANDLER_ARGS
+{
+    IOService * root;
+    OSObject *  obj;
+    OSData *    data;
+    char        tt[32];
+
+    tt[0] = '\0';
+    root = IOService::getServiceRoot();
+    if (root && (obj = root->copyProperty(gIODTTargetTypeKey)))
+    {
+       if ((data = OSDynamicCast(OSData, obj)))
+       {
+           strlcpy(tt, (const char *) data->getBytesNoCopy(), sizeof(tt));
+       }
+       obj->release();
+    }
+    return sysctl_io_string(req, tt, 0, 0, NULL);
+}
+
+SYSCTL_PROC(_hw, OID_AUTO, targettype,
+    CTLTYPE_STRING| CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+    NULL, 0, sysctl_targettype, "A", "targettype");
+
 static SYSCTL_INT(_debug, OID_AUTO, darkwake, CTLFLAG_RW, &gDarkWakeFlags, 0, "");
+static SYSCTL_INT(_debug, OID_AUTO, noidle, CTLFLAG_RW, &gNoIdleFlag, 0, "");
 
 static const OSSymbol * gIOPMSettingAutoWakeCalendarKey;
 static const OSSymbol * gIOPMSettingAutoWakeSecondsKey;
@@ -877,6 +950,7 @@ bool IOPMrootDomain::start( IOService * nub )
         };
 
     PE_parse_boot_argn("darkwake", &gDarkWakeFlags, sizeof(gDarkWakeFlags));
+    PE_parse_boot_argn("noidle", &gNoIdleFlag, sizeof(gNoIdleFlag));
 
     queue_init(&aggressivesQueue);
     aggressivesThreadCall = thread_call_allocate(handleAggressivesFunction, this);
@@ -899,6 +973,10 @@ bool IOPMrootDomain::start( IOService * nub )
                         &hib_debugSetup_callout,
                         (thread_call_param_t) this);
 
+    updateConsoleUsersEntry = thread_call_allocate(
+                        &updateConsoleUsersCallout,
+                        (thread_call_param_t) this);
+
 #if DARK_TO_FULL_EVALUATE_CLAMSHELL
     fullWakeThreadCall = thread_call_allocate(
                             OSMemberFunctionCast(thread_call_func_t, this,
@@ -952,6 +1030,7 @@ bool IOPMrootDomain::start( IOService * nub )
     _statsResponseTypeKey   = OSSymbol::withCString(kIOPMStatsApplicationResponseTypeKey);
     _statsMessageTypeKey    = OSSymbol::withCString(kIOPMStatsMessageTypeKey);
     _statsPowerCapsKey      = OSSymbol::withCString(kIOPMStatsPowerCapabilityKey);
+    assertOnWakeSecs        = -1;  // Invalid value to prevent updates
 
     pmStatsLock = IOLockAlloc();
     idxPMCPUClamshell = kCPUUnknownIndex;
@@ -1047,7 +1126,10 @@ bool IOPMrootDomain::start( IOService * nub )
     // IOBacklightDisplay can take a long time to load at boot, or it may
     // not load at all if you're booting with clamshell closed. We publish
     // 'DisplayDims' here redundantly to get it published early and at all.
-    psIterator = getMatchingServices( serviceMatching("IOPMPowerSource") );
+    OSDictionary * matching;
+    matching = serviceMatching("IOPMPowerSource");
+    psIterator = getMatchingServices( matching );
+    if (matching) matching->release();
     if( psIterator && psIterator->getNextObject() )
     {
         // There's at least one battery on the system, so we publish
@@ -1061,9 +1143,13 @@ bool IOPMrootDomain::start( IOService * nub )
     sysctl_register_oid(&sysctl__kern_sleeptime);
     sysctl_register_oid(&sysctl__kern_waketime);
     sysctl_register_oid(&sysctl__kern_willshutdown);
+    sysctl_register_oid(&sysctl__kern_iokittest);
+    sysctl_register_oid(&sysctl__hw_targettype);
+
     sysctl_register_oid(&sysctl__kern_progressmeterenable);
     sysctl_register_oid(&sysctl__kern_progressmeter);
     sysctl_register_oid(&sysctl__kern_wakereason);
+    sysctl_register_oid(&sysctl__kern_consoleoptions);
 
 #if HIBERNATION
     IOHibernateSystemInit(this);
@@ -1212,25 +1298,6 @@ IOReturn IOPMrootDomain::setProperties( OSObject * props_obj )
         // Relay our allowed PM settings onto our registered PM clients
         else if ((allowedPMSettings->getNextIndexOfObject(key, 0) != (unsigned int) -1))
         {
-            if ((gIOPMSettingAutoWakeSecondsKey == key) && ((n = OSDynamicCast(OSNumber, obj))))
-            {
-                UInt32 rsecs = n->unsigned32BitValue();
-                if (!rsecs)
-                autoWakeStart = autoWakeEnd = 0;
-                else
-                {
-                AbsoluteTime deadline;
-                clock_interval_to_deadline(rsecs + kAutoWakePostWindow, kSecondScale, &deadline);
-                autoWakeEnd = AbsoluteTime_to_scalar(&deadline);
-                if (rsecs > kAutoWakePreWindow)
-                    rsecs -= kAutoWakePreWindow;
-                else
-                    rsecs = 0;
-                clock_interval_to_deadline(rsecs, kSecondScale, &deadline);
-                autoWakeStart = AbsoluteTime_to_scalar(&deadline);
-                }
-            }
-
             return_value = setPMSetting(key, obj);
             if (kIOReturnSuccess != return_value)
                 break;
@@ -1793,6 +1860,10 @@ void IOPMrootDomain::startIdleSleepTimer( uint32_t inSeconds )
     AbsoluteTime deadline;
 
     ASSERT_GATED();
+    if (gNoIdleFlag) {
+        DLOG("idle timer not set (noidle=%d)\n", gNoIdleFlag);
+        return;
+    }
     if (inSeconds)
     {
         clock_interval_to_deadline(inSeconds, kSecondScale, &deadline);
@@ -1819,6 +1890,18 @@ void IOPMrootDomain::cancelIdleSleepTimer( void )
         DLOG("idle timer cancelled\n");
         thread_call_cancel(extraSleepTimer);
         idleSleepTimerPending = false;
+
+        if (!assertOnWakeSecs && systemWakeTime) {
+                AbsoluteTime    now;
+                clock_usec_t    microsecs;
+                clock_get_uptime(&now);
+                SUB_ABSOLUTETIME(&now, &systemWakeTime);
+                absolutetime_to_microtime(now, &assertOnWakeSecs, &microsecs);
+                if (assertOnWakeReport)  {
+                    HISTREPORT_TALLYVALUE(assertOnWakeReport, (int64_t)assertOnWakeSecs);
+                    DLOG("Updated assertOnWake %lu\n", (unsigned long)assertOnWakeSecs);
+                }
+        }
     }
 }
 
@@ -1859,13 +1942,6 @@ void IOPMrootDomain::handleSleepTimerExpiration( void )
     idleSleepTimerPending = false;
 
     clock_get_uptime(&time);
-    if ((AbsoluteTime_to_scalar(&time) > autoWakeStart) &&
-        (AbsoluteTime_to_scalar(&time) < autoWakeEnd))
-    {
-        thread_call_enter_delayed(extraSleepTimer, *((AbsoluteTime *) &autoWakeEnd));
-        return;
-    }
-
     setQuickSpinDownTimeout();
     adjustPowerState(true);
 }
@@ -2007,6 +2083,7 @@ IOReturn IOPMrootDomain::privateSleepSystem( uint32_t sleepReason )
 
 void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
 {
+    uint64_t    now;
     ASSERT_GATED();
     DLOG("PowerChangeDone: %u->%u\n",
         (uint32_t) previousPowerState, (uint32_t) getPowerState());
@@ -2024,13 +2101,27 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
 
             clock_sec_t     secs;
             clock_usec_t    microsecs;
-            clock_get_calendar_microtime(&secs, &microsecs);
+            clock_get_calendar_absolute_and_microtime(&secs, &microsecs, &now);
             logtime(secs);
             gIOLastSleepTime.tv_sec  = secs;
             gIOLastSleepTime.tv_usec = microsecs;
             gIOLastWakeTime.tv_sec = 0;
             gIOLastWakeTime.tv_usec = 0;
 
+            if (wake2DarkwakeDelay && sleepDelaysReport) {
+                clock_usec_t    microsecs;
+                clock_sec_t     wake2DarkwakeSecs, darkwake2SleepSecs;
+                // Update 'wake2DarkwakeDelay' histogram if this is a fullwake->sleep transition
+
+                SUB_ABSOLUTETIME(&now, &ts_sleepStart);
+                absolutetime_to_microtime(now, &darkwake2SleepSecs, &microsecs);
+                absolutetime_to_microtime(wake2DarkwakeDelay, &wake2DarkwakeSecs, &microsecs);
+                HISTREPORT_TALLYVALUE(sleepDelaysReport, 
+                                           (int64_t)(wake2DarkwakeSecs+darkwake2SleepSecs));
+
+                DLOG("Updated sleepDelaysReport %lu %lu\n", (unsigned long)wake2DarkwakeSecs, (unsigned long)darkwake2SleepSecs);
+                wake2DarkwakeDelay = 0;
+            }
 #if HIBERNATION
             LOG("System %sSleep\n", gIOHibernateState ? "Safe" : "");
 
@@ -2047,6 +2138,7 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
                     event->release();
                 }
             }
+            assertOnWakeSecs = 0;
             ((IOService *)this)->stop_watchdog_timer(); //14456299
             getPlatform()->sleepKernel();
 
@@ -2064,8 +2156,17 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
             // sleep transition complete
             gSleepOrShutdownPending = 0;
 
-            // trip the reset of the calendar clock
-            clock_wakeup_calendar();
+                       // trip the reset of the calendar clock
+                       {
+                               clock_sec_t  wakeSecs;
+                               clock_usec_t wakeMicrosecs;
+
+                               clock_initialize_calendar();
+
+                               clock_get_calendar_microtime(&wakeSecs, &wakeMicrosecs);
+                               gIOLastWakeTime.tv_sec  = wakeSecs;
+                               gIOLastWakeTime.tv_usec = wakeMicrosecs;
+                       }
 
 #if HIBERNATION
             LOG("System %sWake\n", gIOHibernateState ? "SafeSleep " : "");
@@ -2097,7 +2198,6 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
             userWasActive           = false;
             fullWakeReason = kFullWakeReasonNone;
 
-
             OSString * wakeType = OSDynamicCast(
                 OSString, getProperty(kIOPMRootDomainWakeTypeKey));
             OSString * wakeReason = OSDynamicCast(
@@ -2118,9 +2218,9 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
             }
             else if ((gDarkWakeFlags & kDarkWakeFlagHIDTickleMask) != 0)
             {
+#if HIBERNATION
                 OSNumber * hibOptions = OSDynamicCast(
                     OSNumber, getProperty(kIOHibernateOptionsKey));
-
                 if (hibernateAborted || ((hibOptions &&
                     !(hibOptions->unsigned32BitValue() & kIOHibernateOptionDarkWake))))
                 {
@@ -2131,6 +2231,7 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
                         hibOptions ? hibOptions->unsigned32BitValue() : 0);
                 }
                 else
+#endif
                 if (wakeType && (
                     wakeType->isEqualTo(kIOPMRootDomainWakeTypeUser) ||
                     wakeType->isEqualTo(kIOPMRootDomainWakeTypeAlarm)))
@@ -2165,9 +2266,11 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
                 {
                     darkWakeMaintenance = true;
                     darkWakeSleepService = true;
+#if HIBERNATION
                     if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) {
                         sleepToStandby = true;
                     }
+#endif
                 }
                 else
                 if (wakeType &&
@@ -2213,6 +2316,10 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
                 fullWakeReason = kFullWakeReasonLocalUser;
                 reportUserInput();
             }
+            else if (displayPowerOnRequested && checkSystemCanSustainFullWake())
+            {
+                handleDisplayPowerOn();
+            }
             else if (!darkWakeMaintenance)
             {
                 // Early/late tickle for non-maintenance wake.
@@ -2232,8 +2339,24 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
 #endif
             sleepCnt++;
 
+           thread_call_enter(updateConsoleUsersEntry);
+
             changePowerStateToPriv(ON_STATE);
         }   break;
+#if !__i386__ && !__x86_64__
+        case ON_STATE: {
+            if (previousPowerState != ON_STATE)
+            {
+                DLOG("Force re-evaluating aggressiveness\n");
+                /* Force re-evaluate the aggressiveness values to set appropriate idle sleep timer */
+                pmPowerStateQueue->submitPowerEvent(
+                    kPowerEventPolicyStimulus,
+                    (void *) kStimulusNoIdleSleepPreventers );
+          }
+          break;
+        }
+
+#endif
 
     }
 }
@@ -2255,6 +2378,7 @@ IOReturn IOPMrootDomain::requestPowerDomainState (
     return super::requestPowerDomainState(0, childConnection, specification);
 }
 
+
 //******************************************************************************
 // updatePreventIdleSleepList
 //
@@ -2310,6 +2434,8 @@ bool IOPMrootDomain::updatePreventIdleSleepList(
         changePowerStateTo(SLEEP_STATE);
         evaluatePolicy( kStimulusNoIdleSleepPreventers );
     }
+    messageClient(kIOPMMessageIdleSleepPreventers, systemCapabilityNotifier,
+        &newCount, sizeof(newCount));
 
 #if defined(__i386__) || defined(__x86_64__)
     if (addNotRemove && (service == wrangler) && !checkSystemCanSustainFullWake())
@@ -2330,7 +2456,7 @@ bool IOPMrootDomain::updatePreventIdleSleepList(
 void IOPMrootDomain::updatePreventSystemSleepList(
         IOService * service, bool addNotRemove )
 {
-    unsigned int oldCount;
+    unsigned int oldCount, newCount;
 
     ASSERT_GATED();
     if (this == service)
@@ -2342,6 +2468,17 @@ void IOPMrootDomain::updatePreventSystemSleepList(
         preventSystemSleepList->setObject(service);
         DLOG("prevent system sleep list: %s+ (%u)\n",
             service->getName(), preventSystemSleepList->getCount());
+        if (!assertOnWakeSecs && systemWakeTime) {
+                AbsoluteTime    now;
+                clock_usec_t    microsecs;
+                clock_get_uptime(&now);
+                SUB_ABSOLUTETIME(&now, &systemWakeTime);
+                absolutetime_to_microtime(now, &assertOnWakeSecs, &microsecs);
+                if (assertOnWakeReport)  {
+                    HISTREPORT_TALLYVALUE(assertOnWakeReport, (int64_t)assertOnWakeSecs);
+                    DLOG("Updated assertOnWake %lu\n", (unsigned long)assertOnWakeSecs);
+                }
+        }
     }
     else if (preventSystemSleepList->member(service))
     {
@@ -2356,6 +2493,62 @@ void IOPMrootDomain::updatePreventSystemSleepList(
             evaluatePolicy( kStimulusDarkWakeEvaluate );
         }
     }
+    newCount = preventSystemSleepList->getCount();
+    messageClient(kIOPMMessageSystemSleepPreventers, systemCapabilityNotifier,
+        &newCount, sizeof(newCount));
+}
+
+void IOPMrootDomain::copySleepPreventersList(OSArray **idleSleepList, OSArray **systemSleepList)
+{
+
+    OSCollectionIterator *iterator = NULL;
+    OSObject    *object = NULL;
+    OSArray     *array = NULL;
+
+    if (!getPMworkloop()->inGate())
+    {
+        getPMworkloop()->runAction(
+            OSMemberFunctionCast(IOWorkLoop::Action, this,
+                &IOPMrootDomain::IOPMrootDomain::copySleepPreventersList),
+            this, (void *)idleSleepList, (void *)systemSleepList);
+        return;
+    }
+
+    if (idleSleepList && preventIdleSleepList && (preventIdleSleepList->getCount() != 0))
+    {
+        iterator = OSCollectionIterator::withCollection(preventIdleSleepList);
+        array = OSArray::withCapacity(5);
+
+        while ((object = iterator->getNextObject()))
+        {
+            IOService *service = OSDynamicCast(IOService, object);
+            if (object)
+            {
+                array->setObject(OSSymbol::withCString(service->getName()));
+            }
+        }
+
+        iterator->release();
+        *idleSleepList = array;
+    }
+
+    if (systemSleepList && preventSystemSleepList && (preventSystemSleepList->getCount() != 0))
+    {
+        iterator = OSCollectionIterator::withCollection(preventSystemSleepList);
+        array = OSArray::withCapacity(5);
+
+        while ((object = iterator->getNextObject()))
+        {
+            IOService *service = OSDynamicCast(IOService, object);
+            if (object)
+            {
+                array->setObject(OSSymbol::withCString(service->getName()));
+            }
+        }
+
+        iterator->release();
+        *systemSleepList = array;
+    }
 }
 
 //******************************************************************************
@@ -2474,6 +2667,9 @@ void IOPMrootDomain::askChangeDownDone(
 
 void IOPMrootDomain::systemDidNotSleep( void )
 {
+    // reset console lock state
+    thread_call_enter(updateConsoleUsersEntry);
+
     if (!wrangler)
     {
         if (idleSeconds)
@@ -2494,6 +2690,30 @@ void IOPMrootDomain::systemDidNotSleep( void )
 
     preventTransitionToUserActive(false);
     IOService::setAdvisoryTickleEnable( true );
+
+    // After idle revert and cancel, send a did-change message to powerd
+    // to balance the previous will-change message. Kernel clients do not
+    // need this since sleep cannot be canceled once they are notified.
+
+    if (toldPowerdCapWillChange && systemCapabilityNotifier &&
+        (_pendingCapability != _currentCapability) &&
+        ((_systemMessageClientMask & kSystemMessageClientPowerd) != 0))
+    {
+        // Differs from a real capability gain change where notifyRef != 0,
+        // but it is zero here since no response is expected.
+
+        IOPMSystemCapabilityChangeParameters params;
+
+        bzero(&params, sizeof(params));
+        params.fromCapabilities = _pendingCapability;
+        params.toCapabilities = _currentCapability;
+        params.changeFlags = kIOPMSystemCapabilityDidChange;
+
+        DLOG("MESG cap %x->%x did change\n",
+            params.fromCapabilities, params.toCapabilities);
+        messageClient(kIOMessageSystemCapabilityChange, systemCapabilityNotifier,
+            &params, sizeof(params));
+    }
 }
 
 //******************************************************************************
@@ -2531,7 +2751,6 @@ void IOPMrootDomain::tellNoChangeDown( unsigned long stateNum )
 
 void IOPMrootDomain::tellChangeUp( unsigned long stateNum )
 {
-
     DLOG("tellChangeUp %u->%u\n",
         (uint32_t) getPowerState(), (uint32_t) stateNum);
 
@@ -2556,7 +2775,6 @@ void IOPMrootDomain::tellChangeUp( unsigned long stateNum )
             tellClients( kIOMessageSystemWillPowerOn );
         }
 
-
         tracePoint( kIOPMTracePointWakeApplications );
         tellClients( kIOMessageSystemHasPoweredOn );
     }
@@ -2583,11 +2801,14 @@ IOReturn IOPMrootDomain::sysPowerDownHandler(
     if (messageType == kIOMessageSystemWillSleep)
     {
 #if HIBERNATION
-        uint32_t mem_only = 0;
+        static int32_t mem_only = -1;
         IOPowerStateChangeNotification *notify =
                     (IOPowerStateChangeNotification *)messageArgs;
 
-       PE_parse_boot_argn("swd_mem_only", &mem_only, sizeof(mem_only));
+       if ((mem_only == -1) &&
+           (PE_parse_boot_argn("swd_mem_only", &mem_only, sizeof(mem_only)) == false)) {
+           mem_only = 0;
+       }
        if ((mem_only != 1) && (gRootDomain->sleepWakeDebugIsWdogEnabled()))
        {
            notify->returnValue = 30 * 1000 * 1000;
@@ -2851,13 +3072,29 @@ hibernate_should_abort(void)
 
 void IOPMrootDomain::willNotifyPowerChildren( IOPMPowerStateIndex newPowerState )
 {
-#if HIBERNATION
+    OSDictionary *dict;
+    OSNumber *secs;
+
     if (SLEEP_STATE == newPowerState)
     {
+#if HIBERNATION
         IOHibernateSystemSleep();
         IOHibernateIOKitSleep();
-    }
 #endif
+        if (gRootDomain->activitySinceSleep()) {
+            dict = OSDictionary::withCapacity(1);
+            secs = OSNumber::withNumber(1, 32);
+
+            if (dict && secs) {
+                dict->setObject(gIOPMSettingDebugWakeRelativeKey, secs);
+                gRootDomain->setProperties(dict);
+                MSG("Reverting sleep with relative wake\n");
+            }
+            if (dict) dict->release();
+            if (secs) secs->release();
+        }
+
+    }
 }
 
 //******************************************************************************
@@ -4083,6 +4320,7 @@ struct HaltRestartApplierContext {
     IOPMPowerFlags      PowerFlags;
     UInt32              MessageType;
     UInt32              Counter;
+    const char *        LogString;
 };
 
 static void
@@ -4091,7 +4329,7 @@ platformHaltRestartApplier( OSObject * object, void * context )
     IOPowerStateChangeNotification  notify;
     HaltRestartApplierContext *     ctx;
     AbsoluteTime                    startTime;
-    UInt32                          deltaTime;
+    uint32_t                        deltaTime;
 
     ctx = (HaltRestartApplierContext *) context;
 
@@ -4117,20 +4355,26 @@ platformHaltRestartApplier( OSObject * object, void * context )
         if (notifier)
         {
             LOG("%s handler %p took %u ms\n",
-                (ctx->MessageType == kIOMessageSystemWillPowerOff) ? "PowerOff" :
-                     (ctx->MessageType == kIOMessageSystemPagingOff) ? "PagingOff" : "Restart",
-                OBFUSCATE(notifier->handler), (uint32_t) deltaTime );
+                ctx->LogString, OBFUSCATE(notifier->handler), deltaTime);
         }
     }
 
     ctx->Counter++;
 }
 
+static void quiescePowerTreeCallback( void * target, void * param )
+{
+    IOLockLock(gPMHaltLock);
+    gPMQuiesced = true;
+    thread_wakeup(param);
+    IOLockUnlock(gPMHaltLock);
+}
+
 void IOPMrootDomain::handlePlatformHaltRestart( UInt32 pe_type )
 {
     HaltRestartApplierContext   ctx;
     AbsoluteTime                startTime;
-    UInt32                      deltaTime;
+    uint32_t                    deltaTime;
 
     memset(&ctx, 0, sizeof(ctx));
     ctx.RootDomain = this;
@@ -4142,16 +4386,19 @@ void IOPMrootDomain::handlePlatformHaltRestart( UInt32 pe_type )
         case kPEUPSDelayHaltCPU:
             ctx.PowerState  = OFF_STATE;
             ctx.MessageType = kIOMessageSystemWillPowerOff;
+            ctx.LogString   = "PowerOff";
             break;
 
         case kPERestartCPU:
             ctx.PowerState  = RESTART_STATE;
             ctx.MessageType = kIOMessageSystemWillRestart;
+            ctx.LogString   = "Restart";
             break;
 
         case kPEPagingOff:
             ctx.PowerState  = ON_STATE;
             ctx.MessageType = kIOMessageSystemPagingOff;
+            ctx.LogString   = "PagingOff";
             IOService::updateConsoleUsers(NULL, kIOMessageSystemPagingOff);
 #if HIBERNATION
             IOHibernateSystemRestart();
@@ -4186,11 +4433,29 @@ void IOPMrootDomain::handlePlatformHaltRestart( UInt32 pe_type )
 
     IOCPURunPlatformHaltRestartActions(pe_type);
 
+    // Wait for PM to quiesce
+    if ((kPEPagingOff != pe_type) && gPMHaltLock)
+    {
+        AbsoluteTime quiesceTime = mach_absolute_time();
+
+        IOLockLock(gPMHaltLock);
+        gPMQuiesced = false;
+        if (quiescePowerTree(this, &quiescePowerTreeCallback, &gPMQuiesced) ==
+            kIOReturnSuccess)
+        {
+            while (!gPMQuiesced)
+            {
+                IOLockSleep(gPMHaltLock, &gPMQuiesced, THREAD_UNINT);
+            }
+        }
+        IOLockUnlock(gPMHaltLock);
+
+        deltaTime = computeDeltaTimeMS(&quiesceTime);
+        DLOG("PM quiesce took %u ms\n", deltaTime);
+    }
+
     deltaTime = computeDeltaTimeMS(&startTime);
-    LOG("%s all drivers took %u ms\n",
-        (ctx.MessageType == kIOMessageSystemWillPowerOff) ? "PowerOff" :
-            (ctx.MessageType == kIOMessageSystemPagingOff) ? "PagingOff" : "Restart",
-        (uint32_t) deltaTime );
+    LOG("%s all drivers took %u ms\n", ctx.LogString, deltaTime);
 }
 
 //******************************************************************************
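
handlePlatformHaltRestart() now blocks until the power tree reports quiescence through quiescePowerTreeCallback(). A user-space analogue of that wait/wake handshake, with std::condition_variable standing in for gPMHaltLock, IOLockSleep() and thread_wakeup():

#include <condition_variable>
#include <mutex>

// One side requests the quiesce and waits; the completion callback flips a
// flag under the same lock and wakes the waiter -- the same shape as the new
// gPMQuiesced handshake in the diff.
struct QuiesceWaiter {
    std::mutex              lock;
    std::condition_variable cv;
    bool                    quiesced = false;

    // Analogue of quiescePowerTreeCallback(): called once the tree is idle.
    void onQuiesced() {
        std::lock_guard<std::mutex> g(lock);
        quiesced = true;
        cv.notify_all();
    }

    // Analogue of the new wait loop in handlePlatformHaltRestart().
    void waitForQuiesce() {
        std::unique_lock<std::mutex> g(lock);
        cv.wait(g, [this] { return quiesced; });
    }
};
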
@@ -4442,6 +4707,7 @@ void IOPMrootDomain::handleOurPowerChangeStart(
     _systemTransitionType    = kSystemTransitionNone;
     _systemMessageClientMask = 0;
     capabilityLoss           = false;
+    toldPowerdCapWillChange  = false;
 
     if (lowBatteryCondition)
     {
@@ -4565,6 +4831,10 @@ void IOPMrootDomain::handleOurPowerChangeStart(
 
             // Publish a UUID for the Sleep --> Wake cycle
             handlePublishSleepWakeUUID(true);
+            if (sleepDelaysReport) {
+                clock_get_uptime(&ts_sleepStart);
+                DLOG("sleepDelaysReport f->9 start at 0x%llx\n", ts_sleepStart);
+            }
         }
     }
 
@@ -4587,6 +4857,10 @@ void IOPMrootDomain::handleOurPowerChangeStart(
 
         publishSleepReason = true;
         lastSleepReason = sleepReason;
+        if (sleepDelaysReport) {
+            clock_get_uptime(&ts_sleepStart);
+                DLOG("sleepDelaysReport 9->0 start at 0x%llx\n", ts_sleepStart);
+        }
     }
 
     // 3. System wake.
@@ -4704,6 +4978,13 @@ void IOPMrootDomain::handleOurPowerChangeDone(
                     // userIsActive will be cleared by wrangler powering down
                     wranglerTickled = false;
                     fullWakeReason = kFullWakeReasonNone;
+
+                    if (ts_sleepStart) {
+                        clock_get_uptime(&wake2DarkwakeDelay);
+                        SUB_ABSOLUTETIME(&wake2DarkwakeDelay, &ts_sleepStart);
+                        DLOG("sleepDelaysReport f->9 end 0x%llx\n", wake2DarkwakeDelay);
+                        ts_sleepStart = 0;
+                    }
                 }
             }
 
@@ -4802,6 +5083,7 @@ void IOPMrootDomain::handleOurPowerChangeDone(
 
         _systemTransitionType = kSystemTransitionNone;
         _systemMessageClientMask = 0;
+        toldPowerdCapWillChange  = false;
 
         logGraphicsClamp = false;
     }
@@ -4964,7 +5246,8 @@ void IOPMrootDomain::handleActivityTickleForDisplayWrangler(
 
     clock_get_uptime(&userActivityTime);
     bool aborting = ((lastSleepReason == kIOPMSleepReasonIdle)
-                  || (lastSleepReason == kIOPMSleepReasonMaintenance));
+                  || (lastSleepReason == kIOPMSleepReasonMaintenance)
+                  || (lastSleepReason == kIOPMSleepReasonSoftware));
     if (aborting) {
         userActivityCount++;
         DLOG("display wrangler tickled1 %d lastSleepReason %d\n",
@@ -5230,6 +5513,12 @@ bool IOPMrootDomain::systemMessageFilter(
                     capArgs->changeFlags = kIOPMSystemCapabilityWillChange;
                 else
                     capArgs->changeFlags = kIOPMSystemCapabilityDidChange;
+
+                if ((object == (void *) systemCapabilityNotifier) &&
+                    context->isPreChange)
+                {
+                    toldPowerdCapWillChange = true;
+                }
             }
 
             // Capability change messages only go to the PM configd plugin.
@@ -5505,10 +5794,13 @@ void IOPMrootDomain::reportUserInput( void )
 {
 #if !NO_KERNEL_HID
     OSIterator * iter;
+    OSDictionary * matching;
 
     if(!wrangler)
     {
-        iter = getMatchingServices(serviceMatching("IODisplayWrangler"));
+        matching = serviceMatching("IODisplayWrangler");
+        iter = getMatchingServices(matching);
+        if (matching) matching->release();
         if(iter)
         {
             wrangler = (IOService *) iter->getNextObject();
@@ -5567,11 +5859,8 @@ bool IOPMrootDomain::latchDisplayWranglerTickle( bool latch )
 
 void IOPMrootDomain::setDisplayPowerOn( uint32_t options )
 {
-    if (checkSystemCanSustainFullWake())
-    {
-        pmPowerStateQueue->submitPowerEvent( kPowerEventSetDisplayPowerOn,
-                                             (void *) 0, options );
-    }
+    pmPowerStateQueue->submitPowerEvent( kPowerEventSetDisplayPowerOn,
+                                         (void *) 0, options );
 }
 
 // MARK: -
@@ -5766,6 +6055,41 @@ void IOPMrootDomain::adjustPowerState( bool sleepASAP )
     }
 }
 
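+//******************************************************************************
+// handleDisplayPowerOn
+//
+// Applies the latched displayPowerOnRequested state to the display wrangler.
+// Powering the display on from dark wake also requests a full wake; powering
+// it off steps the wrangler through state 1 before dropping it to state 0.
+//******************************************************************************
+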
+void IOPMrootDomain::handleDisplayPowerOn( )
+{
+    if (!wrangler) return;
+    if (displayPowerOnRequested)
+    {
+        if (!checkSystemCanSustainFullWake()) return;
+
+        // Force wrangler to max power state. If system is in dark wake
+        // this alone won't raise the wrangler's power state.
+
+        wrangler->changePowerStateForRootDomain(kWranglerPowerStateMax);
+
+        // When the system is in dark wake, always requesting a full wake
+        // should not have any bad side-effects, even if the request fails.
+
+        if (!CAP_CURRENT(kIOPMSystemCapabilityGraphics))
+        {
+            setProperty(kIOPMRootDomainWakeTypeKey, kIOPMRootDomainWakeTypeNotification);
+            requestFullWake( kFullWakeReasonDisplayOn );
+        }
+    }
+    else
+    {
+        // Relinquish desire to power up display.
+        // Must first transition to state 1 since wrangler doesn't
+        // power off the displays at state 0. At state 0 the root
+        // domain is removed from the wrangler's power client list.
+
+        wrangler->changePowerStateForRootDomain(kWranglerPowerStateMin + 1);
+        wrangler->changePowerStateForRootDomain(kWranglerPowerStateMin);
+    }
+}
+
 //******************************************************************************
 // dispatchPowerEvent
 //
@@ -5919,30 +6243,13 @@ void IOPMrootDomain::dispatchPowerEvent(
             if (!wrangler) break;
             if (arg1 != 0)
             {
-                // Force wrangler to max power state. If system is in dark wake
-                // this alone won't raise the wrangler's power state.
-
-                wrangler->changePowerStateForRootDomain(kWranglerPowerStateMax);
-
-                // System in dark wake, always requesting full wake should
-                // not have any bad side-effects, even if the request fails.
-
-                if (!CAP_CURRENT(kIOPMSystemCapabilityGraphics))
-                {
-                    setProperty(kIOPMRootDomainWakeTypeKey, kIOPMRootDomainWakeTypeNotification);
-                    requestFullWake( kFullWakeReasonDisplayOn );
-                }
+                displayPowerOnRequested = true;
             }
             else
             {
-                // Relenquish desire to power up display.
-                // Must first transition to state 1 since wrangler doesn't
-                // power off the displays at state 0. At state 0 the root
-                // domain is removed from the wrangler's power client list.
-
-                wrangler->changePowerStateForRootDomain(kWranglerPowerStateMin + 1);
-                wrangler->changePowerStateForRootDomain(kWranglerPowerStateMin);
+                displayPowerOnRequested = false;
             }
+            handleDisplayPowerOn();
             break;
     }
 }
@@ -6086,15 +6393,13 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg )
     }
 
     /*
-     * Sleep if system is in dark wake
+     * Forward the DW thermal notification to clients if the system is not going to sleep
      */
-    if (msg & kIOPMDWOverTemp)
+    if ((msg & kIOPMDWOverTemp) && (_systemTransitionType != kSystemTransitionSleep))
     {
         DLOG("DarkWake thermal limits message received!\n");
 
-        // Inform cap client that we're going to sleep
         messageClients(kIOPMMessageDarkWakeThermalEmergency);
-
     }
 
     /*
@@ -6791,8 +7096,20 @@ void IOPMrootDomain::evaluateAssertions(IOPMDriverAssertionType newAssertions, I
         }
     }
 
-    if (changedBits & kIOPMDriverAssertionCPUBit)
+    if (changedBits & kIOPMDriverAssertionCPUBit) {
         evaluatePolicy(kStimulusDarkWakeEvaluate);
+        if (!assertOnWakeSecs && systemWakeTime) {
+                AbsoluteTime    now;
+                clock_usec_t    microsecs;
+                clock_get_uptime(&now);
+                SUB_ABSOLUTETIME(&now, &systemWakeTime);
+                absolutetime_to_microtime(now, &assertOnWakeSecs, &microsecs);
+                if (assertOnWakeReport)  {
+                    HISTREPORT_TALLYVALUE(assertOnWakeReport, (int64_t)assertOnWakeSecs);
+                    DLOG("Updated assertOnWake %lu\n", (unsigned long)assertOnWakeSecs);
+                }
+        }
+    }
 
     if (changedBits & kIOPMDriverAssertionReservedBit7) {
         bool value = (newAssertions & kIOPMDriverAssertionReservedBit7) ? true : false;
@@ -6935,7 +7252,7 @@ void IOPMrootDomain::pmStatsRecordApplicationResponse(
         if (response->isEqualTo(gIOPMStatsDriverPSChangeSlow)) {
             powerCaps = OSNumber::withNumber(powerState, 32);
 
-#if !defined(__i386__) && !defined(__x86_64__)
+#if !defined(__i386__) && !defined(__x86_64__) && (DEVELOPMENT || DEBUG)
             IOLog("%s::powerStateChange type(%d) to(%lu) async took %d ms\n",
                   name, messageType,
                   powerState, delay_ms);
@@ -7065,26 +7382,137 @@ void IOPMrootDomain::traceDetail( uint32_t detail )
 }
 
 
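+//******************************************************************************
+// configureReportGated
+//
+// Work loop gated helper for configureReport(). Allocates, reference counts,
+// and frees the histogram buffers backing the kAssertDelayChID and
+// kSleepDelaysChID reporting channels.
+//******************************************************************************
+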
+void IOPMrootDomain::configureReportGated(uint64_t channel_id, uint64_t action, void *result)
+{
+    size_t      reportSize;
+    void        **report = NULL;
+    uint32_t    bktCnt;
+    uint32_t    bktSize;
+    uint32_t    *clientCnt;
+
+    ASSERT_GATED();
+
+    report = NULL;
+    if (channel_id == kAssertDelayChID) {
+        report = &assertOnWakeReport;
+        bktCnt = kAssertDelayBcktCnt;
+        bktSize = kAssertDelayBcktSize;
+        clientCnt = &assertOnWakeClientCnt;
+    }
+    else if (channel_id == kSleepDelaysChID) {
+        report = &sleepDelaysReport;
+        bktCnt = kSleepDelaysBcktCnt;
+        bktSize = kSleepDelaysBcktSize;
+        clientCnt = &sleepDelaysClientCnt;
+    }
+
+    switch (action)
+    {
+        case kIOReportEnable:
+
+            if (*report) {
+                (*clientCnt)++;
+                break;
+            }
+
+            reportSize = HISTREPORT_BUFSIZE(bktCnt);
+            *report = IOMalloc(reportSize);
+            if (*report == NULL) {
+                break;
+            }
+            bzero(*report, reportSize);
+            HISTREPORT_INIT(bktCnt, bktSize, *report, reportSize,
+                                 getRegistryEntryID(), channel_id,  kIOReportCategoryPower);
+
+            if (channel_id == kAssertDelayChID)
+                assertOnWakeSecs = 0;
+
+            break;
+
+        case kIOReportDisable:
+            if (*clientCnt == 0) {
+                break;
+            }
+            if (*clientCnt == 1)
+            {
+                IOFree(*report, HISTREPORT_BUFSIZE(bktCnt));
+                *report = NULL;
+            }
+            (*clientCnt)--;
+
+            if (channel_id == kAssertDelayChID)
+                assertOnWakeSecs = -1;  // Invalid value to prevent updates
+
+            break;
+
+        case kIOReportGetDimensions:
+            if (*report) {
+                HISTREPORT_UPDATERES(*report, kIOReportGetDimensions, result);
+            }
+            break;
+    }
+
+    return;
+}
+
 IOReturn IOPMrootDomain::configureReport(IOReportChannelList    *channelList,
                                     IOReportConfigureAction action,
                                     void                   *result,
                                     void                   *destination)
 {
     unsigned cnt;
-    if (action != kIOReportGetDimensions) goto exit;
+    uint64_t configAction = (uint64_t)action;
 
     for (cnt = 0; cnt < channelList->nchannels; cnt++) {
         if ( (channelList->channels[cnt].channel_id == kSleepCntChID) ||
                (channelList->channels[cnt].channel_id == kDarkWkCntChID) ||
                (channelList->channels[cnt].channel_id == kUserWkCntChID) ) {
+            if (action != kIOReportGetDimensions) continue;
             SIMPLEREPORT_UPDATERES(kIOReportGetDimensions, result);
         }
+        else if ((channelList->channels[cnt].channel_id == kAssertDelayChID) ||
+                 (channelList->channels[cnt].channel_id == kSleepDelaysChID)) {
+            gIOPMWorkLoop->runAction(
+                     OSMemberFunctionCast(IOWorkLoop::Action, this, &IOPMrootDomain::configureReportGated),
+                     (OSObject *)this, (void *)channelList->channels[cnt].channel_id,
+                     (void *)configAction, (void *)result);
+        }
     }
 
-exit:
     return super::configureReport(channelList, action, result, destination);
 }
 
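+//******************************************************************************
+// updateReportGated
+//
+// Work loop gated helper for updateReport(). Copies the histogram data for the
+// requested channel into the destination buffer, failing if the channel has
+// not been enabled or the buffer lacks capacity.
+//******************************************************************************
+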
+IOReturn IOPMrootDomain::updateReportGated(uint64_t ch_id, void *result, IOBufferMemoryDescriptor *dest)
+{
+    uint32_t    size2cpy;
+    void        *data2cpy;
+    void        **report;
+
+    ASSERT_GATED();
+
+    report = NULL;
+    if (ch_id == kAssertDelayChID) {
+        report = &assertOnWakeReport;
+    }
+    else if (ch_id == kSleepDelaysChID) {
+        report = &sleepDelaysReport;
+    }
+
+    if (*report == NULL) {
+        return kIOReturnNotOpen;
+    }
+
+    HISTREPORT_UPDATEPREP(*report, data2cpy, size2cpy);
+    if (size2cpy > (dest->getCapacity() - dest->getLength()) ) {
+        return kIOReturnOverrun;
+    }
+
+    HISTREPORT_UPDATERES(*report, kIOReportCopyChannelData, result);
+    dest->appendBytes(data2cpy, size2cpy);
+
+    return kIOReturnSuccess;
+}
 
 IOReturn IOPMrootDomain::updateReport(IOReportChannelList      *channelList,
                                  IOReportUpdateAction      action,
@@ -7103,7 +7531,15 @@ IOReturn IOPMrootDomain::updateReport(IOReportChannelList      *channelList,
     for (cnt = 0; cnt < channelList->nchannels; cnt++) {
         ch_id = channelList->channels[cnt].channel_id ;
 
-        if ((ch_id == kSleepCntChID) ||
+        if ((ch_id == kAssertDelayChID) || (ch_id == kSleepDelaysChID)) {
+            gIOPMWorkLoop->runAction(
+                     OSMemberFunctionCast(IOWorkLoop::Action, this, &IOPMrootDomain::updateReportGated),
+                     (OSObject *)this, (void *)ch_id,
+                     (void *)result, (void *)dest);
+            continue;
+
+        }
+        else if ((ch_id == kSleepCntChID) ||
                 (ch_id == kDarkWkCntChID) || (ch_id == kUserWkCntChID)) {
             SIMPLEREPORT_INIT(buf, sizeof(buf), getRegistryEntryID(), ch_id, kIOReportCategoryPower);
         }
@@ -7312,7 +7748,7 @@ void PMTraceWorker::tracePCIPowerChange(
 
 uint64_t  PMTraceWorker::getPMStatusCode( )
 {
-    return (((uint64_t)traceData32 << 32) | (tracePhase << 24) |
+    return (((uint64_t)traceData32 << 32) | ((uint64_t)tracePhase << 24) |
             (loginWindowPhase << 16) | (traceData8 << 8));
 
 }
@@ -7325,14 +7761,6 @@ uint64_t  PMTraceWorker::getPMStatusCode( )
 //
 //******************************************************************************
 
-static unsigned int     gPMHaltBusyCount;
-static unsigned int     gPMHaltIdleCount;
-static int              gPMHaltDepth;
-static unsigned long    gPMHaltEvent;
-static IOLock *         gPMHaltLock  = 0;
-static OSArray *        gPMHaltArray = 0;
-static const OSSymbol * gPMHaltClientAcknowledgeKey = 0;
-
 PMHaltWorker * PMHaltWorker::worker( void )
 {
     PMHaltWorker *  me;
@@ -7455,7 +7883,7 @@ void PMHaltWorker::work( PMHaltWorker * me )
             me->timeout   = false;
             IOLockUnlock(me->lock);
 
-            service->systemWillShutdown( gPMHaltEvent );
+            service->systemWillShutdown( gPMHaltMessageType );
 
             // Wait for driver acknowledgement
             IOLockLock(me->lock);
@@ -7472,10 +7900,10 @@ void PMHaltWorker::work( PMHaltWorker * me )
         if ((deltaTime > kPMHaltTimeoutMS) || timeout ||
             (gIOKitDebug & kIOLogPMRootDomain))
         {
-            LOG("%s driver %s (%p) took %u ms\n",
-                (gPMHaltEvent == kIOMessageSystemWillPowerOff) ?
+            LOG("%s driver %s (0x%llx) took %u ms\n",
+                (gPMHaltMessageType == kIOMessageSystemWillPowerOff) ?
                     "PowerOff" : "Restart",
-                service->getName(), OBFUSCATE(service),
+                service->getName(), service->getRegistryEntryID(),
                 (uint32_t) deltaTime );
         }
 
@@ -7506,7 +7934,7 @@ void PMHaltWorker::checkTimeout( PMHaltWorker * me, AbsoluteTime * now )
         {
             me->timeout = true;
             MSG("%s still waiting on %s\n",
-                (gPMHaltEvent == kIOMessageSystemWillPowerOff) ?
+                (gPMHaltMessageType == kIOMessageSystemWillPowerOff) ?
                     "PowerOff" : "Restart",
                 me->service->getName());
         }
@@ -7514,7 +7942,6 @@ void PMHaltWorker::checkTimeout( PMHaltWorker * me, AbsoluteTime * now )
     IOLockUnlock(me->lock);
 }
 
-
 //******************************************************************************
 // acknowledgeSystemWillShutdown
 //
@@ -7555,7 +7982,7 @@ void IOPMrootDomain::acknowledgeSystemWillShutdown( IOService * from )
 //******************************************************************************
 
 static void
-notifySystemShutdown( IOService * root, unsigned long event )
+notifySystemShutdown( IOService * root, uint32_t messageType )
 {
 #define PLACEHOLDER ((OSSet *)gPMHaltArray)
     IORegistryIterator *    iter;
@@ -7573,7 +8000,7 @@ notifySystemShutdown( IOService * root, unsigned long event )
     void *                  baseFunc;
     bool                    ok;
 
-    DLOG("%s event = %lx\n", __FUNCTION__, event);
+    DLOG("%s msgType = 0x%x\n", __FUNCTION__, messageType);
 
     baseFunc = OSMemberFunctionCast(void *, root, &IOService::systemWillShutdown);
 
@@ -7607,7 +8034,7 @@ notifySystemShutdown( IOService * root, unsigned long event )
         if (!gPMHaltClientAcknowledgeKey) goto done;
     }
 
-    gPMHaltEvent = event;
+    gPMHaltMessageType = messageType;
 
     // Depth-first walk of PM plane
 
@@ -7837,18 +8264,18 @@ OSObject * IOPMrootDomain::copyProperty( const char * aKey) const
     if (!strncmp(aKey, kIOPMSleepWakeWdogRebootKey,
                         sizeof(kIOPMSleepWakeWdogRebootKey))) {
         if (swd_flags & SWD_BOOT_BY_SW_WDOG)
-            return OSBoolean::withBoolean(true);
+            return kOSBooleanTrue;
         else
-            return OSBoolean::withBoolean(false);
+            return kOSBooleanFalse;
 
     }
 
     if (!strncmp(aKey, kIOPMSleepWakeWdogLogsValidKey,
                         sizeof(kIOPMSleepWakeWdogLogsValidKey))) {
         if (swd_flags & SWD_VALID_LOGS)
-            return OSBoolean::withBoolean(true);
+            return kOSBooleanTrue;
         else
-            return OSBoolean::withBoolean(false);
+            return kOSBooleanFalse;
 
     }
 
@@ -7859,16 +8286,16 @@ OSObject * IOPMrootDomain::copyProperty( const char * aKey) const
      */
     if (!strcmp(aKey, "DesktopMode")) {
         if (desktopMode)
-            return OSBoolean::withBoolean(true);
+            return kOSBooleanTrue;
         else
-            return OSBoolean::withBoolean(false);
+            return kOSBooleanFalse;
     }
     if (!strcmp(aKey, "DisplayIdleForDemandSleep")) {
         if (displayIdleForDemandSleep) {
-            return OSBoolean::withBoolean(true);
+            return kOSBooleanTrue;
         }
         else  {
-            return OSBoolean::withBoolean(false);
+            return kOSBooleanFalse;
         }
     }
 
@@ -7876,8 +8303,12 @@ OSObject * IOPMrootDomain::copyProperty( const char * aKey) const
     {
         OSArray * array = 0;
         WAKEEVENT_LOCK();
-        if (_systemWakeEventsArray && _systemWakeEventsArray->getCount())
-            array = OSArray::withArray(_systemWakeEventsArray);
+        if (_systemWakeEventsArray && _systemWakeEventsArray->getCount()) {
+            OSCollection *collection = _systemWakeEventsArray->copyCollection();
+            if (collection && !(array = OSDynamicCast(OSArray, collection))) {
+                collection->release();
+            }
+        }
         WAKEEVENT_UNLOCK();
         return array;
     }
@@ -7887,13 +8318,30 @@ OSObject * IOPMrootDomain::copyProperty( const char * aKey) const
         OSArray * array = 0;
         IOLockLock(pmStatsLock);
         if (pmStatsAppResponses && pmStatsAppResponses->getCount()) {
-            array = OSArray::withArray(pmStatsAppResponses);
+            OSCollection *collection = pmStatsAppResponses->copyCollection();
+            if (collection && !(array = OSDynamicCast(OSArray, collection))) {
+                collection->release();
+            }
             pmStatsAppResponses->flushCollection();
         }
         IOLockUnlock(pmStatsLock);
         return array;
     }
 
+    if (!strcmp(aKey, kIOPMIdleSleepPreventersKey))
+    {
+        OSArray *idleSleepList = NULL;
+        gRootDomain->copySleepPreventersList(&idleSleepList, NULL);
+        return idleSleepList;
+    }
+
+    if (!strcmp(aKey, kIOPMSystemSleepPreventersKey))
+    {
+        OSArray *systemSleepList = NULL;
+        gRootDomain->copySleepPreventersList(NULL, &systemSleepList);
+        return systemSleepList;
+    }
+
     return NULL;
 }
 
@@ -8618,17 +9066,17 @@ IOReturn IOPMrootDomain::restartWithStackshot()
     if ((swd_flags & SWD_WDOG_ENABLED) == 0)
         return kIOReturnError;
 
-    takeStackshot(true, true);
+    takeStackshot(true, true, false);
 
     return kIOReturnSuccess;
 }
 
 void IOPMrootDomain::sleepWakeDebugTrig(bool wdogTrigger)
 {
-    takeStackshot(wdogTrigger, false);
+    takeStackshot(wdogTrigger, false, false);
 }
 
-void IOPMrootDomain::takeStackshot(bool wdogTrigger, bool isOSXWatchdog)
+void IOPMrootDomain::takeStackshot(bool wdogTrigger, bool isOSXWatchdog, bool isSpinDump)
 {
    swd_hdr *         hdr = NULL;
    addr64_t          data[3];
@@ -8646,8 +9094,18 @@ void IOPMrootDomain::takeStackshot(bool wdogTrigger, bool isOSXWatchdog)
    IOMemoryMap *     logBufMap = NULL;
 
    swd_stackshot_hdr *stackshotHdr = NULL;
-   if ( kIOSleepWakeWdogOff & gIOKitDebug )
-      return;
+
+   uint32_t          bufSize;
+   uint32_t          initialStackSize;
+
+   if (isSpinDump) {
+       if (_systemTransitionType != kSystemTransitionSleep &&
+           _systemTransitionType != kSystemTransitionWake)
+           return;
+   } else {
+       if ( kIOSleepWakeWdogOff & gIOKitDebug )
+           return;
+   }
 
    if (wdogTrigger) {
        if (PE_parse_boot_argn("swd_panic", &wdog_panic, sizeof(wdog_panic)) &&
@@ -8659,8 +9117,7 @@ void IOPMrootDomain::takeStackshot(bool wdogTrigger, bool isOSXWatchdog)
        else if (swd_flags & SWD_BOOT_BY_SW_WDOG) {
            // If the current boot is due to a restart triggered by this watchdog in
            // the previous boot, don't trigger again until at least one successful
            // sleep & wake has occurred.
-           sleepCnt = displayWakeCnt = 1;
-           if (!(sleepCnt && displayWakeCnt)) {
+           if (!(sleepCnt && (displayWakeCnt || darkWakeCnt))) {
                IOLog("Shutting down due to repeated Sleep/Wake failures\n");
                PEHaltRestart(kPEHaltCPU);
                return;
@@ -8669,19 +9126,36 @@ void IOPMrootDomain::takeStackshot(bool wdogTrigger, bool isOSXWatchdog)
 
    }
 
-   if (sleepWakeDebugIsWdogEnabled() == false)
-       return;
+   if (isSpinDump) {
+      if (gSpinDumpBufferFull)
+         return;
+      if (swd_spindump_buffer == NULL) {
+         sleepWakeDebugSpinDumpMemAlloc();
+         if (swd_spindump_buffer == NULL) return;
+      }
+
+      bufSize = SWD_SPINDUMP_SIZE;
+      initialStackSize = SWD_INITIAL_SPINDUMP_SIZE;
+   } else {
+      if (sleepWakeDebugIsWdogEnabled() == false)
+         return;
 
-   if (swd_buffer == NULL) {
-      sleepWakeDebugMemAlloc();
-      if (swd_buffer == NULL) return;
+      if (swd_buffer == NULL) {
+         sleepWakeDebugMemAlloc();
+         if (swd_buffer == NULL) return;
+      }
+
+      bufSize = SWD_BUF_SIZE;
+      initialStackSize = SWD_INITIAL_STACK_SIZE;
    }
 
    if (!OSCompareAndSwap(0, 1, &gRootDomain->swd_lock))
        return;
 
-
-   hdr = (swd_hdr *)swd_buffer;
+   if (isSpinDump)
+      hdr = (swd_hdr *)swd_spindump_buffer;
+   else
+      hdr = (swd_hdr *)swd_buffer;
    memset(hdr->UUID, 0x20, sizeof(hdr->UUID));
    if ((UUIDstring = OSDynamicCast(OSString, getProperty(kIOPMSleepWakeUUIDKey))) != NULL ) {
 
@@ -8696,7 +9170,7 @@ void IOPMrootDomain::takeStackshot(bool wdogTrigger, bool isOSXWatchdog)
    }
 
    dstAddr = (char*)hdr + hdr->spindump_offset;
-   bytesRemaining = SWD_BUF_SIZE - hdr->spindump_offset;
+   bytesRemaining = bufSize - hdr->spindump_offset;
 
   /* If AppleOSXWatchdog triggered the stackshot, set the flag in the header */
    hdr->is_osx_watchdog = isOSXWatchdog;
@@ -8722,7 +9196,7 @@ void IOPMrootDomain::takeStackshot(bool wdogTrigger, bool isOSXWatchdog)
             * to SWD_INITIAL_STACK_SIZE
             */
            pid = -1;
-           size = (bytesRemaining > SWD_INITIAL_STACK_SIZE) ? SWD_INITIAL_STACK_SIZE : bytesRemaining;
+           size = (bytesRemaining > initialStackSize) ? initialStackSize : bytesRemaining;
            flags = STACKSHOT_SAVE_LOADINFO | STACKSHOT_SAVE_KEXT_LOADINFO|STACKSHOT_SAVE_KERNEL_FRAMES_ONLY;
        }
        else {
@@ -8743,7 +9217,7 @@ void IOPMrootDomain::takeStackshot(bool wdogTrigger, bool isOSXWatchdog)
        IOSleep(10); // 10 ms
    }
 
-   hdr->spindump_size = (SWD_BUF_SIZE - bytesRemaining - hdr->spindump_offset);
+   hdr->spindump_size = (bufSize - bytesRemaining - hdr->spindump_offset);
 
 
    memset(hdr->cps, 0x20, sizeof(hdr->cps));
@@ -8753,6 +9227,12 @@ void IOPMrootDomain::takeStackshot(bool wdogTrigger, bool isOSXWatchdog)
    snprintf(hdr->PMStatusCode, sizeof(hdr->PMStatusCode), "\nCode: %08x %08x",
            (uint32_t)((code >> 32) & 0xffffffff), (uint32_t)(code & 0xffffffff));
    memset(hdr->reason, 0x20, sizeof(hdr->reason));
+   if (isSpinDump) {
+      snprintf(hdr->reason, sizeof(hdr->reason), "\nStackshot reason: PSC Delay\n\n");
+      gRootDomain->swd_lock = 0;
+      gSpinDumpBufferFull = true;
+      return;
+   }
    snprintf(hdr->reason, sizeof(hdr->reason), "\nStackshot reason: Watchdog\n\n");
 
 
@@ -8834,6 +9314,41 @@ exit:
     gRootDomain->swd_lock = 0;
 }
 
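+//******************************************************************************
+// sleepWakeDebugSpinDumpMemAlloc
+//
+// Allocates the dedicated buffer (swd_spindump_buffer) that takeStackshot()
+// fills when capturing stacks for slow sleep/wake transitions, kept separate
+// from the sleep/wake watchdog buffer.
+//******************************************************************************
+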
+void IOPMrootDomain::sleepWakeDebugSpinDumpMemAlloc( )
+{
+    vm_size_t    size = SWD_SPINDUMP_SIZE;
+
+    swd_hdr      *hdr = NULL;
+
+    IOBufferMemoryDescriptor  *memDesc = NULL;
+
+    if (!OSCompareAndSwap(0, 1, &gRootDomain->swd_lock))
+        return;
+
+    memDesc = IOBufferMemoryDescriptor::inTaskWithOptions(
+                            kernel_task, kIODirectionIn|kIOMemoryMapperNone,
+                            SWD_SPINDUMP_SIZE);
+
+    if (memDesc == NULL)
+    {
+        DLOG("Failed to allocate Memory descriptor for sleepWake debug spindump\n");
+        goto exit;
+    }
+
+    hdr = (swd_hdr *)memDesc->getBytesNoCopy();
+    memset(hdr, 0, sizeof(swd_hdr));
+
+    hdr->signature = SWD_HDR_SIGNATURE;
+    hdr->alloc_size = size;
+
+    hdr->spindump_offset = sizeof(swd_hdr);
+    swd_spindump_buffer = (void *)hdr;
+
+exit:
+    gRootDomain->swd_lock = 0;
+}
+
 void IOPMrootDomain::sleepWakeDebugEnableWdog()
 {
     swd_flags |= SWD_WDOG_ENABLED;
@@ -8844,7 +9359,28 @@ void IOPMrootDomain::sleepWakeDebugEnableWdog()
 bool IOPMrootDomain::sleepWakeDebugIsWdogEnabled()
 {
     return ((swd_flags & SWD_WDOG_ENABLED) &&
-            !systemBooting && !systemShutdown);
+            !systemBooting && !systemShutdown && !gWillShutdown);
+}
+
+void IOPMrootDomain::sleepWakeDebugSaveSpinDumpFile()
+{
+    swd_hdr *hdr = NULL;
+    errno_t error = EIO;
+
+    if (swd_spindump_buffer && gSpinDumpBufferFull) {
+        hdr = (swd_hdr *)swd_spindump_buffer;
+
+        error = sleepWakeDebugSaveFile("/var/tmp/SleepWakeDelayStacks.dump",
+                        (char*)hdr+hdr->spindump_offset, hdr->spindump_size);
+
+        if (error) return;
+
+        sleepWakeDebugSaveFile("/var/tmp/SleepWakeDelayLog.dump",
+                         (char*)hdr+offsetof(swd_hdr, UUID),
+                         sizeof(swd_hdr)-offsetof(swd_hdr, UUID));
+
+        gSpinDumpBufferFull = false;
+    }
 }
 
 errno_t IOPMrootDomain::sleepWakeDebugSaveFile(const char *name, char *buf, int len)
@@ -8929,7 +9465,7 @@ errno_t IOPMrootDomain::sleepWakeDebugCopyFile(
         bytesToRead = (round_page(numBytes) > tmpBufSize) ? tmpBufSize : round_page(numBytes);
         readFileOffset = trunc_page(srcOffset);
 
-       DLOG("Read file (numBytes:0x%llx)\n", bytesToRead);
+       DLOG("Read file (numBytes:0x%llx offset:0x%llx)\n", bytesToRead, readFileOffset);
        error = vn_rdwr(UIO_READ, srcVp, tmpBuf, bytesToRead, readFileOffset,
                UIO_SYSSPACE, IO_SKIP_ENCRYPTION|IO_SYNC|IO_NODELOCKED|IO_UNIT|IO_NOCACHE, 
                vfs_context_ucred(srcCtx), (int *) 0,
@@ -8946,6 +9482,7 @@ errno_t IOPMrootDomain::sleepWakeDebugCopyFile(
        if (crc) {
            newcrc = crc32(newcrc, (void *)srcDataOffset, bytesToWrite);
        }
+       DLOG("Write file (numBytes:0x%llx offset:0x%llx)\n", bytesToWrite, writeFileOffset);
        error = vn_rdwr(UIO_WRITE, vp, (char *)srcDataOffset, bytesToWrite, writeFileOffset,
                UIO_SYSSPACE, IO_SYNC|IO_NODELOCKED|IO_UNIT, 
                vfs_context_ucred(ctx), (int *) 0,
@@ -8988,11 +9525,70 @@ exit:
 
 
 
+}
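+
+//******************************************************************************
+// checkForValidDebugData
+//
+// Opens fname and verifies that it contains sleep/wake debug data: it must be
+// a regular file with a single link whose header block carries
+// kIOHibernateHeaderDebugDataSignature. On success the vnode is returned open
+// in *vp; otherwise *vp is set to NULL.
+//******************************************************************************
+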
+void IOPMrootDomain::checkForValidDebugData(const char *fname, vfs_context_t *ctx, 
+                                            void *tmpBuf, struct vnode **vp)
+{
+    int             rc;
+    uint64_t        hdrOffset;
+
+    struct vnode_attr           va;
+    IOHibernateImageHeader      *imageHdr;
+
+    *vp = NULL;
+    if (vnode_open(fname, (FREAD | O_NOFOLLOW), 0,
+                   VNODE_LOOKUP_NOFOLLOW, vp, *ctx) != 0) 
+    {
+        DMSG("sleepWakeDebugDumpFromFile: Failed to open the file %s\n", fname);
+        goto err;
+    }
+    VATTR_INIT(&va);
+    VATTR_WANTED(&va, va_nlink);
+    VATTR_WANTED(&va, va_data_alloc);
+    if ((*vp)->v_type != VREG ||
+        vnode_getattr((*vp), &va, *ctx) || va.va_nlink != 1) {
+        DMSG("sleepWakeDebugDumpFromFile: Bailing as %s is not a regular file\n", fname);
+        goto err;
+    }
+
+    /* Read the sleepimage file header */
+    rc = vn_rdwr(UIO_READ, *vp, (char *)tmpBuf, round_page(sizeof(IOHibernateImageHeader)), 0,
+                UIO_SYSSPACE, IO_SKIP_ENCRYPTION|IO_SYNC|IO_NODELOCKED|IO_UNIT|IO_NOCACHE, 
+                vfs_context_ucred(*ctx), (int *) 0,
+                vfs_context_proc(*ctx));
+    if (rc != 0) {
+        DMSG("sleepWakeDebugDumpFromFile: Failed to read header size %lu(rc=%d) from %s\n", 
+             round_page(sizeof(IOHibernateImageHeader)), rc, fname);
+        goto err;
+    }
+
+    imageHdr = ((IOHibernateImageHeader *)tmpBuf);
+    if (imageHdr->signature != kIOHibernateHeaderDebugDataSignature) {
+        DMSG("sleepWakeDebugDumpFromFile: File %s header has unexpected value 0x%x\n", 
+             fname, imageHdr->signature);
+        goto err;
+    }
+
+    /* Sleep/Wake debug header (swd_hdr) is at the beginning of the second block */
+    hdrOffset = imageHdr->deviceBlockSize;
+    if (hdrOffset + sizeof(swd_hdr) >= va.va_data_alloc) {
+        DMSG("sleepWakeDebugDumpFromFile: header is crossing file size(0x%llx) in file %s\n",  
+             va.va_data_alloc, fname);
+        goto err;
+    }
+
+    return; 
+
+err:
+    if (*vp) vnode_close(*vp, FREAD, *ctx);
+    *vp = NULL;
+
+    return;
 }
 
 void IOPMrootDomain::sleepWakeDebugDumpFromFile( )
 {
-
+#if HIBERNATION
     int             rc;
     char                       hibernateFilename[MAXPATHLEN+1];
     char            PMStatusCode[100];
@@ -9008,9 +9604,7 @@ void IOPMrootDomain::sleepWakeDebugDumpFromFile( )
     struct vnode    *vp = NULL;
     vfs_context_t   ctx = NULL;
 
-    struct vnode_attr           va;
     IOBufferMemoryDescriptor    *tmpBufDesc = NULL;
-    IOHibernateImageHeader      *imageHdr;
 
     DLOG("sleepWakeDebugDumpFromFile\n");
     if ((swd_flags & SWD_LOGS_IN_FILE) == 0)
@@ -9020,20 +9614,6 @@ void IOPMrootDomain::sleepWakeDebugDumpFromFile( )
        return;
 
 
-    hibernateFilename[0] = 0;
-    if ((obj = copyProperty(kIOHibernateFileKey)))
-    {
-        if ((str = OSDynamicCast(OSString, obj)))
-            strlcpy(hibernateFilename, str->getCStringNoCopy(),
-                    sizeof(hibernateFilename));
-        obj->release();
-    }
-    if (!hibernateFilename[0]) {
-        DMSG("sleepWakeDebugDumpFromFile: Failed to hib file name\n");
-        goto exit;
-    }
-    DLOG("sleepWakeDebugDumpFromFile: Hib file name %s\n", hibernateFilename);
-
     /* Allocate a temp buffer to copy data between files */
     tmpBufSize = 2*4096;
     tmpBufDesc = IOBufferMemoryDescriptor::
@@ -9048,44 +9628,37 @@ void IOPMrootDomain::sleepWakeDebugDumpFromFile( )
     tmpBuf = tmpBufDesc->getBytesNoCopy();
 
    ctx = vfs_context_create(vfs_context_current());
-    if (vnode_open(hibernateFilename, (FREAD | O_NOFOLLOW), 0,
-                   VNODE_LOOKUP_NOFOLLOW, &vp, ctx) != 0) 
-    {
-        DMSG("sleepWakeDebugDumpFromFile: Failed to open the hibernate file %s\n", hibernateFilename);
-        goto exit;
-    }
-    VATTR_INIT(&va);
-    VATTR_WANTED(&va, va_nlink);
-    VATTR_WANTED(&va, va_data_alloc);
-    if (vp->v_type != VREG ||
-        vnode_getattr(vp, &va, ctx) || va.va_nlink != 1) {
-        DMSG("sleepWakeDebugDumpFromFile: Bailing as this is not a regular file\n");
-        goto exit;
-    }
 
-    /* Read the sleepimage file header */
-    rc = vn_rdwr(UIO_READ, vp, (char *)tmpBuf, round_page(sizeof(IOHibernateImageHeader)), 0,
-                UIO_SYSSPACE, IO_SKIP_ENCRYPTION|IO_SYNC|IO_NODELOCKED|IO_UNIT|IO_NOCACHE, 
-                vfs_context_ucred(ctx), (int *) 0,
-                vfs_context_proc(ctx));
-    if (rc != 0) {
-        DMSG("sleepWakeDebugDumpFromFile: Failed to read header size %lu(rc=%d)\n", round_page(sizeof(IOHibernateImageHeader)), rc);
-        goto exit;
-    }
+    /* First check if 'kSleepWakeStackBinFilename' has valid data */
+    checkForValidDebugData(kSleepWakeStackBinFilename, &ctx, tmpBuf, &vp);
+    if (vp == NULL) {
+        /* Check if the debug data is saved to hibernation file */
+        hibernateFilename[0] = 0;
+        if ((obj = copyProperty(kIOHibernateFileKey)))
+        {
+            if ((str = OSDynamicCast(OSString, obj)))
+                strlcpy(hibernateFilename, str->getCStringNoCopy(),
+                        sizeof(hibernateFilename));
+            obj->release();
+        }
+        if (!hibernateFilename[0]) {
+            DMSG("sleepWakeDebugDumpFromFile: Failed to get hibernation file name\n");
+            goto exit;
+        }
 
-    imageHdr = ((IOHibernateImageHeader *)tmpBuf);
-    if (imageHdr->signature != kIOHibernateHeaderDebugDataSignature) {
-        DMSG("sleepWakeDebugDumpFromFile: File header has unexpected value 0x%x\n", imageHdr->signature);
-        goto exit;
+        checkForValidDebugData(hibernateFilename, &ctx, tmpBuf, &vp);
+        if (vp == NULL) {
+            DMSG("sleepWakeDebugDumpFromFile: No valid debug data is found\n");
+            goto exit;
+        }
+        DLOG("Getting SW Stacks image from file %s\n", hibernateFilename);
     }
-
-    /* Sleep/Wake debug header(swd_hdr) is at the beggining of the second block */
-    hdrOffset = imageHdr->deviceBlockSize;
-    if (hdrOffset + sizeof(swd_hdr) >= va.va_data_alloc) {
-        DMSG("sleepWakeDebugDumpFromFile: header is crossing file size(0x%llx)\n",  va.va_data_alloc);
-        goto exit;
+    else {
+        DLOG("Getting SW Stacks image from file %s\n", kSleepWakeStackBinFilename);
     }
 
+    hdrOffset = ((IOHibernateImageHeader *)tmpBuf)->deviceBlockSize;
+
     DLOG("Reading swd_hdr len 0x%lx offset 0x%lx\n", round_page(sizeof(swd_hdr)), trunc_page(hdrOffset));
     /* Read the sleep/wake debug header(swd_hdr) */
     rc = vn_rdwr(UIO_READ, vp, (char *)tmpBuf, round_page(sizeof(swd_hdr)), trunc_page(hdrOffset),
@@ -9147,7 +9720,7 @@ exit:
     if (vp) vnode_close(vp, FREAD, ctx);
     if (ctx) vfs_context_rele(ctx);
     if (tmpBufDesc) tmpBufDesc->release();
-
+#endif /* HIBERNATION */
 }
 
 void IOPMrootDomain::sleepWakeDebugDumpFromMem(IOMemoryMap *logBufMap)
@@ -9340,7 +9913,7 @@ void IOPMrootDomain::sleepWakeDebugTrig(bool restart)
 {
 }
 
-void IOPMrootDomain::takeStackshot(bool restart, bool isOSXWatchdog)
+void IOPMrootDomain::takeStackshot(bool restart, bool isOSXWatchdog, bool isSpinDump)
 {
 #pragma unused(restart)
 #pragma unused(isOSXWatchdog)
index 8bcc9342e733535515fb98c6b950e7a3e7e0a592..31ab8b7007f457778c98580577fb745fac7e3ca3 100644 (file)
@@ -39,6 +39,7 @@
 #include <IOKit/IOKitKeys.h>
 #include <IOKit/IOTimeStamp.h>
 #include <IOKit/IOUserClient.h>
+#include <IOKit/IOKitDiagnosticsUserClient.h>
 
 #include <IOKit/system.h>
 
@@ -819,6 +820,10 @@ int PEHaltRestart(unsigned int type)
        replies.
      */
    }
+   else if(type == kPEPanicRestartCPU || type == kPEPanicSync)
+   {
+    IOCPURunPlatformPanicActions(type);
+   }
 
   if (gIOPlatform) return gIOPlatform->haltRestart(type);
   else return -1;
@@ -901,6 +906,38 @@ err:
     return FALSE;
 }
 
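+/*
+ * Store a boolean value in the NVRAM options entry under 'symbol' and sync
+ * NVRAM so the write is flushed immediately. Returns true on success.
+ */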
+boolean_t
+PEWriteNVRAMBooleanProperty(const char *symbol, boolean_t value)
+{
+       const OSSymbol *sym = NULL;
+       OSBoolean *data = NULL;
+       bool ret = false;
+
+       if (symbol == NULL) {
+               goto exit;
+       }
+
+       if (init_gIOOptionsEntry() < 0) {
+               goto exit;
+       }
+
+       if ((sym = OSSymbol::withCStringNoCopy(symbol)) == NULL) {
+               goto exit;
+       }
+
+       data  = value ? kOSBooleanTrue : kOSBooleanFalse;
+       ret = gIOOptionsEntry->setProperty(sym, data);
+
+       sym->release();
+
+       /* success, force the NVRAM to flush writes */
+       if (ret == true) {
+               gIOOptionsEntry->sync();
+       }
+
+exit:
+       return ret;
+}
 
 boolean_t PEWriteNVRAMProperty(const char *symbol, const void *value, 
                                const unsigned int len)
@@ -1188,6 +1225,7 @@ void IODTPlatformExpert::processTopLevel( IORegistryEntry * rootEntry )
         } else {
          dtNVRAM->attach(this);
          dtNVRAM->registerService();
+         options->release();
        }
       }
     }
@@ -1195,7 +1233,10 @@ void IODTPlatformExpert::processTopLevel( IORegistryEntry * rootEntry )
     // Publish the cpus.
     cpus = rootEntry->childFromPath( "cpus", gIODTPlane);
     if ( cpus)
+    {
       createNubs( this, IODTFindMatchingEntries( cpus, kIODTExclusive, 0));
+      cpus->release();
+    }
 
     // publish top level, minus excludeList
     createNubs( this, IODTFindMatchingEntries( rootEntry, kIODTExclusive, excludeList()));
@@ -1476,6 +1517,40 @@ IOReturn IOPlatformExpertDevice::setProperties( OSObject * properties )
     return kIOReturnUnsupported;
 }
 
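+/*
+ * Create a user client for the platform expert device. Only
+ * kIOKitDiagnosticsClientType connections are accepted; these are backed by
+ * IOKitDiagnosticsClient.
+ */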
+IOReturn IOPlatformExpertDevice::newUserClient( task_t owningTask, void * securityID,
+                                    UInt32 type,  OSDictionary * properties,
+                                    IOUserClient ** handler )
+{
+    IOReturn            err = kIOReturnSuccess;
+    IOUserClient *      newConnect = 0;
+    IOUserClient *      theConnect = 0;
+
+    switch (type)
+    {
+        case kIOKitDiagnosticsClientType:
+           newConnect = IOKitDiagnosticsClient::withTask(owningTask);
+           if (!newConnect) err = kIOReturnNotPermitted;
+            break;
+        default:
+            err = kIOReturnBadArgument;
+    }
+
+    if (newConnect)
+    {
+        if ((false == newConnect->attach(this))
+                || (false == newConnect->start(this)))
+        {
+            newConnect->detach( this );
+            newConnect->release();
+        }
+        else
+            theConnect = newConnect;
+    }
+
+    *handler = theConnect;
+    return (err);
+}
+
 void IOPlatformExpertDevice::free()
 {
     if (workLoop)
@@ -1526,7 +1601,7 @@ class IOPanicPlatform : IOPlatformExpert {
     OSDeclareDefaultStructors(IOPanicPlatform);
 
 public:
-    bool start(IOService * provider);
+    bool start(IOService * provider) APPLE_KEXT_OVERRIDE;
 };
 
 
index 440ac32356e0c06d704a7fd79a8b3800fb0583d9..1917714fcfe0e8906dd7660ec746999f0905120c 100644 (file)
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
+#include <sys/uio.h>
+#include <sys/conf.h>
+
+#include <IOKit/IOLib.h>
+#include <IOKit/IOBSD.h>
 #include <IOKit/IOService.h>
+#include <IOKit/IOPlatformExpert.h>
 #include <IOKit/IOPolledInterface.h>
+#include <IOKit/IOHibernatePrivate.h>
+#include <IOKit/IOBufferMemoryDescriptor.h>
+#include <IOKit/AppleKeyStoreInterface.h>
+#include "IOKitKernelInternal.h"
+
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
@@ -50,13 +61,929 @@ OSMetaClassDefineReservedUnused(IOPolledInterface, 13);
 OSMetaClassDefineReservedUnused(IOPolledInterface, 14);
 OSMetaClassDefineReservedUnused(IOPolledInterface, 15);
 
-#if !HIBERNATION
-/* KPI stub if hibernate is configured off */
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#ifndef kIOMediaPreferredBlockSizeKey
+#define kIOMediaPreferredBlockSizeKey  "Preferred Block Size"
+#endif
+
+enum { kDefaultIOSize = 128*1024 };
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+class IOPolledFilePollers : public OSObject
+{
+    OSDeclareDefaultStructors(IOPolledFilePollers)
+
+public:
+    IOService                * media;
+    OSArray                  * pollers;
+    IOBufferMemoryDescriptor * ioBuffer;
+    bool                 abortable;
+    bool                 io;
+    IOReturn            ioStatus;
+    uint32_t             openCount;
+    uint32_t             openState;
+
+    static IOPolledFilePollers * copyPollers(IOService * media);
+};
+
+OSDefineMetaClassAndStructors(IOPolledFilePollers, OSObject)
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
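+// Walk up the service plane from 'media', collecting each provider's
+// IOPolledInterface (kIOPolledInterfaceSupportKey) into a pollers array, and
+// cache the resulting object on the media under kIOPolledInterfaceStackKey.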
+IOPolledFilePollers *
+IOPolledFilePollers::copyPollers(IOService * media)
+{
+    IOPolledFilePollers * vars;
+    IOReturn              err;
+    IOService       * service;
+    OSObject        * obj;
+    IORegistryEntry * next;
+    IORegistryEntry * child;
+
+    if ((obj = media->copyProperty(kIOPolledInterfaceStackKey)))
+    {
+       return (OSDynamicCast(IOPolledFilePollers, obj));
+    }
+
+    do
+    {
+       vars = OSTypeAlloc(IOPolledFilePollers);
+       vars->init();
+
+       vars->pollers = OSArray::withCapacity(4);
+       if (!vars->pollers)
+       {
+           err = kIOReturnNoMemory;
+           break;
+       }
+
+       next = vars->media = media;
+       do
+       {
+           IOPolledInterface * poller;
+           OSObject *          obj;
+
+           obj = next->getProperty(kIOPolledInterfaceSupportKey);
+           if (kOSBooleanFalse == obj)
+           {
+               vars->pollers->flushCollection();
+               break;
+           }
+           else if ((poller = OSDynamicCast(IOPolledInterface, obj)))
+               vars->pollers->setObject(poller);
+
+           if ((service = OSDynamicCast(IOService, next)) 
+               && service->getDeviceMemory()
+               && !vars->pollers->getCount())  break;
+
+           child = next;
+       }
+       while ((next = child->getParentEntry(gIOServicePlane)) 
+              && child->isParent(next, gIOServicePlane, true));
+
+       if (!vars->pollers->getCount())
+       {
+           err = kIOReturnUnsupported;
+           break;
+       }
+    }
+    while (false);
+
+    media->setProperty(kIOPolledInterfaceStackKey, vars);
+
+    return (vars);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+static IOReturn 
+IOPolledFilePollersIODone(IOPolledFilePollers * vars, bool abortable);
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+static IOReturn
+IOPolledFilePollersProbe(IOPolledFilePollers * vars)
+{
+    IOReturn            err = kIOReturnError;
+    int32_t            idx;
+    IOPolledInterface * poller;
+
+    for (idx = vars->pollers->getCount() - 1; idx >= 0; idx--)
+    {
+        poller = (IOPolledInterface *) vars->pollers->getObject(idx);
+        err = poller->probe(vars->media);
+        if (err)
+        {
+            HIBLOG("IOPolledInterface::probe[%d] 0x%x\n", idx, err);
+            break;
+        }
+    }
+
+    return (err);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
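+// Open every poller in the stack for the given state, allocating the shared
+// I/O buffer on preflight opens, and mark the media stack active in the
+// registry on success.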
+IOReturn
+IOPolledFilePollersOpen(IOPolledFileIOVars * filevars, uint32_t state, bool abortable)
+{
+
+    IOPolledFilePollers      * vars = filevars->pollers;
+    IOBufferMemoryDescriptor * ioBuffer;
+    IOPolledInterface        * poller;
+    IOService                * next;
+    IOReturn                   err = kIOReturnError;
+    int32_t                   idx;
+
+    vars->abortable = abortable;
+    ioBuffer = 0;
+
+    if (kIOPolledAfterSleepState == state)
+    {
+        vars->ioStatus = 0;
+       vars->io = false;
+    }
+    (void) IOPolledFilePollersIODone(vars, false);
+
+    if ((kIOPolledPreflightState == state) || (kIOPolledPreflightCoreDumpState == state))
+    {
+        ioBuffer = vars->ioBuffer;
+        if (!ioBuffer)
+        {
+           vars->ioBuffer = ioBuffer = IOBufferMemoryDescriptor::withOptions(kIODirectionInOut, 
+                                                           2 * kDefaultIOSize, page_size);
+           if (!ioBuffer) return (kIOReturnNoMemory);
+        }
+    }
+
+    for (idx = vars->pollers->getCount() - 1; idx >= 0; idx--)
+    {
+        poller = (IOPolledInterface *) vars->pollers->getObject(idx);
+        err = poller->open(state, ioBuffer);
+        if ((kIOReturnSuccess != err) && (kIOPolledPreflightCoreDumpState == state))
+        {
+           err = poller->open(kIOPolledPreflightState, ioBuffer);
+        }
+        if (kIOReturnSuccess != err)
+        {
+            HIBLOG("IOPolledInterface::open[%d] 0x%x\n", idx, err);
+            break;
+        }
+    }
+    if (kIOReturnSuccess == err)
+    {
+        next = vars->media;
+       while (next)
+       {
+           next->setProperty(kIOPolledInterfaceActiveKey, kOSBooleanTrue);
+           next = next->getProvider();
+       }
+    }
+
+    return (err);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
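+// Close each poller for the given state. A postflight close balances the open
+// count, reopening the previous state if other users remain; the last close
+// clears the active-state markers and releases the shared I/O buffer.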
+IOReturn
+IOPolledFilePollersClose(IOPolledFileIOVars * filevars, uint32_t state)
+{
+    IOPolledFilePollers * vars = filevars->pollers;
+    IOPolledInterface * poller;
+    IORegistryEntry *   next;
+    IOReturn            err;
+    int32_t            idx;
+
+    (void) IOPolledFilePollersIODone(vars, false);
+
+    if (kIOPolledPostflightState == state)
+    {
+       vars->openCount--;
+       if (vars->openCount) 
+       {
+           // 21207427
+            IOPolledFilePollersOpen(filevars, vars->openState, vars->abortable);
+           return (kIOReturnSuccess);
+       }
+    }
+
+    for (idx = 0, err = kIOReturnSuccess;
+         (poller = (IOPolledInterface *) vars->pollers->getObject(idx));
+         idx++)
+    {
+        err = poller->close(state);
+        if (err) HIBLOG("IOPolledInterface::close[%d] 0x%x\n", idx, err);
+    }
+
+    if (kIOPolledPostflightState == state)
+    {   
+       next = vars->media;
+       while (next)
+       {
+           next->removeProperty(kIOPolledInterfaceActiveKey);
+           next = next->getParentEntry(gIOServicePlane);
+       }
+
+       if (vars->ioBuffer)
+       {
+           vars->ioBuffer->release();
+           vars->ioBuffer = 0;
+       }
+    }
+    return (err);
+}
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+IOMemoryDescriptor *
+IOPolledFileGetIOBuffer(IOPolledFileIOVars * vars)
+{
+    return (vars->pollers->ioBuffer);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+static void
+IOPolledIOComplete(void *   target,
+                  void *   parameter,
+                  IOReturn status,
+                  UInt64   actualByteCount)
+{
+    IOPolledFilePollers * vars = (IOPolledFilePollers *) parameter;
+
+    vars->ioStatus = status;
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
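+// Start a single polled read or write on the lowest poller in the stack;
+// IOPolledIOComplete records the completion status in vars->ioStatus.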
+static IOReturn
+IOStartPolledIO(IOPolledFilePollers * vars, 
+                    uint32_t operation, uint32_t bufferOffset, 
+                   uint64_t deviceOffset, uint64_t length)
+{
+    IOReturn            err;
+    IOPolledInterface * poller;
+    IOPolledCompletion  completion;
+
+    err = vars->ioStatus;
+    if (kIOReturnSuccess != err) return (err);
+
+    completion.target    = 0;
+    completion.action    = &IOPolledIOComplete;
+    completion.parameter = vars;
+
+    vars->ioStatus = -1;
+
+    poller = (IOPolledInterface *) vars->pollers->getObject(0);
+    err = poller->startIO(operation, bufferOffset, deviceOffset, length, completion);
+    if (err)
+        HIBLOG("IOPolledInterface::startIO[%d] 0x%x\n", 0, err);
+
+    return (err);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
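+// Spin on checkForWork() across all pollers until the outstanding I/O
+// completes, returning its status, or kIOReturnAborted when an abortable
+// operation is being cancelled.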
+static IOReturn
+IOPolledFilePollersIODone(IOPolledFilePollers * vars, bool abortable)
+{
+    IOReturn            err = kIOReturnSuccess;
+    int32_t            idx = 0;
+    IOPolledInterface * poller;
+    AbsoluteTime        deadline;
+
+    if (!vars->io) return (kIOReturnSuccess);
+
+    abortable &= vars->abortable;
+
+    clock_interval_to_deadline(2000, kMillisecondScale, &deadline);
+
+    while (-1 == vars->ioStatus)
+    {
+        for (idx = 0; 
+           (poller = (IOPolledInterface *) vars->pollers->getObject(idx));
+             idx++)
+        {
+           IOReturn newErr;
+            newErr = poller->checkForWork();
+           if ((newErr == kIOReturnAborted) && !abortable)
+               newErr = kIOReturnSuccess;
+           if (kIOReturnSuccess == err)
+               err = newErr;
+        }
+        if ((false) && (kIOReturnSuccess == err) && (mach_absolute_time() > AbsoluteTime_to_scalar(&deadline)))
+       {
+           HIBLOG("IOPolledInterface::forced timeout\n");
+           vars->ioStatus = kIOReturnTimeout;
+       }
+    }
+    vars->io = false;
+
+#if HIBERNATION
+    if ((kIOReturnSuccess == err) && abortable && hibernate_should_abort())
+    {
+        err = kIOReturnAborted;
+       HIBLOG("IOPolledInterface::checkForWork sw abort\n");
+    }
+#endif
+
+    if (err)
+    {
+       HIBLOG("IOPolledInterface::checkForWork[%d] 0x%x\n", idx, err);
+    }
+    else 
+    {
+       err = vars->ioStatus;
+       if (kIOReturnSuccess != err) HIBLOG("IOPolledInterface::ioStatus 0x%x\n", err);
+    }
+
+    return (err);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
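+// Context handed to kern_open_file_for_direct_io(); file_extent_callback()
+// appends each file extent to an OSData and accumulates the total size.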
+struct _OpenFileContext
+{
+    OSData * extents;
+    uint64_t size;
+};
+
+static void
+file_extent_callback(void * ref, uint64_t start, uint64_t length)
+{
+    _OpenFileContext * ctx = (_OpenFileContext *) ref;
+    IOPolledFileExtent extent;
+
+    extent.start  = start;
+    extent.length = length;
+    ctx->extents->appendBytes(&extent, sizeof(extent));
+    ctx->size += length;
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
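+// Return (retained) the IOMedia service whose BSD major/minor numbers match
+// the given dev_t, or NULL if no match is found.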
+static IOService * 
+IOCopyMediaForDev(dev_t device)
+{
+    OSDictionary * matching;
+    OSNumber *     num;
+    OSIterator *   iter;
+    IOService *    result = 0;
+
+    matching = IOService::serviceMatching("IOMedia");
+    if (!matching)
+        return (0);
+    do
+    {
+        num = OSNumber::withNumber(major(device), 32);
+        if (!num)
+            break;
+        matching->setObject(kIOBSDMajorKey, num);
+        num->release();
+        num = OSNumber::withNumber(minor(device), 32);
+        if (!num)
+            break;
+        matching->setObject(kIOBSDMinorKey, num);
+        num->release();
+        if (!num)
+            break;
+        iter = IOService::getMatchingServices(matching);
+        if (iter)
+        {
+            result = (IOService *) iter->getNextObject();
+            result->retain();
+            iter->release();
+        }
+    }
+    while (false);
+    matching->release();
+
+    return (result);
+}
+
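+// Look up the encryption key for the volume backing block_dev: query the
+// media for its key UUIDs, then fetch the key bytes from the AppleKeyStore
+// service identified by AKS_SERVICE_PATH.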
+static IOReturn 
+IOGetVolumeCryptKey(dev_t block_dev,  OSString ** pKeyUUID, 
+                   uint8_t * volumeCryptKey, size_t keySize)
+{
+    IOReturn         err;
+    IOService *      part;
+    OSString *       keyUUID = 0;
+    OSString *       keyStoreUUID = 0;
+    uuid_t           volumeKeyUUID;
+    aks_volume_key_t vek;
+
+    static IOService * sKeyStore;
+
+    part = IOCopyMediaForDev(block_dev);
+    if (!part) return (kIOReturnNotFound);
+
+    err = part->callPlatformFunction(PLATFORM_FUNCTION_GET_MEDIA_ENCRYPTION_KEY_UUID, false, 
+                                     (void *) &keyUUID, (void *) &keyStoreUUID, NULL, NULL);
+    if ((kIOReturnSuccess == err) && keyUUID && keyStoreUUID)
+    {
+//            IOLog("got volume key %s\n", keyStoreUUID->getCStringNoCopy());
+
+       if (!sKeyStore)
+           sKeyStore = (IOService *) IORegistryEntry::fromPath(AKS_SERVICE_PATH, gIOServicePlane);
+       if (sKeyStore)
+           err = uuid_parse(keyStoreUUID->getCStringNoCopy(), volumeKeyUUID);
+       else
+           err = kIOReturnNoResources;
+       if (kIOReturnSuccess == err)    
+           err = sKeyStore->callPlatformFunction(gAKSGetKey, true, volumeKeyUUID, &vek, NULL, NULL);
+       if (kIOReturnSuccess != err)    
+           IOLog("volume key err 0x%x\n", err);
+       else
+       {
+           if (vek.key.keybytecount < keySize) keySize = vek.key.keybytecount;
+           bcopy(&vek.key.keybytes[0], volumeCryptKey, keySize);
+       }
+       bzero(&vek, sizeof(vek));
+
+    }
+    part->release();
+    if (pKeyUUID) *pKeyUUID = keyUUID;
+
+    return (err);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
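+// Open (or create) the polled file via kern_open_file_for_direct_io(),
+// capture its extents and size, build the poller stack for the backing media,
+// and optionally return the volume crypt key and an EFI device path for the
+// image.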
 IOReturn
-IOPolledInterface::checkAllForWork(void)
+IOPolledFileOpen(const char * filename,
+                uint64_t setFileSize, uint64_t fsFreeSize,
+                void * write_file_addr, size_t write_file_len,
+                IOPolledFileIOVars ** fileVars,
+                OSData ** imagePath,
+                uint8_t * volumeCryptKey, size_t keySize)
 {
-  IOReturn     err = kIOReturnNotReady;
+    IOReturn             err = kIOReturnSuccess;
+    IOPolledFileIOVars * vars;
+    _OpenFileContext     ctx;
+    OSData *             extentsData;
+    OSNumber *           num;
+    IOService *          part = 0;
+    dev_t                block_dev;
+    dev_t                image_dev;
+    AbsoluteTime         startTime, endTime;
+    uint64_t             nsec;
+
+    vars = IONew(IOPolledFileIOVars, 1);
+    if (!vars) return (kIOReturnNoMemory);
+    bzero(vars, sizeof(*vars));
+    vars->allocated = true;
+
+    do
+    {
+       extentsData = OSData::withCapacity(32);
+       ctx.extents = extentsData;
+       ctx.size    = 0;
+       clock_get_uptime(&startTime);
+
+       vars->fileRef = kern_open_file_for_direct_io(filename, 
+                                                     (write_file_addr != NULL) || (0 != setFileSize),
+                                                    &file_extent_callback, &ctx, 
+                                                    setFileSize,
+                                                    fsFreeSize,
+                                                    // write file:
+                                                     0, write_file_addr, write_file_len,
+                                                     // results
+                                                    &block_dev,
+                                                    &image_dev,
+                                                     &vars->block0,
+                                                     &vars->maxiobytes,
+                                                     &vars->flags);
+#if 0
+       uint32_t msDelay = (131071 & random());
+       HIBLOG("sleep %d\n", msDelay);
+       IOSleep(msDelay);
+#endif
+        clock_get_uptime(&endTime);
+        SUB_ABSOLUTETIME(&endTime, &startTime);
+        absolutetime_to_nanoseconds(endTime, &nsec);
+
+       if (!vars->fileRef) err = kIOReturnNoSpace;
+
+        HIBLOG("kern_open_file_for_direct_io took %qd ms\n", nsec / 1000000ULL);
+       if (kIOReturnSuccess != err) break;
+
+       HIBLOG("Opened file %s, size %qd, extents %ld, maxio %qx ssd %d\n", filename, ctx.size, 
+                    (extentsData->getLength() / sizeof(IOPolledFileExtent)) - 1,
+                    vars->maxiobytes, kIOPolledFileSSD & vars->flags);
+       assert(!vars->block0);
+       if (extentsData->getLength() < sizeof(IOPolledFileExtent))
+       {
+           err = kIOReturnNoSpace;
+           break;
+       }
+
+       vars->fileSize = ctx.size;
+       vars->extentMap = (IOPolledFileExtent *) extentsData->getBytesNoCopy();
+
+        part = IOCopyMediaForDev(image_dev);
+        if (!part)
+        {
+            err = kIOReturnNotFound;
+            break;
+       }
+
+       if (!(vars->pollers = IOPolledFilePollers::copyPollers(part))) break;
+
+       if ((num = OSDynamicCast(OSNumber, part->getProperty(kIOMediaPreferredBlockSizeKey))))
+               vars->blockSize = num->unsigned32BitValue();
+       if (vars->blockSize < 4096) vars->blockSize = 4096;
+
+        HIBLOG("polled file major %d, minor %d, blocksize %ld, pollers %d\n",
+               major(image_dev), minor(image_dev), (long)vars->blockSize, 
+               vars->pollers->pollers->getCount());
+
+        OSString * keyUUID = NULL;
+        if (volumeCryptKey)
+        {
+            err = IOGetVolumeCryptKey(block_dev, &keyUUID, volumeCryptKey, keySize);
+        }
+
+       *fileVars    = vars;
+       vars->fileExtents = extentsData;
+    
+       // make imagePath
+       OSData * data;
+       if (imagePath)
+       {
+#if defined(__i386__) || defined(__x86_64__)
+           char str2[24 + sizeof(uuid_string_t) + 2];
+
+            if (keyUUID)
+                snprintf(str2, sizeof(str2), "%qx:%s", 
+                                vars->extentMap[0].start, keyUUID->getCStringNoCopy());
+            else
+                snprintf(str2, sizeof(str2), "%qx", vars->extentMap[0].start);
+
+           err = IOService::getPlatform()->callPlatformFunction(
+                                               gIOCreateEFIDevicePathSymbol, false,
+                                               (void *) part, (void *) str2,
+                                               (void *) (uintptr_t) true, (void *) &data);
+#else
+           data = 0;
+           err = kIOReturnSuccess;
+#endif
+           if (kIOReturnSuccess != err)
+           {
+               HIBLOG("error 0x%x getting path\n", err);
+               break;
+           }
+           *imagePath = data;
+       }
+    }
+    while (false);
+
+    if (kIOReturnSuccess != err)
+    {
+        HIBLOG("error 0x%x opening polled file\n", err);
+       IOPolledFileClose(&vars, 0, 0, 0, 0, 0);
+    }
+
+    if (part) part->release();
 
-  return err;
+    return (err);
 }
-#endif /* !HIBERNATION */
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+IOReturn
+IOPolledFileClose(IOPolledFileIOVars ** pVars,
+                 off_t write_offset, void * addr, size_t write_length,
+                 off_t discard_offset, off_t discard_end)
+{
+    IOPolledFileIOVars * vars;
+
+    vars = *pVars;
+    if (!vars) return(kIOReturnSuccess);
+
+    if (vars->fileRef)
+    {
+       kern_close_file_for_direct_io(vars->fileRef, write_offset, addr, write_length, 
+                                     discard_offset, discard_end);
+       vars->fileRef = NULL;
+    }
+    if (vars->fileExtents) 
+    {
+       vars->fileExtents->release();
+       vars->fileExtents = 0;
+    }
+    if (vars->pollers) 
+    {
+       vars->pollers->release();
+       vars->pollers = 0;
+    }
+
+    if (vars->allocated) IODelete(vars, IOPolledFileIOVars, 1);
+    else                 bzero(vars, sizeof(IOPolledFileIOVars));
+    *pVars = NULL;
+
+    return (kIOReturnSuccess);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+IOReturn
+IOPolledFilePollersSetup(IOPolledFileIOVars * vars,
+                        uint32_t openState)
+{
+    IOReturn err;
+
+    err = kIOReturnSuccess;
+    do
+    {
+        if (!vars->pollers->openCount)
+        {
+           err = IOPolledFilePollersProbe(vars->pollers);
+           if (kIOReturnSuccess != err) break;
+           err = IOPolledFilePollersOpen(vars, openState, false);
+           if (kIOReturnSuccess != err) break;
+           vars->pollers->openState = openState;
+       }
+       vars->pollers->openCount++;
+       vars->pollers->io  = false;
+       vars->buffer       = (uint8_t *) vars->pollers->ioBuffer->getBytesNoCopy();
+       vars->bufferHalf   = 0;
+       vars->bufferOffset = 0;
+       vars->bufferSize   = (vars->pollers->ioBuffer->getLength() >> 1);
+
+        if (vars->maxiobytes < vars->bufferSize) vars->bufferSize = vars->maxiobytes;
+    }
+    while (false);
+
+    if (kIOReturnSuccess != err) HIBLOG("IOPolledFilePollersSetup(%d) error 0x%x\n", openState, err);
+
+    return (err);
+}
+
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+IOReturn
+IOPolledFileSeek(IOPolledFileIOVars * vars, uint64_t position)
+{
+    IOPolledFileExtent * extentMap;
+
+    extentMap = vars->extentMap;
+
+    vars->position = position;
+
+    while (position >= extentMap->length)
+    {
+       position -= extentMap->length;
+       extentMap++;
+    }
+
+    vars->currentExtent   = extentMap;
+    vars->extentRemaining = extentMap->length - position;
+    vars->extentPosition  = vars->position - position;
+
+    if (vars->bufferSize <= vars->extentRemaining)
+       vars->bufferLimit = vars->bufferSize;
+    else
+       vars->bufferLimit = vars->extentRemaining;
+
+    return (kIOReturnSuccess);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+IOReturn
+IOPolledFileWrite(IOPolledFileIOVars * vars,
+                    const uint8_t * bytes, IOByteCount size,
+                    IOPolledFileCryptVars * cryptvars)
+{
+    IOReturn    err = kIOReturnSuccess;
+    IOByteCount copy;
+    bool       flush = false;
+
+    do
+    {
+       if (!bytes && !size)
+       {
+           // seek to end of block & flush
+           size = vars->position & (vars->blockSize - 1);
+           if (size)
+               size = vars->blockSize - size;
+           flush = true;
+            // use some garbage for the fill
+            bytes = vars->buffer + vars->bufferOffset;
+       }
+
+       copy = vars->bufferLimit - vars->bufferOffset;
+       if (copy > size)
+           copy = size;
+       else
+           flush = true;
+
+       if (bytes)
+       {
+           bcopy(bytes, vars->buffer + vars->bufferHalf + vars->bufferOffset, copy);
+           bytes += copy;
+       }
+        else
+           bzero(vars->buffer + vars->bufferHalf + vars->bufferOffset, copy);
+        
+       size -= copy;
+       vars->bufferOffset += copy;
+       vars->position += copy;
+
+       if (flush && vars->bufferOffset)
+       {
+           uint64_t offset = (vars->position - vars->bufferOffset 
+                               - vars->extentPosition + vars->currentExtent->start);
+           uint32_t length = (vars->bufferOffset);
+
+#if CRYPTO
+            if (cryptvars && vars->encryptStart
+                && (vars->position > vars->encryptStart)
+                && ((vars->position - length) < vars->encryptEnd))
+            {
+                AbsoluteTime startTime, endTime;
+
+                uint64_t encryptLen, encryptStart;
+                encryptLen = vars->position - vars->encryptStart;
+                if (encryptLen > length)
+                    encryptLen = length;
+                encryptStart = length - encryptLen;
+                if (vars->position > vars->encryptEnd)
+                    encryptLen -= (vars->position - vars->encryptEnd);
+
+                clock_get_uptime(&startTime);
+
+                // encrypt the buffer
+                aes_encrypt_cbc(vars->buffer + vars->bufferHalf + encryptStart,
+                                &cryptvars->aes_iv[0],
+                                encryptLen / AES_BLOCK_SIZE,
+                                vars->buffer + vars->bufferHalf + encryptStart,
+                                &cryptvars->ctx.encrypt);
+    
+                clock_get_uptime(&endTime);
+                ADD_ABSOLUTETIME(&vars->cryptTime, &endTime);
+                SUB_ABSOLUTETIME(&vars->cryptTime, &startTime);
+                vars->cryptBytes += encryptLen;
+
+                // save initial vector for following encrypts
+                bcopy(vars->buffer + vars->bufferHalf + encryptStart + encryptLen - AES_BLOCK_SIZE,
+                        &cryptvars->aes_iv[0],
+                        AES_BLOCK_SIZE);
+            }
+#endif /* CRYPTO */
+
+           err = IOPolledFilePollersIODone(vars->pollers, true);
+           if (kIOReturnSuccess != err)
+               break;
+
+if (vars->position & (vars->blockSize - 1)) HIBLOG("misaligned file pos %qx\n", vars->position);
+//if (length != vars->bufferSize) HIBLOG("short write of %qx ends@ %qx\n", length, offset + length);
+
+           err = IOStartPolledIO(vars->pollers, kIOPolledWrite, vars->bufferHalf, offset, length);
+            if (kIOReturnSuccess != err)
+                break;
+           vars->pollers->io = true;
+
+           vars->extentRemaining -= vars->bufferOffset;
+           if (!vars->extentRemaining)
+           {
+               vars->currentExtent++;
+               vars->extentRemaining = vars->currentExtent->length;
+               vars->extentPosition  = vars->position;
+           }
+
+           vars->bufferHalf = vars->bufferHalf ? 0 : vars->bufferSize;
+           vars->bufferOffset = 0;
+           if (vars->bufferSize <= vars->extentRemaining)
+               vars->bufferLimit = vars->bufferSize;
+           else
+               vars->bufferLimit = vars->extentRemaining;
+
+           if (!vars->extentRemaining)
+           {
+               err = kIOReturnOverrun;
+               break;
+           }
+
+           flush = false;
+       }
+    }
+    while (size);
+
+    return (err);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+IOReturn
+IOPolledFileRead(IOPolledFileIOVars * vars,
+                    uint8_t * bytes, IOByteCount size,
+                    IOPolledFileCryptVars * cryptvars)
+{
+    IOReturn    err = kIOReturnSuccess;
+    IOByteCount copy;
+
+//    bytesWritten += size;
+
+    do
+    {
+       copy = vars->bufferLimit - vars->bufferOffset;
+       if (copy > size)
+           copy = size;
+
+       if (bytes)
+       {
+           bcopy(vars->buffer + vars->bufferHalf + vars->bufferOffset, bytes, copy);
+           bytes += copy;
+       }
+       size -= copy;
+       vars->bufferOffset += copy;
+//     vars->position += copy;
+
+       if ((vars->bufferOffset == vars->bufferLimit) && (vars->position < vars->readEnd))
+       {
+           if (!vars->pollers->io) cryptvars = 0;
+           err = IOPolledFilePollersIODone(vars->pollers, true);
+           if (kIOReturnSuccess != err)
+               break;
+
+if (vars->position & (vars->blockSize - 1)) HIBLOG("misaligned file pos %qx\n", vars->position);
+
+           vars->position        += vars->lastRead;
+           vars->extentRemaining -= vars->lastRead;
+           vars->bufferLimit      = vars->lastRead;
+
+           if (!vars->extentRemaining)
+           {
+               vars->currentExtent++;
+               vars->extentRemaining = vars->currentExtent->length;
+               vars->extentPosition  = vars->position;
+                if (!vars->extentRemaining)
+                {
+                    err = kIOReturnOverrun;
+                    break;
+                }
+           }
+
+           uint64_t length;
+           uint64_t lastReadLength = vars->lastRead;
+           uint64_t offset = (vars->position 
+                               - vars->extentPosition + vars->currentExtent->start);
+           if (vars->extentRemaining <= vars->bufferSize)
+               length = vars->extentRemaining;
+           else
+               length = vars->bufferSize;
+           if ((length + vars->position) > vars->readEnd)
+               length = vars->readEnd - vars->position;
+
+           vars->lastRead = length;
+           if (length)
+           {
+//if (length != vars->bufferSize) HIBLOG("short read of %qx ends@ %qx\n", length, offset + length);
+               err = IOStartPolledIO(vars->pollers, kIOPolledRead, vars->bufferHalf, offset, length);
+               if (kIOReturnSuccess != err)
+                   break;
+               vars->pollers->io = true;
+           }
+
+           vars->bufferHalf = vars->bufferHalf ? 0 : vars->bufferSize;
+           vars->bufferOffset = 0;
+
+#if CRYPTO
+            if (cryptvars)
+            {
+                uint8_t thisVector[AES_BLOCK_SIZE];
+                AbsoluteTime startTime, endTime;
+
+                // save initial vector for following decrypts
+                bcopy(&cryptvars->aes_iv[0], &thisVector[0], AES_BLOCK_SIZE);
+                bcopy(vars->buffer + vars->bufferHalf + lastReadLength - AES_BLOCK_SIZE, 
+                        &cryptvars->aes_iv[0], AES_BLOCK_SIZE);
+
+                // decrypt the buffer
+                clock_get_uptime(&startTime);
+
+                aes_decrypt_cbc(vars->buffer + vars->bufferHalf,
+                                &thisVector[0],
+                                lastReadLength / AES_BLOCK_SIZE,
+                                vars->buffer + vars->bufferHalf,
+                                &cryptvars->ctx.decrypt);
+
+                clock_get_uptime(&endTime);
+                ADD_ABSOLUTETIME(&vars->cryptTime, &endTime);
+                SUB_ABSOLUTETIME(&vars->cryptTime, &startTime);
+                vars->cryptBytes += lastReadLength;
+            }
+#endif /* CRYPTO */
+       }
+    }
+    while (size);
+
+    return (err);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
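The IOPolledFile* routines above are the generalized polled-I/O path factored out of the hibernation code: open the backing file, set up the pollers and double buffer, seek within the extent map, then stream block-aligned reads or writes, optionally encrypting the buffer in place. A minimal write-side sketch under stated assumptions: 'vars' comes from a successful IOPolledFileOpen(), 'openState' is whichever IOPolledInterface open state the caller uses, 'cryptvars' may be NULL for an unencrypted image, and 'payload'/'payloadLen' are hypothetical; poller shutdown is omitted. This is illustrative only, not code from the commit.

    static IOReturn
    writePayloadSketch(IOPolledFileIOVars * vars, uint32_t openState,
                       IOPolledFileCryptVars * cryptvars,
                       const uint8_t * payload, IOByteCount payloadLen)
    {
        IOReturn err;

        // Probe/open the pollers and carve the shared I/O buffer into two halves.
        err = IOPolledFilePollersSetup(vars, openState);
        if (kIOReturnSuccess != err) return (err);

        // Position at the start of the file's extent map.
        err = IOPolledFileSeek(vars, 0);
        if (kIOReturnSuccess != err) return (err);

        // Stream the payload through the double buffer.
        err = IOPolledFileWrite(vars, payload, payloadLen, cryptvars);
        if (kIOReturnSuccess != err) return (err);

        // A NULL/0 write pads out to the block size and flushes the final buffer,
        // matching the (!bytes && !size) case in IOPolledFileWrite() above.
        err = IOPolledFileWrite(vars, NULL, 0, cryptvars);
        if (kIOReturnSuccess != err) return (err);

        // Nothing extra to write back and nothing to discard on close.
        return IOPolledFileClose(&vars, 0, NULL, 0, 0, 0);
    }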
index c4bbb35135a11069ad792088fc1d5d47d62f61b9..1d9cf8f9d6314ca11eff3bd0649da55eb0a563d0 100644
@@ -95,7 +95,7 @@ class IORegistryPlane : public OSObject {
     int                        reserved[2];
 
 public:
-    virtual bool serialize(OSSerialize *s) const;
+    virtual bool serialize(OSSerialize *s) const APPLE_KEXT_OVERRIDE;
 };
 
 OSDefineMetaClassAndStructors(IORegistryPlane, OSObject)
index 33e04ed5d62fa078d35a69f2ad4f3acb07c31f8c..20ae27af841b02911d53eefddce9fed6a8a73e35 100644
 //#define IORDEBUG_LEGEND 1
 
 #ifdef IORDEBUG_LEGEND
-#define IORLEGENDLOG(fmt, args...)      \
-do {                                    \
-IOLog("IOReportLegend | ");           \
-IOLog(fmt, ##args);                     \
-IOLog("\n");                            \
-} while(0)
+    #define IORLEGENDLOG(fmt, args...)      \
+    do {                                    \
+        IOLog("IOReportLegend | ");         \
+        IOLog(fmt, ##args);                 \
+        IOLog("\n");                        \
+    } while(0)
 #else
-#define IORLEGENDLOG(fmt, args...)
+    #define IORLEGENDLOG(fmt, args...)
 #endif
 
 
@@ -101,25 +101,33 @@ IOReportLegend::addReporterLegend(IOService *reportingService,
                                   const char *subGroupName)
 {
     IOReturn res = kIOReturnError;
-    IOReportLegend *legend;
+    IOReportLegend *legend = NULL;
+    OSObject *curLegend = NULL;
     
     // No need to check groupName and subGroupName because optional params
     if (!reportingService || !reporter) {
         goto finish;
     }
     
-    legend = IOReportLegend::with(OSDynamicCast(OSArray, reportingService->getProperty(kIOReportLegendKey)));
-    
-    if (legend)
-    {
-        legend->addReporterLegend(reporter, groupName, subGroupName);
-        reportingService->setProperty(kIOReportLegendKey, legend->getLegend());
-        reportingService->setProperty(kIOReportLegendPublicKey, true);
-        legend->free();
-        res = kIOReturnSuccess;
-    }
+    // It's fine if the legend doesn't exist (IOReportLegend::with(NULL)
+    // is how you make an empty legend).  If it's not an array, then
+    // we're just going to replace it.
+    curLegend = reportingService->copyProperty(kIOReportLegendKey);
+    legend = IOReportLegend::with(OSDynamicCast(OSArray, curLegend));
+    if (!legend)        goto finish;
+
+    // Add the reporter's entries and update the service property.
+    // The overwrite triggers a release of the old legend array.
+    legend->addReporterLegend(reporter, groupName, subGroupName);
+    reportingService->setProperty(kIOReportLegendKey, legend->getLegend());
+    reportingService->setProperty(kIOReportLegendPublicKey, true);
+
+    res = kIOReturnSuccess;
     
 finish:
+    if (legend)         legend->release();
+    if (curLegend)      curLegend->release();
+
     return res;
 }
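This rework takes a copyProperty() reference to any existing legend array, appends the reporter's entries, republishes the property, and releases both the temporary IOReportLegend and the copied array on every exit path (previously the legend was freed only on success). A hedged caller-side sketch, where 'this' is some IOService subclass and the group and subgroup strings are purely illustrative:

    IOSimpleReporter * reporter =
        IOSimpleReporter::with(this, kIOReportCategoryPower, kIOReportUnitNone);
    if (reporter)
    {
        // Static entry point shown above: copies the current legend, adds this
        // reporter's channels, and republishes kIOReportLegendKey on the service.
        IOReportLegend::addReporterLegend(this, reporter,
                                          "ExampleGroup", "ExampleSubGroup");
        // Keep 'reporter' retained for later channel updates, or release it
        // once it is no longer needed.
    }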
 
index 1a79a2de6a11732d74024641656a71d2aa0e8a7d..ec94a3d24e83efd9b0635cafba5641da03246596 100644
@@ -59,11 +59,11 @@ do {                            \
 
 #define PREFL_MEMOP_FAIL(__val, __type) do {  \
     if (__val <= 0) {  \
-        IORERROR("%s - %s <= 0!", __func__, #__val);  \
+        IORERROR("%s - %s <= 0!\n", __func__, #__val);  \
         res = kIOReturnUnderrun;  \
         goto finish;  \
     }  else if (__val > INT_MAX / (int)sizeof(__type)) {  \
-        IORERROR("%s - %s > INT_MAX / sizeof(%s)!", __func__, #__val, #__type);\
+        IORERROR("%s - %s > INT_MAX / sizeof(%s)!\n",__func__,#__val,#__type);\
         res = kIOReturnOverrun;  \
         goto finish;  \
     }  \
index 96bb4fc6c13cdc7a90a7ec22194b61dd6d48e047..bed5b5e4e5917bc23d33816cf30dcf0bf346c554 100644
 #include <IOKit/IOHibernatePrivate.h>
 #include <IOKit/IOInterruptAccountingPrivate.h>
 #include <IOKit/IOKernelReporters.h>
+#include <IOKit/AppleKeyStoreInterface.h>
+#include <IOKit/IOCPU.h>
 #include <mach/sync_policy.h>
 #include <IOKit/assert.h>
 #include <sys/errno.h>
+#include <sys/kdebug.h>
+#include <string.h>
 
 #include <machine/pal_routines.h>
 
@@ -147,6 +151,11 @@ const OSSymbol *           gIOAppPowerStateInterest;
 const OSSymbol *               gIOPriorityPowerStateInterest;
 const OSSymbol *               gIOConsoleSecurityInterest;
 
+const  OSSymbol *               gAKSGetKey;
+#if defined(__i386__) || defined(__x86_64__)
+const OSSymbol *                gIOCreateEFIDevicePathSymbol;
+#endif
+
 static OSDictionary *          gNotifications;
 static IORecursiveLock *       gNotificationLock;
 
@@ -160,6 +169,7 @@ static int                  gOutstandingJobs;
 static int                     gNumConfigThreads;
 static int                     gNumWaitingThreads;
 static IOLock *                        gIOServiceBusyLock;
+static bool             gCPUsRunning;
 
 static thread_t                        gIOTerminateThread;
 static UInt32                  gIOTerminateWork;
@@ -173,12 +183,6 @@ static OSData *                    gIOConsoleUsersSeedValue;
 
 extern const OSSymbol *                gIODTPHandleKey;
 
-const OSSymbol *               gIOPlatformSleepActionKey;
-const OSSymbol *               gIOPlatformWakeActionKey;
-const OSSymbol *               gIOPlatformQuiesceActionKey;
-const OSSymbol *               gIOPlatformActiveActionKey;
-const OSSymbol *               gIOPlatformHaltRestartActionKey;
-
 const OSSymbol *               gIOPlatformFunctionHandlerSet;
 
 static IOLock *                        gIOConsoleUsersLock;
@@ -266,6 +270,7 @@ static IORecursiveLock *sCpuDelayLock = IORecursiveLockAlloc();
 static OSArray         *sCpuLatencyHandlers[kCpuNumDelayTypes];
 const OSSymbol         *sCPULatencyFunctionName[kCpuNumDelayTypes];
 static OSNumber * sCPULatencyHolder[kCpuNumDelayTypes];
+static char sCPULatencyHolderName[kCpuNumDelayTypes][128];
 static OSNumber * sCPULatencySet[kCpuNumDelayTypes];
 
 static void
@@ -346,12 +351,6 @@ void IOService::initialize( void )
     gIOConsoleSessionScreenLockedTimeKey = OSSymbol::withCStringNoCopy(kIOConsoleSessionScreenLockedTimeKey);
 
     gIOConsoleUsersSeedValue          = OSData::withBytesNoCopy(&gIOConsoleUsersSeed, sizeof(gIOConsoleUsersSeed));
-       
-    gIOPlatformSleepActionKey      = OSSymbol::withCStringNoCopy(kIOPlatformSleepActionKey);
-    gIOPlatformWakeActionKey       = OSSymbol::withCStringNoCopy(kIOPlatformWakeActionKey);
-    gIOPlatformQuiesceActionKey            = OSSymbol::withCStringNoCopy(kIOPlatformQuiesceActionKey);
-    gIOPlatformActiveActionKey     = OSSymbol::withCStringNoCopy(kIOPlatformActiveActionKey);
-    gIOPlatformHaltRestartActionKey = OSSymbol::withCStringNoCopy(kIOPlatformHaltRestartActionKey);
 
     gIOPlatformFunctionHandlerSet              = OSSymbol::withCStringNoCopy(kIOPlatformFunctionHandlerSet);
 #if defined(__i386__) || defined(__x86_64__)
@@ -364,9 +363,12 @@ void IOService::initialize( void )
        sCPULatencyHolder[idx] = OSNumber::withNumber(0ULL, 64);
         assert(sCPULatencySet[idx] && sCPULatencyHolder[idx]);
     }
+    gIOCreateEFIDevicePathSymbol = OSSymbol::withCString("CreateEFIDevicePath");
 #endif
     gNotificationLock          = IORecursiveLockAlloc();
 
+    gAKSGetKey                   = OSSymbol::withCStringNoCopy(AKS_PLATFORM_FUNCTION_GETKEY);
+
     assert( gIOServicePlane && gIODeviceMemoryKey
         && gIOInterruptControllersKey && gIOInterruptSpecifiersKey
         && gIOResourcesKey && gNotifications && gNotificationLock
@@ -413,6 +415,16 @@ void IOService::initialize( void )
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
+#if defined(__i386__) || defined(__x86_64__)
+extern "C" {
+
+const char *getCpuDelayBusStallHolderName(void) {
+    return sCPULatencyHolderName[kCpuDelayBusStall];
+}
+
+}
+#endif
+
 #if IOMATCHDEBUG
 static UInt64 getDebugFlags( OSDictionary * props )
 {
@@ -453,20 +465,14 @@ void IOService::stop( IOService * provider )
 
 bool IOService::init( OSDictionary * dictionary )
 {
-    bool ret = false;
+    bool ret;
 
     ret = super::init(dictionary);
-
-    if (!ret)
-        goto done;
+    if (!ret)     return (false);
+    if (reserved) return (true);
 
     reserved = IONew(ExpansionData, 1);
-
-    if (!reserved) {
-        ret = false;
-        goto done;
-    }
-
+    if (!reserved) return (false);
     bzero(reserved, sizeof(*reserved));
 
     /*
@@ -480,33 +486,22 @@ bool IOService::init( OSDictionary * dictionary )
      * which should be unlikely).
      */
     reserved->interruptStatisticsLock = IOLockAlloc(); 
+    if (!reserved->interruptStatisticsLock) return (false);
 
-    if (!reserved->interruptStatisticsLock) {
-        ret = false;
-        goto done;
-    }
-
-done:
-    return ret;
+    return (true);
 }
 
 bool IOService::init( IORegistryEntry * from,
                       const IORegistryPlane * inPlane )
 {
-    bool ret = false;
+    bool ret;
 
     ret = super::init(from, inPlane);
-
-    if (!ret)
-        goto done;
+    if (!ret)     return (false);
+    if (reserved) return (true);
 
     reserved = IONew(ExpansionData, 1);
-
-    if (!reserved) {
-        ret = false;
-        goto done;
-    }
-
+    if (!reserved) return (false);
     bzero(reserved, sizeof(*reserved));
 
     /*
@@ -520,14 +515,9 @@ bool IOService::init( IORegistryEntry * from,
      * which should be unlikely).
      */
     reserved->interruptStatisticsLock = IOLockAlloc(); 
+    if (!reserved->interruptStatisticsLock) return (false);
 
-    if (!reserved->interruptStatisticsLock) {
-        ret = false;
-        goto done;
-    }
-
-done:
-    return ret;
+    return (true);
 }
 
 void IOService::free( void )
@@ -554,6 +544,12 @@ void IOService::free( void )
         IODelete(reserved, ExpansionData, 1);
     }
 
+    if (_numInterruptSources && _interruptSources)
+    {
+       IOFree(_interruptSources, _numInterruptSources * sizeof(IOInterruptSource));
+       _interruptSources = 0;
+    }
+
     super::free();
 }
 
@@ -603,6 +599,15 @@ void IOService::detach( IOService * provider )
 
     lockForArbitration();
 
+    uint64_t regID1 = provider->getRegistryEntryID();
+    uint64_t regID2 = getRegistryEntryID();
+    IOServiceTrace(
+       IOSERVICE_DETACH,
+       (uintptr_t) regID1,
+       (uintptr_t) (regID1 >> 32),
+       (uintptr_t) regID2,
+       (uintptr_t) (regID2 >> 32));
+
     adjParent = ((busy = (__state[1] & kIOServiceBusyStateMask))
                && (provider == getProvider()));
 
@@ -616,6 +621,7 @@ void IOService::detach( IOService * provider )
 
     if (kIOServiceInactiveState & __state[0]) {
        getMetaClass()->removeInstance(this);
+       IORemoveServicePlatformActions(this);
     }
 
     unlockForArbitration();
@@ -669,6 +675,8 @@ void IOService::registerService( IOOptionBits options )
     if( gIOPlatform && (!gIOPlatform->platformAdjustService(this)))
        return;
 
+    IOInstallServicePlatformActions(this);
+
     if( (this != gIOResources)
      && (kIOLogRegister & gIOKitDebug)) {
 
@@ -1861,7 +1869,7 @@ bool IOService::requestTerminate( IOService * provider, IOOptionBits options )
     // -- compat
     if( ok) {
         provider->terminateClient( this, options | kIOServiceRecursing );
-        ok = (0 != (__state[1] & kIOServiceRecursing));
+        ok = (0 != (kIOServiceInactiveState & __state[0]));
     }
     // --
 
@@ -1874,8 +1882,9 @@ bool IOService::terminatePhase1( IOOptionBits options )
     IOService *         client;
     OSIterator * iter;
     OSArray *   makeInactive;
-       int          waitResult = THREAD_AWAKENED;
-       bool         wait;
+    OSArray *   waitingInactive;
+    int          waitResult = THREAD_AWAKENED;
+    bool         wait;
     bool                ok;
     bool                didInactive;
     bool                startPhase2 = false;
@@ -1893,70 +1902,86 @@ bool IOService::terminatePhase1( IOOptionBits options )
     // -- compat
     if( options & kIOServiceRecursing) {
         lockForArbitration();
-       __state[0] |= kIOServiceInactiveState;
-        __state[1] |= kIOServiceRecursing;
+       if (0 == (kIOServiceInactiveState & __state[0]))
+       {
+           __state[0] |= kIOServiceInactiveState;
+           __state[1] |= kIOServiceRecursing | kIOServiceTermPhase1State;
+       }
         unlockForArbitration();
 
         return( true );
     }
     // -- 
 
-    makeInactive = OSArray::withCapacity( 16 );
-    if( !makeInactive)
-        return( false );
+    makeInactive    = OSArray::withCapacity( 16 );
+    waitingInactive = OSArray::withCapacity( 16 );
+    if(!makeInactive || !waitingInactive) return( false );
 
     victim = this;
     victim->retain();
 
-    while( victim ) {
-
+    while( victim )
+    {
        didInactive = victim->lockForArbitration( true );
-        if( didInactive) {
-            didInactive = (0 == (victim->__state[0] & kIOServiceInactiveState))
-                        || (victim->__state[1] & kIOServiceRecursing);
-            if( didInactive) {
-                victim->__state[0] |= kIOServiceInactiveState;
-                victim->__state[0] &= ~(kIOServiceRegisteredState | kIOServiceMatchedState
-                                        | kIOServiceFirstPublishState | kIOServiceFirstMatchState);
-                victim->__state[1] &= ~kIOServiceRecursing;
+        if( didInactive)
+        {
+           uint64_t regID1 = victim->getRegistryEntryID();
+           IOServiceTrace(IOSERVICE_TERM_SET_INACTIVE,
+               (uintptr_t) regID1, 
+               (uintptr_t) (regID1 >> 32),
+               (uintptr_t) victim->__state[1], 
+               (uintptr_t) 0);
 
+           enum { kRP1 = kIOServiceRecursing | kIOServiceTermPhase1State };
+            didInactive = (kRP1 == (victim->__state[1] & kRP1))
+                        || (0 == (victim->__state[0] & kIOServiceInactiveState));
+
+           if (!didInactive)
+           {
+               // a multiply attached IOService can be visited twice
+               if (-1U == waitingInactive->getNextIndexOfObject(victim, 0)) do
+               {
+                   IOLockLock(gIOServiceBusyLock);
+                   wait = (victim->__state[1] & kIOServiceTermPhase1State);
+                   if( wait) {
+                       TLOG("%s[0x%qx]::waitPhase1(%s[0x%qx])\n", 
+                           getName(), getRegistryEntryID(), victim->getName(), victim->getRegistryEntryID());
+                       victim->__state[1] |= kIOServiceTerm1WaiterState;
+                       victim->unlockForArbitration();
+                       assert_wait((event_t)&victim->__state[1], THREAD_UNINT);
+                   }
+                   IOLockUnlock(gIOServiceBusyLock);
+                   if( wait) {
+                       waitResult = thread_block(THREAD_CONTINUE_NULL);
+                       TLOG("%s[0x%qx]::did waitPhase1(%s[0x%qx])\n", 
+                           getName(), getRegistryEntryID(), victim->getName(), victim->getRegistryEntryID());
+                       victim->lockForArbitration();
+                   }
+               }
+               while (wait && (waitResult != THREAD_TIMED_OUT));
+           }
+           else
+           {
+               victim->__state[0] |= kIOServiceInactiveState;
+               victim->__state[0] &= ~(kIOServiceRegisteredState | kIOServiceMatchedState
+                                       | kIOServiceFirstPublishState | kIOServiceFirstMatchState);
+               victim->__state[1] &= ~kIOServiceRecursing;
+               victim->__state[1] |= kIOServiceTermPhase1State;
+               waitingInactive->headQ(victim);
                if (victim == this)
                {
-                   victim->__state[1] |= kIOServiceTermPhase1State;
                    if (kIOServiceTerminateNeedWillTerminate & options)
                    {
                        victim->__state[1] |= kIOServiceNeedWillTerminate;
                    }
                }
-
-                victim->_adjustBusy( 1 );
-
-            } else if (victim != this) do {
-
-               IOLockLock(gIOServiceBusyLock);
-               wait = (victim->__state[1] & kIOServiceTermPhase1State);
-               if( wait) {
-                   TLOG("%s[0x%qx]::waitPhase1(%s[0x%qx])\n", 
-                       getName(), getRegistryEntryID(), victim->getName(), victim->getRegistryEntryID());
-                       victim->__state[1] |= kIOServiceTerm1WaiterState;
-                       victim->unlockForArbitration();
-                       assert_wait((event_t)&victim->__state[1], THREAD_UNINT);
-               }
-               IOLockUnlock(gIOServiceBusyLock);
-               if( wait) {
-                   waitResult = thread_block(THREAD_CONTINUE_NULL);
-                   TLOG("%s[0x%qx]::did waitPhase1(%s[0x%qx])\n", 
-                       getName(), getRegistryEntryID(), victim->getName(), victim->getRegistryEntryID());
-                       victim->lockForArbitration();
-               }
-           } while( wait && (waitResult != THREAD_TIMED_OUT));
-
+               victim->_adjustBusy( 1 );
+           }
            victim->unlockForArbitration();
         }
-        if( victim == this)
-            startPhase2 = didInactive;
-        if( didInactive) {
-
+        if( victim == this) startPhase2 = didInactive;
+        if (didInactive)
+        {
             victim->deliverNotification( gIOTerminatedNotification, 0, 0xffffffff );
             IOUserClient::destroyUserReferences( victim );
 
@@ -1994,23 +2019,35 @@ bool IOService::terminatePhase1( IOOptionBits options )
             makeInactive->removeObject(0);
         }
     }
-
     makeInactive->release();
 
-    if( startPhase2)
+    while ((victim = (IOService *) waitingInactive->getObject(0)))
     {
-       lockForArbitration();
-       __state[1] &= ~kIOServiceTermPhase1State;
-       if (kIOServiceTerm1WaiterState & __state[1])
+       victim->retain();
+       waitingInactive->removeObject(0);
+
+       victim->lockForArbitration();
+       victim->__state[1] &= ~kIOServiceTermPhase1State;
+       if (kIOServiceTerm1WaiterState & victim->__state[1])
        {
-           __state[1] &= ~kIOServiceTerm1WaiterState;
-           TLOG("%s[0x%qx]::wakePhase1\n", getName(), getRegistryEntryID());
+           victim->__state[1] &= ~kIOServiceTerm1WaiterState;
+           TLOG("%s[0x%qx]::wakePhase1\n", victim->getName(), victim->getRegistryEntryID());
            IOLockLock( gIOServiceBusyLock );
-           thread_wakeup( (event_t) &__state[1]);
+           thread_wakeup( (event_t) &victim->__state[1]);
            IOLockUnlock( gIOServiceBusyLock );
        }
+       victim->unlockForArbitration();
+        victim->release();
+    }
+    waitingInactive->release();
+
+    if( startPhase2)
+    {
+        retain();
+       lockForArbitration();
+       scheduleTerminatePhase2(options);
        unlockForArbitration();
-       scheduleTerminatePhase2( options );
+        release();
     }
 
     return( true );
@@ -2035,16 +2072,25 @@ void IOService::setTerminateDefer(IOService * provider, bool defer)
 void IOService::scheduleTerminatePhase2( IOOptionBits options )
 {
     AbsoluteTime       deadline;
+    uint64_t           regID1;
     int                        waitResult = THREAD_AWAKENED;
     bool               wait, haveDeadline = false;
 
-    if (!(__state[0] & kIOServiceInactiveState)
-      || (__state[1] & kIOServiceTermPhase1State))             return;
+    if (!(__state[0] & kIOServiceInactiveState)) return;
 
-    options |= kIOServiceRequired;
+    regID1 = getRegistryEntryID();
+    IOServiceTrace(
+       IOSERVICE_TERM_SCHED_PHASE2,
+       (uintptr_t) regID1,
+       (uintptr_t) (regID1 >> 32),
+       (uintptr_t) __state[1],
+       (uintptr_t) options);
 
-    retain();
+    if (__state[1] & kIOServiceTermPhase1State)                return;
 
+    retain();
+    unlockForArbitration();
+    options |= kIOServiceRequired;
     IOLockLock( gJobsLock );
 
     if( (options & kIOServiceSynchronous)
@@ -2096,7 +2142,7 @@ void IOService::scheduleTerminatePhase2( IOOptionBits options )
     }
 
     IOLockUnlock( gJobsLock );
-
+    lockForArbitration();
     release();
 }
 
@@ -2229,7 +2275,7 @@ void IOService::actionDidTerminate( IOService * victim, IOOptionBits options,
 {
     OSIterator * iter;
     IOService *         client;
-    bool defer = false;
+    bool         defer;
     uint64_t     regID1, regID2 = victim->getRegistryEntryID();
 
     victim->messageClients( kIOMessageServiceIsTerminated, (void *)(uintptr_t) options );
@@ -2242,6 +2288,7 @@ void IOService::actionDidTerminate( IOService * victim, IOOptionBits options,
             TLOG("%s[0x%qx]::didTerminate(%s[0x%qx], %08llx)\n",
                     client->getName(), regID1, 
                     victim->getName(), regID2, (long long)options);
+            defer = false;
             client->didTerminate( victim, options, &defer );
 
            IOServiceTrace(
@@ -2397,22 +2444,48 @@ void IOService::terminateWorker( IOOptionBits options )
             gIOTerminatePhase2List->removeObject(0);
             IOLockUnlock( gJobsLock );
 
+           uint64_t regID1 = victim->getRegistryEntryID();
+           IOServiceTrace(
+               IOSERVICE_TERM_START_PHASE2,
+               (uintptr_t) regID1,
+               (uintptr_t) (regID1 >> 32),
+               (uintptr_t) 0,
+               (uintptr_t) 0);
+
             while( victim ) {
         
                 doPhase2 = victim->lockForArbitration( true );
                 if( doPhase2) {
                     doPhase2 = (0 != (kIOServiceInactiveState & victim->__state[0]));
                     if( doPhase2) {
+
+                       uint64_t regID1 = victim->getRegistryEntryID();
+                       IOServiceTrace(
+                           IOSERVICE_TERM_TRY_PHASE2,
+                           (uintptr_t) regID1,
+                           (uintptr_t) (regID1 >> 32),
+                           (uintptr_t) victim->__state[1],
+                           (uintptr_t) 0);
+
                         doPhase2 = (0 == (victim->__state[1] & kIOServiceTermPhase2State))
                                 && (0 == (victim->__state[1] & kIOServiceConfigState));
 
                        if (doPhase2 && (iter = victim->getClientIterator())) {
                            while (doPhase2 && (client = (IOService *) iter->getNextObject())) {
                                doPhase2 = (0 == (client->__state[1] & kIOServiceStartState));
-
-                               if (!doPhase2) TLOG("%s[0x%qx]::defer phase2(%s[0x%qx])\n", 
-                                              victim->getName(), victim->getRegistryEntryID(), 
-                                              client->getName(), client->getRegistryEntryID());
+                               if (!doPhase2)
+                               {
+                                   uint64_t regID1 = client->getRegistryEntryID();
+                                   IOServiceTrace(
+                                       IOSERVICE_TERM_UC_DEFER,
+                                       (uintptr_t) regID1,
+                                       (uintptr_t) (regID1 >> 32),
+                                       (uintptr_t) client->__state[1],
+                                       (uintptr_t) 0);
+                                   TLOG("%s[0x%qx]::defer phase2(%s[0x%qx])\n",
+                                          victim->getName(), victim->getRegistryEntryID(),
+                                          client->getName(), client->getRegistryEntryID());
+                               }
                            }
                            iter->release();
                        }
@@ -3510,9 +3583,9 @@ UInt32 IOService::_adjustBusy( SInt32 delta )
             next->unlockForArbitration();
 
         if( (wasQuiet || nowQuiet) ) {
-           uint64_t regID = next->getRegistryEntryID();
 
-               IOServiceTrace(
+           uint64_t regID = next->getRegistryEntryID();
+           IOServiceTrace(
                ((wasQuiet/*nowBusy*/) ? IOSERVICE_BUSY : IOSERVICE_NONBUSY),
                (uintptr_t) regID, 
                (uintptr_t) (regID >> 32),
@@ -3635,13 +3708,54 @@ IOReturn IOService::waitForState( UInt32 mask, UInt32 value,
 
 IOReturn IOService::waitQuiet( uint64_t timeout )
 {
-       IOReturn ret;
+    IOReturn ret;
     ret = waitForState( kIOServiceBusyStateMask, 0, timeout );
-       if ((kIOReturnTimeout == ret) && (timeout >= 30000000000) && (kIOWaitQuietPanics & gIOKitDebug))
+    if ((kIOReturnTimeout == ret) && (timeout >= 41000000000) && (kIOWaitQuietPanics & gIOKitDebug))
+    {
+       IORegistryIterator * iter;
+       OSOrderedSet       * set;
+       OSOrderedSet       * leaves;
+       IOService          * next;
+       IOService          * nextParent;
+       char               * string;
+       char               * s;
+       size_t               len, l;
+
+       len = 256;
+       string = IONew(char, len);
+       set = NULL;
+        iter = IORegistryIterator::iterateOver(this, gIOServicePlane, kIORegistryIterateRecursively);
+        leaves = OSOrderedSet::withCapacity(4);
+       if (iter) set = iter->iterateAll();
+       if (string && leaves && set)
        {
-               panic("IOService 0x%llx (%s) busy timeout", getRegistryEntryID(), getName());
+           while ((next = (IOService *) set->getLastObject()))
+           {
+               if (next->getBusyState())
+               {
+                   leaves->setObject(next);
+                   nextParent = next;
+                   while ((nextParent = nextParent->getProvider()))
+                   {
+                       set->removeObject(nextParent);
+                       leaves->removeObject(nextParent);
+                   }
+               }
+               set->removeObject(next);            
+           }
+           s = string;
+           while ((next = (IOService *) leaves->getLastObject()))
+           {
+               l = snprintf(s, len, "%s'%s'", ((s == string) ? "" : ", "), next->getName());
+               if (l >= len) break;
+               s += l;
+               len -= l;
+               leaves->removeObject(next);         
+           }
        }
-       return (ret);
+        panic("busy timeout(%llds): %s", timeout / 1000000000ULL, string ? string : "");
+    }
+    return (ret);
 }
 
 IOReturn IOService::waitQuiet( mach_timespec_t * timeout )
@@ -3781,6 +3895,11 @@ IOReturn IOService::waitMatchIdle( UInt32 msToWait )
         return( kIOReturnSuccess );
 }
 
+void IOService::cpusRunning(void)
+{
+    gCPUsRunning = true;
+}
+
 void _IOServiceJob::pingConfig( _IOServiceJob * job )
 {
     int                count;
@@ -3797,7 +3916,9 @@ void _IOServiceJob::pingConfig( _IOServiceJob * job )
 //    if( gNumConfigThreads) count++;// assume we're called from a config thread
 
     create = (  (gOutstandingJobs > count)
-               && (gNumConfigThreads < kMaxConfigThreads) );
+               && ((gNumConfigThreads < kMaxConfigThreads) 
+            || (job->nub == gIOResources) 
+            || !gCPUsRunning));
     if( create) {
        gNumConfigThreads++;
        gNumWaitingThreads++;
@@ -4644,6 +4765,13 @@ bool IOResources::init( OSDictionary * dictionary )
     return true;
 }
 
+IOReturn IOResources::newUserClient(task_t owningTask, void * securityID,
+                                    UInt32 type,  OSDictionary * properties,
+                                    IOUserClient ** handler)
+{
+    return( kIOReturnUnsupported );
+}
+
 IOWorkLoop * IOResources::getWorkLoop() const
 {
     // If we are the resource root
@@ -5568,6 +5696,10 @@ requireMaxCpuDelay(IOService * service, UInt32 ns, UInt32 delayType)
 
     if (setCpuDelay)
     {
+        if (holder && debug_boot_arg) {
+            strlcpy(sCPULatencyHolderName[delayType], holder->getName(), sizeof(sCPULatencyHolderName[delayType]));
+        }
+
         // Must be safe to call from locked context
         if (delayType == kCpuDelayBusStall)
         {
@@ -5785,6 +5917,7 @@ IOReturn IOService::addInterruptStatistics(IOInterruptAccountingData * statistic
   IOReportLegend * legend = NULL;
   IOInterruptAccountingData * oldValue = NULL;
   IOInterruptAccountingReporter * newArray = NULL;
+  char subgroupName[64];
   int newArraySize = 0;
   int i = 0;
 
@@ -5862,7 +5995,7 @@ IOReturn IOService::addInterruptStatistics(IOInterruptAccountingData * statistic
      * TODO: Some statistics do in fact have common units (time); should this be
      * split into separate reporters to communicate this?
      */
-     reserved->interruptStatisticsArray[source].reporter = IOSimpleReporter::with(this, kIOReportCategoryInterrupt, kIOReportUnitNone);
+     reserved->interruptStatisticsArray[source].reporter = IOSimpleReporter::with(this, kIOReportCategoryPower, kIOReportUnitNone);
 
     /*
      * Each statistic is given an identifier based on the interrupt index (which
@@ -5885,19 +6018,13 @@ IOReturn IOService::addInterruptStatistics(IOInterruptAccountingData * statistic
      */
     legend = IOReportLegend::with(OSDynamicCast(OSArray, getProperty(kIOReportLegendKey)));
 
-    if ((source >= IA_MAX_SUBGROUP_NAME) || (source < 0)) {
-      /*
-       * Either we're using a nonsensical index (should never happen), or the
-       * index is larger than anticipated (may happen, almost certainly won't).
-       * This may move to live generation of the names in the future, but for
-       * now, point both cases to a generic subgroup name (this will confuse
-       * clients, unfortunately).
-       */
-      legend->addReporterLegend(reserved->interruptStatisticsArray[source].reporter, kInterruptAccountingGroupName, kInterruptAccountingGenericSubgroupName);
-    } else {
-      legend->addReporterLegend(reserved->interruptStatisticsArray[source].reporter, kInterruptAccountingGroupName, kInterruptAccountingSubgroupNames[source]);
-    }
-
+    /*
+     * Note that while we compose the subgroup name, we do not need to
+     * manage its lifecycle (the reporter will handle this).
+     */
+    snprintf(subgroupName, sizeof(subgroupName), "%s %d", getName(), source);
+    subgroupName[sizeof(subgroupName) - 1] = 0;
+    legend->addReporterLegend(reserved->interruptStatisticsArray[source].reporter, kInterruptAccountingGroupName, subgroupName);
     setProperty(kIOReportLegendKey, legend->getLegend());
     legend->release();
 
index 7c363a269a916b6c68163f1036b9046d04430af8..30612ce42f2012abd04f2afa0c9f33e28febb773 100644
@@ -47,6 +47,7 @@
 
 #include <sys/proc.h>
 #include <sys/proc_internal.h>
+#include <sys/sysctl.h>
 #include <libkern/OSDebug.h>
 #include <kern/thread.h>
 
@@ -81,18 +82,32 @@ OSDefineMetaClassAndStructors(IOPMprot, OSObject)
 //******************************************************************************
 
 static bool                  gIOPMInitialized       = false;
-static uint32_t              gIOPMBusyCount         = 0;
-static uint32_t              gIOPMWorkCount         = 0;
+static uint32_t              gIOPMBusyRequestCount  = 0;
+static uint32_t              gIOPMWorkInvokeCount   = 0;
 static uint32_t              gIOPMTickleGeneration  = 0;
 static IOWorkLoop *          gIOPMWorkLoop          = 0;
 static IOPMRequestQueue *    gIOPMRequestQueue      = 0;
 static IOPMRequestQueue *    gIOPMReplyQueue        = 0;
 static IOPMWorkQueue *       gIOPMWorkQueue         = 0;
-static IOPMCompletionQueue * gIOPMFreeQueue         = 0;
+static IOPMCompletionQueue * gIOPMCompletionQueue   = 0;
 static IOPMRequest *         gIOPMRequest           = 0;
 static IOService *           gIOPMRootNode          = 0;
 static IOPlatformExpert *    gPlatform              = 0;
 
+static char                  gIOSpinDumpKextName[128];
+static char                  gIOSpinDumpDelayType[16];
+static uint32_t              gIOSpinDumpDelayDuration = 0;
+
+static SYSCTL_STRING(_debug, OID_AUTO, swd_kext_name,
+        CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
+        &gIOSpinDumpKextName, sizeof(gIOSpinDumpKextName), "");
+static SYSCTL_STRING(_debug, OID_AUTO, swd_delay_type,
+        CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
+        &gIOSpinDumpDelayType, sizeof(gIOSpinDumpDelayType), "");
+static SYSCTL_INT(_debug, OID_AUTO, swd_delay_duration,
+        CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
+        &gIOSpinDumpDelayDuration, 0, "");
+
 const OSSymbol *             gIOPMPowerClientDevice     = 0;
 const OSSymbol *             gIOPMPowerClientDriver     = 0;
 const OSSymbol *             gIOPMPowerClientChildProxy = 0;
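The three sysctls added above expose the spin-dump controls as debug.swd_kext_name, debug.swd_delay_type, and debug.swd_delay_duration. A hedged userspace sketch of reading and setting them (appropriate privileges may be required; the duration value is purely illustrative, not from the commit):

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>

    int main(void)
    {
        char kext[128];
        size_t len = sizeof(kext);

        // Read the kext name filter currently configured for spin dumps.
        if (sysctlbyname("debug.swd_kext_name", kext, &len, NULL, 0) == 0)
            printf("swd_kext_name = %s\n", kext);

        // Set a hypothetical delay duration (backed by gIOSpinDumpDelayDuration).
        unsigned int duration = 300;
        if (sysctlbyname("debug.swd_delay_duration", NULL, NULL,
                         &duration, sizeof(duration)) != 0)
            perror("debug.swd_delay_duration");
        return 0;
    }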
@@ -102,6 +117,7 @@ const OSSymbol *             gIOPMPowerClientRootDomain = 0;
 static const OSSymbol *      gIOPMPowerClientAdvisoryTickle = 0;
 static bool                  gIOPMAdvisoryTickleEnabled = true;
 static thread_t              gIOPMWatchDogThread        = NULL;
+uint32_t                     gCanSleepTimeout           = 0;
 
 static uint32_t getPMRequestType( void )
 {
@@ -122,6 +138,8 @@ static IOPMRequestTag getPMRequestTag( void )
     return tag;
 }
 
+SYSCTL_UINT(_kern, OID_AUTO, pmtimeout, CTLFLAG_RW | CTLFLAG_LOCKED, &gCanSleepTimeout, 0, "Power Management Timeout");
+
 //******************************************************************************
 // Macros
 //******************************************************************************
@@ -159,8 +177,12 @@ do {                                  \
 #define PM_LOCK_SLEEP(event, dl)    IOLockSleepDeadline(fPMLock, event, dl, THREAD_UNINT)
 #define PM_LOCK_WAKEUP(event)       IOLockWakeup(fPMLock, event, false)
 
+#define us_per_s                    1000000
 #define ns_per_us                   1000
-#define k30Seconds                  (30*1000000)
+#define k30Seconds                  (30*us_per_s)
+#define k5Seconds                   ( 5*us_per_s)
+#define kCanSleepMaxTimeReq         k30Seconds
+#define kMaxTimeRequested           k30Seconds
 #define kMinAckTimeoutTicks         (10*1000000)
 #define kIOPMTardyAckSPSKey         "IOPMTardyAckSetPowerState"
 #define kIOPMTardyAckPSCKey         "IOPMTardyAckPowerStateChange"
@@ -298,22 +320,21 @@ void IOService::PMinit( void )
             {
                 gIOPMRequestQueue = IOPMRequestQueue::create(
                     this, OSMemberFunctionCast(IOPMRequestQueue::Action,
-                        this, &IOService::servicePMRequestQueue));
+                        this, &IOService::actionPMRequestQueue));
 
                 gIOPMReplyQueue = IOPMRequestQueue::create(
                     this, OSMemberFunctionCast(IOPMRequestQueue::Action,
-                        this, &IOService::servicePMReplyQueue));
+                        this, &IOService::actionPMReplyQueue));
 
-                gIOPMWorkQueue = IOPMWorkQueue::create(
-                    this,
+                gIOPMWorkQueue = IOPMWorkQueue::create(this,
                     OSMemberFunctionCast(IOPMWorkQueue::Action, this,
-                        &IOService::servicePMRequest),
+                        &IOService::actionPMWorkQueueInvoke),
                     OSMemberFunctionCast(IOPMWorkQueue::Action, this,
-                        &IOService::retirePMRequest));
+                        &IOService::actionPMWorkQueueRetire));
 
-                gIOPMFreeQueue = IOPMCompletionQueue::create(
+                gIOPMCompletionQueue = IOPMCompletionQueue::create(
                     this, OSMemberFunctionCast(IOPMCompletionQueue::Action,
-                        this, &IOService::servicePMFreeQueue));
+                        this, &IOService::actionPMCompletionQueue));
 
                 if (gIOPMWorkLoop->addEventSource(gIOPMRequestQueue) !=
                     kIOReturnSuccess)
@@ -336,11 +357,13 @@ void IOService::PMinit( void )
                     gIOPMWorkQueue = 0;
                 }
 
-                if (gIOPMWorkLoop->addEventSource(gIOPMFreeQueue) !=
+                // Must be added after the work queue, which pushes requests
+                // to the completion queue without signaling the work loop.
+                if (gIOPMWorkLoop->addEventSource(gIOPMCompletionQueue) !=
                     kIOReturnSuccess)
                 {
-                    gIOPMFreeQueue->release();
-                    gIOPMFreeQueue = 0;
+                    gIOPMCompletionQueue->release();
+                    gIOPMCompletionQueue = 0;
                 }
 
                 gIOPMPowerClientDevice =
@@ -360,9 +383,12 @@ void IOService::PMinit( void )
 
                 gIOPMPowerClientRootDomain =
                     OSSymbol::withCStringNoCopy( "RootDomainPower" );
+
+                gIOSpinDumpKextName[0] = '\0';
+                gIOSpinDumpDelayType[0] = '\0';
             }
 
-            if (gIOPMRequestQueue && gIOPMReplyQueue && gIOPMFreeQueue)
+            if (gIOPMRequestQueue && gIOPMReplyQueue && gIOPMCompletionQueue)
                 gIOPMInitialized = true;
         }
         if (!gIOPMInitialized)
@@ -432,6 +458,11 @@ void IOService::PMinit( void )
         fDriverCallEntry = thread_call_allocate(
             (thread_call_func_t) &IOService::pmDriverCallout, this);
         assert(fDriverCallEntry);
+        if (kIOKextSpinDump & gIOKitDebug)
+        {
+            fSpinDumpTimer = thread_call_allocate(
+                &IOService::spindump_timer_expired, (thread_call_param_t)this);
+        }
 
         // Check for powerChangeDone override.
         if (OSMemberFunctionCast(void (*)(void),
@@ -505,6 +536,11 @@ void IOService::PMfree( void )
             thread_call_free(fDriverCallEntry);
             fDriverCallEntry = NULL;
         }
+        if ( fSpinDumpTimer ) {
+            thread_call_cancel(fSpinDumpTimer);
+            thread_call_free(fSpinDumpTimer);
+            fSpinDumpTimer = NULL;
+        }
         if ( fPMLock ) {
             IOLockFree(fPMLock);
             fPMLock = NULL;
@@ -827,7 +863,7 @@ IOReturn IOService::addPowerChild( IOService * child )
         requests[1]->fArg0 = connection;
         requests[2]->fArg0 = connection;
 
-        submitPMRequest( requests, 3 );
+        submitPMRequests( requests, 3 );
         return kIOReturnSuccess;
     }
     while (false);
@@ -1018,8 +1054,12 @@ IOReturn IOService::removePowerChild( IOPowerConnection * theNub )
             {
                 stop_ack_timer();
 
-                // Request unblocked, work queue
-                // should re-scan all busy requests.
+                // This parent may have a request in the work queue that is
+                // blocked on fHeadNotePendingAcks=0. And removePowerChild()
+                // is called while executing the child's PMstop request so they
+                // can occur simultaneously. IOPMWorkQueue::checkForWork() must
+                // restart and check all request queues again.
+
                 gIOPMWorkQueue->incrementProducerCount();
             }
         }
@@ -1308,7 +1348,6 @@ IOPMPowerFlags IOService::registerInterestedDriver( IOService * driver )
 
 IOReturn IOService::deRegisterInterestedDriver( IOService * driver )
 {
-    IOPMinformeeList *  list;
     IOPMinformee *      item;
     IOPMRequest *       request;
     bool                signal;
@@ -1319,18 +1358,25 @@ IOReturn IOService::deRegisterInterestedDriver( IOService * driver )
         return IOPMNotPowerManaged;
 
     PM_LOCK();
+    if (fInsertInterestSet)
+    {
+        fInsertInterestSet->removeObject(driver);
+    }
+
+    item = fInterestedDrivers->findItem(driver);
+    if (!item)
+    {
+        PM_UNLOCK();
+        return kIOReturnNotFound;
+    }
+
     signal = (!fRemoveInterestSet && !fInsertInterestSet);
     if (fRemoveInterestSet == NULL)
         fRemoveInterestSet = OSSet::withCapacity(4);
     if (fRemoveInterestSet)
     {
         fRemoveInterestSet->setObject(driver);
-        if (fInsertInterestSet)
-            fInsertInterestSet->removeObject(driver);
-
-        list = fInterestedDrivers;
-        item = list->findItem(driver);
-        if (item && item->active)
+        if (item->active)
         {
             item->active = false;
             waitForPMDriverCall( driver );
@@ -1740,6 +1786,13 @@ void IOService::handlePowerDomainWillChangeTo( IOPMRequest * request )
         maxPowerState = fControllingDriver->maxCapabilityForDomainState(
                             combinedPowerFlags);
 
+        if (parentChangeFlags & kIOPMDomainPowerDrop)
+        {
+            // fMaxPowerState sets a limit on self-initiated power changes.
+            // Update it before a parent power drop.
+            fMaxPowerState = maxPowerState;
+        }
+
         // Use kIOPMSynchronize below instead of kIOPMRootBroadcastFlags
         // to avoid propagating the root change flags if any service must
         // change power state due to root's will-change notification.
@@ -1841,6 +1894,13 @@ void IOService::handlePowerDomainDidChangeTo( IOPMRequest * request )
         maxPowerState = fControllingDriver->maxCapabilityForDomainState(
                             fParentsCurrentPowerFlags);
 
+        if ((parentChangeFlags & kIOPMDomainPowerDrop) == 0)
+        {
+            // fMaxPowerState sets a limit on self-initiated power changes.
+            // Update it after a parent power rise.
+            fMaxPowerState = maxPowerState;
+        }
+
         if (fInitialPowerChange)
         {
             computeDesire = true;
@@ -2301,6 +2361,44 @@ IOReturn IOService::changePowerStateForRootDomain( IOPMPowerStateIndex ordinal )
     return requestPowerState( gIOPMPowerClientRootDomain, ordinal );
 }
 
+//*********************************************************************************
+// [public for PMRD] quiescePowerTree
+//
+// For root domain to issue a request to quiesce the power tree.
+// Supplied callback invoked upon completion.
+//*********************************************************************************
+
+IOReturn IOService::quiescePowerTree(
+    void * target, IOPMCompletionAction action, void * param )
+{
+    IOPMRequest * request;
+
+    if (!initialized)
+        return kIOPMNotYetInitialized;
+    if (!target || !action)
+        return kIOReturnBadArgument;
+
+    OUR_PMLog(kPMLogQuiescePowerTree, 0, 0);
+
+    // Target the root node instead of root domain. This is to avoid blocking
+    // the quiesce request behind an existing root domain request in the work
+    // queue. Root parent and root domain requests in the work queue must not
+    // block the completion of the quiesce request.
+
+    request = acquirePMRequest(gIOPMRootNode, kIOPMRequestTypeQuiescePowerTree);
+    if (!request)
+        return kIOReturnNoMemory;
+
+    request->installCompletionAction(target, action, param);
+
+    // Submit through the normal request flow. This will make sure any request
+    // already in the request queue will get pushed over to the work queue for
+    // execution. Any request submitted after this request may not be serviced.
+
+    submitPMRequest( request );
+    return kIOReturnSuccess;
+}
+
 //*********************************************************************************
 // [private] requestPowerState
 //*********************************************************************************
@@ -3847,9 +3945,11 @@ void IOService::driverSetPowerState( void )
     if (assertPMDriverCall(&callEntry))
     {
         OUR_PMLog(          kPMLogProgramHardware, (uintptr_t) this, powerState);
+        start_spindump_timer("SetState");
         clock_get_uptime(&fDriverCallStartTime);
         result = fControllingDriver->setPowerState( powerState, this );
         clock_get_uptime(&end);
+        stop_spindump_timer();
         OUR_PMLog((UInt32) -kPMLogProgramHardware, (uintptr_t) this, (UInt32) result);
 
         deassertPMDriverCall(&callEntry);
@@ -3926,17 +4026,21 @@ void IOService::driverInformPowerChange( void )
             if (fDriverCallReason == kDriverCallInformPreChange)
             {
                 OUR_PMLog(kPMLogInformDriverPreChange, (uintptr_t) this, powerState);
+                start_spindump_timer("WillChange");
                 clock_get_uptime(&informee->startTime);
                 result = driver->powerStateWillChangeTo(powerFlags, powerState, this);
                 clock_get_uptime(&end);
+                stop_spindump_timer();
                 OUR_PMLog((UInt32)-kPMLogInformDriverPreChange, (uintptr_t) this, result);
             }
             else
             {
                 OUR_PMLog(kPMLogInformDriverPostChange, (uintptr_t) this, powerState);
+                start_spindump_timer("DidChange");
                 clock_get_uptime(&informee->startTime);
                 result = driver->powerStateDidChangeTo(powerFlags, powerState, this);
                 clock_get_uptime(&end);
+                stop_spindump_timer();
                 OUR_PMLog((UInt32)-kPMLogInformDriverPostChange, (uintptr_t) this, result);
             }
 
@@ -4146,7 +4250,7 @@ void IOService::all_done( void )
     const IOPMPSEntry *     powerStatePtr;
     IOPMDriverCallEntry     callEntry;
     uint32_t                prevMachineState = fMachineState;
-    bool                    callAction = false;
+    bool                    actionCalled = false;
     uint64_t                ts;
 
     fMachineState = kIOPM_Finished;
@@ -4192,10 +4296,10 @@ void IOService::all_done( void )
     }
 
     // our power change
-    if ( fHeadNoteChangeFlags & kIOPMSelfInitiated )
+    if (fHeadNoteChangeFlags & kIOPMSelfInitiated)
     {
-        // could our driver switch to the new state?
-        if ( !( fHeadNoteChangeFlags & kIOPMNotDone) )
+        // power state changed
+        if ((fHeadNoteChangeFlags & kIOPMNotDone) == 0)
         {
             trackSystemSleepPreventers(
                 fCurrentPowerState, fHeadNotePowerState, fHeadNoteChangeFlags);
@@ -4224,7 +4328,7 @@ void IOService::all_done( void )
             OUR_PMLog(kPMLogChangeDone, fCurrentPowerState, prevPowerState);
             PM_ACTION_2(actionPowerChangeDone,
                 fHeadNotePowerState, fHeadNoteChangeFlags);
-            callAction = true;
+            actionCalled = true;
 
             powerStatePtr = &fPowerStates[fCurrentPowerState];
             fCurrentCapabilityFlags = powerStatePtr->capabilityFlags;
@@ -4252,16 +4356,14 @@ void IOService::all_done( void )
         }
     }
 
-    // parent's power change
-    if ( fHeadNoteChangeFlags & kIOPMParentInitiated)
+    // parent-initiated power change
+    if (fHeadNoteChangeFlags & kIOPMParentInitiated)
     {
         if (fHeadNoteChangeFlags & kIOPMRootChangeDown)
             ParentChangeRootChangeDown();
 
-        if (((fHeadNoteChangeFlags & kIOPMDomainWillChange) &&
-             (StateOrder(fCurrentPowerState) >= StateOrder(fHeadNotePowerState)))   ||
-              ((fHeadNoteChangeFlags & kIOPMDomainDidChange)  &&
-             (StateOrder(fCurrentPowerState) < StateOrder(fHeadNotePowerState))))
+        // power state changed
+        if ((fHeadNoteChangeFlags & kIOPMNotDone) == 0)
         {
             trackSystemSleepPreventers(
                 fCurrentPowerState, fHeadNotePowerState, fHeadNoteChangeFlags);
@@ -4284,12 +4386,11 @@ void IOService::all_done( void )
 #if PM_VARS_SUPPORT
             fPMVars->myCurrentState = fCurrentPowerState;
 #endif
-            fMaxPowerState = fControllingDriver->maxCapabilityForDomainState(fHeadNoteDomainFlags);
 
             OUR_PMLog(kPMLogChangeDone, fCurrentPowerState, prevPowerState);
             PM_ACTION_2(actionPowerChangeDone,
                 fHeadNotePowerState, fHeadNoteChangeFlags);
-            callAction = true;
+            actionCalled = true;
 
             powerStatePtr = &fPowerStates[fCurrentPowerState];
             fCurrentCapabilityFlags = powerStatePtr->capabilityFlags;
@@ -4314,7 +4415,7 @@ void IOService::all_done( void )
         fIdleTimerMinPowerState = kPowerStateZero;
     }
 
-    if (!callAction)
+    if (!actionCalled)
     {
         PM_ACTION_2(actionPowerChangeDone,
             fHeadNotePowerState, fHeadNoteChangeFlags);
@@ -4779,11 +4880,15 @@ IOReturn IOService::ParentChangeStart( void )
             // to our children.
             fMachineState     = kIOPM_SyncNotifyDidChange;
             fDriverCallReason = kDriverCallInformPreChange;
+            fHeadNoteChangeFlags |= kIOPMNotDone;
             notifyChildren();
             return IOPMWillAckLater;
         }
     }
 
+    // No power state change necessary
+    fHeadNoteChangeFlags |= kIOPMNotDone;
+
     all_done();
     return IOPMAckImplied;
 }
@@ -4817,6 +4922,7 @@ void IOService::ParentChangeRootChangeDown( void )
         {
             updatePowerClient(gIOPMPowerClientDevice, kPowerStateZero);
             computeDesiredState(kPowerStateZero, true);
+            requestDomainPower( fDesiredPowerState );
             PM_LOG1("%s: tickle desire removed\n", fName);
         }
 
@@ -5237,13 +5343,20 @@ void IOService::start_watchdog_timer( void )
 {
     AbsoluteTime    deadline;
     boolean_t       pending;
+    static int      timeout = -1;
 
     if (!fWatchdogTimer || (kIOSleepWakeWdogOff & gIOKitDebug))
        return;
 
     if (thread_call_isactive(fWatchdogTimer)) return;
+    if (timeout == -1) {
+       PE_parse_boot_argn("swd_timeout", &timeout, sizeof(timeout));
+    }
+    if (timeout < 60) {
+       timeout = WATCHDOG_TIMER_PERIOD;
+    }
 
-    clock_interval_to_deadline(WATCHDOG_TIMER_PERIOD, kSecondScale, &deadline);
+    clock_interval_to_deadline(timeout, kSecondScale, &deadline);
 
     retain();
     pending = thread_call_enter_delayed(fWatchdogTimer, deadline);
@@ -5388,6 +5501,103 @@ IOService::ack_timer_expired( thread_call_param_t arg0, thread_call_param_t arg1
     me->release();
 }
 
+//*********************************************************************************
+// [private] start_spindump_timer
+//*********************************************************************************
+
+void IOService::start_spindump_timer( const char * delay_type )
+{
+    AbsoluteTime    deadline;
+    boolean_t       pending;
+
+    if (!fSpinDumpTimer || !(kIOKextSpinDump & gIOKitDebug))
+        return;
+
+    if (gIOSpinDumpKextName[0] == '\0' &&
+        !(PE_parse_boot_argn("swd_kext_name", &gIOSpinDumpKextName,
+        sizeof(gIOSpinDumpKextName))))
+    {
+        return;
+    }
+
+    if (strncmp(gIOSpinDumpKextName, fName, sizeof(gIOSpinDumpKextName)) != 0)
+        return;
+
+    if (gIOSpinDumpDelayType[0] == '\0' &&
+        !(PE_parse_boot_argn("swd_delay_type", &gIOSpinDumpDelayType,
+        sizeof(gIOSpinDumpDelayType))))
+    {
+        strncpy(gIOSpinDumpDelayType, "SetState", sizeof(gIOSpinDumpDelayType));
+    }
+
+    if (strncmp(delay_type, gIOSpinDumpDelayType, sizeof(gIOSpinDumpDelayType)) != 0)
+        return;
+
+    if (gIOSpinDumpDelayDuration == 0 &&
+        !(PE_parse_boot_argn("swd_delay_duration", &gIOSpinDumpDelayDuration,
+        sizeof(gIOSpinDumpDelayDuration))))
+    {
+        gIOSpinDumpDelayDuration = 300;
+    }
+
+    clock_interval_to_deadline(gIOSpinDumpDelayDuration, kMillisecondScale, &deadline);
+
+    retain();
+    pending = thread_call_enter_delayed(fSpinDumpTimer, deadline);
+    if (pending) release();
+}
+
+//*********************************************************************************
+// [private] stop_spindump_timer
+//*********************************************************************************
+
+void IOService::stop_spindump_timer( void )
+{
+    boolean_t   pending;
+
+    if (!fSpinDumpTimer || !(kIOKextSpinDump & gIOKitDebug))
+        return;
+
+    pending = thread_call_cancel(fSpinDumpTimer);
+    if (pending) release();
+}
+
+
+//*********************************************************************************
+// [static] actionSpinDumpTimerExpired
+//
+// Inside PM work loop's gate.
+//*********************************************************************************
+
+IOReturn
+IOService::actionSpinDumpTimerExpired(
+    OSObject * target,
+    void * arg0, void * arg1,
+    void * arg2, void * arg3 )
+{
+    getPMRootDomain()->takeStackshot(false, false, true);
+
+    return kIOReturnSuccess;
+}
+
+//*********************************************************************************
+// spindump_timer_expired
+//
+// Thread call function. Holds a retain while the callout is in flight.
+//*********************************************************************************
+
+void
+IOService::spindump_timer_expired( thread_call_param_t arg0, thread_call_param_t arg1 )
+{
+    IOService * me = (IOService *) arg0;
+
+    if (gIOPMWorkLoop)
+    {
+        gIOPMWorkLoop->runAction(&actionSpinDumpTimerExpired, me);
+    }
+    me->release();
+}
+
 // MARK: -
 // MARK: Client Messaging
 
@@ -5589,6 +5799,7 @@ bool IOService::tellClientsWithResponse( int messageType )
 {
     IOPMInterestContext     context;
     bool                    isRootDomain = IS_ROOT_DOMAIN;
+    uint32_t                maxTimeOut = kMaxTimeRequested;
 
     PM_ASSERT_IN_GATE();
     assert( fResponseArray == NULL );
@@ -5646,8 +5857,15 @@ bool IOService::tellClientsWithResponse( int messageType )
                 context.notifyType  = fOutOfBandParameter;
                 context.messageType = messageType;
             }
-            context.maxTimeRequested = k30Seconds;
-
+           if(context.messageType == kIOMessageCanSystemSleep)
+           {
+               maxTimeOut = kCanSleepMaxTimeReq;
+               if(gCanSleepTimeout)
+               {
+                   maxTimeOut = (gCanSleepTimeout*us_per_s);
+               }
+           }
+           context.maxTimeRequested = maxTimeOut;
             applyToInterested( gIOGeneralInterest,
                 pmTellClientWithResponse, (void *) &context );
 
@@ -5673,7 +5891,15 @@ bool IOService::tellClientsWithResponse( int messageType )
             applyToInterested( gIOAppPowerStateInterest,
                 pmTellCapabilityAppWithResponse, (void *) &context );
             fNotifyClientArray = context.notifyClients;
-            context.maxTimeRequested = k30Seconds;
+           if(context.messageType == kIOMessageCanSystemSleep)
+           {
+               maxTimeOut = kCanSleepMaxTimeReq;
+               if(gCanSleepTimeout)
+               {
+                   maxTimeOut = (gCanSleepTimeout*us_per_s);
+               }
+           }
+           context.maxTimeRequested = maxTimeOut;
             break;
 
         case kNotifyCapabilityChangePriority:
@@ -6936,7 +7162,7 @@ void IOService::releasePMRequest( IOPMRequest * request )
 }
 
 //*********************************************************************************
-// [private] submitPMRequest
+// [private static] submitPMRequest
 //*********************************************************************************
 
 void IOService::submitPMRequest( IOPMRequest * request )
@@ -6957,7 +7183,7 @@ void IOService::submitPMRequest( IOPMRequest * request )
         gIOPMRequestQueue->queuePMRequest( request );
 }
 
-void IOService::submitPMRequest( IOPMRequest ** requests, IOItemCount count )
+void IOService::submitPMRequests( IOPMRequest ** requests, IOItemCount count )
 {
     assert( requests );
     assert( count > 0 );
@@ -6977,12 +7203,12 @@ void IOService::submitPMRequest( IOPMRequest ** requests, IOItemCount count )
 }
 
 //*********************************************************************************
-// [private] servicePMRequestQueue
+// [private] actionPMRequestQueue
 //
-// Called from IOPMRequestQueue::checkForWork().
+// IOPMRequestQueue::checkForWork() passing a new request to the request target.
 //*********************************************************************************
 
-bool IOService::servicePMRequestQueue(
+bool IOService::actionPMRequestQueue(
     IOPMRequest *       request,
     IOPMRequestQueue *  queue )
 {
@@ -6990,34 +7216,40 @@ bool IOService::servicePMRequestQueue(
 
     if (initialized)
     {
-        // Work queue will immediately execute the queue'd request if possible.
-        // If execution blocks, the work queue will wait for a producer signal.
-        // Only need to signal more when completing attached requests.
+        // Work queue will immediately execute the request if the per-service
+        // request queue is empty. Note pwrMgt is the target's IOServicePM.
 
         more = gIOPMWorkQueue->queuePMRequest(request, pwrMgt);
-        return more;
     }
+    else
+    {
+        // Calling PM without PMinit() is not allowed, fail the request.
+        // Need to signal more when completing attached requests.
 
-    // Calling PM without PMinit() is not allowed, fail the request.
+        PM_LOG("%s: PM not initialized\n", getName());
+        PM_LOG1("[- %02x] %p [%p %s] !initialized\n",
+            request->getType(), OBFUSCATE(request),
+            OBFUSCATE(this), getName());
+
+        more = gIOPMCompletionQueue->queuePMRequest(request);
+        if (more) gIOPMWorkQueue->incrementProducerCount();
+    }
 
-    PM_LOG("%s: PM not initialized\n", getName());
-    fAdjustPowerScheduled = false;
-    more = gIOPMFreeQueue->queuePMRequest(request);
-    if (more) gIOPMWorkQueue->incrementProducerCount();
     return more;
 }
 
 //*********************************************************************************
-// [private] servicePMFreeQueue
+// [private] actionPMCompletionQueue
 //
-// Called from IOPMCompletionQueue::checkForWork().
+// IOPMCompletionQueue::checkForWork() passing a completed request to the
+// request target.
 //*********************************************************************************
 
-bool IOService::servicePMFreeQueue(
+bool IOService::actionPMCompletionQueue(
     IOPMRequest *         request,
     IOPMCompletionQueue * queue )
 {
-    bool            more = request->getNextRequest();
+    bool            more = (request->getNextRequest() != 0);
     IOPMRequest *   root = request->getRootRequest();
 
     if (root && (root != request))
@@ -7030,22 +7262,21 @@ bool IOService::servicePMFreeQueue(
 }
 
 //*********************************************************************************
-// [private] retirePMRequest
+// [private] actionPMWorkQueueRetire
 //
-// Called by IOPMWorkQueue to retire a completed request.
+// IOPMWorkQueue::checkForWork() passing a retired request to the request target.
 //*********************************************************************************
 
-bool IOService::retirePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
+bool IOService::actionPMWorkQueueRetire( IOPMRequest * request, IOPMWorkQueue * queue )
 {
     assert(request && queue);
 
     PM_LOG1("[- %02x] %p [%p %s] state %d, busy %d\n",
         request->getType(), OBFUSCATE(request),
         OBFUSCATE(this), getName(),
-        fMachineState, gIOPMBusyCount);
-
-    // Catch requests created by idleTimerExpired().
+        fMachineState, gIOPMBusyRequestCount);
 
+    // Catch requests created by idleTimerExpired()
     if (request->getType() == kIOPMRequestTypeActivityTickle)
     {
         uint32_t tickleFlags = (uint32_t)(uintptr_t) request->fArg1;
@@ -7061,11 +7292,11 @@ bool IOService::retirePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
             fIdleTimerGeneration++;
         }
     }
+    
+    // When the completed request is linked, tell work queue there is
+    // more work pending.
 
-    // If the request is linked, then Work queue has already incremented its
-    // producer count.
-
-    return (gIOPMFreeQueue->queuePMRequest( request ));
+    return (gIOPMCompletionQueue->queuePMRequest( request ));
 }
 
 //*********************************************************************************
@@ -7137,12 +7368,13 @@ bool IOService::isPMBlocked( IOPMRequest * request, int count )
 }
 
 //*********************************************************************************
-// [private] servicePMRequest
+// [private] actionPMWorkQueueInvoke
 //
-// Service a request from our work queue.
+// IOPMWorkQueue::checkForWork() passing a request to the
+// request target for execution.
 //*********************************************************************************
 
-bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
+bool IOService::actionPMWorkQueueInvoke( IOPMRequest * request, IOPMWorkQueue * queue )
 {
     bool    done = false;
     int     loop = 0;
@@ -7156,7 +7388,7 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
             OBFUSCATE(this), getName(), fMachineState);
 
         gIOPMRequest = request;
-        gIOPMWorkCount++;
+        gIOPMWorkInvokeCount++;
 
         // Every PM machine state must be handled in one of the cases below.
 
@@ -7427,7 +7659,7 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
                 break;
 
             default:
-                panic("servicePMWorkQueue: unknown machine state %x",
+                panic("PMWorkQueueInvoke: unknown machine state %x",
                     fMachineState);
         }
 
@@ -7518,16 +7750,23 @@ void IOService::executePMRequest( IOPMRequest * request )
             fIdleTimerIgnored = request->fArg0 ? 1 : 0;
             break;
 
+        case kIOPMRequestTypeQuiescePowerTree:
+            gIOPMWorkQueue->finishQuiesceRequest(request);
+            break;
+
         default:
             panic("executePMRequest: unknown request type %x", request->getType());
     }
 }
 
 //*********************************************************************************
-// [private] servicePMReplyQueue
+// [private] actionPMReplyQueue
+//
+// IOPMRequestQueue::checkForWork() passing a reply-type request to the
+// request target.
 //*********************************************************************************
 
-bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * queue )
+bool IOService::actionPMReplyQueue( IOPMRequest * request, IOPMRequestQueue * queue )
 {
     bool more = false;
 
@@ -7639,7 +7878,8 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q
                 // Stop waiting for app replies.
                 if ((fMachineState == kIOPM_OurChangeTellPriorityClientsPowerDown) ||
                     (fMachineState == kIOPM_OurChangeTellUserPMPolicyPowerDown) ||
-                    (fMachineState == kIOPM_SyncTellPriorityClientsPowerDown))
+                    (fMachineState == kIOPM_SyncTellPriorityClientsPowerDown) ||
+                    (fMachineState == kIOPM_SyncTellClientsPowerDown) )
                     cleanClientResponses(false);
                 more = true;
             }
@@ -7654,11 +7894,10 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q
             break;
 
         default:
-            panic("servicePMReplyQueue: unknown reply type %x",
-                request->getType());
+            panic("PMReplyQueue: unknown reply type %x", request->getType());
     }
 
-    more |= gIOPMFreeQueue->queuePMRequest(request);
+    more |= gIOPMCompletionQueue->queuePMRequest(request);
     if (more)
         gIOPMWorkQueue->incrementProducerCount();
 
@@ -7841,15 +8080,18 @@ bool IOPMRequest::init( IOService * target, IOOptionBits type )
     if (!IOCommand::init())
         return false;
 
-    fType             = type;
-    fTarget           = target;
-#if NOT_READY
-    fCompletionStatus = kIOReturnSuccess;
-#endif
+    fRequestType = type;
+    fTarget = target;
 
     if (fTarget)
         fTarget->retain();
 
+    // Root node and root domain requests do not prevent the power tree from
+    // becoming quiescent.
+
+    fIsQuiesceBlocker = ((fTarget != gIOPMRootNode) &&
+                         (fTarget != IOService::getPMRootDomain()));
+
     return true;
 }
 
@@ -7861,14 +8103,14 @@ void IOPMRequest::reset( void )
     detachNextRequest();
     detachRootRequest();
 
-    fType = kIOPMRequestTypeInvalid;
-
-#if NOT_READY
-    if (fCompletionAction)
+    if (fCompletionAction && (fRequestType == kIOPMRequestTypeQuiescePowerTree))
     {
-        fCompletionAction(fCompletionTarget, fCompletionParam, fCompletionStatus);
+        // Call the completion on PM work loop context
+        fCompletionAction(fCompletionTarget, fCompletionParam);
+        fCompletionAction = 0;
     }
-#endif
+
+    fRequestType = kIOPMRequestTypeInvalid;
 
     if (fTarget)
     {
@@ -7889,8 +8131,8 @@ bool IOPMRequest::attachNextRequest( IOPMRequest * next )
         fRequestNext->fWorkWaitCount++;
 #if LOG_REQUEST_ATTACH
         PM_LOG("Attached next: %p [0x%x] -> %p [0x%x, %u] %s\n",
-            OBFUSCATE(this), (uint32_t) fType, OBFUSCATE(fRequestNext),
-            (uint32_t) fRequestNext->fType,
+            OBFUSCATE(this), fRequestType, OBFUSCATE(fRequestNext),
+            fRequestNext->fRequestType,
             (uint32_t) fRequestNext->fWorkWaitCount,
             fTarget->getName());
 #endif
@@ -7910,8 +8152,8 @@ bool IOPMRequest::detachNextRequest( void )
             fRequestNext->fWorkWaitCount--;
 #if LOG_REQUEST_ATTACH
         PM_LOG("Detached next: %p [0x%x] -> %p [0x%x, %u] %s\n",
-            OBFUSCATE(this), (uint32_t) fType, OBFUSCATE(fRequestNext),
-            (uint32_t) fRequestNext->fType,
+            OBFUSCATE(this), fRequestType, OBFUSCATE(fRequestNext),
+            fRequestNext->fRequestType,
             (uint32_t) fRequestNext->fWorkWaitCount,
             fTarget->getName());
 #endif
@@ -8011,7 +8253,7 @@ void IOPMRequestQueue::queuePMRequest( IOPMRequest * request )
 {
     assert(request);
     IOLockLock(fLock);
-    queue_enter(&fQueue, request, IOPMRequest *, fCommandChain);
+    queue_enter(&fQueue, request, typeof(request), fCommandChain);
     IOLockUnlock(fLock);
     if (workLoop) signalWorkAvailable();
 }
@@ -8027,7 +8269,7 @@ IOPMRequestQueue::queuePMRequestChain( IOPMRequest ** requests, IOItemCount coun
     {
         next = *requests;
         requests++;
-        queue_enter(&fQueue, next, IOPMRequest *, fCommandChain);
+        queue_enter(&fQueue, next, typeof(next), fCommandChain);
     }
     IOLockUnlock(fLock);
     if (workLoop) signalWorkAvailable();
@@ -8038,14 +8280,22 @@ bool IOPMRequestQueue::checkForWork( void )
     Action          dqAction = (Action) action;
     IOPMRequest *   request;
     IOService *     target;
+    int             dequeueCount = 0;
     bool            more = false;
 
     IOLockLock( fLock );
 
     while (!queue_empty(&fQueue))
     {
-        queue_remove_first( &fQueue, request, IOPMRequest *, fCommandChain );
-        IOLockUnlock( fLock );
+        if (dequeueCount++ >= kMaxDequeueCount)
+        {
+            // Allow other queues a chance to work
+            more = true;
+            break;
+        }
+    
+        queue_remove_first(&fQueue, request, typeof(request), fCommandChain);
+        IOLockUnlock(fLock);
         target = request->getTarget();
         assert(target);
         more |= (*dqAction)( target, request, this );
@@ -8062,16 +8312,17 @@ bool IOPMRequestQueue::checkForWork( void )
 //*********************************************************************************
 // IOPMWorkQueue Class
 //
-// Queue of IOServicePM objects with busy IOPMRequest(s).
+// Queue of IOServicePM objects, each with a queue of IOPMRequest objects
+// sharing the same target.
 //*********************************************************************************
 
 OSDefineMetaClassAndStructors( IOPMWorkQueue, IOEventSource );
 
 IOPMWorkQueue *
-IOPMWorkQueue::create( IOService * inOwner, Action work, Action retire )
+IOPMWorkQueue::create( IOService * inOwner, Action invoke, Action retire )
 {
     IOPMWorkQueue * me = OSTypeAlloc(IOPMWorkQueue);
-    if (me && !me->init(inOwner, work, retire))
+    if (me && !me->init(inOwner, invoke, retire))
     {
         me->release();
         me = 0;
@@ -8079,15 +8330,15 @@ IOPMWorkQueue::create( IOService * inOwner, Action work, Action retire )
     return me;
 }
 
-bool IOPMWorkQueue::init( IOService * inOwner, Action work, Action retire )
+bool IOPMWorkQueue::init( IOService * inOwner, Action invoke, Action retire )
 {
-    if (!work || !retire ||
+    if (!invoke || !retire ||
         !IOEventSource::init(inOwner, (IOEventSourceAction)0))
         return false;
 
     queue_init(&fWorkQueue);
 
-    fWorkAction    = work;
+    fInvokeAction  = invoke;
     fRetireAction  = retire;
     fConsumerCount = fProducerCount = 0;
 
@@ -8096,8 +8347,9 @@ bool IOPMWorkQueue::init( IOService * inOwner, Action work, Action retire )
 
 bool IOPMWorkQueue::queuePMRequest( IOPMRequest * request, IOServicePM * pwrMgt )
 {
-    bool more = false;
-    bool empty;
+    queue_head_t *  requestQueue;
+    bool            more  = false;
+    bool            empty;
 
     assert( request );
     assert( pwrMgt );
@@ -8105,24 +8357,42 @@ bool IOPMWorkQueue::queuePMRequest( IOPMRequest * request, IOServicePM * pwrMgt
     assert( queue_next(&request->fCommandChain) ==
             queue_prev(&request->fCommandChain) );
 
-    gIOPMBusyCount++;
+    gIOPMBusyRequestCount++;
+
+    if (request->isQuiesceType())
+    {
+        if ((request->getTarget() == gIOPMRootNode) && !fQuiesceStartTime)
+        {
+            // Attach new quiesce request to all quiesce blockers in the queue
+            fQuiesceStartTime = mach_absolute_time();
+            attachQuiesceRequest(request);
+            fQuiesceRequest = request;
+        }
+    }
+    else if (fQuiesceRequest && request->isQuiesceBlocker())
+    {
+        // Attach the new quiesce blocker to the blocked quiesce request
+        request->attachNextRequest(fQuiesceRequest);
+    }
 
     // Add new request to the tail of the per-service request queue.
     // Then immediately check the request queue to minimize latency
     // if the queue was empty.
 
-    empty = queue_empty(&pwrMgt->RequestHead);
-    queue_enter(&pwrMgt->RequestHead, request, IOPMRequest *, fCommandChain);
+    requestQueue = &pwrMgt->RequestHead;
+    empty = queue_empty(requestQueue);
+    queue_enter(requestQueue, request, typeof(request), fCommandChain);
     if (empty)
     {
-        more = checkRequestQueue(&pwrMgt->RequestHead, &empty);
+        more = checkRequestQueue(requestQueue, &empty);
         if (!empty)
         {
-            // New Request is blocked, add IOServicePM to work queue.
+            // Request just added is blocked, add its target IOServicePM
+            // to the work queue.
             assert( queue_next(&pwrMgt->WorkChain) ==
                     queue_prev(&pwrMgt->WorkChain) );
 
-            queue_enter(&fWorkQueue, pwrMgt, IOServicePM *, WorkChain);
+            queue_enter(&fWorkQueue, pwrMgt, typeof(pwrMgt), WorkChain);
             fQueueLength++;
             PM_LOG3("IOPMWorkQueue: [%u] added %s@%p to queue\n",
                 fQueueLength, pwrMgt->Name, OBFUSCATE(pwrMgt));
@@ -8132,40 +8402,53 @@ bool IOPMWorkQueue::queuePMRequest( IOPMRequest * request, IOServicePM * pwrMgt
     return more;
 }
 
-bool IOPMWorkQueue::checkRequestQueue( queue_head_t * queue, bool * empty )
+bool IOPMWorkQueue::checkRequestQueue( queue_head_t * requestQueue, bool * empty )
 {
     IOPMRequest *   request;
     IOService *     target;
     bool            more = false;
     bool            done = false;
 
-    assert(!queue_empty(queue));
+    assert(!queue_empty(requestQueue));
     do {
-        request = (IOPMRequest *) queue_first(queue);
+        request = (typeof(request)) queue_first(requestQueue);
         if (request->isWorkBlocked())
-            break;  // cannot start, blocked on attached request
+            break;  // request dispatch blocked on attached request
 
         target = request->getTarget();
-        done = (*fWorkAction)( target, request, this );
+        if (fInvokeAction)
+        {
+            done = (*fInvokeAction)( target, request, this );
+        }
+        else
+        {
+            PM_LOG("PM request 0x%x dropped\n", request->getType());
+            done = true;
+        }
         if (!done)
-            break;  // work started, blocked on PM state machine
+            break;  // PM state machine blocked
 
-        assert(gIOPMBusyCount > 0);
-        if (gIOPMBusyCount)
-            gIOPMBusyCount--;
+        assert(gIOPMBusyRequestCount > 0);
+        if (gIOPMBusyRequestCount)
+            gIOPMBusyRequestCount--;
 
-        queue_remove_first(queue, request, IOPMRequest *, fCommandChain);
+        if (request == fQuiesceRequest)
+        {
+            fQuiesceRequest = 0;
+        }
+
+        queue_remove_first(requestQueue, request, typeof(request), fCommandChain);
         more |= (*fRetireAction)( target, request, this );
-        done = queue_empty(queue);
+        done = queue_empty(requestQueue);
     } while (!done);
 
     *empty = done;
 
     if (more)
     {
-        // Retired request blocks another request, since the
-        // blocked request may reside in the work queue, we
-        // must bump the producer count to avoid work stall.
+        // Retired a request that may unblock a previously visited request
+        // that is still waiting on the work queue. Must trigger another
+        // queue check.
         fProducerCount++;
     }
 
@@ -8183,8 +8466,8 @@ bool IOPMWorkQueue::checkForWork( void )
     fStatCheckForWork++;
 #endif
 
-    // Each producer signal triggers a full iteration over
-    // all IOServicePM entries in the work queue.
+    // Iterate over all IOServicePM entries in the work queue,
+    // and check each entry's request queue.
 
     while (fConsumerCount != fProducerCount)
     {
@@ -8200,31 +8483,31 @@ bool IOPMWorkQueue::checkForWork( void )
             break;
         }
         fStatScanEntries++;
-        uint32_t cachedWorkCount = gIOPMWorkCount;
+        uint32_t cachedWorkCount = gIOPMWorkInvokeCount;
 #endif
 
-        entry = (IOServicePM *) queue_first(&fWorkQueue);
+        __IGNORE_WCASTALIGN(entry = (typeof(entry)) queue_first(&fWorkQueue));
         while (!queue_end(&fWorkQueue, (queue_entry_t) entry))
         {
             more |= checkRequestQueue(&entry->RequestHead, &empty);
 
             // Get next entry, points to head if current entry is last.
-            next = (IOServicePM *) queue_next(&entry->WorkChain);
+            __IGNORE_WCASTALIGN(next = (typeof(next)) queue_next(&entry->WorkChain));
 
-            // if request queue is empty, remove IOServicePM from queue.
+            // if request queue is empty, remove IOServicePM from work queue.
             if (empty)
             {
                 assert(fQueueLength);
                 if (fQueueLength) fQueueLength--;
                 PM_LOG3("IOPMWorkQueue: [%u] removed %s@%p from queue\n",
                     fQueueLength, entry->Name, OBFUSCATE(entry));
-                queue_remove(&fWorkQueue, entry, IOServicePM *, WorkChain);
+                queue_remove(&fWorkQueue, entry, typeof(entry), WorkChain);
             }
             entry = next;
         }
 
 #if WORK_QUEUE_STATS
-        if (cachedWorkCount == gIOPMWorkCount)
+        if (cachedWorkCount == gIOPMWorkInvokeCount)
             fStatNoWorkDone++;
 #endif
     }
@@ -8243,6 +8526,42 @@ void IOPMWorkQueue::incrementProducerCount( void )
     fProducerCount++;
 }
 
+void IOPMWorkQueue::attachQuiesceRequest( IOPMRequest * quiesceRequest )
+{
+    IOServicePM *   entry;
+    IOPMRequest *   request;
+
+    if (queue_empty(&fWorkQueue))
+    {
+        return;
+    }
+
+    queue_iterate(&fWorkQueue, entry, typeof(entry), WorkChain)
+    {
+        queue_iterate(&entry->RequestHead, request, typeof(request), fCommandChain)
+        {
+            // Attach the quiesce request to any request in the queue that
+            // is not linked to a next request. These requests will block
+            // the quiesce request.
+            
+            if (request->isQuiesceBlocker())
+            {
+                request->attachNextRequest(quiesceRequest);
+            }
+        }
+    }
+}
+
+void IOPMWorkQueue::finishQuiesceRequest( IOPMRequest * quiesceRequest )
+{
+    if (fQuiesceRequest && (quiesceRequest == fQuiesceRequest) &&
+        (fQuiesceStartTime != 0))
+    {
+        fInvokeAction = 0;
+        fQuiesceFinishTime = mach_absolute_time();
+    }
+}
+
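finishQuiesceRequest() only records the end timestamp. The sketch below is a hypothetical helper, not part of this commit, showing how the recorded quiesce window could be converted to nanoseconds with the standard kern/clock.h routines already used elsewhere in this file:

    // Hypothetical helper (illustrative only, not in this commit).
    uint64_t IOPMWorkQueue::quiesceDurationNS( void )
    {
        uint64_t        nsec = 0;
        AbsoluteTime    delta;

        if (fQuiesceStartTime && fQuiesceFinishTime)
        {
            delta = fQuiesceFinishTime;
            SUB_ABSOLUTETIME(&delta, &fQuiesceStartTime);
            absolutetime_to_nanoseconds(delta, &nsec);
        }
        return nsec;
    }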
 // MARK: -
 // MARK: IOPMCompletionQueue
 
@@ -8280,7 +8599,7 @@ bool IOPMCompletionQueue::queuePMRequest( IOPMRequest * request )
     assert(request);
     // unblock dependent request
     more = request->detachNextRequest();
-    queue_enter(&fQueue, request, IOPMRequest *, fCommandChain);
+    queue_enter(&fQueue, request, typeof(request), fCommandChain);
     return more;
 }
 
@@ -8292,13 +8611,13 @@ bool IOPMCompletionQueue::checkForWork( void )
     IOService *     target;
     bool            more = false;
 
-    request = (IOPMRequest *) queue_first(&fQueue);
+    request = (typeof(request)) queue_first(&fQueue);
     while (!queue_end(&fQueue, (queue_entry_t) request))
     {
-        next = (IOPMRequest *) queue_next(&request->fCommandChain);
+        next = (typeof(next)) queue_next(&request->fCommandChain);
         if (!request->isFreeBlocked())
         {
-            queue_remove(&fQueue, request, IOPMRequest *, fCommandChain);
+            queue_remove(&fQueue, request, typeof(request), fCommandChain);
             target = request->getTarget();
             assert(target);
             more |= (*dqAction)( target, request, this );
index 313d8e3c5ce9c759a0925805bd84dd477d56e5c9..ca91e9d46320f2f8812a9ef11de5ec0483d323e1 100644 (file)
@@ -55,6 +55,7 @@ enum {
     kIOPMRequestTypeRequestPowerStateOverride   = 0x0E,
     kIOPMRequestTypeSetIdleTimerPeriod          = 0x0F,
     kIOPMRequestTypeIgnoreIdleTimer             = 0x10,
+    kIOPMRequestTypeQuiescePowerTree            = 0x11,
 
     /* Reply Types */
     kIOPMRequestTypeReplyStart                  = 0x80,
@@ -183,6 +184,7 @@ private:
     thread_call_t           SettleTimer;
     thread_call_t           IdleTimer;
     thread_call_t           WatchdogTimer;
+    thread_call_t           SpinDumpTimer;
 
     // Settle time after changing power state.
     uint32_t                SettleTimeUS;
@@ -343,7 +345,7 @@ private:
 
     // Serialize IOServicePM state for debug output.
     IOReturn gatedSerialize( OSSerialize * s ) const;
-    virtual bool serialize( OSSerialize * s ) const;
+    virtual bool serialize( OSSerialize * s ) const APPLE_KEXT_OVERRIDE;
 
     // PM log and trace
     void pmPrint( uint32_t event, uintptr_t param1, uintptr_t param2 ) const;
@@ -358,6 +360,7 @@ private:
 #define fSettleTimer                pwrMgt->SettleTimer
 #define fIdleTimer                  pwrMgt->IdleTimer
 #define fWatchdogTimer              pwrMgt->WatchdogTimer
+#define fSpinDumpTimer              pwrMgt->SpinDumpTimer
 #define fSettleTimeUS               pwrMgt->SettleTimeUS
 #define fIdleTimerGeneration        pwrMgt->IdleTimerGeneration
 #define fHeadNoteChangeFlags        pwrMgt->HeadNoteChangeFlags
@@ -552,26 +555,22 @@ extern const OSSymbol *gIOPMStatsDriverPSChangeSlow;
 // IOPMRequest
 //******************************************************************************
 
-typedef void (*IOPMCompletionAction)(void * target, void * param, IOReturn status);
-
 class IOPMRequest : public IOCommand
 {
     OSDeclareDefaultStructors( IOPMRequest )
 
 protected:
-    IOService *          fTarget;        // request target
-    IOPMRequest *        fRequestNext;   // the next request in the chain
-    IOPMRequest *        fRequestRoot;   // the root request in the issue tree
-    IOItemCount          fWorkWaitCount; // execution blocked if non-zero
-    IOItemCount          fFreeWaitCount; // completion blocked if non-zero
-    uint32_t             fType;          // request type
+    IOService *          fTarget;           // request target
+    IOPMRequest *        fRequestNext;      // the next request in the chain
+    IOPMRequest *        fRequestRoot;      // the root request in the call tree
+    IOItemCount          fWorkWaitCount;    // execution blocked if non-zero
+    IOItemCount          fFreeWaitCount;    // completion blocked if non-zero
+    uint32_t             fRequestType;      // request type
+    bool                 fIsQuiesceBlocker;
 
-#if NOT_READY
     IOPMCompletionAction fCompletionAction;
     void *               fCompletionTarget;
     void *               fCompletionParam;
-    IOReturn             fCompletionStatus;
-#endif
 
 public:
     uint32_t             fRequestTag;
@@ -605,12 +604,12 @@ public:
 
     inline uint32_t      getType( void ) const
     {
-        return fType;
+        return fRequestType;
     }
 
     inline bool          isReplyType( void ) const
     {
-        return (fType > kIOPMRequestTypeReplyStart);
+        return (fRequestType > kIOPMRequestTypeReplyStart);
     }
 
     inline IOService *   getTarget( void ) const
@@ -618,22 +617,26 @@ public:
         return fTarget;
     }
 
-#if NOT_READY
-    inline bool          isCompletionInstalled( void )
+    inline bool          isQuiesceBlocker( void ) const
+    {
+        return fIsQuiesceBlocker;
+    }
+
+    inline bool          isQuiesceType( void ) const
     {
-        return (fCompletionAction != 0);
+        return ((kIOPMRequestTypeQuiescePowerTree == fRequestType) &&
+                (fCompletionAction != 0) && (fCompletionTarget != 0));
     }
 
     inline void          installCompletionAction(
-                            IOPMCompletionAction action,
                             void *               target,
+                            IOPMCompletionAction action,
                             void *               param )
     {
-        fCompletionAction = action;
         fCompletionTarget = target;
+        fCompletionAction = action;
         fCompletionParam  = param;
     }
-#endif /* NOT_READY */
 
     static IOPMRequest * create( void );
     bool   init( IOService * owner, IOOptionBits type );
@@ -659,8 +662,10 @@ protected:
     queue_head_t    fQueue;
     IOLock *        fLock;
 
-    virtual bool checkForWork( void );
-    virtual void free( void );
+    enum { kMaxDequeueCount = 256 };
+
+    virtual bool checkForWork( void ) APPLE_KEXT_OVERRIDE;
+    virtual void free( void ) APPLE_KEXT_OVERRIDE;
     virtual bool init( IOService * inOwner, Action inAction );
 
 public:
@@ -691,21 +696,26 @@ public:
 
 protected:
     queue_head_t        fWorkQueue;
-    Action              fWorkAction;
+    Action              fInvokeAction;
     Action              fRetireAction;
     uint32_t            fQueueLength;
     uint32_t            fConsumerCount;
     volatile uint32_t   fProducerCount;
+    IOPMRequest *       fQuiesceRequest;
+    AbsoluteTime        fQuiesceStartTime;
+    AbsoluteTime        fQuiesceFinishTime;
 
-    virtual bool checkForWork( void );
-    virtual bool init( IOService * inOwner, Action work, Action retire );
+    virtual bool checkForWork( void ) APPLE_KEXT_OVERRIDE;
+    virtual bool init( IOService * inOwner, Action invoke, Action retire );
     bool    checkRequestQueue( queue_head_t * queue, bool * empty );
 
 public:
-    static  IOPMWorkQueue * create( IOService * inOwner, Action work, Action retire );
+    static  IOPMWorkQueue * create( IOService * inOwner, Action invoke, Action retire );
     bool    queuePMRequest( IOPMRequest * request, IOServicePM * pwrMgt );
     void    signalWorkAvailable( void );
     void    incrementProducerCount( void );
+    void    attachQuiesceRequest( IOPMRequest * quiesceRequest );
+    void    finishQuiesceRequest( IOPMRequest * quiesceRequest );
 };
 
 //******************************************************************************
@@ -722,7 +732,7 @@ public:
 protected:
     queue_head_t    fQueue;
 
-    virtual bool checkForWork( void );
+    virtual bool checkForWork( void ) APPLE_KEXT_OVERRIDE;
     virtual bool init( IOService * inOwner, Action inAction );
 
 public:
index 465b8261a08638dab0684ec5bb0218bdd4032196..af6ca5636cd26707976c51da180a4bf94d1192f3 100644 (file)
@@ -95,10 +95,10 @@ public:
     queue_head_t                       handlerInvocations;
     IOOptionBits                       state;
 
-    virtual void free();
-    virtual void remove();
-    virtual bool disable();
-    virtual void enable( bool was );
+    virtual void free() APPLE_KEXT_OVERRIDE;
+    virtual void remove() APPLE_KEXT_OVERRIDE;
+    virtual bool disable() APPLE_KEXT_OVERRIDE;
+    virtual void enable( bool was ) APPLE_KEXT_OVERRIDE;
     virtual void wait();
 };
 
@@ -117,12 +117,12 @@ public:
     queue_head_t               handlerInvocations;
     IOOptionBits               state;
 
-    virtual void free();
-    virtual void remove();
-    virtual bool disable();
-    virtual void enable( bool was );
+    virtual void free() APPLE_KEXT_OVERRIDE;
+    virtual void remove() APPLE_KEXT_OVERRIDE;
+    virtual bool disable() APPLE_KEXT_OVERRIDE;
+    virtual void enable( bool was ) APPLE_KEXT_OVERRIDE;
     virtual void wait();
-    virtual bool init();
+    virtual bool init() APPLE_KEXT_OVERRIDE;
 };
 
 class _IOConfigThread : public OSObject
@@ -132,7 +132,7 @@ class _IOConfigThread : public OSObject
     OSDeclareDefaultStructors(_IOConfigThread)
 
 public:
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
     static void configThread( void );
     static void main( void * arg, wait_result_t result );
@@ -171,10 +171,13 @@ class IOResources : public IOService
 
 public:
     static IOService * resources( void );
-    virtual bool init( OSDictionary * dictionary = 0 );
-    virtual IOWorkLoop * getWorkLoop( ) const;
-    virtual bool matchPropertyTable( OSDictionary * table );
-    virtual IOReturn setProperties( OSObject * properties );
+    virtual bool init( OSDictionary * dictionary = 0 ) APPLE_KEXT_OVERRIDE;
+    virtual IOReturn newUserClient(task_t owningTask, void * securityID,
+                                   UInt32 type,  OSDictionary * properties,
+                                   IOUserClient ** handler) APPLE_KEXT_OVERRIDE;
+    virtual IOWorkLoop * getWorkLoop( ) const APPLE_KEXT_OVERRIDE;
+    virtual bool matchPropertyTable( OSDictionary * table ) APPLE_KEXT_OVERRIDE;
+    virtual IOReturn setProperties( OSObject * properties ) APPLE_KEXT_OVERRIDE;
 };
 
 class _IOOpenServiceIterator : public OSIterator
@@ -192,10 +195,10 @@ public:
     static OSIterator * iterator( OSIterator * _iter,
                                   const IOService * client,
                                   const IOService * provider );
-    virtual void free();
-    virtual void reset();
-    virtual bool isValid();
-    virtual OSObject * getNextObject();
+    virtual void free() APPLE_KEXT_OVERRIDE;
+    virtual void reset() APPLE_KEXT_OVERRIDE;
+    virtual bool isValid() APPLE_KEXT_OVERRIDE;
+    virtual OSObject * getNextObject() APPLE_KEXT_OVERRIDE;
 };
 
 extern const OSSymbol * gIOConsoleUsersKey;
index 787a69bf26679ddd080c2bf68d45b97da7fb7e14..8177603cc2e9042d3b710818ce23a2d440d1aee7 100644 (file)
@@ -76,25 +76,12 @@ void IOKitInitializeTime( void )
     clock_initialize_calendar();
 }
 
-void IOKitResetTime( void )
-{
-    clock_sec_t                secs;
-       clock_usec_t    microsecs;
-
-    clock_initialize_calendar();
-
-    clock_get_calendar_microtime(&secs, &microsecs);
-    gIOLastWakeTime.tv_sec  = secs;
-    gIOLastWakeTime.tv_usec = microsecs;
-
-    IOService::updateConsoleUsers(NULL, kIOMessageSystemHasPoweredOn);
-}
-
 void iokit_post_constructor_init(void)
 {
     IORegistryEntry *          root;
     OSObject *                 obj;
 
+    IOCPUInitialize();
     root = IORegistryEntry::initialize();
     assert( root );
     IOService::initialize();
@@ -135,18 +122,23 @@ void StartIOKit( void * p1, void * p2, void * p3, void * p4 )
     int                                debugFlags;
 
     if( PE_parse_boot_argn( "io", &debugFlags, sizeof (debugFlags) ))
-               gIOKitDebug = debugFlags;
+       gIOKitDebug = debugFlags;
+#if DEVELOPMENT || DEBUG
+    else gIOKitDebug |= kIOWaitQuietPanics;
+#endif /* DEVELOPMENT || DEBUG */
        
     if( PE_parse_boot_argn( "iotrace", &debugFlags, sizeof (debugFlags) ))
-               gIOKitTrace = debugFlags;
+       gIOKitTrace = debugFlags;
        
-       // Compat for boot-args
-       gIOKitTrace |= (gIOKitDebug & kIOTraceCompatBootArgs);
+    // Compat for boot-args
+    gIOKitTrace |= (gIOKitDebug & kIOTraceCompatBootArgs);
        
     // Check for the log synchronous bit set in io
     if (gIOKitDebug & kIOLogSynchronous)
         debug_mode = true;
 
+    if( PE_parse_boot_argn( "pmtimeout", &debugFlags, sizeof (debugFlags) ))
+        gCanSleepTimeout = debugFlags;
     //
     // Have to start IOKit environment before we attempt to start
     // the C++ runtime environment.  At some stage we have to clean up
index c82a927ee8563b9e5c18e586e683d4529895fe51..5b377141abb4b62bc8a07748c23af44910ca89e0 100644 (file)
@@ -93,6 +93,8 @@ bool IOSubMemoryDescriptor::initSubRange( IOMemoryDescriptor * parent,
     _start     = offset;
     _length    = length;
     _flags     = direction;
+    _flags |= kIOMemoryThreadSafe;
+
 #ifndef __LP64__
     _direction  = (IODirection) (_flags & kIOMemoryDirectionMask);
 #endif /* !__LP64__ */
@@ -205,3 +207,9 @@ IOSubMemoryDescriptor::getPreparationID( void )
     return (super::getPreparationID());    
 }
 
+IOReturn
+IOSubMemoryDescriptor::getPageCounts(IOByteCount * residentPageCount,
+                                     IOByteCount * dirtyPageCount)
+{
+    return (_parent->getPageCounts(residentPageCount, dirtyPageCount));
+}
index 9f3587844c70067f063d0b33023290b3287e823b..6c9ec5df7e1248ae9717c5862c8e2f65f0381302 100644 (file)
@@ -166,7 +166,7 @@ public:
     static mach_port_name_t makeSendRightForTask( task_t task,
                                io_object_t obj, ipc_kobject_type_t type );
 
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 };
 
 #define super OSObject
@@ -363,13 +363,13 @@ class IOUserNotification : public OSIterator
 
 public:
 
-    virtual bool init( void );
-    virtual void free();
+    virtual bool init( void ) APPLE_KEXT_OVERRIDE;
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
     virtual void setNotification( IONotifier * obj );
 
-    virtual void reset();
-    virtual bool isValid();
+    virtual void reset() APPLE_KEXT_OVERRIDE;
+    virtual bool isValid() APPLE_KEXT_OVERRIDE;
 };
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
@@ -466,13 +466,13 @@ public:
     virtual bool init( mach_port_t port, natural_t type,
                        void * reference, vm_size_t referenceSize,
                       bool clientIs64 );
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
     static bool _handler( void * target,
                           void * ref, IOService * newService, IONotifier * notifier );
     virtual bool handler( void * ref, IOService * newService );
 
-    virtual OSObject * getNextObject();
+    virtual OSObject * getNextObject() APPLE_KEXT_OVERRIDE;
 };
 
 class IOServiceMessageUserNotification : public IOUserNotification
@@ -498,7 +498,7 @@ public:
                       vm_size_t extraSize,
                       bool clientIs64 );
 
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
     
     static IOReturn _handler( void * target, void * ref,
                               UInt32 messageType, IOService * provider,
@@ -507,7 +507,7 @@ public:
                               UInt32 messageType, IOService * provider,
                               void * messageArgument, vm_size_t argSize );
 
-    virtual OSObject * getNextObject();
+    virtual OSObject * getNextObject() APPLE_KEXT_OVERRIDE;
 };
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
@@ -1505,6 +1505,27 @@ extern "C" {
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
+// Create a vm_map_copy_t or kalloc'ed data for memory
+// to be copied out. ipc will free after the copyout.
+
+static kern_return_t copyoutkdata( const void * data, vm_size_t len,
+                                    io_buf_ptr_t * buf )
+{
+    kern_return_t      err;
+    vm_map_copy_t      copy;
+
+    err = vm_map_copyin( kernel_map, CAST_USER_ADDR_T(data), len,
+                    false /* src_destroy */, &copy);
+
+    assert( err == KERN_SUCCESS );
+    if( err == KERN_SUCCESS )
+        *buf = (char *) copy;
+
+    return( err );
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
 /* Routine io_server_version */
 kern_return_t is_io_server_version(
        mach_port_t master_port,
@@ -1516,20 +1537,26 @@ kern_return_t is_io_server_version(
 
 /* Routine io_object_get_class */
 kern_return_t is_io_object_get_class(
-       io_object_t object,
-       io_name_t className )
+    io_object_t object,
+    io_name_t className )
 {
-       const OSMetaClass* my_obj = NULL;
+    const OSMetaClass* my_obj = NULL;
+    const char * my_class_name = NULL;
        
-       if( !object)
-               return( kIOReturnBadArgument );
+    if( !object)
+        return( kIOReturnBadArgument );
                
-       my_obj = object->getMetaClass();
-       if (!my_obj) {
-               return (kIOReturnNotFound);
-       }
+    if ( !my_class_name ) {
+        my_obj = object->getMetaClass();
+        if (!my_obj) {
+            return (kIOReturnNotFound);
+        }
+
+        my_class_name = my_obj->getClassName();
+    }
        
-    strlcpy( className, my_obj->getClassName(), sizeof(io_name_t));
+    strlcpy( className, my_class_name, sizeof(io_name_t));
+
     return( kIOReturnSuccess );
 }
 
@@ -1623,6 +1650,7 @@ kern_return_t is_io_object_conforms_to(
         return( kIOReturnBadArgument );
 
     *conforms = (0 != object->metaCast( className ));
+
     return( kIOReturnSuccess );
 }
 
@@ -2312,6 +2340,58 @@ kern_return_t is_io_registry_entry_from_path(
     return( kIOReturnSuccess );
 }
 
+
+/* Routine io_registry_entry_from_path */
+kern_return_t is_io_registry_entry_from_path_ool(
+       mach_port_t master_port,
+       io_string_inband_t path,
+       io_buf_ptr_t path_ool,
+       mach_msg_type_number_t path_oolCnt,
+       kern_return_t *result,
+       io_object_t *registry_entry)
+{
+    IORegistryEntry *  entry;
+    vm_map_offset_t    map_data;
+    const char *       cpath;
+    IOReturn            res;
+    kern_return_t       err;
+
+    if (master_port != master_device_port) return(kIOReturnNotPrivileged);
+
+    map_data = 0;
+    entry    = 0;
+    res = err = KERN_SUCCESS;
+    if (path[0]) cpath = path;
+    else
+    {
+       if (!path_oolCnt)                                      return(kIOReturnBadArgument);
+       if (path_oolCnt > (sizeof(io_struct_inband_t) * 1024)) return(kIOReturnMessageTooLarge);
+
+       err = vm_map_copyout(kernel_map, &map_data, (vm_map_copy_t) path_ool);
+       if (KERN_SUCCESS == err)
+       {
+           // must return success to mig after vm_map_copyout() succeeds, so result is actual
+           cpath = CAST_DOWN(const char *, map_data);
+           if (cpath[path_oolCnt - 1]) res = kIOReturnBadArgument;
+       }
+    }
+
+    if ((KERN_SUCCESS == err) && (KERN_SUCCESS == res))
+    {
+       entry = IORegistryEntry::fromPath(cpath);
+       res = entry ? kIOReturnSuccess : kIOReturnNotFound;
+    }
+
+    if (map_data) vm_deallocate(kernel_map, map_data, path_oolCnt);
+
+    if (KERN_SUCCESS != err) res = err;
+    *registry_entry = entry;
+    *result = res;
+
+    return (err);
+}
+
+
 /* Routine io_registry_entry_in_plane */
 kern_return_t is_io_registry_entry_in_plane(
        io_object_t registry_entry,
@@ -2342,6 +2422,42 @@ kern_return_t is_io_registry_entry_get_path(
        return( kIOReturnBadArgument );
 }
 
+/* Routine io_registry_entry_get_path */
+kern_return_t is_io_registry_entry_get_path_ool(
+       io_object_t registry_entry,
+       io_name_t plane,
+       io_string_inband_t path,
+       io_buf_ptr_t *path_ool,
+       mach_msg_type_number_t *path_oolCnt)
+{
+    enum   { kMaxPath = 16384 };
+    IOReturn err;
+    int      length;
+    char   * buf;
+
+    CHECK( IORegistryEntry, registry_entry, entry );
+
+    *path_ool    = NULL;
+    *path_oolCnt = 0;
+    length = sizeof(io_string_inband_t);
+    if (entry->getPath(path, &length, IORegistryEntry::getPlane(plane))) err = kIOReturnSuccess;
+    else
+    {
+       length = kMaxPath;
+       buf = IONew(char, length);
+       if (!buf) err = kIOReturnNoMemory;
+       else if (!entry->getPath(buf, &length, IORegistryEntry::getPlane(plane))) err = kIOReturnError;
+       else
+       {
+           *path_oolCnt = length;
+           err = copyoutkdata(buf, length, path_ool);
+       }
+       if (buf) IODelete(buf, char, kMaxPath);
+    }
+
+    return (err);
+}
+
 
 /* Routine io_registry_entry_get_name */
 kern_return_t is_io_registry_entry_get_name(
@@ -2409,25 +2525,6 @@ kern_return_t is_io_registry_entry_get_registry_entry_id(
     return (kIOReturnSuccess);
 }
 
-// Create a vm_map_copy_t or kalloc'ed data for memory
-// to be copied out. ipc will free after the copyout.
-
-static kern_return_t copyoutkdata( const void * data, vm_size_t len,
-                                    io_buf_ptr_t * buf )
-{
-    kern_return_t      err;
-    vm_map_copy_t      copy;
-
-    err = vm_map_copyin( kernel_map, CAST_USER_ADDR_T(data), len,
-                    false /* src_destroy */, &copy);
-
-    assert( err == KERN_SUCCESS );
-    if( err == KERN_SUCCESS )
-        *buf = (char *) copy;
-
-    return( err );
-}
-
 /* Routine io_registry_entry_get_property */
 kern_return_t is_io_registry_entry_get_property_bytes(
        io_object_t registry_entry,
@@ -2799,6 +2896,7 @@ kern_return_t is_io_registry_entry_get_property_bin(
     return( err );
 }
 
+
 /* Routine io_registry_entry_set_properties */
 kern_return_t is_io_registry_entry_set_properties
 (
@@ -2981,6 +3079,8 @@ kern_return_t is_io_service_open_extended(
 
     CHECK( IOService, _service, service );
 
+    if (!owningTask) return (kIOReturnBadArgument);
+
     do
     {
        if (properties)
@@ -3148,6 +3248,8 @@ kern_return_t is_io_connect_map_memory_into_task
 
     CHECK( IOUserClient, connection, client );
 
+    if (!into_task) return (kIOReturnBadArgument);
+
     IOStatisticsClientCall();
     map = client->mapClientMemory64( memory_type, into_task, flags, *address );
 
@@ -3252,6 +3354,8 @@ kern_return_t is_io_connect_unmap_memory_from_task
 
     CHECK( IOUserClient, connection, client );
 
+    if (!from_task) return (kIOReturnBadArgument);
+
     IOStatisticsClientCall();
     err = client->clientMemoryForType( (UInt32) memory_type, &options, &memory );
 
@@ -4148,8 +4252,7 @@ kern_return_t shim_io_connect_method_scalarI_structureI(
 
     do
     {
-       if( (kIOUCVariableStructureSize != method->count0)
-               && (inputCount != method->count0))
+       if (inputCount != method->count0)
        {
            IOLog("%s: IOUserClient inputCount count mismatch\n", object->getName());
            continue;
@@ -4225,8 +4328,7 @@ kern_return_t shim_io_async_method_scalarI_structureI(
 
     do
     {
-       if( (kIOUCVariableStructureSize != method->count0)
-               && (inputCount != method->count0))
+       if (inputCount != method->count0)
        {
            IOLog("%s: IOUserClient inputCount count mismatch\n", object->getName());
            continue;
@@ -4815,8 +4917,8 @@ IOReturn IOUserClient::externalMethod( uint32_t selector, IOExternalMethodArgume
     if (args->asyncWakePort)
     {
        IOExternalAsyncMethod * method;
-
-       if( !(method = getAsyncTargetAndMethodForIndex(&object, selector)) )
+       object = 0;
+       if( !(method = getAsyncTargetAndMethodForIndex(&object, selector)) || !object )
            return (kIOReturnUnsupported);
 
     if (kIOUCForegroundOnly & method->flags)
@@ -4864,8 +4966,8 @@ IOReturn IOUserClient::externalMethod( uint32_t selector, IOExternalMethodArgume
     else
     {
        IOExternalMethod *      method;
-
-       if( !(method = getTargetAndMethodForIndex(&object, selector)) )
+       object = 0;
+       if( !(method = getTargetAndMethodForIndex(&object, selector)) || !object )
            return (kIOReturnUnsupported);
 
     if (kIOUCForegroundOnly & method->flags)
index 0789f66bb4baf299359bf97ef220eaa319e61492..6207b1ea1be3a465720b954cebd2879f7d09d574 100644 (file)
@@ -255,6 +255,7 @@ void IOWorkLoop::free()
        // Either way clean up all of our resources and return.
        
        if (controlG) {
+           controlG->workLoop = 0;
            controlG->release();
            controlG = 0;
        }
@@ -559,10 +560,10 @@ IOReturn IOWorkLoop::_maintRequest(void *inC, void *inD, void *, void *)
                                if (eventChain == inEvent)
                                        eventChain = inEvent->getNext();
                                else {
-                                       IOEventSource *event, *next;
+                                       IOEventSource *event, *next = 0;
                
                                        event = eventChain;
-                                       while ((next = event->getNext()) && next != inEvent)
+                                       if (event) while ((next = event->getNext()) && (next != inEvent))
                                                event = next;
                
                                        if (!next) {
@@ -576,10 +577,10 @@ IOReturn IOWorkLoop::_maintRequest(void *inC, void *inD, void *, void *)
                                if (passiveEventChain == inEvent)
                                        passiveEventChain = inEvent->getNext();
                                else {
-                                       IOEventSource *event, *next;
+                                       IOEventSource *event, *next = 0;
                
                                        event = passiveEventChain;
-                                       while ((next = event->getNext()) && next != inEvent)
+                                       if (event) while ((next = event->getNext()) && (next != inEvent))
                                                event = next;
                
                                        if (!next) {
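
Both IOWorkLoop hunks harden the same unlink loop: next now starts out null and the chain walk only runs when the chain head is non-null, so removing an event source from an empty chain (or one it was never added to) falls through to the existing if (!next) path instead of dereferencing a null head. A generic sketch of that null-safe unlink, with a hypothetical Node type standing in for IOEventSource:

    #include <cassert>
    #include <cstddef>

    struct Node { Node * nextNode; };

    // Unlink 'victim' from the singly linked chain rooted at *head.
    // Returns false when the chain is empty or the victim is not on it.
    static bool unlink(Node ** head, Node * victim)
    {
        if (*head == victim) { *head = victim->nextNode; return true; }

        Node * event = *head;
        Node * next  = nullptr;                    // stays null if the walk never runs
        if (event) while ((next = event->nextNode) && (next != victim))
            event = next;

        if (!next) return false;                   // empty chain, or victim not found
        event->nextNode = victim->nextNode;        // splice the victim out
        return true;
    }

    int main()
    {
        Node c = { nullptr }, b = { &c }, a = { &b };
        Node * head = &a;
        assert(unlink(&head, &b) && a.nextNode == &c);

        Node * empty = nullptr;
        assert(!unlink(&empty, &a));               // safe thanks to the 'if (event)' guard
        return 0;
    }
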
index 9083d8772a112f8329fb5d3d512c667fc450672b..ac09ffbf08bee4fa6b5d98ad968900875f9e0d13 100644 (file)
@@ -71,24 +71,24 @@ private:
 
 public:
 
-    virtual IOReturn clientClose( void );
+    virtual IOReturn clientClose( void ) APPLE_KEXT_OVERRIDE;
 
     virtual IOReturn externalMethod( uint32_t selector,
                     IOExternalMethodArguments * arguments,
                     IOExternalMethodDispatch * dispatch,
                     OSObject * target,
-                    void * reference );
+                    void * reference ) APPLE_KEXT_OVERRIDE;
 
-    virtual bool start( IOService * provider );
+    virtual bool start( IOService * provider ) APPLE_KEXT_OVERRIDE;
 
     virtual bool initWithTask(task_t owningTask, void *security_id,
-                    UInt32 type, OSDictionary * properties);
+                    UInt32 type, OSDictionary * properties) APPLE_KEXT_OVERRIDE;
 
     // Unused - retained for symbol compatibility
     void setPreventative(UInt32 on_off, UInt32 types_of_sleep);
 
     // Unused - retained for symbol compatibility
-    virtual IOExternalMethod * getTargetAndMethodForIndex( IOService ** targetP, UInt32 index );
+    virtual IOExternalMethod * getTargetAndMethodForIndex( IOService ** targetP, UInt32 index ) APPLE_KEXT_OVERRIDE;
 
 };
 
diff --git a/iokit/Tests/TestIOMemoryDescriptor.cpp b/iokit/Tests/TestIOMemoryDescriptor.cpp
new file mode 100644 (file)
index 0000000..926681a
--- /dev/null
@@ -0,0 +1,446 @@
+/*
+ * Copyright (c) 2014 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <sys/cdefs.h>
+
+#include <IOKit/assert.h>
+#include <IOKit/system.h>
+#include <IOKit/IOLib.h>
+#include <IOKit/IOMemoryDescriptor.h>
+#include <IOKit/IOMapper.h>
+#include <IOKit/IODMACommand.h>
+#include <IOKit/IOKitKeysPrivate.h>
+
+#ifndef __LP64__
+#include <IOKit/IOSubMemoryDescriptor.h>
+#endif /* !__LP64__ */
+#include <IOKit/IOSubMemoryDescriptor.h>
+#include <IOKit/IOMultiMemoryDescriptor.h>
+#include <IOKit/IOBufferMemoryDescriptor.h>
+
+#include <IOKit/IOKitDebug.h>
+#include <libkern/OSDebug.h>
+#include <sys/uio.h>
+
+__BEGIN_DECLS
+#include <vm/pmap.h>
+#include <vm/vm_pageout.h>
+#include <mach/memory_object_types.h>
+#include <device/device_port.h>
+
+#include <mach/vm_prot.h>
+#include <mach/mach_vm.h>
+#include <vm/vm_fault.h>
+#include <vm/vm_protos.h>
+__END_DECLS
+
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#if DEVELOPMENT || DEBUG
+
+static int IOMultMemoryDescriptorTest(int newValue)
+{
+    IOMemoryDescriptor * mds[3];
+    IOMultiMemoryDescriptor * mmd;
+    IOMemoryMap * map;
+    void * addr;
+    uint8_t * data;
+    uint32_t i;
+    IOAddressRange ranges[2];
+
+    data = (typeof(data)) IOMallocAligned(ptoa(8), page_size);
+    for (i = 0; i < ptoa(8); i++) data[i] = atop(i) | 0xD0;
+   
+    ranges[0].address = (IOVirtualAddress)(data + ptoa(4));
+    ranges[0].length  = ptoa(4);
+    ranges[1].address = (IOVirtualAddress)(data + ptoa(0));
+    ranges[1].length  = ptoa(4);
+
+    mds[0] = IOMemoryDescriptor::withAddressRanges(&ranges[0], 2, kIODirectionOutIn, kernel_task);
+
+    mds[1] = IOSubMemoryDescriptor::withSubRange(mds[0], ptoa(3), ptoa(2), kIODirectionOutIn);
+    mds[2] = IOSubMemoryDescriptor::withSubRange(mds[0], ptoa(7), ptoa(1), kIODirectionOutIn);
+
+    mmd = IOMultiMemoryDescriptor::withDescriptors(&mds[0], sizeof(mds)/sizeof(mds[0]), kIODirectionOutIn, false);
+    mds[2]->release();
+    mds[1]->release();
+    mds[0]->release();
+    map = mmd->createMappingInTask(kernel_task, 0, kIOMapAnywhere, ptoa(7), mmd->getLength() - ptoa(7));
+    mmd->release();
+    assert(map);
+
+    addr = (void *) map->getVirtualAddress();
+    assert(ptoa(4) == map->getLength());
+    assert(0xd3d3d3d3 == ((uint32_t *)addr)[ptoa(0) / sizeof(uint32_t)]);
+    assert(0xd7d7d7d7 == ((uint32_t *)addr)[ptoa(1) / sizeof(uint32_t)]);
+    assert(0xd0d0d0d0 == ((uint32_t *)addr)[ptoa(2) / sizeof(uint32_t)]);
+    assert(0xd3d3d3d3 == ((uint32_t *)addr)[ptoa(3) / sizeof(uint32_t)]);
+    map->release();
+    IOFreeAligned(data, ptoa(8));
+
+    return (0);
+}
+
+
+int IOMemoryDescriptorTest(int newValue)
+{
+    int result;
+
+#if 0
+    if (5 == newValue)
+    {
+       IOReturn             ret;
+       IOMemoryDescriptor * md;
+       IODMACommand       * dma;
+       IODMACommand::SegmentOptions segOptions =
+       {
+           .fStructSize      = sizeof(segOptions),
+           .fNumAddressBits  = 64,
+           .fMaxSegmentSize  = 4096,
+           .fMaxTransferSize = 128*1024,
+           .fAlignment       = 4,
+           .fAlignmentLength = 4,
+           .fAlignmentInternalSegments = 0x1000
+       };
+
+       IOAddressRange ranges[3][2] =
+       {
+           {
+               { (uintptr_t) &IOMemoryDescriptorTest, 0x2ffc },
+               { 0, 0 },
+           },
+           {
+               { ranges[0][0].address, 0x10 },
+               { 0x3000 + ranges[0][0].address, 0xff0 },
+           },
+           {
+               { ranges[0][0].address, 0x2ffc },
+               { trunc_page(ranges[0][0].address), 0x800 },
+           },
+       };
+       static const uint32_t rangesCount[3] = { 1, 2, 2 };
+       uint32_t test;
+
+       for (test = 0; test < 3; test++)
+       {
+           kprintf("---[%d] address 0x%qx-0x%qx, 0x%qx-0x%qx\n", test, 
+                               ranges[test][0].address, ranges[test][0].length,
+                               ranges[test][1].address, ranges[test][1].length);
+
+           md = IOMemoryDescriptor::withAddressRanges((IOAddressRange*)&ranges[test][0], rangesCount[test], kIODirectionOut, kernel_task);
+           assert(md);
+           ret = md->prepare();
+           assert(kIOReturnSuccess == ret);
+           dma = IODMACommand::withSpecification(kIODMACommandOutputHost64, &segOptions,
+                                                 IODMACommand::kMapped, NULL, NULL);
+           assert(dma);
+           ret = dma->setMemoryDescriptor(md, true);
+           if (kIOReturnSuccess == ret)
+           {
+               IODMACommand::Segment64 segments[1];
+               UInt32                  numSegments;
+               UInt64                  offset;
+
+               offset = 0;
+               do
+               {
+                   numSegments = 1;
+                   ret = dma->gen64IOVMSegments(&offset, &segments[0], &numSegments);
+                   assert(kIOReturnSuccess == ret);
+                   assert(1 == numSegments);
+                   kprintf("seg 0x%qx, 0x%qx\n", segments[0].fIOVMAddr, segments[0].fLength);
+               }
+               while (offset < md->getLength());
+
+               ret = dma->clearMemoryDescriptor(true);
+               assert(kIOReturnSuccess == ret);
+               dma->release();
+           }
+           md->release();
+        }
+
+       return (kIOReturnSuccess);
+    }
+    else if (4 == newValue)
+    {
+       IOService * isp;
+       IOMapper *  mapper;
+       IOBufferMemoryDescriptor * md1;
+       IODMACommand * dma;
+       IOReturn       ret;
+       size_t         bufSize = 8192 * 8192 * sizeof(uint32_t);
+       uint64_t start, time, nano;
+
+       isp = IOService::copyMatchingService(IOService::nameMatching("isp"));
+       assert(isp);
+        mapper = IOMapper::copyMapperForDeviceWithIndex(isp, 0);
+       assert(mapper);
+
+       md1 = IOBufferMemoryDescriptor::inTaskWithOptions(TASK_NULL, 
+               kIODirectionOutIn | kIOMemoryPersistent | kIOMemoryPageable,
+               bufSize, page_size);
+
+       ret = md1->prepare();
+       assert(kIOReturnSuccess == ret);
+
+       IODMAMapSpecification mapSpec;
+       bzero(&mapSpec, sizeof(mapSpec));
+       uint64_t mapped;
+       uint64_t mappedLength;
+
+       start = mach_absolute_time();
+
+       ret =  md1->dmaMap(mapper, NULL, &mapSpec, 0, bufSize, &mapped, &mappedLength);
+       assert(kIOReturnSuccess == ret);
+
+       time = mach_absolute_time() - start;
+
+       absolutetime_to_nanoseconds(time, &nano);
+       kprintf("time %lld us\n", nano / 1000ULL);
+       kprintf("seg0 0x%qx, 0x%qx\n", mapped, mappedLength);
+
+       assert(md1);
+
+       dma = IODMACommand::withSpecification(kIODMACommandOutputHost32, 
+                               32, 0, IODMACommand::kMapped, 0, 1, mapper, NULL);
+
+       assert(dma);
+
+       start = mach_absolute_time();
+       ret = dma->setMemoryDescriptor(md1, true);
+       assert(kIOReturnSuccess == ret);
+       time = mach_absolute_time() - start;
+
+       absolutetime_to_nanoseconds(time, &nano);
+       kprintf("time %lld us\n", nano / 1000ULL);
+
+       
+       IODMACommand::Segment32 segments[1];
+       UInt32                  numSegments = 1;
+       UInt64                  offset;
+
+       offset = 0;
+       ret = dma->gen32IOVMSegments(&offset, &segments[0], &numSegments);
+       assert(kIOReturnSuccess == ret);
+       assert(1 == numSegments);
+       kprintf("seg0 0x%x, 0x%x\n", (int)segments[0].fIOVMAddr, (int)segments[0].fLength);
+
+       ret = dma->clearMemoryDescriptor(true);
+       assert(kIOReturnSuccess == ret);
+
+       md1->release();
+
+       return (kIOReturnSuccess);
+    }
+
+    if (3 == newValue)
+    {
+       IOBufferMemoryDescriptor * md1;
+       IOBufferMemoryDescriptor * md2;
+       IOMemoryMap * map1;
+       IOMemoryMap * map2;
+       uint32_t * buf1;
+       uint32_t * buf2;
+       IOReturn err;
+
+       md1 = IOBufferMemoryDescriptor::inTaskWithOptions(TASK_NULL, 
+               kIODirectionOutIn | kIOMemoryPersistent | kIOMemoryPageable,
+               64*1024, page_size);
+       assert(md1);
+       map1 = md1->createMappingInTask(kernel_task, 0, kIOMapAnywhere | kIOMapUnique);
+       assert(map1);
+       buf1 = (uint32_t *) map1->getVirtualAddress();
+
+       md2 = IOBufferMemoryDescriptor::inTaskWithOptions(TASK_NULL, 
+               kIODirectionOutIn | kIOMemoryPersistent | kIOMemoryPageable,
+               64*1024, page_size);
+       assert(md2);
+       map2 = md2->createMappingInTask(kernel_task, 0, kIOMapAnywhere | kIOMapUnique);
+       assert(map2);
+       buf2 = (uint32_t *) map2->getVirtualAddress();
+
+       memset(buf1, 0x11, 64*1024L);
+       memset(buf2, 0x22, 64*1024L);
+
+       kprintf("md1 %p, map1 %p, buf2 %p; md2 %p, map2 %p, buf2 %p\n", md1, map1, buf1, md2, map2, buf2);
+
+       kprintf("no redir 0x%08x, 0x%08x\n", buf1[0], buf2[0]);
+       assert(0x11111111 == buf1[0]);
+       assert(0x22222222 == buf2[0]);
+       err = map1->redirect(md2, 0, 0ULL);
+       kprintf("redir md2(0x%x) 0x%08x, 0x%08x\n", err, buf1[0], buf2[0]);
+       assert(0x11111111 == buf2[0]);
+       assert(0x22222222 == buf1[0]);
+       err = map1->redirect(md1, 0, 0ULL);
+       kprintf("redir md1(0x%x) 0x%08x, 0x%08x\n", err, buf1[0], buf2[0]);
+       assert(0x11111111 == buf1[0]);
+       assert(0x22222222 == buf2[0]);
+       map1->release();
+       map2->release();
+       md1->release();
+       md2->release();
+    }
+#endif
+
+    result = IOMultMemoryDescriptorTest(newValue);
+    if (result) return (result);
+
+    IOGeneralMemoryDescriptor * md;
+    vm_offset_t data[2];
+    vm_size_t  bsize = 16*1024*1024;
+    vm_size_t  srcsize, srcoffset, mapoffset, size;
+    kern_return_t kr;
+
+    kr = vm_allocate(kernel_map, &data[0], bsize, VM_FLAGS_ANYWHERE);
+    vm_inherit(kernel_map, data[0] + ptoa(1), ptoa(1), VM_INHERIT_NONE);
+    vm_inherit(kernel_map, data[0] + ptoa(16), ptoa(4), VM_INHERIT_NONE);
+
+    IOLog("data 0x%lx, 0x%lx\n", (long)data[0], (long)data[1]);
+
+    uint32_t idx, offidx;
+    for (idx = 0; idx < (bsize / sizeof(uint32_t)); idx++)
+    {
+       ((uint32_t*)data[0])[idx] = idx;    
+    }
+
+    for (srcoffset = 0; srcoffset < bsize; srcoffset = ((srcoffset << 2) + 0x40c))
+    {
+       for (srcsize = 4; srcsize < (bsize - srcoffset - 1); srcsize = ((srcsize << 2) + 0x3fc))
+       {
+           IOAddressRange ranges[3];
+           uint32_t rangeCount = 1;
+
+           bzero(&ranges[0], sizeof(ranges));
+           ranges[0].address = data[0] + srcoffset;
+           ranges[0].length  = srcsize;
+
+           if (srcsize > ptoa(5))
+           {
+               ranges[0].length  = 7634;
+               ranges[1].length  = 9870;
+               ranges[2].length  = srcsize - ranges[0].length - ranges[1].length;
+               ranges[1].address = ranges[0].address + ranges[0].length;
+               ranges[2].address = ranges[1].address + ranges[1].length;
+               rangeCount = 3;     
+           }
+           else if ((srcsize > ptoa(2)) && !(page_mask & srcoffset))
+           {
+               ranges[0].length  = ptoa(1);
+               ranges[1].length  = ptoa(1);
+               ranges[2].length  = srcsize - ranges[0].length - ranges[1].length;
+               ranges[0].address = data[0] + srcoffset + ptoa(1);
+               ranges[1].address = data[0] + srcoffset;
+               ranges[2].address = ranges[0].address + ranges[0].length;
+               rangeCount = 3;     
+           }
+
+           md = OSDynamicCast(IOGeneralMemoryDescriptor, 
+               IOMemoryDescriptor::withAddressRanges(&ranges[0], rangeCount, kIODirectionInOut, kernel_task));
+           assert(md);
+
+           IOLog("IOMemoryDescriptor::withAddressRanges [0x%lx @ 0x%lx]\n[0x%llx, 0x%llx],\n[0x%llx, 0x%llx],\n[0x%llx, 0x%llx]\n", 
+                   (long) srcsize, (long) srcoffset,
+                   (long long) ranges[0].address - data[0], (long long) ranges[0].length,
+                   (long long) ranges[1].address - data[0], (long long) ranges[1].length,
+                   (long long) ranges[2].address - data[0], (long long) ranges[2].length);
+
+           if (kIOReturnSuccess == kr)
+           {
+               for (mapoffset = 0; mapoffset < srcsize; mapoffset = ((mapoffset << 1) + 0xf00))
+               {
+                   for (size = 4; size < (srcsize - mapoffset - 1); size = ((size << 2) + 0x200))
+                   {
+                       IOMemoryMap     * map;
+                       mach_vm_address_t addr = 0;
+                       uint32_t          data;
+
+//                     IOLog("<mapRef [0x%lx @ 0x%lx]\n", (long) size, (long) mapoffset);
+
+                       map = md->createMappingInTask(kernel_task, 0, kIOMapAnywhere, mapoffset, size);
+                       if (map) addr = map->getAddress();
+                       else kr = kIOReturnError;
+
+//                     IOLog(">mapRef 0x%x %llx\n", kr, addr);
+
+                       if (kIOReturnSuccess != kr) break;
+                       kr = md->prepare();
+                       if (kIOReturnSuccess != kr)
+                       {
+                           panic("prepare() fail 0x%x\n", kr);
+                           break;
+                       }
+                       for (idx = 0; idx < size; idx += sizeof(uint32_t))
+                       {
+                           offidx = (idx + mapoffset + srcoffset);
+                           if ((srcsize <= ptoa(5)) && (srcsize > ptoa(2)) && !(page_mask & srcoffset))
+                           {
+                               if (offidx < ptoa(2)) offidx ^= ptoa(1);
+                           }
+                           offidx /= sizeof(uint32_t);
+
+                           if (offidx != ((uint32_t*)addr)[idx/sizeof(uint32_t)]) 
+                           {
+                               panic("vm mismatch md %p map %p, @ 0x%x, 0x%lx, 0x%lx, \n", md, map, idx, (long) srcoffset, (long) mapoffset);
+                               kr = kIOReturnBadMedia;
+                           }
+                           else
+                           {
+                               if (sizeof(data) != md->readBytes(mapoffset + idx, &data, sizeof(data))) data = 0;
+                               if (offidx != data) 
+                               {
+                                   panic("phys mismatch md %p map %p, @ 0x%x, 0x%lx, 0x%lx, \n", md, map, idx, (long) srcoffset, (long) mapoffset);
+                                   kr = kIOReturnBadMedia;
+                               }
+                           }
+                       }
+                       md->complete();
+                       map->release();
+//                     IOLog("unmapRef %llx\n", addr);
+                   }
+                   if (kIOReturnSuccess != kr) break;
+               }
+           }
+            md->release();
+            if (kIOReturnSuccess != kr) break;
+       }
+       if (kIOReturnSuccess != kr) break;
+    }
+
+    if (kIOReturnSuccess != kr) IOLog("FAIL: src 0x%lx @ 0x%lx, map 0x%lx @ 0x%lx\n", 
+                                       (long) srcsize, (long) srcoffset, (long) size, (long) mapoffset);
+
+    assert(kr == kIOReturnSuccess);
+
+    vm_deallocate(kernel_map, data[0], bsize);
+//    vm_deallocate(kernel_map, data[1], size);
+
+    return (0);
+}
+
+#endif  /* DEVELOPMENT || DEBUG */
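
The asserts in IOMultMemoryDescriptorTest above come from straightforward page arithmetic: every byte of page n in the backing allocation is written as n | 0xD0, the two address ranges present the eight pages in the order 4..7 then 0..3, the two sub-descriptors append pages 7,0 and 3, and the mapping starts ptoa(7) into that eleven-page concatenation, so it should see pages 3, 7, 0, 3. A small userspace model of that arithmetic (plain C++, no IOKit; the page numbers are indices into the hypothetical backing buffer):

    #include <cassert>
    #include <vector>

    int main()
    {
        // mds[0] presents the eight backing pages in the order 4..7 then 0..3.
        std::vector<int> md0 = { 4, 5, 6, 7, 0, 1, 2, 3 };
        // mds[1]: sub-range at ptoa(3), length ptoa(2) -> pages 7, 0.
        std::vector<int> md1(md0.begin() + 3, md0.begin() + 5);
        // mds[2]: sub-range at ptoa(7), length ptoa(1) -> page 3.
        std::vector<int> md2(md0.begin() + 7, md0.begin() + 8);

        // The multi descriptor is the concatenation: eleven pages in total.
        std::vector<int> multi;
        for (const auto & v : { md0, md1, md2 })
            multi.insert(multi.end(), v.begin(), v.end());

        // The mapping covers offset ptoa(7) through the end: pages 3, 7, 0, 3.
        std::vector<int> mapped(multi.begin() + 7, multi.end());
        assert((mapped == std::vector<int>{ 3, 7, 0, 3 }));

        // Every byte of page n was written as n | 0xD0, matching the kernel asserts.
        assert((mapped[0] | 0xD0) == 0xd3 && (mapped[1] | 0xD0) == 0xd7);
        assert((mapped[2] | 0xD0) == 0xd0 && (mapped[3] | 0xD0) == 0xd3);
        return 0;
    }
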
index 405b2035282fda2abe9ff189d3518b5ef122a691..bc2d05b69b5d3d234d03bc9a78abd47007eec8ae 100644 (file)
  *
  */
 
-#include <IOKit/IODeviceTreeSupport.h>
-#include <libkern/c++/OSContainers.h>
-#include <IOKit/IOLib.h>
-
-#include <assert.h>
+#define TEST_HEADERS   0
 
+#if TEST_HEADERS
 
-extern "C" {
-extern int debug_container_malloc_size;
-extern int debug_ivars_size;
-}
-
-static void DumpTree( void )
+#include <libkern/OSByteOrder.h>
+#include <libkern/c++/OSArray.h>
+#include <libkern/c++/OSBoolean.h>
+#include <libkern/c++/OSCollection.h>
+#include <libkern/c++/OSCollectionIterator.h>
+#include <libkern/c++/OSContainers.h>
+#include <libkern/c++/OSCPPDebug.h>
+#include <libkern/c++/OSData.h>
+#include <libkern/c++/OSDictionary.h>
+#include <libkern/c++/OSEndianTypes.h>
+#include <libkern/c++/OSIterator.h>
+#include <libkern/c++/OSKext.h>
+#include <libkern/c++/OSLib.h>
+#include <libkern/c++/OSMetaClass.h>
+#include <libkern/c++/OSNumber.h>
+#include <libkern/c++/OSObject.h>
+#include <libkern/c++/OSOrderedSet.h>
+#include <libkern/c++/OSSerialize.h>
+#include <libkern/c++/OSSet.h>
+#include <libkern/c++/OSString.h>
+#include <libkern/c++/OSSymbol.h>
+#include <libkern/c++/OSUnserialize.h>
+#include <libkern/crypto/aes.h>
+#include <libkern/crypto/aesxts.h>
+#include <libkern/crypto/crypto_internal.h>
+#include <libkern/crypto/des.h>
+#include <libkern/crypto/md5.h>
+#include <libkern/crypto/register_crypto.h>
+#include <libkern/crypto/sha1.h>
+#include <libkern/crypto/sha2.h>
+#include <libkern/kernel_mach_header.h>
+#include <libkern/kext_request_keys.h>
+#include <libkern/kxld.h>
+#include <libkern/kxld_types.h>
+#include <libkern/locks.h>
+#include <libkern/mkext.h>
+#include <libkern/OSAtomic.h>
+#include <libkern/OSBase.h>
+#include <libkern/OSDebug.h>
+#include <libkern/OSKextLib.h>
+#include <libkern/OSKextLibPrivate.h>
+#include <libkern/OSMalloc.h>
+#include <libkern/OSReturn.h>
+#include <libkern/OSSerializeBinary.h>
+#include <libkern/OSTypes.h>
+#include <libkern/prelink.h>
+#include <libkern/stack_protector.h>
+#include <libkern/sysctl.h>
+#include <libkern/tree.h>
+#include <libkern/zconf.h>
+#include <libkern/zlib.h>
+
+#include <IOKit/AppleKeyStoreInterface.h>
+#include <IOKit/assert.h>
+#include <IOKit/IOBSD.h>
+#include <IOKit/IOBufferMemoryDescriptor.h>
+#include <IOKit/IOCatalogue.h>
+#include <IOKit/IOCommand.h>
+#include <IOKit/IOCommandGate.h>
+#include <IOKit/IOCommandPool.h>
+#include <IOKit/IOCommandQueue.h>
+#include <IOKit/IOConditionLock.h>
+#include <IOKit/IOCPU.h>
+//#include <IOKit/IODataQueue.h>
+#include <IOKit/IODataQueueShared.h>
+#include <IOKit/IODeviceMemory.h>
+#include <IOKit/IODeviceTreeSupport.h>
+#include <IOKit/IODMACommand.h>
+#include <IOKit/IODMAController.h>
+#include <IOKit/IODMAEventSource.h>
+#include <IOKit/IOEventSource.h>
+#include <IOKit/IOFilterInterruptEventSource.h>
+#include <IOKit/IOHibernatePrivate.h>
+#include <IOKit/IOInterleavedMemoryDescriptor.h>
+#include <IOKit/IOInterruptAccounting.h>
+#include <IOKit/IOInterruptAccountingPrivate.h>
+#include <IOKit/IOInterruptController.h>
+#include <IOKit/IOInterruptEventSource.h>
+#include <IOKit/IOInterrupts.h>
+#include <IOKit/IOKernelReporters.h>
+#include <IOKit/IOKernelReportStructs.h>
+#include <IOKit/IOKitDebug.h>
+#include <IOKit/IOKitDiagnosticsUserClient.h>
+#include <IOKit/IOKitKeys.h>
+#include <IOKit/IOKitKeysPrivate.h>
+#include <IOKit/IOKitServer.h>
+#include <IOKit/IOLib.h>
+#include <IOKit/IOLocks.h>
+#include <IOKit/IOLocksPrivate.h>
+#include <IOKit/IOMapper.h>
+#include <IOKit/IOMemoryCursor.h>
+#include <IOKit/IOMemoryDescriptor.h>
+#include <IOKit/IOMessage.h>
+#include <IOKit/IOMultiMemoryDescriptor.h>
+#include <IOKit/IONotifier.h>
+#include <IOKit/IONVRAM.h>
+#include <IOKit/IOPlatformExpert.h>
+#include <IOKit/IOPolledInterface.h>
+#include <IOKit/IORangeAllocator.h>
+#include <IOKit/IORegistryEntry.h>
+#include <IOKit/IOReportMacros.h>
+#include <IOKit/IOReportTypes.h>
+#include <IOKit/IOReturn.h>
+#include <IOKit/IOService.h>
+#include <IOKit/IOServicePM.h>
+#include <IOKit/IOSharedDataQueue.h>
+#include <IOKit/IOSharedLock.h>
+#include <IOKit/IOStatistics.h>
+#include <IOKit/IOStatisticsPrivate.h>
+#include <IOKit/IOSubMemoryDescriptor.h>
+#include <IOKit/IOSyncer.h>
+#include <IOKit/IOTimerEventSource.h>
+#include <IOKit/IOTimeStamp.h>
+#include <IOKit/IOTypes.h>
+#include <IOKit/IOUserClient.h>
+#include <IOKit/IOWorkLoop.h>
+#include <IOKit/nvram/IONVRAMController.h>
+#include <IOKit/OSMessageNotification.h>
+#include <IOKit/platform/AppleMacIO.h>
+#include <IOKit/platform/AppleMacIODevice.h>
+#include <IOKit/platform/AppleNMI.h>
+#include <IOKit/platform/ApplePlatformExpert.h>
+#include <IOKit/power/IOPwrController.h>
+#include <IOKit/pwr_mgt/IOPM.h>
+#include <IOKit/pwr_mgt/IOPMinformee.h>
+#include <IOKit/pwr_mgt/IOPMinformeeList.h>
+#include <IOKit/pwr_mgt/IOPMLibDefs.h>
+#include <IOKit/pwr_mgt/IOPMlog.h>
+#include <IOKit/pwr_mgt/IOPMPowerSource.h>
+#include <IOKit/pwr_mgt/IOPMPowerSourceList.h>
+#include <IOKit/pwr_mgt/IOPMpowerState.h>
+#include <IOKit/pwr_mgt/IOPMPrivate.h>
+#include <IOKit/pwr_mgt/IOPowerConnection.h>
+#include <IOKit/pwr_mgt/RootDomain.h>
+#include <IOKit/rtc/IORTCController.h>
+#include <IOKit/system.h>
+#include <IOKit/system_management/IOWatchDogTimer.h>
+
+#endif /* TEST_HEADERS */
+
+#include <sys/sysctl.h>
+#include <libkern/c++/OSData.h>
+#include "Tests.h"
+
+static int
+sysctl_iokittest(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
-    IORegistryEntry *          next;
-    IORegistryEntry *          packages = 0;
-    IORegistryEntry *          deblocker = 0;
-    IORegistryEntry *          keyboard = 0;
-    IORegistryIterator *       iter;
-    OSOrderedSet *             all;
-
-    IOLog("ivars %08x, containers %08x\n",
-       debug_ivars_size, debug_container_malloc_size);
-
-    iter = IORegistryIterator::iterateOver( gIODTPlane );
-    assert( iter );
-
-    all = iter->iterateAll();
-    IOLog("\nCount %d\n", all->getCount() );
-    all->release();
-
-    iter->reset();
-    while( (next = iter->nextEntryRecursive())) {
-       if( 0 == strcmp( "packages", next->getName()))
-           packages = next;
-       if( 0 == strcmp( "deblocker", next->getName()))
-           deblocker = next;
-       if( 0 == strcmp( "keyboard", next->getName()))
-           keyboard = next;
-    }
-
-    if( deblocker && keyboard)
-       deblocker->attachToParent( keyboard, gIODTPlane);
-
-    iter->reset();
-    while( (next = iter->nextEntryRecursive())) {
-       IOLog("%s=%d,", next->getName(), next->getDepth( gIODTPlane ));
-       if( 0 == strcmp( "gc", next->getName())) {
-           packages = next;
-       }
+    int error;
+    int newValue, changed;
+
+    error = sysctl_io_number(req, 0, sizeof(int), &newValue, &changed);
+    if (error) return (error);
+
+#if DEVELOPMENT || DEBUG
+    if (changed && (999==newValue))
+    {
+       OSData * data = OSData::withCapacity(16);
+       data->release();
+       data->release();
     }
 
-    IOLog("ivars %08x, containers %08x\n",
-       debug_ivars_size, debug_container_malloc_size);
-
-    if( packages)
-       packages->detachAll( gIODTPlane);
-    all = iter->iterateAll();
-    IOLog("del gc/, count now %d\n", all->getCount() );
-    all->release();
-
-    iter->release();
-
-    IOLog("ivars %08x, containers %08x\n",
-       debug_ivars_size, debug_container_malloc_size);
+    if (changed && newValue) error = IOMemoryDescriptorTest(newValue);
+#endif  /* DEVELOPMENT || DEBUG */
 
+    return (error);
 }
 
-extern "C" {
-void PathTests( void )
-{
-    const char * tests[] = {
-        "IODeviceTree:/bandit",
-        "IODeviceTree:/",
-       "IODeviceTree:/xxxx",
-       "IODeviceTree:/bandit/xxx",
-        "IODeviceTree:/bandit@F2000000",
-        "IODeviceTree:/bandit/gc",
-        "IODeviceTree:/bandit/gc/mace:17.202.42.95,\\mach_kernel",
-        "IODeviceTree:/bandit/@10/mesh",
-        "IODeviceTree:enet:17.202",
-        "IODeviceTree:scsi/@0:0",
-        "IODeviceTree:scsi-int",
-        "IODeviceTree:/bandit/gc@10/mesh",
-        "IODeviceTree:/bandit/gc/53c94/disk@0:6,mach_kernel",
-        "IOService:/",
-        "IOService:/ApplePlatformExpert",
-        "IOService:/ApplePlatformExpert/hammerhead@F8000000",
-        "IOService:/ApplePlatformExpert/bandit/AppleMacRiscPCI"
-    };
-
-    IORegistryEntry *  entry;
-    char               str[256];
-    int                        len;
-
-    for( unsigned int i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
+SYSCTL_PROC(_kern, OID_AUTO, iokittest,
+        CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+        0, 0, sysctl_iokittest, "I", "");
 
-       len = sizeof( str );
-       entry = IORegistryEntry::fromPath( tests[i], 0, str, &len );
-        IOLog("\"%s\" ", tests[i] );
-       if( entry) {
-           IOLog("found %s, tail = \"%s\"\n", entry->getName(), str );
-            len = sizeof( str );
-           if( entry->getPath( str, &len,
-                       IORegistryEntry::getPlane("IODeviceTree"))) {
-               IOLog("path = \"%s\"\n", str);
-           }
-           entry->release();
-       } else
-           IOLog("not found\n");
-    }
-}
-}
-
-void TestsCpp( void * dtTop )
-{
-    IORegistryEntry * dt;
-
-    IOLog("\nivars %08x, containers %08x\n",
-       debug_ivars_size, debug_container_malloc_size);
-
-    OSMetaClass::printInstanceCounts();
-    dt = IODeviceTreeAlloc( dtTop );
-    assert( dt );
-
-//    OSMetaClass::printInstanceCounts();
-    DumpTree();
-//    OSMetaClass::printInstanceCounts();
-    dt->detachAll( gIODTPlane);
-    OSMetaClass::printInstanceCounts();
-    IOLog("ivars %08x, containers %08x\n",
-       debug_ivars_size, debug_container_malloc_size);
-}
 
index 6c03b7a83b214e2673845d3b73194533f127c354..67abf6bf644e68a83d7965acf2e2b1cdc9715345 100644 (file)
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-#include <IOKit/IOLib.h>
 
-#ifdef __cplusplus
+extern int IOMemoryDescriptorTest(int x);
 
-#define logPrintf(x)                                           \
-    do {                                                       \
-        kprintf x;                                             \
-    } while (0)
-
-#define verPrintf(x) logPrintf(x)
-
-// Assumes 'bool res = true' in current scope
-#define TEST_ASSERT(t, l, c)                                           \
-    do {                                                               \
-        if ( !(c) ) {                                                  \
-            verPrintf(("TEST (%c) test %s failed\n", t, l));   \
-            res = false;                                               \
-        }                                                              \
-    } while(0)
-
-#define logSpace()             do { } while(0)
-#define checkPointSpace()      ((void *) 0)
-#define checkSpace(l, ckp, d)  ((int) 1)
-
-// In TestContainers.cc
-extern const int numStrCache;
-extern const char *strCache[];
-
-extern void testString();
-extern void testSymbol();
-extern void testData();
-
-// In TestCollections.cc
-extern void testArray();
-extern void testSet();
-extern void testDictionary();
-extern void testIterator();
-
-// In TestDevice.cc
-extern void testWorkLoop();
-
-#include <libkern/c++/OSObject.h>
-
-class IOWorkLoop;
-class IOCommandQueue;
-class IOInterruptEventSource;
-
-class TestDevice;
-typedef void (*TestDeviceAction)(TestDevice *, int, void *);
-
-class TestDevice : public OSObject
-{
-    OSDeclareDefaultStructors(TestDevice)
-
-    IOWorkLoop *workLoop;
-    int intCount;
-    IOCommandQueue *commQ;
-
-public:
-    IOInterruptEventSource *intES;
-
-    virtual bool init();
-    virtual void free();
-
-    void rawCommandOccurred
-            (void *field0, void *field1, void *field2, void *field3);
-    kern_return_t enqueueCommand(bool sleep,
-                                 TestDeviceAction act, int tag, void *dataP);
-
-    void interruptAction(IOInterruptEventSource *event, int count);
-
-    void producer1Action(int tag);
-    void producer2Action(int tag, void *inCount);
-
-    void alarm();
-};
-
-#endif /* __cplusplus */
index 9b08bb834a3fe8c0f916c06b7d401ec946522473..3e45ff1ff7e7ad049ee14c5b99a54d36df11fbf8 100644 (file)
@@ -32,6 +32,7 @@
 #include <IOKit/IODeviceTreeSupport.h>
 #include <IOKit/IOKitKeys.h>
 #include <IOKit/IOPlatformExpert.h>
+#include <IOKit/IOUserClient.h>
 
 extern "C" {
 
@@ -39,6 +40,7 @@ extern "C" {
 #include <kern/clock.h>
 #include <uuid/uuid.h>
 #include <sys/vnode_internal.h>
+#include <sys/mount.h>
 
 // how long to wait for matching root device, secs
 #if DEBUG
@@ -47,11 +49,32 @@ extern "C" {
 #define ROOTDEVICETIMEOUT       60
 #endif
 
+int panic_on_exception_triage = 0;
+
 extern dev_t mdevadd(int devid, uint64_t base, unsigned int size, int phys);
 extern dev_t mdevlookup(int devid);
 extern void mdevremoveall(void);
 extern void di_root_ramfile(IORegistryEntry * entry);
 
+
+#if   DEVELOPMENT
+#define IOPOLLED_COREFILE      1
+// no sizing
+#define kIOCoreDumpSize                0ULL
+#define kIOCoreDumpFreeSize    0ULL
+#else
+#define IOPOLLED_COREFILE      0
+#endif
+
+
+#if IOPOLLED_COREFILE
+static bool 
+NewKernelCoreMedia(void * target, void * refCon,
+                  IOService * newService,
+                  IONotifier * notifier);
+#endif /* IOPOLLED_COREFILE */
+
+
 kern_return_t
 IOKitBSDInit( void )
 {
@@ -763,3 +786,157 @@ int IOBSDIsMediaEjectable( const char *cdev_name )
 }
 
 } /* extern "C" */
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#include <sys/conf.h>
+#include <sys/vnode.h>
+#include <sys/vnode_internal.h>
+#include <sys/fcntl.h>
+#include <IOKit/IOPolledInterface.h>
+#include <IOKit/IOBufferMemoryDescriptor.h>
+
+IOPolledFileIOVars * gIOPolledCoreFileVars;
+
+#if IOPOLLED_COREFILE
+
+static IOReturn 
+IOOpenPolledCoreFile(const char * filename)
+{
+    IOReturn err;
+    unsigned int debug;
+
+    if (gIOPolledCoreFileVars)                             return (kIOReturnBusy);
+    if (!IOPolledInterface::gMetaClass.getInstanceCount()) return (kIOReturnUnsupported);
+
+    debug = 0;
+    PE_parse_boot_argn("debug", &debug, sizeof (debug));
+    if (DB_DISABLE_LOCAL_CORE & debug)                     return (kIOReturnUnsupported);
+
+    err = IOPolledFileOpen(filename, kIOCoreDumpSize, kIOCoreDumpFreeSize,
+                           NULL, 0,
+                           &gIOPolledCoreFileVars, NULL, NULL, 0);
+    if (kIOReturnSuccess != err)                           return (err);
+
+    err = IOPolledFilePollersSetup(gIOPolledCoreFileVars, kIOPolledPreflightCoreDumpState);
+    if (kIOReturnSuccess != err)
+    {
+       IOPolledFileClose(&gIOPolledCoreFileVars, NULL, NULL, 0, 0, 0);
+    }
+
+    return (err);
+}
+
+static void 
+IOClosePolledCoreFile(void)
+{
+    IOPolledFilePollersClose(gIOPolledCoreFileVars, kIOPolledPostflightState);
+    IOPolledFileClose(&gIOPolledCoreFileVars, NULL, NULL, 0, 0, 0);
+}
+
+static thread_call_t gIOOpenPolledCoreFileTC;
+static IONotifier  * gIOPolledCoreFileNotifier;
+static IONotifier  * gIOPolledCoreFileInterestNotifier;
+
+static IOReturn 
+KernelCoreMediaInterest(void * target, void * refCon,
+                       UInt32 messageType, IOService * provider,
+                       void * messageArgument, vm_size_t argSize )
+{
+    if (kIOMessageServiceIsTerminated == messageType)
+    {
+       gIOPolledCoreFileInterestNotifier->remove();
+       gIOPolledCoreFileInterestNotifier = 0;
+       IOClosePolledCoreFile();
+    }
+
+    return (kIOReturnSuccess);
+}
+
+static void
+OpenKernelCoreMedia(thread_call_param_t p0, thread_call_param_t p1)
+{
+    IOService * newService;
+    OSString  * string;
+    char        filename[16];
+
+    newService = (IOService *) p1;
+    do
+    {
+       if (gIOPolledCoreFileVars) break;
+       string = OSDynamicCast(OSString, newService->getProperty(kIOBSDNameKey));
+       if (!string) break;
+       snprintf(filename, sizeof(filename), "/dev/%s", string->getCStringNoCopy());
+       if (kIOReturnSuccess != IOOpenPolledCoreFile(filename)) break;
+       gIOPolledCoreFileInterestNotifier = newService->registerInterest(
+                               gIOGeneralInterest, &KernelCoreMediaInterest, NULL, 0);
+    }
+    while (false);
+
+    newService->release();
+}
+
+static bool 
+NewKernelCoreMedia(void * target, void * refCon,
+                  IOService * newService,
+                  IONotifier * notifier)
+{
+    do
+    {
+       if (gIOPolledCoreFileVars)    break;
+        if (!gIOOpenPolledCoreFileTC) break;
+        newService = newService->getProvider();
+        if (!newService)              break;
+        newService->retain();
+       thread_call_enter1(gIOOpenPolledCoreFileTC, newService);
+    }
+    while (false);
+
+    return (false);
+}
+
+#endif /* IOPOLLED_COREFILE */
+
+extern "C" void 
+IOBSDMountChange(struct mount * mp, uint32_t op)
+{
+#if IOPOLLED_COREFILE
+
+    OSDictionary * bsdMatching;
+    OSDictionary * mediaMatching;
+    OSString     * string;
+
+    if (!gIOPolledCoreFileNotifier) do
+    {
+       if (!gIOOpenPolledCoreFileTC) gIOOpenPolledCoreFileTC = thread_call_allocate(&OpenKernelCoreMedia, NULL);
+       bsdMatching = IOService::serviceMatching("IOMediaBSDClient");
+       if (!bsdMatching) break;
+       mediaMatching = IOService::serviceMatching("IOMedia");
+       string = OSString::withCStringNoCopy("5361644D-6163-11AA-AA11-00306543ECAC");
+       if (!string || !mediaMatching) break;
+       mediaMatching->setObject("Content", string);
+       string->release();
+       bsdMatching->setObject(gIOParentMatchKey, mediaMatching);
+       mediaMatching->release();
+
+       gIOPolledCoreFileNotifier = IOService::addMatchingNotification(
+                                                 gIOFirstMatchNotification, bsdMatching, 
+                                                 &NewKernelCoreMedia, NULL, NULL, -1000);
+    }
+    while (false);
+
+#endif /* IOPOLLED_COREFILE */
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+extern "C" boolean_t 
+IOTaskHasEntitlement(task_t task, const char * entitlement)
+{
+    OSObject * obj;
+    obj = IOUserClient::copyClientEntitlement(task, entitlement);
+    if (!obj) return (false);
+    obj->release();
+    return (obj != kOSBooleanFalse);
+}
+
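
The new IOTaskHasEntitlement() helper at the end of this file treats an entitlement as granted whenever IOUserClient::copyClientEntitlement() returns something other than the kOSBooleanFalse singleton: a missing entitlement yields false, an entitlement explicitly set to boolean false yields false, and any other value (true, a string, an array) yields true. A minimal standalone model of that decision, with a hypothetical Value type and false singleton standing in for OSObject and kOSBooleanFalse:

    #include <cassert>

    // Hypothetical stand-ins for OSObject and the kOSBooleanFalse singleton.
    struct Value {};
    static Value gFalseSingleton;

    // Mirrors the IOTaskHasEntitlement() decision: no value -> false,
    // the boolean-false singleton -> false, anything else -> true.
    static bool hasEntitlement(Value * copied)
    {
        if (!copied) return false;
        return copied != &gFalseSingleton;
    }

    int main()
    {
        Value someOtherValue;
        assert(!hasEntitlement(nullptr));             // entitlement absent
        assert(!hasEntitlement(&gFalseSingleton));    // entitlement present but false
        assert(hasEntitlement(&someOtherValue));      // true / string / array / ...
        return 0;
    }
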
index ceffec084943f0ff0653f4593899e334ac1f6513..4fe56b115e6a3ebe7a36f703efb77b3055b93e15 100644 (file)
@@ -87,13 +87,13 @@ $(SOBJS): .SFLAGS
 $(COMPONENT).filelist: $(OBJS)
        $(_v)for hib_file in ${HIB_FILES};              \
        do      \
-                $(SEG_HACK) -n __HIB -o $${hib_file}__ $${hib_file} ; \
-                mv $${hib_file}__ $${hib_file} ; \
+                $(SEG_HACK) -n __HIB -o $${hib_file}__ $${hib_file} || exit 1; \
+                mv $${hib_file}__ $${hib_file} || exit 1; \
        done
        @echo LDFILELIST $(COMPONENT)
-       $(_v)( for obj in ${OBJS}; do   \
+       $(_v)for obj in ${OBJS}; do     \
                 echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \
-       done; ) > $(COMPONENT).filelist
+       done > $(COMPONENT).filelist
 
 do_all: $(COMPONENT).filelist
 
index 31173f88cf123358780a9c2f793e6b8ca0907f5a..0e883af2bf9fa3522e55a729d1759d4634afa6b1 100644 (file)
@@ -70,10 +70,11 @@ iokit/Kernel/IOUserClient.cpp                               optional iokitcpp
 iokit/Kernel/IOKitDebug.cpp                            optional iokitcpp
 iokit/Kernel/IODataQueue.cpp                           optional iokitcpp
 iokit/Kernel/IOSharedDataQueue.cpp                     optional iokitcpp
-# iokit/Tests/Tests.cpp                                        optional iokitcpp
-# iokit/Tests/TestDevice.cpp                           optional iokitcpp
-# iokit/Tests/TestContainers.cpp                       optional iokitcpp
-# iokit/Tests/TestCollections.cpp                      optional iokitcpp
+iokit/Tests/Tests.cpp                                  optional iokitcpp
+iokit/Tests/TestIOMemoryDescriptor.cpp      optional iokitcpp
+# iokit/Tests/TestDevice.cpp                optional iokitcpp
+# iokit/Tests/TestContainers.cpp            optional iokitcpp
+# iokit/Tests/TestCollections.cpp           optional iokitcpp
 
 iokit/Kernel/IOStatistics.cpp                          optional iokitcpp
 iokit/Kernel/IOInterruptAccounting.cpp                 optional iokitcpp
diff --git a/libkdd/kcdata/KCDBasicTypeDescription.h b/libkdd/kcdata/KCDBasicTypeDescription.h
new file mode 100644 (file)
index 0000000..ebab258
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include "kdd.h"
+#include <kern/kern_cdata.h>
+#import <Foundation/Foundation.h>
+
+@interface KCDBasicTypeDescription : KCDataType
+
+- (id)initWithKCTypeDesc:(kcdata_subtype_descriptor_t)sub_type_desc;
+
+/*
+ * Restricted. Only for internal use
+ * the following interface creates a basic type_0x33 = [ array of uint8_t ] kind
+ */
+- (id)createDefaultForType:(uint32_t)typeID;
+
+@end
diff --git a/libkdd/kcdata/KCDBasicTypeDescription.m b/libkdd/kcdata/KCDBasicTypeDescription.m
new file mode 100644 (file)
index 0000000..151093b
--- /dev/null
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#import "KCDBasicTypeDescription.h"
+
+@interface
+KCDBasicTypeDescription () {
+       int _typeID;
+       uint32_t _size;
+       uint32_t _count;
+       NSString * _name;
+       struct kcdata_subtype_descriptor _subtype_desc;
+}
+
+@end
+
+@implementation KCDBasicTypeDescription
+
+- (id)initWithKCTypeDesc:(kcdata_subtype_descriptor_t)sub_type_desc
+{
+       _typeID = sub_type_desc->kcs_elem_type;
+       _count = kcs_get_elem_count(sub_type_desc);
+       _size = kcs_get_elem_size(sub_type_desc);
+
+       memcpy(&_subtype_desc, sub_type_desc, sizeof(_subtype_desc));
+       _name = [NSString stringWithFormat:@"%s", _subtype_desc.kcs_name];
+
+       return self;
+}
+
+- (id)createDefaultForType:(uint32_t)typeID
+{
+       struct kcdata_subtype_descriptor subtype;
+       subtype.kcs_flags = KCS_SUBTYPE_FLAGS_ARRAY;
+       subtype.kcs_elem_type = KC_ST_UINT8;
+       subtype.kcs_elem_offset = 0;
+       subtype.kcs_elem_size = KCS_SUBTYPE_PACK_SIZE(UINT16_MAX, (uint16_t)sizeof(uint8_t));
+       subtype.kcs_name[0] = '\0';
+       (void)[self initWithKCTypeDesc:&subtype];
+       _name = [NSString stringWithFormat:@"Type_0x%x", typeID];
+       return self;
+}
+
+- (NSObject *)objectForType:(kctype_subtype_t)elem_type withData:(uint8_t *)data
+{
+       NSObject * obj;
+
+       switch (elem_type) {
+       case KC_ST_CHAR: obj = [NSString stringWithFormat:@"%c", *(char *)data]; break;
+       case KC_ST_INT8: obj = [NSNumber numberWithInt:*(int8_t *)data]; break;
+       case KC_ST_UINT8: obj = [NSNumber numberWithInt:*(uint8_t *)data]; break;
+       case KC_ST_INT16: obj = [NSNumber numberWithShort:*(int16_t *)data]; break;
+       case KC_ST_UINT16: obj = [NSNumber numberWithUnsignedShort:*(uint16_t *)data]; break;
+       case KC_ST_INT32: obj = [NSNumber numberWithInt:*(int32_t *)data]; break;
+       case KC_ST_UINT32: obj = [NSNumber numberWithUnsignedInt:*(uint32_t *)data]; break;
+       case KC_ST_INT64: obj = [NSNumber numberWithLongLong:*(int64_t *)data]; break;
+       case KC_ST_UINT64: obj = [NSNumber numberWithUnsignedLongLong:*(uint64_t *)data]; break;
+
+       default: obj = @"<Unknown error occurred>"; break;
+       }
+
+       return obj;
+}
+
+- (NSMutableDictionary *)parseData:(void *)dataBuffer ofLength:(uint32_t)length
+{
+       NSMutableDictionary * retval = [[NSMutableDictionary alloc] init];
+       uint8_t * data = (uint8_t *)dataBuffer;
+       uint32_t elem_count = MIN(_count, length / (_size / _count));
+       uint32_t elem_size = _size / _count;
+       if (_count == 1) {
+               retval[_name] = [self objectForType:_subtype_desc.kcs_elem_type withData:&data[_subtype_desc.kcs_elem_offset]];
+       } else if (_subtype_desc.kcs_elem_type == KC_ST_CHAR) {
+               retval[_name] = [NSString stringWithFormat:@"%s", (char *)&data[_subtype_desc.kcs_elem_offset]];
+       } else {
+               NSMutableArray * objArray = [NSMutableArray arrayWithCapacity:elem_count];
+               for (unsigned int i = 0; i < elem_count; i++) {
+                       [objArray addObject:[self objectForType:_subtype_desc.kcs_elem_type
+                                                         withData:&data[(_subtype_desc.kcs_elem_offset + (elem_size * i))]]];
+               }
+               retval[_name] = objArray;
+       }
+       return retval;
+}
+
+- (NSString *)description
+{
+       return [NSString stringWithFormat:@"type: %d => \"%@\" ", [self typeID], [self name]];
+}
+
+- (NSString *)name
+{
+       return _name;
+}
+
+- (uint32_t)count
+{
+       return _count;
+}
+
+- (int)typeID
+{
+       return _typeID;
+}
+
+@end
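
In KCDBasicTypeDescription's parseData:ofLength: above, a subtype's _size is the declared size of the whole field and _count its declared element count, so the per-element size is _size / _count and the number of elements actually decoded is clamped by the bytes available; single elements become scalars, char arrays become strings, and everything else becomes an array. A small worked example of that clamping arithmetic (plain C++; the 64-byte, 64-element field and the 32-byte buffer are hypothetical):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main()
    {
        // Hypothetical subtype: a field declared as 64 x uint8_t.
        uint32_t size   = 64;   // _size: total declared bytes
        uint32_t count  = 64;   // _count: declared element count
        uint32_t length = 32;   // bytes actually present in this kcdata item

        uint32_t elem_size  = size / count;                        // 1 byte per element
        uint32_t elem_count = std::min(count, length / elem_size); // clamped to 32
        assert(elem_size == 1 && elem_count == 32);
        return 0;
    }
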
diff --git a/libkdd/kcdata/KCDStructTypeDescription.h b/libkdd/kcdata/KCDStructTypeDescription.h
new file mode 100644 (file)
index 0000000..68a200e
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#import <Foundation/Foundation.h>
+#import "KCDBasicTypeDescription.h"
+
+@interface KCDStructTypeDescription : KCDataType
+
+- (id)initWithType:(int)typeID withName:(NSString *)name;
+
+- (void)addFieldBasicType:(KCDBasicTypeDescription *)fieldType;
+
+@end
diff --git a/libkdd/kcdata/KCDStructTypeDescription.m b/libkdd/kcdata/KCDStructTypeDescription.m
new file mode 100644 (file)
index 0000000..60f70b1
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#import "KCDStructTypeDescription.h"
+
+#ifndef KCDATA_TYPE_MAX_WITH_DESC
+#define KCDATA_TYPE_MAX_WITH_DESC 0x6
+#endif
+
+@interface
+KCDStructTypeDescription () {
+       int _typeID;
+       NSString * _name;
+       NSMutableArray * _fields;
+       BOOL _needDescriptionAsKey;
+}
+
+@end
+
+@implementation KCDStructTypeDescription
+
+- (id)initWithType:(int)typeID withName:(NSString *)name
+{
+       if ((self = [super init])) {
+               _typeID = typeID;
+               _name = name;
+               _needDescriptionAsKey = NO;
+               if (typeID >= 0x1 && typeID <= KCDATA_TYPE_MAX_WITH_DESC)
+                       _needDescriptionAsKey = YES;
+
+               _fields = [[NSMutableArray alloc] init];
+               return self;
+       }
+       return NULL;
+}
+
+- (void)addFieldBasicType:(KCDBasicTypeDescription *)fieldType
+{
+       [_fields addObject:fieldType];
+}
+
+- (NSMutableDictionary *)parseData:(void *)dataBuffer ofLength:(uint32_t)length
+{
+       NSMutableDictionary * retval = [[NSMutableDictionary alloc] init];
+       for (KCDataType * fi in _fields) {
+               NSMutableDictionary * _d = [fi parseData:dataBuffer ofLength:length];
+               for (NSString * k in [_d keyEnumerator]) {
+                       retval[k] = _d[k];
+               }
+       }
+       if (_needDescriptionAsKey) {
+               NSString * desc = retval[@"desc"];
+               NSObject * obj = retval[@"data"];
+               retval[desc] = obj;
+               [retval removeObjectForKey:@"desc"];
+               [retval removeObjectForKey:@"data"];
+       }
+       return retval;
+}
+
+- (NSString *)description
+{
+       return [NSString stringWithFormat:@"type: %d => \"%@\" ", _typeID, _name];
+}
+
+- (NSString *)name
+{
+       return _name;
+}
+
+- (uint32_t)count
+{
+       return (uint32_t)[_fields count];
+}
+
+- (int)typeID
+{
+       return _typeID;
+}
+
+@end
diff --git a/libkdd/kcdata/kcdata_core.m b/libkdd/kcdata/kcdata_core.m
new file mode 100644 (file)
index 0000000..90e6194
--- /dev/null
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <kern/kern_cdata.h>
+#import <Foundation/Foundation.h>
+#import "kdd.h"
+#import "KCDBasicTypeDescription.h"
+#import "KCDStructTypeDescription.h"
+
+#define MAX_KCDATATYPE_BUFFER_SIZE 2048
+extern struct kcdata_type_definition *kcdata_get_typedescription(unsigned type_id, uint8_t *buffer, uint32_t buffer_size);
+
+
+/*!
+ * @function getTypeFromTypeDef
+ *
+ * @abstract
+ * Build a KCDataType from a type definition.
+ *
+ * @param typeDef
+ * A pointer to kcdata_type_definition_t that specifies the type fields and has subtype definitions
+ * in the memory immediately following the type_definition.
+ *
+ * @return KCDataType * type object which can be used to parse data into dictionaries.
+ * This may return nil if it finds the data to be invalid.
+ *
+ * @discussion
+ * This routine tries to decode the typeDef structure and create either a basic type (KCDBasicTypeDescription)
+ * or a struct type.
+ */
+static KCDataType * getTypeFromTypeDef(struct kcdata_type_definition * typeDef);
+
+static KCDataType *
+getTypeFromTypeDef(struct kcdata_type_definition * typeDef)
+{
+       if (typeDef == NULL) {
+               return nil;
+       }
+       NSString * kct_name = [NSString stringWithFormat:@"%s", typeDef->kct_name];
+       if (typeDef->kct_num_elements == 1) {
+               KCDBasicTypeDescription * retval = [[KCDBasicTypeDescription alloc] initWithKCTypeDesc:&typeDef->kct_elements[0]];
+               return retval;
+       } else {
+               KCDStructTypeDescription * retval =
+                   [[KCDStructTypeDescription alloc] initWithType:typeDef->kct_type_identifier withName:kct_name];
+               /* need to do work here to get the array of elements setup here */
+               KCDBasicTypeDescription * curField = nil;
+               for (unsigned int i = 0; i < typeDef->kct_num_elements; i++) {
+                       curField = [[KCDBasicTypeDescription alloc] initWithKCTypeDesc:&typeDef->kct_elements[i]];
+                       [retval addFieldBasicType:curField];
+               }
+               return retval;
+       }
+       return nil;
+}
+
+KCDataType *
+getKCDataTypeForID(uint32_t typeID)
+{
+       static dispatch_once_t onceToken;
+       static NSMutableDictionary * knownTypes = nil;
+       dispatch_once(&onceToken, ^{
+               if (!knownTypes) {
+                       knownTypes = [[NSMutableDictionary alloc] init];
+               }
+       });
+       NSNumber * type = [NSNumber numberWithUnsignedInt:typeID];
+       if (!knownTypes[type]) {
+               /* code to query system for type information */
+               uint8_t buffer[MAX_KCDATATYPE_BUFFER_SIZE];
+               struct kcdata_type_definition * sys_def = kcdata_get_typedescription(typeID, buffer, MAX_KCDATATYPE_BUFFER_SIZE);
+               if (sys_def == NULL) {
+                       knownTypes[type] = [[KCDBasicTypeDescription alloc] createDefaultForType:typeID];
+               } else {
+                       knownTypes[type] = getTypeFromTypeDef(sys_def);
+               }
+       }
+       assert(knownTypes[type] != nil);
+       return knownTypes[type];
+}
+
+NSString *
+KCDataTypeNameForID(uint32_t typeID)
+{
+       NSString * retval = [NSString stringWithFormat:@"%u", typeID];
+       KCDataType * t = getKCDataTypeForID(typeID);
+
+       if (![[t name] containsString:@"Type_"]) {
+               retval = [t name];
+       }
+       return retval;
+}
+
+NSMutableDictionary *
+parseKCDataArray(void * dataBuffer)
+{
+       uint32_t typeID = KCDATA_ITEM_ARRAY_GET_EL_TYPE(dataBuffer);
+       uint32_t count = KCDATA_ITEM_ARRAY_GET_EL_COUNT(dataBuffer);
+       uint32_t size = KCDATA_ITEM_ARRAY_GET_EL_SIZE(dataBuffer);
+       uint8_t * buffer = (uint8_t *)KCDATA_ITEM_DATA_PTR(dataBuffer);
+       KCDataType * datatype = getKCDataTypeForID(typeID);
+       NSMutableDictionary * retval = [[NSMutableDictionary alloc] initWithCapacity:1];
+       NSMutableArray * arr = [[NSMutableArray alloc] initWithCapacity:count];
+       retval[[datatype name]] = arr;
+       NSMutableDictionary * tmpdict = NULL;
+       for (uint32_t i = 0; i < count; i++) {
+               tmpdict = [datatype parseData:(void *)&buffer[i * size] ofLength:size];
+               [arr addObject:tmpdict];
+       }
+       return retval;
+}
+
+NSMutableDictionary *
+parseKCDataContainer(void * dataBuffer, uint32_t * bytesParsed)
+{
+       if (bytesParsed == NULL)
+               return nil;
+       assert(KCDATA_ITEM_TYPE(dataBuffer) == KCDATA_TYPE_CONTAINER_BEGIN);
+       uint64_t containerID = KCDATA_CONTAINER_ID(dataBuffer);
+
+       /* setup collection object for sub containers */
+       NSMutableDictionary * sub_containers = [[NSMutableDictionary alloc] init];
+       NSMutableDictionary * retval = [[NSMutableDictionary alloc] init];
+       NSMutableDictionary * container = [[NSMutableDictionary alloc] init];
+       struct kcdata_item * buffer = (struct kcdata_item *)KCDATA_ITEM_NEXT_HEADER(dataBuffer);
+       KCDataType * tmptype;
+       uint32_t _t;
+       void * _d;
+       NSMutableDictionary * tmpdict;
+       retval[KCDataTypeNameForID(kcdata_get_container_type(dataBuffer))] = container;
+
+       KCDATA_ITEM_FOREACH(buffer)
+       {
+               _t = KCDATA_ITEM_TYPE(buffer);
+               _d = KCDATA_ITEM_DATA_PTR(buffer);
+               if (_t == KCDATA_TYPE_CONTAINER_END) {
+                       if (KCDATA_CONTAINER_ID(buffer) == containerID) {
+                               break;
+                       }
+                       continue;
+               }
+
+               if (_t == KCDATA_TYPE_ARRAY) {
+                       tmpdict = parseKCDataArray(buffer);
+                       [container addEntriesFromDictionary:tmpdict];
+                       continue;
+               }
+
+               if (_t == KCDATA_TYPE_CONTAINER_BEGIN) {
+                       uint32_t container_size = 0;
+                       tmpdict = parseKCDataContainer(buffer, &container_size);
+                       NSString * subcontainerID = [NSString stringWithFormat:@"%llu", KCDATA_CONTAINER_ID(buffer)];
+                       NSString * k_desc = nil;
+                       assert([tmpdict count] == 1);
+                       for (NSString * k in [tmpdict keyEnumerator]) {
+                               k_desc = k;
+                               if ([k intValue] != 0)
+                                       k_desc = KCDataTypeNameForID([k intValue]);
+
+                               if ([sub_containers objectForKey:k_desc] == nil) {
+                                       sub_containers[k_desc] = [[NSMutableDictionary alloc] init];
+                               }
+                               sub_containers[k_desc][subcontainerID] = tmpdict[k];
+                       }
+                       buffer = (struct kcdata_item *)((uintptr_t)buffer + container_size);
+                       if (KCDATA_ITEM_TYPE(buffer) == KCDATA_TYPE_BUFFER_END) {
+                               break;
+                       }
+                       continue;
+               }
+
+               tmptype = getKCDataTypeForID(_t);
+               tmpdict = [tmptype parseData:_d ofLength:KCDATA_ITEM_SIZE(buffer)];
+               if ([tmpdict count] == 1)
+                       [container addEntriesFromDictionary:tmpdict];
+               else
+                       container[[tmptype name]] = tmpdict;
+       }
+       [container addEntriesFromDictionary:sub_containers];
+       *bytesParsed = (uint32_t)((uintptr_t)buffer - (uintptr_t)dataBuffer);
+       return retval;
+}
diff --git a/libkdd/kcdata/kcdtypes.c b/libkdd/kcdata/kcdtypes.c
new file mode 100644 (file)
index 0000000..82c97f7
--- /dev/null
@@ -0,0 +1,552 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+
+#include <Kernel/kern/kern_cdata.h>
+#include <Kernel/kern/debug.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <assert.h>
+#include <mach/mach_time.h>
+#include <sys/proc_info.h>
+#include <corpses/task_corpse.h>
+
+/*!
+ * @function kcdata_get_typedescription
+ *
+ * @abstract
+ * Search the known type definitions for type with id type_id.
+ *
+ * @param type_id
+ * An unsigned integer type id as specified by the KCDATA format.
+ *
+ * @param buffer
+ * Pointer to the data area where the type definition will be saved.
+ *
+ * @param buffer_size
+ * Size of the provided buffer in bytes.
+ *
+ * @return struct kcdata_type_definition *
+ * Pointer into the provided buffer holding the type definition and each subtype definition for its fields.
+ * Returns NULL if no type with id == type_id is found.
+ *
+ * @discussion
+ * This function queries the table of known type definitions. If the type is found, its definition
+ * is written into the provided buffer and a pointer to it is returned; otherwise NULL is returned.
+ * It is advisable to cache the return value, since the data is always the same for a given type_id.
+ *
+ */
+struct kcdata_type_definition *kcdata_get_typedescription(unsigned type_id, uint8_t *buffer, uint32_t buffer_size);
+
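+/*
+ * A minimal usage sketch (illustrative only; the buffer size and the printf call are
+ * arbitrary choices, and KCDATA_TYPE_TIMEBASE is just one valid type_id):
+ *
+ *   uint8_t buf[2048];   // any reasonably sized scratch buffer
+ *   struct kcdata_type_definition *def =
+ *       kcdata_get_typedescription(KCDATA_TYPE_TIMEBASE, buf, sizeof(buf));
+ *   if (def != NULL) {
+ *       printf("type %u (%s) has %u fields\n",
+ *              def->kct_type_identifier, def->kct_name, def->kct_num_elements);
+ *   }
+ */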
+
+
+/* forward declarations for helper routines */
+static uint32_t get_kctype_subtype_size(kctype_subtype_t type);
+static void setup_subtype_description(kcdata_subtype_descriptor_t desc, kctype_subtype_t type, uint32_t offset, char *name);
+static void setup_subtype_array_description(kcdata_subtype_descriptor_t desc, kctype_subtype_t type, uint32_t offset, uint32_t count, char *name);
+static void setup_type_definition(struct kcdata_type_definition *d, uint32_t type, uint32_t num_elems, char *name);
+
+struct kcdata_type_definition *kcdata_get_typedescription(unsigned type_id, uint8_t *buffer, uint32_t buffer_size)
+{
+       int i = 0;
+#define _STR_VALUE(x)  #x
+#define _SUBTYPE(t, s, f)     setup_subtype_description(&subtypes[i++], (t), offsetof(s,f), _STR_VALUE(f))
+#define _SUBTYPE_ARRAY(t, s, f, c)   setup_subtype_array_description(&subtypes[i++], (t), offsetof(s,f), (c), _STR_VALUE(f))
+#define _STRINGTYPE(f)        setup_subtype_array_description(&subtypes[i++], KC_ST_CHAR, 0, UINT16_MAX, f)
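+/*
+ * Each _SUBTYPE* macro appends one field descriptor to the `subtypes` array
+ * declared below (which sits just past the type-definition header in the
+ * caller-provided buffer) and bumps the running element count `i`.
+ */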
+
+
+    
+    if (buffer_size < sizeof(struct kcdata_type_definition) || buffer == NULL)
+        return NULL;
+    
+       struct kcdata_type_definition *retval = (struct kcdata_type_definition *)&buffer[0];
+       kcdata_subtype_descriptor_t subtypes = (kcdata_subtype_descriptor_t)&buffer[sizeof(struct kcdata_type_definition)];
+       switch (type_id) {
+
+        case KCDATA_TYPE_STRING_DESC: {
+            i = 0;
+            setup_subtype_array_description(&subtypes[i++], KC_ST_CHAR, 0, KCDATA_DESC_MAXLEN, "desc");
+            setup_subtype_array_description(&subtypes[i++], KC_ST_CHAR, KCDATA_DESC_MAXLEN, UINT16_MAX, "data");
+            setup_type_definition(retval, type_id, i, "string_desc");
+            break;
+        }
+            
+               case KCDATA_TYPE_UINT32_DESC: {
+                       i = 0;
+                       setup_subtype_array_description(&subtypes[i++], KC_ST_CHAR, 0, KCDATA_DESC_MAXLEN, "desc");
+                       setup_subtype_description(&subtypes[i++], KC_ST_UINT32, KCDATA_DESC_MAXLEN, "data");
+                       setup_type_definition(retval, type_id, i, "uint32_desc");
+                       break;
+               }
+                       
+               case KCDATA_TYPE_UINT64_DESC: {
+                       i = 0;
+                       setup_subtype_array_description(&subtypes[i++], KC_ST_CHAR, 0, KCDATA_DESC_MAXLEN, "desc");
+                       setup_subtype_description(&subtypes[i++], KC_ST_UINT64, KCDATA_DESC_MAXLEN, "data");
+                       setup_type_definition(retval, type_id, i, "uint64_desc");
+                       break;
+               }
+        
+        case KCDATA_TYPE_INT32_DESC: {
+            i = 0;
+            setup_subtype_array_description(&subtypes[i++], KC_ST_CHAR, 0, KCDATA_DESC_MAXLEN, "desc");
+            setup_subtype_description(&subtypes[i++], KC_ST_INT32, KCDATA_DESC_MAXLEN, "data");
+            setup_type_definition(retval, type_id, i, "int32_desc");
+            break;
+        }
+            
+        case KCDATA_TYPE_INT64_DESC: {
+            i = 0;
+            setup_subtype_array_description(&subtypes[i++], KC_ST_CHAR, 0, KCDATA_DESC_MAXLEN, "desc");
+            setup_subtype_description(&subtypes[i++], KC_ST_INT64, KCDATA_DESC_MAXLEN, "data");
+            setup_type_definition(retval, type_id, i, "int64_desc");
+            break;
+        }
+
+               case KCDATA_TYPE_CONTAINER_BEGIN :{
+                       i = 0;
+                       setup_subtype_description(&subtypes[i++], KC_ST_UINT32, 0, "kcContainerType");
+                       setup_type_definition(retval, type_id, i, "container_begin");
+                       break;
+               }
+            
+        case KCDATA_TYPE_LIBRARY_LOADINFO: {
+            i = 0;
+            _SUBTYPE(KC_ST_UINT32, struct dyld_uuid_info_32, imageLoadAddress);
+            _SUBTYPE_ARRAY(KC_ST_UINT8, struct dyld_uuid_info_32, imageUUID, 16);
+            setup_type_definition(retval, type_id, i, "dyld_load_info");
+            break;
+            
+        }
+        
+        case KCDATA_TYPE_LIBRARY_LOADINFO64: /* fall through */
+        case STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO: {
+            i = 0;
+            _SUBTYPE(KC_ST_UINT64, struct dyld_uuid_info_64, imageLoadAddress);
+            _SUBTYPE_ARRAY(KC_ST_UINT8, struct dyld_uuid_info_64, imageUUID, 16);
+            setup_type_definition(retval, type_id, i, "dyld_load_info");
+            break;
+        }
+            
+        case KCDATA_TYPE_TIMEBASE: {
+            i = 0;
+            _SUBTYPE(KC_ST_UINT32, struct mach_timebase_info, numer);
+            _SUBTYPE(KC_ST_UINT32, struct mach_timebase_info, denom);
+            setup_type_definition(retval, type_id, i, "mach_timebase_info");
+            break;
+        }
+
+        case KCDATA_TYPE_MACH_ABSOLUTE_TIME:
+            setup_type_definition(retval, type_id, 1, "mach_absolute_time");
+            setup_subtype_description(&subtypes[0], KC_ST_UINT64, 0, "mach_absolute_time");
+            break;
+        
+        case KCDATA_TYPE_TIMEVAL: {
+            i = 0;
+            _SUBTYPE(KC_ST_INT64, struct timeval64, tv_sec);
+            _SUBTYPE(KC_ST_INT64, struct timeval64, tv_usec);
+            setup_type_definition(retval, type_id, i, "timeval");
+            break;
+        }
+
+       case KCDATA_TYPE_USECS_SINCE_EPOCH:
+            setup_type_definition(retval, type_id, 1, "usecs_since_epoch");
+            setup_subtype_description(&subtypes[0], KC_ST_UINT64, 0, "usecs_since_epoch");
+            break;
+
+
+            /* stackshot specific types */
+               case STACKSHOT_KCTYPE_IOSTATS: {
+                       i = 0;
+            _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_disk_reads_count);
+            _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_disk_reads_size);
+                       _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_disk_writes_count);
+                       _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_disk_writes_size);
+            _SUBTYPE_ARRAY(KC_ST_UINT64, struct io_stats_snapshot, ss_io_priority_count, STACKSHOT_IO_NUM_PRIORITIES);
+            _SUBTYPE_ARRAY(KC_ST_UINT64, struct io_stats_snapshot, ss_io_priority_size, STACKSHOT_IO_NUM_PRIORITIES);
+                       _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_paging_count);
+                       _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_paging_size);
+                       _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_non_paging_count);
+                       _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_non_paging_size);
+                       _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_data_count);
+                       _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_data_size);
+                       _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_metadata_count);
+                       _SUBTYPE(KC_ST_UINT64, struct io_stats_snapshot, ss_metadata_size);
+
+                       setup_type_definition(retval, type_id, i, "io_statistics");
+                       break;
+               }
+                       
+               case STACKSHOT_KCTYPE_GLOBAL_MEM_STATS       :
+               {   i = 0;
+            _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, snapshot_magic);
+                       _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, free_pages);
+                       _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, active_pages);
+                       _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, inactive_pages);
+                       _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, purgeable_pages);
+                       _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, wired_pages);
+                       _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, speculative_pages);
+                       _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, throttled_pages);
+                       _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, filebacked_pages);
+                       _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, compressions);
+                       _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, decompressions);
+                       _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, compressor_size);
+                       _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, busy_buffer_count);
+                       _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, pages_wanted);
+                       _SUBTYPE(KC_ST_UINT32, struct mem_and_io_snapshot, pages_reclaimed);
+            _SUBTYPE(KC_ST_UINT8, struct mem_and_io_snapshot, pages_wanted_reclaimed_valid);
+                       setup_type_definition(retval, type_id, i, "mem_and_io_snapshot");
+                       break;
+               }
+                       
+        case STACKSHOT_KCCONTAINER_TASK:
+            setup_type_definition(retval, type_id, 0, "task_snapshots");
+            break;
+
+        case STACKSHOT_KCCONTAINER_THREAD:
+            setup_type_definition(retval, type_id, 0, "thread_snapshots");
+            break;
+
+                       
+               case STACKSHOT_KCTYPE_TASK_SNAPSHOT: {
+                       i = 0;
+            _SUBTYPE(KC_ST_UINT64, struct task_snapshot_v2, ts_unique_pid);
+                       _SUBTYPE(KC_ST_UINT64, struct task_snapshot_v2, ts_ss_flags);
+                       _SUBTYPE(KC_ST_UINT64, struct task_snapshot_v2, ts_user_time_in_terminated_threads);
+                       _SUBTYPE(KC_ST_UINT64, struct task_snapshot_v2, ts_system_time_in_terminated_threads);
+                       _SUBTYPE(KC_ST_UINT64, struct task_snapshot_v2, ts_p_start_sec);
+                       _SUBTYPE(KC_ST_UINT64, struct task_snapshot_v2, ts_task_size);
+                       _SUBTYPE(KC_ST_UINT64, struct task_snapshot_v2, ts_max_resident_size);
+                       _SUBTYPE(KC_ST_UINT32, struct task_snapshot_v2, ts_suspend_count);
+                       _SUBTYPE(KC_ST_UINT32, struct task_snapshot_v2, ts_faults);
+                       _SUBTYPE(KC_ST_UINT32, struct task_snapshot_v2, ts_pageins);
+                       _SUBTYPE(KC_ST_UINT32, struct task_snapshot_v2, ts_cow_faults);
+                       _SUBTYPE(KC_ST_UINT32, struct task_snapshot_v2, ts_was_throttled);
+                       _SUBTYPE(KC_ST_UINT32, struct task_snapshot_v2, ts_did_throttle);
+                       _SUBTYPE(KC_ST_UINT32, struct task_snapshot_v2, ts_latency_qos);
+                       _SUBTYPE(KC_ST_INT32, struct task_snapshot_v2, ts_pid);
+            _SUBTYPE_ARRAY(KC_ST_CHAR, struct task_snapshot_v2, ts_p_comm, 32);
+                       setup_type_definition(retval, type_id, i, "task_snapshot");
+                       break;
+               }
+            
+               case STACKSHOT_KCTYPE_THREAD_SNAPSHOT: {
+                       i = 0;
+                       
+                       _SUBTYPE(KC_ST_UINT64, struct thread_snapshot_v2, ths_thread_id);
+                       _SUBTYPE(KC_ST_UINT64, struct thread_snapshot_v2, ths_wait_event);
+                       _SUBTYPE(KC_ST_UINT64, struct thread_snapshot_v2, ths_continuation);
+                       _SUBTYPE(KC_ST_UINT64, struct thread_snapshot_v2, ths_total_syscalls);
+                       _SUBTYPE(KC_ST_UINT64, struct thread_snapshot_v2, ths_voucher_identifier);
+                       _SUBTYPE(KC_ST_UINT64, struct thread_snapshot_v2, ths_dqserialnum);
+                       _SUBTYPE(KC_ST_UINT64, struct thread_snapshot_v2, ths_user_time);
+                       _SUBTYPE(KC_ST_UINT64, struct thread_snapshot_v2, ths_sys_time);
+                       _SUBTYPE(KC_ST_UINT64, struct thread_snapshot_v2, ths_ss_flags);
+                       _SUBTYPE(KC_ST_UINT64, struct thread_snapshot_v2, ths_last_run_time);
+                       _SUBTYPE(KC_ST_UINT32, struct thread_snapshot_v2, ths_state);
+                       _SUBTYPE(KC_ST_UINT32, struct thread_snapshot_v2, ths_sched_flags);
+                       _SUBTYPE(KC_ST_INT16, struct thread_snapshot_v2, ths_base_priority);
+                       _SUBTYPE(KC_ST_INT16, struct thread_snapshot_v2, ths_sched_priority);
+                       _SUBTYPE(KC_ST_UINT8, struct thread_snapshot_v2, ths_eqos);
+                       _SUBTYPE(KC_ST_UINT8, struct thread_snapshot_v2, ths_rqos);
+                       _SUBTYPE(KC_ST_UINT8, struct thread_snapshot_v2, ths_rqos_override);
+                       _SUBTYPE(KC_ST_UINT8, struct thread_snapshot_v2, ths_io_tier);
+                       
+                       setup_type_definition(retval, type_id, i, "thread_snapshot");
+                       break;
+               }
+
+                       
+               case STASKSHOT_KCTYPE_DONATING_PIDS:
+                       setup_type_definition(retval, type_id, 1, "donating_pids");
+                       setup_subtype_description(&subtypes[0], KC_ST_INT32, 0, "pid");
+                       break;
+            
+        case STACKSHOT_KCTYPE_THREAD_NAME:{
+            i = 0;
+            setup_subtype_array_description(&subtypes[i++], KC_ST_CHAR, 0, 64, "pth_name");
+            setup_type_definition(retval, type_id, i, "pth_name");
+            break;
+        }
+            
+               case STACKSHOT_KCTYPE_KERN_STACKFRAME        :
+                       setup_type_definition(retval, type_id, 2, "kernel_stack_frames");
+                       setup_subtype_description(&subtypes[0], KC_ST_UINT32, 0, "lr");
+                       setup_subtype_description(&subtypes[1], KC_ST_UINT32, sizeof(uint32_t), "sp");
+                       break;
+            
+               case STACKSHOT_KCTYPE_KERN_STACKFRAME64      :
+                       setup_type_definition(retval, type_id, 2, "kernel_stack_frames");
+                       setup_subtype_description(&subtypes[0], KC_ST_UINT64, 0, "lr");
+                       setup_subtype_description(&subtypes[1], KC_ST_UINT64, sizeof(uint64_t), "sp");
+                       break;
+                       
+               case STACKSHOT_KCTYPE_USER_STACKFRAME        :
+                       setup_type_definition(retval, type_id, 2, "user_stack_frames");
+                       setup_subtype_description(&subtypes[0], KC_ST_UINT32, 0, "lr");
+                       setup_subtype_description(&subtypes[1], KC_ST_UINT32, sizeof(uint32_t), "sp");
+                       break;
+                       
+               case STACKSHOT_KCTYPE_USER_STACKFRAME64      :
+                       setup_type_definition(retval, type_id, 2, "user_stack_frames");
+                       setup_subtype_description(&subtypes[0], KC_ST_UINT64, 0, "lr");
+                       setup_subtype_description(&subtypes[1], KC_ST_UINT64, sizeof(uint64_t), "sp");
+                       break;
+        
+        case STACKSHOT_KCTYPE_BOOTARGS: {
+            i = 0;
+            _STRINGTYPE("boot_args");
+            setup_type_definition(retval, type_id, i, "boot_args");
+            break;
+        }
+        
+        case STACKSHOT_KCTYPE_OSVERSION: {
+            i = 0;
+            _STRINGTYPE("osversion");
+            setup_type_definition(retval, type_id, i, "osversion");
+            break;
+        }
+
+       case STACKSHOT_KCTYPE_KERN_PAGE_SIZE: {
+               i = 0;
+               setup_subtype_description(&subtypes[i++], KC_ST_UINT32, 0, "kernel_page_size");
+               setup_type_definition(retval, type_id, i, "kernel_page_size");
+               break;
+       }
+
+       case STACKSHOT_KCTYPE_JETSAM_LEVEL: {
+               i = 0;
+               setup_subtype_description(&subtypes[i++], KC_ST_UINT32, 0, "jetsam_level");
+               setup_type_definition(retval, type_id, i, "jetsam_level");
+               break;
+       }
+
+                       /* crashinfo types */
+        case TASK_CRASHINFO_BSDINFOWITHUNIQID:
+        {   i = 0;
+            _SUBTYPE_ARRAY(KC_ST_UINT8, struct proc_uniqidentifierinfo, p_uuid, 16);
+            _SUBTYPE(KC_ST_UINT64, struct proc_uniqidentifierinfo, p_uniqueid);
+            _SUBTYPE(KC_ST_UINT64, struct proc_uniqidentifierinfo, p_puniqueid);
+            /* Ignore the p_reserve fields */
+            setup_type_definition(retval, type_id, i, "proc_uniqidentifierinfo");
+            break;
+        }
+            
+        case TASK_CRASHINFO_PID:{
+            setup_subtype_description(&subtypes[0], KC_ST_INT32, 0, "pid");
+            setup_type_definition(retval, type_id, 1, "pid");
+            break;
+        }
+
+        case TASK_CRASHINFO_PPID:{
+            setup_subtype_description(&subtypes[0], KC_ST_INT32, 0, "ppid");
+            setup_type_definition(retval, type_id, 1, "ppid");
+            break;
+        }
+            
+        case TASK_CRASHINFO_RUSAGE_INFO: {
+            i = 0;
+            _SUBTYPE_ARRAY(KC_ST_UINT8, struct rusage_info_v3, ri_uuid, 16);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_user_time);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_system_time);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_pkg_idle_wkups);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_interrupt_wkups);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_pageins);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_wired_size);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_resident_size);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_phys_footprint);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_proc_start_abstime);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_proc_exit_abstime);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_child_user_time);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_child_system_time);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_child_pkg_idle_wkups);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_child_interrupt_wkups);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_child_pageins);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_child_elapsed_abstime);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_diskio_bytesread);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_diskio_byteswritten);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_cpu_time_qos_default);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_cpu_time_qos_maintenance);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_cpu_time_qos_background);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_cpu_time_qos_utility);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_cpu_time_qos_legacy);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_cpu_time_qos_user_initiated);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_cpu_time_qos_user_interactive);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_billed_system_time);
+            _SUBTYPE(KC_ST_UINT64, struct rusage_info_v3, ri_serviced_system_time);
+            setup_type_definition(retval, type_id, i, "rusage_info");
+            break;
+        }
+            
+        case TASK_CRASHINFO_PROC_NAME: {
+            i = 0;
+            _STRINGTYPE("p_comm");
+            setup_type_definition(retval, type_id, i, "p_comm");
+            break;
+        }
+            
+        case TASK_CRASHINFO_USERSTACK: {
+            i = 0;
+            setup_subtype_description(&subtypes[0], KC_ST_UINT64, 0, "userstack_ptr");
+            setup_type_definition(retval, type_id, 1, "userstack_ptr");
+            break;
+        }
+            
+        case TASK_CRASHINFO_ARGSLEN: {
+            i = 0;
+            setup_subtype_description(&subtypes[0], KC_ST_INT32, 0, "p_argslen");
+            setup_type_definition(retval, type_id, 1, "p_argslen");
+            break;
+        }
+        
+        case TASK_CRASHINFO_PROC_PATH: {
+            i = 0;
+            _STRINGTYPE("p_path");
+            setup_type_definition(retval, type_id, i, "p_path");
+            break;
+        }
+            
+        case TASK_CRASHINFO_PROC_CSFLAGS:{
+            setup_subtype_description(&subtypes[0], KC_ST_UINT32, 0, "p_csflags");
+            setup_type_definition(retval, type_id, 1, "p_csflags");
+            break;
+        }
+            
+        case TASK_CRASHINFO_PROC_STATUS: {
+            setup_subtype_description(&subtypes[0], KC_ST_UINT8, 0, "p_status");
+            setup_type_definition(retval, type_id, 1, "p_status");
+            break;
+        }
+            
+        case TASK_CRASHINFO_UID:{
+            setup_subtype_description(&subtypes[0], KC_ST_INT32, 0, "uid");
+            setup_type_definition(retval, type_id, 1, "uid");
+            break;
+        }
+            
+        case TASK_CRASHINFO_GID:{
+            setup_subtype_description(&subtypes[0], KC_ST_INT32, 0, "gid");
+            setup_type_definition(retval, type_id, 1, "gid");
+            break;
+        }
+            
+        case TASK_CRASHINFO_PROC_ARGC:{
+            setup_subtype_description(&subtypes[0], KC_ST_INT32, 0, "argc");
+            setup_type_definition(retval, type_id, 1, "argc");
+            break;
+        }
+            
+        case TASK_CRASHINFO_PROC_FLAGS:{
+            setup_subtype_description(&subtypes[0], KC_ST_UINT32, 0, "p_flags");
+            setup_type_definition(retval, type_id, 1, "p_flags");
+            break;
+        }
+            
+        case TASK_CRASHINFO_CPUTYPE:{
+            setup_subtype_description(&subtypes[0], KC_ST_INT32, 0, "cputype");
+            setup_type_definition(retval, type_id, 1, "cputype");
+            break;
+        }
+            
+        case TASK_CRASHINFO_RESPONSIBLE_PID:{
+            setup_subtype_description(&subtypes[0], KC_ST_INT32, 0, "responsible_pid");
+            setup_type_definition(retval, type_id, 1, "responsible_pid");
+            break;
+        }
+            
+        case TASK_CRASHINFO_DIRTY_FLAGS:{
+            setup_subtype_description(&subtypes[0], KC_ST_UINT32, 0, "dirty_flags");
+            setup_type_definition(retval, type_id, 1, "dirty_flags");
+            break;
+        }
+            
+        case TASK_CRASHINFO_CRASHED_THREADID: {
+            setup_subtype_description(&subtypes[0], KC_ST_UINT64, 0, "crashed_threadid");
+            setup_type_definition(retval, type_id, 1, "crashed_threadid");
+            break;
+        }
+
+               default:
+                       retval = NULL;
+                       break;
+       }
+       
+    assert(retval == NULL || (buffer_size > sizeof(struct kcdata_type_definition) + (retval->kct_num_elements * sizeof(struct kcdata_subtype_descriptor))));
+       return retval;
+}
+
+
+static void setup_type_definition(struct kcdata_type_definition *d, uint32_t type, uint32_t num_elems, char *name)
+{
+    d->kct_type_identifier = type;
+    d->kct_num_elements = num_elems;
+    /* strlcpy stops at the end of shorter name strings and guarantees NUL termination */
+    strlcpy(d->kct_name, name, sizeof(d->kct_name));
+}
+
+static uint32_t get_kctype_subtype_size(kctype_subtype_t type){
+    switch (type) {
+        case KC_ST_CHAR:
+        case KC_ST_INT8:
+        case KC_ST_UINT8:
+            return sizeof(uint8_t);
+            break;
+        case KC_ST_INT16:
+        case KC_ST_UINT16:
+            return sizeof(uint16_t);
+            break;
+        case KC_ST_INT32:
+        case KC_ST_UINT32:
+            return sizeof(uint32_t);
+            break;
+        case KC_ST_INT64:
+        case KC_ST_UINT64:
+            return sizeof(uint64_t);
+            break;
+            
+        default:
+            assert(0);
+            break;
+    }
+    return 0;
+}
+
+static void setup_subtype_array_description(kcdata_subtype_descriptor_t desc, kctype_subtype_t type, uint32_t offset, uint32_t count, char *name)
+{
+    desc->kcs_flags = KCS_SUBTYPE_FLAGS_ARRAY;
+    desc->kcs_elem_type = type;
+    desc->kcs_elem_offset = offset;
+    desc->kcs_elem_size = KCS_SUBTYPE_PACK_SIZE(count, get_kctype_subtype_size(type));
+    strlcpy(desc->kcs_name, name, sizeof(desc->kcs_name));
+}
+
+static void setup_subtype_description(kcdata_subtype_descriptor_t desc, kctype_subtype_t type, uint32_t offset, char *name)
+{
+    desc->kcs_flags = KCS_SUBTYPE_FLAGS_NONE;
+    desc->kcs_elem_type = type;
+    desc->kcs_elem_offset = offset;
+    desc->kcs_elem_size = get_kctype_subtype_size(type);
+    strlcpy(desc->kcs_name, name, sizeof(desc->kcs_name));
+}
+
diff --git a/libkdd/kcdata/kdd.h b/libkdd/kcdata/kdd.h
new file mode 100644 (file)
index 0000000..ba9106d
--- /dev/null
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _KDD_H_
+#define _KDD_H_
+
+#import <Foundation/Foundation.h>
+
+/*!
+ * @class KCDataType
+ * A basic abstraction for parsing data provided by the kernel chunked
+ * data (kcdata) library.
+ *
+ * @discussion
+ * Each type object has a name and a method that parses data in memory into
+ * a dictionary. The dictionary keys are NSStrings; the values are NSObjects.
+ *
+ */
+@interface KCDataType : NSObject
+- (NSMutableDictionary *)parseData:(void *)dataBuffer ofLength:(uint32_t)length;
+- (NSString *)name;
+@end
+
+/*!
+ * @function getKCDataTypeForID
+ *
+ * @abstract
+ * Find a type description for the given typeID.
+ *
+ * @param typeID
+ * An unsigned integer type id as specified by the KCDATA format.
+ *
+ * @discussion
+ * This routine queries the system for the given type. If a known type description is found, it is used to
+ * initialize a KCDataType object. If no known type is found, the data is assumed to be uint8_t[].
+ */
+KCDataType * getKCDataTypeForID(uint32_t typeID);
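+/*
+ * For example (an illustrative sketch only; `itemData` and `itemSize` stand in for
+ * a data pointer and length obtained by walking a kcdata buffer):
+ *
+ *   KCDataType * t = getKCDataTypeForID(KCDATA_TYPE_LIBRARY_LOADINFO);
+ *   NSMutableDictionary * parsed = [t parseData:itemData ofLength:itemSize];
+ */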
+
+/*!
+ * @function KCDataTypeNameForID
+ *
+ * @abstract
+ * Get a name for the type.
+ *
+ * @param typeID
+ * An unsigned integer type id as specified by the KCDATA format.
+ *
+ * @return NSString *
+ * Returns the name of the type. If the type is not found, the return
+ * value is a string representation of the passed typeID.
+ */
+NSString * KCDataTypeNameForID(uint32_t typeID);
+
+/*!
+ * @function parseKCDataArray
+ *
+ * @abstract
+ * Parse the given KCDATA buffer as an array of elements. The buffer should begin with a header
+ * of type KCDATA_TYPE_ARRAY.
+ *
+ * @param dataBuffer
+ * A pointer in memory where KCDATA is allocated.
+ *
+ * @return
+ * A dictionary whose key is the name of the element type and whose value is an array of the parsed elements.
+ *
+ */
+
+NSMutableDictionary * parseKCDataArray(void * dataBuffer);
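+/*
+ * For example (an illustrative sketch only; `buffer` is assumed to point at an item
+ * of type KCDATA_TYPE_ARRAY inside a kcdata stream):
+ *
+ *   if (KCDATA_ITEM_TYPE(buffer) == KCDATA_TYPE_ARRAY) {
+ *       NSMutableDictionary * parsedArray = parseKCDataArray(buffer);
+ *       NSLog(@"%@", parsedArray);
+ *   }
+ */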
+
+/*!
+ * @function parseKCDataContainer
+ *
+ * @abstract
+ * Parse the given KCDATA buffer as a container and convert each sub-structure into a field in a dictionary.
+ *
+ * @param dataBuffer
+ * A pointer in memory where the KCDATA is allocated. The data should point to a
+ * kcdata_item_t of type KCDATA_TYPE_CONTAINER_BEGIN.
+ *
+ * @param bytesParsed
+ * A pointer to a uint32_t where the routine will save the number of bytes parsed for this container.
+ *
+ * @return NSDictionary *
+ * A dictionary containing each field and any sub-containers within the provided container.
+ *
+ * @discussion
+ * This function parses one container. If it encounters sub-containers,
+ * they are parsed and collected within the same dictionary.
+ * Other data fields are also parsed based on their type. The bytesParsed
+ * param is populated with the number of bytes processed, so the caller can
+ * advance its read position as
+ *   buffer = (kcdata_item_t)((uintptr_t)buffer + bytesParsed); //advance to next KCDATA_HEADER.
+ * Note: Keep in mind that the next header may be KCDATA_TYPE_BUFFER_END.
+ *
+ * A sample usage call can be:
+ * KCDATA_ITEM_FOREACH(buffer) {
+ *     if(KCDATA_ITEM_TYPE(buffer) == KCDATA_TYPE_CONTAINER_BEGIN) {
+ *         uint32_t container_size = 0;
+ *         NSMutableDictionary *parsedContainer = parseKCDataContainer(buffer, &container_size);
+ *         NSLog(@"Parsed container has : %@", parsedContainer);
+ *         buffer = (kcdata_item_t) ((uintptr_t)buffer + container_size);
+ *         if(KCDATA_ITEM_TYPE(buffer) == KCDATA_TYPE_BUFFER_END)
+ *             break;
+ *     }
+ * }
+ *
+ */
+NSMutableDictionary * parseKCDataContainer(void * dataBuffer, uint32_t * bytesParsed);
+
+#endif /* _KDD_H_ */
diff --git a/libkdd/kcdata/kdd.m b/libkdd/kcdata/kdd.m
new file mode 100644 (file)
index 0000000..599cea9
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#import "kdd.h"
+
+@implementation KCDataType
+
+- (NSMutableDictionary *)parseData:(void *)dataBuffer ofLength:(uint32_t)length
+{
+       assert(0);
+	return nil;
+}
+
+- (NSString *)name
+{
+       assert(0);
+	return nil;
+}
+
+@end
diff --git a/libkdd/kdd.xcodeproj/project.pbxproj b/libkdd/kdd.xcodeproj/project.pbxproj
new file mode 100644 (file)
index 0000000..fb67530
--- /dev/null
@@ -0,0 +1,269 @@
+// !$*UTF8*$!
+{
+       archiveVersion = 1;
+       classes = {
+       };
+       objectVersion = 46;
+       objects = {
+
+/* Begin PBXBuildFile section */
+               C91C93CB1ACB58B700119B60 /* kdd.h in Headers */ = {isa = PBXBuildFile; fileRef = C91C93CA1ACB58B700119B60 /* kdd.h */; settings = {ATTRIBUTES = (Private, ); }; };
+               C91C93CD1ACB58B700119B60 /* kdd.m in Sources */ = {isa = PBXBuildFile; fileRef = C91C93CC1ACB58B700119B60 /* kdd.m */; };
+               C91C93E41ACB598700119B60 /* KCDBasicTypeDescription.h in Headers */ = {isa = PBXBuildFile; fileRef = C91C93E01ACB598700119B60 /* KCDBasicTypeDescription.h */; };
+               C91C93E51ACB598700119B60 /* KCDBasicTypeDescription.m in Sources */ = {isa = PBXBuildFile; fileRef = C91C93E11ACB598700119B60 /* KCDBasicTypeDescription.m */; };
+               C91C93E61ACB598700119B60 /* KCDStructTypeDescription.h in Headers */ = {isa = PBXBuildFile; fileRef = C91C93E21ACB598700119B60 /* KCDStructTypeDescription.h */; };
+               C91C93E71ACB598700119B60 /* KCDStructTypeDescription.m in Sources */ = {isa = PBXBuildFile; fileRef = C91C93E31ACB598700119B60 /* KCDStructTypeDescription.m */; };
+               C9C5C68C1ACDAFDB00BE0E5E /* kcdtypes.c in Sources */ = {isa = PBXBuildFile; fileRef = C9C5C68B1ACDAFDB00BE0E5E /* kcdtypes.c */; };
+               C9DE39141ACB5A540020F4A3 /* kcdata_core.m in Sources */ = {isa = PBXBuildFile; fileRef = C9DE39131ACB5A540020F4A3 /* kcdata_core.m */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+               C91C93C71ACB58B700119B60 /* libkdd.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libkdd.a; sourceTree = BUILT_PRODUCTS_DIR; };
+               C91C93CA1ACB58B700119B60 /* kdd.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = kdd.h; sourceTree = "<group>"; };
+               C91C93CC1ACB58B700119B60 /* kdd.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = kdd.m; sourceTree = "<group>"; };
+               C91C93E01ACB598700119B60 /* KCDBasicTypeDescription.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = KCDBasicTypeDescription.h; sourceTree = "<group>"; };
+               C91C93E11ACB598700119B60 /* KCDBasicTypeDescription.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = KCDBasicTypeDescription.m; sourceTree = "<group>"; };
+               C91C93E21ACB598700119B60 /* KCDStructTypeDescription.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = KCDStructTypeDescription.h; sourceTree = "<group>"; };
+               C91C93E31ACB598700119B60 /* KCDStructTypeDescription.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = KCDStructTypeDescription.m; sourceTree = "<group>"; };
+               C9C5C68B1ACDAFDB00BE0E5E /* kcdtypes.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = kcdtypes.c; sourceTree = "<group>"; };
+               C9DE39131ACB5A540020F4A3 /* kcdata_core.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = kcdata_core.m; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+               C91C93C41ACB58B700119B60 /* Frameworks */ = {
+                       isa = PBXFrameworksBuildPhase;
+                       buildActionMask = 2147483647;
+                       files = (
+                       );
+                       runOnlyForDeploymentPostprocessing = 0;
+               };
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+               C91C93BE1ACB58B700119B60 = {
+                       isa = PBXGroup;
+                       children = (
+                               C91C93C91ACB58B700119B60 /* kcdata */,
+                               C91C93C81ACB58B700119B60 /* Products */,
+                       );
+                       sourceTree = "<group>";
+               };
+               C91C93C81ACB58B700119B60 /* Products */ = {
+                       isa = PBXGroup;
+                       children = (
+                               C91C93C71ACB58B700119B60 /* libkdd.a */,
+                       );
+                       name = Products;
+                       sourceTree = "<group>";
+               };
+               C91C93C91ACB58B700119B60 /* kcdata */ = {
+                       isa = PBXGroup;
+                       children = (
+                               C9C5C68B1ACDAFDB00BE0E5E /* kcdtypes.c */,
+                               C9DE39131ACB5A540020F4A3 /* kcdata_core.m */,
+                               C91C93E01ACB598700119B60 /* KCDBasicTypeDescription.h */,
+                               C91C93E11ACB598700119B60 /* KCDBasicTypeDescription.m */,
+                               C91C93E21ACB598700119B60 /* KCDStructTypeDescription.h */,
+                               C91C93E31ACB598700119B60 /* KCDStructTypeDescription.m */,
+                               C91C93CA1ACB58B700119B60 /* kdd.h */,
+                               C91C93CC1ACB58B700119B60 /* kdd.m */,
+                       );
+                       path = kcdata;
+                       sourceTree = "<group>";
+               };
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+               C91C93C51ACB58B700119B60 /* Headers */ = {
+                       isa = PBXHeadersBuildPhase;
+                       buildActionMask = 2147483647;
+                       files = (
+                               C91C93CB1ACB58B700119B60 /* kdd.h in Headers */,
+                               C91C93E41ACB598700119B60 /* KCDBasicTypeDescription.h in Headers */,
+                               C91C93E61ACB598700119B60 /* KCDStructTypeDescription.h in Headers */,
+                       );
+                       runOnlyForDeploymentPostprocessing = 0;
+               };
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+               C91C93C61ACB58B700119B60 /* kdd */ = {
+                       isa = PBXNativeTarget;
+                       buildConfigurationList = C91C93DA1ACB58B700119B60 /* Build configuration list for PBXNativeTarget "kdd" */;
+                       buildPhases = (
+                               C91C93C31ACB58B700119B60 /* Sources */,
+                               C91C93C41ACB58B700119B60 /* Frameworks */,
+                               C91C93C51ACB58B700119B60 /* Headers */,
+                       );
+                       buildRules = (
+                       );
+                       dependencies = (
+                       );
+                       name = kdd;
+                       productName = kdd;
+                       productReference = C91C93C71ACB58B700119B60 /* libkdd.a */;
+                       productType = "com.apple.product-type.library.static";
+               };
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+               C91C93BF1ACB58B700119B60 /* Project object */ = {
+                       isa = PBXProject;
+                       attributes = {
+                               LastUpgradeCheck = 0700;
+                               ORGANIZATIONNAME = "Vishal Patel";
+                               TargetAttributes = {
+                                       C91C93C61ACB58B700119B60 = {
+                                               CreatedOnToolsVersion = 7.0;
+                                       };
+                               };
+                       };
+                       buildConfigurationList = C91C93C21ACB58B700119B60 /* Build configuration list for PBXProject "kdd" */;
+                       compatibilityVersion = "Xcode 3.2";
+                       developmentRegion = English;
+                       hasScannedForEncodings = 0;
+                       knownRegions = (
+                               en,
+                       );
+                       mainGroup = C91C93BE1ACB58B700119B60;
+                       productRefGroup = C91C93C81ACB58B700119B60 /* Products */;
+                       projectDirPath = "";
+                       projectRoot = "";
+                       targets = (
+                               C91C93C61ACB58B700119B60 /* kdd */,
+                       );
+               };
+/* End PBXProject section */
+
+/* Begin PBXSourcesBuildPhase section */
+               C91C93C31ACB58B700119B60 /* Sources */ = {
+                       isa = PBXSourcesBuildPhase;
+                       buildActionMask = 2147483647;
+                       files = (
+                               C9DE39141ACB5A540020F4A3 /* kcdata_core.m in Sources */,
+                               C91C93E71ACB598700119B60 /* KCDStructTypeDescription.m in Sources */,
+                               C91C93E51ACB598700119B60 /* KCDBasicTypeDescription.m in Sources */,
+                               C91C93CD1ACB58B700119B60 /* kdd.m in Sources */,
+                               C9C5C68C1ACDAFDB00BE0E5E /* kcdtypes.c in Sources */,
+                       );
+                       runOnlyForDeploymentPostprocessing = 0;
+               };
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+               C91C93D81ACB58B700119B60 /* Debug */ = {
+                       isa = XCBuildConfiguration;
+                       buildSettings = {
+                               ALWAYS_SEARCH_USER_PATHS = NO;
+                               CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
+                               CLANG_CXX_LIBRARY = "libc++";
+                               CLANG_ENABLE_OBJC_ARC = YES;
+                               CLANG_WARN_BOOL_CONVERSION = YES;
+                               CLANG_WARN_CONSTANT_CONVERSION = YES;
+                               CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+                               CLANG_WARN_EMPTY_BODY = YES;
+                               CLANG_WARN_ENUM_CONVERSION = YES;
+                               CLANG_WARN_INT_CONVERSION = YES;
+                               CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+                               CLANG_WARN_UNREACHABLE_CODE = YES;
+                               CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+                               COPY_PHASE_STRIP = NO;
+                               DEBUG_INFORMATION_FORMAT = dwarf;
+                               ENABLE_STRICT_OBJC_MSGSEND = YES;
+                               GCC_C_LANGUAGE_STANDARD = gnu99;
+                               GCC_DYNAMIC_NO_PIC = NO;
+                               GCC_NO_COMMON_BLOCKS = YES;
+                               GCC_OPTIMIZATION_LEVEL = 0;
+                               GCC_PREPROCESSOR_DEFINITIONS = (
+                                       "DEBUG=1",
+                                       "$(inherited)",
+                               );
+                               GCC_SYMBOLS_PRIVATE_EXTERN = NO;
+                               GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+                               GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+                               GCC_WARN_UNDECLARED_SELECTOR = YES;
+                               GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+                               GCC_WARN_UNUSED_FUNCTION = YES;
+                               GCC_WARN_UNUSED_VARIABLE = YES;
+                               MTL_ENABLE_DEBUG_INFO = YES;
+                               ONLY_ACTIVE_ARCH = YES;
+                               OTHER_CFLAGS = "";
+                       };
+                       name = Debug;
+               };
+               C91C93D91ACB58B700119B60 /* Release */ = {
+                       isa = XCBuildConfiguration;
+                       buildSettings = {
+                               ALWAYS_SEARCH_USER_PATHS = NO;
+                               CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
+                               CLANG_CXX_LIBRARY = "libc++";
+                               CLANG_ENABLE_OBJC_ARC = YES;
+                               CLANG_WARN_BOOL_CONVERSION = YES;
+                               CLANG_WARN_CONSTANT_CONVERSION = YES;
+                               CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+                               CLANG_WARN_EMPTY_BODY = YES;
+                               CLANG_WARN_ENUM_CONVERSION = YES;
+                               CLANG_WARN_INT_CONVERSION = YES;
+                               CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+                               CLANG_WARN_UNREACHABLE_CODE = YES;
+                               CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+                               COPY_PHASE_STRIP = NO;
+                               DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+                               ENABLE_NS_ASSERTIONS = NO;
+                               ENABLE_STRICT_OBJC_MSGSEND = YES;
+                               GCC_C_LANGUAGE_STANDARD = gnu99;
+                               GCC_NO_COMMON_BLOCKS = YES;
+                               GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+                               GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+                               GCC_WARN_UNDECLARED_SELECTOR = YES;
+                               GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+                               GCC_WARN_UNUSED_FUNCTION = YES;
+                               GCC_WARN_UNUSED_VARIABLE = YES;
+                               MTL_ENABLE_DEBUG_INFO = NO;
+                               OTHER_CFLAGS = "";
+                       };
+                       name = Release;
+               };
+               C91C93DB1ACB58B700119B60 /* Debug */ = {
+                       isa = XCBuildConfiguration;
+                       buildSettings = {
+                               EXECUTABLE_PREFIX = lib;
+                               OTHER_CFLAGS = "-I$(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders";
+                               PRODUCT_NAME = kdd;
+                       };
+                       name = Debug;
+               };
+               C91C93DC1ACB58B700119B60 /* Release */ = {
+                       isa = XCBuildConfiguration;
+                       buildSettings = {
+                               EXECUTABLE_PREFIX = lib;
+                               OTHER_CFLAGS = "-I$(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders";
+                               PRODUCT_NAME = kdd;
+                       };
+                       name = Release;
+               };
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+               C91C93C21ACB58B700119B60 /* Build configuration list for PBXProject "kdd" */ = {
+                       isa = XCConfigurationList;
+                       buildConfigurations = (
+                               C91C93D81ACB58B700119B60 /* Debug */,
+                               C91C93D91ACB58B700119B60 /* Release */,
+                       );
+                       defaultConfigurationIsVisible = 0;
+                       defaultConfigurationName = Release;
+               };
+               C91C93DA1ACB58B700119B60 /* Build configuration list for PBXNativeTarget "kdd" */ = {
+                       isa = XCConfigurationList;
+                       buildConfigurations = (
+                               C91C93DB1ACB58B700119B60 /* Debug */,
+                               C91C93DC1ACB58B700119B60 /* Release */,
+                       );
+                       defaultConfigurationIsVisible = 0;
+                       defaultConfigurationName = Release;
+               };
+/* End XCConfigurationList section */
+       };
+       rootObject = C91C93BF1ACB58B700119B60 /* Project object */;
+}
diff --git a/libkern/.clang-format b/libkern/.clang-format
new file mode 100644 (file)
index 0000000..cd99c24
--- /dev/null
@@ -0,0 +1,30 @@
+# See top level .clang-format for explanation of options
+AlignEscapedNewlinesLeft: true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: true
+AllowShortCaseLabelsOnASingleLine: true
+AllowShortFunctionsOnASingleLine: None
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: false
+AlwaysBreakBeforeMultilineStrings: true
+BinPackArguments: true
+BinPackParameters: false
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Allman
+ColumnLimit: 132
+IndentCaseLabels: false
+IndentWidth: 4
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: false
+PointerAlignment: Middle
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+TabWidth: 4
+UseTab: Never
index ad77bbce23dc585b85e581474e9ccddc5d20cf22..806567fc49f6be6b70451f76ea91dd95e3759033 100644 (file)
@@ -11,12 +11,14 @@ INSTINC_SUBDIRS = \
 INSTINC_SUBDIRS_X86_64 = libkern
 INSTINC_SUBDIRS_X86_64H = libkern
 INSTINC_SUBDIRS_ARM = libkern
+INSTINC_SUBDIRS_ARM64 = libkern
 
 EXPINC_SUBDIRS = \
        libkern
 EXPINC_SUBDIRS_X86_64 = libkern
 EXPINC_SUBDIRS_X86_64H = libkern
 EXPINC_SUBDIRS_ARM = libkern
+EXPINC_SUBDIRS_ARM64 = libkern
 
 COMP_SUBDIRS = conf
 
index f9013853cbe83c2871c443ab06e96e8c7107817b..3f94e0d02d566f95e1c52e64f32bbd78ac9c603b 100644 (file)
@@ -31,7 +31,7 @@
 #include <libkern/OSKextLibPrivate.h>
 #else
 #include <libc.h>
-#include <System/libkern/OSKextLib.h>
+#include <libkern/OSKextLib.h>
 #include <System/libkern/OSKextLibPrivate.h>
 #endif /* KERNEL */
 
index 61ed05f9742ae96af3fee04669504343bc1bc8f2..fcdca78d14a360f31ab04062ef75c47cb03fe7bc 100644 (file)
@@ -33,6 +33,7 @@
 #include <libkern/c++/OSDictionary.h>
 #include <libkern/c++/OSSerialize.h>
 #include <libkern/c++/OSLib.h>
+#include <libkern/OSDebug.h>
 
 #define super OSCollection
 
@@ -46,14 +47,6 @@ OSMetaClassDefineReservedUnused(OSArray, 5);
 OSMetaClassDefineReservedUnused(OSArray, 6);
 OSMetaClassDefineReservedUnused(OSArray, 7);
 
-#if OSALLOCDEBUG
-extern "C" {
-    extern int debug_container_malloc_size;
-};
-#define ACCUMSIZE(s) do { debug_container_malloc_size += (s); } while(0)
-#else
-#define ACCUMSIZE(s)
-#endif
 
 #define EXT_CAST(obj) \
     reinterpret_cast<OSObject *>(const_cast<OSMetaClassBase *>(obj))
@@ -70,7 +63,7 @@ bool OSArray::initWithCapacity(unsigned int inCapacity)
         return false;
 
     size = sizeof(const OSMetaClassBase *) * inCapacity;
-    array = (const OSMetaClassBase **) kalloc(size);
+    array = (const OSMetaClassBase **) kalloc_container(size);
     if (!array)
         return false;
 
@@ -79,7 +72,7 @@ bool OSArray::initWithCapacity(unsigned int inCapacity)
     capacityIncrement = (inCapacity)? inCapacity : 16;
 
     bzero(array, size);
-    ACCUMSIZE(size);
+    OSCONTAINER_ACCUMSIZE(size);
 
     return true;
 }
@@ -171,7 +164,7 @@ void OSArray::free()
 
     if (array) {
         kfree(array, sizeof(const OSMetaClassBase *) * capacity);
-        ACCUMSIZE( -(sizeof(const OSMetaClassBase *) * capacity) );
+        OSCONTAINER_ACCUMSIZE( -(sizeof(const OSMetaClassBase *) * capacity) );
     }
 
     super::free();
@@ -207,11 +200,11 @@ unsigned int OSArray::ensureCapacity(unsigned int newCapacity)
 
     newSize = sizeof(const OSMetaClassBase *) * finalCapacity;
 
-    newArray = (const OSMetaClassBase **) kalloc(newSize);
+    newArray = (const OSMetaClassBase **) kalloc_container(newSize);
     if (newArray) {
         oldSize = sizeof(const OSMetaClassBase *) * capacity;
 
-        ACCUMSIZE(newSize - oldSize);
+        OSCONTAINER_ACCUMSIZE(((size_t)newSize) - ((size_t)oldSize));
 
         bcopy(array, newArray, oldSize);
         bzero(&newArray[capacity], newSize - oldSize);
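The hunks above (and the matching ones in OSCollectionIterator, OSData and OSDictionary below) drop each file's private ACCUMSIZE macro in favor of the shared OSCONTAINER_ACCUMSIZE and move container allocations from kalloc to kalloc_container. A minimal sketch of the resulting pattern, kernel context assumed:

    /* Sketch only - mirrors the allocate/free pattern now used by the container classes. */
    static void * container_alloc(vm_size_t size)
    {
        void * buf = kalloc_container(size);          /* was: kalloc(size) */
        if (buf) {
            bzero(buf, size);
            OSCONTAINER_ACCUMSIZE(size);              /* was: ACCUMSIZE(size) */
        }
        return buf;
    }

    static void container_free(void * buf, vm_size_t size)
    {
        if (buf) {
            kfree(buf, size);
            OSCONTAINER_ACCUMSIZE(-((size_t) size));  /* signed delta on free */
        }
    }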
index d44e0d500a6c80d9e175b591bb86d866e131e112..e623b6492ff0293104361b41f34b738f9c7ef99f 100644 (file)
 
 OSDefineMetaClassAndStructors(OSCollectionIterator, OSIterator)
 
-#if OSALLOCDEBUG
-extern "C" {
-    extern int debug_container_malloc_size;
-};
-#define ACCUMSIZE(s) do { debug_container_malloc_size += (s); } while(0)
-#else
-#define ACCUMSIZE(s)
-#endif
-
 bool OSCollectionIterator::initWithCollection(const OSCollection *inColl)
 {
     if ( !super::init() || !inColl)
@@ -56,7 +47,7 @@ bool OSCollectionIterator::initWithCollection(const OSCollection *inColl)
     initialUpdateStamp = 0;
     valid = false;
 
-    return this;
+    return true;
 }
 
 OSCollectionIterator *
@@ -77,7 +68,7 @@ void OSCollectionIterator::free()
 {
     if (collIterator) {
         kfree(collIterator, collection->iteratorSize());
-       ACCUMSIZE(-(collection->iteratorSize()));
+       OSCONTAINER_ACCUMSIZE(-((size_t) collection->iteratorSize()));
         collIterator = 0;
     }
 
@@ -94,8 +85,8 @@ void OSCollectionIterator::reset()
     valid = false;
 
     if (!collIterator) {
-        collIterator = (void *)kalloc(collection->iteratorSize());
-       ACCUMSIZE(collection->iteratorSize());
+        collIterator = (void *)kalloc_container(collection->iteratorSize());
+       OSCONTAINER_ACCUMSIZE(collection->iteratorSize());
         if (!collIterator)
             return;
     }
@@ -110,8 +101,8 @@ void OSCollectionIterator::reset()
 bool OSCollectionIterator::isValid()
 {
     if (!collIterator) {
-        collIterator = (void *)kalloc(collection->iteratorSize());
-       ACCUMSIZE(collection->iteratorSize());
+        collIterator = (void *)kalloc_container(collection->iteratorSize());
+       OSCONTAINER_ACCUMSIZE(collection->iteratorSize());
         if (!collection->initIterator(collIterator))
             return false;
         initialUpdateStamp = collection->updateStamp;
index d43f67a13cb423120dcf4ad4a1cfa768d66cbd6d..a48142d2c0513f3886f0df87b707357a69203196 100644 (file)
@@ -49,32 +49,30 @@ OSMetaClassDefineReservedUnused(OSData, 7);
 
 #define EXTERNAL ((unsigned int) -1)
 
-#if OSALLOCDEBUG
-extern int debug_container_malloc_size;
-#define ACCUMSIZE(s) do { debug_container_malloc_size += (s); } while(0)
-#else
-#define ACCUMSIZE(s)
-#endif
-
 bool OSData::initWithCapacity(unsigned int inCapacity)
 {
+    if (data)
+    {
+        OSCONTAINER_ACCUMSIZE(-((size_t)capacity));
+       if (!inCapacity || (capacity < inCapacity))
+       {
+           // clean out old data's storage if it isn't big enough
+           kfree(data, capacity);
+           data = 0;
+           capacity = 0;
+       }
+    }
+
     if (!super::init())
         return false;
 
-    if (data && (!inCapacity || capacity < inCapacity) ) {
-        // clean out old data's storage if it isn't big enough
-        kfree(data, capacity);
-        data = 0;
-        ACCUMSIZE(-capacity);
-    }
-
     if (inCapacity && !data) {
-        data = (void *) kalloc(inCapacity);
+        data = (void *) kalloc_container(inCapacity);
         if (!data)
             return false;
         capacity = inCapacity;
-        ACCUMSIZE(inCapacity);
     }
+    OSCONTAINER_ACCUMSIZE(capacity);
 
     length = 0;
     if (inCapacity < 16)
@@ -189,7 +187,7 @@ void OSData::free()
 {
     if (capacity != EXTERNAL && data && capacity) {
         kfree(data, capacity);
-        ACCUMSIZE( -capacity );
+        OSCONTAINER_ACCUMSIZE( -((size_t)capacity) );
     } else if (capacity == EXTERNAL) {
        DeallocFunction freemem = reserved ? reserved->deallocFunction : NULL;
        if (freemem && data && length) {
@@ -230,7 +228,7 @@ unsigned int OSData::ensureCapacity(unsigned int newCapacity)
     if (finalCapacity < newCapacity)
         return capacity;
 
-    newData = (unsigned char *) kalloc(finalCapacity);
+    newData = (unsigned char *) kalloc_container(finalCapacity);
 
     if ( newData ) {
         bzero(newData + capacity, finalCapacity - capacity);
@@ -238,7 +236,7 @@ unsigned int OSData::ensureCapacity(unsigned int newCapacity)
             bcopy(data, newData, capacity);
             kfree(data, capacity);
         }
-        ACCUMSIZE( finalCapacity - capacity );
+        OSCONTAINER_ACCUMSIZE( ((size_t)finalCapacity) - ((size_t)capacity) );
         data = (void *) newData;
         capacity = finalCapacity;
     }
@@ -445,7 +443,7 @@ void OSData::setDeallocFunction(DeallocFunction func)
 {
     if (!reserved)
     {
-       reserved = (typeof(reserved)) kalloc(sizeof(ExpansionData));
+       reserved = (typeof(reserved)) kalloc_container(sizeof(ExpansionData));
         if (!reserved) return;
         bzero(reserved, sizeof(ExpansionData));
     }
@@ -456,7 +454,7 @@ void OSData::setSerializable(bool serializable)
 {
     if (!reserved)
     {
-       reserved = (typeof(reserved)) kalloc(sizeof(ExpansionData));
+       reserved = (typeof(reserved)) kalloc_container(sizeof(ExpansionData));
        if (!reserved) return;
        bzero(reserved, sizeof(ExpansionData));
     }
index 2f86e9a1d03bdb3d0bd33a70d57992c844f36318..c511e9d14d576ab6a8a9a204bcdd00725dea1d5b 100644 (file)
@@ -49,15 +49,6 @@ OSMetaClassDefineReservedUnused(OSDictionary, 5);
 OSMetaClassDefineReservedUnused(OSDictionary, 6);
 OSMetaClassDefineReservedUnused(OSDictionary, 7);
 
-#if OSALLOCDEBUG
-extern "C" {
-    extern int debug_container_malloc_size;
-};
-#define ACCUMSIZE(s) do { debug_container_malloc_size += (s); } while(0)
-#else
-#define ACCUMSIZE(s)
-#endif
-
 #define EXT_CAST(obj) \
     reinterpret_cast<OSObject *>(const_cast<OSMetaClassBase *>(obj))
 
@@ -72,12 +63,12 @@ bool OSDictionary::initWithCapacity(unsigned int inCapacity)
     unsigned int size = inCapacity * sizeof(dictEntry);
 //fOptions |= kSort;
 
-    dictionary = (dictEntry *) kalloc(size);
+    dictionary = (dictEntry *) kalloc_container(size);
     if (!dictionary)
         return false;
 
     bzero(dictionary, size);
-    ACCUMSIZE(size);
+    OSCONTAINER_ACCUMSIZE(size);
 
     count = 0;
     capacity = inCapacity;
@@ -254,7 +245,7 @@ void OSDictionary::free()
     flushCollection();
     if (dictionary) {
         kfree(dictionary, capacity * sizeof(dictEntry));
-        ACCUMSIZE( -(capacity * sizeof(dictEntry)) );
+        OSCONTAINER_ACCUMSIZE( -(capacity * sizeof(dictEntry)) );
     }
 
     super::free();
@@ -293,14 +284,14 @@ unsigned int OSDictionary::ensureCapacity(unsigned int newCapacity)
     
     newSize = sizeof(dictEntry) * finalCapacity;
 
-    newDict = (dictEntry *) kalloc(newSize);
+    newDict = (dictEntry *) kalloc_container(newSize);
     if (newDict) {
         oldSize = sizeof(dictEntry) * capacity;
 
         bcopy(dictionary, newDict, oldSize);
         bzero(&newDict[capacity], newSize - oldSize);
 
-        ACCUMSIZE(newSize - oldSize);
+        OSCONTAINER_ACCUMSIZE(((size_t)newSize) - ((size_t)oldSize));
         kfree(dictionary, oldSize);
 
         dictionary = newDict;
index f1e6133442987a3616833655409f2cbc333ae9ce..5f77f045be558e49162be24136267729340f26e0 100644 (file)
@@ -30,6 +30,7 @@ extern "C" {
 #include <kern/clock.h>
 #include <kern/host.h>
 #include <kern/kext_alloc.h>
+#include <vm/vm_kern.h>
 #include <kextd/kextd_mach.h>
 #include <libkern/kernel_mach_header.h>
 #include <libkern/kext_panic_report.h>
@@ -46,6 +47,8 @@ extern "C" {
 // 04/18/11 - gab: <rdar://problem/9236163>
 #include <sys/random.h>
 
+#include <sys/pgo.h>
+
 #if CONFIG_MACF
 #include <sys/kauth.h>
 #include <security/mac_framework.h>
@@ -62,6 +65,7 @@ extern "C" {
 #include <IOKit/IOService.h>
 
 #include <IOKit/IOStatisticsPrivate.h>
+#include <IOKit/IOBSD.h>
 
 #if PRAGMA_MARK
 #pragma mark External & Internal Function Protos
@@ -354,10 +358,15 @@ static AbsoluteTime         sLastWakeTime;                       // last time we
 * to automatically parse the list of loaded kexts.
 **********/
 static IOLock                 * sKextSummariesLock                = NULL;
+extern "C" lck_spin_t           vm_allocation_sites_lock;
+static IOSimpleLock           * sKextAccountsLock = &vm_allocation_sites_lock;
 
 void (*sLoadedKextSummariesUpdated)(void) = OSKextLoadedKextSummariesUpdated;
 OSKextLoadedKextSummaryHeader * gLoadedKextSummaries __attribute__((used)) = NULL;
 static size_t sLoadedKextSummariesAllocSize = 0;
+
+static OSKextActiveAccount    * sKextAccounts;
+static uint32_t                 sKextAccountsCount;
 };
 
 /*********************************************************************
@@ -380,6 +389,22 @@ static  OSArray           * sUserSpaceLogMessageArray  = NULL;
 * End scope for sKextInnerLock-protected variables.
 *********************************************************************/
 
+
+/*********************************************************************
+ helper function used for collecting PGO data upon unload of a kext
+ */
+
+static int OSKextGrabPgoDataLocked(OSKext *kext,
+                                   bool metadata,
+                                   uuid_t instance_uuid,
+                                   uint64_t *pSize,
+                                   char *pBuffer,
+                                   uint64_t bufferSize);
+
+/**********************************************************************/
+
+
+
 #if PRAGMA_MARK
 #pragma mark OSData callbacks (need to move to OSData)
 #endif
@@ -852,7 +877,7 @@ OSKext::removeKextBootstrap(void)
        /* Allocate space for the LINKEDIT copy.
         */
         mem_result = kmem_alloc(kernel_map, (vm_offset_t *) &seg_copy,
-            seg_length);
+            seg_length, VM_KERN_MEMORY_KEXT);
         if (mem_result != KERN_SUCCESS) {
             OSKextLog(/* kext */ NULL,
                 kOSKextLogErrorLevel |
@@ -1518,6 +1543,17 @@ OSKext::initWithPrelinkedInfoDict(
         }
     }
 
+    result = slidePrelinkedExecutable();
+    if (result != kOSReturnSuccess) {
+        goto finish;
+    }
+
+    /* set VM protections now, wire later at kext load */
+    result = setVMAttributes(true, false);
+    if (result != KERN_SUCCESS) {
+        goto finish;
+    }
+
     flags.prelinked = true;
 
    /* If we created a kext from prelink info,
@@ -1532,7 +1568,6 @@ finish:
 
     return result;
 }
-
 /*********************************************************************
 *********************************************************************/
 OSKext *
@@ -2643,7 +2678,7 @@ z_alloc(void * notused __unused, u_int num_items, u_int size)
     }
     uint32_t allocSize = (uint32_t)allocSize64;
 
-    zmem = (z_mem *)kalloc(allocSize);
+    zmem = (z_mem *)kalloc_tag(allocSize, VM_KERN_MEMORY_OSKEXT);
     if (!zmem) {
         goto finish;
     }
@@ -2691,7 +2726,7 @@ OSKext::extractMkext2FileData(
     }
 
     if (KERN_SUCCESS != kmem_alloc(kernel_map,
-        (vm_offset_t*)&uncompressedDataBuffer, fullSize)) {
+        (vm_offset_t*)&uncompressedDataBuffer, fullSize, VM_KERN_MEMORY_OSKEXT)) {
 
        /* How's this for cheesy? The kernel is only asked to extract
         * kext plists so we tailor the log messages.
@@ -3092,7 +3127,7 @@ OSKext::serializeLogInfo(
         logInfo = serializer->text();
         logInfoLength = serializer->getLength();
 
-        kmem_result = kmem_alloc(kernel_map, (vm_offset_t *)&buffer, round_page(logInfoLength));
+        kmem_result = kmem_alloc(kernel_map, (vm_offset_t *)&buffer, round_page(logInfoLength), VM_KERN_MEMORY_OSKEXT);
         if (kmem_result != KERN_SUCCESS) {
             OSKextLog(/* kext */ NULL,
                 kOSKextLogErrorLevel |
@@ -3203,6 +3238,53 @@ finish:
     return foundKext;
 }
 
+
+/*********************************************************************
+*********************************************************************/
+OSKext *
+OSKext::lookupKextWithUUID(uuid_t wanted)
+{
+    OSKext * foundKext = NULL;                 // returned
+    uint32_t count, i;
+
+    IORecursiveLockLock(sKextLock);
+
+    count = sLoadedKexts->getCount();
+
+    for (i = 0; i < count; i++) {
+        OSKext   * thisKext     = NULL;
+
+        thisKext = OSDynamicCast(OSKext, sLoadedKexts->getObject(i));
+        if (!thisKext) {
+            continue;
+        }
+
+        OSData *uuid_data = thisKext->copyUUID();
+        if (!uuid_data) {
+            continue;
+        }
+
+        uuid_t uuid;
+        memcpy(&uuid, uuid_data->getBytesNoCopy(), sizeof(uuid));
+        uuid_data->release();
+
+        if (0 == uuid_compare(wanted, uuid)) {
+            foundKext = thisKext;
+            foundKext->retain();
+            goto finish;
+        }
+
+    }
+
+finish:
+    IORecursiveLockUnlock(sKextLock);
+
+    return foundKext;
+}
+
+
+
+
 /*********************************************************************
 *********************************************************************/
 /* static */
@@ -3276,6 +3358,11 @@ OSKext::removeKext(
         }
 #endif
 
+        /* make sure there are no resource requests in flight - 17187548 */
+        if (aKext->countRequestCallbacks()) {
+            goto finish;
+        }
+
        /* If we are terminating, send the request to the IOCatalogue
         * (which will actually call us right back but that's ok we have
         * a recursive lock don't you know) but do not ask the IOCatalogue
@@ -3923,7 +4010,7 @@ static char * makeHostArchKey(const char * key, uint32_t * keySizeOut)
    /* Add 1 for the ARCH_SEPARATOR_CHAR, and 1 for the '\0'.
     */
     keySize = 1 + 1 + strlen(key) + strlen(ARCHNAME);
-    result = (char *)kalloc(keySize);
+    result = (char *)kalloc_tag(keySize, VM_KERN_MEMORY_OSKEXT);
     if (!result) {
         goto finish;
     }
@@ -4018,7 +4105,7 @@ OSKext::isInExcludeList(void)
      * string (or strings) that we will not allow to load
      */
     versionString = OSDynamicCast(OSString, sExcludeListByID->getObject(bundleID));
-    if (!versionString) {
+    if (versionString == NULL || versionString->getLength() > (sizeof(myBuffer) - 1)) {
         return(false);
     }
     
@@ -4397,7 +4484,7 @@ OSKext::load(
     if (!sKxldContext) {
         kxldResult = kxld_create_context(&sKxldContext, &kern_allocate, 
             &kxld_log_callback, /* Flags */ (KXLDFlags) 0, 
-            /* cputype */ 0, /* cpusubtype */ 0);
+            /* cputype */ 0, /* cpusubtype */ 0, /* page size */ 0);
         if (kxldResult) {
             OSKextLog(this,
                 kOSKextLogErrorLevel |
@@ -4479,6 +4566,19 @@ OSKext::load(
         goto finish;
     }
 
+    pendingPgoHead.next = &pendingPgoHead;
+    pendingPgoHead.prev = &pendingPgoHead;
+
+    uuid_generate(instance_uuid);
+    account = IONew(OSKextAccount, 1);
+    if (!account) {
+       result = KERN_MEMORY_ERROR;
+       goto finish;
+    }
+    bzero(account, sizeof(*account));
+    account->loadTag = kmod_info->id;
+    account->site.flags = VM_TAG_KMOD;
+
     flags.loaded = true;
 
    /* Add the kext to the list of loaded kexts and update the kmod_info
@@ -4524,6 +4624,14 @@ OSKext::load(
 #else
         jettisonLinkeditSegment();
 #endif /* CONFIG_DTRACE */
+
+#if !VM_MAPPED_KEXTS
+        /* If there is a page (or more) worth of padding after the end
+         * of the last data section but before the end of the data segment
+         * then free it in the same manner the LinkeditSegment is freed
+         */
+        jettisonDATASegmentPadding();
+#endif
     }
 
 loaded:
@@ -4601,7 +4709,7 @@ static char * strdup(const char * string)
     }
     
     size = 1 + strlen(string);
-    result = (char *)kalloc(size);
+    result = (char *)kalloc_tag(size, VM_KERN_MEMORY_OSKEXT);
     if (!result) {
         goto finish;
     }
@@ -4615,6 +4723,40 @@ finish:
 /*********************************************************************
 * 
 *********************************************************************/
+
+kernel_section_t *
+OSKext::lookupSection(const char *segname, const char *secname)
+{
+    kernel_section_t         * found_section = NULL;
+    kernel_mach_header_t     * mh            = NULL;
+    kernel_segment_command_t * seg           = NULL;
+    kernel_section_t         * sec           = NULL;
+
+    mh = (kernel_mach_header_t *)linkedExecutable->getBytesNoCopy();
+
+    for (seg = firstsegfromheader(mh); seg != NULL; seg = nextsegfromheader(mh, seg)) {
+
+        if (0 != strcmp(seg->segname, segname)) {
+            continue;
+        }
+
+        for (sec = firstsect(seg); sec != NULL; sec = nextsect(seg, sec)) {
+
+            if (0 == strcmp(sec->sectname, secname)) {
+                found_section = sec;
+                goto out;
+            }
+        }
+    }
+
+ out:
+    return found_section;
+}
+
+/*********************************************************************
+*
+*********************************************************************/
+
 OSReturn
 OSKext::slidePrelinkedExecutable()
 {
@@ -4891,13 +5033,19 @@ OSKext::loadExecutable()
     }
 
     if (isPrelinked()) {
-        result = slidePrelinkedExecutable();
-        if (result != kOSReturnSuccess) {
-            goto finish;
-        }
         goto register_kmod;
     }
 
+    /* <rdar://problem/21444003> all callers must be entitled */
+    if (FALSE == IOTaskHasEntitlement(current_task(), "com.apple.rootless.kext-management")) {
+        OSKextLog(this,
+                  kOSKextLogErrorLevel | kOSKextLogLoadFlag,
+                  "Not entitled to link kext '%s'",
+                  getIdentifierCString());
+        result = kOSKextReturnNotPrivileged;
+        goto finish;
+    }
+    
     theExecutable = getExecutable();
     if (!theExecutable) {
         if (declaresExecutable()) {
@@ -4952,7 +5100,7 @@ OSKext::loadExecutable()
         goto finish;
     }
 
-    kxlddeps = (KXLDDependency *)kalloc(num_kxlddeps * sizeof(*kxlddeps));
+    kxlddeps = (KXLDDependency *)kalloc_tag(num_kxlddeps * sizeof(*kxlddeps), VM_KERN_MEMORY_OSKEXT);
     if (!kxlddeps) {
         OSKextLog(this,
             kOSKextLogErrorLevel |
@@ -5057,7 +5205,7 @@ register_kmod:
 
        /* Whip up a fake kmod_info entry for the interface kext.
         */
-        kmod_info = (kmod_info_t *)kalloc(sizeof(kmod_info_t));
+        kmod_info = (kmod_info_t *)kalloc_tag(sizeof(kmod_info_t), VM_KERN_MEMORY_OSKEXT);
         if (!kmod_info) {
             result = KERN_MEMORY_ERROR;
             goto finish;
@@ -5092,8 +5240,8 @@ register_kmod:
     */
     num_kmod_refs = getNumDependencies();
     if (num_kmod_refs) {
-        kmod_info->reference_list = (kmod_reference_t *)kalloc(
-            num_kmod_refs * sizeof(kmod_reference_t));
+        kmod_info->reference_list = (kmod_reference_t *)kalloc_tag(
+            num_kmod_refs * sizeof(kmod_reference_t), VM_KERN_MEMORY_OSKEXT);
         if (!kmod_info->reference_list) {
             result = KERN_MEMORY_ERROR;
             goto finish;
@@ -5123,7 +5271,8 @@ register_kmod:
             (unsigned)kmod_info->id);
     }
 
-    result = setVMProtections();
+    /* if prelinked, VM protections are already set */
+    result = setVMAttributes(!isPrelinked(), true);
     if (result != KERN_SUCCESS) {
         goto finish;
     }
@@ -5193,14 +5342,6 @@ OSKext::jettisonLinkeditSegment(void)
     vm_size_t                  linkeditsize, kextsize;
     OSData                   * data = NULL;
 
-    /* 16K_XXX: To Remove */
-    /* We don't currently guarantee alignment greater than 4KB for kext
-     * segments, so we cannot always jettison __LINKEDIT cleanly, so let
-     * it be for now.
-     */
-    if (!TEST_PAGE_SIZE_4K)
-       return;
-
 #if NO_KEXTD
     /* We can free symbol tables for all embedded kexts because we don't
      * support runtime kext linking.
@@ -5264,6 +5405,61 @@ finish:
     return;
 }
 
+/*********************************************************************
+* If there are whole pages that are unused between the last section
+* of the DATA segment and the end of the DATA segment, then we can free
+* them
+*********************************************************************/
+void
+OSKext::jettisonDATASegmentPadding(void)
+{
+    kernel_mach_header_t * mh;
+    kernel_segment_command_t * dataSeg;
+    kernel_section_t * sec, * lastSec;
+    vm_offset_t dataSegEnd, lastSecEnd;
+    vm_size_t padSize;
+
+    mh = (kernel_mach_header_t *)kmod_info->address;
+
+    dataSeg = getsegbynamefromheader(mh, SEG_DATA);
+    if (dataSeg == NULL) {
+        return;
+    }
+
+    lastSec = NULL;
+    sec = firstsect(dataSeg);
+    while (sec != NULL) {
+        lastSec = sec;
+        sec = nextsect(dataSeg, sec);
+    } 
+
+    if (lastSec == NULL) {
+        return;
+    }
+
+    if ((dataSeg->vmaddr != round_page(dataSeg->vmaddr)) ||
+        (dataSeg->vmsize != round_page(dataSeg->vmsize))) {
+        return;
+    }
+
+    dataSegEnd = dataSeg->vmaddr + dataSeg->vmsize;
+    lastSecEnd = round_page(lastSec->addr + lastSec->size);
+
+    if (dataSegEnd <= lastSecEnd) {
+        return;
+    }
+
+    padSize = dataSegEnd - lastSecEnd;
+
+    if (padSize >= PAGE_SIZE) {
+#if VM_MAPPED_KEXTS
+        kext_free(lastSecEnd, padSize);
+#else
+        ml_static_mfree(lastSecEnd, padSize);
+#endif
+    }
+}
+
 /*********************************************************************
 *********************************************************************/
 void
@@ -5380,12 +5576,12 @@ OSKext_wire(
     vm_prot_t  access_type,
     boolean_t       user_wire)
 {
-       return vm_map_wire(map, start, end, access_type, user_wire);
+       return vm_map_wire(map, start, end, access_type | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_KEXT), user_wire);
 }
 #endif
 
 OSReturn
-OSKext::setVMProtections(void)
+OSKext::setVMAttributes(bool protect, bool wire)
 {
     vm_map_t                    kext_map        = NULL;
     kernel_segment_command_t  * seg             = NULL;
@@ -5393,7 +5589,7 @@ OSKext::setVMProtections(void)
     vm_map_offset_t             end             = 0;
     OSReturn                    result          = kOSReturnError;
 
-    if (!kmod_info->address && !kmod_info->size) {
+    if (isInterface() || !declaresExecutable()) {
         result = kOSReturnSuccess;
         goto finish;
     }
@@ -5406,8 +5602,9 @@ OSKext::setVMProtections(void)
     }
 
     /* Protect the headers as read-only; they do not need to be wired */
-    result = OSKext_protect(kext_map, kmod_info->address, 
-        kmod_info->address + kmod_info->hdr_size, VM_PROT_READ, TRUE);
+    result = (protect) ? OSKext_protect(kext_map, kmod_info->address, 
+        kmod_info->address + kmod_info->hdr_size, VM_PROT_READ, TRUE)
+            : KERN_SUCCESS;
     if (result != KERN_SUCCESS) {
         goto finish;
     }
@@ -5415,32 +5612,36 @@ OSKext::setVMProtections(void)
     /* Set the VM protections and wire down each of the segments */
     seg = firstsegfromheader((kernel_mach_header_t *)kmod_info->address);
     while (seg) {
+
+
         start = round_page(seg->vmaddr);
         end = trunc_page(seg->vmaddr + seg->vmsize);
 
-        result = OSKext_protect(kext_map, start, end, seg->maxprot, TRUE);
-        if (result != KERN_SUCCESS) {
-            OSKextLog(this,
-                kOSKextLogErrorLevel |
-                kOSKextLogLoadFlag,
-                "Kext %s failed to set maximum VM protections "
-                "for segment %s - 0x%x.", 
-                getIdentifierCString(), seg->segname, (int)result);
-            goto finish;
-        }
+        if (protect) {
+            result = OSKext_protect(kext_map, start, end, seg->maxprot, TRUE);
+            if (result != KERN_SUCCESS) {
+                OSKextLog(this,
+                    kOSKextLogErrorLevel |
+                    kOSKextLogLoadFlag,
+                    "Kext %s failed to set maximum VM protections "
+                    "for segment %s - 0x%x.",
+                    getIdentifierCString(), seg->segname, (int)result);
+                goto finish;
+            }
 
-        result = OSKext_protect(kext_map, start, end, seg->initprot, FALSE);
-        if (result != KERN_SUCCESS) {
-            OSKextLog(this,
-                kOSKextLogErrorLevel |
-                kOSKextLogLoadFlag,
-                "Kext %s failed to set initial VM protections "
-                "for segment %s - 0x%x.", 
-                getIdentifierCString(), seg->segname, (int)result);
-            goto finish;
+            result = OSKext_protect(kext_map, start, end, seg->initprot, FALSE);
+            if (result != KERN_SUCCESS) {
+                OSKextLog(this,
+                    kOSKextLogErrorLevel |
+                    kOSKextLogLoadFlag,
+                    "Kext %s failed to set initial VM protections "
+                    "for segment %s - 0x%x.",
+                    getIdentifierCString(), seg->segname, (int)result);
+                goto finish;
+            }
         }
 
-        if (segmentShouldBeWired(seg)) {
+        if (segmentShouldBeWired(seg) && wire) {
             result = OSKext_wire(kext_map, start, end, seg->initprot, FALSE);
             if (result != KERN_SUCCESS) {
                 goto finish;
@@ -5897,9 +6098,10 @@ finish:
 OSReturn
 OSKext::unload(void)
 {
-    OSReturn     result = kOSReturnError;
-    unsigned int index;
-    uint32_t     num_kmod_refs = 0;
+    OSReturn        result = kOSReturnError;
+    unsigned int    index;
+    uint32_t        num_kmod_refs = 0;
+    OSKextAccount * freeAccount;
 
     if (!sUnloadEnabled) {
         OSKextLog(this,
@@ -5978,6 +6180,24 @@ OSKext::unload(void)
         "Kext %s unloading.",
         getIdentifierCString());
 
+    {
+        struct list_head *p;
+        struct list_head *prev;
+        struct list_head *next;
+        for (p = pendingPgoHead.next; p != &pendingPgoHead; p = next) {
+            OSKextGrabPgoStruct *s = container_of(p, OSKextGrabPgoStruct, list_head);
+            s->err = OSKextGrabPgoDataLocked(this, s->metadata, instance_uuid, s->pSize, s->pBuffer, s->bufferSize);
+            prev = p->prev;
+            next = p->next;
+            prev->next = next;
+            next->prev = prev;
+            p->prev = p;
+            p->next = p;
+            IORecursiveLockWakeup(sKextLock, s, false);
+        }
+    }
+
+
    /* Even if we don't call the stop function, we want to be sure we
     * have no OSMetaClass references before unloading the kext executable
     * from memory. OSMetaClasses may have pointers into the kext executable
@@ -6039,6 +6259,13 @@ OSKext::unload(void)
 
     notifyKextUnloadObservers(this);
 
+    freeAccount = NULL;
+    IOSimpleLockLock(sKextAccountsLock);
+    if (account->site.tag) account->site.flags |= VM_TAG_UNLOAD;
+    else                   freeAccount = account;
+    IOSimpleLockUnlock(sKextAccountsLock);
+    if (freeAccount) IODelete(freeAccount, OSKextAccount, 1);
+
     /* Unwire and free the linked executable.
      */
     if (linkedExecutable) {
@@ -7620,7 +7847,7 @@ OSKext::handleRequest(
        /* This kmem_alloc sets the return value of the function.
         */
         kmem_result = kmem_alloc(kernel_map, (vm_offset_t *)&buffer,
-            round_page(responseLength));
+            round_page(responseLength), VM_KERN_MEMORY_OSKEXT);
         if (kmem_result != KERN_SUCCESS) {
             OSKextLog(/* kext */ NULL,
                 kOSKextLogErrorLevel |
@@ -7664,6 +7891,274 @@ finish:
     return result;
 }
 
+
+// #include <InstrProfiling.h>
+extern "C" {
+
+    uint64_t __llvm_profile_get_size_for_buffer_internal(const char *DataBegin,
+                                                         const char *DataEnd,
+                                                         const char *CountersBegin,
+                                                         const char *CountersEnd ,
+                                                         const char *NamesBegin,
+                                                         const char *NamesEnd);
+    int __llvm_profile_write_buffer_internal(char *Buffer,
+                                             const char *DataBegin,
+                                             const char *DataEnd,
+                                             const char *CountersBegin,
+                                             const char *CountersEnd ,
+                                             const char *NamesBegin,
+                                             const char *NamesEnd);
+}
+
+
+static
+void OSKextPgoMetadataPut(char *pBuffer,
+                          size_t *position,
+                          size_t bufferSize,
+                          uint32_t *num_pairs,
+                          const char *key,
+                          const char *value)
+{
+    size_t strlen_key = strlen(key);
+    size_t strlen_value = strlen(value);
+    size_t len = strlen(key) + 1 + strlen(value) + 1;
+    char *pos = pBuffer + *position;
+    *position += len;
+    if (pBuffer && bufferSize && *position <= bufferSize) {
+        memcpy(pos, key, strlen_key); pos += strlen_key;
+        *(pos++) = '=';
+        memcpy(pos, value, strlen_value); pos += strlen_value;
+        *(pos++) = 0;
+        if (num_pairs) {
+            (*num_pairs)++;
+        }
+    }
+}
+
+
+static
+void OSKextPgoMetadataPutMax(size_t *position, const char *key, size_t value_max)
+{
+    *position += strlen(key) + 1 + value_max + 1;
+}
+
+
+static
+void OSKextPgoMetadataPutAll(OSKext *kext,
+                             uuid_t instance_uuid,
+                             char *pBuffer,
+                             size_t *position,
+                             size_t bufferSize,
+                             uint32_t *num_pairs)
+{
+    assert_static(sizeof(clock_sec_t) % 2 == 0);
+    //log_10 2^16 ≈ 4.82
+    const size_t max_secs_string_size = 5 * sizeof(clock_sec_t)/2;
+    const size_t max_timestamp_string_size = max_secs_string_size + 1 + 6;
+
+    if (!pBuffer) {
+        OSKextPgoMetadataPutMax(position, "INSTANCE", 36);
+        OSKextPgoMetadataPutMax(position, "UUID", 36);
+        OSKextPgoMetadataPutMax(position, "TIMESTAMP", max_timestamp_string_size);
+    } else {
+        uuid_string_t instance_uuid_string;
+        uuid_unparse(instance_uuid, instance_uuid_string);
+        OSKextPgoMetadataPut(pBuffer, position, bufferSize, num_pairs,
+                             "INSTANCE", instance_uuid_string);
+
+        OSData *uuid_data;
+        uuid_t uuid;
+        uuid_string_t uuid_string;
+        uuid_data = kext->copyUUID();
+        if (uuid_data) {
+            memcpy(uuid, uuid_data->getBytesNoCopy(), sizeof(uuid));
+            OSSafeRelease(uuid_data);
+            uuid_unparse(uuid, uuid_string);
+            OSKextPgoMetadataPut(pBuffer, position, bufferSize, num_pairs,
+                                 "UUID", uuid_string);
+        }
+
+        clock_sec_t secs;
+        clock_usec_t usecs;
+        clock_get_calendar_microtime(&secs, &usecs);
+        assert(usecs < 1000000);
+        char timestamp[max_timestamp_string_size + 1];
+        assert_static(sizeof(long) >= sizeof(clock_sec_t));
+        snprintf(timestamp, sizeof(timestamp), "%lu.%06d", (unsigned long)secs, (int)usecs);
+        OSKextPgoMetadataPut(pBuffer, position, bufferSize, num_pairs,
+                             "TIMESTAMP", timestamp);
+    }
+
+    OSKextPgoMetadataPut(pBuffer, position, bufferSize, num_pairs,
+                         "NAME", kext->getIdentifierCString());
+
+    char versionCString[kOSKextVersionMaxLength];
+    OSKextVersionGetString(kext->getVersion(), versionCString, kOSKextVersionMaxLength);
+    OSKextPgoMetadataPut(pBuffer, position, bufferSize, num_pairs,
+                         "VERSION", versionCString);
+
+}
+
+static
+size_t OSKextPgoMetadataSize(OSKext *kext)
+{
+    size_t position = 0;
+    uuid_t fakeuuid = {};
+    OSKextPgoMetadataPutAll(kext, fakeuuid, NULL, &position, 0, NULL);
+    return position;
+}
+
+
+int OSKextGrabPgoDataLocked(OSKext *kext,
+                            bool metadata,
+                            uuid_t instance_uuid,
+                            uint64_t *pSize,
+                            char *pBuffer,
+                            uint64_t bufferSize)
+{
+
+    int err = 0;
+
+    kernel_section_t *sect_prf_data = NULL;
+    kernel_section_t *sect_prf_name = NULL;
+    kernel_section_t *sect_prf_cnts = NULL;
+    uint64_t size;
+    size_t metadata_size = 0;
+
+    sect_prf_data = kext->lookupSection("__DATA", "__llvm_prf_data");
+    sect_prf_name = kext->lookupSection("__DATA", "__llvm_prf_name");
+    sect_prf_cnts = kext->lookupSection("__DATA", "__llvm_prf_cnts");
+
+    if (!sect_prf_data || !sect_prf_name || !sect_prf_cnts) {
+        err = ENOTSUP;
+        goto out;
+    }
+
+    size = __llvm_profile_get_size_for_buffer_internal(
+                         (const char*) sect_prf_data->addr, (const char*) sect_prf_data->addr + sect_prf_data->size,
+                         (const char*) sect_prf_cnts->addr, (const char*) sect_prf_cnts->addr + sect_prf_cnts->size,
+                         (const char*) sect_prf_name->addr, (const char*) sect_prf_name->addr + sect_prf_name->size);
+
+    if (metadata) {
+        metadata_size = OSKextPgoMetadataSize(kext);
+        size += metadata_size;
+        size += sizeof(pgo_metadata_footer);
+    }
+
+
+    if (pSize) {
+        *pSize = size;
+    }
+
+    if (pBuffer && bufferSize) {
+        if (bufferSize < size) {
+            err = ERANGE;
+            goto out;
+        }
+
+        err = __llvm_profile_write_buffer_internal(
+                    pBuffer,
+                    (const char*) sect_prf_data->addr, (const char*) sect_prf_data->addr + sect_prf_data->size,
+                    (const char*) sect_prf_cnts->addr, (const char*) sect_prf_cnts->addr + sect_prf_cnts->size,
+                    (const char*) sect_prf_name->addr, (const char*) sect_prf_name->addr + sect_prf_name->size);
+
+        if (err) {
+            err = EIO;
+            goto out;
+        }
+
+        if (metadata) {
+            char *end_of_buffer = pBuffer + size;
+            struct pgo_metadata_footer *footerp = (struct pgo_metadata_footer *) (end_of_buffer - sizeof(struct pgo_metadata_footer));
+            char *metadata_buffer = end_of_buffer - (sizeof(struct pgo_metadata_footer) + metadata_size);
+
+            size_t metadata_position = 0;
+            uint32_t num_pairs = 0;
+            OSKextPgoMetadataPutAll(kext, instance_uuid, metadata_buffer, &metadata_position, metadata_size, &num_pairs);
+            while (metadata_position < metadata_size) {
+                metadata_buffer[metadata_position++] = 0;
+            }
+
+            struct pgo_metadata_footer footer;
+            footer.magic = htonl(0x6d657461);
+            footer.number_of_pairs = htonl( num_pairs );
+            footer.offset_to_pairs = htonl( sizeof(struct pgo_metadata_footer) + metadata_size );
+            memcpy(footerp, &footer, sizeof(footer));
+        }
+
+    }
+
+out:
+    return err;
+}
+
+
+int
+OSKextGrabPgoData(uuid_t uuid,
+                  uint64_t *pSize,
+                  char *pBuffer,
+                  uint64_t bufferSize,
+                  int wait_for_unload,
+                  int metadata)
+{
+    int err = 0;
+    OSKext *kext = NULL;
+
+
+    IORecursiveLockLock(sKextLock);
+
+    kext = OSKext::lookupKextWithUUID(uuid);
+    if (!kext)  {
+        err = ENOENT;
+        goto out;
+    }
+
+    if (wait_for_unload) {
+        OSKextGrabPgoStruct s;
+
+        s.metadata = metadata;
+        s.pSize = pSize;
+        s.pBuffer = pBuffer;
+        s.bufferSize = bufferSize;
+        s.err = EINTR;
+
+        struct list_head *prev = &kext->pendingPgoHead;
+        struct list_head *next = kext->pendingPgoHead.next;
+
+        s.list_head.prev = prev;
+        s.list_head.next = next;
+
+        prev->next = &s.list_head;
+        next->prev = &s.list_head;
+
+        kext->release();
+        kext = NULL;
+
+        IORecursiveLockSleep(sKextLock, &s, THREAD_ABORTSAFE);
+
+        prev = s.list_head.prev;
+        next = s.list_head.next;
+
+        prev->next = next;
+        next->prev = prev;
+
+        err = s.err;
+
+    } else {
+        err = OSKextGrabPgoDataLocked(kext, metadata, kext->instance_uuid, pSize, pBuffer, bufferSize);
+    }
+
+ out:
+    if (kext) {
+        kext->release();
+    }
+
+    IORecursiveLockUnlock(sKextLock);
+
+    return err;
+}
+
+
 /*********************************************************************
 *********************************************************************/
 /* static */
@@ -7680,6 +8175,26 @@ OSKext::copyLoadedKextInfo(
 
     IORecursiveLockLock(sKextLock);
 
+#if CONFIG_MACF
+    /* Is the calling process allowed to query kext info? */
+    if (current_task() != kernel_task) {
+        int                 macCheckResult      = 0;
+        kauth_cred_t        cred                = NULL;
+
+        cred = kauth_cred_get_with_ref();
+        macCheckResult = mac_kext_check_query(cred);
+        kauth_cred_unref(&cred);
+
+        if (macCheckResult != 0) {
+            OSKextLog(/* kext */ NULL,
+                      kOSKextLogErrorLevel | kOSKextLogLoadFlag,
+                      "Failed to query kext info (MAC policy error 0x%x).",
+                      macCheckResult);
+            goto finish;
+        }
+   }
+#endif
+
    /* Empty list of bundle ids is equivalent to no list (get all).
     */
     if (kextIdentifiers && !kextIdentifiers->getCount()) {
@@ -7699,6 +8214,45 @@ OSKext::copyLoadedKextInfo(
     if (!result) {
         goto finish;
     }
+
+#if 0
+    OSKextLog(/* kext */ NULL,
+              kOSKextLogErrorLevel |
+              kOSKextLogGeneralFlag,
+              "kaslr: vm_kernel_slide 0x%lx \n",
+              vm_kernel_slide);
+    OSKextLog(/* kext */ NULL,
+              kOSKextLogErrorLevel |
+              kOSKextLogGeneralFlag,
+              "kaslr: vm_kernel_stext 0x%lx vm_kernel_etext 0x%lx \n",
+              vm_kernel_stext, vm_kernel_etext);
+    OSKextLog(/* kext */ NULL,
+              kOSKextLogErrorLevel |
+              kOSKextLogGeneralFlag,
+              "kaslr: vm_kernel_base 0x%lx vm_kernel_top 0x%lx \n",
+              vm_kernel_base, vm_kernel_top);
+    OSKextLog(/* kext */ NULL,
+              kOSKextLogErrorLevel |
+              kOSKextLogGeneralFlag,
+              "kaslr: vm_kext_base 0x%lx vm_kext_top 0x%lx \n",
+              vm_kext_base, vm_kext_top);
+    OSKextLog(/* kext */ NULL,
+              kOSKextLogErrorLevel |
+              kOSKextLogGeneralFlag,
+              "kaslr: vm_prelink_stext 0x%lx vm_prelink_etext 0x%lx \n",
+              vm_prelink_stext, vm_prelink_etext);
+    OSKextLog(/* kext */ NULL,
+              kOSKextLogErrorLevel |
+              kOSKextLogGeneralFlag,
+              "kaslr: vm_prelink_sinfo 0x%lx vm_prelink_einfo 0x%lx \n",
+              vm_prelink_sinfo, vm_prelink_einfo);
+    OSKextLog(/* kext */ NULL,
+              kOSKextLogErrorLevel |
+              kOSKextLogGeneralFlag,
+              "kaslr: vm_slinkedit 0x%lx vm_elinkedit 0x%lx \n",
+              vm_slinkedit, vm_elinkedit);
+#endif
+
     for (i = 0; i < count; i++) {
         OSKext   * thisKext     = NULL;  // do not release
         Boolean    includeThis  = true;
@@ -7804,6 +8358,7 @@ OSKext::copyInfo(OSArray * infoKeys)
                 linkedExecutable->getBytesNoCopy();
 
 #if !SECURE_KERNEL
+            // do not return macho header info on shipping iOS - 19095897
             if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSBundleMachOHeadersKey)) {
                 kernel_mach_header_t *  temp_kext_mach_hdr;
                 struct load_command *   lcp;
@@ -7845,10 +8400,10 @@ OSKext::copyInfo(OSArray * infoKeys)
                                   VM_KERNEL_UNSLIDE(segp->vmaddr),
                                   segp->vmsize, segp->nsects);
                         if ( (VM_KERNEL_IS_SLID(segp->vmaddr) == false) &&
-                             (VM_KERNEL_IS_KEXT(segp->vmaddr) == false) &&
-                             (VM_KERNEL_IS_PRELINKTEXT(segp->vmaddr) == false) &&
-                             (VM_KERNEL_IS_PRELINKINFO(segp->vmaddr) == false) &&
-                             (VM_KERNEL_IS_KEXT_LINKEDIT(segp->vmaddr) == false) ) {
+                            (VM_KERNEL_IS_KEXT(segp->vmaddr) == false) &&
+                            (VM_KERNEL_IS_PRELINKTEXT(segp->vmaddr) == false) &&
+                            (VM_KERNEL_IS_PRELINKINFO(segp->vmaddr) == false) &&
+                            (VM_KERNEL_IS_KEXT_LINKEDIT(segp->vmaddr) == false) ) {
                             OSKextLog(/* kext */ NULL,
                                       kOSKextLogErrorLevel |
                                       kOSKextLogGeneralFlag,
@@ -7861,7 +8416,7 @@ OSKext::copyInfo(OSArray * infoKeys)
                         for (secp = firstsect(segp); secp != NULL; secp = nextsect(segp, secp)) {
                             secp->addr = VM_KERNEL_UNSLIDE(secp->addr);
                         }
-                   }
+                    }
                     lcp = (struct load_command *)((caddr_t)lcp + lcp->cmdsize);
                 }
                 result->setObject(kOSBundleMachOHeadersKey, headerData);
@@ -7933,8 +8488,8 @@ OSKext::copyInfo(OSArray * infoKeys)
             // +1 for slash, +1 for \0
             executablePathCStringSize = pathLength + executableRelPath->getLength() + 2;
 
-            executablePathCString = (char *)kalloc((executablePathCStringSize) *
-                sizeof(char)); // +1 for \0
+            executablePathCString = (char *)kalloc_tag((executablePathCStringSize) *
+                sizeof(char), VM_KERN_MEMORY_OSKEXT); // +1 for \0
             if (!executablePathCString) {
                 goto finish;
             }
@@ -9473,7 +10028,7 @@ OSKextVLog(
     va_end(argList);
 
     if (length + 1 >= sizeof(stackBuffer)) {
-        allocBuffer = (char *)kalloc((length + 1) * sizeof(char));
+        allocBuffer = (char *)kalloc_tag((length + 1) * sizeof(char), VM_KERN_MEMORY_OSKEXT);
         if (!allocBuffer) {
             goto finish;
         }
@@ -9739,6 +10294,7 @@ OSKext::printKextsInBacktrace(
     u_int       i = 0;
 
     if (lockFlag) {
+        if (!sKextSummariesLock) return;
         IOLockLock(sKextSummariesLock);
     }
 
@@ -10172,7 +10728,7 @@ OSKext::saveLoadedKextPanicList(void)
     uint32_t   newlist_size   = 0;
     
     newlist_size = KEXT_PANICLIST_SIZE;
-    newlist = (char *)kalloc(newlist_size);
+    newlist = (char *)kalloc_tag(newlist_size, VM_KERN_MEMORY_OSKEXT);
     
     if (!newlist) {
         OSKextLog(/* kext */ NULL,
@@ -10303,7 +10859,13 @@ OSKext::updateLoadedKextSummaries(void)
     u_int count;
     u_int maxKexts;
     u_int i, j;
+    OSKextActiveAccount * accountingList;
+    OSKextActiveAccount * prevAccountingList;
+    uint32_t idx, accountingListAlloc, accountingListCount, prevAccountingListCount;
     
+    prevAccountingList = NULL;
+    prevAccountingListCount = 0;
+
 #if DEVELOPMENT || DEBUG
     if (IORecursiveLockHaveLock(sKextLock) == false) {
         panic("sKextLock must be held");
@@ -10338,7 +10900,7 @@ OSKext::updateLoadedKextSummaries(void)
         }
         result = kmem_alloc(kernel_map,
                             (vm_offset_t*)&summaryHeaderAlloc,
-                            size);
+                            size, VM_KERN_MEMORY_OSKEXT);
         if (result != KERN_SUCCESS) goto finish;
         summaryHeader = summaryHeaderAlloc;
         summarySize = size;
@@ -10363,11 +10925,12 @@ OSKext::updateLoadedKextSummaries(void)
     bzero(summaryHeader, summarySize);
     summaryHeader->version = kOSKextLoadedKextSummaryVersion;
     summaryHeader->entry_size = sizeof(OSKextLoadedKextSummary);
-    
+
     /* Populate each kext summary.
      */
     
     count = sLoadedKexts->getCount();
+    accountingListAlloc = 0;
     for (i = 0, j = 0; i < count && j < maxKexts; ++i) {
         aKext = OSDynamicCast(OSKext, sLoadedKexts->getObject(i));
         if (!aKext || !aKext->isExecutable()) {
@@ -10376,8 +10939,29 @@ OSKext::updateLoadedKextSummaries(void)
         
         aKext->updateLoadedKextSummary(&summaryHeader->summaries[j++]);
         summaryHeader->numSummaries++;
+       accountingListAlloc++;
     }
-    
+
+    accountingList = IONew(typeof(accountingList[0]), accountingListAlloc);
+    accountingListCount = 0;
+    for (i = 0, j = 0; i < count && j < maxKexts; ++i) {
+        aKext = OSDynamicCast(OSKext, sLoadedKexts->getObject(i));
+        if (!aKext || !aKext->isExecutable()) {
+            continue;
+        }
+
+       OSKextActiveAccount activeAccount;
+       aKext->updateActiveAccount(&activeAccount);
+       // order by address
+       for (idx = 0; idx < accountingListCount; idx++)
+       {
+           if (activeAccount.address < accountingList[idx].address) break;
+       }
+       bcopy(&accountingList[idx], &accountingList[idx + 1], (accountingListCount - idx) * sizeof(accountingList[0]));
+       accountingList[idx] = activeAccount;
+       accountingListCount++;
+    }
+    assert(accountingListCount == accountingListAlloc);
     /* Write protect the buffer and move it into place.
      */
     
@@ -10396,6 +10980,13 @@ OSKext::updateLoadedKextSummaries(void)
     */
     if (sLoadedKextSummariesUpdated) (*sLoadedKextSummariesUpdated)();
 
+    IOSimpleLockLock(sKextAccountsLock);
+    prevAccountingList      = sKextAccounts;
+    prevAccountingListCount = sKextAccountsCount;
+    sKextAccounts           = accountingList;
+    sKextAccountsCount      = accountingListCount;
+    IOSimpleLockUnlock(sKextAccountsLock);
+
 finish:
     IOLockUnlock(sKextSummariesLock);
 
@@ -10405,6 +10996,9 @@ finish:
     if (summaryHeaderAlloc) {
         kmem_free(kernel_map, (vm_offset_t)summaryHeaderAlloc, summarySize);
     }
+    if (prevAccountingList) {
+        IODelete(prevAccountingList, typeof(accountingList[0]), prevAccountingListCount);
+    }
 
     return;
 }
@@ -10435,6 +11029,67 @@ OSKext::updateLoadedKextSummary(OSKextLoadedKextSummary *summary)
     return;
 }
 
+/*********************************************************************
+*********************************************************************/
+
+void
+OSKext::updateActiveAccount(OSKextActiveAccount *account)
+{
+    bzero(account, sizeof(*account));
+    account->address = kmod_info->address;
+    if (account->address) {
+        account->address_end = kmod_info->address + kmod_info->size;
+    }
+    account->account = this->account;
+}
+
+extern "C" const vm_allocation_site_t * 
+OSKextGetAllocationSiteForCaller(uintptr_t address)
+{
+    OSKextActiveAccount *  active;
+    vm_allocation_site_t * site;
+    uint32_t baseIdx;
+    uint32_t lim;
+
+    IOSimpleLockLock(sKextAccountsLock);
+    site = NULL;
+    // bsearch sKextAccounts list
+    for (baseIdx = 0, lim = sKextAccountsCount; lim; lim >>= 1)
+    {
+       active = &sKextAccounts[baseIdx + (lim >> 1)];
+       if ((address >= active->address) && (address < active->address_end))
+       {
+           site = &active->account->site;
+           if (!site->tag) vm_tag_alloc_locked(site);
+           break;
+       }
+       else if (address > active->address) 
+       {       
+           // move right
+           baseIdx += (lim >> 1) + 1;
+           lim--;
+       }
+       // else move left
+    }
+    IOSimpleLockUnlock(sKextAccountsLock);
+
+    return (site);
+}
+
+extern "C" uint32_t 
+OSKextGetKmodIDForSite(vm_allocation_site_t * site)
+{
+    OSKextAccount * account = (typeof(account)) site;
+    return (account->loadTag);
+}
+
+extern "C" void 
+OSKextFreeSite(vm_allocation_site_t * site)
+{
+    OSKextAccount * freeAccount = (typeof(freeAccount)) site;
+    IODelete(freeAccount, OSKextAccount, 1);
+}
+
 /*********************************************************************
 *********************************************************************/
     
@@ -10473,7 +11128,7 @@ GetAppleTEXTHashForKext(OSKext * theKext, OSDictionary *theInfoDict)
     // KEC_FIPS type kexts never unload so we don't have to clean up our 
     // AppleTEXTHash_t
     if (kmem_alloc(kernel_map, (vm_offset_t *) &my_athp, 
-                   sizeof(AppleTEXTHash_t)) != KERN_SUCCESS) {
+                   sizeof(AppleTEXTHash_t), VM_KERN_MEMORY_OSKEXT) != KERN_SUCCESS) {
         return(NULL);
     }
     
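The recurring edit in the OSKext code above swaps untagged allocations for tagged ones: kalloc becomes kalloc_tag(..., VM_KERN_MEMORY_OSKEXT) and kmem_alloc gains a tag argument, while the matching frees stay unchanged. A small sketch of that pattern (kernel context assumed; the len and buf names are hypothetical):

    /* Illustrative only; len/buf/pages are hypothetical locals. */
    vm_size_t     len   = PAGE_SIZE;
    char        * buf   = (char *) kalloc_tag(len, VM_KERN_MEMORY_OSKEXT);   /* was: kalloc(len) */
    vm_offset_t   pages = 0;
    kern_return_t kr    = kmem_alloc(kernel_map, &pages, round_page(len),
                                     VM_KERN_MEMORY_OSKEXT);                 /* tag is the new final argument */

    if (buf)                 kfree(buf, len);                                /* frees are unchanged */
    if (kr == KERN_SUCCESS)  kmem_free(kernel_map, pages, round_page(len));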
index 807eb459845f33efed058e64c1408013acb3c410..b32ab8c9b7a6dd3c9a17566b873de32a8df4edcc 100644 (file)
@@ -48,6 +48,9 @@
 
 #include <IOKit/IOLib.h>
 
+#include <IOKit/IOKitDebug.h>
+
+
 __BEGIN_DECLS
 
 #include <sys/systm.h>
@@ -64,13 +67,6 @@ __BEGIN_DECLS
 /*********************************************************************
 * Macros
 *********************************************************************/
-#if OSALLOCDEBUG
-extern int debug_container_malloc_size;
-#define ACCUMSIZE(s) do { debug_container_malloc_size += (s); } while (0)
-#else
-#define ACCUMSIZE(s)
-#endif /* OSALLOCDEBUG */
-
 __END_DECLS
 
 #if PRAGMA_MARK
@@ -111,10 +107,12 @@ static struct StalledData {
 } * sStalled;
 IOLock * sStalledClassesLock = NULL;
 
-
 struct ExpansionData {
-    OSOrderedSet * instances;
-    OSKext *       kext;
+    OSOrderedSet    * instances;
+    OSKext          * kext;
+#if IOTRACKING
+    IOTrackingQueue * tracking;
+#endif
 };
 
 
@@ -393,6 +391,9 @@ OSMetaClass::OSMetaClass(
 
     reserved = IONew(ExpansionData, 1);
     bzero(reserved, sizeof(ExpansionData));
+#if IOTRACKING
+    reserved->tracking = IOTrackingQueueAlloc(inClassName, inClassSize, 0, true);
+#endif
 
    /* Hack alert: We are just casting inClassName and storing it in
     * an OSString * instance variable. This may be because you can't
@@ -416,7 +417,7 @@ OSMetaClass::OSMetaClass(
             int newSize = oldSize
                 + kKModCapacityIncrement * sizeof(OSMetaClass *);
 
-            sStalled->classes = (OSMetaClass **)kalloc(newSize);
+            sStalled->classes = (OSMetaClass **)kalloc_tag(newSize, VM_KERN_MEMORY_OSKEXT);
             if (!sStalled->classes) {
                 sStalled->classes = oldStalled;
                 sStalled->result = kOSMetaClassNoTempData;
@@ -426,7 +427,7 @@ OSMetaClass::OSMetaClass(
             sStalled->capacity += kKModCapacityIncrement;
             memmove(sStalled->classes, oldStalled, oldSize);
             kfree(oldStalled, oldSize);
-            ACCUMSIZE(newSize - oldSize);
+            OSMETA_ACCUMSIZE(((size_t)newSize) - ((size_t)oldSize));
         }
 
         sStalled->classes[sStalled->count++] = this;
@@ -489,6 +490,10 @@ OSMetaClass::~OSMetaClass()
             }
         }
     }
+#if IOTRACKING
+    IOTrackingQueueFree(reserved->tracking);
+#endif
+    IODelete(reserved, ExpansionData, 1);
 }
 
 /*********************************************************************
@@ -533,15 +538,15 @@ OSMetaClass::preModLoad(const char * kextIdentifier)
     IOLockLock(sStalledClassesLock);
 
     assert (sStalled == NULL);
-    sStalled = (StalledData *)kalloc(sizeof(* sStalled));
+    sStalled = (StalledData *)kalloc_tag(sizeof(* sStalled), VM_KERN_MEMORY_OSKEXT);
     if (sStalled) {
         sStalled->classes = (OSMetaClass **)
-            kalloc(kKModCapacityIncrement * sizeof(OSMetaClass *));
+            kalloc_tag(kKModCapacityIncrement * sizeof(OSMetaClass *), VM_KERN_MEMORY_OSKEXT);
         if (!sStalled->classes) {
             kfree(sStalled, sizeof(*sStalled));
             return 0;
         }
-        ACCUMSIZE((kKModCapacityIncrement * sizeof(OSMetaClass *)) +
+        OSMETA_ACCUMSIZE((kKModCapacityIncrement * sizeof(OSMetaClass *)) +
             sizeof(*sStalled));
 
         sStalled->result   = kOSReturnSuccess;
@@ -710,7 +715,7 @@ finish:
     OSSafeRelease(myKext);
 
     if (sStalled) {
-        ACCUMSIZE(-(sStalled->capacity * sizeof(OSMetaClass *) +
+        OSMETA_ACCUMSIZE(-(sStalled->capacity * sizeof(OSMetaClass *) +
             sizeof(*sStalled)));
         kfree(sStalled->classes, sStalled->capacity * sizeof(OSMetaClass *));
         kfree(sStalled, sizeof(*sStalled));
@@ -1192,3 +1197,61 @@ finish:
 
     return;
 }
+
+
+/*********************************************************************
+*********************************************************************/
+
+#if IOTRACKING
+
+void *OSMetaClass::trackedNew(size_t size)
+{
+    IOTracking * mem;
+
+    mem = (typeof(mem)) kalloc_tag_bt(size + sizeof(IOTracking), VM_KERN_MEMORY_LIBKERN);
+    assert(mem);
+    if (!mem) return (mem);
+
+    memset(mem, 0, size + sizeof(IOTracking));
+    mem++;
+
+    OSIVAR_ACCUMSIZE(size);
+
+    return (mem);
+}
+
+void OSMetaClass::trackedDelete(void * instance, size_t size)
+{
+    IOTracking * mem = (typeof(mem)) instance; mem--;
+
+    kfree(mem, size + sizeof(IOTracking));
+    OSIVAR_ACCUMSIZE(-size);
+}
+
+void OSMetaClass::trackedInstance(OSObject * instance) const
+{
+    IOTracking * mem = (typeof(mem)) instance; mem--;
+
+    return (IOTrackingAdd(reserved->tracking, mem, classSize, false));
+}
+
+void OSMetaClass::trackedFree(OSObject * instance) const
+{
+    IOTracking * mem = (typeof(mem)) instance; mem--;
+
+    return (IOTrackingRemove(reserved->tracking, mem, classSize));
+}
+
+void OSMetaClass::trackedAccumSize(OSObject * instance, size_t size) const
+{
+    IOTracking * mem = (typeof(mem)) instance; mem--;
+
+    return (IOTrackingAccumSize(reserved->tracking, mem, size));
+}
+
+IOTrackingQueue * OSMetaClass::getTracking() const
+{
+    return (reserved->tracking);
+}
+
+#endif /* IOTRACKING */
\ No newline at end of file
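When IOTRACKING is enabled, OSObject allocation (in the following file) routes through the trackedNew/trackedDelete helpers added above, which prepend an IOTracking record to every instance and hand the caller the pointer just past it. A user-space analogue of that layout, for illustration only (TrackingHeader and the malloc-based calls stand in for IOTracking and kalloc_tag_bt/kfree):

    #include <stdlib.h>

    struct TrackingHeader { void * link[2]; };            /* stand-in for IOTracking */

    void * tracked_new(size_t size)
    {
        TrackingHeader * hdr = (TrackingHeader *) calloc(1, size + sizeof(TrackingHeader));
        return hdr ? (void *) (hdr + 1) : NULL;            /* caller sees the offset pointer */
    }

    void tracked_delete(void * object)
    {
        if (object) free(((TrackingHeader *) object) - 1); /* step back to the header */
    }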
index 0dc95ed5543fd6cd99dbdd76de199909c40fac4f..45652a1ca70fea933df9ae13dfb20223461d4a5d 100644 (file)
@@ -45,11 +45,6 @@ __BEGIN_DECLS
 int debug_ivars_size;
 __END_DECLS
 
-#if OSALLOCDEBUG
-#define ACCUMSIZE(s) do { debug_ivars_size += (s); } while(0)
-#else
-#define ACCUMSIZE(s)
-#endif
 
 // OSDefineMetaClassAndAbstractStructors(OSObject, 0);
 /* Class global data */
@@ -58,8 +53,6 @@ const OSMetaClass * const OSObject::metaClass = &OSObject::gMetaClass;
 const OSMetaClass * const OSObject::superClass = 0;
 
 /* Class member functions - Can't use defaults */
-OSObject::OSObject()                   { retainCount = 1; }
-OSObject::OSObject(const OSMetaClass *)        { retainCount = 1; }
 OSObject::~OSObject()                  { }
 const OSMetaClass * OSObject::getMetaClass() const
     { return &gMetaClass; }
@@ -94,18 +87,6 @@ static const char *getClassName(const OSObject *obj)
     return (meta) ? meta->getClassName() : "unknown class?";
 }
 
-bool OSObject::init()
-    { return true; }
-
-void OSObject::free()
-{
-    const OSMetaClass *meta = getMetaClass();
-
-    if (meta)
-       meta->instanceDestructed();
-    delete this;
-}
-
 int OSObject::getRetainCount() const
 {
     return (int) ((UInt16) retainCount);
@@ -257,91 +238,72 @@ bool OSObject::serialize(OSSerialize *s) const
     return (ok);
 }
 
+void *OSObject::operator new(size_t size)
+{
+#if IOTRACKING
+    if (kIOTracking & gIOKitDebug) return (OSMetaClass::trackedNew(size));
+#endif
 
-thread_t gOSObjectTrackThread;
-
-queue_head_t gOSObjectTrackList =
-    { (queue_t) &gOSObjectTrackList, (queue_t) &gOSObjectTrackList };
+    void * mem = kalloc_tag_bt(size, VM_KERN_MEMORY_LIBKERN);
+    assert(mem);
+    bzero(mem, size);
+    OSIVAR_ACCUMSIZE(size);
 
-lck_spin_t gOSObjectTrackLock;
+    return (void *) mem;
+}
 
-OSArray * OSFlushObjectTrackList(void)
+void OSObject::operator delete(void * mem, size_t size)
 {
-    OSArray *     array;
-    queue_entry_t next;
-
-    array = OSArray::withCapacity(16);
+    if (!mem) return;
 
-    lck_spin_lock(&gOSObjectTrackLock);
-    while (!queue_empty(&gOSObjectTrackList))
-    {
-       next = queue_first(&gOSObjectTrackList);
-       remque(next);
-       lck_spin_unlock(&gOSObjectTrackLock);
-       array->setObject((OSObject *) (next + 1));
-       lck_spin_lock(&gOSObjectTrackLock);
-    }
-    lck_spin_unlock(&gOSObjectTrackLock);
+#if IOTRACKING
+    if (kIOTracking & gIOKitDebug) return (OSMetaClass::trackedDelete(mem, size));
+#endif
 
-    return (array);
+    kfree(mem, size);
+    OSIVAR_ACCUMSIZE(-size);
 }
 
-struct OSObjectTracking
+bool OSObject::init()
 {
-    queue_chain_t link;
-    void *       bt[14];
-};
+#if IOTRACKING
+    if (kIOTracking & gIOKitDebug) getMetaClass()->trackedInstance(this);
+#endif
+    return true;
+}
 
-void *OSObject::operator new(size_t size)
+void OSObject::free()
 {
-    size_t tracking        = (gIOKitDebug & kOSTraceObjectAlloc) 
-                          ? sizeof(OSObjectTracking) : 0;
-    OSObjectTracking * mem = (OSObjectTracking *) kalloc(size + tracking);
-
-    assert(mem);
+    const OSMetaClass *meta = getMetaClass();
 
-    if (tracking)
+    if (meta)
     {
-       if ((((thread_t) 1) == gOSObjectTrackThread) || (current_thread() == gOSObjectTrackThread))
-       {
-           (void) OSBacktrace(&mem->bt[0], sizeof(mem->bt) / sizeof(mem->bt[0]));
-           lck_spin_lock(&gOSObjectTrackLock);
-           enqueue_tail(&gOSObjectTrackList, &mem->link);
-           lck_spin_unlock(&gOSObjectTrackLock);
-       }
-       else
-           mem->link.next = 0;
-       mem++;
+       meta->instanceDestructed();
+#if IOTRACKING
+       if (kIOTracking & gIOKitDebug) getMetaClass()->trackedFree(this);
+#endif
     }
-
-    bzero(mem, size);
-
-    ACCUMSIZE(size);
-
-    return (void *) mem;
+    delete this;
 }
 
-void OSObject::operator delete(void *_mem, size_t size)
+#if IOTRACKING
+void OSObject::trackingAccumSize(size_t size)
 {
-    size_t             tracking = (gIOKitDebug & kOSTraceObjectAlloc)
-                               ? sizeof(OSObjectTracking) : 0;
-    OSObjectTracking * mem      = (OSObjectTracking *) _mem;
-
-    if (!mem)
-       return;
+    if (kIOTracking & gIOKitDebug) getMetaClass()->trackedAccumSize(this, size);
+}
+#endif
 
-    if (tracking)
-    {
-       mem--;
-       if (mem->link.next)
-       {
-           lck_spin_lock(&gOSObjectTrackLock);
-           remque(&mem->link);
-           lck_spin_unlock(&gOSObjectTrackLock);
-       }
-    }
+/* Class member functions - Can't use defaults */
+/* During constructor vtable is always OSObject's - can't call any subclass */
 
-    kfree(mem, size + tracking);
+OSObject::OSObject()
+{
+    retainCount = 1;
+//    if (kIOTracking & gIOKitDebug) getMetaClass()->trackedInstance(this);
+}
 
-    ACCUMSIZE(-size);
+OSObject::OSObject(const OSMetaClass *)
+{
+    retainCount = 1;
+//    if (kIOTracking & gIOKitDebug) getMetaClass()->trackedInstance(this);
 }
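
With this change OSObject owns its instance allocation end to end: operator new zero-fills the block and bumps the instance-byte counter, operator delete reverses both, and each first checks the kIOTracking bit in gIOKitDebug to divert to the tracked path shown earlier. A minimal sketch of the untracked path only, with malloc/free in place of kalloc_tag_bt/kfree and a plain long standing in for the OSIVAR_ACCUMSIZE counter (class and variable names here are illustrative):

    #include <cstddef>
    #include <cstdlib>
    #include <cstring>

    static long gInstanceBytes = 0;        // stand-in for the OSIVAR_ACCUMSIZE counter

    struct Object {
        void *operator new(size_t size)
        {
            void *mem = std::malloc(size);
            if (!mem) std::abort();        // libkern asserts here; it builds without exceptions
            std::memset(mem, 0, size);     // every new instance starts zero-filled
            gInstanceBytes += (long)size;
            return mem;
        }

        void operator delete(void *mem, size_t size)
        {
            if (!mem) return;
            gInstanceBytes -= (long)size;
            std::free(mem);
        }

        int value = 0;
    };

After "Object *o = new Object; delete o;" the counter returns to its prior value, which is what the kernel-wide accounting macros are tracking.
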
index 1ba5e04ddbe8a7774510adf0c963d7f87801d762..cd9e4477aa413b2d07c2a467b408196e1382ed5f 100644 (file)
@@ -42,14 +42,6 @@ OSMetaClassDefineReservedUnused(OSOrderedSet, 5);
 OSMetaClassDefineReservedUnused(OSOrderedSet, 6);
 OSMetaClassDefineReservedUnused(OSOrderedSet, 7);
 
-#if OSALLOCDEBUG
-extern "C" {
-    extern int debug_container_malloc_size;
-};
-#define ACCUMSIZE(s) do { debug_container_malloc_size += (s); } while(0)
-#else
-#define ACCUMSIZE(s)
-#endif
 
 struct _Element {
     const OSMetaClassBase *            obj;
@@ -72,7 +64,7 @@ initWithCapacity(unsigned int inCapacity,
         return false;
 
     size = sizeof(_Element) * inCapacity;
-    array = (_Element *) kalloc(size);
+    array = (_Element *) kalloc_container(size);
     if (!array)
         return false;
 
@@ -83,9 +75,9 @@ initWithCapacity(unsigned int inCapacity,
     orderingRef = inOrderingRef;
 
     bzero(array, size);
-    ACCUMSIZE(size);
+    OSCONTAINER_ACCUMSIZE(size);
 
-    return this;       
+    return true;       
 }
 
 OSOrderedSet * OSOrderedSet::
@@ -109,7 +101,7 @@ void OSOrderedSet::free()
 
     if (array) {
         kfree(array, sizeof(_Element) * capacity);
-        ACCUMSIZE( -(sizeof(_Element) * capacity) );
+        OSCONTAINER_ACCUMSIZE( -(sizeof(_Element) * capacity) );
     }
 
     super::free();
@@ -142,11 +134,11 @@ unsigned int OSOrderedSet::ensureCapacity(unsigned int newCapacity)
     }
     newSize = sizeof(_Element) * finalCapacity;
 
-    newArray = (_Element *) kalloc(newSize);
+    newArray = (_Element *) kalloc_container(newSize);
     if (newArray) {
         oldSize = sizeof(_Element) * capacity;
 
-        ACCUMSIZE(newSize - oldSize);
+        OSCONTAINER_ACCUMSIZE(((size_t)newSize) - ((size_t)oldSize));
 
         bcopy(array, newArray, oldSize);
         bzero(&newArray[capacity], newSize - oldSize);
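
ensureCapacity grows the element array by allocating the larger buffer with kalloc_container, copying the existing entries across, and zeroing only the newly added tail before accounting for the size delta. A sketch of that grow-copy-zero sequence with malloc/free and a hypothetical Element type (the real code likewise keeps the old array and capacity if the allocation fails):

    #include <cstddef>
    #include <cstdlib>
    #include <cstring>

    struct Element { const void *obj; };    // hypothetical stand-in for _Element

    // Grow `array` from `capacity` to `newCapacity` slots; returns the capacity in effect.
    static unsigned grow(Element *&array, unsigned capacity, unsigned newCapacity)
    {
        if (newCapacity <= capacity) return capacity;

        size_t oldSize = sizeof(Element) * capacity;
        size_t newSize = sizeof(Element) * newCapacity;

        Element *bigger = static_cast<Element *>(std::malloc(newSize));
        if (!bigger) return capacity;                          // keep the old array on failure

        if (array) {
            std::memcpy(bigger, array, oldSize);               // preserve existing entries
            std::free(array);
        }
        std::memset(bigger + capacity, 0, newSize - oldSize);  // zero only the new tail

        array = bigger;
        return newCapacity;
    }
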
index d16fa34cebcb37dce59dbe242bcd66536ea2a52b..d8841a9eeec3450b530c21601e432b64479f2f35 100644 (file)
@@ -33,6 +33,7 @@
 #include <libkern/c++/OSKext.h>
 #include <libkern/c++/OSLib.h>
 #include <libkern/c++/OSSymbol.h>
+#include <IOKit/IOKitDebug.h>
 
 #include <sys/cdefs.h>
 
@@ -73,7 +74,6 @@ static bool gKernelCPPInitialized = false;
         }                                                     \
     } while (0)
 
-
 #if PRAGMA_MARK
 #pragma mark kern_os Allocator Package
 #endif /* PRAGMA_MARK */
@@ -104,7 +104,7 @@ kern_os_malloc(size_t size)
         return (0);
     }
 
-    mem = (struct _mhead *)kalloc(memsize);
+    mem = (struct _mhead *)kalloc_tag_bt(memsize, VM_KERN_MEMORY_LIBKERN);
     if (!mem) {
         return (0);
     }
@@ -172,7 +172,7 @@ kern_os_realloc(
     }
 
     nmemsize = sizeof (*nmem) + nsize ;
-    nmem = (struct _mhead *) kalloc(nmemsize);
+    nmem = (struct _mhead *) kalloc_tag_bt(nmemsize, VM_KERN_MEMORY_LIBKERN);
     if (!nmem){
         kern_os_free(addr);
         return (0);
@@ -412,7 +412,7 @@ OSRuntimeInitializeCPP(
     kernel_segment_command_t * segment         = NULL;  // do not free
     kernel_segment_command_t * failure_segment = NULL;  // do not free
 
-    if (!kmodInfo || !kmodInfo->address || !kmodInfo->name) {
+    if (!kmodInfo || !kmodInfo->address) {
         result = kOSKextReturnInvalidArgument;
         goto finish;
     }
@@ -537,14 +537,11 @@ finish:
 
 /*********************************************************************
 *********************************************************************/
-extern lck_spin_t  gOSObjectTrackLock;
 extern lck_grp_t * IOLockGroup;
 extern kmod_info_t g_kernel_kmod_info;
 
 void OSlibkernInit(void)
 {
-    lck_spin_init(&gOSObjectTrackLock, IOLockGroup, LCK_ATTR_NULL);
     // This must be called before calling OSRuntimeInitializeCPP.
     OSMetaClassBase::initialize();
     
@@ -568,6 +565,9 @@ __END_DECLS
 *********************************************************************/
 void *
 operator new(size_t size)
+#if __cplusplus >= 201103L
+                                                               noexcept
+#endif
 {
     void * result;
 
@@ -577,6 +577,9 @@ operator new(size_t size)
 
 void
 operator delete(void * addr)
+#if __cplusplus >= 201103L
+                                                               noexcept
+#endif
 {
     kern_os_free(addr);
     return;
@@ -584,6 +587,9 @@ operator delete(void * addr)
 
 void *
 operator new[](unsigned long sz)
+#if __cplusplus >= 201103L
+                                                               noexcept
+#endif
 {
     if (sz == 0) sz = 1;
     return kern_os_malloc(sz);
@@ -591,6 +597,9 @@ operator new[](unsigned long sz)
 
 void
 operator delete[](void * ptr)
+#if __cplusplus >= 201103L
+                                                               noexcept
+#endif
 {
     if (ptr) {
         kern_os_free(ptr);
index 909bc0a4cec7dd74096b5e49ee36a9c5fc6d54aa..38696bc24beaf1e0aa7d94dec099d2002fc45167 100644 (file)
@@ -37,6 +37,7 @@ __END_DECLS
 #include <libkern/c++/OSLib.h>
 #include <libkern/c++/OSDictionary.h>
 #include <libkern/OSSerializeBinary.h>
+#include <IOKit/IOLib.h>
 
 #define super OSObject
 
@@ -50,14 +51,6 @@ OSMetaClassDefineReservedUnused(OSSerialize, 5);
 OSMetaClassDefineReservedUnused(OSSerialize, 6);
 OSMetaClassDefineReservedUnused(OSSerialize, 7);
 
-#if OSALLOCDEBUG
-extern "C" {
-    extern int debug_container_malloc_size;
-};
-#define ACCUMSIZE(s) do { debug_container_malloc_size += (s); } while(0)
-#else
-#define ACCUMSIZE(s)
-#endif
 
 char * OSSerialize::text() const
 {
@@ -184,7 +177,7 @@ bool OSSerialize::initWithCapacity(unsigned int inCapacity)
     // allocate from the kernel map so that we can safely map this data
     // into user space (the primary use of the OSSerialize object)
     
-    kern_return_t rc = kmem_alloc(kernel_map, (vm_offset_t *)&data, capacity);
+    kern_return_t rc = kmem_alloc(kernel_map, (vm_offset_t *)&data, capacity, IOMemoryTag(kernel_map));
     if (rc) {
         tags->release();
         tags = 0;
@@ -193,7 +186,7 @@ bool OSSerialize::initWithCapacity(unsigned int inCapacity)
     bzero((void *)data, capacity);
 
 
-    ACCUMSIZE(capacity);
+    OSCONTAINER_ACCUMSIZE(capacity);
 
     return true;
 }
@@ -233,13 +226,14 @@ unsigned int OSSerialize::ensureCapacity(unsigned int newCapacity)
                                        (vm_offset_t)data,
                                        capacity,
                                        (vm_offset_t *)&newData,
-                                       newCapacity);
+                                       newCapacity,
+                                       VM_KERN_MEMORY_IOKIT);
        if (!rc) {
-           ACCUMSIZE(newCapacity);
+           OSCONTAINER_ACCUMSIZE(newCapacity);
 
            // kmem realloc does not free the old address range
            kmem_free(kernel_map, (vm_offset_t)data, capacity); 
-           ACCUMSIZE(-capacity);
+           OSCONTAINER_ACCUMSIZE(-((size_t)capacity));
            
            // kmem realloc does not zero out the new memory
            // and this could end up going to user land
@@ -259,7 +253,7 @@ void OSSerialize::free()
 
     if (data) {
        kmem_free(kernel_map, (vm_offset_t)data, capacity); 
-        ACCUMSIZE( -capacity );
+        OSCONTAINER_ACCUMSIZE( -((size_t)capacity) );
     }
     super::free();
 }
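
Several of the accounting calls in this file change from ACCUMSIZE(-capacity) to OSCONTAINER_ACCUMSIZE(-((size_t)capacity)). Assuming the accumulator behind the macro is a pointer-width signed counter (its definition is not shown in this diff), the cast matters on LP64: negating the 32-bit unsigned capacity first yields a value near 2^32 that stays positive when widened, while negating after widening to size_t still behaves as a subtraction. A small user-space illustration of the difference:

    #include <cstddef>
    #include <cstdio>

    int main()
    {
        unsigned int capacity = 4096;
        long counter = 1 << 20;

        long a = counter + (long)(-capacity);            // -capacity is ~4.29e9 as unsigned int
        long b = counter + (long)(-((size_t)capacity));  // negate after widening: subtracts 4096

        std::printf("%ld %ld\n", a, b);   // typically prints 4296011776 1044480
        return 0;
    }

The second conversion is implementation-defined before C++20 but wraps to the expected negative value on the two's-complement targets xnu runs on.
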
index e939f0558af88009781ac27da4207cf589098f4c..accbbf224d3117f52a478ae2c199a2c95cf6b725 100644 (file)
@@ -241,7 +241,7 @@ bool OSSerialize::binarySerialize(const OSMetaClassBase *o)
        if (idx >= v##Capacity)                                                                                                         \
        {                                                                                                                                                       \
                uint32_t ncap = v##Capacity + 64;                                                                               \
-               typeof(v##Array) nbuf = (typeof(v##Array)) kalloc(ncap * sizeof(o));    \
+               typeof(v##Array) nbuf = (typeof(v##Array)) kalloc_container(ncap * sizeof(o));  \
                if (!nbuf) ok = false;                                                                                                  \
                if (v##Array)                                                                                                                   \
                {                                                                                                                                               \
@@ -276,6 +276,7 @@ OSUnserializeBinary(const char *buffer, size_t bufferSize, OSString **errorStrin
     OSSet        * newSet;
     OSObject     * o;
     OSSymbol     * sym;
+    OSString     * str;
 
     size_t           bufferPos;
     const uint32_t * next;
@@ -403,6 +404,12 @@ OSUnserializeBinary(const char *buffer, size_t bufferSize, OSString **errorStrin
                        else 
                        {
                                sym = OSDynamicCast(OSSymbol, o);
+                               if (!sym && (str = OSDynamicCast(OSString, o)))
+                               {
+                                   sym = (OSSymbol *) OSSymbol::withString(str);
+                                   o->release();
+                                   o = 0;
+                               }
                                ok = (sym != 0);
                        }
                }
index 65032f5eb3368092156deb8b29d3117f189b9322..2bd875ee6b5cbd01d247170ca02a144fe02ad1b6 100644 (file)
@@ -56,15 +56,6 @@ OSMetaClassDefineReservedUnused(OSString, 13);
 OSMetaClassDefineReservedUnused(OSString, 14);
 OSMetaClassDefineReservedUnused(OSString, 15);
 
-#if OSALLOCDEBUG
-extern "C" {
-    extern int debug_container_malloc_size;
-};
-#define ACCUMSIZE(s) do { debug_container_malloc_size += (s); } while(0)
-#else
-#define ACCUMSIZE(s)
-#endif
-
 bool OSString::initWithString(const OSString *aString)
 {
     return initWithCString(aString->string);
@@ -72,35 +63,54 @@ bool OSString::initWithString(const OSString *aString)
 
 bool OSString::initWithCString(const char *cString)
 {
-    if (!cString || !super::init())
-        return false;
+    unsigned int   newLength;
+    char         * newString;
 
-    length = strlen(cString) + 1;
-    string = (char *) kalloc(length);
-    if (!string)
-        return false;
+    if (!cString || !super::init()) return false;
+
+    newLength = strlen(cString) + 1;
+    newString = (char *) kalloc_container(newLength);
+    if (!newString) return false;
 
-    bcopy(cString, string, length);
+    bcopy(cString, newString, newLength);
 
-    ACCUMSIZE(length);
+    if ( !(flags & kOSStringNoCopy) && string) {
+        kfree(string, (vm_size_t)length);
+        OSCONTAINER_ACCUMSIZE(-((size_t)length));
+    }
+    string = newString;
+    length = newLength;
+    flags &= ~kOSStringNoCopy;
+
+    OSCONTAINER_ACCUMSIZE(length);
 
     return true;
 }
 
 bool OSString::initWithStringOfLength(const char *cString, size_t inlength)
 {
-    if (!cString || !super::init())
-        return false;
+    unsigned int   newLength;
+    char         * newString;
 
-    length = inlength + 1;
-    string = (char *) kalloc(length);
-    if (!string)
-        return false;
+    if (!cString || !super::init()) return false;
+
+    newLength = inlength + 1;
+    newString = (char *) kalloc_container(newLength);
+    if (!newString) return false;
+
+    bcopy(cString, newString, inlength);
+    newString[inlength] = 0;
+
+    if ( !(flags & kOSStringNoCopy) && string) {
+        kfree(string, (vm_size_t)length);
+        OSCONTAINER_ACCUMSIZE(-((size_t)length));
+    }
 
-    bcopy(cString, string, inlength);
-    string[inlength] = 0;
+    string = newString;
+    length = newLength;
+    flags &= ~kOSStringNoCopy;
 
-    ACCUMSIZE(length);
+    OSCONTAINER_ACCUMSIZE(length);
 
     return true;
 }
@@ -197,7 +207,7 @@ void OSString::free()
 {
     if ( !(flags & kOSStringNoCopy) && string) {
         kfree(string, (vm_size_t)length);
-        ACCUMSIZE(-length);
+        OSCONTAINER_ACCUMSIZE(-((size_t)length));
     }
 
     super::free();
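
initWithCString and initWithStringOfLength are reworked to build the replacement buffer first and only release a previously owned string afterwards, so re-initializing an already-initialized OSString neither leaks the old buffer nor leaves a dangling pointer when the new allocation fails. A hedged sketch of that ordering with malloc/free and an illustrative Str type in place of OSString:

    #include <cstddef>
    #include <cstdlib>
    #include <cstring>

    struct Str {
        char   *string     = nullptr;
        size_t  length     = 0;
        bool    ownsBuffer = false;    // plays the role of !(flags & kOSStringNoCopy)

        bool initWithCString(const char *cString)
        {
            if (!cString) return false;

            size_t newLength = std::strlen(cString) + 1;
            char  *newString = static_cast<char *>(std::malloc(newLength));
            if (!newString) return false;                  // old contents stay valid on failure

            std::memcpy(newString, cString, newLength);

            if (ownsBuffer && string) std::free(string);   // drop the previous copy, if owned

            string     = newString;
            length     = newLength;
            ownsBuffer = true;
            return true;
        }
    };
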
index b2f5f94bd283a7b3c19a85de63b225b9d8fb8e3c..7b3f214082e3eb4286446adb00e7982a06c0ec2e 100644 (file)
 
 typedef struct { unsigned int i, j; } OSSymbolPoolState;
 
-#if OSALLOCDEBUG
-extern "C" {
-    extern int debug_container_malloc_size;
-};
-#define ACCUMSIZE(s) do { debug_container_malloc_size += (s); } while(0)
-#else
-#define ACCUMSIZE(s)
-#endif
-
 #define INITIAL_POOL_SIZE  (exp2ml(1 + log2(kInitBucketCount)))
 
 #define GROW_FACTOR   (1)
@@ -126,8 +117,8 @@ public:
 
 void * OSSymbolPool::operator new(size_t size)
 {
-    void *mem = (void *)kalloc(size);
-    ACCUMSIZE(size);
+    void *mem = (void *)kalloc_tag(size, VM_KERN_MEMORY_LIBKERN);
+    OSMETA_ACCUMSIZE(size);
     assert(mem);
     bzero(mem, size);
 
@@ -137,7 +128,7 @@ void * OSSymbolPool::operator new(size_t size)
 void OSSymbolPool::operator delete(void *mem, size_t size)
 {
     kfree(mem, size);
-    ACCUMSIZE(-size);
+    OSMETA_ACCUMSIZE(-size);
 }
 
 extern lck_grp_t *IOLockGroup;
@@ -146,8 +137,8 @@ bool OSSymbolPool::init()
 {
     count = 0;
     nBuckets = INITIAL_POOL_SIZE;
-    buckets = (Bucket *) kalloc(nBuckets * sizeof(Bucket));
-    ACCUMSIZE(nBuckets * sizeof(Bucket));
+    buckets = (Bucket *) kalloc_tag(nBuckets * sizeof(Bucket), VM_KERN_MEMORY_LIBKERN);
+    OSMETA_ACCUMSIZE(nBuckets * sizeof(Bucket));
     if (!buckets)
         return false;
 
@@ -174,11 +165,11 @@ OSSymbolPool::~OSSymbolPool()
         for (thisBucket = &buckets[0]; thisBucket < &buckets[nBuckets]; thisBucket++) {
             if (thisBucket->count > 1) {
                 kfree(thisBucket->symbolP, thisBucket->count * sizeof(OSSymbol *));
-                ACCUMSIZE(-(thisBucket->count * sizeof(OSSymbol *)));
+                OSMETA_ACCUMSIZE(-(thisBucket->count * sizeof(OSSymbol *)));
             }
         }
         kfree(buckets, nBuckets * sizeof(Bucket));
-        ACCUMSIZE(-(nBuckets * sizeof(Bucket)));
+        OSMETA_ACCUMSIZE(-(nBuckets * sizeof(Bucket)));
     }
 
     if (poolGate)
@@ -253,8 +244,8 @@ void OSSymbolPool::reconstructSymbols(bool grow)
 
     count = 0;
     nBuckets = new_nBuckets;
-    buckets = (Bucket *) kalloc(nBuckets * sizeof(Bucket));
-    ACCUMSIZE(nBuckets * sizeof(Bucket));
+    buckets = (Bucket *) kalloc_tag(nBuckets * sizeof(Bucket), VM_KERN_MEMORY_LIBKERN);
+    OSMETA_ACCUMSIZE(nBuckets * sizeof(Bucket));
     /* @@@ gvdl: Zero test and panic if can't set up pool */
     bzero(buckets, nBuckets * sizeof(Bucket));
 
@@ -320,8 +311,8 @@ OSSymbol *OSSymbolPool::insertSymbol(OSSymbol *sym)
         &&  strncmp(probeSymbol->string, cString, probeSymbol->length) == 0)
             return probeSymbol;
 
-        list = (OSSymbol **) kalloc(2 * sizeof(OSSymbol *));
-        ACCUMSIZE(2 * sizeof(OSSymbol *));
+        list = (OSSymbol **) kalloc_tag(2 * sizeof(OSSymbol *), VM_KERN_MEMORY_LIBKERN);
+        OSMETA_ACCUMSIZE(2 * sizeof(OSSymbol *));
         /* @@@ gvdl: Zero test and panic if can't set up pool */
         list[0] = sym;
         list[1] = probeSymbol;
@@ -342,13 +333,13 @@ OSSymbol *OSSymbolPool::insertSymbol(OSSymbol *sym)
 
     j = thisBucket->count++;
     count++;
-    list = (OSSymbol **) kalloc(thisBucket->count * sizeof(OSSymbol *));
-    ACCUMSIZE(thisBucket->count * sizeof(OSSymbol *));
+    list = (OSSymbol **) kalloc_tag(thisBucket->count * sizeof(OSSymbol *), VM_KERN_MEMORY_LIBKERN);
+    OSMETA_ACCUMSIZE(thisBucket->count * sizeof(OSSymbol *));
     /* @@@ gvdl: Zero test and panic if can't set up pool */
     list[0] = sym;
     bcopy(thisBucket->symbolP, list + 1, j * sizeof(OSSymbol *));
     kfree(thisBucket->symbolP, j * sizeof(OSSymbol *));
-    ACCUMSIZE(-(j * sizeof(OSSymbol *)));
+    OSMETA_ACCUMSIZE(-(j * sizeof(OSSymbol *)));
     thisBucket->symbolP = list;
     GROW_POOL();
 
@@ -392,7 +383,7 @@ void OSSymbolPool::removeSymbol(OSSymbol *sym)
         if (probeSymbol == sym) {
             thisBucket->symbolP = (OSSymbol **) list[1];
             kfree(list, 2 * sizeof(OSSymbol *));
-           ACCUMSIZE(-(2 * sizeof(OSSymbol *)));
+           OSMETA_ACCUMSIZE(-(2 * sizeof(OSSymbol *)));
             count--;
             thisBucket->count--;
             SHRINK_POOL();
@@ -403,7 +394,7 @@ void OSSymbolPool::removeSymbol(OSSymbol *sym)
         if (probeSymbol == sym) {
             thisBucket->symbolP = (OSSymbol **) list[0];
             kfree(list, 2 * sizeof(OSSymbol *));
-           ACCUMSIZE(-(2 * sizeof(OSSymbol *)));
+           OSMETA_ACCUMSIZE(-(2 * sizeof(OSSymbol *)));
             count--;
             thisBucket->count--;
             SHRINK_POOL();
@@ -419,8 +410,8 @@ void OSSymbolPool::removeSymbol(OSSymbol *sym)
         if (probeSymbol == sym) {
 
             list = (OSSymbol **)
-                kalloc((thisBucket->count-1) * sizeof(OSSymbol *));
-           ACCUMSIZE((thisBucket->count-1) * sizeof(OSSymbol *));
+                kalloc_tag((thisBucket->count-1) * sizeof(OSSymbol *), VM_KERN_MEMORY_LIBKERN);
+           OSMETA_ACCUMSIZE((thisBucket->count-1) * sizeof(OSSymbol *));
             if (thisBucket->count-1 != j)
                 bcopy(thisBucket->symbolP, list,
                       (thisBucket->count-1-j) * sizeof(OSSymbol *));
@@ -429,7 +420,7 @@ void OSSymbolPool::removeSymbol(OSSymbol *sym)
                       list + thisBucket->count-1-j,
                       j * sizeof(OSSymbol *));
             kfree(thisBucket->symbolP, thisBucket->count * sizeof(OSSymbol *));
-           ACCUMSIZE(-(thisBucket->count * sizeof(OSSymbol *)));
+           OSMETA_ACCUMSIZE(-(thisBucket->count * sizeof(OSSymbol *)));
             thisBucket->symbolP = list;
             count--;
             thisBucket->count--;
@@ -555,12 +546,7 @@ void OSSymbol::checkForPageUnload(void *startAddr, void *endAddr)
     state = pool->initHashState();
     while ( (probeSymbol = pool->nextHashState(&state)) ) {
         if (probeSymbol->string >= startAddr && probeSymbol->string < endAddr) {
-            const char *oldString = probeSymbol->string;
-
-            probeSymbol->string = (char *) kalloc(probeSymbol->length);
-           ACCUMSIZE(probeSymbol->length);
-            bcopy(oldString, probeSymbol->string, probeSymbol->length);
-            probeSymbol->flags &= ~kOSStringNoCopy;
+           probeSymbol->OSString::initWithCString(probeSymbol->string);
         }
     }
     pool->openGate();
index 5885a434c6afa646660dcb2e161c2d35d8e94509..23bbd4f6a2b359afb306332c83e59a0a8ed37c37 100644 (file)
@@ -107,13 +107,13 @@ $(SOBJS): .SFLAGS
 $(COMPONENT).filelist: $(OBJS)
        $(_v)for hib_file in ${HIB_FILES}; \
        do      \
-               $(SEG_HACK) -n __HIB -o $${hib_file}__ $${hib_file} ; \
-               mv $${hib_file}__ $${hib_file} ; \
+               $(SEG_HACK) -n __HIB -o $${hib_file}__ $${hib_file} || exit 1; \
+               mv $${hib_file}__ $${hib_file} || exit 1; \
        done
        @echo LDFILELIST $(COMPONENT)
-       $(_v)( for obj in ${OBJS}; do   \
+       $(_v)for obj in ${OBJS}; do     \
                 echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \
-       done; ) > $(COMPONENT).filelist
+       done > $(COMPONENT).filelist
 
 do_all: $(COMPONENT).filelist
 
index bdf3cb6e032591e854247dff41552e1e24a9a1e2..c91df14cb06362266294ab0d5bed0e8e11943fa4 100644 (file)
@@ -5,6 +5,7 @@ OPTIONS/kdebug                                          optional kdebug
 OPTIONS/gprof                                          optional gprof
 OPTIONS/config_dtrace                                  optional config_dtrace
 OPTIONS/hibernation                                    optional hibernation
+OPTIONS/iotracking                                     optional iotracking
 OPTIONS/networking                                     optional networking
 OPTIONS/crypto                                         optional crypto
 OPTIONS/zlib                                           optional zlib
index 51d4530fe5f036eb6c75c8c41971427dbf696d7b..8b137891791fe96927ad78e64b0aad7bded08bdc 100644 (file)
@@ -1,2 +1 @@
-libkern/x86_64/OSAtomic.s                      standard
 
index dc0d6f40e221b785b81ff17397fd54b3000c7e2a..ef33084cfefc48918548f3e89ee14ae8950a08df 100644 (file)
@@ -101,5 +101,6 @@ int xts_decrypt(const uint8_t *ct, unsigned long ptlen,
 
 void xts_done(symmetric_xts *xts __unused)
 {
-
+       cc_clear(sizeof(xts->enc), xts->enc);
+       cc_clear(sizeof(xts->dec), xts->dec);
 }
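
xts_done now wipes both expanded key schedules before the context is discarded instead of leaving key material behind in freed memory; the kernel uses corecrypto's cc_clear(len, ptr) for this. A user-space sketch of the same idea, writing zeros through a volatile pointer so the compiler cannot drop the stores as dead writes (memset_s or explicit_bzero would serve the same purpose where available; the context layout below is hypothetical):

    #include <cstddef>

    // Best-effort secure wipe: the volatile accesses keep the stores even though
    // the buffer is about to go out of use.
    static void secure_clear(void *buf, size_t len)
    {
        volatile unsigned char *p = static_cast<volatile unsigned char *>(buf);
        while (len--) *p++ = 0;
    }

    struct XtsContext {
        unsigned char enc[240];   // illustrative expanded-key storage
        unsigned char dec[240];
    };

    static void xts_done_sketch(XtsContext *xts)
    {
        secure_clear(xts->enc, sizeof(xts->enc));
        secure_clear(xts->dec, sizeof(xts->dec));
    }
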
index e85479d3bd803906d1628a7a1de1143a359af914..3143c01618f58da8b36d99b8956c9d13fc7190d5 100644 (file)
@@ -83,7 +83,7 @@ void SHA384_Update(SHA384_CTX *ctx, const void *data, size_t len)
 void SHA384_Final(void *digest, SHA384_CTX *ctx)
 {
        const struct ccdigest_info *di;
-       di=g_crypto_funcs->ccsha512_di;
+       di=g_crypto_funcs->ccsha384_di;
 
        ccdigest_final(di, ctx->ctx, digest);
 }
index 25ff477ff7424472c86c105d81ffac637070fbaa..5affc1eefdc73334b3a4eb6dac6882104c5707ec 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -27,6 +27,8 @@
  */
 
 #include <libkern/OSAtomic.h>
+#include <kern/debug.h>
+#include <machine/atomic.h>
 
 enum {
        false   = 0,
@@ -37,28 +39,109 @@ enum {
 #define NULL ((void *)0)
 #endif
 
+#define ATOMIC_DEBUG DEBUG
+
+#if ATOMIC_DEBUG
+#define ALIGN_TEST(p,t) do{if((uintptr_t)p&(sizeof(t)-1)) panic("Unaligned atomic pointer %p\n",p);}while(0)
+#else
+#define ALIGN_TEST(p,t) do{}while(0)
+#endif
+
+// 19831745 - start of big hammer!
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wcast-qual"
+
 /*
  * atomic operations
- *     these are _the_ atomic operations, currently cast atop CompareAndSwap,
- *     which is implemented in assembler.      if we are worried about the cost of
- *     this layering (we shouldn't be), then all this stuff could be
- *     implemented in assembler, as it is in MacOS8/9
- *     (derived from SuperMario/NativeLibs/IO/DriverServices/Synchronization.s,
- *     which I wrote for NuKernel in a previous life with a different last name...)
- *
- * native Boolean      CompareAndSwap(UInt32 oldValue, UInt32 newValue, UInt32 * oldValuePtr);
- *
- * We've since implemented a few more of these -- OSAddAtomic, OSDequeueAtomic,
- * OSEnqueueAtomic etc -- in assembler, either for speed or correctness.  See also the
- * commpage atomic operations, and the platform specific versions.
- * Like standards, there are a lot of atomic ops to choose from!
+ *     These are _the_ atomic operations, now implemented via compiler built-ins.
+ *     It is expected that this C implementation is a candidate for Link-Time-
+ *     Optimization inlining, whereas the assembler implementations they replace
+ *     were not.
  */
 
-#if defined(__i386__) || defined(__x86_64__)
-/* Implemented in assembly for i386 and x86_64 */
+#undef OSCompareAndSwap8
+Boolean OSCompareAndSwap8(UInt8 oldValue, UInt8 newValue, volatile UInt8 *address)
+{
+       return __c11_atomic_compare_exchange_strong((_Atomic UInt8 *)address, &oldValue, newValue,
+                       memory_order_acq_rel_smp, memory_order_relaxed);
+}
+
+#undef OSCompareAndSwap16
+Boolean OSCompareAndSwap16(UInt16 oldValue, UInt16 newValue, volatile UInt16 *address)
+{
+       return __c11_atomic_compare_exchange_strong((_Atomic UInt16 *)address, &oldValue, newValue,
+                       memory_order_acq_rel_smp, memory_order_relaxed);
+}
+
+#undef OSCompareAndSwap
+Boolean OSCompareAndSwap(UInt32 oldValue, UInt32 newValue, volatile UInt32 *address)
+{
+       ALIGN_TEST(address, UInt32);
+       return __c11_atomic_compare_exchange_strong((_Atomic UInt32 *)address, &oldValue, newValue,
+                       memory_order_acq_rel_smp, memory_order_relaxed);
+}
+
+#undef OSCompareAndSwap64
+Boolean OSCompareAndSwap64(UInt64 oldValue, UInt64 newValue, volatile UInt64 *address)
+{
+       /*
+        * _Atomic uint64 requires 8-byte alignment on all architectures.
+        * This silences the compiler cast warning.  ALIGN_TEST() verifies
+        * that the cast was legal, if defined.
+        */
+       _Atomic UInt64 *aligned_addr = (_Atomic UInt64 *)(uintptr_t)address;
+
+       ALIGN_TEST(address, UInt64);
+       return __c11_atomic_compare_exchange_strong(aligned_addr, &oldValue, newValue,
+                       memory_order_acq_rel_smp, memory_order_relaxed);
+}
+
+#undef OSCompareAndSwapPtr
+Boolean OSCompareAndSwapPtr(void *oldValue, void *newValue, void * volatile *address)
+{
+#if __LP64__
+  return OSCompareAndSwap64((UInt64)oldValue, (UInt64)newValue, (volatile UInt64 *)address);
 #else
-#error Unsupported arch
+  return OSCompareAndSwap((UInt32)oldValue, (UInt32)newValue, (volatile UInt32 *)address);
 #endif
+}
+
+SInt8 OSAddAtomic8(SInt32 amount, volatile SInt8 *address)
+{
+       return __c11_atomic_fetch_add((_Atomic SInt8*)address, amount, memory_order_relaxed);
+}
+
+SInt16 OSAddAtomic16(SInt32 amount, volatile SInt16 *address)
+{
+       return __c11_atomic_fetch_add((_Atomic SInt16*)address, amount, memory_order_relaxed);
+}
+
+#undef OSAddAtomic
+SInt32 OSAddAtomic(SInt32 amount, volatile SInt32 *address)
+{
+       ALIGN_TEST(address, UInt32);
+       return __c11_atomic_fetch_add((_Atomic SInt32*)address, amount, memory_order_relaxed);
+}
+
+#undef OSAddAtomic64
+SInt64 OSAddAtomic64(SInt64 amount, volatile SInt64 *address)
+{
+       _Atomic SInt64* aligned_address = (_Atomic SInt64*)(uintptr_t)address;
+
+       ALIGN_TEST(address, SInt64);
+       return __c11_atomic_fetch_add(aligned_address, amount, memory_order_relaxed);
+}
+
+#undef OSAddAtomicLong
+long
+OSAddAtomicLong(long theAmount, volatile long *address)
+{
+#ifdef __LP64__
+       return (long)OSAddAtomic64((SInt64)theAmount, (SInt64*)address);
+#else
+       return (long)OSAddAtomic((SInt32)theAmount, address);
+#endif
+}
 
 #undef OSIncrementAtomic
 SInt32 OSIncrementAtomic(volatile SInt32 * value)
@@ -72,58 +155,24 @@ SInt32     OSDecrementAtomic(volatile SInt32 * value)
        return OSAddAtomic(-1, value);
 }
 
-static UInt32  OSBitwiseAtomic(UInt32 and_mask, UInt32 or_mask, UInt32 xor_mask, volatile UInt32 * value)
-{
-       UInt32  oldValue;
-       UInt32  newValue;
-       
-       do {
-               oldValue = *value;
-               newValue = ((oldValue & and_mask) | or_mask) ^ xor_mask;
-       } while (! OSCompareAndSwap(oldValue, newValue, value));
-       
-       return oldValue;
-}
-
 #undef OSBitAndAtomic
 UInt32 OSBitAndAtomic(UInt32 mask, volatile UInt32 * value)
 {
-       return OSBitwiseAtomic(mask, 0, 0, value);
+       return __c11_atomic_fetch_and((_Atomic UInt32*)value, mask, memory_order_relaxed);
 }
 
 #undef OSBitOrAtomic
 UInt32 OSBitOrAtomic(UInt32 mask, volatile UInt32 * value)
 {
-       return OSBitwiseAtomic((UInt32) -1, mask, 0, value);
+       return __c11_atomic_fetch_or((_Atomic UInt32*)value, mask, memory_order_relaxed);
 }
 
 #undef OSBitXorAtomic
 UInt32 OSBitXorAtomic(UInt32 mask, volatile UInt32 * value)
 {
-       return OSBitwiseAtomic((UInt32) -1, 0, mask, value);
+       return __c11_atomic_fetch_xor((_Atomic UInt32*)value, mask, memory_order_relaxed);
 }
 
-#if defined(__i386__) || defined(__x86_64__)
-static Boolean OSCompareAndSwap8(UInt8 oldValue8, UInt8 newValue8, volatile UInt8 * value8)
-{
-       UInt32                          mask            = 0x000000ff;
-       UInt32                          alignment       = (UInt32)((unsigned long) value8) & (sizeof(UInt32) - 1);
-       UInt32                          shiftValues = (24 << 24) | (16 << 16) | (8 << 8);
-       int                                     shift           = (UInt32) *(((UInt8 *) &shiftValues) + alignment);
-       volatile UInt32 *       value32         = (volatile UInt32 *) ((uintptr_t)value8 - alignment);
-       UInt32                          oldValue;
-       UInt32                          newValue;
-
-       mask <<= shift;
-
-       oldValue = *value32;
-       oldValue = (oldValue & ~mask) | (oldValue8 << shift);
-       newValue = (oldValue & ~mask) | (newValue8 << shift);
-
-       return OSCompareAndSwap(oldValue, newValue, value32);
-}
-#endif
-
 static Boolean OSTestAndSetClear(UInt32 bit, Boolean wantSet, volatile UInt8 * startAddress)
 {
        UInt8           mask = 1;
@@ -139,7 +188,8 @@ static Boolean      OSTestAndSetClear(UInt32 bit, Boolean wantSet, volatile UInt8 * s
                if ((oldValue & mask) == wantValue) {
                        break;
                }
-       } while (! OSCompareAndSwap8(oldValue, (oldValue & ~mask) | wantValue, startAddress));
+       } while (! __c11_atomic_compare_exchange_strong((_Atomic UInt8 *)startAddress,
+               &oldValue, (oldValue & ~mask) | wantValue, memory_order_relaxed, memory_order_relaxed));
        
        return (oldValue & mask) == wantValue;
 }
@@ -168,70 +218,21 @@ SInt8     OSDecrementAtomic8(volatile SInt8 * value)
        return OSAddAtomic8(-1, value);
 }
 
-#if defined(__i386__) || defined(__x86_64__)
-SInt8  OSAddAtomic8(SInt32 amount, volatile SInt8 * value)
-{
-       SInt8   oldValue;
-       SInt8   newValue;
-       
-       do {
-               oldValue = *value;
-               newValue = oldValue + amount;
-       } while (! OSCompareAndSwap8((UInt8) oldValue, (UInt8) newValue, (volatile UInt8 *) value));
-       
-       return oldValue;
-}
-#endif
-
-static UInt8   OSBitwiseAtomic8(UInt32 and_mask, UInt32 or_mask, UInt32 xor_mask, volatile UInt8 * value)
-{
-       UInt8   oldValue;
-       UInt8   newValue;
-       
-       do {
-               oldValue = *value;
-               newValue = ((oldValue & and_mask) | or_mask) ^ xor_mask;
-       } while (! OSCompareAndSwap8(oldValue, newValue, value));
-       
-       return oldValue;
-}
-
 UInt8  OSBitAndAtomic8(UInt32 mask, volatile UInt8 * value)
 {
-       return OSBitwiseAtomic8(mask, 0, 0, value);
+       return __c11_atomic_fetch_and((_Atomic UInt8 *)value, mask, memory_order_relaxed);
 }
 
 UInt8  OSBitOrAtomic8(UInt32 mask, volatile UInt8 * value)
 {
-       return OSBitwiseAtomic8((UInt32) -1, mask, 0, value);
+       return __c11_atomic_fetch_or((_Atomic UInt8 *)value, mask, memory_order_relaxed);
 }
 
 UInt8  OSBitXorAtomic8(UInt32 mask, volatile UInt8 * value)
 {
-       return OSBitwiseAtomic8((UInt32) -1, 0, mask, value);
+       return __c11_atomic_fetch_xor((_Atomic UInt8 *)value, mask, memory_order_relaxed);
 }
 
-#if defined(__i386__) || defined(__x86_64__)
-static Boolean OSCompareAndSwap16(UInt16 oldValue16, UInt16 newValue16, volatile UInt16 * value16)
-{
-       UInt32                          mask            = 0x0000ffff;
-       UInt32                          alignment       = (UInt32)((unsigned long) value16) & (sizeof(UInt32) - 1);
-       UInt32                          shiftValues = (16 << 24) | (16 << 16);
-       UInt32                          shift           = (UInt32) *(((UInt8 *) &shiftValues) + alignment);
-       volatile UInt32 *       value32         = (volatile UInt32 *) (((unsigned long) value16) - alignment);
-       UInt32                          oldValue;
-       UInt32                          newValue;
-
-       mask <<= shift;
-
-       oldValue = *value32;
-       oldValue = (oldValue & ~mask) | (oldValue16 << shift);
-       newValue = (oldValue & ~mask) | (newValue16 << shift);
-
-       return OSCompareAndSwap(oldValue, newValue, value32);
-}
-#endif
-
 SInt16 OSIncrementAtomic16(volatile SInt16 * value)
 {
        return OSAddAtomic16(1, value);
@@ -242,46 +243,21 @@ SInt16    OSDecrementAtomic16(volatile SInt16 * value)
        return OSAddAtomic16(-1, value);
 }
 
-#if defined(__i386__) || defined(__x86_64__)
-SInt16 OSAddAtomic16(SInt32 amount, volatile SInt16 * value)
-{
-       SInt16  oldValue;
-       SInt16  newValue;
-       
-       do {
-               oldValue = *value;
-               newValue = oldValue + amount;
-       } while (! OSCompareAndSwap16((UInt16) oldValue, (UInt16) newValue, (volatile UInt16 *) value));
-       
-       return oldValue;
-}
-#endif
-
-static UInt16  OSBitwiseAtomic16(UInt32 and_mask, UInt32 or_mask, UInt32 xor_mask, volatile UInt16 * value)
-{
-       UInt16  oldValue;
-       UInt16  newValue;
-       
-       do {
-               oldValue = *value;
-               newValue = ((oldValue & and_mask) | or_mask) ^ xor_mask;
-       } while (! OSCompareAndSwap16(oldValue, newValue, value));
-       
-       return oldValue;
-}
-
 UInt16 OSBitAndAtomic16(UInt32 mask, volatile UInt16 * value)
 {
-       return OSBitwiseAtomic16(mask, 0, 0, value);
+       return __c11_atomic_fetch_and((_Atomic UInt16 *)value, mask, memory_order_relaxed);
 }
 
 UInt16 OSBitOrAtomic16(UInt32 mask, volatile UInt16 * value)
 {
-       return OSBitwiseAtomic16((UInt32) -1, mask, 0, value);
+       return __c11_atomic_fetch_or((_Atomic UInt16 *)value, mask, memory_order_relaxed);
 }
 
 UInt16 OSBitXorAtomic16(UInt32 mask, volatile UInt16 * value)
 {
-       return OSBitwiseAtomic16((UInt32) -1, 0, mask, value);
+       return __c11_atomic_fetch_xor((_Atomic UInt16 *)value, mask, memory_order_relaxed);
 }
 
+// 19831745 - end of big hammer!
+#pragma clang diagnostic pop
+
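
The assembler compare-and-swap and arithmetic primitives are replaced above with __c11_atomic compiler built-ins so the operations become plain C that link-time optimization can inline. Outside the kernel the same shapes map directly onto std::atomic; a rough analogue of the 32-bit wrappers, with orderings mirroring the acquire-release success / relaxed failure and relaxed add used in the patch (minus the kernel-only _smp variants):

    #include <atomic>
    #include <cstdint>

    // Compare-and-swap returning whether the store happened, as OSCompareAndSwap does.
    bool compare_and_swap32(uint32_t oldValue, uint32_t newValue,
                            std::atomic<uint32_t> *address)
    {
        return address->compare_exchange_strong(oldValue, newValue,
                                                std::memory_order_acq_rel,
                                                std::memory_order_relaxed);
    }

    // Fetch-and-add with relaxed ordering, as in OSAddAtomic; returns the previous value.
    int32_t add_atomic32(int32_t amount, std::atomic<int32_t> *value)
    {
        return value->fetch_add(amount, std::memory_order_relaxed);
    }
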
index 7cb84710872a392c6ba8d0d4091108e41b158583..305cfc3cb94d9cd3b224d91b727389bd6f327149 100644 (file)
@@ -49,6 +49,9 @@ __BEGIN_DECLS
 extern vm_offset_t min_valid_stack_address(void);
 extern vm_offset_t max_valid_stack_address(void);
 
+// From osfmk/kern/printf.c
+extern boolean_t doprnt_hide_pointers;
+
 // From osfmk/kmod.c
 extern void kmod_dump_log(vm_offset_t *addr, unsigned int cnt, boolean_t doUnslide);
 
@@ -106,12 +109,15 @@ OSReportWithBacktrace(const char *str, ...)
 
     lck_mtx_lock(sOSReportLock);
     {
+        boolean_t old_doprnt_hide_pointers = doprnt_hide_pointers;
+        doprnt_hide_pointers = FALSE;
         printf("%s\nBacktrace 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", buf, 
             (unsigned long) VM_KERNEL_UNSLIDE(bt[2]), (unsigned long) VM_KERNEL_UNSLIDE(bt[3]), 
             (unsigned long) VM_KERNEL_UNSLIDE(bt[4]), (unsigned long) VM_KERNEL_UNSLIDE(bt[5]), 
             (unsigned long) VM_KERNEL_UNSLIDE(bt[6]), (unsigned long) VM_KERNEL_UNSLIDE(bt[7]), 
             (unsigned long) VM_KERNEL_UNSLIDE(bt[8]));
         kmod_dump_log((vm_offset_t *) &bt[2], cnt - 2, TRUE);
+        doprnt_hide_pointers = old_doprnt_hide_pointers;
     }
     lck_mtx_unlock(sOSReportLock);
 }
@@ -166,6 +172,7 @@ OSPrintBacktrace(void)
 unsigned OSBacktrace(void **bt, unsigned maxAddrs)
 {
     unsigned frame;
+    if (!current_thread()) return 0;
 
 #if   __x86_64__
 #define SANE_x86_64_FRAME_SIZE (kernel_stack_size >> 1)
index 59f788571d3f597bc783837872c5efe11b97d52b..cc20497a10f4c8f899f4f300bb8589648e1a545f 100644 (file)
@@ -106,7 +106,7 @@ $(shell [ -d $(OBJROOT) ] || mkdir -p $(OBJROOT))
 $(OBJROOT)/%.o : $(OBJSRC)/%.c
        $(CC) $(RC_CFLAGS) $(CFLAGS) $(DEFINES) $(OPTIM) $(INCLUDES) -c $< -o $@
 $(OBJROOT)/%.o : $(TESTSRC)/%.c
-       $(CC) $(RC_CFLAGS) $(CFLAGS) $(DEFINES) -O0 -DDEBUG $(INCLUDES) -I $(SRCROOT) -c $< -o $@
+       $(CC) $(RC_CFLAGS) $(CFLAGS) $(DEFINES) -O0 -DDEBUG $(INCLUDES) -I$(SRCROOT) -c $< -o $@
 
 SRCROOTESC=$(subst /,\/,$(SRCROOT))
 OBJROOTESC=$(subst /,\/,$(OBJROOT))
@@ -114,7 +114,7 @@ SEDOBJS=sed -E 's/(^[a-z_]+)\.o/$(OBJROOTESC)\/\1\.o $(OBJROOTESC)\/\1\.d/'
 SEDSRCS=sed -E 's/ ([a-z_]+\.[ch])/ $(SRCROOTESC)\/\1/g'
 $(OBJROOT)/%.d: $(OBJSRC)/%.c
        @set -e; rm -f $@; \
-       $(CC) $(INCLUDES) -MM $< | $(SEDOBJS) | $(SEDSRCS) > $@;
+       $(CC) $(CFLAGS) $(DEFINES) $(INCLUDES) -MM $< | $(SEDOBJS) | $(SEDSRCS) > $@;
 
 # Rules
 release: OPTIM=-Os -dynamic
@@ -194,8 +194,8 @@ $(TESTDST)/copyrighttest: $(COPYTESTOBJS)
        install -c -m 755 $(OBJROOT)/copyrighttest $@
 
 analyze:
-       @$(CLANG_ANALYZER) *.c
-       @$(CLANG_ANALYZER) -I. tests/*.c
+       @$(CLANG_ANALYZER) $(CFLAGS) $(INCLUDES) $(filter-out WKdm%.c,$(wildcard *.c))
+       @$(CLANG_ANALYZER) $(CFLAGS) $(INCLUDES) -I$(SRCROOT) tests/*.c
        @rm -f *.plist
 
 clean: 
index da3fbec7d23a92947ea5cf2e9ccbc3a6cb8f1b56..728774c371805130b73ca73fd3d06215b0e663b0 100644 (file)
 #define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld"
 #include <AssertMacros.h>
 
+#if KERNEL
+#define __KXLD_KERNEL_UNUSED __unused
+#else
+#define __KXLD_KERNEL_UNUSED
+#endif
+
 #if !KERNEL
     #include "kxld.h"
     #include "kxld_types.h"
@@ -104,7 +110,8 @@ static void clear_context(KXLDContext *context);
 kern_return_t
 kxld_create_context(KXLDContext **_context, 
     KXLDAllocateCallback allocate_callback, KXLDLoggingCallback logging_callback,
-    KXLDFlags flags, cpu_type_t cputype, cpu_subtype_t cpusubtype)
+    KXLDFlags flags, cpu_type_t cputype, cpu_subtype_t cpusubtype,
+    vm_size_t pagesize __KXLD_KERNEL_UNUSED)
 {
     kern_return_t rval = KERN_FAILURE;
     KXLDContext       * context         = NULL;
@@ -127,6 +134,12 @@ kxld_create_context(KXLDContext **_context,
     context->cputype = cputype;
     context->cpusubtype = cpusubtype;
 
+#if !KERNEL
+    if (pagesize) {
+        kxld_set_cross_link_page_size(pagesize);
+    }
+#endif /* !KERNEL */
+
     kxld_set_logging_callback(logging_callback);
 
     context->kext = kxld_alloc(kxld_kext_sizeof());
@@ -467,7 +480,7 @@ allocate_kext(KXLDContext *context, void *callback_data,
 
     kxld_kext_get_vmsize(context->kext, &header_size, &vmsize);
     vmaddr = context->allocate_callback(vmsize, &flags, callback_data);
-    require_action(!(vmaddr & (PAGE_SIZE-1)), finish,
+    require_action(!(vmaddr & (kxld_get_effective_page_size()-1)), finish,
         kxld_log(kKxldLogLinking, kKxldLogErr,
             "Load address %p is not page-aligned.",
             (void *) (uintptr_t) vmaddr));
index c0bb5e276b117b53ae771f3290e60a8459c1ed4c..3b0dffe5e33529c1c13648be95b02c867720a550 100644 (file)
@@ -53,8 +53,6 @@ kxld_demangle(const char *str, char **buffer __unused, size_t *length __unused)
     char *demangled = NULL;
     int status;
 
-    if (!str) goto finish;
-
     rval = str;
 
     if (!buffer || !length) goto finish;
index 5c38abc8f940441018037b86423dec4dfacc3288..a5250ab49f9b11d4fe04332b5ad4f2029a4e6808 100644 (file)
@@ -47,6 +47,6 @@
  * 
  */
 const char * kxld_demangle(const char *str, char **buffer, size_t *length)
-    __attribute__((pure, nonnull, visibility("hidden")));
+    __attribute__((pure, nonnull(1), visibility("hidden")));
 
 #endif /* !_KXLD_DEMANGLE_H_ */
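
Narrowing the attribute from nonnull to nonnull(1) promises the compiler only that str is non-NULL; buffer and length may legitimately be NULL, which is exactly the case the .c change above keeps guarding with if (!buffer || !length) while dropping the now-redundant str check. A tiny illustration with a hypothetical function of the same shape:

    #include <cstddef>

    // Only the first parameter is promised non-null; buf and len are optional outputs.
    __attribute__((nonnull(1)))
    const char *demangle_sketch(const char *str, char **buf, size_t *len)
    {
        if (!buf || !len) return str;   // optional outputs absent: hand back the input
        // ... demangling itself elided in this sketch ...
        return str;
    }

With the blanket nonnull, a caller passing NULL for buf or len would be flagged, and the callee's own NULL checks could be optimized away, contradicting the function's documented contract.
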
index d936c7853edc56855759106f515bcdc6745b779d..36383e41e03a729c98b8cbfde74dd6907346f7b7 100644 (file)
@@ -634,6 +634,7 @@ init_from_final_linked_image(KXLDObject *object, u_int *filetype_out,
             break;
         case LC_VERSION_MIN_MACOSX:
         case LC_VERSION_MIN_IPHONEOS:
+        case LC_VERSION_MIN_WATCHOS:
             versionmin_hdr = (struct version_min_command *) cmd_hdr;
             kxld_versionmin_init_from_macho(&object->versionmin, versionmin_hdr);
             break;
@@ -676,7 +677,7 @@ init_from_final_linked_image(KXLDObject *object, u_int *filetype_out,
         default:
             rval=KERN_FAILURE;
             kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
-                "Invalid segment type in MH_KEXT_BUNDLE kext: %u.", cmd_hdr->cmd);
+                "Invalid load command type in MH_KEXT_BUNDLE kext: %u.", cmd_hdr->cmd);
             goto finish;
         }
 
@@ -960,12 +961,13 @@ init_from_object(KXLDObject *object)
             break;
         case LC_VERSION_MIN_MACOSX:
         case LC_VERSION_MIN_IPHONEOS:
+        case LC_VERSION_MIN_WATCHOS:
         case LC_SOURCE_VERSION:
             /* Not supported for object files, fall through */
         default:
             rval = KERN_FAILURE;
             kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
-                "Invalid segment type in MH_OBJECT kext: %u.", cmd_hdr->cmd);
+                "Invalid load command type in MH_OBJECT kext: %u.", cmd_hdr->cmd);
             goto finish;
         }
     }
@@ -1141,7 +1143,7 @@ get_macho_data_size(const KXLDObject *object)
          */
         if ((symtab_size + reloc_size) > seg_vmsize) {
             u_long  overflow = (symtab_size + reloc_size) - seg_vmsize;
-            data_size += round_page(overflow);
+            data_size += kxld_round_page_cross_safe(overflow);
         }
     }
 #endif  // KXLD_PIC_KEXTS
@@ -1710,7 +1712,7 @@ kxld_object_get_vmsize(const KXLDObject *object, u_long *header_size,
     /* vmsize is the padded header page(s) + segment vmsizes */
 
     *header_size = (object->is_final_image) ?
-        0 : round_page(get_macho_header_size(object));
+        0 : (u_long)kxld_round_page_cross_safe(get_macho_header_size(object));
     *vmsize = *header_size + get_macho_data_size(object);
 
 }
@@ -1746,7 +1748,6 @@ kxld_object_export_linked_object(const KXLDObject *object,
     /* Calculate the size of the headers and data */
 
     header_size = get_macho_header_size(object);
-    data_offset = (object->is_final_image) ? header_size : round_page(header_size);
     size = object->output_buffer_size;
 
     /* Copy data to the file */
index 41e899eac27dad3d8667d730ad499bff90dc177d..bb93003a85c6631ce3d5a69d77ec0e50e28aad21 100644 (file)
@@ -659,7 +659,7 @@ get_pointer_at_addr_32(const KXLDRelocator *relocator,
     
     check(relocator);
 
-    addr = *(const uint32_t *) ((void *) (data + offset));
+    addr = *(const uint32_t *) ((const void *) (data + offset));
 #if !KERNEL
     if (relocator->swap) {
         addr = OSSwapInt32(addr);
@@ -681,7 +681,7 @@ get_pointer_at_addr_64(const KXLDRelocator *relocator,
     
     check(relocator);
 
-    addr = *(const uint64_t *) ((void *) (data + offset));
+    addr = *(const uint64_t *) ((const void *) (data + offset));
 #if !KERNEL
     if (relocator->swap) {
         addr = OSSwapInt64(addr);
index 4ea424356198ac738e66cb5d41896f5db29f198e..00ef81333ff5a2d18ccf58132737dcaf2cd414f0 100644 (file)
@@ -195,7 +195,7 @@ kxld_seg_finalize_object_segment(KXLDArray *segarray, KXLDArray *section_order,
 
     /* Set the initial link address at the end of the header pages */
 
-    seg->link_addr = round_page(hdrsize);
+    seg->link_addr = kxld_round_page_cross_safe(hdrsize);
 
     /* Fix up all of the section addresses */
 
@@ -209,7 +209,7 @@ kxld_seg_finalize_object_segment(KXLDArray *segarray, KXLDArray *section_order,
 
     /* Finish initializing the segment */
 
-    seg->vmsize = round_page(sect_offset) - seg->link_addr;
+    seg->vmsize = kxld_round_page_cross_safe(sect_offset) - seg->link_addr;
 
     rval = KERN_SUCCESS;
 finish:
@@ -420,7 +420,7 @@ kxld_seg_init_linkedit(KXLDArray *segs)
     le = kxld_array_get_item(segs, 1);
 
     strlcpy(le->segname, SEG_LINKEDIT, sizeof(le->segname));
-    le->link_addr = round_page(seg->link_addr + seg->vmsize);
+    le->link_addr = kxld_round_page_cross_safe(seg->link_addr + seg->vmsize);
     le->maxprot = VM_PROT_ALL;
     le->initprot = VM_PROT_DEFAULT;
 
@@ -511,7 +511,7 @@ kxld_seg_get_macho_data_size(const KXLDSeg *seg)
         size += kxld_sect_get_macho_data_size(sect);
     }
 
-    return round_page(size);
+    return kxld_round_page_cross_safe(size);
 }
 #endif
 
@@ -572,7 +572,7 @@ kxld_seg_export_macho_to_file_buffer(const KXLDSeg *seg, u_char *buf,
         hdr64->filesize = (uint64_t) (*data_offset - base_data_offset);
     }
 
-    *data_offset = round_page(*data_offset);
+    *data_offset = (u_long)kxld_round_page_cross_safe(*data_offset);
 
     rval = KERN_SUCCESS;
 
@@ -743,8 +743,15 @@ kxld_seg_finish_init(KXLDSeg *seg)
     KXLDSect *sect = NULL;
     kxld_addr_t maxaddr = 0;
     kxld_size_t maxsize = 0;
-    
-    if (seg->sects.nitems) {
+
+    /* If we already have a size for this segment (e.g. from the mach-o load
+     * command) then don't recalculate the segment size. This is safer since 
+     * when we recalculate we are making assumptions about page alignment and 
+     * padding that the kext mach-o file was built with. Better to trust the 
+     * mach-o info, if we have it. If we don't (i.e. vmsize == 0) then add up 
+     * the section sizes and take a best guess at page padding.
+     */
+    if ((seg->vmsize == 0) && (seg->sects.nitems)) {
         for (i = 0; i < seg->sects.nitems; ++i) {
             sect = get_sect_by_index(seg, i);
             require_action(sect, finish, rval=KERN_FAILURE);
@@ -754,11 +761,8 @@ kxld_seg_finish_init(KXLDSeg *seg)
             }
         }
 
-        /* XXX Cross architecture linking will fail if the page size ever differs
-         * from 4096.  (As of this writing, we're fine on i386, x86_64, arm, and
-         * arm64.)
-         */
-        seg->vmsize = round_page(maxaddr + maxsize - seg->base_addr);
+        seg->vmsize = kxld_round_page_cross_safe(maxaddr + 
+                                                 maxsize - seg->base_addr);
     }
 
     rval = KERN_SUCCESS;
@@ -772,14 +776,8 @@ finish:
 void
 kxld_seg_set_vm_protections(KXLDSeg *seg, boolean_t strict_protections)
 {
-    /* This is unnecessary except to make the clang analyzer happy.  When
-     * the analyzer no longer ignores nonnull attributes for if statements,
-     * we can remove this line.
-     */
-    if (!seg) return;
-
     if (strict_protections) {
-        if (streq_safe(seg->segname, SEG_TEXT, const_strlen(SEG_TEXT))) {
+        if (!strncmp(seg->segname, SEG_TEXT, const_strlen(SEG_TEXT))) {
             seg->initprot = TEXT_SEG_PROT;
             seg->maxprot = TEXT_SEG_PROT;
         } else {
@@ -828,6 +826,6 @@ kxld_seg_populate_linkedit(KXLDSeg *seg, const KXLDSymtab *symtab, boolean_t is_
     }
 #endif /* KXLD_PIC_KEXTS */
 
-    seg->vmsize = round_page(size);
+    seg->vmsize = kxld_round_page_cross_safe(size);
 }
 
index 2b10ce68722002832eb0c0265132943641b09fea..31fcf7b5177d42c9cb6756f803ad3cc86b34e428 100644 (file)
@@ -44,7 +44,7 @@ kxld_create_context(KXLDContext **_context __unused,
     KXLDAllocateCallback allocate_callback __unused,
     KXLDLoggingCallback logging_callback __unused,
     KXLDFlags flags __unused, cpu_type_t cputype __unused,
-    cpu_subtype_t cpusubtype __unused)
+    cpu_subtype_t cpusubtype __unused, vm_size_t pagesize __unused)
 {
     return KERN_SUCCESS;
 }
index d82cd5cce9a884ecd6aad9ab82545e54c2612ccb..252d39e3b4e8f4c2842fb755cc0333da8b2ff333 100644 (file)
@@ -261,7 +261,7 @@ init_predicates(KXLDSym *sym, u_char n_type, u_short n_desc)
         }
 
         /* Set the C++-specific fields */
-        if ((streq_safe(CXX_PREFIX, sym->name, const_strlen(CXX_PREFIX)))) {
+        if (!strncmp(CXX_PREFIX, sym->name, const_strlen(CXX_PREFIX))) {
             sym->is_cxx = 1;
 
             if (streq_safe(sym->name, METACLASS_VTABLE_PREFIX, 
index 67d838fe8f1ba5466a3b440c0322609a3f406927..af9f16e4e46b1aa1a8ed58e8ea9772c6e6d5cd83 100644 (file)
@@ -65,6 +65,12 @@ static KXLDLoggingCallback s_logging_callback = NULL;
 static const char *s_callback_name = NULL;
 static void *s_callback_data = NULL;
 
+#if !KERNEL
+static boolean_t s_cross_link_enabled  = FALSE;
+static kxld_size_t s_cross_link_page_size = PAGE_SIZE;
+#endif
+
+
 /*******************************************************************************
 *******************************************************************************/
 void 
@@ -165,7 +171,7 @@ kxld_page_alloc_untracked(size_t size)
     if (size < KALLOC_MAX) {
         ptr = kalloc(size);
     } else {
-        rval = kmem_alloc(kernel_map, &addr, size);
+        rval = kmem_alloc(kernel_map, &addr, size, VM_KERN_MEMORY_OSKEXT);
         if (!rval) ptr = (void *) addr;
     }
 #else /* !KERNEL */
@@ -204,7 +210,7 @@ kxld_alloc_pageable(size_t size)
     kern_return_t rval = 0;
     vm_offset_t ptr = 0;
 
-    rval = kmem_alloc_pageable(kernel_map, &ptr, size);
+    rval = kmem_alloc_pageable(kernel_map, &ptr, size, VM_KERN_MEMORY_OSKEXT);
     if (rval) ptr = 0;
 
     return (void *) ptr;
@@ -804,3 +810,53 @@ kxld_print_memory_report(void)
 #endif
 }
 
+/*********************************************************************
+*********************************************************************/
+#if !KERNEL
+boolean_t kxld_set_cross_link_page_size(kxld_size_t target_page_size)
+{
+    // verify radix 2
+    if ((target_page_size != 0) && 
+        ((target_page_size & (target_page_size - 1)) == 0)) {
+
+        s_cross_link_enabled = TRUE;
+        s_cross_link_page_size = target_page_size;
+
+        return TRUE;   
+    } else {
+        return FALSE;
+    }
+}
+#endif /* !KERNEL */
+
+/*********************************************************************
+*********************************************************************/
+kxld_size_t kxld_get_effective_page_size(void)
+{
+#if KERNEL
+    return PAGE_SIZE;
+#else
+    if (s_cross_link_enabled) {
+        return s_cross_link_page_size;
+    } else {
+        return PAGE_SIZE;
+    }
+#endif /* KERNEL */
+}
+
+/*********************************************************************
+*********************************************************************/
+kxld_addr_t kxld_round_page_cross_safe(kxld_addr_t offset)
+{
+#if KERNEL
+    return round_page(offset);
+#else
+    // assume s_cross_link_page_size is power of 2
+    if (s_cross_link_enabled) {
+        return (offset + (s_cross_link_page_size - 1)) & 
+               (~(s_cross_link_page_size - 1));
+    } else {
+        return round_page(offset);
+    }
+#endif /* KERNEL */
+}
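
kxld now rounds segment and header sizes through kxld_round_page_cross_safe so that, when linking from user space for a target whose page size differs from the host's, the round-up uses the cross-link page size installed by kxld_set_cross_link_page_size (accepted only if it is a power of two) rather than the host PAGE_SIZE. The arithmetic is the usual power-of-two mask trick; a standalone sketch with illustrative helper names:

    #include <cassert>
    #include <cstdint>

    // True if x is a nonzero power of two (the test applied before a
    // cross-link page size is accepted).
    static bool is_pow2(uint64_t x)
    {
        return x != 0 && (x & (x - 1)) == 0;
    }

    // Round offset up to the next multiple of pagesize; pagesize must be a power of two.
    static uint64_t round_up(uint64_t offset, uint64_t pagesize)
    {
        assert(is_pow2(pagesize));
        return (offset + (pagesize - 1)) & ~(pagesize - 1);
    }

For example, round_up(9 * 1024, 16 * 1024) is 16384, where rounding with a 4 KiB host page size would have produced 12288 and under-sized the segment for a 16 KiB-page target.
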
index 0eb0f2f7a36987bf8273ede84a252a9157d732cb..f20bc18e2f15f02714b2739033709b7c4053fc84 100644 (file)
@@ -205,4 +205,14 @@ const char * kxld_strstr(const char *s, const char *find)
 void kxld_print_memory_report(void) 
     __attribute__((visibility("hidden")));
 
+/*******************************************************************************
+* Cross Linking
+*******************************************************************************/
+#if !KERNEL
+boolean_t kxld_set_cross_link_page_size(kxld_size_t target_page_size);
+#endif /* !KERNEL */
+kxld_size_t kxld_get_effective_page_size(void);
+kxld_addr_t kxld_round_page_cross_safe(kxld_addr_t addr);
+
+
 #endif /* _KXLD_UTIL_H_ */
index 9b4753c4b6a72f836532fe7c30422db62969ee7f..e422495e5844441790edf91ae01b987511b0ec8e 100644 (file)
@@ -42,7 +42,7 @@ kxld_versionmin_init_from_macho(KXLDversionmin *versionmin, struct version_min_c
 {
     check(versionmin);
     check(src);
-    check((src->cmd == LC_VERSION_MIN_MACOSX) || (src->cmd == LC_VERSION_MIN_IPHONEOS));
+    check((src->cmd == LC_VERSION_MIN_MACOSX) || (src->cmd == LC_VERSION_MIN_IPHONEOS) || (src->cmd == LC_VERSION_MIN_WATCHOS));
 
     switch (src->cmd) {
         case LC_VERSION_MIN_MACOSX:
@@ -51,6 +51,9 @@ kxld_versionmin_init_from_macho(KXLDversionmin *versionmin, struct version_min_c
         case LC_VERSION_MIN_IPHONEOS:
             versionmin->platform = kKxldVersionMiniPhoneOS;
             break;
+        case LC_VERSION_MIN_WATCHOS:
+            versionmin->platform = kKxldVersionMinWatchOS;
+            break;
     }
 
     versionmin->version = src->version;
@@ -99,6 +102,9 @@ kxld_versionmin_export_macho(const KXLDversionmin *versionmin, u_char *buf,
         case kKxldVersionMiniPhoneOS:
             versionminhdr->cmd = LC_VERSION_MIN_IPHONEOS;
             break;
+        case kKxldVersionMinWatchOS:
+            versionminhdr->cmd = LC_VERSION_MIN_WATCHOS;
+            break;
     }
     versionminhdr->cmdsize = (uint32_t) sizeof(*versionminhdr);
     versionminhdr->version = versionmin->version;
index 3ebcac665ffa9d054b71b6da30ead5f7b0e8943c..d4ce76b219fcd0a4f2545983e1602b263c5a32c6 100644 (file)
@@ -40,7 +40,8 @@ typedef struct kxld_versionmin KXLDversionmin;
 
 enum kxld_versionmin_platforms {
     kKxldVersionMinMacOSX,
-    kKxldVersionMiniPhoneOS
+    kKxldVersionMiniPhoneOS,
+    kKxldVersionMinWatchOS
 };
 
 struct kxld_versionmin {
index 7e545d32869293a90f3492aa3c1c144a8be2aba0..b99feebf8c8913372eceb1fe004140431eaea4a9 100644 (file)
@@ -112,7 +112,7 @@ convert_cfstring(CFStringRef the_string)
 
     result = converted_string;
 finish:
-    CFRelease(the_data);
+    if (the_data) CFRelease(the_data);
     return result;
 }
 
index 8b2f0606a7e5230d0511c71b3daf4e0d7bb5c5ec..ee046b925b52855b57b22ab4933ce25d8d29bc72 100644 (file)
@@ -14,61 +14,62 @@ INSTINC_SUBDIRS_X86_64 = \
         i386
 INSTINC_SUBDIRS_X86_64H = \
         i386
+INSTINC_SUBDIRS_ARM = \
+        arm
+INSTINC_SUBDIRS_ARM64 = \
+        arm
 
 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
 EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64}
 EXPINC_SUBDIRS_X86_64H = ${INSTINC_SUBDIRS_X86_64H}
+EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM}
+EXPINC_SUBDIRS_ARM64 = ${INSTINC_SUBDIRS_ARM64}
 
 DATAFILES = \
-       OSAtomic.h      \
-        OSBase.h       \
-        OSByteOrder.h  \
-        _OSByteOrder.h \
-        OSDebug.h      \
-       OSKextLib.h     \
-        OSMalloc.h     \
-        OSReturn.h     \
-        OSTypes.h      \
-       locks.h         \
-       sysctl.h        \
-       tree.h          \
-       zconf.h         \
+       OSByteOrder.h \
+       _OSByteOrder.h \
+       OSDebug.h \
+       OSKextLib.h \
+       OSReturn.h \
+       OSTypes.h
+
+KERNELFILES = \
+       ${DATAFILES} \
+       OSAtomic.h \
+       OSBase.h \
+       OSMalloc.h \
+       locks.h \
+       sysctl.h \
+       tree.h \
+       zconf.h \
        zlib.h
 
-PRIVATE_DATAFILES =         \
-       OSKextLibPrivate.h  \
+PRIVATE_KERNELFILES = \
+       OSKextLibPrivate.h \
+       OSSerializeBinary.h \
        kext_request_keys.h \
-       mkext.h             \
-       prelink.h               \
-    OSSerializeBinary.h
-
-INSTALL_MI_LIST        =       \
-       OSByteOrder.h   \
-       _OSByteOrder.h  \
-       OSDebug.h       \
-       OSKextLib.h     \
-       OSReturn.h      \
-       OSTypes.h
+       mkext.h \
+       prelink.h
+
+PRIVATE_DATAFILES = \
+       ${PRIVATE_KERNELFILES} \
+       tree.h
+
+INSTALL_MI_LIST        = ${DATAFILES}
 
 INSTALL_MI_DIR = libkern
 
 INSTALL_MI_LCL_LIST =        \
-       ${INSTALL_MI_LIST}   \
        ${PRIVATE_DATAFILES} \
-       tree.h               \
        kext_panic_report.h  \
        OSCrossEndian.h
 
-INSTALL_KF_MI_LIST =       \
-       ${DATAFILES}
+INSTALL_KF_MI_LIST = ${KERNELFILES}
 
-INSTALL_KF_MI_LCL_LIST =   \
-       ${DATAFILES}       \
-       ${PRIVATE_DATAFILES}
+INSTALL_KF_MI_LCL_LIST = ${KERNELFILES} ${PRIVATE_KERNELFILES}
 
-EXPORT_MI_LIST =              \
-       ${DATAFILES}           \
-       ${PRIVATE_DATAFILES}   \
+EXPORT_MI_LIST = \
+       $(sort ${KERNELFILES} ${PRIVATE_DATAFILES}) \
        kernel_mach_header.h   \
        kxld.h                 \
        kxld_types.h           \
index c802adeff1be6e7fcf98fd7121db3f24bd33830a..656a2cd2aeb980ae09e401c6a8850d68ab5d7b80 100644 (file)
@@ -172,6 +172,52 @@ inline static long OSDecrementAtomicLong(volatile long * address)
 #endif /* XNU_KERNEL_PRIVATE */
 
 #if XNU_KERNEL_PRIVATE
+/*!
+ * @function OSCompareAndSwap8
+ *
+ * @abstract
+ * Compare and swap operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
+ *
+ * @discussion
+ * The OSCompareAndSwap8 function compares the value at the specified address with oldValue. The value of newValue is written to the address only if oldValue and the value at the address are equal. OSCompareAndSwap8 returns true if newValue is written to the address; otherwise, it returns false.
+ *
+ * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures.
+ *
+ * @param oldValue The value to compare at address.
+ * @param newValue The value to write to address if oldValue compares true.
+ * @param address The byte aligned address of the data to update atomically.
+ * @result true if newValue was written to the address.
+ */
+extern Boolean OSCompareAndSwap8(
+    UInt8            oldValue,
+    UInt8            newValue,
+    volatile UInt8 * address);
+#define OSCompareAndSwap8(a, b, c) \
+       (OSCompareAndSwap8(a, b, __SAFE_CAST_PTR(volatile UInt8*,c)))
+
+/*!
+ * @function OSCompareAndSwap16
+ *
+ * @abstract
+ * Compare and swap operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
+ *
+ * @discussion
+ * The OSCompareAndSwap16 function compares the value at the specified address with oldValue. The value of newValue is written to the address only if oldValue and the value at the address are equal. OSCompareAndSwap16 returns true if newValue is written to the address; otherwise, it returns false.
+ *
+ * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures.
+ *
+ * @param oldValue The value to compare at address.
+ * @param newValue The value to write to address if oldValue compares true.
+ * @param address The 2-byte aligned address of the data to update atomically.
+ * @result true if newValue was written to the address.
+ */
+extern Boolean OSCompareAndSwap16(
+    UInt16            oldValue,
+    UInt16            newValue,
+    volatile UInt16 * address);
+#define OSCompareAndSwap16(a, b, c) \
+       (OSCompareAndSwap16(a, b, __SAFE_CAST_PTR(volatile UInt16*,c)))
+
 #endif /* XNU_KERNEL_PRIVATE */
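As with the existing wider variants, these compare-and-swap calls succeed only when the current value still equals oldValue, so lock-free updates are normally written as a retry loop. A hedged sketch (XNU-private kernel code assumed, per the #if above; set_flag_atomic and its callers are hypothetical):

    /* Atomically OR a mask into a shared byte; retry if another CPU changed
     * the byte between the load and the compare-and-swap. */
    static void set_flag_atomic(volatile UInt8 *byte, UInt8 mask)
    {
        UInt8 oldv, newv;
        do {
            oldv = *byte;
            newv = oldv | mask;
        } while (!OSCompareAndSwap8(oldv, newv, byte));
    }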
 
 /*!
@@ -546,7 +592,7 @@ extern UInt8 OSBitXorAtomic8(
  *
  * @discussion
  * The OSTestAndSet function sets a single bit in a byte at a specified address. It returns true if the bit was already set, false otherwise.
- * @param bit The bit number in the range 0 through 7.
+ * @param bit The bit number in the range 0 through 7. Bit 0 is the most significant.
  * @param startAddress The address of the byte to update atomically.
  * @result true if the bit was already set, false otherwise.
  */
@@ -564,7 +610,7 @@ extern Boolean OSTestAndSet(
  * The OSTestAndClear function clears a single bit in a byte at a specified address. It returns true if the bit was already clear, false otherwise.
  *
  * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures.
- * @param bit The bit number in the range 0 through 7.
+ * @param bit The bit number in the range 0 through 7. Bit 0 is the most significant.
  * @param startAddress The address of the byte to update atomically.
  * @result true if the bit was already clear, false otherwise.
  */
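The added sentence pins down the numbering: the index counts from the most-significant end of the byte, so bit 0 maps to mask 0x80 and bit 7 to 0x01. A small illustration (the wrapper and its flags byte are hypothetical):

    /* bit 0 -> 0x80, bit 1 -> 0x40, ... bit 7 -> 0x01 */
    static Boolean mark_first_flag(volatile UInt8 *flags)
    {
        return OSTestAndSet(0, flags);   /* atomically sets the 0x80 bit; true if it was already set */
    }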
index 8d752baf66b1c39fe31db4a02447a8e3d91b1ab2..8435d0c49382b6dea2e9c4f7596e6440667dce04 100644 (file)
@@ -35,13 +35,14 @@ __BEGIN_DECLS
 #include <stdint.h>
 #include <mach/kmod.h>
 #include <mach/vm_types.h>
+#include <uuid/uuid.h>
 
 #ifdef KERNEL
 #include <libkern/OSTypes.h>
 #include <libkern/OSReturn.h>
 #else
 #include <CoreFoundation/CoreFoundation.h>
-#include <System/libkern/OSReturn.h>
+#include <libkern/OSReturn.h>
 #endif /* KERNEL */
 
 /*!
@@ -878,6 +879,36 @@ OSReturn OSKextCancelRequest(
     void             ** contextOut);
 
 
+/*!
+ * @function OSKextGrabPgoData
+ *
+ * @abstract
+ * Grab a LLVM profile data buffer from a loaded kext.
+ *
+ * @param   uuid             the uuid identifying the kext to retrieve data from
+ * @param   pSize            pointer to where the size of the buffer should be stored.  May be NULL.
+ * @param   pBuffer          pointer to the output buffer.   May be NULL.
+ * @param   bufferSize       size of the buffer pointed to by pBuffer
+ * @param   wait_for_unload  (boolean) sleep until the kext is unloaded
+ * @param   metadata         (boolean) include metadata footer
+ *
+ * @result
+ * 0 on success
+ * ENOTSUP if the kext does not have profile data to retrieve.
+ * ENOTSUP if no kext with the given UUID is found
+ * ERRORS  if the provided buffer is too small
+ * EIO     internal error, such as if __llvm_profile_write_buffer_internal fails
+ */
+int
+OSKextGrabPgoData(uuid_t uuid,
+                  uint64_t *pSize,
+                  char *pBuffer,
+                  uint64_t bufferSize,
+                  int wait_for_unload,
+                  int metadata);
+
+
+
 #if PRAGMA_MARK
 #pragma mark -
 /********************************************************************/
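A hedged sketch of the two-pass pattern the pSize/pBuffer parameters suggest (query the size, then fetch the data). The helper, the user-space caller with malloc/free, and the NULL-argument combinations are assumptions drawn only from the parameter notes above:

    static char *copy_kext_pgo_data(uuid_t uuid, uint64_t *size_out)
    {
        uint64_t size = 0;
        char    *buf  = NULL;
        if (OSKextGrabPgoData(uuid, &size, NULL, 0, 0, 1) == 0 && size > 0) {
            buf = (char *)malloc((size_t)size);
            if (buf && OSKextGrabPgoData(uuid, NULL, buf, size, 0, 1) != 0) {
                free(buf);
                buf = NULL;
            }
        }
        if (size_out) *size_out = size;
        return buf;   /* LLVM profile data with metadata footer, or NULL */
    }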
index 06a7fe8d3d9d2a8dd0835301cbc96dcef3b93331..4ae4b98060aee14ebc2b3cd666ec23bb2a91484a 100644 (file)
@@ -39,7 +39,7 @@ __BEGIN_DECLS
 #include <mach/vm_types.h>
 #else
 #include <CoreFoundation/CoreFoundation.h>
-#include <System/mach/kmod.h>
+#include <mach/kmod.h>
 #endif /* KERNEL */
 __END_DECLS
 
@@ -910,6 +910,14 @@ extern OSKextLoadedKextSummaryHeader * gLoadedKextSummaries;
  */
 void OSKextLoadedKextSummariesUpdated(void);
 
+#ifdef XNU_KERNEL_PRIVATE
+
+extern const vm_allocation_site_t * OSKextGetAllocationSiteForCaller(uintptr_t address);
+extern uint32_t                     OSKextGetKmodIDForSite(vm_allocation_site_t * site);
+extern void                         OSKextFreeSite(vm_allocation_site_t * site);
+
+#endif /* XNU_KERNEL_PRIVATE */
+
 __END_DECLS
 
 #endif /* ! _LIBKERN_OSKEXTLIBPRIVATE_H */
index 91cbd81bf1a9803c254191c7ac90b4883af9f2e9..67da96ff4806fa8ffa15f9aeeeea01fbcb889976 100644 (file)
@@ -104,9 +104,9 @@ protected:
     ExpansionData          * reserved;
 
    /* OSCollectionIterator interfaces. */
-    virtual unsigned int iteratorSize() const;
-    virtual bool initIterator(void * iterator) const;
-    virtual bool getNextObjectForIterator(void * iterator, OSObject ** ret) const;
+    virtual unsigned int iteratorSize() const APPLE_KEXT_OVERRIDE;
+    virtual bool initIterator(void * iterator) const APPLE_KEXT_OVERRIDE;
+    virtual bool getNextObjectForIterator(void * iterator, OSObject ** ret) const APPLE_KEXT_OVERRIDE;
 
 public:
 
@@ -312,7 +312,7 @@ public:
     * release@/link</code>
     * instead.
     */
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -324,7 +324,7 @@ public:
     * @result
     * The current number of objects within the array.
     */
-    virtual unsigned int getCount() const;
+    virtual unsigned int getCount() const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -349,7 +349,7 @@ public:
     * //apple_ref/cpp/instm/OSArray/ensureCapacity/virtualunsignedint/(unsignedint)
     * <code>ensureCapacity</code>.@/link
     */
-    virtual unsigned int getCapacity() const;
+    virtual unsigned int getCapacity() const APPLE_KEXT_OVERRIDE;
     
     
    /*!
@@ -365,7 +365,7 @@ public:
     * An OSArray allocates storage for objects in multiples
     * of the capacity increment.
     */
-    virtual unsigned int getCapacityIncrement() const;
+    virtual unsigned int getCapacityIncrement() const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -383,7 +383,7 @@ public:
     * of the capacity increment.
     * Calling this function does not immediately reallocate storage.
     */
-    virtual unsigned int setCapacityIncrement(unsigned increment);
+    virtual unsigned int setCapacityIncrement(unsigned increment) APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -409,7 +409,7 @@ public:
     *
     * There is no way to reduce the capacity of an OSArray.
     */
-    virtual unsigned int ensureCapacity(unsigned int newCapacity);
+    virtual unsigned int ensureCapacity(unsigned int newCapacity) APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -422,7 +422,7 @@ public:
     * The array's capacity (and therefore direct memory consumption)
     * is not reduced by this function.
     */
-    virtual void flushCollection();
+    virtual void flushCollection() APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -575,7 +575,7 @@ public:
     * if that object is derived from OSArray
     * and contains the same or equivalent objects.
     */
-    virtual bool isEqualTo(const OSMetaClassBase * anObject) const;
+    virtual bool isEqualTo(const OSMetaClassBase * anObject) const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -657,7 +657,7 @@ public:
     * @result
     * <code>true</code> if serialization succeeds, <code>false</code> if not.
     */
-    virtual bool serialize(OSSerialize * serializer) const;
+    virtual bool serialize(OSSerialize * serializer) const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -686,7 +686,7 @@ public:
     virtual unsigned setOptions(
         unsigned   options,
         unsigned   mask,
-        void     * context = 0);
+        void     * context = 0) APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -711,7 +711,7 @@ public:
     * Objects that are not derived from OSCollection are retained
     * rather than copied.
     */
-    OSCollection * copyCollection(OSDictionary * cycleDict = 0);
+    OSCollection * copyCollection(OSDictionary * cycleDict = 0) APPLE_KEXT_OVERRIDE;
 
     OSMetaClassDeclareReservedUnused(OSArray, 0);
     OSMetaClassDeclareReservedUnused(OSArray, 1);
index 4eb533ccf7f525f77eebe05acfd20a9da0f2155d..8821a18647f1032c75a87b87f5b3860bd1145107 100644 (file)
@@ -80,7 +80,7 @@ protected:
     */
     virtual void taggedRelease(
         const void * tag,
-        const int    when) const;
+        const int    when) const APPLE_KEXT_OVERRIDE;
 
 public:
     static void initialize();
@@ -113,7 +113,7 @@ public:
     * @discussion
     * This function should never be called.
     */
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -124,7 +124,7 @@ public:
     *
     * @param tag  Unused. 
     */
-    virtual void taggedRetain(const void * tag) const;
+    virtual void taggedRetain(const void * tag) const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -210,7 +210,7 @@ public:
     * if that object is derived from OSBoolean
     * and represents the same C++ <code>bool</code> value.
     */
-    virtual bool isEqualTo(const OSMetaClassBase * anObject) const;
+    virtual bool isEqualTo(const OSMetaClassBase * anObject) const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -225,7 +225,7 @@ public:
     * @result
     * <code>true</code> if serialization succeeds, <code>false</code> if not.
     */
-    virtual bool serialize(OSSerialize * serializer) const;
+    virtual bool serialize(OSSerialize * serializer) const APPLE_KEXT_OVERRIDE;
 
     OSMetaClassDeclareReservedUnused(OSBoolean, 0);
     OSMetaClassDeclareReservedUnused(OSBoolean, 1);
index adb7cbf8bdedef4181a122d6fd7dcbbe2794b6b6..91deba1fad3df6ecba7885cf72ea78c68c40fb14 100644 (file)
@@ -212,7 +212,7 @@ protected:
     * This function is used to initialize state
     * within a newly created OSCollection object.
     */
-    virtual bool init();
+    virtual bool init() APPLE_KEXT_OVERRIDE;
 
 public:
 
@@ -458,3 +458,5 @@ public:
 };
 
 #endif /* !_OS_OSCOLLECTION_H */
+
+
index 72e8e97929dfe2186085e2e809ce5f5ecea95493..235877add70a7d9e49c39610d7d84ab92f6580c4 100644 (file)
@@ -149,7 +149,7 @@ public:
     * release@/link</code>
     * instead.
     */
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -159,7 +159,7 @@ public:
     * Resets the iterator to the beginning of the collection,
     * as if it had just been created.
     */
-    virtual void reset();
+    virtual void reset() APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -173,7 +173,7 @@ public:
     * <code>false</code> otherwise
     * (typically because the iteration context has been modified).
     */
-    virtual bool isValid();
+    virtual bool isValid() APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -202,7 +202,7 @@ public:
     * and then to advance the iteration context to the next object (if any)
     * and return that next object, or <code>NULL</code> if there is none.
     */
-    virtual OSObject * getNextObject();
+    virtual OSObject * getNextObject() APPLE_KEXT_OVERRIDE;
 };
 
 #endif /* !_OS_OSCOLLECTIONITERATOR_H */
index 5c499cc8c13de023b704c0883c2f346bee4ab711..b3fcd5732126c3cffa956c07ef9c802d4c5f3883 100644 (file)
@@ -388,7 +388,7 @@ public:
     * release@/link</code>
     * instead.
     */
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -657,7 +657,7 @@ public:
     * if that object is derived from OSData
     * and contains the equivalent bytes of the same length.
     */
-    virtual bool isEqualTo(const OSMetaClassBase * anObject) const;
+    virtual bool isEqualTo(const OSMetaClassBase * anObject) const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -698,7 +698,7 @@ public:
     * @result
     * <code>true</code> if serialization succeeds, <code>false</code> if not.
     */
-    virtual bool serialize(OSSerialize * serializer) const;
+    virtual bool serialize(OSSerialize * serializer) const APPLE_KEXT_OVERRIDE;
 
 
    /*!
index 9bdba7ac2ff85b122aab985ba4fe404284575975..5bc25627858aa43a1625639599fc1a15bde01bd1 100644 (file)
@@ -131,9 +131,9 @@ protected:
     ExpansionData * reserved;
 
     // Member functions used by the OSCollectionIterator class.
-    virtual unsigned int iteratorSize() const;
-    virtual bool initIterator(void * iterator) const;
-    virtual bool getNextObjectForIterator(void * iterator, OSObject ** ret) const;
+    virtual unsigned int iteratorSize() const APPLE_KEXT_OVERRIDE;
+    virtual bool initIterator(void * iterator) const APPLE_KEXT_OVERRIDE;
+    virtual bool getNextObjectForIterator(void * iterator, OSObject ** ret) const APPLE_KEXT_OVERRIDE;
 
 public:
 
@@ -438,7 +438,7 @@ public:
     * release@/link</code>
     * instead.
     */
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -452,7 +452,7 @@ public:
     * The current number of key/object pairs
     * contained within the dictionary.
     */
-    virtual unsigned int getCount() const;
+    virtual unsigned int getCount() const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -476,7 +476,7 @@ public:
     * //apple_ref/cpp/instm/OSDictionary/ensureCapacity/virtualunsignedint/(unsignedint)
     * ensureCapacity@/link</code>.
     */
-    virtual unsigned int getCapacity() const;
+    virtual unsigned int getCapacity() const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -492,7 +492,7 @@ public:
     * An OSDictionary allocates storage for key/object pairs in multiples
     * of the capacity increment.
     */
-    virtual unsigned int getCapacityIncrement() const;
+    virtual unsigned int getCapacityIncrement() const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -510,7 +510,7 @@ public:
     * of the capacity increment.
     * Calling this function does not immediately reallocate storage.
     */
-    virtual unsigned int setCapacityIncrement(unsigned increment);
+    virtual unsigned int setCapacityIncrement(unsigned increment) APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -536,7 +536,7 @@ public:
     *
     * There is no way to reduce the capacity of an OSDictionary.
     */
-    virtual unsigned int ensureCapacity(unsigned int newCapacity);
+    virtual unsigned int ensureCapacity(unsigned int newCapacity) APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -549,7 +549,7 @@ public:
     * The dictionary's capacity (and therefore direct memory consumption)
     * is not reduced by this function.
     */
-    virtual void flushCollection();
+    virtual void flushCollection() APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -831,7 +831,7 @@ public:
     * if that object is derived from OSDictionary
     * and contains the same or equivalent objects.
     */
-    virtual bool isEqualTo(const OSMetaClassBase * anObject) const;
+    virtual bool isEqualTo(const OSMetaClassBase * anObject) const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -846,7 +846,7 @@ public:
     * @result
     * <code>true</code> if serialization succeeds, <code>false</code> if not.
     */
-    virtual bool serialize(OSSerialize * serializer) const;
+    virtual bool serialize(OSSerialize * serializer) const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -875,7 +875,7 @@ public:
     virtual unsigned setOptions(
         unsigned   options,
         unsigned   mask,
-        void     * context = 0);
+        void     * context = 0) APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -901,7 +901,7 @@ public:
     * Objects that are not derived from OSCollection are retained
     * rather than copied.
     */
-    OSCollection * copyCollection(OSDictionary * cycleDict = 0);
+    OSCollection * copyCollection(OSDictionary * cycleDict = 0) APPLE_KEXT_OVERRIDE;
 
 
     OSMetaClassDeclareReservedUnused(OSDictionary, 0);
index 815d8550133baad0769efa42fa7ba0c76db9f6b2..abc3db0e4c595b912f60e8ca81f4e35551ed4d78 100644 (file)
@@ -105,6 +105,44 @@ void kmod_dump_log(vm_offset_t*, unsigned int, boolean_t);
 #if PRAGMA_MARK
 #pragma mark -
 #endif
+
+struct list_head {
+    struct list_head *prev;
+    struct list_head *next;
+};
+
+struct OSKextGrabPgoStruct {
+    bool metadata;
+    uint64_t *pSize;
+    char *pBuffer;
+    uint64_t bufferSize;
+    int err;
+    struct list_head list_head;
+};
+
+#ifndef container_of
+#define container_of(ptr,type,member) ((type*)(((uintptr_t)ptr) - offsetof(type, member)))
+#endif
+/********************************************************************/
+
+#if XNU_KERNEL_PRIVATE
+
+struct OSKextAccount
+{
+    vm_allocation_site_t site;
+    uint32_t            loadTag;
+};
+
+struct OSKextActiveAccount
+{
+    uintptr_t       address;
+    uintptr_t       address_end;
+    OSKextAccount * account;
+};
+typedef struct OSKextActiveAccount OSKextActiveAccount;
+
+#endif /* XNU_KERNEL_PRIVATE */
+
 /*
  * @class OSKext
  */
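list_head is the usual intrusive doubly-linked list node, and container_of recovers the enclosing structure from a pointer to the node embedded in it. A small sketch against the OSKextGrabPgoStruct defined above (the walk that produced the node pointer is hypothetical; offsetof comes from the kernel's standard headers):

    static void complete_pgo_request(struct list_head *node)
    {
        /* 'node' points at the list_head embedded in an OSKextGrabPgoStruct;
         * container_of subtracts the member offset to get back to the struct. */
        struct OSKextGrabPgoStruct *req =
            container_of(node, struct OSKextGrabPgoStruct, list_head);
        req->err = 0;   /* e.g. mark the pending request as satisfied */
    }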
@@ -122,6 +160,13 @@ class OSKext : public OSObject
     friend class KLDBootstrap;
     friend class OSMetaClass;
 
+    friend int OSKextGrabPgoData(uuid_t uuid,
+                                 uint64_t *pSize,
+                                 char *pBuffer,
+                                 uint64_t bufferSize,
+                                 int wait_for_unload,
+                                 int metadata);
+
 #ifdef XNU_KERNEL_PRIVATE
     friend void OSKextVLog(
         OSKext         * aKext,
@@ -238,6 +283,10 @@ private:
         unsigned int jettisonLinkeditSeg:1;
     } flags;
 
+    struct list_head pendingPgoHead;
+    uuid_t instance_uuid;
+    OSKextAccount * account;
+
 #if PRAGMA_MARK
 /**************************************/
 #pragma mark Private Functions
@@ -298,7 +347,7 @@ private:
         bool           externalDataIsMkext = false);
     virtual bool registerIdentifier(void);
 
-    virtual void free(void);
+    virtual void free(void) APPLE_KEXT_OVERRIDE;
 
     static OSReturn removeKext(
         OSKext * aKext,
@@ -373,6 +422,7 @@ private:
     virtual OSReturn slidePrelinkedExecutable(void);
     virtual OSReturn loadExecutable(void);
     virtual void     jettisonLinkeditSegment(void);
+    virtual void     jettisonDATASegmentPadding(void);
     static  void     considerDestroyingLinkContext(void);
     virtual OSData * getExecutable(void);
     virtual void     setLinkedExecutable(OSData * anExecutable);
@@ -386,7 +436,7 @@ private:
 
     virtual OSReturn start(bool startDependenciesFlag = true);
     virtual OSReturn stop(void);
-    virtual OSReturn setVMProtections(void);
+    virtual OSReturn setVMAttributes(bool protect, bool wire);
     virtual boolean_t segmentShouldBeWired(kernel_segment_command_t *seg);
     virtual OSReturn validateKextMapping(bool startFlag);
     virtual boolean_t verifySegmentMapping(kernel_segment_command_t *seg);
@@ -491,6 +541,7 @@ private:
     */
     static void updateLoadedKextSummaries(void);
     void updateLoadedKextSummary(OSKextLoadedKextSummary *summary);
+    void updateActiveAccount(OSKextActiveAccount *account);
 
     /* C++ Initialization.
      */
@@ -509,6 +560,9 @@ public:
     static OSKext * lookupKextWithIdentifier(OSString * kextIdentifier);
     static OSKext * lookupKextWithLoadTag(OSKextLoadTag aTag);
     static OSKext * lookupKextWithAddress(vm_address_t address);
+    static OSKext * lookupKextWithUUID(uuid_t uuid);
+
+    kernel_section_t *lookupSection(const char *segname, const char*secname);
     
     static bool isKextWithIdentifierLoaded(const char * kextIdentifier);
 
index 90034109bb09755ff9438431c375d01817fcf8be..80bc292da517f1734d45514e18db957b504c053f 100644 (file)
@@ -46,6 +46,33 @@ __BEGIN_DECLS
 
 __END_DECLS
 
+
+#if XNU_KERNEL_PRIVATE
+#include <libkern/OSAtomic.h>
+
+#define kalloc_container(size) \
+       kalloc_tag_bt(size, VM_KERN_MEMORY_LIBKERN)
+
+#if OSALLOCDEBUG
+extern "C" int debug_container_malloc_size;
+extern "C" int debug_ivars_size;
+#if IOTRACKING
+#define OSCONTAINER_ACCUMSIZE(s) do { OSAddAtomic((SInt32)(s), &debug_container_malloc_size); trackingAccumSize(s); } while(0)
+#else
+#define OSCONTAINER_ACCUMSIZE(s) do { OSAddAtomic((SInt32)(s), &debug_container_malloc_size); } while(0)
+#endif
+#define OSMETA_ACCUMSIZE(s)      do { OSAddAtomic((SInt32)(s), &debug_container_malloc_size); } while(0)
+#define OSIVAR_ACCUMSIZE(s)      do { OSAddAtomic((SInt32)(s), &debug_ivars_size);            } while(0)
+
+#else /* OSALLOCDEBUG */
+
+#define OSCONTAINER_ACCUMSIZE(s)
+#define OSMETA_ACCUMSIZE(s)
+#define OSIVAR_ACCUMSIZE(s)
+
+#endif  /* !OSALLOCDEBUG */
+#endif  /* XNU_KERNEL_PRIVATE */
+
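The accounting macros pair each libkern container allocation with a signed size delta so debug kernels can track heap usage, and kalloc_container (via kalloc_tag_bt) charges the allocation to VM_KERN_MEMORY_LIBKERN. A hedged sketch of the usual pairing (the helper and its 64-pointer buffer are illustrative):

    static void *grow_container_storage(void)
    {
        size_t size = 64 * sizeof(void *);
        void  *buf  = kalloc_container(size);
        OSCONTAINER_ACCUMSIZE(size);
        /* ... later, when the storage is released:
         *     kfree(buf, size);
         *     OSCONTAINER_ACCUMSIZE(-((size_t)size));            */
        return buf;
    }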
 #ifndef NULL
 #if defined (__cplusplus)
 #define NULL 0
index 84c30e6ab92a3d617655ef8f6341f32f6cb35508..2d2267ab1fb970fc90cda0298165bd904c4419b8 100644 (file)
@@ -88,6 +88,20 @@ class OSOrderedSet;
 /*! @parseOnly */
 #define APPLE_KEXT_DEPRECATED  __attribute__((deprecated))
 
+
+#if __cplusplus >= 201103L
+#define APPLE_KEXT_OVERRIDE                            override
+#if defined(__LP64__)
+#define APPLE_KEXT_COMPATIBILITY_OVERRIDE
+#else
+#define APPLE_KEXT_COMPATIBILITY_OVERRIDE      APPLE_KEXT_OVERRIDE
+#endif
+#else
+#define APPLE_KEXT_OVERRIDE
+#define APPLE_KEXT_COMPATIBILITY_OVERRIDE
+#endif
+
+
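The new macro lets these headers annotate virtual overrides with the C++11 override keyword when the compiler supports it and expand to nothing otherwise; APPLE_KEXT_COMPATIBILITY_OVERRIDE additionally stays empty on LP64 builds. The effect in isolation (hypothetical classes, only to show the expansion):

    struct Base           { virtual void frob(); };
    struct Derived : Base {
        /* 'override' under C++11 or later, so a mismatched signature becomes a
         * compile-time error instead of a silently separate virtual slot. */
        virtual void frob() APPLE_KEXT_OVERRIDE;
    };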
 /*!
  * @class OSMetaClassBase
  *
@@ -1582,7 +1596,7 @@ public:
             virtual OSObject *alloc() const;                    \
         } gMetaClass;                                           \
         friend class className ::MetaClass;                     \
-        virtual const OSMetaClass * getMetaClass() const;       \
+        virtual const OSMetaClass * getMetaClass() const APPLE_KEXT_OVERRIDE; \
     protected:                                                  \
     className (const OSMetaClass *);                            \
     virtual ~ className ()
@@ -2065,6 +2079,17 @@ void className ::_RESERVED ## className ## index ()             \
     // I/O Kit debug internal routines.
     static void printInstanceCounts();
     static void serializeClassDictionary(OSDictionary * dict);
+#ifdef XNU_KERNEL_PRIVATE
+#if IOTRACKING
+public:
+    static void * trackedNew(size_t size);
+    static void trackedDelete(void * mem, size_t size);
+    void trackedInstance(OSObject * instance) const;
+    void trackedFree(OSObject * instance) const;
+    void trackedAccumSize(OSObject * instance, size_t size) const;
+    struct IOTrackingQueue * getTracking() const;
+#endif
+#endif
 
 private:
     // Obsolete APIs
index 6502a3039fd5efa4d3633881e26fb05b4825f958..c54c3a3c6ce3b6805332fb5726479b368e321a2a 100644 (file)
@@ -215,7 +215,7 @@ public:
     * release@/link</code>
     * instead.
     */
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -397,7 +397,7 @@ public:
     * An OSNumber is considered equal to another object if that object is
     * derived from OSNumber and represents the same C integer value.
     */
-    virtual bool isEqualTo(const OSMetaClassBase * anObject) const;
+    virtual bool isEqualTo(const OSMetaClassBase * anObject) const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -412,7 +412,7 @@ public:
     * @result
     * <code>true</code> if serialization succeeds, <code>false</code> if not.
     */
-    virtual bool serialize(OSSerialize * serializer) const;
+    virtual bool serialize(OSSerialize * serializer) const APPLE_KEXT_OVERRIDE;
 
 
     OSMetaClassDeclareReservedUnused(OSNumber, 0);
index a24f30e98497318a34bf7ff0ca044aff10a1b79e..01a480f1988ce7c2f91753839a4976c82766c5fb 100644 (file)
@@ -203,7 +203,7 @@ protected:
     * can be used to break certain retain cycles in object graphs.
     * In general, however, it should be avoided.
     */
-    virtual void release(int freeWhen) const;
+    virtual void release(int freeWhen) const APPLE_KEXT_OVERRIDE;
 
    /*!
     * @function taggedRelease
@@ -230,7 +230,7 @@ protected:
     * can be used to break certain retain cycles in object graphs.
     * In general, however, it should be avoided.
     */
-    virtual void taggedRelease(const void * tag, const int freeWhen) const;
+    virtual void taggedRelease(const void * tag, const int freeWhen) const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -324,7 +324,7 @@ public:
     * @result
     * The reference count of the object.
     */
-    virtual int getRetainCount() const;
+    virtual int getRetainCount() const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -339,7 +339,7 @@ public:
     * outside the context in which you received it,
     * you should always retain it immediately.
     */
-    virtual void retain() const;
+    virtual void retain() const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -357,7 +357,7 @@ public:
     * //apple_ref/cpp/instm/OSObject/free/virtualvoid/()
     * free@/link</code>.
     */
-    virtual void release() const;
+    virtual void release() const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -378,7 +378,7 @@ public:
     * outside the context in which you received it,
     * you should always retain it immediately.
     */
-    virtual void taggedRetain(const void * tag = 0) const;
+    virtual void taggedRetain(const void * tag = 0) const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -396,7 +396,7 @@ public:
     * It is for use by OSCollection and subclasses to track
     * inclusion in collections.
     */
-    virtual void taggedRelease(const void * tag = 0) const;
+    virtual void taggedRelease(const void * tag = 0) const APPLE_KEXT_OVERRIDE;
     // xx-review: used to say, "Remove a reference on this object with this tag, if an attempt is made to remove a reference that isn't associated with this tag the kernel will panic immediately", but I don't see that in the implementation
 
 
@@ -422,7 +422,13 @@ public:
     * @link //apple_ref/doc/class/OSSerialize OSSerialize@/link
     * for more information.
     */
-    virtual bool serialize(OSSerialize * serializer) const;
+    virtual bool serialize(OSSerialize * serializer) const APPLE_KEXT_OVERRIDE;
+
+#ifdef XNU_KERNEL_PRIVATE
+#if IOTRACKING
+    void trackingAccumSize(size_t size);
+#endif
+#endif
 
     // Unused Padding
     OSMetaClassDeclareReservedUnused(OSObject,  0);
index 8819f933245ed96dd75a4059fad870015eca15ed..4f94b889a83bc89ccd7337934c01211560c82fd1 100644 (file)
@@ -135,9 +135,9 @@ protected:
 
 protected:
    /* OSCollectionIterator interfaces. */
-    virtual unsigned int iteratorSize() const;
-    virtual bool initIterator(void *iterator) const;
-    virtual bool getNextObjectForIterator(void *iterator, OSObject **ret) const;
+    virtual unsigned int iteratorSize() const APPLE_KEXT_OVERRIDE;
+    virtual bool initIterator(void *iterator) const APPLE_KEXT_OVERRIDE;
+    virtual bool getNextObjectForIterator(void *iterator, OSObject **ret) const APPLE_KEXT_OVERRIDE;
 
 public:
 
@@ -250,7 +250,7 @@ public:
     * release@/link</code>
     * instead.
     */
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -262,7 +262,7 @@ public:
     * @result
     * The current number of objects within the ordered set.
     */
-    virtual unsigned int getCount() const;
+    virtual unsigned int getCount() const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -287,7 +287,7 @@ public:
     * //apple_ref/cpp/instm/OSOrderedSet/ensureCapacity/virtualunsignedint/(unsignedint)
     * ensureCapacity@/link</code>.
     */
-    virtual unsigned int getCapacity() const;
+    virtual unsigned int getCapacity() const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -303,7 +303,7 @@ public:
     * An OSOrderedSet allocates storage for objects in multiples
     * of the capacity increment.
     */
-    virtual unsigned int getCapacityIncrement() const;
+    virtual unsigned int getCapacityIncrement() const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -321,7 +321,7 @@ public:
     * of the capacity increment.
     * Calling this function does not immediately reallocate storage.
     */
-    virtual unsigned int setCapacityIncrement(unsigned increment);
+    virtual unsigned int setCapacityIncrement(unsigned increment) APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -347,7 +347,7 @@ public:
     *
     * There is no way to reduce the capacity of an OSOrderedSet.
     */
-    virtual unsigned int ensureCapacity(unsigned int newCapacity);
+    virtual unsigned int ensureCapacity(unsigned int newCapacity) APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -360,7 +360,7 @@ public:
     * The ordered set's capacity (and therefore direct memory consumption)
     * is not reduced by this function.
     */
-    virtual void flushCollection();
+    virtual void flushCollection() APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -706,7 +706,7 @@ public:
     * if the other object is derived from OSOrderedSet
     * and compares equal as an OSOrderedSet.
     */
-    virtual bool isEqualTo(const OSMetaClassBase * anObject) const;
+    virtual bool isEqualTo(const OSMetaClassBase * anObject) const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -734,7 +734,7 @@ public:
     virtual unsigned setOptions(
         unsigned   options,
         unsigned   mask,
-        void     * context = 0);
+        void     * context = 0) APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -759,7 +759,7 @@ public:
     * Objects that are not derived from OSCollection are retained
     * rather than copied.
     */
-    OSCollection *copyCollection(OSDictionary * cycleDict = 0);
+    OSCollection *copyCollection(OSDictionary * cycleDict = 0) APPLE_KEXT_OVERRIDE;
 
     OSMetaClassDeclareReservedUnused(OSOrderedSet, 0);
     OSMetaClassDeclareReservedUnused(OSOrderedSet, 1);
index 4d3d56fb6d77f904e290fe1f0c800d69c20a0511..0ffb861d91de4ac15d3d6a68568edee1a27e80e0 100644 (file)
@@ -305,7 +305,7 @@ public:
     virtual unsigned int getCapacityIncrement() const;
     virtual unsigned int setCapacityIncrement(unsigned increment);
     virtual unsigned int ensureCapacity(unsigned int newCapacity);
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
     OSMetaClassDeclareReservedUnused(OSSerialize, 0);
     OSMetaClassDeclareReservedUnused(OSSerialize, 1);
@@ -337,7 +337,7 @@ public:
         OSSerializerCallback callback,
         void * ref = 0);
 
-    virtual bool serialize(OSSerialize * serializer) const;
+    virtual bool serialize(OSSerialize * serializer) const APPLE_KEXT_OVERRIDE;
 };
 
 #endif /* _OS_OSSERIALIZE_H */
index 558f4d1da1501e8e12eb2e7b918a5488ed72e46c..6637fa2a860052d717154c072bb12ab219ab7042 100644 (file)
@@ -94,9 +94,9 @@ protected:
     /*
      * OSCollectionIterator interfaces.
      */
-    virtual unsigned int iteratorSize() const;
-    virtual bool initIterator(void * iterator) const;
-    virtual bool getNextObjectForIterator(void * iterator, OSObject ** ret) const;
+    virtual unsigned int iteratorSize() const APPLE_KEXT_OVERRIDE;
+    virtual bool initIterator(void * iterator) const APPLE_KEXT_OVERRIDE;
+    virtual bool getNextObjectForIterator(void * iterator, OSObject ** ret) const APPLE_KEXT_OVERRIDE;
 
     struct ExpansionData { };
     
@@ -390,7 +390,7 @@ public:
     * release@/link</code>
     * instead.
     */
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -402,7 +402,7 @@ public:
     * @result
     * The current number of objects within the set.
     */
-    virtual unsigned int getCount() const;
+    virtual unsigned int getCount() const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -427,7 +427,7 @@ public:
     * //apple_ref/cpp/instm/OSSet/ensureCapacity/virtualunsignedint/(unsignedint)
     * ensureCapacity@/link</code>.
     */
-    virtual unsigned int getCapacity() const;
+    virtual unsigned int getCapacity() const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -443,7 +443,7 @@ public:
     * An OSSet allocates storage for objects in multiples
     * of the capacity increment.
     */
-    virtual unsigned int getCapacityIncrement() const;
+    virtual unsigned int getCapacityIncrement() const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -461,7 +461,7 @@ public:
     * of the capacity increment.
     * Calling this function does not immediately reallocate storage.
     */
-    virtual unsigned int setCapacityIncrement(unsigned increment);
+    virtual unsigned int setCapacityIncrement(unsigned increment) APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -486,7 +486,7 @@ public:
     *
     * There is no way to reduce the capacity of an OSSet.
     */
-    virtual unsigned int ensureCapacity(unsigned int newCapacity);
+    virtual unsigned int ensureCapacity(unsigned int newCapacity) APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -499,7 +499,7 @@ public:
     * The set's capacity (and therefore direct memory consumption)
     * is not reduced by this function.
     */
-    virtual void flushCollection();
+    virtual void flushCollection() APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -698,7 +698,7 @@ public:
     * An OSSet object is considered equal to another object if the other object
     * is derived from OSSet and compares equal as a set.
     */
-    virtual bool isEqualTo(const OSMetaClassBase * anObject) const;
+    virtual bool isEqualTo(const OSMetaClassBase * anObject) const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -713,7 +713,7 @@ public:
     * @result
     * <code>true</code> if serialization succeeds, <code>false</code> if not.
     */
-    virtual bool serialize(OSSerialize * serializer) const;
+    virtual bool serialize(OSSerialize * serializer) const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -739,7 +739,7 @@ public:
     * Child collections' options are changed only if the receiving set's
     * options actually change.
     */
-    virtual unsigned setOptions(unsigned options, unsigned mask, void * context = 0);
+    virtual unsigned setOptions(unsigned options, unsigned mask, void * context = 0) APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -764,7 +764,7 @@ public:
     * Objects that are not derived from OSCollection are retained
     * rather than copied.
     */
-    OSCollection *copyCollection(OSDictionary *cycleDict = 0);
+    OSCollection *copyCollection(OSDictionary *cycleDict = 0) APPLE_KEXT_OVERRIDE;
 
     OSMetaClassDeclareReservedUnused(OSSet, 0);
     OSMetaClassDeclareReservedUnused(OSSet, 1);
index 29c8be0841aff76497d67efae9f20e02a1189e91..5ce0e5f6e8b9909eeacab6224ec565fe153aea2e 100644 (file)
@@ -180,7 +180,9 @@ public:
     */
     static OSString * withCStringNoCopy(const char * cString);
 
+#if XNU_KERNEL_PRIVATE
     static OSString * withStringOfLength(const char *cString, size_t length);
+#endif  /* XNU_KERNEL_PRIVATE */
 
    /*!
     * @function initWithString
@@ -264,7 +266,7 @@ public:
     * release@/link</code>
     * instead.
     */
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -375,7 +377,7 @@ public:
     * if that object is derived from OSString
     * and contains the equivalent bytes of the same length.
     */
-    virtual bool isEqualTo(const OSMetaClassBase * anObject) const;
+    virtual bool isEqualTo(const OSMetaClassBase * anObject) const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -415,7 +417,7 @@ public:
     * @result
     * <code>true</code> if serialization succeeds, <code>false</code> if not.
     */
-    virtual bool serialize(OSSerialize * serializer) const;
+    virtual bool serialize(OSSerialize * serializer) const APPLE_KEXT_OVERRIDE;
 
     OSMetaClassDeclareReservedUnused(OSString,  0);
     OSMetaClassDeclareReservedUnused(OSString,  1);
index d3ae9e1e1c0faeb8c7b577f31c85fb043279a1a5..5fe2f46f76430ffeae8521fcf5b744e9207911da 100644 (file)
@@ -112,7 +112,7 @@ private:
     * Overrides OSString's implementation to prevent creation
     * of distinct OSSymbols with the same string value.
     */
-    virtual bool initWithString(const OSString * aString);
+    virtual bool initWithString(const OSString * aString) APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -130,7 +130,7 @@ private:
     * Overrides OSString's implementation to prevent creation
     * of distinct OSSymbols with the same string value.
     */
-    virtual bool initWithCString(const char * cString);
+    virtual bool initWithCString(const char * cString) APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -148,7 +148,7 @@ private:
     * Overrides OSString's implementation to prevent creation
     * of distinct OSSymbols with the same string value.
     */
-    virtual bool initWithCStringNoCopy(const char *cString);
+    virtual bool initWithCStringNoCopy(const char *cString) APPLE_KEXT_OVERRIDE;
 
 protected:
 
@@ -174,7 +174,7 @@ protected:
     */
     virtual void taggedRelease(
         const void * tag,
-        const int    freeWhen) const;
+        const int    freeWhen) const APPLE_KEXT_OVERRIDE;
 
 
 // xx-review: should we just omit this from headerdoc?
@@ -193,7 +193,7 @@ protected:
     * must synchronize access to the class-internal tables
     * used to track those instances.
     */
-    virtual void free();
+    virtual void free() APPLE_KEXT_OVERRIDE;
 
 public:
 
@@ -227,7 +227,7 @@ public:
     * //apple_ref/cpp/instm/OSObject/taggedRelease/virtualvoid/(constvoid*,constint)
     * OSObject::taggedRelease(const void *, const int)@/link</code>.
     */
-    virtual void taggedRelease(const void * tag) const;
+    virtual void taggedRelease(const void * tag) const  APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -345,7 +345,7 @@ public:
     * are equivalent to the C string's,
     * <code>false</code> otherwise.
     */
-    virtual bool isEqualTo(const char * cString) const;
+    virtual bool isEqualTo(const char * cString) const APPLE_KEXT_OVERRIDE;
 
 
    /*!
@@ -363,7 +363,7 @@ public:
     * @link //apple_ref/doc/class/OSMetaClassBase OSString@/link
     * and contains the equivalent bytes of the same length.
     */
-    virtual bool isEqualTo(const OSMetaClassBase * anObject) const;
+    virtual bool isEqualTo(const OSMetaClassBase * anObject) const APPLE_KEXT_OVERRIDE;
 
 
 #ifdef XNU_KERNEL_PRIVATE
index af660ec9e346d46dc6b6b6abfc7c626c78af8283..8fe2a54dcc36903c42747eb4249b95bf6fcdc80e 100644 (file)
@@ -40,7 +40,7 @@ extern "C" {
 #define SHA256_DIGEST_LENGTH   CCSHA256_OUTPUT_SIZE
 #define SHA256_DIGEST_STRING_LENGTH    (SHA256_DIGEST_LENGTH * 2 + 1)
 #define SHA384_BLOCK_LENGTH            CCSHA512_BLOCK_SIZE
-#define SHA384_DIGEST_LENGTH   CCSHA512_OUTPUT_SIZE
+#define SHA384_DIGEST_LENGTH   CCSHA384_OUTPUT_SIZE
 #define SHA384_DIGEST_STRING_LENGTH    (SHA384_DIGEST_LENGTH * 2 + 1)
 #define SHA512_BLOCK_LENGTH            CCSHA512_BLOCK_SIZE
 #define SHA512_DIGEST_LENGTH   CCSHA512_OUTPUT_SIZE
index 4fa1e90219834095d7087607e59192ca6cb6e02e..6b3ef392bcbd51575c1a33355efd834a2e2620f0 100644 (file)
@@ -50,6 +50,7 @@
 *   flags               Flags to control the behavior of kxld
 *   cputype             The target arch's CPU type (0 for host arch)
 *   cpusubtype          The target arch's CPU subtype (0 for host subtype)
+*   pagesize            The target page size (0 for host page size)
 *******************************************************************************/
 kern_return_t kxld_create_context(
     KXLDContext **context, 
@@ -57,7 +58,8 @@ kern_return_t kxld_create_context(
     KXLDLoggingCallback log_callback,
     KXLDFlags flags,
     cpu_type_t cputype,
-    cpu_subtype_t cpusubtype)
+    cpu_subtype_t cpusubtype,
+    vm_size_t pagesize)
     __attribute__((nonnull(1,2),visibility("default")));
 
 /*******************************************************************************
index 3f5a5c2c340865483d6fd796bea08a93bc33fdf7..c859769d17a9e18ee152bd322a27c9281563b7bb 100644 (file)
@@ -621,6 +621,15 @@ ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest,
    destination.
 */
 
+#if XNU_KERNEL_PRIVATE
+
+typedef int    (*z_input_func) (z_streamp strm, Bytef *buf, unsigned size);
+typedef int    (*z_output_func)(z_streamp strm, Bytef *buf, unsigned size);
+
+ZEXTERN int ZEXPORT deflateResetWithIO(z_streamp strm, z_input_func zinput, z_output_func zoutput);
+
+#endif /* XNU_KERNEL_PRIVATE */
+
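These hooks let an XNU-private caller drive deflate without staging data through next_in/next_out: zinput replaces the internal read_buf, and when zoutput is set the pending output is handed to it directly (see the flush_pending change later in this diff). A hedged sketch of a callback pair; the trivial stand-in bodies mark where a real consumer would read from and write to its backing store:

    static int my_zinput(z_streamp strm, Bytef *buf, unsigned size)
    {
        (void)strm; (void)buf; (void)size;
        return 0;                 /* bytes placed into buf; 0 means no more input */
    }

    static int my_zoutput(z_streamp strm, Bytef *buf, unsigned size)
    {
        (void)strm; (void)buf;
        return (int)size;         /* bytes consumed from buf */
    }

    static int attach_io(z_streamp strm)
    {
        return deflateResetWithIO(strm, my_zinput, my_zoutput);
    }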
 ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm));
 /*
      This function is equivalent to deflateEnd followed by deflateInit,
@@ -1091,6 +1100,12 @@ ZEXTERN int ZEXPORT uncompress OF((Bytef *dest,   uLongf *destLen,
    buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete.
 */
 
+#if XNU_KERNEL_PRIVATE
+
+ZEXTERN uLong zlib_deflate_memory_size(int wbits, int memlevel);
+
+#endif /* XNU_KERNEL_PRIVATE */
+
 #if !KERNEL
 
 typedef voidp gzFile;
diff --git a/libkern/x86_64/OSAtomic.s b/libkern/x86_64/OSAtomic.s
deleted file mode 100644 (file)
index f3a7e61..0000000
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#;***************************************************************************
-#;* Boolean OSCompareAndSwap(SInt32 oldValue, SInt32 newValue, SInt32 *ptr) *
-#;***************************************************************************
-
-       .globl _OSCompareAndSwap
-_OSCompareAndSwap: #;oldValue, newValue, ptr
-#if    DEBUG
-       test    $3, %rdx
-       jz      1f
-       ud2
-1:
-#endif 
-       movl             %edi, %eax
-       lock
-       cmpxchgl        %esi, (%rdx)    #; CAS (eax is an implicit operand)
-       sete            %al                     #; did CAS succeed? (TZ=1)
-       movzbq          %al, %rax               #; clear out the high bytes
-       ret
-
-#;*****************************************************************************
-#;* Boolean OSCompareAndSwap64(SInt64 oldValue, SInt64 newValue, SInt64 *ptr) *
-#;*****************************************************************************
-
-       .globl _OSCompareAndSwap64
-       .globl _OSCompareAndSwapPtr
-
-_OSCompareAndSwap64:
-_OSCompareAndSwapPtr: #;oldValue, newValue, ptr
-#if    DEBUG
-       test    $7, %rdx
-       jz      1f
-       ud2
-1:
-#endif
-       movq            %rdi, %rax
-       lock
-       cmpxchgq        %rsi, (%rdx)    #; CAS (rax is an implicit operand)
-       sete            %al                     #; did CAS succeed? (TZ=1)
-       movzbq          %al, %rax               #; clear out the high bytes
-       ret
-
-#;*******************************************************
-#;* SInt64 OSAddAtomic64(SInt64 theAmount, SInt64 *ptr) *
-#;*******************************************************
-
-       .globl  _OSAddAtomicLong
-       .globl  _OSAddAtomic64
-_OSAddAtomic64:
-_OSAddAtomicLong:
-#if    DEBUG
-       test    $7, %rsi
-       jz      1f
-       ud2
-1:
-#endif
-       lock
-       xaddq   %rdi, (%rsi)            #; Atomic exchange and add
-       movq    %rdi, %rax;
-       ret
-
-
-#;*******************************************************
-#; SInt32 OSAddAtomic(SInt32 delta, SInt32 *address) 
-#;*******************************************************
-
-       .globl  _OSAddAtomic
-_OSAddAtomic:
-#if    DEBUG
-       test    $3, %rsi
-       jz      1f
-       ud2
-1:
-#endif
-       lock
-       xaddl   %edi, (%rsi)            #; Atomic exchange and add
-       movl    %edi, %eax;
-       ret
index 069331e7fe2077812e305520296582b430766718..6323a0e18d533a7ce082e0af06a38cf99e78f3bb 100644 (file)
@@ -383,6 +383,20 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
 }
 
 /* ========================================================================= */
+
+ZEXTERN int ZEXPORT deflateResetWithIO(z_streamp strm, z_input_func zinput, z_output_func zoutput)
+{
+    int zerr;
+
+    zerr = deflateReset(strm);
+    if (Z_OK != zerr) return (zerr);
+    strm->state->zinput  = zinput;
+    strm->state->zoutput = zoutput;
+    return Z_OK;
+}
+
+/* ========================================================================= */
+
 int ZEXPORT deflateReset (strm)
     z_streamp strm;
 {
@@ -400,6 +414,8 @@ int ZEXPORT deflateReset (strm)
     s = (deflate_state *)strm->state;
     s->pending = 0;
     s->pending_out = s->pending_buf;
+    s->zinput = &read_buf;
+    s->zoutput = NULL;
 
     if (s->wrap < 0) {
         s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); */
@@ -563,14 +579,18 @@ local void flush_pending(strm)
 {
     unsigned len = strm->state->pending;
 
-    if (len > strm->avail_out) len = strm->avail_out;
-    if (len == 0) return;
+    if (strm->state->zoutput) {
+        len = (*strm->state->zoutput)(strm, strm->state->pending_out, len);
+    } else {
+       if (len > strm->avail_out) len = strm->avail_out;
+       if (len == 0) return;
+       zmemcpy(strm->next_out, strm->state->pending_out, len);
+       strm->next_out  += len;
+       strm->avail_out  -= len;
+    }
 
-    zmemcpy(strm->next_out, strm->state->pending_out, len);
-    strm->next_out  += len;
     strm->state->pending_out  += len;
     strm->total_out += len;
-    strm->avail_out  -= len;
     strm->state->pending -= len;
     if (strm->state->pending == 0) {
         strm->state->pending_out = strm->state->pending_buf;
@@ -1368,7 +1388,7 @@ local void fill_window(s)
          */
         Assert(more >= 2, "more < 2");
 
-        n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
+        n = (*s->zinput)(s->strm, s->window + s->strstart + s->lookahead, more);
         s->lookahead += n;
 
         /* Initialize the hash value now that we have some input: */
@@ -1763,3 +1783,12 @@ local block_state deflate_rle(s, flush)
     return flush == Z_FINISH ? finish_done : block_done;
 }
 #endif
+
+#if XNU_KERNEL_PRIVATE
+
+uLong zlib_deflate_memory_size(int wbits, int memlevel)
+{
+    return (31 + sizeof(deflate_state) + (1 << (wbits + 2)) + (1 << (memlevel + 9)));
+}
+
+#endif /* XNU_KERNEL_PRIVATE */
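The helper exposes deflate's working-set size so a kernel caller can budget the allocation before setting up a stream. For the usual defaults the arithmetic works out as below (sizeof(deflate_state) varies by build, so the total is approximate):

    static uLong kernel_deflate_budget(void)
    {
        /* wbits = 15, memlevel = 8:
         *   31 + sizeof(deflate_state) + (1 << 17) + (1 << 17)
         * = 31 + sizeof(deflate_state) + 131072 + 131072,
         * i.e. roughly 256 KB plus the deflate_state structure itself. */
        return zlib_deflate_memory_size(15, 8);
    }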
index 6378b20aba0289158904ebadd481469bc021daf6..a2c347a31c37aaafc950c3d80b3af2398ea1c0f1 100644 (file)
@@ -120,6 +120,8 @@ typedef unsigned IPos;
 
 typedef struct internal_state {
     z_streamp strm;      /* pointer back to this zlib stream */
+    z_input_func zinput;
+    z_output_func zoutput;
     int   status;        /* as the name implies */
     Bytef *pending_buf;  /* output still pending */
     ulg   pending_buf_size; /* size of pending_buf */
index 940446104dfc746c4dd49629a2d2a05d18d42582..657ce25e76f047c70b03893c97254c64d5d1f59d 100644 (file)
@@ -61,15 +61,17 @@ $(SOBJS): .SFLAGS
 .SFLAGS: ALWAYS
        $(_v)$(REPLACECONTENTS) $@ $(S_KCC) $(SFLAGS) $(INCFLAGS)
 
+KLD_FILES = $(OBJS)
+
 $(COMPONENT).filelist: $(OBJS)
-       $(_v)for kld_file in ${OBJS}; do      \
-               $(SEG_HACK) -n __KLD -o $${kld_file}__ $${kld_file} ; \
-               mv $${kld_file}__ $${kld_file} ; \
+       $(_v)for kld_file in ${KLD_FILES}; do      \
+               $(SEG_HACK) -n __KLD -o $${kld_file}__ $${kld_file} || exit 1; \
+               mv $${kld_file}__ $${kld_file} || exit 1; \
        done
        @echo LDFILELIST $(COMPONENT)
-       $(_v)( for obj in ${OBJS}; do   \
+       $(_v)for obj in ${OBJS}; do     \
                 echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \
-       done; ) > $(COMPONENT).filelist
+       done > $(COMPONENT).filelist
 
 
 do_all: $(COMPONENT).filelist
@@ -80,3 +82,10 @@ do_build_all:: do_all
 
 include $(MakeInc_rule)
 include $(MakeInc_dir)
+
+# The KLD segment is mapped read-only on ARM, so if we include LLVM profiling
+# here it will fault the kernel (see arm_vm_init.c). We don't currently have
+# a way of retrieving these counters from KLD anyway, so there's no harm in just
+# disabling them.
+CXXFLAGS_GEN:=$(filter-out -fprofile-instr-generate,$(CXXFLAGS_GEN))
+CFLAGS_GEN:=$(filter-out -fprofile-instr-generate,$(CFLAGS_GEN))
diff --git a/libsa/lastkerneldataconst.c b/libsa/lastkerneldataconst.c
new file mode 100644 (file)
index 0000000..9b8db0b
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2014 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <mach/vm_param.h>
+
+/*
+ * This file is compiled and linked to be the last .o of the __const section
+ * of the __DATA segment (see MakeInc.kernel, lastkernelconstructor is placed
+ * in the __LAST segment.)
+ *
+ * This blank page allows us to safely map the const section RO while the rest
+ * of __DATA is RW. This is needed since ld has no way of specifying section size
+ * alignment and no straightforward way to specify section ordering.
+ */
+
+#define PAD_SIZE       PAGE_SIZE
+
+static const uint8_t __attribute__((section("__DATA,__const"))) data_const_padding[PAD_SIZE] = {[0 ... PAD_SIZE-1] = 0xFF};
+const vm_offset_t    __attribute__((section("__DATA,__data")))  _lastkerneldataconst         = (vm_offset_t)&data_const_padding[0];
+const vm_size_t      __attribute__((section("__DATA,__data")))  _lastkerneldataconst_padsize = sizeof(data_const_padding);
index ddebd34a291ae409a0b23a22d2f6ef0c6f334a02..a1c2fc8d45029a343cc59030004317522563e20c 100644 (file)
@@ -1,18 +1,14 @@
 #include "<DEVELOPER_DIR>/Makefiles/CoreOS/Xcode/BSD.xcconfig"
 
-#include "<DEVELOPER_DIR>/AppleInternal/XcodeConfig/SimulatorSupport.xcconfig"
-// Set INSTALL_PATH[sdk=macosx*] when SimulatorSupport.xcconfig is unavailable
-INSTALL_PATH[sdk=macosx*] = $(INSTALL_PATH_ACTUAL)
-
 BUILD_VARIANTS = normal
-SUPPORTED_PLATFORMS = macosx iphoneos iphoneosnano
+SUPPORTED_PLATFORMS = macosx iphoneos iphoneosnano tvos appletvos watchos
 ONLY_ACTIVE_ARCH = NO
 DEAD_CODE_STRIPPING = YES
 DEBUG_INFORMATION_FORMAT = dwarf-with-dsym
-INSTALL_PATH_ACTUAL = /usr/lib/system
-PUBLIC_HEADERS_FOLDER_PATH = $(INSTALL_PATH_PREFIX)/usr/include
-PRIVATE_HEADERS_FOLDER_PATH = $(INSTALL_PATH_PREFIX)/usr/local/include
-OS_PRIVATE_HEADERS_FOLDER_PATH = $(INSTALL_PATH_PREFIX)/usr/local/include/os
+INSTALL_PATH = /usr/lib/system
+PUBLIC_HEADERS_FOLDER_PATH = /usr/include
+PRIVATE_HEADERS_FOLDER_PATH = /usr/local/include
+OS_PRIVATE_HEADERS_FOLDER_PATH = /usr/local/include/os
 EXECUTABLE_PREFIX = libsystem_
 PRODUCT_NAME = kernel
 ALWAYS_SEARCH_USER_PATHS = NO
@@ -21,6 +17,9 @@ OTHER_CFLAGS = -fdollars-in-identifiers -no-cpp-precomp -fno-common -fno-stack-p
 OTHER_CFLAGS[sdk=macosx*] = $(inherited) -DSYSCALL_PRE1050
 OTHER_CFLAGS[sdk=macosx*][arch=x86_64*] = $(inherited) -DNO_SYSCALL_LEGACY
 OTHER_CFLAGS[sdk=iphoneos*] = $(inherited) -DNO_SYSCALL_LEGACY
+OTHER_CFLAGS[sdk=watchos*] = $(inherited) -DNO_SYSCALL_LEGACY
+OTHER_CFLAGS[sdk=tvos*] = $(inherited) -DNO_SYSCALL_LEGACY
+OTHER_CFLAGS[sdk=appletvos*] = $(inherited) -DNO_SYSCALL_LEGACY
 GCC_PREPROCESSOR_DEFINITIONS = CF_OPEN_SOURCE CF_EXCLUDE_CSTD_HEADERS DEBUG _FORTIFY_SOURCE=0
 HEADER_SEARCH_PATHS = $(PROJECT_DIR)/mach $(PROJECT_DIR)/os $(PROJECT_DIR)/wrappers $(PROJECT_DIR)/wrappers/string $(PROJECT_DIR)/wrappers/libproc $(PROJECT_DIR)/wrappers/libproc/spawn $(BUILT_PRODUCTS_DIR)/internal_hdr/include $(BUILT_PRODUCTS_DIR)/mig_hdr/local/include $(BUILT_PRODUCTS_DIR)/mig_hdr/include $(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders
 WARNING_CFLAGS = -Wmost
@@ -30,6 +29,9 @@ CODE_SIGN_IDENTITY = -
 DYLIB_CURRENT_VERSION = $(RC_ProjectSourceVersion)
 DYLIB_LDFLAGS = -umbrella System -all_load -Wl,-alias_list,$(SRCROOT)/Libsyscall.aliases
 DYLIB_LDFLAGS[sdk=iphoneos*] = $(inherited) -Wl,-sectalign,__DATA,__data,1000
+DYLIB_LDFLAGS[sdk=watchos*] = $(inherited) -Wl,-sectalign,__DATA,__data,1000
+DYLIB_LDFLAGS[sdk=tvos*] = $(inherited) -Wl,-sectalign,__DATA,__data,1000
+DYLIB_LDFLAGS[sdk=appletvos*] = $(inherited) -Wl,-sectalign,__DATA,__data,1000
 OTHER_LDFLAGS = 
 INSTALLHDRS_SCRIPT_PHASE = YES
 INSTALLHDRS_COPY_PHASE = YES
index ccd84ed5420081ff803fb92dfc365ba4492bfa67..8243933765dd745682d6fbba8a80a1e20b96387e 100644 (file)
@@ -45,6 +45,7 @@
 
 /* Begin PBXBuildFile section */
                030B179B135377B400DAD1F0 /* open_dprotected_np.c in Sources */ = {isa = PBXBuildFile; fileRef = 030B179A135377B400DAD1F0 /* open_dprotected_np.c */; };
+               13B598941A142F6400DB2D5A /* stackshot.c in Sources */ = {isa = PBXBuildFile; fileRef = 13B598931A142F5900DB2D5A /* stackshot.c */; };
                240BAC4C1214770F000A1719 /* memcpy.c in Sources */ = {isa = PBXBuildFile; fileRef = 24B028D511FF4FBB00CA64A9 /* memcpy.c */; };
                2419382B12135FF6003CDE41 /* chmod.c in Sources */ = {isa = PBXBuildFile; fileRef = 2419382A12135FF6003CDE41 /* chmod.c */; };
                242AB66611EBDC1200107336 /* errno.c in Sources */ = {isa = PBXBuildFile; fileRef = 242AB66511EBDC1200107336 /* errno.c */; };
                29A59AE6183B110C00E8B896 /* unlinkat.c in Sources */ = {isa = PBXBuildFile; fileRef = 29A59AE5183B110C00E8B896 /* unlinkat.c */; };
                2BA88DCC1810A3CE00EB63F6 /* coalition.c in Sources */ = {isa = PBXBuildFile; fileRef = 2BA88DCB1810A3CE00EB63F6 /* coalition.c */; };
                374A36E314748F1300AAF39D /* varargs_wrappers.s in Sources */ = {isa = PBXBuildFile; fileRef = 374A36E214748EE400AAF39D /* varargs_wrappers.s */; };
+               435F3CAA1B06B7BA005ED9EF /* work_interval.c in Sources */ = {isa = PBXBuildFile; fileRef = 435F3CA91B06B7BA005ED9EF /* work_interval.c */; };
                467DAFD4157E8AF200CE68F0 /* guarded_open_np.c in Sources */ = {isa = PBXBuildFile; fileRef = 467DAFD3157E8AF200CE68F0 /* guarded_open_np.c */; };
                4BDD5F1D1891AB2F004BF300 /* mach_approximate_time.c in Sources */ = {isa = PBXBuildFile; fileRef = 4BDD5F1B1891AB2F004BF300 /* mach_approximate_time.c */; };
                4BDD5F1E1891AB2F004BF300 /* mach_approximate_time.s in Sources */ = {isa = PBXBuildFile; fileRef = 4BDD5F1C1891AB2F004BF300 /* mach_approximate_time.s */; };
                72B1E6ED190723DB00FB3FA2 /* guarded_open_dprotected_np.c in Sources */ = {isa = PBXBuildFile; fileRef = 72B1E6EC190723DB00FB3FA2 /* guarded_open_dprotected_np.c */; };
                74119F46188F3B6A00C6F48F /* vm_page_size.h in Headers */ = {isa = PBXBuildFile; fileRef = 7466C923170CB99B004557CC /* vm_page_size.h */; };
                7466C924170CBA53004557CC /* vm_page_size.h in Headers */ = {isa = PBXBuildFile; fileRef = 7466C923170CB99B004557CC /* vm_page_size.h */; };
-               746C7FEA18E48791008639D7 /* vm_page_size.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 7466C923170CB99B004557CC /* vm_page_size.h */; };
                74F3290B18EB269400B2B70E /* vm_page_size.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 7466C923170CB99B004557CC /* vm_page_size.h */; };
                7AE28FDF18AC41B1006A5626 /* csr.c in Sources */ = {isa = PBXBuildFile; fileRef = 7AE28FDE18AC41B1006A5626 /* csr.c */; };
                9002401118FC9A7F00D73BFA /* rename_ext.c in Sources */ = {isa = PBXBuildFile; fileRef = 906AA2D018F74CD1001C681A /* rename_ext.c */; };
                BA4414B518336E3600AAE813 /* mach in Copy Files */ = {isa = PBXBuildFile; fileRef = BA4414A51833697C00AAE813 /* mach */; };
                BA4414B618336E3A00AAE813 /* servers in Copy Files */ = {isa = PBXBuildFile; fileRef = BA4414A6183369A100AAE813 /* servers */; };
                BA4414B818336E6F00AAE813 /* mach in CopyFiles */ = {isa = PBXBuildFile; fileRef = BA4414A7183369C100AAE813 /* mach */; };
+               BABA36CB1A856C4700BBBCF7 /* host.c in Sources */ = {isa = PBXBuildFile; fileRef = BABA36CA1A856C4700BBBCF7 /* host.c */; };
                C639F0E51741C25800A39F47 /* gethostuuid.h in Headers */ = {isa = PBXBuildFile; fileRef = C639F0E41741C09A00A39F47 /* gethostuuid.h */; settings = {ATTRIBUTES = (Public, ); }; };
                C6460B7C182025DF00F73CCA /* sfi.c in Sources */ = {isa = PBXBuildFile; fileRef = C6460B7B182025DF00F73CCA /* sfi.c */; };
                C6AB38DB174202C10036DD9F /* gethostuuid.h in Headers */ = {isa = PBXBuildFile; fileRef = C639F0E41741C09A00A39F47 /* gethostuuid.h */; settings = {ATTRIBUTES = (Public, ); }; };
                        dstSubfolderSpec = 0;
                        files = (
                                BA4414AD18336A9300AAE813 /* mach in CopyFiles */,
-                               746C7FEA18E48791008639D7 /* vm_page_size.h in CopyFiles */,
                        );
                        runOnlyForDeploymentPostprocessing = 1;
                };
 
 /* Begin PBXFileReference section */
                030B179A135377B400DAD1F0 /* open_dprotected_np.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = open_dprotected_np.c; sourceTree = "<group>"; };
+               13B598931A142F5900DB2D5A /* stackshot.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = stackshot.c; sourceTree = "<group>"; };
                240D716711933ED300556E97 /* mach_install_mig.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = mach_install_mig.sh; sourceTree = "<group>"; };
                2419382A12135FF6003CDE41 /* chmod.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = chmod.c; sourceTree = "<group>"; };
                242AB66511EBDC1200107336 /* errno.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = errno.c; sourceTree = "<group>"; };
                2BA88DCB1810A3CE00EB63F6 /* coalition.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = coalition.c; sourceTree = "<group>"; };
                374A36E214748EE400AAF39D /* varargs_wrappers.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = varargs_wrappers.s; sourceTree = "<group>"; };
                37DDFB7614748713009D3355 /* syscall.map */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = syscall.map; sourceTree = "<group>"; };
+               435F3CA91B06B7BA005ED9EF /* work_interval.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = work_interval.c; sourceTree = "<group>"; };
                467DAFD3157E8AF200CE68F0 /* guarded_open_np.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = guarded_open_np.c; sourceTree = "<group>"; };
                4BDD5F1B1891AB2F004BF300 /* mach_approximate_time.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mach_approximate_time.c; sourceTree = "<group>"; };
                4BDD5F1C1891AB2F004BF300 /* mach_approximate_time.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = mach_approximate_time.s; sourceTree = "<group>"; };
                BA4414A6183369A100AAE813 /* servers */ = {isa = PBXFileReference; lastKnownFileType = text; name = servers; path = mig_hdr/include/servers; sourceTree = BUILT_PRODUCTS_DIR; };
                BA4414A7183369C100AAE813 /* mach */ = {isa = PBXFileReference; lastKnownFileType = text; name = mach; path = mig_hdr/local/include/mach; sourceTree = BUILT_PRODUCTS_DIR; };
                BA5CDB4018AEBAD500E37982 /* __thread_selfusage.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __thread_selfusage.s; sourceTree = "<group>"; };
+               BABA36CA1A856C4700BBBCF7 /* host.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = host.c; sourceTree = "<group>"; };
                C639F0E41741C09A00A39F47 /* gethostuuid.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = gethostuuid.h; sourceTree = "<group>"; };
                C6460B7B182025DF00F73CCA /* sfi.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = sfi.c; sourceTree = "<group>"; };
                C6BEE9171806840200D25AAB /* posix_sem_obsolete.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = posix_sem_obsolete.c; sourceTree = "<group>"; };
                                C9D9BCD7114B00600000D8B9 /* fprintf_stderr.c */,
                                C9D9BCD8114B00600000D8B9 /* mach */,
                                C9D9BCE4114B00600000D8B9 /* host_priv.defs */,
+                               BABA36CA1A856C4700BBBCF7 /* host.c */,
                                C9D9BCE5114B00600000D8B9 /* host_security.defs */,
                                C9D9BCEA114B00600000D8B9 /* lock_set.defs */,
                                C9D9BCEB114B00600000D8B9 /* mach_error_string.c */,
                                C962B16B18DBA2C80031244A /* setpriority.c */,
                                C6460B7B182025DF00F73CCA /* sfi.c */,
                                24B223B3121DFF12007DAEDE /* sigsuspend-base.c */,
+                               13B598931A142F5900DB2D5A /* stackshot.c */,
                                248AA962122C7B2A0085F5B1 /* unlink.c */,
                                29A59AE5183B110C00E8B896 /* unlinkat.c */,
                                374A36E214748EE400AAF39D /* varargs_wrappers.s */,
                                BA0D9FB0199031AD007E8A73 /* kdebug_trace.c */,
+                               435F3CA91B06B7BA005ED9EF /* work_interval.c */,
                        );
                        path = wrappers;
                        sourceTree = "<group>";
                                C9D9BD21114B00600000D8B9 /* exc_catcher.c in Sources */,
                                C9D9BD24114B00600000D8B9 /* fprintf_stderr.c in Sources */,
                                72B1E6ED190723DB00FB3FA2 /* guarded_open_dprotected_np.c in Sources */,
+                               BABA36CB1A856C4700BBBCF7 /* host.c in Sources */,
                                C9D9BD36114B00600000D8B9 /* mach_error_string.c in Sources */,
                                C9D9BD37114B00600000D8B9 /* mach_error.c in Sources */,
                                C9D9BD3B114B00600000D8B9 /* mach_init.c in Sources */,
                                248BA01D121C56BF008C073F /* connect.c in Sources */,
                                248BA01F121C607E008C073F /* fchmod.c in Sources */,
                                E4D45C3616F86BD80002AF25 /* posix_spawn.c in Sources */,
+                               13B598941A142F6400DB2D5A /* stackshot.c in Sources */,
                                C962B16C18DBA2C80031244A /* setpriority.c in Sources */,
                                248BA04F121C8F06008C073F /* fcntl.c in Sources */,
                                248BA05C121C9649008C073F /* fcntl-cancel.c in Sources */,
                                24B223B5121DFF29007DAEDE /* sigsuspend.c in Sources */,
                                248AA963122C7B2A0085F5B1 /* unlink.c in Sources */,
                                248AA965122C7C330085F5B1 /* rmdir.c in Sources */,
+                               435F3CAA1B06B7BA005ED9EF /* work_interval.c in Sources */,
                                248AA967122C7CDA0085F5B1 /* rename.c in Sources */,
                                24B8C2621237F53900D36CC3 /* remove-counter.c in Sources */,
                                C99A4F501305B2BD0054B7B7 /* __get_cpu_capabilities.s in Sources */,
                        baseConfigurationReference = C9D9BE0F114FFADC0000D8B9 /* Libsyscall.xcconfig */;
                        buildSettings = {
                                COPY_PHASE_STRIP = NO;
-                               INSTALL_PATH_ACTUAL = /usr/local/lib/dyld;
+                               INSTALL_PATH = /usr/local/lib/dyld;
                                STRIP_INSTALLED_PRODUCT = NO;
                        };
                        name = Release;
                                COPY_PHASE_STRIP = YES;
                                DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
                                MAP_PLATFORM = "$(MAP_PLATFORM_$(PLATFORM_NAME))";
+                               MAP_PLATFORM_appletvos = iPhoneOS;
                                MAP_PLATFORM_iphoneos = iPhoneOS;
                                MAP_PLATFORM_iphoneosnano = iPhoneOS;
                                MAP_PLATFORM_macosx = MacOSX;
+                               MAP_PLATFORM_tvos = iPhoneOS;
+                               MAP_PLATFORM_watchos = iPhoneOS;
                                PRODUCT_NAME = Syscalls;
                                STRIP_STYLE = debugging;
                        };
diff --git a/libsyscall/mach/host.c b/libsyscall/mach/host.c
new file mode 100644 (file)
index 0000000..3350384
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <machine/cpu_capabilities.h>
+#include <mach/kern_return.h>
+#include <mach/mach_host.h>
+#include <mach/host_priv.h>
+
+kern_return_t
+host_get_atm_diagnostic_flag(host_t host __unused,
+                                                        uint32_t *diagnostic_flag)
+{
+       volatile uint32_t *diagnostic_flag_address = (volatile uint32_t *)(uintptr_t)(_COMM_PAGE_ATM_DIAGNOSTIC_CONFIG);
+       *diagnostic_flag = *diagnostic_flag_address;
+       return KERN_SUCCESS;
+}
+
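The new wrapper above returns the ATM diagnostic flag straight from the commpage instead of trapping into the kernel. A hedged usage sketch, assuming the host_get_atm_diagnostic_flag() declaration is visible through <mach/mach.h> (the host argument is ignored by the wrapper):

	#include <mach/mach.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Sketch: query the ATM diagnostic flag via the libsyscall wrapper. */
	int
	main(void)
	{
		uint32_t flag = 0;
		kern_return_t kr = host_get_atm_diagnostic_flag(mach_host_self(), &flag);

		if (kr != KERN_SUCCESS) {
			fprintf(stderr, "host_get_atm_diagnostic_flag: %d\n", kr);
			return 1;
		}
		printf("ATM diagnostic flag: 0x%x\n", flag);
		return 0;
	}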
index 1d1db7ab3cc942a855af4841dbec8de45b08aa3c..00abb72164ef9ce632a3b044d0b0b32f83e2769b 100644 (file)
@@ -97,18 +97,24 @@ extern void                 slot_name(cpu_type_t,
 extern void                    mig_reply_setup(mach_msg_header_t *,
                                                mach_msg_header_t *);
 
+__WATCHOS_PROHIBITED __TVOS_PROHIBITED
 extern void                    mach_msg_destroy(mach_msg_header_t *);
 
+__WATCHOS_PROHIBITED __TVOS_PROHIBITED
 extern mach_msg_return_t       mach_msg_receive(mach_msg_header_t *);
 
+__WATCHOS_PROHIBITED __TVOS_PROHIBITED
 extern mach_msg_return_t       mach_msg_send(mach_msg_header_t *);
 
+__WATCHOS_PROHIBITED __TVOS_PROHIBITED
 extern mach_msg_return_t       mach_msg_server_once(boolean_t (*)
                                                     (mach_msg_header_t *,
                                                      mach_msg_header_t *),
                                                     mach_msg_size_t,
                                                     mach_port_t,
                                                     mach_msg_options_t);
+
+__WATCHOS_PROHIBITED __TVOS_PROHIBITED
 extern mach_msg_return_t       mach_msg_server(boolean_t (*)
                                                (mach_msg_header_t *,
                                                 mach_msg_header_t *),
@@ -116,6 +122,7 @@ extern mach_msg_return_t    mach_msg_server(boolean_t (*)
                                                mach_port_t,
                                                mach_msg_options_t);
 
+__WATCHOS_PROHIBITED __TVOS_PROHIBITED
 extern mach_msg_return_t       mach_msg_server_importance(boolean_t (*)
                                                (mach_msg_header_t *,
                                                 mach_msg_header_t *),
index 893a06e75151cde586696b634c6e5eb80b346e45..90a42ceb5e41233668d9fca2e3f93ae51810727e 100644 (file)
@@ -115,6 +115,7 @@ _mach_fork_child(void)
        return 0;
 }
 
+
 void
 mach_init_doit(void)
 {
index 676a5392c33dcc8b3db0f1a6df2dc02f7401fe1c..bdb446c33fc04313c556f2e8ea317651c6bf6c9e 100644 (file)
@@ -682,141 +682,7 @@ mach_msg_server_importance(
        mach_port_t rcv_name,
        mach_msg_options_t options)
 {
-       mig_reply_error_t *bufRequest, *bufReply;
-       mach_msg_size_t request_size;
-       mach_msg_size_t new_request_alloc;
-       mach_msg_size_t request_alloc;
-       mach_msg_size_t trailer_alloc;
-       mach_msg_size_t reply_alloc;
-       mach_msg_return_t mr;
-       kern_return_t kr;
-       mach_port_t self = mach_task_self_;
-       int retval = 1;
-       uint64_t token;
-       voucher_mach_msg_state_t old_state = VOUCHER_MACH_MSG_STATE_UNCHANGED;
-
-       options &= ~(MACH_SEND_MSG|MACH_RCV_MSG|MACH_RCV_VOUCHER|MACH_RCV_OVERWRITE);
-
-       reply_alloc = (mach_msg_size_t)round_page((options & MACH_SEND_TRAILER) ? 
-                            (max_size + MAX_TRAILER_SIZE) : max_size);
-
-       kr = vm_allocate(self,
-                    (vm_address_t *)&bufReply,
-                    reply_alloc,
-                    VM_MAKE_TAG(VM_MEMORY_MACH_MSG)|TRUE);
-       if (kr != KERN_SUCCESS) 
-               return kr;
-
-       request_alloc = 0;
-       trailer_alloc = REQUESTED_TRAILER_SIZE(options);
-       new_request_alloc = (mach_msg_size_t)round_page(max_size + trailer_alloc);
-
-       request_size = (options & MACH_RCV_LARGE) ?
-                  new_request_alloc : max_size + trailer_alloc;
-
-       for (;;) {
-               if (request_alloc < new_request_alloc) {
-                       request_alloc = new_request_alloc;
-                       kr = vm_allocate(self,
-                               (vm_address_t *)&bufRequest,
-                               request_alloc,
-                               VM_MAKE_TAG(VM_MEMORY_MACH_MSG)|TRUE);
-                       if (kr != KERN_SUCCESS) {
-                               vm_deallocate(self,
-                                               (vm_address_t)bufReply,
-                                               reply_alloc);
-                               return kr;
-                       }
-               }
-               
-               mr = mach_msg(&bufRequest->Head, MACH_RCV_MSG|MACH_RCV_VOUCHER|options,
-                               0, request_size, rcv_name,
-                               MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
-       
-               if (mr == MACH_MSG_SUCCESS) {
-                       /* we have another request message */
-
-                       old_state = voucher_mach_msg_adopt(&bufRequest->Head);
-
-                       retval = proc_importance_assertion_begin_with_msg(&bufRequest->Head, NULL, &token);
-
-                       (void) (*demux)(&bufRequest->Head, &bufReply->Head);
-
-                       if (!(bufReply->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX)) {
-                               if (bufReply->RetCode == MIG_NO_REPLY)
-                                       bufReply->Head.msgh_remote_port = MACH_PORT_NULL;
-                               else if ((bufReply->RetCode != KERN_SUCCESS) &&
-                                       (bufRequest->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX)) {
-                                       /* destroy the request - but not the reply port */
-                                       bufRequest->Head.msgh_remote_port = MACH_PORT_NULL;
-                                       mach_msg_destroy(&bufRequest->Head);
-                               }
-                       }
-
-                       /*
-                        * We don't want to block indefinitely because the client
-                        * isn't receiving messages from the reply port.
-                        * If we have a send-once right for the reply port, then
-                        * this isn't a concern because the send won't block.
-                        * If we have a send right, we need to use MACH_SEND_TIMEOUT.
-                        * To avoid falling off the kernel's fast RPC path,
-                        * we only supply MACH_SEND_TIMEOUT when absolutely necessary.
-                        */
-                       if (bufReply->Head.msgh_remote_port != MACH_PORT_NULL) {
-
-                               mr = mach_msg(
-                                       &bufReply->Head,
-                                       (MACH_MSGH_BITS_REMOTE(bufReply->Head.msgh_bits) ==
-                                        MACH_MSG_TYPE_MOVE_SEND_ONCE) ?
-                                        MACH_SEND_MSG|options :
-                                        MACH_SEND_MSG|MACH_SEND_TIMEOUT|options,
-                                       bufReply->Head.msgh_size, 0, MACH_PORT_NULL,
-                                       MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
-
-                               if ((mr != MACH_SEND_INVALID_DEST) &&
-                                       (mr != MACH_SEND_TIMED_OUT)) {
-                                       if (retval == 0)
-                                               proc_importance_assertion_complete(token);
-
-                                       voucher_mach_msg_revert(old_state);
-                                       old_state = VOUCHER_MACH_MSG_STATE_UNCHANGED;
-
-                                       continue;
-                               }
-                               mr = MACH_MSG_SUCCESS;
-                       }
-                       if (bufReply->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX)
-                               mach_msg_destroy(&bufReply->Head);
-                       if (retval == 0)
-                               proc_importance_assertion_complete(token);
-
-                       voucher_mach_msg_revert(old_state);
-                       old_state = VOUCHER_MACH_MSG_STATE_UNCHANGED;
-
-               } /* if (mr == MACH_MSG_SUCCESS) */
-               
-               if ((mr == MACH_RCV_TOO_LARGE) && (options & MACH_RCV_LARGE)) {
-                       new_request_alloc = (mach_msg_size_t)round_page(bufRequest->Head.msgh_size +
-                                                               trailer_alloc);
-                       request_size = new_request_alloc;
-                       vm_deallocate(self,
-                                                 (vm_address_t) bufRequest,
-                                                 request_alloc);
-                       continue;
-               } else if (mr == MACH_MSG_SUCCESS)
-                       continue;
-               else
-                       break;
-
-    } /* for(;;) */
-
-    (void)vm_deallocate(self,
-                       (vm_address_t) bufRequest,
-                       request_alloc);
-    (void)vm_deallocate(self,
-                       (vm_address_t) bufReply,
-                       reply_alloc);
-    return mr;
+       return mach_msg_server(demux, max_size, rcv_name, options);
 }
 
 kern_return_t
diff --git a/libsyscall/mach/watchos_prohibited_mig.txt b/libsyscall/mach/watchos_prohibited_mig.txt
new file mode 100644 (file)
index 0000000..4d27c62
--- /dev/null
@@ -0,0 +1,53 @@
+__WATCHOS_PROHIBITED __TVOS_PROHIBITED
+thread_terminate
+act_get_state
+act_set_state
+thread_get_state
+thread_set_state
+thread_suspend
+thread_resume
+thread_abort
+thread_abort_safely
+thread_depress_abort
+thread_get_special_port
+thread_set_special_port
+thread_set_exception_ports
+thread_get_exception_ports
+thread_swap_exception_ports
+thread_get_mach_voucher
+thread_set_mach_voucher
+thread_swap_mach_voucher
+mach_ports_register
+mach_ports_lookup
+task_suspend
+task_resume
+task_set_info
+task_get_special_port
+task_set_special_port
+thread_create
+thread_create_running
+task_set_exception_ports
+task_get_exception_ports
+task_swap_exception_ports
+task_policy_set
+task_policy_get
+task_zone_info
+task_get_state
+task_set_state
+task_set_phys_footprint_limit
+task_suspend2
+task_resume2
+task_get_mach_voucher
+task_set_mach_voucher
+task_swap_mach_voucher
+task_set_port_space
+host_request_notification
+host_info
+task_wire
+mach_port_allocate_name
+host_create_mach_voucher
+host_register_mach_voucher_attr_manager
+host_register_well_known_mach_voucher_attr_manager
+host_set_atm_diagnostic_flag
+host_get_atm_diagnostic_flag
+
index c8808f3ae41309810444d878764623f97e861a82..e421e0af4d238da3c74b80577f2355d2fdb0b8bf 100644 (file)
@@ -40,9 +40,15 @@ fcntl(int fd, int cmd, ...)
        va_start(ap, cmd);
        switch(cmd) {
         case F_GETLK:
+        case F_GETLKPID:
         case F_SETLK:
         case F_SETLKW:
         case F_SETLKWTIMEOUT:
+       case F_OFD_GETLK:
+       case F_OFD_GETLKPID:
+       case F_OFD_SETLK:
+       case F_OFD_SETLKW:
+       case F_OFD_SETLKWTIMEOUT:
         case F_PREALLOCATE:
         case F_SETSIZE:
         case F_RDADVISE:
@@ -57,6 +63,7 @@ fcntl(int fd, int cmd, ...)
         case F_ADDSIGS:
         case F_ADDFILESIGS:
         case F_ADDFILESIGS_FOR_DYLD_SIM:
+        case F_ADDFILESIGS_RETURN:
         case F_FINDSIGS:
         case F_TRANSCODEKEY:
                arg = va_arg(ap, void *);
index 2870bf97f480d6481ee47ed318e83516132dd54e..c5944c507c66445f03b8e191ad144fc760e73eb9 100644 (file)
@@ -29,15 +29,10 @@ int __csrctl(csr_op_t op, void *buffer, size_t size);
 
 int csr_check(csr_config_t mask)
 {
-       return __csrctl(CSR_OP_CHECK, &mask, sizeof(csr_config_t));
+       return __csrctl(CSR_SYSCALL_CHECK, &mask, sizeof(csr_config_t));
 }
 
 int csr_get_active_config(csr_config_t *config)
 {
-       return __csrctl(CSR_OP_GET_ACTIVE_CONFIG, config, sizeof(csr_config_t));
-}
-
-int csr_get_pending_config(csr_config_t *config)
-{
-       return __csrctl(CSR_OP_GET_PENDING_CONFIG, config, sizeof(csr_config_t));
+       return __csrctl(CSR_SYSCALL_GET_ACTIVE_CONFIG, config, sizeof(csr_config_t));
 }
index 4867f9b5184b69c94fd42e574e52126b4d2957bd..02f074cab12a136d19e13c3e247f75f2153f104a 100644 (file)
  */
 
 #include <stdint.h>
+#include <stdlib.h>
 #include <machine/cpu_capabilities.h>
 #include <sys/kdebug.h>
 #include <sys/errno.h>
 
-#define CLASS_MASK      0xff000000
-#define CLASS_OFFSET    24
-#define SUBCLASS_MASK   0x00ff0000
-#define SUBCLASS_OFFSET 16
+extern int __kdebug_trace64(uint32_t code, uint64_t arg1, uint64_t arg2,
+                            uint64_t arg3, uint64_t arg4);
+extern uint64_t __kdebug_trace_string(uint32_t debugid, uint64_t str_id,
+                                      const char *str);
 
-#define EXTRACT_CLASS(debugid)          ((uint8_t)(((debugid) & CLASS_MASK) >> CLASS_OFFSET))
-#define EXTRACT_SUBCLASS(debugid)       ( (uint8_t) ( ((debugid) & SUBCLASS_MASK) >> SUBCLASS_OFFSET ) )
+/* Returns non-zero if tracing is enabled. */
+static int
+kdebug_enabled(void)
+{
+       volatile uint32_t *kdebug_enable_address =
+           (volatile uint32_t *)(uintptr_t)(_COMM_PAGE_KDEBUG_ENABLE);
+
+       if (*kdebug_enable_address == 0) {
+               return 0;
+       }
 
-extern int __kdebug_trace64(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4);
+       return 1;
+}
 
-int
-kdebug_trace(uint32_t code, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4)
+static int
+kdebug_validate_debugid(uint32_t debugid)
 {
-       uint8_t code_class;
-       volatile uint32_t *kdebug_enable_address = (volatile uint32_t *)(uintptr_t)(_COMM_PAGE_KDEBUG_ENABLE);
+       uint8_t debugid_class;
 
        /*
-        * This filtering is also done in the kernel, but we also do it here so that errors
-        * are returned in all cases, not just when the system call is actually performed.
+        * This filtering is also done in the kernel, but we also do it here so
+        * that errors are returned in all cases, not just when the system call
+        * is actually performed.
         */
-       code_class = EXTRACT_CLASS(code);
-       switch (code_class) {
+       debugid_class = KDBG_EXTRACT_CLASS(debugid);
+       switch (debugid_class) {
                case DBG_TRACE:
-                       errno = EPERM;
-                       return -1;
+                       return EPERM;
        }
 
-       if (*kdebug_enable_address == 0) {
+       return 0;
+}
+
+int
+kdebug_trace(uint32_t debugid, uint64_t arg1, uint64_t arg2, uint64_t arg3,
+             uint64_t arg4)
+{
+       int err;
+
+       if (!kdebug_enabled()) {
                return 0;
        }
-       
-       return __kdebug_trace64(code, arg1, arg2, arg3, arg4);
+
+       if ((err = kdebug_validate_debugid(debugid)) != 0) {
+               errno = err;
+               return -1;
+       }
+
+       return __kdebug_trace64(debugid, arg1, arg2, arg3, arg4);
+}
+
+uint64_t
+kdebug_trace_string(uint32_t debugid, uint64_t str_id, const char *str)
+{
+       int err;
+
+       if (!kdebug_enabled()) {
+               return 0;
+       }
+
+       if ((int64_t)str_id == -1) {
+               errno = EINVAL;
+               return (uint64_t)-1;
+       }
+
+       if (str_id == 0 && str == NULL) {
+               errno = EINVAL;
+               return (uint64_t)-1;
+       }
+
+       if ((err = kdebug_validate_debugid(debugid)) != 0) {
+               errno = err;
+               return (uint64_t)-1;
+       }
+
+       return __kdebug_trace_string(debugid, str_id, str);
 }
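For context, the reworked wrappers above consult the commpage enable word first and only validate the debugid and trap when tracing is on. A hedged usage sketch of the two entry points; the class/subclass/code values are illustrative, and the prototypes are assumed visible via <sys/kdebug.h> (they are SPI there), which also provides KDBG_CODE() and DBG_APPS:

	#include <stdint.h>
	#include <stdio.h>
	#include <sys/kdebug.h>

	/* Sketch: emit one event and one traced string from user space. */
	int
	main(void)
	{
		/* Illustrative debugid; DBG_TRACE-class ids are rejected. */
		uint32_t debugid = KDBG_CODE(DBG_APPS, 0x42, 1);

		/* Returns 0 silently when tracing is disabled. */
		if (kdebug_trace(debugid, 1, 2, 3, 4) == -1) {
			perror("kdebug_trace");
		}

		/* A str_id of 0 with a non-NULL string asks for a new string id. */
		uint64_t str_id = kdebug_trace_string(debugid, 0, "hello, ktrace");
		if (str_id == (uint64_t)-1) {
			perror("kdebug_trace_string");
		}
		return 0;
	}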
index 2f47dcaf4a2977f35c64557119f2886c0ad68fa8..730a15e41ad0fb8baea318ab4683af39ea76382d 100644 (file)
 #include <errno.h>
 #include <string.h>
 #include <strings.h>
+#include <stdlib.h>
 #include <sys/errno.h>
 #include <sys/msgbuf.h>
 #include <sys/resource.h>
 #include <sys/process_policy.h>
+#include <sys/event.h>
 #include <mach/message.h>
 
 #include "libproc_internal.h"
@@ -113,6 +115,17 @@ proc_pidoriginatorinfo(int flavor, void *buffer, int buffersize)
        return(retval);
 }
 
+int
+proc_listcoalitions(int flavor, int coaltype, void *buffer, int buffersize)
+{
+       int retval;
+
+       if ((retval = __proc_info(PROC_INFO_CALL_LISTCOALITIONS, flavor, coaltype, 0, buffer, buffersize)) == -1)
+               return 0;
+
+       return retval;
+}
+
 int
 proc_pid_rusage(int pid, int flavor, rusage_info_t *buffer)
 {
@@ -517,6 +530,78 @@ proc_disable_wakemon(pid_t pid)
        return (proc_rlimit_control(pid, RLIMIT_WAKEUPS_MONITOR, &params));
 }
 
+int
+proc_list_uptrs(int pid, uint64_t *buf, uint32_t bufsz)
+{
+       int i, j;
+       int nfds, nkns;
+       int count = 0;
+       int knote_max = 4096; /* arbitrary starting point */
+
+       /* if buffer is empty, this call simply counts the knotes */
+       if (bufsz > 0 && buf == NULL) {
+               errno = EFAULT;
+               return -1;
+       }
+
+       struct proc_fdinfo fdlist[OPEN_MAX];
+       nfds = proc_pidinfo(pid, PROC_PIDLISTFDS, 0, fdlist, OPEN_MAX*sizeof(struct proc_fdinfo));
+       if (nfds <= 0 || nfds > OPEN_MAX) {
+               return -1;
+       }
+
+       struct kevent_extinfo *kqext = malloc(knote_max * sizeof(struct kevent_extinfo));
+       if (!kqext) {
+               errno = ENOMEM;
+               return -1;
+       }
+
+       for (i = 0; i < nfds; i++) {
+               if (fdlist[i].proc_fdtype != PROX_FDTYPE_KQUEUE) {
+                       continue;
+               }
+
+ again:
+               nkns = __proc_info(PROC_INFO_CALL_PIDFDINFO, pid, PROC_PIDFDKQUEUE_EXTINFO,
+                               (uint64_t)fdlist[i].proc_fd, kqext, knote_max * sizeof(struct kevent_extinfo));
+               if (nkns < 0) {
+                       if (errno == EBADF) {
+                               /* the FD table can change after enumerating the FDs */
+                               errno = EAGAIN;
+                       }
+                       free(kqext);
+                       return -1;
+               }
+
+               if (nkns > knote_max) {
+                       /* there are more knotes than we requested - try again with a
+                        * larger buffer */
+                       free(kqext);
+                       knote_max = nkns + 32; /* small margin in case of extra knotes */
+                       kqext = malloc(knote_max * sizeof(struct kevent_extinfo));
+                       if (!kqext) {
+                               errno = ENOMEM;
+                               return -1;
+                       }
+                       goto again;
+               }
+
+               for (j = 0; j < nkns; j++) {
+                       if (kqext[j].kqext_kev.udata == 0) {
+                               continue;
+                       }
+
+                       if (bufsz >= sizeof(uint64_t)) {
+                               *buf++ = kqext[j].kqext_kev.udata;
+                               bufsz -= sizeof(uint64_t);
+                       }
+                       count++;
+               }
+       }
+
+       free(kqext);
+       return count;
+}
 
 
 
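A hedged usage sketch of the new proc_list_uptrs() wrapper above, following its two-step contract (count first, then fill). The prototype is gated behind PRIVATE in libproc.h (see the hunk below), so this out-of-tree sketch supplies its own declaration; the target pid is illustrative:

	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/types.h>

	/* Private SPI; normally visible only under PRIVATE in libproc.h. */
	int proc_list_uptrs(pid_t pid, uint64_t *buffer, uint32_t buffersize);

	/* Sketch: list opaque kevent udata values registered on a process's
	 * kqueues. The first call counts; the second fills the buffer. */
	static void
	dump_uptrs(pid_t pid)
	{
		int count = proc_list_uptrs(pid, NULL, 0);
		if (count <= 0) {
			return;
		}

		uint64_t *buf = calloc((size_t)count, sizeof(*buf));
		if (buf == NULL) {
			return;
		}

		int filled = proc_list_uptrs(pid, buf, (uint32_t)(count * sizeof(*buf)));
		for (int i = 0; i < filled && i < count; i++) {
			printf("%d: 0x%llx\n", i, (unsigned long long)buf[i]);
		}
		free(buf);
	}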
index 9e98f1760cc8736eff4cd4c64616e6ccca02c438..27633ffa460405044a59aeebc836ab6e0f219cd9 100644 (file)
@@ -126,6 +126,20 @@ int proc_clear_dirty(pid_t pid, uint32_t flags);
 
 int proc_terminate(pid_t pid, int *sig);
 
+#ifdef PRIVATE
+/*
+ * Enumerate potential userspace pointers embedded in kernel data structures.
+ * Currently inspects kqueues only.
+ *
+ * NOTE: returned "pointers" are opaque user-supplied values and thus not
+ * guaranteed to address valid objects or be pointers at all.
+ *
+ * Returns the number of pointers found (which may exceed buffersize), or -1 on
+ * failure with errno set appropriately.
+ */
+int proc_list_uptrs(pid_t pid, uint64_t *buffer, uint32_t buffersize);
+#endif /* PRIVATE */
+
 __END_DECLS
 
 #endif /*_LIBPROC_H_ */
index a39de570f1732840fe2446d10bff61135ee2c1a9..182cf886f761a5d2bc06322dd4c6759a84155755 100644 (file)
@@ -95,6 +95,8 @@ int proc_trace_log(pid_t pid, uint64_t uniqueid) __OSX_AVAILABLE_STARTING(__MAC_
 /* proc_info call to get the originator information */
 int proc_pidoriginatorinfo(int flavor,  void *buffer, int buffersize) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0);
 
+int proc_listcoalitions(int flavor, int coaltype, void *buffer, int buffersize) __OSX_AVAILABLE_STARTING(__MAC_10_11, __IPHONE_8_3);
+
 #if !TARGET_IPHONE_SIMULATOR
 
 #define PROC_SUPPRESS_SUCCESS                (0)
index d3bec4ede7d525535756688db91f9cd0abaaea27..88b6cabf3115709b66480963e5bd4c9941e9a35d 100644 (file)
@@ -38,6 +38,7 @@
 #include <strings.h>
 #include <mach/port.h>
 #include <mach/exception_types.h>
+#include <mach/coalition.h> /* for COALITION_TYPE_MAX */
 
 
 /*
@@ -112,7 +113,6 @@ posix_spawnattr_init(posix_spawnattr_t *attr)
 
                 (*psattrp)->short_padding = 0; 
                 (*psattrp)->flags_padding = 0; 
-                (*psattrp)->int_padding = 0;
 
                /* Default is no new apptype requested */
                (*psattrp)->psa_apptype = POSIX_SPAWN_PROCESS_TYPE_DEFAULT;
@@ -120,7 +120,8 @@ posix_spawnattr_init(posix_spawnattr_t *attr)
                /* Jetsam related */
                (*psattrp)->psa_jetsam_flags = 0;
                (*psattrp)->psa_priority = -1;
-               (*psattrp)->psa_high_water_mark = -1;
+               (*psattrp)->psa_memlimit_active = -1;
+               (*psattrp)->psa_memlimit_inactive = -1;
 
                /* Default is no CPU usage monitor active. */
                (*psattrp)->psa_cpumonitor_percent = 0;
@@ -129,11 +130,26 @@ posix_spawnattr_init(posix_spawnattr_t *attr)
                /* Default is no MAC policy extensions. */
                (*psattrp)->psa_mac_extensions = NULL;
 
-               /* Default is to inherit parent's coalition */
-               (*psattrp)->psa_coalitionid = 0;
+               /* Default is to inherit parent's coalition(s) */
+               (*psattrp)->psa_coalition_info = NULL;
+
+               (*psattrp)->reserved = NULL;
+
+               /*
+                * Old coalition field.
+                * For backwards compatibility reasons, we set this to 1,
+                * which is the first valid coalition id. This allows newer
+                * user space code to properly spawn processes on older
+                * kernels (they will just all end up in the same coalition).
+                */
+               (*psattrp)->psa_reserved = 1;
 
                /* Default is no new clamp */
                (*psattrp)->psa_qos_clamp = POSIX_SPAWN_PROC_CLAMP_NONE;
+
+               /* Default is no change to role */
+               (*psattrp)->psa_darwin_role = POSIX_SPAWN_DARWIN_ROLE_NONE;
        }
 
        return (err);
@@ -161,6 +177,8 @@ posix_spawnattr_init(posix_spawnattr_t *attr)
  *             EINVAL  The value specified by attr is invalid.
  */
 static int posix_spawn_destroyportactions_np(posix_spawnattr_t *);
+static int posix_spawn_destroycoalition_info_np(posix_spawnattr_t *);
+
 
 int
 posix_spawnattr_destroy(posix_spawnattr_t *attr)
@@ -172,6 +190,7 @@ posix_spawnattr_destroy(posix_spawnattr_t *attr)
 
        psattr = *(_posix_spawnattr_t *)attr;
        posix_spawn_destroyportactions_np(attr);
+       posix_spawn_destroycoalition_info_np(attr);
 
        free(psattr);
        *attr = NULL;
@@ -736,6 +755,29 @@ posix_spawn_destroyportactions_np(posix_spawnattr_t *attr)
        return 0;
 }
 
+/*
+ * posix_spawn_destroycoalition_info_np
+ * Description: clean up coalition_info struct in posix_spawnattr_t attr
+ */
+static int
+posix_spawn_destroycoalition_info_np(posix_spawnattr_t *attr)
+{
+       _posix_spawnattr_t psattr;
+       struct _posix_spawn_coalition_info *coal_info;
+
+       if (attr == NULL || *attr == NULL)
+               return EINVAL;
+
+       psattr = *(_posix_spawnattr_t *)attr;
+       coal_info = psattr->psa_coalition_info;
+       if (coal_info == NULL)
+               return EINVAL;
+
+       psattr->psa_coalition_info = NULL;
+       free(coal_info);
+       return 0;
+}
+
 /*
  * posix_spawn_appendportaction_np
  * Description: append a port action, grow the array if necessary
@@ -1390,16 +1432,31 @@ posix_spawnattr_setmacpolicyinfo_np(posix_spawnattr_t * __restrict attr,
        return 0;
 }
 
-int posix_spawnattr_setcoalition_np(const posix_spawnattr_t * __restrict attr, uint64_t coalitionid)
+int posix_spawnattr_setcoalition_np(const posix_spawnattr_t * __restrict attr,
+                                   uint64_t coalitionid, int type, int role)
 {
        _posix_spawnattr_t psattr;
+       struct _posix_spawn_coalition_info *coal_info;
 
        if (attr == NULL || *attr == NULL) {
                return EINVAL;
        }
+       if (type < 0 || type > COALITION_TYPE_MAX)
+               return EINVAL;
 
        psattr = *(_posix_spawnattr_t *)attr;
-       psattr->psa_coalitionid = coalitionid;
+
+       coal_info = psattr->psa_coalition_info;
+       if (!coal_info) {
+               coal_info = (struct _posix_spawn_coalition_info *)malloc(sizeof(*coal_info));
+               if (!coal_info)
+                       return ENOMEM;
+               memset(coal_info, 0, sizeof(*coal_info));
+               psattr->psa_coalition_info = coal_info;
+       }
+
+       coal_info->psci_info[type].psci_id   = coalitionid;
+       coal_info->psci_info[type].psci_role = role;
 
        return 0;
 }
@@ -1437,6 +1494,34 @@ posix_spawnattr_get_qos_clamp_np(const posix_spawnattr_t * __restrict attr, uint
        return (0);
 }
 
+int posix_spawnattr_set_darwin_role_np(const posix_spawnattr_t * __restrict attr, uint64_t darwin_role)
+{
+       _posix_spawnattr_t psattr;
+
+       if (attr == NULL || *attr == NULL) {
+               return EINVAL;
+       }
+
+       psattr = *(_posix_spawnattr_t *)attr;
+       psattr->psa_darwin_role = darwin_role;
+
+       return 0;
+}
+
+int
+posix_spawnattr_get_darwin_role_np(const posix_spawnattr_t * __restrict attr, uint64_t * __restrict darwin_rolep)
+{
+       _posix_spawnattr_t psattr;
+
+       if (attr == NULL || *attr == NULL) {
+               return EINVAL;
+       }
+
+       psattr = *(_posix_spawnattr_t *)attr;
+       *darwin_rolep = psattr->psa_darwin_role;
+
+       return (0);
+}
 
 /*
  * posix_spawn
@@ -1511,6 +1596,10 @@ posix_spawn(pid_t * __restrict pid, const char * __restrict path,
                                ad.mac_extensions_size = PS_MAC_EXTENSIONS_SIZE(
                                                ad.mac_extensions->psmx_count);
                        }
+                       if (psattr->psa_coalition_info != NULL) {
+                               ad.coal_info_size = sizeof(struct _posix_spawn_coalition_info);
+                               ad.coal_info = psattr->psa_coalition_info;
+                       }
                }
                if (file_actions != NULL && *file_actions != NULL) {
                        _posix_spawn_file_actions_t psactsp =
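A hedged sketch of the widened posix_spawnattr_setcoalition_np() signature above, which now takes a coalition type and role in addition to the id. The COALITION_* constants are assumed to come from <mach/coalition.h> (included by the spawn.c hunk), spawn_private.h is the private SPI header shown below, and the coalition id and role choice are purely illustrative:

	#include <spawn.h>
	#include <spawn_private.h>
	#include <mach/coalition.h>
	#include <stdint.h>
	#include <sys/types.h>

	/* Sketch: spawn a process as the leader of an existing resource
	 * coalition. The id would normally come from the coalition SPI. */
	static int
	spawn_in_coalition(const char *path, char *const argv[], char *const envp[],
	    uint64_t resource_coal_id)
	{
		posix_spawnattr_t attr;
		pid_t pid;
		int err = posix_spawnattr_init(&attr);
		if (err != 0)
			return err;

		err = posix_spawnattr_setcoalition_np(&attr, resource_coal_id,
		    COALITION_TYPE_RESOURCE, COALITION_TASKROLE_LEADER);
		if (err == 0)
			err = posix_spawn(&pid, path, NULL, &attr, argv, envp);

		posix_spawnattr_destroy(&attr);
		return err;
	}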
index 53b4ecb86cf38db17038015d135e34751c71c62a..663dd3ca5b06ee7d1662b17bba4263a80514ce07 100644 (file)
@@ -29,9 +29,9 @@
  * [SPN] Support for _POSIX_SPAWN
  */
 
-#include <sys/cdefs.h> 
+#include <sys/cdefs.h>
 #include <_types.h>
-#include <sys/spawn.h> /* shared types */
+#include <sys/spawn.h> /* shared types */
 
 #include <Availability.h>
 
@@ -56,38 +56,72 @@ __BEGIN_DECLS
  * gcc under c99 mode won't compile "[ __restrict]" by itself.  As a workaround,
  * a dummy argument name is added.
  */
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawn(pid_t * __restrict, const char * __restrict,
                const posix_spawn_file_actions_t *,
                const posix_spawnattr_t * __restrict,
                char *const __argv[ __restrict],
                char *const __envp[ __restrict]) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawnp(pid_t * __restrict, const char * __restrict,
                const posix_spawn_file_actions_t *,
                const posix_spawnattr_t * __restrict,
                char *const __argv[ __restrict],
                char *const __envp[ __restrict]) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawn_file_actions_addclose(posix_spawn_file_actions_t *, int) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawn_file_actions_adddup2(posix_spawn_file_actions_t *, int,
                int) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawn_file_actions_addopen(
                posix_spawn_file_actions_t * __restrict, int,
                const char * __restrict, int, mode_t) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawn_file_actions_destroy(posix_spawn_file_actions_t *) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawn_file_actions_init(posix_spawn_file_actions_t *) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawnattr_destroy(posix_spawnattr_t *) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawnattr_getsigdefault(const posix_spawnattr_t * __restrict,
                sigset_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawnattr_getflags(const posix_spawnattr_t * __restrict,
                short * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawnattr_getpgroup(const posix_spawnattr_t * __restrict,
                pid_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawnattr_getsigmask(const posix_spawnattr_t * __restrict,
                sigset_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawnattr_init(posix_spawnattr_t *) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawnattr_setsigdefault(posix_spawnattr_t * __restrict,
                const sigset_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawnattr_setflags(posix_spawnattr_t *, short) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawnattr_setpgroup(posix_spawnattr_t *, pid_t) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawnattr_setsigmask(posix_spawnattr_t * __restrict,
                const sigset_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
 
@@ -115,17 +149,28 @@ __END_DECLS
 
 __BEGIN_DECLS
 
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawnattr_getbinpref_np(const posix_spawnattr_t * __restrict,
                size_t, cpu_type_t *__restrict, size_t *__restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawnattr_setauditsessionport_np(posix_spawnattr_t *__restrict,
                mach_port_t) __OSX_AVAILABLE_STARTING(__MAC_10_6, __IPHONE_3_2);
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawnattr_setbinpref_np(posix_spawnattr_t * __restrict,
                size_t, cpu_type_t *__restrict, size_t *__restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawnattr_setexceptionports_np(posix_spawnattr_t *__restrict,
                exception_mask_t, mach_port_t,
                exception_behavior_t, thread_state_flavor_t) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawnattr_setspecialport_np(posix_spawnattr_t *__restrict,
                mach_port_t, int) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+
+__WATCHOS_PROHIBITED  __TVOS_PROHIBITED
 int    posix_spawn_file_actions_addinherit_np(posix_spawn_file_actions_t *,
                int) __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_4_3);
 
index 10a1b544c944591f2a9d1b8c0104846003e18df5..f98d2d2bd5152a70ea3485d4e628f080395299a3 100644 (file)
@@ -26,6 +26,7 @@
 
 #include <spawn.h>
 #include <sys/cdefs.h>
+#include <sys/types.h>
 #include <Availability.h>
 #include <TargetConditionals.h>
 
@@ -48,9 +49,12 @@ int  posix_spawnattr_set_importancewatch_port_np(posix_spawnattr_t * __restrict a
 int    posix_spawnattr_getmacpolicyinfo_np(const posix_spawnattr_t * __restrict, const char *, void **, size_t *) __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0);
 int    posix_spawnattr_setmacpolicyinfo_np(posix_spawnattr_t * __restrict, const char *, void *, size_t) __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0);
 
-int    posix_spawnattr_setcoalition_np(const posix_spawnattr_t * __restrict, uint64_t) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0);
+int    posix_spawnattr_setcoalition_np(const posix_spawnattr_t * __restrict, uint64_t, int, int) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0);
 
 int     posix_spawnattr_set_qos_clamp_np(const posix_spawnattr_t * __restrict, uint64_t) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0);
 int     posix_spawnattr_get_qos_clamp_np(const posix_spawnattr_t * __restrict, uint64_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0);
 
+int     posix_spawnattr_set_darwin_role_np(const posix_spawnattr_t * __restrict, uint64_t) __OSX_AVAILABLE_STARTING(__MAC_10_11, __IPHONE_9_0);
+int     posix_spawnattr_get_darwin_role_np(const posix_spawnattr_t * __restrict, uint64_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_11, __IPHONE_9_0);
+
 #endif /* !defined _SPAWN_PRIVATE_H_*/
diff --git a/libsyscall/wrappers/stackshot.c b/libsyscall/wrappers/stackshot.c
new file mode 100644 (file)
index 0000000..c563312
--- /dev/null
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2014 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+#include <sys/stackshot.h>
+#include <mach/mach.h>
+#include <mach/mach_vm.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <errno.h>
+
+/*
+ * System call entry point
+ */
+int __stack_snapshot_with_config(int stackshot_config_version, user_addr_t stackshot_config, size_t stackshot_config_size);
+
+/*
+ * stackshot_config_create:    create and initialize the arguments for a stackshot
+ *
+ * Outputs:                    NULL if malloc fails
+ *                             a pointer to a new stackshot_config_t on success
+ */
+stackshot_config_t *
+stackshot_config_create(void)
+{
+       stackshot_config_t *s_config;
+
+       s_config = malloc(sizeof(stackshot_config_t));
+       if (s_config == NULL) {
+               return NULL;
+       }
+
+       s_config->sc_pid = -1;
+       s_config->sc_flags = 0;
+       s_config->sc_since_timestamp = 0;
+       s_config->sc_buffer = 0;
+       s_config->sc_size = 0;
+
+       return s_config;
+}
+
+/*
+ * stackshot_config_set_pid:   set the PID to be traced
+ *
+ * Inputs:                     stackshot_config - a pointer to the stackshot_config_t we want to update
+ *                             pid - process id of process to be traced, or -1 for the entire system
+ *
+ * Outputs:                    EINVAL if the passed stackshot_config pointer is NULL
+ *                             0 on success
+ */
+int
+stackshot_config_set_pid(stackshot_config_t *stackshot_config, int pid)
+{
+       stackshot_config_t *s_config;
+
+       if (stackshot_config == NULL) {
+               return EINVAL;
+       }
+
+       s_config = (stackshot_config_t *) stackshot_config;
+       s_config->sc_pid = pid;
+
+       return 0;
+}
+
+/*
+ * stackshot_config_set_flags: set the flags to be passed for the stackshot
+ *
+ * Inputs:                     stackshot_config - a pointer to the stackshot_config_t we want to update
+ *                             flags - flags to pass to stackshot
+ *
+ * Outputs:                    EINVAL if the passed stackshot_config pointer is NULL
+ *                             0 on success
+ */
+int
+stackshot_config_set_flags(stackshot_config_t *stackshot_config, uint32_t flags)
+{
+       stackshot_config_t *s_config;
+
+       if (stackshot_config == NULL) {
+               return EINVAL;
+       }
+
+       s_config = (stackshot_config_t *) stackshot_config;
+       s_config->sc_flags = flags;
+
+       return 0;
+}
+
+/*
+ * stackshot_capture_with_config:      take a stackshot with the provided config
+ *
+ * Inputs:                             stackshot_config - a pointer to the stackshot_config_t we want to use
+ *
+ * Outputs:                            EINVAL if the passed stackshot_config pointer is NULL, if a caller is
+ *                                             trying to reuse a config without deallocating its buffer, or if
+ *                                             there is a problem with the arguments
+ *                                     EFAULT if there was a problem with accessing the arguments from the kernel
+ *                                     EPERM if the caller is not privileged
+ *                                     ENOTSUP if the caller is passing a stackshot config version that is not
+ *                                             supported by the kernel (indicates libsyscall:kernel mismatch),
+ *                                             or if the caller is requesting unsupported flags
+ *                                     ENOMEM if the kernel is unable to allocate memory
+ *                                     ENOSPC if the caller doesn't have enough space in their address space for
+ *                                             the kernel to remap the buffer
+ *                                     ENOENT if the caller is requesting an existing buffer that doesn't exist
+ *                                             or the target PID isn't found
+ *                                     0 on success
+ */
+int
+stackshot_capture_with_config(stackshot_config_t *stackshot_config)
+{
+       int ret;
+       stackshot_config_t *s_config;
+
+       if (stackshot_config == NULL) {
+               return EINVAL;
+       }
+
+       s_config = (stackshot_config_t *) stackshot_config;
+       if (s_config->sc_buffer != 0)  {
+               return EINVAL;
+       }
+
+       s_config->sc_out_buffer_addr = &s_config->sc_buffer;
+       s_config->sc_out_size_addr = &s_config->sc_size;
+       ret = __stack_snapshot_with_config(STACKSHOT_CONFIG_TYPE, s_config, sizeof(stackshot_config_t));
+       
+       if (ret != 0) {
+               ret = errno;
+               s_config->sc_buffer = 0;
+               s_config->sc_size = 0;
+       }
+
+       return ret;
+}
+
+/*
+ * stackshot_config_get_stackshot_buffer:      get a pointer to the buffer containing the stackshot
+ *
+ * Inputs:                                     stackshot_config - a pointer to a stackshot_config_t
+ *
+ * Outputs:                                    NULL if the passed stackshot_config is NULL or if its buffer is NULL
+ *                                             a pointer to the buffer containing the stackshot on success
+ */
+void *
+stackshot_config_get_stackshot_buffer(stackshot_config_t *stackshot_config)
+{
+       stackshot_config_t *s_config;
+
+       if (stackshot_config == NULL) {
+               return NULL;
+       }
+       s_config = (stackshot_config_t *) stackshot_config;
+
+       return ((void *)s_config->sc_buffer);
+}
+
+/*
+ * stackshot_config_get_stackshot_size:        get the size of the stackshot buffer
+ *
+ * Inputs:  stackshot_config - a pointer to a stackshot_config_t
+ *
+ * Outputs: -1 if the passed stackshot config is NULL or there is no buffer
+ *             the length of the stackshot buffer on success
+ */
+uint32_t
+stackshot_config_get_stackshot_size(stackshot_config_t * stackshot_config)
+{
+       if (stackshot_config == NULL || (void *)stackshot_config->sc_buffer == NULL) {
+               return -1;
+       }
+
+       return stackshot_config->sc_size;
+}
+
+/*
+ * stackshot_config_set_size_hint: set the size of the stackshot buffer
+ *
+ * Inputs:  stackshot_config - a pointer to a stackshot_config_t
+ *          suggested_size - hint for size allocation of stackshot
+ *
+ * Outputs:  -1 if the passed stackshot_config is NULL or an existing stackshot buffer is set.
+ *           0 on success.
+ */
+int
+stackshot_config_set_size_hint(stackshot_config_t *stackshot_config, uint32_t suggested_size)
+{
+       if (stackshot_config == NULL || (void *)stackshot_config->sc_buffer != NULL) {
+               return -1;
+       }
+
+       stackshot_config->sc_size = suggested_size;
+
+       return 0;
+}
+
+/*
+ * stackshot_config_dealloc_buffer:  dealloc the stackshot buffer and reset the size so that a
+ *   stackshot_config_t can be used again
+ *
+ * Inputs:   stackshot_config - a pointer to a stackshot_config_t
+ *
+ * Outputs:  EINVAL if the passed stackshot_config is NULL
+ *           0 otherwise (a NULL buffer simply means there is nothing to deallocate)
+ */
+int
+stackshot_config_dealloc_buffer(stackshot_config_t *stackshot_config)
+{
+       stackshot_config_t *s_config;
+
+       if (stackshot_config == NULL) {
+               return EINVAL;
+       }
+       s_config = (stackshot_config_t *) stackshot_config;
+
+       if (s_config->sc_size && s_config->sc_buffer) {
+               mach_vm_deallocate(mach_task_self(), (mach_vm_offset_t)s_config->sc_buffer, (mach_vm_size_t)s_config->sc_size);
+       }
+
+       s_config->sc_buffer = 0;
+       s_config->sc_size = 0;
+
+       return 0;
+}
+
+/*
+ * stackshot_config_dealloc:   dealloc the stackshot buffer and the stackshot config
+ *
+ * Inputs:                     stackshot_config - a pointer to a stackshot_config_t
+ *
+ * Outputs:                    EINVAL if the passed stackshot_config is NULL
+ *                             0 otherwise
+ */
+int
+stackshot_config_dealloc(stackshot_config_t *stackshot_config)
+{
+       stackshot_config_t *s_config;
+
+       if (stackshot_config == NULL) {
+               return EINVAL;
+       }
+       s_config = (stackshot_config_t *) stackshot_config;
+
+       if (s_config->sc_size && s_config->sc_buffer) {
+               mach_vm_deallocate(mach_task_self(), (mach_vm_offset_t)s_config->sc_buffer, (mach_vm_size_t)s_config->sc_size);
+       }
+
+       s_config->sc_buffer = 0;
+       s_config->sc_size = 0;
+
+       free(s_config);
+       return 0;
+}
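+
+/*
+ * Editorial usage sketch (not part of this change): a typical caller builds a
+ * config, captures, then reads and releases the buffer.  stackshot_config_create()
+ * and stackshot_config_set_pid() are assumed from the earlier portion of this
+ * file (not shown here).
+ */
+#if 0
+static int
+example_take_stackshot(pid_t target_pid)
+{
+       stackshot_config_t *config = stackshot_config_create();
+       int err;
+
+       if (config == NULL) {
+               return ENOMEM;
+       }
+
+       if (stackshot_config_set_pid(config, target_pid) != 0) {
+               stackshot_config_dealloc(config);
+               return EINVAL;
+       }
+
+       err = stackshot_capture_with_config(config);
+       if (err == 0) {
+               void *buf = stackshot_config_get_stackshot_buffer(config);
+               uint32_t len = stackshot_config_get_stackshot_size(config);
+
+               /* consume the kcdata in buf[0..len) before deallocating */
+               (void)buf;
+               (void)len;
+       }
+
+       stackshot_config_dealloc(config);       /* also unmaps the stackshot buffer */
+       return err;
+}
+#endif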
diff --git a/libsyscall/wrappers/work_interval.c b/libsyscall/wrappers/work_interval.c
new file mode 100644 (file)
index 0000000..29dd2ad
--- /dev/null
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <sys/work_interval.h>
+#include <mach/mach_time.h>
+#include <sys/errno.h>
+#include <stdlib.h>
+
+struct work_interval {
+       uint64_t thread_id;
+       uint64_t work_interval_id;
+};
+
+extern uint64_t __thread_selfid(void);
+
+/* Create a new work interval handle (currently for the current thread only). Flags is unused */
+int
+work_interval_create(work_interval_t *interval_handle, uint32_t flags __unused)
+{
+       int ret;
+       uint64_t work_interval_id;
+       work_interval_t handle;
+
+       ret = __work_interval_ctl(WORK_INTERVAL_OPERATION_CREATE, 0, &work_interval_id, sizeof(work_interval_id));
+       if (ret == -1) {
+               return ret;
+       }
+
+       handle = malloc(sizeof(*handle));
+       if (handle == NULL) {
+               errno = ENOMEM;
+               return -1;
+       }
+
+       handle->thread_id = __thread_selfid();
+       handle->work_interval_id = work_interval_id;
+
+       *interval_handle = handle;
+       return 0;
+}
+
+int
+work_interval_notify(work_interval_t interval_handle, uint64_t start, uint64_t finish, uint64_t deadline, uint64_t next_start, uint32_t flags)
+{
+       int ret;
+       uint64_t work_interval_id;
+       struct work_interval_notification notification = {
+               .start = start,
+               .finish = finish,
+               .deadline = deadline,
+               .next_start = next_start,
+               .flags = flags,
+               .unused1 = 0
+       };
+
+       if (interval_handle == NULL) {
+               errno = EINVAL;
+               return -1;
+       }
+
+       work_interval_id = interval_handle->work_interval_id;
+
+       ret = __work_interval_ctl(WORK_INTERVAL_OPERATION_NOTIFY, work_interval_id, &notification, sizeof(notification));
+       return ret;
+}
+
+int
+work_interval_notify_simple(work_interval_t interval_handle, uint64_t start, uint64_t deadline, uint64_t next_start)
+{
+       return work_interval_notify(interval_handle, start, mach_absolute_time(), deadline, next_start, 0);
+}
+
+int
+work_interval_destroy(work_interval_t interval_handle)
+{
+       int ret, saved_errno;
+       uint64_t work_interval_id;
+
+       if (interval_handle == NULL) {
+               errno = EINVAL;
+               return -1;
+       }
+
+       work_interval_id = interval_handle->work_interval_id;
+
+       ret = __work_interval_ctl(WORK_INTERVAL_OPERATION_DESTROY, work_interval_id, NULL, 0);
+       saved_errno = errno;
+       free(interval_handle);
+       errno = saved_errno;
+
+       return ret;
+}
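+
+/*
+ * Editorial usage sketch (not part of this change): a frame-oriented thread
+ * could report each frame's timing through the wrappers above.
+ */
+#if 0
+static void
+example_report_frame(uint64_t frame_start, uint64_t frame_deadline, uint64_t next_frame_start)
+{
+       work_interval_t handle;
+
+       if (work_interval_create(&handle, 0) != 0) {
+               return;
+       }
+
+       /* finish time is "now"; flags are passed through as zero in this sketch */
+       work_interval_notify(handle, frame_start, mach_absolute_time(),
+           frame_deadline, next_frame_start, 0);
+
+       work_interval_destroy(handle);
+}
+#endif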
index eace5b9552d69f58c7a70c4e9d8354eaf6298d31..9c587b5369d545c69a59e017f2ebe55c4a689aee 100755 (executable)
@@ -61,9 +61,9 @@ my $OutDir;
 # size in bytes of known types (only used for i386)
 my %TypeBytes = (
     'au_asid_t'                => 4,
-    'associd_t'                => 4,
+    'sae_associd_t'    => 4,
     'caddr_t'          => 4,
-    'connid_t'         => 4,
+    'sae_connid_t'     => 4,
     'gid_t'            => 4,
     'id_t'             => 4,
     'idtype_t'         => 4,
diff --git a/libsyscall/xcodescripts/filter_mig.awk b/libsyscall/xcodescripts/filter_mig.awk
new file mode 100755 (executable)
index 0000000..90fd398
--- /dev/null
@@ -0,0 +1,37 @@
+#!/usr/bin/awk -f
+
+# Usage: filter_mig.awk <template> <file>
+# Searches through <file> for prototypes of the form 'kern_return_t $FOO'
+# where $FOO is a line in the template file,
+# and prepends the first line of the template file to each match.
+
+# Example template format:
+#       __WATCHOS_PROHIBITED
+#       act_get_state
+#       thread_get_state
+# 
+
+# BEGIN { print ARGV[1]; print ARGV[2] }
+
+# In the first file, build array of lines
+NR==FNR {
+       if (NR==1)
+               prefix=$0
+       else
+               templates[$0];
+       next
+}
+
+# In the second file, match kern_return_t <template>
+# at the beginning of the line
+# print the prefix line if found
+
+/^kern_return_t/ {
+#      print "match"
+       if ($2 in templates) {
+               print prefix
+       }
+}
+
+# Pass through everything in the second file
+{ print }
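+
+# Editorial example (not part of this change): with a template whose first line
+# is __WATCHOS_PROHIBITED followed by routine names such as thread_get_state,
+# an invocation like
+#       ./filter_mig.awk watchos_prohibited_mig.txt some_mig_header.h
+# copies the header to stdout, printing __WATCHOS_PROHIBITED on the line
+# before each matching "kern_return_t thread_get_state" prototype.
+# (some_mig_header.h is a placeholder name.)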
index 1ca67723da0a94bd098af9eb97b64d7ba77ae0fe..9364707d9de3e7bb1d3eac5b5d2649cf148bbe80 100755 (executable)
@@ -1,4 +1,4 @@
-#!/bin/sh -x
+#!/bin/sh -xe
 #
 # Copyright (c) 2010 Apple Inc. All rights reserved.
 #
@@ -34,6 +34,7 @@ MIG=`xcrun -sdk "$SDKROOT" -find mig`
 MIGCC=`xcrun -sdk "$SDKROOT" -find cc`
 export MIGCC
 MIG_DEFINES="-DLIBSYSCALL_INTERFACE"
+MIG_HEADER_OBJ="$OBJROOT/mig_hdr/include/mach"
 MIG_HEADER_DST="$BUILT_PRODUCTS_DIR/mig_hdr/include/mach"
 MIG_PRIVATE_HEADER_DST="$BUILT_PRODUCTS_DIR/mig_hdr/local/include/mach"
 SERVER_HEADER_DST="$BUILT_PRODUCTS_DIR/mig_hdr/include/servers"
@@ -54,6 +55,7 @@ fi
 SRC="$SRCROOT/mach"
 MIG_INTERNAL_HEADER_DST="$BUILT_PRODUCTS_DIR/internal_hdr/include/mach"
 MIG_PRIVATE_DEFS_INCFLAGS="-I${SDKROOT}/System/Library/Frameworks/System.framework/PrivateHeaders"
+FILTER_MIG="$SRCROOT/xcodescripts/filter_mig.awk"
 
 MIGS="clock.defs
        clock_priv.defs
@@ -75,7 +77,7 @@ MIGS_PRIVATE=""
 
 MIGS_DUAL_PUBLIC_PRIVATE=""
 
-if [[ "$PLATFORM_NAME" = "iphoneos" || "$PLATFORM_NAME" = "iphonesimulator"  || "$PLATFORM_NAME" = "iphoneosnano" || "$PLATFORM_NAME" = "iphonenanosimulator" ]]
+if [[ "$PLATFORM_NAME" = "iphoneos" || "$PLATFORM_NAME" = "iphonesimulator"  || "$PLATFORM_NAME" = "iphoneosnano" || "$PLATFORM_NAME" = "iphonenanosimulator" || "$PLATFORM_NAME" = "tvos" || "$PLATFOM_NAME" = "tvsimulator" || "$PLATFOM_NAME" = "appletvos" || "$PLATFOM_NAME" = "appletvsimulator" || "$PLATFOM_NAME" = "watchos" || "$PLATFOM_NAME" = "watchsimulator" ]]
 then
        MIGS_PRIVATE="mach_vm.defs"
 else
@@ -101,6 +103,8 @@ MACH_HDRS="mach.h
        vm_task.h
        vm_page_size.h"
 
+MIG_FILTERS="watchos_prohibited_mig.txt"
+
 # install /usr/include/server headers 
 mkdir -p $SERVER_HEADER_DST
 for hdr in $SERVER_HDRS; do
@@ -119,10 +123,16 @@ $MIG -novouchers -arch $MACHINE_ARCH -header "$SERVER_HEADER_DST/netname.h" $SRC
 # install /usr/include/mach mig headers
 
 mkdir -p $MIG_HEADER_DST
+mkdir -p $MIG_HEADER_OBJ
 
 for mig in $MIGS $MIGS_DUAL_PUBLIC_PRIVATE; do
        MIG_NAME=`basename $mig .defs`
-       $MIG -novouchers -arch $MACHINE_ARCH -cc $MIGCC -header "$MIG_HEADER_DST/$MIG_NAME.h" $MIG_DEFINES $SRC/$mig
+       $MIG -novouchers -arch $MACHINE_ARCH -cc $MIGCC -header "$MIG_HEADER_OBJ/$MIG_NAME.h" $MIG_DEFINES $SRC/$mig
+       for filter in $MIG_FILTERS; do
+               $FILTER_MIG $SRC/$filter $MIG_HEADER_OBJ/$MIG_NAME.h > $MIG_HEADER_OBJ/$MIG_NAME.tmp.h
+               mv $MIG_HEADER_OBJ/$MIG_NAME.tmp.h $MIG_HEADER_OBJ/$MIG_NAME.h
+       done
+       install -o 0 -c -m 444 $MIG_HEADER_OBJ/$MIG_NAME.h $MIG_HEADER_DST/$MIG_NAME.h
 done
 
 mkdir -p $MIG_PRIVATE_HEADER_DST
index af01dedaee8a852d1b0f1904a0633e55c73645f9..06e6a30e11a06ec72e2ce8c7d5f7299d384caaa5 100644 (file)
@@ -36,7 +36,6 @@ endif
 
 SDKROOT ?= macosx.internal
 HOST_SDKROOT ?= macosx
-HOST_SPARSE_SDKROOT ?= /
 
 # SDKROOT may be passed as a shorthand like "iphoneos.internal". We
 # must resolve these to a full path and override SDKROOT.
@@ -59,6 +58,8 @@ ifeq ($(PLATFORM),)
        export PLATFORM := $(shell echo $(PLATFORMPATH) | sed 's,^.*/\([^/]*\)\.platform$$,\1,')
        ifeq ($(PLATFORM),)
                export PLATFORM := MacOSX
+       else ifeq ($(shell echo $(PLATFORM) | tr A-Z a-z),watchos)
+               export PLATFORM := WatchOS
        endif
 endif
 
@@ -66,12 +67,6 @@ ifeq ($(SDKVERSION),)
      export SDKVERSION := $(shell $(XCRUN) -sdk $(SDKROOT) -show-sdk-version)
 endif
 
-ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),)
-       ifeq ($(HOST_SPARSE_SDKROOT),/)
-               export HOST_SPARSE_SDKROOT := $(shell $(XCRUN) -sdk iphonehost.internal -show-sdk-path)
-       endif
-endif
-
 # CC/CXX get defined by make(1) by default, so we can't check them
 # against the empty string to see if they haven't been set
 ifeq ($(origin CC),default)
@@ -120,8 +115,15 @@ ifeq ($(NMEDIT),)
        export NMEDIT := $(shell $(XCRUN) -sdk $(SDKROOT) -find nmedit)
 endif
 
+#
+# Platform options
+#
+SUPPORTED_EMBEDDED_PLATFORMS := iPhoneOS iPhoneOSNano tvOS AppleTVOS WatchOS
+SUPPORTED_SIMULATOR_PLATFORMS := iPhoneSimulator iPhoneNanoSimulator tvSimulator AppleTVSimulator WatchSimulator
+SUPPORTED_PLATFORMS := MacOSX $(SUPPORTED_SIMULATOR_PLATFORMS) $(SUPPORTED_EMBEDDED_PLATFORMS)
+
 # Platform-specific tools
-ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),)
+ifneq ($(filter $(SUPPORTED_EMBEDDED_PLATFORMS),$(PLATFORM)),)
 ifeq ($(EMBEDDED_DEVICE_MAP),)
        export EMBEDDED_DEVICE_MAP := $(shell $(XCRUN) -sdk $(SDKROOT) -find embedded_device_map)
 endif
@@ -144,6 +146,7 @@ DECOMMENT = $(OBJROOT)/SETUP/decomment/decomment
 NEWVERS = $(SRCROOT)/config/newvers.pl
 INSTALL = $(OBJROOT)/SETUP/installfile/installfile
 REPLACECONTENTS = $(OBJROOT)/SETUP/replacecontents/replacecontents
+JSONCOMPILATIONDB = $(OBJROOT)/SETUP/json_compilation_db/json_compilation_db
 
 # Standard BSD tools
 RM = /bin/rm -f
index 6010d8dbbbe7f60b1e0db4db246f2d54bea244a5..ae513f1930984c0a8bdffeced270425c01664d7a 100644 (file)
@@ -1,6 +1,6 @@
 # -*- mode: makefile;-*-
 #
-# Copyright (C) 1999-2013 Apple Inc. All rights reserved.
+# Copyright (C) 1999-2014 Apple Inc. All rights reserved.
 #
 # MakeInc.def contains global definitions for building,
 # linking, and installing files.
@@ -24,10 +24,6 @@ SUPPORTED_X86_64_MACHINE_CONFIGS = NONE
 SUPPORTED_X86_64H_MACHINE_CONFIGS = NONE
 
 
-#
-# Platform options
-#
-SUPPORTED_PLATFORMS = MacOSX iPhoneOS iPhoneSimulator iPhoneOSNano iPhoneNanoSimulator
 
 #
 # Setup up *_LC variables during recursive invocations
@@ -59,14 +55,22 @@ COMPONENT_IMPORT_LIST = $(filter-out $(COMPONENT),$(COMPONENT_LIST))
 #
 ifeq ($(PLATFORM),MacOSX)
     DEPLOYMENT_TARGET_FLAGS = -mmacosx-version-min=$(SDKVERSION)
-else ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),)
+else ifeq ($(PLATFORM),WatchOS)
+    DEPLOYMENT_TARGET_FLAGS = -mwatchos-version-min=$(SDKVERSION)
+else ifeq ($(PLATFORM),tvOS)
+    DEPLOYMENT_TARGET_FLAGS = -mtvos-version-min=$(SDKVERSION)
+else ifeq ($(PLATFORM),AppleTVOS)
+    DEPLOYMENT_TARGET_FLAGS = -mtvos-version-min=$(SDKVERSION)
+else ifneq ($(filter $(SUPPORTED_EMBEDDED_PLATFORMS),$(PLATFORM)),)
     DEPLOYMENT_TARGET_FLAGS = -miphoneos-version-min=$(SDKVERSION)
-else ifneq ($(filter iPhoneSimulator iPhoneNanoSimulator,$(PLATFORM)),)
+else ifneq ($(filter $(SUPPORTED_SIMULATOR_PLATFORMS),$(PLATFORM)),)
     DEPLOYMENT_TARGET_FLAGS =
 else
     DEPLOYMENT_TARGET_FLAGS =
 endif
 
+DEPLOYMENT_TARGET_DEFINES = -DPLATFORM_$(PLATFORM)
+
 
 #
 # Standard defines list
@@ -81,6 +85,8 @@ DEFINES = -DAPPLE -DKERNEL -DKERNEL_PRIVATE -DXNU_KERNEL_PRIVATE \
 KCC  = $(CC)
 KC++ = $(CXX)
 
+GENASSYM_KCC = $(CC)
+
 #
 # Compiler warning flags
 #
@@ -145,7 +151,7 @@ BUILD_DSYM := 1
 # probes from the kernel.
 #
 CFLAGS_GEN = $(DEBUG_CFLAGS) -nostdinc \
-       -freorder-blocks -fno-builtin -fno-common \
+       -fno-builtin -fno-common \
        -fsigned-bitfields $(OTHER_CFLAGS)
 
 CFLAGS_RELEASE         = 
@@ -184,6 +190,7 @@ CFLAGS      = $(CFLAGS_GEN) \
                  $($(addsuffix $(CURRENT_KERNEL_CONFIG),CFLAGS_)) \
                  $($(addsuffix $(CURRENT_ARCH_CONFIG), $(addsuffix $(CURRENT_KERNEL_CONFIG),CFLAGS_))) \
                  $(DEPLOYMENT_TARGET_FLAGS) \
+                 $(DEPLOYMENT_TARGET_DEFINES) \
                  $(DEFINES)
 
 #
@@ -228,6 +235,7 @@ SFLAGS      = $(SFLAGS_GEN) \
                  $($(addsuffix $(CURRENT_ARCH_CONFIG),SFLAGS_)) \
                  $($(addsuffix $(CURRENT_KERNEL_CONFIG),SFLAGS_)) \
                  $(DEPLOYMENT_TARGET_FLAGS) \
+                 $(DEPLOYMENT_TARGET_DEFINES) \
                  $(DEFINES)
 
 #
@@ -293,6 +301,10 @@ LDFLAGS_KERNEL_RELEASEX86_64 = \
        -Wl,-sectalign,__HIB,__const,0x1000 \
        -Wl,-sectalign,__HIB,__bss,0x1000 \
        -Wl,-sectalign,__HIB,__common,0x1000 \
+       -Wl,-sectalign,__HIB,__llvm_prf_cnts,0x1000 \
+       -Wl,-sectalign,__HIB,__llvm_prf_names,0x1000 \
+       -Wl,-sectalign,__HIB,__llvm_prf_data,0x1000 \
+       -Wl,-sectalign,__HIB,__textcoal_nt,0x1000 \
        $(LDFLAGS_NOSTRIP_FLAG)
 
 # Define KERNEL_BASE_OFFSET so known at compile time:
@@ -356,6 +368,22 @@ INCFLAGS   = $(INCFLAGS_LOCAL) $(INCFLAGS_GEN) $(INCFLAGS_IMPORT) $(INCFLAGS_EXTE
 MIGFLAGS       = $(DEFINES) $(INCFLAGS) -novouchers $($(addsuffix $(CURRENT_ARCH_CONFIG),CFLAGS_)) $($(addsuffix $(CURRENT_ARCH_CONFIG),ARCH_FLAGS_)) \
                $(DEPLOYMENT_TARGET_FLAGS)
 
+
+# Support for LLVM Profile Guided Optimization (PGO)
+
+ifeq ($(BUILD_PROFILE),1)
+CFLAGS_GEN += -fprofile-instr-generate -DPROFILE
+CXXFLAGS_GEN += -fprofile-instr-generate -DPROFILE
+endif
+
+ifdef USE_PROFILE
+CFLAGS_GEN += -fprofile-instr-use=$(USE_PROFILE)
+CXXFLAGS_GEN += -fprofile-instr-use=$(USE_PROFILE)
+LDFLAGS_KERNEL_GEN += -fprofile-instr-use=$(USE_PROFILE)
+
+CFLAGS_GEN += -Wno-error=profile-instr-out-of-date
+endif
+
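+# Illustrative workflow (editorial note, not part of this change): build an
+# instrumented kernel with BUILD_PROFILE=1, run a representative workload,
+# merge the raw profiles (for example with `llvm-profdata merge`), then
+# rebuild with USE_PROFILE pointing at the resulting .profdata file.
+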
 #
 # Support for LLVM Link Time Optimization (LTO)
 #
@@ -381,21 +409,34 @@ else
 USE_LTO = $(LTO_ENABLED_$(CURRENT_KERNEL_CONFIG))
 endif
 
+SUPPORTS_CTFCONVERT    = 0
 ifeq ($(USE_LTO),1)
 CFLAGS_GEN     += -flto
 CXXFLAGS_GEN   += -flto
 LDFLAGS_KERNEL_GEN     += -Wl,-mllvm,-inline-threshold=125 -Wl,-object_path_lto,$(TARGET)/lto.o # -Wl,-mllvm -Wl,-disable-fp-elim 
 LDFLAGS_NOSTRIP_FLAG = -rdynamic
 CFLAGS_NOLTO_FLAG = -fno-lto
-SUPPORTS_CTFCONVERT    = 0
 NEEDS_CTF_MACHOS       = 1
 else
 LDFLAGS_NOSTRIP_FLAG =
 CFLAGS_NOLTO_FLAG =
+ifneq ($(CTFCONVERT),)
 SUPPORTS_CTFCONVERT    = 1
+endif
 NEEDS_CTF_MACHOS       = 0
 endif
 
+ifeq ($(BUILD_JSON_COMPILATION_DATABASE),1)
+BUILD_DSYM     := 0
+DO_CTFCONVERT  := 0
+DO_CTFMERGE    := 0
+DO_CTFMACHO    := 0
+KCC            = $(JSONCOMPILATIONDB) $(OBJPATH)/compile_commands.json $(PWD) $< $(CC)
+KC++           = $(JSONCOMPILATIONDB) $(OBJPATH)/compile_commands.json $(PWD) $< $(CXX)
+S_KCC          = $(JSONCOMPILATIONDB) $(OBJPATH)/compile_commands.json $(PWD) $< $(CC)
+STRIP          = true
+endif
+
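+# Editorial note (not part of this change): building with
+# BUILD_JSON_COMPILATION_DATABASE=1 wraps each compiler invocation with the
+# json_compilation_db helper so the command line for every translation unit is
+# recorded in $(OBJPATH)/compile_commands.json for clang-based tooling, while
+# dSYM generation and CTF conversion are disabled for that pass.
+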
 #
 # Default VPATH
 #
@@ -413,14 +454,7 @@ EXEC_INSTALL_FLAGS = -c -S -m 0755
 #
 # Header file destinations
 #
-ifeq ($(RC_ProjectName),xnu_headers_Sim)
-       include $(MAKEFILEPATH)/../AppleInternal/Makefiles/Makefile.indigo_prefix
-       HEADER_INSTALL_PREFIX = $(INDIGO_PREFIX)
-else
-       HEADER_INSTALL_PREFIX = 
-endif
-
-FRAMEDIR = $(HEADER_INSTALL_PREFIX)/System/Library/Frameworks
+FRAMEDIR = /System/Library/Frameworks
 
 SINCVERS = B
 SINCFRAME = $(FRAMEDIR)/System.framework
@@ -429,7 +463,7 @@ SPINCDIR = $(SINCFRAME)/Versions/$(SINCVERS)/PrivateHeaders
 SRESDIR = $(SINCFRAME)/Versions/$(SINCVERS)/Resources
 
 ifndef INCDIR
-    INCDIR = $(HEADER_INSTALL_PREFIX)/usr/include
+    INCDIR = /usr/include
 endif
 ifndef LCLDIR
     LCLDIR = $(SPINCDIR)
@@ -544,10 +578,10 @@ INSTALL_KERNEL_DIR := $(DEVELOPER_EXTRAS_DIR)
 INSTALL_KERNEL_SYM_DIR := $(DEVELOPER_EXTRAS_DIR)
 INSTALL_KERNEL_SYM_TO_KDK = 1
 INSTALL_XNU_DEBUG_FILES = 1
-else ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),)
+else ifneq ($(filter $(SUPPORTED_EMBEDDED_PLATFORMS),$(PLATFORM)),)
 INSTALL_KERNEL_SYM_TO_KDK = 1
 USE_BINARY_PLIST = 1
-else ifneq ($(filter iPhoneSimulator iPhoneNanoSimulator,$(PLATFORM)),)
+else ifneq ($(filter $(SUPPORTED_SIMULATOR_PLATFORMS),$(PLATFORM)),)
 USE_BINARY_PLIST = 1
 else ifeq ($(PLATFORM),MacOSX)
 INSTALL_KERNEL_DIR := $(SYSTEM_LIBRARY_KERNELS_DIR)
index 9c06f04eafa39f2a14d2b8ef136a7eb7a8a2ada0..3b6014feee5cc291f86b28727768718580444f15 100644 (file)
@@ -28,6 +28,11 @@ endif
 
 STATIC_KMODS =  $(SRCROOT)/kmods.a
 
+ifeq ($(BUILD_JSON_COMPILATION_DATABASE),1)
+do_build_setup::
+       $(_v)$(CAT) > $(OBJPATH)/compile_commands.json < /dev/null
+endif
+
 #
 # Rules for the highly parallel "build" phase, where each build configuration
 # writes into their own $(TARGET) independent of other build configs
@@ -81,7 +86,7 @@ $(TARGET)/$(KERNEL_FILE_NAME).dSYM: $(TARGET)/$(KERNEL_FILE_NAME).unstripped
        $(_v)$(MV) $@/$(DSYMDWARFDIR)/$(KERNEL_FILE_NAME).unstripped $@/$(DSYMDWARFDIR)/$(KERNEL_FILE_NAME)
        $(_v)$(TOUCH) $@
 
-$(TARGET)/$(KERNEL_FILE_NAME).unstripped: $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST),$(component)/$(CURRENT_KERNEL_CONFIG)/$(component).filelist)) lastkernelconstructor.o $(SRCROOT)/config/version.c $(SRCROOT)/config/MasterVersion .LDFLAGS $(filter %/MakeInc.kernel,$(MAKEFILE_LIST))
+$(TARGET)/$(KERNEL_FILE_NAME).unstripped: $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST),$(component)/$(CURRENT_KERNEL_CONFIG)/$(component).filelist)) lastkerneldataconst.o lastkernelconstructor.o $(SRCROOT)/config/version.c $(SRCROOT)/config/MasterVersion .LDFLAGS $(filter %/MakeInc.kernel,$(MAKEFILE_LIST))
        $(_v)${MAKE} -f $(firstword $(MAKEFILE_LIST)) version.o
        @echo LD $(@F)
        $(_v)$(CAT) $(filter %.filelist,$+) < /dev/null > link.filelist
@@ -100,6 +105,21 @@ $(OBJPATH)/version.c: $(SRCROOT)/config/version.c $(NEWVERS) $(SRCROOT)/config/M
        $(_v)$(CP) $< $@
        $(_v)$(NEWVERS) $(OBJPATH)/version.c > /dev/null;
 
+-include lastkerneldataconst.d
+lastkerneldataconst.o: .CFLAGS $(filter %/MakeInc.kernel,$(MAKEFILE_LIST))
+lastkerneldataconst.o: $(SRCROOT)/libsa/lastkerneldataconst.c
+       ${C_RULE_0}
+       ${C_RULE_1A}$<
+       ${C_RULE_2}
+
+
+lastkernelconstructor.o_CFLAGS_RM = -fprofile-instr-generate
+# the LAST segment is mapped read-only on arm, so if we include llvm profiling
+# here it will segfault the kernel.  (see arm_vm_init.c) We don't currently have
+# a way of retrieving these counters from LAST anyway, so there's no harm in just
+# disabling them.
+
+LAST_FILES=lastkernelconstructor.o
 -include lastkernelconstructor.d
 lastkernelconstructor.o: .CFLAGS $(filter %/MakeInc.kernel,$(MAKEFILE_LIST))
 lastkernelconstructor.o: $(SRCROOT)/libsa/lastkernelconstructor.c
@@ -108,8 +128,11 @@ lastkernelconstructor.o: $(SRCROOT)/libsa/lastkernelconstructor.c
        ${C_RULE_2}
        ${C_RULE_3}
        ${C_RULE_4}
-       $(_v)$(SEG_HACK) -s __DATA -n __LAST -o $@__ $@
-       $(_v)$(MV) $@__ $@
+       $(_v)for last_file in ${LAST_FILES};                            \
+        do                                                             \
+               $(SEG_HACK) -s __DATA -n __LAST -o $${last_file}__ $${last_file} || exit 1; \
+                mv $${last_file}__ $${last_file} || exit 1;            \
+        done
 
 #
 # Install rules. Each build config is classified as "primary" (the first
index a635cf4c79ebcb7196ab9ae2ae74787bc2db574d..13371fd5c07db1a9eaabde4310d1735237e4ba1f 100644 (file)
 # Generic Install rules
 #
 
-ifndef INSTALL_MI_LCL_LIST
-    INSTALL_MI_LCL_LIST = $(INSTALL_MI_LIST)
-endif
-
-ifndef INSTALL_MI_LCL_GEN_LIST
-    INSTALL_MI_LCL_GEN_LIST = $(INSTALL_MI_GEN_LIST)
-endif
-
-ifndef INSTALL_MD_LCL_LIST
-    INSTALL_MD_LCL_LIST = $(INSTALL_MD_LIST)
-endif
-
-ifndef INSTALL_MD_LCL_GEN_LIST
-    INSTALL_MD_LCL_GEN_LIST = $(INSTALL_MD_GEN_LIST)
-endif
-
 ifndef INSTALL_KF_MI_LCL_LIST
     INSTALL_KF_MI_LCL_LIST = $(EXPORT_MI_LIST)
 endif
@@ -121,8 +105,8 @@ $(eval $(call INSTALLHDRS_RULE_template,$(INSTALL_KF_MI_GEN_FILES),1,kincmigendi
 # Machine-independent local (private) files
 #
 
-INSTALL_MI_LCL_FILES = $(addprefix $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR)/, $(INSTALL_MI_LCL_LIST))
-INSTALL_MI_LCL_GEN_FILES = $(addprefix $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR)/, $(INSTALL_MI_LCL_GEN_LIST))
+INSTALL_MI_LCL_FILES = $(addprefix $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR)/, $(sort $(INSTALL_MI_LCL_LIST) $(INSTALL_MI_LIST)))
+INSTALL_MI_LCL_GEN_FILES = $(addprefix $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR)/, $(sort $(INSTALL_MI_LCL_GEN_LIST) $(INSTALL_MI_GEN_LIST)))
 
 $(eval $(call INSTALLHDRS_RULE_template,$(INSTALL_MI_LCL_FILES),,pincmidir,$(SPINCFRAME_UNIFDEF)))
 $(eval $(call INSTALLHDRS_RULE_template,$(INSTALL_MI_LCL_GEN_FILES),1,pincmigendir,$(SPINCFRAME_UNIFDEF)))
@@ -153,8 +137,8 @@ $(eval $(call INSTALLHDRS_RULE_template,$(INSTALL_KF_MD_GEN_FILES),1,kincgendir,
 # Machine-dependent local (private) files
 #
 
-INSTALL_MD_LCL_FILES = $(addprefix $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR)/, $(INSTALL_MD_LCL_LIST))
-INSTALL_MD_LCL_GEN_FILES = $(addprefix $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR)/, $(INSTALL_MD_LCL_GEN_LIST))
+INSTALL_MD_LCL_FILES = $(addprefix $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR)/, $(sort $(INSTALL_MD_LCL_LIST) $(INSTALL_MD_LIST)))
+INSTALL_MD_LCL_GEN_FILES = $(addprefix $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR)/, $(sort $(INSTALL_MD_LCL_GEN_LIST) $(INSTALL_MD_GEN_LIST)))
 
 $(eval $(call INSTALLHDRS_RULE_template,$(INSTALL_MD_LCL_FILES),,pincdir,$(SPINCFRAME_UNIFDEF)))
 $(eval $(call INSTALLHDRS_RULE_template,$(INSTALL_MD_LCL_GEN_FILES),1,pincgendir,$(SPINCFRAME_UNIFDEF)))
@@ -279,6 +263,16 @@ P_CTFRULE_1B=
 P_CTFRULE_2=@true 
 
 
+#
+# This isn't the right place to put this, but we need to := override some settings
+# in Makefiles that include the generic helper fragments (like this file)
+#
+ifeq ($(BUILD_JSON_COMPILATION_DATABASE),1)
+HIB_FILES :=
+LAST_FILES :=
+KLD_FILES :=
+endif
+
 .PHONY: do_build_all
 
 # Do-nothing rule, since not all levels of the recursive hierarchy might implement this
index e698cf2faa7b36399cce5109fd84d494c76a9d50..df78860106ac166dbb7dd0f727965c68c31b078a 100644 (file)
@@ -1,6 +1,6 @@
 # -*- mode: makefile;-*-
 #
-# Copyright (C) 2010-2012 Apple Inc. All rights reserved.
+# Copyright (C) 2010-2014 Apple Inc. All rights reserved.
 #
 # MakeInc.top is the top-level makefile for the xnu
 # build system. All the main XBS targets
@@ -34,9 +34,10 @@ include $(MakeInc_cmd)
 #
 
 # Default to current kernel architecture
-ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),)
+
+ifneq ($(filter $(SUPPORTED_EMBEDDED_PLATFORMS),$(PLATFORM)),)
      override DEFAULT_ARCH_CONFIG := ARM
-else ifneq ($(filter iPhoneSimulator iPhoneNanoSimulator,$(PLATFORM)),)
+else ifneq ($(filter $(SUPPORTED_SIMULATOR_PLATFORMS),$(PLATFORM)),)
      override DEFAULT_ARCH_CONFIG := X86_64
 else
      override DEFAULT_ARCH_CONFIG := X86_64
@@ -46,7 +47,7 @@ endif
 # Accept either explicit ARCH_CONFIGS or XBS-style RC_ARCHS
 ifndef ARCH_CONFIGS
 ifdef RC_ARCHS
-ARCH_CONFIGS   := $(shell printf "%s" "$(RC_ARCHS)" | $(TR) a-z A-Z | sed -E 's/ARMV[0-9][A-Z]?/ARM/g' | $(TR) " " "\n" | sort -u | $(TR) "\n" " ")
+ARCH_CONFIGS   := $(shell printf "%s" "$(RC_ARCHS)" | $(TR) a-z A-Z | $(TR) " " "\n" | sort -u | $(TR) "\n" " ")
 else
 ARCH_CONFIGS   := DEFAULT
 endif
@@ -58,7 +59,7 @@ endif
 
 ifeq ($(RC_ProjectName),xnu_debug)
 override DEFAULT_KERNEL_CONFIG := DEBUG
-else ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),)
+else ifneq ($(filter $(SUPPORTED_EMBEDDED_PLATFORMS),$(PLATFORM)),)
 override DEFAULT_KERNEL_CONFIG := DEVELOPMENT
 else ifeq ($(PLATFORM),MacOSX)
 override DEFAULT_KERNEL_CONFIG := DEVELOPMENT
@@ -542,9 +543,21 @@ TAGS: cscope.files
        @-cat cscope.files | etags -l auto -S - 2> /dev/null
        @rm -f cscope.files 2> /dev/null
 
+#
+# Re-indent source code using xnu clang-format style
+#
+.PHONY: reindent
+
+reindent:
+       $(_v)$(SRCROOT)/tools/reindent.sh
+
+.PHONY: help
+
 help:
        @cat README
 
+.PHONY: print_exports
+
 print_exports:
        $(_v)printenv | sort
 
@@ -555,4 +568,6 @@ $(warning Generate makefile fragment: $(generated_top_level_print_exports))
 endif
 $(eval $(generated_top_level_print_exports))
 
+.PHONY: print_exports_first_build_config
+
 print_exports_first_build_config: print_exports_bootstrap
index 67a457ba4961a208a7c76fe0e2af0b77be4aa95a..27250311c5f0f291dc2dbd45fb2a5670499e7852 100644 (file)
@@ -9,6 +9,7 @@ include $(MakeInc_def)
 INSTINC_SUBDIRS = \
        mach    \
        atm     \
+       corpses \
        bank \
        device \
        default_pager \
@@ -23,21 +24,32 @@ INSTINC_SUBDIRS = \
        vm \
        libsa \
        kdp \
-       pmc \
        kperf \
        prng
+
 INSTINC_SUBDIRS_X86_64 = \
        mach    \
        i386    \
        x86_64
+
 INSTINC_SUBDIRS_X86_64H = \
        mach    \
        i386    \
        x86_64
 
+INSTINC_SUBDIRS_ARM = \
+       mach    \
+       arm     \
+       arm64
+INSTINC_SUBDIRS_ARM64 = \
+       mach    \
+       arm     \
+       arm64
+
 EXPINC_SUBDIRS = \
        mach    \
        atm     \
+       corpses \
        bank \
        device \
        default_pager \
@@ -53,10 +65,10 @@ EXPINC_SUBDIRS = \
        vm \
        libsa \
        console \
-       pmc \
        kperf \
        prng
 
+
 EXPINC_SUBDIRS_X86_64 = \
        mach    \
        i386    \
@@ -65,6 +77,14 @@ EXPINC_SUBDIRS_X86_64H = \
        mach    \
        i386    \
        x86_64
+EXPINC_SUBDIRS_ARM = \
+       mach    \
+       arm     \
+       arm64
+EXPINC_SUBDIRS_ARM64 = \
+       mach    \
+       arm     \
+       arm64
 
 COMP_SUBDIRS =         \
        conf
index 36e129059f0a0c3d56a2f912f459544c17b5c020..efde212fa2e17a190820766dd10e5ffd88bd5549 100644 (file)
@@ -13,22 +13,23 @@ MIG_DEFS = \
        UNDRequest.defs \
        UNDReply.defs
 
-DATAFILES = \
+PRIVATE_DATAFILES = \
        UNDTypes.h \
        ${MIG_TYPES} \
        ${MIG_DEFS}
 
-INSTALL_MI_LIST =
+KERNELFILES = \
+       KUNCUserNotifications.h \
+       ${PRIVATE_DATAFILES}
 
-INSTALL_MI_LCL_LIST    = ${DATAFILES} 
+INSTALL_MI_LIST =
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
 
 INSTALL_MI_GEN_LIST =
 
 INSTALL_MI_DIR = UserNotification
 
-EXPORT_MI_LIST = \
-       KUNCUserNotifications.h \
-       ${DATAFILES}
+EXPORT_MI_LIST = ${KERNELFILES}
 
 EXPORT_MI_GEN_LIST =
 
index 92e9547bfbab77f865fe85a984ed0535a1bbcd55..6ce1a1e718db9ac25b4814a8ec4128f803283147 100644 (file)
 #include <mach/host_special_ports.h>
 #include <kern/host.h>
 #include <kern/kalloc.h>
+#include <machine/commpage.h>
 
 #define MAX_ATM_VALUES         (2 * 4096)
 #define MAX_TRACE_BUFFER_SIZE  (0x40000000)  /* Restrict to 1GB per task */
-#define MAX_MAILBOX_SIZE       (8 * 4096)
 
 #define ATM_VALUE_TO_HANDLE(x) (CAST_DOWN(atm_voucher_id_t, (x)))
 #define HANDLE_TO_ATM_VALUE(x) (CAST_DOWN(atm_value_t, (x)))
@@ -69,27 +69,26 @@ ipc_voucher_attr_control_t  voucher_attr_control;    /* communication channel fr
 static zone_t atm_value_zone, atm_descriptors_zone, atm_link_objects_zone;
 
 static aid_t get_aid();
-static atm_value_t atm_value_alloc_init();
+static mach_atm_subaid_t get_subaid();
+static atm_value_t atm_value_alloc_init(aid_t);
 static void atm_value_dealloc(atm_value_t atm_value);
 static void atm_hash_table_init();
-static void atm_value_hash_table_insert(atm_value_t new_atm_value);
+static kern_return_t atm_value_hash_table_insert(atm_value_t new_atm_value);
 static void atm_value_hash_table_delete(atm_value_t atm_value);
-static atm_value_t get_atm_value_from_aid(aid_t aid);
+static atm_value_t get_atm_value_from_aid(aid_t aid) __unused;
 static void atm_value_get_ref(atm_value_t atm_value);
-static kern_return_t atm_listener_insert(atm_value_t atm_value, atm_task_descriptor_t task_descriptor, mailbox_offset_t mailbox_offset);
+static kern_return_t atm_listener_insert(atm_value_t atm_value, atm_task_descriptor_t task_descriptor, atm_guard_t guard);
 static void atm_listener_delete_all(atm_value_t atm_value);
-static atm_task_descriptor_t atm_task_descriptor_alloc_init(mach_port_t trace_buffer,uint64_t buffer_size, void *mailbox_addr, uint64_t mailbox_array_size, __assert_only task_t task);
+static atm_task_descriptor_t atm_task_descriptor_alloc_init(mach_port_t trace_buffer,uint64_t buffer_size, __assert_only task_t task);
 static void atm_descriptor_get_reference(atm_task_descriptor_t task_descriptor);
 static void atm_task_descriptor_dealloc(atm_task_descriptor_t task_descriptor);
-static mach_atm_subaid_t atm_get_min_sub_aid(atm_value_t atm_value);
-static void
-atm_get_min_sub_aid_array(aid_t *aid_array, mach_atm_subaid_t *subaid_array, uint32_t count) __unused;
-static kern_return_t atm_value_unregister(atm_value_t atm_value, atm_task_descriptor_t task_descriptor, mailbox_offset_t mailbox_offset);
-static kern_return_t atm_listener_delete(atm_value_t atm_value, atm_task_descriptor_t task_descriptor, mailbox_offset_t mailbox_offset);
-static void atm_link_get_reference(atm_link_object_t link_object);
+static kern_return_t atm_value_unregister(atm_value_t atm_value, atm_task_descriptor_t task_descriptor, atm_guard_t guard);
+static kern_return_t atm_value_register(atm_value_t atm_value, atm_task_descriptor_t task_descriptor, atm_guard_t guard);
+static kern_return_t atm_listener_delete(atm_value_t atm_value, atm_task_descriptor_t task_descriptor, atm_guard_t guard);
+static void atm_link_get_reference(atm_link_object_t link_object) __unused;
 static void atm_link_dealloc(atm_link_object_t link_object);
-kern_return_t atm_invoke_collection(atm_value_t atm_value, uint64_t sub_activity_id, uint32_t flags);
-kern_return_t atm_send_user_notification(aid_t aid, uint64_t subaid, mach_port_t *buffers_array, uint64_t *sizes_array, mach_msg_type_number_t count, uint32_t flags);
+kern_return_t atm_invoke_collection(atm_value_t atm_value, mach_atm_subaid_t subaid, uint32_t flags);
+kern_return_t atm_send_user_notification(aid_t aid, mach_atm_subaid_t sub_aid, mach_port_t *buffers_array, uint64_t *sizes_array, mach_msg_type_number_t count, uint32_t flags);
 
 kern_return_t
 atm_release_value(
@@ -161,6 +160,11 @@ extern vm_map_t kernel_map;
  */
 aid_t global_aid;
 
+/*
+ * Global subaid. Incremented on each get_subaid.
+ */
+mach_atm_subaid_t global_subaid;
+
 /*
  * Lock group attributes for atm sub system.
  */
@@ -168,6 +172,11 @@ lck_grp_t          atm_lock_grp;
 lck_attr_t             atm_lock_attr;
 lck_grp_attr_t         atm_lock_grp_attr;
 
+/*
+ * Global that is set by diagnosticd and readable by userspace
+ * via the commpage.
+ */
+static uint32_t atm_diagnostic_config;
 
 /*
  * Routine: atm_init
@@ -186,6 +195,12 @@ atm_init()
                disable_atm = TRUE;
        }
 
+       if (!PE_parse_boot_argn("atm_diagnostic_config", &atm_diagnostic_config, sizeof(atm_diagnostic_config))) {
+               if (!PE_get_default("kern.atm_diagnostic_config",  &atm_diagnostic_config, sizeof(atm_diagnostic_config))) {
+                       atm_diagnostic_config = 0;
+               }
+       }
+
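+       /*
+        * Editorial note (not part of this change): atm_diagnostic_config can
+        * thus be overridden at boot (e.g. boot-args="atm_diagnostic_config=<mask>"),
+        * otherwise it falls back to the kern.atm_diagnostic_config platform
+        * default, or zero.  Its ATM_TRACE_DISABLE bit is consulted (via
+        * atm_get_diagnostic_config()) before tracing operations proceed.
+        */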
        /* setup zones for descriptors, values and link objects */
        atm_value_zone       = zinit(sizeof(struct atm_value),
                               MAX_ATM_VALUES * sizeof(struct atm_value),
@@ -208,6 +223,7 @@ atm_init()
        lck_attr_setdefault(&atm_lock_attr);
 
        global_aid = 1;
+       global_subaid = 1;
        atm_hash_table_init();
 
 #if DEVELOPMENT || DEBUG
@@ -296,7 +312,8 @@ atm_get_value(
        mach_voucher_attr_value_handle_t atm_handle;
        atm_task_descriptor_t task_descriptor = ATM_TASK_DESCRIPTOR_NULL;
        task_t task;
-       mailbox_offset_t mailbox_offset;
+       aid_t aid;
+       atm_guard_t guard;
        natural_t i;
        kern_return_t kr = KERN_SUCCESS;
 
@@ -306,7 +323,7 @@ atm_get_value(
        /* never an out voucher */
        *out_value_voucher = IPC_VOUCHER_NULL;
 
-       if (disable_atm)
+       if (disable_atm || (atm_get_diagnostic_config() & ATM_TRACE_DISABLE))
                return KERN_NOT_SUPPORTED;
 
        switch (command) {
@@ -320,32 +337,22 @@ atm_get_value(
                        if (atm_value == VAM_DEFAULT_VALUE)
                                continue;
 
+                       if (recipe_size != sizeof(atm_guard_t)) {
+                               kr = KERN_INVALID_ARGUMENT;
+                               break;
+                       }
+                       memcpy(&guard, recipe, sizeof(atm_guard_t));
+
                        task = current_task();
                        task_descriptor = task->atm_context;
-                       if (task_descriptor != ATM_TASK_DESCRIPTOR_NULL) {
-                               if (recipe_size != sizeof(mailbox_offset_t)) {
-                                       kr = KERN_INVALID_ARGUMENT;
-                                       break;
-                               }
-                               memcpy(&mailbox_offset, recipe, sizeof(mailbox_offset_t));
-                               if (mailbox_offset > task_descriptor->mailbox_array_size) {
-                                       kr = KERN_INVALID_ARGUMENT;
-                                       break;
-                               }
-
-                               kr = atm_listener_insert(atm_value, task_descriptor, mailbox_offset);
-                               if (kr != KERN_SUCCESS) {
-                                       break;
-                               }
-                       } else {
-                               kr = KERN_INVALID_TASK;
+                               
+                       kr = atm_value_register(atm_value, task_descriptor, guard);
+                       if (kr != KERN_SUCCESS) {
                                break;
                        }
 
                        /* Increment sync value. */
-                       lck_mtx_lock(&atm_value->listener_lock);
-                       atm_value->sync++;
-                       lck_mtx_unlock(&atm_value->listener_lock);
+                       atm_sync_reference_internal(atm_value);
 
                        *out_value = atm_handle;
                        return kr;
@@ -356,12 +363,31 @@ atm_get_value(
 
        case MACH_VOUCHER_ATTR_ATM_CREATE:
 
+               /* Handle the old case where aid value is created in kernel */
+               if (recipe_size == 0) {
+                       aid = get_aid();
+               } else if (recipe_size == sizeof(aid_t)) {
+                       memcpy(&aid, recipe, sizeof(aid_t));
+               } else {
+                       kr = KERN_INVALID_ARGUMENT;
+                       break;
+               }
+               
                /* Allocate a new atm value. */
-               atm_value = atm_value_alloc_init();
-               atm_value_hash_table_insert(atm_value);
-
+               atm_value = atm_value_alloc_init(aid);
                if (atm_value == ATM_VALUE_NULL) {
-                       return KERN_RESOURCE_SHORTAGE;
+                       kr = KERN_RESOURCE_SHORTAGE;
+                       break;
+               }
+redrive:       
+               kr = atm_value_hash_table_insert(atm_value);
+               if (kr != KERN_SUCCESS) {
+                       if (recipe_size == 0) {
+                               atm_value->aid = get_aid();
+                               goto redrive;
+                       }
+                       atm_value_dealloc(atm_value);
+                       break;
                }
 
                *out_value = ATM_VALUE_TO_HANDLE(atm_value);
@@ -444,13 +470,14 @@ atm_command(
        assert(manager == &atm_manager);
        atm_value_t atm_value = ATM_VALUE_NULL;
        natural_t i = 0;
-       aid_t *aid_array = NULL;
        mach_atm_subaid_t *subaid_array = NULL;
+       mach_atm_subaid_t next_subaid = 0;
        uint32_t aid_array_count = 0;
        atm_task_descriptor_t task_descriptor = ATM_TASK_DESCRIPTOR_NULL;
        task_t task;
        uint32_t collection_flags = ATM_ACTION_LOGFAIL;
        kern_return_t kr = KERN_SUCCESS;
+       atm_guard_t guard;
        
        switch (command) {
        case ATM_ACTION_COLLECT:
@@ -460,6 +487,9 @@ atm_command(
        case ATM_ACTION_LOGFAIL: {
                mach_atm_subaid_t sub_aid = 0;
 
+               if (disable_atm || (atm_get_diagnostic_config() & ATM_TRACE_DISABLE))
+                       return KERN_NOT_SUPPORTED;
+
                /* find the first non-default atm_value */
                for (i = 0; i < value_count; i++) {
                        atm_value = HANDLE_TO_ATM_VALUE(values[i]);
@@ -473,11 +503,11 @@ atm_command(
                if (atm_value == NULL) {
                        return KERN_FAILURE;
                }
-               if (in_content == NULL || in_content_size < sizeof(mach_atm_subaid_t) ){
-                       return KERN_INVALID_ARGUMENT;
+
+               if (in_content_size >= sizeof(mach_atm_subaid_t)) {
+                       sub_aid = *(mach_atm_subaid_t *)(void *)in_content;
                }
 
-               sub_aid = *(mach_atm_subaid_t *)(void *)in_content;
                *out_content_size = 0;
                kr = atm_invoke_collection(atm_value, sub_aid, collection_flags);
                break;
@@ -491,24 +521,13 @@ atm_command(
                if (aid_array_count > AID_ARRAY_COUNT_MAX)
                        return KERN_FAILURE;
 
-               aid_array = (aid_t *) kalloc(aid_array_count * sizeof(aid_t));
-               if (aid_array == NULL)
-                       return KERN_NO_SPACE;
-
-               subaid_array = (mach_atm_subaid_t *) kalloc(aid_array_count * sizeof(mach_atm_subaid_t));
-               if (subaid_array == NULL) {
-                       kfree(aid_array, aid_array_count * sizeof(aid_t));
-                       return KERN_NO_SPACE;
+               subaid_array = (mach_atm_subaid_t *) (void *) out_content;
+               for (i = 0; i < aid_array_count; i++) {
+                       subaid_array[i] = ATM_SUBAID32_MAX;
                }
 
-               memcpy(aid_array, in_content, aid_array_count * sizeof(aid_t));
-               atm_get_min_sub_aid_array(aid_array, subaid_array, aid_array_count);
-               
-               memcpy(out_content, subaid_array, aid_array_count * sizeof(mach_atm_subaid_t));
                *out_content_size = aid_array_count * sizeof(mach_atm_subaid_t);
 
-               kfree(aid_array, aid_array_count * sizeof(aid_t));
-               kfree(subaid_array, aid_array_count * sizeof(mach_atm_subaid_t));
                kr = KERN_SUCCESS;
 
                break;
@@ -527,19 +546,50 @@ atm_command(
                if (atm_value == NULL) {
                        return KERN_FAILURE;
                }
-               if (in_content == NULL || in_content_size != sizeof(mailbox_offset_t)){
+               if (in_content == NULL || in_content_size != sizeof(atm_guard_t)){
                        return KERN_INVALID_ARGUMENT;
                }
 
-               mailbox_offset_t mailbox_offset;
-               memcpy(&mailbox_offset, in_content, sizeof(mailbox_offset_t));
+               memcpy(&guard, in_content, sizeof(atm_guard_t));
                task = current_task();
                task_descriptor = task->atm_context;
 
-               kr = atm_value_unregister(atm_value, task_descriptor, mailbox_offset);
+               kr = atm_value_unregister(atm_value, task_descriptor, guard);
 
                break;
 
+       case ATM_ACTION_REGISTER:
+               for (i = 0; i < value_count; i++) {
+                       atm_value = HANDLE_TO_ATM_VALUE(values[i]);
+                       if (atm_value != VAM_DEFAULT_VALUE)
+                               break;
+               }
+               /* if we are not able to find any atm values
+                * in stack then this call was made in error
+                */
+               if (atm_value == NULL) {
+                       return KERN_FAILURE;
+               }
+               if (in_content == NULL || in_content_size != sizeof(atm_guard_t)){
+                       return KERN_INVALID_ARGUMENT;
+               }
+
+               memcpy(&guard, in_content, sizeof(atm_guard_t));
+               task = current_task();
+               task_descriptor = task->atm_context;
+
+               kr = atm_value_register(atm_value, task_descriptor, guard);
+
+               break;
+
+       case ATM_ACTION_GETSUBAID:
+               if (out_content == NULL || *out_content_size != sizeof(mach_atm_subaid_t))
+                       return KERN_FAILURE;
+
+               next_subaid = get_subaid();
+               memcpy(out_content, &next_subaid, sizeof(mach_atm_subaid_t));
+               break;
+
        default:
                kr = KERN_INVALID_ARGUMENT;
                break;
@@ -565,12 +615,12 @@ atm_release(
 kern_return_t
 atm_invoke_collection(
        atm_value_t atm_value,
-       subaid_t sub_activity_id,
+       mach_atm_subaid_t sub_aid,
        uint32_t flags)
 {
        aid_t aid = atm_value->aid;
        kern_return_t kr = KERN_SUCCESS;
-       uint32_t array_count = 0, i = 0, requestor_index = 0;
+       uint32_t array_count = 0, i = 0, j = 0, requestor_index = 0;
        uint64_t *sizes_array = NULL;
        atm_link_object_t link_object = NULL;
        mach_port_t *mem_array = NULL;
@@ -632,7 +682,12 @@ atm_invoke_collection(
        }
 
        if (i > 0) {
-               kr = atm_send_user_notification(aid, sub_activity_id, mem_array, sizes_array, i, flags);
+               kr = atm_send_user_notification(aid, sub_aid, mem_array, sizes_array, i, flags);
+       }
+
+       for (j = 0; j < i; j++) {
+               if (mem_array[j] != NULL)
+                       ipc_port_release_send(mem_array[j]);
        }
 
        kfree(mem_array, sizeof(mach_port_t) * array_count);
@@ -650,7 +705,7 @@ atm_invoke_collection(
 kern_return_t
 atm_send_user_notification(
        aid_t aid,
-       subaid_t subaid,
+       mach_atm_subaid_t sub_aid,
        mach_port_t *buffers_array,
        uint64_t *sizes_array,
        mach_msg_type_number_t count,
@@ -658,12 +713,24 @@ atm_send_user_notification(
 {
        mach_port_t user_port;
        int                     error;
+       thread_t th = current_thread();
+       kern_return_t kr;
+
        error = host_get_atm_notification_port(host_priv_self(), &user_port);
        if ((error != KERN_SUCCESS) || !IPC_PORT_VALID(user_port)) {
                return KERN_FAILURE;
        }
 
-       return atm_collect_trace_info(user_port, aid, subaid, flags, buffers_array, count, sizes_array, count);
+       /* Set the honor queue limit option on the thread. */
+       th->options |= TH_OPT_HONOR_QLIMIT;
+       kr = atm_collect_trace_info(user_port, aid, sub_aid, flags, buffers_array, count, sizes_array, count);
+       /* Make sure that honor queue limit option is unset on the thread. */
+       th->options &= (~TH_OPT_HONOR_QLIMIT);
+
+       if (kr == MACH_SEND_TIMED_OUT) {
+               kr = KERN_SUCCESS;
+       }
+       return kr;
 }
 
 /*
@@ -682,9 +749,14 @@ atm_send_proc_inspect_notification(
 {
        mach_port_t user_port = MACH_PORT_NULL;
        mach_port_t memory_port = MACH_PORT_NULL;
+       kern_return_t kr;
        atm_task_descriptor_t task_descriptor = ATM_TASK_DESCRIPTOR_NULL;
        uint64_t buffer_size = 0;
        int                     error;
+       thread_t th = current_thread();
+
+       if (disable_atm || (atm_get_diagnostic_config() & ATM_TRACE_DISABLE))
+               return KERN_NOT_SUPPORTED;
 
        /* look for the requested memory in target task */
        if (!task)
@@ -712,7 +784,18 @@ atm_send_proc_inspect_notification(
                return KERN_FAILURE;
        }
 
-       return atm_inspect_process_buffer(user_port, traced_pid, traced_uniqueid, buffer_size, memory_port);
+       /* Set the honor queue limit option on the thread. */
+       th->options |= TH_OPT_HONOR_QLIMIT;
+       kr =  atm_inspect_process_buffer(user_port, traced_pid, traced_uniqueid, buffer_size, memory_port);
+       /* Make sure that honor queue limit option is unset on the thread. */
+       th->options &= (~TH_OPT_HONOR_QLIMIT);
+
+       if (kr == MACH_SEND_TIMED_OUT) {
+               kr = KERN_SUCCESS;
+       }
+
+       ipc_port_release_send(memory_port);
+       return kr;
 }
 
 /*
@@ -722,7 +805,7 @@ atm_send_proc_inspect_notification(
  *          ATM_VALUE_NULL: On failure.
  */
 static atm_value_t
-atm_value_alloc_init()
+atm_value_alloc_init(aid_t aid)
 {
        atm_value_t new_atm_value = ATM_VALUE_NULL;
 
@@ -730,7 +813,7 @@ atm_value_alloc_init()
        if (new_atm_value == ATM_VALUE_NULL)
                panic("Ran out of ATM values structure.\n\n");
 
-       new_atm_value->aid = get_aid();
+       new_atm_value->aid = aid;
        queue_init(&new_atm_value->listeners);
        new_atm_value->sync = 1;
        new_atm_value->listener_count = 0;
@@ -760,6 +843,20 @@ get_aid()
 }
 
 
+/*
+ * Routine: get_subaid
+ * Purpose: Increment the global subaid counter and return it.
+ * Returns: subaid
+ */
+static mach_atm_subaid_t
+get_subaid()
+{
+       mach_atm_subaid_t next_subaid;
+       next_subaid = (mach_atm_subaid_t)OSIncrementAtomic64((SInt64 *)&global_subaid);
+       return next_subaid;
+}
+
+
 /*
  * Routine: atm_value_dealloc
  * Purpose: Drops the reference on atm value and deallocates.
@@ -769,17 +866,11 @@ get_aid()
 static void
 atm_value_dealloc(atm_value_t atm_value)
 {
-       lck_mtx_lock(&atm_value->listener_lock);
-
-       atm_value->reference_count--;
-       assert(atm_value->reference_count >= 0);
-
-       if (atm_value->reference_count > 0) {
-               lck_mtx_unlock(&atm_value->listener_lock);
+       if (0 < atm_value_release_internal(atm_value)) {
                return;
        }
 
-       lck_mtx_unlock(&atm_value->listener_lock);
+       assert(atm_value->reference_count == 0);
 
        /* Free up the atm value and also remove all the listeners. */
        atm_listener_delete_all(atm_value);
@@ -816,21 +907,37 @@ atm_hash_table_init()
 /*
  * Routine: atm_value_hash_table_insert
  * Purpose: Insert an atm value in the hash table.
- * Returns: None.
+ * Returns: KERN_SUCCESS on success.
+ *          KERN_NAME_EXISTS if atm value already in the hash table.
  */
-static void
+static kern_return_t
 atm_value_hash_table_insert(atm_value_t new_atm_value)
 {
        int hash_index;
        atm_value_hash_t hash_list_head;
        aid_t aid = new_atm_value->aid;
+       atm_value_t next;
 
        hash_index = AID_TO_HASH(aid);
        hash_list_head = &atm_value_hash_table[hash_index];
 
+       /* Lock the atm list and search for the aid. */
        lck_mtx_lock(&hash_list_head->hash_list_lock);
+
+       queue_iterate(&hash_list_head->hash_list, next, atm_value_t, vid_hash_elt) {
+               if (next->aid == aid) {
+                       /*
+                        * aid found. return error.
+                        */
+                       lck_mtx_unlock(&hash_list_head->hash_list_lock);
+                       return (KERN_NAME_EXISTS);
+               }
+       }
+
+       /* Enter the aid in hash and return success. */
        queue_enter(&hash_list_head->hash_list, new_atm_value, atm_value_t, vid_hash_elt);
        lck_mtx_unlock(&hash_list_head->hash_list_lock);
+       return KERN_SUCCESS;
 }
 
 
@@ -899,9 +1006,7 @@ get_atm_value_from_aid(aid_t aid)
 static void
 atm_value_get_ref(atm_value_t atm_value)
 {
-       lck_mtx_lock(&atm_value->listener_lock);
-       atm_value->reference_count++;
-       lck_mtx_unlock(&atm_value->listener_lock);
+       atm_value_reference_internal(atm_value);
 }
 
 
@@ -915,48 +1020,86 @@ static kern_return_t
 atm_listener_insert(
        atm_value_t             atm_value,
        atm_task_descriptor_t   task_descriptor,
-       mailbox_offset_t        mailbox_offset)
+       atm_guard_t             guard)
 {
        atm_link_object_t new_link_object;
-       atm_link_object_t next;
-       void *mailbox = (void *)((char *)task_descriptor->mailbox_kernel_addr + mailbox_offset);
+       atm_link_object_t next, elem;
+       int32_t freed_count = 0, dead_but_not_freed = 0, listener_count;
+       boolean_t element_found = FALSE;
+       queue_head_t free_listeners;
 
        new_link_object = (atm_link_object_t) zalloc(atm_link_objects_zone);
        new_link_object->descriptor = task_descriptor;
        new_link_object->reference_count = 1;
-       new_link_object->flags = 0;
-       new_link_object->mailbox = mailbox;
+       new_link_object->guard = guard;
 
        /* Get a reference on the task descriptor */
        atm_descriptor_get_reference(task_descriptor);
+       queue_init(&free_listeners);
+       listener_count = atm_value->listener_count;
 
-       /* Check if the task mailbox is already on the listener list */
+       /* Check if the task is already on the listener list */
        lck_mtx_lock(&atm_value->listener_lock);
-       queue_iterate(&atm_value->listeners, next, atm_link_object_t, listeners_element) {
-               if (next->descriptor == task_descriptor) {
-                       /* 
-                        * Replace the mailbox with the new one, the old mailbox is anyways on unregister path.
-                        * There is a race when get_min_sub_aid would cache the mailbox, and this function will
-                        * replace it. It would just behave as if the get value call happened after get_min_sub_aid
-                        * was already completed.
-                        */
-                       next->mailbox = mailbox;
-                       lck_mtx_unlock(&atm_value->listener_lock);
+
+       next = (atm_link_object_t)(void *) queue_first(&atm_value->listeners);
+       while (!queue_end(&atm_value->listeners, (queue_entry_t)next)) {
+               elem = next;
+               next = (atm_link_object_t)(void *) queue_next(&next->listeners_element);
+
+               /* Check for dead tasks */
+               if (elem->descriptor->flags == ATM_TASK_DEAD) {
+                       if ((dead_but_not_freed > ATM_LIST_DEAD_MAX) || elem->guard == 0) {
+                               queue_remove(&atm_value->listeners, elem, atm_link_object_t, listeners_element);
+                               queue_enter(&free_listeners, elem, atm_link_object_t, listeners_element);
+                               atm_listener_count_decr_internal(atm_value);
+                               freed_count++;
+                       } else {
+                               dead_but_not_freed++;
+                       }
+                       continue;
+               }
+
+               if (element_found)
+                       continue;
+
+               if (elem->descriptor == task_descriptor) {
+                       /* Increment reference count on Link object. */
+                       atm_link_get_reference(elem);
+
+                       /* Replace the guard with the new one, the old guard is anyways on unregister path. */
+                       elem->guard = guard;
+                       element_found = TRUE;
                        KERNEL_DEBUG_CONSTANT((ATM_CODE(ATM_GETVALUE_INFO, (ATM_VALUE_REPLACED))) | DBG_FUNC_NONE,
-                               VM_KERNEL_ADDRPERM(atm_value), atm_value->aid, mailbox_offset, 0, 0);
+                               VM_KERNEL_ADDRPERM(atm_value), atm_value->aid, guard, 0, 0);
 
-                       /* Drop the extra reference on task descriptor taken by this function. */
-                       atm_task_descriptor_dealloc(task_descriptor);
-                       zfree(atm_link_objects_zone, new_link_object);
-                       return KERN_SUCCESS;
                }
        }
-       KERNEL_DEBUG_CONSTANT((ATM_CODE(ATM_GETVALUE_INFO, (ATM_VALUE_ADDED))) | DBG_FUNC_NONE,
-                               VM_KERNEL_ADDRPERM(atm_value), atm_value->aid, mailbox_offset, 0, 0);
 
-       queue_enter(&atm_value->listeners, new_link_object, atm_link_object_t, listeners_element);
-       atm_value->listener_count++;
-       lck_mtx_unlock(&atm_value->listener_lock);
+       if (element_found) {
+               lck_mtx_unlock(&atm_value->listener_lock);
+               /* Drop the extra reference on task descriptor taken by this function. */
+               atm_task_descriptor_dealloc(task_descriptor);
+               zfree(atm_link_objects_zone, new_link_object);
+       } else {
+               KERNEL_DEBUG_CONSTANT((ATM_CODE(ATM_GETVALUE_INFO, (ATM_VALUE_ADDED))) | DBG_FUNC_NONE,
+                               VM_KERNEL_ADDRPERM(atm_value), atm_value->aid, guard, 0, 0);
+
+               queue_enter(&atm_value->listeners, new_link_object, atm_link_object_t, listeners_element);
+               atm_listener_count_incr_internal(atm_value);
+               lck_mtx_unlock(&atm_value->listener_lock);
+       }
+
+       /* Free the link objects */
+       while(!queue_empty(&free_listeners)) {
+               queue_remove_first(&free_listeners, next, atm_link_object_t, listeners_element);
+
+               /* Deallocate the link object */
+               atm_link_dealloc(next);
+       }
+
+       KERNEL_DEBUG_CONSTANT((ATM_CODE(ATM_SUBAID_INFO, (ATM_LINK_LIST_TRIM))) | DBG_FUNC_NONE,
+               listener_count, freed_count, dead_but_not_freed, VM_KERNEL_ADDRPERM(atm_value), 1);
+
        return KERN_SUCCESS;
 }
 
@@ -974,7 +1117,7 @@ atm_listener_delete_all(atm_value_t atm_value)
        while(!queue_empty(&atm_value->listeners)) {
                queue_remove_first(&atm_value->listeners, next, atm_link_object_t, listeners_element);
 
-               /* Drops the reference on the link object */
+               /* Deallocate the link object */
                atm_link_dealloc(next);
        }
 }
@@ -984,18 +1127,17 @@ atm_listener_delete_all(atm_value_t atm_value)
  * Routine: atm_listener_delete
  * Purpose: Deletes a listener for an atm value.
  * Returns: KERN_SUCCESS on successful unregister.
- *          KERN_INVALID_VALUE on finding a different mailbox.
+ *          KERN_INVALID_VALUE on finding a different guard.
  *          KERN_FAILURE on failure.
  */
 static kern_return_t
 atm_listener_delete(
        atm_value_t atm_value,
        atm_task_descriptor_t task_descriptor,
-       mailbox_offset_t mailbox_offset)
+       atm_guard_t guard)
 {
        queue_head_t free_listeners;
        atm_link_object_t next, elem;
-       void *mailbox = (void *)((char *)task_descriptor->mailbox_kernel_addr + mailbox_offset);
        kern_return_t kr = KERN_FAILURE;
 
        queue_init(&free_listeners);
@@ -1008,22 +1150,24 @@ atm_listener_delete(
                next = (atm_link_object_t)(void *) queue_next(&next->listeners_element);
 
                if (elem->descriptor == task_descriptor) {
-                       if (elem->mailbox == mailbox) {
+                       if (elem->guard == guard) {
                                KERNEL_DEBUG_CONSTANT((ATM_CODE(ATM_UNREGISTER_INFO,
                                        (ATM_VALUE_UNREGISTERED))) | DBG_FUNC_NONE,
-                                       VM_KERNEL_ADDRPERM(atm_value), atm_value->aid, mailbox_offset, 0, 0);
-                               queue_remove(&atm_value->listeners, elem, atm_link_object_t, listeners_element);
-                               queue_enter(&free_listeners, elem, atm_link_object_t, listeners_element);
-                               atm_value->listener_count--;
+                                       VM_KERNEL_ADDRPERM(atm_value), atm_value->aid, guard, elem->reference_count, 0);
+                               elem->guard = 0;
                                kr = KERN_SUCCESS;
-                               break;
                        } else {
                                KERNEL_DEBUG_CONSTANT((ATM_CODE(ATM_UNREGISTER_INFO,
                                        (ATM_VALUE_DIFF_MAILBOX))) | DBG_FUNC_NONE,
-                                       VM_KERNEL_ADDRPERM(atm_value), atm_value->aid, 0, 0, 0);
+                                       VM_KERNEL_ADDRPERM(atm_value), atm_value->aid, elem->guard, elem->reference_count, 0);
                                kr = KERN_INVALID_VALUE;
-                               break;
                        }
+                       if (0 == atm_link_object_release_internal(elem)) {
+                               queue_remove(&atm_value->listeners, elem, atm_link_object_t, listeners_element);
+                               queue_enter(&free_listeners, elem, atm_link_object_t, listeners_element);
+                               atm_listener_count_decr_internal(atm_value);
+                       }
+                       break;
                }
        }
        lck_mtx_unlock(&atm_value->listener_lock);
@@ -1031,7 +1175,7 @@ atm_listener_delete(
        while(!queue_empty(&free_listeners)) {
                queue_remove_first(&free_listeners, next, atm_link_object_t, listeners_element);
        
-               /* Drops the reference on the link object */
+               /* Deallocate the link object */
                atm_link_dealloc(next);
        }
        return kr;
@@ -1048,8 +1192,6 @@ static atm_task_descriptor_t
 atm_task_descriptor_alloc_init(
        mach_port_t             trace_buffer,
        uint64_t                buffer_size,
-       void *                  mailbox_addr,
-       uint64_t                mailbox_array_size,
        task_t                  __assert_only task)
 {
        atm_task_descriptor_t new_task_descriptor;
@@ -1058,8 +1200,6 @@ atm_task_descriptor_alloc_init(
 
        new_task_descriptor->trace_buffer = trace_buffer;
        new_task_descriptor->trace_buffer_size = buffer_size;
-       new_task_descriptor->mailbox_array_size = mailbox_array_size;
-       new_task_descriptor->mailbox_kernel_addr = mailbox_addr;
        new_task_descriptor->reference_count = 1;
        new_task_descriptor->flags = 0;
        lck_mtx_init(&new_task_descriptor->lock, &atm_lock_grp, &atm_lock_attr);
@@ -1083,9 +1223,7 @@ atm_task_descriptor_alloc_init(
 static void
 atm_descriptor_get_reference(atm_task_descriptor_t task_descriptor)
 {
-       lck_mtx_lock(&task_descriptor->lock);
-       task_descriptor->reference_count++;
-       lck_mtx_unlock(&task_descriptor->lock);
+       atm_task_desc_reference_internal(task_descriptor);
 }
 
 
@@ -1097,26 +1235,19 @@ atm_descriptor_get_reference(atm_task_descriptor_t task_descriptor)
 static void
 atm_task_descriptor_dealloc(atm_task_descriptor_t task_descriptor)
 {
-       lck_mtx_lock(&task_descriptor->lock);
-       task_descriptor->reference_count--;
-       assert(task_descriptor->reference_count >= 0);
-       if (task_descriptor->reference_count > 0) {
-               lck_mtx_unlock(&task_descriptor->lock);
+       if (0 < atm_task_desc_release_internal(task_descriptor)) {
                return;
        }
 
+       assert(task_descriptor->reference_count == 0);
+
 #if DEVELOPMENT || DEBUG
        lck_mtx_lock(&atm_descriptors_list_lock);
        queue_remove(&atm_descriptors_list, task_descriptor, atm_task_descriptor_t, descriptor_elt);
        lck_mtx_unlock(&atm_descriptors_list_lock);
 #endif
-       mach_vm_deallocate(kernel_map, (mach_vm_address_t)task_descriptor->mailbox_kernel_addr,
-               task_descriptor->mailbox_array_size);
-       task_descriptor->mailbox_kernel_addr = NULL;
-       task_descriptor->mailbox_array_size = 0;
        /* release the send right for the named memory entry */
        ipc_port_release_send(task_descriptor->trace_buffer);
-       lck_mtx_unlock(&task_descriptor->lock);
        lck_mtx_destroy(&task_descriptor->lock, &atm_lock_grp);
        zfree(atm_descriptors_zone, task_descriptor);
        return;
@@ -1143,12 +1274,6 @@ atm_link_get_reference(atm_link_object_t link_object)
 static void
 atm_link_dealloc(atm_link_object_t link_object)
 {
-       if (0 < atm_link_object_release_internal(link_object)) {
-               return;
-       }
-
-       assert(link_object->reference_count == 0);
-
        /* Drop the reference on atm task descriptor. */
        atm_task_descriptor_dealloc(link_object->descriptor);
        zfree(atm_link_objects_zone, link_object);
@@ -1165,15 +1290,13 @@ kern_return_t
 atm_register_trace_memory(
        task_t                  task,
        uint64_t                trace_buffer_address,
-       uint64_t                buffer_size,
-       uint64_t                mailbox_array_size)
+       uint64_t                buffer_size)
 {
        atm_task_descriptor_t task_descriptor;
        mach_port_t trace_buffer = MACH_PORT_NULL;
-       mach_vm_offset_t mailbox_kernel_ptr = 0;
        kern_return_t kr = KERN_SUCCESS;
 
-       if (disable_atm)
+       if (disable_atm || (atm_get_diagnostic_config() & ATM_TRACE_DISABLE))
                return KERN_NOT_SUPPORTED;
 
        if (task != current_task())
@@ -1183,11 +1306,7 @@ atm_register_trace_memory(
            || (void *)trace_buffer_address == NULL
            || buffer_size == 0
            || (buffer_size & PAGE_MASK) != 0
-           || buffer_size > MAX_TRACE_BUFFER_SIZE
-           || mailbox_array_size == 0
-           || mailbox_array_size >= buffer_size
-           || mailbox_array_size > MAX_MAILBOX_SIZE
-           || mailbox_array_size & PAGE_MIN_MASK) {
+           || buffer_size > MAX_TRACE_BUFFER_SIZE) {
                return KERN_INVALID_ARGUMENT;
        }
 
@@ -1202,28 +1321,9 @@ atm_register_trace_memory(
        if (kr != KERN_SUCCESS)
                return kr;
 
-       kr = mach_vm_map(kernel_map,
-                                &mailbox_kernel_ptr,
-                                mailbox_array_size,
-                                0,
-                                VM_FLAGS_ANYWHERE,
-                                trace_buffer,
-                                0,
-                                FALSE,
-                                VM_PROT_READ,
-                                VM_PROT_READ,
-                                VM_INHERIT_NONE
-                                );
-
-       if (kr != KERN_SUCCESS){
-               ipc_port_release_send(trace_buffer);
-               return kr;
-       }
-
-       task_descriptor = atm_task_descriptor_alloc_init(trace_buffer, buffer_size, (void *)mailbox_kernel_ptr, mailbox_array_size, task);
+       task_descriptor = atm_task_descriptor_alloc_init(trace_buffer, buffer_size, task);
        if (task_descriptor == ATM_TASK_DESCRIPTOR_NULL) {
                ipc_port_release_send(trace_buffer);
-               mach_vm_deallocate(kernel_map, (mach_vm_address_t)mailbox_kernel_ptr, mailbox_array_size);
                return KERN_NO_SPACE;
        }
 
@@ -1243,156 +1343,35 @@ atm_register_trace_memory(
        return KERN_SUCCESS;
 }
 
-
 /*
- * Routine: atm_get_min_sub_aid_array
- * Purpose: For an array of aid, lookup the atm value and fill the minimum subaid.
- * Returns: None.
+ * Routine: atm_set_diagnostic_config
+ * Purpose: Set global atm_diagnostic_config and update the commpage to reflect
+ *          the new value.
+ * Returns: Error if ATM is disabled.
  */
-static void
-atm_get_min_sub_aid_array(
-       aid_t                           *aid_array,
-       mach_atm_subaid_t       *subaid_array,
-       uint32_t                        count)
+extern uint32_t atm_diagnostic_config; /* Proxied to commpage for fast user access */
+kern_return_t
+atm_set_diagnostic_config(uint32_t diagnostic_config)
 {
-       atm_value_t atm_value;
-       uint32_t i;
-
-       KERNEL_DEBUG_CONSTANT((ATM_CODE(ATM_SUBAID_INFO, (ATM_MIN_CALLED))) | DBG_FUNC_START,
-                       0, 0, 0, 0, 0);
-
-       for (i = 0; i < count; i++) {
-               atm_value = get_atm_value_from_aid(aid_array[i]);
-               if (atm_value == ATM_VALUE_NULL) {
-                       subaid_array[i] = ATM_SUBAID32_MAX;
-                       continue;
-               }
-               subaid_array[i] = atm_get_min_sub_aid(atm_value);
-               atm_value_dealloc(atm_value);
-       }
+       if (disable_atm)
+               return KERN_NOT_SUPPORTED;
 
-       KERNEL_DEBUG_CONSTANT((ATM_CODE(ATM_SUBAID_INFO, (ATM_MIN_CALLED))) | DBG_FUNC_END,
-                       count, 0, 0, 0, 0);
+       atm_diagnostic_config = diagnostic_config;
+       commpage_update_atm_diagnostic_config(atm_diagnostic_config);
 
+       return KERN_SUCCESS;
 }
 
 
 /*
- * Routine: atm_get_min_sub_aid
- * Purpose: Walk the list of listeners and get the min sub-aid for an activity id.
- * Returns: Minimum sub-aid to keep.
- * Note: Unlock the listener lock before accessing the mailbox, since it may page fault and
- *       might take long time. Also cleans the listeners list for the tasks which are dead 
- *       and atm_task_descriptors do not hold any useful data.
+ * Routine: atm_get_diagnostic_config
+ * Purpose: Get global atm_diagnostic_config.
+ * Returns: Diagnostic value
  */
-static mach_atm_subaid_t
-atm_get_min_sub_aid(atm_value_t atm_value)
+uint32_t
+atm_get_diagnostic_config(void)
 {
-       int32_t i = 0, j, freed_count = 0, dead_but_not_freed = 0;
-       int32_t listener_count;
-       atm_subaid32_t min_subaid = ATM_SUBAID32_MAX, subaid, max_subaid;
-       atm_link_object_t *link_object_array = NULL;
-       atm_link_object_t next, elem;
-       queue_head_t free_listeners;
-
-       KERNEL_DEBUG_CONSTANT((ATM_CODE(ATM_SUBAID_INFO, (ATM_MIN_LINK_LIST))) | DBG_FUNC_START,
-                       0, 0, 0, 0, 0);
-
-       lck_mtx_lock(&atm_value->listener_lock);
-       listener_count = atm_value->listener_count;
-       lck_mtx_unlock(&atm_value->listener_lock);
-
-       /* separate memory access from locked iterate since memory read may fault */
-       link_object_array = (atm_link_object_t *) kalloc(sizeof(atm_link_object_t) * listener_count);
-       if (link_object_array == NULL) {
-               return 0;
-       }
-
-       /* Iterate the list and take a ref on link objects and store it in an array */ 
-       lck_mtx_lock(&atm_value->listener_lock);
-       queue_iterate(&atm_value->listeners, next, atm_link_object_t, listeners_element) {
-               /* Additional listener are added between the allocation of array and iterating the list */
-               if (i >= listener_count)
-                       break;
-
-               /* Get a ref on the link object */
-               atm_link_get_reference(next);
-               link_object_array[i] = (atm_link_object_t)next;
-               i++;
-       }
-       lck_mtx_unlock(&atm_value->listener_lock);
-       j = i;
-
-       /* Iterate the array to find the min */
-       for (i = 0; i < j; i++) {
-               /* Ignore the min value of the dead processes. */
-               if (link_object_array[i]->descriptor->flags == ATM_TASK_DEAD)
-                       continue;
-               /* Dereference the mailbox to get the min subaid */
-               subaid = *((atm_subaid32_t *)link_object_array[i]->mailbox);
-               if (subaid < min_subaid)
-                       min_subaid = subaid;
-       }
-
-       /*
-        * Mark the link object that can be freed, and release the ref on the link object
-        * Mark the link object of dead task free after the dead task descriptor count
-        * increases than ATM_LIST_DEAD_MAX.
-        */
-       for (i = j - 1; i >= 0; i--) {
-               if (link_object_array[i]->descriptor->flags == ATM_TASK_DEAD) {
-                       if (dead_but_not_freed > ATM_LIST_DEAD_MAX) {
-                               link_object_array[i]->flags = ATM_LINK_REMOVE;
-                               freed_count++;
-                       } else {
-                               max_subaid = *(((atm_subaid32_t *)link_object_array[i]->mailbox) + 1);
-                               if (max_subaid < min_subaid) {
-                                       link_object_array[i]->flags = ATM_LINK_REMOVE;
-                                       freed_count++;
-                               } else {
-                                       dead_but_not_freed++;
-                               }
-                       }
-               }
-               atm_link_dealloc(link_object_array[i]);
-               link_object_array[i] = NULL;
-       }
-
-       /* Check if the number of live entries in list is less than maxproc */
-       assert((j - (freed_count + dead_but_not_freed)) <= maxproc);
-
-       kfree(link_object_array, (sizeof(atm_link_object_t) * listener_count));
-
-       /* Remove the marked link objects from the list */
-       lck_mtx_lock(&atm_value->listener_lock);
-       
-       queue_init(&free_listeners);
-       next = (atm_link_object_t)(void *) queue_first(&atm_value->listeners);
-       while (!queue_end(&atm_value->listeners, (queue_entry_t)next)) {
-               elem = next;
-               next = (atm_link_object_t)(void *) queue_next(&next->listeners_element);
-
-               if (elem->flags == ATM_LINK_REMOVE) {
-                       queue_remove(&atm_value->listeners, elem, atm_link_object_t, listeners_element);
-                       queue_enter(&free_listeners, elem, atm_link_object_t, listeners_element);
-                       atm_value->listener_count--;
-               }
-       }
-       lck_mtx_unlock(&atm_value->listener_lock);
-
-       /* Free the link objects */
-       while(!queue_empty(&free_listeners)) {
-               queue_remove_first(&free_listeners, next, atm_link_object_t, listeners_element);
-       
-               /* Drops the reference on the link object */
-               atm_link_dealloc(next);
-       }
-       
-       KERNEL_DEBUG_CONSTANT((ATM_CODE(ATM_SUBAID_INFO, (ATM_MIN_LINK_LIST))) | DBG_FUNC_END,
-                       j, freed_count, dead_but_not_freed, 0, 0);
-
-       /* explicitly upgrade uint32_t to 64 bit mach size */
-       return CAST_DOWN(mach_atm_subaid_t, min_subaid);
+       return atm_diagnostic_config;
 }
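The two routines above form a simple global switch: atm_set_diagnostic_config stores the value and mirrors it into the commpage for fast user-space reads, and atm_get_diagnostic_config is what atm_register_trace_memory consults (together with disable_atm) earlier in this diff. A minimal sketch of that gate, using the ATM_TRACE_DISABLE bit defined in atm_types.h below:

	/* Sketch only; mirrors the check at the top of atm_register_trace_memory() above. */
	if (disable_atm || (atm_get_diagnostic_config() & ATM_TRACE_DISABLE)) {
		return KERN_NOT_SUPPORTED;   /* tracing registration is refused */
	}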
 
 
@@ -1400,26 +1379,48 @@ atm_get_min_sub_aid(atm_value_t atm_value)
  * Routine: atm_value_unregister
  * Purpose: Unregisters a process from an activity id.
  * Returns: KERN_SUCCESS on successful unregister.
- *          KERN_INVALID_VALUE on finding a diff mailbox.
+ *          KERN_INVALID_VALUE on finding a different guard.
  *          KERN_FAILURE on failure.
  */
 static kern_return_t
 atm_value_unregister(
        atm_value_t atm_value,
        atm_task_descriptor_t task_descriptor,
-       mailbox_offset_t mailbox_offset)
+       atm_guard_t guard)
 {
        kern_return_t kr;
 
        if (task_descriptor == ATM_TASK_DESCRIPTOR_NULL)
-               return KERN_INVALID_ARGUMENT;
-       if (mailbox_offset > task_descriptor->mailbox_array_size)
-               return KERN_INVALID_ARGUMENT;
+               return KERN_INVALID_TASK;
        
-       kr = atm_listener_delete(atm_value, task_descriptor, mailbox_offset);
+       kr = atm_listener_delete(atm_value, task_descriptor, guard);
        return kr;
 }
 
+
+/*
+ * Routine: atm_value_register
+ * Purpose: Registers a process for an activity id.
+ * Returns: KERN_SUCCESS on successful register.
+ *          KERN_INVALID_TASK on finding a null task atm context.
+ *          KERN_FAILURE on failure.
+ */
+static kern_return_t
+atm_value_register(
+       atm_value_t atm_value,
+       atm_task_descriptor_t task_descriptor,
+       atm_guard_t guard)
+{
+       kern_return_t kr;
+
+       if (task_descriptor == ATM_TASK_DESCRIPTOR_NULL)
+               return KERN_INVALID_TASK;
+
+       kr = atm_listener_insert(atm_value, task_descriptor, guard);
+       return kr;
+}
+
+
 void
 atm_task_descriptor_destroy(atm_task_descriptor_t task_descriptor)
 {
index 6fbc32b657a6ccaa69be11ee4ae25796247ab02c..73d2e0c0d60d9c9b18c1b163fb862ba6929f0af7 100644 (file)
 typedef mach_voucher_attr_value_handle_t atm_voucher_id_t;
 
 struct atm_task_descriptor {
-       decl_lck_mtx_data(,lock)             /* lock to protect reference count */
-       mach_port_t     trace_buffer;            /* named memory entry registered by user */
-       uint64_t        trace_buffer_size;   /* size of the trace_buffer registered */
-       uint64_t        mailbox_array_size;      /* Mailbox array size in bytes. */
-       void *          mailbox_kernel_addr; /* Kernel address where the mailbox is mapped. */
-       uint32_t        reference_count:31,
-                       flags:1;
+       decl_lck_mtx_data(,lock)                /* lock to protect reference count */
+       mach_port_t     trace_buffer;           /* named memory entry registered by user */
+       uint64_t        trace_buffer_size;      /* size of the trace_buffer registered */
+       uint32_t        reference_count;
+       uint8_t         flags;
 #if DEVELOPMENT || DEBUG
-       task_t          task;           /* task pointer for debugging purposes */
-       queue_chain_t   descriptor_elt; /* global chain of all descriptors */
+       task_t          task;                   /* task pointer for debugging purposes */
+       queue_chain_t   descriptor_elt;         /* global chain of all descriptors */
 #endif
 };
 
+#define atm_task_desc_reference_internal(elem) \
+       (hw_atomic_add(&(elem)->reference_count, 1))
+
+#define atm_task_desc_release_internal(elem)   \
+       (hw_atomic_sub(&(elem)->reference_count, 1))
+
 typedef struct atm_task_descriptor *atm_task_descriptor_t;
 #define ATM_TASK_DESCRIPTOR_NULL NULL
 
 struct atm_value {
-       aid_t            aid;                   /* activity id */
-       queue_head_t     listeners;             /* List of listeners who register for this activity */
-       decl_lck_mtx_data( ,listener_lock)      /* Lock to protect listener list */
-       queue_chain_t    vid_hash_elt;          /* Next hash element in the global hash table */
+       aid_t            aid;                   /* activity id */
+       queue_head_t     listeners;             /* List of listeners who register for this activity */
+       decl_lck_mtx_data( ,listener_lock)      /* Lock to protect listener list */
+       queue_chain_t    vid_hash_elt;          /* Next hash element in the global hash table */
 #if DEVELOPMENT || DEBUG
-       queue_chain_t    value_elt;             /* global chain of all values */
+       queue_chain_t    value_elt;             /* global chain of all values */
 #endif
-       uint32_t         sync;                  /* Made ref count given to voucher sub system. */
-       uint32_t         listener_count;        /* Number of Listerners listening on the value. */
-       int32_t          reference_count;       /* use count on the atm value, 1 taken by the global hash table */      
+       uint32_t         sync;                  /* Ref count given to the voucher subsystem. */
+       uint32_t         listener_count;        /* Number of listeners listening on the value. */
+       uint32_t         reference_count;       /* Use count on the atm value; 1 taken by the global hash table */
 };
 
+#define atm_value_reference_internal(elem)     \
+       (hw_atomic_add(&(elem)->reference_count, 1))
+
+#define atm_value_release_internal(elem)       \
+       (hw_atomic_sub(&(elem)->reference_count, 1))
+
+#define atm_listener_count_incr_internal(elem) \
+       (hw_atomic_add(&(elem)->listener_count, 1))
+
+#define atm_listener_count_decr_internal(elem) \
+       (hw_atomic_sub(&(elem)->listener_count, 1))
+
+#define atm_sync_reference_internal(elem)      \
+       (hw_atomic_add(&(elem)->sync, 1))
+
 typedef struct atm_value *atm_value_t;
 #define ATM_VALUE_NULL NULL
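The hw_atomic_add()/hw_atomic_sub() wrappers above replace the mutex-protected counters; both return the post-operation value, so a release that returns 0 means the caller has just dropped the last reference. A small sketch of the idiom, mirroring the dealloc path in osfmk/atm/atm.c earlier in this diff:

	/* Idiom sketch; see atm_task_descriptor_dealloc() above. */
	if (0 < atm_task_desc_release_internal(task_descriptor)) {
		return;                                   /* other references remain */
	}
	assert(task_descriptor->reference_count == 0);
	/* last reference: safe to tear down and zfree() */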
 
@@ -86,10 +105,9 @@ typedef struct atm_value *atm_value_t;
 
 struct atm_link_object {
        atm_task_descriptor_t  descriptor;
-       void *                 mailbox;              /* Offset in the mailbox registered by the user for an activity. */
-       uint32_t                reference_count;     /* Refernece count for link object */
-       uint8_t                flags;                /* Flags used mark for deletion from the listener list */
        queue_chain_t          listeners_element;    /* Head is atm_value->listeners. */
+       atm_guard_t            guard;                /* Guard registered by the user for an activity. */
+       uint32_t               reference_count;      /* Reference count for the link object */
 };
 
 typedef struct atm_link_object *atm_link_object_t;
@@ -109,9 +127,12 @@ typedef struct atm_value_hash *atm_value_hash_t;
 
 void atm_init(void);
 void atm_task_descriptor_destroy(atm_task_descriptor_t task_descriptor);
-kern_return_t atm_register_trace_memory(task_t task, uint64_t trace_buffer_address, uint64_t buffer_size, uint64_t mailbox_array_size);
+kern_return_t atm_register_trace_memory(task_t task, uint64_t trace_buffer_address, uint64_t buffer_size);
 kern_return_t atm_send_proc_inspect_notification(task_t task, int32_t traced_pid, uint64_t traced_uniqueid);
 
+kern_return_t atm_set_diagnostic_config(uint32_t);
+uint32_t atm_get_diagnostic_config(void);
+
 #endif /* MACH_KERNEL_PRIVATE */
 
 #endif /* _ATM_ATM_INTERNAL_H_ */
index 2bd03c0fe789c334665db88f35b9482af33dd465..afdd1ce37467fce578136d50dec1da1310ab5245 100644 (file)
@@ -42,9 +42,13 @@ typedef uint32_t atm_action_t;
 #define ATM_ACTION_LOGFAIL     0x3
 #define ATM_FIND_MIN_SUB_AID   0x4
 #define ATM_ACTION_UNREGISTER  0x5
+#define ATM_ACTION_REGISTER     0x6
+#define ATM_ACTION_GETSUBAID    0x7
 
-/* Deprecated. will be removed soon */
+typedef uint64_t atm_guard_t;
 typedef uint64_t aid_t;
+
+/* Deprecated. Will be removed soon. */
 typedef uint64_t subaid_t;
 typedef uint64_t mailbox_offset_t;
 #define SUB_AID_MAX (UINT64_MAX)
@@ -60,5 +64,6 @@ typedef atm_memory_descriptor_t *atm_memory_descriptor_array_t;
 typedef uint64_t *atm_memory_size_array_t;
 
 #define ATM_SUBAID32_MAX       (UINT32_MAX)
+#define ATM_TRACE_DISABLE       (0x100)
 
 #endif /* _ATM_ATM_TYPES_H_ */
index e1fe605985dff9beb435cec826c75b9f1a2409ce..03e862e9332db450e79dbfb67e42a2a6b8102f4c 100644 (file)
@@ -641,7 +641,7 @@ get_bank_task_context(task_t task)
        }
        /* We won the race. Take a ref on the ledger and initialize bank task. */
        bank_task->bt_creditcard = task->ledger;
-       bank_task->bt_pid = audit_token_pid_from_task(task);
+       bank_task->bt_pid = task_pid(task);
 #if DEVELOPMENT || DEBUG
        bank_task->bt_task = task;
 #endif
@@ -763,6 +763,12 @@ bank_rollup_chit_to_tasks(
                return;
        }
 
+#if DEVELOPMENT || DEBUG
+       if (debit != 0) {
+               panic("bank_rollup: debit: %lld non zero\n", debit);
+       }
+#endif
+
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (BANK_CODE(BANK_ACCOUNT_INFO, (BANK_SETTLE_CPU_TIME))) | DBG_FUNC_NONE,
                        bank_merchant->bt_pid, bank_holder->bt_pid, credit, debit, 0);
 #if CONFIG_BANK
@@ -822,6 +828,7 @@ bank_billed_time(bank_task_t bank_task)
 #ifdef CONFIG_BANK
        bank_account_t bank_account;
        int64_t temp = 0;
+       kern_return_t kr;
 #endif
        if (bank_task == BANK_TASK_NULL) {
                return balance;
@@ -830,13 +837,27 @@ bank_billed_time(bank_task_t bank_task)
 #ifdef CONFIG_BANK
        lck_mtx_lock(&bank_task->bt_acc_to_pay_lock);
 
-       ledger_get_balance(bank_task->bt_creditcard, task_ledgers.cpu_time_billed_to_me, &temp);
-       balance +=temp;
+       kr = ledger_get_balance(bank_task->bt_creditcard, task_ledgers.cpu_time_billed_to_me, &temp);
+       if (kr == KERN_SUCCESS && temp >= 0) {
+               balance += temp;
+       }
+#if DEVELOPMENT || DEBUG
+       else {
+               printf("bank_billed_time: ledger_get_balance failed or negative balance in ledger: %lld\n", temp);
+       }
+#endif /* DEVELOPMENT || DEBUG */
 
        queue_iterate(&bank_task->bt_accounts_to_pay, bank_account, bank_account_t, ba_next_acc_to_pay) {
                temp = 0;
-               ledger_get_balance(bank_account->ba_bill, bank_ledgers.cpu_time, &temp);
-               balance += temp;
+               kr = ledger_get_balance(bank_account->ba_bill, bank_ledgers.cpu_time, &temp);
+               if (kr == KERN_SUCCESS && temp >= 0) {
+                       balance += temp;
+               }
+#if DEVELOPMENT || DEBUG
+               else {
+               printf("bank_billed_time: ledger_get_balance failed or negative balance in ledger: %lld\n", temp);
+               }
+#endif /* DEVELOPMENT || DEBUG */
        }
        lck_mtx_unlock(&bank_task->bt_acc_to_pay_lock);
 #endif
@@ -855,6 +876,7 @@ bank_serviced_time(bank_task_t bank_task)
 #ifdef CONFIG_BANK
        bank_account_t bank_account;
        int64_t temp = 0;
+       kern_return_t kr;
 #endif
        if (bank_task == BANK_TASK_NULL) {
                return balance;
@@ -863,13 +885,27 @@ bank_serviced_time(bank_task_t bank_task)
 #ifdef CONFIG_BANK
        lck_mtx_lock(&bank_task->bt_acc_to_charge_lock);
 
-       ledger_get_balance(bank_task->bt_creditcard, task_ledgers.cpu_time_billed_to_others, &temp);
-       balance +=temp;
+       kr = ledger_get_balance(bank_task->bt_creditcard, task_ledgers.cpu_time_billed_to_others, &temp);
+       if (kr == KERN_SUCCESS && temp >= 0) {
+               balance += temp;
+       }
+#if DEVELOPMENT || DEBUG
+       else {
+               printf("bank_serviced_time: ledger_get_balance failed or negative balance in ledger: %lld\n", temp);
+       }
+#endif /* DEVELOPMENT || DEBUG */
 
        queue_iterate(&bank_task->bt_accounts_to_charge, bank_account, bank_account_t, ba_next_acc_to_charge) {
                temp = 0;
-               ledger_get_balance(bank_account->ba_bill, bank_ledgers.cpu_time, &temp);
-               balance += temp;
+               kr = ledger_get_balance(bank_account->ba_bill, bank_ledgers.cpu_time, &temp);
+               if (kr == KERN_SUCCESS && temp >= 0) {
+                       balance += temp;
+               }
+#if DEVELOPMENT || DEBUG
+               else {
+                       printf("bank_serviced_time: ledger_get_balance failed or negative balance in ledger: %lld\n", temp);
+               }
+#endif /* DEVELOPMENT || DEBUG */
        }
        lck_mtx_unlock(&bank_task->bt_acc_to_charge_lock);
 #endif
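Both accessors above now apply the same defensive pattern: a ledger balance is only accumulated when ledger_get_balance() succeeds and the value is non-negative, with the anomaly logged on DEVELOPMENT/DEBUG kernels. Expressed as a hypothetical helper (not part of this change; the ledger entry index type is assumed to be int, as elsewhere in osfmk):

	/* Hypothetical helper, equivalent to the repeated pattern above. */
	static inline void
	bank_sum_nonnegative(int64_t *balance, ledger_t ledger, int entry)
	{
		int64_t temp = 0;
		if (ledger_get_balance(ledger, entry, &temp) == KERN_SUCCESS && temp >= 0)
			*balance += temp;
	}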
index 1a827aadd46f237008e197fe230251b3b7ac92b6..ca78a961420d228c007444ecafe7d793349238ae 100644 (file)
@@ -185,148 +185,6 @@ chudxnu_is_64bit_task(task_t task)
        return (task_has_64BitAddr(task));
 }
 
-#define THING_TASK             0
-#define THING_THREAD   1
-
-// an exact copy of processor_set_things() except no mig conversion at the end!
-static kern_return_t
-chudxnu_private_processor_set_things(
-       processor_set_t         pset,
-       mach_port_t             **thing_list,
-       mach_msg_type_number_t  *count,
-       int                     type)
-{
-       unsigned int actual;    /* this many things */
-       unsigned int maxthings;
-       unsigned int i;
-
-       vm_size_t size, size_needed;
-       void  *addr;
-
-       if (pset == PROCESSOR_SET_NULL || pset != &pset0)
-               return (KERN_INVALID_ARGUMENT);
-
-       size = 0; addr = NULL;
-
-       for (;;) {
-               lck_mtx_lock(&tasks_threads_lock);
-
-               if (type == THING_TASK)
-                       maxthings = tasks_count;
-               else
-                       maxthings = threads_count;
-
-               /* do we have the memory we need? */
-
-               size_needed = maxthings * sizeof (mach_port_t);
-               if (size_needed <= size)
-                       break;
-
-               lck_mtx_unlock(&tasks_threads_lock);
-
-               if (size != 0)
-                       kfree(addr, size);
-
-               assert(size_needed > 0);
-               size = size_needed;
-
-               addr = kalloc(size);
-               if (addr == 0)
-                       return (KERN_RESOURCE_SHORTAGE);
-       }
-
-       /* OK, have memory and the processor_set is locked & active */
-
-       actual = 0;
-       switch (type) {
-
-       case THING_TASK:
-       {
-               task_t          task, *task_list = (task_t *)addr;
-
-               for (task = (task_t)queue_first(&tasks);
-                               !queue_end(&tasks, (queue_entry_t)task);
-                                       task = (task_t)queue_next(&task->tasks)) {
-                       task_reference_internal(task);
-                       task_list[actual++] = task;
-               }
-
-               break;
-       }
-
-       case THING_THREAD:
-       {
-               thread_t        thread, *thread_list = (thread_t *)addr;
-
-               for (i = 0, thread = (thread_t)queue_first(&threads);
-                               !queue_end(&threads, (queue_entry_t)thread);
-                                       thread = (thread_t)queue_next(&thread->threads)) {
-                       thread_reference_internal(thread);
-                       thread_list[actual++] = thread;
-               }
-
-               break;
-       }
-       }
-               
-       lck_mtx_unlock(&tasks_threads_lock);
-
-       if (actual < maxthings)
-               size_needed = actual * sizeof (mach_port_t);
-
-       if (actual == 0) {
-               /* no things, so return null pointer and deallocate memory */
-               *thing_list = NULL;
-               *count = 0;
-
-               if (size != 0)
-                       kfree(addr, size);
-       }
-       else {
-               /* if we allocated too much, must copy */
-
-               if (size_needed < size) {
-                       void *newaddr;
-
-                       newaddr = kalloc(size_needed);
-                       if (newaddr == 0) {
-                               switch (type) {
-
-                               case THING_TASK:
-                               {
-                                       task_t          *task_list = (task_t *)addr;
-
-                                       for (i = 0; i < actual; i++)
-                                               task_deallocate(task_list[i]);
-                                       break;
-                               }
-
-                               case THING_THREAD:
-                               {
-                                       thread_t        *thread_list = (thread_t *)addr;
-
-                                       for (i = 0; i < actual; i++)
-                                               thread_deallocate(thread_list[i]);
-                                       break;
-                               }
-                               }
-
-                               kfree(addr, size);
-                               return (KERN_RESOURCE_SHORTAGE);
-                       }
-
-                       bcopy((void *) addr, (void *) newaddr, size_needed);
-                       kfree(addr, size);
-                       addr = newaddr;
-               }
-
-               *thing_list = (mach_port_t *)addr;
-               *count = actual;
-       }
-
-       return (KERN_SUCCESS);
-}
-
 // an exact copy of task_threads() except no mig conversion at the end!
 static kern_return_t
 chudxnu_private_task_threads(
@@ -438,7 +296,7 @@ chudxnu_all_tasks(
        task_array_t            *task_list,
        mach_msg_type_number_t  *count)
 {
-       return chudxnu_private_processor_set_things(&pset0, (mach_port_t **)task_list, count, THING_TASK);      
+       return processor_set_things(&pset0, (void **)task_list, count, PSET_THING_TASK);        
 }
 
 __private_extern__ kern_return_t
@@ -467,7 +325,7 @@ chudxnu_all_threads(
        thread_array_t          *thread_list,
        mach_msg_type_number_t  *count)
 {
-       return chudxnu_private_processor_set_things(&pset0, (mach_port_t **)thread_list, count, THING_THREAD);
+       return processor_set_things(&pset0, (void **)thread_list, count, PSET_THING_THREAD);
 }
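With the private copy removed above, both wrappers now call the generic processor_set_things() with a PSET_THING_* selector and cast the result, exactly as the two hunks show. A minimal sketch of the task-side call (variable names are illustrative):

	/* Illustrative only; mirrors chudxnu_all_tasks() above. */
	task_array_t           task_list;
	mach_msg_type_number_t task_count;
	kern_return_t kr = processor_set_things(&pset0, (void **)&task_list,
	                                        &task_count, PSET_THING_TASK);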
 
 __private_extern__ kern_return_t
index a958aa754e58b0f8978c0147507cc975e5140e67..ba2a18786358b20c6dd540d49624439cb9dbd9c7 100644 (file)
@@ -656,8 +656,8 @@ kern_return_t chudxnu_thread_get_callstack64_internal(
                uint64_t rsp = 0ULL;
 
                // backtrace the 64bit side.
-               kr = do_backtrace64(task, thread, regs64, callstack, &bufferIndex, 
-                       bufferMaxIndex, TRUE);
+               kr = do_backtrace64(task, thread, regs64, callstack, &bufferIndex,
+                                   bufferMaxIndex - 1, TRUE);
 
                if(KERN_SUCCESS == chudxnu_kern_read(&rsp, (vm_offset_t) regs64->isf.rsp, sizeof(uint64_t)) && 
                        bufferIndex < bufferMaxIndex) {
@@ -668,8 +668,8 @@ kern_return_t chudxnu_thread_get_callstack64_internal(
                uint32_t esp = 0UL;
 
                // backtrace the 32bit side.
-               kr = do_backtrace32(task, thread, regs32, callstack, &bufferIndex, 
-                       bufferMaxIndex, TRUE);
+               kr = do_backtrace32(task, thread, regs32, callstack, &bufferIndex,
+                                   bufferMaxIndex - 1, TRUE);
                
                if(KERN_SUCCESS == chudxnu_kern_read(&esp, (vm_offset_t) regs32->uesp, sizeof(uint32_t)) && 
                        bufferIndex < bufferMaxIndex) {
@@ -679,8 +679,8 @@ kern_return_t chudxnu_thread_get_callstack64_internal(
                /* backtrace user land */
                uint64_t rsp = 0ULL;
                
-               kr = do_backtrace64(task, thread, u_regs64, callstack, &bufferIndex, 
-                       bufferMaxIndex, FALSE);
+               kr = do_backtrace64(task, thread, u_regs64, callstack, &bufferIndex,
+                                   bufferMaxIndex - 1, FALSE);
 
                if(KERN_SUCCESS == chudxnu_task_read(task, &rsp, (addr64_t) u_regs64->isf.rsp, sizeof(uint64_t)) && 
                        bufferIndex < bufferMaxIndex) {
@@ -690,8 +690,8 @@ kern_return_t chudxnu_thread_get_callstack64_internal(
        } else if(u_regs32 && !kern_only) {
                uint32_t esp = 0UL;
                
-               kr = do_backtrace32(task, thread, u_regs32, callstack, &bufferIndex, 
-                       bufferMaxIndex, FALSE);
+               kr = do_backtrace32(task, thread, u_regs32, callstack, &bufferIndex,
+                                   bufferMaxIndex - 1, FALSE);
 
                if(KERN_SUCCESS == chudxnu_task_read(task, &esp, (addr64_t) u_regs32->uesp, sizeof(uint32_t)) && 
                        bufferIndex < bufferMaxIndex) {
index a1354f4694363f20f905e3ff1e9eea0174a6e9fb..f22798e237cf43f1aacef11c188d87b023f07423 100644 (file)
@@ -23,9 +23,8 @@ OBJS_NO_CAST_ALIGN =                  \
                atm_notification_user.o \
                model_dep.o             \
                chud_thread.o           \
-               chud_thread_arm.o   \
+               chud_thread_arm.o       \
                video_console.o         \
-               kern_stackshot.o                        \
                kdp_udp.o               \
                kdp_machdep.o           \
                host.o                  \
@@ -73,7 +72,14 @@ OBJS_NO_CAST_ALIGN =                 \
                cchmac_final.o          \
                cchmac_init.o           \
                ccsha1.o                \
-
+               dp_memory_object.o      \
+               ipc_object.o            \
+               ipc_kmsg.o              \
+               ipc_right.o             \
+               bsd_vm.o                \
+               vm_map_store.o          \
+               vm_map_store_ll.o       \
+               vm_map_store_rb.o
 
 # Objects that don't want -Wsign-compare warning (15294427)
 OBJS_NO_SIGN_COMPARE =                 \
@@ -146,13 +152,13 @@ $(SOBJS): .SFLAGS
 $(COMPONENT).filelist: $(OBJS)
        $(_v)for hib_file in ${HIB_FILES};              \
        do      \
-                $(SEG_HACK) -n __HIB -o $${hib_file}__ $${hib_file} ; \
-                mv $${hib_file}__ $${hib_file} ; \
+                $(SEG_HACK) -n __HIB -o $${hib_file}__ $${hib_file} || exit 1; \
+                mv $${hib_file}__ $${hib_file} || exit 1; \
        done
        @echo LDFILELIST $(COMPONENT)
-       $(_v)( for obj in ${OBJS}; do   \
+       $(_v)for obj in ${OBJS}; do     \
                 echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \
-       done; ) > $(COMPONENT).filelist
+       done > $(COMPONENT).filelist
 
 do_all: $(COMPONENT).filelist
 
@@ -174,10 +180,10 @@ endif
 genassym.o: .CFLAGS $(firstword $(MAKEFILE_LIST))
 genassym.o: $(SOURCE_DIR)/$(COMPONENT)/$(GENASSYM_LOCATION)/genassym.c
        @echo GENASSYM $<
-       $(_v)${KCC} ${CFLAGS} ${CFLAGS_NOLTO_FLAG} -MD -S -o ${@} ${INCFLAGS} $<
+       $(_v)${GENASSYM_KCC} ${CFLAGS} ${CFLAGS_NOLTO_FLAG} -MD -S -o ${@} ${INCFLAGS} $<
 
 assym.s: genassym.o
-       $(_v)sed -e '/#DEFINITION#/!d' -e 's/^.*#DEFINITION#//' -e 's/\$$//' -e 'p' -e 's/#//2' -e 's/[^A-Za-z0-9_]*\([A-Za-z0-9_]*\)/ \1_NUM/2' genassym.o > $@
+       $(_v)sed -e '/^[[:space:]]*DEFINITION__define__/!d;{N;s/\n//;}' -e 's/^[[:space:]]*DEFINITION__define__\([^:]*\):.*ascii.*\"[\$$]*\([-0-9\#]*\)\".*$$/#define \1 \2/' -e 'p'  -e 's/#//2' -e 's/^[[:space:]]*#define \([A-Za-z0-9_]*\)[[:space:]]*[\$$#]*\([-0-9]*\).*$$/#define \1_NUM \2/' genassym.o > $@
 
 ${SOBJS}: assym.s
 
index d6313ca2eec2077f5101a9a50176236e2b3a03b3..37cd38840f02b300c56ce044527afc8b98cdebfe 100644 (file)
@@ -59,7 +59,6 @@ OPTIONS/importance_debug      optional importance_debug
 OPTIONS/config_ecc_logging     optional config_ecc_logging
 
 OPTIONS/config_dtrace          optional config_dtrace
-OPTIONS/config_counters                optional config_counters
 
 OPTIONS/no_kextd               optional no_kextd
 
@@ -98,6 +97,7 @@ osfmk/UserNotification/KUNCUserNotifications.c        standard
 osfmk/kdp/kdp.c                        optional config_kdp_interactive_debugging
 osfmk/kern/kern_stackshot.c    standard
 osfmk/kdp/kdp_udp.c                    optional mach_kdp
+osfmk/kdp/kdp_core.c                   optional mach_kdp
 osfmk/kdp/kdp_serial.c                 optional config_serial_kdp
 osfmk/ipc/ipc_entry.c                  standard
 osfmk/ipc/ipc_hash.c                   standard
@@ -155,6 +155,7 @@ osfmk/kern/sched_average.c          standard
 osfmk/kern/sched_dualq.c       optional config_sched_multiq
 osfmk/kern/sched_prim.c                standard
 osfmk/kern/sched_proto.c       optional config_sched_proto
+osfmk/kern/sched_traditional.c optional config_sched_traditional
 osfmk/kern/sched_grrr.c        optional config_sched_grrr_core
 osfmk/kern/sched_multiq.c      optional config_sched_multiq
 osfmk/kern/sfi.c                       standard
@@ -165,6 +166,7 @@ osfmk/kern/sync_sema.c              standard
 osfmk/kern/syscall_emulation.c standard
 osfmk/kern/syscall_subr.c              standard
 osfmk/kern/syscall_sw.c                standard
+osfmk/kern/sysdiagnose.c       optional config_sysdiagnose
 osfmk/kern/task.c                      standard
 osfmk/kern/task_policy.c       standard
 osfmk/kern/task_swap.c         standard
@@ -174,13 +176,12 @@ osfmk/kern/thread_call.c  standard
 osfmk/kern/thread_policy.c     standard
 osfmk/kern/timer.c                     standard
 osfmk/kern/timer_call.c                standard
-osfmk/kern/wait_queue.c                standard
+osfmk/kern/waitq.c                     standard
 osfmk/kern/xpr.c                       optional xpr_debug
 osfmk/kern/zalloc.c                    standard
 osfmk/kern/gzalloc.c           optional config_gzalloc
 osfmk/kern/bsd_kern.c          optional mach_bsd
 osfmk/kern/hibernate.c         optional hibernation
-osfmk/pmc/pmc.c                                standard 
 ./mach/clock_server.c                  standard
 ./mach/clock_priv_server.c             standard
 ./mach/clock_reply_user.c              standard
@@ -203,11 +204,14 @@ osfmk/pmc/pmc.c                           standard
 ./mach/upl_server.c                    standard
 ./mach/audit_triggers_user.c           standard
 ./mach/task_access_user.c              standard
+osfmk/corpses/corpse.c                 standard
+osfmk/kern/kern_cdata.c                        standard
 ./mach/telemetry_notification_user.c optional config_telemetry
 osfmk/bank/bank.c              optional config_bank
 osfmk/atm/atm.c                        optional config_atm
 ./atm/atm_notification_user.c          optional config_atm
 ./mach/coalition_notification_user.c   optional config_coalitions
+./mach/sysdiagnose_notification_user.c optional config_sysdiagnose
 #
 # For now, no external pagers
 #
@@ -279,6 +283,7 @@ osfmk/kperf/callstack.c                 optional kperf
 osfmk/kperf/pet.c                       optional kperf
 # osfmk/kperf/kperfbsd.c                    optional kperf # bsd/conf/files
 osfmk/kperf/threadinfo.c                optional kperf
+osfmk/kperf/meminfo.c                   optional kperf
 osfmk/kperf/timetrigger.c               optional kperf
 osfmk/kperf/kperf_kpc.c                 optional kperf
 osfmk/kern/kpc_thread.c                 optional kpc
@@ -289,6 +294,7 @@ osfmk/console/serial_general.c      standard
 osfmk/kern/telemetry.c                 optional config_telemetry
 
 # Built-in corecrypto for early_random():
+osfmk/corecrypto/cc/src/cc_clear.c                     standard
 osfmk/corecrypto/ccdbrg/src/ccdrbg_nisthmac.c          standard
 osfmk/corecrypto/ccdigest/src/ccdigest_init.c          standard
 osfmk/corecrypto/ccdigest/src/ccdigest_update.c                standard
index 79065790488f0f031649d1d87659ef5bde68482c..6b2389a45d25883ed29317a12517cfce43bac990 100644 (file)
@@ -120,6 +120,7 @@ osfmk/kern/hv_support.c                             optional hypervisor
 
 # Kernel performance monitoring
 osfmk/kperf/x86_64/kperf_mp.c   optional kperf
+osfmk/kperf/x86_64/kperf_meminfo.c  optional kperf
 osfmk/x86_64/kpc_x86.c              optional kpc
 
 osfmk/i386/startup64.c         standard
index 1758e7a5a7e0fcfb704ef5dad429a1b9b2a5001a..245a995fef671fa34f685120a410084ab0aa3d24 100644 (file)
@@ -96,7 +96,7 @@ console_init(void)
 
        console_ring.len = PAGE_SIZE;
        ret = kmem_alloc(kernel_map, (vm_offset_t *) &console_ring.buffer,
-                        console_ring.len);
+                        console_ring.len, VM_KERN_MEMORY_OSFMK);
        if (ret != KERN_SUCCESS)
                panic("console_ring_init() "
                      "failed to allocate ring buffer, error %d\n", ret);
@@ -115,7 +115,7 @@ console_cpu_alloc(__unused boolean_t boot_processor)
        console_buf_t   *cbp;
 
        ret = kmem_alloc(kernel_map, (vm_offset_t *) &cbp,
-                               sizeof(console_buf_t));
+                               sizeof(console_buf_t), VM_KERN_MEMORY_OSFMK);
        if (ret != KERN_SUCCESS) {
                printf("console_cpu_alloc() "
                      "failed to allocate cpu buffer, error=%d\n", ret);
index e696d5c308665156059073bbd94d59a02a48e7c6..4ba6ed72df7efe130ed1941e8c22e10739f14b94 100644 (file)
@@ -441,6 +441,10 @@ gc_enable( boolean_t enable )
                                        if ( buffer_colorcodes ) kfree( buffer_colorcodes, buffer_size );
                                        if ( buffer_tab_stops  ) kfree( buffer_tab_stops,  buffer_columns );
 
+                                       buffer_attributes = NULL;
+                                       buffer_characters = NULL;
+                                       buffer_colorcodes = NULL;
+                                       buffer_tab_stops  = NULL;
                                        buffer_columns = 0;
                                        buffer_rows    = 0;
                                        buffer_size    = 0;
@@ -1856,6 +1860,7 @@ static boolean_t          vc_needsave;
 static void *                  vc_saveunder;
 static vm_size_t               vc_saveunder_len;
 static int8_t                  vc_uiscale = 1;
+int                             vc_user_options;
 decl_simple_lock_data(,vc_progress_lock)
 
 static int                     vc_progress_withmeter = 3;
@@ -1863,6 +1868,8 @@ int                             vc_progressmeter_enable;
 static int                      vc_progressmeter_drawn;
 int                                    vc_progressmeter_value;
 static uint32_t                vc_progressmeter_count;
+static uint32_t                vc_progress_meter_start;
+static uint32_t                vc_progress_meter_end;
 static uint64_t                vc_progressmeter_interval;
 static uint64_t                vc_progressmeter_deadline;
 static thread_call_data_t      vc_progressmeter_call;
@@ -1870,6 +1877,7 @@ static void *                   vc_progressmeter_backbuffer;
 static boolean_t                vc_progressmeter_hold;
 static uint32_t                 vc_progressmeter_diskspeed = 256;
 
+
 enum {
     kSave          = 0x10,
     kDataIndexed   = 0x20,
@@ -1922,8 +1930,9 @@ static void vc_blit_rect(int x, int y, int bx,
                            void * backBuffer,
                            unsigned int flags)
 {
-    if(!vinfo.v_depth)
-        return;
+    if (!vinfo.v_depth)                                return;
+    if (((unsigned int)(x + width))  > vinfo.v_width)  return;
+    if (((unsigned int)(y + height)) > vinfo.v_height) return;
 
     switch( vinfo.v_depth) {
        case 8:
@@ -2165,9 +2174,8 @@ static void vc_blit_rect_30(int x, int y, int bx,
     {
         for( col = 0; col < width; col++)
        {
-           if (col < sourceRow)
-               data = *dataPtr++;
-
+           if (sourceRow) data = dataPtr[((sx + (col * a) + (line * b)) >> 16)
+                               + sourceRow * (((sy + (col * c) + (line * d)) >> 16))];
            if (backPtr) {
                if (kSave & flags) {
                    back = *(dst + col);
@@ -2535,6 +2543,17 @@ vc_progress_set(boolean_t enable, uint32_t vc_delay)
 }
 
 
+static uint32_t vc_progressmeter_range(uint32_t pos)
+{
+    uint32_t ret;
+
+    if (pos > kProgressMeterEnd) pos = kProgressMeterEnd;
+    ret = vc_progress_meter_start 
+       + ((pos * (vc_progress_meter_end - vc_progress_meter_start)) / kProgressMeterEnd);
+
+    return (ret);
+}
+
 static void
 vc_progressmeter_task(__unused void *arg0, __unused void *arg)
 {
@@ -2546,7 +2565,7 @@ vc_progressmeter_task(__unused void *arg0, __unused void *arg)
     if (vc_progressmeter_enable)
     {
        uint32_t pos = (vc_progressmeter_count >> 13);
-       internal_set_progressmeter(pos);
+       internal_set_progressmeter(vc_progressmeter_range(pos));
        if (pos < kProgressMeterEnd)
        {
             static uint16_t incr[8] = { 10000, 10000, 8192, 4096, 2048, 384, 384, 64 };
@@ -2840,7 +2859,7 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op)
 
                case kPEAcquireScreen:
                        if ( gc_acquired ) break;
-                       vc_progress_set( graphics_now, vc_acquire_delay );
+                       vc_progress_set( graphics_now, (kVCDarkReboot & vc_user_options) ? 120 : vc_acquire_delay );
                        gc_enable( !graphics_now );
                        gc_acquired = TRUE;
                        gc_desire_text = FALSE;
@@ -2907,7 +2926,7 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op)
                    simple_lock(&vc_progress_lock);
 
                    vc_progressmeter_drawn = 0;
-                   internal_set_progressmeter(vc_progressmeter_count >> 13);
+                   internal_set_progressmeter(vc_progressmeter_range(vc_progressmeter_count >> 13));
 
                    simple_unlock(&vc_progress_lock);
                    splx(s);
@@ -2958,9 +2977,22 @@ vcattach(void)
 {
        vm_initialized = TRUE;
 
+        const boot_args * bootargs  = (typeof(bootargs)) PE_state.bootArgs;
+
        vc_progress_white = (0 != ((kBootArgsFlagBlackBg | kBootArgsFlagLoginUI) 
-                                         & ((boot_args *) PE_state.bootArgs)->flags));
+                                         & bootargs->flags));
        PE_parse_boot_argn("meter", &vc_progress_withmeter, sizeof(vc_progress_withmeter));
+
+       if (kBootArgsFlagInstallUI & bootargs->flags)
+       {
+           vc_progress_meter_start = (bootargs->bootProgressMeterStart * kProgressMeterMax) / 65535;
+           vc_progress_meter_end   = (bootargs->bootProgressMeterEnd   * kProgressMeterMax) / 65535;
+       }
+       else
+       {
+           vc_progress_meter_start = 0;
+           vc_progress_meter_end   = kProgressMeterMax;
+       }
        simple_lock_init(&vc_progress_lock, 0);
 
        if ( gc_graphics_boot == FALSE )
@@ -3190,3 +3222,10 @@ vc_set_progressmeter(int new_value)
 }
 
 
+void
+vc_set_options(int new_value)
+{
+     vc_user_options = new_value;
+}
+
+
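The new vc_progressmeter_range() maps a raw meter position in [0, kProgressMeterEnd] onto the sub-range [vc_progress_meter_start, vc_progress_meter_end], which vcattach() derives from the boot args when the install UI flag is set. A worked instance with hypothetical constants (kProgressMeterEnd, and the start/end values, are assumptions; the real values are not shown in this diff):

	/* Assume kProgressMeterEnd = 1000, vc_progress_meter_start = 250, vc_progress_meter_end = 750. */
	uint32_t pos = 500;
	uint32_t ret = 250 + ((pos * (750 - 250)) / 1000);   /* = 500: pos 0 maps to 250, pos 1000 maps to 750 */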
index 5e38a23f076e922927ba86570e9e13625feaa86e..c4631540cabb6d702c6ea6208fa5f10c74728908 100644 (file)
@@ -112,6 +112,15 @@ extern int vc_progressmeter_value;
 extern void vc_progress_setdiskspeed(uint32_t speed);
 
 
+
+extern int vc_user_options;
+
+enum
+{
+    kVCDarkReboot = 0x00000001,
+};
+extern void vc_set_options(int new_value);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/osfmk/corecrypto/cc/src/cc_clear.c b/osfmk/corecrypto/cc/src/cc_clear.c
new file mode 100644 (file)
index 0000000..79f9d97
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ *  cc_clear.c
+ *  corecrypto
+ *
+ *  Created on 05/21/2014
+ *
+ *  Copyright (c) 2014,2015 Apple Inc. All rights reserved.
+ *
+ */
+
+#include <corecrypto/cc.h>
+
+void cc_clear(size_t len, void *dst)
+{
+#if ( CC_HAS_MEMSET_S == 1 ) && (defined( __STDC_WANT_LIB_EXT1__ ) && ( __STDC_WANT_LIB_EXT1__ == 1 ) )
+    memset_s(dst,len,0,len);
+#else
+    volatile size_t ctr=0;
+    volatile uint8_t *data=dst;
+    if (len) {
+        cc_zero(len,dst);
+        (void)data[ctr]; // Touch the buffer so that the compiler does not
+            // optimize out the zeroing.
+    }
+#endif
+}
+
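cc_clear() above zeroes a buffer without letting the compiler elide the stores, preferring memset_s() where available and otherwise re-reading the buffer through a volatile pointer after cc_zero(). A minimal usage sketch (the key buffer is hypothetical):

	#include <corecrypto/cc.h>

	uint8_t key[32];
	/* ... derive and use the key ... */
	cc_clear(sizeof(key), key);   /* scrub key material before the stack frame is reused */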
index f797d03785e21f9cf824ad0bb36f4f4314db5993..e47a1d438846fc6d7f48941ff0b49b2d18bdece8 100644 (file)
@@ -2,34 +2,18 @@
  *  ccdrbg_nisthmac.c
  *  corecrypto
  *
- *  Created by John Hurley on 04/30/14.
- *  Copyright 2014 Apple, Inc. All rights reserved.
+ *  Created on 05/09/2014
+ *
+ *  Copyright (c) 2014,2015 Apple Inc. All rights reserved.
  *
  */
 
 #include <corecrypto/ccdrbg.h>
 #include <corecrypto/cchmac.h>
 #include <corecrypto/ccsha2.h>
-#if !CC_KERNEL
+#include <corecrypto/cc_priv.h>
 #include <corecrypto/cc_debug.h>
-#endif
-
-
-#if CC_KERNEL
-#include <pexpert/pexpert.h>
-static int hmac_dbrg_error(int val, __unused const char *msg) {
-       return val;
-}
-#else
-static int hmac_dbrg_error(int val, const char *msg) {
-    if (msg) {
-        char buffer[1024];
-        snprintf(buffer, sizeof(buffer)-1, "Error: %s", msg);
-        cc_print(buffer, 0, NULL);
-    }
-    return val;
-}
-#endif
+#include <corecrypto/cc_macros.h>
 
 // Test vectors at:
 //      http://csrc.nist.gov/groups/STM/cavp/#05
@@ -37,34 +21,29 @@ static int hmac_dbrg_error(int val, const char *msg) {
 //
 
 /*
-    This HMAC DBRG is described in:
-
-    SP 800-90 A Rev. 1 (2nd Draft)
-    DRAFT Recommendation for Random Number Generation Using Deterministic Random Bit Generators
-    April 2014
-
-    SP 800-90A (revision 1), Recommendation for Random Number Generation Using Deterministic Random Bit Generators
-    http://csrc.nist.gov/publications/drafts/800-90/sp800_90a_r1_draft.pdf
-
-    See in particular
-    - 10.1.2 HMAC_DRBG (p 45)
-    - B.2 HMAC_DRBGExample (p 83)
-
-    We only support one security strength, 256 bits
-    In addition, we limit the personalization string to 20 bytes
-    Note that the example in B.2 is very limited, refer to §10.1.2 for more
-*/
-
-
+ This HMAC DBRG is described in:
+ SP 800-90 A Rev. 1 (2nd Draft)
+ DRAFT Recommendation for Random Number Generation Using Deterministic Random Bit Generators
+ April 2014
+  
+ See in particular
+ - 10.1.2 HMAC_DRBG (p 45)
+ - B.2 HMAC_DRBGExample (p 83)
+ We support a maximum security strength of 256 bits.
+ Note that the example in B.2 is very limited; refer to §10.1.2 for more.
+ */
 
 /*
- The Get_entropy_input function is specified in pseudocode in [SP 800-90C] for various RBG constructions; 
+ The Get_entropy_input function is specified in pseudocode in [SP 800-90C] for various RBG constructions;
  however, in general, the function has the following meaning:
  Get_entropy_input: A function that is used to obtain entropy input. The function call is:
  (status, entropy_input) = Get_entropy_input (min_entropy, min_ length, max_ length, prediction_resistance_request),
  which requests a string of bits (entropy_input) with at least min_entropy bits of entropy. The length for the string
- shall be equal to or greater than min_length bits, and less than or equal to max_length bits. The 
- prediction_resistance_request parameter indicates whether or not prediction resistance is to be provided during the request 
+ shall be equal to or greater than min_length bits, and less than or equal to max_length bits. The
+ prediction_resistance_request parameter indicates whether or not prediction resistance is to be provided during the request
  (i.e., whether fresh entropy is required). A status code is also returned from the function.
  */
 
@@ -98,20 +77,15 @@ static int hmac_dbrg_error(int val, const char *msg) {
 // Defines below based on 10.1, Table 2: Definitions for Hash-Based DRBG Mechanisms (p 39)
 //
 
-#define NH_MAX_SECURITY_STRENGTH    256                             // in bits
 #define NH_MAX_OUTPUT_BLOCK_SIZE    (CCSHA512_OUTPUT_SIZE)          // 512 bits, i.e. 64 bytes (CCSHA512_OUTPUT_SIZE)
 #define NH_MAX_KEY_SIZE             (CCSHA512_OUTPUT_SIZE)          // 512 bits, i.e. 64 bytes (CCSHA512_OUTPUT_SIZE)
-#define NH_REQUIRED_MIN_ENTROPY(s)  (3*(s)/2)
-#define NH_MAX_BYTES_PER_REQUEST    (0xffff)                        // in bytes, 2^^16
-#define NH_RESEED_INTERVAL          ((unsigned long)0xffffffffffff) // 2^^48 requests between reseeds
-#define NH_MAX_PERSONALIZE_LEN      (1024)                          // 1024 bytes
-#define NH_MIN_ENTROPY_LEN          (NH_MAX_SECURITY_STRENGTH/8)
-#define NH_MAX_ENTROPY_LEN          (0xffffffff)                    // in bytes, 2^^32
+
+#define MIN_REQ_ENTROPY(di)            ((di)->output_size/2)
 
 struct ccdrbg_nisthmac_state {
-    const struct ccdrbg_info *info;
-       size_t bytesLeft;
-    size_t reseed_counter;
+    const struct ccdrbg_nisthmac_custom *custom; // ccdrbg_nisthmac_state does not need to store ccdrbg_info; ccdrbg_nisthmac_custom is sufficient
+    size_t bytesLeft;
+    uint64_t reseed_counter; // the reseed counter must be able to hold 2^^48; size_t might be smaller than 48 bits
     size_t vsize;
     size_t keysize;
     uint8_t v[NH_MAX_OUTPUT_BLOCK_SIZE];
@@ -127,7 +101,7 @@ static void dumpState(const char *label, struct ccdrbg_nisthmac_state *state) {
 
 /*
  NIST SP 800-90A, Rev. 1 HMAC_DRBG April 2014, p 46
-
  HMAC_DRBG_Update (provided_data, K, V):
  1. provided_data: The data to be used.
  2. K: The current value of Key.
@@ -135,9 +109,9 @@ static void dumpState(const char *label, struct ccdrbg_nisthmac_state *state) {
  Output:
  1. K: The new value for Key.
  2. V: The new value for V.
-
  HMAC_DRBG Update Process:
-
  1. K = HMAC (K, V || 0x00 || provided_data).
  2. V=HMAC(K,V).
  3. If (provided_data = Null), then return K and V.
@@ -160,14 +134,14 @@ static int hmac_dbrg_update(struct ccdrbg_state *drbg,
                             )
 {
     struct ccdrbg_nisthmac_state *state = (struct ccdrbg_nisthmac_state *)drbg;
-    const struct ccdrbg_nisthmac_custom *custom = state->info->custom;
-    const struct ccdigest_info *di = custom->di;
-
+    const struct ccdigest_info *di = state->custom->di;
+    
     const unsigned char cZero = 0x00;
     const unsigned char cOne  = 0x01;
     cchmac_ctx_decl(di->state_size, di->block_size, ctx);
-
+    
     cchmac_init(di, ctx, state->keysize, state->key);
+    
     // 1. K = HMAC (K, V || 0x00 || provided_data).
     cchmac_update(di, ctx, state->vsize, state->v);
     cchmac_update(di, ctx, 1, &cZero);
@@ -175,15 +149,15 @@ static int hmac_dbrg_update(struct ccdrbg_state *drbg,
     if (db && dbLen) cchmac_update(di, ctx, dbLen, db);
     if (dc && dcLen) cchmac_update(di, ctx, dcLen, dc);
     cchmac_final(di, ctx, state->key);
-
+    
     //  2. V=HMAC(K,V).
     cchmac(di, state->keysize, state->key, state->vsize, state->v, state->v);
-
+    
     // 3. If (provided_data = Null), then return K and V.
     // One parameter must be non-empty, or return
     if (!((da && daLen) || (db && dbLen) || (dc && dcLen)))
-        return 0;
-
+        return CCDRBG_STATUS_OK;
+    
     // 4. K = HMAC (K, V || 0x01 || provided_data).
     cchmac_init(di, ctx, state->keysize, state->key);
     cchmac_update(di, ctx, state->vsize, state->v);
@@ -192,30 +166,62 @@ static int hmac_dbrg_update(struct ccdrbg_state *drbg,
     if (db && dbLen) cchmac_update(di, ctx, dbLen, db);
     if (dc && dcLen) cchmac_update(di, ctx, dcLen, dc);
     cchmac_final(di, ctx, state->key);
-
+    
     //  5. V=HMAC(K,V).
     cchmac(di, state->keysize, state->key, state->vsize, state->v, state->v);
+    
+    return CCDRBG_STATUS_OK;
+}
 
-    return 0;
+// make sure state is initialized before calling this function
+static int validate_inputs(struct ccdrbg_nisthmac_state *state,
+                           unsigned long entropyLength,
+                           unsigned long additionalInputLength,
+                           unsigned long psLength)
+{
+    int rc;
+    const struct ccdrbg_nisthmac_custom *custom=state->custom;
+    const struct ccdigest_info *di  = custom->di;
+    
+    rc =CCDRBG_STATUS_ERROR;
+    //buffer size checks
+    cc_require (di->output_size<=sizeof(state->v), end); //digest size too long
+    cc_require (di->output_size<=sizeof(state->key), end); //digest size too long
+    
+    //NIST SP800 compliance checks
+    //the following maximum checks are redundant if long is 32 bits.
+    
+    rc=CCDRBG_STATUS_PARAM_ERROR;
+    cc_require (psLength <= CCDRBG_MAX_PSINPUT_SIZE, end); //personalization string too long
+    cc_require (entropyLength <= CCDRBG_MAX_ENTROPY_SIZE, end); //supplied too much entropy
+    cc_require (additionalInputLength <= CCDRBG_MAX_ADDITIONALINPUT_SIZE, end); //additional input too long
+    cc_require (entropyLength >=  MIN_REQ_ENTROPY(di), end); //supplied too little entropy
+    
+    cc_require(di->output_size<=NH_MAX_OUTPUT_BLOCK_SIZE, end); //the requested security strength is not supported
+    
+    rc=CCDRBG_STATUS_OK;
+end:
+    return rc;
 }
 
 /*
   NIST SP 800-90A, Rev. 1 April 2014 B.2.2, p 84
-
   HMAC_DRBG_Instantiate_algorithm (...):
-    Input: bitstring (entropy_input, personalization_string). 
   Output: bitstring (V, Key), integer reseed_counter.
-
   Process:
   1. seed_material = entropy_input || personalization_string.
   2. Set Key to outlen bits of zeros.
   3. Set V to outlen/8 bytes of 0x01.
   4. (Key, V) = HMAC_DRBG_Update (seed_material, Key, V).
   5. reseed_counter = 1.
   6. Return (V, Key, reseed_counter).
-*/
+ NIST SP 800-90A, Rev. 1 April 2014 B.2.2, p 84
+ HMAC_DRBG_Instantiate_algorithm (...):
+ Input: bitstring (entropy_input, personalization_string).
+ Output: bitstring (V, Key), integer reseed_counter.
+ Process:
+ 1. seed_material = entropy_input || personalization_string.
+ 2. Set Key to outlen bits of zeros.
+ 3. Set V to outlen/8 bytes of 0x01.
+ 4. (Key, V) = HMAC_DRBG_Update (seed_material, Key, V).
+ 5. reseed_counter = 1.
+ 6. Return (V, Key, reseed_counter).
+ */
 
 // This version does not do memory allocation
+// SP 800-90A: required minimum entropy for instantiate and reseed = security_strength
 
 static int hmac_dbrg_instantiate_algorithm(struct ccdrbg_state *drbg,
                                            unsigned long entropyLength, const void *entropy,
@@ -223,27 +229,31 @@ static int hmac_dbrg_instantiate_algorithm(struct ccdrbg_state *drbg,
                                            unsigned long psLength, const void *ps)
 {
     // TODO: The NIST code passes nonce (i.e. HMAC key) to generate, but cc interface isn't set up that way
-
-    struct ccdrbg_nisthmac_state *state=(struct ccdrbg_nisthmac_state *)drbg;
-
+    struct ccdrbg_nisthmac_state *state = (struct ccdrbg_nisthmac_state *)drbg;
+    
     // 1. seed_material = entropy_input || nonce || personalization_string.
-
+    
     // 2. Set Key to outlen bits of zeros.
     cc_zero(state->keysize, state->key);
-
+    
     // 3. Set V to outlen/8 bytes of 0x01.
     CC_MEMSET(state->v, 0x01, state->vsize);
-
+    
     // 4. (Key, V) = HMAC_DRBG_Update (seed_material, Key, V).
     hmac_dbrg_update(drbg, entropyLength, entropy, nonceLength, nonce, psLength, ps);
-
+    
     // 5. reseed_counter = 1.
     state->reseed_counter = 1;
     
-    return 0;
+    return CCDRBG_STATUS_OK;
 }
 
 //  In NIST terminology, the nonce is the HMAC key and ps is the personalization string
+//  We assume that the caller has passed in
+//      min_entropy = NH_REQUIRED_MIN_ENTROPY(security_strength)
+//  bytes of entropy
+
+static void done(struct ccdrbg_state *drbg);
 
 static int init(const struct ccdrbg_info *info, struct ccdrbg_state *drbg,
                 unsigned long entropyLength, const void* entropy,
@@ -251,72 +261,68 @@ static int init(const struct ccdrbg_info *info, struct ccdrbg_state *drbg,
                 unsigned long psLength, const void* ps)
 {
     struct ccdrbg_nisthmac_state *state=(struct ccdrbg_nisthmac_state *)drbg;
-    const struct ccdrbg_nisthmac_custom *custom = NULL;
-    const struct ccdigest_info *di = NULL;
-    size_t security_strength;
-    size_t min_entropy;
-
     state->bytesLeft = 0;
-    state->info = info;
-    custom = state->info->custom;
-    di = custom->di;
-    state->vsize = di->output_size;    // TODO: state_size? or output_size
-    state->keysize = di->output_size; // TODO: state size?
-
-    security_strength = NH_MAX_SECURITY_STRENGTH;
-
-    if (psLength > NH_MAX_PERSONALIZE_LEN)  // "Personalization_string too long"
-        return hmac_dbrg_error(-1, "Personalization_string too long");
-
-    if (entropyLength > NH_MAX_ENTROPY_LEN) // Supplied too much entropy
-        return hmac_dbrg_error(-1, "Supplied too much entropy");
-
-    // 4. min_entropy = 1.5 × security_strength.
-    min_entropy = NH_REQUIRED_MIN_ENTROPY(security_strength);
+    state->custom = info->custom; //we only need to get the custom parameter from the info structure.
+    
+    int rc = validate_inputs(state , entropyLength, 0, psLength);
+    if(rc!=CCDRBG_STATUS_OK){
+        // clear everything if we cannot initialize. The idea is that if init() fails and the caller does not check its
+        // return value, the system crashes by NULL dereference after a call to generate, rather than generating bad random numbers.
+        done(drbg);
+        return rc;
+    }
 
+    const struct ccdigest_info *di = state->custom->di;
+    state->vsize = di->output_size;
+    state->keysize = di->output_size;
+    
     // 7. (V, Key, reseed_counter) = HMAC_DRBG_Instantiate_algorithm (entropy_input, personalization_string).
-
     hmac_dbrg_instantiate_algorithm(drbg, entropyLength, entropy, nonceLength, nonce, psLength, ps);
     
 #ifdef DEBUGFOO
     dumpState("Init: ", state);
 #endif
-       return 0;
+    return CCDRBG_STATUS_OK;
+
 }
 
 /*
-    10.1.2.4 Reseeding an HMAC_DRBG Instantiation
-    Notes for the reseed function specified in Section 9.2:
-    The reseeding of an HMAC_DRBG instantiation requires a call to the Reseed_function specified in Section 9.2. 
-    Process step 6 of that function calls the reseed algorithm specified in this section. The values for min_length 
-    are provided in Table 2 of Section 10.1.
-
-    The reseed algorithm:
-    Let HMAC_DRBG_Update be the function specified in Section 10.1.2.2. The following process or its equivalent 
-    shall be used as the reseed algorithm for this DRBG mechanism (see step 6 of the reseed process in Section 9.2):
-
-    HMAC_DRBG_Reseed_algorithm (working_state, entropy_input, additional_input):
-    1.  working_state: The current values for V, Key and reseed_counter (see Section 10.1.2.1).
-    2.  entropy_input: The string of bits obtained from the source of entropy input.
-    3.  additional_input: The additional input string received from the consuming application. 
-        Note that the length of the additional_input string may be zero.
-
-    Output:
-    1.  new_working_state: The new values for V, Key and reseed_counter. HMAC_DRBG Reseed Process:
-    1.  seed_material = entropy_input || additional_input.
-    2.  (Key, V) = HMAC_DRBG_Update (seed_material, Key, V). 3. reseed_counter = 1.
-    4.  Return V, Key and reseed_counter as the new_working_state.
-*/
-
-static int reseed(struct ccdrbg_state *drbg,
-                  unsigned long entropyLength, const void *entropy,
-                  unsigned long inputlen, const void *input)
-{
-    struct ccdrbg_nisthmac_state *state=(struct ccdrbg_nisthmac_state *)drbg;
+ 10.1.2.4 Reseeding an HMAC_DRBG Instantiation
+ Notes for the reseed function specified in Section 9.2:
+ The reseeding of an HMAC_DRBG instantiation requires a call to the Reseed_function specified in Section 9.2.
+ Process step 6 of that function calls the reseed algorithm specified in this section. The values for min_length
+ are provided in Table 2 of Section 10.1.
+ The reseed algorithm:
+ Let HMAC_DRBG_Update be the function specified in Section 10.1.2.2. The following process or its equivalent
+ shall be used as the reseed algorithm for this DRBG mechanism (see step 6 of the reseed process in Section 9.2):
+ HMAC_DRBG_Reseed_algorithm (working_state, entropy_input, additional_input):
+ 1.  working_state: The current values for V, Key and reseed_counter (see Section 10.1.2.1).
+ 2.  entropy_input: The string of bits obtained from the source of entropy input.
+ 3.  additional_input: The additional input string received from the consuming application.
+ Note that the length of the additional_input string may be zero.
+ Output:
+ 1.  new_working_state: The new values for V, Key and reseed_counter.
+ HMAC_DRBG Reseed Process:
+ 1.  seed_material = entropy_input || additional_input.
+ 2.  (Key, V) = HMAC_DRBG_Update (seed_material, Key, V).
+ 3.  reseed_counter = 1.
+ 4.  Return V, Key and reseed_counter as the new_working_state.
+ */
 
-    int rx = hmac_dbrg_update(drbg, entropyLength, entropy, inputlen, input, 0, NULL);
+static int
+reseed(struct ccdrbg_state *drbg,
+       unsigned long entropyLength, const void *entropy,
+       unsigned long additionalLength, const void *additional)
+{
+    
+    struct ccdrbg_nisthmac_state *state = (struct ccdrbg_nisthmac_state *)drbg;
+    int rc = validate_inputs(state, entropyLength, additionalLength, 0);
+    if(rc!=CCDRBG_STATUS_OK) return rc;
+    
+    int rx = hmac_dbrg_update(drbg, entropyLength, entropy, additionalLength, additional, 0, NULL);
     state->reseed_counter = 1;
-
+    
 #ifdef DEBUGFOO
     dumpState("Reseed: ", state);
 #endif
@@ -324,74 +330,87 @@ static int reseed(struct ccdrbg_state *drbg,
 }
 
 /*
-    HMAC_DRBG_Generate_algorithm:
-    Input: bitstring (V, Key), integer (reseed_counter, requested_number_of_bits). 
-    Output: string status, bitstring (pseudorandom_bits, V, Key), integer reseed_counter.
-
-    Process:
-    1.      If (reseed_counter ≥ 10,000), then Return (“Reseed required”, Null, V, Key, reseed_counter).
-    2.      temp = Null.
-    3.      While (len (temp) < requested_no_of_bits) do:
-    3.1         V = HMAC (Key, V).
-    3.2         temp = temp || V.
-    4.      pseudorandom_bits = Leftmost (requested_no_of_bits) of temp.
-    5.      (Key, V) = HMAC_DRBG_Update (Null, Key, V).
-    6.      reseed_counter = reseed_counter + 1.
-    7.      Return (“Success”, pseudorandom_bits, V, Key, reseed_counter).
-*/
-
-static int generate(struct ccdrbg_state *drbg, unsigned long numBytes, void *outBytes,
-                    unsigned long inputLen, const void *input)
-{
-    struct ccdrbg_nisthmac_state *state = (struct ccdrbg_nisthmac_state *)drbg;
-    const struct ccdrbg_nisthmac_custom *custom = state->info->custom;
-    const struct ccdigest_info *di = custom->di;
+ HMAC_DRBG_Generate_algorithm:
+ Input: bitstring (V, Key), integer (reseed_counter, requested_number_of_bits).
+ Output: string status, bitstring (pseudorandom_bits, V, Key), integer reseed_counter.
+ Process:
+ 1.      If (reseed_counter ≥ 10,000), then Return (“Reseed required”, Null, V, Key, reseed_counter).
+ 2.      temp = Null.
+ 3.      While (len (temp) < requested_no_of_bits) do:
+ 3.1         V = HMAC (Key, V).
+ 3.2         temp = temp || V.
+ 4.      pseudorandom_bits = Leftmost (requested_no_of_bits) of temp.
+ 5.      (Key, V) = HMAC_DRBG_Update (Null, Key, V).
+ 6.      reseed_counter = reseed_counter + 1.
+ 7.      Return (“Success”, pseudorandom_bits, V, Key, reseed_counter).
+ */
 
-    if (numBytes > NH_MAX_BYTES_PER_REQUEST)
-        return hmac_dbrg_error(CCDRBG_STATUS_PARAM_ERROR,
-                              "Requested too many bytes in one request");
+static int validate_gen_params(uint64_t reseed_counter,  unsigned long dataOutLength, unsigned long additionalLength)
 
+{
+    int rc=CCDRBG_STATUS_PARAM_ERROR;
+    
+    cc_require (dataOutLength >= 1, end); //Requested zero bytes in one request
+    cc_require (dataOutLength <= CCDRBG_MAX_REQUEST_SIZE, end); //Requested too many bytes in one request
+    cc_require (additionalLength<=CCDRBG_MAX_ADDITIONALINPUT_SIZE, end); //Additional input too long
+    
     // 1. If (reseed_counter > 2^^48), then Return (“Reseed required”, Null, V, Key, reseed_counter).
-    if (state->reseed_counter > NH_RESEED_INTERVAL)
-        return hmac_dbrg_error(CCDRBG_STATUS_NEED_RESEED, "Reseed required");
+     rc = CCDRBG_STATUS_NEED_RESEED;
+     cc_require (reseed_counter <= CCDRBG_RESEED_INTERVAL, end); //Reseed required
+    
+    rc=CCDRBG_STATUS_OK;
+    
+end:
+    return rc;
+}
 
+static int generate(struct ccdrbg_state *drbg, unsigned long dataOutLength, void *dataOut,
+                    unsigned long additionalLength, const void *additional)
+{
+    struct ccdrbg_nisthmac_state *state = (struct ccdrbg_nisthmac_state *)drbg;
+    const struct ccdrbg_nisthmac_custom *custom = state->custom;
+    const struct ccdigest_info *di = custom->di;
+    
+    int rc = validate_gen_params(state->reseed_counter, dataOutLength, additional==NULL?0:additionalLength);
+    if(rc!=CCDRBG_STATUS_OK) return rc;
+    
     // 2. If additional_input ≠ Null, then (Key, V) = HMAC_DRBG_Update (additional_input, Key, V).
-    if (input && inputLen)
-        hmac_dbrg_update(drbg, inputLen, input, 0, NULL, 0, NULL);
-
+    if (additional && additionalLength)
+        hmac_dbrg_update(drbg, additionalLength, additional, 0, NULL, 0, NULL);
+    
     // hmac_dbrg_generate_algorithm
-    char *outPtr = (char *) outBytes;
-    while (numBytes > 0) {
+    char *outPtr = (char *) dataOut;
+    while (dataOutLength > 0) {
         if (!state->bytesLeft) {
             //  5. V=HMAC(K,V).
             cchmac(di, state->keysize, state->key, state->vsize, state->v, state->v);
             state->bytesLeft = di->output_size;//di->output_size;  state->vsize
         }
-        size_t outLength = numBytes > state->bytesLeft ? state->bytesLeft : numBytes;
-        memcpy(outPtr, state->v, outLength);
+        size_t outLength = dataOutLength > state->bytesLeft ? state->bytesLeft : dataOutLength;
+        CC_MEMCPY(outPtr, state->v, outLength);
         state->bytesLeft -= outLength;
         outPtr += outLength;
-        numBytes -= outLength;
+        dataOutLength -= outLength;
     }
-
+    
     // 6. (Key, V) = HMAC_DRBG_Update (additional_input, Key, V).
-    hmac_dbrg_update(drbg, inputLen, input, 0, NULL, 0, NULL);
-
+    hmac_dbrg_update(drbg, additionalLength, additional, 0, NULL, 0, NULL);
+    
     // 7. reseed_counter = reseed_counter + 1.
     state->reseed_counter++;
-
+    
 #ifdef DEBUGFOO
     dumpState("generate: ", state);
 #endif
     
-    return 0;
+    return CCDRBG_STATUS_OK;
 }
 
 static void done(struct ccdrbg_state *drbg)
 {
     struct ccdrbg_nisthmac_state *state=(struct ccdrbg_nisthmac_state *)drbg;
-    cc_zero(sizeof(state->v), state->v);
-    cc_zero(sizeof(state->key), state->key);
+    cc_clear(sizeof(struct ccdrbg_nisthmac_state), state); //clear v, key as well as internal variables
 }
 
 struct ccdrbg_info ccdrbg_nisthmac_info = {
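For orientation, the rewritten init/reseed/generate/done entry points above are normally reached through the ccdrbg_info function-pointer table rather than called directly. A minimal usage sketch follows, assuming the corecrypto wrappers ccdrbg_factory_nisthmac, ccdrbg_init, ccdrbg_generate and ccdrbg_done behave as their names suggest; the strictFIPS field and the fixed-size context buffer are assumptions, so check ccdrbg.h for the real layout.

#include <corecrypto/ccdrbg.h>
#include <corecrypto/ccsha2.h>

static int example_hmac_drbg(uint8_t *out, unsigned long outlen,
                             const void *entropy, unsigned long entropy_len,
                             const void *nonce, unsigned long nonce_len)
{
    struct ccdrbg_nisthmac_custom custom;
    struct ccdrbg_info info;
    uint64_t ctx[64];   /* assumed >= ccdrbg_context_size(&info), 8-byte aligned */
    struct ccdrbg_state *drbg = (struct ccdrbg_state *)ctx;
    int rc;

    custom.di = ccsha256_di();   /* HMAC-SHA256 instantiation */
    custom.strictFIPS = 0;       /* field name is an assumption */
    ccdrbg_factory_nisthmac(&info, &custom);

    /* init() above enforces the entropy/personalization limits via validate_inputs(). */
    rc = ccdrbg_init(&info, drbg, entropy_len, entropy, nonce_len, nonce, 0, NULL);
    if (rc != CCDRBG_STATUS_OK)
        return rc;

    /* generate() enforces CCDRBG_MAX_REQUEST_SIZE and the reseed interval. */
    rc = ccdrbg_generate(&info, drbg, outlen, out, 0, NULL);
    ccdrbg_done(&info, drbg);    /* done() clears the whole state with cc_clear() */
    return rc;
}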
index b9b3b8852a764588e431bcd9a246b7132b2d4ed5..f5ccb3d717403710195e2a14debb758e1da5e6e1 100644 (file)
@@ -2,13 +2,14 @@
  *  ccdigest_init.c
  *  corecrypto
  *
- *  Created by Michael Brouwer on 11/30/10.
- *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *  Created on 11/30/2010
+ *
+ *  Copyright (c) 2010,2011,2015 Apple Inc. All rights reserved.
  *
  */
 
 #include <corecrypto/ccdigest.h>
-#include <corecrypto/ccn.h>
+#include <corecrypto/cc_priv.h>
 
 void ccdigest_init(const struct ccdigest_info *di, ccdigest_ctx_t ctx) {
     ccdigest_copy_state(di, ccdigest_state_ccn(di, ctx), di->initial_state);
index 1f8b9e54ba8d22c8d05ce4cb75fc365a41dc05e2..ce652362a14fde81e06f7bb99efa4d79c37d6931 100644 (file)
@@ -2,8 +2,9 @@
  *  ccdigest_update.c
  *  corecrypto
  *
- *  Created by Michael Brouwer on 11/30/10.
- *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *  Created on 11/30/2010
+ *
+ *  Copyright (c) 2010,2011,2014,2015 Apple Inc. All rights reserved.
  *
  */
 
@@ -12,7 +13,7 @@
 
 void ccdigest_update(const struct ccdigest_info *di, ccdigest_ctx_t ctx,
                      unsigned long len, const void *data) {
-    char * data_ptr = (char *) data;
+    const char * data_ptr = data;
     while (len > 0) {
         if (ccdigest_num(di, ctx) == 0 && len > di->block_size) {
             unsigned long nblocks = len / di->block_size;
index 28a9a2fe4104cf769c2c9f0a932a106df601bef8..eb38024db3d8918fc213845365ab3a586994f886 100644 (file)
@@ -2,8 +2,9 @@
  *  cchmac.c
  *  corecrypto
  *
- *  Created by Michael Brouwer on 12/7/10.
- *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *  Created on 12/07/2010
+ *
+ *  Copyright (c) 2010,2011,2012,2015 Apple Inc. All rights reserved.
  *
  */
 
index 6ac62eedf9de3b520c56774a5a4876daea70ab05..3c189a3fa043f474b49c0ed27af753fd717f828d 100644 (file)
@@ -2,13 +2,15 @@
  *  cchmac_final.c
  *  corecrypto
  *
- *  Created by Michael Brouwer on 12/7/10.
- *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *  Created on 12/07/2010
+ *
+ *  Copyright (c) 2010,2011,2015 Apple Inc. All rights reserved.
  *
  */
 
 #include <corecrypto/cchmac.h>
 #include <corecrypto/ccn.h>
+#include <corecrypto/cc_priv.h>
 
 void cchmac_final(const struct ccdigest_info *di, cchmac_ctx_t hc,
                   unsigned char *mac) {
index 0e2db29e52b5f32140d471d063b77945dd303182..8d426e8c824959aee203117e8e1fddfbd0a26f4c 100644 (file)
@@ -2,14 +2,15 @@
  *  cchmac_init.c
  *  corecrypto
  *
- *  Created by Michael Brouwer on 12/7/10.
- *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *  Created on 12/07/2010
+ *
+ *  Copyright (c) 2010,2011,2015 Apple Inc. All rights reserved.
  *
  */
 
 #include <corecrypto/cchmac.h>
 #include <corecrypto/ccn.h>
-#include <pexpert/pexpert.h>
+#include <corecrypto/cc_priv.h>
 
 /* The HMAC_<DIG> transform looks like:
    <DIG> (K XOR opad || <DIG> (K XOR ipad || text))
index b6c5df262c9d21d387cdbf4864ecbd1a11e58df8..26abc62e0e92977f7eb8c710157510e19f54e51d 100644 (file)
@@ -2,8 +2,9 @@
  *  cchmac_update.c
  *  corecrypto
  *
- *  Created by Michael Brouwer on 12/7/10.
- *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *  Created on 12/07/2010
+ *
+ *  Copyright (c) 2010,2011,2015 Apple Inc. All rights reserved.
  *
  */
 
index bd95a27a7e4786ed54f722a9287899ca93abe7bd..26cdab52a78aa24d85fabb718c2903086e67c6c2 100644 (file)
@@ -1,16 +1,19 @@
-//
-//  ccn_set.c
-//  corecrypto
-//
-//  Created by Fabrice Gautier on 2/17/12.
-//  Copyright (c) 2012 Apple, Inc. All rights reserved.
-//
+/*
+ *  ccn_set.c
+ *  corecrypto
+ *
+ *  Created on 02/17/2012
+ *
+ *  Copyright (c) 2012,2014,2015 Apple Inc. All rights reserved.
+ *
+ */
 
 #include <corecrypto/ccn.h>
+#include <corecrypto/cc_priv.h>
 
 #if !CCN_SET_ASM
 void ccn_set(cc_size n, cc_unit *r, const cc_unit *s)
 {
-    CC_MEMCPY(r, s, ccn_sizeof_n(n));
+    CC_MEMMOVE(r, s, ccn_sizeof_n(n));
 }
 #endif
index 68d2cd8aa5e188304a22a1b96ff6d6a63a1d6be6..2102709e944e505811d6722adfd11d73f24c935b 100644 (file)
@@ -2,8 +2,9 @@
  *  ccdigest_final_64be.c
  *  corecrypto
  *
- *  Created by Michael Brouwer on 12/1/10.
- *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *  Created on 12/06/2010
+ *
+ *  Copyright (c) 2010,2011,2015 Apple Inc. All rights reserved.
  *
  */
 
index 84a9887a035e60062676b510dca1de340fed6a7a..a709adcf69d27bce666aeb74cd61dcaf7b952fbc 100644 (file)
@@ -2,10 +2,9 @@
  *  ccsha1_eay.c
  *  corecrypto
  *
- *  Created by Fabrice Gautier on 12/6/10.
- *  Copyright 2010,2011 Apple Inc. All rights reserved.
+ *  Created on 12/06/2010
  *
- *  Based on ssleay implementation.
+ *  Copyright (c) 2010,2011,2012,2015 Apple Inc. All rights reserved.
  *
  */
 
index 2a5bc5e2d4b2581ed99c65412fb5b8955ad8031e..fffabd452652073c2664235b65e1d41f6f56ec73 100644 (file)
@@ -2,8 +2,9 @@
  *  ccsha1_initial_state.c
  *  corecrypto
  *
- *  Created by Fabrice Gautier on 12/7/10.
- *  Copyright 2010 Apple, Inc. All rights reserved.
+ *  Created on 12/07/2010
+ *
+ *  Copyright (c) 2010,2015 Apple Inc. All rights reserved.
  *
  */
 
diff --git a/osfmk/corpses/Makefile b/osfmk/corpses/Makefile
new file mode 100644 (file)
index 0000000..ded3ef9
--- /dev/null
@@ -0,0 +1,21 @@
+export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
+export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
+export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
+export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
+
+include $(MakeInc_cmd)
+include $(MakeInc_def)
+
+DATAFILES = task_corpse.h
+
+INSTALL_MI_LIST        = ${DATAFILES}
+
+INSTALL_MI_DIR = corpses
+
+EXPORT_MI_LIST = ${DATAFILES}
+
+EXPORT_MI_DIR = corpses
+
+
+include $(MakeInc_rule)
+include $(MakeInc_dir)
diff --git a/osfmk/corpses/corpse.c b/osfmk/corpses/corpse.c
new file mode 100644 (file)
index 0000000..27a0c13
--- /dev/null
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2012-2013, 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+
+/*
+ * Corpses Overview
+ * ================
+ * 
+ * A corpse is a state of a process that is past the point of its death. This means the process has
+ * completed all its termination operations, like releasing file descriptors, mach ports, sockets and
+ * other constructs used to identify a process. To every other process this mimics the behavior as if
+ * the process has died and is no longer available by any means.
+ * 
+ * Why do we need Corpses?
+ * -----------------------
+ * For crash inspection we need to inspect the state and data that is associated with the process so that
+ * the crash reporting infrastructure can build backtraces, find leaks, etc. For example, a crash
+ * report needs the thread backtraces and memory state of the process at the time of its death.
+ * 
+ * Corpses functionality in kernel
+ * ===============================
+ * The corpse functionality is an extension of the existing exception reporting mechanisms. The
+ * exception_triage calls will try to deliver the first round of exceptions, allowing
+ * task/debugger/ReportCrash/launchd level exception handlers to respond to the exception. If even after
+ * notification the exception is not handled, then the process begins the death operations and, during
+ * proc_prepareexit, we decide to create a corpse for inspection. Following is a sample run-through
+ * of the events and data shuffling that happens when corpses are enabled.
+ * 
+ *   * A process causes an exception during normal execution of threads.
+ *   * The exception generated by either the mach (e.g. GUARDED_MACHPORT) or bsd (e.g. SIGABRT, GUARDED_FD,
+ *     etc.) side is passed through the exception_triage() function to follow the thread -> task -> host
+ *     level exception handling system. This set of steps is the same as before and allows existing
+ *     crash reporting systems (both internal and 3rd party) to catch and create reports as required.
+ *   * If the above exception handling fails (nobody handles the notification), then the
+ *     proc_prepareexit path has logic to decide whether to create a corpse.
+ *   * The task_mark_corpse function allocates userspace vm memory and attaches the information
+ *     kcdata_descriptor_t to the task->corpse_info field of the task.
+ *     - All the task's threads are marked with the "inspection" flag, which signals the termination
+ *       daemon not to reap them but to hold them until they have been inspected.
+ *     - The task flags t_flags reflect the corpse bit and also a PENDING_CORPSE bit. PENDING_CORPSE
+ *       prevents task_terminate from stripping important data from the task.
+ *     - It marks all the threads to terminate and return to AST for termination.
+ *     - The allocation logic takes into account the rate-limiting policy of allowing only
+ *       TOTAL_CORPSES_ALLOWED corpses in flight.
+ *   * The proc exit thread continues and collects the required information in the allocated vm region.
+ *     Once complete, it marks itself for termination.
+ *   * In thread_terminate_self(), the last thread to enter will call proc_exit().
+ *     Following this is a check to see if the task is marked for corpse notification, in which case
+ *     it invokes task_deliver_crash_notification().
+ *   * Once EXC_CORPSE_NOTIFY is delivered, it removes the PENDING_CORPSE flag from the task (and the
+ *     inspection flag from all its threads) and allows task_terminate to go ahead and continue
+ *     the mach task termination process.
+ *   * ASIDE: The rest of the threads that reach thread_terminate_daemon() with the
+ *     inspection flag set are just bounced to another holding queue (crashed_threads_queue).
+ *     Only after the corpse notification are these pulled out of the holding queue and enqueued
+ *     back onto the termination queue.
+ * 
+ * 
+ * Corpse info format
+ * ==================
+ * The kernel (task_mark_corpse()) makes a vm allocation in the dead task's vm space (with tag
+ *     VM_MEMORY_CORPSEINFO (80)). Within this memory all corpse information is saved by various
+ *     subsystems, for example:
+ *   * the bsd proc exit path may write down the pid, parent pid, number of file descriptors, etc.
+ *   * the mach side may append data regarding ledger usage, memory stats, etc.
+ * See the kern_cdata.h documentation for detailed info about the memory structure and format.
+ * 
+ * Configuring Corpses functionality
+ * =================================
+ *   boot-arg: -no_corpses disables corpse generation. This can be added/removed without affecting
+ *     any other subsystem.
+ *   TOTAL_CORPSES_ALLOWED: (recompilation required) - Changing this number controls
+ *     the number of corpse instances to be held for inspection before allowing memory to be reclaimed
+ *     by the system.
+ *   CORPSEINFO_ALLOCATION_SIZE: the default size of the vm allocation. If much more data needs to be
+ *     stored in the future, please re-tune this parameter.
+ * 
+ * Debugging/Visibility
+ * ====================
+ *   * lldbmacros for the thread and task summaries are updated to show a "C" flag for corpse tasks/threads.
+ *   * there are macros to see the list of threads in the termination queue (dumpthread_terminate_queue)
+ *     and the holding queue (dumpcrashed_thread_queue).
+ *   * If corpse creation is disabled or ignored, the system log is updated via printf with the reason.
+ * 
+ * Limitations of Corpses
+ * ======================
+ *   Holding memory for inspection creates vm pressure, which might not be desirable
+ *   on low-memory devices. The maximum number of corpses being inspected at a time is
+ *   limited by TOTAL_CORPSES_ALLOWED.
+ * 
+ */
+
+
+#include <kern/assert.h>
+#include <mach/mach_types.h>
+#include <mach/boolean.h>
+#include <mach/vm_param.h>
+#include <kern/kern_types.h>
+#include <kern/mach_param.h>
+#include <kern/thread.h>
+#include <kern/task.h>
+#include <corpses/task_corpse.h>
+#include <kern/kalloc.h>
+#include <kern/kern_cdata.h>
+#include <mach/mach_vm.h>
+
+unsigned long  total_corpses_count = 0;
+unsigned long  total_corpses_created = 0;
+boolean_t corpse_enabled_config = TRUE;
+
+kcdata_descriptor_t task_get_corpseinfo(task_t task);
+kcdata_descriptor_t task_crashinfo_alloc_init(mach_vm_address_t crash_data_p, unsigned size);
+kern_return_t task_crashinfo_destroy(kcdata_descriptor_t data);
+static kern_return_t task_crashinfo_get_ref();
+static kern_return_t task_crashinfo_release_ref();
+
+
+
+void corpses_init(){
+       char temp_buf[20];
+       if (PE_parse_boot_argn("-no_corpses", temp_buf, sizeof(temp_buf))) {
+               corpse_enabled_config = FALSE;
+       }
+}
+
+/*
+ * Routine: corpses_enabled
+ * returns FALSE if not enabled
+ */
+boolean_t corpses_enabled()
+{
+       return corpse_enabled_config;
+}
+
+/*
+ * Routine: task_crashinfo_get_ref()
+ *          Grab a slot for creating a corpse.
+ * Returns: KERN_SUCCESS if the policy allows for creating a corpse.
+ */
+kern_return_t task_crashinfo_get_ref()
+{
+       unsigned long counter = total_corpses_count;
+       counter = OSIncrementAtomic((SInt32 *)&total_corpses_count);
+       if (counter >= TOTAL_CORPSES_ALLOWED) {
+               OSDecrementAtomic((SInt32 *)&total_corpses_count);
+               return KERN_RESOURCE_SHORTAGE;
+       }
+       OSIncrementAtomicLong((volatile long *)&total_corpses_created);
+       return KERN_SUCCESS;
+}
+
+/*
+ * Routine: task_crashinfo_release_ref
+ *          Release the slot used by a corpse.
+ */
+kern_return_t task_crashinfo_release_ref()
+{
+       unsigned long __assert_only counter;
+       counter =       OSDecrementAtomic((SInt32 *)&total_corpses_count);
+       assert(counter > 0);
+       return KERN_SUCCESS;
+}
+
+
+kcdata_descriptor_t task_crashinfo_alloc_init(mach_vm_address_t crash_data_p, unsigned size)
+{
+       if(KERN_SUCCESS != task_crashinfo_get_ref()) {
+               return NULL;
+       }
+
+       return kcdata_memory_alloc_init(crash_data_p, TASK_CRASHINFO_BEGIN, size, KCFLAG_USE_COPYOUT);
+}
+
+
+/*
+ * Free up the memory associated with task_crashinfo_data
+ */
+kern_return_t task_crashinfo_destroy(kcdata_descriptor_t data)
+{
+       if (!data) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       task_crashinfo_release_ref();
+       return kcdata_memory_destroy(data);
+}
+
+/*
+ * Routine: task_get_corpseinfo
+ * params: task - task which has corpse info setup.
+ * returns: crash info data attached to task.
+ *          NULL if task is null or has no corpse info
+ */
+kcdata_descriptor_t task_get_corpseinfo(task_t task)
+{
+       kcdata_descriptor_t retval = NULL;
+       if (task != NULL){
+               retval = task->corpse_info;
+       }
+       return retval;
+}
+
+
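A hedged sketch of how a consumer such as the proc exit path could drive the helpers above; example_collect_corpse_info, udata_buffer and size are placeholder names, and only corpses_enabled(), task_crashinfo_alloc_init() and task_crashinfo_destroy() from this file are relied on.

static kern_return_t
example_collect_corpse_info(mach_vm_address_t udata_buffer, unsigned size)
{
    kcdata_descriptor_t desc;

    /* Honor the -no_corpses boot-arg parsed in corpses_init(). */
    if (!corpses_enabled())
        return KERN_NOT_SUPPORTED;

    /* Fails when TOTAL_CORPSES_ALLOWED corpses are already in flight. */
    desc = task_crashinfo_alloc_init(udata_buffer, size);
    if (desc == NULL)
        return KERN_RESOURCE_SHORTAGE;

    /* ... bsd and mach subsystems append pid, rusage, UUIDs, etc. here ... */

    /* Frees the kcdata descriptor and releases the corpse slot. */
    return task_crashinfo_destroy(desc);
}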
diff --git a/osfmk/corpses/task_corpse.h b/osfmk/corpses/task_corpse.h
new file mode 100644 (file)
index 0000000..5a40081
--- /dev/null
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2012-2013, 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _TASK_CORPSE_H_
+#define _TASK_CORPSE_H_
+
+#include <stdint.h>
+#include <mach/mach_types.h>
+#include <kern/kern_cdata.h>
+
+typedef struct kcdata_item     *task_crashinfo_item_t;
+
+/*
+ * NOTE: Please update libkdd/kcdata/kcdtypes.c if you make any changes
+ * in TASK_CRASHINFO_* types.
+ */
+
+#define TASK_CRASHINFO_BEGIN                KCDATA_BUFFER_BEGIN_CRASHINFO
+#define TASK_CRASHINFO_STRING_DESC          KCDATA_TYPE_STRING_DESC
+#define TASK_CRASHINFO_UINT32_DESC          KCDATA_TYPE_UINT32_DESC
+#define TASK_CRASHINFO_UINT64_DESC          KCDATA_TYPE_UINT64_DESC
+
+#define TASK_CRASHINFO_EXTMODINFO           0x801
+#define TASK_CRASHINFO_BSDINFOWITHUNIQID    0x802 /* struct proc_uniqidentifierinfo */
+#define TASK_CRASHINFO_TASKDYLD_INFO        0x803
+#define TASK_CRASHINFO_UUID                 0x804
+#define TASK_CRASHINFO_PID                  0x805
+#define TASK_CRASHINFO_PPID                 0x806
+#define TASK_CRASHINFO_RUSAGE               0x807  /* struct rusage */
+#define TASK_CRASHINFO_RUSAGE_INFO          0x808  /* struct rusage_info_current */
+#define TASK_CRASHINFO_PROC_NAME            0x809  /* char * */
+#define TASK_CRASHINFO_PROC_STARTTIME       0x80B  /* struct timeval64 */
+#define TASK_CRASHINFO_USERSTACK            0x80C  /* uint64_t */
+#define TASK_CRASHINFO_ARGSLEN              0x80D
+#define TASK_CRASHINFO_EXCEPTION_CODES      0x80E  /* mach_exception_data_t */
+#define TASK_CRASHINFO_PROC_PATH            0x80F  /* string of len MAXPATHLEN */
+#define TASK_CRASHINFO_PROC_CSFLAGS         0x810  /* uint32_t */
+#define TASK_CRASHINFO_PROC_STATUS          0x811  /* char */
+#define TASK_CRASHINFO_UID                  0x812  /* uid_t */
+#define TASK_CRASHINFO_GID                  0x813  /* gid_t */
+#define TASK_CRASHINFO_PROC_ARGC            0x814  /* int */
+#define TASK_CRASHINFO_PROC_FLAGS           0x815  /* unsigned int */
+#define TASK_CRASHINFO_CPUTYPE              0x816  /* cpu_type_t */
+#define TASK_CRASHINFO_WORKQUEUEINFO        0x817  /* struct proc_workqueueinfo */
+#define TASK_CRASHINFO_RESPONSIBLE_PID      0x818  /* pid_t */
+#define TASK_CRASHINFO_DIRTY_FLAGS          0x819  /* int */
+#define TASK_CRASHINFO_CRASHED_THREADID     0x81A  /* uint64_t */
+
+#define TASK_CRASHINFO_END                  KCDATA_TYPE_BUFFER_END
+
+/* Deprecated: use the KCDATA_* macros for all future use */
+#define CRASHINFO_ITEM_TYPE(item)                KCDATA_ITEM_TYPE(item)
+#define CRASHINFO_ITEM_SIZE(item)                KCDATA_ITEM_SIZE(item)
+#define CRASHINFO_ITEM_DATA_PTR(item)    KCDATA_ITEM_DATA_PTR(item)
+
+#define CRASHINFO_ITEM_NEXT_HEADER(item)  KCDATA_ITEM_NEXT_HEADER(item)
+
+#define CRASHINFO_ITEM_FOREACH(head)     KCDATA_ITEM_FOREACH(head)
+
+
+#ifndef KERNEL
+#define task_crashinfo_get_data_with_desc kcdata_get_data_with_desc
+
+#endif /* KERNEL */
+
+#ifdef XNU_KERNEL_PRIVATE
+
+#define CORPSEINFO_ALLOCATION_SIZE (1024 * 1024 * 2)
+#define TOTAL_CORPSES_ALLOWED 5
+
+
+
+extern kern_return_t task_mark_corpse(task_t task);
+
+extern kern_return_t task_deliver_crash_notification(task_t task);
+
+extern kcdata_descriptor_t task_get_corpseinfo(task_t task);
+
+extern kcdata_descriptor_t  task_crashinfo_alloc_init(
+                                       mach_vm_address_t crash_data_p,
+                                       unsigned size);
+extern kern_return_t  task_crashinfo_destroy(kcdata_descriptor_t data);
+
+extern void corpses_init(void);
+
+extern boolean_t corpses_enabled(void);
+
+#endif /* XNU_KERNEL_PRIVATE */
+
+#endif /* _TASK_CORPSE_H_ */
index 4b5062cc22ffdc0de4ee06f1b5e83d20fee8aee8..71e998fe95ee9da07daa8979ab53e11c12b61509 100644 (file)
@@ -173,13 +173,13 @@ default_pager(void)
                if (dpt_array[id] == NULL)
                        Panic("alloc pager thread");
                kr = vm_allocate(kernel_map, &((dpt_array[id])->dpt_buffer),
-                                vm_page_size << vstruct_def_clshift, VM_FLAGS_ANYWHERE);
+                                vm_page_size << vstruct_def_clshift, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_OSFMK));
                if (kr != KERN_SUCCESS)
                        Panic("alloc thread buffer");
                kr = vm_map_wire(kernel_map, (dpt_array[id])->dpt_buffer, 
                        ((dpt_array[id])->dpt_buffer)
                                        +(vm_page_size << vstruct_def_clshift), 
-                       VM_PROT_DEFAULT,
+                       VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK),
                        FALSE);
                if (kr != KERN_SUCCESS)
                        Panic("wire thread buffer");
index 205e612a2dea6e6f92921eb5eb420b8c9648cef6..819af3cdc6980c93181e63a93bea62dca6b4904a 100644 (file)
@@ -757,10 +757,10 @@ ps_delete(
                if(dp_pages_free < cluster_transfer_minimum)
                        error = KERN_FAILURE;
                else {
-                       vm_object_t     transfer_object;
-                       unsigned int    count;
-                       upl_t           upl;
-                       int             upl_flags;
+                       vm_object_t         transfer_object;
+                       unsigned int        count;
+                       upl_t               upl;
+                       upl_control_flags_t upl_flags;
 
                        transfer_object = vm_object_allocate((vm_object_size_t)VM_SUPER_CLUSTER);
                        count = 0;
@@ -1234,7 +1234,7 @@ vs_alloc_async(void)
                        alias_struct = (struct vstruct_alias *) 
                                kalloc(sizeof (struct vstruct_alias));
                        if(alias_struct != NULL) {
-                               alias_struct->vs = (struct vstruct *)vsa;
+                               __IGNORE_WCASTALIGN(alias_struct->vs = (struct vstruct *)vsa);
                                alias_struct->name = &default_pager_ops;
                                reply_port->ip_alias = (uintptr_t) alias_struct;
                                vsa->reply_port = reply_port;
@@ -2698,7 +2698,7 @@ ps_read_device(
        *residualp = size - total_read;
        if((dev_buffer != *bufferp) && (total_read != 0)) {
                vm_offset_t temp_buffer;
-               vm_allocate(kernel_map, &temp_buffer, total_read, VM_FLAGS_ANYWHERE);
+               vm_allocate(kernel_map, &temp_buffer, total_read, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_OSFMK));
                memcpy((void *) temp_buffer, (void *) *bufferp, total_read);
                if(vm_map_copyin_page_list(kernel_map, temp_buffer, total_read, 
                        VM_MAP_COPYIN_OPT_SRC_DESTROY | 
index 3ffa10e447a17a31aa5a3fb06cc7ddaa09136469..21fe6fd7bbb3cee447210169d31fa9b0ed62de31 100644 (file)
@@ -988,7 +988,7 @@ default_pager_objects(
        osize = vm_map_round_page(actual * sizeof (*objects),
                                  vm_map_page_mask(ipc_kernel_map));
        opotential = (unsigned int) (osize / sizeof (*objects));
-       kr = kmem_alloc(ipc_kernel_map, &oaddr, osize);
+       kr = kmem_alloc(ipc_kernel_map, &oaddr, osize, VM_KERN_MEMORY_IPC);
        if (KERN_SUCCESS != kr) {
                kfree(pagers, psize);
                return KERN_RESOURCE_SHORTAGE;
@@ -1161,7 +1161,7 @@ default_pager_object_pages(
 
                size = vm_map_round_page(actual * sizeof (*pages),
                                         vm_map_page_mask(ipc_kernel_map));
-               kr = kmem_alloc(ipc_kernel_map, &addr, size);
+               kr = kmem_alloc(ipc_kernel_map, &addr, size, VM_KERN_MEMORY_IPC);
                if (KERN_SUCCESS != kr)
                        return KERN_RESOURCE_SHORTAGE;
 
index f230f04d562531204eb17d81f0567ba1a75d2985..a788cd1c3df32ef3a50b0d79daa2611700da7632 100644 (file)
@@ -15,10 +15,15 @@ DATAFILES = \
        device_types.h device_port.h device_types.defs \
        ${MIG_DEFS}
 
+PRIVATE_DATAFILES = \
+       device_types.h
+
 MIGINCLUDES = \
 
 INSTALL_MI_LIST        = ${DATAFILES} ${_MIG_HDRS_} ${MIGINCLUDES}
 
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
+
 INSTALL_MI_DIR = device
 
 EXPORT_MI_LIST = ${DATAFILES} ${_MIG_HDRS_} ${MIGINCLUDES}
index 133b63cd59a90503200907ed8f2971833f51c78d..94c706eff7fa391e2688b1328c506afa7e894a56 100644 (file)
@@ -92,6 +92,7 @@ type reply_port_t = MACH_MSG_TYPE_MAKE_SEND_ONCE | polymorphic
 
 type io_name_t            = c_string[*:128];
 type io_string_t          = c_string[*:512];
+type io_string_inband_t   = c_string[*:4096];
 type io_struct_inband_t   = array[*:4096] of char;
 type io_buf_ptr_t        = ^array[] of MACH_MSG_TYPE_INTEGER_8;
 type NDR_record_t         = struct[8] of char;
@@ -788,6 +789,25 @@ routine FUNC_NAME(io_service_add_notification_bin)(
 skip;
 #endif
 
+#if !IOKITSIMD
+
+routine io_registry_entry_get_path_ool(
+           registry_entry      : io_object_t;
+       in  plane               : io_name_t;
+       out path                : io_string_inband_t;
+       out path_ool            : io_buf_ptr_t, physicalcopy
+       );
+
+routine io_registry_entry_from_path_ool(
+           master_port         : mach_port_t;
+       in  path                : io_string_inband_t;
+       in  path_ool            : io_buf_ptr_t, physicalcopy;
+        out result             : kern_return_t;
+       out registry_entry      : io_object_t
+       );
+
+#endif
+
 #endif /* IOKIT */
 
 /* vim: set ft=c : */
index 8b00349ebde1cc4fd851a041e755b8c661209ade..8bf42dbfaff5a39d7f2981bea492a2534be13b33 100644 (file)
@@ -80,7 +80,7 @@
 #include <device/device_port.h>
 
 ipc_port_t     master_device_port;
-void        *master_device_kobject;
+void           *master_device_kobject;
 
 lck_grp_attr_t * dev_lck_grp_attr;
 lck_grp_t * dev_lck_grp;
@@ -94,8 +94,8 @@ device_service_create(void)
        if (master_device_port == IP_NULL)
            panic("can't allocate master device port");
 
-    ipc_kobject_set(master_device_port, (ipc_kobject_t)&master_device_kobject, IKOT_MASTER_DEVICE);
-    kernel_set_special_port(host_priv_self(), HOST_IO_MASTER_PORT,
+       ipc_kobject_set(master_device_port, (ipc_kobject_t)&master_device_kobject, IKOT_MASTER_DEVICE);
+       kernel_set_special_port(host_priv_self(), HOST_IO_MASTER_PORT,
                                ipc_port_make_send(master_device_port));
 
        /* allocate device lock group attribute and group */
index e7466c05e71598451a1e909c4716999fd8244326..af4991c497505b9690fe6a616319395484d473a8 100644 (file)
@@ -72,7 +72,7 @@
 #include <mach/port.h>
 
 #if PRIVATE
-#define IOKIT_SERVER_VERSION   20140421
+#define IOKIT_SERVER_VERSION   20150715
 #endif
 
 
@@ -90,6 +90,7 @@ typedef char *        io_buf_ptr_t;
 /* must match device_types.defs */
 typedef        char                    io_name_t[128];
 typedef        char                    io_string_t[512];       
+typedef        char                    io_string_inband_t[4096];
 typedef char                   io_struct_inband_t[4096];
 
 #if KERNEL
index 4134279df5ede6bc06c6e024a8dc32ea713aa97e..89d2c87db691a08f87dbf6acd29a856465acff1c 100644 (file)
@@ -8,18 +8,18 @@ include $(MakeInc_def)
 
 MIG_DEFS = gssd_mach.defs
 
-DATAFILES = gssd_mach_types.h ${MIG_DEFS}
+PRIVATE_DATAFILES = gssd_mach_types.h ${MIG_DEFS}
 
 INSTALL_MI_LIST =
 
-INSTALL_MI_LCL_LIST    = ${DATAFILES} 
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES} 
 
 INSTALL_MI_GEN_LIST = 
 
 INSTALL_MI_DIR = gssd
 
 EXPORT_MI_LIST = \
-       ${DATAFILES}
+       ${PRIVATE_DATAFILES}
 
 EXPORT_MI_GEN_LIST = gssd_mach.h
 
index 35d4bf27973e946dd4ab5f2eb00ef4433383dfa0..a697ebe0f0818f7ff5fecf287cd79640331b0a2b 100644 (file)
 #include <libkern/kernel_mach_header.h>
 #include <libkern/OSKextLibPrivate.h>
 
+#include <mach/branch_predicates.h>
+
 #if    DEBUG
 #define DPRINTF(x...)  kprintf(x)
 #else
@@ -892,7 +894,7 @@ Debugger(
        int cn = cpu_number();
        task_t task = current_task();
        int     task_pid = pid_from_task(task);
-
+       boolean_t old_doprnt_hide_pointers = doprnt_hide_pointers;
 
        hw_atomic_add(&debug_mode, 1);   
        if (!panic_is_inited) {
@@ -900,6 +902,8 @@ Debugger(
                asm("hlt");
        }
 
+       doprnt_hide_pointers = FALSE;
+
        printf("Debugger called: <%s>\n", message);
        kprintf("Debugger called: <%s>\n", message);
 
@@ -922,7 +926,7 @@ Debugger(
                __asm__ volatile("movq %%rbp, %0" : "=m" (stackptr));
 
                /* Print backtrace - callee is internally synchronized */
-               if ((task_pid == 1) && (init_task_died)) {
+               if (task_pid == 1 && (init_task_died)) {
                        /* Special handling of launchd died panics */
                        print_launchd_info();
                } else {
@@ -1009,6 +1013,7 @@ Debugger(
                }
         }
 
+       doprnt_hide_pointers = old_doprnt_hide_pointers;
        __asm__("int3");
        hw_atomic_sub(&debug_mode, 1);   
 }
@@ -1148,6 +1153,7 @@ panic_i386_backtrace(void *_frame, int nframes, const char *msg, boolean_t regdu
        uint64_t bt_tsc_timeout;
        boolean_t keepsyms = FALSE;
        int cn = cpu_number();
+       boolean_t old_doprnt_hide_pointers = doprnt_hide_pointers;
 
        if(pbtcpu != cn) {
                hw_atomic_add(&pbtcnt, 1);
@@ -1158,6 +1164,12 @@ panic_i386_backtrace(void *_frame, int nframes, const char *msg, boolean_t regdu
                pbtcpu = cn;
        }
 
+       if (__improbable(doprnt_hide_pointers == TRUE)) {
+               /* If we're called directly, the Debugger() function will not be called,
+                * so we need to reset the value in here. */
+               doprnt_hide_pointers = FALSE;
+       }
+
        panic_check_hook();
 
        PE_parse_boot_argn("keepsyms", &keepsyms, sizeof (keepsyms));
@@ -1252,6 +1264,8 @@ out:
 
        panic_display_system_configuration();
 
+       doprnt_hide_pointers = old_doprnt_hide_pointers;
+
        /* Release print backtrace lock, to permit other callers in the
         * event of panics on multiple processors.
         */
@@ -1324,7 +1338,7 @@ print_tasks_user_threads(task_t task)
        for (j = 0, thread = (thread_t) queue_first(&task->threads); j < task->thread_count;
                        ++j, thread = (thread_t) queue_next(&thread->task_threads)) {
 
-               kdb_printf("Thread  %p\n", thread);
+               kdb_printf("Thread %d: %p\n", j, thread);
                pmap = get_task_pmap(task);
                savestate = get_user_regs(thread);
                rbp = savestate->ss_64.rbp;
@@ -1333,6 +1347,23 @@ print_tasks_user_threads(task_t task)
                }
 }
 
+void
+print_thread_num_that_crashed(task_t task)
+{
+       thread_t                c_thread = current_thread();
+       thread_t                thread;
+       int             j;
+       
+       for (j = 0, thread = (thread_t) queue_first(&task->threads); j < task->thread_count;
+                       ++j, thread = (thread_t) queue_next(&thread->task_threads)) {
+
+               if (c_thread == thread) {
+                       kdb_printf("\nThread %d crashed\n", j);
+                       break;
+               }
+       }
+}
+
 #define PANICLOG_UUID_BUF_SIZE 256
 
 void print_uuid_info(task_t task)
@@ -1425,6 +1456,7 @@ void print_launchd_info(void)
        }
        
        print_uuid_info(task);
+       print_thread_num_that_crashed(task);
        print_threads_registers(thread);
        print_tasks_user_threads(task);
        kdb_printf("Mac OS version: %s\n", (osversion[0] != 0) ? osversion : "Not yet set");
index 067018965af6cb9737638bb222dadb3c45c35a96..3a513ab721abbd4a4365d4fd1c77f8da9958c132 100644 (file)
@@ -153,6 +153,7 @@ diagCall64(x86_saved_state_t * state)
 
                        lastRuptClear = mach_absolute_time();   /* Get the time of clear */
                        rval = 1;       /* Normal return */
+                       (void) ml_set_interrupts_enabled(FALSE);
                        break;
                }
 
@@ -178,6 +179,7 @@ diagCall64(x86_saved_state_t * state)
                                                                         * slot */
                }
                rval = 1;
+               (void) ml_set_interrupts_enabled(FALSE);
                break;
 
        case dgPowerStat:
@@ -221,7 +223,10 @@ diagCall64(x86_saved_state_t * state)
                rdmsr64_carefully(MSR_IA32_RING_PERF_STATUS, &pkes.ring_ratio_instantaneous);
 
                pkes.IA_frequency_clipping_cause = ~0ULL;
-               rdmsr64_carefully(MSR_IA32_IA_PERF_LIMIT_REASONS, &pkes.IA_frequency_clipping_cause);
+
+               uint32_t ia_perf_limits = MSR_IA32_IA_PERF_LIMIT_REASONS;
+
+               rdmsr64_carefully(ia_perf_limits, &pkes.IA_frequency_clipping_cause);
 
                pkes.GT_frequency_clipping_cause = ~0ULL;
                rdmsr64_carefully(MSR_IA32_GT_PERF_LIMIT_REASONS, &pkes.GT_frequency_clipping_cause);
@@ -267,13 +272,14 @@ diagCall64(x86_saved_state_t * state)
                        cest.cpu_urc = cpu_data_ptr[i]->cpu_cur_urc;
 #if DIAG_ALL_PMCS
                        bcopy(&cpu_data_ptr[i]->cpu_gpmcs[0], &cest.gpmcs[0], sizeof(cest.gpmcs));
-#endif /* DIAG_ALL_PMCS */                     
+#endif /* DIAG_ALL_PMCS */
                        (void) ml_set_interrupts_enabled(TRUE);
 
                        copyout(&cest, curpos, sizeof(cest));
                        curpos += sizeof(cest);
                }
                rval = 1;
+               (void) ml_set_interrupts_enabled(FALSE);
        }
                break;
        case dgEnaPMC:
@@ -300,6 +306,7 @@ diagCall64(x86_saved_state_t * state)
                        kfree(ptr, 1024);
                        *ptr = 0x42;
                }
+               (void) ml_set_interrupts_enabled(FALSE);
        }
        break;
 #endif
@@ -310,6 +317,7 @@ diagCall64(x86_saved_state_t * state)
                (void) ml_set_interrupts_enabled(TRUE);
                if (diagflag)
                        rval = pmap_permissions_verify(kernel_pmap, kernel_map, 0, ~0ULL);
+               (void) ml_set_interrupts_enabled(FALSE);
        }
                break;
 #endif /* PERMIT_PERMCHECK */
@@ -319,6 +327,7 @@ diagCall64(x86_saved_state_t * state)
 
        regs->rax = rval;
 
+       assert(ml_get_interrupts_enabled() == FALSE);
        return rval;
 }
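The added ml_set_interrupts_enabled(FALSE) calls restore the interrupt state that each dgXXX case had temporarily enabled, and the new assert documents the invariant that diagCall64() returns with interrupts off. A sketch of the discipline each case now follows, mirroring the copyout usage already in the hunks above (example_diag_case is a hypothetical helper):

static void
example_diag_case(user_addr_t curpos, const void *payload, size_t len)
{
    /* Enable interrupts only around the potentially slow copyout. */
    (void) ml_set_interrupts_enabled(TRUE);
    (void) copyout(payload, curpos, len);

    /* Restore the state the trap handler expects before returning. */
    (void) ml_set_interrupts_enabled(FALSE);
    assert(ml_get_interrupts_enabled() == FALSE);
}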
 
index a7cbd51fc60ab1b3add16e4f5eba998fc358db0a..f1873bb62838b1db925697d1c5e9c6a02e3df669 100644 (file)
@@ -9,6 +9,7 @@ include $(MakeInc_def)
 EXPORT_ONLY_FILES =    \
                    apic.h \
                    asm.h \
+                   atomic.h \
                    bit_routines.h \
                    cpu_number.h \
                    cpu_capabilities.h  \
@@ -36,6 +37,7 @@ EXPORT_ONLY_FILES =   \
                    rtclock_protos.h \
                    seg.h \
                    simple_lock.h \
+                   smp.h \
                    tsc.h \
                    tss.h \
                    ucode.h \
index 76617a0da90dd32591e606b7b4a20337cd6f8fb8..9bd836e2c29fb5587789a3b039f064ec3b772b53 100644 (file)
@@ -89,12 +89,7 @@ acpi_install_wake_handler(void)
 #endif
 }
 
-#if HIBERNATION
-struct acpi_hibernate_callback_data {
-       acpi_sleep_callback func;
-       void *refcon;
-};
-typedef struct acpi_hibernate_callback_data acpi_hibernate_callback_data_t;
+#if CONFIG_SLEEP
 
 unsigned int           save_kdebug_enable = 0;
 static uint64_t                acpi_sleep_abstime;
@@ -102,7 +97,13 @@ static uint64_t             acpi_idle_abstime;
 static uint64_t                acpi_wake_abstime, acpi_wake_postrebase_abstime;
 boolean_t              deep_idle_rebase = TRUE;
 
-#if CONFIG_SLEEP
+#if HIBERNATION
+struct acpi_hibernate_callback_data {
+       acpi_sleep_callback func;
+       void *refcon;
+};
+typedef struct acpi_hibernate_callback_data acpi_hibernate_callback_data_t;
+
 static void
 acpi_hibernate(void *refcon)
 {
@@ -149,8 +150,8 @@ acpi_hibernate(void *refcon)
 
        /* should never get here! */
 }
-#endif /* CONFIG_SLEEP */
 #endif /* HIBERNATION */
+#endif /* CONFIG_SLEEP */
 
 extern void                    slave_pstart(void);
 extern void                    hibernate_rebuild_vm_structs(void);
@@ -296,7 +297,9 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
        if (lapic_probe())
                lapic_configure();
 
+#if HIBERNATION
        hibernate_rebuild_vm_structs();
+#endif
 
        elapsed += mach_absolute_time() - start;
        acpi_wake_abstime = mach_absolute_time();
@@ -323,6 +326,7 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
 
        IOCPURunPlatformActiveActions();
 
+#if HIBERNATION
        if (did_hibernate) {
                elapsed += mach_absolute_time() - start;
                
@@ -334,6 +338,7 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
 
                KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 0) | DBG_FUNC_END, 0, 0, 0, 0, 0);
        } else
+#endif /* HIBERNATION */
                KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 0) | DBG_FUNC_END, 0, 0, 0, 0, 0);
 
        /* Restore power management register state */
diff --git a/osfmk/i386/ast.h b/osfmk/i386/ast.h
deleted file mode 100644 (file)
index 727695a..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-
-#ifndef        _I386_AST_H_
-#define        _I386_AST_H_
-
-/*
- * Machine-dependent AST file for machines with no hardware AST support.
- *
- */
-
-#endif /* _I386_AST_H_ */
diff --git a/osfmk/i386/ast_types.h b/osfmk/i386/ast_types.h
deleted file mode 100644 (file)
index 68dad8d..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990,1989 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-
-/*
- */
-
-#ifndef        _I386_AST_TYPES_H_
-#define        _I386_AST_TYPES_H_
-
-/*
- *     Data type for remote ast_check() invocation support.  Currently
- *     not implemented.  Do this first to avoid include problems.
- */
-typedef        int     ast_check_t;
-
-#endif /* _I386_AST_TYPES_H_ */
diff --git a/osfmk/i386/atomic.h b/osfmk/i386/atomic.h
new file mode 100644 (file)
index 0000000..2cbeae6
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _I386_ATOMIC_H_
+#define _I386_ATOMIC_H_
+
+#include <i386/smp.h>
+
+#if    __SMP__
+
+#define memory_order_consume_smp memory_order_consume
+#define memory_order_acquire_smp memory_order_acquire
+#define memory_order_release_smp memory_order_release
+#define memory_order_acq_rel_smp memory_order_acq_rel
+#define memory_order_seq_cst_smp memory_order_seq_cst
+
+#else
+
+#define memory_order_consume_smp memory_order_relaxed
+#define memory_order_acquire_smp memory_order_relaxed
+#define memory_order_release_smp memory_order_relaxed
+#define memory_order_acq_rel_smp memory_order_relaxed
+#define memory_order_seq_cst_smp memory_order_relaxed
+
+#endif
+
+#endif // _I386_ATOMIC_H_
+
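The new osfmk/i386/atomic.h only aliases the *_smp ordering names to the standard C11 memory_order enumerators on SMP builds, and to memory_order_relaxed otherwise, so the same call sites compile to cheaper operations on uniprocessor configurations. A self-contained sketch of the intended usage, written against <stdatomic.h> rather than the kernel's own atomic wrappers (the __SMP__ setting and variable names are assumptions for illustration):

#include <stdatomic.h>
#include <stdint.h>

#define __SMP__ 1                                /* assumed: multiprocessor build */

#if __SMP__
#define memory_order_acquire_smp memory_order_acquire
#define memory_order_release_smp memory_order_release
#else
#define memory_order_acquire_smp memory_order_relaxed
#define memory_order_release_smp memory_order_relaxed
#endif

static _Atomic uint32_t published_value;

void
publish(uint32_t v)
{
	/* release store on SMP, plain relaxed store on UP */
	atomic_store_explicit(&published_value, v, memory_order_release_smp);
}

uint32_t
consume(void)
{
	/* acquire load on SMP, plain relaxed load on UP */
	return atomic_load_explicit(&published_value, memory_order_acquire_smp);
}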
index bc510068bfffe182168ed9762e44c61a8b6eefa8..a70d68ae3668ee7af40b9085d36e81044f049689 100644 (file)
@@ -416,6 +416,12 @@ mach_call_munger(x86_saved_state_t *state)
        struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
        x86_saved_state32_t     *regs;
 
+#if PROC_REF_DEBUG
+       struct uthread *ut = get_bsdthread_info(current_thread());
+
+       uthread_reset_proc_refcount(ut);
+#endif
+
        assert(is_saved_state32(state));
        regs = saved_state32(state);
 
@@ -475,6 +481,12 @@ mach_call_munger(x86_saved_state_t *state)
 
        throttle_lowpri_io(1);
 
+#if PROC_REF_DEBUG
+       if (__improbable(uthread_get_proc_refcount(ut) != 0)) {
+               panic("system call returned with uu_proc_refcount != 0");
+       }
+#endif
+
        thread_exception_return();
        /* NOTREACHED */
 }
@@ -491,6 +503,12 @@ mach_call_munger64(x86_saved_state_t *state)
        struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
        x86_saved_state64_t     *regs;
 
+#if PROC_REF_DEBUG
+       struct uthread *ut = get_bsdthread_info(current_thread());
+
+       uthread_reset_proc_refcount(ut);
+#endif
+
        assert(is_saved_state64(state));
        regs = saved_state64(state);
 
@@ -549,6 +567,12 @@ mach_call_munger64(x86_saved_state_t *state)
 
        throttle_lowpri_io(1);
 
+#if PROC_REF_DEBUG
+       if (__improbable(uthread_get_proc_refcount(ut) != 0)) {
+               panic("system call returned with uu_proc_refcount != 0");
+       }
+#endif
+
        thread_exception_return();
        /* NOTREACHED */
 }
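Both Mach trap dispatchers receive the same PROC_REF_DEBUG bracketing: zero the per-uthread proc reference count on entry and panic if any reference is still held when the trap is about to return to user space. A condensed reading of the added lines (the helper semantics, reset-to-zero and read-back, are inferred from their use here rather than spelled out in this hunk):

#if PROC_REF_DEBUG
	struct uthread *ut = get_bsdthread_info(current_thread());

	uthread_reset_proc_refcount(ut);        /* entry: no proc refs held yet */
#endif

	/* ... copy in arguments, dispatch the trap, throttle low-priority I/O ... */

#if PROC_REF_DEBUG
	if (__improbable(uthread_get_proc_refcount(ut) != 0)) {
		/* some path in the trap leaked a proc reference */
		panic("system call returned with uu_proc_refcount != 0");
	}
#endif
	thread_exception_return();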
index eea96ab64532140928cde2de32f09d0d8c0478ad..f11de05351d709c7f8fdd35891731f05a3331670 100644 (file)
 
 #include <sys/kdebug.h>
 
+#if CONFIG_ATM
+#include <atm/atm_internal.h>
+#endif
+
 /* the lists of commpage routines are in commpage_asm.s  */
 extern commpage_descriptor*    commpage_32_routines[];
 extern commpage_descriptor*    commpage_64_routines[];
@@ -122,10 +126,24 @@ commpage_allocate(
        if (submap == NULL)
                panic("commpage submap is null");
 
-       if ((kr = vm_map(kernel_map,&kernel_addr,area_used,0,VM_FLAGS_ANYWHERE,NULL,0,FALSE,VM_PROT_ALL,VM_PROT_ALL,VM_INHERIT_NONE)))
+       if ((kr = vm_map(kernel_map,
+                        &kernel_addr,
+                        area_used,
+                        0,
+                        VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_OSFMK),
+                        NULL,
+                        0,
+                        FALSE,
+                        VM_PROT_ALL,
+                        VM_PROT_ALL,
+                        VM_INHERIT_NONE)))
                panic("cannot allocate commpage %d", kr);
 
-       if ((kr = vm_map_wire(kernel_map,kernel_addr,kernel_addr+area_used,VM_PROT_DEFAULT,FALSE)))
+       if ((kr = vm_map_wire(kernel_map,
+                             kernel_addr,
+                             kernel_addr+area_used,
+                             VM_PROT_DEFAULT|VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK),
+                             FALSE)))
                panic("cannot wire commpage: %d", kr);
 
        /* 
@@ -138,7 +156,7 @@ commpage_allocate(
         */
        if (!(kr = vm_map_lookup_entry( kernel_map, vm_map_trunc_page(kernel_addr, VM_MAP_PAGE_MASK(kernel_map)), &entry) || entry->is_sub_map))
                panic("cannot find commpage entry %d", kr);
-       entry->object.vm_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
+       VME_OBJECT(entry)->copy_strategy = MEMORY_OBJECT_COPY_NONE;
 
        if ((kr = mach_make_memory_entry( kernel_map,           // target map
                                    &size,              // size 
@@ -466,6 +484,9 @@ commpage_populate( void )
        commpage_mach_approximate_time_init();
        rtc_nanotime_init_commpage();
        commpage_update_kdebug_enable();
+#if CONFIG_ATM
+       commpage_update_atm_diagnostic_config(atm_get_diagnostic_config());
+#endif
 }
 
 /* Fill in the common routines during kernel initialization. 
@@ -724,6 +745,27 @@ commpage_update_kdebug_enable(void)
        }
 }
 
+/* Ditto for atm_diagnostic_config */
+void
+commpage_update_atm_diagnostic_config(uint32_t diagnostic_config)
+{
+       volatile uint32_t *saved_data_ptr;
+       char *cp;
+
+       cp = commPagePtr32;
+       if (cp) {
+               cp += (_COMM_PAGE_ATM_DIAGNOSTIC_CONFIG - _COMM_PAGE32_BASE_ADDRESS);
+               saved_data_ptr = (volatile uint32_t *)cp;
+               *saved_data_ptr = diagnostic_config;
+       }
+
+       cp = commPagePtr64;
+       if ( cp ) {
+               cp += (_COMM_PAGE_ATM_DIAGNOSTIC_CONFIG - _COMM_PAGE32_START_ADDRESS);
+               saved_data_ptr = (volatile uint32_t *)cp;
+               *saved_data_ptr = diagnostic_config;
+       }
+}
 
 /*
  * update the commpage data for last known value of mach_absolute_time()
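commpage_update_atm_diagnostic_config() follows the shape of the kdebug-enable updater above it: compute the field's offset from the 32-bit commpage base and store the value through both the 32-bit and 64-bit commpage mappings. A stripped-down, self-contained model of that double write (the buffers and offset here are stand-ins, not the real commpage layout):

#include <stdint.h>
#include <stdio.h>

#define COMMPAGE_SIZE 4096
#define FIELD_OFFSET  0x48               /* illustrative; mirrors the new slot's offset */

static _Alignas(uint32_t) char commpage32[COMMPAGE_SIZE];   /* stand-in for commPagePtr32 */
static _Alignas(uint32_t) char commpage64[COMMPAGE_SIZE];   /* stand-in for commPagePtr64 */

static void
update_diagnostic_config(uint32_t diagnostic_config)
{
	char *pages[] = { commpage32, commpage64 };

	for (int i = 0; i < 2; i++) {
		volatile uint32_t *slot = (volatile uint32_t *)(pages[i] + FIELD_OFFSET);
		*slot = diagnostic_config;       /* same value lands in both mappings */
	}
}

int
main(void)
{
	update_diagnostic_config(1);
	printf("32-bit slot = %u\n", *(volatile uint32_t *)(commpage32 + FIELD_OFFSET));
	return 0;
}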
index 1abe641132bf907cfb6739a55e8fcb0740a6d7cf..6f2a3418d6860577dbc73274d5f179cade5d5e44 100644 (file)
@@ -147,6 +147,7 @@ extern      void    commpage_sched_gen_inc(void);
 extern void    commpage_update_active_cpus(void);
 extern void    commpage_update_mach_approximate_time(uint64_t abstime);
 extern void    commpage_update_kdebug_enable(void);
+extern void    commpage_update_atm_diagnostic_config(uint32_t);
 
 extern uint32_t        commpage_is_in_pfz32(uint32_t);
 extern uint32_t        commpage_is_in_pfz64(addr64_t);
index 2b6b6864b1dc54129337f8bfa58a3d00d2699788..7de91627ab7d41af5655a44ab93bd1a119b009b1 100644 (file)
@@ -199,7 +199,7 @@ cpu_processor_alloc(boolean_t is_boot_cpu)
        if (is_boot_cpu)
                return &processor_master;
 
-       ret = kmem_alloc(kernel_map, (vm_offset_t *) &proc, sizeof(*proc));
+       ret = kmem_alloc(kernel_map, (vm_offset_t *) &proc, sizeof(*proc), VM_KERN_MEMORY_OSFMK);
        if (ret != KERN_SUCCESS)
                return NULL;
 
index 46f08196ecb4bd0222c4d51ddec75502590724c3..1c25db2940c380296e6aff985b8e32317b906a24 100644 (file)
@@ -185,7 +185,9 @@ int _NumCPUs( void )
 
 #define _COMM_PAGE_CPUFAMILY           (_COMM_PAGE_START_ADDRESS+0x040)        /* uint32_t hw.cpufamily, x86*/
 #define _COMM_PAGE_KDEBUG_ENABLE       (_COMM_PAGE_START_ADDRESS+0x044)        /* uint32_t export "kdebug_enable" to userspace */
-#define _COMM_PAGE_UNUSED2             (_COMM_PAGE_START_ADDRESS+0x048)        /* [0x48,0x50) unused */
+#define        _COMM_PAGE_ATM_DIAGNOSTIC_CONFIG        (_COMM_PAGE_START_ADDRESS+0x48) /* uint32_t export "atm_diagnostic_config" to userspace */
+
+#define _COMM_PAGE_UNUSED2             (_COMM_PAGE_START_ADDRESS+0x04C)        /* [0x4C,0x50) unused */
 
 #define        _COMM_PAGE_TIME_DATA_START      (_COMM_PAGE_START_ADDRESS+0x050)        /* base of offsets below (_NT_SCALE etc) */
 #define _COMM_PAGE_NT_TSC_BASE         (_COMM_PAGE_START_ADDRESS+0x050)        /* used by nanotime() */
index b4b4bd4b90cab7965a0c738aeed2e235c05dae0b..466d62f23021814671e1384c88ef3366fb28f403 100644 (file)
@@ -182,10 +182,6 @@ typedef struct cpu_data
        uint64_t                cpu_dr7; /* debug control register */
        uint64_t                cpu_int_event_time;     /* intr entry/exit time */
        pal_rtc_nanotime_t      *cpu_nanotime;          /* Nanotime info */
-#if    CONFIG_COUNTERS
-       thread_t                csw_old_thread;
-       thread_t                csw_new_thread;
-#endif /* CONFIG COUNTERS */   
 #if KPC
        /* double-buffered performance counter data */
        uint64_t                *cpu_kpc_buf[2];
index 9cfd5892b4c6322e5033f39314c7b2eae16b5447..7709295b164ea8ebcdd01841140ae7537fea5537 100644 (file)
@@ -694,18 +694,31 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
        }
 
        if (info_p->cpuid_max_basic >= 0xd) {
-               cpuid_xsave_leaf_t      *xsp = &info_p->cpuid_xsave_leaf;
+               cpuid_xsave_leaf_t      *xsp;
                /*
                 * XSAVE Features:
                 */
-               cpuid_fn(0xd, info_p->cpuid_xsave_leaf.extended_state);
+               xsp = &info_p->cpuid_xsave_leaf[0];
                info_p->cpuid_xsave_leafp = xsp;
+               xsp->extended_state[eax] = 0xd;
+               xsp->extended_state[ecx] = 0;
+               cpuid(xsp->extended_state);
+               DBG(" XSAVE Main leaf:\n");
+               DBG("  EAX           : 0x%x\n", xsp->extended_state[eax]);
+               DBG("  EBX           : 0x%x\n", xsp->extended_state[ebx]);
+               DBG("  ECX           : 0x%x\n", xsp->extended_state[ecx]);
+               DBG("  EDX           : 0x%x\n", xsp->extended_state[edx]);
 
-               DBG(" XSAVE Leaf:\n");
+               xsp = &info_p->cpuid_xsave_leaf[1];
+               xsp->extended_state[eax] = 0xd;
+               xsp->extended_state[ecx] = 1;
+               cpuid(xsp->extended_state);
+               DBG(" XSAVE Sub-leaf1:\n");
                DBG("  EAX           : 0x%x\n", xsp->extended_state[eax]);
                DBG("  EBX           : 0x%x\n", xsp->extended_state[ebx]);
                DBG("  ECX           : 0x%x\n", xsp->extended_state[ecx]);
                DBG("  EDX           : 0x%x\n", xsp->extended_state[edx]);
+
        }
 
        if (info_p->cpuid_model >= CPUID_MODEL_IVYBRIDGE) {
@@ -719,8 +732,6 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
                DBG("  EBX           : 0x%x\n", reg[ebx]);
                DBG("  ECX           : 0x%x\n", reg[ecx]);
        }
-
-       return;
 }
 
 static uint32_t
@@ -940,9 +951,9 @@ leaf7_feature_map[] = {
        {CPUID_LEAF7_FEATURE_BMI2,     "BMI2"},
        {CPUID_LEAF7_FEATURE_INVPCID,  "INVPCID"},
        {CPUID_LEAF7_FEATURE_RTM,      "RTM"},
+       {CPUID_LEAF7_FEATURE_SMAP,     "SMAP"},
        {CPUID_LEAF7_FEATURE_RDSEED,   "RDSEED"},
        {CPUID_LEAF7_FEATURE_ADX,      "ADX"},
-       {CPUID_LEAF7_FEATURE_SMAP,     "SMAP"},
        {0, 0}
 };
 
@@ -1002,7 +1013,7 @@ void
 cpuid_feature_display(
        const char      *header)
 {
-       char    buf[256];
+       char    buf[320];
 
        kprintf("%s: %s", header,
                 cpuid_get_feature_names(cpuid_features(), buf, sizeof(buf)));
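The cpuid change stops treating XSAVE information as a single leaf and instead captures both the main leaf (EAX=0xD, ECX=0) and sub-leaf 1 (EAX=0xD, ECX=1), which carries its own feature and size data. A self-contained user-space equivalent using the compiler-provided <cpuid.h> helper; the fields are only printed, not interpreted:

#include <cpuid.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t eax, ebx, ecx, edx;

	__cpuid_count(0xd, 0, eax, ebx, ecx, edx);   /* XSAVE main leaf */
	printf("XSAVE main leaf:  EAX=0x%x EBX=0x%x ECX=0x%x EDX=0x%x\n", eax, ebx, ecx, edx);

	__cpuid_count(0xd, 1, eax, ebx, ecx, edx);   /* XSAVE sub-leaf 1 */
	printf("XSAVE sub-leaf 1: EAX=0x%x EBX=0x%x ECX=0x%x EDX=0x%x\n", eax, ebx, ecx, edx);

	return 0;
}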
index 980945d50a80ba4943166263530f07eefed497d4..adb8c4a62742a6af30affcc54d3a4d06a8de70d3 100644 (file)
@@ -348,7 +348,7 @@ typedef struct {
 #define cpuid_mwait_sub_Cstates                cpuid_mwait_leaf.sub_Cstates
        cpuid_thermal_leaf_t    cpuid_thermal_leaf;
        cpuid_arch_perf_leaf_t  cpuid_arch_perf_leaf;
-       cpuid_xsave_leaf_t      cpuid_xsave_leaf;
+       uint32_t        unused[4];                      /* cpuid_xsave_leaf */
 
        /* Cache details: */
        uint32_t        cpuid_cache_linesize;
@@ -383,6 +383,7 @@ typedef struct {
        cpuid_arch_perf_leaf_t  *cpuid_arch_perf_leafp;
        cpuid_xsave_leaf_t      *cpuid_xsave_leafp;
        uint64_t                cpuid_leaf7_features;
+       cpuid_xsave_leaf_t      cpuid_xsave_leaf[2];
 } i386_cpu_info_t;
 
 #ifdef MACH_KERNEL_PRIVATE
diff --git a/osfmk/i386/flipc_page.h b/osfmk/i386/flipc_page.h
deleted file mode 100644 (file)
index 8236be5..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- * 
- */
-
-#include <mach/machine/vm_param.h>
-
-/* 
- * Machine specific defines to allow flipc to work with pages.
- * Included from flipc_usermsg.c only.
- */
-#define FLIPC_PAGESIZE I386_PGBYTES
-#define FLIPC_PAGERND_FN i386_round_page
-#define FLIPC_BTOP i386_btop
index 5f4ef89341a60a02a4ec4b7bc2bcd03a01a29056..df870f71e5aa2e524cff136905f12554b08f3d7c 100644 (file)
@@ -241,13 +241,10 @@ init_fpu(void)
         * AVX/YMM registers
         */
        if (cpuid_features() & CPUID_FEATURE_XSAVE) {
-               cpuid_xsave_leaf_t *xsp = &cpuid_info()->cpuid_xsave_leaf;
+               cpuid_xsave_leaf_t *xsp = &cpuid_info()->cpuid_xsave_leaf[0];
                if (xsp->extended_state[0] & (uint32_t)XFEM_YMM) {
                        assert(xsp->extended_state[0] & (uint32_t) XFEM_SSE);
                        /* XSAVE container size for all features */
-                       if (xsp->extended_state[2] != sizeof(struct x86_avx_thread_state))
-                               kprintf("sizeof(struct x86_avx_thread_state)=%lu != xsp->extended_state[2]=%u\n",
-                                       sizeof(struct x86_avx_thread_state), xsp->extended_state[2]);
                        fp_register_state_size = sizeof(struct x86_avx_thread_state);
                        fpu_YMM_present = TRUE;
                        set_cr4(get_cr4() | CR4_OSXSAVE);
@@ -255,7 +252,8 @@ init_fpu(void)
                        /* Re-evaluate CPUID, once, to reflect OSXSAVE */
                        if (OSCompareAndSwap(0, 1, &cpuid_reevaluated))
                                cpuid_set_info();
-                       /* DRK: consider verifying AVX offset with cpuid(d, ECX:2) */
+                       /* Verify that now selected state can be accommodated */
+                       assert(xsp->extended_state[1] == fp_register_state_size);
                }
        }
        else
@@ -734,6 +732,8 @@ fpinit(void)
  * Coprocessor not present.
  */
 
+uint64_t x86_isr_fp_simd_use;
+
 void
 fpnoextflt(void)
 {
@@ -763,11 +763,17 @@ fpnoextflt(void)
        clear_ts();                     /*  Enable FPU use */
 
        if (__improbable(get_interrupt_level())) {
+               /* Track number of #DNA traps at interrupt context,
+                * which is likely suboptimal. Racy, but good enough.
+                */
+               x86_isr_fp_simd_use++;
                /*
-                * Save current coprocessor context if valid
-                * Initialize coprocessor live context
+                * Save current FP/SIMD context if valid
+                * Initialize live FP/SIMD registers
                 */
-               fp_save(thr_act);
+               if (pcb->ifps) {
+                       fp_save(thr_act);
+               }
                fpinit();
        } else {
                if (pcb->ifps == 0) {
index 287aa8bd05e4588b5382812c4af0bf62cb4d6cf2..1de6184730296a6e6cd6414fe46c5f8d52efd4b1 100644 (file)
  */
 
 #define DECLARE(SYM,VAL) \
-       __asm("#DEFINITION##define " SYM "\t%0" : : "n" ((u_int)(VAL)))
+       __asm("DEFINITION__define__" SYM ":\t .ascii \"%0\"" : : "n"  ((u_int)(VAL)))
 
 int    main(
                int             argc,
@@ -139,7 +139,6 @@ main(
        DECLARE("MUTEX_PTR",   offsetof(lck_mtx_t, lck_mtx_ptr));
        DECLARE("MUTEX_STATE", offsetof(lck_mtx_t, lck_mtx_state));
        DECLARE("MUTEX_IND",    LCK_MTX_TAG_INDIRECT);
-       DECLARE("MUTEX_PTR",    offsetof(lck_mtx_t, lck_mtx_ptr));
        DECLARE("MUTEX_ASSERT_OWNED",   LCK_MTX_ASSERT_OWNED);
        DECLARE("MUTEX_ASSERT_NOTOWNED",LCK_MTX_ASSERT_NOTOWNED);
        DECLARE("GRP_MTX_STAT_UTIL",    offsetof(lck_grp_t, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt));
@@ -310,17 +309,7 @@ main(
        DECLARE("ASM_COMM_PAGE32_START_ADDRESS",  _COMM_PAGE32_START_ADDRESS);
        DECLARE("ASM_COMM_PAGE_SCHED_GEN",  _COMM_PAGE_SCHED_GEN);
 
-       DECLARE("PDESHIFT",     PDESHIFT);
-       DECLARE("PTEMASK",      PTEMASK);
-       DECLARE("PTEINDX",      PTEINDX);
-       DECLARE("INTEL_PTE_PFN",        INTEL_PTE_PFN);
-       DECLARE("INTEL_PTE_VALID",      INTEL_PTE_VALID);
-       DECLARE("INTEL_PTE_WRITE",      INTEL_PTE_WRITE);
-       DECLARE("INTEL_PTE_PS",       INTEL_PTE_PS);
-       DECLARE("INTEL_PTE_USER",        INTEL_PTE_USER);
-       DECLARE("INTEL_PTE_INVALID",    INTEL_PTE_INVALID);
-       DECLARE("NPGPTD", NPGPTD);
-       DECLARE("KERNEL_PML4_INDEX",KERNEL_PML4_INDEX);
+       DECLARE("KERNEL_PML4_INDEX", KERNEL_PML4_INDEX);
        DECLARE("IDTSZ",        IDTSZ);
        DECLARE("GDTSZ",        GDTSZ);
        DECLARE("LDTSZ",        LDTSZ);
@@ -423,8 +412,6 @@ main(
        DECLARE("CPU_PMAP_PCID_FLUSHES",
            offsetof(cpu_data_t, cpu_pmap_pcid_flushes));
 #endif
-       DECLARE("CPU_TLB_INVALID",
-               offsetof(cpu_data_t, cpu_tlb_invalid));
        DECLARE("CPU_TLB_INVALID_LOCAL",
            offsetof(cpu_data_t, cpu_tlb_invalid_local));
        DECLARE("CPU_TLB_INVALID_GLOBAL",
@@ -443,15 +430,6 @@ main(
        DECLARE("dgMisc4",              offsetof(struct diagWork, dgMisc4));
        DECLARE("dgMisc5",              offsetof(struct diagWork, dgMisc5));
 
-       DECLARE("INTEL_PTE_KERNEL",     INTEL_PTE_VALID|INTEL_PTE_WRITE);
-       DECLARE("PDESHIFT",     PDESHIFT);
-       DECLARE("PDESIZE",     PDESIZE);
-       DECLARE("PTESIZE",     PTESIZE);
-
-       DECLARE("KERNELBASEPDE",
-               (LINEAR_KERNEL_ADDRESS >> PDESHIFT) *
-               sizeof(pt_entry_t));
-
        DECLARE("TSS_ESP0",     offsetof(struct i386_tss, esp0));
        DECLARE("TSS_SS0",      offsetof(struct i386_tss, ss0));
        DECLARE("TSS_LDT",      offsetof(struct i386_tss, ldt));
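The reworked DECLARE macro switches from hiding a fake #define inside an asm comment to emitting a greppable label whose value appears as the immediate substituted for %0; the assembler output is then post-processed into a header of constants. A small stand-alone model of the technique, with an invented structure and symbol names; compile with `cc -S` and look for the DEFINITION__ lines in the generated .s file:

#include <stddef.h>

struct demo_pcb {
	unsigned long saved_rsp;
	unsigned long saved_rip;
};

#define DECLARE(SYM, VAL) \
	__asm("DEFINITION__define__" SYM ":\t.ascii \"%0\"" : : "n"((unsigned int)(VAL)))

int
main(void)
{
	DECLARE("PCB_SAVED_RSP", offsetof(struct demo_pcb, saved_rsp));
	DECLARE("PCB_SAVED_RIP", offsetof(struct demo_pcb, saved_rip));
	return 0;
}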
index 0a7df1871c5cec568108194f2b00eca7281747b3..844b547506852547f395d359844187647e76340b 100644 (file)
@@ -219,7 +219,7 @@ physmap_init(void)
                            ((i * PTE_PER_PAGE + j) << PDSHIFT)
                                                        | INTEL_PTE_PS
                                                        | INTEL_PTE_VALID
-                                                       | INTEL_PTE_NX
+                                                       | INTEL_PTE_NX
                                                        | INTEL_PTE_WRITE;
                }
        }
@@ -336,7 +336,8 @@ vstart(vm_offset_t boot_args_start)
                kernelBootArgs = (boot_args *)boot_args_start;
                lphysfree = kernelBootArgs->kaddr + kernelBootArgs->ksize;
                physfree = (void *)(uintptr_t)((lphysfree + PAGE_SIZE - 1) &~ (PAGE_SIZE - 1));
-#if DEBUG
+
+#if DEVELOPMENT || DEBUG
                pal_serial_init();
 #endif
                DBG("revision      0x%x\n", kernelBootArgs->Revision);
@@ -351,6 +352,8 @@ vstart(vm_offset_t boot_args_start)
                        kernelBootArgs, 
                        &kernelBootArgs->ksize,
                        &kernelBootArgs->kaddr);
+               DBG("SMBIOS mem sz 0x%llx\n", kernelBootArgs->PhysicalMemorySize);
+
                /*
                 * Setup boot args given the physical start address.
                 * Note: PE_init_platform needs to be called before Idle_PTs_init
@@ -412,6 +415,7 @@ i386_init(void)
        unsigned int    cpus = 0;
        boolean_t       fidn;
        boolean_t       IA32e = TRUE;
+       char            namep[16];
 
        postcode(I386_INIT_ENTRY);
 
@@ -419,7 +423,7 @@ i386_init(void)
        tsc_init();
        rtclock_early_init();   /* mach_absolute_time() now functional */
        rtclock_early_init();   /* mach_absolute_time() now functional */
 
-       kernel_debug_string("i386_init");
+       kernel_debug_string_simple("i386_init");
        pstate_trace();
 
 #if CONFIG_MCA
@@ -436,10 +440,13 @@ i386_init(void)
        panic_init();                   /* Init this in case we need debugger */
 
        /* setup debugging output if one has been chosen */
-       kernel_debug_string("PE_init_kprintf");
+       kernel_debug_string_simple("PE_init_kprintf");
        PE_init_kprintf(FALSE);
 
-       kernel_debug_string("kernel_early_bootstrap");
+       if(PE_parse_boot_argn("-show_pointers", &namep, sizeof (namep)))
+               doprnt_hide_pointers = FALSE;
+
+       kernel_debug_string_simple("kernel_early_bootstrap");
        kernel_early_bootstrap();
 
        if (!PE_parse_boot_argn("diag", &dgWork.dgFlags, sizeof (dgWork.dgFlags)))
@@ -456,7 +463,7 @@ i386_init(void)
        }
 
        /* setup console output */
-       kernel_debug_string("PE_init_printf");
+       kernel_debug_string_simple("PE_init_printf");
        PE_init_printf(FALSE);
 
        kprintf("version_variant = %s\n", version_variant);
@@ -498,7 +505,7 @@ i386_init(void)
         * VM initialization, after this we're using page tables...
         * The maximum number of cpus must be set beforehand.
         */
-       kernel_debug_string("i386_vm_init");
+       kernel_debug_string_simple("i386_vm_init");
        i386_vm_init(maxmemtouse, IA32e, kernelBootArgs);
 
        /* create the console for verbose or pretty mode */
@@ -506,13 +513,13 @@ i386_init(void)
        PE_init_platform(TRUE, kernelBootArgs);
        PE_create_console();
 
-       kernel_debug_string("power_management_init");
+       kernel_debug_string_simple("power_management_init");
        power_management_init();
        processor_bootstrap();
        thread_bootstrap();
 
        pstate_trace();
-       kernel_debug_string("machine_startup");
+       kernel_debug_string_simple("machine_startup");
        machine_startup();
        pstate_trace();
 }
index bceb1559d3e19f3986d0299a96f0aa3b58acf1cd..61355263f6304ec933c85c9923d713f547cf1c20 100644 (file)
@@ -1600,6 +1600,31 @@ Llmu_ext:
 
 
        
+LEAF_ENTRY(lck_mtx_ilk_try_lock)
+       mov     %rdi, %rdx              /* fetch lock pointer - no indirection here */
+
+       mov     M_STATE(%rdx), %ecx
+
+       test    $(M_ILOCKED_MSK), %ecx  /* can't have the interlock yet */
+       jnz     3f
+
+       mov     %rcx, %rax              /* eax contains snapshot for cmpxchgl */
+       or      $(M_ILOCKED_MSK), %ecx
+
+       PREEMPTION_DISABLE
+       lock
+       cmpxchg %ecx, M_STATE(%rdx)     /* atomic compare and exchange */
+       jne     2f                      /* return failure after re-enabling preemption */
+
+       mov     $1, %rax                /* return success with preemption disabled */
+       LEAF_RET
+2:     
+       PREEMPTION_ENABLE               /* need to re-enable preemption */
+3:     
+       xor     %rax, %rax              /* return failure */
+       LEAF_RET
+       
+
 LEAF_ENTRY(lck_mtx_ilk_unlock)
        mov     %rdi, %rdx              /* fetch lock pointer - no indirection here */
 
@@ -1608,7 +1633,6 @@ LEAF_ENTRY(lck_mtx_ilk_unlock)
        PREEMPTION_ENABLE               /* need to re-enable preemption */
 
        LEAF_RET
-       
 
        
 LEAF_ENTRY(lck_mtx_lock_grab_mutex)
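The new lck_mtx_ilk_try_lock leaf routine attempts a single compare-and-exchange that sets M_ILOCKED_MSK in the mutex state word: on success it returns 1 with preemption left disabled, on failure it re-enables preemption and returns 0. A C rendering of the same control flow using C11 atomics (the bit value and the preemption stubs are placeholders; in the kernel these are the real M_ILOCKED_MSK and PREEMPTION_DISABLE/ENABLE macros):

#include <stdatomic.h>
#include <stdint.h>

#define M_ILOCKED_MSK 0x00000001u        /* placeholder bit for the interlock */

static void preemption_disable(void) { /* kernel-only; no-op in this sketch */ }
static void preemption_enable(void)  { /* kernel-only; no-op in this sketch */ }

/* Returns nonzero with preemption disabled if the interlock was taken. */
int
ilk_try_lock(_Atomic uint32_t *state)
{
	uint32_t snapshot = atomic_load_explicit(state, memory_order_relaxed);

	if (snapshot & M_ILOCKED_MSK)
		return 0;                        /* interlock already held: fail fast */

	preemption_disable();
	if (atomic_compare_exchange_strong(state, &snapshot, snapshot | M_ILOCKED_MSK))
		return 1;                        /* success: caller keeps preemption off */

	preemption_enable();                     /* lost the race: undo and report failure */
	return 0;
}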
index 8a1d753b588ff8fbf8d5474f1c4d5a806f680133..81b9d6f51c8f239566b6d58254c2f3868e36e872 100644 (file)
@@ -196,7 +196,7 @@ i386_vm_init(uint64_t       maxmem,
        vm_kernel_base_page = i386_btop(args->kaddr);
        vm_offset_t base_address;
        vm_offset_t static_base_address;
-
+    
        /*
         * Establish the KASLR parameters.
         */
@@ -251,10 +251,10 @@ i386_vm_init(uint64_t     maxmem,
                                        "__LINKEDIT", &segSizeLINK);
        segHIBB  = (vm_offset_t) getsegdatafromheader(&_mh_execute_header,
                                        "__HIB", &segSizeHIB);
-       segPRELINKB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header,
-                                       "__PRELINK_TEXT", &segSizePRELINK);
+    segPRELINKB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header,
+                                                     "__PRELINK_TEXT", &segSizePRELINK);
     segPRELINKINFOB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header,
-                    "__PRELINK_INFO", &segSizePRELINKINFO);
+                                                     "__PRELINK_INFO", &segSizePRELINKINFO);
        segTEXT = getsegbynamefromheader(&_mh_execute_header,
                                        "__TEXT");
        segDATA = getsegbynamefromheader(&_mh_execute_header,
@@ -295,7 +295,7 @@ i386_vm_init(uint64_t       maxmem,
        DBG("segLINKB    = %p\n", (void *) segLINKB);
        DBG("segHIBB     = %p\n", (void *) segHIBB);
        DBG("segPRELINKB = %p\n", (void *) segPRELINKB);
-    DBG("segPRELINKINFOB = %p\n", (void *) segPRELINKINFOB);
+       DBG("segPRELINKINFOB = %p\n", (void *) segPRELINKINFOB);
        DBG("sHIB        = %p\n", (void *) sHIB);
        DBG("eHIB        = %p\n", (void *) eHIB);
        DBG("stext       = %p\n", (void *) stext);
@@ -310,6 +310,7 @@ i386_vm_init(uint64_t       maxmem,
        vm_kernel_top   = (vm_offset_t) &last_kernel_symbol;
        vm_kernel_stext = stext;
        vm_kernel_etext = etext;
+
     vm_prelink_stext = segPRELINKB;
     vm_prelink_etext = segPRELINKB + segSizePRELINK;
     vm_prelink_sinfo = segPRELINKINFOB;
@@ -401,15 +402,10 @@ i386_vm_init(uint64_t     maxmem,
                         * sane_size should reflect the total amount of physical
                         * RAM in the system, not just the amount that is
                         * available for the OS to use.
-                        * FIXME:Consider deriving this value from SMBIOS tables
+                        * We now get this value from SMBIOS tables
                         * rather than reverse engineering the memory map.
-                        * Alternatively, see
-                        * <rdar://problem/4642773> Memory map should
-                        * describe all memory
-                        * Firmware on some systems guarantees that the memory
-                        * map is complete via the "RomReservedMemoryTracked"
-                        * feature field--consult that where possible to
-                        * avoid the "round up to 128M" workaround below.
+                        * But the legacy computation of "sane_size" is kept
+                        * for diagnostic information.
                         */
 
                case kEfiRuntimeServicesCode:
@@ -614,14 +610,19 @@ i386_vm_init(uint64_t     maxmem,
 #endif
 
        avail_start = first_avail;
-       mem_actual = sane_size;
+       mem_actual = args->PhysicalMemorySize;
 
        /*
-        * For user visible memory size, round up to 128 Mb - accounting for the various stolen memory
-        * not reported by EFI.
+        * For user visible memory size, round up to 128 Mb
+        * - accounting for the various stolen memory not reported by EFI.
+        * This is maintained for historical, comparison purposes but
+        * we now use the memory size reported by EFI/Booter.
         */
-
        sane_size = (sane_size + 128 * MB - 1) & ~((uint64_t)(128 * MB - 1));
+       if (sane_size != mem_actual)
+               printf("mem_actual: 0x%llx\n legacy sane_size: 0x%llx\n",
+                       mem_actual, sane_size);
+       sane_size = mem_actual;
 
        /*
         * We cap at KERNEL_MAXMEM bytes (currently 32GB for K32, 96GB for K64).
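The memory-sizing hunk keeps the legacy round-up of the EFI-map total to a 128 MB boundary purely for comparison, then adopts the SMBIOS/Booter figure (args->PhysicalMemorySize) as the authoritative value. A tiny self-contained illustration of the two figures (the byte counts are invented):

#include <stdint.h>
#include <stdio.h>

#define MB (1024ULL * 1024ULL)

int
main(void)
{
	uint64_t sane_size  = 8012ULL * MB;      /* map-derived total, misses stolen memory */
	uint64_t mem_actual = 8192ULL * MB;      /* as reported by SMBIOS/EFI */

	/* legacy behaviour: round the map-derived figure up to 128 MB */
	uint64_t legacy = (sane_size + 128 * MB - 1) & ~((uint64_t)(128 * MB - 1));

	printf("legacy sane_size = %llu MB, mem_actual = %llu MB\n",
	       (unsigned long long)(legacy / MB), (unsigned long long)(mem_actual / MB));
	return 0;                                /* new code uses mem_actual directly */
}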
index 1c141785addd65e47dcc0dfd7a2b57f2ebf9ab88..f684729554055514111819195c1f8f0935f38119 100644 (file)
@@ -85,7 +85,7 @@ io_map(vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags)
                               flags);
        }
        else {
-           (void) kmem_alloc_pageable(kernel_map, &start, round_page(size));
+           (void) kmem_alloc_pageable(kernel_map, &start, round_page(size), VM_KERN_MEMORY_IOKIT);
            (void) pmap_map(start, phys_addr, phys_addr + round_page(size),
                            VM_PROT_READ|VM_PROT_WRITE,
                            flags);
index b8b62f6c25f5bdb4df24ffdc7ae6453c6e086920..1a1bc5845f24dfa5c96c4a21887d9d07cb6fc430 100644 (file)
 #include <i386/machine_check.h>
 #endif
 
-#if CONFIG_COUNTERS
-#include <pmc/pmc.h>
-#endif
-
 #include <sys/kdebug.h>
 
 #if    MP_DEBUG
@@ -121,7 +117,7 @@ legacy_init(void)
                result = vm_map_find_space(kernel_map,
                                           &lapic_vbase64,
                                           round_page(LAPIC_SIZE), 0,
-                                          VM_MAKE_TAG(VM_MEMORY_IOKIT), &entry);
+                                          VM_MAKE_TAG(VM_KERN_MEMORY_IOKIT), &entry);
                /* Convert 64-bit vm_map_offset_t to "pointer sized" vm_offset_t
                 */
                lapic_vbase = (vm_offset_t) lapic_vbase64;
@@ -800,15 +796,6 @@ lapic_interrupt(int interrupt_num, x86_saved_state_t *state)
                break;
        case LAPIC_PMC_SW_INTERRUPT: 
                {
-#if CONFIG_COUNTERS
-                       thread_t old, new;
-                       ml_get_csw_threads(&old, &new);
-
-                       if (pmc_context_switch(old, new) == TRUE) {
-                               retval = 1;
-                               /* No EOI required for SWI */
-                       }
-#endif /* CONFIG_COUNTERS */
                }
                break;
        case LAPIC_KICK_INTERRUPT:
@@ -977,7 +964,7 @@ void
 lapic_trigger_MC(void)
 {
        /* A 64-bit access to any register will do it. */
-       volatile uint64_t dummy = *(uint64_t *) (void *) LAPIC_MMIO(ID);
+       volatile uint64_t dummy = *(volatile uint64_t *) (volatile void *) LAPIC_MMIO(ID);
        dummy++;
 }
 #endif
index 22e1a01f9d13db56da781013088a894ef4275961..2934da1cec4e0e2208d776893a3cf3ed2fe690e5 100644 (file)
@@ -152,6 +152,8 @@ typedef struct _lck_mtx_ext_ {
 #define        LCK_MTX_ATTR_STAT       0x2
 #define        LCK_MTX_ATTR_STATb      1
 
+#define LCK_MTX_EVENT(lck) ((event_t)(((unsigned int*)lck)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)))
+
 #else /* MACH_KERNEL_PRIVATE */
 #ifdef XNU_KERNEL_PRIVATE
 typedef struct {
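LCK_MTX_EVENT(lck) centralizes the pointer arithmetic that previously appeared open-coded at every wait and wakeup site: the wait event is simply the address of the last 32-bit word inside the lck_mtx_t itself, so no separate event object is allocated. A tiny self-contained check of that arithmetic against a dummy lock layout (the struct is a stand-in, not the real lck_mtx_t):

#include <stdint.h>
#include <stdio.h>

typedef void *event_t;

typedef struct {
	uint64_t owner;
	uint32_t state;
	uint32_t last_word;
} demo_mtx_t;

#define DEMO_MTX_EVENT(lck) \
	((event_t)(((unsigned int *)(lck)) + (sizeof(demo_mtx_t) - 1) / sizeof(unsigned int)))

int
main(void)
{
	demo_mtx_t m;

	/* both pointers should match: the event is the lock's final word */
	printf("event = %p, &m.last_word = %p\n", DEMO_MTX_EVENT(&m), (void *)&m.last_word);
	return 0;
}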
index 4dd253e01dea0f8226683c58b155f83bef11fb97..130ba126a4028aeb0516ae451af389bf87b50bb4 100644 (file)
@@ -291,12 +291,12 @@ lck_spin_try_lock(
 }
 
 /*
- *      Routine: lck_spin_is_acquired
+ *      Routine: kdp_lck_spin_is_acquired
  *      NOT SAFE: To be used only by kernel debugger to avoid deadlock.
  *      Returns: TRUE if lock is acquired.
  */
 boolean_t
-lck_spin_is_acquired(lck_spin_t *lck) {
+kdp_lck_spin_is_acquired(lck_spin_t *lck) {
        if (not_in_kdp) {
                panic("panic: spinlock acquired check done outside of kernel debugger");
        }
@@ -860,6 +860,7 @@ void
 lck_rw_lock_exclusive_gen(
        lck_rw_t        *lck)
 {
+       __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
        uint64_t        deadline = 0;
        int             slept = 0;
        int             gotlock = 0;
@@ -900,12 +901,12 @@ lck_rw_lock_exclusive_gen(
 
                deadline = lck_rw_deadline_for_spin(lck);
 
-               KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
+               KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
                
                while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline)
                        lck_rw_lock_pause(istate);
 
-               KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, gotlock, 0);
+               KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0);
 
                if (gotlock)
                        break;
@@ -920,7 +921,7 @@ lck_rw_lock_exclusive_gen(
 
                        if (lck->lck_rw_want_write) {
 
-                               KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
+                               KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
 
                                lck->lck_w_waiting = TRUE;
 
@@ -931,7 +932,7 @@ lck_rw_lock_exclusive_gen(
                                        res = thread_block(THREAD_CONTINUE_NULL);
                                        slept++;
                                }
-                               KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
+                               KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
                        } else {
                                lck->lck_rw_want_write = TRUE;
                                lck_interlock_unlock(lck, istate);
@@ -979,12 +980,12 @@ lck_rw_lock_exclusive_gen(
 
                deadline = lck_rw_deadline_for_spin(lck);
 
-               KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
+               KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
 
                while ((lockheld = lck_rw_held_read_or_upgrade(lck)) && mach_absolute_time() < deadline)
                        lck_rw_lock_pause(istate);
 
-               KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, lockheld, 0);
+               KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, lockheld, 0);
 
                if ( !lockheld)
                        break;
@@ -998,7 +999,7 @@ lck_rw_lock_exclusive_gen(
                        istate = lck_interlock_lock(lck);
 
                        if (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade) {
-                               KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
+                               KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
 
                                lck->lck_w_waiting = TRUE;
 
@@ -1009,7 +1010,7 @@ lck_rw_lock_exclusive_gen(
                                        res = thread_block(THREAD_CONTINUE_NULL);
                                        slept++;
                                }
-                               KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0);
+                               KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
                        } else {
                                lck_interlock_unlock(lck, istate);
                                /*
@@ -1198,12 +1199,13 @@ void
 lck_rw_lock_shared_gen(
        lck_rw_t        *lck)
 {
+       __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
        uint64_t        deadline = 0;
        int             gotlock = 0;
        int             slept = 0;
        wait_result_t   res = 0;
        boolean_t       istate = -1;
-       
+
 #if    CONFIG_DTRACE
        uint64_t wait_interval = 0;
        int readers_at_sleep = 0;
@@ -1235,13 +1237,13 @@ lck_rw_lock_shared_gen(
                deadline = lck_rw_deadline_for_spin(lck);
 
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
-                            (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
+                            trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
 
                while (((gotlock = lck_rw_grab_shared(lck)) == 0) && mach_absolute_time() < deadline)
                        lck_rw_lock_pause(istate);
 
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
-                            (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);
+                            trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);
 
                if (gotlock)
                        break;
@@ -1258,7 +1260,7 @@ lck_rw_lock_shared_gen(
                            ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {
 
                                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
-                                            (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
+                                            trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
 
                                lck->lck_r_waiting = TRUE;
 
@@ -1270,7 +1272,7 @@ lck_rw_lock_shared_gen(
                                        slept++;
                                }
                                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
-                                            (int)lck, res, slept, 0, 0);
+                                            trace_lck, res, slept, 0, 0);
                        } else {
                                lck->lck_rw_shared_count++;
                                lck_interlock_unlock(lck, istate);
@@ -1340,7 +1342,7 @@ lck_rw_lock_shared_to_exclusive_failure(
                thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
        }
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
-                    (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
+                    VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
 
        return (FALSE);
 }
@@ -1358,6 +1360,7 @@ boolean_t
 lck_rw_lock_shared_to_exclusive_success(
        lck_rw_t        *lck)
 {
+       __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
        uint64_t        deadline = 0;
        int             slept = 0;
        int             still_shared = 0;
@@ -1395,13 +1398,13 @@ lck_rw_lock_shared_to_exclusive_success(
                deadline = lck_rw_deadline_for_spin(lck);
 
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
-                            (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
+                            trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
 
                while ((still_shared = lck->lck_rw_shared_count) && mach_absolute_time() < deadline)
                        lck_rw_lock_pause(istate);
 
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
-                            (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
+                            trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
 
                if ( !still_shared)
                        break;
@@ -1416,7 +1419,7 @@ lck_rw_lock_shared_to_exclusive_success(
                        
                        if (lck->lck_rw_shared_count != 0) {
                                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
-                                            (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
+                                            trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
 
                                lck->lck_w_waiting = TRUE;
 
@@ -1428,7 +1431,7 @@ lck_rw_lock_shared_to_exclusive_success(
                                        slept++;
                                }
                                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
-                                            (int)lck, res, slept, 0, 0);
+                                            trace_lck, res, slept, 0, 0);
                        } else {
                                lck_interlock_unlock(lck, istate);
                                break;
@@ -1467,7 +1470,8 @@ lck_rw_lock_exclusive_to_shared_gen(
        lck_rw_t        *lck,
        int             prior_lock_state)
 {
-       lck_rw_t        *fake_lck;
+       __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
+       lck_rw_t                *fake_lck;
 
        /*
         * prior_lock state is a snapshot of the 1st word of the
@@ -1478,7 +1482,7 @@ lck_rw_lock_exclusive_to_shared_gen(
        fake_lck = (lck_rw_t *)&prior_lock_state;
 
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
-                            (int)lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0);
+                            trace_lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0);
 
        /*
         * don't wake up anyone waiting to take the lock exclusively
@@ -1492,7 +1496,7 @@ lck_rw_lock_exclusive_to_shared_gen(
                thread_wakeup(RW_LOCK_READER_EVENT(lck));
 
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
-                            (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
+                            trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
 
 #if CONFIG_DTRACE
        LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
@@ -1572,6 +1576,19 @@ lck_rw_clear_promotions_x86(thread_t thread)
 }
 
 
+/*
+ * Routine: kdp_lck_rw_lock_is_acquired_exclusive
+ * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
+ */
+boolean_t
+kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck) {
+       if (not_in_kdp) {
+               panic("panic: rw lock exclusive check done outside of kernel debugger");
+       }
+       return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_write) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
+}
+
+
 #ifdef MUTEX_ZONE
 extern zone_t lck_mtx_zone;
 #endif
@@ -1747,7 +1764,8 @@ lck_mtx_unlock_wakeup_x86 (
        lck_mtx_t       *mutex,
        int             prior_lock_state)
 {
-       lck_mtx_t       fake_lck;
+       __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
+       lck_mtx_t               fake_lck;
 
        /*
         * prior_lock state is a snapshot of the 2nd word of the
@@ -1758,13 +1776,13 @@ lck_mtx_unlock_wakeup_x86 (
        fake_lck.lck_mtx_state = prior_lock_state;
 
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
-                    mutex, fake_lck.lck_mtx_promoted, fake_lck.lck_mtx_waiters, fake_lck.lck_mtx_pri, 0);
+                    trace_lck, fake_lck.lck_mtx_promoted, fake_lck.lck_mtx_waiters, fake_lck.lck_mtx_pri, 0);
 
        if (__probable(fake_lck.lck_mtx_waiters)) {
                if (fake_lck.lck_mtx_waiters > 1)
-                       thread_wakeup_one_with_pri((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)), fake_lck.lck_mtx_pri);
+                       thread_wakeup_one_with_pri(LCK_MTX_EVENT(mutex), fake_lck.lck_mtx_pri);
                else
-                       thread_wakeup_one((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));
+                       thread_wakeup_one(LCK_MTX_EVENT(mutex));
        }
 
        if (__improbable(fake_lck.lck_mtx_promoted)) {
@@ -1787,16 +1805,16 @@ lck_mtx_unlock_wakeup_x86 (
                                        /* Thread still has a RW lock promotion */
                                } else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
                                        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
-                                                             thread->sched_pri, DEPRESSPRI, 0, mutex, 0);
+                                                             thread->sched_pri, DEPRESSPRI, 0, trace_lck, 0);
 
                                        set_sched_pri(thread, DEPRESSPRI);
                                }
                                else {
-                                       if (thread->priority < thread->sched_pri) {
+                                       if (thread->base_pri < thread->sched_pri) {
                                                KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
-                                                                     thread->sched_pri, thread->priority, 0, mutex, 0);
+                                                                     thread->sched_pri, thread->base_pri, 0, trace_lck, 0);
 
-                                               SCHED(compute_priority)(thread, FALSE);
+                                               thread_recompute_sched_pri(thread, FALSE);
                                        }
                                }
                        }
@@ -1805,7 +1823,7 @@ lck_mtx_unlock_wakeup_x86 (
                }
        }
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
-                    mutex, 0, mutex->lck_mtx_waiters, 0, 0);
+                    trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
 }
 
 
@@ -1823,12 +1841,13 @@ void
 lck_mtx_lock_acquire_x86(
        lck_mtx_t       *mutex)
 {
-       thread_t        thread;
-       integer_t       priority;
-       spl_t           s;
+       __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
+       thread_t                thread;
+       integer_t               priority;
+       spl_t                   s;
 
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
-                    mutex, thread->was_promoted_on_wakeup, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
+                    trace_lck, thread->was_promoted_on_wakeup, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
 
        if (mutex->lck_mtx_waiters)
                priority = mutex->lck_mtx_pri;
@@ -1840,7 +1859,7 @@ lck_mtx_lock_acquire_x86(
        if (thread->sched_pri < priority || thread->was_promoted_on_wakeup) {
 
                KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
-                                     thread->sched_pri, priority, thread->was_promoted_on_wakeup, mutex, 0);
+                                     thread->sched_pri, priority, thread->was_promoted_on_wakeup, trace_lck, 0);
 
                s = splsched();
                thread_lock(thread);
@@ -1862,10 +1881,31 @@ lck_mtx_lock_acquire_x86(
                splx(s);
        }
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
-                    mutex, 0, mutex->lck_mtx_waiters, 0, 0);
+                    trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
 }
 
 
+static int
+lck_mtx_interlock_try_lock(lck_mtx_t *mutex, boolean_t *istate)
+{
+       int             retval;
+
+       *istate = ml_set_interrupts_enabled(FALSE);
+       retval = lck_mtx_ilk_try_lock(mutex);
+
+       if (retval == 0)
+               ml_set_interrupts_enabled(*istate);
+
+       return retval;
+}
+
+static void
+lck_mtx_interlock_unlock(lck_mtx_t *mutex, boolean_t istate)
+{               
+       lck_mtx_ilk_unlock(mutex);
+       ml_set_interrupts_enabled(istate);
+}
+
 
 /*
  * Routine:    lck_mtx_lock_spinwait_x86
@@ -1883,16 +1923,20 @@ int
 lck_mtx_lock_spinwait_x86(
        lck_mtx_t       *mutex)
 {
+       __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
        thread_t        holder;
-       uint64_t        deadline;
+       uint64_t        overall_deadline;
+       uint64_t        check_owner_deadline;
+       uint64_t        cur_time;
        int             retval = 1;
        int             loopcount = 0;
 
-
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
-                    mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0);
+                    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, 0, 0);
 
-       deadline = mach_absolute_time() + MutexSpin;
+       cur_time = mach_absolute_time();
+       overall_deadline = cur_time + MutexSpin;
+       check_owner_deadline = cur_time;
 
        /*
         * Spin while:
@@ -1907,25 +1951,42 @@ lck_mtx_lock_spinwait_x86(
                        retval = 0;
                        break;
                }
-               if ((holder = (thread_t) mutex->lck_mtx_owner) != NULL) {
+               cur_time = mach_absolute_time();
 
-                       if ( !(holder->machine.specFlags & OnProc) ||
-                            (holder->state & TH_IDLE)) {
-                               if (loopcount == 0)
-                                       retval = 2;
-                               break;
+               if (cur_time >= overall_deadline)
+                       break;
+
+               if (cur_time >= check_owner_deadline && mutex->lck_mtx_owner) {
+                       boolean_t       istate;
+
+                       if (lck_mtx_interlock_try_lock(mutex, &istate)) {
+
+                               if ((holder = (thread_t) mutex->lck_mtx_owner) != NULL) {
+
+                                       if ( !(holder->machine.specFlags & OnProc) ||
+                                            (holder->state & TH_IDLE)) {
+
+                                               lck_mtx_interlock_unlock(mutex, istate);
+
+                                               if (loopcount == 0)
+                                                       retval = 2;
+                                               break;
+                                       }
+                               }
+                               lck_mtx_interlock_unlock(mutex, istate);
+
+                               check_owner_deadline = cur_time + (MutexSpin / 4);
                        }
                }
                cpu_pause();
 
                loopcount++;
 
-       } while (mach_absolute_time() < deadline);
-
+       } while (TRUE);
 
 #if    CONFIG_DTRACE
        /*
-        * We've already kept a count via deadline of how long we spun.
+        * We've already kept a count via overall_deadline of how long we spun.
         * If dtrace is active, then we compute backwards to decide how
         * long we spun.
         *
@@ -1936,16 +1997,16 @@ lck_mtx_lock_spinwait_x86(
         */
        if (__probable(mutex->lck_mtx_is_ext == 0)) {
                LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, mutex,
-                   mach_absolute_time() - (deadline - MutexSpin));
+                       mach_absolute_time() - (overall_deadline - MutexSpin));
        } else {
                LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, mutex,
-                   mach_absolute_time() - (deadline - MutexSpin));
+                       mach_absolute_time() - (overall_deadline - MutexSpin));
        }
        /* The lockstat acquire event is recorded by the assembly code beneath us. */
 #endif
 
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
-                    mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, retval, 0);
+                    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, retval, 0);
 
        return retval;
 }
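The reworked loop above splits the old single deadline in two: overall_deadline caps total spin time at MutexSpin, while check_owner_deadline rate-limits how often the owner is examined under the interlock (pushed out by MutexSpin/4 after each successful check). A condensed sketch of that control flow; try_acquire stands in for the real fast-path attempt, which sits outside this hunk:

    cur_time             = mach_absolute_time();
    overall_deadline     = cur_time + MutexSpin;   /* hard cap on spinning */
    check_owner_deadline = cur_time;               /* first owner check allowed immediately */

    do {
            if (try_acquire(mutex)) {              /* hypothetical: lock became free */
                    retval = 0;
                    break;
            }
            cur_time = mach_absolute_time();
            if (cur_time >= overall_deadline)      /* spun long enough, fall back to blocking */
                    break;
            if (cur_time >= check_owner_deadline && mutex->lck_mtx_owner) {
                    /* take the interlock; if the owner is off-core or idle, stop spinning
                     * early (retval = 2 when no spinning happened at all) */
                    check_owner_deadline = cur_time + (MutexSpin / 4);
            }
            cpu_pause();
            loopcount++;
    } while (TRUE);
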
@@ -1965,6 +2026,7 @@ void
 lck_mtx_lock_wait_x86 (
        lck_mtx_t       *mutex)
 {
+       __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
        thread_t        self = current_thread();
        thread_t        holder;
        integer_t       priority;
@@ -1977,12 +2039,12 @@ lck_mtx_lock_wait_x86 (
        }
 #endif
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
-                    mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
+                    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
 
        priority = self->sched_pri;
 
-       if (priority < self->priority)
-               priority = self->priority;
+       if (priority < self->base_pri)
+               priority = self->base_pri;
        if (priority < BASEPRI_DEFAULT)
                priority = BASEPRI_DEFAULT;
 
@@ -2004,7 +2066,7 @@ lck_mtx_lock_wait_x86 (
                if (holder->sched_pri < mutex->lck_mtx_pri) {
                        KERNEL_DEBUG_CONSTANT(
                                MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
-                               holder->sched_pri, priority, thread_tid(holder), mutex, 0);
+                               holder->sched_pri, priority, thread_tid(holder), trace_lck, 0);
                        /* Assert that we're not altering the priority of a
                         * thread above the MAXPRI_PROMOTE band
                         */
@@ -2021,14 +2083,14 @@ lck_mtx_lock_wait_x86 (
                thread_unlock(holder);
                splx(s);
        }
-       assert_wait((event_t)(((unsigned int*)mutex)+((sizeof(lck_mtx_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
+       assert_wait(LCK_MTX_EVENT(mutex), THREAD_UNINT);
 
        lck_mtx_ilk_unlock(mutex);
 
        thread_block(THREAD_CONTINUE_NULL);
 
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
-                    mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
+                    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
 
 #if    CONFIG_DTRACE
        /*
@@ -2046,3 +2108,23 @@ lck_mtx_lock_wait_x86 (
        }
 #endif
 }
+
+/*
+ *      Routine: kdp_lck_mtx_lock_spin_is_acquired
+ *      NOT SAFE: To be used only by kernel debugger to avoid deadlock.
+ *      Returns: TRUE if lock is acquired.
+ */
+boolean_t
+kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t    *lck)
+{
+       if (not_in_kdp) {
+               panic("panic: kdp_lck_mtx_lock_spin_is_acquired called outside of kernel debugger");
+       }
+
+       if (lck->lck_mtx_sw.lck_mtxd.lck_mtxd_ilocked || lck->lck_mtx_sw.lck_mtxd.lck_mtxd_mlocked) {
+               return TRUE;
+       }
+
+       return FALSE;
+}
+
index 85999aef6c4ff0378961dc2bedc7af6897095fd7..a90d68178a80f6dfc3ea6372a097d9778cba0aff 100644 (file)
@@ -153,14 +153,17 @@ ml_static_mfree(
                        }
                        pmap_remove(kernel_pmap, vaddr_cur, vaddr_cur+PAGE_SIZE);
                        assert(pmap_valid_page(ppn));
-
                        if (IS_MANAGED_PAGE(ppn)) {
                                vm_page_create(ppn,(ppn+1));
-                               vm_page_wire_count--;
                                freed_pages++;
                        }
                }
        }
+       vm_page_lockspin_queues();
+       vm_page_wire_count -= freed_pages;
+       vm_page_wire_count_initial -= freed_pages;
+       vm_page_unlock_queues();
+
 #if    DEBUG   
        kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
 #endif
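Rather than decrementing vm_page_wire_count once per freed page inside the loop, ml_static_mfree now accumulates freed_pages and applies the total once under the page-queues spin lock, also adjusting vm_page_wire_count_initial. The batching pattern in isolation (vm_page_lockspin_queues/vm_page_unlock_queues are the locking primitives used in the hunk):

    unsigned int freed_pages = 0;

    /* per-page loop: return each managed page to the VM and count it, e.g.
     *     vm_page_create(ppn, ppn + 1); freed_pages++;                      */

    vm_page_lockspin_queues();                     /* one lock round-trip for the whole batch */
    vm_page_wire_count         -= freed_pages;
    vm_page_wire_count_initial -= freed_pages;
    vm_page_unlock_queues();
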
@@ -361,6 +364,20 @@ machine_signal_idle(
        cpu_interrupt(processor->cpu_id);
 }
 
+void
+machine_signal_idle_deferred(
+       __unused processor_t processor)
+{
+       panic("Unimplemented");
+}
+
+void
+machine_signal_idle_cancel(
+       __unused processor_t processor)
+{
+       panic("Unimplemented");
+}
+
 static kern_return_t
 register_cpu(
         uint32_t        lapic_id,
@@ -394,19 +411,7 @@ register_cpu(
                goto failed;
 
 #if KPC
-       this_cpu_datap->cpu_kpc_buf[0] = kpc_counterbuf_alloc();
-       if(this_cpu_datap->cpu_kpc_buf[0] == NULL )
-               goto failed;
-       this_cpu_datap->cpu_kpc_buf[1] = kpc_counterbuf_alloc();
-       if(this_cpu_datap->cpu_kpc_buf[1] == NULL )
-               goto failed;
-
-       this_cpu_datap->cpu_kpc_shadow = kpc_counterbuf_alloc();
-       if(this_cpu_datap->cpu_kpc_shadow == NULL )
-               goto failed;
-
-       this_cpu_datap->cpu_kpc_reload = kpc_counterbuf_alloc();
-       if(this_cpu_datap->cpu_kpc_reload == NULL )
+       if (kpc_register_cpu(this_cpu_datap) != TRUE)
                goto failed;
 #endif
 
@@ -647,6 +652,12 @@ ml_init_lock_timeout(void)
                TLBTimeOut = LockTimeOut;
        }
 
+       if (PE_parse_boot_argn("phyreadmaxus", &slto, sizeof (slto))) {
+               default_timeout_ns = slto * NSEC_PER_USEC;
+               nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
+               reportphyreaddelayabs = abstime;
+       }
+
        if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof (mtxspin))) {
                if (mtxspin > USEC_PER_SEC>>4)
                        mtxspin =  USEC_PER_SEC>>4;
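The new phyreadmaxus handling follows the same recipe as the existing timeout boot-args: read a microsecond value, scale it to nanoseconds, convert to mach absolute time, and park the result in reportphyreaddelayabs. A sketch of that conversion (slto is the scratch integer ml_init_lock_timeout already uses for its other boot-args, assumed 32-bit here); for example, booting with phyreadmaxus=500 in boot-args would arm a roughly 500 microsecond threshold:

    uint32_t slto;
    uint64_t default_timeout_ns, abstime;

    if (PE_parse_boot_argn("phyreadmaxus", &slto, sizeof(slto))) {
            default_timeout_ns = (uint64_t)slto * NSEC_PER_USEC;     /* us -> ns */
            nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
            reportphyreaddelayabs = abstime;                         /* threshold in mach ticks */
    }
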
index eabfb5c87d7ce8901016c1cd9859e4c59d1583e4..6f8dc6809616e4d4fd4ec73d50908ec04c5417ed 100644 (file)
@@ -312,10 +312,6 @@ void bzero_phys(
 /* Bytes available on current stack */
 vm_offset_t ml_stack_remaining(void);
 
-#if CONFIG_COUNTERS
-void ml_get_csw_threads(thread_t * /*old*/, thread_t * /*new*/);
-#endif /* CONFIG_COUNTERS */
-
 __END_DECLS
 
 #ifdef XNU_KERNEL_PRIVATE
@@ -335,5 +331,9 @@ boolean_t ml_timer_forced_evaluation(void);
 void ml_gpu_stat_update(uint64_t);
 uint64_t ml_gpu_stat(thread_t);
 boolean_t ml_recent_wake(void);
+
+extern uint64_t reportphyreaddelayabs;
+extern uint32_t reportphyreadosbt;
+
 #endif /* XNU_KERNEL_PRIVATE */
 #endif /* _I386_MACHINE_ROUTINES_H_ */
index 708404a9803df3a17d62481edacb6af773c28ca4..6ad7834f551c278bdccc17f17e30f944752bd7e5 100644 (file)
@@ -83,6 +83,11 @@ extern void          blkclr(
                               const char       *from,
                               int              nbytes);
 
+extern void            memset_word(
+                              int              *dst,
+                              int              pattern,
+                              int              nwords);
+       
 
 /* Move arbitrarily-aligned data from one physical address to another */
 extern void bcopy_phys(addr64_t from, addr64_t to, vm_size_t nbytes);
index ec0cda52cc49cd79132ca8b462a079780b4d3eb8..ae4a98a94f511f4e7e72adf3aa2d07b4784704f0 100644 (file)
@@ -1237,7 +1237,7 @@ mp_cpus_call_wait(boolean_t       intrs_enabled,
                        mp_cpus_call_wait_timeout = TRUE;
                        cpus_unresponsive = cpus_called & ~(*cpus_responded);
                        mp_cpus_NMIPI(cpus_unresponsive);
-                       panic("mp_cpus_call_wait() timeout, cpus: 0x%lx",
+                       panic("mp_cpus_call_wait() timeout, cpus: 0x%llx",
                                cpus_unresponsive);
                }
        }
index 4e333ce7dcb4c2273532b86cf5187f7da9fc4ffc..5fffdf0c16cd28610bf13f56e1dd466119f5e66e 100644 (file)
@@ -66,7 +66,7 @@
 #include <i386/apic.h>
 #include <i386/mp_events.h>
 
-#define MAX_CPUS       32              /* (8*sizeof(long)) */  
+#define MAX_CPUS       64              /* 8 * sizeof(cpumask_t) */
 
 #ifndef        ASSEMBLER
 #include <stdint.h>
@@ -147,13 +147,13 @@ typedef enum      {KDP_XCPU_NONE = 0xffff, KDP_CURRENT_LCPU = 0xfffe} kdp_cpu_t;
 #endif
 
 typedef uint32_t cpu_t;
-typedef volatile long cpumask_t;
+typedef volatile uint64_t cpumask_t;
 static inline cpumask_t
 cpu_to_cpumask(cpu_t cpu)
 {
-       return (cpu < 32) ? (1 << cpu) : 0;
+       return (cpu < MAX_CPUS) ? (1ULL << cpu) : 0;
 }
-#define CPUMASK_ALL    0xffffffff
+#define CPUMASK_ALL    0xffffffffffffffffULL
 #define CPUMASK_SELF   cpu_to_cpumask(cpu_number())
 #define CPUMASK_OTHERS (CPUMASK_ALL & ~CPUMASK_SELF)
 
index 4b4306ad8578a21f7463a67c62301f0db08d7d4a..a3dbca3b107ba8638c748b30415c1747087f87d3 100644 (file)
@@ -592,7 +592,7 @@ cpu_data_alloc(boolean_t is_boot_cpu)
        /*
         * Allocate per-cpu data:
         */
-       ret = kmem_alloc(kernel_map, (vm_offset_t *) &cdp, sizeof(cpu_data_t));
+       ret = kmem_alloc(kernel_map, (vm_offset_t *) &cdp, sizeof(cpu_data_t), VM_KERN_MEMORY_CPU);
        if (ret != KERN_SUCCESS) {
                printf("cpu_data_alloc() failed, ret=%d\n", ret);
                goto abort;
@@ -605,7 +605,7 @@ cpu_data_alloc(boolean_t is_boot_cpu)
         */
        ret = kmem_alloc(kernel_map, 
                         (vm_offset_t *) &cdp->cpu_int_stack_top,
-                        INTSTACK_SIZE);
+                        INTSTACK_SIZE, VM_KERN_MEMORY_CPU);
        if (ret != KERN_SUCCESS) {
                printf("cpu_data_alloc() int stack failed, ret=%d\n", ret);
                goto abort;
@@ -618,7 +618,8 @@ cpu_data_alloc(boolean_t is_boot_cpu)
         */
        ret = kmem_alloc(kernel_map, 
                         (vm_offset_t *) &cdp->cpu_desc_tablep,
-                        sizeof(cpu_desc_table64_t));
+                        sizeof(cpu_desc_table64_t),
+                        VM_KERN_MEMORY_CPU);
        if (ret != KERN_SUCCESS) {
                printf("cpu_data_alloc() desc_table failed, ret=%d\n", ret);
                goto abort;
@@ -629,7 +630,8 @@ cpu_data_alloc(boolean_t is_boot_cpu)
         */
        ret = kmem_alloc(kernel_map, 
                         (vm_offset_t *) &cdp->cpu_ldtp,
-                        sizeof(struct real_descriptor) * LDTSZ);
+                        sizeof(struct real_descriptor) * LDTSZ,
+                        VM_KERN_MEMORY_CPU);
        if (ret != KERN_SUCCESS) {
                printf("cpu_data_alloc() ldt failed, ret=%d\n", ret);
                goto abort;
@@ -775,7 +777,7 @@ cpu_userwindow_init(int cpu)
 
                if (vm_allocate(kernel_map, &vaddr,
                                        (NBPDE * NCOPY_WINDOWS * num_cpus) + NBPDE,
-                                       VM_FLAGS_ANYWHERE) != KERN_SUCCESS)
+                                       VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_CPU)) != KERN_SUCCESS)
                        panic("cpu_userwindow_init: "
                                        "couldn't allocate user map window");
 
@@ -822,7 +824,7 @@ cpu_physwindow_init(int cpu)
 
        if (phys_window == 0) {
                if (vm_allocate(kernel_map, &phys_window,
-                               PAGE_SIZE, VM_FLAGS_ANYWHERE)
+                               PAGE_SIZE, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_CPU))
                                != KERN_SUCCESS)
                        panic("cpu_physwindow_init: "
                                "couldn't allocate phys map window");
@@ -863,14 +865,14 @@ cpu_data_realloc(void)
        cpu_data_t      *cdp;
        boolean_t       istate;
 
-       ret = kmem_alloc(kernel_map, &istk, INTSTACK_SIZE);
+       ret = kmem_alloc(kernel_map, &istk, INTSTACK_SIZE, VM_KERN_MEMORY_CPU);
        if (ret != KERN_SUCCESS) {
                panic("cpu_data_realloc() stack alloc, ret=%d\n", ret);
        }
        bzero((void*) istk, INTSTACK_SIZE);
        istk += INTSTACK_SIZE;
 
-       ret = kmem_alloc(kernel_map, (vm_offset_t *) &cdp, sizeof(cpu_data_t));
+       ret = kmem_alloc(kernel_map, (vm_offset_t *) &cdp, sizeof(cpu_data_t), VM_KERN_MEMORY_CPU);
        if (ret != KERN_SUCCESS) {
                panic("cpu_data_realloc() cpu data alloc, ret=%d\n", ret);
        }
@@ -883,7 +885,7 @@ cpu_data_realloc(void)
        timer_call_queue_init(&cdp->rtclock_timer.queue);
 
        /* Allocate the separate fault stack */
-       ret = kmem_alloc(kernel_map, &fstk, PAGE_SIZE);
+       ret = kmem_alloc(kernel_map, &fstk, PAGE_SIZE, VM_KERN_MEMORY_CPU);
        if (ret != KERN_SUCCESS) {
                panic("cpu_data_realloc() fault stack alloc, ret=%d\n", ret);
        }
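Every allocation in this file now carries the VM_KERN_MEMORY_CPU tag so per-CPU structures show up under their own bucket in the kernel's memory accounting: kmem_alloc gains an explicit tag parameter, while vm_allocate callers fold the tag into the flags with VM_MAKE_TAG. The two call shapes, as used in the hunks above (error handling elided):

    vm_offset_t   buf;
    kern_return_t kr;

    /* explicit tag argument on kmem_alloc */
    kr = kmem_alloc(kernel_map, &buf, PAGE_SIZE, VM_KERN_MEMORY_CPU);

    /* tag folded into the flags for vm_allocate */
    kr = vm_allocate(kernel_map, &buf, PAGE_SIZE,
                     VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_CPU));
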
index cb83084e63e61166d45991fcf740adb462a6f44a..7adc8841aaca4c4ac762852d9f91d5d6273e4c13 100644 (file)
@@ -347,8 +347,3 @@ void
 pal_preemption_assert(void)
 {
 }
-
-void
-hibernate_pal_prepare(void)
-{
-}
index 40ebdf0bc668ee9f19f6a91d4570bd64fa741ca2..356cb79e9deef5978f50303e46ca148c378c5585 100644 (file)
@@ -144,9 +144,6 @@ void pal_register_cache_state(thread_t thread, pal_cache_state_t state);
 /* Catch code running on the except thread that shouldn't be */
 void pal_preemption_assert(void);
 
-void hibernate_pal_prepare(void);
-void pal_efi_hibernate_prepare(void);
-
 /* Include a PAL-specific header, too, for xnu-internal overrides */
 #include <i386/pal_native.h>
 
index e561a690026fc6abd94b6f194371e5b5f9887661..2989940815e3f23d60dfc3ef891050d46526c6fb 100644 (file)
@@ -118,12 +118,12 @@ void panic_dump_mem(const void *addr, int len)
 {
        void *scratch = panic_dump_buf + 4096;
 
-       for (; len > 0; addr = (uint8_t *)addr + PAGE_SIZE, len -= PAGE_SIZE) {
+       for (; len > 0; addr = (const uint8_t *)addr + PAGE_SIZE, len -= PAGE_SIZE) {
                if (!kvtophys((vm_offset_t)addr))
                        continue;
 
                // 4095 is multiple of 3 -- see below
-               int n = WKdm_compress_new((WK_word *)addr, (WK_word *)(void *)panic_dump_buf, 
+               int n = WKdm_compress_new((const WK_word *)addr, (WK_word *)(void *)panic_dump_buf,
                                                                  scratch, 4095);
 
                if (n == -1)
@@ -184,7 +184,7 @@ boolean_t panic_phys_range_before(const void *addr, uint64_t *pphys,
        if (count > 1024)       // Sanity check
                return FALSE;
 
-       for (uint32_t i = 0; i < count; ++i, r = (EfiMemoryRange *)(void *)((uint8_t *)r + size)) {
+       for (uint32_t i = 0; i < count; ++i, r = (const EfiMemoryRange *)(const void *)((const uint8_t *)r + size)) {
                if (r->PhysicalStart + r->NumberOfPages * PAGE_SIZE > *pphys)
                        continue;
 
index dce92552405869ef738555bc139235c0057ba2f7..84456b323b08a5b86430b0ece3995f9aa24cf00b 100644 (file)
 #include <i386/machine_routines.h>
 #include <i386/lapic.h> /* LAPIC_PMC_SWI_VECTOR */
 
-#if CONFIG_COUNTERS
-#include <pmc/pmc.h>
-#endif /* CONFIG_COUNTERS */
-
-#if KPC
-#include <kern/kpc.h>
-#endif
-
 #if KPERF
 #include <kperf/kperf.h>
+#include <kperf/kperf_kpc.h>
 #endif
 
 #if HYPERVISOR
  * Maps state flavor to number of words in the state:
  */
 unsigned int _MachineStateCount[] = {
-       /* FLAVOR_LIST */
-        0,
-       x86_THREAD_STATE32_COUNT,
-       x86_FLOAT_STATE32_COUNT,
-       x86_EXCEPTION_STATE32_COUNT,
-       x86_THREAD_STATE64_COUNT,
-       x86_FLOAT_STATE64_COUNT,
-       x86_EXCEPTION_STATE64_COUNT,
-       x86_THREAD_STATE_COUNT,
-       x86_FLOAT_STATE_COUNT,
-       x86_EXCEPTION_STATE_COUNT,
-       0,
-       x86_SAVED_STATE32_COUNT,
-       x86_SAVED_STATE64_COUNT,
-       x86_DEBUG_STATE32_COUNT,
-       x86_DEBUG_STATE64_COUNT,
-       x86_DEBUG_STATE_COUNT
+       [x86_THREAD_STATE32]    = x86_THREAD_STATE32_COUNT,
+       [x86_THREAD_STATE64]    = x86_THREAD_STATE64_COUNT,
+       [x86_THREAD_STATE]      = x86_THREAD_STATE_COUNT,
+       [x86_FLOAT_STATE32]     = x86_FLOAT_STATE32_COUNT,
+       [x86_FLOAT_STATE64]     = x86_FLOAT_STATE64_COUNT,
+       [x86_FLOAT_STATE]       = x86_FLOAT_STATE_COUNT,
+       [x86_EXCEPTION_STATE32] = x86_EXCEPTION_STATE32_COUNT,
+       [x86_EXCEPTION_STATE64] = x86_EXCEPTION_STATE64_COUNT,
+       [x86_EXCEPTION_STATE]   = x86_EXCEPTION_STATE_COUNT,
+       [x86_DEBUG_STATE32]     = x86_DEBUG_STATE32_COUNT,
+       [x86_DEBUG_STATE64]     = x86_DEBUG_STATE64_COUNT,
+       [x86_DEBUG_STATE]       = x86_DEBUG_STATE_COUNT,
+       [x86_AVX_STATE32]       = x86_AVX_STATE32_COUNT,
+       [x86_AVX_STATE64]       = x86_AVX_STATE64_COUNT,
+       [x86_AVX_STATE]         = x86_AVX_STATE_COUNT,
 };
 
 zone_t         iss_zone;               /* zone for saved_state area */
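Switching _MachineStateCount to designated initializers pins each count to its flavor constant instead of relying on positional order, which is what makes it safe to add the AVX flavors without padding entries; any flavor not listed defaults to 0. A generic, standalone illustration of the idiom (hypothetical flavor values):

    #include <stdio.h>

    enum { FLAVOR_A = 1, FLAVOR_B = 4, FLAVOR_C = 7 };

    /* each entry lands at the index named by its designator; gaps stay 0 */
    static const unsigned counts[] = {
            [FLAVOR_A] = 16,
            [FLAVOR_B] = 42,
            [FLAVOR_C] = 8,
    };

    int main(void)
    {
            printf("%u %u\n", counts[FLAVOR_B], counts[2]);   /* prints "42 0" */
            return 0;
    }
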
@@ -160,58 +151,6 @@ set_thread_state32(thread_t thread, x86_thread_state32_t *ts);
 static int
 set_thread_state64(thread_t thread, x86_thread_state64_t *ts);
 
-#if CONFIG_COUNTERS
-static inline void
-machine_pmc_cswitch(thread_t /* old */, thread_t /* new */);
-
-static inline void
-pmc_swi(thread_t /* old */, thread_t /*new */);
-
-static inline void
-pmc_swi(thread_t old, thread_t new) {
-       current_cpu_datap()->csw_old_thread = old;
-       current_cpu_datap()->csw_new_thread = new;
-       pal_pmc_swi();
-}
-
-static inline void
-machine_pmc_cswitch(thread_t old, thread_t new) {
-       if (pmc_thread_eligible(old) || pmc_thread_eligible(new)) {
-               pmc_swi(old, new);
-       }
-}
-
-void ml_get_csw_threads(thread_t *old, thread_t *new) {
-       *old = current_cpu_datap()->csw_old_thread;
-       *new = current_cpu_datap()->csw_new_thread;
-}
-
-#endif /* CONFIG_COUNTERS */
-
-#if KPC
-static inline void
-ml_kpc_cswitch(thread_t old, thread_t new)
-{
-       if(!kpc_threads_counting)
-               return;
-       
-       /* call the kpc function */
-       kpc_switch_context( old, new );
-}
-#endif
-
-#if KPERF
-static inline void
-ml_kperf_cswitch(thread_t old, thread_t new)
-{
-       if(!kperf_cswitch_hook)
-               return;
-       
-       /* call the kpc function */
-       kperf_switch_context( old, new );
-}
-#endif
-
 #if HYPERVISOR
 static inline void
 ml_hv_cswitch(thread_t old, thread_t new)
@@ -225,13 +164,13 @@ ml_hv_cswitch(thread_t old, thread_t new)
 #endif
 
 /*
- * Don't let an illegal value for dr7 get set. Specifically,
- * check for undefined settings.  Setting these bit patterns
+ * Don't let an illegal value for the lower 32-bits of dr7 get set.
+ * Specifically, check for undefined settings.  Setting these bit patterns
  * result in undefined behaviour and can lead to an unexpected
  * TRCTRAP.
  */
 static boolean_t
-dr7_is_valid(uint32_t *dr7)
+dr7d_is_valid(uint32_t *dr7d)
 {
        int i;
        uint32_t mask1, mask2;
@@ -243,7 +182,7 @@ dr7_is_valid(uint32_t *dr7)
        if (!(get_cr4() & CR4_DE))
                for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 4; 
                                i++, mask1 <<= 4, mask2 <<= 4)
-                       if ((*dr7 & mask1) == mask2)
+                       if ((*dr7d & mask1) == mask2)
                                return (FALSE);
 
        /*
@@ -252,33 +191,33 @@ dr7_is_valid(uint32_t *dr7)
         * to "00B"
         */
        for (i = 0; i < 4; i++)
-               if (((((*dr7 >> (16 + i*4))) & 0x3) == 0) &&
-                               ((((*dr7 >> (18 + i*4))) & 0x3) != 0))
+               if (((((*dr7d >> (16 + i*4))) & 0x3) == 0) &&
+                               ((((*dr7d >> (18 + i*4))) & 0x3) != 0))
                        return (FALSE);
 
        /*
         * Intel docs have these bits fixed.
         */
-       *dr7 |= 0x1 << 10; /* set bit 10 to 1 */
-       *dr7 &= ~(0x1 << 11); /* set bit 11 to 0 */
-       *dr7 &= ~(0x1 << 12); /* set bit 12 to 0 */
-       *dr7 &= ~(0x1 << 14); /* set bit 14 to 0 */
-       *dr7 &= ~(0x1 << 15); /* set bit 15 to 0 */
+       *dr7d |= 0x1 << 10; /* set bit 10 to 1 */
+       *dr7d &= ~(0x1 << 11); /* set bit 11 to 0 */
+       *dr7d &= ~(0x1 << 12); /* set bit 12 to 0 */
+       *dr7d &= ~(0x1 << 14); /* set bit 14 to 0 */
+       *dr7d &= ~(0x1 << 15); /* set bit 15 to 0 */
 
        /*
         * We don't allow anything to set the global breakpoints.
         */
 
-       if (*dr7 & 0x2)
+       if (*dr7d & 0x2)
                return (FALSE);
 
-       if (*dr7 & (0x2<<2))
+       if (*dr7d & (0x2<<2))
                return (FALSE);
 
-       if (*dr7 & (0x2<<4))
+       if (*dr7d & (0x2<<4))
                return (FALSE);
 
-       if (*dr7 & (0x2<<6))
+       if (*dr7d & (0x2<<6))
                return (FALSE);
 
        return (TRUE);
@@ -289,17 +228,16 @@ extern void set_64bit_debug_regs(x86_debug_state64_t *ds);
 boolean_t
 debug_state_is_valid32(x86_debug_state32_t *ds) 
 {
-       if (!dr7_is_valid(&ds->dr7))
+       if (!dr7d_is_valid(&ds->dr7))
                return FALSE;
 
-
        return TRUE;
 }
 
 boolean_t
 debug_state_is_valid64(x86_debug_state64_t *ds)
 {
-       if (!dr7_is_valid((uint32_t *)&ds->dr7))
+       if (!dr7d_is_valid((uint32_t *)&ds->dr7))
                return FALSE;
 
        /*
@@ -322,6 +260,9 @@ debug_state_is_valid64(x86_debug_state64_t *ds)
                if (ds->dr3 >= VM_MAX_PAGE_ADDRESS)
                        return FALSE;
 
+       /* For x86-64, we must ensure the upper 32-bits of DR7 are clear */
+       ds->dr7 &= 0xffffffffULL;
+
        return TRUE;
 }
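The rename to dr7d_is_valid reflects that the helper validates and normalizes only the low 32 bits of DR7; debug_state_is_valid64 then clears the upper half explicitly, since those bits are reserved on x86-64. A compact sketch of the fixed-bit handling and the upper-half clear (bit positions as in the hunk; dr7 is a hypothetical local holding the user-supplied value):

    uint64_t dr7 = 0;   /* user-supplied debug control value */

    /* Intel-mandated fixed bits in the low word */
    dr7 |=  (1u << 10);                                            /* bit 10 must be 1 */
    dr7 &= ~((1u << 11) | (1u << 12) | (1u << 14) | (1u << 15));   /* these must be 0 */

    /* refuse any attempt to set the global-enable bits G0..G3 (bits 1, 3, 5, 7) */
    if (dr7 & (0x2ULL | (0x2ULL << 2) | (0x2ULL << 4) | (0x2ULL << 6))) {
            /* reject the debug state */
    }

    dr7 &= 0xffffffffULL;       /* upper 32 bits are reserved on x86-64 */
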
 
@@ -448,9 +389,6 @@ void
 machine_load_context(
        thread_t                new)
 {
-#if CONFIG_COUNTERS
-       machine_pmc_cswitch(NULL, new);
-#endif
        new->machine.specFlags |= OnProc;
        act_machine_switch_pcb(NULL, new);
        Load_context(new);
@@ -470,14 +408,8 @@ machine_switch_context(
 #if MACH_RT
         assert(current_cpu_datap()->cpu_active_stack == old->kernel_stack);
 #endif
-#if CONFIG_COUNTERS
-       machine_pmc_cswitch(old, new);
-#endif
-#if KPC
-       ml_kpc_cswitch(old, new);
-#endif
 #if KPERF
-       ml_kperf_cswitch(old, new);
+       kperf_kpc_cswitch(old, new);
 #endif
        /*
         *      Save FP registers if in use.
@@ -1835,14 +1767,8 @@ machine_stack_handoff(thread_t old,
        assert(new);
        assert(old);
 
-#if CONFIG_COUNTERS
-       machine_pmc_cswitch(old, new);
-#endif
-#if KPC
-       ml_kpc_cswitch(old, new);
-#endif
 #if KPERF
-       ml_kperf_cswitch(old, new);
+       kperf_kpc_cswitch(old, new);
 #endif
 
        stack = old->kernel_stack;
index 94a005637baad128fd79410e588d539561b906a5..a08f86a01045a357aa825df09901e541c9ce52bc 100644 (file)
@@ -730,8 +730,6 @@ pmThreadGetUrgency(uint64_t *rt_period, uint64_t *rt_deadline)
                        *rt_deadline = arg2;
        }
 
-       KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_GET_URGENCY), urgency, arg1, arg2, 0, 0);
-
        return(urgency);
 }
 
@@ -746,6 +744,7 @@ void
 thread_tell_urgency(int urgency,
     uint64_t rt_period,
     uint64_t rt_deadline,
+    uint64_t sched_latency,
     thread_t nthread)
 {
        uint64_t        urgency_notification_time_start, delta;
@@ -759,7 +758,7 @@ thread_tell_urgency(int urgency,
            || pmDispatch->pmThreadTellUrgency == NULL)
                return;
 
-       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, 0, 0);
+       SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, sched_latency, 0);
 
        if (__improbable((urgency_assert == TRUE)))
                urgency_notification_time_start = mach_absolute_time();
@@ -782,7 +781,38 @@ thread_tell_urgency(int urgency,
                }
        }
 
-       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
+       SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
+}
+
+void
+machine_thread_going_on_core(__unused thread_t      new_thread,
+                                                        __unused int           urgency,
+                                                        __unused uint64_t      sched_latency)
+{
+}
+
+void
+machine_thread_going_off_core(__unused thread_t old_thread, __unused boolean_t thread_terminating)
+{
+}
+
+void
+machine_max_runnable_latency(__unused uint64_t bg_max_latency,
+                                                        __unused uint64_t default_max_latency,
+                                                        __unused uint64_t realtime_max_latency)
+{
+}
+
+void
+machine_work_interval_notify(__unused thread_t thread,
+                                                        __unused uint64_t work_interval_id,
+                                                        __unused uint64_t start_abstime,
+                                                        __unused uint64_t finish_abstime,
+                                                        __unused uint64_t deadline_abstime,
+                                                        __unused uint64_t next_start_abstime,
+                                                        __unused uint16_t urgency,
+                                                        __unused uint32_t flags)
+{
 }
 
 void
index eb9e7e2977fce5e643fa2c16a2fed016b7c948ba..939e47174605d98a377a3a33963f02104a7b63e4 100644 (file)
@@ -297,8 +297,8 @@ pmap_store_pte(pt_entry_t *entryp, pt_entry_t value)
 #define INTEL_PTE_PS           0x00000080ULL
 #define INTEL_PTE_PTA          0x00000080ULL
 #define INTEL_PTE_GLOBAL       0x00000100ULL
-#define INTEL_PTE_WIRED                0x00000200ULL
-#define INTEL_PDPTE_NESTED     0x00000400ULL
+#define INTEL_PTE_WIRED                0x00000400ULL
+#define INTEL_PDPTE_NESTED     0x00000800ULL
 #define INTEL_PTE_PFN          PG_FRAME
 
 #define INTEL_PTE_NX           (1ULL << 63)
@@ -307,7 +307,7 @@ pmap_store_pte(pt_entry_t *entryp, pt_entry_t value)
 /* This is conservative, but suffices */
 #define INTEL_PTE_RSVD         ((1ULL << 10) | (1ULL << 11) | (0x1FFULL << 54))
 
-#define INTEL_PTE_COMPRESSED   INTEL_PTE_REF /* marker, for invalid PTE only */
+#define INTEL_COMPRESSED       (1ULL << 62) /* marker, for invalid PTE only -- ignored by hardware for both regular/EPT entries*/
 
 #define        pa_to_pte(a)            ((a) & INTEL_PTE_PFN) /* XXX */
 #define        pte_to_pa(p)            ((p) & INTEL_PTE_PFN) /* XXX */
@@ -315,16 +315,112 @@ pmap_store_pte(pt_entry_t *entryp, pt_entry_t value)
 
 #define pte_kernel_rw(p)          ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_RW))
 #define pte_kernel_ro(p)          ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID))
-#define pte_user_rw(p)            ((pt_entry)t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER|INTEL_PTE_RW))
+#define pte_user_rw(p)            ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER|INTEL_PTE_RW))
 #define pte_user_ro(p)            ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER))
 
+#define PMAP_INVEPT_SINGLE_CONTEXT     1
+
+
+#define INTEL_EPTP_AD          0x00000040ULL
+
+#define INTEL_EPT_READ         0x00000001ULL
+#define INTEL_EPT_WRITE        0x00000002ULL
+#define INTEL_EPT_EX           0x00000004ULL
+#define INTEL_EPT_IPTA         0x00000040ULL
+#define INTEL_EPT_PS           0x00000080ULL
+#define INTEL_EPT_REF          0x00000100ULL
+#define INTEL_EPT_MOD          0x00000200ULL
+
+#define INTEL_EPT_CACHE_MASK   0x00000038ULL
+#define INTEL_EPT_NCACHE       0x00000000ULL
+#define INTEL_EPT_WC           0x00000008ULL
+#define INTEL_EPT_WTHRU        0x00000020ULL
+#define INTEL_EPT_WP           0x00000028ULL
+#define INTEL_EPT_WB           0x00000030ULL
+
+/*
+ * Routines to filter correct bits depending on the pmap type
+ */
+
+static inline pt_entry_t
+pte_remove_ex(pt_entry_t pte, boolean_t is_ept)
+{
+       if (__probable(!is_ept)) {
+               return (pte | INTEL_PTE_NX);
+       }
+
+       return (pte & (~INTEL_EPT_EX));
+}
+
+static inline pt_entry_t
+pte_set_ex(pt_entry_t pte, boolean_t is_ept)
+{
+       if (__probable(!is_ept)) {
+               return (pte & (~INTEL_PTE_NX));
+       }
+
+       return (pte | INTEL_EPT_EX);
+}
+
+static inline pt_entry_t
+physmap_refmod_to_ept(pt_entry_t physmap_pte)
+{
+       pt_entry_t ept_pte = 0;
+
+       if (physmap_pte & INTEL_PTE_MOD) {
+               ept_pte |= INTEL_EPT_MOD;
+       }
+
+       if (physmap_pte & INTEL_PTE_REF) {
+               ept_pte |= INTEL_EPT_REF;
+       }
+
+       return ept_pte;
+}
+
+static inline pt_entry_t
+ept_refmod_to_physmap(pt_entry_t ept_pte)
+{
+       pt_entry_t physmap_pte = 0;
+
+       assert((ept_pte & ~(INTEL_EPT_REF | INTEL_EPT_MOD)) == 0);
+
+       if (ept_pte & INTEL_EPT_REF) {
+               physmap_pte |= INTEL_PTE_REF;
+       }
+
+       if (ept_pte & INTEL_EPT_MOD) {
+               physmap_pte |= INTEL_PTE_MOD;
+       }
+
+       return physmap_pte;
+}
+
+/*
+ * Note: Not all Intel processors support EPT referenced access and dirty bits.
+ *      During pmap_init() we check the VMX capability for the current hardware
+ *      and update this variable accordingly.
+ */
+extern boolean_t pmap_ept_support_ad;
+
+#define PTE_VALID_MASK(is_ept) ((is_ept) ? (INTEL_EPT_READ | INTEL_EPT_WRITE | INTEL_EPT_EX) : INTEL_PTE_VALID)
+#define PTE_READ(is_ept)       ((is_ept) ? INTEL_EPT_READ : INTEL_PTE_VALID)
+#define PTE_WRITE(is_ept)      ((is_ept) ? INTEL_EPT_WRITE : INTEL_PTE_WRITE)
+#define PTE_PS                 INTEL_PTE_PS
+#define PTE_COMPRESSED         INTEL_COMPRESSED
+#define PTE_NCACHE(is_ept)     ((is_ept) ? INTEL_EPT_NCACHE : INTEL_PTE_NCACHE)
+#define PTE_WTHRU(is_ept)      ((is_ept) ? INTEL_EPT_WTHRU : INTEL_PTE_WTHRU)
+#define PTE_REF(is_ept)        ((is_ept) ? INTEL_EPT_REF : INTEL_PTE_REF)
+#define PTE_MOD(is_ept)        ((is_ept) ? INTEL_EPT_MOD : INTEL_PTE_MOD)
+#define PTE_WIRED              INTEL_PTE_WIRED
+
+
 #define PMAP_DEFAULT_CACHE     0
 #define PMAP_INHIBIT_CACHE     1
 #define PMAP_GUARDED_CACHE     2
 #define PMAP_ACTIVATE_CACHE    4
 #define PMAP_NO_GUARD_CACHE    8
 
-
 #ifndef        ASSEMBLER
 
 #include <sys/queue.h>
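The PTE_*(is_ept) selector macros added above let the shared pmap code pick the right bit layout at each use site: legacy IA-32e page-table bits when is_ept is FALSE, EPT bits when it is TRUE. Notably, a legacy PTE is "present" when INTEL_PTE_VALID is set, whereas an EPT entry counts as present if any of its read/write/execute bits are set, which is exactly what PTE_VALID_MASK encodes. Illustrative use (ptep, pmap and the surrounding code are hypothetical; the macro names come from this hunk):

    pt_entry_t  pte    = *ptep;
    boolean_t   is_ept = is_ept_pmap(pmap);

    if (pte & PTE_VALID_MASK(is_ept)) {              /* present in either format */
            if (pte & PTE_WRITE(is_ept)) {           /* INTEL_PTE_WRITE vs INTEL_EPT_WRITE */
                    /* writable mapping */
            }
            pmap_update_pte(ptep, PTE_VALID_MASK(is_ept), 0);   /* invalidate it */
    }
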
@@ -395,6 +491,7 @@ static      inline void * PHYSMAP_PTOV_check(void *paddr) {
 struct pmap {
        decl_simple_lock_data(,lock)    /* lock on map */
        pmap_paddr_t    pm_cr3;         /* physical addr */
+       pmap_paddr_t    pm_eptp;        /* EPTP */
        boolean_t       pm_shared;
         pd_entry_t      *dirbase;        /* page directory pointer */
         vm_object_t     pm_obj;         /* object to hold pde's */
@@ -403,7 +500,7 @@ struct pmap {
        pml4_entry_t    *pm_pml4;       /* VKA of top level */
        vm_object_t     pm_obj_pdpt;    /* holds pdpt pages */
        vm_object_t     pm_obj_pml4;    /* holds pml4 pages */
-#define        PMAP_PCID_MAX_CPUS      (48)    /* Must be a multiple of 8 */
+#define        PMAP_PCID_MAX_CPUS      MAX_CPUS        /* Must be a multiple of 8 */
        pcid_t          pmap_pcid_cpus[PMAP_PCID_MAX_CPUS];
        volatile uint8_t pmap_pcid_coherency_vector[PMAP_PCID_MAX_CPUS];
        struct pmap_statistics  stats;  /* map statistics */
@@ -412,6 +509,20 @@ struct pmap {
        ledger_t        ledger;         /* ledger tracking phys mappings */
 };
 
+static inline boolean_t
+is_ept_pmap(pmap_t p)
+{
+       if (__probable(p->pm_cr3 != 0)) {
+               assert(p->pm_eptp == 0);
+               return FALSE;
+       }
+
+       assert(p->pm_eptp != 0);
+
+       return TRUE;
+}
+
+void hv_ept_pmap_create(void **ept_pmap, void **eptp);
 
 #if NCOPY_WINDOWS > 0
 #define PMAP_PDPT_FIRST_WINDOW 0
@@ -547,7 +658,7 @@ extern void         x86_filter_TLB_coherency_interrupts(boolean_t);
 /*
  * Get cache attributes (as pagetable bits) for the specified phys page
  */
-extern unsigned        pmap_get_cache_attributes(ppnum_t);
+extern unsigned        pmap_get_cache_attributes(ppnum_t, boolean_t is_ept);
 #if NCOPY_WINDOWS > 0
 extern struct cpu_pmap *pmap_cpu_alloc(
                                boolean_t       is_boot_cpu);
index ec05a97e22d03e6c3983638aba8588a503f93e7d..6fe3641c392b76d25619ebe018660cd4c700d9dd 100644 (file)
@@ -62,7 +62,7 @@ event_t       mapping_replenish_event, pmap_user_pv_throttle_event;
 uint64_t pmap_pv_throttle_stat, pmap_pv_throttled_waiters;
 
 unsigned int pmap_cache_attributes(ppnum_t pn) {
-       if (pmap_get_cache_attributes(pn) & INTEL_PTE_NCACHE)
+       if (pmap_get_cache_attributes(pn, FALSE) & INTEL_PTE_NCACHE)
                return (VM_WIMG_IO);
        else
                return (VM_WIMG_COPYBACK);
@@ -108,28 +108,57 @@ void      pmap_set_cache_attributes(ppnum_t pn, unsigned int cacheattr) {
        }
 }
 
-unsigned       pmap_get_cache_attributes(ppnum_t pn) {
+unsigned       pmap_get_cache_attributes(ppnum_t pn, boolean_t is_ept) {
        if (last_managed_page == 0)
                return 0;
 
-       if (!IS_MANAGED_PAGE(ppn_to_pai(pn))) {
-           return INTEL_PTE_NCACHE;
-       }
+       if (!IS_MANAGED_PAGE(ppn_to_pai(pn)))
+           return PTE_NCACHE(is_ept);
 
        /*
         * The cache attributes are read locklessly for efficiency.
         */
        unsigned int attr = pmap_phys_attributes[ppn_to_pai(pn)];
        unsigned int template = 0;
-       
-       if (attr & PHYS_PTA)
+
+       /*
+        * The PTA bit is currently unsupported for EPT PTEs.
+        */
+       if ((attr & PHYS_PTA) && !is_ept)
                template |= INTEL_PTE_PTA;
+
+       /*
+        * If the page isn't marked as NCACHE, the default for EPT entries
+        * is WB.
+        */
        if (attr & PHYS_NCACHE)
-               template |= INTEL_PTE_NCACHE;
+               template |= PTE_NCACHE(is_ept);
+       else if (is_ept)
+               template |= INTEL_EPT_WB;
+
        return template;
 }
 
-
+boolean_t 
+pmap_has_managed_page(ppnum_t first, ppnum_t last)
+{
+       ppnum_t   pn;
+    boolean_t result;
+
+    assert(last_managed_page);
+    assert(first <= last);
+
+    for (result = FALSE, pn = first; 
+       !result 
+         && (pn <= last)
+         && (pn <= last_managed_page); 
+        pn++)
+    {
+       result = (0 != (pmap_phys_attributes[pn] & PHYS_MANAGED));
+    }
+
+       return (result);
+}
 
 boolean_t
 pmap_is_noencrypt(ppnum_t pn)
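pmap_get_cache_attributes now takes is_ept so it can hand back EPT memory-type bits instead of the legacy PAT/NCACHE bits: unmanaged pages get PTE_NCACHE(is_ept), the PTA bit is suppressed for EPT, and a managed page not marked NCACHE defaults to write-back (INTEL_EPT_WB) in the EPT case. A caller building a mapping template would combine it roughly as pmap_enter_options does later in this diff:

    pt_entry_t template = pa_to_pte(pa);

    template |= is_ept ? INTEL_EPT_IPTA : INTEL_PTE_VALID;           /* "present" marker */
    template |= pmap_get_cache_attributes(pa_index(pa), is_ept);     /* WB / NCACHE / PTA */
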
index 6227e50f78239207384ae1f75bf9bdbc0858d735..4f5580dfa2d7a3a3821543f56c61c89e048c789c 100644 (file)
@@ -385,16 +385,16 @@ static inline void pmap_pv_throttle(__unused pmap_t p) {
  */
 #define        PHYS_MODIFIED   INTEL_PTE_MOD   /* page modified */
 #define        PHYS_REFERENCED INTEL_PTE_REF   /* page referenced */
-#define PHYS_MANAGED   INTEL_PTE_VALID /* page is managed */
-#define PHYS_NOENCRYPT INTEL_PTE_USER  /* no need to encrypt this page in the hibernation image */
+#define        PHYS_MANAGED    INTEL_PTE_VALID /* page is managed */
+#define        PHYS_NOENCRYPT  INTEL_PTE_USER  /* no need to encrypt this page in the hibernation image */
 #define        PHYS_NCACHE     INTEL_PTE_NCACHE
 #define        PHYS_PTA        INTEL_PTE_PTA
 #define        PHYS_CACHEABILITY_MASK (INTEL_PTE_PTA | INTEL_PTE_NCACHE)
-#define PHYS_INTERNAL  INTEL_PTE_WTHRU /* page from internal object */
-#define PHYS_REUSABLE  INTEL_PTE_WRITE /* page is "reusable" */
+#define        PHYS_INTERNAL   INTEL_PTE_WTHRU /* page from internal object */
+#define        PHYS_REUSABLE   INTEL_PTE_WRITE /* page is "reusable" */
 
-extern const boolean_t pmap_disable_kheap_nx;
-extern const boolean_t pmap_disable_kstack_nx;
+extern boolean_t       pmap_disable_kheap_nx;
+extern boolean_t       pmap_disable_kstack_nx;
 
 #define PMAP_EXPAND_OPTIONS_NONE (0x0)
 #define PMAP_EXPAND_OPTIONS_NOWAIT (PMAP_OPTIONS_NOWAIT)
@@ -653,6 +653,7 @@ pmap_classify_pagetable_corruption(pmap_t pmap, vm_map_offset_t vaddr, ppnum_t *
        pmap_t pvpmap = pv_h->pmap;
        vm_map_offset_t pvva = pv_h->va;
        boolean_t ppcd = FALSE;
+       boolean_t is_ept;
 
        /* Ideally, we'd consult the Mach VM here to definitively determine
         * the nature of the mapping for this address space and address.
@@ -664,6 +665,7 @@ pmap_classify_pagetable_corruption(pmap_t pmap, vm_map_offset_t vaddr, ppnum_t *
 
        /* As a precautionary measure, mark A+D */
        pmap_phys_attributes[ppn_to_pai(ppn)] |= (PHYS_MODIFIED | PHYS_REFERENCED);
+       is_ept = is_ept_pmap(pmap);
 
        /*
         * Correct potential single bit errors in either (but not both) element
@@ -704,9 +706,11 @@ pmap_classify_pagetable_corruption(pmap_t pmap, vm_map_offset_t vaddr, ppnum_t *
                goto pmap_cpc_exit;
        }
 
-       /* Check for malformed/inconsistent entries */
-
-       if ((cpte & (INTEL_PTE_NCACHE | INTEL_PTE_WTHRU | INTEL_PTE_PTA)) ==  (INTEL_PTE_NCACHE | INTEL_PTE_WTHRU)) {
+       /*
+        * Check for malformed/inconsistent entries.
+        * The first check here isn't useful for EPT PTEs because INTEL_EPT_NCACHE == 0
+        */
+       if (!is_ept && ((cpte & (INTEL_PTE_NCACHE | INTEL_PTE_WTHRU | INTEL_PTE_PTA)) ==  (INTEL_PTE_NCACHE | INTEL_PTE_WTHRU))) {
                action = PMAP_ACTION_IGNORE;
                suppress_reason = PTE_INVALID_CACHEABILITY;
        }
@@ -714,7 +718,7 @@ pmap_classify_pagetable_corruption(pmap_t pmap, vm_map_offset_t vaddr, ppnum_t *
                action = PMAP_ACTION_IGNORE;
                suppress_reason = PTE_RSVD;
        }
-       else if ((pmap != kernel_pmap) && ((cpte & INTEL_PTE_USER) == 0)) {
+       else if ((pmap != kernel_pmap) && (!is_ept) && ((cpte & INTEL_PTE_USER) == 0)) {
                action = PMAP_ACTION_IGNORE;
                suppress_reason = PTE_SUPERVISOR;
        }
@@ -1010,9 +1014,12 @@ pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr)
 {
        pml4_entry_t    newpf;
        pml4_entry_t    *pml4;
+       boolean_t       is_ept;
 
        pml4 = pmap64_pml4(pmap, vaddr);
-       if (pml4 && ((*pml4 & INTEL_PTE_VALID))) {
+       is_ept = is_ept_pmap(pmap);
+
+       if (pml4 && (*pml4 & PTE_VALID_MASK(is_ept))) {
                newpf = *pml4 & PG_FRAME;
                return &((pdpt_entry_t *) PHYSMAP_PTOV(newpf))
                        [(vaddr >> PDPTSHIFT) & (NPDPTPG-1)];
@@ -1027,10 +1034,12 @@ pmap64_pde(pmap_t pmap, vm_map_offset_t vaddr)
 {
        pdpt_entry_t    newpf;
        pdpt_entry_t    *pdpt;
+       boolean_t       is_ept;
 
        pdpt = pmap64_pdpt(pmap, vaddr);
+       is_ept = is_ept_pmap(pmap);
 
-       if (pdpt && ((*pdpt & INTEL_PTE_VALID))) {
+       if (pdpt && (*pdpt & PTE_VALID_MASK(is_ept))) {
                newpf = *pdpt & PG_FRAME;
                return &((pd_entry_t *) PHYSMAP_PTOV(newpf))
                        [(vaddr >> PDSHIFT) & (NPDPG-1)];
@@ -1060,12 +1069,15 @@ pmap_pte(pmap_t pmap, vm_map_offset_t vaddr)
 {
        pd_entry_t      *pde;
        pd_entry_t      newpf;
+       boolean_t       is_ept;
 
        assert(pmap);
        pde = pmap64_pde(pmap, vaddr);
 
-       if (pde && ((*pde & INTEL_PTE_VALID))) {
-               if (*pde & INTEL_PTE_PS) 
+       is_ept = is_ept_pmap(pmap);
+
+       if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
+               if (*pde & PTE_PS)
                        return pde;
                newpf = *pde & PG_FRAME;
                return &((pt_entry_t *)PHYSMAP_PTOV(newpf))
index ba50c3320c7c14fc45616ae2664427a460143e0b..9841a0754f64cb741ad22f023d5f554a1b53c4e2 100644 (file)
@@ -94,6 +94,9 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t
        unsigned int    i;
        uint64_t        num_pde;
 
+       assert(!is_ept_pmap(grand));
+       assert(!is_ept_pmap(subord));
+
        if ((size & (pmap_nesting_size_min-1)) ||
            (va_start & (pmap_nesting_size_min-1)) ||
            (nstart & (pmap_nesting_size_min-1)) ||
@@ -230,6 +233,8 @@ kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
                    grand, vaddr, size);
        }
 
+       assert(!is_ept_pmap(grand));
+
        /* align everything to PDE boundaries */
        va_start = vaddr & ~(NBPDE-1);
        va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);
@@ -268,6 +273,15 @@ kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
        return KERN_SUCCESS;
 }
 
+kern_return_t
+pmap_unnest_options(
+       pmap_t grand,
+       addr64_t vaddr,
+       __unused uint64_t size,
+       __unused unsigned int options) {
+       return pmap_unnest(grand, vaddr, size);
+}
+
 /* Invoked by the Mach VM to determine the platform specific unnest region */
 
 boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e) {
@@ -311,6 +325,9 @@ pmap_find_phys(pmap_t pmap, addr64_t va)
        ppnum_t         ppn = 0;
        pd_entry_t      pde;
        pt_entry_t      pte;
+       boolean_t       is_ept;
+
+       is_ept = is_ept_pmap(pmap);
 
        mp_disable_preemption();
 
@@ -324,14 +341,14 @@ pmap_find_phys(pmap_t pmap, addr64_t va)
 
        pdep = pmap_pde(pmap, va);
 
-       if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & INTEL_PTE_VALID)) {
-               if (pde & INTEL_PTE_PS) {
+       if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
+               if (pde & PTE_PS) {
                        ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
                        ppn += (ppnum_t) ptenum(va);
                }
                else {
                        ptp = pmap_pte(pmap, va);
-                       if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & INTEL_PTE_VALID) != 0)) {
+                       if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
                                ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
                        }
                }
@@ -345,9 +362,13 @@ pfp_exit:
 /*
  * Update cache attributes for all extant managed mappings.
  * Assumes PV for this page is locked, and that the page
- * is managed.
+ * is managed. We assume that this physical page may be mapped in
+ * both EPT and normal Intel PTEs, so we convert the attributes
+ * to the corresponding format for each pmap.
+ *
+ * We assert that the passed set of attributes is a subset of the
+ * PHYS_CACHEABILITY_MASK.
  */
-
 void
 pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
        pv_rooted_entry_t       pv_h, pv_e;
@@ -355,8 +376,17 @@ pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
        vm_map_offset_t vaddr;
        pmap_t  pmap;
        pt_entry_t      *ptep;
+       boolean_t       is_ept;
+       unsigned        ept_attributes;
        
        assert(IS_MANAGED_PAGE(pn));
+       assert(((~PHYS_CACHEABILITY_MASK) & attributes) == 0);
+
+       /* We don't support the PTA bit for EPT PTEs */
+       if (attributes & INTEL_PTE_NCACHE)
+               ept_attributes = INTEL_EPT_NCACHE;
+       else
+               ept_attributes = INTEL_EPT_WB;
 
        pv_h = pai_to_pvh(pn);
        /* TODO: translate the PHYS_* bits to PTE bits, while they're
@@ -377,12 +407,18 @@ pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
                        pmap = pv_e->pmap;
                        vaddr = pv_e->va;
                        ptep = pmap_pte(pmap, vaddr);
-               
+                       
                        if (0 == ptep)
                                panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);
 
+                       is_ept = is_ept_pmap(pmap);
+
                        nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
-                       pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
+                       if (!is_ept) {
+                               pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
+                       } else {
+                               pmap_update_pte(ptep, INTEL_EPT_CACHE_MASK, ept_attributes);
+                       }
                        PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
                        pvh_e = nexth;
                } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
@@ -460,12 +496,15 @@ pmap_enter_options(
        uint64_t                delpage_pde_index = 0;
        pt_entry_t              old_pte;
        kern_return_t           kr_expand;
+       boolean_t               is_ept;
 
        pmap_intr_assert();
 
        if (pmap == PMAP_NULL)
                return KERN_INVALID_ARGUMENT;
 
+       is_ept = is_ept_pmap(pmap);
+
        /* N.B. We can be supplied a zero page frame in the NOENTER case, it's an
         * unused value for that scenario.
         */
@@ -534,7 +573,7 @@ Retry:
                return KERN_SUCCESS;
        }
 
-       if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
+       if (superpage && *pte && !(*pte & PTE_PS)) {
                /*
                 * There is still an empty page table mapped that
                 * was used for a previous base page mapping.
@@ -551,7 +590,7 @@ Retry:
        old_pa_locked = FALSE;
 
        if (old_pa == 0 &&
-           (*pte & INTEL_PTE_COMPRESSED)) {
+           (*pte & PTE_COMPRESSED)) {
                /* one less "compressed" */
                OSAddAtomic64(-1, &pmap->stats.compressed);
                /* marker will be cleared below */
@@ -578,32 +617,58 @@ Retry:
         */
        if (old_pa == pa) {
                pt_entry_t old_attributes =
-                   *pte & ~(INTEL_PTE_REF | INTEL_PTE_MOD);
+                   *pte & ~(PTE_REF(is_ept) | PTE_MOD(is_ept));
 
                /*
                 *      May be changing its wired attribute or protection
                 */
 
-               template = pa_to_pte(pa) | INTEL_PTE_VALID;
-               template |= pmap_get_cache_attributes(pa_index(pa));
+               template =  pa_to_pte(pa);
+
+               /* ?: WORTH ASSERTING THAT AT LEAST ONE RWX (implicit valid) PASSED FOR EPT? */
+               if (!is_ept) {
+                       template |= INTEL_PTE_VALID;
+               } else {
+                       template |= INTEL_EPT_IPTA;
+               }
+
+               template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
 
-               if (VM_MEM_NOT_CACHEABLE ==
-                   (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
+               /*
+                * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
+                */
+               if (!is_ept && (VM_MEM_NOT_CACHEABLE ==
+                   (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)))) {
                        if (!(flags & VM_MEM_GUARDED))
                                template |= INTEL_PTE_PTA;
                        template |= INTEL_PTE_NCACHE;
                }
-               if (pmap != kernel_pmap)
+               if (pmap != kernel_pmap && !is_ept)
                        template |= INTEL_PTE_USER;
+
+               if (prot & VM_PROT_READ)
+                       template |= PTE_READ(is_ept);
+
                if (prot & VM_PROT_WRITE) {
-                       template |= INTEL_PTE_WRITE;
+                       template |= PTE_WRITE(is_ept);
+                       if (is_ept && !pmap_ept_support_ad) {
+                               template |= PTE_MOD(is_ept);
+                               if (old_pa_locked) {
+                                       assert(IS_MANAGED_PAGE(pai));
+                                       pmap_phys_attributes[pai] |= PHYS_MODIFIED;
+                               }
+                       }
+               }
+               if (prot & VM_PROT_EXECUTE) {
+                       assert(set_NX == 0);
+                       template = pte_set_ex(template, is_ept);
                }
 
                if (set_NX)
-                       template |= INTEL_PTE_NX;
+                       template = pte_remove_ex(template, is_ept);
 
                if (wired) {
-                       template |= INTEL_PTE_WIRED;
+                       template |= PTE_WIRED;
                        if (!iswired(old_attributes))  {
                                OSAddAtomic(+1, &pmap->stats.wired_count);
                                pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
@@ -615,26 +680,36 @@ Retry:
                                pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
                        }
                }
+
                if (superpage)          /* this path can not be used */
-                       template |= INTEL_PTE_PS;       /* to change the page size! */
+                       template |= PTE_PS;     /* to change the page size! */
 
                if (old_attributes == template)
                        goto dont_update_pte;
 
                /* Determine delta, PV locked */
                need_tlbflush =
-                   ((old_attributes ^ template) != INTEL_PTE_WIRED);
+                   ((old_attributes ^ template) != PTE_WIRED);
                
-               if (need_tlbflush == TRUE && !(old_attributes & INTEL_PTE_WRITE)) {
-                       if ((old_attributes ^ template) == INTEL_PTE_WRITE)
+               if (need_tlbflush == TRUE && !(old_attributes & PTE_WRITE(is_ept))) {
+                       if ((old_attributes ^ template) == PTE_WRITE(is_ept))
                                need_tlbflush = FALSE;
                }
 
+               /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
+               if (is_ept && !pmap_ept_support_ad) {
+                       template |= PTE_REF(is_ept);
+                       if (old_pa_locked) {
+                               assert(IS_MANAGED_PAGE(pai));
+                               pmap_phys_attributes[pai] |= PHYS_REFERENCED;
+                       }
+               }
+
                /* store modified PTE and preserve RC bits */
                pt_entry_t npte, opte;;
                do {
                        opte = *pte;
-                       npte = template | (opte & (INTEL_PTE_REF | INTEL_PTE_MOD));
+                       npte = template | (opte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
                } while (!pmap_cmpx_pte(pte, opte, npte));
 dont_update_pte:
                if (old_pa_locked) {
@@ -666,12 +741,12 @@ dont_update_pte:
                 */
 
                /* invalidate the PTE */
-               pmap_update_pte(pte, INTEL_PTE_VALID, 0);
+               pmap_update_pte(pte, PTE_VALID_MASK(is_ept), 0);
                /* propagate invalidate everywhere */
                PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
                /* remember reference and change */
                old_pte = *pte;
-               oattr = (char) (old_pte & (PHYS_MODIFIED | PHYS_REFERENCED));
+               oattr = (char) (old_pte & (PTE_MOD(is_ept) | PTE_REF(is_ept)));
                /* completely invalidate the PTE */
                pmap_store_pte(pte, 0);
 
@@ -699,7 +774,12 @@ dont_update_pte:
                                pmap_ledger_debit(pmap, task_ledgers.wired_mem,
                                    PAGE_SIZE);
                        }
-                       pmap_phys_attributes[pai] |= oattr;
+
+                       if (!is_ept) {
+                               pmap_phys_attributes[pai] |= oattr;
+                       } else {
+                               pmap_phys_attributes[pai] |= ept_refmod_to_physmap(oattr);
+                       }
 
                        /*
                         *      Remove the mapping from the pvlist for
@@ -854,25 +934,49 @@ dont_update_pte:
         *      Build a template to speed up entering -
         *      only the pfn changes.
         */
-       template = pa_to_pte(pa) | INTEL_PTE_VALID;
+       template = pa_to_pte(pa);
+
+       if (!is_ept) {
+               template |= INTEL_PTE_VALID;
+       } else {
+               template |= INTEL_EPT_IPTA;
+       }
+
+
        /*
         * DRK: It may be worth asserting on cache attribute flags that diverge
         * from the existing physical page attributes.
         */
 
-       template |= pmap_get_cache_attributes(pa_index(pa));
-       
-       if (flags & VM_MEM_NOT_CACHEABLE) {
+       template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
+
+       /*
+        * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
+        */
+       if (!is_ept && (flags & VM_MEM_NOT_CACHEABLE)) {
                if (!(flags & VM_MEM_GUARDED))
                        template |= INTEL_PTE_PTA;
                template |= INTEL_PTE_NCACHE;
        }
-       if (pmap != kernel_pmap)
+       if (pmap != kernel_pmap && !is_ept)
                template |= INTEL_PTE_USER;
-       if (prot & VM_PROT_WRITE)
-               template |= INTEL_PTE_WRITE;
+       if (prot & VM_PROT_READ)
+               template |= PTE_READ(is_ept);
+       if (prot & VM_PROT_WRITE) {
+               template |= PTE_WRITE(is_ept);
+               if (is_ept && !pmap_ept_support_ad) {
+                       template |= PTE_MOD(is_ept);
+                       if (IS_MANAGED_PAGE(pai))
+                               pmap_phys_attributes[pai] |= PHYS_MODIFIED;
+               }
+       }
+       if (prot & VM_PROT_EXECUTE) {
+               assert(set_NX == 0);
+               template = pte_set_ex(template, is_ept);
+       }
+
        if (set_NX)
-               template |= INTEL_PTE_NX;
+               template = pte_remove_ex(template, is_ept);
        if (wired) {
                template |= INTEL_PTE_WIRED;
                OSAddAtomic(+1,  & pmap->stats.wired_count);
@@ -880,6 +984,14 @@ dont_update_pte:
        }
        if (superpage)
                template |= INTEL_PTE_PS;
+
+       /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
+       if (is_ept && !pmap_ept_support_ad) {
+               template |= PTE_REF(is_ept);
+               if (IS_MANAGED_PAGE(pai))
+                       pmap_phys_attributes[pai] |= PHYS_REFERENCED;
+       }
+
        pmap_store_pte(pte, template);
 
        /*
@@ -912,8 +1024,8 @@ Done:
                m = vm_page_lookup(delpage_pm_obj, (delpage_pde_index * PAGE_SIZE));
                if (m == VM_PAGE_NULL)
                    panic("pmap_enter: pte page not in object");
-               vm_object_unlock(delpage_pm_obj);
                VM_PAGE_FREE(m);
+               vm_object_unlock(delpage_pm_obj);
                OSAddAtomic(-1,  &inuse_ptepages_count);
                PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
        }
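The core of the pmap_enter_options changes: protection bits are now applied through the selector macros, and on hardware without EPT accessed/dirty support (pmap_ept_support_ad == FALSE) the REF/MOD bits are set eagerly at map time, with PHYS_REFERENCED/PHYS_MODIFIED recorded in pmap_phys_attributes up front, because the CPU will never set them on its own. A condensed sketch of that branch, pulled together from the hunk above:

    if (prot & VM_PROT_READ)
            template |= PTE_READ(is_ept);

    if (prot & VM_PROT_WRITE) {
            template |= PTE_WRITE(is_ept);
            if (is_ept && !pmap_ept_support_ad) {
                    template |= PTE_MOD(is_ept);                  /* pre-dirty the mapping */
                    if (IS_MANAGED_PAGE(pai))
                            pmap_phys_attributes[pai] |= PHYS_MODIFIED;
            }
    }

    if (is_ept && !pmap_ept_support_ad) {
            template |= PTE_REF(is_ept);                          /* pre-reference it too */
            if (IS_MANAGED_PAGE(pai))
                    pmap_phys_attributes[pai] |= PHYS_REFERENCED;
    }
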
@@ -941,7 +1053,8 @@ pmap_remove_range(
        pt_entry_t              *spte,
        pt_entry_t              *epte)
 {
-       pmap_remove_range_options(pmap, start_vaddr, spte, epte, 0);
+       pmap_remove_range_options(pmap, start_vaddr, spte, epte,
+                                 PMAP_OPTIONS_REMOVE);
 }
 
 void
@@ -963,6 +1076,7 @@ pmap_remove_range_options(
        ppnum_t                 pai;
        pmap_paddr_t            pa;
        vm_map_offset_t         vaddr;
+       boolean_t               is_ept = is_ept_pmap(pmap);
 
        num_removed = 0;
        num_unwired = 0;
@@ -983,12 +1097,12 @@ pmap_remove_range_options(
                if (pa == 0) {
                        if (pmap != kernel_pmap &&
                            (options & PMAP_OPTIONS_REMOVE) &&
-                           (p & INTEL_PTE_COMPRESSED)) {
+                           (p & PTE_COMPRESSED)) {
                                /* one less "compressed" */
                                num_compressed++;
                                /* clear marker */
                                /* XXX probably does not need to be atomic! */
-                               pmap_update_pte(cpte, INTEL_PTE_COMPRESSED, 0);
+                               pmap_update_pte(cpte, PTE_COMPRESSED, 0);
                        }
                        continue;
                }
@@ -1009,11 +1123,11 @@ pmap_remove_range_options(
                        continue;
                }
 
-               if ((p & INTEL_PTE_VALID) == 0)
+               if ((p & PTE_VALID_MASK(is_ept)) == 0)
                        num_invalid++;
 
                /* invalidate the PTE */
-               pmap_update_pte(cpte, INTEL_PTE_VALID, 0);
+               pmap_update_pte(cpte, PTE_VALID_MASK(is_ept), 0);
        }
 
        if (num_found == 0) {
@@ -1141,7 +1255,7 @@ pmap_remove(
        addr64_t        s64,
        addr64_t        e64)
 {
-       pmap_remove_options(map, s64, e64, 0);
+       pmap_remove_options(map, s64, e64, PMAP_OPTIONS_REMOVE);
 }
 
 void
@@ -1155,12 +1269,15 @@ pmap_remove_options(
        pt_entry_t     *spte, *epte;
        addr64_t        l64;
        uint64_t        deadline;
+       boolean_t       is_ept;
 
        pmap_intr_assert();
 
        if (map == PMAP_NULL || s64 == e64)
                return;
 
+       is_ept = is_ept_pmap(map);
+
        PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
                   map,
                   (uint32_t) (s64 >> 32), s64,
@@ -1209,8 +1326,8 @@ pmap_remove_options(
                        l64 = e64;
                pde = pmap_pde(map, s64);
 
-               if (pde && (*pde & INTEL_PTE_VALID)) {
-                       if (*pde & INTEL_PTE_PS) {
+               if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
+                       if (*pde & PTE_PS) {
                                /*
                                 * If we're removing a superpage, pmap_remove_range()
                                 * must work on level 2 instead of level 1; and we're
@@ -1285,6 +1402,7 @@ pmap_page_protect_options(
        pmap_t                  pmap;
        boolean_t               remove;
        pt_entry_t              new_pte_value;
+       boolean_t               is_ept;
 
        pmap_intr_assert();
        assert(pn != vm_page_fictitious_addr);
@@ -1334,7 +1452,15 @@ pmap_page_protect_options(
        do {
                vm_map_offset_t vaddr;
 
+               if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
+                   (pmap_phys_attributes[pai] & PHYS_MODIFIED)) {
+                       /* page was modified, so it will be compressed */
+                       options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
+                       options |= PMAP_OPTIONS_COMPRESSOR;
+               }
+
                pmap = pv_e->pmap;
+               is_ept = is_ept_pmap(pmap);
                vaddr = pv_e->va;
                pte = pmap_pte(pmap, vaddr);
 
@@ -1362,12 +1488,8 @@ pmap_page_protect_options(
                        if (pmap != kernel_pmap &&
                            (options & PMAP_OPTIONS_COMPRESSOR) &&
                            IS_INTERNAL_PAGE(pai)) {
-                               /* adjust "reclaimed" stats */
-                               OSAddAtomic64(+1, &pmap->stats.compressed);
-                               PMAP_STATS_PEAK(pmap->stats.compressed);
-                               pmap->stats.compressed_lifetime++;
                                /* mark this PTE as having been "reclaimed" */
-                               new_pte_value = INTEL_PTE_COMPRESSED;
+                               new_pte_value = PTE_COMPRESSED;
                        } else {
                                new_pte_value = 0;
                        }
@@ -1383,13 +1505,40 @@ pmap_page_protect_options(
                                /*
                                 * Remove the mapping, collecting dirty bits.
                                 */
-                               pmap_update_pte(pte, INTEL_PTE_VALID, 0);
+                               pmap_update_pte(pte, PTE_VALID_MASK(is_ept), 0);
 
                                PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
-                               pmap_phys_attributes[pai] |=
-                                       *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
+                               if ((options &
+                                    PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
+                                   ! (pmap_phys_attributes[pai] &
+                                      PHYS_MODIFIED) &&
+                                   (*pte & PHYS_MODIFIED)) {
+                                       /*
+                                        * Page is actually "modified" and
+                                        * will be compressed.  Start
+                                        * accounting for it as "compressed".
+                                        */
+                                       options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
+                                       options |= PMAP_OPTIONS_COMPRESSOR;
+                                       new_pte_value = PTE_COMPRESSED;
+                               }
+                               if (!is_ept) {
+                                       pmap_phys_attributes[pai] |=
+                                               *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
+                               } else {
+                                       pmap_phys_attributes[pai] |=
+                                               ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
+                               }
                                pmap_store_pte(pte, new_pte_value);
                        }
+
+                       if (new_pte_value == PTE_COMPRESSED) {
+                               /* one more "compressed" page */
+                               OSAddAtomic64(+1, &pmap->stats.compressed);
+                               PMAP_STATS_PEAK(pmap->stats.compressed);
+                               pmap->stats.compressed_lifetime++;
+                       }
+
 #if TESTING
                        if (pmap->stats.resident_count < 1)
                                panic("pmap_page_protect: resident_count");
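
The pmap_page_protect_options() hunk above adds a "compress only if modified" path: PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED is upgraded to a plain PMAP_OPTIONS_COMPRESSOR request as soon as either the global attribute byte or the PTE itself shows the page dirty, and only then is the compressed marker stored and the compressed statistic bumped. A hedged sketch of that decision; the flag and bit values here are illustrative, not the kernel's.

#include <stdbool.h>

#define PMAP_OPTIONS_COMPRESSOR               0x01   /* illustrative */
#define PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED  0x02   /* illustrative */
#define PHYS_MODIFIED                         0x40   /* illustrative */

static unsigned int
resolve_compressor_option(unsigned int options, char phys_attr, bool pte_dirty)
{
	if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
	    ((phys_attr & PHYS_MODIFIED) || pte_dirty)) {
		/* page really is dirty: account for it as "compressed" */
		options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
		options |= PMAP_OPTIONS_COMPRESSOR;
	}
	return options;
}
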
@@ -1445,9 +1594,14 @@ pmap_page_protect_options(
                        /*
                         * Write-protect, after opportunistic refmod collect
                         */
-                       pmap_phys_attributes[pai] |=
-                           *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
-                       pmap_update_pte(pte, INTEL_PTE_WRITE, 0);
+                       if (!is_ept) {
+                               pmap_phys_attributes[pai] |=
+                                       *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
+                       } else {
+                               pmap_phys_attributes[pai] |=
+                                       ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
+                       }
+                       pmap_update_pte(pte, PTE_WRITE(is_ept), 0);
 
                        if (options & PMAP_OPTIONS_NOFLUSH)
                                PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
@@ -1503,7 +1657,9 @@ phys_attribute_clear(
        int                     pai;
        pmap_t                  pmap;
        char                    attributes = 0;
-       boolean_t               is_internal, is_reusable;
+       boolean_t               is_internal, is_reusable, is_ept;
+       int                     ept_bits_to_clear;
+       boolean_t               ept_keep_global_mod = FALSE;
 
        if ((bits & PHYS_MODIFIED) &&
            (options & PMAP_OPTIONS_NOFLUSH) &&
@@ -1513,6 +1669,11 @@ phys_attribute_clear(
                      pn, bits, options, arg);
        }
 
+       /* We only support converting MOD and REF bits for EPT PTEs in this function */
+       assert((bits & ~(PHYS_REFERENCED | PHYS_MODIFIED)) == 0);
+
+       ept_bits_to_clear = (unsigned)physmap_refmod_to_ept(bits & (PHYS_MODIFIED | PHYS_REFERENCED));
+
        pmap_intr_assert();
        assert(pn != vm_page_fictitious_addr);
        if (pn == vm_page_guard_addr)
@@ -1534,6 +1695,7 @@ phys_attribute_clear(
 
        LOCK_PVH(pai);
 
+
        /*
         * Walk down PV list, clearing all modify or reference bits.
         * We do not have to lock the pv_list because we have
@@ -1554,18 +1716,29 @@ phys_attribute_clear(
                        char pte_bits;
 
                        pmap = pv_e->pmap;
+                       is_ept = is_ept_pmap(pmap);
                        va = pv_e->va;
                        pte_bits = 0;
 
                        if (bits) {
                                pte = pmap_pte(pmap, va);
                                /* grab ref/mod bits from this PTE */
-                               pte_bits = (*pte & (PHYS_MODIFIED |
-                                                   PHYS_REFERENCED));
+                               pte_bits = (*pte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
                                /* propagate to page's global attributes */
-                               attributes |= pte_bits;
+                               if (!is_ept) {
+                                       attributes |= pte_bits;
+                               } else {
+                                       attributes |= ept_refmod_to_physmap(pte_bits);
+                                       if (!pmap_ept_support_ad && (pte_bits & INTEL_EPT_MOD)) {
+                                               ept_keep_global_mod = TRUE;
+                                       }
+                               }
                                /* which bits to clear for this PTE? */
-                               pte_bits &= bits;
+                               if (!is_ept) {
+                                       pte_bits &= bits;
+                               } else {
+                                       pte_bits &= ept_bits_to_clear;
+                               }
                        }
 
                         /*
@@ -1639,7 +1812,17 @@ phys_attribute_clear(
         */
 
        pmap_phys_attributes[pai] |= attributes;
-       pmap_phys_attributes[pai] &= (~bits);
+
+       if (ept_keep_global_mod) {
+               /*
+                * If the hardware doesn't support AD bits for EPT PTEs and someone is
+                * requesting that we clear the modified bit for a phys page, we need
+                * to ensure that there are no EPT mappings for the page with the
+                * modified bit set. If there are, we cannot clear the global modified bit.
+                */
+               bits &= ~PHYS_MODIFIED;
+       }
+       pmap_phys_attributes[pai] &= ~(bits);
 
        /* update this page's "reusable" status */
        if (options & PMAP_OPTIONS_CLEAR_REUSABLE) {
@@ -1668,9 +1851,11 @@ phys_attribute_test(
        int                     pai;
        pmap_t                  pmap;
        int                     attributes = 0;
+       boolean_t               is_ept;
 
        pmap_intr_assert();
        assert(pn != vm_page_fictitious_addr);
+       assert((bits & ~(PHYS_MODIFIED | PHYS_REFERENCED)) == 0);
        if (pn == vm_page_guard_addr)
                return 0;
 
@@ -1714,13 +1899,19 @@ phys_attribute_test(
                        vm_map_offset_t va;
 
                        pmap = pv_e->pmap;
+                       is_ept = is_ept_pmap(pmap);
                        va = pv_e->va;
                        /*
                         * pick up modify and/or reference bits from mapping
                         */
 
                        pte = pmap_pte(pmap, va);
-                       attributes |= (int)(*pte & bits);
+                       if (!is_ept) {
+                               attributes |= (int)(*pte & bits);
+                       } else {
+                               attributes |= (int)(ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED));
+
+                       }
 
                        pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
 
@@ -1759,7 +1950,7 @@ pmap_change_wiring(
                 */
                pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
                OSAddAtomic(+1,  &map->stats.wired_count);
-               pmap_update_pte(pte, 0, INTEL_PTE_WIRED);
+               pmap_update_pte(pte, 0, PTE_WIRED);
        }
        else if (!wired && iswired(*pte)) {
                /*
@@ -1768,7 +1959,7 @@ pmap_change_wiring(
                assert(map->stats.wired_count >= 1);
                OSAddAtomic(-1,  &map->stats.wired_count);
                pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
-               pmap_update_pte(pte, INTEL_PTE_WIRED, 0);
+               pmap_update_pte(pte, PTE_WIRED, 0);
        }
 
        PMAP_UNLOCK(map);
@@ -1833,18 +2024,27 @@ unsigned int
 pmap_query_resident(
        pmap_t          pmap,
        addr64_t        s64,
-       addr64_t        e64)
+       addr64_t        e64,
+       unsigned int    *compressed_count_p)
 {
        pt_entry_t     *pde;
        pt_entry_t     *spte, *epte;
        addr64_t        l64;
        uint64_t        deadline;
        unsigned int    result;
+       boolean_t       is_ept;
+       unsigned int    compressed_count;
 
        pmap_intr_assert();
 
-       if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64)
+       if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64) {
+               if (compressed_count_p) {
+                       *compressed_count_p = 0;
+               }
                return 0;
+       }
+
+       is_ept = is_ept_pmap(pmap);
 
        PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
                   pmap,
@@ -1852,6 +2052,7 @@ pmap_query_resident(
                   (uint32_t) (e64 >> 32), e64);
 
        result = 0;
+       compressed_count = 0;
 
        PMAP_LOCK(pmap);
 
@@ -1863,8 +2064,8 @@ pmap_query_resident(
                        l64 = e64;
                pde = pmap_pde(pmap, s64);
 
-               if (pde && (*pde & INTEL_PTE_VALID)) {
-                       if (*pde & INTEL_PTE_PS) {
+               if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
+                       if (*pde & PTE_PS) {
                                /* superpage: not supported */
                        } else {
                                spte = pmap_pte(pmap,
@@ -1875,6 +2076,8 @@ pmap_query_resident(
                                for (; spte < epte; spte++) {
                                        if (pte_to_pa(*spte) != 0) {
                                                result++;
+                                       } else if (*spte & PTE_COMPRESSED) {
+                                               compressed_count++;
                                        }
                                }
 
@@ -1894,6 +2097,9 @@ pmap_query_resident(
        PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
                   pmap, 0, 0, 0, 0);
 
+       if (compressed_count_p) {
+               *compressed_count_p = compressed_count;
+       }
        return result;
 }
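
pmap_query_resident() above now reports two numbers in a single PTE walk: mappings with a real physical frame count as resident, while zero-frame PTEs carrying the compressed marker count as pages sitting in the compressor. A simplified sketch of that walk; the frame mask and marker bit are placeholders, not the real definitions.

#include <stdint.h>

#define PTE_FRAME_MASK  0x000FFFFFFFFFF000ULL   /* placeholder frame mask  */
#define PTE_COMPRESSED  (1ULL << 62)             /* placeholder marker bit  */

static void
count_range(const uint64_t *spte, const uint64_t *epte,
            unsigned int *resident, unsigned int *compressed)
{
	for (; spte < epte; spte++) {
		if (*spte & PTE_FRAME_MASK)
			(*resident)++;       /* backed by a physical page     */
		else if (*spte & PTE_COMPRESSED)
			(*compressed)++;     /* swapped out to the compressor */
	}
}
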
 
index ba0f1b1e5494eaf373df2dd7ef915ea55e474402..ab5042f22cce7f38d48a8b5c46a31992a8cfde4f 100644 (file)
@@ -268,7 +268,7 @@ static inline uintptr_t get_cr2(void)
 
 static inline uintptr_t get_cr3_raw(void)
 {
-       register uintptr_t cr3;
+       uintptr_t cr3;
        __asm__ volatile("mov %%cr3, %0" : "=r" (cr3));
        return(cr3);
 }
@@ -280,7 +280,7 @@ static inline void set_cr3_raw(uintptr_t value)
 
 static inline uintptr_t get_cr3_base(void)
 {
-       register uintptr_t cr3;
+       uintptr_t cr3;
        __asm__ volatile("mov %%cr3, %0" : "=r" (cr3));
        return(cr3 & ~(0xFFFULL));
 }
@@ -576,6 +576,7 @@ __END_DECLS
 #define MSR_IA32_VMX_VMCS_ENUM                         MSR_IA32_VMX_BASE+10
 #define MSR_IA32_VMX_PROCBASED_CTLS2           MSR_IA32_VMX_BASE+11
 #define MSR_IA32_VMX_EPT_VPID_CAP                      MSR_IA32_VMX_BASE+12
+#define                MSR_IA32_VMX_EPT_VPID_CAP_AD_SHIFT      21
 #define MSR_IA32_VMX_TRUE_PINBASED_CTLS                MSR_IA32_VMX_BASE+13
 #define MSR_IA32_VMX_TRUE_PROCBASED_CTLS       MSR_IA32_VMX_BASE+14
 #define MSR_IA32_VMX_TRUE_VMEXIT_CTLS          MSR_IA32_VMX_BASE+15
@@ -597,6 +598,7 @@ __END_DECLS
 
 #define MSR_IA32_PP0_ENERGY_STATUS             0x639
 #define MSR_IA32_PP1_ENERGY_STATUS             0x641
+
 #define MSR_IA32_IA_PERF_LIMIT_REASONS         0x690
 #define MSR_IA32_GT_PERF_LIMIT_REASONS         0x6B0
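
The new MSR_IA32_VMX_EPT_VPID_CAP_AD_SHIFT above names bit 21 of IA32_VMX_EPT_VPID_CAP, which the Intel SDM defines as "accessed and dirty flags for EPT supported". A hedged sketch of how such a capability check would typically look; rdmsr64() is assumed to be the kernel's MSR read primitive.

#include <stdbool.h>
#include <stdint.h>

#define MSR_IA32_VMX_BASE                  0x480   /* per the Intel SDM */
#define MSR_IA32_VMX_EPT_VPID_CAP          (MSR_IA32_VMX_BASE + 12)
#define MSR_IA32_VMX_EPT_VPID_CAP_AD_SHIFT 21

extern uint64_t rdmsr64(uint32_t msr);   /* assumed MSR read primitive */

static bool
ept_ad_supported(void)
{
	return ((rdmsr64(MSR_IA32_VMX_EPT_VPID_CAP) >>
	         MSR_IA32_VMX_EPT_VPID_CAP_AD_SHIFT) & 1) != 0;
}
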
 
index d0a2ed840eaf034467d94eb19a4a845d67b43806..7cfbf2631311de3718551c617647e4a28bf33fe0 100644 (file)
@@ -491,6 +491,12 @@ mach_absolute_time(void)
        return rtc_nanotime_read();
 }
 
+uint64_t
+mach_approximate_time(void)
+{
+       return rtc_nanotime_read();
+}
+
 void
 clock_interval_to_absolutetime_interval(
        uint32_t                interval,
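
mach_approximate_time() above is wired to the same rtc_nanotime_read() path as mach_absolute_time() on this target, so "approximate" time is exact here; other targets may return a cached, coarser value. A small hedged user-level usage sketch:

#include <mach/mach_time.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t start = mach_approximate_time();
	/* ... work whose duration only needs a rough bound ... */
	uint64_t end = mach_approximate_time();

	printf("approx elapsed: %llu absolute-time units\n",
	       (unsigned long long)(end - start));
	return 0;
}
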
diff --git a/osfmk/i386/smp.h b/osfmk/i386/smp.h
new file mode 100644 (file)
index 0000000..e98af29
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2014 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef        _I386_SMP_H_
+#define _I386_SMP_H_
+
+/* x86_64 kernels are always built SMP, even if only 1 CPU is active */
+#define __SMP__ 1
+
+#endif /* _I386_SMP_H_ */
index b75e36a990027b95528ea0c4b2170db3f2a71201..acefe5531eefa196fbf79d9975198ad65ea880a4 100644 (file)
@@ -163,6 +163,7 @@ extern void *act_thread_csave(void);
 extern void act_thread_catt(void *ctx);
 extern void act_thread_cfree(void *ctx);
 
+#define FIND_PERFCONTROL_STATE(th)     (PERFCONTROL_STATE_NULL)
 
 /*
  *     On the kernel stack is:
index 3a99e32a7de3cc328f70cf115a0a67db5e88cf05..592f75895f5e8180aca1b5188ce9c8dcfbc022f7 100644 (file)
@@ -116,7 +116,7 @@ extern void kprint_state(x86_saved_state64_t *saved_state);
  * Forward declarations
  */
 static void user_page_fault_continue(kern_return_t kret);
-static void panic_trap(x86_saved_state64_t *saved_state);
+static void panic_trap(x86_saved_state64_t *saved_state, uint32_t pl);
 static void set_recovery_ip(x86_saved_state64_t *saved_state, vm_offset_t ip);
 
 volatile perfCallback perfTrapHook = NULL; /* Pointer to CHUD trap hook routine */
@@ -391,12 +391,7 @@ interrupt(x86_saved_state_t *state)
        SCHED_STATS_INTERRUPT(current_processor());
 
 #if CONFIG_TELEMETRY
-       if (telemetry_needs_record
-               && (current_task() != kernel_task)
-#if CONFIG_SCHED_IDLE_IN_PLACE
-               && ((current_thread()->state & TH_IDLE) == 0) /* idle-in-place should be treated like the idle thread */
-#endif
-               ) {
+       if (telemetry_needs_record) {
                telemetry_mark_curthread(user_mode);
        }
 #endif
@@ -476,6 +471,7 @@ interrupt(x86_saved_state_t *state)
                MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_END,
                interrupt_num, 0, 0, 0, 0);
 
+       assert(ml_get_interrupts_enabled() == FALSE);
 }
 
 static inline void
@@ -517,7 +513,8 @@ kernel_trap(
        int                     fault_in_copy_window = -1;
 #endif
        int                     is_user = 0;
-       
+       int                     trap_pl = get_preemption_level();
+
        thread = current_thread();
 
        if (__improbable(is_saved_state32(state)))
@@ -791,7 +788,7 @@ debugger_entry:
 #endif
        }
        pal_cli();
-       panic_trap(saved_state);
+       panic_trap(saved_state, trap_pl);
        /*
         * NO RETURN
         */
@@ -808,7 +805,7 @@ set_recovery_ip(x86_saved_state64_t  *saved_state, vm_offset_t ip)
 
 
 static void
-panic_trap(x86_saved_state64_t *regs)
+panic_trap(x86_saved_state64_t *regs, uint32_t pl)
 {
        const char      *trapname = "Unknown";
        pal_cr_t        cr0, cr2, cr3, cr4;
@@ -854,7 +851,7 @@ panic_trap(x86_saved_state64_t *regs)
              "R8:  0x%016llx, R9:  0x%016llx, R10: 0x%016llx, R11: 0x%016llx\n"
              "R12: 0x%016llx, R13: 0x%016llx, R14: 0x%016llx, R15: 0x%016llx\n"
              "RFL: 0x%016llx, RIP: 0x%016llx, CS:  0x%016llx, SS:  0x%016llx\n"
-             "Fault CR2: 0x%016llx, Error code: 0x%016llx, Fault CPU: 0x%x%s%s%s%s\n",
+             "Fault CR2: 0x%016llx, Error code: 0x%016llx, Fault CPU: 0x%x%s%s%s%s, PL: %d\n",
              regs->isf.rip, regs->isf.trapno, trapname,
              cr0, cr2, cr3, cr4,
              regs->rax, regs->rbx, regs->rcx, regs->rdx,
@@ -866,7 +863,7 @@ panic_trap(x86_saved_state64_t *regs)
              virtualized ? " VMM" : "",
              potential_kernel_NX_fault ? " Kernel NX fault" : "",
              potential_smep_fault ? " SMEP/User NX fault" : "",
-             potential_smap_fault ? " SMAP fault" : "");
+             potential_smap_fault ? " SMAP fault" : "", pl);
        /*
         * This next statement is not executed,
         * but it's needed to stop the compiler using tail call optimization
index 619f87eaf4741efa2d951218596e696c995f09f6..e92e605d79d5a874a9d357c0a5b09b425438d21b 100644 (file)
@@ -152,6 +152,7 @@ extern volatile perfCallback perfIntHook;
 
 extern void            panic_i386_backtrace(void *, int, const char *, boolean_t, x86_saved_state_t *);
 extern void    print_one_backtrace(pmap_t pmap, vm_offset_t topfp, const char *cur_marker,     boolean_t is_64_bit, boolean_t nvram_format);
+extern void    print_thread_num_that_crashed(task_t task);
 extern void    print_tasks_user_threads(task_t task);
 extern void    print_threads_registers(thread_t thread);
 extern void    print_uuid_info(task_t task);
index ebf7f59937bd4f6e1ffe6cc769c9cc1a55fc4d37..c186ffc089330d15938f9133b1d4a74d4d7d86eb 100644 (file)
@@ -90,14 +90,12 @@ uint64_t    tsc_at_boot = 0;
 
 #define CPU_FAMILY_PENTIUM_M   (0x6)
 
-static const char      FSB_Frequency_prop[] = "FSBFrequency";
-static const char      TSC_at_boot_prop[]   = "InitialTSC";
 /*
- * This routine extracts the bus frequency in Hz from the device tree.
+ * This routine extracts a frequency property in Hz from the device tree.
  * Also reads any initial TSC value at boot from the device tree.
  */
 static uint64_t
-EFI_FSB_frequency(void)
+EFI_get_frequency(const char *prop)
 {
        uint64_t        frequency = 0;
        DTEntry         entry;
@@ -105,34 +103,27 @@ EFI_FSB_frequency(void)
        unsigned int    size;
 
        if (DTLookupEntry(0, "/efi/platform", &entry) != kSuccess) {
-               kprintf("EFI_FSB_frequency: didn't find /efi/platform\n");
+               kprintf("EFI_get_frequency: didn't find /efi/platform\n");
                return 0;
        }
-       if (DTGetProperty(entry,FSB_Frequency_prop,&value,&size) != kSuccess) {
-               kprintf("EFI_FSB_frequency: property %s not found\n",
-                       FSB_Frequency_prop);
+       if (DTGetProperty(entry,prop,&value,&size) != kSuccess) {
+               kprintf("EFI_get_frequency: property %s not found\n", prop);
                return 0;
        }
        if (size == sizeof(uint64_t)) {
                frequency = *(uint64_t *) value;
-               kprintf("EFI_FSB_frequency: read %s value: %llu\n",
-                       FSB_Frequency_prop, frequency);
-               if (!(90*Mega < frequency && frequency < 10*Giga)) {
-                       kprintf("EFI_FSB_frequency: value out of range\n");
-                       frequency = 0;
-               }
-       } else {
-               kprintf("EFI_FSB_frequency: unexpected size %d\n", size);
+               kprintf("EFI_get_frequency: read %s value: %llu\n",
+                       prop, frequency);
        }
 
        /*
         * While we're here, see if EFI published an initial TSC value.
         */
-       if (DTGetProperty(entry,TSC_at_boot_prop,&value,&size) == kSuccess) {
+       if (DTGetProperty(entry,"InitialTSC",&value,&size) == kSuccess) {
                if (size == sizeof(uint64_t)) {
                        tsc_at_boot = *(uint64_t *) value;
-                       kprintf("EFI_FSB_frequency: read %s value: %llu\n",
-                               TSC_at_boot_prop, tsc_at_boot);
+                       kprintf("EFI_get_frequency: read InitialTSC: %llu\n",
+                               tsc_at_boot);
                }
        }
 
@@ -173,11 +164,6 @@ tsc_init(void)
                }
        }
 
-       /*
-        * Get the FSB frequency and conversion factors from EFI.
-        */
-       busFreq = EFI_FSB_frequency();
-
        switch (cpuid_cpufamily()) {
        default: {
                uint64_t msr_flex_ratio;
@@ -197,6 +183,7 @@ tsc_init(void)
                                tscGranularity = flex_ratio;
                }
 
+               busFreq = EFI_get_frequency("FSBFrequency");
                /* If EFI isn't configured correctly, use a constant 
                 * value. See 6036811.
                 */
@@ -212,6 +199,8 @@ tsc_init(void)
                prfsts = rdmsr64(IA32_PERF_STS);
                tscGranularity = (uint32_t)bitfield(prfsts, 44, 40);
                N_by_2_bus_ratio = (prfsts & bit(46)) != 0;
+
+               busFreq = EFI_get_frequency("FSBFrequency");
            }
        }
 
@@ -229,25 +218,34 @@ tsc_init(void)
                (uint32_t)(busFCvtt2n >> 32), (uint32_t)busFCvtt2n,
                (uint32_t)(busFCvtn2t >> 32), (uint32_t)busFCvtn2t);
 
-       /*
-        * Get the TSC increment.  The TSC is incremented by this
-        * on every bus tick.  Calculate the TSC conversion factors
-        * to and from nano-seconds.
-        * The tsc granularity is also called the "bus ratio". If the N/2 bit
-        * is set this indicates the bus ration is 0.5 more than this - i.e.
-        * that the true bus ratio is (2*tscGranularity + 1)/2. If we cannot
-        * determine the TSC conversion, assume it ticks at the bus frequency.
-        */
-       if (tscGranularity == 0)
+       if (tscFreq == busFreq) {
+               bus2tsc = 1;
                tscGranularity = 1;
+               tscFCvtn2t = busFCvtn2t;
+               tscFCvtt2n = busFCvtt2n;
+       } else {
+               /*
+                * Get the TSC increment.  The TSC is incremented by this
+                * on every bus tick.  Calculate the TSC conversion factors
+                * to and from nano-seconds.
+                * The tsc granularity is also called the "bus ratio".
+                * If the N/2 bit is set this indicates the bus ratio is
+                * 0.5 more than this - i.e.  that the true bus ratio
+                * is (2*tscGranularity + 1)/2.
+                */
+               if (N_by_2_bus_ratio)
+                       tscFCvtt2n = busFCvtt2n * 2 / (1 + 2*tscGranularity);
+               else
+                       tscFCvtt2n = busFCvtt2n / tscGranularity;
 
-       if (N_by_2_bus_ratio)
-               tscFCvtt2n = busFCvtt2n * 2 / (1 + 2*tscGranularity);
-       else
-               tscFCvtt2n = busFCvtt2n / tscGranularity;
+               tscFreq = ((1 * Giga)  << 32) / tscFCvtt2n;
+               tscFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / tscFCvtt2n;
 
-       tscFreq = ((1 * Giga)  << 32) / tscFCvtt2n;
-       tscFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / tscFCvtt2n;
+               /*
+                * Calculate conversion from BUS to TSC
+                */
+               bus2tsc = tmrCvt(busFCvtt2n, tscFCvtn2t);
+       }
 
        kprintf(" TSC: Frequency = %6d.%06dMHz, "
                "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X, gran = %lld%s\n",
@@ -256,11 +254,6 @@ tsc_init(void)
                (uint32_t)(tscFCvtt2n >> 32), (uint32_t)tscFCvtt2n,
                (uint32_t)(tscFCvtn2t >> 32), (uint32_t)tscFCvtn2t,
                tscGranularity, N_by_2_bus_ratio ? " (N/2)" : "");
-
-       /*
-        * Calculate conversion from BUS to TSC
-        */
-       bus2tsc = tmrCvt(busFCvtt2n, tscFCvtn2t);
 }
 
 void
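
The reworked tsc_init() above derives the TSC conversion factors from the bus factor and the bus ratio: tscFCvtt2n = busFCvtt2n / tscGranularity (or busFCvtt2n * 2 / (2*tscGranularity + 1) when the N/2 bit is set), and tscFreq = ((10^9) << 32) / tscFCvtt2n, all in 32.32 fixed point. A hedged worked example with made-up numbers (100 MHz bus, bus ratio 25, no N/2), which yields roughly 2.5 GHz:

#include <stdint.h>
#include <stdio.h>

#define Giga (1000000000ULL)

int
main(void)
{
	uint64_t busFreq = 100 * 1000000ULL;          /* 100 MHz, illustrative */
	uint64_t busFCvtt2n = ((1 * Giga) << 32) / busFreq;
	uint32_t tscGranularity = 25;                 /* bus ratio, illustrative */
	int      N_by_2_bus_ratio = 0;

	uint64_t tscFCvtt2n = N_by_2_bus_ratio
	    ? busFCvtt2n * 2 / (1 + 2 * tscGranularity)
	    : busFCvtt2n / tscGranularity;
	uint64_t tscFreq = ((1 * Giga) << 32) / tscFCvtt2n;

	printf("TSC frequency ~ %llu Hz\n", (unsigned long long)tscFreq);
	return 0;
}
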
index 7f057ed03e88c3b97ace9ec3f5f3779e5dbbe7c8..e83a1655d606e0ef4217ce3f9378a21bd74b8101 100644 (file)
@@ -109,7 +109,7 @@ copyin_update(uint64_t inaddr)
         * It need only be aligned to 16-bytes, according to the SDM.
         * This also wires it down
         */
-       ret = kmem_alloc_kobject(kernel_map, (vm_offset_t *)&update, size);
+       ret = kmem_alloc_kobject(kernel_map, (vm_offset_t *)&update, size, VM_KERN_MEMORY_OSFMK);
        if (ret != KERN_SUCCESS)
                return ENOMEM;
 
index 2cedc19bd722744f6bed707df6c1202ab92b6495..806ed701bb9a369ce0dfd157df1bb2e07c3597ca 100644 (file)
@@ -39,7 +39,7 @@ vmx_pcalloc(void)
 {
        char               *pptr;
        kern_return_t   ret;
-       ret = kmem_alloc_kobject(kernel_map, (vm_offset_t *)&pptr, PAGE_SIZE);
+       ret = kmem_alloc_kobject(kernel_map, (vm_offset_t *)&pptr, PAGE_SIZE, VM_KERN_MEMORY_OSFMK);
        if (ret != KERN_SUCCESS) return (NULL);
        bzero(pptr, PAGE_SIZE);
        return (pptr);
index a8aa9c4005a807af1122b2220801a5538f81cd32..e234c3a18361b3822dd268ff19985f9fbc6613ca 100644 (file)
@@ -519,7 +519,7 @@ ipc_importance_task_check_transition(
                return FALSE;
 
 #if IMPORTANCE_DEBUG
-       int target_pid = (TASK_NULL != target_task) ? audit_token_pid_from_task(target_task) : -1;
+       int target_pid = task_pid(target_task);
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, (((boost) ? IMP_HOLD : IMP_DROP) | TASK_POLICY_INTERNAL))) | DBG_FUNC_START,
                                          proc_selfpid(), target_pid, task_imp->iit_assertcnt, IIT_EXTERN(task_imp), 0);
@@ -542,7 +542,7 @@ ipc_importance_task_check_transition(
                        if (target_task != TASK_NULL) {
                                printf("Over-release of kernel-internal importance assertions for pid %d (%s), "
                                       "dropping %d assertion(s) but task only has %d remaining (%d external).\n",
-                                      audit_token_pid_from_task(target_task),
+                                      task_pid(target_task),
                                       (target_task->bsd_info == NULL) ? "" : proc_name_address(target_task->bsd_info),
                                       delta,
                                       task_imp->iit_assertcnt,
@@ -1039,19 +1039,31 @@ ipc_importance_task_propagate_assertion_locked(
                temp_task_imp->iit_updatepolicy = 0;
                if (need_update && TASK_NULL != temp_task_imp->iit_task) {
                        if (NULL == temp_task_imp->iit_updateq) {
-                               temp_task_imp->iit_updatetime = 0;
-                               temp_task_imp->iit_updateq = &updates;
-                               ipc_importance_task_reference_internal(temp_task_imp);
-                               if (boost) {
-                                       queue_enter(&updates, temp_task_imp,
-                                                   ipc_importance_task_t, iit_updates);
+
+                               /*
+                                * If a downstream task that needs an update is subjects to AppNap,
+                                * If a downstream task that needs an update is subject to AppNap,
+                                * drop boosts according to the delay hysteresis.  Otherwise,
+                                * update it immediately.
+                               if (!boost && temp_task_imp != task_imp &&
+                                   ipc_importance_delayed_drop_call != NULL &&
+                                   ipc_importance_task_is_marked_denap_receiver(temp_task_imp)) {
+                                       ipc_importance_task_delayed_drop(temp_task_imp);
                                } else {
-                                       queue_enter_first(&updates, temp_task_imp,
-                                                         ipc_importance_task_t, iit_updates);
+                                       temp_task_imp->iit_updatetime = 0;
+                                       temp_task_imp->iit_updateq = &updates;
+                                       ipc_importance_task_reference_internal(temp_task_imp);
+                                       if (boost) {
+                                               queue_enter(&updates, temp_task_imp,
+                                                           ipc_importance_task_t, iit_updates);
+                                       } else {
+                                               queue_enter_first(&updates, temp_task_imp,
+                                                                 ipc_importance_task_t, iit_updates);
+                                       }
                                }
                        } else {
                                /* Must already be on the AppNap hysteresis queue */
-                               assert(&ipc_importance_delayed_drop_queue);
+                               assert(ipc_importance_delayed_drop_call != NULL);
                                assert(ipc_importance_task_is_marked_denap_receiver(temp_task_imp));
                        }       
                }
@@ -1242,7 +1254,7 @@ ipc_importance_task_hold_legacy_external_assertion(ipc_importance_task_t task_im
        target_task = task_imp->iit_task;
 
 #if IMPORTANCE_DEBUG
-       int target_pid = (TASK_NULL != target_task) ? audit_token_pid_from_task(target_task) : -1;
+       int target_pid = task_pid(target_task);
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, (IMP_HOLD | TASK_POLICY_EXTERNAL))) | DBG_FUNC_START,
                proc_selfpid(), target_pid, task_imp->iit_assertcnt, IIT_LEGACY_EXTERN(task_imp), 0);
@@ -1278,7 +1290,7 @@ ipc_importance_task_hold_legacy_external_assertion(ipc_importance_task_t task_im
                printf("BUG in process %s[%d]: "
                       "attempt to acquire an additional legacy external boost assertion without holding an existing legacy external assertion. "
                       "(%d total, %d external, %d legacy-external)\n",
-                      proc_name_address(target_task->bsd_info), audit_token_pid_from_task(target_task),
+                      proc_name_address(target_task->bsd_info), task_pid(target_task),
                       target_assertcnt, target_externcnt, target_legacycnt);
        }
 
@@ -1316,7 +1328,7 @@ ipc_importance_task_drop_legacy_external_assertion(ipc_importance_task_t task_im
        target_task = task_imp->iit_task;
 
 #if IMPORTANCE_DEBUG
-       int target_pid = (TASK_NULL != target_task) ? audit_token_pid_from_task(target_task) : -1;
+       int target_pid = task_pid(target_task);
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, (IMP_DROP | TASK_POLICY_EXTERNAL))) | DBG_FUNC_START,
                proc_selfpid(), target_pid, task_imp->iit_assertcnt, IIT_LEGACY_EXTERN(task_imp), 0);
@@ -1370,7 +1382,7 @@ ipc_importance_task_drop_legacy_external_assertion(ipc_importance_task_t task_im
        /* delayed printf for user-supplied data failures */
        if (KERN_FAILURE == ret && TASK_NULL != target_task) {
                printf("BUG in process %s[%d]: over-released legacy external boost assertions (%d total, %d external, %d legacy-external)\n",
-                      proc_name_address(target_task->bsd_info), audit_token_pid_from_task(target_task),
+                      proc_name_address(target_task->bsd_info), task_pid(target_task),
                       target_assertcnt, target_externcnt, target_legacycnt);
        }
 
@@ -1394,7 +1406,7 @@ ipc_importance_task_externalize_legacy_assertion(ipc_importance_task_t task_imp,
        }
 
 #if IMPORTANCE_DEBUG
-       int target_pid = audit_token_pid_from_task(target_task);
+       int target_pid = task_pid(target_task);
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, IMP_EXTERN)) | DBG_FUNC_START,
                proc_selfpid(), target_pid, task_imp->iit_assertcnt, IIT_EXTERN(task_imp), 0);
@@ -1458,7 +1470,7 @@ ipc_importance_task_update_live_donor(ipc_importance_task_t task_imp)
        task_live_donor = target_task->effective_policy.t_live_donor;
 
 #if IMPORTANCE_DEBUG
-       int target_pid = audit_token_pid_from_task(target_task);
+       int target_pid = task_pid(target_task);
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                  (IMPORTANCE_CODE(IMP_DONOR_CHANGE, IMP_DONOR_UPDATE_LIVE_DONOR_STATE)) | DBG_FUNC_START,
@@ -1520,7 +1532,7 @@ ipc_importance_task_mark_donor(ipc_importance_task_t task_imp, boolean_t donatin
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                  (IMPORTANCE_CODE(IMP_DONOR_CHANGE, IMP_DONOR_INIT_DONOR_STATE)) | DBG_FUNC_NONE,
-                                 audit_token_pid_from_task(task_imp->iit_task), donating,
+                                 task_pid(task_imp->iit_task), donating,
                                  old_donor, task_imp->iit_donor, 0);
        
        ipc_importance_unlock();
@@ -1901,7 +1913,7 @@ void task_importance_update_owner_info(task_t task) {
        if (task != TASK_NULL && task->task_imp_base != IIT_NULL) {
                ipc_importance_task_t task_elem = task->task_imp_base;
 
-               task_elem->iit_bsd_pid = audit_token_pid_from_task(task);
+               task_elem->iit_bsd_pid = task_pid(task);
                if (task->bsd_info) {
                        strncpy(&task_elem->iit_procname[0], proc_name_address(task->bsd_info), 16);
                        task_elem->iit_procname[16] = '\0';
@@ -2174,7 +2186,7 @@ ipc_importance_send(
                unsigned int sender_pid = dbgtrailer->msgh_audit.val[5];
                mach_msg_id_t imp_msgh_id = kmsg->ikm_header->msgh_id;
                KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_MSG, IMP_MSG_SEND)) | DBG_FUNC_START,
-                                          audit_token_pid_from_task(task), sender_pid, imp_msgh_id, 0, 0);
+                                          task_pid(task), sender_pid, imp_msgh_id, 0, 0);
        }
 #endif /* IMPORTANCE_DEBUG */
 
@@ -2526,29 +2538,36 @@ ipc_importance_receive(
 
                assert(IIE_NULL == kmsg->ikm_importance);
 
-               /* replace the importance attribute with the handle we created */
-               /*  our made reference on the inhert is donated to the voucher */
-               recipe = (ipc_voucher_attr_recipe_t)&recipes[recipe_size];
-               recipe->key = MACH_VOUCHER_ATTR_KEY_IMPORTANCE;
-               recipe->command = MACH_VOUCHER_ATTR_SET_VALUE_HANDLE;
-               recipe->previous_voucher = IPC_VOUCHER_NULL;
-               recipe->content_size = sizeof(mach_voucher_attr_value_handle_t);
-               *(mach_voucher_attr_value_handle_t *)(void *)recipe->content = handle;
-               recipe_size += sizeof(*recipe) + sizeof(mach_voucher_attr_value_handle_t);
-
-               kr = ipc_voucher_attr_control_create_mach_voucher(ipc_importance_control,
-                                                                 recipes,
-                                                                 recipe_size,
-                                                                 &recv_voucher);
-               assert(KERN_SUCCESS == kr);
-
-               /* swap the voucher port (and set voucher bits in case it didn't already exist) */
-               kmsg->ikm_header->msgh_bits |= (MACH_MSG_TYPE_MOVE_SEND << 16);
-               ipc_port_release_send(kmsg->ikm_voucher);
-               kmsg->ikm_voucher = convert_voucher_to_port(recv_voucher);
-               if (III_NULL != inherit)
-                       impresult = 2;
-
+               /*
+                * Only create a new voucher if we have an inherit object
+                * (from the ikm_importance field of the incoming message), OR
+                * we have a valid incoming voucher. If we have neither of
+                * these things then there is no need to create a new voucher.
+                */
+               if (IP_VALID(kmsg->ikm_voucher) || inherit != III_NULL) {
+                       /* replace the importance attribute with the handle we created */
+                       /*  our made reference on the inherit is donated to the voucher */
+                       recipe = (ipc_voucher_attr_recipe_t)&recipes[recipe_size];
+                       recipe->key = MACH_VOUCHER_ATTR_KEY_IMPORTANCE;
+                       recipe->command = MACH_VOUCHER_ATTR_SET_VALUE_HANDLE;
+                       recipe->previous_voucher = IPC_VOUCHER_NULL;
+                       recipe->content_size = sizeof(mach_voucher_attr_value_handle_t);
+                       *(mach_voucher_attr_value_handle_t *)(void *)recipe->content = handle;
+                       recipe_size += sizeof(*recipe) + sizeof(mach_voucher_attr_value_handle_t);
+
+                       kr = ipc_voucher_attr_control_create_mach_voucher(ipc_importance_control,
+                                                                         recipes,
+                                                                         recipe_size,
+                                                                         &recv_voucher);
+                       assert(KERN_SUCCESS == kr);
+
+                       /* swap the voucher port (and set voucher bits in case it didn't already exist) */
+                       kmsg->ikm_header->msgh_bits |= (MACH_MSG_TYPE_MOVE_SEND << 16);
+                       ipc_port_release_send(kmsg->ikm_voucher);
+                       kmsg->ikm_voucher = convert_voucher_to_port(recv_voucher);
+                       if (III_NULL != inherit)
+                               impresult = 2;
+               }
        } else { /* Don't want a voucher */
 
                /* got linked importance? have to drop */
@@ -2588,7 +2607,7 @@ ipc_importance_receive(
 #if IMPORTANCE_DEBUG
        if (-1 < impresult)
                KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_MSG, IMP_MSG_DELV)) | DBG_FUNC_NONE,
-                               sender_pid, audit_token_pid_from_task(task_self),
+                               sender_pid, task_pid(task_self),
                                kmsg->ikm_header->msgh_id, impresult, 0);
        if (impresult == 2){
                /*
@@ -2596,7 +2615,7 @@ ipc_importance_receive(
                 * will trigger the probe in ipc_importance_task_externalize_assertion() 
                 * above and have impresult==1 here.
                 */
-               DTRACE_BOOST5(receive_boost, task_t, task_self, int, audit_token_pid_from_task(task_self), int, sender_pid, int, 1, int, task_self->task_imp_base->iit_assertcnt);
+               DTRACE_BOOST5(receive_boost, task_t, task_self, int, task_pid(task_self), int, sender_pid, int, 1, int, task_self->task_imp_base->iit_assertcnt);
     }
 #endif /* IMPORTANCE_DEBUG */
 }
@@ -2939,22 +2958,22 @@ ipc_importance_extract_content(
                        ipc_importance_inherit_t inherit = III_NULL;
                        ipc_importance_task_t task_imp;
                        task_t task;
-                       int task_pid;
+                       int t_pid;
 
                        if (IIE_TYPE_TASK == IIE_TYPE(elem)) {
                                task_imp = (ipc_importance_task_t)elem;
                                task = task_imp->iit_task;
-                               task_pid = (TASK_NULL != task) ?
-                                          audit_token_pid_from_task(task) : -1;
-                               snprintf((char *)out_content + size, *in_out_content_size - size, "%d", task_pid);
+                               t_pid = (TASK_NULL != task) ?
+                                          task_pid(task) : -1;
+                               snprintf((char *)out_content + size, *in_out_content_size - size, "%d", t_pid);
                        } else {
                                inherit = (ipc_importance_inherit_t)elem;
                                task_imp = inherit->iii_to_task;
                                task = task_imp->iit_task;
-                               task_pid = (TASK_NULL != task) ?
-                                          audit_token_pid_from_task(task) : -1;
+                               t_pid = (TASK_NULL != task) ?
+                                          task_pid(task) : -1;
                                snprintf((char *)out_content + size, *in_out_content_size - size, 
-                                        "%d (%d of %d boosts) %s from pid ", task_pid,
+                                        "%d (%d of %d boosts) %s from pid ", t_pid,
                                         III_EXTERN(inherit), inherit->iii_externcnt, 
                                         (inherit->iii_donating) ? "donated" : "linked");
                        }
@@ -3037,15 +3056,21 @@ ipc_importance_command(
                return KERN_SUCCESS;
        }
 
+       to_task = inherit->iii_to_task;
+       assert(ipc_importance_task_is_any_receiver_type(to_task));
+
+       /* if not donating to a denap receiver, it was called incorrectly */
+       if (!ipc_importance_task_is_marked_denap_receiver(to_task)) {
+               ipc_importance_unlock();
+               return KERN_INVALID_ARGUMENT; /* keeps dispatch happy */
+       }
+
        /* Enough external references left to drop? */
        if (III_EXTERN(inherit) < refs) {
                ipc_importance_unlock();
                return KERN_FAILURE;
        }
 
-       to_task = inherit->iii_to_task;
-       assert(ipc_importance_task_is_any_receiver_type(to_task));
-
        /* re-base external and internal counters at the inherit and the to-task (if apropos) */
        if (inherit->iii_donating) {
                assert(IIT_EXTERN(to_task) >= III_EXTERN(inherit));
@@ -3185,9 +3210,9 @@ ipc_importance_thread_call_init(void)
  *             Will panic the system otherwise.
  */
 extern int
-task_importance_list_pids(task_t task, int flags, int *pid_list, unsigned int max_count)
+task_importance_list_pids(task_t task, int flags, char *pid_list, unsigned int max_count)
 {
-       if (lck_spin_is_acquired(&ipc_importance_lock_data) ||
+       if (kdp_lck_spin_is_acquired(&ipc_importance_lock_data) ||
              max_count < 1 ||
              task->task_imp_base == IIT_NULL ||
              pid_list == NULL ||
@@ -3200,12 +3225,13 @@ task_importance_list_pids(task_t task, int flags, int *pid_list, unsigned int ma
        ipc_kmsg_t temp_kmsg;
        ipc_importance_inherit_t temp_inherit;
        ipc_importance_elem_t elem;
-       int target_pid;
+       int target_pid = 0, previous_pid;
 
        queue_iterate(&task_imp->iit_inherits, temp_inherit, ipc_importance_inherit_t, iii_inheritance) {
                /* check space in buffer */
                if (pidcount >= max_count) 
                        break;
+               previous_pid = target_pid;
                target_pid = -1;
 
                if (temp_inherit->iii_donating) {
@@ -3215,20 +3241,24 @@ task_importance_list_pids(task_t task, int flags, int *pid_list, unsigned int ma
 #else
                        temp_task = temp_inherit->iii_to_task->iit_task;
                        if (temp_task != TASK_NULL) {
-                               target_pid = audit_token_pid_from_task(temp_task);
+                               target_pid = task_pid(temp_task);
                        }
 #endif
                }
 
-               if (target_pid != -1) {
-                       pid_list[pidcount++] = target_pid;
+               if (target_pid != -1 && previous_pid != target_pid) {
+                       memcpy(pid_list, &target_pid, sizeof(target_pid));
+                       pid_list += sizeof(target_pid);
+                       pidcount++;
                }
 
        }
 
+       target_pid = 0;
        queue_iterate(&task_imp->iit_kmsgs, temp_kmsg, ipc_kmsg_t, ikm_inheritance) {
                if (pidcount >= max_count)
                        break;
+               previous_pid = target_pid;
                target_pid = -1;
                elem = temp_kmsg->ikm_importance;
                temp_task = TASK_NULL;
@@ -3243,7 +3273,7 @@ task_importance_list_pids(task_t task, int flags, int *pid_list, unsigned int ma
 
                if (IIE_TYPE_TASK == IIE_TYPE(elem) && 
                        (((ipc_importance_task_t)elem)->iit_task != TASK_NULL)) {
-                       target_pid = audit_token_pid_from_task(((ipc_importance_task_t)elem)->iit_task);
+                       target_pid = task_pid(((ipc_importance_task_t)elem)->iit_task);
                } else {
                        temp_inherit = (ipc_importance_inherit_t)elem;
 #if DEVELOPMENT || DEBUG
@@ -3251,13 +3281,15 @@ task_importance_list_pids(task_t task, int flags, int *pid_list, unsigned int ma
 #else
                        temp_task = temp_inherit->iii_to_task->iit_task;
                        if (temp_task != TASK_NULL) {
-                               target_pid = audit_token_pid_from_task(temp_task);
+                               target_pid = task_pid(temp_task);
                        }
 #endif
                }
 
-               if (target_pid != -1) {
-                       pid_list[pidcount++] = target_pid;
+               if (target_pid != -1 && previous_pid != target_pid) {
+                       memcpy(pid_list, &target_pid, sizeof(target_pid));
+                       pid_list += sizeof(target_pid);
+                       pidcount++;
                }
        }
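
task_importance_list_pids() above now takes a char * destination and memcpy's each pid into it, so the caller's buffer need not be int-aligned, and a pid identical to the one seen on the previous iteration is skipped. A simplified sketch of that packing loop; the names are stand-ins for the queue iteration in the real function.

#include <string.h>

static unsigned int
pack_pids(const int *candidates, unsigned int n,
          char *pid_list, unsigned int max_count)
{
	unsigned int pidcount = 0;
	int target_pid = 0, previous_pid;

	for (unsigned int i = 0; i < n && pidcount < max_count; i++) {
		previous_pid = target_pid;
		target_pid = candidates[i];          /* -1 means "no pid" */

		if (target_pid != -1 && previous_pid != target_pid) {
			memcpy(pid_list, &target_pid, sizeof(target_pid));
			pid_list += sizeof(target_pid);
			pidcount++;
		}
	}
	return pidcount;
}
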
 
index f811faf165678c1772a630edb721c845b71044c0..3b009b42d67e6dae47b3bed54337133a0682a899 100644 (file)
@@ -257,7 +257,7 @@ extern void task_importance_update_owner_info(task_t task);
 
 #if XNU_KERNEL_PRIVATE 
 #define TASK_IMP_LIST_DONATING_PIDS  0x1
-extern int task_importance_list_pids(task_t task, int flags, int *pid_list, unsigned int max_count);
+extern int task_importance_list_pids(task_t task, int flags, char *pid_list, unsigned int max_count);
 #endif
 
 __END_DECLS
index 2211d1b18f19cc0ebb1b3676e3ca4be03f86cd5b..d81098ce99fe28a456af3681823ba5ed9ecc29c6 100644 (file)
@@ -222,7 +222,7 @@ ipc_bootstrap(void)
 /* 
  * XXX tunable, belongs in mach.message.h 
  */
-#define MSG_OOL_SIZE_SMALL_MAX 4096
+#define MSG_OOL_SIZE_SMALL_MAX (2*PAGE_SIZE)
 vm_size_t msg_ool_size_small;
 
 /*
@@ -238,13 +238,13 @@ ipc_init(void)
        vm_offset_t min;
 
        retval = kmem_suballoc(kernel_map, &min, ipc_kernel_map_size,
-                              TRUE, VM_FLAGS_ANYWHERE, &ipc_kernel_map);
+                              TRUE, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_IPC), &ipc_kernel_map);
 
        if (retval != KERN_SUCCESS)
                panic("ipc_init: kmem_suballoc of ipc_kernel_map failed");
 
        retval = kmem_suballoc(kernel_map, &min, ipc_kernel_copy_map_size,
-                              TRUE, VM_FLAGS_ANYWHERE, &ipc_kernel_copy_map);
+                              TRUE, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_IPC), &ipc_kernel_copy_map);
 
        if (retval != KERN_SUCCESS)
                panic("ipc_init: kmem_suballoc of ipc_kernel_copy_map failed");
@@ -264,6 +264,8 @@ ipc_init(void)
        else {
                msg_ool_size_small = MSG_OOL_SIZE_SMALL_MAX;
        }
+       /* account for overhead to avoid spilling over a page */
+       msg_ool_size_small -= cpy_kdata_hdr_sz;
 
        ipc_host_init();
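
The ipc_init() change above raises the "small" out-of-line data threshold to two pages and then subtracts the copy-in header, so header plus payload never spill into an extra page. A hedged arithmetic sketch; the 64-byte header is an assumption standing in for the real cpy_kdata_hdr_sz.

#include <stdio.h>

int
main(void)
{
	unsigned long page_size = 4096;
	unsigned long hdr = 64;                 /* assumed cpy_kdata_hdr_sz */
	unsigned long msg_ool_size_small = 2 * page_size - hdr;

	printf("small OOL limit: %lu bytes\n", msg_ool_size_small);
	return 0;
}
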
 
index 1005a37d9b5512500596eb1520641eeb83c2838c..3954e31f3e46b72fddde62357bc63562283cd49f 100644 (file)
@@ -397,7 +397,7 @@ mm_copy_options_string64(
                name = "VIRTUAL";
                break;
            case MACH_MSG_OVERWRITE:
-               name = "OVERWRITE";
+               name = "OVERWRITE(DEPRECATED)";
                break;
            case MACH_MSG_ALLOCATE:
                name = "ALLOCATE";
@@ -1373,9 +1373,19 @@ ipc_kmsg_send(
        mach_msg_timeout_t      send_timeout)
 {
        ipc_port_t port;
+       thread_t th = current_thread();
        mach_msg_return_t error = MACH_MSG_SUCCESS;
        spl_t s;
 
+       /* Check if honor qlimit flag is set on thread. */
+       if ((th->options & TH_OPT_HONOR_QLIMIT) == TH_OPT_HONOR_QLIMIT) {
+               /* Remove the MACH_SEND_ALWAYS flag to honor queue limit. */
+               option &= (~MACH_SEND_ALWAYS);
+               /* Add the timeout flag since the message queue might be full. */
+               option |= MACH_SEND_TIMEOUT;
+               th->options &= (~TH_OPT_HONOR_QLIMIT);
+       }
+
 #if IMPORTANCE_INHERITANCE
        boolean_t did_importance = FALSE;
 #if IMPORTANCE_DEBUG
@@ -1495,7 +1505,7 @@ retry:
                }
 #if IMPORTANCE_DEBUG
                KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_MSG, IMP_MSG_SEND)) | DBG_FUNC_END,
-                                         audit_token_pid_from_task(current_task()), sender_pid, imp_msgh_id, importance_cleared, 0);
+                                         task_pid(current_task()), sender_pid, imp_msgh_id, importance_cleared, 0);
 #endif /* IMPORTANCE_DEBUG */
        }
 #endif /* IMPORTANCE_INHERITANCE */
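
The ipc_kmsg_send() hunk above adds a one-shot TH_OPT_HONOR_QLIMIT thread option: when set, the send path strips MACH_SEND_ALWAYS and forces MACH_SEND_TIMEOUT, so a full receive queue makes the send time out instead of overflowing the queue limit. A hedged sketch of that option rewrite; the flag values are illustrative, not the kernel's.

#include <stdint.h>

#define MACH_SEND_TIMEOUT     0x00000010   /* illustrative value */
#define MACH_SEND_ALWAYS      0x00010000   /* illustrative value */
#define TH_OPT_HONOR_QLIMIT   0x00000040   /* illustrative value */

static uint32_t
apply_qlimit_policy(uint32_t option, uint32_t *th_options)
{
	if ((*th_options & TH_OPT_HONOR_QLIMIT) == TH_OPT_HONOR_QLIMIT) {
		option &= ~MACH_SEND_ALWAYS;          /* honor the queue limit  */
		option |= MACH_SEND_TIMEOUT;          /* time out if queue full */
		*th_options &= ~TH_OPT_HONOR_QLIMIT;  /* one-shot flag          */
	}
	return option;
}
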
@@ -1670,17 +1680,11 @@ ipc_kmsg_copyin_header(
        ipc_entry_t reply_entry = IE_NULL;
        ipc_entry_t voucher_entry = IE_NULL;
 
-#if IMPORTANCE_INHERITANCE
        int assertcnt = 0;
+#if IMPORTANCE_INHERITANCE
        boolean_t needboost = FALSE;
 #endif /* IMPORTANCE_INHERITANCE */
 
-       queue_head_t links_data;
-       queue_t links = &links_data;
-       wait_queue_link_t wql;
-
-       queue_init(links);
-
        if ((mbits != msg->msgh_bits) ||
            (!MACH_MSG_TYPE_PORT_ANY_SEND(dest_type)) ||
            ((reply_type == 0) ?
@@ -1796,21 +1800,11 @@ ipc_kmsg_copyin_header(
                 *      Perform the delayed reply right copyin (guaranteed success).
                 */
                if (reply_entry != IE_NULL) {
-#if IMPORTANCE_INHERITANCE
                        kr = ipc_right_copyin(space, reply_name, reply_entry,
                                              reply_type, TRUE,
                                              &reply_port, &reply_soright,
-                                             &release_port,
-                                             &assertcnt,
-                                             links);
+                                             &release_port, &assertcnt);
                        assert(assertcnt == 0);
-#else
-                       kr = ipc_right_copyin(space, reply_name, reply_entry,
-                                             reply_type, TRUE,
-                                             &reply_port, &reply_soright,
-                                             &release_port,
-                                             links);
-#endif /* IMPORTANCE_INHERITANCE */
                        assert(kr == KERN_SUCCESS);
                }
 
@@ -1901,21 +1895,11 @@ ipc_kmsg_copyin_header(
                        /*
                         *      copyin the destination.
                         */
-#if IMPORTANCE_INHERITANCE
                        kr = ipc_right_copyin(space, dest_name, dest_entry,
                                              dest_type, FALSE,
                                              &dest_port, &dest_soright,
-                                             &release_port,
-                                             &assertcnt,
-                                             links);
+                                             &release_port, &assertcnt);
                        assert(assertcnt == 0);
-#else
-                       kr = ipc_right_copyin(space, dest_name, dest_entry,
-                                             dest_type, FALSE,
-                                             &dest_port, &dest_soright,
-                                             &release_port,
-                                             links);
-#endif /* IMPORTANCE_INHERITANCE */
                        if (kr != KERN_SUCCESS) {
                                goto invalid_dest;
                        }
@@ -1927,21 +1911,11 @@ ipc_kmsg_copyin_header(
                         *      It's OK if the reply right has gone dead in the meantime.
                         */
                        if (MACH_PORT_VALID(reply_name)) {
-#if IMPORTANCE_INHERITANCE
                                kr = ipc_right_copyin(space, reply_name, reply_entry,
                                                      reply_type, TRUE,
                                                      &reply_port, &reply_soright,
-                                                     &release_port,
-                                                     &assertcnt,
-                                                     links);
+                                                     &release_port, &assertcnt);
                                assert(assertcnt == 0);
-#else
-                               kr = ipc_right_copyin(space, reply_name, reply_entry,
-                                                     reply_type, TRUE,
-                                                     &reply_port, &reply_soright,
-                                                     &release_port,
-                                                     links);
-#endif /* IMPORTANCE_INHERITANCE */
                                assert(kr == KERN_SUCCESS);
                        } else {
                                /* convert invalid name to equivalent ipc_object type */
@@ -1954,23 +1928,13 @@ ipc_kmsg_copyin_header(
                 * are fully copied in (guaranteed success).
                 */
                if (IE_NULL != voucher_entry) {
-#if IMPORTANCE_INHERITANCE
                        kr = ipc_right_copyin(space, voucher_name, voucher_entry,
                                              voucher_type, FALSE,
                                              (ipc_object_t *)&voucher_port,
                                              &voucher_soright,
                                              &voucher_release_port,
-                                             &assertcnt,
-                                             links);
+                                             &assertcnt);
                        assert(assertcnt == 0);
-#else
-                       kr = ipc_right_copyin(space, voucher_name, voucher_entry,
-                                             voucher_type, FALSE,
-                                             (ipc_object_t *)&voucher_port,
-                                             &voucher_soright,
-                                             &voucher_release_port,
-                                             links);
-#endif /* IMPORTANCE_INHERITANCE */
                        assert(KERN_SUCCESS == kr);
                        assert(IP_VALID(voucher_port));
                        assert(ip_active(voucher_port));
@@ -2072,11 +2036,6 @@ ipc_kmsg_copyin_header(
        msg->msgh_remote_port = (ipc_port_t)dest_port;
        msg->msgh_local_port = (ipc_port_t)reply_port;
 
-       while(!queue_empty(links)) {
-               wql = (wait_queue_link_t) dequeue(links);
-               wait_queue_link_free(wql);
-       }
-
        if (release_port != IP_NULL)
                ip_release(release_port);
 
@@ -2088,11 +2047,6 @@ ipc_kmsg_copyin_header(
 invalid_reply:
        is_write_unlock(space);
 
-       while(!queue_empty(links)) {
-               wql = (wait_queue_link_t) dequeue(links);
-               wait_queue_link_free(wql);
-       }
-
        if (release_port != IP_NULL)
                ip_release(release_port);
 
@@ -2104,11 +2058,6 @@ invalid_reply:
 invalid_dest:
        is_write_unlock(space);
 
-       while(!queue_empty(links)) {
-               wql = (wait_queue_link_t) dequeue(links);
-               wait_queue_link_free(wql);
-       }
-
        if (release_port != IP_NULL)
                ip_release(release_port);
 
@@ -2573,7 +2522,7 @@ ipc_kmsg_copyin_body(
      */
     if (space_needed) {
         if (vm_allocate(ipc_kernel_copy_map, &paddr, space_needed, 
-                    VM_FLAGS_ANYWHERE) != KERN_SUCCESS) {
+                    VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_IPC)) != KERN_SUCCESS) {
             ipc_kmsg_clean_partial(kmsg, 0, NULL, 0, 0);
             mr = MACH_MSG_VM_KERNEL;
             goto out;
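The change above adds VM_MAKE_TAG(VM_KERN_MEMORY_IPC) so the allocation is attributed to the IPC bucket in kernel memory accounting. A minimal sketch of the same idiom (illustrative only; the fixed size and the immediate deallocation are hypothetical):

    vm_offset_t paddr = 0;
    vm_size_t   size  = round_page(4096);

    if (vm_allocate(ipc_kernel_copy_map, &paddr, size,
                    VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_IPC)) == KERN_SUCCESS) {
            /* ... stage out-of-line message data in the tagged buffer ... */
            vm_deallocate(ipc_kernel_copy_map, paddr, size);
    }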
@@ -3485,55 +3434,12 @@ ipc_kmsg_copyout_ool_descriptor(mach_msg_ool_descriptor_t *dsc, mach_msg_descrip
     copy_options = dsc->copy;
     assert(copy_options != MACH_MSG_KALLOC_COPY_T);
     dsc_type = dsc->type;
-    rcv_addr = 0;
 
     if (copy != VM_MAP_COPY_NULL) {
-        /*
-         * Check to see if there is an overwrite descriptor
-         * specified in the scatter list for this ool data.
-         * The descriptor has already been verified.
-         */
-#if 0
-        if (saddr != MACH_MSG_DESCRIPTOR_NULL) {
-            if (differs) {
-                OTHER_OOL_DESCRIPTOR *scatter_dsc;
-
-                scatter_dsc = (OTHER_OOL_DESCRIPTOR *)saddr;
-                if (scatter_dsc->copy == MACH_MSG_OVERWRITE) {
-                    rcv_addr = (mach_vm_offset_t) scatter_dsc->address;
-                    copy_options = MACH_MSG_OVERWRITE;
-                } else {
-                    copy_options = MACH_MSG_VIRTUAL_COPY;
-                }
-            } else {
-                mach_msg_ool_descriptor_t *scatter_dsc;
-
-                scatter_dsc = &saddr->out_of_line;
-                if (scatter_dsc->copy == MACH_MSG_OVERWRITE) {
-                    rcv_addr = CAST_USER_ADDR_T(scatter_dsc->address);
-                    copy_options = MACH_MSG_OVERWRITE;
-                } else {
-                    copy_options = MACH_MSG_VIRTUAL_COPY;
-                }
-            }
-            INCREMENT_SCATTER(saddr, sdsc_count, differs);
-        }
-#endif
-
+       kern_return_t kr;
 
-        /*
-         * Whether the data was virtually or physically
-         * copied we have a vm_map_copy_t for it.
-         * If there's an overwrite region specified
-         * overwrite it, otherwise do a virtual copy out.
-         */
-        kern_return_t kr;
-        if (copy_options == MACH_MSG_OVERWRITE && rcv_addr != 0) {
-            kr = vm_map_copy_overwrite(map, rcv_addr,
-                    copy, TRUE);
-        } else {
-            kr = vm_map_copyout(map, &rcv_addr, copy);
-        }      
+        rcv_addr = 0;
+        kr = vm_map_copyout(map, &rcv_addr, copy);
         if (kr != KERN_SUCCESS) {
             if (kr == KERN_RESOURCE_SHORTAGE)
                 *mr |= MACH_MSG_VM_KERNEL;
@@ -3656,8 +3562,9 @@ ipc_kmsg_copyout_ool_ports_descriptor(mach_msg_ool_ports_descriptor_t *dsc,
             /*
              * Dynamically allocate the region
              */
-            int anywhere = VM_MAKE_TAG(VM_MEMORY_MACH_MSG)|
-                VM_FLAGS_ANYWHERE;
+            int anywhere = VM_FLAGS_ANYWHERE;
+           if (vm_kernel_map_is_kernel(map)) anywhere |= VM_MAKE_TAG(VM_KERN_MEMORY_IPC);
+           else                              anywhere |= VM_MAKE_TAG(VM_MEMORY_MACH_MSG);
 
             kern_return_t kr;
             if ((kr = mach_vm_allocate(map, &rcv_addr, 
@@ -4078,163 +3985,6 @@ ipc_kmsg_copyout_dest(
        }
 }
 
-/*
- *      Routine:        ipc_kmsg_copyin_scatter
- *      Purpose:
- *              allocate and copyin a scatter list
- *      Algorithm:
- *              The gather (kmsg) is valid since it has been copied in.
- *              Gather list descriptors are sequentially paired with scatter
- *              list descriptors, with port descriptors in either list ignored.
- *              Descriptors are consistent if the type fileds match and size
- *              of the scatter descriptor is less than or equal to the
- *              size of the gather descriptor.  A MACH_MSG_ALLOCATE copy
- *              strategy in a scatter descriptor matches any size in the
- *              corresponding gather descriptor assuming they are the same type.
- *              Either list may be larger than the other.  During the
- *              subsequent copy out, excess scatter descriptors are ignored
- *              and excess gather descriptors default to dynamic allocation.
- *
- *              In the case of a size error, the scatter list is released.
- *      Conditions:
- *              Nothing locked.
- *      Returns:
- *              the allocated message body containing the scatter list.
- */
-
-mach_msg_body_t *
-ipc_kmsg_get_scatter(
-       mach_vm_address_t       msg_addr,
-       mach_msg_size_t         slist_size,
-       ipc_kmsg_t              kmsg)
-{
-        mach_msg_body_t         *slist;
-        mach_msg_body_t         *body;
-        mach_msg_descriptor_t   *gstart, *gend;
-        mach_msg_descriptor_t   *sstart, *send;
-
-#if defined(__LP64__)
-        panic("ipc_kmsg_get_scatter called!");
-#endif
-
-        if (slist_size < sizeof(mach_msg_base_t))
-                return MACH_MSG_BODY_NULL;
-
-        slist_size -= (mach_msg_size_t)sizeof(mach_msg_header_t);
-        slist = (mach_msg_body_t *)kalloc(slist_size);
-        if (slist == MACH_MSG_BODY_NULL)
-                return slist;
-
-        if (copyin(msg_addr + sizeof(mach_msg_header_t), (char *)slist, slist_size)) {
-                kfree(slist, slist_size);
-                return MACH_MSG_BODY_NULL;
-        }
-
-        if ((slist->msgh_descriptor_count* sizeof(mach_msg_descriptor_t)
-             + sizeof(mach_msg_size_t)) > slist_size) {
-                kfree(slist, slist_size);
-                return MACH_MSG_BODY_NULL;
-        }
-
-        body = (mach_msg_body_t *) (kmsg->ikm_header + 1);
-        gstart = (mach_msg_descriptor_t *) (body + 1);
-        gend = gstart + body->msgh_descriptor_count;
-
-        sstart = (mach_msg_descriptor_t *) (slist + 1);
-        send = sstart + slist->msgh_descriptor_count;
-
-        while (gstart < gend) {
-            mach_msg_descriptor_type_t  g_type;
-
-            /*
-             * Skip port descriptors in gather list.
-             */
-            g_type = gstart->type.type;
-
-            if (g_type != MACH_MSG_PORT_DESCRIPTOR) {
-
-             /*
-              * A scatter list with a 0 descriptor count is treated as an
-              * automatic size mismatch.
-              */
-             if (slist->msgh_descriptor_count == 0) {
-                        kfree(slist, slist_size);
-                        return MACH_MSG_BODY_NULL;
-             }
-
-             /*
-              * Skip port descriptors in  scatter list.
-              */
-             while (sstart < send) {
-                    if (sstart->type.type != MACH_MSG_PORT_DESCRIPTOR)
-                        break;
-                    sstart++;
-             }
-
-             /*
-              * No more scatter descriptors, we're done
-              */
-             if (sstart >= send) {
-                    break;
-             }
-
-             /*
-              * Check type, copy and size fields
-              */
-                if (g_type == MACH_MSG_OOL_DESCRIPTOR ||
-                    g_type == MACH_MSG_OOL_VOLATILE_DESCRIPTOR) {
-                    if (sstart->type.type != MACH_MSG_OOL_DESCRIPTOR &&
-                        sstart->type.type != MACH_MSG_OOL_VOLATILE_DESCRIPTOR) {
-                        kfree(slist, slist_size);
-                        return MACH_MSG_BODY_NULL;
-                    }
-                    if (sstart->out_of_line.copy == MACH_MSG_OVERWRITE &&
-                        gstart->out_of_line.size > sstart->out_of_line.size) {
-                        kfree(slist, slist_size);
-                        return MACH_MSG_BODY_NULL;
-                    }
-                }
-                else {
-                 if (sstart->type.type != MACH_MSG_OOL_PORTS_DESCRIPTOR) {
-                        kfree(slist, slist_size);
-                        return MACH_MSG_BODY_NULL;
-                 }
-                    if (sstart->ool_ports.copy == MACH_MSG_OVERWRITE &&
-                        gstart->ool_ports.count > sstart->ool_ports.count) {
-                        kfree(slist, slist_size);
-                        return MACH_MSG_BODY_NULL;
-                    }
-                }
-                sstart++;
-            }
-            gstart++;
-        }
-        return slist;
-}
-
-
-/*
- *      Routine:        ipc_kmsg_free_scatter
- *      Purpose:
- *              Deallocate a scatter list.  Since we actually allocated
- *              a body without a header, and since the header was originally
- *              accounted for in slist_size, we have to ajust it down
- *              before freeing the scatter list.
- */
-void
-ipc_kmsg_free_scatter(
-        mach_msg_body_t *slist,
-        mach_msg_size_t slist_size)
-{
-#if defined(__LP64__)
-        panic("%s called; halting!", __func__);
-#endif
-
-        slist_size -= (mach_msg_size_t)sizeof(mach_msg_header_t);
-        kfree(slist, slist_size);
-}
-
-
 /*
  *     Routine:        ipc_kmsg_copyout_to_kernel
  *     Purpose:
index 89fa465235f73f60d631f831caa14e0c0e20884c..c020a3d39daf8605a20a590133ef3cbcead7dcb0 100644 (file)
@@ -391,18 +391,6 @@ extern void ipc_kmsg_copyout_to_kernel_legacy(
        ipc_space_t             space);
 #endif
 
-/* get a scatter list and check consistency */
-extern mach_msg_body_t *ipc_kmsg_get_scatter(
-        mach_vm_address_t       msg_addr,
-        mach_msg_size_t         slist_size,
-        ipc_kmsg_t              kmsg);
-
-/* free a scatter list */
-extern void ipc_kmsg_free_scatter(
-        mach_msg_body_t        *slist,
-        mach_msg_size_t                slist_size);
-
-
 extern mach_msg_trailer_size_t
 ipc_kmsg_add_trailer(ipc_kmsg_t kmsg, ipc_space_t space, 
                mach_msg_option_t option, thread_t thread, 
index 1a98bab1f1929e30f64f060cc20c26132debd9bf..1b5d82c194477545b0ee3ac6adb8a35c31261569 100644 (file)
@@ -82,7 +82,7 @@
 #include <kern/misc_protos.h>
 #include <kern/task.h>
 #include <kern/thread.h>
-#include <kern/wait_queue.h>
+#include <kern/waitq.h>
 
 #include <ipc/ipc_mqueue.h>
 #include <ipc/ipc_kmsg.h>
@@ -108,12 +108,15 @@ void ipc_mqueue_receive_results(wait_result_t result);
 void
 ipc_mqueue_init(
        ipc_mqueue_t    mqueue,
-       boolean_t       is_set)
+       boolean_t       is_set,
+       uint64_t        *reserved_link)
 {
        if (is_set) {
-               wait_queue_set_init(&mqueue->imq_set_queue, SYNC_POLICY_FIFO|SYNC_POLICY_PREPOST);
+               waitq_set_init(&mqueue->imq_set_queue,
+                              SYNC_POLICY_FIFO|SYNC_POLICY_PREPOST|SYNC_POLICY_DISABLE_IRQ,
+                              reserved_link);
        } else {
-               wait_queue_init(&mqueue->imq_wait_queue, SYNC_POLICY_FIFO);
+               waitq_init(&mqueue->imq_wait_queue, SYNC_POLICY_FIFO|SYNC_POLICY_DISABLE_IRQ);
                ipc_kmsg_queue_init(&mqueue->imq_messages);
                mqueue->imq_seqno = 0;
                mqueue->imq_msgcount = 0;
@@ -122,6 +125,53 @@ ipc_mqueue_init(
        }
 }
 
+void ipc_mqueue_deinit(
+       ipc_mqueue_t            mqueue)
+{
+       boolean_t is_set = imq_is_set(mqueue);
+
+       if (is_set)
+               waitq_set_deinit(&mqueue->imq_set_queue);
+       else
+               waitq_deinit(&mqueue->imq_wait_queue);
+}
+
+/*
+ *     Routine:        imq_reserve_and_lock
+ *     Purpose:
+ *             Atomically lock an ipc_mqueue_t object and reserve
+ *             an appropriate number of prepost linkage objects for
+ *             use in wakeup operations.
+ *     Conditions:
+ *             mq is unlocked
+ */
+void
+imq_reserve_and_lock(ipc_mqueue_t mq, uint64_t *reserved_prepost, spl_t *spl)
+{
+       *reserved_prepost = waitq_prepost_reserve(&mq->imq_wait_queue, 0,
+                                                 WAITQ_KEEP_LOCKED, spl);
+
+}
+
+
+/*
+ *     Routine:        imq_release_and_unlock
+ *     Purpose:
+ *             Unlock an ipc_mqueue_t object, re-enable interrupts,
+ *             and release any unused prepost object reservations.
+ *     Conditions:
+ *             mq is locked
+ */
+void
+imq_release_and_unlock(ipc_mqueue_t mq, uint64_t reserved_prepost, spl_t spl)
+{
+       assert(imq_held(mq));
+       waitq_unlock(&mq->imq_wait_queue);
+       splx(spl);
+       waitq_prepost_release_reserve(reserved_prepost);
+}
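A minimal usage sketch for the reserve/release pair defined above, mirroring how ipc_mqueue_post() uses it later in this change (the elided middle is hypothetical):

    uint64_t reserved_prepost = 0;
    spl_t    s;

    imq_reserve_and_lock(mqueue, &reserved_prepost, &s);
    /* ... wake a receiver or enqueue the kmsg while the mqueue is locked ... */
    imq_release_and_unlock(mqueue, reserved_prepost, s);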
+
+
 /*
  *     Routine:        ipc_mqueue_member
  *     Purpose:
@@ -139,10 +189,10 @@ ipc_mqueue_member(
        ipc_mqueue_t            port_mqueue,
        ipc_mqueue_t            set_mqueue)
 {
-       wait_queue_t    port_waitq = &port_mqueue->imq_wait_queue;
-       wait_queue_set_t set_waitq = &set_mqueue->imq_set_queue;
+       struct waitq *port_waitq = &port_mqueue->imq_wait_queue;
+       struct waitq_set *set_waitq = &set_mqueue->imq_set_queue;
 
-       return (wait_queue_member(port_waitq, set_waitq));
+       return waitq_member(port_waitq, set_waitq);
 
 }
 
@@ -156,13 +206,12 @@ ipc_mqueue_member(
 kern_return_t
 ipc_mqueue_remove(
        ipc_mqueue_t      mqueue,
-       ipc_mqueue_t      set_mqueue,
-       wait_queue_link_t *wqlp)
+       ipc_mqueue_t      set_mqueue)
 {
-       wait_queue_t     mq_waitq = &mqueue->imq_wait_queue;
-       wait_queue_set_t set_waitq = &set_mqueue->imq_set_queue;
+       struct waitq *mq_waitq = &mqueue->imq_wait_queue;
+       struct waitq_set *set_waitq = &set_mqueue->imq_set_queue;
 
-       return wait_queue_unlink_nofree(mq_waitq, set_waitq, wqlp);
+       return waitq_unlink(mq_waitq, set_waitq);
 }
 
 /*
@@ -173,13 +222,11 @@ ipc_mqueue_remove(
  *             Nothing locked.
  */
 void
-ipc_mqueue_remove_from_all(
-       ipc_mqueue_t    mqueue,
-       queue_t         links)
+ipc_mqueue_remove_from_all(ipc_mqueue_t        mqueue)
 {
-       wait_queue_t    mq_waitq = &mqueue->imq_wait_queue;
+       struct waitq *mq_waitq = &mqueue->imq_wait_queue;
 
-       wait_queue_unlink_all_nofree(mq_waitq, links);
+       waitq_unlink_all(mq_waitq);
        return;
 }
 
@@ -187,17 +234,15 @@ ipc_mqueue_remove_from_all(
  *     Routine:        ipc_mqueue_remove_all
  *     Purpose:
  *             Remove all the member queues from the specified set.
+ *             Also removes the queue from any containing sets.
  *     Conditions:
  *             Nothing locked.
  */
 void
-ipc_mqueue_remove_all(
-       ipc_mqueue_t    mqueue,
-       queue_t         links)
+ipc_mqueue_remove_all(ipc_mqueue_t     mqueue)
 {
-       wait_queue_set_t        mq_setq = &mqueue->imq_set_queue;
-
-       wait_queue_set_unlink_all_nofree(mq_setq, links);
+       struct waitq_set *mq_setq = &mqueue->imq_set_queue;
+       waitq_set_unlink_all(mq_setq);
        return;
 }
 
@@ -215,28 +260,39 @@ ipc_mqueue_remove_all(
  */
 kern_return_t
 ipc_mqueue_add(
-       ipc_mqueue_t     port_mqueue,
-       ipc_mqueue_t     set_mqueue,
-       wait_queue_link_t wql)
+       ipc_mqueue_t    port_mqueue,
+       ipc_mqueue_t    set_mqueue,
+       uint64_t        *reserved_link,
+       uint64_t        *reserved_prepost)
 {
-       wait_queue_t     port_waitq = &port_mqueue->imq_wait_queue;
-       wait_queue_set_t set_waitq = &set_mqueue->imq_set_queue;
+       struct waitq     *port_waitq = &port_mqueue->imq_wait_queue;
+       struct waitq_set *set_waitq = &set_mqueue->imq_set_queue;
        ipc_kmsg_queue_t kmsgq;
        ipc_kmsg_t       kmsg, next;
        kern_return_t    kr;
        spl_t            s;
 
-       kr = wait_queue_link_noalloc(port_waitq, set_waitq, wql);
-       if (kr != KERN_SUCCESS)
+       assert(reserved_link && *reserved_link != 0);
+
+       s = splsched();
+       imq_lock(port_mqueue);
+
+       /*
+        * The link operation is now under the same lock-hold as
+        * message iteration and thread wakeup, but doesn't have to be...
+        */
+       kr = waitq_link(port_waitq, set_waitq, WAITQ_ALREADY_LOCKED, reserved_link);
+       if (kr != KERN_SUCCESS) {
+               imq_unlock(port_mqueue);
+               splx(s);
                return kr;
+       }
 
        /*
         * Now that the set has been added to the port, there may be
         * messages queued on the port and threads waiting on the set
         * waitq.  Lets get them together.
         */
-       s = splsched();
-       imq_lock(port_mqueue);
        kmsgq = &port_mqueue->imq_messages;
        for (kmsg = ipc_kmsg_queue_first(kmsgq);
             kmsg != IKM_NULL;
@@ -246,12 +302,13 @@ ipc_mqueue_add(
                for (;;) {
                        thread_t th;
                        mach_msg_size_t msize;
+                       spl_t th_spl;
 
-                       th = wait_queue_wakeup64_identity_locked(
+                       th = waitq_wakeup64_identity_locked(
                                                port_waitq,
                                                IPC_MQUEUE_RECEIVE,
-                                               THREAD_AWAKENED,
-                                               FALSE);
+                                               THREAD_AWAKENED, &th_spl,
+                                               reserved_prepost, WAITQ_KEEP_LOCKED);
                        /* waitq/mqueue still locked, thread locked */
 
                        if (th == THREAD_NULL)
@@ -265,6 +322,7 @@ ipc_mqueue_add(
                         */
                        if (th->ith_state != MACH_RCV_IN_PROGRESS) {
                                  thread_unlock(th);
+                                 splx(th_spl);
                                  continue;
                        }
 
@@ -289,6 +347,7 @@ ipc_mqueue_add(
                                        th->ith_kmsg = IKM_NULL;
                                        th->ith_seqno = 0;
                                        thread_unlock(th);
+                                       splx(th_spl);
                                        continue; /* find another thread */
                                }
                        } else {
@@ -300,14 +359,14 @@ ipc_mqueue_add(
                         * so give it to him.
                         */
                        ipc_kmsg_rmqueue(kmsgq, kmsg);
-                       ipc_mqueue_release_msgcount(port_mqueue);
+                       ipc_mqueue_release_msgcount(port_mqueue, IMQ_NULL);
 
                        th->ith_kmsg = kmsg;
                        th->ith_seqno = port_mqueue->imq_seqno++;
                        thread_unlock(th);
+                       splx(th_spl);
                        break;  /* go to next message */
                }
-                       
        }
  leave:
        imq_unlock(port_mqueue);
@@ -327,11 +386,12 @@ void
 ipc_mqueue_changed(
        ipc_mqueue_t            mqueue)
 {
-       wait_queue_wakeup64_all_locked(
-                               &mqueue->imq_wait_queue,
-                               IPC_MQUEUE_RECEIVE,
-                               THREAD_RESTART,
-                               FALSE);         /* unlock waitq? */
+       waitq_wakeup64_all_locked(&mqueue->imq_wait_queue,
+                                 IPC_MQUEUE_RECEIVE,
+                                 THREAD_RESTART,
+                                 NULL,
+                                 WAITQ_ALL_PRIORITIES,
+                                 WAITQ_KEEP_LOCKED);
 }
 
 
@@ -402,12 +462,12 @@ ipc_mqueue_send(
                        clock_interval_to_deadline(send_timeout, 1000*NSEC_PER_USEC, &deadline);
                else
                        deadline = 0;
-               wresult = wait_queue_assert_wait64_locked(
+               wresult = waitq_assert_wait64_locked(
                                                &mqueue->imq_wait_queue,
                                                IPC_MQUEUE_FULL,
                                                THREAD_ABORTSAFE,
                                                TIMEOUT_URGENCY_USER_NORMAL,
-                                               deadline, 0,
+                                               deadline, TIMEOUT_NO_LEEWAY,
                                                cur_thread);
                thread_unlock(cur_thread);
                imq_unlock(mqueue);
@@ -419,15 +479,19 @@ ipc_mqueue_send(
                }
                
                switch (wresult) {
+
+               case THREAD_AWAKENED:
+                       /* 
+                        * we can proceed - we either inherited a msgcount from the
+                        * waker, or the message queue has been destroyed and its
+                        * msgcount reset to zero (detected in ipc_mqueue_post()).
+                        */
+                       break;
+                       
                case THREAD_TIMED_OUT:
                        assert(option & MACH_SEND_TIMEOUT);
                        return MACH_SEND_TIMED_OUT;
                        
-               case THREAD_AWAKENED:
-                       /* we can proceed - inherited msgcount from waker */
-                       assert(mqueue->imq_msgcount > 0);
-                       break;
-                       
                case THREAD_INTERRUPTED:
                        return MACH_SEND_INTERRUPTED;
                        
@@ -453,28 +517,43 @@ ipc_mqueue_send(
  *     Conditions:
  *             The message queue is locked.
  *             The message corresponding to this reference is off the queue.
+ *             There is no need to pass reserved preposts because this will
+ *             never prepost to anyone
  */
 void
-ipc_mqueue_release_msgcount(
-       ipc_mqueue_t mqueue)    
+ipc_mqueue_release_msgcount(ipc_mqueue_t port_mq, ipc_mqueue_t set_mq)
 {
-       assert(imq_held(mqueue));
-       assert(mqueue->imq_msgcount > 1 || ipc_kmsg_queue_empty(&mqueue->imq_messages));
+       (void)set_mq;
+       assert(imq_held(port_mq));
+       assert(port_mq->imq_msgcount > 1 || ipc_kmsg_queue_empty(&port_mq->imq_messages));
 
-       mqueue->imq_msgcount--;
+       port_mq->imq_msgcount--;
 
-       if (!imq_full(mqueue) && mqueue->imq_fullwaiters) {
-               if (wait_queue_wakeup64_one_locked(
-                                               &mqueue->imq_wait_queue,
-                                               IPC_MQUEUE_FULL,
-                                               THREAD_AWAKENED,
-                                               FALSE) != KERN_SUCCESS) {
-                       mqueue->imq_fullwaiters = FALSE;
+       if (!imq_full(port_mq) && port_mq->imq_fullwaiters) {
+               /*
+                * boost the priority of the awoken thread
+                * (WAITQ_PROMOTE_PRIORITY) to ensure it uses
+                * the message queue slot we've just reserved.
+                *
+                * NOTE: this will never prepost
+                */
+               if (waitq_wakeup64_one_locked(&port_mq->imq_wait_queue,
+                                             IPC_MQUEUE_FULL,
+                                             THREAD_AWAKENED,
+                                             NULL,
+                                             WAITQ_PROMOTE_PRIORITY,
+                                             WAITQ_KEEP_LOCKED) != KERN_SUCCESS) {
+                       port_mq->imq_fullwaiters = FALSE;
                } else {
                        /* gave away our slot - add reference back */
-                       mqueue->imq_msgcount++; 
+                       port_mq->imq_msgcount++;
                }
        }
+
+       if (ipc_kmsg_queue_empty(&port_mq->imq_messages)) {
+               /* no more msgs: invalidate the port's prepost object */
+               waitq_clear_prepost_locked(&port_mq->imq_wait_queue, NULL);
+       }
 }
 
 /*
@@ -485,6 +564,7 @@ ipc_mqueue_release_msgcount(
  *             the message queue.
  *
  *     Conditions:
+ *             mqueue is unlocked
  *             If we need to queue, our space in the message queue is reserved.
  */
 void
@@ -493,6 +573,7 @@ ipc_mqueue_post(
        register ipc_kmsg_t             kmsg)
 {
        spl_t s;
+       uint64_t reserved_prepost = 0;
 
        /*
         *      While the msg queue     is locked, we have control of the
@@ -500,27 +581,41 @@ ipc_mqueue_post(
         *
         *      Check for a receiver for the message.
         */
-       s = splsched();
-       imq_lock(mqueue);
+       imq_reserve_and_lock(mqueue, &reserved_prepost, &s);
        for (;;) {
-               wait_queue_t waitq = &mqueue->imq_wait_queue;
+               struct waitq *waitq = &mqueue->imq_wait_queue;
+               spl_t th_spl;
                thread_t receiver;
                mach_msg_size_t msize;
 
-               receiver = wait_queue_wakeup64_identity_locked(
-                                                       waitq,
-                                                       IPC_MQUEUE_RECEIVE,
-                                                       THREAD_AWAKENED,
-                                                       FALSE);
+               receiver = waitq_wakeup64_identity_locked(waitq,
+                                                         IPC_MQUEUE_RECEIVE,
+                                                         THREAD_AWAKENED,
+                                                         &th_spl,
+                                                         &reserved_prepost,
+                                                         WAITQ_KEEP_LOCKED);
                /* waitq still locked, thread locked */
 
                if (receiver == THREAD_NULL) {
+                       
                        /* 
-                        * no receivers; queue kmsg
+                        * no receivers; queue kmsg if space still reserved.
                         */
-                       assert(mqueue->imq_msgcount > 0);
-                       ipc_kmsg_enqueue_macro(&mqueue->imq_messages, kmsg);
-                       break;
+                       if (mqueue->imq_msgcount > 0) {
+                               ipc_kmsg_enqueue_macro(&mqueue->imq_messages, kmsg);
+                               break;
+                       }
+
+                       /*
+                        * Otherwise, the message queue must belong to an inactive
+                        * port, so just destroy the message and pretend it was posted.
+                        */
+                       /* clear the waitq boost we may have been given */
+                       waitq_clear_promotion_locked(waitq, current_thread());
+                       imq_release_and_unlock(mqueue, reserved_prepost, s);
+                       ipc_kmsg_destroy(kmsg);
+                       current_task()->messages_sent++;
+                       return;
                }
        
                /*
@@ -531,6 +626,7 @@ ipc_mqueue_post(
                 */
                if (receiver->ith_state != MACH_RCV_IN_PROGRESS) {
                                  thread_unlock(receiver);
+                                 splx(th_spl);
                                  continue;
                }
 
@@ -560,9 +656,10 @@ ipc_mqueue_post(
                        receiver->ith_kmsg = kmsg;
                        receiver->ith_seqno = mqueue->imq_seqno++;
                        thread_unlock(receiver);
+                       splx(th_spl);
 
                        /* we didn't need our reserved spot in the queue */
-                       ipc_mqueue_release_msgcount(mqueue);
+                       ipc_mqueue_release_msgcount(mqueue, IMQ_NULL);
                        break;
                }
 
@@ -575,10 +672,12 @@ ipc_mqueue_post(
                receiver->ith_kmsg = IKM_NULL;
                receiver->ith_seqno = 0;
                thread_unlock(receiver);
+               splx(th_spl);
        }
 
-       imq_unlock(mqueue);
-       splx(s);
+       /* clear the waitq boost we may have been given */
+       waitq_clear_promotion_locked(&mqueue->imq_wait_queue, current_thread());
+       imq_release_and_unlock(mqueue, reserved_prepost, s);
        
        current_task()->messages_sent++;
        return;
@@ -707,6 +806,32 @@ ipc_mqueue_receive(
        ipc_mqueue_receive_results(wresult);
 }
 
+static int mqueue_process_prepost_receive(void *ctx, struct waitq *waitq,
+                                         struct waitq_set *wqset)
+{
+       ipc_mqueue_t     port_mq, *pmq_ptr;
+
+       (void)wqset;
+       port_mq = (ipc_mqueue_t)waitq;
+
+       /*
+        * If there are no messages on this queue, skip it and remove
+        * it from the prepost list
+        */
+       if (ipc_kmsg_queue_empty(&port_mq->imq_messages))
+               return WQ_ITERATE_INVALIDATE_CONTINUE;
+
+       /*
+        * There are messages waiting on this port.
+        * Instruct the prepost iteration logic to break, but keep the
+        * waitq locked.
+        */
+       pmq_ptr = (ipc_mqueue_t *)ctx;
+       if (pmq_ptr)
+               *pmq_ptr = port_mq;
+       return WQ_ITERATE_BREAK_KEEP_LOCKED;
+}
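A sketch of how the receive path drives this callback, mirroring the ipc_mqueue_receive_on_thread() hunk further below (nothing beyond what that hunk shows is assumed):

    ipc_mqueue_t port_mq = IMQ_NULL;
    spl_t        set_spl;

    (void)waitq_set_iterate_preposts(&mqueue->imq_set_queue, &port_mq,
                                     mqueue_process_prepost_receive, &set_spl);
    if (port_mq != IMQ_NULL) {
            /* the callback returned WQ_ITERATE_BREAK_KEEP_LOCKED: port_mq is
             * still locked and has at least one queued message */
    }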
+
 wait_result_t
 ipc_mqueue_receive_on_thread(
         ipc_mqueue_t            mqueue,
@@ -716,92 +841,65 @@ ipc_mqueue_receive_on_thread(
        int                     interruptible,
        thread_t                thread)
 {
-       ipc_kmsg_queue_t        kmsgs;
        wait_result_t           wresult;
        uint64_t                deadline;
        spl_t                   s;
 
        s = splsched();
        imq_lock(mqueue);
+       /* no need to reserve anything: we never prepost to anyone */
        
        if (imq_is_set(mqueue)) {
-               queue_t q;
-
-               q = &mqueue->imq_preposts;
+               ipc_mqueue_t port_mq = IMQ_NULL;
+               spl_t set_spl;
 
-               /*
-                * If we are waiting on a portset mqueue, we need to see if
-                * any of the member ports have work for us.  Ports that
-                * have (or recently had) messages will be linked in the
-                * prepost queue for the portset. By holding the portset's
-                * mqueue lock during the search, we tie up any attempts by
-                * mqueue_deliver or portset membership changes that may
-                * cross our path.
-                */
-       search_set:
-               while(!queue_empty(q)) {
-                       wait_queue_link_t wql;
-                       ipc_mqueue_t port_mq;
-
-                       queue_remove_first(q, wql, wait_queue_link_t, wql_preposts);
-                       assert(!wql_is_preposted(wql));
-
-                       /*
-                        * This is a lock order violation, so we have to do it
-                        * "softly," putting the link back on the prepost list
-                        * if it fails (at the tail is fine since the order of
-                        * handling messages from different sources in a set is
-                        * not guaranteed and we'd like to skip to the next source
-                        * if one is available).
-                        */
-                       port_mq = (ipc_mqueue_t)wql->wql_queue;
-                       if (!imq_lock_try(port_mq)) {
-                               queue_enter(q, wql, wait_queue_link_t, wql_preposts);
-                               imq_unlock(mqueue);
-                               splx(s);
-                               mutex_pause(0);
-                               s = splsched();
-                               imq_lock(mqueue);
-                               goto search_set; /* start again at beginning - SMP */
-                       }
+               (void)waitq_set_iterate_preposts(&mqueue->imq_set_queue,
+                                                &port_mq,
+                                                mqueue_process_prepost_receive,
+                                                &set_spl);
 
+               if (port_mq != IMQ_NULL) {
                        /*
-                        * If there are no messages on this queue, just skip it
-                        * (we already removed the link from the set's prepost queue).
+                        * We get here if there is at least one message
+                        * waiting on port_mq. We have instructed the prepost
+                        * iteration logic to leave both the port_mq and the
+                        * set mqueue locked.
+                        *
+                        * TODO: previously, we would place this port at the
+                        *       back of the prepost list...
                         */
-                       kmsgs = &port_mq->imq_messages;
-                       if (ipc_kmsg_queue_first(kmsgs) == IKM_NULL) {
-                               imq_unlock(port_mq);
-                               continue;
-                       }
+                       imq_unlock(mqueue);
 
-                       /*
-                        * There are messages, so reinsert the link back
-                        * at the tail of the preposted queue (for fairness)
-                        * while we still have the portset mqueue locked.
+                       /* TODO: if/when port mqueues become non irq safe,
+                        *       we won't need this spl, and we should be
+                        *       able to call splx(s) (if that's even
+                        *       necessary).
+                        * For now, we've still disabled interrupts via
+                        * imq_reserve_and_lock();
                         */
-                       queue_enter(q, wql, wait_queue_link_t, wql_preposts);
-                       imq_unlock(mqueue);
+                       splx(set_spl);
 
                        /*
                         * Continue on to handling the message with just
                         * the port mqueue locked.
                         */
-                       ipc_mqueue_select_on_thread(port_mq, option, max_size, thread);
+                       ipc_mqueue_select_on_thread(port_mq, mqueue, option,
+                                                   max_size, thread);
+
                        imq_unlock(port_mq);
                        splx(s);
                        return THREAD_NOT_WAITING;
-                       
                }
-
        } else {
+               ipc_kmsg_queue_t kmsgs;
 
                /*
                 * Receive on a single port. Just try to get the messages.
                 */
                kmsgs = &mqueue->imq_messages;
                if (ipc_kmsg_queue_first(kmsgs) != IKM_NULL) {
-                       ipc_mqueue_select_on_thread(mqueue, option, max_size, thread);
+                       ipc_mqueue_select_on_thread(mqueue, IMQ_NULL, option,
+                                                   max_size, thread);
                        imq_unlock(mqueue);
                        splx(s);
                        return THREAD_NOT_WAITING;
@@ -822,6 +920,7 @@ ipc_mqueue_receive_on_thread(
                }
        }
 
+       /* NOTE: need splsched() here if mqueue no longer needs irq disabled */
        thread_lock(thread);
        thread->ith_state = MACH_RCV_IN_PROGRESS;
        thread->ith_option = option;
@@ -832,12 +931,13 @@ ipc_mqueue_receive_on_thread(
        else
                deadline = 0;
 
-       wresult = wait_queue_assert_wait64_locked(&mqueue->imq_wait_queue,
-                                                 IPC_MQUEUE_RECEIVE,
-                                                 interruptible, 
-                                                 TIMEOUT_URGENCY_USER_NORMAL,
-                                                 deadline, 0,
-                                                 thread);
+       wresult = waitq_assert_wait64_locked(&mqueue->imq_wait_queue,
+                                            IPC_MQUEUE_RECEIVE,
+                                            interruptible,
+                                            TIMEOUT_URGENCY_USER_NORMAL,
+                                            deadline,
+                                            TIMEOUT_NO_LEEWAY,
+                                            thread);
        /* preposts should be detected above, not here */
        if (wresult == THREAD_AWAKENED)
                panic("ipc_mqueue_receive_on_thread: sleep walking");
@@ -859,13 +959,16 @@ ipc_mqueue_receive_on_thread(
  *             mqueue locked.
  *              thread not locked.
  *             There is a message.
+ *             No need to reserve prepost objects - it will never prepost
+ *
  *     Returns:
  *             MACH_MSG_SUCCESS        Actually selected a message for ourselves.
 *             MACH_RCV_TOO_LARGE  May or may not have pulled it, but it is large
  */
 void
 ipc_mqueue_select_on_thread(
-       ipc_mqueue_t            mqueue,
+       ipc_mqueue_t            port_mq,
+       ipc_mqueue_t            set_mq,
        mach_msg_option_t       option,
        mach_msg_size_t         max_size,
        thread_t                thread)
@@ -878,7 +981,7 @@ ipc_mqueue_select_on_thread(
         * Do some sanity checking of our ability to receive
         * before pulling the message off the queue.
         */
-       kmsg = ipc_kmsg_queue_first(&mqueue->imq_messages);
+       kmsg = ipc_kmsg_queue_first(&port_mq->imq_messages);
        assert(kmsg != IKM_NULL);
 
        /*
@@ -891,7 +994,7 @@ ipc_mqueue_select_on_thread(
        if (rcv_size + REQUESTED_TRAILER_SIZE(thread_is_64bit(thread), option) > max_size) {
                mr = MACH_RCV_TOO_LARGE;
                if (option & MACH_RCV_LARGE) {
-                       thread->ith_receiver_name = mqueue->imq_receiver_name;
+                       thread->ith_receiver_name = port_mq->imq_receiver_name;
                        thread->ith_kmsg = IKM_NULL;
                        thread->ith_msize = rcv_size;
                        thread->ith_seqno = 0;
@@ -900,9 +1003,9 @@ ipc_mqueue_select_on_thread(
                }
        }
 
-       ipc_kmsg_rmqueue_first_macro(&mqueue->imq_messages, kmsg);
-       ipc_mqueue_release_msgcount(mqueue);
-       thread->ith_seqno = mqueue->imq_seqno++;
+       ipc_kmsg_rmqueue_first_macro(&port_mq->imq_messages, kmsg);
+       ipc_mqueue_release_msgcount(port_mq, set_mq);
+       thread->ith_seqno = port_mq->imq_seqno++;
        thread->ith_kmsg = kmsg;
        thread->ith_state = mr;
 
@@ -923,14 +1026,14 @@ ipc_mqueue_select_on_thread(
  *             Caller holds reference on the message queue.
  */
 unsigned
-ipc_mqueue_peek(ipc_mqueue_t           mq,
-               mach_port_seqno_t       *seqnop,
-               mach_msg_size_t         *msg_sizep,
-               mach_msg_id_t           *msg_idp,
-               mach_msg_max_trailer_t  *msg_trailerp)
+ipc_mqueue_peek(ipc_mqueue_t mq,
+                mach_port_seqno_t * seqnop,
+                mach_msg_size_t * msg_sizep,
+                mach_msg_id_t * msg_idp,
+                mach_msg_max_trailer_t * msg_trailerp)
 {
        ipc_kmsg_queue_t kmsgq;
-       ipc_kmsg_t kmsg; 
+       ipc_kmsg_t kmsg;
        mach_port_seqno_t seqno, msgoff;
        int res = 0;
        spl_t s;
@@ -940,7 +1043,9 @@ ipc_mqueue_peek(ipc_mqueue_t               mq,
        s = splsched();
        imq_lock(mq);
 
-       seqno = (seqnop != NULL) ? seqno = *seqnop : 0;
+       seqno = 0;
+       if (seqnop != NULL)
+               seqno = *seqnop;
 
        if (seqno == 0) {
                seqno = mq->imq_seqno;
@@ -980,6 +1085,29 @@ ipc_mqueue_peek(ipc_mqueue_t              mq,
        return res;
 }
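A hypothetical caller of ipc_mqueue_peek(), based only on the signature and the seqno handling shown above (the port variable is an assumption; ip_messages is its embedded mqueue):

    mach_port_seqno_t      seqno = 0;   /* 0 selects the first queued message */
    mach_msg_size_t        msize = 0;
    mach_msg_id_t          msgid = 0;
    mach_msg_max_trailer_t trailer;

    if (ipc_mqueue_peek(&port->ip_messages, &seqno, &msize, &msgid, &trailer)) {
            /* a message is queued; seqno/msize/msgid/trailer now describe it */
    }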
 
+
+/*
+ * peek at the contained port message queues, break prepost iteration as soon
+ * as we spot a message on one of the message queues referenced by the set's
+ * prepost list.  No need to lock each message queue, as only the head of each
+ * queue is checked. If a message wasn't there before we entered here, no need
+ * to find it (if we do, great).
+ */
+static int mqueue_peek_iterator(void *ctx, struct waitq *waitq,
+                               struct waitq_set *wqset)
+{
+       ipc_mqueue_t port_mq = (ipc_mqueue_t)waitq;
+       ipc_kmsg_queue_t kmsgs = &port_mq->imq_messages;
+
+       (void)ctx;
+       (void)wqset;
+               
+       if (ipc_kmsg_queue_first(kmsgs) != IKM_NULL)
+               return WQ_ITERATE_BREAK; /* break out of the prepost iteration */
+
+       return WQ_ITERATE_CONTINUE;
+}
+
 /*
  *     Routine:        ipc_mqueue_set_peek
  *     Purpose:
@@ -993,83 +1121,91 @@ ipc_mqueue_peek(ipc_mqueue_t             mq,
 unsigned
 ipc_mqueue_set_peek(ipc_mqueue_t mq)
 {
-       wait_queue_link_t       wql;
-       queue_t                 q;
        spl_t s;
-       int res;
+       int ret;
 
        assert(imq_is_set(mq));
 
        s = splsched();
        imq_lock(mq);
 
-       /* 
-        * peek at the contained port message queues, return as soon as
-        * we spot a message on one of the message queues linked on the
-        * prepost list.  No need to lock each message queue, as only the
-        * head of each queue is checked. If a message wasn't there before
-        * we entered here, no need to find it (if we do, great).
-        */
-       res = 0;
-       q = &mq->imq_preposts;
-       queue_iterate(q, wql, wait_queue_link_t, wql_preposts) {
-               ipc_mqueue_t port_mq = (ipc_mqueue_t)wql->wql_queue;
-               ipc_kmsg_queue_t kmsgs = &port_mq->imq_messages;
-                       
-               if (ipc_kmsg_queue_first(kmsgs) != IKM_NULL) {
-                       res = 1;
-                       break;
-               }
-       }
+       ret = waitq_set_iterate_preposts(&mq->imq_set_queue, NULL,
+                                        mqueue_peek_iterator, NULL);
+
        imq_unlock(mq);
        splx(s);
-       return res;
+       return (ret == WQ_ITERATE_BREAK);
 }
 
 /*
  *     Routine:        ipc_mqueue_set_gather_member_names
  *     Purpose:
- *             Iterate a message queue set to identify the member port
- *             names. Actual returned names is limited to maxnames entries,
- *             but we keep counting the actual number of members to let
- *             the caller decide to retry if necessary.
+ *             Discover all ports which are members of a given port set.
+ *             Because the waitq linkage mechanism was redesigned to save
+ *             significant amounts of memory, it no longer keeps back-pointers
+ *             from a port set to a port. Therefore, we must iterate over all
+ *             ports within a given IPC space and individually query them to
+ *             see if they are members of the given set. Port names of ports
+ *             found to be members of the given set will be gathered into the
+ *             provided 'names' array.  Actual returned names are limited to
+ *             maxnames entries, but we keep counting the actual number of
+ *             members to let the caller decide to retry if necessary.
  *
  *     Conditions:
  *             Locks may be held by callers, so this routine cannot block.
- *             Caller holds reference on the message queue.
+ *             Caller holds reference on the message queue (via port set).
  */
 void
 ipc_mqueue_set_gather_member_names(
-       ipc_mqueue_t mq, 
-       ipc_entry_num_t maxnames, 
+       ipc_space_t space,
+       ipc_mqueue_t set_mq,
+       ipc_entry_num_t maxnames,
        mach_port_name_t *names,
        ipc_entry_num_t *actualp)
 {
-       wait_queue_link_t       wql;
-       queue_t                 q;
-       spl_t s;
+       ipc_entry_t table;
+       ipc_entry_num_t tsize;
+       struct waitq_set *wqset;
        ipc_entry_num_t actual = 0;
 
-       assert(imq_is_set(mq));
+       assert(set_mq != IMQ_NULL);
+       wqset = &set_mq->imq_set_queue;
 
-       s = splsched();
-       imq_lock(mq);
+       assert(space != IS_NULL);
+       is_read_lock(space);
+       if (!is_active(space)) {
+               is_read_unlock(space);
+               goto out;
+       }
 
-       /* 
-        * Iterate over the member ports through the mqueue set links
-        * capturing as many names as we can.
-        */
-       q = &mq->imq_setlinks;
-       queue_iterate(q, wql, wait_queue_link_t, wql_setlinks) {
-               ipc_mqueue_t port_mq = (ipc_mqueue_t)wql->wql_queue;
+       if (!waitq_set_is_valid(wqset)) {
+               is_read_unlock(space);
+               goto out;
+       }
 
-               if (actual < maxnames)
-                       names[actual] = port_mq->imq_receiver_name;
-               actual++;
+       table = space->is_table;
+       tsize = space->is_table_size;
+       for (ipc_entry_num_t idx = 0; idx < tsize; idx++) {
+               ipc_entry_t entry = &table[idx];
+
+               /* only receive rights can be members of port sets */
+               if ((entry->ie_bits & MACH_PORT_TYPE_RECEIVE) != MACH_PORT_TYPE_NONE) {
+                       __IGNORE_WCASTALIGN(ipc_port_t port = (ipc_port_t)entry->ie_object);
+                       ipc_mqueue_t mq = &port->ip_messages;
+
+                       assert(IP_VALID(port));
+                       if (ip_active(port) &&
+                           waitq_member(&mq->imq_wait_queue, wqset)) {
+                               if (actual < maxnames)
+                                       names[actual] = mq->imq_receiver_name;
+                               actual++;
+                       }
+               }
        }
-       imq_unlock(mq);
-       splx(s);
 
+       is_read_unlock(space);
+
+out:
        *actualp = actual;
 }
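A hypothetical caller of the reworked gather routine above: with no back-pointers from sets to ports, the caller passes the space, the set's mqueue, and a bounded names buffer, and retries with a larger buffer if 'actual' reports more members (space and pset are assumptions):

    mach_port_name_t names[64];
    ipc_entry_num_t  actual = 0;

    ipc_mqueue_set_gather_member_names(space, &pset->ips_messages,
                                       64, names, &actual);
    if (actual > 64) {
            /* more members than fit: allocate 'actual' entries and call again */
    }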
 
@@ -1093,17 +1229,23 @@ ipc_mqueue_destroy(
        boolean_t reap = FALSE;
        spl_t s;
 
+       assert(!imq_is_set(mqueue));
+
        s = splsched();
        imq_lock(mqueue);
+
        /*
         *      rouse all blocked senders
+        *      (don't boost anyone - we're tearing this queue down)
+        *      (never preposts)
         */
        mqueue->imq_fullwaiters = FALSE;
-       wait_queue_wakeup64_all_locked(
-                               &mqueue->imq_wait_queue,
-                               IPC_MQUEUE_FULL,
-                               THREAD_RESTART,
-                               FALSE);
+       waitq_wakeup64_all_locked(&mqueue->imq_wait_queue,
+                                 IPC_MQUEUE_FULL,
+                                 THREAD_RESTART,
+                                 NULL,
+                                 WAITQ_ALL_PRIORITIES,
+                                 WAITQ_KEEP_LOCKED);
 
        /*
         * Move messages from the specified queue to the per-thread
@@ -1117,9 +1259,28 @@ ipc_mqueue_destroy(
                        reap = first;
        }
 
+       /*
+        * Wipe out message count, both for messages about to be
+        * reaped and for reserved space for (previously) woken senders.
+        * This is the indication to them that their reserved space is gone
+        * (the mqueue was destroyed).
+        */
+       mqueue->imq_msgcount = 0;
+
+       /* clear out any preposting we may have done */
+       waitq_clear_prepost_locked(&mqueue->imq_wait_queue, &s);
+
        imq_unlock(mqueue);
        splx(s);
 
+       /*
+        * assert that we're destroying a queue that's not a
+        * member of any other queue
+        */
+       assert(mqueue->imq_wait_queue.waitq_prepost_id == 0);
+       assert(mqueue->imq_wait_queue.waitq_set_id == 0);
+
+
        /*
         * Destroy the messages we enqueued if we aren't nested
         * inside some other attempt to drain the same queue.
@@ -1156,17 +1317,25 @@ ipc_mqueue_set_qlimit(
                 wakeup = qlimit - mqueue->imq_qlimit;
 
                 for (i = 0; i < wakeup; i++) {
-                        if (wait_queue_wakeup64_one_locked(
-                                                       &mqueue->imq_wait_queue,
-                                                       IPC_MQUEUE_FULL,
-                                                       THREAD_AWAKENED,
-                                                       FALSE) == KERN_NOT_WAITING) {
-                                        mqueue->imq_fullwaiters = FALSE;
-                                        break;
-                        }
-                        mqueue->imq_msgcount++;  /* give it to the awakened thread */
+                       /*
+                        * boost the priority of the awoken thread
+                        * (WAITQ_PROMOTE_PRIORITY) to ensure it uses
+                        * the message queue slot we've just reserved.
+                        *
+                        * NOTE: this will never prepost
+                        */
+                       if (waitq_wakeup64_one_locked(&mqueue->imq_wait_queue,
+                                                     IPC_MQUEUE_FULL,
+                                                     THREAD_AWAKENED,
+                                                     NULL,
+                                                     WAITQ_PROMOTE_PRIORITY,
+                                                     WAITQ_KEEP_LOCKED) == KERN_NOT_WAITING) {
+                               mqueue->imq_fullwaiters = FALSE;
+                               break;
+                       }
+                       mqueue->imq_msgcount++;  /* give it to the awakened thread */
                 }
-        }
+       }
        mqueue->imq_qlimit = qlimit;
        imq_unlock(mqueue);
        splx(s);
@@ -1239,7 +1408,7 @@ ipc_mqueue_copyin(
        if (entry->ie_bits & MACH_PORT_TYPE_RECEIVE) {
                ipc_port_t port;
 
-               port = (ipc_port_t) object;
+               __IGNORE_WCASTALIGN(port = (ipc_port_t) object);
                assert(port != IP_NULL);
 
                ip_lock(port);
@@ -1252,7 +1421,7 @@ ipc_mqueue_copyin(
        } else if (entry->ie_bits & MACH_PORT_TYPE_PORT_SET) {
                ipc_pset_t pset;
 
-               pset = (ipc_pset_t) object;
+               __IGNORE_WCASTALIGN(pset = (ipc_pset_t) object);
                assert(pset != IPS_NULL);
 
                ips_lock(pset);
@@ -1278,4 +1447,3 @@ ipc_mqueue_copyin(
        *mqueuep = mqueue;
        return MACH_MSG_SUCCESS;
 }
-
index 26aa7fe0d25b2f2a705cea01c657a1f893a28805..401a3cae3d316821ed86d81c2be18c51be4f3896 100644 (file)
@@ -74,7 +74,7 @@
 #include <kern/macro_help.h>
 #include <kern/kern_types.h>
 #include <kern/spl.h>
-#include <kern/wait_queue.h>
+#include <kern/waitq.h>
 
 #include <ipc/ipc_kmsg.h>
 #include <ipc/ipc_object.h>
 typedef struct ipc_mqueue {
        union {
                struct {
-                       struct  wait_queue      wait_queue;     
+                       struct  waitq           waitq;
                        struct ipc_kmsg_queue   messages;
-                       mach_port_msgcount_t    msgcount;
-                       mach_port_msgcount_t    qlimit;
                        mach_port_seqno_t       seqno;
                        mach_port_name_t        receiver_name;
-                       boolean_t               fullwaiters;
-                       natural_t               pset_count;
-               } port;
+                       uint16_t                msgcount;
+                       uint16_t                qlimit;
+               } __attribute__((__packed__)) port;
                struct {
-                       struct wait_queue_set   set_queue;
+                       struct waitq_set        setq;
                        mach_port_name_t        local_name;
-               } pset;
+               } __attribute__((__packed__)) pset;
        } data;
 } *ipc_mqueue_t;
 
 #define        IMQ_NULL                ((ipc_mqueue_t) 0)
 
-#define imq_wait_queue         data.port.wait_queue
+#define imq_wait_queue         data.port.waitq
 #define imq_messages           data.port.messages
 #define imq_msgcount           data.port.msgcount
 #define imq_qlimit             data.port.qlimit
 #define imq_seqno              data.port.seqno
 #define imq_receiver_name      data.port.receiver_name
-#define imq_fullwaiters                data.port.fullwaiters
-#define imq_pset_count         data.port.pset_count
 
-#define imq_set_queue          data.pset.set_queue
-#define imq_setlinks           data.pset.set_queue.wqs_setlinks
-#define imq_preposts           data.pset.set_queue.wqs_preposts
+/*
+ * we can use the 'eventmask' bits of the waitq b/c
+ * they are only used by global queues
+ */
+#define imq_fullwaiters                data.port.waitq.waitq_eventmask
+#define imq_in_pset            data.port.waitq.waitq_set_id
+
+#define imq_set_queue          data.pset.setq
 #define imq_local_name         data.pset.local_name
-#define imq_is_set(mq)         wait_queue_is_set(&(mq)->imq_set_queue)
+#define imq_is_set(mq)         waitqs_is_set(&(mq)->imq_set_queue)
+
+#define        imq_lock(mq)            waitq_lock(&(mq)->imq_wait_queue)
+#define        imq_lock_try(mq)        waitq_lock_try(&(mq)->imq_wait_queue)
+#define        imq_unlock(mq)          waitq_unlock(&(mq)->imq_wait_queue)
+#define imq_held(mq)           waitq_held(&(mq)->imq_wait_queue)
 
-#define        imq_lock(mq)            wait_queue_lock(&(mq)->imq_wait_queue)
-#define        imq_lock_try(mq)        wait_queue_lock_try(&(mq)->imq_wait_queue)
-#define        imq_unlock(mq)          wait_queue_unlock(&(mq)->imq_wait_queue)
-#define imq_held(mq)           wait_queue_held(&(mq)->imq_wait_queue)
+extern void imq_reserve_and_lock(ipc_mqueue_t mq,
+                                uint64_t *reserved_prepost, spl_t *spl);
+
+extern void imq_release_and_unlock(ipc_mqueue_t mq,
+                                  uint64_t reserved_prepost, spl_t spl);
 
 #define imq_full(mq)           ((mq)->imq_msgcount >= (mq)->imq_qlimit)
 #define imq_full_kernel(mq)    ((mq)->imq_msgcount >= MACH_PORT_QLIMIT_KERNEL)
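The imq_lock()/imq_unlock() macros above now wrap the new waitq primitives, and the two new externs pair a waitq prepost reservation with the mqueue lock: the reservation has to be taken while the caller can still block, and whatever is left over is handed back when the lock is dropped. A minimal, hypothetical caller (not taken from this commit; the work done under the lock is a placeholder) looks like:

    static void
    example_deliver(ipc_mqueue_t mqueue)
    {
            uint64_t reserved_prepost = 0;
            spl_t s;

            /* reserve waitq prepost objects, raise spl, take the mqueue lock */
            imq_reserve_and_lock(mqueue, &reserved_prepost, &s);

            if (!imq_full(mqueue)) {
                    /* ... enqueue a kmsg / wake a receiver here ... */
            }

            /* drop the lock, restore spl, return any unused reservation */
            imq_release_and_unlock(mqueue, reserved_prepost, s);
    }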
@@ -139,7 +146,12 @@ extern int ipc_mqueue_full;
 /* Initialize a newly-allocated message queue */
 extern void ipc_mqueue_init(
        ipc_mqueue_t            mqueue,
-       boolean_t               is_set);
+       boolean_t               is_set,
+       uint64_t                *reserved_link);
+
+/* de-initialize / cleanup an mqueue (specifically waitq resources) */
+extern void ipc_mqueue_deinit(
+       ipc_mqueue_t            mqueue);
 
 /* destroy an mqueue */
 extern void ipc_mqueue_destroy(
@@ -153,7 +165,8 @@ extern void ipc_mqueue_changed(
 extern kern_return_t ipc_mqueue_add(
        ipc_mqueue_t            mqueue,
        ipc_mqueue_t            set_mqueue,
-       wait_queue_link_t       wql);
+       uint64_t                *reserved_link,
+       uint64_t                *reserved_prepost);
 
 /* Check to see if mqueue is member of set_mqueue */
 extern boolean_t ipc_mqueue_member(
@@ -163,18 +176,15 @@ extern boolean_t ipc_mqueue_member(
 /* Remove an mqueue from a specific set */
 extern kern_return_t ipc_mqueue_remove(
        ipc_mqueue_t            mqueue,
-       ipc_mqueue_t            set_mqueue,
-       wait_queue_link_t       *wqlp);
+       ipc_mqueue_t            set_mqueue);
 
 /* Remove an mqueue from all sets */
 extern void ipc_mqueue_remove_from_all(
-       ipc_mqueue_t            mqueue,
-       queue_t                 links);
+       ipc_mqueue_t            mqueue);
 
 /* Remove all the members of the specified set */
 extern void ipc_mqueue_remove_all(
-       ipc_mqueue_t            mqueue,
-       queue_t                 links);
+       ipc_mqueue_t            mqueue);
 
 /* Send a message to a port */
 extern mach_msg_return_t ipc_mqueue_send(
@@ -220,7 +230,8 @@ extern void ipc_mqueue_receive_continue(
 
 /* Select a message from a queue and try to post it to ourself */
 extern void ipc_mqueue_select_on_thread(
-       ipc_mqueue_t            mqueue,
+       ipc_mqueue_t            port_mq,
+       ipc_mqueue_t            set_mq,
        mach_msg_option_t       option,
        mach_msg_size_t         max_size,
        thread_t                thread);
@@ -239,14 +250,16 @@ extern unsigned ipc_mqueue_set_peek(
 
 /* Gather the names of member port for a given set */
 extern void ipc_mqueue_set_gather_member_names(
-       ipc_mqueue_t            mqueue,
+       ipc_space_t             space,
+       ipc_mqueue_t            set_mq,
        ipc_entry_num_t         maxnames,
        mach_port_name_t        *names,
        ipc_entry_num_t         *actualp);
 
 /* Clear a message count reservation */
 extern void ipc_mqueue_release_msgcount(
-       ipc_mqueue_t            mqueue);
+       ipc_mqueue_t            port_mq,
+       ipc_mqueue_t            set_mq);
 
 /* Change a queue limit */
 extern void ipc_mqueue_set_qlimit(
index 49b7e46900197739ac9622f3be86d32a87cd6566..166c9d6da1352149e00dd7ee4d6b1b5bdf42209a 100644 (file)
@@ -501,15 +501,7 @@ ipc_object_copyin(
        ipc_port_t soright;
        ipc_port_t release_port;
        kern_return_t kr;
-       queue_head_t links_data;
-       queue_t links = &links_data;
-       wait_queue_link_t wql;
-
-#if IMPORTANCE_INHERITANCE
        int assertcnt = 0;
-#endif
-
-       queue_init(links);
 
        /*
         *      Could first try a read lock when doing
@@ -527,19 +519,11 @@ ipc_object_copyin(
                              msgt_name, TRUE,
                              objectp, &soright,
                              &release_port,
-#if IMPORTANCE_INHERITANCE
-                             &assertcnt,
-#endif /* IMPORTANCE_INHERITANCE */
-                             links);
+                             &assertcnt);
        if (IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE)
                ipc_entry_dealloc(space, name, entry);
        is_write_unlock(space);
 
-       while(!queue_empty(links)) {
-               wql = (wait_queue_link_t) dequeue(links);
-               wait_queue_link_free(wql);
-       }
-
 #if IMPORTANCE_INHERITANCE
        if (0 < assertcnt && ipc_importance_task_is_any_receiver_type(current_task()->task_imp_base)) {
                ipc_importance_task_drop_internal_assertion(current_task()->task_imp_base, assertcnt);
index ef967b0552b5b3894641cd93e89e9d4115fe07b2..62e8bc253cd3f3b0d07aecee91085003a87d31d4 100644 (file)
@@ -102,7 +102,7 @@ struct ipc_object {
        ipc_object_bits_t io_bits;
        ipc_object_refs_t io_references;
        lck_spin_t      io_lock_data;
-};
+} __attribute__((__packed__));
 
 /*
  * If another object type needs to participate in io_kotype()-based
index e3e727ba89563fa994571f73af52152d6d5a02ea..e8fbd9449aaac567419d578c56ff0b762f080b8e 100644 (file)
@@ -77,7 +77,7 @@
 #include <kern/ipc_kobject.h>
 #include <kern/thread.h>
 #include <kern/misc_protos.h>
-#include <kern/wait_queue.h>
+#include <kern/waitq.h>
 #include <ipc/ipc_entry.h>
 #include <ipc/ipc_space.h>
 #include <ipc/ipc_object.h>
@@ -547,8 +547,7 @@ ipc_port_nsrequest(
 
 void
 ipc_port_clear_receiver(
-       ipc_port_t      port,
-       queue_t         links)
+       ipc_port_t      port)
 {
        spl_t           s;
 
@@ -557,9 +556,9 @@ ipc_port_clear_receiver(
        /*
         * pull ourselves from any sets.
         */
-       if (port->ip_pset_count != 0) {
-               ipc_pset_remove_from_all(port, links);
-               assert(port->ip_pset_count == 0);
+       if (port->ip_in_pset != 0) {
+               ipc_pset_remove_from_all(port);
+               assert(port->ip_in_pset == 0);
        }
 
        /*
@@ -602,7 +601,6 @@ ipc_port_init(
        port->ip_pdrequest = IP_NULL;
        port->ip_requests = IPR_NULL;
 
-       port->ip_pset_count = 0;
        port->ip_premsg = IKM_NULL;
        port->ip_context = 0;
 
@@ -617,7 +615,8 @@ ipc_port_init(
 
        port->ip_reserved    = 0;
 
-       ipc_mqueue_init(&port->ip_messages, FALSE /* set */);
+       ipc_mqueue_init(&port->ip_messages,
+                       FALSE /* !set */, NULL /* no reserved link */);
 }
 
 /*
@@ -870,7 +869,7 @@ ipc_port_destroy(
        assert(ip_active(port));
        /* port->ip_receiver_name is garbage */
        /* port->ip_receiver/port->ip_destination is garbage */
-       assert(port->ip_pset_count == 0);
+       assert(port->ip_in_pset == 0);
        assert(port->ip_mscount == 0);
 
        /* check for a backup port */
@@ -949,6 +948,9 @@ ipc_port_destroy(
        mqueue = &port->ip_messages;
        ipc_mqueue_destroy(mqueue);
 
+       /* cleanup waitq related resources */
+       ipc_mqueue_deinit(mqueue);
+
        /* generate dead-name notifications */
        ipc_port_dnnotify(port);
 
@@ -1497,7 +1499,7 @@ ipc_port_lookup_notify(
        if ((entry->ie_bits & MACH_PORT_TYPE_RECEIVE) == 0)
                return IP_NULL;
 
-       port = (ipc_port_t) entry->ie_object;
+       __IGNORE_WCASTALIGN(port = (ipc_port_t) entry->ie_object);
        assert(port != IP_NULL);
 
        ip_lock(port);
@@ -1791,7 +1793,7 @@ ipc_port_alloc_special(
 {
        ipc_port_t port;
 
-       port = (ipc_port_t) io_alloc(IOT_PORT);
+       __IGNORE_WCASTALIGN(port = (ipc_port_t) io_alloc(IOT_PORT));
        if (port == IP_NULL)
                return IP_NULL;
 
@@ -1869,6 +1871,8 @@ ipc_port_finalize(
                it_requests_free(its, requests);
                port->ip_requests = IPR_NULL;
        }
+
+       ipc_mqueue_deinit(&port->ip_messages);
        
 #if    MACH_ASSERT
        ipc_port_track_dealloc(port);
@@ -1883,8 +1887,10 @@ ipc_port_finalize(
  *     Allocation is intercepted via ipc_port_init;
  *     deallocation is intercepted via io_free.
  */
+#if 0
 queue_head_t   port_alloc_queue;
 lck_spin_t     port_alloc_queue_lock;
+#endif
 
 unsigned long  port_count = 0;
 unsigned long  port_count_warning = 20000;
@@ -1907,9 +1913,10 @@ int              db_port_walk(
 void
 ipc_port_debug_init(void)
 {
+#if 0
        queue_init(&port_alloc_queue);
-
        lck_spin_init(&port_alloc_queue_lock, &ipc_lck_grp, &ipc_lck_attr);
+#endif
 
        if (!PE_parse_boot_argn("ipc_portbt", &ipc_portbt, sizeof (ipc_portbt)))
                ipc_portbt = 0;
index ec1f3efc965af53926bedddf4c12acb4dbaa4714..48a2fc49dbf570eb24bdfd50ad5845871578b3f4 100644 (file)
@@ -120,6 +120,15 @@ struct ipc_port {
        struct ipc_object ip_object;
        struct ipc_mqueue ip_messages;
 
+       natural_t ip_sprequests:1,      /* send-possible requests outstanding */
+                 ip_spimportant:1,     /* ... at least one is importance donating */
+                 ip_impdonation:1,     /* port supports importance donation */
+                 ip_tempowner:1,       /* don't give donations to current receiver */
+                 ip_guarded:1,         /* port guarded (use context value as guard) */
+                 ip_strict_guard:1,    /* Strict guarding; Prevents user manipulation of context values directly */
+                 ip_reserved:2,
+                 ip_impcount:24;       /* number of importance donations in nested queue */
+
        union {
                struct ipc_space *receiver;
                struct ipc_port *destination;
@@ -137,39 +146,29 @@ struct ipc_port {
        struct ipc_port_request *ip_requests;
        struct ipc_kmsg *ip_premsg;
 
+       mach_vm_address_t ip_context;
+
        mach_port_mscount_t ip_mscount;
        mach_port_rights_t ip_srights;
        mach_port_rights_t ip_sorights;
 
-       natural_t ip_sprequests:1,      /* send-possible requests outstanding */
-                 ip_spimportant:1,     /* ... at least one is importance donating */
-                 ip_impdonation:1,     /* port supports importance donation */
-                 ip_tempowner:1,       /* don't give donations to current receiver */
-                 ip_guarded:1,         /* port guarded (use context value as guard) */
-                 ip_strict_guard:1,    /* Strict guarding; Prevents user manipulation of context values directly */
-                 ip_reserved:2,
-                 ip_impcount:24;       /* number of importance donations in nested queue */
-
-       mach_vm_address_t ip_context;
-
-
 #if    MACH_ASSERT
 #define        IP_NSPARES              4
 #define        IP_CALLSTACK_MAX        16
-       queue_chain_t   ip_port_links;  /* all allocated ports */
+/*     queue_chain_t   ip_port_links;*//* all allocated ports */
        thread_t        ip_thread;      /* who made me?  thread context */
        unsigned long   ip_timetrack;   /* give an idea of "when" created */
        uintptr_t       ip_callstack[IP_CALLSTACK_MAX]; /* stack trace */
        unsigned long   ip_spares[IP_NSPARES]; /* for debugging */
 #endif /* MACH_ASSERT */
-};
+} __attribute__((__packed__));
 
 
 #define ip_references          ip_object.io_references
 #define ip_bits                        ip_object.io_bits
 
 #define ip_receiver_name       ip_messages.imq_receiver_name
-#define        ip_pset_count           ip_messages.imq_pset_count
+#define        ip_in_pset              ip_messages.imq_in_pset
 
 #define        ip_receiver             data.receiver
 #define        ip_destination          data.destination
@@ -393,8 +392,7 @@ MACRO_END
 
 /* Prepare a receive right for transmission/destruction */
 extern void ipc_port_clear_receiver(
-       ipc_port_t              port,
-       queue_t                 links);
+       ipc_port_t              port);
 
 /* Initialize a newly-allocated port */
 extern void ipc_port_init(
index 533ee3f08a67453003d642e7566d171bca6dfb65..de389c96937c25251ba219065d9df3390dab7ac0 100644 (file)
@@ -101,18 +101,25 @@ ipc_pset_alloc(
        ipc_pset_t pset;
        mach_port_name_t name;
        kern_return_t kr;
+       uint64_t reserved_link;
+
+       reserved_link = waitq_link_reserve(NULL);
 
        kr = ipc_object_alloc(space, IOT_PORT_SET,
                              MACH_PORT_TYPE_PORT_SET, 0,
                              &name, (ipc_object_t *) &pset);
-       if (kr != KERN_SUCCESS)
+       if (kr != KERN_SUCCESS) {
+               waitq_link_release(reserved_link);
                return kr;
+       }
        /* pset and space are locked */
 
        pset->ips_local_name = name;
-       ipc_mqueue_init(&pset->ips_messages, TRUE /* set */);
+       ipc_mqueue_init(&pset->ips_messages, TRUE /* set */, &reserved_link);
        is_write_unlock(space);
 
+       waitq_link_release(reserved_link);
+
        *namep = name;
        *psetp = pset;
        return KERN_SUCCESS;
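Both port-set allocation paths now follow the same three-step shape: reserve a waitq link while blocking is still legal, pass the reservation to ipc_mqueue_init() so the set's waitq_set can be linked without allocating, then release whatever was not consumed. Reduced to a skeleton (illustrative only; the allocation, locking and error paths shown above are elided):

    uint64_t reserved_link = waitq_link_reserve(NULL);  /* may block/allocate */

    /* ... allocate and lock the pset ... */

    ipc_mqueue_init(&pset->ips_messages, TRUE /* set */, &reserved_link);

    waitq_link_release(reserved_link);                  /* return the leftover */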
@@ -140,17 +147,24 @@ ipc_pset_alloc_name(
 {
        ipc_pset_t pset;
        kern_return_t kr;
+       uint64_t reserved_link;
+
 
+       reserved_link = waitq_link_reserve(NULL);
 
        kr = ipc_object_alloc_name(space, IOT_PORT_SET,
                                   MACH_PORT_TYPE_PORT_SET, 0,
                                   name, (ipc_object_t *) &pset);
-       if (kr != KERN_SUCCESS)
+       if (kr != KERN_SUCCESS) {
+               waitq_link_release(reserved_link);
                return kr;
+       }
        /* pset is locked */
 
        pset->ips_local_name = name;
-       ipc_mqueue_init(&pset->ips_messages, TRUE /* set */);
+       ipc_mqueue_init(&pset->ips_messages, TRUE /* set */, &reserved_link);
+
+       waitq_link_release(reserved_link);
 
        *psetp = pset;
        return KERN_SUCCESS;
@@ -188,17 +202,16 @@ kern_return_t
 ipc_pset_add(
        ipc_pset_t        pset,
        ipc_port_t        port,
-       wait_queue_link_t wql)
+       uint64_t         *reserved_link,
+       uint64_t         *reserved_prepost)
 {
        kern_return_t kr;
 
        assert(ips_active(pset));
        assert(ip_active(port));
        
-       kr = ipc_mqueue_add(&port->ip_messages, &pset->ips_messages, wql);
-
-       if (kr == KERN_SUCCESS)
-               port->ip_pset_count++;
+       kr = ipc_mqueue_add(&port->ip_messages, &pset->ips_messages,
+                           reserved_link, reserved_prepost);
 
        return kr;
 }
@@ -218,20 +231,16 @@ ipc_pset_add(
 kern_return_t
 ipc_pset_remove(
        ipc_pset_t        pset,
-       ipc_port_t        port,
-       wait_queue_link_t *wqlp)
+       ipc_port_t        port)
 {
        kern_return_t kr;
 
        assert(ip_active(port));
        
-       if (port->ip_pset_count == 0)
+       if (port->ip_in_pset == 0)
                return KERN_NOT_IN_SET;
 
-       kr = ipc_mqueue_remove(&port->ip_messages, &pset->ips_messages, wqlp);
-
-       if (kr == KERN_SUCCESS)
-               port->ip_pset_count--;
+       kr = ipc_mqueue_remove(&port->ip_messages, &pset->ips_messages);
 
        return kr;
 }
@@ -246,19 +255,17 @@ ipc_pset_remove(
 
 kern_return_t
 ipc_pset_remove_from_all(
-       ipc_port_t      port,
-       queue_t         links)
+       ipc_port_t      port)
 {
        assert(ip_active(port));
        
-       if (port->ip_pset_count == 0)
+       if (port->ip_in_pset == 0)
                return KERN_NOT_IN_SET;
 
        /* 
         * Remove the port's mqueue from all sets
         */
-       ipc_mqueue_remove_from_all(&port->ip_messages, links);
-       port->ip_pset_count = 0;
+       ipc_mqueue_remove_from_all(&port->ip_messages);
        return KERN_SUCCESS;
 }
 
@@ -278,11 +285,6 @@ ipc_pset_destroy(
        ipc_pset_t      pset)
 {
        spl_t           s;
-       queue_head_t link_data;
-       queue_t links = &link_data;
-       wait_queue_link_t wql;
-
-       queue_init(links);
 
        assert(ips_active(pset));
 
@@ -290,8 +292,9 @@ ipc_pset_destroy(
 
        /*
         * remove all the member message queues
+        * AND remove this message queue from any containing sets
         */
-       ipc_mqueue_remove_all(&pset->ips_messages, links);
+       ipc_mqueue_remove_all(&pset->ips_messages);
        
        /*
         * Set all waiters on the portset running to
@@ -303,14 +306,10 @@ ipc_pset_destroy(
        imq_unlock(&pset->ips_messages);
        splx(s);
 
+       ipc_mqueue_deinit(&pset->ips_messages);
+
        ips_unlock(pset);
        ips_release(pset);       /* consume the ref our caller gave us */
-
-       while(!queue_empty(links)) {
-               wql = (wait_queue_link_t) dequeue(links);
-               wait_queue_link_free(wql);
-       }
-
 }
 
 /* Kqueue EVFILT_MACHPORT support */
@@ -320,7 +319,7 @@ ipc_pset_destroy(
 static int      filt_machportattach(struct knote *kn);
 static void    filt_machportdetach(struct knote *kn);
 static int     filt_machport(struct knote *kn, long hint);
-static void     filt_machporttouch(struct knote *kn, struct kevent64_s *kev, long type);
+static void     filt_machporttouch(struct knote *kn, struct kevent_internal_s *kev, long type);
 static unsigned filt_machportpeek(struct knote *kn);
 struct filterops machport_filtops = {
         .f_attach = filt_machportattach,
@@ -335,7 +334,7 @@ filt_machportattach(
         struct knote *kn)
 {
         mach_port_name_t        name = (mach_port_name_t)kn->kn_kevent.ident;
-       wait_queue_link_t       wql = wait_queue_link_allocate();
+        uint64_t                wq_link_id = waitq_link_reserve(NULL);
         ipc_pset_t              pset = IPS_NULL;
         int                     result = ENOSYS;
         kern_return_t           kr;
@@ -344,7 +343,7 @@ filt_machportattach(
                                   MACH_PORT_RIGHT_PORT_SET,
                                   (ipc_object_t *)&pset);
         if (kr != KERN_SUCCESS) {
-               wait_queue_link_free(wql);
+               waitq_link_release(wq_link_id);
                 return (kr == KERN_INVALID_NAME ? ENOENT : ENOTSUP);
         }
         /* We've got a lock on pset */
@@ -355,8 +354,9 @@ filt_machportattach(
         * rather than having to call knote() from the Mach code on each
         * message.
         */
-       result = knote_link_wait_queue(kn, &pset->ips_messages.imq_wait_queue, wql);
+       result = knote_link_waitq(kn, &pset->ips_messages.imq_wait_queue, &wq_link_id);
        if (result == 0) {
+               waitq_link_release(wq_link_id);
                /* keep a reference for the knote */
                kn->kn_ptr.p_pset = pset; 
                ips_reference(pset);
@@ -365,7 +365,7 @@ filt_machportattach(
        }
 
        ips_unlock(pset);
-       wait_queue_link_free(wql);
+       waitq_link_release(wq_link_id);
        return result;
 }
 
@@ -374,19 +374,16 @@ filt_machportdetach(
         struct knote *kn)
 {
         ipc_pset_t              pset = kn->kn_ptr.p_pset;
-       wait_queue_link_t       wql = WAIT_QUEUE_LINK_NULL;
 
        /*
         * Unlink the portset wait queue from knote/kqueue,
         * and release our reference on the portset.
         */
        ips_lock(pset);
-       (void)knote_unlink_wait_queue(kn, &pset->ips_messages.imq_wait_queue, &wql);
+       (void)knote_unlink_waitq(kn, &pset->ips_messages.imq_wait_queue);
        kn->kn_ptr.p_pset = IPS_NULL;
        ips_unlock(pset);
        ips_release(pset);
-       if (wql != WAIT_QUEUE_LINK_NULL)
-               wait_queue_link_free(wql);
 }
 
 static int
@@ -451,7 +448,6 @@ filt_machport(
        self->ith_object = (ipc_object_t)pset;
        self->ith_msize = size;
        self->ith_option = option;
-       self->ith_scatter_list_size = 0;
        self->ith_receiver_name = MACH_PORT_NULL;
        self->ith_continuation = NULL;
        option |= MACH_RCV_TIMEOUT; // never wait
@@ -512,7 +508,7 @@ filt_machport(
 }
 
 static void
-filt_machporttouch(struct knote *kn, struct kevent64_s *kev, long type)
+filt_machporttouch(struct knote *kn, struct kevent_internal_s *kev, long type)
 {
         switch (type) {
         case EVENT_REGISTER:
@@ -537,7 +533,7 @@ filt_machporttouch(struct knote *kn, struct kevent64_s *kev, long type)
 /*
  * Peek to see if the portset associated with the knote has any
  * events. This pre-hook is called when a filter uses the stay-
- * on-queue mechanism (as the knote_link_wait_queue mechanism
+ * on-queue mechanism (as the knote_link_waitq mechanism
  * does).
  *
  * This is called with the kqueue that the knote belongs to still
index 5dd8af59352531915f46d43043e84127d1bfac32..b6f56fffeb11b6a1a709789e4ecdd58855154527 100644 (file)
@@ -110,7 +110,8 @@ extern kern_return_t ipc_pset_alloc_name(
 extern kern_return_t ipc_pset_add(
        ipc_pset_t      pset,
        ipc_port_t      port,
-       wait_queue_link_t wql);
+       uint64_t        *reserved_link,
+       uint64_t        *reserved_prepost);
 
 /* determine if port is a member of set */
 extern boolean_t ipc_pset_member(
@@ -120,13 +121,11 @@ extern boolean_t ipc_pset_member(
 /* Remove a port from a port set */
 extern kern_return_t ipc_pset_remove(
        ipc_pset_t      pset,
-       ipc_port_t      port,
-       wait_queue_link_t *wqlp);
+       ipc_port_t      port);
 
 /* Remove a port from all its current port sets */
 extern kern_return_t ipc_pset_remove_from_all(
-       ipc_port_t      port,
-       queue_t         links);
+       ipc_port_t      port);
 
 /* Destroy a port_set */
 extern void ipc_pset_destroy(
index 205f91ca780eeeaea2b3c8d146e3170acea99f9b..b8feb54dc2cc826e8512b6e4d3b5c9434227271c 100644 (file)
@@ -659,20 +659,11 @@ ipc_right_terminate(
                }
 
                if (type & MACH_PORT_TYPE_RECEIVE) {
-                       wait_queue_link_t wql;
-                       queue_head_t links_data;
-                       queue_t links = &links_data;
-
                        assert(port->ip_receiver_name == name);
                        assert(port->ip_receiver == space);
 
-                       queue_init(links);
-                       ipc_port_clear_receiver(port, links);
+                       ipc_port_clear_receiver(port);
                        ipc_port_destroy(port); /* consumes our ref, unlocks */
-                       while(!queue_empty(links)) {
-                               wql = (wait_queue_link_t) dequeue(links);
-                               wait_queue_link_free(wql);
-                       }
 
                } else if (type & MACH_PORT_TYPE_SEND_ONCE) {
                        assert(port->ip_sorights > 0);
@@ -813,23 +804,12 @@ ipc_right_destroy(
                }
 
                if (type & MACH_PORT_TYPE_RECEIVE) {
-                       queue_head_t links_data;
-                       queue_t links = &links_data;
-                       wait_queue_link_t wql;
-
                        assert(ip_active(port));
                        assert(port->ip_receiver == space);
 
-                       queue_init(links);
-
-                       ipc_port_clear_receiver(port, links);
+                       ipc_port_clear_receiver(port);
                        ipc_port_destroy(port); /* consumes our ref, unlocks */
 
-                       while(!queue_empty(links)) {
-                               wql = (wait_queue_link_t) dequeue(links);
-                               wait_queue_link_free(wql);
-                       }
-
                } else if (type & MACH_PORT_TYPE_SEND_ONCE) {
                        assert(port->ip_sorights > 0);
                        ip_unlock(port);
@@ -1122,9 +1102,6 @@ ipc_right_delta(
 
            case MACH_PORT_RIGHT_RECEIVE: {
                ipc_port_t request = IP_NULL;
-               queue_head_t links_data;
-               queue_t links = &links_data;
-               wait_queue_link_t wql;
 
                if ((bits & MACH_PORT_TYPE_RECEIVE) == 0)
                        goto invalid_right;
@@ -1211,15 +1188,9 @@ ipc_right_delta(
                }
                is_write_unlock(space);
 
-               queue_init(links);
-               ipc_port_clear_receiver(port, links);
+               ipc_port_clear_receiver(port);
                ipc_port_destroy(port); /* consumes ref, unlocks */
 
-               while(!queue_empty(links)) {
-                       wql = (wait_queue_link_t) dequeue(links);
-                       wait_queue_link_free(wql);
-               }
-
                if (request != IP_NULL)
                        ipc_notify_port_deleted(request, name);
                break;
@@ -1451,10 +1422,6 @@ ipc_right_destruct(
        ipc_port_t port = IP_NULL;
        ipc_entry_bits_t bits;
 
-       queue_head_t links_data;
-       queue_t links = &links_data;
-       wait_queue_link_t wql;
-
        mach_port_urefs_t urefs;
        ipc_port_t request = IP_NULL;
        ipc_port_t nsrequest = IP_NULL;
@@ -1585,15 +1552,9 @@ ipc_right_destruct(
        if (nsrequest != IP_NULL)
                ipc_notify_no_senders(nsrequest, mscount);
 
-       queue_init(links);
-       ipc_port_clear_receiver(port, links);
+       ipc_port_clear_receiver(port);
        ipc_port_destroy(port); /* consumes ref, unlocks */
 
-       while(!queue_empty(links)) {
-               wql = (wait_queue_link_t) dequeue(links);
-               wait_queue_link_free(wql);
-       }
-
        if (request != IP_NULL)
                ipc_notify_port_deleted(request, name);
        
@@ -1783,19 +1744,13 @@ ipc_right_copyin(
        ipc_object_t            *objectp,
        ipc_port_t              *sorightp,
        ipc_port_t              *releasep,
-#if IMPORTANCE_INHERITANCE
-       int                     *assertcntp,
-#endif /* IMPORTANCE_INHERITANCE */
-       queue_t                 links)
+       int                     *assertcntp)
 {
        ipc_entry_bits_t bits;
        ipc_port_t port;
 
        *releasep = IP_NULL;
-
-#if IMPORTANCE_INHERITANCE
        *assertcntp = 0;
-#endif
 
        bits = entry->ie_bits;
 
@@ -1881,7 +1836,7 @@ ipc_right_copyin(
                entry->ie_bits = bits &~ MACH_PORT_TYPE_RECEIVE;
                ipc_entry_modified(space, name, entry);
 
-               ipc_port_clear_receiver(port, links);
+               ipc_port_clear_receiver(port);
                port->ip_receiver_name = MACH_PORT_NULL;
                port->ip_destination = IP_NULL;
 
@@ -2036,6 +1991,7 @@ ipc_right_copyin(
 
                if (ipc_right_check(space, port, name, entry)) {
                        bits = entry->ie_bits;
+                       *releasep = port;
                        goto move_dead;
                }
                /* port is locked and active */
@@ -2304,15 +2260,8 @@ ipc_right_copyin_two(
        ipc_port_t              *sorightp,
        ipc_port_t              *releasep)
 {
-       queue_head_t links_data;
-       queue_t links = &links_data;
        kern_return_t kr;
-
-#if IMPORTANCE_INHERITANCE
        int assertcnt = 0;
-#endif
-
-       queue_init(links);
 
        assert(MACH_MSG_TYPE_PORT_ANY_SEND(msgt_one));
        assert(MACH_MSG_TYPE_PORT_ANY_SEND(msgt_two));
@@ -2355,18 +2304,11 @@ ipc_right_copyin_two(
                 */
                ipc_object_t object_two;
 
-#if IMPORTANCE_INHERITANCE
                kr = ipc_right_copyin(space, name, entry,
                                      msgt_one, FALSE,
                                      objectp, sorightp, releasep,
-                                     &assertcnt, links);
+                                     &assertcnt);
                assert(assertcnt == 0);
-#else
-               kr = ipc_right_copyin(space, name, entry,
-                                     msgt_one, FALSE,
-                                     objectp, sorightp, releasep,
-                                     links);
-#endif /* IMPORTANCE_INHERITANCE */
                if (kr != KERN_SUCCESS) {
                        return kr;
                }
@@ -2381,18 +2323,11 @@ ipc_right_copyin_two(
                 *      as no valid disposition can make us lose our
                 *      receive right.
                 */
-#if IMPORTANCE_INHERITANCE
                kr = ipc_right_copyin(space, name, entry,
                                      msgt_two, FALSE,
                                      &object_two, sorightp, releasep,
-                                     &assertcnt, links);
+                                     &assertcnt);
                assert(assertcnt == 0);
-#else
-               kr = ipc_right_copyin(space, name, entry,
-                                     msgt_two, FALSE,
-                                     &object_two, sorightp, releasep,
-                                     links);
-#endif /* IMPORTANCE_INHERITANCE */
                assert(kr == KERN_SUCCESS);
                assert(*sorightp == IP_NULL);
                assert(*releasep == IP_NULL);
@@ -2430,18 +2365,11 @@ ipc_right_copyin_two(
                        msgt_name = MACH_MSG_TYPE_COPY_SEND;
                }
 
-#if IMPORTANCE_INHERITANCE
                kr = ipc_right_copyin(space, name, entry,
                                      msgt_name, FALSE,
                                      objectp, sorightp, releasep,
-                                     &assertcnt, links);
+                                     &assertcnt);
                assert(assertcnt == 0);
-#else
-               kr = ipc_right_copyin(space, name, entry,
-                                     msgt_name, FALSE,
-                                     objectp, sorightp, releasep,
-                                     links);
-#endif /* IMPORTANCE_INHERITANCE */
                if (kr != KERN_SUCCESS) {
                        return kr;
                }
@@ -2454,8 +2382,6 @@ ipc_right_copyin_two(
                (void)ipc_port_copy_send((ipc_port_t)*objectp);
        }
 
-       assert(queue_empty(links));
-
        return KERN_SUCCESS;
 }
 
index b42be8d21086ede099f46ff03d35a17eabd7de1a..a3d4af17e1f86a89926586b77da8d34771324040 100644 (file)
@@ -189,10 +189,7 @@ extern kern_return_t ipc_right_copyin(
        ipc_object_t            *objectp,
        ipc_port_t              *sorightp,
        ipc_port_t              *releasep,
-#if IMPORTANCE_INHERITANCE
-       int                     *assertcntp,
-#endif
-       queue_t                 links);
+       int                     *assertcntp);
 
 /* Undo the effects of an ipc_right_copyin */
 extern void ipc_right_copyin_undo(
index 67086488c13f56f17407655e85b712c5a8f4eaeb..b65071b28bdb49768e7bb54b2a6fe36d03f85357 100644 (file)
 #include <kern/kalloc.h>
 #include <vm/vm_kern.h>
 
-/*
- * Forward declarations
- */
-void ipc_table_fill(
-       ipc_table_size_t        its,
-       unsigned int            num,
-       unsigned int            min,
-       vm_size_t               elemsize);
-
-/*
- *     We borrow the kalloc map, rather than creating
- *     yet another submap of the kernel map.
- */
-
-extern vm_map_t kalloc_map;
-
 ipc_table_size_t ipc_table_entries;
-unsigned int ipc_table_entries_size = 512;
+unsigned int ipc_table_entries_size = CONFIG_IPC_TABLE_ENTRIES_STEPS;
 
 ipc_table_size_t ipc_table_requests;
 unsigned int ipc_table_requests_size = 64;
 
-void
+static void
 ipc_table_fill(
        ipc_table_size_t        its,         /* array to fill */
        unsigned int            num,         /* size of array */
@@ -108,7 +92,7 @@ ipc_table_fill(
        /* first use powers of two, up to the page size */
 
        for (index = 0, size = 1;
-            (index < num) && (size < PAGE_SIZE);
+            (index < num) && (size < PAGE_MAX_SIZE);
             size <<= 1) {
                if (size >= minsize) {
                        its[index].its_size = (ipc_table_elems_t)(size / elemsize);
@@ -118,7 +102,7 @@ ipc_table_fill(
 
        /* then increments of a page, then two pages, etc. */
 
-       for (incrsize = PAGE_SIZE; index < num;) {
+       for (incrsize = PAGE_MAX_SIZE; index < num;) {
                unsigned int period;
 
                for (period = 0;
@@ -129,7 +113,7 @@ ipc_table_fill(
                                index++;
                        }
                }
-               if (incrsize < (vm_size_t)(PAGE_SIZE << 3))
+               if (incrsize < (vm_size_t)(PAGE_MAX_SIZE << 3))
                        incrsize <<= 1;
        }
 }
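The table of IPC entry-table sizes is thus built in two phases: entry counts derived from powers of two up to PAGE_MAX_SIZE, then whole-page steps whose increment doubles until it reaches eight pages. A stand-alone, user-space sketch of that schedule follows; the page size, element size, and the number of steps taken per increment (the inner-loop bound is not visible in this hunk) are assumptions chosen only to make the example self-contained:

    #include <stdio.h>

    #define EX_PAGE_MAX_SIZE 16384UL        /* assumed maximum page size */

    int main(void)
    {
            unsigned long elemsize = 16;    /* assumed entry size */
            unsigned long minsize  = 16;
            unsigned int  num = 16, index = 0;
            unsigned long size, incrsize;

            /* phase 1: powers of two, up to the page size */
            for (size = 1; index < num && size < EX_PAGE_MAX_SIZE; size <<= 1)
                    if (size >= minsize)
                            printf("table[%u] = %lu entries\n", index++, size / elemsize);

            /* phase 2: page-sized steps, doubling the step up to eight pages */
            for (incrsize = EX_PAGE_MAX_SIZE; index < num;) {
                    unsigned int period;
                    for (period = 0; period < 2 && index < num;
                         period++, size += incrsize)
                            printf("table[%u] = %lu entries\n", index++, size / elemsize);
                    if (incrsize < (EX_PAGE_MAX_SIZE << 3))
                            incrsize <<= 1;
            }
            return 0;
    }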
@@ -176,23 +160,14 @@ void *
 ipc_table_alloc(
        vm_size_t       size)
 {
-       vm_offset_t table;
-
-       if (size < PAGE_SIZE)
-               return kalloc(size);
-
-       if (kmem_alloc(kalloc_map, &table, size) != KERN_SUCCESS)
-               table = 0;
-
-       return (void *)table;
+       return kalloc(size);
 }
 
 
 /*
  *     Routine:        ipc_table_free
  *     Purpose:
- *             Free a table allocated with ipc_table_alloc or
- *             ipc_table_realloc.
+ *             Free a table allocated with ipc_table_alloc.
  *     Conditions:
  *             May block.
  */
@@ -202,8 +177,5 @@ ipc_table_free(
        vm_size_t       size,
        void *          table)
 {
-       if (size < PAGE_SIZE)
-               kfree(table, size);
-       else
-               kmem_free(kalloc_map, (vm_offset_t)table, size);
+       kfree(table, size);
 }
index 36e77dfd54e61e6fc9419535d2411684dfd9a885..43880d7f933e25921f14563476d7016fd89af96f 100644 (file)
@@ -824,23 +824,17 @@ ivac_grow_table(ipc_voucher_attr_control_t ivac)
        ivac->ivac_is_growing = 1;
        if (ivac->ivac_table_size >= IVAC_ENTRIES_MAX) {
                panic("Cannot grow ipc space beyond IVAC_ENTRIES_MAX. Some process is leaking vouchers");
+               return;
        }
 
        old_size = ivac->ivac_table_size;
        ivac_unlock(ivac);
 
-       /*
-        * if initial size is not leading to page aligned allocations,
-        * set new_size such that new_size * sizeof(ivac_entry) is page aligned.
-        */
-       
-       if ((old_size * sizeof(ivac_entry)) & PAGE_MASK){
-               new_size = (iv_index_t)round_page((old_size * sizeof(ivac_entry)))/(sizeof (ivac_entry));
-       } else {
-               new_size = old_size * 2;
-       }
+       new_size = old_size * 2;
 
        assert(new_size > old_size);
+       assert(new_size < IVAC_ENTRIES_MAX);
+
        new_table = kalloc(sizeof(ivac_entry) * new_size);
        if (!new_table){
                panic("Failed to grow ivac table to size %d\n", new_size);
index d66481376f4a892bf120b41e26f15a1db17a6c66..c8e45673e258d17f1a78fbca76161b58a8310c60 100644 (file)
@@ -84,6 +84,8 @@
 #include <ipc/ipc_hash.h>
 #include <ipc/ipc_table.h>
 #include <ipc/ipc_right.h>
+
+#include <security/mac_mach_internal.h>
 #endif
 
 /*
@@ -186,6 +188,12 @@ mach_port_space_info(
        if (space == IS_NULL)
                return KERN_INVALID_TASK;
 
+#if !(DEVELOPMENT | DEBUG)
+       const boolean_t dbg_ok = (mac_task_check_expose_task(kernel_task) == 0);
+#else
+       const boolean_t dbg_ok = TRUE;
+#endif
+
        /* start with in-line memory */
 
        table_size = 0;
@@ -213,7 +221,7 @@ mach_port_space_info(
                if (table_size != table_size_needed) {
                        if (table_size != 0)
                                kmem_free(ipc_kernel_map, table_addr, table_size);
-                       kr = kmem_alloc(ipc_kernel_map, &table_addr, table_size_needed);
+                       kr = kmem_alloc(ipc_kernel_map, &table_addr, table_size_needed, VM_KERN_MEMORY_IPC);
                        if (kr != KERN_SUCCESS) {
                                return KERN_RESOURCE_SHORTAGE;
                        }
@@ -243,7 +251,7 @@ mach_port_space_info(
                iin->iin_type = IE_BITS_TYPE(bits);
                if ((entry->ie_bits & MACH_PORT_TYPE_PORT_RIGHTS) != MACH_PORT_TYPE_NONE &&
                    entry->ie_request != IE_REQ_NONE) {
-                       ipc_port_t port = (ipc_port_t) entry->ie_object;
+                       __IGNORE_WCASTALIGN(ipc_port_t port = (ipc_port_t) entry->ie_object);
 
                        assert(IP_VALID(port));
                        ip_lock(port);
@@ -252,7 +260,7 @@ mach_port_space_info(
                }
 
                iin->iin_urefs = IE_BITS_UREFS(bits);
-               iin->iin_object = (natural_t)VM_KERNEL_ADDRPERM((uintptr_t)entry->ie_object);
+               iin->iin_object = (dbg_ok) ? (natural_t)VM_KERNEL_ADDRPERM((uintptr_t)entry->ie_object) : 0;
                iin->iin_next = entry->ie_next;
                iin->iin_hash = entry->ie_index;
        }
@@ -319,6 +327,7 @@ mach_port_space_basic_info(
        if (space == IS_NULL)
                return KERN_INVALID_TASK;
 
+
        is_read_lock(space);
        if (!is_active(space)) {
                is_read_unlock(space);
@@ -464,7 +473,7 @@ mach_port_kobject(
                return KERN_INVALID_RIGHT;
        }
 
-       port = (ipc_port_t) entry->ie_object;
+       __IGNORE_WCASTALIGN(port = (ipc_port_t) entry->ie_object);
        assert(port != IP_NULL);
 
        ip_lock(port);
@@ -479,20 +488,16 @@ mach_port_kobject(
        kaddr = (mach_vm_address_t)port->ip_kobject;
        ip_unlock(port);
 
-#if !(DEVELOPMENT || DEBUG)
-       /* disable this interface on release kernels */
-        *addrp = 0;
-#else
+#if (DEVELOPMENT || DEBUG)
        if (0 != kaddr && is_ipc_kobject(*typep))
                *addrp = VM_KERNEL_UNSLIDE_OR_PERM(kaddr);
        else
-               *addrp = 0;
 #endif
+               *addrp = 0;
 
        return KERN_SUCCESS;
 }
 #endif /* MACH_IPC_DEBUG */
-
 /*
  *     Routine:        mach_port_kernel_object [Legacy kernel call]
  *     Purpose:
index a12c7091944902f024593a116e3c1c52c926c650..98282d7304f4506838bf4c791ccc550f35a26607 100644 (file)
@@ -335,22 +335,7 @@ mach_msg_receive_results(void)
 
        trailer_size = ipc_kmsg_add_trailer(kmsg, space, option, self, seqno, FALSE, 
                        kmsg->ikm_header->msgh_remote_port->ip_context);
-       /*
-        * If MACH_RCV_OVERWRITE was specified, try to get the scatter
-        * list and verify it against the contents of the message.  If
-        * there is any problem with it, we will continue without it as
-        * normal.
-        */
-       if (option & MACH_RCV_OVERWRITE) {
-               mach_msg_size_t slist_size = self->ith_scatter_list_size;
-               mach_msg_body_t *slist;
-
-               slist = ipc_kmsg_get_scatter(msg_addr, slist_size, kmsg);
-               mr = ipc_kmsg_copyout(kmsg, space, map, slist, option);
-               ipc_kmsg_free_scatter(slist, slist_size);
-       } else {
-               mr = ipc_kmsg_copyout(kmsg, space, map, MACH_MSG_BODY_NULL, option);
-       }
+       mr = ipc_kmsg_copyout(kmsg, space, map, MACH_MSG_BODY_NULL, option);
 
        if (mr != MACH_MSG_SUCCESS) {
                /* already received importance, so have to undo that here */
@@ -401,7 +386,7 @@ mach_msg_receive(
        mach_port_name_t        rcv_name,
        mach_msg_timeout_t      rcv_timeout,
        void                    (*continuation)(mach_msg_return_t),
-       mach_msg_size_t         slist_size)
+       __unused mach_msg_size_t slist_size)
 {
        thread_t self = current_thread();
        ipc_space_t space = current_space();
@@ -419,7 +404,6 @@ mach_msg_receive(
        self->ith_object = object;
        self->ith_msize = rcv_size;
        self->ith_option = option;
-       self->ith_scatter_list_size = slist_size;
        self->ith_continuation = continuation;
 
        ipc_mqueue_receive(mqueue, option, rcv_size, rcv_timeout, THREAD_ABORTSAFE);
@@ -459,7 +443,6 @@ mach_msg_overwrite_trap(
        mach_msg_timeout_t      msg_timeout = args->timeout;
        __unused mach_port_name_t notify = args->notify;
        mach_vm_address_t       rcv_msg_addr = args->rcv_msg;
-        mach_msg_size_t                scatter_list_size = 0; /* NOT INITIALIZED - but not used in practice */
        __unused mach_port_seqno_t temp_seqno = 0;
 
        mach_msg_return_t  mr = MACH_MSG_SUCCESS;
@@ -506,22 +489,13 @@ mach_msg_overwrite_trap(
                }
                /* hold ref for object */
 
-               /*
-                * 1. MACH_RCV_OVERWRITE is on, and rcv_msg is our scatter list
-                *    and receive buffer
-                * 2. MACH_RCV_OVERWRITE is off, and rcv_msg might be the
-                *    alternate receive buffer (separate send and receive buffers).
-                */
-               if (option & MACH_RCV_OVERWRITE) 
-                       self->ith_msg_addr = rcv_msg_addr;
-               else if (rcv_msg_addr != (mach_vm_address_t)0)
+               if (rcv_msg_addr != (mach_vm_address_t)0)
                        self->ith_msg_addr = rcv_msg_addr;
                else
                        self->ith_msg_addr = msg_addr;
                self->ith_object = object;
                self->ith_msize = rcv_size;
                self->ith_option = option;
-               self->ith_scatter_list_size = scatter_list_size;
                self->ith_receiver_name = MACH_PORT_NULL;
                self->ith_continuation = thread_syscall_return;
 
index d8e489eb56a8d921317645402ea71eb1390df6a0..444a6af2348f906f7034e2188f50c0d84cdcec64 100644 (file)
 #include <ipc/ipc_importance.h>
 #endif
 
+
 /*
  * Forward declarations
  */
@@ -162,7 +163,7 @@ mach_port_names_helper(
 
        bits = entry->ie_bits;
        request = entry->ie_request;
-       port = (ipc_port_t) entry->ie_object;
+       __IGNORE_WCASTALIGN(port = (ipc_port_t) entry->ie_object);
 
        if (bits & MACH_PORT_TYPE_RECEIVE) {
                assert(IP_VALID(port));
@@ -287,11 +288,11 @@ mach_port_names(
                }
                size = size_needed;
 
-               kr = vm_allocate(ipc_kernel_map, &addr1, size, VM_FLAGS_ANYWHERE);
+               kr = vm_allocate(ipc_kernel_map, &addr1, size, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_IPC));
                if (kr != KERN_SUCCESS)
                        return KERN_RESOURCE_SHORTAGE;
 
-               kr = vm_allocate(ipc_kernel_map, &addr2, size, VM_FLAGS_ANYWHERE);
+               kr = vm_allocate(ipc_kernel_map, &addr2, size, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_IPC));
                if (kr != KERN_SUCCESS) {
                        kmem_free(ipc_kernel_map, addr1, size);
                        return KERN_RESOURCE_SHORTAGE;
@@ -305,7 +306,7 @@ mach_port_names(
                                          VM_MAP_PAGE_MASK(ipc_kernel_map)),
                        vm_map_round_page(addr1 + size,
                                          VM_MAP_PAGE_MASK(ipc_kernel_map)),
-                       VM_PROT_READ|VM_PROT_WRITE,
+                       VM_PROT_READ|VM_PROT_WRITE|VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_IPC),
                        FALSE);
                if (kr != KERN_SUCCESS) {
                        kmem_free(ipc_kernel_map, addr1, size);
@@ -319,7 +320,7 @@ mach_port_names(
                                          VM_MAP_PAGE_MASK(ipc_kernel_map)),
                        vm_map_round_page(addr2 + size,
                                          VM_MAP_PAGE_MASK(ipc_kernel_map)),
-                       VM_PROT_READ|VM_PROT_WRITE,
+                       VM_PROT_READ|VM_PROT_WRITE|VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_IPC),
                        FALSE);
                if (kr != KERN_SUCCESS) {
                        kmem_free(ipc_kernel_map, addr1, size);
@@ -1252,14 +1253,14 @@ mach_port_get_set_status(
                ipc_object_t psobj;
                ipc_pset_t pset;
 
-               kr = vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE);
+               kr = vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_IPC));
                if (kr != KERN_SUCCESS)
                        return KERN_RESOURCE_SHORTAGE;
 
                /* can't fault while we hold locks */
 
                kr = vm_map_wire(ipc_kernel_map, addr, addr + size,
-                                    VM_PROT_READ|VM_PROT_WRITE, FALSE);
+                                    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_IPC), FALSE);
                assert(kr == KERN_SUCCESS);
 
                kr = ipc_object_translate(space, name, MACH_PORT_RIGHT_PORT_SET, &psobj);
@@ -1269,14 +1270,14 @@ mach_port_get_set_status(
                }
 
                /* just use a portset reference from here on out */
-               pset = (ipc_pset_t) psobj;
+               __IGNORE_WCASTALIGN(pset = (ipc_pset_t) psobj);
                ips_reference(pset);
                ips_unlock(pset); 
 
                names = (mach_port_name_t *) addr;
                maxnames = (ipc_entry_num_t)(size / sizeof(mach_port_name_t));
 
-               ipc_mqueue_set_gather_member_names(&pset->ips_messages, maxnames, names, &actual);
+               ipc_mqueue_set_gather_member_names(space, &pset->ips_messages, maxnames, names, &actual);
 
                /* release the portset reference */
                ips_release(pset);
@@ -1364,9 +1365,8 @@ mach_port_move_member(
        ipc_port_t port;
        ipc_pset_t nset;
        kern_return_t kr;
-       wait_queue_link_t wql;
-       queue_head_t links_data;
-       queue_t links = &links_data;
+       uint64_t wq_link_id = 0;
+       uint64_t wq_reserved_prepost = 0;
 
        if (space == IS_NULL)
                return KERN_INVALID_TASK;
@@ -1374,14 +1374,25 @@ mach_port_move_member(
        if (!MACH_PORT_VALID(member))
                return KERN_INVALID_RIGHT;
 
-       if (after == MACH_PORT_DEAD)
+       if (after == MACH_PORT_DEAD) {
                return KERN_INVALID_RIGHT;
-       else if (after == MACH_PORT_NULL)
-               wql = WAIT_QUEUE_LINK_NULL;
-       else
-               wql = wait_queue_link_allocate();
-
-       queue_init(links);
+       } else if (after == MACH_PORT_NULL) {
+               wq_link_id = 0;
+       } else {
+               /*
+                * We reserve both a link, and
+                * enough prepost objects to complete
+                * the set move atomically - we can't block
+                * while we're holding the space lock, and
+                * the ipc_pset_add calls ipc_mqueue_add
+                * which may have to prepost this port onto
+                * this set.
+                */
+               wq_link_id = waitq_link_reserve(NULL);
+               wq_reserved_prepost = waitq_prepost_reserve(NULL, 10,
+                                                           WAITQ_DONT_LOCK,
+                                                           NULL);
+       }
 
        kr = ipc_right_lookup_read(space, member, &entry);
        if (kr != KERN_SUCCESS)
@@ -1394,7 +1405,7 @@ mach_port_move_member(
                goto done;
        }
 
-       port = (ipc_port_t) entry->ie_object;
+       __IGNORE_WCASTALIGN(port = (ipc_port_t) entry->ie_object);
        assert(port != IP_NULL);
 
        if (after == MACH_PORT_NULL)
@@ -1413,27 +1424,28 @@ mach_port_move_member(
                        goto done;
                }
 
-               nset = (ipc_pset_t) entry->ie_object;
+               __IGNORE_WCASTALIGN(nset = (ipc_pset_t) entry->ie_object);
                assert(nset != IPS_NULL);
        }
        ip_lock(port);
-       ipc_pset_remove_from_all(port, links);
+       ipc_pset_remove_from_all(port);
 
        if (nset != IPS_NULL) {
                ips_lock(nset);
-               kr = ipc_pset_add(nset, port, wql);
+               kr = ipc_pset_add(nset, port, &wq_link_id, &wq_reserved_prepost);
                ips_unlock(nset);
        }
        ip_unlock(port);
        is_read_unlock(space);
 
  done:
-       if (kr != KERN_SUCCESS && wql != WAIT_QUEUE_LINK_NULL)
-               wait_queue_link_free(wql);
-       while(!queue_empty(links)) {
-               wql = (wait_queue_link_t) dequeue(links);
-               wait_queue_link_free(wql);
-       }
+
+       /*
+        * on success the ipc_pset_add() will consume the wq_link_id
+        * value (resetting it to 0), so this function is always safe to call.
+        */
+       waitq_link_release(wq_link_id);
+       waitq_prepost_release_reserve(wq_reserved_prepost);
 
        return kr;
 }
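mach_port_move_member() (and mach_port_insert_member() below) now front-loads everything that could block: a waitq link and a batch of prepost objects are reserved before the space lock is taken, ipc_pset_add() consumes what it needs, and both reservations are released unconditionally at the end. The shape of the protocol, with the locking elided (illustrative only; the count of 10 prepost objects is simply the value this commit uses):

    uint64_t wq_link_id = waitq_link_reserve(NULL);
    uint64_t wq_reserved_prepost = waitq_prepost_reserve(NULL, 10,
                                                         WAITQ_DONT_LOCK, NULL);

    /* ... take the space/port/pset locks and call
     *     ipc_pset_add(nset, port, &wq_link_id, &wq_reserved_prepost);
     *     on success the link id is consumed and reset to 0 ... */

    /* always safe to call, even when the reservations were consumed */
    waitq_link_release(wq_link_id);
    waitq_prepost_release_reserve(wq_reserved_prepost);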
@@ -1702,10 +1714,11 @@ void mach_port_get_status_helper(
        mach_port_status_t      *statusp)
 {
        spl_t s;
-       statusp->mps_pset = port->ip_pset_count;
 
        s = splsched();
        imq_lock(&port->ip_messages);
+       /* don't leak set IDs, just indicate that the port is in one or not */
+       statusp->mps_pset = !!(port->ip_in_pset);
        statusp->mps_seqno = port->ip_messages.imq_seqno;
        statusp->mps_qlimit = port->ip_messages.imq_qlimit;
        statusp->mps_msgcount = port->ip_messages.imq_msgcount;
@@ -2018,7 +2031,8 @@ mach_port_insert_member(
        ipc_object_t obj;
        ipc_object_t psobj;
        kern_return_t kr;
-       wait_queue_link_t wql;
+       uint64_t wq_link_id;
+       uint64_t wq_reserved_prepost;
 
        if (space == IS_NULL)
                return KERN_INVALID_TASK;
@@ -2026,7 +2040,9 @@ mach_port_insert_member(
        if (!MACH_PORT_VALID(name) || !MACH_PORT_VALID(psname))
                return KERN_INVALID_RIGHT;
 
-       wql = wait_queue_link_allocate();
+       wq_link_id = waitq_link_reserve(NULL);
+       wq_reserved_prepost = waitq_prepost_reserve(NULL, 10,
+                                                   WAITQ_DONT_LOCK, NULL);
 
        kr = ipc_object_translate_two(space, 
                                      name, MACH_PORT_RIGHT_RECEIVE, &obj,
@@ -2038,13 +2054,16 @@ mach_port_insert_member(
        assert(psobj != IO_NULL);
        assert(obj != IO_NULL);
 
-       kr = ipc_pset_add((ipc_pset_t)psobj, (ipc_port_t)obj, wql);
+       __IGNORE_WCASTALIGN(kr = ipc_pset_add((ipc_pset_t)psobj, (ipc_port_t)obj,
+                                           &wq_link_id, &wq_reserved_prepost));
+
        io_unlock(psobj);
        io_unlock(obj);
 
  done:
-       if (kr != KERN_SUCCESS)
-               wait_queue_link_free(wql);
+       /* on success, wq_link_id is reset to 0, so this is always safe */
+       waitq_link_release(wq_link_id);
+       waitq_prepost_release_reserve(wq_reserved_prepost);
 
        return kr;
 }
@@ -2076,7 +2095,6 @@ mach_port_extract_member(
        ipc_object_t psobj;
        ipc_object_t obj;
        kern_return_t kr;
-       wait_queue_link_t wql = WAIT_QUEUE_LINK_NULL;
 
        if (space == IS_NULL)
                return KERN_INVALID_TASK;
@@ -2094,13 +2112,11 @@ mach_port_extract_member(
        assert(psobj != IO_NULL);
        assert(obj != IO_NULL);
 
-       kr = ipc_pset_remove((ipc_pset_t)psobj, (ipc_port_t)obj, &wql);
+       __IGNORE_WCASTALIGN(kr = ipc_pset_remove((ipc_pset_t)psobj, (ipc_port_t)obj));
+
        io_unlock(psobj);
        io_unlock(obj);
 
-       if (wql != WAIT_QUEUE_LINK_NULL)
-               wait_queue_link_free(wql);
-
        return kr;
 }
 
@@ -2265,16 +2281,11 @@ mach_port_guard_exception(
 void
 mach_port_guard_ast(thread_t t)
 {
-       mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
-
-       code[0] = t->guard_exc_info.code;
-       code[1] = t->guard_exc_info.subcode;
-
        /* Raise an EXC_GUARD exception */
-       exception_triage(EXC_GUARD, code, EXCEPTION_CODE_MAX);
+       task_exception_notify(EXC_GUARD, t->guard_exc_info.code, t->guard_exc_info.subcode);
 
        /* Terminate task which caused the exception */
-       (void) task_terminate_internal(current_task());
+       task_bsdtask_kill(current_task());
        return;
 }
 
diff --git a/osfmk/kdp/kdp_core.c b/osfmk/kdp/kdp_core.c
new file mode 100644 (file)
index 0000000..8820b2a
--- /dev/null
@@ -0,0 +1,813 @@
+/*
+ * Copyright (c) 2015 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifdef CONFIG_KDP_INTERACTIVE_DEBUGGING
+
+#include <mach/mach_types.h>
+#include <mach/vm_attributes.h>
+#include <mach/vm_param.h>
+#include <mach/vm_map.h>
+#include <vm/vm_protos.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+#include <libsa/types.h>
+#include <libkern/kernel_mach_header.h>
+#include <libkern/zlib.h>
+#include <kdp/kdp_internal.h>
+#include <kdp/kdp_core.h>
+#include <IOKit/IOPolledInterface.h>
+#include <IOKit/IOBSD.h>
+#include <sys/errno.h>
+#include <sys/msgbuf.h>
+
+#if defined(__i386__) || defined(__x86_64__)
+#include <i386/pmap_internal.h>
+#include <kdp/ml/i386/kdp_x86_common.h>
+#endif /* defined(__i386__) || defined(__x86_64__) */
+
+
+
+typedef int (*pmap_traverse_callback)(vm_map_offset_t start,
+                                     vm_map_offset_t end,
+                                     void *context);
+
+extern int pmap_traverse_present_mappings(pmap_t pmap,
+                                         vm_map_offset_t start,
+                                         vm_map_offset_t end,
+                                         pmap_traverse_callback callback,
+                                         void *context);
+
+
+static int
+kern_dump_pmap_traverse_preflight_callback(vm_map_offset_t start,
+                                              vm_map_offset_t end,
+                                              void *context);
+static int
+kern_dump_pmap_traverse_send_seg_callback(vm_map_offset_t start,
+                                             vm_map_offset_t end,
+                                             void *context);
+static int
+kern_dump_pmap_traverse_send_segdata_callback(vm_map_offset_t start,
+                                                 vm_map_offset_t end,
+                                                 void *context);
+
+struct kdp_core_out_vars;
+typedef int (*kern_dump_output_proc)(unsigned int request, char *corename, 
+                                    uint64_t length, void *panic_data);
+
+struct kdp_core_out_vars
+{
+     kern_dump_output_proc outproc;
+     z_output_func        zoutput;
+     size_t                zipped;
+     uint64_t              totalbytes;
+     uint64_t              lastpercent;
+     IOReturn              error;
+     unsigned              outremain;
+     unsigned              outlen;
+     unsigned              writes;
+     Bytef *               outbuf;
+};
+
+struct kern_dump_preflight_context
+{
+    uint32_t region_count;
+    uint64_t dumpable_bytes;
+};
+
+struct kern_dump_send_context
+{
+    struct kdp_core_out_vars * outvars;
+    uint64_t hoffset;
+    uint64_t foffset;
+    uint64_t header_size;
+    uint64_t dumpable_bytes;
+    uint32_t region_count;
+};
+
+extern uint32_t kdp_crashdump_pkt_size;
+
+static vm_offset_t kdp_core_zmem;
+static size_t      kdp_core_zsize;
+static size_t      kdp_core_zoffset;
+static z_stream           kdp_core_zs;
+
+
+#define DEBG   kdb_printf
+
+boolean_t kdp_has_polled_corefile(void)
+{
+    return (NULL != gIOPolledCoreFileVars);
+}
+
+static IOReturn 
+kern_dump_disk_proc(unsigned int request, __unused char *corename, 
+                   uint64_t length, void * data)
+{
+    uint64_t        noffset;
+    uint32_t        err = kIOReturnSuccess;
+
+    switch (request) 
+    {
+        case KDP_WRQ:
+           err = IOPolledFileSeek(gIOPolledCoreFileVars, 0);
+           if (kIOReturnSuccess != err) break;
+           err = IOPolledFilePollersOpen(gIOPolledCoreFileVars, kIOPolledBeforeSleepState, false);
+           break;
+
+        case KDP_SEEK:
+           noffset = *((uint64_t *) data);
+           err = IOPolledFileWrite(gIOPolledCoreFileVars, 0, 0, NULL);
+           if (kIOReturnSuccess != err) break;
+           err = IOPolledFileSeek(gIOPolledCoreFileVars, noffset);
+           break;
+
+        case KDP_DATA:
+           err = IOPolledFileWrite(gIOPolledCoreFileVars, data, length, NULL);
+           if (kIOReturnSuccess != err) break;
+           break;
+
+        case KDP_EOF:
+           err = IOPolledFileWrite(gIOPolledCoreFileVars, 0, 0, NULL);
+           if (kIOReturnSuccess != err) break;
+           err = IOPolledFilePollersClose(gIOPolledCoreFileVars, kIOPolledBeforeSleepState);
+           if (kIOReturnSuccess != err) break;
+           break;
+    }
+
+    return (err);
+}
+
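+/*
+ * zlib output hook for the local-disk path: passes each compressed chunk
+ * straight to the output procedure as a KDP_DATA request.
+ */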
+static int
+kdp_core_zoutput(z_streamp strm, Bytef *buf, unsigned len)
+{
+    struct kdp_core_out_vars * vars = (typeof(vars)) strm->opaque;
+    IOReturn                   ret;
+
+    vars->zipped += len;
+
+    if (vars->error >= 0)
+    {
+       if ((ret = (*vars->outproc)(KDP_DATA, NULL, len, buf)) != kIOReturnSuccess)
+       { 
+           DEBG("KDP_DATA(0x%x)\n", ret);
+           vars->error = ret;
+       }
+       if (!buf && !len) DEBG("100..");
+    }
+    return (len);
+}
+
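+/*
+ * zlib output hook for the network path: accumulates compressed data in
+ * outbuf and emits full kdp_crashdump_pkt_size packets, flushing whatever
+ * remains when called with a NULL buffer.
+ */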
+static int
+kdp_core_zoutputbuf(z_streamp strm, Bytef *inbuf, unsigned inlen)
+{
+    struct kdp_core_out_vars * vars = (typeof(vars)) strm->opaque;
+    unsigned remain;
+    IOReturn ret;
+    unsigned chunk;
+    boolean_t flush;
+
+    remain = inlen;
+    vars->zipped += inlen;
+    flush = (!inbuf && !inlen);
+
+    while ((vars->error >= 0) && (remain || flush))
+    {
+       chunk = vars->outremain;
+       if (chunk > remain) chunk = remain;
+       bcopy(inbuf, &vars->outbuf[vars->outlen - vars->outremain], chunk);
+       vars->outremain -= chunk;
+       remain          -= chunk;
+       inbuf           += chunk;
+       
+       if (vars->outremain && !flush) break;
+       if ((ret = (*vars->outproc)(KDP_DATA, NULL, 
+                                       vars->outlen - vars->outremain, 
+                                       vars->outbuf)) != kIOReturnSuccess)
+       { 
+           DEBG("KDP_DATA(0x%x)\n", ret);
+           vars->error = ret;
+       }
+       if (flush)
+       {
+           DEBG("100..");
+           flush = false;
+       }
+       vars->outremain = vars->outlen;
+    }
+    return (inlen);
+}
+
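+/*
+ * zlib input hook: copies pending input (or zeroes, when next_in is the
+ * zero marker) into the buffer supplied by deflate, updates the CRC, and
+ * periodically prints a progress percentage.
+ */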
+static int
+kdp_core_zinput(z_streamp strm, Bytef *buf, unsigned size)
+{
+    struct kdp_core_out_vars * vars = (typeof(vars)) strm->opaque;
+    uint64_t                   percent;
+    unsigned                   len;
+
+    len = strm->avail_in;
+    if (len > size) len = size;
+    if (len == 0) return 0;
+
+    if (strm->next_in != (Bytef *) strm) memcpy(buf, strm->next_in, len);
+    else                                bzero(buf, len);
+    strm->adler = z_crc32(strm->adler, buf, len);
+
+    strm->avail_in -= len;
+    strm->next_in  += len;
+    strm->total_in += len;
+
+    if (0 == (511 & vars->writes++))
+    {
+       percent = (strm->total_in * 100) / vars->totalbytes;
+       if ((percent - vars->lastpercent) >= 10)
+       {
+           vars->lastpercent = percent;
+           DEBG("%lld..", percent);
+       }
+    }
+
+    return (int)len;
+}
+
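+/*
+ * Push a buffer (or a run of zeroes, when data is NULL but length is set)
+ * through the deflate stream; calling with length == 0 and data == NULL
+ * finishes the stream and flushes the output hook.
+ */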
+static IOReturn
+kdp_core_stream_output(struct kdp_core_out_vars * vars, uint64_t length, void * data)
+{
+    z_stream * zs;
+    int        zr;
+    boolean_t  flush;
+
+    flush = (!length && !data);
+    zr = Z_OK;
+
+    zs = &kdp_core_zs;
+    assert(!zs->avail_in);
+
+    while (vars->error >= 0)
+    {
+       if (!zs->avail_in && !flush)
+       {
+           if (!length) break;
+           zs->next_in = data ? data : (Bytef *) zs /* zero marker */;
+           zs->avail_in = (uInt)length;
+           length = 0;
+       }
+       if (!zs->avail_out)
+       {
+           zs->next_out  = (Bytef *) zs;
+           zs->avail_out = UINT32_MAX;
+       }
+       zr = deflate(zs, flush ? Z_FINISH : Z_NO_FLUSH);
+       if (Z_STREAM_END == zr) break;
+       if (zr != Z_OK) 
+       {
+           DEBG("ZERR %d\n", zr);
+           vars->error = zr;
+       }
+    }
+
+    if (flush) (*vars->zoutput)(zs, NULL, 0);
+
+    return (vars->error);
+}
+
+extern vm_offset_t c_buffers;
+extern vm_size_t   c_buffers_size;
+
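+/*
+ * Return the physical page backing vaddr in the kernel pmap, or 0 for
+ * ranges that are deliberately excluded from the dump (compressor buffers
+ * and the zlib working memory); *pvincr is set to how far to advance.
+ */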
+ppnum_t
+kernel_pmap_present_mapping(uint64_t vaddr, uint64_t * pvincr)
+{
+    ppnum_t ppn;
+    uint64_t vincr;
+    vincr = PAGE_SIZE_64;
+
+    assert(!(vaddr & PAGE_MASK_64));
+
+    /* VA ranges to exclude */
+    if (vaddr == c_buffers)
+    {
+       /* compressor data */
+       ppn = 0;
+       vincr = c_buffers_size;
+    }
+    else if (vaddr == kdp_core_zmem)
+    {
+       /* zlib working memory */
+       ppn = 0;
+       vincr = kdp_core_zsize;
+    }
+    else
+    {
+	ppn = pmap_find_phys(kernel_pmap, vaddr);
+    }
+
+    *pvincr = vincr;
+    return (ppn);
+}
+
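+/*
+ * Walk the kernel pmap and report each virtually contiguous run of valid
+ * DRAM mappings via the callback; traversal stops if a callback fails.
+ */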
+int
+pmap_traverse_present_mappings(pmap_t __unused pmap,
+                                  vm_map_offset_t start,
+                                  vm_map_offset_t end,
+                                  pmap_traverse_callback callback,
+                                  void *context)
+{
+    IOReturn        ret;
+    vm_map_offset_t vcurstart, vcur;
+    uint64_t        vincr;
+    vm_map_offset_t debug_start;
+    vm_map_offset_t debug_end;
+    boolean_t       lastvavalid;
+
+    debug_start = trunc_page((vm_map_offset_t) debug_buf_addr);
+    debug_end   = round_page((vm_map_offset_t) (debug_buf_addr + debug_buf_size));
+
+#if defined(__i386__) || defined(__x86_64__)
+    assert(!is_ept_pmap(pmap));
+#endif
+
+    /* Assumes pmap is locked, or being called from the kernel debugger */
+    
+    if (start > end) return (KERN_INVALID_ARGUMENT);
+
+    ret = KERN_SUCCESS;
+    lastvavalid = FALSE;
+    for (vcur = vcurstart = start; (ret == KERN_SUCCESS) && (vcur < end); ) {
+       ppnum_t ppn;
+
+       ppn = kernel_pmap_present_mapping(vcur, &vincr);
+       if (ppn != 0)
+       {
+           if (((vcur < debug_start) || (vcur >= debug_end))
+               && !pmap_valid_page(ppn))
+           {
+               /* not something we want */
+               ppn = 0;
+           }
+       }
+
+       if (ppn != 0) {
+           if (!lastvavalid) {
+               /* Start of a new virtual region */
+               vcurstart = vcur;
+               lastvavalid = TRUE;
+           }
+       } else {
+           if (lastvavalid) {
+               /* end of a virtual region */
+               ret = callback(vcurstart, vcur, context);
+               lastvavalid = FALSE;
+           }
+
+#if defined(__i386__) || defined(__x86_64__)
+           /* Try to skip by 2MB if possible */
+           if (((vcur & PDMASK) == 0) && cpu_64bit) {
+               pd_entry_t *pde;
+               pde = pmap_pde(pmap, vcur);
+               if (0 == pde || ((*pde & INTEL_PTE_VALID) == 0)) {
+                   /* Make sure we wouldn't overflow */
+                   if (vcur < (end - NBPD)) {
+                       vincr = NBPD;
+                   }
+               }
+           }
+#endif /* defined(__i386__) || defined(__x86_64__) */
+       }
+       vcur += vincr;
+    }
+    
+    if ((ret == KERN_SUCCESS) && lastvavalid) {
+       /* send previous run */
+       ret = callback(vcurstart, vcur, context);
+    }
+    return (ret);
+}
+
+int
+kern_dump_pmap_traverse_preflight_callback(vm_map_offset_t start,
+                                          vm_map_offset_t end,
+                                          void *context)
+{
+    struct kern_dump_preflight_context *kdc = (struct kern_dump_preflight_context *)context;
+    IOReturn ret = KERN_SUCCESS;
+
+    kdc->region_count++;
+    kdc->dumpable_bytes += (end - start);
+
+    return (ret);
+}
+
+int
+kern_dump_pmap_traverse_send_seg_callback(vm_map_offset_t start,
+                                         vm_map_offset_t end,
+                                         void *context)
+{
+    struct kern_dump_send_context *kdc = (struct kern_dump_send_context *)context;
+    IOReturn ret = KERN_SUCCESS;
+    kernel_segment_command_t sc;
+    vm_size_t size = (vm_size_t)(end - start);
+
+    if (kdc->hoffset + sizeof(sc) > kdc->header_size) {
+       return (KERN_NO_SPACE);
+    }
+
+    kdc->region_count++;
+    kdc->dumpable_bytes += (end - start);
+
+    /*
+     * Fill in segment command structure.
+     */
+
+    sc.cmd = LC_SEGMENT_KERNEL;
+    sc.cmdsize = sizeof(kernel_segment_command_t);
+    sc.segname[0] = 0;
+    sc.vmaddr = (vm_address_t)start;
+    sc.vmsize = size;
+    sc.fileoff = (vm_address_t)kdc->foffset;
+    sc.filesize = size;
+    sc.maxprot = VM_PROT_READ;
+    sc.initprot = VM_PROT_READ;
+    sc.nsects = 0;
+    sc.flags = 0;
+
+    if ((ret = kdp_core_stream_output(kdc->outvars, sizeof(kernel_segment_command_t), (caddr_t) &sc)) != kIOReturnSuccess) {
+       DEBG("kdp_core_stream_output(0x%x)\n", ret);
+       goto out;
+    }
+    
+    kdc->hoffset += sizeof(kernel_segment_command_t);
+    kdc->foffset += size;
+
+out:
+    return (ret);
+}
+
+
+int
+kern_dump_pmap_traverse_send_segdata_callback(vm_map_offset_t start,
+                                             vm_map_offset_t end,
+                                             void *context)
+{
+    struct kern_dump_send_context *kdc = (struct kern_dump_send_context *)context;
+    int ret = KERN_SUCCESS;
+    vm_size_t size = (vm_size_t)(end - start);
+
+    kdc->region_count++;
+    kdc->dumpable_bytes += size;
+    if ((ret = kdp_core_stream_output(kdc->outvars, (unsigned int)size, (caddr_t)(uintptr_t)start)) != kIOReturnSuccess)       {
+       DEBG("kdp_core_stream_output(0x%x)\n", ret);
+       goto out;
+    }
+    kdc->foffset += size;
+
+out:
+    return (ret);
+}
+
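+/*
+ * Core of the dump path: preflight the mappings to size the Mach-O header,
+ * stream the MH_CORE header, segment commands, LC_THREAD state and segment
+ * data through the gzip stream, then, for local dumps, rewind and write the
+ * mach_core_fileheader and the captured panic log.
+ */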
+static int
+do_kern_dump(kern_dump_output_proc outproc, bool local)
+{
+    struct kern_dump_preflight_context kdc_preflight;
+    struct kern_dump_send_context      kdc_sendseg;
+    struct kern_dump_send_context      kdc_send;
+    struct kdp_core_out_vars           outvars;
+    struct mach_core_fileheader         hdr;
+    kernel_mach_header_t mh;
+    uint32_t            segment_count, tstate_count;
+    size_t              command_size = 0, header_size = 0, tstate_size = 0;
+    uint64_t            hoffset, foffset;
+    int                  ret;
+    char *               log_start;
+    uint64_t             log_length;
+    uint64_t             new_logs;
+    boolean_t            opened;
+
+    opened     = false;
+    log_start  = debug_buf_ptr;
+    log_length = 0;
+    if (log_start >= debug_buf_addr)
+    {
+       log_length = log_start - debug_buf_addr;
+       if (log_length <= debug_buf_size) log_length = debug_buf_size - log_length;
+       else log_length = 0;
+    }
+
+    if (local)
+    {
+       if ((ret = (*outproc)(KDP_WRQ, NULL, 0, &hoffset)) != kIOReturnSuccess) {
+           DEBG("KDP_WRQ(0x%x)\n", ret);
+           goto out;
+       }
+    }
+    opened = true;
+
+    // init gzip
+    bzero(&outvars, sizeof(outvars));
+    bzero(&hdr, sizeof(hdr));
+    outvars.outproc = outproc;
+    kdp_core_zs.avail_in  = 0;
+    kdp_core_zs.next_in   = NULL;
+    kdp_core_zs.avail_out = 0;
+    kdp_core_zs.next_out  = NULL;
+    kdp_core_zs.opaque    = &outvars;
+    kdc_sendseg.outvars   = &outvars;
+    kdc_send.outvars      = &outvars;
+
+    if (local)
+    {
+       outvars.outbuf      = NULL;
+        outvars.outlen      = 0;
+        outvars.outremain   = 0;
+       outvars.zoutput     = kdp_core_zoutput;
+       // space for file header & log
+       foffset = (4096 + log_length + 4095) & ~4095ULL;
+       hdr.log_offset = 4096;
+       hdr.gzip_offset = foffset;
+       if ((ret = (*outproc)(KDP_SEEK, NULL, sizeof(foffset), &foffset)) != kIOReturnSuccess) { 
+               DEBG("KDP_SEEK(0x%x)\n", ret);
+               goto out;
+       } 
+    }
+    else
+    {
+       outvars.outbuf    = (Bytef *) (kdp_core_zmem + kdp_core_zoffset);
+       assert((kdp_core_zoffset + kdp_crashdump_pkt_size) <= kdp_core_zsize);
+        outvars.outlen    = kdp_crashdump_pkt_size;
+        outvars.outremain = outvars.outlen;
+       outvars.zoutput  = kdp_core_zoutputbuf;
+    }
+
+    deflateResetWithIO(&kdp_core_zs, kdp_core_zinput, outvars.zoutput);
+
+
+    kdc_preflight.region_count = 0;
+    kdc_preflight.dumpable_bytes = 0;
+
+    ret = pmap_traverse_present_mappings(kernel_pmap,
+                                        VM_MIN_KERNEL_AND_KEXT_ADDRESS,
+                                        VM_MAX_KERNEL_ADDRESS,
+                                        kern_dump_pmap_traverse_preflight_callback,
+                                        &kdc_preflight);
+    if (ret)
+    {
+       DEBG("pmap traversal failed: %d\n", ret);
+       return (ret);
+    }
+
+    outvars.totalbytes = kdc_preflight.dumpable_bytes;
+    assert(outvars.totalbytes);
+    segment_count = kdc_preflight.region_count;
+
+    kern_collectth_state_size(&tstate_count, &tstate_size);
+
+    command_size = segment_count * sizeof(kernel_segment_command_t) + tstate_count * tstate_size;
+
+    header_size = command_size + sizeof(kernel_mach_header_t);
+
+    /*
+     * Set up Mach-O header for currently executing kernel.
+     */
+
+    mh.magic = _mh_execute_header.magic;
+    mh.cputype = _mh_execute_header.cputype;
+    mh.cpusubtype = _mh_execute_header.cpusubtype;
+    mh.filetype = MH_CORE;
+    mh.ncmds = segment_count + tstate_count;
+    mh.sizeofcmds = (uint32_t)command_size;
+    mh.flags = 0;
+#if defined(__LP64__)
+    mh.reserved = 0;
+#endif
+
+    hoffset = 0;                                       /* offset into header */
+    foffset = (uint64_t) round_page(header_size);      /* offset into file */
+
+    /* Transmit the Mach-O MH_CORE header, and segment and thread commands 
+     */
+    if ((ret = kdp_core_stream_output(&outvars, sizeof(kernel_mach_header_t), (caddr_t) &mh)) != kIOReturnSuccess)
+    {
+       DEBG("KDP_DATA(0x%x)\n", ret);
+       goto out;
+    }
+
+    hoffset += sizeof(kernel_mach_header_t);
+
+    DEBG("%s", local ? "Writing local kernel core..." :
+                      "Transmitting kernel state, please wait:\n");
+
+    kdc_sendseg.region_count   = 0;
+    kdc_sendseg.dumpable_bytes = 0;
+    kdc_sendseg.hoffset = hoffset;
+    kdc_sendseg.foffset = foffset;
+    kdc_sendseg.header_size = header_size;
+
+    if ((ret = pmap_traverse_present_mappings(kernel_pmap,
+                                        VM_MIN_KERNEL_AND_KEXT_ADDRESS,
+                                        VM_MAX_KERNEL_ADDRESS,
+                                        kern_dump_pmap_traverse_send_seg_callback,
+                                        &kdc_sendseg)) != kIOReturnSuccess)
+    {
+       DEBG("pmap_traverse_present_mappings(0x%x)\n", ret);
+       goto out;
+    }
+
+    hoffset = kdc_sendseg.hoffset;
+    /*
+     * Now send out the LC_THREAD load command, with the thread information
+     * for the current activation.
+     */
+
+    if (tstate_size > 0)
+    {
+       void * iter;
+       char tstate[tstate_size];
+       iter = NULL;
+       do {
+           /*
+            * Now send out the LC_THREAD load command, with the thread information
+            */
+           kern_collectth_state (current_thread(), tstate, tstate_size, &iter);
+
+           if ((ret = kdp_core_stream_output(&outvars, tstate_size, tstate)) != kIOReturnSuccess) {
+                   DEBG("kdp_core_stream_output(0x%x)\n", ret);
+                   goto out;
+           }
+       }
+       while (iter);
+    }
+
+    kdc_send.region_count   = 0;
+    kdc_send.dumpable_bytes = 0;
+    foffset = (uint64_t) round_page(header_size);      /* offset into file */
+    kdc_send.foffset = foffset;
+    kdc_send.hoffset = 0;
+    foffset = round_page_64(header_size) - header_size;
+    if (foffset)
+    {
+       // zero fill to page align
+       if ((ret = kdp_core_stream_output(&outvars, foffset, NULL)) != kIOReturnSuccess) {
+               DEBG("kdp_core_stream_output(0x%x)\n", ret);
+               goto out;
+       }
+    }
+
+    ret = pmap_traverse_present_mappings(kernel_pmap,
+                                        VM_MIN_KERNEL_AND_KEXT_ADDRESS,
+                                        VM_MAX_KERNEL_ADDRESS,
+                                        kern_dump_pmap_traverse_send_segdata_callback,
+                                        &kdc_send);
+    if (ret) {
+       DEBG("pmap_traverse_present_mappings(0x%x)\n", ret);
+       goto out;
+    }
+
+    if ((ret = kdp_core_stream_output(&outvars, 0, NULL)) != kIOReturnSuccess) {
+       DEBG("kdp_core_stream_output(0x%x)\n", ret);
+       goto out;
+    }
+
+out:
+    if (kIOReturnSuccess == ret) DEBG("success\n");
+    else                         outvars.zipped = 0;
+
+    DEBG("Mach-o header: %lu\n", header_size);
+    DEBG("Region counts: [%u, %u, %u]\n", kdc_preflight.region_count,
+                                         kdc_sendseg.region_count, 
+                                         kdc_send.region_count);
+    DEBG("Byte counts  : [%llu, %llu, %llu, %lu, %llu]\n", kdc_preflight.dumpable_bytes, 
+                                                          kdc_sendseg.dumpable_bytes, 
+                                                          kdc_send.dumpable_bytes, 
+                                                          outvars.zipped, log_length);
+    if (local && opened)
+    {
+       // write debug log
+       foffset = 4096;
+       if ((ret = (*outproc)(KDP_SEEK, NULL, sizeof(foffset), &foffset)) != kIOReturnSuccess) { 
+           DEBG("KDP_SEEK(0x%x)\n", ret);
+           goto exit;
+       } 
+
+       new_logs = debug_buf_ptr - log_start;
+       if (new_logs > log_length) new_logs = log_length;
+       
+       if ((ret = (*outproc)(KDP_DATA, NULL, new_logs, log_start)) != kIOReturnSuccess)
+       { 
+           DEBG("KDP_DATA(0x%x)\n", ret);
+           goto exit;
+       } 
+
+       // write header
+
+       foffset = 0;
+       if ((ret = (*outproc)(KDP_SEEK, NULL, sizeof(foffset), &foffset)) != kIOReturnSuccess) { 
+           DEBG("KDP_SEEK(0x%x)\n", ret);
+           goto exit;
+       } 
+
+       hdr.signature  = MACH_CORE_FILEHEADER_SIGNATURE;
+       hdr.log_length = new_logs;
+        hdr.gzip_length = outvars.zipped;
+
+       if ((ret = (*outproc)(KDP_DATA, NULL, sizeof(hdr), &hdr)) != kIOReturnSuccess)
+       { 
+           DEBG("KDP_DATA(0x%x)\n", ret);
+           goto exit;
+       }
+    }
+
+exit:
+    /* close / last packet */
+    if ((ret = (*outproc)(KDP_EOF, NULL, 0, ((void *) 0))) != kIOReturnSuccess)
+    {
+       DEBG("KDP_EOF(0x%x)\n", ret);
+    }  
+
+
+    return (ret);
+}
+
+int
+kern_dump(boolean_t local)
+{
+    static boolean_t dumped_local;
+    if (local) {
+       if (dumped_local) return (0);
+       dumped_local = TRUE;
+       return (do_kern_dump(&kern_dump_disk_proc, true));
+    }
+#if CONFIG_KDP_INTERACTIVE_DEBUGGING
+    return (do_kern_dump(&kdp_send_crashdump_data, false));
+#else
+    return (-1);
+#endif
+}
+
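+/*
+ * zlib allocation hooks: deflateInit2() carves its state out of the
+ * preallocated kdp_core_zmem arena and nothing is ever freed, so no dynamic
+ * allocation is needed once a panic is in progress.
+ */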
+static void *
+kdp_core_zalloc(void * __unused ref, u_int items, u_int size)
+{
+    void * result;
+
+    result = (void *) (kdp_core_zmem + kdp_core_zoffset);
+    kdp_core_zoffset += ~31L & (31 + (items * size));    // 32-byte align for vector crc
+    assert(kdp_core_zoffset <= kdp_core_zsize);
+
+    return (result);
+}
+
+static void
+kdp_core_zfree(void * __unused ref, void * __unused ptr) {}
+
+
+#define LEVEL Z_BEST_SPEED
+#define NETBUF 1440
+
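+/*
+ * One-time setup: size and allocate the zlib arena (with room for one
+ * network packet), then initialize the deflate stream in gzip mode.
+ */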
+void
+kdp_core_init(void)
+{
+    int wbits = 12;
+    int memlevel = 3;
+    kern_return_t kr;
+
+    if (kdp_core_zs.zalloc) return;
+    kdp_core_zsize = round_page(NETBUF + zlib_deflate_memory_size(wbits, memlevel));
+    printf("kdp_core zlib memory 0x%lx\n", kdp_core_zsize);
+    kr = kmem_alloc(kernel_map, &kdp_core_zmem, kdp_core_zsize, VM_KERN_MEMORY_DIAG);
+    assert (KERN_SUCCESS == kr);
+
+    kdp_core_zoffset = 0;
+    kdp_core_zs.zalloc = kdp_core_zalloc;
+    kdp_core_zs.zfree  = kdp_core_zfree;
+
+    if (deflateInit2(&kdp_core_zs, LEVEL, Z_DEFLATED,
+                    wbits + 16 /*gzip mode*/, memlevel, Z_DEFAULT_STRATEGY))
+    {
+       /* Allocation failed */
+       bzero(&kdp_core_zs, sizeof(kdp_core_zs));
+       kdp_core_zoffset = 0;
+    }
+}
+
+#endif /* CONFIG_KDP_INTERACTIVE_DEBUGGING */
index d99f2bbcd1ed670fbee2f548681ba62797ae698e..6192d2ded406403ef412fcff7c30bcd4f6cff75b 100644 (file)
@@ -33,6 +33,8 @@
 /* Various protocol definitions 
  * for the core transfer protocol, which is a variant of TFTP 
  */
+#ifndef __KDP_CORE_H
+#define __KDP_CORE_H
 
 /*
  * Packet types.
@@ -85,6 +87,7 @@ void kdp_set_dump_info(const uint32_t flags, const char *file, const char *desti
 void kdp_get_dump_info(uint32_t *flags, char *file, char *destip, char *routerip, 
                        uint32_t *port);
 
+extern int kern_dump(boolean_t local);
 
 struct corehdr *create_panic_header(unsigned int request, const char *corename, unsigned length, unsigned block);
 
@@ -92,6 +95,16 @@ int  kdp_send_crashdump_pkt(unsigned int request, char *corename,
                                uint64_t length, void *panic_data);
 
 int    kdp_send_crashdump_data(unsigned int request, char *corename,
-                               int64_t length, caddr_t txstart);
+                           uint64_t length, void * txstart);
+
+void kern_collectth_state_size(uint32_t * tstate_count, size_t * tstate_size);
+
+void kern_collectth_state(thread_t thread, void *buffer, size_t size, void **iter);
+
+boolean_t kdp_has_polled_corefile(void);
+
+void kdp_core_init(void);
 
 #define KDP_CRASHDUMP_POLL_COUNT (2500)
+
+#endif /* __KDP_CORE_H */
index 9b2bc17ec058ad8d3e4efe5451e7764429da004e..c93876ac824439a0f491a4111409d23038f25567 100644 (file)
@@ -47,6 +47,7 @@
 #include <kdp/kdp_en_debugger.h>
 #include <kdp/kdp_callout.h>
 #include <kdp/kdp_udp.h>
+#include <kdp/kdp_core.h>
 #if CONFIG_SERIAL_KDP
 #include <kdp/kdp_serial.h>
 #endif
@@ -310,7 +311,6 @@ extern unsigned int disableConsoleOutput;
 
 extern void            kdp_call(void);
 extern boolean_t       kdp_call_kdb(void);
-extern int             kern_dump(void);
 
 void * kdp_get_interface(void);
 void    kdp_set_gateway_mac(void *gatewaymac);
@@ -448,6 +448,7 @@ kdp_register_send_receive(
                corename_specified = TRUE;
 
        kdp_flag |= KDP_READY;
+
        if (current_debugger == NO_CUR_DB)
                current_debugger = KDP_CUR_DB;
        if ((kdp_current_ip_address != 0) && halt_in_debugger) {
@@ -1337,7 +1338,7 @@ kdp_debugger_loop(
     if (pkt.input)
        kdp_panic("kdp_raise_exception");
 
-    if (((kdp_flag & KDP_PANIC_DUMP_ENABLED) || (kdp_flag & PANIC_LOG_DUMP))
+    if (((kdp_flag & KDP_PANIC_DUMP_ENABLED) || (kdp_flag & PANIC_LOG_DUMP) || kdp_has_polled_corefile())
        && (panicstr != (char *) 0)) {
            kdp_panic_dump();
            if (kdp_flag & REBOOT_POST_CORE)
@@ -1537,15 +1538,15 @@ static int kdp_send_crashdump_seek(char *corename, uint64_t seek_off)
                return panic_error;
        }
 
-       return 0;
+       return KERN_SUCCESS;
 }
 
 int kdp_send_crashdump_data(unsigned int request, char *corename,
-    int64_t length, caddr_t txstart)
+                           uint64_t length, void * txstart)
 {
        int panic_error = 0;
 
-       while (length > 0) {
+       while ((length > 0) || !txstart) {
                uint64_t chunk = MIN(kdp_crashdump_pkt_size, length);
 
                panic_error = kdp_send_crashdump_pkt(request, corename, chunk,
@@ -1554,11 +1555,11 @@ int kdp_send_crashdump_data(unsigned int request, char *corename,
                        printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
                        return panic_error;
                }
-
-               txstart += chunk;
+               if (!txstart) break;
+               txstart = (void *)(((uintptr_t) txstart) + chunk);
                length  -= chunk;
        }
-       return 0;
+       return KERN_SUCCESS;
 }
 
 uint32_t kdp_crashdump_short_pkt;
@@ -1708,7 +1709,7 @@ RECEIVE_RETRY:
                printf("Minimum superblock transfer abstime: 0x%llx\n", kdp_min_superblock_dump_time);
                printf("Maximum superblock transfer abstime: 0x%llx\n", kdp_max_superblock_dump_time);
        }
-       return 1;
+       return KERN_SUCCESS;
 }
 
 static int 
@@ -1874,6 +1875,7 @@ void
 kdp_panic_dump(void)
 {
        char coreprefix[10];
+       char coresuffix[4];
        int panic_error;
 
        uint64_t        abstime;
@@ -1886,13 +1888,26 @@ kdp_panic_dump(void)
                
        printf("Entering system dump routine\n");
 
+       /* try a local disk dump */
+       if (kdp_has_polled_corefile()) {
+           flag_panic_dump_in_progress = TRUE;
+           kern_dump(TRUE);
+           abort_panic_transfer();
+       }
+
+       if (!strcmp("local", panicd_ip_str)) return;    /* disk only request */
+
        if (!kdp_en_recv_pkt || !kdp_en_send_pkt) {
-                       kdb_printf("Error: No transport device registered for kernel crashdump\n");
-                       return;
+               if (!kdp_has_polled_corefile()) {
+                   kdb_printf("Error: No transport device registered for kernel crashdump\n");
+               }
+               return;
        }
 
        if (!panicd_specified) {
-               kdb_printf("A dump server was not specified in the boot-args, terminating kernel core dump.\n");
+               if (!kdp_has_polled_corefile()) {
+                   kdb_printf("A dump server was not specified in the boot-args, terminating kernel core dump.\n");
+                }
                goto panic_dump_exit;
        }
 
@@ -1904,23 +1919,27 @@ kdp_panic_dump(void)
        kdp_get_xnu_version((char *) &pkt.data[0]);
 
         if (!corename_specified) {
+            coresuffix[0] = 0;
             /* Panic log bit takes precedence over core dump bit */
             if ((panicstr != (char *) 0) && (kdp_flag & PANIC_LOG_DUMP))
                strlcpy(coreprefix, "paniclog", sizeof(coreprefix));
             else if (kdp_flag & SYSTEM_LOG_DUMP) 
                strlcpy(coreprefix, "systemlog", sizeof(coreprefix));
-           else
+           else {
                strlcpy(coreprefix, "core", sizeof(coreprefix));
+               strlcpy(coresuffix, ".gz", sizeof(coresuffix));
+           }
   
             abstime = mach_absolute_time();
            pkt.data[20] = '\0';
-           snprintf (corename_str, sizeof(corename_str), "%s-%s-%d.%d.%d.%d-%x", 
+           snprintf (corename_str, sizeof(corename_str), "%s-%s-%d.%d.%d.%d-%x%s", 
                      coreprefix, &pkt.data[0],
                      (current_ip & 0xff000000) >> 24,
                      (current_ip & 0xff0000) >> 16,
                      (current_ip & 0xff00) >> 8,
                      (current_ip & 0xff),
-                     (unsigned int) (abstime & 0xffffffff));
+                     (unsigned int) (abstime & 0xffffffff),
+                     coresuffix);
         }
 
        if (0 == inet_aton(panicd_ip_str, (struct kdp_in_addr *) &panic_server_ip)) {
@@ -2005,7 +2024,7 @@ kdp_panic_dump(void)
         }
 
        /* We want a core dump if we're here */
-       kern_dump();
+       kern_dump(FALSE);
 
 panic_dump_exit:
        abort_panic_transfer();
@@ -2133,6 +2152,7 @@ kdp_init(void)
 
        kdp_timer_callout_init();
        kdp_crashdump_feature_mask = htonl(kdp_crashdump_feature_mask);
+       kdp_core_init();
 
 #if CONFIG_SERIAL_KDP
        char kdpname[80];
@@ -2200,13 +2220,30 @@ kdp_raise_exception(
 
 #if CONFIG_KDP_INTERACTIVE_DEBUGGING
 
+    disable_preemption();
+    /*
+     * On ARM64, KDP debugging is disabled by default.
+     * It is compiled into the kernel for DEVELOPMENT and DEBUG,
+     * but still hidden behind a boot arg (thus PE_i_can_has_kdp()).
+     * For RELEASE, it is not compiled.
+     */
+    if (
+       (current_debugger != KDP_CUR_DB)
+    )
+    {
+           /* try a local disk dump */
+           if (kdp_has_polled_corefile()) {
+               flag_panic_dump_in_progress = TRUE;
+               kern_dump(TRUE);
+               abort_panic_transfer();
+           }
+    }
+
     if (current_debugger != KDP_CUR_DB) {
         kdb_printf("\nDebugger not configured. Hanging.\n");
         for (;;) { }
     }
 
-    disable_preemption();
-
     kdp_debugger_loop(exception, code, subcode, saved_state);
     not_in_kdp = initial_not_in_kdp;
     enable_preemption();
index 6d458128753ef7aac168ff4faaefe699635ea27c..3ce3b191dad6b74ca6f6e0e0a312f1fb0465994b 100644 (file)
@@ -68,13 +68,6 @@ boolean_t kdp_trans_off;
 
 addr64_t kdp_vtophys(pmap_t pmap, addr64_t va);
 
-int kern_dump_pmap_traverse_preflight_callback(vm_map_offset_t start,
-                                                                                          vm_map_offset_t end,
-                                                                                          void *context);
-int kern_dump_pmap_traverse_send_callback(vm_map_offset_t start,
-                                                                                 vm_map_offset_t end,
-                                                                                 void *context);
-
 pmap_t kdp_pmap = 0;
 
 addr64_t
@@ -403,290 +396,6 @@ kdp_machine_msr64_write(kdp_writemsr64_req_t *rq, caddr_t data, uint16_t lcpu)
        return KDPERR_NO_ERROR;
 }
 
-int
-pmap_traverse_present_mappings(pmap_t pmap,
-                                                          vm_map_offset_t start,
-                                                          vm_map_offset_t end,
-                                                          pmap_traverse_callback callback,
-                                                          void *context)
-{
-       int ret = KERN_SUCCESS;
-       vm_map_offset_t vcurstart, vcur;
-       boolean_t lastvavalid = FALSE;
-
-       /* Assumes pmap is locked, or being called from the kernel debugger */
-       
-       if (start > end) {
-               return (KERN_INVALID_ARGUMENT);
-       }
-
-       if (start & PAGE_MASK_64) {
-               return (KERN_INVALID_ARGUMENT);
-       }
-
-       for (vcur = vcurstart = start; (ret == KERN_SUCCESS) && (vcur < end); ) {
-               ppnum_t ppn = pmap_find_phys(pmap, vcur);
-
-               if (ppn != 0 && !pmap_valid_page(ppn)) {
-                       /* not something we want */
-                       ppn = 0;
-               }
-
-               if (ppn != 0) {
-                       if (!lastvavalid) {
-                               /* Start of a new virtual region */
-                               vcurstart = vcur;
-                               lastvavalid = TRUE;
-                       }
-               } else {
-                       if (lastvavalid) {
-                               /* end of a virtual region */
-                               
-                               ret = callback(vcurstart, vcur, context);
-
-                               lastvavalid = FALSE;
-                       }
-
-                       /* Try to skip by 2MB if possible */
-                       if (((vcur & PDMASK) == 0) && cpu_64bit) {
-                               pd_entry_t *pde;
-
-                               pde = pmap_pde(pmap, vcur);
-                               if (0 == pde || ((*pde & INTEL_PTE_VALID) == 0)) {
-                                       /* Make sure we wouldn't overflow */
-                                       if (vcur < (end - NBPD)) {
-                                               vcur += NBPD;
-                                               continue;
-                                       }
-                               }
-                       }
-               }
-               
-               vcur += PAGE_SIZE_64;
-       }
-       
-       if ((ret == KERN_SUCCESS)
-               && lastvavalid) {
-               /* send previous run */
-
-               ret = callback(vcurstart, vcur, context);
-       }
-       return (ret);
-}
-
-struct kern_dump_preflight_context {
-       uint32_t        region_count;
-       uint64_t        dumpable_bytes;
-};
-
-struct kern_dump_send_context {
-       uint64_t        hoffset;
-       uint64_t        foffset;
-       uint64_t        header_size;
-};
-
-int
-kern_dump_pmap_traverse_preflight_callback(vm_map_offset_t start,
-                                                                                  vm_map_offset_t end,
-                                                                                  void *context)
-{
-       struct kern_dump_preflight_context *kdc = (struct kern_dump_preflight_context *)context;
-       int ret = KERN_SUCCESS;
-
-       kdc->region_count++;
-       kdc->dumpable_bytes += (end - start);
-
-       return (ret);
-}
-
-int
-kern_dump_pmap_traverse_send_callback(vm_map_offset_t start,
-                                                                         vm_map_offset_t end,
-                                                                         void *context)
-{
-       struct kern_dump_send_context *kdc = (struct kern_dump_send_context *)context;
-       int ret = KERN_SUCCESS;
-       kernel_segment_command_t sc;
-       vm_size_t size = (vm_size_t)(end - start);
-
-       if (kdc->hoffset + sizeof(sc) > kdc->header_size) {
-               return (KERN_NO_SPACE);
-       }
-
-       /*
-        *      Fill in segment command structure.
-        */
-    
-       sc.cmd = LC_SEGMENT_KERNEL;
-       sc.cmdsize = sizeof(kernel_segment_command_t);
-       sc.segname[0] = 0;
-       sc.vmaddr = (vm_address_t)start;
-       sc.vmsize = size;
-       sc.fileoff = (vm_address_t)kdc->foffset;
-       sc.filesize = size;
-       sc.maxprot = VM_PROT_READ;
-       sc.initprot = VM_PROT_READ;
-       sc.nsects = 0;
-       sc.flags = 0;
-
-       if ((ret = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(kdc->hoffset) , &kdc->hoffset)) < 0) { 
-               printf ("kdp_send_crashdump_pkt failed with error %d\n", ret);
-               goto out;
-       } 
-    
-       if ((ret = kdp_send_crashdump_data (KDP_DATA, NULL, sizeof(kernel_segment_command_t) , (caddr_t) &sc)) < 0) {
-               printf ("kdp_send_crashdump_data failed with error %d\n", ret);
-               goto out;
-       }
-       
-       kdc->hoffset += sizeof(kernel_segment_command_t);
-
-       if ((ret = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(kdc->foffset) , &kdc->foffset)) < 0) {
-               printf ("kdp_send_crashdump_pkt failed with error %d\n", ret);
-               goto out;
-       }
-               
-       if ((ret = kdp_send_crashdump_data (KDP_DATA, NULL, (unsigned int)size, (caddr_t)(uintptr_t)start)) < 0)        {
-               printf ("kdp_send_crashdump_data failed with error %d\n", ret);
-               goto out;
-       }
-       
-       kdc->foffset += size;
-
-out:
-       return (ret);
-}
-
-int
-kern_dump(void)
-{
-       int                     ret;
-       struct kern_dump_preflight_context kdc_preflight;
-       struct kern_dump_send_context kdc_send;
-       uint32_t        segment_count;
-       size_t          command_size = 0, header_size = 0, tstate_size = 0;
-       uint64_t        hoffset = 0, foffset = 0;
-       kernel_mach_header_t    mh;
-
-
-       kdc_preflight.region_count = 0;
-       kdc_preflight.dumpable_bytes = 0;
-
-       ret = pmap_traverse_present_mappings(kernel_pmap,
-                                                                                VM_MIN_KERNEL_AND_KEXT_ADDRESS,
-                                                                                VM_MAX_KERNEL_ADDRESS,
-                                                                                kern_dump_pmap_traverse_preflight_callback,
-                                                                                &kdc_preflight);
-       if (ret) {
-               printf("pmap traversal failed: %d\n", ret);
-               return (ret);
-       }
-
-       printf("Kernel dump region count: %u\n", kdc_preflight.region_count);
-       printf("Kernel dump byte count: %llu\n", kdc_preflight.dumpable_bytes);
-                       
-       segment_count = kdc_preflight.region_count;
-
-       tstate_size = sizeof(struct thread_command) + kern_collectth_state_size();
-
-       command_size = segment_count * sizeof(kernel_segment_command_t) +
-                               tstate_size;
-
-       header_size = command_size + sizeof(kernel_mach_header_t);
-
-       /*
-        *      Set up Mach-O header for currently executing kernel.
-        */
-       printf ("Generated Mach-O header size was %lu\n", header_size);
-
-       mh.magic = _mh_execute_header.magic;
-       mh.cputype = _mh_execute_header.cputype;;
-       mh.cpusubtype = _mh_execute_header.cpusubtype;
-       mh.filetype = MH_CORE;
-       mh.ncmds = segment_count + 1 /* thread */;
-       mh.sizeofcmds = (uint32_t)command_size;
-       mh.flags = 0;
-#if defined(__LP64__)
-       mh.reserved = 0;
-#endif
-
-       hoffset = 0;    /* offset into header */
-       foffset = (uint32_t)round_page(header_size);    /* offset into file */
-
-       /* Transmit the Mach-O MH_CORE header, and seek forward past the 
-        * area reserved for the segment and thread commands 
-        * to begin data transmission 
-        */
-       if ((ret = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(hoffset) , &hoffset)) < 0) { 
-               printf ("kdp_send_crashdump_pkt failed with error %d\n", ret);
-               goto out;
-       } 
-       if ((ret = kdp_send_crashdump_data (KDP_DATA, NULL, sizeof(kernel_mach_header_t), (caddr_t) &mh) < 0)) {
-               printf ("kdp_send_crashdump_data failed with error %d\n", ret);
-               goto out;
-       }
-
-       hoffset += sizeof(kernel_mach_header_t);
-
-       if ((ret = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(foffset) , &foffset) < 0)) {
-               printf ("kdp_send_crashdump_pkt failed with error %d\n", ret);
-               goto out;
-       }
-
-       printf ("Transmitting kernel state, please wait: ");
-
-       kdc_send.hoffset = hoffset;
-       kdc_send.foffset = foffset;
-       kdc_send.header_size = header_size;
-
-       ret = pmap_traverse_present_mappings(kernel_pmap,
-                                                                                VM_MIN_KERNEL_AND_KEXT_ADDRESS,
-                                                                                VM_MAX_KERNEL_ADDRESS,
-                                                                                kern_dump_pmap_traverse_send_callback,
-                                                                                &kdc_send);
-       if (ret) {
-               kprintf("pmap traversal failed: %d\n", ret);
-               return (ret);
-       }
-
-       /* Reload mutated offsets */
-       hoffset = kdc_send.hoffset;
-       foffset = kdc_send.foffset;
-
-       /*
-        * Now send out the LC_THREAD load command, with the thread information
-        * for the current activation.
-        */
-       if (tstate_size > 0) {
-               char tstate[tstate_size];
-
-               kern_collectth_state (current_thread(), tstate, tstate_size);
-
-               if ((ret = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(hoffset), &hoffset)) < 0) { 
-                       printf ("kdp_send_crashdump_pkt failed with error %d\n", ret);
-                       goto out;
-               }
-               
-               if ((ret = kdp_send_crashdump_data (KDP_DATA, NULL, tstate_size, tstate)) < 0) {
-                       printf ("kdp_send_crashdump_data failed with error %d\n", ret);
-                       goto out;
-               }
-
-               hoffset += tstate_size;
-       }
-
-       /* last packet */
-       if ((ret = kdp_send_crashdump_pkt (KDP_EOF, NULL, 0, ((void *) 0))) < 0)
-       {
-               printf ("kdp_send_crashdump_pkt failed with error %d\n", ret);
-               goto out;
-       }       
-
-out:
-       return (ret);
-}
-
-
 pt_entry_t *debugger_ptep;
 vm_map_offset_t debugger_window_kva;
 
@@ -704,7 +413,7 @@ kdp_machine_init(void) {
        kern_return_t kr = vm_map_find_space(kernel_map,
            &debugger_window_kva,
            PAGE_SIZE, 0,
-           VM_MAKE_TAG(VM_MEMORY_IOKIT), &e);
+           VM_MAKE_TAG(VM_KERN_MEMORY_OSFMK), &e);
 
        if (kr != KERN_SUCCESS) {
                panic("%s: vm_map_find_space failed with %d\n", __FUNCTION__, kr);
@@ -720,3 +429,5 @@ kdp_machine_init(void) {
        }
 }
 
+
+
index 8c1a7cee3192f50efe21267c5eef56aff61561ee..ec9a0bfeb9d30125c708c429efbb5d21d12b2a38 100644 (file)
 #include <mach/machine/vm_types.h>
 #include <i386/pmap.h>
 
-/*
- * Attempt to discover all virtually contiguous ranges in a pmap
- * that have valid mappings to DRAM (not MMIO device memory for example).
- * Results are returned via a callback. If the callback returns an error,
- * traversal is aborted.
- */
-typedef int (*pmap_traverse_callback)(vm_map_offset_t start,
-                                                                         vm_map_offset_t end,
-                                                                         void *context);
-
-extern int pmap_traverse_present_mappings(pmap_t pmap,
-                                                                                 vm_map_offset_t start,
-                                                                                 vm_map_offset_t end,
-                                                                                 pmap_traverse_callback callback,
-                                                                                 void *context);
-
-
-extern int kern_dump(void);
-extern size_t kern_collectth_state_size(void);
-extern void kern_collectth_state(thread_t thread, void *buffer, size_t size);
-
 #endif /* _KDP_X86_COMMON_H_ */
index 1f35a37bf53e5fb6930338576ac28b39305fbe80..91019f56a5aac398a113edd96ad2b85571f7e3fd 100644 (file)
@@ -57,7 +57,7 @@
 extern cpu_type_t cpuid_cputype(void);
 extern cpu_subtype_t cpuid_cpusubtype(void);
 
-extern vm_offset_t machine_trace_thread_get_kva(vm_offset_t cur_target_addr);
+extern vm_offset_t machine_trace_thread_get_kva(vm_offset_t cur_target_addr, vm_map_t map, uint32_t *thread_trace_flags);
 extern void machine_trace_thread_clear_validation_cache(void);
 
 void           print_saved_state(void *);
@@ -69,10 +69,10 @@ void                kdp_setstate(x86_thread_state64_t *);
 void           kdp_print_phys(int);
 
 int
-machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p);
+machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p, uint32_t *thread_trace_flags);
 
 int
-machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p);
+machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p, uint32_t *thread_trace_flags);
 
 unsigned
 machine_read64(addr64_t srcaddr, caddr_t dstaddr, uint32_t len);
@@ -510,7 +510,7 @@ extern pmap_t kdp_pmap;
 #define RETURN_OFFSET 4
 
 int
-machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p)
+machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p, uint32_t *thread_trace_flags)
 {
        uint32_t *tracebuf = (uint32_t *)tracepos;
        uint32_t fence = 0;
@@ -535,6 +535,13 @@ machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nfra
        else
                panic("32-bit trace attempted on 64-bit kernel");
 
+    /* bounds check before we start advancing tracebuf */
+    if ((tracebound - ((char *)tracebuf)) < (4 * framesize)) {
+        machine_trace_thread_clear_validation_cache();
+        kdp_pmap = 0;
+        return 0;
+    }
+
        *tracebuf++ = init_eip;
 
        for (framecount = 0; framecount < nframes; framecount++) {
@@ -545,7 +552,7 @@ machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nfra
                }
 
                *tracebuf++ = stackptr;
-/* Invalid frame, or hit fence */
+               /* Invalid frame, or hit fence */
                if (!stackptr || (stackptr == fence)) {
                        break;
                }
@@ -563,9 +570,12 @@ machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nfra
                        break;
                }
 
-               kern_virt_addr = machine_trace_thread_get_kva(stackptr + RETURN_OFFSET);
+               kern_virt_addr = machine_trace_thread_get_kva(stackptr + RETURN_OFFSET, thread->task->map, thread_trace_flags);
 
                if (!kern_virt_addr) {
+                       if (thread_trace_flags) {
+                               *thread_trace_flags |= kThreadTruncatedBT;
+                       }
                        break;
                }
 
@@ -573,16 +583,21 @@ machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nfra
                tracebuf++;
                
                prevsp = stackptr;
-               kern_virt_addr = machine_trace_thread_get_kva(stackptr);
+               kern_virt_addr = machine_trace_thread_get_kva(stackptr, thread->task->map, thread_trace_flags);
 
                if (!kern_virt_addr) {
+                       if (thread_trace_flags) {
+                               *thread_trace_flags |= kThreadTruncatedBT;
+                       }
+
+                       /* We need to fill in a complete LR/FP record, even if we couldn't find a FP */
                        *tracebuf++ = 0;
                        break;
                }
 
                stackptr = *(uint32_t *)kern_virt_addr;
        }
-
+    
        machine_trace_thread_clear_validation_cache();
        kdp_pmap = 0;
 
@@ -599,7 +614,7 @@ machine_read64(addr64_t srcaddr, caddr_t dstaddr, uint32_t len)
 }
 
 int
-machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p)
+machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p, uint32_t *thread_trace_flags)
 {
        uint64_t *tracebuf = (uint64_t *)tracepos;
        uint32_t fence = 0;
@@ -621,11 +636,17 @@ machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nf
                stackptr = STACK_IKS(thread->kernel_stack)->k_rbp;
                init_rip = STACK_IKS(thread->kernel_stack)->k_rip;
                init_rip = VM_KERNEL_UNSLIDE(init_rip);
-               kdp_pmap = 0;
+        kdp_pmap = NULL;
        }
 
-       *tracebuf++ = init_rip;
-
+    /* bounds check before we start advancing tracebuf */
+    if ((uint32_t)(tracebound - ((char *)tracebuf)) < (4 * framesize)) {
+        machine_trace_thread_clear_validation_cache();
+        kdp_pmap = NULL;
+        return 0;
+    }
+    *tracebuf++ = init_rip;
+    
        for (framecount = 0; framecount < nframes; framecount++) {
 
                if ((uint32_t)(tracebound - ((char *)tracebuf)) < (4 * framesize)) {
@@ -647,9 +668,12 @@ machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nf
                        break;
                }
 
-               kern_virt_addr = machine_trace_thread_get_kva(stackptr + RETURN_OFFSET64);
+               kern_virt_addr = machine_trace_thread_get_kva(stackptr + RETURN_OFFSET64, thread->task->map, thread_trace_flags);
 
                if (!kern_virt_addr) {
+                       if (thread_trace_flags) {
+                               *thread_trace_flags |= kThreadTruncatedBT;
+                       }
                        break;
                }
 
@@ -660,9 +684,14 @@ machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nf
                tracebuf++;
 
                prevsp = stackptr;
-               kern_virt_addr = machine_trace_thread_get_kva(stackptr);
+               kern_virt_addr = machine_trace_thread_get_kva(stackptr, thread->task->map, thread_trace_flags);
 
                if (!kern_virt_addr) {
+                       if (thread_trace_flags) {
+                               *thread_trace_flags |= kThreadTruncatedBT;
+                       }
+
+                       /* We need to fill in a complete LR/FP record, even if we couldn't find a FP */
                        *tracebuf++ = 0;
                        break;
                }
index 2cd1c5cbaa3b11eb99962ea25b2460d219f5c637..89bc4778bb15adcaa6191b0a483fffa8e807f12e 100644 (file)
@@ -43,8 +43,8 @@ static const x86_state_hdr_t thread_flavor_array [] = {
        {x86_THREAD_STATE64, x86_THREAD_STATE64_COUNT}
 };
 
-size_t
-kern_collectth_state_size(void)
+void
+kern_collectth_state_size(uint32_t * tstate_count, size_t * ptstate_size)
 {
        unsigned int i;
        size_t tstate_size = 0;
@@ -53,16 +53,21 @@ kern_collectth_state_size(void)
                tstate_size += sizeof(x86_state_hdr_t) +
                    (thread_flavor_array[i].count * sizeof(int));
 
-       return tstate_size;
+       *tstate_count = 1;
+       *ptstate_size = sizeof(struct thread_command) + tstate_size;
 }
 
 void
-kern_collectth_state(thread_t thread, void *buffer, size_t size)
+kern_collectth_state(thread_t thread, void *buffer, size_t size, void ** iter)
 {
-       size_t                  hoffset;
+       size_t          hoffset;
+       size_t          tstate_size;
+        uint32_t        tstate_count;
        unsigned int    i;
        struct thread_command   *tc;
+       
 
+       *iter = NULL;
        /*
         *      Fill in thread command structure.
         */
@@ -71,9 +76,10 @@ kern_collectth_state(thread_t thread, void *buffer, size_t size)
        if (hoffset + sizeof(struct thread_command) > size)
                return;
 
+       kern_collectth_state_size(&tstate_count, &tstate_size);
        tc = (struct thread_command *) ((uintptr_t)buffer + hoffset);
        tc->cmd = LC_THREAD;
-       tc->cmdsize = (uint32_t)(sizeof(struct thread_command) + kern_collectth_state_size());
+       tc->cmdsize = (uint32_t) tstate_size;
        hoffset += sizeof(struct thread_command);
        /*
         * Follow with a struct thread_state_flavor and
index fcf236084b4f1647995fa707d73ce325e61fb9e0..d04e183e6a429493924eea971700ff5f3ba1e4a7 100644 (file)
@@ -7,11 +7,12 @@ include $(MakeInc_cmd)
 include $(MakeInc_def)
 
 DATAFILES = \
-       exc_resource.h
+       exc_resource.h \
+       kern_cdata.h
 
 PRIVATE_DATAFILES = \
-       ecc.h \
-       exc_resource.h
+       debug.h \
+       ecc.h
 
 EXPORT_FILES = \
        affinity.h \
@@ -22,7 +23,6 @@ EXPORT_FILES = \
        coalition.h \
        cpu_number.h \
        cpu_data.h \
-       debug.h \
        energy_perf.h \
        extmod_statistics.h \
        hv_support.h \
@@ -51,18 +51,18 @@ EXPORT_FILES = \
        thread.h \
        thread_call.h \
        timer_call.h \
-       wait_queue.h \
+       waitq.h \
        zalloc.h
 
 INSTALL_MI_LIST = ${DATAFILES}
 
-INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES} debug.h
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
 
-INSTALL_KF_MI_LCL_LIST = ${PRIVATE_DATAFILES} ${EXPORT_FILES} 
+INSTALL_KF_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} ${EXPORT_FILES}
 
 INSTALL_MI_DIR = kern
 
-EXPORT_MI_LIST = ${PRIVATE_DATAFILES} ${EXPORT_FILES}
+EXPORT_MI_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} ${EXPORT_FILES}
 
 EXPORT_MI_DIR = kern
 
index 17d625506c202cf9640b28d6992e3b0da26df4e9..4c60056fbde755298b66df60136edb1643b68754 100644 (file)
@@ -85,14 +85,14 @@ __END_DECLS
 
 #define assert(ex)  \
        (__builtin_expect(!!((long)(ex)), 1L) ? (void)0 : Assert(__FILE__, __LINE__, # ex))
-#define        assert_static(x)        assert(x)
+#define assert_static(ex) _Static_assert((ex), #ex)
 
 #define __assert_only
 
 #else  /* MACH_ASSERT */
 
 #define assert(ex) ((void)0)
-#define assert_static(ex) do {} while (0)
+#define assert_static(ex) _Static_assert((ex), #ex)
 
 #define __assert_only __unused
 
index 3a3caa03a0f0e86604e7cb7e2e49bb1c6f32fd7c..f2ceba343763afd22146f7933b61a4e7563de598 100644 (file)
@@ -78,7 +78,7 @@
 #if CONFIG_TELEMETRY
 #include <kern/telemetry.h>
 #endif
-#include <kern/wait_queue.h>
+#include <kern/waitq.h>
 #include <kern/ledger.h>
 #include <mach/policy.h>
 #include <machine/trap.h> // for CHUD AST hook
@@ -135,7 +135,7 @@ ast_taken(
                 * Check for urgent preemption.
                 */
                if (    (reasons & AST_URGENT)                          &&
-                               wait_queue_assert_possible(thread)              ) {
+                               waitq_wait_possible(thread)             ) {
                        if (reasons & AST_PREEMPT) {
                                counter(c_ast_taken_block++);
                                thread_block_reason(THREAD_CONTINUE_NULL, NULL,
@@ -173,8 +173,10 @@ ast_taken(
                        /* 
                         * Thread APC hook.
                         */
-                       if (reasons & AST_APC)
-                               act_execute_returnhandlers();
+                       if (reasons & AST_APC) {
+                               thread_ast_clear(thread, AST_APC);
+                               special_handler(thread);
+                       }
                        
                        if (reasons & AST_GUARD) {
                                thread_ast_clear(thread, AST_GUARD);
@@ -209,11 +211,13 @@ ast_taken(
 
                        ml_set_interrupts_enabled(FALSE);
 
+#if CONFIG_SCHED_SFI
                        if (reasons & AST_SFI) {
                                sfi_ast(thread);
                        }
+#endif
 
-                       /* 
+                       /*
                         * Check for preemption. Conditions may have changed from when the AST_PREEMPT was originally set.
                         */
                        thread_lock(thread);
@@ -221,8 +225,9 @@ ast_taken(
                                reasons = csw_check(current_processor(), reasons & AST_QUANTUM);
                        thread_unlock(thread);
 
-                       if (    (reasons & AST_PREEMPT)                         &&
-                                       wait_queue_assert_possible(thread)              ) {             
+                       assert(waitq_wait_possible(thread));
+
+                       if (reasons & AST_PREEMPT) {
                                counter(c_ast_taken_block++);
                                thread_block_reason((thread_continue_t)thread_exception_return, NULL, reasons & AST_PREEMPTION);
                        }
@@ -237,13 +242,13 @@ ast_taken(
  */
 void
 ast_check(
-       processor_t             processor)
+       processor_t processor)
 {
-       thread_t                        thread = processor->active_thread;
+       thread_t thread = processor->active_thread;
 
-       if (    processor->state == PROCESSOR_RUNNING           ||
-                       processor->state == PROCESSOR_SHUTDOWN          ) {
-               ast_t                   preempt;
+       if (processor->state == PROCESSOR_RUNNING ||
+           processor->state == PROCESSOR_SHUTDOWN) {
+               ast_t preempt;
 
                /*
                 *      Propagate thread ast to processor.
@@ -263,6 +268,45 @@ ast_check(
 
                if ((preempt = csw_check(processor, AST_NONE)) != AST_NONE)
                        ast_on(preempt);
+
                thread_unlock(thread);
        }
 }
+
+/*
+ * Set AST flags on current processor
+ * Called at splsched
+ */
+void
+ast_on(ast_t reasons)
+{
+       ast_t *pending_ast = ast_pending();
+
+       *pending_ast |= reasons;
+}
+
+/*
+ * Clear AST flags on current processor
+ * Called at splsched
+ */
+void
+ast_off(ast_t reasons)
+{
+       ast_t *pending_ast = ast_pending();
+
+       *pending_ast &= ~reasons;
+}
+
+/*
+ * Re-set current processor's per-thread AST flags to those set on thread
+ * Called at splsched
+ */
+void
+ast_context(thread_t thread)
+{
+       ast_t *pending_ast = ast_pending();
+
+       *pending_ast = ((*pending_ast & ~AST_PER_THREAD) | thread->ast);
+}
+
+
index 28886bb2e97a09e56165bae851ab69c413704f36..c6ecb5efabf5b51c4d09a0e1dbe52c8bfdc242b7 100644 (file)
@@ -67,7 +67,6 @@
 #include <kern/assert.h>
 #include <kern/macro_help.h>
 #include <kern/spl.h>
-#include <machine/ast.h>
 
 /*
  * A processor takes an AST when it is about to return from an
@@ -111,12 +110,6 @@ typedef uint32_t           ast_t;
 #define AST_YIELD              0x10
 #define AST_APC                        0x20    /* migration APC hook */
 #define AST_LEDGER             0x40
-
-/*
- * JMM - This is here temporarily. AST_BSD is used to simulate a
- * general purpose mechanism for setting asynchronous procedure calls
- * from the outside.
- */
 #define AST_BSD                        0x80
 #define AST_KPERF              0x100   /* kernel profiling */
 #define        AST_MACF                0x200   /* MACF user ret pending */
@@ -126,7 +119,6 @@ typedef uint32_t            ast_t;
 #define AST_TELEMETRY_USER     0x2000  /* telemetry sample requested on interrupt from userspace */
 #define AST_TELEMETRY_KERNEL   0x4000  /* telemetry sample requested on interrupt from kernel */
 #define AST_TELEMETRY_WINDOWED 0x8000  /* telemetry sample meant for the window buffer */
-
 #define AST_SFI                        0x10000 /* Evaluate if SFI wait is needed before return to userspace */
 
 #define AST_NONE               0x00
@@ -138,16 +130,8 @@ typedef uint32_t           ast_t;
 #define AST_CHUD_ALL   (AST_CHUD_URGENT|AST_CHUD)
 #define AST_TELEMETRY_ALL      (AST_TELEMETRY_USER | AST_TELEMETRY_KERNEL | AST_TELEMETRY_WINDOWED)
 
-#ifdef  MACHINE_AST
-/*
- *      machine/ast.h is responsible for defining aston and astoff.
- */
-#else   /* MACHINE_AST */
-
-#define aston(mycpu)
-#define astoff(mycpu)
-
-#endif  /* MACHINE_AST */
+/* Per-thread ASTs follow the thread at context-switch time. */
+#define AST_PER_THREAD (AST_APC | AST_BSD | AST_MACF | AST_LEDGER | AST_GUARD | AST_TELEMETRY_USER | AST_TELEMETRY_KERNEL | AST_TELEMETRY_WINDOWED)
 
 /* Initialize module */
 extern void            ast_init(void);
@@ -158,70 +142,35 @@ extern void               ast_taken(
                                        boolean_t       enable);
 
 /* Check for pending ASTs */
-extern void            ast_check(
-                                       processor_t             processor);
+extern void ast_check(processor_t processor);
 
 /* Pending ast mask for the current processor */
-extern ast_t   *ast_pending(void);
+extern ast_t *ast_pending(void);
 
-/*
- * Per-thread ASTs are reset at context-switch time.
- */
-#ifndef MACHINE_AST_PER_THREAD
-#define MACHINE_AST_PER_THREAD  0
-#endif
+/* Set AST flags on current processor */
+extern void ast_on(ast_t reasons);
 
-#define AST_PER_THREAD (AST_APC | AST_BSD | AST_MACF | MACHINE_AST_PER_THREAD | AST_LEDGER | AST_GUARD | AST_TELEMETRY_USER | AST_TELEMETRY_KERNEL | AST_TELEMETRY_WINDOWED)
-/*
- *     ast_pending(), ast_on(), ast_off(), ast_context(), and ast_propagate()
- *     assume splsched.
- */
+/* Clear AST flags on current processor */
+extern void ast_off(ast_t reasons);
 
-#define ast_on_fast(reasons)                                   \
-MACRO_BEGIN                                                                            \
-       ast_t   *_ast_myast = ast_pending();            \
-                                                                                               \
-       if ((*_ast_myast |= (reasons)) != AST_NONE)     \
-               { aston(_ast_myast); }                                  \
-MACRO_END
-
-#define ast_off_fast(reasons)                                  \
-MACRO_BEGIN                                                                            \
-       ast_t   *_ast_myast = ast_pending();            \
-                                                                                               \
-       if ((*_ast_myast &= ~(reasons)) == AST_NONE) \
-               { astoff(_ast_myast); }                                 \
-MACRO_END
-
-#define ast_propagate(reasons)         ast_on(reasons)
-
-#define ast_context(act)                                                                                                       \
-MACRO_BEGIN                                                                                                                                    \
-       ast_t   *myast = ast_pending();                                                                                 \
-                                                                                                                                                       \
-       if ((*myast = ((*myast &~ AST_PER_THREAD) | (act)->ast)) != AST_NONE)   \
-               { aston(myast); }                                                                                                       \
-       else                                                                                                                                    \
-               { astoff(myast); }                                                                                                      \
-MACRO_END
-
-#define ast_on(reason)                      ast_on_fast(reason)
-#define ast_off(reason)                             ast_off_fast(reason)
+/* Re-set current processor's per-thread AST flags to those set on thread */
+extern void ast_context(thread_t thread);
+
+#define ast_propagate(reasons) ast_on(reasons)
 
 /*
- *     NOTE: if thread is the current thread, thread_ast_set() should
- *  be followed by ast_propagate().
+ *     Set an AST on a thread with thread_ast_set.
+ *
+ *     You can then propagate it to the current processor with ast_propagate(),
+ *     or tell another processor to act on it with cause_ast_check().
+ *
+ *     See act_set_ast() for an example.
  */
-#define thread_ast_set(act, reason)            \
-                                               (hw_atomic_or_noret(&(act)->ast, (reason)))
-#define thread_ast_clear(act, reason)  \
-                                               (hw_atomic_and_noret(&(act)->ast, ~(reason)))
-#define thread_ast_clear_all(act)              \
-                                               (hw_atomic_and_noret(&(act)->ast, AST_NONE))
+#define thread_ast_set(act, reason)     (hw_atomic_or_noret(&(act)->ast, (reason)))
+#define thread_ast_clear(act, reason)   (hw_atomic_and_noret(&(act)->ast, ~(reason)))
 
 #ifdef MACH_BSD
 
-extern void astbsd_on(void);
 extern void act_set_astbsd(thread_t);
 extern void bsd_ast(thread_t);
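
Note: the rewritten header comment spells out the intended pattern: set the AST on the thread with thread_ast_set(), then either propagate it to the current processor with ast_propagate() or ask the thread's processor to re-check with cause_ast_check(). A sketch under that description; the last_processor check and the locking shown (splsched only, no thread lock) are simplifications and not the exact act_set_ast() code:

	static void
	example_request_bsd_ast(thread_t thread)
	{
		spl_t s = splsched();

		thread_ast_set(thread, AST_BSD);
		if (thread == current_thread()) {
			/* act on it when this thread next returns to user space */
			ast_propagate(thread->ast);
		} else {
			/* nudge the processor the thread is running on, if any */
			processor_t p = thread->last_processor;
			if (p != PROCESSOR_NULL && p->active_thread == thread)
				cause_ast_check(p);
		}
		splx(s);
	}
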
 
index c2edb90698487d9979142a53aad9160541baf997..713466a53c35cf766f26ae8970d2e5b0fd9eeef0 100644 (file)
@@ -43,6 +43,7 @@
 #include <vm/pmap.h>
 #include <vm/vm_protos.h> /* last */
 #include <sys/resource.h>
+#include <sys/signal.h>
 
 #undef thread_should_halt
 
@@ -50,7 +51,6 @@
 
 task_t bsd_init_task = TASK_NULL;
 boolean_t init_task_died;
-char   init_task_failure_data[1024];
 extern unsigned int not_in_kdp; /* Skip acquiring locks if we're in kdp */
  
 thread_t get_firstthread(task_t);
@@ -64,6 +64,11 @@ int fill_task_rusage(task_t task, rusage_info_current *ri);
 int fill_task_io_rusage(task_t task, rusage_info_current *ri);
 int fill_task_qos_rusage(task_t task, rusage_info_current *ri);
 void fill_task_billed_usage(task_t task, rusage_info_current *ri);
+void task_bsdtask_kill(task_t);
+
+#if MACH_BSD
+extern void psignal(void *, int);
+#endif
 
 /*
  *
@@ -73,6 +78,13 @@ void  *get_bsdtask_info(task_t t)
        return(t->bsd_info);
 }
 
+void task_bsdtask_kill(task_t t)
+{
+       void * bsd_info = get_bsdtask_info(t);
+       if (bsd_info != NULL) {
+               psignal(bsd_info, SIGKILL);
+       }
+}
 /*
  *
  */
@@ -227,9 +239,9 @@ ledger_t  get_task_ledger(task_t t)
 
 /*
  * This is only safe to call from a thread executing in
- * in the task's context or if the task is locked  Otherwise,
+ * in the task's context or if the task is locked. Otherwise,
  * the map could be switched for the task (and freed) before
- * we to return it here.
+ * we go to return it here.
  */
 vm_map_t  get_task_map(task_t t)
 {
@@ -324,6 +336,10 @@ swap_task_map(task_t task, thread_t thread, vm_map_t map, boolean_t doswitch)
 
 /*
  *
+ * This is only safe to call from a thread executing in
+ * in the task's context or if the task is locked. Otherwise,
+ * the map could be switched for the task (and freed) before
+ * we go to return it here.
  */
 pmap_t  get_task_pmap(task_t t)
 {
@@ -359,16 +375,27 @@ uint64_t get_task_resident_max(task_t task)
 
 uint64_t get_task_purgeable_size(task_t task) 
 {
-       vm_map_t map;
-    mach_vm_size_t  volatile_virtual_size;
-    mach_vm_size_t  volatile_resident_size;
-    mach_vm_size_t  volatile_pmap_size;
-       
-       map = (task == kernel_task) ? kernel_map: task->map;
-       vm_map_query_volatile(map, &volatile_virtual_size, &volatile_resident_size, &volatile_pmap_size);
+       kern_return_t ret;
+       ledger_amount_t credit, debit;
+       uint64_t volatile_size = 0;
+
+       ret = ledger_get_entries(task->ledger, task_ledgers.purgeable_volatile, &credit, &debit);
+       if (ret != KERN_SUCCESS) {
+               return 0;
+       }
+
+       volatile_size += (credit - debit);
+
+       ret = ledger_get_entries(task->ledger, task_ledgers.purgeable_volatile_compressed, &credit, &debit);
+       if (ret != KERN_SUCCESS) {
+               return 0;
+       }
 
-       return((uint64_t)volatile_resident_size);
+       volatile_size += (credit - debit);
+
+       return volatile_size;
 }
+
 /*
  *
  */
@@ -414,13 +441,6 @@ uint64_t get_task_cpu_time(task_t task)
        return 0;
 }
 
-/*
- *
- */
-pmap_t  get_map_pmap(vm_map_t map)
-{
-       return(map->pmap);
-}
 /*
  *
  */
@@ -474,10 +494,11 @@ get_vmsubmap_entries(
        while((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
                if(entry->is_sub_map) {
                        total_entries +=        
-                               get_vmsubmap_entries(entry->object.sub_map, 
-                                       entry->offset, 
-                                       entry->offset + 
-                                       (entry->vme_end - entry->vme_start));
+                               get_vmsubmap_entries(VME_SUBMAP(entry), 
+                                                    VME_OFFSET(entry), 
+                                                    (VME_OFFSET(entry) + 
+                                                     entry->vme_end -
+                                                     entry->vme_start));
                } else {
                        total_entries += 1;
                }
@@ -502,10 +523,11 @@ get_vmmap_entries(
        while(entry != vm_map_to_entry(map)) {
                if(entry->is_sub_map) {
                        total_entries +=        
-                               get_vmsubmap_entries(entry->object.sub_map, 
-                                       entry->offset, 
-                                       entry->offset + 
-                                       (entry->vme_end - entry->vme_start));
+                               get_vmsubmap_entries(VME_SUBMAP(entry), 
+                                                    VME_OFFSET(entry),
+                                                    (VME_OFFSET(entry) + 
+                                                     entry->vme_end -
+                                                     entry->vme_start));
                } else {
                        total_entries += 1;
                }
@@ -621,17 +643,6 @@ task_act_iterate_wth_args(
 }
 
 
-void
-astbsd_on(void)
-{
-       boolean_t       reenable;
-
-       reenable = ml_set_interrupts_enabled(FALSE);
-       ast_on_fast(AST_BSD);
-       (void)ml_set_interrupts_enabled(reenable);
-}
-
-
 #include <sys/bsdtask_info.h>
 
 void
@@ -644,6 +655,8 @@ fill_taskprocinfo(task_t task, struct proc_taskinfo_internal * ptinfo)
        uint32_t syscalls_unix = 0;
        uint32_t syscalls_mach = 0;
 
+       task_lock(task);
+
        map = (task == kernel_task)? kernel_map: task->map;
 
        ptinfo->pti_virtual_size  = map->size;
@@ -651,8 +664,6 @@ fill_taskprocinfo(task_t task, struct proc_taskinfo_internal * ptinfo)
                (mach_vm_size_t)(pmap_resident_count(map->pmap))
                * PAGE_SIZE_64;
 
-       task_lock(task);
-
        ptinfo->pti_policy = ((task != kernel_task)?
                                           POLICY_TIMESHARE: POLICY_RR);
 
@@ -747,7 +758,7 @@ fill_taskthreadinfo(task_t task, uint64_t thaddr, int thuniqueid, struct proc_th
                        ptinfo->pth_flags = basic_info.flags;
                        ptinfo->pth_sleep_time = basic_info.sleep_time;
                        ptinfo->pth_curpri = thact->sched_pri;
-                       ptinfo->pth_priority = thact->priority;
+                       ptinfo->pth_priority = thact->base_pri;
                        ptinfo->pth_maxpriority = thact->max_priority;
                        
                        if ((vpp != NULL) && (thact->uthread != NULL)) 
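
Note: get_task_purgeable_size() now reads the purgeable footprint from the task ledger instead of querying the VM map; each ledger entry contributes its balance, i.e. credit minus debit. A hypothetical helper showing just that pattern (the name and error handling are illustrative):

	static uint64_t
	example_ledger_balance(ledger_t ledger, int entry)
	{
		ledger_amount_t credit, debit;

		if (ledger_get_entries(ledger, entry, &credit, &debit) != KERN_SUCCESS)
			return 0;
		return (uint64_t)(credit - debit);
	}

	/* e.g. example_ledger_balance(task->ledger, task_ledgers.purgeable_volatile) */
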
index 53a01e86d5f5de0716937fd048e7053020b8f125..c39dd166d6037396602c9f26c4ddb52186bc2e18 100644 (file)
@@ -122,7 +122,7 @@ btlog_create(size_t numrecords,
                                         (buffersize_needed - sizeof(btlog_t))/btrecord_size);
 
        if (kmem_alloc_ready) {
-               ret = kmem_alloc(kernel_map, &buffer, buffersize_needed);
+               ret = kmem_alloc(kernel_map, &buffer, buffersize_needed, VM_KERN_MEMORY_DIAG);
        } else {
                buffer = (vm_address_t)pmap_steal_memory(buffersize_needed);
                ret = KERN_SUCCESS;
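
Note: kmem_alloc() now carries an allocation tag (VM_KERN_MEMORY_DIAG here), which attributes the buffer to a named kernel-memory site. A minimal sketch of the tagged call; the size and cleanup are illustrative, not from the commit:

	vm_offset_t buf;
	kern_return_t kr;

	kr = kmem_alloc(kernel_map, &buf, PAGE_SIZE, VM_KERN_MEMORY_DIAG);
	if (kr == KERN_SUCCESS) {
		/* ... use buf ... */
		kmem_free(kernel_map, buf, PAGE_SIZE);
	}
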
index 96fe6e85644311a5a342e4d911f71fe91fc9ebc4..8b635f817068eb5a9629cc8d7c8df88982730f3d 100644 (file)
@@ -64,6 +64,7 @@ MACRO_BEGIN                                                   \
        (entry)->param0         = (call_entry_param_t)(p0);     \
        (entry)->queue          = NULL;                         \
        (entry)->deadline       = 0;                            \
+       queue_chain_init((entry)->q_link);                      \
 MACRO_END
 
 #define qe(x)          ((queue_entry_t)(x))
index 1bd578496eebf317883bf4ded4492c61e978fc90..f493edc67fe432f312fd1583d5174e5075c96ed7 100644 (file)
@@ -46,6 +46,8 @@
 #include <mach/mach_traps.h>
 #include <mach/mach_time.h>
 
+#include <sys/kdebug.h>
+
 uint32_t       hz_tick_interval = 1;
 
 
@@ -73,6 +75,7 @@ decl_simple_lock_data(,clock_lock)
 static struct clock_calend {
        uint64_t        epoch;
        uint64_t        offset;
+       uint64_t    epoch_absolute;
 
        int32_t         adjdelta;       /* Nanosecond time delta for this adjustment period */
        uint64_t        adjstart;       /* Absolute time value for start of this adjustment period */
@@ -121,12 +124,11 @@ static uint32_t           calend_set_adjustment(
 static void                    calend_adjust_call(void);
 static uint32_t                calend_adjust(void);
 
-static thread_call_data_t      calend_wakecall;
-
-extern void    IOKitResetTime(void);
-
 void _clock_delay_until_deadline(uint64_t              interval,
                                                                 uint64_t               deadline);
+void _clock_delay_until_deadline_with_leeway(uint64_t          interval,
+                                                                                        uint64_t               deadline,
+                                                                                        uint64_t               leeway);
 
 static uint64_t                clock_boottime;                         /* Seconds boottime epoch */
 
@@ -159,7 +161,6 @@ clock_config(void)
        clock_lock_init();
 
        timer_call_setup(&calend_adjcall, (timer_call_func_t)calend_adjust_call, NULL);
-       thread_call_setup(&calend_wakecall, (thread_call_func_t)IOKitResetTime, NULL);
 
        clock_oldconfig();
 }
@@ -438,6 +439,9 @@ clock_set_calendar_microtime(
 
        nanoseconds_to_absolutetime((uint64_t)microsecs * NSEC_PER_USEC, &clock_calend.offset);
 
+       clock_interval_to_absolutetime_interval((uint32_t) secs, NSEC_PER_SEC, &clock_calend.epoch_absolute);
+       clock_calend.epoch_absolute += clock_calend.offset;
+
        /*
         *      Cancel any adjustment in progress.
         */
@@ -471,11 +475,16 @@ clock_set_calendar_microtime(
  *
  *     Also sends host notifications.
  */
+
+uint64_t mach_absolutetime_asleep;
+uint64_t mach_absolutetime_last_sleep;
+
 void
 clock_initialize_calendar(void)
 {
        clock_sec_t                     sys, secs;
        clock_usec_t            microsys, microsecs;
+       uint64_t                        new_epoch;
        spl_t                           s;
 
     PEGetUTCTimeOfDay(&secs, &microsecs);
@@ -502,10 +511,28 @@ clock_initialize_calendar(void)
                /*
                 *      Set the new calendar epoch.
                 */
+
                clock_calend.epoch = secs;
 
                nanoseconds_to_absolutetime((uint64_t)microsecs * NSEC_PER_USEC, &clock_calend.offset);
 
+               clock_interval_to_absolutetime_interval((uint32_t) secs, NSEC_PER_SEC, &new_epoch);
+               new_epoch += clock_calend.offset;
+
+               if (clock_calend.epoch_absolute)
+               {
+                       mach_absolutetime_last_sleep = new_epoch - clock_calend.epoch_absolute;
+                       mach_absolutetime_asleep += mach_absolutetime_last_sleep;
+                       KERNEL_DEBUG_CONSTANT(
+                                 MACHDBG_CODE(DBG_MACH_CLOCK,MACH_EPOCH_CHANGE) | DBG_FUNC_NONE,
+                                 (uintptr_t) mach_absolutetime_last_sleep, 
+                                 (uintptr_t) mach_absolutetime_asleep, 
+                                 (uintptr_t) (mach_absolutetime_last_sleep >> 32), 
+                                 (uintptr_t) (mach_absolutetime_asleep >> 32), 
+                                 0);
+               }
+               clock_calend.epoch_absolute = new_epoch;
+
                /*
                 *       Cancel any adjustment in progress.
                 */
@@ -773,19 +800,6 @@ calend_adjust(void)
        return (interval);
 }
 
-/*
- *     clock_wakeup_calendar:
- *
- *     Interface to power management, used
- *     to initiate the reset of the calendar
- *     on wake from sleep event.
- */
-void
-clock_wakeup_calendar(void)
-{
-       thread_call_enter(&calend_wakecall);
-}
-
 /*
  *     Wait / delay routines.
  */
@@ -843,6 +857,19 @@ _clock_delay_until_deadline(
        uint64_t                interval,
        uint64_t                deadline)
 {
+       _clock_delay_until_deadline_with_leeway(interval, deadline, 0);
+}
+
+/*
+ * Like _clock_delay_until_deadline, but it accepts a
+ * leeway value.
+ */
+void
+_clock_delay_until_deadline_with_leeway(
+       uint64_t                interval,
+       uint64_t                deadline,
+       uint64_t                leeway)
+{
 
        if (interval == 0)
                return;
@@ -852,13 +879,21 @@ _clock_delay_until_deadline(
                        ml_get_interrupts_enabled() == FALSE    ) {
                machine_delay_until(interval, deadline);
        } else {
-               assert_wait_deadline((event_t)clock_delay_until, THREAD_UNINT, deadline);
+               /*
+                * For now, assume a leeway request of 0 means the client does not want a leeway
+                * value. We may want to change this interpretation in the future.
+                */
+
+               if (leeway) {
+                       assert_wait_deadline_with_leeway((event_t)clock_delay_until, THREAD_UNINT, TIMEOUT_URGENCY_LEEWAY, deadline, leeway);
+               } else {
+                       assert_wait_deadline((event_t)clock_delay_until, THREAD_UNINT, deadline);
+               }
 
                thread_block(THREAD_CONTINUE_NULL);
        }
 }
 
-
 void
 delay_for_interval(
        uint32_t                interval,
@@ -871,6 +906,21 @@ delay_for_interval(
        _clock_delay_until_deadline(abstime, mach_absolute_time() + abstime);
 }
 
+void
+delay_for_interval_with_leeway(
+       uint32_t                interval,
+       uint32_t                leeway,
+       uint32_t                scale_factor)
+{
+       uint64_t                abstime_interval;
+       uint64_t                abstime_leeway;
+
+       clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime_interval);
+       clock_interval_to_absolutetime_interval(leeway, scale_factor, &abstime_leeway);
+
+       _clock_delay_until_deadline_with_leeway(abstime_interval, mach_absolute_time() + abstime_interval, abstime_leeway);
+}
+
 void
 delay(
        int             usec)
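
Note: delay_for_interval_with_leeway() is the leeway-aware counterpart of delay_for_interval(); per the comment above, a leeway of 0 is treated as "no leeway requested". Hypothetical usage with illustrative values:

	/* sleep ~10 ms, allowing up to 1 ms of timer-coalescing leeway */
	delay_for_interval_with_leeway(10, 1, NSEC_PER_MSEC);

	/* leeway of 0: behaves like delay_for_interval(10, NSEC_PER_MSEC) */
	delay_for_interval_with_leeway(10, 0, NSEC_PER_MSEC);
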
index a7641adb62349c4847ec4e6cedb5a892975e6cac..8918ca3c16b71097f09b27e0a656c0ca73f4d8ee 100644 (file)
@@ -225,6 +225,9 @@ extern void             nanoseconds_to_absolutetime(
                                                        uint64_t                nanoseconds,
                                                        uint64_t                *result);
 
+extern uint64_t mach_absolutetime_asleep;
+extern uint64_t mach_absolutetime_last_sleep;
+
 #ifdef KERNEL_PRIVATE
 
 /*
@@ -277,6 +280,11 @@ extern void                                delay_for_interval(
                                                        uint32_t                interval,
                                                        uint32_t                scale_factor);
 
+extern void                            delay_for_interval_with_leeway(
+                                                       uint32_t                interval,
+                                                       uint32_t                leeway,
+                                                       uint32_t                scale_factor);
+
 #endif /* KERNEL_PRIVATE */
 
 __END_DECLS
index 95babd63280f2211f645ba2ac4c114e4e5889126..a5cf30c939af6506fb6b5be5cec707330c0c83bb 100644 (file)
 
 #include <kern/coalition.h>
 #include <kern/host.h>
-#include <kern/ledger.h>
 #include <kern/kalloc.h>
+#include <kern/ledger.h>
 #include <kern/mach_param.h> /* for TASK_CHUNK */
 #include <kern/task.h>
 #include <kern/zalloc.h>
-#include <kern/sfi.h>
 
 #include <libkern/OSAtomic.h>
 
 
 #include <sys/errno.h>
 
+/*
+ * BSD interface functions
+ */
+int coalitions_get_list(int type, struct procinfo_coalinfo *coal_list, int list_sz);
+boolean_t coalition_is_leader(task_t task, int coal_type, coalition_t *coal);
+int coalition_get_task_count(coalition_t coal);
+uint64_t coalition_get_page_count(coalition_t coal, int *ntasks);
+int coalition_get_pid_list(coalition_t coal, uint32_t rolemask, int sort_order,
+                                 int *pid_list, int list_sz);
+
 /* defined in task.c */
 extern ledger_template_t task_ledger_template;
 
@@ -67,20 +76,103 @@ lck_grp_attr_t coalitions_lck_grp_attr;
 decl_lck_mtx_data(static,coalitions_list_lock);
 static uint64_t coalition_count;
 static uint64_t coalition_next_id = 1;
-static queue_head_t coalitions;
+static queue_head_t coalitions_q;
 
-coalition_t default_coalition;
+coalition_t init_coalition[COALITION_NUM_TYPES];
 
 zone_t coalition_zone;
 
-struct coalition {
-       uint64_t id;                    /* monotonically increasing */
+static const char *coal_type_str(int type)
+{
+       switch(type) {
+       case COALITION_TYPE_RESOURCE:
+               return "RESOURCE";
+       case COALITION_TYPE_JETSAM:
+               return "JETSAM";
+       default:
+               return "<unknown>";
+       }
+}
+
+struct coalition_type {
+       int type;
+       int has_default;
+       /*
+        * init
+        * pre-condition: coalition just allocated (unlocked), unreferenced,
+        *                type field set
+        */
+       kern_return_t (*init)(coalition_t coal, boolean_t privileged);
+
+       /*
+        * dealloc
+        * pre-condition: coalition unlocked
+        * pre-condition: coalition refcount=0, active_count=0,
+        *                termrequested=1, terminated=1, reaped=1
+        */
+       void          (*dealloc)(coalition_t coal);
+
+       /*
+        * adopt_task
+        * pre-condition: coalition locked
+        * pre-condition: coalition !reaped and !terminated
+        */
+       kern_return_t (*adopt_task)(coalition_t coal, task_t task);
+
+       /*
+        * remove_task
+        * pre-condition: coalition locked
+        * pre-condition: task has been removed from coalition's task list
+        */
+       kern_return_t (*remove_task)(coalition_t coal, task_t task);
+
+       /*
+        * set_taskrole
+        * pre-condition: coalition locked
+        * pre-condition: task added to coalition's task list,
+        *                active_count >= 1 (at least the given task is active)
+        */
+       kern_return_t (*set_taskrole)(coalition_t coal, task_t task, int role);
+
+       /*
+        * get_taskrole
+        * pre-condition: coalition locked
+        * pre-condition: task added to coalition's task list,
+        *                active_count >= 1 (at least the given task is active)
+        */
+       int (*get_taskrole)(coalition_t coal, task_t task);
+
+       /*
+        * iterate_tasks
+        * pre-condition: coalition locked
+        */
+       void (*iterate_tasks)(coalition_t coal, void *ctx, void (*callback)(coalition_t, void *, task_t));
+};
+
+/*
+ * COALITION_TYPE_RESOURCE
+ */
 
+static kern_return_t i_coal_resource_init(coalition_t coal, boolean_t privileged);
+static void          i_coal_resource_dealloc(coalition_t coal);
+static kern_return_t i_coal_resource_adopt_task(coalition_t coal, task_t task);
+static kern_return_t i_coal_resource_remove_task(coalition_t coal, task_t task);
+static kern_return_t i_coal_resource_set_taskrole(coalition_t coal,
+                                                task_t task, int role);
+static int           i_coal_resource_get_taskrole(coalition_t coal, task_t task);
+static void          i_coal_resource_iterate_tasks(coalition_t coal, void *ctx,
+                                                  void (*callback)(coalition_t, void *, task_t));
+
+struct i_resource_coalition {
        ledger_t ledger;
        uint64_t bytesread;
        uint64_t byteswritten;
        uint64_t gpu_time;
 
+       uint64_t task_count;      /* tasks that have started in this coalition */
+       uint64_t dead_task_count; /* tasks that have exited in this coalition;
+                                    subtract from task_count to get count
+                                    of "active" tasks */
        /*
         * Count the length of time this coalition had at least one active task.
         * This can be a 'denominator' to turn e.g. cpu_time to %cpu.
@@ -88,33 +180,103 @@ struct coalition {
        uint64_t last_became_nonempty_time;
        uint64_t time_nonempty;
 
-       uint64_t task_count;            /* Count of tasks that have started in this coalition */
-       uint64_t dead_task_count;       /* Count of tasks that have exited in this coalition; subtract from task_count to get count of "active" */
-       queue_head_t tasks;             /* List of active tasks in the coalition */
-
-       queue_chain_t coalitions;       /* global list of coalitions */
+       queue_head_t tasks;         /* List of active tasks in the coalition */
+};
 
-       decl_lck_mtx_data(,lock)        /* Coalition lock. */
+/*
+ * COALITION_TYPE_JETSAM
+ */
 
-       uint32_t ref_count;             /* Number of references to the memory containing this struct */
-       uint32_t active_count;          /* Number of members of (tasks in) the coalition, plus vouchers referring to the coalition */
+static kern_return_t i_coal_jetsam_init(coalition_t coal, boolean_t privileged);
+static void          i_coal_jetsam_dealloc(coalition_t coal);
+static kern_return_t i_coal_jetsam_adopt_task(coalition_t coal, task_t task);
+static kern_return_t i_coal_jetsam_remove_task(coalition_t coal, task_t task);
+static kern_return_t i_coal_jetsam_set_taskrole(coalition_t coal,
+                                              task_t task, int role);
+static int           i_coal_jetsam_get_taskrole(coalition_t coal, task_t task);
+static void          i_coal_jetsam_iterate_tasks(coalition_t coal, void *ctx,
+                                                void (*callback)(coalition_t, void *, task_t));
+
+struct i_jetsam_coalition {
+       task_t       leader;
+       queue_head_t extensions;
+       queue_head_t services;
+       queue_head_t other;
+};
 
-       unsigned int privileged : 1;    /* Members of this coalition may create and manage coalitions and may posix_spawn processes into selected coalitions */
 
+/*
+ * main coalition structure
+ */
+struct coalition {
+       uint64_t id;                /* monotonically increasing */
+       uint32_t type;
+       uint32_t ref_count;         /* Number of references to the memory containing this struct */
+       uint32_t active_count;      /* Number of members of (tasks in) the
+                                      coalition, plus vouchers referring
+                                      to the coalition */
+       uint32_t focal_task_count;   /* Number of TASK_FOREGROUND_APPLICATION tasks in the coalition */
+       uint32_t nonfocal_task_count; /* Number of TASK_BACKGROUND_APPLICATION tasks in the coalition */
+
+       /* coalition flags */
+       uint32_t privileged : 1;    /* Members of this coalition may create
+                                      and manage coalitions and may posix_spawn
+                                      processes into selected coalitions */
        /* ast? */
-
        /* voucher */
+       uint32_t termrequested : 1; /* launchd has requested termination when coalition becomes empty */
+       uint32_t terminated : 1;    /* coalition became empty and spawns are now forbidden */
+       uint32_t reaped : 1;        /* reaped, invisible to userspace, but waiting for ref_count to go to zero */
+       uint32_t notified : 1;      /* no-more-processes notification was sent via special port */
+#if defined(DEVELOPMENT) || defined(DEBUG)
+       uint32_t should_notify : 1; /* should this coalition send notifications (default: yes) */
+#endif
 
-       /* state of the coalition */
-       unsigned int termrequested : 1;         /* launchd has requested termination when coalition becomes empty */
-       unsigned int terminated : 1;            /* coalition became empty and spawns are now forbidden */
-       unsigned int reaped : 1;                /* reaped, invisible to userspace, but waiting for ref_count to go to zero */
-       unsigned int notified : 1;              /* no-more-processes notification was sent via special port */
+       queue_chain_t coalitions;   /* global list of coalitions */
 
-       uint32_t focal_tasks_count;     /* count of TASK_FOREGROUND_APPLICATION tasks in the coalition */
-       uint32_t non_focal_tasks_count; /* count of TASK_BACKGROUND_APPLICATION tasks in the coalition */
+       decl_lck_mtx_data(,lock)    /* Coalition lock. */
+
+       /* put coalition type-specific structures here */
+       union {
+               struct i_resource_coalition  r;
+               struct i_jetsam_coalition    j;
+       };
+};
+
+/*
+ * register different coalition types:
+ * these must be kept in the order specified in coalition.h
+ */
+static const struct coalition_type
+s_coalition_types[COALITION_NUM_TYPES] = {
+       {
+               COALITION_TYPE_RESOURCE,
+               1,
+               i_coal_resource_init,
+               i_coal_resource_dealloc,
+               i_coal_resource_adopt_task,
+               i_coal_resource_remove_task,
+               i_coal_resource_set_taskrole,
+               i_coal_resource_get_taskrole,
+               i_coal_resource_iterate_tasks,
+       },
+       {
+               COALITION_TYPE_JETSAM,
+               1,
+               i_coal_jetsam_init,
+               i_coal_jetsam_dealloc,
+               i_coal_jetsam_adopt_task,
+               i_coal_jetsam_remove_task,
+               i_coal_jetsam_set_taskrole,
+               i_coal_jetsam_get_taskrole,
+               i_coal_jetsam_iterate_tasks,
+       },
 };
 
+#define coal_call(coal, func, ...) \
+       (s_coalition_types[(coal)->type].func)(coal, ## __VA_ARGS__)
+
+
 #define coalition_lock(c) do{ lck_mtx_lock(&c->lock); }while(0)
 #define coalition_unlock(c) do{ lck_mtx_unlock(&c->lock); }while(0)
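
Note: per-type behaviour is now table-driven: s_coalition_types[] holds one struct coalition_type per coalition type, and coal_call() dispatches through it by coal->type. A sketch of a caller honouring the adopt_task pre-condition noted above; the wrapper name is hypothetical (coalition_create_internal() below uses the same macro for init):

	static kern_return_t
	example_adopt(coalition_t coal, task_t task)
	{
		kern_return_t kr;

		coalition_lock(coal);              /* adopt_task requires the coalition lock */
		kr = coal_call(coal, adopt_task, task);
		coalition_unlock(coal);
		return kr;
	}
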
 
@@ -133,26 +295,137 @@ coalition_notify_user(uint64_t id, uint32_t flags)
 }
 
 /*
- * coalition_find_by_id_internal
- * Returns: Coalition object with specified id, NOT referenced.
- *          If not found, returns COALITION_NULL.
- * Condition: coalitions_list_lock must be LOCKED.
+ *
+ * COALITION_TYPE_RESOURCE
+ *
  */
-static coalition_t
-coalition_find_by_id_internal(uint64_t coal_id)
+static kern_return_t
+i_coal_resource_init(coalition_t coal, boolean_t privileged)
 {
-       if (coal_id == 0) {
-               return COALITION_NULL;
+       (void)privileged;
+       assert(coal && coal->type == COALITION_TYPE_RESOURCE);
+       coal->r.ledger = ledger_instantiate(task_ledger_template,
+                                           LEDGER_CREATE_ACTIVE_ENTRIES);
+       if (coal->r.ledger == NULL)
+               return KERN_RESOURCE_SHORTAGE;
+
+       queue_init(&coal->r.tasks);
+
+       return KERN_SUCCESS;
+}
+
+static void
+i_coal_resource_dealloc(coalition_t coal)
+{
+       assert(coal && coal->type == COALITION_TYPE_RESOURCE);
+       ledger_dereference(coal->r.ledger);
+}
+
+static kern_return_t
+i_coal_resource_adopt_task(coalition_t coal, task_t task)
+{
+       struct i_resource_coalition *cr;
+
+       assert(coal && coal->type == COALITION_TYPE_RESOURCE);
+       assert(queue_empty(&task->task_coalition[COALITION_TYPE_RESOURCE]));
+
+       cr = &coal->r;
+       cr->task_count++;
+
+       if (cr->task_count < cr->dead_task_count) {
+               panic("%s: coalition %p id:%llu type:%s task_count(%llu) < dead_task_count(%llu)",
+                     __func__, coal, coal->id, coal_type_str(coal->type),
+                     cr->task_count, cr->dead_task_count);
        }
 
-       lck_mtx_assert(&coalitions_list_lock, LCK_MTX_ASSERT_OWNED);
-       coalition_t coal;
-       queue_iterate(&coalitions, coal, coalition_t, coalitions) {
-               if (coal->id == coal_id) {
-                       return coal;
-               }
+       /* If moving from 0->1 active tasks */
+       if (cr->task_count - cr->dead_task_count == 1) {
+               cr->last_became_nonempty_time = mach_absolute_time();
        }
-       return COALITION_NULL;
+
+       /* put the task on the coalition's list of tasks */
+       enqueue_tail(&cr->tasks, &task->task_coalition[COALITION_TYPE_RESOURCE]);
+
+       coal_dbg("Added PID:%d to id:%llu, task_count:%llu, dead_count:%llu, nonempty_time:%llu",
+                task_pid(task), coal->id, cr->task_count, cr->dead_task_count,
+                cr->last_became_nonempty_time);
+
+       return KERN_SUCCESS;
+}
+
+static kern_return_t
+i_coal_resource_remove_task(coalition_t coal, task_t task)
+{
+       struct i_resource_coalition *cr;
+
+       assert(coal && coal->type == COALITION_TYPE_RESOURCE);
+       assert(task->coalition[COALITION_TYPE_RESOURCE] == coal);
+       assert(!queue_empty(&task->task_coalition[COALITION_TYPE_RESOURCE]));
+
+       /*
+        * handle resource coalition accounting rollup for dead tasks
+        */
+       cr = &coal->r;
+
+       cr->dead_task_count++;
+
+       if (cr->task_count < cr->dead_task_count) {
+               panic("%s: coalition %p id:%llu type:%s task_count(%llu) < dead_task_count(%llu)",
+                     __func__, coal, coal->id, coal_type_str(coal->type), cr->task_count, cr->dead_task_count);
+       }
+
+       /* If moving from 1->0 active tasks */
+       if (cr->task_count - cr->dead_task_count == 0) {
+               uint64_t last_time_nonempty = mach_absolute_time() - cr->last_became_nonempty_time;
+               cr->last_became_nonempty_time = 0;
+               cr->time_nonempty += last_time_nonempty;
+       }
+
+       ledger_rollup(cr->ledger, task->ledger);
+       cr->bytesread += task->task_io_stats->disk_reads.size;
+       cr->byteswritten += task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size;
+       cr->gpu_time += task_gpu_utilisation(task);
+
+       /* remove the task from the coalition's list */
+       remqueue(&task->task_coalition[COALITION_TYPE_RESOURCE]);
+       queue_chain_init(task->task_coalition[COALITION_TYPE_RESOURCE]);
+
+       coal_dbg("removed PID:%d from id:%llu, task_count:%llu, dead_count:%llu",
+                task_pid(task), coal->id, cr->task_count, cr->dead_task_count);
+
+       return KERN_SUCCESS;
+}
+
+static kern_return_t
+i_coal_resource_set_taskrole(__unused coalition_t coal,
+                           __unused task_t task, __unused int role)
+{
+       return KERN_SUCCESS;
+}
+
+static int
+i_coal_resource_get_taskrole(__unused coalition_t coal, __unused task_t task)
+{
+       task_t t;
+
+       assert(coal && coal->type == COALITION_TYPE_RESOURCE);
+
+       qe_foreach_element(t, &coal->r.tasks, task_coalition[COALITION_TYPE_RESOURCE]) {
+               if (t == task)
+                       return COALITION_TASKROLE_UNDEF;
+       }
+
+       return -1;
+}
+
+static void
+i_coal_resource_iterate_tasks(coalition_t coal, void *ctx, void (*callback)(coalition_t, void *, task_t))
+{
+       task_t t;
+       assert(coal && coal->type == COALITION_TYPE_RESOURCE);
+
+       qe_foreach_element(t, &coal->r.tasks, task_coalition[COALITION_TYPE_RESOURCE])
+               callback(coal, ctx, t);
 }
 
 kern_return_t
@@ -161,10 +434,12 @@ coalition_resource_usage_internal(coalition_t coal, struct coalition_resource_us
        kern_return_t kr;
        ledger_amount_t credit, debit;
 
+       if (coal->type != COALITION_TYPE_RESOURCE)
+               return KERN_INVALID_ARGUMENT;
+
        ledger_t sum_ledger = ledger_instantiate(task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES);
-       if (sum_ledger == LEDGER_NULL) {
+       if (sum_ledger == LEDGER_NULL)
                return KERN_RESOURCE_SHORTAGE;
-       }
 
        coalition_lock(coal);
 
@@ -172,29 +447,51 @@ coalition_resource_usage_internal(coalition_t coal, struct coalition_resource_us
         * Start with the coalition's ledger, which holds the totals from all
         * the dead tasks.
         */
-       ledger_rollup(sum_ledger, coal->ledger);
-       uint64_t bytesread = coal->bytesread;
-       uint64_t byteswritten = coal->byteswritten;
-       uint64_t gpu_time = coal->gpu_time;
+       ledger_rollup(sum_ledger, coal->r.ledger);
+       uint64_t bytesread = coal->r.bytesread;
+       uint64_t byteswritten = coal->r.byteswritten;
+       uint64_t gpu_time = coal->r.gpu_time;
+       int64_t cpu_time_billed_to_me = 0;
+       int64_t cpu_time_billed_to_others = 0;
+
+       kr = ledger_get_balance(sum_ledger, task_ledgers.cpu_time_billed_to_me, (int64_t *)&cpu_time_billed_to_me);
+       if (kr != KERN_SUCCESS || cpu_time_billed_to_me < 0) {
+#if DEVELOPMENT || DEBUG
+               printf("ledger_get_balance failed or ledger negative in coalition_resource_usage_internal: %lld\n", cpu_time_billed_to_me);
+#endif /* DEVELOPMENT || DEBUG */
+               cpu_time_billed_to_me = 0;
+       }
+
+       kr = ledger_get_balance(sum_ledger, task_ledgers.cpu_time_billed_to_others, (int64_t *)&cpu_time_billed_to_others);
+       if (kr != KERN_SUCCESS || cpu_time_billed_to_others < 0) {
+#if DEVELOPMENT || DEBUG
+               printf("ledger_get_balance failed or ledger negative in coalition_resource_usage_internal: %lld\n", cpu_time_billed_to_others);
+#endif /* DEVELOPMENT || DEBUG */
+               cpu_time_billed_to_others = 0;
+       }
 
        /*
         * Add to that all the active tasks' ledgers. Tasks cannot deallocate
         * out from under us, since we hold the coalition lock.
+        * Do not use the on-behalf of cpu time from ledger for live tasks, since
+        * it will not have cpu time for active linkages between tasks.
         */
        task_t task;
-       queue_iterate(&coal->tasks, task, task_t, coalition_tasks) {
+       qe_foreach_element(task, &coal->r.tasks, task_coalition[COALITION_TYPE_RESOURCE]) {
                ledger_rollup(sum_ledger, task->ledger);
                bytesread += task->task_io_stats->disk_reads.size;
                byteswritten += task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size;
                gpu_time += task_gpu_utilisation(task);
+               cpu_time_billed_to_me += (int64_t)bank_billed_time(task->bank_context);
+               cpu_time_billed_to_others += (int64_t)bank_serviced_time(task->bank_context);
        }
 
        /* collect information from the coalition itself */
-       cru_out->tasks_started = coal->task_count;
-       cru_out->tasks_exited = coal->dead_task_count;
+       cru_out->tasks_started = coal->r.task_count;
+       cru_out->tasks_exited = coal->r.dead_task_count;
 
-       uint64_t time_nonempty = coal->time_nonempty;
-       uint64_t last_became_nonempty_time = coal->last_became_nonempty_time;
+       uint64_t time_nonempty = coal->r.time_nonempty;
+       uint64_t last_became_nonempty_time = coal->r.last_became_nonempty_time;
 
        coalition_unlock(coal);
 
@@ -205,6 +502,8 @@ coalition_resource_usage_internal(coalition_t coal, struct coalition_resource_us
                credit = 0;
        }
        cru_out->cpu_time = credit;
+       cru_out->cpu_time_billed_to_me = (uint64_t)cpu_time_billed_to_me;
+       cru_out->cpu_time_billed_to_others = (uint64_t)cpu_time_billed_to_others;
 
        kr = ledger_get_entries(sum_ledger, task_ledgers.interrupt_wakeups,
                        &credit, &debit);
@@ -235,43 +534,242 @@ coalition_resource_usage_internal(coalition_t coal, struct coalition_resource_us
        return KERN_SUCCESS;
 }
 
+/*
+ *
+ * COALITION_TYPE_JETSAM
+ *
+ */
+static kern_return_t
+i_coal_jetsam_init(coalition_t coal, boolean_t privileged)
+{
+       assert(coal && coal->type == COALITION_TYPE_JETSAM);
+       (void)privileged;
+
+       coal->j.leader= TASK_NULL;
+       queue_head_init(coal->j.extensions);
+       queue_head_init(coal->j.services);
+       queue_head_init(coal->j.other);
+
+       return KERN_SUCCESS;
+}
+
+static void
+i_coal_jetsam_dealloc(__unused coalition_t coal)
+{
+       assert(coal && coal->type == COALITION_TYPE_JETSAM);
+
+       /* the coalition should be completely clear at this point */
+       assert(queue_empty(&coal->j.extensions));
+       assert(queue_empty(&coal->j.services));
+       assert(queue_empty(&coal->j.other));
+       assert(coal->j.leader == TASK_NULL);
+}
+
+static kern_return_t
+i_coal_jetsam_adopt_task(coalition_t coal, task_t task)
+{
+       struct i_jetsam_coalition *cj;
+       assert(coal && coal->type == COALITION_TYPE_JETSAM);
+
+       cj = &coal->j;
+
+       assert(queue_empty(&task->task_coalition[COALITION_TYPE_JETSAM]));
+
+       /* put each task initially in the "other" list */
+       enqueue_tail(&cj->other, &task->task_coalition[COALITION_TYPE_JETSAM]);
+       coal_dbg("coalition %lld adopted PID:%d as UNDEF",
+                coal->id, task_pid(task));
+
+       return KERN_SUCCESS;
+}
+
+static kern_return_t
+i_coal_jetsam_remove_task(coalition_t coal, task_t task)
+{
+       assert(coal && coal->type == COALITION_TYPE_JETSAM);
+       assert(task->coalition[COALITION_TYPE_JETSAM] == coal);
+
+       coal_dbg("removing PID:%d from coalition id:%lld",
+                task_pid(task), coal->id);
+
+       if (task == coal->j.leader) {
+               coal->j.leader = NULL;
+               coal_dbg("    PID:%d was the leader!", task_pid(task));
+       } else {
+               assert(!queue_empty(&task->task_coalition[COALITION_TYPE_JETSAM]));
+       }
+
+       /* remove the task from the specific coalition role queue */
+       remqueue(&task->task_coalition[COALITION_TYPE_JETSAM]);
+       queue_chain_init(task->task_coalition[COALITION_TYPE_RESOURCE]);
+
+       return KERN_SUCCESS;
+}
+
+static kern_return_t
+i_coal_jetsam_set_taskrole(coalition_t coal, task_t task, int role)
+{
+       struct i_jetsam_coalition *cj;
+       queue_t q = NULL;
+       assert(coal && coal->type == COALITION_TYPE_JETSAM);
+       assert(task->coalition[COALITION_TYPE_JETSAM] == coal);
+
+       cj = &coal->j;
+
+       switch (role) {
+       case COALITION_TASKROLE_LEADER:
+               coal_dbg("setting PID:%d as LEADER of %lld",
+                        task_pid(task), coal->id);
+               if (cj->leader != TASK_NULL) {
+                       /* re-queue the existing leader onto the "other" list */
+                       coal_dbg("    re-queue existing leader (%d) as OTHER",
+                                task_pid(cj->leader));
+                       re_queue_tail(&cj->other, &cj->leader->task_coalition[COALITION_TYPE_JETSAM]);
+               }
+               /*
+                * remove the task from the "other" list
+                * (where it was put by default)
+                */
+               remqueue(&task->task_coalition[COALITION_TYPE_JETSAM]);
+               queue_chain_init(task->task_coalition[COALITION_TYPE_JETSAM]);
+
+               /* set the coalition leader */
+               cj->leader = task;
+               break;
+       case COALITION_TASKROLE_UNDEF:
+               coal_dbg("setting PID:%d as UNDEF in %lld",
+                        task_pid(task), coal->id);
+               q = (queue_t)&cj->other;
+               break;
+       case COALITION_TASKROLE_XPC:
+               coal_dbg("setting PID:%d as XPC in %lld",
+                        task_pid(task), coal->id);
+               q = (queue_t)&cj->services;
+               break;
+       case COALITION_TASKROLE_EXT:
+               coal_dbg("setting PID:%d as EXT in %lld",
+                        task_pid(task), coal->id);
+               q = (queue_t)&cj->extensions;
+               break;
+       default:
+               panic("%s: invalid role(%d) for task", __func__, role);
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       if (q != NULL)
+               re_queue_tail(q, &task->task_coalition[COALITION_TYPE_JETSAM]);
+
+       return KERN_SUCCESS;
+}
+
+static int
+i_coal_jetsam_get_taskrole(coalition_t coal, task_t task)
+{
+       struct i_jetsam_coalition *cj;
+       task_t t;
+
+       assert(coal && coal->type == COALITION_TYPE_JETSAM);
+       assert(task->coalition[COALITION_TYPE_JETSAM] == coal);
+
+       cj = &coal->j;
+
+       if (task == cj->leader)
+               return COALITION_TASKROLE_LEADER;
+
+       qe_foreach_element(t, &cj->services, task_coalition[COALITION_TYPE_JETSAM]) {
+               if (t == task)
+                       return COALITION_TASKROLE_XPC;
+       }
+
+       qe_foreach_element(t, &cj->extensions, task_coalition[COALITION_TYPE_JETSAM]) {
+               if (t == task)
+                       return COALITION_TASKROLE_EXT;
+       }
+
+       qe_foreach_element(t, &cj->other, task_coalition[COALITION_TYPE_JETSAM]) {
+               if (t == task)
+                       return COALITION_TASKROLE_UNDEF;
+       }
+
+       /* task not in the coalition?! */
+       return -1;
+}
+
+static void
+i_coal_jetsam_iterate_tasks(coalition_t coal, void *ctx, void (*callback)(coalition_t, void *, task_t))
+{
+       struct i_jetsam_coalition *cj;
+       task_t t;
+
+       assert(coal && coal->type == COALITION_TYPE_JETSAM);
+
+       cj = &coal->j;
+
+       if (cj->leader)
+               callback(coal, ctx, cj->leader);
+
+       qe_foreach_element(t, &cj->services, task_coalition[COALITION_TYPE_JETSAM])
+               callback(coal, ctx, t);
+
+       qe_foreach_element(t, &cj->extensions, task_coalition[COALITION_TYPE_JETSAM])
+               callback(coal, ctx, t);
+
+       qe_foreach_element(t, &cj->other, task_coalition[COALITION_TYPE_JETSAM])
+               callback(coal, ctx, t);
+}
+
+
+/*
+ *
+ * Main Coalition implementation
+ *
+ */
+
 /*
  * coalition_create_internal
  * Returns: New coalition object, referenced for the caller and unlocked.
  * Condition: coalitions_list_lock must be UNLOCKED.
  */
 kern_return_t
-coalition_create_internal(coalition_t *out, boolean_t privileged)
+coalition_create_internal(int type, boolean_t privileged, coalition_t *out)
 {
-       struct coalition *new_coal = (struct coalition *)zalloc(coalition_zone);
-       if (new_coal == COALITION_NULL) {
+       kern_return_t kr;
+       struct coalition *new_coal;
+
+       if (type < 0 || type > COALITION_TYPE_MAX)
+               return KERN_INVALID_ARGUMENT;
+
+       new_coal = (struct coalition *)zalloc(coalition_zone);
+       if (new_coal == COALITION_NULL)
                return KERN_RESOURCE_SHORTAGE;
-       }
        bzero(new_coal, sizeof(*new_coal));
 
-       new_coal->ledger = ledger_instantiate(task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES);
-       if (new_coal->ledger == NULL) {
+       new_coal->type = type;
+
+       /* initialize type-specific resources */
+       kr = coal_call(new_coal, init, privileged);
+       if (kr != KERN_SUCCESS) {
                zfree(coalition_zone, new_coal);
-               return KERN_RESOURCE_SHORTAGE;
+               return kr;
        }
 
        /* One for caller, one for coalitions list */
        new_coal->ref_count = 2;
 
        new_coal->privileged = privileged ? TRUE : FALSE;
+#if defined(DEVELOPMENT) || defined(DEBUG)
+       new_coal->should_notify = 1;
+#endif
 
        lck_mtx_init(&new_coal->lock, &coalitions_lck_grp, &coalitions_lck_attr);
-       queue_init(&new_coal->tasks);
 
        lck_mtx_lock(&coalitions_list_lock);
        new_coal->id = coalition_next_id++;
        coalition_count++;
-       queue_enter(&coalitions, new_coal, coalition_t, coalitions);
+       enqueue_tail(&coalitions_q, &new_coal->coalitions);
        lck_mtx_unlock(&coalitions_list_lock);
 
-#if COALITION_DEBUG
-       printf("%s: new coal id %llu\n", __func__, new_coal->id);
-#endif
+       coal_dbg("id:%llu, type:%s", new_coal->id, coal_type_str(new_coal->type));
 
        *out = new_coal;
        return KERN_SUCCESS;
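
Note: coalition_create_internal() now takes the coalition type up front and defers type-specific setup to the per-type init hook. A hypothetical caller creating an unprivileged jetsam coalition and later dropping its reference (illustrative only):

	coalition_t coal = COALITION_NULL;
	kern_return_t kr;

	kr = coalition_create_internal(COALITION_TYPE_JETSAM, FALSE, &coal);
	if (kr == KERN_SUCCESS) {
		/* ... hand the coalition out, adopt tasks, etc. ... */
		coalition_release(coal);   /* drop the caller's reference */
	}
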
@@ -284,37 +782,61 @@ coalition_create_internal(coalition_t *out, boolean_t privileged)
 void
 coalition_release(coalition_t coal)
 {
-       boolean_t do_dealloc = FALSE;
-
        /* TODO: This can be done with atomics. */
        coalition_lock(coal);
        coal->ref_count--;
-       if (coal->ref_count == 0) {
-               do_dealloc = TRUE;
-       }
+
 #if COALITION_DEBUG
        uint32_t rc = coal->ref_count;
+       uint32_t ac = coal->active_count;
 #endif /* COALITION_DEBUG */
 
+       coal_dbg("id:%llu type:%s ref_count:%u active_count:%u%s",
+                coal->id, coal_type_str(coal->type), rc, ac,
+                rc <= 0 ? ", will deallocate now" : "");
+
+       if (coal->ref_count > 0) {
+               coalition_unlock(coal);
+               return;
+       }
+
+       assert(coal->termrequested);
+       assert(coal->terminated);
+       assert(coal->active_count == 0);
+       assert(coal->reaped);
+       assert(coal->focal_task_count == 0);
+       assert(coal->nonfocal_task_count == 0);
+
+       coal_call(coal, dealloc);
+
        coalition_unlock(coal);
 
-#if COALITION_DEBUG
-       printf("%s: coal %llu ref_count-- -> %u%s\n", __func__, coal->id, rc,
-                       do_dealloc ? ", will deallocate now" : "");
-#endif /* COALITION_DEBUG */
+       lck_mtx_destroy(&coal->lock, &coalitions_lck_grp);
 
-       if (do_dealloc) {
-               assert(coal->termrequested);
-               assert(coal->terminated);
-               assert(coal->active_count == 0);
-               assert(coal->reaped);
-               assert(coal->focal_tasks_count == 0);
-               assert(coal->non_focal_tasks_count == 0);
+       zfree(coalition_zone, coal);
+}
 
-               ledger_dereference(coal->ledger);
-               lck_mtx_destroy(&coal->lock, &coalitions_lck_grp);
-               zfree(coalition_zone, coal);
+/*
+ * coalition_find_by_id_internal
+ * Returns: Coalition object with specified id, NOT referenced.
+ *          If not found, returns COALITION_NULL.
+ * Condition: coalitions_list_lock must be LOCKED.
+ */
+static coalition_t
+coalition_find_by_id_internal(uint64_t coal_id)
+{
+       if (coal_id == 0) {
+               return COALITION_NULL;
        }
+
+       lck_mtx_assert(&coalitions_list_lock, LCK_MTX_ASSERT_OWNED);
+       coalition_t coal;
+       qe_foreach_element(coal, &coalitions_q, coalitions) {
+               if (coal->id == coal_id) {
+                       return coal;
+               }
+       }
+       return COALITION_NULL;
 }
 
 /*
@@ -346,8 +868,8 @@ coalition_find_by_id(uint64_t cid)
        }
 
        if (coal->ref_count == 0) {
-               panic("resurrecting coalition %p id %llu, active_count = %u\n",
-                               coal, coal->id, coal->active_count);
+               panic("resurrecting coalition %p id:%llu type:%s, active_count:%u\n",
+                               coal, coal->id, coal_type_str(coal->type), coal->active_count);
        }
        coal->ref_count++;
 #if COALITION_DEBUG
@@ -357,9 +879,9 @@ coalition_find_by_id(uint64_t cid)
        coalition_unlock(coal);
        lck_mtx_unlock(&coalitions_list_lock);
 
-#if COALITION_DEBUG
-       printf("%s: coal %llu ref_count++ -> %u\n", __func__, coal->id, rc);
-#endif
+       coal_dbg("id:%llu type:%s ref_count:%u",
+                coal->id, coal_type_str(coal->type), rc);
+
        return coal;
 }
 
@@ -397,8 +919,8 @@ coalition_find_and_activate_by_id(uint64_t cid)
        }
 
        if (coal->ref_count == 0) {
-               panic("resurrecting coalition %p id %llu, active_count = %u\n",
-                               coal, coal->id, coal->active_count);
+               panic("resurrecting coalition %p id:%llu type:%s, active_count:%u\n",
+                               coal, coal->id, coal_type_str(coal->type), coal->active_count);
        }
 
        coal->ref_count++;
@@ -412,10 +934,9 @@ coalition_find_and_activate_by_id(uint64_t cid)
        coalition_unlock(coal);
        lck_mtx_unlock(&coalitions_list_lock);
 
-#if COALITION_DEBUG
-       printf("%s: coal %llu ref_count++ -> %u, active_count++ -> %u\n",
-                       __func__, coal->id, rc, ac);
-#endif
+       coal_dbg("id:%llu type:%s ref_count:%u, active_count:%u",
+                coal->id, coal_type_str(coal->type), rc, ac);
+
        return coal;
 }
 
@@ -425,10 +946,41 @@ coalition_id(coalition_t coal)
        return coal->id;
 }
 
-uint64_t
-task_coalition_id(task_t task)
+void
+task_coalition_ids(task_t task, uint64_t ids[COALITION_NUM_TYPES])
+{
+       int i;
+       for (i = 0; i < COALITION_NUM_TYPES; i++) {
+               if (task->coalition[i])
+                       ids[i] = task->coalition[i]->id;
+               else
+                       ids[i] = 0;
+       }
+}
+
+void
+task_coalition_roles(task_t task, int roles[COALITION_NUM_TYPES])
 {
-       return task->coalition->id;
+       int i;
+       memset(roles, 0, COALITION_NUM_TYPES * sizeof(roles[0]));
+
+       for (i = 0; i < COALITION_NUM_TYPES; i++) {
+               if (task->coalition[i]) {
+                       coalition_lock(task->coalition[i]);
+                       roles[i] = coal_call(task->coalition[i],
+                                            get_taskrole, task);
+                       coalition_unlock(task->coalition[i]);
+               } else {
+                       roles[i] = -1;
+               }
+       }
+}
+
+
+int
+coalition_type(coalition_t coal)
+{
+       return coal->type;
 }
 
 boolean_t
@@ -438,50 +990,107 @@ coalition_is_privileged(coalition_t coal)
 }
 
 boolean_t
-task_is_in_privileged_coalition(task_t task)
+task_is_in_privileged_coalition(task_t task, int type)
 {
-       return task->coalition->privileged || unrestrict_coalition_syscalls;
+       if (type < 0 || type > COALITION_TYPE_MAX)
+               return FALSE;
+       if (unrestrict_coalition_syscalls)
+               return TRUE;
+       if (!task->coalition[type])
+               return FALSE;
+       return task->coalition[type]->privileged;
 }
 
-/*
- * coalition_get_ledger
- * Returns: Coalition's ledger, NOT referenced.
- * Condition: Caller must have a coalition reference.
- */
-ledger_t
-coalition_get_ledger(coalition_t coal)
+void task_coalition_update_gpu_stats(task_t task, uint64_t gpu_ns_delta)
 {
-       return coal->ledger;
+       coalition_t coal;
+
+       assert(task != TASK_NULL);
+       if (gpu_ns_delta == 0)
+               return;
+
+       coal = task->coalition[COALITION_TYPE_RESOURCE];
+       assert(coal != COALITION_NULL);
+
+       coalition_lock(coal);
+       coal->r.gpu_time += gpu_ns_delta;
+       coalition_unlock(coal);
 }
 
-/*
- * This is the function to use when you already hold an activation on the
- * coalition, and want to extend it to a second activation owned by a new
- * object, like when a task in the coalition calls fork(). This is analogous
- * to taking a second reference when you already hold one.
- * See also coalition_find_and_activate_by_id.
- */
-kern_return_t
-coalition_extend_active(coalition_t coal)
+uint32_t task_coalition_adjust_focal_count(task_t task, int count)
 {
-       coalition_lock(coal);
+       coalition_t coal;
+       uint32_t ret;
 
-       if (coal->reaped) {
-               panic("cannot make a reaped coalition active again");
-       }
+       /*
+        * For now: only use the resource coalition. Perhaps in the
+        * future we may combine all coalition types, or even make
+        * a special coalition type just for this.
+        */
+       coal = task->coalition[COALITION_TYPE_RESOURCE];
+       assert(coal != COALITION_NULL);
 
-       if (coal->terminated) {
-               coalition_unlock(coal);
-               return KERN_TERMINATED;
-       }
+       ret = hw_atomic_add(&coal->focal_task_count, count);
 
-       assert(coal->active_count > 0);
-       coal->active_count++;
+       /* catch underflow */
+       assert(ret != UINT32_MAX);
+       return ret;
+}
+
+uint32_t task_coalition_focal_count(task_t task)
+{
+       coalition_t coal;
+       coal = task->coalition[COALITION_TYPE_RESOURCE];
+       assert(coal != COALITION_NULL);
+
+       return coal->focal_task_count;
+}
+
+uint32_t task_coalition_adjust_nonfocal_count(task_t task, int count)
+{
+       coalition_t coal;
+       uint32_t ret;
+
+       /*
+        * For now: only use the resource coalition. Perhaps in the
+        * future we may combine all coalition types, or even make
+        * a special coalition type just for this.
+        */
+       coal = task->coalition[COALITION_TYPE_RESOURCE];
+       assert(coal != COALITION_NULL);
+
+       ret = hw_atomic_add(&coal->nonfocal_task_count, count);
+
+       /* catch underflow */
+       assert(ret != UINT32_MAX);
+       return ret;
+}
+
+uint32_t task_coalition_nonfocal_count(task_t task)
+{
+       coalition_t coal;
+       coal = task->coalition[COALITION_TYPE_RESOURCE];
+       assert(coal != COALITION_NULL);
+
+       return coal->nonfocal_task_count;
+}
+
+void coalition_for_each_task(coalition_t coal, void *ctx,
+                            void (*callback)(coalition_t, void *, task_t))
+{
+       assert(coal != COALITION_NULL);
+
+       coal_dbg("iterating tasks in coalition %p id:%llu type:%s, active_count:%u",
+                coal, coal->id, coal_type_str(coal->type), coal->active_count);
+
+       coalition_lock(coal);
+
+       coal_call(coal, iterate_tasks, ctx, callback);
 
        coalition_unlock(coal);
-       return KERN_SUCCESS;
 }
 
+
 void
 coalition_remove_active(coalition_t coal)
 {
@@ -506,13 +1115,26 @@ coalition_remove_active(coalition_t coal)
                assert(!coal->notified);
 
                coal->notified = TRUE;
+#if defined(DEVELOPMENT) || defined(DEBUG)
+               do_notify = coal->should_notify;
+#else
                do_notify = TRUE;
+#endif
                notify_id = coal->id;
                notify_flags = 0;
        }
 
+#if COALITION_DEBUG
+       uint64_t cid = coal->id;
+       uint32_t rc = coal->ref_count;
+       int      ac = coal->active_count;
+       int      ct = coal->type;
+#endif
        coalition_unlock(coal);
 
+       coal_dbg("id:%llu type:%s ref_count:%u, active_count:%u,%s",
+                cid, coal_type_str(ct), rc, ac, do_notify ? " NOTIFY" : " ");
+
        if (do_notify) {
                coalition_notify_user(notify_id, notify_flags);
        }
@@ -520,10 +1142,10 @@ coalition_remove_active(coalition_t coal)
 
 /* Used for kernel_task, launchd, launchd's early boot tasks... */
 kern_return_t
-coalition_default_adopt_task(task_t task)
+coalitions_adopt_init_task(task_t task)
 {
        kern_return_t kr;
-       kr = coalition_adopt_task(default_coalition, task);
+       kr = coalitions_adopt_task(init_coalition, task);
        if (kr != KERN_SUCCESS) {
                panic("failed to adopt task %p into default coalition: %d", task, kr);
        }
@@ -531,14 +1153,16 @@ coalition_default_adopt_task(task_t task)
 }
 
 /*
- * coalition_adopt_task
+ * coalition_adopt_task_internal
  * Condition: Coalition must be referenced and unlocked. Will fail if coalition
  * is already terminated.
  */
-kern_return_t
-coalition_adopt_task(coalition_t coal, task_t task)
+static kern_return_t
+coalition_adopt_task_internal(coalition_t coal, task_t task)
 {
-       if (task->coalition) {
+       kern_return_t kr;
+
+       if (task->coalition[coal->type]) {
                return KERN_ALREADY_IN_SET;
        }
 
@@ -549,72 +1173,152 @@ coalition_adopt_task(coalition_t coal, task_t task)
                return KERN_TERMINATED;
        }
 
+       kr = coal_call(coal, adopt_task, task);
+       if (kr != KERN_SUCCESS)
+               goto out_unlock;
+
        coal->active_count++;
 
        coal->ref_count++;
-       task->coalition = coal;
-
-       queue_enter(&coal->tasks, task, task_t, coalition_tasks);
-       coal->task_count++;
-
-       if(coal->task_count < coal->dead_task_count) {
-               panic("%s: coalition %p id %llu task_count < dead_task_count", __func__, coal, coal->id);
-       }
 
-       /* If moving from 0->1 active tasks */
-       if (coal->task_count - coal->dead_task_count == 1) {
-               coal->last_became_nonempty_time = mach_absolute_time();
-       }
+       task->coalition[coal->type] = coal;
 
+out_unlock:
 #if COALITION_DEBUG
+       (void)coal; /* need expression after label */
+       uint64_t cid = coal->id;
        uint32_t rc = coal->ref_count;
+       uint32_t ct = coal->type;
 #endif
-
        coalition_unlock(coal);
 
+       coal_dbg("task:%d, id:%llu type:%s ref_count:%u, kr=%d",
+                task_pid(task), cid, coal_type_str(ct), rc, kr);
+       return kr;
+}
+
+static kern_return_t
+coalition_remove_task_internal(task_t task, int type)
+{
+       kern_return_t kr;
+
+       coalition_t coal = task->coalition[type];
+
+       if (!coal)
+               return KERN_SUCCESS;
+
+       assert(coal->type == (uint32_t)type);
+
+       coalition_lock(coal);
+
+       kr = coal_call(coal, remove_task, task);
+
 #if COALITION_DEBUG
-       if (rc) {
-               printf("%s: coal %llu ref_count++ -> %u\n", __func__, coal->id, rc);
-       }
+       uint64_t cid = coal->id;
+       uint32_t rc = coal->ref_count;
+       int      ac = coal->active_count;
+       int      ct = coal->type;
 #endif
-       return KERN_SUCCESS;
+       coalition_unlock(coal);
+
+       coal_dbg("id:%llu type:%s ref_count:%u, active_count:%u, kr=%d",
+                cid, coal_type_str(ct), rc, ac, kr);
+
+       coalition_remove_active(coal);
+
+       return kr;
 }
 
 /*
- * coalition_remove_task
- * Condition: task must be referenced and UNLOCKED; task's coalition must be UNLOCKED
+ * coalitions_adopt_task
+ * Condition: All coalitions must be referenced and unlocked.
+ * Will fail if any coalition is already terminated.
  */
 kern_return_t
-coalition_remove_task(task_t task)
+coalitions_adopt_task(coalition_t *coals, task_t task)
 {
-       coalition_t coal = task->coalition;
-       assert(coal);
+       int i;
+       kern_return_t kr;
 
-       coalition_lock(coal);
+       if (!coals || coals[COALITION_TYPE_RESOURCE] == COALITION_NULL)
+               return KERN_INVALID_ARGUMENT;
+
+       /* verify that the incoming coalitions are what they say they are */
+       for (i = 0; i < COALITION_NUM_TYPES; i++)
+               if (coals[i] && coals[i]->type != (uint32_t)i)
+                       return KERN_INVALID_ARGUMENT;
+
+       for (i = 0; i < COALITION_NUM_TYPES; i++) {
+               kr = KERN_SUCCESS;
+               if (coals[i])
+                       kr = coalition_adopt_task_internal(coals[i], task);
+               if (kr != KERN_SUCCESS) {
+                       /* dis-associate any coalitions that just adopted this task */
+                       while (--i >= 0) {
+                               if (task->coalition[i])
+                                       coalition_remove_task_internal(task, i);
+                       }
+                       break;
+               }
+       }
+       return kr;
+}
 
-       queue_remove(&coal->tasks, task, task_t, coalition_tasks);
-       coal->dead_task_count++;
+/*
+ * coalitions_remove_task
+ * Condition: task must be referenced and UNLOCKED; all task's coalitions must be UNLOCKED
+ */
+kern_return_t
+coalitions_remove_task(task_t task)
+{
+       kern_return_t kr;
+       int i;
 
-       if(coal->task_count < coal->dead_task_count) {
-               panic("%s: coalition %p id %llu task_count < dead_task_count", __func__, coal, coal->id);
+       for (i = 0; i < COALITION_NUM_TYPES; i++) {
+               kr = coalition_remove_task_internal(task, i);
+               assert(kr == KERN_SUCCESS);
        }
 
-       /* If moving from 1->0 active tasks */
-       if (coal->task_count - coal->dead_task_count == 0) {
-               uint64_t last_time_nonempty = mach_absolute_time() - coal->last_became_nonempty_time;
-               coal->last_became_nonempty_time = 0;
-               coal->time_nonempty += last_time_nonempty;
+       return kr;
+}
+
+/*
+ * task_release_coalitions
+ * helper function to release references to all coalitions in which
+ * 'task' is a member.
+ */
+void
+task_release_coalitions(task_t task)
+{
+       int i;
+       for (i = 0; i < COALITION_NUM_TYPES; i++) {
+               if (task->coalition[i])
+                       coalition_release(task->coalition[i]);
        }
+}
 
-       ledger_rollup(coal->ledger, task->ledger);
-       coal->bytesread += task->task_io_stats->disk_reads.size;
-       coal->byteswritten += task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size;
-       coal->gpu_time += task_gpu_utilisation(task);
+/*
+ * coalitions_set_roles
+ * for each type of coalition, if the task is a member of a coalition of
+ * that type (given in the coalitions parameter) then set the role of
+ * the task within that coalition.
+ */
+kern_return_t coalitions_set_roles(coalition_t coalitions[COALITION_NUM_TYPES],
+                                  task_t task, int roles[COALITION_NUM_TYPES])
+{
+       kern_return_t kr = KERN_SUCCESS;
+       int i;
 
-       coalition_unlock(coal);
+       for (i = 0; i < COALITION_NUM_TYPES; i++) {
+               if (!coalitions[i])
+                       continue;
+               coalition_lock(coalitions[i]);
+               kr = coal_call(coalitions[i], set_taskrole, task, roles[i]);
+               coalition_unlock(coalitions[i]);
+               assert(kr == KERN_SUCCESS);
+       }
 
-       coalition_remove_active(coal);
-       return KERN_SUCCESS;
+       return kr;
 }
 
 /*
@@ -624,7 +1328,9 @@ coalition_remove_task(task_t task)
 kern_return_t
 coalition_request_terminate_internal(coalition_t coal)
 {
-       if (coal == default_coalition) {
+       assert(coal->type >= 0 && coal->type <= COALITION_TYPE_MAX);
+
+       if (coal == init_coalition[coal->type]) {
                return KERN_DEFAULT_SET;
        }
 
@@ -649,8 +1355,9 @@ coalition_request_terminate_internal(coalition_t coal)
        if (coal->active_count == 0) {
                /*
                 * We only notify once, when active_count reaches zero.
-                * We just decremented, so if it reached zero, we mustn't have
-                * notified already.
+                * We just set termrequested to zero. If the active count
+                * was already at zero (tasks died before we could request
+                * a termination notification), we should notify.
                 */
                assert(!coal->terminated);
                coal->terminated = TRUE;
@@ -658,7 +1365,11 @@ coalition_request_terminate_internal(coalition_t coal)
                assert(!coal->notified);
 
                coal->notified = TRUE;
+#if defined(DEVELOPMENT) || defined(DEBUG)
+               do_notify = coal->should_notify;
+#else
                do_notify = TRUE;
+#endif
                note_id = coal->id;
                note_flags = 0;
        }
@@ -679,7 +1390,9 @@ coalition_request_terminate_internal(coalition_t coal)
 kern_return_t
 coalition_reap_internal(coalition_t coal)
 {
-       if (coal == default_coalition) {
+       assert(coal->type <= COALITION_TYPE_MAX);
+
+       if (coal == init_coalition[coal->type]) {
                return KERN_DEFAULT_SET;
        }
 
@@ -707,7 +1420,7 @@ coalition_reap_internal(coalition_t coal)
 
        lck_mtx_lock(&coalitions_list_lock);
        coalition_count--;
-       queue_remove(&coalitions, coal, coalition_t, coalitions);
+       remqueue(&coal->coalitions);
        lck_mtx_unlock(&coalitions_list_lock);
 
        /* Release the list's reference and launchd's reference. */
@@ -717,16 +1430,43 @@ coalition_reap_internal(coalition_t coal)
        return KERN_SUCCESS;
 }
 
+#if defined(DEVELOPMENT) || defined(DEBUG)
+int coalition_should_notify(coalition_t coal)
+{
+       int should;
+       if (!coal)
+               return -1;
+       coalition_lock(coal);
+       should = coal->should_notify;
+       coalition_unlock(coal);
+
+       return should;
+}
+
+void coalition_set_notify(coalition_t coal, int notify)
+{
+       if (!coal)
+               return;
+       coalition_lock(coal);
+       coal->should_notify = !!notify;
+       coalition_unlock(coal);
+}
+#endif
+
 void
-coalition_init(void)
+coalitions_init(void)
 {
+       kern_return_t kr;
+       int i;
+       const struct coalition_type *ctype;
+
        coalition_zone = zinit(
                        sizeof(struct coalition),
                        CONFIG_COALITION_MAX * sizeof(struct coalition),
                        COALITION_CHUNK * sizeof(struct coalition),
                        "coalitions");
        zone_change(coalition_zone, Z_NOENCRYPT, TRUE);
-       queue_init(&coalitions);
+       queue_head_init(coalitions_q);
 
        if (!PE_parse_boot_argn("unrestrict_coalition_syscalls", &unrestrict_coalition_syscalls,
                sizeof (unrestrict_coalition_syscalls))) {
@@ -740,54 +1480,402 @@ coalition_init(void)
 
        init_task_ledgers();
 
-       kern_return_t kr = coalition_create_internal(&default_coalition, TRUE);
-       if (kr != KERN_SUCCESS) {
-               panic("%s: could not create default coalition: %d", __func__, kr);
+       for (i = 0, ctype = &s_coalition_types[0]; i < COALITION_NUM_TYPES; ctype++, i++) {
+               /* verify the entry in the global coalition types array */
+               if (ctype->type != i ||
+                   !ctype->init ||
+                   !ctype->dealloc ||
+                   !ctype->adopt_task ||
+                   !ctype->remove_task) {
+                       panic("%s: Malformed coalition type %s(%d) in slot for type:%s(%d)",
+                             __func__, coal_type_str(ctype->type), ctype->type, coal_type_str(i), i);
+               }
+               if (!ctype->has_default)
+                       continue;
+               kr = coalition_create_internal(ctype->type, TRUE, &init_coalition[ctype->type]);
+               if (kr != KERN_SUCCESS)
+                       panic("%s: could not create init %s coalition: kr:%d",
+                             __func__, coal_type_str(i), kr);
        }
+
        /* "Leak" our reference to the global object */
 }
 
-/* coalition focal tasks */
-uint32_t coalition_adjust_focal_task_count(coalition_t coal, int count)
+/*
+ * BSD Kernel interface functions
+ *
+ */
+static void coalition_fill_procinfo(struct coalition *coal,
+                                   struct procinfo_coalinfo *coalinfo)
 {
-       return hw_atomic_add(&coal->focal_tasks_count, count);
+       coalinfo->coalition_id = coal->id;
+       coalinfo->coalition_type = coal->type;
+       coalinfo->coalition_tasks = coalition_get_task_count(coal);
 }
 
-uint32_t coalition_focal_task_count(coalition_t coal)
+
+int coalitions_get_list(int type, struct procinfo_coalinfo *coal_list, int list_sz)
 {
-       return coal->focal_tasks_count;
+       int ncoals = 0;
+       struct coalition *coal;
+
+       lck_mtx_lock(&coalitions_list_lock);
+       qe_foreach_element(coal, &coalitions_q, coalitions) {
+               if (!coal->reaped && (type < 0 || type == (int)coal->type)) {
+                       if (coal_list && ncoals < list_sz)
+                               coalition_fill_procinfo(coal, &coal_list[ncoals]);
+                       ++ncoals;
+               }
+       }
+       lck_mtx_unlock(&coalitions_list_lock);
+
+       return ncoals;
 }
 
-uint32_t coalition_adjust_non_focal_task_count(coalition_t coal, int count)
+/*
+ * Jetsam coalition interface
+ *
+ */
+boolean_t coalition_is_leader(task_t task, int coal_type, coalition_t *coal)
 {
-       return hw_atomic_add(&coal->non_focal_tasks_count, count);
+       coalition_t c;
+       boolean_t ret;
+
+       if (coal) /* handle the error cases gracefully */
+               *coal = COALITION_NULL;
+
+       if (!task)
+               return FALSE;
+
+       if (coal_type > COALITION_TYPE_MAX)
+               return FALSE;
+
+       c = task->coalition[coal_type];
+       if (!c)
+               return FALSE;
+
+       assert((int)c->type == coal_type);
+
+       coalition_lock(c);
+
+       if (coal)
+               *coal = c;
+
+       ret = FALSE;
+       if (c->type == COALITION_TYPE_JETSAM && c->j.leader == task)
+               ret = TRUE;
+
+       coalition_unlock(c);
+
+       return ret;
 }
 
-uint32_t coalition_non_focal_task_count(coalition_t coal)
+
+int coalition_get_task_count(coalition_t coal)
 {
-       return coal->non_focal_tasks_count;
+       int ntasks = 0;
+       struct queue_entry *qe;
+       if (!coal)
+               return 0;
+
+       coalition_lock(coal);
+       switch (coal->type) {
+       case COALITION_TYPE_RESOURCE:
+               qe_foreach(qe, &coal->r.tasks)
+                       ntasks++;
+               break;
+       case COALITION_TYPE_JETSAM:
+               if (coal->j.leader)
+                       ntasks++;
+               qe_foreach(qe, &coal->j.other)
+                       ntasks++;
+               qe_foreach(qe, &coal->j.extensions)
+                       ntasks++;
+               qe_foreach(qe, &coal->j.services)
+                       ntasks++;
+               break;
+       default:
+               break;
+       }
+       coalition_unlock(coal);
+
+       return ntasks;
 }
 
-/* Call sfi_reevaluate() for every thread in the coalition */
-void coalition_sfi_reevaluate(coalition_t coal, task_t updated_task) {
+
+static uint64_t i_get_list_footprint(queue_t list, int type, int *ntasks)
+{
        task_t task;
-       thread_t thread;
+       uint64_t bytes = 0;
 
-       coalition_lock(coal);
+       qe_foreach_element(task, list, task_coalition[type]) {
+               bytes += get_task_phys_footprint(task);
+               coal_dbg("    [%d] task_pid:%d, type:%d, footprint:%lld",
+                        *ntasks, task_pid(task), type, bytes);
+               *ntasks += 1;
+       }
 
-       queue_iterate(&coal->tasks, task, task_t, coalition_tasks) {
+       return bytes;
+}
 
-               /* Skip the task we're doing this on behalf of - it's already updated */
-               if (task == updated_task)
-                       continue;
+uint64_t coalition_get_page_count(coalition_t coal, int *ntasks)
+{
+       uint64_t bytes = 0;
+       int num_tasks = 0;
+
+       if (ntasks)
+               *ntasks = 0;
+       if (!coal)
+               return bytes;
 
-               task_lock(task);
+       coalition_lock(coal);
 
-               queue_iterate(&task->threads, thread, thread_t, task_threads) {
-                               sfi_reevaluate(thread);
+       switch (coal->type) {
+       case COALITION_TYPE_RESOURCE:
+               bytes += i_get_list_footprint(&coal->r.tasks, COALITION_TYPE_RESOURCE, &num_tasks);
+               break;
+       case COALITION_TYPE_JETSAM:
+               if (coal->j.leader) {
+                       bytes += get_task_phys_footprint(coal->j.leader);
+                       num_tasks = 1;
                }
-               task_unlock(task);
+               bytes += i_get_list_footprint(&coal->j.extensions, COALITION_TYPE_JETSAM, &num_tasks);
+               bytes += i_get_list_footprint(&coal->j.services, COALITION_TYPE_JETSAM, &num_tasks);
+               bytes += i_get_list_footprint(&coal->j.other, COALITION_TYPE_JETSAM, &num_tasks);
+               break;
+       default:
+               break;
        }
+
        coalition_unlock(coal);
+
+       if (ntasks)
+               *ntasks = num_tasks;
+
+       return bytes / PAGE_SIZE_64;
 }
 
+struct coal_sort_s {
+       int pid;
+       int usr_order;
+       uint64_t bytes;
+};
+
+/*
+ * return < 0 for a < b
+ *          0 for a == b
+ *        > 0 for a > b
+ */
+typedef int (*cmpfunc_t)(const void *a, const void *b);
+
+extern void
+qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
+
+static int dflt_cmp(const void *a, const void *b)
+{
+       const struct coal_sort_s *csA = (const struct coal_sort_s *)a;
+       const struct coal_sort_s *csB = (const struct coal_sort_s *)b;
+
+       /*
+        * if both A and B are equal, use a memory descending sort
+        */
+       if (csA->usr_order == csB->usr_order)
+               return (int)((int64_t)csB->bytes - (int64_t)csA->bytes);
+
+       /* otherwise, return the relationship between user specified orders */
+       return (csA->usr_order - csB->usr_order);
+}
+
+static int mem_asc_cmp(const void *a, const void *b)
+{
+       const struct coal_sort_s *csA = (const struct coal_sort_s *)a;
+       const struct coal_sort_s *csB = (const struct coal_sort_s *)b;
+
+       return (int)((int64_t)csA->bytes - (int64_t)csB->bytes);
+}
+
+static int mem_dec_cmp(const void *a, const void *b)
+{
+       const struct coal_sort_s *csA = (const struct coal_sort_s *)a;
+       const struct coal_sort_s *csB = (const struct coal_sort_s *)b;
+
+       return (int)((int64_t)csB->bytes - (int64_t)csA->bytes);
+}
+
+static int usr_asc_cmp(const void *a, const void *b)
+{
+       const struct coal_sort_s *csA = (const struct coal_sort_s *)a;
+       const struct coal_sort_s *csB = (const struct coal_sort_s *)b;
+
+       return (csA->usr_order - csB->usr_order);
+}
+
+static int usr_dec_cmp(const void *a, const void *b)
+{
+       const struct coal_sort_s *csA = (const struct coal_sort_s *)a;
+       const struct coal_sort_s *csB = (const struct coal_sort_s *)b;
+
+       return (csB->usr_order - csA->usr_order);
+}
+
+/* avoid dynamic allocation in this path */
+#define MAX_SORTED_PIDS  80
+
+static int coalition_get_sort_list(coalition_t coal, int sort_order, queue_t list,
+                                  struct coal_sort_s *sort_array, int array_sz)
+{
+       int ntasks = 0;
+       task_t task;
+
+       assert(sort_array != NULL);
+
+       if (array_sz <= 0)
+               return 0;
+
+       if (!list) {
+               /*
+                * this function will only be called with a NULL
+                * list for JETSAM-type coalitions, and is intended
+                * to investigate the leader process
+                */
+               if (coal->type != COALITION_TYPE_JETSAM ||
+                   coal->j.leader == TASK_NULL)
+                       return 0;
+               sort_array[0].pid = task_pid(coal->j.leader);
+               switch (sort_order) {
+               case COALITION_SORT_DEFAULT:
+                       sort_array[0].usr_order = 0;
+                       /* fall-through */
+               case COALITION_SORT_MEM_ASC:
+               case COALITION_SORT_MEM_DEC:
+                       sort_array[0].bytes = get_task_phys_footprint(coal->j.leader);
+                       break;
+               case COALITION_SORT_USER_ASC:
+               case COALITION_SORT_USER_DEC:
+                       sort_array[0].usr_order = 0;
+                       break;
+               default:
+                       break;
+               }
+               return 1;
+       }
+
+       qe_foreach_element(task, list, task_coalition[coal->type]) {
+               if (ntasks >= array_sz) {
+                       printf("WARNING: more than %d pids in coalition %llu\n",
+                              MAX_SORTED_PIDS, coal->id);
+                       break;
+               }
+
+               sort_array[ntasks].pid = task_pid(task);
+
+               switch (sort_order) {
+               case COALITION_SORT_DEFAULT:
+                       sort_array[ntasks].usr_order = 0;
+                       /* fall-through */
+               case COALITION_SORT_MEM_ASC:
+               case COALITION_SORT_MEM_DEC:
+                       sort_array[ntasks].bytes = get_task_phys_footprint(task);
+                       break;
+               case COALITION_SORT_USER_ASC:
+               case COALITION_SORT_USER_DEC:
+                       sort_array[ntasks].usr_order = 0;
+                       break;
+               default:
+                       break;
+               }
+
+               ntasks++;
+       }
+
+       return ntasks;
+}
+
+int coalition_get_pid_list(coalition_t coal, uint32_t rolemask, int sort_order,
+                          int *pid_list, int list_sz)
+{
+       struct i_jetsam_coalition *cj;
+       int ntasks = 0;
+       cmpfunc_t cmp_func = NULL;
+       struct coal_sort_s sort_array[MAX_SORTED_PIDS] = { {0,0,0} }; /* keep to < 2k */
+
+       if (!coal ||
+           !(rolemask & COALITION_ROLEMASK_ALLROLES) ||
+           !pid_list || list_sz < 1) {
+               coal_dbg("Invalid parameters: coal:%p, type:%d, rolemask:0x%x, "
+                        "pid_list:%p, list_sz:%d", coal, coal ? coal->type : -1,
+                        rolemask, pid_list, list_sz);
+               return -EINVAL;
+       }
+
+       switch (sort_order) {
+       case COALITION_SORT_NOSORT:
+               cmp_func = NULL;
+               break;
+       case COALITION_SORT_DEFAULT:
+               cmp_func = dflt_cmp;
+               break;
+       case COALITION_SORT_MEM_ASC:
+               cmp_func = mem_asc_cmp;
+               break;
+       case COALITION_SORT_MEM_DEC:
+               cmp_func = mem_dec_cmp;
+               break;
+       case COALITION_SORT_USER_ASC:
+               cmp_func = usr_asc_cmp;
+               break;
+       case COALITION_SORT_USER_DEC:
+               cmp_func = usr_dec_cmp;
+               break;
+       default:
+               return -ENOTSUP;
+       }
+
+       coalition_lock(coal);
+
+       if (coal->type == COALITION_TYPE_RESOURCE) {
+               ntasks += coalition_get_sort_list(coal, sort_order, &coal->r.tasks,
+                                                 sort_array, MAX_SORTED_PIDS);
+               goto unlock_coal;
+       }
+
+       cj = &coal->j;
+
+       if (rolemask & COALITION_ROLEMASK_UNDEF)
+               ntasks += coalition_get_sort_list(coal, sort_order, &cj->other,
+                                                 sort_array + ntasks,
+                                                 MAX_SORTED_PIDS - ntasks);
+
+       if (rolemask & COALITION_ROLEMASK_XPC)
+               ntasks += coalition_get_sort_list(coal, sort_order, &cj->services,
+                                                 sort_array + ntasks,
+                                                 MAX_SORTED_PIDS - ntasks);
+
+       if (rolemask & COALITION_ROLEMASK_EXT)
+               ntasks += coalition_get_sort_list(coal, sort_order, &cj->extensions,
+                                                 sort_array + ntasks,
+                                                 MAX_SORTED_PIDS - ntasks);
+
+       if (rolemask & COALITION_ROLEMASK_LEADER)
+               ntasks += coalition_get_sort_list(coal, sort_order, NULL,
+                                                 sort_array + ntasks,
+                                                 MAX_SORTED_PIDS - ntasks);
+
+unlock_coal:
+       coalition_unlock(coal);
+
+       /* sort based on the chosen criterion (no sense sorting 1 item) */
+       if (cmp_func && ntasks > 1)
+               qsort(sort_array, ntasks, sizeof(struct coal_sort_s), cmp_func);
+
+       for (int i = 0; i < ntasks; i++) {
+               if (i >= list_sz)
+                       break;
+               coal_dbg(" [%d] PID:%d, footprint:%lld, usr_order:%d",
+                        i, sort_array[i].pid, sort_array[i].bytes,
+                        sort_array[i].usr_order);
+               pid_list[i] = sort_array[i].pid;
+       }
+
+       return ntasks;
+}
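For context, a minimal sketch (not part of this commit) of how an in-kernel consumer, hypothetically the memorystatus/jetsam path, might call the new coalition_get_pid_list() interface above; the buffer size, helper name, and printout are illustrative assumptions.

/* Illustrative only: list members of a coalition, largest memory footprint first. */
static void
example_dump_coalition_pids(coalition_t coal)
{
        int pids[32];
        int i, ntasks;

        /* include XPC services, extensions and the leader; see the rolemask flags above */
        ntasks = coalition_get_pid_list(coal,
            COALITION_ROLEMASK_XPC | COALITION_ROLEMASK_EXT | COALITION_ROLEMASK_LEADER,
            COALITION_SORT_MEM_DEC, pids, 32);

        if (ntasks < 0)
                return;         /* -EINVAL or -ENOTSUP from the routine above */

        if (ntasks > 32)
                ntasks = 32;    /* the routine reports the total; only list_sz slots are filled */

        for (i = 0; i < ntasks; i++)
                printf("coalition %llu: member pid %d\n", coalition_id(coal), pids[i]);
}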
index 1c996968849d5e00993771317919eaf39c6af238..0bd9d2d88c1207255e5d77c41a9d2fc9c912028b 100644 (file)
 #ifndef _KERN_COALITION_H_
 #define _KERN_COALITION_H_
 
+/* only kernel-private interfaces */
 #ifdef XNU_KERNEL_PRIVATE
+#include <mach/coalition.h>
 
-void coalition_init(void);
+#if CONFIG_COALITIONS
+
+void coalitions_init(void);
 
 /* These may return:
  * KERN_ALREADY_IN_SET task is already in a coalition (maybe this one, maybe a different one)
  * KERN_TERMINATED     coalition is already terminated (so it may not adopt any more tasks)
  */
-kern_return_t coalition_adopt_task(coalition_t coal, task_t task);
-kern_return_t coalition_default_adopt_task(task_t task);
+kern_return_t coalitions_adopt_task(coalition_t *coalitions, task_t task);
+kern_return_t coalitions_adopt_init_task(task_t task);
 
 /* Currently, no error conditions. If task is not already in a coalition,
  * KERN_SUCCESS is returned because removing it did not fail.
  */
-kern_return_t coalition_remove_task(task_t task);
+kern_return_t coalitions_remove_task(task_t task);
+void          task_release_coalitions(task_t task);
+
+/*
+ * coalitions_set_roles
+ * For each coalition type, set the task's role within the coalition of
+ * that type given in the 'coalitions' array (entries may be NULL).
+ */
+kern_return_t coalitions_set_roles(coalition_t coalitions[COALITION_NUM_TYPES],
+                                  task_t task, int roles[COALITION_NUM_TYPES]);
 
 uint64_t coalition_id(coalition_t coal);
-uint64_t task_coalition_id(task_t task);
+void     task_coalition_ids(task_t task, uint64_t ids[COALITION_NUM_TYPES]);
+void     task_coalition_roles(task_t task, int roles[COALITION_NUM_TYPES]);
+int      coalition_type(coalition_t coal);
+
+void     task_coalition_update_gpu_stats(task_t task, uint64_t gpu_ns_delta);
+uint32_t task_coalition_adjust_focal_count(task_t task, int count);
+uint32_t task_coalition_focal_count(task_t task);
+uint32_t task_coalition_adjust_nonfocal_count(task_t task, int count);
+uint32_t task_coalition_nonfocal_count(task_t task);
+
+void coalition_for_each_task(coalition_t coal, void *ctx,
+                            void (*callback)(coalition_t, void *, task_t));
 
 /* Returns with a reference, or COALITION_NULL.
  * There is no coalition with id 0.
@@ -58,13 +80,6 @@ coalition_t coalition_find_by_id(uint64_t coal_id);
  */
 coalition_t coalition_find_and_activate_by_id(uint64_t coal_id);
 
-/* This may return:
- * KERN_TERMINATED     coalition is terminated
- * This will panic if the coalition is already reaped, which implies
- * that it wasn't active.
- */
-kern_return_t coalition_extend_active(coalition_t coal);
-
 void coalition_remove_active(coalition_t coal);
 
 void coalition_release(coalition_t coal);
@@ -94,35 +109,53 @@ kern_return_t coalition_request_terminate_internal(coalition_t coal);
  * KERN_RESOURCE_SHORTAGE      Unable to allocate kernel resources for a
  *                             new coalition.
  */
-kern_return_t coalition_create_internal(coalition_t *out, boolean_t privileged);
+kern_return_t coalition_create_internal(int type, boolean_t privileged, coalition_t *out);
 
 boolean_t coalition_is_privileged(coalition_t coal);
-boolean_t task_is_in_privileged_coalition(task_t task);
-
-/* This struct is also defined in bsd/sys/coalition.h. Keep in sync. */
-struct coalition_resource_usage {
-       uint64_t tasks_started;
-       uint64_t tasks_exited;
-       uint64_t time_nonempty;
-       uint64_t cpu_time;
-       uint64_t interrupt_wakeups;
-       uint64_t platform_idle_wakeups;
-       uint64_t bytesread;
-       uint64_t byteswritten;
-       uint64_t gpu_time;
-};
+boolean_t task_is_in_privileged_coalition(task_t task, int type);
 
 kern_return_t coalition_resource_usage_internal(coalition_t coal, struct coalition_resource_usage *cru_out);
 
-ledger_t coalition_get_ledger(coalition_t coal);
-
-uint32_t coalition_adjust_focal_task_count(coalition_t coal, int count);
-uint32_t coalition_focal_task_count(coalition_t coal);
-uint32_t coalition_adjust_non_focal_task_count(coalition_t coal, int count);
-uint32_t coalition_non_focal_task_count(coalition_t coal);
-
-void coalition_sfi_reevaluate(coalition_t coal, task_t updated_task);
-
+/*
+ * development/debug interfaces
+ */
+#if defined(DEVELOPMENT) || defined(DEBUG)
+int coalition_should_notify(coalition_t coal);
+void coalition_set_notify(coalition_t coal, int notify);
+#endif
+
+#else /* !CONFIG_COALITIONS */
+
+static inline void task_coalition_update_gpu_stats(__unused task_t task,
+                                                  __unused uint64_t gpu_ns_delta)
+{
+       return;
+}
+
+static inline uint32_t task_coalition_adjust_focal_count(__unused task_t task,
+                                                        __unused int count)
+{
+       return 0;
+}
+
+static inline uint32_t task_coalition_adjust_nonfocal_count(__unused task_t task,
+                                                           __unused int count)
+{
+       return 0;
+}
+
+static inline uint32_t task_coalition_focal_count(__unused task_t task)
+{
+       return 0;
+}
+
+static inline void coalition_for_each_task(__unused coalition_t coal,
+                                          __unused void *ctx,
+                                          __unused void (*callback)(coalition_t, void *, task_t))
+{
+       return;
+}
+
+#endif /* CONFIG_COALITIONS */
 #endif /* XNU_KERNEL_PRIVATE */
-
 #endif /* _KERN_COALITION_H */
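As a usage sketch for the iteration hook declared above, the counting callback and context structure below are invented for illustration; they rely only on the coalition_for_each_task() prototype shown in this header.

/* Illustrative only: count the tasks in a coalition via the iterator. */
struct example_count_ctx {
        int ntasks;
};

static void
example_count_cb(__unused coalition_t coal, void *ctx, __unused task_t task)
{
        ((struct example_count_ctx *)ctx)->ntasks++;
}

static int
example_count_tasks(coalition_t coal)
{
        struct example_count_ctx c = { .ntasks = 0 };

        /* the callback runs with the coalition locked, so keep it short */
        coalition_for_each_task(coal, &c, example_count_cb);
        return c.ntasks;
}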
index 6f527a66fdb67e6c44fdbb5ff665e28b3d651177..2c79aacdf7eeb9f012676b4b9829a6e15fdfbaf1 100644 (file)
@@ -67,6 +67,7 @@
 #include <kern/clock.h>
 #include <kern/telemetry.h>
 #include <kern/ecc.h>
+#include <kern/kern_cdata.h>
 #include <vm/vm_kern.h>
 #include <vm/pmap.h>
 #include <stdarg.h>
@@ -87,6 +88,7 @@
 #include <libkern/OSAtomic.h>
 #include <libkern/kernel_mach_header.h>
 #include <uuid/uuid.h>
+#include <mach_debug/zone_info.h>
 
 #if (defined(__arm64__) || defined(NAND_PANIC_DEVICE)) && !defined(LEGACY_PANIC_LOGS)
 #include <pexpert/pexpert.h> /* For gPanicBase */
@@ -163,6 +165,7 @@ Assert(
        int saved_return_on_panic;
 
        if (!mach_assert) {
+               kprintf("%s:%d non-fatal Assertion: %s", file, line, expression);
                return;
        }
 
@@ -215,6 +218,10 @@ panic_init(void)
        simple_lock_init(&panic_lock, 0);
        panic_is_inited = 1;
        panic_caller = 0;
+
+       if (!PE_parse_boot_argn("assertions", &mach_assert, sizeof(mach_assert))) {
+               mach_assert = 1;
+       }
 }
 
 void
@@ -350,6 +357,7 @@ panic(const char *str, ...)
 {
        va_list listp;
        spl_t   s;
+       boolean_t       old_doprnt_hide_pointers = doprnt_hide_pointers;
 
 
        /* panic_caller is initialized to 0.  If set, don't change it */
@@ -357,6 +365,10 @@ panic(const char *str, ...)
                panic_caller = (unsigned long)(char *)__builtin_return_address(0);
        
        s = panic_prologue(str);
+
+       /* Never hide pointers from panic logs. */
+       doprnt_hide_pointers = FALSE;
+
        kdb_printf("panic(cpu %d caller 0x%lx): ", (unsigned) paniccpu, panic_caller);
        if (str) {
                va_start(listp, str);
@@ -370,6 +382,9 @@ panic(const char *str, ...)
         */
        panicwait = 0;
        Debugger("panic");
+
+       doprnt_hide_pointers = old_doprnt_hide_pointers;
+
        panic_epilogue(s);
 }
 
@@ -479,12 +494,13 @@ extern void *proc_name_address(void *p);
 
 static void
 panic_display_process_name(void) {
-       char proc_name[32] = "Unknown";
+       /* because of scoping issues, the length of p_comm from proc_t is hard-coded here */
+       char proc_name[17] = "Unknown";
        task_t ctask = 0;
        void *cbsd_info = 0;
 
        if (ml_nofault_copy((vm_offset_t)&current_thread()->task, (vm_offset_t) &ctask, sizeof(task_t)) == sizeof(task_t))
-               if(ml_nofault_copy((vm_offset_t)&ctask->bsd_info, (vm_offset_t)&cbsd_info, sizeof(&ctask->bsd_info)) == sizeof(&ctask->bsd_info))
+               if(ml_nofault_copy((vm_offset_t)&ctask->bsd_info, (vm_offset_t)&cbsd_info, sizeof(cbsd_info)) == sizeof(cbsd_info))
                        if (cbsd_info && (ml_nofault_copy((vm_offset_t) proc_name_address(cbsd_info), (vm_offset_t) &proc_name, sizeof(proc_name)) > 0))
                                proc_name[sizeof(proc_name) - 1] = '\0';
        kdb_printf("\nBSD process name corresponding to current thread: %s\n", proc_name);
@@ -579,6 +595,8 @@ extern long long alloc_ptepages_count;
 #endif
 
 extern boolean_t       panic_include_zprint;
+extern vm_offset_t     panic_kext_memory_info;
+extern vm_size_t       panic_kext_memory_size;
 
 __private_extern__ void panic_display_zprint()
 {
@@ -587,11 +605,12 @@ __private_extern__ void panic_display_zprint()
                unsigned int    i;
                struct zone     zone_copy;
 
+               kdb_printf("%-20s %10s %10s\n", "Zone Name", "Cur Size", "Free Size");
                if(first_zone!=NULL) {
                        if(ml_nofault_copy((vm_offset_t)first_zone, (vm_offset_t)&zone_copy, sizeof(struct zone)) == sizeof(struct zone)) {
                                for (i = 0; i < num_zones; i++) {
                                        if(zone_copy.cur_size > (1024*1024)) {
-                                               kdb_printf("%.20s:%lu\n",zone_copy.zone_name,(uintptr_t)zone_copy.cur_size);
+                                               kdb_printf("%-20s %10lu %10lu\n",zone_copy.zone_name, (uintptr_t)zone_copy.cur_size,(uintptr_t)(zone_copy.countfree * zone_copy.elem_size));
                                        }       
                                        
                                        if(zone_copy.next_zone == NULL) {
@@ -605,13 +624,22 @@ __private_extern__ void panic_display_zprint()
                        }
                }
 
-               kdb_printf("Kernel Stacks:%lu\n",(uintptr_t)(kernel_stack_size * stack_total));
+               kdb_printf("%-20s %10lu\n", "Kernel Stacks", (uintptr_t)(kernel_stack_size * stack_total));
 
 #if defined(__i386__) || defined (__x86_64__)
-               kdb_printf("PageTables:%lu\n",(uintptr_t)(PAGE_SIZE * inuse_ptepages_count));
+               kdb_printf("%-20s %10lu\n", "PageTables",(uintptr_t)(PAGE_SIZE * inuse_ptepages_count));
 #endif
 
-               kdb_printf("Kalloc.Large:%lu\n",(uintptr_t)kalloc_large_total);
+               kdb_printf("%-20s %10lu\n", "Kalloc.Large", (uintptr_t)kalloc_large_total);
+               if (panic_kext_memory_info) {
+                       mach_memory_info_t *mem_info = (mach_memory_info_t *)panic_kext_memory_info;
+                       kdb_printf("\n%-5s %10s\n", "Kmod", "Size");
+                       for (i = 0; i < VM_KERN_MEMORY_COUNT + VM_KERN_COUNTER_COUNT; i++) {
+                               if (((mem_info[i].flags & VM_KERN_SITE_TYPE) == VM_KERN_SITE_KMOD) && (mem_info[i].size > (1024 * 1024))) {
+                                       kdb_printf("%-5lld %10lld\n", mem_info[i].site, mem_info[i].size);
+                               }
+                       }
+               }
        }
 }
 
@@ -676,9 +704,9 @@ void kdp_set_gateway_mac(void *);
 void kdp_set_interface(void *);
 void kdp_register_send_receive(void *, void *);
 void kdp_unregister_send_receive(void *, void *);
-void kdp_snapshot_preflight(int, void *, uint32_t, uint32_t);
+void kdp_snapshot_preflight(int, void *, uint32_t, uint32_t, kcdata_descriptor_t, boolean_t enable_faulting);
 int kdp_stack_snapshot_geterror(void);
-int kdp_stack_snapshot_bytes_traced(void);
+uint32_t kdp_stack_snapshot_bytes_traced(void);
 
 void *
 kdp_get_interface( void)
index 407d4b4f28f985133ed8a5b260795b5f80914078..14917ddc1f10a30bf74d79a8f5ea046a6afeb935 100644 (file)
@@ -32,6 +32,7 @@
 #include <sys/cdefs.h>
 #include <stdint.h>
 #include <uuid/uuid.h>
+#include <mach/boolean.h>
 
 #ifndef XNU_KERNEL_PRIVATE
 #include <TargetConditionals.h>
@@ -59,8 +60,11 @@ struct thread_snapshot {
        int32_t                 sched_pri;   /* scheduled (current) priority */
        int32_t                 sched_flags; /* scheduler flags */
        char                    ss_flags;
-       char                    ts_qos;
+       char                    ts_qos;      /* effective qos */
+       char                    ts_rqos;     /* requested qos */
+       char                    ts_rqos_override; /* requested qos override */
        char                    io_tier;
+       char                    _reserved[3]; /* pad for 4 byte alignment packing */
 
        /*
         * I/O Statistics
@@ -88,6 +92,28 @@ struct thread_snapshot {
 
 } __attribute__ ((packed));
 
+struct thread_snapshot_v2 {
+       uint64_t  ths_thread_id;
+       uint64_t  ths_wait_event;
+       uint64_t  ths_continuation;
+       uint64_t  ths_total_syscalls;
+       uint64_t  ths_voucher_identifier;
+       uint64_t  ths_dqserialnum;
+       uint64_t  ths_user_time;
+       uint64_t  ths_sys_time;
+       uint64_t  ths_ss_flags;
+       uint64_t  ths_last_run_time;
+       uint64_t  ths_last_made_runnable_time;
+       uint32_t  ths_state;
+       uint32_t  ths_sched_flags;
+       int16_t   ths_base_priority;
+       int16_t   ths_sched_priority;
+       uint8_t   ths_eqos;
+       uint8_t   ths_rqos;
+       uint8_t   ths_rqos_override;
+       uint8_t   ths_io_tier;
+} __attribute__ ((packed));
+
 struct task_snapshot {
        uint32_t                snapshot_magic;
        int32_t                 pid;
@@ -140,6 +166,49 @@ struct task_snapshot {
 
 } __attribute__ ((packed));
 
+struct io_stats_snapshot
+{
+       /*
+        * I/O Statistics
+        * XXX: These fields must be together.
+        */
+       uint64_t         ss_disk_reads_count;
+       uint64_t         ss_disk_reads_size;
+       uint64_t         ss_disk_writes_count;
+       uint64_t         ss_disk_writes_size;
+       uint64_t         ss_io_priority_count[STACKSHOT_IO_NUM_PRIORITIES];
+       uint64_t         ss_io_priority_size[STACKSHOT_IO_NUM_PRIORITIES];
+       uint64_t         ss_paging_count;
+       uint64_t         ss_paging_size;
+       uint64_t         ss_non_paging_count;
+       uint64_t         ss_non_paging_size;
+       uint64_t         ss_data_count;
+       uint64_t         ss_data_size;
+       uint64_t         ss_metadata_count;
+       uint64_t         ss_metadata_size;
+       /* XXX: I/O Statistics end */
+
+} __attribute__ ((packed));
+
+struct task_snapshot_v2 {
+       uint64_t  ts_unique_pid;
+       uint64_t  ts_ss_flags;
+       uint64_t  ts_user_time_in_terminated_threads;
+       uint64_t  ts_system_time_in_terminated_threads;
+       uint64_t  ts_p_start_sec;
+       uint64_t  ts_task_size;
+       uint64_t  ts_max_resident_size;
+       uint32_t  ts_suspend_count;
+       uint32_t  ts_faults;
+       uint32_t  ts_pageins;
+       uint32_t  ts_cow_faults;
+       uint32_t  ts_was_throttled;
+       uint32_t  ts_did_throttle;
+       uint32_t  ts_latency_qos;
+       int32_t   ts_pid;
+       char      ts_p_comm[32];
+} __attribute__ ((packed));
+
 struct micro_snapshot {
        uint32_t                snapshot_magic;
        uint32_t                ms_cpu;  /* cpu number this snapshot was recorded on */
@@ -162,7 +231,7 @@ struct mem_and_io_snapshot {
        uint32_t        compressions;
        uint32_t        decompressions;
        uint32_t        compressor_size;
-       int                     busy_buffer_count;
+       int             busy_buffer_count;
        uint32_t        pages_wanted;
        uint32_t        pages_reclaimed;
        uint8_t         pages_wanted_reclaimed_valid; // did mach_vm_pressure_monitor succeed?
@@ -219,29 +288,33 @@ enum generic_snapshot_flags {
        kKernel64_p             = 0x2
 };
 
- enum task_snapshot_flags {
-       kTaskRsrcFlagged        = 0x4,   // In the EXC_RESOURCE danger zone?
-       kTerminatedSnapshot     = 0x8,
-       kPidSuspended           = 0x10,  // true for suspended task     
-       kFrozen                         = 0x20,  // true for hibernated task (along with pidsuspended)
-       kTaskDarwinBG           = 0x40,
-       kTaskExtDarwinBG        = 0x80,
-       kTaskVisVisible         = 0x100,
-       kTaskVisNonvisible      = 0x200,
-       kTaskIsForeground       = 0x400,
-       kTaskIsBoosted          = 0x800,
-       kTaskIsSuppressed       = 0x1000,
-       kTaskIsTimerThrottled   = 0x2000,  /* deprecated */
-       kTaskIsImpDonor         = 0x4000,
-       kTaskIsLiveImpDonor = 0x8000
- };
+enum task_snapshot_flags {
+       kTaskRsrcFlagged      = 0x4, // In the EXC_RESOURCE danger zone?
+       kTerminatedSnapshot   = 0x8,
+       kPidSuspended         = 0x10, // true for suspended task
+       kFrozen               = 0x20, // true for hibernated task (along with pidsuspended)
+       kTaskDarwinBG         = 0x40,
+       kTaskExtDarwinBG      = 0x80,
+       kTaskVisVisible       = 0x100,
+       kTaskVisNonvisible    = 0x200,
+       kTaskIsForeground     = 0x400,
+       kTaskIsBoosted        = 0x800,
+       kTaskIsSuppressed     = 0x1000,
+       kTaskIsTimerThrottled = 0x2000, /* deprecated */
+       kTaskIsImpDonor       = 0x4000,
+       kTaskIsLiveImpDonor   = 0x8000
+};
 
 enum thread_snapshot_flags {
-       kHasDispatchSerial      = 0x4,
-       kStacksPCOnly           = 0x8,    /* Stack traces have no frame pointers. */
-       kThreadDarwinBG         = 0x10,   /* Thread is darwinbg */
-       kThreadIOPassive        = 0x20,   /* Thread uses passive IO */
-       kThreadSuspended        = 0x40    /* Thread is supsended */
+       kHasDispatchSerial = 0x4,
+       kStacksPCOnly      = 0x8,  /* Stack traces have no frame pointers. */
+       kThreadDarwinBG    = 0x10, /* Thread is darwinbg */
+       kThreadIOPassive   = 0x20, /* Thread uses passive IO */
+       kThreadSuspended   = 0x40, /* Thread is suspended */
+       kThreadTruncatedBT = 0x80, /* Unmapped pages caused truncated backtrace */
+       kGlobalForcedIdle  = 0x100, /* Thread performs global forced idle */
+       kThreadDecompressedBT = 0x200,   /* Some thread stack pages were decompressed as part of BT */
+       kThreadFaultedBT = 0x400   /* Some thread stack pages were faulted in as part of BT */
 };
 
 #define VM_PRESSURE_TIME_WINDOW 5 /* seconds */
@@ -260,9 +333,36 @@ enum {
        STACKSHOT_GET_WINDOWED_MICROSTACKSHOTS          = 0x400,
        STACKSHOT_WINDOWED_MICROSTACKSHOTS_ENABLE       = 0x800,
        STACKSHOT_WINDOWED_MICROSTACKSHOTS_DISABLE      = 0x1000,
-       STACKSHOT_SAVE_IMP_DONATION_PIDS                = 0x2000
+       STACKSHOT_SAVE_IMP_DONATION_PIDS                = 0x2000,
+       STACKSHOT_SAVE_IN_KERNEL_BUFFER                 = 0x4000,
+       STACKSHOT_RETRIEVE_EXISTING_BUFFER              = 0x8000,
+       STACKSHOT_KCDATA_FORMAT                         = 0x10000,
+       STACKSHOT_ENABLE_FAULTING                       = 0x20000
 };
 
+/*
+ * NOTE: Please update libkdd/kcdata/kcdtypes.c if you make any changes
+ * in STACKSHOT_KCTYPE_* types.
+ */
+#define STACKSHOT_KCTYPE_IOSTATS                0x901  /* io_stats_snapshot */
+#define STACKSHOT_KCTYPE_GLOBAL_MEM_STATS       0x902  /* struct mem_and_io_snapshot */
+#define STACKSHOT_KCCONTAINER_TASK              0x903
+#define STACKSHOT_KCCONTAINER_THREAD            0x904
+#define STACKSHOT_KCTYPE_TASK_SNAPSHOT          0x905  /* task_snapshot_v2 */
+#define STACKSHOT_KCTYPE_THREAD_SNAPSHOT        0x906  /* thread_snapshot_v2 */
+#define STASKSHOT_KCTYPE_DONATING_PIDS          0x907  /* int[] */
+#define STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO   0x908  /* same as KCDATA_TYPE_LIBRARY_LOADINFO64 */
+#define STACKSHOT_KCTYPE_THREAD_NAME            0x909  /* char[] */
+#define STACKSHOT_KCTYPE_KERN_STACKFRAME        0x90A  /* struct stack_snapshot_frame32 */
+#define STACKSHOT_KCTYPE_KERN_STACKFRAME64      0x90B  /* struct stack_snapshot_frame64 */
+#define STACKSHOT_KCTYPE_USER_STACKFRAME        0x90C  /* struct stack_snapshot_frame32 */
+#define STACKSHOT_KCTYPE_USER_STACKFRAME64      0x90D  /* struct stack_snapshot_frame64 */
+#define STACKSHOT_KCTYPE_BOOTARGS               0x90E  /* boot args string */
+#define STACKSHOT_KCTYPE_OSVERSION              0x90F  /* os version string */
+#define STACKSHOT_KCTYPE_KERN_PAGE_SIZE         0x910  /* kernel page size in uint32_t */
+#define STACKSHOT_KCTYPE_JETSAM_LEVEL           0x911  /* jetsam level in uint32_t */
+
+
 #define STACKSHOT_THREAD_SNAPSHOT_MAGIC        0xfeedface
 #define STACKSHOT_TASK_SNAPSHOT_MAGIC          0xdecafbad
 #define STACKSHOT_MEM_AND_IO_SNAPSHOT_MAGIC    0xbfcabcde
@@ -284,6 +384,8 @@ extern char kernel_uuid_string[];
 
 #ifdef MACH_KERNEL_PRIVATE
 
+extern boolean_t       doprnt_hide_pointers;
+
 extern unsigned int    halt_in_debugger;
 
 extern unsigned int     switch_debugger;
@@ -362,8 +464,9 @@ void        panic_display_ecc_errors(void);
                                                * post-panic crashdump/paniclog
                                                * dump.
                                                */
-#define DB_NMI_BTN_ENA  0x8000  /* Enable button to directly trigger NMI */
-#define DB_PRT_KDEBUG   0x10000 /* kprintf KDEBUG traces */
+#define DB_NMI_BTN_ENA         0x8000  /* Enable button to directly trigger NMI */
+#define DB_PRT_KDEBUG          0x10000 /* kprintf KDEBUG traces */
+#define DB_DISABLE_LOCAL_CORE   0x20000 /* ignore local core dump support */
 
 #if DEBUG
 /*
index e4524704456901de0fee6822c1802146439e5ce0..f37bf64bce708e44a865a7c356ad33b79c7922d8 100644 (file)
@@ -43,7 +43,6 @@ struct ecc_event {
 
 #ifdef KERNEL_PRIVATE
 extern kern_return_t   ecc_log_record_event(const struct ecc_event *ev);
-extern boolean_t       ecc_log_prefer_panic(void);
 #endif 
 
 #ifdef XNU_KERNEL_PRIVATE
index f44a09a73572fc66c32616c6977ff4550aa4faee..20e42be88bdca4625ba32d6b93f9e880d796fd21 100644 (file)
@@ -27,6 +27,7 @@
  */
 
 #include <kern/energy_perf.h>
+#include <libsa/types.h>
 #include <sys/kdebug.h>
 #include <stddef.h>
 #include <machine/machine_routines.h>
@@ -59,3 +60,27 @@ void io_rate_update_register(io_rate_update_callback_t io_rate_update_cb_new) {
 uint64_t io_rate_update(uint64_t io_rate_flags, uint64_t read_ops_delta, uint64_t write_ops_delta, uint64_t read_bytes_delta, uint64_t write_bytes_delta) {
        return io_rate_update_cb(io_rate_flags, read_ops_delta, write_ops_delta, read_bytes_delta, write_bytes_delta);
 }
+
+static uint64_t gpu_set_fceiling_cb_default(__unused uint32_t gfr, __unused uint64_t gfp) {
+       return 0ULL;
+}
+
+gpu_set_fceiling_t gpu_set_fceiling_cb = gpu_set_fceiling_cb_default;
+
+void gpu_fceiling_cb_register(gpu_set_fceiling_t gnewcb) {
+       if (gnewcb != NULL) {
+               gpu_set_fceiling_cb = gnewcb;
+       } else {
+               gpu_set_fceiling_cb = gpu_set_fceiling_cb_default;
+       }
+}
+
+void gpu_submission_telemetry(
+       __unused uint64_t gpu_ncmds,
+       __unused uint64_t gpu_noutstanding_avg,
+       __unused uint64_t gpu_busy_ns_total,
+       __unused uint64_t gpu_cycles,
+       __unused uint64_t gpu_telemetry_valid_flags,
+       __unused uint64_t gpu_telemetry_misc) {
+
+}
index b7e2e1b94df9c4f6036b65967bb40cb2361d3bbd..c6b1d40a131f5c4ec7004141f1239f9cfdea17dd 100644 (file)
@@ -94,6 +94,27 @@ typedef uint64_t (*io_rate_update_callback_t) (uint64_t, uint64_t, uint64_t, uin
 
 void io_rate_update_register(io_rate_update_callback_t);
 
+/* Interfaces for integrated GPUs to supply command submission telemetry.
+ */
+
+#define GPU_NCMDS_VALID (0x1)
+#define GPU_NOUTSTANDING_VALID (0x2)
+#define GPU_BUSY_VALID (0x4)
+#define GPU_CYCLE_COUNT_VALID (0x8)
+#define GPU_MISC_VALID (0x10)
+
+void gpu_submission_telemetry(
+       uint64_t gpu_ncmds_total,
+       uint64_t gpu_noutstanding,
+       uint64_t gpu_busy_ns_total,
+       uint64_t gpu_cycles,
+       uint64_t gpu_telemetry_valid_flags,
+       uint64_t gpu_telemetry_misc);
+
+typedef uint64_t (*gpu_set_fceiling_t) (uint32_t gpu_fceiling_ratio, uint64_t gpu_fceiling_param);
+
+void gpu_fceiling_cb_register(gpu_set_fceiling_t);
+
 __END_DECLS
 #endif /* KERNEL */
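A minimal sketch, assuming a hypothetical integrated-GPU driver, of how the frequency-ceiling callback declared above might be supplied; the driver function and its body are illustrative.

/* Illustrative only: driver-side implementation of gpu_set_fceiling_t. */
static uint64_t
example_gpu_set_fceiling(uint32_t gpu_fceiling_ratio, uint64_t gpu_fceiling_param)
{
        /* a real driver would reprogram its clock controller here */
        (void)gpu_fceiling_param;
        return (uint64_t)gpu_fceiling_ratio;    /* echo the ratio actually applied */
}

/* registered once at driver start; passing NULL restores the default stub */
/* gpu_fceiling_cb_register(example_gpu_set_fceiling); */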
 
index 7d9cb9342386ab33bed144d116497c6ae57848e4..a47544027c0cdbdf142f464ebe798da49e2dc1be 100644 (file)
@@ -86,6 +86,8 @@
 #include <string.h>
 #include <pexpert/pexpert.h>
 
+extern int panic_on_exception_triage;
+
 unsigned long c_thr_exc_raise = 0;
 unsigned long c_thr_exc_raise_state = 0;
 unsigned long c_thr_exc_raise_state_id = 0;
@@ -103,7 +105,7 @@ kern_return_t exception_deliver(
        lck_mtx_t                       *mutex);
 
 static kern_return_t
-check_exc_receiver_dependancy(
+check_exc_receiver_dependency(
        exception_type_t exception, 
        struct exception_action *excp, 
        lck_mtx_t *mutex);
@@ -147,7 +149,7 @@ exception_deliver(
         *  Save work if we are terminating.
         *  Just go back to our AST handler.
         */
-       if (!thread->active)
+       if (!thread->active && !thread->inspection)
                return KERN_SUCCESS;
 
        /*
@@ -226,7 +228,7 @@ exception_deliver(
                                                state, state_cnt,
                                                state, &state_cnt);
                        }
-                       if (kr == MACH_MSG_SUCCESS)
+                       if (kr == MACH_MSG_SUCCESS && exception != EXC_CORPSE_NOTIFY)
                                kr = thread_setstatus(thread, flavor, 
                                                (thread_state_t)state,
                                                state_cnt);
@@ -287,7 +289,7 @@ exception_deliver(
                                                state, state_cnt,
                                                state, &state_cnt);
                        }
-                       if (kr == MACH_MSG_SUCCESS)
+                       if (kr == MACH_MSG_SUCCESS && exception != EXC_CORPSE_NOTIFY)
                                kr = thread_setstatus(thread, flavor,
                                                (thread_state_t)state,
                                                state_cnt);
@@ -303,7 +305,7 @@ exception_deliver(
 }
 
 /*
- * Routine: check_exc_receiver_dependancy
+ * Routine: check_exc_receiver_dependency
  * Purpose:
  *      Verify that the port destined for receiving this exception is not
  *      on the current task. This would cause hang in kernel for
@@ -317,7 +319,7 @@ exception_deliver(
  *      KERN_SUCCESS if its ok to send exception message.
  */
 kern_return_t
-check_exc_receiver_dependancy(
+check_exc_receiver_dependency(
        exception_type_t exception,
        struct exception_action *excp,
        lck_mtx_t *mutex)
@@ -339,7 +341,7 @@ check_exc_receiver_dependancy(
 }
 
 /*
- *     Routine:        exception
+ *     Routine:        exception_triage
  *     Purpose:
  *             The current thread caught an exception.
  *             We make an up-call to the thread's exception server.
@@ -349,9 +351,9 @@ check_exc_receiver_dependancy(
  *             thread_exception_return and thread_kdb_return
  *             are possible.
  *     Returns:
- *             Doesn't return.
+ *             KERN_SUCCESS if exception is handled by any of the handlers.
  */
-void
+kern_return_t
 exception_triage(
        exception_type_t        exception,
        mach_exception_data_t   code,
@@ -361,17 +363,29 @@ exception_triage(
        task_t                  task;
        host_priv_t             host_priv;
        lck_mtx_t               *mutex;
-       kern_return_t   kr;
+       kern_return_t   kr = KERN_FAILURE;
 
        assert(exception != EXC_RPC_ALERT);
 
+       /*
+        * If this behavior has been requested by the the kernel
+        * (due to the boot environment), we should panic if we
+        * enter this function.  This is intended as a debugging
+        * aid; it should allow us to debug why we caught an
+        * exception in environments where debugging is especially
+        * difficult.
+        */
+       if (panic_on_exception_triage) {
+               panic("called exception_triage when it was forbidden by the boot environment");
+       }
+
        thread = current_thread();
 
        /*
         * Try to raise the exception at the activation level.
         */
        mutex = &thread->mutex;
-       if (KERN_SUCCESS == check_exc_receiver_dependancy(exception, thread->exc_actions, mutex))
+       if (KERN_SUCCESS == check_exc_receiver_dependency(exception, thread->exc_actions, mutex))
        {
                kr = exception_deliver(thread, exception, code, codeCnt, thread->exc_actions, mutex);
                if (kr == KERN_SUCCESS || kr == MACH_RCV_PORT_DIED)
@@ -383,7 +397,7 @@ exception_triage(
         */
        task = current_task();
        mutex = &task->lock;
-       if (KERN_SUCCESS == check_exc_receiver_dependancy(exception, task->exc_actions, mutex))
+       if (KERN_SUCCESS == check_exc_receiver_dependency(exception, task->exc_actions, mutex))
        {
                kr = exception_deliver(thread, exception, code, codeCnt, task->exc_actions, mutex);
                if (kr == KERN_SUCCESS || kr == MACH_RCV_PORT_DIED)
@@ -396,24 +410,18 @@ exception_triage(
        host_priv = host_priv_self();
        mutex = &host_priv->lock;
        
-       if (KERN_SUCCESS == check_exc_receiver_dependancy(exception, host_priv->exc_actions, mutex))
+       if (KERN_SUCCESS == check_exc_receiver_dependency(exception, host_priv->exc_actions, mutex))
        {
                kr = exception_deliver(thread, exception, code, codeCnt, host_priv->exc_actions, mutex);
                if (kr == KERN_SUCCESS || kr == MACH_RCV_PORT_DIED)
                        goto out;
        }
 
-       /*
-        * Nobody handled it, terminate the task.
-        */
-
-       (void) task_terminate(task);
-
 out:
        if ((exception != EXC_CRASH) && (exception != EXC_RESOURCE) &&
-           (exception != EXC_GUARD))
+           (exception != EXC_GUARD) && (exception != EXC_CORPSE_NOTIFY))
                thread_exception_return();
-       return;
+       return kr;
 }
 
 kern_return_t
@@ -450,14 +458,15 @@ kern_return_t task_exception_notify(exception_type_t exception,
 {
        mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
        wait_interrupt_t                wsave;
+       kern_return_t kr = KERN_SUCCESS;
 
        code[0] = exccode;
        code[1] = excsubcode;
 
        wsave = thread_interrupt_level(THREAD_UNINT);
-       exception_triage(exception, code, EXCEPTION_CODE_MAX);
+       kr = exception_triage(exception, code, EXCEPTION_CODE_MAX);
        (void) thread_interrupt_level(wsave);
-       return (KERN_SUCCESS);
+       return kr;
 }
 
 
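The hunks above change exception_triage from a void routine that terminated the task when no handler claimed the exception into one that returns a kern_return_t and leaves termination to its callers, skips thread_setstatus and the return to user space for EXC_CORPSE_NOTIFY, and panics early when panic_on_exception_triage is set. A minimal sketch of how that flag could be wired to a boot argument follows; the boot-arg name and the flag's type are assumptions for illustration and are not taken from this diff.

    /*
     * Illustrative only: populate the debugging flag checked in
     * exception_triage() from a boot argument during early startup.
     * The boot-arg name and the int type are guesses, not from the diff.
     */
    #include <pexpert/pexpert.h>            /* PE_parse_boot_argn() */

    extern int panic_on_exception_triage;   /* defined with the exception code */

    static void
    example_init_exception_debug_flag(void)
    {
            unsigned int flag = 0;

            if (PE_parse_boot_argn("panic_on_exception_triage", &flag, sizeof(flag)))
                    panic_on_exception_triage = (int)flag;
    }
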
index a35895d8b94d11c7c73a9e1472977aa1c8ece6a4..94786e7be806efa44ff534d2e5b7487487c3ae0c 100644 (file)
@@ -49,7 +49,7 @@ struct exception_action {
 };
 
 /* Make an up-call to a thread's exception server */
-extern void exception_triage(
+extern kern_return_t exception_triage(
        exception_type_t        exception,
        mach_exception_data_t   code,
        mach_msg_type_number_t  codeCnt);
index 10b315d7855a5432bd8b3d82b3fddbfcd60632d8..8db705a72a688a202563d37e180736d211a359bc 100644 (file)
@@ -157,7 +157,7 @@ void gzalloc_zone_init(zone_t z) {
                        } else {
                                kern_return_t kr;
 
-                               if ((kr = kernel_memory_allocate(kernel_map, (vm_offset_t *)&z->gz.gzfc, gzfcsz, 0, KMA_KOBJECT)) != KERN_SUCCESS) {
+                               if ((kr = kernel_memory_allocate(kernel_map, (vm_offset_t *)&z->gz.gzfc, gzfcsz, 0, KMA_KOBJECT, VM_KERN_MEMORY_OSFMK)) != KERN_SUCCESS) {
                                        panic("zinit/gzalloc: kernel_memory_allocate failed (%d) for 0x%lx bytes", kr, (unsigned long) gzfcsz);
                                }
                        }
@@ -239,7 +239,7 @@ void gzalloc_init(vm_size_t max_zonemap_size) {
 
        if (gzalloc_mode) {
                retval = kmem_suballoc(kernel_map, &gzalloc_map_min, (max_zonemap_size << 2),
-                   FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT,
+                   FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT | VM_MAKE_TAG(VM_KERN_MEMORY_ZONE),
                    &gzalloc_map);
        
                if (retval != KERN_SUCCESS)
@@ -287,7 +287,8 @@ gzalloc_alloc(zone_t zone, boolean_t canblock) {
                else {
                        kern_return_t kr = kernel_memory_allocate(gzalloc_map,
                            &gzaddr, rounded_size + (1*PAGE_SIZE),
-                           0, KMA_KOBJECT | gzalloc_guard);
+                           0, KMA_KOBJECT | gzalloc_guard,
+                           VM_KERN_MEMORY_OSFMK);
                        if (kr != KERN_SUCCESS)
                                panic("gzalloc: kernel_memory_allocate for size 0x%llx failed with %d", (uint64_t)rounded_size, kr);
 
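These gzalloc hunks are part of a wider change that threads VM allocation tags through the kernel allocators so guard-zone memory can be attributed in VM accounting: kernel_memory_allocate gains an explicit tag argument, while flag-based interfaces such as kmem_suballoc carry the tag via VM_MAKE_TAG. A small sketch of the two call shapes, using placeholder variables rather than gzalloc's real state:

    /*
     * Sketch of the tagged-allocation pattern used above; everything prefixed
     * "example_" is a placeholder.  Assumes the kernel VM headers.
     */
    #include <vm/vm_kern.h>                 /* kernel_memory_allocate(), kmem_suballoc() */
    #include <mach/vm_statistics.h>         /* VM_MAKE_TAG(), VM_KERN_MEMORY_* tags */

    static void
    example_tagged_allocations(void)
    {
            vm_offset_t   example_buf = 0;
            vm_offset_t   example_min = 0;
            vm_map_t      example_map = VM_MAP_NULL;
            kern_return_t kr;

            /* kernel_memory_allocate() now takes the tag as a trailing argument. */
            kr = kernel_memory_allocate(kernel_map, &example_buf, PAGE_SIZE, 0,
                                        KMA_KOBJECT, VM_KERN_MEMORY_OSFMK);
            if (kr != KERN_SUCCESS)
                    panic("example: kernel_memory_allocate failed (%d)", kr);

            /* VM_FLAGS-based interfaces fold the tag into the flags word instead. */
            kr = kmem_suballoc(kernel_map, &example_min, 16 * PAGE_SIZE, FALSE,
                               VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT |
                               VM_MAKE_TAG(VM_KERN_MEMORY_ZONE),
                               &example_map);
            if (kr != KERN_SUCCESS)
                    panic("example: kmem_suballoc failed (%d)", kr);
    }
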
index c534a5c57d657f207f62341c407fb83e50219da3..c15eb172e1714f263d3aac0b2577110b6df98b38 100644 (file)
@@ -95,14 +95,11 @@ extern int sync_internal(void);
 
 kern_return_t 
 hibernate_setup(IOHibernateImageHeader * header,
-                        uint32_t  free_page_ratio,
-                        uint32_t  free_page_time,
                         boolean_t vmflush,
-                       hibernate_page_list_t * page_list,
+                       hibernate_page_list_t * page_list __unused,
                        hibernate_page_list_t * page_list_wired __unused,
                        hibernate_page_list_t * page_list_pal __unused)
 {
-    uint32_t               gobble_count;
     kern_return_t      retval = KERN_SUCCESS;
 
     hibernate_create_paddr_map();
@@ -120,17 +117,10 @@ hibernate_setup(IOHibernateImageHeader * header,
            hibernate_flush_memory();
     }
 
-
-    // pages we could force out to reduce hibernate image size
-    gobble_count = (uint32_t)((((uint64_t) page_list->page_count) * ((uint64_t) free_page_ratio)) / 100);
-
     // no failures hereafter
 
     hibernate_processor_setup(header);
 
-    if (gobble_count)
-           hibernate_gobble_pages(gobble_count, free_page_time);
-
     HIBLOG("hibernate_alloc_pages act %d, inact %d, anon %d, throt %d, spec %d, wire %d, wireinit %d\n",
            vm_page_active_count, vm_page_inactive_count, 
            vm_page_anonymous_count,  vm_page_throttled_count, vm_page_speculative_count,
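The deleted code implemented a "gobble" heuristic: gobble_count = page_count * free_page_ratio / 100 pages were forced out before writing the image in order to shrink it. For scale, with an illustrative 1,048,576-page list (4 GiB of 4 KiB pages) and a free_page_ratio of 10, that works out to 104,857 pages, roughly 400 MiB, gobbled per hibernation. This revision drops the heuristic along with the free_page_ratio and free_page_time parameters, so hibernate_setup no longer calls hibernate_gobble_pages at all.
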
index 0fa13c974f83e79d535eed1d2d9d70a26c03d733..81ba0aa9794d76a0d2d8200301d189d559e037c3 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
+ *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
  * unlawful or unlicensed copies of an Apple operating system, or to
  * circumvent, violate, or enable the circumvention or violation of, any
  * terms of an Apple operating system software license agreement.
- * 
+ *
  * Please obtain a copy of the License at
  * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
+ *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- * 
+ *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
  * @OSF_COPYRIGHT@
  */
-/* 
+/*
  * Mach Operating System
  * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
  * All Rights Reserved.
- * 
+ *
  * Permission to use, copy, modify and distribute this software and its
  * documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
- * 
+ *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
+ *
  * Carnegie Mellon requests users of this software to return to
- * 
+ *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
- * 
+ *
  * any improvements or extensions that they make and grant Carnegie Mellon
  * the rights to redistribute these changes.
  */
 #include <vm/vm_purgeable_internal.h>
 #include <vm/vm_pageout.h>
 
-host_data_t    realhost;
+#if CONFIG_ATM
+#include <atm/atm_internal.h>
+#endif
+
+#if CONFIG_MACF
+#include <security/mac_mach_internal.h>
+#endif
+
+host_data_t realhost;
 
 vm_extmod_statistics_data_t host_extmod_statistics;
 
 kern_return_t
-host_processors(
-       host_priv_t                             host_priv,
-       processor_array_t               *out_array,
-       mach_msg_type_number_t  *countp)
+host_processors(host_priv_t host_priv, processor_array_t * out_array, mach_msg_type_number_t * countp)
 {
-       register processor_t    processor, *tp;
-       void                                    *addr;
-       unsigned int                    count, i;
+       register processor_t processor, *tp;
+       void * addr;
+       unsigned int count, i;
 
        if (host_priv == HOST_PRIV_NULL)
                return (KERN_INVALID_ARGUMENT);
@@ -113,11 +118,11 @@ host_processors(
        count = processor_count;
        assert(count != 0);
 
-       addr = kalloc((vm_size_t) (count * sizeof(mach_port_t)));
+       addr = kalloc((vm_size_t)(count * sizeof(mach_port_t)));
        if (addr == 0)
                return (KERN_RESOURCE_SHORTAGE);
 
-       tp = (processor_t *) addr;
+       tp = (processor_t *)addr;
        *tp++ = processor = processor_list;
 
        if (count > 1) {
@@ -133,32 +138,23 @@ host_processors(
        *out_array = (processor_array_t)addr;
 
        /* do the conversion that Mig should handle */
-
-       tp = (processor_t *) addr;
+       tp = (processor_t *)addr;
        for (i = 0; i < count; i++)
-               ((mach_port_t *) tp)[i] =
-                     (mach_port_t)convert_processor_to_port(tp[i]);
+               ((mach_port_t *)tp)[i] = (mach_port_t)convert_processor_to_port(tp[i]);
 
        return (KERN_SUCCESS);
 }
 
 kern_return_t
-host_info(
-       host_t                                  host,
-       host_flavor_t                   flavor,
-       host_info_t                             info,
-       mach_msg_type_number_t  *count)
+host_info(host_t host, host_flavor_t flavor, host_info_t info, mach_msg_type_number_t * count)
 {
-
        if (host == HOST_NULL)
                return (KERN_INVALID_ARGUMENT);
-       
-       switch (flavor) {
 
-       case HOST_BASIC_INFO:
-       {
-               register host_basic_info_t      basic_info;
-               register int                            master_id;
+       switch (flavor) {
+       case HOST_BASIC_INFO: {
+               register host_basic_info_t basic_info;
+               register int master_id;
 
                /*
                 *      Basic information about this host.
@@ -166,7 +162,7 @@ host_info(
                if (*count < HOST_BASIC_INFO_OLD_COUNT)
                        return (KERN_FAILURE);
 
-               basic_info = (host_basic_info_t) info;
+               basic_info = (host_basic_info_t)info;
 
                basic_info->memory_size = machine_info.memory_size;
                basic_info->max_cpus = machine_info.max_cpus;
@@ -191,9 +187,8 @@ host_info(
                return (KERN_SUCCESS);
        }
 
-       case HOST_SCHED_INFO:
-       {
-               register host_sched_info_t      sched_info;
+       case HOST_SCHED_INFO: {
+               register host_sched_info_t sched_info;
                uint32_t quantum_time;
                uint64_t quantum_ns;
 
@@ -203,21 +198,19 @@ host_info(
                if (*count < HOST_SCHED_INFO_COUNT)
                        return (KERN_FAILURE);
 
-               sched_info = (host_sched_info_t) info;
+               sched_info = (host_sched_info_t)info;
 
                quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
                absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
 
-               sched_info->min_timeout = 
-                       sched_info->min_quantum = (uint32_t)(quantum_ns / 1000 / 1000);
+               sched_info->min_timeout = sched_info->min_quantum = (uint32_t)(quantum_ns / 1000 / 1000);
 
                *count = HOST_SCHED_INFO_COUNT;
 
                return (KERN_SUCCESS);
        }
 
-       case HOST_RESOURCE_SIZES:
-       { 
+       case HOST_RESOURCE_SIZES: {
                /*
                 * Return sizes of kernel data structures
                 */
@@ -227,24 +220,23 @@ host_info(
                /* XXX Fail until ledgers are implemented */
                return (KERN_INVALID_ARGUMENT);
        }
-                  
-       case HOST_PRIORITY_INFO:
-       {
-               register host_priority_info_t   priority_info;
+
+       case HOST_PRIORITY_INFO: {
+               register host_priority_info_t priority_info;
 
                if (*count < HOST_PRIORITY_INFO_COUNT)
                        return (KERN_FAILURE);
 
-               priority_info = (host_priority_info_t) info;
+               priority_info = (host_priority_info_t)info;
 
-               priority_info->kernel_priority  = MINPRI_KERNEL;
-               priority_info->system_priority  = MINPRI_KERNEL;
-               priority_info->server_priority  = MINPRI_RESERVED;
-               priority_info->user_priority    = BASEPRI_DEFAULT;
-               priority_info->depress_priority = DEPRESSPRI;
-               priority_info->idle_priority    = IDLEPRI;
-               priority_info->minimum_priority = MINPRI_USER;
-               priority_info->maximum_priority = MAXPRI_RESERVED;
+               priority_info->kernel_priority = MINPRI_KERNEL;
+               priority_info->system_priority = MINPRI_KERNEL;
+               priority_info->server_priority = MINPRI_RESERVED;
+               priority_info->user_priority = BASEPRI_DEFAULT;
+               priority_info->depress_priority = DEPRESSPRI;
+               priority_info->idle_priority = IDLEPRI;
+               priority_info->minimum_priority = MINPRI_USER;
+               priority_info->maximum_priority = MAXPRI_RESERVED;
 
                *count = HOST_PRIORITY_INFO_COUNT;
 
@@ -255,68 +247,83 @@ host_info(
         * Gestalt for various trap facilities.
         */
        case HOST_MACH_MSG_TRAP:
-       case HOST_SEMAPHORE_TRAPS:
-       {
+       case HOST_SEMAPHORE_TRAPS: {
                *count = 0;
                return (KERN_SUCCESS);
        }
 
-       case HOST_VM_PURGABLE:
-       {
+       case HOST_VM_PURGABLE: {
                if (*count < HOST_VM_PURGABLE_COUNT)
                        return (KERN_FAILURE);
 
-               vm_purgeable_stats((vm_purgeable_info_t) info, NULL);
+               vm_purgeable_stats((vm_purgeable_info_t)info, NULL);
 
                *count = HOST_VM_PURGABLE_COUNT;
                return (KERN_SUCCESS);
        }
 
-       default:
-               return (KERN_INVALID_ARGUMENT);
+       case HOST_DEBUG_INFO_INTERNAL: {
+#if DEVELOPMENT || DEBUG
+               if (*count < HOST_DEBUG_INFO_INTERNAL_COUNT)
+                       return (KERN_FAILURE);
+
+               host_debug_info_internal_t debug_info = (host_debug_info_internal_t)info;
+               bzero(debug_info, sizeof(host_debug_info_internal_data_t));
+               *count = HOST_DEBUG_INFO_INTERNAL_COUNT;
+
+#if CONFIG_COALITIONS
+               debug_info->config_coalitions = 1;
+#endif
+#if CONFIG_BANK
+               debug_info->config_bank = 1;
+#endif
+#if CONFIG_ATM
+               debug_info->config_atm = 1;
+#endif
+#if CONFIG_CSR
+               debug_info->config_csr = 1;
+#endif
+               return (KERN_SUCCESS);
+#else /* DEVELOPMENT || DEBUG */
+               return (KERN_NOT_SUPPORTED);
+#endif
+       }
+
+       default: return (KERN_INVALID_ARGUMENT);
        }
 }
 
 kern_return_t
-host_statistics(
-       host_t                                  host,
-       host_flavor_t                   flavor,
-       host_info_t                             info,
-       mach_msg_type_number_t  *count)
+host_statistics(host_t host, host_flavor_t flavor, host_info_t info, mach_msg_type_number_t * count)
 {
-       uint32_t        i;
+       uint32_t i;
 
        if (host == HOST_NULL)
                return (KERN_INVALID_HOST);
-       
-       switch(flavor) {
 
-       case HOST_LOAD_INFO:
-       {
-               host_load_info_t        load_info;
+       switch (flavor) {
+       case HOST_LOAD_INFO: {
+               host_load_info_t load_info;
 
                if (*count < HOST_LOAD_INFO_COUNT)
                        return (KERN_FAILURE);
 
-               load_info = (host_load_info_t) info;
+               load_info = (host_load_info_t)info;
 
-               bcopy((char *) avenrun,
-                         (char *) load_info->avenrun, sizeof avenrun);
-               bcopy((char *) mach_factor,
-                         (char *) load_info->mach_factor, sizeof mach_factor);
+               bcopy((char *)avenrun, (char *)load_info->avenrun, sizeof avenrun);
+               bcopy((char *)mach_factor, (char *)load_info->mach_factor, sizeof mach_factor);
 
                *count = HOST_LOAD_INFO_COUNT;
                return (KERN_SUCCESS);
        }
 
-       case HOST_VM_INFO:
-       {
-               register processor_t            processor;
-               register vm_statistics64_t      stat;
-               vm_statistics64_data_t          host_vm_stat;
-               vm_statistics_t                 stat32;
-               mach_msg_type_number_t          original_count;
-                
+       case HOST_VM_INFO: {
+               register processor_t processor;
+               register vm_statistics64_t stat;
+               vm_statistics64_data_t host_vm_stat;
+               vm_statistics_t stat32;
+               mach_msg_type_number_t original_count;
+
                if (*count < HOST_VM_INFO_REV0_COUNT)
                        return (KERN_FAILURE);
 
@@ -330,7 +337,7 @@ host_statistics(
                        while ((processor = processor->processor_list) != NULL) {
                                stat = &PROCESSOR_DATA(processor, vm_stat);
 
-                               host_vm_stat.zero_fill_count += stat->zero_fill_count;
+                               host_vm_stat.zero_fill_count += stat->zero_fill_count;
                                host_vm_stat.reactivations += stat->reactivations;
                                host_vm_stat.pageins += stat->pageins;
                                host_vm_stat.pageouts += stat->pageouts;
@@ -343,14 +350,14 @@ host_statistics(
                        simple_unlock(&processor_list_lock);
                }
 
-               stat32 = (vm_statistics_t) info;
+               stat32 = (vm_statistics_t)info;
 
                stat32->free_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(vm_page_free_count + vm_page_speculative_count);
                stat32->active_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(vm_page_active_count);
-               
+
                if (vm_page_local_q) {
                        for (i = 0; i < vm_page_local_q_count; i++) {
-                               struct vpl      *lq;
+                               struct vpl * lq;
 
                                lq = &vm_page_local_q[i].vpl_un.vpl;
 
@@ -392,24 +399,20 @@ host_statistics(
 
                return (KERN_SUCCESS);
        }
-                
-       case HOST_CPU_LOAD_INFO:
-       {
-               register processor_t    processor;
-               host_cpu_load_info_t    cpu_load_info;
+
+       case HOST_CPU_LOAD_INFO: {
+               register processor_t processor;
+               host_cpu_load_info_t cpu_load_info;
 
                if (*count < HOST_CPU_LOAD_INFO_COUNT)
                        return (KERN_FAILURE);
 
-#define GET_TICKS_VALUE(state, ticks)                   \
-MACRO_BEGIN                                                             \
-       cpu_load_info->cpu_ticks[(state)] +=                             \
-               (uint32_t)(ticks / hz_tick_interval);                    \
-MACRO_END
-#define GET_TICKS_VALUE_FROM_TIMER(processor, state, timer)                     \
-MACRO_BEGIN                                                             \
-       GET_TICKS_VALUE(state, timer_grab(&PROCESSOR_DATA(processor, timer))); \
-MACRO_END
+#define GET_TICKS_VALUE(state, ticks)                                                      \
+       MACRO_BEGIN cpu_load_info->cpu_ticks[(state)] += (uint32_t)(ticks / hz_tick_interval); \
+       MACRO_END
+#define GET_TICKS_VALUE_FROM_TIMER(processor, state, timer)                            \
+       MACRO_BEGIN GET_TICKS_VALUE(state, timer_grab(&PROCESSOR_DATA(processor, timer))); \
+       MACRO_END
 
                cpu_load_info = (host_cpu_load_info_t)info;
                cpu_load_info->cpu_ticks[CPU_STATE_USER] = 0;
@@ -420,10 +423,10 @@ MACRO_END
                simple_lock(&processor_list_lock);
 
                for (processor = processor_list; processor != NULL; processor = processor->processor_list) {
-                       timer_t                 idle_state;
-                       uint64_t                idle_time_snapshot1, idle_time_snapshot2;
-                       uint64_t                idle_time_tstamp1, idle_time_tstamp2;
-                       
+                       timer_t idle_state;
+                       uint64_t idle_time_snapshot1, idle_time_snapshot2;
+                       uint64_t idle_time_tstamp1, idle_time_tstamp2;
+
                        /* See discussion in processor_info(PROCESSOR_CPU_LOAD_INFO) */
 
                        GET_TICKS_VALUE_FROM_TIMER(processor, CPU_STATE_USER, user_state);
@@ -437,12 +440,12 @@ MACRO_END
                        idle_state = &PROCESSOR_DATA(processor, idle_state);
                        idle_time_snapshot1 = timer_grab(idle_state);
                        idle_time_tstamp1 = idle_state->tstamp;
-                       
+
                        if (PROCESSOR_DATA(processor, current_state) != idle_state) {
                                /* Processor is non-idle, so idle timer should be accurate */
                                GET_TICKS_VALUE_FROM_TIMER(processor, CPU_STATE_IDLE, idle_state);
                        } else if ((idle_time_snapshot1 != (idle_time_snapshot2 = timer_grab(idle_state))) ||
-                                          (idle_time_tstamp1 != (idle_time_tstamp2 = idle_state->tstamp))){
+                                  (idle_time_tstamp1 != (idle_time_tstamp2 = idle_state->tstamp))) {
                                /* Idle timer is being updated concurrently, second stamp is good enough */
                                GET_TICKS_VALUE(CPU_STATE_IDLE, idle_time_snapshot2);
                        } else {
@@ -451,7 +454,7 @@ MACRO_END
                                 * that idle_time_snapshot1 and idle_time_tstamp1 are unchanging
                                 */
                                idle_time_snapshot1 += mach_absolute_time() - idle_time_tstamp1;
-                               
+
                                GET_TICKS_VALUE(CPU_STATE_IDLE, idle_time_snapshot1);
                        }
                }
@@ -462,8 +465,7 @@ MACRO_END
                return (KERN_SUCCESS);
        }
 
-       case HOST_EXPIRED_TASK_INFO:
-       {
+       case HOST_EXPIRED_TASK_INFO: {
                if (*count < TASK_POWER_INFO_COUNT) {
                        return (KERN_FAILURE);
                }
@@ -481,222 +483,200 @@ MACRO_END
                tinfo->total_system = dead_task_statistics.total_system_time;
 
                return (KERN_SUCCESS);
-
        }
-       default:
-               return (KERN_INVALID_ARGUMENT);
+       default: return (KERN_INVALID_ARGUMENT);
        }
 }
 
-extern uint32_t        c_segment_pages_compressed;
+extern uint32_t c_segment_pages_compressed;
 
 kern_return_t
-host_statistics64(
-       host_t                          host,
-       host_flavor_t                   flavor,
-       host_info64_t                   info,
-       mach_msg_type_number_t          *count)
+host_statistics64(host_t host, host_flavor_t flavor, host_info64_t info, mach_msg_type_number_t * count)
 {
-       uint32_t        i;
-       
+       uint32_t i;
+
        if (host == HOST_NULL)
                return (KERN_INVALID_HOST);
-       
-       switch(flavor) {
-
-               case HOST_VM_INFO64: /* We were asked to get vm_statistics64 */
-               {
-                       register processor_t            processor;
-                       register vm_statistics64_t      stat;
-                       vm_statistics64_data_t          host_vm_stat;
-                       mach_msg_type_number_t          original_count;
-                       unsigned int                    local_q_internal_count;
-                       unsigned int                    local_q_external_count;
-
-                       if (*count < HOST_VM_INFO64_REV0_COUNT)
-                               return (KERN_FAILURE);
-
-                       processor = processor_list;
-                       stat = &PROCESSOR_DATA(processor, vm_stat);
-                       host_vm_stat = *stat;
-
-                       if (processor_count > 1) {
-                               simple_lock(&processor_list_lock);
-
-                               while ((processor = processor->processor_list) != NULL) {
-                                       stat = &PROCESSOR_DATA(processor, vm_stat);
-
-                                       host_vm_stat.zero_fill_count += stat->zero_fill_count;
-                                       host_vm_stat.reactivations += stat->reactivations;
-                                       host_vm_stat.pageins += stat->pageins;
-                                       host_vm_stat.pageouts += stat->pageouts;
-                                       host_vm_stat.faults += stat->faults;
-                                       host_vm_stat.cow_faults += stat->cow_faults;
-                                       host_vm_stat.lookups += stat->lookups;
-                                       host_vm_stat.hits += stat->hits;
-                                       host_vm_stat.compressions += stat->compressions;
-                                       host_vm_stat.decompressions += stat->decompressions;
-                                       host_vm_stat.swapins += stat->swapins;
-                                       host_vm_stat.swapouts += stat->swapouts;
-                               }
-
-                               simple_unlock(&processor_list_lock);
-                       }
 
-                       stat = (vm_statistics64_t) info;
-
-                       stat->free_count = vm_page_free_count + vm_page_speculative_count;
-                       stat->active_count = vm_page_active_count;
-
-                       local_q_internal_count = 0;
-                       local_q_external_count = 0;
-                       if (vm_page_local_q) {
-                               for (i = 0; i < vm_page_local_q_count; i++) {
-                                       struct vpl      *lq;
-                               
-                                       lq = &vm_page_local_q[i].vpl_un.vpl;
-
-                                       stat->active_count += lq->vpl_count;
-                                       local_q_internal_count +=
-                                               lq->vpl_internal_count;
-                                       local_q_external_count +=
-                                               lq->vpl_external_count;
-                               }
-                       }
-                       stat->inactive_count = vm_page_inactive_count;
-                       stat->wire_count = vm_page_wire_count + vm_page_throttled_count + vm_lopage_free_count;
-                       stat->zero_fill_count = host_vm_stat.zero_fill_count;
-                       stat->reactivations = host_vm_stat.reactivations;
-                       stat->pageins = host_vm_stat.pageins;
-                       stat->pageouts = host_vm_stat.pageouts;
-                       stat->faults = host_vm_stat.faults;
-                       stat->cow_faults = host_vm_stat.cow_faults;
-                       stat->lookups = host_vm_stat.lookups;
-                       stat->hits = host_vm_stat.hits;
-               
-                       stat->purgeable_count = vm_page_purgeable_count;
-                       stat->purges = vm_page_purged_count;
-               
-                       stat->speculative_count = vm_page_speculative_count;
-
-                       /*
-                        * Fill in extra info added in later revisions of the
-                        * vm_statistics data structure.  Fill in only what can fit
-                        * in the data structure the caller gave us !
-                        */
-                       original_count = *count;
-                       *count = HOST_VM_INFO64_REV0_COUNT; /* rev0 already filled in */
-                       if (original_count >= HOST_VM_INFO64_REV1_COUNT) {
-                               /* rev1 added "throttled count" */
-                               stat->throttled_count = vm_page_throttled_count;
-                               /* rev1 added "compression" info */
-                               stat->compressor_page_count = VM_PAGE_COMPRESSOR_COUNT;
-                               stat->compressions = host_vm_stat.compressions;
-                               stat->decompressions = host_vm_stat.decompressions;
-                               stat->swapins = host_vm_stat.swapins;
-                               stat->swapouts = host_vm_stat.swapouts;
-                               /* rev1 added:
-                                * "external page count"
-                                * "anonymous page count"
-                                * "total # of pages (uncompressed) held in the compressor"
-                                */
-                               stat->external_page_count =
-                                       (vm_page_pageable_external_count +
-                                        local_q_external_count);
-                               stat->internal_page_count =
-                                       (vm_page_pageable_internal_count +
-                                        local_q_internal_count);
-                               stat->total_uncompressed_pages_in_compressor = c_segment_pages_compressed;
-                               *count = HOST_VM_INFO64_REV1_COUNT;
+       switch (flavor) {
+       case HOST_VM_INFO64: /* We were asked to get vm_statistics64 */
+       {
+               register processor_t processor;
+               register vm_statistics64_t stat;
+               vm_statistics64_data_t host_vm_stat;
+               mach_msg_type_number_t original_count;
+               unsigned int local_q_internal_count;
+               unsigned int local_q_external_count;
+
+               if (*count < HOST_VM_INFO64_REV0_COUNT)
+                       return (KERN_FAILURE);
+
+               processor = processor_list;
+               stat = &PROCESSOR_DATA(processor, vm_stat);
+               host_vm_stat = *stat;
+
+               if (processor_count > 1) {
+                       simple_lock(&processor_list_lock);
+
+                       while ((processor = processor->processor_list) != NULL) {
+                               stat = &PROCESSOR_DATA(processor, vm_stat);
+
+                               host_vm_stat.zero_fill_count += stat->zero_fill_count;
+                               host_vm_stat.reactivations += stat->reactivations;
+                               host_vm_stat.pageins += stat->pageins;
+                               host_vm_stat.pageouts += stat->pageouts;
+                               host_vm_stat.faults += stat->faults;
+                               host_vm_stat.cow_faults += stat->cow_faults;
+                               host_vm_stat.lookups += stat->lookups;
+                               host_vm_stat.hits += stat->hits;
+                               host_vm_stat.compressions += stat->compressions;
+                               host_vm_stat.decompressions += stat->decompressions;
+                               host_vm_stat.swapins += stat->swapins;
+                               host_vm_stat.swapouts += stat->swapouts;
                        }
 
-                       return(KERN_SUCCESS);
+                       simple_unlock(&processor_list_lock);
                }
 
-               case HOST_EXTMOD_INFO64: /* We were asked to get vm_statistics64 */
-               {
-                       vm_extmod_statistics_t          out_extmod_statistics;
+               stat = (vm_statistics64_t)info;
+
+               stat->free_count = vm_page_free_count + vm_page_speculative_count;
+               stat->active_count = vm_page_active_count;
 
-                       if (*count < HOST_EXTMOD_INFO64_COUNT)
-                               return (KERN_FAILURE);
+               local_q_internal_count = 0;
+               local_q_external_count = 0;
+               if (vm_page_local_q) {
+                       for (i = 0; i < vm_page_local_q_count; i++) {
+                               struct vpl * lq;
 
-                       out_extmod_statistics = (vm_extmod_statistics_t) info;
-                       *out_extmod_statistics = host_extmod_statistics;
+                               lq = &vm_page_local_q[i].vpl_un.vpl;
 
-                       *count = HOST_EXTMOD_INFO64_COUNT;      
+                               stat->active_count += lq->vpl_count;
+                               local_q_internal_count += lq->vpl_internal_count;
+                               local_q_external_count += lq->vpl_external_count;
+                       }
+               }
+               stat->inactive_count = vm_page_inactive_count;
+               stat->wire_count = vm_page_wire_count + vm_page_throttled_count + vm_lopage_free_count;
+               stat->zero_fill_count = host_vm_stat.zero_fill_count;
+               stat->reactivations = host_vm_stat.reactivations;
+               stat->pageins = host_vm_stat.pageins;
+               stat->pageouts = host_vm_stat.pageouts;
+               stat->faults = host_vm_stat.faults;
+               stat->cow_faults = host_vm_stat.cow_faults;
+               stat->lookups = host_vm_stat.lookups;
+               stat->hits = host_vm_stat.hits;
+
+               stat->purgeable_count = vm_page_purgeable_count;
+               stat->purges = vm_page_purged_count;
+
+               stat->speculative_count = vm_page_speculative_count;
 
-                       return(KERN_SUCCESS);
+               /*
+                * Fill in extra info added in later revisions of the
+                * vm_statistics data structure.  Fill in only what can fit
+                * in the data structure the caller gave us !
+                */
+               original_count = *count;
+               *count = HOST_VM_INFO64_REV0_COUNT; /* rev0 already filled in */
+               if (original_count >= HOST_VM_INFO64_REV1_COUNT) {
+                       /* rev1 added "throttled count" */
+                       stat->throttled_count = vm_page_throttled_count;
+                       /* rev1 added "compression" info */
+                       stat->compressor_page_count = VM_PAGE_COMPRESSOR_COUNT;
+                       stat->compressions = host_vm_stat.compressions;
+                       stat->decompressions = host_vm_stat.decompressions;
+                       stat->swapins = host_vm_stat.swapins;
+                       stat->swapouts = host_vm_stat.swapouts;
+                       /* rev1 added:
+                        * "external page count"
+                        * "anonymous page count"
+                        * "total # of pages (uncompressed) held in the compressor"
+                        */
+                       stat->external_page_count = (vm_page_pageable_external_count + local_q_external_count);
+                       stat->internal_page_count = (vm_page_pageable_internal_count + local_q_internal_count);
+                       stat->total_uncompressed_pages_in_compressor = c_segment_pages_compressed;
+                       *count = HOST_VM_INFO64_REV1_COUNT;
                }
 
-               default: /* If we didn't recognize the flavor, send to host_statistics */
-                       return(host_statistics(host, flavor, (host_info_t) info, count)); 
+               return (KERN_SUCCESS);
        }
-}
 
+       case HOST_EXTMOD_INFO64: /* We were asked to get vm_statistics64 */
+       {
+               vm_extmod_statistics_t out_extmod_statistics;
+
+               if (*count < HOST_EXTMOD_INFO64_COUNT)
+                       return (KERN_FAILURE);
+
+               out_extmod_statistics = (vm_extmod_statistics_t)info;
+               *out_extmod_statistics = host_extmod_statistics;
+
+               *count = HOST_EXTMOD_INFO64_COUNT;
+
+               return (KERN_SUCCESS);
+       }
+
+       default: /* If we didn't recognize the flavor, send to host_statistics */
+               return (host_statistics(host, flavor, (host_info_t)info, count));
+       }
+}
 
 /*
  * Get host statistics that require privilege.
  * None for now, just call the un-privileged version.
  */
 kern_return_t
-host_priv_statistics(
-       host_priv_t             host_priv,
-       host_flavor_t           flavor,
-       host_info_t             info,
-       mach_msg_type_number_t  *count)
+host_priv_statistics(host_priv_t host_priv, host_flavor_t flavor, host_info_t info, mach_msg_type_number_t * count)
 {
-       return(host_statistics((host_t)host_priv, flavor, info, count));
+       return (host_statistics((host_t)host_priv, flavor, info, count));
 }
 
 kern_return_t
-set_sched_stats_active(
-               boolean_t active) 
+set_sched_stats_active(boolean_t active)
 {
        sched_stats_active = active;
-       return KERN_SUCCESS;
+       return (KERN_SUCCESS);
 }
 
-
 kern_return_t
-get_sched_statistics( 
-               struct _processor_statistics_np *out, 
-               uint32_t *count)
+get_sched_statistics(struct _processor_statistics_np * out, uint32_t * count)
 {
        processor_t processor;
 
        if (!sched_stats_active) {
-               return KERN_FAILURE;
+               return (KERN_FAILURE);
        }
 
        simple_lock(&processor_list_lock);
-       
-       if (*count < (processor_count + 2) * sizeof(struct _processor_statistics_np)) { /* One for RT, one for FS */
+
+       if (*count < (processor_count + 1) * sizeof(struct _processor_statistics_np)) { /* One for RT */
                simple_unlock(&processor_list_lock);
-               return KERN_FAILURE;
+               return (KERN_FAILURE);
        }
 
        processor = processor_list;
        while (processor) {
-               struct processor_sched_statistics *stats = &processor->processor_data.sched_stats;
-
-               out->ps_cpuid                   = processor->cpu_id;
-               out->ps_csw_count               = stats->csw_count;
-               out->ps_preempt_count           = stats->preempt_count;
-               out->ps_preempted_rt_count      = stats->preempted_rt_count;
-               out->ps_preempted_by_rt_count   = stats->preempted_by_rt_count;
-               out->ps_rt_sched_count          = stats->rt_sched_count;
-               out->ps_interrupt_count         = stats->interrupt_count;
-               out->ps_ipi_count               = stats->ipi_count;
-               out->ps_timer_pop_count         = stats->timer_pop_count;
-               out->ps_runq_count_sum          = SCHED(processor_runq_stats_count_sum)(processor);
-               out->ps_idle_transitions        = stats->idle_transitions;
-               out->ps_quantum_timer_expirations       = stats->quantum_timer_expirations;
+               struct processor_sched_statistics * stats = &processor->processor_data.sched_stats;
+
+               out->ps_cpuid = processor->cpu_id;
+               out->ps_csw_count = stats->csw_count;
+               out->ps_preempt_count = stats->preempt_count;
+               out->ps_preempted_rt_count = stats->preempted_rt_count;
+               out->ps_preempted_by_rt_count = stats->preempted_by_rt_count;
+               out->ps_rt_sched_count = stats->rt_sched_count;
+               out->ps_interrupt_count = stats->interrupt_count;
+               out->ps_ipi_count = stats->ipi_count;
+               out->ps_timer_pop_count = stats->timer_pop_count;
+               out->ps_runq_count_sum = SCHED(processor_runq_stats_count_sum)(processor);
+               out->ps_idle_transitions = stats->idle_transitions;
+               out->ps_quantum_timer_expirations = stats->quantum_timer_expirations;
 
                out++;
                processor = processor->processor_list;
        }
 
-       *count = (uint32_t) (processor_count * sizeof(struct _processor_statistics_np));
+       *count = (uint32_t)(processor_count * sizeof(struct _processor_statistics_np));
 
        simple_unlock(&processor_list_lock);
 
@@ -707,47 +687,35 @@ get_sched_statistics(
        out++;
        *count += (uint32_t)sizeof(struct _processor_statistics_np);
 
-       /* And include Fair Share Queue information at the end */
-       bzero(out, sizeof(*out));
-       out->ps_cpuid = (-2);
-       out->ps_runq_count_sum = SCHED(fairshare_runq_stats_count_sum)();
-       *count += (uint32_t)sizeof(struct _processor_statistics_np);
-       
-       return KERN_SUCCESS;
+       return (KERN_SUCCESS);
 }
 
 kern_return_t
-host_page_size(
-       host_t          host,
-       vm_size_t       *out_page_size)
+host_page_size(host_t host, vm_size_t * out_page_size)
 {
        if (host == HOST_NULL)
-               return(KERN_INVALID_ARGUMENT);
+               return (KERN_INVALID_ARGUMENT);
 
-       vm_map_t map = get_task_map(current_task());
-       *out_page_size = vm_map_page_size(map);
+       *out_page_size = PAGE_SIZE;
 
-       return(KERN_SUCCESS);
+       return (KERN_SUCCESS);
 }
 
 /*
  *     Return kernel version string (more than you ever
  *     wanted to know about what version of the kernel this is).
  */
-extern char    version[];
+extern char version[];
 
 kern_return_t
-host_kernel_version(
-       host_t                  host,
-       kernel_version_t        out_version)
+host_kernel_version(host_t host, kernel_version_t out_version)
 {
-
        if (host == HOST_NULL)
-               return(KERN_INVALID_ARGUMENT);
+               return (KERN_INVALID_ARGUMENT);
 
-       (void) strncpy(out_version, version, sizeof(kernel_version_t));
+       (void)strncpy(out_version, version, sizeof(kernel_version_t));
 
-       return(KERN_SUCCESS);
+       return (KERN_SUCCESS);
 }
 
 /*
@@ -756,12 +724,9 @@ host_kernel_version(
  *     List all processor sets on the host.
  */
 kern_return_t
-host_processor_sets(
-       host_priv_t                     host_priv,
-       processor_set_name_array_t      *pset_list,
-       mach_msg_type_number_t          *count)
+host_processor_sets(host_priv_t host_priv, processor_set_name_array_t * pset_list, mach_msg_type_number_t * count)
 {
-       void *addr;
+       void * addr;
 
        if (host_priv == HOST_PRIV_NULL)
                return (KERN_INVALID_ARGUMENT);
@@ -771,12 +736,12 @@ host_processor_sets(
         *      touched while holding a lock.
         */
 
-       addr = kalloc((vm_size_t) sizeof(mach_port_t));
+       addr = kalloc((vm_size_t)sizeof(mach_port_t));
        if (addr == 0)
                return (KERN_RESOURCE_SHORTAGE);
 
        /* do the conversion that Mig should handle */
-       *((ipc_port_t *) addr) = convert_pset_name_to_port(&pset0);
+       *((ipc_port_t *)addr) = convert_pset_name_to_port(&pset0);
 
        *pset_list = (processor_set_array_t)addr;
        *count = 1;
@@ -790,20 +755,17 @@ host_processor_sets(
  *     Return control port for given processor set.
  */
 kern_return_t
-host_processor_set_priv(
-       host_priv_t     host_priv,
-       processor_set_t pset_name,
-       processor_set_t *pset)
+host_processor_set_priv(host_priv_t host_priv, processor_set_t pset_name, processor_set_t * pset)
 {
-    if (host_priv == HOST_PRIV_NULL || pset_name == PROCESSOR_SET_NULL) {
+       if (host_priv == HOST_PRIV_NULL || pset_name == PROCESSOR_SET_NULL) {
                *pset = PROCESSOR_SET_NULL;
 
                return (KERN_INVALID_ARGUMENT);
-    }
+       }
 
-    *pset = pset_name;
+       *pset = pset_name;
 
-    return (KERN_SUCCESS);
+       return (KERN_SUCCESS);
 }
 
 /*
@@ -814,22 +776,21 @@ host_processor_set_priv(
  *     in an OOL array.
  */
 kern_return_t
-host_processor_info(
-       host_t                                  host,
-       processor_flavor_t              flavor,
-       natural_t                               *out_pcount,
-       processor_info_array_t  *out_array,
-       mach_msg_type_number_t  *out_array_count)
+host_processor_info(host_t host,
+                    processor_flavor_t flavor,
+                    natural_t * out_pcount,
+                    processor_info_array_t * out_array,
+                    mach_msg_type_number_t * out_array_count)
 {
-       kern_return_t                   result;
-       processor_t                             processor;
-       host_t                                  thost;
-       processor_info_t                info;
-       unsigned int                    icount, tcount;
-       unsigned int                    pcount, i;
-       vm_offset_t                             addr;
-       vm_size_t                               size, needed;
-       vm_map_copy_t                   copy;
+       kern_return_t result;
+       processor_t processor;
+       host_t thost;
+       processor_info_t info;
+       unsigned int icount, tcount;
+       unsigned int pcount, i;
+       vm_offset_t addr;
+       vm_size_t size, needed;
+       vm_map_copy_t copy;
 
        if (host == HOST_NULL)
                return (KERN_INVALID_ARGUMENT);
@@ -842,13 +803,12 @@ host_processor_info(
        assert(pcount != 0);
 
        needed = pcount * icount * sizeof(natural_t);
-       size = vm_map_round_page(needed,
-                                VM_MAP_PAGE_MASK(ipc_kernel_map));
-       result = kmem_alloc(ipc_kernel_map, &addr, size);
+       size = vm_map_round_page(needed, VM_MAP_PAGE_MASK(ipc_kernel_map));
+       result = kmem_alloc(ipc_kernel_map, &addr, size, VM_KERN_MEMORY_IPC);
        if (result != KERN_SUCCESS)
                return (KERN_RESOURCE_SHORTAGE);
 
-       info = (processor_info_t) addr;
+       info = (processor_info_t)addr;
        processor = processor_list;
        tcount = icount;
 
@@ -874,23 +834,17 @@ host_processor_info(
                }
        }
 
-       if (size != needed) 
-               bzero((char *) addr + needed, size - needed);
+       if (size != needed)
+               bzero((char *)addr + needed, size - needed);
 
-       result = vm_map_unwire(
-               ipc_kernel_map,
-               vm_map_trunc_page(addr,
-                                 VM_MAP_PAGE_MASK(ipc_kernel_map)),
-               vm_map_round_page(addr + size,
-                                 VM_MAP_PAGE_MASK(ipc_kernel_map)),
-               FALSE);
+       result = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(ipc_kernel_map)),
+                              vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(ipc_kernel_map)), FALSE);
        assert(result == KERN_SUCCESS);
-       result = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr,
-                              (vm_map_size_t)size, TRUE, &copy);
+       result = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr, (vm_map_size_t)size, TRUE, &copy);
        assert(result == KERN_SUCCESS);
 
        *out_pcount = pcount;
-       *out_array = (processor_info_array_t) copy;
+       *out_array = (processor_info_array_t)copy;
        *out_array_count = pcount * icount;
 
        return (KERN_SUCCESS);
@@ -900,10 +854,7 @@ host_processor_info(
  *      Kernel interface for setting a special port.
  */
 kern_return_t
-kernel_set_special_port(
-       host_priv_t     host_priv,              
-       int             id,
-       ipc_port_t      port)
+kernel_set_special_port(host_priv_t host_priv, int id, ipc_port_t port)
 {
        ipc_port_t old_port;
 
@@ -913,7 +864,7 @@ kernel_set_special_port(
        host_unlock(host_priv);
        if (IP_VALID(old_port))
                ipc_port_release_send(old_port);
-       return KERN_SUCCESS;
+       return (KERN_SUCCESS);
 }
 
 /*
@@ -926,19 +877,18 @@ kernel_set_special_port(
  *      routine; use kernel_set_special_port() instead.
  */
 kern_return_t
-host_set_special_port(
-        host_priv_t     host_priv,
-        int             id,
-        ipc_port_t      port)
+host_set_special_port(host_priv_t host_priv, int id, ipc_port_t port)
 {
-       if (host_priv == HOST_PRIV_NULL ||
-           id <= HOST_MAX_SPECIAL_KERNEL_PORT || id > HOST_MAX_SPECIAL_PORT ) {
-               return KERN_INVALID_ARGUMENT;
-       }
+       if (host_priv == HOST_PRIV_NULL || id <= HOST_MAX_SPECIAL_KERNEL_PORT || id > HOST_MAX_SPECIAL_PORT)
+               return (KERN_INVALID_ARGUMENT);
 
-       return kernel_set_special_port(host_priv, id, port);
-}
+#if CONFIG_MACF
+       if (mac_task_check_set_host_special_port(current_task(), id, port) != 0)
+               return (KERN_NO_ACCESS);
+#endif
 
+       return (kernel_set_special_port(host_priv, id, port));
+}
 
 /*
  *      User interface for retrieving a special port.
@@ -950,39 +900,31 @@ host_set_special_port(
  */
 
 kern_return_t
-host_get_special_port(
-        host_priv_t     host_priv,
-        __unused int    node,
-        int             id,
-        ipc_port_t      *portp)
+host_get_special_port(host_priv_t host_priv, __unused int node, int id, ipc_port_t * portp)
 {
-       ipc_port_t      port;
+       ipc_port_t port;
 
-       if (host_priv == HOST_PRIV_NULL ||
-           id == HOST_SECURITY_PORT || id > HOST_MAX_SPECIAL_PORT || id < 0)
-               return KERN_INVALID_ARGUMENT;
+       if (host_priv == HOST_PRIV_NULL || id == HOST_SECURITY_PORT || id > HOST_MAX_SPECIAL_PORT || id < 0)
+               return (KERN_INVALID_ARGUMENT);
 
        host_lock(host_priv);
        port = realhost.special[id];
        *portp = ipc_port_copy_send(port);
        host_unlock(host_priv);
 
-       return KERN_SUCCESS;
+       return (KERN_SUCCESS);
 }
 
-
 /*
- *     host_get_io_master
+ *     host_get_io_master
  *
  *     Return the IO master access port for this host.
  */
 kern_return_t
-host_get_io_master(
-        host_t host,
-        io_master_t *io_masterp)
+host_get_io_master(host_t host, io_master_t * io_masterp)
 {
        if (host == HOST_NULL)
-               return KERN_INVALID_ARGUMENT;
+               return (KERN_INVALID_ARGUMENT);
 
        return (host_get_io_master_port(host_priv_self(), io_masterp));
 }
@@ -990,18 +932,33 @@ host_get_io_master(
 host_t
 host_self(void)
 {
-  return &realhost;
+       return (&realhost);
 }
 
 host_priv_t
 host_priv_self(void)
 {
-  return &realhost;
+       return (&realhost);
 }
 
 host_security_t
 host_security_self(void)
 {
-  return &realhost;
+       return (&realhost);
+}
+
+kern_return_t
+host_set_atm_diagnostic_flag(host_priv_t host_priv, uint32_t diagnostic_flag)
+{
+       if (host_priv == HOST_PRIV_NULL)
+               return (KERN_INVALID_ARGUMENT);
+
+       assert(host_priv == &realhost);
+
+#if CONFIG_ATM
+       return (atm_set_diagnostic_config(diagnostic_flag));
+#else
+       (void)diagnostic_flag;
+       return (KERN_NOT_SUPPORTED);
+#endif
 }
-         
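Most of the churn in this file's diff is mechanical reformatting (one-line prototypes, normalized casts and whitespace). The functional changes are: a new HOST_DEBUG_INFO_INTERNAL flavor of host_info that reports which kernel config options (coalitions, bank, ATM, CSR) are compiled in on DEVELOPMENT/DEBUG builds, removal of the fair-share queue row from get_sched_statistics, host_page_size now reporting the kernel's PAGE_SIZE instead of the calling task's VM map page size, a MACF check in host_set_special_port, a VM tag on the kmem_alloc in host_processor_info, and the new host_set_atm_diagnostic_flag routine. From a caller's perspective the statistics interfaces are unchanged; the following user-space example, illustrative only and not part of this diff, exercises host_statistics64 and host_page_size.

    /*
     * Illustrative user-space caller: fetch 64-bit VM statistics and the
     * host page size.
     */
    #include <stdio.h>
    #include <mach/mach.h>
    #include <mach/mach_host.h>
    #include <mach/mach_error.h>

    int
    main(void)
    {
            vm_statistics64_data_t vmstat;
            mach_msg_type_number_t count = HOST_VM_INFO64_COUNT;
            vm_size_t page_size = 0;
            kern_return_t kr;

            kr = host_statistics64(mach_host_self(), HOST_VM_INFO64,
                                   (host_info64_t)&vmstat, &count);
            if (kr != KERN_SUCCESS) {
                    fprintf(stderr, "host_statistics64: %s\n", mach_error_string(kr));
                    return 1;
            }

            /* After this change the kernel reports PAGE_SIZE here. */
            host_page_size(mach_host_self(), &page_size);

            printf("free pages %u, active pages %u, page size %lu\n",
                   vmstat.free_count, vmstat.active_count,
                   (unsigned long)page_size);
            return 0;
    }
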
index c60df98863f3de3d366d7c55dbc0e0c9150c4ad6..6803be30a1410031555dd609bd2bfd3f2b15dfb9 100644 (file)
@@ -48,7 +48,6 @@ hv_callbacks_t hv_callbacks = {
        .thread_destroy = NULL, /* thread is being destroyed */
        .task_destroy = NULL,   /* task is being destroyed */
        .volatile_state = NULL, /* thread state is becoming volatile */
-       .memory_pressure = NULL /* memory pressure notification */
 };
 
 /* trap tables for hv_*_trap syscalls */
@@ -64,12 +63,8 @@ static hv_trap_table_t hv_trap_table[] = {
 };
 
 static int hv_callbacks_enabled = 0;
-static int hv_mp_notify_enabled = 0;
-static int hv_mp_notify_destroy = 0;
 static lck_grp_t *hv_support_lck_grp = NULL;
 static lck_mtx_t *hv_support_lck_mtx = NULL;
-static thread_t hv_mp_notify_thread = THREAD_NULL;
-static void hv_mp_notify(void);
 
 /* hv_support boot initialization */
 void
@@ -130,65 +125,6 @@ hv_get_volatile_state(hv_volatile_state_t state) {
        return is_volatile;
 }
 
-/* memory pressure monitor thread */
-static void
-hv_mp_notify(void) {
-       while (1) {
-               mach_vm_pressure_monitor(TRUE, 0, NULL, NULL);
-
-               lck_mtx_lock(hv_support_lck_mtx);
-               if (hv_mp_notify_destroy == 1) {
-                       hv_mp_notify_destroy = 0;
-                       hv_mp_notify_enabled = 0;
-                       lck_mtx_unlock(hv_support_lck_mtx);
-                       break;
-               } else {
-                       hv_callbacks.memory_pressure();
-               }
-               lck_mtx_unlock(hv_support_lck_mtx);
-       }
-
-       thread_deallocate(current_thread());
-}
-
-/* subscribe to memory pressure notifications */
-kern_return_t
-hv_set_mp_notify(void) {
-       kern_return_t kr;
-
-       lck_mtx_lock(hv_support_lck_mtx);
-       if (hv_callbacks_enabled == 0) {
-               lck_mtx_unlock(hv_support_lck_mtx);
-               return KERN_FAILURE;
-       }
-
-       if (hv_mp_notify_enabled == 1) {
-               hv_mp_notify_destroy = 0;
-               lck_mtx_unlock(hv_support_lck_mtx);
-               return KERN_SUCCESS;
-       }
-
-       kr = kernel_thread_start((thread_continue_t) &hv_mp_notify, NULL,
-               &hv_mp_notify_thread);
-
-       if (kr == KERN_SUCCESS) {
-               hv_mp_notify_enabled = 1;
-       }
-       lck_mtx_unlock(hv_support_lck_mtx);
-
-       return kr;
-}
-
-/* unsubscribe from memory pressure notifications */
-void
-hv_release_mp_notify(void) {
-       lck_mtx_lock(hv_support_lck_mtx);
-       if (hv_mp_notify_enabled == 1) {
-               hv_mp_notify_destroy = 1;
-       }
-       lck_mtx_unlock(hv_support_lck_mtx);
-}
-
 /* register a list of trap handlers for the hv_*_trap syscalls */
 kern_return_t
 hv_set_traps(hv_trap_type_t trap_type, const hv_trap_t *traps,
index aaedb76ae5e0c604aba4014826cae3493a28d22c..fb2bfe55e58952f5bb0e027bd7294f005bfb7e3c 100644 (file)
@@ -72,8 +72,6 @@ extern void hv_set_thread_target(void *target);
 extern void *hv_get_task_target(void);
 extern void *hv_get_thread_target(void);
 extern int hv_get_volatile_state(hv_volatile_state_t state);
-extern kern_return_t hv_set_mp_notify(void);
-extern void hv_release_mp_notify(void);
 extern kern_return_t hv_set_traps(hv_trap_type_t trap_type,
        const hv_trap_t *traps, unsigned trap_count);
 extern void hv_release_traps(hv_trap_type_t trap_type);
index 73cc4d4d399224a91042670259023fcdb98906a1..88e629de46e15f76036d459a42737001d95da68b 100644 (file)
 #include <ipc/ipc_port.h>
 #include <ipc/ipc_space.h>
 
+#if CONFIG_MACF
+#include <security/mac_mach_internal.h>
+#endif
+
 /*
  * Forward declarations
  */
@@ -533,7 +537,7 @@ convert_port_to_host_security(
  */
 kern_return_t
 host_set_exception_ports(
-       host_priv_t                             host_priv,
+       host_priv_t                     host_priv,
        exception_mask_t                exception_mask,
        ipc_port_t                      new_port,
        exception_behavior_t            new_behavior,
@@ -546,8 +550,6 @@ host_set_exception_ports(
                return KERN_INVALID_ARGUMENT;
        }
 
-       assert(host_priv == &realhost);
-
        if (exception_mask & ~EXC_MASK_VALID) {
                return KERN_INVALID_ARGUMENT;
        }
@@ -571,6 +573,13 @@ host_set_exception_ports(
        if (new_flavor != 0 && !VALID_THREAD_STATE_FLAVOR(new_flavor))
                return (KERN_INVALID_ARGUMENT);
 
+#if CONFIG_MACF
+       if (mac_task_check_set_host_exception_ports(current_task(), exception_mask) != 0)
+               return KERN_NO_ACCESS;
+#endif
+
+       assert(host_priv == &realhost);
+
        host_lock(host_priv);
 
        for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) {
@@ -676,7 +685,7 @@ host_get_exception_ports(
 
 kern_return_t
 host_swap_exception_ports(
-       host_priv_t                             host_priv,
+       host_priv_t                     host_priv,
        exception_mask_t                exception_mask,
        ipc_port_t                      new_port,
        exception_behavior_t            new_behavior,
@@ -713,6 +722,11 @@ host_swap_exception_ports(
        if (new_flavor != 0 && !VALID_THREAD_STATE_FLAVOR(new_flavor))
                return (KERN_INVALID_ARGUMENT);
 
+#if CONFIG_MACF
+       if (mac_task_check_set_host_exception_ports(current_task(), exception_mask) != 0)
+               return KERN_NO_ACCESS;
+#endif /* CONFIG_MACF */
+
        host_lock(host_priv);
 
        assert(EXC_TYPES_COUNT > FIRST_EXCEPTION);
index 6fe3e7b9294c01bedadea7b6ec71ced483c51dc4..1789ae5f6bf16346d03b2212d339f71e71fca18f 100644 (file)
 #include <ipc/ipc_kmsg.h>
 #include <ipc/ipc_port.h>
 #include <ipc/ipc_voucher.h>
+#include <kern/sync_sema.h>
 #include <kern/counters.h>
 
 #include <vm/vm_protos.h>
@@ -532,46 +533,49 @@ ipc_kobject_notify(
        ((mig_reply_error_t *) reply_header)->RetCode = MIG_NO_REPLY;
        switch (request_header->msgh_id) {
                case MACH_NOTIFY_NO_SENDERS:
-                  if (ip_kotype(port) == IKOT_VOUCHER) {
-                          ipc_voucher_notify(request_header);
-                          return TRUE;
-                  }
-                  if (ip_kotype(port) == IKOT_VOUCHER_ATTR_CONTROL) {
-                          ipc_voucher_attr_control_notify(request_header);
-                          return TRUE;
-                  }
-                  if(ip_kotype(port) == IKOT_NAMED_ENTRY) {
-                       ip_lock(port);
-
-                       /*
-                        * Bring the sequence number and mscount in
-                        * line with ipc_port_destroy assertion.
-                        */
-                       port->ip_mscount = 0;
-                       port->ip_messages.imq_seqno = 0;
-                       ipc_port_destroy(port); /* releases lock */
-                       return TRUE;
-                  }
-                  if (ip_kotype(port) == IKOT_UPL) {
-                          upl_no_senders(
-                               request_header->msgh_remote_port, 
-                               (mach_port_mscount_t) 
-                               ((mach_no_senders_notification_t *) 
-                                request_header)->not_count);
-                          reply_header->msgh_remote_port = MACH_PORT_NULL;
-                          return TRUE;
-                  }
+                       switch (ip_kotype(port)) {
+                       case IKOT_VOUCHER:
+                               ipc_voucher_notify(request_header);
+                               return TRUE;
+
+                       case IKOT_VOUCHER_ATTR_CONTROL:
+                               ipc_voucher_attr_control_notify(request_header);
+                               return TRUE;
+
+                       case IKOT_SEMAPHORE:
+                               semaphore_notify(request_header);
+                               return TRUE;
+                               
+                       case IKOT_NAMED_ENTRY:
+                               ip_lock(port);
+
+                               /*
+                                * Bring the sequence number and mscount in
+                                * line with ipc_port_destroy assertion.
+                                */
+                               port->ip_mscount = 0;
+                               port->ip_messages.imq_seqno = 0;
+                               ipc_port_destroy(port); /* releases lock */
+                               return TRUE;
+
+                       case IKOT_UPL:
+                               upl_no_senders(
+                                       request_header->msgh_remote_port, 
+                                       (mach_port_mscount_t) 
+                                       ((mach_no_senders_notification_t *) 
+                                        request_header)->not_count);
+                               reply_header->msgh_remote_port = MACH_PORT_NULL;
+                               return TRUE;
+
 #if    CONFIG_AUDIT
-                  if (ip_kotype(port) == IKOT_AU_SESSIONPORT) {
-                          audit_session_nosenders(request_header);
-                          return TRUE;
-                  }
+                       case IKOT_AU_SESSIONPORT:
+                               audit_session_nosenders(request_header);
+                               return TRUE;
 #endif
-                  if (ip_kotype(port) == IKOT_FILEPORT) {
-                       fileport_notify(request_header);
-                       return TRUE;
-                  }
-
+                       case IKOT_FILEPORT:
+                               fileport_notify(request_header);
+                               return TRUE;
+                       }
                   break;
 
                case MACH_NOTIFY_PORT_DELETED:
index 6219ddfbc7c5bd8d7cfe67e14ed3fc562a9a1b0a..7239bdb299b234a1d5275456b91571cb191bc8c4 100644 (file)
@@ -353,11 +353,11 @@ mach_msg_rpc_from_kernel_body(
        for (;;) {
                ipc_mqueue_t mqueue;
 
-               assert(reply->ip_pset_count == 0);
+               assert(reply->ip_in_pset == 0);
                assert(ip_active(reply));
 
                /* JMM - why this check? */
-               if (!self->active) {
+               if (!self->active && !self->inspection) {
                        ipc_port_dealloc_reply(reply);
                        self->ith_rpc_reply = IP_NULL;
                        return MACH_RCV_INTERRUPTED;
@@ -385,7 +385,7 @@ mach_msg_rpc_from_kernel_body(
 
                assert(reply == self->ith_rpc_reply);
 
-               if (self->handlers) {
+               if (self->ast & AST_APC) {
                        ipc_port_dealloc_reply(reply);
                        self->ith_rpc_reply = IP_NULL;
                        return(mr);
index 3e960b1d9abf7cdd77d4f75f27c75da517f4fbe1..fdc418c1dac168c731706147d633dbe84a49bf82 100644 (file)
 #include <ipc/port.h>
 #include <ipc/ipc_space.h>
 #include <ipc/ipc_port.h>
+#include <mach/mach_types.h>
 #include <mach/semaphore.h>
 #include <mach/lock_set_server.h>
 #include <mach/mach_port_server.h>
 #include <mach/port.h>
 
 
+/*
+ *     Routine:        port_name_to_semaphore
+ *     Purpose:
+ *             Convert from a port name in the current space to a semaphore.
+ *             Produces a semaphore ref, which may be null.
+ *     Conditions:
+ *             Nothing locked.
+ */
 kern_return_t
 port_name_to_semaphore(
        mach_port_name_t        name,
        semaphore_t             *semaphorep)
 {
-       semaphore_t semaphore;
        ipc_port_t kern_port;
        kern_return_t kr;
 
@@ -56,7 +64,7 @@ port_name_to_semaphore(
                *semaphorep = SEMAPHORE_NULL;
                return KERN_INVALID_NAME;
        }
-       
+
        kr = ipc_object_translate(current_space(), name, MACH_PORT_RIGHT_SEND,
                                  (ipc_object_t *) &kern_port);
        if (kr != KERN_SUCCESS) {
@@ -66,51 +74,127 @@ port_name_to_semaphore(
        /* have the port locked */
        assert(IP_VALID(kern_port));
 
-       if (!ip_active(kern_port) || (ip_kotype(kern_port) != IKOT_SEMAPHORE)) {
-               ip_unlock(kern_port);
-               *semaphorep = SEMAPHORE_NULL;
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       semaphore = (semaphore_t) kern_port->ip_kobject;
-       assert(semaphore != SEMAPHORE_NULL);
-       semaphore_reference(semaphore);
+       *semaphorep = convert_port_to_semaphore(kern_port);
        ip_unlock(kern_port);
 
-       *semaphorep = semaphore;
        return KERN_SUCCESS;
 }
-       
+
+/*
+ *     Routine:        convert_port_to_semaphore
+ *     Purpose:
+ *             Convert from a port to a semaphore.
+ *             Doesn't consume the port [send-right] ref;
+ *             produces a semaphore ref, which may be null.
+ *     Conditions:
+ *             Caller has a send-right reference to port.
+ *             Port may or may not be locked.
+ */
 semaphore_t
 convert_port_to_semaphore (ipc_port_t port)
 {
-       semaphore_t semaphore = SEMAPHORE_NULL;
 
-       if (IP_VALID (port)) {
-               ip_lock(port);
-               if (ip_active(port) && (ip_kotype(port) == IKOT_SEMAPHORE)) {
+       if (IP_VALID(port)) {
+               semaphore_t semaphore;
+
+               /*
+                * No need to lock because we have a reference on the
+                * port, and if it is a true semaphore port, that reference
+                * keeps the semaphore bound to the port (and active).
+                */
+               if (ip_kotype(port) == IKOT_SEMAPHORE) {
+                       assert(ip_active(port));
                        semaphore = (semaphore_t) port->ip_kobject;
                        semaphore_reference(semaphore);
+                       return (semaphore);
                }
-               ip_unlock(port);
        }
-
-       return (semaphore);
+       return SEMAPHORE_NULL;
 }
 
 
+/*
+ *     Routine:        convert_semaphore_to_port
+ *     Purpose:
+ *             Convert a semaphore reference to a send right to a
+ *             semaphore port.
+ *
+ *             Consumes the semaphore reference.  If the semaphore
+ *             port currently has no send rights (or doesn't exist
+ *             yet), the reference is donated to the port to represent
+ *             all extant send rights collectively.
+ */
 ipc_port_t
 convert_semaphore_to_port (semaphore_t semaphore)
 {
-       ipc_port_t port;
+       ipc_port_t port, send;
 
        if (semaphore == SEMAPHORE_NULL)
                return (IP_NULL);
 
        /* caller is donating a reference */
-       port = ipc_port_make_send(semaphore->port);
+       port = semaphore->port;
+
+       if (!IP_VALID(port)) {
+               port = ipc_port_alloc_kernel();
+               assert(IP_VALID(port));
+               ipc_kobject_set_atomically(port, (ipc_kobject_t) semaphore, IKOT_SEMAPHORE);
+
+               /* If we lose the race, deallocate and pick up the other guy's port */
+               if (!OSCompareAndSwapPtr(IP_NULL, port, &semaphore->port)) {
+                       ipc_port_dealloc_kernel(port);
+                       port = semaphore->port;
+                       assert(ip_kotype(port) == IKOT_SEMAPHORE);
+                       assert(port->ip_kobject == (ipc_kobject_t)semaphore);
+               }
+       }
+
+       ip_lock(port);
+       assert(ip_active(port));
+       send = ipc_port_make_send_locked(port);
+
+       if (1 == port->ip_srights) {
+               ipc_port_t old_notify;
+
+               /* transfer our ref to the port, and arm the no-senders notification */
+               assert(IP_NULL == port->ip_nsrequest);
+               ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
+               /* port unlocked */
+               assert(IP_NULL == old_notify);
+       } else {
+               /* piggyback on the existing port reference, so consume ours */
+               ip_unlock(port);
+               semaphore_dereference(semaphore);
+       }
+       return (send);
+}
+
+/*
+ * Routine:    semaphore_notify
+ * Purpose:
+ *     Called whenever the Mach port system detects no-senders
+ *     on the semaphore port.
+ *
+ *     When a send-right is first created, a no-senders
+ *     notification is armed (and a semaphore reference is donated).
+ *
+ *     A no-senders notification will be posted when no one else holds a
+ *     send-right (reference) to the semaphore's port. This notification function
+ *     will consume the semaphore reference donated to the extant collection of
+ *     send-rights.
+ */
+void
+semaphore_notify(mach_msg_header_t *msg)
+{
+       mach_no_senders_notification_t *notification = (void *)msg;
+       ipc_port_t port = notification->not_header.msgh_remote_port;
+       semaphore_t semaphore;
+
+       assert(ip_active(port));
+       assert(IKOT_SEMAPHORE == ip_kotype(port));
+       semaphore = (semaphore_t)port->ip_kobject;
+
        semaphore_dereference(semaphore);
-       return (port);
 }
 
 lock_set_t
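
A caller-side sketch of the semaphore/port conversions above (illustrative only, not part of this
commit; name is assumed to be a send-right name for a semaphore port in the current space).
convert_semaphore_to_port() consumes the caller's semaphore reference and, when it creates the
first send right, donates that reference to the port and arms a no-senders notification;
semaphore_notify() later releases the donated reference when the last send right goes away.

	semaphore_t sem;
	kern_return_t kr;

	kr = port_name_to_semaphore(name, &sem);   /* takes a new ref via convert_port_to_semaphore() */
	if (kr == KERN_SUCCESS) {
		/* ... operate on sem ... */
		semaphore_dereference(sem);        /* drop the reference taken above */
	}
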
index 0732b2b9dfc315e7306db1ac0f88dffc5c0e5ed5..c9fca597b869e30784344e53b1554f7ad83e0e4b 100644 (file)
 #include <ipc/ipc_types.h>
 #include <kern/spl.h>
 
-semaphore_t convert_port_to_semaphore (ipc_port_t port);
-ipc_port_t  convert_semaphore_to_port (semaphore_t semaphore);
+extern semaphore_t convert_port_to_semaphore (ipc_port_t port);
+extern ipc_port_t convert_semaphore_to_port (semaphore_t semaphore);
+extern kern_return_t port_name_to_semaphore(
+                                     mach_port_name_t  name,
+                                     semaphore_t       *semaphore);
+extern void semaphore_notify(mach_msg_header_t *msg);
 
 lock_set_t  convert_port_to_lock_set  (ipc_port_t port);
 ipc_port_t  convert_lock_set_to_port  (lock_set_t lock_set);
 
-kern_return_t  port_name_to_semaphore(
-                                     mach_port_name_t  name,
-                                     semaphore_t       *semaphore);
 #endif /* _KERN_IPC_SYNC_H_ */
index 9a870298f9f48e4a89845cef2f47f4445e17bf41..9a43453bd3f256deaaa5a42528996c820277421a 100644 (file)
@@ -396,11 +396,15 @@ ipc_task_reset(
        ipc_kobject_set(new_kport, (ipc_kobject_t) task, IKOT_TASK);
 
        for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) {
+               old_exc_actions[i] = IP_NULL;
+
+               if (i == EXC_CORPSE_NOTIFY && task_corpse_pending_report(task)) {
+                       continue;
+               }
+
                if (!task->exc_actions[i].privileged) {
                        old_exc_actions[i] = task->exc_actions[i].port;
                        task->exc_actions[i].port = IP_NULL;
-               } else {
-                       old_exc_actions[i] = IP_NULL;
                }
        }/* for */
        
@@ -565,7 +569,7 @@ ipc_thread_reset(
 
        old_kport = thread->ith_self;
 
-       if (old_kport == IP_NULL) {
+       if (old_kport == IP_NULL && thread->inspection == FALSE) {
		/* the thread is already terminated (can this happen?) */
                thread_mtx_unlock(thread);
                ipc_port_dealloc_kernel(new_kport);
@@ -575,7 +579,9 @@ ipc_thread_reset(
        thread->ith_self = new_kport;
        old_sself = thread->ith_sself;
        thread->ith_sself = ipc_port_make_send(new_kport);
-       ipc_kobject_set(old_kport, IKO_NULL, IKOT_NONE);
+       if (old_kport != IP_NULL) {
+               ipc_kobject_set(old_kport, IKO_NULL, IKOT_NONE);
+       }
        ipc_kobject_set(new_kport, (ipc_kobject_t) thread, IKOT_THREAD);
 
        /*
@@ -608,7 +614,9 @@ ipc_thread_reset(
        }
 
        /* destroy the kernel port */
-       ipc_port_dealloc_kernel(old_kport);
+       if (old_kport != IP_NULL) {
+               ipc_port_dealloc_kernel(old_kport);
+       }
 }
 
 /*
@@ -1403,6 +1411,9 @@ convert_port_to_thread(
  *             A name of MACH_PORT_NULL is valid for the null thread.
  *     Conditions:
  *             Nothing locked.
+ *
+ *     TODO: Could this be faster if it were ipc_port_translate_send based, like thread_switch?
+ *           We could avoid extra lock/unlock and extra ref operations on the port.
  */
 thread_t
 port_name_to_thread(
index 5a799ee770c796a47692c59c9a414821293ea9b4..2ac827b63e8f59ea1d1cf294be40d6bd9f0bd24a 100644 (file)
@@ -89,6 +89,9 @@ vm_size_t kalloc_max;
 vm_size_t kalloc_max_prerounded;
 vm_size_t kalloc_kernmap_size; /* size of kallocs that can come from kernel map */
 
+/* how many times we couldn't allocate out of kalloc_map and fell back to kernel_map */
+unsigned long kalloc_fallback_count;
+
 unsigned int kalloc_large_inuse;
 vm_size_t    kalloc_large_total;
 vm_size_t    kalloc_large_max;
@@ -154,45 +157,39 @@ KALLOC_ZINFO_SFREE(vm_size_t bytes)
 
 #if KALLOC_MINSIZE == 16 && KALLOC_LOG2_MINALIGN == 4
 
-/*
- * "Legacy" aka "power-of-2" backing zones with 16-byte minimum
- * size and alignment.  Users of this profile would probably
- * benefit from some tuning.
- */
-
 #define K_ZONE_SIZES                   \
        16,                             \
        32,                             \
-/* 6 */        64,                             \
-       128,                            \
+       48,                             \
+/* 3 */        64,                             \
+       80,                             \
+       96,                             \
+/* 6 */        128,                            \
+       160,                            \
        256,                            \
-/* 9 */        512,                            \
+/* 9 */        288,                            \
+       512,                            \
        1024,                           \
+/* C */        1280,                           \
        2048,                           \
-/* C */        4096
-
+       4096
 
 #define K_ZONE_NAMES                   \
        "kalloc.16",                    \
        "kalloc.32",                    \
-/* 6 */        "kalloc.64",                    \
-       "kalloc.128",                   \
+       "kalloc.48",                    \
+/* 3 */        "kalloc.64",                    \
+       "kalloc.80",                    \
+       "kalloc.96",                    \
+/* 6 */        "kalloc.128",                   \
+       "kalloc.160",                   \
        "kalloc.256",                   \
-/* 9 */        "kalloc.512",                   \
+/* 9 */        "kalloc.288",                   \
+       "kalloc.512",                   \
        "kalloc.1024",                  \
+/* C */        "kalloc.1280",                  \
        "kalloc.2048",                  \
-/* C */        "kalloc.4096"
-
-#define K_ZONE_MAXIMA                  \
-       1024,                           \
-       4096,                           \
-/* 6 */        4096,                           \
-       4096,                           \
-       4096,                           \
-/* 9 */        1024,                           \
-       1024,                           \
-       1024,                           \
-/* C */        1024
+       "kalloc.4096"
 
 #elif KALLOC_MINSIZE == 8 && KALLOC_LOG2_MINALIGN == 3
 
@@ -204,11 +201,11 @@ KALLOC_ZINFO_SFREE(vm_size_t bytes)
 /* 3 */        8,                              \
        16,     24,                     \
        32,     40,     48,             \
-/* 6 */        64,     88,     112,            \
+/* 6 */        64,     72,     88,     112,    \
        128,    192,                    \
-       256,    384,                    \
-/* 9 */        512,    768,                    \
-       1024,   1536,                   \
+       256,    288,    384,    440,    \
+/* 9 */        512,    768,                    \
+       1024,   1152,   1536,           \
        2048,   3072,                   \
        4096,   6144
 
@@ -216,40 +213,37 @@ KALLOC_ZINFO_SFREE(vm_size_t bytes)
 /* 3 */        "kalloc.8",                     \
        "kalloc.16",    "kalloc.24",    \
        "kalloc.32",    "kalloc.40",    "kalloc.48",    \
-/* 6 */        "kalloc.64",    "kalloc.88",    "kalloc.112",   \
+/* 6 */        "kalloc.64",    "kalloc.72",    "kalloc.88",    "kalloc.112",   \
        "kalloc.128",   "kalloc.192",   \
-       "kalloc.256",   "kalloc.384",   \
+       "kalloc.256",   "kalloc.288",   "kalloc.384",   "kalloc.440",   \
 /* 9 */        "kalloc.512",   "kalloc.768",   \
-       "kalloc.1024",  "kalloc.1536",  \
+       "kalloc.1024",  "kalloc.1152",  "kalloc.1536",  \
        "kalloc.2048",  "kalloc.3072",  \
        "kalloc.4096",  "kalloc.6144"
 
-#define        K_ZONE_MAXIMA                   \
-/* 3 */        1024,                           \
-       1024,   1024,                   \
-       4096,   4096,   4096,           \
-/* 6 */        4096,   4096,   4096,           \
-       4096,   4096,                   \
-       4096,   4096,                   \
-/* 9 */        1024,   1024,                   \
-       1024,   1024,                   \
-       1024,   1024,                   \
-/* C */        1024,   64
-
 #else
 #error missing zone size parameters for kalloc
 #endif
 
 #define KALLOC_MINALIGN (1 << KALLOC_LOG2_MINALIGN)
+#define KiB(x) (1024 * (x))
 
 static const int k_zone_size[] = {
        K_ZONE_SIZES,
-       8192,
-       16384,
-/* F */        32768
+       KiB(8),
+       KiB(16),
+       KiB(32)
+};
+
+#define MAX_K_ZONE     (sizeof (k_zone_size) / sizeof (k_zone_size[0]))
+
+static const char *k_zone_name[MAX_K_ZONE] = {
+       K_ZONE_NAMES,
+       "kalloc.8192",
+       "kalloc.16384",
+       "kalloc.32768"
 };
 
-#define N_K_ZONE       (sizeof (k_zone_size) / sizeof (k_zone_size[0]))
 
 /*
  * Many kalloc() allocations are for small structures containing a few
@@ -271,37 +265,13 @@ static int8_t k_zone_dlut[N_K_ZDLUT];     /* table of indices into k_zone[] */
  */
 static int k_zindex_start;
 
-static zone_t k_zone[N_K_ZONE];
-
-static const char *k_zone_name[N_K_ZONE] = {
-       K_ZONE_NAMES,
-       "kalloc.8192",
-       "kalloc.16384",
-/* F */        "kalloc.32768"
-};
-
-/*
- *  Max number of elements per zone.  zinit rounds things up correctly
- *  Doing things this way permits each zone to have a different maximum size
- *  based on need, rather than just guessing; it also
- *  means its patchable in case you're wrong!
- */
-unsigned int k_zone_max[N_K_ZONE] = {
-       K_ZONE_MAXIMA,
-       4096,
-       64,
-/* F */        64
-};
+static zone_t k_zone[MAX_K_ZONE];
 
 /* #define KALLOC_DEBUG                1 */
 
 /* forward declarations */
-void * kalloc_canblock(
-               vm_size_t       size,
-               boolean_t       canblock);
-
 
-lck_grp_t *kalloc_lck_grp;
+lck_grp_t kalloc_lck_grp;
 lck_mtx_t kalloc_lock;
 
 #define kalloc_spin_lock()     lck_mtx_lock_spin(&kalloc_lock)
@@ -354,7 +324,7 @@ kalloc_init(
                kalloc_map_size = KALLOC_MAP_SIZE_MIN;
 
        retval = kmem_suballoc(kernel_map, &min, kalloc_map_size,
-                              FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT,
+                              FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT | VM_MAKE_TAG(0),
                               &kalloc_map);
 
        if (retval != KERN_SUCCESS)
@@ -364,29 +334,29 @@ kalloc_init(
        kalloc_map_max = min + kalloc_map_size - 1;
 
        /*
-        *      Ensure that zones up to size 8192 bytes exist.
-        *      This is desirable because messages are allocated
-        *      with kalloc, and messages up through size 8192 are common.
+        * Create zones up to at least 2 pages because small page-multiples are common
+        * allocations. Also ensure that zones up to size 8192 bytes exist. This is
+        * desirable because messages are allocated with kalloc(), and messages up
+        * through size 8192 are common.
         */
+       kalloc_max = PAGE_SIZE << 2;
+       if (kalloc_max < KiB(16)) {
+           kalloc_max = KiB(16);
+       }
+       assert(kalloc_max <= KiB(64)); /* assumption made in size arrays */
 
-       if (PAGE_SIZE < 16*1024)
-               kalloc_max = 16*1024;
-       else
-               kalloc_max = PAGE_SIZE;
        kalloc_max_prerounded = kalloc_max / 2 + 1;
-       /* size it to be more than 16 times kalloc_max (256k) for allocations from kernel map */
+       /* allocations larger than 16 times kalloc_max go directly to kernel map */
        kalloc_kernmap_size = (kalloc_max * 16) + 1;
        kalloc_largest_allocated = kalloc_kernmap_size;
 
        /*
-        *      Allocate a zone for each size we are going to handle.
-        *      We specify non-paged memory.  Don't charge the caller
-        *      for the allocation, as we aren't sure how the memory
-        *      will be handled.
+        * Allocate a zone for each size we are going to handle. Don't charge the
+        * caller for the allocation, as we aren't sure how the memory will be
+        * handled.
         */
-       for (i = 0; (size = k_zone_size[i]) < kalloc_max; i++) {
-               k_zone[i] = zinit(size, k_zone_max[i] * size, size,
-                                 k_zone_name[i]);
+       for (i = 0; i < (int)MAX_K_ZONE && (size = k_zone_size[i]) < kalloc_max; i++) {
+               k_zone[i] = zinit(size, size, size, k_zone_name[i]);
                zone_change(k_zone[i], Z_CALLERACCT, FALSE);
        }
 
@@ -415,7 +385,7 @@ kalloc_init(
         * Useful when debugging/tweaking the array of zone sizes.
         * Cache misses probably more critical than compare-branches!
         */
-       for (i = 0; i < (int)N_K_ZONE; i++) {
+       for (i = 0; i < (int)MAX_K_ZONE; i++) {
                vm_size_t testsize = (vm_size_t)k_zone_size[i] - 1;
                int compare = 0;
                int zindex;
@@ -445,12 +415,13 @@ kalloc_init(
                    compare == 1 ? "" : "s");
        }
 #endif
-       kalloc_lck_grp = lck_grp_alloc_init("kalloc.large", LCK_GRP_ATTR_NULL);
-       lck_mtx_init(&kalloc_lock, kalloc_lck_grp, LCK_ATTR_NULL);
+
+       lck_grp_init(&kalloc_lck_grp, "kalloc.large", LCK_GRP_ATTR_NULL);
+       lck_mtx_init(&kalloc_lock, &kalloc_lck_grp, LCK_ATTR_NULL);
        OSMalloc_init();
-#ifdef MUTEX_ZONE      
+#ifdef MUTEX_ZONE
        lck_mtx_zone = zinit(sizeof(struct _lck_mtx_), 1024*256, 4096, "lck_mtx");
-#endif 
+#endif
 }
 
 /*
@@ -475,7 +446,7 @@ get_zone_search(vm_size_t size, int zindex)
        while ((vm_size_t)k_zone_size[zindex] < size)
                zindex++;
 
-       assert((unsigned)zindex < N_K_ZONE &&
+       assert((unsigned)zindex < MAX_K_ZONE &&
            (vm_size_t)k_zone_size[zindex] < kalloc_max);
 
        return (k_zone[zindex]);
@@ -483,8 +454,9 @@ get_zone_search(vm_size_t size, int zindex)
 
 void *
 kalloc_canblock(
-               vm_size_t       size,
-               boolean_t       canblock)
+               vm_size_t              size,
+               boolean_t              canblock,
+               vm_allocation_site_t * site)
 {
        zone_t z;
 
@@ -511,9 +483,15 @@ kalloc_canblock(
                else
                        alloc_map = kalloc_map;
 
-               if (kmem_alloc(alloc_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS) {
+               vm_tag_t tag;
+		tag = (site ? vm_tag_alloc(site) : VM_KERN_MEMORY_KALLOC);
+
+               if (kmem_alloc(alloc_map, (vm_offset_t *)&addr, size, tag) != KERN_SUCCESS) {
                        if (alloc_map != kernel_map) {
-                               if (kmem_alloc(kernel_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS)
+                               if (kalloc_fallback_count++ == 0) {
+                                       printf("%s: falling back to kernel_map\n", __func__);
+                               }
+                               if (kmem_alloc(kernel_map, (vm_offset_t *)&addr, size, tag) != KERN_SUCCESS)
                                        addr = NULL;
                        }
                        else
@@ -548,21 +526,17 @@ kalloc_canblock(
                    z, z->zone_name, (unsigned long)size);
 #endif
        assert(size <= z->elem_size);
-       return (zalloc_canblock(z, canblock));
+       return zalloc_canblock(z, canblock);
 }
 
 void *
-kalloc(
-       vm_size_t size)
-{
-       return( kalloc_canblock(size, TRUE) );
-}
-
+kalloc_external(
+       vm_size_t size);
 void *
-kalloc_noblock(
-              vm_size_t size)
+kalloc_external(
+       vm_size_t size)
 {
-       return( kalloc_canblock(size, FALSE) );
+       return( kalloc_tag_bt(size, VM_KERN_MEMORY_KALLOC) );
 }
 
 volatile SInt32 kfree_nop_count = 0;
@@ -700,7 +674,7 @@ OSMalloc_Tagalloc(
 
        OSMTag->OSMT_refcnt = 1;
 
-       strncpy(OSMTag->OSMT_name, str, OSMT_MAX_NAME);
+       strlcpy(OSMTag->OSMT_name, str, OSMT_MAX_NAME);
 
        OSMalloc_tag_spin_lock();
        enqueue_tail(&OSMalloc_tag_list, (queue_entry_t)OSMTag);
@@ -763,11 +737,10 @@ OSMalloc(
        OSMalloc_Tagref(tag);
        if ((tag->OSMT_attr & OSMT_PAGEABLE)
            && (size & ~PAGE_MASK)) {
-
-               if ((kr = kmem_alloc_pageable(kernel_map, (vm_offset_t *)&addr, size)) != KERN_SUCCESS)
+               if ((kr = kmem_alloc_pageable_external(kernel_map, (vm_offset_t *)&addr, size)) != KERN_SUCCESS)
                        addr = NULL;
        } else 
-               addr = kalloc((vm_size_t)size);
+               addr = kalloc_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC);
 
        if (!addr)
                OSMalloc_Tagrele(tag);
@@ -787,7 +760,7 @@ OSMalloc_nowait(
 
        OSMalloc_Tagref(tag);
        /* XXX: use non-blocking kalloc for now */
-       addr = kalloc_noblock((vm_size_t)size);
+       addr = kalloc_noblock_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC);
        if (addr == NULL)
                OSMalloc_Tagrele(tag);
 
@@ -805,7 +778,7 @@ OSMalloc_noblock(
                return(NULL);
 
        OSMalloc_Tagref(tag);
-       addr = kalloc_noblock((vm_size_t)size);
+       addr = kalloc_noblock_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC);
        if (addr == NULL)
                OSMalloc_Tagrele(tag);
 
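
The reworked K_ZONE_SIZES above adds intermediate size classes (48, 80, 96, 160, 288, 1280, ...)
so requests waste less slack space; get_zone_search() still walks the table and picks the first
zone whose element size is at least the requested size. A minimal sketch of that first-fit mapping
for the 16-byte-minimum profile (illustrative only, not code from this commit):

	/* With the new classes, a 72-byte request maps to kalloc.80 instead of kalloc.128. */
	static const unsigned sketch_zone_size[] = {
		16, 32, 48, 64, 80, 96, 128, 160, 256, 288, 512, 1024, 1280, 2048, 4096
	};

	static unsigned
	sketch_zone_index(unsigned long size)
	{
		unsigned i = 0;
		while ((unsigned long)sketch_zone_size[i] < size)  /* same walk as get_zone_search() */
			i++;
		return i;
	}
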
index 5a3808a2d4873e9debec9a723e97eba4cec94299..caad32a3b3f4f699d466e1aaec601d85dce5f19c 100644 (file)
 #define _KERN_KALLOC_H_
 
 #include <mach/machine/vm_types.h>
+#include <mach/boolean.h>
 #include <sys/cdefs.h>
+#include <mach/vm_types.h>
 
 __BEGIN_DECLS
 
+#if XNU_KERNEL_PRIVATE
+
+extern void *
+kalloc_canblock(
+               vm_size_t              size,
+               boolean_t              canblock,
+               vm_allocation_site_t * site);
+
+#define kalloc(size)                           \
+       ({ static vm_allocation_site_t site __attribute__((section("__DATA, __data"))); \
+       kalloc_canblock((size), TRUE, &site); })
+
+#define kalloc_tag(size, tag)                  \
+       ({ static vm_allocation_site_t site __attribute__((section("__DATA, __data"))) \
+               = { (tag), 0 } ; \
+       kalloc_canblock((size), TRUE, &site); })
+
+#define kalloc_tag_bt(size, tag)               \
+       ({ static vm_allocation_site_t site __attribute__((section("__DATA, __data"))) \
+               = { (tag), VM_TAG_BT }; \
+       kalloc_canblock((size), TRUE, &site); })
+
+#define kalloc_noblock(size)                   \
+       ({ static vm_allocation_site_t site __attribute__((section("__DATA, __data"))); \
+       kalloc_canblock((size), FALSE, &site); })
+
+#define kalloc_noblock_tag_bt(size, tag)       \
+       ({ static vm_allocation_site_t site __attribute__((section("__DATA, __data"))) \
+               = { (tag), VM_TAG_BT }; \
+       kalloc_canblock((size), FALSE, &site); })
+
+extern void kfree(void         *data,
+                 vm_size_t     size);
+
+#else /* XNU_KERNEL_PRIVATE */
+
 extern void *kalloc(vm_size_t  size);
 
 extern void *kalloc_noblock(vm_size_t  size);
@@ -71,6 +109,8 @@ extern void *kalloc_noblock(vm_size_t        size);
 extern void kfree(void         *data,
                  vm_size_t     size);
 
+#endif /* !XNU_KERNEL_PRIVATE */
+
 __END_DECLS
 
 #ifdef MACH_KERNEL_PRIVATE
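
Usage sketch for the new allocation-site macros above (illustrative only; struct foo is a
hypothetical type). Each call site expands to kalloc_canblock() with its own static
vm_allocation_site_t, so allocations can be attributed to that site, and the _bt variants
additionally request backtrace-based tagging via VM_TAG_BT.

	struct foo *p;

	p = kalloc_tag(sizeof(*p), VM_KERN_MEMORY_KALLOC);  /* blocking, tagged allocation */
	if (p != NULL) {
		/* ... */
		kfree(p, sizeof(*p));                        /* kfree() still takes the size */
	}
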
diff --git a/osfmk/kern/kern_cdata.c b/osfmk/kern/kern_cdata.c
new file mode 100644 (file)
index 0000000..503032a
--- /dev/null
@@ -0,0 +1,469 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <kern/assert.h>
+#include <mach/mach_types.h>
+#include <mach/boolean.h>
+#include <mach/vm_param.h>
+#include <kern/kern_types.h>
+#include <kern/mach_param.h>
+#include <kern/thread.h>
+#include <kern/task.h>
+#include <kern/kern_cdata.h>
+#include <kern/kalloc.h>
+#include <mach/mach_vm.h>
+
+/*
+ *
+ * The format for the data is set up generically as follows:
+ *
+ * Layout of data structure:
+ *
+ *   |         8 - bytes         |
+ *   |  type = MAGIC |  LENGTH   |
+ *   |            0              |
+ *   |      type     |  size     |
+ *   |          flags            |
+ *   |           data            |
+ *   |___________data____________|
+ *   |      type     |   size    |
+ *   |          flags            |
+ *   |___________data____________|
+ *   |  type = END   |  size=0   |
+ *   |            0              |
+ *
+ *
+ * The type field describes what kind of data is passed. For example type = TASK_CRASHINFO_UUID means the following data is a uuid.
+ * These types need to be defined in task_corpses.h for easy consumption by userspace inspection tools.
+ *
+ * A range of types is reserved for basic types such as ints and longs. One benefit of this extensible data format is that the
+ * kernel can include additional information as needed without requiring user-space tools to be recompiled for compatibility.
+ * For example, new versions of the rusage struct could be introduced without breaking existing tools.
+ *
+ * Feature description: Generic data with description
+ * -------------------
+ * Furthermore, generic data with a description is now possible. For example:
+ *
+ *   - kcdata_add_uint64_with_description(cdatainfo, 0x700, "NUM MACH PORTS");
+ *   - and more functions that allow adding description.
+ * The userspace tools can then look at the description and print the data even if they were not compiled with knowledge of the field a priori.
+ *
+ *  Example data:
+ * 0000  57 f1 ad de 00 00 00 00 00 00 00 00 00 00 00 00  W...............
+ * 0010  01 00 00 00 00 00 00 00 30 00 00 00 00 00 00 00  ........0.......
+ * 0020  50 49 44 00 00 00 00 00 00 00 00 00 00 00 00 00  PID.............
+ * 0030  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+ * 0040  9c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+ * 0050  01 00 00 00 00 00 00 00 30 00 00 00 00 00 00 00  ........0.......
+ * 0060  50 41 52 45 4e 54 20 50 49 44 00 00 00 00 00 00  PARENT PID......
+ * 0070  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+ * 0080  01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+ * 0090  ed 58 91 f1
+ *
+ * Feature description: Container markers for compound data
+ * ------------------
+ * If a given kernel data type is complex and requires adding multiple optional fields inside a container
+ * object for a consumer to understand arbitrary data, we package it using container markers.
+ *
+ * For example, the stackshot code gathers information and describes the state of a given task with respect
+ * to many subsystems. It includes data such as io stats, vm counters, process names/flags and syscall counts.
+ *
+ * kcdata_add_container_marker(kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN, STACKSHOT_KCCONTAINER_TASK, task_uniqueid);
+ * // add multiple data, or add_<type>_with_description()s here
+ *
+ * kcdata_add_container_marker(kcdata_p, KCDATA_TYPE_CONTAINER_END, STACKSHOT_KCCONTAINER_TASK, task_uniqueid);
+ *
+ * Feature description: Custom Data formats on demand
+ * --------------------
+ * Because the format is self-describing, the kernel provider can describe a data type (uniquely identified by a number) and use
+ * it in the buffer for sending data. The consumer can then parse the type information and knows how to interpret the incoming data.
+ * The following is an example of how a kernel-specific struct sample_disk_io_stats can be described in the buffer.
+ *
+ * struct sample_disk_io_stats {
+ *     uint64_t        disk_reads_count;
+ *     uint64_t        disk_reads_size;
+ *     uint64_t        io_priority_count[4];
+ *     uint64_t        io_priority_size;
+ * } __attribute__ ((packed));
+ *
+ *
+ * struct kcdata_subtype_descriptor disk_io_stats_def[] = {
+ *     {KCS_SUBTYPE_FLAGS_NONE, KC_ST_UINT64, 0 * sizeof(uint64_t), sizeof(uint64_t), "disk_reads_count"},
+ *     {KCS_SUBTYPE_FLAGS_NONE, KC_ST_UINT64, 1 * sizeof(uint64_t), sizeof(uint64_t), "disk_reads_size"},
+ *     {KCS_SUBTYPE_FLAGS_ARRAY, KC_ST_UINT64, 2 * sizeof(uint64_t), KCS_SUBTYPE_PACK_SIZE(4, sizeof(uint64_t)), "io_priority_count"},
+ *     {KCS_SUBTYPE_FLAGS_ARRAY, KC_ST_UINT64, (2 + 4) * sizeof(uint64_t), sizeof(uint64_t), "io_priority_size"},
+ * };
+ *
+ * Now you can add this custom type definition into the buffer as
+ * kcdata_add_type_definition(kcdata_p, KCTYPE_SAMPLE_DISK_IO_STATS, "sample_disk_io_stats",
+ *          &disk_io_stats_def[0], sizeof(disk_io_stats_def)/sizeof(struct kcdata_subtype_descriptor));
+ *
+ */
+
+static kern_return_t kcdata_get_memory_addr_with_flavor(kcdata_descriptor_t data, uint32_t type, uint32_t size, uint64_t flags, mach_vm_address_t *user_addr);
+
+kcdata_descriptor_t kcdata_memory_alloc_init(mach_vm_address_t buffer_addr_p, unsigned data_type, unsigned size, unsigned flags)
+{
+       kcdata_descriptor_t data = NULL;
+       mach_vm_address_t user_addr = 0;
+
+       data = kalloc(sizeof(struct kcdata_descriptor));
+       if (data == NULL) {
+               return NULL;
+       }
+       bzero(data, sizeof(struct kcdata_descriptor));
+       data->kcd_addr_begin = buffer_addr_p;
+       data->kcd_addr_end = buffer_addr_p;
+       data->kcd_flags = (flags & KCFLAG_USE_COPYOUT)? KCFLAG_USE_COPYOUT : KCFLAG_USE_MEMCOPY;
+       data->kcd_length = size;
+
+       /* Initialize the BEGIN header */
+       if (KERN_SUCCESS != kcdata_get_memory_addr(data, data_type, 0, &user_addr)){
+               kcdata_memory_destroy(data);
+               return NULL;
+       }
+
+       return data;
+}
+
+kern_return_t kcdata_memory_static_init(kcdata_descriptor_t data, mach_vm_address_t buffer_addr_p, unsigned data_type, unsigned size, unsigned flags)
+{
+       mach_vm_address_t user_addr = 0;
+
+       if (data == NULL) {
+               return KERN_INVALID_ARGUMENT;
+       }
+       bzero(data, sizeof(struct kcdata_descriptor));
+       data->kcd_addr_begin = buffer_addr_p;
+       data->kcd_addr_end = buffer_addr_p;
+       data->kcd_flags = (flags & KCFLAG_USE_COPYOUT)? KCFLAG_USE_COPYOUT : KCFLAG_USE_MEMCOPY;
+       data->kcd_length = size;
+
+       /* Initialize the BEGIN header */
+       return kcdata_get_memory_addr(data, data_type, 0, &user_addr);
+}
+
+uint64_t kcdata_memory_get_used_bytes(kcdata_descriptor_t kcd)
+{
+       assert(kcd != NULL);
+       return ((uint64_t)kcd->kcd_addr_end - (uint64_t)kcd->kcd_addr_begin) + sizeof(struct kcdata_item);
+}
+
+/*
+ * Free up the memory associated with kcdata
+ */
+kern_return_t kcdata_memory_destroy(kcdata_descriptor_t data)
+{
+       if (!data) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       /*
+        * data->kcd_addr_begin points to memory not tracked by the
+        * kcdata lib, so it is not cleared here.
+        */
+       kfree(data, sizeof(struct kcdata_descriptor));
+       return KERN_SUCCESS;
+}
+
+
+
+/*
+ * Routine: kcdata_get_memory_addr
+ * Desc: get memory address in the userspace memory for corpse info
+ *       NOTE: The caller is responsible for zeroing the resulting memory or
+ *             using other means to mark the memory if it fails to populate the
+ *             data in the middle of the operation.
+ * params:  data - pointer describing the crash info allocation
+ *             type - type of data to be put. See corpse.h for defined types
+ *          size - size requested. The header describes this size
+ * returns: mach_vm_address_t address in user memory for copyout().
+ */
+kern_return_t kcdata_get_memory_addr(
+               kcdata_descriptor_t data,
+               uint32_t type,
+               uint32_t size,
+               mach_vm_address_t *user_addr)
+{
+       return kcdata_get_memory_addr_with_flavor(data, type, size, 0, user_addr);
+}
+
+/*
+ * Routine: kcdata_get_memory_addr_with_flavor
+ * Desc: internal function with flags field. See documentation for kcdata_get_memory_addr for details
+ */
+
+static kern_return_t kcdata_get_memory_addr_with_flavor(
+               kcdata_descriptor_t data,
+               uint32_t type,
+               uint32_t size,
+               uint64_t flags,
+               mach_vm_address_t *user_addr)
+{
+       struct kcdata_item info;
+       uint32_t total_size;
+
+       if (user_addr == NULL || data == NULL) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       /* make sure 16 byte aligned */
+       if (size & 0xf) {
+               size += (0x10 - (size & 0xf));
+       }
+
+       bzero(&info, sizeof(info));
+       KCDATA_ITEM_TYPE(&info) = type;
+       KCDATA_ITEM_SIZE(&info) = size;
+       KCDATA_ITEM_FLAGS(&info) = flags;
+       total_size = size + sizeof(info);
+
+       /* check available memory, including trailer size for KCDATA_TYPE_BUFFER_END */
+       if (data->kcd_length < ((data->kcd_addr_end - data->kcd_addr_begin) + total_size + sizeof(info))) {
+               return KERN_RESOURCE_SHORTAGE;
+       }
+
+       if (data->kcd_flags & KCFLAG_USE_COPYOUT) {
+               if (copyout(&info, data->kcd_addr_end, sizeof(info)))
+                       return KERN_NO_ACCESS;
+       } else {
+               memcpy((void *)data->kcd_addr_end, &info, sizeof(info));
+       }
+
+       data->kcd_addr_end += sizeof(info);
+       *user_addr = data->kcd_addr_end;
+       data->kcd_addr_end += size;
+
+       /* setup the end header as well */
+       bzero(&info, sizeof(info));
+       KCDATA_ITEM_TYPE(&info) = KCDATA_TYPE_BUFFER_END;
+       KCDATA_ITEM_SIZE(&info) = 0;
+
+       if (data->kcd_flags & KCFLAG_USE_COPYOUT) {
+               if (copyout(&info, data->kcd_addr_end, sizeof(info)))
+                       return KERN_NO_ACCESS;
+       } else {
+               memcpy((void *)data->kcd_addr_end, &info, sizeof(info));
+       }
+
+       return KERN_SUCCESS;
+}
+
+/*
+ * Routine: kcdata_get_memory_addr_for_array
+ * Desc: get memory address in the userspace memory for corpse info
+ *       NOTE: The caller is responsible for zeroing the resulting memory or
+ *             using other means to mark the memory if it fails to populate the
+ *             data in the middle of the operation.
+ * params:  data - pointer describing the crash info allocation
+ *          type_of_element - type of data to be put. See kern_cdata.h for defined types
+ *          size_of_element - size of element. The header describes this size
+ *          count - num of elements in array.
+ * returns: mach_vm_address_t address in user memory for copyout().
+ */
+
+kern_return_t kcdata_get_memory_addr_for_array(
+               kcdata_descriptor_t data,
+               uint32_t type_of_element,
+               uint32_t size_of_element,
+               uint32_t count,
+               mach_vm_address_t *user_addr)
+{
+       uint64_t flags = type_of_element;
+       flags = (flags << 32) | count;
+       uint32_t total_size = count * size_of_element;
+       return kcdata_get_memory_addr_with_flavor(data, KCDATA_TYPE_ARRAY, total_size, flags, user_addr);
+}
+
+/*
+ * Routine: kcdata_add_container_marker
+ * Desc: Add a container marker in the buffer for type and identifier.
+ * params:  data - pointer describing the crash info allocation
+ *          header_type - one of (KCDATA_TYPE_CONTAINER_BEGIN ,KCDATA_TYPE_CONTAINER_END)
+ *          container_type - type of data to be put. See kern_cdata.h for defined types
+ *          identifier - unique identifier. This is required to match nested containers.
+ * returns: return value of kcdata_get_memory_addr()
+ */
+
+kern_return_t kcdata_add_container_marker(
+               kcdata_descriptor_t data,
+               uint32_t header_type,
+               uint32_t container_type,
+               uint64_t identifier)
+{
+       mach_vm_address_t user_addr;
+       kern_return_t kr;
+       assert(header_type == KCDATA_TYPE_CONTAINER_END || header_type == KCDATA_TYPE_CONTAINER_BEGIN);
+       uint32_t data_size = (header_type == KCDATA_TYPE_CONTAINER_BEGIN)? sizeof(uint32_t): 0;
+       kr = kcdata_get_memory_addr_with_flavor(data, header_type, data_size, identifier, &user_addr);
+       if (kr != KERN_SUCCESS)
+               return kr;
+
+       if (data_size)
+               kr = kcdata_memcpy(data, user_addr, &container_type, data_size);
+       return kr;
+}
+
+/*
+ * Routine: kcdata_memcpy
+ * Desc: a common function to copy data out based on either the copyout or memcpy flags
+ * params:  data - pointer describing the kcdata buffer
+ *          dst_addr - destination address
+ *          src_addr - source address
+ *          size - size in bytes to copy.
+ * returns: KERN_NO_ACCESS if copyout fails.
+ */
+
+kern_return_t kcdata_memcpy(kcdata_descriptor_t data, mach_vm_address_t dst_addr, void *src_addr, uint32_t size)
+{
+       if (data->kcd_flags & KCFLAG_USE_COPYOUT) {
+               if (copyout(src_addr, dst_addr, size))
+                       return KERN_NO_ACCESS;
+       } else {
+               memcpy((void *)dst_addr, src_addr, size);
+       }
+       return KERN_SUCCESS;
+}
+
+/*
+ * Routine: kcdata_add_type_definition
+ * Desc: add type definition to kcdata buffer.
+ *       see feature description in documentation above.
+ * params:  data - pointer describing the kcdata buffer
+ *          type_id - unique type identifier for this data
+ *          type_name - a string of max KCDATA_DESC_MAXLEN size for name of type
+ *          elements_array - address to descriptors for each field in struct
+ *          elements_count - count of how many fields are there in struct.
+ * returns: return code from kcdata_get_memory_addr in case of failure.
+ */
+
+kern_return_t kcdata_add_type_definition(
+               kcdata_descriptor_t data,
+               uint32_t type_id,
+               char *type_name,
+               struct kcdata_subtype_descriptor *elements_array_addr,
+               uint32_t elements_count)
+{
+       kern_return_t kr = KERN_SUCCESS;
+       struct kcdata_type_definition kc_type_definition;
+       mach_vm_address_t user_addr;
+       uint32_t total_size = sizeof(struct kcdata_type_definition);
+
+       if (strnlen(type_name, KCDATA_DESC_MAXLEN + 1) >= KCDATA_DESC_MAXLEN)
+               return KERN_INVALID_ARGUMENT;
+       strlcpy(&kc_type_definition.kct_name[0], type_name, KCDATA_DESC_MAXLEN);
+       kc_type_definition.kct_num_elements = elements_count;
+       kc_type_definition.kct_type_identifier = type_id;
+
+       total_size += elements_count * sizeof(struct kcdata_subtype_descriptor);
+       if (KERN_SUCCESS != (kr = kcdata_get_memory_addr_with_flavor(data, KCDATA_TYPE_TYPEDEFINTION, total_size, 0, &user_addr)))
+               return kr;
+       if (KERN_SUCCESS != (kr = kcdata_memcpy(data, user_addr, (void *)&kc_type_definition, sizeof(struct kcdata_type_definition))))
+               return kr;
+       user_addr += sizeof(struct kcdata_type_definition);
+       if (KERN_SUCCESS != (kr = kcdata_memcpy(data, user_addr, (void *)elements_array_addr, elements_count * sizeof(struct kcdata_subtype_descriptor))))
+               return kr;
+       return kr;
+}
+
+#pragma pack(4)
+
+/* Internal structs for convenience */
+struct _uint64_with_description_data {
+       char desc[KCDATA_DESC_MAXLEN];
+       uint64_t data;
+};
+
+struct _uint32_with_description_data {
+       char     desc[KCDATA_DESC_MAXLEN];
+       uint32_t data;
+};
+
+#pragma pack()
+
+kern_return_t kcdata_add_uint64_with_description(
+                               kcdata_descriptor_t data_desc,
+                               uint64_t data,
+                               const char *description)
+{
+       if (strnlen(description, KCDATA_DESC_MAXLEN + 1) >= KCDATA_DESC_MAXLEN)
+               return KERN_INVALID_ARGUMENT;
+
+       kern_return_t kr = 0;
+       mach_vm_address_t user_addr;
+       struct _uint64_with_description_data save_data;
+       const uint64_t size_req = sizeof(save_data);
+       bzero(&save_data, size_req);
+
+       strlcpy(&(save_data.desc[0]), description, sizeof(save_data.desc));
+       save_data.data = data;
+
+       kr = kcdata_get_memory_addr(data_desc, KCDATA_TYPE_UINT64_DESC, size_req, &user_addr);
+       if (kr != KERN_SUCCESS)
+               return kr;
+
+       if (data_desc->kcd_flags & KCFLAG_USE_COPYOUT) {
+               if (copyout(&save_data, user_addr, size_req))
+                       return KERN_NO_ACCESS;
+       } else {
+               memcpy((void *)user_addr, &save_data, size_req);
+       }
+       return KERN_SUCCESS;
+}
+
+kern_return_t kcdata_add_uint32_with_description(
+                               kcdata_descriptor_t data_desc,
+                               uint32_t data,
+                               const char *description)
+{
+       assert(strlen(description) < KCDATA_DESC_MAXLEN);
+       if (strnlen(description, KCDATA_DESC_MAXLEN + 1) >= KCDATA_DESC_MAXLEN)
+               return KERN_INVALID_ARGUMENT;
+       kern_return_t kr = 0;
+       mach_vm_address_t user_addr;
+       struct _uint32_with_description_data save_data;
+       const uint64_t size_req = sizeof(save_data);
+
+       bzero(&save_data, size_req);
+       strlcpy(&(save_data.desc[0]), description, sizeof(save_data.desc));
+       save_data.data = data;
+
+       kr = kcdata_get_memory_addr(data_desc, KCDATA_TYPE_UINT32_DESC, size_req, &user_addr);
+       if (kr != KERN_SUCCESS)
+               return kr;
+       if (data_desc->kcd_flags & KCFLAG_USE_COPYOUT) {
+               if (copyout(&save_data, user_addr, size_req))
+                       return KERN_NO_ACCESS;
+       } else {
+               memcpy((void *)user_addr, &save_data, size_req);
+       }
+       return KERN_SUCCESS;
+}
+
+
+/* end buffer management api */
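
Provider-side usage sketch for the buffer-management API above (illustrative only, not part of
this commit; buf and buf_size stand for a caller-supplied kernel buffer and its length, and the
uint64 value is a placeholder).

	struct kcdata_descriptor kcd;
	mach_vm_address_t addr;
	uint64_t now = mach_absolute_time();

	if (kcdata_memory_static_init(&kcd, (mach_vm_address_t)buf, KCDATA_BUFFER_BEGIN_CRASHINFO,
	        buf_size, KCFLAG_USE_MEMCOPY) == KERN_SUCCESS) {
		/* a described value: consumers can print it without knowing the field a priori */
		kcdata_add_uint64_with_description(&kcd, 42, "NUM MACH PORTS");

		/* a well-known typed value */
		if (kcdata_get_memory_addr(&kcd, KCDATA_TYPE_MACH_ABSOLUTE_TIME,
		        sizeof(now), &addr) == KERN_SUCCESS)
			kcdata_memcpy(&kcd, addr, &now, sizeof(now));
	}
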
diff --git a/osfmk/kern/kern_cdata.h b/osfmk/kern/kern_cdata.h
new file mode 100644 (file)
index 0000000..ac02b62
--- /dev/null
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _KERN_CDATA_H_
+#define _KERN_CDATA_H_
+
+#include <stdint.h>
+#include <mach/mach_types.h>
+
+#define KCDATA_DESC_MAXLEN          32      /* including NULL byte at end */
+
+struct kcdata_item {
+       uint32_t type;
+       uint32_t size; /* len(data)  */
+       uint64_t flags;
+#ifndef KERNEL
+       char data[];  /* must be at the end */
+#endif
+};
+
+typedef struct kcdata_item * kcdata_item_t;
+
+enum KCDATA_SUBTYPE_TYPES { KC_ST_CHAR = 1, KC_ST_INT8, KC_ST_UINT8, KC_ST_INT16, KC_ST_UINT16, KC_ST_INT32, KC_ST_UINT32, KC_ST_INT64, KC_ST_UINT64 };
+typedef enum KCDATA_SUBTYPE_TYPES kctype_subtype_t;
+
+/*
+ * A subtype description structure that defines
+ * how compound data is laid out in memory. This
+ * provides on-the-fly definitions of types and their
+ * consumption by the parser.
+ */
+struct kcdata_subtype_descriptor {
+       uint8_t              kcs_flags;
+#define KCS_SUBTYPE_FLAGS_NONE    0x0
+#define KCS_SUBTYPE_FLAGS_ARRAY   0x1
+       uint8_t              kcs_elem_type;                 /* restricted to kctype_subtype_t */
+       uint16_t             kcs_elem_offset;               /* offset in struct where data is found */
+       uint32_t             kcs_elem_size;                 /* size of element (or) packed state for array type */
+       char                 kcs_name[KCDATA_DESC_MAXLEN];  /* max 31 bytes for name of field */
+};
+
+typedef struct kcdata_subtype_descriptor * kcdata_subtype_descriptor_t;
+
+/*
+ * In case of array of basic c types in kctype_subtype_t,
+ * size is packed in lower 16 bits and
+ * count is packed in upper 16 bits of kcs_elem_size field.
+ */
+#define KCS_SUBTYPE_PACK_SIZE(e_count,e_size)      (((e_count) & 0xffff) << 16 | ((e_size) & 0xffff))
+
+static inline uint32_t
+kcs_get_elem_size(kcdata_subtype_descriptor_t d)
+{
+       if (d->kcs_flags & KCS_SUBTYPE_FLAGS_ARRAY) {
+               /* size is composed as ((count &0xffff)<<16 | (elem_size & 0xffff)) */
+               return (uint32_t)((d->kcs_elem_size & 0xffff) * ((d->kcs_elem_size & 0xffff0000)>>16));
+       }
+       return d->kcs_elem_size;
+}
+
+static inline uint32_t
+kcs_get_elem_count(kcdata_subtype_descriptor_t d)
+{
+       if (d->kcs_flags & KCS_SUBTYPE_FLAGS_ARRAY)
+               return (d->kcs_elem_size >> 16) & 0xffff;
+       return 1;
+}
+
+static inline kern_return_t
+kcs_set_elem_size(kcdata_subtype_descriptor_t d, uint32_t size, uint32_t count)
+{
+       if (count > 1) {
+               /* means we are setting up an array */
+               if (size > 0xffff || count > 0xffff)
+                       return KERN_INVALID_ARGUMENT;
+               d->kcs_elem_size = ((count & 0xffff) << 16 | (size & 0xffff));
+       }
+       else
+       {
+               d->kcs_elem_size = size;
+       }
+       return KERN_SUCCESS;
+}
+
+struct kcdata_type_definition {
+       uint32_t kct_type_identifier;
+       uint32_t kct_num_elements;
+       char kct_name[KCDATA_DESC_MAXLEN];
+#ifndef KERNEL
+       struct kcdata_subtype_descriptor kct_elements[];
+#endif
+};
+
+/* chunk type definitions. 0 - 0x7ff are reserved  and defined here
+ * NOTE: Please update libkdd/kcdata/kcdtypes.c if you make any changes
+ * in STACKSHOT_KCTYPE_* types.
+ */
+
+/*
+ * Types with description value.
+ * these will have KCDATA_DESC_MAXLEN-1 length string description
+ * and rest of KCDATA_ITEM_SIZE() - KCDATA_DESC_MAXLEN bytes as data
+ */
+#define KCDATA_TYPE_INVALID              0x0
+#define KCDATA_TYPE_STRING_DESC          0x1
+#define KCDATA_TYPE_UINT32_DESC          0x2
+#define KCDATA_TYPE_UINT64_DESC          0x3
+#define KCDATA_TYPE_INT32_DESC           0x4
+#define KCDATA_TYPE_INT64_DESC           0x5
+#define KCDATA_TYPE_BINDATA_DESC         0x6
+
+/*
+ * Compound type definitions
+ */
+#define KCDATA_TYPE_ARRAY                0x11       /* Array of data */
+#define KCDATA_TYPE_TYPEDEFINTION        0x12       /* Meta type that describes a type on the fly. */
+#define KCDATA_TYPE_CONTAINER_BEGIN      0x13       /* Container type which has corresponding CONTAINER_END header.
+                                                     * KCDATA_TYPE_CONTAINER_BEGIN has type in the data segment.
+                                                     * Both headers have (uint64_t) ID for matching up nested data.
+                                                     */
+#define KCDATA_TYPE_CONTAINER_END        0x14
+
+
+/*
+ * Generic data types that are most commonly used
+ */
+#define KCDATA_TYPE_LIBRARY_LOADINFO     0x30       /* struct dyld_uuid_info_32 */
+#define KCDATA_TYPE_LIBRARY_LOADINFO64   0x31       /* struct dyld_uuid_info_64 */
+#define KCDATA_TYPE_TIMEBASE             0x32       /* struct mach_timebase_info */
+#define KCDATA_TYPE_MACH_ABSOLUTE_TIME   0x33       /* uint64_t */
+#define KCDATA_TYPE_TIMEVAL              0x34       /* struct timeval64 */
+#define KCDATA_TYPE_USECS_SINCE_EPOCH    0x35       /* time in usecs uint64_t */
+
+#define KCDATA_TYPE_BUFFER_END      0xF19158ED
+
+/* MAGIC numbers defined for each class of chunked data */
+#define KCDATA_BUFFER_BEGIN_CRASHINFO  0xDEADF157   /* owner: corpses/task_corpse.h */
+                                                   /* type-range: 0x800 - 0x8ff */
+#define KCDATA_BUFFER_BEGIN_STACKSHOT  0x59a25807   /* owner: sys/stackshot.h */
+                                                   /* type-range: 0x900 - 0x9ff */
+
+/* next type range number available 0x1000 */
+
+/* Common MACROS and library functions */
+/* header size = sizeof(type) + sizeof(flags) + sizeof(size) */
+#define KCDATA_ITEM_HEADER_SIZE         (sizeof(uint32_t) + sizeof(uint32_t) + sizeof(uint64_t))
+#define KCDATA_ITEM_TYPE(item)          (((kcdata_item_t)(item))->type)
+#define KCDATA_ITEM_SIZE(item)          (((kcdata_item_t)(item))->size)
+#define KCDATA_ITEM_FLAGS(item)          (((kcdata_item_t)(item))->flags)
+
+#define KCDATA_ITEM_ARRAY_GET_EL_TYPE(item)   ((KCDATA_ITEM_FLAGS(item) >> 32) & UINT32_MAX)
+#define KCDATA_ITEM_ARRAY_GET_EL_COUNT(item)  (KCDATA_ITEM_FLAGS(item) & UINT32_MAX)
+#define KCDATA_ITEM_ARRAY_GET_EL_SIZE(item)   (KCDATA_ITEM_SIZE(item) / KCDATA_ITEM_ARRAY_GET_EL_COUNT(item))
+
+#define KCDATA_CONTAINER_ID(item)             ((uint64_t)KCDATA_ITEM_FLAGS(item))
+
+#define KCDATA_ITEM_NEXT_HEADER(item)   ((kcdata_item_t)((uint64_t)((uintptr_t)(item)) + KCDATA_ITEM_HEADER_SIZE + KCDATA_ITEM_SIZE(item)))
+
+#define KCDATA_ITEM_FOREACH(head) for (; KCDATA_ITEM_TYPE(head) != KCDATA_TYPE_BUFFER_END; (head) = KCDATA_ITEM_NEXT_HEADER(head))
+
+static inline kcdata_item_t
+KCDATA_ITEM_FIND_TYPE(kcdata_item_t head, uint32_t type)
+{
+       KCDATA_ITEM_FOREACH(head)
+       {
+               if (KCDATA_ITEM_TYPE(head) == type) {
+                       break;
+               }
+       }
+       return (KCDATA_ITEM_TYPE(head) == type) ? (kcdata_item_t)head : 0;
+}
+
+#ifndef KERNEL
+#define KCDATA_ITEM_DATA_PTR(item)      (&((kcdata_item_t)(item))->data)
+
+static inline uint32_t kcdata_get_container_type(kcdata_item_t buffer) {
+       if (KCDATA_ITEM_TYPE(buffer) == KCDATA_TYPE_CONTAINER_BEGIN)
+               return *(uint32_t *)KCDATA_ITEM_DATA_PTR(buffer);
+       return 0;
+}
+
+static inline void kcdata_get_data_with_desc(kcdata_item_t buffer, char **desc_ptr, void **data_ptr) {
+       if (desc_ptr)
+               *desc_ptr = (char *)KCDATA_ITEM_DATA_PTR(buffer);
+       if (data_ptr)
+               *data_ptr = (void *)((uintptr_t)KCDATA_ITEM_DATA_PTR(buffer) + KCDATA_DESC_MAXLEN);
+}
+#endif /* KERNEL */
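
For a user-space consumer, a minimal parsing sketch built on the macros and helpers above (the buffer pointer buf, its origin, and the use of <stdio.h> are assumptions):

    #include <stdio.h>

    static void
    walk_kcdata(kcdata_item_t buf)
    {
        kcdata_item_t item = buf; /* first item is the KCDATA_BUFFER_BEGIN_* marker */

        KCDATA_ITEM_FOREACH(item) {
            uint32_t type = KCDATA_ITEM_TYPE(item);

            if (type == KCDATA_TYPE_UINT64_DESC) {
                char *desc;
                void *data;
                kcdata_get_data_with_desc(item, &desc, &data);
                printf("%s = 0x%llx\n", desc, (unsigned long long)*(uint64_t *)data);
            } else if (type == KCDATA_TYPE_CONTAINER_BEGIN) {
                printf("container type 0x%x, id %llu\n",
                       kcdata_get_container_type(item),
                       (unsigned long long)KCDATA_CONTAINER_ID(item));
            } else if (type == KCDATA_TYPE_ARRAY) {
                printf("array: element type 0x%llx, %llu elements of %llu bytes each\n",
                       (unsigned long long)KCDATA_ITEM_ARRAY_GET_EL_TYPE(item),
                       (unsigned long long)KCDATA_ITEM_ARRAY_GET_EL_COUNT(item),
                       (unsigned long long)KCDATA_ITEM_ARRAY_GET_EL_SIZE(item));
            }
        }
    }
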
+
+#ifdef XNU_KERNEL_PRIVATE
+
+/* Structure to save information about corpse data */
+struct kcdata_descriptor {
+       uint32_t            kcd_length;
+       uint32_t            kcd_flags;
+#define KCFLAG_USE_MEMCOPY  0x0
+#define KCFLAG_USE_COPYOUT  0x1
+       mach_vm_address_t   kcd_addr_begin;
+       mach_vm_address_t   kcd_addr_end;
+};
+
+typedef struct kcdata_descriptor * kcdata_descriptor_t;
+
+kcdata_descriptor_t kcdata_memory_alloc_init(mach_vm_address_t crash_data_p, unsigned data_type, unsigned size, unsigned flags);
+kern_return_t kcdata_memory_static_init(kcdata_descriptor_t data, mach_vm_address_t buffer_addr_p, unsigned data_type, unsigned size, unsigned flags);
+kern_return_t kcdata_memory_destroy(kcdata_descriptor_t data);
+uint64_t kcdata_memory_get_used_bytes(kcdata_descriptor_t kcd);
+kern_return_t kcdata_memcpy(kcdata_descriptor_t data, mach_vm_address_t dst_addr, void *src_addr, uint32_t size);
+
+kern_return_t kcdata_get_memory_addr(kcdata_descriptor_t data, uint32_t type, uint32_t size, mach_vm_address_t *user_addr);
+kern_return_t kcdata_get_memory_addr_for_array(kcdata_descriptor_t data, uint32_t type_of_element, uint32_t size_of_element, uint32_t count, mach_vm_address_t *user_addr);
+kern_return_t kcdata_add_container_marker(kcdata_descriptor_t data, uint32_t header_type, uint32_t container_type, uint64_t identifier);
+kern_return_t kcdata_add_type_definition(kcdata_descriptor_t data, uint32_t type_id, char *type_name, struct kcdata_subtype_descriptor *elements_array_addr, uint32_t elements_count);
+
+
+kern_return_t kcdata_add_uint64_with_description(kcdata_descriptor_t crashinfo, uint64_t data, const char *description);
+kern_return_t kcdata_add_uint32_with_description(kcdata_descriptor_t crashinfo, uint32_t data, const char *description);
+
+#endif /* XNU_KERNEL_PRIVATE */
+
+#endif /* _KERN_CDATA_H_ */
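
A kernel-side producer sketch using the XNU_KERNEL_PRIVATE interfaces above (the pre-allocated buffer, the helper name fill_sample_kcdata, and the placeholder type id 0x900 are illustrative assumptions):

    /* Fill a caller-provided buffer with one described value and one raw item. */
    static kern_return_t
    fill_sample_kcdata(mach_vm_address_t buf, unsigned buf_size)
    {
        struct kcdata_descriptor desc;
        mach_vm_address_t out_addr = 0;
        uint32_t sample = 42;
        kern_return_t kr;

        kr = kcdata_memory_static_init(&desc, buf, KCDATA_BUFFER_BEGIN_STACKSHOT,
                                       buf_size, KCFLAG_USE_MEMCOPY);
        if (kr != KERN_SUCCESS)
            return kr;

        kr = kcdata_add_uint64_with_description(&desc, mach_absolute_time(), "sample_timestamp");
        if (kr != KERN_SUCCESS)
            return kr;

        /* 0x900 is only a placeholder type id in the stackshot range. */
        kr = kcdata_get_memory_addr(&desc, 0x900, sizeof(sample), &out_addr);
        if (kr == KERN_SUCCESS)
            kr = kcdata_memcpy(&desc, out_addr, &sample, sizeof(sample));

        return kr;
    }
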
index 8821a0962fba641bb2c07544c8f2d66af09fb81a..d62ab817c13d411b121548c720b40d082b354548 100644 (file)
@@ -44,7 +44,6 @@ struct ecc_event              ecc_data[ECC_EVENT_BUFFER_COUNT];
 static uint32_t                        ecc_data_next_read; 
 static uint32_t                        ecc_data_next_write; 
 static boolean_t               ecc_data_empty = TRUE; // next read == next write : empty or full?
-static boolean_t               ecc_prefer_panic = TRUE; 
 static lck_grp_t               *ecc_data_lock_group;
 static lck_spin_t              ecc_data_lock;
 static uint32_t                        ecc_correction_count;
@@ -52,19 +51,11 @@ static uint32_t                     ecc_correction_count;
 void
 ecc_log_init()
 {
-       ecc_prefer_panic = !PE_reboot_on_panic();
        ecc_data_lock_group = lck_grp_alloc_init("ecc-data", NULL);
        lck_spin_init(&ecc_data_lock, ecc_data_lock_group, NULL);
        OSMemoryBarrier();
 }
 
-boolean_t 
-ecc_log_prefer_panic(void)
-{
-       OSMemoryBarrier();
-       return ecc_prefer_panic;
-}
-
 uint32_t
 ecc_log_get_correction_count()
 {
index cff2dbb7553a053302857f449490b0d95b09e5e4..fd20ff2a90d0eee9be53d92f1392f3be181985d0 100644 (file)
@@ -28,6 +28,9 @@
 
 #include <mach/mach_types.h>
 #include <mach/vm_param.h>
+#include <mach/mach_vm.h>
+#include <sys/errno.h>
+#include <sys/stackshot.h>
 #ifdef IMPORTANCE_INHERITANCE
 #include <ipc/ipc_importance.h>
 #endif
 
 #include <kern/processor.h>
 #include <kern/thread.h>
+#include <kern/telemetry.h>
 #include <kern/clock.h>
 #include <vm/vm_map.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_pageout.h>
+#include <vm/vm_fault.h>
 #include <vm/vm_shared_region.h>
 #include <libkern/OSKextLibPrivate.h>
 
+#if (defined(__arm64__) || defined(NAND_PANIC_DEVICE)) && !defined(LEGACY_PANIC_LOGS)
+#include <pexpert/pexpert.h> /* For gPanicBase/gPanicSize */
+#endif
+
 extern unsigned int not_in_kdp;
 
 /*
@@ -64,24 +73,39 @@ extern addr64_t kdp_vtophys(pmap_t pmap, addr64_t va);
 
 int kdp_snapshot = 0;
 static int stack_snapshot_ret = 0;
-static unsigned stack_snapshot_bytes_traced = 0;
+static uint32_t stack_snapshot_bytes_traced = 0;
 
+static kcdata_descriptor_t stackshot_kcdata_p = NULL;
 static void *stack_snapshot_buf;
 static uint32_t stack_snapshot_bufsize;
 int stack_snapshot_pid;
 static uint32_t stack_snapshot_flags;
-static uint32_t stack_snapshot_dispatch_offset;
 static unsigned int old_debugger;
+static boolean_t stack_enable_faulting;
+
+void *kernel_stackshot_buf = NULL; /* Pointer to buffer for stackshots triggered from the kernel and retrieved later */
+int kernel_stackshot_buf_size =  0;
+
+void *stackshot_snapbuf = NULL; /* Used by stack_snapshot2 (to be removed) */
 
+__private_extern__ void stackshot_lock_init( void );
+static boolean_t memory_iszero(void *addr, size_t size);
+kern_return_t          stack_snapshot2(int pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval);
+kern_return_t          stack_snapshot_from_kernel_internal(int pid, void *buf, uint32_t size, uint32_t flags, unsigned *bytes_traced);
+#if CONFIG_TELEMETRY
+kern_return_t          stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval);
+#endif
+uint32_t               get_stackshot_estsize(uint32_t prev_size_hint);
+kern_return_t          kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config,
+                                               size_t stackshot_config_size, boolean_t stackshot_from_user);
 void                   do_stackshot(void);
-void                   kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size,
-                               uint32_t flags, uint32_t dispatch_offset);
+void                   kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint32_t flags, kcdata_descriptor_t data_p, boolean_t enable_faulting);
 void                   kdp_snapshot_postflight(void);
-static int             kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size,
-                               uint32_t flags, uint32_t dispatch_offset, uint32_t *pbytesTraced);
+static int             kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t *pbytesTraced);
+static int             kdp_stackshot_kcdata_format(int pid, uint32_t trace_flags, uint32_t *pBytesTraced);
 int                    kdp_stack_snapshot_geterror(void);
-int                    kdp_stack_snapshot_bytes_traced(void);
-int                    kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_flags, uint32_t dispatch_offset, uint32_t *pbytesTraced);
+uint32_t               kdp_stack_snapshot_bytes_traced(void);
+int                    kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_flags, uint32_t *pbytesTraced);
 static int             pid_from_task(task_t task);
 static uint64_t        proc_uniqueid_from_task(task_t task);
 static void            kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap);
@@ -96,18 +120,21 @@ static uint64_t            proc_did_throttle_from_task(task_t task);
 extern void            proc_name_kdp(task_t  task, char *buf, int size);
 extern int             proc_threadname_kdp(void *uth, char *buf, size_t size);
 extern void            proc_starttime_kdp(void *p, uint64_t *tv_sec, uint64_t *tv_usec);
+extern uint64_t                get_dispatchqueue_serialno_offset_from_proc(void *p);
+static uint64_t                proc_dispatchqueue_serialno_offset_from_task(task_t task);
+extern int             memorystatus_get_pressure_status_kdp(void);
 
 extern int             count_busy_buffers(void);   /* must track with declaration in bsd/sys/buf_internal.h */
 extern void            bcopy_phys(addr64_t, addr64_t, vm_size_t);
-extern int             machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p);
-extern int             machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p);
+extern int             machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p, uint32_t *thread_trace_flags);
+extern int             machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p, uint32_t *thread_trace_flags);
 
 /* Validates that the given address is both a valid page and has
  * default caching attributes for the current kdp_pmap.  Returns
  * 0 if the address is invalid, and a kernel virtual address for
  * the given address if it is valid.
  */
-vm_offset_t machine_trace_thread_get_kva(vm_offset_t cur_target_addr);
+vm_offset_t machine_trace_thread_get_kva(vm_offset_t cur_target_addr, vm_map_t map, uint32_t *thread_trace_flags);
 
 /* Clears caching information used by the above validation routine
  * (in case the kdp_pmap has been changed or cleared).
@@ -115,6 +142,9 @@ vm_offset_t machine_trace_thread_get_kva(vm_offset_t cur_target_addr);
 void machine_trace_thread_clear_validation_cache(void);
 
 #define MAX_FRAMES 1000
+#define MAX_LOADINFOS 500
+#define USECSPERSEC 1000000
+#define TASK_IMP_WALK_LIMIT 20
 
 typedef struct thread_snapshot *thread_snapshot_t;
 typedef struct task_snapshot *task_snapshot_t;
@@ -130,6 +160,40 @@ static vm_offset_t prev_target_page = 0;
 static vm_offset_t prev_target_kva = 0;
 static boolean_t validate_next_addr = TRUE;
 
+/*
+ * Stackshot locking and other defines.
+ */
+static lck_grp_t       *stackshot_subsys_lck_grp;
+static lck_grp_attr_t  *stackshot_subsys_lck_grp_attr;
+static lck_attr_t      *stackshot_subsys_lck_attr;
+static lck_mtx_t       stackshot_subsys_mutex;
+
+#define STACKSHOT_SUBSYS_LOCK() lck_mtx_lock(&stackshot_subsys_mutex)
+#define STACKSHOT_SUBSYS_UNLOCK() lck_mtx_unlock(&stackshot_subsys_mutex)
+#if defined(__i386__) || defined (__x86_64__)
+#define TRAP_DEBUGGER __asm__ volatile("int3")
+#else
+#error No TRAP_DEBUGGER definition for this architecture
+#endif
+
+/* Initialize the mutex governing access to the stack snapshot subsystem */
+__private_extern__ void
+stackshot_lock_init( void )
+{
+       stackshot_subsys_lck_grp_attr = lck_grp_attr_alloc_init();
+
+       stackshot_subsys_lck_grp = lck_grp_alloc_init("stackshot_subsys_lock", stackshot_subsys_lck_grp_attr);
+
+       stackshot_subsys_lck_attr = lck_attr_alloc_init();
+
+       lck_mtx_init(&stackshot_subsys_mutex, stackshot_subsys_lck_grp, stackshot_subsys_lck_attr);
+}
+
+#define SANE_BOOTPROFILE_TRACEBUF_SIZE (64 * 1024 * 1024)
+#define SANE_TRACEBUF_SIZE (8 * 1024 * 1024)
+
+#define STACKSHOT_SUPP_SIZE (16 * 1024) /* Minimum stackshot size */
+#define TASK_UUID_AVG_SIZE (16 * sizeof(uuid_t)) /* Average space consumed by UUIDs/task */
 
 /* 
  * Method for grabbing timer values safely, in the sense that no infinite loop will occur 
@@ -152,15 +216,587 @@ static uint64_t safe_grab_timer_value(struct timer *t)
 #endif
 }
 
+/*
+ * Old, inefficient stackshot call. This will be removed in the next release and is being replaced with
+ * two syscalls -- stack_snapshot_with_config and stack_microsnapshot.
+ */
+kern_return_t
+stack_snapshot2(int pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval)
+{
+       boolean_t istate;
+       int error = KERN_SUCCESS;
+       unsigned bytesTraced = 0;
+
+#if CONFIG_TELEMETRY
+       if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_ENABLE) {
+               telemetry_global_ctl(1);
+               *retval = 0;
+               return (0);
+       } else if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_DISABLE) {
+               telemetry_global_ctl(0);
+               *retval = 0;
+               return (0);
+       }
+
+       if (flags & STACKSHOT_WINDOWED_MICROSTACKSHOTS_ENABLE) {
+               error = telemetry_enable_window();
+
+               if (error != KERN_SUCCESS) {
+                       /* We are probably out of memory */
+                       *retval = -1;
+                       return KERN_RESOURCE_SHORTAGE;
+               }
+
+               *retval = 0;
+               return (0);
+       } else if (flags & STACKSHOT_WINDOWED_MICROSTACKSHOTS_DISABLE) {
+               telemetry_disable_window();
+               *retval = 0;
+               return (0);
+       }
+#endif
+
+       *retval = -1;
+       /* Serialize tracing */
+       STACKSHOT_SUBSYS_LOCK();
+
+       if (tracebuf_size <= 0) {
+               error = KERN_INVALID_ARGUMENT;
+               goto error_exit;
+       }
+
+#if CONFIG_TELEMETRY
+       if (flags & STACKSHOT_GET_MICROSTACKSHOT) {
+
+               if (tracebuf_size > SANE_TRACEBUF_SIZE) {
+                       error = KERN_INVALID_ARGUMENT;
+                       goto error_exit;
+               }
+
+               bytesTraced = tracebuf_size;
+               error = telemetry_gather(tracebuf, &bytesTraced,
+                                        (flags & STACKSHOT_SET_MICROSTACKSHOT_MARK) ? TRUE : FALSE);
+               *retval = (int)bytesTraced;
+               goto error_exit;
+       }
+
+       if (flags & STACKSHOT_GET_WINDOWED_MICROSTACKSHOTS) {
+
+               if (tracebuf_size > SANE_TRACEBUF_SIZE) {
+                       error = KERN_INVALID_ARGUMENT;
+                       goto error_exit;
+               }
+
+               bytesTraced = tracebuf_size;
+               error = telemetry_gather_windowed(tracebuf, &bytesTraced);
+               *retval = (int)bytesTraced;
+               goto error_exit;
+       }
+
+       if (flags & STACKSHOT_GET_BOOT_PROFILE) {
+
+               if (tracebuf_size > SANE_BOOTPROFILE_TRACEBUF_SIZE) {
+                       error = KERN_INVALID_ARGUMENT;
+                       goto error_exit;
+               }
+
+               bytesTraced = tracebuf_size;
+               error = bootprofile_gather(tracebuf, &bytesTraced);
+               *retval = (int)bytesTraced;
+               goto error_exit;
+       }
+#endif
+
+       if (tracebuf_size > SANE_TRACEBUF_SIZE) {
+               error = KERN_INVALID_ARGUMENT;
+               goto error_exit;
+       }
+
+       assert(stackshot_snapbuf == NULL);
+       if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&stackshot_snapbuf, tracebuf_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
+               error = KERN_RESOURCE_SHORTAGE;
+               goto error_exit;
+       }
+
+       if (panic_active()) {
+               error = KERN_RESOURCE_SHORTAGE;
+               goto error_exit;
+       }
+
+       istate = ml_set_interrupts_enabled(FALSE);
+       /* Preload trace parameters */
+       kdp_snapshot_preflight(pid, stackshot_snapbuf, tracebuf_size, flags, NULL, FALSE);
+
+       /* Trap to the debugger to obtain a coherent stack snapshot; this populates
+        * the trace buffer
+        */
+
+       TRAP_DEBUGGER;
+
+       ml_set_interrupts_enabled(istate);
+
+       bytesTraced = kdp_stack_snapshot_bytes_traced();
+
+       if (bytesTraced > 0) {
+               if ((error = copyout(stackshot_snapbuf, tracebuf,
+                       ((bytesTraced < tracebuf_size) ?
+                           bytesTraced : tracebuf_size))))
+                       goto error_exit;
+               *retval = bytesTraced;
+       }
+       else {
+               error = KERN_NOT_IN_SET;
+               goto error_exit;
+       }
+
+       error = kdp_stack_snapshot_geterror();
+       if (error == -1) {
+               error = KERN_NO_SPACE;
+               *retval = -1;
+               goto error_exit;
+       }
+
+error_exit:
+       if (stackshot_snapbuf != NULL)
+               kmem_free(kernel_map, (vm_offset_t) stackshot_snapbuf, tracebuf_size);
+       stackshot_snapbuf = NULL;
+       STACKSHOT_SUBSYS_UNLOCK();
+       return error;
+}
+
+kern_return_t
+stack_snapshot_from_kernel_internal(int pid, void *buf, uint32_t size, uint32_t flags, unsigned *bytes_traced)
+{
+       int error = 0;
+       boolean_t istate;
+
+       if ((buf == NULL) || (size <= 0) || (bytes_traced == NULL)) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       /* cap an individual stackshot to SANE_TRACEBUF_SIZE */
+       if (size > SANE_TRACEBUF_SIZE) {
+               size = SANE_TRACEBUF_SIZE;
+       }
+
+       /* Serialize tracing */
+       STACKSHOT_SUBSYS_LOCK();
+       istate = ml_set_interrupts_enabled(FALSE);
+
+
+       /* Preload trace parameters*/
+       kdp_snapshot_preflight(pid, buf, size, flags, NULL, FALSE);
+
+       /* Trap to the debugger to obtain a coherent stack snapshot; this populates
+        * the trace buffer
+        */
+       TRAP_DEBUGGER;
+
+       ml_set_interrupts_enabled(istate);
+
+       *bytes_traced = kdp_stack_snapshot_bytes_traced();
+
+       error = kdp_stack_snapshot_geterror();
+
+       STACKSHOT_SUBSYS_UNLOCK();
+
+       return error;
+}
+
+#if CONFIG_TELEMETRY
+kern_return_t
+stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval)
+{
+       int error = KERN_SUCCESS;
+       uint32_t bytes_traced = 0;
+
+       *retval = -1;
+
+       /*
+        * Control related operations
+        */
+       if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_ENABLE) {
+               telemetry_global_ctl(1);
+               *retval = 0;
+               goto exit;
+       } else if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_DISABLE) {
+               telemetry_global_ctl(0);
+               *retval = 0;
+               goto exit;
+       }
+
+       if (flags & STACKSHOT_WINDOWED_MICROSTACKSHOTS_ENABLE) {
+               error = telemetry_enable_window();
+
+               if (error != KERN_SUCCESS) {
+                       /*
+                        * We are probably out of memory
+                        */
+                       *retval = -1;
+                       error = KERN_RESOURCE_SHORTAGE;
+                       goto exit;
+               }
+
+               *retval = 0;
+               goto exit;
+       } else if (flags & STACKSHOT_WINDOWED_MICROSTACKSHOTS_DISABLE) {
+               telemetry_disable_window();
+               *retval = 0;
+               goto exit;
+       }
+
+       /*
+        * Data related operations
+        */
+       *retval = -1;
+
+       if ((((void*)tracebuf) == NULL) || (tracebuf_size == 0)) {
+               error = KERN_INVALID_ARGUMENT;
+               goto exit;
+       }
+
+       STACKSHOT_SUBSYS_LOCK();
+
+       if (flags & STACKSHOT_GET_MICROSTACKSHOT) {
+               if (tracebuf_size > SANE_TRACEBUF_SIZE) {
+                       error = KERN_INVALID_ARGUMENT;
+                       goto unlock_exit;
+               }
+
+               bytes_traced = tracebuf_size;
+               error = telemetry_gather(tracebuf, &bytes_traced,
+                                        (flags & STACKSHOT_SET_MICROSTACKSHOT_MARK) ? TRUE : FALSE);
+               *retval = (int)bytes_traced;
+               goto unlock_exit;
+       }
+
+       if (flags & STACKSHOT_GET_WINDOWED_MICROSTACKSHOTS) {
+
+               if (tracebuf_size > SANE_TRACEBUF_SIZE) {
+                       error = KERN_INVALID_ARGUMENT;
+                       goto unlock_exit;
+               }
+
+               bytes_traced = tracebuf_size;
+               error = telemetry_gather_windowed(tracebuf, &bytes_traced);
+               *retval = (int)bytes_traced;
+               goto unlock_exit;
+       }
+
+       if (flags & STACKSHOT_GET_BOOT_PROFILE) {
+
+               if (tracebuf_size > SANE_BOOTPROFILE_TRACEBUF_SIZE) {
+                       error = KERN_INVALID_ARGUMENT;
+                       goto unlock_exit;
+               }
+
+               bytes_traced = tracebuf_size;
+               error = bootprofile_gather(tracebuf, &bytes_traced);
+               *retval = (int)bytes_traced;
+       }
+
+unlock_exit:
+       STACKSHOT_SUBSYS_UNLOCK();
+exit:
+       return error;
+}
+#endif /* CONFIG_TELEMETRY */
+
+/*
+ * Return the estimated size of a stackshot based on the
+ * number of currently running threads and tasks.
+ */
+uint32_t
+get_stackshot_estsize(uint32_t prev_size_hint)
+{
+       vm_size_t thread_total;
+       vm_size_t task_total;
+       uint32_t estimated_size;
+
+       thread_total = (threads_count * sizeof(struct thread_snapshot));
+       task_total = (tasks_count  * (sizeof(struct task_snapshot) + TASK_UUID_AVG_SIZE));
+
+       estimated_size = (uint32_t) VM_MAP_ROUND_PAGE((thread_total + task_total + STACKSHOT_SUPP_SIZE), PAGE_MASK);
+       if (estimated_size < prev_size_hint) {
+               estimated_size = (uint32_t) VM_MAP_ROUND_PAGE(prev_size_hint, PAGE_MASK);
+       }
+
+       return estimated_size;
+}
+
+/*
+ * stackshot_remap_buffer:     Utility function to remap bytes_traced bytes starting at stackshotbuf
+ *                             into the current task's user space and subsequently copy out the address
+ *                             at which the buffer has been mapped in user space to out_buffer_addr.
+ *
+ * Inputs:                     stackshotbuf - pointer to the original buffer in the kernel's address space
+ *                             bytes_traced - length of the buffer to remap starting from stackshotbuf
+ *                             out_buffer_addr - user address where the address of the newly mapped buffer will be copied out
+ *                             out_size_addr - user address where the size of the remapped buffer will be copied out
+ *
+ * Outputs:                    ENOSPC if there is not enough free space in the task's address space to remap the buffer
+ *                             EINVAL for all other errors returned by task_remap_buffer/mach_vm_remap
+ *                             an error from copyout
+ */
+static kern_return_t
+stackshot_remap_buffer(void *stackshotbuf, uint32_t bytes_traced, uint64_t out_buffer_addr, uint64_t out_size_addr)
+{
+       int                     error = 0;
+       mach_vm_offset_t        stackshotbuf_user_addr = (mach_vm_offset_t)NULL;
+       vm_prot_t               cur_prot, max_prot;
+
+       error = mach_vm_remap(get_task_map(current_task()), &stackshotbuf_user_addr, bytes_traced, 0,
+                       VM_FLAGS_ANYWHERE, kernel_map, (mach_vm_offset_t)stackshotbuf, FALSE, &cur_prot, &max_prot, VM_INHERIT_DEFAULT);
+       /*
+        * If the call to mach_vm_remap fails, we return the appropriate converted error
+        */
+       if (error == KERN_SUCCESS) {
+               /*
+                * If we fail to copy out the address or size of the new buffer, we remove the buffer mapping that
+                * we just made in the task's user space.
+                */
+               error = copyout(CAST_DOWN(void *, &stackshotbuf_user_addr), (user_addr_t)out_buffer_addr, sizeof(stackshotbuf_user_addr));
+               if (error != KERN_SUCCESS) {
+                       mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
+                       return error;
+               }
+               error = copyout(&bytes_traced, (user_addr_t)out_size_addr, sizeof(bytes_traced));
+               if (error != KERN_SUCCESS) {
+                       mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
+                       return error;
+               }
+       }
+       return error;
+}
+
+kern_return_t
+kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user)
+{
+       int error = 0;
+       boolean_t prev_interrupt_state;
+       uint32_t bytes_traced = 0;
+       uint32_t stackshotbuf_size = 0;
+       void * stackshotbuf = NULL;
+       kcdata_descriptor_t kcdata_p = NULL;
+
+       void * buf_to_free = NULL;
+       int size_to_free = 0;
+
+       /* Parsed arguments */
+       uint64_t                out_buffer_addr;
+       uint64_t                out_size_addr;
+       int                     pid = -1;
+       uint32_t                flags;
+       uint64_t                since_timestamp;
+       boolean_t               enable_faulting = FALSE;
+       uint32_t                size_hint = 0;
+
+       if(stackshot_config == NULL) {
+               return  KERN_INVALID_ARGUMENT;
+       }
+
+       switch (stackshot_config_version) {
+               case STACKSHOT_CONFIG_TYPE:
+                       if (stackshot_config_size != sizeof(stackshot_config_t)) {
+                               return KERN_INVALID_ARGUMENT;
+                       }
+                       stackshot_config_t *config = (stackshot_config_t *) stackshot_config;
+                       out_buffer_addr = config->sc_out_buffer_addr;
+                       out_size_addr = config->sc_out_size_addr;
+                       pid = config->sc_pid;
+                       flags = config->sc_flags;
+                       since_timestamp = config->sc_since_timestamp;
+                       if (config->sc_size <= SANE_TRACEBUF_SIZE) {
+                               size_hint = config->sc_size;
+                       }
+                       break;
+               default:
+                       return KERN_NOT_SUPPORTED;
+       }
+
+       /*
+        * Currently saving a kernel buffer is only supported from the internal/KEXT API.
+        */
+       if (stackshot_from_user) {
+               if (flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER) {
+                       return KERN_NO_ACCESS;
+               }
+       } else {
+               if (!(flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
+                       return KERN_NOT_SUPPORTED;
+               }
+       }
+
+       if (flags & STACKSHOT_ENABLE_FAULTING) {
+               return KERN_NOT_SUPPORTED;
+       }
+
+       /*
+        * If we're not saving the buffer in the kernel, we need user addresses to copy the buffer address and size into.
+        */
+       if ((!out_buffer_addr || !out_size_addr) && !(flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       if (since_timestamp != 0) {
+               return KERN_NOT_SUPPORTED;
+       }
+
+       STACKSHOT_SUBSYS_LOCK();
+
+       if (flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER) {
+               /*
+                * Don't overwrite an existing stackshot
+                */
+               if (kernel_stackshot_buf != NULL) {
+                       error = KERN_MEMORY_PRESENT;
+                       goto error_exit;
+               }
+       } else if (flags & STACKSHOT_RETRIEVE_EXISTING_BUFFER) {
+               if ((kernel_stackshot_buf == NULL) || (kernel_stackshot_buf_size <= 0)) {
+                       error = KERN_NOT_IN_SET;
+                       goto error_exit;
+               }
+               error = stackshot_remap_buffer(kernel_stackshot_buf, kernel_stackshot_buf_size,
+                                               out_buffer_addr, out_size_addr);
+               /*
+                * If we successfully remapped the buffer into the user's address space, we 
+                * set buf_to_free and size_to_free so the prior kernel mapping will be removed
+                * and then clear the kernel stackshot pointer and associated size.
+                */
+               if (error == KERN_SUCCESS) {
+                       buf_to_free = kernel_stackshot_buf;
+                       size_to_free = (int) VM_MAP_ROUND_PAGE(kernel_stackshot_buf_size, PAGE_MASK);
+                       kernel_stackshot_buf = NULL;
+                       kernel_stackshot_buf_size = 0;
+               }
+               
+               goto error_exit;
+       }
+
+       stackshotbuf_size = get_stackshot_estsize(size_hint);
+
+       for (; stackshotbuf_size <= SANE_TRACEBUF_SIZE; stackshotbuf_size <<= 1) {
+               if (kmem_alloc(kernel_map, (vm_offset_t *)&stackshotbuf, stackshotbuf_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
+                       error = KERN_RESOURCE_SHORTAGE;
+                       goto error_exit;
+               }
+
+               /*
+                * If someone has panicked, don't try and enter the debugger
+                */
+               if (panic_active()) {
+                       error = KERN_RESOURCE_SHORTAGE;
+                       goto error_exit;
+               }
+
+               if (flags & STACKSHOT_KCDATA_FORMAT) {
+                       kcdata_p = kcdata_memory_alloc_init((mach_vm_address_t)stackshotbuf, KCDATA_BUFFER_BEGIN_STACKSHOT, stackshotbuf_size, KCFLAG_USE_MEMCOPY);
+               }
+
+
+               /*
+                * Disable interrupts and save the current interrupt state.
+                */
+               prev_interrupt_state = ml_set_interrupts_enabled(FALSE);
+
+               /*
+                * Load stackshot parameters.
+                */
+               kdp_snapshot_preflight(pid, stackshotbuf, stackshotbuf_size, flags, kcdata_p, enable_faulting);
+
+               /*
+                * Trap to the debugger to obtain a stackshot (this will populate the buffer).
+                */
+               TRAP_DEBUGGER;
+
+               ml_set_interrupts_enabled(prev_interrupt_state);
+
+               /*
+                * If we didn't allocate a big enough buffer, deallocate and try again.
+                */
+               error = kdp_stack_snapshot_geterror();
+               if (error == -1) {
+                       if (kcdata_p != NULL) {
+                               kcdata_memory_destroy(kcdata_p);
+                               kcdata_p = NULL;
+                               stackshot_kcdata_p = NULL;
+                       }
+                       kmem_free(kernel_map, (vm_offset_t)stackshotbuf, stackshotbuf_size);
+                       stackshotbuf = NULL;
+                       continue;
+               }
+
+               bytes_traced = kdp_stack_snapshot_bytes_traced();
+
+               if (bytes_traced <= 0) {
+                       error = KERN_NOT_IN_SET;
+                       goto error_exit;
+               }
+
+               assert(bytes_traced <= stackshotbuf_size);
+               if (!(flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
+                       error = stackshot_remap_buffer(stackshotbuf, bytes_traced, out_buffer_addr, out_size_addr);
+                       goto error_exit;
+               }
+
+               /*
+                * Save the stackshot in the kernel buffer.
+                */
+               kernel_stackshot_buf = stackshotbuf;
+               kernel_stackshot_buf_size =  bytes_traced;
+               /*
+                * If we didn't use all the pages in the buffer, set buf_to_free to the first unused page
+                * (the page boundary following the end of the stackshot) and set size_to_free accordingly,
+                * so the kmem_free below clips the unused tail of the buffer.
+                */
+               size_to_free = stackshotbuf_size - (int) VM_MAP_ROUND_PAGE(bytes_traced, PAGE_MASK);
+
+               assert(size_to_free >= 0);
+
+               if (size_to_free != 0) {
+                       buf_to_free = (void *)((uint64_t)stackshotbuf + stackshotbuf_size - size_to_free);
+               }
+
+               stackshotbuf = NULL;
+               stackshotbuf_size = 0;
+               goto error_exit;
+       }
+
+       if (stackshotbuf_size > SANE_TRACEBUF_SIZE) {
+               error = KERN_RESOURCE_SHORTAGE;
+       }
+
+error_exit:
+       if (kcdata_p != NULL) {
+               kcdata_memory_destroy(kcdata_p);
+               kcdata_p = NULL;
+               stackshot_kcdata_p = NULL;
+       }
+
+       if (stackshotbuf != NULL) {
+               kmem_free(kernel_map, (vm_offset_t)stackshotbuf, stackshotbuf_size);
+       }
+       if (buf_to_free  != NULL) {
+               kmem_free(kernel_map, (vm_offset_t)buf_to_free, size_to_free);
+       }
+       STACKSHOT_SUBSYS_UNLOCK();
+       return error;
+}
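
From the caller's side, a sketch of the configuration that kern_stack_snapshot_internal() parses (the flag choices, the user-visible header path, and the syscall or wrapper that delivers the config are assumptions):

    #include <stdint.h>
    #include <string.h>
    #include <sys/stackshot.h>

    /* Request a kcdata-format stackshot of all tasks, including library load info. */
    static void
    prepare_stackshot_config(stackshot_config_t *config, uint64_t *out_buf, uint32_t *out_size)
    {
        memset(config, 0, sizeof(*config));
        config->sc_pid             = -1;  /* -1 == trace every task */
        config->sc_flags           = STACKSHOT_KCDATA_FORMAT | STACKSHOT_SAVE_LOADINFO;
        config->sc_since_timestamp = 0;   /* delta snapshots not yet supported */
        config->sc_size            = 0;   /* let the kernel estimate the buffer size */
        config->sc_out_buffer_addr = (uint64_t)(uintptr_t)out_buf;  /* receives the remapped buffer address */
        config->sc_out_size_addr   = (uint64_t)(uintptr_t)out_size; /* receives the number of bytes traced */
    }
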
+
 /* Cache stack snapshot parameters in preparation for a trace */
 void
-kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset)
+kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint32_t flags,
+                                          kcdata_descriptor_t data_p, boolean_t enable_faulting)
 {
        stack_snapshot_pid = pid;
        stack_snapshot_buf = tracebuf;
        stack_snapshot_bufsize = tracebuf_size;
        stack_snapshot_flags = flags;
-       stack_snapshot_dispatch_offset = dispatch_offset;
+       stack_enable_faulting = enable_faulting;
+       if (data_p != NULL) {
+               stackshot_kcdata_p = data_p;
+       }
        kdp_snapshot++;
        /* Mark this debugger as active, since the polled mode driver that 
         * ordinarily does this may not be enabled (yet), or since KDB may be
@@ -191,14 +827,567 @@ kdp_stack_snapshot_geterror(void)
        return stack_snapshot_ret;
 }
 
-int
+uint32_t
 kdp_stack_snapshot_bytes_traced(void)
 {
        return stack_snapshot_bytes_traced;
 }
 
+static boolean_t memory_iszero(void *addr, size_t size)
+{
+       char *data = (char *)addr;
+       for (size_t i = 0; i < size; i++){
+               if (data[i] != 0)
+                       return FALSE;
+       }
+       return TRUE;
+}
+
 static int
-kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_flags, uint32_t dispatch_offset, uint32_t *pbytesTraced)
+kdp_stackshot_kcdata_format(int pid, uint32_t trace_flags, uint32_t *pBytesTraced)
+{
+       /* convenience macros specific only for this function */
+#define kcd_end_address(kcd) ((void *)((uint64_t)((kcd)->kcd_addr_begin) + kcdata_memory_get_used_bytes((kcd))))
+#define kcd_max_address(kcd) ((void *)((kcd)->kcd_addr_begin + (kcd)->kcd_length))
+#define kcd_exit_on_error(action)                                 \
+       do {                                                      \
+               if (KERN_SUCCESS != (error = (action))) {         \
+                       if (error == KERN_RESOURCE_SHORTAGE) {    \
+                               error = -1;                       \
+                       }                                         \
+                       goto error_exit;                          \
+               }                                                 \
+       } while (0); /* end kcd_exit_on_error */
+
+       int error = 0;
+       mach_vm_address_t out_addr = 0;
+       uint64_t abs_time;
+       struct task_snapshot_v2 *cur_tsnap;
+       uint64_t system_state_flags = 0;
+       int saved_count = 0;
+       task_t task = TASK_NULL;
+       thread_t thread = THREAD_NULL;
+       mach_timebase_info_data_t timebase = {0, 0};
+       uint64_t microsecs = 0, secs = 0;
+       uint32_t length_to_copy, tmp32;
+
+       abs_time = mach_absolute_time();
+       clock_get_calendar_microtime((clock_sec_t*)&secs, (clock_usec_t*)&microsecs);
+
+       /* process the flags */
+       boolean_t dispatch_p = ((trace_flags & STACKSHOT_GET_DQ) != 0);
+       boolean_t save_loadinfo_p = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0);
+       boolean_t save_kextloadinfo_p = ((trace_flags & STACKSHOT_SAVE_KEXT_LOADINFO) != 0);
+       boolean_t save_userframes_p = ((trace_flags & STACKSHOT_SAVE_KERNEL_FRAMES_ONLY) == 0);
+       boolean_t save_donating_pids_p = ((trace_flags & STACKSHOT_SAVE_IMP_DONATION_PIDS) != 0);
+
+       if (sizeof(void *) == 8)
+               system_state_flags |= kKernel64_p;
+
+       if (stackshot_kcdata_p == NULL || pBytesTraced == NULL) {
+               error = -1;
+               goto error_exit;
+       }
+
+       /* begin saving data into the buffer */
+       *pBytesTraced = 0;
+       kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, trace_flags, "stackshot_in_flags"));
+       kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, (uint32_t)pid, "stackshot_in_pid"));
+       kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, system_state_flags, "system_state_flags"));
+       tmp32 = PAGE_SIZE;
+       kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_KERN_PAGE_SIZE, sizeof(uint32_t), &out_addr));
+       memcpy((void *)out_addr, &tmp32, sizeof(tmp32));
+
+#if CONFIG_JETSAM
+       tmp32 = memorystatus_get_pressure_status_kdp();
+       kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_JETSAM_LEVEL, sizeof(uint32_t), &out_addr));
+       memcpy((void *)out_addr, &tmp32, sizeof(tmp32));
+#endif
+
+       /* save boot-args and osversion string */
+       length_to_copy =  MIN((uint32_t)(strlen(version) + 1), OSVERSIZE);
+       kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_OSVERSION, length_to_copy, &out_addr));
+       strlcpy((char*)out_addr, &version[0], length_to_copy);
+
+       length_to_copy =  MIN((uint32_t)(strlen(PE_boot_args()) + 1), OSVERSIZE);
+       kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_BOOTARGS, length_to_copy, &out_addr));
+       strlcpy((char*)out_addr, PE_boot_args(), length_to_copy);
+
+       /* setup mach_absolute_time and timebase info */
+       clock_timebase_info(&timebase);
+       kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, KCDATA_TYPE_TIMEBASE, sizeof(timebase), &out_addr));
+       memcpy((void *)out_addr, &timebase, sizeof(timebase));
+
+       kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, KCDATA_TYPE_MACH_ABSOLUTE_TIME, sizeof(uint64_t), &out_addr));
+       memcpy((void *)out_addr, &abs_time, sizeof(uint64_t));
+
+       microsecs = microsecs + (secs * USECSPERSEC);
+       kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, KCDATA_TYPE_USECS_SINCE_EPOCH, sizeof(uint64_t), &out_addr));
+       memcpy((void *)out_addr, &microsecs, sizeof(uint64_t));
+
+       /* reserve space for system-level shared cache load info */
+       struct dyld_uuid_info_64 *sys_shared_cache_loadinfo;
+       kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO, sizeof(kernel_uuid_info), &out_addr));
+       sys_shared_cache_loadinfo = (struct dyld_uuid_info_64 *)out_addr;
+       bzero((void *)sys_shared_cache_loadinfo, sizeof(struct dyld_uuid_info_64));
+
+       /* Add requested information first */
+       if (trace_flags & STACKSHOT_GET_GLOBAL_MEM_STATS) {
+               kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_GLOBAL_MEM_STATS, sizeof(struct mem_and_io_snapshot), &out_addr));
+               kdp_mem_and_io_snapshot((struct mem_and_io_snapshot *)out_addr);
+       }
+
+       /* Iterate over tasks */
+       queue_head_t *task_list = &tasks;
+       queue_iterate(task_list, task, task_t, tasks) {
+               int task_pid;
+               if ((task == NULL) || !ml_validate_nofault((vm_offset_t) task, sizeof(struct task)))
+                       goto error_exit;
+
+               task_pid = pid_from_task(task);
+               if (!task->active) {
+                       /*
+                        * Not interested in terminated tasks without threads, and
+                        * at the moment, stackshot can't handle a task without a name.
+                        */
+                       if (queue_empty(&task->threads) || task_pid == -1) {
+                               continue;
+                       }
+               }
+
+               /* Trace everything, unless a process was specified */
+               if ((pid == -1) || (pid == task_pid)) {
+
+                       uint64_t task_uniqueid = proc_uniqueid_from_task(task);
+                       boolean_t task64 = task_has_64BitAddr(task);
+                       boolean_t have_map = (task->map != NULL) && (ml_validate_nofault((vm_offset_t)(task->map), sizeof(struct _vm_map)));
+                       boolean_t have_pmap = have_map && (task->map->pmap != NULL) && (ml_validate_nofault((vm_offset_t)(task->map->pmap), sizeof(struct pmap)));
+
+                       /* add task snapshot marker */
+                       kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN, STACKSHOT_KCCONTAINER_TASK, task_uniqueid));
+
+                       /* add task_snapshot_v2 struct data */
+                       kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_TASK_SNAPSHOT, sizeof(struct task_snapshot_v2), &out_addr));
+                       cur_tsnap = (struct task_snapshot_v2 *)out_addr;
+                       bzero(cur_tsnap, sizeof(struct task_snapshot_v2));
+
+                       cur_tsnap->ts_pid = task_pid;
+                       cur_tsnap->ts_unique_pid = task_uniqueid;
+
+                       /* Add the BSD process identifiers */
+                       if (task_pid != -1 && task->bsd_info != NULL)
+                               proc_name_kdp(task, cur_tsnap->ts_p_comm, sizeof(cur_tsnap->ts_p_comm));
+                       else {
+                               cur_tsnap->ts_p_comm[0] = '\0';
+#if IMPORTANCE_INHERITANCE && (DEVELOPMENT || DEBUG)
+                               if (task->task_imp_base != NULL) {
+                                       strlcpy(cur_tsnap->ts_p_comm, &task->task_imp_base->iit_procname[0],
+                                               MIN((int)sizeof(task->task_imp_base->iit_procname), (int)sizeof(cur_tsnap->ts_p_comm)));
+                               }
+#endif
+                       }
+
+                       if (task64)
+                               cur_tsnap->ts_ss_flags |= kUser64_p;
+                       if (!task->active || task_is_a_corpse(task))
+                               cur_tsnap->ts_ss_flags |= kTerminatedSnapshot;
+                       if (task->pidsuspended)
+                               cur_tsnap->ts_ss_flags |= kPidSuspended;
+                       if (task->frozen)
+                               cur_tsnap->ts_ss_flags |= kFrozen;
+                       if (task->effective_policy.darwinbg == 1)
+                               cur_tsnap->ts_ss_flags |= kTaskDarwinBG;
+                       if (task->requested_policy.t_role == TASK_FOREGROUND_APPLICATION)
+                               cur_tsnap->ts_ss_flags |= kTaskIsForeground;
+                       if (task->requested_policy.t_boosted == 1)
+                               cur_tsnap->ts_ss_flags |= kTaskIsBoosted;
+                       if (task->effective_policy.t_sup_active == 1)
+                               cur_tsnap->ts_ss_flags |= kTaskIsSuppressed;
+
+#if IMPORTANCE_INHERITANCE
+                       if (task->task_imp_base) {
+                               if (task->task_imp_base->iit_donor)
+                                       cur_tsnap->ts_ss_flags |= kTaskIsImpDonor;
+                               if (task->task_imp_base->iit_live_donor)
+                                       cur_tsnap->ts_ss_flags |= kTaskIsLiveImpDonor;
+                       }
+#endif
+
+                       cur_tsnap->ts_latency_qos = (task->effective_policy.t_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED) ?
+                               LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | task->effective_policy.t_latency_qos);
+                       cur_tsnap->ts_suspend_count = task->suspend_count;
+                       cur_tsnap->ts_p_start_sec = 0;
+                       proc_starttime_kdp(task->bsd_info, &cur_tsnap->ts_p_start_sec, NULL);
+
+                       cur_tsnap->ts_task_size = have_pmap ? (pmap_resident_count(task->map->pmap) * PAGE_SIZE) : 0;
+                       cur_tsnap->ts_max_resident_size = get_task_resident_max(task);
+                       cur_tsnap->ts_faults = task->faults;
+                       cur_tsnap->ts_pageins = task->pageins;
+                       cur_tsnap->ts_cow_faults = task->cow_faults;
+                       cur_tsnap->ts_user_time_in_terminated_threads = task->total_user_time;
+                       cur_tsnap->ts_system_time_in_terminated_threads = task->total_system_time;
+                       cur_tsnap->ts_was_throttled = (uint32_t) proc_was_throttled_from_task(task);
+                       cur_tsnap->ts_did_throttle = (uint32_t) proc_did_throttle_from_task(task);
+
+                       /* Check for shared cache information */
+                       do {
+                               uint8_t shared_cache_identifier[16];
+                               uint64_t shared_cache_slide;
+                               uint64_t shared_cache_base_address = 0;
+                               boolean_t found_shared_cache_info = TRUE;
+
+                               if (task->shared_region && ml_validate_nofault((vm_offset_t)task->shared_region, sizeof(struct vm_shared_region))) {
+                                       struct vm_shared_region *sr = task->shared_region;
+                                       shared_cache_base_address = sr->sr_base_address + sr->sr_first_mapping;
+                               }
+
+                               if (!shared_cache_base_address ||
+                                               !kdp_copyin(task->map->pmap, shared_cache_base_address + offsetof(struct _dyld_cache_header, uuid), shared_cache_identifier, sizeof(shared_cache_identifier))
+                                  ) {
+                                       found_shared_cache_info = FALSE;
+                               }
+
+                               if (task->shared_region) {
+                                       /*
+                                        * No refcounting here, but we are in debugger
+                                        * context, so that should be safe.
+                                        */
+                                       shared_cache_slide = task->shared_region->sr_slide_info.slide;
+                               } else {
+                                       shared_cache_slide = 0;
+                               }
+
+                               if (found_shared_cache_info == FALSE)
+                                       break;
+
+                               if (task_pid == 1) {
+                                       /* save launchd's shared cache info as system level */
+                                       bcopy(shared_cache_identifier, sys_shared_cache_loadinfo->imageUUID, sizeof(sys_shared_cache_loadinfo->imageUUID));
+                                       sys_shared_cache_loadinfo->imageLoadAddress = shared_cache_slide;
+                                       break;
+                               } else {
+                                       if (shared_cache_slide == sys_shared_cache_loadinfo->imageLoadAddress &&
+                                                       0 == memcmp(shared_cache_identifier, sys_shared_cache_loadinfo->imageUUID, sizeof(sys_shared_cache_loadinfo->imageUUID))) {
+                                               /* skip adding shared cache info; it's the same as the system-level one */
+                                               break;
+                                       }
+                               }
+
+                               kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO, sizeof(struct dyld_uuid_info_64), &out_addr));
+                               struct dyld_uuid_info_64 *shared_cache_data = (struct dyld_uuid_info_64 *)out_addr;
+                               shared_cache_data->imageLoadAddress = shared_cache_slide;
+                               bcopy(shared_cache_identifier, shared_cache_data->imageUUID, sizeof(shared_cache_data->imageUUID));
+
+                       } while(0);
+
+                       /* I/O Statistics if any counters are non zero */
+                       assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
+                       if (task->task_io_stats && !memory_iszero(task->task_io_stats, sizeof(struct io_stat_info))) {
+                               kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
+                               struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
+                               _iostat->ss_disk_reads_count = task->task_io_stats->disk_reads.count;
+                               _iostat->ss_disk_reads_size = task->task_io_stats->disk_reads.size;
+                               _iostat->ss_disk_writes_count = (task->task_io_stats->total_io.count - task->task_io_stats->disk_reads.count);
+                               _iostat->ss_disk_writes_size = (task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size);
+                               _iostat->ss_paging_count = task->task_io_stats->paging.count;
+                               _iostat->ss_paging_size = task->task_io_stats->paging.size;
+                               _iostat->ss_non_paging_count = (task->task_io_stats->total_io.count - task->task_io_stats->paging.count);
+                               _iostat->ss_non_paging_size = (task->task_io_stats->total_io.size - task->task_io_stats->paging.size);
+                               _iostat->ss_metadata_count = task->task_io_stats->metadata.count;
+                               _iostat->ss_metadata_size = task->task_io_stats->metadata.size;
+                               _iostat->ss_data_count = (task->task_io_stats->total_io.count - task->task_io_stats->metadata.count);
+                               _iostat->ss_data_size = (task->task_io_stats->total_io.size - task->task_io_stats->metadata.size);
+                               for(int i = 0; i < IO_NUM_PRIORITIES; i++) {
+                                       _iostat->ss_io_priority_count[i] = task->task_io_stats->io_priority[i].count;
+                                       _iostat->ss_io_priority_size[i] = task->task_io_stats->io_priority[i].size;
+                               }
+                       }
+
+#if IMPORTANCE_INHERITANCE
+                       if (save_donating_pids_p) {
+                               kcd_exit_on_error(((((mach_vm_address_t) kcd_end_address(stackshot_kcdata_p) + (TASK_IMP_WALK_LIMIT * sizeof(int32_t)))
+                                                       < (mach_vm_address_t) kcd_max_address(stackshot_kcdata_p)) ? KERN_SUCCESS : KERN_RESOURCE_SHORTAGE));
+                               saved_count = task_importance_list_pids(task, TASK_IMP_LIST_DONATING_PIDS, (void *)kcd_end_address(stackshot_kcdata_p), TASK_IMP_WALK_LIMIT);
+                               if (saved_count > 0)
+                                       kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STASKSHOT_KCTYPE_DONATING_PIDS, sizeof(int32_t), saved_count, &out_addr));
+                       }
+#endif
+
+                       /* place load info and libraries now */
+                       uint32_t uuid_info_count = 0;
+                       mach_vm_address_t uuid_info_addr = 0;
+                       if (save_loadinfo_p && have_pmap && task->active && task_pid > 0) {
+                               /* Read the dyld_all_image_infos struct from the task memory to get UUID array count and location */
+                               if (task64) {
+                                       struct user64_dyld_all_image_infos task_image_infos;
+                                       if (kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct user64_dyld_all_image_infos))) {
+                                               uuid_info_count = (uint32_t)task_image_infos.uuidArrayCount;
+                                               uuid_info_addr = task_image_infos.uuidArray;
+                                       }
+                               } else {
+                                       struct user32_dyld_all_image_infos task_image_infos;
+                                       if (kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct user32_dyld_all_image_infos))) {
+                                               uuid_info_count = task_image_infos.uuidArrayCount;
+                                               uuid_info_addr = task_image_infos.uuidArray;
+                                       }
+                               }
+
+                               /*
+                                * If we get a NULL uuid_info_addr (which can happen when we catch dyld in the middle of updating
+                                * this data structure), we zero the uuid_info_count so that we won't even try to save load info
+                                * for this task.
+                                */
+                               if (!uuid_info_addr) {
+                                       uuid_info_count = 0;
+                               }
+                       }
+
+                       if (have_pmap && task_pid == 0) {
+                               if (save_kextloadinfo_p && ml_validate_nofault((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader))) {
+                                       uuid_info_count = gLoadedKextSummaries->numSummaries + 1; /* include main kernel UUID */
+                               } else {
+                                       uuid_info_count = 1; /* at least include the kernel UUID */
+                               }
+                       }
+
+                       if (task_pid > 0 && uuid_info_count > 0 && uuid_info_count < MAX_LOADINFOS) {
+                               uint32_t uuid_info_size = (uint32_t)(task64 ? sizeof(struct user64_dyld_uuid_info) : sizeof(struct user32_dyld_uuid_info));
+                               uint32_t uuid_info_array_size = uuid_info_count * uuid_info_size;
+
+                               kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p,
+                                                       (task64 ? KCDATA_TYPE_LIBRARY_LOADINFO64 : KCDATA_TYPE_LIBRARY_LOADINFO),
+                                                       uuid_info_size,
+                                                       uuid_info_count,
+                                                       &out_addr));
+
+
+                               /* Copy in the UUID info array
+                                * It may be nonresident, in which case just fix up nloadinfos to 0 in the task_snap
+                                */
+                               if (have_pmap && !kdp_copyin(task->map->pmap, uuid_info_addr, (void *)out_addr, uuid_info_array_size)) {
+                                       bzero((void *)out_addr, uuid_info_array_size);
+                               }
+
+                       } else if (task_pid == 0 && uuid_info_count > 0 && uuid_info_count < MAX_LOADINFOS) {
+                               uintptr_t image_load_address;
+
+                               do {
+                                       if (!kernel_uuid || !ml_validate_nofault((vm_offset_t)kernel_uuid, sizeof(uuid_t))) {
+                                               /* Kernel UUID not found or inaccessible */
+                                               break;
+                                       }
+                                       kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p,
+                                                               (sizeof(kernel_uuid_info) == sizeof(struct user64_dyld_uuid_info))? KCDATA_TYPE_LIBRARY_LOADINFO64: KCDATA_TYPE_LIBRARY_LOADINFO,
+                                                               sizeof(kernel_uuid_info), uuid_info_count, &out_addr)
+                                                       );
+                                       kernel_uuid_info *uuid_info_array = (kernel_uuid_info *)out_addr;
+                                       image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(vm_kernel_stext);
+                                       uuid_info_array[0].imageLoadAddress = image_load_address;
+                                       memcpy(&uuid_info_array[0].imageUUID, kernel_uuid, sizeof(uuid_t));
+
+                                       if (save_kextloadinfo_p && ml_validate_nofault((vm_offset_t)(&gLoadedKextSummaries->summaries[0]),
+                                                               gLoadedKextSummaries->entry_size * gLoadedKextSummaries->numSummaries)) {
+                                               uint32_t kexti;
+                                               for (kexti=0 ; kexti < gLoadedKextSummaries->numSummaries; kexti++) {
+                                                       image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].address);
+                                                       uuid_info_array[kexti + 1].imageLoadAddress = image_load_address;
+                                                       memcpy(&uuid_info_array[kexti + 1].imageUUID, &gLoadedKextSummaries->summaries[kexti].uuid, sizeof(uuid_t));
+                                               }
+                                       }
+                               } while(0);
+                       }
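Each load-info record emitted above is simply an (unslid load address, UUID) pair. A minimal sketch of the record shape being filled in, under the assumption that kernel_uuid_info mirrors the 64-bit dyld_uuid_info layout (only the two fields touched here are shown):

/* Sketch only: shape of one load-info record written for the kernel and each kext. */
struct kernel_uuid_info_sketch {
        uintptr_t imageLoadAddress;   /* VM_KERNEL_UNSLIDE()'d text base */
        uuid_t    imageUUID;          /* 16-byte Mach-O UUID */
};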
+
+                       /* Iterate over task threads */
+                       queue_iterate(&task->threads, thread, thread_t, task_threads){
+                               uint64_t tval;
+                               uint64_t thread_uniqueid = 0;
+                               char cur_thread_name[STACKSHOT_MAX_THREAD_NAME_SIZE];
+
+                               if ((thread == NULL) || !ml_validate_nofault((vm_offset_t) thread, sizeof(struct thread)))
+                                       goto error_exit;
+
+                               if (!save_userframes_p && thread->kernel_stack == 0)
+                                       continue;
+
+                               thread_uniqueid = thread_tid(thread);
+
+                               /* add thread marker */
+                               kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN, STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));
+                               kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_SNAPSHOT, sizeof(struct thread_snapshot_v2), &out_addr));
+                               struct thread_snapshot_v2 * cur_thread_snap = (struct thread_snapshot_v2 *)out_addr;
+
+                               /* Populate the thread snapshot header */
+                               cur_thread_snap->ths_thread_id = thread_uniqueid;
+                               cur_thread_snap->ths_state = thread->state;
+                               cur_thread_snap->ths_ss_flags = 0;
+                               cur_thread_snap->ths_base_priority = thread->base_pri;
+                               cur_thread_snap->ths_sched_priority = thread->sched_pri;
+                               cur_thread_snap->ths_sched_flags = thread->sched_flags;
+                               cur_thread_snap->ths_wait_event = VM_KERNEL_UNSLIDE_OR_PERM(thread->wait_event);
+                               cur_thread_snap->ths_continuation = VM_KERNEL_UNSLIDE(thread->continuation);
+                               cur_thread_snap->ths_last_run_time = thread->last_run_time;
+                               cur_thread_snap->ths_last_made_runnable_time = thread->last_made_runnable_time;
+                               cur_thread_snap->ths_io_tier = proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
+                               cur_thread_snap->ths_eqos = thread->effective_policy.thep_qos;
+                               cur_thread_snap->ths_rqos = thread->requested_policy.thrp_qos;
+                               cur_thread_snap->ths_rqos_override = thread->requested_policy.thrp_qos_override;
+                               cur_thread_snap->ths_total_syscalls = thread->syscalls_mach + thread->syscalls_unix;
+                               cur_thread_snap->ths_dqserialnum = 0;
+
+                               tval = safe_grab_timer_value(&thread->user_timer);
+                               cur_thread_snap->ths_user_time = tval;
+                               tval = safe_grab_timer_value(&thread->system_timer);
+
+                               if (thread->precise_user_kernel_time) {
+                                       cur_thread_snap->ths_sys_time = tval;
+                               } else {
+                                       cur_thread_snap->ths_user_time += tval;
+                                       cur_thread_snap->ths_sys_time = 0;
+                               }
+
+                               if (thread->effective_policy.darwinbg)
+                                       cur_thread_snap->ths_ss_flags |= kThreadDarwinBG;
+                               if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO))
+                                       cur_thread_snap->ths_ss_flags |= kThreadIOPassive;
+                               if (thread->suspend_count > 0)
+                                       cur_thread_snap->ths_ss_flags |= kThreadSuspended;
+
+                               if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
+                                       cur_thread_snap->ths_ss_flags |= kGlobalForcedIdle;
+                               }
+
+                               if (IPC_VOUCHER_NULL != thread->ith_voucher)
+                                       cur_thread_snap->ths_voucher_identifier = VM_KERNEL_ADDRPERM(thread->ith_voucher);
+                               if (dispatch_p && (task != kernel_task) && (task->active) && have_pmap) {
+                                       uint64_t dqkeyaddr = thread_dispatchqaddr(thread);
+                                       if (dqkeyaddr != 0) {
+                                               uint64_t dqaddr = 0;
+                                               if (kdp_copyin(task->map->pmap, dqkeyaddr, &dqaddr, (task64 ? 8 : 4)) && (dqaddr != 0)) {
+                                                       uint64_t dqserialnumaddr = dqaddr + proc_dispatchqueue_serialno_offset_from_task(task);
+                                                       uint64_t dqserialnum = 0;
+                                                       if (kdp_copyin(task->map->pmap, dqserialnumaddr, &dqserialnum, (task64 ? 8 : 4))) {
+                                                               cur_thread_snap->ths_ss_flags |= kHasDispatchSerial;
+                                                               cur_thread_snap->ths_dqserialnum = dqserialnum;
+                                                       }
+                                               }
+                                       }
+                               }
+
+                               /* if there is a thread name, add it to the buffer */
+                               cur_thread_name[0] = '\0';
+                               proc_threadname_kdp(thread->uthread, cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE);
+                               if (strnlen(cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE) > 0) {
+                                       kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_NAME, sizeof(cur_thread_name), &out_addr));
+                                       bcopy((void *)cur_thread_name, (void *)out_addr, sizeof(cur_thread_name));
+                               }
+
+                               /* I/O Statistics */
+                               assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
+                               if (thread->thread_io_stats && !memory_iszero(thread->thread_io_stats, sizeof(struct io_stat_info))) {
+                                       kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
+                                       struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
+                                       _iostat->ss_disk_reads_count = thread->thread_io_stats->disk_reads.count;
+                                       _iostat->ss_disk_reads_size = thread->thread_io_stats->disk_reads.size;
+                                       _iostat->ss_disk_writes_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->disk_reads.count);
+                                       _iostat->ss_disk_writes_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->disk_reads.size);
+                                       _iostat->ss_paging_count = thread->thread_io_stats->paging.count;
+                                       _iostat->ss_paging_size = thread->thread_io_stats->paging.size;
+                                       _iostat->ss_non_paging_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->paging.count);
+                                       _iostat->ss_non_paging_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->paging.size);
+                                       _iostat->ss_metadata_count = thread->thread_io_stats->metadata.count;
+                                       _iostat->ss_metadata_size = thread->thread_io_stats->metadata.size;
+                                       _iostat->ss_data_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->metadata.count);
+                                       _iostat->ss_data_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->metadata.size);
+                                       for(int i = 0; i < IO_NUM_PRIORITIES; i++) {
+                                               _iostat->ss_io_priority_count[i] = thread->thread_io_stats->io_priority[i].count;
+                                               _iostat->ss_io_priority_size[i] = thread->thread_io_stats->io_priority[i].size;
+                                       }
+                               }
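Only reads, paging, metadata and the per-priority buckets are stored directly in thread_io_stats; the write, non-paging and data fields above are derived by subtracting from the totals. For example (illustrative numbers only): with total_io.count = 100, disk_reads.count = 30, paging.count = 10 and metadata.count = 5, the snapshot would report ss_disk_writes_count = 70, ss_non_paging_count = 90 and ss_data_count = 95.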
+
+                               /* Trace user stack, if any */
+                               if (save_userframes_p && task->active && thread->task->map != kernel_map) {
+                                       uint32_t thread_snapshot_flags = 0;
+                                       /* 64-bit task? */
+                                       if (task_has_64BitAddr(thread->task)) {
+                                               out_addr = (mach_vm_address_t)kcd_end_address(stackshot_kcdata_p);
+                                               saved_count = machine_trace_thread64(thread, (char *)out_addr, (char *)kcd_max_address(stackshot_kcdata_p), MAX_FRAMES, TRUE, &thread_snapshot_flags);
+                                               if (saved_count > 0) {
+                                                       kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p,
+                                                                       STACKSHOT_KCTYPE_USER_STACKFRAME64,
+                                                                       sizeof(struct stack_snapshot_frame64),
+                                                                       saved_count/sizeof(struct stack_snapshot_frame64),
+                                                                       &out_addr));
+                                                       cur_thread_snap->ths_ss_flags |= kUser64_p;
+                                               }
+                                       }
+                                       else {
+                                               out_addr = (mach_vm_address_t)kcd_end_address(stackshot_kcdata_p);
+                                               saved_count = machine_trace_thread(thread, (char *)out_addr, (char *)kcd_max_address(stackshot_kcdata_p), MAX_FRAMES, TRUE, &thread_snapshot_flags);
+                                               if (saved_count > 0) {
+                                                       kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p,
+                                                                               STACKSHOT_KCTYPE_USER_STACKFRAME,
+                                                                               sizeof(struct stack_snapshot_frame32),
+                                                                               saved_count/sizeof(struct stack_snapshot_frame32),
+                                                                               &out_addr));
+                                               }
+                                       }
+
+                                       if (thread_snapshot_flags != 0) {
+                                               cur_thread_snap->ths_ss_flags |= thread_snapshot_flags;
+                                       }
+                               }
+
+                               /* Call through to the machine specific trace routines
+                                * Frames are added past the snapshot header.
+                                */
+                               if (thread->kernel_stack != 0) {
+                                       uint32_t thread_snapshot_flags = 0;
+#if defined(__LP64__)
+                                       out_addr = (mach_vm_address_t)kcd_end_address(stackshot_kcdata_p);
+                                       saved_count = machine_trace_thread64(thread, (char *)out_addr, (char *)kcd_max_address(stackshot_kcdata_p), MAX_FRAMES, FALSE, &thread_snapshot_flags);
+                                       if (saved_count > 0){
+                                               cur_thread_snap->ths_ss_flags |= kKernel64_p;
+                                               kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p,
+                                                                       STACKSHOT_KCTYPE_KERN_STACKFRAME64,
+                                                                       sizeof(struct stack_snapshot_frame64),
+                                                                       saved_count/sizeof(struct stack_snapshot_frame64),
+                                                                       &out_addr));
+                                       }
+#else
+                                       out_addr = (mach_vm_address_t)kcd_end_address(stackshot_kcdata_p);
+                                       saved_count = machine_trace_thread(thread, (char *)out_addr, (char *)kcd_max_address(stackshot_kcdata_p), MAX_FRAMES, FALSE, &thread_snapshot_flags);
+                                       if (saved_count > 0) {
+                                               kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p,
+                                                                       STACKSHOT_KCTYPE_KERN_STACKFRAME,
+                                                                       sizeof(struct stack_snapshot_frame32),
+                                                                       saved_count/sizeof(struct stack_snapshot_frame32),
+                                                                       &out_addr));
+                                       }
+#endif
+                                       if (thread_snapshot_flags != 0) {
+                                               cur_thread_snap->ths_ss_flags |= thread_snapshot_flags;
+                                       }
+                               }
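Both stack-trace paths above share one ordering detail: frames are written by the machine trace routine directly at the current end of the kcdata buffer, and only when a non-zero byte count comes back is the array descriptor registered, which moves the buffer cursor past the frames just written. A condensed sketch of that reserve-after-write pattern, reusing the variables and helpers from the surrounding loop:

/* Sketch only: write the frames first, then describe them. */
out_addr = (mach_vm_address_t)kcd_end_address(stackshot_kcdata_p);
saved_count = machine_trace_thread64(thread, (char *)out_addr,
                (char *)kcd_max_address(stackshot_kcdata_p),
                MAX_FRAMES, FALSE, &thread_snapshot_flags);
if (saved_count > 0) {
        /* registering the array advances kcd_end_address() past the frames */
        kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p,
                        STACKSHOT_KCTYPE_KERN_STACKFRAME64,
                        sizeof(struct stack_snapshot_frame64),
                        saved_count / sizeof(struct stack_snapshot_frame64),
                        &out_addr));
}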
+                               /* mark end of thread snapshot data */
+                               kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END, STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));
+                       }
+                       /* mark end of task snapshot data */
+                       kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END, STACKSHOT_KCCONTAINER_TASK, task_uniqueid));
+               }
+       }
+
+       /*  === END of populating stackshot data === */
+
+       *pBytesTraced = (uint32_t) kcdata_memory_get_used_bytes(stackshot_kcdata_p);
+error_exit:
+       /* Release stack snapshot wait indicator */
+       kdp_snapshot_postflight();
+
+       return error;
+}
+
+static int
+kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_flags, uint32_t *pbytesTraced)
 {
        char *tracepos = (char *) tracebuf;
        char *tracebound = tracepos + tracebuf_size;
@@ -237,8 +1426,8 @@ walk_list:
                uint64_t task_uniqueid = proc_uniqueid_from_task(task);
                boolean_t task64 = task_has_64BitAddr(task);
 
-               if (!task->active) {
-                       /* 
+               if (!task->active || task_is_a_corpse(task)) {
+                       /*
                         * Not interested in terminated tasks without threads, and
                         * at the moment, stackshot can't handle a task  without a name.
                         */
@@ -283,9 +1472,11 @@ walk_list:
                                }
                        }
 
-                       if (have_pmap && save_kextloadinfo_p && task_pid == 0) {
-                               if (ml_validate_nofault((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader))) {
+                       if (have_pmap && task_pid == 0) {
+                               if (save_kextloadinfo_p && ml_validate_nofault((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader))) {
                                        uuid_info_count = gLoadedKextSummaries->numSummaries + 1; /* include main kernel UUID */
+                               } else {
+                                       uuid_info_count = 1; /* at least include the kernel UUID */
                                }
                        }
 
@@ -311,7 +1502,7 @@ walk_list:
                                task_snap->ss_flags |= kUser64_p;
                        if (task64 && task_pid == 0)
                                task_snap->ss_flags |= kKernel64_p;
-                       if (!task->active)
+                       if (!task->active || task_is_a_corpse(task))
                                task_snap->ss_flags |= kTerminatedSnapshot;
                        if(task->pidsuspended) task_snap->ss_flags |= kPidSuspended;
                        if(task->frozen) task_snap->ss_flags |= kFrozen;
@@ -430,15 +1621,14 @@ walk_list:
                        } else if (task_pid == 0 && uuid_info_count > 0) {
                                uint32_t uuid_info_size = (uint32_t)sizeof(kernel_uuid_info);
                                uint32_t uuid_info_array_size = uuid_info_count * uuid_info_size;
-                               kernel_uuid_info *output_uuids;
+                               uint32_t  uuid_offset = offsetof(kernel_uuid_info, imageUUID);
+                               uintptr_t image_load_address;
 
                                if (tracepos + uuid_info_array_size > tracebound) {
                                        error = -1;
                                        goto error_exit;
                                }
 
-                               output_uuids = (kernel_uuid_info *)tracepos;
-
                                do {
 
                                        if (!kernel_uuid || !ml_validate_nofault((vm_offset_t)kernel_uuid, sizeof(uuid_t))) {
@@ -446,31 +1636,35 @@ walk_list:
                                                task_snap->nloadinfos = 0;
                                                break;
                                        }
+                                       image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(vm_kernel_stext);
+                                       memcpy(tracepos, &image_load_address, sizeof(uintptr_t));
+                                       memcpy((tracepos + uuid_offset), kernel_uuid, sizeof(uuid_t));
+                                       tracepos += uuid_info_size;
 
-                                       output_uuids[0].imageLoadAddress = (uintptr_t)VM_KERNEL_UNSLIDE(vm_kernel_stext);
-                                       memcpy(&output_uuids[0].imageUUID, kernel_uuid, sizeof(uuid_t));
-
-                                       if (ml_validate_nofault((vm_offset_t)(&gLoadedKextSummaries->summaries[0]),
+                                       if (save_kextloadinfo_p && ml_validate_nofault((vm_offset_t)(&gLoadedKextSummaries->summaries[0]),
                                                                                        gLoadedKextSummaries->entry_size * gLoadedKextSummaries->numSummaries)) {
                                                uint32_t kexti;
-
                                                for (kexti=0 ; kexti < gLoadedKextSummaries->numSummaries; kexti++) {
-                                                       output_uuids[1+kexti].imageLoadAddress = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].address);
-                                                       memcpy(&output_uuids[1+kexti].imageUUID, &gLoadedKextSummaries->summaries[kexti].uuid, sizeof(uuid_t));
+                                                       image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].address);
+                                                       memcpy(tracepos, &image_load_address, sizeof(uintptr_t));
+                                                       memcpy((tracepos + uuid_offset), &gLoadedKextSummaries->summaries[kexti].uuid, sizeof(uuid_t));
+                                                       tracepos += uuid_info_size;
                                                }
-
-                                               tracepos += uuid_info_array_size;
                                        } else {
                                                /* kext summary invalid, but kernel UUID was copied */
                                                task_snap->nloadinfos = 1;
-                                               tracepos += uuid_info_size;
                                                break;
                                        }
                                } while(0);
                        }
                        
                        if (save_donating_pids_p) {
-                               task_snap->donating_pid_count = task_importance_list_pids(task, TASK_IMP_LIST_DONATING_PIDS, (int *)tracepos, (unsigned int)((tracebound - tracepos)/sizeof(int)));
+                               if (tracepos + (TASK_IMP_WALK_LIMIT * sizeof(int32_t)) > tracebound) {
+                                       error = -1;
+                                       goto error_exit;
+                               }
+
+                               task_snap->donating_pid_count = task_importance_list_pids(task, TASK_IMP_LIST_DONATING_PIDS, tracepos, TASK_IMP_WALK_LIMIT);
                                tracepos += sizeof(int) * task_snap->donating_pid_count;
                        }
 
@@ -491,7 +1685,7 @@ walk_list:
                                tsnap = (thread_snapshot_t) tracepos;
                                tsnap->thread_id = thread_tid(thread);
                                tsnap->state = thread->state;
-                               tsnap->priority = thread->priority;
+                               tsnap->priority = thread->base_pri;
                                tsnap->sched_pri = thread->sched_pri;
                                tsnap->sched_flags = thread->sched_flags;
                                tsnap->wait_event = VM_KERNEL_UNSLIDE_OR_PERM(thread->wait_event);
@@ -547,11 +1741,22 @@ walk_list:
                                if (thread->suspend_count > 0) {
                                        tsnap->ss_flags |= kThreadSuspended;
                                }
+
+                               if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
+                                       tsnap->ss_flags |= kGlobalForcedIdle;
+                               }
+
                                if (IPC_VOUCHER_NULL != thread->ith_voucher) {
                                        tsnap->voucher_identifier = VM_KERNEL_ADDRPERM(thread->ith_voucher);
                                }
 
                                tsnap->ts_qos = thread->effective_policy.thep_qos;
+                               tsnap->ts_rqos = thread->requested_policy.thrp_qos;
+                               tsnap->ts_rqos_override = thread->requested_policy.thrp_qos_override;
+                               /* zero out unused data. */
+                               tsnap->_reserved[0] = 0;
+                               tsnap->_reserved[1] = 0;
+                               tsnap->_reserved[2] = 0;
                                tsnap->total_syscalls = thread->syscalls_mach + thread->syscalls_unix;
 
                                if (dispatch_p && (task != kernel_task) && (task->active) && have_pmap) {
@@ -559,11 +1764,11 @@ walk_list:
                                        if (dqkeyaddr != 0) {
                                                uint64_t dqaddr = 0;
                                                if (kdp_copyin(task->map->pmap, dqkeyaddr, &dqaddr, (task64 ? 8 : 4)) && (dqaddr != 0)) {
-                                                       uint64_t dqserialnumaddr = dqaddr + dispatch_offset;
+                                                       uint64_t dqserialnumaddr = dqaddr + proc_dispatchqueue_serialno_offset_from_task(task);
                                                        uint64_t dqserialnum = 0;
                                                        if (kdp_copyin(task->map->pmap, dqserialnumaddr, &dqserialnum, (task64 ? 8 : 4))) {
                                                                tsnap->ss_flags |= kHasDispatchSerial;
-                                                               *(uint64_t *)tracepos = dqserialnum;
+                                                               memcpy(tracepos, &dqserialnum, sizeof(dqserialnum));
                                                                tracepos += 8;
                                                        }
                                                }
@@ -574,30 +1779,38 @@ walk_list:
  */
                                tracebytes = 0;
                                if (thread->kernel_stack != 0) {
+                                       uint32_t thread_snapshot_flags = 0;
 #if defined(__LP64__)                                  
-                                       tracebytes = machine_trace_thread64(thread, tracepos, tracebound, MAX_FRAMES, FALSE);
+                                       tracebytes = machine_trace_thread64(thread, tracepos, tracebound, MAX_FRAMES, FALSE, &thread_snapshot_flags);
                                        tsnap->ss_flags |= kKernel64_p;
                                        framesize = 16;
 #else
-                                       tracebytes = machine_trace_thread(thread, tracepos, tracebound, MAX_FRAMES, FALSE);
+                                       tracebytes = machine_trace_thread(thread, tracepos, tracebound, MAX_FRAMES, FALSE, &thread_snapshot_flags);
                                        framesize = 8;
 #endif
+                                       if (thread_snapshot_flags != 0) {
+                                               tsnap->ss_flags |= thread_snapshot_flags;
+                                       }
                                }
                                tsnap->nkern_frames = tracebytes/framesize;
                                tracepos += tracebytes;
                                tracebytes = 0;
                                /* Trace user stack, if any */
                                if (save_userframes_p && task->active && thread->task->map != kernel_map) {
+                                       uint32_t thread_snapshot_flags = 0;
                                        /* 64-bit task? */
                                        if (task_has_64BitAddr(thread->task)) {
-                                               tracebytes = machine_trace_thread64(thread, tracepos, tracebound, MAX_FRAMES, TRUE);
+                                               tracebytes = machine_trace_thread64(thread, tracepos, tracebound, MAX_FRAMES, TRUE, &thread_snapshot_flags);
                                                tsnap->ss_flags |= kUser64_p;
                                                framesize = 16;
                                        }
                                        else {
-                                               tracebytes = machine_trace_thread(thread, tracepos, tracebound, MAX_FRAMES, TRUE);
+                                               tracebytes = machine_trace_thread(thread, tracepos, tracebound, MAX_FRAMES, TRUE, &thread_snapshot_flags);
                                                framesize = 8;
                                        }
+                                       if (thread_snapshot_flags != 0) {
+                                               tsnap->ss_flags |= thread_snapshot_flags;
+                                       }
                                }
                                tsnap->nuser_frames = tracebytes/framesize;
                                tracepos += tracebytes;
@@ -633,8 +1846,11 @@ static int pid_from_task(task_t task)
 {
        int pid = -1;
 
-       if (task->bsd_info)
+       if (task->bsd_info) {
                pid = proc_pid(task->bsd_info);
+       } else {
+               pid = task_pid(task);
+       }
 
        return pid;
 }
@@ -672,6 +1888,18 @@ proc_did_throttle_from_task(task_t task)
        return did_throttle;
 }
 
+static uint64_t
+proc_dispatchqueue_serialno_offset_from_task(task_t task)
+{
+       uint64_t dq_serialno_offset = 0;
+
+       if (task->bsd_info) {
+               dq_serialno_offset = get_dispatchqueue_serialno_offset_from_proc(task->bsd_info);
+       }
+
+       return dq_serialno_offset;
+}
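As used in both snapshot formats above, the dispatch-queue serial number is reached through two user-space reads: the per-thread dispatch-queue address key yields the address of the queue object, and the serial number sits at the per-process offset returned by this helper. A condensed sketch of that chain, reusing the variables and helpers already present in the snapshot loops:

/* Sketch only: locating a thread's dispatch queue serial number. */
uint64_t dqkeyaddr = thread_dispatchqaddr(thread);
uint64_t dqaddr = 0, dqserialnum = 0;
if (dqkeyaddr != 0 &&
    kdp_copyin(task->map->pmap, dqkeyaddr, &dqaddr, (task64 ? 8 : 4)) &&
    dqaddr != 0 &&
    kdp_copyin(task->map->pmap,
               dqaddr + proc_dispatchqueue_serialno_offset_from_task(task),
               &dqserialnum, (task64 ? 8 : 4))) {
        /* dqserialnum now holds the queue's serial number */
}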
+
 static void
 kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap)
 {
@@ -736,6 +1964,23 @@ kdp_copyin(pmap_t p, uint64_t uaddr, void *dest, size_t size)
        size_t rem = size;
        char *kvaddr = dest;
 
+#if (defined(__arm64__) || defined(NAND_PANIC_DEVICE)) && !defined(LEGACY_PANIC_LOGS)
+       /* Identify if destination buffer is in panic storage area */
+       if ((vm_offset_t)dest >= gPanicBase && (vm_offset_t)dest < gPanicBase + gPanicSize) {
+               if (((vm_offset_t)dest + size) >= (gPanicBase + gPanicSize)) {
+                       return FALSE;
+               }
+               ppnum_t upn = pmap_find_phys(p, uaddr);
+               uint64_t phys_src = ptoa_64(upn) | (uaddr & PAGE_MASK);
+               void *src_va = (void*)phystokv(phys_src);
+               if (upn && pmap_valid_page(upn)) {
+                       bcopy(src_va, kvaddr, size);
+                       return TRUE;
+               }
+               return FALSE;
+       }
+#endif
+
        while (rem) {
                ppnum_t upn = pmap_find_phys(p, uaddr);
                uint64_t phys_src = ptoa_64(upn) | (uaddr & PAGE_MASK);
@@ -752,7 +1997,7 @@ kdp_copyin(pmap_t p, uint64_t uaddr, void *dest, size_t size)
                        break;
                uaddr += cur_size;
                kvaddr += cur_size;
-               rem -= cur_size;        
+               rem -= cur_size;
        }
        return (rem == 0);
 }
@@ -760,11 +2005,16 @@ kdp_copyin(pmap_t p, uint64_t uaddr, void *dest, size_t size)
 void
 do_stackshot()
 {
-    stack_snapshot_ret = kdp_stackshot(stack_snapshot_pid,
+    if (stack_snapshot_flags & STACKSHOT_KCDATA_FORMAT) {
+        stack_snapshot_ret = kdp_stackshot_kcdata_format(stack_snapshot_pid,
+           stack_snapshot_flags,
+           &stack_snapshot_bytes_traced);
+    }
+    else {
+        stack_snapshot_ret = kdp_stackshot(stack_snapshot_pid,
            stack_snapshot_buf, stack_snapshot_bufsize,
-           stack_snapshot_flags, stack_snapshot_dispatch_offset, 
-               &stack_snapshot_bytes_traced);
-
+           stack_snapshot_flags, &stack_snapshot_bytes_traced);
+    }
 }
 
 /*
@@ -783,7 +2033,7 @@ do_stackshot()
  * happen.
  */
 vm_offset_t
-machine_trace_thread_get_kva(vm_offset_t cur_target_addr)
+machine_trace_thread_get_kva(vm_offset_t cur_target_addr, vm_map_t map, uint32_t *thread_trace_flags)
 {
        unsigned cur_wimg_bits;
        vm_offset_t cur_target_page;
@@ -804,13 +2054,29 @@ machine_trace_thread_get_kva(vm_offset_t cur_target_addr)
                cur_phys_addr = kdp_vtophys(kdp_pmap ? kdp_pmap : kernel_pmap, cur_target_addr);
 
                if (!pmap_valid_page((ppnum_t) atop(cur_phys_addr))) {
-                       return 0;
-               }
 
-               cur_wimg_bits = pmap_cache_attributes((ppnum_t) atop(cur_phys_addr));
+                       if (!stack_enable_faulting) {
+                               return 0;
+                       }
 
-               if ((cur_wimg_bits & VM_WIMG_MASK) != VM_WIMG_DEFAULT) {
-                       return 0;
+                       /*
+                        * The pmap doesn't have a valid page so we start at the top level
+                        * vm map and try a lightweight fault.
+                        */
+                       cur_phys_addr = kdp_lightweight_fault(map, (cur_target_addr & ~PAGE_MASK), thread_trace_flags);
+                       cur_phys_addr += (cur_target_addr & PAGE_MASK);
+
+                       if (!pmap_valid_page((ppnum_t) atop(cur_phys_addr)))
+                               return 0;
+               } else {
+                       /*
+                        * This check is done in kdp_lightweight_fault for the fault path.
+                        */
+                       cur_wimg_bits = pmap_cache_attributes((ppnum_t) atop(cur_phys_addr));
+
+                       if ((cur_wimg_bits & VM_WIMG_MASK) != VM_WIMG_DEFAULT) {
+                               return 0;
+                       }
                }
 
 #if __x86_64__
index 6c19ab4dcdf01c0d5b00dabacb9579bf8eca504e..f73eff9e347290d25f84a929ac911f4bb0791741 100644 (file)
@@ -172,6 +172,14 @@ typedef int wait_timeout_urgency_t;
 
 #define TIMEOUT_URGENCY_FIRST_AVAIL    0x40            /* first available bit outside of urgency mask/leeway */
 #define        TIMEOUT_URGENCY_RATELIMITED     0x80
+
+/*
+ * Timeout and deadline tokens for waits.
+ * The following tokens define common values for leeway and deadline parameters.
+ */
+#define TIMEOUT_NO_LEEWAY              (0ULL)
+#define TIMEOUT_WAIT_FOREVER           (0ULL)
+
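A hedged illustration of how these tokens can be combined with a deadline-based wait. The caller, the wait channel and the urgency constant are placeholders; assert_wait_deadline_with_leeway() is assumed to keep its existing signature, and TIMEOUT_URGENCY_SYS_NORMAL is assumed to be among the TIMEOUT_URGENCY_* values defined earlier in this header:

/* Sketch only (hypothetical caller): an untimed, no-leeway wait. */
static int some_event;          /* placeholder wait channel */

static void
wait_forever_example(void)
{
        wait_result_t wres;

        wres = assert_wait_deadline_with_leeway((event_t)&some_event,
                        THREAD_UNINT,
                        TIMEOUT_URGENCY_SYS_NORMAL,
                        TIMEOUT_WAIT_FOREVER,   /* deadline of 0: no timeout */
                        TIMEOUT_NO_LEEWAY);     /* leeway of 0 */
        if (wres == THREAD_WAITING)
                thread_block(THREAD_CONTINUE_NULL);
}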
 #ifdef KERNEL_PRIVATE
 
 #ifdef MACH_KERNEL_PRIVATE
@@ -220,11 +228,8 @@ typedef struct _wait_queue_link    *wait_queue_link_t;
 #define WAIT_QUEUE_LINK_NULL   ((wait_queue_link_t)0)
 #define SIZEOF_WAITQUEUE_LINK  wait_queue_link_size()
 
-/* legacy definitions - going away */
-struct wait_queue_sub ;
-typedef struct wait_queue_sub  *wait_queue_sub_t;
-#define WAIT_QUEUE_SUB_NULL    ((wait_queue_sub_t)0)
-#define SIZEOF_WAITQUEUE_SUB   wait_queue_set_size()
+typedef struct perfcontrol_state       *perfcontrol_state_t;
+#define PERFCONTROL_STATE_NULL         ((perfcontrol_state_t)0)
 
 #endif /* KERNEL_PRIVATE */
 
index 336e58d07cd506d3536b10a4fc1a7098d91ae36f..6e370a86319ae3a525e9c0e412a8ac86d1521a37 100644 (file)
@@ -103,7 +103,7 @@ kext_alloc_init(void)
     /* Allocate the sub block of the kernel map */
     rval = kmem_suballoc(kernel_map, (vm_offset_t *) &kext_alloc_base, 
                         kext_alloc_size, /* pageable */ TRUE,
-                        VM_FLAGS_FIXED|VM_FLAGS_OVERWRITE,
+                        VM_FLAGS_FIXED|VM_FLAGS_OVERWRITE | VM_MAKE_TAG(VM_KERN_MEMORY_KEXT),
                         &g_kext_map);
     if (rval != KERN_SUCCESS) {
            panic("kext_alloc_init: kmem_suballoc failed 0x%x\n", rval);
@@ -141,6 +141,8 @@ kext_alloc(vm_offset_t *_addr, vm_size_t size, boolean_t fixed)
 #endif
     int flags = (fixed) ? VM_FLAGS_FIXED : VM_FLAGS_ANYWHERE;
  
+    flags |= VM_MAKE_TAG(VM_KERN_MEMORY_KEXT);
+     
 #if CONFIG_KEXT_BASEMENT
     /* Allocate the kext virtual memory
      * 10608884 - use mach_vm_map since we want VM_FLAGS_ANYWHERE allocated past
index ddfc4ad069afa135bcb16fecd8ff797fb76c2b3f..461dd6704082d3d59e8e5b94617c208b24c99827 100644 (file)
@@ -32,6 +32,7 @@
 /* Kernel interfaces to KPC PMC infrastructure. */
 
 #include <machine/machine_kpc.h>
+#include <kern/thread.h> /* thread_* */
 
 /* cross-platform class constants */
 #define KPC_CLASS_FIXED         (0)
 #define KPC_CLASS_POWER_MASK         (1u << KPC_CLASS_POWER)
 #define KPC_CLASS_RAWPMU_MASK        (1u << KPC_CLASS_RAWPMU)
 
+#define KPC_PMU_ERROR     (0)
+#define KPC_PMU_INTEL_V3  (1)
+#define KPC_PMU_ARM_APPLE (2)
+#define KPC_PMU_INTEL_V2  (3)
+#define KPC_PMU_ARM_V2    (4)
+
 #define KPC_ALL_CPUS (1u << 31)
 
+/* action id setters/getters */
+#define FIXED_ACTIONID(ctr)                    (kpc_actionid[(ctr)])
+#define CONFIGURABLE_ACTIONID(ctr)             (kpc_actionid[(ctr) + kpc_fixed_count()])
+
+/* reload counter setters/getters */
+#define FIXED_RELOAD(ctr)                      (current_cpu_datap()->cpu_kpc_reload[(ctr)])
+#define FIXED_RELOAD_CPU(cpu, ctr)             (cpu_datap(cpu)->cpu_kpc_reload[(ctr)])
+#define CONFIGURABLE_RELOAD(ctr)               (current_cpu_datap()->cpu_kpc_reload[(ctr) + kpc_fixed_count()])
+#define CONFIGURABLE_RELOAD_CPU(cpu, ctr)      (cpu_datap(cpu)->cpu_kpc_reload[(ctr) + kpc_fixed_count()])
+
+/* shadow counter setters/getters */
+#define FIXED_SHADOW(ctr)                      (current_cpu_datap()->cpu_kpc_shadow[(ctr)])
+#define FIXED_SHADOW_CPU(cpu, ctr)             (cpu_datap(cpu)->cpu_kpc_shadow[(ctr)])
+#define CONFIGURABLE_SHADOW(ctr)               (current_cpu_datap()->cpu_kpc_shadow[(ctr) + kpc_fixed_count()])
+#define CONFIGURABLE_SHADOW_CPU(cpu, ctr)      (cpu_datap(cpu)->cpu_kpc_shadow[(ctr) + kpc_fixed_count()])
+
+/**
+ * Callback used to notify the Power Manager (PM) when PMCs are acquired or
+ * released by a task. The argument is TRUE if the PM may use its reserved
+ * PMCs, and FALSE otherwise.
+ */
+typedef void (*kpc_pm_handler_t)(boolean_t);
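A hedged sketch of a handler a Power Manager might supply when registering; the function name and body are illustrative only:

/* Sketch only: a callback conforming to kpc_pm_handler_t (hypothetical). */
static void
my_pm_pmc_availability_handler(boolean_t pm_may_use_reserved_pmcs)
{
        if (pm_may_use_reserved_pmcs) {
                /* reprogram and resume the PM-reserved PMCs */
        } else {
                /* a task took every counter via force_all_ctrs; stop touching them */
        }
}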
+
+/*
+ * Register a CPU with kpc and allocate its buffers.
+ *
+ * @param cpu_data
+ * CPU data associated with the CPU being registered.
+ *
+ * @return
+ * TRUE if the buffers were allocated successfully, FALSE otherwise.
+ */
+struct cpu_data;
+extern boolean_t kpc_register_cpu(struct cpu_data *cpu_data);
+
 /* bootstrap */
 extern void kpc_init(void);
 
+/* common initialization */
+extern void kpc_common_init(void);
+
 /* Architecture specific initialisation */
 extern void kpc_arch_init(void);
 
+/* Thread counting initialization */
+extern void kpc_thread_init(void);
+
 /* Get the bitmask of available classes */
 extern uint32_t kpc_get_classes(void);
 
 /* Get the bitmask of currently running counter classes  */
 extern uint32_t kpc_get_running(void);
 
+/* Get the version of KPC that's being run */
+extern int kpc_get_pmu_version(void);
+
 /* Set the bitmask of currently running counter classes. Specify
  * classes = 0 to stop counters
  */
@@ -107,9 +158,9 @@ extern int kpc_threads_counting;
 
 /* AST callback for KPC */
 extern void kpc_thread_ast_handler( thread_t thread );
-       
+
 /* context switch accounting between two threads */
-extern void kpc_switch_context( thread_t old, thread_t new );
+extern void kpc_switch_context( thread_t old_thread, thread_t new_thread );
 
 /* acquire/release the counters used by the Power Manager */
 extern int kpc_force_all_ctrs( task_t task, int val );
@@ -125,12 +176,48 @@ extern int kpc_get_whitelist_disabled( void );
 extern int kpc_disable_whitelist( int val );
 
 /*
- * Allow the Power Manager to register for KPC notification when the counters
- * are acquired/released by a task. The argument is equal to true if the Power
- * Manager can use the counters, otherwise it is equal to false.
+ * Register the Power Manager as a user of the PMCs.
+ *
+ * This is a deprecated function used by old Power Managers; new Power Managers
+ * should use the @em kpc_reserve_pm_counters() function. This function actually
+ * calls @em kpc_reserve_pm_counters() with the following arguments:
+ *     - handler       = handler
+ *     - pmc_mask      = 0x83
+ *     - custom_config = TRUE
+ *
+ * See @em kpc_reserve_pm_counters() for more details about the return value.
  */
 extern boolean_t kpc_register_pm_handler(void (*handler)(boolean_t));
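Given what the comment above states, the legacy entry point reduces to a forwarding shim; a minimal sketch of that forwarding, assuming nothing beyond the documented arguments (pmc_mask = 0x83, custom_config = TRUE):

/* Sketch only: the deprecated call expressed in terms of the new interface. */
boolean_t
kpc_register_pm_handler(void (*handler)(boolean_t))
{
        return kpc_reserve_pm_counters(0x83, handler, TRUE);
}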
 
+/*
+ * Register the Power Manager as a user of the PMCs.
+ *
+ * @param handler
+ * Notification callback to use when PMCs are acquired/released by a task.
+ *
+ * @param pmc_mask
+ * Bitmask of the configurable PMCs used by the Power Manager. The number of bits
+ * set must be less than or equal to the number of configurable counters
+ * available on the SoC.
+ *
+ * @param custom_config
+ * If custom_config=TRUE, the legacy sharing mode is enabled; otherwise, the
+ * Modern Sharing mode is enabled. These modes are explained in more detail in
+ * the kperf documentation.
+ *
+ * @return
+ * FALSE if a task has acquired all the PMCs, otherwise TRUE and the Power
+ * Manager can start using the reserved PMCs.
+ */
+extern boolean_t kpc_reserve_pm_counters(uint64_t pmc_mask, kpc_pm_handler_t handler,
+                                         boolean_t custom_config);
+
+/*
+ * Unregister the Power Manager as a PMCs user, and release the previously
+ * reserved counters.
+ */
+extern void kpc_release_pm_counters(void);
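A hedged usage example for the reserve/release pair; the mask, handler name and call sites are assumptions for illustration only:

/* Sketch only: a PM driver reserving two configurable PMCs in Modern Sharing mode. */
if (kpc_reserve_pm_counters(0x3ULL /* PMC0 and PMC1 */,
                            my_pm_pmc_availability_handler,   /* hypothetical handler */
                            FALSE /* custom_config: Modern Sharing */)) {
        /* reservation granted: the PM may program its reserved PMCs */
} else {
        /* a task currently holds all PMCs; wait for the handler to be called */
}

/* later, on teardown */
kpc_release_pm_counters();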
+
 /*
  * Is the PMU used by both the power manager and userspace?
  *
@@ -148,36 +235,76 @@ extern boolean_t kpc_multiple_clients(void);
  */
 extern boolean_t kpc_controls_fixed_counters(void);
 
+/*
+ * Is kpc controlling a specific PMC?
+ */
+extern boolean_t kpc_controls_counter(uint32_t ctr);
+
+
 extern void kpc_idle(void);
 extern void kpc_idle_exit(void);
 
 
-/* KPC PRIVATE */
+/*
+ * KPC PRIVATE
+ */
+
 extern uint32_t kpc_actionid[KPC_MAX_COUNTERS];
-/* mp operations */
-struct kpc_config_remote
-{
+
+/* handler for mp operations */
+struct kpc_config_remote {
        uint32_t classes;
        kpc_config_t *configv;
+       uint64_t pmc_mask;
+};
+
+/* handler for mp operations */
+struct kpc_running_remote {
+       uint32_t        classes;                /* classes to run */
+       uint64_t        cfg_target_mask;        /* configurable counters selected */
+       uint64_t        cfg_state_mask;         /* configurable counters new state */
+};
+
+/* handler for mp operations */
+struct kpc_get_counters_remote {
+       uint32_t classes;
+       uint32_t nb_counters;
+       uint32_t buf_stride;
+       uint64_t *buf;
 };
 
+extern int kpc_get_all_cpus_counters(uint32_t classes, int *curcpu, uint64_t *buf);
+extern int kpc_get_curcpu_counters(uint32_t classes, int *curcpu, uint64_t *buf);
 extern int kpc_get_fixed_counters(uint64_t *counterv);
-extern int kpc_get_configurable_counters(uint64_t *counterv);
+extern int kpc_get_configurable_counters(uint64_t *counterv, uint64_t pmc_mask);
 extern boolean_t kpc_is_running_fixed(void);
-extern boolean_t kpc_is_running_configurable(void);
+extern boolean_t kpc_is_running_configurable(uint64_t pmc_mask);
 extern uint32_t kpc_fixed_count(void);
 extern uint32_t kpc_configurable_count(void);
 extern uint32_t kpc_fixed_config_count(void);
-extern uint32_t kpc_configurable_config_count(void);
+extern uint32_t kpc_configurable_config_count(uint64_t pmc_mask);
 extern uint32_t kpc_rawpmu_config_count(void);
 extern int kpc_get_fixed_config(kpc_config_t *configv);
-extern int kpc_get_configurable_config(kpc_config_t *configv);
+extern int kpc_get_configurable_config(kpc_config_t *configv, uint64_t pmc_mask);
 extern int kpc_get_rawpmu_config(kpc_config_t *configv);
 extern uint64_t kpc_fixed_max(void);
 extern uint64_t kpc_configurable_max(void);
 extern int kpc_set_config_arch(struct kpc_config_remote *mp_config);
 extern int kpc_set_period_arch(struct kpc_config_remote *mp_config);
 extern void kpc_sample_kperf(uint32_t actionid);
+extern int kpc_set_running_arch(struct kpc_running_remote *mp_config);
+
+
+/*
+ * Helpers
+ */
+
+/* count the number of bits set */
+extern uint8_t kpc_popcount(uint64_t value);
+
+/* for a set of classes, retrieve the configurable PMCs mask */
+extern uint64_t kpc_get_configurable_pmc_mask(uint32_t classes);
+
 
 /* Interface for kexts to publish a kpc interface */
 struct kpc_driver
index 111a203fbdd6001aa4e479910ae3a84016a75718..c091eb115322bb82ea7d6acef38a0ab59d1888e3 100644 (file)
 
 uint32_t kpc_actionid[KPC_MAX_COUNTERS];
 
+#define COUNTERBUF_SIZE_PER_CPU (KPC_MAX_COUNTERS * sizeof(uint64_t))
+#define COUNTERBUF_SIZE (machine_info.logical_cpu_max * \
+                         COUNTERBUF_SIZE_PER_CPU)
+
 /* locks */
 static lck_grp_attr_t *kpc_config_lckgrp_attr = NULL;
 static lck_grp_t      *kpc_config_lckgrp = NULL;
@@ -53,8 +57,10 @@ static lck_mtx_t       kpc_config_lock;
 /* state specifying if all counters have been requested by kperf */
 static boolean_t force_all_ctrs = FALSE;
 
-/* PM handler called when forcing/releasing all counters */
-static void (*pm_handler)(boolean_t) = NULL;
+/* power manager */
+static kpc_pm_handler_t                kpc_pm_handler;
+static boolean_t               kpc_pm_has_custom_config;
+static uint64_t                        kpc_pm_pmc_mask;
 
 void kpc_common_init(void);
 void
@@ -65,6 +71,52 @@ kpc_common_init(void)
        lck_mtx_init(&kpc_config_lock, kpc_config_lckgrp, LCK_ATTR_NULL);
 }
 
+boolean_t
+kpc_register_cpu(struct cpu_data *cpu_data)
+{
+       assert(cpu_data);
+       assert(cpu_data->cpu_kpc_buf[0] == NULL);
+       assert(cpu_data->cpu_kpc_buf[1] == NULL);
+       assert(cpu_data->cpu_kpc_shadow == NULL);
+       assert(cpu_data->cpu_kpc_reload == NULL);
+
+       /*
+        * Buffers allocated through kpc_counterbuf_alloc() are large enough to
+        * store all PMC values from all CPUs. This mimics the userspace API.
+        * This does not suit the per-CPU kpc buffers well, since:
+        *      1. Buffers don't need to be this large.
+        *      2. The actual number of CPUs is not known at this point.
+        *
+        * CPUs are asked to call out into kpc when they are registered, so we
+        * allocate the memory here.
+        */
+
+       if ((cpu_data->cpu_kpc_buf[0] = kalloc(COUNTERBUF_SIZE_PER_CPU)) == NULL)
+               goto error;
+       if ((cpu_data->cpu_kpc_buf[1] = kalloc(COUNTERBUF_SIZE_PER_CPU)) == NULL)
+               goto error;
+       if ((cpu_data->cpu_kpc_shadow = kalloc(COUNTERBUF_SIZE_PER_CPU)) == NULL)
+               goto error;
+       if ((cpu_data->cpu_kpc_reload = kalloc(COUNTERBUF_SIZE_PER_CPU)) == NULL)
+               goto error;
+
+       memset(cpu_data->cpu_kpc_buf[0], 0, COUNTERBUF_SIZE_PER_CPU);
+       memset(cpu_data->cpu_kpc_buf[1], 0, COUNTERBUF_SIZE_PER_CPU);
+       memset(cpu_data->cpu_kpc_shadow, 0, COUNTERBUF_SIZE_PER_CPU);
+       memset(cpu_data->cpu_kpc_reload, 0, COUNTERBUF_SIZE_PER_CPU);
+
+       /* success */
+       return TRUE;
+
+error:
+       kfree(cpu_data->cpu_kpc_buf[0], COUNTERBUF_SIZE_PER_CPU);
+       kfree(cpu_data->cpu_kpc_buf[1], COUNTERBUF_SIZE_PER_CPU);
+       kfree(cpu_data->cpu_kpc_shadow, COUNTERBUF_SIZE_PER_CPU);
+       kfree(cpu_data->cpu_kpc_reload, COUNTERBUF_SIZE_PER_CPU);
+
+       return FALSE;
+}
+
 static void
 kpc_task_set_forced_all_ctrs(task_t task, boolean_t state)
 {
@@ -88,9 +140,8 @@ kpc_task_get_forced_all_ctrs(task_t task)
 int
 kpc_force_all_ctrs(task_t task, int val)
 {
-       int             ret = 0;
-       boolean_t       new_state = val ? TRUE : FALSE;
-       boolean_t       old_state = kpc_get_force_all_ctrs();
+       boolean_t new_state = val ? TRUE : FALSE;
+       boolean_t old_state = kpc_get_force_all_ctrs();
 
        /*
         * Refuse to do the operation if the counters are already forced by
@@ -103,13 +154,9 @@ kpc_force_all_ctrs(task_t task, int val)
        if (old_state == new_state)
                return 0;
 
-       /* do the architecture specific work */
-       if ((ret = kpc_force_all_ctrs_arch(task, val)) != 0)
-               return ret;
-
        /* notify the power manager */
-       if (pm_handler)
-               pm_handler( new_state ? FALSE : TRUE );
+       if (kpc_pm_handler)
+               kpc_pm_handler( new_state ? FALSE : TRUE );
 
        /* update the task bits */
        kpc_task_set_forced_all_ctrs(task, val);
@@ -127,72 +174,90 @@ kpc_get_force_all_ctrs(void)
 }
 
 boolean_t
-kpc_register_pm_handler(void (*handler)(boolean_t))
+kpc_multiple_clients(void)
 {
-       if (!pm_handler) {
-               pm_handler = handler;
-       }
-
-       /* Notify machine-dependent code. Reserved PMCs could change. */
-       kpc_force_all_ctrs_arch(TASK_NULL, force_all_ctrs);
-
-       return force_all_ctrs ? FALSE : TRUE;
+       return kpc_pm_handler != NULL;
 }
 
 boolean_t
-kpc_multiple_clients(void)
+kpc_controls_fixed_counters(void)
 {
-       return pm_handler != NULL;
+       return !kpc_pm_handler || force_all_ctrs || !kpc_pm_has_custom_config;
 }
 
 boolean_t
-kpc_controls_fixed_counters(void)
+kpc_controls_counter(uint32_t ctr)
 {
-       return !pm_handler || force_all_ctrs;
+       uint64_t pmc_mask = 0ULL;
+
+       assert(ctr < (kpc_fixed_count() + kpc_configurable_count()));
+
+       if (ctr < kpc_fixed_count())
+               return kpc_controls_fixed_counters();
+
+       /*
+        * By default kpc manages all PMCs, but if the Power Manager registered
+        * with custom_config=TRUE, the Power Manager manages its reserved PMCs.
+        * However, kpc takes ownership back if a task acquired all PMCs via
+        * force_all_ctrs.
+        */
+       pmc_mask = (1ULL << (ctr - kpc_fixed_count()));
+       if ((pmc_mask & kpc_pm_pmc_mask) && kpc_pm_has_custom_config && !force_all_ctrs)
+               return FALSE;
+
+       return TRUE;
 }
 
 uint32_t
 kpc_get_running(void)
 {
+       uint64_t pmc_mask = 0;
        uint32_t cur_state = 0;
 
-       if( kpc_is_running_fixed() )
+       if (kpc_is_running_fixed())
                cur_state |= KPC_CLASS_FIXED_MASK;
 
-       if( kpc_is_running_configurable() )
+       pmc_mask = kpc_get_configurable_pmc_mask(KPC_CLASS_CONFIGURABLE_MASK);
+       if (kpc_is_running_configurable(pmc_mask))
                cur_state |= KPC_CLASS_CONFIGURABLE_MASK;
 
+       pmc_mask = kpc_get_configurable_pmc_mask(KPC_CLASS_POWER_MASK);
+       if ((pmc_mask != 0) && kpc_is_running_configurable(pmc_mask))
+               cur_state |= KPC_CLASS_POWER_MASK;
+
        return cur_state;
 }
 
-/* generic counter reading function */
+/* may be called from an IPI */
 int
-kpc_get_cpu_counters( boolean_t all_cpus, uint32_t classes, 
-                      int *curcpu, uint64_t *buf  )
+kpc_get_curcpu_counters(uint32_t classes, int *curcpu, uint64_t *buf)
 {
-       int r, enabled, offset = 0;
+       int enabled=0, offset=0;
+       uint64_t pmc_mask = 0ULL;
 
-       (void) all_cpus;
+       assert(buf);
 
-       /* grab counters and CPU number as close as possible */
        enabled = ml_set_interrupts_enabled(FALSE);
 
-       /* and the CPU ID */
-       if( curcpu )
+       /* grab counters and CPU number as close as possible */
+       if (curcpu)
                *curcpu = current_processor()->cpu_id;
 
-       if( classes & KPC_CLASS_FIXED_MASK )
-       {
-               kpc_get_fixed_counters( &buf[offset] );
-
+       if (classes & KPC_CLASS_FIXED_MASK) {
+               kpc_get_fixed_counters(&buf[offset]);
                offset += kpc_get_counter_count(KPC_CLASS_FIXED_MASK);
        }
 
-       if( classes & KPC_CLASS_CONFIGURABLE_MASK )
-       {
-               r = kpc_get_configurable_counters(  &buf[offset] );
+       if (classes & KPC_CLASS_CONFIGURABLE_MASK) {
+               pmc_mask = kpc_get_configurable_pmc_mask(KPC_CLASS_CONFIGURABLE_MASK);
+               kpc_get_configurable_counters(&buf[offset], pmc_mask);
+               offset += kpc_popcount(pmc_mask);
+       }
 
-               offset += kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK);
+       if (classes & KPC_CLASS_POWER_MASK) {
+               pmc_mask = kpc_get_configurable_pmc_mask(KPC_CLASS_POWER_MASK);
+               kpc_get_configurable_counters(&buf[offset], pmc_mask);
+               offset += kpc_popcount(pmc_mask);
        }
 
        ml_set_interrupts_enabled(enabled);
@@ -200,35 +265,67 @@ kpc_get_cpu_counters( boolean_t all_cpus, uint32_t classes,
        return offset;
 }
 
+/* generic counter reading function, public api */
+int
+kpc_get_cpu_counters(boolean_t all_cpus, uint32_t classes,
+                     int *curcpu, uint64_t *buf)
+{
+       assert(buf);
+
+       /*
+        * Unlike reading the current CPU counters, reading counters from all
+        * CPUs is architecture dependent. This allows kpc to make the most of
+        * the platform if memory-mapped registers are supported.
+        */
+       if (all_cpus)
+               return kpc_get_all_cpus_counters(classes, curcpu, buf);
+       else
+               return kpc_get_curcpu_counters(classes, curcpu, buf);
+}
+
 int
-kpc_get_shadow_counters( boolean_t all_cpus, uint32_t classes,
-                         int *curcpu, uint64_t *buf )
+kpc_get_shadow_counters(boolean_t all_cpus, uint32_t classes,
+                        int *curcpu, uint64_t *buf)
 {
-       int enabled, count, offset = 0;
+       int curcpu_id = current_processor()->cpu_id;
+       uint32_t cfg_count = kpc_configurable_count(), offset = 0;
+       uint64_t pmc_mask = 0ULL;
+       boolean_t enabled;
 
-       (void)all_cpus;
+       assert(buf);
 
        enabled = ml_set_interrupts_enabled(FALSE);
 
-       if( curcpu )
-               *curcpu = current_processor()->cpu_id;
+       curcpu_id = current_processor()->cpu_id;
+       if (curcpu)
+               *curcpu = curcpu_id;
 
-       if( classes & KPC_CLASS_FIXED_MASK )
-       {
-               count = kpc_get_counter_count(KPC_CLASS_FIXED_MASK);
+       for (int cpu = 0; cpu < machine_info.logical_cpu_max; ++cpu) {
+               /* filter if the caller did not request all cpus */
+               if (!all_cpus && (cpu != curcpu_id))
+                       continue;
 
-               memcpy( &buf[offset], &FIXED_SHADOW(0), count*sizeof(uint64_t) );
+               if (classes & KPC_CLASS_FIXED_MASK) {
+                       uint32_t count = kpc_get_counter_count(KPC_CLASS_FIXED_MASK);
+                       memcpy(&buf[offset], &FIXED_SHADOW_CPU(cpu, 0), count * sizeof(uint64_t));
+                       offset += count;
+               }
 
-               offset += count;
-       }
+               if (classes & KPC_CLASS_CONFIGURABLE_MASK) {
+                       pmc_mask = kpc_get_configurable_pmc_mask(KPC_CLASS_CONFIGURABLE_MASK);
 
-       if( classes & KPC_CLASS_CONFIGURABLE_MASK )
-       {
-               count = kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK);
+                       for (uint32_t cfg_ctr = 0; cfg_ctr < cfg_count; ++cfg_ctr)
+                               if ((1ULL << cfg_ctr) & pmc_mask)
+                                       buf[offset++] = CONFIGURABLE_SHADOW_CPU(cpu, cfg_ctr);
+               }
 
-               memcpy( &buf[offset], &CONFIGURABLE_SHADOW(0), count*sizeof(uint64_t) );
+               if (classes & KPC_CLASS_POWER_MASK) {
+                       pmc_mask = kpc_get_configurable_pmc_mask(KPC_CLASS_POWER_MASK);
 
-               offset += count;
+                       for (uint32_t cfg_ctr = 0; cfg_ctr < cfg_count; ++cfg_ctr)
+                               if ((1ULL << cfg_ctr) & pmc_mask)
+                                       buf[offset++] = CONFIGURABLE_SHADOW_CPU(cpu, cfg_ctr);
+               }
        }
 
        ml_set_interrupts_enabled(enabled);
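
The mask-filtered copy in the loop above is the recurring idiom in this file: only configurable counters whose bit is set in the class's PMC mask are packed, in bit order, into the output buffer. The same shape reappears below for periods and action IDs. A stripped-down sketch of the pattern (src/dst are illustrative placeholders, not kpc names):

        /* Pack only the counters selected by pmc_mask; returns how many were copied. */
        static uint32_t
        gather_masked(uint64_t *dst, const uint64_t *src, uint32_t count, uint64_t pmc_mask)
        {
                uint32_t n = 0;

                for (uint32_t i = 0; i < count; ++i) {
                        if ((1ULL << i) & pmc_mask)
                                dst[n++] = src[i];
                }

                /* n == kpc_popcount(pmc_mask & ((1ULL << count) - 1)) */
                return n;
        }
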
@@ -239,13 +336,16 @@ kpc_get_shadow_counters( boolean_t all_cpus, uint32_t classes,
 uint32_t
 kpc_get_counter_count(uint32_t classes)
 {
-       int count = 0;
+       uint32_t count = 0;
 
-       if( classes & KPC_CLASS_FIXED_MASK )
+       if (classes & KPC_CLASS_FIXED_MASK)
                count += kpc_fixed_count();
 
-       if( classes & KPC_CLASS_CONFIGURABLE_MASK )
-               count += kpc_configurable_count() ;
+       if (classes & (KPC_CLASS_CONFIGURABLE_MASK | KPC_CLASS_POWER_MASK)) {
+               uint64_t pmc_msk = kpc_get_configurable_pmc_mask(classes);
+               uint32_t pmc_cnt = kpc_popcount(pmc_msk);
+               count += pmc_cnt;
+       }
 
        return count;
 }
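
In other words, the count for the configurable and power classes is just the population count of the PMC mask assigned to those classes. A hypothetical example, assuming 4 fixed counters and a configurable-class mask of 0x6:

        /* Hypothetical: kpc_fixed_count() == 4, configurable-class PMC mask == 0x6. */
        uint32_t n = kpc_get_counter_count(KPC_CLASS_FIXED_MASK | KPC_CLASS_CONFIGURABLE_MASK);
        /* n == 4 + kpc_popcount(0x6) == 4 + 2 == 6 */
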
@@ -253,15 +353,17 @@ kpc_get_counter_count(uint32_t classes)
 uint32_t
 kpc_get_config_count(uint32_t classes)
 {
-       int count = 0;
+       uint32_t count = 0;
 
-       if( classes & KPC_CLASS_FIXED_MASK )
+       if (classes & KPC_CLASS_FIXED_MASK)
                count += kpc_fixed_config_count();
 
-       if( classes & KPC_CLASS_CONFIGURABLE_MASK )
-               count += kpc_configurable_config_count();
+       if (classes & (KPC_CLASS_CONFIGURABLE_MASK | KPC_CLASS_POWER_MASK)) {
+               uint64_t pmc_mask = kpc_get_configurable_pmc_mask(classes);
+               count += kpc_configurable_config_count(pmc_mask);
+       }
 
-       if( (classes & KPC_CLASS_RAWPMU_MASK) && !kpc_multiple_clients() )
+       if ((classes & KPC_CLASS_RAWPMU_MASK) && !kpc_multiple_clients())
                count += kpc_rawpmu_config_count();
 
        return count;
@@ -270,21 +372,28 @@ kpc_get_config_count(uint32_t classes)
 int
 kpc_get_config(uint32_t classes, kpc_config_t *current_config)
 {
-       int count = 0;
+       uint32_t count = 0;
 
-       if( classes & KPC_CLASS_FIXED_MASK )
-       {
+       assert(current_config);
+
+       if (classes & KPC_CLASS_FIXED_MASK) {
                kpc_get_fixed_config(&current_config[count]);
                count += kpc_get_config_count(KPC_CLASS_FIXED_MASK);
        }
 
-       if( classes & KPC_CLASS_CONFIGURABLE_MASK )
-       {
-               kpc_get_configurable_config(&current_config[count]);
+       if (classes & KPC_CLASS_CONFIGURABLE_MASK) {
+               uint64_t pmc_mask = kpc_get_configurable_pmc_mask(KPC_CLASS_CONFIGURABLE_MASK);
+               kpc_get_configurable_config(&current_config[count], pmc_mask);
                count += kpc_get_config_count(KPC_CLASS_CONFIGURABLE_MASK);
        }
 
-       if( classes & KPC_CLASS_RAWPMU_MASK )
+       if (classes & KPC_CLASS_POWER_MASK) {
+               uint64_t pmc_mask = kpc_get_configurable_pmc_mask(KPC_CLASS_POWER_MASK);
+               kpc_get_configurable_config(&current_config[count], pmc_mask);
+               count += kpc_get_config_count(KPC_CLASS_POWER_MASK);
+       }
+
+       if (classes & KPC_CLASS_RAWPMU_MASK)
        {
                // Client shouldn't ask for config words that aren't available.
                // Most likely, they'd misinterpret the returned buffer if we
@@ -303,18 +412,30 @@ kpc_get_config(uint32_t classes, kpc_config_t *current_config)
 int
 kpc_set_config(uint32_t classes, kpc_config_t *configv)
 {
-       struct kpc_config_remote mp_config;
+       struct kpc_config_remote mp_config = {
+               .classes = classes, .configv = configv,
+               .pmc_mask = kpc_get_configurable_pmc_mask(classes)
+       };
+
+       assert(configv);
+
+       /* don't allow RAWPMU configuration when sharing counters */
+       if ((classes & KPC_CLASS_RAWPMU_MASK) && kpc_multiple_clients()) {
+               return EPERM;
+       }
 
-       // Don't allow RAWPMU configuration when sharing counters.
-       if( (classes & KPC_CLASS_RAWPMU_MASK) && kpc_multiple_clients() )
+       /* no clients have the right to modify both classes */
+       if ((classes & (KPC_CLASS_CONFIGURABLE_MASK)) &&
+           (classes & (KPC_CLASS_POWER_MASK)))
        {
                return EPERM;
        }
 
        lck_mtx_lock(&kpc_config_lock);
 
-       mp_config.classes = classes;
-       mp_config.configv = configv;
+       /* translate the power class for the machine layer */
+       if (classes & KPC_CLASS_POWER_MASK)
+               mp_config.classes |= KPC_CLASS_CONFIGURABLE_MASK;
 
        kpc_set_config_arch( &mp_config );
 
@@ -323,15 +444,16 @@ kpc_set_config(uint32_t classes, kpc_config_t *configv)
        return 0;
 }
 
-/* allocate a buffer big enough for all the counters */
+/* allocate a buffer large enough for all possible counters */
 uint64_t *
 kpc_counterbuf_alloc(void)
 {
-       uint64_t *buf;
+       uint64_t *buf = NULL;
 
-       buf = kalloc(KPC_MAX_COUNTERS * sizeof(uint64_t));
-       if(buf)
-               bzero( buf, KPC_MAX_COUNTERS * sizeof(uint64_t) );
+       buf = kalloc(COUNTERBUF_SIZE);
+       if (buf) {
+               bzero(buf, COUNTERBUF_SIZE);
+       }
 
        return buf;
 }
@@ -339,11 +461,13 @@ kpc_counterbuf_alloc(void)
 void
 kpc_counterbuf_free(uint64_t *buf)
 {
-       if( buf )
-               kfree(buf, KPC_MAX_COUNTERS * sizeof(uint64_t));
+       if (buf) {
+               kfree(buf, COUNTERBUF_SIZE);
+       }
 }
 
-void kpc_sample_kperf(uint32_t actionid)
+void
+kpc_sample_kperf(uint32_t actionid)
 {
        struct kperf_sample sbuf;
        struct kperf_context ctx;
@@ -368,24 +492,42 @@ void kpc_sample_kperf(uint32_t actionid)
 }
 
 
-int kpc_set_period(uint32_t classes, uint64_t *val)
+int
+kpc_set_period(uint32_t classes, uint64_t *val)
 {
-       struct kpc_config_remote mp_config;
+       struct kpc_config_remote mp_config = {
+               .classes = classes, .configv = val,
+               .pmc_mask = kpc_get_configurable_pmc_mask(classes)
+       };
+
+       assert(val);
+
+       /* no clients have the right to modify both classes */
+       if ((classes & (KPC_CLASS_CONFIGURABLE_MASK)) &&
+           (classes & (KPC_CLASS_POWER_MASK)))
+       {
+               return EPERM;
+       }
 
        lck_mtx_lock(&kpc_config_lock);
 
-#ifndef FIXED_COUNTER_SHADOW
+#ifdef FIXED_COUNTER_SHADOW
+       if ((classes & KPC_CLASS_FIXED_MASK) && !kpc_controls_fixed_counters()) {
+               lck_mtx_unlock(&kpc_config_lock);
+               return EPERM;
+       }
+# else
        if (classes & KPC_CLASS_FIXED_MASK) {
                lck_mtx_unlock(&kpc_config_lock);
-               return -1;
+               return EINVAL;
        }
 #endif
 
-       kprintf("setting period %u\n", classes);
-
-       mp_config.classes = classes;
-       mp_config.configv = val;
+       /* translate the power class for the machine layer */
+       if (classes & KPC_CLASS_POWER_MASK)
+               mp_config.classes |= KPC_CLASS_CONFIGURABLE_MASK;
 
+       kprintf("setting period %u\n", classes);
        kpc_set_period_arch( &mp_config );
 
        lck_mtx_unlock(&kpc_config_lock);
@@ -393,29 +535,41 @@ int kpc_set_period(uint32_t classes, uint64_t *val)
        return 0;
 }
 
-
-int kpc_get_period(uint32_t classes, uint64_t *val)
+int
+kpc_get_period(uint32_t classes, uint64_t *val)
 {
-       uint32_t i, count, offset = 0;
+       uint32_t count = 0;
+       uint64_t pmc_mask = 0ULL;
+
+       assert(val);
 
        lck_mtx_lock(&kpc_config_lock);
 
        if (classes & KPC_CLASS_FIXED_MASK) {
+               /* convert reload values to periods */
                count = kpc_get_counter_count(KPC_CLASS_FIXED_MASK);
+               for (uint32_t i = 0; i < count; ++i)
+                       *val++ = kpc_fixed_max() - FIXED_RELOAD(i);
+       }
 
-               /* convert reload values to periods */
-               for (i = 0; i < count; i++)
-                       val[i] = kpc_fixed_max() - FIXED_RELOAD(i);
+       if (classes & KPC_CLASS_CONFIGURABLE_MASK) {
+               pmc_mask = kpc_get_configurable_pmc_mask(KPC_CLASS_CONFIGURABLE_MASK);
 
-               offset += count;
+               /* convert reload values to periods */
+               count = kpc_configurable_count();
+               for (uint32_t i = 0; i < count; ++i)
+                       if ((1ULL << i) & pmc_mask)
+                               *val++ = kpc_configurable_max() - CONFIGURABLE_RELOAD(i);
        }
 
-       if (classes & KPC_CLASS_CONFIGURABLE_MASK) {
-               count = kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK);
+       if (classes & KPC_CLASS_POWER_MASK) {
+               pmc_mask = kpc_get_configurable_pmc_mask(KPC_CLASS_POWER_MASK);
 
                /* convert reload values to periods */
-               for (i = 0; i < count; i++)
-                       val[i + offset] = kpc_configurable_max() - CONFIGURABLE_RELOAD(i);
+               count = kpc_configurable_count();
+               for (uint32_t i = 0; i < count; ++i)
+                       if ((1ULL << i) & pmc_mask)
+                               *val++ = kpc_configurable_max() - CONFIGURABLE_RELOAD(i);
        }
 
        lck_mtx_unlock(&kpc_config_lock);
@@ -423,9 +577,13 @@ int kpc_get_period(uint32_t classes, uint64_t *val)
        return 0;
 }
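
The conversion above implies that a counter programmed with a reload value of (max - period) reads back as 'period'; presumably the hardware counts up from the reload value and overflows after that many events. A worked example with illustrative numbers:

        /* Illustrative values only: if kpc_configurable_max() == 0xFFFFFFFFFFFFULL and
         * CONFIGURABLE_RELOAD(i) was programmed to (0xFFFFFFFFFFFFULL - 100000), then
         * kpc_get_period() reports 100000 for counter i, i.e. one counter overflow
         * every 100000 events. */
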
 
-int kpc_set_actionid(uint32_t classes, uint32_t *val)
+int
+kpc_set_actionid(uint32_t classes, uint32_t *val)
 {
-       uint32_t count, offset = 0;
+       uint32_t count = 0;
+       uint64_t pmc_mask = 0ULL;
+
+       assert(val);
 
        /* NOTE: what happens if a pmi occurs while actionids are being
         * set is undefined. */
@@ -433,16 +591,26 @@ int kpc_set_actionid(uint32_t classes, uint32_t *val)
 
        if (classes & KPC_CLASS_FIXED_MASK) {
                count = kpc_get_counter_count(KPC_CLASS_FIXED_MASK);
-
                memcpy(&FIXED_ACTIONID(0), val, count*sizeof(uint32_t));
-
-               offset += count;
+               val += count;
        }
 
        if (classes & KPC_CLASS_CONFIGURABLE_MASK) {
-               count = kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK);
+               pmc_mask = kpc_get_configurable_pmc_mask(KPC_CLASS_CONFIGURABLE_MASK);
 
-               memcpy(&CONFIGURABLE_ACTIONID(0), &val[offset], count*sizeof(uint32_t));
+               count = kpc_configurable_count();
+               for (uint32_t i = 0; i < count; ++i)
+                       if ((1ULL << i) & pmc_mask)
+                               CONFIGURABLE_ACTIONID(i) = *val++;
+       }
+
+       if (classes & KPC_CLASS_POWER_MASK) {
+               pmc_mask = kpc_get_configurable_pmc_mask(KPC_CLASS_POWER_MASK);
+
+               count = kpc_configurable_count();
+               for (uint32_t i = 0; i < count; ++i)
+                       if ((1ULL << i) & pmc_mask)
+                               CONFIGURABLE_ACTIONID(i) = *val++;
        }
 
        lck_mtx_unlock(&kpc_config_lock);
@@ -452,22 +620,35 @@ int kpc_set_actionid(uint32_t classes, uint32_t *val)
 
 int kpc_get_actionid(uint32_t classes, uint32_t *val)
 {
-       uint32_t count, offset = 0;
+       uint32_t count = 0;
+       uint64_t pmc_mask = 0ULL;
+
+       assert(val);
 
        lck_mtx_lock(&kpc_config_lock);
 
        if (classes & KPC_CLASS_FIXED_MASK) {
                count = kpc_get_counter_count(KPC_CLASS_FIXED_MASK);
-
                memcpy(val, &FIXED_ACTIONID(0), count*sizeof(uint32_t));
-
-               offset += count;
+               val += count;
        }
 
        if (classes & KPC_CLASS_CONFIGURABLE_MASK) {
-               count = kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK);
+               pmc_mask = kpc_get_configurable_pmc_mask(KPC_CLASS_CONFIGURABLE_MASK);
 
-               memcpy(&val[offset], &CONFIGURABLE_ACTIONID(0), count*sizeof(uint32_t));
+               count = kpc_configurable_count();
+               for (uint32_t i = 0; i < count; ++i)
+                       if ((1ULL << i) & pmc_mask)
+                               *val++ = CONFIGURABLE_ACTIONID(i);
+       }
+
+       if (classes & KPC_CLASS_POWER_MASK) {
+               pmc_mask = kpc_get_configurable_pmc_mask(KPC_CLASS_POWER_MASK);
+
+               count = kpc_configurable_count();
+               for (uint32_t i = 0; i < count; ++i)
+                       if ((1ULL << i) & pmc_mask)
+                               *val++ = CONFIGURABLE_ACTIONID(i);
        }
 
        lck_mtx_unlock(&kpc_config_lock);
@@ -475,3 +656,133 @@ int kpc_get_actionid(uint32_t classes, uint32_t *val)
        return 0;
 
 }
+
+int
+kpc_set_running(uint32_t classes)
+{
+       uint32_t all_cfg_classes = KPC_CLASS_CONFIGURABLE_MASK | KPC_CLASS_POWER_MASK;
+       struct kpc_running_remote mp_config = {
+               .classes = classes, .cfg_target_mask = 0ULL, .cfg_state_mask = 0ULL
+       };
+
+       /* target all available PMCs */
+       mp_config.cfg_target_mask = kpc_get_configurable_pmc_mask(all_cfg_classes);
+
+       /* translate the power class for the machine layer */
+       if (classes & KPC_CLASS_POWER_MASK)
+               mp_config.classes |= KPC_CLASS_CONFIGURABLE_MASK;
+
+       /* generate the state of each configurable PMC */
+       mp_config.cfg_state_mask = kpc_get_configurable_pmc_mask(classes);
+
+       return kpc_set_running_arch(&mp_config);
+}
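
A plausible client sequence built on the calls above (a sketch, not part of this commit): program the configurable events, then enable counting. Note that the power class is folded into the configurable class before reaching the machine layer; cfg_state_mask records which PMCs should actually run.

        /* Sketch: configure and start counting.  'event_selectors' is a hypothetical,
         * machine-specific array of kpc_config_t values. */
        static int
        start_counting(kpc_config_t *event_selectors)
        {
                int ret = kpc_set_config(KPC_CLASS_CONFIGURABLE_MASK, event_selectors);
                if (ret != 0)
                        return ret;

                /* enable the fixed and configurable counters everywhere */
                return kpc_set_running(KPC_CLASS_FIXED_MASK | KPC_CLASS_CONFIGURABLE_MASK);
        }
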
+
+boolean_t
+kpc_register_pm_handler(kpc_pm_handler_t handler)
+{
+       return kpc_reserve_pm_counters(0x38, handler, TRUE);
+}
+
+boolean_t
+kpc_reserve_pm_counters(uint64_t pmc_mask, kpc_pm_handler_t handler,
+                        boolean_t custom_config)
+{
+       uint64_t all_mask = (1ULL << kpc_configurable_count()) - 1;
+       uint64_t req_mask = 0ULL;
+
+       /* pre-condition */
+       assert(handler != NULL);
+       assert(kpc_pm_handler == NULL);
+
+       /* check number of counters requested */
+       req_mask = (pmc_mask & all_mask);
+       assert(kpc_popcount(req_mask) <= kpc_configurable_count());
+
+       /* save the power manager's state */
+       kpc_pm_has_custom_config = custom_config;
+       kpc_pm_pmc_mask = req_mask;
+       kpc_pm_handler = handler;
+
+       printf("kpc: pm registered pmc_mask=%llx custom_config=%d\n",
+              req_mask, custom_config);
+
+       /* post-condition */
+       {
+               uint32_t cfg_count = kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK);
+               uint32_t pwr_count = kpc_popcount(kpc_pm_pmc_mask);
+#pragma unused(cfg_count, pwr_count)
+               assert((cfg_count + pwr_count) == kpc_configurable_count());
+       }
+
+       return force_all_ctrs ? FALSE : TRUE;
+}
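
For context, a minimal sketch of the power-manager side of this handshake; kpc_pm_handler_t's exact signature is not shown in this diff, so the handler below is only an assumed-compatible placeholder.

        /* 'my_pm_handler' is hypothetical; its type comes from the declarations used
         * in this file, but its signature is not visible in this diff. */
        extern kpc_pm_handler_t my_pm_handler;

        static boolean_t
        pm_claim_counters(void)
        {
                /* kpc_register_pm_handler() reserves PMC mask 0x38 (counters 3-5) on the
                 * caller's behalf; it returns FALSE if a task has already forced
                 * ownership of all counters (force_all_ctrs). */
                return kpc_register_pm_handler(my_pm_handler);
        }
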
+
+void
+kpc_release_pm_counters(void)
+{
+       /* pre-condition */
+       assert(kpc_pm_handler != NULL);
+
+       /* release the counters */
+       kpc_pm_has_custom_config = FALSE;
+       kpc_pm_pmc_mask = 0ULL;
+       kpc_pm_handler = NULL;
+
+       printf("kpc: pm released counters\n");
+
+       /* post-condition */
+       assert(kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK) == kpc_configurable_count());
+}
+
+uint8_t
+kpc_popcount(uint64_t value)
+{
+       return __builtin_popcountll(value);
+}
+
+uint64_t
+kpc_get_configurable_pmc_mask(uint32_t classes)
+{
+       uint32_t configurable_count = kpc_configurable_count();
+       uint64_t cfg_mask = 0ULL, pwr_mask = 0ULL, all_cfg_pmcs_mask = 0ULL;
+
+       /* no configurable class requested, or no configurable counters */
+       if (((classes & (KPC_CLASS_CONFIGURABLE_MASK | KPC_CLASS_POWER_MASK)) == 0) ||
+           (configurable_count == 0))
+       {
+               goto exit;
+       }
+
+       assert(configurable_count < 64);
+       all_cfg_pmcs_mask = (1ULL << configurable_count) - 1;
+
+       if (classes & KPC_CLASS_CONFIGURABLE_MASK) {
+               if (force_all_ctrs == TRUE)
+                       cfg_mask |= all_cfg_pmcs_mask;
+               else
+                       cfg_mask |= (~kpc_pm_pmc_mask) & all_cfg_pmcs_mask;
+       }
+
+       /*
+        * The power class exists iff:
+        *      - no task has acquired all PMCs (force_all_ctrs is FALSE)
+        *      - a PM handler is registered and uses kpc to program its PMCs
+        */
+       if ((force_all_ctrs == FALSE) &&
+           (kpc_pm_handler != NULL) &&
+           (kpc_pm_has_custom_config == FALSE) &&
+           (classes & KPC_CLASS_POWER_MASK))
+       {
+               pwr_mask |= kpc_pm_pmc_mask & all_cfg_pmcs_mask;
+       }
+
+exit:
+       /* post-conditions */
+       assert( ((cfg_mask | pwr_mask) & (~all_cfg_pmcs_mask)) == 0 );
+       assert( kpc_popcount(cfg_mask | pwr_mask) <= kpc_configurable_count() );
+       assert( (cfg_mask & pwr_mask) == 0ULL );
+
+       return cfg_mask | pwr_mask;
+}
+
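
A concrete (hypothetical) partition helps when reading these masks: with 8 configurable PMCs, kpc_pm_pmc_mask == 0x38, force_all_ctrs FALSE, and a registered PM handler without a custom config:

        /* all_cfg_pmcs_mask == 0xFF (8 PMCs), kpc_pm_pmc_mask == 0x38:
         *   kpc_get_configurable_pmc_mask(KPC_CLASS_CONFIGURABLE_MASK) == 0xC7
         *   kpc_get_configurable_pmc_mask(KPC_CLASS_POWER_MASK)        == 0x38
         * The two masks are disjoint and together cover all eight PMCs, which is
         * exactly what the post-conditions assert. */
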
index 692aa02fe1bb21eebda81983abeff8304856e672..1ac250c39c3719beac70627b05a1a36e3e45b9b1 100644 (file)
 #include <kern/locks.h>
 #include <sys/errno.h>
 
+#include <kperf/kperf.h>
+#include <kperf/buffer.h>
+#include <kperf/context.h>
+#include <kperf/sample.h>
+#include <kperf/action.h>
+#include <kperf/kperf_kpc.h>
 #include <kern/kpc.h>
 
 
 /* global for whether to read PMCs on context switch */
-int kpc_threads_counting;
+int kpc_threads_counting = 0;
 
 /* current config and number of counters in that config */
 static uint32_t kpc_thread_classes = 0;
@@ -47,8 +53,6 @@ static lck_grp_attr_t *kpc_thread_lckgrp_attr = NULL;
 static lck_grp_t      *kpc_thread_lckgrp = NULL;
 static lck_mtx_t       kpc_thread_lock;
 
-void kpc_thread_init(void);
-
 void
 kpc_thread_init(void)
 {
@@ -112,6 +116,7 @@ kpc_set_thread_counting(uint32_t classes)
                }       
        }
 
+    kperf_kpc_cswitch_callback_update();
        lck_mtx_unlock(&kpc_thread_lock);
 
        return 0;
@@ -125,12 +130,7 @@ kpc_update_thread_counters( thread_t thread )
        uint64_t *tmp = NULL;
        cpu_data_t *cpu = NULL;
 
-/* TODO: Fix this...*/
-#if defined (__x86_64__)
        cpu = current_cpu_datap();
-#else
-#error architecture not yet supported
-#endif
 
        /* 1. stash current PMCs into latest CPU block */
        kpc_get_cpu_counters( FALSE, kpc_thread_classes, 
index 95a5d89c60aaa405af707f1e8ace29514c502e1c..1f90ef24c015d330f963c82fc724fc6dda962c0f 100644 (file)
@@ -33,6 +33,7 @@
 #include <kern/ledger.h>
 #include <kern/kalloc.h>
 #include <kern/task.h>
+#include <kern/thread.h>
 
 #include <kern/processor.h>
 #include <kern/machine.h>
@@ -322,7 +323,7 @@ ledger_key_lookup(ledger_template_t template, const char *key)
 
        template_lock(template);
        for (idx = 0; idx < template->lt_cnt; idx++)
-               if (template->lt_entries[idx].et_key &&
+               if (template->lt_entries != NULL &&
                    (strcmp(key, template->lt_entries[idx].et_key) == 0))
                        break;
 
@@ -651,7 +652,7 @@ ledger_refill(uint64_t now, ledger_t ledger, int entry)
  */
 #define TOCKSTAMP_IS_STALE(now, tock) ((((now) - (tock)) < NTOCKS) ? FALSE : TRUE)
 
-static void
+void
 ledger_check_new_balance(ledger_t ledger, int entry)
 {
        struct ledger_entry *le;
@@ -1150,29 +1151,6 @@ ledger_set_action(ledger_t ledger, int entry, int action)
        return (KERN_SUCCESS);
 }
 
-void
-set_astledger(thread_t thread)
-{
-       spl_t s = splsched();
-
-       if (thread == current_thread()) {
-               thread_ast_set(thread, AST_LEDGER);
-               ast_propagate(thread->ast);
-       } else {
-               processor_t p;
-
-               thread_lock(thread);
-               thread_ast_set(thread, AST_LEDGER);
-               p = thread->last_processor;
-               if ((p != PROCESSOR_NULL) && (p->state == PROCESSOR_RUNNING) &&
-                  (p->active_thread == thread))
-                       cause_ast_check(p);
-               thread_unlock(thread);
-       }
-       
-       splx(s);
-}
-
 kern_return_t
 ledger_debit(ledger_t ledger, int entry, ledger_amount_t amount)
 {
index d29f07d275ac84ed150483261d04e67025797b7c..b7f1eb0d3fa7a1796b3998d0df4263af58eb69c8 100644 (file)
@@ -120,6 +120,7 @@ extern kern_return_t ledger_set_period(ledger_t ledger, int entry,
     uint64_t period);
 extern kern_return_t ledger_disable_refill(ledger_t l, int entry);
 extern kern_return_t ledger_entry_setactive(ledger_t ledger, int entry);
+extern void ledger_check_new_balance(ledger_t ledger, int entry);
 extern kern_return_t ledger_credit(ledger_t ledger, int entry,
        ledger_amount_t amount);
 extern kern_return_t ledger_debit(ledger_t ledger, int entry,
@@ -135,7 +136,6 @@ extern kern_return_t ledger_disable_panic_on_negative(ledger_t ledger, int entry
 extern kern_return_t ledger_rollup(ledger_t to_ledger, ledger_t from_ledger);
 
 extern void ledger_ast(thread_t thread);
-extern void set_astledger(thread_t thread);
 
 extern int ledger_reference_count(ledger_t ledger);
 extern kern_return_t ledger_reference(ledger_t ledger);
index 87189b89f693d89f46a5dfec3fc1936df9a271f7..4a498b21447ae40d82b456bd834039e4f79b4afe 100644 (file)
@@ -87,7 +87,6 @@
 #define        LCK_MTX_LCK_WAIT_CODE           2
 #define        LCK_MTX_UNLCK_WAKEUP_CODE       3
 
-
 static queue_head_t    lck_grp_queue;
 static unsigned int    lck_grp_cnt;
 
@@ -196,7 +195,7 @@ lck_grp_attr_free(
 
 
 /*
- * Routine:    lck_grp_alloc_init
+ * Routine: lck_grp_alloc_init
  */
 
 lck_grp_t *
@@ -212,27 +211,23 @@ lck_grp_alloc_init(
        return(grp);
 }
 
-
 /*
- * Routine:    lck_grp_init
+ * Routine: lck_grp_init
  */
 
 void
-lck_grp_init(
-       lck_grp_t               *grp,               
-       const char*             grp_name,           
-       lck_grp_attr_t  *attr)             
+lck_grp_init(lck_grp_t * grp, const char * grp_name, lck_grp_attr_t * attr)
 {
        bzero((void *)grp, sizeof(lck_grp_t));
 
-       (void) strncpy(grp->lck_grp_name, grp_name, LCK_GRP_MAX_NAME);
+       (void)strlcpy(grp->lck_grp_name, grp_name, LCK_GRP_MAX_NAME);
 
        if (attr != LCK_GRP_ATTR_NULL)
                grp->lck_grp_attr = attr->grp_attr_val;
        else if (LcksOpts & enaLkStat)
-                grp->lck_grp_attr = LCK_GRP_ATTR_STAT;
-        else
-                grp->lck_grp_attr = LCK_ATTR_NONE;
+               grp->lck_grp_attr = LCK_GRP_ATTR_STAT;
+       else
+               grp->lck_grp_attr = LCK_ATTR_NONE;
 
        grp->lck_grp_refcnt = 1;
 
@@ -240,10 +235,8 @@ lck_grp_init(
        enqueue_tail(&lck_grp_queue, (queue_entry_t)grp);
        lck_grp_cnt++;
        lck_mtx_unlock(&lck_grp_lock);
-
 }
 
-
 /*
  * Routine:    lck_grp_free
  */
@@ -484,6 +477,39 @@ lck_spin_sleep_deadline(
 }
 
 
+/*
+ * Routine:    lck_mtx_clear_promoted
+ *
+ * Handle clearing of TH_SFLAG_PROMOTED,
+ * adjusting thread priority as needed.
+ *
+ * Called with thread lock held
+ */
+static void
+lck_mtx_clear_promoted (
+       thread_t                        thread,
+       __kdebug_only uintptr_t         trace_lck)
+{
+       thread->sched_flags &= ~TH_SFLAG_PROMOTED;
+
+       if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
+               /* Thread still has a RW lock promotion */
+       } else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
+               KERNEL_DEBUG_CONSTANT(
+                       MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
+                               thread->sched_pri, DEPRESSPRI, 0, trace_lck, 0);
+               set_sched_pri(thread, DEPRESSPRI);
+       } else {
+               if (thread->base_pri < thread->sched_pri) {
+                       KERNEL_DEBUG_CONSTANT(
+                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
+                                       thread->sched_pri, thread->base_pri, 0, trace_lck, 0);
+               }
+               thread_recompute_sched_pri(thread, FALSE);
+       }
+}
+
+
 /*
  * Routine:    lck_mtx_sleep
  */
@@ -498,7 +524,7 @@ lck_mtx_sleep(
        thread_t                thread = current_thread();
  
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
-                    (int)lck, (int)lck_sleep_action, (int)event, (int)interruptible, 0);
+                    VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
 
        if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
                panic("Invalid lock sleep action %x\n", lck_sleep_action);
@@ -556,7 +582,7 @@ lck_mtx_sleep_deadline(
        thread_t                thread = current_thread();
 
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
-                    (int)lck, (int)lck_sleep_action, (int)event, (int)interruptible, 0);
+                    VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
 
        if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
                panic("Invalid lock sleep action %x\n", lck_sleep_action);
@@ -610,6 +636,8 @@ lck_mtx_lock_wait (
 {
        thread_t                self = current_thread();
        lck_mtx_t               *mutex;
+       __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
+       __kdebug_only uintptr_t trace_holder = VM_KERNEL_UNSLIDE_OR_PERM(holder);
        integer_t               priority;
        spl_t                   s = splsched();
 #if    CONFIG_DTRACE
@@ -625,11 +653,11 @@ lck_mtx_lock_wait (
        else
                mutex = &lck->lck_mtx_ptr->lck_mtx;
 
-       KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START, (int)lck, (int)holder, 0, 0, 0);
+       KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START, trace_lck, trace_holder, 0, 0, 0);
 
        priority = self->sched_pri;
-       if (priority < self->priority)
-               priority = self->priority;
+       if (priority < self->base_pri)
+               priority = self->base_pri;
        if (priority < BASEPRI_DEFAULT)
                priority = BASEPRI_DEFAULT;
 
@@ -640,11 +668,10 @@ lck_mtx_lock_wait (
        if (mutex->lck_mtx_pri == 0)
                holder->promotions++;
        holder->sched_flags |= TH_SFLAG_PROMOTED;
-       if (            mutex->lck_mtx_pri < priority   &&
-                               holder->sched_pri < priority            ) {
+       if (mutex->lck_mtx_pri < priority && holder->sched_pri < priority) {
                KERNEL_DEBUG_CONSTANT(
                        MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
-                                       holder->sched_pri, priority, holder, lck, 0);
+                                       holder->sched_pri, priority, trace_holder, trace_lck, 0);
                set_sched_pri(holder, priority);
        }
        thread_unlock(holder);
@@ -662,7 +689,7 @@ lck_mtx_lock_wait (
                mutex->lck_mtx_waiters++;
        }
 
-       assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_mtx_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
+       assert_wait(LCK_MTX_EVENT(mutex), THREAD_UNINT);
        lck_mtx_ilk_unlock(mutex);
 
        thread_block(THREAD_CONTINUE_NULL);
@@ -701,6 +728,9 @@ lck_mtx_lock_acquire(
 {
        thread_t                thread = current_thread();
        lck_mtx_t               *mutex;
+       integer_t               priority;
+       spl_t                   s;
+       __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
 
        if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
                mutex = lck;
@@ -714,26 +744,38 @@ lck_mtx_lock_acquire(
                mutex->lck_mtx_waiters--;
        }
 
-       if (mutex->lck_mtx_waiters > 0) {
-               integer_t               priority = mutex->lck_mtx_pri;
-               spl_t                   s = splsched();
+       if (mutex->lck_mtx_waiters)
+               priority = mutex->lck_mtx_pri;
+       else {
+               mutex->lck_mtx_pri = 0;
+               priority = 0;
+       }
 
+       if (priority || thread->was_promoted_on_wakeup) {
+               s = splsched();
                thread_lock(thread);
-               thread->promotions++;
-               thread->sched_flags |= TH_SFLAG_PROMOTED;
-               if (thread->sched_pri < priority) {
-                       KERNEL_DEBUG_CONSTANT(
-                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
-                                               thread->sched_pri, priority, 0, lck, 0);
-                       /* Do not promote past promotion ceiling */
-                       assert(priority <= MAXPRI_PROMOTE);
-                       set_sched_pri(thread, priority);
+
+               if (priority) {
+                       thread->promotions++;
+                       thread->sched_flags |= TH_SFLAG_PROMOTED;
+                       if (thread->sched_pri < priority) {
+                               KERNEL_DEBUG_CONSTANT(
+                                       MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
+                                                       thread->sched_pri, priority, 0, trace_lck, 0);
+                               /* Do not promote past promotion ceiling */
+                               assert(priority <= MAXPRI_PROMOTE);
+                               set_sched_pri(thread, priority);
+                       }
                }
+               if (thread->was_promoted_on_wakeup) {
+                       thread->was_promoted_on_wakeup = 0;
+                       if (thread->promotions == 0)
+                               lck_mtx_clear_promoted(thread, trace_lck);
+               }
+
                thread_unlock(thread);
                splx(s);
        }
-       else
-               mutex->lck_mtx_pri = 0;
 
 #if CONFIG_DTRACE
        if (lockstat_probemap[LS_LCK_MTX_LOCK_ACQUIRE] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_ACQUIRE]) {
@@ -761,6 +803,7 @@ lck_mtx_unlock_wakeup (
 {
        thread_t                thread = current_thread();
        lck_mtx_t               *mutex;
+       __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
 
        if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
                mutex = lck;
@@ -770,40 +813,20 @@ lck_mtx_unlock_wakeup (
        if (thread != holder)
                panic("lck_mtx_unlock_wakeup: mutex %p holder %p\n", mutex, holder);
 
-       KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START, (int)lck, (int)holder, 0, 0, 0);
+       KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START, trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(holder), 0, 0, 0);
 
        assert(mutex->lck_mtx_waiters > 0);
-       thread_wakeup_one((event_t)(((unsigned int*)lck)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));
+       if (mutex->lck_mtx_waiters > 1)
+               thread_wakeup_one_with_pri(LCK_MTX_EVENT(lck), lck->lck_mtx_pri);
+       else
+               thread_wakeup_one(LCK_MTX_EVENT(lck));
 
        if (thread->promotions > 0) {
                spl_t           s = splsched();
 
                thread_lock(thread);
-               if (    --thread->promotions == 0                               &&
-                               (thread->sched_flags & TH_SFLAG_PROMOTED)               ) {
-                       thread->sched_flags &= ~TH_SFLAG_PROMOTED;
-
-                       if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
-                               /* Thread still has a RW lock promotion */
-                       } else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
-                               KERNEL_DEBUG_CONSTANT(
-                                       MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
-                                                 thread->sched_pri, DEPRESSPRI, 0, lck, 0);
-
-                               set_sched_pri(thread, DEPRESSPRI);
-                       }
-                       else {
-                               if (thread->priority < thread->sched_pri) {
-                                       KERNEL_DEBUG_CONSTANT(
-                                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) |
-                                                                                                                       DBG_FUNC_NONE,
-                                                       thread->sched_pri, thread->priority,
-                                                                       0, lck, 0);
-                               }
-
-                               SCHED(compute_priority)(thread, FALSE);
-                       }
-               }
+               if (--thread->promotions == 0 && (thread->sched_flags & TH_SFLAG_PROMOTED))
+                       lck_mtx_clear_promoted(thread, trace_lck);
                thread_unlock(thread);
                splx(s);
        }
@@ -816,9 +839,9 @@ lck_mtx_unlockspin_wakeup (
        lck_mtx_t                       *lck)
 {
        assert(lck->lck_mtx_waiters > 0);
-       thread_wakeup_one((event_t)(((unsigned int*)lck)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));
+       thread_wakeup_one(LCK_MTX_EVENT(lck));
 
-       KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_NONE, (int)lck, 0, 0, 1, 0);
+       KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_NONE, VM_KERNEL_UNSLIDE_OR_PERM(lck), 0, 0, 1, 0);
 #if CONFIG_DTRACE
        /*
         * When there are waiters, we skip the hot-patch spot in the
@@ -1085,9 +1108,9 @@ void lck_rw_clear_promotion(thread_t thread)
                        set_sched_pri(thread, DEPRESSPRI);
                } else {
                        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE,
-                                                                 thread->sched_pri, thread->priority, 0, 0, 0);
+                                                                 thread->sched_pri, thread->base_pri, 0, 0, 0);
                        
-                       SCHED(compute_priority)(thread, FALSE);
+                       thread_recompute_sched_pri(thread, FALSE);
                }
        }
 
@@ -1118,7 +1141,7 @@ host_lockgroup_info(
 
        lockgroup_info_size = round_page(lck_grp_cnt * sizeof *lockgroup_info);
        kr = kmem_alloc_pageable(ipc_kernel_map,
-                                                &lockgroup_info_addr, lockgroup_info_size);
+                                                &lockgroup_info_addr, lockgroup_info_size, VM_KERN_MEMORY_IPC);
        if (kr != KERN_SUCCESS) {
                lck_mtx_unlock(&lck_grp_lock);
                return(kr);
index 425c8dc7e2b1b767a7424b234aa3d7c71f7e2664..c8768fe6d707818af86574c60f51f68212626d53 100644 (file)
@@ -267,7 +267,7 @@ extern wait_result_t        lck_spin_sleep_deadline(
 extern boolean_t               lck_spin_try_lock(                      lck_spin_t              *lck);
 
 /* NOT SAFE: To be used only by kernel debugger to avoid deadlock. */
-extern boolean_t               lck_spin_is_acquired(                   lck_spin_t              *lck);
+extern boolean_t               kdp_lck_spin_is_acquired(               lck_spin_t              *lck);
 
 struct _lck_mtx_ext_;
 extern void lck_mtx_init_ext(lck_mtx_t *lck, struct _lck_mtx_ext_ *lck_ext,
@@ -339,6 +339,8 @@ extern void                 lck_mtx_lock_spin(
 extern void                    lck_mtx_convert_spin(
                                                                        lck_mtx_t               *lck);
 
+extern boolean_t               kdp_lck_mtx_lock_spin_is_acquired(
+                                                                       lck_mtx_t               *lck);
 #define lck_mtx_unlock_always(l)       lck_mtx_unlock(l)
 
 #else
@@ -346,10 +348,14 @@ extern void                       lck_mtx_convert_spin(
 #define        lck_mtx_lock_spin(l)            lck_mtx_lock(l)
 #define lck_mtx_try_lock_spin_always(l)        lck_spin_try_lock(l)
 #define lck_mtx_lock_spin_always(l)    lck_spin_lock(l)
+#define kdp_lck_mtx_lock_spin_is_acquired(l) kdp_lck_spin_is_acquired(l)
 #define lck_mtx_unlock_always(l)       lck_spin_unlock(l)
 #define        lck_mtx_convert_spin(l)         do {} while (0)
 #endif
 
+extern boolean_t               kdp_lck_rw_lock_is_acquired_exclusive(
+                                                                       lck_rw_t                *lck);
+
 #endif /* KERNEL_PRIVATE */
 
 extern void                            lck_mtx_assert(
@@ -378,6 +384,9 @@ extern void                         lck_mtx_unlockspin_wakeup(
 extern boolean_t               lck_mtx_ilk_unlock(
                                                                        lck_mtx_t               *lck);
 
+extern boolean_t               lck_mtx_ilk_try_lock(
+                                                                       lck_mtx_t               *lck);
+
 #endif
 
 #define decl_lck_rw_data(class,name)     class lck_rw_t name;
index fcdd78f208d6d3418e3b094a2f351c7f54a98715..18c48dd2b33c22402ecc752e813fe4b00010c4e9 100644 (file)
@@ -92,9 +92,18 @@ extern void machine_idle(void);
 
 extern void machine_track_platform_idle(boolean_t);
 
+/* Signals a processor to bring it out of idle */
 extern void machine_signal_idle(
                                        processor_t         processor);
 
+/* Signals a processor to bring it out of idle unless canceled */
+extern void machine_signal_idle_deferred(
+                                       processor_t         processor);
+
+/* Cancels an outstanding machine_signal_idle_deferred, if this is supported */
+extern void machine_signal_idle_cancel(
+                                       processor_t         processor);
+
 extern void halt_cpu(void);
 
 extern void halt_all_cpus(
@@ -114,4 +123,28 @@ extern void machine_callstack(
 
 extern void consider_machine_collect(void);
 
+/*
+ * Machine-dependent routine to inform platform layer and external
+ * CPU power management about context switches
+ */
+
+extern void    machine_thread_going_on_core(thread_t   new_thread,
+                                       int             urgency,
+                                       uint64_t        sched_latency);
+
+extern void machine_thread_going_off_core(thread_t old_thread, boolean_t thread_terminating);
+
+extern void machine_max_runnable_latency(uint64_t bg_max_latency,
+                                                                                uint64_t default_max_latency,
+                                                                                uint64_t realtime_max_latency);
+
+extern void machine_work_interval_notify(thread_t thread,
+                                                                                uint64_t work_id,
+                                                                                uint64_t start_abstime,
+                                                                                uint64_t finish_abstime,
+                                                                                uint64_t deadline_abstime,
+                                                                                uint64_t next_start_abstime,
+                                                                                uint16_t urgency,
+                                                                                uint32_t flags);
+
 #endif /* _KERN_MACHINE_H_ */
index fa9a5725160106c8a38d5690570ef64ac7e4edbf..010661a22205ae579cacbbdeb37c940eb0d048c4 100644 (file)
@@ -152,13 +152,22 @@ _doprnt(
        va_list                 *argp,
        void                    (*putc)(char),
        int                     radix);
+
+void
+_doprnt_log(
+       register const char     *fmt,
+       va_list                 *argp,
+       void                    (*putc)(char),
+       int                     radix);
+
 int
 __doprnt(
        register const char     *fmt,
        va_list                 argp,
        void                    (*putc)(int, void *),
        void                    *arg,
-       int                     radix);
+       int                     radix,
+       int                     is_log);
 
 extern void safe_gets(
        char    *str,
index ebad3cddfe0568b245c0684aded300f8681867ad..3860f9b39b64681ffc9d0b35298095ac12974153 100644 (file)
@@ -53,6 +53,7 @@ struct pager_crypt_info {
         void    (*crypt_end)(void *crypt_ops);
         /* Private data for the crypter */
         void    *crypt_ops;
+       volatile int    crypt_refcnt;
 };
 typedef struct pager_crypt_info pager_crypt_info_t;
 
index 2ce720c57a1954ac89a30d0f3a16127369a3191a..82ad32bb90acdcca8ea57016053e627bdcab0689 100644 (file)
@@ -214,6 +214,12 @@ printnum(
 
 boolean_t      _doprnt_truncates = FALSE;
 
+#if (DEVELOPMENT || DEBUG) 
+boolean_t      doprnt_hide_pointers = FALSE;
+#else
+boolean_t      doprnt_hide_pointers = TRUE;
+#endif
+
 int
 __doprnt(
        const char      *fmt,
@@ -221,7 +227,8 @@ __doprnt(
                                                /* character output routine */
        void                    (*putc)(int, void *arg),
        void                    *arg,
-       int                     radix)          /* default radix - for '%r' */
+       int                     radix,          /* default radix - for '%r' */
+       int                     is_log)
 {
        int             length;
        int             prec;
@@ -566,6 +573,21 @@ __doprnt(
 
                    if (truncate) u = (long long)((int)(u));
 
+                   if (doprnt_hide_pointers && is_log) {
+                       const char str[] = "<ptr>";
+                       const char* strp = str;
+                       int strl = sizeof(str) - 1;
+
+                       if (u >= VM_MIN_KERNEL_AND_KEXT_ADDRESS && u <= VM_MAX_KERNEL_ADDRESS) {
+                           while(*strp != '\0') {
+                               (*putc)(*strp, arg);
+                               strp++;
+                           }
+                           nprinted += strl;
+                           break;
+                       }
+                   }
+
                    if (u != 0 && altfmt) {
                        if (base == 8)
                            prefix = "0";
@@ -653,7 +675,18 @@ _doprnt(
        void                    (*putc)(char),
        int                     radix)          /* default radix - for '%r' */
 {
-    __doprnt(fmt, *argp, dummy_putc, putc, radix);
+    __doprnt(fmt, *argp, dummy_putc, putc, radix, FALSE);
+}
+
+void 
+_doprnt_log(
+       register const char     *fmt,
+       va_list                 *argp,
+                                               /* character output routine */
+       void                    (*putc)(char),
+       int                     radix)          /* default radix - for '%r' */
+{
+    __doprnt(fmt, *argp, dummy_putc, putc, radix, TRUE);
 }
 
 #if    MP_PRINTF 
@@ -770,7 +803,7 @@ printf(const char *fmt, ...)
        if (fmt) {
                disable_preemption();
                va_start(listp, fmt);
-               _doprnt(fmt, &listp, conslog_putc, 16);
+               _doprnt_log(fmt, &listp, conslog_putc, 16);
                va_end(listp);
                enable_preemption();
        }
@@ -815,7 +848,7 @@ kdb_printf(const char *fmt, ...)
        va_list listp;
 
        va_start(listp, fmt);
-       _doprnt(fmt, &listp, consdebug_putc, 16);
+       _doprnt_log(fmt, &listp, consdebug_putc, 16);
        va_end(listp);
        return 0;
 }
@@ -869,7 +902,7 @@ sprintf(char *buf, const char *fmt, ...)
 
         va_start(listp, fmt);
         copybyte_str = buf;
-        __doprnt(fmt, listp, copybyte, &copybyte_str, 16);
+        __doprnt(fmt, listp, copybyte, &copybyte_str, 16, FALSE);
         va_end(listp);
        *copybyte_str = '\0';
         return (int)strlen(buf);
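
The net effect of the is_log plumbing: printf() and kdb_printf() route through _doprnt_log() and so hide kernel-range pointers on kernels where doprnt_hide_pointers is TRUE (the default outside DEVELOPMENT/DEBUG builds), while sprintf() keeps the raw value. A sketch of the observable difference, assuming 'obj' is a hypothetical pointer inside the kernel address range:

        char line[64];

        printf("object at %p\n", obj);        /* console/log show:  object at <ptr> */
        sprintf(line, "object at %p", obj);   /* 'line' keeps the real address      */
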
index 0b64ac8aa13de300d24cc8ae9f2f225732f21516..ffc92cb7ba80952332cfe6da7722b2b7cd19e012 100644 (file)
@@ -75,6 +75,7 @@
 #include <kern/processor.h>
 #include <kern/ledger.h>
 #include <machine/machparam.h>
+#include <kern/machine.h>
 
 #ifdef CONFIG_MACH_APPROXIMATE_TIME
 #include <machine/commpage.h>  /* for commpage_update_mach_approximate_time */
@@ -97,8 +98,11 @@ thread_quantum_expire(
        thread_t                        thread = p1;
        ast_t                           preempt;
        uint64_t                        ctime;
+       int                                     urgency;
+       uint64_t                        ignore1, ignore2;
 
        assert(processor == current_processor());
+       assert(thread == current_thread());
 
        SCHED_STATS_QUANTUM_TIMER_EXPIRATION(processor);
 
@@ -121,17 +125,19 @@ thread_quantum_expire(
 
        ctime = mach_absolute_time();
 
+#ifdef CONFIG_MACH_APPROXIMATE_TIME
+       commpage_update_mach_approximate_time(ctime);
+#endif
+
        thread_lock(thread);
 
        /*
         * We've run up until our quantum expiration, and will (potentially)
         * continue without re-entering the scheduler, so update this now.
         */
+       processor->last_dispatch = ctime;
        thread->last_run_time = ctime;
 
-#ifdef CONFIG_MACH_APPROXIMATE_TIME
-       commpage_update_mach_approximate_time(ctime);
-#endif
        /*
         *      Check for fail-safe trip.
         */
@@ -160,16 +166,20 @@ thread_quantum_expire(
        else
                SCHED(lightweight_update_priority)(thread);
 
-       SCHED(quantum_expire)(thread);
-       
+       if (thread->sched_mode != TH_MODE_REALTIME)
+               SCHED(quantum_expire)(thread);
+
        processor->current_pri = thread->sched_pri;
        processor->current_thmode = thread->sched_mode;
 
+       /* Tell platform layer that we are still running this thread */
+       urgency = thread_get_urgency(thread, &ignore1, &ignore2);
+       machine_thread_going_on_core(thread, urgency, 0);
+
        /*
         *      This quantum is up, give this thread another.
         */
-       if (first_timeslice(processor))
-               processor->timeslice--;
+       processor->first_timeslice = FALSE;
 
        thread_quantum_init(thread);
 
@@ -190,8 +200,6 @@ thread_quantum_expire(
        }
 
        processor->quantum_end = ctime + thread->quantum_remaining;
-       timer_call_enter1(&processor->quantum_timer, thread,
-           processor->quantum_end, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
 
        /*
         *      Context switch check.
@@ -201,9 +209,13 @@ thread_quantum_expire(
 
        thread_unlock(thread);
 
+       timer_call_enter1(&processor->quantum_timer, thread,
+           processor->quantum_end, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
+
 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
-       sched_traditional_consider_maintenance(ctime);
-#endif /* CONFIG_SCHED_TIMESHARE_CORE */       
+       sched_timeshare_consider_maintenance(ctime);
+#endif /* CONFIG_SCHED_TIMESHARE_CORE */
+
 }
 
 /*
@@ -212,35 +224,82 @@ thread_quantum_expire(
  *     Set the base priority of the thread
  *     and reset its scheduled priority.
  *
+ *     This is the only path to change base_pri.
+ *
  *     Called with the thread locked.
  */
 void
 sched_set_thread_base_priority(thread_t thread, int priority)
 {
-       thread->priority = priority;
-       SCHED(compute_priority)(thread, FALSE);
+       thread->base_pri = priority;
+
+       thread_recompute_sched_pri(thread, FALSE);
 }
 
+/*
+ *     thread_recompute_sched_pri:
+ *
+ *     Reset the scheduled priority of the thread
+ *     according to its base priority if the
+ *     thread has not been promoted or depressed.
+ *
+ *     This is the standard way to push base_pri changes into sched_pri,
+ *     or to recalculate the appropriate sched_pri after clearing
+ *     a promotion or depression.
+ *
+ *     Called at splsched with the thread locked.
+ */
+void
+thread_recompute_sched_pri(
+                           thread_t thread,
+                           boolean_t override_depress)
+{
+       int priority;
+
+       if (thread->sched_mode == TH_MODE_TIMESHARE)
+               priority = SCHED(compute_timeshare_priority)(thread);
+       else
+               priority = thread->base_pri;
 
-#if defined(CONFIG_SCHED_TIMESHARE_CORE)
+       if ((!(thread->sched_flags & TH_SFLAG_PROMOTED_MASK)  || (priority > thread->sched_pri)) &&
+           (!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) || override_depress)) {
+               set_sched_pri(thread, priority);
+       }
+}
 
 void
-sched_traditional_quantum_expire(thread_t      thread __unused)
+sched_default_quantum_expire(thread_t thread __unused)
 {
-       /*
-        * No special behavior when a timeshare, fixed, or realtime thread
-        * uses up its entire quantum
-        */
+      /*
+       * No special behavior when a timeshare, fixed, or realtime thread
+       * uses up its entire quantum
+       */
 }
 
+#if defined(CONFIG_SCHED_TIMESHARE_CORE)
+
+/*
+ *     lightweight_update_priority:
+ *
+ *     Update the scheduled priority for
+ *     a timesharing thread.
+ *
+ *     Only for use on the current thread.
+ *
+ *     Called with the thread locked.
+ */
 void
 lightweight_update_priority(thread_t thread)
 {
+       assert(thread->runq == PROCESSOR_NULL);
+       assert(thread == current_thread());
+
        if (thread->sched_mode == TH_MODE_TIMESHARE) {
-               register uint32_t       delta;
-               
+               int priority;
+               uint32_t delta;
+
                thread_timer_delta(thread, delta);
-               
+
                /*
                 *      Accumulate timesharing usage only
                 *      during contention for processor
@@ -248,18 +307,29 @@ lightweight_update_priority(thread_t thread)
                 */
                if (thread->pri_shift < INT8_MAX)
                        thread->sched_usage += delta;
-               
+
                thread->cpu_delta += delta;
-               
+
+               priority = sched_compute_timeshare_priority(thread);
+
                /*
-                * Adjust the scheduled priority if
-                * the thread has not been promoted
-                * and is not depressed.
+                * Adjust the scheduled priority like thread_recompute_sched_pri,
+                * except with the benefit of knowing the thread is on this core.
                 */
-               if (    !(thread->sched_flags & TH_SFLAG_PROMOTED_MASK) &&
-                       !(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK)                )
-                       compute_my_priority(thread);
-       }       
+               if ((!(thread->sched_flags & TH_SFLAG_PROMOTED_MASK)  || (priority > thread->sched_pri)) &&
+                   (!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK)) &&
+                   priority != thread->sched_pri) {
+
+                       thread->sched_pri = priority;
+
+                       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHANGE_PRIORITY),
+                                             (uintptr_t)thread_tid(thread),
+                                             thread->base_pri,
+                                             thread->sched_pri,
+                                             0, /* eventually, 'reason' */
+                                             0);
+               }
+       }
 }
 
 /*
@@ -283,83 +353,27 @@ static struct shift_data  sched_decay_shifts[SCHED_DECAY_TICKS] = {
 };
 
 /*
- *     do_priority_computation:
+ *     sched_compute_timeshare_priority:
  *
  *     Calculate the timesharing priority based upon usage and load.
  */
 extern int sched_pri_decay_band_limit;
 
 
-static int do_priority_computation(thread_t th) {                                                                                                                      
-       register int priority = th->priority            /* start with base priority */          
-           - (th->sched_usage >> th->pri_shift);                               
-       if (priority < MINPRI_USER)                                                                                     
-               priority = MINPRI_USER;                                                                                 
-       else                                                                                                                            
-       if (priority > MAXPRI_KERNEL)                                                                                   
-               priority = MAXPRI_KERNEL;       
-
-       return priority;                                                                                
-}
-
-
-/*
- *     compute_priority:
- *
- *     Reset the scheduled priority of the thread
- *     according to its base priority if the
- *     thread has not been promoted or depressed.
- *
- *     Called with the thread locked.
- */
-void
-compute_priority(
-       register thread_t       thread,
-       boolean_t                       override_depress)
+int
+sched_compute_timeshare_priority(thread_t thread)
 {
-       register int            priority;
+       /* start with base priority */
+       int priority = thread->base_pri - (thread->sched_usage >> thread->pri_shift);
 
-       if (thread->sched_mode == TH_MODE_TIMESHARE)
-               priority = do_priority_computation(thread);
-       else
-               priority = thread->priority;
+       if (priority < MINPRI_USER)
+               priority = MINPRI_USER;
+       else if (priority > MAXPRI_KERNEL)
+               priority = MAXPRI_KERNEL;
 
-       if ((!(thread->sched_flags & TH_SFLAG_PROMOTED_MASK) || (priority > thread->sched_pri)) &&
-               (!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) || override_depress)) {
-               set_sched_pri(thread, priority);
-       }
+       return priority;
 }
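
A worked example of the formula with illustrative numbers: a timeshare thread whose base_pri is 31 and whose usage term (sched_usage >> pri_shift) works out to 12 gets a scheduled priority of 19, which falls inside the [MINPRI_USER, MAXPRI_KERNEL] clamp and is returned unchanged.

        /* Illustrative only:
         *   base_pri == 31, (sched_usage >> pri_shift) == 12
         *   priority == 31 - 12 == 19   (within [MINPRI_USER, MAXPRI_KERNEL]) */
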
 
-/*
- *     compute_my_priority:
- *
- *     Reset the scheduled priority for
- *     a timesharing thread.
- *
- *     Only for use on the current thread
- *     if timesharing and not depressed.
- *
- *     Called with the thread locked.
- */
-void
-compute_my_priority(
-       register thread_t       thread)
-{
-       register int            priority;
-
-       priority = do_priority_computation(thread);
-       assert(thread->runq == PROCESSOR_NULL);
-
-       if (priority != thread->sched_pri) {
-               KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_DECAY_PRIORITY)|DBG_FUNC_NONE,
-                            (uintptr_t)thread_tid(thread),
-                            thread->priority,
-                            thread->sched_pri,
-                            priority,
-                            0);
-       }
-       thread->sched_pri = priority;
-}
 
 /*
  *     can_update_priority
@@ -455,42 +469,36 @@ update_priority(
                sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
        }
 
-
        /*
         *      Recompute scheduled priority if appropriate.
         */
-       if (    (thread->sched_mode == TH_MODE_TIMESHARE)       &&
-                       !(thread->sched_flags & TH_SFLAG_PROMOTED_MASK) &&
-                       !(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK)                ) {
-               register int            new_pri;
-
-               new_pri = do_priority_computation(thread);
-               if (new_pri != thread->sched_pri) {
-                       boolean_t               removed = thread_run_queue_remove(thread);
-
-#if 0
-                       if (sched_use_combined_fgbg_decay && ((thread)->task->max_priority > MAXPRI_THROTTLE) && (new_pri == MAXPRI_THROTTLE)) {
-                               /* with the alternate (new) algorithm, would we have decayed this far? */
-                               int alt_pri = thread->priority - (thread->sched_usage >> sched_pri_shift);
-                               if ((alt_pri > new_pri) && (sched_background_count > 0)) {
-                                       printf("thread %p would have decayed to only %d instead of %d\n", thread, alt_pri, new_pri);
-                               }
-                       }
-#endif
+       if (thread->sched_mode == TH_MODE_TIMESHARE) {
+               int priority = sched_compute_timeshare_priority(thread);
+
+               /*
+                * Adjust the scheduled priority like thread_recompute_sched_pri,
+                * except without setting an AST.
+                */
+               if ((!(thread->sched_flags & TH_SFLAG_PROMOTED_MASK)  || (priority > thread->sched_pri)) &&
+                   (!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK)) &&
+                   priority != thread->sched_pri) {
+
+                       boolean_t removed = thread_run_queue_remove(thread);
+
+                       thread->sched_pri = priority;
 
-                       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_DECAY_PRIORITY)|DBG_FUNC_NONE,
-                                                         (uintptr_t)thread_tid(thread),
-                                                         thread->priority,
-                                                         thread->sched_pri,
-                                                         new_pri,
-                                                         0);
-                       thread->sched_pri = new_pri;
+                       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHANGE_PRIORITY),
+                                             (uintptr_t)thread_tid(thread),
+                                             thread->base_pri,
+                                             thread->sched_pri,
+                                             0, /* eventually, 'reason' */
+                                             0);
 
                        if (removed)
-                               thread_setrun(thread, SCHED_TAILQ);
+                               thread_run_queue_reinsert(thread, SCHED_TAILQ);
                }
        }
-       
+
        return;
 }
 
@@ -588,6 +596,7 @@ void
 sched_set_thread_mode(thread_t thread, sched_mode_t new_mode)
 {
        assert_thread_sched_count(thread);
+       assert(thread->runq == PROCESSOR_NULL);
 
        sched_mode_t old_mode = thread->sched_mode;
 
@@ -648,17 +657,16 @@ sched_thread_mode_demote(thread_t thread, uint32_t reason)
 
        boolean_t removed = thread_run_queue_remove(thread);
 
-       if (thread->sched_mode == TH_MODE_REALTIME)
-               thread->priority = DEPRESSPRI;
-
        thread->sched_flags |= reason;
 
        thread->saved_mode = thread->sched_mode;
 
        sched_set_thread_mode(thread, TH_MODE_TIMESHARE);
 
+       thread_recompute_priority(thread);
+
        if (removed)
-               thread_setrun(thread, SCHED_TAILQ);
+               thread_run_queue_reinsert(thread, SCHED_TAILQ);
 
        assert_thread_sched_count(thread);
 }
@@ -690,14 +698,10 @@ sched_thread_mode_undemote(thread_t thread, uint32_t reason)
 
        thread->saved_mode = TH_MODE_NONE;
 
-       if (thread->sched_mode == TH_MODE_REALTIME) {
-               thread->priority = BASEPRI_RTQUEUES;
-       }
-
-       SCHED(compute_priority)(thread, FALSE);
+       thread_recompute_priority(thread);
 
        if (removed)
-               thread_setrun(thread, SCHED_TAILQ);
+               thread_run_queue_reinsert(thread, SCHED_TAILQ);
 }
 
 /*
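The new sched_compute_timeshare_priority() above replaces do_priority_computation(): the scheduled priority is the thread's base priority minus its decayed CPU usage (sched_usage shifted right by the per-thread pri_shift), clamped to the MINPRI_USER..MAXPRI_KERNEL band. A minimal user-space sketch of that arithmetic, using illustrative constant values and a stand-in struct rather than the kernel's struct thread:

#include <stdio.h>

#define MINPRI_USER   0     /* illustrative values, not necessarily the kernel's */
#define MAXPRI_KERNEL 95

struct fake_thread {
    int      base_pri;      /* stand-in for thread->base_pri */
    unsigned sched_usage;   /* accumulated, decayed CPU usage */
    unsigned pri_shift;     /* load-dependent shift applied to usage */
};

static int compute_timeshare_priority(const struct fake_thread *t)
{
    /* start with base priority, subtract decayed usage */
    int priority = t->base_pri - (int)(t->sched_usage >> t->pri_shift);

    /* clamp into the user..kernel priority band */
    if (priority < MINPRI_USER)
        priority = MINPRI_USER;
    else if (priority > MAXPRI_KERNEL)
        priority = MAXPRI_KERNEL;

    return priority;
}

int main(void)
{
    struct fake_thread t = { .base_pri = 31, .sched_usage = 1u << 12, .pri_shift = 10 };
    /* 31 - (4096 >> 10) = 31 - 4 = 27 */
    printf("%d\n", compute_timeshare_priority(&t));
    return 0;
}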
index 355b1b1dcb7bc859c004a21f4abdf9ad4bba417e..2c2dae409cc8443dc75ed087d38466aa8077d2f3 100644 (file)
@@ -78,6 +78,8 @@
 #include <ipc/ipc_port.h>
 #include <kern/kalloc.h>
 
+#include <security/mac_mach_internal.h>
+
 /*
  * Exported interface
  */
@@ -107,13 +109,6 @@ processor_t                master_processor;
 int                    master_cpu = 0;
 boolean_t              sched_stats_active = FALSE;
 
-/* Forwards */
-kern_return_t  processor_set_things(
-               processor_set_t         pset,
-               mach_port_t             **thing_list,
-               mach_msg_type_number_t  *count,
-               int                     type);
-
 void
 processor_bootstrap(void)
 {
@@ -160,10 +155,11 @@ processor_init(
        timer_call_setup(&processor->quantum_timer, thread_quantum_expire, processor);
        processor->quantum_end = UINT64_MAX;
        processor->deadline = UINT64_MAX;
-       processor->timeslice = 0;
+       processor->first_timeslice = FALSE;
        processor->processor_primary = processor; /* no SMT relationship known at this point */
        processor->processor_secondary = NULL;
        processor->is_SMT = FALSE;
+       processor->is_recommended = TRUE;
        processor->processor_self = IP_NULL;
        processor_data_init(processor);
        processor->processor_list = NULL;
@@ -229,11 +225,9 @@ processor_set_t
 pset_create(
        pset_node_t                     node)
 {
-#if defined(CONFIG_SCHED_MULTIQ)
-       /* multiq scheduler is not currently compatible with multiple psets */
-       if (sched_groups_enabled)
+       /* some schedulers do not support multiple psets */
+       if (SCHED(multiple_psets_enabled) == FALSE)
                return processor_pset(master_processor);
-#endif /* defined(CONFIG_SCHED_MULTIQ) */
 
        processor_set_t         *prev, pset = kalloc(sizeof (*pset));
 
@@ -274,6 +268,9 @@ pset_init(
        pset->cpu_set_low = pset->cpu_set_hi = 0;
        pset->cpu_set_count = 0;
        pset->pending_AST_cpu_mask = 0;
+#if defined(CONFIG_SCHED_DEFERRED_AST)
+       pset->pending_deferred_AST_cpu_mask = 0;
+#endif
        pset_lock_init(pset);
        pset->pset_self = IP_NULL;
        pset->pset_name_self = IP_NULL;
@@ -511,6 +508,7 @@ processor_start(
                thread->bound_processor = processor;
                processor->next_thread = thread;
                thread->state = TH_RUN;
+               thread->last_made_runnable_time = mach_absolute_time();
                thread_unlock(thread);
                splx(s);
 
@@ -797,9 +795,6 @@ processor_set_policy_disable(
        return (KERN_INVALID_ARGUMENT);
 }
 
-#define THING_TASK     0
-#define THING_THREAD   1
-
 /*
  *     processor_set_things:
  *
@@ -807,167 +802,225 @@ processor_set_policy_disable(
  */
 kern_return_t
 processor_set_things(
-       processor_set_t                 pset,
-       mach_port_t                             **thing_list,
-       mach_msg_type_number_t  *count,
-       int                                             type)
+       processor_set_t pset,
+       void **thing_list,
+       mach_msg_type_number_t *count,
+       int type)
 {
-       unsigned int actual;    /* this many things */
-       unsigned int maxthings;
-       unsigned int i;
+       unsigned int i, j, used;
+       task_t task;
+       thread_t thread;
+
+       task_t *task_list;
+       unsigned int actual_tasks;
+       vm_size_t task_size, task_size_needed;
 
+       thread_t *thread_list;
+       unsigned int actual_threads;
+       vm_size_t thread_size, thread_size_needed;
+
+       void *addr, *newaddr;
        vm_size_t size, size_needed;
-       void  *addr;
 
        if (pset == PROCESSOR_SET_NULL || pset != &pset0)
                return (KERN_INVALID_ARGUMENT);
 
-       size = 0;
-       addr = NULL;
+       task_size = 0;
+       task_size_needed = 0;
+       task_list = NULL;
+       actual_tasks = 0;
+
+       thread_size = 0;
+       thread_size_needed = 0;
+       thread_list = NULL;
+       actual_threads = 0;
 
        for (;;) {
                lck_mtx_lock(&tasks_threads_lock);
 
-               if (type == THING_TASK)
-                       maxthings = tasks_count;
-               else
-                       maxthings = threads_count;
-
                /* do we have the memory we need? */
+               if (type == PSET_THING_THREAD)
+                       thread_size_needed = threads_count * sizeof(void *);
+#if !CONFIG_MACF
+               else
+#endif
+                       task_size_needed = tasks_count * sizeof(void *);
 
-               size_needed = maxthings * sizeof (mach_port_t);
-               if (size_needed <= size)
+               if (task_size_needed <= task_size &&
+                   thread_size_needed <= thread_size)
                        break;
 
                /* unlock and allocate more memory */
                lck_mtx_unlock(&tasks_threads_lock);
 
-               if (size != 0)
-                       kfree(addr, size);
+               /* grow task array */
+               if (task_size_needed > task_size) {
+                       if (task_size != 0)
+                               kfree(task_list, task_size);
 
-               assert(size_needed > 0);
-               size = size_needed;
+                       assert(task_size_needed > 0);
+                       task_size = task_size_needed;
 
-               addr = kalloc(size);
-               if (addr == 0)
-                       return (KERN_RESOURCE_SHORTAGE);
-       }
+                       task_list = (task_t *)kalloc(task_size);
+                       if (task_list == NULL) {
+                               if (thread_size != 0)
+                                       kfree(thread_list, thread_size);
+                               return (KERN_RESOURCE_SHORTAGE);
+                       }
+               }
 
-       /* OK, have memory and the list locked */
+               /* grow thread array */
+               if (thread_size_needed > thread_size) {
+                       if (thread_size != 0)
+                               kfree(thread_list, thread_size);
 
-       actual = 0;
-       switch (type) {
+                       assert(thread_size_needed > 0);
+                       thread_size = thread_size_needed;
+
+                       thread_list = (thread_t *)kalloc(thread_size);
+                       if (thread_list == 0) {
+                               if (task_size != 0)
+                                       kfree(task_list, task_size);
+                               return (KERN_RESOURCE_SHORTAGE);
+                       }
+               }
+       }
 
-       case THING_TASK: {
-               task_t          task, *task_list = (task_t *)addr;
+       /* OK, have memory and the list locked */
 
+       /* If we need it, get the thread list */
+       if (type == PSET_THING_THREAD) {
+               for (thread = (thread_t)queue_first(&threads);
+                    !queue_end(&threads, (queue_entry_t)thread);
+                    thread = (thread_t)queue_next(&thread->threads)) {
+#if defined(SECURE_KERNEL)
+                       if (thread->task != kernel_task) {
+#endif
+                               thread_reference_internal(thread);
+                               thread_list[actual_threads++] = thread;
+#if defined(SECURE_KERNEL)
+                       }
+#endif
+               }
+       }
+#if !CONFIG_MACF
+         else {
+#endif
+               /* get a list of the tasks */
                for (task = (task_t)queue_first(&tasks);
-                                               !queue_end(&tasks, (queue_entry_t)task);
-                                                               task = (task_t)queue_next(&task->tasks)) {
+                    !queue_end(&tasks, (queue_entry_t)task);
+                    task = (task_t)queue_next(&task->tasks)) {
 #if defined(SECURE_KERNEL)
                        if (task != kernel_task) {
 #endif
                                task_reference_internal(task);
-                               task_list[actual++] = task;
+                               task_list[actual_tasks++] = task;
 #if defined(SECURE_KERNEL)
                        }
 #endif
                }
-
-               break;
-       }
-
-       case THING_THREAD: {
-               thread_t        thread, *thread_list = (thread_t *)addr;
-
-               for (thread = (thread_t)queue_first(&threads);
-                                               !queue_end(&threads, (queue_entry_t)thread);
-                                                               thread = (thread_t)queue_next(&thread->threads)) {
-                       thread_reference_internal(thread);
-                       thread_list[actual++] = thread;
-               }
-
-               break;
+#if !CONFIG_MACF
        }
+#endif
 
-       }
-               
        lck_mtx_unlock(&tasks_threads_lock);
 
-       if (actual < maxthings)
-               size_needed = actual * sizeof (mach_port_t);
-
-       if (actual == 0) {
-               /* no things, so return null pointer and deallocate memory */
-               *thing_list = NULL;
-               *count = 0;
-
-               if (size != 0)
-                       kfree(addr, size);
+#if CONFIG_MACF
+       /* for each task, make sure we are allowed to examine it */
+       for (i = used = 0; i < actual_tasks; i++) {
+               if (mac_task_check_expose_task(task_list[i])) {
+                       task_deallocate(task_list[i]);
+                       continue;
+               }
+               task_list[used++] = task_list[i];
        }
-       else {
-               /* if we allocated too much, must copy */
-
-               if (size_needed < size) {
-                       void *newaddr;
-
-                       newaddr = kalloc(size_needed);
-                       if (newaddr == 0) {
-                               switch (type) {
+       actual_tasks = used;
+       task_size_needed = actual_tasks * sizeof(void *);
 
-                               case THING_TASK: {
-                                       task_t          *task_list = (task_t *)addr;
+       if (type == PSET_THING_THREAD) {
 
-                                       for (i = 0; i < actual; i++)
-                                               task_deallocate(task_list[i]);
-                                       break;
-                               }
-
-                               case THING_THREAD: {
-                                       thread_t        *thread_list = (thread_t *)addr;
+               /* for each thread (if any), make sure its task is in the allowed list */
+               for (i = used = 0; i < actual_threads; i++) {
+                       boolean_t found_task = FALSE;
 
-                                       for (i = 0; i < actual; i++)
-                                               thread_deallocate(thread_list[i]);
+                       task = thread_list[i]->task;
+                       for (j = 0; j < actual_tasks; j++) {
+                               if (task_list[j] == task) {
+                                       found_task = TRUE;
                                        break;
                                }
-
-                               }
-
-                               kfree(addr, size);
-                               return (KERN_RESOURCE_SHORTAGE);
                        }
-
-                       bcopy((void *) addr, (void *) newaddr, size_needed);
-                       kfree(addr, size);
-                       addr = newaddr;
+                       if (found_task)
+                               thread_list[used++] = thread_list[i];
+                       else
+                               thread_deallocate(thread_list[i]);
                }
+               actual_threads = used;
+               thread_size_needed = actual_threads * sizeof(void *);
+
+               /* done with the task list */
+               for (i = 0; i < actual_tasks; i++)
+                       task_deallocate(task_list[i]);
+               kfree(task_list, task_size);
+               task_size = 0;
+               actual_tasks = 0;
+               task_list = NULL;
+       }
+#endif
 
-               *thing_list = (mach_port_t *)addr;
-               *count = actual;
-
-               /* do the conversion that Mig should handle */
-
-               switch (type) {
-
-               case THING_TASK: {
-                       task_t          *task_list = (task_t *)addr;
-
-                       for (i = 0; i < actual; i++)
-                               (*thing_list)[i] = convert_task_to_port(task_list[i]);
-                       break;
+       if (type == PSET_THING_THREAD) {
+               if (actual_threads == 0) {
+                       /* no threads available to return */
+                       assert(task_size == 0);
+                       if (thread_size != 0)
+                               kfree(thread_list, thread_size);
+                       *thing_list = NULL;
+                       *count = 0;
+                       return KERN_SUCCESS;
                }
+               size_needed = actual_threads * sizeof(void *);
+               size = thread_size;
+               addr = thread_list;
+       } else {
+               if (actual_tasks == 0) {
+                       /* no tasks available to return */
+                       assert(thread_size == 0);
+                       if (task_size != 0)
+                               kfree(task_list, task_size);
+                       *thing_list = NULL;
+                       *count = 0;
+                       return KERN_SUCCESS;
+               } 
+               size_needed = actual_tasks * sizeof(void *);
+               size = task_size;
+               addr = task_list;
+       }
 
-               case THING_THREAD: {
-                       thread_t        *thread_list = (thread_t *)addr;
-
-                       for (i = 0; i < actual; i++)
-                               (*thing_list)[i] = convert_thread_to_port(thread_list[i]);
-                       break;
+       /* if we allocated too much, must copy */
+       if (size_needed < size) {
+               newaddr = kalloc(size_needed);
+               if (newaddr == 0) {
+                       for (i = 0; i < actual_tasks; i++) {
+                               if (type == PSET_THING_THREAD)
+                                       thread_deallocate(thread_list[i]);
+                               else
+                                       task_deallocate(task_list[i]);
+                       }
+                       if (size)
+                               kfree(addr, size);
+                       return (KERN_RESOURCE_SHORTAGE);
                }
 
-               }
+               bcopy((void *) addr, (void *) newaddr, size_needed);
+               kfree(addr, size);
+
+               addr = newaddr;
+               size = size_needed;
        }
 
+       *thing_list = (void **)addr;
+       *count = (unsigned int)size / sizeof(void *);
+
        return (KERN_SUCCESS);
 }
 
@@ -983,7 +1036,17 @@ processor_set_tasks(
        task_array_t            *task_list,
        mach_msg_type_number_t  *count)
 {
-    return(processor_set_things(pset, (mach_port_t **)task_list, count, THING_TASK));
+       kern_return_t ret;
+       mach_msg_type_number_t i;
+
+       ret = processor_set_things(pset, (void **)task_list, count, PSET_THING_TASK);
+       if (ret != KERN_SUCCESS)
+               return ret;
+
+       /* do the conversion that Mig should handle */
+       for (i = 0; i < *count; i++)
+               (*task_list)[i] = (task_t)convert_task_to_port((*task_list)[i]);
+       return KERN_SUCCESS;
 }
 
 /*
@@ -1007,7 +1070,17 @@ processor_set_threads(
        thread_array_t          *thread_list,
        mach_msg_type_number_t  *count)
 {
-    return(processor_set_things(pset, (mach_port_t **)thread_list, count, THING_THREAD));
+       kern_return_t ret;
+       mach_msg_type_number_t i;
+
+       ret = processor_set_things(pset, (void **)thread_list, count, PSET_THING_THREAD);
+       if (ret != KERN_SUCCESS)
+               return ret;
+
+       /* do the conversion that Mig should handle */
+       for (i = 0; i < *count; i++)
+               (*thread_list)[i] = (thread_t)convert_thread_to_port((*thread_list)[i]);
+       return KERN_SUCCESS;
 }
 #endif
 
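The rewritten processor_set_things() above follows a common Mach pattern: take tasks_threads_lock, compute how much buffer space the current task and thread counts require, and if the buffers on hand are too small, drop the lock, grow them, and retry, because the counts can change while the lock is not held. A self-contained, hedged sketch of just that allocate-and-retry loop; the lock, counter, and function names here are stand-ins, not the kernel's:

#include <pthread.h>
#include <stdlib.h>

/* stand-ins for tasks_threads_lock and tasks_count */
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned list_count;

static void **snapshot_list(unsigned *out_count)
{
    void **buf = NULL;
    size_t size = 0, size_needed;

    for (;;) {
        pthread_mutex_lock(&list_lock);

        /* do we have the memory we need? */
        size_needed = list_count * sizeof(void *);
        if (size_needed <= size)
            break;                          /* keep the lock; copy below */

        /* unlock and allocate more; the count may change meanwhile */
        pthread_mutex_unlock(&list_lock);
        free(buf);
        size = size_needed;
        buf = malloc(size);
        if (buf == NULL)
            return NULL;                    /* resource shortage */
    }

    /* ... copy up to list_count entries into buf while still locked ... */
    *out_count = list_count;
    pthread_mutex_unlock(&list_lock);
    return buf;
}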
index 922e20aa26f13e0aadb2e584fc5a67ccd3cb9e58..dd458613891c0a1e7819a1eda8c104989cdd31bf 100644 (file)
@@ -74,6 +74,7 @@
 #include <mach/mach_types.h>
 #include <kern/ast.h>
 #include <kern/cpu_number.h>
+#include <kern/smp.h>
 #include <kern/simple_lock.h>
 #include <kern/locks.h>
 #include <kern/queue.h>
@@ -81,8 +82,6 @@
 #include <mach/sfi_class.h>
 #include <kern/processor_data.h>
 
-#include <machine/ast_types.h>
-
 struct processor_set {
        queue_head_t            active_queue;   /* active processors */
        queue_head_t            idle_queue;             /* idle processors */
@@ -93,7 +92,9 @@ struct processor_set {
        int                                     cpu_set_low, cpu_set_hi;
        int                                     cpu_set_count;
 
+#if __SMP__
        decl_simple_lock_data(,sched_lock)      /* lock for above */
+#endif
 
 #if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_MULTIQ)
        struct run_queue        pset_runq;      /* runq for this processor set */
@@ -105,7 +106,21 @@ struct processor_set {
 #endif
 
        /* CPUs that have been sent an unacknowledged remote AST for scheduling purposes */
-       uint32_t                        pending_AST_cpu_mask;
+       uint64_t                        pending_AST_cpu_mask;
+#if defined(CONFIG_SCHED_DEFERRED_AST)
+       /*
+        * A separate mask, for ASTs that we may be able to cancel.  This is dependent on
+        * some level of support for requesting an AST on a processor, and then quashing
+        * that request later.
+        *
+        * The purpose of this field (and the associated codepaths) is to infer when we
+        * no longer need a processor that is DISPATCHING to come up, and to prevent it
+        * from coming out of IDLE if possible.  This should serve to decrease the number
+        * of spurious ASTs in the system, and let processors spend longer periods in
+        * IDLE.
+        */
+       uint64_t                        pending_deferred_AST_cpu_mask;
+#endif
 
        struct ipc_port *       pset_self;              /* port for operations */
        struct ipc_port *       pset_name_self; /* port for information */
@@ -136,6 +151,7 @@ struct processor {
                                                                                 * MUST remain the first element */
        int                                     state;                  /* See below */
        boolean_t               is_SMT;
+       boolean_t               is_recommended;
        struct thread
                                                *active_thread, /* thread running on processor */
                                                *next_thread,   /* next thread when dispatched */
@@ -153,7 +169,7 @@ struct processor {
        uint64_t                        last_dispatch;  /* time of last dispatch */
 
        uint64_t                        deadline;               /* current deadline */
-       int                                     timeslice;              /* quanta before timeslice ends */
+       boolean_t               first_timeslice;                /* has the quantum expired since context switch */
 
 #if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_MULTIQ)
        struct run_queue        runq;                   /* runq for this processor */
@@ -211,7 +227,28 @@ extern boolean_t           sched_stats_active;
  *  When a processor is in DISPATCHING or RUNNING state, the current_pri,
  *  current_thmode, and deadline fields should be set, so that other
  *  processors can evaluate if it is an appropriate candidate for preemption.
-*/
+ */
+#if defined(CONFIG_SCHED_DEFERRED_AST)
+/*
+ *           -------------------- SHUTDOWN
+ *          /                     ^     ^
+ *        _/                      |      \
+ *  OFF_LINE ---> START ---> RUNNING ---> IDLE ---> DISPATCHING
+ *         \_________________^   ^ ^______/ ^_____ /  /
+ *                                \__________________/
+ *
+ *  A DISPATCHING processor may be put back into IDLE, if another
+ *  processor determines that the target processor will have nothing to do
+ *  upon reaching the RUNNING state.  This is racy, but if the target
+ *  responds and becomes RUNNING, it will not break the processor state
+ *  machine.
+ *
+ *  This change allows us to cancel an outstanding signal/AST on a processor
+ *  (if such an operation is supported through hardware or software), and
+ *  push the processor back into the IDLE state as a power optimization.
+ */
+#endif
+
 #define PROCESSOR_OFF_LINE             0       /* Not available */
 #define PROCESSOR_SHUTDOWN             1       /* Going off-line */
 #define PROCESSOR_START                        2       /* Being started */
@@ -222,11 +259,17 @@ extern boolean_t          sched_stats_active;
 
 extern processor_t     current_processor(void);
 
-/* Lock macros */
+/* Lock macros, always acquired and released with interrupts disabled (splsched()) */
 
+#if __SMP__
 #define pset_lock(p)                   simple_lock(&(p)->sched_lock)
 #define pset_unlock(p)                 simple_unlock(&(p)->sched_lock)
 #define pset_lock_init(p)              simple_lock_init(&(p)->sched_lock, 0)
+#else
+#define pset_lock(p)                   do { (void)p; } while(0)
+#define pset_unlock(p)                 do { (void)p; } while(0)
+#define pset_lock_init(p)              do { (void)p; } while(0)
+#endif
 
 extern void            processor_bootstrap(void);
 
@@ -273,6 +316,15 @@ extern processor_t         machine_choose_processor(
 
 #define next_pset(p)   (((p)->pset_list != PROCESSOR_SET_NULL)? (p)->pset_list: (p)->node->psets)
 
+#define PSET_THING_TASK                0
+#define PSET_THING_THREAD      1
+
+extern kern_return_t   processor_set_things(
+                       processor_set_t pset,
+                       void **thing_list,
+                       mach_msg_type_number_t *count,
+                       int type);
+
 #else  /* MACH_KERNEL_PRIVATE */
 
 __BEGIN_DECLS
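pending_AST_cpu_mask and the new pending_deferred_AST_cpu_mask are per-pset bitmasks with one bit per CPU; widening them to uint64_t allows up to 64 processors per set, and keeping a second mask for deferrable ASTs is what lets the scheduler later quash a request and keep the target processor in IDLE. A small sketch of the bit bookkeeping such a pair of masks implies; the helper names and cancel policy below are illustrative only, not the kernel's actual code paths:

#include <stdint.h>
#include <stdbool.h>

/* stand-in for the per-pset masks; one bit per CPU id (0..63) */
struct pset_masks {
    uint64_t pending_AST_cpu_mask;
    uint64_t pending_deferred_AST_cpu_mask;
};

/* record that 'cpu' has been sent an AST we may later be able to cancel */
static inline void deferred_ast_request(struct pset_masks *p, int cpu)
{
    p->pending_AST_cpu_mask          |= (1ULL << cpu);
    p->pending_deferred_AST_cpu_mask |= (1ULL << cpu);
}

/* quash a deferred AST if it is still pending; returns true if cancelled */
static inline bool deferred_ast_cancel(struct pset_masks *p, int cpu)
{
    uint64_t bit = 1ULL << cpu;

    if ((p->pending_deferred_AST_cpu_mask & bit) == 0)
        return false;                 /* nothing left to cancel */

    p->pending_deferred_AST_cpu_mask &= ~bit;
    p->pending_AST_cpu_mask          &= ~bit;
    return true;
}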
index da90d7b40492a45cac0c85b16c0ab242c097bb59..f2a1a8ba3efea646e187b064bb60e67809570eb4 100644 (file)
@@ -81,6 +81,14 @@ struct processor_data {
                ipc_kmsg_t                              entries[IKM_STASH];
                unsigned int                    avail;
        }                                               ikm_cache;
+
+       /* waitq prepost cache */
+#define WQP_CACHE_MAX  50
+       struct wqp_cache {
+               uint64_t                head;
+               unsigned int            avail;
+       } wqp_cache;
+
        int                                             start_color;
        unsigned long                   page_grab_count;
        void                                    *free_pages;
index 338395b17e333e135e1e18466bd550234b3670b5..f45899ce220e4fbeeb2b40c719f49c0c8150373e 100644 (file)
 __BEGIN_DECLS
 
 /*
- *     Queue of abstract objects.  Queue is maintained
- *     within that object.
+ * Queue Management APIs
  *
- *     Supports fast removal from within the queue.
+ * There are currently two subtly different methods of maintaining
+ * a queue of objects. Both APIs are contained in this file, and
+ * unfortunately overlap.
+ * (there is also a third way maintained in bsd/sys/queue.h)
  *
- *     How to declare a queue of elements of type "foo_t":
- *             In the "*foo_t" type, you must have a field of
- *             type "queue_chain_t" to hold together this queue.
- *             There may be more than one chain through a
- *             "foo_t", for use by different queues.
+ * Both methods use a common queue head and linkage pattern:
+ *     The head of a queue is declared as:
+ *             queue_head_t q_head;
  *
- *             Declare the queue as a "queue_t" type.
+ *     Elements in this queue are chained together using
+ *     struct queue_entry objects embedded within a structure:
+ *             struct some_data {
+ *                     int field1;
+ *                     int field2;
+ *                     ...
+ *                     queue_chain_t link;
+ *                     ...
+ *                     int last_field;
+ *             };
+ *     struct some_data is referred to as the queue "element."
+ *     (note that queue_chain_t is typedef'd to struct queue_entry)
  *
- *             Elements of the queue (of type "foo_t", that is)
- *             are referred to by reference, and cast to type
- *             "queue_entry_t" within this module.
+ * IMPORTANT: The two queue iteration methods described below are not
+ *            compatible with one another. You must choose one and be careful
+ *            to use only the supported APIs for that method.
+ *
+ * Method 1: chaining of queue_chain_t (linkage chains)
+ *     This method uses the next and prev pointers of the struct queue_entry
+ *     linkage object embedded in a queue element to point to the next or
+ *     previous queue_entry structure in the chain. The head of the queue
+ *     (the queue_head_t object) will point to the first and last
+ *     struct queue_entry object, and both the next and prev pointer will
+ *     point back to the head if the queue is empty.
+ *
+ *     This method is the most flexible method of chaining objects together
+ *     as it allows multiple chains through a given object, by embedding
+ *     multiple queue_chain_t objects in the structure, while simultaneously
+ *     providing fast removal and insertion into the queue using only
+ *     struct queue_entry object pointers.
+ *
+ *     ++ Valid APIs for this style queue ++
+ *     -------------------------------------
+ *             [C] queue_init
+ *             [C] queue_first
+ *             [C] queue_next
+ *             [C] queue_last
+ *             [C] queue_prev
+ *             [C] queue_end
+ *             [C] queue_empty
+ *
+ *             [1] enqueue
+ *             [1] dequeue
+ *             [1] enqueue_head
+ *             [1] enqueue_tail
+ *             [1] dequeue_head
+ *             [1] dequeue_tail
+ *             [1] remqueue
+ *             [1] insque
+ *             [1] remque
+ *             [1] re_queue
+ *             [1] re_queue_tail
+ *             [1] movqueue
+ *             [1] qe_element
+ *             [1] qe_foreach
+ *             [1] qe_foreach_safe
+ *             [1] qe_foreach_element
+ *             [1] qe_foreach_element_safe
+ *
+ * Method 2: chaining of elements (element chains)
+ *     This method uses the next and prev pointers of the struct queue_entry
+ *     linkage object embedded in a queue element to point to the next or
+ *     previous queue element (not another queue_entry). The head of the
+ *     queue will point to the first and last queue element (struct some_data
+ *     from the above example) NOT the embedded queue_entry structure. The
+ *     first queue element will have a prev pointer that points to the
+ *     queue_head_t, and the last queue element will have a next pointer
+ *     that points to the queue_head_t.
+ *
+ *     This method requires knowledge of the queue_head_t of the queue on
+ *     which an element resides in order to remove the element. Iterating
+ *     through the elements of the queue is also more cumbersome because
+ *     a check against the head pointer plus a cast then offset operation
+ *     must be performed at each step of the iteration.
+ *
+ *     ++ Valid APIs for this style queue ++
+ *     -------------------------------------
+ *             [C] queue_init
+ *             [C] queue_first
+ *             [C] queue_next
+ *             [C] queue_last
+ *             [C] queue_prev
+ *             [C] queue_end
+ *             [C] queue_empty
+ *
+ *             [2] queue_enter
+ *             [2] queue_enter_first
+ *             [2] queue_insert_before
+ *             [2] queue_insert_after
+ *             [2] queue_field
+ *             [2] queue_remove
+ *             [2] queue_remove_first
+ *             [2] queue_remove_last
+ *             [2] queue_assign
+ *             [2] queue_new_head
+ *             [2] queue_iterate
+ *
+ * Legend:
+ *     [C] -> API common to both methods
+ *     [1] -> API used only in method 1 (linkage chains)
+ *     [2] -> API used only in method 2 (element chains)
  */
 
 /*
@@ -100,6 +196,7 @@ __BEGIN_DECLS
 struct queue_entry {
        struct queue_entry      *next;          /* next element */
        struct queue_entry      *prev;          /* previous element */
+
 };
 
 typedef struct queue_entry     *queue_t;
@@ -258,6 +355,171 @@ remque(
        __DEQUEUE_ELT_CLEANUP(elt);
 }
 
+/*
+ *     Function:       re_queue_head
+ *     Parameters:
+ *             queue_t que       : queue onto which elt will be pre-pended
+ *             queue_entry_t elt : element to re-queue
+ *     Description:
+ *             Remove elt from its current queue and put it onto the
+ *             head of a new queue
+ *     Note:
+ *             This should only be used with Method 1 queue iteration (linkage chains)
+ */
+static __inline__ void
+re_queue_head(queue_t que, queue_entry_t elt)
+{
+       queue_entry_t   n_elt, p_elt;
+
+       __QUEUE_ELT_VALIDATE(elt);
+       __QUEUE_ELT_VALIDATE((queue_entry_t)que);
+
+       /* remqueue */
+       n_elt = elt->next;
+       p_elt = elt->prev; /* next_elt may equal prev_elt (and the queue head) if elt was the only element */
+       n_elt->prev = p_elt;
+       p_elt->next = n_elt;
+
+       /* enqueue_head */
+       n_elt = que->next;
+       elt->next = n_elt;
+       elt->prev = que;
+       n_elt->prev = elt;
+       que->next = elt;
+}
+
+/*
+ *     Function:       re_queue_tail
+ *     Parameters:
+ *             queue_t que       : queue onto which elt will be appended
+ *             queue_entry_t elt : element to re-queue
+ *     Description:
+ *             Remove elt from its current queue and put it onto the
+ *             end of a new queue
+ *     Note:
+ *             This should only be used with Method 1 queue iteration (linkage chains)
+ */
+static __inline__ void
+re_queue_tail(queue_t que, queue_entry_t elt)
+{
+       queue_entry_t   n_elt, p_elt;
+
+       __QUEUE_ELT_VALIDATE(elt);
+       __QUEUE_ELT_VALIDATE((queue_entry_t)que);
+
+       /* remqueue */
+       n_elt = elt->next;
+       p_elt = elt->prev; /* next_elt may equal prev_elt (and the queue head) if elt was the only element */
+       n_elt->prev = p_elt;
+       p_elt->next = n_elt;
+
+       /* enqueue_tail */
+       p_elt = que->prev;
+       elt->next = que;
+       elt->prev = p_elt;
+       p_elt->next = elt;
+       que->prev = elt;
+}
+
+/*
+ *     Macro:          qe_element
+ *     Function:
+ *             Convert a queue_entry_t to a queue element pointer.
+ *             Get a pointer to the user-defined element containing
+ *             a given queue_entry_t
+ *     Header:
+ *             <type> * qe_element(queue_entry_t qe, <type>, field)
+ *                     qe      - queue entry to convert
+ *                     <type>  - what's in the queue (e.g., struct some_data)
+ *                     <field> - is the chain field in <type>
+ *     Note:
+ *             Do not use pointer types for <type>
+ */
+#define        qe_element(qe, type, field) \
+       ((type *)((void *)((char *)(qe) - __offsetof(type, field))))
+
+/*
+ *     Macro:          qe_foreach
+ *     Function:
+ *             Iterate over each queue_entry_t structure.
+ *             Generates a 'for' loop, setting 'qe' to
+ *             each queue_entry_t in the queue.
+ *     Header:
+ *             qe_foreach(queue_entry_t qe, queue_t head)
+ *                     qe   - iteration variable
+ *                     head - pointer to queue_head_t (head of queue)
+ *     Note:
+ *             This should only be used with Method 1 queue iteration (linkage chains)
+ */
+#define qe_foreach(qe, head) \
+       for (qe = (head)->next; qe != (head); qe = (qe)->next)
+
+/*
+ *     Macro:          qe_foreach_safe
+ *     Function:
+ *             Safely iterate over each queue_entry_t structure.
+ *
+ *             Use this iterator macro if you plan to remove the
+ *             queue_entry_t, qe, from the queue during the
+ *             iteration.
+ *     Header:
+ *             qe_foreach_safe(queue_entry_t qe, queue_t head)
+ *                     qe   - iteration variable
+ *                     head - pointer to queue_head_t (head of queue)
+ *     Note:
+ *             This should only be used with Method 1 queue iteration (linkage chains)
+ */
+#define qe_foreach_safe(qe, head) \
+       for (queue_entry_t _ne = ((head)->next)->next, \
+                __ ## qe ## _unused_shadow __unused = (qe = (head)->next); \
+            qe != (head); \
+            qe = _ne, _ne = (qe)->next)
+
+/*
+ *     Macro:          qe_foreach_element
+ *     Function:
+ *             Iterate over each _element_ in a queue
+ *             where each queue_entry_t points to another
+ *             queue_entry_t, i.e., managed by the [de|en]queue_head/
+ *             [de|en]queue_tail / remqueue / etc. function.
+ *     Header:
+ *             qe_foreach_element(<type> *elt, queue_t head, <field>)
+ *                     elt     - iteration variable
+ *                     <type>  - what's in the queue (e.g., struct some_data)
+ *                     <field> - is the chain field in <type>
+ *     Note:
+ *             This should only be used with Method 1 queue iteration (linkage chains)
+ */
+#define qe_foreach_element(elt, head, field) \
+       for (elt = qe_element((head)->next, typeof(*(elt)), field); \
+            &((elt)->field) != (head); \
+            elt = qe_element((elt)->field.next, typeof(*(elt)), field))
+
+/*
+ *     Macro:          qe_foreach_element_safe
+ *     Function:
+ *             Safely iterate over each _element_ in a queue
+ *             where each queue_entry_t points to another
+ *             queue_entry_t, i.e., managed by the [de|en]queue_head/
+ *             [de|en]queue_tail / remqueue / etc. function.
+ *
+ *             Use this iterator macro if you plan to remove the
+ *             element, elt, from the queue during the iteration.
+ *     Header:
+ *             qe_foreach_element_safe(<type> *elt, queue_t head, <field>)
+ *                     elt     - iteration variable
+ *                     <type>  - what's in the queue (e.g., struct some_data)
+ *                     <field> - is the chain field in <type>
+ *     Note:
+ *             This should only be used with Method 1 queue iteration (linkage chains)
+ */
+#define qe_foreach_element_safe(elt, head, field) \
+       for (typeof(*(elt)) *_nelt = qe_element(((head)->next)->next, typeof(*(elt)), field), \
+            *__ ## elt ## _unused_shadow __unused = \
+                (elt = qe_element((head)->next, typeof(*(elt)), field)); \
+            &((elt)->field) != (head); \
+            elt = _nelt, _nelt = qe_element((elt)->field.next, typeof(*(elt)), field)) \
+
 /*
  *     Macro:          queue_init
  *     Function:
@@ -272,6 +534,28 @@ MACRO_BEGIN                \
        (q)->prev = (q);\
 MACRO_END
 
+/*
+ *     Macro:          queue_head_init
+ *     Function:
+ *             Initialize the given queue head
+ *     Header:
+ *             void queue_head_init(q)
+ *                     queue_head_t    q;      \* MODIFIED *\
+ */
+#define queue_head_init(q) \
+       queue_init(&(q))
+
+/*
+ *     Macro:          queue_chain_init
+ *     Function:
+ *             Initialize the given queue chain element
+ *     Header:
+ *             void queue_chain_init(q)
+ *                     queue_chain_t   q;      \* MODIFIED *\
+ */
+#define queue_chain_init(q) \
+       queue_init(&(q))
+
 /*
  *     Macro:          queue_first
  *     Function:
@@ -334,6 +618,47 @@ MACRO_END
  */
 #define        queue_empty(q)          queue_end((q), queue_first(q))
 
+/*
+ *     Function:       movqueue
+ *     Parameters:
+ *             queue_t _old : head of a queue whose items will be moved
+ *             queue_t _new : new queue head onto which items will be moved
+ *     Description:
+ *             Rebase queue items in _old onto _new then re-initialize
+ *             the _old object to an empty queue.
+ *             Equivalent to the queue_new_head Method 2 macro
+ *     Note:
+ *             Similar to the queue_new_head macro, this function is intended
+ *             to function as an initializer method for '_new' and thus may
+ *             leak any list items that happen to be on the '_new' list.
+ *             This should only be used with Method 1 queue iteration (linkage chains)
+ */
+static __inline__ void
+movqueue(queue_t _old, queue_t _new)
+{
+       queue_entry_t   next_elt, prev_elt;
+
+       __QUEUE_ELT_VALIDATE((queue_entry_t)_old);
+
+       if (queue_empty(_old)) {
+               queue_init(_new);
+               return;
+       }
+
+       /*
+        * move the queue at _old to _new
+        * and re-initialize _old
+        */
+       next_elt = _old->next;
+       prev_elt = _old->prev;
+
+       _new->next = next_elt;
+       _new->prev = prev_elt;
+       next_elt->prev = _new;
+       prev_elt->next = _new;
+
+       queue_init(_old);
+}
 
 /*----------------------------------------------------------------*/
 /*
@@ -352,6 +677,8 @@ MACRO_END
  *                     <type> elt;
  *                     <type> is what's in our queue
  *                     <field> is the chain field in (*<type>)
+ *     Note:
+ *             This should only be used with Method 2 queue iteration (element chains)
  */
 #define queue_enter(head, elt, type, field)                    \
 MACRO_BEGIN                                                    \
@@ -380,6 +707,8 @@ MACRO_END
  *                     <type> elt;
  *                     <type> is what's in our queue
  *                     <field> is the chain field in (*<type>)
+ *     Note:
+ *             This should only be used with Method 2 queue iteration (element chains)
  */
 #define queue_enter_first(head, elt, type, field)              \
 MACRO_BEGIN                                                    \
@@ -409,6 +738,8 @@ MACRO_END
  *                     <type> cur;
  *                     <type> is what's in our queue
  *                     <field> is the chain field in (*<type>)
+ *     Note:
+ *             This should only be used with Method 2 queue iteration (element chains)
  */
 #define queue_insert_before(head, elt, cur, type, field)               \
 MACRO_BEGIN                                                            \
@@ -451,6 +782,8 @@ MACRO_END
  *                     <type> cur;
  *                     <type> is what's in our queue
  *                     <field> is the chain field in (*<type>)
+ *     Note:
+ *             This should only be used with Method 2 queue iteration (element chains)
  */
 #define queue_insert_after(head, elt, cur, type, field)                        \
 MACRO_BEGIN                                                            \
@@ -487,6 +820,8 @@ MACRO_END
  *     Function:
  *             Find the queue_chain_t (or queue_t) for the
  *             given element (thing) in the given queue (head)
+ *     Note:
+ *             This should only be used with Method 2 queue iteration (element chains)
  */
 #define        queue_field(head, thing, type, field)                   \
                (((head) == (thing)) ? (head) : &((type)(void *)(thing))->field)
@@ -498,6 +833,8 @@ MACRO_END
  *     Header:
  *             void queue_remove(q, qe, type, field)
  *                     arguments as in queue_enter
+ *     Note:
+ *             This should only be used with Method 2 queue iteration (element chains)
  */
 #define        queue_remove(head, elt, type, field)                    \
 MACRO_BEGIN                                                    \
@@ -528,6 +865,8 @@ MACRO_END
  *     Header:
  *             queue_remove_first(head, entry, type, field)
  *             entry is returned by reference
+ *     Note:
+ *             This should only be used with Method 2 queue iteration (element chains)
  */
 #define        queue_remove_first(head, entry, type, field)            \
 MACRO_BEGIN                                                    \
@@ -554,6 +893,8 @@ MACRO_END
  *     Header:
  *             queue_remove_last(head, entry, type, field)
  *             entry is returned by reference
+ *     Note:
+ *             This should only be used with Method 2 queue iteration (element chains)
  */
 #define        queue_remove_last(head, entry, type, field)             \
 MACRO_BEGIN                                                    \
@@ -574,6 +915,8 @@ MACRO_END
 
 /*
  *     Macro:          queue_assign
+ *     Note:
+ *             This should only be used with Method 2 queue iteration (element chains)
  */
 #define        queue_assign(to, from, type, field)                     \
 MACRO_BEGIN                                                    \
@@ -592,6 +935,8 @@ MACRO_END
  *                     queue_t new;
  *                     <type> is what's in our queue
  *                      <field> is the chain field in (*<type>)
+ *     Note:
+ *             This should only be used with Method 2 queue iteration (element chains)
  */
 #define queue_new_head(old, new, type, field)                  \
 MACRO_BEGIN                                                    \
@@ -618,6 +963,8 @@ MACRO_END
  *                     <type> elt;
  *                     <type> is what's in our queue
  *                     <field> is the chain field in (*<type>)
+ *     Note:
+ *             This should only be used with Method 2 queue iteration (element chains)
  */
 #define queue_iterate(head, elt, type, field)                  \
        for ((elt) = (type)(void *) queue_first(head);          \
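To make the Method 1 / Method 2 distinction documented above concrete, here is a hedged usage sketch of the Method 1 (linkage chain) style together with the new qe_foreach_element iterator. It assumes a kernel build environment where <kern/queue.h> is available, and struct widget is a hypothetical element type, not something in the tree:

#include <kern/queue.h>     /* kernel-only header; this sketch assumes a kernel build */

/* hypothetical element type; 'link' is the embedded queue_chain_t */
struct widget {
    int           id;
    queue_chain_t link;
};

static queue_head_t widget_queue;

static void
widget_queue_example(struct widget *w1, struct widget *w2)
{
    queue_init(&widget_queue);

    /* Method 1 (linkage chains): enqueue by the embedded queue_entry */
    enqueue_tail(&widget_queue, &w1->link);
    enqueue_head(&widget_queue, &w2->link);

    /* iterate over elements with the new Method 1 helper */
    struct widget *w;
    qe_foreach_element(w, &widget_queue, link) {
        (void)w->id;
    }

    /* removal needs only the element's own linkage, not the queue head */
    remqueue(&w1->link);
}

Mixing these calls with the Method 2 macros (queue_enter, queue_remove, queue_iterate) on the same queue would corrupt it, which is exactly the incompatibility the header comment warns about.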
index 980891362d80efa735df85951ddc12011ea2d059..1a46180a9d28946b74f7bd4d03d985664abbcbc5 100644 (file)
@@ -170,7 +170,6 @@ typedef enum {
        TH_MODE_REALTIME,                                       /* time constraints supplied */
        TH_MODE_FIXED,                                          /* use fixed priorities, no decay */
        TH_MODE_TIMESHARE,                                      /* use timesharing algorithm */
-       TH_MODE_FAIRSHARE                                       /* use fair-share scheduling */         
 } sched_mode_t;
 
 /*
@@ -204,15 +203,6 @@ struct rt_queue {
        struct runq_stats       runq_stats;
 };
 
-#if defined(CONFIG_SCHED_FAIRSHARE_CORE)
-struct fairshare_queue {
-       int                                     count;                          /* # of threads total */
-       queue_head_t            queue;                          /* all runnable threads demoted to fairshare scheduling */
-       
-       struct runq_stats       runq_stats;
-};
-#endif /* CONFIG_SCHED_FAIRSHARE_CORE */
-
 #if defined(CONFIG_SCHED_GRRR_CORE)
 
 /*
@@ -259,17 +249,14 @@ struct grrr_run_queue {
 
 #endif /* defined(CONFIG_SCHED_GRRR_CORE) */
 
-#define first_timeslice(processor)             ((processor)->timeslice > 0)
-
 extern struct rt_queue         rt_runq;
 
 #if defined(CONFIG_SCHED_MULTIQ)
 sched_group_t   sched_group_create(void);
 void            sched_group_destroy(sched_group_t sched_group);
+#endif /* defined(CONFIG_SCHED_MULTIQ) */
 
-extern boolean_t sched_groups_enabled;
 
-#endif /* defined(CONFIG_SCHED_MULTIQ) */
 
 /*
  *     Scheduler routines.
@@ -348,7 +335,7 @@ extern uint32_t             sched_fixed_shift;
 extern int8_t          sched_load_shifts[NRQS];
 extern uint32_t                sched_decay_usage_age_factor;
 extern uint32_t                sched_use_combined_fgbg_decay;
-void sched_traditional_consider_maintenance(uint64_t);
+void sched_timeshare_consider_maintenance(uint64_t);
 #endif /* CONFIG_SCHED_TIMESHARE_CORE */
 
 extern int32_t         sched_poll_yield_shift;
index b3edb3d9cc77acd72e13418bb5f4dda6bb94282d..a23ef953fe8f00de0a40a1c2ffd42e4ca66a9169 100644 (file)
@@ -75,7 +75,7 @@
 uint32_t       avenrun[3] = {0, 0, 0};
 uint32_t       mach_factor[3] = {0, 0, 0};
 
-#if defined(CONFIG_SCHED_TRADITIONAL)
+#if defined(CONFIG_SCHED_TIMESHARE_CORE)
 /*
  * Values are scaled by LOAD_SCALE, defined in processor_info.h
  */
@@ -91,7 +91,7 @@ static uint32_t               fract[3] = {
 #undef base
 #undef frac
 
-#endif /* CONFIG_SCHED_TRADITIONAL */
+#endif /* CONFIG_SCHED_TIMESHARE_CORE */
 
 static unsigned int            sched_nrun;
 
@@ -210,7 +210,7 @@ compute_averages(uint64_t stdelta)
         */
        sched_nrun = nthreads;
        
-#if defined(CONFIG_SCHED_TRADITIONAL)
+#if defined(CONFIG_SCHED_TIMESHARE_CORE)
 
        /*
         *      The conversion factor consists of
@@ -242,7 +242,7 @@ compute_averages(uint64_t stdelta)
                                                (average_now * (LOAD_SCALE - fract[i]))) / LOAD_SCALE;
                }
        }
-#endif /* CONFIG_SCHED_TRADITIONAL */
+#endif /* CONFIG_SCHED_TIMESHARE_CORE */
 
        /*
         *      Compute averages in other components.
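The compute_averages() hunk above blends the instantaneous load into each running average with a fixed-point decay factor: avg = (avg * fract[i] + now * (LOAD_SCALE - fract[i])) / LOAD_SCALE. A self-contained sketch of that exponential moving average, assuming LOAD_SCALE is 1000 as in processor_info.h and using an illustrative decay factor rather than the kernel's computed fract[] values:

#include <stdio.h>
#include <stdint.h>

#define LOAD_SCALE 1000            /* fixed-point scale factor */

/* decay factor for one sample interval; illustrative value only */
static const uint32_t fract_5s = 835;

static uint32_t
average_step(uint32_t average, uint32_t now_scaled)
{
    /* new = old*f/SCALE + now*(SCALE - f)/SCALE, all in fixed point */
    return (average * fract_5s + now_scaled * (LOAD_SCALE - fract_5s)) / LOAD_SCALE;
}

int main(void)
{
    uint32_t avenrun = 0;

    /* feed a constant load of 2.0 (scaled) for a few intervals */
    for (int i = 0; i < 10; i++) {
        avenrun = average_step(avenrun, 2 * LOAD_SCALE);
        printf("interval %d: %u.%03u\n", i, avenrun / LOAD_SCALE, avenrun % LOAD_SCALE);
    }
    return 0;
}

With a constant input the average converges toward the input value, and the fract[] factors chosen per interval length control how quickly older samples stop mattering.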
index 628ee743e55d913589d6cd8c8e17c7fda112bc28..f7cbccb40e672795ea08535a220e40c2c4d4f3ee 100644 (file)
@@ -53,7 +53,7 @@ static thread_t
 sched_dualq_steal_thread(processor_set_t pset);
 
 static void
-sched_dualq_thread_update_scan(void);
+sched_dualq_thread_update_scan(sched_update_scan_context_t scan_context);
 
 static boolean_t
 sched_dualq_processor_enqueue(processor_t processor, thread_t thread, integer_t options);
@@ -94,18 +94,17 @@ sched_dualq_processor_queue_shutdown(processor_t processor);
 static sched_mode_t
 sched_dualq_initial_thread_sched_mode(task_t parent_task);
 
-static boolean_t
-sched_dualq_should_current_thread_rechoose_processor(processor_t processor);
-
 const struct sched_dispatch_table sched_dualq_dispatch = {
+       .sched_name                                     = "dualq",
        .init                                           = sched_dualq_init,
-       .timebase_init                                  = sched_traditional_timebase_init,
+       .timebase_init                                  = sched_timeshare_timebase_init,
        .processor_init                                 = sched_dualq_processor_init,
        .pset_init                                      = sched_dualq_pset_init,
-       .maintenance_continuation                       = sched_traditional_maintenance_continue,
+       .maintenance_continuation                       = sched_timeshare_maintenance_continue,
        .choose_thread                                  = sched_dualq_choose_thread,
+       .steal_thread_enabled                           = TRUE,
        .steal_thread                                   = sched_dualq_steal_thread,
-       .compute_priority                               = compute_priority,
+       .compute_timeshare_priority                     = sched_compute_timeshare_priority,
        .choose_processor                               = choose_processor,
        .processor_enqueue                              = sched_dualq_processor_enqueue,
        .processor_queue_shutdown                       = sched_dualq_processor_queue_shutdown,
@@ -114,24 +113,19 @@ const struct sched_dispatch_table sched_dualq_dispatch = {
        .priority_is_urgent                             = priority_is_urgent,
        .processor_csw_check                            = sched_dualq_processor_csw_check,
        .processor_queue_has_priority                   = sched_dualq_processor_queue_has_priority,
-       .initial_quantum_size                           = sched_traditional_initial_quantum_size,
+       .initial_quantum_size                           = sched_timeshare_initial_quantum_size,
        .initial_thread_sched_mode                      = sched_dualq_initial_thread_sched_mode,
        .can_update_priority                            = can_update_priority,
        .update_priority                                = update_priority,
        .lightweight_update_priority                    = lightweight_update_priority,
-       .quantum_expire                                 = sched_traditional_quantum_expire,
-       .should_current_thread_rechoose_processor       = sched_dualq_should_current_thread_rechoose_processor,
+       .quantum_expire                                 = sched_default_quantum_expire,
        .processor_runq_count                           = sched_dualq_runq_count,
        .processor_runq_stats_count_sum                 = sched_dualq_runq_stats_count_sum,
-       .fairshare_init                                 = sched_traditional_fairshare_init,
-       .fairshare_runq_count                           = sched_traditional_fairshare_runq_count,
-       .fairshare_runq_stats_count_sum                 = sched_traditional_fairshare_runq_stats_count_sum,
-       .fairshare_enqueue                              = sched_traditional_fairshare_enqueue,
-       .fairshare_dequeue                              = sched_traditional_fairshare_dequeue,
-       .fairshare_queue_remove                         = sched_traditional_fairshare_queue_remove,
        .processor_bound_count                          = sched_dualq_processor_bound_count,
        .thread_update_scan                             = sched_dualq_thread_update_scan,
        .direct_dispatch_to_idle_processors             = FALSE,
+       .multiple_psets_enabled                         = TRUE,
+       .sched_groups_enabled                           = FALSE,
 };
 
 __attribute__((always_inline))
@@ -181,7 +175,7 @@ sched_dualq_pset_init(processor_set_t pset)
 static void
 sched_dualq_init(void)
 {
-       sched_traditional_init();
+       sched_timeshare_init();
 }
 
 static thread_t
@@ -250,7 +244,7 @@ sched_dualq_processor_csw_check(processor_t processor)
 
        pri = MAX(main_runq->highq, bound_runq->highq);
 
-       if (first_timeslice(processor)) {
+       if (processor->first_timeslice) {
                has_higher = (pri > processor->current_pri);
        } else {
                has_higher = (pri >= processor->current_pri);
@@ -263,9 +257,6 @@ sched_dualq_processor_csw_check(processor_t processor)
                if (bound_runq->urgency > 0)
                        return (AST_PREEMPT | AST_URGENT);
                
-               if (processor->active_thread && thread_eager_preemption(processor->active_thread))
-                       return (AST_PREEMPT | AST_URGENT);
-
                return AST_PREEMPT;
        }
 
@@ -285,12 +276,6 @@ sched_dualq_processor_queue_has_priority(processor_t    processor,
                return qpri > priority;
 }
 
-static boolean_t
-sched_dualq_should_current_thread_rechoose_processor(processor_t processor)
-{
-       return (processor->current_pri < BASEPRI_RTQUEUES && processor->processor_primary != processor);
-}
-
 static int
 sched_dualq_runq_count(processor_t processor)
 {
@@ -407,7 +392,7 @@ sched_dualq_steal_thread(processor_set_t pset)
 }
 
 static void
-sched_dualq_thread_update_scan(void)
+sched_dualq_thread_update_scan(sched_update_scan_context_t scan_context)
 {
        boolean_t               restart_needed = FALSE;
        processor_t             processor = processor_list;
@@ -427,7 +412,7 @@ sched_dualq_thread_update_scan(void)
                        s = splsched();
                        pset_lock(pset);
 
-                       restart_needed = runq_scan(dualq_bound_runq(processor));
+                       restart_needed = runq_scan(dualq_bound_runq(processor), scan_context);
 
                        pset_unlock(pset);
                        splx(s);
@@ -456,7 +441,7 @@ sched_dualq_thread_update_scan(void)
                        s = splsched();
                        pset_lock(pset);
 
-                       restart_needed = runq_scan(&pset->pset_runq);
+                       restart_needed = runq_scan(&pset->pset_runq, scan_context);
 
                        pset_unlock(pset);
                        splx(s);
index 5774cc7b195d50dad76a8041341f7e41360a1ce2..8b70499e8f10422b384f164a2ccc5a61387def37 100644 (file)
@@ -52,7 +52,6 @@
 #include <kern/syscall_subr.h>
 #include <kern/task.h>
 #include <kern/thread.h>
-#include <kern/wait_queue.h>
 
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>
@@ -127,9 +126,8 @@ sched_grrr_choose_thread(processor_t                processor,
 static thread_t
 sched_grrr_steal_thread(processor_set_t                pset);
 
-static void
-sched_grrr_compute_priority(thread_t   thread,
-                                                        boolean_t                      override_depress);
+static int
+sched_grrr_compute_priority(thread_t thread);
 
 static processor_t
 sched_grrr_choose_processor(   processor_set_t         pset,
@@ -180,12 +178,6 @@ sched_grrr_update_priority(thread_t        thread);
 static void
 sched_grrr_lightweight_update_priority(thread_t        thread);
 
-static void
-sched_grrr_quantum_expire(thread_t     thread);
-
-static boolean_t
-sched_grrr_should_current_thread_rechoose_processor(processor_t                        processor);
-
 static int
 sched_grrr_processor_runq_count(processor_t    processor);
 
@@ -196,17 +188,19 @@ static int
 sched_grrr_processor_bound_count(processor_t   processor);
 
 static void
-sched_grrr_thread_update_scan(void);
+sched_grrr_thread_update_scan(sched_update_scan_context_t scan_context);
 
 const struct sched_dispatch_table sched_grrr_dispatch = {
+       .sched_name                                     = "grrr",
        .init                                           = sched_grrr_init,
        .timebase_init                                  = sched_grrr_timebase_init,
        .processor_init                                 = sched_grrr_processor_init,
        .pset_init                                      = sched_grrr_pset_init,
        .maintenance_continuation                       = sched_grrr_maintenance_continuation,
        .choose_thread                                  = sched_grrr_choose_thread,
+       .steal_thread_enabled                           = FALSE,
        .steal_thread                                   = sched_grrr_steal_thread,
-       .compute_priority                               = sched_grrr_compute_priority,
+       .compute_timeshare_priority                     = sched_grrr_compute_priority,
        .choose_processor                               = sched_grrr_choose_processor,
        .processor_enqueue                              = sched_grrr_processor_enqueue,
        .processor_queue_shutdown                       = sched_grrr_processor_queue_shutdown,
@@ -220,19 +214,14 @@ const struct sched_dispatch_table sched_grrr_dispatch = {
        .can_update_priority                            = sched_grrr_can_update_priority,
        .update_priority                                = sched_grrr_update_priority,
        .lightweight_update_priority                    = sched_grrr_lightweight_update_priority,
-       .quantum_expire                                 = sched_grrr_quantum_expire,
-       .should_current_thread_rechoose_processor       = sched_grrr_should_current_thread_rechoose_processor,
+       .quantum_expire                                 = sched_default_quantum_expire,
        .processor_runq_count                           = sched_grrr_processor_runq_count,
        .processor_runq_stats_count_sum                 = sched_grrr_processor_runq_stats_count_sum,
-       .fairshare_init                                 = sched_grrr_fairshare_init,
-       .fairshare_runq_count                           = sched_grrr_fairshare_runq_count,
-       .fairshare_runq_stats_count_sum                 = sched_grrr_fairshare_runq_stats_count_sum,
-       .fairshare_enqueue                              = sched_grrr_fairshare_enqueue,
-       .fairshare_dequeue                              = sched_grrr_fairshare_dequeue,
-       .fairshare_queue_remove                         = sched_grrr_fairshare_queue_remove,
        .processor_bound_count                          = sched_grrr_processor_bound_count,
        .thread_update_scan                             = sched_grrr_thread_update_scan,
        .direct_dispatch_to_idle_processors             = TRUE,
+       .multiple_psets_enabled                         = TRUE,
+       .sched_groups_enabled                           = FALSE,
 };
 
 extern int     max_unsafe_quanta;
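The dispatch tables in this commit gain per-scheduler attributes (.sched_name, .steal_thread_enabled, .multiple_psets_enabled, .sched_groups_enabled) in place of separate globals and string constants. A self-contained sketch of that pattern, with illustrative names, is below: the policy knobs live next to the callbacks, so callers query the table rather than free-standing flags.

    #include <stdbool.h>
    #include <stdio.h>

    /* Sketch of the dispatch-table pattern; names and values are illustrative. */
    struct dispatch {
        const char *sched_name;
        bool        steal_thread_enabled;
        bool        multiple_psets_enabled;
        bool        sched_groups_enabled;
        void      (*init)(void);
    };

    static void grrr_init(void) { puts("grrr init"); }

    static const struct dispatch grrr_dispatch = {
        .sched_name             = "grrr",
        .steal_thread_enabled   = false,
        .multiple_psets_enabled = true,
        .sched_groups_enabled   = false,
        .init                   = grrr_init,
    };

    static const struct dispatch *current = &grrr_dispatch;
    #define SCHED(f) (current->f)

    int main(void)
    {
        SCHED(init)();
        printf("%s: groups=%d\n", SCHED(sched_name), SCHED(sched_groups_enabled));
        return 0;
    }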
@@ -328,11 +317,10 @@ sched_grrr_steal_thread(processor_set_t           pset)
        
 }
 
-static void
-sched_grrr_compute_priority(thread_t   thread,
-                                                        boolean_t                      override_depress __unused)
+static int
+sched_grrr_compute_priority(thread_t thread)
 {
-       set_sched_pri(thread, thread->priority);
+       return thread->base_pri;
 }
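The compute_priority callback becomes compute_timeshare_priority: it now returns the priority instead of applying it itself, and for GRRR the answer is simply the thread's base priority. A hedged userspace model of that split follows; the struct and caller are illustrative, not the kernel interface.

    #include <stdio.h>

    /* Illustrative split: the policy callback *returns* a timeshare priority
     * and the common code decides whether to apply it. */
    struct thread_model { int base_pri; int sched_pri; };

    static int compute_timeshare_priority(const struct thread_model *t)
    {
        return t->base_pri;            /* GRRR: no decay, base priority is the answer */
    }

    static void update_priority(struct thread_model *t)
    {
        int pri = compute_timeshare_priority(t);
        if (pri != t->sched_pri)
            t->sched_pri = pri;        /* caller applies the result */
    }

    int main(void)
    {
        struct thread_model t = { .base_pri = 31, .sched_pri = 20 };
        update_priority(&t);
        printf("sched_pri=%d\n", t.sched_pri);
        return 0;
    }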
 
 static processor_t
@@ -516,19 +504,6 @@ sched_grrr_lightweight_update_priority(thread_t    thread __unused)
        return;
 }
 
-static void
-sched_grrr_quantum_expire(
-                                                 thread_t      thread __unused)
-{
-}
-
-
-static boolean_t
-sched_grrr_should_current_thread_rechoose_processor(processor_t                        processor __unused)
-{
-       return (TRUE);
-}
-
 static int
 sched_grrr_processor_runq_count(processor_t    processor)
 {
@@ -548,7 +523,7 @@ sched_grrr_processor_bound_count(__unused processor_t       processor)
 }
 
 static void
-sched_grrr_thread_update_scan(void)
+sched_grrr_thread_update_scan(__unused sched_update_scan_context_t scan_context)
 {
 
 }
@@ -883,84 +858,3 @@ grrr_sorted_list_insert_group(grrr_run_queue_t rq,
 }
 
 #endif /* defined(CONFIG_SCHED_GRRR_CORE) */
-
-#if defined(CONFIG_SCHED_GRRR)
-
-static struct grrr_run_queue   fs_grrr_runq;
-#define FS_GRRR_RUNQ           ((processor_t)-2)
-decl_simple_lock_data(static,fs_grrr_lock);
-
-void
-sched_grrr_fairshare_init(void)
-{
-       grrr_priority_mapping_init();
-       
-       simple_lock_init(&fs_grrr_lock, 0);
-       grrr_runqueue_init(&fs_grrr_runq);
-}
-
-
-int
-sched_grrr_fairshare_runq_count(void)
-{
-       return fs_grrr_runq.count;
-}
-
-uint64_t
-sched_grrr_fairshare_runq_stats_count_sum(void)
-{
-       return fs_grrr_runq.runq_stats.count_sum;
-}
-
-void
-sched_grrr_fairshare_enqueue(thread_t thread)
-{
-       simple_lock(&fs_grrr_lock);
-       
-       (void)grrr_enqueue(&fs_grrr_runq, thread);
-
-       thread->runq = FS_GRRR_RUNQ;
-
-       simple_unlock(&fs_grrr_lock);   
-}
-
-thread_t       sched_grrr_fairshare_dequeue(void)
-{
-       thread_t thread;
-       
-       simple_lock(&fs_grrr_lock);
-       if (fs_grrr_runq.count > 0) {
-               thread = grrr_select(&fs_grrr_runq);
-               
-               simple_unlock(&fs_grrr_lock);
-               
-               return (thread);
-       }
-       simple_unlock(&fs_grrr_lock);           
-       
-       return THREAD_NULL;
-}
-
-boolean_t      sched_grrr_fairshare_queue_remove(thread_t thread)
-{
-       
-       simple_lock(&fs_grrr_lock);
-       
-       if (FS_GRRR_RUNQ == thread->runq) {
-               grrr_remove(&fs_grrr_runq, thread);
-               
-               simple_unlock(&fs_grrr_lock);
-               return (TRUE);
-       }
-       else {
-               /*
-                *      The thread left the run queue before we could
-                *      lock the run queue.
-                */
-               assert(thread->runq == PROCESSOR_NULL);
-               simple_unlock(&fs_grrr_lock);
-               return (FALSE);
-       }       
-}
-
-#endif /* defined(CONFIG_SCHED_GRRR) */
index 63519c677159f9e155dd86b280521257f19fee11..ac1cc6d24a8c8f0587f3b862219dbd3b277a996a 100644 (file)
@@ -194,9 +194,6 @@ struct sched_group {
        queue_chain_t           sched_groups;
 };
 
-/* TODO: Turn this into an attribute in the sched dispatch struct */
-boolean_t               sched_groups_enabled = FALSE;
-
 /*
  * Keep entry on the head of the runqueue while dequeueing threads.
  * Only cycle it to the end of the runqueue when a thread in the task
@@ -204,11 +201,6 @@ boolean_t               sched_groups_enabled = FALSE;
  */
 static boolean_t        deep_drain = FALSE;
 
-/*
- * Don't favor the task when an urgent thread is present.
- */
-static boolean_t        drain_urgent_first = TRUE;
-
 /* Verify the consistency of the runq before touching it */
 static boolean_t        multiq_sanity_check = FALSE;
 
@@ -226,6 +218,11 @@ static integer_t        drain_band_limit;
 #define DEFAULT_DRAIN_DEPTH_LIMIT MAXPRI_THROTTLE
 static integer_t        drain_depth_limit;
 
+/*
+ * Don't favor the task when there's something above this priority in another task.
+ */
+#define DEFAULT_DRAIN_CEILING BASEPRI_FOREGROUND
+static integer_t        drain_ceiling;
 
 static struct zone      *sched_group_zone;
 
@@ -246,7 +243,7 @@ static thread_t
 sched_multiq_steal_thread(processor_set_t pset);
 
 static void
-sched_multiq_thread_update_scan(void);
+sched_multiq_thread_update_scan(sched_update_scan_context_t scan_context);
 
 static boolean_t
 sched_multiq_processor_enqueue(processor_t processor, thread_t thread, integer_t options);
@@ -290,18 +287,17 @@ sched_multiq_processor_queue_shutdown(processor_t processor);
 static sched_mode_t
 sched_multiq_initial_thread_sched_mode(task_t parent_task);
 
-static boolean_t
-sched_multiq_should_current_thread_rechoose_processor(processor_t processor);
-
 const struct sched_dispatch_table sched_multiq_dispatch = {
+       .sched_name                                     = "multiq",
        .init                                           = sched_multiq_init,
-       .timebase_init                                  = sched_traditional_timebase_init,
+       .timebase_init                                  = sched_timeshare_timebase_init,
        .processor_init                                 = sched_multiq_processor_init,
        .pset_init                                      = sched_multiq_pset_init,
-       .maintenance_continuation                       = sched_traditional_maintenance_continue,
+       .maintenance_continuation                       = sched_timeshare_maintenance_continue,
        .choose_thread                                  = sched_multiq_choose_thread,
+       .steal_thread_enabled                           = FALSE,
        .steal_thread                                   = sched_multiq_steal_thread,
-       .compute_priority                               = compute_priority,
+       .compute_timeshare_priority                     = sched_compute_timeshare_priority,
        .choose_processor                               = choose_processor,
        .processor_enqueue                              = sched_multiq_processor_enqueue,
        .processor_queue_shutdown                       = sched_multiq_processor_queue_shutdown,
@@ -310,39 +306,34 @@ const struct sched_dispatch_table sched_multiq_dispatch = {
        .priority_is_urgent                             = priority_is_urgent,
        .processor_csw_check                            = sched_multiq_processor_csw_check,
        .processor_queue_has_priority                   = sched_multiq_processor_queue_has_priority,
-       .initial_quantum_size                           = sched_traditional_initial_quantum_size,
+       .initial_quantum_size                           = sched_timeshare_initial_quantum_size,
        .initial_thread_sched_mode                      = sched_multiq_initial_thread_sched_mode,
        .can_update_priority                            = can_update_priority,
        .update_priority                                = update_priority,
        .lightweight_update_priority                    = lightweight_update_priority,
        .quantum_expire                                 = sched_multiq_quantum_expire,
-       .should_current_thread_rechoose_processor       = sched_multiq_should_current_thread_rechoose_processor,
        .processor_runq_count                           = sched_multiq_runq_count,
        .processor_runq_stats_count_sum                 = sched_multiq_runq_stats_count_sum,
-       .fairshare_init                                 = sched_traditional_fairshare_init,
-       .fairshare_runq_count                           = sched_traditional_fairshare_runq_count,
-       .fairshare_runq_stats_count_sum                 = sched_traditional_fairshare_runq_stats_count_sum,
-       .fairshare_enqueue                              = sched_traditional_fairshare_enqueue,
-       .fairshare_dequeue                              = sched_traditional_fairshare_dequeue,
-       .fairshare_queue_remove                         = sched_traditional_fairshare_queue_remove,
        .processor_bound_count                          = sched_multiq_processor_bound_count,
        .thread_update_scan                             = sched_multiq_thread_update_scan,
        .direct_dispatch_to_idle_processors             = FALSE,
+       .multiple_psets_enabled                         = FALSE,
+       .sched_groups_enabled                           = TRUE,
 };
 
 
 static void
 sched_multiq_init(void)
 {
-       sched_groups_enabled = TRUE;
-
 #if defined(MULTIQ_SANITY_CHECK)
        PE_parse_boot_argn("-multiq-sanity-check", &multiq_sanity_check, sizeof(multiq_sanity_check));
 #endif
 
        PE_parse_boot_argn("-multiq-deep-drain", &deep_drain, sizeof(deep_drain));
 
-       PE_parse_boot_argn("multiq_drain_urgent_first", &drain_urgent_first, sizeof(drain_urgent_first));
+       if (!PE_parse_boot_argn("multiq_drain_ceiling", &drain_ceiling, sizeof(drain_ceiling))) {
+               drain_ceiling = DEFAULT_DRAIN_CEILING;
+       }
 
        if (!PE_parse_boot_argn("multiq_drain_depth_limit", &drain_depth_limit, sizeof(drain_depth_limit))) {
                drain_depth_limit = DEFAULT_DRAIN_DEPTH_LIMIT;
@@ -352,8 +343,8 @@ sched_multiq_init(void)
                drain_band_limit = DEFAULT_DRAIN_BAND_LIMIT;
        }
 
-       printf("multiq scheduler config: deep-drain %d, urgent first %d, depth limit %d, band limit %d, sanity check %d\n",
-              deep_drain, drain_urgent_first, drain_depth_limit, drain_band_limit, multiq_sanity_check);
+       printf("multiq scheduler config: deep-drain %d, ceiling %d, depth limit %d, band limit %d, sanity check %d\n",
+              deep_drain, drain_ceiling, drain_depth_limit, drain_band_limit, multiq_sanity_check);
 
        sched_group_zone = zinit(
                                 sizeof(struct sched_group),
@@ -371,7 +362,7 @@ sched_multiq_init(void)
        lck_attr_setdefault(&sched_groups_lock_attr);
        lck_mtx_init(&sched_groups_lock, &sched_groups_lock_grp, &sched_groups_lock_attr);
 
-       sched_traditional_init();
+       sched_timeshare_init();
 }
 
 static void
@@ -400,7 +391,7 @@ sched_group_create(void)
 {
        sched_group_t       sched_group;
 
-       if (!sched_groups_enabled)
+       if (!SCHED(sched_groups_enabled))
                return SCHED_GROUP_NULL;
 
        sched_group = (sched_group_t)zalloc(sched_group_zone);
@@ -425,7 +416,7 @@ sched_group_create(void)
 void
 sched_group_destroy(sched_group_t sched_group)
 {
-       if (!sched_groups_enabled) {
+       if (!SCHED(sched_groups_enabled)) {
                assert(sched_group == SCHED_GROUP_NULL);
                return;
        }
@@ -492,6 +483,7 @@ entry_queue_first_entry(entry_queue_t rq)
 
 #if defined(MULTIQ_SANITY_CHECK)
 
+#if MACH_ASSERT
 __attribute__((always_inline))
 static inline boolean_t
 queue_chain_linked(queue_chain_t* chain)
@@ -504,6 +496,7 @@ queue_chain_linked(queue_chain_t* chain)
                return FALSE;
        }
 }
+#endif /* MACH_ASSERT */
 
 static thread_t
 group_first_thread(sched_group_t group)
@@ -711,6 +704,27 @@ entry_queue_remove_entry(
        entry->runq = 0;
 }
 
+static void
+entry_queue_change_entry(
+                          entry_queue_t rq,
+                          sched_entry_t entry,
+                          integer_t     options)
+{
+       int     sched_pri   = entry->sched_pri;
+       queue_t queue       = rq->queues + sched_pri;
+
+#if defined(MULTIQ_SANITY_CHECK)
+       if (multiq_sanity_check) {
+               entry_queue_check_entry(rq, entry, sched_pri);
+       }
+#endif
+       remqueue((queue_entry_t)entry);
+
+       if (options & SCHED_TAILQ)
+               enqueue_tail(queue, (queue_entry_t)entry);
+       else
+               enqueue_head(queue, (queue_entry_t)entry);
+}
 /*
  * The run queue must not be empty.
  *
@@ -963,6 +977,9 @@ sched_group_enqueue_thread(
                 * What effects would it have?
                 */
                entry_queue_enqueue_entry(main_entryq, &group->entries[sched_pri], options);
+       } else if (options & SCHED_HEADQ) {
+               /* The thread should be at the head of the line - move its entry to the front */
+               entry_queue_change_entry(main_entryq, &group->entries[sched_pri], options);
        }
 }
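The new entry_queue_change_entry() helper above moves a group's entry to the head or tail of its priority bucket in one step, and the SCHED_HEADQ branch uses it so a head-enqueued thread also pulls its group's entry to the front. A toy, self-contained model of "change in place" on a plain array follows; it only illustrates the move-to-head/move-to-tail behavior, not the kernel queue types.

    #include <stdio.h>
    #include <string.h>

    /* Toy model: move an element to the head or tail of its bucket without a
     * separate remove + re-insert pass. Purely illustrative. */
    #define SCHED_TAILQ 0
    #define SCHED_HEADQ 1

    static void change_entry(int *bucket, int n, int idx, int options)
    {
        int v = bucket[idx];
        memmove(&bucket[idx], &bucket[idx + 1], (size_t)(n - idx - 1) * sizeof(int));
        if (options == SCHED_HEADQ) {
            memmove(&bucket[1], &bucket[0], (size_t)(n - 1) * sizeof(int));
            bucket[0] = v;                 /* new arrival should run first */
        } else {
            bucket[n - 1] = v;             /* quantum expired: rotate to the back */
        }
    }

    int main(void)
    {
        int bucket[] = { 10, 11, 12 };     /* entries sharing one priority */
        change_entry(bucket, 3, 2, SCHED_HEADQ);
        for (int i = 0; i < 3; i++) printf("%d ", bucket[i]);
        printf("\n");                      /* prints: 12 10 11 */
        return 0;
    }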
 
@@ -1033,33 +1050,44 @@ sched_multiq_choose_thread(
         * Should YIELD AST override drain limit?
         */
        if (group->runq.count != 0 && (reason & AST_PREEMPTION) == 0) {
-               boolean_t   drain_limit_hit = FALSE;
+               boolean_t favor_group = TRUE;
+
+               integer_t global_pri = main_entryq->highq;
+               integer_t group_pri  = group->runq.highq;
 
-               if (main_entryq->highq > group->runq.highq) {
+               /*
+                * Favor the current group if the group is still the globally highest.
+                *
+                * Otherwise, consider choosing a thread from the current group
+                * even if it's lower priority than the global highest priority.
+                */
+               if (global_pri > group_pri) {
                        /*
                         * If there's something elsewhere above the depth limit,
                         * don't pick a thread below the limit.
                         */
-                       if (main_entryq->highq > drain_depth_limit &&
-                           group->runq.highq <= drain_depth_limit)
-                               drain_limit_hit = TRUE;
+                       if (global_pri > drain_depth_limit && group_pri <= drain_depth_limit)
+                               favor_group = FALSE;
 
                        /*
-                        * Don't go more than X steps below the global highest
+                        * If there's something at or above the ceiling,
+                        * don't favor the group.
                         */
-                       if ((main_entryq->highq - group->runq.highq) >= drain_band_limit)
-                               drain_limit_hit = TRUE;
+                       if (global_pri >= drain_ceiling)
+                               favor_group = FALSE;
 
-                       /* Don't favor the task when an urgent thread is present. */
-                       if (drain_urgent_first && main_entryq->urgency > 0)
-                               drain_limit_hit = TRUE;
+                       /*
+                        * Don't go more than X steps below the global highest
+                        */
+                       if ((global_pri - group_pri) >= drain_band_limit)
+                               favor_group = FALSE;
                }
 
-               if (!drain_limit_hit) {
+               if (favor_group) {
                        /* Pull from local runq */
                        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                            MACHDBG_CODE(DBG_MACH_SCHED, MACH_MULTIQ_DEQUEUE) | DBG_FUNC_NONE,
-                           MACH_MULTIQ_GROUP, main_entryq->highq, group->runq.highq, 0, 0);
+                           MACH_MULTIQ_GROUP, global_pri, group_pri, 0, 0);
 
                        return sched_group_dequeue_thread(main_entryq, group);
                }
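The favor_group logic above replaces the old drain_urgent_first flag with a ceiling: the current group keeps draining unless something elsewhere is above the depth limit, at or above the ceiling, or more than a band's width above the group. A compact model of the same three cut-offs, with made-up limit values, is shown below.

    #include <stdbool.h>
    #include <stdio.h>

    /* Model of the drain decision above; the limit values are stand-ins. */
    static const int drain_depth_limit = 4;
    static const int drain_ceiling     = 47;
    static const int drain_band_limit  = 8;

    static bool favor_group(int global_pri, int group_pri)
    {
        if (global_pri <= group_pri)
            return true;                               /* group is already the highest */
        if (global_pri > drain_depth_limit && group_pri <= drain_depth_limit)
            return false;                              /* don't dig below the depth limit */
        if (global_pri >= drain_ceiling)
            return false;                              /* something important elsewhere */
        if ((global_pri - group_pri) >= drain_band_limit)
            return false;                              /* too far below the global highest */
        return true;
    }

    int main(void)
    {
        printf("%d\n", favor_group(40, 37));  /* 1: keep draining the current group */
        printf("%d\n", favor_group(50, 37));  /* 0: ceiling reached, fall back to global */
        return 0;
    }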
@@ -1133,8 +1161,7 @@ sched_multiq_quantum_expire(thread_t thread)
                sched_entry_t entry = group_entry_for_pri(thread->sched_group, processor->current_pri);
 
                if (entry->runq == MULTIQ_ERUNQ) {
-                       entry_queue_remove_entry(entryq, entry);
-                       entry_queue_enqueue_entry(entryq, entry, SCHED_TAILQ);
+                       entry_queue_change_entry(entryq, entry, SCHED_TAILQ);
                }
 
                pset_unlock(pset);
@@ -1155,13 +1182,13 @@ sched_multiq_processor_csw_check(processor_t processor)
        int             pri;
 
        entry_queue_t main_entryq = multiq_main_entryq(processor);
-       run_queue_t   bound_runq  = multiq_bound_runq(processor);       
+       run_queue_t   bound_runq  = multiq_bound_runq(processor);
 
        assert(processor->active_thread != NULL);
 
        pri = MAX(main_entryq->highq, bound_runq->highq);
 
-       if (first_timeslice(processor)) {
+       if (processor->first_timeslice) {
                has_higher = (pri > processor->current_pri);
        } else {
                has_higher = (pri >= processor->current_pri);
@@ -1173,9 +1200,6 @@ sched_multiq_processor_csw_check(processor_t processor)
 
                if (bound_runq->urgency > 0)
                        return (AST_PREEMPT | AST_URGENT);
-               
-               if (processor->active_thread && thread_eager_preemption(processor->active_thread))
-                       return (AST_PREEMPT | AST_URGENT);
 
                return AST_PREEMPT;
        }
@@ -1197,12 +1221,6 @@ sched_multiq_processor_queue_has_priority(
                return qpri > priority;
 }
 
-static boolean_t
-sched_multiq_should_current_thread_rechoose_processor(processor_t processor)
-{
-       return (processor->current_pri < BASEPRI_RTQUEUES && processor->processor_primary != processor);
-}
-
 static int
 sched_multiq_runq_count(processor_t processor)
 {
@@ -1286,7 +1304,6 @@ sched_multiq_processor_queue_remove(
                                     thread_t    thread)
 {
        boolean_t removed = FALSE;
-
        processor_set_t pset = processor->processor_set;
 
        pset_lock(pset);
@@ -1332,7 +1349,7 @@ sched_multiq_steal_thread(processor_set_t pset)
  * Returns TRUE if retry is needed.
  */
 static boolean_t
-group_scan(entry_queue_t runq) {
+group_scan(entry_queue_t runq, sched_update_scan_context_t scan_context) {
        int             count;
        queue_t         q;
        sched_group_t   group;
@@ -1344,7 +1361,7 @@ group_scan(entry_queue_t runq) {
                        queue_iterate(q, entry, sched_entry_t, links) {
                                group = group_for_entry(entry);
                                if (group->runq.count > 0) {
-                                       if (runq_scan(&group->runq))
+                                       if (runq_scan(&group->runq, scan_context))
                                                return (TRUE);
                                }
                                count--;
@@ -1357,7 +1374,7 @@ group_scan(entry_queue_t runq) {
 }
 
 static void
-sched_multiq_thread_update_scan(void)
+sched_multiq_thread_update_scan(sched_update_scan_context_t scan_context)
 {
        boolean_t               restart_needed = FALSE;
        processor_t             processor = processor_list;
@@ -1377,7 +1394,7 @@ sched_multiq_thread_update_scan(void)
                        s = splsched();
                        pset_lock(pset);
 
-                       restart_needed = runq_scan(multiq_bound_runq(processor));
+                       restart_needed = runq_scan(multiq_bound_runq(processor), scan_context);
 
                        pset_unlock(pset);
                        splx(s);
@@ -1406,7 +1423,7 @@ sched_multiq_thread_update_scan(void)
                        s = splsched();
                        pset_lock(pset);
 
-                       restart_needed = group_scan(&pset->pset_runq);
+                       restart_needed = group_scan(&pset->pset_runq, scan_context);
 
                        pset_unlock(pset);
                        splx(s);
index 3a88e11fb4fcf2d25f9ea6eefe81b6314aa3731f..9a0a9427cdf7af516c2b64a53482e6617e1c1127 100644 (file)
@@ -86,6 +86,7 @@
 #include <kern/counters.h>
 #include <kern/cpu_number.h>
 #include <kern/cpu_data.h>
+#include <kern/smp.h>
 #include <kern/debug.h>
 #include <kern/macro_help.h>
 #include <kern/machine.h>
@@ -98,9 +99,9 @@
 #include <kern/syscall_subr.h>
 #include <kern/task.h>
 #include <kern/thread.h>
-#include <kern/wait_queue.h>
 #include <kern/ledger.h>
 #include <kern/timer_queue.h>
+#include <kern/waitq.h>
 
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>
 #endif
 
 struct rt_queue        rt_runq;
-#define RT_RUNQ                ((processor_t)-1)
-decl_simple_lock_data(static,rt_lock);
 
-#if defined(CONFIG_SCHED_FAIRSHARE_CORE)
-static struct fairshare_queue  fs_runq;
-#define FS_RUNQ                ((processor_t)-2)
-decl_simple_lock_data(static,fs_lock);
-#endif /* CONFIG_SCHED_FAIRSHARE_CORE */
+uintptr_t sched_thread_on_rt_queue = (uintptr_t)0xDEAFBEE0;
+
+/* Lock RT runq, must be done with interrupts disabled (under splsched()) */
+#if __SMP__
+decl_simple_lock_data(static,rt_lock);
+#define rt_lock_init()         simple_lock_init(&rt_lock, 0)
+#define rt_lock_lock()         simple_lock(&rt_lock)
+#define rt_lock_unlock()       simple_unlock(&rt_lock)
+#else
+#define rt_lock_init()         do { } while(0)
+#define rt_lock_lock()         do { } while(0)
+#define rt_lock_unlock()       do { } while(0)
+#endif
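On uniprocessor builds the rt_lock macros above compile to empty statements, since splsched() alone serializes access to the realtime runqueue. The sketch below mirrors that conditional-macro shape in plain C; the lock word stands in for simple_lock so it builds in userspace.

    #include <stdio.h>

    /* Same shape as the rt_lock macros above; the flag stands in for simple_lock. */
    #define MY_SMP 1

    #if MY_SMP
    static int rt_lock_word;
    #define rt_lock_init()   (rt_lock_word = 0)
    #define rt_lock_lock()   (rt_lock_word = 1)   /* stands in for simple_lock() */
    #define rt_lock_unlock() (rt_lock_word = 0)
    #else
    /* Uniprocessor: interrupt disablement already excludes other users. */
    #define rt_lock_init()   do { } while (0)
    #define rt_lock_lock()   do { } while (0)
    #define rt_lock_unlock() do { } while (0)
    #endif

    int main(void)
    {
        rt_lock_init();
        rt_lock_lock();
        puts("rt runq protected while we dequeue");
        rt_lock_unlock();
        return 0;
    }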
 
 #define                DEFAULT_PREEMPTION_RATE         100             /* (1/s) */
 int                    default_preemption_rate = DEFAULT_PREEMPTION_RATE;
@@ -197,49 +204,6 @@ thread_t sched_maintenance_thread;
 
 #endif /* CONFIG_SCHED_TIMESHARE_CORE */
 
-#if defined(CONFIG_SCHED_TRADITIONAL)
-
-static boolean_t sched_traditional_use_pset_runqueue = FALSE;
-
-__attribute__((always_inline))
-static inline run_queue_t runq_for_processor(processor_t processor)
-{
-       if (sched_traditional_use_pset_runqueue)
-               return &processor->processor_set->pset_runq;
-       else
-               return &processor->runq;
-}
-
-__attribute__((always_inline))
-static inline void runq_consider_incr_bound_count(processor_t processor, thread_t thread)
-{
-       if (thread->bound_processor == PROCESSOR_NULL)
-               return;
-    
-       assert(thread->bound_processor == processor);
-    
-       if (sched_traditional_use_pset_runqueue)
-               processor->processor_set->pset_runq_bound_count++;
-    
-       processor->runq_bound_count++;
-}
-
-__attribute__((always_inline))
-static inline void runq_consider_decr_bound_count(processor_t processor, thread_t thread)
-{
-       if (thread->bound_processor == PROCESSOR_NULL)
-               return;
-    
-       assert(thread->bound_processor == processor);
-    
-       if (sched_traditional_use_pset_runqueue)
-               processor->processor_set->pset_runq_bound_count--;
-    
-       processor->runq_bound_count--;
-}
-
-#endif /* CONFIG_SCHED_TRADITIONAL */
-
 uint64_t       sched_one_second_interval;
 
 uint32_t       sched_run_count, sched_share_count, sched_background_count;
@@ -274,72 +238,11 @@ csw_check_locked( processor_t             processor,
                                        processor_set_t pset,
                                        ast_t                   check_reason);
 
-#if defined(CONFIG_SCHED_TRADITIONAL)
-
-static thread_t        steal_thread(
-                                       processor_set_t         pset);
-
-static thread_t        steal_thread_disabled(
-                                       processor_set_t         pset) __attribute__((unused));
-
-
-static thread_t        steal_processor_thread(
-                                       processor_t                     processor);
-
-static void            thread_update_scan(void);
-
 static void processor_setrun(
                                 processor_t                    processor,
                                 thread_t                       thread,
                                 integer_t                      options);
 
-static boolean_t
-processor_enqueue(
-                                 processor_t           processor,
-                                 thread_t              thread,
-                                 integer_t             options);
-
-static boolean_t
-processor_queue_remove(
-                                          processor_t                  processor,
-                                          thread_t             thread);
-
-static boolean_t       processor_queue_empty(processor_t               processor);
-
-static ast_t           processor_csw_check(processor_t processor);
-
-static boolean_t       processor_queue_has_priority(processor_t                processor,
-                                                                                       int                             priority,
-                                                                                       boolean_t               gte);
-
-static boolean_t       should_current_thread_rechoose_processor(processor_t                    processor);
-
-static int     sched_traditional_processor_runq_count(processor_t   processor);
-
-static boolean_t       sched_traditional_with_pset_runqueue_processor_queue_empty(processor_t          processor);
-
-static uint64_t     sched_traditional_processor_runq_stats_count_sum(processor_t   processor);
-
-static uint64_t                sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum(processor_t   processor);
-
-static int      sched_traditional_processor_bound_count(processor_t processor);
-
-#endif
-
-
-#if defined(CONFIG_SCHED_TRADITIONAL)
-
-static void
-sched_traditional_processor_init(processor_t processor);
-
-static void
-sched_traditional_pset_init(processor_set_t pset);
-
-static void
-sched_traditional_with_pset_runqueue_init(void);
-
-#endif
-
 static void
 sched_realtime_init(void);
 
@@ -349,19 +252,6 @@ sched_realtime_timebase_init(void);
 static void
 sched_timer_deadline_tracking_init(void);
 
-#if defined(CONFIG_SCHED_TRADITIONAL)
-
-static sched_mode_t
-sched_traditional_initial_thread_sched_mode(task_t parent_task);
-
-static thread_t
-sched_traditional_choose_thread(
-                                processor_t     processor,
-                                int             priority,
-                       __unused ast_t           reason);
-
-#endif
-
 #if    DEBUG
 extern int debug_task;
 #define TLOG(a, fmt, args...) if(debug_task & a) kprintf(fmt, ## args)
@@ -369,120 +259,19 @@ extern int debug_task;
 #define TLOG(a, fmt, args...) do {} while (0)
 #endif
 
-__assert_only static
-boolean_t      thread_runnable(
-                               thread_t                thread);
+static processor_t
+thread_bind_internal(
+       thread_t                thread,
+       processor_t             processor);
 
-/*
- *     State machine
- *
- * states are combinations of:
- *  R  running
- *  W  waiting (or on wait queue)
- *  N  non-interruptible
- *  O  swapped out
- *  I  being swapped in
- *
- * init        action 
- *     assert_wait thread_block    clear_wait          swapout swapin
- *
- * R   RW, RWN     R;   setrun     -                   -
- * RN  RWN         RN;  setrun     -                   -
- *
- * RW              W               R                   -
- * RWN             WN              RN                  -
- *
- * W                               R;   setrun         WO
- * WN                              RN;  setrun         -
- *
- * RO                              -                   -       R
- *
- */
+static void
+sched_vm_group_maintenance(void);
 
 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
 int8_t         sched_load_shifts[NRQS];
 int            sched_preempt_pri[NRQBM];
 #endif /* CONFIG_SCHED_TIMESHARE_CORE */
 
-
-#if defined(CONFIG_SCHED_TRADITIONAL)
-
-const struct sched_dispatch_table sched_traditional_dispatch = {
-       .init                                           = sched_traditional_init,
-       .timebase_init                                  = sched_traditional_timebase_init,
-       .processor_init                                 = sched_traditional_processor_init,
-       .pset_init                                      = sched_traditional_pset_init,
-       .maintenance_continuation                       = sched_traditional_maintenance_continue,
-       .choose_thread                                  = sched_traditional_choose_thread,
-       .steal_thread                                   = steal_thread,
-       .compute_priority                               = compute_priority,
-       .choose_processor                               = choose_processor,
-       .processor_enqueue                              = processor_enqueue,
-       .processor_queue_shutdown                       = processor_queue_shutdown,
-       .processor_queue_remove                         = processor_queue_remove,
-       .processor_queue_empty                          = processor_queue_empty,
-       .priority_is_urgent                             = priority_is_urgent,
-       .processor_csw_check                            = processor_csw_check,
-       .processor_queue_has_priority                   = processor_queue_has_priority,
-       .initial_quantum_size                           = sched_traditional_initial_quantum_size,
-       .initial_thread_sched_mode                      = sched_traditional_initial_thread_sched_mode,
-       .can_update_priority                            = can_update_priority,
-       .update_priority                                = update_priority,
-       .lightweight_update_priority                    = lightweight_update_priority,
-       .quantum_expire                                 = sched_traditional_quantum_expire,
-       .should_current_thread_rechoose_processor       = should_current_thread_rechoose_processor,
-       .processor_runq_count                           = sched_traditional_processor_runq_count,
-       .processor_runq_stats_count_sum                 = sched_traditional_processor_runq_stats_count_sum,
-       .fairshare_init                                 = sched_traditional_fairshare_init,
-       .fairshare_runq_count                           = sched_traditional_fairshare_runq_count,
-       .fairshare_runq_stats_count_sum                 = sched_traditional_fairshare_runq_stats_count_sum,
-       .fairshare_enqueue                              = sched_traditional_fairshare_enqueue,
-       .fairshare_dequeue                              = sched_traditional_fairshare_dequeue,
-       .fairshare_queue_remove                         = sched_traditional_fairshare_queue_remove,
-       .processor_bound_count                          = sched_traditional_processor_bound_count,
-       .thread_update_scan                             = thread_update_scan,
-       .direct_dispatch_to_idle_processors             = TRUE,
-};
-
-const struct sched_dispatch_table sched_traditional_with_pset_runqueue_dispatch = {
-       .init                                           = sched_traditional_with_pset_runqueue_init,
-       .timebase_init                                  = sched_traditional_timebase_init,
-       .processor_init                                 = sched_traditional_processor_init,
-       .pset_init                                      = sched_traditional_pset_init,
-       .maintenance_continuation                       = sched_traditional_maintenance_continue,
-       .choose_thread                                  = sched_traditional_choose_thread,
-       .steal_thread                                   = steal_thread,
-       .compute_priority                               = compute_priority,
-       .choose_processor                               = choose_processor,
-       .processor_enqueue                              = processor_enqueue,
-       .processor_queue_shutdown                       = processor_queue_shutdown,
-       .processor_queue_remove                         = processor_queue_remove,
-       .processor_queue_empty                          = sched_traditional_with_pset_runqueue_processor_queue_empty,
-       .priority_is_urgent                             = priority_is_urgent,
-       .processor_csw_check                            = processor_csw_check,
-       .processor_queue_has_priority                   = processor_queue_has_priority,
-       .initial_quantum_size                           = sched_traditional_initial_quantum_size,
-       .initial_thread_sched_mode                      = sched_traditional_initial_thread_sched_mode,
-       .can_update_priority                            = can_update_priority,
-       .update_priority                                = update_priority,
-       .lightweight_update_priority                    = lightweight_update_priority,
-       .quantum_expire                                 = sched_traditional_quantum_expire,
-       .should_current_thread_rechoose_processor       = should_current_thread_rechoose_processor,
-       .processor_runq_count                           = sched_traditional_processor_runq_count,
-       .processor_runq_stats_count_sum                 = sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum,
-       .fairshare_init                                 = sched_traditional_fairshare_init,
-       .fairshare_runq_count                           = sched_traditional_fairshare_runq_count,
-       .fairshare_runq_stats_count_sum                 = sched_traditional_fairshare_runq_stats_count_sum,
-       .fairshare_enqueue                              = sched_traditional_fairshare_enqueue,
-       .fairshare_dequeue                              = sched_traditional_fairshare_dequeue,
-       .fairshare_queue_remove                         = sched_traditional_fairshare_queue_remove,
-       .processor_bound_count                          = sched_traditional_processor_bound_count,
-       .thread_update_scan                             = thread_update_scan,
-       .direct_dispatch_to_idle_processors             = FALSE,
-};
-
-#endif
-
 const struct sched_dispatch_table *sched_current_dispatch = NULL;
 
 /*
@@ -502,10 +291,9 @@ const struct sched_dispatch_table *sched_current_dispatch = NULL;
  * can run on multiple devices with different schedulers configured
  * in the device tree.
  */
-#define SCHED_STRING_MAX_LENGTH (48)
-
 char sched_string[SCHED_STRING_MAX_LENGTH];
-static enum sched_enum _sched_enum __attribute__((used)) = sched_enum_unknown;
+
+uint32_t sched_debug_flags;
 
 /* Global flag which indicates whether Background Stepper Context is enabled */
 static int cpu_throttle_enabled = 1;
@@ -541,75 +329,57 @@ sched_init(void)
                if (0) {
                        /* Allow pattern below */
 #if defined(CONFIG_SCHED_TRADITIONAL)
-               } else if (0 == strcmp(sched_arg, kSchedTraditionalString)) {
+               } else if (0 == strcmp(sched_arg, sched_traditional_dispatch.sched_name)) {
                        sched_current_dispatch = &sched_traditional_dispatch;
-                       _sched_enum = sched_enum_traditional;
-                       strlcpy(sched_string, kSchedTraditionalString, sizeof(sched_string));
-               } else if (0 == strcmp(sched_arg, kSchedTraditionalWithPsetRunqueueString)) {
+               } else if (0 == strcmp(sched_arg, sched_traditional_with_pset_runqueue_dispatch.sched_name)) {
                        sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch;
-                       _sched_enum = sched_enum_traditional_with_pset_runqueue;
-                       strlcpy(sched_string, kSchedTraditionalWithPsetRunqueueString, sizeof(sched_string));
 #endif
 #if defined(CONFIG_SCHED_PROTO)
-               } else if (0 == strcmp(sched_arg, kSchedProtoString)) {
+               } else if (0 == strcmp(sched_arg, sched_proto_dispatch.sched_name)) {
                        sched_current_dispatch = &sched_proto_dispatch;
-                       _sched_enum = sched_enum_proto;
-                       strlcpy(sched_string, kSchedProtoString, sizeof(sched_string));
 #endif
 #if defined(CONFIG_SCHED_GRRR)
-               } else if (0 == strcmp(sched_arg, kSchedGRRRString)) {
+               } else if (0 == strcmp(sched_arg, sched_grrr_dispatch.sched_name)) {
                        sched_current_dispatch = &sched_grrr_dispatch;
-                       _sched_enum = sched_enum_grrr;
-                       strlcpy(sched_string, kSchedGRRRString, sizeof(sched_string));
 #endif
 #if defined(CONFIG_SCHED_MULTIQ)
-               } else if (0 == strcmp(sched_arg, kSchedMultiQString)) {
+               } else if (0 == strcmp(sched_arg, sched_multiq_dispatch.sched_name)) {
                        sched_current_dispatch = &sched_multiq_dispatch;
-                       _sched_enum = sched_enum_multiq;
-                       strlcpy(sched_string, kSchedMultiQString, sizeof(sched_string));
-               } else if (0 == strcmp(sched_arg, kSchedDualQString)) {
+               } else if (0 == strcmp(sched_arg, sched_dualq_dispatch.sched_name)) {
                        sched_current_dispatch = &sched_dualq_dispatch;
-                       _sched_enum = sched_enum_dualq;
-                       strlcpy(sched_string, kSchedDualQString, sizeof(sched_string));
 #endif
                } else {
 #if defined(CONFIG_SCHED_TRADITIONAL)
                        printf("Unrecognized scheduler algorithm: %s\n", sched_arg);
-                       printf("Scheduler: Using instead: %s\n", kSchedTraditionalWithPsetRunqueueString);
-
+                       printf("Scheduler: Using instead: %s\n", sched_traditional_with_pset_runqueue_dispatch.sched_name);
                        sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch;
-                       _sched_enum = sched_enum_traditional_with_pset_runqueue;
-                       strlcpy(sched_string, kSchedTraditionalWithPsetRunqueueString, sizeof(sched_string));
 #else
                        panic("Unrecognized scheduler algorithm: %s", sched_arg);
 #endif
                }
-               kprintf("Scheduler: Runtime selection of %s\n", sched_string);
+               kprintf("Scheduler: Runtime selection of %s\n", SCHED(sched_name));
        } else {
 #if   defined(CONFIG_SCHED_MULTIQ)
                sched_current_dispatch = &sched_multiq_dispatch;
-               _sched_enum = sched_enum_multiq;
-               strlcpy(sched_string, kSchedMultiQString, sizeof(sched_string));
 #elif defined(CONFIG_SCHED_TRADITIONAL)
                sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch;
-               _sched_enum = sched_enum_traditional_with_pset_runqueue;
-               strlcpy(sched_string, kSchedTraditionalWithPsetRunqueueString, sizeof(sched_string));
 #elif defined(CONFIG_SCHED_PROTO)
                sched_current_dispatch = &sched_proto_dispatch;
-               _sched_enum = sched_enum_proto;
-               strlcpy(sched_string, kSchedProtoString, sizeof(sched_string));
 #elif defined(CONFIG_SCHED_GRRR)
                sched_current_dispatch = &sched_grrr_dispatch;
-               _sched_enum = sched_enum_grrr;
-               strlcpy(sched_string, kSchedGRRRString, sizeof(sched_string));
 #else
 #error No default scheduler implementation
 #endif
-               kprintf("Scheduler: Default of %s\n", sched_string);
+               kprintf("Scheduler: Default of %s\n", SCHED(sched_name));
+       }
+
+       strlcpy(sched_string, SCHED(sched_name), sizeof(sched_string));
+
+       if (PE_parse_boot_argn("sched_debug", &sched_debug_flags, sizeof(sched_debug_flags))) {
+               kprintf("Scheduler: Debug flags 0x%08x\n", sched_debug_flags);
        }
        
        SCHED(init)();
-       SCHED(fairshare_init)();
        sched_realtime_init();
        ast_init();
        sched_timer_deadline_tracking_init();
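With the kSched*String constants and _sched_enum gone, the boot-arg is matched directly against each table's sched_name. A hedged sketch of name-based selection follows; the table list and fallback are illustrative, only the lookup-by-name idea comes from the diff.

    #include <stdio.h>
    #include <string.h>

    /* Sketch: each dispatch table carries its own name, so selection is a
     * string match against sched_name rather than separate constants. */
    struct dispatch { const char *sched_name; };

    static const struct dispatch multiq = { .sched_name = "multiq" };
    static const struct dispatch dualq  = { .sched_name = "dualq"  };
    static const struct dispatch *tables[] = { &multiq, &dualq };

    static const struct dispatch *select_by_name(const char *arg)
    {
        for (size_t i = 0; i < sizeof(tables) / sizeof(tables[0]); i++)
            if (strcmp(arg, tables[i]->sched_name) == 0)
                return tables[i];
        return &multiq;                       /* fall back to the build default */
    }

    int main(void)
    {
        printf("selected %s\n", select_by_name("dualq")->sched_name);
        return 0;
    }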
@@ -633,7 +403,7 @@ sched_timebase_init(void)
 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
 
 void
-sched_traditional_init(void)
+sched_timeshare_init(void)
 {
        /*
         * Calculate the timeslicing quantum
@@ -657,7 +427,7 @@ sched_traditional_init(void)
 }
 
 void
-sched_traditional_timebase_init(void)
+sched_timeshare_timebase_init(void)
 {
        uint64_t        abstime;
        uint32_t        shift;
@@ -712,51 +482,10 @@ sched_traditional_timebase_init(void)
 
 #endif /* CONFIG_SCHED_TIMESHARE_CORE */
 
-
-#if defined(CONFIG_SCHED_TRADITIONAL)
-
-static void
-sched_traditional_processor_init(processor_t processor)
-{
-       if (!sched_traditional_use_pset_runqueue) {
-               run_queue_init(&processor->runq);
-       }
-       processor->runq_bound_count = 0;
-}
-
-static void
-sched_traditional_pset_init(processor_set_t pset)
-{
-       if (sched_traditional_use_pset_runqueue) {
-               run_queue_init(&pset->pset_runq);
-       }
-       pset->pset_runq_bound_count = 0;
-}
-
-static void
-sched_traditional_with_pset_runqueue_init(void)
-{
-       sched_traditional_init();
-       sched_traditional_use_pset_runqueue = TRUE;
-}
-
-#endif /* CONFIG_SCHED_TRADITIONAL */
-
-#if defined(CONFIG_SCHED_FAIRSHARE_CORE)
-void
-sched_traditional_fairshare_init(void)
-{
-       simple_lock_init(&fs_lock, 0);
-       
-       fs_runq.count = 0;
-       queue_init(&fs_runq.queue);
-}
-#endif /* CONFIG_SCHED_FAIRSHARE_CORE */
-
 static void
 sched_realtime_init(void)
 {
-       simple_lock_init(&rt_lock, 0);
+       rt_lock_init();
 
        rt_runq.count = 0;
        queue_init(&rt_runq.queue);
@@ -877,16 +606,18 @@ thread_timer_expire(
  *
  *     Unblock thread on wake up.
  *
- *     Returns TRUE if the thread is still running.
+ *     Returns TRUE if the thread should now be placed on the runqueue.
  *
  *     Thread must be locked.
+ *
+ *     Called at splsched().
  */
 boolean_t
 thread_unblock(
        thread_t                thread,
        wait_result_t   wresult)
 {
-       boolean_t               result = FALSE;
+       boolean_t               ready_for_runq = FALSE;
        thread_t                cthread = current_thread();
        uint32_t                new_run_count;
 
@@ -912,6 +643,9 @@ thread_unblock(
 
        if (!(thread->state & TH_RUN)) {
                thread->state |= TH_RUN;
+               thread->last_made_runnable_time = mach_approximate_time();
+
+               ready_for_runq = TRUE;
 
                (*thread->sched_call)(SCHED_CALL_UNBLOCK, thread);
 
@@ -925,8 +659,7 @@ thread_unblock(
                        if (thread->sched_flags & TH_SFLAG_THROTTLED)
                                sched_background_incr(thread);
                }
-       }
-       else {
+       } else {
                /*
                 *      Signal if idling on another processor.
                 */
@@ -942,14 +675,14 @@ thread_unblock(
 #endif
 
                new_run_count = sched_run_count; /* updated in thread_select_idle() */
-               result = TRUE;
        }
 
+
        /*
         * Calculate deadline for real-time threads.
         */
        if (thread->sched_mode == TH_MODE_REALTIME) {
-               uint64_t                ctime;
+               uint64_t ctime;
 
                ctime = mach_absolute_time();
                thread->realtime.deadline = thread->realtime.constraint + ctime;
@@ -1014,7 +747,7 @@ thread_unblock(
 
        DTRACE_SCHED2(wakeup, struct thread *, thread, struct proc *, thread->task->bsd_info);
 
-       return (result);
+       return (ready_for_runq);
 }
 
 /*
@@ -1024,27 +757,29 @@ thread_unblock(
  *     Conditions:
  *             thread lock held, IPC locks may be held.
  *             thread must have been pulled from wait queue under same lock hold.
- *  Returns:
+ *             thread must have been waiting
+ *     Returns:
  *             KERN_SUCCESS - Thread was set running
- *             KERN_NOT_WAITING - Thread was not waiting
+ *
+ * TODO: This should return void
  */
 kern_return_t
 thread_go(
-       thread_t                thread,
-       wait_result_t   wresult)
+          thread_t        thread,
+          wait_result_t   wresult)
 {
        assert(thread->at_safe_point == FALSE);
        assert(thread->wait_event == NO_EVENT64);
-       assert(thread->wait_queue == WAIT_QUEUE_NULL);
+       assert(thread->waitq == NULL);
 
-       if ((thread->state & (TH_WAIT|TH_TERMINATE)) == TH_WAIT) {
-               if (!thread_unblock(thread, wresult))
-                       thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
+       assert(!(thread->state & (TH_TERMINATE|TH_TERMINATE2)));
+       assert(thread->state & TH_WAIT);
 
-               return (KERN_SUCCESS);
-       }
 
-       return (KERN_NOT_WAITING);
+       if (thread_unblock(thread, wresult))
+               thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
+
+       return (KERN_SUCCESS);
 }
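thread_go() now asserts the thread really is waiting and not terminating, and uses thread_unblock()'s return value, "should this thread be placed on a runqueue", to decide whether to call thread_setrun(). A rough userspace model of that split is below; the state bits and behavior are simplified stand-ins, not the kernel's.

    #include <stdbool.h>
    #include <stdio.h>

    /* Model of the wakeup split: unblock() reports whether the thread now needs
     * a runqueue, and the caller enqueues it. Names are illustrative. */
    enum { TH_WAIT = 1, TH_RUN = 2 };

    struct th { int state; };

    static bool model_unblock(struct th *t)
    {
        bool needs_runq = !(t->state & TH_RUN);   /* already running or idling: no */
        t->state = (t->state & ~TH_WAIT) | TH_RUN;
        return needs_runq;
    }

    static void model_go(struct th *t)
    {
        if (model_unblock(t))
            puts("thread_setrun: place on a runqueue and maybe preempt");
        else
            puts("already has a processor: just signal it");
    }

    int main(void)
    {
        struct th waiter = { .state = TH_WAIT };
        model_go(&waiter);
        return 0;
    }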
 
 /*
@@ -1065,6 +800,7 @@ thread_mark_wait_locked(
        boolean_t               at_safe_point;
 
        assert(thread == current_thread());
+       assert(!(thread->state & (TH_WAIT|TH_IDLE|TH_UNINT|TH_TERMINATE2)));
 
        /*
         *      The thread may have certain types of interrupts/aborts masked
@@ -1141,7 +877,7 @@ assert_wait_possible(void)
        
        thread = current_thread();
 
-       return (thread == NULL || wait_queue_assert_possible(thread));
+       return (thread == NULL || waitq_wait_possible(thread));
 }
 
 /*
@@ -1155,19 +891,16 @@ assert_wait(
        event_t                         event,
        wait_interrupt_t        interruptible)
 {
-       register wait_queue_t   wq;
-       register int            index;
-
-       if(event == NO_EVENT)
-               panic("assert_wait() called with NO_EVENT");
+       if (__improbable(event == NO_EVENT))
+               panic("%s() called with NO_EVENT", __func__);
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
                VM_KERNEL_UNSLIDE(event), 0, 0, 0, 0);
 
-       index = wait_hash(event);
-       wq = &wait_queues[index];
-       return wait_queue_assert_wait(wq, event, interruptible, 0);
+       struct waitq *waitq;
+       waitq = global_eventq(event);
+       return waitq_assert_wait64(waitq, CAST_EVENT64_T(event), interruptible, TIMEOUT_WAIT_FOREVER);
 }
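assert_wait() now maps the event to a global waitq via global_eventq() instead of indexing wait_queues[wait_hash(event)]. The sketch below models only that mapping step, hashing an event address into a fixed table; the table size and hash are made up and do not reflect the real waitq implementation.

    #include <stdint.h>
    #include <stdio.h>

    /* Model of global_eventq(): hash the event address into a fixed table of
     * wait queues. Table size and hash are invented for the demo. */
    #define NUM_GLOBAL_WAITQS 64

    struct waitq_model { int waiters; };
    static struct waitq_model global_waitqs[NUM_GLOBAL_WAITQS];

    static struct waitq_model *global_eventq_model(const void *event)
    {
        uintptr_t e = (uintptr_t)event;
        return &global_waitqs[(e >> 4) % NUM_GLOBAL_WAITQS];
    }

    int main(void)
    {
        int some_event;
        struct waitq_model *wq = global_eventq_model(&some_event);
        wq->waiters++;             /* assert_wait would park the current thread here */
        printf("event %p -> waitq %ld\n", (void *)&some_event,
               (long)(wq - global_waitqs));
        return 0;
    }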
 
 wait_result_t
@@ -1179,36 +912,35 @@ assert_wait_timeout(
 {
        thread_t                        thread = current_thread();
        wait_result_t           wresult;
-       wait_queue_t            wqueue;
        uint64_t                        deadline;
        spl_t                           s;
 
-       if(event == NO_EVENT)
-               panic("assert_wait_timeout() called with NO_EVENT");
+       if (__improbable(event == NO_EVENT))
+               panic("%s() called with NO_EVENT", __func__);
 
-       wqueue = &wait_queues[wait_hash(event)];
+       struct waitq *waitq;
+       waitq = global_eventq(event);
 
        s = splsched();
-       wait_queue_lock(wqueue);
+       waitq_lock(waitq);
        thread_lock(thread);
 
        clock_interval_to_deadline(interval, scale_factor, &deadline);
-       
+
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
-               MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
-               VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
-       
-       wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t, event),
-                                                 interruptible, 
-                                                 TIMEOUT_URGENCY_SYS_NORMAL,
-                                                 deadline, 0,
-                                                 thread);
+                                 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
+                                 VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
+
+       wresult = waitq_assert_wait64_locked(waitq, CAST_EVENT64_T(event),
+                                            interruptible,
+                                            TIMEOUT_URGENCY_SYS_NORMAL,
+                                            deadline, TIMEOUT_NO_LEEWAY,
+                                            thread);
 
        thread_unlock(thread);
-       wait_queue_unlock(wqueue);
+       waitq_unlock(waitq);
        splx(s);
-
-       return (wresult);
+       return wresult;
 }
 
 wait_result_t
@@ -1222,42 +954,41 @@ assert_wait_timeout_with_leeway(
 {
        thread_t                        thread = current_thread();
        wait_result_t           wresult;
-       wait_queue_t            wqueue;
        uint64_t                        deadline;
        uint64_t                        abstime;
        uint64_t                        slop;
        uint64_t                        now;
        spl_t                           s;
 
+       if (__improbable(event == NO_EVENT))
+               panic("%s() called with NO_EVENT", __func__);
+
        now = mach_absolute_time();
        clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime);
        deadline = now + abstime;
 
        clock_interval_to_absolutetime_interval(leeway, scale_factor, &slop);
 
-       if(event == NO_EVENT)
-               panic("assert_wait_timeout_with_leeway() called with NO_EVENT");
-
-       wqueue = &wait_queues[wait_hash(event)];
+       struct waitq *waitq;
+       waitq = global_eventq(event);
 
        s = splsched();
-       wait_queue_lock(wqueue);
+       waitq_lock(waitq);
        thread_lock(thread);
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
-               MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
-               VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
-       
-       wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t, event),
-                                                 interruptible,
-                                                 urgency, deadline, slop,
-                                                 thread);
+                                 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
+                                 VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
+
+       wresult = waitq_assert_wait64_locked(waitq, CAST_EVENT64_T(event),
+                                            interruptible,
+                                            urgency, deadline, slop,
+                                            thread);
 
        thread_unlock(thread);
-       wait_queue_unlock(wqueue);
+       waitq_unlock(waitq);
        splx(s);
-
-       return (wresult);
+       return wresult;
 }
 
 wait_result_t
@@ -1268,30 +999,30 @@ assert_wait_deadline(
 {
        thread_t                        thread = current_thread();
        wait_result_t           wresult;
-       wait_queue_t            wqueue;
        spl_t                           s;
 
-       assert(event != NO_EVENT);
-       wqueue = &wait_queues[wait_hash(event)];
+       if (__improbable(event == NO_EVENT))
+               panic("%s() called with NO_EVENT", __func__);
+
+       struct waitq *waitq;
+       waitq = global_eventq(event);
 
        s = splsched();
-       wait_queue_lock(wqueue);
+       waitq_lock(waitq);
        thread_lock(thread);
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
-               MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
-               VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
-
-       wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t,event),
-                                                 interruptible, 
-                                                 TIMEOUT_URGENCY_SYS_NORMAL, deadline, 0,
-                                                 thread);
+                                 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
+                                 VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
 
+       wresult = waitq_assert_wait64_locked(waitq, CAST_EVENT64_T(event),
+                                            interruptible,
+                                            TIMEOUT_URGENCY_SYS_NORMAL, deadline,
+                                            TIMEOUT_NO_LEEWAY, thread);
        thread_unlock(thread);
-       wait_queue_unlock(wqueue);
+       waitq_unlock(waitq);
        splx(s);
-
-       return (wresult);
+       return wresult;
 }
 
 wait_result_t
@@ -1304,32 +1035,31 @@ assert_wait_deadline_with_leeway(
 {
        thread_t                        thread = current_thread();
        wait_result_t           wresult;
-       wait_queue_t            wqueue;
        spl_t                           s;
 
-       if(event == NO_EVENT)
-               panic("assert_wait_deadline_with_leeway() called with NO_EVENT");
+       if (__improbable(event == NO_EVENT))
+               panic("%s() called with NO_EVENT", __func__);
 
-       wqueue = &wait_queues[wait_hash(event)];
+       struct waitq *waitq;
+       waitq = global_eventq(event);
 
        s = splsched();
-       wait_queue_lock(wqueue);
+       waitq_lock(waitq);
        thread_lock(thread);
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
-               MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
-               VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
+                                 MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE,
+                                 VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0);
 
-       wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t,event),
-                                                 interruptible, 
-                                                 urgency, deadline, leeway,
-                                                 thread);
+       wresult = waitq_assert_wait64_locked(waitq, CAST_EVENT64_T(event),
+                                            interruptible,
+                                            urgency, deadline, leeway,
+                                            thread);
 
        thread_unlock(thread);
-       wait_queue_unlock(wqueue);
+       waitq_unlock(waitq);
        splx(s);
-
-       return (wresult);
+       return wresult;
 }
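
A hypothetical timed wait on top of assert_wait_deadline(); the deadline is an absolute-time value, so callers typically derive it from an interval first. Sketch only, with my_event as a placeholder:

        static int my_event;                    /* hypothetical event the waker posts */

        static wait_result_t
        wait_with_timeout(void)
        {
                uint64_t        deadline;
                wait_result_t   wr;

                /* Convert a 10 ms interval into an absolute-time deadline. */
                clock_interval_to_deadline(10, NSEC_PER_MSEC, &deadline);

                assert_wait_deadline((event_t)&my_event, THREAD_INTERRUPTIBLE, deadline);
                wr = thread_block(THREAD_CONTINUE_NULL);

                if (wr == THREAD_TIMED_OUT) {
                        /* deadline expired before anyone called thread_wakeup() */
                } else if (wr == THREAD_INTERRUPTED) {
                        /* the wait was aborted, e.g. by clear_wait() */
                }
                return wr;
        }

The _with_leeway variant above additionally takes a timeout urgency (e.g. TIMEOUT_URGENCY_USER_NORMAL) and a leeway, also in absolute-time units, which the timer subsystem may use to coalesce the wakeup.
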
 
 /*
@@ -1355,6 +1085,13 @@ thread_isoncpu(thread_t thread)
        if (thread->runq != PROCESSOR_NULL)
                return (FALSE);
 
+       /*
+        * Thread does not have a stack yet.
+        * It could be on the stack alloc queue or preparing to be invoked.

+        */
+       if (!thread->kernel_stack)
+               return (FALSE);
+
        /*
         * Thread must be running on a processor, or
         * about to run, or just did run. In all these
@@ -1474,13 +1211,8 @@ thread_unstop(
        wake_lock(thread);
        thread_lock(thread);
 
-       if ((thread->state & (TH_RUN|TH_WAIT|TH_SUSP)) == TH_SUSP) {
-               thread->state &= ~TH_SUSP;
-               thread_unblock(thread, THREAD_AWAKENED);
+       assert((thread->state & (TH_RUN|TH_WAIT|TH_SUSP)) != TH_SUSP);
 
-               thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
-       }
-       else
        if (thread->state & TH_SUSP) {
                thread->state &= ~TH_SUSP;
 
@@ -1577,35 +1309,37 @@ clear_wait_internal(
        thread_t                thread,
        wait_result_t   wresult)
 {
-       wait_queue_t    wq = thread->wait_queue;
        uint32_t        i = LockTimeOut;
+       struct waitq *waitq = thread->waitq;
 
        do {
                if (wresult == THREAD_INTERRUPTED && (thread->state & TH_UNINT))
                        return (KERN_FAILURE);
 
-               if (wq != WAIT_QUEUE_NULL) {
-                       if (wait_queue_lock_try(wq)) {
-                               wait_queue_pull_thread_locked(wq, thread, TRUE);
-                               /* wait queue unlocked, thread still locked */
-                       }
-                       else {
+               if (waitq != NULL) {
+                       assert(waitq_irq_safe(waitq)); //irqs are already disabled!
+                       if (waitq_lock_try(waitq)) {
+                               waitq_pull_thread_locked(waitq, thread);
+                               waitq_unlock(waitq);
+                       } else {
                                thread_unlock(thread);
                                delay(1);
-
                                thread_lock(thread);
-                               if (wq != thread->wait_queue)
-                                       return (KERN_NOT_WAITING);
-
+                               if (waitq != thread->waitq)
+                                       return KERN_NOT_WAITING;
                                continue;
                        }
                }
 
-               return (thread_go(thread, wresult));
+               /* TODO: Can we instead assert TH_TERMINATE is not set?  */
+               if ((thread->state & (TH_WAIT|TH_TERMINATE)) == TH_WAIT)
+                       return (thread_go(thread, wresult));
+               else
+                       return (KERN_NOT_WAITING);
        } while ((--i > 0) || machine_timeout_suspended());
 
        panic("clear_wait_internal: deadlock: thread=%p, wq=%p, cpu=%d\n",
-                 thread, wq, cpu_number());
+                 thread, waitq, cpu_number());
 
        return (KERN_FAILURE);
 }
@@ -1662,18 +1396,18 @@ thread_wakeup_prim_internal(
        wait_result_t           result,
        int                     priority)
 {
-       register wait_queue_t   wq;
-       register int                    index;
+       if (__improbable(event == NO_EVENT))
+               panic("%s() called with NO_EVENT", __func__);
+
+       struct waitq *wq;
 
-       if(event == NO_EVENT)
-               panic("thread_wakeup_prim() called with NO_EVENT");
+       wq = global_eventq(event);
+       priority = (priority == -1 ? WAITQ_ALL_PRIORITIES : priority);
 
-       index = wait_hash(event);
-       wq = &wait_queues[index];
        if (one_thread)
-               return (wait_queue_wakeup_one(wq, event, result, priority));
+               return waitq_wakeup64_one(wq, CAST_EVENT64_T(event), result, priority);
        else
-           return (wait_queue_wakeup_all(wq, event, result));
+               return waitq_wakeup64_all(wq, CAST_EVENT64_T(event), result, priority);
 }
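
Callers almost never invoke this primitive directly; the familiar wrappers in osfmk/kern/sched_prim.h expand to it, roughly as follows (paraphrased for reference):

        #define thread_wakeup(x)                thread_wakeup_prim((x), FALSE, THREAD_AWAKENED)
        #define thread_wakeup_with_result(x, z) thread_wakeup_prim((x), FALSE, (z))
        #define thread_wakeup_one(x)            thread_wakeup_prim((x), TRUE,  THREAD_AWAKENED)

The priority == -1 normalization above suggests these wrappers reach this function with no priority specified, which is mapped to WAITQ_ALL_PRIORITIES.
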
 
 /*
@@ -1698,11 +1432,7 @@ thread_bind(
        s = splsched();
        thread_lock(self);
 
-       /* <rdar://problem/15102234> */
-       assert(self->sched_pri < BASEPRI_RTQUEUES);
-
-       prev = self->bound_processor;
-       self->bound_processor = processor;
+       prev = thread_bind_internal(self, processor);
 
        thread_unlock(self);
        splx(s);
@@ -1710,6 +1440,192 @@ thread_bind(
        return (prev);
 }
 
+/*
+ * thread_bind_internal:
+ *
+ * If the specified thread is not the current thread, and it is currently
+ * running on another CPU, a remote AST must be sent to that CPU to cause
+ * the thread to migrate to its bound processor. Otherwise, the migration
+ * will occur at the next quantum expiration or blocking point.
+ *
+ * When the thread is the current thread, an explicit thread_block() should
+ * be used to force the current processor to context switch away and
+ * let the thread migrate to the bound processor.
+ *
+ * Thread must be locked, and at splsched.
+ */
+
+static processor_t
+thread_bind_internal(
+       thread_t                thread,
+       processor_t             processor)
+{
+       processor_t             prev;
+
+       /* <rdar://problem/15102234> */
+       assert(thread->sched_pri < BASEPRI_RTQUEUES);
+       /* A thread can't be bound if it's sitting on a (potentially incorrect) runqueue */
+       assert(thread->runq == PROCESSOR_NULL);
+
+       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_THREAD_BIND), thread_tid(thread), processor ? (uintptr_t)processor->cpu_id : (uintptr_t)-1, 0, 0, 0);
+
+       prev = thread->bound_processor;
+       thread->bound_processor = processor;
+
+       return (prev);
+}
+
+/*
+ * thread_vm_bind_group_add:
+ *
+ * The "VM bind group" is a special mechanism to mark a collection
+ * of threads from the VM subsystem that, in general, should be scheduled
+ * with only one CPU of parallelism. To accomplish this, we initially
+ * bind all the threads to the master processor, which has the effect
+ * that only one of the threads in the group can execute at once, including
+ * preempting lower-priority threads in the group. Future
+ * implementations may use more dynamic mechanisms to prevent the collection
+ * of VM threads from using more CPU time than desired.
+ *
+ * The current implementation can result in priority inversions where
+ * compute-bound priority 95 or realtime threads that happen to have
+ * landed on the master processor prevent the VM threads from running.
+ * When this situation is detected, we unbind the threads for one
+ * scheduler tick to allow the scheduler to run the threads on
+ * additional CPUs, before restoring the binding (assuming high latency
+ * is no longer a problem).
+ */
+
+/*
+ * The current max is provisioned for:
+ * vm_compressor_swap_trigger_thread (92)
+ * 2 x vm_pageout_iothread_internal (92) when vm_restricted_to_single_processor==TRUE
+ * vm_pageout_continue (92)
+ * memorystatus_thread (95)
+ */
+#define MAX_VM_BIND_GROUP_COUNT (5)
+decl_simple_lock_data(static,sched_vm_group_list_lock);
+static thread_t sched_vm_group_thread_list[MAX_VM_BIND_GROUP_COUNT];
+static int sched_vm_group_thread_count;
+static boolean_t sched_vm_group_temporarily_unbound = FALSE;
+
+void
+thread_vm_bind_group_add(void)
+{
+       thread_t self = current_thread();
+
+       thread_reference_internal(self);
+       self->options |= TH_OPT_SCHED_VM_GROUP;
+
+       simple_lock(&sched_vm_group_list_lock);
+       assert(sched_vm_group_thread_count < MAX_VM_BIND_GROUP_COUNT);
+       sched_vm_group_thread_list[sched_vm_group_thread_count++] = self;
+       simple_unlock(&sched_vm_group_list_lock);
+
+       thread_bind(master_processor);
+
+       /* Switch to bound processor if not already there */
+       thread_block(THREAD_CONTINUE_NULL);
+}
+
+static void
+sched_vm_group_maintenance(void)
+{
+       uint64_t ctime = mach_absolute_time();
+       uint64_t longtime = ctime - sched_tick_interval;
+       int i;
+       spl_t s;
+       boolean_t high_latency_observed = FALSE;
+       boolean_t runnable_and_not_on_runq_observed = FALSE;
+       boolean_t bind_target_changed = FALSE;
+       processor_t bind_target = PROCESSOR_NULL;
+
+       /* Make sure nobody attempts to add new threads while we are enumerating them */
+       simple_lock(&sched_vm_group_list_lock);
+
+       s = splsched();
+
+       for (i=0; i < sched_vm_group_thread_count; i++) {
+               thread_t thread = sched_vm_group_thread_list[i];
+               assert(thread != THREAD_NULL);
+               thread_lock(thread);
+               if ((thread->state & (TH_RUN|TH_WAIT)) == TH_RUN) {
+                       if (thread->runq != PROCESSOR_NULL && thread->last_made_runnable_time < longtime) {
+                               high_latency_observed = TRUE;
+                       } else if (thread->runq == PROCESSOR_NULL) {
+                               /* There are some cases where a transitioning thread also falls into this case */
+                               runnable_and_not_on_runq_observed = TRUE;
+                       }
+               }
+               thread_unlock(thread);
+
+               if (high_latency_observed && runnable_and_not_on_runq_observed) {
+                       /* All the things we are looking for are true, stop looking */
+                       break;
+               }
+       }
+
+       splx(s);
+
+       if (sched_vm_group_temporarily_unbound) {
+               /* If we turned off binding, make sure everything is OK before rebinding */
+               if (!high_latency_observed) {
+                       /* rebind */
+                       bind_target_changed = TRUE;
+                       bind_target = master_processor;
+                       sched_vm_group_temporarily_unbound = FALSE; /* might be reset to TRUE if change cannot be completed */
+               }
+       } else {
+               /*
+                * Check if we're in a bad state, which is defined by high
+                * latency with no core currently executing a thread. If a
+                * single thread is making progress on a CPU, that means the
+                * binding concept to reduce parallelism is working as
+                * designed.
+                */
+               if (high_latency_observed && !runnable_and_not_on_runq_observed) {
+                       /* unbind */
+                       bind_target_changed = TRUE;
+                       bind_target = PROCESSOR_NULL;
+                       sched_vm_group_temporarily_unbound = TRUE;
+               }
+       }
+
+       if (bind_target_changed) {
+               s = splsched();
+               for (i=0; i < sched_vm_group_thread_count; i++) {
+                       thread_t thread = sched_vm_group_thread_list[i];
+                       boolean_t removed;
+                       assert(thread != THREAD_NULL);
+
+                       thread_lock(thread);
+                       removed = thread_run_queue_remove(thread);
+                       if (removed || ((thread->state & (TH_RUN | TH_WAIT)) == TH_WAIT)) {
+                               thread_bind_internal(thread, bind_target);
+                       } else {
+                               /*
+                                * Thread was in the middle of being context-switched-to,
+                                * or was in the process of blocking. To avoid switching the bind
+                                * state out mid-flight, defer the change if possible.
+                                */
+                               if (bind_target == PROCESSOR_NULL) {
+                                       thread_bind_internal(thread, bind_target);
+                               } else {
+                                       sched_vm_group_temporarily_unbound = TRUE; /* next pass will try again */
+                               }
+                       }
+
+                       if (removed) {
+                               thread_run_queue_reinsert(thread, SCHED_PREEMPT | SCHED_TAILQ);
+                       }
+                       thread_unlock(thread);
+               }
+               splx(s);
+       }
+
+       simple_unlock(&sched_vm_group_list_lock);
+}
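+
A hypothetical VM daemon thread opting into the group; the call binds the calling thread itself, so it is made from the thread's own startup path (illustration only, names are placeholders):

        static int vm_worker_event;

        static void
        vm_worker_thread(void *arg __unused, wait_result_t wr __unused)
        {
                thread_vm_bind_group_add();     /* bind to master_processor, block once to migrate */

                for (;;) {
                        /* ... perform VM work ...
                         * sched_vm_group_maintenance() may transparently unbind and
                         * rebind the whole group if it observes scheduling latency. */
                        assert_wait((event_t)&vm_worker_event, THREAD_UNINT);
                        thread_block(THREAD_CONTINUE_NULL);
                }
        }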
+
 /* Invoked prior to idle entry to determine if, on SMT capable processors, an SMT
  * rebalancing opportunity exists when a core is (instantaneously) idle, but
  * other SMT-capable cores may be over-committed. TODO: some possible negatives:
@@ -1718,11 +1634,12 @@ thread_bind(
  * followed by a wakeup shortly thereafter.
  */
 
-/* Invoked with pset locked, returns with pset unlocked */
 #if (DEVELOPMENT || DEBUG)
 int sched_smt_balance = 1;
 #endif
 
+#if __SMP__
+/* Invoked with pset locked, returns with pset unlocked */
 static void
 sched_SMT_balance(processor_t cprocessor, processor_set_t cpset) {
        processor_t ast_processor = NULL;
@@ -1753,7 +1670,7 @@ sched_SMT_balance(processor_t cprocessor, processor_set_t cpset) {
                    (sprocessor->processor_primary != sprocessor) &&
                    (sprocessor->processor_primary->state == PROCESSOR_RUNNING) &&
                    (sprocessor->current_pri < BASEPRI_RTQUEUES) &&
-                   ((cpset->pending_AST_cpu_mask & (1U << sprocessor->cpu_id)) == 0)) {
+                   ((cpset->pending_AST_cpu_mask & (1ULL << sprocessor->cpu_id)) == 0)) {
                        assert(sprocessor != cprocessor);
                        ast_processor = sprocessor;
                        break;
@@ -1769,6 +1686,7 @@ smt_balance_exit:
                cause_ast_check(ast_processor);
        }
 }
+#endif /* __SMP__ */
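
The 1U -> 1ULL change above matters once pending_AST_cpu_mask is wider than 32 bits, as the other new uses of 1ULL in this change imply; shifting a 32-bit constant either drops high CPUs or is outright undefined. A small illustration with hypothetical values:

        uint64_t mask = 0;
        int cpu_id = 33;                         /* hypothetical CPU number >= 32 */

        mask |= (1U   << cpu_id);                /* undefined: shift count >= width of unsigned int */
        mask |= (1ULL << cpu_id);                /* correct: sets bit 33 of the 64-bit mask */

        /* Clearing a bit has the same pitfall: ~(1U << n) zero-extends to 64 bits
         * and wipes the upper half of the mask, so 1ULL must be used there too. */
        mask &= ~(1ULL << cpu_id);
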
 
 /*
  *     thread_select:
@@ -1787,6 +1705,7 @@ thread_select(
        thread_t                        new_thread = THREAD_NULL;
 
        assert(processor == current_processor());
+       assert((thread->state & (TH_RUN|TH_TERMINATE2)) == TH_RUN);
 
        do {
                /*
@@ -1803,7 +1722,15 @@ thread_select(
 
                assert(processor->state != PROCESSOR_OFF_LINE);
 
-               if (processor->processor_primary != processor) {
+               if (!processor->is_recommended) {
+                       /*
+                        * The performance controller has provided a hint to not dispatch more threads,
+                        * The performance controller has provided a hint not to dispatch more threads,
+                        * unless they are bound to us (and thus we are the only option).
+                       if (!SCHED(processor_bound_count)(processor)) {
+                               goto idle;
+                       }
+               } else if (processor->processor_primary != processor) {
                        /*
                         * Should this secondary SMT processor attempt to find work? For pset runqueue systems,
                         * we should look for work only under the same conditions that choose_processor()
@@ -1817,117 +1744,118 @@ thread_select(
                        }
                }
 
-               simple_lock(&rt_lock);
+               rt_lock_lock();
 
                /*
                 *      Test to see if the current thread should continue
-                *      to run on this processor.  Must be runnable, and not
+                *      to run on this processor.  Must not be attempting to wait, and not
                 *      bound to a different processor, nor be in the wrong
-                *      processor set.
+                *      processor set, nor be forced to context switch by TH_SUSP.
+                *
+                *      Note that there are never any RT threads in the regular runqueue.
+                *
+                *      This code is extremely tricky.
                 */
-               if (((thread->state & ~TH_SUSP) == TH_RUN) &&
+
+               if (((thread->state & (TH_TERMINATE|TH_IDLE|TH_WAIT|TH_RUN|TH_SUSP)) == TH_RUN) &&
                    (thread->sched_pri >= BASEPRI_RTQUEUES     || processor->processor_primary == processor) &&
                    (thread->bound_processor == PROCESSOR_NULL || thread->bound_processor == processor)      &&
                    (thread->affinity_set == AFFINITY_SET_NULL || thread->affinity_set->aset_pset == pset)) {
-                       if (thread->sched_pri >= BASEPRI_RTQUEUES && first_timeslice(processor)) {
+                       /*
+                        * RT threads with un-expired quantum stay on processor,
+                        * unless there's a valid RT thread with an earlier deadline.
+                        */
+                       if (thread->sched_pri >= BASEPRI_RTQUEUES && processor->first_timeslice) {
                                if (rt_runq.count > 0) {
                                        thread_t next_rt;
 
                                        next_rt = (thread_t)queue_first(&rt_runq.queue);
+
+                                       assert(next_rt->runq == THREAD_ON_RT_RUNQ);
+
                                        if (next_rt->realtime.deadline < processor->deadline &&
-                                          (next_rt->bound_processor == PROCESSOR_NULL || next_rt->bound_processor == processor)) {
-                                               thread = (thread_t)dequeue_head(&rt_runq.queue);
-                                               thread->runq = PROCESSOR_NULL;
-                                               SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
-                                               rt_runq.count--;
+                                           (next_rt->bound_processor == PROCESSOR_NULL ||
+                                            next_rt->bound_processor == processor)) {
+                                               /* The next RT thread is better, so pick it off the runqueue. */
+                                               goto pick_new_rt_thread;
                                        }
                                }
 
-                               simple_unlock(&rt_lock);
-
+                               /* This is still the best RT thread to run. */
                                processor->deadline = thread->realtime.deadline;
 
+                               rt_lock_unlock();
                                pset_unlock(pset);
 
                                return (thread);
                        }
 
-                       if ((thread->sched_mode != TH_MODE_FAIRSHARE || SCHED(fairshare_runq_count)() == 0) && (rt_runq.count == 0 || BASEPRI_RTQUEUES < thread->sched_pri) && (new_thread = SCHED(choose_thread)(processor, thread->sched_mode == TH_MODE_FAIRSHARE ? MINPRI : thread->sched_pri, reason)) == THREAD_NULL) {
-
-                               simple_unlock(&rt_lock);
-
+                       if ((rt_runq.count == 0) &&
+                           SCHED(processor_queue_has_priority)(processor, thread->sched_pri, TRUE) == FALSE) {
                                /* This thread is still the highest priority runnable (non-idle) thread */
-
                                processor->deadline = UINT64_MAX;
 
+                               rt_lock_unlock();
                                pset_unlock(pset);
 
                                return (thread);
                        }
                }
 
-               if (new_thread != THREAD_NULL ||
-                               (SCHED(processor_queue_has_priority)(processor, rt_runq.count == 0 ? IDLEPRI : BASEPRI_RTQUEUES, TRUE) &&
-                                        (new_thread = SCHED(choose_thread)(processor, MINPRI, reason)) != THREAD_NULL)) {
-                               simple_unlock(&rt_lock);
+               /* OK, so we're not going to run the current thread. Look at the RT queue. */
+               if (rt_runq.count > 0) {
+                       thread_t next_rt = (thread_t)queue_first(&rt_runq.queue);
 
-                               processor->deadline = UINT64_MAX;
-                               pset_unlock(pset);
-
-                               return (new_thread);
-               }
-
-               if (rt_runq.count > 0) {
-                       thread_t next_rt = (thread_t)queue_first(&rt_runq.queue);
+                       assert(next_rt->runq == THREAD_ON_RT_RUNQ);
 
-                       if (__probable((next_rt->bound_processor == NULL || (next_rt->bound_processor == processor)))) {
-                               thread = (thread_t)dequeue_head(&rt_runq.queue);
+                       if (__probable((next_rt->bound_processor == PROCESSOR_NULL ||
+                                      (next_rt->bound_processor == processor)))) {
+pick_new_rt_thread:
+                               new_thread = (thread_t)dequeue_head(&rt_runq.queue);
 
-                               thread->runq = PROCESSOR_NULL;
+                               new_thread->runq = PROCESSOR_NULL;
                                SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
                                rt_runq.count--;
 
-                               simple_unlock(&rt_lock);
+                               processor->deadline = new_thread->realtime.deadline;
 
-                               processor->deadline = thread->realtime.deadline;
+                               rt_lock_unlock();
                                pset_unlock(pset);
 
-                               return (thread);
+                               return (new_thread);
                        }
                }
 
-               simple_unlock(&rt_lock);
-
-               /* No realtime threads and no normal threads on the per-processor
-                * runqueue. Finally check for global fairshare threads.
-                */
-               if ((new_thread = SCHED(fairshare_dequeue)()) != THREAD_NULL) {
+               processor->deadline = UINT64_MAX;
+               rt_lock_unlock();
 
-                       processor->deadline = UINT64_MAX;
+               /* No RT threads, so let's look at the regular threads. */
+               if ((new_thread = SCHED(choose_thread)(processor, MINPRI, reason)) != THREAD_NULL) {
                        pset_unlock(pset);
-                       
                        return (new_thread);
                }
-                       
-               processor->deadline = UINT64_MAX;
 
-               /*
-                *      No runnable threads, attempt to steal
-                *      from other processors.
-                */
-               new_thread = SCHED(steal_thread)(pset);
-               if (new_thread != THREAD_NULL) {
-                       return (new_thread);
-               }
+#if __SMP__
+               if (SCHED(steal_thread_enabled)) {
+                       /*
+                        * No runnable threads, attempt to steal
+                        * from other processors. Returns with pset lock dropped.
+                        */
 
-               /*
-                *      If other threads have appeared, shortcut
-                *      around again.
-                */
-               if (!SCHED(processor_queue_empty)(processor) || rt_runq.count > 0 || SCHED(fairshare_runq_count)() > 0)
-                       continue;
+                       if ((new_thread = SCHED(steal_thread)(pset)) != THREAD_NULL) {
+                               return (new_thread);
+                       }
 
-               pset_lock(pset);
+                       /*
+                        * If other threads have appeared, shortcut
+                        * around again.
+                        */
+                       if (!SCHED(processor_queue_empty)(processor) || rt_runq.count > 0)
+                               continue;
+
+                       pset_lock(pset);
+               }
+#endif
 
        idle:
                /*
@@ -1946,8 +1874,12 @@ thread_select(
                        }
                }
 
+#if __SMP__
                /* Invoked with pset locked, returns with pset unlocked */
                sched_SMT_balance(processor, pset);
+#else
+               pset_unlock(pset);
+#endif
 
 #if CONFIG_SCHED_IDLE_IN_PLACE
                /*
@@ -1973,7 +1905,7 @@ thread_select(
                 * thread can start running on another processor without
                 * waiting for the fast-idled processor to wake up.
                 */
-               return (processor->idle_thread);
+               new_thread = processor->idle_thread;
 
 #endif /* !CONFIG_SCHED_IDLE_IN_PLACE */
 
@@ -2034,11 +1966,11 @@ thread_select_idle(
         *      Cancel the quantum timer while idling.
         */
        timer_call_cancel(&processor->quantum_timer);
-       processor->timeslice = 0;
+       processor->first_timeslice = FALSE;
 
        (*thread->sched_call)(SCHED_CALL_BLOCK, thread);
 
-       thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0, NULL);
+       thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0, 0, NULL);
 
        /*
         *      Enable interrupts and perform idling activities.  No
@@ -2065,23 +1997,16 @@ thread_select_idle(
                thread_quantum_init(thread);
                processor->quantum_end = processor->last_dispatch + thread->quantum_remaining;
                timer_call_enter1(&processor->quantum_timer, thread, processor->quantum_end, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
-               processor->timeslice = 1;
+               processor->first_timeslice = TRUE;
 
                thread->computation_epoch = processor->last_dispatch;
        }
 
        thread->state &= ~TH_IDLE;
 
-       /*
-        * If we idled in place, simulate a context switch back
-        * to the original priority of the thread so that the
-        * platform layer cannot distinguish this from a true
-        * switch to the idle thread.
-        */
-
        urgency = thread_get_urgency(thread, &arg1, &arg2);
 
-       thread_tell_urgency(urgency, arg1, arg2, new_thread);
+       thread_tell_urgency(urgency, arg1, arg2, 0, new_thread);
 
        sched_run_incr(thread);
        if (thread->sched_mode == TH_MODE_TIMESHARE) {
@@ -2095,91 +2020,15 @@ thread_select_idle(
 }
 #endif /* CONFIG_SCHED_IDLE_IN_PLACE */
 
-#if defined(CONFIG_SCHED_TRADITIONAL) 
-static thread_t
-sched_traditional_choose_thread(
-                                processor_t     processor,
-                                int             priority,
-                       __unused ast_t           reason)
-{
-       thread_t thread;
-       
-       thread = choose_thread_from_runq(processor, runq_for_processor(processor), priority);
-       if (thread != THREAD_NULL) {
-               runq_consider_decr_bound_count(processor, thread);
-       }
-       
-       return thread;
-}
-
-#endif /* defined(CONFIG_SCHED_TRADITIONAL)  */
-
-#if defined(CONFIG_SCHED_TRADITIONAL)
-
 /*
- *     choose_thread_from_runq:
+ * thread_invoke
  *
- *     Locate a thread to execute from the processor run queue
- *     and return it.  Only choose a thread with greater or equal
- *     priority.
+ * Called at splsched with neither thread locked.
  *
- *     Associated pset must be locked.  Returns THREAD_NULL
- *     on failure.
- */
-thread_t
-choose_thread_from_runq(
-       processor_t             processor,
-       run_queue_t             rq,
-       int                             priority)
-{
-       queue_t                 queue = rq->queues + rq->highq;
-       int                             pri = rq->highq, count = rq->count;
-       thread_t                thread;
-
-       while (count > 0 && pri >= priority) {
-               thread = (thread_t)queue_first(queue);
-               while (!queue_end(queue, (queue_entry_t)thread)) {
-                       if (thread->bound_processor == PROCESSOR_NULL ||
-                                                       thread->bound_processor == processor) {
-                               remqueue((queue_entry_t)thread);
-
-                               thread->runq = PROCESSOR_NULL;
-                               SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
-                               rq->count--;
-                               if (SCHED(priority_is_urgent)(pri)) {
-                                       rq->urgency--; assert(rq->urgency >= 0);
-                               }
-                               if (queue_empty(queue)) {
-                                       if (pri != IDLEPRI)
-                                               clrbit(MAXPRI - pri, rq->bitmap);
-                                       rq->highq = MAXPRI - ffsbit(rq->bitmap);
-                               }
-
-                               return (thread);
-                       }
-                       count--;
-
-                       thread = (thread_t)queue_next((queue_entry_t)thread);
-               }
-
-               queue--; pri--;
-       }
-
-       return (THREAD_NULL);
-}
-
-#endif /* defined(CONFIG_SCHED_TRADITIONAL) */
-
-/*
- *     Perform a context switch and start executing the new thread.
+ * Perform a context switch and start executing the new thread.
  *
- *     Returns FALSE on failure, and the thread is re-dispatched.
- *
- *     Called at splsched.
- */
-
-/*
- * thread_invoke
+ * Returns FALSE when the context switch didn't happen.
+ * The reference to the new thread is still consumed.
  *
  * "self" is what is currently running on the processor,
  * "thread" is the new thread to context switch to
@@ -2191,15 +2040,6 @@ thread_invoke(
        thread_t                        thread,
        ast_t                           reason)
 {
-       thread_continue_t       continuation = self->continuation;
-       void                    *parameter = self->parameter;
-       processor_t             processor;
-       uint64_t                ctime = mach_absolute_time();
-
-#ifdef CONFIG_MACH_APPROXIMATE_TIME
-       commpage_update_mach_approximate_time(ctime);
-#endif
-
        if (__improbable(get_preemption_level() != 0)) {
                int pl = get_preemption_level();
                panic("thread_invoke: preemption_level %d, possible cause: %s",
@@ -2207,33 +2047,37 @@ thread_invoke(
                        "blocking while holding a spinlock, or within interrupt context"));
        }
 
+       thread_continue_t       continuation = self->continuation;
+       void                    *parameter   = self->parameter;
+       processor_t             processor;
+
+       uint64_t                ctime = mach_absolute_time();
+
+#ifdef CONFIG_MACH_APPROXIMATE_TIME
+       commpage_update_mach_approximate_time(ctime);
+#endif
+
+#if defined(CONFIG_SCHED_TIMESHARE_CORE)
+       sched_timeshare_consider_maintenance(ctime);
+#endif
+
        assert(self == current_thread());
        assert(self->runq == PROCESSOR_NULL);
+       assert((self->state & (TH_RUN|TH_TERMINATE2)) == TH_RUN);
 
-#if defined(CONFIG_SCHED_TIMESHARE_CORE)
-       sched_traditional_consider_maintenance(ctime);
-#endif /* CONFIG_SCHED_TIMESHARE_CORE */       
-       
-       /*
-        * Mark thread interruptible.
-        */
        thread_lock(thread);
-       thread->state &= ~TH_UNINT;
 
-       assert(thread_runnable(thread));
+       assert((thread->state & (TH_RUN|TH_WAIT|TH_UNINT|TH_TERMINATE|TH_TERMINATE2)) == TH_RUN);
        assert(thread->bound_processor == PROCESSOR_NULL || thread->bound_processor == current_processor());
        assert(thread->runq == PROCESSOR_NULL);
 
        /* Reload precise timing global policy to thread-local policy */
        thread->precise_user_kernel_time = use_precise_user_kernel_time(thread);
-       
+
        /* Update SFI class based on other factors */
        thread->sfi_class = sfi_thread_classify(thread);
 
-       /*
-        * Allow time constraint threads to hang onto
-        * a stack.
-        */
+       /* Allow realtime threads to hang onto a stack. */
        if ((self->sched_mode == TH_MODE_REALTIME) && !self->reserved_stack)
                self->reserved_stack = self->kernel_stack;
 
@@ -2266,6 +2110,7 @@ thread_invoke(
                        thread->last_processor = processor;
                        thread->c_switch++;
                        ast_context(thread);
+
                        thread_unlock(thread);
 
                        self->reason = reason;
@@ -2290,7 +2135,7 @@ thread_invoke(
                                self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
 
                        if ((thread->chosen_processor != processor) && (thread->chosen_processor != PROCESSOR_NULL)) {
-                               KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_MOVED)|DBG_FUNC_NONE,
+                               SCHED_DEBUG_CHOOSE_PROCESSOR_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_MOVED)|DBG_FUNC_NONE,
                                                (uintptr_t)thread_tid(thread), (uintptr_t)thread->chosen_processor->cpu_id, 0, 0, 0);
                        }
 
@@ -2301,6 +2146,9 @@ thread_invoke(
                        TLOG(1, "thread_invoke: calling stack_handoff\n");
                        stack_handoff(self, thread);
 
+                       /* 'self' is now off core */
+                       assert(thread == current_thread());
+
                        DTRACE_SCHED(on__cpu);
 
                        thread_dispatch(self, thread);
@@ -2319,6 +2167,7 @@ thread_invoke(
                        /* same thread but with continuation */
                        ast_context(self);
                        counter(++c_thread_invoke_same);
+
                        thread_unlock(self);
 
                        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
@@ -2332,8 +2181,7 @@ thread_invoke(
                        call_continuation(continuation, parameter, self->wait_result);
                        /*NOTREACHED*/
                }
-       }
-       else {
+       } else {
                /*
                 * Check that the other thread has a stack
                 */
@@ -2345,8 +2193,7 @@ need_stack:
                                thread_stack_enqueue(thread);
                                return (FALSE);
                        }
-               }
-               else if (thread == self) {
+               } else if (thread == self) {
                        ast_context(self);
                        counter(++c_thread_invoke_same);
                        thread_unlock(self);
@@ -2375,11 +2222,11 @@ need_stack:
        thread->last_processor = processor;
        thread->c_switch++;
        ast_context(thread);
+
        thread_unlock(thread);
 
        counter(c_thread_invoke_csw++);
 
-       assert(self->runq == PROCESSOR_NULL);
        self->reason = reason;
 
        processor->last_dispatch = ctime;
@@ -2396,13 +2243,13 @@ need_stack:
                                        ctime,
                                         PROCESSOR_DATA(processor, current_state));
        }
-       
+
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
                self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
 
        if ((thread->chosen_processor != processor) && (thread->chosen_processor != NULL)) {
-               KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_MOVED)|DBG_FUNC_NONE,
+               SCHED_DEBUG_CHOOSE_PROCESSOR_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_MOVED)|DBG_FUNC_NONE,
                                (uintptr_t)thread_tid(thread), (uintptr_t)thread->chosen_processor->cpu_id, 0, 0, 0);
        }
 
@@ -2414,6 +2261,16 @@ need_stack:
         * This is where we actually switch register context,
         * and address space if required.  We will next run
         * as a result of a subsequent context switch.
+        *
+        * Once registers are switched and the processor is running "thread",
+        * the stack variables and non-volatile registers will contain whatever
+        * was there the last time that thread blocked. No local variables should
+        * be used after this point, except for the special case of "thread", which
+        * the platform layer returns as the previous thread running on the processor
+        * via the function call ABI as a return register, and "self", which may have
+        * via the function call ABI as a return register, and "self", which may
+        * have been stored on the stack or in a non-volatile register; that
+        * stale value is accurate again because this thread is once more
+        * running on the CPU.
        assert(continuation == self->continuation);
        thread = machine_switch_context(self, continuation, thread);
@@ -2439,6 +2296,105 @@ need_stack:
        return (TRUE);
 }
 
+#if defined(CONFIG_SCHED_DEFERRED_AST)
+/*
+ *     pset_cancel_deferred_dispatch:
+ *
+ *     Cancels all ASTs that we can cancel for the given processor set
+ *     if the current processor is running the last runnable thread in the
+ *     system.
+ *
+ *     This function assumes the current thread is runnable.  This must
+ *     be called with the pset unlocked.
+ */
+static void
+pset_cancel_deferred_dispatch(
+       processor_set_t         pset,
+       processor_t             processor)
+{
+       processor_t             active_processor = NULL;
+       uint32_t                sampled_sched_run_count;
+
+       pset_lock(pset);
+       sampled_sched_run_count = (volatile uint32_t) sched_run_count;
+
+       /*
+        * If we have emptied the run queue, and our current thread is runnable, we
+        * should tell any processors that are still DISPATCHING that they will
+        * probably not have any work to do.  In the event that there are no
+        * pending signals that we can cancel, this is also uninteresting.
+        *
+        * In the unlikely event that another thread becomes runnable while we are
+        * doing this (sched_run_count is atomically updated, not guarded), the
+        * codepath making it runnable SHOULD (a dangerous word) need the pset lock
+        * in order to dispatch it to a processor in our pset.  So, the other
+        * codepath will wait while we squash all cancelable ASTs, get the pset
+        * lock, and then dispatch the freshly runnable thread.  So this should be
+        * correct (we won't accidentally have a runnable thread that hasn't been
+        * dispatched to an idle processor), if not ideal (we may be restarting the
+        * dispatch process, which could have some overhead).
+        *
+        */
+       if ((sampled_sched_run_count == 1) &&
+           (pset->pending_deferred_AST_cpu_mask)) {
+               qe_foreach_element_safe(active_processor, &pset->active_queue, processor_queue) {
+                       /*
+                        * If a processor is DISPATCHING, it could be because of
+                        * a cancelable signal.
+                        *
+                        * IF the processor is not our
+                        * current processor (the current processor should not
+                        * be DISPATCHING, so this is a bit paranoid), AND there
+                        * is a cancelable signal pending on the processor, AND
+                        * there is no non-cancelable signal pending (as there is
+                        * no point trying to backtrack on bringing the processor
+                        * up if a signal we cannot cancel is outstanding), THEN
+                        * it should make sense to roll back the processor state
+                        * to the IDLE state.
+                        *
+                        * If the racy nature of this approach (as the signal
+                        * will be arbitrated by hardware, and can fire as we
+                        * roll back state) results in the core responding
+                        * despite being pushed back to the IDLE state, it
+                        * should be no different than if the core took some
+                        * interrupt while IDLE.
+                        */
+                       if ((active_processor->state == PROCESSOR_DISPATCHING) &&
+                           (pset->pending_deferred_AST_cpu_mask & (1ULL << active_processor->cpu_id)) &&
+                           (!(pset->pending_AST_cpu_mask & (1ULL << active_processor->cpu_id))) &&
+                           (active_processor != processor)) {
+                               /*
+                                * Squash all of the processor state back to some
+                                * reasonable facsimile of PROCESSOR_IDLE.
+                                *
+                                * TODO: What queue policy do we actually want here?
+                                * We want to promote selection of a good processor
+                                * to run on.  Do we want to enqueue at the head?
+                                * The tail?  At the (relative) old position in the
+                                * queue?  Or something else entirely?
+                                */
+                               re_queue_head(&pset->idle_queue, (queue_entry_t)active_processor);
+
+                               assert(active_processor->next_thread == THREAD_NULL);
+
+                               active_processor->current_pri = IDLEPRI;
+                               active_processor->current_thmode = TH_MODE_FIXED;
+                               active_processor->current_sfi_class = SFI_CLASS_KERNEL;
+                               active_processor->deadline = UINT64_MAX;
+                               active_processor->state = PROCESSOR_IDLE;
+                               pset->pending_deferred_AST_cpu_mask &= ~(1ULL << active_processor->cpu_id);
+                               machine_signal_idle_cancel(active_processor);
+                       }
+
+               }
+       }
+
+       pset_unlock(pset);
+}
+#else
+/* We don't support deferred ASTs; everything is candycanes and sunshine. */
+#endif
+
 /*
  *     thread_dispatch:
  *
@@ -2447,8 +2403,8 @@ need_stack:
  *     special actions.  Update quantum for other thread and begin
  *     the quantum for ourselves.
  *
- *     "self" is our new current thread that we have context switched
- *     to, "thread" is the old thread that we have switched away from.
+ *     "thread" is the old thread that we have switched away from.
+ *     "self" is the new current thread that we have context switched to.
  *
  *     Called at splsched.
  */
@@ -2459,6 +2415,10 @@ thread_dispatch(
 {
        processor_t             processor = self->last_processor;
 
+       assert(processor == current_processor());
+       assert(self == current_thread());
+       assert(thread != self);
+
        if (thread != THREAD_NULL) {
                /*
                 *      If blocked at a continuation, discard
@@ -2467,7 +2427,11 @@ thread_dispatch(
                if (thread->continuation != NULL && thread->kernel_stack != 0)
                        stack_free(thread);
 
-               if (!(thread->state & TH_IDLE)) {
+               if (thread->state & TH_IDLE) {
+                       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_DISPATCH) | DBG_FUNC_NONE,
+                               (uintptr_t)thread_tid(thread), 0, thread->state, sched_run_count, 0);
+               } else {
                        int64_t consumed;
                        int64_t remainder = 0;
 
@@ -2503,7 +2467,7 @@ thread_dispatch(
                        /*
                         *      Compute remainder of current quantum.
                         */
-                       if (first_timeslice(processor) &&
+                       if (processor->first_timeslice &&
                            processor->quantum_end > processor->last_dispatch)
                                thread->quantum_remaining = (uint32_t)remainder;
                        else
@@ -2518,7 +2482,7 @@ thread_dispatch(
                                        thread->realtime.deadline = UINT64_MAX;
                                }
                        } else {
-#if defined(CONFIG_SCHED_TRADITIONAL)
+#if defined(CONFIG_SCHED_TIMESHARE_CORE)
                                /*
                                 *      For non-realtime threads treat a tiny
                                 *      remaining quantum as an expired quantum
@@ -2528,7 +2492,7 @@ thread_dispatch(
                                        thread->reason |= AST_QUANTUM;
                                        thread->quantum_remaining += SCHED(initial_quantum_size)(thread);
                                }
-#endif
+#endif /* CONFIG_SCHED_TIMESHARE_CORE */
                        }
 
                        /*
@@ -2541,16 +2505,16 @@ thread_dispatch(
                                thread->quantum_remaining = 0;
                        } else {
 #if defined(CONFIG_SCHED_MULTIQ)
-                               if (sched_groups_enabled && thread->sched_group == self->sched_group) {
-                                       /* TODO: Remove tracepoint */
+                               if (SCHED(sched_groups_enabled) &&
+                                   thread->sched_group == self->sched_group) {
                                        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
-                                           MACHDBG_CODE(DBG_MACH_SCHED, MACH_QUANTUM_HANDOFF) | DBG_FUNC_NONE,
+                                           MACHDBG_CODE(DBG_MACH_SCHED, MACH_QUANTUM_HANDOFF),
                                            self->reason, (uintptr_t)thread_tid(thread),
                                            self->quantum_remaining, thread->quantum_remaining, 0);
 
                                        self->quantum_remaining = thread->quantum_remaining;
                                        thread->quantum_remaining = 0;
-                                       /*  TODO: Should we set AST_QUANTUM here? */
+                                       /* Don't set AST_QUANTUM here - old thread might still want to preempt someone else */
                                }
 #endif /* defined(CONFIG_SCHED_MULTIQ) */
                        }
@@ -2562,15 +2526,15 @@ thread_dispatch(
 
                                priority = thread->sched_pri;
 
-                               if (priority < thread->priority)
-                                       priority = thread->priority;
+                               if (priority < thread->base_pri)
+                                       priority = thread->base_pri;
                                if (priority < BASEPRI_BACKGROUND)
                                        priority = BASEPRI_BACKGROUND;
 
                                if ((thread->sched_pri < priority) || !(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
                                        KERNEL_DEBUG_CONSTANT(
                                                MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_PROMOTE) | DBG_FUNC_NONE,
-                                               (uintptr_t)thread_tid(thread), thread->sched_pri, thread->priority, priority, 0);
+                                               (uintptr_t)thread_tid(thread), thread->sched_pri, thread->base_pri, priority, 0);
 
                                        thread->sched_flags |= TH_SFLAG_RW_PROMOTED;
 
@@ -2581,34 +2545,34 @@ thread_dispatch(
 
                        if (!(thread->state & TH_WAIT)) {
                                /*
-                                *      Still running.
+                                *      Still runnable.
                                 */
+                               thread->last_made_runnable_time = mach_approximate_time();
+
+                               machine_thread_going_off_core(thread, FALSE);
+
                                if (thread->reason & AST_QUANTUM)
                                        thread_setrun(thread, SCHED_TAILQ);
-                               else
-                               if (thread->reason & AST_PREEMPT)
+                               else if (thread->reason & AST_PREEMPT)
                                        thread_setrun(thread, SCHED_HEADQ);
                                else
                                        thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
 
-                               thread->reason = AST_NONE;
-
                                KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                        MACHDBG_CODE(DBG_MACH_SCHED,MACH_DISPATCH) | DBG_FUNC_NONE,
                                        (uintptr_t)thread_tid(thread), thread->reason, thread->state, sched_run_count, 0);
-                               
+
                                if (thread->wake_active) {
                                        thread->wake_active = FALSE;
                                        thread_unlock(thread);
 
                                        thread_wakeup(&thread->wake_active);
-                               }
-                               else
+                               } else {
                                        thread_unlock(thread);
+                               }
 
                                wake_unlock(thread);
-                       }
-                       else {
+                       } else {
                                /*
                                 *      Waiting.
                                 */
@@ -2625,6 +2589,7 @@ thread_dispatch(
                                }
 
                                thread->state &= ~TH_RUN;
+                               thread->last_made_runnable_time = ~0ULL;
                                thread->chosen_processor = PROCESSOR_NULL;
 
                                if (thread->sched_mode == TH_MODE_TIMESHARE) {
@@ -2635,11 +2600,15 @@ thread_dispatch(
                                }
                                new_run_count = sched_run_decr(thread);
 
+#if CONFIG_SCHED_SFI
                                if ((thread->state & (TH_WAIT | TH_TERMINATE)) == TH_WAIT) {
                                        if (thread->reason & AST_SFI) {
                                                thread->wait_sfi_begin_time = processor->last_dispatch;
                                        }
                                }
+#endif
+
+                               machine_thread_going_off_core(thread, should_terminate);
 
                                KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                        MACHDBG_CODE(DBG_MACH_SCHED,MACH_DISPATCH) | DBG_FUNC_NONE,
@@ -2652,9 +2621,9 @@ thread_dispatch(
                                        thread_unlock(thread);
 
                                        thread_wakeup(&thread->wake_active);
-                               }
-                               else
+                               } else {
                                        thread_unlock(thread);
+                               }
 
                                wake_unlock(thread);
 
@@ -2664,22 +2633,31 @@ thread_dispatch(
                }
        }
 
+       /* Update (new) current thread and reprogram quantum timer */
+       thread_lock(self);
        if (!(self->state & TH_IDLE)) {
                uint64_t        arg1, arg2;
                int             urgency;
+               uint64_t                latency;
+
+#if CONFIG_SCHED_SFI
                ast_t                   new_ast;
 
-               thread_lock(self);
                new_ast = sfi_thread_needs_ast(self, NULL);
-               thread_unlock(self);
 
                if (new_ast != AST_NONE) {
                        ast_on(new_ast);
                }
+#endif
+
+               assert(processor->last_dispatch >= self->last_made_runnable_time);
+               latency = processor->last_dispatch - self->last_made_runnable_time;
 
                urgency = thread_get_urgency(self, &arg1, &arg2);
 
-               thread_tell_urgency(urgency, arg1, arg2, self);
+               thread_tell_urgency(urgency, arg1, arg2, latency, self);
+
+               machine_thread_going_on_core(self, urgency, latency);
                
                /*
                 *      Get a new quantum if none remaining.
@@ -2694,16 +2672,31 @@ thread_dispatch(
                processor->quantum_end = processor->last_dispatch + self->quantum_remaining;
                timer_call_enter1(&processor->quantum_timer, self, processor->quantum_end, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
 
-               processor->timeslice = 1;
+               processor->first_timeslice = TRUE;
+       } else {
+               timer_call_cancel(&processor->quantum_timer);
+               processor->first_timeslice = FALSE;
 
-               self->computation_epoch = processor->last_dispatch;
+               thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0, 0, self);
+               machine_thread_going_on_core(self, THREAD_URGENCY_NONE, 0);
        }
-       else {
-               timer_call_cancel(&processor->quantum_timer);
-               processor->timeslice = 0;
 
-               thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0, NULL);
+       self->computation_epoch = processor->last_dispatch;
+       self->reason = AST_NONE;
+
+       thread_unlock(self);
+
+#if defined(CONFIG_SCHED_DEFERRED_AST)
+       /*
+        * TODO: Can we state that redispatching our old thread is also
+        * uninteresting?
+        */
+       if ((((volatile uint32_t)sched_run_count) == 1) &&
+           !(self->state & TH_IDLE)) {
+               pset_cancel_deferred_dispatch(processor->processor_set, processor);
        }
+#endif
+
 }
 
 /*
@@ -2725,10 +2718,10 @@ thread_block_reason(
        void                            *parameter,
        ast_t                           reason)
 {
-       register thread_t               self = current_thread();
-       register processor_t    processor;
-       register thread_t               new_thread;
-       spl_t                                   s;
+       thread_t        self = current_thread();
+       processor_t     processor;
+       thread_t        new_thread;
+       spl_t           s;
 
        counter(++c_thread_block_calls);
 
@@ -2738,7 +2731,7 @@ thread_block_reason(
 
        /* If we're explicitly yielding, force a subsequent quantum */
        if (reason & AST_YIELD)
-               processor->timeslice = 0;
+               processor->first_timeslice = FALSE;
 
        /* We're handling all scheduling AST's */
        ast_off(AST_SCHEDULING);
@@ -2825,11 +2818,11 @@ thread_run(
  */
 void
 thread_continue(
-       register thread_t       thread)
+       thread_t        thread)
 {
-       register thread_t               self = current_thread();
-       register thread_continue_t      continuation;
-       register void                   *parameter;
+       thread_t                self = current_thread();
+       thread_continue_t       continuation;
+       void                    *parameter;
 
        DTRACE_SCHED(on__cpu);
 
@@ -2858,10 +2851,8 @@ thread_quantum_init(thread_t thread)
        }
 }
 
-#if defined(CONFIG_SCHED_TIMESHARE_CORE)
-
 uint32_t
-sched_traditional_initial_quantum_size(thread_t thread)
+sched_timeshare_initial_quantum_size(thread_t thread)
 {
        if ((thread == THREAD_NULL) || !(thread->sched_flags & TH_SFLAG_THROTTLED))
                return std_quantum;
@@ -2869,21 +2860,6 @@ sched_traditional_initial_quantum_size(thread_t thread)
                return bg_quantum;
 }
 
-#endif /* CONFIG_SCHED_TIMESHARE_CORE */
-
-#if defined(CONFIG_SCHED_TRADITIONAL)
-
-static sched_mode_t
-sched_traditional_initial_thread_sched_mode(task_t parent_task)
-{
-       if (parent_task == kernel_task)
-               return TH_MODE_FIXED;
-       else
-               return TH_MODE_TIMESHARE;
-}
-
-#endif /* CONFIG_SCHED_TRADITIONAL */
-
 /*
  *     run_queue_init:
  *
@@ -2904,87 +2880,6 @@ run_queue_init(
                queue_init(&rq->queues[i]);
 }
 
-#if defined(CONFIG_SCHED_FAIRSHARE_CORE)
-int
-sched_traditional_fairshare_runq_count(void)
-{
-       return fs_runq.count;
-}
-
-uint64_t
-sched_traditional_fairshare_runq_stats_count_sum(void)
-{
-       return fs_runq.runq_stats.count_sum;
-}
-
-void
-sched_traditional_fairshare_enqueue(thread_t thread)
-{
-       queue_t                         queue = &fs_runq.queue;
-       
-       simple_lock(&fs_lock);
-       
-       enqueue_tail(queue, (queue_entry_t)thread);
-       
-       thread->runq = FS_RUNQ;
-       SCHED_STATS_RUNQ_CHANGE(&fs_runq.runq_stats, fs_runq.count);
-       fs_runq.count++;
-       
-       simple_unlock(&fs_lock);        
-}
-
-thread_t
-sched_traditional_fairshare_dequeue(void)
-{
-       thread_t thread;
-       
-       simple_lock(&fs_lock);
-       if (fs_runq.count > 0) {
-               thread = (thread_t)dequeue_head(&fs_runq.queue);
-               
-               thread->runq = PROCESSOR_NULL;
-               SCHED_STATS_RUNQ_CHANGE(&fs_runq.runq_stats, fs_runq.count);
-               fs_runq.count--;
-               
-               simple_unlock(&fs_lock);
-               
-               return (thread);
-       }
-       simple_unlock(&fs_lock);                
-
-       return THREAD_NULL;
-}
-
-boolean_t
-sched_traditional_fairshare_queue_remove(thread_t thread)
-{
-       queue_t                 q;
-
-       simple_lock(&fs_lock);
-       q = &fs_runq.queue;
-       
-       if (FS_RUNQ == thread->runq) {
-               remqueue((queue_entry_t)thread);
-               SCHED_STATS_RUNQ_CHANGE(&fs_runq.runq_stats, fs_runq.count);
-               fs_runq.count--;
-               
-               thread->runq = PROCESSOR_NULL;
-               simple_unlock(&fs_lock);
-               return (TRUE);
-       }
-       else {
-               /*
-                *      The thread left the run queue before we could
-                *      lock the run queue.
-                */
-               assert(thread->runq == PROCESSOR_NULL);
-               simple_unlock(&fs_lock);
-               return (FALSE);
-       }       
-}
-
-#endif /* CONFIG_SCHED_FAIRSHARE_CORE */
-
 /*
  *     run_queue_dequeue:
  *
@@ -3094,33 +2989,27 @@ run_queue_remove(
        thread->runq = PROCESSOR_NULL;
 }
 
-/*
- *     fairshare_setrun:
- *
- *     Dispatch a thread for round-robin execution.
- *
- *     Thread must be locked.  Associated pset must
- *     be locked, and is returned unlocked.
- */
-static void
-fairshare_setrun(
-                                 processor_t                   processor,
-                                 thread_t                      thread)
+/* Assumes RT lock is not held, and acquires splsched/rt_lock itself */
+void
+rt_runq_scan(sched_update_scan_context_t scan_context)
 {
-       processor_set_t         pset = processor->processor_set;
-               
-       thread->chosen_processor = processor;
-
-       SCHED(fairshare_enqueue)(thread);
-       
-       pset_unlock(pset);
+       spl_t           s;
+       thread_t        thread;
 
-       if (processor != current_processor())
-               machine_signal_idle(processor);
+       s = splsched();
+       rt_lock_lock();
 
+       qe_foreach_element_safe(thread, &rt_runq.queue, links) {
+               if (thread->last_made_runnable_time < scan_context->earliest_rt_make_runnable_time) {
+                       scan_context->earliest_rt_make_runnable_time = thread->last_made_runnable_time;
+               }
+       }
 
+       rt_lock_unlock();
+       splx(s);
 }
 
+
 /*
  *     realtime_queue_insert:
  *
@@ -3134,7 +3023,7 @@ realtime_queue_insert(
        uint64_t                        deadline = thread->realtime.deadline;
        boolean_t                       preempt = FALSE;
 
-       simple_lock(&rt_lock);
+       rt_lock_lock();
 
        if (queue_empty(queue)) {
                enqueue_tail(queue, (queue_entry_t)thread);
@@ -3159,11 +3048,11 @@ realtime_queue_insert(
                insque((queue_entry_t)thread, (queue_entry_t)entry);
        }
 
-       thread->runq = RT_RUNQ;
+       thread->runq = THREAD_ON_RT_RUNQ;
        SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
        rt_runq.count++;
 
-       simple_unlock(&rt_lock);
+       rt_lock_unlock();
 
        return (preempt);
 }
@@ -3207,9 +3096,9 @@ realtime_setrun(
                processor->state = PROCESSOR_DISPATCHING;
 
                if (processor != current_processor()) {
-                       if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
+                       if (!(pset->pending_AST_cpu_mask & (1ULL << processor->cpu_id))) {
                                /* cleared on exit from main processor_idle() loop */
-                               pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
+                               pset->pending_AST_cpu_mask |= (1ULL << processor->cpu_id);
                                do_signal_idle = TRUE;
                        }
                }
@@ -3243,9 +3132,9 @@ realtime_setrun(
                        if (processor == current_processor()) {
                                ast_on(preempt);
                        } else {
-                               if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
+                               if (!(pset->pending_AST_cpu_mask & (1ULL << processor->cpu_id))) {
                                        /* cleared on exit from main processor_idle() loop */
-                                       pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
+                                       pset->pending_AST_cpu_mask |= (1ULL << processor->cpu_id);
                                        do_signal_idle = TRUE;
                                }
                        }
@@ -3260,9 +3149,9 @@ realtime_setrun(
                        if (processor == current_processor()) {
                                ast_on(preempt);
                        } else {
-                               if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
+                               if (!(pset->pending_AST_cpu_mask & (1ULL << processor->cpu_id))) {
                                        /* cleared after IPI causes csw_check() to be called */
-                                       pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
+                                       pset->pending_AST_cpu_mask |= (1ULL << processor->cpu_id);
                                        do_cause_ast = TRUE;
                                }
                        }
@@ -3291,37 +3180,6 @@ priority_is_urgent(int priority)
 
 #endif /* CONFIG_SCHED_TIMESHARE_CORE */
 
-#if defined(CONFIG_SCHED_TRADITIONAL)
-/*
- *     processor_enqueue:
- *
- *     Enqueue thread on a processor run queue.  Thread must be locked,
- *     and not already be on a run queue.
- *
- *     Returns TRUE if a preemption is indicated based on the state
- *     of the run queue.
- *
- *     The run queue must be locked (see thread_run_queue_remove()
- *     for more info).
- */
-static boolean_t
-processor_enqueue(
-       processor_t             processor,
-       thread_t                thread,
-       integer_t               options)
-{
-       run_queue_t             rq = runq_for_processor(processor);
-       boolean_t               result;
-       
-       result = run_queue_enqueue(rq, thread, options);
-       thread->runq = processor;
-       runq_consider_incr_bound_count(processor, thread);
-
-       return (result);
-}
-
-#endif /* CONFIG_SCHED_TRADITIONAL */
-
 /*
  *     processor_setrun:
  *
@@ -3340,8 +3198,9 @@ processor_setrun(
        processor_set_t         pset = processor->processor_set;
        ast_t                           preempt;
        enum { eExitIdle, eInterruptRunning, eDoNothing } ipi_action = eDoNothing;
+       enum { eNoSignal, eDoSignal, eDoDeferredSignal } do_signal_idle = eNoSignal;
 
-       boolean_t do_signal_idle = FALSE, do_cause_ast = FALSE;
+       boolean_t do_cause_ast = FALSE;
 
        thread->chosen_processor = processor;
 
@@ -3361,14 +3220,15 @@ processor_setrun(
                processor->deadline = UINT64_MAX;
                processor->state = PROCESSOR_DISPATCHING;
 
-               if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
+               if (!(pset->pending_AST_cpu_mask & (1ULL << processor->cpu_id))) {
                        /* cleared on exit from main processor_idle() loop */
-                       pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
-                       do_signal_idle = TRUE;
+                       pset->pending_AST_cpu_mask |= (1ULL << processor->cpu_id);
+                       do_signal_idle = eDoSignal;
                }
 
                pset_unlock(pset);
-               if (do_signal_idle) {
+
+               if (do_signal_idle == eDoSignal) {
                        machine_signal_idle(processor);
                }
 
@@ -3378,12 +3238,15 @@ processor_setrun(
        /*
         *      Set preemption mode.
         */
+#if defined(CONFIG_SCHED_DEFERRED_AST)
+       /* TODO: Do we need to care about urgency (see rdar://problem/20136239)? */
+#endif
        if (SCHED(priority_is_urgent)(thread->sched_pri) && thread->sched_pri > processor->current_pri)
                preempt = (AST_PREEMPT | AST_URGENT);
        else if(processor->active_thread && thread_eager_preemption(processor->active_thread))
                preempt = (AST_PREEMPT | AST_URGENT);
-       else if ((thread->sched_mode == TH_MODE_TIMESHARE) && (thread->sched_pri < thread->priority)) {
-               if(SCHED(priority_is_urgent)(thread->priority) && thread->sched_pri > processor->current_pri) {
+       else if ((thread->sched_mode == TH_MODE_TIMESHARE) && (thread->sched_pri < thread->base_pri)) {
+               if(SCHED(priority_is_urgent)(thread->base_pri) && thread->sched_pri > processor->current_pri) {
                        preempt = (options & SCHED_PREEMPT)? AST_PREEMPT: AST_NONE;
                } else {
                        preempt = AST_NONE;
@@ -3414,8 +3277,7 @@ processor_setrun(
                        }
                } else if (     (processor->state == PROCESSOR_RUNNING          ||
                                 processor->state == PROCESSOR_SHUTDOWN)                &&
-                               (thread->sched_pri >= processor->current_pri    ||
-                               processor->current_thmode == TH_MODE_FAIRSHARE)) {
+                               (thread->sched_pri >= processor->current_pri)) {
                        ipi_action = eInterruptRunning;
                }
        } else {
@@ -3449,11 +3311,20 @@ processor_setrun(
                                if (csw_check_locked(processor, pset, AST_NONE) != AST_NONE)
                                        ast_on(preempt);
                        } else {
-                               if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
+#if defined(CONFIG_SCHED_DEFERRED_AST)
+                               if (!(pset->pending_deferred_AST_cpu_mask & (1ULL << processor->cpu_id)) &&
+                                   !(pset->pending_AST_cpu_mask & (1ULL << processor->cpu_id))) {
                                        /* cleared on exit from main processor_idle() loop */
-                                       pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
-                                       do_signal_idle = TRUE;
+                                       pset->pending_deferred_AST_cpu_mask |= (1ULL << processor->cpu_id);
+                                       do_signal_idle = eDoDeferredSignal;
+                               }
+#else
+                               if (!(pset->pending_AST_cpu_mask & (1ULL << processor->cpu_id))) {
+                                       /* cleared on exit from main processor_idle() loop */
+                                       pset->pending_AST_cpu_mask |= (1ULL << processor->cpu_id);
+                                       do_signal_idle = eDoSignal;
                                }
+#endif
                        }
                        break;
                case eInterruptRunning:
@@ -3461,9 +3332,9 @@ processor_setrun(
                                if (csw_check_locked(processor, pset, AST_NONE) != AST_NONE)
                                        ast_on(preempt);
                        } else {
-                               if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) {
+                               if (!(pset->pending_AST_cpu_mask & (1ULL << processor->cpu_id))) {
                                        /* cleared after IPI causes csw_check() to be called */
-                                       pset->pending_AST_cpu_mask |= (1U << processor->cpu_id);
+                                       pset->pending_AST_cpu_mask |= (1ULL << processor->cpu_id);
                                        do_cause_ast = TRUE;
                                }
                        }
@@ -3472,120 +3343,29 @@ processor_setrun(
 
        pset_unlock(pset);
 
-       if (do_signal_idle) {
+       if (do_signal_idle == eDoSignal) {
                machine_signal_idle(processor);
-       } else if (do_cause_ast) {
-               cause_ast_check(processor);
        }
-}
-
-#if defined(CONFIG_SCHED_TRADITIONAL)
-
-static boolean_t
-processor_queue_empty(processor_t              processor)
-{
-       return runq_for_processor(processor)->count == 0;
-       
-}
-
-static boolean_t
-sched_traditional_with_pset_runqueue_processor_queue_empty(processor_t         processor)
-{
-       processor_set_t pset = processor->processor_set;
-       int count = runq_for_processor(processor)->count;
-
-       /*
-        * The pset runq contains the count of all runnable threads
-        * for all processors in the pset. However, for threads that
-        * are bound to another processor, the current "processor"
-        * is not eligible to execute the thread. So we only
-        * include bound threads that are bound to the current
-        * "processor". This allows the processor to idle when the
-        * count of eligible threads drops to 0, even if there's
-        * a runnable thread bound to a different processor in the
-        * shared runq.
-        */
-
-       count -= pset->pset_runq_bound_count;
-       count += processor->runq_bound_count;
-
-       return count == 0;
-}
-
-static ast_t
-processor_csw_check(processor_t processor)
-{
-       run_queue_t             runq;
-       boolean_t               has_higher;
-
-       assert(processor->active_thread != NULL);
-       
-       runq = runq_for_processor(processor);
-       if (first_timeslice(processor)) {
-               has_higher = (runq->highq > processor->current_pri);
-       } else {
-               has_higher = (runq->highq >= processor->current_pri);
+#if defined(CONFIG_SCHED_DEFERRED_AST)
+       else if (do_signal_idle == eDoDeferredSignal) {
+               /*
+                * TODO: The ability to cancel this signal could make
+                * sending it outside of the pset lock an issue.  Do
+                * we need to address this?  Or would the only fallout
+                * be that the core takes a signal?  As long as we do
+                * not run the risk of having a core marked as signal
+                * outstanding, with no real signal outstanding, the
+                * only result should be that we fail to cancel some
+                * signals.
+                */
+               machine_signal_idle_deferred(processor);
        }
-       if (has_higher) {
-               if (runq->urgency > 0)
-                       return (AST_PREEMPT | AST_URGENT);
-               
-               if (processor->active_thread && thread_eager_preemption(processor->active_thread))
-                       return (AST_PREEMPT | AST_URGENT);
-
-               return AST_PREEMPT;
+#endif
+       else if (do_cause_ast) {
+               cause_ast_check(processor);
        }
-
-       return AST_NONE;
 }
 
-static boolean_t
-processor_queue_has_priority(processor_t               processor,
-                                                        int                            priority,
-                                                        boolean_t                      gte)
-{
-       if (gte)
-               return runq_for_processor(processor)->highq >= priority;
-       else
-               return runq_for_processor(processor)->highq > priority;
-}
-
-static boolean_t
-should_current_thread_rechoose_processor(processor_t                   processor)
-{
-       return (processor->current_pri < BASEPRI_RTQUEUES
-                       && processor->processor_primary != processor);
-}
-
-static int
-sched_traditional_processor_runq_count(processor_t   processor)
-{
-       return runq_for_processor(processor)->count;
-}
-
-static uint64_t
-sched_traditional_processor_runq_stats_count_sum(processor_t   processor)
-{
-       return runq_for_processor(processor)->runq_stats.count_sum;
-}
-
-static uint64_t
-sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum(processor_t   processor)
-{
-       if (processor->cpu_id == processor->processor_set->cpu_set_low)
-               return runq_for_processor(processor)->runq_stats.count_sum;
-       else
-               return 0ULL;
-}
-
-static int
-sched_traditional_processor_bound_count(processor_t   processor)
-{
-       return processor->runq_bound_count;
-}
-
-#endif /* CONFIG_SCHED_TRADITIONAL */
-
 /*
  *     choose_next_pset:
  *
@@ -3659,6 +3439,8 @@ choose_processor(
        if (processor != PROCESSOR_NULL) {
                if (processor->processor_set != pset) {
                        processor = PROCESSOR_NULL;
+               } else if (!processor->is_recommended) {
+                       processor = PROCESSOR_NULL;
                } else {
                        switch (processor->state) {
                                case PROCESSOR_START:
@@ -3741,16 +3523,21 @@ choose_processor(
                /*
                 * Choose an idle processor, in pset traversal order
                 */
-               if (!queue_empty(&cset->idle_queue))
-                       return ((processor_t)queue_first(&cset->idle_queue));
+               qe_foreach_element(processor, &cset->idle_queue, processor_queue) {
+                       if (processor->is_recommended)
+                               return processor;
+               }
 
                /*
                 * Otherwise, enumerate active and idle processors to find candidates
                 * with lower priority/etc.
                 */
 
-               processor = (processor_t)queue_first(&cset->active_queue);
-               while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) {
+               qe_foreach_element(processor, &cset->active_queue, processor_queue) {
+
+                       if (!processor->is_recommended) {
+                               continue;
+                       }
 
                        integer_t cpri = processor->current_pri;
                        if (cpri < lowest_priority) {
@@ -3768,16 +3555,18 @@ choose_processor(
                                lowest_count = ccount;
                                lc_processor = processor;
                        }
-
-                       processor = (processor_t)queue_next((queue_entry_t)processor);
                }
 
                /*
                 * For SMT configs, these idle secondary processors must have active primary. Otherwise
                 * the idle primary would have short-circuited the loop above
                 */
-               processor = (processor_t)queue_first(&cset->idle_secondary_queue);
-               while (!queue_end(&cset->idle_secondary_queue, (queue_entry_t)processor)) {
+               qe_foreach_element(processor, &cset->idle_secondary_queue, processor_queue) {
+
+                       if (!processor->is_recommended) {
+                               continue;
+                       }
+
                        processor_t cprimary = processor->processor_primary;
 
                        /* If the primary processor is offline or starting up, it's not a candidate for this path */
@@ -3790,8 +3579,6 @@ choose_processor(
                                        lp_unpaired_secondary_processor = processor;
                                }
                        }
-
-                       processor = (processor_t)queue_next((queue_entry_t)processor);
                }
 
 
@@ -3806,14 +3593,12 @@ choose_processor(
 
                        if (thread->sched_pri > lowest_unpaired_primary_priority) {
                                /* Move to end of active queue so that the next thread doesn't also pick it */
-                               remqueue((queue_entry_t)lp_unpaired_primary_processor);
-                               enqueue_tail(&cset->active_queue, (queue_entry_t)lp_unpaired_primary_processor);
+                               re_queue_tail(&cset->active_queue, (queue_entry_t)lp_unpaired_primary_processor);
                                return lp_unpaired_primary_processor;
                        }
                        if (thread->sched_pri > lowest_priority) {
                                /* Move to end of active queue so that the next thread doesn't also pick it */
-                               remqueue((queue_entry_t)lp_processor);
-                               enqueue_tail(&cset->active_queue, (queue_entry_t)lp_processor);
+                               re_queue_tail(&cset->active_queue, (queue_entry_t)lp_processor);
                                return lp_processor;
                        }
                        if (thread->realtime.deadline < furthest_deadline)
@@ -3829,14 +3614,12 @@ choose_processor(
 
                        if (thread->sched_pri > lowest_unpaired_primary_priority) {
                                /* Move to end of active queue so that the next thread doesn't also pick it */
-                               remqueue((queue_entry_t)lp_unpaired_primary_processor);
-                               enqueue_tail(&cset->active_queue, (queue_entry_t)lp_unpaired_primary_processor);
+                               re_queue_tail(&cset->active_queue, (queue_entry_t)lp_unpaired_primary_processor);
                                return lp_unpaired_primary_processor;
                        }
                        if (thread->sched_pri > lowest_priority) {
                                /* Move to end of active queue so that the next thread doesn't also pick it */
-                               remqueue((queue_entry_t)lp_processor);
-                               enqueue_tail(&cset->active_queue, (queue_entry_t)lp_processor);
+                               re_queue_tail(&cset->active_queue, (queue_entry_t)lp_processor);
                                return lp_processor;
                        }
 
@@ -3931,8 +3714,9 @@ thread_setrun(
        processor_t                     processor;
        processor_set_t         pset;
 
-       assert(thread_runnable(thread));
-       
+       assert((thread->state & (TH_RUN|TH_WAIT|TH_UNINT|TH_TERMINATE|TH_TERMINATE2)) == TH_RUN);
+       assert(thread->runq == PROCESSOR_NULL);
+
        /*
         *      Update priority if needed.
         */
@@ -3943,6 +3727,7 @@ thread_setrun(
 
        assert(thread->runq == PROCESSOR_NULL);
 
+#if __SMP__
        if (thread->bound_processor == PROCESSOR_NULL) {
                /*
                 *      Unbound case.
@@ -3956,11 +3741,9 @@ thread_setrun(
 
                        processor = SCHED(choose_processor)(pset, PROCESSOR_NULL, thread);
 
-                       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
+                       SCHED_DEBUG_CHOOSE_PROCESSOR_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
                                                                          (uintptr_t)thread_tid(thread), (uintptr_t)-1, processor->cpu_id, processor->state, 0);
-               }
-               else
-               if (thread->last_processor != PROCESSOR_NULL) {
+               } else if (thread->last_processor != PROCESSOR_NULL) {
                        /*
                         *      Simple (last processor) affinity case.
                         */
@@ -3969,10 +3752,9 @@ thread_setrun(
                        pset_lock(pset);
                        processor = SCHED(choose_processor)(pset, processor, thread);
 
-                       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
+                       SCHED_DEBUG_CHOOSE_PROCESSOR_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
                                                                  (uintptr_t)thread_tid(thread), thread->last_processor->cpu_id, processor->cpu_id, processor->state, 0);
-               }
-               else {
+               } else {
                        /*
                         *      No Affinity case:
                         *
@@ -3991,11 +3773,10 @@ thread_setrun(
                        processor = SCHED(choose_processor)(pset, PROCESSOR_NULL, thread);
                        task->pset_hint = processor->processor_set;
 
-                       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
+                       SCHED_DEBUG_CHOOSE_PROCESSOR_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
                                                                          (uintptr_t)thread_tid(thread), (uintptr_t)-1, processor->cpu_id, processor->state, 0);
                }
-       }
-       else {
+       } else {
                /*
                 *      Bound case:
                 *
@@ -4005,18 +3786,23 @@ thread_setrun(
                pset = processor->processor_set;
                pset_lock(pset);
 
-               KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
+               SCHED_DEBUG_CHOOSE_PROCESSOR_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE,
                                                          (uintptr_t)thread_tid(thread), (uintptr_t)-2, processor->cpu_id, processor->state, 0);
        }
+#else /* !__SMP__ */
+       /* Only one processor to choose */
+       assert(thread->bound_processor == PROCESSOR_NULL || thread->bound_processor == master_processor);
+       processor = master_processor;
+       pset = processor->processor_set;
+       pset_lock(pset);
+#endif /* !__SMP__ */
 
        /*
-        *      Dispatch the thread on the choosen processor.
+        *      Dispatch the thread on the chosen processor.
         *      TODO: This should be based on sched_mode, not sched_pri
         */
        if (thread->sched_pri >= BASEPRI_RTQUEUES)
                realtime_setrun(processor, thread);
-       else if (thread->sched_mode == TH_MODE_FAIRSHARE)
-               fairshare_setrun(processor, thread);
        else
                processor_setrun(processor, thread, options);
 }
@@ -4033,74 +3819,6 @@ task_choose_pset(
        return (pset);
 }
 
-#if defined(CONFIG_SCHED_TRADITIONAL)
-
-/*
- *     processor_queue_shutdown:
- *
- *     Shutdown a processor run queue by
- *     re-dispatching non-bound threads.
- *
- *     Associated pset must be locked, and is
- *     returned unlocked.
- */
-void
-processor_queue_shutdown(
-       processor_t                     processor)
-{
-       processor_set_t         pset = processor->processor_set;
-       run_queue_t                     rq = runq_for_processor(processor);
-       queue_t                         queue = rq->queues + rq->highq;
-       int                                     pri = rq->highq, count = rq->count;
-       thread_t                        next, thread;
-       queue_head_t            tqueue;
-
-       queue_init(&tqueue);
-       
-       while (count > 0) {
-               thread = (thread_t)queue_first(queue);
-               while (!queue_end(queue, (queue_entry_t)thread)) {
-                       next = (thread_t)queue_next((queue_entry_t)thread);
-
-                       if (thread->bound_processor == PROCESSOR_NULL) {
-                               remqueue((queue_entry_t)thread);
-
-                               thread->runq = PROCESSOR_NULL;
-                               SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
-                               runq_consider_decr_bound_count(processor, thread);
-                               rq->count--;
-                               if (SCHED(priority_is_urgent)(pri)) {
-                                       rq->urgency--; assert(rq->urgency >= 0);
-                               }
-                               if (queue_empty(queue)) {
-                                       if (pri != IDLEPRI)
-                                               clrbit(MAXPRI - pri, rq->bitmap);
-                                       rq->highq = MAXPRI - ffsbit(rq->bitmap);
-                               }
-
-                               enqueue_tail(&tqueue, (queue_entry_t)thread);
-                       }
-                       count--;
-
-                       thread = next;
-               }
-
-               queue--; pri--;
-       }
-
-       pset_unlock(pset);
-
-       while ((thread = (thread_t)dequeue_head(&tqueue)) != THREAD_NULL) {
-               thread_lock(thread);
-
-               thread_setrun(thread, SCHED_TAILQ);
-
-               thread_unlock(thread);
-       }
-}
-
-#endif /* CONFIG_SCHED_TRADITIONAL */
-
 /*
  *     Check for a preemption point in
  *     the current context.
@@ -4118,7 +3836,7 @@ csw_check(
        pset_lock(pset);
 
        /* If we were sent a remote AST and interrupted a running processor, acknowledge it here with pset lock held */
-       pset->pending_AST_cpu_mask &= ~(1U << processor->cpu_id);
+       pset->pending_AST_cpu_mask &= ~(1ULL << processor->cpu_id);
 
        result = csw_check_locked(processor, pset, check_reason);
 
@@ -4140,7 +3858,7 @@ csw_check_locked(
        ast_t                   result;
        thread_t                thread = processor->active_thread;
 
-       if (first_timeslice(processor)) {
+       if (processor->first_timeslice) {
                if (rt_runq.count > 0)
                        return (check_reason | AST_PREEMPT | AST_URGENT);
        }
@@ -4155,14 +3873,41 @@ csw_check_locked(
 
        result = SCHED(processor_csw_check)(processor);
        if (result != AST_NONE)
-               return (check_reason | result);
+               return (check_reason | result | (thread_eager_preemption(thread) ? AST_URGENT : AST_NONE));
+
+#if __SMP__
 
-       if (SCHED(should_current_thread_rechoose_processor)(processor))
+       /*
+        * If the current thread is running on a processor that is no longer recommended, gently
+        * (non-urgently) get to a point and then block, at which point thread_select() should
+        * try to idle the processor and re-dispatch the thread to a recommended processor.
+        */
+       if (!processor->is_recommended)
                return (check_reason | AST_PREEMPT);
-       
+
+       /*
+        * Even though we could continue executing on this processor, a
+        * secondary SMT core should try to shed load to another primary core.
+        *
+        * TODO: Should this do the same check that thread_select does? i.e.
+        * if no bound threads target this processor, and idle primaries exist, preempt
+        * The case of RT threads existing is already taken care of above
+        * Consider Capri in this scenario.
+        *
+        * if (!SCHED(processor_bound_count)(processor) && !queue_empty(&pset->idle_queue))
+        *
+        * TODO: Alternatively - check if only primary is idle, or check if primary's pri is lower than mine.
+        */
+
+       if (processor->current_pri < BASEPRI_RTQUEUES &&
+           processor->processor_primary != processor)
+               return (check_reason | AST_PREEMPT);
+#endif
+
        if (thread->state & TH_SUSP)
                return (check_reason | AST_PREEMPT);
 
+#if CONFIG_SCHED_SFI
        /*
         * Current thread may not need to be preempted, but maybe needs
         * an SFI wait?
@@ -4170,6 +3915,7 @@ csw_check_locked(
        result = sfi_thread_needs_ast(thread, NULL);
        if (result != AST_NONE)
                return (check_reason | result);
+#endif
 
        return (AST_NONE);
 }
@@ -4185,128 +3931,108 @@ csw_check_locked(
  */
 void
 set_sched_pri(
-       thread_t                thread,
-       int                     priority)
+              thread_t        thread,
+              int             priority)
 {
-       boolean_t               removed = thread_run_queue_remove(thread);
+       thread_t cthread = current_thread();
+       boolean_t is_current_thread = (thread == cthread) ? TRUE : FALSE;
        int curgency, nurgency;
        uint64_t urgency_param1, urgency_param2;
-       thread_t cthread = current_thread();
+       boolean_t removed_from_runq = FALSE;
 
-       if (thread == cthread) {
+       /* If we're already at this priority, no need to mess with the runqueue */
+       if (priority == thread->sched_pri)
+               return;
+
+       if (is_current_thread) {
+               assert(thread->runq == PROCESSOR_NULL);
                curgency = thread_get_urgency(thread, &urgency_param1, &urgency_param2);
+       } else {
+               removed_from_runq = thread_run_queue_remove(thread);
        }
-       
+
+       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHANGE_PRIORITY),
+                             (uintptr_t)thread_tid(thread),
+                             thread->base_pri,
+                             thread->sched_pri,
+                             0, /* eventually, 'reason' */
+                             0);
+
        thread->sched_pri = priority;
 
-       if (thread == cthread) {
+       if (is_current_thread) {
                nurgency = thread_get_urgency(thread, &urgency_param1, &urgency_param2);
-/* set_sched_pri doesn't alter RT params. We expect direct base priority/QoS
- * class alterations from user space to occur relatively infrequently, hence
- * those are lazily handled. QoS classes have distinct priority bands, and QoS
- * inheritance is expected to involve priority changes.
- */
+               /*
+                * set_sched_pri doesn't alter RT params. We expect direct base priority/QoS
+                * class alterations from user space to occur relatively infrequently, hence
+                * those are lazily handled. QoS classes have distinct priority bands, and QoS
+                * inheritance is expected to involve priority changes.
+                */
                if (nurgency != curgency) {
-                       thread_tell_urgency(nurgency, urgency_param1, urgency_param2, thread);
+                       thread_tell_urgency(nurgency, urgency_param1, urgency_param2, 0, thread);
+                       machine_thread_going_on_core(thread, nurgency, 0);
                }
        }
 
-       if (removed)
-               thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
-       else
-       if (thread->state & TH_RUN) {
-               processor_t             processor = thread->last_processor;
+       /* TODO: Should this be TAILQ if it went down, HEADQ if it went up? */
+       if (removed_from_runq)
+               thread_run_queue_reinsert(thread, SCHED_PREEMPT | SCHED_TAILQ);
+       else if (thread->state & TH_RUN) {
+               processor_t processor = thread->last_processor;
 
-               if (thread == current_thread()) {
-                       ast_t                   preempt;
+               if (is_current_thread) {
+                       ast_t preempt;
 
                        processor->current_pri = priority;
                        processor->current_thmode = thread->sched_mode;
                        processor->current_sfi_class = thread->sfi_class = sfi_thread_classify(thread);
                        if ((preempt = csw_check(processor, AST_NONE)) != AST_NONE)
                                ast_on(preempt);
-               }
-               else
-               if (    processor != PROCESSOR_NULL                                             &&
-                               processor->active_thread == thread      )
+               } else if (processor != PROCESSOR_NULL && processor->active_thread == thread)
                        cause_ast_check(processor);
        }
 }
 
-#if            0
+/*
+ * thread_run_queue_remove_for_handoff
+ *
+ * Pull a thread or its (recursive) push target out of the runqueue
+ * so that it is ready for thread_run()
+ *
+ * Called at splsched
+ *
+ * Returns the thread that was pulled or THREAD_NULL if no thread could be pulled.
+ * This may be different than the thread that was passed in.
+ */
+thread_t
+thread_run_queue_remove_for_handoff(thread_t thread) {
 
-static void
-run_queue_check(
-       run_queue_t             rq,
-       thread_t                thread)
-{
-       queue_t                 q;
-       queue_entry_t   qe;
+       thread_t pulled_thread = THREAD_NULL;
 
-       if (rq != thread->runq)
-               panic("run_queue_check: thread runq");
+       thread_lock(thread);
 
-       if (thread->sched_pri > MAXPRI || thread->sched_pri < MINPRI)
-               panic("run_queue_check: thread sched_pri");
+       /*
+        * Check that the thread is not bound
+        * to a different processor, and that realtime
+        * is not involved.
+        *
+        * Next, pull it off its run queue.  If it
+        * doesn't come, it's not eligible.
+        */
 
-       q = &rq->queues[thread->sched_pri];
-       qe = queue_first(q);
-       while (!queue_end(q, qe)) {
-               if (qe == (queue_entry_t)thread)
-                       return;
+       processor_t processor = current_processor();
+       if (processor->current_pri < BASEPRI_RTQUEUES && thread->sched_pri < BASEPRI_RTQUEUES &&
+           (thread->bound_processor == PROCESSOR_NULL || thread->bound_processor == processor)) {
 
-               qe = queue_next(qe);
+                       if (thread_run_queue_remove(thread))
+                               pulled_thread = thread;
        }
 
-       panic("run_queue_check: end");
-}
-
-#endif /* DEBUG */
-
-#if defined(CONFIG_SCHED_TRADITIONAL)
-
-/*
- * Locks the runqueue itself.
- *
- * Thread must be locked.
- */
-static boolean_t
-processor_queue_remove(
-                                          processor_t                  processor,
-                                          thread_t             thread)
-{
-       void *                  rqlock;
-       run_queue_t             rq;
-       
-       rqlock = &processor->processor_set->sched_lock;
-       rq = runq_for_processor(processor);
-
-       simple_lock(rqlock);
-       if (processor == thread->runq) {
-               /*
-                *      Thread is on a run queue and we have a lock on
-                *      that run queue.
-                */
-               runq_consider_decr_bound_count(processor, thread);
-               run_queue_remove(rq, thread);
-       }
-       else {
-               /*
-                *      The thread left the run queue before we could
-                *      lock the run queue.
-                */
-               assert(thread->runq == PROCESSOR_NULL);
-               processor = PROCESSOR_NULL;
-       }
-       
-       simple_unlock(rqlock);
+       thread_unlock(thread);
 
-       return (processor != PROCESSOR_NULL);
+       return pulled_thread;
 }
 
-#endif /* CONFIG_SCHED_TRADITIONAL */
-
-
 /*
  *     thread_run_queue_remove:
  *
@@ -4352,23 +4078,19 @@ thread_run_queue_remove(
                return FALSE;
        }
 
-       if (thread->sched_mode == TH_MODE_FAIRSHARE) {
-               return SCHED(fairshare_queue_remove)(thread);
-       }
-       
        if (thread->sched_pri < BASEPRI_RTQUEUES) {
                return SCHED(processor_queue_remove)(processor, thread);
        }
 
-       simple_lock(&rt_lock);
+       rt_lock_lock();
 
        if (thread->runq != PROCESSOR_NULL) {
                /*
-                *      Thread is on a run queue and we have a lock on
+                *      Thread is on the RT run queue and we have a lock on
                 *      that run queue.
                 */
 
-               assert(thread->runq == RT_RUNQ);
+               assert(thread->runq == THREAD_ON_RT_RUNQ);
 
                remqueue((queue_entry_t)thread);
                SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
@@ -4379,125 +4101,28 @@ thread_run_queue_remove(
                removed = TRUE;
        }
 
-       simple_unlock(&rt_lock);
+       rt_lock_unlock();
 
        return (removed);
 }
 
-#if defined(CONFIG_SCHED_TRADITIONAL)
-
-/*
- *     steal_processor_thread:
- *
- *     Locate a thread to steal from the processor and
- *     return it.
- *
- *     Associated pset must be locked.  Returns THREAD_NULL
- *     on failure.
- */
-static thread_t
-steal_processor_thread(
-       processor_t             processor)
-{
-       run_queue_t             rq = runq_for_processor(processor);
-       queue_t                 queue = rq->queues + rq->highq;
-       int                             pri = rq->highq, count = rq->count;
-       thread_t                thread;
-
-       while (count > 0) {
-               thread = (thread_t)queue_first(queue);
-               while (!queue_end(queue, (queue_entry_t)thread)) {
-                       if (thread->bound_processor == PROCESSOR_NULL) {
-                               remqueue((queue_entry_t)thread);
-
-                               thread->runq = PROCESSOR_NULL;
-                               SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
-                               runq_consider_decr_bound_count(processor, thread);
-                               rq->count--;
-                               if (SCHED(priority_is_urgent)(pri)) {
-                                       rq->urgency--; assert(rq->urgency >= 0);
-                               }
-                               if (queue_empty(queue)) {
-                                       if (pri != IDLEPRI)
-                                               clrbit(MAXPRI - pri, rq->bitmap);
-                                       rq->highq = MAXPRI - ffsbit(rq->bitmap);
-                               }
-
-                               return (thread);
-                       }
-                       count--;
-
-                       thread = (thread_t)queue_next((queue_entry_t)thread);
-               }
-
-               queue--; pri--;
-       }
-
-       return (THREAD_NULL);
-}
-
 /*
- *     Locate and steal a thread, beginning
- *     at the pset.
+ * Put the thread back where it goes after a thread_run_queue_remove
  *
- *     The pset must be locked, and is returned
- *     unlocked.
+ * Thread must have been removed under the same thread lock hold
  *
- *     Returns the stolen thread, or THREAD_NULL on
- *     failure.
+ * thread locked, at splsched
  */
-static thread_t
-steal_thread(
-       processor_set_t         pset)
+void
+thread_run_queue_reinsert(thread_t thread, integer_t options)
 {
-       processor_set_t         nset, cset = pset;
-       processor_t                     processor;
-       thread_t                        thread;
-
-       do {
-               processor = (processor_t)queue_first(&cset->active_queue);
-               while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) {
-                       if (runq_for_processor(processor)->count > 0) {
-                               thread = steal_processor_thread(processor);
-                               if (thread != THREAD_NULL) {
-                                       remqueue((queue_entry_t)processor);
-                                       enqueue_tail(&cset->active_queue, (queue_entry_t)processor);
-
-                                       pset_unlock(cset);
-
-                                       return (thread);
-                               }
-                       }
-
-                       processor = (processor_t)queue_next((queue_entry_t)processor);
-               }
-
-               nset = next_pset(cset);
-
-               if (nset != pset) {
-                       pset_unlock(cset);
-
-                       cset = nset;
-                       pset_lock(cset);
-               }
-       } while (nset != pset);
-
-       pset_unlock(cset);
-
-       return (THREAD_NULL);
-}
+       assert(thread->runq == PROCESSOR_NULL);
 
-static thread_t        steal_thread_disabled(
-                                       processor_set_t         pset)
-{
-       pset_unlock(pset);
+               assert(thread->state & (TH_RUN));
+               thread_setrun(thread, options);
 
-       return (THREAD_NULL);
 }
 
-#endif /* CONFIG_SCHED_TRADITIONAL */
-
-
 void
 sys_override_cpu_throttle(int flag)
 {
@@ -4521,13 +4146,13 @@ thread_get_urgency(thread_t thread, uint64_t *arg1, uint64_t *arg2)
 
                return (THREAD_URGENCY_REAL_TIME);
        } else if (cpu_throttle_enabled &&
-                  ((thread->sched_pri <= MAXPRI_THROTTLE) && (thread->priority <= MAXPRI_THROTTLE)))  {
+                  ((thread->sched_pri <= MAXPRI_THROTTLE) && (thread->base_pri <= MAXPRI_THROTTLE)))  {
                /*
                 * Background urgency applied when thread priority is MAXPRI_THROTTLE or lower and thread is not promoted
                 * TODO: Use TH_SFLAG_THROTTLED instead?
                 */
                *arg1 = thread->sched_pri;
-               *arg2 = thread->priority;
+               *arg2 = thread->base_pri;
 
                return (THREAD_URGENCY_BACKGROUND);
        } else {
@@ -4579,10 +4204,16 @@ processor_idle(
        while (1) {
                if (processor->state != PROCESSOR_IDLE) /* unsafe, but worst case we loop around once */
                        break;
-               if (pset->pending_AST_cpu_mask & (1U << processor->cpu_id))
-                       break;
-               if (rt_runq.count)
+               if (pset->pending_AST_cpu_mask & (1ULL << processor->cpu_id))
                        break;
+               if (processor->is_recommended) {
+                       if (rt_runq.count)
+                               break;
+               } else {
+                       if (SCHED(processor_bound_count)(processor))
+                               break;
+               }
+
 #if CONFIG_SCHED_IDLE_IN_PLACE
                if (thread != THREAD_NULL) {
                        /* Did idle-in-place thread wake up */
@@ -4621,7 +4252,10 @@ processor_idle(
        pset_lock(pset);
 
        /* If we were sent a remote AST and came out of idle, acknowledge it here with pset lock held */
-       pset->pending_AST_cpu_mask &= ~(1U << processor->cpu_id);
+       pset->pending_AST_cpu_mask &= ~(1ULL << processor->cpu_id);
+#if defined(CONFIG_SCHED_DEFERRED_AST)
+       pset->pending_deferred_AST_cpu_mask &= ~(1ULL << processor->cpu_id);
+#endif
 
        state = processor->state;
        if (state == PROCESSOR_DISPATCHING) {
@@ -4633,7 +4267,7 @@ processor_idle(
                processor->state = PROCESSOR_RUNNING;
 
                if ((new_thread != THREAD_NULL) && (SCHED(processor_queue_has_priority)(processor, new_thread->sched_pri, FALSE)                                        ||
-                                                                                       (rt_runq.count > 0 && BASEPRI_RTQUEUES >= new_thread->sched_pri))       ) {
+                                                                                       (rt_runq.count > 0))    ) {
                        /* Something higher priority has popped up on the runqueue - redispatch this thread elsewhere */
                        processor->current_pri = IDLEPRI;
                        processor->current_thmode = TH_MODE_FIXED;
@@ -4746,7 +4380,7 @@ idle_thread_create(
        thread_lock(thread);
        thread->bound_processor = processor;
        processor->idle_thread = thread;
-       thread->sched_pri = thread->priority = IDLEPRI;
+       thread->sched_pri = thread->base_pri = IDLEPRI;
        thread->state = (TH_RUN | TH_IDLE);
        thread->options |= TH_OPT_IDLE_THREAD;
        thread_unlock(thread);
@@ -4770,6 +4404,8 @@ sched_startup(void)
        kern_return_t   result;
        thread_t                thread;
 
+       simple_lock_init(&sched_vm_group_list_lock, 0);
+
        result = kernel_thread_start_priority((thread_continue_t)sched_init_thread,
            (void *)SCHED(maintenance_continuation), MAXPRI_KERNEL, &thread);
        if (result != KERN_SUCCESS)
@@ -4804,10 +4440,16 @@ uint64_t                                sched_tick_max_delta;
  *     times per second.
  */
 void
-sched_traditional_maintenance_continue(void)
+sched_timeshare_maintenance_continue(void)
 {
        uint64_t        sched_tick_ctime, late_time;
 
+       struct sched_update_scan_context scan_context = {
+               .earliest_bg_make_runnable_time = UINT64_MAX,
+               .earliest_normal_make_runnable_time = UINT64_MAX,
+               .earliest_rt_make_runnable_time = UINT64_MAX
+       };
+
        sched_tick_ctime = mach_absolute_time();        
 
        if (__improbable(sched_tick_last_abstime == 0)) {
@@ -4858,7 +4500,20 @@ sched_traditional_maintenance_continue(void)
         *  Scan the run queues for threads which
         *  may need to be updated.
         */
-       SCHED(thread_update_scan)();
+       SCHED(thread_update_scan)(&scan_context);
+
+       rt_runq_scan(&scan_context);
+
+       uint64_t ctime = mach_absolute_time();
+
+       machine_max_runnable_latency(ctime > scan_context.earliest_bg_make_runnable_time ? ctime - scan_context.earliest_bg_make_runnable_time : 0,
+                                                                ctime > scan_context.earliest_normal_make_runnable_time ? ctime - scan_context.earliest_normal_make_runnable_time : 0,
+                                                                ctime > scan_context.earliest_rt_make_runnable_time ? ctime - scan_context.earliest_rt_make_runnable_time : 0);
+
+       /*
+        * Check to see if the special sched VM group needs attention.
+        */
+       sched_vm_group_maintenance();
 
        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_MAINTENANCE)|DBG_FUNC_END,
                                                  sched_pri_shift,
@@ -4867,8 +4522,8 @@ sched_traditional_maintenance_continue(void)
                                                  0,
                                                  0);
 
-       assert_wait((event_t)sched_traditional_maintenance_continue, THREAD_UNINT);
-       thread_block((thread_continue_t)sched_traditional_maintenance_continue);
+       assert_wait((event_t)sched_timeshare_maintenance_continue, THREAD_UNINT);
+       thread_block((thread_continue_t)sched_timeshare_maintenance_continue);
        /*NOTREACHED*/
 }
 
@@ -4885,7 +4540,7 @@ static uint64_t sched_maintenance_wakeups;
  * no more than a comparison against the deadline in the common case.
  */
 void
-sched_traditional_consider_maintenance(uint64_t ctime) {
+sched_timeshare_consider_maintenance(uint64_t ctime) {
        uint64_t ndeadline, deadline = sched_maintenance_deadline;
 
        if (__improbable(ctime >= deadline)) {
@@ -4896,7 +4551,7 @@ sched_traditional_consider_maintenance(uint64_t ctime) {
                ndeadline = ctime + sched_tick_interval;
 
                if (__probable(__sync_bool_compare_and_swap(&sched_maintenance_deadline, deadline, ndeadline))) {
-                       thread_wakeup((event_t)sched_traditional_maintenance_continue);
+                       thread_wakeup((event_t)sched_timeshare_maintenance_continue);
                        sched_maintenance_wakeups++;
                }
        }
@@ -4997,7 +4652,8 @@ thread_update_process_threads(void)
  */
 boolean_t
 runq_scan(
-       run_queue_t                             runq)
+       run_queue_t                             runq,
+       sched_update_scan_context_t     scan_context)
 {
        register int                    count;
        register queue_t                q;
@@ -5013,6 +4669,16 @@ runq_scan(
                                                return (TRUE);
                                }
 
+                               if (cpu_throttle_enabled && ((thread->sched_pri <= MAXPRI_THROTTLE) && (thread->base_pri <= MAXPRI_THROTTLE))) {
+                                       if (thread->last_made_runnable_time < scan_context->earliest_bg_make_runnable_time) {
+                                               scan_context->earliest_bg_make_runnable_time = thread->last_made_runnable_time;
+                                       }
+                               } else {
+                                       if (thread->last_made_runnable_time < scan_context->earliest_normal_make_runnable_time) {
+                                               scan_context->earliest_normal_make_runnable_time = thread->last_made_runnable_time;
+                                       }
+                               }
+
                                count--;
                        }
 
@@ -5025,52 +4691,6 @@ runq_scan(
 
 #endif /* CONFIG_SCHED_TIMESHARE_CORE */
 
-#if defined(CONFIG_SCHED_TRADITIONAL)
-
-static void
-thread_update_scan(void)
-{
-       boolean_t                       restart_needed = FALSE;
-       processor_t                     processor = processor_list;
-       processor_set_t         pset;
-       thread_t                        thread;
-       spl_t                           s;
-
-       do {
-               do {
-                       /*
-                        * TODO: in sched_traditional_use_pset_runqueue case,
-                        *  avoid scanning the same runq multiple times
-                        */
-                       pset = processor->processor_set;
-
-                       s = splsched();
-                       pset_lock(pset);
-
-                       restart_needed = runq_scan(runq_for_processor(processor));
-
-                       pset_unlock(pset);
-                       splx(s);
-
-                       if (restart_needed)
-                               break;
-
-                       thread = processor->idle_thread;
-                       if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) {
-                               if (thread_update_add_thread(thread) == FALSE) {
-                                       restart_needed = TRUE;
-                                       break;
-                               }
-                       }
-               } while ((processor = processor->processor_list) != NULL);
-
-               /* Ok, we now have a collection of candidates -- fix them. */
-               thread_update_process_threads();
-       } while (restart_needed);
-}
-
-#endif /* CONFIG_SCHED_TRADITIONAL */
-
 boolean_t
 thread_eager_preemption(thread_t thread) 
 {
@@ -5124,6 +4744,7 @@ thread_clear_eager_preempt(thread_t thread)
        thread_unlock(thread);
        splx(x);
 }
+
 /*
  * Scheduling statistics
  */
@@ -5185,15 +4806,52 @@ preemption_enabled(void)
        return (get_preemption_level() == 0 && ml_get_interrupts_enabled());
 }
 
-__assert_only static boolean_t
-thread_runnable(
-       thread_t        thread)
-{
-       return ((thread->state & (TH_RUN|TH_WAIT)) == TH_RUN);
-}
-
 static void
 sched_timer_deadline_tracking_init(void) {
        nanoseconds_to_absolutetime(TIMER_DEADLINE_TRACKING_BIN_1_DEFAULT, &timer_deadline_tracking_bin_1);
        nanoseconds_to_absolutetime(TIMER_DEADLINE_TRACKING_BIN_2_DEFAULT, &timer_deadline_tracking_bin_2);
 }
+
+
+kern_return_t
+sched_work_interval_notify(thread_t thread, uint64_t work_interval_id, uint64_t start, uint64_t finish, uint64_t deadline, uint64_t next_start, uint32_t flags)
+{
+       int urgency;
+       uint64_t urgency_param1, urgency_param2;
+       spl_t s;
+
+       if (work_interval_id == 0) {
+               return (KERN_INVALID_ARGUMENT);
+       }
+
+       assert(thread == current_thread());
+
+       thread_mtx_lock(thread);
+       if (thread->work_interval_id != work_interval_id) {
+               thread_mtx_unlock(thread);
+               return (KERN_INVALID_ARGUMENT);
+       }
+       thread_mtx_unlock(thread);
+
+       s = splsched();
+       thread_lock(thread);
+       urgency = thread_get_urgency(thread, &urgency_param1, &urgency_param2);
+       thread_unlock(thread);
+       splx(s);
+
+       machine_work_interval_notify(thread, work_interval_id, start, finish, deadline, next_start, urgency, flags);
+       return (KERN_SUCCESS);
+}
+
+void thread_set_options(uint32_t thopt) {
+       spl_t x;
+       thread_t t = current_thread();
+       x = splsched();
+       thread_lock(t);
+       t->options |= thopt;
+       thread_unlock(t);
+       splx(x);
+}
index d7326a91c0770c3df97cc2f5b41c1be98a6dff7a..2522592e0d03b11be8fab61cf63ab8f2b5e0297d 100644 (file)
@@ -76,6 +76,8 @@
 
 #ifdef MACH_KERNEL_PRIVATE
 
+#include <mach/branch_predicates.h>
+
 /* Initialization */
 extern void            sched_init(void);
 
@@ -153,15 +155,17 @@ extern void             sched_thread_mode_demote(thread_t thread,
 extern void             sched_thread_mode_undemote(thread_t thread,
                                                    uint32_t reason);
 
+/* Re-evaluate base priority of thread (thread locked) */
+void thread_recompute_priority(thread_t thread);
+
+/* Re-evaluate base priority of thread (thread unlocked) */
+void thread_recompute_qos(thread_t thread);
+
 /* Reset scheduled priority of thread */
-extern void            compute_priority(
+extern void            thread_recompute_sched_pri(
                                        thread_t                thread,
                                        boolean_t               override_depress);
 
-/* Adjust scheduled priority of thread during execution */
-extern void            compute_my_priority(
-                                       thread_t                thread);
-
 /* Periodic scheduler activity */
 extern void            sched_init_thread(void (*)(void));
 
@@ -175,7 +179,7 @@ extern void         update_priority(
 extern void            lightweight_update_priority(
                                                                thread_t                thread);
 
-extern void            sched_traditional_quantum_expire(thread_t       thread);
+extern void             sched_default_quantum_expire(thread_t thread);
 
 /* Idle processor thread */
 extern void            idle_thread(void);
@@ -202,6 +206,9 @@ extern void         thread_setrun(
 #define SCHED_HEADQ            2
 #define SCHED_PREEMPT  4
 
+extern uintptr_t sched_thread_on_rt_queue;
+#define THREAD_ON_RT_RUNQ  ((processor_t)(uintptr_t)&sched_thread_on_rt_queue)
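THREAD_ON_RT_RUNQ is the sentinel-address idiom: the address of a dummy variable yields a processor_t value that can never equal a real processor and is never dereferenced, presumably so thread->runq can record "this thread sits on the global RT run queue". A tiny stand-alone illustration of the idiom (toy names, not kernel code):

    /* Sentinel-address idiom, illustrative only. */
    #include <stdint.h>
    #include <stdio.h>

    static uintptr_t on_rt_queue_token;                  /* never read or written      */
    #define ON_RT_RUNQ ((void *)&on_rt_queue_token)      /* unique, never dereferenced */

    int main(void)
    {
        void *runq = ON_RT_RUNQ;                         /* "enqueued on the RT runq"  */
        if (runq == ON_RT_RUNQ)
            printf("thread is accounted to the RT run queue\n");
        return 0;
    }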
+
 extern processor_set_t task_choose_pset(
                                                        task_t                  task);
 
@@ -215,12 +222,6 @@ extern processor_t choose_processor(
                                                                         processor_t                    processor,
                                                                         thread_t                       thread);
 
-/* Choose a thread from a processor's priority-based runq */
-extern thread_t choose_thread_from_runq(
-                                                         processor_t           processor,
-                                                         run_queue_t           runq,
-                                                         int                           priority);
-
 
 extern void thread_quantum_init(
                                                                thread_t thread);
@@ -241,28 +242,39 @@ extern void       run_queue_remove(
                                                                         run_queue_t            runq,
                                                                         thread_t                       thread);
                                                                          
+struct sched_update_scan_context
+{
+       uint64_t        earliest_bg_make_runnable_time;
+       uint64_t        earliest_normal_make_runnable_time;
+       uint64_t        earliest_rt_make_runnable_time;
+};
+typedef struct sched_update_scan_context *sched_update_scan_context_t;
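The scan context bundles the earliest make-runnable timestamps observed while walking the run queues, presumably feeding the scheduling-latency reporting added elsewhere in this change. A plausible (assumed) usage shape, since the real callers sit outside this hunk:

    /* Assumed usage shape only; the actual callers live in sched_prim.c. */
    struct sched_update_scan_context ctx = {
        .earliest_bg_make_runnable_time     = UINT64_MAX,
        .earliest_normal_make_runnable_time = UINT64_MAX,
        .earliest_rt_make_runnable_time     = UINT64_MAX,
    };

    SCHED(thread_update_scan)(&ctx);   /* per-scheduler scan fills in the minima */
    rt_runq_scan(&ctx);                /* then the global RT queue is folded in  */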
 
 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
 
-extern boolean_t        thread_update_add_thread(
-                                                 thread_t thread);
+extern boolean_t        thread_update_add_thread(thread_t thread);
 extern void             thread_update_process_threads(void);
-extern boolean_t        runq_scan(
-                                  run_queue_t runq);
+extern boolean_t        runq_scan(run_queue_t runq, sched_update_scan_context_t scan_context);
+
+extern void sched_timeshare_init(void);
+extern void sched_timeshare_timebase_init(void);
+extern void sched_timeshare_maintenance_continue(void);
+
+extern boolean_t priority_is_urgent(int priority);
+extern uint32_t sched_timeshare_initial_quantum_size(thread_t thread);
 
-void sched_traditional_timebase_init(void);
-void sched_traditional_maintenance_continue(void);
-boolean_t priority_is_urgent(
-                             int priority);
-uint32_t sched_traditional_initial_quantum_size(
-                                                thread_t thread);
-void sched_traditional_init(void);
+extern int sched_compute_timeshare_priority(thread_t thread);
 
 #endif /* CONFIG_SCHED_TIMESHARE_CORE */
 
+extern void        rt_runq_scan(sched_update_scan_context_t scan_context);
+
 /* Remove thread from its run queue */
-extern boolean_t       thread_run_queue_remove(
-                                               thread_t        thread);
+extern boolean_t       thread_run_queue_remove(thread_t thread);
+thread_t thread_run_queue_remove_for_handoff(thread_t thread);
+
+/* Put a thread back in the run queue after being yanked */
+extern void thread_run_queue_reinsert(thread_t thread, integer_t options);
 
 extern void            thread_timer_expire(
                                        void                    *thread,
@@ -271,35 +283,6 @@ extern void                thread_timer_expire(
 extern boolean_t       thread_eager_preemption(
                                                thread_t thread);
 
-/* Fair Share routines */
-#if defined(CONFIG_SCHED_FAIRSHARE_CORE)
-void           sched_traditional_fairshare_init(void);
-
-int                    sched_traditional_fairshare_runq_count(void);
-
-uint64_t       sched_traditional_fairshare_runq_stats_count_sum(void);
-
-void           sched_traditional_fairshare_enqueue(thread_t thread);
-
-thread_t       sched_traditional_fairshare_dequeue(void);
-
-boolean_t      sched_traditional_fairshare_queue_remove(thread_t thread);
-#endif /* CONFIG_SCHED_FAIRSHARE_CORE */
-
-#if defined(CONFIG_SCHED_GRRR)
-void           sched_grrr_fairshare_init(void);
-
-int                    sched_grrr_fairshare_runq_count(void);
-
-uint64_t       sched_grrr_fairshare_runq_stats_count_sum(void);
-
-void           sched_grrr_fairshare_enqueue(thread_t thread);
-
-thread_t       sched_grrr_fairshare_dequeue(void);
-
-boolean_t      sched_grrr_fairshare_queue_remove(thread_t thread);
-#endif
-
 extern boolean_t sched_generic_direct_dispatch_to_idle_processors;
 
 /* Set the maximum interrupt level for the thread */
@@ -344,6 +327,22 @@ do {                                                               \
        }                                                       \
 } while (0) 
 
+extern uint32_t sched_debug_flags;
+#define SCHED_DEBUG_FLAG_PLATFORM_TRACEPOINTS  0x00000001
+#define SCHED_DEBUG_FLAG_CHOOSE_PROCESSOR_TRACEPOINTS  0x00000002
+
+#define SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(...) do {                                           \
+               if (__improbable(sched_debug_flags & SCHED_DEBUG_FLAG_PLATFORM_TRACEPOINTS)) { \
+                       KERNEL_DEBUG_CONSTANT(__VA_ARGS__);                                                     \
+               }                                                                                                                               \
+       } while(0)
+
+#define SCHED_DEBUG_CHOOSE_PROCESSOR_KERNEL_DEBUG_CONSTANT(...) do {                                           \
+               if (__improbable(sched_debug_flags & SCHED_DEBUG_FLAG_CHOOSE_PROCESSOR_TRACEPOINTS)) { \
+                       KERNEL_DEBUG_CONSTANT(__VA_ARGS__);                                                     \
+               }                                                                                                                               \
+       } while(0)
+
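The two SCHED_DEBUG_* macros gate extra tracepoints behind a runtime flag word, with __improbable() (a __builtin_expect wrapper from the newly included <mach/branch_predicates.h>) marking the debug path as cold. The same pattern in portable, stand-alone C, with printf standing in for KERNEL_DEBUG_CONSTANT:

    /* Portable sketch of the flag-gated, branch-hinted tracepoint pattern. */
    #include <stdint.h>
    #include <stdio.h>

    #define improbable(x) __builtin_expect(!!(x), 0)

    static uint32_t debug_flags;                 /* set from a boot-arg in the kernel */
    #define FLAG_PLATFORM_TRACEPOINTS 0x00000001

    #define PLATFORM_TRACE(...) do {                                          \
            if (improbable(debug_flags & FLAG_PLATFORM_TRACEPOINTS))          \
                printf(__VA_ARGS__);   /* KERNEL_DEBUG_CONSTANT in xnu */      \
        } while (0)

    int main(void)
    {
        PLATFORM_TRACE("not printed: flag clear\n");
        debug_flags |= FLAG_PLATFORM_TRACEPOINTS;
        PLATFORM_TRACE("printed: platform tracepoints enabled\n");
        return 0;
    }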
 #define THREAD_URGENCY_NONE            0       /* indicates that there is no currently runnable thread */
 #define THREAD_URGENCY_BACKGROUND      1       /* indicates that the thread is marked as a "background" thread */
 #define THREAD_URGENCY_NORMAL          2       /* indicates that the thread is marked as a "normal" thread */
@@ -360,6 +359,7 @@ extern void thread_tell_urgency(
                                        int             urgency,
                                        uint64_t        rt_period,
                                        uint64_t        rt_deadline,
+                                       uint64_t        sched_latency,
                                    thread_t nthread);
 
 /* Tells if there are "active" RT threads in the system (provided by CPU PM) */
@@ -383,6 +383,8 @@ extern void sys_override_cpu_throttle(int flag);
  ****************** Only exported until BSD stops using ********************
  */
 
+extern void                    thread_vm_bind_group_add(void);
+
 /* Wake up thread directly, passing result */
 extern kern_return_t clear_wait(
                                                thread_t                thread,
@@ -394,6 +396,12 @@ extern void                thread_bootstrap_return(void);
 /* Return from exception (BSD-visible interface) */
 extern void            thread_exception_return(void) __dead2;
 
+#define SCHED_STRING_MAX_LENGTH (48)
+/* String declaring the name of the current scheduler */
+extern char sched_string[SCHED_STRING_MAX_LENGTH];
+
+extern kern_return_t sched_work_interval_notify(thread_t thread, uint64_t work_interval_id, uint64_t start, uint64_t finish, uint64_t deadline, uint64_t next_start, uint32_t flags);
+
 #endif /* XNU_KERNEL_PRIVATE */
 
 /* Context switch */
@@ -481,13 +489,14 @@ extern boolean_t          preemption_enabled(void);
 #define SCHED(f) (sched_current_dispatch->f)
 
 struct sched_dispatch_table {
+       const char *sched_name;
        void    (*init)(void);                          /* Init global state */
        void    (*timebase_init)(void);         /* Timebase-dependent initialization */
        void    (*processor_init)(processor_t processor);       /* Per-processor scheduler init */
        void    (*pset_init)(processor_set_t pset);     /* Per-processor set scheduler init */
-       
+
        void    (*maintenance_continuation)(void);      /* Function called regularly */
-       
+
        /*
         * Choose a thread of greater or equal priority from the per-processor
         * runqueue for timeshare/fixed threads
@@ -496,22 +505,22 @@ struct sched_dispatch_table {
                                                                  processor_t           processor,
                                                                  int                           priority,
                                                                  ast_t reason);
-       
+
+       /* True if scheduler supports stealing threads */
+       boolean_t   steal_thread_enabled;
+
        /*
         * Steal a thread from another processor in the pset so that it can run
         * immediately
         */
        thread_t        (*steal_thread)(
                                                                processor_set_t         pset);
-       
+
        /*
-        * Recalculate sched_pri based on base priority, past running time,
-        * and scheduling class.
+        * Compute priority for a timeshare thread based on base priority.
         */
-       void            (*compute_priority)(
-                                        thread_t       thread,
-                                        boolean_t                      override_depress);
-       
+       int (*compute_timeshare_priority)(thread_t thread);
+
        /*
         * Pick the best processor for a thread (any kind of thread) to run on.
         */
@@ -527,35 +536,35 @@ struct sched_dispatch_table {
                                                                 processor_t                    processor,
                                                                 thread_t                       thread,
                                                                 integer_t                      options);
-       
+
        /* Migrate threads away in preparation for processor shutdown */
        void (*processor_queue_shutdown)(
                                                                         processor_t                    processor);
-       
+
        /* Remove the specific thread from the per-processor runqueue */
        boolean_t       (*processor_queue_remove)(
                                                                        processor_t             processor,
                                                                        thread_t                thread);
-       
+
        /*
         * Does the per-processor runqueue have any timeshare or fixed priority
         * threads on it? Called without pset lock held, so should
         * not assume immutability while executing.
         */
        boolean_t       (*processor_queue_empty)(processor_t            processor);
-       
+
        /*
         * Would this priority trigger an urgent preemption if it's sitting
         * on the per-processor runqueue?
         */
        boolean_t       (*priority_is_urgent)(int priority);
-       
+
        /*
         * Does the per-processor runqueue contain runnable threads that
         * should cause the currently-running thread to be preempted?
         */
        ast_t           (*processor_csw_check)(processor_t processor);
-       
+
        /*
         * Does the per-processor runqueue contain a runnable thread
         * of > or >= priority, as a preflight for choose_thread() or other
@@ -564,13 +573,13 @@ struct sched_dispatch_table {
        boolean_t       (*processor_queue_has_priority)(processor_t             processor,
                                                                                                int                             priority,
                                                                                                boolean_t               gte);
-       
+
        /* Quantum size for the specified non-realtime thread. */
        uint32_t        (*initial_quantum_size)(thread_t thread);
        
        /* Scheduler mode for a new thread */
        sched_mode_t    (*initial_thread_sched_mode)(task_t parent_task);
-       
+
        /*
         * Is it safe to call update_priority, which may change a thread's
         * runqueue or other state. This can be used to throttle changes
@@ -583,47 +592,25 @@ struct sched_dispatch_table {
         * Side effects may include migration to another processor's runqueue.
         */
        void            (*update_priority)(thread_t thread);
-       
+
        /* Lower overhead update to scheduled priority and state. */
        void            (*lightweight_update_priority)(thread_t thread);
-       
+
        /* Callback for non-realtime threads when the quantum timer fires */
        void            (*quantum_expire)(thread_t thread);
-       
-       /*
-        * Even though we could continue executing on this processor, does the
-        * topology (SMT, for instance) indicate that a better processor could be
-        * chosen
-        */
-       boolean_t       (*should_current_thread_rechoose_processor)(processor_t                 processor);
-    
+
        /*
         * Runnable threads on per-processor runqueue. Should only
         * be used for relative comparisons of load between processors.
         */
        int                     (*processor_runq_count)(processor_t     processor);
-       
-       /* Aggregate runcount statistics for per-processor runqueue */
-    uint64_t    (*processor_runq_stats_count_sum)(processor_t   processor);
-       
-       /* Initialize structures to track demoted fairshare threads */
-       void            (*fairshare_init)(void);
-       
-       /* Number of runnable fairshare threads */
-       int                     (*fairshare_runq_count)(void);
-       
-       /* Aggregate runcount statistics for fairshare runqueue */
-       uint64_t        (*fairshare_runq_stats_count_sum)(void);
-       
-       void            (*fairshare_enqueue)(thread_t thread);
-       
-       thread_t        (*fairshare_dequeue)(void);
 
-       boolean_t       (*fairshare_queue_remove)(thread_t thread);
+       /* Aggregate runcount statistics for per-processor runqueue */
+       uint64_t    (*processor_runq_stats_count_sum)(processor_t   processor);
 
        boolean_t       (*processor_bound_count)(processor_t processor);
 
-       void            (*thread_update_scan)(void);
+       void            (*thread_update_scan)(sched_update_scan_context_t scan_context);
 
        /*
        * Use processor->next_thread to pin a thread to an idle
@@ -631,29 +618,28 @@ struct sched_dispatch_table {
        * be stolen by other processors.
        */
        boolean_t   direct_dispatch_to_idle_processors;
+
+       /* Supports more than one pset */
+       boolean_t   multiple_psets_enabled;
+       /* Supports scheduler groups */
+       boolean_t   sched_groups_enabled;
 };
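Each scheduler now describes itself with one table: a sched_name string, capability booleans such as steal_thread_enabled, and the callback pointers, all reached through the SCHED(f) macro above (which is why the per-scheduler name constants and the sched_enum are removed further down). A toy, self-contained version of the function-pointer dispatch-table pattern:

    /* Toy version of the dispatch-table pattern; names are illustrative. */
    #include <stdio.h>

    struct dispatch {
        const char *name;
        int  (*compute_priority)(int base);
        int   steal_enabled;        /* capability flag, like steal_thread_enabled */
    };

    static int identity_pri(int base) { return base; }

    static const struct dispatch toy_dispatch = {
        .name             = "toy",
        .compute_priority = identity_pri,
        .steal_enabled    = 0,
    };

    static const struct dispatch *current = &toy_dispatch;
    #define SCHED(f) (current->f)

    int main(void)
    {
        printf("%s -> pri %d\n", SCHED(name), SCHED(compute_priority)(31));
        return 0;
    }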
 
 #if defined(CONFIG_SCHED_TRADITIONAL)
-#define kSchedTraditionalString "traditional"
-#define kSchedTraditionalWithPsetRunqueueString "traditional_with_pset_runqueue"
 extern const struct sched_dispatch_table sched_traditional_dispatch;
 extern const struct sched_dispatch_table sched_traditional_with_pset_runqueue_dispatch;
 #endif
 
 #if defined(CONFIG_SCHED_MULTIQ)
 extern const struct sched_dispatch_table sched_multiq_dispatch;
-#define kSchedMultiQString "multiq"
 extern const struct sched_dispatch_table sched_dualq_dispatch;
-#define kSchedDualQString "dualq"
 #endif
 
 #if defined(CONFIG_SCHED_PROTO)
-#define kSchedProtoString "proto"
 extern const struct sched_dispatch_table sched_proto_dispatch;
 #endif
 
 #if defined(CONFIG_SCHED_GRRR)
-#define kSchedGRRRString "grrr"
 extern const struct sched_dispatch_table sched_grrr_dispatch;
 #endif
 
@@ -661,25 +647,6 @@ extern const struct sched_dispatch_table sched_grrr_dispatch;
  * It is an error to invoke any scheduler-related code
  * before this is set up
  */
-enum sched_enum {
-       sched_enum_unknown = 0,
-#if defined(CONFIG_SCHED_TRADITIONAL)
-       sched_enum_traditional = 1,
-       sched_enum_traditional_with_pset_runqueue = 2,
-#endif
-#if defined(CONFIG_SCHED_PROTO)
-       sched_enum_proto = 3,
-#endif
-#if defined(CONFIG_SCHED_GRRR)
-       sched_enum_grrr = 4,
-#endif
-#if defined(CONFIG_SCHED_MULTIQ)
-       sched_enum_multiq = 5,
-       sched_enum_dualq = 6,
-#endif
-       sched_enum_max = 7,
-};
-
 extern const struct sched_dispatch_table *sched_current_dispatch;
 
 #endif /* MACH_KERNEL_PRIVATE */
index 6523bc66dd6927c172ab802606293e2c8b864ebb..d3a5bf688245d03d2c151a343865f2a6b607bc2e 100644 (file)
@@ -52,7 +52,6 @@
 #include <kern/syscall_subr.h>
 #include <kern/task.h>
 #include <kern/thread.h>
-#include <kern/wait_queue.h>
 
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>
@@ -85,9 +84,8 @@ sched_proto_choose_thread(processor_t         processor,
 static thread_t
 sched_proto_steal_thread(processor_set_t               pset);
 
-static void
-sched_proto_compute_priority(thread_t  thread,
-                                                        boolean_t                      override_depress);
+static int
+sched_proto_compute_priority(thread_t thread);
 
 static processor_t
 sched_proto_choose_processor(  processor_set_t         pset,
@@ -142,9 +140,6 @@ sched_proto_lightweight_update_priority(thread_t    thread);
 static void
 sched_proto_quantum_expire(thread_t    thread);
 
-static boolean_t
-sched_proto_should_current_thread_rechoose_processor(processor_t                       processor);
-
 static int
 sched_proto_processor_runq_count(processor_t   processor);
 
@@ -155,18 +150,20 @@ static int
 sched_proto_processor_bound_count(processor_t   processor);
 
 static void
-sched_proto_thread_update_scan(void);
+sched_proto_thread_update_scan(sched_update_scan_context_t scan_context);
 
 
 const struct sched_dispatch_table sched_proto_dispatch = {
+       .sched_name                                     = "proto",
        .init                                           = sched_proto_init,
        .timebase_init                                  = sched_proto_timebase_init,
        .processor_init                                 = sched_proto_processor_init,
        .pset_init                                      = sched_proto_pset_init,
        .maintenance_continuation                       = sched_proto_maintenance_continuation,
        .choose_thread                                  = sched_proto_choose_thread,
+       .steal_thread_enabled                           = FALSE,
        .steal_thread                                   = sched_proto_steal_thread,
-       .compute_priority                               = sched_proto_compute_priority,
+       .compute_timeshare_priority                     = sched_proto_compute_priority,
        .choose_processor                               = sched_proto_choose_processor,
        .processor_enqueue                              = sched_proto_processor_enqueue,
        .processor_queue_shutdown                       = sched_proto_processor_queue_shutdown,
@@ -181,18 +178,13 @@ const struct sched_dispatch_table sched_proto_dispatch = {
        .update_priority                                = sched_proto_update_priority,
        .lightweight_update_priority                    = sched_proto_lightweight_update_priority,
        .quantum_expire                                 = sched_proto_quantum_expire,
-       .should_current_thread_rechoose_processor       = sched_proto_should_current_thread_rechoose_processor,
        .processor_runq_count                           = sched_proto_processor_runq_count,
        .processor_runq_stats_count_sum                 = sched_proto_processor_runq_stats_count_sum,
-       .fairshare_init                                 = sched_traditional_fairshare_init,
-       .fairshare_runq_count                           = sched_traditional_fairshare_runq_count,
-       .fairshare_runq_stats_count_sum                 = sched_traditional_fairshare_runq_stats_count_sum,
-       .fairshare_enqueue                              = sched_traditional_fairshare_enqueue,
-       .fairshare_dequeue                              = sched_traditional_fairshare_dequeue,
-       .fairshare_queue_remove                         = sched_traditional_fairshare_queue_remove,
        .processor_bound_count                          = sched_proto_processor_bound_count,
        .thread_update_scan                             = sched_proto_thread_update_scan,
        .direct_dispatch_to_idle_processors             = TRUE,
+       .multiple_psets_enabled                         = TRUE,
+       .sched_groups_enabled                           = FALSE,
 };
 
 static struct run_queue        *global_runq;
@@ -365,11 +357,10 @@ sched_proto_steal_thread(processor_set_t          pset)
        
 }
 
-static void
-sched_proto_compute_priority(thread_t  thread,
-                                                        boolean_t                      override_depress __unused)
+static int
+sched_proto_compute_priority(thread_t thread)
 {
-       set_sched_pri(thread, thread->priority);
+       return thread->base_pri;
 }
 
 static processor_t
@@ -425,7 +416,7 @@ sched_proto_processor_queue_remove(
 {
        void *                  rqlock;
        run_queue_t             rq;
-       
+
        rqlock = &global_runq_lock;
        rq = global_runq;
        
@@ -511,7 +502,7 @@ sched_proto_priority_is_urgent(int priority)
 }
 
 static ast_t
-sched_proto_processor_csw_check(processor_t processor __unused)
+sched_proto_processor_csw_check(processor_t processor)
 {
        run_queue_t             runq;
        int                             count, urgency;
@@ -526,7 +517,10 @@ sched_proto_processor_csw_check(processor_t processor __unused)
                
                return AST_PREEMPT;
        }
-       
+
+       if (proto_processor != processor)
+               return AST_PREEMPT;
+
        return AST_NONE;
 }
 
@@ -564,17 +558,11 @@ sched_proto_lightweight_update_priority(thread_t  thread __unused)
 }
 
 static void
-sched_proto_quantum_expire(thread_t    thread __unused)
+sched_proto_quantum_expire(thread_t    thread __unused)
 {
        
 }
 
-static boolean_t
-sched_proto_should_current_thread_rechoose_processor(processor_t                       processor)
-{
-       return (proto_processor != processor);
-}
-
 static int
 sched_proto_processor_runq_count(processor_t   processor)
 {
@@ -602,7 +590,7 @@ sched_proto_processor_bound_count(__unused processor_t   processor)
 }
 
 static void
-sched_proto_thread_update_scan(void)
+sched_proto_thread_update_scan(__unused sched_update_scan_context_t scan_context)
 {
        
 }
diff --git a/osfmk/kern/sched_traditional.c b/osfmk/kern/sched_traditional.c
new file mode 100644 (file)
index 0000000..79d94ff
--- /dev/null
@@ -0,0 +1,740 @@
+/*
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * @OSF_FREE_COPYRIGHT@
+ */
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+#include <mach/mach_types.h>
+
+#include <kern/sched.h>
+#include <kern/sched_prim.h>
+
+static boolean_t
+sched_traditional_use_pset_runqueue = FALSE;
+
+static void
+sched_traditional_init(void);
+
+static thread_t
+sched_traditional_steal_thread(processor_set_t pset);
+
+static thread_t
+sched_traditional_steal_processor_thread(processor_t processor);
+
+static void
+sched_traditional_thread_update_scan(sched_update_scan_context_t scan_context);
+
+static void
+sched_traditional_processor_queue_shutdown(processor_t processor);
+
+static boolean_t
+sched_traditional_processor_enqueue(processor_t processor, thread_t thread, integer_t options);
+
+static boolean_t
+sched_traditional_processor_queue_remove(processor_t processor, thread_t thread);
+
+static boolean_t
+sched_traditional_processor_queue_empty(processor_t processor);
+
+static ast_t
+sched_traditional_processor_csw_check(processor_t processor);
+
+static boolean_t
+sched_traditional_processor_queue_has_priority(processor_t processor, int priority, boolean_t gte);
+
+static int
+sched_traditional_processor_runq_count(processor_t processor);
+
+static boolean_t
+sched_traditional_with_pset_runqueue_processor_queue_empty(processor_t processor);
+
+static uint64_t
+sched_traditional_processor_runq_stats_count_sum(processor_t processor);
+
+static uint64_t
+sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum(processor_t processor);
+
+static int
+sched_traditional_processor_bound_count(processor_t processor);
+
+extern void
+sched_traditional_quantum_expire(thread_t thread);
+
+static void
+sched_traditional_processor_init(processor_t processor);
+
+static void
+sched_traditional_pset_init(processor_set_t pset);
+
+static void
+sched_traditional_with_pset_runqueue_init(void);
+
+static sched_mode_t
+sched_traditional_initial_thread_sched_mode(task_t parent_task);
+
+static thread_t
+sched_traditional_choose_thread(processor_t processor, int priority, ast_t reason);
+
+/* Choose a thread from a processor's priority-based runq */
+static thread_t sched_traditional_choose_thread_from_runq(processor_t processor, run_queue_t runq, int priority);
+
+const struct sched_dispatch_table sched_traditional_dispatch = {
+       .sched_name                                     = "traditional",
+       .init                                           = sched_traditional_init,
+       .timebase_init                                  = sched_timeshare_timebase_init,
+       .processor_init                                 = sched_traditional_processor_init,
+       .pset_init                                      = sched_traditional_pset_init,
+       .maintenance_continuation                       = sched_timeshare_maintenance_continue,
+       .choose_thread                                  = sched_traditional_choose_thread,
+       .steal_thread_enabled                           = TRUE,
+       .steal_thread                                   = sched_traditional_steal_thread,
+       .compute_timeshare_priority                     = sched_compute_timeshare_priority,
+       .choose_processor                               = choose_processor,
+       .processor_enqueue                              = sched_traditional_processor_enqueue,
+       .processor_queue_shutdown                       = sched_traditional_processor_queue_shutdown,
+       .processor_queue_remove                         = sched_traditional_processor_queue_remove,
+       .processor_queue_empty                          = sched_traditional_processor_queue_empty,
+       .priority_is_urgent                             = priority_is_urgent,
+       .processor_csw_check                            = sched_traditional_processor_csw_check,
+       .processor_queue_has_priority                   = sched_traditional_processor_queue_has_priority,
+       .initial_quantum_size                           = sched_timeshare_initial_quantum_size,
+       .initial_thread_sched_mode                      = sched_traditional_initial_thread_sched_mode,
+       .can_update_priority                            = can_update_priority,
+       .update_priority                                = update_priority,
+       .lightweight_update_priority                    = lightweight_update_priority,
+       .quantum_expire                                 = sched_default_quantum_expire,
+       .processor_runq_count                           = sched_traditional_processor_runq_count,
+       .processor_runq_stats_count_sum                 = sched_traditional_processor_runq_stats_count_sum,
+       .processor_bound_count                          = sched_traditional_processor_bound_count,
+       .thread_update_scan                             = sched_traditional_thread_update_scan,
+       .direct_dispatch_to_idle_processors             = TRUE,
+       .multiple_psets_enabled                         = TRUE,
+       .sched_groups_enabled                           = FALSE,
+};
+
+const struct sched_dispatch_table sched_traditional_with_pset_runqueue_dispatch = {
+       .sched_name                                     = "traditional_with_pset_runqueue",
+       .init                                           = sched_traditional_with_pset_runqueue_init,
+       .timebase_init                                  = sched_timeshare_timebase_init,
+       .processor_init                                 = sched_traditional_processor_init,
+       .pset_init                                      = sched_traditional_pset_init,
+       .maintenance_continuation                       = sched_timeshare_maintenance_continue,
+       .choose_thread                                  = sched_traditional_choose_thread,
+       .steal_thread_enabled                           = TRUE,
+       .steal_thread                                   = sched_traditional_steal_thread,
+       .compute_timeshare_priority                     = sched_compute_timeshare_priority,
+       .choose_processor                               = choose_processor,
+       .processor_enqueue                              = sched_traditional_processor_enqueue,
+       .processor_queue_shutdown                       = sched_traditional_processor_queue_shutdown,
+       .processor_queue_remove                         = sched_traditional_processor_queue_remove,
+       .processor_queue_empty                          = sched_traditional_with_pset_runqueue_processor_queue_empty,
+       .priority_is_urgent                             = priority_is_urgent,
+       .processor_csw_check                            = sched_traditional_processor_csw_check,
+       .processor_queue_has_priority                   = sched_traditional_processor_queue_has_priority,
+       .initial_quantum_size                           = sched_timeshare_initial_quantum_size,
+       .initial_thread_sched_mode                      = sched_traditional_initial_thread_sched_mode,
+       .can_update_priority                            = can_update_priority,
+       .update_priority                                = update_priority,
+       .lightweight_update_priority                    = lightweight_update_priority,
+       .quantum_expire                                 = sched_default_quantum_expire,
+       .processor_runq_count                           = sched_traditional_processor_runq_count,
+       .processor_runq_stats_count_sum                 = sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum,
+       .processor_bound_count                          = sched_traditional_processor_bound_count,
+       .thread_update_scan                             = sched_traditional_thread_update_scan,
+       .direct_dispatch_to_idle_processors             = FALSE,
+       .multiple_psets_enabled                         = TRUE,
+       .sched_groups_enabled                           = FALSE,
+};
+
+static void
+sched_traditional_init(void)
+{
+       sched_timeshare_init();
+}
+
+static void
+sched_traditional_with_pset_runqueue_init(void)
+{
+       sched_timeshare_init();
+       sched_traditional_use_pset_runqueue = TRUE;
+}
+
+static void
+sched_traditional_processor_init(processor_t processor)
+{
+       if (!sched_traditional_use_pset_runqueue) {
+               run_queue_init(&processor->runq);
+       }
+       processor->runq_bound_count = 0;
+}
+
+static void
+sched_traditional_pset_init(processor_set_t pset)
+{
+       if (sched_traditional_use_pset_runqueue) {
+               run_queue_init(&pset->pset_runq);
+       }
+       pset->pset_runq_bound_count = 0;
+}
+
+__attribute__((always_inline))
+static inline run_queue_t runq_for_processor(processor_t processor)
+{
+       if (sched_traditional_use_pset_runqueue)
+               return &processor->processor_set->pset_runq;
+       else
+               return &processor->runq;
+}
+
+__attribute__((always_inline))
+static inline void runq_consider_incr_bound_count(processor_t processor,
+                                                 thread_t thread)
+{
+       if (thread->bound_processor == PROCESSOR_NULL)
+               return;
+
+       assert(thread->bound_processor == processor);
+
+       if (sched_traditional_use_pset_runqueue)
+               processor->processor_set->pset_runq_bound_count++;
+
+       processor->runq_bound_count++;
+}
+
+__attribute__((always_inline))
+static inline void runq_consider_decr_bound_count(processor_t processor,
+                                                 thread_t thread)
+{
+       if (thread->bound_processor == PROCESSOR_NULL)
+               return;
+
+       assert(thread->bound_processor == processor);
+
+       if (sched_traditional_use_pset_runqueue)
+               processor->processor_set->pset_runq_bound_count--;
+
+       processor->runq_bound_count--;
+}
+
+static thread_t
+sched_traditional_choose_thread(
+                                processor_t     processor,
+                                int             priority,
+                       __unused ast_t           reason)
+{
+       thread_t thread;
+
+       thread = sched_traditional_choose_thread_from_runq(processor, runq_for_processor(processor), priority);
+       if (thread != THREAD_NULL) {
+               runq_consider_decr_bound_count(processor, thread);
+       }
+
+       return thread;
+}
+
+/*
+ *     sched_traditional_choose_thread_from_runq:
+ *
+ *     Locate a thread to execute from the processor run queue
+ *     and return it.  Only choose a thread with greater or equal
+ *     priority.
+ *
+ *     Associated pset must be locked.  Returns THREAD_NULL
+ *     on failure.
+ */
+static thread_t
+sched_traditional_choose_thread_from_runq(
+                                          processor_t     processor,
+                                          run_queue_t     rq,
+                                          int             priority)
+{
+       queue_t         queue   = rq->queues + rq->highq;
+       int             pri     = rq->highq;
+       int             count   = rq->count;
+       thread_t        thread;
+
+       while (count > 0 && pri >= priority) {
+               thread = (thread_t)(uintptr_t)queue_first(queue);
+               while (!queue_end(queue, (queue_entry_t)thread)) {
+                       if (thread->bound_processor == PROCESSOR_NULL ||
+                           thread->bound_processor == processor) {
+                               remqueue((queue_entry_t)thread);
+
+                               thread->runq = PROCESSOR_NULL;
+                               SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
+                               rq->count--;
+                               if (SCHED(priority_is_urgent)(pri)) {
+                                       rq->urgency--; assert(rq->urgency >= 0);
+                               }
+                               if (queue_empty(queue)) {
+                                       if (pri != IDLEPRI)
+                                               clrbit(MAXPRI - pri, rq->bitmap);
+                                       rq->highq = MAXPRI - ffsbit(rq->bitmap);
+                               }
+
+                               return (thread);
+                       }
+                       count--;
+
+                       thread = (thread_t)(uintptr_t)queue_next((queue_entry_t)thread);
+               }
+
+               queue--; pri--;
+       }
+
+       return (THREAD_NULL);
+}
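sched_traditional_choose_thread_from_runq() leans on the run queue's priority bitmap: when a level drains, clrbit() clears its bit and highq is recomputed with ffsbit(), so locating the next highest non-empty priority stays cheap. A stand-alone sketch of that bookkeeping, assuming MAXPRI of 127 as in xnu and using plain bit operations in place of clrbit()/ffsbit():

    /* Sketch of the priority-bitmap bookkeeping used by the run queue. */
    #include <stdint.h>
    #include <stdio.h>

    #define MAXPRI 127
    static uint64_t bitmap[2];           /* bit i set => priority (MAXPRI - i) non-empty */

    static void set_level(int pri)   { int i = MAXPRI - pri; bitmap[i / 64] |=  (1ULL << (i % 64)); }
    static void clear_level(int pri) { int i = MAXPRI - pri; bitmap[i / 64] &= ~(1ULL << (i % 64)); }

    static int highq(void)               /* highest non-empty priority */
    {
        for (int w = 0; w < 2; w++)
            if (bitmap[w])
                return MAXPRI - (w * 64 + __builtin_ctzll(bitmap[w]));
        return -1;
    }

    int main(void)
    {
        set_level(80); set_level(31);
        printf("highq=%d\n", highq());   /* 80 */
        clear_level(80);
        printf("highq=%d\n", highq());   /* 31 */
        return 0;
    }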
+
+static sched_mode_t
+sched_traditional_initial_thread_sched_mode(task_t parent_task)
+{
+       if (parent_task == kernel_task)
+               return TH_MODE_FIXED;
+       else
+               return TH_MODE_TIMESHARE;
+}
+
+/*
+ *     sched_traditional_processor_enqueue:
+ *
+ *     Enqueue thread on a processor run queue.  Thread must be locked,
+ *     and not already be on a run queue.
+ *
+ *     Returns TRUE if a preemption is indicated based on the state
+ *     of the run queue.
+ *
+ *     The run queue must be locked (see thread_run_queue_remove()
+ *     for more info).
+ */
+static boolean_t
+sched_traditional_processor_enqueue(processor_t   processor,
+                                    thread_t      thread,
+                                    integer_t     options)
+{
+       run_queue_t     rq = runq_for_processor(processor);
+       boolean_t       result;
+
+       result = run_queue_enqueue(rq, thread, options);
+       thread->runq = processor;
+       runq_consider_incr_bound_count(processor, thread);
+
+       return (result);
+}
+
+static boolean_t
+sched_traditional_processor_queue_empty(processor_t processor)
+{
+       return runq_for_processor(processor)->count == 0;
+}
+
+static boolean_t
+sched_traditional_with_pset_runqueue_processor_queue_empty(processor_t processor)
+{
+       processor_set_t pset = processor->processor_set;
+       int count = runq_for_processor(processor)->count;
+
+       /*
+        * The pset runq contains the count of all runnable threads
+        * for all processors in the pset. However, for threads that
+        * are bound to another processor, the current "processor"
+        * is not eligible to execute the thread. So we only
+        * include bound threads that are bound to the current
+        * "processor". This allows the processor to idle when the
+        * count of eligible threads drops to 0, even if there's
+        * a runnable thread bound to a different processor in the
+        * shared runq.
+        */
+
+       count -= pset->pset_runq_bound_count;
+       count += processor->runq_bound_count;
+
+       return count == 0;
+}
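In the pset-runqueue variant the shared queue counts every runnable thread in the pset, so a processor may only idle once threads bound elsewhere are excluded. The arithmetic, with illustrative numbers:

    /* Worked example of the eligibility arithmetic (values illustrative). */
    int count                 = 5;  /* all runnable threads on the shared pset runq */
    int pset_runq_bound_count = 2;  /* of those, bound to some processor            */
    int runq_bound_count      = 1;  /* of those, bound to *this* processor          */
    int eligible = count - pset_runq_bound_count + runq_bound_count;   /* 5 - 2 + 1 = 4 */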
+
+static ast_t
+sched_traditional_processor_csw_check(processor_t processor)
+{
+       run_queue_t     runq;
+       boolean_t       has_higher;
+
+       assert(processor->active_thread != NULL);
+
+       runq = runq_for_processor(processor);
+
+       if (processor->first_timeslice) {
+               has_higher = (runq->highq > processor->current_pri);
+       } else {
+               has_higher = (runq->highq >= processor->current_pri);
+       }
+
+       if (has_higher) {
+               if (runq->urgency > 0)
+                       return (AST_PREEMPT | AST_URGENT);
+
+               return AST_PREEMPT;
+       }
+
+       return AST_NONE;
+}
+
+static boolean_t
+sched_traditional_processor_queue_has_priority(processor_t      processor,
+                                               int              priority,
+                                               boolean_t        gte)
+{
+       if (gte)
+               return runq_for_processor(processor)->highq >= priority;
+       else
+               return runq_for_processor(processor)->highq > priority;
+}
+
+static int
+sched_traditional_processor_runq_count(processor_t processor)
+{
+       return runq_for_processor(processor)->count;
+}
+
+static uint64_t
+sched_traditional_processor_runq_stats_count_sum(processor_t processor)
+{
+       return runq_for_processor(processor)->runq_stats.count_sum;
+}
+
+static uint64_t
+sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum(processor_t processor)
+{
+       if (processor->cpu_id == processor->processor_set->cpu_set_low)
+               return runq_for_processor(processor)->runq_stats.count_sum;
+       else
+               return 0ULL;
+}
+
+static int
+sched_traditional_processor_bound_count(processor_t processor)
+{
+       return processor->runq_bound_count;
+}
+
+/*
+ *     sched_traditional_processor_queue_shutdown:
+ *
+ *     Shutdown a processor run queue by
+ *     re-dispatching non-bound threads.
+ *
+ *     Associated pset must be locked, and is
+ *     returned unlocked.
+ */
+static void
+sched_traditional_processor_queue_shutdown(processor_t processor)
+{
+       processor_set_t         pset    = processor->processor_set;
+       run_queue_t             rq      = runq_for_processor(processor);
+       queue_t                 queue   = rq->queues + rq->highq;
+       int                     pri     = rq->highq;
+       int                     count   = rq->count;
+       thread_t                next, thread;
+       queue_head_t            tqueue;
+
+       queue_init(&tqueue);
+
+       while (count > 0) {
+               thread = (thread_t)(uintptr_t)queue_first(queue);
+               while (!queue_end(queue, (queue_entry_t)thread)) {
+                       next = (thread_t)(uintptr_t)queue_next((queue_entry_t)thread);
+
+                       if (thread->bound_processor == PROCESSOR_NULL) {
+                               remqueue((queue_entry_t)thread);
+
+                               thread->runq = PROCESSOR_NULL;
+                               SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
+                               runq_consider_decr_bound_count(processor, thread);
+                               rq->count--;
+                               if (SCHED(priority_is_urgent)(pri)) {
+                                       rq->urgency--; assert(rq->urgency >= 0);
+                               }
+                               if (queue_empty(queue)) {
+                                       if (pri != IDLEPRI)
+                                               clrbit(MAXPRI - pri, rq->bitmap);
+                                       rq->highq = MAXPRI - ffsbit(rq->bitmap);
+                               }
+
+                               enqueue_tail(&tqueue, (queue_entry_t)thread);
+                       }
+                       count--;
+
+                       thread = next;
+               }
+
+               queue--; pri--;
+       }
+
+       pset_unlock(pset);
+
+       while ((thread = (thread_t)(uintptr_t)dequeue_head(&tqueue)) != THREAD_NULL) {
+               thread_lock(thread);
+
+               thread_setrun(thread, SCHED_TAILQ);
+
+               thread_unlock(thread);
+       }
+}
+
+#if 0
+static void
+run_queue_check(
+                run_queue_t     rq,
+                thread_t        thread)
+{
+       queue_t         q;
+       queue_entry_t   qe;
+
+       if (rq != thread->runq)
+               panic("run_queue_check: thread runq");
+
+       if (thread->sched_pri > MAXPRI || thread->sched_pri < MINPRI)
+               panic("run_queue_check: thread sched_pri");
+
+       q = &rq->queues[thread->sched_pri];
+       qe = queue_first(q);
+       while (!queue_end(q, qe)) {
+               if (qe == (queue_entry_t)thread)
+                       return;
+
+               qe = queue_next(qe);
+       }
+
+       panic("run_queue_check: end");
+}
+#endif /* 0 */
+
+/*
+ * Locks the runqueue itself.
+ *
+ * Thread must be locked.
+ */
+static boolean_t
+sched_traditional_processor_queue_remove(processor_t processor,
+                                         thread_t thread)
+{
+       processor_set_t pset;
+       run_queue_t     rq;
+
+       pset = processor->processor_set;
+       pset_lock(pset);
+
+       rq = runq_for_processor(processor);
+
+       if (processor == thread->runq) {
+               /*
+                * Thread is on a run queue and we have a lock on
+                * that run queue.
+                */
+               runq_consider_decr_bound_count(processor, thread);
+               run_queue_remove(rq, thread);
+       }
+       else {
+               /*
+                * The thread left the run queue before we could
+                * lock the run queue.
+                */
+               assert(thread->runq == PROCESSOR_NULL);
+               processor = PROCESSOR_NULL;
+       }
+
+       pset_unlock(pset);
+
+       return (processor != PROCESSOR_NULL);
+}
+
+/*
+ *     sched_traditional_steal_processor_thread:
+ *
+ *     Locate a thread to steal from the processor and
+ *     return it.
+ *
+ *     Associated pset must be locked.  Returns THREAD_NULL
+ *     on failure.
+ */
+static thread_t
+sched_traditional_steal_processor_thread(processor_t processor)
+{
+       run_queue_t     rq      = runq_for_processor(processor);
+       queue_t         queue   = rq->queues + rq->highq;
+       int             pri     = rq->highq;
+       int             count   = rq->count;
+       thread_t        thread;
+
+       while (count > 0) {
+               thread = (thread_t)(uintptr_t)queue_first(queue);
+               while (!queue_end(queue, (queue_entry_t)thread)) {
+                       if (thread->bound_processor == PROCESSOR_NULL) {
+                               remqueue((queue_entry_t)thread);
+
+                               thread->runq = PROCESSOR_NULL;
+                               SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
+                               runq_consider_decr_bound_count(processor, thread);
+                               rq->count--;
+                               if (SCHED(priority_is_urgent)(pri)) {
+                                       rq->urgency--; assert(rq->urgency >= 0);
+                               }
+                               if (queue_empty(queue)) {
+                                       if (pri != IDLEPRI)
+                                               clrbit(MAXPRI - pri, rq->bitmap);
+                                       rq->highq = MAXPRI - ffsbit(rq->bitmap);
+                               }
+
+                               return (thread);
+                       }
+                       count--;
+
+                       thread = (thread_t)(uintptr_t)queue_next((queue_entry_t)thread);
+               }
+
+               queue--; pri--;
+       }
+
+       return (THREAD_NULL);
+}
+
+/*
+ *     Locate and steal a thread, beginning
+ *     at the pset.
+ *
+ *     The pset must be locked, and is returned
+ *     unlocked.
+ *
+ *     Returns the stolen thread, or THREAD_NULL on
+ *     failure.
+ */
+static thread_t
+sched_traditional_steal_thread(processor_set_t pset)
+{
+       processor_set_t nset, cset = pset;
+       processor_t     processor;
+       thread_t        thread;
+
+       do {
+               processor = (processor_t)(uintptr_t)queue_first(&cset->active_queue);
+               while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) {
+                       if (runq_for_processor(processor)->count > 0) {
+                               thread = sched_traditional_steal_processor_thread(processor);
+                               if (thread != THREAD_NULL) {
+                                       remqueue((queue_entry_t)processor);
+                                       enqueue_tail(&cset->active_queue, (queue_entry_t)processor);
+
+                                       pset_unlock(cset);
+
+                                       return (thread);
+                               }
+                       }
+
+                       processor = (processor_t)(uintptr_t)queue_next((queue_entry_t)processor);
+               }
+
+               nset = next_pset(cset);
+
+               if (nset != pset) {
+                       pset_unlock(cset);
+
+                       cset = nset;
+                       pset_lock(cset);
+               }
+       } while (nset != pset);
+
+       pset_unlock(cset);
+
+       return (THREAD_NULL);
+}
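After a successful steal the victim processor is pulled off the pset's active queue and re-enqueued at the tail, so repeated steals rotate across CPUs instead of hammering the first busy one. A toy rotate-to-tail with the same effect:

    /* Toy rotate-to-tail fairness, as used after a successful steal. */
    #include <stdio.h>

    int main(void)
    {
        int active[4] = {0, 1, 2, 3};       /* processor ids in scan order */
        int victim = active[0];             /* stole from the first busy CPU */

        /* rotate the victim to the tail so the next scan tries others first */
        for (int i = 0; i < 3; i++)
            active[i] = active[i + 1];
        active[3] = victim;

        for (int i = 0; i < 4; i++)
            printf("%d ", active[i]);       /* 1 2 3 0 */
        printf("\n");
        return 0;
    }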
+
+static void
+sched_traditional_thread_update_scan(sched_update_scan_context_t scan_context)
+{
+       boolean_t       restart_needed = FALSE;
+       processor_t     processor = processor_list;
+       processor_set_t pset;
+       thread_t        thread;
+       spl_t           s;
+
+       do {
+               do {
+                       /*
+                        * TODO: in sched_traditional_use_pset_runqueue case,
+                        *  avoid scanning the same runq multiple times
+                        */
+                       pset = processor->processor_set;
+
+                       s = splsched();
+                       pset_lock(pset);
+
+                       restart_needed = runq_scan(runq_for_processor(processor), scan_context);
+
+                       pset_unlock(pset);
+                       splx(s);
+
+                       if (restart_needed)
+                               break;
+
+                       thread = processor->idle_thread;
+                       if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) {
+                               if (thread_update_add_thread(thread) == FALSE) {
+                                       restart_needed = TRUE;
+                                       break;
+                               }
+                       }
+               } while ((processor = processor->processor_list) != NULL);
+
+               /* Ok, we now have a collection of candidates -- fix them. */
+               thread_update_process_threads();
+       } while (restart_needed);
+}
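Both the deleted thread_update_scan() and this replacement use the same collect-then-process loop: candidates are only gathered while splsched and the pset lock are held (runq_scan(), thread_update_add_thread()), the locks are dropped, thread_update_process_threads() does the priority work, and a full candidate buffer sets restart_needed to trigger another pass. A compact stand-alone sketch of that bounded batching:

    /* Sketch of the bounded collect-then-process pattern (toy, locking omitted). */
    #include <stdbool.h>
    #include <stdio.h>

    #define BATCH 4
    static int batch[BATCH], batch_count;

    static bool add_candidate(int t)        /* like thread_update_add_thread() */
    {
        if (batch_count == BATCH)
            return false;                   /* buffer full: caller must restart */
        batch[batch_count++] = t;
        return true;
    }

    int main(void)
    {
        int next = 0, total = 10;
        bool restart;
        do {
            restart = false;
            while (next < total) {
                if (!add_candidate(next)) { restart = true; break; }
                next++;
            }
            /* "locks dropped" here: process what we gathered */
            for (int i = 0; i < batch_count; i++)
                printf("update thread %d\n", batch[i]);
            batch_count = 0;
        } while (restart);
        return 0;
    }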
+
index 819c6c686fc583cb0d67d3d2d82763a8c5ba340f..b43fe2db1b3d63d5c2928551e9f5d957f30eceb6 100644 (file)
@@ -28,6 +28,7 @@
 #include <mach/mach_types.h>
 #include <kern/assert.h>
 #include <kern/clock.h>
+#include <kern/coalition.h>
 #include <kern/debug.h>
 #include <kern/host.h>
 #include <kern/kalloc.h>
 #include <kern/sched_prim.h>
 #include <kern/sfi.h>
 #include <kern/timer_call.h>
-#include <kern/wait_queue.h>
+#include <kern/waitq.h>
 #include <kern/ledger.h>
-#include <kern/coalition.h>
-
 #include <pexpert/pexpert.h>
 
 #include <libkern/kernel_mach_header.h>
 
 #include <sys/kdebug.h>
 
+#if CONFIG_SCHED_SFI
+
 #define SFI_DEBUG 0
 
 #if SFI_DEBUG
@@ -93,7 +94,7 @@ extern sched_call_t workqueue_get_sched_callback(void);
  *
  * The pset lock may also be taken, but not while any other locks are held.
  *
- * splsched ---> sfi_lock ---> wait_queue ---> thread_lock
+ * splsched ---> sfi_lock ---> waitq ---> thread_lock
  *        \  \              \__ thread_lock (*)
  *         \  \__ pset_lock
  *          \
@@ -168,7 +169,7 @@ struct sfi_class_state {
        boolean_t       class_sfi_is_enabled;
        volatile boolean_t      class_in_on_phase;
 
-       struct wait_queue       wait_queue;     /* threads in ready state */
+       struct waitq            waitq;  /* threads in ready state */
        thread_continue_t       continuation;
 
        const char *    class_name;
@@ -252,7 +253,7 @@ void sfi_init(void)
                        timer_call_setup(&sfi_classes[i].on_timer, sfi_timer_per_class_on, (void *)(uintptr_t)i);
                        sfi_classes[i].on_timer_programmed = FALSE;
                        
-                       kret = wait_queue_init(&sfi_classes[i].wait_queue, SYNC_POLICY_FIFO);
+                       kret = waitq_init(&sfi_classes[i].waitq, SYNC_POLICY_FIFO|SYNC_POLICY_DISABLE_IRQ);
                        assert(kret == KERN_SUCCESS);
                } else {
                        /* The only allowed gap is for SFI_CLASS_UNSPECIFIED */
@@ -428,9 +429,9 @@ static void sfi_timer_per_class_on(
        sfi_class->class_in_on_phase = TRUE;
        sfi_class->on_timer_programmed = FALSE;
 
-       kret = wait_queue_wakeup64_all(&sfi_class->wait_queue,
-                                                                  CAST_EVENT64_T(sfi_class_id),
-                                                                  THREAD_AWAKENED);
+       kret = waitq_wakeup64_all(&sfi_class->waitq,
+                                 CAST_EVENT64_T(sfi_class_id),
+                                 THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
        assert(kret == KERN_SUCCESS || kret == KERN_NOT_WAITING);
 
        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SFI, SFI_ON_TIMER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
@@ -775,23 +776,21 @@ sfi_class_id_t sfi_thread_classify(thread_t thread)
        /*
         * Threads with unspecified, legacy, or user-initiated QOS class can be individually managed.
         */
-
        switch (task_role) {
-               case TASK_CONTROL_APPLICATION:
-               case TASK_FOREGROUND_APPLICATION:
+       case TASK_CONTROL_APPLICATION:
+       case TASK_FOREGROUND_APPLICATION:
+               focal = TRUE;
+               break;
+       case TASK_BACKGROUND_APPLICATION:
+       case TASK_DEFAULT_APPLICATION:
+       case TASK_THROTTLE_APPLICATION:
+       case TASK_UNSPECIFIED:
+               /* Focal if the task is in a coalition with a FG/focal app */
+               if (task_coalition_focal_count(thread->task) > 0)
                        focal = TRUE;
-                       break;
-
-               case TASK_BACKGROUND_APPLICATION:
-               case TASK_DEFAULT_APPLICATION:
-               case TASK_UNSPECIFIED:
-                       /* Focal if in coalition with foreground app */
-                       if (coalition_focal_task_count(thread->task->coalition) > 0)
-                               focal = TRUE;
-                       break;
-
-               default:
-                       break;
+               break;
+       default:
+               break;
        }
 
        if (managed_task) {
@@ -909,7 +908,7 @@ static inline void _sfi_wait_cleanup(sched_call_t callback) {
        self->sfi_wait_class = SFI_CLASS_UNSPECIFIED;
        simple_unlock(&sfi_lock);
        splx(s);
-       assert(SFI_CLASS_UNSPECIFIED < current_sfi_wait_class < MAX_SFI_CLASS_ID);
+       assert((SFI_CLASS_UNSPECIFIED < current_sfi_wait_class) && (current_sfi_wait_class < MAX_SFI_CLASS_ID));
        ledger_credit(self->task->ledger, task_ledgers.sfi_wait_times[current_sfi_wait_class], sfi_wait_time);
 }
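
The assertion rewritten above fixes a classic C pitfall: a chained comparison such as a < x < b parses as (a < x) < b, so the boolean result of the first test is compared against the upper bound and the range check never fails; since MAX_SFI_CLASS_ID is well above 1, the old form was effectively always true. A minimal standalone illustration with sample values (not kernel code):

    #include <stdio.h>

    int main(void)
    {
        int lo = 0, x = 7, hi = 5;      /* x is clearly out of range */

        /* Chained form: parses as ((lo < x) < hi) -> (1 < 5) -> true. */
        printf("chained:  %d\n", lo < x < hi);

        /* Explicit form, as in the fixed assert: both bounds are checked. */
        printf("explicit: %d\n", (lo < x) && (x < hi));
        return 0;
    }
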
 
@@ -974,10 +973,10 @@ void sfi_ast(thread_t thread)
                /* Need to block thread in wait queue */
                KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SFI, SFI_THREAD_DEFER), tid, class_id, 0, 0, 0);
 
-               waitret = wait_queue_assert_wait64(&sfi_class->wait_queue,
-                                                  CAST_EVENT64_T(class_id),
-                                                  THREAD_INTERRUPTIBLE,
-                                                  0);
+               waitret = waitq_assert_wait64(&sfi_class->waitq,
+                                             CAST_EVENT64_T(class_id),
+                                             THREAD_INTERRUPTIBLE,
+                                             0);
                if (waitret == THREAD_WAITING) {
                        thread->sfi_wait_class = class_id;
                        did_wait = TRUE;
@@ -1004,10 +1003,7 @@ void sfi_ast(thread_t thread)
        }
 }
 
-/*
- * Thread must be unlocked
- * May be called with coalition, task, or thread mutex held
- */
+/* Thread must be unlocked */
 void sfi_reevaluate(thread_t thread)
 {
        kern_return_t kret;
@@ -1051,7 +1047,7 @@ void sfi_reevaluate(thread_t thread)
 
                        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SFI, SFI_WAIT_CANCELED), thread_tid(thread), current_class_id, class_id, 0, 0);
 
-                       kret = wait_queue_wakeup64_thread(&sfi_class->wait_queue,
+                       kret = waitq_wakeup64_thread(&sfi_class->waitq,
                                                                                          CAST_EVENT64_T(current_class_id),
                                                                                          thread,
                                                                                          THREAD_AWAKENED);
@@ -1091,3 +1087,56 @@ void sfi_reevaluate(thread_t thread)
        simple_unlock(&sfi_lock);
        splx(s);
 }
+
+#else /* !CONFIG_SCHED_SFI */
+
+kern_return_t sfi_set_window(uint64_t window_usecs __unused)
+{
+       return (KERN_NOT_SUPPORTED);
+}
+
+kern_return_t sfi_window_cancel(void)
+{
+       return (KERN_NOT_SUPPORTED);
+}
+
+
+kern_return_t sfi_get_window(uint64_t *window_usecs __unused)
+{
+       return (KERN_NOT_SUPPORTED);
+}
+
+
+kern_return_t sfi_set_class_offtime(sfi_class_id_t class_id __unused, uint64_t offtime_usecs __unused)
+{
+       return (KERN_NOT_SUPPORTED);
+}
+
+kern_return_t sfi_class_offtime_cancel(sfi_class_id_t class_id __unused)
+{
+       return (KERN_NOT_SUPPORTED);
+}
+
+kern_return_t sfi_get_class_offtime(sfi_class_id_t class_id __unused, uint64_t *offtime_usecs __unused)
+{
+       return (KERN_NOT_SUPPORTED);
+}
+
+void sfi_reevaluate(thread_t thread __unused)
+{
+       return;
+}
+
+sfi_class_id_t sfi_thread_classify(thread_t thread)
+{
+       task_t task = thread->task;
+       boolean_t is_kernel_thread = (task == kernel_task);
+
+       if (is_kernel_thread) {
+               return SFI_CLASS_KERNEL;
+       }
+
+       return SFI_CLASS_OPTED_OUT;
+}
+
+#endif /* !CONFIG_SCHED_SFI */
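
When CONFIG_SCHED_SFI is compiled out, the stubs above keep the SFI entry points linkable: the window and off-time setters/getters report KERN_NOT_SUPPORTED, sfi_reevaluate() becomes a no-op, and sfi_thread_classify() classifies everything as either SFI_CLASS_KERNEL or SFI_CLASS_OPTED_OUT. A caller-side sketch of degrading gracefully against such stubs (the local definitions below are stand-ins, not from the tree):

    #include <stdio.h>

    typedef int kern_return_t;
    #define KERN_SUCCESS        0
    #define KERN_NOT_SUPPORTED  46   /* value as in <mach/kern_return.h> */

    /* Stand-in for sfi_set_window(): behaves like the stub that is built
     * when CONFIG_SCHED_SFI is off. */
    static kern_return_t sfi_set_window(unsigned long long window_usecs)
    {
        (void)window_usecs;
        return KERN_NOT_SUPPORTED;
    }

    int main(void)
    {
        kern_return_t kr = sfi_set_window(10000);

        if (kr == KERN_NOT_SUPPORTED)
            printf("SFI not built into this kernel; skipping window setup\n");
        else if (kr != KERN_SUCCESS)
            printf("sfi_set_window failed: %d\n", kr);
        return 0;
    }
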
diff --git a/osfmk/kern/smp.h b/osfmk/kern/smp.h
new file mode 100644 (file)
index 0000000..d4e099b
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2014 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef        _KERN_SMP_H_
+#define        _KERN_SMP_H_
+
+#ifdef MACH_KERNEL_PRIVATE
+
+#include <machine/smp.h>
+
+#endif /* MACH_KERNEL_PRIVATE */
+
+#endif /* _KERN_SMP_H_ */
index ef9f6b8dad81842118e1d64b5f98810a800f5f7f..400cedcf99491ea95e5dfdb2bbf0eaa54ce1147d 100644 (file)
@@ -222,7 +222,8 @@ stack_alloc_internal(void)
                if (kernel_memory_allocate(kernel_map, &stack,
                                           kernel_stack_size + (2*PAGE_SIZE),
                                           stack_addr_mask,
-                                          KMA_KSTACK | KMA_KOBJECT | guard_flags)
+                                          KMA_KSTACK | KMA_KOBJECT | guard_flags,
+                                          VM_KERN_MEMORY_STACK)
                    != KERN_SUCCESS)
                        panic("stack_alloc: kernel_memory_allocate");
 
index 6ddd0389c8526a9c1b6acf60efab0e16d3310ff3..53013fa79ef7dfa820f3459e6254ebfc29764777 100644 (file)
@@ -86,7 +86,9 @@
 #include <kern/machine.h>
 #include <kern/processor.h>
 #include <kern/sched_prim.h>
+#if CONFIG_SCHED_SFI
 #include <kern/sfi.h>
+#endif
 #include <kern/startup.h>
 #include <kern/task.h>
 #include <kern/thread.h>
 #if CONFIG_TELEMETRY
 #include <kern/telemetry.h>
 #endif
-#include <kern/wait_queue.h>
 #include <kern/xpr.h>
 #include <kern/zalloc.h>
 #include <kern/locks.h>
+#include <kern/debug.h>
+#include <corpses/task_corpse.h>
 #include <prng/random.h>
 #include <console/serial_protos.h>
 #include <vm/vm_kern.h>
 #include <sys/kdebug.h>
 #include <sys/random.h>
 
+#include <kern/waitq.h>
+
+
 #if CONFIG_ATM
 #include <atm/atm_internal.h>
 #endif
 #include <security/mac_mach_internal.h>
 #endif
 
-#if CONFIG_COUNTERS
-#include <pmc/pmc.h>
-#endif
-
 #if KPC
 #include <kern/kpc.h>
 #endif
@@ -202,14 +204,14 @@ static inline void
 kernel_bootstrap_log(const char *message)
 {
 //     kprintf("kernel_bootstrap: %s\n", message);
-       kernel_debug_string(message);
+       kernel_debug_string_simple(message);
 }
 
 static inline void
 kernel_bootstrap_thread_log(const char *message)
 {
 //     kprintf("kernel_bootstrap_thread: %s\n", message);
-       kernel_debug_string(message);
+       kernel_debug_string_simple(message);
 }
 
 void
@@ -227,12 +229,16 @@ kernel_early_bootstrap(void)
         */
        timer_call_init();
 
+#if CONFIG_SCHED_SFI
        /*
         * Configure SFI classes
         */
        sfi_early_init();
+#endif
 }
 
+extern boolean_t IORamDiskBSDRoot(void);
+extern kern_return_t cpm_preallocate_early(void);
 
 void
 kernel_bootstrap(void)
@@ -267,26 +273,34 @@ kernel_bootstrap(void)
        machine_info.major_version = version_major;
        machine_info.minor_version = version_minor;
 
+
 #if CONFIG_TELEMETRY
        kernel_bootstrap_log("telemetry_init");
        telemetry_init();
 #endif
 
+#if CONFIG_CSR
+       kernel_bootstrap_log("csr_init");
+       csr_init();
+#endif
+
        kernel_bootstrap_log("stackshot_lock_init");    
        stackshot_lock_init();
 
        kernel_bootstrap_log("sched_init");
        sched_init();
 
-       kernel_bootstrap_log("wait_queue_bootstrap");
-       wait_queue_bootstrap();
+       kernel_bootstrap_log("waitq_bootstrap");
+       waitq_bootstrap();
 
        kernel_bootstrap_log("ipc_bootstrap");
        ipc_bootstrap();
 
 #if CONFIG_MACF
+       kernel_bootstrap_log("mac_policy_init");
        mac_policy_init();
 #endif
+
        kernel_bootstrap_log("ipc_init");
        ipc_init();
 
@@ -312,8 +326,8 @@ kernel_bootstrap(void)
         *      Initialize the IPC, task, and thread subsystems.
         */
 #if CONFIG_COALITIONS
-       kernel_bootstrap_log("coalition_init");
-       coalition_init();
+       kernel_bootstrap_log("coalitions_init");
+       coalitions_init();
 #endif
 
        kernel_bootstrap_log("task_init");
@@ -328,17 +342,15 @@ kernel_bootstrap(void)
        atm_init();
 #endif
 
-#if CONFIG_CSR
-       kernel_bootstrap_log("csr_init");
-       csr_init();
-#endif
-
 #if CONFIG_BANK
        /* Initialize the BANK Manager. */
        kernel_bootstrap_log("bank_init");
        bank_init();
 #endif
        
+       /* initialize the corpse config based on boot-args */
+       corpses_init();
+
        /*
         *      Create a kernel thread to execute the kernel bootstrap.
         */
@@ -348,6 +360,7 @@ kernel_bootstrap(void)
        if (result != KERN_SUCCESS) panic("kernel_bootstrap: result = %08X\n", result);
 
        thread->state = TH_RUN;
+       thread->last_made_runnable_time = mach_absolute_time();
        thread_deallocate(thread);
 
        kernel_bootstrap_log("load_context - done");
@@ -359,6 +372,7 @@ int kth_started = 0;
 
 vm_offset_t vm_kernel_addrperm;
 vm_offset_t buf_kernel_addrperm;
+vm_offset_t vm_kernel_addrperm_ext;
 
 /*
  * Now running in a thread.  Kick off other services,
@@ -451,10 +465,6 @@ kernel_bootstrap_thread(void)
        alternate_debugger_init();
 #endif
 
-#if CONFIG_COUNTERS
-       pmc_bootstrap();
-#endif
-
 #if KPC
        kpc_init();
 #endif
@@ -544,14 +554,16 @@ kernel_bootstrap_thread(void)
        mac_policy_initmach();
 #endif
 
+#if CONFIG_SCHED_SFI
        kernel_bootstrap_log("sfi_init");
        sfi_init();
+#endif
 
        /*
-        * Initialize the global used for permuting kernel
+        * Initialize the globals used for permuting kernel
         * addresses that may be exported to userland as tokens
-        * using VM_KERNEL_ADDRPERM(). Force the random number
-        * to be odd to avoid mapping a non-zero
+        * using VM_KERNEL_ADDRPERM()/VM_KERNEL_ADDRPERM_EXTERNAL().
+        * Force the random number to be odd to avoid mapping a non-zero
         * word-aligned address to zero via addition.
         * Note: at this stage we can use the cryptographically secure PRNG
         * rather than early_random().
@@ -560,6 +572,12 @@ kernel_bootstrap_thread(void)
        vm_kernel_addrperm |= 1;
        read_random(&buf_kernel_addrperm, sizeof(buf_kernel_addrperm));
        buf_kernel_addrperm |= 1;
+       read_random(&vm_kernel_addrperm_ext, sizeof(vm_kernel_addrperm_ext));
+       vm_kernel_addrperm_ext |= 1;
+
+       vm_set_restrictions();
+
+
 
        /*
         *      Start the user bootstrap.
@@ -577,7 +595,7 @@ kernel_bootstrap_thread(void)
        serial_keyboard_init();         /* Start serial keyboard if wanted */
 
        vm_page_init_local_q();
-       
+
        thread_bind(PROCESSOR_NULL);
 
        /*
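
Among the bootstrap changes above, kernel_bootstrap_thread() now seeds a third permutation value, vm_kernel_addrperm_ext, for kernel pointers exported outside the kernel. As the updated comment explains, each random constant is forced odd so that adding it to a non-zero word-aligned address can never yield zero (even plus odd is odd, hence non-zero). A minimal sketch of the ADDRPERM idea using hypothetical names, not the kernel's actual macros:

    #include <stdint.h>
    #include <stdio.h>

    /* Seeded from a random source at boot and forced odd, as in the diff. */
    static uintptr_t addrperm_ext;

    /* ADDRPERM-style obfuscation: NULL stays NULL, any other pointer is
     * offset by the secret constant before being exported. A word-aligned
     * address is even and the constant is odd, so the sum is never zero. */
    static uintptr_t addrperm_external(uintptr_t addr)
    {
        return (addr == 0) ? 0 : addr + addrperm_ext;
    }

    int main(void)
    {
        addrperm_ext = 0x5a5a5a5bUL;    /* stand-in for read_random(); odd */
        printf("%#lx\n", (unsigned long)addrperm_external(0x80102030UL));
        return 0;
    }
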
index b4e81af38c630301b0b78475c0cf79ed868a5e87..fc83bb97d76af4cace49ef953b23049d643cd0da 100644 (file)
@@ -43,7 +43,6 @@
 
 #ifdef MACH_KERNEL_PRIVATE
 
-#include <kern/wait_queue.h>
 #include <kern/macro_help.h>
 #include <kern/queue.h>
 #include <kern/locks.h>
index 812aa800a1ba8aae5450a1af488a6ad7f27a3c3b..0cba287b0ad96c207619958ef8c5a271275bcb53 100644 (file)
@@ -54,7 +54,7 @@
 #include <ipc/ipc_port.h>
 #include <ipc/ipc_space.h>
 #include <kern/host.h>
-#include <kern/wait_queue.h>
+#include <kern/waitq.h>
 #include <kern/zalloc.h>
 #include <kern/mach_param.h>
 
@@ -174,43 +174,28 @@ semaphore_create(
        if (s == SEMAPHORE_NULL)
                return KERN_RESOURCE_SHORTAGE; 
 
-       kret = wait_queue_init(&s->wait_queue, policy); /* also inits lock */
+       kret = waitq_init(&s->waitq, policy | SYNC_POLICY_DISABLE_IRQ); /* also inits lock */
        if (kret != KERN_SUCCESS) {
                zfree(semaphore_zone, s);
                return kret;
        }
 
-       s->count = value;
-
-       /*
-        * One reference for caller, one for port, and one for owner
-        * task (if not the kernel itself).
-        */
-       s->ref_count = (task == kernel_task) ? 2 : 3;
-
        /*
-        *  Create and initialize the semaphore port
+        * Initialize the semaphore values.
         */
-       s->port = ipc_port_alloc_kernel();
-       if (s->port == IP_NULL) {       
-               zfree(semaphore_zone, s);
-               return KERN_RESOURCE_SHORTAGE; 
-       }
-
-       ipc_kobject_set (s->port, (ipc_kobject_t) s, IKOT_SEMAPHORE);
+       s->port = IP_NULL;
+       s->ref_count = 1;
+       s->count = value;
+       s->active = TRUE;
+       s->owner = task;
 
        /*
         *  Associate the new semaphore with the task by adding
         *  the new semaphore to the task's semaphore list.
-        *
-        *  Associate the task with the new semaphore by having the
-        *  semaphores task pointer point to the owning task's structure.
         */
        task_lock(task);
        enqueue_head(&task->semaphore_list, (queue_entry_t) s);
        task->semaphores_owned++;
-       s->owner = task;
-       s->active = TRUE;
        task_unlock(task);
 
        *new_semaphore = s;
@@ -219,27 +204,22 @@ semaphore_create(
 }                
 
 /*
- *     Routine:        semaphore_destroy
+ *     Routine:        semaphore_destroy_internal
  *
- *     Destroys a semaphore.  This call will only succeed if the
- *     specified task is the SAME task name specified at the semaphore's
- *     creation.
+ *     This call will only succeed if the specified task is the SAME task
+ *     specified at the semaphore's creation.
  *
  *     All threads currently blocked on the semaphore are awoken.  These
  *     threads will return with the KERN_TERMINATED error.
  */
 kern_return_t
-semaphore_destroy(
+semaphore_destroy_internal(
        task_t                  task,
        semaphore_t             semaphore)
 {
-       int                             old_count;
+       int                     old_count;
        spl_t                   spl_level;
 
-
-       if (task == TASK_NULL || semaphore == SEMAPHORE_NULL)
-               return KERN_INVALID_ARGUMENT;
-
        /*
         *  Disown semaphore
         */
@@ -248,13 +228,14 @@ semaphore_destroy(
                task_unlock(task);
                return KERN_INVALID_ARGUMENT;
        }
+       spl_level = splsched();
+       semaphore_lock(semaphore);
+
        remqueue((queue_entry_t) semaphore);
        semaphore->owner = TASK_NULL;
        task->semaphores_owned--;
-       task_unlock(task);
 
-       spl_level = splsched();
-       semaphore_lock(semaphore);
+       task_unlock(task);
 
        /*
         *  Deactivate semaphore
@@ -269,25 +250,45 @@ semaphore_destroy(
        semaphore->count = 0;
 
        if (old_count < 0) {
-               wait_queue_wakeup64_all_locked(&semaphore->wait_queue,
-                                            SEMAPHORE_EVENT,
-                                            THREAD_RESTART,
-                                            TRUE);             /* unlock? */
+               waitq_wakeup64_all_locked(&semaphore->waitq,
+                                         SEMAPHORE_EVENT,
+                                         THREAD_RESTART, NULL,
+                                         WAITQ_ALL_PRIORITIES,
+                                         WAITQ_UNLOCK);
+               /* waitq/semaphore is unlocked */
        } else {
                semaphore_unlock(semaphore);
        }
        splx(spl_level);
 
-       /*
-        *  Deallocate
-        *
-        *  Drop the task's semaphore reference, which in turn deallocates
-        *  the semaphore structure if the reference count goes to zero.
-        */
-       semaphore_dereference(semaphore);
        return KERN_SUCCESS;
 }
 
+/*
+ *     Routine:        semaphore_destroy
+ *
+ *     Destroys a semaphore and consume the caller's reference on the
+ *     semaphore.
+ */
+kern_return_t
+semaphore_destroy(
+       task_t                  task,
+       semaphore_t             semaphore)
+{
+       kern_return_t kr;
+
+       if (semaphore == SEMAPHORE_NULL)
+               return KERN_INVALID_ARGUMENT;
+
+       if (task == TASK_NULL) {
+               kr = KERN_INVALID_ARGUMENT;
+       } else {
+               kr = semaphore_destroy_internal(task, semaphore);
+       }
+       semaphore_dereference(semaphore);
+       return kr;
+}
+
 /*
  *     Routine:        semaphore_signal_internal
  *
@@ -315,15 +316,16 @@ semaphore_signal_internal(
 
        if (thread != THREAD_NULL) {
                if (semaphore->count < 0) {
-                       kr = wait_queue_wakeup64_thread_locked(
-                                       &semaphore->wait_queue,
+                       kr = waitq_wakeup64_thread_locked(
+                                       &semaphore->waitq,
                                        SEMAPHORE_EVENT,
                                        thread,
                                        THREAD_AWAKENED,
-                                       TRUE);  /* unlock? */
+                                       WAITQ_UNLOCK);
+                       /* waitq/semaphore is unlocked */
                } else {
-                       semaphore_unlock(semaphore);
                        kr = KERN_NOT_WAITING;
+                       semaphore_unlock(semaphore);
                }
                splx(spl_level);
                return kr;
@@ -332,34 +334,40 @@ semaphore_signal_internal(
        if (options & SEMAPHORE_SIGNAL_ALL) {
                int old_count = semaphore->count;
 
+               kr = KERN_NOT_WAITING;
                if (old_count < 0) {
                        semaphore->count = 0;  /* always reset */
-                       kr = wait_queue_wakeup64_all_locked(
-                                       &semaphore->wait_queue,
+                       kr = waitq_wakeup64_all_locked(
+                                       &semaphore->waitq,
                                        SEMAPHORE_EVENT,
-                                       THREAD_AWAKENED,
-                                       TRUE);          /* unlock? */
+                                       THREAD_AWAKENED, NULL,
+                                       WAITQ_ALL_PRIORITIES,
+                                       WAITQ_UNLOCK);
+                       /* waitq / semaphore is unlocked */
                } else {
                        if (options & SEMAPHORE_SIGNAL_PREPOST)
                                semaphore->count++;
-                       semaphore_unlock(semaphore);
                        kr = KERN_SUCCESS;
+                       semaphore_unlock(semaphore);
                }
                splx(spl_level);
                return kr;
        }
        
        if (semaphore->count < 0) {
-               if (wait_queue_wakeup64_one_locked(
-                                       &semaphore->wait_queue,
+               kr = waitq_wakeup64_one_locked(
+                                       &semaphore->waitq,
                                        SEMAPHORE_EVENT,
-                                       THREAD_AWAKENED,
-                                       FALSE) == KERN_SUCCESS) {
+                                       THREAD_AWAKENED, NULL,
+                                       WAITQ_ALL_PRIORITIES,
+                                       WAITQ_KEEP_LOCKED);
+               if (kr == KERN_SUCCESS) {
                        semaphore_unlock(semaphore);
                        splx(spl_level);
                        return KERN_SUCCESS;
-               } else
+               } else {
                        semaphore->count = 0;  /* all waiters gone */
+               }
        }
 
        if (options & SEMAPHORE_SIGNAL_PREPOST) {
@@ -634,12 +642,12 @@ semaphore_wait_internal(
 
                wait_semaphore->count = -1;  /* we don't keep an actual count */
                thread_lock(self);
-               (void)wait_queue_assert_wait64_locked(
-                                       &wait_semaphore->wait_queue,
+               (void)waitq_assert_wait64_locked(
+                                       &wait_semaphore->waitq,
                                        SEMAPHORE_EVENT,
                                        THREAD_ABORTSAFE,
                                        TIMEOUT_URGENCY_USER_NORMAL,
-                                       deadline, 0,
+                                       deadline, TIMEOUT_NO_LEEWAY,
                                        self);
                thread_unlock(self);
        }
@@ -1064,27 +1072,29 @@ void
 semaphore_dereference(
        semaphore_t             semaphore)
 {
-       int                     ref_count;
-
-       if (semaphore != NULL) {
-               ref_count = hw_atomic_sub(&semaphore->ref_count, 1);
-
-               if (ref_count == 1) {
-                       ipc_port_t port = semaphore->port;
-
-                       if (IP_VALID(port) && 
-                           OSCompareAndSwapPtr(port, IP_NULL, &semaphore->port)) {
-                               /*
-                                * We get to disassociate the port from the sema and
-                                * drop the port's reference on the sema.
-                                */
-                               ipc_port_dealloc_kernel(port);
-                               ref_count = hw_atomic_sub(&semaphore->ref_count, 1);
-                       }
-               }
-               if (ref_count == 0) {
-                       assert(wait_queue_empty(&semaphore->wait_queue));
-                       zfree(semaphore_zone, semaphore);
-               }
+       if (semaphore == NULL)
+               return;
+
+       if (hw_atomic_sub(&semaphore->ref_count, 1) != 0)
+               return;
+
+       /*
+        * Last ref, clean up the port [if any]
+        * associated with the semaphore, destroy
+        * it (if still active) and then free
+        * the semaphore.
+        */
+       ipc_port_t port = semaphore->port;
+
+       if (IP_VALID(port)) {
+               assert(!port->ip_srights);
+               ipc_port_dealloc_kernel(port);
+       }
+       if (semaphore->active) {
+               assert(semaphore->owner != TASK_NULL);
+               semaphore_destroy_internal(semaphore->owner, semaphore);
        }
+       zfree(semaphore_zone, semaphore);
 }
+
+
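
The net effect of this refactor is a simpler semaphore ownership model: the object is created with a single reference and no kernel port, semaphore_destroy() always consumes the caller's reference via the new semaphore_destroy_internal() helper, and the final semaphore_dereference() is what tears down the port and frees the structure. A toy model of that reference flow, with simplified names and none of the locking or waitq wakeups (not kernel code):

    #include <stdbool.h>
    #include <stdlib.h>

    struct sema {
        unsigned ref_count;
        bool     active;
    };

    static struct sema *sema_create(void)
    {
        struct sema *s = calloc(1, sizeof(*s));
        s->ref_count = 1;               /* one reference, held by the caller */
        s->active = true;
        return s;
    }

    static void sema_destroy_internal(struct sema *s)
    {
        s->active = false;              /* deactivate; waiter wakeups elided */
    }

    static void sema_dereference(struct sema *s)
    {
        if (s == NULL || --s->ref_count != 0)
            return;
        if (s->active)                  /* last ref: destroy if nobody did */
            sema_destroy_internal(s);
        free(s);
    }

    static void sema_destroy(struct sema *s)
    {
        if (s == NULL)
            return;
        sema_destroy_internal(s);
        sema_dereference(s);            /* consume the caller's reference */
    }

    int main(void)
    {
        struct sema *s = sema_create();
        sema_destroy(s);                /* deactivates and frees in one call */
        return 0;
    }
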
index 57db2e2a2ada18537d2d612f4f4f4bb0a57be37c..339eb9e9388c425f4607f9d74f38be6fc01a52ee 100644 (file)
 #ifdef MACH_KERNEL_PRIVATE
 
 #include <kern/queue.h>
-#include <kern/wait_queue.h>
+#include <kern/waitq.h>
 
 typedef struct semaphore {
        queue_chain_t     task_link;  /* chain of semaphores owned by a task */
-       struct wait_queue wait_queue; /* queue of blocked threads & lock     */
+       struct waitq      waitq;      /* queue of blocked threads & lock     */
        task_t            owner;      /* task that owns semaphore            */
        ipc_port_t        port;       /* semaphore port                      */
        uint32_t          ref_count;  /* reference count                     */
@@ -57,13 +57,14 @@ typedef struct semaphore {
        boolean_t         active;     /* active status                       */
 } Semaphore;
 
-#define semaphore_lock(semaphore)   wait_queue_lock(&(semaphore)->wait_queue)
-#define semaphore_unlock(semaphore) wait_queue_unlock(&(semaphore)->wait_queue)
+#define semaphore_lock(semaphore)   waitq_lock(&(semaphore)->waitq)
+#define semaphore_unlock(semaphore) waitq_unlock(&(semaphore)->waitq)
 
 extern void semaphore_init(void);
 
-extern void            semaphore_reference     (semaphore_t semaphore);
-extern void            semaphore_dereference   (semaphore_t semaphore);
+extern void            semaphore_reference(semaphore_t semaphore);
+extern void            semaphore_dereference(semaphore_t semaphore);
+extern  kern_return_t  semaphore_destroy_internal(task_t task, semaphore_t semaphore);
 
 #endif /* MACH_KERNEL_PRIVATE */
 
index 6f614474c13615f4d3d37efb78973a48fe3bdbc3..f0c067b03007c8652b42c863214ec4777e4dd7c0 100644 (file)
@@ -98,14 +98,7 @@ __unused     struct pfz_exit_args *args)
  *     swtch and swtch_pri both attempt to context switch (logic in
  *     thread_block no-ops the context switch if nothing would happen).
  *     A boolean is returned that indicates whether there is anything
- *     else runnable.
- *
- *     This boolean can be used by a thread waiting on a
- *     lock or condition:  If FALSE is returned, the thread is justified
- *     in becoming a resource hog by continuing to spin because there's
- *     nothing else useful that the processor could do.  If TRUE is
- *     returned, the thread should make one more check on the
- *     lock and then be a good citizen and really suspend.
+ *     else runnable.  That's no excuse to spin, though.
  */
 
 static void
@@ -255,7 +248,8 @@ kern_return_t
 thread_switch(
        struct thread_switch_args *args)
 {
-       register thread_t               thread, self = current_thread();
+       thread_t                        thread = THREAD_NULL;
+       thread_t                        self = current_thread();
        mach_port_name_t                thread_name = args->thread_name;
        int                                             option = args->option;
        mach_msg_timeout_t              option_time = args->option_time;
@@ -303,11 +297,11 @@ thread_switch(
        /*
         * Translate the port name if supplied.
         */
-    if (thread_name != MACH_PORT_NULL) {
-               ipc_port_t                      port;
+       if (thread_name != MACH_PORT_NULL) {
+               ipc_port_t port;
 
                if (ipc_port_translate_send(self->task->itk_space,
-                                                                       thread_name, &port) == KERN_SUCCESS) {
+                                           thread_name, &port) == KERN_SUCCESS) {
                        ip_reference(port);
                        ip_unlock(port);
 
@@ -315,16 +309,11 @@ thread_switch(
                        ip_release(port);
 
                        if (thread == self) {
-                               (void)thread_deallocate_internal(thread);
+                               thread_deallocate(thread);
                                thread = THREAD_NULL;
                        }
                }
-               else
-                       thread = THREAD_NULL;
        }
-       else
-               thread = THREAD_NULL;
-
 
        if (option == SWITCH_OPTION_OSLOCK_DEPRESS || option == SWITCH_OPTION_OSLOCK_WAIT) {
                if (thread != THREAD_NULL) {
@@ -336,16 +325,18 @@ thread_switch(
                                 * a thread in another task)
                                 */
 
-                               (void)thread_deallocate_internal(thread);
+                               thread_deallocate(thread);
                                thread = THREAD_NULL;
                        } else {
                                /*
                                 * Attempt to kick the lock owner up to our same IO throttling tier.
                                 * If the thread is currently blocked in throttle_lowpri_io(),
                                 * it will immediately break out.
+                                *
+                                * TODO: SFI break out?
                                 */
                                int new_policy = proc_get_effective_thread_policy(self, TASK_POLICY_IO);
-                               
+
                                set_thread_iotier_override(thread, new_policy);
                        }
                }
@@ -355,62 +346,43 @@ thread_switch(
         * Try to handoff if supplied.
         */
        if (thread != THREAD_NULL) {
-               processor_t             processor;
-               spl_t                   s;
+               spl_t s = splsched();
 
-               s = splsched();
-               thread_lock(thread);
+               /* This may return a different thread if the target is pushing on something */
+               thread_t pulled_thread = thread_run_queue_remove_for_handoff(thread);
 
                KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED_THREAD_SWITCH)|DBG_FUNC_NONE,
-                                                         thread_tid(thread), thread->state, 0, 0, 0);
-
-               /*
-                *      Check that the thread is not bound
-                *      to a different processor, and that realtime
-                *      is not involved.
-                *
-                *      Next, pull it off its run queue.  If it
-                *      doesn't come, it's not eligible.
-                */
-               processor = current_processor();
-               if (processor->current_pri < BASEPRI_RTQUEUES                   &&
-                       thread->sched_pri < BASEPRI_RTQUEUES                            &&
-                       (thread->bound_processor == PROCESSOR_NULL      ||
-                        thread->bound_processor == processor)                          &&
-                               thread_run_queue_remove(thread)                                                 ) {
-                       /*
-                        *      Hah, got it!!
-                        */
-                       thread_unlock(thread);
-
-                       (void)thread_deallocate_internal(thread);
+                                     thread_tid(thread), thread->state,
+                                     pulled_thread ? TRUE : FALSE, 0, 0);
+
+               if (pulled_thread != THREAD_NULL) {
+                       /* We can't be dropping the last ref here */
+                       thread_deallocate_safe(thread);
 
                        if (wait_option)
                                assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE,
-                                                                                                               option_time, scale_factor);
-                       else
-                       if (depress_option)
+                                                   option_time, scale_factor);
+                       else if (depress_option)
                                thread_depress_ms(option_time);
 
                        self->saved.swtch.option = option;
                        self->saved.swtch.reenable_workq_callback = reenable_workq_callback;
 
-                       thread_run(self, (thread_continue_t)thread_switch_continue, NULL, thread);
+                       thread_run(self, (thread_continue_t)thread_switch_continue, NULL, pulled_thread);
                        /* NOTREACHED */
+                       panic("returned from thread_run!");
                }
 
-               thread_unlock(thread);
                splx(s);
 
                thread_deallocate(thread);
        }
-               
+
        if (wait_option)
                assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE, option_time, scale_factor);
-       else
-       if (depress_option)
+       else if (depress_option)
                thread_depress_ms(option_time);
-         
+
        self->saved.swtch.option = option;
        self->saved.swtch.reenable_workq_callback = reenable_workq_callback;
 
@@ -482,7 +454,7 @@ thread_depress_expire(
     thread_lock(thread);
        if (--thread->depress_timer_active == 0) {
                thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
-               SCHED(compute_priority)(thread, FALSE);
+               thread_recompute_sched_pri(thread, FALSE);
        }
     thread_unlock(thread);
     splx(s);
@@ -503,7 +475,7 @@ thread_depress_abort_internal(
        if (!(thread->sched_flags & TH_SFLAG_POLLDEPRESS)) {
                if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
                        thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
-                       SCHED(compute_priority)(thread, FALSE);
+                       thread_recompute_sched_pri(thread, FALSE);
                        result = KERN_SUCCESS;
                }
 
index e4918288499e8cd1a4638ff10d7ec86d168bc68a..e086346e3e7db5661e19064128a26b22436e8e13 100644 (file)
@@ -60,7 +60,7 @@
 #include <mach/mach_traps.h>
 
 #include <kern/syscall_sw.h>
-#if CONFIG_REQUIRES_U32_MUNGING
+#if CONFIG_REQUIRES_U32_MUNGING || (__arm__ && (__BIGGEST_ALIGNMENT__ > 4))
 #include <sys/munge.h>
 #endif
 
index 6cf3ae9539f329fe7f7251618baacb05b5458189..5137a17410ca9963d2922fce961ee1f68ef9322a 100644 (file)
@@ -73,7 +73,7 @@ typedef       void    mach_munge_t(void *);
 typedef struct {
        int                     mach_trap_arg_count; /* Number of trap arguments (Arch independant) */
        kern_return_t           (*mach_trap_function)(void *);
-#if CONFIG_REQUIRES_U32_MUNGING
+#if CONFIG_REQUIRES_U32_MUNGING || (__arm__ && (__BIGGEST_ALIGNMENT__ > 4))
        mach_munge_t            *mach_trap_arg_munge32; /* system call argument munger routine for 32-bit */
 #endif
        int                     mach_trap_u32_words; /* number of 32-bit words to copyin for U32 */
@@ -88,7 +88,7 @@ typedef struct {
 extern const mach_trap_t       mach_trap_table[];
 extern int                     mach_trap_count;
 
-#if CONFIG_REQUIRES_U32_MUNGING
+#if CONFIG_REQUIRES_U32_MUNGING || (__arm__ && (__BIGGEST_ALIGNMENT__ > 4))
 
 #if    !MACH_ASSERT
 #define        MACH_TRAP(name, arg_count, u32_arg_words, munge32)      \
@@ -99,7 +99,7 @@ extern int                    mach_trap_count;
 #endif /* !MACH_ASSERT */
 
 
-#else /* !CONFIG_REQUIRES_U32_MUNGING */
+#else /* !CONFIG_REQUIRES_U32_MUNGING || (__arm__ && (__BIGGEST_ALIGNMENT__ > 4)) */
 
 #if    !MACH_ASSERT
 #define        MACH_TRAP(name, arg_count, u32_arg_words, munge32)      \
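
The munging guard is now CONFIG_REQUIRES_U32_MUNGING || (__arm__ && (__BIGGEST_ALIGNMENT__ > 4)), so 32-bit ARM targets whose maximum fundamental alignment exceeds four bytes also carry the 32-bit argument mungers in their trap table; the same condition now gates the <sys/munge.h> include a few hunks earlier. __BIGGEST_ALIGNMENT__ is a GCC/Clang predefine; a quick, illustrative way to see what it evaluates to for a given toolchain (not kernel code):

    #include <stdio.h>

    int main(void)
    {
        /* Alignment of the most strictly aligned fundamental type. */
        printf("__BIGGEST_ALIGNMENT__ = %d\n", (int)__BIGGEST_ALIGNMENT__);

    #if defined(__arm__) && (__BIGGEST_ALIGNMENT__ > 4)
        printf("32-bit ARM with > 4-byte alignment: munging path selected\n");
    #else
        printf("munging here depends only on CONFIG_REQUIRES_U32_MUNGING\n");
    #endif
        return 0;
    }
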
diff --git a/osfmk/kern/sysdiagnose.c b/osfmk/kern/sysdiagnose.c
new file mode 100644 (file)
index 0000000..5f66f7e
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <mach/host_priv.h>
+#include <mach/host_special_ports.h>
+#include <mach/mach_types.h>
+
+#include <mach/sysdiagnose_notification.h>
+
+#include <kern/misc_protos.h>
+#include <kern/host.h>
+
+#include <sys/kdebug.h>
+
+extern kern_return_t sysdiagnose_notify_user(uint32_t);
+
+/*
+ * If userland has registered a port for sysdiagnose notifications, send one now.
+ */
+kern_return_t
+sysdiagnose_notify_user(uint32_t keycode)
+{
+       mach_port_t user_port;
+       kern_return_t kr;
+
+       kr = host_get_sysdiagnose_port(host_priv_self(), &user_port);
+       if ((kr != KERN_SUCCESS) || !IPC_PORT_VALID(user_port)) {
+               return kr;
+       }
+
+       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SYSDIAGNOSE, SYSDIAGNOSE_NOTIFY_USER) | DBG_FUNC_START, 0, 0, 0, 0, 0);
+
+       return send_sysdiagnose_notification(user_port, keycode);
+}
index 999dfefcf89c0df40050ec72ca85afee189a52a9..498fd09c8e8bef1abd67a1358103a9d47d4e26ca 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010, 2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -91,6 +91,7 @@
 #include <mach/host_priv.h>
 #include <mach/machine/vm_types.h>
 #include <mach/vm_param.h>
+#include <mach/mach_vm.h>
 #include <mach/semaphore.h>
 #include <mach/task_info.h>
 #include <mach/task_special_ports.h>
 #include <kern/coalition.h>
 #include <kern/zalloc.h>
 #include <kern/kalloc.h>
+#include <kern/kern_cdata.h>
 #include <kern/processor.h>
 #include <kern/sched_prim.h>   /* for thread_wakeup */
 #include <kern/ipc_tt.h>
 #include <kern/sync_lock.h>
 #include <kern/affinity.h>
 #include <kern/exc_resource.h>
+#include <kern/machine.h>
+#include <corpses/task_corpse.h>
 #if CONFIG_TELEMETRY
 #include <kern/telemetry.h>
 #endif
 #include <vm/vm_purgeable_internal.h>
 
 #include <sys/resource.h>
+#include <sys/signalvar.h> /* for coredump */
+
 /*
  * Exported interfaces
  */
 
 #include <vm/vm_shared_region.h>
 
-#if CONFIG_COUNTERS
-#include <pmc/pmc.h>
-#endif /* CONFIG_COUNTERS */
-
 #include <libkern/OSDebug.h>
 #include <libkern/OSAtomic.h>
 
@@ -180,7 +182,7 @@ lck_spin_t          dead_task_statistics_lock;
 ledger_template_t task_ledger_template = NULL;
 
 struct _task_ledger_indices task_ledgers __attribute__((used)) =
-       {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
         { 0 /* initialized at runtime */},
 #ifdef CONFIG_BANK
         -1, -1,
@@ -191,13 +193,15 @@ void init_task_ledgers(void);
 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
 void __attribute__((noinline)) THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void);
-void __attribute__((noinline)) THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb);
-int coredump(void *core_proc, int reserve_mb, int ignore_ulimit);
+void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb);
 
 kern_return_t task_suspend_internal(task_t);
 kern_return_t task_resume_internal(task_t);
+static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
+
 
 void proc_init_cpumon_params(void);
+extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
 
 // Warn tasks when they hit 80% of their memory limit.
 #define        PHYS_FOOTPRINT_WARNING_LEVEL 80
@@ -220,7 +224,9 @@ int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which
 
 int disable_exc_resource; /* Global override to supress EXC_RESOURCE for resource monitor violations. */
 
-int max_task_footprint = 0; /* Per-task limit on physical memory consumption */
+ledger_amount_t max_task_footprint = 0;  /* Per-task limit on physical memory consumption in bytes     */
+int max_task_footprint_mb = 0;  /* Per-task limit on physical memory consumption in megabytes */
+
 #if MACH_ASSERT
 int pmap_ledgers_panic = 1;
 #endif /* MACH_ASSERT */
@@ -234,7 +240,9 @@ extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
 extern int     proc_pid(struct proc *p);
 extern int     proc_selfpid(void);
 extern char    *proc_name_address(struct proc *p);
+extern uint64_t get_dispatchqueue_offset_from_proc(void *);
 #if CONFIG_JETSAM
+extern void    proc_memstat_terminated(struct proc* p, boolean_t set);
 extern void    memorystatus_on_ledger_footprint_exceeded(int warning, const int max_footprint_mb);
 #endif
 #endif
@@ -403,30 +411,31 @@ task_init(void)
         * and takes precedence over the device tree.
         * Setting the boot-arg to 0 disables task limits.
         */
-       if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint,
-                       sizeof (max_task_footprint))) {
+       if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
+                       sizeof (max_task_footprint_mb))) {
                /*
                 * No limit was found in boot-args, so go look in the device tree.
                 */
-               if (!PE_get_default("kern.max_task_pmem", &max_task_footprint,
-                               sizeof(max_task_footprint))) {
+               if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
+                               sizeof(max_task_footprint_mb))) {
                        /*
                         * No limit was found in device tree.
                         */
-                       max_task_footprint = 0;
+                       max_task_footprint_mb = 0;
                }
        }
 
-       if (max_task_footprint != 0) {
+       if (max_task_footprint_mb != 0) {
 #if CONFIG_JETSAM
-               if (max_task_footprint < 50) {
+               if (max_task_footprint_mb < 50) {
                                printf("Warning: max_task_pmem %d below minimum.\n",
-                               max_task_footprint);
-                               max_task_footprint = 50;
+                               max_task_footprint_mb);
+                               max_task_footprint_mb = 50;
                }
                printf("Limiting task physical memory footprint to %d MB\n",
-                       max_task_footprint);
-               max_task_footprint *= 1024 * 1024; // Convert MB to bytes
+                       max_task_footprint_mb);
+
+               max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
 #else
                printf("Warning: max_task_footprint specified, but jetsam not configured; ignoring.\n");
 #endif
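
The per-task footprint limit is now tracked in a 64-bit ledger_amount_t (max_task_footprint) alongside the boot-arg value in megabytes (max_task_footprint_mb), and the MB-to-bytes conversion widens before multiplying. With the old int-only arithmetic, a limit of 2048 MB or more would overflow a 32-bit int. A standalone illustration with a sample value (not kernel code):

    #include <stdio.h>
    #include <stdint.h>
    #include <limits.h>

    int main(void)
    {
        int mb = 4096;                          /* e.g. a 4 GB per-task limit */

        /* Widen first, as the diff now does; mb * 1024 * 1024 evaluated in
         * plain int would overflow once mb reaches 2048. */
        int64_t bytes = (int64_t)mb * 1024 * 1024;

        printf("%d MB = %lld bytes (INT_MAX is %d)\n",
               mb, (long long)bytes, INT_MAX);
        return 0;
    }
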
@@ -493,9 +502,9 @@ task_init(void)
         * Create the kernel task as the first task.
         */
 #ifdef __LP64__
-       if (task_create_internal(TASK_NULL, COALITION_NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS)
+       if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS)
 #else
-       if (task_create_internal(TASK_NULL, COALITION_NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS)
+       if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS)
 #endif
                panic("task_init\n");
 
@@ -567,10 +576,11 @@ host_security_create_task_token(
  *
  * phys_footprint
  *   Physical footprint: This is the sum of:
- *     + internal
- *     + internal_compressed
+ *     + (internal - alternate_accounting)
+ *     + (internal_compressed - alternate_accounting_compressed)
  *     + iokit_mapped
- *     - alternate_accounting
+ *     + purgeable_nonvolatile
+ *     + purgeable_nonvolatile_compressed
  *
  * internal
  *   The task's anonymous memory, which on iOS is always resident.
@@ -616,6 +626,8 @@ init_task_ledgers(void)
            "bytes");
        task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
            "bytes");
+       task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
+           "bytes");
        task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
            "bytes");
        task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
@@ -629,6 +641,7 @@ init_task_ledgers(void)
        task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
            "count");
        
+#if CONFIG_SCHED_SFI
        sfi_class_id_t class_id, ledger_alias;
        for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
                task_ledgers.sfi_wait_times[class_id] = -1;
@@ -654,13 +667,13 @@ init_task_ledgers(void)
                }
        }
 
+       assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1);
+#endif /* CONFIG_SCHED_SFI */
+
 #ifdef CONFIG_BANK
        task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
        task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
 #endif
-
-       assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1);
-
        if ((task_ledgers.cpu_time < 0) ||
            (task_ledgers.tkm_private < 0) ||
            (task_ledgers.tkm_shared < 0) ||
@@ -669,6 +682,7 @@ init_task_ledgers(void)
            (task_ledgers.internal < 0) ||
            (task_ledgers.iokit_mapped < 0) ||
            (task_ledgers.alternate_accounting < 0) ||
+           (task_ledgers.alternate_accounting_compressed < 0) ||
            (task_ledgers.phys_footprint < 0) ||
            (task_ledgers.internal_compressed < 0) ||
            (task_ledgers.purgeable_volatile < 0) ||
@@ -692,6 +706,7 @@ init_task_ledgers(void)
                ledger_panic_on_negative(t, task_ledgers.internal_compressed);
                ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
                ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
+               ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
                ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
                ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
                ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
@@ -712,7 +727,7 @@ init_task_ledgers(void)
 kern_return_t
 task_create_internal(
        task_t          parent_task,
-       coalition_t     parent_coalition __unused,
+       coalition_t     *parent_coalitions __unused,
        boolean_t       inherit_memory,
        boolean_t       is_64bit,
        task_t          *child_task)            /* OUT */
@@ -788,6 +803,7 @@ task_create_internal(
 
 #ifdef MACH_BSD
        new_task->bsd_info = NULL;
+       new_task->corpse_info = NULL;
 #endif /* MACH_BSD */
 
 #if CONFIG_JETSAM
@@ -822,10 +838,6 @@ task_create_internal(
 
        new_task->affinity_space = NULL;
 
-#if CONFIG_COUNTERS
-       new_task->t_chud = 0U;
-#endif
-
        new_task->pidsuspended = FALSE;
        new_task->frozen = FALSE;
        new_task->changing_freeze_state = FALSE;
@@ -845,6 +857,7 @@ task_create_internal(
 
        new_task->low_mem_notified_warn = 0;
        new_task->low_mem_notified_critical = 0;
+       new_task->low_mem_privileged_listener = 0;
        new_task->purged_memory_warn = 0;
        new_task->purged_memory_critical = 0;
        new_task->mem_notify_reserved = 0;
@@ -947,21 +960,9 @@ task_create_internal(
                }
        }
 
-       new_task->coalition = COALITION_NULL;
-
-#if CONFIG_COALITIONS
-       if (parent_coalition) {
-               coalition_adopt_task(parent_coalition, new_task);
-       } else if (parent_task && parent_task->coalition) {
-               coalition_adopt_task(parent_task->coalition, new_task);
-       } else {
-               coalition_default_adopt_task(new_task);
-       }
-
-       if (new_task->coalition == COALITION_NULL) {
-               panic("created task is not a member of any coalition");
-       }
-#endif /* CONFIG_COALITIONS */
+       bzero(new_task->coalition, sizeof(new_task->coalition));
+       for (int i = 0; i < COALITION_NUM_TYPES; i++)
+               queue_chain_init(new_task->task_coalition[i]);
 
        /* Allocate I/O Statistics */
        new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
@@ -973,10 +974,33 @@ task_create_internal(
        bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
        new_task->task_timer_wakeups_bin_1 = new_task->task_timer_wakeups_bin_2 = 0;
        new_task->task_gpu_ns = 0;
-       lck_mtx_lock(&tasks_threads_lock);
-       queue_enter(&tasks, new_task, task_t, tasks);
-       tasks_count++;
-       lck_mtx_unlock(&tasks_threads_lock);
+
+#if CONFIG_COALITIONS
+
+       /* TODO: there is no graceful failure path here... */
+       if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
+               coalitions_adopt_task(parent_coalitions, new_task);
+       } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
+               /*
+                * all tasks at least have a resource coalition, so
+                * if the parent has one then inherit all coalitions
+                * the parent is a part of
+                */
+               coalitions_adopt_task(parent_task->coalition, new_task);
+       } else {
+               /* TODO: assert that new_task will be PID 1 (launchd) */
+               coalitions_adopt_init_task(new_task);
+       }
+
+       if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
+               panic("created task is not a member of a resource coalition");
+       }
+#endif /* CONFIG_COALITIONS */
+
+       new_task->dispatchqueue_offset = 0;
+       if (parent_task != NULL) {
+               new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
+       }
 
        if (vm_backing_store_low && parent_task != NULL)
                new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV);
@@ -988,6 +1012,11 @@ task_create_internal(
 
        ipc_task_enable(new_task);
 
+       lck_mtx_lock(&tasks_threads_lock);
+       queue_enter(&tasks, new_task, task_t, tasks);
+       tasks_count++;
+       lck_mtx_unlock(&tasks_threads_lock);
+
        *child_task = new_task;
        return(KERN_SUCCESS);
 }
@@ -1131,13 +1160,20 @@ task_deallocate(
 #endif
 
 #if CONFIG_COALITIONS
-       if (!task->coalition) {
-               panic("deallocating task was not a member of any coalition");
-       }
-       coalition_release(task->coalition);
+       if (!task->coalition[COALITION_TYPE_RESOURCE])
+               panic("deallocating task was not a member of a resource coalition");
+       task_release_coalitions(task);
 #endif /* CONFIG_COALITIONS */
 
-       task->coalition = COALITION_NULL;
+       bzero(task->coalition, sizeof(task->coalition));
+
+#if MACH_BSD
+       /* clean up collected information since last reference to task is gone */
+       if (task->corpse_info) {
+               task_crashinfo_destroy(task->corpse_info);
+               task->corpse_info = NULL;
+       }
+#endif
 
        zfree(task_zone, task);
 }
@@ -1166,6 +1202,123 @@ task_suspension_token_deallocate(
        return(task_deallocate((task_t)token));
 }
 
+
+/*
+ * task_collect_crash_info:
+ *
+ * collect crash info from bsd and mach based data
+ */
+kern_return_t
+task_collect_crash_info(task_t task)
+{
+       kern_return_t kr = KERN_SUCCESS;
+
+       kcdata_descriptor_t crash_data = NULL;
+       kcdata_descriptor_t crash_data_release = NULL;
+       mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
+       mach_vm_offset_t crash_data_user_ptr = 0;
+
+       if (!corpses_enabled()) {
+               return KERN_NOT_SUPPORTED;
+       }
+
+       task_lock(task);
+       assert(task->bsd_info != NULL);
+       if (task->corpse_info == NULL && task->bsd_info != NULL) {
+               task_unlock(task);
+               /* map crash data memory in task's vm map */
+               kr = mach_vm_allocate(task->map, &crash_data_user_ptr, size, (VM_MAKE_TAG(VM_MEMORY_CORPSEINFO) | VM_FLAGS_ANYWHERE));
+
+               if (kr != KERN_SUCCESS)
+                       goto out_no_lock;
+
+               crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_user_ptr, size);
+               if (crash_data) {
+                       task_lock(task);
+                       crash_data_release = task->corpse_info;
+                       task->corpse_info = crash_data;
+                       task_unlock(task);
+                       kr = KERN_SUCCESS;
+               } else {
+                       /* if failed to create corpse info, free the mapping */
+                       if (KERN_SUCCESS != mach_vm_deallocate(task->map, crash_data_user_ptr, size)) {
+                               printf("mach_vm_deallocate failed to clear corpse_data for pid %d.\n", task_pid(task));
+                       }
+                       kr = KERN_FAILURE;
+               }
+
+               if (crash_data_release != NULL) {
+                       task_crashinfo_destroy(crash_data_release);
+               }
+       } else {
+               task_unlock(task);
+       }
+
+out_no_lock:
+       return kr;
+}
+
+/*
+ * task_deliver_crash_notification:
+ *
+ * Makes outcall to registered host port for a corpse.
+ */
+kern_return_t
+task_deliver_crash_notification(task_t task)
+{
+       kcdata_descriptor_t crash_info = task->corpse_info;
+       thread_t th_iter = NULL;
+       kern_return_t kr = KERN_SUCCESS;
+       wait_interrupt_t wsave;
+       mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
+
+       if (crash_info == NULL)
+               return KERN_FAILURE;
+
+       code[0] = crash_info->kcd_addr_begin;
+       code[1] = crash_info->kcd_length;
+
+       task_lock(task);
+       queue_iterate(&task->threads, th_iter, thread_t, task_threads)
+       {
+               ipc_thread_reset(th_iter);
+       }
+       task_unlock(task);
+
+       wsave = thread_interrupt_level(THREAD_UNINT);
+       kr = exception_triage(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX);
+       if (kr != KERN_SUCCESS) {
+               printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
+       }
+
+       /*
+        * crash reporting is done. Now release threads
+        * for reaping by thread_terminate_daemon
+        */
+       task_lock(task);
+       assert(task->active_thread_count == 0);
+       queue_iterate(&task->threads, th_iter, thread_t, task_threads)
+       {
+               thread_mtx_lock(th_iter);
+               assert(th_iter->inspection == TRUE);
+               th_iter->inspection = FALSE;
+               /* now that the corpse has been autopsied, dispose of the thread name */
+               uthread_cleanup_name(th_iter->uthread);
+               thread_mtx_unlock(th_iter);
+       }
+
+       thread_terminate_crashed_threads();
+       /* remove the pending corpse report flag */
+       task_clear_corpse_pending_report(task);
+
+       task_unlock(task);
+
+       (void)thread_interrupt_level(wsave);
+       task_terminate_internal(task);
+
+       return kr;
+}
+
 /*
  *     task_terminate:
  *
@@ -1210,6 +1363,47 @@ __unused task_partial_reap(task_t task, __unused int pid)
                               pid, reclaimed_resident, reclaimed_compressed, 0, 0);
 }
 
+kern_return_t
+task_mark_corpse(task_t task)
+{
+       kern_return_t kr = KERN_SUCCESS;
+       thread_t self_thread;
+       (void) self_thread;
+       wait_interrupt_t wsave;
+
+       assert(task != kernel_task);
+       assert(task == current_task());
+       assert(!task_is_a_corpse(task));
+
+       kr = task_collect_crash_info(task);
+       if (kr != KERN_SUCCESS) {
+               return kr;
+       }
+
+       self_thread = current_thread();
+
+       wsave = thread_interrupt_level(THREAD_UNINT);
+       task_lock(task);
+
+       task_set_corpse_pending_report(task);
+       task_set_corpse(task);
+
+       kr = task_start_halt_locked(task, TRUE);
+       assert(kr == KERN_SUCCESS);
+       ipc_task_reset(task);
+       ipc_task_enable(task);
+
+       task_unlock(task);
+       /* terminate the ipc space */
+       ipc_space_terminate(task->itk_space);
+       
+       task_start_halt(task);
+       thread_terminate_internal(self_thread);
+       (void) thread_interrupt_level(wsave);
+       assert(task->halting == TRUE);
+       return kr;
+}
+
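Taken together with task_deliver_crash_notification() above, task_mark_corpse() defines the corpse path for a crashing task. A rough sketch of the intended ordering follows; the wrapper name and call site are illustrative only, since the real callers live in the exception and termination paths.

	/* Sketch only: illustrative ordering of the corpse path, not a real call site. */
	static kern_return_t
	corpse_path_sketch(task_t task)
	{
		kern_return_t kr;

		/* snapshot crash info, set TF_CORPSE/TF_PENDING_CORPSE, halt the task */
		kr = task_mark_corpse(task);
		if (kr != KERN_SUCCESS)
			return kr;

		/*
		 * later, once all threads have halted: send EXC_CORPSE_NOTIFY,
		 * release the inspected threads, and finish termination
		 */
		return task_deliver_crash_notification(task);
	}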
 kern_return_t
 task_terminate_internal(
        task_t                  task)
@@ -1254,6 +1448,20 @@ task_terminate_internal(
                return (KERN_FAILURE);
        }
 
+       if (task_corpse_pending_report(task)) {
+               /*
+                *      The task is marked for reporting as a corpse.
+                *      Just return an error; this gets us to our AST
+                *      special handler, which will finish the path to
+                *      death.
+                */
+               task_unlock(task);
+               if (self_task != task)
+                       task_unlock(self_task);
+
+               return (KERN_FAILURE);
+       }
+
        if (self_task != task)
                task_unlock(self_task);
 
@@ -1298,7 +1506,7 @@ task_terminate_internal(
        task_unlock(task);
 
        proc_set_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE,
-                            TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
+                            TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
 
         /* Early object reap phase */
 
@@ -1328,6 +1536,8 @@ task_terminate_internal(
                                         task_ledgers.iokit_mapped);
        ledger_disable_panic_on_negative(task->map->pmap->ledger,
                                         task_ledgers.alternate_accounting);
+       ledger_disable_panic_on_negative(task->map->pmap->ledger,
+                                        task_ledgers.alternate_accounting_compressed);
 #endif
 
        /*
@@ -1338,14 +1548,21 @@ task_terminate_internal(
         * expense of removing the address space regions
          * at reap time, we do it explicitly here.
         */
+
+       vm_map_lock(task->map);
+       vm_map_disable_hole_optimization(task->map);
+       vm_map_unlock(task->map);
+
        vm_map_remove(task->map,
                      task->map->min_offset,
                      task->map->max_offset,
-                     VM_MAP_NO_FLAGS);
+                     /* no unnesting on final cleanup: */
+                     VM_MAP_REMOVE_NO_UNNESTING);
 
        /* release our shared region */
        vm_shared_region_set(task, NULL);
 
+
 #if MACH_ASSERT
        /*
         * Identify the pmap's process, in case the pmap ledgers drift
@@ -1383,9 +1600,9 @@ task_terminate_internal(
 
 #if CONFIG_COALITIONS
        /*
-        * Leave our coalition. (drop activation but not reference)
+        * Leave our coalitions. (drop activation but not reference)
         */
-       coalition_remove_task(task);
+       coalitions_remove_task(task);
 #endif
 
        /*
@@ -1405,10 +1622,20 @@ task_terminate_internal(
  *     termination.
  */
 kern_return_t
-task_start_halt(
-       task_t          task)
+task_start_halt(task_t task)
+{
+       kern_return_t kr = KERN_SUCCESS;
+       task_lock(task);
+       kr = task_start_halt_locked(task, FALSE);
+       task_unlock(task);
+       return kr;
+}
+
+static kern_return_t
+task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
 {
-       thread_t        thread, self;
+       thread_t thread, self;
+       uint64_t dispatchqueue_offset;
 
        assert(task != kernel_task);
 
@@ -1417,43 +1644,44 @@ task_start_halt(
        if (task != self->task)
                return (KERN_INVALID_ARGUMENT);
 
-       task_lock(task);
-
        if (task->halting || !task->active || !self->active) {
                /*
-                *      Task or current thread is already being terminated.
-                *      Hurry up and return out of the current kernel context
-                *      so that we run our AST special handler to terminate
-                *      ourselves.
+                * Task or current thread is already being terminated.
+                * Hurry up and return out of the current kernel context
+                * so that we run our AST special handler to terminate
+                * ourselves.
                 */
-               task_unlock(task);
-
                return (KERN_FAILURE);
        }
 
        task->halting = TRUE;
 
-       if (task->thread_count > 1) {
-
-               /*
-                * Mark all the threads to keep them from starting any more
-                * user-level execution.  The thread_terminate_internal code
-                * would do this on a thread by thread basis anyway, but this
-                * gives us a better chance of not having to wait there.
-                */
-               task_hold_locked(task);
+       /*
+        * Mark all the threads to keep them from starting any more
+        * user-level execution.  The thread_terminate_internal code
+        * would do this on a thread by thread basis anyway, but this
+        * gives us a better chance of not having to wait there.
+        */
+       task_hold_locked(task);
+       dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
 
-               /*
-                *      Terminate all the other threads in the task.
-                */
-               queue_iterate(&task->threads, thread, thread_t, task_threads) {
-                       if (thread != self)
-                               thread_terminate_internal(thread);
+       /*
+        * Terminate all the other threads in the task.
+        */
+       queue_iterate(&task->threads, thread, thread_t, task_threads)
+       {
+               if (should_mark_corpse) {
+                       thread_mtx_lock(thread);
+                       thread->inspection = TRUE;
+                       thread_mtx_unlock(thread);
                }
-
-               task_release_locked(task);
+               if (thread != self)
+                       thread_terminate_internal(thread);
        }
-       task_unlock(task);
+       task->dispatchqueue_offset = dispatchqueue_offset;
+
+       task_release_locked(task);
+
        return KERN_SUCCESS;
 }
 
@@ -1509,7 +1737,9 @@ task_complete_halt(task_t task)
         * getting a new one.
         */
        vm_map_remove(task->map, task->map->min_offset,
-                     task->map->max_offset, VM_MAP_NO_FLAGS);
+                     task->map->max_offset,
+                     /* no unnesting on final cleanup: */
+                     VM_MAP_REMOVE_NO_UNNESTING);
 
        task->halting = FALSE;
 }
@@ -1809,7 +2039,7 @@ place_task_hold    (
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
-           proc_pid(task->bsd_info), ((thread_t)queue_first(&task->threads))->thread_id,
+           task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
            task->user_stop_count, task->user_stop_count + 1, 0);
 
 #if MACH_ASSERT
@@ -1861,7 +2091,7 @@ release_task_hold    (
 
                KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                    MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE,
-                   proc_pid(task->bsd_info), ((thread_t)queue_first(&task->threads))->thread_id,
+                   task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
                    task->user_stop_count, mode, task->legacy_stop_count);
 
 #if MACH_ASSERT
@@ -1975,9 +2205,9 @@ task_suspend(
         */
        if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
                MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
-               printf("warning: %s(%d) failed to copyout suspension token for task %s(%d) with error: %d\n",
-                       proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
-                       proc_name_address(task->bsd_info), proc_pid(task->bsd_info), kr);
+               printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
+                               proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
+                               task_pid(task), kr);
                return (kr);
        }
 
@@ -2025,9 +2255,9 @@ task_resume(
        } else {
                is_write_unlock(space);
                if (kr == KERN_SUCCESS)
-                       printf("warning: %s(%d) performed out-of-band resume on %s(%d)\n",
+                       printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
                               proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
-                              proc_name_address(task->bsd_info), proc_pid(task->bsd_info));
+                              task_pid(task));
        }
 
        return kr;
@@ -2286,6 +2516,9 @@ task_pidresume(
  * Conditions:
  *     The caller holds a reference to the task
  */
+extern void            vm_wake_compactor_swapper();
+extern queue_head_t    c_swapout_list_head;
+
 kern_return_t
 task_freeze(
        register task_t    task,
@@ -2335,6 +2568,18 @@ task_freeze(
        
        task_unlock(task);
 
+       if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
+               vm_wake_compactor_swapper();
+               /*
+                * We do an explicit wakeup of the swapout thread here
+                * because the compact_and_swap routines don't have
+                * knowledge about these kinds of "per-task packed c_segs"
+                * and so will not be evaluating whether we need to do
+                * a wakeup there.
+                */
+               thread_wakeup((event_t)&c_swapout_list_head);
+       }
+
        return (kr);
 }
 
@@ -2346,9 +2591,6 @@ task_freeze(
  * Conditions:
  *     The caller holds a reference to the task
  */
-extern void
-vm_consider_waking_compactor_swapper(void);
-
 kern_return_t
 task_thaw(
        register task_t         task)
@@ -2394,7 +2636,7 @@ task_thaw(
        task_unlock(task);
 
        if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
-               vm_consider_waking_compactor_swapper();
+               vm_wake_compactor_swapper();
        }
 
        return (kr);
@@ -2484,8 +2726,7 @@ task_set_info(
                        mem_info = (task_trace_memory_info_t) task_info_in;
                        kern_return_t kr = atm_register_trace_memory(task,
                                                mem_info->user_memory_address,
-                                               mem_info->buffer_size,
-                                               mem_info->mailbox_array_size);
+                                               mem_info->buffer_size);
                        return kr;
                        break;
                }
@@ -2497,6 +2738,7 @@ task_set_info(
        return (KERN_SUCCESS);
 }
 
+int radar_20146450 = 1;
 kern_return_t
 task_info(
        task_t                  task,
@@ -3029,7 +3271,7 @@ task_info(
                task_vm_info_t          vm_info;
                vm_map_t                map;
 
-               if (*task_info_count < TASK_VM_INFO_COUNT) {
+               if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
                    error = KERN_INVALID_ARGUMENT;
                    break;
                }
@@ -3092,7 +3334,9 @@ task_info(
                } else {
                        mach_vm_size_t  volatile_virtual_size;
                        mach_vm_size_t  volatile_resident_size;
+                       mach_vm_size_t  volatile_compressed_size;
                        mach_vm_size_t  volatile_pmap_size;
+                       mach_vm_size_t  volatile_compressed_pmap_size;
                        kern_return_t   kr;
 
                        if (flavor == TASK_VM_INFO_PURGEABLE) {
@@ -3100,10 +3344,16 @@ task_info(
                                        map,
                                        &volatile_virtual_size,
                                        &volatile_resident_size,
-                                       &volatile_pmap_size);
+                                       &volatile_compressed_size,
+                                       &volatile_pmap_size,
+                                       &volatile_compressed_pmap_size);
                                if (kr == KERN_SUCCESS) {
                                        vm_info->purgeable_volatile_pmap =
                                                volatile_pmap_size;
+                                       if (radar_20146450) {
+                                       vm_info->compressed -=
+                                               volatile_compressed_pmap_size;
+                                       }
                                        vm_info->purgeable_volatile_resident =
                                                volatile_resident_size;
                                        vm_info->purgeable_volatile_virtual =
@@ -3113,7 +3363,13 @@ task_info(
                        vm_map_unlock_read(map);
                }
 
-               *task_info_count = TASK_VM_INFO_COUNT;
+               if (*task_info_count >= TASK_VM_INFO_COUNT) {
+                       vm_info->phys_footprint = 0;
+                       *task_info_count = TASK_VM_INFO_COUNT;
+               } else {
+                       *task_info_count = TASK_VM_INFO_REV0_COUNT;
+               }
+
                break;
        }
 
@@ -3136,6 +3392,7 @@ task_info(
                wait_state_info->total_wait_state_time = 0;
                bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
 
+#if CONFIG_SCHED_SFI
                int i, prev_lentry = -1;
                int64_t  val_credit, val_debit;
 
@@ -3154,12 +3411,84 @@ task_info(
                        prev_lentry = task_ledgers.sfi_wait_times[i];
                }
 
+#endif /* CONFIG_SCHED_SFI */
                wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val; 
                *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
 
                break;
        }
+       case TASK_VM_INFO_PURGEABLE_ACCOUNT:
+       {
+#if DEVELOPMENT || DEBUG
+               pvm_account_info_t      acnt_info;
+
+               if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
+                       error = KERN_INVALID_ARGUMENT;
+                       break;
+               }
 
+               if (task_info_out == NULL) {
+                       error = KERN_INVALID_ARGUMENT;
+                       break;
+               }
+
+               acnt_info = (pvm_account_info_t) task_info_out;
+
+               error = vm_purgeable_account(task, acnt_info);
+
+               *task_info_count = PVM_ACCOUNT_INFO_COUNT;
+
+               break;
+#else /* DEVELOPMENT || DEBUG */
+               error = KERN_NOT_SUPPORTED;
+               break;
+#endif /* DEVELOPMENT || DEBUG */
+       }
+       case TASK_FLAGS_INFO:
+       {
+               task_flags_info_t               flags_info;
+
+               if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
+                   error = KERN_INVALID_ARGUMENT;
+                   break;
+               }
+
+               flags_info = (task_flags_info_t)task_info_out;
+
+               /* only publish the 64-bit flag of the task */
+               flags_info->flags = task->t_flags & TF_64B_ADDR;
+
+               *task_info_count = TASK_FLAGS_INFO_COUNT;
+               break;
+       }
+
+       case TASK_DEBUG_INFO_INTERNAL:
+       {
+#if DEVELOPMENT || DEBUG
+               task_debug_info_internal_t dbg_info;
+               if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
+                       error = KERN_NOT_SUPPORTED;
+                       break;
+               }
+
+               if (task_info_out == NULL) {
+                       error = KERN_INVALID_ARGUMENT;
+                       break;
+               }
+               dbg_info = (task_debug_info_internal_t) task_info_out;
+               dbg_info->ipc_space_size = 0;
+               if (task->itk_space){
+                       dbg_info->ipc_space_size = task->itk_space->is_table_size;
+               }
+
+               error = KERN_SUCCESS;
+               *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
+               break;
+#else /* DEVELOPMENT || DEBUG */
+               error = KERN_NOT_SUPPORTED;
+               break;
+#endif /* DEVELOPMENT || DEBUG */
+       }
        default:
                error = KERN_INVALID_ARGUMENT;
        }
@@ -3442,6 +3771,12 @@ task_get_assignment(
        return (KERN_SUCCESS);
 }
 
+uint64_t
+get_task_dispatchqueue_offset(
+               task_t          task)
+{
+       return task->dispatchqueue_offset;
+}
 
 /*
  *     task_policy
@@ -3504,7 +3839,7 @@ task_synchronizer_destroy_all(task_t task)
 
        while (!queue_empty(&task->semaphore_list)) {
                semaphore = (semaphore_t) queue_first(&task->semaphore_list);
-               (void) semaphore_destroy(task, semaphore);
+               (void) semaphore_destroy_internal(task, semaphore);
        }
 }
 
@@ -3575,11 +3910,11 @@ task_get_state(
 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
 
 void __attribute__((noinline))
-THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb)
+PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb)
 {
        task_t                                          task            = current_task();
        int                                                     pid         = 0;
-       char                                    *procname       = (char *) "unknown";   
+       const char                                      *procname       = "unknown";
        mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
 
 #ifdef MACH_BSD
@@ -3609,7 +3944,7 @@ THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb)
                 * be filling up the disk; and ignore the core size resource limit for this
                 * core file.
                 */
-               if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, 1)) != 0) {
+               if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
                        printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
                }
                /*
@@ -3629,6 +3964,14 @@ THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb)
                return;
        }
 
+       /*
+        * A task that has triggered an EXC_RESOURCE should not be
+        * jetsammed when the device is under memory pressure.  Here
+        * we set the P_MEMSTAT_TERMINATED flag so that the process
+        * will be skipped if the memorystatus_thread wakes up.
+        */
+       proc_memstat_terminated(current_task()->bsd_info, TRUE);
+
        printf("process %s[%d] crossed memory high watermark (%d MB); sending "
                "EXC_RESOURCE.\n", procname, pid, max_footprint_mb);
 
@@ -3636,7 +3979,7 @@ THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb)
        EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
        EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
        EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
-       
+
        /*
         * Use the _internal_ variant so that no user-space
         * process can resume our task from under us.
@@ -3644,6 +3987,13 @@ THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb)
        task_suspend_internal(task);
        exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
        task_resume_internal(task);
+
+       /*
+        * After the EXC_RESOURCE has been handled, we must clear the
+        * P_MEMSTAT_TERMINATED flag so that the process can again be
+        * considered for jetsam if the memorystatus_thread wakes up.
+        */
+       proc_memstat_terminated(current_task()->bsd_info, FALSE);  /* clear the flag */
 }
 
 /*
@@ -3693,7 +4043,7 @@ task_footprint_exceeded(int warning, __unused const void *param0, __unused const
         * generate a non-fatal high watermark EXC_RESOURCE.
         */
        if ((warning == 0) && (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) {
-               THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE((int)max_footprint_mb);
+               PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb);
        }
 
        memorystatus_on_ledger_footprint_exceeded((warning == LEDGER_WARNING_ROSE_ABOVE) ? TRUE : FALSE,
@@ -3717,6 +4067,28 @@ task_set_phys_footprint_limit(
        return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, FALSE);
 }
 
+kern_return_t
+task_convert_phys_footprint_limit(
+       int limit_mb,
+       int *converted_limit_mb)
+{
+       if (limit_mb == -1) {
+               /*
+                * No limit
+                */
+               if (max_task_footprint != 0) {
+                       *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024);   /* bytes to MB */
+               } else {
+                       *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
+               }
+       } else {
+               /* nothing to convert */
+               *converted_limit_mb = limit_mb;
+       }
+       return (KERN_SUCCESS);
+}
+
+
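A quick illustration of the conversion above (all numbers are made up): an explicit limit passes through unchanged, while the -1 "no limit" sentinel converts to the global max_task_footprint expressed in MB, or to LEDGER_LIMIT_INFINITY >> 20 when no global limit is configured. The function name in the sketch is illustrative.

	/* Sketch only: expected results of task_convert_phys_footprint_limit(). */
	static void
	footprint_convert_sketch(void)
	{
		int mb;

		task_convert_phys_footprint_limit(512, &mb);
		assert(mb == 512);              /* explicit limits pass through */

		task_convert_phys_footprint_limit(-1, &mb);
		/*
		 * with max_task_footprint == 650 MB (illustrative), mb == 650;
		 * with no global limit, mb == (int)(LEDGER_LIMIT_INFINITY >> 20)
		 */
	}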
 kern_return_t
 task_set_phys_footprint_limit_internal(
        task_t task,
@@ -3729,7 +4101,13 @@ task_set_phys_footprint_limit_internal(
        ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
        
        if (old_limit_mb) {
-               *old_limit_mb = old >> 20;
+               /* 
+                * Check that old >> 20 will not give an "unexpected" 32-bit
+                * result. There is, however, an implicit assumption that a
+                * -1 MB limit equates to LEDGER_LIMIT_INFINITY.
+                */
+               assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
+               *old_limit_mb = (int)(old >> 20);
        }
 
        if (new_limit_mb == -1) {
@@ -3757,6 +4135,10 @@ task_set_phys_footprint_limit_internal(
        ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
                (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
 
+        if (task == current_task()) {
+                ledger_check_new_balance(task->ledger, task_ledgers.phys_footprint);
+        }
+
        task_unlock(task);
 
        return (KERN_SUCCESS);
@@ -3770,7 +4152,13 @@ task_get_phys_footprint_limit(
        ledger_amount_t limit;
     
        ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
-       *limit_mb = limit >> 20;
+       /* 
+        * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
+        * result. There is, however, an implicit assumption that a -1 MB
+        * limit equates to LEDGER_LIMIT_INFINITY.
+        */
+       assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
+       *limit_mb = (int)(limit >> 20);
        
        return (KERN_SUCCESS);
 }
@@ -3833,6 +4221,17 @@ task_reference(
                task_reference_internal(task);
 }
 
+/* defined in bsd/kern/kern_prot.c */
+extern int get_audit_token_pid(audit_token_t *audit_token);
+
+int task_pid(task_t task)
+{
+       if (task)
+               return get_audit_token_pid(&task->audit_token);
+       return -1;
+}
+
+
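task_pid() supersedes the audit_token_pid_from_task() macro removed elsewhere in this commit, which read the pid straight out of the audit token. A small sketch of the expected equivalence, based on that removed macro (the helper name is illustrative):

	/* Sketch only: what get_audit_token_pid() is expected to return for a task. */
	static int
	audit_token_pid_sketch(task_t task)
	{
		if (task == TASK_NULL)
			return -1;
		/* the pid is stored in audit_token.val[5] by set_security_token() */
		return (int)task->audit_token.val[5];
	}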
 /* 
  * This routine is called always with task lock held.
  * And it returns a thread handle without reference as the caller
@@ -3976,7 +4375,7 @@ THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void)
 {
        task_t                                          task            = current_task();
        int                                                     pid         = 0;
-       char                                    *procname       = (char *) "unknown";   
+       const char                                      *procname       = "unknown";
        uint64_t                                        observed_wakeups_rate;
        uint64_t                                        permitted_wakeups_rate;
        uint64_t                                        observation_interval;
index b82c53bb41cdb26d48389f4628860b1af59b8abe..7b7c153065abedf0e0d54f7bb8f7266226652b3a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010, 2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 
 #ifdef XNU_KERNEL_PRIVATE
 
+#include <kern/kern_cdata.h>
 #include <mach/sfi_class.h>
 
 /* defns for task->rsu_controldata */
 
 
 #include <kern/thread.h>
+#include <mach/coalition.h>
 
 #ifdef CONFIG_ATM
 #include <atm/atm_internal.h>
@@ -258,6 +260,7 @@ struct task {
 #ifdef  MACH_BSD 
        void *bsd_info;
 #endif  
+       kcdata_descriptor_t             corpse_info;
        struct vm_shared_region         *shared_region;
        volatile uint32_t t_flags;                                      /* general-purpose task flags protected by task_lock (TL) */
 #define TF_64B_ADDR             0x00000001                              /* task has 64-bit addressing */
@@ -266,6 +269,8 @@ struct task {
 #define TF_WAKEMON_WARNING      0x00000008                              /* task is in wakeups monitor warning zone */
 #define TF_TELEMETRY            (TF_CPUMON_WARNING | TF_WAKEMON_WARNING) /* task is a telemetry participant */
 #define TF_GPU_DENIED           0x00000010                              /* task is not allowed to access the GPU */
+#define TF_CORPSE               0x00000020                              /* task is a corpse */
+#define TF_PENDING_CORPSE       0x00000040                              /* task corpse has not been reported yet */
 
 #define task_has_64BitAddr(task)       \
         (((task)->t_flags & TF_64B_ADDR) != 0)
@@ -276,10 +281,25 @@ struct task {
 #define task_has_64BitData(task)    \
         (((task)->t_flags & TF_64B_DATA) != 0)
 
+#define task_is_a_corpse(task)      \
+        (((task)->t_flags & TF_CORPSE) != 0)
+
+#define task_set_corpse(task)       \
+        ((task)->t_flags |= TF_CORPSE)
+
+#define task_corpse_pending_report(task)       \
+        (((task)->t_flags & TF_PENDING_CORPSE) != 0)
+
+#define task_set_corpse_pending_report(task)       \
+        ((task)->t_flags |= TF_PENDING_CORPSE)
+
+#define task_clear_corpse_pending_report(task)       \
+        ((task)->t_flags &= ~TF_PENDING_CORPSE)
+
        mach_vm_address_t       all_image_info_addr; /* dyld __all_image_info     */
        mach_vm_size_t          all_image_info_size; /* section location and size */
 
-#if CONFIG_COUNTERS || KPERF
+#if KPERF
 #define TASK_PMC_FLAG                  0x1     /* Bit in "t_chud" signifying PMC interest */
 #define TASK_KPC_FORCED_ALL_CTRS       0x2     /* Bit in "t_chud" signifying KPC forced all counters */
 
@@ -325,11 +345,12 @@ struct task {
        /*
         * Can be merged with imp_donor bits, once the IMPORTANCE_INHERITANCE macro goes away.
         */
-       uint32_t        low_mem_notified_warn     :1,    /* warning low memory notification is sent to the task */
-                       low_mem_notified_critical :1,    /* critical low memory notification is sent to the task */
-                       purged_memory_warn        :1,    /* purgeable memory of the task is purged for warning level pressure */
-                       purged_memory_critical    :1,    /* purgeable memory of the task is purged for critical level pressure */
-                       mem_notify_reserved       :28;   /* reserved for future use */
+       uint32_t        low_mem_notified_warn           :1,     /* warning low memory notification is sent to the task */
+                       low_mem_notified_critical       :1,     /* critical low memory notification is sent to the task */
+                       purged_memory_warn              :1,     /* purgeable memory of the task is purged for warning level pressure */
+                       purged_memory_critical          :1,     /* purgeable memory of the task is purged for critical level pressure */
+                       low_mem_privileged_listener     :1,     /* if set, task would like to know about pressure changes before other tasks on the system */
+                       mem_notify_reserved             :27;    /* reserved for future use */
 
        io_stat_info_t  task_io_stats;
        
@@ -350,10 +371,16 @@ struct task {
        boolean_t       task_purgeable_disowning;
        boolean_t       task_purgeable_disowned;
 
-       /* Coalition is set in task_create_internal and unset in task_deallocate_internal, so it can be referenced without the task lock. */
-       coalition_t     coalition;              /* coalition this task belongs to */
-       /* These fields are protected by coalition->lock, not the task lock. */
-       queue_chain_t   coalition_tasks;        /* list of tasks in the coalition */
+       /*
+        * A task's coalition set is "adopted" in task_create_internal
+        * and unset in task_deallocate_internal, so each array member
+        * can be referenced without the task lock.
+        * Note: these fields are protected by coalition->lock,
+        *       not the task lock.
+        */
+       coalition_t     coalition[COALITION_NUM_TYPES];
+       queue_chain_t   task_coalition[COALITION_NUM_TYPES];
+       uint64_t        dispatchqueue_offset;
 
 #if HYPERVISOR
        void *hv_task_target; /* hypervisor virtual machine object associated with this task */
@@ -444,6 +471,10 @@ extern kern_return_t       task_wait(
 extern kern_return_t   task_release(
                                                        task_t          task);
 
+/* Suspend/resume a task where the kernel owns the suspend count */
+extern kern_return_t    task_suspend_internal(          task_t          task);
+extern kern_return_t    task_resume_internal(           task_t          task);
+
 /* Suspends a task by placing a hold on its threads */
 extern kern_return_t    task_pidsuspend(
                                                        task_t          task);
@@ -491,7 +522,7 @@ extern kern_return_t        task_terminate_internal(
 
 extern kern_return_t   task_create_internal(
                                                        task_t          parent_task,
-                                                       coalition_t     parent_coalition,
+                                                       coalition_t     *parent_coalitions,
                                                        boolean_t       inherit_memory,
                                                        boolean_t       is_64bit,
                                                        task_t          *child_task);   /* OUT */
@@ -542,6 +573,7 @@ extern int          get_task_numacts(
                                        task_t          task);
 
 extern int get_task_numactivethreads(task_t task);
+extern kern_return_t task_collect_crash_info(task_t task);
 
 /* JMM - should just be temporary (implementation in bsd_kern still) */
 extern void    set_bsdtask_info(task_t,void *);
@@ -555,7 +587,9 @@ extern uint64_t     get_task_phys_footprint(task_t);
 extern uint64_t        get_task_phys_footprint_max(task_t);
 extern uint64_t        get_task_purgeable_size(task_t);
 extern uint64_t        get_task_cpu_time(task_t);
+extern uint64_t get_task_dispatchqueue_offset(task_t);
 
+extern kern_return_t task_convert_phys_footprint_limit(int, int *);
 extern kern_return_t task_set_phys_footprint_limit_internal(task_t, int, int *, boolean_t);
 extern kern_return_t task_get_phys_footprint_limit(task_t task, int *limit_mb);
 
@@ -586,6 +620,7 @@ struct _task_ledger_indices {
        int internal;
        int iokit_mapped;
        int alternate_accounting;
+       int alternate_accounting_compressed;
        int phys_footprint;
        int internal_compressed;
        int purgeable_volatile;
@@ -594,7 +629,9 @@ struct _task_ledger_indices {
        int purgeable_nonvolatile_compressed;
        int platform_idle_wakeups;
        int interrupt_wakeups;
-        int sfi_wait_times[MAX_SFI_CLASS_ID];
+#if CONFIG_SCHED_SFI
+       int sfi_wait_times[MAX_SFI_CLASS_ID];
+#endif /* CONFIG_SCHED_SFI */
 #ifdef CONFIG_BANK
        int cpu_time_billed_to_me;
        int cpu_time_billed_to_others;
@@ -666,11 +703,15 @@ extern void proc_get_task_policy2(task_t task, thread_t thread, int category, in
 /* For use by kernel threads and others who don't hold a reference on the target thread */
 extern void proc_set_task_policy_thread(task_t task, uint64_t tid, int category, int flavor, int value);
 
-extern void proc_set_task_spawnpolicy(task_t task, int apptype, int qos_clamp,
+extern void proc_set_task_spawnpolicy(task_t task, int apptype, int qos_clamp, int role,
                                       ipc_port_t * portwatch_ports, int portwatch_count);
 
 extern void task_set_main_thread_qos(task_t task, thread_t main_thread);
 
+extern int proc_darwin_role_to_task_role(int darwin_role, int* task_role);
+extern int proc_task_role_to_darwin_role(int task_role);
+
+
 /* IO Throttle tiers */
 #define THROTTLE_LEVEL_NONE     -1
 #define        THROTTLE_LEVEL_TIER0     0      /* IOPOL_NORMAL, IOPOL_DEFAULT, IOPOL_PASSIVE */
@@ -699,6 +740,7 @@ extern int proc_restore_workq_bgthreadpolicy(thread_t thread);
 
 extern int proc_get_darwinbgstate(task_t task, uint32_t *flagsp);
 extern boolean_t proc_task_is_tal(task_t task);
+extern int task_get_apptype(task_t);
 extern integer_t task_grab_latency_qos(task_t task);
 extern void task_policy_create(task_t task, int parent_boosted);
 extern void thread_policy_create(thread_t thread);
@@ -791,6 +833,7 @@ extern int task_importance_drop_legacy_external_assertion(task_t target_task, ui
 
 #endif /* IMPORTANCE_INHERITANCE */
 
+extern int task_low_mem_privileged_listener(task_t task, boolean_t new_value, boolean_t *old_value);
 extern boolean_t task_has_been_notified(task_t task, int pressurelevel);
 extern boolean_t task_used_for_purging(task_t task, int pressurelevel);
 extern void task_mark_has_been_notified(task_t task, int pressurelevel);
@@ -799,11 +842,7 @@ extern void task_clear_has_been_notified(task_t task, int pressurelevel);
 extern void task_clear_used_for_purging(task_t task);
 extern int task_importance_estimate(task_t task);
 
-/*
- * This should only be used for debugging.
- * pid is stored in audit_token by set_security_token().
- */
-#define audit_token_pid_from_task(task)  ((task)->audit_token.val[5])
+extern int task_pid(task_t task);
 
 /* End task_policy */
 
@@ -818,6 +857,7 @@ extern boolean_t task_is_gpu_denied(task_t task);
 
 extern void    *get_bsdtask_info(task_t);
 extern void    *get_bsdthreadtask_info(thread_t);
+extern void task_bsdtask_kill(task_t);
 extern vm_map_t get_task_map(task_t);
 extern ledger_t        get_task_ledger(task_t);
 
index e7df6a708ecf7067390eb54c9595deff8c7ac709..101197a51848ac397978da5d001eb9a7f49ae41d 100644 (file)
@@ -131,7 +131,10 @@ static void task_policy_update_locked(task_t task, thread_t thread, task_pend_to
 static void task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_create, task_pend_token_t pend_token);
 static void task_policy_update_task_locked(task_t task, boolean_t update_throttle, boolean_t update_bg_throttle, boolean_t update_sfi);
 static void task_policy_update_thread_locked(thread_t thread, int update_cpu, boolean_t update_throttle, boolean_t update_sfi, boolean_t update_qos);
+
+#if CONFIG_SCHED_SFI
 static boolean_t task_policy_update_coalition_focal_tasks(task_t task, int prev_role, int next_role);
+#endif
 
 static int proc_get_effective_policy(task_t task, thread_t thread, int policy);
 
@@ -198,9 +201,9 @@ static void task_importance_update_live_donor(task_t target_task);
 
 /* Macros for making tracing simpler */
 
-#define tpriority(task, thread)  ((uintptr_t)(thread == THREAD_NULL ? (task->priority)  : (thread->priority)))
+#define tpriority(task, thread)  ((uintptr_t)(thread == THREAD_NULL ? (task->priority)  : (thread->base_pri)))
 #define tisthread(thread) (thread == THREAD_NULL ? TASK_POLICY_TASK  : TASK_POLICY_THREAD)
-#define targetid(task, thread)   ((uintptr_t)(thread == THREAD_NULL ? (audit_token_pid_from_task(task)) : (thread->thread_id)))
+#define targetid(task, thread)   ((uintptr_t)(thread == THREAD_NULL ? (task_pid(task)) : (thread->thread_id)))
 
 /*
  * Default parameters for certain policies
@@ -418,7 +421,7 @@ task_policy_set(
 
                KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                          (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_START,
-                                         proc_selfpid(), audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL),
+                                         proc_selfpid(), task_pid(task), trequested_0(task, THREAD_NULL),
                                          trequested_1(task, THREAD_NULL), 0);
 
                task->requested_policy.t_sup_active      = (info->active)         ? 1 : 0;
@@ -439,7 +442,7 @@ task_policy_set(
 
                KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                          (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_END,
-                                         proc_selfpid(), audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL),
+                                         proc_selfpid(), task_pid(task), trequested_0(task, THREAD_NULL),
                                          trequested_1(task, THREAD_NULL), 0);
 
                break;
@@ -657,14 +660,14 @@ task_policy_create(task_t task, int parent_boosted)
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                  (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_START,
-                                 audit_token_pid_from_task(task), teffective_0(task, THREAD_NULL),
+                                 task_pid(task), teffective_0(task, THREAD_NULL),
                                  teffective_1(task, THREAD_NULL), tpriority(task, THREAD_NULL), 0);
 
        task_policy_update_internal_locked(task, THREAD_NULL, TRUE, NULL);
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                  (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_END,
-                                 audit_token_pid_from_task(task), teffective_0(task, THREAD_NULL),
+                                 task_pid(task), teffective_0(task, THREAD_NULL),
                                  teffective_1(task, THREAD_NULL), tpriority(task, THREAD_NULL), 0);
 
        task_importance_update_live_donor(task);
@@ -771,7 +774,7 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr
                                case TASK_DEFAULT_APPLICATION:
                                        /* This is 'may render UI but we don't know if it's focal/nonfocal' */
                                        next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED;
-                                       break;                                  
+                                       break;
 
                                case TASK_NONUI_APPLICATION:
                                        /* i.e. 'off-screen' */
@@ -784,6 +787,11 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr
                                        next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED;
                                        break;
 
+                               case TASK_THROTTLE_APPLICATION:
+                                       /* i.e. 'TAL launch' */
+                                       next.t_qos_ceiling = THREAD_QOS_UTILITY;
+                                       break;
+
                                case TASK_UNSPECIFIED:
                                default:
                                        /* Apps that don't have an application role get
@@ -847,7 +855,6 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr
        boolean_t wants_darwinbg        = FALSE;
        boolean_t wants_all_sockets_bg  = FALSE; /* Do I want my existing sockets to be bg */
        boolean_t wants_watchersbg      = FALSE; /* Do I want my pidbound threads to be bg */
-       boolean_t wants_tal             = FALSE; /* Do I want the effects of TAL mode */
 
        /*
         * If DARWIN_BG has been requested at either level, it's engaged.
@@ -863,7 +870,12 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr
                if (requested.t_apptype      == TASK_APPTYPE_APP_TAL &&
                    requested.t_role         == TASK_BACKGROUND_APPLICATION &&
                    requested.t_tal_enabled  == 1) {
-                       wants_tal = TRUE;
+                       next.t_tal_engaged = 1;
+               }
+
+               if ((requested.t_apptype     == TASK_APPTYPE_APP_DEFAULT ||
+                    requested.t_apptype     == TASK_APPTYPE_APP_TAL) &&
+                   requested.t_role         == TASK_THROTTLE_APPLICATION) {
                        next.t_tal_engaged = 1;
                }
 
@@ -916,7 +928,10 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr
 
        boolean_t wants_lowpri_cpu = FALSE;
 
-       if (wants_darwinbg || wants_tal)
+       if (wants_darwinbg)
+               wants_lowpri_cpu = TRUE;
+
+       if (next.t_tal_engaged)
                wants_lowpri_cpu = TRUE;
 
        if (on_task && requested.t_sup_lowpri_cpu && requested.t_boosted == 0)
@@ -942,7 +957,7 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr
                if (requested.t_sup_disk && requested.t_boosted == 0)
                        iopol = MAX(iopol, proc_suppressed_disk_tier);
 
-               if (wants_tal)
+               if (next.t_tal_engaged)
                        iopol = MAX(iopol, proc_tal_disk_tier);
 
                if (next.t_qos_clamp != THREAD_QOS_UNSPECIFIED)
@@ -1066,7 +1081,7 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr
                        /* TODO: This should only be shot down on SIGTERM, not exit */
                        next.t_suspended   = 0;
                } else {
-                       next.thep_qos = 0;
+                       next.thep_qos = THREAD_QOS_UNSPECIFIED;
                }
        }
 
@@ -1184,11 +1199,12 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr
                        (prev.t_sfi_managed != next.t_sfi_managed))
                        update_sfi = TRUE;
 
-/* TODO: if CONFIG_SFI */
+#if CONFIG_SCHED_SFI
                if (prev.t_role != next.t_role && task_policy_update_coalition_focal_tasks(task, prev.t_role, next.t_role)) {
                        update_sfi = TRUE;
                        pend_token->tpt_update_coal_sfi = 1;
                }
+#endif /* CONFIG_SCHED_SFI */
 
                task_policy_update_task_locked(task, update_throttle, update_threads, update_sfi);
        } else {
@@ -1205,7 +1221,8 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr
 
                if (prev.thep_qos           != next.thep_qos          ||
                    prev.thep_qos_relprio   != next.thep_qos_relprio  ||
-                   prev.qos_ui_is_urgent   != next.qos_ui_is_urgent) {
+                   prev.qos_ui_is_urgent   != next.qos_ui_is_urgent  ||
+                   prev.terminated         != next.terminated) {
                        update_qos = TRUE;
                }
 
@@ -1213,6 +1230,8 @@ task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_cr
        }
 }
 
+
+#if CONFIG_SCHED_SFI
 /*
  * Yet another layering violation. We reach out and bang on the coalition directly.
  */
@@ -1223,24 +1242,27 @@ task_policy_update_coalition_focal_tasks(task_t     task,
 {
        boolean_t sfi_transition = FALSE;
 
+       /* task moving into/out-of the foreground */
        if (prev_role != TASK_FOREGROUND_APPLICATION && next_role == TASK_FOREGROUND_APPLICATION) {
-               if (coalition_adjust_focal_task_count(task->coalition, 1) == 1)
+               if (task_coalition_adjust_focal_count(task, 1) == 1)
                        sfi_transition = TRUE;
        } else if (prev_role == TASK_FOREGROUND_APPLICATION && next_role != TASK_FOREGROUND_APPLICATION) {
-               if (coalition_adjust_focal_task_count(task->coalition, -1) == 0)
+               if (task_coalition_adjust_focal_count(task, -1) == 0)
                        sfi_transition = TRUE;
        }
 
+       /* task moving into/out-of background */
        if (prev_role != TASK_BACKGROUND_APPLICATION && next_role == TASK_BACKGROUND_APPLICATION) {
-               if (coalition_adjust_non_focal_task_count(task->coalition, 1) == 1)
+               if (task_coalition_adjust_nonfocal_count(task, 1) == 1)
                        sfi_transition = TRUE;
        } else if (prev_role == TASK_BACKGROUND_APPLICATION && next_role != TASK_BACKGROUND_APPLICATION) {
-               if (coalition_adjust_non_focal_task_count(task->coalition, -1) == 0)
+               if (task_coalition_adjust_nonfocal_count(task, -1) == 0)
                        sfi_transition = TRUE;
        }
 
        return sfi_transition;
 }
+#endif /* CONFIG_SCHED_SFI */
 
 /* Despite the name, the thread's task is locked, the thread is not */
 void
@@ -1379,6 +1401,30 @@ task_policy_update_task_locked(task_t    task,
        }
 }
 
+#if CONFIG_SCHED_SFI
+/* coalition object is locked */
+static void
+task_sfi_reevaluate_cb(coalition_t coal, void *ctx, task_t task)
+{
+       thread_t thread;
+
+       /* unused for now */
+       (void)coal;
+
+       /* skip the task we're re-evaluating on behalf of: it's already updated */
+       if (task == (task_t)ctx)
+               return;
+
+       task_lock(task);
+
+       queue_iterate(&task->threads, thread, thread_t, task_threads) {
+               sfi_reevaluate(thread);
+       }
+
+       task_unlock(task);
+}
+#endif /* CONFIG_SCHED_SFI */
+
 /*
  * Called with task unlocked to do things that can't be done while holding the task lock
  */
@@ -1401,8 +1447,12 @@ task_policy_update_complete_unlocked(task_t task, thread_t thread, task_pend_tok
                if (pend_token->tpt_update_live_donor)
                        task_importance_update_live_donor(task);
 
+#if CONFIG_SCHED_SFI
+               /* use the resource coalition for SFI re-evaluation */
                if (pend_token->tpt_update_coal_sfi)
-                       coalition_sfi_reevaluate(task->coalition, task);
+                       coalition_for_each_task(task->coalition[COALITION_TYPE_RESOURCE],
+                                               (void *)task, task_sfi_reevaluate_cb);
+#endif /* CONFIG_SCHED_SFI */
        }
 }
 
@@ -2059,6 +2109,60 @@ proc_tier_to_iopol(int tier, int passive)
        }
 }
 
+int
+proc_darwin_role_to_task_role(int darwin_role, int* task_role)
+{
+       integer_t role = TASK_UNSPECIFIED;
+
+       switch (darwin_role) {
+               case PRIO_DARWIN_ROLE_DEFAULT:
+                       role = TASK_UNSPECIFIED;
+                       break;
+               case PRIO_DARWIN_ROLE_UI_FOCAL:
+                       role = TASK_FOREGROUND_APPLICATION;
+                       break;
+               case PRIO_DARWIN_ROLE_UI:
+                       role = TASK_DEFAULT_APPLICATION;
+                       break;
+               case PRIO_DARWIN_ROLE_NON_UI:
+                       role = TASK_NONUI_APPLICATION;
+                       break;
+               case PRIO_DARWIN_ROLE_UI_NON_FOCAL:
+                       role = TASK_BACKGROUND_APPLICATION;
+                       break;
+               case PRIO_DARWIN_ROLE_TAL_LAUNCH:
+                       role = TASK_THROTTLE_APPLICATION;
+                       break;
+               default:
+                       return EINVAL;
+       }
+
+       *task_role = role;
+
+       return 0;
+}
+
+int
+proc_task_role_to_darwin_role(int task_role)
+{
+       switch (task_role) {
+               case TASK_FOREGROUND_APPLICATION:
+                       return PRIO_DARWIN_ROLE_UI_FOCAL;
+               case TASK_BACKGROUND_APPLICATION:
+                       return PRIO_DARWIN_ROLE_UI_NON_FOCAL;
+               case TASK_NONUI_APPLICATION:
+                       return PRIO_DARWIN_ROLE_NON_UI;
+               case TASK_DEFAULT_APPLICATION:
+                       return PRIO_DARWIN_ROLE_UI;
+               case TASK_THROTTLE_APPLICATION:
+                       return PRIO_DARWIN_ROLE_TAL_LAUNCH;
+               case TASK_UNSPECIFIED:
+               default:
+                       return PRIO_DARWIN_ROLE_DEFAULT;
+       }
+}
+
+
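With the two helpers above, a Darwin role should survive a round trip through the task role and back. A minimal sketch of that property (the function name is illustrative):

	/* Sketch only: round-tripping a Darwin role through the helpers above. */
	static void
	role_roundtrip_sketch(void)
	{
		int task_role = TASK_UNSPECIFIED;

		if (proc_darwin_role_to_task_role(PRIO_DARWIN_ROLE_UI_FOCAL, &task_role) == 0) {
			assert(task_role == TASK_FOREGROUND_APPLICATION);
			assert(proc_task_role_to_darwin_role(task_role) == PRIO_DARWIN_ROLE_UI_FOCAL);
		}
	}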
 /* apply internal backgrounding for workqueue threads */
 int
 proc_apply_workq_bgthreadpolicy(thread_t thread)
@@ -2500,14 +2604,14 @@ extern boolean_t ipc_importance_interactive_receiver;
  * TODO: Make this function more table-driven instead of ad-hoc
  */
 void
-proc_set_task_spawnpolicy(task_t task, int apptype, int qos_clamp,
+proc_set_task_spawnpolicy(task_t task, int apptype, int qos_clamp, int role,
                           ipc_port_t * portwatch_ports, int portwatch_count)
 {
        struct task_pend_token pend_token = {};
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                  (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_START,
-                                 audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
+                                 task_pid(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
                                  apptype, 0);
 
        switch (apptype) {
@@ -2588,7 +2692,10 @@ proc_set_task_spawnpolicy(task_t task, int apptype, int qos_clamp,
 
        if (apptype != TASK_APPTYPE_NONE) {
                task->requested_policy.t_apptype = apptype;
+       }
 
+       if (role != TASK_UNSPECIFIED) {
+               task->requested_policy.t_role = role;
        }
 
        if (qos_clamp != THREAD_QOS_UNSPECIFIED) {
@@ -2606,10 +2713,12 @@ proc_set_task_spawnpolicy(task_t task, int apptype, int qos_clamp,
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                  (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_END,
-                                 audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
+                                 task_pid(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
                                  task_is_importance_receiver(task), 0);
 }
 
+extern task_t bsd_init_task;
+
 /* Set up the primordial thread's QoS */
 void
 task_set_main_thread_qos(task_t task, thread_t main_thread) {
@@ -2621,13 +2730,18 @@ task_set_main_thread_qos(task_t task, thread_t main_thread) {
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                  (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
-                                 audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
+                                 task_pid(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
                                  main_thread->requested_policy.thrp_qos, 0);
 
        int primordial_qos = THREAD_QOS_UNSPECIFIED;
 
        int qos_clamp = task->requested_policy.t_qos_clamp;
 
+       if (task == bsd_init_task) {
+               /* PID 1 gets a special case */
+               primordial_qos = THREAD_QOS_USER_INITIATED;
+       }
+
        switch (task->requested_policy.t_apptype) {
                case TASK_APPTYPE_APP_TAL:
                case TASK_APPTYPE_APP_DEFAULT:
@@ -2663,7 +2777,7 @@ task_set_main_thread_qos(task_t task, thread_t main_thread) {
 
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                  (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
-                                 audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
+                                 task_pid(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
                                  primordial_qos, 0);
 }
 
@@ -2674,6 +2788,12 @@ proc_task_is_tal(task_t task)
        return (task->requested_policy.t_apptype == TASK_APPTYPE_APP_TAL) ? TRUE : FALSE;
 }
 
+int
+task_get_apptype(task_t task)
+{
+       return task->requested_policy.t_apptype;
+}
+
 /* for telemetry */
 integer_t
 task_grab_latency_qos(task_t task)
@@ -2943,19 +3063,44 @@ proc_get_task_ruse_cpu(task_t task, uint32_t *policyp, uint8_t *percentagep, uin
 void
 proc_init_cpumon_params(void)
 {
+       /*
+        * The max CPU percentage can be configured via the boot-args and
+        * a key in the device tree. The boot-args are honored first, then the
+        * device tree.
+        */
        if (!PE_parse_boot_argn("max_cpumon_percentage", &proc_max_cpumon_percentage,
-               sizeof (proc_max_cpumon_percentage))) {
-               proc_max_cpumon_percentage = DEFAULT_CPUMON_PERCENTAGE;
+               sizeof (proc_max_cpumon_percentage)))
+       {
+               uint64_t max_percentage = 0ULL;
+
+               if (!PE_get_default("kern.max_cpumon_percentage", &max_percentage,
+                       sizeof(max_percentage)))
+               {
+                       max_percentage = DEFAULT_CPUMON_PERCENTAGE;
+               }
+
+               assert(max_percentage <= UINT8_MAX);
+               proc_max_cpumon_percentage = (uint8_t) max_percentage;
        }
 
        if (proc_max_cpumon_percentage > 100) {
                proc_max_cpumon_percentage = 100;
        }
 
-       /* The interval should be specified in seconds. */ 
+       /*
+        * The interval should be specified in seconds.
+        *
+        * Like the max CPU percentage, the max CPU interval can be configured
+        * via boot-args and the device tree.
+        */
        if (!PE_parse_boot_argn("max_cpumon_interval", &proc_max_cpumon_interval,
-               sizeof (proc_max_cpumon_interval))) {
-               proc_max_cpumon_interval = DEFAULT_CPUMON_INTERVAL;
+               sizeof (proc_max_cpumon_interval)))
+       {
+               if (!PE_get_default("kern.max_cpumon_interval", &proc_max_cpumon_interval,
+                       sizeof(proc_max_cpumon_interval)))
+               {
+                       proc_max_cpumon_interval = DEFAULT_CPUMON_INTERVAL;
+               }
        }
 
        proc_max_cpumon_interval *= NSEC_PER_SEC;
@@ -3266,7 +3411,7 @@ task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t d
 
                        if (warn) {
                                int       pid = 0;
-                               char      *procname = (char *)"unknown";
+                               const char *procname = "unknown";
 
 #ifdef MACH_BSD
                                pid = proc_selfpid();
@@ -3495,19 +3640,19 @@ task_set_boost_locked(task_t task, boolean_t boost_active)
 {
 #if IMPORTANCE_DEBUG
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_START),
-                                 proc_selfpid(), audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), 0);
+                                 proc_selfpid(), task_pid(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), 0);
 #endif
 
        task->requested_policy.t_boosted = boost_active;
 
 #if IMPORTANCE_DEBUG
        if (boost_active == TRUE){
-               DTRACE_BOOST2(boost, task_t, task, int, audit_token_pid_from_task(task));
+               DTRACE_BOOST2(boost, task_t, task, int, task_pid(task));
        } else {
-               DTRACE_BOOST2(unboost, task_t, task, int, audit_token_pid_from_task(task));
+               DTRACE_BOOST2(unboost, task_t, task, int, task_pid(task));
        }
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_END),
-                                 proc_selfpid(), audit_token_pid_from_task(task),
+                                 proc_selfpid(), task_pid(task),
                                  trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), 0);
 #endif
 }
@@ -3741,7 +3886,7 @@ task_add_importance_watchport(task_t task, mach_port_t port, int *boostp)
        int boost = 0;
 
        __impdebug_only int released_pid = 0;
-       __impdebug_only int pid = audit_token_pid_from_task(task);
+       __impdebug_only int pid = task_pid(task);
 
        ipc_importance_task_t release_imp_task = IIT_NULL;
 
@@ -3787,7 +3932,7 @@ task_add_importance_watchport(task_t task, mach_port_t port, int *boostp)
                        if (boost > 0)
                                ipc_importance_task_drop_internal_assertion(release_imp_task, boost);
 
-                       // released_pid = audit_token_pid_from_task(release_imp_task); /* TODO: Need ref-safe way to get pid */
+                       // released_pid = task_pid(release_imp_task); /* TODO: Need ref-safe way to get pid */
                        ipc_importance_task_release(release_imp_task);
                }
 #if IMPORTANCE_DEBUG
@@ -3815,6 +3960,26 @@ task_add_importance_watchport(task_t task, mach_port_t port, int *boostp)
 #define TASK_IMPORTANCE_NOTDARWINBG    1
 
 
+/*
+ * Mark or unmark the task as a privileged listener for memory notifications.
+ * If marked, this task will be notified ahead of the bulk of all other
+ * tasks when the system enters a pressure level of interest to this task.
+ */
+int
+task_low_mem_privileged_listener(task_t task, boolean_t new_value, boolean_t *old_value)
+{
+       if (old_value != NULL) {
+               *old_value = (boolean_t)task->low_mem_privileged_listener;
+       } else {
+               task_lock(task);
+               task->low_mem_privileged_listener = (uint32_t)new_value;
+               task_unlock(task);
+       }
+
+       return 0;
+}
+
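A usage sketch for the accessor just added (not part of this commit): passing a non-NULL old_value only queries the current setting, while passing NULL for old_value stores new_value under the task lock.

/* Sketch only -- assumes kernel context; example_make_privileged_listener is hypothetical. */
static void
example_make_privileged_listener(task_t task)
{
        boolean_t was_listener = FALSE;

        /* Query: old_value is non-NULL, so nothing is modified. */
        task_low_mem_privileged_listener(task, FALSE, &was_listener);

        if (!was_listener) {
                /* Set: old_value is NULL, so new_value is stored under the task lock. */
                task_low_mem_privileged_listener(task, TRUE, NULL);
        }
}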
 /*
  * Checks if the task is already notified.
  *
index b392ac9ba43b504ee3034458e49316f590b75fa5..cda0bebc6785a0067c8c2f2ea14a4acef66a32cb 100644
@@ -144,7 +144,7 @@ void telemetry_init(void)
        if (telemetry_buffer.size > TELEMETRY_MAX_BUFFER_SIZE)
                telemetry_buffer.size = TELEMETRY_MAX_BUFFER_SIZE;
 
-       ret = kmem_alloc(kernel_map, &telemetry_buffer.buffer, telemetry_buffer.size);
+       ret = kmem_alloc(kernel_map, &telemetry_buffer.buffer, telemetry_buffer.size, VM_KERN_MEMORY_DIAG);
        if (ret != KERN_SUCCESS) {
                kprintf("Telemetry: Allocation failed: %d\n", ret);
                return;
@@ -266,7 +266,7 @@ telemetry_enable_window(void)
         * but we would prefer to avoid blocking while holding the
         * lock.
         */
-       ret = kmem_alloc(kernel_map, &kern_buffer, kern_buffer_size);
+       ret = kmem_alloc(kernel_map, &kern_buffer, kern_buffer_size, VM_KERN_MEMORY_DIAG);
 
        TELEMETRY_LOCK();
 
@@ -337,6 +337,13 @@ telemetry_disable_window(void)
 static boolean_t
 telemetry_is_active(thread_t thread)
 {
+       task_t task = thread->task;
+
+       if (task == kernel_task) {
+               /* Kernel threads never return to an AST boundary, and are ineligible for telemetry */
+               return FALSE;
+       }
+
        if (telemetry_sample_all_tasks == TRUE) {
                return (TRUE);
        }
@@ -782,11 +789,13 @@ copytobuffer:
        thsnap->snapshot_magic = STACKSHOT_THREAD_SNAPSHOT_MAGIC;
        thsnap->thread_id = thread_tid(thread);
        thsnap->state = thread->state;
-       thsnap->priority = thread->priority;
+       thsnap->priority = thread->base_pri;
        thsnap->sched_pri = thread->sched_pri;
        thsnap->sched_flags = thread->sched_flags;
        thsnap->ss_flags |= kStacksPCOnly;
        thsnap->ts_qos = thread->effective_policy.thep_qos;
+       thsnap->ts_rqos = thread->requested_policy.thrp_qos;
+       thsnap->ts_rqos_override = thread->requested_policy.thrp_qos_override;
 
        if (thread->effective_policy.darwinbg) {
                thsnap->ss_flags |= kThreadDarwinBG;
@@ -1131,7 +1140,7 @@ void bootprofile_init(void)
                return;
        }
 
-       ret = kmem_alloc(kernel_map, &bootprofile_buffer, bootprofile_buffer_size);
+       ret = kmem_alloc(kernel_map, &bootprofile_buffer, bootprofile_buffer_size, VM_KERN_MEMORY_DIAG);
        if (ret != KERN_SUCCESS) {
                kprintf("Boot profile: Allocation failed: %d\n", ret);
                return;
index 8b7d863b7a8e08f0ef4c867b4ce38e567009775d..6e9a472a05eb57ae2b5fe657f822e692b3705708 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <kern/assert.h>
 #include <kern/exc_resource.h>
 #include <kern/telemetry.h>
+#include <corpses/task_corpse.h>
 #if KPC
 #include <kern/kpc.h>
 #endif
 #include <vm/vm_pageout.h>
 
 #include <sys/kdebug.h>
-
+#include <sys/bsdtask_info.h>
 #include <mach/sdt.h>
 
 /*
@@ -154,6 +155,8 @@ static queue_head_t         thread_stack_queue;
 decl_simple_lock_data(static,thread_terminate_lock)
 static queue_head_t            thread_terminate_queue;
 
+static queue_head_t            crashed_threads_queue;
+
 static struct thread   thread_template, init_thread;
 
 static void            sched_call_null(
@@ -214,7 +217,7 @@ thread_bootstrap(void)
        thread_template.reason = AST_NONE;
        thread_template.at_safe_point = FALSE;
        thread_template.wait_event = NO_EVENT64;
-       thread_template.wait_queue = WAIT_QUEUE_NULL;
+       thread_template.waitq = NULL;
        thread_template.wait_result = THREAD_WAITING;
        thread_template.options = THREAD_ABORTSAFE;
        thread_template.state = TH_WAIT | TH_UNINT;
@@ -236,13 +239,13 @@ thread_bootstrap(void)
        thread_template.static_param = 0;
        thread_template.policy_reset = 0;
 
-       thread_template.priority = 0;
+       thread_template.base_pri = 0;
        thread_template.sched_pri = 0;
        thread_template.max_priority = 0;
        thread_template.task_priority = 0;
        thread_template.promotions = 0;
        thread_template.pending_promoter_index = 0;
-       thread_template.pending_promoter[0] =
+       thread_template.pending_promoter[0] = NULL;
        thread_template.pending_promoter[1] = NULL;
        thread_template.rwlock_count = 0;
 
@@ -255,6 +258,7 @@ thread_bootstrap(void)
 
        thread_template.quantum_remaining = 0;
        thread_template.last_run_time = 0;
+       thread_template.last_made_runnable_time = 0;
 
        thread_template.computation_metered = 0;
        thread_template.computation_epoch = 0;
@@ -280,16 +284,15 @@ thread_bootstrap(void)
        thread_template.vtimer_prof_save = 0;
        thread_template.vtimer_rlim_save = 0;
 
+#if CONFIG_SCHED_SFI
        thread_template.wait_sfi_begin_time = 0;
+#endif
 
        thread_template.wait_timer_is_set = FALSE;
        thread_template.wait_timer_active = 0;
 
        thread_template.depress_timer_active = 0;
 
-       thread_template.special_handler.handler = special_handler;
-       thread_template.special_handler.next = NULL;
-
        thread_template.recover = (vm_offset_t)NULL;
        
        thread_template.map = VM_MAP_NULL;
@@ -348,6 +351,8 @@ thread_bootstrap(void)
        thread_template.ith_voucher_name = MACH_PORT_NULL;
        thread_template.ith_voucher = IPC_VOUCHER_NULL;
 
+       thread_template.work_interval_id = 0;
+
        init_thread = thread_template;
        machine_set_current_thread(&init_thread);
 }
@@ -409,7 +414,6 @@ void
 thread_terminate_self(void)
 {
        thread_t                thread = current_thread();
-
        task_t                  task;
        spl_t                   s;
        int threadcnt;
@@ -437,7 +441,7 @@ thread_terminate_self(void)
                thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
 
                /* If our priority was low because of a depressed yield, restore it in case we block below */
-               set_sched_pri(thread, thread->priority);
+               thread_recompute_sched_pri(thread, FALSE);
 
                if (timer_call_cancel(&thread->depress_timer))
                        thread->depress_timer_active--;
@@ -466,16 +470,24 @@ thread_terminate_self(void)
        thread_mtx_unlock(thread);
 
        task = thread->task;
-       uthread_cleanup(task, thread->uthread, task->bsd_info);
+       uthread_cleanup(task, thread->uthread, task->bsd_info, thread->inspection == 1 ? TRUE : FALSE);
        threadcnt = hw_atomic_sub(&task->active_thread_count, 1);
 
        /*
         * If we are the last thread to terminate and the task is
         * associated with a BSD process, perform BSD process exit.
         */
-       if (threadcnt == 0 && task->bsd_info != NULL)
+       if (threadcnt == 0 && task->bsd_info != NULL) {
                proc_exit(task->bsd_info);
-
+               /*
+                * If there is crash info in the task, deliver the crash
+                * notification now, since this is the last thread of the task.
+                */
+               if (task->corpse_info) {
+                       task_deliver_crash_notification(task);
+               }
+       }
        uthread_cred_free(thread->uthread);
 
        s = splsched();
@@ -515,7 +527,9 @@ thread_terminate_self(void)
         */
        thread->state |= TH_TERMINATE;
        thread_mark_wait_locked(thread, THREAD_UNINT);
+       assert((thread->sched_flags & TH_SFLAG_PROMOTED) == 0);
        assert(thread->promotions == 0);
+       assert(!(thread->sched_flags & TH_SFLAG_WAITQ_PROMOTED));
        assert(thread->rwlock_count == 0);
        thread_unlock(thread);
        /* splsched */
@@ -524,6 +538,14 @@ thread_terminate_self(void)
        /*NOTREACHED*/
 }
 
+/* Drop a thread refcount that definitely isn't the last one. */
+void
+thread_deallocate_safe(thread_t thread)
+{
+       if (__improbable(hw_atomic_sub(&(thread)->ref_count, 1) == 0))
+               panic("bad thread refcount!");
+}
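A note on when the helper above is valid (sketch, not part of this commit): thread_deallocate_safe() may only be used when the dropped reference is known not to be the final one, for example because the caller still holds a second reference; the final drop must go through thread_deallocate() so the full teardown path runs.

/* Sketch only -- assumes the caller obtained two references earlier; example_drop_two_refs is hypothetical. */
static void
example_drop_two_refs(thread_t thread)
{
        thread_deallocate_safe(thread);   /* cannot be the last reference */
        thread_deallocate(thread);        /* may be the last; runs full teardown */
}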
+
 void
 thread_deallocate(
        thread_t                        thread)
@@ -533,12 +555,14 @@ thread_deallocate(
        if (thread == THREAD_NULL)
                return;
 
-       if (thread_deallocate_internal(thread) > 0)
+       if (__probable(hw_atomic_sub(&(thread)->ref_count, 1) > 0))
                return;
 
        if(!(thread->state & TH_TERMINATE2))
                panic("thread_deallocate: thread not properly terminated\n");
 
+       assert(thread->runq == PROCESSOR_NULL);
+
 #if KPC
        kpc_thread_destroy(thread);
 #endif
@@ -598,11 +622,22 @@ thread_terminate_daemon(void)
        simple_lock(&thread_terminate_lock);
 
        while ((thread = (thread_t)dequeue_head(&thread_terminate_queue)) != THREAD_NULL) {
+
+               /*
+                * If marked for crash reporting, skip reaping. The corpse
+                * delivery thread will clear the bit and re-enqueue the thread
+                * for reaping when it is done.
+                */
+               if (thread->inspection){
+                       enqueue_tail(&crashed_threads_queue, (queue_entry_t)thread);
+                       continue;
+               }
+
                simple_unlock(&thread_terminate_lock);
                (void)spllo();
 
                assert(thread->SHARE_COUNT == 0);
-               assert(thread->BG_COUNT == 0);          
+               assert(thread->BG_COUNT == 0);
 
                task = thread->task;
 
@@ -678,6 +713,42 @@ thread_terminate_enqueue(
        thread_wakeup((event_t)&thread_terminate_queue);
 }
 
+/*
+ * thread_terminate_crashed_threads:
+ * Walk the list of crashed threads and put back the threads
+ * that are no longer being inspected.
+ */
+void
+thread_terminate_crashed_threads()
+{
+       thread_t th_iter, th_remove;
+       boolean_t should_wake_terminate_queue = FALSE;
+
+       simple_lock(&thread_terminate_lock);
+       /*
+        * Loop through the crashed threads queue and move any threads
+        * that are no longer being inspected back to the terminate queue.
+        */
+       th_iter = (thread_t)queue_first(&crashed_threads_queue);
+       while (!queue_end(&crashed_threads_queue, (queue_entry_t)th_iter)) {
+               th_remove = th_iter;
+               th_iter = (thread_t)queue_next(&th_iter->links);
+
+               /* make sure current_thread is never in crashed queue */
+               assert(th_remove != current_thread());
+               if (th_remove->inspection != TRUE){
+                       remque((queue_entry_t)th_remove);
+                       enqueue_tail(&thread_terminate_queue, (queue_entry_t)th_remove);
+                       should_wake_terminate_queue = TRUE;
+               }
+       }
+
+       simple_unlock(&thread_terminate_lock);
+       if (should_wake_terminate_queue == TRUE) {
+               thread_wakeup((event_t)&thread_terminate_queue);
+       }
+}
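Taken together with the inspection check added to thread_terminate_daemon() above, here is a sketch (not part of this commit; example_finish_inspection is hypothetical) of how a crash-report consumer is expected to hand a thread back for reaping: clear the inspection flag, then call thread_terminate_crashed_threads(), which moves the thread back onto thread_terminate_queue and wakes the terminate daemon.

/* Sketch only -- not part of this commit. */
static void
example_finish_inspection(thread_t th)
{
        th->inspection = FALSE;                 /* no longer being inspected */
        thread_terminate_crashed_threads();     /* requeue for the terminate daemon */
}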
+
 /*
  *     thread_stack_daemon:
  *
@@ -699,6 +770,8 @@ thread_stack_daemon(void)
 
                /* allocate stack with interrupts enabled so that we can call into VM */
                stack_alloc(thread);
+
+               KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_WAIT) | DBG_FUNC_END, thread_tid(thread), 0, 0, 0, 0);
                
                s = splsched();
                thread_lock(thread);
@@ -727,6 +800,8 @@ void
 thread_stack_enqueue(
        thread_t                thread)
 {
+       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_WAIT) | DBG_FUNC_START, thread_tid(thread), 0, 0, 0, 0);
+
        simple_lock(&thread_stack_lock);
        enqueue_tail(&thread_stack_queue, (queue_entry_t)thread);
        simple_unlock(&thread_stack_lock);
@@ -742,6 +817,7 @@ thread_daemon_init(void)
 
        simple_lock_init(&thread_terminate_lock, 0);
        queue_init(&thread_terminate_queue);
+       queue_init(&crashed_threads_queue);
 
        result = kernel_thread_start_priority((thread_continue_t)thread_terminate_daemon, NULL, MINPRI_KERNEL, &thread);
        if (result != KERN_SUCCESS)
@@ -759,6 +835,9 @@ thread_daemon_init(void)
        thread_deallocate(thread);
 }
 
+#define TH_OPTION_NONE         0x00
+#define TH_OPTION_NOCRED       0x01
+#define TH_OPTION_NOSUSP       0x02
 /*
  * Create a new thread.
  * Doesn't start the thread running.
@@ -771,9 +850,6 @@ thread_create_internal(
        integer_t                               priority,
        thread_continue_t               continuation,
        int                                             options,
-#define TH_OPTION_NONE         0x00
-#define TH_OPTION_NOCRED       0x01
-#define TH_OPTION_NOSUSP       0x02
        thread_t                                *out_thread)
 {
        thread_t                                new_thread;
@@ -806,7 +882,7 @@ thread_create_internal(
 
                new_thread->uthread = NULL;
                /* cred free may not be necessary */
-               uthread_cleanup(parent_task, ut, parent_task->bsd_info);
+               uthread_cleanup(parent_task, ut, parent_task->bsd_info, FALSE);
                uthread_cred_free(ut);
                uthread_zone_free(ut);
 #endif  /* MACH_BSD */
@@ -852,7 +928,7 @@ thread_create_internal(
                        void *ut = new_thread->uthread;
 
                        new_thread->uthread = NULL;
-                       uthread_cleanup(parent_task, ut, parent_task->bsd_info);
+                       uthread_cleanup(parent_task, ut, parent_task->bsd_info, FALSE);
                        /* cred free may not be necessary */
                        uthread_cred_free(ut);
                        uthread_zone_free(ut);
@@ -908,14 +984,6 @@ thread_create_internal(
        timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread);
        timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread);
 
-#if CONFIG_COUNTERS
-       /*
-        * If parent task has any reservations, they need to be propagated to this
-        * thread.
-        */
-       new_thread->t_chud = (TASK_PMC_FLAG == (parent_task->t_chud & TASK_PMC_FLAG)) ? 
-               THREAD_PMC_FLAG : 0U;
-#endif
 #if KPC
        kpc_thread_create(new_thread);
 #endif
@@ -925,26 +993,29 @@ thread_create_internal(
        new_thread->requested_policy.terminated = parent_task->effective_policy.terminated;
 
        /* Set the thread's scheduling parameters */
+#if defined(CONFIG_SCHED_TIMESHARE_CORE)
+       new_thread->sched_stamp = sched_tick;
+       new_thread->pri_shift = sched_pri_shift;
+#endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) */
+
        new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task);
        new_thread->sched_flags = 0;
        new_thread->max_priority = parent_task->max_priority;
        new_thread->task_priority = parent_task->priority;
-       new_thread->priority = (priority < 0)? parent_task->priority: priority;
-       if (new_thread->priority > new_thread->max_priority)
-               new_thread->priority = new_thread->max_priority;
-       new_thread->importance = new_thread->priority - new_thread->task_priority;
-       new_thread->saved_importance = new_thread->importance;
 
-#if defined(CONFIG_SCHED_TIMESHARE_CORE)
-       new_thread->sched_stamp = sched_tick;
-       new_thread->pri_shift = sched_pri_shift;
-#endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) */
+       int new_priority = (priority < 0) ? parent_task->priority : priority;
+       if (new_priority > new_thread->max_priority)
+               new_priority = new_thread->max_priority;
+
+       new_thread->importance = new_priority - new_thread->task_priority;
+       new_thread->saved_importance = new_thread->importance;
 
        if (parent_task->max_priority <= MAXPRI_THROTTLE) {
                sched_set_thread_throttled(new_thread, TRUE);
        }
 
-       SCHED(compute_priority)(new_thread, FALSE);
+       sched_set_thread_base_priority(new_thread, new_priority);
 
        thread_policy_create(new_thread);
 
@@ -962,7 +1033,7 @@ thread_create_internal(
        threads_count++;
 
        new_thread->active = TRUE;
-
+       new_thread->inspection = FALSE;
        *out_thread = new_thread;
 
        {
@@ -991,7 +1062,8 @@ static kern_return_t
 thread_create_internal2(
        task_t                          task,
        thread_t                        *new_thread,
-       boolean_t                       from_user)
+       boolean_t                       from_user,
+       thread_continue_t               continuation)
 {
        kern_return_t           result;
        thread_t                        thread;
@@ -999,7 +1071,7 @@ thread_create_internal2(
        if (task == TASK_NULL || task == kernel_task)
                return (KERN_INVALID_ARGUMENT);
 
-       result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return, TH_OPTION_NONE, &thread);
+       result = thread_create_internal(task, -1, continuation, TH_OPTION_NONE, &thread);
        if (result != KERN_SUCCESS)
                return (result);
 
@@ -1030,7 +1102,7 @@ thread_create(
        task_t                          task,
        thread_t                        *new_thread)
 {
-       return thread_create_internal2(task, new_thread, FALSE);
+       return thread_create_internal2(task, new_thread, FALSE, (thread_continue_t)thread_bootstrap_return);
 }
 
 kern_return_t
@@ -1038,7 +1110,16 @@ thread_create_from_user(
        task_t                          task,
        thread_t                        *new_thread)
 {
-       return thread_create_internal2(task, new_thread, TRUE);
+       return thread_create_internal2(task, new_thread, TRUE, (thread_continue_t)thread_bootstrap_return);
+}
+
+kern_return_t
+thread_create_with_continuation(
+       task_t                          task,
+       thread_t                        *new_thread,
+       thread_continue_t               continuation)
+{
+       return thread_create_internal2(task, new_thread, FALSE, continuation);
 }
 
 static kern_return_t
@@ -1060,8 +1141,7 @@ thread_create_running_internal2(
        if (result != KERN_SUCCESS)
                return (result);
 
-       result = machine_thread_set_state(
-                                               thread, flavor, new_state, new_state_count);
+       result = machine_thread_set_state(thread, flavor, new_state, new_state_count);
        if (result != KERN_SUCCESS) {
                task_unlock(task);
                lck_mtx_unlock(&tasks_threads_lock);
@@ -1219,6 +1299,80 @@ kernel_thread_start(
        return kernel_thread_start_priority(continuation, parameter, -1, new_thread);
 }
 
+/* Separated into a helper function so it can be used by THREAD_BASIC_INFO and THREAD_EXTENDED_INFO */
+/* The caller is assumed to hold the thread lock. */
+static void
+retrieve_thread_basic_info(thread_t thread, thread_basic_info_t basic_info)
+{
+       int     state, flags;
+
+       /* fill in info */
+
+       thread_read_times(thread, &basic_info->user_time,
+                                                               &basic_info->system_time);
+
+       /*
+        *      Update lazy-evaluated scheduler info because someone wants it.
+        */
+       if (SCHED(can_update_priority)(thread))
+               SCHED(update_priority)(thread);
+
+       basic_info->sleep_time = 0;
+
+       /*
+        *      To calculate cpu_usage, first correct for timer rate,
+        *      then for 5/8 ageing.  The correction factor [3/5] is
+        *      (1/(5/8) - 1).
+        */
+       basic_info->cpu_usage = 0;
+#if defined(CONFIG_SCHED_TIMESHARE_CORE)
+       if (sched_tick_interval) {
+               basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage
+                                                                       * TH_USAGE_SCALE) /     sched_tick_interval);
+               basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
+       }
+#endif
+
+       if (basic_info->cpu_usage > TH_USAGE_SCALE)
+               basic_info->cpu_usage = TH_USAGE_SCALE;
+
+       basic_info->policy = ((thread->sched_mode == TH_MODE_TIMESHARE)?
+                                                                                       POLICY_TIMESHARE: POLICY_RR);
+
+       flags = 0;
+       if (thread->options & TH_OPT_IDLE_THREAD)
+               flags |= TH_FLAGS_IDLE;
+
+       if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
+               flags |= TH_FLAGS_GLOBAL_FORCED_IDLE;
+       }
+
+       if (!thread->kernel_stack)
+               flags |= TH_FLAGS_SWAPPED;
+
+       state = 0;
+       if (thread->state & TH_TERMINATE)
+               state = TH_STATE_HALTED;
+       else
+       if (thread->state & TH_RUN)
+               state = TH_STATE_RUNNING;
+       else
+       if (thread->state & TH_UNINT)
+               state = TH_STATE_UNINTERRUPTIBLE;
+       else
+       if (thread->state & TH_SUSP)
+               state = TH_STATE_STOPPED;
+       else
+       if (thread->state & TH_WAIT)
+               state = TH_STATE_WAITING;
+
+       basic_info->run_state = state;
+       basic_info->flags = flags;
+
+       basic_info->suspend_count = thread->user_stop_count;
+
+       return;
+}
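To make the cpu_usage scaling above concrete, here is a worked example (mine, not from the commit), assuming thread->cpu_usage is kept in the same time units as sched_tick_interval:

/* Worked example only -- not part of this commit.
 * Suppose thread->cpu_usage == sched_tick_interval / 2, i.e. the thread ran
 * for half of the averaging window:
 *
 *   cpu_usage = (thread->cpu_usage * TH_USAGE_SCALE) / sched_tick_interval
 *             = TH_USAGE_SCALE / 2
 *   cpu_usage = (cpu_usage * 3) / 5
 *             = (3 * TH_USAGE_SCALE) / 10
 *
 * so the reported value is 30% of full scale; the factor 3/5 is the
 * (1/(5/8) - 1) = 8/5 - 1 correction named in the comment above.
 */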
 
 kern_return_t
 thread_info_internal(
@@ -1227,116 +1381,47 @@ thread_info_internal(
        thread_info_t                   thread_info_out,        /* ptr to OUT array */
        mach_msg_type_number_t  *thread_info_count)     /*IN/OUT*/
 {
-       int                                             state, flags;
-       spl_t                                   s;
+       spl_t   s;
 
        if (thread == THREAD_NULL)
                return (KERN_INVALID_ARGUMENT);
 
        if (flavor == THREAD_BASIC_INFO) {
-           register thread_basic_info_t        basic_info;
 
-           if (*thread_info_count < THREAD_BASIC_INFO_COUNT)
+               if (*thread_info_count < THREAD_BASIC_INFO_COUNT)
                        return (KERN_INVALID_ARGUMENT);
 
-           basic_info = (thread_basic_info_t) thread_info_out;
-
-           s = splsched();
-           thread_lock(thread);
-
-           /* fill in info */
+               s = splsched();
+               thread_lock(thread);
 
-           thread_read_times(thread, &basic_info->user_time,
-                                                                       &basic_info->system_time);
+               retrieve_thread_basic_info(thread, (thread_basic_info_t) thread_info_out);
 
-               /*
-                *      Update lazy-evaluated scheduler info because someone wants it.
-                */
-               if (SCHED(can_update_priority)(thread))
-                       SCHED(update_priority)(thread);
+               thread_unlock(thread);
+               splx(s);
 
-               basic_info->sleep_time = 0;
+               *thread_info_count = THREAD_BASIC_INFO_COUNT;
 
-               /*
-                *      To calculate cpu_usage, first correct for timer rate,
-                *      then for 5/8 ageing.  The correction factor [3/5] is
-                *      (1/(5/8) - 1).
-                */
-               basic_info->cpu_usage = 0;
-#if defined(CONFIG_SCHED_TIMESHARE_CORE)
-               if (sched_tick_interval) {
-                       basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage
-                                                                               * TH_USAGE_SCALE) /     sched_tick_interval);
-                       basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
-               }
-#endif
-               
-               if (basic_info->cpu_usage > TH_USAGE_SCALE)
-                       basic_info->cpu_usage = TH_USAGE_SCALE;
-
-               basic_info->policy = ((thread->sched_mode == TH_MODE_TIMESHARE)?
-                                                                                               POLICY_TIMESHARE: POLICY_RR);
-
-           flags = 0;
-               if (thread->options & TH_OPT_IDLE_THREAD)
-                       flags |= TH_FLAGS_IDLE;
-
-           if (!thread->kernel_stack)
-                       flags |= TH_FLAGS_SWAPPED;
-
-           state = 0;
-           if (thread->state & TH_TERMINATE)
-                       state = TH_STATE_HALTED;
-           else
-               if (thread->state & TH_RUN)
-                       state = TH_STATE_RUNNING;
-           else
-               if (thread->state & TH_UNINT)
-                       state = TH_STATE_UNINTERRUPTIBLE;
-           else
-               if (thread->state & TH_SUSP)
-                       state = TH_STATE_STOPPED;
-           else
-               if (thread->state & TH_WAIT)
-                       state = TH_STATE_WAITING;
-
-           basic_info->run_state = state;
-           basic_info->flags = flags;
-
-           basic_info->suspend_count = thread->user_stop_count;
-
-           thread_unlock(thread);
-           splx(s);
-
-           *thread_info_count = THREAD_BASIC_INFO_COUNT;
-
-           return (KERN_SUCCESS);
+               return (KERN_SUCCESS);
        }
        else
        if (flavor == THREAD_IDENTIFIER_INFO) {
-           register thread_identifier_info_t   identifier_info;
+               register thread_identifier_info_t       identifier_info;
 
-           if (*thread_info_count < THREAD_IDENTIFIER_INFO_COUNT)
+               if (*thread_info_count < THREAD_IDENTIFIER_INFO_COUNT)
                        return (KERN_INVALID_ARGUMENT);
 
-           identifier_info = (thread_identifier_info_t) thread_info_out;
+               identifier_info = (thread_identifier_info_t) thread_info_out;
 
-           s = splsched();
-           thread_lock(thread);
+               s = splsched();
+               thread_lock(thread);
 
-           identifier_info->thread_id = thread->thread_id;
-           identifier_info->thread_handle = thread->machine.cthread_self;
-           if(thread->task->bsd_info) {
-               identifier_info->dispatch_qaddr =  identifier_info->thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
-           } else {
-                   thread_unlock(thread);
-                   splx(s);
-                   return KERN_INVALID_ARGUMENT;
-           }
+               identifier_info->thread_id = thread->thread_id;
+               identifier_info->thread_handle = thread->machine.cthread_self;
+               identifier_info->dispatch_qaddr = thread_dispatchqaddr(thread);
 
-           thread_unlock(thread);
-           splx(s);
-           return KERN_SUCCESS;
+               thread_unlock(thread);
+               splx(s);
+               return KERN_SUCCESS;
        }
        else
        if (flavor == THREAD_SCHED_TIMESHARE_INFO) {
@@ -1347,23 +1432,22 @@ thread_info_internal(
 
                ts_info = (policy_timeshare_info_t)thread_info_out;
 
-           s = splsched();
+               s = splsched();
                thread_lock(thread);
 
-           if (thread->sched_mode != TH_MODE_TIMESHARE) {
-               thread_unlock(thread);
+               if (thread->sched_mode != TH_MODE_TIMESHARE) {
+                       thread_unlock(thread);
                        splx(s);
-
                        return (KERN_INVALID_POLICY);
-           }
+               }
 
                ts_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
                if (ts_info->depressed) {
                        ts_info->base_priority = DEPRESSPRI;
-                       ts_info->depress_priority = thread->priority;
+                       ts_info->depress_priority = thread->base_pri;
                }
                else {
-                       ts_info->base_priority = thread->priority;
+                       ts_info->base_priority = thread->base_pri;
                        ts_info->depress_priority = -1;
                }
 
@@ -1371,11 +1455,11 @@ thread_info_internal(
                ts_info->max_priority = thread->max_priority;
 
                thread_unlock(thread);
-           splx(s);
+               splx(s);
 
                *thread_info_count = POLICY_TIMESHARE_INFO_COUNT;
 
-               return (KERN_SUCCESS);  
+               return (KERN_SUCCESS);
        }
        else
        if (flavor == THREAD_SCHED_FIFO_INFO) {
@@ -1389,17 +1473,17 @@ thread_info_internal(
                policy_rr_info_t                        rr_info;
                uint32_t quantum_time;
                uint64_t quantum_ns;
-               
+
                if (*thread_info_count < POLICY_RR_INFO_COUNT)
                        return (KERN_INVALID_ARGUMENT);
 
                rr_info = (policy_rr_info_t) thread_info_out;
 
-           s = splsched();
+               s = splsched();
                thread_lock(thread);
 
-           if (thread->sched_mode == TH_MODE_TIMESHARE) {
-               thread_unlock(thread);
+               if (thread->sched_mode == TH_MODE_TIMESHARE) {
+                       thread_unlock(thread);
                        splx(s);
 
                        return (KERN_INVALID_POLICY);
@@ -1408,25 +1492,80 @@ thread_info_internal(
                rr_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
                if (rr_info->depressed) {
                        rr_info->base_priority = DEPRESSPRI;
-                       rr_info->depress_priority = thread->priority;
+                       rr_info->depress_priority = thread->base_pri;
                }
                else {
-                       rr_info->base_priority = thread->priority;
+                       rr_info->base_priority = thread->base_pri;
                        rr_info->depress_priority = -1;
                }
 
                quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
                absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
-               
+
                rr_info->max_priority = thread->max_priority;
-           rr_info->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
+               rr_info->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
 
                thread_unlock(thread);
-           splx(s);
+               splx(s);
 
                *thread_info_count = POLICY_RR_INFO_COUNT;
 
-               return (KERN_SUCCESS);  
+               return (KERN_SUCCESS);
+       }
+       else
+       if (flavor == THREAD_EXTENDED_INFO) {
+               thread_basic_info_data_t        basic_info;
+               thread_extended_info_t          extended_info = (thread_extended_info_t) thread_info_out;
+
+               if (*thread_info_count < THREAD_EXTENDED_INFO_COUNT) {
+                       return (KERN_INVALID_ARGUMENT);
+               }
+
+               s = splsched();
+               thread_lock(thread);
+
+               /* NOTE: This mimics fill_taskthreadinfo(), which is the function used by proc_pidinfo() for
+                * the PROC_PIDTHREADINFO flavor (which can't be used on corpses)
+                */
+               retrieve_thread_basic_info(thread, &basic_info);
+               extended_info->pth_user_time = ((basic_info.user_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.user_time.microseconds * (integer_t)NSEC_PER_USEC));
+               extended_info->pth_system_time = ((basic_info.system_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.system_time.microseconds * (integer_t)NSEC_PER_USEC));
+
+               extended_info->pth_cpu_usage = basic_info.cpu_usage;
+               extended_info->pth_policy = basic_info.policy;
+               extended_info->pth_run_state = basic_info.run_state;
+               extended_info->pth_flags = basic_info.flags;
+               extended_info->pth_sleep_time = basic_info.sleep_time;
+               extended_info->pth_curpri = thread->sched_pri;
+               extended_info->pth_priority = thread->base_pri;
+               extended_info->pth_maxpriority = thread->max_priority;
+
+               bsd_getthreadname(thread->uthread,extended_info->pth_name);
+
+               thread_unlock(thread);
+               splx(s);
+
+               *thread_info_count = THREAD_EXTENDED_INFO_COUNT;
+
+               return (KERN_SUCCESS);
+       }
+       else
+       if (flavor == THREAD_DEBUG_INFO_INTERNAL) {
+#if DEVELOPMENT || DEBUG
+               thread_debug_info_internal_t dbg_info;
+               if (*thread_info_count < THREAD_DEBUG_INFO_INTERNAL_COUNT)
+                       return (KERN_NOT_SUPPORTED);
+
+               if (thread_info_out == NULL)
+                       return (KERN_INVALID_ARGUMENT);
+
+               dbg_info = (thread_debug_info_internal_t) thread_info_out;
+               dbg_info->page_creation_count = thread->t_page_creation_count;
+
+               *thread_info_count = THREAD_DEBUG_INFO_INTERNAL_COUNT;
+               return (KERN_SUCCESS);
+#endif /* DEVELOPMENT || DEBUG */
+               return (KERN_NOT_SUPPORTED);
        }
 
        return (KERN_INVALID_ARGUMENT);
@@ -1669,7 +1808,7 @@ THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE(void)
        task_t           task                           = current_task();
        thread_t     thread             = current_thread();
        uint64_t     tid                = thread->thread_id;
-       char         *procname          = (char *) "unknown";
+       const char       *procname          = "unknown";
        time_value_t thread_total_time  = {0, 0};
        time_value_t thread_system_time;
        time_value_t thread_user_time;
@@ -2046,8 +2185,10 @@ thread_dispatchqaddr(
 
        if (thread != THREAD_NULL) {
                thread_handle = thread->machine.cthread_self;
-
-               if (thread->task->bsd_info)
+               
+                if (thread->inspection == TRUE)
+                       dispatchqueue_addr = thread_handle + get_task_dispatchqueue_offset(thread->task);
+                else if (thread->task->bsd_info)
                        dispatchqueue_addr = thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
        }
 
@@ -2384,6 +2525,15 @@ int64_t dtrace_get_thread_vtime(thread_t thread)
                return 0;
 }
 
+int dtrace_get_thread_last_cpu_id(thread_t thread)
+{
+       if ((thread != THREAD_NULL) && (thread->last_processor != PROCESSOR_NULL)) {
+               return thread->last_processor->cpu_id;
+       } else {
+               return -1;
+       }
+}
+
 int64_t dtrace_get_thread_tracing(thread_t thread)
 {
        if (thread != THREAD_NULL)
index 0b5061a33a19a1536d4ece308967f3939c794055..07fc07fd3ab06827a903f9d6a56ce2347d6f16fb 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 
 #include <mach/port.h>
 #include <kern/cpu_number.h>
+#include <kern/smp.h>
 #include <kern/queue.h>
 #include <kern/timer.h>
 #include <kern/simple_lock.h>
 #include <kern/exception.h>
 #include <kern/affinity.h>
 
+#include <kern/waitq.h>
+
 #include <ipc/ipc_kmsg.h>
 
 #include <machine/cpu_data.h>
@@ -134,19 +137,19 @@ struct thread {
         *      transition from PROCESSOR_NULL to non-null must be done
         *      under the thread lock and the run queue lock.
         *
-        *      When the thread is on a wait queue, these first three fields
-        *      are treated as an unofficial union with a wait_queue_element.
-        *      If you change these, you must change that definition as well
-        *      (kern/wait_queue.h).
+        *      New waitq APIs allow the 'links' and 'runq' fields to be
+        *      anywhere in the thread structure.
         */
        /* Items examined often, modified infrequently */
        queue_chain_t   links;                          /* run/wait queue links */
        processor_t             runq;                           /* run queue assignment */
-       wait_queue_t    wait_queue;                     /* wait queue we are currently on */
        event64_t               wait_event;                     /* wait queue event */
+       struct waitq    *waitq;
        /* Data updated during assert_wait/thread_wakeup */
+#if __SMP__
        decl_simple_lock_data(,sched_lock)      /* scheduling lock (thread_lock()) */
        decl_simple_lock_data(,wake_lock)       /* for thread stop / wait (wake_lock()) */
+#endif
        integer_t               options;                        /* options set by thread itself */
 #define TH_OPT_INTMASK         0x0003          /* interrupt / abort level */
 #define TH_OPT_VMPRIV          0x0004          /* may allocate reserved memory */
@@ -154,16 +157,20 @@ struct thread {
 #define TH_OPT_SYSTEM_CRITICAL 0x0010          /* Thread must always be allowed to run - even under heavy load */
 #define TH_OPT_PROC_CPULIMIT   0x0020          /* Thread has a task-wide CPU limit applied to it */
 #define TH_OPT_PRVT_CPULIMIT   0x0040          /* Thread has a thread-private CPU limit applied to it */
-#define TH_OPT_IDLE_THREAD             0x0080          /* Thread is a per-processor idle thread */
+#define TH_OPT_IDLE_THREAD     0x0080          /* Thread is a per-processor idle thread */
+#define TH_OPT_GLOBAL_FORCED_IDLE      0x0100  /* Thread performs forced idle for thermal control */
+#define TH_OPT_SCHED_VM_GROUP  0x0200          /* Thread belongs to special scheduler VM group */
+#define TH_OPT_HONOR_QLIMIT    0x0400          /* Thread will honor qlimit while sending mach_msg, regardless of MACH_SEND_ALWAYS */
 
        boolean_t                       wake_active;    /* wake event on stop */
        int                                     at_safe_point;  /* thread_abort_safely allowed */
        ast_t                           reason;                 /* why we blocked */
+       uint32_t                        quantum_remaining;
+       wait_result_t                   wait_result;    /* outcome of wait -
+                                                        * may be examined by this thread
+                                                        * WITHOUT locking */
        thread_continue_t       continuation;   /* continue here next dispatch */
        void                            *parameter;             /* continuation parameter */
-       wait_result_t           wait_result;    /* outcome of wait -
-                                                                                * may be examined by this thread
-                                                                                * WITHOUT locking */
 
        /* Data updated/used in thread_invoke */
        vm_offset_t             kernel_stack;           /* current kernel stack */
@@ -178,8 +185,8 @@ struct thread {
 #define TH_SUSP                        0x02                    /* stopped or requested to stop */
 #define TH_RUN                 0x04                    /* running or on runq */
 #define TH_UNINT               0x08                    /* waiting uninteruptibly */
-#define        TH_TERMINATE    0x10                    /* halted at termination */
-#define        TH_TERMINATE2   0x20                    /* added to termination queue */
+#define TH_TERMINATE           0x10                    /* halted at termination */
+#define TH_TERMINATE2          0x20                    /* added to termination queue */
 
 #define TH_IDLE                        0x80                    /* idling processor */
 
@@ -191,10 +198,10 @@ struct thread {
        sfi_class_id_t                  sfi_wait_class; /* Currently in SFI wait for this class, protected by sfi_lock */
        
        uint32_t                        sched_flags;            /* current flag bits */
-#define TH_SFLAG_FAIRSHARE_TRIPPED     0x0001          /* fairshare scheduling activated */
+/* TH_SFLAG_FAIRSHARE_TRIPPED (unused) 0x0001 */
 #define TH_SFLAG_FAILSAFE              0x0002          /* fail-safe has tripped */
 #define TH_SFLAG_THROTTLED             0x0004          /* thread treated as background for scheduler decay purposes */
-#define TH_SFLAG_DEMOTED_MASK      (TH_SFLAG_THROTTLE_DEMOTED | TH_SFLAG_FAILSAFE | TH_SFLAG_FAIRSHARE_TRIPPED)        /* saved_mode contains previous sched_mode */
+#define TH_SFLAG_DEMOTED_MASK      (TH_SFLAG_THROTTLE_DEMOTED | TH_SFLAG_FAILSAFE)     /* saved_mode contains previous sched_mode */
 
 #define        TH_SFLAG_PROMOTED               0x0008          /* sched pri has been promoted */
 #define TH_SFLAG_ABORT                 0x0010          /* abort interruptible waits */
@@ -206,15 +213,17 @@ struct thread {
 #define TH_SFLAG_PRI_UPDATE            0x0100          /* Updating priority */
 #define TH_SFLAG_EAGERPREEMPT          0x0200          /* Any preemption of this thread should be treated as if AST_URGENT applied */
 #define TH_SFLAG_RW_PROMOTED           0x0400          /* sched pri has been promoted due to blocking with RW lock held */
-#define TH_SFLAG_PROMOTED_MASK         (TH_SFLAG_PROMOTED | TH_SFLAG_RW_PROMOTED)
 #define TH_SFLAG_THROTTLE_DEMOTED      0x0800          /* throttled thread forced to timeshare mode (may be applied in addition to failsafe) */
+#define TH_SFLAG_WAITQ_PROMOTED                0x1000          /* sched pri promoted from waitq wakeup (generally for IPC receive) */
+#define TH_SFLAG_PROMOTED_MASK         (TH_SFLAG_PROMOTED | TH_SFLAG_RW_PROMOTED | TH_SFLAG_WAITQ_PROMOTED)
 
 #define TH_SFLAG_RW_PROMOTED_BIT       (10)    /* 0x400 */
 
-       int16_t                         sched_pri;                      /* scheduled (current) priority */
-       int16_t                         priority;                       /* base priority */
-       int16_t                         max_priority;           /* copy of max base priority */
-       int16_t                         task_priority;          /* copy of task base priority */
+       int16_t                         sched_pri;              /* scheduled (current) priority */
+       int16_t                         base_pri;               /* base priority */
+       int16_t                         max_priority;           /* copy of max base priority */
+       int16_t                         task_priority;          /* copy of task base priority */
+
 #if defined(CONFIG_SCHED_GRRR)
 #if 0
        uint16_t                        grrr_deficit;           /* fixed point (1/1000th quantum) fractional deficit */
@@ -233,6 +242,7 @@ struct thread {
 #endif /* MACH_ASSERT */
 
        integer_t                       importance;                     /* task-relative importance */
+       uint32_t                        was_promoted_on_wakeup;
 
        /* Priority depression expiration */
        integer_t                       depress_timer_active;
@@ -246,9 +256,8 @@ struct thread {
                uint64_t                        deadline;
        }                                       realtime;
 
-       uint32_t                        was_promoted_on_wakeup;
        uint64_t                        last_run_time;          /* time when thread was switched away from */
-       uint32_t                        quantum_remaining;                      /* duration of current quantum remaining */
+       uint64_t                        last_made_runnable_time;        /* time when thread was unblocked or preempted */
 
 #if defined(CONFIG_SCHED_MULTIQ)
        sched_group_t                   sched_group;
@@ -286,6 +295,7 @@ struct thread {
        uint32_t                        p_switch;               /* total processor switches */
        uint32_t                        ps_switch;              /* total pset switches */
 
+       integer_t mutex_count;  /* total count of locks held */
        /* Timing data structures */
        int                                     precise_user_kernel_time; /* precise user/kernel enabled for this thread */
        timer_data_t            user_timer;                     /* user mode timer */
@@ -295,8 +305,10 @@ struct thread {
        uint64_t                        vtimer_prof_save;
        uint64_t                        vtimer_rlim_save;
 
+#if CONFIG_SCHED_SFI
        /* Timing for wait state */
        uint64_t                wait_sfi_begin_time;    /* start time for thread waiting in SFI */
+#endif
 
        /* Timed wait expiration */
        timer_call_data_t       wait_timer;
@@ -320,7 +332,6 @@ struct thread {
                        mach_vm_address_t       msg_addr;       /* receive buffer pointer */
                        mach_msg_size_t         msize;          /* max size for recvd msg */
                        mach_msg_option_t       option;         /* options for receive */
-                       mach_msg_size_t         slist_size;     /* scatter list size */
                        mach_port_name_t        receiver_name;  /* the receive port name */
                        struct ipc_kmsg         *kmsg;          /* received message */
                        mach_msg_continue_t     continuation;
@@ -346,6 +357,10 @@ struct thread {
                mach_exception_data_type_t              subcode;        /* Exception sub-code */
        } guard_exc_info;
 
+       /* Kernel holds on this thread  */
+       int16_t                                         suspend_count;
+       /* User level suspensions */
+       int16_t                                         user_stop_count;
 
        /* IPC data structures */
 #if IMPORTANCE_INHERITANCE
@@ -362,9 +377,6 @@ struct thread {
        /* Activation */
                queue_chain_t                   task_threads;
 
-               /*** Machine-dependent state ***/
-               struct machine_thread   machine;
-
                /* Task membership */
                struct task                             *task;
                vm_map_t                                map;
@@ -380,17 +392,10 @@ struct thread {
                        active:1,                               /* Thread is active and has not been terminated */
                        started:1,                              /* Thread has been started after creation */
                        static_param:1,                 /* Disallow policy parameter changes */
+                       inspection:1,                           /* TRUE when task is being inspected by crash reporter */
                        policy_reset:1,                 /* Disallow policy parameter changes on terminating threads */
                        :0;
-
-               /* Return Handers */
-               struct ReturnHandler {
-                       struct ReturnHandler    *next;
-                       void            (*handler)(
-                                                       struct ReturnHandler            *rh,
-                                                       struct thread                           *thread);
-               } *handlers, special_handler;
-
+       
                /* Ports associated with this thread */
                struct ipc_port                 *ith_self;              /* not a right, doesn't hold ref */
                struct ipc_port                 *ith_sself;             /* a send right */
@@ -410,11 +415,10 @@ struct thread {
 
                clock_sec_t t_page_creation_time;
                uint32_t    t_page_creation_count;
-
-       uint32_t    t_page_creation_throttled;
+               uint32_t    t_page_creation_throttled;
 #if (DEVELOPMENT || DEBUG)
-       uint64_t    t_page_creation_throttled_hard;
-       uint64_t    t_page_creation_throttled_soft;
+               uint64_t    t_page_creation_throttled_hard;
+               uint64_t    t_page_creation_throttled_soft;
 #endif /* DEVELOPMENT || DEBUG */
 
 #define T_CHUD_MARKED           0x01          /* this thread is marked by CHUD */
@@ -433,8 +437,6 @@ struct thread {
                uint32_t t_chud;        /* CHUD flags, used for Shark */
                uint32_t chud_c_switch; /* last dispatch detection */
 
-               integer_t mutex_count;  /* total count of locks held */
-
 #ifdef KPC
        /* accumulated performance counters for this thread */
        uint64_t *kpc_buf;
@@ -478,11 +480,10 @@ struct thread {
        } *overrides;
 
        int     iotier_override; /* atomic operations to set, cleared on ret to user */
+       integer_t               saved_importance;               /* saved task-relative importance */
        io_stat_info_t                  thread_io_stats; /* per-thread I/O statistics */
 
 
-       integer_t               saved_importance;               /* saved task-relative importance */
-
        uint32_t                        thread_callout_interrupt_wakeups;
        uint32_t                        thread_callout_platform_idle_wakeups;
        uint32_t                        thread_timer_wakeups_bin_1;
@@ -492,16 +493,18 @@ struct thread {
                                        callout_woken_from_platform_idle:1,
                                        callout_woke_thread:1,
                                        thread_bitfield_unused:13;
-       /* Kernel holds on this thread  */
-       int16_t                                         suspend_count;
-       /* User level suspensions */
-       int16_t                                         user_stop_count;
 
        mach_port_name_t                ith_voucher_name;
        ipc_voucher_t                   ith_voucher;
 #if CONFIG_IOSCHED
        void                            *decmp_upl;
 #endif /* CONFIG_IOSCHED */
+
+       /* work interval ID (if any) associated with the thread. Uses thread mutex */
+       uint64_t                work_interval_id;
+
+       /*** Machine-dependent state ***/
+       struct machine_thread   machine;
 };
 
 #define ith_state              saved.receive.state
@@ -509,7 +512,6 @@ struct thread {
 #define ith_msg_addr                   saved.receive.msg_addr
 #define ith_msize              saved.receive.msize
 #define        ith_option              saved.receive.option
-#define ith_scatter_list_size  saved.receive.slist_size
 #define ith_receiver_name      saved.receive.receiver_name
 #define ith_continuation       saved.receive.continuation
 #define ith_kmsg               saved.receive.kmsg
@@ -530,9 +532,6 @@ extern void                 thread_daemon_init(void);
 #define        thread_reference_internal(thread)       \
                        (void)hw_atomic_add(&(thread)->ref_count, 1)
 
-#define thread_deallocate_internal(thread)     \
-                       hw_atomic_sub(&(thread)->ref_count, 1)
-
 #define thread_reference(thread)                                       \
 MACRO_BEGIN                                                                                    \
        if ((thread) != THREAD_NULL)                                    \
@@ -542,6 +541,9 @@ MACRO_END
 extern void                    thread_deallocate(
                                                thread_t                thread);
 
+extern void                    thread_deallocate_safe(
+                                               thread_t                thread);
+
 extern void                    thread_terminate_self(void);
 
 extern kern_return_t   thread_terminate_internal(
@@ -553,6 +555,8 @@ extern void                 thread_start_internal(
 extern void                    thread_terminate_enqueue(
                                                thread_t                thread);
 
+extern void                    thread_terminate_crashed_threads(void);
+
 extern void                    thread_stack_enqueue(
                                                thread_t                thread);
 
@@ -562,7 +566,8 @@ extern void                 thread_hold(
 extern void                    thread_release(
                                                thread_t        thread);
 
-
+/* Locking for scheduler state, always acquired with interrupts disabled (splsched()) */
+#if __SMP__
 #define        thread_lock_init(th)    simple_lock_init(&(th)->sched_lock, 0)
 #define thread_lock(th)                        simple_lock(&(th)->sched_lock)
 #define thread_unlock(th)              simple_unlock(&(th)->sched_lock)
@@ -570,6 +575,15 @@ extern void                        thread_release(
 #define wake_lock_init(th)             simple_lock_init(&(th)->wake_lock, 0)
 #define wake_lock(th)                  simple_lock(&(th)->wake_lock)
 #define wake_unlock(th)                        simple_unlock(&(th)->wake_lock)
+#else
+#define thread_lock_init(th)   do { (void)th; } while(0)
+#define thread_lock(th)                        do { (void)th; } while(0)
+#define thread_unlock(th)              do { (void)th; } while(0)
+
+#define wake_lock_init(th)             do { (void)th; } while(0)
+#define wake_lock(th)                  do { (void)th; } while(0)
+#define wake_unlock(th)                        do { (void)th; } while(0)
+#endif
 
 #define thread_should_halt_fast(thread)                (!(thread)->active)
 
@@ -647,6 +661,9 @@ extern void                         machine_load_context(
 extern kern_return_t   machine_thread_state_initialize(
                                                        thread_t                                thread);
 
+extern kern_return_t   machine_thread_neon_state_initialize(
+                                                       thread_t                                thread);
+
 extern kern_return_t   machine_thread_set_state(
                                                        thread_t                                thread,
                                                        thread_flavor_t                 flavor,
@@ -691,19 +708,14 @@ extern kern_return_t      machine_thread_set_tsd_base(
                                                        thread_t                                thread,
                                                        mach_vm_offset_t                tsd_base);
 
-typedef struct ReturnHandler           ReturnHandler;
-
 #define        thread_mtx_lock(thread)                 lck_mtx_lock(&(thread)->mutex)
 #define        thread_mtx_try(thread)                  lck_mtx_try_lock(&(thread)->mutex)
 #define        thread_mtx_unlock(thread)               lck_mtx_unlock(&(thread)->mutex)
 
-extern void                    act_execute_returnhandlers(void);
-
 extern void                    install_special_handler(
                                                thread_t                thread);
 
 extern void                    special_handler(
-                                               ReturnHandler   *rh,
                                                thread_t                thread);
 
 extern void
@@ -731,6 +743,8 @@ typedef struct {
        uint32_t        qos_latency_qos[THREAD_QOS_LAST];
 } qos_policy_params_t;
 
+extern void thread_set_options(uint32_t thopt);
+
 #else  /* MACH_KERNEL_PRIVATE */
 
 __BEGIN_DECLS
@@ -781,6 +795,17 @@ __BEGIN_DECLS
 uint16_t       thread_set_tag(thread_t, uint16_t);
 uint16_t       thread_get_tag(thread_t);
 
+/*
+ * Allocate/assign a single work interval ID for a thread,
+ * and support deallocating it.
+ */
+extern kern_return_t                   thread_policy_create_work_interval(
+       thread_t                thread,
+       uint64_t                *work_interval_id);
+
+extern kern_return_t                   thread_policy_destroy_work_interval(
+       thread_t                thread,
+       uint64_t                work_interval_id);
 
 extern kern_return_t    thread_state_initialize(
                                                        thread_t                                thread);
@@ -797,6 +822,11 @@ extern kern_return_t       thread_getstatus(
                                                        thread_state_t                  tstate,
                                                        mach_msg_type_number_t  *count);
 
+extern kern_return_t   thread_create_with_continuation(
+                                                       task_t task,
+                                                       thread_t *new_thread,
+                                                       thread_continue_t continuation);
+
 extern kern_return_t   thread_create_workq(
                                                        task_t                  task,
                                                        thread_continue_t       thread_return,
@@ -916,9 +946,16 @@ extern task_t      get_threadtask(thread_t);
 extern void            *get_bsdthread_info(thread_t);
 extern void            set_bsdthread_info(thread_t, void *);
 extern void            *uthread_alloc(task_t, thread_t, int);
-extern void            uthread_cleanup(task_t, void *, void *); 
+extern void            uthread_cleanup_name(void *uthread);
+extern void            uthread_cleanup(task_t, void *, void *, boolean_t);
 extern void            uthread_zone_free(void *); 
-extern void            uthread_cred_free(void *); 
+extern void            uthread_cred_free(void *);
+
+#if PROC_REF_DEBUG
+extern int             uthread_get_proc_refcount(void *);
+extern void            uthread_reset_proc_refcount(void *);
+extern int             proc_ref_tracking_disabled;
+#endif
 
 extern boolean_t       thread_should_halt(
                                                thread_t                thread);
@@ -928,13 +965,14 @@ extern boolean_t  thread_should_abort(
 
 extern int is_64signalregset(void);
 
-void act_set_apc(thread_t);
-void act_set_kperf(thread_t);
+extern void act_set_kperf(thread_t);
+extern void set_astledger(thread_t thread);
 
 extern uint32_t dtrace_get_thread_predcache(thread_t);
 extern int64_t dtrace_get_thread_vtime(thread_t);
 extern int64_t dtrace_get_thread_tracing(thread_t);
 extern boolean_t dtrace_get_thread_reentering(thread_t);
+extern int dtrace_get_thread_last_cpu_id(thread_t);
 extern vm_offset_t dtrace_get_kernel_stack(thread_t);
 extern void dtrace_set_thread_predcache(thread_t, uint32_t);
 extern void dtrace_set_thread_vtime(thread_t, int64_t);
diff --git a/osfmk/kern/thread_act.c b/osfmk/kern/thread_act.c
index 86041cd8053b281d1ada8bef7e9bf2642faf9eef..6c1b2e37959439466f37e3f0e791eeb8df68fc01 100644 (file)
@@ -86,7 +86,7 @@ void                  special_handler_continue(void);
 
 /*
  * Internal routine to mark a thread as started.
- * Always called with the thread locked.
+ * Always called with the thread mutex locked.
  *
  * Note: function intentionally declared with the noinline attribute to
  * prevent multiple declaration of probe symbols in this file; we would
@@ -391,7 +391,7 @@ thread_abort_safely(
 
 kern_return_t
 thread_info(
-       thread_t                                thread,
+       thread_t                        thread,
        thread_flavor_t                 flavor,
        thread_info_t                   thread_info_out,
        mach_msg_type_number_t  *thread_info_count)
@@ -403,7 +403,7 @@ thread_info(
 
        thread_mtx_lock(thread);
 
-       if (thread->active)
+       if (thread->active || thread->inspection)
                result = thread_info_internal(
                                                thread, flavor, thread_info_out, thread_info_count);
        else
@@ -451,6 +451,11 @@ thread_get_state(
                        result = machine_thread_get_state(
                                                                        thread, flavor, state, state_count);
        }
+       else if (thread->inspection)
+       {
+               result = machine_thread_get_state(
+                                                                       thread, flavor, state, state_count);
+       }
        else
                result = KERN_TERMINATED;
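
Both thread_info() and thread_get_state() now also accept threads that are merely under inspection rather than active, instead of failing with KERN_TERMINATED. The widened guard, as it appears in the hunks above:

    if (thread->active || thread->inspection)
            result = thread_info_internal(thread, flavor, thread_info_out, thread_info_count);
    else
            result = KERN_TERMINATED;
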
 
@@ -736,23 +741,14 @@ void
 install_special_handler_locked(
        thread_t                                thread)
 {
-       ReturnHandler   **rh;
-
-       /* The work handler must always be the last ReturnHandler on the list,
-          because it can do tricky things like detach the thr_act.  */
-       for (rh = &thread->handlers; *rh; rh = &(*rh)->next)
-               continue;
-
-       if (rh != &thread->special_handler.next)
-               *rh = &thread->special_handler;
-
+       
        /*
         * Temporarily undepress, so target has
         * a chance to do locking required to
         * block itself in special_handler().
         */
        if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK)
-               SCHED(compute_priority)(thread, TRUE);
+               thread_recompute_sched_pri(thread, TRUE);
 
        thread_ast_set(thread, AST_APC);
 
@@ -770,46 +766,9 @@ install_special_handler_locked(
 
 /*
  * Activation control support routines internal to this file:
+ *
  */
 
-void
-act_execute_returnhandlers(void)
-{
-       thread_t        thread = current_thread();
-
-       thread_ast_clear(thread, AST_APC);
-       spllo();
-
-       for (;;) {
-               ReturnHandler   *rh;
-
-               thread_mtx_lock(thread);
-
-               (void)splsched();
-               thread_lock(thread);
-
-               rh = thread->handlers;
-               if (rh != NULL) {
-                       thread->handlers = rh->next;
-
-                       thread_unlock(thread);
-                       spllo();
-
-                       thread_mtx_unlock(thread);
-
-                       /* Execute it */
-                       (*rh->handler)(rh, thread);
-               }
-               else
-                       break;
-       }
-
-       thread_unlock(thread);
-       spllo();
-
-       thread_mtx_unlock(thread);
-}
-
 /*
  * special_handler_continue
  *
@@ -854,7 +813,6 @@ special_handler_continue(void)
  */
 void
 special_handler(
-       __unused ReturnHandler  *rh,
        thread_t                                thread)
 {
        spl_t           s;
@@ -872,16 +830,9 @@ special_handler(
         */
        if (thread->active) {
                if (thread->suspend_count > 0) {
-                       if (thread->handlers == NULL) {
-                               assert_wait(&thread->suspend_count, THREAD_ABORTSAFE);
-                               thread_mtx_unlock(thread);
-                               thread_block((thread_continue_t)special_handler_continue);
-                               /*NOTREACHED*/
-                       }
-
+                       assert_wait(&thread->suspend_count, THREAD_ABORTSAFE);
                        thread_mtx_unlock(thread);
-
-                       special_handler_continue();
+                       thread_block((thread_continue_t)special_handler_continue);
                        /*NOTREACHED*/
                }
        }
@@ -946,28 +897,27 @@ act_get_state(
 
 static void
 act_set_ast(
-           thread_t    thread,
+           thread_t thread,
            ast_t ast)
 {
-       spl_t           s = splsched();
-       
+       spl_t s = splsched();
+
        if (thread == current_thread()) {
                thread_ast_set(thread, ast);
                ast_propagate(thread->ast);
-       }
-       else {
-               processor_t             processor;
+       } else {
+               processor_t processor;
 
                thread_lock(thread);
                thread_ast_set(thread, ast);
                processor = thread->last_processor;
                if ( processor != PROCESSOR_NULL            &&
                     processor->state == PROCESSOR_RUNNING  &&
-                    processor->active_thread == thread      )
+                    processor->active_thread == thread     )
                        cause_ast_check(processor);
                thread_unlock(thread);
        }
-       
+
        splx(s);
 }
 
@@ -978,13 +928,6 @@ act_set_astbsd(
        act_set_ast( thread, AST_BSD );
 }
 
-void
-act_set_apc(
-       thread_t        thread)
-{
-       act_set_ast( thread, AST_APC );
-}
-
 void
 act_set_kperf(
        thread_t        thread)
@@ -1005,3 +948,11 @@ act_set_astmacf(
        act_set_ast( thread, AST_MACF);
 }
 #endif
+
+void
+set_astledger(thread_t thread)
+{
+       act_set_ast(thread, AST_LEDGER);
+}
+
+
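
With act_execute_returnhandlers() and the ReturnHandler list removed, each AST flavor is requested through a thin wrapper over act_set_ast(), which sets the bit and, when the target is running on another processor, nudges it with cause_ast_check(). A hedged sketch of a caller of the new set_astledger() wrapper (the calling function below is hypothetical; only the wrapper itself comes from this change):

    /* Hypothetical caller: have another thread service its ledger at its
     * next AST boundary; act_set_ast() handles the remote-CPU poke. */
    void
    request_ledger_check(thread_t target)
    {
            set_astledger(target);
    }
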
diff --git a/osfmk/kern/thread_call.c b/osfmk/kern/thread_call.c
index 99ffcc9b5cb21d5633ac7c48bbb6ce05a3c1bde4..82339e20864c66fca9c5bf76323955eaa9f365e5 100644 (file)
@@ -35,7 +35,7 @@
 #include <kern/clock.h>
 #include <kern/task.h>
 #include <kern/thread.h>
-#include <kern/wait_queue.h>
+#include <kern/waitq.h>
 #include <kern/ledger.h>
 
 #include <vm/vm_pageout.h>
@@ -54,7 +54,7 @@
 #include <machine/machine_routines.h>
 
 static zone_t                  thread_call_zone;
-static struct wait_queue       daemon_wqueue;
+static struct waitq            daemon_waitq;
 
 struct thread_call_group {
        queue_head_t            pending_queue;
@@ -66,7 +66,7 @@ struct thread_call_group {
        timer_call_data_t       delayed_timer;
        timer_call_data_t       dealloc_timer;
 
-       struct wait_queue       idle_wqueue;
+       struct waitq            idle_waitq;
        uint32_t                idle_count, active_count;
 
        integer_t               pri;
@@ -253,7 +253,7 @@ thread_call_group_setup(
        timer_call_setup(&group->delayed_timer, thread_call_delayed_timer, group);
        timer_call_setup(&group->dealloc_timer, thread_call_dealloc_timer, group);
 
-       wait_queue_init(&group->idle_wqueue, SYNC_POLICY_FIFO);
+       waitq_init(&group->idle_waitq, SYNC_POLICY_FIFO|SYNC_POLICY_DISABLE_IRQ);
 
        group->target_thread_count = target_thread_count;
        group->pri = thread_call_priority_to_sched_pri(pri);
@@ -326,7 +326,7 @@ thread_call_initialize(void)
 #endif
 
        nanotime_to_absolutetime(0, THREAD_CALL_DEALLOC_INTERVAL_NS, &thread_call_dealloc_interval_abs);
-       wait_queue_init(&daemon_wqueue, SYNC_POLICY_FIFO);
+       waitq_init(&daemon_waitq, SYNC_POLICY_FIFO);
 
        thread_call_group_setup(&thread_call_groups[THREAD_CALL_PRIORITY_LOW], THREAD_CALL_PRIORITY_LOW, 0, TRUE);
        thread_call_group_setup(&thread_call_groups[THREAD_CALL_PRIORITY_USER], THREAD_CALL_PRIORITY_USER, 0, TRUE);
@@ -1053,7 +1053,8 @@ thread_call_wake(
         * Traditional behavior: wake only if no threads running.
         */
        if (group_isparallel(group) || group->active_count == 0) {
-               if (wait_queue_wakeup_one(&group->idle_wqueue, NO_EVENT, THREAD_AWAKENED, -1) == KERN_SUCCESS) {
+               if (waitq_wakeup64_one(&group->idle_waitq, NO_EVENT64,
+                                      THREAD_AWAKENED, WAITQ_ALL_PRIORITIES) == KERN_SUCCESS) {
                        group->idle_count--; group->active_count++;
 
                        if (group->idle_count == 0) {
@@ -1063,7 +1064,8 @@ thread_call_wake(
                } else {
                        if (!thread_call_daemon_awake && thread_call_group_should_add_thread(group)) {
                                thread_call_daemon_awake = TRUE;
-                               wait_queue_wakeup_one(&daemon_wqueue, NO_EVENT, THREAD_AWAKENED, -1);
+                               waitq_wakeup64_one(&daemon_waitq, NO_EVENT64,
+                                                  THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
                        }
                }
        }
@@ -1202,9 +1204,11 @@ thread_call_thread(
 
                enable_ints_and_unlock(s);
 
+#if DEVELOPMENT || DEBUG
                KERNEL_DEBUG_CONSTANT(
                                MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) | DBG_FUNC_NONE,
                                VM_KERNEL_UNSLIDE(func), param0, param1, 0, 0);
+#endif /* DEVELOPMENT || DEBUG */
 
 #if CONFIG_DTRACE
                DTRACE_TMR6(thread_callout__start, thread_call_func_t, func, int, 0, int, (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF), (call->tc_flags & THREAD_CALL_DELAYED), call);
@@ -1264,7 +1268,7 @@ thread_call_thread(
                }   
 
                /* Wait for more work (or termination) */
-               wres = wait_queue_assert_wait(&group->idle_wqueue, NO_EVENT, THREAD_INTERRUPTIBLE, 0); 
+               wres = waitq_assert_wait64(&group->idle_waitq, NO_EVENT64, THREAD_INTERRUPTIBLE, 0);
                if (wres != THREAD_WAITING) {
                        panic("kcall worker unable to assert wait?");
                }   
@@ -1276,7 +1280,7 @@ thread_call_thread(
                if (group->idle_count < group->target_thread_count) {
                        group->idle_count++;
 
-                       wait_queue_assert_wait(&group->idle_wqueue, NO_EVENT, THREAD_UNINT, 0); /* Interrupted means to exit */
+                       waitq_assert_wait64(&group->idle_waitq, NO_EVENT64, THREAD_UNINT, 0); /* Interrupted means to exit */
 
                        enable_ints_and_unlock(s);
 
@@ -1331,7 +1335,7 @@ thread_call_daemon_continue(__unused void *arg)
 
 out:
        thread_call_daemon_awake = FALSE;
-       wait_queue_assert_wait(&daemon_wqueue, NO_EVENT, THREAD_UNINT, 0);
+       waitq_assert_wait64(&daemon_waitq, NO_EVENT64, THREAD_UNINT, 0);
 
        enable_ints_and_unlock(s);
 
@@ -1484,7 +1488,8 @@ thread_call_dealloc_timer(
                if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) {
                        terminated = TRUE;
                        group->idle_count--;
-                       res = wait_queue_wakeup_one(&group->idle_wqueue, NO_EVENT, THREAD_INTERRUPTED, -1);
+                       res = waitq_wakeup64_one(&group->idle_waitq, NO_EVENT64,
+                                                THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES);
                        if (res != KERN_SUCCESS) {
                                panic("Unable to wake up idle thread for termination?");
                        }
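
The thread_call changes above are a mechanical migration from the legacy wait_queue API to the new waitq API: the embedded queue type changes from struct wait_queue to struct waitq, events become 64-bit, and the wakeup calls take an explicit priority argument instead of -1. The translation used throughout this file, with both forms taken verbatim from the hunks above:

    /* before */
    wait_queue_init(&group->idle_wqueue, SYNC_POLICY_FIFO);
    wait_queue_assert_wait(&group->idle_wqueue, NO_EVENT, THREAD_UNINT, 0);
    wait_queue_wakeup_one(&group->idle_wqueue, NO_EVENT, THREAD_AWAKENED, -1);

    /* after */
    waitq_init(&group->idle_waitq, SYNC_POLICY_FIFO|SYNC_POLICY_DISABLE_IRQ);
    waitq_assert_wait64(&group->idle_waitq, NO_EVENT64, THREAD_UNINT, 0);
    waitq_wakeup64_one(&group->idle_waitq, NO_EVENT64, THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
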
diff --git a/osfmk/kern/thread_policy.c b/osfmk/kern/thread_policy.c
index ece343e196899d2eda15e768501728eba26f7cfc..9a82a198fbd3e5b358e4ceb85946e98c8dddcd42 100644 (file)
@@ -92,15 +92,8 @@ const qos_policy_params_t thread_qos_policy_params = {
        .qos_latency_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(LATENCY_QOS_TIER_3),
 };
 
-void
-thread_recompute_qos(thread_t thread);
-
 static void
-thread_recompute_priority(
-       thread_t                thread);
-
-static void
-thread_set_user_sched_mode(thread_t thread, sched_mode_t mode);
+thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
 
 static int
 thread_qos_scaled_relative_priority(int qos, int qos_relprio);
@@ -276,13 +269,7 @@ thread_policy_set_internal(
                s = splsched();
                thread_lock(thread);
 
-               boolean_t removed = thread_run_queue_remove(thread);
-
-               thread_set_user_sched_mode(thread, mode);
-               thread_recompute_priority(thread);
-
-               if (removed)
-                       thread_setrun(thread, SCHED_TAILQ);
+               thread_set_user_sched_mode_and_recompute_pri(thread, mode);
 
                thread_unlock(thread);
                splx(s);
@@ -312,23 +299,17 @@ thread_policy_set_internal(
                s = splsched();
                thread_lock(thread);
 
-               boolean_t removed = thread_run_queue_remove(thread);
-
                thread->realtime.period = info->period;
                thread->realtime.computation = info->computation;
                thread->realtime.constraint = info->constraint;
                thread->realtime.preemptible = info->preemptible;
 
-               thread_set_user_sched_mode(thread, TH_MODE_REALTIME);
-               thread_recompute_priority(thread);
-
-               if (removed)
-                       thread_setrun(thread, SCHED_TAILQ);
+               thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);
 
                thread_unlock(thread);
                splx(s);
 
-               sfi_reevaluate(thread);         
+               sfi_reevaluate(thread);
 
                break;
        }
@@ -579,14 +560,7 @@ thread_set_mode_and_absolute_pri(
 
                thread->importance = priority - thread->task_priority;
 
-               boolean_t removed = thread_run_queue_remove(thread);
-
-               thread_set_user_sched_mode(thread, mode);
-
-               thread_recompute_priority(thread);
-
-               if (removed)
-                       thread_setrun(thread, SCHED_TAILQ);
+               thread_set_user_sched_mode_and_recompute_pri(thread, mode);
        }
 
        thread_unlock(thread);
@@ -598,15 +572,21 @@ thread_set_mode_and_absolute_pri(
 }
 
 /*
- * Set the thread's requested mode
+ * Set the thread's requested mode and recompute priority
  * Called with thread mutex and thread locked
+ *
+ * TODO: Mitigate potential problems caused by moving thread to end of runq
+ * whenever its priority is recomputed
+ *      Only remove when it actually changes? Attempt to re-insert at appropriate location?
  */
 static void
-thread_set_user_sched_mode(thread_t thread, sched_mode_t mode)
+thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
 {
        if (thread->policy_reset)
                return;
 
+       boolean_t removed = thread_run_queue_remove(thread);
+
        /*
         * TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
         * That way there's zero confusion over which the user wants
@@ -616,6 +596,11 @@ thread_set_user_sched_mode(thread_t thread, sched_mode_t mode)
                thread->saved_mode = mode;
        else
                sched_set_thread_mode(thread, mode);
+
+       thread_recompute_priority(thread);
+
+       if (removed)
+               thread_run_queue_reinsert(thread, SCHED_TAILQ);
 }
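
The refactor above folds the remove-from-runq / set-mode / recompute-priority / reinsert sequence, previously repeated at three call sites, into thread_set_user_sched_mode_and_recompute_pri(). A condensed sketch of the sequence it centralizes (drawn from the hunks above; the policy_reset early return is omitted):

    boolean_t removed = thread_run_queue_remove(thread);   /* pull off the runq if queued */
    sched_set_thread_mode(thread, mode);                    /* or stash in saved_mode under the failsafe */
    thread_recompute_priority(thread);
    if (removed)
            thread_run_queue_reinsert(thread, SCHED_TAILQ); /* requeue at the tail; see TODO above */
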
 
 /* called with task lock locked */
@@ -778,7 +763,7 @@ out:
  *
  * Called with thread_lock and thread mutex held.
  */
-static void
+void
 thread_recompute_priority(
        thread_t                thread)
 {
@@ -819,6 +804,14 @@ thread_recompute_priority(
                priority += thread->task_priority;
        }
 
+       if (thread->saved_mode == TH_MODE_REALTIME &&
+           thread->sched_flags & TH_SFLAG_FAILSAFE)
+               priority = DEPRESSPRI;
+
+       if (thread->effective_policy.terminated == TRUE && priority < thread->task_priority) {
+               priority = thread->task_priority;
+       }
+
        if (priority > thread->max_priority)
                priority = thread->max_priority;
        else if (priority < MINPRI)
@@ -1176,3 +1169,42 @@ thread_policy_get(
 
        return (result);
 }
+
+static volatile uint64_t unique_work_interval_id = 1; /* Start at 1, 0 is not a valid work interval ID */
+
+kern_return_t
+thread_policy_create_work_interval(
+       thread_t                thread,
+       uint64_t                *work_interval_id)
+{
+       thread_mtx_lock(thread);
+       if (thread->work_interval_id) {
+               /* already assigned a work interval ID */
+               thread_mtx_unlock(thread);
+               return (KERN_INVALID_VALUE);
+       }
+
+       thread->work_interval_id = OSIncrementAtomic64((volatile int64_t *)&unique_work_interval_id);
+       *work_interval_id = thread->work_interval_id;
+
+       thread_mtx_unlock(thread);
+       return KERN_SUCCESS;
+}
+
+kern_return_t
+thread_policy_destroy_work_interval(
+       thread_t                thread,
+       uint64_t                work_interval_id)
+{
+       thread_mtx_lock(thread);
+       if (work_interval_id == 0 || thread->work_interval_id == 0 || thread->work_interval_id != work_interval_id) {
+               /* work ID isn't valid or doesn't match previously assigned work interval ID */
+               thread_mtx_unlock(thread);
+               return (KERN_INVALID_ARGUMENT);
+       }
+
+       thread->work_interval_id = 0;
+
+       thread_mtx_unlock(thread);
+       return KERN_SUCCESS;
+}
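
thread_policy_create_work_interval() hands each thread at most one nonzero 64-bit work interval ID from a global counter, and thread_policy_destroy_work_interval() releases it only when the caller presents the matching ID. A hedged usage sketch (the calling context is assumed; only the two routines and their return codes come from the change above):

    uint64_t wi_id = 0;
    kern_return_t kr;

    kr = thread_policy_create_work_interval(thread, &wi_id);
    if (kr == KERN_INVALID_VALUE) {
            /* the thread already owns a work interval ID */
    } else if (kr == KERN_SUCCESS) {
            /* ... tag work with wi_id ... */
            kr = thread_policy_destroy_work_interval(thread, wi_id);
            /* KERN_INVALID_ARGUMENT if the ID is 0 or does not match */
    }
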
diff --git a/osfmk/kern/timer_call.c b/osfmk/kern/timer_call.c
index 56497e013e59bc2f81b25febdcfaaea36455a01a..8c65ed2f89df8b011c37f865b92d7bc7f3814b4c 100644 (file)
@@ -32,6 +32,7 @@
 #include <mach/mach_types.h>
 
 #include <kern/clock.h>
+#include <kern/smp.h>
 #include <kern/processor.h>
 #include <kern/timer_call.h>
 #include <kern/timer_queue.h>
@@ -73,13 +74,17 @@ lck_grp_t               timer_longterm_lck_grp;
 lck_attr_t              timer_longterm_lck_attr;
 lck_grp_attr_t          timer_longterm_lck_grp_attr;
 
-
+/* Timer queue lock must be acquired with interrupts disabled (under splclock()) */
+#if __SMP__
 #define timer_queue_lock_spin(queue)                                   \
        lck_mtx_lock_spin_always(&queue->lock_data)
 
 #define timer_queue_unlock(queue)              \
        lck_mtx_unlock_always(&queue->lock_data)
-
+#else
+#define timer_queue_lock_spin(queue)   (void)1
+#define timer_queue_unlock(queue)              (void)1
+#endif
 
 #define QUEUE(x)       ((queue_t)(x))
 #define MPQUEUE(x)     ((mpqueue_head_t *)(x))
diff --git a/osfmk/kern/wait_queue.c b/osfmk/kern/wait_queue.c
deleted file mode 100644 (file)
index 1c99877..0000000
+++ /dev/null
@@ -1,2172 +0,0 @@
-/*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_FREE_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *     File:   wait_queue.c (adapted from sched_prim.c)
- *     Author: Avadis Tevanian, Jr.
- *     Date:   1986
- *
- *     Primitives for manipulating wait queues: either global
- *     ones from sched_prim.c, or private ones associated with
- *     particular structures(pots, semaphores, etc..).
- */
-
-#include <kern/kern_types.h>
-#include <kern/simple_lock.h>
-#include <kern/zalloc.h>
-#include <kern/queue.h>
-#include <kern/spl.h>
-#include <mach/sync_policy.h>
-#include <kern/mach_param.h>
-#include <kern/sched_prim.h>
-
-#include <kern/wait_queue.h>
-#include <vm/vm_kern.h>
-
-/* forward declarations */
-static boolean_t wait_queue_member_locked(
-                       wait_queue_t            wq,
-                       wait_queue_set_t        wq_set);
-
-static void wait_queues_init(void);
-
-#define WAIT_QUEUE_MAX thread_max
-#define WAIT_QUEUE_SET_MAX task_max * 3
-#define WAIT_QUEUE_LINK_MAX PORT_MAX / 2 + (WAIT_QUEUE_MAX * WAIT_QUEUE_SET_MAX) / 64
-
-static zone_t _wait_queue_link_zone;
-static zone_t _wait_queue_set_zone;
-static zone_t _wait_queue_zone;
-
-/* see rdar://6737748&5561610; we need an unshadowed
- * definition of a WaitQueueLink for debugging,
- * but it needs to be used somewhere to wind up in
- * the dSYM file. */
-volatile WaitQueueLink *unused_except_for_debugging;
-
-
-/*
- *     Waiting protocols and implementation:
- *
- *     Each thread may be waiting for exactly one event; this event
- *     is set using assert_wait().  That thread may be awakened either
- *     by performing a thread_wakeup_prim() on its event,
- *     or by directly waking that thread up with clear_wait().
- *
- *     The implementation of wait events uses a hash table.  Each
- *     bucket is queue of threads having the same hash function
- *     value; the chain for the queue (linked list) is the run queue
- *     field.  [It is not possible to be waiting and runnable at the
- *     same time.]
- *
- *     Locks on both the thread and on the hash buckets govern the
- *     wait event field and the queue chain field.  Because wakeup
- *     operations only have the event as an argument, the event hash
- *     bucket must be locked before any thread.
- *
- *     Scheduling operations may also occur at interrupt level; therefore,
- *     interrupts below splsched() must be prevented when holding
- *     thread or hash bucket locks.
- *
- *     The wait event hash table declarations are as follows:
- */
-
-struct wait_queue boot_wait_queue[1];
-__private_extern__ struct wait_queue *wait_queues = &boot_wait_queue[0];
-__private_extern__ uint32_t num_wait_queues = 1;
-
-#define        P2ROUNDUP(x, align) (-(-((uint32_t)(x)) & -(align)))
-#define ROUNDDOWN(x,y) (((x)/(y))*(y))
-
-static uint32_t
-compute_wait_hash_size(void)
-{
-       uint32_t hsize, queues;
-       
-       if (PE_parse_boot_argn("wqsize", &hsize, sizeof(hsize)))
-               return (hsize);
-
-       queues = thread_max / 11;
-       hsize = P2ROUNDUP(queues * sizeof(struct wait_queue), PAGE_SIZE);
-
-       return hsize;
-}
-
-static void
-wait_queues_init(void)
-{
-       uint32_t        i, whsize, qsz;
-       kern_return_t   kret;
-
-       /*
-        * Determine the amount of memory we're willing to reserve for
-        * the waitqueue hash table
-        */
-       whsize = compute_wait_hash_size();
-
-       /* Determine the number of waitqueues we can fit. */
-       qsz = sizeof (struct wait_queue);
-       whsize = ROUNDDOWN(whsize, qsz);
-       num_wait_queues = whsize / qsz;
-
-       /*
-        * The hash algorithm requires that this be a power of 2, so we
-        * just mask off all the low-order bits.
-        */
-       for (i = 0; i < 31; i++) {
-               uint32_t bit = (1 << i);
-               if ((num_wait_queues & bit) == num_wait_queues)
-                       break;
-               num_wait_queues &= ~bit;
-       }
-       assert(num_wait_queues > 0);
-
-       /* Now determine how much memory we really need. */
-       whsize = P2ROUNDUP(num_wait_queues * qsz, PAGE_SIZE);
-
-       kret = kernel_memory_allocate(kernel_map, (vm_offset_t *) &wait_queues,
-           whsize, 0, KMA_KOBJECT|KMA_NOPAGEWAIT);
-
-       if (kret != KERN_SUCCESS || wait_queues == NULL)
-               panic("kernel_memory_allocate() failed to allocate wait queues, error: %d, whsize: 0x%x", kret, whsize);
-
-       for (i = 0; i < num_wait_queues; i++) {
-               wait_queue_init(&wait_queues[i], SYNC_POLICY_FIFO);
-       }
-}
-
-void
-wait_queue_bootstrap(void)
-{
-       wait_queues_init();
-       _wait_queue_zone = zinit(sizeof(struct wait_queue),
-                                     WAIT_QUEUE_MAX * sizeof(struct wait_queue),
-                                     sizeof(struct wait_queue),
-                                     "wait queues");
-       zone_change(_wait_queue_zone, Z_NOENCRYPT, TRUE);
-
-       _wait_queue_set_zone = zinit(sizeof(struct wait_queue_set),
-                                     WAIT_QUEUE_SET_MAX * sizeof(struct wait_queue_set),
-                                     sizeof(struct wait_queue_set),
-                                     "wait queue sets");
-       zone_change(_wait_queue_set_zone, Z_NOENCRYPT, TRUE);
-
-       _wait_queue_link_zone = zinit(sizeof(struct _wait_queue_link),
-                                     WAIT_QUEUE_LINK_MAX * sizeof(struct _wait_queue_link),
-                                     sizeof(struct _wait_queue_link),
-                                     "wait queue links");
-       zone_change(_wait_queue_link_zone, Z_NOENCRYPT, TRUE);
-}
-
-/*
- *     Routine:        wait_queue_init
- *     Purpose:
- *             Initialize a previously allocated wait queue.
- *     Returns:
- *             KERN_SUCCESS - The wait_queue_t was initialized
- *             KERN_INVALID_ARGUMENT - The policy parameter was invalid
- */
-kern_return_t
-wait_queue_init(
-       wait_queue_t wq,
-       int policy)
-{
-       /* only FIFO and LIFO for now */
-       if ((policy & SYNC_POLICY_FIXED_PRIORITY) != 0)
-               return KERN_INVALID_ARGUMENT;
-
-       wq->wq_fifo = ((policy & SYNC_POLICY_REVERSED) == 0);
-       wq->wq_type = _WAIT_QUEUE_inited;
-       wq->wq_eventmask = 0;
-       queue_init(&wq->wq_queue);
-       hw_lock_init(&wq->wq_interlock);
-       return KERN_SUCCESS;
-}
-
-/*
- *     Routine:                   wait_queue_alloc
- *     Purpose:
- *             Allocate and initialize a wait queue for use outside of
- *             of the mach part of the kernel.
- *     Conditions:
- *             Nothing locked - can block.
- *     Returns:
- *             The allocated and initialized wait queue
- *             WAIT_QUEUE_NULL if there is a resource shortage
- */
-wait_queue_t
-wait_queue_alloc(
-       int policy)
-{
-       wait_queue_t wq;
-       kern_return_t ret;
-
-       wq = (wait_queue_t) zalloc(_wait_queue_zone);
-       if (wq != WAIT_QUEUE_NULL) {
-               ret = wait_queue_init(wq, policy);
-               if (ret != KERN_SUCCESS) {
-                       zfree(_wait_queue_zone, wq);
-                       wq = WAIT_QUEUE_NULL;
-               }
-       }
-       return wq;
-}
-
-/*
- *     Routine:        wait_queue_free
- *     Purpose:
- *             Free an allocated wait queue.
- *     Conditions:
- *             May block.
- */
-kern_return_t
-wait_queue_free(
-       wait_queue_t wq)
-{
-       if (!wait_queue_is_queue(wq))
-               return KERN_INVALID_ARGUMENT;
-       if (!queue_empty(&wq->wq_queue))
-               return KERN_FAILURE;
-       zfree(_wait_queue_zone, wq);
-       return KERN_SUCCESS;
-}
-
-/*
- *     Routine:        wait_queue_set_init
- *     Purpose:
- *             Initialize a previously allocated wait queue set.
- *     Returns:
- *             KERN_SUCCESS - The wait_queue_set_t was initialized
- *             KERN_INVALID_ARGUMENT - The policy parameter was invalid
- */
-kern_return_t
-wait_queue_set_init(
-       wait_queue_set_t wqset,
-       int policy)
-{
-       kern_return_t ret;
-
-       ret = wait_queue_init(&wqset->wqs_wait_queue, policy);
-       if (ret != KERN_SUCCESS)
-               return ret;
-
-       wqset->wqs_wait_queue.wq_type = _WAIT_QUEUE_SET_inited;
-       if (policy & SYNC_POLICY_PREPOST)
-               wqset->wqs_wait_queue.wq_prepost = TRUE;
-       else 
-               wqset->wqs_wait_queue.wq_prepost = FALSE;
-       queue_init(&wqset->wqs_setlinks);
-       queue_init(&wqset->wqs_preposts);
-       return KERN_SUCCESS;
-}
-
-
-kern_return_t
-wait_queue_sub_init(
-       wait_queue_set_t wqset,
-       int policy)
-{
-       return wait_queue_set_init(wqset, policy);
-}
-
-kern_return_t
-wait_queue_sub_clearrefs(
-        wait_queue_set_t wq_set)
-{
-       wait_queue_link_t wql;
-       queue_t q;
-       spl_t s;
-
-       if (!wait_queue_is_set(wq_set))
-               return KERN_INVALID_ARGUMENT;
-
-       s = splsched();
-       wqs_lock(wq_set);
-       q = &wq_set->wqs_preposts;
-       while (!queue_empty(q)) {
-               queue_remove_first(q, wql, wait_queue_link_t, wql_preposts);
-               assert(!wql_is_preposted(wql));
-       }
-       wqs_unlock(wq_set);
-       splx(s);
-       return KERN_SUCCESS;
-}
-
-/*
- *     Routine:        wait_queue_set_alloc
- *     Purpose:
- *             Allocate and initialize a wait queue set for
- *             use outside of the mach part of the kernel.
- *     Conditions:
- *             May block.
- *     Returns:
- *             The allocated and initialized wait queue set
- *             WAIT_QUEUE_SET_NULL if there is a resource shortage
- */
-wait_queue_set_t
-wait_queue_set_alloc(
-    int policy)
-{
-       wait_queue_set_t wq_set;
-
-       wq_set = (wait_queue_set_t) zalloc(_wait_queue_set_zone);
-       if (wq_set != WAIT_QUEUE_SET_NULL) {
-               kern_return_t ret;
-
-               ret = wait_queue_set_init(wq_set, policy);
-               if (ret != KERN_SUCCESS) {
-                       zfree(_wait_queue_set_zone, wq_set);
-                       wq_set = WAIT_QUEUE_SET_NULL;
-               }
-       }
-       return wq_set;
-}
-
-/*
- *     Routine:        wait_queue_set_free
- *     Purpose:
- *             Free an allocated wait queue set
- *     Conditions:
- *             May block.
- */
-kern_return_t
-wait_queue_set_free(
-       wait_queue_set_t wq_set)
-{
-       if (!wait_queue_is_set(wq_set))
-               return KERN_INVALID_ARGUMENT;
-
-       if (!queue_empty(&wq_set->wqs_wait_queue.wq_queue))
-               return KERN_FAILURE;
-
-       zfree(_wait_queue_set_zone, wq_set);
-       return KERN_SUCCESS;
-}
-
-
-/*
- *     
- *     Routine:        wait_queue_set_size
- *     Routine:        wait_queue_link_size
- *     Purpose:
- *             Return the size of opaque wait queue structures
- */
-unsigned int wait_queue_set_size(void) { return sizeof(WaitQueueSet); }
-unsigned int wait_queue_link_size(void) { return sizeof(WaitQueueLink); }
-
-/* declare a unique type for wait queue link structures */
-static unsigned int _wait_queue_link;
-static unsigned int _wait_queue_link_noalloc;
-static unsigned int _wait_queue_unlinked;
-
-#define WAIT_QUEUE_LINK ((void *)&_wait_queue_link)
-#define WAIT_QUEUE_LINK_NOALLOC ((void *)&_wait_queue_link_noalloc)
-#define WAIT_QUEUE_UNLINKED ((void *)&_wait_queue_unlinked)
-
-#define WAIT_QUEUE_ELEMENT_CHECK(wq, wqe) \
-       WQASSERT(((wqe)->wqe_queue == (wq) && \
-         queue_next(queue_prev((queue_t) (wqe))) == (queue_t)(wqe)), \
-         "wait queue element list corruption: wq=%#x, wqe=%#x", \
-         (wq), (wqe))
-
-#define WQSPREV(wqs, wql) ((wait_queue_link_t)queue_prev( \
-                       ((&(wqs)->wqs_setlinks == (queue_t)(wql)) ? \
-                       (queue_t)(wql) : &(wql)->wql_setlinks)))
-
-#define WQSNEXT(wqs, wql) ((wait_queue_link_t)queue_next( \
-                       ((&(wqs)->wqs_setlinks == (queue_t)(wql)) ? \
-                       (queue_t)(wql) : &(wql)->wql_setlinks)))
-
-#define WAIT_QUEUE_SET_LINK_CHECK(wqs, wql) \
-               WQASSERT(((((wql)->wql_type == WAIT_QUEUE_LINK) || \
-                          ((wql)->wql_type == WAIT_QUEUE_LINK_NOALLOC)) && \
-                       ((wql)->wql_setqueue == (wqs)) && \
-                       (((wql)->wql_queue->wq_type == _WAIT_QUEUE_inited) || \
-                        ((wql)->wql_queue->wq_type == _WAIT_QUEUE_SET_inited)) && \
-                       (WQSNEXT((wqs), WQSPREV((wqs),(wql))) == (wql))), \
-                       "wait queue set links corruption: wqs=%#x, wql=%#x", \
-                        (wqs), (wql))
-
-#if defined(_WAIT_QUEUE_DEBUG_)
-
-#define WQASSERT(e, s, p0, p1) ((e) ? 0 : panic(s, p0, p1))
-
-#define WAIT_QUEUE_CHECK(wq) \
-MACRO_BEGIN \
-       queue_t q2 = &(wq)->wq_queue; \
-       wait_queue_element_t wqe2 = (wait_queue_element_t) queue_first(q2); \
-       while (!queue_end(q2, (queue_entry_t)wqe2)) { \
-               WAIT_QUEUE_ELEMENT_CHECK((wq), wqe2); \
-               wqe2 = (wait_queue_element_t) queue_next((queue_t) wqe2); \
-       } \
-MACRO_END
-
-#define WAIT_QUEUE_SET_CHECK(wqs) \
-MACRO_BEGIN \
-       queue_t q2 = &(wqs)->wqs_setlinks; \
-       wait_queue_link_t wql2 = (wait_queue_link_t) queue_first(q2); \
-       while (!queue_end(q2, (queue_entry_t)wql2)) { \
-               WAIT_QUEUE_SET_LINK_CHECK((wqs), wql2); \
-               wql2 = (wait_queue_link_t) wql2->wql_setlinks.next; \
-       } \
-MACRO_END
-
-#else /* !_WAIT_QUEUE_DEBUG_ */
-
-#define WQASSERT(e, s, p0, p1) assert(e)
-
-#define WAIT_QUEUE_CHECK(wq)
-#define WAIT_QUEUE_SET_CHECK(wqs)
-
-#endif /* !_WAIT_QUEUE_DEBUG_ */
-
-/*
- *     Routine:        wait_queue_global
- *     Purpose:
- *             Indicate if this wait queue is a global wait queue or not.
- */
-static boolean_t
-wait_queue_global(
-       wait_queue_t wq)
-{
-       if ((wq >= wait_queues) && (wq <= (wait_queues + num_wait_queues))) {
-               return TRUE;
-       }
-       return FALSE;
-}
-
-
-/*
- *     Routine:        wait_queue_member_locked
- *     Purpose:
- *             Indicate if this set queue is a member of the queue
- *     Conditions:
- *             The wait queue is locked
- *             The set queue is just that, a set queue
- */
-static boolean_t
-wait_queue_member_locked(
-       wait_queue_t wq,
-       wait_queue_set_t wq_set)
-{
-       wait_queue_element_t wq_element;
-       queue_t q;
-
-       assert(wait_queue_held(wq));
-       assert(wait_queue_is_set(wq_set));
-
-       q = &wq->wq_queue;
-
-       wq_element = (wait_queue_element_t) queue_first(q);
-       while (!queue_end(q, (queue_entry_t)wq_element)) {
-               WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element);
-               if ((wq_element->wqe_type == WAIT_QUEUE_LINK) ||
-                   (wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC)) {
-                       wait_queue_link_t wql = (wait_queue_link_t)wq_element;
-
-                       if (wql->wql_setqueue == wq_set)
-                               return TRUE;
-               }
-               wq_element = (wait_queue_element_t)
-                            queue_next((queue_t) wq_element);
-       }
-       return FALSE;
-}
-       
-
-/*
- *     Routine:        wait_queue_member
- *     Purpose:
- *             Indicate if this set queue is a member of the queue
- *     Conditions:
- *             The set queue is just that, a set queue
- */
-boolean_t
-wait_queue_member(
-       wait_queue_t wq,
-       wait_queue_set_t wq_set)
-{
-       boolean_t ret;
-       spl_t s;
-
-       if (!wait_queue_is_set(wq_set))
-               return FALSE;
-
-       s = splsched();
-       wait_queue_lock(wq);
-       ret = wait_queue_member_locked(wq, wq_set);
-       wait_queue_unlock(wq);
-       splx(s);
-
-       return ret;
-}
-
-
-/*
- *     Routine:        wait_queue_link_internal
- *     Purpose:
- *             Insert a set wait queue into a wait queue.  This
- *             requires us to link the two together using a wait_queue_link
- *             structure that was provided.
- *     Conditions:
- *             The wait queue being inserted must be inited as a set queue
- *             The wait_queue_link structure must already be properly typed
- */
-static 
-kern_return_t
-wait_queue_link_internal(
-       wait_queue_t wq,
-       wait_queue_set_t wq_set,
-       wait_queue_link_t wql)
-{
-       wait_queue_element_t wq_element;
-       queue_t q;
-       spl_t s;
-
-       if (!wait_queue_is_valid(wq) || !wait_queue_is_set(wq_set))
-               return KERN_INVALID_ARGUMENT;
-
-       /*
-        * There are probably fewer threads and sets associated with
-        * the wait queue than there are wait queues associated with
-        * the set.  So let's validate it that way.
-        */
-       s = splsched();
-       wait_queue_lock(wq);
-       q = &wq->wq_queue;
-       wq_element = (wait_queue_element_t) queue_first(q);
-       while (!queue_end(q, (queue_entry_t)wq_element)) {
-               WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element);
-               if ((wq_element->wqe_type == WAIT_QUEUE_LINK ||
-                    wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) &&
-                   ((wait_queue_link_t)wq_element)->wql_setqueue == wq_set) {
-                       wait_queue_unlock(wq);
-                       splx(s);
-                       return KERN_ALREADY_IN_SET;
-               }
-               wq_element = (wait_queue_element_t)
-                               queue_next((queue_t) wq_element);
-       }
-
-       /*
-        * Not already a member, so we can add it.
-        */
-       wqs_lock(wq_set);
-
-       WAIT_QUEUE_SET_CHECK(wq_set);
-
-       assert(wql->wql_type == WAIT_QUEUE_LINK ||
-              wql->wql_type == WAIT_QUEUE_LINK_NOALLOC);
-
-       wql->wql_queue = wq;
-       wql_clear_prepost(wql);
-       queue_enter(&wq->wq_queue, wql, wait_queue_link_t, wql_links);
-       wql->wql_setqueue = wq_set;
-       queue_enter(&wq_set->wqs_setlinks, wql, wait_queue_link_t, wql_setlinks);
-
-       wqs_unlock(wq_set);
-       wait_queue_unlock(wq);
-       splx(s);
-
-       return KERN_SUCCESS;
-}      
-
-/*
- *     Routine:        wait_queue_link_noalloc
- *     Purpose:
- *             Insert a set wait queue into a wait queue.  This
- *             requires us to link the two together using a wait_queue_link
- *             structure that we allocate.
- *     Conditions:
- *             The wait queue being inserted must be inited as a set queue
- */
-kern_return_t
-wait_queue_link_noalloc(
-       wait_queue_t wq,
-       wait_queue_set_t wq_set,
-       wait_queue_link_t wql)
-{
-       wql->wql_type = WAIT_QUEUE_LINK_NOALLOC;
-       return wait_queue_link_internal(wq, wq_set, wql);
-}
-
-/*
- *     Routine:        wait_queue_link
- *     Purpose:
- *             Insert a set wait queue into a wait queue.  This
- *             requires us to link the two together using a wait_queue_link
- *             structure that we allocate.
- *     Conditions:
- *             The wait queue being inserted must be inited as a set queue
- */
-kern_return_t
-wait_queue_link(
-       wait_queue_t wq,
-       wait_queue_set_t wq_set)
-{
-       wait_queue_link_t wql;
-       kern_return_t ret;
-
-       wql = (wait_queue_link_t) zalloc(_wait_queue_link_zone);
-       if (wql == WAIT_QUEUE_LINK_NULL)
-               return KERN_RESOURCE_SHORTAGE;
-
-       wql->wql_type = WAIT_QUEUE_LINK;
-       ret = wait_queue_link_internal(wq, wq_set, wql);
-       if (ret != KERN_SUCCESS)
-               zfree(_wait_queue_link_zone, wql);
-
-       return ret;
-}      
-
-wait_queue_link_t
-wait_queue_link_allocate(void)
-{
-       wait_queue_link_t wql;
-
-       wql = zalloc(_wait_queue_link_zone); /* Can't fail */
-       bzero(wql, sizeof(*wql));
-       wql->wql_type = WAIT_QUEUE_UNLINKED;
-
-       return wql;
-}
-
-kern_return_t
-wait_queue_link_free(wait_queue_link_t wql) 
-{
-       zfree(_wait_queue_link_zone, wql);
-       return KERN_SUCCESS;
-}
-
-
-/*
- *     Routine:        wait_queue_unlink_locked
- *     Purpose:
- *             Undo the linkage between a wait queue and a set.
- */
-static void
-wait_queue_unlink_locked(
-       wait_queue_t wq,
-       wait_queue_set_t wq_set,
-       wait_queue_link_t wql)
-{
-       assert(wait_queue_held(wq));
-       assert(wait_queue_held(&wq_set->wqs_wait_queue));
-
-       wql->wql_queue = WAIT_QUEUE_NULL;
-       queue_remove(&wq->wq_queue, wql, wait_queue_link_t, wql_links);
-       wql->wql_setqueue = WAIT_QUEUE_SET_NULL;
-       queue_remove(&wq_set->wqs_setlinks, wql, wait_queue_link_t, wql_setlinks);
-       if (wql_is_preposted(wql)) {
-               queue_t ppq = &wq_set->wqs_preposts;
-               queue_remove(ppq, wql, wait_queue_link_t, wql_preposts);
-       }
-       wql->wql_type = WAIT_QUEUE_UNLINKED;
-
-       WAIT_QUEUE_CHECK(wq);
-       WAIT_QUEUE_SET_CHECK(wq_set);
-}
-
-/*
- *     Routine:        wait_queue_unlink_nofree
- *     Purpose:
- *             Remove the linkage between a wait queue and a set,
- *             returning the linkage structure to the caller to
- *             free later.
- *     Conditions:
- *             The wait queue being must be a member set queue
- */
-kern_return_t
-wait_queue_unlink_nofree(
-       wait_queue_t wq,
-       wait_queue_set_t wq_set,
-       wait_queue_link_t *wqlp)
-{
-       wait_queue_element_t wq_element;
-       wait_queue_link_t wql;
-       queue_t q;
-       spl_t s;
-
-       if (!wait_queue_is_valid(wq) || !wait_queue_is_set(wq_set)) {
-               return KERN_INVALID_ARGUMENT;
-       }
-       s = splsched();
-       wait_queue_lock(wq);
-
-       q = &wq->wq_queue;
-       wq_element = (wait_queue_element_t) queue_first(q);
-       while (!queue_end(q, (queue_entry_t)wq_element)) {
-               WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element);
-               if (wq_element->wqe_type == WAIT_QUEUE_LINK ||
-                   wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) {
-
-                       wql = (wait_queue_link_t)wq_element;
-                       
-                       if (wql->wql_setqueue == wq_set) {
-
-                               wqs_lock(wq_set);
-                               wait_queue_unlink_locked(wq, wq_set, wql);
-                               wqs_unlock(wq_set);
-                               wait_queue_unlock(wq);
-                               splx(s);
-                               *wqlp = wql;
-                               return KERN_SUCCESS;
-                       }
-               }
-               wq_element = (wait_queue_element_t)
-                               queue_next((queue_t) wq_element);
-       }
-       wait_queue_unlock(wq);
-       splx(s);
-       return KERN_NOT_IN_SET;
-}      
-
-/*
- *     Routine:        wait_queue_unlink
- *     Purpose:
- *             Remove the linkage between a wait queue and a set,
- *             freeing the linkage structure.
- *     Conditions:
- *             The wait queue being must be a member set queue
- */
-kern_return_t
-wait_queue_unlink(
-       wait_queue_t wq,
-       wait_queue_set_t wq_set)
-{
-       wait_queue_element_t wq_element;
-       wait_queue_link_t wql;
-       queue_t q;
-       spl_t s;
-
-       if (!wait_queue_is_valid(wq) || !wait_queue_is_set(wq_set)) {
-               return KERN_INVALID_ARGUMENT;
-       }
-       s = splsched();
-       wait_queue_lock(wq);
-
-       q = &wq->wq_queue;
-       wq_element = (wait_queue_element_t) queue_first(q);
-       while (!queue_end(q, (queue_entry_t)wq_element)) {
-               WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element);
-               if (wq_element->wqe_type == WAIT_QUEUE_LINK ||
-                   wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) {
-
-                       wql = (wait_queue_link_t)wq_element;
-                       
-                       if (wql->wql_setqueue == wq_set) {
-                               boolean_t alloced;
-
-                               alloced = (wql->wql_type == WAIT_QUEUE_LINK);
-                               wqs_lock(wq_set);
-                               wait_queue_unlink_locked(wq, wq_set, wql);
-                               wqs_unlock(wq_set);
-                               wait_queue_unlock(wq);
-                               splx(s);
-                               if (alloced)
-                                       zfree(_wait_queue_link_zone, wql);
-                               return KERN_SUCCESS;
-                       }
-               }
-               wq_element = (wait_queue_element_t)
-                               queue_next((queue_t) wq_element);
-       }
-       wait_queue_unlock(wq);
-       splx(s);
-       return KERN_NOT_IN_SET;
-}      
-
-/*
- *     Routine:        wait_queue_unlink_all_nofree_locked
- *     Purpose:
- *             Remove the linkage between a wait queue and all its sets.
- *             All the linkage structures are returned to the caller for
- *             later freeing.
- *     Conditions:
- *             Wait queue locked.
- */
-
-static void
-wait_queue_unlink_all_nofree_locked(
-       wait_queue_t wq,
-       queue_t links)
-{
-       wait_queue_element_t wq_element;
-       wait_queue_element_t wq_next_element;
-       wait_queue_set_t wq_set;
-       wait_queue_link_t wql;
-       queue_t q;
-
-       q = &wq->wq_queue;
-
-       wq_element = (wait_queue_element_t) queue_first(q);
-       while (!queue_end(q, (queue_entry_t)wq_element)) {
-
-               WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element);
-               wq_next_element = (wait_queue_element_t)
-                            queue_next((queue_t) wq_element);
-
-               if (wq_element->wqe_type == WAIT_QUEUE_LINK ||
-                   wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) {
-                       wql = (wait_queue_link_t)wq_element;
-                       wq_set = wql->wql_setqueue;
-                       wqs_lock(wq_set);
-                       wait_queue_unlink_locked(wq, wq_set, wql);
-                       wqs_unlock(wq_set);
-                       enqueue(links, &wql->wql_links);
-               }
-               wq_element = wq_next_element;
-       }
-}      
-
-/*
- *     Routine:        wait_queue_unlink_all_nofree
- *     Purpose:
- *             Remove the linkage between a wait queue and all its sets.
- *             All the linkage structures are returned to the caller for
- *             later freeing.
- *     Conditions:
- *             Nothing of interest locked.
- */
-
-kern_return_t
-wait_queue_unlink_all_nofree(
-       wait_queue_t wq,
-       queue_t links)
-{
-       spl_t s;
-
-       if (!wait_queue_is_valid(wq)) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       s = splsched();
-       wait_queue_lock(wq);
-       wait_queue_unlink_all_nofree_locked(wq, links);
-       wait_queue_unlock(wq);
-       splx(s);
-
-       return(KERN_SUCCESS);
-}      
-
-/*
- *     Routine:        wait_queue_unlink_all_locked
- *     Purpose:
- *             Remove the linkage between a locked wait queue and all its
- *             sets and enqueue the allocated ones onto the links queue
- *             provided.
- *     Conditions:
- *             Wait queue locked.
- */
-static void
-wait_queue_unlink_all_locked(
-       wait_queue_t wq,
-       queue_t links)
-{
-       wait_queue_element_t wq_element;
-       wait_queue_element_t wq_next_element;
-       wait_queue_set_t wq_set;
-       wait_queue_link_t wql;
-       queue_t q;
-
-       q = &wq->wq_queue;
-
-       wq_element = (wait_queue_element_t) queue_first(q);
-       while (!queue_end(q, (queue_entry_t)wq_element)) {
-               boolean_t alloced;
-
-               WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element);
-               wq_next_element = (wait_queue_element_t)
-                            queue_next((queue_t) wq_element);
-
-               alloced = (wq_element->wqe_type == WAIT_QUEUE_LINK);
-               if (alloced || wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) {
-                       wql = (wait_queue_link_t)wq_element;
-                       wq_set = wql->wql_setqueue;
-                       wqs_lock(wq_set);
-                       wait_queue_unlink_locked(wq, wq_set, wql);
-                       wqs_unlock(wq_set);
-                       if (alloced)
-                               enqueue(links, &wql->wql_links);
-               }
-               wq_element = wq_next_element;
-       }
-
-}
-
-
-/*
- *     Routine:        wait_queue_unlink_all
- *     Purpose:
- *             Remove the linkage between a wait queue and all its sets.
- *             All the linkage structures that were allocated internally
- *             are freed.  The others are the caller's responsibility.
- *     Conditions:
- *             Nothing of interest locked.
- */
-
-kern_return_t
-wait_queue_unlink_all(
-       wait_queue_t wq)
-{
-       wait_queue_link_t wql;
-       queue_head_t links_queue_head;
-       queue_t links = &links_queue_head;
-       spl_t s;
-
-       if (!wait_queue_is_valid(wq)) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       queue_init(links);
-
-       s = splsched();
-       wait_queue_lock(wq);
-       wait_queue_unlink_all_locked(wq, links);
-       wait_queue_unlock(wq);
-       splx(s);
-
-       while(!queue_empty(links)) {
-               wql = (wait_queue_link_t) dequeue(links);
-               zfree(_wait_queue_link_zone, wql);
-       }
-
-       return(KERN_SUCCESS);
-}      
-
-/* legacy interface naming */
-kern_return_t
-wait_subqueue_unlink_all(
-       wait_queue_set_t        wq_set)
-{
-       return wait_queue_set_unlink_all(wq_set);
-}
-
-
-/*
- *     Routine:        wait_queue_set_unlink_all_nofree
- *     Purpose:
- *             Remove the linkage between a set wait queue and all its
- *             member wait queues and all the sets it may be a member of.
- *             The links structures are returned for later freeing by the
- *             caller.
- *     Conditions:
- *             The wait queue must be a set
- */
-kern_return_t
-wait_queue_set_unlink_all_nofree(
-       wait_queue_set_t wq_set,
-       queue_t         links)
-{
-       wait_queue_link_t wql;
-       wait_queue_t wq;
-       queue_t q;
-       spl_t s;
-
-       if (!wait_queue_is_set(wq_set)) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-retry:
-       s = splsched();
-       wqs_lock(wq_set);
-
-       /* remove the wait queues that are members of our set */
-       q = &wq_set->wqs_setlinks;
-
-       wql = (wait_queue_link_t)queue_first(q);
-       while (!queue_end(q, (queue_entry_t)wql)) {
-               WAIT_QUEUE_SET_LINK_CHECK(wq_set, wql);
-               wq = wql->wql_queue;
-               if (wait_queue_lock_try(wq)) {
-                       wait_queue_unlink_locked(wq, wq_set, wql);
-                       wait_queue_unlock(wq);
-                       enqueue(links, &wql->wql_links);
-                       wql = (wait_queue_link_t)queue_first(q);
-               } else {
-                       wqs_unlock(wq_set);
-                       splx(s);
-                       delay(1);
-                       goto retry;
-               }
-       }
-
-       /* remove this set from sets it belongs to */
-       wait_queue_unlink_all_nofree_locked(&wq_set->wqs_wait_queue, links);
-
-       wqs_unlock(wq_set);
-       splx(s);
-
-       return(KERN_SUCCESS);
-}      
-
-/*
- *     Routine:        wait_queue_set_unlink_all
- *     Purpose:
- *             Remove the linkage between a set wait queue and all its
- *             member wait queues and all the sets it may be members of.
- *             The link structures are freed for those links which were
- *             dynamically allocated.
- *     Conditions:
- *             The wait queue must be a set
- */
-kern_return_t
-wait_queue_set_unlink_all(
-       wait_queue_set_t wq_set)
-{
-       wait_queue_link_t wql;
-       wait_queue_t wq;
-       queue_t q;
-       queue_head_t links_queue_head;
-       queue_t links = &links_queue_head;
-       spl_t s;
-
-       if (!wait_queue_is_set(wq_set)) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       queue_init(links);
-
-retry:
-       s = splsched();
-       wqs_lock(wq_set);
-
-       /* remove the wait queues that are members of our set */
-       q = &wq_set->wqs_setlinks;
-
-       wql = (wait_queue_link_t)queue_first(q);
-       while (!queue_end(q, (queue_entry_t)wql)) {
-               WAIT_QUEUE_SET_LINK_CHECK(wq_set, wql);
-               wq = wql->wql_queue;
-               if (wait_queue_lock_try(wq)) {
-                       boolean_t alloced;
-
-                       alloced = (wql->wql_type == WAIT_QUEUE_LINK);
-                       wait_queue_unlink_locked(wq, wq_set, wql);
-                       wait_queue_unlock(wq);
-                       if (alloced)
-                               enqueue(links, &wql->wql_links);
-                       wql = (wait_queue_link_t)queue_first(q);
-               } else {
-                       wqs_unlock(wq_set);
-                       splx(s);
-                       delay(1);
-                       goto retry;
-               }
-       }
-
-
-       /* remove this set from sets it belongs to */
-       wait_queue_unlink_all_locked(&wq_set->wqs_wait_queue, links);
-
-       wqs_unlock(wq_set);
-       splx(s);
-
-       while (!queue_empty (links)) {
-               wql = (wait_queue_link_t) dequeue(links);
-               zfree(_wait_queue_link_zone, wql);
-       }
-       return(KERN_SUCCESS);
-}      
-
-kern_return_t
-wait_queue_set_unlink_one(
-       wait_queue_set_t wq_set,
-       wait_queue_link_t wql)
-{
-       wait_queue_t wq;
-       spl_t s;
-
-       assert(wait_queue_is_set(wq_set));
-
-retry:
-       s = splsched();
-       wqs_lock(wq_set);
-
-       WAIT_QUEUE_SET_CHECK(wq_set);
-
-       /* Already unlinked, e.g. by selclearthread() */
-       if (wql->wql_type == WAIT_QUEUE_UNLINKED) {
-               goto out;
-       }
-
-       WAIT_QUEUE_SET_LINK_CHECK(wq_set, wql);
-
-       /* On a wait queue, and we hold set queue lock ... */
-       wq = wql->wql_queue;
-       if (wait_queue_lock_try(wq)) {
-               wait_queue_unlink_locked(wq, wq_set, wql);
-               wait_queue_unlock(wq);
-       } else {
-               wqs_unlock(wq_set);
-               splx(s);
-               delay(1);
-               goto retry;
-       }
-
-out:
-       wqs_unlock(wq_set);
-       splx(s);
-
-       return KERN_SUCCESS;
-}
-
-/*
- *     Routine:        wait_queue_assert_wait64_locked
- *     Purpose:
- *             Insert the current thread into the supplied wait queue
- *             waiting for a particular event to be posted to that queue.
- *
- *     Conditions:
- *             The wait queue is assumed locked.
- *             The waiting thread is assumed locked.
- *
- */
-__private_extern__ wait_result_t
-wait_queue_assert_wait64_locked(
-       wait_queue_t wq,
-       event64_t event,
-       wait_interrupt_t interruptible,
-       wait_timeout_urgency_t urgency,
-       uint64_t deadline,
-       uint64_t leeway,
-       thread_t thread)
-{
-       wait_result_t wait_result;
-       boolean_t realtime;
-
-       if (!wait_queue_assert_possible(thread))
-               panic("wait_queue_assert_wait64_locked");
-
-       if (wq->wq_type == _WAIT_QUEUE_SET_inited) {
-               wait_queue_set_t wqs = (wait_queue_set_t)wq;
-
-               if (event == NO_EVENT64 && wqs_is_preposted(wqs))
-                       return (thread->wait_result = THREAD_AWAKENED);
-       }
-
-       /*
-        * Realtime threads get priority for wait queue placements.
-        * This allows wait_queue_wakeup_one to prefer a waiting
-        * realtime thread, similar in principle to performing
-        * a wait_queue_wakeup_all and allowing scheduler prioritization
-        * to run the realtime thread, but without causing the
-        * lock contention of that scenario.
-        */
-       realtime = (thread->sched_pri >= BASEPRI_REALTIME);
-
-       /*
-        * This is the extent to which we currently take scheduling attributes
-        * into account.  If the thread is vm privileged, we stick it at
-        * the front of the queue.  Later, these queues will honor the policy
-        * value set at wait_queue_init time.
-        */
-       wait_result = thread_mark_wait_locked(thread, interruptible);
-       if (wait_result == THREAD_WAITING) {
-               if (!wq->wq_fifo
-                       || (thread->options & TH_OPT_VMPRIV)
-                       || realtime)
-                       enqueue_head(&wq->wq_queue, (queue_entry_t) thread);
-               else
-                       enqueue_tail(&wq->wq_queue, (queue_entry_t) thread);
-
-               thread->wait_event = event;
-               thread->wait_queue = wq;
-
-               if (deadline != 0) {
-
-                       if (!timer_call_enter_with_leeway(&thread->wait_timer, NULL,
-                               deadline, leeway, urgency, FALSE))
-                               thread->wait_timer_active++;
-                       thread->wait_timer_is_set = TRUE;
-               }
-               if (wait_queue_global(wq)) {
-                       wq->wq_eventmask = wq->wq_eventmask | CAST_TO_EVENT_MASK(event);
-               }
-
-       }
-       return(wait_result);
-}
-
-/*
- *     Routine:        wait_queue_assert_wait
- *     Purpose:
- *             Insert the current thread into the supplied wait queue
- *             waiting for a particular event to be posted to that queue.
- *
- *     Conditions:
- *             nothing of interest locked.
- */
-wait_result_t
-wait_queue_assert_wait(
-       wait_queue_t wq,
-       event_t event,
-       wait_interrupt_t interruptible,
-       uint64_t deadline)
-{
-       spl_t s;
-       wait_result_t ret;
-       thread_t thread = current_thread();
-
-       /* If it is an invalid wait queue, you can't wait on it */
-       if (!wait_queue_is_valid(wq))
-               return (thread->wait_result = THREAD_RESTART);
-
-       s = splsched();
-       wait_queue_lock(wq);
-       thread_lock(thread);
-       ret = wait_queue_assert_wait64_locked(wq, CAST_DOWN(event64_t,event),
-                                             interruptible, 
-                                             TIMEOUT_URGENCY_SYS_NORMAL, 
-                                             deadline, 0,
-                                             thread);
-       thread_unlock(thread);
-       wait_queue_unlock(wq);
-       splx(s);
-       return(ret);
-}
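
A minimal usage sketch (not part of this change; the event object, surrounding
locking, and error handling are hypothetical) of how a waiter pairs this call
with thread_block() while a waker posts the same event through one of the
wakeup routines defined later in this file:

	/* waiter: declare intent to wait, then block until awakened */
	wait_result_t wr = wait_queue_assert_wait(wq, (event_t)&my_event,
	                                          THREAD_UNINT, 0 /* no deadline */);
	if (wr == THREAD_WAITING)
		wr = thread_block(THREAD_CONTINUE_NULL);

	/* waker: wake at most one thread waiting on that event */
	wait_queue_wakeup_one(wq, (event_t)&my_event, THREAD_AWAKENED,
	                      -1 /* no wakeup priority promotion */);
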
-
-/*
- *     Routine:        wait_queue_assert_wait_with_leeway
- *     Purpose:
- *             Insert the current thread into the supplied wait queue
- *             waiting for a particular event to be posted to that queue.
- *             Deadline values are specified with urgency and leeway.
- *
- *     Conditions:
- *             nothing of interest locked.
- */
-wait_result_t
-wait_queue_assert_wait_with_leeway(
-       wait_queue_t wq,
-       event_t event,
-       wait_interrupt_t interruptible,
-       wait_timeout_urgency_t urgency,
-       uint64_t deadline,
-       uint64_t leeway)
-{
-       spl_t s;
-       wait_result_t ret;
-       thread_t thread = current_thread();
-
-       /* If it is an invalid wait queue, you can't wait on it */
-       if (!wait_queue_is_valid(wq))
-               return (thread->wait_result = THREAD_RESTART);
-
-       s = splsched();
-       wait_queue_lock(wq);
-       thread_lock(thread);
-       ret = wait_queue_assert_wait64_locked(wq, CAST_DOWN(event64_t,event),
-                                             interruptible, 
-                                             urgency, deadline, leeway,
-                                             thread);
-       thread_unlock(thread);
-       wait_queue_unlock(wq);
-       splx(s);
-       return(ret);
-}
-
-/*
- *     Routine:        wait_queue_assert_wait64
- *     Purpose:
- *             Insert the current thread into the supplied wait queue
- *             waiting for a particular event to be posted to that queue.
- *     Conditions:
- *             nothing of interest locked.
- */
-wait_result_t
-wait_queue_assert_wait64(
-       wait_queue_t wq,
-       event64_t event,
-       wait_interrupt_t interruptible,
-       uint64_t deadline)
-{
-       spl_t s;
-       wait_result_t ret;
-       thread_t thread = current_thread();
-
-       /* If it is an invalid wait queue, you can't wait on it */
-       if (!wait_queue_is_valid(wq))
-               return (thread->wait_result = THREAD_RESTART);
-
-       s = splsched();
-       wait_queue_lock(wq);
-       thread_lock(thread);
-       ret = wait_queue_assert_wait64_locked(wq, event, interruptible, 
-                                             TIMEOUT_URGENCY_SYS_NORMAL,
-                                             deadline, 0,
-                                             thread);
-       thread_unlock(thread);
-       wait_queue_unlock(wq);
-       splx(s);
-       return(ret);
-}
-
-/*
- *     Routine:        wait_queue_assert_wait64_with_leeway
- *     Purpose:
- *             Insert the current thread into the supplied wait queue
- *             waiting for a particular event to be posted to that queue.
- *             Deadline values are specified with urgency and leeway.
- *     Conditions:
- *             nothing of interest locked.
- */
-wait_result_t
-wait_queue_assert_wait64_with_leeway(
-       wait_queue_t wq,
-       event64_t event,
-       wait_interrupt_t interruptible,
-       wait_timeout_urgency_t urgency,
-       uint64_t deadline,
-       uint64_t leeway)
-{
-       spl_t s;
-       wait_result_t ret;
-       thread_t thread = current_thread();
-
-       /* If it is an invalid wait queue, you can't wait on it */
-       if (!wait_queue_is_valid(wq))
-               return (thread->wait_result = THREAD_RESTART);
-
-       s = splsched();
-       wait_queue_lock(wq);
-       thread_lock(thread);
-       ret = wait_queue_assert_wait64_locked(wq, event, interruptible, 
-                                             urgency, deadline, leeway,
-                                             thread);
-       thread_unlock(thread);
-       wait_queue_unlock(wq);
-       splx(s);
-       return(ret);
-}
-
-/*
- *     Routine:        _wait_queue_select64_all
- *     Purpose:
- *             Select all threads off a wait queue that meet the
- *             supplied criteria.
- *     Conditions:
- *             at splsched
- *             wait queue locked
- *             wake_queue initialized and ready for insertion
- *             possibly recursive
- *     Returns:
- *             a queue of locked threads
- */
-static void
-_wait_queue_select64_all(
-       wait_queue_t wq,
-       event64_t event,
-       queue_t wake_queue)
-{
-       wait_queue_element_t wq_element;
-       wait_queue_element_t wqe_next;
-       unsigned long eventmask = 0;
-       boolean_t is_queue_global = FALSE;
-       queue_t q;
-
-       is_queue_global = wait_queue_global(wq);
-       if (is_queue_global) {
-               eventmask = CAST_TO_EVENT_MASK(event);
-               if ((wq->wq_eventmask & eventmask) != eventmask) {
-                       return;
-               }
-               eventmask = 0;
-       }
-       q = &wq->wq_queue;
-
-       wq_element = (wait_queue_element_t) queue_first(q);
-       while (!queue_end(q, (queue_entry_t)wq_element)) {
-               WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element);
-               wqe_next = (wait_queue_element_t)
-                          queue_next((queue_t) wq_element);
-
-               /*
-                * We may have to recurse if this is a compound wait queue.
-                */
-               if (wq_element->wqe_type == WAIT_QUEUE_LINK ||
-                   wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) {
-                       wait_queue_link_t wql = (wait_queue_link_t)wq_element;
-                       wait_queue_set_t set_queue = wql->wql_setqueue;
-
-                       /*
-                        * We have to check the set wait queue. If it is marked
-                        * as pre-post, and it is the "generic event" then mark
-                        * it pre-posted now (if not already).
-                        */
-                       wqs_lock(set_queue);
-                       if (event == NO_EVENT64 && set_queue->wqs_prepost && !wql_is_preposted(wql)) {
-                               queue_t ppq = &set_queue->wqs_preposts;
-                               queue_enter(ppq, wql, wait_queue_link_t, wql_preposts);
-                       }
-                       if (! wait_queue_empty(&set_queue->wqs_wait_queue)) 
-                               _wait_queue_select64_all(&set_queue->wqs_wait_queue, event, wake_queue);
-                       wqs_unlock(set_queue);
-               } else {
-                       
-                       /*
-                        * Otherwise, it's a thread.  If it is waiting on
-                        * the event we are posting to this queue, pull
-                        * it off the queue and stick it in our wake_queue.
-                        */
-                       thread_t t = (thread_t)(void *)wq_element;
-
-                       if (t->wait_event == event) {
-                               thread_lock(t);
-                               remqueue((queue_entry_t) t);
-                               enqueue (wake_queue, (queue_entry_t) t);
-                               t->wait_queue = WAIT_QUEUE_NULL;
-                               t->wait_event = NO_EVENT64;
-                               t->at_safe_point = FALSE;
-                               /* returned locked */
-                       } else {
-                               if (is_queue_global) {
-                                       eventmask = eventmask | 
-                                               CAST_TO_EVENT_MASK(t->wait_event);
-                               }
-                       }
-               }
-               wq_element = wqe_next;
-       }
-       /* Update event mask if global wait queue */
-       if (is_queue_global) {
-               wq->wq_eventmask = eventmask;
-       }
-
-}
-
-/*
- *     Routine:        wait_queue_wakeup64_all_locked
- *     Purpose:
- *             Wakeup some number of threads that are in the specified
- *             wait queue and waiting on the specified event.
- *     Conditions:
- *             wait queue already locked (may be released).
- *     Returns:
- *             KERN_SUCCESS - Threads were woken up
- *             KERN_NOT_WAITING - No threads were waiting on the <wq,event> pair
- */
-__private_extern__ kern_return_t
-wait_queue_wakeup64_all_locked(
-       wait_queue_t wq,
-       event64_t event,
-       wait_result_t result,
-       boolean_t unlock)
-{
-       queue_head_t wake_queue_head;
-       queue_t q = &wake_queue_head;
-       kern_return_t res;
-
-//     assert(wait_queue_held(wq));
-//     if(!wq->wq_interlock.lock_data) {               /* (BRINGUP */
-//             panic("wait_queue_wakeup64_all_locked: lock not held on %p\n", wq);     /* (BRINGUP) */
-//     }
-
-       queue_init(q);
-
-       /*
-        * Select the threads that we will wake up.      The threads
-        * are returned to us locked and cleanly removed from the
-        * wait queue.
-        */
-       _wait_queue_select64_all(wq, event, q);
-       if (unlock)
-               wait_queue_unlock(wq);
-
-       /*
-        * For each thread, set it running.
-        */
-       res = KERN_NOT_WAITING;
-       while (!queue_empty (q)) {
-               thread_t thread = (thread_t)(void *) dequeue(q);
-               res = thread_go(thread, result);
-               assert(res == KERN_SUCCESS);
-               thread_unlock(thread);
-       }
-       return res;
-}
-
-
-/*
- *     Routine:                wait_queue_wakeup_all
- *     Purpose:
- *             Wakeup some number of threads that are in the specified
- *             wait queue and waiting on the specified event.
- *     Conditions:
- *             Nothing locked
- *     Returns:
- *             KERN_SUCCESS - Threads were woken up
- *             KERN_NOT_WAITING - No threads were waiting on the <wq,event> pair
- */
-kern_return_t
-wait_queue_wakeup_all(
-       wait_queue_t wq,
-       event_t event,
-       wait_result_t result)
-{
-       kern_return_t ret;
-       spl_t s;
-
-       if (!wait_queue_is_valid(wq)) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       s = splsched();
-       wait_queue_lock(wq);
-//     if(!wq->wq_interlock.lock_data) {               /* (BRINGUP */
-//             panic("wait_queue_wakeup_all: we did not get the lock on %p\n", wq);    /* (BRINGUP) */
-//     }
-       ret = wait_queue_wakeup64_all_locked(
-                               wq, CAST_DOWN(event64_t,event),
-                               result, TRUE);
-       /* lock released */
-       splx(s);
-       return ret;
-}
-
-/*
- *     Routine:                wait_queue_wakeup64_all
- *     Purpose:
- *             Wakeup some number of threads that are in the specified
- *             wait queue and waiting on the specified event.
- *     Conditions:
- *             Nothing locked
- *     Returns:
- *             KERN_SUCCESS - Threads were woken up
- *             KERN_NOT_WAITING - No threads were waiting on the <wq,event> pair
- */
-kern_return_t
-wait_queue_wakeup64_all(
-       wait_queue_t wq,
-       event64_t event,
-       wait_result_t result)
-{
-       kern_return_t ret;
-       spl_t s;
-
-       if (!wait_queue_is_valid(wq)) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       s = splsched();
-       wait_queue_lock(wq);
-       ret = wait_queue_wakeup64_all_locked(wq, event, result, TRUE);
-       /* lock released */
-       splx(s);
-       return ret;
-}
-
-/*
- *     Routine:        _wait_queue_select64_one
- *     Purpose:
- *             Select the best thread off a wait queue that meets the
- *             supplied criteria.
- *     Conditions:
- *             at splsched
- *             wait queue locked
- *             possibly recursive
- *     Returns:
- *             a locked thread - if one found
- *     Note:
- *             This is where the sync policy of the wait queue comes
- *             into effect.  For now, we just assume FIFO/LIFO.
- */
-static thread_t
-_wait_queue_select64_one(
-       wait_queue_t wq,
-       event64_t event)
-{
-       wait_queue_element_t wq_element;
-       wait_queue_element_t wqe_next;
-       thread_t t = THREAD_NULL;
-       thread_t fifo_thread = THREAD_NULL;
-       boolean_t is_queue_fifo = TRUE;
-       boolean_t is_queue_global = FALSE;
-       boolean_t thread_imp_donor = FALSE;
-       boolean_t realtime = FALSE;
-       unsigned long eventmask = 0;
-       queue_t q;
-
-       if (wait_queue_global(wq)) {
-               eventmask = CAST_TO_EVENT_MASK(event);
-               if ((wq->wq_eventmask & eventmask) != eventmask) {
-                       return THREAD_NULL;
-               }
-               eventmask = 0;
-               is_queue_global = TRUE;
-#if IMPORTANCE_INHERITANCE
-               is_queue_fifo = FALSE;
-#endif /* IMPORTANCE_INHERITANCE */
-       }
-
-       q = &wq->wq_queue;
-
-       wq_element = (wait_queue_element_t) queue_first(q);
-       while (!queue_end(q, (queue_entry_t)wq_element)) {
-               WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element);
-               wqe_next = (wait_queue_element_t)
-                              queue_next((queue_t) wq_element);
-
-               /*
-                * We may have to recurse if this is a compound wait queue.
-                */
-               if (wq_element->wqe_type == WAIT_QUEUE_LINK ||
-                   wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) {
-                       wait_queue_link_t wql = (wait_queue_link_t)wq_element;
-                       wait_queue_set_t set_queue = wql->wql_setqueue;
-
-                       /*
-                        * We have to check the set wait queue. If the set
-                        * supports pre-posting, it isn't already preposted,
-                        * and we didn't find a thread in the set, then mark it.
-                        *
-                        * If we later find a thread, there may be a spurious
-                        * pre-post here on this set.  The wait side has to check
-                        * for that either pre- or post-wait.
-                        */
-                       wqs_lock(set_queue);
-                       if (! wait_queue_empty(&set_queue->wqs_wait_queue)) {
-                               t = _wait_queue_select64_one(&set_queue->wqs_wait_queue, event);
-                       }
-                       if (t != THREAD_NULL) {
-                               wqs_unlock(set_queue);
-                               return t;
-                       }
-                       if (event == NO_EVENT64 && set_queue->wqs_prepost && !wql_is_preposted(wql)) {
-                               queue_t ppq = &set_queue->wqs_preposts;
-                               queue_enter(ppq, wql, wait_queue_link_t, wql_preposts);
-                       }
-                       wqs_unlock(set_queue);
-
-               } else {
-                       
-                       /*
-                        * Otherwise, it's a thread.  If it is waiting on
-                        * the event we are posting to this queue, pull
-                        * it off the queue and return it (still locked).
-                        */
-                       t = (thread_t)(void *)wq_element;
-                       if (t->wait_event == event) {
-                               if (fifo_thread == THREAD_NULL) {
-                                       fifo_thread = t;
-                               }
-#if IMPORTANCE_INHERITANCE
-                               /* 
-                                * Checking the imp donor bit does not need the thread
-                                * lock or task lock, since we hold the wait queue lock
-                                * and the thread cannot be removed from the queue
-                                * without acquiring the wait queue lock. The imp donor
-                                * bit may change after we read its value, but it is ok
-                                * to wake a thread while someone drops an importance
-                                * assertion on that thread.
-                                */
-                               thread_imp_donor = task_is_importance_donor(t->task);
-#endif /* IMPORTANCE_INHERITANCE */
-                               realtime = (t->sched_pri >= BASEPRI_REALTIME);
-                               if (is_queue_fifo || thread_imp_donor || realtime || 
-                                               (t->options & TH_OPT_VMPRIV)) {
-                                       thread_lock(t);
-                                       remqueue((queue_entry_t) t);
-                                       t->wait_queue = WAIT_QUEUE_NULL;
-                                       t->wait_event = NO_EVENT64;
-                                       t->at_safe_point = FALSE;
-                                       return t;       /* still locked */
-                               }
-                       }
-                       if (is_queue_global) {
-                               eventmask = eventmask | CAST_TO_EVENT_MASK(t->wait_event);
-                       }
-                       t = THREAD_NULL;
-               }
-               wq_element = wqe_next;
-       }
-
-       if (is_queue_global) {
-               wq->wq_eventmask = eventmask;
-       }
-#if IMPORTANCE_INHERITANCE
-       if (fifo_thread != THREAD_NULL) {
-               thread_lock(fifo_thread);
-               remqueue((queue_entry_t) fifo_thread);
-               fifo_thread->wait_queue = WAIT_QUEUE_NULL;
-               fifo_thread->wait_event = NO_EVENT64;
-               fifo_thread->at_safe_point = FALSE;
-               return fifo_thread;     /* still locked */
-       }
-#endif /* IMPORTANCE_INHERITANCE */
-       return THREAD_NULL;
-}
-
-
-/*
- *     Routine:        wait_queue_pull_thread_locked
- *     Purpose:
- *             Pull a thread off its wait queue and (possibly) unlock 
- *             the waitq.
- *     Conditions:
- *             at splsched
- *             wait queue locked
- *             thread locked
- *     Returns:
- *             with the thread still locked.
- */
-void
-wait_queue_pull_thread_locked(
-       wait_queue_t waitq,
-       thread_t thread,
-       boolean_t unlock)
-{
-
-       assert(thread->wait_queue == waitq);
-
-       remqueue((queue_entry_t)thread );
-       thread->wait_queue = WAIT_QUEUE_NULL;
-       thread->wait_event = NO_EVENT64;
-       thread->at_safe_point = FALSE;
-       if (unlock)
-               wait_queue_unlock(waitq);
-}
-
-
-/*
- *     Routine:        wait_queue_select64_thread
- *     Purpose:
- *             Look for a thread and remove it from the queues, if
- *             (and only if) the thread is waiting on the supplied
- *             <wait_queue, event> pair.
- *     Conditions:
- *             at splsched
- *             wait queue locked
- *             possibly recursive
- *     Returns:
- *             KERN_NOT_WAITING: Thread is not waiting here.
- *             KERN_SUCCESS: It was, and is now removed (returned locked)
- */
-static kern_return_t
-_wait_queue_select64_thread(
-       wait_queue_t wq,
-       event64_t event,
-       thread_t thread)
-{
-       wait_queue_element_t wq_element;
-       wait_queue_element_t wqe_next;
-       kern_return_t res = KERN_NOT_WAITING;
-       queue_t q = &wq->wq_queue;
-
-       thread_lock(thread);
-
-       if ((thread->wait_queue == wq) && (thread->wait_event == event)) {
-               remqueue((queue_entry_t) thread);
-               thread->at_safe_point = FALSE;
-               thread->wait_event = NO_EVENT64;
-               thread->wait_queue = WAIT_QUEUE_NULL;
-               /* thread still locked */
-               return KERN_SUCCESS;
-       }
-
-       thread_unlock(thread);
-       
-       /*
-        * The wait_queue associated with the thread may be one of this
-        * wait queue's sets.  Go see.  If so, removing it from
-        * there is like removing it from here.
-        */
-       wq_element = (wait_queue_element_t) queue_first(q);
-       while (!queue_end(q, (queue_entry_t)wq_element)) {
-               WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element);
-               wqe_next = (wait_queue_element_t)
-                              queue_next((queue_t) wq_element);
-
-               if (wq_element->wqe_type == WAIT_QUEUE_LINK ||
-                   wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) {
-                       wait_queue_link_t wql = (wait_queue_link_t)wq_element;
-                       wait_queue_set_t set_queue = wql->wql_setqueue;
-
-                       wqs_lock(set_queue);
-                       if (! wait_queue_empty(&set_queue->wqs_wait_queue)) {
-                               res = _wait_queue_select64_thread(&set_queue->wqs_wait_queue,
-                                                               event,
-                                                               thread);
-                       }
-                       wqs_unlock(set_queue);
-                       if (res == KERN_SUCCESS)
-                               return KERN_SUCCESS;
-               }
-               wq_element = wqe_next;
-       }
-       return res;
-}
-
-
-/*
- *     Routine:        wait_queue_wakeup64_identity_locked
- *     Purpose:
- *             Select a single thread that is most-eligible to run and
- *             set it running.  But return the thread locked.
- *
- *     Conditions:
- *             at splsched
- *             wait queue locked
- *             possibly recursive
- *     Returns:
- *             a pointer to the locked thread that was awakened
- */
-__private_extern__ thread_t
-wait_queue_wakeup64_identity_locked(
-       wait_queue_t wq,
-       event64_t event,
-       wait_result_t result,
-       boolean_t unlock)
-{
-       kern_return_t res;
-       thread_t thread;
-
-       assert(wait_queue_held(wq));
-
-       thread = _wait_queue_select64_one(wq, event);
-       if (unlock)
-               wait_queue_unlock(wq);
-
-       if (thread) {
-               res = thread_go(thread, result);
-               assert(res == KERN_SUCCESS);
-       }
-       return thread;  /* still locked if not NULL */
-}
-
-
-/*
- *     Routine:        wait_queue_wakeup64_one_locked
- *     Purpose:
- *             Select a single thread that is most-eligible to run and
- *             set it running.
- *
- *     Conditions:
- *             at splsched
- *             wait queue locked
- *             possibly recursive
- *     Returns:
- *             KERN_SUCCESS - A thread was found waiting and awakened
- *             KERN_NOT_WAITING - No thread was waiting on the <wq,event> pair
- */
-__private_extern__ kern_return_t
-wait_queue_wakeup64_one_locked(
-       wait_queue_t wq,
-       event64_t event,
-       wait_result_t result,
-       boolean_t unlock)
-{
-       thread_t thread;
-
-       assert(wait_queue_held(wq));
-
-       thread = _wait_queue_select64_one(wq, event);
-       if (unlock)
-               wait_queue_unlock(wq);
-
-       if (thread) {
-               kern_return_t res;
-               
-               res = thread_go(thread, result);
-               assert(res == KERN_SUCCESS);
-               thread_unlock(thread);
-               return res;
-       }
-
-       return KERN_NOT_WAITING;
-}
-
-/*
- *     Routine:        wait_queue_wakeup_one
- *     Purpose:
- *             Wakeup the most appropriate thread that is in the specified
- *             wait queue for the specified event.
- *     Conditions:
- *             Nothing locked
- *     Returns:
- *             KERN_SUCCESS - Thread was woken up
- *             KERN_NOT_WAITING - No thread was waiting on the <wq,event> pair
- */
-kern_return_t
-wait_queue_wakeup_one(
-       wait_queue_t wq,
-       event_t event,
-       wait_result_t result,
-       int priority)
-{
-       thread_t thread;
-       spl_t s;
-
-       if (!wait_queue_is_valid(wq)) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       s = splsched();
-       wait_queue_lock(wq);
-       thread = _wait_queue_select64_one(wq, CAST_DOWN(event64_t,event));
-       wait_queue_unlock(wq);
-
-       if (thread) {
-               kern_return_t res;
-
-               if (thread->sched_pri < priority) {
-                       if (priority <= MAXPRI) {
-                               set_sched_pri(thread, priority);
-
-                               thread->was_promoted_on_wakeup = 1;
-                               thread->sched_flags |= TH_SFLAG_PROMOTED;
-                       }
-               }
-               res = thread_go(thread, result);
-               assert(res == KERN_SUCCESS);
-               thread_unlock(thread);
-               splx(s);
-               return res;
-       }
-
-       splx(s);
-       return KERN_NOT_WAITING;
-}
-
-/*
- *     Routine:        wait_queue_wakeup64_one
- *     Purpose:
- *             Wakeup the most appropriate thread that is in the specified
- *             wait queue for the specified event.
- *     Conditions:
- *             Nothing locked
- *     Returns:
- *             KERN_SUCCESS - Thread was woken up
- *             KERN_NOT_WAITING - No thread was waiting on the <wq,event> pair
- */
-kern_return_t
-wait_queue_wakeup64_one(
-       wait_queue_t wq,
-       event64_t event,
-       wait_result_t result)
-{
-       thread_t thread;
-       spl_t s;
-
-       if (!wait_queue_is_valid(wq)) {
-               return KERN_INVALID_ARGUMENT;
-       }
-       s = splsched();
-       wait_queue_lock(wq);
-       thread = _wait_queue_select64_one(wq, event);
-       wait_queue_unlock(wq);
-
-       if (thread) {
-               kern_return_t res;
-
-               res = thread_go(thread, result);
-               assert(res == KERN_SUCCESS);
-               thread_unlock(thread);
-               splx(s);
-               return res;
-       }
-
-       splx(s);
-       return KERN_NOT_WAITING;
-}
-
-
-/*
- *     Routine:        wait_queue_wakeup64_thread_locked
- *     Purpose:
- *             Wakeup the particular thread that was specified if and only
- *             if it was in this wait queue (or one of its set queues)
- *             and waiting on the specified event.
- *
- *             This is much safer than just removing the thread from
- *             whatever wait queue it happens to be on.  For instance, it
- *             may have already been awoken from the wait you intended to
- *             interrupt and waited on something else (like another
- *             semaphore).
- *     Conditions:
- *             at splsched
- *             wait queue already locked (may be released).
- *     Returns:
- *             KERN_SUCCESS - the thread was found waiting and awakened
- *             KERN_NOT_WAITING - the thread was not waiting here
- */
-__private_extern__ kern_return_t
-wait_queue_wakeup64_thread_locked(
-       wait_queue_t wq,
-       event64_t event,
-       thread_t thread,
-       wait_result_t result,
-       boolean_t unlock)
-{
-       kern_return_t res;
-
-       assert(wait_queue_held(wq));
-
-       /*
-        * See if the thread was still waiting there.  If so, it got
-        * dequeued and returned locked.
-        */
-       res = _wait_queue_select64_thread(wq, event, thread);
-       if (unlock)
-           wait_queue_unlock(wq);
-
-       if (res != KERN_SUCCESS)
-               return KERN_NOT_WAITING;
-
-       res = thread_go(thread, result);
-       assert(res == KERN_SUCCESS);
-       thread_unlock(thread);
-       return res;
-}
-
-/*
- *     Routine:        wait_queue_wakeup_thread
- *     Purpose:
- *             Wakeup the particular thread that was specified if and only
- *             if it was in this wait queue (or one of its set queues)
- *             and waiting on the specified event.
- *
- *             This is much safer than just removing the thread from
- *             whatever wait queue it happens to be on.  For instance, it
- *             may have already been awoken from the wait you intended to
- *             interrupt and waited on something else (like another
- *             semaphore).
- *     Conditions:
- *             nothing of interest locked
- *             we need to assume spl needs to be raised
- *     Returns:
- *             KERN_SUCCESS - the thread was found waiting and awakened
- *             KERN_NOT_WAITING - the thread was not waiting here
- */
-kern_return_t
-wait_queue_wakeup_thread(
-       wait_queue_t wq,
-       event_t event,
-       thread_t thread,
-       wait_result_t result)
-{
-       kern_return_t res;
-       spl_t s;
-
-       if (!wait_queue_is_valid(wq)) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       s = splsched();
-       wait_queue_lock(wq);
-       res = _wait_queue_select64_thread(wq, CAST_DOWN(event64_t,event), thread);
-       wait_queue_unlock(wq);
-
-       if (res == KERN_SUCCESS) {
-               res = thread_go(thread, result);
-               assert(res == KERN_SUCCESS);
-               thread_unlock(thread);
-               splx(s);
-               return res;
-       }
-       splx(s);
-       return KERN_NOT_WAITING;
-}
-
-/*
- *     Routine:        wait_queue_wakeup64_thread
- *     Purpose:
- *             Wakeup the particular thread that was specified if and only
- *             if it was in this wait queue (or one of its sets' queues)
- *             and waiting on the specified event.
- *
- *             This is much safer than just removing the thread from
- *             whatever wait queue it happens to be on.  For instance, it
- *             may have already been awoken from the wait you intended to
- *             interrupt and waited on something else (like another
- *             semaphore).
- *     Conditions:
- *             we need to assume spl needs to be raised
- *     Returns:
- *             KERN_SUCCESS - the thread was found waiting and awakened
- *             KERN_NOT_WAITING - the thread was not waiting here
- */
-kern_return_t
-wait_queue_wakeup64_thread(
-       wait_queue_t wq,
-       event64_t event,
-       thread_t thread,
-       wait_result_t result)
-{
-       kern_return_t res;
-       spl_t s;
-
-       if (!wait_queue_is_valid(wq)) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       s = splsched();
-       wait_queue_lock(wq);
-       res = _wait_queue_select64_thread(wq, event, thread);
-       wait_queue_unlock(wq);
-
-       if (res == KERN_SUCCESS) {
-               res = thread_go(thread, result);
-               assert(res == KERN_SUCCESS);
-
-               thread_unlock(thread);
-               splx(s);
-               return res;
-       }
-       splx(s);
-       return KERN_NOT_WAITING;
-}
diff --git a/osfmk/kern/wait_queue.h b/osfmk/kern/wait_queue.h
deleted file mode 100644 (file)
index 4a38cee..0000000
+++ /dev/null
@@ -1,467 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#ifdef KERNEL_PRIVATE
-
-#ifndef _KERN_WAIT_QUEUE_H_
-#define _KERN_WAIT_QUEUE_H_
-
-#include <mach/mach_types.h>
-#include <mach/sync_policy.h>
-#include <mach/kern_return.h>          /* for kern_return_t */
-
-#include <kern/kern_types.h>           /* for wait_queue_t */
-#include <kern/queue.h>
-#include <kern/assert.h>
-
-#include <sys/cdefs.h>
-
-#ifdef MACH_KERNEL_PRIVATE
-
-#include <kern/simple_lock.h>
-#include <mach/branch_predicates.h>
-
-#include <machine/cpu_number.h>
-#include <machine/machine_routines.h> /* machine_timeout_suspended() */
-
-/*
- * The event mask is 60 bits wide on 64-bit architectures and 28 bits on
- * 32-bit architectures, so we calculate its size using sizeof(long).
- * If the bitfield for wq_type and wq_fifo is changed, then value of 
- * EVENT_MASK_BITS will also change. 
- */
-#define EVENT_MASK_BITS  ((sizeof(long) * 8) - 4)
-
-/*
- * Zero out the 4 msb of the event.
- */
-#define CAST_TO_EVENT_MASK(event)  (((CAST_DOWN(unsigned long, event)) << 4) >> 4)
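
As a worked example (64-bit kernel; the event value below is hypothetical),
sizeof(long) * 8 == 64, so EVENT_MASK_BITS is 60 and CAST_TO_EVENT_MASK()
simply clears the top 4 bits of the event so it fits in the wq_eventmask
bitfield:

	CAST_TO_EVENT_MASK(0xffffff80deadbeefUL)
	    == (0xffffff80deadbeefUL << 4) >> 4
	    == 0x0fffff80deadbeefUL
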
-/*
- *     wait_queue_t
- *     This is the definition of the common event wait queue
- *     that the scheduler APIs understand.  It is used
- *     internally by the generalized event waiting mechanism
- *     (assert_wait), and also for items that maintain their
- *     own wait queues (such as ports and semaphores).
- *
- *     It is not published to other kernel components.  They
- *     can create wait queues by calling wait_queue_alloc.
- *
- *     NOTE:  Hardware locks are used to protect event wait
- *     queues since interrupt code is free to post events to
- *     them.
- */
-typedef struct wait_queue {
-    unsigned long int                    /* flags */
-    /* boolean_t */    wq_type:2,              /* only public field */
-                                       wq_fifo:1,              /* fifo wakeup policy? */
-                                       wq_prepost:1,   /* waitq supports prepost? set only */
-                                       wq_eventmask:EVENT_MASK_BITS; 
-    hw_lock_data_t     wq_interlock;   /* interlock */
-    queue_head_t       wq_queue;               /* queue of elements */
-} WaitQueue;
-
-/*
- *     wait_queue_set_t
- *     This is the common definition for a set wait queue.
- *     These can be linked as members/elements of multiple regular
- *     wait queues.  They have an additional set of linkages to
- *     identify the linkage structures that point to them.
- */
-typedef struct wait_queue_set {
-       WaitQueue               wqs_wait_queue; /* our wait queue */
-       queue_head_t    wqs_setlinks;   /* links from set perspective */
-       queue_head_t    wqs_preposts;   /* preposted links */
-} WaitQueueSet;
-
-#define wqs_type               wqs_wait_queue.wq_type
-#define wqs_fifo               wqs_wait_queue.wq_fifo
-#define wqs_prepost    wqs_wait_queue.wq_prepost
-#define wqs_queue              wqs_wait_queue.wq_queue
-
-/*
- *     wait_queue_element_t
- *     This structure describes the elements on an event wait
- *     queue.  It holds the common first fields of a thread shuttle
- *     and a wait_queue_link_t.  In that way, a wait queue can
- *     consist of both thread shuttle elements and links off
- *     to other (set) wait queues.
- *
- *     WARNING: These fields correspond to fields in the thread
- *     shuttle (run queue links and run queue pointer). Any change in
- *     the layout here will have to be matched with a change there.
- */
-typedef struct wait_queue_element {
-       queue_chain_t   wqe_links;      /* link of elements on this queue */
-       void *                  wqe_type;       /* Identifies link vs. thread */
-       wait_queue_t    wqe_queue;      /* queue this element is on */
-} WaitQueueElement;
-
-typedef WaitQueueElement *wait_queue_element_t;
-
-/*
- *     wait_queue_link_t
- *     Specialized wait queue element type for linking set
- *     event waits queues onto a wait queue.  In this way, an event
- *     event wait queues onto a wait queue.  In this way, an event
- *     of associated wait queues can handle the event, while letting
- *     the thread only be linked on the single wait queue it blocked on.
- *
- *     One use: ports in multiple portsets.  Each thread is queued up
- *     on the portset that it specifically blocked on during a receive
- *     operation.  Each port's event queue links in all the portset
- *     event queues of which it is a member.  An IPC event post associated
- *     with that port may wake up any thread from any of those portsets,
- *     or one that was waiting locally on the port itself.
- */
-typedef struct _wait_queue_link {
-       WaitQueueElement                wql_element;    /* element on master */
-       queue_chain_t                   wql_setlinks;   /* element on set */
-       queue_chain_t                   wql_preposts;   /* element on set prepost list */
-    wait_queue_set_t           wql_setqueue;   /* set queue */
-} WaitQueueLink;
-
-#define wql_links wql_element.wqe_links
-#define wql_type  wql_element.wqe_type
-#define wql_queue wql_element.wqe_queue
-
-#define _WAIT_QUEUE_inited             0x2
-#define _WAIT_QUEUE_SET_inited         0x3
-
-#define wait_queue_is_queue(wq)        \
-       ((wq)->wq_type == _WAIT_QUEUE_inited)
-
-#define wait_queue_is_set(wqs) \
-       ((wqs)->wqs_type == _WAIT_QUEUE_SET_inited)
-
-#define wait_queue_is_valid(wq)        \
-       (((wq)->wq_type & ~1) == _WAIT_QUEUE_inited)
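
(Because _WAIT_QUEUE_inited is 0x2 and _WAIT_QUEUE_SET_inited is 0x3, masking
the type with ~1 lets wait_queue_is_valid() accept either kind of queue: for a
set, (0x3 & ~1) == 0x2, which still compares equal to _WAIT_QUEUE_inited.)
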
-
-#define wait_queue_empty(wq)   (queue_empty(&(wq)->wq_queue))
-
-#define wait_queue_held(wq)            (hw_lock_held(&(wq)->wq_interlock))
-#define wait_queue_lock_try(wq) (hw_lock_try(&(wq)->wq_interlock))
-
-/* For x86, the hardware timeout is in TSC units. */
-#if defined(i386) || defined(x86_64)
-#define        hwLockTimeOut LockTimeOutTSC
-#else
-#define        hwLockTimeOut LockTimeOut
-#endif
-/*
- * Double the standard lock timeout, because wait queues tend
- * to iterate over a number of threads - locking each.  If there is
- * a problem with a thread lock, it normally times out at the wait
- * queue level first, hiding the real problem.
- */
-
-static inline void wait_queue_lock(wait_queue_t wq) {
-       if (__improbable(hw_lock_to(&(wq)->wq_interlock, hwLockTimeOut * 2) == 0)) {
-               boolean_t wql_acquired = FALSE;
-
-               while (machine_timeout_suspended()) {
-#if    defined(__i386__) || defined(__x86_64__)
-/*
- * i386/x86_64 return with preemption disabled on a timeout for
- * diagnostic purposes.
- */
-                       mp_enable_preemption();
-#endif
-                       if ((wql_acquired = hw_lock_to(&(wq)->wq_interlock, hwLockTimeOut * 2)))
-                               break;
-               }
-               if (wql_acquired == FALSE)
-                       panic("wait queue deadlock - wq=%p, cpu=%d\n", wq, cpu_number());
-       }
-       assert(wait_queue_held(wq));
-}
-
-static inline void wait_queue_unlock(wait_queue_t wq) {
-       assert(wait_queue_held(wq));
-       hw_lock_unlock(&(wq)->wq_interlock);
-}
-
-#define wqs_lock(wqs)          wait_queue_lock(&(wqs)->wqs_wait_queue)
-#define wqs_unlock(wqs)                wait_queue_unlock(&(wqs)->wqs_wait_queue)
-#define wqs_lock_try(wqs)      wait_queue__try_lock(&(wqs)->wqs_wait_queue)
-#define wqs_is_preposted(wqs)  ((wqs)->wqs_prepost && !queue_empty(&(wqs)->wqs_preposts))
-
-#define wql_is_preposted(wql)  ((wql)->wql_preposts.next != NULL)
-#define wql_clear_prepost(wql)  ((wql)->wql_preposts.next = (wql)->wql_preposts.prev = NULL)
-
-#define wait_queue_assert_possible(thread) \
-                       ((thread)->wait_queue == WAIT_QUEUE_NULL)
-
-/* bootstrap interface - can allocate/link wait_queues and sets after calling this */
-__private_extern__ void wait_queue_bootstrap(void);
-
-/******** Decomposed interfaces (to build higher level constructs) ***********/
-
-/* assert intent to wait on a locked wait queue */
-__private_extern__ wait_result_t wait_queue_assert_wait64_locked(
-                       wait_queue_t wait_queue,
-                       event64_t wait_event,
-                       wait_interrupt_t interruptible,
-                       wait_timeout_urgency_t urgency,
-                       uint64_t deadline,
-                       uint64_t leeway,
-                       thread_t thread);
-
-/* pull a thread from its wait queue */
-__private_extern__ void wait_queue_pull_thread_locked(
-                       wait_queue_t wait_queue,
-                       thread_t thread,
-                       boolean_t unlock);
-
-/* wakeup all threads waiting for a particular event on locked queue */
-__private_extern__ kern_return_t wait_queue_wakeup64_all_locked(
-                       wait_queue_t wait_queue,
-                       event64_t wake_event,
-                       wait_result_t result,
-                       boolean_t unlock);
-
-/* wakeup one thread waiting for a particular event on locked queue */
-__private_extern__ kern_return_t wait_queue_wakeup64_one_locked(
-                       wait_queue_t wait_queue,
-                       event64_t wake_event,
-                       wait_result_t result,
-                       boolean_t unlock);
-
-/* return identity of a thread awakened for a particular <wait_queue,event> */
-__private_extern__ thread_t wait_queue_wakeup64_identity_locked(
-                       wait_queue_t wait_queue,
-                       event64_t wake_event,
-                       wait_result_t result,
-                       boolean_t unlock);
-
-/* wakeup thread iff its still waiting for a particular event on locked queue */
-__private_extern__ kern_return_t wait_queue_wakeup64_thread_locked(
-                       wait_queue_t wait_queue,
-                       event64_t wake_event,
-                       thread_t thread,
-                       wait_result_t result,
-                       boolean_t unlock);
-
-extern uint32_t num_wait_queues;
-extern struct wait_queue *wait_queues;
-/* The Jenkins "one at a time" hash.
- * TBD: There may be some value to unrolling here,
- * depending on the architecture.
- */
-static inline uint32_t wq_hash(char *key)
-{
-       uint32_t hash = 0;
-       size_t i, length = sizeof(char *);
-
-       for (i = 0; i < length; i++) {
-               hash += key[i];
-               hash += (hash << 10);
-               hash ^= (hash >> 6);
-       }
-       hash += (hash << 3);
-       hash ^= (hash >> 11);
-       hash += (hash << 15);
-
-       hash &= (num_wait_queues - 1);
-       return hash;
-}
-
-#define        wait_hash(event) wq_hash((char *)&event) 
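
A brief sketch (the caller and object below are hypothetical) of how an event
is mapped to one of the global wait queues: the bytes of the event pointer are
hashed and then masked down to an index into wait_queues[]; the final
"hash &= (num_wait_queues - 1)" step implies the array is sized to a power of
two:

	event_t ev = (event_t)&some_object;              /* hypothetical event */
	wait_queue_t global_wq = &wait_queues[wait_hash(ev)];
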
-
-#endif /* MACH_KERNEL_PRIVATE */
-
-__BEGIN_DECLS
-
-/******** Semi-Public interfaces (not a part of a higher construct) ************/
-
-extern unsigned int wait_queue_set_size(void);
-extern unsigned int wait_queue_link_size(void);
-
-extern kern_return_t wait_queue_init(
-                       wait_queue_t wait_queue,
-                       int policy);
-
-extern wait_queue_set_t wait_queue_set_alloc(
-                       int policy);
-
-extern kern_return_t wait_queue_set_init(
-                       wait_queue_set_t set_queue,
-                       int policy);
-
-extern kern_return_t wait_queue_set_free(
-                       wait_queue_set_t set_queue);
-
-extern wait_queue_link_t wait_queue_link_alloc(
-                       int policy);
-
-extern kern_return_t wait_queue_link_free(
-                       wait_queue_link_t link_element);
-
-extern kern_return_t wait_queue_link(
-                       wait_queue_t wait_queue,
-                       wait_queue_set_t set_queue);
-
-extern kern_return_t wait_queue_link_noalloc(
-                       wait_queue_t wait_queue,
-                       wait_queue_set_t set_queue,
-                       wait_queue_link_t link);
-
-extern boolean_t wait_queue_member(
-                       wait_queue_t wait_queue,
-                       wait_queue_set_t set_queue);
-
-extern kern_return_t wait_queue_unlink(
-                       wait_queue_t wait_queue,
-                       wait_queue_set_t set_queue);
-
-extern kern_return_t wait_queue_unlink_all(
-                       wait_queue_t wait_queue);
-
-extern kern_return_t wait_queue_set_unlink_all(
-                       wait_queue_set_t set_queue);
-
-#ifdef XNU_KERNEL_PRIVATE
-extern kern_return_t wait_queue_set_unlink_one(
-                       wait_queue_set_t set_queue,
-                       wait_queue_link_t link);
-
-extern kern_return_t wait_queue_unlink_nofree(
-                       wait_queue_t wait_queue,
-                       wait_queue_set_t set_queue,
-                       wait_queue_link_t *wqlp);
-
-extern kern_return_t wait_queue_unlink_all_nofree(
-                       wait_queue_t wait_queue,
-                       queue_t links);
-
-extern kern_return_t wait_queue_set_unlink_all_nofree(
-                       wait_queue_set_t set_queue,
-                       queue_t links);
-
-extern wait_queue_link_t wait_queue_link_allocate(void);
-
-#endif /* XNU_KERNEL_PRIVATE */
-
-/* legacy API */
-kern_return_t wait_queue_sub_init(
-                       wait_queue_set_t set_queue,
-                       int policy);
-
-kern_return_t wait_queue_sub_clearrefs(
-                       wait_queue_set_t wq_set);
-
-extern kern_return_t wait_subqueue_unlink_all(
-                       wait_queue_set_t set_queue);
-
-extern wait_queue_t wait_queue_alloc(
-                       int policy);
-
-extern kern_return_t wait_queue_free(
-                       wait_queue_t wait_queue);
-
-/* assert intent to wait on <wait_queue,event64> pair */
-extern wait_result_t wait_queue_assert_wait64(
-                       wait_queue_t wait_queue,
-                       event64_t wait_event,
-                       wait_interrupt_t interruptible,
-                       uint64_t deadline);
-
-extern wait_result_t wait_queue_assert_wait64_with_leeway(
-                       wait_queue_t wait_queue,
-                       event64_t wait_event,
-                       wait_interrupt_t interruptible,
-                       wait_timeout_urgency_t urgency,
-                       uint64_t deadline,
-                       uint64_t leeway);
-
-/* wakeup the most appropriate thread waiting on <wait_queue,event64> pair */
-extern kern_return_t wait_queue_wakeup64_one(
-                       wait_queue_t wait_queue,
-                       event64_t wake_event,
-                       wait_result_t result);
-
-/* wakeup all the threads waiting on <wait_queue,event64> pair */
-extern kern_return_t wait_queue_wakeup64_all(
-                       wait_queue_t wait_queue,
-                       event64_t wake_event,
-                       wait_result_t result);
-
-/* wakeup a specified thread waiting iff waiting on <wait_queue,event64> pair */
-extern kern_return_t wait_queue_wakeup64_thread(
-                       wait_queue_t wait_queue,
-                       event64_t wake_event,
-                       thread_t thread,
-                       wait_result_t result);
-
-/*
- * Compatibility Wait Queue APIs based on pointer events instead of 64bit
- * integer events.
- */
-
-/* assert intent to wait on <wait_queue,event> pair */
-extern wait_result_t wait_queue_assert_wait(
-                       wait_queue_t wait_queue,
-                       event_t wait_event,
-                       wait_interrupt_t interruptible,
-                       uint64_t deadline);
-
-/* assert intent to wait on <wait_queue,event> pair */
-extern wait_result_t wait_queue_assert_wait_with_leeway(
-                       wait_queue_t wait_queue,
-                       event_t wait_event,
-                       wait_interrupt_t interruptible,
-                       wait_timeout_urgency_t urgency,
-                       uint64_t deadline,
-                       uint64_t leeway);
-
-/* wakeup the most appropriate thread waiting on <wait_queue,event> pair */
-extern kern_return_t wait_queue_wakeup_one(
-                       wait_queue_t wait_queue,
-                       event_t wake_event,
-                       wait_result_t result,
-                       int priority);
-
-/* wakeup all the threads waiting on <wait_queue,event> pair */
-extern kern_return_t wait_queue_wakeup_all(
-                       wait_queue_t wait_queue,
-                       event_t wake_event,
-                       wait_result_t result);
-
-/* wakeup a specified thread waiting iff waiting on <wait_queue,event> pair */
-extern kern_return_t wait_queue_wakeup_thread(
-                       wait_queue_t wait_queue,
-                       event_t wake_event,
-                       thread_t thread,
-                       wait_result_t result);
-
-__END_DECLS
-
-#endif /* _KERN_WAIT_QUEUE_H_ */
-
-#endif /* KERNEL_PRIVATE */
diff --git a/osfmk/kern/waitq.c b/osfmk/kern/waitq.c
new file mode 100644 (file)
index 0000000..be41d61
--- /dev/null
@@ -0,0 +1,5846 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * @OSF_FREE_COPYRIGHT@
+ */
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+#include <kern/ast.h>
+#include <kern/kern_types.h>
+#include <kern/mach_param.h>
+#include <kern/queue.h>
+#include <kern/sched_prim.h>
+#include <kern/simple_lock.h>
+#include <kern/spl.h>
+#include <kern/waitq.h>
+#include <kern/zalloc.h>
+#include <libkern/OSAtomic.h>
+#include <mach/sync_policy.h>
+#include <vm/vm_kern.h>
+
+#include <sys/kdebug.h>
+
+#if CONFIG_WAITQ_DEBUG
+#define wqdbg(fmt,...) \
+       printf("WQ[%s]:  " fmt "\n", __func__, ## __VA_ARGS__)
+#else
+#define wqdbg(fmt,...) do { } while (0)
+#endif
+
+#ifdef WAITQ_VERBOSE_DEBUG
+#define wqdbg_v(fmt,...) \
+       printf("WQ[v:%s]:  " fmt "\n", __func__, ## __VA_ARGS__)
+#else
+#define wqdbg_v(fmt,...) do { } while (0)
+#endif
+
+#define wqinfo(fmt,...) \
+       printf("WQ[%s]: " fmt "\n", __func__,  ## __VA_ARGS__)
+
+#define wqerr(fmt,...) \
+       printf("WQ[%s] ERROR: " fmt "\n", __func__, ## __VA_ARGS__)
+
+
+/*
+ * un-comment the following lines to debug the link/prepost tables
+ * NOTE: this expands each element by ~40 bytes
+ */
+//#define CONFIG_WAITQ_LINK_STATS
+//#define CONFIG_WAITQ_PREPOST_STATS
+
+/*
+ * file-static functions / data
+ */
+static thread_t waitq_select_one_locked(struct waitq *waitq, event64_t event,
+                                       uint64_t *reserved_preposts,
+                                       int priority, spl_t *spl);
+
+static kern_return_t waitq_select_thread_locked(struct waitq *waitq,
+                                               event64_t event,
+                                               thread_t thread, spl_t *spl);
+
+#define WAITQ_SET_MAX (task_max * 3)
+static zone_t waitq_set_zone;
+
+
+#define        P2ROUNDUP(x, align) (-(-((uint32_t)(x)) & -(align)))
+#define ROUNDDOWN(x,y) (((x)/(y))*(y))
+
+
+#ifdef CONFIG_WAITQ_STATS
+static __inline__ void waitq_grab_backtrace(uintptr_t bt[NWAITQ_BTFRAMES], int skip);
+#endif
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Wait Queue Link/Prepost Table Implementation
+ *
+ * ---------------------------------------------------------------------- */
+#define DEFAULT_MIN_FREE_TABLE_ELEM    100
+static uint32_t g_min_free_table_elem;
+static uint32_t g_min_free_cache;
+
+static vm_size_t   g_wqt_max_tbl_size;
+static lck_grp_t   g_wqt_lck_grp;
+
+/* 1 prepost table, 1 setid link table */
+#define NUM_WQ_TABLES 2
+
+/* default VA space for waitq tables (zone allocated) */
+#define DEFAULT_MAX_TABLE_SIZE  P2ROUNDUP(8 * 1024 * 1024, PAGE_SIZE)
+
+struct wq_id {
+       union {
+               uint64_t id;
+               struct {
+                       /*
+                        * this bitfield is OK because we don't need to
+                        * enforce a particular memory layout
+                        */
+                       uint64_t idx:18, /* allows indexing up to 8MB of 32byte link objects */
+                                generation:46;
+               };
+       };
+};
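+
+/*
+ * Illustrative note (not part of the table logic itself): ids handed out to
+ * clients are only ever decoded back through this union, e.g.
+ *
+ *     uint32_t idx = ((struct wq_id *)&some_id)->idx;
+ *
+ * and a stale id is rejected by comparing the full 64-bit value (index plus
+ * generation) against the element's current wqt_id.id, as wq_table_get_elem()
+ * does below.
+ */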
+
+enum wqt_elem_type {
+       WQT_FREE     = 0,
+       WQT_ELEM     = 1,
+       WQT_LINK     = 2,
+       WQT_RESERVED = 3,
+};
+
+struct wqt_elem {
+       uint32_t wqt_bits;
+
+       uint32_t wqt_next_idx;
+
+       struct wq_id wqt_id;
+};
+
+/* this _must_ match the idx bitfield definition in struct wq_id */
+#define WQT_IDX_MAX           (0x3ffff)
+#if defined(DEVELOPMENT) || defined(DEBUG)
+/* global for lldb macros */
+uint64_t g_wqt_idx_max = WQT_IDX_MAX;
+#endif
+
+/* reference count bits should _always_ be the low-order bits */
+#define WQT_BITS_REFCNT_MASK  (0x1FFFFFFF)
+#define WQT_BITS_REFCNT_SHIFT (0)
+#define WQT_BITS_REFCNT       (WQT_BITS_REFCNT_MASK << WQT_BITS_REFCNT_SHIFT)
+
+#define WQT_BITS_TYPE_MASK    (0x3)
+#define WQT_BITS_TYPE_SHIFT   (29)
+#define WQT_BITS_TYPE         (WQT_BITS_TYPE_MASK << WQT_BITS_TYPE_SHIFT)
+
+#define WQT_BITS_VALID_MASK   (0x1)
+#define WQT_BITS_VALID_SHIFT  (31)
+#define WQT_BITS_VALID        (WQT_BITS_VALID_MASK << WQT_BITS_VALID_SHIFT)
+
+#define wqt_bits_refcnt(bits) \
+       (((bits) >> WQT_BITS_REFCNT_SHIFT) & WQT_BITS_REFCNT_MASK)
+
+#define wqt_bits_type(bits) \
+       (((bits) >> WQT_BITS_TYPE_SHIFT) & WQT_BITS_TYPE_MASK)
+
+#define wqt_bits_valid(bits) \
+       ((bits) & WQT_BITS_VALID)
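+
+/*
+ * Worked example (illustrative only): a link element that has been typed as
+ * WQT_LINK, marked valid, and holds a single reference carries
+ * wqt_bits == 0xC0000001:
+ *
+ *     wqt_bits_valid(0xC0000001)  != 0            (bit 31 is set)
+ *     wqt_bits_type(0xC0000001)   == WQT_LINK     ((0xC0000001 >> 29) & 0x3 == 2)
+ *     wqt_bits_refcnt(0xC0000001) == 1            (low-order 29 bits)
+ */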
+
+struct wq_table;
+typedef void (*wq_table_poison_func)(struct wq_table *, struct wqt_elem *);
+
+/*
+ * A table is a container for slabs of elements. Each slab is 'slab_sz' bytes
+ * and contains 'slab_sz/elem_sz' elements (of 'elem_sz' bytes each). These
+ * slabs allow the table to be broken up into potentially discontiguous VA
+ * space. On 32-bit platforms with large amounts of physical RAM, this is
+ * quite important. Keeping slabs like this slightly complicates retrieval of
+ * table elements, but not by much.
+ */
+struct wq_table {
+       struct wqt_elem **table;   /* an array of 'slabs' of elements */
+       struct wqt_elem **next_free_slab;
+       struct wq_id     free_list __attribute__((aligned(8)));
+
+       uint32_t         nelem;
+       uint32_t         used_elem;
+       uint32_t         elem_sz;  /* size of a table element (bytes) */
+
+       uint32_t         slab_sz;  /* size of a table 'slab' object (bytes) */
+       uint32_t         slab_shift;
+       uint32_t         slab_msk;
+       uint32_t         slab_elem;
+       zone_t           slab_zone;
+
+       wq_table_poison_func poison;
+
+       lck_mtx_t        lock;
+       uint32_t         state;
+
+#if CONFIG_WAITQ_STATS
+       uint32_t         nslabs;
+
+       uint64_t         nallocs;
+       uint64_t         nreallocs;
+       uint64_t         npreposts;
+       int64_t          nreservations;
+       uint64_t         nreserved_releases;
+       uint64_t         nspins;
+
+       uint64_t         max_used;
+       uint64_t         avg_used;
+       uint64_t         max_reservations;
+       uint64_t         avg_reservations;
+#endif
+} __attribute__((aligned(8)));
+
+#define wqt_elem_ofst_slab(slab, slab_msk, ofst) \
+       /* cast through 'void *' to avoid compiler alignment warning messages */ \
+       ((struct wqt_elem *)((void *)((uintptr_t)(slab) + ((ofst) & (slab_msk)))))
+
+#if defined(CONFIG_WAITQ_LINK_STATS) || defined(CONFIG_WAITQ_PREPOST_STATS)
+/* version that makes no assumption on waste within a slab */
+static inline struct wqt_elem *
+wqt_elem_idx(struct wq_table *table, uint32_t idx)
+{
+       int slab_idx = idx / table->slab_elem;
+       struct wqt_elem *slab = table->table[slab_idx];
+       if (!slab)
+               panic("Invalid index:%d slab:%d (NULL) for table:%p\n",
+                     idx, slab_idx, table);
+       assert(slab->wqt_id.idx <= idx && (slab->wqt_id.idx + table->slab_elem) > idx);
+       return wqt_elem_ofst_slab(slab, table->slab_msk, (idx - slab->wqt_id.idx) * table->elem_sz);
+}
+#else /* !CONFIG_WAITQ_[LINK|PREPOST]_STATS */
+/* version that assumes 100% utilization of slabs (no waste) */
+static inline struct wqt_elem *
+wqt_elem_idx(struct wq_table *table, uint32_t idx)
+{
+       uint32_t ofst = idx * table->elem_sz;
+       struct wqt_elem *slab = table->table[ofst >> table->slab_shift];
+       if (!slab)
+               panic("Invalid index:%d slab:%d (NULL) for table:%p\n",
+                     idx, (ofst >> table->slab_shift), table);
+       assert(slab->wqt_id.idx <= idx && (slab->wqt_id.idx + table->slab_elem) > idx);
+       return wqt_elem_ofst_slab(slab, table->slab_msk, ofst);
+}
+#endif /* !CONFIG_WAITQ_[LINK|PREPOST]_STATS */
+
+static int __assert_only wqt_elem_in_range(struct wqt_elem *elem,
+                                          struct wq_table *table)
+{
+       struct wqt_elem **base = table->table;
+       uintptr_t e = (uintptr_t)elem;
+       assert(base != NULL);
+       while (*base != NULL) {
+               uintptr_t b = (uintptr_t)(*base);
+               if (e >= b && e < b + table->slab_sz)
+                       return 1;
+               base++;
+               if ((uintptr_t)base >= (uintptr_t)table->table + PAGE_SIZE)
+                       return 0;
+       }
+       return 0;
+}
+
+static struct wqt_elem *wq_table_get_elem(struct wq_table *table, uint64_t id);
+static void wq_table_put_elem(struct wq_table *table, struct wqt_elem *elem);
+static int wqt_elem_list_link(struct wq_table *table, struct wqt_elem *parent,
+                             struct wqt_elem *child);
+
+static void wqt_elem_invalidate(struct wqt_elem *elem)
+{
+       uint32_t __assert_only old = OSBitAndAtomic(~WQT_BITS_VALID, &elem->wqt_bits);
+       OSMemoryBarrier();
+       assert(((wqt_bits_type(old) != WQT_RESERVED) && (old & WQT_BITS_VALID)) ||
+              ((wqt_bits_type(old) == WQT_RESERVED) && !(old & WQT_BITS_VALID)));
+}
+
+static void wqt_elem_mkvalid(struct wqt_elem *elem)
+{
+       uint32_t __assert_only old = OSBitOrAtomic(WQT_BITS_VALID, &elem->wqt_bits);
+       OSMemoryBarrier();
+       assert(!(old & WQT_BITS_VALID));
+}
+
+static void wqt_elem_set_type(struct wqt_elem *elem, int type)
+{
+       uint32_t old_bits, new_bits;
+       do {
+               old_bits = elem->wqt_bits;
+               new_bits = (old_bits & ~WQT_BITS_TYPE) |
+                          ((type & WQT_BITS_TYPE_MASK) << WQT_BITS_TYPE_SHIFT);
+       } while (OSCompareAndSwap(old_bits, new_bits, &elem->wqt_bits) == FALSE);
+       OSMemoryBarrier();
+}
+
+
+static void wq_table_bootstrap(void)
+{
+       uint32_t      tmp32 = 0;
+
+       g_min_free_cache = 0;
+       g_min_free_table_elem = DEFAULT_MIN_FREE_TABLE_ELEM;
+       if (PE_parse_boot_argn("wqt_min_free", &tmp32, sizeof(tmp32)) == TRUE)
+               g_min_free_table_elem = tmp32;
+       wqdbg("Minimum free table elements: %d", g_min_free_table_elem);
+
+       g_wqt_max_tbl_size = DEFAULT_MAX_TABLE_SIZE;
+       if (PE_parse_boot_argn("wqt_tbl_size", &tmp32, sizeof(tmp32)) == TRUE)
+               g_wqt_max_tbl_size = (vm_size_t)P2ROUNDUP(tmp32, PAGE_SIZE);
+
+       lck_grp_init(&g_wqt_lck_grp, "waitq_table_locks", LCK_GRP_ATTR_NULL);
+}
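+
+/*
+ * Example (illustrative only): booting with "wqt_min_free=200
+ * wqt_tbl_size=0x1000000" would raise the free-element floor to 200 and cap
+ * each table's maximum size at 16MB (P2ROUNDUP above rounds the value up to
+ * a page multiple).
+ */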
+
+static void wq_table_init(struct wq_table *table, const char *name,
+                         uint32_t max_tbl_elem, uint32_t elem_sz,
+                         wq_table_poison_func poison)
+{
+       kern_return_t kr;
+       uint32_t slab_sz, slab_shift, slab_msk, slab_elem;
+       zone_t slab_zone;
+       size_t max_tbl_sz;
+       struct wqt_elem *e, **base;
+
+       /*
+        * First, allocate a single page of memory to act as the base
+        * for the table's element slabs
+        */
+       kr = kernel_memory_allocate(kernel_map, (vm_offset_t *)&base,
+                                   PAGE_SIZE, 0, KMA_NOPAGEWAIT, VM_KERN_MEMORY_WAITQ);
+       if (kr != KERN_SUCCESS)
+               panic("Cannot initialize %s table: "
+                     "kernel_memory_allocate failed:%d\n", name, kr);
+       memset(base, 0, PAGE_SIZE);
+
+       /*
+        * Based on the maximum table size, calculate the slab size:
+        * we allocate 1 page of slab pointers for the table, and we need to
+        * index elements of 'elem_sz', this gives us the slab size based on
+        * the maximum size the table should grow.
+        */
+       max_tbl_sz = (max_tbl_elem * elem_sz);
+       max_tbl_sz = P2ROUNDUP(max_tbl_sz, PAGE_SIZE);
+
+       /* system maximum table size divided by number of slots in a page */
+       slab_sz = (uint32_t)(max_tbl_sz / (PAGE_SIZE / (sizeof(void *))));
+       if (slab_sz < PAGE_SIZE)
+               slab_sz = PAGE_SIZE;
+
+       /* make sure the slab size is a power of two */
+       slab_shift = 0;
+       slab_msk = ~0;
+       for (uint32_t i = 0; i < 31; i++) {
+               uint32_t bit = (1 << i);
+               if ((slab_sz & bit) == slab_sz) {
+                       slab_shift = i;
+                       slab_msk = 0;
+                       for (uint32_t j = 0; j < i; j++)
+                               slab_msk |= (1 << j);
+                       break;
+               }
+               slab_sz &= ~bit;
+       }
+       slab_elem = slab_sz / elem_sz;
+
+       /* initialize the table's slab zone (for table growth) */
+       wqdbg("Initializing %s zone: slab:%d (%d,0x%x) max:%ld",
+             name, slab_sz, slab_shift, slab_msk, max_tbl_sz);
+       slab_zone = zinit(slab_sz, max_tbl_sz, slab_sz, name);
+       assert(slab_zone != ZONE_NULL);
+
+       /* allocate the first slab and populate it */
+       base[0] = (struct wqt_elem *)zalloc(slab_zone);
+       if (base[0] == NULL)
+               panic("Can't allocate a %s table slab from zone:%p",
+                     name, slab_zone);
+
+       memset(base[0], 0, slab_sz);
+
+       /* setup the initial freelist */
+       wqdbg("initializing %d links (%d bytes each)...", slab_elem, elem_sz);
+       for (unsigned l = 0; l < slab_elem; l++) {
+               e = wqt_elem_ofst_slab(base[0], slab_msk, l * elem_sz);
+               e->wqt_id.idx = l;
+               /*
+                * setting generation to 0 ensures that a setid of 0 is
+                * invalid because the generation will be incremented before
+                * each element's allocation.
+                */
+               e->wqt_id.generation = 0;
+               e->wqt_next_idx = l + 1;
+       }
+
+       /* make sure the last free element points to a never-valid idx */
+       e = wqt_elem_ofst_slab(base[0], slab_msk, (slab_elem - 1) * elem_sz);
+       e->wqt_next_idx = WQT_IDX_MAX;
+
+       lck_mtx_init(&table->lock, &g_wqt_lck_grp, LCK_ATTR_NULL);
+
+       table->slab_sz = slab_sz;
+       table->slab_shift = slab_shift;
+       table->slab_msk = slab_msk;
+       table->slab_elem = slab_elem;
+       table->slab_zone = slab_zone;
+
+       table->elem_sz = elem_sz;
+       table->nelem = slab_elem;
+       table->used_elem = 0;
+       table->elem_sz = elem_sz;
+       table->poison = poison;
+
+       table->table = base;
+       table->next_free_slab = &base[1];
+       table->free_list.id = base[0]->wqt_id.id;
+
+#if CONFIG_WAITQ_STATS
+       table->nslabs = 1;
+       table->nallocs = 0;
+       table->nreallocs = 0;
+       table->npreposts = 0;
+       table->nreservations = 0;
+       table->nreserved_releases = 0;
+
+       table->max_used = 0;
+       table->avg_used = 0;
+       table->max_reservations = 0;
+       table->avg_reservations = 0;
+#endif
+}
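+
+/*
+ * Worked sizing example (assumes 4K pages, 64-bit pointers, and 32-byte
+ * elements, e.g. the setid_link elements defined later in this file):
+ *
+ *     max_tbl_sz = 8MB (DEFAULT_MAX_TABLE_SIZE)
+ *     slab pointer slots per page = 4096 / 8 = 512
+ *     slab_sz    = 8MB / 512 = 16KB   (already a power of two)
+ *     slab_shift = 14, slab_msk = 0x3fff
+ *     slab_elem  = 16KB / 32 = 512 elements per slab
+ */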
+
+/**
+ * grow a waitq table by adding another 'slab' of table elements
+ *
+ * Conditions:
+ *     table mutex is unlocked
+ *     calling thread can block
+ */
+static void wq_table_grow(struct wq_table *table, uint32_t min_free)
+{
+       struct wqt_elem *slab, **slot;
+       struct wqt_elem *e = NULL, *first_new_elem, *last_new_elem;
+       struct wq_id free_id;
+       uint32_t free_elem;
+
+       assert(get_preemption_level() == 0);
+       assert(table && table->slab_zone);
+
+       lck_mtx_lock(&table->lock);
+
+       free_elem = table->nelem - table->used_elem;
+
+       /*
+        * If the caller just wanted to ensure a minimum number of elements,
+        * do that (and don't just blindly grow the table). Also, don't grow
+        * the table unnecessarily - we could have been beaten by a higher
+        * priority thread who acquired the lock and grew the table before we
+        * got here.
+        */
+       if (free_elem > min_free) {
+               lck_mtx_unlock(&table->lock);
+               return;
+       }
+
+       /* we are now committed to table growth */
+       wqdbg_v("BEGIN");
+
+       if (table->next_free_slab == NULL) {
+               /*
+                * before we panic, check one more time to see if any other
+                * threads have freed up space in the table.
+                */
+               if ((table->nelem - table->used_elem) > 0) {
+                       /* there's at least 1 free element: don't panic yet */
+                       lck_mtx_unlock(&table->lock);
+                       return;
+               }
+               panic("No more room to grow table: %p (nelem: %d, used: %d)",
+                     table, table->nelem, table->used_elem);
+       }
+       slot = table->next_free_slab;
+       table->next_free_slab++;
+       if ((uintptr_t)table->next_free_slab >= (uintptr_t)table->table + PAGE_SIZE)
+               table->next_free_slab = NULL;
+
+       assert(*slot == NULL);
+
+       /* allocate another slab */
+       slab = (struct wqt_elem *)zalloc(table->slab_zone);
+       if (slab == NULL)
+               panic("Can't allocate a %s table (%p) slab from zone:%p",
+                     table->slab_zone->zone_name, table, table->slab_zone);
+
+       memset(slab, 0, table->slab_sz);
+
+       /* put the new elements into a freelist */
+       wqdbg_v("    init %d new links...", table->slab_elem);
+       for (unsigned l = 0; l < table->slab_elem; l++) {
+               uint32_t idx = l + table->nelem;
+               if (idx >= (WQT_IDX_MAX - 1))
+                       break; /* the last element of the last slab */
+               e = wqt_elem_ofst_slab(slab, table->slab_msk, l * table->elem_sz);
+               e->wqt_id.idx = idx;
+               e->wqt_next_idx = idx + 1;
+       }
+       last_new_elem = e;
+       assert(last_new_elem != NULL);
+
+       first_new_elem = wqt_elem_ofst_slab(slab, table->slab_msk, 0);
+
+       /* update table bookkeeping, and atomically swap the freelist head */
+       *slot = slab;
+       if (table->nelem + table->slab_elem >= WQT_IDX_MAX)
+               table->nelem = WQT_IDX_MAX - 1;
+       else
+               table->nelem += table->slab_elem;
+
+#if CONFIG_WAITQ_STATS
+       table->nslabs += 1;
+#endif
+
+       /*
+        * The atomic swap of the free list head marks the end of table
+        * growth. Incoming requests may now use the newly allocated slab
+        * of table elements
+        */
+       free_id = table->free_list;
+       /* connect the existing free list to the end of the new free list */
+       last_new_elem->wqt_next_idx = free_id.idx;
+       while (OSCompareAndSwap64(free_id.id, first_new_elem->wqt_id.id,
+                                 &table->free_list.id) == FALSE) {
+               OSMemoryBarrier();
+               free_id = table->free_list;
+               last_new_elem->wqt_next_idx = free_id.idx;
+       }
+       OSMemoryBarrier();
+
+       lck_mtx_unlock(&table->lock);
+
+       return;
+}
+
+static __attribute__((noinline))
+struct wqt_elem *wq_table_alloc_elem(struct wq_table *table, int type, int nelem)
+{
+       int nspins = 0, ntries = 0, nalloc = 0;
+       uint32_t table_size;
+       struct wqt_elem *elem = NULL;
+       struct wq_id free_id, next_id;
+
+       static const int max_retries = 500;
+
+       if (type != WQT_ELEM && type != WQT_LINK && type != WQT_RESERVED)
+               panic("wq_table_alloc of invalid elem type:%d from table @%p",
+                     type, table);
+
+       assert(nelem > 0);
+       elem = NULL;
+
+try_again:
+       if (ntries++ > max_retries) {
+               struct wqt_elem *tmp;
+               if (table->used_elem + nelem >= table_size)
+                       panic("No more room to grow table: 0x%p size:%d, used:%d, requested elem:%d",
+                             table, table_size, table->used_elem, nelem);
+               if (nelem == 1)
+                       panic("Too many alloc retries: %d, table:%p, type:%d, nelem:%d",
+                             ntries, table, type, nelem);
+               /* don't panic: try allocating one-at-a-time */
+               while (nelem > 0) {
+                       tmp = wq_table_alloc_elem(table, type, 1);
+                       if (elem)
+                               wqt_elem_list_link(table, tmp, elem);
+                       elem = tmp;
+                       --nelem;
+               }
+               assert(elem != NULL);
+               return elem;
+       }
+
+       nalloc = 0;
+       table_size = table->nelem;
+
+       if (table->used_elem + nelem >= table_size) {
+               if (get_preemption_level() != 0) {
+#if CONFIG_WAITQ_STATS
+                       table->nspins += 1;
+#endif
+                       /*
+                        * We may have just raced with table growth: check
+                        * again to make sure there really isn't any space.
+                        */
+                       if (++nspins > 4)
+                               panic("Can't grow table %p with preemption"
+                                     " disabled!", table);
+                       delay(1);
+                       goto try_again;
+               }
+               wq_table_grow(table, nelem);
+               goto try_again;
+       }
+
+       /* read this value only once before the CAS */
+       free_id = table->free_list;
+       if (free_id.idx >= table_size)
+               goto try_again;
+
+       /*
+        * Find the item on the free list which will become the new free list
+        * head, but be careful not to modify any memory (read only)!  Other
+        * threads can alter table state at any time up until the CAS.  We
+        * don't modify any memory until we've successfully swapped out the
+        * free list head with the one we've investigated.
+        */
+       for (struct wqt_elem *next_elem = wqt_elem_idx(table, free_id.idx);
+            nalloc < nelem;
+            nalloc++) {
+               elem = next_elem;
+               next_id.generation = 0;
+               next_id.idx = next_elem->wqt_next_idx;
+               if (next_id.idx < table->nelem) {
+                       next_elem = wqt_elem_idx(table, next_id.idx);
+                       next_id.id = next_elem->wqt_id.id;
+               } else {
+                       goto try_again;
+               }
+       }
+       /* 'elem' points to the last element being allocated */
+
+       if (OSCompareAndSwap64(free_id.id, next_id.id,
+                              &table->free_list.id) == FALSE)
+               goto try_again;
+
+       /* load barrier */
+       OSMemoryBarrier();
+
+       /*
+        * After the CAS, we know that we own free_id, and it points to a
+        * valid table entry (checked above). Grab the table pointer and
+        * reset some values.
+        */
+       OSAddAtomic(nelem, &table->used_elem);
+
+       /* end the list of allocated elements */
+       elem->wqt_next_idx = WQT_IDX_MAX;
+       /* reset 'elem' to point to the first allocated element */
+       elem = wqt_elem_idx(table, free_id.idx);
+
+       /*
+        * Update the generation count, and return the element(s)
+        * with a single reference (and no valid bit). If the
+        * caller immediately calls _put() on any element, then
+        * it will be released back to the free list. If the caller
+        * subsequently marks the element as valid, then the put
+        * will simply drop the reference.
+        */
+       for (struct wqt_elem *tmp = elem; ; ) {
+               assert(!wqt_bits_valid(tmp->wqt_bits) &&
+                      (wqt_bits_refcnt(tmp->wqt_bits) == 0));
+               --nalloc;
+               tmp->wqt_id.generation += 1;
+               tmp->wqt_bits = 1;
+               wqt_elem_set_type(tmp, type);
+               if (tmp->wqt_next_idx == WQT_IDX_MAX)
+                       break;
+               assert(tmp->wqt_next_idx != WQT_IDX_MAX);
+               tmp = wqt_elem_idx(table, tmp->wqt_next_idx);
+       }
+       assert(nalloc == 0);
+
+#if CONFIG_WAITQ_STATS
+       uint64_t nreservations;
+       table->nallocs += nelem;
+       if (type == WQT_RESERVED)
+               OSIncrementAtomic64(&table->nreservations);
+       nreservations = table->nreservations;
+       if (table->used_elem > table->max_used)
+               table->max_used = table->used_elem;
+       if (nreservations > table->max_reservations)
+               table->max_reservations = nreservations;
+       table->avg_used = (table->avg_used + table->used_elem) / 2;
+       table->avg_reservations = (table->avg_reservations + nreservations) / 2;
+#endif
+
+       return elem;
+}
+
+static void wq_table_realloc_elem(struct wq_table *table, struct wqt_elem *elem, int type)
+{
+       (void)table;
+       assert(wqt_elem_in_range(elem, table) &&
+              !wqt_bits_valid(elem->wqt_bits));
+
+#if CONFIG_WAITQ_STATS
+       table->nreallocs += 1;
+       if (wqt_bits_type(elem->wqt_bits) == WQT_RESERVED && type != WQT_RESERVED) {
+               /*
+                * This isn't under any lock, so we'll clamp it.
+                * The stats are meant to be informative, not perfectly
+                * accurate.
+                */
+               OSDecrementAtomic64(&table->nreservations);
+       }
+       table->avg_reservations = (table->avg_reservations + table->nreservations) / 2;
+#endif
+
+       /*
+        * Return the same element with a new generation count, and a
+        * (potentially) new type. Don't touch the refcount: the caller
+        * is responsible for getting that (and the valid bit) correct.
+        */
+       elem->wqt_id.generation += 1;
+       elem->wqt_next_idx = WQT_IDX_MAX;
+       wqt_elem_set_type(elem, type);
+
+       return;
+}
+
+static void wq_table_free_elem(struct wq_table *table, struct wqt_elem *elem)
+{
+       struct wq_id next_id;
+
+       assert(wqt_elem_in_range(elem, table) &&
+              !wqt_bits_valid(elem->wqt_bits) &&
+              (wqt_bits_refcnt(elem->wqt_bits) == 0));
+
+       OSDecrementAtomic(&table->used_elem);
+
+#if CONFIG_WAITQ_STATS
+       table->avg_used = (table->avg_used + table->used_elem) / 2;
+       if (wqt_bits_type(elem->wqt_bits) == WQT_RESERVED)
+               OSDecrementAtomic64(&table->nreservations);
+       table->avg_reservations = (table->avg_reservations + table->nreservations) / 2;
+#endif
+
+       elem->wqt_bits = 0;
+
+       if (table->poison)
+               (table->poison)(table, elem);
+
+again:
+       next_id = table->free_list;
+       if (next_id.idx >= table->nelem)
+               elem->wqt_next_idx = WQT_IDX_MAX;
+       else
+               elem->wqt_next_idx = next_id.idx;
+
+       /* store barrier */
+       OSMemoryBarrier();
+       if (OSCompareAndSwap64(next_id.id, elem->wqt_id.id,
+                              &table->free_list.id) == FALSE)
+               goto again;
+}
+
+/* get a reference to a table element identified by 'id' */
+static struct wqt_elem *wq_table_get_elem(struct wq_table *table, uint64_t id)
+{
+       struct wqt_elem *elem;
+       uint32_t idx, bits, new_bits;
+
+       /*
+        * Here we have a reference to the table which is guaranteed to remain
+        * valid until we drop the reference
+        */
+
+       idx = ((struct wq_id *)&id)->idx;
+
+       if (idx >= table->nelem)
+               panic("id:0x%llx : idx:%d > %d", id, idx, table->nelem);
+
+       elem = wqt_elem_idx(table, idx);
+
+       /* verify the validity by taking a reference on the table object */
+       bits = elem->wqt_bits;
+       if (!wqt_bits_valid(bits))
+               return NULL;
+
+       /*
+        * do a pre-verify on the element ID to potentially
+        * avoid 2 compare-and-swaps
+        */
+       if (elem->wqt_id.id != id)
+               return NULL;
+
+       new_bits = bits + 1;
+
+       /* check for overflow */
+       assert(wqt_bits_refcnt(new_bits) > 0);
+
+       while (OSCompareAndSwap(bits, new_bits, &elem->wqt_bits) == FALSE) {
+               /*
+                * either the element became invalid,
+                * or someone else grabbed/removed a reference.
+                */
+               bits = elem->wqt_bits;
+               if (!wqt_bits_valid(bits)) {
+                       /* don't return invalid elements */
+                       return NULL;
+               }
+               new_bits = bits + 1;
+               assert(wqt_bits_refcnt(new_bits) > 0);
+       }
+
+       /* load barrier */
+       OSMemoryBarrier();
+
+       /* check to see that our reference is to the same generation! */
+       if (elem->wqt_id.id != id) {
+               /*
+               wqdbg("ID:0x%llx table generation (%d) != %d",
+                     id, elem->wqt_id.generation,
+                     ((struct wq_id *)&id)->generation);
+                */
+               wq_table_put_elem(table, elem);
+               return NULL;
+       }
+
+       /* We now have a reference on a valid object */
+       return elem;
+}
+
+/* release a ref to table element - puts it back on free list as appropriate */
+static void wq_table_put_elem(struct wq_table *table, struct wqt_elem *elem)
+{
+       uint32_t bits, new_bits;
+
+       assert(wqt_elem_in_range(elem, table));
+
+       bits = elem->wqt_bits;
+       new_bits = bits - 1;
+
+       /* check for underflow */
+       assert(wqt_bits_refcnt(new_bits) < WQT_BITS_REFCNT_MASK);
+
+       while (OSCompareAndSwap(bits, new_bits, &elem->wqt_bits) == FALSE) {
+               bits = elem->wqt_bits;
+               new_bits = bits - 1;
+               /* catch underflow */
+               assert(wqt_bits_refcnt(new_bits) < WQT_BITS_REFCNT_MASK);
+       }
+
+       /* load barrier */
+       OSMemoryBarrier();
+
+       /*
+        * if this was the last reference, and it was marked as invalid,
+        * then we can add this link object back to the free list
+        */
+       if (!wqt_bits_valid(new_bits) && (wqt_bits_refcnt(new_bits) == 0))
+               wq_table_free_elem(table, elem);
+
+       return;
+}
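+
+/*
+ * Typical use of the get/put pair above (sketch only; the real callers are
+ * the lt_get_link()/lt_put_link() and wq_prepost_get()/wq_prepost_put()
+ * wrappers later in this file):
+ *
+ *     struct wqt_elem *e = wq_table_get_elem(table, id);
+ *     if (e != NULL) {
+ *             ... the reference keeps 'e' off the free list ...
+ *             wq_table_put_elem(table, e);
+ *     }
+ */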
+
+
+/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ *
+ * API: wqt_elem_list_...
+ *
+ * Reuse the free-list linkage member 'wqt_next_idx' of a table element
+ * to form a slightly more generic singly-linked list. All members of this
+ * list have been allocated from a table, but have not been made valid.
+ *
+ * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -*/
+
+/* link parent->child */
+static int wqt_elem_list_link(struct wq_table *table, struct wqt_elem *parent, struct wqt_elem *child)
+{
+       int nelem = 1;
+
+       assert(wqt_elem_in_range(parent, table));
+
+       /* find the end of the parent's list */
+       while (parent->wqt_next_idx != WQT_IDX_MAX) {
+               assert(parent->wqt_next_idx < table->nelem);
+               parent = wqt_elem_idx(table, parent->wqt_next_idx);
+               nelem++;
+       }
+
+       if (child) {
+               assert(wqt_elem_in_range(child, table));
+               parent->wqt_next_idx = child->wqt_id.idx;
+       }
+
+       return nelem;
+}
+
+static struct wqt_elem *wqt_elem_list_next(struct wq_table *table, struct wqt_elem *head)
+{
+       struct wqt_elem *elem;
+
+       if (!head)
+               return NULL;
+       if (head->wqt_next_idx >= table->nelem)
+               return NULL;
+
+       elem = wqt_elem_idx(table, head->wqt_next_idx);
+       assert(wqt_elem_in_range(elem, table));
+
+       return elem;
+}
+
+/*
+ * Obtain a pointer to the first element of a list.  Don't take an extra
+ * reference on the object - the list implicitly holds that reference.
+ *
+ * This function is used to convert the head of a singly-linked list
+ * to a real wqt_elem object.
+ */
+static struct wqt_elem *wqt_elem_list_first(struct wq_table *table, uint64_t id)
+{
+       uint32_t idx;
+       struct wqt_elem *elem = NULL;
+
+       if (id == 0)
+               return NULL;
+
+       idx = ((struct wq_id *)&id)->idx;
+
+       if (idx > table->nelem)
+               panic("Invalid element for id:0x%llx", id);
+       elem = wqt_elem_idx(table, idx);
+
+       /* invalid element: reserved ID was probably already reallocated */
+       if (elem->wqt_id.id != id)
+               return NULL;
+
+       /* the returned element should _not_ be marked valid! */
+       if (wqt_bits_valid(elem->wqt_bits) ||
+           wqt_bits_type(elem->wqt_bits) != WQT_RESERVED ||
+           wqt_bits_refcnt(elem->wqt_bits) != 1) {
+               panic("Valid/unreserved element %p (0x%x) in reserved list",
+                     elem, elem->wqt_bits);
+       }
+
+       return elem;
+}
+
+static void wqt_elem_reset_next(struct wq_table *table, struct wqt_elem *wqp)
+{
+       (void)table;
+
+       if (!wqp)
+               return;
+       assert(wqt_elem_in_range(wqp, table));
+
+       wqp->wqt_next_idx = WQT_IDX_MAX;
+}
+
+/*
+ * Pop an item off the list.
+ * New list head returned in *id, caller responsible for reference on returned
+ * object. We do a realloc here to reset the type of the object, but still
+ * leave it invalid.
+ */
+static struct wqt_elem *wqt_elem_list_pop(struct wq_table *table, uint64_t *id, int type)
+{
+       struct wqt_elem *first, *next;
+
+       if (!id || *id == 0)
+               return NULL;
+
+       /* pop an item off the reserved stack */
+
+       first = wqt_elem_list_first(table, *id);
+       if (!first) {
+               *id = 0;
+               return NULL;
+       }
+
+       next = wqt_elem_list_next(table, first);
+       if (next)
+               *id = next->wqt_id.id;
+       else
+               *id = 0;
+
+       wq_table_realloc_elem(table, first, type);
+
+       return first;
+}
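+
+/*
+ * Sketch of draining such a list with the helpers above (illustrative;
+ * 'reserved' is assumed to hold the head id of a chain of WQT_RESERVED
+ * elements):
+ *
+ *     struct wqt_elem *e;
+ *     while ((e = wqt_elem_list_pop(table, &reserved, WQT_LINK)) != NULL) {
+ *             ... initialize 'e', then mark it valid or put it back ...
+ *     }
+ */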
+
+/*
+ * Free an entire list of linked/reserved elements
+ */
+static int wqt_elem_list_release(struct wq_table *table,
+                                struct wqt_elem *head,
+                                int __assert_only type)
+{
+       struct wqt_elem *elem;
+       struct wq_id free_id;
+       int nelem = 0;
+
+       if (!head)
+               return 0;
+
+       for (elem = head; ; ) {
+               assert(wqt_elem_in_range(elem, table));
+               assert(!wqt_bits_valid(elem->wqt_bits) && (wqt_bits_refcnt(elem->wqt_bits) == 1));
+               assert(wqt_bits_type(elem->wqt_bits) == type);
+
+               nelem++;
+               elem->wqt_bits = 0;
+               if (table->poison)
+                       (table->poison)(table, elem);
+
+               if (elem->wqt_next_idx == WQT_IDX_MAX)
+                       break;
+               assert(elem->wqt_next_idx < table->nelem);
+               elem = wqt_elem_idx(table, elem->wqt_next_idx);
+       }
+
+       /*
+        * 'elem' now points to the end of our list, and 'head' points to the
+        * beginning. We want to atomically swap the free list pointer with
+        * the 'head' and ensure that 'elem' points to the previous free list
+        * head.
+        */
+
+again:
+       free_id = table->free_list;
+       if (free_id.idx >= table->nelem)
+               elem->wqt_next_idx = WQT_IDX_MAX;
+       else
+               elem->wqt_next_idx = free_id.idx;
+
+       /* store barrier */
+       OSMemoryBarrier();
+       if (OSCompareAndSwap64(free_id.id, head->wqt_id.id,
+                              &table->free_list.id) == FALSE)
+               goto again;
+
+       OSAddAtomic(-nelem, &table->used_elem);
+       return nelem;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * SetID Link Table Implementation
+ *
+ * ---------------------------------------------------------------------- */
+static struct wq_table g_linktable;
+
+enum setid_link_type {
+       SLT_ALL     = -1,
+       SLT_FREE    = WQT_FREE,
+       SLT_WQS     = WQT_ELEM,
+       SLT_LINK    = WQT_LINK,
+};
+
+struct setid_link {
+       struct wqt_elem wqte;
+
+       union {
+               /* wqt_type == SLT_WQS (WQT_ELEM) */
+               struct {
+                       struct waitq_set *sl_set;
+                       /* uint64_t          sl_prepost_id; */
+               } sl_wqs;
+
+               /* wqt_type == SLT_LINK (WQT_LINK) */
+               struct {
+                       uint64_t          sl_left_setid;
+                       uint64_t          sl_right_setid;
+               } sl_link;
+       };
+#ifdef CONFIG_WAITQ_LINK_STATS
+       thread_t  sl_alloc_th;
+       task_t    sl_alloc_task;
+       uintptr_t sl_alloc_bt[NWAITQ_BTFRAMES];
+       uint64_t  sl_alloc_ts;
+       uintptr_t sl_invalidate_bt[NWAITQ_BTFRAMES];
+       uint64_t  sl_invalidate_ts;
+       uintptr_t sl_mkvalid_bt[NWAITQ_BTFRAMES];
+       uint64_t  sl_mkvalid_ts;
+       uint64_t  sl_free_ts;
+#endif
+};
+#if !defined(CONFIG_WAITQ_LINK_STATS)
+_Static_assert((sizeof(struct setid_link) & (sizeof(struct setid_link) - 1)) == 0,
+              "setid_link struct must be a power of two!");
+#endif
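+
+/*
+ * The power-of-two size is what lets the default (no-stats) build of
+ * wqt_elem_idx() locate an element with just a shift and a mask:
+ *
+ *     ofst = idx * sizeof(struct setid_link);
+ *     slab = table->table[ofst >> table->slab_shift];
+ *     elem = wqt_elem_ofst_slab(slab, table->slab_msk, ofst);
+ *
+ * With the link-stats fields compiled in the size may not be a power of two,
+ * which is why the slower per-slab variant of wqt_elem_idx() is used in that
+ * configuration.
+ */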
+
+#define sl_refcnt(link) \
+       (wqt_bits_refcnt((link)->wqte.wqt_bits))
+
+#define sl_type(link) \
+       (wqt_bits_type((link)->wqte.wqt_bits))
+
+#define sl_set_valid(link) \
+       do { \
+               wqt_elem_mkvalid(&(link)->wqte); \
+               lt_do_mkvalid_stats(&(link)->wqte); \
+       } while (0)
+
+#define sl_is_valid(link) \
+       wqt_bits_valid((link)->wqte.wqt_bits)
+
+#define sl_set_id wqte.wqt_id
+
+#define SLT_WQS_POISON         ((void *)(0xf00df00d))
+#define SLT_LINK_POISON        (0x0bad0badffffffffull)
+
+static void lt_poison(struct wq_table *table, struct wqt_elem *elem)
+{
+       struct setid_link *sl_link = (struct setid_link *)elem;
+       (void)table;
+
+       switch (sl_type(sl_link)) {
+       case SLT_WQS:
+               sl_link->sl_wqs.sl_set = SLT_WQS_POISON;
+               break;
+       case SLT_LINK:
+               sl_link->sl_link.sl_left_setid = SLT_LINK_POISON;
+               sl_link->sl_link.sl_right_setid = SLT_LINK_POISON;
+               break;
+       default:
+               break;
+       }
+#ifdef CONFIG_WAITQ_LINK_STATS
+       memset(sl_link->sl_alloc_bt, 0, sizeof(sl_link->sl_alloc_bt));
+       sl_link->sl_alloc_ts = 0;
+       memset(sl_link->sl_mkvalid_bt, 0, sizeof(sl_link->sl_mkvalid_bt));
+       sl_link->sl_mkvalid_ts = 0;
+
+       sl_link->sl_alloc_th = THREAD_NULL;
+       /* leave the sl_alloc_task in place for debugging */
+
+       sl_link->sl_free_ts = mach_absolute_time();
+#endif
+}
+
+#ifdef CONFIG_WAITQ_LINK_STATS
+static __inline__ void lt_do_alloc_stats(struct wqt_elem *elem)
+{
+       if (elem) {
+               struct setid_link *link = (struct setid_link *)elem;
+               memset(link->sl_alloc_bt, 0, sizeof(link->sl_alloc_bt));
+               waitq_grab_backtrace(link->sl_alloc_bt, 0);
+               link->sl_alloc_th = current_thread();
+               link->sl_alloc_task = current_task();
+
+               assert(link->sl_alloc_ts == 0);
+               link->sl_alloc_ts = mach_absolute_time();
+
+               memset(link->sl_invalidate_bt, 0, sizeof(link->sl_invalidate_bt));
+               link->sl_invalidate_ts = 0;
+       }
+}
+
+static __inline__ void lt_do_invalidate_stats(struct wqt_elem *elem)
+{
+       struct setid_link *link = (struct setid_link *)elem;
+
+       if (!elem)
+               return;
+
+       assert(link->sl_mkvalid_ts > 0);
+
+       memset(link->sl_invalidate_bt, 0, sizeof(link->sl_invalidate_bt));
+       link->sl_invalidate_ts = mach_absolute_time();
+       waitq_grab_backtrace(link->sl_invalidate_bt, 0);
+}
+
+static __inline__ void lt_do_mkvalid_stats(struct wqt_elem *elem)
+{
+       struct setid_link *link = (struct setid_link *)elem;
+
+       if (!elem)
+               return;
+
+       memset(link->sl_mkvalid_bt, 0, sizeof(link->sl_mkvalid_bt));
+       link->sl_mkvalid_ts = mach_absolute_time();
+       waitq_grab_backtrace(link->sl_mkvalid_bt, 0);
+}
+#else
+#define lt_do_alloc_stats(e)
+#define lt_do_invalidate_stats(e)
+#define lt_do_mkvalid_stats(e)
+#endif /* CONFIG_WAITQ_LINK_STATS */
+
+static void lt_init(void)
+{
+       uint32_t tablesz = 0, max_links = 0;
+
+       if (PE_parse_boot_argn("wql_tsize", &tablesz, sizeof(tablesz)) != TRUE)
+               tablesz = (uint32_t)g_wqt_max_tbl_size;
+
+       tablesz = P2ROUNDUP(tablesz, PAGE_SIZE);
+       max_links = tablesz / sizeof(struct setid_link);
+       assert(max_links > 0 && tablesz > 0);
+
+       /* we have a restricted index range */
+       if (max_links > (WQT_IDX_MAX + 1))
+               max_links = WQT_IDX_MAX + 1;
+
+       wqinfo("init linktable with max:%d elements (%d bytes)",
+              max_links, tablesz);
+       wq_table_init(&g_linktable, "wqslab.links", max_links,
+                     sizeof(struct setid_link), lt_poison);
+}
+
+static void lt_ensure_free_space(void)
+{
+       if (g_linktable.nelem - g_linktable.used_elem < g_min_free_table_elem) {
+               /*
+                * we don't hold locks on these values, so check for underflow
+                */
+               if (g_linktable.used_elem <= g_linktable.nelem) {
+                       wqdbg_v("Forcing table growth: nelem=%d, used=%d, min_free=%d",
+                               g_linktable.nelem, g_linktable.used_elem,
+                               g_min_free_table_elem);
+                       wq_table_grow(&g_linktable, g_min_free_table_elem);
+               }
+       }
+}
+
+static struct setid_link *lt_alloc_link(int type)
+{
+       struct wqt_elem *elem;
+
+       elem = wq_table_alloc_elem(&g_linktable, type, 1);
+       lt_do_alloc_stats(elem);
+       return (struct setid_link *)elem;
+}
+
+static void lt_realloc_link(struct setid_link *link, int type)
+{
+       wq_table_realloc_elem(&g_linktable, &link->wqte, type);
+#ifdef CONFIG_WAITQ_LINK_STATS
+       memset(link->sl_alloc_bt, 0, sizeof(link->sl_alloc_bt));
+       link->sl_alloc_ts = 0;
+       lt_do_alloc_stats(&link->wqte);
+
+       memset(link->sl_invalidate_bt, 0, sizeof(link->sl_invalidate_bt));
+       link->sl_invalidate_ts = 0;
+#endif
+}
+
+static void lt_invalidate(struct setid_link *link)
+{
+       wqt_elem_invalidate(&link->wqte);
+       lt_do_invalidate_stats(&link->wqte);
+}
+
+static struct setid_link *lt_get_link(uint64_t setid)
+{
+       struct wqt_elem *elem;
+
+       elem = wq_table_get_elem(&g_linktable, setid);
+       return (struct setid_link *)elem;
+}
+
+static void lt_put_link(struct setid_link *link)
+{
+       if (!link)
+               return;
+       wq_table_put_elem(&g_linktable, (struct wqt_elem *)link);
+}
+
+static struct setid_link *lt_get_reserved(uint64_t setid, int type)
+{
+       struct wqt_elem *elem;
+
+       elem = wqt_elem_list_first(&g_linktable, setid);
+       if (!elem)
+               return NULL;
+       wq_table_realloc_elem(&g_linktable, elem, type);
+       return (struct setid_link *)elem;
+}
+
+
+static inline int waitq_maybe_remove_link(struct waitq *waitq,
+                                         uint64_t setid,
+                                         struct setid_link *parent,
+                                         struct setid_link *left,
+                                         struct setid_link *right);
+
+enum {
+       LINK_WALK_ONE_LEVEL = 0,
+       LINK_WALK_FULL_DAG  = 1,
+       LINK_WALK_FULL_DAG_UNLOCKED = 2,
+};
+
+typedef int (*lt_callback_func)(struct waitq *waitq, void *ctx,
+                               struct setid_link *link);
+
+/**
+ * walk all table elements (of type 'link_type') pointed to by 'setid'
+ *
+ * Conditions:
+ *     waitq is locked (or NULL)
+ *     'setid' is managed by 'waitq'
+ *             this could be direct (waitq->waitq_set_id == setid)
+ *             OR indirect (setid is the left/right ID in a LINK chain,
+ *                          whose root is waitq->waitq_set_id)
+ *
+ * Notes:
+ *     This function uses recursion to walk the set of table elements
+ *     pointed to by 'setid'. For each element encountered, 'cb' will be
+ *     called. If non-zero, the return value of this callback function can
+ *     early-out of the table walk.
+ *
+ *     For each link element encountered, the function takes a reference to
+ *     it. The reference is dropped only after the callback and any recursion
+ *     has completed.
+ *
+ *     The assumed table/link/tree structure:
+ *                   'setid'
+ *                   /    \
+ *                  /      \
+ *              L(LINK)     R(LINK)
+ *               /\             /\
+ *              /  \           /  \
+ *             /    \       Rl(*)  Rr(*)
+ *         Ll(*)  Lr(*)      /\    /\
+ *           /\     /\    ... ... ... ...
+ *        ...  ... ... ...
+ *                    \
+ *                    WQS(wqset_q.waitq_set_id == Sx)
+ *                    [waitq set is a member of setid, 'Sx']
+ *
+ *                    'Sx'
+ *                   /    \
+ *                  /      \
+ *              L(LINK)     R(LINK)
+ *               /\             /\
+ *             ... ...        ... ...
+ *
+ *     The basic algorithm is as follows:
+ *     *) take a reference to the table object pointed to by 'setid'
+ *     *) if appropriate, call 'cb' (potentially early-out on non-zero return)
+ *     *) if the link object points to a waitq set, and the walk type
+ *        is 'FULL_DAG' (full directed-acyclic-graph), then try to lock
+ *        the associated waitq set object and recursively walk all sets to
+ *        which that set belongs. This is a DFS of the tree structure.
+ *     *) recurse down the left side of the tree (following the
+ *        'sl_left_setid' pointer in the link object
+ *     *) recurse down the right side of the tree (following the
+ *        'sl_right_setid' pointer in the link object
+ */
+static __attribute__((noinline))
+int walk_setid_links(int walk_type, struct waitq *waitq,
+                    uint64_t setid, int link_type,
+                    void *ctx, lt_callback_func cb)
+{
+       struct setid_link *link;
+       uint64_t nextid;
+       int sl_type;
+
+       link = lt_get_link(setid);
+
+       /* invalid link */
+       if (!link)
+               return WQ_ITERATE_CONTINUE;
+
+       setid = nextid = 0;
+       sl_type = sl_type(link);
+       if (sl_type == SLT_LINK) {
+               setid  = link->sl_link.sl_left_setid;
+               nextid = link->sl_link.sl_right_setid;
+       }
+
+       /*
+        * Make the callback only on specified link_type (or all links)
+        * Note that after the callback, the link object may be
+        * invalid. The only valid thing we can do is put our
+        * reference to it (which may put it back on the free list)
+        */
+       if (link_type == SLT_ALL || link_type == sl_type) {
+               /* allow the callback to early-out */
+               int ret = cb(waitq, ctx, link);
+               if (ret != WQ_ITERATE_CONTINUE) {
+                       lt_put_link(link);
+                       return ret;
+               }
+       }
+
+       if (sl_type == SLT_WQS &&
+           (walk_type == LINK_WALK_FULL_DAG ||
+            walk_type == LINK_WALK_FULL_DAG_UNLOCKED)) {
+               /*
+                * Recurse down any sets to which this wait queue set was
+                * added.  We do this just before we put our reference to
+                * the link object (which may free it).
+                */
+               struct waitq_set *wqset = link->sl_wqs.sl_set;
+               int ret = WQ_ITERATE_CONTINUE;
+               int get_spl = 0;
+               int should_unlock = 0;
+               uint64_t wqset_setid = 0;
+               spl_t set_spl;
+
+               if (waitq_set_is_valid(wqset) && walk_type == LINK_WALK_FULL_DAG) {
+                       if ((!waitq || !waitq_irq_safe(waitq)) &&
+                           waitq_irq_safe(&wqset->wqset_q)) {
+                               get_spl = 1;
+                               set_spl = splsched();
+                       }
+                       waitq_set_lock(wqset);
+                       should_unlock = 1;
+               }
+
+               /*
+                * verify the linked waitq set as it could have been
+                * invalidated before we grabbed the lock!
+                */
+               if (wqset->wqset_id != link->sl_set_id.id) {
+                       /* This is the bottom of the tree: just get out */
+                       if (should_unlock) {
+                               waitq_set_unlock(wqset);
+                               if (get_spl)
+                                       splx(set_spl);
+                       }
+                       lt_put_link(link);
+                       return WQ_ITERATE_CONTINUE;
+               }
+
+               wqset_setid = wqset->wqset_q.waitq_set_id;
+
+               if (wqset_setid > 0)
+                       ret = walk_setid_links(walk_type, &wqset->wqset_q,
+                                              wqset_setid, link_type, ctx, cb);
+               if (should_unlock) {
+                       waitq_set_unlock(wqset);
+                       if (get_spl)
+                               splx(set_spl);
+               }
+               if (ret != WQ_ITERATE_CONTINUE) {
+                       lt_put_link(link);
+                       return ret;
+               }
+       }
+
+       lt_put_link(link);
+
+       /* recurse down left side of the tree */
+       if (setid) {
+               int ret = walk_setid_links(walk_type, waitq, setid, link_type, ctx, cb);
+               if (ret != WQ_ITERATE_CONTINUE)
+                       return ret;
+       }
+
+       /* recurse down right side of the tree */
+       if (nextid)
+               return walk_setid_links(walk_type, waitq, nextid, link_type, ctx, cb);
+
+       return WQ_ITERATE_CONTINUE;
+}
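+
+/*
+ * Usage sketch (the callback name below is hypothetical; the real callbacks
+ * appear later in this file). Per the conditions above, 'waitq' is expected
+ * to be locked by the caller:
+ *
+ *     static int my_cb(struct waitq *waitq, void *ctx, struct setid_link *link)
+ *     {
+ *             (void)waitq; (void)ctx; (void)link;
+ *             return WQ_ITERATE_CONTINUE;   (any other value stops the walk)
+ *     }
+ *
+ *     walk_setid_links(LINK_WALK_ONE_LEVEL, waitq, waitq->waitq_set_id,
+ *                      SLT_WQS, NULL, my_cb);
+ */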
+
+/* ----------------------------------------------------------------------
+ *
+ * Prepost Link Table Implementation
+ *
+ * ---------------------------------------------------------------------- */
+static struct wq_table g_prepost_table;
+
+enum wq_prepost_type {
+       WQP_FREE  = WQT_FREE,
+       WQP_WQ    = WQT_ELEM,
+       WQP_POST  = WQT_LINK,
+};
+
+struct wq_prepost {
+       struct wqt_elem wqte;
+
+       union {
+               /* wqt_type == WQP_WQ (WQT_ELEM) */
+               struct {
+                       struct waitq *wqp_wq_ptr;
+               } wqp_wq;
+               /* wqt_type == WQP_POST (WQT_LINK) */
+               struct {
+                       uint64_t      wqp_next_id;
+                       uint64_t      wqp_wq_id;
+               } wqp_post;
+       };
+#ifdef CONFIG_WAITQ_PREPOST_STATS
+       thread_t  wqp_alloc_th;
+       task_t    wqp_alloc_task;
+       uintptr_t wqp_alloc_bt[NWAITQ_BTFRAMES];
+#endif
+};
+#if !defined(CONFIG_WAITQ_PREPOST_STATS)
+_Static_assert((sizeof(struct wq_prepost) & (sizeof(struct wq_prepost) - 1)) == 0,
+              "wq_prepost struct must be a power of two!");
+#endif
+
+#define wqp_refcnt(wqp) \
+       (wqt_bits_refcnt((wqp)->wqte.wqt_bits))
+
+#define wqp_type(wqp) \
+       (wqt_bits_type((wqp)->wqte.wqt_bits))
+
+#define wqp_set_valid(wqp) \
+       wqt_elem_mkvalid(&(wqp)->wqte)
+
+#define wqp_is_valid(wqp) \
+       wqt_bits_valid((wqp)->wqte.wqt_bits)
+
+#define wqp_prepostid wqte.wqt_id
+
+#define WQP_WQ_POISON              (0x0bad0badffffffffull)
+#define WQP_POST_POISON            (0xf00df00df00df00d)
+
+static void wqp_poison(struct wq_table *table, struct wqt_elem *elem)
+{
+       struct wq_prepost *wqp = (struct wq_prepost *)elem;
+       (void)table;
+
+       switch (wqp_type(wqp)) {
+       case WQP_WQ:
+               break;
+       case WQP_POST:
+               wqp->wqp_post.wqp_next_id = WQP_POST_POISON;
+               wqp->wqp_post.wqp_wq_id = WQP_POST_POISON;
+               break;
+       default:
+               break;
+       }
+}
+
+#ifdef CONFIG_WAITQ_PREPOST_STATS
+static __inline__ void wqp_do_alloc_stats(struct wqt_elem *elem)
+{
+       if (elem) {
+               struct wq_prepost *wqp = (struct wq_prepost *)elem;
+
+               /* be sure to take stats for _all_ allocated objects */
+               for (;;) {
+                       uint32_t next_idx;
+
+                       memset(wqp->wqp_alloc_bt, 0, sizeof(wqp->wqp_alloc_bt));
+                       waitq_grab_backtrace(wqp->wqp_alloc_bt, 4);
+                       wqp->wqp_alloc_th = current_thread();
+                       wqp->wqp_alloc_task = current_task();
+                       next_idx = wqp->wqte.wqt_next_idx;
+
+                       if (next_idx == WQT_IDX_MAX)
+                               break;
+                       assert(next_idx < g_prepost_table.nelem);
+
+                       wqp = (struct wq_prepost *)wqt_elem_idx(&g_prepost_table,
+                                                               next_idx);
+               }
+       }
+}
+#else
+#define wqp_do_alloc_stats(e)
+#endif /* CONFIG_WAITQ_PREPOST_STATS */
+
+static void wqp_init(void)
+{
+       uint32_t tablesz = 0, max_wqp = 0;
+
+       if (PE_parse_boot_argn("wqp_tsize", &tablesz, sizeof(tablesz)) != TRUE)
+               tablesz = (uint32_t)g_wqt_max_tbl_size;
+
+       tablesz = P2ROUNDUP(tablesz, PAGE_SIZE);
+       max_wqp = tablesz / sizeof(struct wq_prepost);
+       assert(max_wqp > 0 && tablesz > 0);
+
+       /* we have a restricted index range */
+       if (max_wqp > (WQT_IDX_MAX + 1))
+               max_wqp = WQT_IDX_MAX + 1;
+
+       wqinfo("init prepost table with max:%d elements (%d bytes)",
+              max_wqp, tablesz);
+       wq_table_init(&g_prepost_table, "wqslab.prepost", max_wqp,
+                     sizeof(struct wq_prepost), wqp_poison);
+}
+
+/*
+ * Refill the per-CPU cache.
+ */
+static void wq_prepost_refill_cpu_cache(uint32_t nalloc)
+{
+       struct wqt_elem *new_head, *old_head;
+       struct wqp_cache *cache;
+
+       /* require preemption enabled to allocate elements */
+       if (get_preemption_level() != 0)
+               return;
+
+       new_head = wq_table_alloc_elem(&g_prepost_table,
+                                      WQT_RESERVED, nalloc);
+       if (new_head == NULL)
+               return;
+
+       disable_preemption();
+       cache = &PROCESSOR_DATA(current_processor(), wqp_cache);
+       cache->avail += nalloc;
+       if (cache->head == 0 || cache->head == WQT_IDX_MAX) {
+               cache->head = new_head->wqt_id.id;
+               goto out;
+       }
+
+       old_head = wqt_elem_list_first(&g_prepost_table, cache->head);
+       (void)wqt_elem_list_link(&g_prepost_table, new_head, old_head);
+       cache->head = new_head->wqt_id.id;
+
+out:
+       enable_preemption();
+       return;
+}
+
+static void wq_prepost_ensure_free_space(void)
+{
+       uint32_t free_elem;
+       uint32_t min_free;
+       struct wqp_cache *cache;
+
+       if (g_min_free_cache == 0)
+               g_min_free_cache = (WQP_CACHE_MAX * ml_get_max_cpus());
+
+       /*
+        * Ensure that we always have a pool of per-CPU prepost elements
+        */
+       disable_preemption();
+       cache = &PROCESSOR_DATA(current_processor(), wqp_cache);
+       free_elem = cache->avail;
+       enable_preemption();
+
+       if (free_elem < (WQP_CACHE_MAX / 3))
+               wq_prepost_refill_cpu_cache(WQP_CACHE_MAX - free_elem);
+
+       /*
+        * Now ensure that we have a sufficient amount of free table space
+        */
+       free_elem = g_prepost_table.nelem - g_prepost_table.used_elem;
+       min_free = g_min_free_table_elem + g_min_free_cache;
+       if (free_elem < min_free) {
+               /*
+                * we don't hold locks on these values, so check for underflow
+                */
+               if (g_prepost_table.used_elem <= g_prepost_table.nelem) {
+                       wqdbg_v("Forcing table growth: nelem=%d, used=%d, min_free=%d+%d",
+                               g_prepost_table.nelem, g_prepost_table.used_elem,
+                               g_min_free_table_elem, g_min_free_cache);
+                       wq_table_grow(&g_prepost_table, min_free);
+               }
+       }
+}
+
+static struct wq_prepost *wq_prepost_alloc(int type, int nelem)
+{
+       struct wqt_elem *elem;
+       struct wq_prepost *wqp;
+       struct wqp_cache *cache;
+
+       if (type != WQT_RESERVED)
+               goto do_alloc;
+       if (nelem == 0)
+               return NULL;
+
+       /*
+        * First try to grab the elements from the per-CPU cache if we are
+        * allocating RESERVED elements
+        */
+       disable_preemption();
+       cache = &PROCESSOR_DATA(current_processor(), wqp_cache);
+       if (nelem <= (int)cache->avail) {
+               struct wqt_elem *first, *next = NULL;
+               int nalloc = nelem;
+
+               cache->avail -= nelem;
+
+               /* grab the first element */
+               first = wqt_elem_list_first(&g_prepost_table, cache->head);
+
+               /* find the last element and re-adjust the cache head */
+               for (elem = first; elem != NULL && nalloc > 0; elem = next) {
+                       next = wqt_elem_list_next(&g_prepost_table, elem);
+                       if (--nalloc == 0) {
+                               /* terminate the allocated list */
+                               elem->wqt_next_idx = WQT_IDX_MAX;
+                               break;
+                       }
+               }
+               assert(nalloc == 0);
+               if (!next)
+                       cache->head = WQT_IDX_MAX;
+               else
+                       cache->head = next->wqt_id.id;
+               /* assert that we don't have mismatched bookkeeping */
+               assert(!(cache->head == WQT_IDX_MAX && cache->avail > 0));
+               enable_preemption();
+               elem = first;
+               goto out;
+       }
+       enable_preemption();
+
+do_alloc:
+       /* fall-back to standard table allocation */
+       elem = wq_table_alloc_elem(&g_prepost_table, type, nelem);
+       if (!elem)
+               return NULL;
+
+out:
+       wqp = (struct wq_prepost *)elem;
+       wqp_do_alloc_stats(elem);
+       return wqp;
+}
+
+/*
+static void wq_prepost_realloc(struct wq_prepost *wqp, int type)
+{
+       wq_table_realloc_elem(&g_prepost_table, &wqp->wqte, type);
+}
+*/
+
+static void wq_prepost_invalidate(struct wq_prepost *wqp)
+{
+       wqt_elem_invalidate(&wqp->wqte);
+}
+
+static struct wq_prepost *wq_prepost_get(uint64_t wqp_id)
+{
+       struct wqt_elem *elem;
+
+       elem = wq_table_get_elem(&g_prepost_table, wqp_id);
+       return (struct wq_prepost *)elem;
+}
+
+static void wq_prepost_put(struct wq_prepost *wqp)
+{
+       wq_table_put_elem(&g_prepost_table, (struct wqt_elem *)wqp);
+}
+
+static int wq_prepost_rlink(struct wq_prepost *parent, struct wq_prepost *child)
+{
+       return wqt_elem_list_link(&g_prepost_table, &parent->wqte, &child->wqte);
+}
+
+static struct wq_prepost *wq_prepost_get_rnext(struct wq_prepost *head)
+{
+       struct wqt_elem *elem;
+       struct wq_prepost *wqp;
+       uint64_t id;
+
+       elem = wqt_elem_list_next(&g_prepost_table, &head->wqte);
+       if (!elem)
+               return NULL;
+       id = elem->wqt_id.id;
+       elem = wq_table_get_elem(&g_prepost_table, id);
+
+       if (!elem)
+               return NULL;
+       wqp = (struct wq_prepost *)elem;
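+       /*
+        * Validate the element: the slot may have been reused since we read
+        * its ID, so make sure it is still a POST object whose next pointer
+        * still refers back to 'head' (i.e. it really is head's reverse link).
+        */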
+       if (elem->wqt_id.id != id ||
+           wqp_type(wqp) != WQP_POST ||
+           wqp->wqp_post.wqp_next_id != head->wqp_prepostid.id) {
+               wq_table_put_elem(&g_prepost_table, elem);
+               return NULL;
+       }
+
+       return wqp;
+}
+
+static void wq_prepost_reset_rnext(struct wq_prepost *wqp)
+{
+       wqt_elem_reset_next(&g_prepost_table, &wqp->wqte);
+}
+
+
+/**
+ * remove 'wqp' from the prepost list on 'wqset'
+ *
+ * Conditions:
+ *     wqset is locked
+ *     caller holds a reference on wqp (and is responsible to release it)
+ *
+ * Result:
+ *     wqp is invalidated, wqset is potentially updated with a new
+ *     prepost ID, and the next element of the prepost list may be
+ *     consumed as well (if the list contained only 2 objects)
+ */
+static int wq_prepost_remove(struct waitq_set *wqset,
+                            struct wq_prepost *wqp)
+{
+       int more_posts = 1;
+       uint64_t next_id = wqp->wqp_post.wqp_next_id;
+       uint64_t wqp_id = wqp->wqp_prepostid.id;
+       struct wq_prepost *prev_wqp, *next_wqp;
+
+       assert(wqp_type(wqp) == WQP_POST);
+
+       if (next_id == wqp_id) {
+               /* the list is singular and becoming empty */
+               wqset->wqset_prepost_id = 0;
+               more_posts = 0;
+               goto out;
+       }
+
+       prev_wqp = wq_prepost_get_rnext(wqp);
+       assert(prev_wqp != NULL);
+       assert(prev_wqp->wqp_post.wqp_next_id == wqp_id);
+       assert(prev_wqp->wqp_prepostid.id != wqp_id);
+       assert(wqp_type(prev_wqp) == WQP_POST);
+
+       if (prev_wqp->wqp_prepostid.id == next_id) {
+               /*
+                * There are two items in the list, and we're removing one. We
+                * only need to keep the WQP_WQ pointer from 'prev_wqp'
+                */
+               wqset->wqset_prepost_id = prev_wqp->wqp_post.wqp_wq_id;
+               wq_prepost_invalidate(prev_wqp);
+               wq_prepost_put(prev_wqp);
+               more_posts = 0;
+               goto out;
+       }
+
+       /* prev->next = next */
+       prev_wqp->wqp_post.wqp_next_id = next_id;
+
+       /* next->prev = prev */
+       next_wqp = wq_prepost_get(next_id);
+       assert(next_wqp != NULL);
+       assert(next_wqp != wqp);
+       assert(next_wqp != prev_wqp);
+       assert(wqp_type(next_wqp) == WQP_POST);
+
+       wq_prepost_reset_rnext(next_wqp);
+       wq_prepost_rlink(next_wqp, prev_wqp);
+
+       /* If we remove the head of the list, update the wqset */
+       if (wqp_id == wqset->wqset_prepost_id)
+               wqset->wqset_prepost_id = next_id;
+
+       wq_prepost_put(prev_wqp);
+       wq_prepost_put(next_wqp);
+
+out:
+       wq_prepost_reset_rnext(wqp);
+       wq_prepost_invalidate(wqp);
+       return more_posts;
+}
+
+static struct wq_prepost *wq_prepost_rfirst(uint64_t id)
+{
+       struct wqt_elem *elem;
+       elem = wqt_elem_list_first(&g_prepost_table, id);
+       wqp_do_alloc_stats(elem);
+       return (struct wq_prepost *)(void *)elem;
+}
+
+static struct wq_prepost *wq_prepost_rpop(uint64_t *id, int type)
+{
+       struct wqt_elem *elem;
+       elem = wqt_elem_list_pop(&g_prepost_table, id, type);
+       wqp_do_alloc_stats(elem);
+       return (struct wq_prepost *)(void *)elem;
+}
+
+static void wq_prepost_release_rlist(struct wq_prepost *wqp)
+{
+       int nelem = 0;
+       struct wqp_cache *cache;
+       struct wqt_elem *elem;
+
+       if (!wqp)
+               return;
+
+       elem = &wqp->wqte;
+
+       /*
+        * These are reserved elements: release them back to the per-cpu pool
+        * if our cache is running low.
+        */
+       disable_preemption();
+       cache = &PROCESSOR_DATA(current_processor(), wqp_cache);
+       if (cache->avail < WQP_CACHE_MAX) {
+               struct wqt_elem *tmp = NULL;
+               if (cache->head != WQT_IDX_MAX)
+                       tmp = wqt_elem_list_first(&g_prepost_table, cache->head);
+               nelem = wqt_elem_list_link(&g_prepost_table, elem, tmp);
+               cache->head = elem->wqt_id.id;
+               cache->avail += nelem;
+               enable_preemption();
+               return;
+       }
+       enable_preemption();
+
+       /* release these elements back to the main table */
+       nelem = wqt_elem_list_release(&g_prepost_table, elem, WQT_RESERVED);
+
+#if CONFIG_WAITQ_STATS
+       g_prepost_table.nreserved_releases += 1;
+       OSDecrementAtomic64(&g_prepost_table.nreservations);
+#endif
+}
+
+typedef int (*wqp_callback_func)(struct waitq_set *wqset,
+                                void *ctx,
+                                struct wq_prepost *wqp,
+                                struct waitq *waitq);
+
+/**
+ * iterate over a chain of preposts associated with a waitq set.
+ *
+ * Conditions:
+ *     wqset is locked
+ *
+ * Notes:
+ *     This loop performs automatic prepost chain management / culling, and
+ *     may reset or adjust the waitq set's prepost ID pointer. If you don't
+ *     want this extra processing, you can use wq_prepost_iterate().
+ */
+static int wq_prepost_foreach_locked(struct waitq_set *wqset,
+                                    void *ctx, wqp_callback_func cb)
+{
+       int ret;
+       struct wq_prepost *wqp, *tmp_wqp;
+
+       if (!wqset || !wqset->wqset_prepost_id)
+               return WQ_ITERATE_SUCCESS;
+
+restart:
+       wqp = wq_prepost_get(wqset->wqset_prepost_id);
+       if (!wqp) {
+               /*
+                * The prepost object is no longer valid, reset the waitq
+                * set's prepost id.
+                */
+               wqset->wqset_prepost_id = 0;
+               return WQ_ITERATE_SUCCESS;
+       }
+
+       if (wqp_type(wqp) == WQP_WQ) {
+               uint64_t __assert_only wqp_id = wqp->wqp_prepostid.id;
+               /* default to CONTINUE so 'ret' is defined even when no callback is given */
+               ret = WQ_ITERATE_CONTINUE;
+               if (cb)
+                       ret = cb(wqset, ctx, wqp, wqp->wqp_wq.wqp_wq_ptr);
+
+               switch (ret) {
+               case WQ_ITERATE_INVALIDATE_CONTINUE:
+                       /* the caller wants to remove the only prepost here */
+                       assert(wqp_id == wqset->wqset_prepost_id);
+                       wqset->wqset_prepost_id = 0;
+                       /* fall through */
+               case WQ_ITERATE_CONTINUE:
+                       wq_prepost_put(wqp);
+                       ret = WQ_ITERATE_SUCCESS;
+                       break;
+               case WQ_ITERATE_RESTART:
+                       wq_prepost_put(wqp);
+                       /* fall through */
+               case WQ_ITERATE_DROPPED:
+                       goto restart;
+               default:
+                       wq_prepost_put(wqp);
+                       break;
+               }
+               return ret;
+       }
+
+       assert(wqp->wqp_prepostid.id == wqset->wqset_prepost_id);
+       assert(wqp_type(wqp) == WQP_POST);
+
+       /*
+        * At this point we know we have a list of POST objects.
+        * Grab a handle to the last element in the list and start
+        * the iteration.
+        */
+       tmp_wqp = wq_prepost_get_rnext(wqp);
+       assert(tmp_wqp != NULL && wqp_type(tmp_wqp) == WQP_POST);
+
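+       /* remember the tail's ID: the walk below terminates once it reaches it */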
+       uint64_t last_id = tmp_wqp->wqp_prepostid.id;
+       wq_prepost_put(tmp_wqp);
+
+       ret = WQ_ITERATE_SUCCESS;
+       for (;;) {
+               uint64_t wqp_id, first_id, next_id;
+
+               wqp_id = wqp->wqp_prepostid.id;
+               first_id = wqset->wqset_prepost_id;
+               next_id = wqp->wqp_post.wqp_next_id;
+
+               /* grab the WQP_WQ object this _POST points to */
+               tmp_wqp = wq_prepost_get(wqp->wqp_post.wqp_wq_id);
+               if (!tmp_wqp) {
+                       /*
+                        * This WQP_POST object points to an invalid
+                        * WQP_WQ object - remove the POST object from
+                        * the list.
+                        */
+                       if (wq_prepost_remove(wqset, wqp) == 0) {
+                               wq_prepost_put(wqp);
+                               goto restart;
+                       }
+                       goto next_prepost;
+               }
+               assert(wqp_type(tmp_wqp) == WQP_WQ);
+               /*
+                * make the callback: note that this could remove 'wqp' or
+                * drop the lock on our waitq set. We need to re-validate
+                * our state when this function returns.
+                */
+               if (cb)
+                       ret = cb(wqset, ctx, wqp,
+                                tmp_wqp->wqp_wq.wqp_wq_ptr);
+               wq_prepost_put(tmp_wqp);
+
+               switch (ret) {
+               case WQ_ITERATE_CONTINUE:
+                       /* continue iteration */
+                       break;
+               case WQ_ITERATE_INVALIDATE_CONTINUE:
+                       assert(next_id == wqp->wqp_post.wqp_next_id);
+                       if (wq_prepost_remove(wqset, wqp) == 0) {
+                               wq_prepost_put(wqp);
+                               goto restart;
+                       }
+                       goto next_prepost;
+               case WQ_ITERATE_RESTART:
+                       wq_prepost_put(wqp);
+                       /* fall-through */
+               case WQ_ITERATE_DROPPED:
+                       /* the callback dropped the ref to wqp: just restart */
+                       goto restart;
+               default:
+                       /* break out of the iteration for some other reason */
+                       goto finish_prepost_foreach;
+               }
+
+               /*
+                * the set lock may have been dropped during callback,
+                * if something looks different, restart the prepost iteration
+                */
+               if (!wqp_is_valid(wqp) ||
+                   (wqp->wqp_post.wqp_next_id != next_id) ||
+                   wqset->wqset_prepost_id != first_id) {
+                       wq_prepost_put(wqp);
+                       goto restart;
+               }
+
+next_prepost:
+               /* this was the last object in the list */
+               if (wqp_id == last_id)
+                       break;
+
+               /* get the next object */
+               tmp_wqp = wq_prepost_get(next_id);
+               if (!tmp_wqp) {
+                       /*
+                        * At this point we've already checked our state
+                        * after the callback (which may have dropped the set
+                        * lock). If we find an invalid member of the list
+                        * then something is wrong.
+                        */
+                       panic("Invalid WQP_POST member 0x%llx in waitq set "
+                             "0x%llx prepost list (first:%llx, "
+                             "wqp:%p)",
+                             next_id, wqset->wqset_id, first_id, wqp);
+               }
+               wq_prepost_put(wqp);
+               wqp = tmp_wqp;
+
+               assert(wqp_type(wqp) == WQP_POST);
+       }
+
+finish_prepost_foreach:
+       wq_prepost_put(wqp);
+       if (ret == WQ_ITERATE_CONTINUE)
+               ret = WQ_ITERATE_SUCCESS;
+
+       return ret;
+}
+
+/**
+ * Perform a simple loop over a chain of prepost objects
+ *
+ * Conditions:
+ *     If 'prepost_id' is associated with a waitq (set) then that object must
+ *     be locked before calling this function.
+ *     Callback function, 'cb', must be able to handle a NULL wqset pointer
+ *     and a NULL waitq pointer!
+ *
+ * Notes:
+ *     This prepost chain iteration will _not_ automatically adjust any chain
+ *     element or linkage. This is the responsibility of the caller! If you
+ *     want automatic prepost chain management (at a cost of extra CPU time),
+ *     you can use: wq_prepost_foreach_locked().
+ */
+static int wq_prepost_iterate(uint64_t prepost_id,
+                             void *ctx, wqp_callback_func cb)
+{
+       int ret;
+       struct wq_prepost *wqp;
+
+       if (!prepost_id)
+               return WQ_ITERATE_SUCCESS;
+
+       wqp = wq_prepost_get(prepost_id);
+       if (!wqp)
+               return WQ_ITERATE_SUCCESS;
+
+       if (wqp_type(wqp) == WQP_WQ) {
+               ret = WQ_ITERATE_SUCCESS;
+               if (cb)
+                       ret = cb(NULL, ctx, wqp, wqp->wqp_wq.wqp_wq_ptr);
+
+               if (ret != WQ_ITERATE_DROPPED)
+                       wq_prepost_put(wqp);
+               return ret;
+       }
+
+       assert(wqp->wqp_prepostid.id == prepost_id);
+       assert(wqp_type(wqp) == WQP_POST);
+
+       /* at this point we know we have a list of POST objects */
+       uint64_t next_id;
+
+       ret = WQ_ITERATE_CONTINUE;
+       do {
+               struct wq_prepost *tmp_wqp;
+               struct waitq *wq = NULL;
+
+               next_id = wqp->wqp_post.wqp_next_id;
+
+               /* grab the WQP_WQ object this _POST points to */
+               tmp_wqp = wq_prepost_get(wqp->wqp_post.wqp_wq_id);
+               if (tmp_wqp) {
+                       assert(wqp_type(tmp_wqp) == WQP_WQ);
+                       wq = tmp_wqp->wqp_wq.wqp_wq_ptr;
+               }
+
+               if (cb)
+                       ret = cb(NULL, ctx, wqp, wq);
+               if (tmp_wqp)
+                       wq_prepost_put(tmp_wqp);
+
+               if (ret != WQ_ITERATE_CONTINUE)
+                       break;
+
+               tmp_wqp = wq_prepost_get(next_id);
+               if (!tmp_wqp) {
+                       /*
+                        * the chain is broken: nothing we can do here besides
+                        * bail from the iteration.
+                        */
+                       ret = WQ_ITERATE_ABORTED;
+                       break;
+               }
+
+               wq_prepost_put(wqp);
+               wqp = tmp_wqp;
+
+               assert(wqp_type(wqp) == WQP_POST);
+       } while (next_id != prepost_id);
+
+       if (ret != WQ_ITERATE_DROPPED)
+               wq_prepost_put(wqp);
+
+       if (ret == WQ_ITERATE_CONTINUE)
+               ret = WQ_ITERATE_SUCCESS;
+       return ret;
+}
+
+
+struct _is_posted_ctx {
+       struct waitq *posting_wq;
+       int did_prepost;
+};
+
+static int wq_is_preposted_on_set_cb(struct waitq_set *wqset, void *ctx,
+                                    struct wq_prepost *wqp, struct waitq *waitq)
+{
+       struct _is_posted_ctx *pctx = (struct _is_posted_ctx *)ctx;
+
+       (void)wqset;
+       (void)wqp;
+
+       /*
+        * Don't early-out, run through the _entire_ list:
+        * This ensures that we retain a minimum number of invalid elements.
+        */
+       if (pctx->posting_wq == waitq)
+               pctx->did_prepost = 1;
+
+       return WQ_ITERATE_CONTINUE;
+}
+
+
+/**
+ * checks if 'waitq' has already preposted on 'wqset'
+ *
+ * Parameters:
+ *     waitq    The waitq that's preposting
+ *     wqset    The set onto which waitq may be preposted
+ *
+ * Conditions:
+ *     both waitq and wqset are locked
+ *
+ * Returns non-zero if 'waitq' has already preposted to 'wqset'
+ */
+static int wq_is_preposted_on_set(struct waitq *waitq, struct waitq_set *wqset)
+{
+       int ret;
+       struct _is_posted_ctx pctx;
+
+       /*
+        * If the set's only prepost matches the waitq's prepost ID,
+        * then it obviously already preposted to the set.
+        */
+       if (waitq->waitq_prepost_id != 0 &&
+           wqset->wqset_prepost_id == waitq->waitq_prepost_id)
+               return 1;
+
+       /* use full prepost iteration: always trim the list */
+       pctx.posting_wq = waitq;
+       pctx.did_prepost = 0;
+       ret = wq_prepost_foreach_locked(wqset, (void *)&pctx,
+                                       wq_is_preposted_on_set_cb);
+       return pctx.did_prepost;
+}
+
+static struct wq_prepost *wq_get_prepost_obj(uint64_t *reserved, int type)
+{
+       struct wq_prepost *wqp = NULL;
+       /*
+        * don't fail just because the caller doesn't have enough
+        * reservations, we've kept a low-water mark on the prepost table,
+        * so there should be some available for us.
+        */
+       if (reserved && *reserved) {
+               wqp = wq_prepost_rpop(reserved, type);
+       } else {
+               /*
+                * TODO: if in interrupt context, grab from a special
+                *       region / reserved list!
+                */
+               wqp = wq_prepost_alloc(type, 1);
+       }
+
+       if (wqp == NULL)
+               panic("Couldn't allocate prepost object!");
+       return wqp;
+}
+
+
+/**
+ * prepost a waitq onto a waitq set
+ *
+ * Parameters:
+ *     wqset    The set onto which waitq will be preposted
+ *     waitq    The waitq that's preposting
+ *     reserved List (wqt_elem_list_ style) of pre-allocated prepost elements
+ *              Could be NULL
+ *
+ * Conditions:
+ *     both wqset and waitq are locked
+ *
+ * Notes:
+ *     If reserved is NULL, this may block on prepost table growth.
+ */
+static void wq_prepost_do_post_locked(struct waitq_set *wqset,
+                                     struct waitq *waitq,
+                                     uint64_t *reserved)
+{
+       struct wq_prepost *wqp_post, *wqp_head, *wqp_tail;
+
+       assert(waitq_held(waitq) && waitq_held(&wqset->wqset_q));
+
+       /*
+        * nothing to do if it's already preposted:
+        * note that this also culls any invalid prepost objects
+        */
+       if (wq_is_preposted_on_set(waitq, wqset))
+               return;
+
+       /*
+        * This function is called because an event is being posted to 'waitq'.
+        * We need a prepost object associated with this queue. Allocate one
+        * now if the waitq isn't already associated with one.
+        */
+       if (waitq->waitq_prepost_id == 0) {
+               struct wq_prepost *wqp;
+               wqp = wq_get_prepost_obj(reserved, WQP_WQ);
+               wqp->wqp_wq.wqp_wq_ptr = waitq;
+               wqp_set_valid(wqp);
+               waitq->waitq_prepost_id = wqp->wqp_prepostid.id;
+               wq_prepost_put(wqp);
+       }
+
+#if CONFIG_WAITQ_STATS
+       g_prepost_table.npreposts += 1;
+#endif
+
+       wqdbg_v("preposting waitq %p (0x%llx) to set 0x%llx",
+               (void *)VM_KERNEL_UNSLIDE_OR_PERM(waitq),
+               waitq->waitq_prepost_id, wqset->wqset_id);
+
+       if (wqset->wqset_prepost_id == 0) {
+               /* the set has no previous preposts */
+               wqset->wqset_prepost_id = waitq->waitq_prepost_id;
+               return;
+       }
+
+       wqp_head = wq_prepost_get(wqset->wqset_prepost_id);
+       if (!wqp_head) {
+               /* the previous prepost has become invalid */
+               wqset->wqset_prepost_id = waitq->waitq_prepost_id;
+               return;
+       }
+
+       assert(wqp_head->wqp_prepostid.id == wqset->wqset_prepost_id);
+
+       /*
+        * If we get here, we're going to need at least one new wq_prepost
+        * object. If the previous wqset_prepost_id points to a WQP_WQ, we
+        * actually need to allocate 2 wq_prepost objects because the WQP_WQ
+        * is tied to the waitq and shared across all sets.
+        */
+       wqp_post = wq_get_prepost_obj(reserved, WQP_POST);
+
+       wqp_post->wqp_post.wqp_wq_id = waitq->waitq_prepost_id;
+       wqdbg_v("POST 0x%llx :: WQ 0x%llx", wqp_post->wqp_prepostid.id,
+               waitq->waitq_prepost_id);
+
+       if (wqp_type(wqp_head) == WQP_WQ) {
+               /*
+                * We must replace the wqset_prepost_id with a pointer
+                * to two new WQP_POST objects
+                */
+               uint64_t wqp_id = wqp_head->wqp_prepostid.id;
+               wqdbg_v("set 0x%llx previous had 1 WQ prepost (0x%llx): "
+                       "replacing with two POST preposts",
+                       wqset->wqset_id, wqp_id);
+
+               /* drop the old reference */
+               wq_prepost_put(wqp_head);
+
+               /* grab another new object (the 2nd of two) */
+               wqp_head = wq_get_prepost_obj(reserved, WQP_POST);
+
+               /* point this one to the original WQP_WQ object */
+               wqp_head->wqp_post.wqp_wq_id = wqp_id;
+               wqdbg_v("POST 0x%llx :: WQ 0x%llx",
+                       wqp_head->wqp_prepostid.id, wqp_id);
+
+               /* link it to the new wqp_post object allocated earlier */
+               wqp_head->wqp_post.wqp_next_id = wqp_post->wqp_prepostid.id;
+               /* make the list doubly-linked and circular */
+               wq_prepost_rlink(wqp_head, wqp_post);
+
+               /*
+                * Finish setting up the new prepost: point it back to the
+                * POST object we allocated to replace the original wqset
+                * WQ prepost object
+                */
+               wqp_post->wqp_post.wqp_next_id = wqp_head->wqp_prepostid.id;
+               wq_prepost_rlink(wqp_post, wqp_head);
+
+               /* mark objects valid, and reset the wqset prepost list head */
+               wqp_set_valid(wqp_head);
+               wqp_set_valid(wqp_post);
+               wqset->wqset_prepost_id = wqp_head->wqp_prepostid.id;
+
+               /* release both references */
+               wq_prepost_put(wqp_head);
+               wq_prepost_put(wqp_post);
+
+               wqdbg_v("set 0x%llx: 0x%llx/0x%llx -> 0x%llx/0x%llx -> 0x%llx",
+                       wqset->wqset_id, wqset->wqset_prepost_id,
+                       wqp_head->wqp_prepostid.id, wqp_head->wqp_post.wqp_next_id,
+                       wqp_post->wqp_prepostid.id,
+                       wqp_post->wqp_post.wqp_next_id);
+               return;
+       }
+
+       assert(wqp_type(wqp_head) == WQP_POST);
+
+       /*
+        * Add the new prepost to the end of the prepost list
+        */
+       wqp_tail = wq_prepost_get_rnext(wqp_head);
+       assert(wqp_tail != NULL);
+       assert(wqp_tail->wqp_post.wqp_next_id == wqset->wqset_prepost_id);
+
+       /*
+        * link the head to the new tail
+        * NOTE: this needs to happen first in case wqp_tail == wqp_head
+        */
+       wq_prepost_reset_rnext(wqp_head);
+       wq_prepost_rlink(wqp_head, wqp_post);
+
+       /* point the new object to the list head, and list tail */
+       wqp_post->wqp_post.wqp_next_id = wqp_head->wqp_prepostid.id;
+       wq_prepost_rlink(wqp_post, wqp_tail);
+
+       /* point the last item in the waitq set's list to the new object */
+       wqp_tail->wqp_post.wqp_next_id = wqp_post->wqp_prepostid.id;
+
+       wqp_set_valid(wqp_post);
+
+       wq_prepost_put(wqp_head);
+       wq_prepost_put(wqp_tail);
+       wq_prepost_put(wqp_post);
+
+       wqdbg_v("set 0x%llx (wqp:0x%llx) last_prepost:0x%llx, "
+               "new_prepost:0x%llx->0x%llx", wqset->wqset_id,
+               wqset->wqset_prepost_id, wqp_head->wqp_prepostid.id,
+               wqp_post->wqp_prepostid.id, wqp_post->wqp_post.wqp_next_id);
+
+       return;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Stats collection / reporting
+ *
+ * ---------------------------------------------------------------------- */
+#if CONFIG_WAITQ_STATS
+static void wq_table_stats(struct wq_table *table, struct wq_table_stats *stats)
+{
+       stats->version = WAITQ_STATS_VERSION;
+       stats->table_elements = table->nelem;
+       stats->table_used_elems = table->used_elem;
+       stats->table_elem_sz = table->elem_sz;
+       stats->table_slabs = table->nslabs;
+       stats->table_slab_sz = table->slab_sz;
+
+       stats->table_num_allocs = table->nallocs;
+       stats->table_num_preposts = table->npreposts;
+       stats->table_num_reservations = table->nreservations;
+
+       stats->table_max_used = table->max_used;
+       stats->table_avg_used = table->avg_used;
+       stats->table_max_reservations = table->max_reservations;
+       stats->table_avg_reservations = table->avg_reservations;
+}
+
+void waitq_link_stats(struct wq_table_stats *stats)
+{
+       if (!stats)
+               return;
+       wq_table_stats(&g_linktable, stats);
+}
+
+void waitq_prepost_stats(struct wq_table_stats *stats)
+{
+       if (!stats)
+               return;
+       wq_table_stats(&g_prepost_table, stats);
+}
+#endif
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Global Wait Queues
+ *
+ * ---------------------------------------------------------------------- */
+
+static struct waitq g_boot_waitq;
+static struct waitq *global_waitqs = &g_boot_waitq;
+static uint32_t g_num_waitqs = 1;
+
+/*
+ * Keep only the low _EVENT_MASK_BITS bits of the event (zero out the unused MSBs).
+ */
+#define _CAST_TO_EVENT_MASK(event)   ((uintptr_t)(event) & ((1ul << _EVENT_MASK_BITS) - 1ul))
+
+/*
+ * The Jenkins "one at a time" hash.
+ * TBD: There may be some value to unrolling here,
+ * depending on the architecture.
+ */
+static __inline__ uint32_t waitq_hash(char *key, size_t length)
+{
+       uint32_t hash = 0;
+       size_t i;
+
+       for (i = 0; i < length; i++) {
+               hash += key[i];
+               hash += (hash << 10);
+               hash ^= (hash >> 6);
+       }
+
+       hash += (hash << 3);
+       hash ^= (hash >> 11);
+       hash += (hash << 15);
+
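+       /* g_num_waitqs is a power of 2, so masking yields a valid index */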
+       hash &= (g_num_waitqs - 1);
+       return hash;
+}
+
+/* return a global waitq pointer corresponding to the given event */
+struct waitq *_global_eventq(char *event, size_t event_length)
+{
+       return &global_waitqs[waitq_hash(event, event_length)];
+}
+
+/* return an indexed global waitq pointer */
+struct waitq *global_waitq(int index)
+{
+       return &global_waitqs[index % g_num_waitqs];
+}
+
+
+#if CONFIG_WAITQ_STATS
+/* this global is for lldb */
+const uint32_t g_nwaitq_btframes = NWAITQ_BTFRAMES;
+struct wq_stats g_boot_stats;
+struct wq_stats *g_waitq_stats = &g_boot_stats;
+
+static __inline__ void waitq_grab_backtrace(uintptr_t bt[NWAITQ_BTFRAMES], int skip)
+{
+       /* clamp 'skip' before it is used to size the VLA below */
+       if (skip < 0)
+               skip = 0;
+       uintptr_t buf[NWAITQ_BTFRAMES + skip];
+       memset(buf, 0, (NWAITQ_BTFRAMES + skip) * sizeof(uintptr_t));
+       fastbacktrace(buf, g_nwaitq_btframes + skip);
+       memcpy(&bt[0], &buf[skip], NWAITQ_BTFRAMES * sizeof(uintptr_t));
+}
+
+static __inline__ struct wq_stats *waitq_global_stats(struct waitq *waitq) {
+       struct wq_stats *wqs;
+       uint32_t idx;
+
+       if (!waitq_is_global(waitq))
+               return NULL;
+
+       idx = (uint32_t)(((uintptr_t)waitq - (uintptr_t)global_waitqs) / sizeof(*waitq));
+       assert(idx < g_num_waitqs);
+       wqs = &g_waitq_stats[idx];
+       return wqs;
+}
+
+static __inline__ void waitq_stats_count_wait(struct waitq *waitq)
+{
+       struct wq_stats *wqs = waitq_global_stats(waitq);
+       if (wqs != NULL) {
+               wqs->waits++;
+               waitq_grab_backtrace(wqs->last_wait, 2);
+       }
+}
+
+static __inline__ void waitq_stats_count_wakeup(struct waitq *waitq)
+{
+       struct wq_stats *wqs = waitq_global_stats(waitq);
+       if (wqs != NULL) {
+               wqs->wakeups++;
+               waitq_grab_backtrace(wqs->last_wakeup, 2);
+       }
+}
+
+static __inline__ void waitq_stats_count_clear_wakeup(struct waitq *waitq)
+{
+       struct wq_stats *wqs = waitq_global_stats(waitq);
+       if (wqs != NULL) {
+               wqs->wakeups++;
+               wqs->clears++;
+               waitq_grab_backtrace(wqs->last_wakeup, 2);
+       }
+}
+
+static __inline__ void waitq_stats_count_fail(struct waitq *waitq)
+{
+       struct wq_stats *wqs = waitq_global_stats(waitq);
+       if (wqs != NULL) {
+               wqs->failed_wakeups++;
+               waitq_grab_backtrace(wqs->last_failed_wakeup, 2);
+       }
+}
+#else
+#define waitq_stats_count_wait(q)         do { } while (0)
+#define waitq_stats_count_wakeup(q)       do { } while (0)
+#define waitq_stats_count_clear_wakeup(q) do { } while (0)
+#define waitq_stats_count_fail(q)         do { } while (0)
+#endif
+
+int waitq_is_valid(struct waitq *waitq)
+{
+       return (waitq != NULL) && ((waitq->waitq_type & ~1) == WQT_QUEUE);
+}
+
+int waitq_set_is_valid(struct waitq_set *wqset)
+{
+       return (wqset != NULL) && waitqs_is_set(wqset);
+}
+
+int waitq_is_global(struct waitq *waitq)
+{
+       if (waitq >= global_waitqs && waitq < global_waitqs + g_num_waitqs)
+               return 1;
+       return 0;
+}
+
+int waitq_irq_safe(struct waitq *waitq)
+{
+       /* global wait queues have this bit set on initialization */
+       return waitq->waitq_irq;
+}
+
+static uint32_t waitq_hash_size(void)
+{
+       uint32_t hsize, queues;
+
+       if (PE_parse_boot_argn("wqsize", &hsize, sizeof(hsize)))
+               return (hsize);
+
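+       /* default: scale the number of global queues with the max thread count */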
+       queues = thread_max / 11;
+       hsize = P2ROUNDUP(queues * sizeof(struct waitq), PAGE_SIZE);
+
+       return hsize;
+}
+
+void waitq_bootstrap(void)
+{
+       kern_return_t kret;
+       uint32_t whsize, qsz;
+
+       wq_table_bootstrap();
+       lt_init();
+       wqp_init();
+
+       /*
+        * Determine the amount of memory we're willing to reserve for
+        * the waitqueue hash table
+        */
+       whsize = waitq_hash_size();
+
+       /* Determine the number of waitqueues we can fit. */
+       qsz = sizeof(struct waitq);
+       whsize = ROUNDDOWN(whsize, qsz);
+       g_num_waitqs = whsize / qsz;
+
+       /*
+        * The hash algorithm requires that this be a power of 2, so we
+        * just mask off all the low-order bits.
+        */
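+       /*
+        * Clear low-order bits one at a time until only the highest set bit
+        * remains: the loop exits once g_num_waitqs equals that single bit.
+        */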
+       for (uint32_t i = 0; i < 31; i++) {
+               uint32_t bit = (1 << i);
+               if ((g_num_waitqs & bit) == g_num_waitqs)
+                       break;
+               g_num_waitqs &= ~bit;
+       }
+       assert(g_num_waitqs > 0);
+
+       /* Now determine how much memory we really need. */
+       whsize = P2ROUNDUP(g_num_waitqs * qsz, PAGE_SIZE);
+
+       wqdbg("allocating %d global queues  (%d bytes)", g_num_waitqs, whsize);
+       kret = kernel_memory_allocate(kernel_map, (vm_offset_t *)&global_waitqs,
+                                     whsize, 0, KMA_KOBJECT|KMA_NOPAGEWAIT, VM_KERN_MEMORY_WAITQ);
+       if (kret != KERN_SUCCESS || global_waitqs == NULL)
+               panic("kernel_memory_allocate() failed to alloc global_waitqs"
+                     ", error: %d, whsize: 0x%x", kret, whsize);
+
+#if CONFIG_WAITQ_STATS
+       whsize = P2ROUNDUP(g_num_waitqs * sizeof(struct wq_stats), PAGE_SIZE);
+       kret = kernel_memory_allocate(kernel_map, (vm_offset_t *)&g_waitq_stats,
+                                     whsize, 0, KMA_KOBJECT|KMA_NOPAGEWAIT, VM_KERN_MEMORY_WAITQ);
+       if (kret != KERN_SUCCESS || g_waitq_stats == NULL)
+               panic("kernel_memory_allocate() failed to alloc g_waitq_stats"
+                     ", error: %d, whsize: 0x%x", kret, whsize);
+       memset(g_waitq_stats, 0, whsize);
+#endif
+
+       for (uint32_t i = 0; i < g_num_waitqs; i++) {
+               waitq_init(&global_waitqs[i], SYNC_POLICY_FIFO|SYNC_POLICY_DISABLE_IRQ);
+       }
+
+
+       waitq_set_zone = zinit(sizeof(struct waitq_set),
+                              WAITQ_SET_MAX * sizeof(struct waitq_set),
+                              sizeof(struct waitq_set),
+                              "waitq sets");
+       zone_change(waitq_set_zone, Z_NOENCRYPT, TRUE);
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Wait Queue Implementation
+ *
+ * ---------------------------------------------------------------------- */
+
+/*
+ * Double the standard lock timeout, because wait queues tend
+ * to iterate over a number of threads - locking each.  If there is
+ * a problem with a thread lock, it normally times out at the wait
+ * queue level first, hiding the real problem.
+ */
+/* For x86, the hardware timeout is in TSC units. */
+#if defined(__i386__) || defined(__x86_64__)
+#define        hwLockTimeOut LockTimeOutTSC
+#else
+#define        hwLockTimeOut LockTimeOut
+#endif
+
+void waitq_lock(struct waitq *wq)
+{
+       if (__improbable(hw_lock_to(&(wq)->waitq_interlock,
+                                   hwLockTimeOut * 2) == 0)) {
+               boolean_t wql_acquired = FALSE;
+
+               while (machine_timeout_suspended()) {
+#if defined(__i386__) || defined(__x86_64__)
+                       /*
+                        * i386/x86_64 return with preemption disabled on a
+                        * timeout for diagnostic purposes.
+                        */
+                       mp_enable_preemption();
+#endif
+                       wql_acquired = hw_lock_to(&(wq)->waitq_interlock,
+                                                 hwLockTimeOut * 2);
+                       if (wql_acquired)
+                               break;
+               }
+               if (wql_acquired == FALSE)
+                       panic("waitq deadlock - waitq=%p, cpu=%d\n",
+                             wq, cpu_number());
+       }
+       assert(waitq_held(wq));
+}
+
+void waitq_unlock(struct waitq *wq)
+{
+       assert(waitq_held(wq));
+       hw_lock_unlock(&(wq)->waitq_interlock);
+}
+
+
+/**
+ * clear the thread-related waitq state
+ *
+ * Conditions:
+ *     'thread' is locked
+ */
+static inline void thread_clear_waitq_state(thread_t thread)
+{
+       thread->waitq = NULL;
+       thread->wait_event = NO_EVENT64;
+       thread->at_safe_point = FALSE;
+}
+
+
+typedef thread_t (*waitq_select_cb)(void *ctx, struct waitq *waitq,
+                                   int is_global, thread_t thread);
+
+struct waitq_select_args {
+       /* input parameters */
+       struct waitq    *posted_waitq;
+       struct waitq    *waitq;
+       event64_t        event;
+       waitq_select_cb  select_cb;
+       void            *select_ctx;
+
+       uint64_t        *reserved_preposts;
+
+       /* output parameters */
+       queue_t       threadq;
+       int           max_threads;
+       int          *nthreads;
+       spl_t        *spl;
+};
+
+static void do_waitq_select_n_locked(struct waitq_select_args *args);
+
+/**
+ * callback invoked once for every waitq set to which a waitq belongs
+ *
+ * Conditions:
+ *     ctx->posted_waitq is locked
+ *     'link' points to a valid waitq set
+ *
+ * Notes:
+ *     Takes the waitq set lock on the set pointed to by 'link'
+ *     Calls do_waitq_select_n_locked() which could recurse back into
+ *     this function if the waitq set is a member of other sets.
+ *     If no threads were selected, it preposts the input waitq
+ *     onto the waitq set pointed to by 'link'.
+ */
+static int waitq_select_walk_cb(struct waitq *waitq, void *ctx,
+                               struct setid_link *link)
+{
+       int ret = WQ_ITERATE_CONTINUE;
+       struct waitq_select_args args = *((struct waitq_select_args *)ctx);
+       struct waitq_set *wqset;
+       int get_spl = 0;
+       spl_t set_spl;
+
+       (void)waitq;
+       assert(sl_type(link) == SLT_WQS);
+
+       wqset = link->sl_wqs.sl_set;
+       args.waitq = &wqset->wqset_q;
+
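+       /*
+        * If the set is IRQ safe but the posting waitq is not, interrupts
+        * must be disabled before taking the set lock.
+        */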
+       if (!waitq_irq_safe(waitq) && waitq_irq_safe(&wqset->wqset_q)) {
+               get_spl = 1;
+               set_spl = splsched();
+       }
+       waitq_set_lock(wqset);
+       /*
+        * verify that the link wasn't invalidated just before
+        * we were able to take the lock.
+        */
+       if (wqset->wqset_id != link->sl_set_id.id)
+               goto out_unlock;
+
+       /*
+        * Find any threads waiting on this wait queue set,
+        * and recurse into any waitq set to which this set belongs.
+        */
+       do_waitq_select_n_locked(&args);
+
+       if (*(args.nthreads) > 0 ||
+           (args.threadq && !queue_empty(args.threadq))) {
+               /* at least 1 thread was selected and returned: don't prepost */
+               if (args.max_threads > 0 &&
+                   *(args.nthreads) >= args.max_threads) {
+                       /* break out of the setid walk */
+                       ret = WQ_ITERATE_FOUND;
+               }
+               goto out_unlock;
+       } else {
+               /*
+                * No thread selected: prepost 'waitq' to 'wqset'
+                * if wqset can handle preposts and the event is set to 0.
+                * We also make sure to not post waitq sets to other sets.
+                *
+                * In the future, we may consider an optimization to prepost
+                * 'args.posted_waitq' directly to 'wqset' to avoid
+                * unnecessary data structure manipulations in the kqueue path
+                */
+               if (args.event == NO_EVENT64 && waitq_set_can_prepost(wqset)) {
+                       wq_prepost_do_post_locked(wqset, waitq,
+                                                 args.reserved_preposts);
+               }
+       }
+
+out_unlock:
+       waitq_set_unlock(wqset);
+       if (get_spl)
+               splx(set_spl);
+       return ret;
+}
+
+/**
+ * generic thread selection from a waitq (and sets to which the waitq belongs)
+ *
+ * Conditions:
+ *     args->waitq (and args->posted_waitq) is locked
+ *
+ * Notes:
+ *     Uses the optional select callback function to refine the selection
+ *     of one or more threads from a waitq and any set to which the waitq
+ *     belongs. The select callback is invoked once for every thread that
+ *     is found to be waiting on the input args->waitq.
+ *
+ *     If one or more threads are selected, this may disable interrupts.
+ *     The previous interrupt state is returned in args->spl and should
+ *     be used in a call to splx() if threads are returned to the caller.
+ */
+static void do_waitq_select_n_locked(struct waitq_select_args *args)
+{
+       struct waitq *waitq = args->waitq;
+       int max_threads = args->max_threads;
+       thread_t thread = THREAD_NULL, first_thread = THREAD_NULL;
+       int global_q = 0;
+       unsigned long eventmask = 0;
+       int *nthreads = args->nthreads;
+
+       assert(max_threads != 0);
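+       /* a negative max_threads means: select all matching threads */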
+
+       global_q = waitq_is_global(waitq);
+       if (global_q) {
+               eventmask = _CAST_TO_EVENT_MASK(args->event);
+               /* make sure this waitq accepts this event mask */
+               if ((waitq->waitq_eventmask & eventmask) != eventmask)
+                       return;
+               eventmask = 0;
+       }
+
+       /* look through each thread waiting directly on the waitq */
+       qe_foreach_element_safe(thread, &waitq->waitq_queue, links) {
+               thread_t t = THREAD_NULL;
+               assert(thread->waitq == waitq);
+               if (thread->wait_event == args->event) {
+                       t = thread;
+                       if (first_thread == THREAD_NULL)
+                               first_thread = thread;
+
+                       /* allow the caller to further refine the selection */
+                       if (args->select_cb)
+                               t = args->select_cb(args->select_ctx, waitq,
+                                                   global_q, thread);
+                       if (t != THREAD_NULL) {
+                               *nthreads += 1;
+                               if (args->threadq) {
+                                       if (*nthreads == 1)
+                                               *(args->spl) = splsched();
+                                       thread_lock(t);
+                                       thread_clear_waitq_state(t);
+                                       /* put locked thread on output queue */
+                                       re_queue_tail(args->threadq, &t->links);
+                               }
+                               /* only enqueue up to 'max' threads */
+                               if (*nthreads >= max_threads && max_threads > 0)
+                                       break;
+                       }
+               }
+               /* thread wasn't selected, and the waitq is global */
+               if (t == THREAD_NULL && global_q)
+                       eventmask |= _CAST_TO_EVENT_MASK(thread->wait_event);
+       }
+
+       /*
+        * Update the eventmask of global queues:
+        * - If we selected all the threads in the queue, or we selected zero
+        *   threads on the queue, set the eventmask to the calculated value
+        *   (potentially 0 if we selected them all)
+        * - If we just pulled out a subset of threads from the queue, then we
+        *   can't assume the calculated mask is complete (because we may not
+        *   have made it through all the threads in the queue), so we have to
+        *   leave it alone.
+        */
+       if (global_q && (queue_empty(&waitq->waitq_queue) || *nthreads == 0))
+               waitq->waitq_eventmask = (typeof(waitq->waitq_eventmask))eventmask;
+
+       /*
+        * Grab the first thread in the queue if no other thread was selected.
+        * We can guarantee that no one has manipulated this thread because
+        * it's waiting on the given waitq, and we have that waitq locked.
+        */
+       if (*nthreads == 0 && first_thread != THREAD_NULL && args->threadq) {
+               /* we know this is the first (and only) thread */
+               ++(*nthreads);
+               *(args->spl) = splsched();
+               thread_lock(first_thread);
+               thread_clear_waitq_state(first_thread);
+               re_queue_tail(args->threadq, &first_thread->links);
+
+               /* update the eventmask on global queues */
+               if (global_q && queue_empty(&waitq->waitq_queue))
+                       waitq->waitq_eventmask = 0;
+       }
+
+       if (max_threads > 0 && *nthreads >= max_threads)
+               return;
+
+       /*
+        * wait queues that are not in any sets
+        * are the bottom of the recursion
+        */
+       if (!waitq->waitq_set_id)
+               return;
+
+       /* check to see if the set ID for this wait queue is valid */
+       struct setid_link *link = lt_get_link(waitq->waitq_set_id);
+       if (!link) {
+               /* the waitq set to which this waitq belonged has been invalidated */
+               waitq->waitq_set_id = 0;
+               return;
+       }
+
+       lt_put_link(link);
+
+       /*
+        * If this waitq is a member of any wait queue sets, we need to look
+        * for waiting thread(s) in any of those sets, and prepost all sets that
+        * don't have active waiters.
+        *
+        * Note that we do a local walk of this waitq's links - we manually
+        * recurse down wait queue sets with a non-zero wqset_q.waitq_set_id
+        */
+       (void)walk_setid_links(LINK_WALK_ONE_LEVEL, waitq, waitq->waitq_set_id,
+                              SLT_WQS, (void *)args, waitq_select_walk_cb);
+}
+
+/**
+ * main entry point for thread selection from a waitq
+ *
+ * Conditions:
+ *     waitq is locked
+ *
+ * Returns:
+ *     The number of threads waiting on 'waitq' for 'event' which have
+ *     been placed onto the input 'threadq'
+ *
+ * Notes:
+ *     The 'select_cb' function is invoked for every thread found waiting
+ *     on 'waitq' for 'event'. The thread is _not_ locked upon callback
+ *     invocation. This parameter may be NULL.
+ *
+ *     If one or more threads are returned in 'threadq' then the caller is
+ *     responsible to call splx() using the returned 'spl' value. Each
+ *     returned thread is locked.
+ */
+static __inline__ int waitq_select_n_locked(struct waitq *waitq,
+                                           event64_t event,
+                                           waitq_select_cb select_cb,
+                                           void *select_ctx,
+                                           uint64_t *reserved_preposts,
+                                           queue_t threadq,
+                                           int max_threads, spl_t *spl)
+{
+       int nthreads = 0;
+
+       struct waitq_select_args args = {
+               .posted_waitq = waitq,
+               .waitq = waitq,
+               .event = event,
+               .select_cb = select_cb,
+               .select_ctx = select_ctx,
+               .reserved_preposts = reserved_preposts,
+               .threadq = threadq,
+               .max_threads = max_threads,
+               .nthreads = &nthreads,
+               .spl = spl,
+       };
+
+       do_waitq_select_n_locked(&args);
+       return nthreads;
+}
+
+
+/**
+ * callback function that uses thread parameters to determine wakeup eligibility
+ *
+ * Conditions:
+ *     'waitq' is locked
+ *     'thread' is not locked
+ */
+static thread_t waitq_select_one_cb(void *ctx, struct waitq *waitq,
+                                   int is_global, thread_t thread)
+{
+       int fifo_q, realtime;
+       boolean_t thread_imp_donor = FALSE;
+
+       (void)ctx;
+       (void)waitq;
+       (void)is_global;
+       realtime = 0;
+
+       fifo_q = 1; /* default to FIFO for all queues for now */
+#if IMPORTANCE_INHERITANCE
+       if (is_global)
+               fifo_q = 0; /* 'thread_imp_donor' takes the place of FIFO checking */
+#endif
+
+       if (thread->sched_pri >= BASEPRI_REALTIME)
+               realtime = 1;
+
+#if IMPORTANCE_INHERITANCE
+       /*
+        * Checking the imp donor bit does not need the thread lock or
+        * the task lock since we hold the wait queue lock and the
+        * thread cannot be removed from the waitq without acquiring
+        * the waitq lock. The imp donor bit may change after we read
+        * its value, but it is OK to wake a thread while someone
+        * drops an importance assertion on that thread.
+        */
+       thread_imp_donor = task_is_importance_donor(thread->task);
+#endif /* IMPORTANCE_INHERITANCE */
+
+       if (fifo_q || thread_imp_donor == TRUE
+           || realtime || (thread->options & TH_OPT_VMPRIV)) {
+               /*
+                * If this thread's task is an importance donor,
+                * or it's a realtime thread, or it's a VM privileged
+                * thread, OR the queue is marked as FIFO:
+                *     select the thread
+                */
+               return thread;
+       }
+
+       /* by default, _don't_ select the thread */
+       return THREAD_NULL;
+}
+
+/**
+ * select a single thread from a waitq that's waiting for a given event
+ *
+ * Conditions:
+ *     'waitq' is locked
+ *
+ * Returns:
+ *     A locked thread that's been removed from the waitq, but has not
+ *     yet been put on a run queue. Caller is responsible to call splx
+ *     with the '*spl' value.
+ */
+static thread_t waitq_select_one_locked(struct waitq *waitq, event64_t event,
+                                       uint64_t *reserved_preposts,
+                                       int priority, spl_t *spl)
+{
+       int nthreads;
+       queue_head_t threadq;
+
+       (void)priority;
+
+       queue_init(&threadq);
+
+       nthreads = waitq_select_n_locked(waitq, event, waitq_select_one_cb, NULL,
+                                        reserved_preposts, &threadq, 1, spl);
+
+       /* if we selected a thread, return it (still locked) */
+       if (!queue_empty(&threadq)) {
+               thread_t t;
+               queue_entry_t qe = dequeue_head(&threadq);
+               t = qe_element(qe, struct thread, links);
+               assert(queue_empty(&threadq)); /* there should be 1 entry */
+               /* t has been locked and removed from all queues */
+               return t;
+       }
+
+       return THREAD_NULL;
+}
+
+
+struct select_thread_ctx {
+       thread_t      thread;
+       event64_t     event;
+       spl_t        *spl;
+};
+
+/**
+ * link walk callback invoked once for each set to which a waitq belongs
+ *
+ * Conditions:
+ *     initial waitq is locked
+ *     ctx->thread is unlocked
+ *
+ * Notes:
+ *     This may disable interrupts and early-out of the full DAG link walk by
+ *     returning KERN_ALREADY_IN_SET. In this case, the returned thread has
+ *     been removed from the waitq, it's waitq state has been reset, and the
+ *     caller is responsible to call splx() with the returned interrupt state
+ *     in ctx->spl.
+ */
+static int waitq_select_thread_cb(struct waitq *waitq, void *ctx,
+                                 struct setid_link *link)
+{
+       struct select_thread_ctx *stctx = (struct select_thread_ctx *)ctx;
+       struct waitq_set *wqset;
+
+       (void)waitq;
+
+       thread_t thread = stctx->thread;
+       event64_t event = stctx->event;
+
+       if (sl_type(link) != SLT_WQS)
+               return WQ_ITERATE_CONTINUE;
+
+       wqset = link->sl_wqs.sl_set;
+
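+       /*
+        * If the set is IRQ safe but this waitq is not, raise spl before
+        * taking the set lock; otherwise the set lock can be taken first.
+        */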
+       if (!waitq_irq_safe(waitq) && waitq_irq_safe(&wqset->wqset_q)) {
+               *(stctx->spl) = splsched();
+               waitq_set_lock(wqset);
+               thread_lock(thread);
+       } else {
+               waitq_set_lock(wqset);
+               *(stctx->spl) = splsched();
+               thread_lock(thread);
+       }
+
+       if ((thread->waitq == &wqset->wqset_q)
+           && (thread->wait_event == event)) {
+               remqueue(&thread->links);
+               thread_clear_waitq_state(thread);
+               /*
+                * thread still locked,
+                * return non-zero to break out of WQS walk
+                */
+               waitq_set_unlock(wqset);
+               return WQ_ITERATE_FOUND;
+       }
+
+       thread_unlock(thread);
+       waitq_set_unlock(wqset);
+       splx(*(stctx->spl));
+
+       return WQ_ITERATE_CONTINUE;
+}
+
+/**
+ * returns KERN_SUCCESS and locks 'thread' if-and-only-if 'thread' is waiting
+ * on 'waitq' (or any set to which waitq belongs) for 'event'
+ *
+ * Conditions:
+ *     'waitq' is locked
+ *     'thread' is unlocked
+ */
+static kern_return_t waitq_select_thread_locked(struct waitq *waitq,
+                                               event64_t event,
+                                               thread_t thread, spl_t *spl)
+{
+       struct setid_link *link;
+       struct select_thread_ctx ctx;
+       kern_return_t kr;
+
+       *spl = splsched();
+       thread_lock(thread);
+
+       if ((thread->waitq == waitq) && (thread->wait_event == event)) {
+               remqueue(&thread->links);
+               thread_clear_waitq_state(thread);
+               /* thread still locked */
+               return KERN_SUCCESS;
+       }
+
+       thread_unlock(thread);
+       splx(*spl);
+
+       if (!waitq->waitq_set_id)
+               return KERN_NOT_WAITING;
+
+       /* check to see if the set ID for this wait queue is valid */
+       link = lt_get_link(waitq->waitq_set_id);
+       if (!link) {
+               /* the waitq set to which this waitq belonged has been invalidated */
+               waitq->waitq_set_id = 0;
+               return KERN_NOT_WAITING;
+       }
+
+       /*
+        * The thread may be waiting on a wait queue set to which
+        * the input 'waitq' belongs. Go look for the thread in
+        * all wait queue sets. If it's there, we'll remove it
+        * because it's equivalent to waiting directly on the input waitq.
+        */
+       ctx.thread = thread;
+       ctx.event = event;
+       ctx.spl = spl;
+       kr = walk_setid_links(LINK_WALK_FULL_DAG, waitq, waitq->waitq_set_id,
+                             SLT_WQS, (void *)&ctx, waitq_select_thread_cb);
+
+       lt_put_link(link);
+
+       /* we found a thread, return success */
+       if (kr == WQ_ITERATE_FOUND)
+               return KERN_SUCCESS;
+
+       return KERN_NOT_WAITING;
+}
+
+static int prepost_exists_cb(struct waitq_set __unused *wqset,
+                            void __unused *ctx,
+                            struct wq_prepost __unused *wqp,
+                            struct waitq __unused *waitq)
+{
+       /* if we get here, then we know that there is a valid prepost object! */
+       return WQ_ITERATE_FOUND;
+}
+
+/**
+ * declare a thread's intent to wait on 'waitq' for 'wait_event'
+ *
+ * Conditions:
+ *     'waitq' is locked
+ *     'thread' is locked
+ */
+wait_result_t waitq_assert_wait64_locked(struct waitq *waitq,
+                                         event64_t wait_event,
+                                         wait_interrupt_t interruptible,
+                                         wait_timeout_urgency_t urgency,
+                                         uint64_t deadline,
+                                         uint64_t leeway,
+                                         thread_t thread)
+{
+       wait_result_t wait_result;
+       int realtime = 0;
+
+       /*
+        * Warning: Do _not_ place debugging print statements here.
+        *          The thread is locked!
+        */
+
+       if (thread->waitq != NULL)
+               panic("thread already waiting on %p", thread->waitq);
+
+       if (waitq_is_set(waitq)) {
+               struct waitq_set *wqset = (struct waitq_set *)waitq;
+               /*
+                * early-out if the thread is waiting on a wait queue set
+                * that has already been pre-posted.
+                */
+               if (wait_event == NO_EVENT64 && waitq_set_maybe_preposted(wqset)) {
+                       int ret;
+                       /*
+                        * Run through the list of potential preposts. Because
+                        * this is a hot path, we short-circuit the iteration
+                        * if we find just one prepost object.
+                        */
+                       ret = wq_prepost_foreach_locked(wqset, NULL,
+                                                       prepost_exists_cb);
+                       if (ret == WQ_ITERATE_FOUND) {
+                               thread->wait_result = THREAD_AWAKENED;
+                               return THREAD_AWAKENED;
+                       }
+               }
+       }
+
+       /*
+        * Realtime threads get priority for wait queue placements.
+        * This allows wait_queue_wakeup_one to prefer a waiting
+        * realtime thread, similar in principle to performing
+        * a wait_queue_wakeup_all and allowing scheduler prioritization
+        * to run the realtime thread, but without causing the
+        * lock contention of that scenario.
+        */
+       if (thread->sched_pri >= BASEPRI_REALTIME)
+               realtime = 1;
+
+       /*
+        * This is the extent to which we currently take scheduling attributes
+        * into account.  If the thread is vm privileged, we stick it at
+        * the front of the queue.  Later, these queues will honor the policy
+        * value set at waitq_init time.
+        */
+       wait_result = thread_mark_wait_locked(thread, interruptible);
+       /* thread->wait_result has been set */
+       if (wait_result == THREAD_WAITING) {
+               if (!waitq->waitq_fifo
+                   || (thread->options & TH_OPT_VMPRIV) || realtime)
+                       enqueue_head(&waitq->waitq_queue, &thread->links);
+               else
+                       enqueue_tail(&waitq->waitq_queue, &thread->links);
+
+               thread->wait_event = wait_event;
+               thread->waitq = waitq;
+
+               if (deadline != 0) {
+                       boolean_t act;
+                       act = timer_call_enter_with_leeway(&thread->wait_timer,
+                                                          NULL,
+                                                          deadline, leeway,
+                                                          urgency, FALSE);
+                       if (!act)
+                               thread->wait_timer_active++;
+                       thread->wait_timer_is_set = TRUE;
+               }
+
+               if (waitq_is_global(waitq))
+                       waitq->waitq_eventmask = waitq->waitq_eventmask
+                                               | _CAST_TO_EVENT_MASK(wait_event);
+
+               waitq_stats_count_wait(waitq);
+       }
+
+       return wait_result;
+}
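+
+/*
+ * Usage sketch (illustrative only; the calling sequence is inferred from
+ * the Conditions above, and constants such as THREAD_UNINT,
+ * TIMEOUT_URGENCY_SYS_NORMAL and THREAD_CONTINUE_NULL come from the wider
+ * kernel headers, not from this file):
+ *
+ *     s = splsched();
+ *     waitq_lock(wq);
+ *     thread_lock(thread);
+ *     wr = waitq_assert_wait64_locked(wq, event, THREAD_UNINT,
+ *                                     TIMEOUT_URGENCY_SYS_NORMAL,
+ *                                     deadline, 0, thread);
+ *     thread_unlock(thread);
+ *     waitq_unlock(wq);
+ *     splx(s);
+ *     if (wr == THREAD_WAITING)
+ *             wr = thread_block(THREAD_CONTINUE_NULL);
+ */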
+
+/**
+ * remove 'thread' from its current blocking state on 'waitq'
+ *
+ * Conditions:
+ *     'waitq' is locked
+ *     'thread' is locked
+ *
+ * Notes:
+ *     This function is primarily used by clear_wait_internal in
+ *     sched_prim.c from the thread timer wakeup path
+ *     (i.e. the thread was waiting on 'waitq' with a timeout that expired)
+ */
+void waitq_pull_thread_locked(struct waitq *waitq, thread_t thread)
+{
+       (void)waitq;
+       assert(thread->waitq == waitq);
+
+       remqueue(&thread->links);
+       thread_clear_waitq_state(thread);
+       waitq_stats_count_clear_wakeup(waitq);
+
+       /* clear the global event mask if this was the last thread there! */
+       if (waitq_is_global(waitq) && queue_empty(&waitq->waitq_queue))
+               waitq->waitq_eventmask = 0;
+}
+
+
+static __inline__
+void maybe_adjust_thread_pri(thread_t thread, int priority) {
+       if (thread->sched_pri < priority) {
+               if (priority <= MAXPRI) {
+                       set_sched_pri(thread, priority);
+
+                       thread->was_promoted_on_wakeup = 1;
+                       thread->sched_flags |= TH_SFLAG_PROMOTED;
+               }
+               return;
+       }
+
+       /*
+        * If the caller is requesting the waitq subsystem to promote the
+        * priority of the awoken thread, then boost the thread's priority to
+        * the default WAITQ_BOOST_PRIORITY (if it's not already equal or
+        * higher priority).  This boost must be removed via a call to
+        * waitq_clear_promotion_locked.
+        */
+       if (priority == WAITQ_PROMOTE_PRIORITY &&
+           (thread->sched_pri < WAITQ_BOOST_PRIORITY ||
+            !(thread->sched_flags & TH_SFLAG_WAITQ_PROMOTED))) {
+
+               KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAITQ_PROMOTE) | DBG_FUNC_NONE,
+                                     (uintptr_t)thread_tid(thread),
+                                     thread->sched_pri, thread->base_pri,
+                                     WAITQ_BOOST_PRIORITY, 0);
+               thread->sched_flags |= TH_SFLAG_WAITQ_PROMOTED;
+               if (thread->sched_pri < WAITQ_BOOST_PRIORITY)
+                       set_sched_pri(thread, WAITQ_BOOST_PRIORITY);
+       }
+}
+
+/**
+ * Clear a thread's waitq priority promotion state and the waitq's boost flag
+ *
+ * This function will always clear the waitq's 'waitq_boost' flag. If the
+ * 'thread' parameter is non-null, then this function will also check the
+ * priority promotion (boost) state of that thread. If this thread was boosted
+ * (by having been awoken from a boosting waitq), then this boost state is
+ * cleared. This function is to be paired with waitq_enable_promote_locked.
+ */
+void waitq_clear_promotion_locked(struct waitq *waitq, thread_t thread)
+{
+       spl_t s;
+
+       assert(waitq_held(waitq));
+       if (thread == THREAD_NULL)
+               return;
+
+       if (!waitq_irq_safe(waitq))
+               s = splsched();
+       thread_lock(thread);
+
+       if (thread->sched_flags & TH_SFLAG_WAITQ_PROMOTED) {
+               thread->sched_flags &= ~TH_SFLAG_WAITQ_PROMOTED;
+
+               if (thread->sched_flags & TH_SFLAG_PROMOTED_MASK) {
+                       /* it still has other promotions (mutex/rw_lock) */
+               } else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
+                       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAITQ_DEMOTE) | DBG_FUNC_NONE,
+                                             (uintptr_t)thread_tid(thread),
+                                             thread->sched_pri,
+                                             thread->base_pri,
+                                             DEPRESSPRI, 0);
+                       set_sched_pri(thread, DEPRESSPRI);
+               } else {
+                       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAITQ_DEMOTE) | DBG_FUNC_NONE,
+                                             (uintptr_t)thread_tid(thread),
+                                             thread->sched_pri,
+                                             thread->base_pri,
+                                             thread->base_pri, 0);
+                       thread_recompute_sched_pri(thread, FALSE);
+               }
+       }
+
+       thread_unlock(thread);
+       if (!waitq_irq_safe(waitq))
+               splx(s);
+}
+
+/**
+ * wakeup all threads waiting on 'waitq' for 'wake_event'
+ *
+ * Conditions:
+ *     'waitq' is locked
+ *
+ * Notes:
+ *     May temporarily disable and re-enable interrupts
+ *     and re-adjust thread priority of each awoken thread.
+ *
+ *     If the input 'lock_state' == WAITQ_UNLOCK then the waitq will have
+ *     been unlocked before calling thread_go() on any returned threads, and
+ *     is guaranteed to be unlocked upon function return.
+ */
+kern_return_t waitq_wakeup64_all_locked(struct waitq *waitq,
+                                       event64_t wake_event,
+                                       wait_result_t result,
+                                       uint64_t *reserved_preposts,
+                                       int priority,
+                                       waitq_lock_state_t lock_state)
+{
+       kern_return_t ret;
+       thread_t thread;
+       spl_t th_spl;
+       int nthreads;
+       queue_head_t wakeup_queue;
+
+       assert(waitq_held(waitq));
+       queue_init(&wakeup_queue);
+
+       nthreads = waitq_select_n_locked(waitq, wake_event, NULL, NULL,
+                                        reserved_preposts,
+                                        &wakeup_queue, -1, &th_spl);
+
+       /* set each thread running */
+       ret = KERN_NOT_WAITING;
+
+#if CONFIG_WAITQ_STATS
+       qe_foreach_element(thread, &wakeup_queue, links)
+               waitq_stats_count_wakeup(waitq);
+#endif
+       if (lock_state == WAITQ_UNLOCK)
+               waitq_unlock(waitq);
+
+       qe_foreach_element_safe(thread, &wakeup_queue, links) {
+               remqueue(&thread->links);
+               maybe_adjust_thread_pri(thread, priority);
+               ret = thread_go(thread, result);
+               assert(ret == KERN_SUCCESS);
+               thread_unlock(thread);
+       }
+       if (nthreads > 0)
+               splx(th_spl);
+       else
+               waitq_stats_count_fail(waitq);
+
+       return ret;
+}
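+
+/*
+ * Usage sketch (illustrative only): wake every waiter for 'event' and let
+ * the call drop the waitq lock via WAITQ_UNLOCK. Prepost reservation is
+ * elided here; passing NULL for 'reserved_preposts' is an assumption that
+ * only holds for waitqs that do not prepost to any set.
+ *
+ *     s = splsched();
+ *     waitq_lock(wq);
+ *     kr = waitq_wakeup64_all_locked(wq, event, THREAD_AWAKENED,
+ *                                    NULL, WAITQ_ALL_PRIORITIES,
+ *                                    WAITQ_UNLOCK);
+ *     splx(s);
+ */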
+
+/**
+ * wakeup one thread waiting on 'waitq' for 'wake_event'
+ *
+ * Conditions:
+ *     'waitq' is locked
+ *
+ * Notes:
+ *     May temporarily disable and re-enable interrupts.
+ */
+kern_return_t waitq_wakeup64_one_locked(struct waitq *waitq,
+                                       event64_t wake_event,
+                                       wait_result_t result,
+                                       uint64_t *reserved_preposts,
+                                       int priority,
+                                       waitq_lock_state_t lock_state)
+{
+       thread_t thread;
+       spl_t th_spl;
+
+       assert(waitq_held(waitq));
+
+       thread = waitq_select_one_locked(waitq, wake_event,
+                                        reserved_preposts,
+                                        priority, &th_spl);
+
+       if (thread != THREAD_NULL)
+               waitq_stats_count_wakeup(waitq);
+       else
+               waitq_stats_count_fail(waitq);
+
+       if (lock_state == WAITQ_UNLOCK)
+               waitq_unlock(waitq);
+
+       if (thread != THREAD_NULL) {
+               maybe_adjust_thread_pri(thread, priority);
+               kern_return_t ret = thread_go(thread, result);
+               assert(ret == KERN_SUCCESS);
+               thread_unlock(thread);
+               splx(th_spl);
+               return ret;
+       }
+
+       return KERN_NOT_WAITING;
+}
+
+/**
+ * wakeup one thread waiting on 'waitq' for 'wake_event'
+ *
+ * Conditions:
+ *     'waitq' is locked
+ *
+ * Returns:
+ *     A locked, runnable thread.
+ *     If return value is non-NULL, interrupts have also
+ *     been disabled, and the caller is responsible for calling
+ *     splx() with the returned '*spl' value.
+ */
+thread_t waitq_wakeup64_identity_locked(struct waitq *waitq,
+                                       event64_t wake_event,
+                                       wait_result_t result,
+                                       spl_t *spl,
+                                       uint64_t *reserved_preposts,
+                                       waitq_lock_state_t lock_state)
+{
+       thread_t thread;
+
+       assert(waitq_held(waitq));
+
+       thread = waitq_select_one_locked(waitq, wake_event,
+                                        reserved_preposts,
+                                        WAITQ_ALL_PRIORITIES, spl);
+
+       if (thread != THREAD_NULL)
+               waitq_stats_count_wakeup(waitq);
+       else
+               waitq_stats_count_fail(waitq);
+
+       if (lock_state == WAITQ_UNLOCK)
+               waitq_unlock(waitq);
+
+       if (thread != THREAD_NULL) {
+               kern_return_t __assert_only ret;
+               ret = thread_go(thread, result);
+               assert(ret == KERN_SUCCESS);
+       }
+
+       return thread; /* locked if not NULL (caller responsible for spl) */
+}
+
+/**
+ * wakeup a specific thread iff it's waiting on 'waitq' for 'wake_event'
+ *
+ * Conditions:
+ *     'waitq' is locked
+ *     'thread' is unlocked
+ *
+ * Notes:
+ *     May temporarily disable and re-enable interrupts
+ *
+ *     If the input lock_state == WAITQ_UNLOCK then the waitq will have been
+ *     unlocked before calling thread_go() if 'thread' is to be awoken, and
+ *     is guaranteed to be unlocked upon function return.
+ */
+kern_return_t waitq_wakeup64_thread_locked(struct waitq *waitq,
+                                          event64_t wake_event,
+                                          thread_t thread,
+                                          wait_result_t result,
+                                          waitq_lock_state_t lock_state)
+{
+       kern_return_t ret;
+       spl_t th_spl;
+
+       assert(waitq_held(waitq));
+
+       /*
+        * See if the thread was still waiting there.  If so, it got
+        * dequeued and returned locked.
+        */
+       ret = waitq_select_thread_locked(waitq, wake_event, thread, &th_spl);
+
+       if (ret == KERN_SUCCESS)
+               waitq_stats_count_wakeup(waitq);
+       else
+               waitq_stats_count_fail(waitq);
+
+       if (lock_state == WAITQ_UNLOCK)
+               waitq_unlock(waitq);
+
+       if (ret != KERN_SUCCESS)
+               return KERN_NOT_WAITING;
+
+       ret = thread_go(thread, result);
+       assert(ret == KERN_SUCCESS);
+       thread_unlock(thread);
+       splx(th_spl);
+
+       return ret;
+}
+
+
+
+/* ----------------------------------------------------------------------
+ *
+ * In-Kernel API
+ *
+ * ---------------------------------------------------------------------- */
+
+/**
+ * initialize a waitq object
+ */
+kern_return_t waitq_init(struct waitq *waitq, int policy)
+{
+       assert(waitq != NULL);
+
+       /* only FIFO and LIFO for now */
+       if ((policy & SYNC_POLICY_FIXED_PRIORITY) != 0)
+               return KERN_INVALID_ARGUMENT;
+
+       waitq->waitq_fifo = ((policy & SYNC_POLICY_REVERSED) == 0);
+       waitq->waitq_irq = !!(policy & SYNC_POLICY_DISABLE_IRQ);
+       waitq->waitq_prepost = 0;
+       waitq->waitq_type = WQT_QUEUE;
+       waitq->waitq_eventmask = 0;
+
+       waitq->waitq_set_id = 0;
+       waitq->waitq_prepost_id = 0;
+
+       hw_lock_init(&waitq->waitq_interlock);
+       queue_init(&waitq->waitq_queue);
+
+       return KERN_SUCCESS;
+}
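+
+/*
+ * Usage sketch (illustrative only; SYNC_POLICY_FIFO is assumed to be the
+ * policy bit selecting the default FIFO ordering checked above):
+ *
+ *     struct waitq my_waitq;
+ *     kr = waitq_init(&my_waitq, SYNC_POLICY_FIFO);
+ *     assert(kr == KERN_SUCCESS);
+ *     ...
+ *     waitq_deinit(&my_waitq);
+ */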
+
+struct wq_unlink_ctx {
+       struct waitq *unlink_wq;
+       struct waitq_set *unlink_wqset;
+};
+
+static int waitq_unlink_prepost_cb(struct waitq_set __unused *wqset, void *ctx,
+                                  struct wq_prepost *wqp, struct waitq *waitq);
+
+/**
+ * walk_setid_links callback to invalidate 'link' parameter
+ *
+ * Conditions:
+ *     Called from walk_setid_links.
+ *     Note that unlike other callbacks, this one makes no assumptions about
+ *     the 'waitq' parameter; specifically, it does not have to be locked or
+ *     even valid.
+ */
+static int waitq_unlink_all_cb(struct waitq *waitq, void *ctx,
+                              struct setid_link *link)
+{
+       (void)waitq;
+       (void)ctx;
+       if (sl_type(link) == SLT_LINK && sl_is_valid(link))
+               lt_invalidate(link);
+
+       if (sl_type(link) == SLT_WQS) {
+               struct waitq_set *wqset;
+               int do_spl = 0;
+               spl_t spl;
+               struct wq_unlink_ctx ulctx;
+
+               /*
+                * When destroying the waitq, take the time to clear out any
+                * preposts it may have made. This could potentially save time
+                * on the IPC send path which would otherwise have to iterate
+                * over lots of dead port preposts.
+                */
+               if (waitq->waitq_prepost_id == 0)
+                       goto out;
+
+               wqset = link->sl_wqs.sl_set;
+               assert(wqset != NULL);
+
+               if (waitq_set_is_valid(wqset) &&
+                   waitq_irq_safe(&wqset->wqset_q)) {
+                       spl = splsched();
+                       do_spl = 1;
+               }
+               waitq_set_lock(wqset);
+
+               if (!waitq_set_is_valid(wqset)) {
+                       /* someone raced us to teardown */
+                       goto out_unlock;
+               }
+               if (!waitq_set_maybe_preposted(wqset))
+                       goto out_unlock;
+
+               ulctx.unlink_wq = waitq;
+               ulctx.unlink_wqset = wqset;
+               (void)wq_prepost_iterate(wqset->wqset_prepost_id, &ulctx,
+                                        waitq_unlink_prepost_cb);
+out_unlock:
+               waitq_set_unlock(wqset);
+               if (do_spl)
+                       splx(spl);
+       }
+
+out:
+       return WQ_ITERATE_CONTINUE;
+}
+
+
+/**
+ * cleanup any link/prepost table resources associated with a waitq
+ */
+void waitq_deinit(struct waitq *waitq)
+{
+       uint64_t setid = 0;
+       spl_t s;
+
+       if (!waitq_valid(waitq))
+               return;
+
+       if (waitq_irq_safe(waitq))
+               s = splsched();
+       waitq_lock(waitq);
+       if (!waitq_valid(waitq))
+               goto out;
+
+       waitq_unlink_all_locked(waitq, &setid, &s, NULL);
+       waitq->waitq_type = WQT_INVALID;
+       assert(queue_empty(&waitq->waitq_queue));
+
+out:
+       waitq_unlock(waitq);
+       if (waitq_irq_safe(waitq))
+               splx(s);
+
+       if (setid)
+               (void)walk_setid_links(LINK_WALK_ONE_LEVEL, waitq, setid,
+                                      SLT_ALL, NULL, waitq_unlink_all_cb);
+}
+
+
+/**
+ * invalidate the given wq_prepost object
+ *
+ * Conditions:
+ *     Called from wq_prepost_iterate (_not_ from wq_prepost_foreach_locked!)
+ */
+static int wqset_clear_prepost_chain_cb(struct waitq_set __unused *wqset,
+                                       void __unused *ctx,
+                                       struct wq_prepost *wqp,
+                                       struct waitq __unused *waitq)
+{
+       if (wqp_type(wqp) == WQP_POST)
+               wq_prepost_invalidate(wqp);
+       return WQ_ITERATE_CONTINUE;
+}
+
+
+/**
+ * allocate and initialize a waitq set object
+ *
+ * Conditions:
+ *     may block
+ *
+ * Returns:
+ *     allocated / initialized waitq_set object
+ *     NULL on failure
+ */
+struct waitq_set *waitq_set_alloc(int policy)
+{
+       struct waitq_set *wqset;
+
+       wqset = (struct waitq_set *)zalloc(waitq_set_zone);
+       if (!wqset)
+               panic("Can't allocate a new waitq set from zone %p", waitq_set_zone);
+
+       kern_return_t ret;
+       ret = waitq_set_init(wqset, policy, NULL);
+       if (ret != KERN_SUCCESS) {
+               zfree(waitq_set_zone, wqset);
+               wqset = NULL;
+       }
+
+       return wqset;
+}
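+
+/*
+ * Usage sketch (illustrative only): allocate a preposting set and release
+ * it with waitq_set_free (defined below) when done. SYNC_POLICY_FIFO is
+ * assumed to be the default FIFO policy bit.
+ *
+ *     struct waitq_set *wqset;
+ *     wqset = waitq_set_alloc(SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST);
+ *     ...
+ *     waitq_set_free(wqset);
+ */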
+
+/**
+ * initialize a waitq set object
+ *
+ * Conditions:
+ *     may (rarely) block if link table needs to grow, and
+ *     no 'reserved_link' object is passed.
+ */
+kern_return_t waitq_set_init(struct waitq_set *wqset,
+                            int policy, uint64_t *reserved_link)
+{
+       struct setid_link *link;
+       kern_return_t ret;
+
+       memset(wqset, 0, sizeof(*wqset));
+
+       ret = waitq_init(&wqset->wqset_q, policy);
+       if (ret != KERN_SUCCESS)
+               return ret;
+
+       wqset->wqset_q.waitq_type = WQT_SET;
+       if (policy & SYNC_POLICY_PREPOST)
+               wqset->wqset_q.waitq_prepost = 1;
+       else
+               wqset->wqset_q.waitq_prepost = 0;
+
+       if (reserved_link && *reserved_link != 0) {
+               link = lt_get_reserved(*reserved_link, SLT_WQS);
+               /* always consume the caller's reference */
+               *reserved_link = 0;
+       } else {
+               link = lt_alloc_link(SLT_WQS);
+       }
+       if (!link)
+               panic("Can't allocate link object for waitq set: %p", wqset);
+
+       link->sl_wqs.sl_set = wqset;
+       sl_set_valid(link);
+
+       wqset->wqset_id = link->sl_set_id.id;
+       wqset->wqset_prepost_id = 0;
+       lt_put_link(link);
+
+       return KERN_SUCCESS;
+}
+
+/**
+ * clear out / release any resources associated with a waitq set
+ *
+ * Conditions:
+ *     may block
+ * Note:
+ *     This will render the waitq set invalid, and it must
+ *     be re-initialized with waitq_set_init before it can be used again
+ */
+void waitq_set_deinit(struct waitq_set *wqset)
+{
+       struct setid_link *link = NULL;
+       uint64_t set_id, set_links_id, prepost_id;
+       int do_spl = 0;
+       spl_t s;
+
+       if (!waitqs_is_set(wqset))
+               panic("trying to de-initialize an invalid wqset @%p", wqset);
+
+       if (waitq_irq_safe(&wqset->wqset_q)) {
+               s = splsched();
+               do_spl = 1;
+       }
+       waitq_set_lock(wqset);
+
+       set_id = wqset->wqset_id;
+
+       /* grab the set's link object */
+       link = lt_get_link(set_id);
+       if (link)
+               lt_invalidate(link);
+
+       /* someone raced us to deinit */
+       if (!link || wqset->wqset_id != set_id || set_id != link->sl_set_id.id) {
+               if (link)
+                       lt_put_link(link);
+               waitq_set_unlock(wqset);
+               if (do_spl)
+                       splx(s);
+               return;
+       }
+
+       /* every wait queue set should have a valid link object */
+       assert(link != NULL && sl_type(link) == SLT_WQS);
+
+       wqset->wqset_id = 0;
+
+       wqset->wqset_q.waitq_type = WQT_INVALID;
+       wqset->wqset_q.waitq_fifo = 0;
+       wqset->wqset_q.waitq_prepost = 0;
+       /* don't clear the 'waitq_irq' bit: it's used in locking! */
+       wqset->wqset_q.waitq_eventmask = 0;
+
+       /*
+        * This set may have a lot of preposts, or may have been a member of
+        * many other sets. To minimize spinlock hold times, we clear out the
+        * waitq set data structure under the lock-hold, but don't clear any
+        * table objects. We keep handles to the prepost and set linkage
+        * objects and free those outside the critical section.
+        */
+       prepost_id = wqset->wqset_prepost_id;
+       wqset->wqset_prepost_id = 0;
+
+       set_links_id = 0;
+       waitq_unlink_all_locked(&wqset->wqset_q, &set_links_id, &s, NULL);
+
+       waitq_set_unlock(wqset);
+       if (do_spl)
+               splx(s);
+
+       /*
+        * walk_setid_links may race with us for access to the waitq set.
+        * If walk_setid_links has a reference to the set, then we should wait
+        * until the link's refcount goes to 1 (our reference) before we exit
+        * this function. That way we ensure that the waitq set memory will
+        * remain valid even though it's been cleared out.
+        */
+       while (sl_refcnt(link) > 1)
+               delay(1);
+       lt_put_link(link);
+
+       /*
+        * release all the set link objects
+        * (links to other sets to which this set was previously added)
+        */
+       if (set_links_id)
+               (void)walk_setid_links(LINK_WALK_ONE_LEVEL, NULL, set_links_id,
+                                      SLT_ALL, NULL, waitq_unlink_all_cb);
+
+       /* drop / unlink all the prepost table objects */
+       (void)wq_prepost_iterate(prepost_id, NULL, wqset_clear_prepost_chain_cb);
+}
+
+/**
+ * de-initialize and free an allocated waitq set object
+ *
+ * Conditions:
+ *     may block
+ */
+kern_return_t waitq_set_free(struct waitq_set *wqset)
+{
+       waitq_set_deinit(wqset);
+
+       memset(wqset, 0, sizeof(*wqset));
+       zfree(waitq_set_zone, wqset);
+
+       return KERN_SUCCESS;
+}
+
+#if defined(DEVELOPMENT) || defined(DEBUG)
+#if CONFIG_WAITQ_DEBUG
+/**
+ * return the set ID of 'wqset'
+ */
+uint64_t wqset_id(struct waitq_set *wqset)
+{
+       if (!wqset)
+               return 0;
+
+       assert(waitqs_is_set(wqset));
+       return wqset->wqset_id;
+}
+
+/**
+ * returns a pointer to the waitq object embedded in 'wqset'
+ */
+struct waitq *wqset_waitq(struct waitq_set *wqset)
+{
+       if (!wqset)
+               return NULL;
+
+       assert(waitqs_is_set(wqset));
+
+       return &wqset->wqset_q;
+}
+#endif /* CONFIG_WAITQ_DEBUG */
+#endif /* DEVELOPMENT || DEBUG */
+
+
+/**
+ * clear all preposts originating from 'waitq'
+ *
+ * Conditions:
+ *     'waitq' locked
+ *     may (rarely) spin waiting for another on-core thread to
+ *     release the last reference to the waitq's prepost link object
+ *
+ * NOTE:
+ *     If this function needs to spin, it will drop the waitq lock!
+ *     The return value of the function indicates whether or not this
+ *     happened: 1 == lock was dropped, 0 == lock held
+ */
+int waitq_clear_prepost_locked(struct waitq *waitq, spl_t *s)
+{
+       struct wq_prepost *wqp;
+       int dropped_lock = 0;
+
+       if (waitq->waitq_prepost_id == 0)
+               return 0;
+
+       wqp = wq_prepost_get(waitq->waitq_prepost_id);
+       waitq->waitq_prepost_id = 0;
+       if (wqp) {
+               uint64_t wqp_id = wqp->wqp_prepostid.id;
+               wqdbg_v("invalidate prepost 0x%llx (refcnt:%d)",
+                       wqp->wqp_prepostid.id, wqp_refcnt(wqp));
+               wq_prepost_invalidate(wqp);
+               while (wqp_refcnt(wqp) > 1) {
+                       int do_spl = waitq_irq_safe(waitq);
+
+                       /*
+                        * Some other thread must have raced us to grab a link
+                        * object reference before we invalidated it. This
+                        * means that they are probably trying to access the
+                        * waitq to which the prepost object points. We need
+                        * to wait here until the other thread drops their
+                        * reference. We know that no one else can get a
+                        * reference (the object has been invalidated), and
+                        * that prepost references are short-lived (dropped on
+                        * a call to wq_prepost_put). We also know that no one
+                        * blocks while holding a reference; therefore, the
+                        * other reference holder must be on-core. We'll just
+                        * sit and wait for the other reference to be dropped.
+                        */
+                       disable_preemption();
+
+                       waitq_unlock(waitq);
+                       if (s && do_spl)
+                               splx(*s);
+                       dropped_lock = 1;
+                       /*
+                        * don't yield here, just spin and assume the other
+                        * consumer is already on core...
+                        */
+                       delay(1);
+                       if (s && do_spl)
+                               *s = splsched();
+                       waitq_lock(waitq);
+
+                       enable_preemption();
+               }
+               if (wqp_refcnt(wqp) > 0 && wqp->wqp_prepostid.id == wqp_id)
+                       wq_prepost_put(wqp);
+       }
+
+       return dropped_lock;
+}
+
+/**
+ * clear all preposts originating from 'waitq'
+ *
+ * Conditions:
+ *     'waitq' is not locked
+ *     may disable and re-enable interrupts
+ */
+void waitq_clear_prepost(struct waitq *waitq)
+{
+       spl_t s;
+       int do_spl = waitq_irq_safe(waitq);
+
+       assert(waitq_valid(waitq));
+
+       if (do_spl)
+               s = splsched();
+       waitq_lock(waitq);
+       /* it doesn't matter to us if the lock is dropped here */
+       (void)waitq_clear_prepost_locked(waitq, &s);
+       waitq_unlock(waitq);
+       if (do_spl)
+               splx(s);
+}
+
+/**
+ * return the waitq's prepost object ID (allocate if necessary)
+ *
+ * Conditions:
+ *     'waitq' is unlocked
+ */
+uint64_t waitq_get_prepost_id(struct waitq *waitq)
+{
+       struct wq_prepost *wqp;
+       uint64_t wqp_id = 0;
+       spl_t s;
+
+       if (!waitq_valid(waitq))
+               return 0;
+
+       if (waitq_irq_safe(waitq))
+               s = splsched();
+       waitq_lock(waitq);
+
+       if (!waitq_valid(waitq))
+               goto out_unlock;
+
+       if (waitq->waitq_prepost_id) {
+               wqp_id = waitq->waitq_prepost_id;
+               goto out_unlock;
+       }
+
+       /* don't hold a spinlock while allocating a prepost object */
+       waitq_unlock(waitq);
+       if (waitq_irq_safe(waitq))
+               splx(s);
+
+       wqp = wq_prepost_alloc(WQP_WQ, 1);
+       if (!wqp)
+               return 0;
+
+       /* re-acquire the waitq lock */
+       if (waitq_irq_safe(waitq))
+               s = splsched();
+       waitq_lock(waitq);
+
+       if (!waitq_valid(waitq)) {
+               wq_prepost_put(wqp);
+               wqp_id = 0;
+               goto out_unlock;
+       }
+
+       if (waitq->waitq_prepost_id) {
+               /* we were beat by someone else */
+               wq_prepost_put(wqp);
+               wqp_id = waitq->waitq_prepost_id;
+               goto out_unlock;
+       }
+
+       wqp->wqp_wq.wqp_wq_ptr = waitq;
+
+       wqp_set_valid(wqp);
+       wqp_id = wqp->wqp_prepostid.id;
+       waitq->waitq_prepost_id = wqp_id;
+
+       wq_prepost_put(wqp);
+
+out_unlock:
+       waitq_unlock(waitq);
+       if (waitq_irq_safe(waitq))
+               splx(s);
+
+       return wqp_id;
+}
+
+
+static int waitq_inset_cb(struct waitq *waitq, void *ctx, struct setid_link *link)
+{
+       uint64_t setid = *(uint64_t *)ctx;
+       int ltype = sl_type(link);
+       (void)waitq;
+       if (ltype == SLT_WQS && link->sl_set_id.id == setid) {
+               wqdbg_v("  waitq already in set 0x%llx", setid);
+               return WQ_ITERATE_FOUND;
+       } else if (ltype == SLT_LINK) {
+               /*
+                * break out early if we see a link that points to the setid
+                * in question. This saves us a step in the
+                * iteration/recursion
+                */
+               wqdbg_v("  waitq already in set 0x%llx (SLT_LINK)", setid);
+               if (link->sl_link.sl_left_setid == setid ||
+                   link->sl_link.sl_right_setid == setid)
+                       return WQ_ITERATE_FOUND;
+       }
+
+       return WQ_ITERATE_CONTINUE;
+}
+
+/**
+ * determine if 'waitq' is a member of 'wqset'
+ *
+ * Conditions:
+ *     neither 'waitq' nor 'wqset' is locked
+ *     may disable and re-enable interrupts while locking 'waitq'
+ */
+boolean_t waitq_member(struct waitq *waitq, struct waitq_set *wqset)
+{
+       kern_return_t kr = WQ_ITERATE_SUCCESS;
+       uint64_t setid;
+       spl_t s;
+
+       if (!waitq_valid(waitq))
+               panic("Invalid waitq: %p", waitq);
+
+       if (!waitqs_is_set(wqset))
+               return FALSE;
+
+       if (waitq_irq_safe(waitq))
+               s = splsched();
+       waitq_lock(waitq);
+
+       setid = wqset->wqset_id;
+       if (!setid)
+               goto out_unlock;
+
+       /* fast path: most waitqs are members of only 1 set */
+       if (waitq->waitq_set_id == setid) {
+               waitq_unlock(waitq);
+               if (waitq_irq_safe(waitq))
+                       splx(s);
+               return TRUE;
+       }
+
+       /* walk the link table and look for the Set ID of wqset */
+       kr = walk_setid_links(LINK_WALK_ONE_LEVEL, waitq, waitq->waitq_set_id,
+                             SLT_ALL, (void *)&setid, waitq_inset_cb);
+
+out_unlock:
+       waitq_unlock(waitq);
+       if (waitq_irq_safe(waitq))
+               splx(s);
+
+       if (kr == WQ_ITERATE_FOUND)
+               return TRUE;
+       return FALSE;
+}
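+
+/*
+ * Usage sketch (illustrative only): link a waitq into a set only if it is
+ * not already a member (waitq_link is defined below).
+ *
+ *     if (!waitq_member(wq, wqset))
+ *             kr = waitq_link(wq, wqset, WAITQ_SHOULD_LOCK, NULL);
+ */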
+
+/**
+ * Returns true if the given waitq is a member of at least 1 set
+ */
+boolean_t waitq_in_set(struct waitq *waitq)
+{
+       struct setid_link *link;
+       boolean_t inset = FALSE;
+       spl_t s;
+
+       if (waitq_irq_safe(waitq))
+               s = splsched();
+       waitq_lock(waitq);
+
+       if (!waitq->waitq_set_id)
+               goto out_unlock;
+
+       link = lt_get_link(waitq->waitq_set_id);
+       if (link) {
+               /* if we get here, the waitq is in _at_least_one_ set */
+               inset = TRUE;
+               lt_put_link(link);
+       } else {
+               /* we can just optimize this for next time */
+               waitq->waitq_set_id = 0;
+       }
+
+out_unlock:
+       waitq_unlock(waitq);
+       if (waitq_irq_safe(waitq))
+               splx(s);
+       return inset;
+}
+
+
+/**
+ * pre-allocate a waitq link structure from the link table
+ *
+ * Conditions:
+ *     'waitq' is not locked
+ *     may (rarely) block if link table needs to grow
+ */
+uint64_t waitq_link_reserve(struct waitq *waitq)
+{
+       struct setid_link *link;
+       uint64_t reserved_id = 0;
+
+       assert(get_preemption_level() == 0 && waitq_wait_possible(current_thread()));
+
+       /*
+        * We've asserted that the caller can block, so we enforce a
+        * minimum-free table element policy here.
+        */
+       lt_ensure_free_space();
+
+       (void)waitq;
+       link = lt_alloc_link(WQT_RESERVED);
+       if (!link)
+               return 0;
+
+       reserved_id = link->sl_set_id.id;
+
+       return reserved_id;
+}
+
+/**
+ * release a pre-allocated waitq link structure
+ */
+void waitq_link_release(uint64_t id)
+{
+       struct setid_link *link;
+
+       if (id == 0)
+               return;
+
+       link = lt_get_reserved(id, SLT_LINK);
+       if (!link)
+               return;
+
+       /*
+        * if we successfully got a link object, then we know
+        * it's not been marked valid, and can be released with
+        * a standard lt_put_link() which should free the element.
+        */
+       lt_put_link(link);
+#if CONFIG_WAITQ_STATS
+       g_linktable.nreserved_releases += 1;
+#endif
+}
+
+/**
+ * link 'waitq' to the set identified by 'setid' using the 'link' structure
+ *
+ * Conditions:
+ *     'waitq' is locked
+ *     caller should have a reference to the 'link' object
+ */
+static kern_return_t waitq_link_internal(struct waitq *waitq,
+                                        uint64_t setid, struct setid_link *link)
+{
+       struct setid_link *qlink;
+       kern_return_t kr;
+
+       assert(waitq_held(waitq));
+
+       /*
+        * If the waitq_set_id field is empty, then this waitq is not
+        * a member of any other set. All we have to do is update the
+        * field.
+        */
+       if (!waitq->waitq_set_id) {
+               waitq->waitq_set_id = setid;
+               return KERN_SUCCESS;
+       }
+
+       qlink = lt_get_link(waitq->waitq_set_id);
+       if (!qlink) {
+               /*
+                * The set to which this wait queue belonged has been
+                * destroyed / invalidated. We can re-use the waitq field.
+                */
+               waitq->waitq_set_id = setid;
+               return KERN_SUCCESS;
+       }
+       lt_put_link(qlink);
+
+       /*
+        * Check to see if it's already a member of the set.
+        *
+        * TODO: check for cycles!
+        */
+       kr = walk_setid_links(LINK_WALK_ONE_LEVEL, waitq, waitq->waitq_set_id,
+                             SLT_ALL, (void *)&setid, waitq_inset_cb);
+       if (kr == WQ_ITERATE_FOUND)
+               return kr;
+
+       /*
+        * This wait queue is a member of at least one set already,
+        * and _not_ a member of the given set. Use our previously
+        * allocated link object, and hook it up to the wait queue.
+        * Note that it's possible that one or more of the wait queue sets to
+        * which the wait queue belongs was invalidated before we allocated
+        * this link object. That's OK because the next time we use that
+        * object we'll just ignore it.
+        */
+       link->sl_link.sl_left_setid = setid;
+       link->sl_link.sl_right_setid = waitq->waitq_set_id;
+       sl_set_valid(link);
+
+       waitq->waitq_set_id = link->sl_set_id.id;
+
+       return KERN_SUCCESS;
+}
+
+/**
+ * link 'waitq' to 'wqset'
+ *
+ * Conditions:
+ *     if 'lock_state' contains WAITQ_SHOULD_LOCK, 'waitq' must be unlocked.
+ *     Otherwise, 'waitq' must be locked.
+ *
+ *     may (rarely) block on link table allocation if the table has to grow,
+ *     and no 'reserved_link' object is passed.
+ *
+ * Notes:
+ *     The caller can guarantee that this function will never block by
+ *     pre-allocating a link table object and passing its ID in 'reserved_link'
+ */
+kern_return_t waitq_link(struct waitq *waitq, struct waitq_set *wqset,
+                        waitq_lock_state_t lock_state, uint64_t *reserved_link)
+{
+       kern_return_t kr;
+       struct setid_link *link;
+       int should_lock = (lock_state == WAITQ_SHOULD_LOCK);
+       spl_t s;
+
+       if (!waitq_valid(waitq))
+               panic("Invalid waitq: %p", waitq);
+
+       if (!waitqs_is_set(wqset))
+               return KERN_INVALID_ARGUMENT;
+
+       wqdbg_v("Link waitq %p to wqset 0x%llx",
+               (void *)VM_KERNEL_UNSLIDE_OR_PERM(waitq), wqset->wqset_id);
+
+       if (waitq_irq_safe(waitq) && (!reserved_link || *reserved_link == 0)) {
+               /*
+                * wait queues that need IRQs disabled cannot block waiting
+                * for table growth to complete. Even though this is rare,
+                * we require all these waitqs to pass in a reserved link
+                * object to avoid the potential to block.
+                */
+               panic("Global/IRQ-safe waitq %p cannot link to %p without "
+                     "reserved object!", waitq, wqset);
+       }
+
+       /*
+        * We _might_ need a new link object here, so we'll grab outside
+        * the lock because the alloc call _might_ block.
+        *
+        * If the caller reserved a link beforehand, then lt_get_reserved
+        * is guaranteed not to block because the caller holds an extra
+        * reference to the link which, in turn, holds a reference to the
+        * link table.
+        */
+       if (reserved_link && *reserved_link != 0) {
+               link = lt_get_reserved(*reserved_link, SLT_LINK);
+               /* always consume the caller's reference */
+               *reserved_link = 0;
+       } else {
+               link = lt_alloc_link(SLT_LINK);
+       }
+       if (!link)
+               return KERN_NO_SPACE;
+
+       if (should_lock) {
+               if (waitq_irq_safe(waitq))
+                       s = splsched();
+               waitq_lock(waitq);
+       }
+
+       kr = waitq_link_internal(waitq, wqset->wqset_id, link);
+
+       if (should_lock) {
+               waitq_unlock(waitq);
+               if (waitq_irq_safe(waitq))
+                       splx(s);
+       }
+
+       lt_put_link(link);
+
+       return kr;
+}
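+
+/*
+ * Usage sketch (illustrative only): pre-reserve a link object so that the
+ * link operation itself cannot block, then release whatever is left of the
+ * reservation (a no-op if waitq_link consumed it).
+ *
+ *     uint64_t rlink = waitq_link_reserve(wq);        (may block)
+ *     ...
+ *     kr = waitq_link(wq, wqset, WAITQ_SHOULD_LOCK, &rlink);
+ *     waitq_link_release(rlink);
+ */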
+
+/**
+ * helper: unlink 'waitq' from waitq set identified by 'setid'
+ *         this function also prunes invalid objects from the tree
+ *
+ * Conditions:
+ *     MUST be called from walk_setid_links link table walk
+ *     'waitq' is locked
+ *
+ * Notes:
+ *     This is a helper function which compresses the link table by culling
+ *     unused or unnecessary links. See comments below for different
+ *     scenarios.
+ */
+static inline int waitq_maybe_remove_link(struct waitq *waitq,
+                                         uint64_t setid,
+                                         struct setid_link *parent,
+                                         struct setid_link *left,
+                                         struct setid_link *right)
+{
+       uint64_t *wq_setid = &waitq->waitq_set_id;
+
+       /*
+        * There are two scenarios:
+        *
+        * Scenario 1:
+        * --------------------------------------------------------------------
+        * waitq->waitq_set_id == parent
+        *
+        *         parent(LINK)
+        *           /    \
+        *          /      \
+        *         /        \
+        *  L(LINK/WQS_l)   R(LINK/WQS_r)
+        *
+        * In this scenario, we assert that the original waitq points to the
+        * parent link we were passed in.  If WQS_l (or WQS_r) is the waitq
+        * set we're looking for, we can set the corresponding parent
+        * link id (left or right) to 0.  To compress the tree, we can reset the
+        * waitq_set_id of the original waitq to point to the side of the
+        * parent that is still valid. We then discard the parent link object.
+        */
+       if (*wq_setid == parent->sl_set_id.id) {
+               if (!left && !right) {
+                       /* completely invalid children */
+                       lt_invalidate(parent);
+                       wqdbg_v("S1, L+R");
+                       *wq_setid = 0;
+                       return WQ_ITERATE_INVALID;
+               } else if (!left || left->sl_set_id.id == setid) {
+                       /*
+                        * if the left side matches, we know it points either to the
+                        * WQS we're unlinking, or to an invalid object:
+                        * no need to invalidate it
+                        */
+                       *wq_setid = right->sl_set_id.id;
+                       lt_invalidate(parent);
+                       wqdbg_v("S1, L");
+                       return left ? WQ_ITERATE_UNLINKED : WQ_ITERATE_INVALID;
+               } else if (!right || right->sl_set_id.id == setid) {
+                       /*
+                        * if the right side matches, we know it points either to the
+                        * WQS we're unlinking, or to an invalid object:
+                        * no need to invalidate it
+                        */
+                       *wq_setid = left->sl_set_id.id;
+                       lt_invalidate(parent);
+                       wqdbg_v("S1, R");
+                       return right ? WQ_ITERATE_UNLINKED : WQ_ITERATE_INVALID;
+               }
+       }
+
+       /*
+        * the tree walk starts at the top-of-tree and moves down,
+        * so these are safe asserts.
+        */
+       assert(left || right); /* one of them has to be valid at this point */
+
+       /*
+        * Scenario 2:
+        * --------------------------------------------------------------------
+        * waitq->waitq_set_id == ... (OR parent)
+        *
+        *                    ...
+        *                     |
+        *                   parent
+        *                   /    \
+        *                  /      \
+        *              L(LINK)     R(LINK)
+        *               /\             /\
+        *              /  \           /  \
+        *             /    \       Rl(*)  Rr(*)
+        *         Ll(WQS)  Lr(WQS)
+        *
+        * In this scenario, a leaf node of either the left or right side
+        * could be the wait queue set we're looking to unlink. We also handle
+        * the case where one of these links is invalid.  If a leaf node is
+        * invalid or it's the set we're looking for, we can safely remove the
+        * middle link (left or right) and point the parent link directly to
+        * the remaining leaf node.
+        */
+       if (left && sl_type(left) == SLT_LINK) {
+               uint64_t Ll, Lr;
+               struct setid_link *linkLl, *linkLr;
+               assert(left->sl_set_id.id != setid);
+               Ll = left->sl_link.sl_left_setid;
+               Lr = left->sl_link.sl_right_setid;
+               linkLl = lt_get_link(Ll);
+               linkLr = lt_get_link(Lr);
+               if (!linkLl && !linkLr) {
+                       /*
+                        * The left object points to two invalid objects!
+                        * We can invalidate the left w/o touching the parent.
+                        */
+                       lt_invalidate(left);
+                       wqdbg_v("S2, Ll+Lr");
+                       return WQ_ITERATE_INVALID;
+               } else if (!linkLl || Ll == setid) {
+                       /* Ll is invalid and/or the wait queue set we're looking for */
+                       parent->sl_link.sl_left_setid = Lr;
+                       lt_invalidate(left);
+                       lt_put_link(linkLl);
+                       lt_put_link(linkLr);
+                       wqdbg_v("S2, Ll");
+                       return linkLl ? WQ_ITERATE_UNLINKED : WQ_ITERATE_INVALID;
+               } else if (!linkLr || Lr == setid) {
+                       /* Lr is invalid and/or the wait queue set we're looking for */
+                       parent->sl_link.sl_left_setid = Ll;
+                       lt_invalidate(left);
+                       lt_put_link(linkLr);
+                       lt_put_link(linkLl);
+                       wqdbg_v("S2, Lr");
+                       return linkLr ? WQ_ITERATE_UNLINKED : WQ_ITERATE_INVALID;
+               }
+               lt_put_link(linkLl);
+               lt_put_link(linkLr);
+       }
+
+       if (right && sl_type(right) == SLT_LINK) {
+               uint64_t Rl, Rr;
+               struct setid_link *linkRl, *linkRr;
+               assert(right->sl_set_id.id != setid);
+               Rl = right->sl_link.sl_left_setid;
+               Rr = right->sl_link.sl_right_setid;
+               linkRl = lt_get_link(Rl);
+               linkRr = lt_get_link(Rr);
+               if (!linkRl && !linkRr) {
+                       /*
+                        * The right object points to two invalid objects!
+                        * We can invalidate the right w/o touching the parent.
+                        */
+                       lt_invalidate(right);
+                       wqdbg_v("S2, Rl+Rr");
+                       return WQ_ITERATE_INVALID;
+               } else if (!linkRl || Rl == setid) {
+                       /* Rl is invalid and/or the wait queue set we're looking for */
+                       parent->sl_link.sl_right_setid = Rr;
+                       lt_invalidate(right);
+                       lt_put_link(linkRl);
+                       lt_put_link(linkRr);
+                       wqdbg_v("S2, Rl");
+                       return linkRl ? WQ_ITERATE_UNLINKED : WQ_ITERATE_INVALID;
+               } else if (!linkRr || Rr == setid) {
+                       /* Rr is invalid and/or the wait queue set we're looking for */
+                       parent->sl_link.sl_right_setid = Rl;
+                       lt_invalidate(right);
+                       lt_put_link(linkRl);
+                       lt_put_link(linkRr);
+                       wqdbg_v("S2, Rr");
+                       return linkRr ? WQ_ITERATE_UNLINKED : WQ_ITERATE_INVALID;
+               }
+               lt_put_link(linkRl);
+               lt_put_link(linkRr);
+       }
+
+       return WQ_ITERATE_CONTINUE;
+}
+
+/**
+ * link table walk callback that unlinks 'waitq' from 'ctx->setid'
+ *
+ * Conditions:
+ *     called from walk_setid_links
+ *     'waitq' is locked
+ *
+ * Notes:
+ *     uses waitq_maybe_remove_link() to compress the linktable and
+ *     perform the actual unlinking
+ */
+static int waitq_unlink_cb(struct waitq *waitq, void *ctx,
+                          struct setid_link *link)
+{
+       uint64_t setid = *((uint64_t *)ctx);
+       struct setid_link *right, *left;
+       int ret = 0;
+
+       if (sl_type(link) != SLT_LINK)
+               return WQ_ITERATE_CONTINUE;
+
+       do {
+               left  = lt_get_link(link->sl_link.sl_left_setid);
+               right = lt_get_link(link->sl_link.sl_right_setid);
+
+               ret = waitq_maybe_remove_link(waitq, setid, link, left, right);
+
+               lt_put_link(left);
+               lt_put_link(right);
+
+               if (!sl_is_valid(link))
+                       return WQ_ITERATE_INVALID;
+               /* A ret value of UNLINKED will break us out of table walk */
+       } while (ret == WQ_ITERATE_INVALID);
+
+       return ret;
+}
+
+
+/**
+ * undo/remove a prepost from 'ctx' (waitq) to 'wqset'
+ *
+ * Conditions:
+ *     Called from wq_prepost_foreach_locked OR wq_prepost_iterate
+ *     'wqset' may be NULL
+ *     (ctx)->unlink_wqset is locked
+ */
+static int waitq_unlink_prepost_cb(struct waitq_set __unused *wqset, void *ctx,
+                                  struct wq_prepost *wqp, struct waitq *waitq)
+{
+       struct wq_unlink_ctx *ulctx = (struct wq_unlink_ctx *)ctx;
+
+       if (waitq != ulctx->unlink_wq)
+               return WQ_ITERATE_CONTINUE;
+
+       if (wqp_type(wqp) == WQP_WQ &&
+           wqp->wqp_prepostid.id == ulctx->unlink_wqset->wqset_prepost_id) {
+               /* this is the only prepost on this wait queue set */
+               wqdbg_v("unlink wqp (WQ) 0x%llx", wqp->wqp_prepostid.id);
+               ulctx->unlink_wqset->wqset_prepost_id = 0;
+               return WQ_ITERATE_BREAK;
+       }
+
+       assert(wqp_type(wqp) == WQP_POST);
+
+       /*
+        * The prepost object 'wqp' points to a waitq which should no longer
+        * be preposted to 'ulctx->unlink_wqset'. We can remove the prepost
+        * object from the list and break out of the iteration. Using the
+        * context object in this way allows this same callback function to be
+        * used from both wq_prepost_foreach_locked and wq_prepost_iterate.
+        */
+       wq_prepost_remove(ulctx->unlink_wqset, wqp);
+       return WQ_ITERATE_BREAK;
+}
+
+/**
+ * unlink 'waitq' from 'wqset'
+ *
+ * Conditions:
+ *     'waitq' is locked
+ *     'wqset' is _not_ locked
+ *     may (rarely) spin in prepost clear and drop/re-acquire 'waitq' lock
+ *     (see waitq_clear_prepost_locked)
+ */
+static kern_return_t waitq_unlink_locked(struct waitq *waitq,
+                                        struct waitq_set *wqset,
+                                        spl_t *s)
+{
+       uint64_t setid;
+       kern_return_t kr;
+
+       setid = wqset->wqset_id;
+
+       if (waitq->waitq_set_id == 0) {
+               /*
+                * TODO:
+                * it doesn't belong to anyone, and it has a prepost object?
+                * This is an artifact of not cleaning up after kqueues when
+                * they prepost into select sets...
+                */
+               if (waitq->waitq_prepost_id != 0)
+                       (void)waitq_clear_prepost_locked(waitq, s);
+               return KERN_NOT_IN_SET;
+       }
+
+       if (waitq->waitq_set_id == setid) {
+               waitq->waitq_set_id = 0;
+               /*
+                * This was the only set to which the waitq belonged: we can
+                * safely release the waitq's prepost object. It doesn't
+                * matter if this function drops and re-acquires the lock
+                * because we're not manipulating waitq state any more.
+                */
+               (void)waitq_clear_prepost_locked(waitq, s);
+               return KERN_SUCCESS;
+       }
+
+       /*
+        * The waitq was a member of more than 1 set, so we need to
+        * handle potentially compressing the link table, and
+        * adjusting the waitq->waitq_set_id value.
+        *
+        * Note: we can't free the waitq's associated prepost object (if any)
+        *       because it may be in use by the one or more _other_ sets to
+        *       which this queue belongs.
+        *
+        * Note: This function only handles a single level of the queue linkage.
+        *       Removing a waitq from a set to which it does not directly
+        *       belong is undefined. For example, if a waitq belonged to set
+        *       A, and set A belonged to set B, you could not remove the waitq
+        *       from set B.
+        */
+       kr = walk_setid_links(LINK_WALK_ONE_LEVEL, waitq, waitq->waitq_set_id,
+                             SLT_LINK, (void *)&setid, waitq_unlink_cb);
+
+       if (kr == WQ_ITERATE_UNLINKED) {
+               struct wq_unlink_ctx ulctx;
+               int do_spl = 0;
+
+               kr = KERN_SUCCESS; /* found it and dis-associated it */
+
+               if (!waitq_irq_safe(waitq) && waitq_irq_safe(&wqset->wqset_q)) {
+                       *s = splsched();
+                       do_spl = 1;
+               }
+               waitq_set_lock(wqset);
+               /*
+                * clear out any prepost from waitq into wqset
+                * TODO: this could be more efficient than a linear search of
+                *       the waitq set's prepost list.
+                */
+               ulctx.unlink_wq = waitq;
+               ulctx.unlink_wqset = wqset;
+               (void)wq_prepost_iterate(wqset->wqset_prepost_id, (void *)&ulctx,
+                                        waitq_unlink_prepost_cb);
+               waitq_set_unlock(wqset);
+               if (do_spl)
+                       splx(*s);
+       } else {
+               kr = KERN_NOT_IN_SET; /* waitq is _not_ associated with wqset */
+       }
+
+       return kr;
+}
+
+/**
+ * unlink 'waitq' from 'wqset'
+ *
+ * Conditions:
+ *     neither 'waitq' nor 'wqset' is locked
+ *     may disable and re-enable interrupts
+ *     may (rarely) spin in prepost clear
+ *     (see waitq_clear_prepost_locked)
+ */
+kern_return_t waitq_unlink(struct waitq *waitq, struct waitq_set *wqset)
+{
+       kern_return_t kr = KERN_SUCCESS;
+       spl_t s;
+
+       assert(waitqs_is_set(wqset));
+
+       /*
+        * we allow the waitq to be invalid because the caller may be trying
+        * to clear out old/dirty state
+        */
+       if (!waitq_valid(waitq))
+               return KERN_INVALID_ARGUMENT;
+
+       wqdbg_v("unlink waitq %p from set 0x%llx",
+               (void *)VM_KERNEL_UNSLIDE_OR_PERM(waitq), wqset->wqset_id);
+
+       if (waitq_irq_safe(waitq))
+               s = splsched();
+       waitq_lock(waitq);
+
+       kr = waitq_unlink_locked(waitq, wqset, &s);
+
+       waitq_unlock(waitq);
+       if (waitq_irq_safe(waitq))
+               splx(s);
+
+       return kr;
+}
+
+/**
+ * unlink a waitq from a waitq set, but reference the waitq by its prepost ID
+ *
+ * Conditions:
+ *     'wqset' is unlocked
+ *     wqp_id may be valid or invalid
+ */
+void waitq_unlink_by_prepost_id(uint64_t wqp_id, struct waitq_set *wqset)
+{
+       struct wq_prepost *wqp;
+
+       disable_preemption();
+       wqp = wq_prepost_get(wqp_id);
+       if (wqp) {
+               struct waitq *wq;
+               spl_t s;
+
+               wq = wqp->wqp_wq.wqp_wq_ptr;
+
+               /*
+                * lock the waitq, then release our prepost ID reference, then
+                * unlink the waitq from the wqset: this ensures that we don't
+                * hold a prepost ID reference during the unlink, but we also
+                * complete the unlink operation atomically to avoid a race
+                * with waitq_unlink[_all].
+                */
+               if (waitq_irq_safe(wq))
+                       s = splsched();
+               waitq_lock(wq);
+               wq_prepost_put(wqp);
+
+               if (!waitq_valid(wq)) {
+                       /* someone already tore down this waitq! */
+                       waitq_unlock(wq);
+                       if (waitq_irq_safe(wq))
+                               splx(s);
+                       enable_preemption();
+                       return;
+               }
+
+               /* this _may_ drop the wq lock, but that's OK */
+               waitq_unlink_locked(wq, wqset, &s);
+
+               waitq_unlock(wq);
+               if (waitq_irq_safe(wq))
+                       splx(s);
+       }
+       enable_preemption();
+       return;
+}
+
+
+/**
+ * unlink 'waitq' from all sets to which it belongs
+ *
+ * Conditions:
+ *     'waitq' is locked
+ *
+ * Notes:
+ *     may drop and re-acquire the waitq lock
+ *     may (rarely) spin (see waitq_clear_prepost_locked)
+ */
+kern_return_t waitq_unlink_all_locked(struct waitq *waitq, uint64_t *old_set_id,
+                                     spl_t *s, int *dropped_lock)
+{
+       wqdbg_v("unlink waitq %p from all sets",
+               (void *)VM_KERNEL_UNSLIDE_OR_PERM(waitq));
+
+       *old_set_id = 0;
+
+       /* it's not a member of any sets */
+       if (waitq->waitq_set_id == 0)
+               return KERN_SUCCESS;
+
+       *old_set_id = waitq->waitq_set_id;
+       waitq->waitq_set_id = 0;
+
+       /*
+        * invalidate the prepost entry for this waitq.
+        * This may drop and re-acquire the waitq lock, but that's OK because
+        * if the waitq is added to another set and preposted to that set
+        * while the lock is dropped, the state will remain consistent.
+        */
+       int dropped = waitq_clear_prepost_locked(waitq, s);
+       if (dropped_lock)
+               *dropped_lock = dropped;
+
+       return KERN_SUCCESS;
+}
+
+/**
+ * unlink 'waitq' from all sets to which it belongs
+ *
+ * Conditions:
+ *     'waitq' is not locked
+ *     may disable and re-enable interrupts
+ *     may (rarely) spin
+ *     (see waitq_unlink_all_locked, waitq_clear_prepost_locked)
+ */
+kern_return_t waitq_unlink_all(struct waitq *waitq)
+{
+       kern_return_t kr = KERN_SUCCESS;
+       uint64_t setid = 0;
+       spl_t s;
+
+       if (!waitq_valid(waitq))
+               panic("Invalid waitq: %p", waitq);
+
+       if (waitq_irq_safe(waitq))
+               s = splsched();
+       waitq_lock(waitq);
+       if (waitq_valid(waitq))
+               kr = waitq_unlink_all_locked(waitq, &setid, &s, NULL);
+       waitq_unlock(waitq);
+       if (waitq_irq_safe(waitq))
+               splx(s);
+
+       if (setid) {
+               /*
+                * Walk the link table and invalidate each LINK object that
+                * used to connect this waitq to one or more sets: this works
+                * because SLT_LINK objects are private to each wait queue
+                */
+               (void)walk_setid_links(LINK_WALK_ONE_LEVEL, waitq, setid,
+                                      SLT_LINK, NULL, waitq_unlink_all_cb);
+       }
+
+       return kr;
+}
+
+
+/**
+ * unlink all waitqs from 'wqset'
+ *
+ * Conditions:
+ *     'wqset' is not locked
+ *     may (rarely) spin/block (see waitq_clear_prepost_locked)
+ */
+kern_return_t waitq_set_unlink_all(struct waitq_set *wqset)
+{
+       struct setid_link *link;
+       uint64_t prepost_id, set_links_id = 0;
+       spl_t spl;
+
+       assert(waitqs_is_set(wqset));
+
+       wqdbg_v("unlink all queues from set 0x%llx", wqset->wqset_id);
+
+       /*
+        * This operation does not require interaction with any of the set's
+        * constituent wait queues. All we have to do is invalidate the SetID
+        */
+       if (waitq_irq_safe(&wqset->wqset_q))
+               spl = splsched();
+       waitq_set_lock(wqset);
+
+       /* invalidate and re-alloc the link object first */
+       link = lt_get_link(wqset->wqset_id);
+
+       /* we may have raced with a waitq_set_deinit: handle this */
+       if (!link) {
+               waitq_set_unlock(wqset);
+               return KERN_SUCCESS;
+       }
+
+       lt_invalidate(link);
+
+       /* re-alloc the object to get a new generation ID */
+       lt_realloc_link(link, SLT_WQS);
+       link->sl_wqs.sl_set = wqset;
+
+       wqset->wqset_id = link->sl_set_id.id;
+       sl_set_valid(link);
+       lt_put_link(link);
+
+       /* clear any preposts attached to this set */
+       prepost_id = wqset->wqset_prepost_id;
+       wqset->wqset_prepost_id = 0;
+
+       /*
+        * clear set linkage and prepost object associated with this set:
+        * waitq sets may prepost to other sets if, for example, they are
+        * associated with a kqueue which is in a select set.
+        *
+        * This may drop and re-acquire the set lock, but that's OK because
+        * the resulting state will remain consistent.
+        */
+       waitq_unlink_all_locked(&wqset->wqset_q, &set_links_id, &spl, NULL);
+
+       waitq_set_unlock(wqset);
+       if (waitq_irq_safe(&wqset->wqset_q))
+               splx(spl);
+
+       /*
+        * release all the set link objects
+        * (links to other sets to which this set was previously added)
+        */
+       if (set_links_id)
+               (void)walk_setid_links(LINK_WALK_ONE_LEVEL, &wqset->wqset_q,
+                                      set_links_id, SLT_LINK, NULL,
+                                      waitq_unlink_all_cb);
+
+       /* drop / unlink all the prepost table objects */
+       if (prepost_id)
+               (void)wq_prepost_iterate(prepost_id, NULL,
+                                        wqset_clear_prepost_chain_cb);
+
+       return KERN_SUCCESS;
+}
+
+
+static int waitq_prepost_reserve_cb(struct waitq *waitq, void *ctx,
+                                   struct setid_link *link)
+{
+       uint32_t *num = (uint32_t *)ctx;
+       (void)waitq;
+
+       /*
+        * In the worst case, we'll have to allocate 2 prepost objects
+        * per waitq set (if the set was already preposted by another
+        * waitq).
+        */
+       if (sl_type(link) == SLT_WQS) {
+               /*
+                * check to see if the associated waitq actually supports
+                * preposting
+                */
+               if (waitq_set_can_prepost(link->sl_wqs.sl_set))
+                       *num += 2;
+       }
+       return WQ_ITERATE_CONTINUE;
+}
+
+static int waitq_alloc_prepost_reservation(int nalloc, struct waitq *waitq,
+                                          spl_t *s, int *did_unlock,
+                                          struct wq_prepost **wqp)
+{
+       struct wq_prepost *tmp;
+       struct wqp_cache *cache;
+
+       *did_unlock = 0;
+
+       /*
+        * Before we unlock the waitq, check the per-processor prepost object
+        * cache to see if there's enough there for us. If so, do the
+        * allocation, keep the lock and save an entire iteration over the set
+        * linkage!
+        */
+       if (waitq) {
+               disable_preemption();
+               cache = &PROCESSOR_DATA(current_processor(), wqp_cache);
+               if (nalloc <= (int)cache->avail)
+                       goto do_alloc;
+               enable_preemption();
+
+               /* unlock the waitq to perform the allocation */
+               *did_unlock = 1;
+               waitq_unlock(waitq);
+               if (waitq_irq_safe(waitq))
+                       splx(*s);
+       }
+
+do_alloc:
+       tmp = wq_prepost_alloc(WQT_RESERVED, nalloc);
+       if (!tmp)
+               panic("Couldn't reserve %d preposts for waitq @%p (wqp@%p)",
+                     nalloc, waitq, *wqp);
+       if (*wqp) {
+               /* link the two lists */
+               int __assert_only rc;
+               rc = wq_prepost_rlink(tmp, *wqp);
+               assert(rc == nalloc);
+       }
+       *wqp = tmp;
+
+       /*
+        * If the caller can block, then enforce a minimum-free table element
+        * policy here. This helps ensure that we will have enough prepost
+        * objects for callers such as selwakeup() that can be called with
+        * spin locks held.
+        */
+       if (get_preemption_level() == 0)
+               wq_prepost_ensure_free_space();
+
+       if (waitq) {
+               if (*did_unlock == 0) {
+                       /* decrement the preemption count if alloc from cache */
+                       enable_preemption();
+               } else {
+                       /* otherwise: re-lock the waitq */
+                       if (waitq_irq_safe(waitq))
+                               *s = splsched();
+                       waitq_lock(waitq);
+               }
+       }
+
+       return nalloc;
+}
+
+static int waitq_count_prepost_reservation(struct waitq *waitq, int extra, int keep_locked)
+{
+       int npreposts = 0;
+
+       /*
+        * If the waitq is not currently part of a set, and we're not asked to
+        * keep the waitq locked, then we'll want to have 3 in reserve
+        * just in case it becomes part of a set while we unlock and reserve.
+        * We may need up to 1 object for the waitq, and 2 for the set.
+        */
+       if (waitq->waitq_set_id == 0) {
+               npreposts = 3;
+       } else {
+               /* this queue has never been preposted before */
+               if (waitq->waitq_prepost_id == 0)
+                       npreposts = 3;
+
+               /*
+                * Walk the set of table linkages associated with this waitq
+                * and count the worst-case number of prepost objects that
+                * may be needed during a wakeup_all. We can walk this without
+                * locking each set along the way because the table-based IDs
+                * disconnect us from the set pointers themselves, and the
+                * table walking is careful to read the setid values only once.
+                * Locking each set up the chain also doesn't guarantee that
+                * their membership won't change between the time we unlock
+                * that set and when we actually go to prepost, so our
+                * situation is no worse than before and we've alleviated lock
+                * contention on any sets to which this waitq belongs.
+                */
+               (void)walk_setid_links(LINK_WALK_FULL_DAG_UNLOCKED,
+                                      waitq, waitq->waitq_set_id,
+                                      SLT_WQS, (void *)&npreposts,
+                                      waitq_prepost_reserve_cb);
+       }
+
+       if (extra > 0)
+               npreposts += extra;
+
+       if (npreposts == 0 && !keep_locked) {
+               /*
+                * If we get here, we were asked to reserve some prepost
+                * objects for a waitq that has previously preposted and is not
+                * currently a member of any set. We have also been
+                * instructed to unlock the waitq when we're done. In this
+                * case, we pre-allocate enough reserved objects to handle
+                * the case where the waitq gets added to a single set when
+                * the lock is released.
+                */
+               npreposts = 3;
+       }
+
+       return npreposts;
+}
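+
+/*
+ * Worked example (hypothetical numbers, added for illustration): a
+ * previously-preposted waitq that currently belongs to two prepost-capable
+ * sets counts 2 objects per set, i.e. npreposts = 4, plus whatever the
+ * caller passed in 'extra'. A waitq that belongs to no set at all always
+ * reserves 3: up to 1 for the waitq itself and 2 for a set it may join
+ * while unlocked.
+ */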
+
+
+/**
+ * pre-allocate prepost objects for 'waitq'
+ *
+ * Conditions:
+ *     'waitq' is not locked
+ *
+ * Returns:
+ *     panic on error
+ *
+ *     on success, the ID of the head of a singly-linked list of
+ *     pre-allocated prepost objects (0 if no reservation was needed).
+ *
+ * Notes:
+ *     If 'lock_state' is WAITQ_KEEP_LOCKED, this function performs the pre-allocation
+ *     atomically and returns 'waitq' locked. If the waitq requires
+ *     interrupts to be disabled, then the output parameter 's' is set to the
+ *     previous interrupt state (from splsched), and the caller is
+ *     responsible for calling splx().
+ *
+ *     This function attempts to pre-allocate precisely enough prepost
+ *     objects based on the current set membership of 'waitq'. If the
+ *     operation is performed atomically, then the caller
+ *     is guaranteed to have enough pre-allocated prepost objects to avoid
+ *     any (rare) blocking in the wakeup path.
+ */
+uint64_t waitq_prepost_reserve(struct waitq *waitq, int extra,
+                              waitq_lock_state_t lock_state, spl_t *s)
+{
+       uint64_t reserved = 0;
+       uint64_t prev_setid = 0, prev_prepostid = 0;
+       struct wq_prepost *wqp = NULL;
+       int nalloc = 0, npreposts = 0;
+       int keep_locked = (lock_state == WAITQ_KEEP_LOCKED);
+       int unlocked = 0;
+
+       if (s)
+               *s = 0;
+
+       wqdbg_v("Attempting to reserve prepost linkages for waitq %p (extra:%d)",
+               (void *)VM_KERNEL_UNSLIDE_OR_PERM(waitq), extra);
+
+       if (waitq == NULL && extra > 0) {
+               /*
+                * Simple prepost object allocation:
+                * we'll add 2 more because the waitq might need an object,
+                * and the set itself may need a new POST object in addition
+                * to the number of preposts requested by the caller
+                */
+               nalloc = waitq_alloc_prepost_reservation(extra + 2, NULL, NULL,
+                                                        &unlocked, &wqp);
+               assert(nalloc == extra + 2);
+               return wqp->wqp_prepostid.id;
+       }
+
+       assert(lock_state == WAITQ_KEEP_LOCKED || lock_state == WAITQ_UNLOCK);
+
+       if (waitq_irq_safe(waitq))
+               *s = splsched();
+       waitq_lock(waitq);
+
+       /* global queues are never part of any sets */
+       if (waitq_is_global(waitq)) {
+               if (keep_locked)
+                       goto out;
+               goto out_unlock;
+       }
+
+       /* remember the set ID that we started with */
+       prev_setid = waitq->waitq_set_id;
+       prev_prepostid = waitq->waitq_prepost_id;
+
+       /*
+        * If the waitq is not part of a set, and we're asked to
+        * keep the set locked, then we don't have to reserve
+        * anything!
+        */
+       if (prev_setid == 0 && keep_locked)
+               goto out;
+
+       npreposts = waitq_count_prepost_reservation(waitq, extra, keep_locked);
+
+       /* nothing for us to do! */
+       if (npreposts == 0) {
+               if (keep_locked)
+                       goto out;
+               goto out_unlock;
+       }
+
+try_alloc:
+       /* this _may_ unlock and relock the waitq! */
+       nalloc = waitq_alloc_prepost_reservation(npreposts, waitq, s,
+                                                &unlocked, &wqp);
+
+       if (!unlocked) {
+               /* allocation held the waitq lock: we're done! */
+               if (keep_locked)
+                       goto out;
+               goto out_unlock;
+       }
+
+       /*
+        * Before we return, if the allocation had to unlock the waitq, we
+        * must check one more time to see if we have enough. If not, we'll
+        * try to allocate the difference. If the caller requests it, we'll
+        * also leave the waitq locked so that the use of the pre-allocated
+        * prepost objects can be guaranteed to be enough if a wakeup_all is
+        * performed before unlocking the waitq.
+        */
+
+       /*
+        * If the waitq is no longer associated with a set, or if the waitq's
+        * set/prepostid has not changed since we first walked its linkage,
+        * we're done.
+        */
+       if ((waitq->waitq_set_id == 0) ||
+           (waitq->waitq_set_id == prev_setid &&
+            waitq->waitq_prepost_id == prev_prepostid)) {
+               if (keep_locked)
+                       goto out;
+               goto out_unlock;
+       }
+
+       npreposts = waitq_count_prepost_reservation(waitq, extra, keep_locked);
+
+       if (npreposts > nalloc) {
+               prev_setid = waitq->waitq_set_id;
+               prev_prepostid = waitq->waitq_prepost_id;
+               npreposts = npreposts - nalloc; /* only allocate the diff */
+               goto try_alloc;
+       }
+
+       if (keep_locked)
+               goto out;
+
+out_unlock:
+       waitq_unlock(waitq);
+       if (waitq_irq_safe(waitq))
+               splx(*s);
+out:
+       if (wqp)
+               reserved = wqp->wqp_prepostid.id;
+
+       return reserved;
+}
+
+/**
+ * release a linked list of prepost objects allocated via _prepost_reserve
+ *
+ * Conditions:
+ *     may (rarely) spin waiting for prepost table growth memcpy
+ */
+void waitq_prepost_release_reserve(uint64_t id)
+{
+       struct wq_prepost *wqp;
+
+       wqdbg_v("releasing reserved preposts starting at: 0x%llx", id);
+
+       wqp = wq_prepost_rfirst(id);
+       if (!wqp)
+               return;
+
+       wq_prepost_release_rlist(wqp);
+}
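+
+/*
+ * Illustrative usage sketch (hypothetical caller, not part of the original
+ * source): the reserve/release pair brackets a locked wakeup so that the
+ * wakeup path itself never has to allocate, mirroring waitq_wakeup64_one()
+ * later in this file:
+ *
+ *     uint64_t resv;
+ *     spl_t s;
+ *
+ *     resv = waitq_prepost_reserve(wq, 0, WAITQ_KEEP_LOCKED, &s);
+ *     ... wake or prepost while 'wq' is still locked ...
+ *     waitq_unlock(wq);
+ *     if (waitq_irq_safe(wq))
+ *             splx(s);
+ *     waitq_prepost_release_reserve(resv);
+ */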
+
+
+/**
+ * clear all preposts from 'wqset'
+ *
+ * Conditions:
+ *     'wqset' is not locked
+ */
+void waitq_set_clear_preposts(struct waitq_set *wqset)
+{
+       uint64_t prepost_id;
+       spl_t spl;
+
+       assert(waitqs_is_set(wqset));
+
+       wqdbg_v("Clearing all preposted queues on waitq_set: 0x%llx",
+               wqset->wqset_id);
+
+       if (waitq_irq_safe(&wqset->wqset_q))
+               spl = splsched();
+       waitq_set_lock(wqset);
+       prepost_id = wqset->wqset_prepost_id;
+       wqset->wqset_prepost_id = 0;
+       waitq_set_unlock(wqset);
+       if (waitq_irq_safe(&wqset->wqset_q))
+               splx(spl);
+
+       /* drop / unlink all the prepost table objects */
+       if (prepost_id)
+               (void)wq_prepost_iterate(prepost_id, NULL,
+                                        wqset_clear_prepost_chain_cb);
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Iteration: waitq -> sets / waitq_set -> preposts
+ *
+ * ---------------------------------------------------------------------- */
+
+struct wq_it_ctx {
+       void *input;
+       void *ctx;
+       waitq_iterator_t it;
+
+       spl_t *spl;
+};
+
+static int waitq_iterate_sets_cb(struct waitq *waitq, void *ctx,
+                                struct setid_link *link)
+{
+       struct wq_it_ctx *wctx = (struct wq_it_ctx *)(ctx);
+       struct waitq_set *wqset;
+       int ret;
+       spl_t spl;
+
+       (void)waitq;
+       assert(sl_type(link) == SLT_WQS);
+
+       /*
+        * the waitq is locked, so we can just take the set lock
+        * and call the iterator function
+        */
+       wqset = link->sl_wqs.sl_set;
+       assert(wqset != NULL);
+
+       if (!waitq_irq_safe(waitq) && waitq_irq_safe(&wqset->wqset_q))
+               spl = splsched();
+       waitq_set_lock(wqset);
+
+       ret = wctx->it(wctx->ctx, (struct waitq *)wctx->input, wqset);
+
+       waitq_set_unlock(wqset);
+       if (!waitq_irq_safe(waitq) && waitq_irq_safe(&wqset->wqset_q))
+               splx(spl);
+
+       return ret;
+}
+
+/**
+ * call external iterator function for each prepost object in wqset
+ *
+ * Conditions:
+ *     Called from wq_prepost_foreach_locked
+ *     (wqset locked, waitq _not_ locked)
+ */
+static int wqset_iterate_prepost_cb(struct waitq_set *wqset, void *ctx,
+                                   struct wq_prepost *wqp, struct waitq *waitq)
+{
+       struct wq_it_ctx *wctx = (struct wq_it_ctx *)(ctx);
+       uint64_t wqp_id;
+       int ret;
+       spl_t s;
+
+       (void)wqp;
+
+       /*
+        * This is a bit tricky. The 'wqset' is locked, but the 'waitq' is not.
+        * Taking the 'waitq' lock is a lock order violation, so we need to be
+        * careful. We also must realize that we may have taken a reference to
+        * the 'wqp' just as the associated waitq was being torn down (or
+        * clearing all its preposts) - see waitq_clear_prepost_locked(). If
+        * the 'wqp' is valid and we can get the waitq lock, then we are good
+        * to go. If not, we need to back off, check that the 'wqp' hasn't
+        * been invalidated, and try to re-take the locks.
+        */
+       if (waitq_irq_safe(waitq))
+               s = splsched();
+       if (waitq_lock_try(waitq))
+               goto call_iterator;
+
+       if (waitq_irq_safe(waitq))
+               splx(s);
+
+       if (!wqp_is_valid(wqp))
+               return WQ_ITERATE_RESTART;
+
+       /* We are passed a prepost object with a reference on it. If neither
+        * the waitq set nor the waitq require interrupts disabled, then we
+        * may block on the delay(1) call below. We can't hold a prepost
+        * object reference while blocking, so we have to give that up as well
+        * and re-acquire it when we come back.
+        */
+       wqp_id = wqp->wqp_prepostid.id;
+       wq_prepost_put(wqp);
+       waitq_set_unlock(wqset);
+       /* use the saved wqp_id: our reference on 'wqp' was dropped above */
+       wqdbg_v("dropped set:%p lock waiting for wqp:%p (0x%llx -> wq:%p)",
+               wqset, wqp, wqp_id, waitq);
+       delay(1);
+       waitq_set_lock(wqset);
+       wqp = wq_prepost_get(wqp_id);
+       if (!wqp)
+               /* someone cleared preposts while we slept! */
+               return WQ_ITERATE_DROPPED;
+
+       /*
+        * TODO:
+        * This differs slightly from the logic in ipc_mqueue.c:
+        * ipc_mqueue_receive_on_thread(). There, if the waitq lock
+        * can't be obtained, the prepost link is placed on the back of
+        * the chain, and the iteration starts from the beginning. Here,
+        * we just restart from the beginning.
+        */
+       return WQ_ITERATE_RESTART;
+
+call_iterator:
+       if (!wqp_is_valid(wqp)) {
+               ret = WQ_ITERATE_RESTART;
+               goto out_unlock;
+       }
+
+       /* call the external callback */
+       ret = wctx->it(wctx->ctx, waitq, wqset);
+
+       if (ret == WQ_ITERATE_BREAK_KEEP_LOCKED) {
+               ret = WQ_ITERATE_BREAK;
+               if (wctx->spl)
+                       *(wctx->spl) = s;
+               goto out;
+       }
+
+out_unlock:
+       waitq_unlock(waitq);
+       if (waitq_irq_safe(waitq))
+               splx(s);
+
+out:
+       return ret;
+}
+
+/**
+ * iterator over all sets to which the given waitq has been linked
+ *
+ * Conditions:
+ *     'waitq' is locked
+ */
+int waitq_iterate_sets(struct waitq *waitq, void *ctx, waitq_iterator_t it)
+{
+       int ret;
+       struct wq_it_ctx wctx = {
+               .input = (void *)waitq,
+               .ctx = ctx,
+               .it = it,
+       };
+       if (!it || !waitq)
+               return KERN_INVALID_ARGUMENT;
+
+       ret = walk_setid_links(LINK_WALK_ONE_LEVEL, waitq, waitq->waitq_set_id,
+                              SLT_WQS, (void *)&wctx, waitq_iterate_sets_cb);
+       if (ret == WQ_ITERATE_CONTINUE)
+               ret = WQ_ITERATE_SUCCESS;
+       return ret;
+}
+
+/**
+ * iterator over all preposts in the given wqset
+ *
+ * Conditions:
+ *     'wqset' is locked
+ */
+int waitq_set_iterate_preposts(struct waitq_set *wqset,
+                              void *ctx, waitq_iterator_t it, spl_t *s)
+{
+       struct wq_it_ctx wctx = {
+               .input = (void *)wqset,
+               .ctx = ctx,
+               .it = it,
+               .spl = s,
+       };
+       if (!it || !wqset)
+               return WQ_ITERATE_INVALID;
+
+       assert(waitq_held(&wqset->wqset_q));
+
+       return wq_prepost_foreach_locked(wqset, (void *)&wctx,
+                                        wqset_iterate_prepost_cb);
+}
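+
+/*
+ * Illustrative sketch (hypothetical callback, not part of the original
+ * source): an external iterator matching waitq_iterator_t is invoked with
+ * both the waitq and the set locked, and steers the walk through the
+ * WQ_ITERATE_* constants:
+ *
+ *     static int count_sets_cb(void *ctx, struct waitq *waitq,
+ *                              struct waitq_set *wqset)
+ *     {
+ *             (void)waitq; (void)wqset;
+ *             *(int *)ctx += 1;
+ *             return WQ_ITERATE_CONTINUE;
+ *     }
+ *
+ *     int nsets = 0;
+ *     waitq_iterate_sets(wq, &nsets, count_sets_cb);  // 'wq' held locked
+ */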
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Higher-level APIs
+ *
+ * ---------------------------------------------------------------------- */
+
+/**
+ * declare a thread's intent to wait on 'waitq' for 'wait_event'
+ *
+ * Conditions:
+ *     'waitq' is not locked
+ *     will disable and re-enable interrupts while locking current_thread()
+ */
+wait_result_t waitq_assert_wait64(struct waitq *waitq,
+                                 event64_t wait_event,
+                                 wait_interrupt_t interruptible,
+                                 uint64_t deadline)
+{
+       wait_result_t ret;
+       thread_t thread = current_thread();
+       spl_t s;
+
+       if (!waitq_valid(waitq))
+               panic("Invalid waitq: %p", waitq);
+
+       if (waitq_irq_safe(waitq))
+               s = splsched();
+       waitq_lock(waitq);
+
+       if (!waitq_irq_safe(waitq))
+               s = splsched();
+       thread_lock(thread);
+
+       ret = waitq_assert_wait64_locked(waitq, wait_event, interruptible,
+                                        TIMEOUT_URGENCY_SYS_NORMAL,
+                                        deadline, TIMEOUT_NO_LEEWAY, thread);
+
+       thread_unlock(thread);
+       waitq_unlock(waitq);
+
+       splx(s);
+
+       return ret;
+}
+
+/**
+ * declare a thread's intent to wait on 'waitq' for 'wait_event'
+ *
+ * Conditions:
+ *     'waitq' is not locked
+ *     will disable and re-enable interrupts while locking current_thread()
+ */
+wait_result_t waitq_assert_wait64_leeway(struct waitq *waitq,
+                                        event64_t wait_event,
+                                        wait_interrupt_t interruptible,
+                                        wait_timeout_urgency_t urgency,
+                                        uint64_t deadline,
+                                        uint64_t leeway)
+{
+       wait_result_t ret;
+       thread_t thread = current_thread();
+       spl_t s;
+
+       if (!waitq_valid(waitq))
+               panic("Invalid waitq: %p", waitq);
+
+       if (waitq_irq_safe(waitq))
+               s = splsched();
+       waitq_lock(waitq);
+
+       if (!waitq_irq_safe(waitq))
+               s = splsched();
+       thread_lock(thread);
+
+       ret = waitq_assert_wait64_locked(waitq, wait_event, interruptible,
+                                        urgency, deadline, leeway, thread);
+
+       thread_unlock(thread);
+       waitq_unlock(waitq);
+
+       splx(s);
+
+       return ret;
+}
+
+/**
+ * wakeup a single thread from a waitq that's waiting for a given event
+ *
+ * Conditions:
+ *     'waitq' is not locked
+ *     may (rarely) block if 'waitq' is non-global and a member of 1 or more sets
+ *     may disable and re-enable interrupts
+ *
+ * Notes:
+ *     will _not_ block if waitq is global (or not a member of any set)
+ */
+kern_return_t waitq_wakeup64_one(struct waitq *waitq, event64_t wake_event,
+                                wait_result_t result, int priority)
+{
+       kern_return_t kr;
+       uint64_t reserved_preposts = 0;
+       spl_t spl;
+
+       if (!waitq_valid(waitq))
+               panic("Invalid waitq: %p", waitq);
+
+       /* NOTE: this will _not_ reserve anything if waitq is global */
+       reserved_preposts = waitq_prepost_reserve(waitq, 0,
+                                                 WAITQ_KEEP_LOCKED, &spl);
+
+       /* waitq is locked upon return */
+       kr = waitq_wakeup64_one_locked(waitq, wake_event, result,
+                                      &reserved_preposts, priority, WAITQ_UNLOCK);
+
+       if (waitq_irq_safe(waitq))
+               splx(spl);
+
+       /* release any left-over prepost object (won't block/lock anything) */
+       waitq_prepost_release_reserve(reserved_preposts);
+
+       return kr;
+}
+
+/**
+ * wakeup all threads from a waitq that are waiting for a given event
+ *
+ * Conditions:
+ *     'waitq' is not locked
+ *     may (rarely) block if 'waitq' is non-global and a member of 1 or more sets
+ *     may disable and re-enable interrupts
+ *
+ * Notes:
+ *     will _not_ block if waitq is global (or not a member of any set)
+ */
+kern_return_t waitq_wakeup64_all(struct waitq *waitq,
+                                event64_t wake_event,
+                                wait_result_t result,
+                                int priority)
+{
+       kern_return_t ret;
+       uint64_t reserved_preposts = 0;
+       spl_t s;
+
+       if (!waitq_valid(waitq))
+               panic("Invalid waitq: %p", waitq);
+
+       /* keep waitq locked upon return */
+       /* NOTE: this will _not_ reserve anything if waitq is global */
+       reserved_preposts = waitq_prepost_reserve(waitq, 0,
+                                                 WAITQ_KEEP_LOCKED, &s);
+
+       /* waitq is locked */
+
+       ret = waitq_wakeup64_all_locked(waitq, wake_event, result,
+                                       &reserved_preposts, priority,
+                                       WAITQ_UNLOCK);
+
+       if (waitq_irq_safe(waitq))
+               splx(s);
+
+       waitq_prepost_release_reserve(reserved_preposts);
+
+       return ret;
+
+}
+
+/**
+ * wakeup a specific thread iff it's waiting on 'waitq' for 'wake_event'
+ *
+ * Conditions:
+ *     'waitq' is not locked
+ *
+ * Notes:
+ *     May temporarily disable and re-enable interrupts
+ */
+kern_return_t waitq_wakeup64_thread(struct waitq *waitq,
+                                   event64_t wake_event,
+                                   thread_t thread,
+                                   wait_result_t result)
+{
+       kern_return_t ret;
+       spl_t s, th_spl;
+
+       if (!waitq_valid(waitq))
+               panic("Invalid waitq: %p", waitq);
+
+       if (waitq_irq_safe(waitq))
+               s = splsched();
+       waitq_lock(waitq);
+
+       ret = waitq_select_thread_locked(waitq, wake_event, thread, &th_spl);
+       /* on success, returns 'thread' locked */
+
+       waitq_unlock(waitq);
+
+       if (ret == KERN_SUCCESS) {
+               ret = thread_go(thread, result);
+               assert(ret == KERN_SUCCESS);
+               thread_unlock(thread);
+               splx(th_spl);
+               waitq_stats_count_wakeup(waitq);
+       } else {
+               ret = KERN_NOT_WAITING;
+               waitq_stats_count_fail(waitq);
+       }
+
+       if (waitq_irq_safe(waitq))
+               splx(s);
+
+       return ret;
+}
diff --git a/osfmk/kern/waitq.h b/osfmk/kern/waitq.h
new file mode 100644 (file)
index 0000000..92751fa
--- /dev/null
@@ -0,0 +1,452 @@
+#ifndef _WAITQ_H_
+#define _WAITQ_H_
+/*
+ * Copyright (c) 2014-2015 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#ifdef KERNEL_PRIVATE
+
+#include <mach/mach_types.h>
+#include <mach/sync_policy.h>
+#include <mach/kern_return.h>          /* for kern_return_t */
+
+#include <kern/kern_types.h>           /* for wait_queue_t */
+#include <kern/queue.h>
+#include <kern/assert.h>
+
+#include <sys/cdefs.h>
+
+/*
+ * Constants and types used in the waitq APIs
+ */
+#define WAITQ_ALL_PRIORITIES   (-1)
+#define WAITQ_PROMOTE_PRIORITY (-2)
+
+typedef enum e_waitq_lock_state {
+       WAITQ_KEEP_LOCKED    = 0x01,
+       WAITQ_UNLOCK         = 0x02,
+       WAITQ_SHOULD_LOCK    = 0x04,
+       WAITQ_ALREADY_LOCKED = 0x08,
+       WAITQ_DONT_LOCK      = 0x10,
+} waitq_lock_state_t;
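+
+/*
+ * Illustrative note (added for clarity): the lock-state constants tell the
+ * _locked wakeup and prepost-reservation interfaces what to do with the
+ * waitq lock before returning. For example, a hypothetical
+ *
+ *     waitq_wakeup64_all_locked(wq, event, THREAD_AWAKENED,
+ *                               &resv, WAITQ_ALL_PRIORITIES, WAITQ_UNLOCK);
+ *
+ * drops the waitq lock on the caller's behalf, whereas WAITQ_KEEP_LOCKED
+ * returns with the lock still held.
+ */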
+
+#ifndef MACH_KERNEL_PRIVATE
+
+/*
+ * The opaque waitq structure is here mostly for AIO and selinfo,
+ * but could potentially be used by other BSD subsystems.
+ */
+#ifndef __LP64__
+       struct waitq { char opaque[32]; };
+       struct waitq_set { char opaque[48]; };
+#else
+       #if defined(__x86_64__)
+               struct waitq { char opaque[48]; };
+               struct waitq_set { char opaque[64]; };
+       #else
+               struct waitq { char opaque[40]; };
+               struct waitq_set { char opaque[56]; };
+       #endif /* !x86_64 */
+#endif /* __LP64__ */
+
+#else /* MACH_KERNEL_PRIVATE */
+
+#include <kern/spl.h>
+#include <kern/simple_lock.h>
+#include <mach/branch_predicates.h>
+
+#include <machine/cpu_number.h>
+#include <machine/machine_routines.h> /* machine_timeout_suspended() */
+
+/*
+ * The event mask is 59 bits on 64-bit architectures and 27 bits on
+ * 32-bit architectures, so we calculate its size using sizeof(long).
+ * If the bitfield for wq_type and wq_fifo is changed, then the value of
+ * EVENT_MASK_BITS will also change.
+ *
+ * New plan: this is an optimization anyway, so I'm stealing 32 bits
+ * from the mask to shrink the waitq object even further.
+ */
+#define _EVENT_MASK_BITS   ((sizeof(uint32_t) * 8) - 5)
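+/* i.e. (32 - 5) == 27 usable event-mask bits, independent of pointer size */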
+
+#define WAITQ_BOOST_PRIORITY 31
+
+enum waitq_type {
+       WQT_INVALID = 0,
+       WQT_QUEUE   = 0x2,
+       WQT_SET     = 0x3,
+};
+
+#if CONFIG_WAITQ_STATS
+#define NWAITQ_BTFRAMES 5
+struct wq_stats {
+       uint64_t waits;
+       uint64_t wakeups;
+       uint64_t clears;
+       uint64_t failed_wakeups;
+
+       uintptr_t last_wait[NWAITQ_BTFRAMES];
+       uintptr_t last_wakeup[NWAITQ_BTFRAMES];
+       uintptr_t last_failed_wakeup[NWAITQ_BTFRAMES];
+};
+#endif
+
+/*
+ *     struct waitq
+ *
+ *     This is the definition of the common event wait queue
+ *     that the scheduler APIs understand.  It is used
+ *     internally by the generalized event waiting mechanism
+ *     (assert_wait), and also for items that maintain their
+ *     own wait queues (such as ports and semaphores).
+ *
+ *     It is not published to other kernel components.
+ *
+ *     NOTE:  Hardware locks are used to protect event wait
+ *     queues since interrupt code is free to post events to
+ *     them.
+ */
+struct waitq {
+       uint32_t /* flags */
+               waitq_type:2,    /* only public field */
+               waitq_fifo:1,    /* fifo wakeup policy? */
+               waitq_prepost:1, /* waitq supports prepost? */
+               waitq_irq:1,     /* waitq requires interrupts disabled */
+               waitq_eventmask:_EVENT_MASK_BITS;
+       hw_lock_data_t  waitq_interlock;        /* interlock */
+
+       /* the wait queue set (set-of-sets) to which this queue belongs */
+       uint64_t waitq_set_id;
+       uint64_t waitq_prepost_id;
+       queue_head_t    waitq_queue;            /* queue of elements */
+};
+
+/*
+ *     struct waitq_set
+ *
+ *     This is the common definition for a set wait queue.
+ */
+struct waitq_set {
+       struct waitq wqset_q;
+       uint64_t     wqset_id;
+       uint64_t     wqset_prepost_id;
+};
+
+extern void waitq_bootstrap(void);
+
+#define waitq_is_queue(wq) \
+       ((wq)->waitq_type == WQT_QUEUE)
+
+#define waitq_is_set(wq) \
+       ((wq)->waitq_type == WQT_SET && ((struct waitq_set *)(wq))->wqset_id != 0)
+
+#define waitqs_is_set(wqs) \
+       (((wqs)->wqset_q.waitq_type == WQT_SET) && ((wqs)->wqset_id != 0))
+
+#define waitq_valid(wq) \
+       ((wq) != NULL && ((wq)->waitq_type & ~1) == WQT_QUEUE)
+
+#define waitq_empty(wq) \
+       (queue_empty(&(wq)->waitq_queue))
+
+#define waitq_held(wq) \
+       (hw_lock_held(&(wq)->waitq_interlock))
+
+#define waitq_lock_try(wq) \
+       (hw_lock_try(&(wq)->waitq_interlock))
+
+#define waitq_wait_possible(thread) \
+       ((thread)->waitq == NULL)
+
+extern void waitq_lock(struct waitq *wq);
+extern void waitq_unlock(struct waitq *wq);
+
+#define waitq_set_lock(wqs)            waitq_lock(&(wqs)->wqset_q)
+#define waitq_set_unlock(wqs)          waitq_unlock(&(wqs)->wqset_q)
+#define waitq_set_lock_try(wqs)                waitq_lock_try(&(wqs)->wqset_q)
+#define waitq_set_can_prepost(wqs)     (waitqs_is_set(wqs) && \
+                                        (wqs)->wqset_q.waitq_prepost)
+#define waitq_set_maybe_preposted(wqs) ((wqs)->wqset_q.waitq_prepost && \
+                                        (wqs)->wqset_prepost_id > 0)
+
+/* assert intent to wait on a locked wait queue */
+extern wait_result_t waitq_assert_wait64_locked(struct waitq *waitq,
+                                               event64_t wait_event,
+                                               wait_interrupt_t interruptible,
+                                               wait_timeout_urgency_t urgency,
+                                               uint64_t deadline,
+                                               uint64_t leeway,
+                                               thread_t thread);
+
+/* pull a thread from its wait queue */
+extern void waitq_pull_thread_locked(struct waitq *waitq, thread_t thread);
+
+/* wakeup all threads waiting for a particular event on locked queue */
+extern kern_return_t waitq_wakeup64_all_locked(struct waitq *waitq,
+                                              event64_t wake_event,
+                                              wait_result_t result,
+                                              uint64_t *reserved_preposts,
+                                              int priority,
+                                              waitq_lock_state_t lock_state);
+
+/* wakeup one thread waiting for a particular event on locked queue */
+extern kern_return_t waitq_wakeup64_one_locked(struct waitq *waitq,
+                                              event64_t wake_event,
+                                              wait_result_t result,
+                                              uint64_t *reserved_preposts,
+                                              int priority,
+                                              waitq_lock_state_t lock_state);
+
+/* return identity of a thread awakened for a particular <wait_queue,event> */
+extern thread_t waitq_wakeup64_identity_locked(struct waitq *waitq,
+                                              event64_t wake_event,
+                                              wait_result_t result,
+                                              spl_t *spl,
+                                              uint64_t *reserved_preposts,
+                                              waitq_lock_state_t lock_state);
+
+/* wakeup thread iff it's still waiting for a particular event on locked queue */
+extern kern_return_t waitq_wakeup64_thread_locked(struct waitq *waitq,
+                                                 event64_t wake_event,
+                                                 thread_t thread,
+                                                 wait_result_t result,
+                                                 waitq_lock_state_t lock_state);
+
+/* clear all preposts generated by the given waitq */
+extern int waitq_clear_prepost_locked(struct waitq *waitq, spl_t *s);
+
+/* clear all preposts from the given wait queue set */
+extern void waitq_set_clear_preposts_locked(struct waitq_set *wqset);
+
+/* unlink the given waitq from all sets */
+extern kern_return_t waitq_unlink_all_locked(struct waitq *waitq,
+                                            uint64_t *old_set_id,
+                                            spl_t *s,
+                                            int *dropped_lock);
+
+/*
+ * clear a thread's boosted priority
+ * (given via WAITQ_PROMOTE_PRIORITY in the wakeup function)
+ */
+extern void waitq_clear_promotion_locked(struct waitq *waitq,
+                                        thread_t thread);
+
+/*
+ * waitq iteration
+ */
+
+enum waitq_iteration_constant {
+       WQ_ITERATE_DROPPED             = -4,
+       WQ_ITERATE_INVALID             = -3,
+       WQ_ITERATE_ABORTED             = -2,
+       WQ_ITERATE_FAILURE             = -1,
+       WQ_ITERATE_SUCCESS             =  0,
+       WQ_ITERATE_CONTINUE            =  1,
+       WQ_ITERATE_BREAK               =  2,
+       WQ_ITERATE_BREAK_KEEP_LOCKED   =  3,
+       WQ_ITERATE_INVALIDATE_CONTINUE =  4,
+       WQ_ITERATE_RESTART             =  5,
+       WQ_ITERATE_FOUND               =  6,
+       WQ_ITERATE_UNLINKED            =  7,
+};
+
+/* callback invoked with both 'waitq' and 'wqset' locked */
+typedef int (*waitq_iterator_t)(void *ctx, struct waitq *waitq,
+                               struct waitq_set *wqset);
+
+/* iterate over all sets to which waitq belongs */
+extern int waitq_iterate_sets(struct waitq *waitq, void *ctx,
+                             waitq_iterator_t it);
+
+/* iterate over all waitqs that have preposted to wqset */
+extern int waitq_set_iterate_preposts(struct waitq_set *wqset,
+                                     void *ctx, waitq_iterator_t it, spl_t *s);
+
+/*
+ * prepost reservation
+ */
+extern uint64_t waitq_prepost_reserve(struct waitq *waitq, int extra,
+                                     waitq_lock_state_t lock_state, spl_t *s);
+
+extern void waitq_prepost_release_reserve(uint64_t id);
+
+#endif /* MACH_KERNEL_PRIVATE */
+
+
+__BEGIN_DECLS
+
+/*
+ * waitq init
+ */
+extern kern_return_t waitq_init(struct waitq *waitq, int policy);
+extern void waitq_deinit(struct waitq *waitq);
+
+/*
+ * global waitqs
+ */
+extern struct waitq *_global_eventq(char *event, size_t event_length);
+#define global_eventq(event) _global_eventq((char *)&(event), sizeof(event))
+
+extern struct waitq *global_waitq(int index);
+
+/*
+ * set alloc/init/free
+ */
+extern struct waitq_set *waitq_set_alloc(int policy);
+
+extern kern_return_t waitq_set_init(struct waitq_set *wqset,
+                                   int policy, uint64_t *reserved_link);
+
+extern void waitq_set_deinit(struct waitq_set *wqset);
+
+extern kern_return_t waitq_set_free(struct waitq_set *wqset);
+
+#if defined(DEVELOPMENT) || defined(DEBUG)
+#if CONFIG_WAITQ_DEBUG
+extern uint64_t wqset_id(struct waitq_set *wqset);
+
+struct waitq *wqset_waitq(struct waitq_set *wqset);
+#endif /* CONFIG_WAITQ_DEBUG */
+#endif /* DEVELOPMENT || DEBUG */
+
+
+/*
+ * set membership
+ */
+extern uint64_t waitq_link_reserve(struct waitq *waitq);
+
+extern void waitq_link_release(uint64_t id);
+
+extern boolean_t waitq_member(struct waitq *waitq, struct waitq_set *wqset);
+
+/* returns true if the waitq is in at least 1 set */
+extern boolean_t waitq_in_set(struct waitq *waitq);
+
+
+/* on success, consumes a reserved_link reference */
+extern kern_return_t waitq_link(struct waitq *waitq,
+                               struct waitq_set *wqset,
+                               waitq_lock_state_t lock_state,
+                               uint64_t *reserved_link);
+
+extern kern_return_t waitq_unlink(struct waitq *waitq, struct waitq_set *wqset);
+
+extern kern_return_t waitq_unlink_all(struct waitq *waitq);
+
+extern kern_return_t waitq_set_unlink_all(struct waitq_set *wqset);
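+
+/*
+ * Illustrative sketch (hypothetical caller; the lock-state argument shown
+ * is an assumption made for the example): linking is typically split into
+ * an unlocked reservation and a later link that consumes it, so that no
+ * table allocation happens while locks are held:
+ *
+ *     uint64_t resv = waitq_link_reserve(wq);
+ *     ...
+ *     kr = waitq_link(wq, wqs, WAITQ_SHOULD_LOCK, &resv);
+ *     waitq_link_release(resv);       // releases the reservation if unused
+ */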
+
+
+/*
+ * preposts
+ */
+extern void waitq_clear_prepost(struct waitq *waitq);
+
+extern void waitq_set_clear_preposts(struct waitq_set *wqset);
+
+/*
+ * interfaces used primarily by the select/kqueue subsystems
+ */
+extern uint64_t waitq_get_prepost_id(struct waitq *waitq);
+extern void     waitq_unlink_by_prepost_id(uint64_t wqp_id, struct waitq_set *wqset);
+
+/*
+ * waitq attributes
+ */
+extern int waitq_is_valid(struct waitq *waitq);
+
+extern int waitq_set_is_valid(struct waitq_set *wqset);
+
+extern int waitq_is_global(struct waitq *waitq);
+
+extern int waitq_irq_safe(struct waitq *waitq);
+
+#if CONFIG_WAITQ_STATS
+/*
+ * waitq statistics
+ */
+#define WAITQ_STATS_VERSION 1
+struct wq_table_stats {
+       uint32_t version;
+       uint32_t table_elements;
+       uint32_t table_used_elems;
+       uint32_t table_elem_sz;
+       uint32_t table_slabs;
+       uint32_t table_slab_sz;
+
+       uint64_t table_num_allocs;
+       uint64_t table_num_preposts;
+       uint64_t table_num_reservations;
+
+       uint64_t table_max_used;
+       uint64_t table_avg_used;
+       uint64_t table_max_reservations;
+       uint64_t table_avg_reservations;
+};
+
+extern void waitq_link_stats(struct wq_table_stats *stats);
+extern void waitq_prepost_stats(struct wq_table_stats *stats);
+#endif /* CONFIG_WAITQ_STATS */
+
+/*
+ *
+ * higher-level waiting APIs
+ *
+ */
+
+/* assert intent to wait on <waitq,event64> pair */
+extern wait_result_t waitq_assert_wait64(struct waitq *waitq,
+                                        event64_t wait_event,
+                                        wait_interrupt_t interruptible,
+                                        uint64_t deadline);
+
+extern wait_result_t waitq_assert_wait64_leeway(struct waitq *waitq,
+                                               event64_t wait_event,
+                                               wait_interrupt_t interruptible,
+                                               wait_timeout_urgency_t urgency,
+                                               uint64_t deadline,
+                                               uint64_t leeway);
+
+/* wakeup the most appropriate thread waiting on <waitq,event64> pair */
+extern kern_return_t waitq_wakeup64_one(struct waitq *waitq,
+                                       event64_t wake_event,
+                                       wait_result_t result,
+                                       int priority);
+
+/* wakeup all the threads waiting on <waitq,event64> pair */
+extern kern_return_t waitq_wakeup64_all(struct waitq *waitq,
+                                       event64_t wake_event,
+                                       wait_result_t result,
+                                       int priority);
+
+/* wakeup a specified thread iff it's waiting on <waitq,event64> pair */
+extern kern_return_t waitq_wakeup64_thread(struct waitq *waitq,
+                                          event64_t wake_event,
+                                          thread_t thread,
+                                          wait_result_t result);
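+
+/*
+ * Illustrative usage sketch (hypothetical caller: 'my_waitq' and 'my_event'
+ * are placeholder names, thread_block() and CAST_EVENT64_T() are assumed to
+ * be available from the scheduler/kernel-type headers, and a deadline of 0
+ * is taken to mean "no deadline"):
+ *
+ *     wait_result_t wr;
+ *
+ *     wr = waitq_assert_wait64(&my_waitq, CAST_EVENT64_T(&my_event),
+ *                              THREAD_UNINT, 0);
+ *     if (wr == THREAD_WAITING)
+ *             wr = thread_block(THREAD_CONTINUE_NULL);
+ *
+ * and, on the waking side:
+ *
+ *     waitq_wakeup64_one(&my_waitq, CAST_EVENT64_T(&my_event),
+ *                        THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
+ */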
+__END_DECLS
+
+#endif /* KERNEL_PRIVATE */
+#endif /* _WAITQ_H_ */
index 7b4efb64565e7bae348aeb448efed05376dd65ec..6921f0d7fc32856639a308319a860a05b2feba5f 100644 (file)
@@ -401,7 +401,7 @@ get_backup_ptr(vm_size_t  elem_size,
 static inline struct zone_page_metadata *
 get_zone_page_metadata(struct zone_free_element *element)
 {
-       return (struct zone_page_metadata *)(trunc_page((vm_offset_t)element) + PAGE_SIZE - sizeof(struct zone_page_metadata));
+       return (struct zone_page_metadata *)(trunc_page((vm_offset_t)element));
 }
 
 /*
@@ -553,7 +553,6 @@ backup_ptr_mismatch_panic(zone_t        zone,
        zone_element_was_modified_panic(zone, element, primary, likely_backup, 0);
 }
 
-
 /*
  * Sets the next element of tail to elem.
  * elem can be NULL.
@@ -815,21 +814,6 @@ struct fake_zone_info {
 };
 
 static const struct fake_zone_info fake_zones[] = {
-       {
-               .name = "kernel_stacks",
-               .init = stack_fake_zone_init,
-               .query = stack_fake_zone_info,
-       },
-       {
-               .name = "page_tables",
-               .init = pt_fake_zone_init,
-               .query = pt_fake_zone_info,
-       },
-       {
-               .name = "kalloc.large",
-               .init = kalloc_fake_zone_init,
-               .query = kalloc_fake_zone_info,
-       },
 };
 static const unsigned int num_fake_zones =
        sizeof (fake_zones) / sizeof (fake_zones[0]);
@@ -936,6 +920,10 @@ zone_t             zinfo_zone = ZONE_NULL; /* zone of per-task zone info */
 
 vm_offset_t    zdata;
 vm_size_t      zdata_size;
+/*
+ * Align elements that use the zone page list to 32 byte boundaries.
+ */
+#define ZONE_ELEMENT_ALIGNMENT 32
 
 #define zone_wakeup(zone) thread_wakeup((event_t)(zone))
 #define zone_sleep(zone)                               \
@@ -999,6 +987,9 @@ boolean_t zone_gc_forced = FALSE;
 boolean_t panic_include_zprint = FALSE;
 boolean_t zone_gc_allowed_by_time_throttle = TRUE;
 
+vm_offset_t panic_kext_memory_info = 0;
+vm_size_t panic_kext_memory_size = 0;
+
 #define ZALLOC_DEBUG_ZONEGC            0x00000001
 #define ZALLOC_DEBUG_ZCRAM             0x00000002
 uint32_t zalloc_debug = 0;
@@ -1311,12 +1302,12 @@ zleak_activate(void)
        lck_spin_unlock(&zleak_lock);
 
        /* Allocate and zero tables */
-       retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&allocations_ptr, z_alloc_size);
+       retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&allocations_ptr, z_alloc_size, VM_KERN_MEMORY_OSFMK);
        if (retval != KERN_SUCCESS) {
                goto fail;
        }
 
-       retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&traces_ptr, z_trace_size);
+       retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&traces_ptr, z_trace_size, VM_KERN_MEMORY_OSFMK);
        if (retval != KERN_SUCCESS) {
                goto fail;
        }
@@ -1738,10 +1729,19 @@ use_this_allocation:
         * to its page_metadata, and if the wastage in the tail of
         * the allocation is not too large
         */
-       if (alloc == PAGE_SIZE) {
-               if ((PAGE_SIZE % size) >= sizeof(struct zone_page_metadata)) {
-                       use_page_list = TRUE;
-               } else if ((PAGE_SIZE - sizeof(struct zone_page_metadata)) % size <= PAGE_SIZE / 100) {
+
+       /* zone_zone can't use page metadata since the page metadata will overwrite zone metadata */
+       if (alloc == PAGE_SIZE && zone_zone != ZONE_NULL) {
+               vm_offset_t first_element_offset;
+               size_t zone_page_metadata_size = sizeof(struct zone_page_metadata);
+
+               if (zone_page_metadata_size % ZONE_ELEMENT_ALIGNMENT == 0) {
+                       first_element_offset = zone_page_metadata_size;
+               } else {
+                       first_element_offset = zone_page_metadata_size + (ZONE_ELEMENT_ALIGNMENT - (zone_page_metadata_size % ZONE_ELEMENT_ALIGNMENT));
+               }
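+               /*
+                * Illustrative arithmetic (hypothetical size, added for clarity):
+                * if zone_page_metadata_size were 40 bytes, first_element_offset
+                * would be 40 + (32 - (40 % 32)) = 64, i.e. the next
+                * ZONE_ELEMENT_ALIGNMENT boundary past the metadata.
+                */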
+
+               if (((PAGE_SIZE - first_element_offset) % size) <= PAGE_SIZE / 100) {
                        use_page_list = TRUE;
                }
        }
@@ -1760,7 +1760,8 @@ use_this_allocation:
        z->count = 0;
        z->countfree = 0;
        z->sum_count = 0LL;
-       z->doing_alloc = FALSE;
+       z->doing_alloc_without_vm_priv = FALSE;
+       z->doing_alloc_with_vm_priv = FALSE;
        z->doing_gc = FALSE;
        z->exhaustible = FALSE;
        z->collectable = TRUE;
@@ -1851,7 +1852,8 @@ static void zone_replenish_thread(zone_t z) {
                lock_zone(z);
                assert(z->prio_refill_watermark != 0);
                while ((free_size = (z->cur_size - (z->count * z->elem_size))) < (z->prio_refill_watermark * z->elem_size)) {
-                       assert(z->doing_alloc == FALSE);
+                       assert(z->doing_alloc_without_vm_priv == FALSE);
+                       assert(z->doing_alloc_with_vm_priv == FALSE);
                        assert(z->async_prio_refill == TRUE);
 
                        unlock_zone(z);
@@ -1867,19 +1869,18 @@ static void zone_replenish_thread(zone_t z) {
                        if (z->noencrypt)
                                zflags |= KMA_NOENCRYPT;
                                
-                       kr = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags);
+                       kr = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags, VM_KERN_MEMORY_ZONE);
 
                        if (kr == KERN_SUCCESS) {
 #if    ZONE_ALIAS_ADDR
                                if (alloc_size == PAGE_SIZE)
                                        space = zone_alias_addr(space);
 #endif
-                               ZONE_PAGE_COUNT_INCR(z, (alloc_size / PAGE_SIZE));      
                                zcram(z, space, alloc_size);
                        } else if (kr == KERN_RESOURCE_SHORTAGE) {
                                VM_PAGE_WAIT();
                        } else if (kr == KERN_NO_SPACE) {
-                               kr = kernel_memory_allocate(kernel_map, &space, alloc_size, 0, zflags);
+                               kr = kernel_memory_allocate(kernel_map, &space, alloc_size, 0, zflags, VM_KERN_MEMORY_ZONE);
                                if (kr == KERN_SUCCESS) {
 #if    ZONE_ALIAS_ADDR
                                        if (alloc_size == PAGE_SIZE)
@@ -1924,7 +1925,7 @@ zone_prio_refill_configure(zone_t z, vm_size_t low_water_mark) {
 }
 
 /*
- *     Cram the given memory into the specified zone.
+ *     Cram the given memory into the specified zone. Update the zone page count accordingly.
  */
 void
 zcram(
@@ -1952,17 +1953,20 @@ zcram(
        if (from_zm && !zone->use_page_list)
                zone_page_init(newmem, size);
 
+       ZONE_PAGE_COUNT_INCR(zone, (size / PAGE_SIZE));
+
        lock_zone(zone);
 
        if (zone->use_page_list) {
                struct zone_page_metadata *page_metadata;
+               size_t zone_page_metadata_size = sizeof(struct zone_page_metadata);
 
                assert((newmem & PAGE_MASK) == 0);
                assert((size & PAGE_MASK) == 0);
                for (; size > 0; newmem += PAGE_SIZE, size -= PAGE_SIZE) {
 
                        vm_size_t pos_in_page;
-                       page_metadata = (struct zone_page_metadata *)(newmem + PAGE_SIZE - sizeof(struct zone_page_metadata));
+                       page_metadata = (struct zone_page_metadata *)(newmem);
                        
                        page_metadata->pages.next = NULL;
                        page_metadata->pages.prev = NULL;
@@ -1973,18 +1977,17 @@ zcram(
 
                        enqueue_tail(&zone->pages.all_used, (queue_entry_t)page_metadata);
 
-                       for (pos_in_page = 0; (newmem + pos_in_page + elem_size) < (vm_offset_t)page_metadata; pos_in_page += elem_size) {
+                       vm_offset_t first_element_offset;
+                       if (zone_page_metadata_size % ZONE_ELEMENT_ALIGNMENT == 0){
+                               first_element_offset = zone_page_metadata_size;
+                       } else {
+                               first_element_offset = zone_page_metadata_size + (ZONE_ELEMENT_ALIGNMENT - (zone_page_metadata_size % ZONE_ELEMENT_ALIGNMENT));
+                       }
+
+                       for (pos_in_page = first_element_offset; (newmem + pos_in_page + elem_size) < (vm_offset_t)(newmem + PAGE_SIZE); pos_in_page += elem_size) {
                                page_metadata->alloc_count++;
                                zone->count++;  /* compensate for free_to_zone */
-                               if ((newmem + pos_in_page) == (vm_offset_t)zone) {
-                                       /*
-                                        * special case for the "zone_zone" zone, which is using the first
-                                        * allocation of its pmap_steal_memory()-ed allocation for
-                                        * the "zone_zone" variable already.
-                                        */
-                               } else {
-                                       free_to_zone(zone, newmem + pos_in_page, FALSE);
-                               }
+                               free_to_zone(zone, newmem + pos_in_page, FALSE);
                                zone->cur_size += elem_size;
                        }
                }
@@ -2046,12 +2049,11 @@ zfill(
                return 0;
        size = nelem * zone->elem_size;
        size = round_page(size);
-       kr = kmem_alloc_kobject(kernel_map, &memory, size);
+       kr = kmem_alloc_kobject(kernel_map, &memory, size, VM_KERN_MEMORY_ZONE);
        if (kr != KERN_SUCCESS)
                return 0;
 
        zone_change(zone, Z_FOREIGN, TRUE);
-       ZONE_PAGE_COUNT_INCR(zone, (size / PAGE_SIZE));
        zcram(zone, memory, size);
        nalloc = (int)(size / zone->elem_size);
        assert(nalloc >= nelem);
@@ -2132,6 +2134,7 @@ zone_bootstrap(void)
        zone_change(zone_zone, Z_NOENCRYPT, TRUE);
 
        zcram(zone_zone, zdata, zdata_size);
+       VM_PAGE_MOVE_STOLEN(atop_64(zdata_size));
 
        /* initialize fake zones and zone info if tracking by task */
        if (zinfo_per_task) {
@@ -2179,7 +2182,7 @@ zone_init(
        vm_offset_t     zone_max;
 
        retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size,
-                              FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT,
+                              FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT | VM_MAKE_TAG(VM_KERN_MEMORY_ZONE),
                               &zone_map);
 
        if (retval != KERN_SUCCESS)
@@ -2259,7 +2262,7 @@ zone_page_table_expand(zone_page_index_t pindex)
                struct zone_page_table_entry *entry_array;
 
                if (kmem_alloc_kobject(zone_map, &second_level_array,
-                                                          second_level_size) != KERN_SUCCESS) {
+                                                          second_level_size, VM_KERN_MEMORY_OSFMK) != KERN_SUCCESS) {
                        panic("zone_page_table_expand");
                }
                zone_map_table_page_count += (second_level_size / PAGE_SIZE);
@@ -2324,6 +2327,7 @@ zalloc_internal(
 #endif
        thread_t thr = current_thread();
        boolean_t       check_poison = FALSE;
+       boolean_t       set_doing_alloc_with_vm_priv = FALSE;
 
 #if CONFIG_ZLEAKS
        uint32_t        zleak_tracedepth = 0;  /* log this allocation if nonzero */
@@ -2395,21 +2399,35 @@ zalloc_internal(
 
        while ((addr == 0) && canblock) {
                /*
-                *      If nothing was there, try to get more
+                * zone is empty, try to expand it
+                * 
+                * Note that we now allow up to 2 threads (1 vm_privileged and 1 non-vm_privileged)
+                * to expand the zone concurrently...  this is necessary to keep
+                * vm_privileged threads running critical code needed to continue compressing/swapping
+                * pages (i.e. making new free pages) from stalling behind non-vm_privileged threads
+                * waiting to acquire free pages when the vm_page_free_count is below the
+                * vm_page_free_reserved limit.
                 */
-               if (zone->doing_alloc) {
+               if ((zone->doing_alloc_without_vm_priv || zone->doing_alloc_with_vm_priv) &&
+                   (((thr->options & TH_OPT_VMPRIV) == 0) || zone->doing_alloc_with_vm_priv)) {
                        /*
-                        *      Someone is allocating memory for this zone.
-                        *      Wait for it to show up, then try again.
+                        * This is a non-vm_privileged thread and a non-vm_privileged or
+                        * a vm_privileged thread is already expanding the zone...
+                        *    OR
+                        * this is a vm_privileged thread and a vm_privileged thread is
+                        * already expanding the zone...
+                        *
+                        * In either case wait for a thread to finish, then try again.
                         */
                        zone->waiting = TRUE;
                        zone_sleep(zone);
                } else if (zone->doing_gc) {
-                       /* zone_gc() is running. Since we need an element
+                       /*
+                        * zone_gc() is running. Since we need an element
                         * from the free list that is currently being
-                        * collected, set the waiting bit and try to
-                        * interrupt the GC process, and try again
-                        * when we obtain the lock.
+                        * collected, set the waiting bit and 
+                        * wait for the GC process to finish
+                        * before trying again
                         */
                        zone->waiting = TRUE;
                        zone_sleep(zone);
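
The new wait predicate is easier to audit when written out as a helper. A condensed restatement of the same condition (not code from this commit), assuming TH_OPT_VMPRIV in thr->options marks a vm-privileged thread:

    /* Should the calling thread sleep instead of expanding the zone itself? */
    static inline boolean_t
    zone_expansion_must_wait(zone_t z, thread_t thr)
    {
            boolean_t vm_priv = (thr->options & TH_OPT_VMPRIV) != 0;

            if (!z->doing_alloc_without_vm_priv && !z->doing_alloc_with_vm_priv)
                    return FALSE;   /* no expander active: caller may expand */
            if (vm_priv && !z->doing_alloc_with_vm_priv)
                    return FALSE;   /* the single vm-privileged slot is still free */
            return TRUE;            /* otherwise wait behind the current expander */
    }
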
@@ -2445,7 +2463,12 @@ zalloc_internal(
                                        panic("zalloc: zone \"%s\" empty.", zone->zone_name);
                                }
                        }
-                       zone->doing_alloc = TRUE;
+                       if ((thr->options & TH_OPT_VMPRIV)) {
+                               zone->doing_alloc_with_vm_priv = TRUE;
+                               set_doing_alloc_with_vm_priv = TRUE;
+                       } else {
+                               zone->doing_alloc_without_vm_priv = TRUE;
+                       }
                        unlock_zone(zone);
 
                        for (;;) {
@@ -2460,7 +2483,7 @@ zalloc_internal(
                                if (zone->noencrypt)
                                        zflags |= KMA_NOENCRYPT;
                                
-                               retval = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags);
+                               retval = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags, VM_KERN_MEMORY_ZONE);
                                if (retval == KERN_SUCCESS) {
 #if    ZONE_ALIAS_ADDR
                                        if (alloc_size == PAGE_SIZE)
@@ -2485,7 +2508,6 @@ zalloc_internal(
                                                }       
                                        }
 #endif /* CONFIG_ZLEAKS */
-                                       ZONE_PAGE_COUNT_INCR(zone, (alloc_size / PAGE_SIZE));
                                        zcram(zone, space, alloc_size);
                                        
                                        break;
@@ -2518,9 +2540,14 @@ zalloc_internal(
                                }
                        }
                        lock_zone(zone);
-                       zone->doing_alloc = FALSE; 
+
+                       if (set_doing_alloc_with_vm_priv == TRUE)
+                               zone->doing_alloc_with_vm_priv = FALSE;
+                       else
+                               zone->doing_alloc_without_vm_priv = FALSE; 
+                       
                        if (zone->waiting) {
-                               zone->waiting = FALSE;
+                               zone->waiting = FALSE;
                                zone_wakeup(zone);
                        }
                        addr = try_alloc_from_zone(zone, &check_poison);
@@ -3089,6 +3116,13 @@ zone_change(
                        break;
                case Z_ALIGNMENT_REQUIRED:
                        zone->alignment_required = value;
+                       /*
+                        * Disable the page list optimization here to provide
+                        * more of an alignment guarantee. This prevents
+                        * element alignment from being broken by the metadata stored
+                        * at the beginning of the page.
+                        */
+                       zone->use_page_list = FALSE;
 #if    ZONE_DEBUG                      
                        zone_debug_disable(zone);
 #endif
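
Illustrative use of the strengthened flag; the zone name and sizes below are hypothetical, not taken from this commit:

    /* a zone whose callers rely on naturally aligned elements */
    zone_t z = zinit(256,            /* element size (hypothetical)  */
                     256 * 1024,     /* max memory for the zone      */
                     PAGE_SIZE,      /* allocation granule           */
                     "example.aligned");
    zone_change(z, Z_ALIGNMENT_REQUIRED, TRUE);  /* now also clears use_page_list, so
                                                  * per-page metadata cannot shift the
                                                  * offset of the first element        */
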
@@ -3330,7 +3364,7 @@ zone_page_free_element(
 }
 
 
-
+#define ZONEGC_SMALL_ELEMENT_SIZE      4096
 
 struct {
        uint64_t        zgc_invoked;
@@ -3402,7 +3436,7 @@ zone_gc(boolean_t all_zones)
                if (!z->collectable)
                        continue;
 
-               if (all_zones == FALSE && z->elem_size < PAGE_SIZE && !z->use_page_list)
+               if (all_zones == FALSE && z->elem_size < ZONEGC_SMALL_ELEMENT_SIZE && !z->use_page_list)
                        continue;
 
                lock_zone(z);
@@ -3878,14 +3912,14 @@ task_zone_info(
 
        names_size = round_page(max_zones * sizeof *names);
        kr = kmem_alloc_pageable(ipc_kernel_map,
-                                &names_addr, names_size);
+                                &names_addr, names_size, VM_KERN_MEMORY_IPC);
        if (kr != KERN_SUCCESS)
                return kr;
        names = (mach_zone_name_t *) names_addr;
 
        info_size = round_page(max_zones * sizeof *info);
        kr = kmem_alloc_pageable(ipc_kernel_map,
-                                &info_addr, info_size);
+                                &info_addr, info_size, VM_KERN_MEMORY_IPC);
        if (kr != KERN_SUCCESS) {
                kmem_free(ipc_kernel_map,
                          names_addr, names_size);
@@ -3916,7 +3950,7 @@ task_zone_info(
                zn->mzn_name[sizeof zn->mzn_name - 1] = '\0';
 
                zi->tzi_count = (uint64_t)zcopy.count;
-               zi->tzi_cur_size = (uint64_t)zcopy.cur_size;
+               zi->tzi_cur_size = ptoa_64(zcopy.page_count);
                zi->tzi_max_size = (uint64_t)zcopy.max_size;
                zi->tzi_elem_size = (uint64_t)zcopy.elem_size;
                zi->tzi_alloc_size = (uint64_t)zcopy.alloc_size;
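
The reported current size is now derived from the zone's page count rather than the cur_size byte counter, so it reflects the pages actually charged to the zone. ptoa_64() is the pages-to-bytes conversion; a worked example assuming 4 KiB pages:

    /* zcopy.page_count = 37  ->  ptoa_64(37) = 37 * 4096 = 151552 bytes */
    zi->tzi_cur_size = ptoa_64(zcopy.page_count);
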
@@ -4019,13 +4053,33 @@ mach_zone_info(
        mach_msg_type_number_t  *namesCntp,
        mach_zone_info_array_t  *infop,
        mach_msg_type_number_t  *infoCntp)
+{
+       return (mach_memory_info(host, namesp, namesCntp, infop, infoCntp, NULL, NULL));
+}
+
+kern_return_t
+mach_memory_info(
+       host_priv_t             host,
+       mach_zone_name_array_t  *namesp,
+       mach_msg_type_number_t  *namesCntp,
+       mach_zone_info_array_t  *infop,
+       mach_msg_type_number_t  *infoCntp,
+       mach_memory_info_array_t *memoryInfop,
+       mach_msg_type_number_t   *memoryInfoCntp)
 {
        mach_zone_name_t        *names;
        vm_offset_t             names_addr;
        vm_size_t               names_size;
+
        mach_zone_info_t        *info;
        vm_offset_t             info_addr;
        vm_size_t               info_size;
+
+       mach_memory_info_t      *memory_info;
+       vm_offset_t             memory_info_addr;
+       vm_size_t               memory_info_size;
+        unsigned int           num_sites;
+
        unsigned int            max_zones, i;
        zone_t                  z;
        mach_zone_name_t        *zn;
@@ -4055,22 +4109,48 @@ mach_zone_info(
 
        names_size = round_page(max_zones * sizeof *names);
        kr = kmem_alloc_pageable(ipc_kernel_map,
-                                &names_addr, names_size);
+                                &names_addr, names_size, VM_KERN_MEMORY_IPC);
        if (kr != KERN_SUCCESS)
                return kr;
        names = (mach_zone_name_t *) names_addr;
 
        info_size = round_page(max_zones * sizeof *info);
        kr = kmem_alloc_pageable(ipc_kernel_map,
-                                &info_addr, info_size);
+                                &info_addr, info_size, VM_KERN_MEMORY_IPC);
        if (kr != KERN_SUCCESS) {
                kmem_free(ipc_kernel_map,
                          names_addr, names_size);
                return kr;
        }
-
        info = (mach_zone_info_t *) info_addr;
 
+       num_sites = 0;
+       memory_info_addr = 0;
+       if (memoryInfop && memoryInfoCntp)
+       {
+               num_sites = VM_KERN_MEMORY_COUNT + VM_KERN_COUNTER_COUNT;
+               memory_info_size = round_page(num_sites * sizeof *info);
+               kr = kmem_alloc_pageable(ipc_kernel_map,
+                                        &memory_info_addr, memory_info_size, VM_KERN_MEMORY_IPC);
+               if (kr != KERN_SUCCESS) {
+                       kmem_free(ipc_kernel_map,
+                                 names_addr, names_size);
+                       kmem_free(ipc_kernel_map,
+                                 info_addr, info_size);
+                       return kr;
+               }
+
+               kr = vm_map_wire(ipc_kernel_map, memory_info_addr, memory_info_addr + memory_info_size,
+                                    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_IPC), FALSE);
+               assert(kr == KERN_SUCCESS);
+
+               memory_info = (mach_memory_info_t *) memory_info_addr;
+               vm_page_diagnose(memory_info, num_sites);
+
+               kr = vm_map_unwire(ipc_kernel_map, memory_info_addr, memory_info_addr + memory_info_size, FALSE);
+               assert(kr == KERN_SUCCESS);
+       }
+
        zn = &names[0];
        zi = &info[0];
 
@@ -4093,7 +4173,7 @@ mach_zone_info(
                zn->mzn_name[sizeof zn->mzn_name - 1] = '\0';
 
                zi->mzi_count = (uint64_t)zcopy.count;
-               zi->mzi_cur_size = (uint64_t)zcopy.cur_size;
+               zi->mzi_cur_size = ptoa_64(zcopy.page_count);
                zi->mzi_max_size = (uint64_t)zcopy.max_size;
                zi->mzi_elem_size = (uint64_t)zcopy.elem_size;
                zi->mzi_alloc_size = (uint64_t)zcopy.alloc_size;
@@ -4155,6 +4235,16 @@ mach_zone_info(
        *infop = (mach_zone_info_t *) copy;
        *infoCntp = max_zones;
 
+       if (memoryInfop && memoryInfoCntp)
+       {
+               kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)memory_info_addr,
+                                  (vm_map_size_t)memory_info_size, TRUE, &copy);
+               assert(kr == KERN_SUCCESS);
+
+               *memoryInfop = (mach_memory_info_t *) copy;
+               *memoryInfoCntp = num_sites;
+       }
+
        return KERN_SUCCESS;
 }
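
A hypothetical user-level caller of the new routine, assuming the MIG stub mirrors the kernel signature above and that the caller already holds the host-priv port (entitlement and header requirements are not shown in this commit):

    mach_zone_name_t       *names  = NULL;
    mach_zone_info_t       *info   = NULL;
    mach_memory_info_t     *wired  = NULL;
    mach_msg_type_number_t  name_cnt = 0, info_cnt = 0, wired_cnt = 0;

    kern_return_t kr = mach_memory_info(host_priv_port,   /* hypothetical variable */
                                        &names, &name_cnt,
                                        &info,  &info_cnt,
                                        &wired, &wired_cnt);
    if (kr == KERN_SUCCESS) {
            /* names[]/info[] describe each zone; wired[] holds one entry per
             * VM_KERN_MEMORY_* tag plus the VM_KERN_COUNTER_* entries.       */
    }
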
 
@@ -4213,14 +4303,14 @@ host_zone_info(
 
        names_size = round_page(max_zones * sizeof *names);
        kr = kmem_alloc_pageable(ipc_kernel_map,
-                                &names_addr, names_size);
+                                &names_addr, names_size, VM_KERN_MEMORY_IPC);
        if (kr != KERN_SUCCESS)
                return kr;
        names = (zone_name_t *) names_addr;
 
        info_size = round_page(max_zones * sizeof *info);
        kr = kmem_alloc_pageable(ipc_kernel_map,
-                                &info_addr, info_size);
+                                &info_addr, info_size, VM_KERN_MEMORY_IPC);
        if (kr != KERN_SUCCESS) {
                kmem_free(ipc_kernel_map,
                          names_addr, names_size);
@@ -4251,7 +4341,7 @@ host_zone_info(
                zn->zn_name[sizeof zn->zn_name - 1] = '\0';
 
                zi->zi_count = zcopy.count;
-               zi->zi_cur_size = zcopy.cur_size;
+               zi->zi_cur_size = ptoa(zcopy.page_count);
                zi->zi_max_size = zcopy.max_size;
                zi->zi_elem_size = zcopy.elem_size;
                zi->zi_alloc_size = zcopy.alloc_size;
index 6210bd62fcb6fb4b7088fe280a5dcc45a4afa7a5..5446890558b3bc59a3c7607cbbdb3ee662240373 100644 (file)
@@ -120,7 +120,8 @@ struct zone {
        /* boolean_t */ collectable        :1,  /* (F) garbage collect empty pages */
        /* boolean_t */ expandable         :1,  /* (T) expand zone (with message)? */
        /* boolean_t */ allows_foreign     :1,  /* (F) allow non-zalloc space */
-       /* boolean_t */ doing_alloc        :1,  /* is zone expanding now? */
+       /* boolean_t */ doing_alloc_without_vm_priv:1,  /* is zone expanding now via a non-vm_privileged thread? */
+       /* boolean_t */ doing_alloc_with_vm_priv:1, /* is zone expanding now via a vm_privileged thread? */
        /* boolean_t */ waiting            :1,  /* is thread waiting for expansion? */
        /* boolean_t */ async_pending      :1,  /* asynchronous allocation pending? */
        /* boolean_t */ zleak_on           :1,  /* Are we collecting allocation information? */
@@ -132,7 +133,7 @@ struct zone {
        /* boolean_t */ gzalloc_exempt     :1,
        /* boolean_t */ alignment_required :1,
        /* boolean_t */ use_page_list      :1,
-       /* future    */ _reserved          :16;
+       /* future    */ _reserved          :15;
 
        int             index;          /* index into zone_info arrays for this zone */
        struct zone     *next_zone;     /* Link for all-zones list */
index b938c7fbb3bbf3d5694e5013daf6b4b90bcf6f94..74e1d806760f1d37dd561450af3c3c14158ea507 100644 (file)
@@ -9,18 +9,18 @@ include $(MakeInc_def)
 
 MIG_DEFS = kextd_mach.defs
 
-DATAFILES = ${MIG_DEFS}
+PRIVATE_DATAFILES = ${MIG_DEFS}
 
-INSTALL_MI_LIST =
+KERNELFILES = ${MIG_DEFS}
 
-INSTALL_MI_LCL_LIST    = ${DATAFILES}
+INSTALL_MI_LIST =
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
 
 INSTALL_MI_GEN_LIST = 
 
 INSTALL_MI_DIR = kextd
 
-EXPORT_MI_LIST = \
-       ${DATAFILES}
+EXPORT_MI_LIST = ${KERNELFILES}
 
 EXPORT_MI_GEN_LIST = kextd_mach.h
 
index dbaf90b2155b5e413d397085081fb552282f58ca..1bfe48847d815a77ed26b562f909be4afb007ce3 100644 (file)
@@ -50,6 +50,7 @@
 #include <kperf/action.h>
 #include <kperf/context.h>
 #include <kperf/ast.h>
+#include <kperf/kperf_kpc.h>
 
 #define ACTION_MAX 32
 
@@ -65,11 +66,21 @@ struct action
 static unsigned actionc = 0;
 static struct action *actionv = NULL;
 
+/* manage callbacks from system */
+
+/* callback set for kdebug */
+static int kperf_kdbg_callback_set = 0;
 /* whether to record callstacks on kdebug events */
-static int kdebug_callstack_action = 0;
+static int kdebug_callstacks = 0;
+/* the action ID to trigger on signposts */
+static int kperf_signpost_action = 0;
 
-/* whether we get a callback on a thread switch */
-int  kperf_cswitch_hook = 0;
+/* callback set for context-switch */
+int kperf_cswitch_callback_set = 0;
+/* should emit tracepoint on context switch */
+static int kdebug_cswitch = 0;
+/* the action ID to trigger on context switches */
+static int kperf_cswitch_action = 0;
 
 /* indirect hooks to play nice with CHUD for the transition to kperf */
 kern_return_t chudxnu_kdebug_callback_enter(chudxnu_kdebug_callback_func_t fn);
@@ -78,10 +89,10 @@ kern_return_t chudxnu_kdebug_callback_cancel(void);
 /* Do the real work! */
 /* this can be called in any context ... right? */
 static kern_return_t
-kperf_sample_internal( struct kperf_sample *sbuf,
-                       struct kperf_context *context,
-                       unsigned sample_what, unsigned sample_flags,
-                       unsigned actionid )
+kperf_sample_internal(struct kperf_sample *sbuf,
+                      struct kperf_context *context,
+                      unsigned sample_what, unsigned sample_flags,
+                      unsigned actionid)
 {
        boolean_t enabled;
        int did_ucallstack = 0, did_tinfo_extra = 0;
@@ -90,8 +101,9 @@ kperf_sample_internal( struct kperf_sample *sbuf,
        /* not much point continuing here, but what to do ? return
         * Shutdown? cut a tracepoint and continue?
         */
-       if( sample_what == 0 )
+       if (sample_what == 0) {
                return SAMPLE_CONTINUE;
+       }
 
        int is_kernel = (context->cur_pid == 0);
 
@@ -100,101 +112,120 @@ kperf_sample_internal( struct kperf_sample *sbuf,
        sbuf->ucallstack.nframes = 0;
        sbuf->ucallstack.flags = CALLSTACK_VALID;
 
-       /*  an event occurred. Sample everything and dump it in a
-        *  buffer.
+       /* an event occurred. Sample everything and dump it in a
+        * buffer.
         */
 
        /* collect data from samplers */
-       if( sample_what & SAMPLER_TINFO ) {
-               kperf_threadinfo_sample( &sbuf->threadinfo, context );
-               
+       if (sample_what & SAMPLER_TINFO) {
+               kperf_threadinfo_sample(&sbuf->threadinfo, context);
+
                /* See if we should drop idle thread samples */
-               if( !(sample_flags & SAMPLE_FLAG_IDLE_THREADS) )
-                       if (sbuf->threadinfo.runmode & 0x40)
+               if (!(sample_flags & SAMPLE_FLAG_IDLE_THREADS)) {
+                       if (sbuf->threadinfo.runmode & 0x40) {
                                return SAMPLE_CONTINUE;
+                       }
+               }
        }
 
-       if( (sample_what & SAMPLER_KSTACK) && !(sample_flags & SAMPLE_FLAG_EMPTY_CALLSTACK) )
-               kperf_kcallstack_sample( &sbuf->kcallstack, context );
+       if ((sample_what & SAMPLER_KSTACK) && !(sample_flags & SAMPLE_FLAG_EMPTY_CALLSTACK)) {
+               kperf_kcallstack_sample(&(sbuf->kcallstack), context);
+       }
 
        /* sensitive ones */
-       if ( !is_kernel ) {
-               if( sample_flags & SAMPLE_FLAG_PEND_USER )
-               {
-                       if( (sample_what & SAMPLER_USTACK) && !(sample_flags & SAMPLE_FLAG_EMPTY_CALLSTACK) )
-                               did_ucallstack = kperf_ucallstack_pend( context );
-
-                       if( sample_what & SAMPLER_TINFOEX )
-                               did_tinfo_extra = kperf_threadinfo_extra_pend( context );
+       if (!is_kernel) {
+               if (sample_what & SAMPLER_MEMINFO) {
+                       kperf_meminfo_sample(&(sbuf->meminfo), context);
                }
-               else
-               {
-                       if( (sample_what & SAMPLER_USTACK) && !(sample_flags & SAMPLE_FLAG_EMPTY_CALLSTACK) )
-                               kperf_ucallstack_sample( &sbuf->ucallstack, context );
-
-                       if( sample_what & SAMPLER_TINFOEX )
-                               kperf_threadinfo_extra_sample( &sbuf->tinfo_ex,
-                                                              context );
+
+               if (sample_flags & SAMPLE_FLAG_PEND_USER) {
+                       if ((sample_what & SAMPLER_USTACK)
+                           && !(sample_flags & SAMPLE_FLAG_EMPTY_CALLSTACK))
+                       {
+                               did_ucallstack = kperf_ucallstack_pend(context);
+                       }
+
+                       if (sample_what & SAMPLER_TINFOEX) {
+                               did_tinfo_extra = kperf_threadinfo_extra_pend(context);
+                       }
+               } else {
+                       if ((sample_what & SAMPLER_USTACK)
+                           && !(sample_flags & SAMPLE_FLAG_EMPTY_CALLSTACK))
+                       {
+                               kperf_ucallstack_sample(&(sbuf->ucallstack), context);
+                       }
+
+                       if (sample_what & SAMPLER_TINFOEX) {
+                               kperf_threadinfo_extra_sample(&(sbuf->tinfo_ex),
+                                                             context);
+                       }
                }
        }
 
-#if KPC
-       if ( sample_what & SAMPLER_PMC_CPU )
-               kperf_kpc_cpu_sample( &sbuf->kpcdata, 
-                                     (sample_what & SAMPLER_PMC_CPU) != 0 );
-#endif
+       if (sample_what & SAMPLER_PMC_THREAD) {
+               kperf_kpc_thread_sample(&(sbuf->kpcdata), sample_what);
+       } else if (sample_what & SAMPLER_PMC_CPU) {
+               kperf_kpc_cpu_sample(&(sbuf->kpcdata), sample_what);
+       }
 
        /* lookup the user tag, if any */
-       if( actionid 
-           && (actionid <= actionc) )
-               userdata = actionv[actionid-1].userdata;
-       else
+       if (actionid && (actionid <= actionc)) {
+               userdata = actionv[actionid - 1].userdata;
+       } else {
                userdata = actionid;
+       }
 
        /* stash the data into the buffer
         * interrupts off to ensure we don't get split
         */
        enabled = ml_set_interrupts_enabled(FALSE);
 
-       BUF_DATA( PERF_GEN_EVENT | DBG_FUNC_START, sample_what, 
-                  actionid, userdata, sample_flags );
+       BUF_DATA(PERF_GEN_EVENT | DBG_FUNC_START, sample_what,
+                actionid, userdata, sample_flags);
 
        /* dump threadinfo */
-       if( sample_what & SAMPLER_TINFO )
+       if (sample_what & SAMPLER_TINFO) {
                kperf_threadinfo_log( &sbuf->threadinfo );
+       }
 
        /* dump kcallstack */
-       if( sample_what & SAMPLER_KSTACK )
+       if (sample_what & SAMPLER_KSTACK) {
                kperf_kcallstack_log( &sbuf->kcallstack );
-
+       }
 
        /* dump user stuff */
-       if ( !is_kernel ) {
-               if ( sample_flags & SAMPLE_FLAG_PEND_USER )
-               {
-                       if ( did_ucallstack )
-                               BUF_INFO1( PERF_CS_UPEND, 0 );
-
-                       if ( did_tinfo_extra )
-                               BUF_INFO1( PERF_TI_XPEND, 0 );
+       if (!is_kernel) {
+               /* dump meminfo */
+               if (sample_what & SAMPLER_MEMINFO) {
+                       kperf_meminfo_log(&(sbuf->meminfo));
                }
-               else
-               {
-                       if( sample_what & SAMPLER_USTACK )
-                               kperf_ucallstack_log( &sbuf->ucallstack );
 
-                       if( sample_what & SAMPLER_TINFOEX )
-                               kperf_threadinfo_extra_log( &sbuf->tinfo_ex );
+               if (sample_flags & SAMPLE_FLAG_PEND_USER) {
+                       if (did_ucallstack) {
+                               BUF_INFO1(PERF_CS_UPEND, 0);
+                       }
+
+                       if (did_tinfo_extra) {
+                               BUF_INFO1(PERF_TI_XPEND, 0);
+                       }
+               } else {
+                       if (sample_what & SAMPLER_USTACK) {
+                               kperf_ucallstack_log(&(sbuf->ucallstack));
+                       }
+
+                       if (sample_what & SAMPLER_TINFOEX) {
+                               kperf_threadinfo_extra_log(&(sbuf->tinfo_ex));
+                       }
                }
        }
 
-#if KPC
-       if ( sample_what & SAMPLER_PMC_CPU )
-               kperf_kpc_cpu_log( &sbuf->kpcdata );
-       
-#endif
+       if (sample_what & SAMPLER_PMC_THREAD) {
+               kperf_kpc_thread_log(&(sbuf->kpcdata));
+       } else if (sample_what & SAMPLER_PMC_CPU) {
+               kperf_kpc_cpu_log(&(sbuf->kpcdata));
+       }
 
-       BUF_DATA1( PERF_GEN_EVENT | DBG_FUNC_END, sample_what );
+       BUF_DATA1(PERF_GEN_EVENT | DBG_FUNC_END, sample_what);
 
        /* intrs back on */
        ml_set_interrupts_enabled(enabled);
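
With SAMPLER_MEMINFO in place, per-task memory information is sampled and logged for non-kernel contexts alongside the existing samplers. An illustrative configuration only (the action number is arbitrary):

    kperf_action_set_count(1);
    kperf_action_set_samplers(1, SAMPLER_TINFO | SAMPLER_KSTACK | SAMPLER_MEMINFO);
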
@@ -204,36 +235,37 @@ kperf_sample_internal( struct kperf_sample *sbuf,
 
 /* Translate actionid into sample bits and take a sample */
 kern_return_t
-kperf_sample( struct kperf_sample *sbuf,
-             struct kperf_context *context,
-              unsigned actionid, unsigned sample_flags )
+kperf_sample(struct kperf_sample *sbuf,
+             struct kperf_context *context,
+             unsigned actionid, unsigned sample_flags)
 {
        unsigned sample_what = 0;
        int pid_filter;
 
        /* work out what to sample, if anything */
-       if( (actionid > actionc) || (actionid == 0) )
+       if ((actionid > actionc) || (actionid == 0)) {
                return SAMPLE_SHUTDOWN;
+       }
 
        /* check the pid filter against the context's current pid.
         * filter pid == -1 means any pid
         */
-       pid_filter = actionv[actionid-1].pid_filter;
-       if( (pid_filter != -1)
-           && (pid_filter != context->cur_pid) )
+       pid_filter = actionv[actionid - 1].pid_filter;
+       if ((pid_filter != -1) && (pid_filter != context->cur_pid)) {
                return SAMPLE_CONTINUE;
+       }
 
        /* the samplers to run */
-       sample_what = actionv[actionid-1].sample;
+       sample_what = actionv[actionid - 1].sample;
 
        /* do the actual sample operation */
-       return kperf_sample_internal( sbuf, context, sample_what, 
-                                     sample_flags, actionid );
+       return kperf_sample_internal(sbuf, context, sample_what,
+                                    sample_flags, actionid);
 }
 
 /* ast callback on a thread */
 void
-kperf_thread_ast_handler( thread_t thread )
+kperf_thread_ast_handler(thread_t thread)
 {
        int r;
        uint32_t t_chud;
@@ -245,7 +277,7 @@ kperf_thread_ast_handler( thread_t thread )
 
        /* use ~2kb of the stack for the sample, should be ok since we're in the ast */
        struct kperf_sample sbuf;
-       bzero(&sbuf, sizeof(struct kperf_sample));
+       memset(&sbuf, 0, sizeof(struct kperf_sample));
 
        /* make a context, take a sample */
        struct kperf_context ctx;
@@ -253,58 +285,57 @@ kperf_thread_ast_handler( thread_t thread )
        ctx.cur_pid = -1;
 
        task = chudxnu_task_for_thread(thread);
-       if(task)
+       if (task) {
                ctx.cur_pid = chudxnu_pid_for_task(task);
+       }
 
        /* decode the chud bits so we know what to sample */
        t_chud = kperf_get_thread_bits(thread);
-       
-       if (t_chud & T_AST_NAME)
+
+       if (t_chud & T_AST_NAME) {
                sample_what |= SAMPLER_TINFOEX;
-       
-       if (t_chud & T_AST_CALLSTACK)
-       {
+       }
+
+       if (t_chud & T_AST_CALLSTACK) {
                sample_what |= SAMPLER_USTACK;
                sample_what |= SAMPLER_TINFO;
        }
 
        /* do the sample, just of the user stuff */
-       r = kperf_sample_internal( &sbuf, &ctx, sample_what, 0, 0 );
+       r = kperf_sample_internal(&sbuf, &ctx, sample_what, 0, 0);
 
        BUF_INFO1(PERF_AST_HNDLR | DBG_FUNC_END, r);
 }
 
 /* register AST bits */
 int
-kperf_ast_pend( thread_t cur_thread, uint32_t check_bits,
-               uint32_t set_bits )
+kperf_ast_pend(thread_t cur_thread, uint32_t check_bits,
+               uint32_t set_bits)
 {
        /* pend on the thread */
        uint32_t t_chud, set_done = 0;
+
        /* can only pend on the current thread */
-       if( cur_thread != chudxnu_current_thread() )
+       if (cur_thread != chudxnu_current_thread()) {
                panic("pending to non-current thread");
+       }
 
        /* get our current bits */
        t_chud = kperf_get_thread_bits(cur_thread);
 
        /* see if it's already been done or pended */
-       if( !(t_chud & check_bits ) )
-       {
+       if (!(t_chud & check_bits)) {
                /* set the bit on the thread */
                t_chud |= set_bits;
                kperf_set_thread_bits(cur_thread, t_chud);
 
                /* set the actual AST */
-               kperf_set_thread_ast( cur_thread );
+               kperf_set_thread_ast(cur_thread);
 
                set_done = 1;
        }
 
        return set_done;
-
-//     BUF_INFO3( dbg_code, (uintptr_t)cur_thread, t_chud, set_done );
 }
 
 /*
@@ -316,8 +347,9 @@ kperf_ast_pend( thread_t cur_thread, uint32_t check_bits,
 #define IS_MACH_SYSCALL(debugid)  (IS_END(debugid) && (KDBG_CLASS_DECODE(debugid) == KDBG_CLASS_ENCODE(DBG_MACH, DBG_MACH_EXCP_SC)))
 #define IS_VM_FAULT(debugid)      (IS_END(debugid) && (KDBG_CLASS_DECODE(debugid) == KDBG_CLASS_ENCODE(DBG_MACH, DBG_MACH_VM)))
 #define IS_BSD_SYSCTLL(debugid)   (IS_END(debugid) && (KDBG_CLASS_DECODE(debugid) == KDBG_CLASS_ENCODE(DBG_BSD, DBG_BSD_EXCP_SC)))
-#define IS_APPS_SIGNPOST(debugid) (IS_END(debugid) && (KDBG_CLASS_DECODE(debugid) == KDBG_CLASS_ENCODE(DBG_APPS, DBG_MACH_CHUD)))
-#define IS_MACH_SIGNPOST(debugid) (IS_END(debugid) && (KDBG_CLASS_DECODE(debugid) == KDBG_CLASS_ENCODE(DBG_MACH, DBG_MACH_CHUD)))
+#define IS_APPS_SIGNPOST(debugid) (KDBG_CLASS_DECODE(debugid) == KDBG_CLASS_ENCODE(DBG_APPS, DBG_MACH_CHUD))
+#define IS_MACH_SIGNPOST(debugid) (KDBG_CLASS_DECODE(debugid) == KDBG_CLASS_ENCODE(DBG_MACH, DBG_MACH_CHUD))
+#define IS_ENERGYTRACE(debugid)   ((debugid & 0xff000000U) == KDBG_CLASS_ENCODE((unsigned)DBG_ENERGYTRACE, 0U))
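
For readers decoding these macros: the exact definitions live in sys/kdebug.h and are not part of this hunk, but under the usual xnu debugid layout (class in bits 31-24, subclass in bits 23-16) they behave roughly as sketched below. Note that the signpost macros no longer require IS_END(), so both start and end signpost events now qualify, while IS_ENERGYTRACE() matches on the class byte alone.

    /* Assumed definitions (from sys/kdebug.h, not part of this hunk): */
    #define KDBG_CLASS_ENCODE(Class, SubClass) \
            ((((Class) & 0xff) << 24) | (((SubClass) & 0xff) << 16))
    #define KDBG_CLASS_DECODE(Debugid)  ((Debugid) & 0xFFFF0000)
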
 
 void
 kperf_kdebug_callback(uint32_t debugid)
@@ -325,90 +357,139 @@ kperf_kdebug_callback(uint32_t debugid)
        int cur_pid = 0;
        task_t task = NULL;
 
-       /* if we're not doing kperf callback stacks, return */
-       if( !kdebug_callstack_action )
+       if (!kdebug_callstacks && !kperf_signpost_action) {
                return;
+       }
 
        /* if we're looking at a kperf tracepoint, don't recurse */
-       if( (debugid & 0xff000000) == KDBG_CLASS_ENCODE(DBG_PERF, 0) )
+       if ((debugid & 0xff000000) == KDBG_CLASS_ENCODE(DBG_PERF, 0)) {
                return;
+       }
 
        /* ensure interrupts are already off thanks to kdebug */
-       if( ml_get_interrupts_enabled() )
+       if (ml_get_interrupts_enabled()) {
                return;
+       }
 
        /* make sure we're not being called recursively.  */
 #if NOTYET
-       if( kperf_kdbg_recurse(KPERF_RECURSE_IN) )
+       if (kperf_kdbg_recurse(KPERF_RECURSE_IN)) {
                return;
+       }
 #endif
 
        /* check the happy list of trace codes */
-       if( !( IS_MIG(debugid)
-              || IS_MACH_SYSCALL(debugid)
-              || IS_VM_FAULT(debugid)
-              || IS_BSD_SYSCTLL(debugid)
-              || IS_MACH_SIGNPOST(debugid)
-              || IS_APPS_SIGNPOST(debugid) ) )
+       if(!(IS_MIG(debugid)
+            || IS_MACH_SYSCALL(debugid)
+            || IS_VM_FAULT(debugid)
+            || IS_BSD_SYSCTLL(debugid)
+            || IS_MACH_SIGNPOST(debugid)
+            || IS_ENERGYTRACE(debugid)
+            || IS_APPS_SIGNPOST(debugid)))
+       {
                return;
+       }
 
        /* check for kernel */
        thread_t thread = chudxnu_current_thread();
        task = chudxnu_task_for_thread(thread);
-       if(task)
+       if (task) {
                cur_pid = chudxnu_pid_for_task(task);
-       if( !cur_pid )
+       }
+       if (!cur_pid) {
                return;
+       }
+
+       if (kdebug_callstacks) {
+               /* dicing with death */
+               BUF_INFO2(PERF_KDBG_HNDLR, debugid, cur_pid);
+
+               /* pend the AST */
+               kperf_ast_pend( thread, T_AST_CALLSTACK, T_AST_CALLSTACK );
+       }
 
+       if (kperf_signpost_action && (IS_MACH_SIGNPOST(debugid)
+           || IS_APPS_SIGNPOST(debugid)))
+       {
 #if NOTYET
-       /* setup a context */
-       struct kperf_context ctx;
-       struct kperf_sample *intbuf = NULL;
+               /* make sure we're not being called recursively.  */
+               if (kperf_kdbg_recurse(KPERF_RECURSE_IN)) {
+                       return;
+               }
+#endif
 
-       ctx.cur_thread = thread;
-       ctx.cur_pid = cur_pid;
-       ctx.trigger_type = TRIGGER_TYPE_TRACE;
-       ctx.trigger_id = 0;
-
-       /* CPU sample buffer -- only valid with interrupts off (above)
-        * Technically this isn't true -- tracepoints can, and often
-        * are, cut from interrupt handlers, but none of those tracepoints
-        * should make it this far.
-        */
-       intbuf = kperf_intr_sample_buffer();
-
-       /* do the sample */
-       kperf_sample( intbuf, &ctx, kdebug_callstack_action, SAMPLE_FLAG_PEND_USER );
-       
-       /* no longer recursive */
-       kperf_kdbg_recurse(KPERF_RECURSE_OUT);
-#else
-       /* dicing with death */
-       BUF_INFO2(PERF_KDBG_HNDLR, debugid, cur_pid);
-
-       /* pend the AST */
-       kperf_ast_pend( thread, T_AST_CALLSTACK, T_AST_CALLSTACK );
+               /* setup a context */
+               struct kperf_context ctx;
+               struct kperf_sample *intbuf = NULL;
+               BUF_INFO2(PERF_SIGNPOST_HNDLR | DBG_FUNC_START, debugid, cur_pid);
+
+               ctx.cur_thread = thread;
+               ctx.cur_pid = cur_pid;
+               ctx.trigger_type = TRIGGER_TYPE_TRACE;
+               ctx.trigger_id = 0;
+
+               /* CPU sample buffer -- only valid with interrupts off (above)
+                * Technically this isn't true -- tracepoints can, and often
+                * are, cut from interrupt handlers, but none of those tracepoints
+                * should make it this far.
+                */
+               intbuf = kperf_intr_sample_buffer();
+
+               /* do the sample */
+               kperf_sample(intbuf, &ctx, kperf_signpost_action,
+                            SAMPLE_FLAG_PEND_USER);
+
+               BUF_INFO2(PERF_SIGNPOST_HNDLR | DBG_FUNC_END, debugid, cur_pid);
+#if NOTYET
+               /* no longer recursive */
+               kperf_kdbg_recurse(KPERF_RECURSE_OUT);
 #endif
+       }
+}
+
+static void
+kperf_kdbg_callback_update(void)
+{
+       unsigned old_callback_set = kperf_kdbg_callback_set;
+
+       /* compute new callback state */
+       kperf_kdbg_callback_set = kdebug_callstacks || kperf_signpost_action;
 
+       if (old_callback_set && !kperf_kdbg_callback_set) {
+               /* callback should no longer be set */
+               chudxnu_kdebug_callback_cancel();
+       } else if (!old_callback_set && kperf_kdbg_callback_set) {
+               /* callback must now be set */
+               chudxnu_kdebug_callback_enter(NULL);
+       }
 }
 
 int
 kperf_kdbg_get_stacks(void)
 {
-       return kdebug_callstack_action;
+       return kdebug_callstacks;
 }
 
 int
 kperf_kdbg_set_stacks(int newval)
 {
-       /* set the value */
-       kdebug_callstack_action = newval;
+       kdebug_callstacks = newval;
+       kperf_kdbg_callback_update();
 
-       /* enable the callback from kdebug */
-       if( newval )
-               chudxnu_kdebug_callback_enter(NULL);
-       else
-               chudxnu_kdebug_callback_cancel();
+       return 0;
+}
+
+int
+kperf_signpost_action_get(void)
+{
+       return kperf_signpost_action;
+}
+
+int
+kperf_signpost_action_set(int newval)
+{
+       kperf_signpost_action = newval;
+       kperf_kdbg_callback_update();
 
        return 0;
 }
@@ -419,7 +500,7 @@ kperf_kdbg_set_stacks(int newval)
 
 /* called from context switch handler */
 void
-kperf_switch_context( __unused thread_t old, thread_t new )
+kperf_switch_context(__unused thread_t old, thread_t new)
 {
        task_t task = get_threadtask(new);
        int pid = chudxnu_pid_for_task(task);
@@ -427,7 +508,81 @@ kperf_switch_context( __unused thread_t old, thread_t new )
        /* cut a tracepoint to tell us what the new thread's PID is
         * for Instruments
         */
-       BUF_DATA2( PERF_TI_CSWITCH, thread_tid(new), pid );
+       BUF_DATA2(PERF_TI_CSWITCH, thread_tid(new), pid);
+
+       /* trigger action after counters have been updated */
+       if (kperf_cswitch_action) {
+               struct kperf_sample sbuf;
+               struct kperf_context ctx;
+               int r;
+
+               BUF_DATA1(PERF_CSWITCH_HNDLR | DBG_FUNC_START, 0);
+
+               ctx.cur_pid = 0;
+               ctx.cur_thread = old;
+
+               /* get PID for context */
+               task_t old_task = chudxnu_task_for_thread(ctx.cur_thread);
+               if (old_task) {
+                       ctx.cur_pid = chudxnu_pid_for_task(old_task);
+               }
+
+               ctx.trigger_type = TRIGGER_TYPE_CSWITCH;
+               ctx.trigger_id = 0;
+
+               r = kperf_sample(&sbuf, &ctx, kperf_cswitch_action,
+                                    SAMPLE_FLAG_PEND_USER);
+
+               BUF_INFO1(PERF_CSWITCH_HNDLR | DBG_FUNC_END, r);
+       }
+}
+
+static void
+kperf_cswitch_callback_update(void)
+{
+       unsigned old_callback_set = kperf_cswitch_callback_set;
+
+       unsigned new_callback_set = kdebug_cswitch || kperf_cswitch_action;
+
+       if (old_callback_set && !new_callback_set) {
+               kperf_cswitch_callback_set = 0;
+       } else if (!old_callback_set && new_callback_set) {
+               kperf_cswitch_callback_set = 1;
+       } else {
+               return;
+       }
+
+       kperf_kpc_cswitch_callback_update();
+}
+
+int
+kperf_kdbg_cswitch_get(void)
+{
+       return kdebug_cswitch;
+}
+
+int
+kperf_kdbg_cswitch_set(int newval)
+{
+       kdebug_cswitch = newval;
+       kperf_cswitch_callback_update();
+
+       return 0;
+}
+
+int
+kperf_cswitch_action_get(void)
+{
+       return kperf_cswitch_action;
+}
+
+int
+kperf_cswitch_action_set(int newval)
+{
+       kperf_cswitch_action = newval;
+       kperf_cswitch_callback_update();
+
+       return 0;
 }
 
 /*
@@ -440,78 +595,91 @@ kperf_action_get_count(void)
 }
 
 int
-kperf_action_set_samplers( unsigned actionid, uint32_t samplers )
+kperf_action_set_samplers(unsigned actionid, uint32_t samplers)
 {
-       if( (actionid > actionc) || (actionid == 0) )
+       if ((actionid > actionc) || (actionid == 0)) {
+               return EINVAL;
+       }
+
+       /* disallow both CPU and thread counters to be sampled in the same
+        * action */
+       if ((samplers & SAMPLER_PMC_THREAD) && (samplers & SAMPLER_PMC_CPU)) {
                return EINVAL;
+       }
 
-       actionv[actionid-1].sample = samplers;
+       actionv[actionid - 1].sample = samplers;
 
        return 0;
 }
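
Illustrative only: the new check makes thread-PMC and CPU-PMC sampling mutually exclusive within a single action.

    kperf_action_set_samplers(1, SAMPLER_PMC_THREAD | SAMPLER_PMC_CPU);  /* rejected: EINVAL */
    kperf_action_set_samplers(1, SAMPLER_PMC_THREAD | SAMPLER_KSTACK);   /* accepted: returns 0 */
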
 
 int
-kperf_action_get_samplers( unsigned actionid, uint32_t *samplers_out )
+kperf_action_get_samplers(unsigned actionid, uint32_t *samplers_out)
 {
-       if( (actionid > actionc) )
+       if ((actionid > actionc)) {
                return EINVAL;
+       }
 
-       if( actionid == 0 )
+       if (actionid == 0) {
                *samplers_out = 0; /* "NULL" action */
-       else
-               *samplers_out = actionv[actionid-1].sample;
+       } else {
+               *samplers_out = actionv[actionid - 1].sample;
+       }
 
        return 0;
 }
 
 int
-kperf_action_set_userdata( unsigned actionid, uint32_t userdata )
+kperf_action_set_userdata(unsigned actionid, uint32_t userdata)
 {
-       if( (actionid > actionc) || (actionid == 0) )
+       if ((actionid > actionc) || (actionid == 0)) {
                return EINVAL;
+       }
 
-       actionv[actionid-1].userdata = userdata;
+       actionv[actionid - 1].userdata = userdata;
 
        return 0;
 }
 
 int
-kperf_action_get_userdata( unsigned actionid, uint32_t *userdata_out )
+kperf_action_get_userdata(unsigned actionid, uint32_t *userdata_out)
 {
-       if( (actionid > actionc) )
+       if ((actionid > actionc)) {
                return EINVAL;
+       }
 
-       if( actionid == 0 )
+       if (actionid == 0) {
                *userdata_out = 0; /* "NULL" action */
-       else
-               *userdata_out = actionv[actionid-1].userdata;
+       } else {
+               *userdata_out = actionv[actionid - 1].userdata;
+       }
 
        return 0;
 }
 
 int
-kperf_action_set_filter( unsigned actionid,
-                        int pid )
+kperf_action_set_filter(unsigned actionid, int pid)
 {
-       if( (actionid > actionc) || (actionid == 0) )
+       if ((actionid > actionc) || (actionid == 0)) {
                return EINVAL;
+       }
 
-       actionv[actionid-1].pid_filter = pid;
+       actionv[actionid - 1].pid_filter = pid;
 
        return 0;
 }
 
 int
-kperf_action_get_filter( unsigned actionid,
-                        int *pid_out )
+kperf_action_get_filter(unsigned actionid, int *pid_out)
 {
-       if( (actionid > actionc) )
+       if ((actionid > actionc)) {
                return EINVAL;
+       }
 
-       if( actionid == 0 )
+       if (actionid == 0) {
                *pid_out = -1; /* "NULL" action */
-       else
-               *pid_out = actionv[actionid-1].pid_filter;
+       } else {
+               *pid_out = actionv[actionid - 1].pid_filter;
+       }
 
        return 0;
 }
@@ -523,50 +691,57 @@ kperf_action_set_count(unsigned count)
        unsigned old_count, i;
 
        /* easy no-op */
-       if( count == actionc )
+       if (count == actionc) {
                return 0;
+       }
 
        /* TODO: allow shrinking? */
-       if( count < actionc )
+       if (count < actionc) {
                return EINVAL;
+       }
 
        /* cap it for good measure */
-       if( count > ACTION_MAX )
+       if (count > ACTION_MAX) {
                return EINVAL;
+       }
 
        /* creating the action array for the first time. create a few
         * more things, too.
         */
-               if( actionc == 0 )
-       {
+       if (actionc == 0) {
                int r;
                r = kperf_init();
 
-               if( r != 0 )
+               if (r != 0) {
                        return r;
+               }
        }
 
        /* create a new array */
-       new_actionv = kalloc( count * sizeof(*new_actionv) );
-       if( new_actionv == NULL )
+       new_actionv = kalloc(count * sizeof(*new_actionv));
+       if (new_actionv == NULL) {
                return ENOMEM;
+       }
 
        old_actionv = actionv;
        old_count = actionc;
 
-       if( old_actionv != NULL )
-               bcopy( actionv, new_actionv, actionc * sizeof(*actionv) );
+       if (old_actionv != NULL) {
+               memcpy(new_actionv, actionv, actionc * sizeof(*actionv));
+       }
 
-       bzero( &new_actionv[actionc], (count - old_count) * sizeof(*actionv) );
+       memset(&(new_actionv[actionc]), 0, (count - old_count) * sizeof(*actionv));
 
-       for( i = old_count; i < count; i++ )
+       for (i = old_count; i < count; i++) {
                new_actionv[i].pid_filter = -1;
+       }
 
        actionv = new_actionv;
        actionc = count;
 
-       if( old_actionv != NULL )
-               kfree( old_actionv, old_count * sizeof(*actionv) );
+       if (old_actionv != NULL) {
+               kfree(old_actionv, old_count * sizeof(*actionv));
+       }
 
        return 0;
 }
index 1ea3f31695ee74ca3b2b31a46b2589e1a3e7e03d..01f103f5c1ce82ac9ab6263925040b34dbf0d6ac 100644 (file)
 struct kperf_sample;
 struct kperf_context;
 
-
 /* bits for defining what to do on an action */
 #define SAMPLER_TINFO      (1<<0)
 #define SAMPLER_TINFOEX    (1<<1)
 #define SAMPLER_KSTACK     (1<<2)
 #define SAMPLER_USTACK     (1<<3)
-#define SAMPLER_PMC_THREAD (1<<4) /* FIXME: not implemented */
+#define SAMPLER_PMC_THREAD (1<<4)
 #define SAMPLER_PMC_CPU    (1<<5)
 #define SAMPLER_PMC_CONFIG (1<<6)
+#define SAMPLER_MEMINFO    (1<<7)
 
 /* flags for sample calls*/
 #define SAMPLE_FLAG_PEND_USER    (1<<0)
@@ -46,10 +46,10 @@ struct kperf_context;
 #define SAMPLE_FLAG_EMPTY_CALLSTACK (1<<2)
 
 /*  Take a sample into "sbuf" using current thread "cur_thread" */
-extern kern_return_t kperf_sample( struct kperf_sample *sbuf, 
-                                  struct kperf_context*, 
-                                   unsigned actionid,
-                                   unsigned sample_flags );
+extern kern_return_t kperf_sample(struct kperf_sample *sbuf,
+                                  struct kperf_context*,
+                                  unsigned actionid,
+                                  unsigned sample_flags);
 
 /* return codes from taking a sample
  * either keep trigger, or something went wrong (or we're shutting down)
@@ -68,17 +68,17 @@ extern struct kperf_sample* kperf_intr_sample_buffer(void);
 extern unsigned kperf_action_get_count(void);
 extern int kperf_action_set_count(unsigned count);
 
-extern int kperf_action_set_samplers( unsigned actionid,
-                                      uint32_t samplers );
-extern int kperf_action_get_samplers( unsigned actionid,
-                                      uint32_t *samplers_out );
+extern int kperf_action_set_samplers(unsigned actionid,
+                                     uint32_t samplers);
+extern int kperf_action_get_samplers(unsigned actionid,
+                                     uint32_t *samplers_out);
 
-extern int kperf_action_set_userdata( unsigned actionid,
-                                      uint32_t userdata );
-extern int kperf_action_get_userdata( unsigned actionid,
-                                      uint32_t *userdata_out );
+extern int kperf_action_set_userdata(unsigned actionid,
+                                     uint32_t userdata);
+extern int kperf_action_get_userdata(unsigned actionid,
+                                     uint32_t *userdata_out);
 
-extern int kperf_action_set_filter( unsigned actionid,
-                                   int pid );
-extern int kperf_action_get_filter( unsigned actionid,
-                                   int *pid_out );
+extern int kperf_action_set_filter(unsigned actionid,
+                                   int pid);
+extern int kperf_action_get_filter(unsigned actionid,
+                                   int *pid_out);
index 28673a5a98fe826e7a238ad3865182c75dd1ad48..b46d46cec29200b8386d482f0ad7bfd1df719d61 100644 (file)
@@ -42,6 +42,9 @@
 #define PERF_AST        (5)
 #define PERF_KPC        (6)
 #define PERF_KDBG       (7)
+#define PERF_CSWITCH    (8)
+#define PERF_SIGNPOST   (9)
+#define PERF_MEMINFO    (10)
 
 /* sub-class codes */
 #define PERF_GEN_CODE(code) PERF_CODE(PERF_GENERIC, code)
 #define PERF_AST_HNDLR      PERF_AST_CODE(0)
 #define PERF_AST_ERROR      PERF_AST_CODE(1)
 
-#define PERF_KPC_CODE(code) PERF_CODE(PERF_KPC, code)
-#define PERF_KPC_HNDLR      PERF_KPC_CODE(0)
-#define PERF_KPC_FCOUNTER   PERF_KPC_CODE(1)
-#define PERF_KPC_COUNTER    PERF_KPC_CODE(2)
-#define PERF_KPC_DATA       PERF_KPC_CODE(3)
-#define PERF_KPC_CONFIG     PERF_KPC_CODE(4)
-#define PERF_KPC_CFG_REG    PERF_KPC_CODE(5)
-#define PERF_KPC_DATA32     PERF_KPC_CODE(6)
-#define PERF_KPC_CFG_REG32  PERF_KPC_CODE(7)
+#define PERF_KPC_CODE(code)    PERF_CODE(PERF_KPC, code)
+#define PERF_KPC_HNDLR         PERF_KPC_CODE(0)
+#define PERF_KPC_FCOUNTER      PERF_KPC_CODE(1)
+#define PERF_KPC_COUNTER       PERF_KPC_CODE(2)
+#define PERF_KPC_DATA          PERF_KPC_CODE(3)
+#define PERF_KPC_CONFIG        PERF_KPC_CODE(4)
+#define PERF_KPC_CFG_REG       PERF_KPC_CODE(5)
+#define PERF_KPC_DATA32        PERF_KPC_CODE(6)
+#define PERF_KPC_CFG_REG32     PERF_KPC_CODE(7)
+#define PERF_KPC_DATA_THREAD   PERF_KPC_CODE(8)
+#define PERF_KPC_DATA_THREAD32 PERF_KPC_CODE(9)
 
 #define PERF_KDBG_CODE(code) PERF_CODE(PERF_KDBG, code)
 #define PERF_KDBG_HNDLR      PERF_KDBG_CODE(0)
 
+#define PERF_CSWITCH_CODE(code) PERF_CODE(PERF_CSWITCH, code)
+#define PERF_CSWITCH_HNDLR      PERF_CSWITCH_CODE(0)
+
+#define PERF_SIGNPOST_CODE(code) PERF_CODE(PERF_SIGNPOST, code)
+#define PERF_SIGNPOST_HNDLR      PERF_SIGNPOST_CODE(0)
+
+#define PERF_MI_CODE(code) PERF_CODE(PERF_MEMINFO, code)
+#define PERF_MI_SAMPLE     PERF_MI_CODE(0)
+#define PERF_MI_DATA       PERF_MI_CODE(1)
+
 /* error sub-codes for trace data */
 enum
 {
index 4bedab65baf67f62a4600d7028110dec3e8941dc..89bf40f72d4204a3dd4dfc68f5b32eff51166778 100644 (file)
@@ -53,7 +53,7 @@ callstack_sample( struct callstack *cs,
        else
                code = PERF_CS_KSAMPLE;
 
-       BUF_INFO1( code, (uintptr_t)context->cur_thread );
+       BUF_INFO1( code, (uintptr_t)thread_tid(context->cur_thread) );
 
        /* fill out known flags */
        cs->flags = 0;
@@ -95,7 +95,7 @@ callstack_sample( struct callstack *cs,
                cs->nframes = 0;
        }
 
-       if( cs->nframes >= MAX_CALLSTACK_FRAMES )
+       if( cs->nframes > MAX_CALLSTACK_FRAMES )
        {
                /* necessary? */
                BUF_INFO1(PERF_CS_ERROR, ERR_FRAMES);
index e06b9f9784fa9eb902de73bf007d9e78eabbd86e..f1e232aa1bb554a6fb70dfb7e550bc3fc5785a83 100644 (file)
@@ -26,6 +26,8 @@
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
+#include <kern/thread.h>
+
 /* context of what we're looking at */
 struct kperf_context
 {
index 12b168acd8498d6cb3b234667bfa5872c328cbe3..01c976d73d0d8b35f424c239b1c2d80185a3d4d8 100644 (file)
 #ifndef __KPERF_H__
 #define __KPERF_H__
 
+#include <kern/thread.h>
+
 /* The various trigger types supported by kperf */
-#define TRIGGER_TYPE_TIMER (0)
-#define TRIGGER_TYPE_PMI   (1)
-#define TRIGGER_TYPE_TRACE (2)
+#define TRIGGER_TYPE_TIMER   (0)
+#define TRIGGER_TYPE_PMI     (1)
+#define TRIGGER_TYPE_TRACE   (2)
+#define TRIGGER_TYPE_CSWITCH (3)
 
 /* Helpers to get and set AST bits on a thread */
 extern uint32_t kperf_get_thread_bits( thread_t thread );
@@ -66,6 +69,20 @@ extern void kperf_kdebug_callback(uint32_t debugid);
 extern int kperf_kdbg_get_stacks(void);
 extern int kperf_kdbg_set_stacks(int);
 
+/* get and set whether to trigger an action on signposts */
+extern int kperf_signpost_action_get(void);
+extern int kperf_signpost_action_set(int newval);
+
+extern int kperf_cswitch_callback_set;
+
+/* get and set whether to output tracepoints on context-switch */
+extern int kperf_kdbg_cswitch_get(void);
+extern int kperf_kdbg_cswitch_set(int newval);
+
+/* get and set whether to trigger an action on context-switch */
+extern int kperf_cswitch_action_get(void);
+extern int kperf_cswitch_action_set(int newval);
+
 /* given a task port, find out its pid */
 int kperf_port_to_pid(mach_port_name_t portname);
 
@@ -77,10 +94,9 @@ extern int kperf_access_check(void);
 /* track recursion on kdebug tracepoint tracking */
 extern int kperf_kdbg_recurse(int step);
 #define KPERF_RECURSE_IN  (1)
-#define KPERF_RECURSE_out (-1)
+#define KPERF_RECURSE_OUT (-1)
 
 /* context switch tracking */
-extern int  kperf_cswitch_hook;
 extern void kperf_switch_context( thread_t old, thread_t new );
 
 /* bootstrap */
index 9019d6562170155afe8db56834d7427150e9fa2f..7142c3fe93277312201760ffcd02e2272926daf7 100644 (file)
@@ -38,5 +38,6 @@
 /* common definitions */
 extern int kperf_mp_broadcast( void (*func)(void*), void *arg );
 extern int kperf_mp_signal(void);
+extern kern_return_t kperf_get_phys_footprint(task_t, uint64_t *);
 
 #endif /* _KPERF_ARCH_H */
index fc474262f0ff42ec9c3063e3f23b15aa33b4a4ff..b0a4c0a7a586730ee3d73338406984e5366aa7ec 100644 (file)
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
+/*  Sample KPC data into kperf and manage a shared context-switch handler */
 
-/*  Sample KPC data into kperf */
-
-#include <mach/mach_types.h>
-#include <kern/thread.h> /* thread_* */
-#include <kern/debug.h> /* panic */
-// #include <sys/proc.h>
-
-#include <chud/chud_xnu.h>
 #include <kperf/kperf.h>
-
 #include <kperf/buffer.h>
 #include <kperf/context.h>
-
 #include <kperf/kperf_kpc.h>
+#include <kern/kpc.h> /* kpc_cswitch_context, kpc_threads_counting */
 
-/* If we have kperf enabled, but not KPC */
-#if KPC
+unsigned kperf_kpc_cswitch_set = 0;
 
 void
-kperf_kpc_cpu_sample( struct kpcdata *kpcd, int sample_config )
+kperf_kpc_switch_context(thread_t old, thread_t new)
 {
-       kpcd->running  = kpc_get_running();
-       kpcd->counterc = kpc_get_cpu_counters(0, kpcd->running,
-                                             &kpcd->curcpu, kpcd->counterv);
-       if( !sample_config )
+       if (kpc_threads_counting) {
+               kpc_switch_context(old, new);
+       }
+       if (kperf_cswitch_callback_set) {
+               kperf_switch_context(old, new);
+       }
+}
+
+void
+kperf_kpc_cswitch_callback_update(void)
+{
+       kperf_kpc_cswitch_set = kperf_cswitch_callback_set ||
+                               kpc_threads_counting;
+}
+
+void
+kperf_kpc_thread_sample(struct kpcdata *kpcd, int sample_config)
+{
+       kpcd->running = kpc_get_running();
+       /* let kpc_get_curthread_counters set the correct count */
+       kpcd->counterc = KPC_MAX_COUNTERS;
+       if (kpc_get_curthread_counters(&kpcd->counterc,
+                                      kpcd->counterv)) {
+               /* if thread counters aren't ready, default to 0 */
+               memset(kpcd->counterv, 0,
+                      sizeof(uint64_t) * kpcd->counterc);
+       }
+       /* help out Instruments */
+       if (!sample_config) {
                kpcd->configc = 0;
-       else
-       {
+       } else {
                kpcd->configc = kpc_get_config_count(kpcd->running);
                kpc_get_config(kpcd->running, kpcd->configv);
        }
-       
 }
 
 void
-kperf_kpc_cpu_log( struct kpcdata *kpcd )
+kperf_kpc_cpu_sample(struct kpcdata *kpcd, int sample_config)
 {
-       unsigned i;
+       kpcd->running  = kpc_get_running();
+       kpcd->counterc = kpc_get_cpu_counters(0, kpcd->running,
+                                             &kpcd->curcpu,
+                                             kpcd->counterv);
+       if (!sample_config) {
+               kpcd->configc = 0;
+       } else {
+               kpcd->configc = kpc_get_config_count(kpcd->running);
+               kpc_get_config(kpcd->running, kpcd->configv);
+       }
+}
 
-       /* cut a config for instruments -- what's running and
-        * how many fixed counters there are
-        */
+static void
+kperf_kpc_config_log(const struct kpcdata *kpcd)
+{
        BUF_DATA(PERF_KPC_CONFIG,
                 kpcd->running,
                 kpcd->counterc,
                 kpc_get_counter_count(KPC_CLASS_FIXED_MASK),
                 kpcd->configc);
+}
+
+static void
+kperf_kpc_log(uint32_t code, uint32_t code32, const struct kpcdata *kpcd)
+{
+       unsigned i;
 
 #if __LP64__
-       /* config registers, if they were asked for */
-       for (i = 0; i < ((kpcd->configc+3) / 4); i++) {
-               BUF_DATA( PERF_KPC_CFG_REG,
-                         kpcd->configv[0 + i * 4],
-                         kpcd->configv[1 + i * 4],
-                         kpcd->configv[2 + i * 4],
-                         kpcd->configv[3 + i * 4] );
+       (void)code32;
+       /* config registers */
+       for (i = 0; i < ((kpcd->configc + 3) / 4); i++) {
+               BUF_DATA(PERF_KPC_CFG_REG,
+                        kpcd->configv[0 + i * 4],
+                        kpcd->configv[1 + i * 4],
+                        kpcd->configv[2 + i * 4],
+                        kpcd->configv[3 + i * 4]);
        }
 
-       /* and the actual data -- 64-bit trace entries */
-       for (i = 0; i < ((kpcd->counterc+3) / 4); i++) {
-               BUF_DATA( PERF_KPC_DATA,
-                         kpcd->counterv[0 + i * 4],
-                         kpcd->counterv[1 + i * 4],
-                         kpcd->counterv[2 + i * 4],
-                         kpcd->counterv[3 + i * 4] );
+       /* and the actual counts with one 64-bit argument each */
+       for (i = 0; i < ((kpcd->counterc + 3) / 4); i++) {
+               BUF_DATA(code,
+                        kpcd->counterv[0 + i * 4],
+                        kpcd->counterv[1 + i * 4],
+                        kpcd->counterv[2 + i * 4],
+                        kpcd->counterv[3 + i * 4]);
        }
-
 #else
-       /* config registers, if requested */
-       for (i = 0; i < ((kpcd->configc+1) / 2); i++) {
-               BUF_DATA( PERF_KPC_CFG_REG32,
-                         (kpcd->configv[0 + i * 2] >> 32ULL),
-                          kpcd->configv[0 + i * 2] & 0xffffffffULL,
-                         (kpcd->configv[1 + i * 2] >> 32ULL),
-                          kpcd->configv[1 + i * 2] & 0xffffffffULL );
+       (void)code;
+       /* config registers */
+       for (i = 0; i < ((kpcd->configc + 1) / 2); i++) {
+               BUF_DATA(PERF_KPC_CFG_REG32,
+                        (kpcd->configv[0 + i * 2] >> 32ULL),
+                        kpcd->configv[0 + i * 2] & 0xffffffffULL,
+                        (kpcd->configv[1 + i * 2] >> 32ULL),
+                        kpcd->configv[1 + i * 2] & 0xffffffffULL);
        }
 
-       /* and the actual data -- two counters per tracepoint */
-       for (i = 0; i < ((kpcd->counterc+1) / 2); i++) {
-               BUF_DATA( PERF_KPC_DATA32,
-                         (kpcd->counterv[0 + i * 2] >> 32ULL),
-                          kpcd->counterv[0 + i * 2] & 0xffffffffULL,
-                         (kpcd->counterv[1 + i * 2] >> 32ULL),
-                          kpcd->counterv[1 + i * 2] & 0xffffffffULL );
+       /* and the actual counts with two 32-bit trace arguments each */
+       for (i = 0; i < ((kpcd->counterc + 1) / 2); i++) {
+               BUF_DATA(code32,
+                        (kpcd->counterv[0 + i * 2] >> 32ULL),
+                        kpcd->counterv[0 + i * 2] & 0xffffffffULL,
+                        (kpcd->counterv[1 + i * 2] >> 32ULL),
+                        kpcd->counterv[1 + i * 2] & 0xffffffffULL);
        }
 #endif
 }
 
-#endif /* KPC */
+void
+kperf_kpc_cpu_log(const struct kpcdata *kpcd)
+{
+       kperf_kpc_config_log(kpcd);
+       kperf_kpc_log(PERF_KPC_DATA, PERF_KPC_DATA32, kpcd);
+}
+
+void
+kperf_kpc_thread_log(const struct kpcdata *kpcd)
+{
+       kperf_kpc_config_log(kpcd);
+       kperf_kpc_log(PERF_KPC_DATA_THREAD, PERF_KPC_DATA_THREAD32, kpcd);
+}
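
The shared logger packs counter values four per tracepoint on LP64 and two per tracepoint (split into 32-bit halves) otherwise. A worked example of the 32-bit path, with made-up values:

    /* counterv[0] = 0x0000001200000034
     *   -> BUF_DATA(code32, 0x12, 0x00000034, hi(counterv[1]), lo(counterv[1]))
     * counterc = 5 counters  ->  (5 + 1) / 2 = 3 tracepoints, the last padded.  */
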
index 91df04c0fc13bbb6bf856c440edda2c0aba4ee27..d4dc8d814ed6acd1f6980d24699c0462ed151679 100644 (file)
 #include <kern/kpc.h> /* KPC_MAX_COUNTERS */
 #endif
 
+/* controls whether a context-switch handler is invoked */
+extern unsigned kperf_kpc_cswitch_set;
+
+void kperf_kpc_switch_context(thread_t old, thread_t new);
+void kperf_kpc_cswitch_callback_update(void);
+
+/* for osfmk/platform/pcb.c context switches */
+static inline void
+kperf_kpc_cswitch(thread_t old, thread_t new)
+{
+       if (!kperf_kpc_cswitch_set) {
+               return;
+       }
+
+       kperf_kpc_switch_context(old, new);
+}
+
 /* KPC sample data */
 struct kpcdata
 {
@@ -44,8 +61,9 @@ struct kpcdata
        uint64_t configv[KPC_MAX_COUNTERS];
 };
 
-
-void kperf_kpc_cpu_sample( struct kpcdata *, int );
-void kperf_kpc_cpu_log( struct kpcdata * );
+void kperf_kpc_thread_sample(struct kpcdata *, int);
+void kperf_kpc_cpu_sample(struct kpcdata *, int);
+void kperf_kpc_thread_log(const struct kpcdata *);
+void kperf_kpc_cpu_log(const struct kpcdata *);
 
 #endif /* __KPERF_KPC_H__ */
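
The new inline wrapper keeps the context-switch fast path cheap: callers test kperf_kpc_cswitch_set with a single load before paying for the out-of-line kperf_kpc_switch_context() call. A hedged sketch of a call site (the surrounding function is hypothetical; the real hook sits in the platform context-switch code referenced in the comment above):

/* hypothetical call site, assuming kperf_kpc.h is included */
static inline void
example_context_switch_hook(thread_t old, thread_t new)
{
        kperf_kpc_cswitch(old, new);    /* no-op unless a cswitch handler is armed */
}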
index d712fd0d0c5eb5ac18703b1f0558559acf200275..2a6554ab62af22aef6eec65a51ec96e5497c9afd 100644 (file)
@@ -64,6 +64,9 @@ static boolean_t blessed_preempt = FALSE;
 #define REQ_KDBG_CALLSTACKS (12)
 #define REQ_PET_IDLE_RATE   (13)
 #define REQ_BLESS_PREEMPT   (14)
+#define REQ_KDBG_CSWITCH    (15)
+#define REQ_CSWITCH_ACTION  (16)
+#define REQ_SIGNPOST_ACTION (17)
 
 /* simple state variables */
 int kperf_debug_level = 0;
@@ -450,6 +453,45 @@ sysctl_pet_idle_rate( struct sysctl_oid *oidp, struct sysctl_req *req )
     return error;
 }
 
+static int
+sysctl_kdbg_cswitch( struct sysctl_oid *oidp, struct sysctl_req *req )
+{
+    int value = kperf_kdbg_cswitch_get();
+    int error = sysctl_handle_int(oidp, &value, 0, req);
+
+    if (error || !req->newptr) {
+        return error;
+    }
+
+    return kperf_kdbg_cswitch_set(value);
+}
+
+static int
+sysctl_cswitch_action( struct sysctl_oid *oidp, struct sysctl_req *req )
+{
+    int value = kperf_cswitch_action_get();
+    int error = sysctl_handle_int(oidp, &value, 0, req);
+
+    if (error || !req->newptr) {
+        return error;
+    }
+
+    return kperf_cswitch_action_set(value);
+}
+
+static int
+sysctl_signpost_action( struct sysctl_oid *oidp, struct sysctl_req *req )
+{
+    int value = kperf_signpost_action_get();
+    int error = sysctl_handle_int(oidp, &value, 0, req);
+
+    if (error || !req->newptr) {
+        return error;
+    }
+
+    return kperf_signpost_action_set(value);
+}
+
 /*
  * #define SYSCTL_HANDLER_ARGS (struct sysctl_oid *oidp,         \
  *                                void *arg1, int arg2,                 \
@@ -503,6 +545,9 @@ kperf_sysctl SYSCTL_HANDLER_ARGS
        case REQ_KDBG_CALLSTACKS:
                ret = sysctl_kdbg_callstacks( oidp, req );
                break;
+       case REQ_KDBG_CSWITCH:
+               ret = sysctl_kdbg_cswitch( oidp, req );
+               break;
        case REQ_ACTION_FILTER_BY_TASK:
                ret = sysctl_action_filter( oidp, req, 1 );
                break;
@@ -515,6 +560,12 @@ kperf_sysctl SYSCTL_HANDLER_ARGS
        case REQ_BLESS_PREEMPT:
                ret = sysctl_bless_preempt( oidp, req );
                break;
+       case REQ_CSWITCH_ACTION:
+               ret = sysctl_cswitch_action( oidp, req );
+               break;
+       case REQ_SIGNPOST_ACTION:
+               ret = sysctl_signpost_action( oidp, req );
+               break;
        default:
                ret = ENOENT;
                break;
@@ -548,7 +599,6 @@ kperf_sysctl_bless_handler SYSCTL_HANDLER_ARGS
        return ret;
 }
 
-
 /***************************
  *
  * Access control
@@ -715,21 +765,31 @@ SYSCTL_PROC(_kperf, OID_AUTO, blessed_preempt,
             (void*)REQ_BLESS_PREEMPT, 
             sizeof(int), kperf_sysctl, "I", "Blessed preemption");
 
-
 SYSCTL_PROC(_kperf, OID_AUTO, kdbg_callstacks,
             CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY,
             (void*)REQ_KDBG_CALLSTACKS, 
             sizeof(int), kperf_sysctl, "I", "Generate kdbg callstacks");
 
-SYSCTL_INT(_kperf, OID_AUTO, kdbg_cswitch, 
-           CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, 
-           &kperf_cswitch_hook, 0, "Generate context switch info");
+SYSCTL_PROC(_kperf, OID_AUTO, kdbg_cswitch,
+            CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY,
+            (void *)REQ_KDBG_CSWITCH,
+            sizeof(int), kperf_sysctl, "I", "Generate context switch info");
 
 SYSCTL_PROC(_kperf, OID_AUTO, pet_idle_rate,
             CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY,
             (void*)REQ_PET_IDLE_RATE,
             sizeof(int), kperf_sysctl, "I", "Rate at which unscheduled threads are forced to be sampled in PET mode");
 
+SYSCTL_PROC(_kperf, OID_AUTO, cswitch_action,
+            CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY,
+            (void*)REQ_CSWITCH_ACTION,
+            sizeof(int), kperf_sysctl, "I", "ID of action to trigger on context-switch");
+
+SYSCTL_PROC(_kperf, OID_AUTO, signpost_action,
+            CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY,
+            (void*)REQ_SIGNPOST_ACTION,
+            sizeof(int), kperf_sysctl, "I", "ID of action to trigger on signposts");
+
 /* debug */
 SYSCTL_INT(_kperf, OID_AUTO, debug_level, CTLFLAG_RW, 
            &kperf_debug_level, 0, "debug level");
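
With kdbg_cswitch now routed through kperf_sysctl and the new cswitch_action and signpost_action OIDs in place, a userspace tool can arm them with sysctlbyname(3), subject to the kperf access-control checks handled elsewhere in this file. A minimal, hypothetical sketch (the action ID is illustrative and assumes an action was already configured through the other kperf sysctls):

#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
        int action = 1;   /* hypothetical kperf action ID */
        int on = 1;

        /* trigger that action on every context switch */
        if (sysctlbyname("kperf.cswitch_action", NULL, NULL,
                         &action, sizeof(action)) != 0) {
                perror("kperf.cswitch_action");
                return 1;
        }

        /* also emit kdebug context-switch tracepoints */
        if (sysctlbyname("kperf.kdbg_cswitch", NULL, NULL, &on, sizeof(on)) != 0) {
                perror("kperf.kdbg_cswitch");
                return 1;
        }
        return 0;
}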
diff --git a/osfmk/kperf/meminfo.c b/osfmk/kperf/meminfo.c
new file mode 100644 (file)
index 0000000..b7910ab
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <mach/mach_types.h>
+#include <kern/task.h> /* task_ledgers */
+#include <kern/thread.h>
+#include <kern/ledger.h>
+
+#include <kperf/kperf.h>
+#include <kperf/kperf_arch.h>
+
+#include <kperf/buffer.h>
+#include <kperf/context.h>
+#include <kperf/meminfo.h>
+
+/* collect current memory info */
+void
+kperf_meminfo_sample(struct meminfo *mi, struct kperf_context *context)
+{
+       task_t task;
+       ledger_amount_t credit, debit;
+       uint64_t phys_footprint;
+       kern_return_t kr;
+
+       assert(mi);
+       assert(context);
+
+       thread_t thread = context->cur_thread;
+
+       BUF_INFO1(PERF_MI_SAMPLE, (uintptr_t)thread_tid(thread));
+
+       task = get_threadtask(thread);
+
+       kr = kperf_get_phys_footprint(task, &phys_footprint);
+       if (kr == KERN_SUCCESS) {
+               mi->phys_footprint = phys_footprint;
+       } else {
+               mi->phys_footprint = UINT64_MAX;
+       }
+
+       kr = ledger_get_entries(task->ledger, task_ledgers.purgeable_volatile,
+                               &credit, &debit);
+       if (kr == KERN_SUCCESS) {
+               mi->purgeable_volatile = credit - debit;
+       } else {
+               mi->purgeable_volatile = UINT64_MAX;
+       }
+
+       kr = ledger_get_entries(task->ledger,
+                               task_ledgers.purgeable_volatile_compressed,
+                               &credit, &debit);
+       if (kr == KERN_SUCCESS) {
+               mi->purgeable_volatile_compressed = credit - debit;
+       } else {
+               mi->purgeable_volatile_compressed = UINT64_MAX;
+       }
+}
+
+/* log an existing sample into the buffer */
+void
+kperf_meminfo_log(struct meminfo *mi)
+{
+       BUF_DATA3(PERF_MI_DATA, mi->phys_footprint, mi->purgeable_volatile,
+                 mi->purgeable_volatile_compressed);
+}
+
diff --git a/osfmk/kperf/meminfo.h b/osfmk/kperf/meminfo.h
new file mode 100644 (file)
index 0000000..5103e1e
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef __KPERF_MEMINFO_H__
+#define __KPERF_MEMINFO_H__
+
+#include <mach/mach_types.h>
+#include <kern/ledger.h>
+
+struct meminfo {
+       uint64_t phys_footprint;
+       uint64_t purgeable_volatile;
+       uint64_t purgeable_volatile_compressed;
+};
+
+struct kperf_context;
+extern void kperf_meminfo_sample(struct meminfo *, struct kperf_context *);
+extern void kperf_meminfo_log(struct meminfo *mi);
+
+#endif /* __KPERF_MEMINFO_H__ */
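
As with the other kperf samplers, the new meminfo code separates taking the sample from logging it. A hedged sketch of how a caller might combine the two routines declared above (the kperf_context setup is elided and assumed to be done by the surrounding sampler):

/* illustrative only: sample the current task's memory ledgers, then
 * emit PERF_MI_DATA; failed ledger reads are reported as UINT64_MAX */
static void
example_sample_meminfo(struct kperf_context *ctx)
{
        struct meminfo mi;

        kperf_meminfo_sample(&mi, ctx);
        kperf_meminfo_log(&mi);
}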
index 68c7b0bd2ed51322af679dfb3f433205adfb50e3..e00f6a045af6f27ae8b817a27862ac08b8545eb0 100644 (file)
@@ -41,8 +41,7 @@
 #include <kperf/pet.h>
 #include <kperf/timetrigger.h>
 
-extern kern_return_t task_resume_internal(task_t);
-extern kern_return_t task_suspend_internal(task_t);
+#include <kern/task.h>
 
 /* timer id to call back on */
 static unsigned pet_timerid = 0;
index 1f0f3117c1006b0cdc9e6e64f4bfed4f2a2666dc..fb6df72d2261e2154ba0fc0b71d31aa9af7c545a 100644 (file)
@@ -29,6 +29,7 @@
 #include "threadinfo.h"
 #include "callstack.h"
 #include "kperf_kpc.h"
+#include "meminfo.h"
 
 #ifndef __KPERF_SAMPLE_H__
 #define __KPERF_SAMPLE_H__
@@ -40,6 +41,7 @@ struct kperf_sample
        struct tinfo_ex   tinfo_ex;
        struct callstack  kcallstack;
        struct callstack  ucallstack;
+       struct meminfo    meminfo;
 
 #if KPC
        struct kpcdata    kpcdata;
index d78af20683bb8067773052cc423be7041bd9fac9..382a053040715de98481bc19662cc6981c4ac365 100644 (file)
@@ -73,7 +73,7 @@ void
 kperf_threadinfo_sample(struct threadinfo *ti, struct kperf_context *context)
 {
        thread_t cur_thread = context->cur_thread;
-       BUF_INFO1( PERF_TI_SAMPLE, (uintptr_t)cur_thread );
+       BUF_INFO1( PERF_TI_SAMPLE, (uintptr_t)thread_tid(cur_thread) );
 
        // fill out the fields
        ti->pid = context->cur_pid;
@@ -112,7 +112,7 @@ kperf_threadinfo_extra_sample(struct tinfo_ex *tex, struct kperf_context *contex
        /* check if there's anything for us to do */
        if( t_chud & T_AST_NAME )
        {
-               BUF_INFO1( PERF_TI_XSAMPLE, (uintptr_t)cur_thread );
+               BUF_INFO1( PERF_TI_XSAMPLE, (uintptr_t)thread_tid(cur_thread) );
 
                /* get the name out */
 #ifdef FIXME
diff --git a/osfmk/kperf/x86_64/kperf_meminfo.c b/osfmk/kperf/x86_64/kperf_meminfo.c
new file mode 100644 (file)
index 0000000..9ed5acc
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2015 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <mach/mach_types.h>
+#include <kern/task.h> /* task_ledgers */
+#include <kern/thread.h>
+#include <kern/ledger.h>
+
+#include <kperf/kperf_arch.h>
+
+kern_return_t
+kperf_get_phys_footprint(task_t task, uint64_t *phys_footprint_out)
+{
+       kern_return_t kr;
+       ledger_amount_t credit, debit;
+       uint64_t phys_footprint;
+
+       kr = ledger_get_entries(task->ledger, task_ledgers.internal,
+                               &credit, &debit);
+       if (kr == KERN_SUCCESS) {
+               phys_footprint = credit - debit;
+       } else {
+               return kr;
+       }
+
+       kr = ledger_get_entries(task->ledger, task_ledgers.internal_compressed,
+                               &credit, &debit);
+       if (kr == KERN_SUCCESS) {
+               phys_footprint += credit - debit;
+       } else {
+               return kr;
+       }
+
+       *phys_footprint_out = phys_footprint;
+       return KERN_SUCCESS;
+}
+
index a262c76dfe329b468ad801b0952c64baed7cbe8c..d42e5bcb199c49b81952b99c6b67b68de0abcbc7 100644 (file)
@@ -10,16 +10,17 @@ MIG_DEFS = lockd_mach.defs
 
 DATAFILES = lockd_mach_types.h ${MIG_DEFS}
 
-INSTALL_MI_LIST =
+PRIVATE_DATAFILES = lockd_mach_types.h ${MIG_DEFS}
+KERNELFILES = ${PRIVATE_DATAFILES}
 
-INSTALL_MI_LCL_LIST    = ${DATAFILES} 
+INSTALL_MI_LIST =
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
 
 INSTALL_MI_GEN_LIST = 
 
 INSTALL_MI_DIR = lockd
 
-EXPORT_MI_LIST = \
-       ${DATAFILES}
+EXPORT_MI_LIST = ${KERNELFILES}
 
 EXPORT_MI_GEN_LIST = lockd_mach.h
 
index 7338ab86b27989e4e3e28b5718da5b29280a58e4..b14ee43e43ec4972046dbca887a8c5dd6d0e37a8 100644 (file)
@@ -7,14 +7,16 @@ include $(MakeInc_cmd)
 include $(MakeInc_def)
 
 INSTINC_SUBDIRS = machine
-INSTINC_SUBDIRS_ARM = arm
 INSTINC_SUBDIRS_X86_64 = i386
 INSTINC_SUBDIRS_X86_64H = i386
+INSTINC_SUBDIRS_ARM = arm arm64
+INSTINC_SUBDIRS_ARM64 = arm arm64
 
 EXPINC_SUBDIRS = machine
 EXPINC_SUBDIRS_X86_64 = i386
 EXPINC_SUBDIRS_X86_64H = i386
-EXPINC_SUBDIRS_ARM = arm
+EXPINC_SUBDIRS_ARM = arm arm64
+EXPINC_SUBDIRS_ARM64 = arm arm64
 
 MIG_TYPES = \
        clock_types.defs \
@@ -53,6 +55,7 @@ MACH_PRIVATE_DEFS = \
        memory_object.defs \
        memory_object_control.defs \
        memory_object_default.defs \
+       sysdiagnose_notification.defs \
        upl.defs \
        vm32_map.defs
 
@@ -70,7 +73,8 @@ MIG_USHDRS = \
        memory_object_default_server.h \
        notify_server.h \
        task_access_server.h \
-       telemetry_notification_server.h
+       telemetry_notification_server.h \
+       sysdiagnose_notification_server.h
 
 MIG_UUHDRS = \
        clock.h \
@@ -157,17 +161,34 @@ INSTALL_MI_LIST   = \
        bootstrap.h \
        ${DATAFILES}
 
-INSTALL_MI_LCL_LIST = \
+PRIVATE_DATAFILES = \
        bootstrap.h \
-       sfi_class.h \
+       coalition.h \
        coalition_notification.defs \
-       ${DATAFILES}
+       host_info.h \
+       mach_host.defs \
+       mach_traps.h \
+       memory_object_types.h \
+       mig.h \
+       processor_info.h \
+       semaphore.h \
+       sfi_class.h \
+       syscall_sw.h \
+       sysdiagnose_notification.defs \
+       task_info.h \
+       task_policy.h \
+       thread_policy.h \
+       thread_switch.h \
+       vm_prot.h
+
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
 
 INSTALL_KF_MI_LIST = \
        mach_interface.h \
        $(filter-out mach_traps.h mach_syscalls.h thread_switch.h, ${DATAFILES})
 
 INSTALL_KF_MI_LCL_LIST = \
+       coalition.h \
        mach_interface.h \
        $(filter-out mach_traps.h mach_syscalls.h thread_switch.h, ${DATAFILES})
 
@@ -177,6 +198,7 @@ INSTALL_MI_DIR = mach
 
 EXPORT_MI_LIST = \
        branch_predicates.h \
+       coalition.h \
        mach_interface.h \
        sfi_class.h \
        ${DATAFILES}
@@ -247,7 +269,8 @@ MIG_KUSRC = \
        task_access_user.c \
        telemetry_notification_user.c \
        upl_user.c \
-       vm_map_user.c
+       vm_map_user.c \
+       sysdiagnose_notification_user.c
 
 MIG_KSHDRS = \
        clock_server.h \
diff --git a/osfmk/mach/coalition.h b/osfmk/mach/coalition.h
new file mode 100644 (file)
index 0000000..2c0b22e
--- /dev/null
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2014 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _MACH_COALITION_H_
+#define _MACH_COALITION_H_
+
+/* code shared by userspace and xnu */
+
+#define COALITION_CREATE_FLAGS_MASK       ((uint32_t)0xF1)
+#define COALITION_CREATE_FLAGS_PRIVILEGED ((uint32_t)0x01)
+
+#define COALITION_CREATE_FLAGS_TYPE_MASK  ((uint32_t)0xF0)
+#define COALITION_CREATE_FLAGS_TYPE_SHIFT (4)
+
+#define COALITION_CREATE_FLAGS_GET_TYPE(flags) \
+       (((flags) & COALITION_CREATE_FLAGS_TYPE_MASK) >> COALITION_CREATE_FLAGS_TYPE_SHIFT)
+
+#define COALITION_CREATE_FLAGS_SET_TYPE(flags, type) \
+       do { \
+               flags &= ~COALITION_CREATE_FLAGS_TYPE_MASK; \
+               flags |= (((type) << COALITION_CREATE_FLAGS_TYPE_SHIFT) \
+                          & COALITION_CREATE_FLAGS_TYPE_MASK); \
+       } while (0)
+
+
+#define COALITION_TYPE_RESOURCE  (0)
+#define COALITION_TYPE_JETSAM    (1)
+#define COALITION_TYPE_MAX       (1)
+
+#define COALITION_NUM_TYPES      (COALITION_TYPE_MAX + 1)
+
+#define COALITION_TASKROLE_UNDEF  (0)
+#define COALITION_TASKROLE_LEADER (1)
+#define COALITION_TASKROLE_XPC    (2)
+#define COALITION_TASKROLE_EXT    (3)
+
+#define COALITION_NUM_TASKROLES   (4)
+
+#define COALITION_ROLEMASK_ALLROLES ((1 << COALITION_NUM_TASKROLES) - 1)
+#define COALITION_ROLEMASK_UNDEF    (1 << COALITION_TASKROLE_UNDEF)
+#define COALITION_ROLEMASK_LEADER   (1 << COALITION_TASKROLE_LEADER)
+#define COALITION_ROLEMASK_XPC      (1 << COALITION_TASKROLE_XPC)
+#define COALITION_ROLEMASK_EXT      (1 << COALITION_TASKROLE_EXT)
+
+#define COALITION_SORT_NOSORT     (0)
+#define COALITION_SORT_DEFAULT    (1)
+#define COALITION_SORT_MEM_ASC    (2)
+#define COALITION_SORT_MEM_DEC    (3)
+#define COALITION_SORT_USER_ASC   (4)
+#define COALITION_SORT_USER_DEC   (5)
+
+#define COALITION_NUM_SORT        (6)
+
+struct coalition_resource_usage {
+       uint64_t tasks_started;
+       uint64_t tasks_exited;
+       uint64_t time_nonempty;
+       uint64_t cpu_time;
+       uint64_t interrupt_wakeups;
+       uint64_t platform_idle_wakeups;
+       uint64_t bytesread;
+       uint64_t byteswritten;
+       uint64_t gpu_time;
+       uint64_t cpu_time_billed_to_me;
+       uint64_t cpu_time_billed_to_others;
+};
+
+#ifdef PRIVATE
+/* definitions shared by only xnu + Libsyscall */
+
+/* Syscall flavors */
+#define COALITION_OP_CREATE 1
+#define COALITION_OP_TERMINATE 2
+#define COALITION_OP_REAP 3
+
+/* coalition_info flavors */
+#define COALITION_INFO_RESOURCE_USAGE 1
+
+/* structure returned from libproc coalition listing interface */
+struct procinfo_coalinfo {
+       uint64_t coalition_id;
+       uint32_t coalition_type;
+       uint32_t coalition_tasks;
+};
+
+#endif /* PRIVATE */
+
+#ifdef XNU_KERNEL_PRIVATE
+#if COALITION_DEBUG
+#define coal_dbg(fmt, ...) \
+       printf("%s: " fmt "\n", __func__, ## __VA_ARGS__)
+#else
+#define coal_dbg(fmt, ...)
+#endif
+
+#endif
+
+#endif /* _MACH_COALITION_H_ */
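
The create-flags macros above pack a privileged bit and a coalition type into one 32-bit word. A short illustrative sketch of building and inspecting such a word (the variable is hypothetical):

/* hedged sketch: privileged jetsam coalition creation flags */
uint32_t flags = COALITION_CREATE_FLAGS_PRIVILEGED;

COALITION_CREATE_FLAGS_SET_TYPE(flags, COALITION_TYPE_JETSAM);
/* flags is now 0x11: the privileged bit plus type 1 in the type nibble */

if (COALITION_CREATE_FLAGS_GET_TYPE(flags) == COALITION_TYPE_JETSAM) {
        /* type extracted back out of the packed word */
}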
index baf2ec24de6c6a6e1502fbb784d7575df5b230da..0b56db3a0372bb7ad493ccca10e23a6e308a53e6 100644 (file)
 
 #define EXC_GUARD              12      /* Violated guarded resource protections */
 
+#define EXC_CORPSE_NOTIFY      13      /* Abnormal process exited to corpse state */
+
 
 /*
  *     Machine-independent exception behaviors
 #define EXC_MASK_CRASH                 (1 << EXC_CRASH)
 #define EXC_MASK_RESOURCE              (1 << EXC_RESOURCE)
 #define EXC_MASK_GUARD                 (1 << EXC_GUARD)
+#define EXC_MASK_CORPSE_NOTIFY         (1 << EXC_CORPSE_NOTIFY)
 
 #define EXC_MASK_ALL   (EXC_MASK_BAD_ACCESS |                  \
                         EXC_MASK_BAD_INSTRUCTION |             \
                         EXC_MASK_MACHINE)
 
 #ifdef KERNEL_PRIVATE
-#define EXC_MASK_VALID (EXC_MASK_ALL | EXC_MASK_CRASH)
+#define EXC_MASK_VALID (EXC_MASK_ALL | EXC_MASK_CRASH | EXC_MASK_CORPSE_NOTIFY)
 #endif /* KERNEL_PRIVATE */
 
 #define FIRST_EXCEPTION                1       /* ZERO is illegal */
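
Extending EXC_MASK_VALID keeps the new corpse-notify bit acceptable to kernel-private mask validation alongside EXC_MASK_CRASH. An illustrative sketch of the kind of check this enables (hypothetical caller; EXC_MASK_VALID itself is only visible under KERNEL_PRIVATE):

/* hedged sketch: accept only masks whose bits all fall within EXC_MASK_VALID */
exception_mask_t mask = EXC_MASK_CRASH | EXC_MASK_CORPSE_NOTIFY;
boolean_t valid = ((mask & ~EXC_MASK_VALID) == 0);    /* TRUE for this mask */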
diff --git a/osfmk/mach/flipc_cb.h b/osfmk/mach/flipc_cb.h
deleted file mode 100644 (file)
index 4c7f969..0000000
+++ /dev/null
@@ -1,1220 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- *
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:29  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:25:45  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.11.1  1996/09/17  16:34:42  bruel
- *     fixed types.
- *     [96/09/17            bruel]
- *
- * Revision 1.1.6.1  1995/06/13  18:20:10  sjs
- *     Merged from flipc_shared.
- *     [95/06/07            sjs]
- * 
- * Revision 1.1.3.14  1995/05/19  00:58:14  sjs
- *     Added send_ready to shared area, used for fast check if there is something
- *     to do (and prevents the cache from getting stirred).
- *     [95/05/18            sjs]
- * 
- * Revision 1.1.3.13  1995/05/16  20:46:28  randys
- *     Export performance valid information through performance
- *     structure rather than kernel configuration section.
- *     [95/05/16            randys]
- * 
- *     Added performance (FLIPC_PERF) config information to
- *     kernel_configuration section of comm buffer, so that user
- *     programs can find out if this information is being gathered.
- *     [95/05/16            randys]
- * 
- * Revision 1.1.3.12  1995/05/15  14:26:54  randys
- *     Updated comments on use of acquire pointer (it's completely
- *     ignored if dpb is set) and added macros for testing !dpb and
- *     enabled at the same time.
- *     [95/05/11            randys]
- * 
- *     Change pme_process_ptr ==> sme_process_ptr (since it's being read
- *     by AIL now).
- *     [95/05/11            randys]
- * 
- *     Added private copied of release and process pointers.
- *     [95/05/11            randys]
- * 
- *     Rearrange endpoint structure to separate data with importantly
- *     different access patterns into different cache lines.  This
- *     involved duplicating some (effectively constant) entries, and
- *     creating two versions of some macros.
- *     [95/05/11            randys]
- * 
- * Revision 1.1.3.11  1995/05/08  16:06:33  randys
- *     Added comment explaining that an endpoint bufferlist must always
- *     have valid buffer pointers in all of its entries, to keep
- *     FLIPC_endpoint_buffer_available from going off the deep end.  No
- *     code changes.
- *     [95/04/18            randys]
- * 
- * Revision 1.1.3.10  1995/04/05  21:21:52  randys
- *     Added a field to the buffer control structure holding the
- *     scheduling policy chosen for the allocations lock.
- *     [95/04/05            randys]
- * 
- * Revision 1.1.3.9  1995/03/23  20:35:19  randys
- *     Added comments indicating duplication of declarations of
- *     flipc_cb_base & flipc_cb_size in this file and in flipc_usermsg.h
- *     Modified declaration of flipc_cb_size to be unsigned long.
- *     [95/03/21            randys]
- * 
- * Revision 1.1.3.8  1995/02/23  21:32:42  randys
- *     Added space for kernel configuration in communications buffer
- *     control structure.
- *     [95/02/22            randys]
- * 
- * Revision 1.1.3.7  1995/02/21  17:22:58  randys
- *     Re-indented code to four space indentation
- *     [1995/02/21  16:25:32  randys]
- * 
- * Revision 1.1.3.6  1995/02/13  22:57:29  randys
- *     Replaced all of NEXT_{ACQUIRE,RELEASE,PROCESS}_PTR macros with a
- *      single NEXT_BUFFERLIST_PTR macro.
- *     [95/02/03            randys]
- * 
- * Revision 1.1.3.5  1995/01/26  21:01:44  randys
- *     Add performance structure into CB.
- *     [1995/01/24  21:14:31  randys]
- * 
- *     Added flag in epgroup structure to note that epgroup
- *     has a semaphore associated with it.
- *     [1995/01/19  23:02:13  randys]
- * 
- *     Add a space in the comm buffer header for the null_destination
- *     the ME sets up for the AIL.  Get rid of
- *     FLIPC_ADDRESS_ENDPOINT_PTR (it isn't used)
- *     [1995/01/19  20:22:30  randys]
- * 
- *     Up the comm buffer size to 1 megabyte
- *     [1995/01/17  22:23:27  randys]
- * 
- * Revision 1.1.3.4  1995/01/12  21:19:01  randys
- *     Minor commenting changes from dlb
- *     [1995/01/06  18:18:12  randys]
- * 
- * Revision 1.1.3.3  1994/12/22  16:23:57  randys
- *     Fixed calculation of number of buffers on an endpoint
- *     to take size of buffer pointers into account.
- *     [1994/12/21  16:19:55  randys]
- * 
- * Revision 1.1.3.2  1994/12/20  19:01:56  randys
- *     Moved definition of flipc_simple_lock to flipc_cb.h
- *     [1994/12/20  17:34:41  randys]
- * 
- *     Added a simple lock in the comm buffer to use for the
- *     allocations lock, along with directions as to how
- *     to use it (not like a normal simple lock).
- *     [1994/12/20  15:27:25  randys]
- * 
- *     Added error log into communications buffer control
- *     structure, and changed FLIPC_ADDRESS_ENDPOINT_PTR to
- *     correctly compute the endpoint pointer based on the
- *     new ctl structure layout.
- *     [1994/12/19  23:47:45  randys]
- * 
- *     Added filename in comment at top of each file
- *     [1994/12/19  20:28:20  randys]
- * 
- *     Add version field to epgroup to check races on buffer acquire
- *     from epgroup.
- *     [1994/12/19  18:05:04  randys]
- * 
- * Revision 1.1.3.1  1994/12/12  17:46:12  randys
- *     Putting initial flipc implementation under flipc_shared
- *     [1994/12/12  16:27:46  randys]
- * 
- * Revision 1.1.1.2  1994/12/11  23:11:18  randys
- *     Initial flipc code checkin
- * 
- * $EndLog$
- */
-
-/*
- * mach/flipc_cb.h
- *
- * This file is intended to be the data structure layout for the flipc
- * communications buffer, both for the KKT implementation and
- * for the eventual paragon implementation.  This file should include
- * all of the information necessary for either humans or machines to
- * understand the data structure layout.
- *
- * The communications buffer is the wired section of memory used for
- * communication between the flipc applications interface layer and
- * the flipc message engine.  No structures in it are visible to the
- * user; the applications interface layer mediates all user access to
- * the CB.
- */
-
-#ifndef _MACH_FLIPC_CB_H_
-#define _MACH_FLIPC_CB_H_
-
-#include <mach/flipc_types.h>
-
-/*
- * Flipc naming and argument ordering conventions (this applies mainly to
- * user-interface.h, but seems inappropriate in a user-visible header file):
- *
- * All objects prefixed with "flipc"; uppercase for user-visible
- * objects, lower case for internal ones.
- *
- * Types created with typedef will have _t suffixes.
- *
- * Words will be separated by '_'.
- *
- * Macro definitions will be all in caps.
- *
- * Enum members will have their initial letter (after Flipc) capitalized.
- *
- *
- * For user-visible routines:
- *
- * The first word following the "flipc" will be the flipc object type that
- * that routine operates on (specifically "domain", "epgroup",
- * "endpoint", or "buffer").
- *
- * The object named by the first word of the call will, if an argument
- * to the call, be the first argument.
- *
- * Output variables passed as pointers in the arglist will come last.
- */
-
-/*
- * The kinds of objects that exist in the communications buffer are:
- *
- * Endpoints -- Used for sending or receiving.
- * Buffers -- Composed of a buffer header and buffer data.
- * Endpoint groups -- Used for collecting multiple numbers of endpoints
- *     together for a select like operation.
- */
-
-/*
- * We can't use general pointers inside the communications buffer,
- * since the address space on either side of the interface is
- * different.  The places where we could use pointers are:
- *
- *     *) From endpoint sets to endpoints.
- *     *) From endpoints to buffers.
- *
- * The kinds of pointers we could use are:
- *     *) Byte offset from the beginning of the comm buffer.  This
- *        is simple, but has the disadvantage of allowing the user to
- *        play games with pointing endpoint buffer pointers into data
- *        space, & etc.
- *     *) Rigid arrays of each type of object, with the object
- *        "pointer" being an index into the array.  This avoids the
- *        above problem, but complicates memory allocation (forces
- *        allocation to be contiguous, which may force pre-deciding
- *        how much space each of the above types will take).
- *
- * Though we appear to be going for the rigid allocation for each type
- * of data structure, I'm still going to do the "simple offset"
- * solution to maintain maximum flexibility into the future.
- * The single exception to this is that FLIPC addresses will be composed of
- * node number and endpoint number, where the endpoint number will be
- * the index into the endpoint array.
- */
-
-typedef unsigned long flipc_cb_ptr;
-/* Define a null value, which doesn't point anywhere into the CB.  */
-#define FLIPC_CBPTR_NULL ((flipc_cb_ptr) -1)
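
Because a flipc_cb_ptr in the (now-removed) interface above is a byte offset rather than a real pointer, each side has to rebase it against its own mapping of the communications buffer before dereferencing. A purely illustrative sketch, assuming flipc_cb_base is the local base address that the file's history notes is declared elsewhere; the macro name is hypothetical:

/* hedged sketch, not part of the original file */
extern char *flipc_cb_base;                     /* local mapping of the CB */

#define EXAMPLE_CBPTR_TO_ADDR(cbptr) \
        ((void *)(flipc_cb_base + (cbptr)))     /* never applied to FLIPC_CBPTR_NULL */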
-
-/*
- * Synchronization between message engine and application.
- *
- * In general, it isn't reasonable to allow locking and unlocking of
- * data structures between message engine and communications buffer,
- * as this requires the message engine to trust arbitrary user
- * threads.  The solution is to arrange all data structures so that
- * they may be accessed by both parties without locking.  The way that
- * this is usually done is that specific variables are considered to
- * be owned by one of the ME or the AIL, and the other party is
- * allowed to read the variable but not to modify it.  With this
- * arrangement, implementing things like producer/consumer circular
- * queues is possible; each agent (ME or AIL) goes around the list
- * doing its thing, and avoids passing the pointer showing where the
- * other agent is working.
- *
- * Following the above, we may divide structure members into five
- * classes, and define prefixes for these five classes.
- *
- *     Description             Prefix
- *      -------------------------------
- *     Private to AIL          pail_
- *     Private to ME           pme_
- *     AIL owned, read by ME   sail_
- *     ME owned, read by AIL   sme_
- *     Shared in other way     shrd_
- *
- * Shared variables may change their ownership based on their own
- * or someone else's value (these variables may be thought of as
- * being handed back and forth between the two entities) or on a
- * configuration option of the structure (not handed back and forth,
- * but still based on another variable's value.
- *
- * In addition, I am going to put variables that are set at endpoint
- * allocation and cleared at deallocation (but read by both sides) in
- * a separate class; they are "AIL owned, read by ME" but are
- * effectively constant over the synchronization protocols we care
- * about.
- *
- *     Constant after allocation       const_
- *
- * Note that this ignores memory consistency issues (when the two
- * agents are actually on two separate processors).  These issues need
- * to be explored in more detail; for now suffice it to say that the
- * above methods work given a sequentially consistent memory model or
- * a processor consistent memory model.
- *
- * Also note that an optimizing compiler may reorder our memory
- * accesses, playing merry hell with the inter-node synchronization
- * protocols (the compiler doesn't know about the other node, after
- * all).  To avoid this, all structure members used for
- * synchronization will be marked volatile; this will force the
- * compiler to keep the order and number of accesses intact.  This
- * will also force the compiler *not* to optimize way accesses to
- * these variables, so it is wise to explicitly load the variable into
- * a temporary once if you need to do multiple computations with it,
- * and store it back afterwards when you are done.
- */
-
-/*
- * Memory allocation:
- *
- * For maximum simplicity in the first implementation, we need to know
- * at comm buffer allocation time how many endpoints, endpoint_sets,
- * and buffers we will want total, until the end of time.  This
- * massively simplifies memory allocation; there will be a single array
- * of each type of data and the communication buffer will be taken up
- * by the concatenation of these arrays (with some fiddling to make
- * sure that no data crosses a page boundary).
- *
- * For each data type there will be a free list to which pieces of
- * data will be added to or removed from as needed.  Each data type
- * will have a pointer in it to allow it to be linked onto the free
- * list.
- */
-
-/*
- * Multiple thread access to data structures:
- *
- * There are several points in the communications buffer (notably
- * endpoint accesses) when multiple application threads will be
- * attempting operations on data structures at the same time.  To
- * multiplex these operations, we need a per-data structure lock.
- * Lock attributes:
- *     *) This lock will not be kernel based, as such a lock would be
- *        too heavyweight to use for arbitrary sending and receiving
- *        operations).
- *     *) Because it is not kernel based, it may not be used to
- *        multiplex accesses from threads at different kernel
- *        priority levels.  Deadlock would result if a low-priority
- *        thread gained the lock and then was prempted by a
- *        high-priority thread that wanted to acquire it.
- *     *) Architecture-dependent interfaces need to be designed to
- *        atomically lock and unlock this data structure.
- *
- * These are "simple locks" and are defined in flipc_dep.h.
- */
-
-/*
- * Lock type.  This placement (in flipc_cb.h) is a little bit of a
- * hack, as it really should be defined with the machine dependent lock
- * macros.  But then the machine independent lock macros have problems
- * because they have to include it both before and after the prototypes.
- * So rather than split the machine dependent stuff into multiple
- * files, I'll define it here and hope that this definition works for
- * whatever architectures we're on.
- */
-typedef unsigned long flipc_simple_lock;
-
-/*
- * Ownership of data structures.
- *
- * Please note that this is a can of worms, and that I (Randys)
- * consider this (and it's interactions with endpoint group membership)
- * the likeliest place for design bugs in FLIPC.  Any and all should
- * take this as an open invitation and challenge to find bugs in what
- * follows.
- *
- * Rules:
- *
- *     *) If you've disabled a structure and synched with the
- *        appropriate side of the ME, the ME won't touch it.
- *
- *     *) If you've taken a send endpoint off of the send endpoint
- *        list and sync'd with the ME, the ME won't touch it.
- *
- *[The rest of this applies to the AIL only; the above rules are the
- * only ones the ME respects.  ]
- *
- *     *) Within the AIL, a disabled structure is owned by:
- *             *) The routine that disabled it, before it is put on
- *                the free list.
- *             *) The routine that dequeued it from the free list,
- *                before it is enabled.
- *        Taking of the simple lock is not required for ownership in
- *        these cases.  Taking of the simple lock is not required for
- *        the act of *enabling* the structure (you have ownership and
- *        are giving it away), however it is required for the act of
- *        disabling the structure (since it is the only valid way to
- *        take ownership of an enabled structure, and you can't
- *        modify the enabled bit without having ownership).
- *
- *     *) The simple lock in a structure always needs to be valid, as
- *        simple locks may be taken while the structure is in any
- *        state.  Similarly, the enabled bit must always be valid,
- *        both because it's what the ME checks, and because it may be
- *        checked by the AIL while the structure is free.
- *
- *     *) Holding the simple lock on an enabled structure imparts
- *        ownership of that structure.  You are allowed to take the
- *        simple lock of a disabled structure, but ownership is not
- *        gained by doing so.
- *
- *     *) You are allowed to read the enabled/disabled bit without
- *        owning the structure (if the structure is disabled, there
- *        may be no way to gain the ownership).
- *
- *     *) Owning a structure allows you to do what you want with it,
- *        except:
- *             *) As mentioned above, the simple lock and
- *                enabled/disabled bit must always be valid.
- *             *) The ownership of the endpoint group related members
- *                of an endpoint structure is special; see below.
- *             *) The allocations lock must be held to manipulate the
- *                next send endpoint field of any endpoint.
- *
- *     *) If an endpoint is on an endpoint group, the ownership of
- *        the the endpoint group related members of the structure
- *        (sail_endpoint_group and pail_next_eg_endpoint) go with the
- *        owndership of the endpoint group, not the endpoint.  For
- *        this purpose only, membership is defined atomically as the
- *        sail_endpoint_group pointer being set to an endpoint group.
- *        Thus one may remove an endpoint from an endpoint group
- *        without owning the endpoint (change the sail_endpoint_group
- *        pointer last).  One requires both locks to add an endpoint
- *        to an endpoint group, however.
- *
- *        (Part of the motivation for this is that removal and
- *        addition of endpoints to endpoint groups requires
- *        modifications of pointers in other endpoint structures).
- *
- *     *) No structure may be put on the free list if marked with any
- *        association to any other structure.  Specifically, endpoint
- *        groups may have no endpoints belonging to them, and
- *        endpoints may not belong to an endpoint group or have
- *        buffers belonging to them.
- *
- *     *) One consequence of the above is that endpoint groups may
- *        not be marked as disabled while they have any endpoints on
- *        them, as freeing an endpoint requires it to be removed from
- *        its endpoint group, and if ownership of the endpoint group
- *        cannot be gained, that is impossible.
- *
- *     *) In theory, endpoints *may* be marked disabled while they
- *        are still on endpoint groups.  In practice, they are not.
- *        This is relied on by the code which frees endpoint groups,
- *        in a non-obvious way.  Specifically, that code assumes that
- *        there is no way that a call to free endpoint will return
- *        with the endpoint still on the endpoint group.  Since the
- *        only way for free endpoint to fail is if the endpoint is
- *        inactive, and since the endpoint is set inactive only after
- *        free endpoint (presumably a different one) confirms that it
- *        isn't on any endpoint group, this assumption is true.
- *
- *        Got that?  Take home lesson: don't allow endpoints to be
- *        marked disabled while still on endpoint groups until you
- *        *do* get that, and are willing to take the responsibility
- *        of changing it so that it works under your new scheme.
- *
- *     *) Ownership of the freelist(s) are gained by holding the
- *        allocations lock for the buffer, and *only* in that way.
- *        No modification of freelist, send endpoint list, or send
- *        side ME sync bits is valid without holding the allocations
- *        lock.  In other words, while you can read things in the
- *        main communications buffer control structure at will, you
- *        may not change them without owning the allocations lock.
- *
- *     *) The state where a structure is disabled but off of the
- *        freelist may be valid as an intermediate (while an AIL
- *        routine is orchestrating a transition) but is not a valid
- *        static state.  This state must not survive the return to
- *        application code of the thread that disabled the structure.
- */
-
-/*
- * Flipc data buffer management.
- *
- * A buffer (whether being used for sending or receiving) may be in
- * one of three states:
- *
- * READY -- Buffer held by application.
- * PROCESSING -- Buffer held by endpoint, unprocessed.  For receive endpoints,
- *        this means that the buffer is empty, waiting to be filled by
- *        an incoming message.  For send endpoints, this means that the
- *        buffer is full, waiting to be sent out.
- * COMPLETED -- Buffer held by the endpoint, processed.  For receive
- *        endpoints, this means that the buffer is full, with newly
- *        received data in it.  For send endpoints, this means that the
- *        buffer is empty (*), with its data having been sent out.
- *
- *        (*) In point of fact the data hasn't been touched, though bits
- *        may have been fiddled with in the header data structure.  But
- *        it's been sent.
- * FREE -- The buffer is in the pool of free buffers, and may be
- * allocated to any newly created endpoint.
- *
- * The transition diagram between these states is relatively simple:
- *
- *
- *                  release
- *             /-----------------\|
- * +----------+                  -+----------+
- * |  READY   |                   |PROCESSING|<- - - - - -
- * +----------+_                  +----------+           \
- *      ^     |\ - - - - - - - - /     |    |             \endpoint allocate
- *      |         (processed)              \endpoint       \
- *      |                              |     \ free         |
- *      | acquire                      /      ------\
- *      |                                           \      |
- *      |                            / (processed)    >+----------+
- * +----------+                                               |   FREE   |
- * |COMPLETED |< - - - - - - - - - -                  +----------+
- * +----------+                               endpoint allocate    /     ^
- *     |     ^- - - - - - - - - - - - - - - - - - - - - - -      |
- *     |                                                        /
- *     \               endpoint free                           /
- *      ------------------------------------------------------/
- *
- * (If it doesn't look simple, imagine it without the FREE state; that
- * state doesn't enter into almost any buffer manipulations)
- *
- * For send buffers, release==send, acquire==allocate, and
- * processed==the sending done by the message engine.  For receive buffers,
- * release==release, acquire==receive, and process==the actual
- * arrival of the message handled by the messaging engine.
- *
- * The choice of path from the PROCESSING state is an endpoint
- * specific configuration option; a particular endpoint may leave a
- * processed buffer on the endpoint, or it may release it back to the
- * application by dropping it from the endpoint.
- *
- * Buffers are assigned the PROCESSING state on a newly allocated
- * receive endpoint (to be ready to receive messages) and the
- * COMPLETED state on a newly allocated send endpoint.
- *
- * The state (other than FREE) that a particular buffer is in is
- * determined by its place on a circular queue of buffer pointers that
- * is part of the endpoint structure.  Buffers owned by the
- * application (READY) are not pointed to by pointers on this queue.
- * The buffer is released to the message engine by placement of a
- * pointer to it on this queue.  When the message engine is done
- * processing the buffer, it sets a flag in the buffer header.  If the
- * endpoint is so configured, it then removes the buffer pointer from
- * the queue; otherwise the AIL acquires the buffer (and removes the
- * pointer from the queue) when it chooses.
- *
- *      . . . . . .
- *     .           .
- *    .             .
- *    .             .       AIL releasing
- *    .             .       ^
- *    .         +-------+--/
- *    .         |       |
- *    .         |Buffers|
- *    .         | to be |
- *    .         |Sent or|
- *    .         |Receivd|
- *    .         | Into  |    ^ ME processing
- *    .         +-------+ --/
- *    .         |       |
- *    .  AIL    | Sent  |  (These buffers have a flag set to indicate
- *    .Acquiring|  or   |   that they have been processed.  This
- *    .         |Filled |   section is optional; the endpoint may be
- *    .         |buffers|   configured to drop buffers after processing)
- *    .     ^   |       |
- *    .      \--+-------+
- *    .             .
- *     .           .
- *      . . . . . .
- *
- *
- * The AIL will refuse to acquire a buffer that has not yet been
- * processed by the ME.  Acquire will not work at all on endpoints
- * that have been configured to drop buffers on completion.
- *
- * The buffer_available primitive is coded to avoid doing a
- * (potentially costly) acquiring of the endpoint flipc lock.  Since
- * telling where there is a buffer available requires two operations
- * (comparison of the acquire and release pointers to see if there are
- * any buffers on the endpoint, and then indirection of the acquire
- * pointer to see if that buffer has been processed yet), there is a
- * potential race that will admit the possibility of indirecting
- * through an invalid pointer.  For this reason, for the life of an
- * endpoint, it is a requirement that all buffer pointers on the
- * bufferlist point *somewhere* (ie. to some existing buffer), so that
- * this indirection will not cause an access error.  The
- * buffer_available primitive may return the wrong result, but (as
- * long as the incorrectness is transitory), this is acceptable.
- */
-
-/* Set up the states so that FLIPC_buffer_processed can just do an
-   & and a test.  */
-typedef enum {
-    flipc_Free = 0x0, flipc_Processing = 0x1,
-    flipc_Completed = 0x2, flipc_Ready = 0x3
-} flipc_buffer_state_t;
-#define FLIPC_BUFFER_PROCESSED_P(state) ((state) & 0x2)
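
The enum values are chosen so that the processed test is a single mask: only flipc_Completed (0x2) and flipc_Ready (0x3) have bit 1 set. An illustrative sketch of the acquire-side check described in the comments above (the state value shown is arbitrary):

/* hedged sketch, not part of the original file */
flipc_buffer_state_t state = flipc_Completed;   /* e.g. read from a buffer header */

if (FLIPC_BUFFER_PROCESSED_P(state)) {
        /* true only for flipc_Completed and flipc_Ready */
}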
-
-/*
- * Data header/buffer layout.
- *
- * For this implementation, and probably for all time, the header
- * immediately precedes the data in memory, and the messaging engine
- * will send both header and data.  Our priority is message dispatch
- * speed rather than raw bandwidth (this is the small message side of
- * a transfer mechanism), so we don't mind that we are throwing away
- * some bandwidth by taking up transferred space with header data.
- *
- * The data size will be the maximum size allowed by the underlying
- * transport, minus the header size (available at run time).  The user
- * will be given a pointer to the data buffer, and will use this both
- * for copying data in and out, and as an argument to the underlying
- * flipc routines.  The flipc routines will access appropriately.
- *
- * The header structure follows; the user data type will be offset and
- * cast appropriately to access this.
- */
-
-typedef struct flipc_data_buffer {
-    union {
-       FLIPC_address_t destination; /* For sending.  */
-       flipc_cb_ptr free;      /* Link for header free list.  */
-    } u;
-
-    /* ME owned if flipc_Processing, AIL owned otherwise.  May not ever
-       assume the state flipc_Ready in an optimized implementation.  */
-    volatile flipc_buffer_state_t shrd_state;
-} *flipc_data_buffer_t;
-
-/*
- * Endpoint structure.
- *
- * An endpoint is the data structure used for communicating buffers,
- * either send or receive.  Note that all actual circular lists of
- * buffer pointers on the endpoints are in their own array that gets
- * partitioned out to the various endpoints.  This is because we want
- * the endpoint structures themselves to be fixed size for easy
- * indexing upon receipt of a message.  This large scale array will be
- * of size (max_buffers_per_endpoint) * (number_of_endpoints).  Both
- * of these values are set during the domain initialization call.
- *
- * Note that the pointers contained in the buffer lists are pointers to
- * buffer *headers*, not to the data.
- */
-
-/*
- * This structure is divided into four cache lines, separated by their
- * usage type:
- *
- *     *) Private data that the AIL scribbles on.
- *     *) Data the AIL writes (regularly) that the ME reads
- *        (occasionally).  The canonical example is the release pointer.
- *     *) Private data that the ME scribbles on.
- *     *) Data the ME writes (regularly) that the AIL reads (occasionally).
- *        The canonical example is the process pointer.
- *
- * There are a couple of other categories of stuff, that can be shoehorned
- * into the above:
- *     *) Constant data that both sides read regularly.  This can be
- *        duplicated in the two private areas (actually, it can be
- *        duplicated in any two areas that stay in the cache of the
- *        respective processors).
- *     *) Stuff that is not accessed on the critical path; it can go
- *        almost anywhere (probably in one of the two ping-ponging
- *        cache lines).
- *     *) Stuff that is read-only for a single processor goes in that
- *        processors private data section.
- *
- * Duplicate entries have a "p" or a "a" suffixed to the name to
- * indicate that fact.  Note that these will usually, but not always,
- * be "const" variables--they may be "const" variables only from the
- * critical path viewpoint.
- *
- * We take cache line length as being 8 * sizeof(int).
- */
-
-typedef struct flipc_endpoint {
-
-    /* ===Private AIL data===  */
-    /* Type of endpoint (send, recv, etc).  Duplicated in private
-       ME section.  */
-    FLIPC_endpoint_type_t constda_type;
-
-    /* This next value is two variables squeezed into a single word to
-     * save on memory accesses (since they are almost always read at
-     * the same time.  The two variables are:
-     *
-     * const_drop_processed_buffers -- Should the message engine drop
-     * buffers after processing them (as opposed to leaving them on
-     * the endpoint)?
-     *
-     * sail_enabled (volatile) -- Is the endpoint enabled?  This isn't
-     * marked constant because it is used for synchronization on
-     * endpoint deallocation.
-     *
-     * Note that to reduce tests and branches, these two variables
-     * are represented by two bits in the word (bit 0 and bit 16).  It
-     * is illegal to have bits other than 0 and 16 set in this word.
-     * This assumption is used in ENABLED_AND_NOT_DPB_P, and is enforced
-     * in DOE_CONSTRUCT (assumed to not be performance critical) below.
-     *
-     * Duplicated in private ME section.
-     */
-
-    volatile unsigned long sailda_dpb_or_enabled;
-
-#define EXTRACT_DPB(dpb_or_enabled) ((dpb_or_enabled) >> 16)
-#define EXTRACT_ENABLED(dpb_or_enabled)  ((dpb_or_enabled) & 0xffff)
-#define DISABLED_OR_DPB_P(dpb_or_enabled) ((dpb_or_enabled) ^ 0x1)
-#define DOE_CONSTRUCT(dpb, enabled) \
-    (((dpb) ? 0x10000 : 0) | ((enabled) ? 0x1 : 0))
-
-    flipc_simple_lock pail_lock; /* Simple lock for serializing
-                                   multiple thread access to
-                                   structure.  AIL owned.  */
-    /* First element in buffer list array that is ours.  Constant
-       from communications buffer initialization.  */
-    flipc_cb_ptr constda_my_buffer_list;
-    /* First element after my_buffer_list that is *not* in my buffer
-       list.  Constant from communications buffer initialization.  */
-    flipc_cb_ptr constda_next_buffer_list;
-
-    /* First location that has a valid buffer pointer in it.  This may
-       contain a pointer to a buffer available for acquisition, or it
-       may contain a pointer to a buffer that is still being
-       processed; the buffer header or process_ptr needs to be checked
-       to be sure.  This location is AIL owned.  It is ignored by all
-       (including the ME and initialization code) if
-       drop_processed_buffers, above, is set.  */
-    volatile flipc_cb_ptr shrd_acquire_ptr;
-
-    /* AIL private copy of process pointer.  This hopefully means that
-       the AIL won't need to read the real process pointer (and fault
-       in a cache line) very often.  */
-    flipc_cb_ptr pail_process_ptr;
-
-    unsigned int pad_pail_7;
-
-    /* ===End of cache line===*/
-    /* ===AIL writes, ME occasionally reads=== */
-
-    /* Next location at which the AIL may insert a buffer pointer.  */
-    volatile flipc_cb_ptr sail_release_ptr;
-    unsigned int pad_sail_1;
-    unsigned int pad_sail_2;
-    unsigned int pad_sail_3;
-    unsigned int pad_sail_4;
-    unsigned int pad_sail_5;
-    unsigned int pad_sail_6;
-    unsigned int pad_sail_7;
-
-    /* ===End of cache line===*/
-    /* ===Private ME data=== */
-    /* See above comments (in private ail section).  */
-
-    FLIPC_endpoint_type_t constdm_type;
-    volatile unsigned long saildm_dpb_or_enabled;
-
-    volatile unsigned long sme_overruns; /* For a receive endpoint, counter for
-                                    the number of messages that have
-                                    arrived when there hasn't been
-                                    space.  ME owned.   */
-    unsigned long pail_overruns_seen;  /* A count of the number of overruns
-                                  that the AIL has noted and doesn't
-                                  want to be bothered with again.
-                                  The user only sees the difference
-                                  between the previous count and this.  */
-
-    /*
-     * For send endpoints; linked into a list that is used by the ME
-     * to find stuff to do.  Also used for endpoint free list.
-     * Null if at end of list.  Not "const" because it's used as a
-     * synchronization variable during setup and teardown
-     * of send endpoints.
-     */
-    volatile flipc_cb_ptr sail_next_send_endpoint;
-
-    /* Constant buffer list pointers for ME.  See private ail comments.  */
-    flipc_cb_ptr constdm_my_buffer_list;
-    flipc_cb_ptr constdm_next_buffer_list;
-
-    /* Private ME copy of release pointer.  This hopefully means that
-       the ME won't have to read (and fault in a cache line) the
-       release pointer very often.  */
-
-    flipc_cb_ptr pme_release_ptr;
-    /* ===End of cache line===*/
-
-    /* ===ME writes, AIL occasionally reads=== */
-    /*
-     * For endpoint group membership.
-     */
-    flipc_cb_ptr pail_next_eg_endpoint; /* Next endpoint in endpoint group.
-                                          AIL owned.  */
-    flipc_cb_ptr sail_epgroup; /* Direct pointer to endpoint group that
-                                  we are part of.  FLIPC_CBPTR_NULL
-                                  if none.  AIL owned.  */
-
-    /* First location that has a buffer pointer available for
-       processing. If this value is equal to the release_ptr there are no
-       buffers available for processing.  */
-    volatile flipc_cb_ptr sme_process_ptr;
-    unsigned int pad_sme_3;
-    unsigned int pad_sme_4;
-    unsigned int pad_sme_5;
-    unsigned int pad_sme_6;
-    unsigned int pad_sme_7;
-
-    /* ===End of cache line===*/
-    /* ===END=== */
-
-    /* The following macros may lose a little performance to
-       multiple accesses (or indirection, though a good compiler will get
-       around that).  We need to have versions for each processor so
-       that the constant reads are done from the right copy.  */
-
-    /* General bufferlist pointer increment macro, with versions
-       for ME and AIL.  */
-
-#define NEXT_BUFFERLIST_PTR(bufferlist_ptr, endpoint, suf)     \
-    (((bufferlist_ptr) + sizeof(flipc_data_buffer_t)           \
-      == ((endpoint)->const ## suf ## _next_buffer_list)) ?    \
-     ((endpoint)->const ## suf ## _my_buffer_list) :           \
-     (bufferlist_ptr) + sizeof(flipc_data_buffer_t))
-#define NEXT_BUFFERLIST_PTR_ME(bufferlist_ptr, endpoint) \
-    NEXT_BUFFERLIST_PTR(bufferlist_ptr, endpoint, dm)
-#define NEXT_BUFFERLIST_PTR_AIL(bufferlist_ptr, endpoint) \
-    NEXT_BUFFERLIST_PTR(bufferlist_ptr, endpoint, da)
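/*
 * Illustrative sketch (not part of the original header): the macro walks a
 * bufferlist circularly, wrapping from the last entry back to the first.
 * "ep" and "cur" are hypothetical.
 */
#if 0
    flipc_cb_ptr cur = ep->constda_my_buffer_list;
    do {
        /* ... look at *FLIPC_BUFFERLIST_PTR(cur) ... */
        cur = NEXT_BUFFERLIST_PTR_AIL(cur, ep);  /* wraps at constda_next_buffer_list */
    } while (cur != ep->constda_my_buffer_list);
#endif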
-
-    /* Macros for each of "can I release onto this buffer?"  "Can I
-       acquire from this buffer?" and "Can I process an element on
-       this buffer?"  The first two presume they are being executed on
-       the main processor, the third on the co-processor.
-       All have three arguments:
-       *) A variable which will be set to the release, acquire, or
-       process pointer after the macro *if* the operation is ok.
-       *) A temporary variable used inside the function.
-       *) The endpoint.
-
-       We presume the acquire macro won't be called if drop processed
-       buffers is enabled; the process and release macros deal
-       appropriately with that issue.  */
-
-    /* In general these macros will:
-       *) Not read a volatile structure member more than once.
-       *) If a variable's owner is the other processor, these macros
-          will check a local copy of the variable first before checking
-          the other processor's.
-       *) Only update the local copy if the remote copy really is
-          different from the local one.
-         */
-
-/* This macro implements the synchronization check; local_cbptr is
-   the pointer owned by the local processor which we want to compare
-   with a pointer on the remote processor which we have a copy
-   of locally.  Reads the remote pointer zero or one times; other
-   reads are as necessary.  
-
-   The algorithm is: 
-   *) If the local copy says our pointer and the remote value aren't equal,
-      we're done.
-   *) Otherwise, check the remote copy.  If it says the values aren't
-      equal, update the local copy.  */
-
-#define ENDPOINT_SYNCNE_CHECK(local_cbptr, copy_rmt_cbptr,     \
-                             rmt_cbptr, tmp_cbptr)             \
-    ((local_cbptr) != (copy_rmt_cbptr)                         \
-     || ((((tmp_cbptr) = (rmt_cbptr)) != (local_cbptr))                \
-        && (((copy_rmt_cbptr) = (tmp_cbptr)), 1)))
-
-#define ENDPOINT_ACQUIRE_OK(acquire_cbptr, tmp_cbptr, endpoint)                \
-    ((acquire_cbptr) = (endpoint)->shrd_acquire_ptr,                   \
-     ENDPOINT_SYNCNE_CHECK(acquire_cbptr, (endpoint)->pail_process_ptr,        \
-                          (endpoint)->sme_process_ptr, tmp_cbptr))
-
-#define ENDPOINT_PROCESS_OK(process_cbptr, tmp_cbptr, endpoint)                \
-    ((process_cbptr) = (endpoint)->sme_process_ptr,                    \
-     ENDPOINT_SYNCNE_CHECK(process_cbptr, (endpoint)->pme_release_ptr, \
-                          (endpoint)->sail_release_ptr, tmp_cbptr))
-      
-#define NODPB_ENDPOINT_RELEASE_OK(release_cbptr, tmp_cbptr, endpoint)  \
-    ((release_cbptr) = (endpoint)->sail_release_ptr,                   \
-     (tmp_cbptr) = (endpoint)->shrd_acquire_ptr,                       \
-     (NEXT_BUFFERLIST_PTR_AIL(release_cbptr, endpoint)                 \
-      != (tmp_cbptr)))     
-           
-/* Don't use NEXT_BUFFERLIST_PTR here to save a temporary variable.  */ 
-#define DPB_ENDPOINT_RELEASE_OK(release_cbptr, tmp_cbptr, endpoint)       \
-    (release_cbptr = (endpoint)->sail_release_ptr,                        \
-     ((release_cbptr + sizeof(flipc_data_buffer_t) ==                     \
-       (endpoint)->constda_next_buffer_list)                              \
-      ? ENDPOINT_SYNCNE_CHECK((endpoint)->constda_my_buffer_list,         \
-                             (endpoint)->pail_process_ptr,                \
-                             (endpoint)->sme_process_ptr,                 \
-                             tmp_cbptr)                                   \
-      : ENDPOINT_SYNCNE_CHECK(release_cbptr + sizeof(flipc_data_buffer_t), \
-                             (endpoint)->pail_process_ptr,                \
-                             (endpoint)->sme_process_ptr,                 \
-                             tmp_cbptr)))
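/*
 * Illustrative sketch (not part of the original header): how the AIL might
 * release a buffer onto an endpoint that keeps processed buffers (the
 * no-DPB case).  Variable names are hypothetical; the real release path
 * lives in the AIL library, not in this header.
 */
#if 0
    flipc_cb_ptr release_cbptr, tmp_cbptr;

    if (NODPB_ENDPOINT_RELEASE_OK(release_cbptr, tmp_cbptr, ep)) {
        *FLIPC_BUFFERLIST_PTR(release_cbptr) = buffer_cbptr;     /* hand the buffer over */
        ep->sail_release_ptr = NEXT_BUFFERLIST_PTR_AIL(release_cbptr, ep);
    }
#endif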
-
-    /* This next is tricky; remember that acquire_ptr points
-       to an actual bufferptr on the list, whereas release_ptr does
-       not.  This macro is only used in FLIPC_endpoint_query, and so
-       doesn't need to have an ME version.  */
-
-#define BUFFERS_ON_ENDPOINT_AIL(acquire_ptr, release_ptr, endpoint)    \
-    ((release_ptr) > (acquire_ptr)                                     \
-     ? ((release_ptr) - (acquire_ptr)) / sizeof(flipc_cb_ptr)          \
-     : ((((release_ptr) - (endpoint)->constda_my_buffer_list)          \
-        + ((endpoint)->constda_next_buffer_list - acquire_ptr))        \
-       / sizeof(flipc_cb_ptr)))
-} *flipc_endpoint_t;
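/*
 * Illustrative sketch (not part of the original header): how the AIL might
 * test for and acquire a processed buffer using the endpoint macros above
 * and the cb_ptr conversion macros defined later in this file.  Variable
 * names are hypothetical.
 */
#if 0
static flipc_data_buffer_t
example_acquire(flipc_endpoint_t ep)
{
    flipc_cb_ptr acquire_cbptr, tmp_cbptr;

    if (!ENDPOINT_ACQUIRE_OK(acquire_cbptr, tmp_cbptr, ep))
        return 0;                                /* nothing to acquire */
    ep->shrd_acquire_ptr = NEXT_BUFFERLIST_PTR_AIL(acquire_cbptr, ep);
    return FLIPC_DATA_BUFFER_PTR(*FLIPC_BUFFERLIST_PTR(acquire_cbptr));
}
#endif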
-
-
-/*
- * Endpoint groups.
- *
- * Used to represent a group of endpoints, for linking sending/receiving
- * with semaphores, etc.  Note that there needs to be a private data
- * structure kept by the kernel that associates with each epgroup
- * a semaphore to be used for wakeups on that endpoint set.
- */
-
-typedef struct flipc_epgroup {
-    flipc_simple_lock pail_lock;       /* Lock to synchronize threads (at the
-                                          same priority level) accessing this
-                                          structure.  */
-    volatile unsigned long sail_enabled;       /* Set if structure is active.  */
-    unsigned long const_semaphore_associated; /* Flag to indicate whether or not
-                                         there is a semaphore associated
-                                         with this endpoint group in the
-                                         kernel flipc routines.  */
-    volatile unsigned long sail_wakeup_req; /* Incremented when a thread wants to
-                                       be woken.  */
-    volatile unsigned long pme_wakeup_del; /* Incremented when the ME delivers a
-                                      wakeup. */
-    unsigned long pail_version;                /* Incremented when epgroup membership
-                                          is changed; checked when retrieving
-                                          a buffer from an epgroup.  */
-    unsigned long sail_msgs_per_wakeup;        /* How many messages need to arrive
-                                          before the ME delivers a wakeup.  */
-    unsigned long pme_msgs_since_wakeup;       /* How many messages have arrived
-                                          since the last wakeup.  ME
-                                          owned.  */
-
-    flipc_cb_ptr pail_first_endpoint; /* First endpoint in the group.  The
-                                        other endpoints are linked along
-                                        behind it.  AIL owned.  */
-    flipc_cb_ptr pail_free;    /* Used to link this endpoint group onto
-                                  the freelist.  */
-} *flipc_epgroup_t;
-
-/*
- * Communication buffer control structure.
- *
- * This is in the communications buffer itself.  Note that any changes
- * in this structure require it to be locked with the allocation lock,
- * as access to this structure is shared by all threads using the CB.
- */
-
-/*
- * Individual data type layout.
- *
- * All we need here is a pointer to the start of each type of data
- * struct, the number of those data structures in the communications
- * buffer, and a pointer to the beginning of the freelist for that data
- * structure.
- *
- * Note that the composite buffer list doesn't have a freelist associated
- * with it, since each section of the buffer list is tightly bound to an
- * endpoint, and is allocated and freed with that endpoint.  We still
- * need the start and number information, though.
- */
-struct flipc_cb_type_ctl {
-    flipc_cb_ptr start;                /* Where the array of this type of
-                                  data structure starts.  */
-    unsigned long number;              /* How many of them we've got.  */
-    flipc_cb_ptr free;         /* Where the beginning of the freelist
-                                  is.  */
-};
-
-/*
- * Synchronization with message engine.
- *
- * At certain times (specifically during structure allocation/free or
- * additions to the send list) you want to know that the messaging
- * engine has picked up your changes.  However, the message engine has
- * (effectively) two threads, one for each of the send and receive
- * sides.  The mechanisms used for synchronizations with the two sides
- * differ.  In an eventual co-processor implementation (with a single
- * thread), only the send side mechanism will be used.
- *
- * To request a cached state flush by the send side of the messaging
- * engine, you flip the request_sync bit and it responds by flipping
- * the response_sync bit.  The send ME checks this bit once every trip
- * through the send endpoints.
- *
- * On the receive side, since receives take very little time and do
- * not block (unlike sends), when we want to make sure the ME is
- * holding no cached receive side state, we simply spin until we see
- * that the ME receive side is no longer operating.  It sets a
- * variable whenever it is in the process of receiving a message.
- */
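/*
 * Illustrative sketch (not part of the original comment): the AIL side of
 * the send-engine handshake described above, against the sail_request_sync
 * and sme_respond_sync fields of the structure defined below.  "cb_ctl" is
 * hypothetical.
 */
#if 0
    unsigned long expected = !cb_ctl->sme_respond_sync;

    cb_ctl->sail_request_sync = !cb_ctl->sail_request_sync;  /* request a flush */
    while (cb_ctl->sme_respond_sync != expected)
        ;                              /* spin (or yield) until acknowledged */
#endif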
-
-/*
- * Proper manipulation of the send endpoint list.
- *
- * Note that synchronizing with the message engine over access to the
- * send endpoint list is especially tricky.  There is no problem with
- * writing new values in all of the locations required to take a send
- * endpoint off of the list.  However, we must be very sure before
- * modifying the pointer *in* the send endpoint that the ME isn't
- * currently working in that send endpoint (else it could be sent off
- * into the void).  Two options here:
- *
- *     *) Synchronize (using the below variables) for each send
- *        endpoint removed, after the removal but before the
- *        modification of the data in the internal structure.
- *     *) If we can always be sure that the send endpoint link in the
- *        endpoint structure has a valid value, we can simply let the
- *        chips fall where they may.  It will be null while free, and
- *        have a value that points back into the send buffer list
- *        when reallocated.  I'm not going to do this; it's sleazy
- *        and will partially mess up fairness based on ME send
- *        endpoint round-robinning.
- */
-
-/*
- * This entire structure is protected by a kernel-level lock so there
- * is no conflict between threads accessing it.  See flipc_kfr.c for
- * details on this lock; how it is implemented and used depends on what
- * kernel base we are on.
- */
-
-/*
- * Note that the last element of this structure is variable sized, so this
- * structure itself is also variable sized.
- */
-typedef struct flipc_comm_buffer_ctl {
-    /* Kernel flipc configuration that the user must match in order to
-       work with this kernel.  Checked as soon as the comm buffer is
-       mapped.  */
-    struct {
-       unsigned int real_time_primitives:1;
-       unsigned int message_engine_in_kernel:1;
-       unsigned int no_bus_locking:1; /* One way check -- if the kernel doesn't
-                                  have this and the user does, that's
-                                  an error.  */
-    } kernel_configuration;
-    volatile unsigned long     send_ready;     /* One or more sends are ready to go */
-
-    /* These first three structures are constant after communications buffer
-       initialization.  */
-    unsigned long data_buffer_size; /* Size of the data buffers.  */
-    unsigned long local_node_address; /* Local node number.  */
-    FLIPC_address_t null_destination; /* Local null destination value.  */
-
-#if REAL_TIME_PRIMITIVES
-    /* The scheduling policy used by the task initializing flipc for
-       the allocations lock.  */
-    int allocations_lock_policy;
-#else
-    /* A poor substitute for a kernel level allocations lock.
-       Note that this *cannot* be used as a regular simple lock;
-       instead, try to acquire it, call sleep(1), try again, etc.
-       Spinning on this lock will probably waste lots of cycles.  */
-    flipc_simple_lock pail_alloc_lock;
-#endif
-
-    /* All of the members of these structures except for the free pointer
-       are constant after initialization.  The free pointer is ail owned
-       and private.  */
-    struct flipc_cb_type_ctl endpoint;
-    struct flipc_cb_type_ctl epgroup;
-    struct flipc_cb_type_ctl bufferlist;
-    struct flipc_cb_type_ctl data_buffer;
-
-    /* Global synchronization with the message engine.  On the KKT
-       implementation we need one synchronizer for each thread.  */
-
-    /* Send side: */
-    volatile unsigned long sail_request_sync; /* request_sync = !request_sync when the
-                                         AIL wants to synchronize with the
-                                         CB.  */
-    volatile unsigned long sme_respond_sync; /* respond_sync = !respond_sync when
-                                        the ME has noticed the sync
-                                        request.  By responding to the
-                                        sync, the ME is stating that it has
-                                        no communications buffer state that
-                                        was cached previous to it noticing
-                                        the sync.    */
-
-    /* Receive side.  */
-    volatile unsigned long sme_receive_in_progress; /* Set by the ME before it looks at
-                                               any data structures; cleared
-                                               afterwards.  A simple spin in
-                                               the user space on this
-                                               variable will suffice, as the
-                                               time that the message
-                                               engine could be receiving
-                                               is short.  */
-
-    /* Send endpoint list starts here.  */
-    volatile flipc_cb_ptr sail_send_endpoint_list; /* Null if no send endpoints.
-                                                   */
-
-    /* Keep track of whatever performance information we choose.  */
-    struct FLIPC_domain_performance_info performance;
-
-    /* Keep track of various kinds of error information here.  */
-    struct FLIPC_domain_errors sme_error_log;
-
-} *flipc_comm_buffer_ctl_t;
-
-
-/*
- * The communications buffer.
- *
- * The only restriction on the layout of the communications buffer is
- * that the buffers themselves may not cross page boundaries.  So we
- * will place the data buffers at the end of the communications
- * buffer, and the other objects at the beginning, and there may be a
- * little bit of extra space in the middle.
- *
- * Note that this layout may change in future versions of FLIPC.
- *
- *     +---------------------------+
- *     |    flipc_comm_buffer_ctl  |
- *     +---------------------------+
- *     |                           |
- *     |         Endpoints         |
- *     |                           |
- *     +---------------------------+
- *     |                           |
- *     |      Endpoint Groups      |
- *     |                           |
- *     +---------------------------+
- *     |                           |
- *     | Combined Buffer Lists     |
- *     |                           |
- *     +---------------------------+
- *     |                           |
- *     | (Possible empty space)    |
- *     |                           |
- *     +---------------------------+
- *     |                           |
- *     |    Data Buffers           |
- *     |                           |
- *     +---------------------------+
- */
-
-/* The number of pages that the kernel will reserve for the comm
-   buffer.  The AIL needs to know this to know how much to map.  */
-#define COMM_BUFFER_SIZE 0x100000
-
-/*
- * These variables are set, in a per-address space context, to the base
- * and length of the communications buffer.  The ME needs to do bounds
- * checking to make sure it isn't overrunning anything.  Note that the
- * existence of these variables implies that an application will only
- * open a single domain.
- *
- * These declarations are duplicated in flipc/flipc_usermsg.h, and
- * should be kept in sync with that file.
- */
-unsigned char *flipc_cb_base;
-unsigned long flipc_cb_length;         /* In bytes.  */
-
-/*
- * Following is a set of macros to convert back and forth between
- * real address pointers and flipc_cb_ptr's for each data type.  They
- * rely on the flipc_cb_base being set correctly.
- *
- * A possible future improvement might be to have bounds checking occur
- * inside these macros, but I'm not sure what I'd do if it failed.
- */
-
-/* Easy going one way.  */
-#define FLIPC_CBPTR(ptr) \
-(((unsigned char *) (ptr)) - flipc_cb_base)
-
-/* Need to get the right types going the other way.  */
-#define FLIPC_ENDPOINT_PTR(cb_ptr) \
-((flipc_endpoint_t) ((cb_ptr) + flipc_cb_base))
-#define FLIPC_EPGROUP_PTR(cb_ptr) \
-((flipc_epgroup_t) ((cb_ptr) + flipc_cb_base))
-#define FLIPC_DATA_BUFFER_PTR(cb_ptr) \
-((flipc_data_buffer_t) ((cb_ptr) + flipc_cb_base))
-#define FLIPC_BUFFERLIST_PTR(cb_ptr) \
-((flipc_cb_ptr *) ((cb_ptr) + flipc_cb_base))
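/*
 * Illustrative sketch (not part of the original header): round-tripping
 * between a real pointer and a communications-buffer offset.  "ep_cbptr"
 * is hypothetical.
 */
#if 0
    flipc_endpoint_t ep   = FLIPC_ENDPOINT_PTR(ep_cbptr);   /* offset -> pointer */
    flipc_cb_ptr     back = FLIPC_CBPTR(ep);                /* pointer -> offset; back == ep_cbptr */
#endif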
-
-
-/*
- * Flipc addresses.
- *
- * The addresses used by flipc for communication are defined in the
- * user visible header file as unsigned longs.  These macros pull that
- * information apart for use of the FLIPC internal routines.
- *
- * I assume in the following that endpoints immediately follow the
- * comm buffer control structure, because that makes indexing into
- * them much easier.
- */
-
-#define FLIPC_CREATE_ADDRESS(node, endpoint_idx) \
-(((node) << 16) | (endpoint_idx))
-#define FLIPC_ADDRESS_NODE(addr) (((unsigned long) (addr)) >> 16)
-#define FLIPC_ADDRESS_ENDPOINT(addr) (((unsigned long) (addr)) & 0xffff)
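/*
 * Illustrative sketch (not part of the original header): packing and
 * unpacking a FLIPC address with the macros above.
 */
#if 0
    FLIPC_address_t addr   = FLIPC_CREATE_ADDRESS(3, 42);   /* node 3, endpoint 42 */
    unsigned long node     = FLIPC_ADDRESS_NODE(addr);      /* 3  */
    unsigned long endpoint = FLIPC_ADDRESS_ENDPOINT(addr);  /* 42 */
#endif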
-
-#endif /* _MACH_FLIPC_CB_H_ */
diff --git a/osfmk/mach/flipc_debug.h b/osfmk/mach/flipc_debug.h
deleted file mode 100644 (file)
index 969deda..0000000
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- * 
- */
-
-/*
- * Really a C file, but I'd like to have this code available in both
- * the kernel and the application, so I'll put it in a .h file.  This
- * file needs to be included only once in the AIL or ME, into a .c file
- * where it will be compiled.
- */
-
-/* 
- * Since these are debug functions, it doesn't matter which processor macro
- * version I use; I don't mind spoiling cache while I'm debugging.
- */
-
-#include <mach/flipc_cb.h>
-/*
- * Print (using printf) all buffers in the communications buffer that
- * are not on any endpoint or on the buffer freelist.  Only active
- * endpoints are checked.
- *
- * Note that no locking is done; this function implicitly assumes the
- * communications buffer is in a quiescent state.  It is expected that
- * this function will normally be called from a debugger.
- *
- * While it is at it, this function also prints buffers that are
- * doubly owned (valid pointers to them from two places).
- */
-
-/*
- * Given that these functions will normally be called from the debugger,
- * there isn't any need for globally visible prototypes for them.  To
- * eliminate compilation warnings, we include prototypes for the functions
- * here in the file.
- */ 
-static void flipcdbg_update_bufferset_bitvec(flipc_comm_buffer_ctl_t,
-                                            flipc_data_buffer_t);
-void flipcdbg_print_unowned_buffers(void);
-void flipcdbg_buffer_find_refs(flipc_cb_ptr buffer_cbptr);
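/*
 * Illustrative note (not part of the original file): these are intended to
 * be called by hand from a kernel debugger, along the lines of
 *
 *     (gdb) call flipcdbg_print_unowned_buffers()
 *     (gdb) call flipcdbg_buffer_find_refs(some_buffer_cbptr)
 *
 * where some_buffer_cbptr is a hypothetical flipc_cb_ptr of interest.
 */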
-
-#ifdef __GNUC__
-__inline__
-#endif
-static void 
-flipcdbg_update_bufferset_bitvec(flipc_comm_buffer_ctl_t cb_ctl,
-                                flipc_data_buffer_t buffer)
-{
-    unsigned char *buffer_base = flipc_cb_base + cb_ctl->data_buffer.start;
-    int bitpos = ((((unsigned char *) buffer) - buffer_base)
-                 / cb_ctl->data_buffer_size);
-    int element = bitpos / (sizeof(unsigned long) * 8);
-    int subbitpos = bitpos - element * sizeof(unsigned long) * 8;
-
-    /* Is that position set already?  */
-    if (flipc_debug_buffer_bitvec[element] & (1 << subbitpos))
-       printf("Buffer 0x%x (idx: %d, cbptr: 0x%x) is multiply referenced.\n",
-              buffer, bitpos, FLIPC_CBPTR(buffer));
-
-    /* Set it.  */
-    flipc_debug_buffer_bitvec[element] |= (1 << subbitpos);
-}
-
-void
-flipcdbg_print_unowned_buffers(void)
-{
-    flipc_comm_buffer_ctl_t cb_ctl =
-       (flipc_comm_buffer_ctl_t) flipc_cb_base;
-    int i;
-    unsigned long bitvec_length = ((cb_ctl->data_buffer.number + sizeof(unsigned long) * 8)
-                           / (sizeof(unsigned long) * 8));
-    flipc_data_buffer_t current_buffer;
-    flipc_endpoint_t current_endpoint;
-    flipc_cb_ptr current_cbptr;
-    int header_printed = 0;
-
-    /* Clean out the bitvec.  */
-    for (i = 0; i < bitvec_length; i++)
-       flipc_debug_buffer_bitvec[i] = 0;
-
-    /* Go through the freelist, setting bits for each buffer.  */
-    for (current_cbptr = cb_ctl->data_buffer.free;
-        current_cbptr != FLIPC_CBPTR_NULL;
-        current_cbptr = current_buffer->u.free) {
-
-       current_buffer = FLIPC_DATA_BUFFER_PTR(current_cbptr);
-       flipcdbg_update_bufferset_bitvec(cb_ctl, current_buffer);
-    }
-    
-    /* Go through all the endpoints, again setting bits for each buffer.  */
-    for (current_endpoint = FLIPC_ENDPOINT_PTR(cb_ctl->endpoint.start);
-        (current_endpoint
-         < (FLIPC_ENDPOINT_PTR(cb_ctl->endpoint.start)
-            + cb_ctl->endpoint.number));
-        current_endpoint++) {
-       if (EXTRACT_ENABLED(current_endpoint->saildm_dpb_or_enabled)) {
-           flipc_cb_ptr current_ptr =
-               (EXTRACT_DPB(current_endpoint->saildm_dpb_or_enabled)
-                ? current_endpoint->sme_process_ptr 
-                : current_endpoint->shrd_acquire_ptr);
-           flipc_cb_ptr limit_ptr = current_endpoint->sail_release_ptr;
-
-           while (current_ptr != limit_ptr) {
-               flipc_cb_ptr current_buffer_cbptr =
-                   *FLIPC_BUFFERLIST_PTR(current_ptr);
-               flipc_data_buffer_t current_buffer =
-                   FLIPC_DATA_BUFFER_PTR(current_buffer_cbptr);
-
-               /* Mark this as set.  */
-               flipcdbg_update_bufferset_bitvec(cb_ctl, current_buffer);
-
-               /* Increment the current pointer.  */
-               current_ptr = NEXT_BUFFERLIST_PTR_ME(current_ptr,
-                                                    current_endpoint);
-           }
-       }
-    }
-
-    /* Ok, we should have marked every buffer that has a reference.
-       Print out all the ones that don't have references.  */
-    for (i = 0; i < bitvec_length; i++) {
-       int this_limit =
-           ((i == bitvec_length - 1)
-            ? cb_ctl->data_buffer.number % (sizeof(unsigned long)*8)
-            : sizeof(unsigned long)*8); 
-       if (flipc_debug_buffer_bitvec[i] != (unsigned long) -1) {
-           int j;
-           for (j = 0; j < this_limit; j++) {
-               if (!(flipc_debug_buffer_bitvec[i] & (1 << j))) {
-                   int buffer_bitpos = i * sizeof(unsigned long) * 8 + j;
-                   flipc_cb_ptr buffer_cbptr =
-                       (buffer_bitpos * cb_ctl->data_buffer_size
-                        + cb_ctl->data_buffer.start);
-                   flipc_data_buffer_t buffer_ptr =
-                       FLIPC_DATA_BUFFER_PTR(buffer_cbptr);
-                   
-                   /* Print header if necessary.  */
-                   if (!header_printed) {
-                       header_printed = 1;
-                       printf("Unreferenced buffers (ptr,idx,cbptr):");
-                   }
-
-                   /* Print buffer.  */
-                   printf(" (0x%x,%d,0x%x)", buffer_ptr, buffer_bitpos,
-                          buffer_cbptr);
-               }
-           }
-       }
-    }
-    if (header_printed)
-       printf("\n");
-}
-
-void
-flipcdbg_buffer_find_refs(flipc_cb_ptr buffer_cbptr)
-{
-    flipc_comm_buffer_ctl_t cb_ctl =
-       (flipc_comm_buffer_ctl_t) flipc_cb_base;
-    int found_on_freelist = 0;
-    int found_on_endpoints = 0;
-    int i;
-    flipc_endpoint_t current_endpoint;
-
-    flipc_cb_ptr current_cbptr;
-    flipc_data_buffer_t current_buffer;
-
-    /* Go through the freelist, looking for buffer.  */
-    for (i = 0, current_cbptr = cb_ctl->data_buffer.free;
-        current_cbptr != FLIPC_CBPTR_NULL;
-        i++, current_cbptr = current_buffer->u.free) {
-       if (current_cbptr == buffer_cbptr) {
-           printf("Buffer found on freelist in position %d\n", i);
-           found_on_freelist = 1;
-       }
-       current_buffer = FLIPC_DATA_BUFFER_PTR(current_cbptr);
-       if (i > cb_ctl->data_buffer.number) {
-           printf("**Some form of corruption following the freelist.**\n");
-           return;
-       }
-    }
-    if (found_on_freelist)
-       printf("(Total buffers on freelist: %d/%d)\n", i,
-              cb_ctl->data_buffer.number);
-    
-    /* Go through all the endpoints, again looking for the buffer.  */
-    for (current_endpoint = FLIPC_ENDPOINT_PTR(cb_ctl->endpoint.start);
-        (current_endpoint
-         < (FLIPC_ENDPOINT_PTR(cb_ctl->endpoint.start)
-            + cb_ctl->endpoint.number));
-        current_endpoint++) {
-       if (EXTRACT_ENABLED(current_endpoint->saildm_dpb_or_enabled)) {
-           flipc_cb_ptr current_ptr =
-               (EXTRACT_DPB(current_endpoint->saildm_dpb_or_enabled)
-                ? current_endpoint->sme_process_ptr 
-                : current_endpoint->shrd_acquire_ptr);
-           flipc_cb_ptr limit_ptr = current_endpoint->sail_release_ptr;
-
-           while (current_ptr != limit_ptr) {
-               current_cbptr = *FLIPC_BUFFERLIST_PTR(current_ptr);
-
-               if (current_cbptr == buffer_cbptr) {
-                   printf("Buffer found on endpoint 0x%x (idx: %d)\n",
-                          current_endpoint,
-                          (current_endpoint
-                           - FLIPC_ENDPOINT_PTR(cb_ctl->endpoint.start)));
-                   found_on_endpoints = 1;
-               }
-
-               /* Increment the current pointer.  */
-               current_ptr = NEXT_BUFFERLIST_PTR_ME(current_ptr,
-                                                    current_endpoint);
-           }
-       }
-    }
-}
-
-
-
diff --git a/osfmk/mach/flipc_device.h b/osfmk/mach/flipc_device.h
deleted file mode 100644 (file)
index e76e520..0000000
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- * 
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:29  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:25:45  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.5.1  1995/06/13  18:20:16  sjs
- *     Merged from flipc_shared.
- *     [95/06/07            sjs]
- *
- * Revision 1.1.3.4  1995/04/05  21:21:58  randys
- *     Added allocations_lock_policy argument to usermsg_Init_Buffer set
- *     status call.
- *     [95/04/05            randys]
- * 
- * Revision 1.1.3.3  1995/02/21  17:23:08  randys
- *     Re-indented code to four space indentation
- *     [1995/02/21  16:25:35  randys]
- * 
- * Revision 1.1.3.2  1994/12/20  19:02:03  randys
- *     Added filename in comment at top of each file
- *     [1994/12/19  20:28:25  randys]
- * 
- * Revision 1.1.3.1  1994/12/12  17:46:17  randys
- *     Putting initial flipc implementation under flipc_shared
- *     [1994/12/12  16:27:48  randys]
- * 
- * Revision 1.1.1.2  1994/12/11  23:11:21  randys
- *     Initial flipc code checkin
- * 
- * $EndLog$
- */
-
-/*
- * mach/flipc_device.h
- *
- * Declarations related to the device driver interface to FLIPC.
- */
-
-#ifndef _MACH_FLIPC_DEVICE_H_
-#define _MACH_FLIPC_DEVICE_H_
-
-/*
- * Definitions of constants both the ME and AIL need to know for
- * communications through the device driver interface.  These are the
- * possible values for the top 16 bits of the flavor parameter; the
- * bottom 16 bits are extra information that may be needed (eg. to
- * parameterize a request for semaphore in the get status routine).
- */
-typedef enum {                 /* Arguments.  */
-    /* Get status flavors.  */ 
-    usermsg_Get_Initialized_Status = 1, /* (int *init_p) */
-    usermsg_Get_Epgroup_Semaphore, /* (mach_port_t *semaphore) */
-    usermsg_Return_Allocations_Lock, /* (void) */
-
-    /* Set status flavors.  */
-    usermsg_Init_Buffer,               /* (int max_endpoints,
-                                          int max_epgroups,
-                                          int max_buffers,
-                                          int max_buffers_per_endpoint,
-                                          int allocations_lock_policy) */
-    usermsg_Process_Work,              /* (void) */
-    usermsg_Acquire_Allocations_Lock, /* (void) */
-    usermsg_Release_Allocations_Lock, /* (void) */
-    usermsg_Epgroup_Associate_Semaphore /* (int epgroup_idx, mach_port_t port) */
-} usermsg_devop_t;
-
-#define FLIPC_DEVICE_FLAVOR(devop, param)  (((devop)<<16)|(param))
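/*
 * Illustrative sketch (not part of the original header): building the flavor
 * word for a get-status call that asks for the semaphore of endpoint group 5.
 */
#if 0
    int flavor = FLIPC_DEVICE_FLAVOR(usermsg_Get_Epgroup_Semaphore, 5);
#endif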
-
-#endif /* _MACH_FLIPC_DEVICE_H_ */
diff --git a/osfmk/mach/flipc_locks.h b/osfmk/mach/flipc_locks.h
deleted file mode 100644 (file)
index 94d9a6e..0000000
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- * 
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:30  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:25:45  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.4.1  1995/06/13  18:20:29  sjs
- *     Merged from flipc_shared.
- *     [95/06/07            sjs]
- *
- * Revision 1.1.2.3  1995/03/09  19:42:30  rwd
- *     Move yield function out of macro and prototype.
- *     [1995/03/09  19:36:25  rwd]
- * 
- * Revision 1.1.2.2  1995/02/21  17:23:11  randys
- *     Re-indented code to four space indentation
- *     [1995/02/21  16:25:39  randys]
- * 
- * Revision 1.1.2.1  1994/12/20  19:02:06  randys
- *     Moved definition of flipc_simple_lock to flipc_cb.h
- *     [1994/12/20  17:34:44  randys]
- * 
- *     Separated the lock macros out into machine dependent and independent files;
- *     this is the machine independent file.
- *     [1994/12/20  16:43:38  randys]
- * 
- * $EndLog$
- */
-
-/*
- * mach/flipc_locks.h
- *
- * The machine independent part of the flipc_simple_locks definitions.
- * Most of the lock definitions are in flipc_dep.h, but what isn't
- * dependent on the platform being used is here.
- */
-
-/*
- * Note that the locks defined in this file and in flipc_dep.h are only
- * for use by user level code.  The reason why this file is visible to
- * the kernel is that the kernel section of flipc needs to initialize
- * these locks.
- */
-
-#ifndef _MACH_FLIPC_LOCKS_H_
-#define _MACH_FLIPC_LOCKS_H_
-
-/* Get the simple lock type.  */
-#include <mach/flipc_cb.h>
-
-/*
- * Lock function prototypes.  This needs to be before any lock definitions
- * that happen to be macros.
- */
-
-/* Initializes lock.  Always a macro (so that kernel code can use it without
-   library assistance).  */
-void flipc_simple_lock_init(flipc_simple_lock *lock);
-
-/* Returns 1 if lock gained, 0 otherwise.  */
-int flipc_simple_lock_try(flipc_simple_lock *lock);
-
-/* Returns 1 if lock is locked, 0 otherwise.  */
-int flipc_simple_lock_locked(flipc_simple_lock *lock);
-
-/* Releases the lock.  */
-void flipc_simple_lock_release(flipc_simple_lock *lock);
-
-/* Take the lock.  */
-void flipc_simple_lock_acquire(flipc_simple_lock *lock);
-
-/* Take two locks.  Does not hold one while spinning on the
-   other.  */
-void flipc_simple_lock_acquire_2(flipc_simple_lock *lock1,
-                          flipc_simple_lock *lock2);
-
-/* Get the machine dependent stuff.  The things that need to be
- * defined in a machine dependent fashion are:
- *
- *   flipc_simple_lock_init    (must be a macro)
- *   flipc_simple_lock_try
- *   flipc_simple_lock_locked
- *   flipc_simple_lock_release
- *
- * These last three don't necessarily have to be macros, but if they
- * aren't, definitions must be included in the machine dependent
- * part of the user level library code.
- */
-#include <mach/machine/flipc_dep.h>
-
-/*
- * Set at flipc initialization time to the thread_yield argument to
- * FLIPC_domain_init.
- */
-
-extern void (*flipc_simple_lock_yield_fn)(void);
-
-/*
- * Machine independent definitions; defined in terms of above routines.
- */
-
-/* Take the lock.  Assumes an external define saying how long to
-   spin, and an external function to call when we've spun too long.  */
-#define flipc_simple_lock_acquire(lock)                \
-do {                                           \
-  int __spin_count = 0;                                \
-                                               \
-  while (flipc_simple_lock_locked(lock)                \
-        || !flipc_simple_lock_try(lock))       \
-    if (++__spin_count > LOCK_SPIN_LIMIT) {    \
-      (*flipc_simple_lock_yield_fn)();         \
-      __spin_count = 0;                                \
-    }                                          \
-} while (0)
-
-/* Take two locks.  Hold neither while spinning on the other.  */
-#define flipc_simple_lock_acquire_2(lock1, lock2)      \
-do {                                                   \
-  int __spin_count = 0;                                        \
-                                                       \
-  while (1) {                                          \
-    while (flipc_simple_lock_locked(lock1)             \
-          || !flipc_simple_lock_try(lock1))            \
-      if (++__spin_count > LOCK_SPIN_LIMIT) {          \
-       (*flipc_simple_lock_yield_fn)();                \
-       __spin_count = 0;                               \
-      }                                                        \
-                                                       \
-    if (flipc_simple_lock_try(lock2))                  \
-      break;                                           \
-    flipc_simple_lock_release(lock1);                  \
-                                                       \
-    while (flipc_simple_lock_locked(lock2)             \
-          || !flipc_simple_lock_try(lock2))            \
-      if (++__spin_count > LOCK_SPIN_LIMIT) {          \
-       (*flipc_simple_lock_yield_fn)();                \
-       __spin_count = 0;                               \
-      }                                                        \
-                                                       \
-    if (flipc_simple_lock_try(lock1))                  \
-      break;                                           \
-    flipc_simple_lock_release(lock2);                  \
-  }                                                    \
-} while (0)
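/*
 * Illustrative sketch (not part of the original header): a typical critical
 * section built from the primitives above.  "ep" is a hypothetical endpoint
 * whose pail_lock serializes AIL threads.
 */
#if 0
    flipc_simple_lock_acquire(&ep->pail_lock);
    /* ... manipulate AIL-owned endpoint state ... */
    flipc_simple_lock_release(&ep->pail_lock);
#endif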
-
-#endif /* _MACH_FLIPC_LOCKS_H_ */
diff --git a/osfmk/mach/flipc_types.h b/osfmk/mach/flipc_types.h
deleted file mode 100644 (file)
index 1f7f978..0000000
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- * 
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:30  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:25:45  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.7.1  1996/09/17  16:34:35  bruel
- *     fixed types.
- *     [96/09/17            bruel]
- *
- * Revision 1.1.5.1  1995/06/13  18:20:20  sjs
- *     Merged from flipc_shared.
- *     [95/06/07            sjs]
- * 
- * Revision 1.1.3.11  1995/05/23  19:55:36  randys
- *     Don't keep track of messages sent to a bad destination--that's
- *     purely a transport function now.
- *     [95/05/23            randys]
- * 
- * Revision 1.1.3.10  1995/05/23  15:40:20  randys
- *     Added field to FLIPC_domain_errors to indicate validity of other
- *     fields.
- *     [95/05/22            randys]
- * 
- * Revision 1.1.3.9  1995/05/16  20:46:35  randys
- *     Added a "performance_valid" field to the flipc performance
- *     structure.
- *     [95/05/16            randys]
- * 
- * Revision 1.1.3.8  1995/04/05  21:22:01  randys
- *     Added field to domain_info struct to include allocations lock
- *     sched policy.
- *     [95/04/05            randys]
- * 
- * Revision 1.1.3.7  1995/03/09  19:42:33  rwd
- *     Define SEMAPHORE_NULL (for now) and include mach_types.h instead
- *     of sema_types.h.
- *     [95/03/08            rwd]
- * 
- * Revision 1.1.3.6  1995/02/23  21:32:52  randys
- *      Removed placeholder definition for locks--I don't believe that I
- *      use locks unless I'm on top of a real time base, in which case
- *      that base will define the type.
- *     [95/02/22            randys]
- * 
- * Revision 1.1.3.5  1995/02/21  17:23:13  randys
- *     Re-indented code to four space indentation
- *     [1995/02/21  16:25:36  randys]
- * 
- * Revision 1.1.3.4  1995/02/16  23:20:14  randys
- *     ANSIfy FLIPC_thread_yield_function.
- *     [95/02/14            randys]
- * 
- *     Add FLIPC_thread_yield_function type.
- *     [95/02/14            randys]
- * 
- * Revision 1.1.3.3  1995/01/26  21:01:51  randys
- *     Added performance structure.
- *     [1995/01/24  21:14:12  randys]
- * 
- *     Added FLIPC_epgroup_info struct
- *     [1995/01/24  18:30:02  randys]
- * 
- *     Create a new structure (FLIPC_endpoint_info) to return
- *     information about an endpoint.
- *     [1995/01/20  19:26:35  randys]
- * 
- *     Get rid of FLIPC_DESTINATION_NULL and add in
- *     FLIPC_ADDRESS_ERROR (return code from FLIPC_buffer_destination)
- *     [1995/01/19  20:23:24  randys]
- * 
- *     Added domain index type for specifying domain in
- *     init and attach calls
- *     [1995/01/18  16:47:25  randys]
- * 
- * Revision 1.1.3.2  1994/12/20  19:02:09  randys
- *     Added error reporting structure type, and added
- *     room in the standard domain query for error log size.
- *     [1994/12/19  23:46:09  randys]
- * 
- *     Added filename in comment at top of each file
- *     [1994/12/19  20:28:26  randys]
- * 
- *     Support and doc for minor user interface changes for error conditions
- *     [1994/12/18  23:24:30  randys]
- * 
- *     Yank the semaphore type definitions back out, and include the file
- *     that defines those types.
- *     [1994/12/13  17:50:03  randys]
- * 
- * Revision 1.1.3.1  1994/12/12  17:46:20  randys
- *     Put definitions of semaphore_t and SEMAPHORE_NULL back in; they aren't
- *     defined in user space yet.
- *     [1994/12/12  17:21:56  randys]
- * 
- * Revision 1.1.1.2  1994/12/11  23:11:23  randys
- *     Initial flipc code checkin
- * 
- * $EndLog$
- */
-
-/*
- * mach/flipc_types.h
- *
- * Definitions of those flipc types that need to be visible to both the AIL
- * and kernel sides of flipc (which is just about everything).
- */
-
-#ifndef _MACH_FLIPC_TYPES_H_
-#define _MACH_FLIPC_TYPES_H_
-
-#include <mach/port.h>
-
-/*
- * Define a couple of generally useful types.
- */
-#include <mach/mach_types.h>
-
-#ifndef MACH_KERNEL
-#define SEMAPHORE_NULL (semaphore_port_t)0
-#endif /* !defined(MACH_KERNEL) */
-
-/*
- * Basic flipc types; visible to both user and kernel segments of the
- * flipc implementation.
- */
-/* Flipc addresses.  These name a node-endpoint combination for
-   sending.  */
-typedef unsigned int FLIPC_address_t;
-#define FLIPC_ADDRESS_ERROR ((FLIPC_address_t) -1)
-
-/* Flipc endpoints.  */
-typedef void *FLIPC_endpoint_t;
-#define FLIPC_ENDPOINT_NULL ((FLIPC_endpoint_t) 0)
-
-/* Buffer pointers (returned by query functions).  Users are allowed to
-   copy directly to/from this pointer; it points at their data.  */
-typedef void *FLIPC_buffer_t;   
-#define FLIPC_BUFFER_NULL ((FLIPC_buffer_t) 0)   
-
-/* Endpoint group identifiers.  */
-typedef void *FLIPC_epgroup_t;
-#define FLIPC_EPGROUP_NULL ((FLIPC_epgroup_t) 0)
-#define FLIPC_EPGROUP_ERROR ((FLIPC_epgroup_t) -1)
-
-/* Domain index; argument to initialization and attach routines.  */
-typedef unsigned int FLIPC_domain_index_t;
-
-/* Domain handle (mach port).  */
-typedef mach_port_t FLIPC_domain_t;
-
-/* The different types an endpoint can be.  FLIPC_Inactive is used when
-   the endpoint has not been configured and hence is on the freelist.  */
-typedef enum {
-    FLIPC_Inactive = -1,
-    FLIPC_Send,
-    FLIPC_Receive
-} FLIPC_endpoint_type_t;
-
-/* Structure for returning performance information about the flipc
-   domain; a placeholder for future entries as needed.
-   This information will only be valid if the kernel is configured to
-   keep flipc performance information.  */
-typedef struct FLIPC_domain_performance_info {
-    unsigned long performance_valid;   /* Non zero if the other information
-                                  in this structure is valid.  */
-    unsigned long messages_sent;               /* Since last init.  */
-    unsigned long messages_received;   /* Since last init.  Includes overruns
-                                  (because they are marked in the
-                                  endpoint data structure).  Doesn't
-                                  include other drops (they are
-                                  marked in other places) */
-} *FLIPC_domain_performance_info_t;
-
-/* Flipc yield function.  */
-typedef void (*FLIPC_thread_yield_function)(void);
-
-/* Structure for returning information about the flipc domain.  */
-typedef struct FLIPC_domain_info {
-    int max_endpoints;
-    int max_epgroups;
-    int max_buffers;
-    int max_buffers_per_endpoint;
-    int msg_buffer_size;
-    FLIPC_thread_yield_function yield_fn;
-    int policy;                        /* Allocations lock sched policy.
-                                  Unused if REAL_TIME_PRIMITIVES are
-                                  not being used.  */
-    struct FLIPC_domain_performance_info performance;
-    int error_log_size;                /* In bytes.  */
-} *FLIPC_domain_info_t;
-
-/* Structure for returning information about the error state of
-   the flipc domain.  Note that this is variable sized; the size
-   of the transport specific information is not known at compile
-   time.  */
-typedef struct FLIPC_domain_errors {
-    int error_full_config_p;           /* 1 if disabled and badtype below are
-                                          valid; 0 if only msgdrop_inactive
-                                          is valid.  */ 
-    int msgdrop_inactive;              /* Messages dropped because
-                                          of the domain being locally
-                                          inactive.  */
-    int msgdrop_disabled;              /* Messages dropped because of a
-                                          disabled endpoint.  */
-    int msgdrop_badtype;               /* Messages dropped because they
-                                          were sent to a send endpoint.  */
-
-    int transport_error_size;  /* Size of the following array of
-                                  ints, in bytes.  */
-    int transport_error_info[1];       /* Really of transport_error_size.  */
-} *FLIPC_domain_errors_t;
-
-/* Structure for returning information about endpoints.  */
-typedef struct FLIPC_endpoint_info {
-    FLIPC_endpoint_type_t type;
-    unsigned int processed_buffers_dropped_p;
-    unsigned long number_buffers;
-    FLIPC_epgroup_t epgroup;
-} *FLIPC_endpoint_info_t;
-
-typedef struct FLIPC_epgroup_info {
-    unsigned long msgs_per_wakeup;
-} *FLIPC_epgroup_info_t;
-
-#endif /* _MACH_FLIPC_TYPES_H_ */
index 8f67ac1a1aeb152fd24682b4600a2beca2fe89d6..428a9e18b2c16c5453a2cfc6d1c42a4aa747ef30 100644 (file)
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
+ *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
  * unlawful or unlicensed copies of an Apple operating system, or to
  * circumvent, violate, or enable the circumvention or violation of, any
  * terms of an Apple operating system software license agreement.
- * 
+ *
  * Please obtain a copy of the License at
  * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
+ *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- * 
+ *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
  * @OSF_COPYRIGHT@
  */
-/* 
+/*
  * Mach Operating System
  * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
  * All Rights Reserved.
- * 
+ *
  * Permission to use, copy, modify and distribute this software and its
  * documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
- * 
+ *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
+ *
  * Carnegie Mellon requests users of this software to return to
- * 
+ *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
- * 
+ *
  * any improvements or extensions that they make and grant Carnegie Mellon
  * the rights to redistribute these changes.
  */
-/*
- */
 
 /*
  *     File:   mach/host_info.h
@@ -100,6 +98,7 @@ typedef      integer_t       host_flavor_t;
 #define HOST_SEMAPHORE_TRAPS   7       /* Has semaphore traps */
 #define HOST_MACH_MSG_TRAP     8       /* Has mach_msg_trap */
 #define HOST_VM_PURGABLE       9       /* purg'e'able memory info */
+#define HOST_DEBUG_INFO_INTERNAL 10    /* Used for kernel internal development tests only */
 
 #ifdef MACH_KERNEL_PRIVATE
 struct host_basic_info_old {
@@ -114,7 +113,7 @@ typedef     struct host_basic_info_old      host_basic_info_data_old_t;
 typedef struct host_basic_info_old     *host_basic_info_old_t;
 #define HOST_BASIC_INFO_OLD_COUNT ((mach_msg_type_number_t) \
                (sizeof(host_basic_info_data_old_t)/sizeof(integer_t)))
-#endif
+#endif /* MACH_KERNEL_PRIVATE */
 
 #pragma pack(4)
 
@@ -236,7 +235,7 @@ typedef struct vm_purgeable_info    *host_purgable_info_t;
 #define        HOST_VM_INFO_REV1_COUNT /* added "speculative_count" (1 int) */ \
        ((mach_msg_type_number_t) \
         (HOST_VM_INFO_REV2_COUNT - 1))
-#define        HOST_VM_INFO_REV0_COUNT /* added "purgable" info (2 ints) */    \
+#define        HOST_VM_INFO_REV0_COUNT /* added "purgable" info (2 ints) */    \
        ((mach_msg_type_number_t) \
         (HOST_VM_INFO_REV1_COUNT - 2))
 
@@ -266,13 +265,26 @@ struct _processor_statistics_np  {
        uint32_t                ps_interrupt_count;
        uint32_t                ps_ipi_count;
        uint32_t                ps_timer_pop_count;
-       
+
        uint64_t                ps_runq_count_sum __attribute((aligned(8)));
 
        uint32_t                ps_idle_transitions;
        uint32_t                ps_quantum_timer_expirations;
 };
 
+struct host_debug_info_internal {
+       uint64_t config_bank:1,   /* built configurations */
+                config_atm:1,
+                config_csr:1,
+                config_coalitions:1,
+                config_unused:60;
+};
+
+typedef struct host_debug_info_internal *host_debug_info_internal_t;
+typedef struct host_debug_info_internal  host_debug_info_internal_data_t;
+#define HOST_DEBUG_INFO_INTERNAL_COUNT  ((mach_msg_type_number_t)\
+       (sizeof (host_debug_info_internal_data_t) / sizeof(integer_t)))
+
 #endif /* PRIVATE */
 
 #ifdef KERNEL_PRIVATE
@@ -280,8 +292,8 @@ struct _processor_statistics_np  {
 extern kern_return_t   set_sched_stats_active(
                                        boolean_t active);
 
-extern kern_return_t   get_sched_statistics( 
-                                       struct _processor_statistics_np *out, 
+extern kern_return_t   get_sched_statistics(
+                                       struct _processor_statistics_np *out,
                                        uint32_t *count);
 #endif  /* KERNEL_PRIVATE */
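
The new HOST_DEBUG_INFO_INTERNAL flavor follows the usual Mach info-flavor pattern: a fixed-size struct plus a COUNT macro expressed in integer_t units. A minimal sketch of how such a flavor is normally queried is below; note the flavor and its struct are PRIVATE and intended for kernel-internal development tests, so this assumes the private definitions are visible and the kernel accepts the request at all.

    #include <mach/mach.h>
    #include <stdio.h>

    /* Illustrative only: HOST_DEBUG_INFO_INTERNAL is a PRIVATE flavor. */
    int main(void)
    {
        host_debug_info_internal_data_t info;
        mach_msg_type_number_t count = HOST_DEBUG_INFO_INTERNAL_COUNT;

        kern_return_t kr = host_info(mach_host_self(), HOST_DEBUG_INFO_INTERNAL,
                                     (host_info_t)&info, &count);
        if (kr == KERN_SUCCESS)
            printf("config_bank=%llu config_csr=%llu\n",
                   (unsigned long long)info.config_bank,
                   (unsigned long long)info.config_csr);
        return 0;
    }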
 
index 1fa0dff88c9a3b070b183746d6756d0beca33a41..84461ae8b89b59546806c1032d76217961197685 100644 (file)
 #define HOST_AMFID_PORT                        (11 + HOST_MAX_SPECIAL_KERNEL_PORT)
 #define HOST_GSSD_PORT                 (12 + HOST_MAX_SPECIAL_KERNEL_PORT)
 #define HOST_TELEMETRY_PORT            (13 + HOST_MAX_SPECIAL_KERNEL_PORT)
-#define HOST_ATM_NOTIFICATION_PORT             (14 + HOST_MAX_SPECIAL_KERNEL_PORT)
+#define HOST_ATM_NOTIFICATION_PORT     (14 + HOST_MAX_SPECIAL_KERNEL_PORT)
 #define HOST_COALITION_PORT            (15 + HOST_MAX_SPECIAL_KERNEL_PORT)
-#define HOST_MAX_SPECIAL_PORT           (16 + HOST_MAX_SPECIAL_KERNEL_PORT)
-                                        /* room to grow here as well */
+#define HOST_SYSDIAGNOSE_PORT           (16 + HOST_MAX_SPECIAL_KERNEL_PORT)
+#define HOST_XPC_EXCEPTION_PORT                (17 + HOST_MAX_SPECIAL_KERNEL_PORT)
+#define HOST_CONTAINERD_PORT           (18 + HOST_MAX_SPECIAL_KERNEL_PORT)
+#define HOST_MAX_SPECIAL_PORT          HOST_CONTAINERD_PORT
+                                        /* See rdar://19421223 */
 
 /*
  * Special node identifier to always represent the local node.
 #define host_set_coalition_port(host, port)    \
        (host_set_special_port((host), HOST_COALITION_PORT, (port)))
 
+#define host_get_sysdiagnose_port(host, port)  \
+       (host_get_special_port((host),                  \
+       HOST_LOCAL_NODE, HOST_SYSDIAGNOSE_PORT, (port)))
+#define host_set_sysdiagnose_port(host, port)  \
+       (host_set_special_port((host), HOST_SYSDIAGNOSE_PORT, (port)))
+
+#define host_get_container_port(host, port)    \
+       (host_get_special_port((host),                  \
+       HOST_LOCAL_NODE, HOST_CONTAINERD_PORT, (port)))
+#define host_set_container_port(host, port)    \
+       (host_set_special_port((host), HOST_CONTAINERD_PORT, (port)))
+
 #endif /* _MACH_HOST_SPECIAL_PORTS_H_ */
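
The new sysdiagnose and container special ports are reached through the same host_get_special_port()/host_set_special_port() calls that back the existing convenience macros. A small sketch, assuming the caller already holds a send right to the privileged host port:

    #include <mach/mach.h>
    #include <mach/host_special_ports.h>

    /* Sketch: look up the sysdiagnose notification port via the new macro. */
    kern_return_t lookup_sysdiagnose_port(host_t host_priv, mach_port_t *port_out)
    {
        return host_get_sysdiagnose_port(host_priv, port_out);
    }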
index d7edf6ca3334f508024c3e95be75327be1299214..26a04f65020d9d06ac67f5a62ebc3693180a2cc2 100644 (file)
@@ -15,8 +15,13 @@ DATAFILES = \
        vm_types.h rpc.h \
        _structs.h sdt_isa.h
 
+PRIVATE_DATAFILES = \
+       syscall_sw.h
+
 INSTALL_MD_LIST = ${DATAFILES}
 
+INSTALL_MD_LCL_LIST = ${PRIVATE_DATAFILES}
+
 INSTALL_MD_GEN_LIST = \
        asm.h 
 
index db603d5886333d205ad7fc68332bc707113f3ccb..3bdf1fcddb27198607c95b13ace3af6f8ecdfc3a 100644 (file)
@@ -63,7 +63,7 @@
  * No machine dependent types for the 80386
  */
 
-#define        EXC_TYPES_COUNT 13      /* incl. illegal exception 0 */
+#define        EXC_TYPES_COUNT 14      /* incl. illegal exception 0 */
 
 /*
  *     Codes and subcodes for 80386 exceptions.
diff --git a/osfmk/mach/i386/flipc_dep.h b/osfmk/mach/i386/flipc_dep.h
deleted file mode 100644 (file)
index e04a83a..0000000
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- *
- */
-
-/*
- * mach/i386/flipc_dep.h
- *
- * This file will have all of the FLIPC implementation machine dependent
- * defines that need to be visible to both kernel and AIL (eg. bus locks
- * and bus synchronization primitives).
- */
-
-#ifndef _MACH_FLIPC_DEP_H_
-#define _MACH_FLIPC_DEP_H_
-
-/* For the 386, we don't need to wrap synchronization variable writes
-   at all.  */
-#define SYNCVAR_WRITE(statement)  statement
-
-/* And similarly (I believe; check on this), for the 386 there isn't any
-   requirement for write fences.  */
-#define WRITE_FENCE()
-
-/*
- * Flipc simple lock defines.  These are almost completely for the use
- * of the AIL; the reason they are in this file is that they need to
- * be initialized properly in the communications buffer initialization
- * routine.  Sigh.  Note in particular that the kernel has no defined
- * "simple_lock_yield_function", so it had better never expand the
- * macro simple_lock_acquire.
- *
- * These locks may be declared by "flipc_simple_lock lock;".  If they
- * are instead declared by FLIPC_DECL_SIMPLE_LOCK(class,lockname) they
- * may be used without initialization.
- */
-
-#define SIMPLE_LOCK_INITIALIZER 0
-#define FLIPC_DECL_SIMPLE_LOCK(class,lockname) \
-class flipc_simple_lock (lockname) = SIMPLE_LOCK_INITIALIZER
-
-/*
- * Lower case because they may be macros or functions.
- * I'll include the function prototypes just for examples here.
- */
-
-#define flipc_simple_lock_init(lock)           \
-do {                                           \
-    *(lock) = SIMPLE_LOCK_INITIALIZER;         \
-} while (0)
-
-/*
- * Defines of the actual routines, for gcc.
- */
-
-#define flipc_simple_lock_locked(lock) ((*lock) != SIMPLE_LOCK_INITIALIZER)
-
-#ifdef __GNUC__
-     extern __inline__ int flipc_simple_lock_try(flipc_simple_lock *lock)
-{
-    int r;
-    __asm__ volatile("movl $1, %0; xchgl %0, %1" : "=&r" (r), "=m" (*lock));
-    return !r;
-}
-
-/* I don't know why this requires an ASM, but I'll follow the leader. */
-extern __inline__ void flipc_simple_lock_release(flipc_simple_lock *lock)
-{
-    register int t;                            
-    
-    __asm__ volatile("xorl %0, %0; xchgl %0, %1" : "=&r" (t), "=m" (*lock));
-} 
-#else  /* __GNUC__ */
-/* If we aren't compiling with gcc, the above need to be functions.  */
-#endif /* __GNUC__ */
-
-#endif /* _MACH_FLIPC_DEP_H_ */
index 63d8402cd9320d9a5717e3376ccbf5c0daa71dba..bc44a2842f6103e2193633f3b79d001f790e0ef6 100644 (file)
@@ -300,4 +300,29 @@ routine host_register_well_known_mach_voucher_attr_manager(
                key             : mach_voucher_attr_key_t;
        out     new_attr_control: ipc_voucher_attr_control_t);
 
+
+/*
+ * Update the global ATM diagnostic flag, readable from the commpage
+ */
+routine host_set_atm_diagnostic_flag(
+        host_priv      : host_priv_t;
+    in  diagnostic_flag : uint32_t);
+
+#if !KERNEL && LIBSYSCALL_INTERFACE
+routine host_get_atm_diagnostic_flag(
+               host            : host_t;
+       out     diagnostic_flag : uint32_t);
+#else
+skip;
+#endif
+
+routine mach_memory_info(
+               host            : host_priv_t;
+       out     names           : mach_zone_name_array_t,
+                                       Dealloc;
+       out     info            : mach_zone_info_array_t,
+                                       Dealloc;
+       out     memory_info     : mach_memory_info_array_t,
+                                       Dealloc);
+
 /* vim: set ft=c : */
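
MIG turns each routine in mach_host.defs into a C stub with the same name and in-order arguments, so the additions above imply prototypes roughly like the following. This is a sketch of what mig would generate from the .defs text, not the actual generated header.

    #include <mach/mach.h>

    /* Approximate stub shapes; argument types follow the .defs declarations. */
    kern_return_t host_set_atm_diagnostic_flag(host_priv_t host_priv,
                                               uint32_t diagnostic_flag);

    kern_return_t mach_memory_info(host_priv_t host,
                                   mach_zone_name_array_t *names,
                                   mach_msg_type_number_t *namesCnt,
                                   mach_zone_info_array_t *info,
                                   mach_msg_type_number_t *infoCnt,
                                   mach_memory_info_array_t *memory_info,
                                   mach_msg_type_number_t *memory_infoCnt);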
index d3077945ac1a22f77456e2e3dfd8cf0b78b1e46b..28d867651d566ee1d843f2857be9cb498fa09471 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010, 2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -210,11 +210,12 @@ type vm_sync_t = int;
                 * policy_timeshare_info_t (5 ints)
                 * policy_fifo_info_t (4 ints)
                 * policy_rr_info_t (5 ints)
+                * thread_extended_info (12 ints + 64 chars)
                 * if other thread_info flavors are added, this
                 * definition may need to be changed. (See
                 * mach/thread_info.h and mach/policy.h) */
 type thread_flavor_t           = int;
-type thread_info_t             = array[*:12] of integer_t;
+type thread_info_t             = array[*:32] of integer_t;
 
 type thread_policy_flavor_t    = natural_t;
 type thread_policy_t           = array[*:16] of integer_t;
@@ -495,7 +496,6 @@ type semaphore_consume_ref_t = mach_port_move_send_t
 #if    KERNEL_SERVER
                intran: semaphore_t convert_port_to_semaphore(mach_port_t)
                outtran: mach_port_t convert_semaphore_to_port(semaphore_t)
-               destructor: semaphore_dereference(semaphore_t)
 #endif /* KERNEL_SERVER */
                ;
 
@@ -561,6 +561,9 @@ type mach_voucher_attr_value_handle_t = uint64_t;
 type mach_voucher_attr_value_handle_array_t = array[*:4] of mach_voucher_attr_value_handle_t;
 type mach_voucher_attr_value_reference_t = uint32_t;
 
+type task_inspect_flavor_t = natural_t;
+type task_inspect_data_t = array[] of char;
+
 /* kernel module loader */
 type kmod_t = int;
 type kmod_control_flavor_t = int;
index abd9115623dc8e2253ab069a1d59e9306c4a2c7c..43ef60286c4d69fec7f74c39fbb17a4cea807da8 100644 (file)
@@ -403,6 +403,7 @@ __END_DECLS
 #define CPUFAMILY_ARM_15               0xa8511bca
 #define CPUFAMILY_ARM_SWIFT            0x1e2d6381
 #define CPUFAMILY_ARM_CYCLONE          0x37a09642
+#define CPUFAMILY_ARM_TYPHOON          0x2c91a47e
 
 /* The following synonyms are deprecated: */
 #define CPUFAMILY_INTEL_6_14   CPUFAMILY_INTEL_YONAH
index 6ff836179caff88407d47accda589909a75513ee..615a37e593f860edbd16144490b3e6af1e538a46 100644 (file)
@@ -14,8 +14,13 @@ DATAFILES = \
        vm_param.h vm_types.h machine_types.defs \
        syscall_sw.h sdt.h sdt_isa.h
 
+PRIVATE_DATAFILES = \
+       syscall_sw.h
+
 INSTALL_MI_LIST = ${DATAFILES}
 
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
+
 INSTALL_MI_DIR = mach/machine
 
 EXPORT_MI_LIST = ${DATAFILES}
index d7b5c9d82c27b346e5381c80dc9dc26b4911bbbb..3bdba92a9a300f2cf2c44983de354b796c87e8e8 100644 (file)
 #define        DTRACE_TMR7(name, type1, arg1, type2, arg2, type3, arg3, arg4, arg5, arg6, arg7) \
        DTRACE_PROBE7(__sdt_, name, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
 
+#define        DTRACE_PHYSLAT3(name, type1, arg1, type2, arg2, type3, arg3)            \
+       DTRACE_PROBE3(__sdt_, name, arg1, arg2, arg3);
+
 #define        DTRACE_VM(name)                                                 \
        DTRACE_PROBE(__vminfo_, name)
 
     type3, arg3, type4, arg4)                                          \
        DTRACE_PROBE4(__vminfo_, name, arg1, arg2, arg3, arg4)
 
+#define        DTRACE_VM5(name, type1, arg1, type2, arg2,                      \
+    type3, arg3, type4, arg4, type5, arg5)                             \
+       DTRACE_PROBE5(__vminfo_, name, arg1, arg2, arg3, arg4, arg5)
+
 #define DTRACE_IP(name)                                                        \
        DTRACE_PROBE(__ip_, name)
 
 #define DTRACE_VM2(name, type1, arg1, type2, arg2) do {} while(0)
 #define DTRACE_VM3(name, type1, arg1, type2, arg2, type3, arg3) do {} while(0)
 #define DTRACE_VM4(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) do {} while(0)
+#define DTRACE_VM5(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5) do {} while(0)
 #define DTRACE_IP(name) do {} while(0)
 #define DTRACE_IP1(name, type1, arg1) do {} while(0)
 #define DTRACE_IP2(name, type1, arg1, type2, arg2) do {} while(0)
index 9e000c6af9c781ecfe7788a0f303c69556e234a4..2adf2b77a4eb8753e6e0d4f49e38564bd201b715 100644 (file)
@@ -387,6 +387,7 @@ typedef struct memory_object_attr_info      memory_object_attr_info_data_t;
 #define MAP_MEM_USE_DATA_ADDR  0x100000 /* preserve address of data, rather than base of page */
 #define MAP_MEM_VM_COPY                0x200000 /* make a copy of a VM range */
 #define MAP_MEM_VM_SHARE       0x400000 /* extract a VM range for remap */
+#define        MAP_MEM_4K_DATA_ADDR    0x800000 /* preserve 4K aligned address of data */
 
 #ifdef KERNEL
 
@@ -420,6 +421,7 @@ struct upl_page_info {
                cs_tainted:1,   /* CODE SIGNING: page is tainted */
                cs_nx:1,        /* CODE SIGNING: page is NX */
                needed:1,       /* page should be left in cache on abort */
+               mark:1,         /* a mark flag for the creator to use as they wish */
                :0;             /* force to long boundary */
 #else
                opaque;         /* use upl_page_xxx() accessor funcs */
@@ -444,45 +446,52 @@ typedef uint32_t  upl_size_t;     /* page-aligned byte size */
 /* upl invocation flags */
 /* top nibble is used by super upl */
 
-#define UPL_FLAGS_NONE         0x00000000
-#define UPL_COPYOUT_FROM       0x00000001
-#define UPL_PRECIOUS           0x00000002
-#define UPL_NO_SYNC            0x00000004
-#define UPL_CLEAN_IN_PLACE     0x00000008
-#define UPL_NOBLOCK            0x00000010
-#define UPL_RET_ONLY_DIRTY     0x00000020
-#define UPL_SET_INTERNAL       0x00000040
-#define UPL_QUERY_OBJECT_TYPE  0x00000080
-#define UPL_RET_ONLY_ABSENT    0x00000100 /* used only for COPY_FROM = FALSE */
-#define UPL_FILE_IO             0x00000200
-#define UPL_SET_LITE           0x00000400
-#define UPL_SET_INTERRUPTIBLE  0x00000800
-#define UPL_SET_IO_WIRE                0x00001000
-#define UPL_FOR_PAGEOUT                0x00002000
-#define UPL_WILL_BE_DUMPED      0x00004000
-#define UPL_FORCE_DATA_SYNC    0x00008000
+typedef uint64_t upl_control_flags_t;
+
+#define UPL_FLAGS_NONE         0x00000000ULL
+#define UPL_COPYOUT_FROM       0x00000001ULL
+#define UPL_PRECIOUS           0x00000002ULL
+#define UPL_NO_SYNC            0x00000004ULL
+#define UPL_CLEAN_IN_PLACE     0x00000008ULL
+#define UPL_NOBLOCK            0x00000010ULL
+#define UPL_RET_ONLY_DIRTY     0x00000020ULL
+#define UPL_SET_INTERNAL       0x00000040ULL
+#define UPL_QUERY_OBJECT_TYPE  0x00000080ULL
+#define UPL_RET_ONLY_ABSENT    0x00000100ULL /* used only for COPY_FROM = FALSE */
+#define UPL_FILE_IO             0x00000200ULL
+#define UPL_SET_LITE           0x00000400ULL
+#define UPL_SET_INTERRUPTIBLE  0x00000800ULL
+#define UPL_SET_IO_WIRE                0x00001000ULL
+#define UPL_FOR_PAGEOUT                0x00002000ULL
+#define UPL_WILL_BE_DUMPED      0x00004000ULL
+#define UPL_FORCE_DATA_SYNC    0x00008000ULL
 /* continued after the ticket bits... */
 
-#define UPL_PAGE_TICKET_MASK   0x000F0000
+#define UPL_PAGE_TICKET_MASK   0x000F0000ULL
 #define UPL_PAGE_TICKET_SHIFT   16
 
 /* ... flags resume here */
-#define UPL_BLOCK_ACCESS       0x00100000
-#define UPL_ENCRYPT            0x00200000
-#define UPL_NOZEROFILL         0x00400000
-#define UPL_WILL_MODIFY                0x00800000 /* caller will modify the pages */
-
-#define UPL_NEED_32BIT_ADDR    0x01000000
-#define UPL_UBC_MSYNC          0x02000000
-#define UPL_UBC_PAGEOUT                0x04000000
-#define UPL_UBC_PAGEIN         0x08000000
-#define UPL_REQUEST_SET_DIRTY  0x10000000
-#define UPL_REQUEST_NO_FAULT   0x20000000 /* fail if pages not all resident */
-#define UPL_NOZEROFILLIO       0x40000000 /* allow non zerofill pages present */
-#define UPL_REQUEST_FORCE_COHERENCY    0x80000000
+#define UPL_BLOCK_ACCESS       0x00100000ULL
+#define UPL_ENCRYPT            0x00200000ULL
+#define UPL_NOZEROFILL         0x00400000ULL
+#define UPL_WILL_MODIFY                0x00800000ULL /* caller will modify the pages */
+
+#define UPL_NEED_32BIT_ADDR    0x01000000ULL
+#define UPL_UBC_MSYNC          0x02000000ULL
+#define UPL_UBC_PAGEOUT                0x04000000ULL
+#define UPL_UBC_PAGEIN         0x08000000ULL
+#define UPL_REQUEST_SET_DIRTY  0x10000000ULL
+#define UPL_REQUEST_NO_FAULT   0x20000000ULL /* fail if pages not all resident */
+#define UPL_NOZEROFILLIO       0x40000000ULL /* allow non zerofill pages present */
+#define UPL_REQUEST_FORCE_COHERENCY    0x80000000ULL
+
+#define UPL_MEMORY_TAG_MASK    0xFF00000000ULL
+#define UPL_MEMORY_TAG_SHIFT   32
+#define UPL_MEMORY_TAG(x)      (((x) >> UPL_MEMORY_TAG_SHIFT) & 0xFF)
+#define UPL_MEMORY_TAG_MAKE(x) (((upl_control_flags_t)((x) & 0xFF)) << UPL_MEMORY_TAG_SHIFT)
 
 /* UPL flags known by this kernel */
-#define UPL_VALID_FLAGS                0xFFFFFFFF
+#define UPL_VALID_FLAGS                0xFFFFFFFFFFULL
 
 
 /* upl abort error flags */
@@ -743,6 +752,13 @@ extern void                upl_deallocate(upl_t upl);
 extern void            upl_mark_decmp(upl_t upl);
 extern void            upl_unmark_decmp(upl_t upl);
 
+#ifdef KERNEL_PRIVATE
+
+void upl_page_set_mark(upl_page_info_t *upl, int index, boolean_t v);
+boolean_t upl_page_get_mark(upl_page_info_t *upl, int index);
+
+#endif // KERNEL_PRIVATE
+
 __END_DECLS
 
 #endif  /* KERNEL */
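
Widening the UPL control flags to 64 bits makes room for an 8-bit memory tag in bits 32-39: UPL_MEMORY_TAG_MAKE() packs a tag into a upl_control_flags_t and UPL_MEMORY_TAG() unpacks it. A self-contained sanity check of the round trip, using only the macro definitions shown above:

    #include <assert.h>
    #include <stdint.h>

    typedef uint64_t upl_control_flags_t;

    #define UPL_MEMORY_TAG_SHIFT   32
    #define UPL_MEMORY_TAG(x)      (((x) >> UPL_MEMORY_TAG_SHIFT) & 0xFF)
    #define UPL_MEMORY_TAG_MAKE(x) (((upl_control_flags_t)((x) & 0xFF)) << UPL_MEMORY_TAG_SHIFT)

    int main(void)
    {
        /* Pack a tag alongside an ordinary UPL request flag and read it back. */
        upl_control_flags_t flags = 0x00000001ULL /* UPL_COPYOUT_FROM */
                                  | UPL_MEMORY_TAG_MAKE(21);
        assert(UPL_MEMORY_TAG(flags) == 21);
        return 0;
    }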
index 4bf640d2311780d8639bd631d095503f1d08e02d..b3769d484398b876418cbc757db0e578f638c624 100644 (file)
@@ -79,6 +79,7 @@
 
 #include <sys/cdefs.h>
 #include <sys/appleapiopts.h>
+#include <Availability.h>
 
 /*
  *  The timeout mechanism uses mach_msg_timeout_t values,
@@ -242,7 +243,7 @@ typedef unsigned int mach_msg_copy_options_t;
 #define MACH_MSG_PHYSICAL_COPY         0
 #define MACH_MSG_VIRTUAL_COPY          1
 #define MACH_MSG_ALLOCATE              2
-#define MACH_MSG_OVERWRITE             3
+#define MACH_MSG_OVERWRITE             3       /* deprecated */
 #ifdef  MACH_KERNEL
 #define MACH_MSG_KALLOC_COPY_T         4
 #endif  /* MACH_KERNEL */
@@ -670,7 +671,7 @@ typedef integer_t mach_msg_option_t;
 #define MACH_RCV_NOTIFY                0x00000200      /* reserved - legacy */
 #define MACH_RCV_INTERRUPT     0x00000400      /* don't restart interrupted receive */
 #define MACH_RCV_VOUCHER       0x00000800      /* willing to receive voucher port */
-#define MACH_RCV_OVERWRITE     0x00001000      /* scatter receive */
+#define MACH_RCV_OVERWRITE     0x00001000      /* scatter receive (deprecated) */
 
 /* 
  * NOTE: a 0x00------ RCV mask implies to ask for
@@ -701,8 +702,7 @@ typedef integer_t mach_msg_option_t;
                                  MACH_SEND_TIMEOUT | MACH_SEND_NOTIFY | \
                                  MACH_SEND_TRAILER | MACH_SEND_NOIMPORTANCE )
 
-#define MACH_RCV_USER           (MACH_RCV_MSG | \
-                                 MACH_RCV_TIMEOUT | MACH_RCV_OVERWRITE | \
+#define MACH_RCV_USER           (MACH_RCV_MSG | MACH_RCV_TIMEOUT | \
                                  MACH_RCV_LARGE | MACH_RCV_LARGE_IDENTITY | \
                                  MACH_RCV_VOUCHER | MACH_RCV_TRAILER_MASK)
 
@@ -874,7 +874,7 @@ __BEGIN_DECLS
  *             already contain scatter control information to direct the
  *             receiving of the message.
  */
-
+__WATCHOS_PROHIBITED __TVOS_PROHIBITED
 extern mach_msg_return_t       mach_msg_overwrite(
                                        mach_msg_header_t *msg,
                                        mach_msg_option_t option,
@@ -896,6 +896,7 @@ extern mach_msg_return_t    mach_msg_overwrite(
  *             of that fact, then restart the appropriate parts of the
  *             operation silently (trap version does not restart).
  */
+__WATCHOS_PROHIBITED __TVOS_PROHIBITED
 extern mach_msg_return_t       mach_msg(
                                        mach_msg_header_t *msg,
                                        mach_msg_option_t option,
@@ -911,6 +912,7 @@ extern mach_msg_return_t    mach_msg(
  *             Deallocate a mach voucher created or received in a message.  Drops
  *             one (send right) reference to the voucher.
  */
+__WATCHOS_PROHIBITED __TVOS_PROHIBITED
 extern kern_return_t           mach_voucher_deallocate(
                                        mach_port_name_t voucher);
 
index 55735bc8a7a13958172df54c9d2117bff556a880..49c248d6aa134a58e1374e3f716115f616336aad 100644 (file)
@@ -312,11 +312,11 @@ typedef struct mach_port_status {
 } mach_port_status_t;
 
 /* System-wide values for setting queue limits on a port */
-#define MACH_PORT_QLIMIT_ZERO          ((mach_port_msgcount_t) 0)
-#define MACH_PORT_QLIMIT_BASIC         ((mach_port_msgcount_t) 5)
-#define MACH_PORT_QLIMIT_SMALL         ((mach_port_msgcount_t) 16)
-#define MACH_PORT_QLIMIT_LARGE         ((mach_port_msgcount_t) 1024)
-#define MACH_PORT_QLIMIT_KERNEL                ((mach_port_msgcount_t) 65536)
+#define MACH_PORT_QLIMIT_ZERO          (0)
+#define MACH_PORT_QLIMIT_BASIC         (5)
+#define MACH_PORT_QLIMIT_SMALL         (16)
+#define MACH_PORT_QLIMIT_LARGE         (1024)
+#define MACH_PORT_QLIMIT_KERNEL                (65534)
 #define MACH_PORT_QLIMIT_MIN           MACH_PORT_QLIMIT_ZERO
 #define MACH_PORT_QLIMIT_DEFAULT       MACH_PORT_QLIMIT_BASIC
 #define MACH_PORT_QLIMIT_MAX           MACH_PORT_QLIMIT_LARGE
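
The queue-limit constants above are consumed through mach_port_set_attributes() with the MACH_PORT_LIMITS_INFO flavor; this change drops the mach_port_msgcount_t casts and caps MACH_PORT_QLIMIT_KERNEL at 65534. A short user-space sketch of raising a receive right's queue depth to MACH_PORT_QLIMIT_LARGE:

    #include <mach/mach.h>

    /* Sketch: bump the message queue limit on a receive right we own. */
    kern_return_t widen_queue(mach_port_t port)
    {
        mach_port_limits_t limits = { .mpl_qlimit = MACH_PORT_QLIMIT_LARGE };
        return mach_port_set_attributes(mach_task_self(), port,
                                        MACH_PORT_LIMITS_INFO,
                                        (mach_port_info_t)&limits,
                                        MACH_PORT_LIMITS_INFO_COUNT);
    }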
index db4f46e898601738baba0e143eaac5db2dd9da79..7f1d3fbb07ea916ca2200b07a1a557524c68dc70 100644 (file)
@@ -74,9 +74,9 @@
 /* ARM64_TODO: move to higher memory */
 #endif
 #define SHARED_REGION_BASE_ARM64               0x180000000ULL
-#define SHARED_REGION_SIZE_ARM64               0x20000000ULL
+#define SHARED_REGION_SIZE_ARM64               0x28000000ULL
 #define SHARED_REGION_NESTING_BASE_ARM64       0x180000000ULL
-#define SHARED_REGION_NESTING_SIZE_ARM64       0x20000000ULL
+#define SHARED_REGION_NESTING_SIZE_ARM64       0x28000000ULL
 #define SHARED_REGION_NESTING_MIN_ARM64                ?
 #define SHARED_REGION_NESTING_MAX_ARM64                ?
 
index 50e0b214232ea4489632c16c774d11fc864eafca..11277d0da7fac242cf166306d7a7c40a135fae78 100644 (file)
@@ -50,6 +50,7 @@ typedef int sync_policy_t;
  */
 
 #define SYNC_POLICY_PREPOST            0x4
+#define SYNC_POLICY_DISABLE_IRQ                0x8
 
 #endif /* KERNEL_PRIVATE */
 
diff --git a/osfmk/mach/sysdiagnose_notification.defs b/osfmk/mach/sysdiagnose_notification.defs
new file mode 100644 (file)
index 0000000..af048e2
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2013 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ *  Interface definition for the sysdiagnose facility.
+ */
+
+subsystem
+#if KERNEL_USER
+    KernelUser
+#endif /* KERNEL_USER */
+    sysdiagnose_notification 31337;
+
+#include <mach/std_types.defs>
+#include <mach/mach_types.defs>
+#include <atm/atm_types.defs>
+
+ServerPrefix receive_;
+UserPrefix   send_;
+
+simpleroutine sysdiagnose_notification(
+              sysdiagnose_port    : mach_port_t;
+              flags               : uint32_t);
+
+/* vim: set ft=c : */
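
With ServerPrefix receive_ and UserPrefix send_, mig names the user-side stub send_sysdiagnose_notification() and expects the server side (a user-space listener on the sysdiagnose special port) to implement receive_sysdiagnose_notification(). Roughly, as a sketch of the generated shapes rather than the actual generated header:

    #include <mach/mach.h>
    #include <stdint.h>

    /* User-side stub the kernel calls to post the notification. */
    kern_return_t send_sysdiagnose_notification(mach_port_t sysdiagnose_port,
                                                uint32_t flags);

    /* Server-side routine a listener implements; called by the
     * mig-generated demux when a notification message arrives. */
    kern_return_t receive_sysdiagnose_notification(mach_port_t sysdiagnose_port,
                                                   uint32_t flags);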
index 599e00e24c0918f742bd4f03c2bc4400f52563fe..3311e3c15b8f822a819bbf11d2b7ff25144a0a87 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2007, 2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -321,12 +321,17 @@ struct task_vm_info {
        mach_vm_size_t  compressed;
        mach_vm_size_t  compressed_peak;
        mach_vm_size_t  compressed_lifetime;
+
+       /* added for rev1 */
+       mach_vm_size_t  phys_footprint;
 };
 typedef struct task_vm_info    task_vm_info_data_t;
 typedef struct task_vm_info    *task_vm_info_t;
 #define TASK_VM_INFO_COUNT     ((mach_msg_type_number_t) \
                (sizeof (task_vm_info_data_t) / sizeof (natural_t)))
-
+#define TASK_VM_INFO_REV0_COUNT /* doesn't include phys_footprint */ \
+               ((mach_msg_type_number_t) \
+               (TASK_VM_INFO_COUNT - 2))
 
 typedef struct vm_purgeable_info       task_purgable_info_t;
 
@@ -373,6 +378,48 @@ typedef struct task_power_info_v2  *task_power_info_v2_t;
 #define TASK_POWER_INFO_V2_COUNT       ((mach_msg_type_number_t) \
                (sizeof (task_power_info_v2_data_t) / sizeof (natural_t)))
 
+
+#define TASK_VM_INFO_PURGEABLE_ACCOUNT 27 /* Used for xnu purgeable vm unit tests */
+
+#ifdef PRIVATE
+struct pvm_account_info {
+       uint64_t pvm_volatile_count; /* Number of volatile bytes associated with a task */
+       uint64_t pvm_volatile_compressed_count; /* Number of volatile compressed bytes associated with a task */
+       uint64_t pvm_nonvolatile_count; /* Number of nonvolatile bytes associated with a task */
+       uint64_t pvm_nonvolatile_compressed_count; /* Number of nonvolatile compressed bytes associated with a task */
+};
+
+typedef struct pvm_account_info *pvm_account_info_t;
+typedef struct pvm_account_info pvm_account_info_data_t;
+
+#define PVM_ACCOUNT_INFO_COUNT ((mach_msg_type_number_t) \
+               (sizeof (pvm_account_info_data_t) / sizeof (natural_t)))
+#endif /* PRIVATE */
+
+#define TASK_FLAGS_INFO  28                    /* return t_flags field */
+struct task_flags_info {
+       uint32_t        flags;                          /* task flags */
+};
+typedef struct task_flags_info task_flags_info_data_t;
+typedef struct task_flags_info * task_flags_info_t;
+#define TASK_FLAGS_INFO_COUNT  ((mach_msg_type_number_t) \
+               (sizeof(task_flags_info_data_t) / sizeof (natural_t)))
+
+#define TF_LP64                 0x00000001                              /* task has 64-bit addressing */
+
+#define TASK_DEBUG_INFO_INTERNAL    29 /* Used for kernel internal development tests. */
+
+#ifdef PRIVATE
+struct task_debug_info_internal {
+       uint64_t ipc_space_size;
+};
+typedef struct task_debug_info_internal *task_debug_info_internal_t;
+typedef struct task_debug_info_internal task_debug_info_internal_data_t;
+#define TASK_DEBUG_INFO_INTERNAL_COUNT  ((mach_msg_type_number_t) \
+               (sizeof (task_debug_info_internal_data_t) / sizeof(natural_t)))
+
+#endif /* PRIVATE */
+
 /*
  * Obsolete interfaces.
  */
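
Because phys_footprint was appended to struct task_vm_info, TASK_VM_INFO_COUNT grew and TASK_VM_INFO_REV0_COUNT names the old size; callers pass the count in and check what came back, so binaries built against this header still work on older kernels. A sketch of that pattern:

    #include <mach/mach.h>
    #include <stdio.h>

    int main(void)
    {
        task_vm_info_data_t vm_info;
        mach_msg_type_number_t count = TASK_VM_INFO_COUNT;

        kern_return_t kr = task_info(mach_task_self(), TASK_VM_INFO,
                                     (task_info_t)&vm_info, &count);
        if (kr != KERN_SUCCESS)
            return 1;

        /* A rev0 kernel returns only TASK_VM_INFO_REV0_COUNT integers,
         * so check the count before trusting the new field. */
        if (count >= TASK_VM_INFO_COUNT)
            printf("phys_footprint: %llu bytes\n",
                   (unsigned long long)vm_info.phys_footprint);
        else
            printf("phys_footprint not reported by this kernel\n");
        return 0;
    }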
index 9c271d59078829563f9bdd78655151e887ca3a3b..c4794aab52fc1211af7b5141e9f8ba39d90a59f8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005, 2015 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -81,7 +81,7 @@
 typedef        natural_t       thread_flavor_t;
 typedef        integer_t       *thread_info_t;         /* varying array of int */
 
-#define THREAD_INFO_MAX                (1024)  /* maximum array size */
+#define THREAD_INFO_MAX                (32)    /* maximum array size */
 typedef        integer_t       thread_info_data_t[THREAD_INFO_MAX];
 
 /*
@@ -93,7 +93,7 @@ struct thread_basic_info {
         time_value_t    user_time;      /* user run time */
         time_value_t    system_time;    /* system run time */
         integer_t       cpu_usage;      /* scaled cpu usage percentage */
-       policy_t        policy;         /* scheduling policy in effect */
+        policy_t        policy;         /* scheduling policy in effect */
         integer_t       run_state;      /* run state (see below) */
         integer_t       flags;          /* various flags (see below) */
         integer_t       suspend_count;  /* suspend count for thread */
@@ -142,6 +142,46 @@ typedef struct thread_identifier_info  *thread_identifier_info_t;
  */
 #define TH_FLAGS_SWAPPED       0x1     /* thread is swapped out */
 #define TH_FLAGS_IDLE          0x2     /* thread is an idle thread */
+#define TH_FLAGS_GLOBAL_FORCED_IDLE    0x4     /* thread performs global forced idle */
+
+/*
+ *  Thread extended info (returns same info as proc_pidinfo(...,PROC_PIDTHREADINFO,...)
+ */
+#define THREAD_EXTENDED_INFO 5
+#define MAXTHREADNAMESIZE 64
+struct thread_extended_info {          // same as proc_threadinfo (from proc_info.h) & proc_threadinfo_internal (from bsd_taskinfo.h)
+       uint64_t                pth_user_time;          /* user run time */
+       uint64_t                pth_system_time;        /* system run time */
+       int32_t                 pth_cpu_usage;          /* scaled cpu usage percentage */
+       int32_t                 pth_policy;                     /* scheduling policy in effect */
+       int32_t                 pth_run_state;          /* run state (see below) */
+       int32_t                 pth_flags;              /* various flags (see below) */
+       int32_t                 pth_sleep_time;         /* number of seconds that thread */
+       int32_t                 pth_curpri;                     /* cur priority*/
+       int32_t                 pth_priority;           /*  priority*/
+       int32_t                 pth_maxpriority;        /* max priority*/
+       char                    pth_name[MAXTHREADNAMESIZE];    /* thread name, if any */
+};
+typedef struct thread_extended_info thread_extended_info_data_t;
+typedef struct thread_extended_info * thread_extended_info_t;
+#define THREAD_EXTENDED_INFO_COUNT  ((mach_msg_type_number_t) \
+               (sizeof(thread_extended_info_data_t) / sizeof (natural_t)))
+
+#define THREAD_DEBUG_INFO_INTERNAL 6    /* for kernel development internal info */
+
+#if PRIVATE
+struct thread_debug_info_internal{
+       uint64_t page_creation_count;
+};
+
+typedef struct thread_debug_info_internal *thread_debug_info_internal_t;
+typedef struct thread_debug_info_internal  thread_debug_info_internal_data_t;
+
+#define THREAD_DEBUG_INFO_INTERNAL_COUNT  ((mach_msg_type_number_t)            \
+                       (sizeof (thread_debug_info_internal_data_t) / sizeof (natural_t)))
+
+#endif /* PRIVATE */
+
 
 /*
  * Obsolete interfaces.
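
The new THREAD_EXTENDED_INFO flavor is fetched with the normal thread_info() call and returns the same fields proc_pidinfo(...,PROC_PIDTHREADINFO,...) exposes, including the thread name. A minimal sketch for the current thread:

    #include <mach/mach.h>
    #include <stdio.h>

    int main(void)
    {
        thread_extended_info_data_t ext;
        mach_msg_type_number_t count = THREAD_EXTENDED_INFO_COUNT;
        thread_act_t thread = mach_thread_self();

        kern_return_t kr = thread_info(thread, THREAD_EXTENDED_INFO,
                                       (thread_info_t)&ext, &count);
        if (kr == KERN_SUCCESS)
            printf("name='%s' cpu_usage=%d run_state=%d\n",
                   ext.pth_name, ext.pth_cpu_usage, ext.pth_run_state);

        mach_port_deallocate(mach_task_self(), thread);
        return kr == KERN_SUCCESS ? 0 : 1;
    }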
index ec0ee729ccdeb750c57b6307271150391c0caef8..13a0b301f0c67d9e4f0296c4d25a336bc2bf4244 100644 (file)
@@ -74,5 +74,6 @@ typedef int           vm_behavior_t;
 #define VM_BEHAVIOR_REUSABLE   ((vm_behavior_t) 8)
 #define VM_BEHAVIOR_REUSE      ((vm_behavior_t) 9)
 #define VM_BEHAVIOR_CAN_REUSE  ((vm_behavior_t) 10)
+#define VM_BEHAVIOR_PAGEOUT    ((vm_behavior_t) 11)
 
 #endif /*_MACH_VM_BEHAVIOR_H_*/
index 87592a57fa5ac79df34e0675ec4eb2ae0390ecf6..6a33043a6a5a4df55dd9de3cff11632e9171ea24 100644 (file)
@@ -260,21 +260,21 @@ extern vm_offset_t      vm_elinkedit;
 #define VM_KERNEL_IS_SLID(_o)                                                 \
                (((vm_offset_t)(_o) >= vm_kernel_base) &&                      \
                 ((vm_offset_t)(_o) <=  vm_kernel_top))
-#define VM_KERNEL_IS_KEXT(_o)                                                  \
-                (((vm_offset_t)(_o) >= vm_kext_base) &&                        \
+#define VM_KERNEL_IS_KEXT(_o)      \
+                (((vm_offset_t)(_o) >= vm_kext_base) &&   \
                  ((vm_offset_t)(_o) <  vm_kext_top))
 
 #define VM_KERNEL_IS_PRELINKTEXT(_o)        \
         (((vm_offset_t)(_o) >= vm_prelink_stext) &&     \
-         ((vm_offset_t)(_o) <  vm_prelink_etext))
+        ((vm_offset_t)(_o) <  vm_prelink_etext))
 
 #define VM_KERNEL_IS_PRELINKINFO(_o)        \
-        (((vm_offset_t)(_o) >= vm_prelink_sinfo) &&     \
-         ((vm_offset_t)(_o) <  vm_prelink_einfo))
+    (((vm_offset_t)(_o) >= vm_prelink_sinfo) &&     \
+    ((vm_offset_t)(_o) <  vm_prelink_einfo))
 
 #define VM_KERNEL_IS_KEXT_LINKEDIT(_o)        \
-        (((vm_offset_t)(_o) >= vm_slinkedit) &&     \
-         ((vm_offset_t)(_o) <  vm_elinkedit))
+    (((vm_offset_t)(_o) >= vm_slinkedit) &&     \
+    ((vm_offset_t)(_o) <  vm_elinkedit))
 
 #define VM_KERNEL_SLIDE(_u)                                                   \
                ((vm_offset_t)(_u) + vm_kernel_slide)
@@ -314,11 +314,11 @@ extern vm_offset_t      vm_elinkedit;
  */
 #define VM_KERNEL_UNSLIDE(_v)                                                 \
                ((VM_KERNEL_IS_SLID(_v) ||                                     \
-          VM_KERNEL_IS_KEXT(_v) ||      \
+                 VM_KERNEL_IS_KEXT(_v) ||      \
           VM_KERNEL_IS_PRELINKTEXT(_v) ||   \
           VM_KERNEL_IS_PRELINKINFO(_v) ||   \
-          VM_KERNEL_IS_KEXT_LINKEDIT(_v)) ?     \
-                       (vm_offset_t)(_v) - vm_kernel_slide :                  \
+          VM_KERNEL_IS_KEXT_LINKEDIT(_v)) ?      \
+                       (vm_offset_t)(_v) - vm_kernel_slide :    \
                        (vm_offset_t)(_v))
 
 #define        VM_KERNEL_ADDRPERM(_v)                                                 \
@@ -331,8 +331,8 @@ extern vm_offset_t      vm_elinkedit;
           VM_KERNEL_IS_KEXT(_v) ||      \
           VM_KERNEL_IS_PRELINKTEXT(_v) ||   \
           VM_KERNEL_IS_PRELINKINFO(_v) ||   \
-          VM_KERNEL_IS_KEXT_LINKEDIT(_v)) ?     \
-                       (vm_offset_t)(_v) - vm_kernel_slide :                  \
+          VM_KERNEL_IS_KEXT_LINKEDIT(_v)) ?         \
+                       (vm_offset_t)(_v) - vm_kernel_slide :    \
                        VM_KERNEL_ADDRPERM(_v))
        
 
index ae2d67584131ac1c6b511e46ec17cb332afd5cea..039390c26e8a1f5673b16445fe3146ab8910d049 100644 (file)
@@ -145,4 +145,10 @@ typedef int                vm_prot_t;
  */
 #define VM_PROT_IS_MASK                ((vm_prot_t) 0x40)
 
+
+#define VM_PROT_MEMORY_TAG_MASK                0xFF000000
+#define VM_PROT_MEMORY_TAG_SHIFT       24
+#define VM_PROT_MEMORY_TAG(x)          (((x) >> VM_PROT_MEMORY_TAG_SHIFT) & 0xFF)
+#define VM_PROT_MEMORY_TAG_MAKE(x)     (((x) & 0xFF) << VM_PROT_MEMORY_TAG_SHIFT)
+
 #endif /* _MACH_VM_PROT_H_ */
index 4f9e7172da7c35a873263a6b0143917aeb65c9bf..bd74e1ef696da20b3399b6c10862b27bc65c808f 100644 (file)
@@ -293,7 +293,11 @@ typedef struct pmap_statistics     *pmap_statistics_t;
 #define VM_FLAGS_FIXED         0x0000
 #define VM_FLAGS_ANYWHERE      0x0001
 #define VM_FLAGS_PURGABLE      0x0002
+#ifdef KERNEL_PRIVATE
+#endif /* KERNEL_PRIVATE */
 #define VM_FLAGS_NO_CACHE      0x0010
+#define VM_FLAGS_RESILIENT_CODESIGN    0x0020
+#define VM_FLAGS_RESILIENT_MEDIA       0x0040
 #ifdef KERNEL_PRIVATE
 #define VM_FLAGS_PERMANENT     0x0100  /* mapping can NEVER be unmapped */
 #define VM_FLAGS_GUARD_AFTER   0x0200  /* guard page after the mapping */
@@ -320,6 +324,7 @@ typedef struct pmap_statistics      *pmap_statistics_t;
 #define VM_FLAGS_IOKIT_ACCT            0x200000 /* IOKit accounting */
 #define VM_FLAGS_KEEP_MAP_LOCKED       0x400000 /* Keep the map locked when returning from vm_map_enter() */
 #endif /* KERNEL_PRIVATE */
+#define VM_FLAGS_RETURN_4K_DATA_ADDR   0x800000 /* Return 4K aligned address of target data */
 #define VM_FLAGS_ALIAS_MASK    0xFF000000
 #define VM_GET_FLAGS_ALIAS(flags, alias)                       \
                (alias) = ((flags) & VM_FLAGS_ALIAS_MASK) >> 24 
@@ -336,11 +341,13 @@ typedef struct pmap_statistics    *pmap_statistics_t;
                                 VM_FLAGS_SUPERPAGE_MASK |      \
                                 VM_FLAGS_ALIAS_MASK)
 #define VM_FLAGS_USER_MAP      (VM_FLAGS_USER_ALLOCATE |       \
+                                VM_FLAGS_RETURN_4K_DATA_ADDR | \
                                 VM_FLAGS_RETURN_DATA_ADDR)
 #define VM_FLAGS_USER_REMAP    (VM_FLAGS_FIXED |    \
                                 VM_FLAGS_ANYWHERE | \
                                 VM_FLAGS_OVERWRITE| \
-                                VM_FLAGS_RETURN_DATA_ADDR)
+                                VM_FLAGS_RETURN_DATA_ADDR |\
+                                VM_FLAGS_RESILIENT_CODESIGN)
 
 #define VM_FLAGS_SUPERPAGE_SHIFT 16
 #define SUPERPAGE_NONE                 0       /* no superpages, if all bits are 0 */
@@ -462,10 +469,82 @@ typedef struct pmap_statistics    *pmap_statistics_t;
 /* Genealogy buffers */
 #define VM_MEMORY_GENEALOGY 78
 
+/* RawCamera VM allocated memory */
+#define VM_MEMORY_RAWCAMERA 79
+
+/* corpse info for dead process */
+#define VM_MEMORY_CORPSEINFO 80
+
+/* Apple System Logger (ASL) messages */
+#define VM_MEMORY_ASL 81
+
 /* Reserve 240-255 for application */
 #define VM_MEMORY_APPLICATION_SPECIFIC_1 240
 #define VM_MEMORY_APPLICATION_SPECIFIC_16 255
 
 #define VM_MAKE_TAG(tag) ((tag) << 24)
 
+
+
+#if KERNEL_PRIVATE
+
+/* kernel map tags */
+
+#define VM_KERN_MEMORY_NONE            0
+
+#define VM_KERN_MEMORY_OSFMK           1
+#define VM_KERN_MEMORY_BSD             2
+#define VM_KERN_MEMORY_IOKIT           3
+#define VM_KERN_MEMORY_LIBKERN         4
+#define VM_KERN_MEMORY_OSKEXT          5
+#define VM_KERN_MEMORY_KEXT            6
+#define VM_KERN_MEMORY_IPC             7
+#define VM_KERN_MEMORY_STACK           8
+#define VM_KERN_MEMORY_CPU             9
+#define VM_KERN_MEMORY_PMAP            10
+#define VM_KERN_MEMORY_PTE             11
+#define VM_KERN_MEMORY_ZONE            12
+#define VM_KERN_MEMORY_KALLOC          13
+#define VM_KERN_MEMORY_COMPRESSOR      14
+#define VM_KERN_MEMORY_COMPRESSED_DATA 15
+#define VM_KERN_MEMORY_PHANTOM_CACHE   16
+#define VM_KERN_MEMORY_WAITQ           17
+#define VM_KERN_MEMORY_DIAG            18
+#define VM_KERN_MEMORY_LOG             19
+#define VM_KERN_MEMORY_FILE            20
+#define VM_KERN_MEMORY_MBUF            21
+#define VM_KERN_MEMORY_UBC             22
+#define VM_KERN_MEMORY_SECURITY                23
+#define VM_KERN_MEMORY_MLOCK           24
+//
+#define VM_KERN_MEMORY_FIRST_DYNAMIC   25
+/* out of tags: */
+#define VM_KERN_MEMORY_ANY             255
+#define VM_KERN_MEMORY_COUNT           256
+
+/* end kernel map tags */
+
+// mach_memory_info.flags
+#define VM_KERN_SITE_TYPE              0x000000FF
+#define VM_KERN_SITE_TAG               0x00000000
+#define VM_KERN_SITE_KMOD              0x00000001
+#define VM_KERN_SITE_KERNEL            0x00000002
+#define VM_KERN_SITE_COUNTER           0x00000003
+#define VM_KERN_SITE_WIRED             0x00000100      /* add to wired count */
+#define VM_KERN_SITE_HIDE              0x00000200      /* no zprint */
+
+#define VM_KERN_COUNT_MANAGED          0
+#define VM_KERN_COUNT_RESERVED         1
+#define VM_KERN_COUNT_WIRED            2
+#define VM_KERN_COUNT_WIRED_MANAGED    3
+#define VM_KERN_COUNT_STOLEN           4
+#define VM_KERN_COUNT_LOPAGE           5
+#define VM_KERN_COUNT_MAP_KERNEL       6
+#define VM_KERN_COUNT_MAP_ZONE         7
+#define VM_KERN_COUNT_MAP_KALLOC       8
+
+#define VM_KERN_COUNTER_COUNT          9
+
+#endif /* KERNEL_PRIVATE */
+
 #endif /* _MACH_VM_STATISTICS_H_ */
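
User allocations carry their VM_MEMORY_* tag in the top byte of the vm_allocate/mach_vm_map flags via VM_MAKE_TAG(); the new VM_KERN_MEMORY_* values play the analogous role for kernel maps and are what mach_memory_info reports per allocation site. A sketch of tagging a user mapping with one of the application-specific tags:

    #include <mach/mach.h>
    #include <mach/mach_vm.h>
    #include <mach/vm_statistics.h>

    /* Sketch: allocate a page tagged so tools that group memory by tag
     * (vmmap-style reporting) can attribute it to this component. */
    kern_return_t alloc_tagged_page(mach_vm_address_t *addr_out)
    {
        *addr_out = 0;
        return mach_vm_allocate(mach_task_self(), addr_out, vm_page_size,
                                VM_FLAGS_ANYWHERE |
                                VM_MAKE_TAG(VM_MEMORY_APPLICATION_SPECIFIC_1));
    }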
index f586d876cda6446191f9d363e2ae1175da8ce42e..8e7cee2f8dd75424b876f5f127d6c1d42d148a09 100644 (file)
@@ -113,6 +113,24 @@ typedef mach_port_t                vm_map_t;
 typedef uint64_t               vm_object_offset_t;
 typedef uint64_t               vm_object_size_t;
 
+
+#ifdef XNU_KERNEL_PRIVATE
+
+typedef uint8_t vm_tag_t;
+
+#define VM_TAG_BT      0x00000001
+#define VM_TAG_KMOD    0x00000002
+#define VM_TAG_UNLOAD  0x00000004
+
+struct vm_allocation_site
+{
+    vm_tag_t tag;
+    uint8_t  flags;
+};
+typedef struct vm_allocation_site vm_allocation_site_t;
+
+#endif /* XNU_KERNEL_PRIVATE */
+
 #ifdef  KERNEL_PRIVATE
 
 #ifndef        MACH_KERNEL_PRIVATE
index 9bd02e051235622c84e0b81b504e7b02cbac3429..07e906a6fd2f915c4be357009f90a4e72483aa75 100644 (file)
@@ -105,6 +105,9 @@ type symtab_name_t = c_string[*:32];
 type lockgroup_info_t = struct[63] of integer_t;
 type lockgroup_info_array_t = array[] of lockgroup_info_t;
 
+type mach_memory_info_t = struct[8] of uint64_t;
+type mach_memory_info_array_t = array[] of mach_memory_info_t;
+
 import <mach_debug/mach_debug_types.h>;
 
 #endif /* _MACH_DEBUG_MACH_DEBUG_TYPES_DEFS_ */
index 32754acad4dcc02a0c5d681812cb74dd0df85dd2..ff9aeac3cb32f02f5a824c9221f9bc27acc9183d 100644 (file)
 
 typedef        char    symtab_name_t[32];
 
+struct mach_core_fileheader
+{
+     uint64_t signature;
+     uint64_t log_offset;
+     uint64_t log_length;
+     uint64_t gzip_offset;
+     uint64_t gzip_length;
+};
+#define MACH_CORE_FILEHEADER_SIGNATURE 0x0063614d20646153ULL
+
 #endif /* _MACH_DEBUG_MACH_DEBUG_TYPES_H_ */
index 277801d5d3bed2c31b458dca3105bda219e0a224..facfe2a4f0d952026daab513decb72bed0c407f8 100644 (file)
@@ -133,4 +133,16 @@ typedef struct task_zone_info_data {
 } task_zone_info_t;
 
 typedef task_zone_info_t *task_zone_info_array_t;
+
+typedef struct mach_memory_info {
+    uint64_t flags;
+    uint64_t site;
+    uint64_t size;
+    uint64_t free;
+    uint64_t largest;
+    uint64_t _resv[3];
+} mach_memory_info_t;
+
+typedef mach_memory_info_t *mach_memory_info_array_t;
+
 #endif /* _MACH_DEBUG_ZONE_INFO_H_ */
index 82e05c4d82f1d4c128766f855a35ffef3b902b9b..add08c56c36c62bbf71a8fa85435c313908f6a06 100644 (file)
@@ -7,8 +7,11 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
 include $(MakeInc_cmd)
 include $(MakeInc_def)
 
+PRIVATE_DATAFILES = \
+       cpu_capabilities.h
 
-DATAFILES = \
+KERNELFILES = \
+       atomic.h        \
        cpu_capabilities.h      \
        cpu_number.h    \
        io_map_entries.h \
@@ -21,11 +24,11 @@ DATAFILES = \
        pal_hibernate.h         \
        simple_lock.h
 
-INSTALL_MI_LCL_LIST = cpu_capabilities.h
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
 
 INSTALL_MI_DIR = machine
 
-EXPORT_MI_LIST = ${DATAFILES}
+EXPORT_MI_LIST = ${KERNELFILES}
 
 EXPORT_MI_DIR = machine
 
diff --git a/osfmk/machine/ast.h b/osfmk/machine/ast.h
deleted file mode 100644 (file)
index b4880d2..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#ifndef _MACHINE_AST_H
-#define _MACHINE_AST_H
-
-#if defined (__i386__) || defined (__x86_64__)
-#include "i386/ast.h"
-#else
-#error architecture not supported
-#endif
-
-#endif /* _MACHINE_AST_H */
diff --git a/osfmk/machine/ast_types.h b/osfmk/machine/ast_types.h
deleted file mode 100644 (file)
index 57ae58b..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#ifndef _MACHINE_AST_TYPES_H
-#define _MACHINE_AST_TYPES_H
-
-#if defined (__i386__) || defined (__x86_64__)
-#include "i386/ast_types.h"
-#else
-#error architecture not supported
-#endif
-
-#endif /* _MACHINE_AST_TYPES_H */
diff --git a/osfmk/machine/atomic.h b/osfmk/machine/atomic.h
new file mode 100644 (file)
index 0000000..5aa93d0
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _MACHINE_ATOMIC_H
+#define _MACHINE_ATOMIC_H
+
+/* This should be in stdatomic.h once supported by compiler */
+enum memory_order {
+    memory_order_relaxed,
+    memory_order_consume,
+    memory_order_acquire,
+    memory_order_release,
+    memory_order_acq_rel,
+    memory_order_seq_cst
+};
+
+#if defined (__x86_64__)
+#include "i386/atomic.h"
+#else
+#error architecture not supported
+#endif
+
+#endif /* _MACHINE_ATOMIC_H */
diff --git a/osfmk/machine/smp.h b/osfmk/machine/smp.h
new file mode 100644 (file)
index 0000000..b49db7a
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2014 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _MACHINE_SMP_H
+#define _MACHINE_SMP_H
+
+#if defined (__x86_64__)
+#include "i386/smp.h"
+#else
+#error architecture not supported
+#endif
+
+#endif /* _MACHINE_SMP_H */
diff --git a/osfmk/pmc/Makefile b/osfmk/pmc/Makefile
deleted file mode 100644 (file)
index 5db384d..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-DATAFILES =
-
-EXPORT_ONLY_FILES = \
-       pmc.h
-
-INSTALL_MI_LIST = ${DATAFILES}
-
-INSTALL_MI_DIR = pmc
-
-EXPORT_MI_LIST = ${DATAFILES} ${EXPORT_ONLY_FILES}
-
-EXPORT_MI_DIR = pmc
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/osfmk/pmc/pmc.c b/osfmk/pmc/pmc.c
deleted file mode 100644 (file)
index a637a1b..0000000
+++ /dev/null
@@ -1,2953 +0,0 @@
-/*
- * Copyright (c) 2009 Apple Inc. All rights reserved.
- *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_LICENSE_HEADER_END@
- */
-
-#include <kern/kalloc.h>
-#include <kern/kern_types.h>
-#include <kern/locks.h>
-#include <kern/misc_protos.h>
-#include <kern/task.h>
-#include <kern/thread.h>
-#include <kern/zalloc.h>
-#include <machine/machine_cpu.h>
-
-#include <pmc/pmc.h>
-
-#include <libkern/OSAtomic.h>
-
-#if defined(__i386__) || defined(__x86_64__)
-#include <i386/mp.h>
-#endif
-
-#if CONFIG_COUNTERS
-
-/* various debug logging enable */
-#undef DEBUG_COUNTERS
-
-typedef uint8_t pmc_state_event_t;
-
-#define PMC_STATE_EVENT_START                          0
-#define PMC_STATE_EVENT_STOP                           1
-#define PMC_STATE_EVENT_FREE                           2
-#define PMC_STATE_EVENT_INTERRUPT                      3
-#define PMC_STATE_EVENT_END_OF_INTERRUPT       4
-#define PMC_STATE_EVENT_CONTEXT_IN                     5
-#define PMC_STATE_EVENT_CONTEXT_OUT                    6
-#define PMC_STATE_EVENT_LOAD_FINISHED          7
-#define PMC_STATE_EVENT_STORE_FINISHED         8
-
-/* PMC spin timeouts */
-#define PMC_SPIN_THRESHOLD     10      /* Number of spins to allow before checking mach_absolute_time() */
-#define PMC_SPIN_TIMEOUT_US    10      /* Time in microseconds before the spin causes an assert */
-
-uint64_t pmc_spin_timeout_count = 0;   /* Number of times where a PMC spin loop causes a timeout */
-
-#ifdef DEBUG_COUNTERS
-#      include <pexpert/pexpert.h>
-#      define COUNTER_DEBUG(...) \
-       do { \
-               kprintf("[%s:%s][%u] ", __FILE__, __PRETTY_FUNCTION__, cpu_number()); \
-               kprintf(__VA_ARGS__); \
-       } while(0)
-
-#      define PRINT_PERF_MON(x)        \
-       do { \
-               kprintf("perfmon: %p (obj: %p refCt: %u switchable: %u)\n", \
-                       x, x->object, x->useCount, \
-                       (x->methods.flags & PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING) ? \
-                       1 : 0); \
-       } while(0)
-
-static const char const * pmc_state_state_name(pmc_state_t state) {
-       switch (PMC_STATE_STATE(state)) {
-               case PMC_STATE_STATE_INVALID:
-                       return "INVALID";
-               case PMC_STATE_STATE_STOP:
-                       return "STOP";
-               case PMC_STATE_STATE_CAN_RUN:
-                       return "CAN_RUN";
-               case PMC_STATE_STATE_LOAD:
-                       return "LOAD";
-               case PMC_STATE_STATE_RUN:
-                       return "RUN";
-               case PMC_STATE_STATE_STORE:
-                       return "STORE";
-               case PMC_STATE_STATE_INTERRUPT:
-                       return "INTERRUPT";
-               case PMC_STATE_STATE_DEALLOC:
-                       return "DEALLOC";
-               default:
-                       return "UNKNOWN";
-       }
-}
-
-static const char const * pmc_state_event_name(pmc_state_event_t event) {
-       switch (event) {
-               case PMC_STATE_EVENT_START:
-                       return "START";
-               case PMC_STATE_EVENT_STOP:
-                       return "STOP";
-               case PMC_STATE_EVENT_FREE:
-                       return "FREE";
-               case PMC_STATE_EVENT_INTERRUPT:
-                       return "INTERRUPT";
-               case PMC_STATE_EVENT_END_OF_INTERRUPT:
-                       return "END OF INTERRUPT";
-               case PMC_STATE_EVENT_CONTEXT_IN:
-                       return "CONTEXT IN";
-               case PMC_STATE_EVENT_CONTEXT_OUT:
-                       return "CONTEXT OUT";
-               case PMC_STATE_EVENT_LOAD_FINISHED:
-                       return "LOAD_FINISHED";
-               case PMC_STATE_EVENT_STORE_FINISHED:
-                       return "STORE_FINISHED";
-               default:
-                       return "UNKNOWN";
-       }
-}
-
-#      define PMC_STATE_FORMAT "<%s, %u, %s%s%s>"
-#      define PMC_STATE_ARGS(x)        pmc_state_state_name(x), PMC_STATE_CONTEXT_COUNT(x), ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_INTERRUPTING) ? "I" : ""), \
-                                       ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_STOPPING) ? "S" : ""), ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_DEALLOCING) ? "D" : "")
-#else
-#      define COUNTER_DEBUG(...)
-#      define PRINT_PERF_MON(x)
-#      define PMC_STATE_FORMAT
-#      define PMC_STATE_ARGS(x)
-#endif
-
-/*!struct
- * pmc_config is the data behind a pmc_config_t.
- * @member object A pointer to an instance of IOPerformanceCounterConfiguration
- * @member method A pointer to a method to call to handle PMI.
- * @member interrupt_after_value Cause a PMI after the counter counts this many
- * events.
- * @member refCon Passed to the @method method as the refCon argument.
- */
-struct pmc_config {
-       pmc_config_object_t object;
-       volatile pmc_interrupt_method_t method;
-       uint64_t interrupt_after_value;
-       void *refCon;
-};
-
-/*
- * Allocation Zones
- * 
- * Two allocation zones - Perf zone small and Perf zone big.
- * Each zone has associated maximums, defined below.
- * The small zone's unit size is the max of the smaller allocation objects (all
- * sizes on K64):
- *	perf_monitor_t - 48 bytes
- *		perf_monitor_methods_t - 28 bytes
- *	pmc_reservation_t - 48 bytes
- *	pmc_config_t - 32 bytes
- * perf_small_zone unit size is (on K64) 48 bytes
- * perf_small_zone max count must be max number of perf monitors, plus (max
- * number of reservations * 2). The "*2" is because each reservation has a
- * pmc_config_t within.
- *
- * The big zone's unit size is the max of the larger allocation objects:
- *     pmc_t - 144 bytes
- *             pmc_methods_t - 116 bytes
- * perf_big_zone unit size is (on K64) 144 bytes
- * perf_big_zone max count is the max number of PMCs we support.
- */
-
-static zone_t perf_small_zone = NULL;
-#define MAX_PERF_SMALLS                (256 + 8196 + 8196)
-#define PERF_SMALL_UNIT_SZ     (MAX(MAX(sizeof(struct perf_monitor), \
-       sizeof(struct pmc_reservation)), sizeof(struct pmc_config))) 
-
-static zone_t perf_big_zone = NULL;
-#define MAX_PERF_BIGS          (1024)
-#define PERF_BIG_UNIT_SZ       (sizeof(struct pmc))
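/*
 * A minimal sanity-check sketch of the zone sizing rationale above.  This is
 * illustrative only and assumes a C11 toolchain (for _Static_assert); the
 * struct and unit-size names are the ones used in this file.
 */
_Static_assert(sizeof(struct perf_monitor) <= PERF_SMALL_UNIT_SZ,
	"perf_monitor must fit in the small zone");
_Static_assert(sizeof(struct pmc_reservation) <= PERF_SMALL_UNIT_SZ,
	"pmc_reservation must fit in the small zone");
_Static_assert(sizeof(struct pmc_config) <= PERF_SMALL_UNIT_SZ,
	"pmc_config must fit in the small zone");
_Static_assert(sizeof(struct pmc) <= PERF_BIG_UNIT_SZ,
	"pmc must fit in the big zone");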
-
-/*
- * Locks and Lock groups
- */
-static lck_grp_t *pmc_lock_grp = LCK_GRP_NULL;
-static lck_grp_attr_t *pmc_lock_grp_attr;
-static lck_attr_t *pmc_lock_attr;
-
-/* PMC tracking queue locks */
-
-static lck_mtx_t  cpu_monitor_queue_mutex;   /* protects per-cpu queues at initialisation time */
-static lck_spin_t perf_monitor_queue_spin;   /* protects adding and removing from queue */
-static lck_spin_t perf_counters_queue_spin;  /* protects adding and removing from queue */
-
-/* Reservation tracking queues lock */
-static lck_spin_t reservations_spin;
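/*
 * Lock ordering, as used by the enqueue/dequeue paths below: when both are
 * needed, cpu_monitor_queue_mutex is taken before perf_monitor_queue_spin,
 * and the single reservations_spin lock guards all three reservation
 * tracking queues at once.
 */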
-
-/*
- * Tracking queues
- *
- * Keeps track of registered perf monitors and perf counters
- */
-
-static queue_head_t **cpu_monitor_queues = NULL;
-
-static queue_head_t *perf_monitors_queue = NULL;
-static volatile uint32_t perf_monitors_count = 0U;
-
-static queue_head_t *perf_counters_queue = NULL;
-static volatile uint32_t perf_counters_count = 0U;
-
-/* 
- * Reservation queues
- *
- * Keeps track of all system, task, and thread-level reservations (both active and
- * inactive).
- *
- * We track them all here (rather than in their respective task or thread only)
- * so that we can inspect our tracking data directly (rather than peeking at
- * every task and thread) to determine if/when a new reservation would
- * constitute a conflict.
- */
-static queue_head_t *system_reservations = NULL;
-static volatile uint32_t system_reservation_count = 0U;
-
-static queue_head_t *task_reservations = NULL;
-static volatile uint32_t task_reservation_count = 0U;
-
-static queue_head_t *thread_reservations = NULL;
-static volatile uint32_t thread_reservation_count = 0U;
-
-#if XNU_KERNEL_PRIVATE
-
-/*
- * init_pmc_locks creates and initializes all of the locks, lock groups, and lock
- * attributes required for the pmc sub-system.
- */
-static void init_pmc_locks(void) {
-       pmc_lock_attr = lck_attr_alloc_init();
-       assert(pmc_lock_attr);
-
-       pmc_lock_grp_attr = lck_grp_attr_alloc_init();
-       assert(pmc_lock_grp_attr);
-
-       pmc_lock_grp = lck_grp_alloc_init("pmc", pmc_lock_grp_attr);
-       assert(pmc_lock_grp);
-
-       lck_spin_init(&perf_monitor_queue_spin, pmc_lock_grp, pmc_lock_attr);
-       lck_spin_init(&perf_counters_queue_spin, pmc_lock_grp, pmc_lock_attr);
-
-       lck_spin_init(&reservations_spin, pmc_lock_grp, pmc_lock_attr);
-
-       lck_mtx_init(&cpu_monitor_queue_mutex, pmc_lock_grp, pmc_lock_attr);
-}
-
-/*
- * init_pmc_zones initializes the allocation zones used by the pmc subsystem
- */
-static void init_pmc_zones(void) {
-       perf_small_zone = zinit(PERF_SMALL_UNIT_SZ, 
-               MAX_PERF_SMALLS * PERF_SMALL_UNIT_SZ, MAX_PERF_SMALLS, 
-               "pmc.small zone");
-
-       assert(perf_small_zone);
-
-       perf_big_zone = zinit(PERF_BIG_UNIT_SZ,
-               MAX_PERF_BIGS * PERF_BIG_UNIT_SZ, MAX_PERF_BIGS, 
-               "pmc.big zone");
-
-       assert(perf_big_zone);
-}
-
-/*
- * init_pmc_queues allocates and initializes the tracking queues for
- * registering and reserving individual pmcs and perf monitors.
- */
-static void init_pmc_queues(void) {
-    
-       perf_monitors_queue = (queue_head_t*)kalloc(sizeof(queue_head_t));
-       assert(perf_monitors_queue);
-
-       queue_init(perf_monitors_queue);
-
-       perf_counters_queue = (queue_head_t*)kalloc(sizeof(queue_head_t));
-       assert(perf_counters_queue);
-
-       queue_init(perf_counters_queue);
-
-	system_reservations = (queue_head_t*)kalloc(sizeof(queue_head_t));
-       assert(system_reservations);
-
-       queue_init(system_reservations);
-
-       task_reservations = (queue_head_t*)kalloc(sizeof(queue_head_t));
-       assert(task_reservations);
-
-       queue_init(task_reservations);
-
-       thread_reservations = (queue_head_t*)kalloc(sizeof(queue_head_t));
-       assert(thread_reservations);
-
-       queue_init(thread_reservations);
-}
-
-/*
- * pmc_bootstrap brings up all the necessary infrastructure required to use the
- * pmc sub-system.
- */
-__private_extern__
-void pmc_bootstrap(void) {
-       /* build our alloc zones */
-       init_pmc_zones();
-
-       /* build the locks */
-       init_pmc_locks();
-
-       /* build our tracking queues */
-       init_pmc_queues();
-}
-
-#endif /* XNU_KERNEL_PRIVATE */
-
-/*
- * Perf Monitor Internals
- */
-
-static perf_monitor_t perf_monitor_alloc(void) {
-       /* perf monitors come from the perf small zone */
-       return (perf_monitor_t)zalloc(perf_small_zone);
-}
-
-static void perf_monitor_free(void *pm) {
-       zfree(perf_small_zone, pm);
-}
-
-static void perf_monitor_init(perf_monitor_t pm, int cpu) {
-       assert(pm);
-
-       pm->object = NULL;
-
-       bzero(&(pm->methods), sizeof(perf_monitor_methods_t));
-
-       pm->useCount = 1;       /* initial retain count of 1, for caller */
-       
-       pm->reservedCounters = 0;
-    
-       pm->cpu = cpu;
-
-       pm->link.next = pm->link.prev = (queue_entry_t)NULL;
-       pm->cpu_link.next = pm->cpu_link.prev = (queue_entry_t)NULL;
-}
-
-/*
- * perf_monitor_dequeue removes the given perf_monitor_t from the
- * perf_monitor_queue, thereby unregistering it with the system.
- */
-static void perf_monitor_dequeue(perf_monitor_t pm) {
-       lck_spin_lock(&perf_monitor_queue_spin);
-       
-       if (pm->methods.flags & PERFMON_FLAG_REQUIRES_IDLE_NOTIFICATIONS) {
-               /* If this flag is set, the monitor is already validated to be 
-                * accessible from a single cpu only.
-                */
-               queue_remove(cpu_monitor_queues[pm->cpu], pm, perf_monitor_t, cpu_link); 
-       }
-       
-	/*
-	 * Remove the @pm object from the @perf_monitors_queue queue (it is of type
-	 * <perf_monitor_t> and has a field called @link that is the queue_link_t).
-	 */
-       queue_remove(perf_monitors_queue, pm, perf_monitor_t, link);
-
-       perf_monitors_count--;
-
-       lck_spin_unlock(&perf_monitor_queue_spin);
-}
-
-/*
- * perf_monitor_enqueue adds the given perf_monitor_t to the perf_monitor_queue,
- * thereby registering it for use with the system.
- */
-static void perf_monitor_enqueue(perf_monitor_t pm) {
-    
-       lck_mtx_lock(&cpu_monitor_queue_mutex);
-       lck_spin_lock(&perf_monitor_queue_spin);
-
-       if (pm->cpu >= 0) {
-               /* Deferred initialisation; saves memory and permits ml_get_max_cpus()
-                * to block until cpu initialisation is complete.
-                */
-               if (!cpu_monitor_queues) {
-                       uint32_t max_cpus;
-                       queue_head_t **queues;
-                       uint32_t i;
-               
-                       lck_spin_unlock(&perf_monitor_queue_spin);
-               
-                       max_cpus = ml_get_max_cpus();
-
-                       queues = (queue_head_t**)kalloc(sizeof(queue_head_t*) * max_cpus);
-                       assert(queues);
-                       for (i = 0; i < max_cpus; i++) {
-                               queue_head_t *queue = (queue_head_t*)kalloc(sizeof(queue_head_t));
-                               assert(queue);
-                               queue_init(queue);
-                               queues[i] = queue;
-                       }
-               
-                       lck_spin_lock(&perf_monitor_queue_spin);
-               
-                       cpu_monitor_queues = queues;
-               }
-           
-               queue_enter(cpu_monitor_queues[pm->cpu], pm, perf_monitor_t, cpu_link);
-       }
-       
-       queue_enter(perf_monitors_queue, pm, perf_monitor_t, link);
-       perf_monitors_count++;
-       
-       lck_spin_unlock(&perf_monitor_queue_spin);
-       lck_mtx_unlock(&cpu_monitor_queue_mutex);
-}
-
-/*
- * perf_monitor_reference increments the reference count for the given
- * perf_monitor_t.
- */
-static void perf_monitor_reference(perf_monitor_t pm) {
-       assert(pm);
-
-       OSIncrementAtomic(&(pm->useCount));
-}
-
-/*
- * perf_monitor_deallocate decrements the reference count for the given
- * perf_monitor_t.  If the reference count hits 0, the object is released back
- * to the perf_small_zone via a call to perf_monitor_free().
- */
-static void perf_monitor_deallocate(perf_monitor_t pm) {
-       assert(pm);
-
-	/* If we just dropped the last reference */
-       if(1 == OSDecrementAtomic(&(pm->useCount))) {
-               /* Free the object */
-               perf_monitor_free(pm);
-       }
-}
-
-/*
- * perf_monitor_find attempts to find a perf_monitor_t that corresponds to the
- * given C++ object pointer that was used when registering with the subsystem.
- *
- * If found, the method returns the perf_monitor_t with an extra reference
- * placed on the object; otherwise it returns NULL.
- *
- * NOTE: Caller must use perf_monitor_deallocate to remove the extra reference after
- * calling perf_monitor_find.
- */
-static perf_monitor_t perf_monitor_find(perf_monitor_object_t monitor) {
-       assert(monitor);
-       perf_monitor_t element = NULL;
-       perf_monitor_t found = NULL;
-
-       lck_spin_lock(&perf_monitor_queue_spin);
-       
-       queue_iterate(perf_monitors_queue, element, perf_monitor_t, link) {
-               if(element->object == monitor) {
-                       perf_monitor_reference(element);
-                       found = element;
-                       break;
-               }
-       }
-
-       lck_spin_unlock(&perf_monitor_queue_spin);
-
-       return found;
-}
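/*
 * Usage sketch for the reference discipline described above.  This is an
 * illustrative, hypothetical caller; "monitor" stands for whatever
 * perf_monitor_object_t was registered with the subsystem.
 */
static void perf_monitor_find_example(perf_monitor_object_t monitor) {
	perf_monitor_t pm = perf_monitor_find(monitor);

	if (pm) {
		/* ... use pm while holding the extra reference ... */
		perf_monitor_deallocate(pm);	/* drop the reference taken by perf_monitor_find */
	}
}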
-
-/*
- * perf_monitor_add_pmc adds a newly registered PMC to the perf monitor it is
- * associated with.
- */
-
-static void perf_monitor_add_pmc(perf_monitor_t pm, pmc_t pmc __unused) {
-       assert(pm);
-       assert(pmc);
-
-	/* Today, we merely take a reference now that a new pmc is attached */
-       perf_monitor_reference(pm);
-}
-
-/*
- * perf_monitor_remove_pmc removes a newly *un*registered PMC from the perf
- * monitor it is associated with.
- */
-static void perf_monitor_remove_pmc(perf_monitor_t pm, pmc_t pmc __unused) {
-       assert(pm);
-       assert(pmc);
-
-	/* Today, we merely drop a reference now that the pmc is detached */
-       perf_monitor_deallocate(pm);
-}
-
-/*
- * Perf Counter internals
- */
-
-static pmc_t pmc_alloc(void) {
-       return (pmc_t)zalloc(perf_big_zone);
-}
-
-static void pmc_free(void *pmc) {
-       zfree(perf_big_zone, pmc);
-}
-
-/*
- * pmc_init initializes a newly allocated pmc_t
- */
-static void pmc_init(pmc_t pmc) {
-       assert(pmc);
-
-       pmc->object = NULL;
-       pmc->monitor = NULL;
-
-       bzero(&pmc->methods, sizeof(pmc_methods_t));
-
-       /* One reference for the caller */
-       pmc->useCount = 1;
-}
-
-/*
- * pmc_reference increments the reference count of the given pmc_t
- */
-static void pmc_reference(pmc_t pmc) {
-       assert(pmc);
-
-       OSIncrementAtomic(&(pmc->useCount));
-}
-
-/*
- * pmc_deallocate decrements the reference count of the given pmc_t. If the
- * reference count hits zero, the given pmc_t is deallocated and released back
- * to the allocation zone.
- */
-static void pmc_deallocate(pmc_t pmc) {
-       assert(pmc);
-
-	/* If we just dropped the last reference */
-       if(1 == OSDecrementAtomic(&(pmc->useCount))) {
-               /* Free the pmc */
-               pmc_free(pmc);
-       }
-}
-
-/*
- * pmc_dequeue removes the given, newly *un*registered pmc from the
- * perf_counters_queue.
- */
-static void pmc_dequeue(pmc_t pmc) {
-       lck_spin_lock(&perf_counters_queue_spin);
-
-       queue_remove(perf_counters_queue, pmc, pmc_t, link);
-
-       perf_counters_count--;
-
-       lck_spin_unlock(&perf_counters_queue_spin);
-}
-
-/*
- * pmc_enqueue adds the given, newly registered pmc to the perf_counters_queue
- */
-static void pmc_enqueue(pmc_t pmc) {
-       lck_spin_lock(&perf_counters_queue_spin);
-
-       queue_enter(perf_counters_queue, pmc, pmc_t, link);
-
-       perf_counters_count++;
-
-       lck_spin_unlock(&perf_counters_queue_spin);
-}
-
-/*
- * pmc_find attempts to locate a pmc_t that was registered with the given
- * pmc_object_t pointer.  If found, it returns the pmc_t with an extra reference
- * which must be dropped by the caller by calling pmc_deallocate().
- */
-static pmc_t pmc_find(pmc_object_t object) {
-       assert(object);
-
-       lck_spin_lock(&perf_counters_queue_spin);
-       
-       pmc_t element = NULL;
-       pmc_t found = NULL;
-
-       queue_iterate(perf_counters_queue, element, pmc_t, link) {
-               if(element->object == object) {
-                       pmc_reference(element);
-                       found = element;
-                       break;
-               }
-       }
-
-       lck_spin_unlock(&perf_counters_queue_spin);
-
-       return found;
-}
-
-/*
- * Config internals
- */
-
-/* Allocate a pmc_config_t */
-static pmc_config_t pmc_config_alloc(pmc_t pmc __unused) {
-       return (pmc_config_t)zalloc(perf_small_zone);
-}
-
-/* Free a pmc_config_t, and underlying pmc_config_object_t (if needed) */
-static void pmc_config_free(pmc_t pmc, pmc_config_t config) {
-       assert(pmc);
-       assert(config);
-
-       if(config->object) {
-               pmc->methods.free_config(pmc->object, config->object);
-               config->object = NULL;
-       }
-
-       zfree(perf_small_zone, config);
-}
-
-static kern_return_t pmc_open(pmc_t pmc) {
-       assert(pmc);
-       assert(pmc->object);
-       assert(pmc->open_object);
-
-       return pmc->methods.open(pmc->object, pmc->open_object);
-}
-
-static kern_return_t pmc_close(pmc_t pmc) {
-       assert(pmc);
-       assert(pmc->object);
-       assert(pmc->open_object);
-
-       return pmc->methods.close(pmc->object, pmc->open_object);
-}
-
-/*
- * Reservation Internals
- */
-
-static kern_return_t pmc_internal_reservation_set_pmc(pmc_reservation_t resv, pmc_t pmc);
-static void pmc_internal_reservation_store(pmc_reservation_t reservation);
-static void pmc_internal_reservation_load(pmc_reservation_t reservation);
-
-static pmc_reservation_t reservation_alloc(void) {
-       /* pmc reservations come from the perf small zone */
-       return (pmc_reservation_t)zalloc(perf_small_zone);
-}
-
-/*
- * reservation_free deallocates and releases all resources associated with the
- * given pmc_reservation_t.  This includes freeing the config used to create the
- * reservation, decrementing the reference count for the pmc used to create the
- * reservation, and deallocating the reservation's memory.
- */
-static void reservation_free(pmc_reservation_t resv) {
-       /* Free config */
-       if(resv->config) {
-               assert(resv->pmc);
-
-               pmc_free_config(resv->pmc, resv->config);
-
-               resv->config = NULL;
-       }
-
-       /* release PMC */
-       (void)pmc_internal_reservation_set_pmc(resv, NULL);
-
-       /* Free reservation */
-       zfree(perf_small_zone, resv);
-}
-
-/*
- * reservation_init initializes a newly created reservation.
- */
-static void reservation_init(pmc_reservation_t resv) {
-       assert(resv);
-
-       resv->pmc = NULL;
-       resv->config = NULL;
-       resv->value = 0ULL;
-
-       resv->flags = 0U;
-       resv->state = PMC_STATE(PMC_STATE_STATE_STOP, 0, 0);
-       resv->active_last_context_in = 0U;
-
-       /*
-        * Since this member is a union, we only need to set either the task 
-        * or thread to NULL.
-        */
-       resv->task = TASK_NULL;
-}
-
-/*
- * pmc_internal_reservation_set_pmc sets the pmc associated with the reservation object. If
- * there was one set already, it is deallocated (reference is dropped) before
- * the new one is set.  This method increases the reference count of the given
- * pmc_t.
- *
- * NOTE: It is okay to pass NULL as the pmc_t - this will have the effect of
- * dropping the reference on any previously set pmc, and setting the reservation
- * to having no pmc set.
- */
-static kern_return_t pmc_internal_reservation_set_pmc(pmc_reservation_t resv, pmc_t pmc) {
-       assert(resv);
-
-       if(resv->pmc) {
-               (void)pmc_close(resv->pmc);
-               pmc_deallocate(resv->pmc);
-               resv->pmc = NULL;
-       }
-
-       resv->pmc = pmc;
-
-       if(resv->pmc) {
-               pmc_reference(resv->pmc);
-               if(KERN_SUCCESS != pmc_open(resv->pmc)) {
-                       pmc_deallocate(resv->pmc);
-                       resv->pmc = NULL;
-
-                       return KERN_FAILURE;
-               }
-       }
-
-       return KERN_SUCCESS;
-}
-
-/* 
- * Used to place a reservation into one of the system, task, or thread queues.
- * Assumes the queue's spin lock is already held.
- */
-static void pmc_internal_reservation_enqueue(queue_t queue, pmc_reservation_t resv) {
-       assert(queue);
-       assert(resv);
-
-       queue_enter(queue, resv, pmc_reservation_t, link);
-}
-
-static void pmc_internal_reservation_dequeue(queue_t queue, pmc_reservation_t resv) {
-       assert(queue);
-       assert(resv);
-
-       queue_remove(queue, resv, pmc_reservation_t, link);
-}
-
-/* Returns TRUE if the reservation applies to the current execution context */
-static boolean_t pmc_internal_reservation_matches_context(pmc_reservation_t resv) {
-       boolean_t ret = FALSE;
-       assert(resv);
-
-       if(PMC_FLAG_IS_SYSTEM_SCOPE(resv->flags)) {
-               ret = TRUE;
-       } else if(PMC_FLAG_IS_TASK_SCOPE(resv->flags)) {
-               if(current_task() == resv->task) {
-                       ret = TRUE;
-               }
-       } else if(PMC_FLAG_IS_THREAD_SCOPE(resv->flags)) {
-               if(current_thread() == resv->thread) {
-                       ret = TRUE;
-               }
-       }
-
-       return ret;
-}
-
-/*
- * pmc_accessible_core_count returns the number of logical cores that can access
- * a given @pmc.  0 means every core in the system.
- */
-static uint32_t pmc_accessible_core_count(pmc_t pmc) {
-       assert(pmc);
-
-       uint32_t *cores = NULL;
-       size_t coreCt = 0UL;
-
-       if(KERN_SUCCESS != pmc->methods.accessible_cores(pmc->object,
-               &cores, &coreCt)) {
-               coreCt = 0U;
-       }
-
-       return (uint32_t)coreCt;
-}
-
-/* spin lock for the queue must already be held */
-/*
- * This method checks whether the given queue already holds a reservation that
- * conflicts with @resv: a reservation for the same pmc whose scope overlaps
- * @resv's scope, or for the same pmc when that pmc is accessible from more
- * than one core.  Returns TRUE only when such a conflict exists.
- */
-static boolean_t pmc_internal_reservation_queue_contains_pmc(queue_t queue, pmc_reservation_t resv) {
-       assert(queue);
-       assert(resv);
-
-       boolean_t ret = FALSE;
-       pmc_reservation_t tmp = NULL;
-
-       queue_iterate(queue, tmp, pmc_reservation_t, link) {
-               if(tmp->pmc == resv->pmc) {
-                       /* PMC matches - make sure scope matches first */
-                       switch(PMC_FLAG_SCOPE(tmp->flags)) {
-                               case PMC_FLAG_SCOPE_SYSTEM:
-                                       /*
-                                        * Found a reservation in system queue with same pmc - always a
-                                        * conflict.
-                                        */
-                                       ret = TRUE;
-                                       break;
-                               case PMC_FLAG_SCOPE_THREAD:
-                                       /*
-                                        * Found one in thread queue with the same PMC as the
-                                        * argument. Only a conflict if argument scope isn't
-                                        * thread or system, or the threads match.
-                                        */
-                                       ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_THREAD) || 
-                                               (tmp->thread == resv->thread);
-
-                                       if(!ret) {
-                                               /*
-						 * So far, no conflict - check that the pmc being
-						 * reserved isn't accessible from more than one
-						 * core; if it is, treat it as already taken.
-                                                */
-                                               if(1 != pmc_accessible_core_count(tmp->pmc)) {
-                                                       ret = TRUE;
-                                               }
-                                       }
-                                       break;
-                               case PMC_FLAG_SCOPE_TASK:
-                                       /* 
-                                        * Follow similar semantics for task scope.
-                                        */
-
-                                       ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_TASK) ||
-                                               (tmp->task == resv->task);
-                                       if(!ret) {
-                                               /*
-						 * So far, no conflict - check that the pmc being
-						 * reserved isn't accessible from more than one
-						 * core; if it is, treat it as already taken.
-                                                */
-                                               if(1 != pmc_accessible_core_count(tmp->pmc)) {
-                                                       ret = TRUE;
-                                               }
-                                       }
-
-                                       break;
-                       }
-
-                       if(ret) break;
-               }
-       }
-
-       return ret;
-}
-
-/*
- * pmc_internal_reservation_validate_for_pmc returns TRUE if the given reservation can be 
- * added to its target queue without creating conflicts (target queue is 
- * determined by the reservation's scope flags). Further, this method returns
- * FALSE if any level contains a reservation for a PMC that can be accessed from
- * more than one core and the given reservation also wants the same PMC.
- */
-static boolean_t pmc_internal_reservation_validate_for_pmc(pmc_reservation_t resv) {
-       assert(resv);
-       boolean_t ret = TRUE;
-
-       if(pmc_internal_reservation_queue_contains_pmc(system_reservations, resv) ||
-               pmc_internal_reservation_queue_contains_pmc(task_reservations, resv) ||
-               pmc_internal_reservation_queue_contains_pmc(thread_reservations, resv)) {
-               ret = FALSE;
-       }
-
-       return ret;
-}
-
-static void pmc_internal_update_thread_flag(thread_t thread, boolean_t newFlag) {
-       assert(thread);
-
-	/* See if this thread needs its PMC flag set */
-       pmc_reservation_t tmp = NULL;
-
-       if(!newFlag) {
-               /*
-                * If the parent task just dropped its reservation, iterate the thread
-                * reservations to see if we need to keep the pmc flag set for the given
-                * thread or not.
-                */
-               lck_spin_lock(&reservations_spin);
-       
-               queue_iterate(thread_reservations, tmp, pmc_reservation_t, link) {
-                       if(tmp->thread == thread) {
-                               newFlag = TRUE;
-                               break;
-                       }
-               }
-
-               lck_spin_unlock(&reservations_spin);
-       }
-
-       if(newFlag) {
-               OSBitOrAtomic(THREAD_PMC_FLAG, &thread->t_chud);
-       } else {
-               OSBitAndAtomic(~(THREAD_PMC_FLAG), &thread->t_chud);
-       }
-}
-
-/* 
- * This operation is (worst case) O(N*M), where N is the number of threads in the
- * given task, and M is the number of thread reservations in our system.
- */
-static void pmc_internal_update_task_flag(task_t task, boolean_t newFlag) {
-       assert(task);
-       thread_t thread = NULL;
-
-       if(newFlag) {
-               OSBitOrAtomic(TASK_PMC_FLAG, &task->t_chud);
-       } else {
-               OSBitAndAtomic(~(TASK_PMC_FLAG), &task->t_chud);
-       }
-
-       task_lock(task);
-
-       queue_iterate(&task->threads, thread, thread_t, task_threads) {
-               /* propagate the task's mask down to each thread  */
-               pmc_internal_update_thread_flag(thread, newFlag);
-       }
-
-       task_unlock(task);
-}
-
-/*
- * pmc_internal_reservation_add adds a reservation to the global tracking queues after
- * ensuring there are no reservation conflicts.  To do this, it holds the
- * reservations spin lock across both the conflict check and the enqueue (so no
- * other core can add a reservation for the same pmc to a queue that has
- * already been checked).
- */
-static boolean_t pmc_internal_reservation_add(pmc_reservation_t resv) {
-       assert(resv);
-
-       boolean_t ret = FALSE;
-
-	/* one spin lock protects all three reservation queues */
-       lck_spin_lock(&reservations_spin);
-
-       /* Check if the reservation can be added without conflicts */
-       if(pmc_internal_reservation_validate_for_pmc(resv)) {
-           
-               /* add reservation to appropriate scope */
-               switch(PMC_FLAG_SCOPE(resv->flags)) {
-               case PMC_FLAG_SCOPE_SYSTEM:
-                       /* Simply add it to the system queue */
-                       pmc_internal_reservation_enqueue(system_reservations, resv);
-                       system_reservation_count++;
-                       
-                       lck_spin_unlock(&reservations_spin);
-
-                       break;
-
-               case PMC_FLAG_SCOPE_TASK:
-                       assert(resv->task);
-
-			/* Enqueue it in our tracking queue; the task flag is updated below */
-                       pmc_internal_reservation_enqueue(task_reservations, resv);
-                       task_reservation_count++;
-
-                       lck_spin_unlock(&reservations_spin);
-
-                       /* update the task mask, and propagate it to existing threads */
-                       pmc_internal_update_task_flag(resv->task, TRUE);
-                       break;
-
-               /* Thread-switched counter */
-               case PMC_FLAG_SCOPE_THREAD:
-                       assert(resv->thread);
-
-                       /*
-                        * Works the same as a task-switched counter, only at
-                        * thread-scope
-                        */
-
-                       pmc_internal_reservation_enqueue(thread_reservations, resv);
-                       thread_reservation_count++;
-
-                       lck_spin_unlock(&reservations_spin);
-                       
-                       pmc_internal_update_thread_flag(resv->thread, TRUE);
-                       break;
-               }
-               
-               ret = TRUE;
-       } else {
-               lck_spin_unlock(&reservations_spin);
-       }                       
-       
-       return ret;
-}
-
-static void pmc_internal_reservation_broadcast(pmc_reservation_t reservation, void (*action_func)(void *)) {
-       uint32_t * cores;
-       size_t core_cnt;
-       
-       /* Get the list of accessible cores */
-       if (KERN_SUCCESS == pmc_get_accessible_core_list(reservation->pmc, &cores, &core_cnt)) {
-               boolean_t intrs_enabled = ml_set_interrupts_enabled(FALSE);
-
-               /* Fast case: the PMC is only accessible from one core and we happen to be on it */
-               if (core_cnt == 1 && cores[0] == (uint32_t)cpu_number()) {
-                       action_func(reservation);
-               } else {
-                       /* Call action_func on every accessible core */
-#if defined(__i386__) || defined(__x86_64__)
-                       size_t ii;
-                       cpumask_t mask = 0;
-                       
-                       /* Build a mask for the accessible cores */
-                       if (core_cnt > 0) {
-                               for (ii = 0; ii < core_cnt; ii++) {
-                                       mask |= cpu_to_cpumask(cores[ii]);
-                               }
-                       } else {
-                               /* core_cnt = 0 really means all cpus */
-                               mask = CPUMASK_ALL;
-                       }
-                       mp_cpus_call(mask, ASYNC, action_func, reservation);
-#else
-#error pmc_reservation_interrupt needs an inter-processor method invocation mechanism for this architecture
-#endif
-               }
-
-               ml_set_interrupts_enabled(intrs_enabled);
-       }
-       
-}
-
-/*
- * pmc_internal_reservation_remove removes the given reservation from the appropriate
- * reservation queue according to its scope. 
- *
- * NOTE: The scope flag must have been set for this method to function.
- */
-static void pmc_internal_reservation_remove(pmc_reservation_t resv) {
-       assert(resv);
-
-       /*
-        * Due to the way the macros are written, we can't just blindly queue-remove
-        * the reservation without knowing which queue it's in. We figure this out
-        * using the reservation's scope flags.
-        */
-
-       /* Lock the global spin lock */
-       lck_spin_lock(&reservations_spin);
-
-       switch(PMC_FLAG_SCOPE(resv->flags)) {
-
-               case PMC_FLAG_SCOPE_SYSTEM:
-                       pmc_internal_reservation_dequeue(system_reservations, resv);
-                       system_reservation_count--;
-                       
-                       lck_spin_unlock(&reservations_spin);
-                       
-                       break;
-
-               case PMC_FLAG_SCOPE_TASK:
-                       /* remove from the global queue */
-                       pmc_internal_reservation_dequeue(task_reservations, resv);
-                       task_reservation_count--;
-
-                       /* unlock the global */
-                       lck_spin_unlock(&reservations_spin);
-
-                       /* Recalculate task's counter mask */
-                       pmc_internal_update_task_flag(resv->task, FALSE);
-                       
-                       break;
-
-               case PMC_FLAG_SCOPE_THREAD:
-                       pmc_internal_reservation_dequeue(thread_reservations, resv);
-                       thread_reservation_count--;
-
-                       lck_spin_unlock(&reservations_spin);
-
-                       /* recalculate the thread's counter mask */
-                       pmc_internal_update_thread_flag(resv->thread, FALSE);
-
-                       break;
-       }
-}
-
-/* Reservation State Machine
- *
- * The PMC subsystem uses a 3-tuple of state information packed into a 32-bit quantity and a 
- * set of 9 events to provide MP-safe bookkeeping and control flow.  The 3-tuple is comprised 
- * of a state, a count of active contexts, and a set of modifier flags.  A state machine defines
- * the possible transitions at each event point given the current 3-tuple.  Atomicity is handled
- * by reading the current 3-tuple, applying the transformations indicated by the state machine
- * and then attempting to OSCompareAndSwap the transformed value.  If the OSCompareAndSwap fails,
- * the process is repeated until either the OSCompareAndSwap succeeds or not valid transitions are
- * available.
- *
- * The state machine is described using tuple notation for the current state and a related notation
- * for describing the transformations.  For conciseness, the flag and state names are abbreviated as
- * follows:
- * 
- * states:
- * S = STOP
- * CR = CAN_RUN
- * L = LOAD
- * R = RUN
- * ST = STORE
- * I = INTERRUPT
- * D = DEALLOC
- *
- * flags:
- *
- * S = STOPPING
- * D = DEALLOCING
- * I = INTERRUPTING
- *
- * The tuple notation is formed from the following pattern:
- *
- * tuple = < state, active-context-count, flags >
- * state = S | CR | L | R | ST | I | D
- * active-context-count = 0 | >0 | 1 | >1
- * flags = flags flag | blank
- * flag = S | D | I
- *
- * The transform notation is similar, but only describes the modifications made to the current state.
- * The notation is formed from the following pattern:
- * 
- * transform = < state, active-context-count, flags >
- * state = S | CR | L | R | ST | I | D
- * active-context-count = + | - | blank
- * flags = flags flag | flags !flag | blank
- * flag = S | D | I
- *
- * And now for the state machine:
- * State               Start           Stop            Free            Interrupt               End Interrupt           Context In              Context Out     Load Finished           Store Finished
- * <CR, 0, >                           <S, , >         <D, , >                 <L, +, >
- * <D, 0, >
- * <D, 1, D>                                                                   < , -, !D>
- * <D, >1, D>                                                                  < , -, >
- * <I, 0, D>                                                                   <D, , !D>
- * <I, 0, S>   < , , !S>                               < , , !SD>              <S, , !S>
- * <I, 0, >                                    < , , S>        < , , D>        <CR, , >
- * <L, 1, D>                                                                   <ST, -, >
- * <L, 1, ID>                                                                  <ST, -, >
- * <L, 1, IS>                                                  < , , !SD>      <ST, -, >
- * <L, 1, S>   < , , !S>                               < , , !SD>              <ST, -, >
- * <L, 1, >                                    < , , S>        < , , D>        < , , IS>                                                       < , +, >        <R, , >
- * <L, >1, D>                                                                  < , -, >                <R, -, >
- * <L, >1, ID>                                                                 < , -, >                <R, -, >
- * <L, >1, IS>                                                 < , , !SD>      < , -, >                <R, -, >
- * <L, >1, S>  < , , !S>                               < , , !SD>              < , -, >                <R, -, >
- * <L, >1, >                           < , , S>        < , , D>        < , , IS>                                                       < , +, >                < , -, >                <R, , >
- * <R, 1, D>                                                                   <ST, -, >
- * <R, 1, ID>                                                                  <ST, -, >
- * <R, 1, IS>                                                  < , , !SD>      <ST, -, >
- * <R, 1, S>   < , , !S>                               < , , !SD>              <ST, -, >
- * <R, 1, >                                    < , , S>        < , , D>        < , , IS>                                                       < , +, >        <ST, -, >
- * <R, >1, D>                                                                  < , -, >
- * <R, >1, ID>                                                                 < , -, >
- * <R, >1, IS>                                                 < , , !SD>      < , -, >
- * <R, >1, S>  < , , !S>                               < , , !SD>              < , -, >
- * <R, >1, >                           < , , S>        < , , D>        < , , IS>                                                       < , +, >                < , -, >
- * <S, 0, >            <CR, , >                                <D, , >
- * <S, 1, ID>                                                                  <I, -, !I>
- * <S, 1, IS>                                                  < , , !SD>      <I, -, !I>
- * <S, 1, S>   < , , !S>                               <D, , !SD>              < , -, !S>
- * <S, 1, >                                    < , , S>        <D, , D>        <L, +, >                <CR, -, >
- * <S, >1, ID>                                                                 < , -, >
- * <S, >1, IS>                                                 < , , !SD>      < , -, >
- * <S, >1, S>  < , , !S>                               <D, , !SD>              < , -, >
- * <S, >1, >                           < , , S>        <D, , D>                <L, +, >                < , -, >
- * <ST, 0, D>                                                                  <D, , !D>
- * <ST, 0, ID>                                                                 <I, , !I>
- * <ST, 0, IS>                                                 < , , !SD>      <I, , !I>
- * <ST, 0, S>  < , , !S>                               < , , !SD>              <S, , !S>
- * <ST, 0, >                           < , , S>        < , , D>        < , , IS>                                                       < , +, >                <CR, , >
- * <ST, >0, D>                                                                 < , -, >                                                        <D, , >
- * <ST, >0, ID>                                                                < , -, >                                                        <S, , >
- * <ST, >0, IS>                                                        < , , !SD>                                                                              < , -, >                        <S, , >
- * <ST, >0, S> < , , !S>                               < , , !SD>              < , -, >                                                        <S, , >
- * <ST, >0, >                          < , , S>        < , , D>        < , , IS>                                                       < , +, >                < , -, >                        <L, , >
- */
-
-static uint32_t pmc_internal_reservation_next_state(uint32_t current_state, pmc_state_event_t event) {
-       uint32_t new_state = PMC_STATE(PMC_STATE_STATE_INVALID, 0, 0);
-       
-       switch (event) {
-               case PMC_STATE_EVENT_START:
-                       switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
-                               case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING):
-                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
-                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING):
-                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING):
-                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
-                                       new_state = PMC_STATE_MODIFY(current_state, 0, 0, PMC_STATE_FLAGS_STOPPING);
-                                       break;
-                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
-                                       if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
-                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);
-                                       }
-                                       break;
-                       }
-                       break;
-               case PMC_STATE_EVENT_STOP:
-                       switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
-                               case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0):
-                                       new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0);
-                                       break;
-                               case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0):
-                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
-                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
-                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
-                                       new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_STOPPING, 0);
-                                       break;
-                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
-                                       if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
-                                               new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_STOPPING, 0);
-                                       }
-                                       break;
-                       }
-                       break;
-               case PMC_STATE_EVENT_FREE:
-                       switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
-                               case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0):
-                                       new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0);
-                                       break;
-                               case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING):
-                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
-                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
-                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
-                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING):
-                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
-                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
-                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
-                                       new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_DEALLOCING, PMC_STATE_FLAGS_STOPPING);
-                                       break;
-                               case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0):
-                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
-                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
-                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
-                                       new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_DEALLOCING, 0);
-                                       break;
-                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING):
-                                       new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING, PMC_STATE_FLAGS_STOPPING);
-                                       break;
-                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
-                                       if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
-                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING, 0);
-                                       } else {
-                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0);
-                                       }
-                                       break;
-                       }
-                       break;
-               case PMC_STATE_EVENT_INTERRUPT:
-                       switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
-                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
-                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
-                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
-                                       new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING, 0);
-                                       break;
-                       }
-                       break;
-               case PMC_STATE_EVENT_END_OF_INTERRUPT:
-                       switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
-                               case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_DEALLOCING):
-                                       new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, PMC_STATE_FLAGS_DEALLOCING);
-                                       break;
-                               case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING):
-                                       new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, PMC_STATE_FLAGS_STOPPING);
-                                       break;
-                               case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0):
-                                       new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);
-                                       break;
-                       }
-                       break;
-               case PMC_STATE_EVENT_CONTEXT_IN:
-                       switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
-                               case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0):
-                                       new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 1, 0, 0);
-                                       break;
-                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
-                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
-                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
-                                       new_state = PMC_STATE_MODIFY(current_state, 1, 0, 0);
-                                       break;
-                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
-                                       if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
-                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 1, 0, 0);
-                                       }
-                                       break;
-                       }
-                       break;
-               case PMC_STATE_EVENT_CONTEXT_OUT:
-                       switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
-                               case PMC_STATE(PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING):
-                                       if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) {
-                                               new_state = PMC_STATE_MODIFY(current_state, -1, 0, PMC_STATE_FLAGS_DEALLOCING);
-                                       } else {
-                                               new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
-                                       }                                       
-                                       break;
-                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_DEALLOCING):
-                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
-                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
-                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
-                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
-                                       if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) {
-                                               new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
-                                       }
-                                       break;
-                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_DEALLOCING):
-                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
-                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
-                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING):
-                               case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
-                                       if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
-                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STORE, -1, 0, 0);
-                                       } else {
-                                               new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
-                                       }
-                                       break;
-                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
-                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
-                                       if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
-                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_INTERRUPT, -1, 0, PMC_STATE_FLAGS_INTERRUPTING);
-                                       } else {
-                                               new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
-                                       }
-                                       break;
-                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING):
-                                       if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
-                                               new_state = PMC_STATE_MODIFY(current_state, -1, 0, PMC_STATE_FLAGS_STOPPING);
-                                       } else {
-                                               new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
-                                       }
-                                       break;
-                               case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
-                                       if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
-                                               if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
-                                                       new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, -1, 0, 0);
-                                               } else {
-                                                       new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
-                                               }
-                                       }
-                                       break;
-                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_DEALLOCING):
-                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
-                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
-                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
-                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
-                                       if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
-                                               new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
-                                       }
-                                       break;
-                       }
-                       break;
-               case PMC_STATE_EVENT_LOAD_FINISHED:
-                       switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
-                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_DEALLOCING):
-                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
-                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
-                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
-                                       if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) {
-                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_RUN, -1, 0, 0);
-                                       } else {
-                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STORE, -1, 0, 0);
-                                       }
-                                       break;
-                               case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
-                                       new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_RUN, 0, 0, 0);
-                                       break;
-                       }
-                       break;
-               case PMC_STATE_EVENT_STORE_FINISHED:
-                       switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
-                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_DEALLOCING):
-                                       if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
-                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, PMC_STATE_FLAGS_DEALLOCING);
-                                       } else {
-                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0);
-                                       }
-                                       break;
-                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
-                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
-                                       if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
-                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_INTERRUPT, 0, 0, PMC_STATE_FLAGS_INTERRUPTING);
-                                       } else {
-                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0);
-                                       }
-                                       break;
-                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
-                                       if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
-                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, PMC_STATE_FLAGS_STOPPING);
-                                       } else {
-                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0);
-                                       }
-                                       break;
-                               case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
-                                       if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
-                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);
-                                       } else {
-                                               new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 0, 0, 0);
-                                       }
-                                       break;
-                       }
-                       break;
-       }
-       
-       return new_state;
-}
-
-static uint32_t pmc_internal_reservation_move_for_event(pmc_reservation_t reservation, pmc_state_event_t event, pmc_state_t *old_state_out) {
-       pmc_state_t oldState;
-       pmc_state_t newState;
-
-       assert(reservation);
-       
-       /* Determine what state change, if any, we need to do.  Keep trying until either we succeed in making a transition
-        * or there is no valid move.
-        */
-       do {
-               oldState = reservation->state;
-               newState = pmc_internal_reservation_next_state(oldState, event);
-       } while (newState != PMC_STATE_INVALID && !OSCompareAndSwap(oldState, newState, &(reservation->state)));
-       
-       if (newState != PMC_STATE_INVALID) {
-               COUNTER_DEBUG("Moved reservation %p from state "PMC_STATE_FORMAT" to state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), PMC_STATE_ARGS(newState), pmc_state_event_name(event));
-       } else {
-               COUNTER_DEBUG("No valid moves for reservation %p in state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), pmc_state_event_name(event));
-       }
-       
-       if (old_state_out != NULL) {
-               *old_state_out = oldState;
-       }
-       
-       return newState;
-}
-                                       
-static void pmc_internal_reservation_context_out(pmc_reservation_t reservation) {
-       assert(reservation);
-       pmc_state_t newState;
-       pmc_state_t oldState;
-
-       /* Clear the flag indicating this reservation was active when this cpu did its last context in */
-       OSBitAndAtomic(~(1U << cpu_number()), &(reservation->active_last_context_in));
-       
-       /* Move the state machine */
-       if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_CONTEXT_OUT, &oldState))) {
-               return;
-       }
-       
-       /* Do any actions required based on the state change */
-       if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_STORE && PMC_STATE_STATE(oldState) != PMC_STATE_STATE_STORE) {
-               /* Just moved into STORE, so store the reservation. */
-               pmc_internal_reservation_store(reservation);
-       } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) {
-               /* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */
-               thread_wakeup((event_t)reservation);
-       }
-       
-}
-
-static void pmc_internal_reservation_context_in(pmc_reservation_t reservation) {
-       assert(reservation);
-       pmc_state_t oldState;
-       pmc_state_t newState;
-       
-       /* Move the state machine */
-       if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_CONTEXT_IN, &oldState))) {
-               return;
-       }
-
-       /* Mark that the reservation was active when this cpu did its last context in */
-       OSBitOrAtomic(1U << cpu_number(), &(reservation->active_last_context_in));
-               
-       /* Do any actions required based on the state change */
-       if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_LOAD && PMC_STATE_STATE(oldState) != PMC_STATE_STATE_LOAD) {
-               /* Just moved into LOAD, so load the reservation. */
-               pmc_internal_reservation_load(reservation);
-       }
-       
-}
-
-static void pmc_internal_reservation_store(pmc_reservation_t reservation) {
-       assert(reservation);
-       assert(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_STORE);
-       
-       assert(reservation->pmc);
-       assert(reservation->config);
-
-       pmc_state_t newState;
-       kern_return_t ret = KERN_SUCCESS;
-       
-       pmc_t store_pmc = reservation->pmc;
-       pmc_object_t store_pmc_obj = store_pmc->object;
-       perf_monitor_t store_pm = store_pmc->monitor;
-
-       /*
-        * Instruct the Perf Monitor that contains this counter to globally
-        * disable this counter.
-        */
-       ret = store_pm->methods.disable_counters(store_pm->object, &store_pmc_obj, 1);
-       if(KERN_SUCCESS != ret) {
-               COUNTER_DEBUG(" [error] disable_counters: 0x%x\n", ret);
-               return;
-       }
-
-       /* Instruct the counter to disable itself */
-       ret = store_pmc->methods.disable(store_pmc_obj);
-       if(KERN_SUCCESS != ret) {
-               COUNTER_DEBUG("  [error] disable: 0x%x\n", ret);
-       }
-
-       /* store the counter value into the reservation's stored count */
-       ret = store_pmc->methods.get_count(store_pmc_obj, &reservation->value);
-       if(KERN_SUCCESS != ret) {
-               COUNTER_DEBUG("  [error] get_count: 0x%x\n", ret);
-               return;
-       }
-               
-       /* Advance the state machine now that the STORE is finished */
-       if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_STORE_FINISHED, NULL))) {
-               return;
-       }
-
-       /* Do any actions required based on the state change */
-       if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_LOAD) {
-               /* Just moved into LOAD, so load the reservation. */
-               pmc_internal_reservation_load(reservation);
-       } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) {
-               /* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */
-               thread_wakeup((event_t)reservation);
-       }
-       
-}
-
-static void pmc_internal_reservation_load(pmc_reservation_t reservation) {
-       assert(reservation);
-       assert(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_LOAD);
-
-       pmc_state_t newState;
-       kern_return_t ret = KERN_SUCCESS;
-
-       assert(reservation->pmc);
-       assert(reservation->config);
-       
-       pmc_t load_pmc = reservation->pmc;
-       pmc_object_t load_pmc_obj = load_pmc->object;
-       perf_monitor_t load_pm = load_pmc->monitor;
-
-       /* Set the control register up with the stored configuration */
-       ret = load_pmc->methods.set_config(load_pmc_obj, reservation->config->object);
-       if(KERN_SUCCESS != ret) {
-               COUNTER_DEBUG("  [error] set_config: 0x%x\n", ret);
-               return;
-       }
-
-       /* load the counter value */
-       ret = load_pmc->methods.set_count(load_pmc_obj, reservation->value);
-       if(KERN_SUCCESS != ret) {
-               COUNTER_DEBUG("  [error] set_count: 0x%x\n", ret);
-               return;
-       }
-
-       /* Locally enable the counter */
-       ret = load_pmc->methods.enable(load_pmc_obj);
-       if(KERN_SUCCESS != ret) {
-               COUNTER_DEBUG("  [error] enable: 0x%x\n", ret);
-               return;
-       }
-
-       /*
-        * Instruct the Perf Monitor containing the pmc to enable the
-        * counter.
-        */
-       ret = load_pm->methods.enable_counters(load_pm->object, &load_pmc_obj, 1);
-       if(KERN_SUCCESS != ret) {
-               COUNTER_DEBUG("  [error] enable_counters: 0x%x\n", ret);
-               /* The counter could not be enabled on the hardware. */
-               return;
-       }
-       
-       /* Advance the state machine now that the LOAD is finished */
-       if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_LOAD_FINISHED, NULL))) {
-               return;
-       }
-
-       /* Do any actions required based on the state change */
-       if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_STORE) {
-               /* Just moved into STORE, so store the reservation. */
-               pmc_internal_reservation_store(reservation);
-       }
-       
-}
-
-/*
- * pmc_accessible_from_core will return TRUE if the given @pmc is directly
- * (e.g., hardware) readable from the given logical core.
- *
- * NOTE: This method is interrupt safe.
- */
-static inline boolean_t pmc_accessible_from_core(pmc_t pmc, uint32_t logicalCore) {
-       boolean_t ret = FALSE;
-
-       assert(pmc);
-
-       ret = pmc->methods.accessible_from_core(pmc->object, logicalCore);
-
-       return ret;
-}
-
-static void pmc_internal_reservation_start_cpu(void * arg) {
-       pmc_reservation_t reservation = (pmc_reservation_t)arg;
-       
-       assert(reservation);
-       
-
-       if (pmc_internal_reservation_matches_context(reservation)) {
-               /* We are in context, but the reservation may have already had the context_in method run.  Attempt
-                * to set this cpu's bit in the active_last_context_in mask.  If we set it, call context_in.
-                */
-               uint32_t oldMask = OSBitOrAtomic(1U << cpu_number(), &(reservation->active_last_context_in));
-               
-               if ((oldMask & (1U << cpu_number())) == 0) {
-                       COUNTER_DEBUG("Starting already in-context reservation %p for cpu %d\n", reservation, cpu_number());
-                       
-                       pmc_internal_reservation_context_in(reservation);
-               }
-       }
-}
-
-static void pmc_internal_reservation_stop_cpu(void * arg) {
-       pmc_reservation_t reservation = (pmc_reservation_t)arg;
-       
-       assert(reservation);
-       
-       
-       if (pmc_internal_reservation_matches_context(reservation)) {
-               COUNTER_DEBUG("Stopping in-context reservation %p for cpu %d\n", reservation, cpu_number());
-
-               pmc_internal_reservation_context_out(reservation);
-       }
-}      
-
-/*!fn
- * pmc_reservation_interrupt is called when a PMC reservation which was setup
- * with an interrupt threshold counts the requested number of events. When the
- * underlying counter hits the threshold, an interrupt is generated, and this
- * method is called. This method marks the reservation as stopped, and passes
- * control off to the user-registered callback method, along with the
- * reservation (so that the user can, for example, write a 0 to the counter, and
- * restart the reservation).
- * This method assumes the reservation has a valid pmc_config_t within.
- *
- * @param target The pmc_reservation_t that caused the interrupt.
- * @param refCon User specified reference constant.
- */
-static void pmc_reservation_interrupt(void *target, void *refCon) {
-       pmc_reservation_t reservation = (pmc_reservation_t)target;
-       pmc_state_t newState;
-       uint64_t timeout;
-       uint32_t spins;
-
-       assert(reservation);
-
-       /* Move the state machine */
-       if (PMC_STATE_INVALID == pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_INTERRUPT, NULL)) {
-               return;
-       }
-
-       /* A valid state move has been made, but won't be picked up until a context switch occurs.  To cause matching
-        * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu
-        * on every cpu that can access the PMC.
-        */
-       pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu);
-                       
-       /* Spin waiting for the state to turn to INTERRUPT */
-       nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
-       timeout += mach_absolute_time();
-       spins = 0;
-       while (PMC_STATE_STATE(reservation->state) != PMC_STATE_STATE_INTERRUPT) {
-               /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */
-               if (++spins > PMC_SPIN_THRESHOLD) {
-                       if (mach_absolute_time() > timeout) {
-                               pmc_spin_timeout_count++;
-                               assert(0);
-                       }
-               }
-
-               cpu_pause();
-       }
-                       
-       assert(reservation->config);
-       assert(reservation->config->method);                    
-               
-       /* Call the registered callback handler */
-#if DEBUG_COUNTERS
-       uint64_t start = mach_absolute_time();
-#endif /* DEBUG_COUNTERS */
-       
-       (void)reservation->config->method(reservation, refCon);
-       
-#if DEBUG_COUNTERS
-       uint64_t end = mach_absolute_time();
-       if((end - start) > 5000ULL) {
-               kprintf("%s - user method %p took %llu ns\n", __FUNCTION__, 
-                               reservation->config->method, (end - start));
-       }
-#endif
-       
-       /* Move the state machine */
-       if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_END_OF_INTERRUPT, NULL))) {
-               return;
-       }
-       
-       /* Do any post-move actions necessary */
-       if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_CAN_RUN) {
-               pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_start_cpu);
-       } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) {
-               /* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */
-               thread_wakeup((event_t)reservation);
-       }
-}      
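-
-/*
- * A minimal sketch of a threshold (PMI) handler of the shape described above,
- * assuming the documented (reservation, refCon) calling convention; the
- * authoritative pmc_interrupt_method_t typedef lives in pmc.h.  As suggested
- * above, the handler simply rearms the counter by writing a 0 and restarting
- * the reservation.  The example_ name is illustrative only.
- */
-static void example_pmi_handler(pmc_reservation_t reservation, void *refCon) {
-       (void)refCon;
-
-       /* Clear the count and let the reservation run again. */
-       (void)pmc_reservation_write(reservation, 0ULL);
-       (void)pmc_reservation_start(reservation);
-}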
-
-/*
- * Apple-private KPI for Apple kexts (IOProfileFamily) only
- */
-
-#if 0
-#pragma mark -
-#pragma mark IOProfileFamily private KPI
-#endif
-
-/*
- * perf_monitor_register registers a new Performance Monitor, and its associated
- * callback methods.  The given perf_monitor_object_t is the first argument to
- * each callback when they are called.
- */
-kern_return_t perf_monitor_register(perf_monitor_object_t monitor,
-       perf_monitor_methods_t *methods) {
-       int cpu = -1;
-
-       COUNTER_DEBUG("registering perf monitor %p\n", monitor);
-
-       if(!monitor || !methods) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       /* Protect against out-of-date driver kexts */
-       if(MACH_PERFMON_METHODS_VERSION != methods->perf_monitor_methods_version) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       /* If the monitor requires idle notifications, ensure that it is 
-        * accessible from a single core only.
-        */
-       if (methods->flags & PERFMON_FLAG_REQUIRES_IDLE_NOTIFICATIONS) {
-               uint32_t *cores;
-               size_t core_cnt;
-           
-               if (KERN_SUCCESS == methods->accessible_cores(monitor, &cores, &core_cnt)) {
-                       /* 
-                        * Guard against disabled cores - monitors will always match and
-                        * attempt registration, irrespective of 'cpus=x' boot-arg.
-                        */
-                       if ((core_cnt == 1) && (cores[0] < (uint32_t)ml_get_max_cpus())) {
-                               cpu = cores[0];
-                       } else {
-                               return KERN_INVALID_ARGUMENT;
-                       }
-               }           
-       }
-
-       /* All methods are required */
-       if(!methods->accessible_cores ||
-          !methods->enable_counters || !methods->disable_counters ||
-          !methods->on_idle || !methods->on_idle_exit) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       /* prevent dupes. */
-       perf_monitor_t dupe = perf_monitor_find(monitor);
-       if(dupe) {
-               COUNTER_DEBUG("Duplicate registration for %p\n", monitor);
-               perf_monitor_deallocate(dupe);
-               return KERN_FAILURE;
-       }
-
-       perf_monitor_t pm = perf_monitor_alloc();
-       if(!pm) {
-               return KERN_RESOURCE_SHORTAGE;
-       }
-
-       /* initialize the object */
-       perf_monitor_init(pm, cpu);
-
-       /* copy in the registration info */
-       pm->object = monitor;
-       memcpy(&(pm->methods), methods, sizeof(perf_monitor_methods_t));
-
-       /* place it in the tracking queues */
-       perf_monitor_enqueue(pm);
-
-       /* debug it */
-       PRINT_PERF_MON(pm);
-
-       return KERN_SUCCESS;
-}
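-
-/*
- * Registration is normally driven automatically by IOProfileFamily, but the
- * calling convention amounts to the following sketch (driver side; the my_*
- * callbacks and my_monitor_object are hypothetical, and the full
- * perf_monitor_methods_t layout and MACH_PERFMON_METHODS_VERSION come from
- * pmc.h):
- *
- *     perf_monitor_methods_t methods;
- *     bzero(&methods, sizeof(methods));
- *     methods.perf_monitor_methods_version = MACH_PERFMON_METHODS_VERSION;
- *     methods.flags = 0;
- *     methods.accessible_cores = my_accessible_cores;
- *     methods.enable_counters = my_enable_counters;
- *     methods.disable_counters = my_disable_counters;
- *     methods.on_idle = my_on_idle;
- *     methods.on_idle_exit = my_on_idle_exit;
- *
- *     kern_return_t kr = perf_monitor_register(my_monitor_object, &methods);
- */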
-
-/*
- * perf_monitor_unregister unregisters a previously registered Perf Monitor,
- * looking it up by reference pointer (the same that was used in
- * perf_monitor_register()).
- */
-kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor) {
-       kern_return_t ret = KERN_FAILURE;
-
-       COUNTER_DEBUG("unregistering perf monitor %p\n", monitor);
-
-       if(!monitor) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       perf_monitor_t pm = perf_monitor_find(monitor);
-       if(pm) {
-               /* Remove it from the queues. */
-               perf_monitor_dequeue(pm);
-
-               /* drop extra retain from find */
-               perf_monitor_deallocate(pm);
-
-               /* and release the object */
-               perf_monitor_deallocate(pm);
-
-               ret = KERN_SUCCESS;
-       } else {
-               COUNTER_DEBUG("could not find a registered pm that matches!\n");
-       }
-
-       return ret;
-}
-
-/*
- * pmc_register registers a new PMC for use with the pmc subsystem. Each PMC is
- * associated with a Perf Monitor.  Perf Monitors are looked up by the reference
- * pointer that was used to previously register them. 
- *
- * PMCs are registered with a reference pointer (@pmc_object), and a set of
- * callback methods.  When the given callback methods are called from xnu, the
- * first argument will always be the reference pointer used to register the PMC.
- *
- * NOTE: @monitor must have been successfully registered via
- * perf_monitor_register before this method will succeed.
- */
-kern_return_t pmc_register(perf_monitor_object_t monitor, pmc_object_t pmc_object,
-       pmc_methods_t *methods, void *object) {
-
-       COUNTER_DEBUG("%p %p\n", monitor, pmc_object);
-
-       if(!monitor || !pmc_object || !methods || !object) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       /* Prevent version mismatches */
-       if(MACH_PMC_METHODS_VERSION != methods->pmc_methods_version) {
-               COUNTER_DEBUG("version mismatch\n");
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       /* All methods are required. */
-       if(!methods->create_config || 
-               !methods->free_config ||
-               !methods->config_set_value || 
-               !methods->config_set_threshold || 
-               !methods->config_set_handler ||
-               !methods->set_config || 
-               !methods->get_monitor || 
-               !methods->get_name ||
-               !methods->accessible_from_core || 
-               !methods->accessible_cores ||
-               !methods->get_count || 
-               !methods->set_count ||
-               !methods->disable ||
-               !methods->enable ||
-               !methods->open || 
-               !methods->close) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       /* make sure this perf monitor object is already registered */
-       /*
-        * NOTE: this adds a reference to the parent, so we'll have to drop it in
-        * any failure code paths from here on out.
-        */
-       perf_monitor_t pm = perf_monitor_find(monitor);
-       if(!pm) {
-               COUNTER_DEBUG("Could not find perf monitor for %p\n", monitor);
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       /* make a new pmc */
-       pmc_t pmc = pmc_alloc();
-       if(!pmc) {
-               /* drop the extra reference from perf_monitor_find() */
-               perf_monitor_deallocate(pm);
-               return KERN_RESOURCE_SHORTAGE;
-       }
-
-       /* init it */
-       pmc_init(pmc);
-
-       pmc->object = pmc_object;
-       pmc->open_object = object;
-
-       /* copy the callbacks in */
-       memcpy(&(pmc->methods), methods, sizeof(pmc_methods_t));
-
-       pmc->monitor = pm;
-
-       perf_monitor_add_pmc(pmc->monitor, pmc);
-
-       /* enqueue it in our tracking queue */
-       pmc_enqueue(pmc);
-
-       /* drop extra reference from perf_monitor_find() */
-       perf_monitor_deallocate(pm);
-
-       return KERN_SUCCESS;
-}
-
-/*
- * pmc_unregister unregisters a previously registered PMC, looking it up by
- * reference pointer to *both* the Perf Monitor it was created with and the
- * PMC itself.
- */
-kern_return_t pmc_unregister(perf_monitor_object_t monitor, pmc_object_t pmc_object) {
-       COUNTER_DEBUG("%p %p\n", monitor, pmc_object);
-
-       if(!monitor || !pmc_object) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       pmc_t pmc = pmc_find(pmc_object);
-       if(!pmc) {
-               COUNTER_DEBUG("Could not find a matching pmc.\n");
-               return KERN_FAILURE;
-       }
-
-       /* remove it from the global queue */
-       pmc_dequeue(pmc);
-
-       perf_monitor_remove_pmc(pmc->monitor, pmc);
-
-       /* remove extra reference count from pmc_find() */
-       pmc_deallocate(pmc);
-
-       /* dealloc the pmc */
-       pmc_deallocate(pmc);
-
-       return KERN_SUCCESS;
-}
-
-static void perf_monitor_reservation_add(perf_monitor_t monitor) {
-    assert(monitor);
-    OSIncrementAtomic(&(monitor->reservedCounters));
-}
-
-static void perf_monitor_reservation_remove(perf_monitor_t monitor) {
-    assert(monitor);
-    OSDecrementAtomic(&(monitor->reservedCounters));    
-}
-
-#if 0
-#pragma mark -
-#pragma mark KPI
-#endif
-
-/*
- * Begin in-kernel and in-kext KPI methods
- */
-
-/*
- * pmc_create_config creates a new configuration area from a given @pmc.
- *
- * NOTE: This method is not interrupt safe.
- */
-kern_return_t pmc_create_config(pmc_t pmc, pmc_config_t *config) {
-       pmc_config_t tmp = NULL;
-
-       if(!pmc || !config) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       pmc_reference(pmc);
-
-       tmp = pmc_config_alloc(pmc);
-       if(tmp) {
-               tmp->object = pmc->methods.create_config(pmc->object);
-
-               if(!tmp->object) {
-                       pmc_config_free(pmc, tmp);
-                       tmp = NULL;
-               } else {
-                       tmp->interrupt_after_value = 0ULL;
-                       tmp->method = NULL;
-                       tmp->refCon = NULL;
-               }
-       }
-
-       pmc_deallocate(pmc);
-
-       if(!tmp) {
-               return KERN_RESOURCE_SHORTAGE;
-       }
-
-       *config = tmp;
-
-       return KERN_SUCCESS;
-}
-
-/*
- * pmc_free_config frees a configuration area created from a given @pmc
- *
- * NOTE: This method is not interrupt safe.
- */
-void pmc_free_config(pmc_t pmc, pmc_config_t config) {
-       assert(pmc);
-       assert(config);
-
-       pmc_reference(pmc);
-
-       pmc_config_free(pmc, config);
-
-       pmc_deallocate(pmc);
-}
-
-/*
- * pmc_config_set_value sets up configuration area key-value pairs.  These pairs
- * are to be either pre-known, or looked up via CoreProfile.framework.
- *
- * NOTE: This method is not interrupt safe.
- */
-kern_return_t pmc_config_set_value(pmc_t pmc, pmc_config_t config,
-       uint8_t id, uint64_t value) {
-
-       kern_return_t ret = KERN_INVALID_ARGUMENT;
-       
-       if(!pmc || !config) {
-               return ret;
-       }
-
-       pmc_reference(pmc);
-
-       ret = pmc->methods.config_set_value(config->object, id, value);
-
-       pmc_deallocate(pmc);
-
-       return ret;
-}
-
-/*
- * pmc_config_set_interrupt_threshold modifies a config object, instructing
- * the pmc that it should generate a call to the given pmc_interrupt_method_t
- * after the counter counts @threshold events.
- *
- * PMC Threshold handler methods will have the pmc_reservation_t that generated the interrupt
- * as the first argument when the interrupt handler is invoked, and the given
- * @refCon (which may be NULL) as the second.
- *
- * See pmc_interrupt_method_t.
- *
- * NOTE: This method is not interrupt safe.
- */
-kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc, pmc_config_t config, 
-       uint64_t threshold, pmc_interrupt_method_t method, void *refCon) {
-       kern_return_t ret = KERN_INVALID_ARGUMENT;
-
-       if(!config || !pmc) {
-               return ret;
-       }
-       
-       assert(config);
-       assert(pmc);
-
-       pmc_reference(pmc);
-
-       do {
-               /*
-                * We have a minor annoyance to side-step here. The driver layer expects
-                * the config to never change once a reservation has been taken out with
-                * it.  However, in order to pass the reservation as the first argument
-                * to the PMI method (allowing the user method to, for example, write a
-                * 0 to it and restart it), we need to create the
-                * pmc_reservation_t before setting it up in the config object.
-                * We overcome this by caching the method in the pmc_config_t stand-in,
-                * and mutating the pmc_config_object_t just before returning a
-                * reservation (in pmc_reserve() and friends, below).
-                */
-
-               /* might as well stash this away too. */
-               config->interrupt_after_value = threshold;
-               config->method = method;
-               config->refCon = refCon;
-
-               ret = KERN_SUCCESS;
-
-       }while(0);
-
-       pmc_deallocate(pmc);
-
-       return ret;
-}
-
-/*
- * pmc_get_pmc_list returns an allocated list of pmc_t's, as well as the number
- * of pmc_t's returned. Callers should free this list with a call to
- * pmc_free_pmc_list().
- *
- * NOTE: This method is not interrupt safe.
- */
-kern_return_t pmc_get_pmc_list(pmc_t **pmcs, size_t *pmcCount) {
-       pmc_t *array = NULL;
-       pmc_t pmc = NULL;
-       size_t count = 0UL;
-       
-       do {
-               /* Copy down (to the stack) the count of perf counters */
-               vm_size_t size = perf_counters_count;
-
-               /* Allocate that sized chunk */
-               array = (pmc_t *)kalloc(sizeof(pmc_t) * size);
-               if(!array) {
-                       return KERN_RESOURCE_SHORTAGE;
-               }
-
-               /* Take the spin lock */
-               lck_spin_lock(&perf_counters_queue_spin);
-
-               /* verify the size didn't change while we were allocating */
-               if(size != perf_counters_count) {
-                       /*
-                        * queue size has changed between alloc and now - go back and
-                        * make another pass.
-                        */
-
-                       /* drop the lock */
-                       lck_spin_unlock(&perf_counters_queue_spin);
-
-                       /* free the block */
-                       kfree(array, sizeof(pmc_t) * size);
-                       array = NULL;
-               }
-
-               /* if we get here, and array is NULL, we try again. */
-       }while(!array);
-
-       /* copy the bits out */
-       queue_iterate(perf_counters_queue, pmc, pmc_t, link) {
-               /* copy out the pointer */
-               array[count++] = pmc;
-       }
-
-       lck_spin_unlock(&perf_counters_queue_spin);
-
-       /* return the list and the size */
-       *pmcs = array;
-       *pmcCount = count;
-
-       return KERN_SUCCESS;
-}
-
-/*
- * pmc_free_pmc_list frees an array of pmc_t that has been returned from
- * pmc_get_pmc_list.
- * 
- * NOTE: This method is not interrupt safe.
- */
-void pmc_free_pmc_list(pmc_t *pmcs, size_t pmcCount) {
-       if(pmcs && pmcCount) {
-               COUNTER_DEBUG("pmcs: %p pmcCount: %lu\n", pmcs, pmcCount);
-
-               kfree(pmcs, pmcCount * sizeof(pmc_t));
-       }
-}
-
-kern_return_t pmc_find_by_name(const char *name, pmc_t **pmcs, size_t *pmcCount) {
-       kern_return_t ret = KERN_INVALID_ARGUMENT;
-
-       if(!name || !pmcs || !pmcCount) {
-               return ret;
-       }
-
-       pmc_t *list = NULL;
-       size_t count = 0UL;
-
-       if(KERN_SUCCESS == (ret = pmc_get_pmc_list(&list, &count))) {
-               size_t matchCount = 0UL, ii = 0UL, swapPtr = 0UL;
-               size_t len = strlen(name);
-
-               for(ii = 0UL; ii < count; ii++) {
-                       const char *pmcName = pmc_get_name(list[ii]);
-
-                       if(strlen(pmcName) < len) {
-                               /*
-                                * If the pmc name is shorter than the requested match, it's no 
-                                * match, as we're looking for the most specific match(es).
-                                */
-                               continue;
-                       }
-
-                       if(0 == strncmp(name, pmcName, len)) {
-                               pmc_t temp = list[ii];
-                               
-                               // move matches to the head of the array.
-                               list[ii] = list[swapPtr];
-                               list[swapPtr] = temp;
-                               swapPtr++;
-
-                               // keep a count of the matches
-                               matchCount++;
-                       }
-               }
-
-               if(matchCount) {
-                       /*
-                        * If we have matches, they are all at the head of the array, so
-                        * just allocate enough space for @matchCount pmc_t's, and copy the
-                        * head of the array to the new allocation.  Then free the old
-                        * allocation.
-                        */
-
-                       pmc_t *result = (pmc_t *)kalloc(sizeof(pmc_t) * matchCount);
-                       if(result) {
-                               // copy the matches
-                               memcpy(result, list, sizeof(pmc_t) * matchCount);
-
-                               ret = KERN_SUCCESS;
-                       }
-
-                       pmc_free_pmc_list(list, count);
-
-                       if(!result) {
-                               *pmcs = NULL;
-                               *pmcCount = 0UL;
-                               return KERN_RESOURCE_SHORTAGE;
-                       }
-
-                       *pmcs = result;
-                       *pmcCount = matchCount;
-               } else {
-                       *pmcs = NULL;
-                       *pmcCount = 0UL;
-               }
-       }
-
-       return ret;
-}
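-
-/*
- * A minimal usage sketch for the lookup calls above: find every registered
- * pmc whose name starts with the given prefix, log the matches, and release
- * the list.  The caller owns the array returned by pmc_find_by_name() and
- * must free it with pmc_free_pmc_list().  The example_ helper name is
- * illustrative only.
- */
-static kern_return_t example_log_matching_pmcs(const char *prefix) {
-       pmc_t *pmcs = NULL;
-       size_t count = 0UL, ii;
-
-       kern_return_t ret = pmc_find_by_name(prefix, &pmcs, &count);
-       if(KERN_SUCCESS != ret) {
-               return ret;
-       }
-
-       for(ii = 0UL; ii < count; ii++) {
-               COUNTER_DEBUG("matched pmc %s\n", pmc_get_name(pmcs[ii]));
-       }
-
-       /* pmc_free_pmc_list is a no-op for an empty result */
-       pmc_free_pmc_list(pmcs, count);
-
-       return KERN_SUCCESS;
-}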
-
-/*
- * pmc_get_name returns a pointer (not copied) to the human-readable name of the
- * given pmc.
- *
- * NOTE: Driver authors must take care to not allocate during this method, as
- * this method *IS* interrupt safe.
- */
-const char *pmc_get_name(pmc_t pmc) {
-       assert(pmc);
-
-       const char *name = pmc->methods.get_name(pmc->object);
-
-       return name;
-}
-
-/*
- * pmc_get_accessible_core_list returns a pointer to an array of logical core
- * numbers (as well as the size of that array) that represent the local cores
- * (hardware threads) from which the given @pmc can be accessed directly.
- *
- * NOTE: This method is interrupt safe.
- */
-kern_return_t pmc_get_accessible_core_list(pmc_t pmc, uint32_t **logicalCores,
-       size_t *logicalCoreCt) {
-
-       kern_return_t ret = KERN_INVALID_ARGUMENT;
-
-       if(!pmc || !logicalCores || !logicalCoreCt) {
-               return ret;
-       }
-
-       ret = pmc->methods.accessible_cores(pmc->object, logicalCores, logicalCoreCt);
-
-       return ret;
-}
-
-static boolean_t pmc_reservation_setup_pmi(pmc_reservation_t resv, pmc_config_t config) {
-       assert(resv);
-       assert(resv->pmc);
-       assert(config);
-       assert(config->object);
-
-       /* Only set up a PMI if one was requested; otherwise just report success */
-       if(config->interrupt_after_value && config->method) {
-
-               /* set the threshold */
-               kern_return_t ret = resv->pmc->methods.config_set_threshold(config->object,
-                       config->interrupt_after_value);
-
-               if(KERN_SUCCESS != ret) {
-                       /*
-                        * This is the most useful error message here, as this only happens
-                        * as a result of pmc_reserve*()
-                        */
-                       COUNTER_DEBUG("Failed to set threshold for pmc %p\n", resv->pmc);
-                       return FALSE;
-               }
-
-               if(KERN_SUCCESS != resv->pmc->methods.config_set_handler(config->object, 
-                       (void *)resv, &pmc_reservation_interrupt, config->refCon)) {
-
-                       COUNTER_DEBUG("Failed to set handler for pmc %p\n", resv->pmc);
-                       return FALSE;
-               }
-       }
-
-       return TRUE;
-}
-
-/*
- * pmc_reserve will attempt to reserve the given @pmc, with a given
- * configuration object, for counting system-wide. This method will fail with
- * KERN_FAILURE if the given pmc is already reserved at any scope.
- *
- * This method consumes the given configuration object if it returns
- * KERN_SUCCESS. Any other return value indicates the caller
- * must free the config object via pmc_free_config().
- *
- * NOTE: This method is NOT interrupt safe.
- */
-kern_return_t pmc_reserve(pmc_t pmc, pmc_config_t config,
-       pmc_reservation_t *reservation) {
-
-       if(!pmc || !config || !reservation) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       pmc_reservation_t resv = reservation_alloc();
-       if(!resv) {
-               return KERN_RESOURCE_SHORTAGE;
-       }
-
-       reservation_init(resv);
-
-       resv->flags |= PMC_FLAG_SCOPE_SYSTEM;
-       resv->config = config;
-
-       if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) {
-               resv->config = NULL;
-               return KERN_FAILURE;
-       }
-       
-       /* enqueue reservation in proper place */
-       if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) {
-               /* Prevent free of config object */
-               resv->config = NULL;
-               
-               reservation_free(resv);
-               return KERN_FAILURE;
-       }
-
-       perf_monitor_reservation_add(pmc->monitor);
-       
-       *reservation = resv;
-
-       return KERN_SUCCESS;
-}
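-
-/*
- * A minimal end-to-end sketch of the flow described above: build a config,
- * optionally request a PMI, take a system-wide reservation, and start it.
- * The id/value pair and the 1000000-event threshold are hypothetical; real
- * event ids come from CoreProfile.framework or the driver.  On success the
- * config is consumed by pmc_reserve(); on any earlier failure the caller
- * still owns it and must free it with pmc_free_config().
- */
-static kern_return_t example_reserve_system_wide(pmc_t pmc, pmc_reservation_t *resv_out) {
-       pmc_config_t config = NULL;
-       kern_return_t ret;
-
-       ret = pmc_create_config(pmc, &config);
-       if(KERN_SUCCESS != ret) {
-               return ret;
-       }
-
-       /* Hypothetical event selection - the ids are driver specific. */
-       ret = pmc_config_set_value(pmc, config, 0, 0ULL);
-
-       if(KERN_SUCCESS == ret) {
-               /* Ask for a PMI after 1000000 events (see the handler sketch above). */
-               ret = pmc_config_set_interrupt_threshold(pmc, config, 1000000ULL,
-                       example_pmi_handler, NULL);
-       }
-
-       if(KERN_SUCCESS == ret) {
-               ret = pmc_reserve(pmc, config, resv_out);
-       }
-
-       if(KERN_SUCCESS != ret) {
-               /* No reservation was taken, so the config is still ours to free. */
-               pmc_free_config(pmc, config);
-               return ret;
-       }
-
-       return pmc_reservation_start(*resv_out);
-}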
-
-/*
- * pmc_reserve_task will attempt to reserve the given @pmc with a given
- * configuration object, for counting when the given @task is running on any
- * logical core that can directly access the given @pmc.  This method will fail
- * with KERN_FAILURE if the given pmc is already reserved at either system or
- * thread scope.  
- *
- * This method consumes the given configuration object if it returns
- * KERN_SUCCESS. Any other return value indicates the caller
- * must free the config object via pmc_free_config().
- *
- * NOTE: You can reserve the same pmc for N different tasks concurrently.
- * NOTE: This method is NOT interrupt safe.
- */
-kern_return_t pmc_reserve_task(pmc_t pmc, pmc_config_t config, 
-       task_t task, pmc_reservation_t *reservation) {
-
-       if(!pmc || !config || !reservation || !task) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       if (!(pmc->monitor->methods.flags & PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING)) {
-               COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc);
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       pmc_reservation_t resv = reservation_alloc();
-       if(!resv) {
-               return KERN_RESOURCE_SHORTAGE;
-       }
-
-       reservation_init(resv);
-
-       resv->flags |= PMC_FLAG_SCOPE_TASK;
-       resv->task = task;
-
-       resv->config = config;
-
-       if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) {
-               resv->config = NULL;
-               return KERN_FAILURE;
-       }
-       
-       /* enqueue reservation in proper place */
-       if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) {
-               /* Prevent free of config object */
-               resv->config = NULL;
-
-               reservation_free(resv);
-               return KERN_FAILURE;
-       }
-
-       perf_monitor_reservation_add(pmc->monitor);
-
-       *reservation = resv;
-
-       return KERN_SUCCESS;
-}
-
-/*
- * pmc_reserve_thread will attempt to reserve the given @pmc with a given
- * configuration object, for counting when the given @thread is running on any
- * logical core that can directly access the given @pmc.  This method will fail
- * with KERN_FAILURE if the given pmc is already reserved at either system or
- * task scope.  
- *
- * This method consumes the given configuration object if it returns
- * KERN_SUCCESS. Any other return value indicates the caller
- * must free the config object via pmc_free_config().
- *
- * NOTE: You can reserve the same pmc for N different threads concurrently.
- * NOTE: This method is NOT interrupt safe.
- */
-kern_return_t pmc_reserve_thread(pmc_t pmc, pmc_config_t config, 
-       thread_t thread, pmc_reservation_t *reservation) {
-       if(!pmc || !config || !reservation || !thread) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       if (!(pmc->monitor->methods.flags & PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING)) {
-               COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc);
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       pmc_reservation_t resv = reservation_alloc();
-       if(!resv) {
-               return KERN_RESOURCE_SHORTAGE;
-       }
-
-       reservation_init(resv);
-
-       resv->flags |= PMC_FLAG_SCOPE_THREAD;
-       resv->thread = thread;
-
-       resv->config = config;
-
-       if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) {
-               resv->config = NULL;
-               return KERN_FAILURE;
-       }
-       
-       /* enqueue reservation in proper place */
-       if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) {
-               /* Prevent free of config object */
-               resv->config = NULL;
-
-               reservation_free(resv);
-               return KERN_FAILURE;
-       }
-
-       perf_monitor_reservation_add(pmc->monitor);
-
-       *reservation = resv;
-
-       return KERN_SUCCESS;
-}
-
-/*
- * pmc_reservation_start instructs the given reservation to start counting as
- * soon as possible. 
- *
- * NOTE: This method is interrupt safe.
- */
-kern_return_t pmc_reservation_start(pmc_reservation_t reservation) {
-       pmc_state_t newState;
-
-       if(!reservation) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       /* Move the state machine */
-       if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_START, NULL))) {
-               return KERN_FAILURE;
-       }
-       
-       /* If we are currently in an interrupt, don't bother to broadcast since it won't do anything now and the interrupt will
-        * broadcast right before it leaves
-        */
-       if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_INTERRUPT) {   
-               /* A valid state move has been made, but won't be picked up until a context switch occurs.  To cause matching
-                * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_start_cpu
-                * on every cpu that can access the PMC.
-                */
-               pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_start_cpu);
-       }
-       
-       return KERN_SUCCESS;                     
-}
-
-/*
- * pmc_reservation_stop instructs the given reservation to stop counting as
- * soon as possible.  When this method returns, the pmc will be marked as stopping
- * and subsequent calls to pmc_reservation_start will succeed.  This does not mean
- * that the pmc hardware has _actually_ stopped running.  Assuming no other changes
- * to the reservation state, the pmc hardware _will_ stop shortly.
- *
- */
-kern_return_t pmc_reservation_stop(pmc_reservation_t reservation) {
-       pmc_state_t newState;
-
-       if(!reservation) {
-               return KERN_INVALID_ARGUMENT;
-       }
-       
-       /* Move the state machine */
-       if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_STOP, NULL))) {
-               return KERN_FAILURE;
-       }
-       
-       /* If we are currently in an interrupt, don't bother to broadcast since it won't do anything now and the interrupt will
-        * broadcast right before it leaves.  Similarly, if we just moved directly to STOP, don't bother broadcasting.
-        */
-       if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_INTERRUPT && PMC_STATE_STATE(newState) != PMC_STATE_STATE_STOP) {      
-               /* A valid state move has been made, but won't be picked up until a context switch occurs.  To cause matching
-                * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu
-                * on every cpu that can access the PMC.
-                */
-               
-               pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu);
-       }
-       
-       return KERN_SUCCESS;
-}
-
-/*
- * pmc_reservation_read will read the event count associated with a reservation.
- * If the caller is currently executing in a context that both a) matches the
- * reservation's context, and b) can access the reservation's pmc directly, the
- * value will be read from hardware.  Otherwise, this returns the reservation's
- * stored value.
- *
- * NOTE: This method is interrupt safe.
- * NOTE: When not on the interrupt stack, this method may block.
- */
-kern_return_t pmc_reservation_read(pmc_reservation_t reservation, uint64_t *value) {
-       kern_return_t ret = KERN_FAILURE;
-       uint64_t timeout;
-       uint32_t spins;
-
-       if(!reservation || !value) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
-       timeout += mach_absolute_time();
-       spins = 0;
-       do {
-               uint32_t state = reservation->state;
-               
-               if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) {
-                       /* Attempt read from hardware via drivers. */
-
-                       assert(reservation->pmc);
-
-                       ret = reservation->pmc->methods.get_count(reservation->pmc->object, value);
-                       
-                       break;
-               } else if ((PMC_STATE_STATE(state) == PMC_STATE_STATE_STORE) ||
-                                  (PMC_STATE_STATE(state) == PMC_STATE_STATE_LOAD)) {
-                       /* Spin */
-                       /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */
-                       if (++spins > PMC_SPIN_THRESHOLD) {
-                               if (mach_absolute_time() > timeout) {
-                                       pmc_spin_timeout_count++;
-                                       assert(0);
-                               }
-                       }
-
-                       cpu_pause();
-               } else {
-                       break;
-               }
-       } while (1);
-
-       /* If the direct hardware read failed (for whatever reason) */
-       if(KERN_SUCCESS != ret) {
-               /* Read stored value */
-               *value = reservation->value;
-       }
-
-       return KERN_SUCCESS;
-}
-
-/*
- * pmc_reservation_write will write the event count associated with a reservation.
- * If the caller is currently executing in a context that both a) matches the
- * reservation's context, and b) can access the reservation's pmc directly, the
- * value will be written to hardware.  Otherwise, this writes the reservation's
- * stored value.
- *
- * NOTE: This method is interrupt safe.
- * NOTE: When not on the interrupt stack, this method may block.
- */
-kern_return_t pmc_reservation_write(pmc_reservation_t reservation, uint64_t value) {
-       kern_return_t ret = KERN_FAILURE;
-       uint64_t timeout;
-       uint32_t spins;
-
-       if(!reservation) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
-       timeout += mach_absolute_time();
-       spins = 0;
-       do {
-               uint32_t state = reservation->state;
-               
-               if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) {
-                       /* Write to hardware via drivers. */
-                       assert(reservation->pmc);
-
-                       ret = reservation->pmc->methods.set_count(reservation->pmc->object, value);
-                       break;
-               } else if ((PMC_STATE_STATE(state) == PMC_STATE_STATE_STORE) ||
-                                  (PMC_STATE_STATE(state) == PMC_STATE_STATE_LOAD)) {
-                       /* Spin */
-                       /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */
-                       if (++spins > PMC_SPIN_THRESHOLD) {
-                               if (mach_absolute_time() > timeout) {
-                                       pmc_spin_timeout_count++;
-                                       assert(0);
-                               }
-                       }
-
-                       cpu_pause();
-               } else {
-                       break;
-               }
-       } while (1);
-       
-       if(KERN_SUCCESS != ret) {
-               /* Write stored value */
-               reservation->value = value;
-       }
-
-       return KERN_SUCCESS;
-}
-
-/* 
- * pmc_reservation_free releases a reservation and all associated resources.
- *
- * NOTE: This method is NOT interrupt safe.
- */
-kern_return_t pmc_reservation_free(pmc_reservation_t reservation) {
-       pmc_state_t newState;
-       
-       if(!reservation) {
-               return KERN_INVALID_ARGUMENT;
-       }
-       
-       perf_monitor_reservation_remove(reservation->pmc->monitor);
-       
-       /* Move the state machine */
-       if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_FREE, NULL))) {
-               return KERN_FAILURE;
-       }
-
-       /* If we didn't move directly to DEALLOC, help things along */  
-       if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_DEALLOC) {     
-               /* A valid state move has been made, but won't be picked up until a context switch occurs.  To cause matching
-                * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu
-                * on every cpu that can access the PMC.
-                */
-               pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu);
-       }
-
-       /* Block until the reservation hits the <DEALLOC, 0, > state */
-       while (!(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(reservation->state) == 0 && PMC_STATE_FLAGS(reservation->state) == 0)) {
-               assert_wait((event_t)reservation, THREAD_UNINT);
-               thread_block(THREAD_CONTINUE_NULL);
-       }
-
-       /* remove from queues */
-       pmc_internal_reservation_remove(reservation);
-               
-       /* free reservation */
-       reservation_free(reservation);
-
-       return KERN_SUCCESS;
-}
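-
-/*
- * A minimal teardown sketch for the calls above: stop the reservation, read
- * the final count, and release it.  pmc_reservation_free() blocks until the
- * reservation reaches <DEALLOC, 0, >, so this must not be run from interrupt
- * context.  The example_ helper name is illustrative only.
- */
-static kern_return_t example_read_and_release(pmc_reservation_t resv, uint64_t *final_count) {
-       kern_return_t ret;
-
-       ret = pmc_reservation_stop(resv);
-       if(KERN_SUCCESS != ret) {
-               return ret;
-       }
-
-       /* Falls back to the stored value if the hardware is not directly readable here. */
-       ret = pmc_reservation_read(resv, final_count);
-       if(KERN_SUCCESS != ret) {
-               return ret;
-       }
-
-       return pmc_reservation_free(resv);
-}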
-
-/*
- * pmc_idle notifies eligible monitors of impending per-CPU idle, and can be used to save state.
- */
-boolean_t pmc_idle(void) {
-       perf_monitor_t monitor = NULL;
-       queue_head_t *cpu_queue;
-
-       lck_spin_lock(&perf_monitor_queue_spin);
-       
-       if (cpu_monitor_queues) {
-               cpu_queue = cpu_monitor_queues[cpu_number()];
-       
-               queue_iterate(cpu_queue, monitor, perf_monitor_t, cpu_link) {
-                       perf_monitor_methods_t *methods = &(monitor->methods);
-                       if ((methods->flags & PERFMON_FLAG_ALWAYS_ACTIVE) || (monitor->reservedCounters)) {                 
-                               methods->on_idle(monitor->object);
-                       }
-               }
-       }
-
-       lck_spin_unlock(&perf_monitor_queue_spin);
-
-       return TRUE;
-}
-
-/*
- * pmc_idle_exit notifies eligible monitors of wake from idle; it can be used to restore state.
- */
-boolean_t pmc_idle_exit(void) {
-       perf_monitor_t monitor = NULL;
-       queue_head_t *cpu_queue;
-
-       lck_spin_lock(&perf_monitor_queue_spin);
-       
-       if (cpu_monitor_queues) {
-               cpu_queue = cpu_monitor_queues[cpu_number()];
-       
-               queue_iterate(cpu_queue, monitor, perf_monitor_t, cpu_link) {
-                       perf_monitor_methods_t *methods = &(monitor->methods);
-                       if ((methods->flags & PERFMON_FLAG_ALWAYS_ACTIVE) || (monitor->reservedCounters)) {                 
-                               methods->on_idle_exit(monitor->object);
-                       }
-               }
-       }
-
-       lck_spin_unlock(&perf_monitor_queue_spin);
-
-       return TRUE;
-}
-
-/*
- * pmc_context_switch performs all context switching necessary to save all pmc
- * state associated with @oldThread (and the task to which @oldThread belongs),
- * as well as to restore all pmc state associated with @newThread (and the task
- * to which @newThread belongs).
- *
- * NOTE: This method IS interrupt safe.
- */
-boolean_t pmc_context_switch(thread_t oldThread, thread_t newThread) {
-       pmc_reservation_t resv = NULL;
-       uint32_t cpuNum = cpu_number();
-
-       lck_spin_lock(&reservations_spin);
-
-       /* Save pmc states */
-       if (thread_reservation_count) {
-               queue_iterate(thread_reservations, resv, pmc_reservation_t, link) {
-                       if ((oldThread == resv->thread) && pmc_accessible_from_core(resv->pmc, cpuNum)) {
-                               (void)pmc_internal_reservation_context_out(resv);
-                       }
-               }
-       }
-       
-       if (task_reservation_count) {
-               queue_iterate(task_reservations, resv, pmc_reservation_t, link) {
-                       if ((resv->task == oldThread->task) && pmc_accessible_from_core(resv->pmc, cpuNum)) {
-                               (void)pmc_internal_reservation_context_out(resv);
-                       }
-               }
-       }
-       
-       /* Restore */
-       if (thread_reservation_count) {
-               queue_iterate(thread_reservations, resv, pmc_reservation_t, link) {
-                       if ((resv->thread == newThread) && pmc_accessible_from_core(resv->pmc, cpuNum)) {
-                               (void)pmc_internal_reservation_context_in(resv);
-                       }
-               }
-       }
-
-       if (task_reservation_count) {
-               queue_iterate(task_reservations, resv, pmc_reservation_t, link) {
-                       if ((resv->task == newThread->task) && pmc_accessible_from_core(resv->pmc, cpuNum)) {
-                               (void)pmc_internal_reservation_context_in(resv);
-                       }
-               }
-       }
-       
-       lck_spin_unlock(&reservations_spin);
-
-       return TRUE;
-}
-
-#else /* !CONFIG_COUNTERS */
-
-#if 0
-#pragma mark -
-#pragma mark Dummy functions
-#endif
-
-/*
- * In the case that someone has chosen not to include the PMC KPI in some
- * configuration, we still have exports for kexts, so we'll need to define stub
- * methods that return failures.
- */
-kern_return_t perf_monitor_register(perf_monitor_object_t monitor __unused,
-       perf_monitor_methods_t *methods __unused) {
-       return KERN_FAILURE;
-}
-
-kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor __unused) {
-       return KERN_FAILURE;
-}
-
-kern_return_t pmc_register(perf_monitor_object_t monitor __unused, 
-       pmc_object_t pmc __unused, pmc_methods_t *methods __unused, void *object __unused) {
-       return KERN_FAILURE;
-}
-
-kern_return_t pmc_unregister(perf_monitor_object_t monitor __unused,
-       pmc_object_t pmc __unused) {
-       return KERN_FAILURE;
-}
-
-kern_return_t pmc_create_config(pmc_t pmc __unused, 
-       pmc_config_t *config __unused) {
-       return KERN_FAILURE;
-}
-
-void pmc_free_config(pmc_t pmc __unused, pmc_config_t config __unused) {
-}
-
-kern_return_t pmc_config_set_value(pmc_t pmc __unused, 
-       pmc_config_t config __unused, uint8_t id __unused, 
-       uint64_t value __unused) {
-       return KERN_FAILURE;
-}
-
-kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc __unused, 
-       pmc_config_t config __unused, uint64_t threshold __unused, 
-       pmc_interrupt_method_t method __unused, void *refCon __unused) {
-       return KERN_FAILURE;
-}
-
-kern_return_t pmc_get_pmc_list(pmc_t **pmcs __unused, size_t *pmcCount __unused) {
-       return KERN_FAILURE;
-}
-
-void pmc_free_pmc_list(pmc_t *pmcs __unused, size_t pmcCount __unused) {
-}
-
-kern_return_t pmc_find_by_name(const char *name __unused, pmc_t **pmcs __unused, 
-       size_t *pmcCount __unused) {
-       return KERN_FAILURE;
-}
-
-const char *pmc_get_name(pmc_t pmc __unused) {
-       return "";
-}
-
-kern_return_t pmc_get_accessible_core_list(pmc_t pmc __unused, 
-       uint32_t **logicalCores __unused, size_t *logicalCoreCt __unused) {
-       return KERN_FAILURE;
-}
-
-kern_return_t pmc_reserve(pmc_t pmc __unused, 
-       pmc_config_t config __unused, pmc_reservation_t *reservation __unused) {
-       return KERN_FAILURE;
-}
-
-kern_return_t pmc_reserve_task(pmc_t pmc __unused, 
-       pmc_config_t config __unused, task_t task __unused, 
-       pmc_reservation_t *reservation __unused) {
-       return KERN_FAILURE;
-}
-
-kern_return_t pmc_reserve_thread(pmc_t pmc __unused, 
-       pmc_config_t config __unused, thread_t thread __unused, 
-       pmc_reservation_t *reservation __unused) {
-       return KERN_FAILURE;
-}
-
-kern_return_t pmc_reservation_start(pmc_reservation_t reservation __unused) {
-       return KERN_FAILURE;
-}
-
-kern_return_t pmc_reservation_stop(pmc_reservation_t reservation __unused) {
-       return KERN_FAILURE;
-}
-
-kern_return_t pmc_reservation_read(pmc_reservation_t reservation __unused, 
-       uint64_t *value __unused) {
-       return KERN_FAILURE;
-}
-
-kern_return_t pmc_reservation_write(pmc_reservation_t reservation __unused, 
-       uint64_t value __unused) {
-       return KERN_FAILURE;
-}
-
-kern_return_t pmc_reservation_free(pmc_reservation_t reservation __unused) {
-       return KERN_FAILURE;
-}
-
-
-#endif /* !CONFIG_COUNTERS */
diff --git a/osfmk/pmc/pmc.h b/osfmk/pmc/pmc.h
deleted file mode 100644 (file)
index 6746929..0000000
+++ /dev/null
@@ -1,772 +0,0 @@
-/*
- * Copyright (c) 2009 Apple Inc. All rights reserved.
- *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_LICENSE_HEADER_END@
- */
-
-#ifndef _MACH_PMC_H_
-#define _MACH_PMC_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdint.h>
-#include <kern/queue.h>
-#include <mach/boolean.h>
-#include <mach/mach_time.h>
-#include <mach/mach_types.h>
-
-#include <libkern/version.h>
-
-/****************************************************************************
- * The four main object types
- *
- * 1. Performance monitors (perf_monitor_t) - represent the hardware that 
- *     encapsulates a set of performance counters
- * 2. Performance Counters (pmc_t) - represents each individual counter
- * 3. Performance Counter Configs (pmc_config_t) - represents the settings 
- *     applied to a performance counter (e.g., what to count)
- * 4. Performance Counter Reservations (pmc_reservation_t) - represents a config along 
- *     with its saved counter value, and the context under which it will count.
- *
- ****************************************************************************/
-
-/*
- * The following objects are in-kernel stand-ins for objects that will be implemented
- * in the driver kexts.  They are generally instances of C++ objects.  We make opaque 
- * handles for each distinct type for a little bit of type safety when used from the 
- * kernel layer.  These objects are not to be introspected by the kernel at any time,
- * only used as arguments in the registered driver methods.
- */
-
-// IOPerformanceMonitor instances
-typedef void * perf_monitor_object_t;
-
-// IOPerformanceCounter instances
-typedef void * pmc_object_t;
-
-// IOPerformanceCounterConfig instances
-typedef void * pmc_config_object_t;
-
-// END Kext-implemented objects
-
-// Forward declarations
-struct pmc_reservation;
-typedef struct pmc_reservation *pmc_reservation_t;
-
-struct pmc_config;
-typedef struct pmc_config *pmc_config_t;
-
-/****************************************************************************
- * Method types for performance monitor driver registration
- * 
- * Driver registration happens with no intervention from the driver writers -
- * it is handled automatically by the IOProfileFamily kext.  Registration
- * happens whenever any IOPerformanceMonitor subclass attaches to the registry.
- * Failure to successfully register with the kernel will prevent successful attachment
- * to the IORegistry (this way only usable PMCs and Perf Monitors will be shown.)
- ****************************************************************************/
-
-typedef kern_return_t (*perfmon_get_accessible_cores_method_t)(pmc_object_t pmc, uint32_t **cores, size_t *coreCt);
-
-/*!typedef
- * @abstract A pointer to a method that enables a set of counters.
- * @discussion Implementations of this method type must be safe to call at interrupt context.
- * @param pmcs An array of pmc_object_t instances (non-NULL).
- * @param pmcCount The number of elements in the @pmcs array.
- * @result KERN_SUCCESS upon successful global enable of the given counters (may return IOKit error codes).
- */
-typedef kern_return_t (*perfmon_enable_counters_method_t)(perf_monitor_object_t pm, pmc_object_t *pmcs, uint32_t pmcCount);
-
-/*!typedef
- * @abstract A pointer to a method that disables a set of counters.
- * @discussion Implementations of this method type must be safe to call at interrupt context.
- * See <link>perfmon_enable_counters_method_t</link>
- * @result See <link>perfmon_enable_counters_method_t</link>
- */
-typedef kern_return_t (*perfmon_disable_counters_method_t)(perf_monitor_object_t pm, pmc_object_t *pmcs, uint32_t pmcCount);
-
-typedef void (*perfmon_on_idle_method_t)(perf_monitor_object_t pm);
-typedef void (*perfmon_on_idle_exit_method_t)(perf_monitor_object_t pm);
-
-#define MACH_PERFMON_METHODS_VERSION 1
-
-#define PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING     0x1
-#define PERFMON_FLAG_REQUIRES_IDLE_NOTIFICATIONS    0x2
-#define PERFMON_FLAG_ALWAYS_ACTIVE                  0x4
-
-/*!struct perf_monitor_methods
- * @abstract A set of method pointers to be used when interacting with a performance monitor object
- * @discussion This structure is the set of driver-implemented callback methods to be used when
- * interacting with a new performance monitor from the kernel.
- */
-typedef struct perf_monitor_methods {
-       uint32_t perf_monitor_methods_version;  // Always set to MACH_PERFMON_METHODS_VERSION when writing driver kexts
-       
-       uint32_t flags;
-
-       perfmon_get_accessible_cores_method_t accessible_cores;
-
-       perfmon_enable_counters_method_t enable_counters;
-       perfmon_disable_counters_method_t disable_counters;
-
-       perfmon_on_idle_method_t on_idle;
-    perfmon_on_idle_exit_method_t on_idle_exit;
-} perf_monitor_methods_t;
-
-/****************************************************************************
- * Method types for performance counter registration
- *
- * Registration of individual Performance Counters happens after the 
- * encapsulating Performance Monitor has been registered. This, too, happens
- * with no intervention of driver-writers.  It happens automatically whenever
- * any IOPerformanceCounter subclass attaches to IORegistry.  Failure to register
- * with the kernel will prevent the IOPerformanceCounter instance from attaching
- * to IORegistry.
- ****************************************************************************/
-
-/*!typedef
- * @abstract A pointer to a method that creates a configuration object for a counter
- * @discussion Configuration objects create and hold the hardware representation for a set of driver-defined key-value pairs.
- * Corresponds to IOPerformanceCounter::createConfiguration() method.
- * @param pmc A valid pmc object
- * @result NULL on failure, or a pmc_config_t on success.
- */
-typedef pmc_config_object_t (*pmc_create_config_method_t)(pmc_object_t pmc);
-
-/*!typedef 
- * @abstract A pointer to a method to free a configuration object for a pmc
- * @discussion Method should free a pmc config object created with a pmc_create_config_method_t above
- * @param pmc The pmc object used to create the config
- * @param config The config object to release
- */
-typedef void (*pmc_free_config_method_t)(pmc_object_t pmc, pmc_config_object_t config);
-
-/*!typedef
- * @abstract A pointer to a method to set a key-value pair on a config object.
- * @discussion Configuration objects take key-value pairs for setting various bits in the pmc configs 
- * Corresponds to IOPerformanceCounterConfiguration::setValueForId() method.
- * @param config Pointer to config object.
- * @param id 8-bit integer ID (determined by the driver).
- * @param value 64-bit integer value (interpreted by the driver).
- * @result KERN_SUCCESS on success, KERN_FAILURE on bad value, KERN_INVALID_ARGUMENT on bad id
- */
-typedef kern_return_t (*pmc_config_set_value_method_t)(pmc_config_object_t config, uint8_t id, uint64_t value);
-
-/*!typedef
- * @abstract A pointer to a method that will be called when a Performance Counter causes a PMI interrupt
- * @discussion Implementations of this method type must be safe to call at interrupt context.
- * @param target The pmc_reservation_t that caused the interrupt
- * @param refCon Any value as defined by the end-user who called <link>pmc_config_set_interrupt_threshold</link>
- */
-typedef void (*pmc_interrupt_method_t)(void *target, void *refCon);
-
-/*!typedef
- * @abstract A pointer to a method that will set the counter PMI threshold.
- * @param config A configuration object
- * @param threshold The number of events after which to cause an interrupt
- * callback.
- */
-typedef kern_return_t (*pmc_config_set_interrupt_threshold_method_t)(pmc_config_object_t config, uint64_t threshold);
-
-/*!typedef
- * @abstract A pointer to a method that will set the method to be called when the counter threshold is reached.
- * @param config A configuration object.
- * @param target A reference pointer used as the first argument to the callback method.
- * @param method A pointer to the method to be called.
- * @param refCon A reference pointer to be used as the second argument to the callback method (may be NULL).
- */
-typedef kern_return_t (*pmc_config_set_interrupt_threshold_handler_method_t)(pmc_config_object_t config, void *target, pmc_interrupt_method_t method, void *refCon);
-
-/*!typedef
- * @abstract A pointer to a method that will configure a pmc's control registers according to the given configuration object.
- * @discussion Implementations of this method type must be safe to call at interrupt context.
- * @param pmc The pmc reference object.
- * @param config A configuration object.
- */
-typedef kern_return_t (*pmc_set_config_method_t)(pmc_object_t pmc, pmc_config_object_t config);
-
-/*!typedef
- * @abstract A pointer to a method that returns the Performance Monitor Object for a counter
- * @discussion A pointer to a method that returns the Performance Monitor Object for a counter.
- * Implementations of this method type must be safe to call at interrupt context.
- * Corresponds to IOPerformanceCounter::getMonitor() method.
- * @param pmc A valid pmc object
- * @result NULL on failure, or a perf_monitor_object_t on success.
- */
-typedef perf_monitor_object_t (*pmc_get_monitor_method_t)(pmc_object_t pmc);
-
-/*!typedef
- * @abstract A pointer to a method that returns the registered name of the PMC.
- * @discussion A pointer to a method that returns the registered name of the PMC.
- * Corresponds to IOPerformanceCounter::getRegisteredName() method.  
- *
- * NOTE: Driver authors must not allocate or copy the string during this method:
- * it may be called from interrupt context or with spin locks held.
- *
- * @param pmc A valid pmc object.
- * @result NULL on failure, or a pointer to the registered name of the pmc.
- */
-typedef const char *(*pmc_get_name_method_t)(pmc_object_t pmc);
-
-/*!typedef
- * @abstract A pointer to a method that returns if a pmc is accessible from a given logical core.
- * @discussion A pointer to a method that returns if a pmc is accessible from a given logical core.
- * Implementations of this method type must be safe to call at interrupt context.
- * @param pmc A valid pmc object.
- * @param core The logical core number.
- * @result TRUE if the pmc can be read in the execution context of the given logical core, FALSE otherwise.
- */
-typedef boolean_t (*pmc_is_accessible_from_logical_core_method_t)(pmc_object_t pmc, uint32_t core);
-
-/*!typedef 
- * @abstract A pointer to a method that returns an array of the logical cores from which a PMC can be accessed.
- * @discussion A pointer to a method that returns an array of the logical cores from which a PMC can be accessed. 
- * Resulting array of cores should not be released by xnu.
- * Implementations of this method type must be safe to call at interrupt context.
- * @param pmc A valid pmc object
- * @param cores A value-returned array of logical cores that can access the given PMC.
- * @param coreCt A value-return count of the number of entries in the @cores array.
- * @result KERN_SUCCESS on success, KERN_FAILURE otherwise.
- */
-typedef kern_return_t (*pmc_get_accessible_cores_method_t)(pmc_object_t pmc, uint32_t **cores, size_t *coreCt);
-
-/*!typedef
- * @abstract A pointer to a method that attempts to read the count from the given counter hardware. 
- * @discussion Implementations of this method type must be safe to call from interrupt context.
- * @param pmc The counter from which to read.
- * @param value Storage for the counter's hardware value.
- */
-typedef kern_return_t (*pmc_get_count_method_t)(pmc_object_t pmc, uint64_t *value);
-
-/*!typedef 
- * @abstract A pointer to a method that attempts to write the count to the given counter hardware.
- * @discussion Implementations of this method type must be safe to call from interrupt context.
- * @param pmc The counter to which to write.
- * @param value The value to write to the hardware.
- */
-typedef kern_return_t (*pmc_set_count_method_t)(pmc_object_t pmc, uint64_t value);
-
-
-/*!typedef
- * @abstract A pointer to a method that disables the counter hardware for a given PMC.
- * @discussion A pointer to a method that disables the counter hardware for
- * a given PMC.
- * Implementations of this method type must be safe to call at interrupt context.
- * @param pmc A valid pmc object.
- * @result KERN_SUCCESS on successful disable
- */
-typedef kern_return_t (*pmc_disable_method_t)(pmc_object_t pmc);
-
-/*!typedef
- * @abstract A pointer to a method that enables the counter hardware for a given PMC.
- * @discussion A pointer to a method that enables the counter hardware for a given PMC.
- * Implementations of this method type must be safe to call at interrupt context.
- * @param pmc A valid pmc object.
- * @result KERN_SUCCESS on successful enable
- */
-typedef kern_return_t (*pmc_enable_method_t)(pmc_object_t pmc);
-
-typedef kern_return_t (*pmc_open_method_t)(pmc_object_t pmc, void *object);
-typedef kern_return_t (*pmc_close_method_t)(pmc_object_t pmc, void *object);
-
-#define MACH_PMC_METHODS_VERSION       0
-
-/*!
- * @struct pmc_methods
- * @abstract Performance Counter Registration methods.
- * @discussion This structure represents a set of driver-implemented methods to be used by the kernel
- * when interacting with the associated performance counter.  Since a Performance Monitor may
- * implement any number of distinct types of Performance Counters, each counter registers with
- * its own set of callback methods.
- */
-typedef struct pmc_methods {
-       uint32_t pmc_methods_version;           // Always set to MACH_PMC_METHODS_VERSION in your driver.
-
-       // All methods are required.
-       pmc_create_config_method_t create_config;
-       pmc_free_config_method_t free_config;
-       pmc_config_set_value_method_t config_set_value;
-       pmc_config_set_interrupt_threshold_method_t config_set_threshold;
-       pmc_config_set_interrupt_threshold_handler_method_t config_set_handler;
-       pmc_set_config_method_t set_config;
-
-       pmc_get_monitor_method_t get_monitor;
-       pmc_get_name_method_t get_name;
-       pmc_is_accessible_from_logical_core_method_t accessible_from_core;
-       pmc_get_accessible_cores_method_t accessible_cores;
-       pmc_get_count_method_t get_count;
-       pmc_set_count_method_t set_count;
-       pmc_disable_method_t disable;
-       pmc_enable_method_t enable;
-       pmc_open_method_t open;
-       pmc_close_method_t close;
-} pmc_methods_t;
-
-/*
- * Kext interface Methods
- *
- * These methods would be exported to apple-internal kexts, but not to 3rd-party kexts, and 
- * definitely not to user space.
- *
- * All Performance Monitor and Performance Counter registration (accomplished via the following methods)
- * is handled automatically via IOProfileFamily's base classes.  However, we'd need to export these
- * methods to apple-private KPI so that IOProfileFamily can call these methods when new objects attach
- * to the IORegistry.
- *
- */
-
-/*!fn
- * @abstract Registers a new performance monitor driver and its associated pointers.
- * @discussion Kexts that implement performance monitor drivers will call this method with a
- * filled-in perf_monitor_methods_t structure (with version set to MACH_PERFMON_METHODS_VERSION).  
- * The PMC interface will then register the new driver internally.
- * @param monitor A handle to the performance monitor driver instance you are registering. Must not be NULL.
- * @param methods A filled-in perf_monitor_methods_t structure with version set to MACH_PERFMON_METHODS_VERSION.
- * @result KERN_SUCCESS if the new driver was successfully registered, KERN_INVALID_VALUE if the 
- * version of the passed-in perf_monitor_methods_t structure does not match that which is expected,
- * KERN_RESOURCE_SHORTAGE if the kernel lacks the resources to register another performance monitor
- * driver, KERN_INVALID_ARGUMENT if one or both of the arguments is null
- */
-
-/* Prevent older AppleProfileFamily kexts from loading on newer kernels.
- * Alas, C doesn't necessarily have a cleaner way to do the version number concatenation
- */
-#define PERF_REG_NAME1(a, b) a ## b
-#define PERF_REG_NAME(a, b) PERF_REG_NAME1(a, b)
-#define perf_monitor_register PERF_REG_NAME(perf_monitor_register_, VERSION_MAJOR)
-
-kern_return_t perf_monitor_register(perf_monitor_object_t monitor, perf_monitor_methods_t *methods);
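[Illustrative sketch, not part of the original header: how a performance-monitor kext might fill in perf_monitor_methods_t and register itself. The my_* callbacks and the my_pm handle are hypothetical driver code; note that perf_monitor_register above is a macro that expands to a symbol suffixed with the kernel's VERSION_MAJOR, so a kext built against a different kernel fails to link rather than mis-registering.]

    /* Hypothetical driver callbacks; bodies would live elsewhere in the kext. */
    static kern_return_t my_accessible_cores(pmc_object_t pmc, uint32_t **cores, size_t *coreCt);
    static kern_return_t my_enable_counters(perf_monitor_object_t pm, pmc_object_t *pmcs, uint32_t pmcCount);
    static kern_return_t my_disable_counters(perf_monitor_object_t pm, pmc_object_t *pmcs, uint32_t pmcCount);
    static void my_on_idle(perf_monitor_object_t pm);
    static void my_on_idle_exit(perf_monitor_object_t pm);

    static kern_return_t
    my_pm_attach(perf_monitor_object_t my_pm)
    {
            static perf_monitor_methods_t methods = {
                    .perf_monitor_methods_version = MACH_PERFMON_METHODS_VERSION,
                    .flags            = PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING,
                    .accessible_cores = my_accessible_cores,
                    .enable_counters  = my_enable_counters,
                    .disable_counters = my_disable_counters,
                    .on_idle          = my_on_idle,
                    .on_idle_exit     = my_on_idle_exit,
            };
            /* KERN_INVALID_VALUE here would indicate a methods-version mismatch. */
            return perf_monitor_register(my_pm, &methods);
    }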
-
-/*!fn
- * @abstract Unregisters a performance monitor driver and frees space associated with its pointers.
- * @discussion Kexts that implement performance monitor drivers will call this method just before they unload
- * to cause the performance monitor they implement to be removed from the kernel's PMC system.
- * @param monitor A handle to a performance monitor driver instance that was previously registered with <link>perf_monitor_register</link>
- * @result KERN_SUCCESS if the new driver was successfully unregistered, KERN_INVALID_VALUE if the 
- * passed-in perf_monitor_object_t does not match any registered performance monitor, KERN_INVALID_ARGUMENT if 
- * the argument is null, KERN_FAILURE if the performance monitor is currently in use.
- */
-kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor);
-
-/*!fn
- * @abstract Register a new Performance Counter, and attach it to the given Performance Monitor
- * @discussion This method takes a Performance Monitor driver instance that was previously registered 
- * with <link>perf_monitor_register</link>, and attaches an instance of a Performance Counter 
- * that will be accessed with the given set of pmc methods.
- * @param monitor A handle to a Performance Monitor that was previously registered.
- * @param pmc A handle to the Performance Counter instance to be attached to the monitor object
- * @param methods A filled-in pmc_methods_t structure with version set to MACH_PMC_METHODS_VERSION
- * @param object an Object to be used during the open() and close() methods. Must be a subclass of IOService, cannot be NULL.
- * @result KERN_SUCCESS if the new counter was successfully registered and attached, KERN_INVALID_VALUE if the 
- * version of the passed-in pmc_methods_t structure does not match that which is expected,
- * KERN_RESOURCE_SHORTAGE if the kernel lacks the resources to register another performance counter
- * instance, KERN_INVALID_ARGUMENT if any of the arguments is null
- */
-kern_return_t pmc_register(perf_monitor_object_t monitor, pmc_object_t pmc, 
-       pmc_methods_t *methods, void *object);
-
-/*!fn
- * @abstract Unregisters a Performance Counter
- * @discussion Does the reverse of <link>pmc_register</link>. 
- * @param monitor The registered Performance Monitor from which to remove a pmc.
- * @param pmc The Performance Counter to unregister.
- * @result KERN_SUCCESS if the counter was successfully unregistered, KERN_INVALID_VALUE if the 
- * passed-in pmc_object_t does not match any registered performance counter, KERN_INVALID_ARGUMENT if 
- * any argument is null, KERN_FAILURE if the performance counter is currently in use.
- */
-kern_return_t pmc_unregister(perf_monitor_object_t monitor, pmc_object_t pmc);
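[Illustrative sketch, not part of the original header: attaching one counter to an already-registered monitor. All of the my_pmc_* callbacks are assumed to be implemented elsewhere in the hypothetical driver; every entry in pmc_methods_t is required.]

    static pmc_methods_t my_pmc_methods = {
            .pmc_methods_version  = MACH_PMC_METHODS_VERSION,
            .create_config        = my_pmc_create_config,
            .free_config          = my_pmc_free_config,
            .config_set_value     = my_pmc_config_set_value,
            .config_set_threshold = my_pmc_config_set_threshold,
            .config_set_handler   = my_pmc_config_set_handler,
            .set_config           = my_pmc_set_config,
            .get_monitor          = my_pmc_get_monitor,
            .get_name             = my_pmc_get_name,
            .accessible_from_core = my_pmc_accessible_from_core,
            .accessible_cores     = my_pmc_accessible_cores,
            .get_count            = my_pmc_get_count,
            .set_count            = my_pmc_set_count,
            .disable              = my_pmc_disable,
            .enable               = my_pmc_enable,
            .open                 = my_pmc_open,
            .close                = my_pmc_close,
    };

    static kern_return_t
    my_counter_attach(perf_monitor_object_t monitor, pmc_object_t counter, void *io_service)
    {
            /* io_service must be an IOService subclass used by open()/close(). */
            return pmc_register(monitor, counter, &my_pmc_methods, io_service);
    }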
-
-/*
- * Here begins the interface in-kernel and in-kext users will use to interact with PMCs and 
- * Performance Monitors.
- *
- * Basic usage is as follows: find your target counter, create a config for it, set up the config,
- * reserve the counter using that config in a given execution context (system, or 1 task, or 1 thread),
- * start the counter via the reservation object, stop the counter, and read the counter value similarly from the
- * reservation object.  When done, release the reservation object.  (A sketch of this flow follows this comment block.)
- */
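[Illustrative sketch of the flow described above, using only the interfaces declared below. It assumes a counter whose registered name begins with "ia32"; the config key 0 and value 0x3c are hypothetical, driver-defined examples.]

    static void
    count_something(void)
    {
            pmc_t *pmcs = NULL;
            size_t count = 0;

            if (pmc_find_by_name("ia32", &pmcs, &count) != KERN_SUCCESS || count == 0)
                    return;

            pmc_config_t config = NULL;
            pmc_reservation_t resv = NULL;
            uint64_t events = 0;

            if (pmc_create_config(pmcs[0], &config) == KERN_SUCCESS) {
                    /* Key 0 / value 0x3c are placeholders; real ids are driver-defined. */
                    (void) pmc_config_set_value(pmcs[0], config, 0, 0x3c);

                    if (pmc_reserve(pmcs[0], config, &resv) == KERN_SUCCESS) {
                            pmc_reservation_start(resv);
                            /* ... run the workload of interest ... */
                            pmc_reservation_stop(resv);
                            pmc_reservation_read(resv, &events);
                            pmc_reservation_free(resv);
                    } else {
                            /* reserve failed, so the config was not consumed */
                            pmc_free_config(pmcs[0], config);
                    }
            }
            pmc_free_pmc_list(pmcs, count);
    }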
-
-/*!struct perf_monitor
- * @abstract In-kernel object to track a driver-implemented performance monitor.
- */
-typedef struct perf_monitor {
-       /*
-        * A reference-pointer used as the first argument to all callback methods
-        * (to seamlessly work with C++ objects). This is the same value that was 
-        * used in the perf_monitor_register() method.
-        */
-       perf_monitor_object_t object;
-
-       // Copy of the pointers used to interact with the above instance
-       perf_monitor_methods_t methods;
-       
-       // reference counted
-       uint32_t useCount;
-       
-       uint32_t reservedCounters;
-    
-       // A value of -1 here indicates independence from a particular core
-       int cpu;
-       
-       // links to other perf monitors
-       queue_chain_t link;
-       queue_chain_t cpu_link;
-}*perf_monitor_t;
-
-/*!struct pmc
- * @abstract In-kernel object to track an individual driver-implemented performance counter
- */
-typedef struct pmc {
-       /*
-        * A reference-pointer used as the first argument to all callback methods
-        * (to seamlessly work with C++ objects). This is the same value that was
-        * used in the pmc_register() method.
-        */
-       pmc_object_t object;
-       
-       /* Copy of the pointers used to interact with the above instance */
-       pmc_methods_t methods;
-
-       /* Object to be used during open/close methods */
-       void *open_object;
-
-       /* reference counted */
-       uint32_t useCount;
-       
-       /* link to parent */
-       perf_monitor_t monitor;
-
-       /* link to other PMCs */
-       queue_chain_t link;
-}*pmc_t;
-
-// Scope flags (highest order bits)
-#define PMC_FLAG_SCOPE_SYSTEM  0x80000000U
-#define PMC_FLAG_SCOPE_TASK            0x40000000U
-#define PMC_FLAG_SCOPE_THREAD  0x20000000U
-#define PMC_SCOPE_MASK                 0xE0000000U
-
-#define PMC_FLAG_IS_SYSTEM_SCOPE(x)    \
-               ((x & PMC_FLAG_SCOPE_SYSTEM) == PMC_FLAG_SCOPE_SYSTEM)
-
-#define PMC_FLAG_IS_TASK_SCOPE(x)      \
-               ((x & PMC_FLAG_SCOPE_TASK) == PMC_FLAG_SCOPE_TASK)
-
-#define PMC_FLAG_IS_THREAD_SCOPE(x)    \
-               ((x & PMC_FLAG_SCOPE_THREAD) == PMC_FLAG_SCOPE_THREAD)
-
-#define PMC_FLAG_SCOPE(x)              (x & PMC_SCOPE_MASK)
-
-/*
- * Reservation state
- *
- * The state of a reservation is actually a 3-tuple of the current state, an active context count,
- * and a set of modifier flags.  To avoid using locks, these are combined into a single uint32_t
- * that can be modified with OSCompareAndSwap.
- *
- */
-
-typedef uint32_t pmc_state_t;
-       
-#define PMC_STATE_STATE_INVALID                        0x00000000U
-#define        PMC_STATE_STATE_STOP                    0x10000000U
-#define PMC_STATE_STATE_CAN_RUN                        0x20000000U
-#define PMC_STATE_STATE_LOAD                   0x30000000U
-#define PMC_STATE_STATE_RUN                            0x40000000U
-#define PMC_STATE_STATE_STORE                  0x50000000U
-#define PMC_STATE_STATE_INTERRUPT              0x60000000U
-#define PMC_STATE_STATE_DEALLOC                        0x70000000U
-
-#define PMC_STATE_STATE_MASK                   0xF0000000U
-
-#define PMC_STATE_STATE(x)                             ((x) & PMC_STATE_STATE_MASK)
-#define PMC_STATE_STATE_SET(x, state)  (((x) & ~(PMC_STATE_STATE_MASK)) | state)
-       
-#define PMC_STATE_FLAGS_STOPPING               0x08000000U
-#define PMC_STATE_FLAGS_DEALLOCING             0x04000000U
-#define PMC_STATE_FLAGS_INTERRUPTING   0x02000000U
-       
-#define PMC_STATE_FLAGS_MASK                   0x0F000000U
-
-#define PMC_STATE_FLAGS(x)                             ((x) & PMC_STATE_FLAGS_MASK)
-#define PMC_STATE_FLAGS_MODIFY(x, set, clear)  (((x) & ~(clear)) | set)        
-       
-#define PMC_STATE_CONTEXT_COUNT_MASK   0x0000FFFFU
-
-#define PMC_STATE_CONTEXT_COUNT(x)                             ((x) & PMC_STATE_CONTEXT_COUNT_MASK)
-#define PMC_STATE_CONTEXT_COUNT_MODIFY(x, mod)         (((PMC_STATE_CONTEXT_COUNT(x) + (mod)) < PMC_STATE_CONTEXT_COUNT_MASK) ? (x) + (mod) : PMC_STATE_CONTEXT_COUNT_MASK)
-       
-#define PMC_STATE(state, context_count, flags) (PMC_STATE_STATE(state) | PMC_STATE_FLAGS(flags) | PMC_STATE_CONTEXT_COUNT(context_count))
-#define PMC_STATE_MODIFY(x, context_count_mod, flags_set, flags_clear) (PMC_STATE_FLAGS_MODIFY(PMC_STATE_CONTEXT_COUNT_MODIFY(x, context_count_mod), flags_set, flags_clear))
-#define PMC_STATE_MOVE(x, state, context_count_mod, flags_set, flags_clear) (PMC_STATE_STATE_SET(PMC_STATE_MODIFY(x, context_count_mod, flags_set, flags_clear), state))
-
-#define PMC_STATE_INVALID                              PMC_STATE(PMC_STATE_STATE_INVALID, 0, 0)
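[Illustrative composition of the packed reservation state word, derived from the macros above; this example is not part of the original header.]

    static void
    pmc_state_example(void)
    {
            pmc_state_t s;

            s = PMC_STATE(PMC_STATE_STATE_STOP, 0, 0);                 /* 0x10000000: stopped, no contexts */
            s = PMC_STATE_MOVE(s, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);   /* 0x20000000: ready to run */
            s = PMC_STATE_MODIFY(s, +1, 0, 0);                         /* 0x20000001: one active context */
            s = PMC_STATE_MODIFY(s, 0, PMC_STATE_FLAGS_STOPPING, 0);   /* 0x28000001: STOPPING flag set */
            (void) s;
    }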
-       
-/*!struct pmc_reservation
- * @abstract In-kernel object to track an individual reservation
- */
-struct pmc_reservation {
-       pmc_t pmc;                                              // Pointer to in-kernel pmc which is reserved
-       pmc_config_t config;                    // counter configuration
-
-       // stored counter value
-       uint64_t value;
-
-       // TODO: Add mach-port (user-export object?)
-
-       volatile uint32_t flags __attribute__((aligned(4)));
-       volatile pmc_state_t state __attribute__((aligned(4)));
-       volatile uint32_t active_last_context_in __attribute__((aligned(4)));
-
-       union {
-               task_t task;            // not retained
-               thread_t thread;        // not retained
-       };
-
-       queue_chain_t link;
-};
-
-// END Kernel-objects
-
-
-// Methods exported to kernel (and kext) consumers
-
-/*!fn
- * @abstract Creates a new configuration object for the given pmc.
- * @discussion This method is not interrupt safe.
- * @param pmc The Perf Counter for which to create a configuration.
- * @param config A value-return configuration object.
- */
-kern_return_t pmc_create_config(pmc_t pmc, pmc_config_t *config);
-
-/*!fn
- * @abstract Releases a configuration object for the given pmc.
- * @discussion This method is not interrupt safe.
- * @param pmc The Perf Counter for which to release a configuration.
- * @param config A configuration object to be released.
- */
-void pmc_free_config(pmc_t pmc, pmc_config_t config);
-
-/*!fn
- * @abstract Set up the configuration
- * @discussion Configurations for counters are architecture-neutral key-value pairs (8-bit key, 64-bit value). Meanings of the keys and values are defined
- * by the driver-writer and are listed in XML form available for interrogation via the CoreProfile framework. This method is not interrupt safe.
- * @result KERN_SUCCESS on success. 
- */
-kern_return_t pmc_config_set_value(pmc_t pmc, pmc_config_t config, uint8_t id, uint64_t value);
-
-/*!fn
- * @abstract Interrupt Threshold Setup
- * @discussion In order to configure a PMC to use PMI (cause an interrupt after so many events occur), use this method, and provide a function to be
- * called after the interrupt occurs, along with a reference context. PMC threshold handler methods will have the pmc that generated the interrupt as
- * the first argument when the interrupt handler is invoked, and the given @refCon (which may be NULL) as the second. This method is not interrupt safe.
- */
-kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc, pmc_config_t config, uint64_t threshold, pmc_interrupt_method_t method, void *refCon);
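[Illustrative sketch of arming a PMI with the call above, assuming a pmc and config obtained as in the earlier sketch; the 100000-event threshold and my_pmi_handler are hypothetical.]

    /* Hypothetical PMI handler; must be safe to call at interrupt context. */
    static void
    my_pmi_handler(void *target, void *refCon)
    {
            (void) target;   /* the reservation/pmc that fired, per the docs above */
            (void) refCon;   /* the reference context passed below (may be NULL) */
    }

    static kern_return_t
    arm_pmi(pmc_t pmc, pmc_config_t config)
    {
            /* Interrupt after every 100000 events (hypothetical threshold). */
            return pmc_config_set_interrupt_threshold(pmc, config, 100000, my_pmi_handler, NULL);
    }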
-
-/*!fn 
- * @abstract Returns an allocated list of all pmc_t's known to the kernel.
- * @discussion Callers should free the resultant list via <link>pmc_free_pmc_list</link>. This method is not interrupt safe.
- * @param pmcs Storage for the resultant pmc_t array pointer.
- * @param pmcCount Storage for the resultant count of pmc_t's.
- */
-kern_return_t pmc_get_pmc_list(pmc_t **pmcs, size_t *pmcCount);
-
-/*!fn
- * @abstract Free a previously allocated list of pmcs.
- * @discussion This method is not interrupt safe.
- * @param pmcs PMC list to free.
- * @param pmcCount Number of pmc_t's in list.
- */
-void pmc_free_pmc_list(pmc_t *pmcs, size_t pmcCount);
-
-/*!fn
- * @abstract Finds pmcs by partial string matching.
- * @discussion This method returns a list of pmcs (similar to <link>pmc_get_pmc_list</link>) whose names match the given string up to its length.
- * For example, searching for "ia32" would return pmcs "ia32gp0" and "ia32gp1". Results should be released by the caller using <link>pmc_free_pmc_list</link>
- * @param name Partial string to search for.
- * @param pmcs Storage for the resultant pmc_t array pointer.
- * @param pmcCount Storage for the resultant count of pmc_t's.
- */
-kern_return_t pmc_find_by_name(const char *name, pmc_t **pmcs, size_t *pmcCount);
-
-/*!fn
- * @abstract Returns a pointer to the human-readable name of the given pmc.
- * @discussion The returned pointer is not a copy, and does not need to be freed. This method is interrupt safe.
- * @param pmc The PMC whose name should be returned.
- */
-const char *pmc_get_name(pmc_t pmc);
-
-/*!fn
- * @abstract Returns a list of logical cores from which the given pmc can be read from or written to.
- * @discussion This method can return a NULL list with count of 0 -- this indicates any core can read the given pmc. This method does not allocate the list, 
- * therefore callers should take care not to mutate or free the resultant list. This method is interrupt safe.
- * @param pmc The PMC for which to return the cores that can read/write it.
- * @param logicalCores Storage for the pointer to the list.
- * @param logicalCoreCt Value-return number of elements in the returned list.  0 indicates all cores can read/write the given pmc.
- */
-kern_return_t pmc_get_accessible_core_list(pmc_t pmc, uint32_t **logicalCores, size_t *logicalCoreCt);
-
-/* 
- * BEGIN PMC Reservations
- *
- * These are how you reserve a PMC, start and stop it counting, and read and write 
- * its value. 
- */
-
-/*!fn
- * @abstract Reserve a PMC for System-wide counting.
- * @discussion This method will attempt to reserve the given pmc at system-scope. It will configure the given pmc to count the event indicated by the given 
- * configuration object. This method consumes the given configuration object if the return value is KERN_SUCCESS - any other return value indicates the caller 
- * should free the configuration object via <link>pmc_free_config</link>. This method is not interrupt safe.
- * @param pmc The PMC to reserve.
- * @param config The configuration object to use with the given pmc.
- * @param reservation A value-return reservation object to be used in pmc_reservation_* methods.
- * @result This method will return one of the following values:
- *     KERN_SUCCESS: The given pmc was successfully reserved in system-scope; the given config object has been consumed and should not be freed by the caller,
- *     KERN_FAILURE: The given pmc is already reserved in a conflicting scope,
- *     KERN_INVALID_ARGUMENT: All three arguments are required to be non-NULL, but at least one is NULL,
- *     KERN_RESOURCE_SHORTAGE: Could not allocate a new reservation object.
- */
-kern_return_t pmc_reserve(pmc_t pmc, pmc_config_t config, pmc_reservation_t *reservation);
-
-
-/*!fn
- * @abstract Reserve a PMC for task-wide counting.
- * @discussion This method will attempt to reserve the given pmc for task-wide counting. The resulting reservation will only count when the task is running 
- * on one of the logical cores that can read the given pmc. The semantics of this method are the same as <link>pmc_reserve</link> in all other respects.
- * @param pmc The PMC to reserve
- * @param config The configuration object to use.
- * @param task The task for which to enable the counter.
- * @param reservation A value-return reservation object.
- * @result See <link>pmc_reserve</link>
- */
-kern_return_t pmc_reserve_task(pmc_t pmc, pmc_config_t config, task_t task, pmc_reservation_t *reservation);
-
-/*!fn
- * @abstract Reserve a PMC for thread-wide counting.
- * @discussion This method will attempt to reserve the given pmc for thread-wide counting. The resulting reservation will only count when the thread is 
- * running on one of the logical cores that can read the given pmc. The semantics of this method are the same as <link>pmc_reserve_task</link> in all other respects.
- * @param pmc The PMC to reserve
- * @param config The configuration object to use.
- * @param thread The thread for which to enable the counter.
- * @param reservation A value-return reservation object.
- * @result See <link>pmc_reserve</link>
- */
-kern_return_t pmc_reserve_thread(pmc_t pmc, pmc_config_t config, thread_t thread, pmc_reservation_t *reservation);
-
-/*!fn
- * @abstract Start counting
- * @discussion This method instructs the given reservation to start counting as soon as possible. If the reservation is for a thread (or task) other than the 
- * current thread, or for a pmc that is not accessible from the current logical core, the reservation will start counting the next time the thread (or task) 
- * runs on a logical core that can access the pmc. This method is interrupt safe. If this method is called from outside of interrupt context, it may block.
- * @param reservation The reservation to start counting
- */
-kern_return_t pmc_reservation_start(pmc_reservation_t reservation);
-
-/*!fn
- * @abstract Stop counting
- * @discussion This method instructs the given reservation to stop counting as soon as possible. If the reservation is for a thread (or task) other than the 
- * current thread, or for a pmc that is not accessible from the current logical core, the reservation will stop counting the next time the thread (or task)
- * ceases to run on a logical core that can access the pmc. This method is interrupt safe. If called from outside of interrupt context, this method may block.
- * @param reservation The reservation to stop counting
- */
-kern_return_t pmc_reservation_stop(pmc_reservation_t reservation);
-
-/*!fn
- * @abstract Read the counter value
- * @discussion This method will read the event count associated with the given reservation. If the pmc is currently on hardware, and the caller is currently
- * executing in a context that both a) matches the reservation's context, and b) can access the reservation's pmc directly, the value will be read directly 
- * from the hardware.  Otherwise, the value stored in the reservation is returned. This method is interrupt safe. If the caller is calling from outside of 
- * interrupt context, this method may block.
- * @param reservation The reservation whose value to read.
- * @param value Value-return event count
- */
-kern_return_t pmc_reservation_read(pmc_reservation_t reservation, uint64_t *value);
-
-/*!fn
- * @abstract Write the counter value
- * @discussion This method will write the event count associated with the given reservation. If the pmc is currently on hardware, and the caller is currently 
- * executing in a context that both a) matches the reservation's context, and b) can access the reservation's pmc directly, the value will be written directly 
- * to the hardware. Otherwise, the value stored in the reservation is overwritten. This method is interrupt safe. If the caller is calling from outside of 
- * interrupt context, this method may block.
- * @param reservation The reservation to write.
- * @param value The event count to write
- */
-kern_return_t pmc_reservation_write(pmc_reservation_t reservation, uint64_t value);
-
-/*!fn
- * @abstract Free a reservation and all associated resources.
- * @discussion This method will free the resources associated with the given reservation and release the associated PMC back to general availability. 
- * If the reservation is currently counting, it will be stopped prior to release. This method is not interrupt safe.
- * @param reservation The reservation to free
- */
-kern_return_t pmc_reservation_free(pmc_reservation_t reservation);
-
-#if XNU_KERNEL_PRIVATE
-
-/*!fn
- * @abstract Brings up all the necessary infrastructure required to use the pmc sub-system.
- * @discussion For xnu-internal startup routines only.
- */
-void pmc_bootstrap(void);
-
-/*!fn
- * @abstract Performs a pmc context switch.
- * @discussion This method will save all PMCs reserved for oldThread (and the task associated with oldThread), as well as restore all PMCs reserved 
- * for newThread (and the task associated with newThread). This method is for xnu-internal context switching routines only.
- */
-boolean_t pmc_context_switch(thread_t oldThread, thread_t newThread);
-
-/*!fn
- * @abstract Called on per-core idle.
- * @discussion This method notifies registered performance monitors of impending cpu idle, and can be used to save counter state.
- */
-boolean_t pmc_idle(void);
-
-/*!fn
- * @abstract Called on per-core wake from idle.
- * @discussion This method notifies registered performance monitors of wake-up from the prior idle, and can be used to restore 
- * previously saved counter configuration.
- */
-boolean_t pmc_idle_exit(void);
-
-#if defined(THREAD_PMC_FLAG)
-/* Allow inclusion from outside of MACH_KERNEL_PRIVATE scope. */
-
-/*!fn
- * @abstract Returns true if thread has been marked for counting.
- * @discussion Task-level reservations are propagated to child threads via thread_create_internal. Any mutation of task reservations forces a recalculation
- * of t_chud (for the pmc flag) for all threads in that task. Consequently, we can simply check the current thread's flag against THREAD_PMC_FLAG.
- */
-static inline boolean_t pmc_thread_eligible(thread_t t) {
-       return (t != NULL) ? ((t->t_chud & THREAD_PMC_FLAG) ? TRUE : FALSE) : FALSE;
-}
-
-#endif /* THREAD_PMC_FLAG*/
-
-#endif // XNU_KERNEL_PRIVATE
-
-#ifdef __cplusplus
-};
-#endif
-
-#endif // _MACH_PMC_H_
-
index 74c161ed2553c2db01fb3bd12addacf9108b009e..b5f414468c3eb694e4ab43b31bc75bc86a85fa40 100644 (file)
@@ -314,7 +314,7 @@ yarrow_init(
                panic("Couldn't initialize Yarrow, /dev/random will not work.");
        }
 
-       perr = prngInput(yp->PrngRef, (BYTE*) entropy, (UINT) entropyLength,
+       perr = prngInput(yp->PrngRef, __DECONST(BYTE*, entropy), (UINT) entropyLength,
                        SYSTEM_SOURCE, (UINT) entropyLength * 8);
        if (perr != 0) {
                /* an error, complain */
@@ -347,7 +347,9 @@ yarrow_generate(
        int             bytes_remaining = (int) outlen;
 
        yp->bytes_since_reseed += outlen;
-       if (yp->bytes_since_reseed > RESEED_BYTES)
+       /* Reseed needed? But allow any length immediately after reseeding. */
+       if (yp->bytes_since_reseed != outlen &&
+           yp->bytes_since_reseed > RESEED_BYTES)
                return CCDRBG_STATUS_NEED_RESEED;
        
        while (bytes_remaining > 0) {
@@ -380,7 +382,7 @@ yarrow_reseed(
 #pragma unused(in)
        YarrowContextp  yp = (YarrowContextp) prng;
 
-       (void) prngInput(yp->PrngRef, (BYTE*) entropy, (UINT) entropylen,
+       (void) prngInput(yp->PrngRef, __DECONST(BYTE*, entropy), (UINT) entropylen,
                         SYSTEM_SOURCE, (UINT) entropylen * 8);
        (void) prngForceReseed(yp->PrngRef, RESEED_TICKS);
 
index 44045cc37082b18da111f7eabcc95bafe15fe0fc..b927d7f808c857a4bf2f568288be3bd67317d02c 100644 (file)
@@ -168,6 +168,7 @@ early_random(void)
        uint64_t        result;
        uint64_t        nonce;
        int             rc;
+       int             ps;
        ccdrbg_state_t  *state;
 
        if (!erandom_seed_set) {
@@ -205,23 +206,26 @@ early_random(void)
                erandom_state[0] = state;
 
                /*
-                * Init our DBRG from the boot entropy and a nonce composed of
-                * a timestamp swizzled with the first 8 bytes of this entropy.
+                * Init our DRBG from the boot entropy and a timestamp as nonce
+                * and the cpu number as personalization.
                 */
                assert(sizeof(erandom_seed) > sizeof(nonce));
-               bcopy(erandom_seed, &nonce, sizeof(nonce));
-               nonce ^= ml_get_timebase();
+               nonce = ml_get_timebase();
+               ps = 0;                         /* boot cpu */
                rc = ccdrbg_init(&erandom_info, state,
                                 sizeof(erandom_seed), erandom_seed,
                                 sizeof(nonce), &nonce,
-                                0, NULL);
-               assert(rc == CCDRBG_STATUS_OK);
+                                sizeof(ps), &ps);
+               cc_clear(sizeof(nonce), &nonce);
+               if (rc != CCDRBG_STATUS_OK)
+                       panic("ccdrbg_init() returned %d", rc);
 
                /* Generate output */
                rc = ccdrbg_generate(&erandom_info, state,
                                     sizeof(result), &result,
                                     0, NULL);
-               assert(rc == CCDRBG_STATUS_OK);
+               if (rc != CCDRBG_STATUS_OK)
+                       panic("ccdrbg_generate() returned %d", rc);
        
                return result;
        };
@@ -231,7 +235,7 @@ early_random(void)
        return result;
 }
 
-void
+static void
 read_erandom(void *buffer, u_int numBytes)
 {
        int             cpu;
@@ -258,6 +262,7 @@ read_erandom(void *buffer, u_int numBytes)
                        rc = ccdrbg_reseed(&erandom_info, state,
                                           sizeof(erandom_seed), erandom_seed,
                                           0, NULL);
+                       cc_clear(sizeof(erandom_seed), erandom_seed);
                        if (rc == CCDRBG_STATUS_OK)
                                continue;
                        panic("read_erandom reseed error %d\n", rc);
@@ -318,17 +323,17 @@ prng_cpu_init(int cpu)
                erandom_state[cpu] = state;
 
                /*
-                * Init our DBRG from boot entropy, nonce as timestamp xor'ed
-                * with the first 8 bytes of entropy, and use the cpu number
-                * as the personalization parameter.
+                * Init our DRBG from boot entropy, a timestamp as nonce,
+                * and use the cpu number as the personalization parameter.
                 */
-               bcopy(erandom_seed, &nonce, sizeof(nonce));
-               nonce ^= ml_get_timebase();
+               nonce = ml_get_timebase();
                rc = ccdrbg_init(&erandom_info, state,
                                 sizeof(erandom_seed), erandom_seed,
                                 sizeof(nonce), &nonce,
                                 sizeof(cpu), &cpu);
-               assert(rc == CCDRBG_STATUS_OK);
+               cc_clear(sizeof(nonce), &nonce);
+               if (rc != CCDRBG_STATUS_OK)
+                       panic("ccdrbg_init() returned %d", rc);
        }
 
        /* Non-boot cpus use the master cpu's global context */
@@ -404,6 +409,7 @@ prng_infop(prngContextp pp)
                           bytesToInput, rdBuffer,
                           0, NULL,
                           0, NULL);
+       cc_clear(sizeof(rdBuffer), rdBuffer);
        return pp->infop;
 }
 
@@ -419,6 +425,7 @@ Reseed(prngContextp pp)
                                         bytesToInput, rdBuffer,
                                         0, NULL)); 
 
+       cc_clear(sizeof(rdBuffer), rdBuffer);
        pp->bytes_reseeded = pp->bytes_generated;
 }
 
index f256a9c4761ebb418573aa40324bde1f30700862..b3bbc168d7a2da2adf5b1707eba86ab33ab390e2 100644 (file)
@@ -16,10 +16,20 @@ INSTINC_SUBDIRS_X86_64 = \
 INSTINC_SUBDIRS_X86_64H = \
        x86_64
 
+INSTINC_SUBDIRS_ARM = \
+       arm
+
+INSTINC_SUBDIRS_ARM64 = \
+       arm
 
 EXPINC_SUBDIRS = \
        machine
 
+EXPINC_SUBDIRS_ARM = \
+       arm
+
+EXPINC_SUBDIRS_ARM64 = \
+       arm
 
 EXPINC_SUBDIRS_X86_64 = \
        x86_64
index 8072e6d0df55b0f46b5936f065c943009a6f278e..6a11289d49f5ac7fe84e2c6c4a056ef616baca90 100644 (file)
@@ -80,7 +80,7 @@ WKdm_decompress_new (WK_word* src_buf,
                     WK_word* scratch,
                     unsigned int bytes);
 int
-WKdm_compress_new (WK_word* src_buf,
+WKdm_compress_new (const WK_word* src_buf,
                   WK_word* dest_buf,
                   WK_word* scratch,
                   unsigned int limit);
index a7f17574a4d0971ea57c4b8b018aa791f6d30813..f7e485bde671ff30e51db3dbddf9cc8b1e9746e4 100644 (file)
@@ -120,6 +120,7 @@ typedef struct vnode_pager {
        struct vnode            *vnode_handle;  /* vnode handle              */
 } *vnode_pager_t;
 
+
 #define pager_ikot pager_header.io_bits
 
 ipc_port_t
@@ -1189,13 +1190,13 @@ fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *
 
            start = entry->vme_start;
 
-           pinfo->pri_offset = entry->offset;
+           pinfo->pri_offset = VME_OFFSET(entry);
            pinfo->pri_protection = entry->protection;
            pinfo->pri_max_protection = entry->max_protection;
            pinfo->pri_inheritance = entry->inheritance;
            pinfo->pri_behavior = entry->behavior;
            pinfo->pri_user_wired_count = entry->user_wired_count;
-           pinfo->pri_user_tag = entry->alias;
+           pinfo->pri_user_tag = VME_ALIAS(entry);
 
            if (entry->is_sub_map) {
                pinfo->pri_flags |= PROC_REGION_SUBMAP;
@@ -1206,7 +1207,7 @@ fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *
 
 
            extended.protection = entry->protection;
-           extended.user_tag = entry->alias;
+           extended.user_tag = VME_ALIAS(entry);
            extended.pages_resident = 0;
            extended.pages_swapped_out = 0;
            extended.pages_shared_now_private = 0;
@@ -1214,7 +1215,7 @@ fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *
            extended.external_pager = 0;
            extended.shadow_depth = 0;
 
-           vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, &extended);
+           vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, &extended);
 
            if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED)
                    extended.share_mode = SM_PRIVATE;
@@ -1286,20 +1287,20 @@ fill_procregioninfo_onlymappedvnodes(task_t task, uint64_t arg, struct proc_regi
                entry = tmp_entry;
        }
 
-       while ((entry != vm_map_to_entry(map))) {
+       while (entry != vm_map_to_entry(map)) {
                *vnodeaddr = 0;
                *vid = 0;
 
                if (entry->is_sub_map == 0) {
                        if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid)) {
 
-                               pinfo->pri_offset = entry->offset;
+                               pinfo->pri_offset = VME_OFFSET(entry);
                                pinfo->pri_protection = entry->protection;
                                pinfo->pri_max_protection = entry->max_protection;
                                pinfo->pri_inheritance = entry->inheritance;
                                pinfo->pri_behavior = entry->behavior;
                                pinfo->pri_user_wired_count = entry->user_wired_count;
-                               pinfo->pri_user_tag = entry->alias;
+                               pinfo->pri_user_tag = VME_ALIAS(entry);
                                
                                if (entry->is_shared)
                                        pinfo->pri_flags |= PROC_REGION_SHARED;
@@ -1355,7 +1356,7 @@ fill_vnodeinfoforaddr(
                 * The last object in the shadow chain has the
                 * relevant pager information.
                 */
-               top_object = entry->object.vm_object;
+               top_object = VME_OBJECT(entry);
                if (top_object == VM_OBJECT_NULL) {
                        object = VM_OBJECT_NULL;
                        shadow_depth = 0;
@@ -1458,7 +1459,7 @@ find_vnode_object(
                 * relevant pager information.
                 */
 
-               top_object = entry->object.vm_object;
+               top_object = VME_OBJECT(entry);
 
                if (top_object) {
                        vm_object_lock(top_object);
index f3df12b70644fdf8122dbc8515d4ac96019ff780..7b4c8f161b9ad9d18acd093b772d0481603e0c6d 100644 (file)
@@ -294,10 +294,10 @@ device_pager_data_return(
        if (device_object == DEVICE_PAGER_NULL)
                panic("device_pager_data_return: lookup failed");
 
-       return device_data_action(device_object->device_handle,
-                                 (ipc_port_t) device_object,
-                                 VM_PROT_READ | VM_PROT_WRITE,
-                                 offset, data_cnt);
+       __IGNORE_WCASTALIGN(return device_data_action(device_object->device_handle,
+                         (ipc_port_t) device_object,
+                         VM_PROT_READ | VM_PROT_WRITE,
+                         offset, data_cnt));
 }
 
 /*
@@ -318,9 +318,9 @@ device_pager_data_request(
        if (device_object == DEVICE_PAGER_NULL)
                panic("device_pager_data_request: lookup failed");
 
-       device_data_action(device_object->device_handle,
+       __IGNORE_WCASTALIGN(device_data_action(device_object->device_handle,
                           (ipc_port_t) device_object,
-                          VM_PROT_READ, offset, length);
+                          VM_PROT_READ, offset, length));
        return KERN_SUCCESS;
 }
 
index 19a1d566f47f40355d1e9405118a16566485be13..e8ace8c9b90fb0cc128c1657ee7947afe6a8700b 100644 (file)
 
 #include <vm/vm_protos.h>
 
-
 memory_object_default_t        memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
 decl_lck_mtx_data(,    memory_manager_default_lock)
 
@@ -555,7 +554,7 @@ vm_object_update_extent(
                        if ((data_cnt >= MAX_UPL_TRANSFER_BYTES) || (next_offset != offset)) {
 
                                if (dw_count) {
-                                       vm_page_do_delayed_work(object, &dw_array[0], dw_count);
+                                       vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
                                        dwp = &dw_array[0];
                                        dw_count = 0;
                                }
@@ -575,7 +574,7 @@ vm_object_update_extent(
                                 *      End of a run of dirty/precious pages.
                                 */
                                if (dw_count) {
-                                       vm_page_do_delayed_work(object, &dw_array[0], dw_count);
+                                       vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
                                        dwp = &dw_array[0];
                                        dw_count = 0;
                                }
@@ -639,7 +638,7 @@ vm_object_update_extent(
                                VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
 
                                if (dw_count >= dw_limit) {
-                                       vm_page_do_delayed_work(object, &dw_array[0], dw_count);
+                                       vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
                                        dwp = &dw_array[0];
                                        dw_count = 0;
                                }
@@ -652,7 +651,7 @@ vm_object_update_extent(
         *      Clean any pages that have been saved.
         */
        if (dw_count)
-               vm_page_do_delayed_work(object, &dw_array[0], dw_count);
+               vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
 
        if (data_cnt) {
                LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
@@ -1452,11 +1451,11 @@ memory_object_iopl_request(
        upl_t                   *upl_ptr,
        upl_page_info_array_t   user_page_list,
        unsigned int            *page_list_count,
-       int                     *flags)
+       upl_control_flags_t     *flags)
 {
        vm_object_t             object;
        kern_return_t           ret;
-       int                     caller_flags;
+       upl_control_flags_t     caller_flags;
 
        caller_flags = *flags;
 
@@ -1611,7 +1610,7 @@ memory_object_upl_request(
                                     upl_ptr,
                                     user_page_list,
                                     page_list_count,
-                                    cntrl_flags);
+                                    (upl_control_flags_t)(unsigned int) cntrl_flags);
 }
 
 /*  
@@ -1649,7 +1648,7 @@ memory_object_super_upl_request(
                                           upl,
                                           user_page_list,
                                           page_list_count,
-                                          cntrl_flags);
+                                          (upl_control_flags_t)(unsigned int) cntrl_flags);
 }
 
 kern_return_t
@@ -1715,6 +1714,14 @@ host_default_memory_manager(
                returned_manager = current_manager;
                memory_object_default_reference(returned_manager);
        } else {
+               /*
+                *      Only allow the kernel to change the value.
+                */
+               extern task_t kernel_task;
+               if (current_task() != kernel_task) {
+                       result = KERN_NO_ACCESS;
+                       goto out;
+               }
 
                /*
                 *      If this is the first non-null manager, start
index a3cf7c1c669135d044c50b760deba3c1f34d3398..35a35591c526e9dd0c162f5ebb473832f7af53ad 100644 (file)
@@ -123,16 +123,6 @@ extern kern_return_t       memory_object_free_from_cache(
        memory_object_pager_ops_t       pager_ops,
        int                             *count);
 
-extern kern_return_t   memory_object_iopl_request(
-       ipc_port_t              port,
-       memory_object_offset_t  offset,
-       upl_size_t              *upl_size,
-       upl_t                   *upl_ptr,
-       upl_page_info_array_t   user_page_list,
-       unsigned int            *page_list_count,
-       int                     *flags);
-       
-
 extern kern_return_t   memory_object_pages_resident(
        memory_object_control_t         control,
        boolean_t                       *               has_pages_resident);
index 7de264ec573cbb8ba1ac4f9444bafa3dee4e0256..9d72bf588968489b0ca5cc13b0f5ae8bb21b2bf3 100644 (file)
@@ -110,6 +110,8 @@ extern kern_return_t        copypv(
 #define cppvKmap       64      /* Use the kernel's vm_map */
 #define cppvKmapb      25
 
+extern boolean_t pmap_has_managed_page(ppnum_t first, ppnum_t last);
+
 #ifdef MACH_KERNEL_PRIVATE
 
 #include <mach_assert.h>
@@ -191,7 +193,14 @@ extern void                pmap_virtual_space(
 extern pmap_t          pmap_create(    /* Create a pmap_t. */
                                ledger_t        ledger,
                                vm_map_size_t   size,
-                               __unused boolean_t      is_64bit);
+                               boolean_t       is_64bit);
+#if __x86_64__
+extern pmap_t          pmap_create_options(
+                               ledger_t        ledger,
+                               vm_map_size_t   size,
+                               int             flags);
+#endif
+
 extern pmap_t          (pmap_kernel)(void);    /* Return the kernel's pmap */
 extern void            pmap_reference(pmap_t pmap);    /* Gain a reference. */
 extern void            pmap_destroy(pmap_t pmap); /* Release a reference. */
@@ -558,6 +567,13 @@ extern kern_return_t pmap_nest(pmap_t,
 extern kern_return_t pmap_unnest(pmap_t,
                                 addr64_t,
                                 uint64_t);
+
+#define        PMAP_UNNEST_CLEAN       1
+
+extern kern_return_t pmap_unnest_options(pmap_t,
+                                addr64_t,
+                                uint64_t,
+                                unsigned int);
 extern boolean_t pmap_adjust_unnest_parameters(pmap_t, vm_map_offset_t *, vm_map_offset_t *);
 #endif /* MACH_KERNEL_PRIVATE */
 
@@ -587,6 +603,12 @@ extern pmap_t      kernel_pmap;                    /* The kernel's map */
 #define VM_MEM_SUPERPAGE       0x100           /* map a superpage instead of a base page */
 #define VM_MEM_STACK           0x200
 
+#if __x86_64__
+#define PMAP_CREATE_64BIT      0x1
+#define PMAP_CREATE_EPT                0x2
+#define PMAP_CREATE_KNOWN_FLAGS (PMAP_CREATE_64BIT | PMAP_CREATE_EPT)
+#endif
+
 #define PMAP_OPTIONS_NOWAIT    0x1             /* don't block, return 
                                                 * KERN_RESOURCE_SHORTAGE 
                                                 * instead */
@@ -603,6 +625,8 @@ extern pmap_t       kernel_pmap;                    /* The kernel's map */
 #define PMAP_OPTIONS_REMOVE    0x100           /* removing a mapping */
 #define PMAP_OPTIONS_SET_REUSABLE   0x200      /* page is now "reusable" */
 #define PMAP_OPTIONS_CLEAR_REUSABLE 0x400      /* page no longer "reusable" */
+#define PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED 0x800 /* credit the compressor
+                                                   * iff page was modified */
 
 #if    !defined(__LP64__)
 extern vm_offset_t     pmap_extract(pmap_t pmap,
@@ -636,7 +660,14 @@ void pmap_pre_expand(pmap_t pmap, vm_map_offset_t vaddr);
 
 unsigned int pmap_query_resident(pmap_t pmap,
                                 vm_map_offset_t s,
-                                vm_map_offset_t e);
+                                vm_map_offset_t e,
+                                unsigned int *compressed_count_p);
+
+#if CONFIG_PGTRACE
+int pmap_pgtrace_add_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end);
+int pmap_pgtrace_delete_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end);
+kern_return_t pmap_pgtrace_fault(pmap_t pmap, vm_map_offset_t va, arm_saved_state_t *ss);
+#endif
 
 #endif  /* KERNEL_PRIVATE */
 
index e5945fbad1b9027b440a5e343e28441d42078057..81301fd1102c610c5c2b9c655f841675f06e435f 100644 (file)
@@ -114,6 +114,10 @@ kern_return_t apple_protect_pager_map(memory_object_t mem_obj,
                                      vm_prot_t prot);
 kern_return_t apple_protect_pager_last_unmap(memory_object_t mem_obj);
 
+#define CRYPT_INFO_DEBUG 0
+void crypt_info_reference(struct pager_crypt_info *crypt_info);
+void crypt_info_deallocate(struct pager_crypt_info *crypt_info);
+
 /*
  * Vector of VM operations for this EMM.
  * These routines are invoked by VM via the memory_object_*() interfaces.
@@ -131,7 +135,7 @@ const struct memory_object_pager_ops apple_protect_pager_ops = {
        apple_protect_pager_map,
        apple_protect_pager_last_unmap,
        NULL, /* data_reclaim */
-       "apple protect pager"
+       "apple_protect"
 };
 
 /*
@@ -139,7 +143,7 @@ const struct memory_object_pager_ops apple_protect_pager_ops = {
  * the "apple protect" EMM.
  */
 typedef struct apple_protect_pager {
-       struct ipc_object_header        pager_header;   /* fake ip_kotype() */
+       struct ipc_object_header pager_header;  /* fake ip_kotype() */
        memory_object_pager_ops_t pager_ops; /* == &apple_protect_pager_ops */
        queue_chain_t           pager_queue;    /* next & prev pagers */
        unsigned int            ref_count;      /* reference count */
@@ -147,7 +151,11 @@ typedef struct apple_protect_pager {
        boolean_t               is_mapped;      /* is this mem_obj mapped ? */
        memory_object_control_t pager_control;  /* mem object control handle */
        vm_object_t             backing_object; /* VM obj w/ encrypted data */
-       struct pager_crypt_info crypt;
+       vm_object_offset_t      backing_offset;
+       vm_object_offset_t      crypto_backing_offset; /* for key... */
+       vm_object_offset_t      crypto_start;
+       vm_object_offset_t      crypto_end;
+       struct pager_crypt_info *crypt_info;
 } *apple_protect_pager_t;
 #define        APPLE_PROTECT_PAGER_NULL        ((apple_protect_pager_t) NULL)
 #define pager_ikot pager_header.io_bits
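
For orientation, a small helper (purely illustrative, not in the commit) that restates the identity test the lookup loops later in this file now apply: two mappings share a pager only when the crypt_info callbacks, the backing object/offsets and the crypto window all match.

	static boolean_t
	apple_protect_pager_matches(apple_protect_pager_t p,
				    vm_object_t		obj,
				    vm_object_offset_t	backing_offset,
				    vm_object_offset_t	crypto_backing_offset,
				    vm_object_offset_t	crypto_start,
				    vm_object_offset_t	crypto_end,
				    struct pager_crypt_info *ci)
	{
		return (p->crypt_info->page_decrypt == ci->page_decrypt &&
			p->crypt_info->crypt_end == ci->crypt_end &&
			p->crypt_info->crypt_ops == ci->crypt_ops &&
			p->backing_object == obj &&
			p->backing_offset == backing_offset &&
			p->crypto_backing_offset == crypto_backing_offset &&
			p->crypto_start == crypto_start &&
			p->crypto_end == crypto_end);
	}
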
@@ -181,8 +189,13 @@ lck_attr_t         apple_protect_pager_lck_attr;
 
 
 /* internal prototypes */
-apple_protect_pager_t apple_protect_pager_create(vm_object_t backing_object,
-                                                struct pager_crypt_info *crypt_info);
+apple_protect_pager_t apple_protect_pager_create(
+       vm_object_t backing_object,
+       vm_object_offset_t backing_offset,
+       vm_object_offset_t crypto_backing_offset,
+       struct pager_crypt_info *crypt_info,
+       vm_object_offset_t crypto_start,
+       vm_object_offset_t crypto_end);
 apple_protect_pager_t apple_protect_pager_lookup(memory_object_t mem_obj);
 void apple_protect_pager_dequeue(apple_protect_pager_t pager);
 void apple_protect_pager_deallocate_internal(apple_protect_pager_t pager,
@@ -315,6 +328,7 @@ apple_protect_pager_data_unlock(
  *
  * Handles page-in requests from VM.
  */
+int apple_protect_pager_data_request_debug = 0;
 kern_return_t  
 apple_protect_pager_data_request(
        memory_object_t         mem_obj,
@@ -339,7 +353,6 @@ apple_protect_pager_data_request(
        vm_offset_t             src_vaddr, dst_vaddr;
        vm_offset_t             cur_offset;
        vm_offset_t             offset_in_page;
-       vm_map_entry_t          map_entry;
        kern_return_t           error_code;
        vm_prot_t               prot;
        vm_page_t               src_page, top_page;
@@ -391,11 +404,18 @@ apple_protect_pager_data_request(
        assert(dst_object != VM_OBJECT_NULL);
 
 
+#if __x86_64__ || __arm__ || __arm64__
+       /* we'll use the 1-to-1 mapping of physical memory */
+       src_vaddr = 0;
+       dst_vaddr = 0;
+#else /* __x86_64__ || __arm__ || __arm64__ */
        /*
         * Reserve 2 virtual pages in the kernel address space to map each
         * source and destination physical pages when it's their turn to
         * be processed.
         */
+       vm_map_entry_t          map_entry;
+
        vm_object_reference(kernel_object);     /* ref. for mapping */
        kr = vm_map_find_space(kernel_map,
                               &kernel_mapping,
@@ -413,6 +433,7 @@ apple_protect_pager_data_request(
        vm_map_unlock(kernel_map);
        src_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping);
        dst_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping + PAGE_SIZE_64);
+#endif /* __x86_64__ || __arm__ || __arm64__ */
 
        /*
         * We'll map the encrypted data in the kernel address space from the 
@@ -450,7 +471,7 @@ apple_protect_pager_data_request(
                prot = VM_PROT_READ;
                src_page = VM_PAGE_NULL;
                kr = vm_fault_page(src_object,
-                                  offset + cur_offset,
+                                  pager->backing_offset + offset + cur_offset,
                                   VM_PROT_READ,
                                   FALSE,
                                   FALSE, /* src_page not looked up */
@@ -507,18 +528,24 @@ apple_protect_pager_data_request(
                        }
                        vm_page_unlock_queues();
                }
-               
+
                /*
                 * Establish an explicit mapping of the source
                 * physical page.
                 */
+#if __x86_64__
+               src_vaddr = (vm_map_offset_t)
+                       PHYSMAP_PTOV((pmap_paddr_t)src_page->phys_page
+                                    << PAGE_SHIFT);
+#else
                pmap_enter(kernel_pmap,
-                          kernel_mapping,
+                          src_vaddr,
                           src_page->phys_page,
                           VM_PROT_READ,
                           VM_PROT_NONE,
                           0,
                           TRUE);
+#endif
                /*
                 * Establish an explicit pmap mapping of the destination
                 * physical page.
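
A minimal sketch (assuming x86_64 and a valid ppnum_t) of the direct-map access used in the branch above: PHYSMAP_PTOV() turns a physical address into its permanent kernel virtual alias, so no transient pmap_enter()/pmap_remove() pair is needed around the copy.

	ppnum_t		pn  = src_page->phys_page;
	const void	*kva = (const void *)
		PHYSMAP_PTOV((pmap_paddr_t)pn << PAGE_SHIFT);

	/* 'kva' stays usable for as long as the page remains resident and
	 * busy; nothing has to be torn down afterwards */
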
@@ -528,13 +555,47 @@ apple_protect_pager_data_request(
                dst_pnum = (ppnum_t)
                        upl_phys_page(upl_pl, (int)(cur_offset / PAGE_SIZE));
                assert(dst_pnum != 0);
+#if __x86_64__
+               dst_vaddr = (vm_map_offset_t)
+                       PHYSMAP_PTOV((pmap_paddr_t)dst_pnum << PAGE_SHIFT);
+#else
                pmap_enter(kernel_pmap,
-                          kernel_mapping + PAGE_SIZE_64,
+                          dst_vaddr,
                           dst_pnum,
                           VM_PROT_READ | VM_PROT_WRITE,
                           VM_PROT_NONE,
                           0,
                           TRUE);
+#endif
+
+               /*
+                * Validate the original page...
+                */
+               if (src_page->object->code_signed) {
+                       vm_page_validate_cs_mapped(
+                               src_page,
+                               (const void *) src_vaddr);
+               }
+               /*
+                * ... and transfer the results to the destination page.
+                */
+               UPL_SET_CS_VALIDATED(upl_pl, cur_offset / PAGE_SIZE,
+                                    src_page->cs_validated);
+               UPL_SET_CS_TAINTED(upl_pl, cur_offset / PAGE_SIZE,
+                                  src_page->cs_tainted);
+               UPL_SET_CS_NX(upl_pl, cur_offset / PAGE_SIZE,
+                                  src_page->cs_nx);
+
+               /*
+                * page_decrypt() might access a mapped file, so let's release
+                * the object lock for the source page to avoid a potential
+                * deadlock.  The source page is kept busy and we have a
+                * "paging_in_progress" reference on its object, so it's safe
+                * to unlock the object here.
+                */
+               assert(src_page->busy);
+               assert(src_page->object->paging_in_progress > 0);
+               vm_object_unlock(src_page->object);
 
                /*
                 * Decrypt the encrypted contents of the source page
@@ -543,16 +604,87 @@ apple_protect_pager_data_request(
                for (offset_in_page = 0;
                     offset_in_page < PAGE_SIZE;
                     offset_in_page += 4096) {
-                       ret = pager->crypt.page_decrypt((const void *)
-                                                       (src_vaddr +
-                                                        offset_in_page),
-                                                       (void *)
-                                                       (dst_vaddr +
-                                                        offset_in_page),
-                                                       (offset +
-                                                        cur_offset +
-                                                        offset_in_page),
-                                                       pager->crypt.crypt_ops);
+                       if (offset + cur_offset + offset_in_page <
+                           pager->crypto_start ||
+                           offset + cur_offset + offset_in_page >=
+                           pager->crypto_end) {
+                               /* not encrypted: just copy */
+                               bcopy((const char *)(src_vaddr +
+                                                    offset_in_page),
+                                     (char *)(dst_vaddr + offset_in_page),
+                                     4096);
+                               if (apple_protect_pager_data_request_debug) {
+                                       printf("apple_protect_data_request"
+                                              "(%p,0x%llx+0x%llx+0x%04llx): "
+                                              "out of crypto range "
+                                              "[0x%llx:0x%llx]: "
+                                              "COPY [0x%016llx 0x%016llx] "
+                                              "code_signed=%d "
+                                              "cs_validated=%d "
+                                              "cs_tainted=%d "
+                                              "cs_nx=%d\n",
+                                              pager,
+                                              offset,
+                                              (uint64_t) cur_offset,
+                                              (uint64_t) offset_in_page,
+                                              pager->crypto_start,
+                                              pager->crypto_end,
+                                              *(uint64_t *)(dst_vaddr+
+                                                            offset_in_page),
+                                              *(uint64_t *)(dst_vaddr+
+                                                            offset_in_page+8),
+                                              src_page->object->code_signed,
+                                              src_page->cs_validated,
+                                              src_page->cs_tainted,
+                                              src_page->cs_nx);
+                               }
+                               ret = 0;
+                               continue;
+                       }
+                       ret = pager->crypt_info->page_decrypt(
+                               (const void *)(src_vaddr + offset_in_page),
+                               (void *)(dst_vaddr + offset_in_page),
+                               ((pager->crypto_backing_offset -
+                                 pager->crypto_start) + /* XXX ? */
+                                offset +
+                                cur_offset +
+                                offset_in_page),
+                               pager->crypt_info->crypt_ops);
+                       if (apple_protect_pager_data_request_debug) {
+                               printf("apple_protect_data_request"
+                                      "(%p,0x%llx+0x%llx+0x%04llx): "
+                                      "in crypto range [0x%llx:0x%llx]: "
+                                      "DECRYPT offset 0x%llx="
+                                      "(0x%llx-0x%llx+0x%llx+0x%llx+0x%04llx)"
+                                      "[0x%016llx 0x%016llx] "
+                                      "code_signed=%d "
+                                      "cs_validated=%d "
+                                      "cs_tainted=%d "
+                                      "cs_nx=%d "
+                                      "ret=0x%x\n",
+                                      pager,
+                                      offset,
+                                      (uint64_t) cur_offset,
+                                      (uint64_t) offset_in_page,
+                                      pager->crypto_start, pager->crypto_end,
+                                      ((pager->crypto_backing_offset -
+                                        pager->crypto_start) +
+                                       offset +
+                                       cur_offset +
+                                       offset_in_page),
+                                      pager->crypto_backing_offset,
+                                      pager->crypto_start,
+                                      offset,
+                                      (uint64_t) cur_offset,
+                                      (uint64_t) offset_in_page,
+                                      *(uint64_t *)(dst_vaddr+offset_in_page),
+                                      *(uint64_t *)(dst_vaddr+offset_in_page+8),
+                                      src_page->object->code_signed,
+                                      src_page->cs_validated,
+                                      src_page->cs_tainted,
+                                      src_page->cs_nx,
+                                      ret);
+                       }
                        if (ret) {
                                break;
                        }
@@ -562,26 +694,17 @@ apple_protect_pager_data_request(
                         * Decryption failed.  Abort the fault.
                         */
                        retval = KERN_ABORTED;
-               } else {
-                       /*
-                        * Validate the original page...
-                        */
-                       if (src_page->object->code_signed) {
-                               vm_page_validate_cs_mapped(
-                                       src_page,
-                                       (const void *) src_vaddr);
-                       }
-                       /*
-                        * ... and transfer the results to the destination page.
-                        */
-                       UPL_SET_CS_VALIDATED(upl_pl, cur_offset / PAGE_SIZE,
-                                            src_page->cs_validated);
-                       UPL_SET_CS_TAINTED(upl_pl, cur_offset / PAGE_SIZE,
-                                          src_page->cs_tainted);
-                       UPL_SET_CS_NX(upl_pl, cur_offset / PAGE_SIZE,
-                                          src_page->cs_nx);
                }
-               
+
+               assert(src_page->busy);
+               assert(src_page->object->paging_in_progress > 0);
+               vm_object_lock(src_page->object);
+
+#if __x86_64__ || __arm__ || __arm64__
+               /* we used the 1-to-1 mapping of physical memory */
+               src_vaddr = 0;
+               dst_vaddr = 0;
+#else /* __x86_64__ || __arm__ || __arm64__ */
                /*
                 * Remove the pmap mapping of the source and destination pages
                 * in the kernel.
@@ -589,6 +712,7 @@ apple_protect_pager_data_request(
                pmap_remove(kernel_pmap,
                            (addr64_t) kernel_mapping,
                            (addr64_t) (kernel_mapping + (2 * PAGE_SIZE_64)));
+#endif /* __x86_64__ || __arm__ || __arm64__ */
 
                /*
                 * Cleanup the result of vm_fault_page() of the source page.
@@ -748,10 +872,16 @@ apple_protect_pager_terminate_internal(
                vm_object_deallocate(pager->backing_object);
                pager->backing_object = VM_OBJECT_NULL;
        }
-       
-       /* deallocate any crypt module data */
-       if(pager->crypt.crypt_end)
-               pager->crypt.crypt_end(pager->crypt.crypt_ops);
+
+       /* one less pager using this "pager_crypt_info" */
+#if CRYPT_INFO_DEBUG
+       printf("CRYPT_INFO %s: deallocate %p ref %d\n",
+              __FUNCTION__,
+              pager->crypt_info,
+              pager->crypt_info->crypt_refcnt);
+#endif /* CRYPT_INFO_DEBUG */
+       crypt_info_deallocate(pager->crypt_info);
+       pager->crypt_info = NULL;
 
        /* trigger the destruction of the memory object */
        memory_object_destroy(pager->pager_control, 0);
@@ -971,12 +1101,17 @@ apple_protect_pager_lookup(
 
 apple_protect_pager_t
 apple_protect_pager_create(
-       vm_object_t     backing_object,
-       struct pager_crypt_info *crypt_info)
+       vm_object_t             backing_object,
+       vm_object_offset_t      backing_offset,
+       vm_object_offset_t      crypto_backing_offset,
+       struct pager_crypt_info *crypt_info,
+       vm_object_offset_t      crypto_start,
+       vm_object_offset_t      crypto_end)
 {
        apple_protect_pager_t   pager, pager2;
        memory_object_control_t control;
        kern_return_t           kr;
+       struct pager_crypt_info *old_crypt_info;
 
        pager = (apple_protect_pager_t) kalloc(sizeof (*pager));
        if (pager == APPLE_PROTECT_PAGER_NULL) {
@@ -993,32 +1128,88 @@ apple_protect_pager_create(
        pager->pager_ops = &apple_protect_pager_ops;
        pager->pager_ikot = IKOT_MEMORY_OBJECT;
        pager->is_ready = FALSE;/* not ready until it has a "name" */
-       pager->ref_count = 2;   /* existence + setup reference */
+       pager->ref_count = 1;   /* existence reference (for the cache) */
+       pager->ref_count++;     /* for the caller */
        pager->is_mapped = FALSE;
        pager->pager_control = MEMORY_OBJECT_CONTROL_NULL;
        pager->backing_object = backing_object;
-       pager->crypt = *crypt_info;
+       pager->backing_offset = backing_offset;
+       pager->crypto_backing_offset = crypto_backing_offset;
+       pager->crypto_start = crypto_start;
+       pager->crypto_end = crypto_end;
+       pager->crypt_info = crypt_info; /* allocated by caller */
+
+#if CRYPT_INFO_DEBUG
+       printf("CRYPT_INFO %s: crypt_info %p [%p,%p,%p,%d]\n",
+              __FUNCTION__,
+              crypt_info,
+              crypt_info->page_decrypt,
+              crypt_info->crypt_end,
+              crypt_info->crypt_ops,
+              crypt_info->crypt_refcnt);
+#endif /* CRYPT_INFO_DEBUG */
        
        vm_object_reference(backing_object);
 
+       old_crypt_info = NULL;
+
        lck_mtx_lock(&apple_protect_pager_lock);
        /* see if anyone raced us to create a pager for the same object */
        queue_iterate(&apple_protect_pager_queue,
                      pager2,
                      apple_protect_pager_t,
                      pager_queue) {
-               if (pager2->backing_object == backing_object) {
+               if ((pager2->crypt_info->page_decrypt !=
+                    crypt_info->page_decrypt) ||
+                   (pager2->crypt_info->crypt_end !=
+                    crypt_info->crypt_end) ||
+                   (pager2->crypt_info->crypt_ops !=
+                    crypt_info->crypt_ops)) {
+                       /* crypt_info contents do not match: next pager */
+                       continue;
+               }
+
+               /* found a match for crypt_info ... */
+               if (old_crypt_info) {
+                       /* ... already switched to that crypt_info */
+                       assert(old_crypt_info == pager2->crypt_info);
+               } else if (pager2->crypt_info != crypt_info) {
+                       /* ... switch to that pager's crypt_info */
+#if CRYPT_INFO_DEBUG
+                       printf("CRYPT_INFO %s: reference %p ref %d "
+                              "(create match)\n",
+                              __FUNCTION__,
+                              pager2->crypt_info,
+                              pager2->crypt_info->crypt_refcnt);
+#endif /* CRYPT_INFO_DEBUG */
+                       old_crypt_info = pager2->crypt_info;
+                       crypt_info_reference(old_crypt_info);
+                       pager->crypt_info = old_crypt_info;
+               }
+               
+               if (pager2->backing_object == backing_object &&
+                   pager2->backing_offset == backing_offset &&
+                   pager2->crypto_backing_offset == crypto_backing_offset &&
+                   pager2->crypto_start == crypto_start &&
+                   pager2->crypto_end == crypto_end) {
+                       /* full match: use that pager */
                        break;
                }
        }
        if (! queue_end(&apple_protect_pager_queue,
                        (queue_entry_t) pager2)) {
-               /* while we hold the lock, transfer our setup ref to winner */
-               pager2->ref_count++;
                /* we lost the race, down with the loser... */
                lck_mtx_unlock(&apple_protect_pager_lock);
                vm_object_deallocate(pager->backing_object);
                pager->backing_object = VM_OBJECT_NULL;
+#if CRYPT_INFO_DEBUG
+               printf("CRYPT_INFO %s: %p ref %d (create pager match)\n",
+                      __FUNCTION__,
+                      pager->crypt_info,
+                      pager->crypt_info->crypt_refcnt);
+#endif /* CRYPT_INFO_DEBUG */
+               crypt_info_deallocate(pager->crypt_info);
+               pager->crypt_info = NULL;
                kfree(pager, sizeof (*pager));
                /* ... and go with the winner */
                pager = pager2;
@@ -1050,6 +1241,20 @@ apple_protect_pager_create(
        /* wakeup anyone waiting for this pager to be ready */
        thread_wakeup(&pager->is_ready);
 
+       if (old_crypt_info != NULL &&
+           old_crypt_info != crypt_info) {
+               /* we re-used an old crypt_info instead of using our new one */
+#if CRYPT_INFO_DEBUG
+               printf("CRYPT_INFO %s: deallocate %p ref %d "
+                      "(create used old)\n",
+                      __FUNCTION__,
+                      crypt_info,
+                      crypt_info->crypt_refcnt);
+#endif /* CRYPT_INFO_DEBUG */
+               crypt_info_deallocate(crypt_info);
+               crypt_info = NULL;
+       }
+
        return pager;
 }
 
@@ -1062,10 +1267,27 @@ apple_protect_pager_create(
  */
 memory_object_t
 apple_protect_pager_setup(
-                         vm_object_t   backing_object,
-                         struct pager_crypt_info *crypt_info)
+       vm_object_t             backing_object,
+       vm_object_offset_t      backing_offset,
+       vm_object_offset_t      crypto_backing_offset,
+       struct pager_crypt_info *crypt_info,
+       vm_object_offset_t      crypto_start,
+       vm_object_offset_t      crypto_end)
 {
        apple_protect_pager_t   pager;
+       struct pager_crypt_info *old_crypt_info, *new_crypt_info;
+
+#if CRYPT_INFO_DEBUG
+       printf("CRYPT_INFO %s: crypt_info=%p [%p,%p,%p,%d]\n",
+              __FUNCTION__,
+              crypt_info,
+              crypt_info->page_decrypt,
+              crypt_info->crypt_end,
+              crypt_info->crypt_ops,
+              crypt_info->crypt_refcnt);
+#endif /* CRYPT_INFO_DEBUG */
+
+       old_crypt_info = NULL;
 
        lck_mtx_lock(&apple_protect_pager_lock);
 
@@ -1073,35 +1295,146 @@ apple_protect_pager_setup(
                      pager,
                      apple_protect_pager_t,
                      pager_queue) {
-               if (pager->backing_object == backing_object) {
-                       /* For the same object we must always use the same protection options */
-                       if (!((pager->crypt.page_decrypt == crypt_info->page_decrypt) &&
-                             (pager->crypt.crypt_ops == crypt_info->crypt_ops) )) {
-                               lck_mtx_unlock(&apple_protect_pager_lock);
-                               return MEMORY_OBJECT_NULL;
-                       }
+               if ((pager->crypt_info->page_decrypt !=
+                    crypt_info->page_decrypt) ||
+                   (pager->crypt_info->crypt_end !=
+                    crypt_info->crypt_end) ||
+                   (pager->crypt_info->crypt_ops !=
+                    crypt_info->crypt_ops)) {
+                       /* no match for "crypt_info": next pager */
+                       continue;
+               }
+               /* found a match for crypt_info ... */
+               if (old_crypt_info) {
+                       /* ... already switched to that crypt_info */
+                       assert(old_crypt_info == pager->crypt_info);
+               } else {
+                       /* ... switch to that pager's crypt_info */
+                       old_crypt_info = pager->crypt_info;
+#if CRYPT_INFO_DEBUG
+                       printf("CRYPT_INFO %s: "
+                              "switching crypt_info from %p [%p,%p,%p,%d] "
+                              "to %p [%p,%p,%p,%d] from pager %p\n",
+                              __FUNCTION__,
+                              crypt_info,
+                              crypt_info->page_decrypt,
+                              crypt_info->crypt_end,
+                              crypt_info->crypt_ops,
+                              crypt_info->crypt_refcnt,
+                              old_crypt_info,
+                              old_crypt_info->page_decrypt,
+                              old_crypt_info->crypt_end,
+                              old_crypt_info->crypt_ops,
+                              old_crypt_info->crypt_refcnt,
+                              pager);
+                       printf("CRYPT_INFO %s: %p ref %d (setup match)\n",
+                              __FUNCTION__,
+                              pager->crypt_info,
+                              pager->crypt_info->crypt_refcnt);
+#endif /* CRYPT_INFO_DEBUG */
+                       crypt_info_reference(pager->crypt_info);
+               }
+               
+               if (pager->backing_object == backing_object &&
+                   pager->backing_offset == backing_offset &&
+                   pager->crypto_backing_offset == crypto_backing_offset &&
+                   pager->crypto_start == crypto_start &&
+                   pager->crypto_end == crypto_end) {
+                       /* full match: use that pager! */
+                       assert(old_crypt_info == pager->crypt_info);
+                       assert(old_crypt_info->crypt_refcnt > 1);
+#if CRYPT_INFO_DEBUG
+                       printf("CRYPT_INFO %s: "
+                              "pager match with %p crypt_info %p\n",
+                              __FUNCTION__,
+                              pager,
+                              pager->crypt_info);
+                       printf("CRYPT_INFO %s: deallocate %p ref %d "
+                              "(pager match)\n",
+                              __FUNCTION__,
+                              old_crypt_info,
+                              old_crypt_info->crypt_refcnt);
+#endif /* CRYPT_INFO_DEBUG */
+                       /* release the extra ref on crypt_info we got above */
+                       crypt_info_deallocate(old_crypt_info);
+                       assert(old_crypt_info->crypt_refcnt > 0);
+                       /* give extra reference on pager to the caller */
+                       assert(pager->ref_count > 0);
+                       pager->ref_count++;
                        break;
                }
        }
        if (queue_end(&apple_protect_pager_queue,
                      (queue_entry_t) pager)) {
+               lck_mtx_unlock(&apple_protect_pager_lock);
                /* no existing pager for this backing object */
                pager = APPLE_PROTECT_PAGER_NULL;
-       } else {
-               /* make sure pager doesn't disappear */
-               pager->ref_count++;
-       }
-
-       lck_mtx_unlock(&apple_protect_pager_lock);
-
-       if (pager == APPLE_PROTECT_PAGER_NULL) {
-               pager = apple_protect_pager_create(backing_object, crypt_info);
+               if (old_crypt_info) {
+                       /* use this old crypt_info for new pager */
+                       new_crypt_info = old_crypt_info;
+#if CRYPT_INFO_DEBUG
+                       printf("CRYPT_INFO %s: "
+                              "will use old_crypt_info %p for new pager\n",
+                              __FUNCTION__,
+                              old_crypt_info);
+#endif /* CRYPT_INFO_DEBUG */
+               } else {
+                       /* allocate a new crypt_info for new pager */
+                       new_crypt_info = kalloc(sizeof (*new_crypt_info));
+                       *new_crypt_info = *crypt_info;
+                       new_crypt_info->crypt_refcnt = 1;
+#if CRYPT_INFO_DEBUG
+                       printf("CRYPT_INFO %s: "
+                              "will use new_crypt_info %p for new pager\n",
+                              __FUNCTION__,
+                              new_crypt_info);
+#endif /* CRYPT_INFO_DEBUG */
+               }
+               if (new_crypt_info == NULL) {
+                       /* can't create new pager without a crypt_info */
+               } else {
+                       /* create new pager */
+                       pager = apple_protect_pager_create(
+                               backing_object,
+                               backing_offset,
+                               crypto_backing_offset,
+                               new_crypt_info,
+                               crypto_start,
+                               crypto_end);
+               }
                if (pager == APPLE_PROTECT_PAGER_NULL) {
+                       /* could not create a new pager */
+                       if (new_crypt_info == old_crypt_info) {
+                               /* release extra reference on old_crypt_info */
+#if CRYPT_INFO_DEBUG
+                               printf("CRYPT_INFO %s: deallocate %p ref %d "
+                                      "(create fail old_crypt_info)\n",
+                                      __FUNCTION__,
+                                      old_crypt_info, 
+                                      old_crypt_info->crypt_refcnt);
+#endif /* CRYPT_INFO_DEBUG */
+                               crypt_info_deallocate(old_crypt_info);
+                               old_crypt_info = NULL;
+                       } else {
+                               /* release unused new_crypt_info */
+                               assert(new_crypt_info->crypt_refcnt == 1);
+#if CRYPT_INFO_DEBUG
+                               printf("CRYPT_INFO %s: deallocate %p ref %d "
+                                      "(create fail new_crypt_info)\n",
+                                      __FUNCTION__,
+                                      new_crypt_info,
+                                      new_crypt_info->crypt_refcnt);
+#endif /* CRYPT_INFO_DEBUG */
+                               crypt_info_deallocate(new_crypt_info);
+                               new_crypt_info = NULL;
+                       }
                        return MEMORY_OBJECT_NULL;
                }
+               lck_mtx_lock(&apple_protect_pager_lock);
+       } else {
+               assert(old_crypt_info == pager->crypt_info);
        }
 
-       lck_mtx_lock(&apple_protect_pager_lock);
        while (!pager->is_ready) {
                lck_mtx_sleep(&apple_protect_pager_lock,
                        LCK_SLEEP_DEFAULT,
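
A hypothetical caller sketch for the widened setup interface (variable names are placeholders; an in-kernel caller would pass offsets derived from the protected mapping request): the caller supplies a crypt_info template that setup() either shares with an existing pager or clones into a refcounted heap copy.

	memory_object_t pager_mo;

	pager_mo = apple_protect_pager_setup(backing_object,
					     backing_offset,
					     crypto_backing_offset,
					     &crypt_info,	/* template */
					     crypto_start,
					     crypto_end);
	if (pager_mo == MEMORY_OBJECT_NULL) {
		/* could not find or create a pager for this mapping */
	}
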
@@ -1185,3 +1518,47 @@ apple_protect_pager_trim(void)
                apple_protect_pager_terminate_internal(pager);
        }
 }
+
+
+void
+crypt_info_reference(
+       struct pager_crypt_info *crypt_info)
+{
+       assert(crypt_info->crypt_refcnt != 0);
+#if CRYPT_INFO_DEBUG
+       printf("CRYPT_INFO %s: %p ref %d -> %d\n",
+              __FUNCTION__,
+              crypt_info,
+              crypt_info->crypt_refcnt,
+              crypt_info->crypt_refcnt + 1);
+#endif /* CRYPT_INFO_DEBUG */
+       OSAddAtomic(+1, &crypt_info->crypt_refcnt);
+}
+
+void
+crypt_info_deallocate(
+       struct pager_crypt_info *crypt_info)
+{
+#if CRYPT_INFO_DEBUG
+       printf("CRYPT_INFO %s: %p ref %d -> %d\n",
+              __FUNCTION__,
+              crypt_info,
+              crypt_info->crypt_refcnt,
+              crypt_info->crypt_refcnt - 1);
+#endif /* CRYPT_INFO_DEBUG */
+       OSAddAtomic(-1, &crypt_info->crypt_refcnt);
+       if (crypt_info->crypt_refcnt == 0) {
+               /* deallocate any crypt module data */
+               if (crypt_info->crypt_end) {
+                       crypt_info->crypt_end(crypt_info->crypt_ops);
+                       crypt_info->crypt_end = NULL;
+               }
+#if CRYPT_INFO_DEBUG
+               printf("CRYPT_INFO %s: freeing %p\n",
+                      __FUNCTION__,
+                      crypt_info);
+#endif /* CRYPT_INFO_DEBUG */
+               kfree(crypt_info, sizeof (*crypt_info));
+               crypt_info = NULL;
+       }
+}
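
The two routines above define the reference-count discipline for a shared pager_crypt_info; a minimal usage sketch, assuming `shared_ci` is a crypt_info already owned by some other pager:

	/* take a reference before publishing the pointer in a new pager */
	pager->crypt_info = shared_ci;
	crypt_info_reference(shared_ci);

	/* ... pager lifetime ... */

	/* dropping the last reference runs crypt_end() and frees the struct */
	crypt_info_deallocate(pager->crypt_info);
	pager->crypt_info = NULL;
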
index 22485828281f6821bd7be4fcecb1fcf08ff6ccf5..cf05d1950b8c6493b5c16e655951b1ae2f51dee3 100644 (file)
@@ -38,6 +38,8 @@
 #include <mach/mach_host.h>            /* for host_info() */
 #include <kern/ledger.h>
 
+#include <i386/misc_protos.h>
+
 #include <default_pager/default_pager_alerts.h>
 #include <default_pager/default_pager_object_server.h>
 
  * the boot-arg & device-tree code.
  */
 
-extern ipc_port_t min_pages_trigger_port;
-extern lck_mtx_t paging_segments_lock;
-#define PSL_LOCK()             lck_mtx_lock(&paging_segments_lock)
-#define PSL_UNLOCK()   lck_mtx_unlock(&paging_segments_lock)
-
 
 int            vm_compressor_mode = VM_PAGER_COMPRESSOR_WITH_SWAP;
 int            vm_scale = 16;
@@ -62,36 +59,46 @@ int         vm_scale = 16;
 
 int            vm_compressor_is_active = 0;
 int            vm_compression_limit = 0;
+int            vm_compressor_available = 0;
 
 extern boolean_t vm_swap_up;
 extern void    vm_pageout_io_throttle(void);
+extern int     not_in_kdp;
 
 #if CHECKSUM_THE_DATA || CHECKSUM_THE_SWAP || CHECKSUM_THE_COMPRESSED_DATA
 extern unsigned int hash_string(char *cp, int len);
 #endif
 
+#define UNPACK_C_SIZE(cs)      ((cs->c_size == (PAGE_SIZE-1)) ? PAGE_SIZE : cs->c_size)
+#define PACK_C_SIZE(cs, size)  (cs->c_size = ((size == PAGE_SIZE) ? PAGE_SIZE - 1 : size))
 
-struct c_slot {
-       uint64_t        c_offset:C_SEG_OFFSET_BITS,
-                       c_size:12,
-                       c_packed_ptr:36;
-#if CHECKSUM_THE_DATA
-       unsigned int    c_hash_data;
-#endif
-#if CHECKSUM_THE_COMPRESSED_DATA
-       unsigned int    c_hash_compressed_data;
-#endif
 
+struct c_sv_hash_entry {
+       union {
+               struct  {
+                       uint32_t        c_sv_he_ref;
+                       uint32_t        c_sv_he_data;
+               } c_sv_he;
+               uint64_t        c_sv_he_record;
+
+       } c_sv_he_un;   
 };
 
-#define UNPACK_C_SIZE(cs)      ((cs->c_size == (PAGE_SIZE-1)) ? PAGE_SIZE : cs->c_size)
-#define PACK_C_SIZE(cs, size)  (cs->c_size = ((size == PAGE_SIZE) ? PAGE_SIZE - 1 : size))
+#define        he_ref  c_sv_he_un.c_sv_he.c_sv_he_ref
+#define        he_data c_sv_he_un.c_sv_he.c_sv_he_data
+#define        he_record c_sv_he_un.c_sv_he_record
+
+#define C_SV_HASH_MAX_MISS     32
+#define C_SV_HASH_SIZE         ((1 << 10))
+#define C_SV_HASH_MASK         ((1 << 10) - 1)
+#define C_SV_CSEG_ID           ((1 << 22) - 1)
 
 
 struct  c_slot_mapping {
         uint32_t        s_cseg:22,     /* segment number + 1 */
                        s_cindx:10;     /* index in the segment */
 };
+#define C_SLOT_MAX_INDEX       (1 << 10)
 
 typedef struct c_slot_mapping *c_slot_mapping_t;
 
@@ -108,6 +115,7 @@ union c_segu {
 
 
 uint32_t       c_segment_count = 0;
+uint32_t       c_segment_count_max = 0;
 
 uint64_t       c_generation_id = 0;
 uint64_t       c_generation_id_flush_barrier;
@@ -119,32 +127,59 @@ boolean_t hibernate_no_swapspace = FALSE;
 clock_sec_t    hibernate_flushing_deadline = 0;
 
 
-#if TRACK_BAD_C_SEGMENTS
-queue_head_t   c_bad_list_head;
-uint32_t       c_bad_count = 0;
+#if RECORD_THE_COMPRESSED_DATA
+char   *c_compressed_record_sbuf;
+char   *c_compressed_record_ebuf;
+char   *c_compressed_record_cptr;
 #endif
 
+
 queue_head_t   c_age_list_head;
 queue_head_t   c_swapout_list_head;
 queue_head_t   c_swappedin_list_head;
 queue_head_t   c_swappedout_list_head;
 queue_head_t   c_swappedout_sparse_list_head;
+queue_head_t   c_major_list_head;
+queue_head_t   c_filling_list_head;
+queue_head_t   c_bad_list_head;
 
 uint32_t       c_age_count = 0;
 uint32_t       c_swapout_count = 0;
 uint32_t       c_swappedin_count = 0;
 uint32_t       c_swappedout_count = 0;
 uint32_t       c_swappedout_sparse_count = 0;
+uint32_t       c_major_count = 0;
+uint32_t       c_filling_count = 0;
+uint32_t       c_empty_count = 0;
+uint32_t       c_bad_count = 0;
+
 
 queue_head_t   c_minor_list_head;
 uint32_t       c_minor_count = 0;
 
+int            c_overage_swapped_count = 0;
+int            c_overage_swapped_limit = 0;
+
+int            c_seg_fixed_array_len;
 union  c_segu  *c_segments;
+vm_offset_t    c_buffers;
+vm_size_t       c_buffers_size;
 caddr_t                c_segments_next_page;
 boolean_t      c_segments_busy;
 uint32_t       c_segments_available;
 uint32_t       c_segments_limit;
 uint32_t       c_segments_nearing_limit;
+
+uint32_t       c_segment_svp_in_hash;
+uint32_t       c_segment_svp_hash_succeeded;
+uint32_t       c_segment_svp_hash_failed;
+uint32_t       c_segment_svp_zero_compressions;
+uint32_t       c_segment_svp_nonzero_compressions;
+uint32_t       c_segment_svp_zero_decompressions;
+uint32_t       c_segment_svp_nonzero_decompressions;
+
+uint32_t       c_segment_noncompressible_pages;
+
 uint32_t       c_segment_pages_compressed;
 uint32_t       c_segment_pages_compressed_limit;
 uint32_t       c_segment_pages_compressed_nearing_limit;
@@ -162,7 +197,6 @@ lck_grp_attr_t      vm_compressor_lck_grp_attr;
 lck_attr_t     vm_compressor_lck_attr;
 lck_grp_t      vm_compressor_lck_grp;
 
-
 #if __i386__ || __x86_64__
 lck_mtx_t      *c_list_lock;
 #else /* __i386__ || __x86_64__ */
@@ -177,7 +211,10 @@ int                c_compressor_swap_trigger = 0;
 
 uint32_t       compressor_cpus;
 char           *compressor_scratch_bufs;
-
+char           *kdp_compressor_scratch_buf;
+char           *kdp_compressor_decompressed_page;
+addr64_t       kdp_compressor_decompressed_page_paddr;
+ppnum_t                kdp_compressor_decompressed_page_ppnum;
 
 clock_sec_t    start_of_sample_period_sec = 0;
 clock_nsec_t   start_of_sample_period_nsec = 0;
@@ -190,6 +227,9 @@ uint32_t    last_eval_compression_count = 0;
 
 #define                DECOMPRESSION_SAMPLE_MAX_AGE            (60 * 30)
 
+boolean_t      vm_swapout_ripe_segments = FALSE;
+uint32_t       vm_ripe_target_age = (60 * 60 * 48);
+
 uint32_t       swapout_target_age = 0;
 uint32_t       age_of_decompressions_during_sample_period[DECOMPRESSION_SAMPLE_MAX_AGE];
 uint32_t       overage_decompressions_during_sample_period = 0;
@@ -207,8 +247,10 @@ boolean_t  hibernate_flushing = FALSE;
 int64_t                c_segment_input_bytes __attribute__((aligned(8))) = 0;
 int64_t                c_segment_compressed_bytes __attribute__((aligned(8))) = 0;
 int64_t                compressor_bytes_used __attribute__((aligned(8))) = 0;
-uint64_t       compressor_kvspace_used __attribute__((aligned(8))) = 0;
-uint64_t       compressor_kvwaste_limit = 0;
+
+
+struct c_sv_hash_entry c_segment_sv_hash_table[C_SV_HASH_SIZE]  __attribute__ ((aligned (8)));
+
 
 static boolean_t compressor_needs_to_swap(void);
 static void vm_compressor_swap_trigger_thread(void);
@@ -216,6 +258,8 @@ static void vm_compressor_do_delayed_compactions(boolean_t);
 static void vm_compressor_compact_and_swap(boolean_t);
 static void vm_compressor_age_swapped_in_segments(boolean_t);
 
+static void vm_compressor_take_paging_space_action(void);
+
 boolean_t vm_compressor_low_on_space(void);
 
 void compute_swapout_target_age(void);
@@ -231,11 +275,6 @@ void c_seg_need_delayed_compaction(c_segment_t);
 void c_seg_move_to_sparse_list(c_segment_t);
 void c_seg_insert_into_q(queue_head_t *, c_segment_t);
 
-boolean_t c_seg_try_free(c_segment_t);
-void     c_seg_free(c_segment_t);
-void     c_seg_free_locked(c_segment_t);
-
-
 uint64_t vm_available_memory(void);
 uint64_t vm_compressor_pages_compressed(void);
 
@@ -298,6 +337,28 @@ vm_wants_task_throttled(task_t task)
 }
 
 
+
+static uint32_t        no_paging_space_action_in_progress = 0;
+extern void memorystatus_send_low_swap_note(void);
+
+static void
+vm_compressor_take_paging_space_action(void)
+{
+       if (no_paging_space_action_in_progress == 0) {
+
+               if (OSCompareAndSwap(0, 1, (UInt32 *)&no_paging_space_action_in_progress)) {
+
+                       if (no_paging_space_action()) {
+                               memorystatus_send_low_swap_note();
+                       }
+
+                       no_paging_space_action_in_progress = 0;
+               }
+       }
+}
+
+
+
 void
 vm_compressor_init_locks(void)
 {
@@ -339,6 +400,8 @@ vm_compressor_init(void)
        thread_t        thread;
        struct c_slot   cs_dummy;
        c_slot_t cs  = &cs_dummy;
+       int             c_segment_min_size;
+       int             c_segment_padded_size;
 
        /*
         * ensure that any pointer that gets created from
@@ -382,22 +445,26 @@ vm_compressor_init(void)
        c_list_lock = lck_spin_alloc_init(&vm_compressor_lck_grp, &vm_compressor_lck_attr);
 #endif /* __i386__ || __x86_64__ */
 
-#if TRACK_BAD_C_SEGMENTS
+
        queue_init(&c_bad_list_head);
-#endif
        queue_init(&c_age_list_head);
        queue_init(&c_minor_list_head);
+       queue_init(&c_major_list_head);
+       queue_init(&c_filling_list_head);
        queue_init(&c_swapout_list_head);
        queue_init(&c_swappedin_list_head);
        queue_init(&c_swappedout_list_head);
        queue_init(&c_swappedout_sparse_list_head);
 
-       compressor_segment_zone = zinit(sizeof (struct c_segment),
-                                     128000 * sizeof (struct c_segment),
-                                     8192, "compressor_segment");
+       c_segment_min_size = sizeof(struct c_segment) + (C_SEG_SLOT_VAR_ARRAY_MIN_LEN * sizeof(struct c_slot));
+       
+       for (c_segment_padded_size = 128; c_segment_padded_size < c_segment_min_size; c_segment_padded_size = c_segment_padded_size << 1);
+
+       compressor_segment_zone = zinit(c_segment_padded_size, 128000 * c_segment_padded_size, PAGE_SIZE, "compressor_segment");
        zone_change(compressor_segment_zone, Z_CALLERACCT, FALSE);
        zone_change(compressor_segment_zone, Z_NOENCRYPT, TRUE);
 
+       c_seg_fixed_array_len = (c_segment_padded_size - sizeof(struct c_segment)) / sizeof(struct c_slot);
        
        c_free_segno_head = -1;
        c_segments_available = 0;
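
A worked example of the zone sizing just above (the struct sizes used here are illustrative, not the real ones):

	/* Assume sizeof(struct c_segment) == 80, sizeof(struct c_slot) == 16
	 * and C_SEG_SLOT_VAR_ARRAY_MIN_LEN == 4 (illustrative values only):
	 *
	 *   c_segment_min_size    = 80 + 4 * 16      = 144
	 *   c_segment_padded_size = next pow2 >= 144 = 256
	 *   c_seg_fixed_array_len = (256 - 80) / 16  = 11
	 *
	 * i.e. each zone element is rounded up to a power of two and the
	 * padding is reused as an inline array of c_slots, so small segments
	 * need no separate slot allocation. */
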
@@ -425,12 +492,13 @@ vm_compressor_init(void)
        c_segment_pages_compressed_nearing_limit = (c_segment_pages_compressed_limit * 98) / 100;
        c_segments_nearing_limit = (c_segments_limit * 98) / 100;
 
-       compressor_kvwaste_limit = (vm_map_max(kernel_map) - vm_map_min(kernel_map)) / 16;
-
        c_segments_busy = FALSE;
 
-       if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&c_segments), (sizeof(union c_segu) * c_segments_limit), 0, KMA_KOBJECT | KMA_VAONLY) != KERN_SUCCESS)
-               panic("vm_compressor_init: kernel_memory_allocate failed\n");
+       if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&c_segments), (sizeof(union c_segu) * c_segments_limit), 0, KMA_KOBJECT | KMA_VAONLY | KMA_PERMANENT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS)
+               panic("vm_compressor_init: kernel_memory_allocate failed - c_segments\n");
+       c_buffers_size = (vm_size_t)C_SEG_ALLOCSIZE * (vm_size_t)c_segments_limit;
+       if (kernel_memory_allocate(kernel_map, &c_buffers, c_buffers_size, 0, KMA_COMPRESSOR | KMA_VAONLY | KMA_PERMANENT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS)
+               panic("vm_compressor_init: kernel_memory_allocate failed - c_buffers\n");
 
        c_segments_next_page = (caddr_t)c_segments;
 
@@ -443,15 +511,29 @@ vm_compressor_init(void)
 
                compressor_cpus = hinfo.max_cpus;
 
-               compressor_scratch_bufs = kalloc(compressor_cpus * WKdm_SCRATCH_BUF_SIZE);
+               compressor_scratch_bufs = kalloc_tag(compressor_cpus * WKdm_SCRATCH_BUF_SIZE, VM_KERN_MEMORY_COMPRESSOR);
+
+               kdp_compressor_scratch_buf = kalloc_tag(WKdm_SCRATCH_BUF_SIZE, VM_KERN_MEMORY_COMPRESSOR);
+               kdp_compressor_decompressed_page = kalloc_tag(PAGE_SIZE, VM_KERN_MEMORY_COMPRESSOR);
+               kdp_compressor_decompressed_page_paddr = kvtophys((vm_offset_t)kdp_compressor_decompressed_page);
+               kdp_compressor_decompressed_page_ppnum = (ppnum_t) atop(kdp_compressor_decompressed_page_paddr);
        }
+#if CONFIG_FREEZE              
+       freezer_compressor_scratch_buf = kalloc_tag(WKdm_SCRATCH_BUF_SIZE, VM_KERN_MEMORY_COMPRESSOR);
+#endif
+
+#if RECORD_THE_COMPRESSED_DATA
+       if (kernel_memory_allocate(kernel_map, (vm_offset_t *)&c_compressed_record_sbuf, (vm_size_t)C_SEG_ALLOCSIZE + (PAGE_SIZE * 2), 0, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS)
+               panic("vm_compressor_init: kernel_memory_allocate failed - c_compressed_record_sbuf\n");
+
+       c_compressed_record_cptr = c_compressed_record_sbuf;
+       c_compressed_record_ebuf = c_compressed_record_sbuf + C_SEG_ALLOCSIZE + (PAGE_SIZE * 2);
+#endif
 
        if (kernel_thread_start_priority((thread_continue_t)vm_compressor_swap_trigger_thread, NULL,
                                         BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
                panic("vm_compressor_swap_trigger_thread: create failed");
        }
-       thread->options |= TH_OPT_VMPRIV;
-
        thread_deallocate(thread);
 
        assert(default_pager_init_flag == 0);
@@ -459,11 +541,8 @@ vm_compressor_init(void)
        if (vm_pageout_internal_start() != KERN_SUCCESS) {
                panic("vm_compressor_init: Failed to start the internal pageout thread.\n");
        }
-
-       if ((vm_compressor_mode == VM_PAGER_COMPRESSOR_WITH_SWAP) ||
-           (vm_compressor_mode == VM_PAGER_FREEZER_COMPRESSOR_WITH_SWAP)) {
+       if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)
                vm_compressor_swap_init();
-       }
 
        if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED)
                vm_compressor_is_active = 1;
@@ -473,6 +552,7 @@ vm_compressor_init(void)
 #endif /* CONFIG_FREEZE */
 
        default_pager_init_flag = 1;
+       vm_compressor_available = 1;
 
        vm_page_reactivate_all_throttled();
 }
@@ -550,7 +630,9 @@ c_seg_need_delayed_compaction(c_segment_t c_seg)
 
                clear_busy = TRUE;
        }
-       if (!c_seg->c_on_minorcompact_q && !c_seg->c_ondisk && !c_seg->c_on_swapout_q) {
+       assert(c_seg->c_state != C_IS_FILLING);
+
+       if (!c_seg->c_on_minorcompact_q && !(C_SEG_IS_ONDISK(c_seg))) {
                queue_enter(&c_minor_list_head, c_seg, c_segment_t, c_list);
                c_seg->c_on_minorcompact_q = 1;
                c_minor_count++;
@@ -578,17 +660,7 @@ c_seg_move_to_sparse_list(c_segment_t c_seg)
                
                clear_busy = TRUE;
        }
-       assert(c_seg->c_ondisk);
-       assert(c_seg->c_on_swappedout_q);
-       assert(!c_seg->c_on_swappedout_sparse_q);
-
-       queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
-       c_seg->c_on_swappedout_q = 0;
-       c_swappedout_count--;
-
-       c_seg_insert_into_q(&c_swappedout_sparse_list_head, c_seg);
-       c_seg->c_on_swappedout_sparse_q = 1;
-       c_swappedout_sparse_count++;
+       c_seg_switch_state(c_seg, C_ON_SWAPPEDOUTSPARSE_Q, FALSE);
 
        c_seg_moved_to_sparse_list++;
 
@@ -664,6 +736,19 @@ c_seg_do_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy, bo
 
        assert(c_seg->c_busy);
 
+       /*
+        * check for the case that can occur when we are not swapping
+        * and this segment has been major compacted in the past
+        * and moved to the majorcompact q to remove it from further
+        * consideration... if the occupancy falls too low we need
+        * to put it back on the age_q so that it will be considered
+        * in the next major compaction sweep... if we don't do this
+        * we will eventually run into the c_segments_limit
+        */
+       if (c_seg->c_state == C_ON_MAJORCOMPACT_Q && C_SEG_SHOULD_MAJORCOMPACT(c_seg)) {
+               
+               c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
+       }
        if (!c_seg->c_on_minorcompact_q) {
                if (clear_busy == TRUE)
                        C_SEG_WAKEUP_DONE(c_seg);
@@ -708,49 +793,186 @@ c_seg_wait_on_busy(c_segment_t c_seg)
 }
 
 
+void
+c_seg_switch_state(c_segment_t c_seg, int new_state, boolean_t insert_head)
+{
+       int     old_state = c_seg->c_state;
 
-int    try_free_succeeded = 0;
-int    try_free_failed = 0;
+#if DEVELOPMENT || DEBUG
+#if __i386__ || __x86_64__
+       if (new_state != C_IS_FILLING)
+               lck_mtx_assert(&c_seg->c_lock, LCK_MTX_ASSERT_OWNED);
+       lck_mtx_assert(c_list_lock, LCK_MTX_ASSERT_OWNED);
+#endif
+#endif
+       switch (old_state) {
 
-boolean_t
-c_seg_try_free(c_segment_t c_seg)
-{
-       /*
-        * c_seg is currently on the delayed minor compaction
-        * or the spapped out sparse queue and we have c_seg locked...
-        * if we can get the c_list_lock w/o blocking (if we blocked we
-        * could deadlock because the lock order is c_list_lock then c_seg's lock)
-        * we'll pull it from the appropriate queue and free it
-        */
-       if ( !lck_mtx_try_lock_spin_always(c_list_lock)) {
-               /*
-                * c_list_lock is held, we need to bail
-                */
-               try_free_failed++;
-               return (FALSE);
-       }
-       if (c_seg->c_on_minorcompact_q) {
-               queue_remove(&c_minor_list_head, c_seg, c_segment_t, c_list);
-               c_seg->c_on_minorcompact_q = 0;
-               c_minor_count--;
-       } else {
-               assert(c_seg->c_on_swappedout_sparse_q);
+               case C_IS_EMPTY:
+                       assert(new_state == C_IS_FILLING || new_state == C_IS_FREE);
 
-               /*
-                * c_seg_free_locked will remove it from the swappedout sparse list
-                */
+                       c_empty_count--;
+                       break;
+
+               case C_IS_FILLING:
+                       assert(new_state == C_ON_AGE_Q || new_state == C_ON_SWAPOUT_Q);
+
+                       queue_remove(&c_filling_list_head, c_seg, c_segment_t, c_age_list);
+                       c_filling_count--;
+                       break;
+
+               case C_ON_AGE_Q:
+                       assert(new_state == C_ON_SWAPOUT_Q || new_state == C_ON_MAJORCOMPACT_Q ||
+                              new_state == C_IS_FREE);
+
+                       queue_remove(&c_age_list_head, c_seg, c_segment_t, c_age_list);
+                       c_age_count--;
+                       break;
+               
+               case C_ON_SWAPPEDIN_Q:
+                       assert(new_state == C_ON_AGE_Q || new_state == C_IS_FREE);
+
+                       queue_remove(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list);
+                       c_swappedin_count--;
+                       break;
+
+               case C_ON_SWAPOUT_Q:
+                       assert(new_state == C_ON_SWAPPEDOUT_Q || new_state == C_ON_SWAPPEDOUTSPARSE_Q ||
+                              new_state == C_ON_AGE_Q || new_state == C_IS_FREE || new_state == C_IS_EMPTY);
+
+                       queue_remove(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
+                       thread_wakeup((event_t)&compaction_swapper_running);
+                       c_swapout_count--;
+                       break;
+
+               case C_ON_SWAPPEDOUT_Q:
+                       assert(new_state == C_ON_SWAPPEDIN_Q || new_state == C_ON_SWAPPEDOUTSPARSE_Q ||
+                              new_state == C_ON_BAD_Q || new_state == C_IS_EMPTY || new_state == C_IS_FREE);
+
+                       queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
+                       c_swappedout_count--;
+                       break;
+
+               case C_ON_SWAPPEDOUTSPARSE_Q:
+                       assert(new_state == C_ON_SWAPPEDIN_Q ||
+                              new_state == C_ON_BAD_Q || new_state == C_IS_EMPTY || new_state == C_IS_FREE);
+
+                       queue_remove(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list);
+                       c_swappedout_sparse_count--;
+                       break;
+
+               case C_ON_MAJORCOMPACT_Q:
+                       assert(new_state == C_ON_AGE_Q || new_state == C_IS_FREE);
+
+                       queue_remove(&c_major_list_head, c_seg, c_segment_t, c_age_list);
+                       c_major_count--;
+                       break;
+
+               case C_ON_BAD_Q:
+                       assert(new_state == C_IS_FREE);
+
+                       queue_remove(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
+                       c_bad_count--;
+                       break;
+
+               default:
+                       panic("c_seg %p has bad c_state = %d\n", c_seg, old_state);
        }
-       if (!c_seg->c_busy_swapping)
-               C_SEG_BUSY(c_seg);
 
-       c_seg_free_locked(c_seg);
+       switch(new_state) {
+               case C_IS_FREE:
+                       assert(old_state != C_IS_FILLING);
+
+                       break;
 
-       try_free_succeeded++;
+               case C_IS_EMPTY:
+                       assert(old_state == C_ON_SWAPOUT_Q || old_state == C_ON_SWAPPEDOUT_Q || old_state == C_ON_SWAPPEDOUTSPARSE_Q);
 
-       return (TRUE);
+                       c_empty_count++;
+                       break;
+
+               case C_IS_FILLING:
+                       assert(old_state == C_IS_EMPTY);
+
+                       queue_enter(&c_filling_list_head, c_seg, c_segment_t, c_age_list);
+                       c_filling_count++;
+                       break;
+
+               case C_ON_AGE_Q:
+                       assert(old_state == C_IS_FILLING || old_state == C_ON_SWAPPEDIN_Q ||
+                              old_state == C_ON_MAJORCOMPACT_Q || old_state == C_ON_SWAPOUT_Q);
+
+                       if (old_state == C_IS_FILLING)
+                               queue_enter(&c_age_list_head, c_seg, c_segment_t, c_age_list);
+                       else
+                               c_seg_insert_into_q(&c_age_list_head, c_seg);
+                       c_age_count++;
+                       break;
+               
+               case C_ON_SWAPPEDIN_Q:
+                       assert(c_seg->c_state == C_ON_SWAPPEDOUT_Q || c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);
+
+                       if (insert_head == TRUE)
+                               queue_enter_first(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list);
+                       else
+                               queue_enter(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list);
+                       c_swappedin_count++;
+                       break;
+
+               case C_ON_SWAPOUT_Q:
+                       assert(old_state == C_ON_AGE_Q || old_state == C_IS_FILLING);
+
+                       if (insert_head == TRUE)
+                               queue_enter_first(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
+                       else
+                               queue_enter(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
+                       c_swapout_count++;
+                       break;
+
+               case C_ON_SWAPPEDOUT_Q:
+                       assert(c_seg->c_state == C_ON_SWAPOUT_Q);
+
+                       if (insert_head == TRUE)
+                               queue_enter_first(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
+                       else
+                               queue_enter(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
+                       c_swappedout_count++;
+                       break;
+
+               case C_ON_SWAPPEDOUTSPARSE_Q:
+                       assert(c_seg->c_state == C_ON_SWAPOUT_Q || c_seg->c_state == C_ON_SWAPPEDOUT_Q);
+                       
+                       c_seg_insert_into_q(&c_swappedout_sparse_list_head, c_seg);
+                       c_swappedout_sparse_count++;
+                       break;
+
+               case C_ON_MAJORCOMPACT_Q:
+                       assert(c_seg->c_state == C_ON_AGE_Q);
+
+                       if (insert_head == TRUE)
+                               queue_enter_first(&c_major_list_head, c_seg, c_segment_t, c_age_list);
+                       else
+                               queue_enter(&c_major_list_head, c_seg, c_segment_t, c_age_list);
+                       c_major_count++;
+                       break;
+
+               case C_ON_BAD_Q:
+                       assert(c_seg->c_state == C_ON_SWAPPEDOUT_Q || c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);
+
+                       if (insert_head == TRUE)
+                               queue_enter_first(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
+                       else
+                               queue_enter(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
+                       c_bad_count++;
+                       break;
+
+               default:
+                      panic("c_seg %p requesting bad c_state = %d\n", c_seg, new_state);
+       }
+       c_seg->c_state = new_state;
 }
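
The c_seg_switch_state() routine above replaces the per-queue boolean flags (c_on_age_q, c_on_swapout_q, ...) with a single c_state field: each state owns one queue and one counter, and a transition asserts it is legal, dequeues the segment from the old state's queue, enters it on the new one, and adjusts both counters while the caller holds the compressor list lock. A minimal standalone sketch of that pattern (illustrative only, not part of this commit; every name below is hypothetical):

/* state_sketch.c -- illustrative only; names are hypothetical, not from xnu */
#include <assert.h>
#include <stdio.h>

enum seg_state { S_FREE, S_EMPTY, S_FILLING, S_AGE, S_SWAPOUT, S_STATE_MAX };

static int state_count[S_STATE_MAX];    /* one counter per state, like c_age_count etc. */

static void
switch_state(enum seg_state *cur, enum seg_state new_state)
{
        /* legal-transition check, analogous to the asserts in c_seg_switch_state() */
        assert(!(*cur == S_FILLING && new_state == S_FREE));

        /* "remove from the old state's queue" */
        if (*cur != S_FREE)
                state_count[*cur]--;
        /* "enter the new state's queue" */
        if (new_state != S_FREE)
                state_count[new_state]++;

        *cur = new_state;
}

int
main(void)
{
        enum seg_state s = S_EMPTY;

        state_count[S_EMPTY] = 1;
        switch_state(&s, S_FILLING);
        switch_state(&s, S_AGE);
        switch_state(&s, S_SWAPOUT);

        printf("age=%d swapout=%d\n", state_count[S_AGE], state_count[S_SWAPOUT]);
        return 0;
}

The real routine additionally honors the insert_head argument and uses c_seg_insert_into_q() to keep the age-sorted queues ordered.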
 
 
+
 void
 c_seg_free(c_segment_t c_seg)
 {
@@ -767,83 +989,66 @@ c_seg_free(c_segment_t c_seg)
 void
 c_seg_free_locked(c_segment_t c_seg)
 {
-       int             segno, i;
+       int             segno;
        int             pages_populated = 0;
        int32_t         *c_buffer = NULL;
        uint64_t        c_swap_handle = 0;
 
+       assert(c_seg->c_busy);
        assert(!c_seg->c_on_minorcompact_q);
+       assert(!c_seg->c_busy_swapping);
 
-       if (c_seg->c_on_age_q) {
-               queue_remove(&c_age_list_head, c_seg, c_segment_t, c_age_list);
-               c_seg->c_on_age_q = 0;
-               c_age_count--;
-       } else if (c_seg->c_on_swappedin_q) {
-               queue_remove(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list);
-               c_seg->c_on_swappedin_q = 0;
-               c_swappedin_count--;
-       } else if (c_seg->c_on_swapout_q) {
-               queue_remove(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
-               c_seg->c_on_swapout_q = 0;
-               c_swapout_count--;
-               thread_wakeup((event_t)&compaction_swapper_running);
-       } else if (c_seg->c_on_swappedout_q) {
-               queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
-               c_seg->c_on_swappedout_q = 0;
-               c_swappedout_count--;
-       } else if (c_seg->c_on_swappedout_sparse_q) {
-               queue_remove(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list);
-               c_seg->c_on_swappedout_sparse_q = 0;
-               c_swappedout_sparse_count--;
-       }
-#if TRACK_BAD_C_SEGMENTS
-       else if (c_seg->c_on_bad_q) {
-               queue_remove(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
-               c_seg->c_on_bad_q = 0;
-               c_bad_count--;
-       }
-#endif
-       segno = c_seg->c_mysegno;
-       c_segments[segno].c_segno = c_free_segno_head;
-       c_free_segno_head = segno;
-       c_segment_count--;
+       if (c_seg->c_overage_swap == TRUE) {
+               c_overage_swapped_count--;
+               c_seg->c_overage_swap = FALSE;
+       }       
+       if ( !(C_SEG_IS_ONDISK(c_seg)))
+               c_buffer = c_seg->c_store.c_buffer;
+       else
+               c_swap_handle = c_seg->c_store.c_swap_handle;
 
-       lck_mtx_unlock_always(c_list_lock);
+       c_seg_switch_state(c_seg, C_IS_FREE, FALSE);
 
-       if (c_seg->c_wanted) {
-               thread_wakeup((event_t) (c_seg));
-               c_seg->c_wanted = 0;
-       }
-       if (c_seg->c_busy_swapping) {
-               c_seg->c_must_free = 1;
+       lck_mtx_unlock_always(c_list_lock);
 
-               lck_mtx_unlock_always(&c_seg->c_lock);
-               return;
-       }
-       if (c_seg->c_ondisk == 0) {
+       if (c_buffer) {
                pages_populated = (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE;
-
-               c_buffer = c_seg->c_store.c_buffer;
                c_seg->c_store.c_buffer = NULL;
-       } else {
-                /*
-                 * Free swap space on disk.
-                */
-               c_swap_handle = c_seg->c_store.c_swap_handle;
+       } else
                c_seg->c_store.c_swap_handle = (uint64_t)-1;
-       }
+
        lck_mtx_unlock_always(&c_seg->c_lock);
 
        if (c_buffer) {
                if (pages_populated)
                        kernel_memory_depopulate(kernel_map, (vm_offset_t) c_buffer, pages_populated * PAGE_SIZE, KMA_COMPRESSOR);
 
-               kmem_free(kernel_map, (vm_offset_t) c_buffer, C_SEG_ALLOCSIZE);
-               OSAddAtomic64(-C_SEG_ALLOCSIZE, &compressor_kvspace_used);
-
-       } else if (c_swap_handle)
+       } else if (c_swap_handle) {
+                /*
+                 * Free swap space on disk.
+                */
                vm_swap_free(c_swap_handle);
+       }
+       lck_mtx_lock_spin_always(&c_seg->c_lock);
+
+       C_SEG_WAKEUP_DONE(c_seg);
+       lck_mtx_unlock_always(&c_seg->c_lock);
+
+       segno = c_seg->c_mysegno;
 
+       lck_mtx_lock_spin_always(c_list_lock);
+       /*
+        * because the c_buffer is now associated with the segno,
+        * we can't put the segno back on the free list until
+        * after we have depopulated the c_buffer range, or 
+        * we run the risk of depopulating a range that is 
+        * now being used in one of the compressor heads
+        */
+       c_segments[segno].c_segno = c_free_segno_head;
+       c_free_segno_head = segno;
+       c_segment_count--;
+
+       lck_mtx_unlock_always(c_list_lock);
 
 #if __i386__ || __x86_64__
        lck_mtx_destroy(&c_seg->c_lock, &vm_compressor_lck_grp);
@@ -851,12 +1056,9 @@ c_seg_free_locked(c_segment_t c_seg)
        lck_spin_destroy(&c_seg->c_lock, &vm_compressor_lck_grp);
 #endif /* __i386__ || __x86_64__ */
 
-       for (i = 0; i < C_SEG_SLOT_ARRAYS; i++) {
-               if (c_seg->c_slots[i] == 0)
-                       break;
+       if (c_seg->c_slot_var_array_len)
+               kfree(c_seg->c_slot_var_array, sizeof(struct c_slot) * c_seg->c_slot_var_array_len);
 
-               kfree((char *)c_seg->c_slots[i], sizeof(struct c_slot) * C_SEG_SLOT_ARRAY_SIZE);
-       }
        zfree(compressor_segment_zone, c_seg);
 }
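
The tail of c_seg_free_locked() above pushes the segment number back onto an index-threaded free list (c_segments[segno].c_segno chains to the previous c_free_segno_head) and, per the comment, does so only after the buffer range has been depopulated. A compact standalone sketch of that LIFO index free list (illustrative only; single-threaded, hypothetical names):

/* freelist_sketch.c -- illustrative only; simplified from the c_segments[]
 * index free list used above */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define NSEGS   8
#define NO_SEG  ((uint32_t)-1)

static uint32_t next_free[NSEGS];      /* stands in for c_segments[i].c_segno */
static uint32_t free_head = NO_SEG;    /* stands in for c_free_segno_head     */

static void
seg_free(uint32_t segno)
{
        /* push: the freed slot records the old head and becomes the new head */
        next_free[segno] = free_head;
        free_head = segno;
}

static uint32_t
seg_alloc(void)
{
        uint32_t segno = free_head;

        assert(segno != NO_SEG);
        free_head = next_free[segno];   /* pop */
        return segno;
}

int
main(void)
{
        for (uint32_t i = 0; i < NSEGS; i++)
                seg_free(i);

        printf("first alloc = %u\n", seg_alloc());   /* LIFO: prints 7 */
        return 0;
}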
 
@@ -1010,6 +1212,45 @@ done:
 }
 
 
+static void
+c_seg_alloc_nextslot(c_segment_t c_seg)
+{
+       struct c_slot   *old_slot_array = NULL;
+       struct c_slot   *new_slot_array = NULL;
+       int             newlen;
+       int             oldlen;
+
+       if (c_seg->c_nextslot < c_seg_fixed_array_len)
+               return;
+
+       if ((c_seg->c_nextslot - c_seg_fixed_array_len) >= c_seg->c_slot_var_array_len) {
+
+               oldlen = c_seg->c_slot_var_array_len;
+               old_slot_array = c_seg->c_slot_var_array;
+
+               if (oldlen == 0)
+                       newlen = C_SEG_SLOT_VAR_ARRAY_MIN_LEN;
+               else
+                       newlen = oldlen * 2;
+
+               new_slot_array = (struct c_slot *)kalloc(sizeof(struct c_slot) * newlen);
+
+               lck_mtx_lock_spin_always(&c_seg->c_lock);
+
+               if (old_slot_array)
+                       memcpy((char *)new_slot_array, (char *)old_slot_array, sizeof(struct c_slot) * oldlen);
+
+               c_seg->c_slot_var_array_len = newlen;
+               c_seg->c_slot_var_array = new_slot_array;
+
+               lck_mtx_unlock_always(&c_seg->c_lock);
+               
+               if (old_slot_array)
+                       kfree(old_slot_array, sizeof(struct c_slot) * oldlen);
+       }
+}
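
c_seg_alloc_nextslot() grows the per-segment slot array geometrically: once the fixed array is exhausted it doubles the variable array (starting at C_SEG_SLOT_VAR_ARRAY_MIN_LEN), copies the old contents while holding the c_seg lock, and frees the old array only after dropping it. A simplified, single-threaded sketch of the same doubling strategy (illustrative only; no locking, hypothetical names, error handling omitted):

/* grow_sketch.c -- illustrative only */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MIN_LEN 4

struct slot { int offset; int size; };

struct seg {
        int             fixed_len;
        int             nextslot;
        int             var_len;
        struct slot     *var_array;
};

static void
alloc_nextslot(struct seg *s)
{
        if (s->nextslot < s->fixed_len)
                return;                         /* still fits in the fixed array */

        if ((s->nextslot - s->fixed_len) >= s->var_len) {
                int newlen = s->var_len ? s->var_len * 2 : MIN_LEN;
                struct slot *n = malloc(sizeof(struct slot) * newlen);

                if (s->var_array) {
                        memcpy(n, s->var_array, sizeof(struct slot) * s->var_len);
                        free(s->var_array);
                }
                s->var_array = n;
                s->var_len = newlen;
        }
}

int
main(void)
{
        struct seg s = { .fixed_len = 2 };

        for (s.nextslot = 0; s.nextslot < 20; s.nextslot++)
                alloc_nextslot(&s);

        printf("var_len grew to %d\n", s.var_len);   /* 4 -> 8 -> 16 -> 32 */
        free(s.var_array);
        return 0;
}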
+
+
 
 struct {
        uint64_t asked_permission;
@@ -1018,6 +1259,7 @@ struct {
        uint64_t moved_bytes;
        uint64_t wasted_space_in_swapouts;
        uint64_t count_of_swapouts;
+       uint64_t count_of_freed_segs;
 } c_seg_major_compact_stats;
 
 
@@ -1032,17 +1274,11 @@ c_seg_major_compact_ok(
 
        c_seg_major_compact_stats.asked_permission++;
 
-       if (c_seg_src->c_filling) {
-               /*
-                * we're at or near the head... don't compact
-                */
-               return (FALSE);
-       }
        if (c_seg_src->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE &&
            c_seg_dst->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE)
                return (FALSE);
 
-       if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX) {
+       if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX_INDEX) {
                /*
                 * destination segment is full... can't compact
                 */
@@ -1065,7 +1301,6 @@ c_seg_major_compact(
        int             i;
        c_slot_t        c_dst;
        c_slot_t        c_src;
-       int             slotarray;
        boolean_t       keep_compacting = TRUE;
        
        /*
@@ -1096,30 +1331,30 @@ c_seg_major_compact(
                }
 
                if (C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset - c_seg_dst->c_nextoffset) < (unsigned) c_size) {
+                       int     size_to_populate;
+
                        /* doesn't fit */
-                       if ((C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset) == C_SEG_BUFSIZE)) {
+                       size_to_populate = C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset);
+
+                       if (size_to_populate == 0) {
                                /* can't fit */
                                keep_compacting = FALSE;
                                break;
                        }
+                       if (size_to_populate > C_SEG_MAX_POPULATE_SIZE)
+                               size_to_populate = C_SEG_MAX_POPULATE_SIZE;
+
                        kernel_memory_populate(kernel_map,
                                               (vm_offset_t) &c_seg_dst->c_store.c_buffer[c_seg_dst->c_populated_offset],
-                                              PAGE_SIZE,
-                                              KMA_COMPRESSOR);
+                                              size_to_populate,
+                                              KMA_COMPRESSOR, 
+                                              VM_KERN_MEMORY_COMPRESSOR);
 
-                       c_seg_dst->c_populated_offset += C_SEG_BYTES_TO_OFFSET(PAGE_SIZE);
+                       c_seg_dst->c_populated_offset += C_SEG_BYTES_TO_OFFSET(size_to_populate);
                        assert(C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset) <= C_SEG_BUFSIZE);
                }
+               c_seg_alloc_nextslot(c_seg_dst);
 
-               slotarray = C_SEG_SLOTARRAY_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot);
-
-               if (c_seg_dst->c_slots[slotarray] == 0) {
-                       KERNEL_DEBUG(0xe0400008 | DBG_FUNC_START, 0, 0, 0, 0, 0);
-                       c_seg_dst->c_slots[slotarray] = (struct c_slot *)
-                               kalloc(sizeof(struct c_slot) *
-                                      C_SEG_SLOT_ARRAY_SIZE);
-                       KERNEL_DEBUG(0xe0400008 | DBG_FUNC_END, 0, 0, 0, 0, 0);
-               }
                c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot);
 
                memcpy(&c_seg_dst->c_store.c_buffer[c_seg_dst->c_nextoffset], &c_seg_src->c_store.c_buffer[c_src->c_offset], c_size);
@@ -1151,7 +1386,7 @@ c_seg_major_compact(
                c_seg_src->c_bytes_unused += c_rounded_size;
                c_seg_src->c_firstemptyslot = 0;
 
-               if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX) {
+               if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX_INDEX) {
                        /* dest segment is now full */
                        keep_compacting = FALSE;
                        break;
@@ -1347,14 +1582,35 @@ compressor_needs_to_swap(void)
 {
        boolean_t       should_swap = FALSE;
 
-       if (vm_swap_up == TRUE) {
+       if (vm_swapout_ripe_segments == TRUE && c_overage_swapped_count < c_overage_swapped_limit) {
+               c_segment_t     c_seg;
+               clock_sec_t     now;
+               clock_sec_t     age;
+               clock_nsec_t    nsec;
+               
+               clock_get_system_nanotime(&now,  &nsec);
+               age = 0;
+
+               lck_mtx_lock_spin_always(c_list_lock);
+
+               if ( !queue_empty(&c_age_list_head)) {
+                       c_seg = (c_segment_t) queue_first(&c_age_list_head);
+
+                       age = now - c_seg->c_creation_ts;
+               }
+               lck_mtx_unlock_always(c_list_lock);
+
+               if (age >= vm_ripe_target_age)
+                       return (TRUE);
+       }
+       if ((vm_compressor_mode == VM_PAGER_COMPRESSOR_WITH_SWAP) && vm_swap_up == TRUE) {
                if (COMPRESSOR_NEEDS_TO_SWAP()) {
                        return (TRUE);
                }
                if (VM_PAGE_Q_THROTTLED(&vm_pageout_queue_external) && vm_page_anonymous_count < (vm_page_inactive_count / 20)) {
                        return (TRUE);
                }
-               if (vm_page_free_count < (vm_page_free_reserved - COMPRESSOR_FREE_RESERVED_LIMIT))
+               if (vm_page_free_count < (vm_page_free_reserved - (COMPRESSOR_FREE_RESERVED_LIMIT * 2)))
                        return (TRUE);
        }
        compute_swapout_target_age();
@@ -1380,42 +1636,52 @@ compressor_needs_to_swap(void)
        if (swapout_target_age)
                should_swap = TRUE;
 
-       if (vm_swap_up == FALSE) {
-
-               if (should_swap) {
 #if CONFIG_JETSAM
-                       if (vm_compressor_thrashing_detected == FALSE) {
-                               vm_compressor_thrashing_detected = TRUE;
+       if (should_swap || c_segment_pages_compressed > c_segment_pages_compressed_nearing_limit) {
+
+               if (vm_compressor_thrashing_detected == FALSE) {
+                       vm_compressor_thrashing_detected = TRUE;
                                
-                               if (swapout_target_age) {
-                                       memorystatus_kill_on_VM_thrashing(TRUE /* async */);
-                                       compressor_thrashing_induced_jetsam++;
-                               } else {
-                                       memorystatus_kill_on_FC_thrashing(TRUE /* async */);
-                                       filecache_thrashing_induced_jetsam++;
-                               }
-                               /*
-                                * let the jetsam take precedence over
-                                * any major compactions we might have
-                                * been able to do... otherwise we run
-                                * the risk of doing major compactions
-                                * on segments we're about to free up
-                                * due to the jetsam activity.
-                                */
-                               should_swap = FALSE;
+                       if (swapout_target_age || c_segment_pages_compressed > c_segment_pages_compressed_nearing_limit) {
+                               memorystatus_kill_on_VM_thrashing(TRUE /* async */);
+                               compressor_thrashing_induced_jetsam++;
+                       } else {
+                               memorystatus_kill_on_FC_thrashing(TRUE /* async */);
+                               filecache_thrashing_induced_jetsam++;
                        }
-#endif /* CONFIG_JETSAM */
-               } else
-                       should_swap = COMPRESSOR_NEEDS_TO_MAJOR_COMPACT();
+               }
+               /*
+                * let the jetsam take precedence over
+                * any major compactions we might have
+                * been able to do... otherwise we run
+                * the risk of doing major compactions
+                * on segments we're about to free up
+                * due to the jetsam activity.
+                */
+               should_swap = FALSE;
        }
 
-       /*
-        * returning TRUE when swap_supported == FALSE
+#endif /* CONFIG_JETSAM */
+
+       if (should_swap == FALSE) {
+               /*
+                * COMPRESSOR_NEEDS_TO_MAJOR_COMPACT returns true only if we're
+                * about to run out of available compressor segments... in this
+                * case, we absolutely need to run a major compaction even if
+                * we've just kicked off a jetsam or we don't otherwise need to
+                * swap... terminating objects releases
+                * pages back to the uncompressed cache, but does not guarantee
+                * that we will free up even a single compression segment
+                */
+               should_swap = COMPRESSOR_NEEDS_TO_MAJOR_COMPACT();
+       }
+
+       /*
+        * returning TRUE when swap_supported == FALSE
         * will cause the major compaction engine to
         * run, but will not trigger any swapping...
         * segments that have been major compacted
-        * will be moved to the swapped_out_q
-        * but will not have the c_ondisk flag set
+        * will be moved to the majorcompact queue
         */
        return (should_swap);
 }
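
The new early-return in compressor_needs_to_swap() and the aging loop in vm_consider_swapping() below hinge on the same test: the oldest segment on the age queue is "ripe" once now - c_creation_ts reaches vm_ripe_target_age. A tiny standalone sketch of that check (illustrative only; the one-day target below is a placeholder, not the kernel's actual vm_ripe_target_age):

/* ripe_age_sketch.c -- illustrative only; hypothetical names and values */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define RIPE_TARGET_AGE_SECS   (60 * 60 * 24)   /* placeholder: one day */

struct seg { time_t creation_ts; };

/* mirrors the early-return added to compressor_needs_to_swap(): swap if the
 * oldest segment on the age queue has been waiting longer than the target age */
static bool
oldest_seg_is_ripe(const struct seg *oldest, time_t now)
{
        return (now - oldest->creation_ts) >= RIPE_TARGET_AGE_SECS;
}

int
main(void)
{
        time_t now = time(NULL);
        struct seg fresh = { .creation_ts = now - 60 };
        struct seg stale = { .creation_ts = now - 3 * RIPE_TARGET_AGE_SECS };

        printf("fresh ripe? %d, stale ripe? %d\n",
               oldest_seg_is_ripe(&fresh, now), oldest_seg_is_ripe(&stale, now));
        return 0;
}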
@@ -1452,25 +1718,84 @@ uint32_t vm_wake_compactor_swapper_calls = 0;
 void
 vm_wake_compactor_swapper(void)
 {
-       boolean_t need_major_compaction = FALSE;
-
-       if (compaction_swapper_running)
+       if (compaction_swapper_running || c_segment_count == 0)
                return;
 
-       if (c_minor_count == 0 && need_major_compaction == FALSE)
-               return;
+       if (c_minor_count || COMPRESSOR_NEEDS_TO_MAJOR_COMPACT()) {
+
+               lck_mtx_lock_spin_always(c_list_lock);
+
+               fastwake_warmup = FALSE;
+
+               if (compaction_swapper_running == 0) {
+
+                       vm_wake_compactor_swapper_calls++;
+
+                       thread_wakeup((event_t)&c_compressor_swap_trigger);
+                       
+                       compaction_swapper_running = 1;
+               }
+               lck_mtx_unlock_always(c_list_lock);
+       }
+}
+
+
+void
+vm_consider_swapping()
+{
+       c_segment_t     c_seg, c_seg_next;
+       clock_sec_t     now;
+       clock_nsec_t    nsec;
+
 
        lck_mtx_lock_spin_always(c_list_lock);
 
-       fastwake_warmup = FALSE;
+       compaction_swapper_abort = 1;
 
-       if (compaction_swapper_running == 0) {
-               vm_wake_compactor_swapper_calls++;
+       while (compaction_swapper_running) {
+               assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT);
 
-               thread_wakeup((event_t)&c_compressor_swap_trigger);
+               lck_mtx_unlock_always(c_list_lock);
                
-               compaction_swapper_running = 1;
+               thread_block(THREAD_CONTINUE_NULL);
+
+               lck_mtx_lock_spin_always(c_list_lock);
+       }
+       compaction_swapper_abort = 0;
+       compaction_swapper_running = 1;
+
+       vm_swapout_ripe_segments = TRUE;
+
+       if (!queue_empty(&c_major_list_head)) {
+               
+               clock_get_system_nanotime(&now, &nsec);
+                       
+               c_seg = (c_segment_t)queue_first(&c_major_list_head);
+
+               while (!queue_end(&c_major_list_head, (queue_entry_t)c_seg)) {
+
+                       if (c_overage_swapped_count >= c_overage_swapped_limit)
+                               break;
+
+                       c_seg_next = (c_segment_t) queue_next(&c_seg->c_age_list);
+
+                       if ((now - c_seg->c_creation_ts) >= vm_ripe_target_age) {
+                       
+                               lck_mtx_lock_spin_always(&c_seg->c_lock);
+                               
+                               c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
+
+                               lck_mtx_unlock_always(&c_seg->c_lock);
+                       }
+                       c_seg = c_seg_next;
+               }
        }
+       vm_compressor_compact_and_swap(FALSE);
+
+       compaction_swapper_running = 0;
+
+       vm_swapout_ripe_segments = FALSE;
+       
        lck_mtx_unlock_always(c_list_lock);
 }
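
vm_consider_swapping() first has to take over the swapper role: it sets compaction_swapper_abort, loops in assert_wait()/thread_block() until compaction_swapper_running clears, and only then claims the flag for itself. A user-space pthread analogue of that handshake (illustrative only; these are not the kernel primitives, and the names are hypothetical):

/* handshake_sketch.c -- illustrative only */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t  list_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t   swapper_idle = PTHREAD_COND_INITIALIZER;
static int              swapper_running = 1;
static int              swapper_abort;

static void *
swapper_thread(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&list_lock);
        /* ... pretend to finish the current pass, then go idle ... */
        swapper_running = 0;
        pthread_cond_broadcast(&swapper_idle);
        pthread_mutex_unlock(&list_lock);
        return NULL;
}

int
main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, swapper_thread, NULL);

        pthread_mutex_lock(&list_lock);
        swapper_abort = 1;              /* ask the swapper to stop early (checked by the real thread) */
        while (swapper_running)
                pthread_cond_wait(&swapper_idle, &list_lock);
        swapper_abort = 0;
        swapper_running = 1;            /* take over the role ourselves */
        pthread_mutex_unlock(&list_lock);

        printf("swapper role claimed\n");
        pthread_join(t, NULL);
        return 0;
}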
 
@@ -1483,6 +1808,9 @@ vm_consider_waking_compactor_swapper(void)
        if (compaction_swapper_running)
                return;
 
+       if (c_segment_count == 0)
+               return;
+
        if (!compaction_swapper_inited && !compaction_swapper_init_now) {
                compaction_swapper_init_now = 1;
                need_wakeup = TRUE;
@@ -1585,13 +1913,7 @@ vm_compressor_age_swapped_in_segments(boolean_t flush_all)
                        
                lck_mtx_lock_spin_always(&c_seg->c_lock);
 
-               queue_remove(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list);
-               c_seg->c_on_swappedin_q = 0;
-               c_swappedin_count--;
-
-               c_seg_insert_into_q(&c_age_list_head, c_seg);
-               c_seg->c_on_age_q = 1;
-               c_age_count++;
+               c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
 
                lck_mtx_unlock_always(&c_seg->c_lock);
        }
@@ -1677,6 +1999,8 @@ int               compaction_swap_trigger_thread_awakened = 0;
 static void
 vm_compressor_swap_trigger_thread(void)
 {
+       current_thread()->options |= TH_OPT_VMPRIV;
+
        /*
         * compaction_swapper_init_now is set when the first call to
         * vm_consider_waking_compactor_swapper is made from 
@@ -1691,6 +2015,9 @@ vm_compressor_swap_trigger_thread(void)
                if (vm_compressor_mode == VM_PAGER_COMPRESSOR_WITH_SWAP)
                        vm_swap_file_set_tuneables();
 
+               if (vm_restricted_to_single_processor == TRUE)
+                       thread_vm_bind_group_add();
+
                compaction_swapper_inited = 1;
        }
        lck_mtx_lock_spin_always(c_list_lock);
@@ -1868,6 +2195,8 @@ vm_compressor_compact_and_swap(boolean_t flush_all)
 {
        c_segment_t     c_seg, c_seg_next;
        boolean_t       keep_compacting;
+       clock_sec_t     now;
+       clock_nsec_t    nsec;
 
 
        if (fastwake_warmup == TRUE) {
@@ -1894,12 +2223,18 @@ vm_compressor_compact_and_swap(boolean_t flush_all)
 
        vm_compressor_age_swapped_in_segments(flush_all);
 
+       /*
+        * we only need to grab the timestamp once per
+        * invocation of this function since the 
+        * timescale we're interested in is measured
+        * in days
+        */
+       clock_get_system_nanotime(&now,  &nsec);
 
        while (!queue_empty(&c_age_list_head) && compaction_swapper_abort == 0) {
 
                if (hibernate_flushing == TRUE) {
                        clock_sec_t     sec;
-                       clock_nsec_t    nsec;
 
                        if (hibernate_should_abort()) {
                                HIBLOG("vm_compressor_flush - hibernate_should_abort returned TRUE\n");
@@ -1954,6 +2289,9 @@ vm_compressor_compact_and_swap(boolean_t flush_all)
 
                        needs_to_swap = compressor_needs_to_swap();
 
+                       if (needs_to_swap == TRUE && vm_swap_low_on_space())
+                               vm_compressor_take_paging_space_action();
+
                        lck_mtx_lock_spin_always(c_list_lock);
                        
                        if (needs_to_swap == FALSE)
@@ -1963,15 +2301,11 @@ vm_compressor_compact_and_swap(boolean_t flush_all)
                        break;
                c_seg = (c_segment_t) queue_first(&c_age_list_head);
 
-               if (flush_all == TRUE && c_seg->c_generation_id > c_generation_id_flush_barrier)
-                       break;
+               assert(c_seg->c_state == C_ON_AGE_Q);
 
-               if (c_seg->c_filling) {
-                       /*
-                        * we're at or near the head... no more work to do
-                        */
+               if (flush_all == TRUE && c_seg->c_generation_id > c_generation_id_flush_barrier)
                        break;
-               }
+               
                lck_mtx_lock_spin_always(&c_seg->c_lock);
 
                if (c_seg->c_busy) {
@@ -1989,6 +2323,7 @@ vm_compressor_compact_and_swap(boolean_t flush_all)
                         * found an empty c_segment and freed it
                         * so go grab the next guy in the queue
                         */
+                       c_seg_major_compact_stats.count_of_freed_segs++;
                        continue;
                }
                /*
@@ -2007,6 +2342,8 @@ vm_compressor_compact_and_swap(boolean_t flush_all)
                        if (queue_end(&c_age_list_head, (queue_entry_t)c_seg_next))
                                break;
 
+                       assert(c_seg_next->c_state == C_ON_AGE_Q);
+
                        if (c_seg_major_compact_ok(c_seg, c_seg_next) == FALSE)
                                break;
 
@@ -2028,6 +2365,7 @@ vm_compressor_compact_and_swap(boolean_t flush_all)
                                 * found an empty c_segment and freed it
                                 * so we can't continue to use c_seg_next
                                 */
+                               c_seg_major_compact_stats.count_of_freed_segs++;
                                continue;
                        }
 
@@ -2051,7 +2389,8 @@ vm_compressor_compact_and_swap(boolean_t flush_all)
                         * by passing TRUE, we ask for c_busy to be cleared
                         * and c_wanted to be taken care of
                         */
-                       c_seg_minor_compaction_and_unlock(c_seg_next, TRUE);
+                       if (c_seg_minor_compaction_and_unlock(c_seg_next, TRUE))
+                               c_seg_major_compact_stats.count_of_freed_segs++;
 
                        PAGE_REPLACEMENT_DISALLOWED(FALSE);
 
@@ -2060,27 +2399,46 @@ vm_compressor_compact_and_swap(boolean_t flush_all)
 
                } /* major compaction */
 
-               c_seg_major_compact_stats.wasted_space_in_swapouts += C_SEG_BUFSIZE - c_seg->c_bytes_used;
-               c_seg_major_compact_stats.count_of_swapouts++;
-
                lck_mtx_lock_spin_always(&c_seg->c_lock);
 
                assert(c_seg->c_busy);
-               assert(c_seg->c_on_age_q);
                assert(!c_seg->c_on_minorcompact_q);
 
-               queue_remove(&c_age_list_head, c_seg, c_segment_t, c_age_list);
-               c_seg->c_on_age_q = 0;
-               c_age_count--;
-
                if (vm_swap_up == TRUE) {
-                       queue_enter(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
-                       c_seg->c_on_swapout_q = 1;
-                       c_swapout_count++;
+                       /*
+                        * This mode of putting a generic c_seg on the swapout list is
+                        * only supported when we have general swap ON, i.e. when
+                        * we compress pages into c_segs as we process them off
+                        * the paging queues in vm_pageout_scan().
+                        */
+                       if (COMPRESSED_PAGER_IS_SWAPBACKED)
+                               c_seg_switch_state(c_seg, C_ON_SWAPOUT_Q, FALSE);
+                       else {
+                               if ((vm_swapout_ripe_segments == TRUE && c_overage_swapped_count < c_overage_swapped_limit)) {
+                                       /*
+                                        * we are running compressor sweeps with swap-behind
+                                        * make sure the c_seg has aged enough before swapping it
+                                        * out...
+                                        */
+                                       if ((now - c_seg->c_creation_ts) >= vm_ripe_target_age) {
+                                               c_seg->c_overage_swap = TRUE;
+                                               c_overage_swapped_count++;
+                                               c_seg_switch_state(c_seg, C_ON_SWAPOUT_Q, FALSE);
+                                       }
+                               }
+                       }
+               }
+               if (c_seg->c_state == C_ON_AGE_Q) {
+                       /*
+                        * this c_seg didn't get moved to the swapout queue
+                        * so we need to move it out of the way...
+                        * we just did a major compaction on it so put it
+                        * on that queue
+                        */ 
+                       c_seg_switch_state(c_seg, C_ON_MAJORCOMPACT_Q, FALSE);
                } else {
-                       queue_enter(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
-                       c_seg->c_on_swappedout_q = 1;
-                       c_swappedout_count++;
+                       c_seg_major_compact_stats.wasted_space_in_swapouts += C_SEG_BUFSIZE - c_seg->c_bytes_used;
+                       c_seg_major_compact_stats.count_of_swapouts++;
                }
                C_SEG_WAKEUP_DONE(c_seg);
 
@@ -2097,37 +2455,19 @@ vm_compressor_compact_and_swap(boolean_t flush_all)
 }
 
 
-static uint32_t        no_paging_space_action_in_progress = 0;
-extern void memorystatus_send_low_swap_note(void);
-
-
 static c_segment_t
 c_seg_allocate(c_segment_t *current_chead)
 {
-        clock_sec_t    sec;
-        clock_nsec_t   nsec;
        c_segment_t     c_seg;
-       int             slotarray;
+       int             min_needed;
+       int             size_to_populate;
+
+       if (vm_compressor_low_on_space())
+               vm_compressor_take_paging_space_action();
 
        if ( (c_seg = *current_chead) == NULL ) {
                uint32_t        c_segno;
 
-               if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
-
-                       if (no_paging_space_action_in_progress == 0) {
-
-                               if (OSCompareAndSwap(0, 1, (UInt32 *)&no_paging_space_action_in_progress)) {
-
-                                       if (no_paging_space_action()) {
-                                               memorystatus_send_low_swap_note();
-                                       }
-
-                                       no_paging_space_action_in_progress = 0;
-                               }
-                       }
-               }
-               KERNEL_DEBUG(0xe0400004 | DBG_FUNC_START, 0, 0, 0, 0, 0);
-
                lck_mtx_lock_spin_always(c_list_lock);
 
                while (c_segments_busy == TRUE) {
@@ -2140,54 +2480,57 @@ c_seg_allocate(c_segment_t *current_chead)
                        lck_mtx_lock_spin_always(c_list_lock);
                }
                if (c_free_segno_head == (uint32_t)-1) {
+                       uint32_t        c_segments_available_new;
 
                        if (c_segments_available >= c_segments_limit || c_segment_pages_compressed >= c_segment_pages_compressed_limit) {
                                lck_mtx_unlock_always(c_list_lock);
 
-                               KERNEL_DEBUG(0xe0400004 | DBG_FUNC_END, 0, 0, 0, 1, 0);
                                return (NULL);
                        }
                        c_segments_busy = TRUE;
                        lck_mtx_unlock_always(c_list_lock);
 
-                       kernel_memory_populate(kernel_map, (vm_offset_t)c_segments_next_page, PAGE_SIZE, KMA_KOBJECT);
+                       kernel_memory_populate(kernel_map, (vm_offset_t)c_segments_next_page, 
+                                               PAGE_SIZE, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR);
                        c_segments_next_page += PAGE_SIZE;
 
-                       for (c_segno = c_segments_available + 1; c_segno < (c_segments_available + C_SEGMENTS_PER_PAGE); c_segno++)
+                       c_segments_available_new = c_segments_available + C_SEGMENTS_PER_PAGE;
+
+                       if (c_segments_available_new > c_segments_limit)
+                               c_segments_available_new = c_segments_limit;
+
+                       for (c_segno = c_segments_available + 1; c_segno < c_segments_available_new; c_segno++)
                                c_segments[c_segno - 1].c_segno = c_segno;
 
                        lck_mtx_lock_spin_always(c_list_lock);
 
                        c_segments[c_segno - 1].c_segno = c_free_segno_head;
                        c_free_segno_head = c_segments_available;
-                       c_segments_available += C_SEGMENTS_PER_PAGE;
+                       c_segments_available = c_segments_available_new;
 
                        c_segments_busy = FALSE;
                        thread_wakeup((event_t) (&c_segments_busy));
                }
                c_segno = c_free_segno_head;
+               assert(c_segno >= 0 && c_segno < c_segments_limit);
+
                c_free_segno_head = c_segments[c_segno].c_segno;
 
+               /*
+                * do the rest of the bookkeeping now while we're still behind
+                * the list lock and grab our generation id now into a local
+                * so that we can install it once we have the c_seg allocated
+                */
+               c_segment_count++;
+               if (c_segment_count > c_segment_count_max)
+                       c_segment_count_max = c_segment_count;
+
                lck_mtx_unlock_always(c_list_lock);
 
                c_seg = (c_segment_t)zalloc(compressor_segment_zone);
                bzero((char *)c_seg, sizeof(struct c_segment));
 
-               if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&c_seg->c_store.c_buffer), C_SEG_ALLOCSIZE, 0, KMA_COMPRESSOR | KMA_VAONLY) != KERN_SUCCESS) {
-                       zfree(compressor_segment_zone, c_seg);
-
-                       lck_mtx_lock_spin_always(c_list_lock);
-
-                       c_segments[c_segno].c_segno = c_free_segno_head;
-                       c_free_segno_head = c_segno;
-
-                       lck_mtx_unlock_always(c_list_lock);
-
-                       KERNEL_DEBUG(0xe0400004 | DBG_FUNC_END, 0, 0, 0, 2, 0);
-
-                       return (NULL);
-               }
-               OSAddAtomic64(C_SEG_ALLOCSIZE, &compressor_kvspace_used);
+               c_seg->c_store.c_buffer = (int32_t *)C_SEG_BUFFER_ADDRESS(c_segno);
 
 #if __i386__ || __x86_64__
                lck_mtx_init(&c_seg->c_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr);
@@ -2195,57 +2538,58 @@ c_seg_allocate(c_segment_t *current_chead)
                lck_spin_init(&c_seg->c_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr);
 #endif /* __i386__ || __x86_64__ */
        
-               kernel_memory_populate(kernel_map, (vm_offset_t)(c_seg->c_store.c_buffer), 3 * PAGE_SIZE, KMA_COMPRESSOR);
-
-               c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(3 * PAGE_SIZE);
-               c_seg->c_firstemptyslot = C_SLOT_MAX;
+               c_seg->c_state = C_IS_EMPTY;
+               c_seg->c_firstemptyslot = C_SLOT_MAX_INDEX;
                c_seg->c_mysegno = c_segno;
-               c_seg->c_filling = 1;
 
                lck_mtx_lock_spin_always(c_list_lock);
-
-               c_segment_count++;
+               c_empty_count++;
+               c_seg_switch_state(c_seg, C_IS_FILLING, FALSE);
                c_segments[c_segno].c_seg = c_seg;
-
-               c_seg->c_generation_id = c_generation_id++;
-               
-               queue_enter(&c_age_list_head, c_seg, c_segment_t, c_age_list);
-               c_seg->c_on_age_q = 1;
-               c_age_count++;
-
                lck_mtx_unlock_always(c_list_lock);
 
-               clock_get_system_nanotime(&sec, &nsec);
-               c_seg->c_creation_ts = (uint32_t)sec;
-
                *current_chead = c_seg;
-
-               KERNEL_DEBUG(0xe0400004 | DBG_FUNC_END, c_seg, 0, 0, 3, 0);
        }
-       slotarray = C_SEG_SLOTARRAY_FROM_INDEX(c_seg, c_seg->c_nextslot);
+       c_seg_alloc_nextslot(c_seg);
+
+       size_to_populate = C_SEG_ALLOCSIZE - C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset);
+       
+       if (size_to_populate) {
+
+               min_needed = PAGE_SIZE + (C_SEG_ALLOCSIZE - C_SEG_BUFSIZE);
 
-       if (c_seg->c_slots[slotarray] == 0) {
-               KERNEL_DEBUG(0xe0400008 | DBG_FUNC_START, 0, 0, 0, 0, 0);
+               if (C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset - c_seg->c_nextoffset) < (unsigned) min_needed) {
 
-               c_seg->c_slots[slotarray] = (struct c_slot *)kalloc(sizeof(struct c_slot) * C_SEG_SLOT_ARRAY_SIZE);
+                       if (size_to_populate > C_SEG_MAX_POPULATE_SIZE)
+                               size_to_populate = C_SEG_MAX_POPULATE_SIZE;
 
-               KERNEL_DEBUG(0xe0400008 | DBG_FUNC_END, 0, 0, 0, 0, 0);
+                       kernel_memory_populate(kernel_map,
+                                              (vm_offset_t) &c_seg->c_store.c_buffer[c_seg->c_populated_offset],
+                                              size_to_populate,
+                                              KMA_COMPRESSOR,
+                                              VM_KERN_MEMORY_COMPRESSOR);
+               } else
+                       size_to_populate = 0;
        }
-               
        PAGE_REPLACEMENT_DISALLOWED(TRUE);
 
        lck_mtx_lock_spin_always(&c_seg->c_lock);
 
+       if (size_to_populate)
+               c_seg->c_populated_offset += C_SEG_BYTES_TO_OFFSET(size_to_populate);
+
        return (c_seg);
 }
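
With the buffer now carved out of a fixed per-segment KVA range (C_SEG_BUFFER_ADDRESS), c_seg_allocate() populates it lazily: it only backs more of the buffer with physical pages when the populated-but-unwritten headroom drops below PAGE_SIZE plus the allocsize/bufsize slop, and it caps each population at C_SEG_MAX_POPULATE_SIZE. A standalone sketch of that sizing decision (illustrative only; the sizes are placeholders, not the real C_SEG_* constants, and offsets are plain bytes rather than C_SEG_OFFSET units):

/* populate_sketch.c -- illustrative only */
#include <stdio.h>

#define PAGE_SIZE          4096
#define SEG_ALLOCSIZE      (64 * 1024)
#define SEG_BUFSIZE        (62 * 1024)      /* placeholder: slightly smaller than allocsize */
#define MAX_POPULATE_SIZE  (16 * 1024)

/* mirrors the tail of c_seg_allocate(): decide how many more bytes of the
 * segment buffer to back with physical pages before the next compression */
static int
bytes_to_populate(int populated, int nextoffset)
{
        int size_to_populate = SEG_ALLOCSIZE - populated;
        int min_needed = PAGE_SIZE + (SEG_ALLOCSIZE - SEG_BUFSIZE);

        if (size_to_populate == 0)
                return 0;                       /* fully populated already */
        if ((populated - nextoffset) >= min_needed)
                return 0;                       /* enough headroom, defer */
        if (size_to_populate > MAX_POPULATE_SIZE)
                size_to_populate = MAX_POPULATE_SIZE;
        return size_to_populate;
}

int
main(void)
{
        printf("fresh segment:    %d\n", bytes_to_populate(0, 0));
        printf("plenty of room:   %d\n", bytes_to_populate(32 * 1024, 4 * 1024));
        printf("nearly caught up: %d\n", bytes_to_populate(32 * 1024, 30 * 1024));
        return 0;
}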
 
 
-
 static void
 c_current_seg_filled(c_segment_t c_seg, c_segment_t *current_chead)
 {
        uint32_t        unused_bytes;
        uint32_t        offset_to_depopulate;
+       int             new_state = C_ON_AGE_Q;
+       clock_sec_t     sec;
+       clock_nsec_t    nsec;
 
        unused_bytes = trunc_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset - c_seg->c_nextoffset));
 
@@ -2268,22 +2612,46 @@ c_current_seg_filled(c_segment_t c_seg, c_segment_t *current_chead)
 
                c_seg->c_populated_offset = offset_to_depopulate;
        }
-       c_seg->c_filling = 0;
+       assert(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset) <= C_SEG_BUFSIZE);
 
-       if (C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE)
-               c_seg_need_delayed_compaction(c_seg);
+#if CONFIG_FREEZE
+       if (current_chead == (c_segment_t*)&freezer_chead && DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED &&
+           c_freezer_swapout_count < VM_MAX_FREEZER_CSEG_SWAP_COUNT) {
+               new_state = C_ON_SWAPOUT_Q;
+       }
+#endif /* CONFIG_FREEZE */
 
-       lck_mtx_unlock_always(&c_seg->c_lock);
+       clock_get_system_nanotime(&sec, &nsec);
+       c_seg->c_creation_ts = (uint32_t)sec;
+
+       lck_mtx_lock_spin_always(c_list_lock);
+
+#if CONFIG_FREEZE
+       if (c_seg->c_state == C_ON_SWAPOUT_Q)
+               c_freezer_swapout_count++;
+#endif /* CONFIG_FREEZE */
+
+       c_seg->c_generation_id = c_generation_id++;
+       c_seg_switch_state(c_seg, new_state, FALSE);
+
+       lck_mtx_unlock_always(c_list_lock);
+
+#if CONFIG_FREEZE
+       if (c_seg->c_state == C_ON_SWAPOUT_Q)
+               thread_wakeup((event_t)&c_swapout_list_head);
+#endif /* CONFIG_FREEZE */
+
+       if (c_seg->c_state == C_ON_AGE_Q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE)
+               c_seg_need_delayed_compaction(c_seg);
 
        *current_chead = NULL;
 }
 
-
 /*
  * returns with c_seg locked
  */
 void
-c_seg_swapin_requeue(c_segment_t c_seg)
+c_seg_swapin_requeue(c_segment_t c_seg, boolean_t has_data)
 {
         clock_sec_t    sec;
         clock_nsec_t   nsec;
@@ -2293,32 +2661,21 @@ c_seg_swapin_requeue(c_segment_t c_seg)
        lck_mtx_lock_spin_always(c_list_lock);
        lck_mtx_lock_spin_always(&c_seg->c_lock);
 
-       if (c_seg->c_on_swappedout_q) {
-               queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
-               c_seg->c_on_swappedout_q = 0;
-               c_swappedout_count--;
+       c_seg->c_busy_swapping = 0;
+
+       if (c_seg->c_overage_swap == TRUE) {
+               c_overage_swapped_count--;
+               c_seg->c_overage_swap = FALSE;
+       }       
+       if (has_data == TRUE) {
+               c_seg_switch_state(c_seg, C_ON_SWAPPEDIN_Q, FALSE);
        } else {
-               assert(c_seg->c_on_swappedout_sparse_q);
+               c_seg->c_store.c_buffer = (int32_t*) NULL;
+               c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(0);
 
-               queue_remove(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list);
-               c_seg->c_on_swappedout_sparse_q = 0;
-               c_swappedout_sparse_count--;
-       }
-       if (c_seg->c_store.c_buffer) {
-               queue_enter(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list);
-               c_seg->c_on_swappedin_q = 1;
-               c_swappedin_count++;
+               c_seg_switch_state(c_seg, C_ON_BAD_Q, FALSE);
        }
-#if TRACK_BAD_C_SEGMENTS
-       else {
-               queue_enter(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
-               c_seg->c_on_bad_q = 1;
-               c_bad_count++;
-       }
-#endif
        c_seg->c_swappedin_ts = (uint32_t)sec;
-       c_seg->c_ondisk = 0;
-       c_seg->c_was_swapped_in = 1;
 
        lck_mtx_unlock_always(c_list_lock);
 }
@@ -2337,63 +2694,142 @@ c_seg_swapin(c_segment_t c_seg, boolean_t force_minor_compaction)
        uint32_t        io_size = 0;
        uint64_t        f_offset;
 
+       assert(C_SEG_IS_ONDISK(c_seg));
+       
 #if !CHECKSUM_THE_SWAP
-       if (c_seg->c_ondisk)
-               c_seg_trim_tail(c_seg);
+       c_seg_trim_tail(c_seg);
 #endif
        io_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
        f_offset = c_seg->c_store.c_swap_handle;
 
        C_SEG_BUSY(c_seg);
+       c_seg->c_busy_swapping = 1;
        lck_mtx_unlock_always(&c_seg->c_lock);
-       
-       if (c_seg->c_ondisk) {
 
-               PAGE_REPLACEMENT_DISALLOWED(FALSE);
+       PAGE_REPLACEMENT_DISALLOWED(FALSE);
 
-               if (kernel_memory_allocate(kernel_map, &addr, C_SEG_ALLOCSIZE, 0, KMA_COMPRESSOR | KMA_VAONLY) != KERN_SUCCESS)
-                       panic("c_seg_swapin: kernel_memory_allocate failed\n");
+       addr = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
 
-               kernel_memory_populate(kernel_map, addr, io_size, KMA_COMPRESSOR);
+       kernel_memory_populate(kernel_map, addr, io_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
 
-               if (vm_swap_get(addr, f_offset, io_size) != KERN_SUCCESS) {
-                       PAGE_REPLACEMENT_DISALLOWED(TRUE);
+       if (vm_swap_get(addr, f_offset, io_size) != KERN_SUCCESS) {
+               PAGE_REPLACEMENT_DISALLOWED(TRUE);
 
-                       kernel_memory_depopulate(kernel_map, addr, io_size, KMA_COMPRESSOR);
-                       kmem_free(kernel_map, addr, C_SEG_ALLOCSIZE);
+               kernel_memory_depopulate(kernel_map, addr, io_size, KMA_COMPRESSOR);
 
-                       c_seg->c_store.c_buffer = (int32_t*) NULL;
-                       c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(0);
-               } else {
-                       c_seg->c_store.c_buffer = (int32_t*) addr;
+               c_seg_swapin_requeue(c_seg, FALSE);
+       } else {
+               c_seg->c_store.c_buffer = (int32_t*) addr;
 #if ENCRYPTED_SWAP
-                       vm_swap_decrypt(c_seg);
+               vm_swap_decrypt(c_seg);
 #endif /* ENCRYPTED_SWAP */
 
 #if CHECKSUM_THE_SWAP
-                       if (c_seg->cseg_swap_size != io_size)
-                               panic("swapin size doesn't match swapout size");
+               if (c_seg->cseg_swap_size != io_size)
+                       panic("swapin size doesn't match swapout size");
 
-                       if (c_seg->cseg_hash != hash_string((char*) c_seg->c_store.c_buffer, (int)io_size)) {
-                               panic("c_seg_swapin - Swap hash mismatch\n");
-                       }
+               if (c_seg->cseg_hash != hash_string((char*) c_seg->c_store.c_buffer, (int)io_size)) {
+                       panic("c_seg_swapin - Swap hash mismatch\n");
+               }
 #endif /* CHECKSUM_THE_SWAP */
 
-                       PAGE_REPLACEMENT_DISALLOWED(TRUE);
+               PAGE_REPLACEMENT_DISALLOWED(TRUE);
 
-                       if (force_minor_compaction == TRUE) {
-                               lck_mtx_lock_spin_always(&c_seg->c_lock);
+               if (force_minor_compaction == TRUE) {
+                       lck_mtx_lock_spin_always(&c_seg->c_lock);
                        
-                               c_seg_minor_compaction_and_unlock(c_seg, FALSE);
+                       c_seg_minor_compaction_and_unlock(c_seg, FALSE);
+               }
+               OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
+
+               c_seg_swapin_requeue(c_seg, TRUE);
+       }
+       C_SEG_WAKEUP_DONE(c_seg);
+}
+
+
+static void
+c_segment_sv_hash_drop_ref(int hash_indx)
+{
+       struct c_sv_hash_entry o_sv_he, n_sv_he;
+
+       while (1) {
+
+               o_sv_he.he_record = c_segment_sv_hash_table[hash_indx].he_record;
+
+               n_sv_he.he_ref = o_sv_he.he_ref - 1;
+               n_sv_he.he_data = o_sv_he.he_data;
+
+               if (OSCompareAndSwap64((UInt64)o_sv_he.he_record, (UInt64)n_sv_he.he_record, (UInt64 *) &c_segment_sv_hash_table[hash_indx].he_record) == TRUE) {
+                       if (n_sv_he.he_ref == 0)
+                               OSAddAtomic(-1, &c_segment_svp_in_hash);
+                       break;
+               }
+       }
+}
+
+
+static int
+c_segment_sv_hash_insert(uint32_t data)
+{
+       int             hash_sindx;
+       int             misses;
+       struct c_sv_hash_entry o_sv_he, n_sv_he;
+       boolean_t       got_ref = FALSE;
+
+       if (data == 0)
+               OSAddAtomic(1, &c_segment_svp_zero_compressions);
+       else
+               OSAddAtomic(1, &c_segment_svp_nonzero_compressions);
+
+       hash_sindx = data & C_SV_HASH_MASK;
+       
+       for (misses = 0; misses < C_SV_HASH_MAX_MISS; misses++)
+       {
+               o_sv_he.he_record = c_segment_sv_hash_table[hash_sindx].he_record;
+
+               while (o_sv_he.he_data == data || o_sv_he.he_ref == 0) {
+                       n_sv_he.he_ref = o_sv_he.he_ref + 1;
+                       n_sv_he.he_data = data;
+
+                       if (OSCompareAndSwap64((UInt64)o_sv_he.he_record, (UInt64)n_sv_he.he_record, (UInt64 *) &c_segment_sv_hash_table[hash_sindx].he_record) == TRUE) {
+                               if (n_sv_he.he_ref == 1)
+                                       OSAddAtomic(1, &c_segment_svp_in_hash);
+                               got_ref = TRUE;
+                               break;
                        }
-                       OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
-                       OSAddAtomic64(C_SEG_ALLOCSIZE, &compressor_kvspace_used);
+                       o_sv_he.he_record = c_segment_sv_hash_table[hash_sindx].he_record;
                }
+               if (got_ref == TRUE)
+                       break;
+               hash_sindx++;
+
+               if (hash_sindx == C_SV_HASH_SIZE)
+                       hash_sindx = 0;
        }
-       c_seg_swapin_requeue(c_seg);
+       if (got_ref == FALSE)
+               return(-1);
 
-       C_SEG_WAKEUP_DONE(c_seg);
+       return (hash_sindx);
+}
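
c_segment_sv_hash_insert() and c_segment_sv_hash_drop_ref() above manage each hash slot as a single 64-bit record (reference count plus cached 32-bit value) updated with OSCompareAndSwap64, so a slot can be claimed, shared, and released without taking a lock. A simplified user-space sketch of the same pattern using C11 atomics (illustrative only; hypothetical names, much smaller table):

/* svhash_sketch.c -- illustrative only */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define HASH_SIZE  4
#define HASH_MASK  (HASH_SIZE - 1)
#define MAX_MISS   2

union sv_entry {
        uint64_t        record;
        struct { uint32_t ref; uint32_t data; } f;
};

static _Atomic uint64_t sv_table[HASH_SIZE];

/* take a reference on the entry holding 'data', claiming a free slot if needed;
 * returns the slot index or -1 if every probed slot is busy with other values */
static int
sv_insert(uint32_t data)
{
        int indx = data & HASH_MASK;

        for (int miss = 0; miss < MAX_MISS; miss++, indx = (indx + 1) & HASH_MASK) {
                union sv_entry old, new;

                old.record = atomic_load(&sv_table[indx]);

                while (old.f.data == data || old.f.ref == 0) {
                        new.f.ref = old.f.ref + 1;
                        new.f.data = data;
                        /* on failure, compare_exchange reloads old.record for us */
                        if (atomic_compare_exchange_weak(&sv_table[indx],
                                                         &old.record, new.record))
                                return indx;
                }
        }
        return -1;
}

static void
sv_drop_ref(int indx)
{
        union sv_entry old, new;

        old.record = atomic_load(&sv_table[indx]);
        do {
                new.f.ref = old.f.ref - 1;
                new.f.data = old.f.data;
        } while (!atomic_compare_exchange_weak(&sv_table[indx],
                                               &old.record, new.record));
}

int
main(void)
{
        int a = sv_insert(0);           /* e.g. an all-zero page */
        int b = sv_insert(0xdeadbeef);

        printf("slots: %d %d\n", a, b);
        sv_drop_ref(a);
        sv_drop_ref(b);
        return 0;
}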
+
+
+#if RECORD_THE_COMPRESSED_DATA
+
+static void
+c_compressed_record_data(char *src, int c_size)
+{
+       if ((c_compressed_record_cptr + c_size + 4) >= c_compressed_record_ebuf)
+               panic("c_compressed_record_cptr >= c_compressed_record_ebuf");
+
+       *(int *)((void *)c_compressed_record_cptr) = c_size;
+
+       c_compressed_record_cptr += 4;
+
+       memcpy(c_compressed_record_cptr, src, c_size);
+       c_compressed_record_cptr += c_size;
 }
+#endif
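
Under RECORD_THE_COMPRESSED_DATA, each compression appends a 4-byte size header followed by the compressed bytes to a record buffer, which is flushed via c_compressed_record_write() once it fills (see the flush check later in this diff). A minimal sketch of that length-prefixed append (illustrative only; the flush is a stand-in and the names are hypothetical):

/* record_sketch.c -- illustrative only */
#include <assert.h>
#include <stdio.h>
#include <string.h>

#define RECORD_BUF_SIZE 4096

static char     record_buf[RECORD_BUF_SIZE];
static char     *record_cptr = record_buf;

static void
record_flush(void)
{
        /* stand-in for writing the accumulated records out */
        printf("flushing %ld bytes\n", (long)(record_cptr - record_buf));
        record_cptr = record_buf;
}

static void
record_data(const char *src, int c_size)
{
        /* host-endian 4-byte size header, then the payload */
        assert(record_cptr + c_size + 4 <= record_buf + RECORD_BUF_SIZE);

        memcpy(record_cptr, &c_size, 4);
        record_cptr += 4;
        memcpy(record_cptr, src, c_size);
        record_cptr += c_size;
}

int
main(void)
{
        char page[64] = "pretend this is one compressed page";

        record_data(page, (int)sizeof(page));
        record_data(page, (int)sizeof(page));
        record_flush();
        return 0;
}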
 
 
 static int
@@ -2411,8 +2847,12 @@ retry:
                return (1);
        /*
         * returns with c_seg lock held
-        * and PAGE_REPLACEMENT_DISALLOWED(TRUE)
+        * and PAGE_REPLACEMENT_DISALLOWED(TRUE)...
+        * c_nextslot has been allocated and
+        * c_store.c_buffer populated
         */
+       assert(c_seg->c_state == C_IS_FILLING);
+
        cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_seg->c_nextslot);
 
        cs->c_packed_ptr = C_SLOT_PACK_PTR(slot_ptr);
@@ -2425,28 +2865,11 @@ retry:
        if (max_csize > PAGE_SIZE)
                max_csize = PAGE_SIZE;
 
-       if (C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset -
-                                 c_seg->c_nextoffset)
-           < (unsigned) max_csize + PAGE_SIZE &&
-           (C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset)
-            < C_SEG_ALLOCSIZE)) {
-               lck_mtx_unlock_always(&c_seg->c_lock);
-
-               kernel_memory_populate(kernel_map,
-                                      (vm_offset_t) &c_seg->c_store.c_buffer[c_seg->c_populated_offset],
-                                      PAGE_SIZE,
-                                      KMA_COMPRESSOR);
-
-               lck_mtx_lock_spin_always(&c_seg->c_lock);
-
-               c_seg->c_populated_offset += C_SEG_BYTES_TO_OFFSET(PAGE_SIZE);
-       }
-
 #if CHECKSUM_THE_DATA
        cs->c_hash_data = hash_string(src, PAGE_SIZE);
 #endif
 
-       c_size = WKdm_compress_new((WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
+       c_size = WKdm_compress_new((const WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
                                  (WK_word *)(uintptr_t)scratch_buf, max_csize - 4);
        assert(c_size <= (max_csize - 4) && c_size >= -1);
 
@@ -2454,15 +2877,48 @@ retry:
 
                if (max_csize < PAGE_SIZE) {
                        c_current_seg_filled(c_seg, current_chead);
+                       assert(*current_chead == NULL);
 
-                       PAGE_REPLACEMENT_DISALLOWED(FALSE);
+                       lck_mtx_unlock_always(&c_seg->c_lock);
 
+                       PAGE_REPLACEMENT_DISALLOWED(FALSE);
                        goto retry;
                }
                c_size = PAGE_SIZE;
 
                memcpy(&c_seg->c_store.c_buffer[cs->c_offset], src, c_size);
+
+               OSAddAtomic(1, &c_segment_noncompressible_pages);
+
+       } else if (c_size == 0) {
+               int             hash_index;
+
+               /*
+                * special case - this is a page completely full of a single 32 bit value
+                */
+               hash_index = c_segment_sv_hash_insert(*(uint32_t *)(uintptr_t)src);
+
+               if (hash_index != -1) {
+                       slot_ptr->s_cindx = hash_index;
+                       slot_ptr->s_cseg = C_SV_CSEG_ID;
+
+                       OSAddAtomic(1, &c_segment_svp_hash_succeeded);
+#if RECORD_THE_COMPRESSED_DATA
+                       c_compressed_record_data(src, 4);
+#endif
+                       goto sv_compression;
+               }
+               c_size = 4;
+               
+               memcpy(&c_seg->c_store.c_buffer[cs->c_offset], src, c_size);
+
+               OSAddAtomic(1, &c_segment_svp_hash_failed);
        }
+
+#if RECORD_THE_COMPRESSED_DATA
+       c_compressed_record_data((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size);
+#endif
+
 #if CHECKSUM_THE_COMPRESSED_DATA
        cs->c_hash_compressed_data = hash_string((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size);
 #endif
@@ -2476,16 +2932,26 @@ retry:
        /* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */
        slot_ptr->s_cseg = c_seg->c_mysegno + 1; 
 
-       if (c_seg->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg->c_nextslot >= C_SLOT_MAX)
+sv_compression:
+       if (c_seg->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg->c_nextslot >= C_SLOT_MAX_INDEX) {
                c_current_seg_filled(c_seg, current_chead);
-       else
-               lck_mtx_unlock_always(&c_seg->c_lock);
+               assert(*current_chead == NULL);
+       }
+       lck_mtx_unlock_always(&c_seg->c_lock);
 
        PAGE_REPLACEMENT_DISALLOWED(FALSE);
 
-       OSAddAtomic64(c_rounded_size, &compressor_bytes_used);
+#if RECORD_THE_COMPRESSED_DATA
+       if ((c_compressed_record_cptr - c_compressed_record_sbuf) >= C_SEG_ALLOCSIZE) {
+               c_compressed_record_write(c_compressed_record_sbuf, (int)(c_compressed_record_cptr - c_compressed_record_sbuf));
+               c_compressed_record_cptr = c_compressed_record_sbuf;
+       }
+#endif
+       if (c_size) {
+               OSAddAtomic64(c_size, &c_segment_compressed_bytes);
+               OSAddAtomic64(c_rounded_size, &compressor_bytes_used);
+       }
        OSAddAtomic64(PAGE_SIZE, &c_segment_input_bytes);
-       OSAddAtomic64(c_size, &c_segment_compressed_bytes);
 
        OSAddAtomic(1, &c_segment_pages_compressed);
        OSAddAtomic(1, &sample_period_compression_count);
@@ -2505,13 +2971,33 @@ c_decompress_page(char *dst, volatile c_slot_mapping_t slot_ptr, int flags, int
        int             c_rounded_size;
        uint32_t        c_size;
        int             retval = 0;
-       boolean_t       c_seg_has_data = TRUE;
-       boolean_t       c_seg_swappedin = FALSE;
        boolean_t       need_unlock = TRUE;
        boolean_t       consider_defragmenting = FALSE;
+       boolean_t       kdp_mode = FALSE;
+
+       if (flags & C_KDP) {
+               if (not_in_kdp) {
+                       panic("C_KDP passed to decompress page from outside of debugger context");
+               }
+
+               assert((flags & C_KEEP) ==  C_KEEP);
+               assert((flags & C_DONT_BLOCK) == C_DONT_BLOCK);
+
+               if ((flags & (C_DONT_BLOCK | C_KEEP)) != (C_DONT_BLOCK | C_KEEP)) {
+                       return (-2);
+               }
+
+               kdp_mode = TRUE;
+       }
 
 ReTry:
-       PAGE_REPLACEMENT_DISALLOWED(TRUE);
+       if (!kdp_mode) {
+               PAGE_REPLACEMENT_DISALLOWED(TRUE);
+       } else {
+               if (kdp_lck_rw_lock_is_acquired_exclusive(&c_master_lock)) {
+                       return (-2);
+               }
+       }
 
 #if HIBERNATION
        /*
@@ -2526,7 +3012,9 @@ ReTry:
        if (dst && decompressions_blocked == TRUE) {
                if (flags & C_DONT_BLOCK) {
 
-                       PAGE_REPLACEMENT_DISALLOWED(FALSE);
+                       if (!kdp_mode) {
+                               PAGE_REPLACEMENT_DISALLOWED(FALSE);
+                       }
 
                        *zeroslot = 0;
                        return (-2);
@@ -2549,14 +3037,21 @@ ReTry:
        /* s_cseg is actually "segno+1" */
        c_seg = c_segments[slot_ptr->s_cseg - 1].c_seg;
 
-       lck_mtx_lock_spin_always(&c_seg->c_lock);
+       if (!kdp_mode) {
+               lck_mtx_lock_spin_always(&c_seg->c_lock);
+       } else {
+               if (kdp_lck_mtx_lock_spin_is_acquired(&c_seg->c_lock)) {
+                       return (-2);
+               }
+       }
 
-       if (flags & C_DONT_BLOCK) {
-               if (c_seg->c_busy || (c_seg->c_ondisk && dst)) {
+       assert(c_seg->c_state != C_IS_EMPTY && c_seg->c_state != C_IS_FREE);
 
-                       retval = -2;
+       if (flags & C_DONT_BLOCK) {
+               if (c_seg->c_busy || (C_SEG_IS_ONDISK(c_seg) && dst)) {
                        *zeroslot = 0;
 
+                       retval = -2;
                        goto done;
                }
        }
@@ -2581,13 +3076,16 @@ ReTry:
                clock_sec_t     cur_ts_sec;
                clock_nsec_t    cur_ts_nsec;
 
-               if (c_seg->c_on_swappedout_q || c_seg->c_on_swappedout_sparse_q) {
-                       if (c_seg->c_ondisk)
-                               c_seg_swappedin = TRUE;
+               if (C_SEG_IS_ONDISK(c_seg)) {
+                       assert(kdp_mode == FALSE);
                        c_seg_swapin(c_seg, FALSE);
+
+                       retval = 1;
                }               
-               if (c_seg->c_store.c_buffer == NULL) {
-                       c_seg_has_data = FALSE;
+               if (c_seg->c_state == C_ON_BAD_Q) {
+                       assert(c_seg->c_store.c_buffer == NULL);
+
+                       retval = -1;
                        goto c_seg_invalid_data;
                }
 #if CHECKSUM_THE_COMPRESSED_DATA
@@ -2599,20 +3097,46 @@ ReTry:
                         * page wasn't compressible... just copy it out
                         */
                        memcpy(dst, &c_seg->c_store.c_buffer[cs->c_offset], PAGE_SIZE);
+               } else if (c_size == 4) {
+                       int32_t         data;
+                       int32_t         *dptr;
+
+                       /*
+                        * page was populated with a single value
+                        * that didn't fit into our fast hash
+                        * so we packed it in as a single non-compressed value
+                        * that we need to populate the page with
+                        */
+                       dptr = (int32_t *)(uintptr_t)dst;
+                       data = *(int32_t *)(&c_seg->c_store.c_buffer[cs->c_offset]);
+#if __x86_64__
+                       memset_word(dptr, data, PAGE_SIZE / sizeof(int32_t));
+#else
+                       {
+                       int             i;
+
+                       for (i = 0; i < (int)(PAGE_SIZE / sizeof(int32_t)); i++)
+                               *dptr++ = data;
+                       }
+#endif
                } else {
                        uint32_t        my_cpu_no;
                        char            *scratch_buf;
 
-                       /*
-                        * we're behind the c_seg lock held in spin mode
-                        * which means pre-emption is disabled... therefore
-                        * the following sequence is atomic and safe
-                        */
-                       my_cpu_no = cpu_number();
+                       if (!kdp_mode) {
+                               /*
+                                * we're behind the c_seg lock held in spin mode
+                                * which means pre-emption is disabled... therefore
+                                * the following sequence is atomic and safe
+                                */
+                               my_cpu_no = cpu_number();
 
-                       assert(my_cpu_no < compressor_cpus);
+                               assert(my_cpu_no < compressor_cpus);
 
-                       scratch_buf = &compressor_scratch_bufs[my_cpu_no * WKdm_SCRATCH_BUF_SIZE];
+                               scratch_buf = &compressor_scratch_bufs[my_cpu_no * WKdm_SCRATCH_BUF_SIZE];
+                       } else {
+                               scratch_buf = kdp_compressor_scratch_buf;
+                       }
                        WKdm_decompress_new((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
                                            (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size);
                }
@@ -2621,7 +3145,7 @@ ReTry:
                if (cs->c_hash_data != hash_string(dst, PAGE_SIZE))
                        panic("decompressed data doesn't match original");
 #endif
-               if (!c_seg->c_was_swapped_in) {
+               if (c_seg->c_swappedin_ts == 0 && !kdp_mode) {
 
                        clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);
 
@@ -2634,24 +3158,15 @@ ReTry:
 
                        OSAddAtomic(1, &sample_period_decompression_count);
                }
-       } else {
-               if (c_seg->c_store.c_buffer == NULL)
-                       c_seg_has_data = FALSE;
        }
 c_seg_invalid_data:
 
-       if (c_seg_has_data == TRUE) {
-               if (c_seg_swappedin == TRUE)
-                       retval = 1;
-               else
-                       retval = 0;
-       } else
-               retval = -1;
-
        if (flags & C_KEEP) {
                *zeroslot = 0;
                goto done;
        }
+
+       assert(kdp_mode == FALSE);
        c_seg->c_bytes_unused += c_rounded_size;
        c_seg->c_bytes_used -= c_rounded_size;
        PACK_C_SIZE(cs, 0);
@@ -2661,22 +3176,24 @@ c_seg_invalid_data:
 
        OSAddAtomic(-1, &c_segment_pages_compressed);
 
-       if (c_seg_has_data == TRUE && !c_seg->c_ondisk) {
+       if (c_seg->c_state != C_ON_BAD_Q && !(C_SEG_IS_ONDISK(c_seg))) {
                /*
-                * c_ondisk == TRUE can occur when we're doing a
+                * C_SEG_IS_ONDISK == TRUE can occur when we're doing a
                 * free of a compressed page (i.e. dst == NULL)
                 */
                OSAddAtomic64(-c_rounded_size, &compressor_bytes_used);
        }
-       if (!c_seg->c_filling) {
+       if (c_seg->c_state != C_IS_FILLING) {
                if (c_seg->c_bytes_used == 0) {
-                       if (!c_seg->c_ondisk) {
+                       if ( !(C_SEG_IS_ONDISK(c_seg))) {
                                int     pages_populated;
 
                                pages_populated = (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE;
                                c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(0);
 
                                if (pages_populated) {
+
+                                       assert(c_seg->c_state != C_ON_BAD_Q);
                                        assert(c_seg->c_store.c_buffer != NULL);
 
                                        C_SEG_BUSY(c_seg);
@@ -2687,29 +3204,35 @@ c_seg_invalid_data:
                                        lck_mtx_lock_spin_always(&c_seg->c_lock);
                                        C_SEG_WAKEUP_DONE(c_seg);
                                }
-                               if (!c_seg->c_on_minorcompact_q && !c_seg->c_on_swapout_q)
+                               if (!c_seg->c_on_minorcompact_q)
                                        c_seg_need_delayed_compaction(c_seg);
                        } else
-                               assert(c_seg->c_on_swappedout_sparse_q);
+                               assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);
 
                } else if (c_seg->c_on_minorcompact_q) {
 
-                       if (C_SEG_INCORE_IS_SPARSE(c_seg)) {
+                       assert(c_seg->c_state != C_ON_BAD_Q);
+
+                       if (C_SEG_SHOULD_MINORCOMPACT(c_seg)) {
                                c_seg_try_minor_compaction_and_unlock(c_seg);
                                need_unlock = FALSE;
                        }
-               } else if (!c_seg->c_ondisk) {
+               } else if ( !(C_SEG_IS_ONDISK(c_seg))) {
 
-                       if (c_seg_has_data == TRUE && !c_seg->c_on_swapout_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
+                       if (c_seg->c_state != C_ON_BAD_Q && c_seg->c_state != C_ON_SWAPOUT_Q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
                                c_seg_need_delayed_compaction(c_seg);
                        }
-               } else if (!c_seg->c_on_swappedout_sparse_q && C_SEG_ONDISK_IS_SPARSE(c_seg)) {
+               } else if (c_seg->c_state != C_ON_SWAPPEDOUTSPARSE_Q && C_SEG_ONDISK_IS_SPARSE(c_seg)) {
 
                        c_seg_move_to_sparse_list(c_seg);
                        consider_defragmenting = TRUE;
                }
        }
 done:
+       if (kdp_mode) {
+               return retval;
+       }
+
        if (need_unlock == TRUE)
                lck_mtx_unlock_always(&c_seg->c_lock);
 
@@ -2726,6 +3249,7 @@ done:
 int
 vm_compressor_get(ppnum_t pn, int *slot, int flags)
 {
+       c_slot_mapping_t  slot_ptr;
        char    *dst;
        int     zeroslot = 1;
        int     retval;
@@ -2735,12 +3259,49 @@ vm_compressor_get(ppnum_t pn, int *slot, int flags)
 #else
 #error "unsupported architecture"
 #endif
+       slot_ptr = (c_slot_mapping_t)slot;
+
+       if (slot_ptr->s_cseg == C_SV_CSEG_ID) {
+               int32_t         data;
+               int32_t         *dptr;
+
+               /*
+                * page was populated with a single value
+                * that found a home in our hash table
+                * grab that value from the hash and use it to
+                * populate the page we are decompressing into
+                */
+               dptr = (int32_t *)(uintptr_t)dst;
+               data = c_segment_sv_hash_table[slot_ptr->s_cindx].he_data;
+#if __x86_64__
+               memset_word(dptr, data, PAGE_SIZE / sizeof(int32_t));
+#else
+               {
+               int             i;
+
+               for (i = 0; i < (int)(PAGE_SIZE / sizeof(int32_t)); i++)
+                       *dptr++ = data;
+               }
+#endif
+               c_segment_sv_hash_drop_ref(slot_ptr->s_cindx);
 
-       retval = c_decompress_page(dst, (c_slot_mapping_t)slot, flags, &zeroslot);
+               if ( !(flags & C_KEEP)) {
+                       OSAddAtomic(-1, &c_segment_pages_compressed);
+                       *slot = 0;
+               }
+               if (data)
+                       OSAddAtomic(1, &c_segment_svp_nonzero_decompressions);
+               else
+                       OSAddAtomic(1, &c_segment_svp_zero_decompressions);
+
+               return (0);
+       }
+
+       retval = c_decompress_page(dst, slot_ptr, flags, &zeroslot);
 
        /*
         * zeroslot will be set to 0 by c_decompress_page if (flags & C_KEEP)
-        * or (flags & C_DONT_BLOCK) and we found 'c_busy' or 'c_ondisk' set
+        * or (flags & C_DONT_BLOCK) and we found 'c_busy' or 'C_SEG_IS_ONDISK' to be TRUE
         */
        if (zeroslot) {
                *slot = 0;
@@ -2749,7 +3310,7 @@ vm_compressor_get(ppnum_t pn, int *slot, int flags)
         * returns 0 if we successfully decompressed a page from a segment already in memory
         * returns 1 if we had to first swap in the segment, before successfully decompressing the page
         * returns -1 if we encountered an error swapping in the segment - decompression failed
-        * returns -2 if (flags & C_DONT_BLOCK) and we found 'c_busy' or 'c_ondisk' set
+        * returns -2 if (flags & C_DONT_BLOCK) and we found 'c_busy' or 'C_SEG_IS_ONDISK' to be true
         */
        return (retval);
 }
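The comment above spells out the return-code contract for vm_compressor_get (0, 1, -1, -2). The fragment below is an editorial sketch of a hypothetical caller handling those codes; it is not part of the commit, and the locals pn and slot are assumptions for illustration.

	/* hypothetical caller -- illustrates the documented return codes only */
	int rc = vm_compressor_get(pn, &slot, C_DONT_BLOCK | C_KEEP);

	if (rc == -2) {
		/* C_DONT_BLOCK was set and the segment was busy or on disk... try again later */
	} else if (rc == -1) {
		/* swap-in of the segment failed... the page could not be decompressed */
	} else {
		/* 0: decompressed from an in-core segment, 1: the segment was swapped in first */
	}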
@@ -2758,12 +3319,23 @@ vm_compressor_get(ppnum_t pn, int *slot, int flags)
 int
 vm_compressor_free(int *slot, int flags)
 {
+       c_slot_mapping_t  slot_ptr;
        int     zeroslot = 1;
        int     retval;
 
        assert(flags == 0 || flags == C_DONT_BLOCK);
 
-       retval = c_decompress_page(NULL, (c_slot_mapping_t)slot, flags, &zeroslot);
+       slot_ptr = (c_slot_mapping_t)slot;
+
+       if (slot_ptr->s_cseg == C_SV_CSEG_ID) {
+
+               c_segment_sv_hash_drop_ref(slot_ptr->s_cindx);
+               OSAddAtomic(-1, &c_segment_pages_compressed);
+
+               *slot = 0;
+               return (0);
+       }
+       retval = c_decompress_page(NULL, slot_ptr, flags, &zeroslot);
        /*
         * returns 0 if we successfully freed the specified compressed page
         * returns -2 if (flags & C_DONT_BLOCK) and we found 'c_busy' set
@@ -2771,6 +3343,8 @@ vm_compressor_free(int *slot, int flags)
 
        if (retval == 0)
                *slot = 0;
+       else
+               assert(retval == -2);
 
        return (retval);
 }
@@ -2802,9 +3376,14 @@ vm_compressor_transfer(
        int                     c_indx;
        c_slot_t                cs;
 
-       dst_slot = (c_slot_mapping_t) dst_slot_p;
        src_slot = (c_slot_mapping_t) src_slot_p;
 
+       if (src_slot->s_cseg == C_SV_CSEG_ID) {
+               *dst_slot_p = *src_slot_p;
+               *src_slot_p = 0;
+               return;
+       }
+       dst_slot = (c_slot_mapping_t) dst_slot_p;
 Retry:
        PAGE_REPLACEMENT_DISALLOWED(TRUE);
        /* get segment for src_slot */
@@ -2812,7 +3391,7 @@ Retry:
        /* lock segment */
        lck_mtx_lock_spin_always(&c_seg->c_lock);
        /* wait if it's busy */
-       if (c_seg->c_busy) {
+       if (c_seg->c_busy && !c_seg->c_busy_swapping) {
                PAGE_REPLACEMENT_DISALLOWED(FALSE);
                c_seg_wait_on_busy(c_seg);
                goto Retry;
@@ -2828,3 +3407,273 @@ Retry:
        lck_mtx_unlock_always(&c_seg->c_lock);
        PAGE_REPLACEMENT_DISALLOWED(FALSE);
 }
+
+#if CONFIG_FREEZE
+
+int    freezer_finished_filling = 0;
+
+void
+vm_compressor_finished_filling(
+       void    **current_chead)
+{
+       c_segment_t     c_seg;
+
+       if ((c_seg = *(c_segment_t *)current_chead) == NULL)
+               return;
+
+       assert(c_seg->c_state == C_IS_FILLING);
+       
+       lck_mtx_lock_spin_always(&c_seg->c_lock);
+
+       c_current_seg_filled(c_seg, (c_segment_t *)current_chead);
+
+       lck_mtx_unlock_always(&c_seg->c_lock);
+
+       freezer_finished_filling++;
+}
+
+
+/*
+ * This routine is used to transfer the compressed chunks from
+ * the c_seg/cindx pointed to by slot_p into a new c_seg headed
+ * by the current_chead and a new cindx within that c_seg.
+ *
+ * Currently, this routine is only used by the "freezer backed by
+ * compressor with swap" mode to create a series of c_segs that
+ * only contain compressed data belonging to one task. So, we 
+ * move a task's previously compressed data into a set of new
+ * c_segs which will also hold the task's yet to be compressed data.
+ */
+
+kern_return_t
+vm_compressor_relocate(
+       void            **current_chead,
+       int             *slot_p)
+{
+       c_slot_mapping_t        slot_ptr;
+       c_slot_mapping_t        src_slot;
+       uint32_t                c_rounded_size;
+       uint32_t                c_size;
+       uint16_t                dst_slot;
+       c_slot_t                c_dst;
+       c_slot_t                c_src;
+       int                     c_indx;
+       c_segment_t             c_seg_dst = NULL;
+       c_segment_t             c_seg_src = NULL;
+       kern_return_t           kr = KERN_SUCCESS;
+
+
+       src_slot = (c_slot_mapping_t) slot_p;
+
+       if (src_slot->s_cseg == C_SV_CSEG_ID) {
+               /*
+                * no need to relocate... this is a page full of a single
+                * value which is hashed to a single entry not contained
+                * in a c_segment_t
+                */
+               return (kr);
+       }
+
+Relookup_dst:
+       c_seg_dst = c_seg_allocate((c_segment_t *)current_chead);
+       /*
+        * returns with c_seg lock held
+        * and PAGE_REPLACEMENT_DISALLOWED(TRUE)...
+        * c_nextslot has been allocated and
+        * c_store.c_buffer populated
+        */
+       if (c_seg_dst == NULL) {
+               /*
+                * Out of compression segments?
+                */
+               kr = KERN_RESOURCE_SHORTAGE;
+               goto out;
+       }
+
+       assert(c_seg_dst->c_busy == 0);
+
+       C_SEG_BUSY(c_seg_dst);
+
+       dst_slot = c_seg_dst->c_nextslot;
+       
+       lck_mtx_unlock_always(&c_seg_dst->c_lock);
+
+Relookup_src:
+       c_seg_src = c_segments[src_slot->s_cseg - 1].c_seg;
+
+       assert(c_seg_dst != c_seg_src);
+
+       lck_mtx_lock_spin_always(&c_seg_src->c_lock);
+
+       if (C_SEG_IS_ONDISK(c_seg_src)) {
+       
+               /*
+                * A "thaw" can mark a process as eligible for
+                * another freeze cycle without bringing any of
+                * its swapped out c_segs back from disk (because
+                * that is done on-demand).
+                *
+                * If the src c_seg we find for our pre-compressed
+                * data is already on-disk, then we are dealing
+                * with an app's data that is already packed and
+                * swapped out. Don't do anything.
+                */
+               
+               PAGE_REPLACEMENT_DISALLOWED(FALSE);
+
+               lck_mtx_unlock_always(&c_seg_src->c_lock);
+
+               c_seg_src = NULL;
+
+               goto out;
+       }
+
+       if (c_seg_src->c_busy) {
+
+               PAGE_REPLACEMENT_DISALLOWED(FALSE);
+               c_seg_wait_on_busy(c_seg_src);
+                       
+               c_seg_src = NULL;
+
+               PAGE_REPLACEMENT_DISALLOWED(TRUE);
+
+               goto Relookup_src;
+       }
+
+       C_SEG_BUSY(c_seg_src);
+
+       lck_mtx_unlock_always(&c_seg_src->c_lock);
+       
+       PAGE_REPLACEMENT_DISALLOWED(FALSE);
+
+       /* find the c_slot */
+       c_indx = src_slot->s_cindx;
+
+       c_src = C_SEG_SLOT_FROM_INDEX(c_seg_src, c_indx);
+
+       c_size = UNPACK_C_SIZE(c_src);
+
+       assert(c_size);
+
+       if (c_size > (uint32_t)(C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES((int32_t)c_seg_dst->c_nextoffset))) {
+               /*
+                * This segment is full. We need a new one.
+                */
+
+               PAGE_REPLACEMENT_DISALLOWED(TRUE);
+       
+               lck_mtx_lock_spin_always(&c_seg_src->c_lock);
+               C_SEG_WAKEUP_DONE(c_seg_src);
+               lck_mtx_unlock_always(&c_seg_src->c_lock);
+
+               c_seg_src = NULL;
+
+               lck_mtx_lock_spin_always(&c_seg_dst->c_lock);
+
+               assert(c_seg_dst->c_busy);
+               assert(c_seg_dst->c_state == C_IS_FILLING);
+               assert(!c_seg_dst->c_on_minorcompact_q);
+
+               c_current_seg_filled(c_seg_dst, (c_segment_t *)current_chead);
+               assert(*current_chead == NULL);
+       
+               C_SEG_WAKEUP_DONE(c_seg_dst);
+       
+               lck_mtx_unlock_always(&c_seg_dst->c_lock);
+
+               c_seg_dst = NULL;
+
+               PAGE_REPLACEMENT_DISALLOWED(FALSE);
+
+               goto Relookup_dst;
+       }
+
+       c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot);
+
+       memcpy(&c_seg_dst->c_store.c_buffer[c_seg_dst->c_nextoffset], &c_seg_src->c_store.c_buffer[c_src->c_offset], c_size);
+
+       c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
+
+#if CHECKSUM_THE_DATA
+       c_dst->c_hash_data = c_src->c_hash_data;
+#endif
+#if CHECKSUM_THE_COMPRESSED_DATA
+       c_dst->c_hash_compressed_data = c_src->c_hash_compressed_data;
+#endif
+
+       c_dst->c_size = c_src->c_size;
+       c_dst->c_packed_ptr = c_src->c_packed_ptr;
+       c_dst->c_offset = c_seg_dst->c_nextoffset;
+
+       if (c_seg_dst->c_firstemptyslot == c_seg_dst->c_nextslot)
+               c_seg_dst->c_firstemptyslot++;
+
+       c_seg_dst->c_nextslot++;
+       c_seg_dst->c_bytes_used += c_rounded_size;
+       c_seg_dst->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);
+               
+
+       PACK_C_SIZE(c_src, 0);
+
+       c_seg_src->c_bytes_used -= c_rounded_size;
+       c_seg_src->c_bytes_unused += c_rounded_size;
+       
+       if (c_indx < c_seg_src->c_firstemptyslot) {
+               c_seg_src->c_firstemptyslot = c_indx;
+       }
+
+       c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, dst_slot);
+               
+       PAGE_REPLACEMENT_ALLOWED(TRUE);
+       slot_ptr = (c_slot_mapping_t)C_SLOT_UNPACK_PTR(c_dst);
+       /* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */
+       slot_ptr->s_cseg = c_seg_dst->c_mysegno + 1;
+       slot_ptr->s_cindx = dst_slot;
+
+       PAGE_REPLACEMENT_ALLOWED(FALSE);
+
+out:
+       if (c_seg_src) {
+
+               lck_mtx_lock_spin_always(&c_seg_src->c_lock);
+
+               C_SEG_WAKEUP_DONE(c_seg_src);
+
+               if (c_seg_src->c_bytes_used == 0 && c_seg_src->c_state != C_IS_FILLING) {
+                       if (!c_seg_src->c_on_minorcompact_q)
+                               c_seg_need_delayed_compaction(c_seg_src);
+               }
+
+               lck_mtx_unlock_always(&c_seg_src->c_lock);
+       }
+       
+       if (c_seg_dst) {
+
+               PAGE_REPLACEMENT_DISALLOWED(TRUE);
+
+               lck_mtx_lock_spin_always(&c_seg_dst->c_lock);
+
+               if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX_INDEX) {
+                       /*
+                        * Nearing or exceeded maximum slot and offset capacity.
+                        */
+                       assert(c_seg_dst->c_busy);
+                       assert(c_seg_dst->c_state == C_IS_FILLING);
+                       assert(!c_seg_dst->c_on_minorcompact_q);
+
+                       c_current_seg_filled(c_seg_dst, (c_segment_t *)current_chead);
+                       assert(*current_chead == NULL);
+               }  
+               
+               C_SEG_WAKEUP_DONE(c_seg_dst);
+
+               lck_mtx_unlock_always(&c_seg_dst->c_lock);
+
+               c_seg_dst = NULL;
+
+               PAGE_REPLACEMENT_DISALLOWED(FALSE);
+       }
+
+       return kr;
+}
+#endif /* CONFIG_FREEZE */
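The block comment on vm_compressor_relocate above describes the "freezer backed by compressor with swap" flow: a task's already-compressed slots are repacked into fresh per-task segments. The loop below is a hedged sketch of such a caller under assumed names -- freezer_chead, task_slots and nslots are illustrative and do not appear in this commit.

	/* illustrative freeze-time repacking loop (hypothetical caller) */
	void		*freezer_chead = NULL;	/* per-task current_chead owned by the freezer */
	kern_return_t	kr;
	int		i;

	for (i = 0; i < nslots; i++) {
		if (task_slots[i] == 0)
			continue;		/* this page was never compressed */

		kr = vm_compressor_relocate(&freezer_chead, &task_slots[i]);

		if (kr == KERN_RESOURCE_SHORTAGE)
			break;			/* out of compression segments */
	}
	/* close out the partially filled per-task segment */
	vm_compressor_finished_filling(&freezer_chead);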
index 9ac8d64b11ee8e8bab8c13968d895edfe709948e..45b1108926bf1dbbdb549b558dffb0f00ae7c4fc 100644 (file)
 
 #define C_SEG_OFFSET_BITS      16
 #define C_SEG_BUFSIZE          (1024 * 256)
-#define C_SEG_ALLOCSIZE                (C_SEG_BUFSIZE + PAGE_SIZE)
-#define C_SEG_OFF_LIMIT                (C_SEG_BYTES_TO_OFFSET((C_SEG_BUFSIZE - 512)))
+#define        C_SEG_MAX_PAGES         (C_SEG_BUFSIZE / PAGE_SIZE)
 
-#define C_SEG_SLOT_ARRAYS      6
-#define C_SEG_SLOT_ARRAY_SIZE  64              /* must be a power of 2 */
-#define C_SEG_SLOT_ARRAY_MASK  (C_SEG_SLOT_ARRAY_SIZE - 1)
-#define C_SLOT_MAX             (C_SEG_SLOT_ARRAYS * C_SEG_SLOT_ARRAY_SIZE)
+#define C_SEG_OFF_LIMIT                (C_SEG_BYTES_TO_OFFSET((C_SEG_BUFSIZE - 128)))
+#define C_SEG_ALLOCSIZE                (C_SEG_BUFSIZE)
+#define C_SEG_MAX_POPULATE_SIZE        (4 * PAGE_SIZE)
 
 
 #define CHECKSUM_THE_SWAP              0       /* Debug swap data */
 #define CHECKSUM_THE_DATA              0       /* Debug compressor/decompressor data */
 #define CHECKSUM_THE_COMPRESSED_DATA   0       /* Debug compressor/decompressor compressed data */
 #define VALIDATE_C_SEGMENTS            0       /* Debug compaction */
-#define TRACK_BAD_C_SEGMENTS           0       /* Debug I/O error handling */
+
+#define RECORD_THE_COMPRESSED_DATA     0
+
+
+
+struct c_slot {
+       uint64_t        c_offset:C_SEG_OFFSET_BITS,
+                       c_size:12,
+                       c_packed_ptr:36;
+#if CHECKSUM_THE_DATA
+       unsigned int    c_hash_data;
+#endif
+#if CHECKSUM_THE_COMPRESSED_DATA
+       unsigned int    c_hash_compressed_data;
+#endif
+
+};
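The three bit-fields in struct c_slot are sized to fill exactly one 64-bit word (C_SEG_OFFSET_BITS + 12 + 36 = 16 + 12 + 36 = 64). The assertion below is an illustrative check, not a line from the header.

	/* editorial sketch: the bit-field widths above pack into a single uint64_t */
	_Static_assert(C_SEG_OFFSET_BITS + 12 + 36 == 64, "c_slot bit-fields fill 64 bits");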
+
+#define        C_IS_EMPTY              0
+#define        C_IS_FREE               1
+#define        C_IS_FILLING            2
+#define C_ON_AGE_Q             3
+#define C_ON_SWAPOUT_Q         4
+#define C_ON_SWAPPEDOUT_Q      5
+#define        C_ON_SWAPPEDOUTSPARSE_Q 6
+#define        C_ON_SWAPPEDIN_Q        7
+#define        C_ON_MAJORCOMPACT_Q     8
+#define        C_ON_BAD_Q              9
+
 
 struct c_segment {
 #if __i386__ || __x86_64__
@@ -71,22 +97,15 @@ struct c_segment {
        
 #define C_SEG_MAX_LIMIT                (1 << 19)       /* this needs to track the size of c_mysegno */
        uint32_t        c_mysegno:19,
-                       c_filling:1,
                        c_busy:1,
                        c_busy_swapping:1,
                        c_wanted:1,
-                       c_must_free:1,
-                       c_ondisk:1,
-                       c_was_swapped_in:1,
-                       c_on_minorcompact_q:1,  /* can also be on the age_q or the swappedin_q */
-                       c_on_age_q:1,           /* creation age ordered list of in-core segments that
-                                                  are available to be major-compacted and swapped out */
-                       c_on_swappedin_q:1,     /* allows us to age newly swapped in segments */
-                       c_on_swapout_q:1,       /* this is a transient queue */
-                       c_on_swappedout_q:1,    /* segment has been major-compacted and
-                                                  possibly swapped out to disk (c_ondisk == 1) */
-                       c_on_swappedout_sparse_q:1;     /* segment has become sparse and should be garbage
-                                                          collected if too many segments reach this state */
+                       c_on_minorcompact_q:1,  /* can also be on the age_q, the majorcompact_q or the swappedin_q */
+
+                       c_state:4,              /* what state is the segment in which dictates which q to find it on */
+                       c_overage_swap:1,
+                       c_reserved:4;
+
        uint16_t        c_firstemptyslot;
        uint16_t        c_nextslot;
        uint32_t        c_nextoffset;
@@ -100,10 +119,6 @@ struct c_segment {
                uint64_t c_swap_handle;
        } c_store;
 
-#if TRACK_BAD_C_SEGMENTS
-       uint32_t        c_on_bad_q;
-#endif
-
 #if    VALIDATE_C_SEGMENTS
         uint32_t       c_was_minor_compacted;
         uint32_t       c_was_major_compacted;
@@ -118,12 +133,18 @@ struct c_segment {
        thread_t        c_busy_for_thread;
 #endif /* MACH_ASSERT */
 
-       struct c_slot   *c_slots[C_SEG_SLOT_ARRAYS];
+       int             c_slot_var_array_len;
+       struct  c_slot  *c_slot_var_array;
+       struct  c_slot  c_slot_fixed_array[0];
 };
 
+#define C_SEG_SLOT_VAR_ARRAY_MIN_LEN   C_SEG_MAX_PAGES
+
+extern int             c_seg_fixed_array_len;
+extern vm_offset_t     c_buffers;
+#define        C_SEG_BUFFER_ADDRESS(c_segno)   ((c_buffers + ((uint64_t)c_segno * (uint64_t)C_SEG_ALLOCSIZE)))
 
-#define C_SEG_SLOT_FROM_INDEX(cseg, index)     (&(cseg->c_slots[index / C_SEG_SLOT_ARRAY_SIZE])[index & C_SEG_SLOT_ARRAY_MASK])
-#define C_SEG_SLOTARRAY_FROM_INDEX(cseg, index)        (index / C_SEG_SLOT_ARRAY_SIZE)
+#define C_SEG_SLOT_FROM_INDEX(cseg, index)     (index < c_seg_fixed_array_len ? &(cseg->c_slot_fixed_array[index]) : &(cseg->c_slot_var_array[index - c_seg_fixed_array_len]))
 
 #define        C_SEG_OFFSET_TO_BYTES(off)      ((off) * (int) sizeof(int32_t))
 #define C_SEG_BYTES_TO_OFFSET(bytes)   ((bytes) / (int) sizeof(int32_t))
@@ -133,7 +154,11 @@ struct c_segment {
 #define C_SEG_OFFSET_ALIGNMENT_MASK    0x3
 
 #define        C_SEG_ONDISK_IS_SPARSE(cseg)    ((cseg->c_bytes_used < (C_SEG_BUFSIZE / 2)) ? 1 : 0)
-#define C_SEG_INCORE_IS_SPARSE(cseg)   ((C_SEG_UNUSED_BYTES(cseg) >= (C_SEG_BUFSIZE / 2)) ? 1 : 0)
+#define C_SEG_SHOULD_MINORCOMPACT(cseg)        ((C_SEG_UNUSED_BYTES(cseg) >= (C_SEG_BUFSIZE / 3)) ? 1 : 0)
+#define C_SEG_SHOULD_MAJORCOMPACT(cseg)        (((cseg->c_bytes_unused + (C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES(c_seg->c_nextoffset))) >= (C_SEG_BUFSIZE / 8)) ? 1 : 0)
+
+#define C_SEG_IS_ONDISK(cseg)          ((cseg->c_state == C_ON_SWAPPEDOUT_Q || cseg->c_state == C_ON_SWAPPEDOUTSPARSE_Q))
+
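With C_SEG_BUFSIZE fixed at 256 KiB, the sparseness/compaction predicates above work out to the concrete thresholds below (the figures are just the macro arithmetic spelled out). Note that C_SEG_SHOULD_MAJORCOMPACT, as written, reads c_seg->c_nextoffset rather than its cseg parameter, so it only expands correctly where the caller's local is named c_seg.

	C_SEG_ONDISK_IS_SPARSE(cseg):    cseg->c_bytes_used < 131072              (C_SEG_BUFSIZE / 2)
	C_SEG_SHOULD_MINORCOMPACT(cseg): C_SEG_UNUSED_BYTES(cseg) >= 87381        (C_SEG_BUFSIZE / 3, was / 2)
	C_SEG_SHOULD_MAJORCOMPACT(cseg): unused bytes + untouched tail >= 32768   (C_SEG_BUFSIZE / 8)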
 
 #define C_SEG_WAKEUP_DONE(cseg)                                \
        MACRO_BEGIN                                     \
@@ -164,6 +189,7 @@ uint64_t vm_compressor_total_compressions(void);
 void vm_wake_compactor_swapper(void);
 void vm_thrashing_jetsam_done(void);
 void vm_consider_waking_compactor_swapper(void);
+void vm_consider_swapping(void);
 void vm_compressor_flush(void);
 void c_seg_free(c_segment_t);
 void c_seg_free_locked(c_segment_t);
@@ -193,15 +219,19 @@ extern kern_return_t      vm_swap_get(vm_offset_t, uint64_t, uint64_t);
 extern void            vm_swap_free(uint64_t);
 extern void            vm_swap_consider_defragmenting(void);
 
-extern void            c_seg_swapin_requeue(c_segment_t);
+extern void            c_seg_swapin_requeue(c_segment_t, boolean_t);
 extern void            c_seg_swapin(c_segment_t, boolean_t);
 extern void            c_seg_wait_on_busy(c_segment_t);
 extern void            c_seg_trim_tail(c_segment_t);
+extern void            c_seg_switch_state(c_segment_t, int, boolean_t);
 
 extern boolean_t       fastwake_recording_in_progress;
 extern int             compaction_swapper_running;
 extern uint64_t                vm_swap_put_failures;
 
+extern int             c_overage_swapped_count;
+extern int             c_overage_swapped_limit;
+
 extern queue_head_t    c_minor_list_head;
 extern queue_head_t    c_age_list_head;
 extern queue_head_t    c_swapout_list_head;
@@ -214,7 +244,6 @@ extern uint32_t             c_swappedout_count;
 extern uint32_t                c_swappedout_sparse_count;
 
 extern int64_t         compressor_bytes_used;
-extern uint64_t                compressor_kvspace_used;
 extern uint64_t                first_c_segment_to_warm_generation_id;
 extern uint64_t                last_c_segment_to_warm_generation_id;
 extern boolean_t       hibernate_flushing;
@@ -244,22 +273,35 @@ extern uint64_t vm_compressor_compute_elapsed_msecs(clock_sec_t, clock_nsec_t, c
 #define COMPRESSOR_NEEDS_TO_SWAP()             ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_SWAP_THRESHOLD) ? 1 : 0)
 
 #define VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()                            \
-       ((vm_compressor_mode == VM_PAGER_COMPRESSOR_WITH_SWAP ||        \
-         vm_compressor_mode == VM_PAGER_FREEZER_COMPRESSOR_WITH_SWAP) && \
+       (vm_compressor_mode == VM_PAGER_COMPRESSOR_WITH_SWAP &&         \
         ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_SWAP_CATCHUP_THRESHOLD) ? 1 : 0))
 #define HARD_THROTTLE_LIMIT_REACHED()          ((AVAILABLE_NON_COMPRESSED_MEMORY < (VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 2) ? 1 : 0)
 #define SWAPPER_NEEDS_TO_UNTHROTTLE()          ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) ? 1 : 0)
 #define COMPRESSOR_NEEDS_TO_MINOR_COMPACT()    ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0)
 
-#define COMPRESSOR_NEEDS_TO_MAJOR_COMPACT()    (((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_SWAP_THRESHOLD) || \
-                                                 (compressor_kvspace_used - (compressor_object->resident_page_count * PAGE_SIZE_64)) > compressor_kvwaste_limit) \
+/*
+ * indicate the need to do a major compaction if
+ * the overall set of in-use compression segments
+ * becomes sparse... on systems that support pressure
+ * driven swapping, this will also cause swapouts to
+ * be initiated.
+ */
+#define COMPRESSOR_NEEDS_TO_MAJOR_COMPACT()    (((c_segment_count >= (c_segments_nearing_limit / 8)) && \
+                                                 ((c_segment_count * C_SEG_MAX_PAGES) - VM_PAGE_COMPRESSOR_COUNT) > \
+                                                 ((c_segment_count / 8) * C_SEG_MAX_PAGES)) \
                                                 ? 1 : 0)
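A worked instance of the heuristic above, assuming 4 KiB pages and round example counts (the two counts are assumed values, not figures from this commit):

	C_SEG_MAX_PAGES          = 262144 / 4096 = 64
	c_segment_count          = 1000        (example)
	c_segments_nearing_limit = 8000        (example)

	clause 1:  1000 >= 8000 / 8                                   -> true
	clause 2:  (1000 * 64) - VM_PAGE_COMPRESSOR_COUNT > (1000 / 8) * 64
	           64000 - VM_PAGE_COMPRESSOR_COUNT > 8000

	i.e. a major compaction (and, where supported, pressure-driven swapout)
	is signalled once fewer than 56000 of the 64000 pages the in-use
	segments could hold are actually resident in the compressor.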
 
-#define COMPRESSOR_FREE_RESERVED_LIMIT         28
+#define COMPRESSOR_FREE_RESERVED_LIMIT         128
 
 #define COMPRESSOR_SCRATCH_BUF_SIZE WKdm_SCRATCH_BUF_SIZE
 
 
+#if RECORD_THE_COMPRESSED_DATA
+extern void     c_compressed_record_init(void);
+extern void     c_compressed_record_write(char *, int);
+#endif
+
+
 #if __i386__ || __x86_64__
 extern lck_mtx_t       *c_list_lock;
 #else /* __i386__ || __x86_64__ */
index 0cb37f7dcfeb547c02921ae4e2d041ccfc858da1..f21599eb6dae74eaeca037358d74c3ba6bcb6d6b 100644 (file)
@@ -51,6 +51,7 @@ int           vm_swapfile_create_thread_running = 0;
 int            vm_swapfile_gc_thread_awakened = 0;
 int            vm_swapfile_gc_thread_running = 0;
 
+int64_t                vm_swappin_avail = 0;
 unsigned int   vm_swapfile_total_segs_alloced = 0;
 unsigned int   vm_swapfile_total_segs_used = 0;
 
@@ -59,6 +60,8 @@ unsigned int  vm_swapfile_total_segs_used = 0;
 #define SWAP_RECLAIM   0x2     /* Swap file is marked to be reclaimed */
 #define SWAP_WANTED    0x4     /* Swap file has waiters */
 #define SWAP_REUSE     0x8     /* Swap file is on the Q and has a name. Reuse after init-ing.*/
+#define SWAP_PINNED    0x10    /* Swap file is pinned (FusionDrive) */
+
 
 struct swapfile{
        queue_head_t            swp_queue;      /* list of swap files */
@@ -82,8 +85,6 @@ struct swapfile{
 queue_head_t   swf_global_queue;
 boolean_t      swp_trim_supported = FALSE;
 
-#define                VM_SWAPFILE_DELAYED_TRIM_MAX    128
-
 extern clock_sec_t     dont_trim_until_ts;
 clock_sec_t            vm_swapfile_last_failed_to_create_ts = 0;
 clock_sec_t            vm_swapfile_last_successful_create_ts = 0;
@@ -102,10 +103,14 @@ static void vm_swap_wait_on_trim_handling_in_progress(void);
 
 
 
+#define VM_MAX_SWAP_FILE_NUM           100
+#define        VM_SWAPFILE_DELAYED_TRIM_MAX    128
+
 #define        VM_SWAP_SHOULD_DEFRAGMENT()     (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4) ? 1 : 0)
 #define VM_SWAP_SHOULD_RECLAIM()       (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= SWAPFILE_RECLAIM_THRESHOLD_SEGS) ? 1 : 0)
 #define VM_SWAP_SHOULD_ABORT_RECLAIM() (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= SWAPFILE_RECLAIM_MINIMUM_SEGS) ? 1 : 0)
-#define VM_SWAP_SHOULD_CREATE(cur_ts)  (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
+#define VM_SWAP_SHOULD_PIN(_size)      (vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))
+#define VM_SWAP_SHOULD_CREATE(cur_ts)  ((vm_num_swap_files < VM_MAX_SWAP_FILE_NUM) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
                                         ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
 #define VM_SWAP_SHOULD_TRIM(swf)       ((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)
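VM_SWAP_SHOULD_PIN, together with the SWAP_PINNED bookkeeping later in this file, treats vm_swappin_avail as a simple budget: it is set from vnode_getswappin_avail(), debited by swp_size when a pinned swap file is created, and credited back when one is reclaimed. A small worked example with an assumed 2 GiB budget:

	vm_swappin_avail = 2 GiB                   (assumed starting value)

	create a 1 GiB file:   VM_SWAP_SHOULD_PIN(1 GiB) -> true,  avail becomes 1 GiB
	create a 1 GiB file:   VM_SWAP_SHOULD_PIN(1 GiB) -> true,  avail becomes 0
	create another file:   VM_SWAP_SHOULD_PIN(size)  -> false, file is created unpinned
	reclaim a pinned file: vm_swappin_avail += swf->swp_size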
 
@@ -119,6 +124,15 @@ static void vm_swap_wait_on_trim_handling_in_progress(void);
 extern unsigned int hash_string(char *cp, int len);
 #endif
 
+#if RECORD_THE_COMPRESSED_DATA
+boolean_t      c_compressed_record_init_done = FALSE;
+int            c_compressed_record_write_error = 0;
+struct vnode   *c_compressed_record_vp = NULL;
+uint64_t       c_compressed_record_file_offset = 0;
+void   c_compressed_record_init(void);
+void   c_compressed_record_write(char *, int);
+#endif
+
 #if ENCRYPTED_SWAP
 extern boolean_t               swap_crypt_ctx_initialized;
 extern void                    swap_crypt_ctx_initialize(void);
@@ -189,7 +203,6 @@ vm_compressor_swap_init()
                                         BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
                panic("vm_swapout_thread: create failed");
        }
-       thread->options |= TH_OPT_VMPRIV;
        vm_swapout_thread_id = thread->thread_id;
 
        thread_deallocate(thread);
@@ -198,11 +211,9 @@ vm_compressor_swap_init()
                                 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
                panic("vm_swapfile_create_thread: create failed");
        }
-       thread->options |= TH_OPT_VMPRIV;
 
        thread_deallocate(thread);
 
-
        if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
                                 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
                panic("vm_swapfile_gc_thread: create failed");
@@ -228,6 +239,29 @@ vm_compressor_swap_init()
 }
 
 
+#if RECORD_THE_COMPRESSED_DATA
+
+void
+c_compressed_record_init()
+{
+       if (c_compressed_record_init_done == FALSE) {
+               vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
+               c_compressed_record_init_done = TRUE;
+       }
+}
+
+void
+c_compressed_record_write(char *buf, int size)
+{
+       if (c_compressed_record_write_error == 0) {
+               c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
+               c_compressed_record_file_offset += size;
+       }
+}
+#endif
+
+
+
 void
 vm_swap_file_set_tuneables()
 {
@@ -259,6 +293,7 @@ vm_swap_file_set_tuneables()
         if (vnode_pager_isSSD(vp) == FALSE)
                vm_pageout_reinit_tuneables();
        vnode_setswapmount(vp);
+       vm_swappin_avail = vnode_getswappin_avail(vp);
        vm_swapfile_close((uint64_t)pathname, vp);
 done:
        kfree(pathname, namelen);
@@ -400,7 +435,7 @@ vm_swap_defragment()
 
                lck_mtx_lock_spin_always(&c_seg->c_lock);
 
-               assert(c_seg->c_on_swappedout_sparse_q);
+               assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);
 
                if (c_seg->c_busy) {
                        lck_mtx_unlock_always(c_list_lock);
@@ -423,6 +458,7 @@ vm_swap_defragment()
                         * c_seg_free_locked consumes the c_list_lock
                         * and c_seg->c_lock
                         */
+                       C_SEG_BUSY(c_seg);
                        c_seg_free_locked(c_seg);
 
                        vm_swap_defragment_free++;
@@ -461,6 +497,8 @@ vm_swapfile_create_thread(void)
        clock_sec_t     sec;
        clock_nsec_t    nsec;
 
+       current_thread()->options |= TH_OPT_VMPRIV;
+
        vm_swapfile_create_thread_awakened++;
        vm_swapfile_create_thread_running = 1;
 
@@ -624,6 +662,9 @@ done:
 }
 
 
+int vm_swapout_found_empty = 0;
+
+
 static void
 vm_swapout_thread(void)
 {
@@ -633,6 +674,8 @@ vm_swapout_thread(void)
        kern_return_t   kr = KERN_SUCCESS;
        vm_offset_t     addr = 0;
 
+       current_thread()->options |= TH_OPT_VMPRIV;
+
        vm_swapout_thread_awakened++;
 
        lck_mtx_lock_spin_always(c_list_lock);
@@ -643,7 +686,7 @@ vm_swapout_thread(void)
 
                lck_mtx_lock_spin_always(&c_seg->c_lock);
 
-               assert(c_seg->c_on_swapout_q);
+               assert(c_seg->c_state == C_ON_SWAPOUT_Q);
 
                if (c_seg->c_busy) {
                        lck_mtx_unlock_always(c_list_lock);
@@ -654,19 +697,20 @@ vm_swapout_thread(void)
 
                        continue;
                }
-               queue_remove(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
-               c_seg->c_on_swapout_q = 0;
-               c_swapout_count--;
-
                vm_swapout_thread_processed_segments++;
 
-               thread_wakeup((event_t)&compaction_swapper_running);
-
                size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
                
                if (size == 0) {
-                       c_seg_free_locked(c_seg);
-                       goto c_seg_was_freed;
+                       assert(c_seg->c_on_minorcompact_q);
+                       assert(c_seg->c_bytes_used == 0);
+
+                       c_seg_switch_state(c_seg, C_IS_EMPTY, FALSE);
+                       lck_mtx_unlock_always(&c_seg->c_lock);
+                       lck_mtx_unlock_always(c_list_lock);
+
+                       vm_swapout_found_empty++;
+                       goto c_seg_is_empty;
                }
                C_SEG_BUSY(c_seg);
                c_seg->c_busy_swapping = 1;
@@ -692,28 +736,26 @@ vm_swapout_thread(void)
 
                PAGE_REPLACEMENT_DISALLOWED(TRUE);
 
+               if (kr == KERN_SUCCESS) {
+                       kernel_memory_depopulate(kernel_map, (vm_offset_t) addr, size, KMA_COMPRESSOR);
+               }
                lck_mtx_lock_spin_always(c_list_lock);
                lck_mtx_lock_spin_always(&c_seg->c_lock);
 
                if (kr == KERN_SUCCESS) {
+                       int             new_state = C_ON_SWAPPEDOUT_Q;
+                       boolean_t       insert_head = FALSE;
 
-                       if (C_SEG_ONDISK_IS_SPARSE(c_seg) && hibernate_flushing == FALSE) {
+                       if (hibernate_flushing == TRUE) {
+                               if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
+                                   c_seg->c_generation_id <= last_c_segment_to_warm_generation_id)
+                                       insert_head = TRUE;
+                       } else if (C_SEG_ONDISK_IS_SPARSE(c_seg))
+                               new_state = C_ON_SWAPPEDOUTSPARSE_Q;
 
-                               c_seg_insert_into_q(&c_swappedout_sparse_list_head, c_seg);
-                               c_seg->c_on_swappedout_sparse_q = 1;
-                               c_swappedout_sparse_count++;
+                       c_seg_switch_state(c_seg, new_state, insert_head);
 
-                       } else {
-                               if (hibernate_flushing == TRUE && (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
-                                                                  c_seg->c_generation_id <= last_c_segment_to_warm_generation_id))
-                                       queue_enter_first(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
-                               else
-                                       queue_enter(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
-                               c_seg->c_on_swappedout_q = 1;
-                               c_swappedout_count++;
-                       }
                        c_seg->c_store.c_swap_handle = f_offset;
-                       c_seg->c_ondisk = 1;
 
                        VM_STAT_INCR_BY(swapouts, size >> PAGE_SHIFT);
                        
@@ -723,33 +765,22 @@ vm_swapout_thread(void)
 #if ENCRYPTED_SWAP
                        vm_swap_decrypt(c_seg);
 #endif /* ENCRYPTED_SWAP */
-                       c_seg_insert_into_q(&c_age_list_head, c_seg);
-                       c_seg->c_on_age_q = 1;
-                       c_age_count++;
-
-                       vm_swap_put_failures++;
+                       if (c_seg->c_overage_swap == TRUE) {
+                               c_seg->c_overage_swap = FALSE;
+                               c_overage_swapped_count--;
+                       }
+                       c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
                }
                lck_mtx_unlock_always(c_list_lock);
 
-               if (c_seg->c_must_free)
-                       c_seg_free(c_seg);
-               else {
-                       c_seg->c_busy_swapping = 0;
-                       C_SEG_WAKEUP_DONE(c_seg);
-                       lck_mtx_unlock_always(&c_seg->c_lock);
-               }
-
-               if (kr == KERN_SUCCESS)
-                       kernel_memory_depopulate(kernel_map, (vm_offset_t) addr, size, KMA_COMPRESSOR);
+               c_seg->c_busy_swapping = 0;
+               C_SEG_WAKEUP_DONE(c_seg);
+               lck_mtx_unlock_always(&c_seg->c_lock);
 
                PAGE_REPLACEMENT_DISALLOWED(FALSE);
 
-               if (kr == KERN_SUCCESS) {
-                       kmem_free(kernel_map, (vm_offset_t) addr, C_SEG_ALLOCSIZE);
-                       OSAddAtomic64(-C_SEG_ALLOCSIZE, &compressor_kvspace_used);
-               }
                vm_pageout_io_throttle();
-c_seg_was_freed:
+c_seg_is_empty:
                if (c_swapout_count == 0)
                        vm_swap_consider_defragmenting();
 
@@ -772,6 +803,7 @@ vm_swap_create_file()
        int             namelen = 0;
        boolean_t       swap_file_created = FALSE;
        boolean_t       swap_file_reuse = FALSE;
+       boolean_t       swap_file_pin = FALSE;
        struct swapfile *swf = NULL;
 
        /*
@@ -835,7 +867,9 @@ vm_swap_create_file()
 
        while (size >= MIN_SWAP_FILE_SIZE) {
 
-               if (vm_swapfile_preallocate(swf->swp_vp, &size) == 0) {
+               swap_file_pin = VM_SWAP_SHOULD_PIN(size);
+
+               if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {
 
                        int num_bytes_for_bitmap = 0;
 
@@ -877,10 +911,14 @@ vm_swap_create_file()
 
                        vm_swapfile_total_segs_alloced += swf->swp_nsegs;
 
+                       if (swap_file_pin == TRUE) {
+                               swf->swp_flags |= SWAP_PINNED;
+                               vm_swappin_avail -= swf->swp_size;
+                       }
+
                        lck_mtx_unlock(&vm_swap_data_lock);
 
                        thread_wakeup((event_t) &vm_num_swap_files);
-
                        break;
                } else {
 
@@ -1056,6 +1094,7 @@ retry:
                        goto retry;
                }
        }
+       vm_swap_put_failures++;
 
        return KERN_FAILURE;
 
@@ -1081,6 +1120,8 @@ done:
        if (error) {
                vm_swap_free(*f_offset);
 
+               vm_swap_put_failures++;
+
                return KERN_FAILURE;
        }
        return KERN_SUCCESS;
@@ -1317,7 +1358,7 @@ vm_swap_reclaim(void)
 
        c_segment_t     c_seg = NULL;
        
-       if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&addr), C_SEG_BUFSIZE, 0, KMA_KOBJECT) != KERN_SUCCESS) {
+       if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&addr), C_SEG_BUFSIZE, 0, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS) {
                panic("vm_swap_reclaim: kernel_memory_allocate failed\n");
        }
 
@@ -1402,13 +1443,20 @@ ReTry_for_cseg:
                }
 
                c_seg = swf->swp_csegs[segidx];
+               assert(c_seg);
 
                lck_mtx_lock_spin_always(&c_seg->c_lock);
 
-               assert(c_seg->c_ondisk);
-
                if (c_seg->c_busy) {
-
+                       /*
+                        * a swapped out c_segment in the process of being freed will remain in the
+                        * busy state until after the vm_swap_free is called on it... vm_swap_free
+                        * takes the vm_swap_data_lock, so can't change the swap state until after
+                        * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete
+                        * which will allow c_seg_free_locked to clear busy and wake up this thread...
+                        * at that point, we re-look up the swap state which will now indicate that
+                        * this c_segment no longer exists.
+                        */
                        c_seg->c_wanted = 1;
                        
                        assert_wait((event_t) (c_seg), THREAD_UNINT);
@@ -1425,88 +1473,83 @@ ReTry_for_cseg:
                (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
 
                f_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
-               
+
+               assert(c_seg == swf->swp_csegs[segidx]);
                swf->swp_csegs[segidx] = NULL;
                swf->swp_nseginuse--;
 
                vm_swapfile_total_segs_used--;
                        
                lck_mtx_unlock(&vm_swap_data_lock);
-       
-               if (c_seg->c_must_free) {
-                       C_SEG_BUSY(c_seg);
-                       c_seg_free(c_seg);
-               } else {
 
-                       C_SEG_BUSY(c_seg);
-                       c_seg->c_busy_swapping = 1;
+               assert(C_SEG_IS_ONDISK(c_seg)); 
+
+               C_SEG_BUSY(c_seg);
+               c_seg->c_busy_swapping = 1;
 #if !CHECKSUM_THE_SWAP
-                       c_seg_trim_tail(c_seg);
+               c_seg_trim_tail(c_seg);
 #endif
-                       c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
+               c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
                
-                       assert(c_size <= C_SEG_BUFSIZE);
+               assert(c_size <= C_SEG_BUFSIZE && c_size);
 
-                       lck_mtx_unlock_always(&c_seg->c_lock);
+               lck_mtx_unlock_always(&c_seg->c_lock);
 
-                       if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ)) {
+               if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ)) {
 
-                               /*
-                                * reading the data back in failed, so convert c_seg
-                                * to a swapped in c_segment that contains no data
-                                */
-                               c_seg->c_store.c_buffer = (int32_t *)NULL;
-                               c_seg_swapin_requeue(c_seg);
+                       /*
+                        * reading the data back in failed, so convert c_seg
+                        * to a swapped in c_segment that contains no data
+                        */
+                       c_seg_swapin_requeue(c_seg, FALSE);
+                       /*
+                        * returns with c_busy_swapping cleared
+                        */
 
-                               goto swap_io_failed;
-                       }
-                       VM_STAT_INCR_BY(swapins, c_size >> PAGE_SHIFT);
+                       vm_swap_get_failures++;
+                       goto swap_io_failed;
+               }
+               VM_STAT_INCR_BY(swapins, c_size >> PAGE_SHIFT);
 
-                       if (vm_swap_put(addr, &f_offset, c_size, c_seg)) {
-                               vm_offset_t     c_buffer;
+               if (vm_swap_put(addr, &f_offset, c_size, c_seg)) {
+                       vm_offset_t     c_buffer;
 
-                               /*
-                                * the put failed, so convert c_seg to a fully swapped in c_segment
-                                * with valid data
-                                */
-                               if (kernel_memory_allocate(kernel_map, &c_buffer, C_SEG_ALLOCSIZE, 0, KMA_COMPRESSOR | KMA_VAONLY) != KERN_SUCCESS)
-                                       panic("vm_swap_reclaim: kernel_memory_allocate failed\n");
-                               OSAddAtomic64(C_SEG_ALLOCSIZE, &compressor_kvspace_used);
+                       /*
+                        * the put failed, so convert c_seg to a fully swapped in c_segment
+                        * with valid data
+                        */
+                       c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
 
-                               kernel_memory_populate(kernel_map, c_buffer, c_size, KMA_COMPRESSOR);
+                       kernel_memory_populate(kernel_map, c_buffer, c_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
 
-                               memcpy((char *)c_buffer, (char *)addr, c_size);
+                       memcpy((char *)c_buffer, (char *)addr, c_size);
 
-                               c_seg->c_store.c_buffer = (int32_t *)c_buffer;
+                       c_seg->c_store.c_buffer = (int32_t *)c_buffer;
 #if ENCRYPTED_SWAP
-                               vm_swap_decrypt(c_seg);
+                       vm_swap_decrypt(c_seg);
 #endif /* ENCRYPTED_SWAP */
-                               c_seg_swapin_requeue(c_seg);
-
-                               OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
+                       c_seg_swapin_requeue(c_seg, TRUE);
+                       /*
+                        * returns with c_busy_swapping cleared
+                        */
+                       OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
 
-                               goto swap_io_failed;
-                       }
-                       VM_STAT_INCR_BY(swapouts, c_size >> PAGE_SHIFT);
+                       goto swap_io_failed;
+               }
+               VM_STAT_INCR_BY(swapouts, c_size >> PAGE_SHIFT);
 
-                       lck_mtx_lock_spin_always(&c_seg->c_lock);
+               lck_mtx_lock_spin_always(&c_seg->c_lock);
                                
-                       assert(c_seg->c_ondisk);
-                       /*
-                        * The c_seg will now know about the new location on disk.
-                        */
-                       c_seg->c_store.c_swap_handle = f_offset;
+               assert(C_SEG_IS_ONDISK(c_seg));
+               /*
+                * The c_seg will now know about the new location on disk.
+                */
+               c_seg->c_store.c_swap_handle = f_offset;
+               c_seg->c_busy_swapping = 0;
 swap_io_failed:
-                       c_seg->c_busy_swapping = 0;
-               
-                       if (c_seg->c_must_free)
-                               c_seg_free(c_seg);
-                       else {
-                               C_SEG_WAKEUP_DONE(c_seg);
+               C_SEG_WAKEUP_DONE(c_seg);
                                
-                               lck_mtx_unlock_always(&c_seg->c_lock);
-                       }
-               }
+               lck_mtx_unlock_always(&c_seg->c_lock);
                lck_mtx_lock(&vm_swap_data_lock);
        }
 
@@ -1538,6 +1581,10 @@ swap_io_failed:
        
        lck_mtx_lock(&vm_swap_data_lock);
 
+       if (swf->swp_flags & SWAP_PINNED) {
+               vm_swappin_avail += swf->swp_size;
+       }
+
        swf->swp_vp = NULL;     
        swf->swp_size = 0;
        swf->swp_free_hint = 0;
index fd2ba4bd0491632896043fed86f151676bf4e14a..dc22fe7e62559bfe0a3a466bc6e159c5b6d4959b 100644 (file)
@@ -84,9 +84,12 @@ uint64_t vm_swap_get_free_space(void);
 struct vnode;
 extern void vm_swapfile_open(const char *path, struct vnode **vp);
 extern void vm_swapfile_close(uint64_t path, struct vnode *vp);
-extern int vm_swapfile_preallocate(struct vnode *vp, uint64_t *size);
+extern int vm_swapfile_preallocate(struct vnode *vp, uint64_t *size, boolean_t *pin);
 extern uint64_t vm_swapfile_get_blksize(struct vnode *vp);
 extern uint64_t vm_swapfile_get_transfer_size(struct vnode *vp);
 extern int vm_swapfile_io(struct vnode *vp, uint64_t offset, uint64_t start, int npages, int flags);
 
+#if RECORD_THE_COMPRESSED_DATA
+extern int vm_record_file_write(struct vnode *vp, uint64_t offset, char *buf, int size);
+#endif
 
index a638590ded790c8a4eb0c8303177efd0fab00325..f4a1124cec5645ea7580aeca30c62fb8eee51965 100644 (file)
@@ -156,11 +156,11 @@ typedef struct compressor_pager {
 
        unsigned int                    cpgr_references;
        unsigned int                    cpgr_num_slots;
-       unsigned int                    cpgr_num_slots_occupied_pager;
        unsigned int                    cpgr_num_slots_occupied;
        union {
-               compressor_slot_t       *cpgr_dslots;
-               compressor_slot_t       **cpgr_islots;
+               compressor_slot_t       cpgr_eslots[2]; /* embedded slots */
+               compressor_slot_t       *cpgr_dslots;   /* direct slots */
+               compressor_slot_t       **cpgr_islots;  /* indirect slots */
        } cpgr_slots;
 } *compressor_pager_t;
 
@@ -372,11 +372,6 @@ compressor_memory_object_deallocate(
                                                COMPRESSOR_SLOTS_PER_CHUNK,
                                                0,
                                                NULL);
-                               assert(pager->cpgr_num_slots_occupied_pager >=
-                                      num_slots_freed);
-                               OSAddAtomic(-num_slots_freed,
-                                           &pager->cpgr_num_slots_occupied_pager);
-                               assert(pager->cpgr_num_slots_occupied_pager >= 0);
                                pager->cpgr_slots.cpgr_islots[i] = NULL;
                                kfree(chunk, COMPRESSOR_SLOTS_CHUNK_SIZE);
                        }
@@ -384,7 +379,7 @@ compressor_memory_object_deallocate(
                kfree(pager->cpgr_slots.cpgr_islots,
                      num_chunks * sizeof (pager->cpgr_slots.cpgr_islots[0]));
                pager->cpgr_slots.cpgr_islots = NULL;
-       } else {
+       } else if (pager->cpgr_num_slots > 2) {
                chunk = pager->cpgr_slots.cpgr_dslots;
                num_slots_freed =
                        compressor_pager_slots_chunk_free(
@@ -392,15 +387,19 @@ compressor_memory_object_deallocate(
                                pager->cpgr_num_slots,
                                0,
                                NULL);
-               assert(pager->cpgr_num_slots_occupied_pager >= num_slots_freed);
-               OSAddAtomic(-num_slots_freed, &pager->cpgr_num_slots_occupied_pager);
-               assert(pager->cpgr_num_slots_occupied_pager >= 0);
                pager->cpgr_slots.cpgr_dslots = NULL;
                kfree(chunk,
                      (pager->cpgr_num_slots *
                       sizeof (pager->cpgr_slots.cpgr_dslots[0])));
+       } else {
+               chunk = &pager->cpgr_slots.cpgr_eslots[0];
+               num_slots_freed =
+                       compressor_pager_slots_chunk_free(
+                               chunk,
+                               pager->cpgr_num_slots,
+                               0,
+                               NULL);
        }
-       assert(pager->cpgr_num_slots_occupied_pager == 0);
 
        compressor_pager_lock_destroy(pager);
        zfree(compressor_pager_zone, pager);
@@ -553,16 +552,18 @@ compressor_memory_object_create(
        pager->cpgr_control = MEMORY_OBJECT_CONTROL_NULL;
        pager->cpgr_references = 1;
        pager->cpgr_num_slots = (uint32_t)(new_size/PAGE_SIZE);
-       pager->cpgr_num_slots_occupied_pager = 0;
        pager->cpgr_num_slots_occupied = 0;
 
        num_chunks = (pager->cpgr_num_slots + COMPRESSOR_SLOTS_PER_CHUNK - 1) / COMPRESSOR_SLOTS_PER_CHUNK;
        if (num_chunks > 1) {
                pager->cpgr_slots.cpgr_islots = kalloc(num_chunks * sizeof (pager->cpgr_slots.cpgr_islots[0]));
                bzero(pager->cpgr_slots.cpgr_islots, num_chunks * sizeof (pager->cpgr_slots.cpgr_islots[0]));
-       } else {
+       } else if (pager->cpgr_num_slots > 2) {
                pager->cpgr_slots.cpgr_dslots = kalloc(pager->cpgr_num_slots * sizeof (pager->cpgr_slots.cpgr_dslots[0]));
                bzero(pager->cpgr_slots.cpgr_dslots, pager->cpgr_num_slots * sizeof (pager->cpgr_slots.cpgr_dslots[0]));
+       } else {
+               pager->cpgr_slots.cpgr_eslots[0] = 0;
+               pager->cpgr_slots.cpgr_eslots[1] = 0;
        }
 
        /*
@@ -586,6 +587,7 @@ compressor_pager_slots_chunk_free(
        int                     *failures)
 {
        int i;
+       int retval;
        unsigned int num_slots_freed;
 
        if (failures)
@@ -593,10 +595,13 @@ compressor_pager_slots_chunk_free(
        num_slots_freed = 0;
        for (i = 0; i < num_slots; i++) {
                if (chunk[i] != 0) {
-                       if (vm_compressor_free(&chunk[i], flags) == 0)
+                       retval = vm_compressor_free(&chunk[i], flags);
+
+                       if (retval == 0)
                                num_slots_freed++;
                        else {
-                               assert(flags & C_DONT_BLOCK);
+                               if (retval == -2)
+                                       assert(flags & C_DONT_BLOCK);
 
                                if (failures)
                                        *failures += 1;
@@ -660,9 +665,12 @@ compressor_pager_slot_lookup(
                        slot_idx = page_num % COMPRESSOR_SLOTS_PER_CHUNK;
                        *slot_pp = &chunk[slot_idx];
                }
-       } else {
+       } else if (pager->cpgr_num_slots > 2) {
                slot_idx = page_num;
                *slot_pp = &pager->cpgr_slots.cpgr_dslots[slot_idx];
+       } else {
+               slot_idx = page_num;
+               *slot_pp = &pager->cpgr_slots.cpgr_eslots[slot_idx];
        }
 }
 
@@ -730,16 +738,10 @@ vm_compressor_pager_put(
                 * "object" had an equivalent page resident.
                 */
                vm_compressor_free(slot_p, 0);
-               assert(pager->cpgr_num_slots_occupied_pager >= 1);
-               OSAddAtomic(-1, &pager->cpgr_num_slots_occupied_pager);
-               assert(pager->cpgr_num_slots_occupied_pager >= 0);
                *compressed_count_delta_p -= 1;
        }
        if (vm_compressor_put(ppnum, slot_p, current_chead, scratch_buf))
                return (KERN_RESOURCE_SHORTAGE);
-       assert(pager->cpgr_num_slots_occupied_pager >= 0);
-       OSAddAtomic(+1, &pager->cpgr_num_slots_occupied_pager);
-       assert(pager->cpgr_num_slots_occupied_pager > 0);
        *compressed_count_delta_p += 1;
 
        return (KERN_SUCCESS);
@@ -810,9 +812,6 @@ vm_compressor_pager_get(
                         * is still occupied.
                         */
                } else {
-                       assert(pager->cpgr_num_slots_occupied_pager >= 1);
-                       OSAddAtomic(-1, &pager->cpgr_num_slots_occupied_pager);
-                       assert(pager->cpgr_num_slots_occupied_pager >= 0);
                        *compressed_count_delta_p -= 1;
                }
        }
@@ -848,9 +847,6 @@ vm_compressor_pager_state_clr(
                vm_compressor_free(slot_p, 0);
                num_slots_freed++;
                assert(*slot_p == 0);
-               assert(pager->cpgr_num_slots_occupied_pager >= 1);
-               OSAddAtomic(-1, &pager->cpgr_num_slots_occupied_pager);
-               assert(pager->cpgr_num_slots_occupied_pager >= 0);
        }
 
        return num_slots_freed;
@@ -929,7 +925,7 @@ vm_compressor_pager_reap_pages(
                                }
                        }
                }
-       } else {
+       } else if (pager->cpgr_num_slots > 2) {
                chunk = pager->cpgr_slots.cpgr_dslots;
                num_slots_freed +=
                        compressor_pager_slots_chunk_free(
@@ -937,29 +933,21 @@ vm_compressor_pager_reap_pages(
                                pager->cpgr_num_slots,
                                flags,
                                NULL);
+       } else {
+               chunk = &pager->cpgr_slots.cpgr_eslots[0];
+               num_slots_freed +=
+                       compressor_pager_slots_chunk_free(
+                               chunk,
+                               pager->cpgr_num_slots,
+                               flags,
+                               NULL);
        }
-       OSAddAtomic(-num_slots_freed, &pager->cpgr_num_slots_occupied_pager);
 
        compressor_pager_unlock(pager);
 
        return num_slots_freed;
 }
 
-unsigned int
-vm_compressor_pager_get_slots_occupied(
-       memory_object_t mem_obj)
-{
-       compressor_pager_t      pager;
-
-       compressor_pager_lookup(mem_obj, pager);
-       if (pager == NULL)
-               return 0;
-
-       assert(pager->cpgr_num_slots_occupied_pager >= 0);
-
-       return pager->cpgr_num_slots_occupied_pager;
-}
-
 void
 vm_compressor_pager_transfer(
        memory_object_t         dst_mem_obj,
@@ -992,8 +980,6 @@ vm_compressor_pager_transfer(
 
        /* transfer the slot from source to destination */
        vm_compressor_transfer(dst_slot_p, src_slot_p);
-       OSAddAtomic(-1, &src_pager->cpgr_num_slots_occupied_pager);
-       OSAddAtomic(+1, &dst_pager->cpgr_num_slots_occupied_pager);
        OSAddAtomic(-1, &src_pager->cpgr_num_slots_occupied);
        OSAddAtomic(+1, &dst_pager->cpgr_num_slots_occupied);
 }
@@ -1021,11 +1007,16 @@ vm_compressor_pager_next_compressed(
                /* out of range */
                return (memory_object_offset_t) -1;
        }
+
        num_chunks = ((pager->cpgr_num_slots + COMPRESSOR_SLOTS_PER_CHUNK - 1) /
                      COMPRESSOR_SLOTS_PER_CHUNK);
 
        if (num_chunks == 1) {
-               chunk = pager->cpgr_slots.cpgr_dslots;
+               if (pager->cpgr_num_slots > 2) {
+                       chunk = pager->cpgr_slots.cpgr_dslots;
+               } else {
+                       chunk = &pager->cpgr_slots.cpgr_eslots[0];
+               }
                for (slot_idx = page_num;
                     slot_idx < pager->cpgr_num_slots;
                     slot_idx++) {
@@ -1128,3 +1119,29 @@ vm_compressor_pager_count(
                pager->cpgr_num_slots_occupied += compressed_count_delta;
        }
 }
+
+#if CONFIG_FREEZE
+kern_return_t
+vm_compressor_pager_relocate(
+       memory_object_t         mem_obj,
+       memory_object_offset_t  offset,
+       void                    **current_chead)
+{
+       /*
+        * Has the page at this offset been compressed?
+        */
+
+       compressor_slot_t *slot_p;
+       compressor_pager_t dst_pager;
+
+       assert(mem_obj);
+               
+       compressor_pager_lookup(mem_obj, dst_pager);
+       if (dst_pager == NULL)
+               return KERN_FAILURE;
+
+       compressor_pager_slot_lookup(dst_pager, FALSE, offset, &slot_p);
+       return (vm_compressor_relocate(current_chead, slot_p));
+}
+#endif /* CONFIG_FREEZE */
+
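(Editorial note: the vm_compressor_pager.c hunks above add a third, embedded slot representation -- cpgr_eslots -- so pagers covering only one or two pages keep their compressor slots inside the pager structure instead of kalloc'ing a direct-slot array, and the redundant cpgr_num_slots_occupied_pager counter and its accessor are dropped.  A minimal sketch of the resulting three-way storage policy follows; the helper itself is illustrative, not in the source.)

	static compressor_slot_t *
	example_slot_base(compressor_pager_t pager, unsigned int num_chunks)
	{
		if (num_chunks > 1)
			return NULL;	/* indirect: per-chunk arrays hang off cpgr_islots */
		if (pager->cpgr_num_slots > 2)
			return pager->cpgr_slots.cpgr_dslots;	/* direct: one kalloc'd array */
		return &pager->cpgr_slots.cpgr_eslots[0];	/* embedded: 1-2 slots, no allocation */
	}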
index 6016888e59df17ead922c438494c064ee800ddd7..729a30ca0811189dd0e8a3717c0ef5637ac468e7 100644 (file)
@@ -52,6 +52,7 @@ extern kern_return_t vm_compressor_pager_get(
 
 #define        C_DONT_BLOCK            0x01
 #define C_KEEP                 0x02
+#define C_KDP                  0x04
 
 extern unsigned int vm_compressor_pager_state_clr(
        memory_object_t         mem_obj,
@@ -116,14 +117,20 @@ extern int vm_compressor_put(ppnum_t pn, int *slot, void **current_chead, char *
 extern int vm_compressor_get(ppnum_t pn, int *slot, int flags);
 extern int vm_compressor_free(int *slot, int flags);
 extern unsigned int vm_compressor_pager_reap_pages(memory_object_t mem_obj, int flags);
-extern unsigned int vm_compressor_pager_get_slots_occupied(memory_object_t mem_obj);
 extern unsigned int vm_compressor_pager_get_count(memory_object_t mem_obj);
 extern void vm_compressor_pager_count(memory_object_t mem_obj,
                                      int compressed_count_delta,
                                      boolean_t shared_lock,
                                      vm_object_t object);
+
 extern void vm_compressor_transfer(int *dst_slot_p, int        *src_slot_p);
 
+#if CONFIG_FREEZE
+extern kern_return_t vm_compressor_pager_relocate(memory_object_t mem_obj, memory_object_offset_t mem_offset, void **current_chead);
+extern kern_return_t vm_compressor_relocate(void **current_chead, int *src_slot_p);
+extern void vm_compressor_finished_filling(void **current_chead);
+#endif /* CONFIG_FREEZE */
+
 #endif /* _VM_VM_COMPRESSOR_PAGER_H_ */
 
 #endif /* XNU_KERNEL_PRIVATE */
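(Editorial sketch of how a CONFIG_FREEZE caller could drive the relocate API declared above; the function and variable names are illustrative assumptions, not the commit's actual freezer path.)

	static void
	freezer_relocate_example(memory_object_t pager,
				 memory_object_offset_t start,
				 memory_object_offset_t end)
	{
		void			*freezer_chead = NULL;	/* segment being filled */
		memory_object_offset_t	offset;

		for (offset = start; offset < end; offset += PAGE_SIZE) {
			/* move this page's compressed copy, if any, into the
			 * freezer's current compressor segment */
			(void) vm_compressor_pager_relocate(pager, offset, &freezer_chead);
		}
		/* close out the partially filled segment */
		vm_compressor_finished_filling(&freezer_chead);
	}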
index 317a364200b3a1c029ee93ce73227b962b8584d2..95531ddad0d7efd245e6b5d4be56eff5bd572013 100644 (file)
@@ -160,7 +160,7 @@ vm32_region_info(
                        }
 
                        if (entry->is_sub_map)
-                               nmap = entry->object.sub_map;
+                               nmap = VME_SUBMAP(entry);
                        else
                                break;
 
@@ -172,11 +172,11 @@ vm32_region_info(
 
                /* cmap is read-locked; we have a real entry */
 
-               object = entry->object.vm_object;
+               object = VME_OBJECT(entry);
                region.vir_start = (natural_t) entry->vme_start;
                region.vir_end = (natural_t) entry->vme_end;
                region.vir_object = (natural_t)(uintptr_t) object;
-               region.vir_offset = (natural_t) entry->offset;
+               region.vir_offset = (natural_t) VME_OFFSET(entry);
                region.vir_needs_copy = entry->needs_copy;
                region.vir_protection = entry->protection;
                region.vir_max_protection = entry->max_protection;
@@ -270,7 +270,7 @@ vm32_region_info(
                size = vm_map_round_page(2 * used * sizeof(vm_info_object_t),
                                         VM_MAP_PAGE_MASK(ipc_kernel_map));
 
-               kr = vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE);
+               kr = vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_IPC));
                if (kr != KERN_SUCCESS)
                        return KERN_RESOURCE_SHORTAGE;
 
@@ -374,7 +374,7 @@ vm32_region_info_64(
                        }
 
                        if (entry->is_sub_map)
-                               nmap = entry->object.sub_map;
+                               nmap = VME_SUBMAP(entry);
                        else
                                break;
 
@@ -386,11 +386,11 @@ vm32_region_info_64(
 
                /* cmap is read-locked; we have a real entry */
 
-               object = entry->object.vm_object;
+               object = VME_OBJECT(entry);
                region.vir_start = (natural_t) entry->vme_start;
                region.vir_end = (natural_t) entry->vme_end;
                region.vir_object = (natural_t)(uintptr_t) object;
-               region.vir_offset = entry->offset;
+               region.vir_offset = VME_OFFSET(entry);
                region.vir_needs_copy = entry->needs_copy;
                region.vir_protection = entry->protection;
                region.vir_max_protection = entry->max_protection;
@@ -484,7 +484,7 @@ vm32_region_info_64(
                size = vm_map_round_page(2 * used * sizeof(vm_info_object_t),
                                         VM_MAP_PAGE_MASK(ipc_kernel_map));
 
-               kr = vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE);
+               kr = vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_IPC));
                if (kr != KERN_SUCCESS)
                        return KERN_RESOURCE_SHORTAGE;
 
@@ -562,7 +562,7 @@ vm32_mapped_pages_info(
                                 VM_MAP_PAGE_MASK(ipc_kernel_map));
 
        for (;;) {
-           (void) vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE);
+           (void) vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_IPC));
            (void) vm_map_unwire(
                    ipc_kernel_map,
                    vm_map_trunc_page(addr,
@@ -673,7 +673,7 @@ host_virtual_physical_table_info(
 
                size = vm_map_round_page(actual * sizeof *info,
                                         VM_MAP_PAGE_MASK(ipc_kernel_map));
-               kr = kmem_alloc_pageable(ipc_kernel_map, &addr, size);
+               kr = kmem_alloc_pageable(ipc_kernel_map, &addr, size, VM_KERN_MEMORY_IPC);
                if (kr != KERN_SUCCESS)
                        return KERN_RESOURCE_SHORTAGE;
 
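(Editorial note: the vm32_user.c hunks above stop dereferencing vm_map_entry fields directly and use the new VME_OBJECT()/VME_SUBMAP()/VME_OFFSET() accessors, and they tag the ipc_kernel_map allocations with VM_MAKE_TAG(VM_KERN_MEMORY_IPC) for kernel-memory accounting.  The shapes below are a hypothetical sketch only; the real accessors are defined in osfmk/vm/vm_map.h and may differ.)

	/* hypothetical field names, marked by the EXAMPLE_ prefix */
	#define EXAMPLE_VME_OBJECT(e)	((e)->vme_object.vmo_object)
	#define EXAMPLE_VME_SUBMAP(e)	((e)->vme_object.vmo_submap)
	#define EXAMPLE_VME_OFFSET(e)	((e)->vme_offset)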
index 381f69b3a9e2585cf942ee700a26b9555fc57f92..d20d7a3657af37d1b4033ae179ee8236a7aeb572 100644 (file)
@@ -148,21 +148,30 @@ uint64_t vm_hard_throttle_threshold;
 boolean_t current_thread_aborted(void);
 
 /* Forward declarations of internal routines. */
-extern kern_return_t vm_fault_wire_fast(
+static kern_return_t vm_fault_wire_fast(
                                vm_map_t        map,
                                vm_map_offset_t va,
+                               vm_prot_t       prot,
                                vm_map_entry_t  entry,
                                pmap_t          pmap,
                                vm_map_offset_t pmap_addr,
                                ppnum_t         *physpage_p);
 
-extern void vm_fault_continue(void);
-
-extern void vm_fault_copy_cleanup(
+static kern_return_t vm_fault_internal(
+               vm_map_t        map,
+               vm_map_offset_t vaddr,
+               vm_prot_t       caller_prot,
+               boolean_t       change_wiring,
+               int             interruptible,
+               pmap_t          pmap,
+               vm_map_offset_t pmap_addr,
+               ppnum_t         *physpage_p);
+
+static void vm_fault_copy_cleanup(
                                vm_page_t       page,
                                vm_page_t       top_page);
 
-extern void vm_fault_copy_dst_cleanup(
+static void vm_fault_copy_dst_cleanup(
                                vm_page_t       page);
 
 #if    VM_FAULT_CLASSIFY
@@ -184,6 +193,11 @@ unsigned long vm_cs_bitmap_validated = 0;
 
 void vm_pre_fault(vm_map_offset_t);
 
+extern int not_in_kdp;
+extern char *kdp_compressor_decompressed_page;
+extern addr64_t        kdp_compressor_decompressed_page_paddr;
+extern ppnum_t kdp_compressor_decompressed_page_ppnum;
+
 /*
  *     Routine:        vm_fault_init
  *     Purpose:
@@ -226,6 +240,19 @@ vm_fault_init(void)
                PE_get_default("kern.vm_compressor", &vm_compressor_mode, sizeof(vm_compressor_mode));
        }
        PE_parse_boot_argn("vm_compressor_threads", &vm_compressor_thread_count, sizeof (vm_compressor_thread_count));
+
+       if (PE_parse_boot_argn("vm_compressor_immediate", &vm_compressor_temp, sizeof (vm_compressor_temp)))
+               vm_compressor_immediate_preferred_override = TRUE;
+       else {
+               if (PE_get_default("kern.vm_compressor_immediate", &vm_compressor_temp, sizeof(vm_compressor_temp)))
+                       vm_compressor_immediate_preferred_override = TRUE;
+       }
+       if (vm_compressor_immediate_preferred_override == TRUE) {
+               if (vm_compressor_temp)
+                       vm_compressor_immediate_preferred = TRUE;
+               else
+                       vm_compressor_immediate_preferred = FALSE;
+       }
        printf("\"vm_compressor_mode\" is %d\n", vm_compressor_mode);
 }
 
@@ -551,6 +578,7 @@ vm_fault_deactivate_behind(
 #if (DEVELOPMENT || DEBUG)
 uint32_t       vm_page_creation_throttled_hard = 0;
 uint32_t       vm_page_creation_throttled_soft = 0;
+uint64_t       vm_page_creation_throttle_avoided = 0;
 #endif /* DEVELOPMENT || DEBUG */
 
 static int
@@ -582,6 +610,12 @@ vm_page_throttled(boolean_t page_kept)
        if ((vm_page_free_count < vm_page_throttle_limit || ((COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && SWAPPER_NEEDS_TO_UNTHROTTLE())) &&
            thread->t_page_creation_count > (VM_PAGE_CREATION_THROTTLE_PERIOD_SECS * VM_PAGE_CREATION_THROTTLE_RATE_PER_SEC)) {
                
+               if (vm_page_free_wanted == 0 && vm_page_free_wanted_privileged == 0) {
+#if (DEVELOPMENT || DEBUG)
+                       OSAddAtomic64(1, &vm_page_creation_throttle_avoided);
+#endif
+                       goto no_throttle;
+               }
                clock_get_system_microtime(&tv_sec, &tv_usec);
 
                elapsed_sec = tv_sec - thread->t_page_creation_time;
@@ -629,6 +663,7 @@ no_throttle:
        return (0);
 }
 
+
 /*
  * check for various conditions that would
  * prevent us from creating a ZF page...
@@ -679,26 +714,28 @@ vm_fault_check(vm_object_t object, vm_page_t m, vm_page_t first_m, boolean_t int
                        return (VM_FAULT_RETRY);
                }
        }
-       if (page_throttle == TRUE && (throttle_delay = vm_page_throttled(FALSE))) {
-               /*
-                * we're throttling zero-fills...
-                * treat this as if we couldn't grab a page
-                */
-               if (m != VM_PAGE_NULL)
-                       VM_PAGE_FREE(m);
-               vm_fault_cleanup(object, first_m);
+       if (page_throttle == TRUE) {
+               if ((throttle_delay = vm_page_throttled(FALSE))) {
+                       /*
+                        * we're throttling zero-fills...
+                        * treat this as if we couldn't grab a page
+                        */
+                       if (m != VM_PAGE_NULL)
+                               VM_PAGE_FREE(m);
+                       vm_fault_cleanup(object, first_m);
 
-               VM_DEBUG_EVENT(vmf_check_zfdelay, VMF_CHECK_ZFDELAY, DBG_FUNC_NONE, throttle_delay, 0, 0, 0);
+                       VM_DEBUG_EVENT(vmf_check_zfdelay, VMF_CHECK_ZFDELAY, DBG_FUNC_NONE, throttle_delay, 0, 0, 0);
 
-               delay(throttle_delay);
+                       delay(throttle_delay);
 
-               if (current_thread_aborted()) {
+                       if (current_thread_aborted()) {
+                               thread_interrupt_level(interruptible_state);
+                               return VM_FAULT_INTERRUPTED;
+                       }
                        thread_interrupt_level(interruptible_state);
-                       return VM_FAULT_INTERRUPTED;
-               }
-               thread_interrupt_level(interruptible_state);
 
-               return (VM_FAULT_MEMORY_SHORTAGE);
+                       return (VM_FAULT_MEMORY_SHORTAGE);
+               }
        }
        return (VM_FAULT_SUCCESS);
 }
@@ -768,8 +805,8 @@ vm_fault_zero_page(vm_page_t m, boolean_t no_zero_fill)
                         */
                        assert(!m->pageout_queue);
 
-                       VM_PAGE_QUEUES_REMOVE(m);
-
+                       vm_page_queues_remove(m);
+                       vm_page_check_pageable_safe(m);
                        queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
                        m->throttled = TRUE;
                        vm_page_throttled_count++;
@@ -1249,7 +1286,7 @@ vm_fault_page(
                                                vm_page_lockspin_queues();
 
                                                assert(!m->pageout_queue);
-                                               VM_PAGE_QUEUES_REMOVE(m);
+                                               vm_page_queues_remove(m);
 
                                                vm_page_unlock_queues();
                                        }
@@ -1346,7 +1383,7 @@ vm_fault_page(
                                 */
                                vm_page_lockspin_queues();
                                if (m->speculative)
-                                       VM_PAGE_QUEUES_REMOVE(m);
+                                       vm_page_queues_remove(m);
                                vm_page_unlock_queues();
                        }
 
@@ -1459,7 +1496,7 @@ vm_fault_page(
                        }
 
                        if (fault_info && fault_info->batch_pmap_op == TRUE) {
-                               vm_page_insert_internal(m, object, offset, FALSE, TRUE, TRUE);
+                               vm_page_insert_internal(m, object, offset, VM_KERN_MEMORY_NONE, FALSE, TRUE, TRUE, FALSE, NULL);
                        } else {
                                vm_page_insert(m, object, offset);
                        }
@@ -1572,7 +1609,7 @@ vm_fault_page(
 
                                        m->absent = TRUE;
                                        if (fault_info && fault_info->batch_pmap_op == TRUE) {
-                                               vm_page_insert_internal(m, object, offset, FALSE, TRUE, TRUE);
+                                               vm_page_insert_internal(m, object, offset, VM_KERN_MEMORY_NONE, FALSE, TRUE, TRUE, FALSE, NULL);
                                        } else {
                                                vm_page_insert(m, object, offset);
                                        }
@@ -2567,7 +2604,7 @@ vm_fault_enter(vm_page_t m,
               pmap_t pmap,
               vm_map_offset_t vaddr,
               vm_prot_t prot,
-              vm_prot_t fault_type,
+              vm_prot_t caller_prot,
               boolean_t wired,
               boolean_t change_wiring,
               boolean_t no_cache,
@@ -2582,7 +2619,10 @@ vm_fault_enter(vm_page_t m,
        boolean_t       must_disconnect = 0;
        boolean_t       map_is_switched, map_is_switch_protected;
        int             cs_enforcement_enabled;
+       vm_prot_t       fault_type;
        
+       fault_type = change_wiring ? VM_PROT_NONE : caller_prot;
+
        vm_object_lock_assert_held(m->object);
 #if DEBUG
        lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
@@ -2695,11 +2735,12 @@ vm_fault_enter(vm_page_t m,
         *   can be changed without the kernel noticing, therefore unsigned
         *   code can be created
         */
-       if (m->cs_tainted ||
-           ((cs_enforcement_enabled && !cs_bypass ) &&
-            (/* The page is unsigned and wants to be executable */
-             (!m->cs_validated && (prot & VM_PROT_EXECUTE))  ||
-             /* The page should be immutable, but is in danger of being modified
+       if (!cs_bypass &&
+           (m->cs_tainted ||
+            (cs_enforcement_enabled &&
+             (/* The page is unsigned and wants to be executable */
+              (!m->cs_validated && (prot & VM_PROT_EXECUTE))  ||
+              /* The page should be immutable, but is in danger of being modified
                * This is the case where we want policy from the code directory -
                * is the page immutable or not? For now we have to assume that 
                * code pages will be immutable, data pages not.
@@ -2714,7 +2755,7 @@ vm_fault_enter(vm_page_t m,
                */
              (page_immutable(m, prot) && ((prot & VM_PROT_WRITE) || m->wpmapped))
              ))
-               ) 
+                   )
        {
                /* We will have a tainted page. Have to handle the special case
                 * of a switched map now. If the map is not switched, standard
@@ -2788,10 +2829,8 @@ vm_fault_enter(vm_page_t m,
                        pathname_len = 0;
                        filename_len = 0;
                        truncated_path = FALSE;
-                       if (file_object->pager == NULL) {
-                               /* no pager -> no file -> no pathname */
-                               pathname = (char *) "<nil>";
-                       } else {
+                       /* no pager -> no file -> no pathname, use "<nil>" in that case */
+                       if (file_object->pager != NULL) {
                                pathname = (char *)kalloc(__PATH_MAX * 2);
                                if (pathname) {
                                        pathname[0] = '\0';
@@ -2805,6 +2844,11 @@ vm_fault_enter(vm_page_t m,
                                                            filename,
                                                            filename_len,
                                                            &truncated_path);
+                               if (pathname) {
+                                       /* safety first... */
+                                       pathname[__PATH_MAX-1] = '\0';
+                                       filename[__PATH_MAX-1] = '\0';
+                               }
                                vnode_pager_get_object_mtime(file_object->pager,
                                                             &mtime,
                                                             &cs_mtime);
@@ -2817,7 +2861,7 @@ vm_fault_enter(vm_page_t m,
                               "wpmapped:%d slid:%d)\n",
                               pid, procname, (addr64_t) vaddr,
                               file_offset,
-                              (pathname ? pathname : ""),
+                              (pathname ? pathname : "<nil>"),
                               (truncated_path ? "/.../" : ""),
                               (truncated_path ? filename : ""),
                               cs_mtime.tv_sec, cs_mtime.tv_nsec,
@@ -2927,7 +2971,7 @@ MACRO_END
 
                if (wired) {
                        if (kr == KERN_SUCCESS) {
-                               vm_page_wire(m);
+                               vm_page_wire(m, VM_PROT_MEMORY_TAG(caller_prot), TRUE);
                        }
                } else {
                        vm_page_unwire(m, TRUE);
@@ -2974,6 +3018,7 @@ MACRO_END
 
                                VPL_LOCK(&lq->vpl_lock);
 
+                               vm_page_check_pageable_safe(m);
                                queue_enter(&lq->vpl_queue, m,
                                            vm_page_t, pageq);
                                m->local = TRUE;
@@ -3017,7 +3062,7 @@ MACRO_END
                                 */
                                if (!VM_PAGE_WIRED(m)) {
                                        if (m->clean_queue) {
-                                               VM_PAGE_QUEUES_REMOVE(m);
+                                               vm_page_queues_remove(m);
 
                                                vm_pageout_cleaned_reactivated++;
                                                vm_pageout_cleaned_fault_reactivated++;
@@ -3282,11 +3327,12 @@ vm_fault(
                                 NULL);
 }
 
+
 kern_return_t
 vm_fault_internal(
        vm_map_t        map,
        vm_map_offset_t vaddr,
-       vm_prot_t       fault_type,
+       vm_prot_t       caller_prot,
        boolean_t       change_wiring,
        int             interruptible,
        pmap_t          caller_pmap,
@@ -3314,6 +3360,7 @@ vm_fault_internal(
        boolean_t               interruptible_state;
        vm_map_t                real_map = map;
        vm_map_t                original_map = map;
+       vm_prot_t               fault_type;
        vm_prot_t               original_fault_type;
        struct vm_object_fault_info fault_info;
        boolean_t               need_collapse = FALSE;
@@ -3348,6 +3395,8 @@ vm_fault_internal(
        
        interruptible_state = thread_interrupt_level(interruptible);
 
+       fault_type = (change_wiring ? VM_PROT_NONE : caller_prot);
+
        VM_STAT_INCR(faults);
        current_task()->faults++;
        original_fault_type = fault_type;
@@ -3381,6 +3430,7 @@ RetryFault:
                                  &fault_info,
                                  &real_map);
 
+
        if (kr != KERN_SUCCESS) {
                vm_map_unlock_read(map);
                goto done;
@@ -3841,7 +3891,7 @@ FastPmapEnter:
                                                            caller_pmap,
                                                            caller_pmap_addr,
                                                            prot,
-                                                           fault_type,
+                                                           caller_prot,
                                                            wired,
                                                            change_wiring,
                                                            fault_info.no_cache,
@@ -3855,7 +3905,7 @@ FastPmapEnter:
                                                            pmap,
                                                            vaddr,
                                                            prot,
-                                                           fault_type,
+                                                           caller_prot,
                                                            wired,
                                                            change_wiring,
                                                            fault_info.no_cache,
@@ -4658,7 +4708,7 @@ handle_copy_delay:
                                            caller_pmap,
                                            caller_pmap_addr,
                                            prot,
-                                           fault_type,
+                                           caller_prot,
                                            wired,
                                            change_wiring,
                                            fault_info.no_cache,
@@ -4672,7 +4722,7 @@ handle_copy_delay:
                                            pmap,
                                            vaddr,
                                            prot,
-                                           fault_type,
+                                           caller_prot,
                                            wired,
                                            change_wiring,
                                            fault_info.no_cache,
@@ -4754,17 +4804,17 @@ handle_copy_delay:
                                hdelta = entry->vme_end - laddr;
                        if (entry->is_sub_map) {
                                
-                               laddr = (laddr - entry->vme_start) 
-                                                       + entry->offset;
-                               vm_map_lock_read(entry->object.sub_map);
+                               laddr = ((laddr - entry->vme_start) 
+                                        + VME_OFFSET(entry));
+                               vm_map_lock_read(VME_SUBMAP(entry));
 
                                if (map != real_map)
                                        vm_map_unlock_read(map);
                                if (entry->use_pmap) {
                                        vm_map_unlock_read(real_map);
-                                       real_map = entry->object.sub_map;
+                                       real_map = VME_SUBMAP(entry);
                                }
-                               map = entry->object.sub_map;
+                               map = VME_SUBMAP(entry);
                                
                        } else {
                                break;
@@ -4772,17 +4822,25 @@ handle_copy_delay:
                }
 
                if (vm_map_lookup_entry(map, laddr, &entry) && 
-                   (entry->object.vm_object != NULL) &&
-                   (entry->object.vm_object == object)) {
+                   (VME_OBJECT(entry) != NULL) &&
+                   (VME_OBJECT(entry) == object)) {
+                       int superpage;
 
-                       int superpage = (!object->pager_created && object->phys_contiguous)? VM_MEM_SUPERPAGE : 0;
+                       if (!object->pager_created &&
+                           object->phys_contiguous) {
+                               superpage = VM_MEM_SUPERPAGE;
+                       } else {
+                               superpage = 0;
+                       }
 
                        if (superpage && physpage_p) {
                                /* for vm_map_wire_and_extract() */
-                               *physpage_p = (ppnum_t) ((((vm_map_offset_t) entry->object.vm_object->vo_shadow_offset)
-                                                         + entry->offset
-                                                         + (laddr - entry->vme_start))
-                                                        >> PAGE_SHIFT);
+                               *physpage_p = (ppnum_t)
+                                       ((((vm_map_offset_t)
+                                          object->vo_shadow_offset)
+                                         + VME_OFFSET(entry)
+                                         + (laddr - entry->vme_start))
+                                        >> PAGE_SHIFT);
                        }
 
                        if (caller_pmap) {
@@ -4792,8 +4850,8 @@ handle_copy_delay:
                                assert((uint32_t)((ldelta + hdelta) >> PAGE_SHIFT) == ((ldelta + hdelta) >> PAGE_SHIFT));
                                pmap_map_block(caller_pmap, 
                                               (addr64_t)(caller_pmap_addr - ldelta), 
-                                              (ppnum_t)((((vm_map_offset_t) (entry->object.vm_object->vo_shadow_offset)) +
-                                                         entry->offset + (laddr - entry->vme_start) - ldelta) >> PAGE_SHIFT),
+                                              (ppnum_t)((((vm_map_offset_t) (VME_OBJECT(entry)->vo_shadow_offset)) +
+                                                         VME_OFFSET(entry) + (laddr - entry->vme_start) - ldelta) >> PAGE_SHIFT),
                                               (uint32_t)((ldelta + hdelta) >> PAGE_SHIFT), prot, 
                                               (VM_WIMG_MASK & (int)object->wimg_bits) | superpage, 0);
                        } else { 
@@ -4803,8 +4861,8 @@ handle_copy_delay:
                                assert((uint32_t)((ldelta + hdelta) >> PAGE_SHIFT) == ((ldelta + hdelta) >> PAGE_SHIFT));
                                pmap_map_block(real_map->pmap, 
                                               (addr64_t)(vaddr - ldelta), 
-                                              (ppnum_t)((((vm_map_offset_t)(entry->object.vm_object->vo_shadow_offset)) +
-                                                         entry->offset + (laddr - entry->vme_start) - ldelta) >> PAGE_SHIFT),
+                                              (ppnum_t)((((vm_map_offset_t)(VME_OBJECT(entry)->vo_shadow_offset)) +
+                                                         VME_OFFSET(entry) + (laddr - entry->vme_start) - ldelta) >> PAGE_SHIFT),
                                               (uint32_t)((ldelta + hdelta) >> PAGE_SHIFT), prot, 
                                               (VM_WIMG_MASK & (int)object->wimg_bits) | superpage, 0);
                        }
@@ -4875,6 +4933,7 @@ kern_return_t
 vm_fault_wire(
        vm_map_t        map,
        vm_map_entry_t  entry,
+       vm_prot_t       prot,
        pmap_t          pmap,
        vm_map_offset_t pmap_addr,
        ppnum_t         *physpage_p)
@@ -4886,9 +4945,9 @@ vm_fault_wire(
 
        assert(entry->in_transition);
 
-       if ((entry->object.vm_object != NULL) && 
+       if ((VME_OBJECT(entry) != NULL) && 
            !entry->is_sub_map && 
-           entry->object.vm_object->phys_contiguous) {
+           VME_OBJECT(entry)->phys_contiguous) {
                return KERN_SUCCESS;
        }
 
@@ -4907,11 +4966,11 @@ vm_fault_wire(
         */
 
        for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
-               rc = vm_fault_wire_fast(map, va, entry, pmap, 
+               rc = vm_fault_wire_fast(map, va, prot, entry, pmap, 
                                        pmap_addr + (va - entry->vme_start),
                                        physpage_p);
                if (rc != KERN_SUCCESS) {
-                       rc = vm_fault_internal(map, va, VM_PROT_NONE, TRUE, 
+                       rc = vm_fault_internal(map, va, prot, TRUE, 
                                               ((pmap == kernel_pmap)
                                                ? THREAD_UNINT
                                                : THREAD_ABORTSAFE), 
@@ -4954,8 +5013,7 @@ vm_fault_unwire(
        vm_object_t             object;
        struct vm_object_fault_info fault_info;
 
-       object = (entry->is_sub_map)
-                       ? VM_OBJECT_NULL : entry->object.vm_object;
+       object = (entry->is_sub_map) ? VM_OBJECT_NULL : VME_OBJECT(entry);
 
        /*
         * If it's marked phys_contiguous, then vm_fault_wire() didn't actually
@@ -4968,14 +5026,14 @@ vm_fault_unwire(
 
        fault_info.interruptible = THREAD_UNINT;
        fault_info.behavior = entry->behavior;
-       fault_info.user_tag = entry->alias;
+       fault_info.user_tag = VME_ALIAS(entry);
        fault_info.pmap_options = 0;
        if (entry->iokit_acct ||
            (!entry->is_sub_map && !entry->use_pmap)) {
                fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
        }
-       fault_info.lo_offset = entry->offset;
-       fault_info.hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
+       fault_info.lo_offset = VME_OFFSET(entry);
+       fault_info.hi_offset = (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
        fault_info.no_cache = entry->no_cache;
        fault_info.stealth = TRUE;
        fault_info.io_sync = FALSE;
@@ -5023,7 +5081,8 @@ vm_fault_unwire(
                                result_page = VM_PAGE_NULL;
                                result = vm_fault_page(
                                        object,
-                                       entry->offset + (va - entry->vme_start),
+                                       (VME_OFFSET(entry) +
+                                        (va - entry->vme_start)),
                                        VM_PROT_NONE, TRUE,
                                        FALSE, /* page not looked up */
                                        &prot, &result_page, &top_page,
@@ -5120,10 +5179,11 @@ vm_fault_unwire(
  *     other than the common case will return KERN_FAILURE, and the caller
  *     is expected to call vm_fault().
  */
-kern_return_t
+static kern_return_t
 vm_fault_wire_fast(
        __unused vm_map_t       map,
        vm_map_offset_t va,
+       vm_prot_t       caller_prot,
        vm_map_entry_t  entry,
        pmap_t          pmap,
        vm_map_offset_t pmap_addr,
@@ -5188,8 +5248,8 @@ vm_fault_wire_fast(
         *      Find the backing store object and offset into it.
         */
 
-       object = entry->object.vm_object;
-       offset = (va - entry->vme_start) + entry->offset;
+       object = VME_OBJECT(entry);
+       offset = (va - entry->vme_start) + VME_OFFSET(entry);
        prot = entry->protection;
 
        /*
@@ -5245,7 +5305,7 @@ vm_fault_wire_fast(
         */
 
        vm_page_lockspin_queues();
-       vm_page_wire(m);
+       vm_page_wire(m, VM_PROT_MEMORY_TAG(caller_prot), TRUE);
        vm_page_unlock_queues();
 
        /*
@@ -5276,7 +5336,7 @@ vm_fault_wire_fast(
                            FALSE,
                            FALSE,
                            FALSE,
-                           entry->alias,
+                           VME_ALIAS(entry),
                            ((entry->iokit_acct ||
                              (!entry->is_sub_map && !entry->use_pmap))
                             ? PMAP_OPTIONS_ALT_ACCT
@@ -5315,7 +5375,7 @@ done:
  *             Release a page used by vm_fault_copy.
  */
 
-void
+static void
 vm_fault_copy_cleanup(
        vm_page_t       page,
        vm_page_t       top_page)
@@ -5333,7 +5393,7 @@ vm_fault_copy_cleanup(
        vm_fault_cleanup(object, top_page);
 }
 
-void
+static void
 vm_fault_copy_dst_cleanup(
        vm_page_t       page)
 {
@@ -5523,7 +5583,7 @@ vm_fault_copy(
                 */
 
                vm_page_lockspin_queues();
-               vm_page_wire(dst_page);
+               vm_page_wire(dst_page, VM_KERN_MEMORY_OSFMK, TRUE);
                vm_page_unlock_queues();
                PAGE_WAKEUP_DONE(dst_page);
                vm_object_unlock(dst_page->object);
@@ -5804,19 +5864,150 @@ vm_fault_classify_init(void)
 }
 #endif /* VM_FAULT_CLASSIFY */
 
+vm_offset_t
+kdp_lightweight_fault(vm_map_t map, vm_offset_t cur_target_addr, uint32_t *fault_results)
+{
+#pragma unused(map, cur_target_addr, fault_results)
+
+       return 0;
+#if 0
+       vm_map_entry_t  entry;
+       vm_object_t     object;
+       vm_offset_t     object_offset;
+       vm_page_t       m;
+       int             compressor_external_state, compressed_count_delta;
+       int             compressor_flags = (C_DONT_BLOCK | C_KEEP | C_KDP);
+       int             my_fault_type = VM_PROT_READ;
+       kern_return_t   kr;
+
+
+       if (not_in_kdp) {
+               panic("kdp_lightweight_fault called from outside of debugger context");
+       }
+
+       assert(map != VM_MAP_NULL);
+
+       assert((cur_target_addr & PAGE_MASK) == 0);
+       if ((cur_target_addr & PAGE_MASK) != 0) {
+               return 0;
+       }
+
+       if (kdp_lck_rw_lock_is_acquired_exclusive(&map->lock)) {
+               return 0;
+       }
+
+       if (!vm_map_lookup_entry(map, cur_target_addr, &entry)) {
+               return 0;
+       }
+
+       if (entry->is_sub_map) {
+               return 0;
+       }
+
+       object = VME_OBJECT(entry);
+       if (object == VM_OBJECT_NULL) {
+               return 0;
+       }
+
+       object_offset = cur_target_addr - entry->vme_start + VME_OFFSET(entry);
+
+       while (TRUE) {
+               if (kdp_lck_rw_lock_is_acquired_exclusive(&object->Lock)) {
+                       return 0;
+               }
+
+               if (object->pager_created && (object->paging_in_progress ||
+                       object->activity_in_progress)) {
+                       return 0;
+               }
+
+               m = kdp_vm_page_lookup(object, object_offset);
+
+               if (m != VM_PAGE_NULL) {
+
+                       if ((object->wimg_bits & VM_WIMG_MASK) != VM_WIMG_DEFAULT) {
+                               return 0;
+                       }
+
+                       if (m->laundry || m->busy || m->pageout || m->absent || m->error || m->cleaning ||
+                               m->overwriting || m->restart || m->unusual) {
+                               return 0;
+                       }
+
+                       assert(!m->private);
+                       if (m->private) {
+                               return 0;
+                       }
+
+                       assert(!m->fictitious);
+                       if (m->fictitious) {
+                               return 0;
+                       }
+
+                       assert(!m->encrypted);
+                       if (m->encrypted) {
+                               return 0;
+                       }
+
+                       assert(!m->encrypted_cleaning);
+                       if (m->encrypted_cleaning) {
+                               return 0;
+                       }
+
+                       assert(!m->compressor);
+                       if (m->compressor) {
+                               return 0;
+                       }
 
+                       if (fault_results) {
+                               *fault_results |= kThreadFaultedBT;
+                       }
+                       return ptoa(m->phys_page);
+               }
+
+               compressor_external_state = VM_EXTERNAL_STATE_UNKNOWN;
+
+               if (object->pager_created && MUST_ASK_PAGER(object, object_offset, compressor_external_state)) {
+                       if (compressor_external_state == VM_EXTERNAL_STATE_EXISTS) {
+                               kr = vm_compressor_pager_get(object->pager, (object_offset + object->paging_offset),
+                                                               kdp_compressor_decompressed_page_ppnum, &my_fault_type,
+                                                               compressor_flags, &compressed_count_delta);
+                               if (kr == KERN_SUCCESS) {
+                                       if (fault_results) {
+                                               *fault_results |= kThreadDecompressedBT;
+                                       }
+                                       return kdp_compressor_decompressed_page_paddr;
+                               } else {
+                                       return 0;
+                               }
+                       }
+               }
+
+               if (object->shadow == VM_OBJECT_NULL) {
+                       return 0;
+               }
+
+               object_offset += object->vo_shadow_offset;
+               object = object->shadow;
+       }
+#endif /* 0 */
+}
+
+
+#define CODE_SIGNING_CHUNK_SIZE 4096
 void
 vm_page_validate_cs_mapped(
        vm_page_t       page,
        const void      *kaddr)
 {
        vm_object_t             object;
-       vm_object_offset_t      offset;
+       vm_object_offset_t      offset, offset_in_page;
        kern_return_t           kr;
        memory_object_t         pager;
        void                    *blobs;
        boolean_t               validated;
-       unsigned                        tainted;
+       unsigned                tainted;
+       int                     num_chunks, num_chunks_validated;
 
        assert(page->busy);
        vm_object_lock_assert_exclusive(page->object);
@@ -5844,7 +6035,7 @@ vm_page_validate_cs_mapped(
                vm_cs_validated_dirtied++;
        }
 
-       if (page->cs_validated) {
+       if (page->cs_validated || page->cs_tainted) {
                return;
        }
 
@@ -5878,17 +6069,32 @@ vm_page_validate_cs_mapped(
        }
 
        /* verify the SHA1 hash for this page */
-       tainted = 0;
-       validated = cs_validate_page(blobs,
-                                    pager,
-                                    offset + object->paging_offset,
-                                    (const void *)kaddr,
-                                    &tainted);
-
-       page->cs_validated = validated;
-       if (validated) {
-               page->cs_tainted = !!(tainted & CS_VALIDATE_TAINTED);
-               page->cs_nx = !!(tainted & CS_VALIDATE_NX);
+       num_chunks_validated = 0;
+       for (offset_in_page = 0, num_chunks = 0;
+            offset_in_page < PAGE_SIZE_64;
+            offset_in_page += CODE_SIGNING_CHUNK_SIZE, num_chunks++) {
+               tainted = 0;
+               validated = cs_validate_page(blobs,
+                                            pager,
+                                            (object->paging_offset +
+                                             offset +
+                                             offset_in_page),
+                                            (const void *)((const char *)kaddr
+                                                           + offset_in_page),
+                                            &tainted);
+               if (validated) {
+                       num_chunks_validated++;
+               }
+               if (tainted & CS_VALIDATE_TAINTED) {
+                       page->cs_tainted = TRUE;
+               } 
+               if (tainted & CS_VALIDATE_NX) {
+                       page->cs_nx = TRUE;
+               }
+       }
+       /* page is validated only if all its chunks are */
+       if (num_chunks_validated == num_chunks) {
+               page->cs_validated = TRUE;
        }
 }
 
@@ -5932,7 +6138,7 @@ vm_page_validate_cs(
                vm_cs_validated_dirtied++;
        }
 
-       if (page->cs_validated) {
+       if (page->cs_validated || page->cs_tainted) {
                return;
        }
 
@@ -6009,3 +6215,78 @@ vm_page_validate_cs(
        }
        vm_object_paging_end(object);
 }
+
+void
+vm_page_validate_cs_mapped_chunk(
+       vm_page_t       page,
+       const void      *kaddr,
+       vm_offset_t     chunk_offset,
+       boolean_t       *validated_p,
+       unsigned        *tainted_p)
+{
+       vm_object_t             object;
+       vm_object_offset_t      offset, offset_in_page;
+       kern_return_t           kr;
+       memory_object_t         pager;
+       void                    *blobs;
+       boolean_t               validated;
+       unsigned                tainted;
+
+       *validated_p = FALSE;
+       *tainted_p = 0;
+
+       assert(page->busy);
+       vm_object_lock_assert_exclusive(page->object);
+
+       if (!cs_validation) {
+               return;
+       }
+
+       object = page->object;
+       assert(object->code_signed);
+       offset = page->offset;
+
+       if (!object->alive || object->terminating || object->pager == NULL) {
+               /*
+                * The object is terminating and we don't have its pager
+                * so we can't validate the data...
+                */
+               return;
+       }
+       /*
+        * Since we get here to validate a page that was brought in by
+        * the pager, we know that this pager is all setup and ready
+        * by now.
+        */
+       assert(!object->internal);
+       assert(object->pager != NULL);
+       assert(object->pager_ready);
+
+       pager = object->pager;
+       assert(object->paging_in_progress);
+       kr = vnode_pager_get_object_cs_blobs(pager, &blobs);
+       if (kr != KERN_SUCCESS) {
+               blobs = NULL;
+       }
+
+       /* verify the signature for this chunk */
+       offset_in_page = chunk_offset;
+       assert(offset_in_page < PAGE_SIZE);
+       assert((offset_in_page & (CODE_SIGNING_CHUNK_SIZE-1)) == 0);
+
+       tainted = 0;
+       validated = cs_validate_page(blobs,
+                                    pager,
+                                    (object->paging_offset +
+                                     offset +
+                                     offset_in_page),
+                                    (const void *)((const char *)kaddr
+                                                   + offset_in_page),
+                                    &tainted);
+       if (validated) {
+               *validated_p = TRUE;
+       }
+       if (tainted) {
+               *tainted_p = tainted;
+       }
+}
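(Editorial note: in the vm_fault.c hunks above, code-signature validation now walks each page in CODE_SIGNING_CHUNK_SIZE (4 KB) chunks and only sets cs_validated once every chunk validates, which matters when the VM page size exceeds the 4 KB code-signing granularity, e.g. a 16 KB page spans four code-directory hashes.  A hedged caller sketch for the new per-chunk entry point follows; the surrounding variables are illustrative.)

	boolean_t	chunk_validated = FALSE;
	unsigned	chunk_tainted = 0;

	/* validate only the 4 KB chunk at offset 0x1000 within this page */
	vm_page_validate_cs_mapped_chunk(page, kaddr, (vm_offset_t)0x1000,
					 &chunk_validated, &chunk_tainted);
	if (chunk_validated && !(chunk_tainted & CS_VALIDATE_TAINTED)) {
		/* this chunk matched its code-directory hash */
	}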
index 65e8fd658a1759613da19d5bc9db5abec495f3c0..d6824c4fd7abf98a40235dc3fb22a569ea3e06d0 100644 (file)
@@ -105,16 +105,6 @@ extern void vm_pre_fault(vm_map_offset_t);
 
 extern void vm_fault_init(void);
 
-extern kern_return_t vm_fault_internal(
-               vm_map_t        map,
-               vm_map_offset_t vaddr,
-               vm_prot_t       fault_type,
-               boolean_t       change_wiring,
-               int             interruptible,
-               pmap_t          pmap,
-               vm_map_offset_t pmap_addr,
-               ppnum_t         *physpage_p);
-
 /*
  *     Page fault handling based on vm_object only.
  */
@@ -147,6 +137,7 @@ extern void vm_fault_cleanup(
 extern kern_return_t vm_fault_wire(
                vm_map_t        map,
                vm_map_entry_t  entry,
+               vm_prot_t       prot,
                pmap_t          pmap,
                vm_map_offset_t pmap_addr,
                ppnum_t         *physpage_p);
@@ -183,6 +174,12 @@ extern kern_return_t vm_fault_enter(
        boolean_t *need_retry,
        int *type_of_fault);
 
+extern vm_offset_t kdp_lightweight_fault(
+               vm_map_t map,
+               vm_offset_t cur_target_addr,
+               uint32_t *fault_results);
+
+
 #endif /* MACH_KERNEL_PRIVATE */
 
 #endif /* KERNEL_PRIVATE */
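(Editorial note: the vm_fault.h hunks above make vm_fault_internal() private to vm_fault.c, add a prot argument to vm_fault_wire() so wired pages carry the caller's VM tag, and export kdp_lightweight_fault() for debugger/stackshot use.  A hedged sketch of a debugger-context caller; task_map and cur_addr are illustrative, and the real caller lives in the stackshot path.)

	uint32_t	fault_flags = 0;
	vm_offset_t	phys;

	/* intended to be safe in debugger context: returns 0 unless the page
	 * is already resident or can be decompressed into the preallocated
	 * kdp_compressor_decompressed_page */
	phys = kdp_lightweight_fault(task_map, cur_addr & ~PAGE_MASK, &fault_flags);
	if (phys != 0) {
		/* fault_flags reports kThreadFaultedBT / kThreadDecompressedBT */
	}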
diff --git a/osfmk/vm/vm_fourk_pager.c b/osfmk/vm/vm_fourk_pager.c
new file mode 100644 (file)
index 0000000..57f3ed7
--- /dev/null
@@ -0,0 +1,1348 @@
+/*
+ * Copyright (c) 2014 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <sys/errno.h>
+
+#include <mach/mach_types.h>
+#include <mach/mach_traps.h>
+#include <mach/host_priv.h>
+#include <mach/kern_return.h>
+#include <mach/memory_object_control.h>
+#include <mach/memory_object_types.h>
+#include <mach/port.h>
+#include <mach/policy.h>
+#include <mach/upl.h>
+#include <mach/thread_act.h>
+#include <mach/mach_vm.h>
+
+#include <kern/host.h>
+#include <kern/kalloc.h>
+#include <kern/page_decrypt.h>
+#include <kern/queue.h>
+#include <kern/thread.h>
+
+#include <ipc/ipc_port.h>
+#include <ipc/ipc_space.h>
+
+#include <default_pager/default_pager_types.h>
+#include <default_pager/default_pager_object_server.h>
+
+#include <vm/vm_fault.h>
+#include <vm/vm_map.h>
+#include <vm/vm_pageout.h>
+#include <vm/memory_object.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_protos.h>
+
+
+/* 
+ * 4K MEMORY PAGER 
+ *
+ * This external memory manager (EMM) handles memory mappings that are
+ * 4K-aligned but not page-aligned and can therefore not be mapped directly.
+ * 
+ * It mostly handles page-in requests (from memory_object_data_request()) by
+ * getting the data needed to fill in each 4K-chunk.  That can require
+ * getting data from one or two pages from its backing VM object
+ * (a file or an "apple-protected" pager backed by an encrypted file), and
+ * copying the data to another page so that it is aligned as expected by
+ * the mapping.
+ *
+ * Returned pages can never be dirtied and must always be mapped copy-on-write,
+ * so the memory manager does not need to handle page-out requests (from
+ * memory_object_data_return()).
+ *
+ */
+
+/* forward declarations */
+void fourk_pager_reference(memory_object_t mem_obj);
+void fourk_pager_deallocate(memory_object_t mem_obj);
+kern_return_t fourk_pager_init(memory_object_t mem_obj,
+                                memory_object_control_t control,
+                                memory_object_cluster_size_t pg_size);
+kern_return_t fourk_pager_terminate(memory_object_t mem_obj);
+kern_return_t fourk_pager_data_request(memory_object_t mem_obj,
+                                        memory_object_offset_t offset,
+                                        memory_object_cluster_size_t length,
+                                        vm_prot_t protection_required,
+                                        memory_object_fault_info_t fault_info);
+kern_return_t fourk_pager_data_return(memory_object_t mem_obj,
+                                       memory_object_offset_t offset,
+                                       memory_object_cluster_size_t    data_cnt,
+                                       memory_object_offset_t *resid_offset,
+                                       int *io_error,
+                                       boolean_t dirty,
+                                       boolean_t kernel_copy,
+                                       int upl_flags);
+kern_return_t fourk_pager_data_initialize(memory_object_t mem_obj,
+                                           memory_object_offset_t offset,
+                                           memory_object_cluster_size_t data_cnt);
+kern_return_t fourk_pager_data_unlock(memory_object_t mem_obj,
+                                       memory_object_offset_t offset,
+                                       memory_object_size_t size,
+                                       vm_prot_t desired_access);
+kern_return_t fourk_pager_synchronize(memory_object_t mem_obj,
+                                       memory_object_offset_t offset,
+                                       memory_object_size_t length,
+                                       vm_sync_t sync_flags);
+kern_return_t fourk_pager_map(memory_object_t mem_obj,
+                               vm_prot_t prot);
+kern_return_t fourk_pager_last_unmap(memory_object_t mem_obj);
+
+/*
+ * Vector of VM operations for this EMM.
+ * These routines are invoked by VM via the memory_object_*() interfaces.
+ */
+const struct memory_object_pager_ops fourk_pager_ops = {
+       fourk_pager_reference,
+       fourk_pager_deallocate,
+       fourk_pager_init,
+       fourk_pager_terminate,
+       fourk_pager_data_request,
+       fourk_pager_data_return,
+       fourk_pager_data_initialize,
+       fourk_pager_data_unlock,
+       fourk_pager_synchronize,
+       fourk_pager_map,
+       fourk_pager_last_unmap,
+       NULL, /* data_reclaim */
+       "fourk_pager"
+};
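/*
 * [Editor's illustrative sketch -- not part of this commit.]
 * The same ops vector written with C99 designated initializers, to make it
 * explicit which memory_object_pager_ops slot each routine fills.  The field
 * names below are assumed from the generic pager-ops layout used by other
 * in-kernel pagers; check osfmk/mach/memory_object_types.h before relying
 * on them.
 */
#if 0   /* sketch only */
const struct memory_object_pager_ops fourk_pager_ops_sketch = {
	.memory_object_reference        = fourk_pager_reference,
	.memory_object_deallocate       = fourk_pager_deallocate,
	.memory_object_init             = fourk_pager_init,
	.memory_object_terminate        = fourk_pager_terminate,
	.memory_object_data_request     = fourk_pager_data_request,
	.memory_object_data_return      = fourk_pager_data_return,
	.memory_object_data_initialize  = fourk_pager_data_initialize,
	.memory_object_data_unlock      = fourk_pager_data_unlock,
	.memory_object_synchronize      = fourk_pager_synchronize,
	.memory_object_map              = fourk_pager_map,
	.memory_object_last_unmap       = fourk_pager_last_unmap,
	.memory_object_data_reclaim     = NULL,
	.memory_object_pager_name       = "fourk_pager"
};
#endif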
+
+/*
+ * The "fourk_pager" describes a memory object backed by
+ * the "4K" EMM.
+ */
+#define FOURK_PAGER_SLOTS 4    /* 16K / 4K */
+typedef struct fourk_pager_backing {
+       vm_object_t             backing_object;
+       vm_object_offset_t      backing_offset;
+} *fourk_pager_backing_t;
+typedef struct fourk_pager {
+       struct ipc_object_header        pager_header;   /* fake ip_kotype() */
+       memory_object_pager_ops_t pager_ops; /* == &fourk_pager_ops */
+       memory_object_control_t pager_control;  /* mem object control handle */
+       queue_chain_t           pager_queue;    /* next & prev pagers */
+       unsigned int            ref_count;      /* reference count */
+       int     is_ready;       /* is this pager ready ? */
+       int     is_mapped;      /* is this mem_obj mapped ? */
+       struct fourk_pager_backing slots[FOURK_PAGER_SLOTS]; /* backing for each
+                                                               4K-chunk */
+} *fourk_pager_t;
+#define        FOURK_PAGER_NULL        ((fourk_pager_t) NULL)
+#define pager_ikot pager_header.io_bits
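/*
 * [Editor's illustrative sketch -- not part of this commit.]
 * How a pager-relative offset maps onto the slots[] array above, for offsets
 * inside the first 16K of the object (beyond that, fourk_pager_data_request()
 * below treats everything as an extension of the last slot).  The constants
 * are spelled out numerically here instead of FOURK_PAGE_SIZE /
 * SIXTEENK_PAGE_MASK; illustrative only.
 */
#if 0   /* sketch only */
static inline int
sk_slot_for_offset(unsigned long long offset)
{
	return (int)((offset & 0x3fffULL) / 0x1000ULL);  /* (offset mod 16K) / 4K */
}
#endif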
+
+/*
+ * List of memory objects managed by this EMM.
+ * The list is protected by the "fourk_pager_lock" lock.
+ */
+int fourk_pager_count = 0;             /* number of pagers */
+int fourk_pager_count_mapped = 0;      /* number of mapped pagers */
+queue_head_t fourk_pager_queue;
+decl_lck_mtx_data(,fourk_pager_lock)
+
+/*
+ * Maximum number of unmapped pagers we're willing to keep around.
+ */
+int fourk_pager_cache_limit = 0;
+
+/*
+ * Statistics & counters.
+ */
+int fourk_pager_count_max = 0;
+int fourk_pager_count_unmapped_max = 0;
+int fourk_pager_num_trim_max = 0;
+int fourk_pager_num_trim_total = 0;
+
+
+lck_grp_t      fourk_pager_lck_grp;
+lck_grp_attr_t fourk_pager_lck_grp_attr;
+lck_attr_t     fourk_pager_lck_attr;
+
+
+/* internal prototypes */
+fourk_pager_t fourk_pager_lookup(memory_object_t mem_obj);
+void fourk_pager_dequeue(fourk_pager_t pager);
+void fourk_pager_deallocate_internal(fourk_pager_t pager,
+                                      boolean_t locked);
+void fourk_pager_terminate_internal(fourk_pager_t pager);
+void fourk_pager_trim(void);
+
+
+#if DEBUG
+int fourk_pagerdebug = 0;
+#define PAGER_ALL              0xffffffff
+#define        PAGER_INIT              0x00000001
+#define        PAGER_PAGEIN            0x00000002
+
+#define PAGER_DEBUG(LEVEL, A)                                          \
+       MACRO_BEGIN                                                     \
+       if ((fourk_pagerdebug & LEVEL)==LEVEL) {                \
+               printf A;                                               \
+       }                                                               \
+       MACRO_END
+#else
+#define PAGER_DEBUG(LEVEL, A)
+#endif
+
+
+void
+fourk_pager_bootstrap(void)
+{
+       lck_grp_attr_setdefault(&fourk_pager_lck_grp_attr);
+       lck_grp_init(&fourk_pager_lck_grp, "4K-pager", &fourk_pager_lck_grp_attr);
+       lck_attr_setdefault(&fourk_pager_lck_attr);
+       lck_mtx_init(&fourk_pager_lock, &fourk_pager_lck_grp, &fourk_pager_lck_attr);
+       queue_init(&fourk_pager_queue);
+}
+
+/*
+ * fourk_pager_init()
+ *
+ * Initializes the memory object and makes it ready to be used and mapped.
+ */
+kern_return_t
+fourk_pager_init(
+       memory_object_t         mem_obj, 
+       memory_object_control_t control, 
+#if !DEBUG
+       __unused
+#endif
+       memory_object_cluster_size_t pg_size)
+{
+       fourk_pager_t   pager;
+       kern_return_t           kr;
+       memory_object_attr_info_data_t  attributes;
+
+       PAGER_DEBUG(PAGER_ALL,
+                   ("fourk_pager_init: %p, %p, %x\n",
+                    mem_obj, control, pg_size));
+
+       if (control == MEMORY_OBJECT_CONTROL_NULL)
+               return KERN_INVALID_ARGUMENT;
+
+       pager = fourk_pager_lookup(mem_obj);
+
+       memory_object_control_reference(control);
+
+       pager->pager_control = control;
+
+       attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
+       /* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
+       attributes.cluster_size = (1 << (PAGE_SHIFT));
+       attributes.may_cache_object = FALSE;
+       attributes.temporary = TRUE;
+
+       kr = memory_object_change_attributes(
+                                       control,
+                                       MEMORY_OBJECT_ATTRIBUTE_INFO,
+                                       (memory_object_info_t) &attributes,
+                                       MEMORY_OBJECT_ATTR_INFO_COUNT);
+       if (kr != KERN_SUCCESS)
+               panic("fourk_pager_init: "
+                     "memory_object_change_attributes() failed");
+
+       return KERN_SUCCESS;
+}
+
+/*
+ * fourk_pager_data_return()
+ *
+ * Handles page-out requests from VM.  This should never happen since
+ * the pages provided by this EMM are not supposed to be dirty or dirtied
+ * and VM should simply discard the contents and reclaim the pages if it
+ * needs to.
+ */
+kern_return_t
+fourk_pager_data_return(
+        __unused memory_object_t       mem_obj,
+        __unused memory_object_offset_t        offset,
+        __unused memory_object_cluster_size_t          data_cnt,
+        __unused memory_object_offset_t        *resid_offset,
+       __unused int                    *io_error,
+       __unused boolean_t              dirty,
+       __unused boolean_t              kernel_copy,
+       __unused int                    upl_flags)  
+{
+       panic("fourk_pager_data_return: should never get called");
+       return KERN_FAILURE;
+}
+
+kern_return_t
+fourk_pager_data_initialize(
+       __unused memory_object_t        mem_obj,
+       __unused memory_object_offset_t offset,
+       __unused memory_object_cluster_size_t           data_cnt)
+{
+       panic("fourk_pager_data_initialize: should never get called");
+       return KERN_FAILURE;
+}
+
+kern_return_t
+fourk_pager_data_unlock(
+       __unused memory_object_t        mem_obj,
+       __unused memory_object_offset_t offset,
+       __unused memory_object_size_t           size,
+       __unused vm_prot_t              desired_access)
+{
+       return KERN_FAILURE;
+}
+
+/*
+ * fourk_pager_reference()
+ *
+ * Get a reference on this memory object.
+ * For external usage only.  Assumes that the initial reference count is not 0,
+ * i.e. one should not "revive" a dead pager this way.
+ */
+void
+fourk_pager_reference(
+       memory_object_t         mem_obj)
+{      
+       fourk_pager_t   pager;
+
+       pager = fourk_pager_lookup(mem_obj);
+
+       lck_mtx_lock(&fourk_pager_lock);
+       assert(pager->ref_count > 0);
+       pager->ref_count++;
+       lck_mtx_unlock(&fourk_pager_lock);
+}
+
+
+/*
+ * fourk_pager_dequeue:
+ *
+ * Removes a pager from the list of pagers.
+ *
+ * The caller must hold "fourk_pager_lock".
+ */
+void
+fourk_pager_dequeue(
+       fourk_pager_t pager)
+{
+       assert(!pager->is_mapped);
+
+       queue_remove(&fourk_pager_queue,
+                    pager,
+                    fourk_pager_t,
+                    pager_queue);
+       pager->pager_queue.next = NULL;
+       pager->pager_queue.prev = NULL;
+       
+       fourk_pager_count--;
+}
+
+/*
+ * fourk_pager_terminate_internal:
+ *
+ * Trigger the asynchronous termination of the memory object associated
+ * with this pager.
+ * When the memory object is terminated, there will be one more call
+ * to memory_object_deallocate() (i.e. fourk_pager_deallocate())
+ * to finish the clean up.
+ *
+ * "fourk_pager_lock" should not be held by the caller.
+ * We don't need the lock because the pager has already been removed from
+ * the pagers' list and is now ours exclusively.
+ */
+void
+fourk_pager_terminate_internal(
+       fourk_pager_t pager)
+{
+       int i;
+
+       assert(pager->is_ready);
+       assert(!pager->is_mapped);
+
+       for (i = 0; i < FOURK_PAGER_SLOTS; i++) {
+               if (pager->slots[i].backing_object != VM_OBJECT_NULL &&
+                   pager->slots[i].backing_object != (vm_object_t) -1) {
+                       vm_object_deallocate(pager->slots[i].backing_object);
+                       pager->slots[i].backing_object = (vm_object_t) -1;
+                       pager->slots[i].backing_offset = (vm_object_offset_t) -1;
+               }
+       }
+       
+       /* trigger the destruction of the memory object */
+       memory_object_destroy(pager->pager_control, 0);
+}
+
+/*
+ * fourk_pager_deallocate_internal()
+ *
+ * Release a reference on this pager and free it when the last
+ * reference goes away.
+ * Can be called with fourk_pager_lock held or not but always returns
+ * with it unlocked.
+ */
+void
+fourk_pager_deallocate_internal(
+       fourk_pager_t   pager,
+       boolean_t               locked)
+{
+       boolean_t       needs_trimming;
+       int             count_unmapped;
+
+       if (! locked) {
+               lck_mtx_lock(&fourk_pager_lock);
+       }
+
+       count_unmapped = (fourk_pager_count - 
+                         fourk_pager_count_mapped);
+       if (count_unmapped > fourk_pager_cache_limit) {
+               /* we have too many unmapped pagers:  trim some */
+               needs_trimming = TRUE;
+       } else {
+               needs_trimming = FALSE;
+       }
+
+       /* drop a reference on this pager */
+       pager->ref_count--;
+
+       if (pager->ref_count == 1) {
+               /*
+                * Only the "named" reference is left, which means that
+                * no one is really holding on to this pager anymore.
+                * Terminate it.
+                */
+               fourk_pager_dequeue(pager);
+               /* the pager is all ours: no need for the lock now */
+               lck_mtx_unlock(&fourk_pager_lock);
+               fourk_pager_terminate_internal(pager);
+       } else if (pager->ref_count == 0) {
+               /*
+                * Dropped the existence reference;  the memory object has
+                * been terminated.  Do some final cleanup and release the
+                * pager structure.
+                */
+               lck_mtx_unlock(&fourk_pager_lock);
+               if (pager->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
+                       memory_object_control_deallocate(pager->pager_control);
+                       pager->pager_control = MEMORY_OBJECT_CONTROL_NULL;
+               }
+               kfree(pager, sizeof (*pager));
+               pager = FOURK_PAGER_NULL;
+       } else {
+               /* there are still plenty of references:  keep going... */
+               lck_mtx_unlock(&fourk_pager_lock);
+       }
+
+       if (needs_trimming) {
+               fourk_pager_trim();
+       }
+       /* caution: lock is not held on return... */
+}
+
+/*
+ * fourk_pager_deallocate()
+ *
+ * Release a reference on this pager and free it when the last
+ * reference goes away.
+ */
+void
+fourk_pager_deallocate(
+       memory_object_t         mem_obj)
+{
+       fourk_pager_t   pager;
+
+       PAGER_DEBUG(PAGER_ALL, ("fourk_pager_deallocate: %p\n", mem_obj));
+       pager = fourk_pager_lookup(mem_obj);
+       fourk_pager_deallocate_internal(pager, FALSE);
+}
+
+/*
+ *
+ */
+kern_return_t
+fourk_pager_terminate(
+#if !DEBUG
+       __unused
+#endif
+       memory_object_t mem_obj)
+{
+       PAGER_DEBUG(PAGER_ALL, ("fourk_pager_terminate: %p\n", mem_obj));
+
+       return KERN_SUCCESS;
+}
+
+/*
+ *
+ */
+kern_return_t
+fourk_pager_synchronize(
+       memory_object_t         mem_obj,
+       memory_object_offset_t  offset,
+       memory_object_size_t            length,
+       __unused vm_sync_t              sync_flags)
+{
+       fourk_pager_t   pager;
+
+       PAGER_DEBUG(PAGER_ALL, ("fourk_pager_synchronize: %p\n", mem_obj));
+
+       pager = fourk_pager_lookup(mem_obj);
+
+       memory_object_synchronize_completed(pager->pager_control,
+                                           offset, length);
+
+       return KERN_SUCCESS;
+}
+
+/*
+ * fourk_pager_map()
+ *
+ * This allows VM to let us, the EMM, know that this memory object
+ * is currently mapped one or more times.  This is called by VM each time
+ * the memory object gets mapped and we take one extra reference on the
+ * memory object to account for all its mappings.
+ */
+kern_return_t
+fourk_pager_map(
+       memory_object_t         mem_obj,
+       __unused vm_prot_t      prot)
+{
+       fourk_pager_t   pager;
+
+       PAGER_DEBUG(PAGER_ALL, ("fourk_pager_map: %p\n", mem_obj));
+
+       pager = fourk_pager_lookup(mem_obj);
+
+       lck_mtx_lock(&fourk_pager_lock);
+       assert(pager->is_ready);
+       assert(pager->ref_count > 0); /* pager is alive */
+       if (pager->is_mapped == FALSE) {
+               /*
+                * First mapping of this pager:  take an extra reference
+                * that will remain until all the mappings of this pager
+                * are removed.
+                */
+               pager->is_mapped = TRUE;
+               pager->ref_count++;
+               fourk_pager_count_mapped++;
+       }
+       lck_mtx_unlock(&fourk_pager_lock);
+
+       return KERN_SUCCESS;
+}
+
+/*
+ * fourk_pager_last_unmap()
+ *
+ * This is called by VM when this memory object is no longer mapped anywhere.
+ */
+kern_return_t
+fourk_pager_last_unmap(
+       memory_object_t         mem_obj)
+{
+       fourk_pager_t   pager;
+       int                     count_unmapped;
+
+       PAGER_DEBUG(PAGER_ALL,
+                   ("fourk_pager_last_unmap: %p\n", mem_obj));
+
+       pager = fourk_pager_lookup(mem_obj);
+
+       lck_mtx_lock(&fourk_pager_lock);
+       if (pager->is_mapped) {
+               /*
+                * All the mappings are gone, so let go of the one extra
+                * reference that represents all the mappings of this pager.
+                */
+               fourk_pager_count_mapped--;
+               count_unmapped = (fourk_pager_count -
+                                 fourk_pager_count_mapped);
+               if (count_unmapped > fourk_pager_count_unmapped_max) {
+                       fourk_pager_count_unmapped_max = count_unmapped;
+               }
+               pager->is_mapped = FALSE;
+               fourk_pager_deallocate_internal(pager, TRUE);
+               /* caution: deallocate_internal() released the lock ! */
+       } else {
+               lck_mtx_unlock(&fourk_pager_lock);
+       }
+       
+       return KERN_SUCCESS;
+}
+
+
+/*
+ *
+ */
+fourk_pager_t
+fourk_pager_lookup(
+       memory_object_t  mem_obj)
+{
+       fourk_pager_t   pager;
+
+       pager = (fourk_pager_t) mem_obj;
+       assert(pager->pager_ops == &fourk_pager_ops);
+       assert(pager->ref_count > 0);
+       return pager;
+}
+
+void
+fourk_pager_trim(void)
+{
+       fourk_pager_t   pager, prev_pager;
+       queue_head_t            trim_queue;
+       int                     num_trim;
+       int                     count_unmapped;
+
+       lck_mtx_lock(&fourk_pager_lock);
+
+       /*
+        * We have too many pagers, try and trim some unused ones,
+        * starting with the oldest pager at the end of the queue.
+        */
+       queue_init(&trim_queue);
+       num_trim = 0;
+
+       for (pager = (fourk_pager_t)
+                    queue_last(&fourk_pager_queue);
+            !queue_end(&fourk_pager_queue,
+                       (queue_entry_t) pager);
+            pager = prev_pager) {
+               /* get prev elt before we dequeue */
+               prev_pager = (fourk_pager_t)
+                       queue_prev(&pager->pager_queue);
+
+               if (pager->ref_count == 2 &&
+                   pager->is_ready &&
+                   !pager->is_mapped) {
+                       /* this pager can be trimmed */
+                       num_trim++;
+                       /* remove this pager from the main list ... */
+                       fourk_pager_dequeue(pager);
+                       /* ... and add it to our trim queue */
+                       queue_enter_first(&trim_queue,
+                                         pager,
+                                         fourk_pager_t,
+                                         pager_queue);
+
+                       count_unmapped = (fourk_pager_count -
+                                         fourk_pager_count_mapped);
+                       if (count_unmapped <= fourk_pager_cache_limit) {
+                               /* we have trimmed enough pagers */
+                               break;
+                       }
+               }
+       }
+       if (num_trim > fourk_pager_num_trim_max) {
+               fourk_pager_num_trim_max = num_trim;
+       }
+       fourk_pager_num_trim_total += num_trim;
+
+       lck_mtx_unlock(&fourk_pager_lock);
+
+       /* terminate the trimmed pagers */
+       while (!queue_empty(&trim_queue)) {
+               queue_remove_first(&trim_queue,
+                                  pager,
+                                  fourk_pager_t,
+                                  pager_queue);
+               pager->pager_queue.next = NULL;
+               pager->pager_queue.prev = NULL;
+               assert(pager->ref_count == 2);
+               /*
+                * We can't call deallocate_internal() because the pager
+                * has already been dequeued, but we still need to remove
+                * a reference.
+                */
+               pager->ref_count--;
+               fourk_pager_terminate_internal(pager);
+       }
+}
+
+
+
+
+
+
+vm_object_t
+fourk_pager_to_vm_object(
+       memory_object_t mem_obj)
+{
+       fourk_pager_t   pager;
+       vm_object_t     object;
+
+       pager = fourk_pager_lookup(mem_obj);
+       if (pager == NULL) {
+               return VM_OBJECT_NULL;
+       }
+
+       assert(pager->ref_count > 0);
+       assert(pager->pager_control != MEMORY_OBJECT_CONTROL_NULL);
+       object = memory_object_control_to_vm_object(pager->pager_control);
+       assert(object != VM_OBJECT_NULL);
+       return object;
+}
+
+memory_object_t
+fourk_pager_create(void)
+{
+       fourk_pager_t           pager;
+       memory_object_control_t control;
+       kern_return_t           kr;
+       int                     i;
+
+#if 00
+       if (PAGE_SIZE_64 == FOURK_PAGE_SIZE) {
+               panic("fourk_pager_create: page size is 4K !?");
+       }
+#endif
+
+       pager = (fourk_pager_t) kalloc(sizeof (*pager));
+       if (pager == FOURK_PAGER_NULL) {
+               return MEMORY_OBJECT_NULL;
+       }
+       bzero(pager, sizeof (*pager));
+
+       /*
+        * The vm_map call takes both named entry ports and raw memory
+        * objects in the same parameter.  We need to make sure that
+        * vm_map does not see this object as a named entry port.  So,
+        * we reserve the first word in the object for a fake ip_kotype
+        * setting - that will tell vm_map to use it as a memory object.
+        */
+       pager->pager_ops = &fourk_pager_ops;
+       pager->pager_ikot = IKOT_MEMORY_OBJECT;
+       pager->pager_control = MEMORY_OBJECT_CONTROL_NULL;
+       pager->ref_count = 2;   /* existence + setup reference */
+       pager->is_ready = FALSE;/* not ready until it has a "name" */
+       pager->is_mapped = FALSE;
+
+       for (i = 0; i < FOURK_PAGER_SLOTS; i++) {
+               pager->slots[i].backing_object = (vm_object_t) -1;
+               pager->slots[i].backing_offset = (vm_object_offset_t) -1;
+       }
+       
+       lck_mtx_lock(&fourk_pager_lock);
+
+       /* enter new pager at the head of our list of pagers */
+       queue_enter_first(&fourk_pager_queue,
+                         pager,
+                         fourk_pager_t,
+                         pager_queue);
+       fourk_pager_count++;
+       if (fourk_pager_count > fourk_pager_count_max) {
+               fourk_pager_count_max = fourk_pager_count;
+       }
+       lck_mtx_unlock(&fourk_pager_lock);
+
+       kr = memory_object_create_named((memory_object_t) pager,
+                                       0,
+                                       &control);
+       assert(kr == KERN_SUCCESS);
+
+       lck_mtx_lock(&fourk_pager_lock);
+       /* the new pager is now ready to be used */
+       pager->is_ready = TRUE;
+       lck_mtx_unlock(&fourk_pager_lock);
+
+       /* wakeup anyone waiting for this pager to be ready */
+       thread_wakeup(&pager->is_ready);
+
+       return (memory_object_t) pager;
+}
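/*
 * [Editor's illustrative sketch -- not part of this commit.]
 * The "fake ip_kotype" trick set up in fourk_pager_create() above: the first
 * word of the pager doubles as an ipc_object_header, so code that can be
 * handed either a real port or a raw pager can inspect that word to tell
 * them apart.  The helper name is made up; the kernel's real check goes
 * through ip_kotype()/io_bits rather than a direct comparison.
 */
#if 0   /* sketch only */
static int
sk_is_fake_memory_object(const void *obj)
{
	/* pager_ikot is #defined above as pager_header.io_bits */
	const struct fourk_pager *p = (const struct fourk_pager *)obj;
	return (p->pager_ikot == IKOT_MEMORY_OBJECT);
}
#endif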
+
+/*
+ * fourk_pager_data_request()
+ *
+ * Handles page-in requests from VM.
+ */
+int fourk_pager_data_request_debug = 0;
+kern_return_t  
+fourk_pager_data_request(
+       memory_object_t         mem_obj,
+       memory_object_offset_t  offset,
+       memory_object_cluster_size_t            length,
+#if !DEBUG
+       __unused
+#endif
+       vm_prot_t               protection_required,
+       memory_object_fault_info_t mo_fault_info)
+{
+       fourk_pager_t           pager;
+       memory_object_control_t mo_control;
+       upl_t                   upl;
+       int                     upl_flags;
+       upl_size_t              upl_size;
+       upl_page_info_t         *upl_pl;
+       unsigned int            pl_count;
+       vm_object_t             dst_object;
+       kern_return_t           kr, retval;
+       vm_map_offset_t         kernel_mapping;
+       vm_offset_t             src_vaddr, dst_vaddr;
+       vm_offset_t             cur_offset;
+       int                     sub_page;
+       int                     sub_page_idx, sub_page_cnt;
+
+       pager = fourk_pager_lookup(mem_obj);
+       assert(pager->is_ready);
+       assert(pager->ref_count > 1); /* pager is alive and mapped */
+
+       PAGER_DEBUG(PAGER_PAGEIN, ("fourk_pager_data_request: %p, %llx, %x, %x, pager %p\n", mem_obj, offset, length, protection_required, pager));
+
+       retval = KERN_SUCCESS;
+       kernel_mapping = 0;
+
+       offset = memory_object_trunc_page(offset);
+
+       /*
+        * Gather in a UPL all the VM pages requested by VM.
+        */
+       mo_control = pager->pager_control;
+
+       upl_size = length;
+       upl_flags =
+               UPL_RET_ONLY_ABSENT |
+               UPL_SET_LITE |
+               UPL_NO_SYNC |
+               UPL_CLEAN_IN_PLACE |    /* triggers UPL_CLEAR_DIRTY */
+               UPL_SET_INTERNAL;
+       pl_count = 0;
+       kr = memory_object_upl_request(mo_control,
+                                      offset, upl_size,
+                                      &upl, NULL, NULL, upl_flags);
+       if (kr != KERN_SUCCESS) {
+               retval = kr;
+               goto done;
+       }
+       dst_object = mo_control->moc_object;
+       assert(dst_object != VM_OBJECT_NULL);
+
+#if __x86_64__ || __arm__ || __arm64__
+       /* use the 1-to-1 mapping of physical memory */
+#else /* __x86_64__ || __arm__ || __arm64__ */
+       /*
+        * Reserve 2 virtual pages in the kernel address space to map the
+        * source and destination physical pages when it's their turn to
+        * be processed.
+        */
+       vm_map_entry_t          map_entry;
+
+       vm_object_reference(kernel_object);     /* ref. for mapping */
+       kr = vm_map_find_space(kernel_map,
+                              &kernel_mapping,
+                              2 * PAGE_SIZE_64,
+                              0,
+                              0,
+                              &map_entry);
+       if (kr != KERN_SUCCESS) {
+               vm_object_deallocate(kernel_object);
+               retval = kr;
+               goto done;
+       }
+       map_entry->object.vm_object = kernel_object;
+       map_entry->offset = kernel_mapping;
+       vm_map_unlock(kernel_map);
+       src_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping);
+       dst_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping + PAGE_SIZE_64);
+#endif /* __x86_64__ || __arm__ || __arm64__ */
+
+       /*
+        * Fill in the contents of the pages requested by VM.
+        */
+       upl_pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
+       pl_count = length / PAGE_SIZE;
+       for (cur_offset = 0;
+            retval == KERN_SUCCESS && cur_offset < length;
+            cur_offset += PAGE_SIZE) {
+               ppnum_t dst_pnum;
+               int num_subpg_signed, num_subpg_validated;
+               int num_subpg_tainted, num_subpg_nx;
+
+               if (!upl_page_present(upl_pl, (int)(cur_offset / PAGE_SIZE))) {
+                       /* this page is not in the UPL: skip it */
+                       continue;
+               }
+
+               /*
+                * Establish an explicit pmap mapping of the destination
+                * physical page.
+                * We can't do a regular VM mapping because the VM page
+                * is "busy".
+                */
+               dst_pnum = (ppnum_t)
+                       upl_phys_page(upl_pl, (int)(cur_offset / PAGE_SIZE));
+               assert(dst_pnum != 0);
+#if __x86_64__
+               dst_vaddr = (vm_map_offset_t)
+                       PHYSMAP_PTOV((pmap_paddr_t)dst_pnum << PAGE_SHIFT);
+#else
+               pmap_enter(kernel_pmap,
+                          dst_vaddr,
+                          dst_pnum,
+                          VM_PROT_READ | VM_PROT_WRITE,
+                          VM_PROT_NONE,
+                          0,
+                          TRUE);
+#endif
+
+               /* retrieve appropriate data for each 4K-page in this page */
+               if (PAGE_SHIFT == FOURK_PAGE_SHIFT &&
+                   page_shift_user32 == SIXTEENK_PAGE_SHIFT) {
+                       /*
+                        * Find the slot for the requested 4KB page in
+                        * the 16K page...
+                        */
+                       assert(PAGE_SHIFT == FOURK_PAGE_SHIFT);
+                       assert(page_shift_user32 == SIXTEENK_PAGE_SHIFT);
+                       sub_page_idx = ((offset & SIXTEENK_PAGE_MASK) /
+                                       PAGE_SIZE);
+                       /*
+                        * ... and provide only that one 4KB page.
+                        */
+                       sub_page_cnt = 1;
+               } else {
+                       /*
+                        * Iterate over all slots, i.e. retrieve all four 4KB
+                        * pages in the requested 16KB page.
+                        */
+                       assert(PAGE_SHIFT == SIXTEENK_PAGE_SHIFT);
+                       sub_page_idx = 0;
+                       sub_page_cnt = FOURK_PAGER_SLOTS;
+               }
+
+               num_subpg_signed = 0;
+               num_subpg_validated = 0;
+               num_subpg_tainted = 0;
+               num_subpg_nx = 0;
+
+               /* retrieve appropriate data for each 4K-page in this page */
+               for (sub_page = sub_page_idx;
+                    sub_page < sub_page_idx + sub_page_cnt;
+                    sub_page++) {
+                       vm_object_t             src_object;
+                       memory_object_offset_t  src_offset;
+                       vm_offset_t             offset_in_src_page;
+                       kern_return_t           error_code;
+                       vm_page_t               src_page;
+                       vm_page_t               top_page;
+                       vm_prot_t               prot;
+                       int                     interruptible;
+                       struct vm_object_fault_info     fault_info;
+                       boolean_t       subpg_validated;
+                       unsigned        subpg_tainted;
+
+
+                       if (offset < SIXTEENK_PAGE_SIZE) {
+                               /*
+                                * The 1st 16K-page can cover multiple
+                                * sub-mappings, as described in the 
+                                * pager->slots[] array.
+                                */
+                               src_object =
+                                       pager->slots[sub_page].backing_object;
+                               src_offset =
+                                       pager->slots[sub_page].backing_offset;
+                       } else {
+                               fourk_pager_backing_t slot;
+
+                               /*
+                                * Beyond the 1st 16K-page in the pager is
+                                * an extension of the last "sub page" in
+                                * the pager->slots[] array.
+                                */
+                               slot = &pager->slots[FOURK_PAGER_SLOTS-1];
+                               src_object = slot->backing_object;
+                               src_offset = slot->backing_offset;
+                               src_offset += FOURK_PAGE_SIZE;
+                               src_offset +=
+                                       (vm_map_trunc_page(offset,
+                                                          SIXTEENK_PAGE_MASK)
+                                        - SIXTEENK_PAGE_SIZE);
+                               src_offset += sub_page * FOURK_PAGE_SIZE;
+                       }
+                       offset_in_src_page = src_offset & PAGE_MASK_64;
+                       src_offset = vm_object_trunc_page(src_offset);
+                               
+                       if (src_object == VM_OBJECT_NULL ||
+                           src_object == (vm_object_t) -1) {
+                               /* zero-fill */
+                               bzero((char *)(dst_vaddr +
+                                              ((sub_page-sub_page_idx)
+                                               * FOURK_PAGE_SIZE)),
+                                     FOURK_PAGE_SIZE);
+                               if (fourk_pager_data_request_debug) {
+                                       printf("fourk_pager_data_request"
+                                              "(%p,0x%llx+0x%lx+0x%04x): "
+                                              "ZERO\n",
+                                              pager,
+                                              offset,
+                                              cur_offset,
+                                              ((sub_page - sub_page_idx)
+                                               * FOURK_PAGE_SIZE));
+                               }
+                               continue;
+                       }
+
+                       /* fault in the source page from src_object */
+               retry_src_fault:
+                       src_page = VM_PAGE_NULL;
+                       top_page = VM_PAGE_NULL;
+                       fault_info = *((struct vm_object_fault_info *)
+                                      (uintptr_t)mo_fault_info);
+                       fault_info.stealth = TRUE;
+                       fault_info.io_sync = FALSE;
+                       fault_info.mark_zf_absent = FALSE;
+                       fault_info.batch_pmap_op = FALSE;
+                       interruptible = fault_info.interruptible;
+                       prot = VM_PROT_READ;
+                       error_code = 0;
+
+                       vm_object_lock(src_object);
+                       vm_object_paging_begin(src_object);
+                       kr = vm_fault_page(src_object,
+                                          src_offset,
+                                          VM_PROT_READ,
+                                          FALSE,
+                                          FALSE, /* src_page not looked up */
+                                          &prot,
+                                          &src_page,
+                                          &top_page,
+                                          NULL,
+                                          &error_code,
+                                          FALSE,
+                                          FALSE,
+                                          &fault_info);
+                       switch (kr) {
+                       case VM_FAULT_SUCCESS:
+                               break;
+                       case VM_FAULT_RETRY:
+                               goto retry_src_fault;
+                       case VM_FAULT_MEMORY_SHORTAGE:
+                               if (vm_page_wait(interruptible)) {
+                                       goto retry_src_fault;
+                               }
+                               /* fall thru */
+                       case VM_FAULT_INTERRUPTED:
+                               retval = MACH_SEND_INTERRUPTED;
+                               goto src_fault_done;
+                       case VM_FAULT_SUCCESS_NO_VM_PAGE:
+                               /* success but no VM page: fail */
+                               vm_object_paging_end(src_object);
+                               vm_object_unlock(src_object);
+                               /*FALLTHROUGH*/
+                       case VM_FAULT_MEMORY_ERROR:
+                               /* the page is not there! */
+                               if (error_code) {
+                                       retval = error_code;
+                               } else {
+                                       retval = KERN_MEMORY_ERROR;
+                               }
+                               goto src_fault_done;
+                       default:
+                               panic("fourk_pager_data_request: "
+                                     "vm_fault_page() unexpected error 0x%x\n",
+                                     kr);
+                       }
+                       assert(src_page != VM_PAGE_NULL);
+                       assert(src_page->busy);
+
+                       if (!src_page->active &&
+                           !src_page->inactive &&
+                           !src_page->speculative &&
+                           !src_page->throttled &&
+                           !VM_PAGE_WIRED(src_page)) {
+                               vm_page_lockspin_queues();
+                               if (!src_page->active &&
+                                   !src_page->inactive &&
+                                   !src_page->speculative &&
+                                   !src_page->throttled &&
+                                   !VM_PAGE_WIRED(src_page)) {
+                                       vm_page_deactivate(src_page);
+                               }
+                               vm_page_unlock_queues();
+                       }
+
+#if __x86_64__
+                       src_vaddr = (vm_map_offset_t)
+                               PHYSMAP_PTOV((pmap_paddr_t)src_page->phys_page
+                                            << PAGE_SHIFT);
+#else
+                       /*
+                        * Establish an explicit mapping of the source
+                        * physical page.
+                        */
+                       pmap_enter(kernel_pmap,
+                                  src_vaddr,
+                                  src_page->phys_page,
+                                  VM_PROT_READ,
+                                  VM_PROT_NONE,
+                                  0,
+                                  TRUE);
+#endif
+
+                       /*
+                        * Validate the 4K page we want from
+                        * this source page...
+                        */
+                       subpg_validated = FALSE;
+                       subpg_tainted = 0;
+                       if (src_page->object->code_signed) {
+                               vm_page_validate_cs_mapped_chunk(
+                                       src_page,
+                                       (const void *) src_vaddr,
+                                       offset_in_src_page,
+                                       &subpg_validated,
+                                       &subpg_tainted);
+                               num_subpg_signed++;
+                               if (subpg_validated) {
+                                       num_subpg_validated++;
+                               }
+                               if (subpg_tainted & CS_VALIDATE_TAINTED) {
+                                       num_subpg_tainted++;
+                               }
+                               if (subpg_tainted & CS_VALIDATE_NX) {
+                                       /* subpg should not be executable */
+                                       if (sub_page_cnt > 1) {
+                                               /*
+                                                * The destination page has
+                                                * more than 1 subpage and its
+                                                * other subpages might need
+                                                * EXEC, so we do not propagate
+                                                * CS_VALIDATE_NX to the
+                                                * destination page...
+                                                */
+                                       } else {
+                                               num_subpg_nx++;
+                                       }
+                               }
+                       }
+
+                       /*
+                        * Copy the relevant portion of the source page
+                        * into the appropriate part of the destination page.
+                        */
+                       bcopy((const char *)(src_vaddr + offset_in_src_page),
+                             (char *)(dst_vaddr +
+                                      ((sub_page - sub_page_idx) *
+                                       FOURK_PAGE_SIZE)),
+                             FOURK_PAGE_SIZE);
+                       if (fourk_pager_data_request_debug) {
+                               printf("fourk_data_request"
+                                      "(%p,0x%llx+0x%lx+0x%04x): "
+                                      "backed by [%p:0x%llx]: "
+                                      "[0x%016llx 0x%016llx] "
+                                      "code_signed=%d "
+                                      "cs_valid=%d cs_tainted=%d cs_nx=%d\n",
+                                      pager,
+                                      offset, cur_offset,
+                                      (sub_page-sub_page_idx)*FOURK_PAGE_SIZE,
+                                      src_page->object,
+                                      src_page->offset + offset_in_src_page,
+                                      *(uint64_t *)(dst_vaddr +
+                                                    ((sub_page-sub_page_idx) *
+                                                     FOURK_PAGE_SIZE)),
+                                      *(uint64_t *)(dst_vaddr +
+                                                    ((sub_page-sub_page_idx) *
+                                                     FOURK_PAGE_SIZE) +
+                                                    8),
+                                      src_page->object->code_signed,
+                                      subpg_validated,
+                                      !!(subpg_tainted & CS_VALIDATE_TAINTED),
+                                      !!(subpg_tainted & CS_VALIDATE_NX));
+                       }
+
+#if __x86_64__ || __arm__ || __arm64__
+                       /* we used the 1-to-1 mapping of physical memory */
+                       src_vaddr = 0;
+#else /* __x86_64__ || __arm__ || __arm64__ */
+                       /*
+                        * Remove the pmap mapping of the source page 
+                        * in the kernel.
+                        */
+                       pmap_remove(kernel_pmap,
+                                   (addr64_t) src_vaddr,
+                                   (addr64_t) src_vaddr + PAGE_SIZE_64);
+#endif /* __x86_64__ || __arm__ || __arm64__ */
+
+               src_fault_done:
+                       /*
+                        * Cleanup the result of vm_fault_page().
+                        */
+                       if (src_page) {
+                               vm_object_t     src_page_object;
+
+                               src_page_object = src_page->object;
+                               PAGE_WAKEUP_DONE(src_page);
+                               src_page = VM_PAGE_NULL;
+                               vm_object_paging_end(src_page_object);
+                               vm_object_unlock(src_page_object);
+                               if (top_page) {
+                                       vm_object_t     top_object;
+
+                                       top_object = top_page->object;
+                                       vm_object_lock(top_object);
+                                       VM_PAGE_FREE(top_page);
+                                       top_page = VM_PAGE_NULL;
+                                       vm_object_paging_end(top_object);
+                                       vm_object_unlock(top_object);
+                               }
+                       }
+               }
+               if (num_subpg_signed > 0) {
+                       /* some code-signing involved with this 16K page */
+                       if (num_subpg_tainted > 0) {
+                               /* a tainted subpage taints entire 16K page */
+                               UPL_SET_CS_TAINTED(upl_pl,
+                                                  cur_offset / PAGE_SIZE,
+                                                  TRUE);
+                               /* also mark as "validated" for consistency */
+                               UPL_SET_CS_VALIDATED(upl_pl,
+                                                    cur_offset / PAGE_SIZE,
+                                                    TRUE);
+                       } else if (num_subpg_validated == num_subpg_signed) {
+                               /*
+                                * All the code-signed 4K subpages of this
+                                * 16K page are validated:  our 16K page is
+                                * considered validated.
+                                */
+                               UPL_SET_CS_VALIDATED(upl_pl,
+                                                    cur_offset / PAGE_SIZE,
+                                                    TRUE);
+                       }
+                       if (num_subpg_nx > 0) {
+                               UPL_SET_CS_NX(upl_pl,
+                                             cur_offset / PAGE_SIZE,
+                                             TRUE);
+                       }
+               }
+       }
+
+done:
+       if (upl != NULL) {
+               /* clean up the UPL */
+
+               /*
+                * The pages are currently dirty because we've just been
+                * writing on them, but as far as we're concerned, they're
+                * clean since they contain their "original" contents as
+                * provided by us, the pager.
+                * Tell the UPL to mark them "clean".
+                */
+               upl_clear_dirty(upl, TRUE);
+
+               /* abort or commit the UPL */
+               if (retval != KERN_SUCCESS) {
+                       upl_abort(upl, 0);
+                       if (retval == KERN_ABORTED) {
+                               wait_result_t   wait_result;
+
+                               /*
+                                * We aborted the fault and did not provide
+                                * any contents for the requested pages but
+                                * the pages themselves are not invalid, so
+                                * let's return success and let the caller
+                                * retry the fault, in case it might succeed
+                                * later (when the decryption code is up and
+                                * running in the kernel, for example).
+                                */
+                               retval = KERN_SUCCESS;
+                               /*
+                                * Wait a little bit first to avoid using
+                                * too much CPU time retrying and failing
+                                * the same fault over and over again.
+                                */
+                               wait_result = assert_wait_timeout(
+                                       (event_t) fourk_pager_data_request,
+                                       THREAD_UNINT,
+                                       10000,  /* 10ms */
+                                       NSEC_PER_USEC);
+                               assert(wait_result == THREAD_WAITING);
+                               wait_result = thread_block(THREAD_CONTINUE_NULL);
+                               assert(wait_result == THREAD_TIMED_OUT);
+                       }
+               } else {
+                       boolean_t empty;
+                       upl_commit_range(upl, 0, upl->size, 
+                                        UPL_COMMIT_CS_VALIDATED | UPL_COMMIT_WRITTEN_BY_KERNEL,
+                                        upl_pl, pl_count, &empty);
+               }
+
+               /* and deallocate the UPL */
+               upl_deallocate(upl);
+               upl = NULL;
+       }
+       if (kernel_mapping != 0) {
+               /* clean up the mapping of the source and destination pages */
+               kr = vm_map_remove(kernel_map,
+                                  kernel_mapping,
+                                  kernel_mapping + (2 * PAGE_SIZE_64),
+                                  VM_MAP_NO_FLAGS);
+               assert(kr == KERN_SUCCESS);
+               kernel_mapping = 0;
+               src_vaddr = 0;
+               dst_vaddr = 0;
+       }
+
+       return retval;
+}
+
+
+
+kern_return_t
+fourk_pager_populate(
+       memory_object_t         mem_obj,
+       boolean_t               overwrite,
+       int                     index,
+       vm_object_t             new_backing_object,
+       vm_object_offset_t      new_backing_offset,
+       vm_object_t             *old_backing_object,
+       vm_object_offset_t      *old_backing_offset)
+{
+       fourk_pager_t   pager;
+
+       pager = fourk_pager_lookup(mem_obj);
+       if (pager == NULL) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       assert(pager->ref_count > 0);
+       assert(pager->pager_control != MEMORY_OBJECT_CONTROL_NULL);
+
+       if (index < 0 || index > FOURK_PAGER_SLOTS) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       if (!overwrite &&
+           (pager->slots[index].backing_object != (vm_object_t) -1 ||
+            pager->slots[index].backing_offset != (vm_object_offset_t) -1)) {
+               return KERN_INVALID_ADDRESS;
+       }
+
+       *old_backing_object = pager->slots[index].backing_object;
+       *old_backing_offset = pager->slots[index].backing_offset;
+
+       pager->slots[index].backing_object = new_backing_object;
+       pager->slots[index].backing_offset = new_backing_offset;
+
+       return KERN_SUCCESS;
+}
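/*
 * [Editor's illustrative sketch -- not part of this commit.]
 * How a hypothetical caller could use fourk_pager_populate() above to back a
 * fresh pager's four slots with consecutive 4K chunks of one VM object
 * starting at a 4K-aligned (but not 16K-aligned) offset.  Reference counting
 * on the backing object and error handling are omitted; the helper name and
 * the fixed 0x1000 stride are assumptions for the sketch.
 */
#if 0   /* sketch only */
static void
sk_populate_all_slots(
	memory_object_t         pager,
	vm_object_t             backing,
	vm_object_offset_t      base_offset)    /* 4K-aligned */
{
	vm_object_t             old_obj;
	vm_object_offset_t      old_off;
	int                     i;

	for (i = 0; i < FOURK_PAGER_SLOTS; i++) {
		(void) fourk_pager_populate(pager,
					    FALSE,      /* don't overwrite */
					    i,
					    backing,
					    base_offset + i * 0x1000,
					    &old_obj,
					    &old_off);
	}
}
#endif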
+
index 82787a83cedde52e8657b097f30b5ff7ec8321aa..b5796e35ee31600e37afebe304d54b1229c8d7df 100644 (file)
@@ -101,7 +101,7 @@ static inline void
 vm_mem_bootstrap_log(const char *message)
 {
 //     kprintf("vm_mem_bootstrap: %s\n", message);
-       kernel_debug_string(message);
+       kernel_debug_string_simple(message);
 }
 
 /*
@@ -153,7 +153,7 @@ vm_mem_bootstrap(void)
 
        if (kmapoff_pgcnt > 0 &&
            vm_allocate(kernel_map, &kmapoff_kaddr,
-           kmapoff_pgcnt * PAGE_SIZE_64, VM_FLAGS_ANYWHERE) != KERN_SUCCESS)
+           kmapoff_pgcnt * PAGE_SIZE_64, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_OSFMK)) != KERN_SUCCESS)
                panic("cannot vm_allocate %u kernel_map pages", kmapoff_pgcnt);
 
        vm_mem_bootstrap_log("pmap_init");
index 5dcffd7402bc6a60a310aa2095c6d595009c4ee4..d015ebd2c7cc79e1728f7fc89411875777cd5feb 100644 (file)
@@ -97,13 +97,6 @@ extern kern_return_t kmem_alloc_pages(
        register vm_object_offset_t     offset,
        register vm_object_size_t       size);
 
-extern void kmem_remap_pages(
-       register vm_object_t            object,
-       register vm_object_offset_t     offset,
-       register vm_offset_t            start,
-       register vm_offset_t            end,
-       vm_prot_t                       protection);
-
 kern_return_t
 kmem_alloc_contig(
        vm_map_t                map,
@@ -112,7 +105,8 @@ kmem_alloc_contig(
        vm_offset_t             mask,
        ppnum_t                 max_pnum,
        ppnum_t                 pnum_mask,
-       int                     flags)
+       int                     flags,
+       vm_tag_t                tag)
 {
        vm_object_t             object;
        vm_object_offset_t      offset;
@@ -154,9 +148,14 @@ kmem_alloc_contig(
                return kr;
        }
 
-       entry->object.vm_object = object;
-       entry->offset = offset = (object == kernel_object) ? 
-                       map_addr : 0;
+       if (object == kernel_object) {
+               offset = map_addr;
+       } else {
+               offset = 0;
+       }
+       VME_OBJECT_SET(entry, object);
+       VME_OFFSET_SET(entry, offset);
+       VME_ALIAS_SET(entry, tag);
 
        /* Take an extra object ref in case the map entry gets deleted */
        vm_object_reference(object);
@@ -191,8 +190,9 @@ kmem_alloc_contig(
                                           VM_MAP_PAGE_MASK(map)),
                         vm_map_round_page(map_addr + map_size,
                                           VM_MAP_PAGE_MASK(map)),
-                        VM_PROT_DEFAULT,
+                        VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(tag),
                         FALSE);
+
        if (kr != KERN_SUCCESS) {
                if (object == kernel_object) {
                        vm_object_lock(object);
@@ -241,7 +241,8 @@ kernel_memory_allocate(
        register vm_offset_t    *addrp,
        register vm_size_t      size,
        register vm_offset_t    mask,
-       int                     flags)
+       int                     flags,
+       vm_tag_t                tag)
 {
        vm_object_t             object;
        vm_object_offset_t      offset;
@@ -267,7 +268,8 @@ kernel_memory_allocate(
        map_size = vm_map_round_page(size,
                                     VM_MAP_PAGE_MASK(map));
        map_mask = (vm_map_offset_t) mask;
-       vm_alloc_flags = 0;
+
+       vm_alloc_flags = VM_MAKE_TAG(tag);
 
        /* Check for zero allocation size (either directly or via overflow) */
        if (map_size == 0) {
@@ -281,7 +283,7 @@ kernel_memory_allocate(
         * too many pages get wired down
         * limit raised to 2GB with 128GB max physical limit
         */
-        if (map_size > (1ULL << 31)) {
+        if ( !(flags & KMA_VAONLY) && map_size > (1ULL << 31)) {
                 return KERN_RESOURCE_SHORTAGE;
         }
 
@@ -399,9 +401,13 @@ kernel_memory_allocate(
                goto out;
        }
 
-       entry->object.vm_object = object;
-       entry->offset = offset = (object == kernel_object || object == compressor_object) ? 
-                       map_addr : 0;
+       if (object == kernel_object || object == compressor_object) {
+               offset = map_addr;
+       } else {
+               offset = 0;
+       }
+       VME_OBJECT_SET(entry, object);
+       VME_OFFSET_SET(entry, offset);
        
        if (object != compressor_object)
                entry->wired_count++;
@@ -445,7 +451,7 @@ kernel_memory_allocate(
                mem->pageq.next = NULL;
                mem->wire_count++;
 
-               vm_page_insert(mem, object, offset + pg_offset);
+               vm_page_insert_wired(mem, object, offset + pg_offset, tag);
 
                mem->busy = FALSE;
                mem->pmapped = TRUE;
@@ -522,7 +528,8 @@ kernel_memory_populate(
        vm_map_t        map,
        vm_offset_t     addr,
        vm_size_t       size,
-       int             flags)
+       int             flags,
+       vm_tag_t        tag)
 {
        vm_object_t             object;
        vm_object_offset_t      offset, pg_offset;
@@ -538,7 +545,9 @@ kernel_memory_populate(
 
        if (flags & KMA_COMPRESSOR) {
 
-               for (i = 0; i < page_count; i++) {
+               pg_offset = page_count * PAGE_SIZE_64;
+
+               do {
                        for (;;) {
                                mem = vm_page_grab();
 
@@ -549,7 +558,17 @@ kernel_memory_populate(
                        }
                        mem->pageq.next = (queue_entry_t) page_list;
                        page_list = mem;
-               }
+
+                       pg_offset -= PAGE_SIZE_64;
+
+                       kr = pmap_enter_options(kernel_pmap,
+                                                 addr + pg_offset, mem->phys_page,
+                                                 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE,
+                                                 PMAP_OPTIONS_INTERNAL, NULL);
+                       assert(kr == KERN_SUCCESS);
+
+               } while (pg_offset);
+
                offset = addr;
                object = compressor_object;
 
@@ -566,19 +585,6 @@ kernel_memory_populate(
                        vm_page_insert(mem, object, offset + pg_offset);
                        assert(mem->busy);
 
-                       PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset, mem,
-                                          VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
-                                          0, TRUE, PMAP_OPTIONS_NOWAIT, pe_result);
-
-                       if (pe_result == KERN_RESOURCE_SHORTAGE) {
-
-                               vm_object_unlock(object);
-
-                               PMAP_ENTER(kernel_pmap, addr + pg_offset, mem,
-                                          VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
-
-                               vm_object_lock(object);
-                       }
                        mem->busy = FALSE;
                        mem->pmapped = TRUE;
                        mem->wpmapped = TRUE;
@@ -645,7 +651,7 @@ kernel_memory_populate(
 
                mem->wire_count++;
 
-               vm_page_insert(mem, object, offset + pg_offset);
+               vm_page_insert_wired(mem, object, offset + pg_offset, tag);
 
                mem->busy = FALSE;
                mem->pmapped = TRUE;
@@ -762,12 +768,22 @@ kernel_memory_depopulate(
  */
 
 kern_return_t
-kmem_alloc(
+kmem_alloc_external(
        vm_map_t        map,
        vm_offset_t     *addrp,
        vm_size_t       size)
 {
-       kern_return_t kr = kernel_memory_allocate(map, addrp, size, 0, 0);
+    return (kmem_alloc(map, addrp, size, vm_tag_bt()));
+}
+
+kern_return_t
+kmem_alloc(
+       vm_map_t        map,
+       vm_offset_t     *addrp,
+       vm_size_t       size,
+       vm_tag_t        tag)
+{
+       kern_return_t kr = kernel_memory_allocate(map, addrp, size, 0, 0, tag);
        TRACE_MACHLEAKS(KMEM_ALLOC_CODE, KMEM_ALLOC_CODE_2, size, *addrp);
        return kr;
 }
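
The split keeps the old three-argument kmem_alloc() shape for code built outside the kernel proper, with the tag recovered from the caller's backtrace via vm_tag_bt(), while XNU-internal callers now name the allocation site explicitly. A hedged sketch of the two call shapes; VM_KERN_MEMORY_NONE is a placeholder tag, and the routing of kext calls to the _external symbol is not shown in this hunk:

        vm_offset_t     addr;
        kern_return_t   kr;

        /* code outside XNU_KERNEL_PRIVATE: unchanged form, tag inferred by backtrace */
        kr = kmem_alloc_external(kernel_map, &addr, PAGE_SIZE);

        /* XNU-internal caller: explicit allocation-site tag */
        kr = kmem_alloc(kernel_map, &addr, PAGE_SIZE, VM_KERN_MEMORY_NONE /* placeholder */);
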
@@ -788,7 +804,8 @@ kmem_realloc(
        vm_offset_t             oldaddr,
        vm_size_t               oldsize,
        vm_offset_t             *newaddrp,
-       vm_size_t               newsize)
+       vm_size_t               newsize,
+       vm_tag_t                tag)
 {
        vm_object_t             object;
        vm_object_offset_t      offset;
@@ -819,7 +836,7 @@ kmem_realloc(
 
        if (!vm_map_lookup_entry(map, oldmapmin, &oldentry))
                panic("kmem_realloc");
-       object = oldentry->object.vm_object;
+       object = VME_OBJECT(oldentry);
 
        /*
         *      Increase the size of the object and
@@ -861,9 +878,10 @@ kmem_realloc(
                vm_object_deallocate(object);
                return kr;
        }
-       newentry->object.vm_object = object;
-       newentry->offset = 0;
-       assert (newentry->wired_count == 0);
+       VME_OBJECT_SET(newentry, object);
+       VME_OFFSET_SET(newentry, 0);
+       VME_ALIAS_SET(newentry, tag);
+       assert(newentry->wired_count == 0);
 
        
        /* add an extra reference in case we have someone doing an */
@@ -871,7 +889,8 @@ kmem_realloc(
        vm_object_reference(object);
        vm_map_unlock(map);
 
-       kr = vm_map_wire(map, newmapaddr, newmapaddr + newmapsize, VM_PROT_DEFAULT, FALSE);
+       kr = vm_map_wire(map, newmapaddr, newmapaddr + newmapsize,
+                        VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(tag), FALSE);
        if (KERN_SUCCESS != kr) {
                vm_map_remove(map, newmapaddr, newmapaddr + newmapsize, 0);
                vm_object_lock(object);
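
In kmem_realloc() the tag now travels along two paths: it is stored on the new map entry via VME_ALIAS_SET(), and it is folded into the vm_map_wire() protection argument so the wired-page accounting is charged to the same site. Condensed from the hunk above:

        VME_ALIAS_SET(newentry, tag);                   /* tag the map entry */
        kr = vm_map_wire(map, newmapaddr, newmapaddr + newmapsize,
                         VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(tag), /* tag the wiring */
                         FALSE);
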
@@ -903,12 +922,22 @@ kmem_realloc(
  */
 
 kern_return_t
-kmem_alloc_kobject(
+kmem_alloc_kobject_external(
        vm_map_t        map,
        vm_offset_t     *addrp,
        vm_size_t       size)
 {
-       return kernel_memory_allocate(map, addrp, size, 0, KMA_KOBJECT);
+    return (kmem_alloc_kobject(map, addrp, size, vm_tag_bt()));
+}
+
+kern_return_t
+kmem_alloc_kobject(
+       vm_map_t        map,
+       vm_offset_t     *addrp,
+       vm_size_t       size,
+       vm_tag_t        tag)
+{
+       return kernel_memory_allocate(map, addrp, size, 0, KMA_KOBJECT, tag);
 }
 
 /*
@@ -922,11 +951,12 @@ kern_return_t
 kmem_alloc_aligned(
        vm_map_t        map,
        vm_offset_t     *addrp,
-       vm_size_t       size)
+       vm_size_t       size,
+       vm_tag_t        tag)
 {
        if ((size & (size - 1)) != 0)
                panic("kmem_alloc_aligned: size not aligned");
-       return kernel_memory_allocate(map, addrp, size, size - 1, KMA_KOBJECT);
+       return kernel_memory_allocate(map, addrp, size, size - 1, KMA_KOBJECT, tag);
 }
 
 /*
@@ -936,10 +966,20 @@ kmem_alloc_aligned(
  */
 
 kern_return_t
-kmem_alloc_pageable(
+kmem_alloc_pageable_external(
        vm_map_t        map,
        vm_offset_t     *addrp,
        vm_size_t       size)
+{
+    return (kmem_alloc_pageable(map, addrp, size, vm_tag_bt()));
+}
+
+kern_return_t
+kmem_alloc_pageable(
+       vm_map_t        map,
+       vm_offset_t     *addrp,
+       vm_size_t       size,
+       vm_tag_t        tag)
 {
        vm_map_offset_t map_addr;
        vm_map_size_t   map_size;
@@ -954,7 +994,8 @@ kmem_alloc_pageable(
                                     VM_MAP_PAGE_MASK(map));
 
        kr = vm_map_enter(map, &map_addr, map_size,
-                         (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
+                         (vm_map_offset_t) 0, 
+                         VM_FLAGS_ANYWHERE | VM_MAKE_TAG(tag),
                          VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
                          VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
 
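
Pageable kernel memory is never wired, so here the tag rides in the vm_map_enter() flags via VM_MAKE_TAG() rather than in a wire request. A minimal caller sketch; the map and tag below are placeholders:

        vm_offset_t     addr;
        kern_return_t   kr;

        kr = kmem_alloc_pageable(ipc_kernel_map, &addr, 4 * PAGE_SIZE,
                                 VM_KERN_MEMORY_NONE /* placeholder tag */);
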
@@ -1038,75 +1079,6 @@ kmem_alloc_pages(
        return KERN_SUCCESS;
 }
 
-/*
- *     Remap wired pages in an object into a new region.
- *     The object is assumed to be mapped into the kernel map or
- *     a submap.
- */
-void
-kmem_remap_pages(
-       register vm_object_t            object,
-       register vm_object_offset_t     offset,
-       register vm_offset_t            start,
-       register vm_offset_t            end,
-       vm_prot_t                       protection)
-{
-
-       vm_map_offset_t                 map_start;
-       vm_map_offset_t                 map_end;
-
-       /*
-        *      Mark the pmap region as not pageable.
-        */
-       map_start = vm_map_trunc_page(start,
-                                     VM_MAP_PAGE_MASK(kernel_map));
-       map_end = vm_map_round_page(end,
-                                   VM_MAP_PAGE_MASK(kernel_map));
-
-       pmap_pageable(kernel_pmap, map_start, map_end, FALSE);
-
-       while (map_start < map_end) {
-           register vm_page_t  mem;
-
-           vm_object_lock(object);
-
-           /*
-            *  Find a page
-            */
-           if ((mem = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
-               panic("kmem_remap_pages");
-
-           /*
-            *  Wire it down (again)
-            */
-           vm_page_lockspin_queues();
-           vm_page_wire(mem);
-           vm_page_unlock_queues();
-           vm_object_unlock(object);
-
-           /*
-            * ENCRYPTED SWAP:
-            * The page is supposed to be wired now, so it
-            * shouldn't be encrypted at this point.  It can
-            * safely be entered in the page table.
-            */
-           ASSERT_PAGE_DECRYPTED(mem);
-
-           /*
-            *  Enter it in the kernel pmap.  The page isn't busy,
-            *  but this shouldn't be a problem because it is wired.
-            */
-
-           mem->pmapped = TRUE;
-           mem->wpmapped = TRUE;
-
-           PMAP_ENTER(kernel_pmap, map_start, mem, protection, VM_PROT_NONE, 0, TRUE);
-
-           map_start += PAGE_SIZE;
-           offset += PAGE_SIZE;
-       }
-}
-
 /*
  *     kmem_suballoc:
  *
@@ -1341,7 +1313,7 @@ vm_conflict_check(
        while(vm_map_lookup_entry(map, off, &entry)) {
                local_len = len;
 
-               if (entry->object.vm_object == VM_OBJECT_NULL) {
+               if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
                        vm_map_unlock(map);
                        return KERN_SUCCESS;
                }
@@ -1349,14 +1321,14 @@ vm_conflict_check(
                        vm_map_t        old_map;
 
                        old_map = map;
-                       vm_map_lock(entry->object.sub_map);
-                       map = entry->object.sub_map;
-                       off = entry->offset + (off - entry->vme_start);
+                       vm_map_lock(VME_SUBMAP(entry));
+                       map = VME_SUBMAP(entry);
+                       off = VME_OFFSET(entry) + (off - entry->vme_start);
                        vm_map_unlock(old_map);
                        continue;
                }
-               obj = entry->object.vm_object;
-               obj_off = (off - entry->vme_start) + entry->offset;
+               obj = VME_OBJECT(entry);
+               obj_off = (off - entry->vme_start) + VME_OFFSET(entry);
                while(obj->shadow) {
                        obj_off += obj->vo_shadow_offset;
                        obj = obj->shadow;
@@ -1428,3 +1400,57 @@ vm_conflict_check(
        vm_map_unlock(map);
        return kr;
 }
+
+/*
+ *
+ *     The following two functions are to be used when exposing kernel
+ *     addresses to userspace via any of the various debug or info
+ *     facilities that exist. These are basically the same as VM_KERNEL_ADDRPERM()
+ *     and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and
+ *     are exported to KEXTs.
+ *
+ *     NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
+ */
+
+/*
+ *     vm_kernel_addrperm_external:
+ *
+ *     Used when exposing an address to userspace which is in the kernel's
+ *     "heap". These addresses are not loaded from anywhere and are therefore
+ *     unslid. We apply a permutation value to obscure the address.
+ */
+void
+vm_kernel_addrperm_external(
+       vm_offset_t addr,
+       vm_offset_t *perm_addr)
+{
+       if (addr == 0) {
+               *perm_addr = 0;
+               return;
+       }
+
+       *perm_addr = (addr + vm_kernel_addrperm_ext);
+       return;
+}
+
+/*
+ *     vm_kernel_unslide_or_perm_external:
+ *
+ *     Use this macro when exposing an address to userspace that could come from
+ *     either kernel text/data *or* the heap.
+ */
+void
+vm_kernel_unslide_or_perm_external(
+       vm_offset_t addr,
+       vm_offset_t *up_addr)
+{
+       if (VM_KERNEL_IS_SLID(addr) || VM_KERNEL_IS_KEXT(addr) ||
+        VM_KERNEL_IS_PRELINKTEXT(addr) || VM_KERNEL_IS_PRELINKINFO(addr) ||
+        VM_KERNEL_IS_KEXT_LINKEDIT(addr)) {
+               *up_addr = addr - vm_kernel_slide;
+               return;
+       }
+
+       vm_kernel_addrperm_external(addr, up_addr);
+       return;
+}
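
A minimal usage sketch for the two exported helpers added above, assuming a kext that needs to hand a kernel pointer to a userspace debug facility; only the two function signatures come from this change, the rest is illustrative:

        /* prototypes are declared under #ifdef KERNEL in vm_kern.h later in this commit */
        static uint64_t
        expose_kernel_pointer(void *ptr)
        {
                vm_offset_t     up_addr;

                /* handles slid text/data, kext, prelink and heap addresses alike */
                vm_kernel_unslide_or_perm_external((vm_offset_t)ptr, &up_addr);
                return (uint64_t)up_addr;
        }
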
index 6fd232e46eece41a4a206d8ac692826f5b78e8b4..435dae135bafdee461efe51595216745814dd057 100644
 
 #ifdef KERNEL_PRIVATE
 
+#ifdef XNU_KERNEL_PRIVATE
+
 extern kern_return_t   kernel_memory_allocate(
                                vm_map_t        map,
                                vm_offset_t     *addrp,
                                vm_size_t       size,
                                vm_offset_t     mask,
-                               int             flags);
+                               int             flags,
+                               vm_tag_t        tag);
 
 /* flags for kernel_memory_allocate */
 #define KMA_HERE       0x01
@@ -99,29 +102,34 @@ extern kern_return_t kmem_alloc_contig(
                                vm_offset_t     mask,
                                ppnum_t         max_pnum,
                                ppnum_t         pnum_mask,
-                               int             flags);
+                               int             flags,
+                               vm_tag_t        tag);
 
 extern kern_return_t   kmem_alloc(
                                vm_map_t        map,
                                vm_offset_t     *addrp,
-                               vm_size_t       size);
+                               vm_size_t       size,
+                               vm_tag_t        tag);
 
 extern kern_return_t   kmem_alloc_pageable(
                                vm_map_t        map,
                                vm_offset_t     *addrp,
-                               vm_size_t       size);
+                               vm_size_t       size,
+                               vm_tag_t        tag);
 
 extern kern_return_t   kmem_alloc_aligned(
                                vm_map_t        map,
                                vm_offset_t     *addrp,
-                               vm_size_t       size);
+                               vm_size_t       size,
+                               vm_tag_t        tag);
 
 extern kern_return_t   kmem_realloc(
                                vm_map_t        map,
                                vm_offset_t     oldaddr,
                                vm_size_t       oldsize,
                                vm_offset_t     *newaddrp,
-                               vm_size_t       newsize);
+                               vm_size_t       newsize,
+                               vm_tag_t        tag);
 
 extern void            kmem_free(
                                vm_map_t        map,
@@ -136,23 +144,75 @@ extern kern_return_t      kmem_suballoc(
                                int             flags,
                                vm_map_t        *new_map);
 
-
 extern kern_return_t   kmem_alloc_kobject(
                                vm_map_t        map,
                                vm_offset_t     *addrp,
-                               vm_size_t       size);
+                               vm_size_t       size,
+                               vm_tag_t        tag);
 
 extern kern_return_t kernel_memory_populate(
        vm_map_t        map,
        vm_offset_t     addr,
        vm_size_t       size,
-       int             flags);
+       int             flags,
+       vm_tag_t        tag);
+
 extern void kernel_memory_depopulate(
        vm_map_t        map,
        vm_offset_t     addr,
        vm_size_t       size,
        int             flags);
 
+extern kern_return_t   memory_object_iopl_request(
+       ipc_port_t              port,
+       memory_object_offset_t  offset,
+       upl_size_t              *upl_size,
+       upl_t                   *upl_ptr,
+       upl_page_info_array_t   user_page_list,
+       unsigned int            *page_list_count,
+       upl_control_flags_t     *flags);
+
+struct mach_memory_info;
+extern kern_return_t   vm_page_diagnose(struct mach_memory_info * sites, 
+                                        unsigned int num_sites);
+
+extern vm_tag_t        vm_tag_bt(void);
+
+extern vm_tag_t                vm_tag_alloc(vm_allocation_site_t * site);
+
+extern void            vm_tag_alloc_locked(vm_allocation_site_t * site);
+
+extern vm_tag_t        vm_tag_bt_debug(void);
+
+extern boolean_t       vm_kernel_map_is_kernel(vm_map_t map);
+
+extern ppnum_t         kernel_pmap_present_mapping(uint64_t vaddr, uint64_t * pvincr);
+
+#else /* XNU_KERNEL_PRIVATE */
+
+extern kern_return_t   kmem_alloc(
+                               vm_map_t        map,
+                               vm_offset_t     *addrp,
+                               vm_size_t       size);
+
+extern kern_return_t   kmem_alloc_pageable(
+                               vm_map_t        map,
+                               vm_offset_t     *addrp,
+                               vm_size_t       size);
+
+extern kern_return_t   kmem_alloc_kobject(
+                               vm_map_t        map,
+                               vm_offset_t     *addrp,
+                               vm_size_t       size);
+
+extern void            kmem_free(
+                               vm_map_t        map,
+                               vm_offset_t     addr,
+                               vm_size_t       size);
+
+#endif /* !XNU_KERNEL_PRIVATE */
+
+
 #ifdef MACH_KERNEL_PRIVATE
 
 extern void            kmem_init(
@@ -179,6 +239,21 @@ extern kern_return_t       vm_conflict_check(
                                memory_object_t         pager,
                                vm_object_offset_t      file_off);
 
+extern kern_return_t   kmem_alloc_external(
+                               vm_map_t        map,
+                               vm_offset_t     *addrp,
+                               vm_size_t       size);
+
+extern kern_return_t   kmem_alloc_kobject_external(
+                               vm_map_t        map,
+                               vm_offset_t     *addrp,
+                               vm_size_t       size);
+
+extern kern_return_t   kmem_alloc_pageable_external(
+                               vm_map_t        map,
+                               vm_offset_t     *addrp,
+                               vm_size_t       size);
+
 #endif /* MACH_KERNEL_PRIVATE */
 
 extern vm_map_t        kernel_map;
@@ -187,4 +262,16 @@ extern vm_map_t ipc_kernel_map;
 
 #endif /* KERNEL_PRIVATE */
 
+#ifdef KERNEL
+
+extern vm_offset_t vm_kernel_addrperm_ext;
+
+extern void    vm_kernel_addrperm_external(
+                       vm_offset_t addr,
+                       vm_offset_t *perm_addr);
+
+extern void    vm_kernel_unslide_or_perm_external(
+               vm_offset_t addr,
+               vm_offset_t *up_addr);
+#endif /* KERNEL */
 #endif /* _VM_VM_KERN_H_ */
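
The header now publishes two shapes of the same allocators. A hedged sketch of what one call site looks like on each side of XNU_KERNEL_PRIVATE; the tag is a placeholder:

        #ifdef XNU_KERNEL_PRIVATE
                /* core kernel: allocation-site tag is mandatory */
                kr = kmem_alloc_kobject(kernel_map, &addr, size, VM_KERN_MEMORY_NONE /* placeholder */);
        #else
                /* kexts keep the pre-existing three-argument prototype */
                kr = kmem_alloc_kobject(kernel_map, &addr, size);
        #endif
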
index 58a8c7828aec7159f3e5b1d1d90509269ae77960..f0a560f067d8a0324229a1534f7a95a57e791082 100644
 #include <vm/vm_shared_region.h>
 #include <vm/vm_map_store.h>
 
+
 extern u_int32_t random(void); /* from <libkern/libkern.h> */
 /* Internal prototypes
  */
@@ -216,7 +217,7 @@ static kern_return_t        vm_map_wire_nested(
        vm_map_t                   map,
        vm_map_offset_t            start,
        vm_map_offset_t            end,
-       vm_prot_t                  access_type,
+       vm_prot_t                  caller_prot,
        boolean_t                  user_wire,
        pmap_t                     map_pmap, 
        vm_map_offset_t            pmap_addr,
@@ -297,6 +298,12 @@ static kern_return_t       vm_map_can_reuse(
        vm_map_offset_t start,
        vm_map_offset_t end);
 
+#if MACH_ASSERT
+static kern_return_t   vm_map_pageout(
+       vm_map_t        map,
+       vm_map_offset_t start,
+       vm_map_offset_t end);
+#endif /* MACH_ASSERT */
 
 /*
  * Macros to copy a vm_map_entry. We must be careful to correctly
@@ -321,6 +328,8 @@ boolean_t _vmec_reserved = (NEW)->from_reserved_zone;       \
        (NEW)->used_for_jit = FALSE;    \
        (NEW)->from_reserved_zone = _vmec_reserved;     \
        (NEW)->iokit_acct = FALSE;      \
+       (NEW)->vme_resilient_codesign = FALSE; \
+       (NEW)->vme_resilient_media = FALSE;     \
 MACRO_END
 
 #define vm_map_entry_copy_full(NEW,OLD)                        \
@@ -367,6 +376,8 @@ override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
 {
        int current_abi;
 
+       if (map->pmap == kernel_pmap) return FALSE;
+
        /*
         * Determine if the app is running in 32 or 64 bit mode.
         */
@@ -436,6 +447,7 @@ static zone_t       vm_map_entry_zone;      /* zone for vm_map_entry structures */
 static zone_t  vm_map_entry_reserved_zone;     /* zone with reserve for non-blocking
                                         * allocations */
 static zone_t  vm_map_copy_zone;       /* zone for vm_map_copy structures */
+zone_t         vm_map_holes_zone;      /* zone for vm map holes (vm_map_links) structures */
 
 
 /*
@@ -450,6 +462,8 @@ static void         *map_data;
 static vm_size_t       map_data_size;
 static void            *kentry_data;
 static vm_size_t       kentry_data_size;
+static void            *map_holes_data;
+static vm_size_t       map_holes_data_size;
 
 #define         NO_COALESCE_LIMIT  ((1024 * 128) - 1)
 
@@ -477,7 +491,7 @@ vm_map_set_cache_attr(
                kr = KERN_INVALID_ARGUMENT;
                goto done;
        }
-       object = map_entry->object.vm_object;
+       object = VME_OBJECT(map_entry);
 
        if (object == VM_OBJECT_NULL) {
                /*
@@ -509,87 +523,178 @@ done:
  */
 kern_return_t
 vm_map_apple_protected(
-       vm_map_t        map,
-       vm_map_offset_t start,
-       vm_map_offset_t end,
+       vm_map_t                map,
+       vm_map_offset_t         start,
+       vm_map_offset_t         end,
+       vm_object_offset_t      crypto_backing_offset,
        struct pager_crypt_info *crypt_info)
 {
        boolean_t       map_locked;
        kern_return_t   kr;
        vm_map_entry_t  map_entry;
-       memory_object_t protected_mem_obj;
+       struct vm_map_entry tmp_entry;
+       memory_object_t unprotected_mem_obj;
        vm_object_t     protected_object;
        vm_map_offset_t map_addr;
+       vm_map_offset_t start_aligned, end_aligned;
+       vm_object_offset_t      crypto_start, crypto_end;
+       int             vm_flags;
 
-       vm_map_lock_read(map);
-       map_locked = TRUE;
-
-       /* lookup the protected VM object */
-       if (!vm_map_lookup_entry(map,
-                                start,
-                                &map_entry) ||
-           map_entry->vme_end < end ||
-           map_entry->is_sub_map ||
-           !(map_entry->protection & VM_PROT_EXECUTE)) {
-               /* that memory is not properly mapped */
-               kr = KERN_INVALID_ARGUMENT;
-               goto done;
-       }
-       protected_object = map_entry->object.vm_object;
-       if (protected_object == VM_OBJECT_NULL) {
-               /* there should be a VM object here at this point */
-               kr = KERN_INVALID_ARGUMENT;
-               goto done;
-       }
+       map_locked = FALSE;
+       unprotected_mem_obj = MEMORY_OBJECT_NULL;
 
-       /* make sure protected object stays alive while map is unlocked */
-       vm_object_reference(protected_object);
+       start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
+       end_aligned = vm_map_round_page(end, PAGE_MASK_64);
+       start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
+       end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
 
-       vm_map_unlock_read(map);
-       map_locked = FALSE;
+       assert(start_aligned == start);
+       assert(end_aligned == end);
 
-       /*
-        * Lookup (and create if necessary) the protected memory object
-        * matching that VM object.
-        * If successful, this also grabs a reference on the memory object,
-        * to guarantee that it doesn't go away before we get a chance to map
-        * it.
-        */
-       protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
+       map_addr = start_aligned;
+       for (map_addr = start_aligned;
+            map_addr < end;
+            map_addr = tmp_entry.vme_end) {
+               vm_map_lock(map);
+               map_locked = TRUE;
 
-       /* release extra ref on protected object */
-       vm_object_deallocate(protected_object);
+               /* lookup the protected VM object */
+               if (!vm_map_lookup_entry(map,
+                                        map_addr,
+                                        &map_entry) ||
+                   map_entry->is_sub_map ||
+                   VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
+                   !(map_entry->protection & VM_PROT_EXECUTE)) {
+                       /* that memory is not properly mapped */
+                       kr = KERN_INVALID_ARGUMENT;
+                       goto done;
+               }
 
-       if (protected_mem_obj == NULL) {
-               kr = KERN_FAILURE;
-               goto done;
-       }
+               /* get the protected object to be decrypted */
+               protected_object = VME_OBJECT(map_entry);
+               if (protected_object == VM_OBJECT_NULL) {
+                       /* there should be a VM object here at this point */
+                       kr = KERN_INVALID_ARGUMENT;
+                       goto done;
+               }
+               /* ensure protected object stays alive while map is unlocked */
+               vm_object_reference(protected_object);
+
+               /* limit the map entry to the area we want to cover */
+               vm_map_clip_start(map, map_entry, start_aligned);
+               vm_map_clip_end(map, map_entry, end_aligned);
+
+               tmp_entry = *map_entry;
+               map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
+               vm_map_unlock(map);
+               map_locked = FALSE;
+
+               /*
+                * This map entry might be only partially encrypted
+                * (if not fully "page-aligned").
+                */
+               crypto_start = 0;
+               crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
+               if (tmp_entry.vme_start < start) {
+                       if (tmp_entry.vme_start != start_aligned) {
+                               kr = KERN_INVALID_ADDRESS;
+                       }
+                       crypto_start += (start - tmp_entry.vme_start);
+               }
+               if (tmp_entry.vme_end > end) {
+                       if (tmp_entry.vme_end != end_aligned) {
+                               kr = KERN_INVALID_ADDRESS;
+                       }
+                       crypto_end -= (tmp_entry.vme_end - end);
+               }
+
+               /*
+                * This "extra backing offset" is needed to get the decryption
+                * routine to use the right key.  It adjusts for the possibly
+                * relative offset of an interposed "4K" pager...
+                */
+               if (crypto_backing_offset == (vm_object_offset_t) -1) {
+                       crypto_backing_offset = VME_OFFSET(&tmp_entry);
+               }
 
-       /* map this memory object in place of the current one */
-       map_addr = start;
-       kr = vm_map_enter_mem_object(map,
-                                    &map_addr,
-                                    end - start,
-                                    (mach_vm_offset_t) 0,
-                                    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
-                                    (ipc_port_t) protected_mem_obj,
-                                    (map_entry->offset +
-                                     (start - map_entry->vme_start)),
-                                    TRUE,
-                                    map_entry->protection,
-                                    map_entry->max_protection,
-                                    map_entry->inheritance);
-       assert(map_addr == start);
-       /*
-        * Release the reference obtained by apple_protect_pager_setup().
-        * The mapping (if it succeeded) is now holding a reference on the
-        * memory object.
-        */
-       memory_object_deallocate(protected_mem_obj);
+               /*
+                * Lookup (and create if necessary) the protected memory object
+                * matching that VM object.
+                * If successful, this also grabs a reference on the memory object,
+                * to guarantee that it doesn't go away before we get a chance to map
+                * it.
+                */
+               unprotected_mem_obj = apple_protect_pager_setup(
+                       protected_object,
+                       VME_OFFSET(&tmp_entry),
+                       crypto_backing_offset,
+                       crypt_info,
+                       crypto_start,
+                       crypto_end);
+
+               /* release extra ref on protected object */
+               vm_object_deallocate(protected_object);
+
+               if (unprotected_mem_obj == NULL) {
+                       kr = KERN_FAILURE;
+                       goto done;
+               }
+
+               vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
+
+               /* map this memory object in place of the current one */
+               map_addr = tmp_entry.vme_start;
+               kr = vm_map_enter_mem_object(map,
+                                            &map_addr,
+                                            (tmp_entry.vme_end -
+                                             tmp_entry.vme_start),
+                                            (mach_vm_offset_t) 0,
+                                            vm_flags,
+                                            (ipc_port_t) unprotected_mem_obj,
+                                            0,
+                                            TRUE,
+                                            tmp_entry.protection,
+                                            tmp_entry.max_protection,
+                                            tmp_entry.inheritance);
+               assert(kr == KERN_SUCCESS);
+               assert(map_addr == tmp_entry.vme_start);
+
+#if VM_MAP_DEBUG_APPLE_PROTECT
+               printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p: "
+                      "backing:[object:%p,offset:0x%llx,"
+                      "crypto_backing_offset:0x%llx,"
+                      "crypto_start:0x%llx,crypto_end:0x%llx]\n",
+                      map,
+                      (uint64_t) map_addr,
+                      (uint64_t) (map_addr + (tmp_entry.vme_end -
+                                              tmp_entry.vme_start)),
+                      unprotected_mem_obj,
+                      protected_object,
+                      VME_OFFSET(&tmp_entry),
+                      crypto_backing_offset,
+                      crypto_start,
+                      crypto_end);
+#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
+                      
+               /*
+                * Release the reference obtained by
+                * apple_protect_pager_setup().
+                * The mapping (if it succeeded) is now holding a reference on
+                * the memory object.
+                */
+               memory_object_deallocate(unprotected_mem_obj);
+               unprotected_mem_obj = MEMORY_OBJECT_NULL;
+
+               /* continue with next map entry */
+               crypto_backing_offset += (tmp_entry.vme_end -
+                                         tmp_entry.vme_start);
+               crypto_backing_offset -= crypto_start;
+       }
+       kr = KERN_SUCCESS;
 
 done:
        if (map_locked) {
-               vm_map_unlock_read(map);
+               vm_map_unlock(map);
        }
        return kr;
 }
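
A worked example of the crypto_start/crypto_end trimming above, using hypothetical numbers:

        /*
         * Suppose the clipped entry is [0x4000, 0x8000) but the caller asked
         * to decrypt only [0x5000, 0x7000):
         *
         *      crypto_start = 0 + (0x5000 - 0x4000)                 = 0x1000
         *      crypto_end   = (0x8000 - 0x4000) - (0x8000 - 0x7000) = 0x3000
         *
         * so only offsets [0x1000, 0x3000) within the entry are routed
         * through the apple_protect pager with this key.
         */
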
@@ -654,6 +759,10 @@ vm_map_init(
                                 16*1024, PAGE_SIZE, "VM map copies");
        zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
 
+       vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
+                                16*1024, PAGE_SIZE, "VM map holes");
+       zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
+
        /*
         *      Cram the map and kentry zones with initial data.
         *      Set reserved_zone non-collectible to aid zone_gc().
@@ -668,9 +777,21 @@ vm_map_init(
        zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
        zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
 
+       zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
+       zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
+       zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
+       zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
+       zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
+       zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
+
+       /* 
+        * Add the stolen memory to zones, adjust zone size and stolen counts.
+        */
        zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
        zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
-       
+       zcram(vm_map_holes_zone, (vm_offset_t)map_holes_data, map_holes_data_size);
+       VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
+
        lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
        lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
        lck_attr_setdefault(&vm_map_lck_attr);  
@@ -714,10 +835,52 @@ vm_map_steal_memory(
 
        kentry_data_size = kentry_initial_pages * PAGE_SIZE;
        kentry_data = pmap_steal_memory(kentry_data_size);
+
+       map_holes_data_size = kentry_data_size;
+       map_holes_data = pmap_steal_memory(map_holes_data_size);
 }
 
-void vm_kernel_reserved_entry_init(void) {
+void
+vm_kernel_reserved_entry_init(void) {
        zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
+       zone_prio_refill_configure(vm_map_holes_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_links));
+}
+
+void
+vm_map_disable_hole_optimization(vm_map_t map)
+{
+       vm_map_entry_t  head_entry, hole_entry, next_hole_entry;
+
+       if (map->holelistenabled) {
+
+               head_entry = hole_entry = (vm_map_entry_t) map->holes_list;
+
+               while (hole_entry != NULL) {
+
+                       next_hole_entry = hole_entry->vme_next;
+
+                       hole_entry->vme_next = NULL;
+                       hole_entry->vme_prev = NULL;
+                       zfree(vm_map_holes_zone, hole_entry);
+
+                       if (next_hole_entry == head_entry) {
+                               hole_entry = NULL;
+                       } else {
+                               hole_entry = next_hole_entry;
+                       }
+               }
+
+               map->holes_list = NULL;
+               map->holelistenabled = FALSE;
+
+               map->first_free = vm_map_first_entry(map);
+               SAVE_HINT_HOLE_WRITE(map, NULL);
+       }
+}
+
+boolean_t
+vm_kernel_map_is_kernel(vm_map_t map) {
+       return (map->pmap == kernel_pmap);
 }
 
 /*
@@ -727,6 +890,9 @@ void vm_kernel_reserved_entry_init(void) {
  *     the given physical map structure, and having
  *     the given lower and upper address bounds.
  */
+
+boolean_t vm_map_supports_hole_optimization = TRUE;
+
 vm_map_t
 vm_map_create(
        pmap_t                  pmap,
@@ -736,6 +902,7 @@ vm_map_create(
 {
        static int              color_seed = 0;
        register vm_map_t       result;
+       struct vm_map_links     *hole_entry = NULL;
 
        result = (vm_map_t) zalloc(vm_map_zone);
        if (result == VM_MAP_NULL)
@@ -773,6 +940,21 @@ vm_map_create(
        result->hint = vm_map_to_entry(result);
        result->color_rr = (color_seed++) & vm_color_mask;
        result->jit_entry_exists = FALSE;
+
+       if (vm_map_supports_hole_optimization && pmap != kernel_pmap) {
+               hole_entry = zalloc(vm_map_holes_zone);
+
+               hole_entry->start = min;
+               hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
+               result->holes_list = result->hole_hint = hole_entry;
+               hole_entry->prev = hole_entry->next = (vm_map_entry_t) hole_entry;
+               result->holelistenabled = TRUE;
+
+       } else {
+
+               result->holelistenabled = FALSE;
+       }
+
 #if CONFIG_FREEZE
        result->default_freezer_handle = NULL;
 #endif
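
A sketch of the hole-list invariant established above: a freshly created (non-kernel) map carries exactly one hole spanning its whole usable range, threaded on a circular list headed by map->holes_list; later hunks make vm_map_find_space() and vm_map_enter() walk this list instead of scanning entries.

        /*
         * Right after vm_map_create(pmap, min, max, ...):
         *
         *      map->holes_list --> [ start = min,
         *                            end   = MAX(max, MACH_VM_MAX_ADDRESS) ]
         *                          prev/next both point back at this node
         *
         * The kernel map (and any map created while
         * vm_map_supports_hole_optimization is FALSE) keeps
         * holelistenabled == FALSE and continues to rely on first_free.
         */
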
@@ -970,6 +1152,9 @@ vm_map_destroy(
 {      
        vm_map_lock(map);
 
+       /* final cleanup: no need to unnest shared region */
+       flags |= VM_MAP_REMOVE_NO_UNNESTING;
+
        /* clean up regular map entries */
        (void) vm_map_delete(map, map->min_offset, map->max_offset,
                             flags, VM_MAP_NULL);
@@ -983,6 +1168,7 @@ vm_map_destroy(
                map->default_freezer_handle = NULL;
        }
 #endif
+       vm_map_disable_hole_optimization(map);
        vm_map_unlock(map);
 
        assert(map->hdr.nentries == 0);
@@ -1088,14 +1274,14 @@ void vm_map_swapin (vm_map_t map)
        entry = vm_map_first_entry(map);
 
        while (entry != vm_map_to_entry(map)) {
-               if (entry->object.vm_object != VM_OBJECT_NULL) {
+               if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
                        if (entry->is_sub_map) {
-                               vm_map_t lmap = entry->object.sub_map;
+                               vm_map_t lmap = VME_SUBMAP(entry);
                                lck_mtx_lock(&lmap->s_lock);
                                vm_map_res_reference(lmap);
                                lck_mtx_unlock(&lmap->s_lock);
                        } else {
-                               vm_object_t object = entry->object.vm_object;
+                               vm_object_t object = VME_OBJECT(entry);
                                vm_object_lock(object);
                                /*
                                 * This call may iterate through the
@@ -1149,14 +1335,14 @@ void vm_map_swapout(vm_map_t map)
        entry = vm_map_first_entry(map);
 
        while (entry != vm_map_to_entry(map)) {
-               if (entry->object.vm_object != VM_OBJECT_NULL) {
+               if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
                        if (entry->is_sub_map) {
-                               vm_map_t lmap = entry->object.sub_map;
+                               vm_map_t lmap = VME_SUBMAP(entry);
                                lck_mtx_lock(&lmap->s_lock);
                                vm_map_res_deallocate(lmap);
                                lck_mtx_unlock(&lmap->s_lock);
                        } else {
-                               vm_object_t object = entry->object.vm_object;
+                               vm_object_t object = VME_OBJECT(entry);
                                vm_object_lock(object);
                                /*
                                 * This call may take a long time, 
@@ -1217,9 +1403,10 @@ vm_map_find_space(
        int                     flags,
        vm_map_entry_t          *o_entry)       /* OUT */
 {
-       register vm_map_entry_t entry, new_entry;
+       vm_map_entry_t                  entry, new_entry;
        register vm_map_offset_t        start;
        register vm_map_offset_t        end;
+       vm_map_entry_t                  hole_entry;
 
        if (size == 0) {
                *address = 0;
@@ -1243,11 +1430,27 @@ vm_map_find_space(
        if( map->disable_vmentry_reuse == TRUE) {
                VM_MAP_HIGHEST_ENTRY(map, entry, start);
        } else {
-               assert(first_free_is_valid(map));
-               if ((entry = map->first_free) == vm_map_to_entry(map))
-                       start = map->min_offset;
-               else
-                       start = entry->vme_end;
+               if (map->holelistenabled) {
+                       hole_entry = (vm_map_entry_t)map->holes_list;
+
+                       if (hole_entry == NULL) {
+                               /*
+                                * No more space in the map?
+                                */
+                               vm_map_entry_dispose(map, new_entry);
+                               vm_map_unlock(map);
+                               return(KERN_NO_SPACE);
+                       }
+
+                       entry = hole_entry;
+                       start = entry->vme_start;
+               } else {
+                       assert(first_free_is_valid(map));
+                       if ((entry = map->first_free) == vm_map_to_entry(map))
+                               start = map->min_offset;
+                       else
+                               start = entry->vme_end;
+               }
        }
 
        /*
@@ -1284,28 +1487,53 @@ vm_map_find_space(
                        return(KERN_NO_SPACE);
                }
 
-               /*
-                *      If there are no more entries, we must win.
-                */
-
                next = entry->vme_next;
-               if (next == vm_map_to_entry(map))
-                       break;
 
-               /*
-                *      If there is another entry, it must be
-                *      after the end of the potential new region.
-                */
+               if (map->holelistenabled) {
+                       if (entry->vme_end >= end)
+                               break;
+               } else {
+                       /*
+                        *      If there are no more entries, we must win.
+                        *
+                        *      OR
+                        *
+                        *      If there is another entry, it must be
+                        *      after the end of the potential new region.
+                        */
 
-               if (next->vme_start >= end)
-                       break;
+                       if (next == vm_map_to_entry(map))
+                               break;
+
+                       if (next->vme_start >= end)
+                               break;
+               }
 
                /*
                 *      Didn't fit -- move to the next entry.
                 */
 
                entry = next;
-               start = entry->vme_end;
+
+               if (map->holelistenabled) {
+                       if (entry == (vm_map_entry_t) map->holes_list) {
+                               /*
+                                * Wrapped around
+                                */
+                               vm_map_entry_dispose(map, new_entry);
+                               vm_map_unlock(map);
+                               return(KERN_NO_SPACE);
+                       }
+                       start = entry->vme_start;
+               } else {
+                       start = entry->vme_end;
+               }
+       }
+
+       if (map->holelistenabled) {
+               if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
+                       panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
+               }
        }
 
        /*
@@ -1337,8 +1565,8 @@ vm_map_find_space(
        new_entry->is_shared = FALSE;
        new_entry->is_sub_map = FALSE;
        new_entry->use_pmap = TRUE;
-       new_entry->object.vm_object = VM_OBJECT_NULL;
-       new_entry->offset = (vm_object_offset_t) 0;
+       VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
+       VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
 
        new_entry->needs_copy = FALSE;
 
@@ -1360,13 +1588,15 @@ vm_map_find_space(
                new_entry->map_aligned = FALSE;
        }
 
-       new_entry->used_for_jit = 0;
-
-       new_entry->alias = 0;
+       new_entry->used_for_jit = FALSE;
        new_entry->zero_wired_pages = FALSE;
        new_entry->iokit_acct = FALSE;
+       new_entry->vme_resilient_codesign = FALSE;
+       new_entry->vme_resilient_media = FALSE;
 
-       VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
+       int alias;
+       VM_GET_FLAGS_ALIAS(flags, alias);
+       VME_ALIAS_SET(new_entry, alias);
 
        /*
         *      Insert the new entry into the list
@@ -1601,11 +1831,14 @@ vm_map_enter(
        boolean_t               permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
        boolean_t               entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0);
        boolean_t               iokit_acct = ((flags & VM_FLAGS_IOKIT_ACCT) != 0);
+       boolean_t               resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
+       boolean_t               resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
        unsigned int            superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
-       char                    alias;
+       vm_tag_t                alias, user_alias;
        vm_map_offset_t         effective_min_offset, effective_max_offset;
        kern_return_t           kr;
        boolean_t               clear_map_aligned = FALSE;
+       vm_map_entry_t          hole_entry;
 
        if (superpage_size) {
                switch (superpage_size) {
@@ -1634,6 +1867,13 @@ vm_map_enter(
 
 
 
+       if (resilient_codesign || resilient_media) {
+               if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
+                   (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
+                       return KERN_PROTECTION_FAILURE;
+               }
+       }
+
        if (is_submap) {
                if (purgable) {
                        /* submaps can not be purgeable */
@@ -1679,6 +1919,11 @@ vm_map_enter(
        }
 
        VM_GET_FLAGS_ALIAS(flags, alias);
+       if (map->pmap == kernel_pmap) {
+               user_alias = VM_KERN_MEMORY_NONE;
+       } else {
+               user_alias = alias;
+       }
 
 #define        RETURN(value)   { result = value; goto BailOut; }
 
@@ -1736,6 +1981,7 @@ vm_map_enter(
                                            *address + size,
                                            map->hdr.entries_pageable);
                vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
+               vm_map_disable_hole_optimization(zap_old_map);
        }
 
 StartAgain: ;
@@ -1780,41 +2026,82 @@ StartAgain: ;
                if( map->disable_vmentry_reuse == TRUE) {
                        VM_MAP_HIGHEST_ENTRY(map, entry, start);
                } else {
-                       assert(first_free_is_valid(map));
 
-                       entry = map->first_free;
+                       if (map->holelistenabled) {
+                               hole_entry = (vm_map_entry_t)map->holes_list;
+
+                               if (hole_entry == NULL) {
+                                       /*
+                                        * No more space in the map?
+                                        */
+                                       result = KERN_NO_SPACE;
+                                       goto BailOut;
+                               } else {
+
+                                       boolean_t found_hole = FALSE;
+
+                                       do {
+                                               if (hole_entry->vme_start >= start) {
+                                                       start = hole_entry->vme_start;
+                                                       found_hole = TRUE;
+                                                       break;
+                                               }
+
+                                               if (hole_entry->vme_end > start) {
+                                                       found_hole = TRUE;
+                                                       break;
+                                               }
+                                               hole_entry = hole_entry->vme_next;
+
+                                       } while (hole_entry != (vm_map_entry_t) map->holes_list);
+
+                                       if (found_hole == FALSE) {
+                                               result = KERN_NO_SPACE;
+                                               goto BailOut;
+                                       }
+
+                                       entry = hole_entry;
 
-                       if (entry == vm_map_to_entry(map)) {
-                               entry = NULL;
+                                       if (start == 0)
+                                               start += PAGE_SIZE_64;
+                               }
                        } else {
-                              if (entry->vme_next == vm_map_to_entry(map)){
-                                      /*
-                                       * Hole at the end of the map.
-                                       */
+                               assert(first_free_is_valid(map));
+
+                               entry = map->first_free;
+
+                               if (entry == vm_map_to_entry(map)) {
                                        entry = NULL;
-                              } else {
-                                       if (start < (entry->vme_next)->vme_start ) {
-                                               start = entry->vme_end;
+                               } else {
+                                      if (entry->vme_next == vm_map_to_entry(map)){
+                                              /*
+                                               * Hole at the end of the map.
+                                               */
+                                               entry = NULL;
+                                      } else {
+                                               if (start < (entry->vme_next)->vme_start ) {
+                                                       start = entry->vme_end;
+                                                       start = vm_map_round_page(start,
+                                                                                 VM_MAP_PAGE_MASK(map));
+                                               } else {
+                                                       /*
+                                                        * Need to do a lookup.
+                                                        */
+                                                       entry = NULL;
+                                               }
+                                      }
+                               }
+
+                               if (entry == NULL) {
+                                       vm_map_entry_t  tmp_entry;
+                                       if (vm_map_lookup_entry(map, start, &tmp_entry)) {
+                                               assert(!entry_for_jit);
+                                               start = tmp_entry->vme_end;
                                                start = vm_map_round_page(start,
                                                                          VM_MAP_PAGE_MASK(map));
-                                       } else {
-                                               /*
-                                                * Need to do a lookup.
-                                                */
-                                               entry = NULL;
                                        }
-                              }
-                       }
-
-                       if (entry == NULL) {
-                               vm_map_entry_t  tmp_entry;
-                               if (vm_map_lookup_entry(map, start, &tmp_entry)) {
-                                       assert(!entry_for_jit);
-                                       start = tmp_entry->vme_end;
-                                       start = vm_map_round_page(start,
-                                                                 VM_MAP_PAGE_MASK(map));
+                                       entry = tmp_entry;
                                }
-                               entry = tmp_entry;
                        }
                }
 
@@ -1859,31 +2146,57 @@ StartAgain: ;
                                RETURN(KERN_NO_SPACE);
                        }
 
-                       /*
-                        *      If there are no more entries, we must win.
-                        */
-
                        next = entry->vme_next;
-                       if (next == vm_map_to_entry(map))
-                               break;
 
-                       /*
-                        *      If there is another entry, it must be
-                        *      after the end of the potential new region.
-                        */
+                       if (map->holelistenabled) {
+                               if (entry->vme_end >= end)
+                                       break;
+                       } else {
+                               /*
+                                *      If there are no more entries, we must win.
+                                *
+                                *      OR
+                                *
+                                *      If there is another entry, it must be
+                                *      after the end of the potential new region.
+                                */
 
-                       if (next->vme_start >= end)
-                               break;
+                               if (next == vm_map_to_entry(map))
+                                       break;
+
+                               if (next->vme_start >= end)
+                                       break;
+                       }
 
                        /*
                         *      Didn't fit -- move to the next entry.
                         */
 
                        entry = next;
-                       start = entry->vme_end;
+
+                       if (map->holelistenabled) {
+                               if (entry == (vm_map_entry_t) map->holes_list) {
+                                       /*
+                                        * Wrapped around
+                                        */
+                                       result = KERN_NO_SPACE;
+                                       goto BailOut;
+                               }
+                               start = entry->vme_start;
+                       } else {
+                               start = entry->vme_end;
+                       }
+
                        start = vm_map_round_page(start,
                                                  VM_MAP_PAGE_MASK(map));
                }
+
+               if (map->holelistenabled) {
+                       if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
+                               panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
+                       }
+               }
+
                *address = start;
                assert(VM_MAP_PAGE_ALIGNED(*address,
                                           VM_MAP_PAGE_MASK(map)));
@@ -1950,13 +2263,13 @@ StartAgain: ;
                                if (entry == vm_map_to_entry(map) ||
                                    entry->vme_start != tmp_start ||
                                    entry->is_sub_map != is_submap ||
-                                   entry->offset != tmp_offset ||
+                                   VME_OFFSET(entry) != tmp_offset ||
                                    entry->needs_copy != needs_copy ||
                                    entry->protection != cur_protection ||
                                    entry->max_protection != max_protection ||
                                    entry->inheritance != inheritance ||
                                    entry->iokit_acct != iokit_acct ||
-                                   entry->alias != alias) {
+                                   VME_ALIAS(entry) != alias) {
                                        /* not the same mapping ! */
                                        RETURN(KERN_NO_SPACE);
                                }
@@ -1964,17 +2277,17 @@ StartAgain: ;
                                 * Check if the same object is being mapped.
                                 */
                                if (is_submap) {
-                                       if (entry->object.sub_map !=
+                                       if (VME_SUBMAP(entry) !=
                                            (vm_map_t) object) {
                                                /* not the same submap */
                                                RETURN(KERN_NO_SPACE);
                                        }
                                } else {
-                                       if (entry->object.vm_object != object) {
+                                       if (VME_OBJECT(entry) != object) {
                                                /* not the same VM object... */
                                                vm_object_t obj2;
 
-                                               obj2 = entry->object.vm_object;
+                                               obj2 = VME_OBJECT(entry);
                                                if ((obj2 == VM_OBJECT_NULL ||
                                                     obj2->internal) &&
                                                    (object == VM_OBJECT_NULL ||
@@ -2032,6 +2345,7 @@ StartAgain: ;
 
        if (purgable || entry_for_jit) {
                if (object == VM_OBJECT_NULL) {
+
                        object = vm_object_allocate(size);
                        object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
                        object->true_share = TRUE;
@@ -2073,7 +2387,8 @@ StartAgain: ;
                   (entry->protection == cur_protection) &&
                   (entry->max_protection == max_protection) &&
                   (entry->inheritance == inheritance) &&
-                  ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) &&
+                  ((user_alias == VM_MEMORY_REALLOC) ||
+                   (VME_ALIAS(entry) == alias)) &&
                   (entry->no_cache == no_cache) &&
                   (entry->permanent == permanent) &&
                   (!entry->superpage_size && !superpage_size) &&
@@ -2085,16 +2400,18 @@ StartAgain: ;
                   (!entry->zero_wired_pages) &&
                   (!entry->used_for_jit && !entry_for_jit) &&
                   (entry->iokit_acct == iokit_acct) &&
+                  (!entry->vme_resilient_codesign) &&
+                  (!entry->vme_resilient_media) &&
 
                   ((entry->vme_end - entry->vme_start) + size <=
-                   (alias == VM_MEMORY_REALLOC ?
+                   (user_alias == VM_MEMORY_REALLOC ?
                     ANON_CHUNK_SIZE :
                     NO_COALESCE_LIMIT)) &&
 
                   (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
-               if (vm_object_coalesce(entry->object.vm_object,
+               if (vm_object_coalesce(VME_OBJECT(entry),
                                       VM_OBJECT_NULL,
-                                      entry->offset,
+                                      VME_OFFSET(entry),
                                       (vm_object_offset_t) 0,
                                       (vm_map_size_t)(entry->vme_end - entry->vme_start),
                                       (vm_map_size_t)(end - entry->vme_end))) {
@@ -2108,8 +2425,14 @@ StartAgain: ;
                        assert(entry->vme_start < end);
                        assert(VM_MAP_PAGE_ALIGNED(end,
                                                   VM_MAP_PAGE_MASK(map)));
+                       if (__improbable(vm_debug_events))
+                               DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
                        entry->vme_end = end;
-                       vm_map_store_update_first_free(map, map->first_free);
+                       if (map->holelistenabled) {
+                               vm_map_store_update_first_free(map, entry, TRUE);
+                       } else {
+                               vm_map_store_update_first_free(map, map->first_free, TRUE);
+                       }
                        new_mapping_established = TRUE;
                        RETURN(KERN_SUCCESS);
                }
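
The block above is the anonymous-mapping coalesce fast path: if the previous entry ends exactly where the new range starts, every attribute that matters matches (protections, inheritance, alias unless the caller is VM_MEMORY_REALLOC, wiring, JIT and the new resilient flags), and vm_object_coalesce() agrees to grow the backing object, the map simply extends entry->vme_end instead of allocating and linking a new entry; the commit also routes the first-free update through the new hole-list path when map->holelistenabled is set. A deliberately simplified model of that "extend in place or fall back" decision follows; the types and field names are invented for illustration and are not the kernel's:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical, simplified model of the coalesce fast path. */
struct range_entry {
	uint64_t start, end;       /* [start, end) mapped range           */
	unsigned prot, max_prot;   /* protections must match to coalesce  */
	unsigned alias;            /* memory tag must match               */
	unsigned wired_count;      /* wired entries are never extended    */
};

static bool
try_extend_in_place(struct range_entry *prev,
                    uint64_t new_start, uint64_t new_end,
                    unsigned prot, unsigned max_prot, unsigned alias)
{
	if (prev->end == new_start &&          /* ranges are adjacent        */
	    prev->prot == prot &&
	    prev->max_prot == max_prot &&
	    prev->alias == alias &&
	    prev->wired_count == 0) {
		prev->end = new_end;           /* analogous to vme_end = end */
		return true;
	}
	return false;                          /* fall back to a new entry   */
}
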
@@ -2155,7 +2478,10 @@ StartAgain: ;
                                                        superpage_size,
                                                        clear_map_aligned,
                                                        is_submap);
-                       new_entry->alias = alias;
+
+                       assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
+                       VME_ALIAS_SET(new_entry, alias);
+
                        if (entry_for_jit){
                                if (!(map->jit_entry_exists)){
                                        new_entry->used_for_jit = TRUE;
@@ -2163,6 +2489,18 @@ StartAgain: ;
                                }
                        }
 
+                       if (resilient_codesign &&
+                           ! ((cur_protection | max_protection) &
+                              (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
+                               new_entry->vme_resilient_codesign = TRUE;
+                       }
+
+                       if (resilient_media &&
+                           ! ((cur_protection | max_protection) &
+                              (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
+                               new_entry->vme_resilient_media = TRUE;
+                       }
+
                        assert(!new_entry->iokit_acct);
                        if (!is_submap &&
                            object != VM_OBJECT_NULL &&
@@ -2201,7 +2539,7 @@ StartAgain: ;
                                assert(!new_entry->iokit_acct);
                                submap = (vm_map_t) object;
                                submap_is_64bit = vm_map_is_64bit(submap);
-                               use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
+                               use_pmap = (user_alias == VM_MEMORY_SHARED_PMAP);
 #ifndef NO_NESTED_PMAP
                                if (use_pmap && submap->pmap == NULL) {
                                        ledger_t ledger = map->pmap->ledger;
@@ -2239,12 +2577,15 @@ StartAgain: ;
                                vm_page_t pages, m;
                                vm_object_t sp_object;
 
-                               entry->offset = 0;
+                               VME_OFFSET_SET(entry, 0);
 
                                /* allocate one superpage */
                                kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
                                if (kr != KERN_SUCCESS) {
-                                       new_mapping_established = TRUE; /* will cause deallocation of whole range */
+                                       /* deallocate whole range... */
+                                       new_mapping_established = TRUE;
+                                       /* ... but only up to "tmp_end" */
+                                       size -= end - tmp_end;
                                        RETURN(kr);
                                }
 
@@ -2252,7 +2593,7 @@ StartAgain: ;
                                sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
                                sp_object->phys_contiguous = TRUE;
                                sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
-                               entry->object.vm_object = sp_object;
+                               VME_OBJECT_SET(entry, sp_object);
                                assert(entry->use_pmap);
 
                                /* enter the base pages into the object */
@@ -2262,7 +2603,7 @@ StartAgain: ;
                                        pmap_zero_page(m->phys_page);
                                        pages = NEXT_PAGE(m);
                                        *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
-                                       vm_page_insert(m, sp_object, offset);
+                                       vm_page_insert_wired(m, sp_object, offset, VM_KERN_MEMORY_OSFMK);
                                }
                                vm_object_unlock(sp_object);
                        }
@@ -2350,7 +2691,8 @@ BailOut:
                        assert(!keep_map_locked);
                        pmap_empty = FALSE; /* pmap won't be empty */
                        kr = vm_map_wire(map, start, end,
-                                            new_entry->protection, TRUE);
+                                            new_entry->protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
+                                            TRUE);
                        result = kr;
                }
 
@@ -2370,6 +2712,8 @@ BailOut:
                                                    map->hdr.entries_pageable);
                        vm_map_set_page_shift(zap_new_map,
                                              VM_MAP_PAGE_SHIFT(map));
+                       vm_map_disable_hole_optimization(zap_new_map);
+
                        if (!map_locked) {
                                vm_map_lock(map);
                                map_locked = TRUE;
@@ -2461,6 +2805,7 @@ BailOut:
 #undef RETURN
 }
 
+
 /*
  * Counters for the prefault optimization.
  */
@@ -2490,7 +2835,7 @@ vm_map_enter_mem_object_helper(
        kern_return_t           result;
        boolean_t               mask_cur_protection, mask_max_protection;
        boolean_t               try_prefault = (page_list_count != 0);
-       vm_map_offset_t         offset_in_mapping;
+       vm_map_offset_t         offset_in_mapping = 0;
 
        mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
        mask_max_protection = max_protection & VM_PROT_IS_MASK;
@@ -2505,13 +2850,16 @@ vm_map_enter_mem_object_helper(
            (max_protection & ~VM_PROT_ALL) ||
            (inheritance > VM_INHERIT_LAST_VALID) ||
            (try_prefault && (copy || !page_list)) ||
-           initial_size == 0)
+           initial_size == 0) {
                return KERN_INVALID_ARGUMENT;
+       }
        
-       map_addr = vm_map_trunc_page(*address,
-                                    VM_MAP_PAGE_MASK(target_map));
-       map_size = vm_map_round_page(initial_size,
-                                    VM_MAP_PAGE_MASK(target_map));
+       {
+               map_addr = vm_map_trunc_page(*address,
+                                            VM_MAP_PAGE_MASK(target_map));
+               map_size = vm_map_round_page(initial_size,
+                                            VM_MAP_PAGE_MASK(target_map));
+       }
        size = vm_object_round_page(initial_size);
 
        /*
@@ -2526,7 +2874,8 @@ vm_map_enter_mem_object_helper(
 
                named_entry = (vm_named_entry_t) port->ip_kobject;
 
-               if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
+               if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
+                            VM_FLAGS_RETURN_4K_DATA_ADDR)) {
                        offset += named_entry->data_offset;
                }
                
@@ -2552,8 +2901,9 @@ vm_map_enter_mem_object_helper(
                        /* overflow */
                        return KERN_INVALID_ARGUMENT;
                }
-               if (named_entry->size < (offset + size))
+               if (named_entry->size < (offset + initial_size)) {
                        return KERN_INVALID_ARGUMENT;
+               }
 
                if (named_entry->is_copy) {
                        /* for a vm_map_copy, we can only map it whole */
@@ -2598,7 +2948,8 @@ vm_map_enter_mem_object_helper(
                if (named_entry->is_sub_map) {
                        vm_map_t                submap;
 
-                       if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
+                       if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
+                                    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
                                panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
                        }
 
@@ -2656,7 +3007,8 @@ vm_map_enter_mem_object_helper(
                        protections = named_entry->protection & VM_PROT_ALL;
                        access = GET_MAP_MEM(named_entry->protection);
 
-                       if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
+                       if (flags & (VM_FLAGS_RETURN_DATA_ADDR|
+                                    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
                                panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
                        }
 
@@ -2738,13 +3090,17 @@ vm_map_enter_mem_object_helper(
                        if (flags & ~(VM_FLAGS_FIXED |
                                      VM_FLAGS_ANYWHERE |
                                      VM_FLAGS_OVERWRITE |
+                                     VM_FLAGS_RETURN_4K_DATA_ADDR |
                                      VM_FLAGS_RETURN_DATA_ADDR)) {
                                named_entry_unlock(named_entry);
                                return KERN_INVALID_ARGUMENT;
                        }
 
-                       if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
+                       if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
+                                    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
                                offset_in_mapping = offset - vm_object_trunc_page(offset);
+                               if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
+                                       offset_in_mapping &= ~((signed)(0xFFF));
                                offset = vm_object_trunc_page(offset);
                                map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
                        }
@@ -2769,6 +3125,7 @@ vm_map_enter_mem_object_helper(
                                          mask,
                                          flags & (VM_FLAGS_ANYWHERE |
                                                   VM_FLAGS_OVERWRITE |
+                                                  VM_FLAGS_RETURN_4K_DATA_ADDR |
                                                   VM_FLAGS_RETURN_DATA_ADDR),
                                          VM_OBJECT_NULL,
                                          0,
@@ -2792,7 +3149,7 @@ vm_map_enter_mem_object_helper(
                                vm_map_size_t           copy_size;
                                vm_object_offset_t      copy_offset;
 
-                               copy_offset = copy_entry->offset;
+                               copy_offset = VME_OFFSET(copy_entry);
                                copy_size = (copy_entry->vme_end -
                                             copy_entry->vme_start);
 
@@ -2809,15 +3166,13 @@ vm_map_enter_mem_object_helper(
                                /* take a reference on the object */
                                if (copy_entry->is_sub_map) {
                                        remap_flags |= VM_FLAGS_SUBMAP;
-                                       copy_submap =
-                                               copy_entry->object.sub_map;
+                                       copy_submap = VME_SUBMAP(copy_entry);
                                        vm_map_lock(copy_submap);
                                        vm_map_reference(copy_submap);
                                        vm_map_unlock(copy_submap);
                                        copy_object = (vm_object_t) copy_submap;
                                } else {
-                                       copy_object =
-                                               copy_entry->object.vm_object;
+                                       copy_object = VME_OBJECT(copy_entry);
                                        vm_object_reference(copy_object);
                                }
 
@@ -2852,7 +3207,8 @@ vm_map_enter_mem_object_helper(
                        }
                        
                        if (kr == KERN_SUCCESS) {
-                               if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
+                               if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
+                                            VM_FLAGS_RETURN_4K_DATA_ADDR)) {
                                        *address = map_addr + offset_in_mapping;
                                } else {
                                        *address = map_addr;
@@ -2902,8 +3258,11 @@ vm_map_enter_mem_object_helper(
                        /* object cannot be mapped until it is ready  */
                        /* we can therefore avoid the ready check     */
                        /* in this case.  */
-                       if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
+                       if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
+                                    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
                                offset_in_mapping = offset - vm_object_trunc_page(offset);
+                               if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
+                                       offset_in_mapping &= ~((signed)(0xFFF));
                                offset = vm_object_trunc_page(offset);
                                map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
                        } 
@@ -2922,7 +3281,8 @@ vm_map_enter_mem_object_helper(
                 * this case, the port isn't really a port at all, but
                 * instead is just a raw memory object.
                 */
-               if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
+               if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
+                            VM_FLAGS_RETURN_4K_DATA_ADDR)) {
                        panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
                }
 
@@ -3006,7 +3366,8 @@ vm_map_enter_mem_object_helper(
                vm_object_t             new_object;
                vm_object_offset_t      new_offset;
 
-               result = vm_object_copy_strategically(object, offset, size,
+               result = vm_object_copy_strategically(object, offset,
+                                                     map_size,
                                                      &new_object, &new_offset,
                                                      &copy);
 
@@ -3028,7 +3389,8 @@ vm_map_enter_mem_object_helper(
                        new_object = object;
                        new_offset = offset;
                        success = vm_object_copy_quickly(&new_object,
-                                                        new_offset, size,
+                                                        new_offset,
+                                                        map_size,
                                                         &src_needs_copy,
                                                         &copy);
                        assert(success);
@@ -3041,8 +3403,9 @@ vm_map_enter_mem_object_helper(
 
                vm_object_deallocate(object);
 
-               if (result != KERN_SUCCESS)
+               if (result != KERN_SUCCESS) {
                        return result;
+               }
 
                object = new_object;
                offset = new_offset;
@@ -3054,13 +3417,17 @@ vm_map_enter_mem_object_helper(
         */
        if (try_prefault)
                flags |= VM_FLAGS_KEEP_MAP_LOCKED;
-       result = vm_map_enter(target_map,
-                             &map_addr, map_size,
-                             (vm_map_offset_t)mask,
-                             flags,
-                             object, offset,
-                             copy,
-                             cur_protection, max_protection, inheritance);
+
+       {
+               result = vm_map_enter(target_map,
+                                     &map_addr, map_size,
+                                     (vm_map_offset_t)mask,
+                                     flags,
+                                     object, offset,
+                                     copy,
+                                     cur_protection, max_protection,
+                                     inheritance);
+       }
        if (result != KERN_SUCCESS)
                vm_object_deallocate(object);
 
@@ -3089,7 +3456,7 @@ vm_map_enter_mem_object_helper(
                                                        0, TRUE, PMAP_OPTIONS_NOWAIT, NULL);
                                if (kr != KERN_SUCCESS) {
                                        OSIncrementAtomic64(&vm_prefault_nb_bailout);
-                                       goto BailOut;
+                                       break;
                                }
                                OSIncrementAtomic64(&vm_prefault_nb_pages);
                        }
@@ -3097,11 +3464,11 @@ vm_map_enter_mem_object_helper(
                        /* Next virtual address */
                        va += PAGE_SIZE;
                }
-BailOut:
                vm_map_unlock(target_map);
        }
 
-       if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
+       if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
+                    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
                *address = map_addr + offset_in_mapping;
        } else {
                *address = map_addr;
@@ -3178,14 +3545,17 @@ vm_map_enter_mem_object_control(
            (cur_protection & ~VM_PROT_ALL) ||
            (max_protection & ~VM_PROT_ALL) ||
            (inheritance > VM_INHERIT_LAST_VALID) ||
-           initial_size == 0)
+           initial_size == 0) {
                return KERN_INVALID_ARGUMENT;
+       }
 
-       map_addr = vm_map_trunc_page(*address,
-                                    VM_MAP_PAGE_MASK(target_map));
-       map_size = vm_map_round_page(initial_size,
-                                    VM_MAP_PAGE_MASK(target_map));
-       size = vm_object_round_page(initial_size);      
+       {
+               map_addr = vm_map_trunc_page(*address,
+                                            VM_MAP_PAGE_MASK(target_map));
+               map_size = vm_map_round_page(initial_size,
+                                            VM_MAP_PAGE_MASK(target_map));
+       }
+       size = vm_object_round_page(initial_size);
 
        object = memory_object_control_to_vm_object(control);
 
@@ -3275,20 +3645,24 @@ vm_map_enter_mem_object_control(
 
                vm_object_deallocate(object);
 
-               if (result != KERN_SUCCESS)
+               if (result != KERN_SUCCESS) {
                        return result;
+               }
 
                object = new_object;
                offset = new_offset;
        }
 
-       result = vm_map_enter(target_map,
-                             &map_addr, map_size,
-                             (vm_map_offset_t)mask,
-                             flags,
-                             object, offset,
-                             copy,
-                             cur_protection, max_protection, inheritance);
+       {
+               result = vm_map_enter(target_map,
+                                     &map_addr, map_size,
+                                     (vm_map_offset_t)mask,
+                                     flags,
+                                     object, offset,
+                                     copy,
+                                     cur_protection, max_protection,
+                                     inheritance);
+       }
        if (result != KERN_SUCCESS)
                vm_object_deallocate(object);
        *address = map_addr;
@@ -3330,6 +3704,9 @@ vm_map_enter_cpm(
 #endif /* MACH_ASSERT */
 
        boolean_t               anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
+       vm_tag_t tag;
+
+       VM_GET_FLAGS_ALIAS(flags, tag);
 
        if (size == 0) {
                *addr = 0;
@@ -3551,7 +3928,7 @@ vm_map_clip_unnest(
        vm_map_offset_t old_end_unnest = end_unnest;
 
        assert(entry->is_sub_map);
-       assert(entry->object.sub_map != NULL);
+       assert(VME_SUBMAP(entry) != NULL);
        assert(entry->use_pmap);
 
        /*
@@ -3578,13 +3955,21 @@ vm_map_clip_unnest(
                _vm_map_clip_start(&map->hdr,
                                   entry,
                                   start_unnest);
-               vm_map_store_update_first_free(map, map->first_free);
+               if (map->holelistenabled) {
+                       vm_map_store_update_first_free(map, NULL, FALSE);
+               } else {
+                       vm_map_store_update_first_free(map, map->first_free, FALSE);
+               }
        }
        if (entry->vme_end > end_unnest) {
                _vm_map_clip_end(&map->hdr,
                                 entry,
                                 end_unnest);
-               vm_map_store_update_first_free(map, map->first_free);
+               if (map->holelistenabled) {
+                       vm_map_store_update_first_free(map, NULL, FALSE);
+               } else {
+                       vm_map_store_update_first_free(map, map->first_free, FALSE);
+               }
        }
 
        pmap_unnest(map->pmap,
@@ -3595,12 +3980,13 @@ vm_map_clip_unnest(
                vm_map_submap_pmap_clean(
                        map, entry->vme_start,
                        entry->vme_end,
-                       entry->object.sub_map,
-                       entry->offset);
+                       VME_SUBMAP(entry),
+                       VME_OFFSET(entry));
        }
        entry->use_pmap = FALSE;
-       if (entry->alias == VM_MEMORY_SHARED_PMAP) {
-               entry->alias = VM_MEMORY_UNSHARED_PMAP;
+       if ((map->pmap != kernel_pmap) &&
+           (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
+               VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
        }
 }
 #endif /* NO_NESTED_PMAP */
@@ -3637,15 +4023,19 @@ vm_map_clip_start(
        }
 #endif /* NO_NESTED_PMAP */
        if (startaddr > entry->vme_start) {
-               if (entry->object.vm_object &&
+               if (VME_OBJECT(entry) &&
                    !entry->is_sub_map &&
-                   entry->object.vm_object->phys_contiguous) {
+                   VME_OBJECT(entry)->phys_contiguous) {
                        pmap_remove(map->pmap,
                                    (addr64_t)(entry->vme_start),
                                    (addr64_t)(entry->vme_end));
                }
                _vm_map_clip_start(&map->hdr, entry, startaddr);
-               vm_map_store_update_first_free(map, map->first_free);
+               if (map->holelistenabled) {
+                       vm_map_store_update_first_free(map, NULL, FALSE);
+               } else {
+                       vm_map_store_update_first_free(map, map->first_free, FALSE);
+               }
        }
 }
 
@@ -3664,7 +4054,7 @@ static void
 _vm_map_clip_start(
        register struct vm_map_header   *map_header,
        register vm_map_entry_t         entry,
-       register vm_map_offset_t                start)
+       register vm_map_offset_t        start)
 {
        register vm_map_entry_t new_entry;
 
@@ -3686,16 +4076,16 @@ _vm_map_clip_start(
 
        new_entry->vme_end = start;
        assert(new_entry->vme_start < new_entry->vme_end);
-       entry->offset += (start - entry->vme_start);
+       VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
        assert(start < entry->vme_end);
        entry->vme_start = start;
 
        _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
 
        if (entry->is_sub_map)
-               vm_map_reference(new_entry->object.sub_map);
+               vm_map_reference(VME_SUBMAP(new_entry));
        else
-               vm_object_reference(new_entry->object.vm_object);
+               vm_object_reference(VME_OBJECT(new_entry));
 }
 
 
@@ -3738,15 +4128,19 @@ vm_map_clip_end(
        }
 #endif /* NO_NESTED_PMAP */
        if (endaddr < entry->vme_end) {
-               if (entry->object.vm_object &&
+               if (VME_OBJECT(entry) &&
                    !entry->is_sub_map &&
-                   entry->object.vm_object->phys_contiguous) {
+                   VME_OBJECT(entry)->phys_contiguous) {
                        pmap_remove(map->pmap,
                                    (addr64_t)(entry->vme_start),
                                    (addr64_t)(entry->vme_end));
                }
                _vm_map_clip_end(&map->hdr, entry, endaddr);
-               vm_map_store_update_first_free(map, map->first_free);
+               if (map->holelistenabled) {
+                       vm_map_store_update_first_free(map, NULL, FALSE);
+               } else {
+                       vm_map_store_update_first_free(map, map->first_free, FALSE);
+               }
        }
 }
 
@@ -3784,15 +4178,16 @@ _vm_map_clip_end(
 
        assert(entry->vme_start < end);
        new_entry->vme_start = entry->vme_end = end;
-       new_entry->offset += (end - entry->vme_start);
+       VME_OFFSET_SET(new_entry,
+                      VME_OFFSET(new_entry) + (end - entry->vme_start));
        assert(new_entry->vme_start < new_entry->vme_end);
 
        _vm_map_store_entry_link(map_header, entry, new_entry);
 
        if (entry->is_sub_map)
-               vm_map_reference(new_entry->object.sub_map);
+               vm_map_reference(VME_SUBMAP(new_entry));
        else
-               vm_object_reference(new_entry->object.vm_object);
+               vm_object_reference(VME_OBJECT(new_entry));
 }
 
 
@@ -3924,17 +4319,17 @@ vm_map_submap(
 
        if ((entry->vme_start == start) && (entry->vme_end == end) &&
            (!entry->is_sub_map) &&
-           ((object = entry->object.vm_object) == vm_submap_object) &&
+           ((object = VME_OBJECT(entry)) == vm_submap_object) &&
            (object->resident_page_count == 0) &&
            (object->copy == VM_OBJECT_NULL) &&
            (object->shadow == VM_OBJECT_NULL) &&
            (!object->pager_created)) {
-               entry->offset = (vm_object_offset_t)offset;
-               entry->object.vm_object = VM_OBJECT_NULL;
+               VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
+               VME_OBJECT_SET(entry, VM_OBJECT_NULL);
                vm_object_deallocate(object);
                entry->is_sub_map = TRUE;
                entry->use_pmap = FALSE;
-               entry->object.sub_map = submap;
+               VME_SUBMAP_SET(entry, submap);
                vm_map_reference(submap);
                if (submap->mapped_in_other_pmaps == FALSE &&
                    vm_map_pmap(submap) != PMAP_NULL &&
@@ -3963,7 +4358,7 @@ vm_map_submap(
                                }
                        }
                        result = pmap_nest(map->pmap,
-                                          (entry->object.sub_map)->pmap, 
+                                          (VME_SUBMAP(entry))->pmap, 
                                           (addr64_t)start,
                                           (addr64_t)start,
                                           (uint64_t)(end - start));
@@ -4110,11 +4505,17 @@ vm_map_protect(
                        /* for loss of shared memory communication in the */
                        /* target area after taking this step */
 
-                       if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){
-                               current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start));
-                               current->offset = 0;
+                       if (current->is_sub_map == FALSE &&
+                           VME_OBJECT(current) == VM_OBJECT_NULL) {
+                               VME_OBJECT_SET(current, 
+                                              vm_object_allocate(
+                                                      (vm_map_size_t)
+                                                      (current->vme_end -
+                                                       current->vme_start)));
+                               VME_OFFSET_SET(current, 0);
                                assert(current->use_pmap);
                        }
+                       assert(current->wired_count == 0);
                        current->needs_copy = TRUE;
                        current->max_protection |= VM_PROT_WRITE;
                }
@@ -4145,11 +4546,11 @@ vm_map_protect(
 
                        prot = current->protection & ~VM_PROT_WRITE;
 
-                       if (override_nx(map, current->alias) && prot)
+                       if (override_nx(map, VME_ALIAS(current)) && prot)
                                prot |= VM_PROT_EXECUTE;
 
                        if (current->is_sub_map && current->use_pmap) {
-                               pmap_protect(current->object.sub_map->pmap, 
+                               pmap_protect(VME_SUBMAP(current)->pmap, 
                                             current->vme_start,
                                             current->vme_end,
                                             prot);
@@ -4369,13 +4770,14 @@ vm_map_wire_nested(
        register vm_map_t       map,
        register vm_map_offset_t        start,
        register vm_map_offset_t        end,
-       register vm_prot_t      access_type,
+       register vm_prot_t      caller_prot,
        boolean_t               user_wire,
        pmap_t                  map_pmap, 
        vm_map_offset_t         pmap_addr,
        ppnum_t                 *physpage_p)
 {
        register vm_map_entry_t entry;
+       register vm_prot_t      access_type;
        struct vm_map_entry     *first_entry, tmp_entry;
        vm_map_t                real_map;
        register vm_map_offset_t        s,e;
@@ -4388,6 +4790,8 @@ vm_map_wire_nested(
        vm_map_size_t           size;
        boolean_t               wire_and_extract;
 
+       access_type = (caller_prot & VM_PROT_ALL);
+
        wire_and_extract = FALSE;
        if (physpage_p != NULL) {
                /*
@@ -4536,9 +4940,9 @@ vm_map_wire_nested(
                        vm_map_clip_start(map, entry, s);
                        vm_map_clip_end(map, entry, end);
 
-                       sub_start = entry->offset;
+                       sub_start = VME_OFFSET(entry);
                        sub_end = entry->vme_end;
-                       sub_end += entry->offset - entry->vme_start;
+                       sub_end += VME_OFFSET(entry) - entry->vme_start;
                
                        local_end = entry->vme_end;
                        if(map_pmap == NULL) {
@@ -4551,7 +4955,7 @@ vm_map_wire_nested(
                                vm_map_t                lookup_map;
 
                                if(entry->use_pmap) {
-                                       pmap = entry->object.sub_map->pmap;
+                                       pmap = VME_SUBMAP(entry)->pmap;
                                        /* ppc implementation requires that */
                                        /* submaps pmap address ranges line */
                                        /* up with parent map */
@@ -4642,9 +5046,9 @@ vm_map_wire_nested(
                        entry->in_transition = TRUE;
 
                        vm_map_unlock(map);
-                       rc = vm_map_wire_nested(entry->object.sub_map,
+                       rc = vm_map_wire_nested(VME_SUBMAP(entry),
                                                sub_start, sub_end,
-                                               access_type,
+                                               caller_prot,
                                                user_wire, pmap, pmap_addr,
                                                NULL);
                        vm_map_lock(map);
@@ -4735,18 +5139,18 @@ vm_map_wire_nested(
                                       == PAGE_SIZE);
                                assert(!entry->needs_copy);
                                assert(!entry->is_sub_map);
-                               assert(entry->object.vm_object);
+                               assert(VME_OBJECT(entry));
                                if (((entry->vme_end - entry->vme_start)
                                     != PAGE_SIZE) ||
                                    entry->needs_copy ||
                                    entry->is_sub_map ||
-                                   entry->object.vm_object == VM_OBJECT_NULL) {
+                                   VME_OBJECT(entry) == VM_OBJECT_NULL) {
                                        rc = KERN_INVALID_ARGUMENT;
                                        goto done;
                                }
 
-                               object = entry->object.vm_object;
-                               offset = entry->offset;
+                               object = VME_OBJECT(entry);
+                               offset = VME_OFFSET(entry);
                                /* need exclusive lock to update m->dirty */
                                if (entry->protection & VM_PROT_WRITE) {
                                        vm_object_lock(object);
@@ -4802,11 +5206,10 @@ vm_map_wire_nested(
                                rc = KERN_INVALID_ARGUMENT;
                                goto done;
                        }
-                                
-                       vm_object_shadow(&entry->object.vm_object,
-                                        &entry->offset, size);
+
+                       VME_OBJECT_SHADOW(entry, size);
                        entry->needs_copy = FALSE;
-               } else if (entry->object.vm_object == VM_OBJECT_NULL) {
+               } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
                        if (wire_and_extract) {
                                /*
                                 * We're supposed to share with the original
@@ -4815,8 +5218,8 @@ vm_map_wire_nested(
                                rc = KERN_INVALID_ARGUMENT;
                                goto done;
                        }
-                       entry->object.vm_object = vm_object_allocate(size);
-                       entry->offset = (vm_object_offset_t)0;
+                       VME_OBJECT_SET(entry, vm_object_allocate(size));
+                       VME_OFFSET_SET(entry, (vm_object_offset_t)0);
                        assert(entry->use_pmap);
                }
 
@@ -4880,11 +5283,11 @@ vm_map_wire_nested(
 
                if(map_pmap)
                        rc = vm_fault_wire(map, 
-                                          &tmp_entry, map_pmap, pmap_addr,
+                                          &tmp_entry, caller_prot, map_pmap, pmap_addr,
                                           physpage_p);
                else
                        rc = vm_fault_wire(map, 
-                                          &tmp_entry, map->pmap, 
+                                          &tmp_entry, caller_prot, map->pmap, 
                                           tmp_entry.vme_start,
                                           physpage_p);
 
@@ -4955,36 +5358,78 @@ done:
 }
 
 kern_return_t
-vm_map_wire(
+vm_map_wire_external(
        register vm_map_t       map,
        register vm_map_offset_t        start,
        register vm_map_offset_t        end,
-       register vm_prot_t      access_type,
+       register vm_prot_t      caller_prot,
        boolean_t               user_wire)
 {
+       kern_return_t   kret;
+
+       caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
+       caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
+       kret = vm_map_wire_nested(map, start, end, caller_prot, 
+                                 user_wire, (pmap_t)NULL, 0, NULL);
+       return kret;
+}
 
+kern_return_t
+vm_map_wire(
+       register vm_map_t       map,
+       register vm_map_offset_t        start,
+       register vm_map_offset_t        end,
+       register vm_prot_t      caller_prot,
+       boolean_t               user_wire)
+{
        kern_return_t   kret;
 
-       kret = vm_map_wire_nested(map, start, end, access_type,
+       kret = vm_map_wire_nested(map, start, end, caller_prot,
                                  user_wire, (pmap_t)NULL, 0, NULL);
        return kret;
 }
 
 kern_return_t
-vm_map_wire_and_extract(
+vm_map_wire_and_extract_external(
        vm_map_t        map,
        vm_map_offset_t start,
-       vm_prot_t       access_type,
+       vm_prot_t       caller_prot,
        boolean_t       user_wire,
        ppnum_t         *physpage_p)
 {
+       kern_return_t   kret;
+
+       caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
+       caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
+       kret = vm_map_wire_nested(map,
+                                 start,
+                                 start+VM_MAP_PAGE_SIZE(map),
+                                 caller_prot, 
+                                 user_wire,
+                                 (pmap_t)NULL,
+                                 0,
+                                 physpage_p);
+       if (kret != KERN_SUCCESS &&
+           physpage_p != NULL) {
+               *physpage_p = 0;
+       }
+       return kret;
+}
 
+kern_return_t
+vm_map_wire_and_extract(
+       vm_map_t        map,
+       vm_map_offset_t start,
+       vm_prot_t       caller_prot,
+       boolean_t       user_wire,
+       ppnum_t         *physpage_p)
+{
        kern_return_t   kret;
 
        kret = vm_map_wire_nested(map,
                                  start,
                                  start+VM_MAP_PAGE_SIZE(map),
-                                 access_type,
+                                 caller_prot,
                                  user_wire,
                                  (pmap_t)NULL,
                                  0,
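
The new vm_map_wire_external() and vm_map_wire_and_extract_external() entry points above stamp the wire request with the caller's identity: vm_tag_bt() derives a kernel memory tag from the backtrace, VM_PROT_MEMORY_TAG_MAKE() packs it into the high, otherwise unused bits of the vm_prot_t argument, and vm_map_wire_nested() separates it again with caller_prot & VM_PROT_ALL. A minimal sketch of that packing/unpacking idea follows; the shift, mask and tag value used here are assumptions for illustration, not the constants defined in mach/vm_prot.h:

#include <assert.h>
#include <stdint.h>

/* Hypothetical stand-ins for the tag-in-prot packing used above.
 * The real shift/mask live in mach/vm_prot.h; these are assumptions. */
typedef uint32_t prot_t;

#define PROT_ALL_SK            0x7u          /* read | write | execute     */
#define MEM_TAG_SHIFT_SK       24            /* tag occupies the high byte */
#define MEM_TAG_MASK_SK        (0xFFu << MEM_TAG_SHIFT_SK)
#define MEM_TAG_MAKE_SK(tag)   (((prot_t)(tag) & 0xFFu) << MEM_TAG_SHIFT_SK)
#define MEM_TAG_GET_SK(prot)   (((prot) & MEM_TAG_MASK_SK) >> MEM_TAG_SHIFT_SK)

int main(void)
{
	prot_t caller_prot = 0x3;                 /* VM_PROT_READ|WRITE       */

	/* what the *_external wrappers do: stamp the caller's tag */
	caller_prot &= ~MEM_TAG_MASK_SK;
	caller_prot |= MEM_TAG_MAKE_SK(42);       /* 42 = made-up tag value   */

	/* what vm_map_wire_nested() does: split tag from protections */
	prot_t   access_type = caller_prot & PROT_ALL_SK;
	unsigned tag         = MEM_TAG_GET_SK(caller_prot);

	assert(access_type == 0x3 && tag == 42);
	return 0;
}
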
@@ -5117,13 +5562,13 @@ vm_map_unwire_nested(
                        vm_map_clip_start(map, entry, start);
                        vm_map_clip_end(map, entry, end);
 
-                       sub_start = entry->offset;
+                       sub_start = VME_OFFSET(entry);
                        sub_end = entry->vme_end - entry->vme_start;
-                       sub_end += entry->offset;
+                       sub_end += VME_OFFSET(entry);
                        local_end = entry->vme_end;
                        if(map_pmap == NULL) {
                                if(entry->use_pmap) {
-                                       pmap = entry->object.sub_map->pmap;
+                                       pmap = VME_SUBMAP(entry)->pmap;
                                        pmap_addr = sub_start;
                                } else {
                                        pmap = map->pmap;
@@ -5169,7 +5614,7 @@ vm_map_unwire_nested(
                                 * guarantees existance of the entry.
                                 */
                                vm_map_unlock(map);
-                               vm_map_unwire_nested(entry->object.sub_map,
+                               vm_map_unwire_nested(VME_SUBMAP(entry),
                                                     sub_start, sub_end, user_wire, pmap, pmap_addr);
                                vm_map_lock(map);
 
@@ -5207,7 +5652,7 @@ vm_map_unwire_nested(
                                continue;
                        } else {
                                vm_map_unlock(map);
-                               vm_map_unwire_nested(entry->object.sub_map,
+                               vm_map_unwire_nested(VME_SUBMAP(entry),
                                                     sub_start, sub_end, user_wire, map_pmap,
                                                     pmap_addr);
                                vm_map_lock(map);
@@ -5382,10 +5827,10 @@ vm_map_entry_delete(
 
        if (entry->is_sub_map) {
                object = NULL;
-               submap = entry->object.sub_map;
+               submap = VME_SUBMAP(entry);
        } else {
                submap = NULL;
-               object = entry->object.vm_object;
+               object = VME_OBJECT(entry);
        }
 
        vm_map_store_entry_unlink(map, entry);
@@ -5438,19 +5883,22 @@ vm_map_submap_pmap_clean(
                                sub_map,
                                start,
                                start + remove_size,
-                               entry->object.sub_map,
-                               entry->offset);
+                               VME_SUBMAP(entry),
+                               VME_OFFSET(entry));
                } else {
 
                        if((map->mapped_in_other_pmaps) && (map->ref_count)
-                          && (entry->object.vm_object != NULL)) {
-                               vm_object_pmap_protect(
-                                       entry->object.vm_object,
-                                       entry->offset+(offset-entry->vme_start),
+                          && (VME_OBJECT(entry) != NULL)) {
+                               vm_object_pmap_protect_options(
+                                       VME_OBJECT(entry),
+                                       (VME_OFFSET(entry) +
+                                        offset -
+                                        entry->vme_start),
                                        remove_size,
                                        PMAP_NULL,
                                        entry->vme_start,
-                                       VM_PROT_NONE);
+                                       VM_PROT_NONE,
+                                       PMAP_OPTIONS_REMOVE);
                        } else {
                                pmap_remove(map->pmap, 
                                            (addr64_t)start, 
@@ -5472,18 +5920,19 @@ vm_map_submap_pmap_clean(
                                sub_map,
                                (start + entry->vme_start) - offset,
                                ((start + entry->vme_start) - offset) + remove_size,
-                               entry->object.sub_map,
-                               entry->offset);
+                               VME_SUBMAP(entry),
+                               VME_OFFSET(entry));
                } else {
                        if((map->mapped_in_other_pmaps) && (map->ref_count)
-                          && (entry->object.vm_object != NULL)) {
-                               vm_object_pmap_protect(
-                                       entry->object.vm_object,
-                                       entry->offset,
+                          && (VME_OBJECT(entry) != NULL)) {
+                               vm_object_pmap_protect_options(
+                                       VME_OBJECT(entry),
+                                       VME_OFFSET(entry),
                                        remove_size,
                                        PMAP_NULL,
                                        entry->vme_start,
-                                       VM_PROT_NONE);
+                                       VM_PROT_NONE,
+                                       PMAP_OPTIONS_REMOVE);
                        } else {
                                pmap_remove(map->pmap, 
                                            (addr64_t)((start + entry->vme_start) 
@@ -5735,8 +6184,6 @@ vm_map_delete(
                         * may not exist anymore.  Look it up again.
                         */
                        if (!vm_map_lookup_entry(map, s, &first_entry)) {
-                               assert((map != kernel_map) && 
-                                      (!entry->is_sub_map));
                                /*
                                 * User: use the next entry
                                 */
@@ -5843,8 +6290,8 @@ vm_map_delete(
                                vm_map_offset_t pmap_addr;
                                
 
-                               sub_map = tmp_entry.object.sub_map;
-                               sub_start = tmp_entry.offset;
+                               sub_map = VME_SUBMAP(&tmp_entry);
+                               sub_start = VME_OFFSET(&tmp_entry);
                                sub_end = sub_start + (tmp_entry.vme_end -
                                                       tmp_entry.vme_start);
                                if (tmp_entry.use_pmap) {
@@ -5860,7 +6307,7 @@ vm_map_delete(
                                                            pmap, pmap_addr);
                        } else {
 
-                               if (tmp_entry.object.vm_object == kernel_object) {
+                               if (VME_OBJECT(&tmp_entry) == kernel_object) {
                                        pmap_protect_options(
                                                map->pmap,
                                                tmp_entry.vme_start,
@@ -5870,7 +6317,7 @@ vm_map_delete(
                                                NULL);
                                }
                                vm_fault_unwire(map, &tmp_entry,
-                                               tmp_entry.object.vm_object == kernel_object,
+                                               VME_OBJECT(&tmp_entry) == kernel_object,
                                                map->pmap, tmp_entry.vme_start);
                        }
 
@@ -5935,37 +6382,61 @@ vm_map_delete(
                } else if (entry->is_sub_map) {
                        if (entry->use_pmap) {
 #ifndef NO_NESTED_PMAP
-                               pmap_unnest(map->pmap,
-                                           (addr64_t)entry->vme_start,
-                                           entry->vme_end - entry->vme_start);
+                               int pmap_flags;
+
+                               if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
+                                       /*
+                                        * This is the final cleanup of the
+                                        * address space being terminated.
+                                        * No new mappings are expected and
+                                        * we don't really need to unnest the
+                                        * shared region (and lose the "global"
+                                        * pmap mappings, if applicable).
+                                        *
+                                        * Tell the pmap layer that we're
+                                        * "clean" wrt nesting.
+                                        */
+                                       pmap_flags = PMAP_UNNEST_CLEAN;
+                               } else {
+                                       /*
+                                        * We're unmapping part of the nested
+                                        * shared region, so we can't keep the
+                                        * nested pmap.
+                                        */
+                                       pmap_flags = 0;
+                               }
+                               pmap_unnest_options(
+                                       map->pmap,
+                                       (addr64_t)entry->vme_start,
+                                       entry->vme_end - entry->vme_start,
+                                       pmap_flags);
 #endif /* NO_NESTED_PMAP */
                                if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
                                        /* clean up parent map/maps */
                                        vm_map_submap_pmap_clean(
                                                map, entry->vme_start,
                                                entry->vme_end,
-                                               entry->object.sub_map,
-                                               entry->offset);
+                                               VME_SUBMAP(entry),
+                                               VME_OFFSET(entry));
                                }
                        } else {
                                vm_map_submap_pmap_clean(
                                        map, entry->vme_start, entry->vme_end,
-                                       entry->object.sub_map,
-                                       entry->offset);
+                                       VME_SUBMAP(entry),
+                                       VME_OFFSET(entry));
                        }
-               } else if (entry->object.vm_object != kernel_object &&
-                          entry->object.vm_object != compressor_object) {
-                       object = entry->object.vm_object;
+               } else if (VME_OBJECT(entry) != kernel_object &&
+                          VME_OBJECT(entry) != compressor_object) {
+                       object = VME_OBJECT(entry);
                        if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
                                vm_object_pmap_protect_options(
-                                       object, entry->offset,
+                                       object, VME_OFFSET(entry),
                                        entry->vme_end - entry->vme_start,
                                        PMAP_NULL,
                                        entry->vme_start,
                                        VM_PROT_NONE,
                                        PMAP_OPTIONS_REMOVE);
-                       } else if ((entry->object.vm_object !=
-                                   VM_OBJECT_NULL) ||
+                       } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
                                   (map->pmap == kernel_pmap)) {
                                /* Remove translations associated
                                 * with this range unless the entry
@@ -6144,9 +6615,9 @@ vm_map_copy_discard(
 
                        vm_map_copy_entry_unlink(copy, entry);
                        if (entry->is_sub_map) {
-                               vm_map_deallocate(entry->object.sub_map);
+                               vm_map_deallocate(VME_SUBMAP(entry));
                        } else {
-                               vm_object_deallocate(entry->object.vm_object);
+                               vm_object_deallocate(VME_OBJECT(entry));
                        }
                        vm_map_copy_entry_dispose(copy, entry);
                }
@@ -6161,7 +6632,10 @@ vm_map_copy_discard(
                 * allocated by a single call to kalloc(), i.e. the
                 * vm_map_copy_t was not allocated out of the zone.
                 */
-               kfree(copy, copy->cpy_kalloc_size);
+               if (copy->size > msg_ool_size_small || copy->offset)
+                       panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
+                             (long long)copy->size, (long long)copy->offset);
+               kfree(copy, copy->size + cpy_kdata_hdr_sz);
                return;
        }
        zfree(vm_map_copy_zone, copy);
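
The rewritten KERNEL_BUFFER branch above no longer trusts a stored cpy_kalloc_size: it sanity-checks copy->size against msg_ool_size_small (and requires a zero offset) and frees exactly copy->size + cpy_kdata_hdr_sz, i.e. the copy object is a fixed header immediately followed by its inline payload. A rough user-space sketch of that header-plus-inline-data layout; only cpy_kdata_hdr_sz and the size arithmetic come from the diff, while the structure, allocator and helper names below are illustrative:

#include <stddef.h>
#include <stdlib.h>
#include <string.h>

/* Illustrative layout only: a fixed header directly followed by the
 * inline payload, so the free size is header size + payload size.
 * This is not the kernel's vm_map_copy_t definition. */
struct kdata_copy_sketch {
	size_t size;          /* payload bytes that follow the header */
	char   data[];        /* inline kernel-buffer payload         */
};

#define CPY_KDATA_HDR_SZ_SKETCH  offsetof(struct kdata_copy_sketch, data)

static struct kdata_copy_sketch *
copy_alloc_sketch(const void *src, size_t len)
{
	struct kdata_copy_sketch *c =
		malloc(CPY_KDATA_HDR_SZ_SKETCH + len);   /* one allocation */
	if (c != NULL) {
		c->size = len;
		memcpy(c->data, src, len);
	}
	return c;
}

static void
copy_discard_sketch(struct kdata_copy_sketch *c)
{
	/* mirrors kfree(copy, copy->size + cpy_kdata_hdr_sz) */
	free(c);
}
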
@@ -6286,19 +6760,19 @@ start_pass_1:
                        }
 
                        encountered_sub_map = TRUE;
-                       sub_start = entry->offset;
+                       sub_start = VME_OFFSET(entry);
 
                        if(entry->vme_end < dst_end)
                                sub_end = entry->vme_end;
                        else 
                                sub_end = dst_end;
                        sub_end -= entry->vme_start;
-                       sub_end += entry->offset;
+                       sub_end += VME_OFFSET(entry);
                        local_end = entry->vme_end;
                        vm_map_unlock(dst_map);
                        
                        result = vm_map_overwrite_submap_recurse(
-                               entry->object.sub_map,
+                               VME_SUBMAP(entry),
                                sub_start,
                                sub_end - sub_start);
 
@@ -6356,9 +6830,9 @@ start_pass_1:
                /*
                 *      Check for permanent objects in the destination.
                 */
-               if ((entry->object.vm_object != VM_OBJECT_NULL) &&
-                   ((!entry->object.vm_object->internal) ||
-                    (entry->object.vm_object->true_share))) {
+               if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
+                   ((!VME_OBJECT(entry)->internal) ||
+                    (VME_OBJECT(entry)->true_share))) {
                        if(encountered_sub_map) {
                                vm_map_unlock(dst_map);
                                return(KERN_FAILURE);
@@ -6542,18 +7016,18 @@ start_pass_1:
                                /* there is no need for the follow-  */
                                /* ing check.                        */
                                encountered_sub_map = TRUE;
-                               sub_start = entry->offset;
+                               sub_start = VME_OFFSET(entry);
 
                                if(entry->vme_end < dst_end)
                                        sub_end = entry->vme_end;
                                else 
                                        sub_end = dst_end;
                                sub_end -= entry->vme_start;
-                               sub_end += entry->offset;
+                               sub_end += VME_OFFSET(entry);
                                vm_map_unlock(dst_map);
                        
                                kr = vm_map_overwrite_submap_recurse(
-                                       entry->object.sub_map,
+                                       VME_SUBMAP(entry),
                                        sub_start,
                                        sub_end - sub_start);
                                if(kr != KERN_SUCCESS)
@@ -6610,9 +7084,9 @@ start_pass_1:
                /*
                 *      Check for permanent objects in the destination.
                 */
-               if ((entry->object.vm_object != VM_OBJECT_NULL) &&
-                   ((!entry->object.vm_object->internal) ||
-                    (entry->object.vm_object->true_share))) {
+               if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
+                   ((!VME_OBJECT(entry)->internal) ||
+                    (VME_OBJECT(entry)->true_share))) {
                        contains_permanent_objects = TRUE;
                }
 
@@ -6717,11 +7191,11 @@ start_overwrite:
                                        assert(!entry->use_pmap);
                                        entry->is_sub_map = FALSE;
                                        vm_map_deallocate(
-                                               entry->object.sub_map);
-                                       entry->object.sub_map = NULL;
+                                               VME_SUBMAP(entry));
+                                       VME_SUBMAP_SET(entry, NULL);
                                        entry->is_shared = FALSE;
                                        entry->needs_copy = FALSE;
-                                       entry->offset = 0;
+                                       VME_OFFSET_SET(entry, 0);
                                        /*
                                         * XXX FBDP
                                         * We should propagate the protections
@@ -6747,14 +7221,14 @@ start_overwrite:
                                                entry->vme_start - base_addr;
                                        break;
                                }
-                               sub_start = entry->offset;
+                               sub_start = VME_OFFSET(entry);
 
                                if(entry->vme_end < dst_end)
                                        sub_end = entry->vme_end;
                                else 
                                        sub_end = dst_end;
                                sub_end -= entry->vme_start;
-                               sub_end += entry->offset;
+                               sub_end += VME_OFFSET(entry);
                                local_end = entry->vme_end;
                                vm_map_unlock(dst_map);
                                copy_size = sub_end - sub_start;
@@ -6807,22 +7281,22 @@ start_overwrite:
                        
                                if((entry->use_pmap) && (pmap == NULL)) {
                                        kr = vm_map_copy_overwrite_nested(
-                                               entry->object.sub_map,
+                                               VME_SUBMAP(entry),
                                                sub_start,
                                                copy,
                                                interruptible, 
-                                               entry->object.sub_map->pmap,
+                                               VME_SUBMAP(entry)->pmap,
                                                TRUE);
                                } else if (pmap != NULL) {
                                        kr = vm_map_copy_overwrite_nested(
-                                               entry->object.sub_map,
+                                               VME_SUBMAP(entry),
                                                sub_start,
                                                copy,
                                                interruptible, pmap,
                                                TRUE);
                                } else {
                                        kr = vm_map_copy_overwrite_nested(
-                                               entry->object.sub_map,
+                                               VME_SUBMAP(entry),
                                                sub_start,
                                                copy,
                                                interruptible,
@@ -7387,14 +7861,13 @@ vm_map_copy_overwrite_unaligned(
                                vm_map_lock_read(dst_map);
                                goto RetryLookup;
                        }
-                       vm_object_shadow(&entry->object.vm_object,
-                                        &entry->offset,
-                                        (vm_map_size_t)(entry->vme_end
-                                                        - entry->vme_start));
+                       VME_OBJECT_SHADOW(entry,
+                                         (vm_map_size_t)(entry->vme_end
+                                                         - entry->vme_start));
                        entry->needs_copy = FALSE;
                        vm_map_lock_write_to_read(dst_map);
                }
-               dst_object = entry->object.vm_object;
+               dst_object = VME_OBJECT(entry);
 /*
  *             unlike with the virtual (aligned) copy we're going
  *             to fault on it therefore we need a target object.
@@ -7406,8 +7879,8 @@ vm_map_copy_overwrite_unaligned(
                        }
                        dst_object = vm_object_allocate((vm_map_size_t)
                                                        entry->vme_end - entry->vme_start);
-                       entry->object.vm_object = dst_object;
-                       entry->offset = 0;
+                       VME_OBJECT(entry) = dst_object;
+                       VME_OFFSET_SET(entry, 0);
                        assert(entry->use_pmap);
                        vm_map_lock_write_to_read(dst_map);
                }
@@ -7417,15 +7890,15 @@ vm_map_copy_overwrite_unaligned(
  */
                vm_object_reference(dst_object);
                version.main_timestamp = dst_map->timestamp;
-               entry_offset = entry->offset;
+               entry_offset = VME_OFFSET(entry);
                entry_end = entry->vme_end;
                vm_map_unlock_read(dst_map);
 /*
  *             Copy as much as possible in one pass
  */
                kr = vm_fault_copy(
-                       copy_entry->object.vm_object,
-                       copy_entry->offset + src_offset,
+                       VME_OBJECT(copy_entry),
+                       VME_OFFSET(copy_entry) + src_offset,
                        &copy_size,
                        dst_object,
                        entry_offset + dst_offset,
@@ -7457,8 +7930,7 @@ vm_map_copy_overwrite_unaligned(
                        if (discard_on_success) {
                                vm_map_copy_entry_unlink(copy, copy_entry);
                                assert(!copy_entry->is_sub_map);
-                               vm_object_deallocate(
-                                       copy_entry->object.vm_object);
+                               vm_object_deallocate(VME_OBJECT(copy_entry));
                                vm_map_copy_entry_dispose(copy, copy_entry);
                        }
 
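
In both overwrite paths, the three-argument vm_object_shadow(&entry->object.vm_object, &entry->offset, size) call becomes VME_OBJECT_SHADOW(entry, size). The wrapper below is a plausible sketch only, built on toy types (the real definition is in vm_map.h and is not shown in this diff); it illustrates why bundling the pair is attractive: the object pointer and the offset are always updated together.

#include <stdint.h>

struct vm_object;

struct vm_map_entry_sketch {
        struct vm_object *object;
        uint64_t          offset;
};

/* Stand-in declaration, with toy types, for the existing primitive
 * being wrapped (the kernel's vm_object_shadow()). */
void vm_object_shadow(struct vm_object **object, uint64_t *offset,
                      uint64_t length);

static inline void
VME_OBJECT_SHADOW(struct vm_map_entry_sketch *entry, uint64_t length)
{
        struct vm_object *object = entry->object;
        uint64_t          offset = entry->offset;

        vm_object_shadow(&object, &offset, length);

        /* Write both halves back through the entry, so callers can no
         * longer shadow the object without also updating the offset. */
        entry->object = object;
        entry->offset = offset;
}
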
@@ -7624,21 +8096,21 @@ vm_map_copy_overwrite_aligned(
                 *      installing the source data.
                 */
 
-               object = entry->object.vm_object;
+               object = VME_OBJECT(entry);
                if ((!entry->is_shared && 
                     ((object == VM_OBJECT_NULL) || 
                      (object->internal && !object->true_share))) ||
                    entry->needs_copy) {
-                       vm_object_t     old_object = entry->object.vm_object;
-                       vm_object_offset_t      old_offset = entry->offset;
+                       vm_object_t     old_object = VME_OBJECT(entry);
+                       vm_object_offset_t      old_offset = VME_OFFSET(entry);
                        vm_object_offset_t      offset;
 
                        /*
                         * Ensure that the source and destination aren't
                         * identical
                         */
-                       if (old_object == copy_entry->object.vm_object &&
-                           old_offset == copy_entry->offset) {
+                       if (old_object == VME_OBJECT(copy_entry) &&
+                           old_offset == VME_OFFSET(copy_entry)) {
                                vm_map_copy_entry_unlink(copy, copy_entry);
                                vm_map_copy_entry_dispose(copy, copy_entry);
 
@@ -7652,8 +8124,8 @@ vm_map_copy_overwrite_aligned(
 
 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024)        /* 64 MB */
 #define __TRADEOFF1_COPY_SIZE (128 * 1024)     /* 128 KB */
-                       if (copy_entry->object.vm_object != VM_OBJECT_NULL &&
-                           copy_entry->object.vm_object->vo_size >= __TRADEOFF1_OBJ_SIZE &&
+                       if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
+                           VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
                            copy_size <= __TRADEOFF1_COPY_SIZE) {
                                /*
                                 * Virtual vs. Physical copy tradeoff #1.
@@ -7668,15 +8140,16 @@ vm_map_copy_overwrite_aligned(
                                goto slow_copy;
                        }
 
-                       if (entry->alias >= VM_MEMORY_MALLOC &&
-                           entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED) {
+                       if ((dst_map->pmap != kernel_pmap) &&
+                           (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
+                           (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_LARGE_REUSED)) {
                                vm_object_t new_object, new_shadow;
 
                                /*
                                 * We're about to map something over a mapping
                                 * established by malloc()...
                                 */
-                               new_object = copy_entry->object.vm_object;
+                               new_object = VME_OBJECT(copy_entry);
                                if (new_object != VM_OBJECT_NULL) {
                                        vm_object_lock_shared(new_object);
                                }
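
This hunk also tightens the malloc-tag shortcut: besides carrying an alias in the VM_MEMORY_MALLOC..VM_MEMORY_MALLOC_LARGE_REUSED range, the destination map must now not be backed by the kernel pmap. A minimal sketch of the predicate, with stand-in types and illustrative tag values:

#include <stdbool.h>

/* Illustrative values only; the real constants live in mach/vm_statistics.h. */
enum {
        VM_MEMORY_MALLOC_SKETCH              = 1,
        VM_MEMORY_MALLOC_LARGE_REUSED_SKETCH = 9
};

struct pmap_sketch;
struct map_sketch   { struct pmap_sketch *pmap; };
struct entry_sketch { int alias; };

static bool
malloc_overwrite_shortcut_applies(const struct map_sketch   *dst_map,
                                  const struct entry_sketch *entry,
                                  const struct pmap_sketch  *kernel_pmap)
{
        /* The object-swap shortcut is reserved for user mappings carrying
         * a malloc tag; kernel-pmap destinations take the ordinary copy path. */
        return dst_map->pmap != kernel_pmap &&
               entry->alias >= VM_MEMORY_MALLOC_SKETCH &&
               entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED_SKETCH;
}
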
@@ -7743,23 +8216,22 @@ vm_map_copy_overwrite_aligned(
                                                        vm_map_submap_pmap_clean(
                                                                dst_map, entry->vme_start,
                                                                entry->vme_end,
-                                                               entry->object.sub_map,
-                                                               entry->offset);
+                                                               VME_SUBMAP(entry),
+                                                               VME_OFFSET(entry));
                                                }
                                        } else {
                                                vm_map_submap_pmap_clean(
                                                        dst_map, entry->vme_start, 
                                                        entry->vme_end,
-                                                       entry->object.sub_map,
-                                                       entry->offset);
+                                                       VME_SUBMAP(entry),
+                                                       VME_OFFSET(entry));
                                        }
-                                       vm_map_deallocate(
-                                               entry->object.sub_map);
+                                       vm_map_deallocate(VME_SUBMAP(entry));
                                } else {
                                        if(dst_map->mapped_in_other_pmaps) {
                                                vm_object_pmap_protect_options(
-                                                       entry->object.vm_object,
-                                                       entry->offset,
+                                                       VME_OBJECT(entry),
+                                                       VME_OFFSET(entry),
                                                        entry->vme_end 
                                                        - entry->vme_start,
                                                        PMAP_NULL,
@@ -7778,12 +8250,13 @@ vm_map_copy_overwrite_aligned(
                        }
 
                        entry->is_sub_map = FALSE;
-                       entry->object = copy_entry->object;
-                       object = entry->object.vm_object;
+                       VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
+                       object = VME_OBJECT(entry);
                        entry->needs_copy = copy_entry->needs_copy;
                        entry->wired_count = 0;
                        entry->user_wired_count = 0;
-                       offset = entry->offset = copy_entry->offset;
+                       offset = VME_OFFSET(copy_entry);
+                       VME_OFFSET_SET(entry, offset); 
 
                        vm_map_copy_entry_unlink(copy, copy_entry);
                        vm_map_copy_entry_dispose(copy, copy_entry);
@@ -7815,15 +8288,14 @@ vm_map_copy_overwrite_aligned(
 
                slow_copy:
                        if (entry->needs_copy) {
-                               vm_object_shadow(&entry->object.vm_object,
-                                                &entry->offset,
-                                                (entry->vme_end -
-                                                 entry->vme_start));
+                               VME_OBJECT_SHADOW(entry,
+                                                 (entry->vme_end -
+                                                  entry->vme_start));
                                entry->needs_copy = FALSE;
                        }
 
-                       dst_object = entry->object.vm_object;
-                       dst_offset = entry->offset;
+                       dst_object = VME_OBJECT(entry);
+                       dst_offset = VME_OFFSET(entry);
 
                        /*
                         *      Take an object reference, and record
@@ -7847,8 +8319,8 @@ vm_map_copy_overwrite_aligned(
                                dst_object = vm_object_allocate(
                                        entry->vme_end - entry->vme_start);
                                dst_offset = 0;
-                               entry->object.vm_object = dst_object;
-                               entry->offset = dst_offset;
+                               VME_OBJECT_SET(entry, dst_object);
+                               VME_OFFSET_SET(entry, dst_offset);
                                assert(entry->use_pmap);
                                
                        }
@@ -7866,8 +8338,8 @@ vm_map_copy_overwrite_aligned(
 
                        copy_size = size;
                        r = vm_fault_copy(
-                               copy_entry->object.vm_object,
-                               copy_entry->offset,
+                               VME_OBJECT(copy_entry),
+                               VME_OFFSET(copy_entry),
                                &copy_size,
                                dst_object,
                                dst_offset,
@@ -7896,7 +8368,7 @@ vm_map_copy_overwrite_aligned(
                                vm_map_copy_clip_end(copy, copy_entry,
                                                     copy_entry->vme_start + copy_size);
                                vm_map_copy_entry_unlink(copy, copy_entry);
-                               vm_object_deallocate(copy_entry->object.vm_object);
+                               vm_object_deallocate(VME_OBJECT(copy_entry));
                                vm_map_copy_entry_dispose(copy, copy_entry);
                        }
 
@@ -7966,24 +8438,19 @@ vm_map_copyin_kernel_buffer(
        vm_map_copy_t copy;
        vm_size_t kalloc_size;
 
-       if ((vm_size_t) len != len) {
-               /* "len" is too big and doesn't fit in a "vm_size_t" */
-               return KERN_RESOURCE_SHORTAGE;
-       }
-       kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
-       assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);
+       if (len > msg_ool_size_small)
+               return KERN_INVALID_ARGUMENT;
 
-       copy = (vm_map_copy_t) kalloc(kalloc_size);
-       if (copy == VM_MAP_COPY_NULL) {
+       kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
+
+       copy = (vm_map_copy_t)kalloc(kalloc_size);
+       if (copy == VM_MAP_COPY_NULL)
                return KERN_RESOURCE_SHORTAGE;
-       }
        copy->type = VM_MAP_COPY_KERNEL_BUFFER;
        copy->size = len;
        copy->offset = 0;
-       copy->cpy_kdata = (void *) (copy + 1);
-       copy->cpy_kalloc_size = kalloc_size;
 
-       kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
+       kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
        if (kr != KERN_SUCCESS) {
                kfree(copy, kalloc_size);
                return kr;
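
vm_map_copyin_kernel_buffer() no longer records a cpy_kalloc_size: requests larger than msg_ool_size_small are rejected up front with KERN_INVALID_ARGUMENT, and the allocation size is the fixed header size cpy_kdata_hdr_sz plus the payload length. A self-contained sketch of that size arithmetic, with stand-in constants for the two kernel values:

#include <stdint.h>
#include <stdio.h>

#define HDR_SZ        64u            /* stand-in for cpy_kdata_hdr_sz   */
#define OOL_SMALL_MAX (4u * 1024u)   /* stand-in for msg_ool_size_small */

/* Returns the kalloc size for an inline copy of `len` bytes, or 0 when
 * the caller should fail with KERN_INVALID_ARGUMENT instead. */
static uint32_t
kernel_buffer_alloc_size(uint64_t len)
{
        if (len > OOL_SMALL_MAX)
                return 0;
        return (uint32_t)(HDR_SZ + len);
}

int
main(void)
{
        printf("256-byte copy -> kalloc(%u)\n", kernel_buffer_alloc_size(256));
        printf("1 MB copy     -> %u (rejected)\n",
               kernel_buffer_alloc_size(1ULL << 20));
        return 0;
}
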
@@ -8026,6 +8493,13 @@ vm_map_copyout_kernel_buffer(
        kern_return_t kr = KERN_SUCCESS;
        thread_t thread = current_thread();
 
+       /*
+        * check for corrupted vm_map_copy structure
+        */
+       if (copy->size > msg_ool_size_small || copy->offset)
+               panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
+                     (long long)copy->size, (long long)copy->offset);
+
        if (!overwrite) {
 
                /*
@@ -8103,7 +8577,7 @@ vm_map_copyout_kernel_buffer(
        } else {
                /* copy was successful, discard the copy structure */
                if (consume_on_success) {
-                       kfree(copy, copy->cpy_kalloc_size);
+                       kfree(copy, copy->size + cpy_kdata_hdr_sz);
                }
        }
 
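
Because the free in vm_map_copyout_kernel_buffer() now recomputes the allocation size as copy->size + cpy_kdata_hdr_sz instead of reading a stored cpy_kalloc_size, the function first panics if the size or offset look corrupted. A user-space sketch of that validation, with abort() standing in for panic() and simplified types:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define OOL_SMALL_MAX (4u * 1024u)   /* stand-in for msg_ool_size_small */
#define HDR_SZ        64u            /* stand-in for cpy_kdata_hdr_sz   */

struct copy_sketch {
        uint64_t size;
        uint64_t offset;
};

static void
validate_kernel_buffer_copy(const struct copy_sketch *copy)
{
        /* A kernel-buffer copy must fit the small-OOL limit and carry a
         * zero offset; anything else means the structure was corrupted
         * and the recomputed free size could not be trusted. */
        if (copy->size > OOL_SMALL_MAX || copy->offset != 0) {
                fprintf(stderr, "Invalid vm_map_copy_t sz:%llu, ofst:%llu\n",
                        (unsigned long long)copy->size,
                        (unsigned long long)copy->offset);
                abort();             /* the kernel panics here */
        }
}

int
main(void)
{
        struct copy_sketch ok = { .size = 128, .offset = 0 };

        validate_kernel_buffer_copy(&ok);
        printf("freeing %llu bytes\n", (unsigned long long)(ok.size + HDR_SZ));
        return 0;
}
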
@@ -8158,11 +8632,11 @@ vm_map_copy_remap(
                /* take an extra reference on the entry's "object" */
                if (new_entry->is_sub_map) {
                        assert(!new_entry->use_pmap); /* not nested */
-                       vm_map_lock(new_entry->object.sub_map);
-                       vm_map_reference(new_entry->object.sub_map);
-                       vm_map_unlock(new_entry->object.sub_map);
+                       vm_map_lock(VME_SUBMAP(new_entry));
+                       vm_map_reference(VME_SUBMAP(new_entry));
+                       vm_map_unlock(VME_SUBMAP(new_entry));
                } else {
-                       vm_object_reference(new_entry->object.vm_object);
+                       vm_object_reference(VME_OBJECT(new_entry));
                }
                /* insert the new entry in the map */
                vm_map_store_entry_link(map, where, new_entry);
@@ -8211,6 +8685,7 @@ vm_map_copyout_internal(
        vm_object_offset_t      vm_copy_start;
        vm_map_entry_t          last;
        vm_map_entry_t          entry;
+       vm_map_entry_t          hole_entry;
 
        /*
         *      Check for null copy object.
@@ -8281,9 +8756,24 @@ StartAgain: ;
                VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
                last = entry;
        } else {
-               assert(first_free_is_valid(dst_map));
-               start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
-               vm_map_min(dst_map) : last->vme_end;
+               if (dst_map->holelistenabled) {
+                       hole_entry = (vm_map_entry_t)dst_map->holes_list;
+
+                       if (hole_entry == NULL) {
+                               /*
+                                * No more space in the map?
+                                */
+                               vm_map_unlock(dst_map);
+                               return(KERN_NO_SPACE);
+                       }
+
+                       last = hole_entry;
+                       start = last->vme_start;
+               } else {
+                       assert(first_free_is_valid(dst_map));
+                       start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
+                       vm_map_min(dst_map) : last->vme_end;
+               }
                start = vm_map_round_page(start,
                                          VM_MAP_PAGE_MASK(dst_map));
        }
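
When dst_map->holelistenabled is set, vm_map_copyout_internal() now starts from the map's circular list of holes instead of first_free, and (in the following hunk) treats wrapping back around to the list head as KERN_NO_SPACE. The toy sketch below captures just that control flow; the real scan also page-rounds the start address and re-checks against neighbouring entries.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct hole {
        uint64_t     start;
        uint64_t     end;
        struct hole *next;   /* circular: the last hole points back to the head */
};

/* Sets *out to the start of the first hole that can fit `size` bytes;
 * returns false when the map has no more space. */
static bool
find_space_in_holes(const struct hole *holes_list, uint64_t size, uint64_t *out)
{
        const struct hole *h = holes_list;

        if (h == NULL)
                return false;                     /* no more space in the map */

        do {
                if (h->end - h->start >= size) {
                        *out = h->start;
                        return true;
                }
                h = h->next;
        } while (h != holes_list);                /* wrapped around: give up */

        return false;
}

int
main(void)
{
        struct hole b = { 0x9000, 0xa000, NULL };
        struct hole a = { 0x1000, 0x2000, &b };
        uint64_t where;

        b.next = &a;                              /* close the circle */
        if (find_space_in_holes(&a, 0x800, &where))
                printf("placing copy at 0x%llx\n", (unsigned long long)where);
        return 0;
}
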
@@ -8306,16 +8796,51 @@ StartAgain: ;
                        return(KERN_NO_SPACE);
                }
 
-               if ((next == vm_map_to_entry(dst_map)) ||
-                   (next->vme_start >= end))
-                       break;
+               if (dst_map->holelistenabled) {
+                       if (last->vme_end >= end)
+                               break;
+               } else {
+                       /*
+                        *      If there are no more entries, we must win.
+                        *
+                        *      OR
+                        *
+                        *      If there is another entry, it must be
+                        *      after the end of the potential new region.
+                        */
+
+                       if (next == vm_map_to_entry(dst_map))
+                               break;
+
+                       if (next->vme_start >= end)
+                               break;
+               }
 
                last = next;
-               start = last->vme_end;
+
+               if (dst_map->holelistenabled) {
+                       if (last == (vm_map_entry_t) dst_map->holes_list) {
+                               /*
+                                * Wrapped around
+                                */
+                               vm_map_unlock(dst_map);
+                               return(KERN_NO_SPACE);
+                       }
+                       start = last->vme_start;
+               } else {
+                       start = last->vme_end;
+               }
                start = vm_map_round_page(start,
                                          VM_MAP_PAGE_MASK(dst_map));
        }
 
+       if (dst_map->holelistenabled) {
+               if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
+                       panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
+               }
+       }
+
+
        adjustment = start - vm_copy_start;
        if (! consume_on_success) {
                /*
@@ -8423,8 +8948,8 @@ StartAgain: ;
                        vm_prot_t prot;
                        int     type_of_fault;
 
-                       object = entry->object.vm_object;
-                       offset = entry->offset;
+                       object = VME_OBJECT(entry);
+                       offset = VME_OFFSET(entry);
                        va = entry->vme_start;
 
                        pmap_pageable(dst_map->pmap,
@@ -8471,14 +8996,15 @@ StartAgain: ;
 
                                prot = entry->protection;
 
-                               if (override_nx(dst_map, entry->alias) && prot)
+                               if (override_nx(dst_map, VME_ALIAS(entry)) &&
+                                   prot)
                                        prot |= VM_PROT_EXECUTE;
 
                                type_of_fault = DBG_CACHE_HIT_FAULT;
 
                                vm_fault_enter(m, dst_map->pmap, va, prot, prot,
                                               VM_PAGE_WIRED(m), FALSE, FALSE,
-                                              FALSE, entry->alias,
+                                              FALSE, VME_ALIAS(entry),
                                               ((entry->iokit_acct ||
                                                 (!entry->is_sub_map &&
                                                  !entry->use_pmap))
@@ -8785,9 +9311,9 @@ vm_map_copyin_common(
                        ptr->base_len = submap_len;
        
                        src_start -= tmp_entry->vme_start;
-                       src_start += tmp_entry->offset;
+                       src_start += VME_OFFSET(tmp_entry);
                        src_end = src_start + submap_len;
-                       src_map = tmp_entry->object.sub_map;
+                       src_map = VME_SUBMAP(tmp_entry);
                        vm_map_lock(src_map);
                        /* keep an outstanding reference for all maps in */
                        /* the parents tree except the base map */
@@ -8803,8 +9329,8 @@ vm_map_copyin_common(
                }
                /* we are now in the lowest level submap... */
 
-               if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) && 
-                   (tmp_entry->object.vm_object->phys_contiguous)) {
+               if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) && 
+                   (VME_OBJECT(tmp_entry)->phys_contiguous)) {
                        /* This is not supported for now. In future */
                        /* we will need to detect the phys_contig   */
                        /* condition and then upgrade copy_slowly   */
@@ -8853,8 +9379,8 @@ vm_map_copyin_common(
                vm_map_clip_end(src_map, src_entry, src_end);
 
                src_size = src_entry->vme_end - src_start;
-               src_object = src_entry->object.vm_object;
-               src_offset = src_entry->offset;
+               src_object = VME_OBJECT(src_entry);
+               src_offset = VME_OFFSET(src_entry);
                was_wired = (src_entry->wired_count != 0);
 
                vm_map_entry_copy(new_entry, src_entry);
@@ -8892,12 +9418,12 @@ vm_map_copyin_common(
 
        RestartCopy:
                XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
-                   src_object, new_entry, new_entry->object.vm_object,
+                   src_object, new_entry, VME_OBJECT(new_entry),
                    was_wired, 0);
                if ((src_object == VM_OBJECT_NULL ||
                     (!was_wired && !map_share && !tmp_entry->is_shared)) &&
                    vm_object_copy_quickly(
-                           &new_entry->object.vm_object,
+                           &VME_OBJECT(new_entry),
                            src_offset,
                            src_size,
                            &src_needs_copy,
@@ -8914,7 +9440,8 @@ vm_map_copyin_common(
 
                                prot = src_entry->protection & ~VM_PROT_WRITE;
 
-                               if (override_nx(src_map, src_entry->alias) && prot)
+                               if (override_nx(src_map, VME_ALIAS(src_entry))
+                                   && prot)
                                        prot |= VM_PROT_EXECUTE;
 
                                vm_object_pmap_protect(
@@ -8927,6 +9454,7 @@ vm_map_copyin_common(
                                        src_entry->vme_start,
                                        prot);
 
+                               assert(tmp_entry->wired_count == 0);
                                tmp_entry->needs_copy = TRUE;
                        }
 
@@ -8967,8 +9495,8 @@ vm_map_copyin_common(
                                src_offset,
                                src_size,
                                THREAD_UNINT,
-                               &new_entry->object.vm_object);
-                       new_entry->offset = 0;
+                               &VME_OBJECT(new_entry));
+                       VME_OFFSET_SET(new_entry, 0);
                        new_entry->needs_copy = FALSE;
 
                }
@@ -8985,7 +9513,8 @@ vm_map_copyin_common(
                        if (new_object == VM_OBJECT_NULL)
                                goto CopySlowly;
 
-                       new_entry->object.vm_object = new_object;
+                       VME_OBJECT_SET(new_entry, new_object);
+                       assert(new_entry->wired_count == 0);
                        new_entry->needs_copy = TRUE;
                        assert(!new_entry->iokit_acct);
                        assert(new_object->purgable == VM_PURGABLE_DENY);
@@ -8993,12 +9522,17 @@ vm_map_copyin_common(
                        result = KERN_SUCCESS;
 
                } else {
+                       vm_object_offset_t new_offset;
+                       new_offset = VME_OFFSET(new_entry);
                        result = vm_object_copy_strategically(src_object,
                                                              src_offset,
                                                              src_size,
-                                                             &new_entry->object.vm_object,
-                                                             &new_entry->offset,
+                                                             &VME_OBJECT(new_entry),
+                                                             &new_offset,
                                                              &new_entry_needs_copy);
+                       if (new_offset != VME_OFFSET(new_entry)) {
+                               VME_OFFSET_SET(new_entry, new_offset);
+                       }
 
                        new_entry->needs_copy = new_entry_needs_copy;
                }
@@ -9039,8 +9573,8 @@ vm_map_copyin_common(
 
                if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
                        if (result != KERN_MEMORY_RESTART_COPY) {
-                               vm_object_deallocate(new_entry->object.vm_object);
-                               new_entry->object.vm_object = VM_OBJECT_NULL;
+                               vm_object_deallocate(VME_OBJECT(new_entry));
+                               VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
                                assert(!new_entry->iokit_acct);
                                new_entry->use_pmap = TRUE;
                        }
@@ -9062,8 +9596,8 @@ vm_map_copyin_common(
                        src_size = new_entry->vme_end - src_start;
                }
 
-               if ((src_entry->object.vm_object != src_object) ||
-                   (src_entry->offset != src_offset) ) {
+               if ((VME_OBJECT(src_entry) != src_object) ||
+                   (VME_OFFSET(src_entry) != src_offset) ) {
 
                        /*
                         *      Verification failed.
@@ -9073,7 +9607,7 @@ vm_map_copyin_common(
 
                VerificationFailed: ;
 
-                       vm_object_deallocate(new_entry->object.vm_object);
+                       vm_object_deallocate(VME_OBJECT(new_entry));
                        tmp_entry = src_entry;
                        continue;
                }
@@ -9213,7 +9747,7 @@ vm_map_copyin_common(
                        vm_map_offset_t adjustment;
 
                        original_start = tmp_entry->vme_start;
-                       original_offset = tmp_entry->offset;
+                       original_offset = VME_OFFSET(tmp_entry);
 
                        /* map-align the start of the first copy entry... */
                        adjustment = (tmp_entry->vme_start -
@@ -9221,7 +9755,8 @@ vm_map_copyin_common(
                                              tmp_entry->vme_start,
                                              VM_MAP_PAGE_MASK(src_map)));
                        tmp_entry->vme_start -= adjustment;
-                       tmp_entry->offset -= adjustment;
+                       VME_OFFSET_SET(tmp_entry,
+                                      VME_OFFSET(tmp_entry) - adjustment);
                        copy_addr -= adjustment;
                        assert(tmp_entry->vme_start < tmp_entry->vme_end);
                        /* ... adjust for mis-aligned start of copy range */
@@ -9234,7 +9769,9 @@ vm_map_copyin_common(
                                assert(page_aligned(adjustment));
                                assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
                                tmp_entry->vme_start += adjustment;
-                               tmp_entry->offset += adjustment;
+                               VME_OFFSET_SET(tmp_entry,
+                                              (VME_OFFSET(tmp_entry) +
+                                               adjustment));
                                copy_addr += adjustment;
                                assert(tmp_entry->vme_start < tmp_entry->vme_end);
                        }
@@ -9244,7 +9781,7 @@ vm_map_copyin_common(
                         * more than was originally copied...
                         */
                        assert(tmp_entry->vme_start >= original_start);
-                       assert(tmp_entry->offset >= original_offset);
+                       assert(VME_OFFSET(tmp_entry) >= original_offset);
                        /*
                         * ... and that it did not adjust outside of
                         * a single 16K page.
@@ -9478,7 +10015,7 @@ vm_map_fork_share(
         *      make a new shadow and share it.
         */
        
-       object = old_entry->object.vm_object;
+       object = VME_OBJECT(old_entry);
        if (old_entry->is_sub_map) {
                assert(old_entry->wired_count == 0);
 #ifndef NO_NESTED_PMAP
@@ -9486,7 +10023,7 @@ vm_map_fork_share(
                        kern_return_t   result;
 
                        result = pmap_nest(new_map->pmap, 
-                                          (old_entry->object.sub_map)->pmap, 
+                                          (VME_SUBMAP(old_entry))->pmap, 
                                           (addr64_t)old_entry->vme_start,
                                           (addr64_t)old_entry->vme_start,
                                           (uint64_t)(old_entry->vme_end - old_entry->vme_start));
@@ -9497,8 +10034,8 @@ vm_map_fork_share(
        } else if (object == VM_OBJECT_NULL) {
                object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
                                                            old_entry->vme_start));
-               old_entry->offset = 0;
-               old_entry->object.vm_object = object;
+               VME_OFFSET_SET(old_entry, 0);
+               VME_OBJECT_SET(old_entry, object);
                old_entry->use_pmap = TRUE;
                assert(!old_entry->needs_copy);
        } else if (object->copy_strategy !=
@@ -9596,10 +10133,9 @@ vm_map_fork_share(
                 *      (This is a preemptive version of
                 *      case 2.)
                 */
-               vm_object_shadow(&old_entry->object.vm_object,
-                                &old_entry->offset,
-                                (vm_map_size_t) (old_entry->vme_end -
-                                                 old_entry->vme_start));
+               VME_OBJECT_SHADOW(old_entry,
+                                 (vm_map_size_t) (old_entry->vme_end -
+                                                  old_entry->vme_start));
                
                /*
                 *      If we're making a shadow for other than
@@ -9613,13 +10149,13 @@ vm_map_fork_share(
 
                        prot = old_entry->protection & ~VM_PROT_WRITE;
 
-                       if (override_nx(old_map, old_entry->alias) && prot)
+                       if (override_nx(old_map, VME_ALIAS(old_entry)) && prot)
                                prot |= VM_PROT_EXECUTE;
 
                        if (old_map->mapped_in_other_pmaps) {
                                vm_object_pmap_protect(
-                                       old_entry->object.vm_object,
-                                       old_entry->offset,
+                                       VME_OBJECT(old_entry),
+                                       VME_OFFSET(old_entry),
                                        (old_entry->vme_end -
                                         old_entry->vme_start),
                                        PMAP_NULL,
@@ -9634,7 +10170,7 @@ vm_map_fork_share(
                }
                
                old_entry->needs_copy = FALSE;
-               object = old_entry->object.vm_object;
+               object = VME_OBJECT(old_entry);
        }
 
        
@@ -9648,9 +10184,9 @@ vm_map_fork_share(
         */
        
        if(old_entry->is_sub_map) {
-               vm_map_lock(old_entry->object.sub_map);
-               vm_map_reference(old_entry->object.sub_map);
-               vm_map_unlock(old_entry->object.sub_map);
+               vm_map_lock(VME_SUBMAP(old_entry));
+               vm_map_reference(VME_SUBMAP(old_entry));
+               vm_map_unlock(VME_SUBMAP(old_entry));
        } else {
                vm_object_lock(object);
                vm_object_reference_locked(object);
@@ -9788,14 +10324,16 @@ vm_map_fork(
        vm_map_entry_t  new_entry;
        boolean_t       src_needs_copy;
        boolean_t       new_entry_needs_copy;
+       boolean_t       pmap_is64bit;
 
-       new_pmap = pmap_create(ledger, (vm_map_size_t) 0,
+       pmap_is64bit =
 #if defined(__i386__) || defined(__x86_64__)
-                              old_map->pmap->pm_task_map != TASK_MAP_32BIT
+                              old_map->pmap->pm_task_map != TASK_MAP_32BIT;
 #else
 #error Unknown architecture.
 #endif
-                              );
+
+       new_pmap = pmap_create(ledger, (vm_map_size_t) 0, pmap_is64bit);
 
        vm_map_reference_swap(old_map);
        vm_map_lock(old_map);
@@ -9833,8 +10371,8 @@ vm_map_fork(
                        if(old_entry->is_sub_map)
                                break;
                        if ((old_entry->wired_count != 0) ||
-                           ((old_entry->object.vm_object != NULL) &&
-                            (old_entry->object.vm_object->true_share))) {
+                           ((VME_OBJECT(old_entry) != NULL) &&
+                            (VME_OBJECT(old_entry)->true_share))) {
                                goto slow_vm_map_fork_copy;
                        }
 
@@ -9846,8 +10384,8 @@ vm_map_fork(
                        }
 
                        if (! vm_object_copy_quickly(
-                                   &new_entry->object.vm_object,
-                                   old_entry->offset,
+                                   &VME_OBJECT(new_entry),
+                                   VME_OFFSET(old_entry),
                                    (old_entry->vme_end -
                                     old_entry->vme_start),
                                    &src_needs_copy,
@@ -9865,12 +10403,13 @@ vm_map_fork(
 
                                prot = old_entry->protection & ~VM_PROT_WRITE;
 
-                               if (override_nx(old_map, old_entry->alias) && prot)
+                               if (override_nx(old_map, VME_ALIAS(old_entry))
+                                   && prot)
                                        prot |= VM_PROT_EXECUTE;
 
                                vm_object_pmap_protect(
-                                       old_entry->object.vm_object,
-                                       old_entry->offset,
+                                       VME_OBJECT(old_entry),
+                                       VME_OFFSET(old_entry),
                                        (old_entry->vme_end -
                                         old_entry->vme_start),
                                        ((old_entry->is_shared 
@@ -9880,6 +10419,7 @@ vm_map_fork(
                                        old_entry->vme_start,
                                        prot);
 
+                               assert(old_entry->wired_count == 0);
                                old_entry->needs_copy = TRUE;
                        }
                        new_entry->needs_copy = new_entry_needs_copy;
@@ -10059,7 +10599,7 @@ submap_recurse:
                        if ((*real_map != map) && 
                            (*real_map != cow_sub_map_parent))
                                vm_map_unlock(*real_map);
-                       *real_map = entry->object.sub_map;
+                       *real_map = VME_SUBMAP(entry);
                }
 
                if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
@@ -10069,8 +10609,8 @@ submap_recurse:
                                        *real_map = map;
                                        goto RetryLookup;
                                }
-                               vm_map_lock_read(entry->object.sub_map);
-                               *var_map = entry->object.sub_map;
+                               vm_map_lock_read(VME_SUBMAP(entry));
+                               *var_map = VME_SUBMAP(entry);
                                cow_sub_map_parent = map;
                                /* reset base to map before cow object */
                                /* this is the map which will accept   */
@@ -10080,15 +10620,15 @@ submap_recurse:
                                cow_parent_vaddr = vaddr;
                                mapped_needs_copy = TRUE;
                        } else {
-                               vm_map_lock_read(entry->object.sub_map);
-                               *var_map = entry->object.sub_map;
+                               vm_map_lock_read(VME_SUBMAP(entry));
+                               *var_map = VME_SUBMAP(entry);
                                if((cow_sub_map_parent != map) &&
                                   (*real_map != map))
                                        vm_map_unlock(map);
                        }
                } else {
-                       vm_map_lock_read(entry->object.sub_map);
-                       *var_map = entry->object.sub_map;       
+                       vm_map_lock_read(VME_SUBMAP(entry));
+                       *var_map = VME_SUBMAP(entry);   
                        /* leave map locked if it is a target */
                        /* cow sub_map above otherwise, just  */
                        /* follow the maps down to the object */
@@ -10101,7 +10641,7 @@ submap_recurse:
                map = *var_map;
 
                /* calculate the offset in the submap for vaddr */
-               local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
+               local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
 
        RetrySubMap:
                if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
@@ -10126,13 +10666,13 @@ submap_recurse:
                /* ultimately be clipped in the top map will only need    */
                /* to be as big as the portion of the underlying entry    */
                /* which is mapped */
-               start_delta = submap_entry->vme_start > entry->offset ?
-                       submap_entry->vme_start - entry->offset : 0;
+               start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
+                       submap_entry->vme_start - VME_OFFSET(entry) : 0;
 
                end_delta = 
-                       (entry->offset + start_delta + (old_end - old_start)) <=
+                       (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
                        submap_entry->vme_end ?
-                       0 : (entry->offset + 
+                       0 : (VME_OFFSET(entry) + 
                             (old_end - old_start))
                        - submap_entry->vme_end; 
 
@@ -10161,15 +10701,15 @@ submap_recurse:
                        }
 
 
-                       sub_object = submap_entry->object.vm_object;
+                       sub_object = VME_OBJECT(submap_entry);
                        if (sub_object == VM_OBJECT_NULL) {
                                sub_object =
                                        vm_object_allocate(
                                                (vm_map_size_t)
                                                (submap_entry->vme_end -
                                                 submap_entry->vme_start));
-                               submap_entry->object.vm_object = sub_object;
-                               submap_entry->offset = 0;
+                               VME_OBJECT_SET(submap_entry, sub_object);
+                               VME_OFFSET_SET(submap_entry, 0);
                        }
                        local_start =  local_vaddr - 
                                (cow_parent_vaddr - old_start);
@@ -10193,7 +10733,7 @@ submap_recurse:
                            MEMORY_OBJECT_COPY_NONE)) {
                                vm_object_lock(sub_object);
                                vm_object_copy_slowly(sub_object,
-                                                     submap_entry->offset,
+                                                     VME_OFFSET(submap_entry),
                                                      (submap_entry->vme_end -
                                                       submap_entry->vme_start),
                                                      FALSE,
@@ -10205,16 +10745,19 @@ submap_recurse:
                                copy_object = sub_object;
                                vm_object_reference(copy_object);
                                sub_object->shadowed = TRUE;
+                               assert(submap_entry->wired_count == 0);
                                submap_entry->needs_copy = TRUE;
 
                                prot = submap_entry->protection & ~VM_PROT_WRITE;
 
-                               if (override_nx(old_map, submap_entry->alias) && prot)
+                               if (override_nx(old_map,
+                                               VME_ALIAS(submap_entry))
+                                   && prot)
                                        prot |= VM_PROT_EXECUTE;
 
                                vm_object_pmap_protect(
                                        sub_object,
-                                       submap_entry->offset,
+                                       VME_OFFSET(submap_entry),
                                        submap_entry->vme_end - 
                                        submap_entry->vme_start,
                                        (submap_entry->is_shared 
@@ -10229,7 +10772,7 @@ submap_recurse:
                         */
                        copy_offset = (local_vaddr -
                                       submap_entry->vme_start +
-                                      submap_entry->offset);
+                                      VME_OFFSET(submap_entry));
 
                        /* This works differently than the */
                        /* normal submap case. We go back  */
@@ -10290,22 +10833,23 @@ submap_recurse:
 
                        /* substitute copy object for */
                        /* shared map entry           */
-                       vm_map_deallocate(entry->object.sub_map);
+                       vm_map_deallocate(VME_SUBMAP(entry));
                        assert(!entry->iokit_acct);
                        entry->is_sub_map = FALSE;
                        entry->use_pmap = TRUE;
-                       entry->object.vm_object = copy_object;
+                       VME_OBJECT_SET(entry, copy_object);
 
                        /* propagate the submap entry's protections */
                        entry->protection |= submap_entry->protection;
                        entry->max_protection |= submap_entry->max_protection;
 
                        if(copied_slowly) {
-                               entry->offset = local_start - old_start;
+                               VME_OFFSET_SET(entry, local_start - old_start);
                                entry->needs_copy = FALSE;
                                entry->is_shared = FALSE;
                        } else {
-                               entry->offset = copy_offset;
+                               VME_OFFSET_SET(entry, copy_offset);
+                               assert(entry->wired_count == 0);
                                entry->needs_copy = TRUE;
                                if(entry->inheritance == VM_INHERIT_SHARE) 
                                        entry->inheritance = VM_INHERIT_COPY;
@@ -10334,7 +10878,7 @@ submap_recurse:
 
        prot = entry->protection;
 
-       if (override_nx(old_map, entry->alias) && prot) {
+       if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
                /*
                 * HACK -- if not a stack, then allow execution
                 */
@@ -10395,12 +10939,11 @@ submap_recurse:
                                vm_map_lock_read(map);
                                goto RetryLookup;
                        }
-                       vm_object_shadow(&entry->object.vm_object,
-                                        &entry->offset,
-                                        (vm_map_size_t) (entry->vme_end -
-                                                         entry->vme_start));
+                       VME_OBJECT_SHADOW(entry,
+                                         (vm_map_size_t) (entry->vme_end -
+                                                          entry->vme_start));
 
-                       entry->object.vm_object->shadowed = TRUE;
+                       VME_OBJECT(entry)->shadowed = TRUE;
                        entry->needs_copy = FALSE;
                        vm_map_lock_write_to_read(map);
                }
@@ -10417,16 +10960,18 @@ submap_recurse:
        /*
         *      Create an object if necessary.
         */
-       if (entry->object.vm_object == VM_OBJECT_NULL) {
+       if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
 
                if (vm_map_lock_read_to_write(map)) {
                        vm_map_lock_read(map);
                        goto RetryLookup;
                }
 
-               entry->object.vm_object = vm_object_allocate(
-                       (vm_map_size_t)(entry->vme_end - entry->vme_start));
-               entry->offset = 0;
+               VME_OBJECT_SET(entry,
+                              vm_object_allocate(
+                                      (vm_map_size_t)(entry->vme_end -
+                                                      entry->vme_start)));
+               VME_OFFSET_SET(entry, 0);
                vm_map_lock_write_to_read(map);
        }
 
@@ -10436,27 +10981,33 @@ submap_recurse:
         *      return the protection.
         */
 
-        *offset = (vaddr - entry->vme_start) + entry->offset;
-        *object = entry->object.vm_object;
+        *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
+        *object = VME_OBJECT(entry);
        *out_prot = prot;
 
        if (fault_info) {
                fault_info->interruptible = THREAD_UNINT; /* for now... */
                /* ... the caller will change "interruptible" if needed */
                fault_info->cluster_size = 0;
-               fault_info->user_tag = entry->alias;
+               fault_info->user_tag = VME_ALIAS(entry);
                fault_info->pmap_options = 0;
                if (entry->iokit_acct ||
                    (!entry->is_sub_map && !entry->use_pmap)) {
                        fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
                }
                fault_info->behavior = entry->behavior;
-               fault_info->lo_offset = entry->offset;
-               fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
+               fault_info->lo_offset = VME_OFFSET(entry);
+               fault_info->hi_offset =
+                       (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
                fault_info->no_cache  = entry->no_cache;
                fault_info->stealth = FALSE;
                fault_info->io_sync = FALSE;
-               fault_info->cs_bypass = (entry->used_for_jit)? TRUE : FALSE;
+               if (entry->used_for_jit ||
+                   entry->vme_resilient_codesign) {
+                       fault_info->cs_bypass = TRUE;
+               } else {
+                       fault_info->cs_bypass = FALSE;
+               }
                fault_info->mark_zf_absent = FALSE;
                fault_info->batch_pmap_op = FALSE;
        }
@@ -10612,6 +11163,11 @@ vm_map_region_recurse_64(
        user_address = *address;
        user_max_depth = *nesting_depth;
        
+       if (not_in_kdp) {
+               vm_map_lock_read(map);
+       }
+
+recurse_again:
        curr_entry = NULL;
        curr_map = map;
        curr_address = user_address;
@@ -10630,10 +11186,6 @@ vm_map_region_recurse_64(
        next_max_above = (vm_map_offset_t) -1;
        next_max_below = (vm_map_offset_t) -1;
 
-       if (not_in_kdp) {
-               vm_map_lock_read(curr_map);
-       }
-
        for (;;) {
                if (vm_map_lookup_entry(curr_map,
                                        curr_address,
@@ -10658,6 +11210,7 @@ vm_map_region_recurse_64(
                                }
                                curr_entry = NULL;
                                curr_map = NULL;
+                               curr_skip = 0;
                                curr_offset = 0;
                                curr_depth = 0;
                                curr_max_above = 0;
@@ -10668,7 +11221,7 @@ vm_map_region_recurse_64(
                        /* adjust current address and offset */
                        skip = curr_entry->vme_start - curr_address;
                        curr_address = curr_entry->vme_start;
-                       curr_skip = skip;
+                       curr_skip += skip;
                        curr_offset += skip;
                        curr_max_above -= skip;
                        curr_max_below = 0;
@@ -10707,6 +11260,7 @@ vm_map_region_recurse_64(
                        next_depth = curr_depth;
                        next_address = next_entry->vme_start;
                        next_skip = curr_skip;
+                       next_skip += (next_address - curr_address);
                        next_offset = curr_offset;
                        next_offset += (next_address - curr_address);
                        next_max_above = MIN(next_max_above, curr_max_above);
@@ -10723,7 +11277,7 @@ vm_map_region_recurse_64(
                 * the rest of that submap is irrelevant to us, since it's not
                 * mapped here.
                 * The relevant portion of the map starts at
-                * "curr_entry->offset" up to the size of "curr_entry".
+                * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
                 */
                curr_max_above = MIN(curr_max_above,
                                     curr_entry->vme_end - curr_address);
@@ -10750,7 +11304,7 @@ vm_map_region_recurse_64(
                 * later.
                 */
                if (not_in_kdp) {
-                       vm_map_lock_read(curr_entry->object.sub_map);
+                       vm_map_lock_read(VME_SUBMAP(curr_entry));
                }
                if (curr_map == next_map) {
                        /* keep "next_map" locked in case we need it */
@@ -10763,17 +11317,17 @@ vm_map_region_recurse_64(
                /*
                 * Adjust the offset.  "curr_entry" maps the submap
                 * at relative address "curr_entry->vme_start" in the
-                * curr_map but skips the first "curr_entry->offset"
+                * curr_map but skips the first "VME_OFFSET(curr_entry)"
                 * bytes of the submap.
                 * "curr_offset" always represents the offset of a virtual
                 * address in the curr_map relative to the absolute address
                 * space (i.e. the top-level VM map).
                 */
                curr_offset +=
-                       (curr_entry->offset - curr_entry->vme_start);
+                       (VME_OFFSET(curr_entry) - curr_entry->vme_start);
                curr_address = user_address + curr_offset;
                /* switch to the submap */
-               curr_map = curr_entry->object.sub_map;
+               curr_map = VME_SUBMAP(curr_entry);
                curr_depth++;
                curr_entry = NULL;
        }
@@ -10793,9 +11347,6 @@ vm_map_region_recurse_64(
                curr_depth = next_depth;
                curr_max_above = next_max_above;
                curr_max_below = next_max_below;
-               if (curr_map == map) {
-                       user_address = curr_address;
-               }
        } else {
                /* we won't need "next_entry" after all */
                if (next_entry != NULL) {
@@ -10813,6 +11364,18 @@ vm_map_region_recurse_64(
        next_max_below = -1;
        next_max_above = -1;
 
+       if (curr_entry->is_sub_map &&
+           curr_depth < user_max_depth) {
+               /*
+                * We're not as deep as we could be:  we must have
+                * gone back up after not finding anything mapped
+                * below the original top-level map entry.
+                * Let's move "curr_address" forward and recurse again.
+                */
+               user_address = curr_address;
+               goto recurse_again;
+       }
+
        *nesting_depth = curr_depth;
        *size = curr_max_above + curr_max_below;
        *address = user_address + curr_skip - curr_max_below;
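
The vm_map_region_recurse_64() changes take the read lock once on the top-level map, accumulate curr_skip across unmapped gaps, and, as shown above, restart the whole descent (goto recurse_again) when the walk ends on a submap entry before reaching the requested nesting depth. A toy model of that restart loop, with a hypothetical helper standing in for the per-map lookup:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct region {
        uint64_t address;
        uint32_t depth;
        bool     is_sub_map;
};

/* Stand-in for one full descent from the top map; the real code walks
 * vm_map_lookup_entry() down through nested submaps. */
static struct region
descend_once(uint64_t user_address)
{
        /* Pretend everything below 0x4000 resolves to an empty submap. */
        if (user_address < 0x4000)
                return (struct region){ 0x4000, 0, true };
        return (struct region){ user_address, 1, false };
}

static struct region
region_recurse(uint64_t user_address, uint32_t user_max_depth)
{
        for (;;) {
                struct region r = descend_once(user_address);

                if (r.is_sub_map && r.depth < user_max_depth) {
                        /* Not as deep as allowed: skip past the unmapped
                         * range and recurse again (the goto recurse_again). */
                        user_address = r.address;
                        continue;
                }
                return r;
        }
}

int
main(void)
{
        struct region r = region_recurse(0x1000, 2);

        printf("resolved at 0x%llx, depth %u\n",
               (unsigned long long)r.address, r.depth);
        return 0;
}
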
@@ -10823,25 +11386,25 @@ vm_map_region_recurse_64(
 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
 
        if (look_for_pages) {
-               submap_info->user_tag = curr_entry->alias;
-               submap_info->offset = curr_entry->offset;
+               submap_info->user_tag = VME_ALIAS(curr_entry);
+               submap_info->offset = VME_OFFSET(curr_entry);
                submap_info->protection = curr_entry->protection;
                submap_info->inheritance = curr_entry->inheritance;
                submap_info->max_protection = curr_entry->max_protection;
                submap_info->behavior = curr_entry->behavior;
                submap_info->user_wired_count = curr_entry->user_wired_count;
                submap_info->is_submap = curr_entry->is_sub_map;
-               submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
+               submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
        } else {
-               short_info->user_tag = curr_entry->alias;
-               short_info->offset = curr_entry->offset;
+               short_info->user_tag = VME_ALIAS(curr_entry);
+               short_info->offset = VME_OFFSET(curr_entry);
                short_info->protection = curr_entry->protection;
                short_info->inheritance = curr_entry->inheritance;
                short_info->max_protection = curr_entry->max_protection;
                short_info->behavior = curr_entry->behavior;
                short_info->user_wired_count = curr_entry->user_wired_count;
                short_info->is_submap = curr_entry->is_sub_map;
-               short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
+               short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
        }
 
        extended.pages_resident = 0;
@@ -10851,6 +11414,8 @@ vm_map_region_recurse_64(
        extended.pages_reusable = 0;
        extended.external_pager = 0;
        extended.shadow_depth = 0;
+       extended.share_mode = SM_EMPTY;
+       extended.ref_count = 0;
 
        if (not_in_kdp) {
                if (!curr_entry->is_sub_map) {
@@ -10862,7 +11427,7 @@ vm_map_region_recurse_64(
                        vm_map_region_walk(curr_map,
                                           range_start,
                                           curr_entry,
-                                          (curr_entry->offset +
+                                          (VME_OFFSET(curr_entry) +
                                            (range_start -
                                             curr_entry->vme_start)),
                                           range_end - range_start,
@@ -10879,8 +11444,7 @@ vm_map_region_recurse_64(
                        } else {
                                extended.share_mode = SM_PRIVATE;
                        }
-                       extended.ref_count =
-                               curr_entry->object.sub_map->ref_count;
+                       extended.ref_count = VME_SUBMAP(curr_entry)->ref_count;
                }
        }
 
@@ -10968,7 +11532,7 @@ vm_map_region(
 
                start = entry->vme_start;
 
-               basic->offset = (uint32_t)entry->offset;
+               basic->offset = (uint32_t)VME_OFFSET(entry);
                basic->protection = entry->protection;
                basic->inheritance = entry->inheritance;
                basic->max_protection = entry->max_protection;
@@ -11013,7 +11577,7 @@ vm_map_region(
 
                start = entry->vme_start;
 
-               basic->offset = entry->offset;
+               basic->offset = VME_OFFSET(entry);
                basic->protection = entry->protection;
                basic->inheritance = entry->inheritance;
                basic->max_protection = entry->max_protection;
@@ -11061,7 +11625,7 @@ vm_map_region(
                start = entry->vme_start;
 
                extended->protection = entry->protection;
-               extended->user_tag = entry->alias;
+               extended->user_tag = VME_ALIAS(entry);
                extended->pages_resident = 0;
                extended->pages_swapped_out = 0;
                extended->pages_shared_now_private = 0;
@@ -11077,7 +11641,7 @@ vm_map_region(
                        *count = VM_REGION_EXTENDED_INFO_COUNT;
                }
 
-               vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE, *count);
+               vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
 
                if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
                        extended->share_mode = SM_PRIVATE;
@@ -11144,7 +11708,7 @@ vm_map_region_top_walk(
        vm_region_top_info_t       top)
 {
 
-       if (entry->object.vm_object == 0 || entry->is_sub_map) {
+       if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
                top->share_mode = SM_EMPTY;
                top->ref_count = 0;
                top->obj_id = 0;
@@ -11158,7 +11722,7 @@ vm_map_region_top_walk(
 
                entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
 
-               obj = entry->object.vm_object;
+               obj = VME_OBJECT(entry);
 
                vm_object_lock(obj);
 
@@ -11239,9 +11803,9 @@ vm_map_region_walk(
        struct vm_object        *shadow_object;
        int                     shadow_depth;
 
-       if ((entry->object.vm_object == 0) ||
+       if ((VME_OBJECT(entry) == 0) ||
            (entry->is_sub_map) ||
-           (entry->object.vm_object->phys_contiguous &&
+           (VME_OBJECT(entry)->phys_contiguous &&
             !entry->superpage_size)) {
                extended->share_mode = SM_EMPTY;
                extended->ref_count = 0;
@@ -11259,7 +11823,7 @@ vm_map_region_walk(
        }
 
        {
-               obj = entry->object.vm_object;
+               obj = VME_OBJECT(entry);
 
                vm_object_lock(obj);
 
@@ -11336,7 +11900,7 @@ vm_map_region_walk(
                        register vm_map_entry_t      last;
                        int      my_refs;
 
-                       obj = entry->object.vm_object;
+                       obj = VME_OBJECT(entry);
                        last = vm_map_to_entry(map);
                        my_refs = 0;
 
@@ -11492,7 +12056,7 @@ vm_map_region_count_obj_refs(
        register vm_object_t chk_obj;
        register vm_object_t tmp_obj;
 
-       if (entry->object.vm_object == 0)
+       if (VME_OBJECT(entry) == 0)
                return(0);
 
         if (entry->is_sub_map)
@@ -11500,7 +12064,7 @@ vm_map_region_count_obj_refs(
        else {
                ref_count = 0;
 
-               chk_obj = entry->object.vm_object;
+               chk_obj = VME_OBJECT(entry);
                vm_object_lock(chk_obj);
 
                while (chk_obj) {
@@ -11548,10 +12112,10 @@ vm_map_simplify_entry(
            (prev_entry->vme_end == this_entry->vme_start) &&
 
            (prev_entry->is_sub_map == this_entry->is_sub_map) &&
-           (prev_entry->object.vm_object == this_entry->object.vm_object) &&
-           ((prev_entry->offset + (prev_entry->vme_end -
+           (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
+           ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
                                    prev_entry->vme_start))
-            == this_entry->offset) &&
+            == VME_OFFSET(this_entry)) &&
 
            (prev_entry->behavior == this_entry->behavior) &&
            (prev_entry->needs_copy == this_entry->needs_copy) &&
@@ -11559,7 +12123,7 @@ vm_map_simplify_entry(
            (prev_entry->max_protection == this_entry->max_protection) &&
            (prev_entry->inheritance == this_entry->inheritance) &&
            (prev_entry->use_pmap == this_entry->use_pmap) &&
-           (prev_entry->alias == this_entry->alias) &&
+           (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
            (prev_entry->no_cache == this_entry->no_cache) &&
            (prev_entry->permanent == this_entry->permanent) &&
            (prev_entry->map_aligned == this_entry->map_aligned) &&
@@ -11567,6 +12131,10 @@ vm_map_simplify_entry(
            (prev_entry->used_for_jit == this_entry->used_for_jit) &&
            /* from_reserved_zone: OK if that field doesn't match */
            (prev_entry->iokit_acct == this_entry->iokit_acct) &&
+           (prev_entry->vme_resilient_codesign ==
+            this_entry->vme_resilient_codesign) &&
+           (prev_entry->vme_resilient_media ==
+            this_entry->vme_resilient_media) &&
 
            (prev_entry->wired_count == this_entry->wired_count) &&
            (prev_entry->user_wired_count == this_entry->user_wired_count) &&
@@ -11586,11 +12154,16 @@ vm_map_simplify_entry(
                        assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
                                                   VM_MAP_PAGE_MASK(map)));
                this_entry->vme_start = prev_entry->vme_start;
-               this_entry->offset = prev_entry->offset;
+               VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
+
+               if (map->holelistenabled) {
+                       vm_map_store_update_first_free(map, this_entry, TRUE);
+               }
+
                if (prev_entry->is_sub_map) {
-                       vm_map_deallocate(prev_entry->object.sub_map);
+                       vm_map_deallocate(VME_SUBMAP(prev_entry));
                } else {
-                       vm_object_deallocate(prev_entry->object.vm_object);
+                       vm_object_deallocate(VME_OBJECT(prev_entry));
                }
                vm_map_entry_dispose(map, prev_entry);
                SAVE_HINT_MAP_WRITE(map, this_entry);
@@ -11716,15 +12289,15 @@ vm_map_machine_attribute(
                                vm_map_offset_t sub_end;
 
                                sub_start = (start - entry->vme_start) 
-                                       + entry->offset;
+                                       + VME_OFFSET(entry);
                                sub_end = sub_start + sub_size;
                                vm_map_machine_attribute(
-                                       entry->object.sub_map,
+                                       VME_SUBMAP(entry),
                                        sub_start,
                                        sub_end,
                                        attribute, value);
                        } else {
-                               if(entry->object.vm_object) {
+                               if (VME_OBJECT(entry)) {
                                        vm_page_t               m;
                                        vm_object_t             object;
                                        vm_object_t             base_object;
@@ -11734,9 +12307,9 @@ vm_map_machine_attribute(
                                        vm_map_size_t           range;
                                        range = sub_size;
                                        offset = (start - entry->vme_start)
-                                               + entry->offset;
+                                               + VME_OFFSET(entry);
                                        base_offset = offset;
-                                       object = entry->object.vm_object;
+                                       object = VME_OBJECT(entry);
                                        base_object = object;
                                        last_object = NULL;
 
@@ -11888,6 +12461,11 @@ vm_map_behavior_set(
        case VM_BEHAVIOR_CAN_REUSE:
                return vm_map_can_reuse(map, start, end);
 
+#if MACH_ASSERT
+       case VM_BEHAVIOR_PAGEOUT:
+               return vm_map_pageout(map, start, end);
+#endif /* MACH_ASSERT */
+
        default:
                return(KERN_INVALID_ARGUMENT);
        }
@@ -11964,7 +12542,7 @@ vm_map_willneed(
                 * correspond.  After that, the offset will always be zero to
                 * correspond to the beginning of the current vm_map_entry.
                 */
-               offset = (start - entry->vme_start) + entry->offset;
+               offset = (start - entry->vme_start) + VME_OFFSET(entry);
 
                /*
                 * Set the length so we don't go beyond the end of the
@@ -11985,7 +12563,7 @@ vm_map_willneed(
                fault_info.cluster_size = (vm_size_t) len;
                fault_info.lo_offset    = offset; 
                fault_info.hi_offset    = offset + len;
-               fault_info.user_tag     = entry->alias;
+               fault_info.user_tag     = VME_ALIAS(entry);
                fault_info.pmap_options = 0;
                if (entry->iokit_acct ||
                    (!entry->is_sub_map && !entry->use_pmap)) {
@@ -12083,9 +12661,11 @@ static boolean_t
 vm_map_entry_is_reusable(
        vm_map_entry_t entry)
 {
+       /* Only user map entries */
+
        vm_object_t object;
 
-       switch (entry->alias) {
+       switch (VME_ALIAS(entry)) {
        case VM_MEMORY_MALLOC:
        case VM_MEMORY_MALLOC_SMALL:
        case VM_MEMORY_MALLOC_LARGE:
@@ -12122,7 +12702,7 @@ vm_map_entry_is_reusable(
                return FALSE;
        }
 
-       object = entry->object.vm_object;
+       object = VME_OBJECT(entry);
        if (object == VM_OBJECT_NULL) {
                return TRUE;
        }
@@ -12173,6 +12753,7 @@ vm_map_reuse_pages(
         */
 
        vm_map_lock_read(map);
+       assert(map->pmap != kernel_pmap);       /* protect alias access */
 
        /*
         * The madvise semantics require that the address range be fully
@@ -12211,10 +12792,10 @@ vm_map_reuse_pages(
                        start_offset = 0;
                }
                end_offset = MIN(end, entry->vme_end) - entry->vme_start;
-               start_offset += entry->offset;
-               end_offset += entry->offset;
+               start_offset += VME_OFFSET(entry);
+               end_offset += VME_OFFSET(entry);
 
-               object = entry->object.vm_object;
+               object = VME_OBJECT(entry);
                if (object != VM_OBJECT_NULL) {
                        vm_object_lock(object);
                        vm_object_reuse_pages(object, start_offset, end_offset,
@@ -12222,7 +12803,7 @@ vm_map_reuse_pages(
                        vm_object_unlock(object);
                }
 
-               if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
+               if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
                        /*
                         * XXX
                         * We do not hold the VM map exclusively here.
@@ -12231,7 +12812,7 @@ vm_map_reuse_pages(
                         * one that can be modified while holding the VM map
                         * "shared".
                         */
-                       entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
+                       VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
                }
        }
        
@@ -12250,6 +12831,7 @@ vm_map_reusable_pages(
        vm_map_entry_t                  entry;
        vm_object_t                     object;
        vm_object_offset_t              start_offset, end_offset;
+       vm_map_offset_t                 pmap_offset;
 
        /*
         * The MADV_REUSABLE operation doesn't require any changes to the
@@ -12257,6 +12839,7 @@ vm_map_reusable_pages(
         */
 
        vm_map_lock_read(map);
+       assert(map->pmap != kernel_pmap);       /* protect alias access */
 
        /*
         * The madvise semantics require that the address range be fully
@@ -12293,14 +12876,16 @@ vm_map_reusable_pages(
                 */
                if (entry->vme_start < start) {
                        start_offset = start - entry->vme_start;
+                       pmap_offset = start;
                } else {
                        start_offset = 0;
+                       pmap_offset = entry->vme_start;
                }
                end_offset = MIN(end, entry->vme_end) - entry->vme_start;
-               start_offset += entry->offset;
-               end_offset += entry->offset;
+               start_offset += VME_OFFSET(entry);
+               end_offset += VME_OFFSET(entry);
 
-               object = entry->object.vm_object;
+               object = VME_OBJECT(entry);
                if (object == VM_OBJECT_NULL)
                        continue;
 
@@ -12325,14 +12910,16 @@ vm_map_reusable_pages(
                                                   start_offset,
                                                   end_offset - start_offset,
                                                   kill_pages,
-                                                  TRUE /*reusable_pages*/);
+                                                  TRUE /*reusable_pages*/,
+                                                  map->pmap,
+                                                  pmap_offset);
                } else {
                        vm_page_stats_reusable.reusable_pages_shared++;
                }
                vm_object_unlock(object);
 
-               if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
-                   entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
+               if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
+                   VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
                        /*
                         * XXX
                         * We do not hold the VM map exclusively here.
@@ -12341,7 +12928,7 @@ vm_map_reusable_pages(
                         * one that can be modified while holding the VM map
                         * "shared".
                         */
-                       entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
+                       VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
                }
        }
        
@@ -12365,6 +12952,7 @@ vm_map_can_reuse(
         */
 
        vm_map_lock_read(map);
+       assert(map->pmap != kernel_pmap);       /* protect alias access */
 
        /*
         * The madvise semantics require that the address range be fully
@@ -12399,6 +12987,97 @@ vm_map_can_reuse(
 }
 
 
+#if MACH_ASSERT
+static kern_return_t
+vm_map_pageout(
+       vm_map_t        map,
+       vm_map_offset_t start,
+       vm_map_offset_t end)
+{
+       vm_map_entry_t                  entry;
+
+       /*
+        * The MADV_PAGEOUT operation doesn't require any changes to the
+        * vm_map_entry_t's, so the read lock is sufficient.
+        */
+
+       vm_map_lock_read(map);
+
+       /*
+        * The madvise semantics require that the address range be fully
+        * allocated with no holes.  Otherwise, we're required to return
+        * an error.
+        */
+
+       if (!vm_map_range_check(map, start, end, &entry)) {
+               vm_map_unlock_read(map);
+               return KERN_INVALID_ADDRESS;
+       }
+
+       /*
+        * Examine each vm_map_entry_t in the range.
+        */
+       for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
+            entry = entry->vme_next) {
+               vm_object_t     object;
+
+               /*
+                * Sanity check on the VM map entry.
+                */
+               if (entry->is_sub_map) {
+                       vm_map_t submap;
+                       vm_map_offset_t submap_start;
+                       vm_map_offset_t submap_end;
+                       vm_map_entry_t submap_entry;
+
+                       submap = VME_SUBMAP(entry);
+                       submap_start = VME_OFFSET(entry);
+                       submap_end = submap_start + (entry->vme_end - 
+                                                    entry->vme_start);
+
+                       vm_map_lock_read(submap);
+
+                       if (! vm_map_range_check(submap,
+                                                submap_start,
+                                                submap_end,
+                                                &submap_entry)) {
+                               vm_map_unlock_read(submap);
+                               vm_map_unlock_read(map);
+                               return KERN_INVALID_ADDRESS;
+                       }
+
+                       object = VME_OBJECT(submap_entry);
+                       if (submap_entry->is_sub_map ||
+                           object == VM_OBJECT_NULL ||
+                           !object->internal) {
+                               vm_map_unlock_read(submap);
+                               continue;
+                       }
+
+                       vm_object_pageout(object);
+
+                       vm_map_unlock_read(submap);
+                       submap = VM_MAP_NULL;
+                       submap_entry = VM_MAP_ENTRY_NULL;
+                       continue;
+               }
+
+               object = VME_OBJECT(entry);
+               if (entry->is_sub_map ||
+                   object == VM_OBJECT_NULL ||
+                   !object->internal) {
+                       continue;
+               }
+
+               vm_object_pageout(object);
+       }
+       
+       vm_map_unlock_read(map);
+       return KERN_SUCCESS;
+}
+#endif /* MACH_ASSERT */
+
+
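
The new vm_map_pageout() (compiled only under MACH_ASSERT) is reached through the VM_BEHAVIOR_PAGEOUT case added to vm_map_behavior_set() earlier in this commit; a hypothetical in-kernel caller could look like the following sketch (illustrative only):

	/* Hypothetical example: ask the VM to push a range of internal
	 * objects to the compressor via the new behavior. */
	kern_return_t kr;

	kr = vm_map_behavior_set(map, start, end, VM_BEHAVIOR_PAGEOUT);
	if (kr == KERN_INVALID_ADDRESS) {
		/* the range contained holes */
	} else if (kr == KERN_INVALID_ARGUMENT) {
		/* kernel built without MACH_ASSERT: behavior unknown */
	}
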
 /*
  *     Routine:        vm_map_entry_insert
  *
@@ -12455,8 +13134,8 @@ vm_map_entry_insert(
        }
        assert(new_entry->vme_start < new_entry->vme_end);
 
-       new_entry->object.vm_object = object;
-       new_entry->offset = offset;
+       VME_OBJECT_SET(new_entry, object);
+       VME_OFFSET_SET(new_entry, offset);
        new_entry->is_shared = is_shared;
        new_entry->is_sub_map = is_submap;
        new_entry->needs_copy = needs_copy;
@@ -12481,7 +13160,7 @@ vm_map_entry_insert(
                 */
                new_entry->use_pmap = TRUE;
        }
-       new_entry->alias = 0;
+       VME_ALIAS_SET(new_entry, 0);
        new_entry->zero_wired_pages = FALSE;
        new_entry->no_cache = no_cache;
        new_entry->permanent = permanent;
@@ -12491,6 +13170,8 @@ vm_map_entry_insert(
                new_entry->superpage_size = FALSE;
        new_entry->used_for_jit = FALSE;
        new_entry->iokit_acct = FALSE;
+       new_entry->vme_resilient_codesign = FALSE;
+       new_entry->vme_resilient_media = FALSE;
 
        /*
         *      Insert the new entry into the list.
@@ -12601,10 +13282,10 @@ vm_map_remap_extract(
                                             src_entry->vme_start);
 
                if(src_entry->is_sub_map) {
-                       vm_map_reference(src_entry->object.sub_map);
+                       vm_map_reference(VME_SUBMAP(src_entry));
                        object = VM_OBJECT_NULL;
                } else {
-                       object = src_entry->object.vm_object;
+                       object = VME_OBJECT(src_entry);
                        if (src_entry->iokit_acct) {
                                /*
                                 * This entry uses "IOKit accounting".
@@ -12626,8 +13307,8 @@ vm_map_remap_extract(
 
                        if (object == VM_OBJECT_NULL) {
                                object = vm_object_allocate(entry_size);
-                               src_entry->offset = 0;
-                               src_entry->object.vm_object = object;
+                               VME_OFFSET_SET(src_entry, 0);
+                               VME_OBJECT_SET(src_entry, object);
                        } else if (object->copy_strategy !=
                                   MEMORY_OBJECT_COPY_SYMMETRIC) {
                                /*
@@ -12641,9 +13322,7 @@ vm_map_remap_extract(
                                    !src_entry->is_shared &&
                                    object->vo_size > entry_size)) {
 
-                               vm_object_shadow(&src_entry->object.vm_object,
-                                                &src_entry->offset,
-                                                entry_size);
+                               VME_OBJECT_SHADOW(src_entry, entry_size);
 
                                if (!src_entry->needs_copy &&
                                    (src_entry->protection & VM_PROT_WRITE)) {
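
The removed vm_object_shadow() call spells out what the new VME_OBJECT_SHADOW() macro condenses; an open-coded equivalent, assuming the accessor semantics shown elsewhere in this diff (the real macro in osfmk/vm/vm_map.h may differ in detail):

	/* Illustrative helper, roughly what VME_OBJECT_SHADOW(entry, length) does. */
	static void
	entry_shadow_object(vm_map_entry_t entry, vm_map_size_t length)
	{
		vm_object_t        object = VME_OBJECT(entry);
		vm_object_offset_t offset = VME_OFFSET(entry);

		vm_object_shadow(&object, &offset, length);

		VME_OBJECT_SET(entry, object);
		VME_OFFSET_SET(entry, offset);
	}
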
@@ -12651,13 +13330,15 @@ vm_map_remap_extract(
 
                                        prot = src_entry->protection & ~VM_PROT_WRITE;
 
-                                       if (override_nx(map, src_entry->alias) && prot)
+                                       if (override_nx(map,
+                                                       VME_ALIAS(src_entry))
+                                           && prot)
                                                prot |= VM_PROT_EXECUTE;
 
                                        if(map->mapped_in_other_pmaps) {
                                                vm_object_pmap_protect(
-                                                       src_entry->object.vm_object,
-                                                       src_entry->offset,
+                                                       VME_OBJECT(src_entry),
+                                                       VME_OFFSET(src_entry),
                                                        entry_size,
                                                        PMAP_NULL,
                                                        src_entry->vme_start,
@@ -12670,7 +13351,7 @@ vm_map_remap_extract(
                                        }
                                }
 
-                               object = src_entry->object.vm_object;
+                               object = VME_OBJECT(src_entry);
                                src_entry->needs_copy = FALSE;
                        }
 
@@ -12685,7 +13366,8 @@ vm_map_remap_extract(
                        vm_object_unlock(object);
                }
 
-               offset = src_entry->offset + (src_start - src_entry->vme_start);
+               offset = (VME_OFFSET(src_entry) +
+                         (src_start - src_entry->vme_start));
 
                new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
                vm_map_entry_copy(new_entry, src_entry);
@@ -12700,7 +13382,7 @@ vm_map_remap_extract(
                new_entry->vme_end = map_address + tmp_size;
                assert(new_entry->vme_start < new_entry->vme_end);
                new_entry->inheritance = inheritance;
-               new_entry->offset = offset;
+               VME_OFFSET_SET(new_entry, offset);
 
                /*
                 * The new region has to be copied now if required.
@@ -12722,11 +13404,12 @@ vm_map_remap_extract(
 
                } else if (src_entry->is_sub_map) {
                        /* make this a COW sub_map if not already */
+                       assert(new_entry->wired_count == 0);
                        new_entry->needs_copy = TRUE;
                        object = VM_OBJECT_NULL;
                } else if (src_entry->wired_count == 0 &&
-                          vm_object_copy_quickly(&new_entry->object.vm_object,
-                                                 new_entry->offset,
+                          vm_object_copy_quickly(&VME_OBJECT(new_entry),
+                                                 VME_OFFSET(new_entry),
                                                  (new_entry->vme_end -
                                                   new_entry->vme_start),
                                                  &src_needs_copy,
@@ -12743,7 +13426,9 @@ vm_map_remap_extract(
 
                                prot = src_entry->protection & ~VM_PROT_WRITE;
 
-                               if (override_nx(map, src_entry->alias) && prot)
+                               if (override_nx(map,
+                                               VME_ALIAS(src_entry))
+                                   && prot)
                                        prot |= VM_PROT_EXECUTE;
 
                                vm_object_pmap_protect(object,
@@ -12755,6 +13440,7 @@ vm_map_remap_extract(
                                                       src_entry->vme_start,
                                                       prot);
 
+                               assert(src_entry->wired_count == 0);
                                src_entry->needs_copy = TRUE;
                        }
                        /*
@@ -12785,18 +13471,24 @@ vm_map_remap_extract(
                                        offset,
                                        entry_size,
                                        THREAD_UNINT,
-                                       &new_entry->object.vm_object);
+                                       &VME_OBJECT(new_entry));
 
-                               new_entry->offset = 0;
+                               VME_OFFSET_SET(new_entry, 0);
                                new_entry->needs_copy = FALSE;
                        } else {
+                               vm_object_offset_t new_offset;
+
+                               new_offset = VME_OFFSET(new_entry);
                                result = vm_object_copy_strategically(
                                        object,
                                        offset,
                                        entry_size,
-                                       &new_entry->object.vm_object,
-                                       &new_entry->offset,
+                                       &VME_OBJECT(new_entry),
+                                       &new_offset,
                                        &new_entry_needs_copy);
+                               if (new_offset != VME_OFFSET(new_entry)) {
+                                       VME_OFFSET_SET(new_entry, new_offset);
+                               }
 
                                new_entry->needs_copy = new_entry_needs_copy;
                        }
@@ -12825,8 +13517,7 @@ vm_map_remap_extract(
                                 * Retry the lookup and verify that the
                                 * same object/offset are still present.
                                 */
-                               vm_object_deallocate(new_entry->
-                                                    object.vm_object);
+                               vm_object_deallocate(VME_OBJECT(new_entry));
                                _vm_map_entry_dispose(map_header, new_entry);
                                if (result == KERN_MEMORY_RESTART_COPY)
                                        result = KERN_SUCCESS;
@@ -12864,9 +13555,9 @@ vm_map_remap_extract(
                        new_entry = src_entry->vme_next;
                        _vm_map_store_entry_unlink(map_header, src_entry);
                        if (src_entry->is_sub_map) {
-                               vm_map_deallocate(src_entry->object.sub_map);
+                               vm_map_deallocate(VME_SUBMAP(src_entry));
                        } else {
-                               vm_object_deallocate(src_entry->object.vm_object);
+                               vm_object_deallocate(VME_OBJECT(src_entry));
                        }
                        _vm_map_entry_dispose(map_header, src_entry);
                }
@@ -12974,6 +13665,13 @@ vm_map_remap(
                new_entry = entry->vme_next;
                _vm_map_store_entry_unlink(&map_header, entry);
                if (result == KERN_SUCCESS) {
+                       if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
+                               /* no codesigning -> read-only access */
+                               assert(!entry->used_for_jit);
+                               entry->max_protection = VM_PROT_READ;
+                               entry->protection = VM_PROT_READ;
+                               entry->vme_resilient_codesign = TRUE;
+                       }
                        entry->vme_start += *address;
                        entry->vme_end += *address;
                        assert(!entry->map_aligned);
@@ -12981,14 +13679,19 @@ vm_map_remap(
                        insp_entry = entry;
                } else {
                        if (!entry->is_sub_map) {
-                               vm_object_deallocate(entry->object.vm_object);
+                               vm_object_deallocate(VME_OBJECT(entry));
                        } else {
-                               vm_map_deallocate(entry->object.sub_map);
+                               vm_map_deallocate(VME_SUBMAP(entry));
                        }
                        _vm_map_entry_dispose(&map_header, entry);
                }
        }
 
+       if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
+               *cur_protection = VM_PROT_READ;
+               *max_protection = VM_PROT_READ;
+       }
+
        if( target_map->disable_vmentry_reuse == TRUE) {
                if( target_map->highest_entry_end < insp_entry->vme_end ){
                        target_map->highest_entry_end = insp_entry->vme_end;
@@ -13003,7 +13706,8 @@ vm_map_remap(
 
        if (result == KERN_SUCCESS && target_map->wiring_required)
                result = vm_map_wire(target_map, *address,
-                                    *address + size, *cur_protection, TRUE);
+                                    *address + size, *cur_protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
+                                    TRUE);
 
        /* 
         * If requested, return the address of the data pointed to by the 
@@ -13040,6 +13744,7 @@ vm_map_remap_range_allocate(
        vm_map_offset_t start;
        vm_map_offset_t end;
        kern_return_t   kr;
+       vm_map_entry_t          hole_entry;
 
 StartAgain: ;
 
@@ -13065,15 +13770,51 @@ StartAgain: ;
                if( map->disable_vmentry_reuse == TRUE) {
                        VM_MAP_HIGHEST_ENTRY(map, entry, start);
                } else {
-                       assert(first_free_is_valid(map));
-                       if (start == map->min_offset) {
-                               if ((entry = map->first_free) != vm_map_to_entry(map))
-                                       start = entry->vme_end;
+
+                       if (map->holelistenabled) {
+                               hole_entry = (vm_map_entry_t)map->holes_list;
+
+                               if (hole_entry == NULL) {
+                                       /*
+                                        * No more space in the map?
+                                        */
+                                       return(KERN_NO_SPACE);
+                               } else {
+
+                                       boolean_t found_hole = FALSE;
+
+                                       do {
+                                               if (hole_entry->vme_start >= start) {
+                                                       start = hole_entry->vme_start;
+                                                       found_hole = TRUE;
+                                                       break;
+                                               }
+
+                                               if (hole_entry->vme_end > start) {
+                                                       found_hole = TRUE;
+                                                       break;
+                                               }
+                                               hole_entry = hole_entry->vme_next;
+
+                                       } while (hole_entry != (vm_map_entry_t) map->holes_list);
+
+                                       if (found_hole == FALSE) {
+                                               return (KERN_NO_SPACE);
+                                       }
+
+                                       entry = hole_entry;
+                               }
                        } else {
-                               vm_map_entry_t  tmp_entry;
-                               if (vm_map_lookup_entry(map, start, &tmp_entry))
-                                       start = tmp_entry->vme_end;
-                               entry = tmp_entry;
+                               assert(first_free_is_valid(map));
+                               if (start == map->min_offset) {
+                                       if ((entry = map->first_free) != vm_map_to_entry(map))
+                                               start = entry->vme_end;
+                               } else {
+                                       vm_map_entry_t  tmp_entry;
+                                       if (vm_map_lookup_entry(map, start, &tmp_entry))
+                                               start = tmp_entry->vme_end;
+                                       entry = tmp_entry;
+                               }
                        }
                        start = vm_map_round_page(start,
                                                  VM_MAP_PAGE_MASK(map));
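
When map->holelistenabled is set, the scan above walks the circular holes_list, whose elements reuse the vm_map_entry layout with vme_start/vme_end delimiting unallocated address space. A small illustrative helper built on that assumption (not part of the commit):

	/* Does the candidate range [start, end) fit entirely inside "hole"?
	 * Mirrors the "entry->vme_end >= end" fit test in the next hunk,
	 * once "start" has been advanced to hole->vme_start. */
	static boolean_t
	range_fits_in_hole(vm_map_entry_t hole,
			   vm_map_offset_t start,
			   vm_map_offset_t end)
	{
		return (start >= hole->vme_start) && (end <= hole->vme_end);
	}
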
@@ -13117,30 +13858,56 @@ StartAgain: ;
                                return(KERN_NO_SPACE);
                        }
 
-                       /*
-                        *      If there are no more entries, we must win.
-                        */
-
                        next = entry->vme_next;
-                       if (next == vm_map_to_entry(map))
-                               break;
 
-                       /*
-                        *      If there is another entry, it must be
-                        *      after the end of the potential new region.
-                        */
+                       if (map->holelistenabled) {
+                               if (entry->vme_end >= end)
+                                       break;
+                       } else {
+                               /*
+                                *      If there are no more entries, we must win.
+                                *
+                                *      OR
+                                *
+                                *      If there is another entry, it must be
+                                *      after the end of the potential new region.
+                                */
 
-                       if (next->vme_start >= end)
-                               break;
+                               if (next == vm_map_to_entry(map))
+                                       break;
+
+                               if (next->vme_start >= end)
+                                       break;
+                       }
 
                        /*
                         *      Didn't fit -- move to the next entry.
                         */
 
                        entry = next;
-                       start = entry->vme_end;
+
+                       if (map->holelistenabled) {
+                               if (entry == (vm_map_entry_t) map->holes_list) {
+                                       /*
+                                        * Wrapped around
+                                        */
+                                       return(KERN_NO_SPACE);
+                               }
+                               start = entry->vme_start;
+                       } else {
+                               start = entry->vme_end;
+                       }
+               }
+
+               if (map->holelistenabled) {
+
+                       if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
+                               panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
+                       }
                }
+
                *address = start;
+
        } else {
                vm_map_entry_t          temp_entry;
        
@@ -13187,6 +13954,7 @@ StartAgain: ;
                                return KERN_RESOURCE_SHORTAGE;
                        }
                        vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
+                       vm_map_disable_hole_optimization(zap_map);
 
                        kr = vm_map_delete(map, start, end,
                                           (VM_MAP_REMOVE_SAVE_ENTRIES |
@@ -13448,7 +14216,7 @@ vm_map_purgable_control(
                return(KERN_PROTECTION_FAILURE);
        }
 
-       object = entry->object.vm_object;
+       object = VME_OBJECT(entry);
        if (object == VM_OBJECT_NULL ||
            object->purgable == VM_PURGABLE_DENY) {
                /*
@@ -13461,7 +14229,7 @@ vm_map_purgable_control(
        vm_object_lock(object);
 
 #if 00
-       if (entry->offset != 0 || 
+       if (VME_OFFSET(entry) != 0 || 
            entry->vme_end - entry->vme_start != object->vo_size) {
                /*
                 * Can only apply purgable controls to the whole (existing)
@@ -13582,12 +14350,12 @@ vm_map_page_info(
                /* compute offset from this map entry's start */
                offset -= map_entry->vme_start;
                /* compute offset into this map entry's object (or submap) */
-               offset += map_entry->offset;
+               offset += VME_OFFSET(map_entry);
 
                if (map_entry->is_sub_map) {
                        vm_map_t sub_map;
 
-                       sub_map = map_entry->object.sub_map;
+                       sub_map = VME_SUBMAP(map_entry);
                        vm_map_lock_read(sub_map);
                        vm_map_unlock_read(map);
 
@@ -13599,7 +14367,7 @@ vm_map_page_info(
                break;
        }
 
-       object = map_entry->object.vm_object;
+       object = VME_OBJECT(map_entry);
        if (object == VM_OBJECT_NULL) {
                /* no object -> no page */
                vm_map_unlock_read(map);
@@ -13812,6 +14580,7 @@ vm_map_msync(
        boolean_t               do_sync_req;
        boolean_t               had_hole = FALSE;
        memory_object_t         pager;
+       vm_map_offset_t         pmap_offset;
        
        if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
            (sync_flags & VM_SYNC_SYNCHRONOUS))
@@ -13842,9 +14611,7 @@ vm_map_msync(
 
                vm_map_lock(map);
                if (!vm_map_lookup_entry(map,
-                                        vm_map_trunc_page(
-                                                address,
-                                                VM_MAP_PAGE_MASK(map)),
+                                        address,
                                         &entry)) {
 
                        vm_map_size_t   skip;
@@ -13885,6 +14652,7 @@ vm_map_msync(
                }
 
                offset = address - entry->vme_start;
+               pmap_offset = address;
 
                /*
                 * do we have more to flush than is contained in this
@@ -13903,8 +14671,8 @@ vm_map_msync(
                        vm_map_t        local_map;
                        vm_map_offset_t local_offset;
 
-                       local_map = entry->object.sub_map;
-                       local_offset = entry->offset;
+                       local_map = VME_SUBMAP(entry);
+                       local_offset = VME_OFFSET(entry);
                        vm_map_unlock(map);
                        if (vm_map_msync(
                                    local_map,
@@ -13915,7 +14683,7 @@ vm_map_msync(
                        }
                        continue;
                }
-               object = entry->object.vm_object;
+               object = VME_OBJECT(entry);
 
                /*
                 * We can't sync this object if the object has not been
@@ -13925,7 +14693,7 @@ vm_map_msync(
                        vm_map_unlock(map);
                        continue;
                }
-               offset += entry->offset;
+               offset += VME_OFFSET(entry);
 
                 vm_object_lock(object);
 
@@ -13940,8 +14708,14 @@ vm_map_msync(
                                        kill_pages = -1;
                        }
                        if (kill_pages != -1)
-                               vm_object_deactivate_pages(object, offset, 
-                                                          (vm_object_size_t)flush_size, kill_pages, reusable_pages);
+                               vm_object_deactivate_pages(
+                                       object,
+                                       offset,
+                                       (vm_object_size_t) flush_size,
+                                       kill_pages,
+                                       reusable_pages,
+                                       map->pmap,
+                                       pmap_offset);
                        vm_object_unlock(object);
                        vm_map_unlock(map);
                        continue;
@@ -14306,7 +15080,7 @@ vm_map_set_64bit(vm_map_t map)
 }
 
 vm_map_offset_t
-vm_compute_max_offset(unsigned is64)
+vm_compute_max_offset(boolean_t is64)
 {
        return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
 }
@@ -14399,6 +15173,11 @@ vm_map_raise_min_offset(
                vm_map_unlock(map);
                return KERN_INVALID_ADDRESS;
        }
+       if (new_min_offset >= map->max_offset) {
+               /* can't go beyond the end of the address space */
+               vm_map_unlock(map);
+               return KERN_INVALID_ADDRESS;
+       }
 
        first_entry = vm_map_first_entry(map);
        if (first_entry != vm_map_to_entry(map) &&
@@ -14413,6 +15192,10 @@ vm_map_raise_min_offset(
 
        map->min_offset = new_min_offset;
 
+       assert(map->holes_list);
+       map->holes_list->start = new_min_offset;
+       assert(new_min_offset < map->holes_list->end);
+
        vm_map_unlock(map);
 
        return KERN_SUCCESS;
@@ -14500,7 +15283,7 @@ kern_return_t vm_map_sign(vm_map_t map,
                return(KERN_INVALID_ARGUMENT);
        }
        
-       object = entry->object.vm_object;
+       object = VME_OBJECT(entry);
        if (object == VM_OBJECT_NULL) {
                /*
                 * Object must already be present or we can't sign.
@@ -14515,7 +15298,8 @@ kern_return_t vm_map_sign(vm_map_t map,
        while(start < end) {
                uint32_t refmod;
                
-               m = vm_page_lookup(object, start - entry->vme_start + entry->offset );
+               m = vm_page_lookup(object,
+                                  start - entry->vme_start + VME_OFFSET(entry));
                if (m==VM_PAGE_NULL) {
                        /* should we try to fault a page here? we can probably 
                         * demand it exists and is locked for this request */
@@ -14580,17 +15364,20 @@ kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident
 
        vm_map_set_page_shift(zap_map, 
                              VM_MAP_PAGE_SHIFT(map));
+       vm_map_disable_hole_optimization(zap_map);
 
        for (entry = vm_map_first_entry(map);
             entry != vm_map_to_entry(map);
             entry = next_entry) {
                next_entry = entry->vme_next;
                
-               if (entry->object.vm_object && !entry->is_sub_map && (entry->object.vm_object->internal == TRUE)
-                   && (entry->object.vm_object->ref_count == 1)) {
+               if (VME_OBJECT(entry) &&
+                   !entry->is_sub_map &&
+                   (VME_OBJECT(entry)->internal == TRUE) &&
+                   (VME_OBJECT(entry)->ref_count == 1)) {
 
-                       *reclaimed_resident += entry->object.vm_object->resident_page_count;
-                       *reclaimed_compressed += vm_compressor_pager_get_count(entry->object.vm_object->pager);
+                       *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
+                       *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
 
                        (void)vm_map_delete(map, 
                                            entry->vme_start, 
@@ -14638,13 +15425,13 @@ kern_return_t vm_map_freeze_walk(
                unsigned int purgeable, clean, dirty, wired;
                boolean_t shared;
 
-               if ((entry->object.vm_object == 0) ||
+               if ((VME_OBJECT(entry) == 0) ||
                    (entry->is_sub_map) ||
-                   (entry->object.vm_object->phys_contiguous)) {
+                   (VME_OBJECT(entry)->phys_contiguous)) {
                        continue;
                }
 
-               default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, entry->object.vm_object, NULL);
+               default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, VME_OBJECT(entry), NULL);
                
                *purgeable_count += purgeable;
                *wired_count += wired;
@@ -14669,6 +15456,10 @@ kern_return_t vm_map_freeze_walk(
        return KERN_SUCCESS;
 }
 
+int c_freezer_swapout_count;
+int c_freezer_compression_count = 0;
+AbsoluteTime c_freezer_last_yield_ts = 0;
+
 kern_return_t vm_map_freeze(
                vm_map_t map,
                unsigned int *purgeable_count,
@@ -14694,7 +15485,13 @@ kern_return_t vm_map_freeze(
 
        if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
                default_freezer_active = FALSE;
+                               
+               if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
+                       kr = KERN_NO_SPACE;
+                       goto done;      
+               }
        }
+       assert(default_freezer_active == FALSE);
        
        if (default_freezer_active) {
                if (map->default_freezer_handle == NULL) {      
@@ -14710,14 +15507,18 @@ kern_return_t vm_map_freeze(
                        goto done;
                }
        }
-       
+       c_freezer_compression_count = 0;
+       clock_get_uptime(&c_freezer_last_yield_ts);
+
        for (entry2 = vm_map_first_entry(map);
             entry2 != vm_map_to_entry(map);
             entry2 = entry2->vme_next) {
        
-               vm_object_t     src_object = entry2->object.vm_object;
+               vm_object_t     src_object = VME_OBJECT(entry2);
 
-               if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) {
+               if (VME_OBJECT(entry2) &&
+                   !entry2->is_sub_map &&
+                   !VME_OBJECT(entry2)->phys_contiguous) {
                        /* If eligible, scan the entry, moving eligible pages over to our parent object */
                        if (default_freezer_active) {
                                unsigned int purgeable, clean, dirty, wired;
@@ -14743,11 +15544,24 @@ kern_return_t vm_map_freeze(
                                        *has_shared = TRUE;
                                }
                        } else {
-                               /*
-                                * To the compressor.
-                                */
-                               if (entry2->object.vm_object->internal == TRUE) {
-                                       vm_object_pageout(entry2->object.vm_object);
+                               if (VME_OBJECT(entry2)->internal == TRUE) {
+                                       
+                                       if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
+                                               /*
+                                                * Pages belonging to this object could be swapped to disk.
+                                                * Make sure it's not a shared object because we could end
+                                                * up just bringing it back in again.
+                                                */
+                                               if (VME_OBJECT(entry2)->ref_count > 1) {
+                                                       continue;
+                                               }
+                                       }
+                                       vm_object_compressed_freezer_pageout(VME_OBJECT(entry2));
+                               }
+
+                               if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
+                                       kr = KERN_NO_SPACE;
+                                       break;  
                                }
                        }
                }
@@ -14761,6 +15575,16 @@ kern_return_t vm_map_freeze(
 done:
        vm_map_unlock(map);
        
+       if (!default_freezer_active) {
+               vm_object_compressed_freezer_done();
+       }
+       if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
+               /*
+                * reset the counter tracking the # of swapped c_segs
+                * because we are now done with this freeze session and task.
+                */
+               c_freezer_swapout_count = 0;
+       }
        return kr;
 }
 
@@ -14812,6 +15636,8 @@ out:
  *     - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
  *     - !true_share
  *     - vo_size == ANON_CHUNK_SIZE
+ *
+ * Only non-kernel map entries.
  */
 boolean_t
 vm_map_entry_should_cow_for_true_share(
@@ -14829,8 +15655,8 @@ vm_map_entry_should_cow_for_true_share(
                return FALSE;
        }
 
-       if (entry->alias != VM_MEMORY_MALLOC &&
-           entry->alias != VM_MEMORY_MALLOC_SMALL) {
+       if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
+           VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
                /* not a malloc heap or Obj-C Garbage Collector heap */
                return FALSE;
        }
@@ -14841,7 +15667,7 @@ vm_map_entry_should_cow_for_true_share(
                return FALSE;
        }
 
-       object = entry->object.vm_object;
+       object = VME_OBJECT(entry);
 
        if (object == VM_OBJECT_NULL) {
                /* no object yet... */
@@ -14863,13 +15689,13 @@ vm_map_entry_should_cow_for_true_share(
                return FALSE;
        }
 
-       if (entry->alias == VM_MEMORY_MALLOC &&
+       if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
            object->vo_size != ANON_CHUNK_SIZE) {
                /* ... not an object created for the ObjC Garbage Collector */
                return FALSE;
        }
 
-       if (entry->alias == VM_MEMORY_MALLOC_SMALL &&
+       if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
            object->vo_size != 2048 * 4096) {
                /* ... not a "MALLOC_SMALL" heap */
                return FALSE;
@@ -14900,6 +15726,14 @@ vm_map_trunc_page_mask(
        return VM_MAP_TRUNC_PAGE(offset, mask);
 }
 
+boolean_t
+vm_map_page_aligned(
+       vm_map_offset_t offset,
+       vm_map_offset_t mask)
+{
+       return ((offset) & mask) == 0;
+}
+
 int
 vm_map_page_shift(
        vm_map_t map)
@@ -14914,7 +15748,7 @@ vm_map_page_size(
        return VM_MAP_PAGE_SIZE(map);
 }
 
-int
+vm_map_offset_t
 vm_map_page_mask(
        vm_map_t map)
 {
@@ -14958,7 +15792,7 @@ vm_map_purge(
                if (! (entry->protection & VM_PROT_WRITE)) {
                        goto next;
                }
-               object = entry->object.vm_object;
+               object = VME_OBJECT(entry);
                if (object == VM_OBJECT_NULL) {
                        goto next;
                }
@@ -14968,7 +15802,7 @@ vm_map_purge(
 
                vm_object_lock(object);
 #if 00
-               if (entry->offset != 0 ||
+               if (VME_OFFSET(entry) != 0 ||
                    (entry->vme_end - entry->vme_start) != object->vo_size) {
                        vm_object_unlock(object);
                        goto next;
@@ -15002,12 +15836,17 @@ vm_map_query_volatile(
        vm_map_t        map,
        mach_vm_size_t  *volatile_virtual_size_p,
        mach_vm_size_t  *volatile_resident_size_p,
-       mach_vm_size_t  *volatile_pmap_size_p)
+       mach_vm_size_t  *volatile_compressed_size_p,
+       mach_vm_size_t  *volatile_pmap_size_p,
+       mach_vm_size_t  *volatile_compressed_pmap_size_p)
 {
        mach_vm_size_t  volatile_virtual_size;
        mach_vm_size_t  volatile_resident_count;
+       mach_vm_size_t  volatile_compressed_count;
        mach_vm_size_t  volatile_pmap_count;
+       mach_vm_size_t  volatile_compressed_pmap_count;
        mach_vm_size_t  resident_count;
+       unsigned int    compressed_count;
        vm_map_entry_t  entry;
        vm_object_t     object;
 
@@ -15015,7 +15854,9 @@ vm_map_query_volatile(
 
        volatile_virtual_size = 0;
        volatile_resident_count = 0;
+       volatile_compressed_count = 0;
        volatile_pmap_count = 0;
+       volatile_compressed_pmap_count = 0;
 
        for (entry = vm_map_first_entry(map);
             entry != vm_map_to_entry(map);
@@ -15026,14 +15867,15 @@ vm_map_query_volatile(
                if (! (entry->protection & VM_PROT_WRITE)) {
                        continue;
                }
-               object = entry->object.vm_object;
+               object = VME_OBJECT(entry);
                if (object == VM_OBJECT_NULL) {
                        continue;
                }
-               if (object->purgable != VM_PURGABLE_VOLATILE) {
+               if (object->purgable != VM_PURGABLE_VOLATILE &&
+                   object->purgable != VM_PURGABLE_EMPTY) {
                        continue;
                }
-               if (entry->offset != 0) {
+               if (VME_OFFSET(entry)) {
                        /*
                         * If the map entry has been split and the object now
                         * appears several times in the VM map, we don't want
@@ -15044,28 +15886,72 @@ vm_map_query_volatile(
                        continue;
                }
                resident_count = object->resident_page_count;
-               if ((entry->offset / PAGE_SIZE) >= resident_count) {
+               if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
                        resident_count = 0;
                } else {
-                       resident_count -= (entry->offset / PAGE_SIZE);
+                       resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
                }
 
                volatile_virtual_size += entry->vme_end - entry->vme_start;
                volatile_resident_count += resident_count;
+               if (object->pager) {
+                       volatile_compressed_count +=
+                               vm_compressor_pager_get_count(object->pager);
+               }
+               compressed_count = 0;
                volatile_pmap_count += pmap_query_resident(map->pmap,
                                                           entry->vme_start,
-                                                          entry->vme_end);
+                                                          entry->vme_end,
+                                                          &compressed_count);
+               volatile_compressed_pmap_count += compressed_count;
        }
 
        /* map is still locked on return */
 
        *volatile_virtual_size_p = volatile_virtual_size;
        *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
+       *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
        *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
+       *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
 
        return KERN_SUCCESS;
 }
 
+void
+vm_map_sizes(vm_map_t map,
+               vm_map_size_t * psize,
+               vm_map_size_t * pfree,
+               vm_map_size_t * plargest_free)
+{
+    vm_map_entry_t  entry;
+    vm_map_offset_t prev;
+    vm_map_size_t   free, total_free, largest_free;
+    boolean_t       end;
+
+    total_free = largest_free = 0;
+
+    vm_map_lock_read(map);
+    if (psize) *psize = map->max_offset - map->min_offset;
+
+    prev = map->min_offset;
+    for (entry = vm_map_first_entry(map);; entry = entry->vme_next)
+    {
+       end = (entry == vm_map_to_entry(map));
+
+       if (end) free = entry->vme_end   - prev;
+       else     free = entry->vme_start - prev;
+
+       total_free += free;
+       if (free > largest_free) largest_free = free;
+
+       if (end) break;
+       prev = entry->vme_end;
+    }
+    vm_map_unlock_read(map);
+    if (pfree)         *pfree = total_free;
+    if (plargest_free) *plargest_free = largest_free;
+}
+
 #if VM_SCAN_FOR_SHADOW_CHAIN
 int vm_map_shadow_max(vm_map_t map);
 int vm_map_shadow_max(
@@ -15088,7 +15974,7 @@ int vm_map_shadow_max(
                if (entry->is_sub_map) {
                        continue;
                }
-               object = entry->object.vm_object;
+               object = VME_OBJECT(entry);
                if (object == NULL) {
                        continue;
                }
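
A minimal sketch of how a caller might consume the vm_map_sizes() routine added above (hypothetical helper name and use, not part of this commit; it relies on the declarations from vm_map.h):

    static void
    report_map_usage(vm_map_t map)
    {
            vm_map_size_t size, free_total, largest_free;

            /* walks the entry list under the map read lock and sums the gaps */
            vm_map_sizes(map, &size, &free_total, &largest_free);

            printf("map %p: size 0x%llx free 0x%llx largest free 0x%llx\n",
                   map, (unsigned long long)size, (unsigned long long)free_total,
                   (unsigned long long)largest_free);
    }

Any of the three output pointers may be NULL; the implementation above checks each one before storing through it.
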
index 6d05060c1c8ac4a90e60bf598e094ffd4e26f9dd..07ed5ad3aac148169f9471cb764a59907224f989 100644 (file)
@@ -138,8 +138,8 @@ typedef struct vm_map_entry *vm_map_entry_t;
  *             memory object or a sub map (of the kernel map).
  */
 typedef union vm_map_object {
-       vm_object_t             vm_object;      /* object object */
-       vm_map_t                sub_map;        /* belongs to another map */
+       vm_object_t             vmo_object;     /* object object */
+       vm_map_t                vmo_submap;     /* belongs to another map */
 } vm_map_object_t;
 
 #define named_entry_lock_init(object)  lck_mtx_init(&(object)->Lock, &vm_object_lck_grp, &vm_object_lck_attr)
@@ -206,6 +206,54 @@ struct vm_map_links {
        vm_map_offset_t         end;            /* end address */
 };
 
+/*
+ * IMPORTANT:
+ * The "alias" field can be updated while holding the VM map lock
+ * "shared".  It's OK as along as it's the only field that can be
+ * updated without the VM map "exclusive" lock.
+ */
+#define VME_OBJECT(entry) ((entry)->vme_object.vmo_object)
+#define VME_OBJECT_SET(entry, object)                          \
+       MACRO_BEGIN                                             \
+       (entry)->vme_object.vmo_object = (object);              \
+       MACRO_END
+#define VME_SUBMAP(entry) ((entry)->vme_object.vmo_submap)
+#define VME_SUBMAP_SET(entry, submap)                          \
+       MACRO_BEGIN                                             \
+       (entry)->vme_object.vmo_submap = (submap);              \
+       MACRO_END
+#define VME_OFFSET(entry) ((entry)->vme_offset & ~PAGE_MASK)
+#define VME_OFFSET_SET(entry, offset)          \
+       MACRO_BEGIN                             \
+       int __alias;                            \
+       __alias = VME_ALIAS((entry));           \
+       assert((offset & PAGE_MASK) == 0);      \
+       (entry)->vme_offset = offset | __alias; \
+       MACRO_END
+#define VME_OBJECT_SHADOW(entry, length)                       \
+       MACRO_BEGIN                                             \
+       vm_object_t             __object;                       \
+       vm_object_offset_t      __offset;                       \
+       __object = VME_OBJECT((entry));                         \
+       __offset = VME_OFFSET((entry));                         \
+       vm_object_shadow(&__object, &__offset, (length));       \
+       if (__object != VME_OBJECT((entry))) {                  \
+               VME_OBJECT_SET((entry), __object);              \
+       }                                                       \
+       if (__offset != VME_OFFSET((entry))) {                  \
+               VME_OFFSET_SET((entry), __offset);              \
+       }                                                       \
+       MACRO_END
+
+#define VME_ALIAS_MASK (PAGE_MASK)
+#define VME_ALIAS(entry) ((unsigned int)((entry)->vme_offset & VME_ALIAS_MASK))
+#define VME_ALIAS_SET(entry, alias) \
+       MACRO_BEGIN                                                     \
+       vm_map_offset_t __offset;                                       \
+       __offset = VME_OFFSET((entry));                                 \
+       (entry)->vme_offset = __offset | ((alias) & VME_ALIAS_MASK);    \
+       MACRO_END
+
 struct vm_map_entry {
        struct vm_map_links     links;          /* links to other entries */
 #define vme_prev               links.prev
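
The VME_OFFSET()/VME_ALIAS() accessors above rely on the object offset always being page aligned, so the low PAGE_MASK bits of the single vme_offset word are free to carry the user alias/tag; VME_OFFSET_SET() asserts that invariant before or-ing the alias back in. A standalone illustration of the same packing, in plain user-space C with an assumed 4 KB page size (demo only, not kernel code):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define DEMO_PAGE_MASK 0xfffULL        /* assume 4 KB pages for the demo */

    /* pack a page-aligned offset and a small tag into one 64-bit word,
     * mirroring what VME_OFFSET_SET()/VME_ALIAS_SET() do to vme_offset */
    static uint64_t
    pack(uint64_t offset, unsigned int alias)
    {
            assert((offset & DEMO_PAGE_MASK) == 0);
            return offset | (alias & DEMO_PAGE_MASK);
    }

    int
    main(void)
    {
            uint64_t word = pack(0x200000, 7);     /* 7 stands in for a user tag */

            printf("offset 0x%llx, alias %u\n",
                   (unsigned long long)(word & ~DEMO_PAGE_MASK),   /* VME_OFFSET() */
                   (unsigned int)(word & DEMO_PAGE_MASK));         /* VME_ALIAS()  */
            return 0;
    }
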
@@ -214,46 +262,47 @@ struct vm_map_entry {
 #define vme_end                        links.end
 
        struct vm_map_store     store;
-       union vm_map_object     object;         /* object I point to */
-       vm_object_offset_t      offset;         /* offset into object */
+       union vm_map_object     vme_object;     /* object I point to */
+       vm_object_offset_t      vme_offset;     /* offset into object */
+
        unsigned int
-       /* boolean_t */         is_shared:1,    /* region is shared */
-       /* boolean_t */         is_sub_map:1,   /* Is "object" a submap? */
-       /* boolean_t */         in_transition:1, /* Entry being changed */
-       /* boolean_t */         needs_wakeup:1,  /* Waiters on in_transition */
-       /* vm_behavior_t */     behavior:2,     /* user paging behavior hint */
+       /* boolean_t */ is_shared:1,    /* region is shared */
+       /* boolean_t */ is_sub_map:1,   /* Is "object" a submap? */
+       /* boolean_t */ in_transition:1, /* Entry being changed */
+       /* boolean_t */ needs_wakeup:1, /* Waiters on in_transition */
+       /* vm_behavior_t */ behavior:2, /* user paging behavior hint */
                /* behavior is not defined for submap type */
-       /* boolean_t */         needs_copy:1,   /* object need to be copied? */
+       /* boolean_t */ needs_copy:1,   /* object need to be copied? */
+
                /* Only in task maps: */
-       /* vm_prot_t */         protection:3,   /* protection code */
-       /* vm_prot_t */         max_protection:3,/* maximum protection */
-       /* vm_inherit_t */      inheritance:2,  /* inheritance */
-       /* boolean_t */         use_pmap:1,     /*
-                                                * use_pmap is overloaded:
-                                                * if "is_sub_map":
-                                                *      use a nested pmap?
-                                                * else (i.e. if object):
-                                                *      use pmap accounting
-                                                *      for footprint?
-                                                */
-       /*
-        * IMPORTANT:
-        * The "alias" field can be updated while holding the VM map lock
-        * "shared".  It's OK as along as it's the only field that can be
-        * updated without the VM map "exclusive" lock.
-        */
-       /* unsigned char */     alias:8,        /* user alias */
-       /* boolean_t */         no_cache:1,     /* should new pages be cached? */
-       /* boolean_t */         permanent:1,    /* mapping can not be removed */
-       /* boolean_t */         superpage_size:1,/* use superpages of a certain size */
-       /* boolean_t */         map_aligned:1,  /* align to map's page size */
-       /* boolean_t */         zero_wired_pages:1, /* zero out the wired pages of this entry it is being deleted without unwiring them */
-       /* boolean_t */         used_for_jit:1,
-       /* boolean_t */ from_reserved_zone:1,   /* Allocated from
-                                                * kernel reserved zone  */
+       /* vm_prot_t */ protection:3,   /* protection code */
+       /* vm_prot_t */ max_protection:3, /* maximum protection */
+       /* vm_inherit_t */ inheritance:2, /* inheritance */
+       /* boolean_t */ use_pmap:1,     /*
+                                        * use_pmap is overloaded:
+                                        * if "is_sub_map":
+                                        *      use a nested pmap?
+                                        * else (i.e. if object):
+                                        *      use pmap accounting
+                                        *      for footprint?
+                                        */
+       /* boolean_t */ no_cache:1,     /* should new pages be cached? */
+       /* boolean_t */ permanent:1,    /* mapping can not be removed */
+       /* boolean_t */ superpage_size:1, /* use superpages of a certain size */
+       /* boolean_t */ map_aligned:1,  /* align to map's page size */
+       /* boolean_t */ zero_wired_pages:1, /* zero out the wired pages of
+                                            * this entry it is being deleted
+                                            * without unwiring them */
+       /* boolean_t */ used_for_jit:1,
+       /* boolean_t */ from_reserved_zone:1, /* Allocated from
+                                              * kernel reserved zone    */
 
        /* iokit accounting: use the virtual size rather than resident size: */
-       /* boolean_t */ iokit_acct:1;
+       /* boolean_t */ iokit_acct:1,
+       /* boolean_t */ vme_resilient_codesign:1,
+       /* boolean_t */ vme_resilient_media:1,
+               __unused:6;
+;
 
        unsigned short          wired_count;    /* can be paged if = 0 */
        unsigned short          user_wired_count; /* for vm_wire */
@@ -344,7 +393,15 @@ struct _vm_map {
        decl_lck_mtx_data(,     s_lock)         /* Lock ref, res fields */
        lck_mtx_ext_t           s_lock_ext;
        vm_map_entry_t          hint;           /* hint for quick lookups */
-       vm_map_entry_t          first_free;     /* First free space hint */
+       struct vm_map_links*    hole_hint;      /* hint for quick hole lookups */
+       union{
+               vm_map_entry_t          _first_free;    /* First free space hint */
+               struct vm_map_links*    _holes;         /* links all holes between entries */
+       }f_s;                                           /* Union for free space data structures being used */
+
+#define first_free             f_s._first_free
+#define holes_list             f_s._holes
+
        unsigned int            
        /* boolean_t */         wait_for_space:1, /* Should callers wait for space? */
        /* boolean_t */         wiring_required:1, /* All memory wired? */
@@ -353,7 +410,8 @@ struct _vm_map {
        /* boolean_t */         switch_protect:1, /*  Protect map from write faults while switched */
        /* boolean_t */         disable_vmentry_reuse:1, /*  All vm entries should keep using newer and higher addresses in the map */ 
        /* boolean_t */         map_disallow_data_exec:1, /* Disallow execution from data pages on exec-permissive architectures */
-       /* reserved */          pad:25;
+       /* boolean_t */         holelistenabled:1,
+       /* reserved */          pad:24;
        unsigned int            timestamp;      /* Version number */
        unsigned int            color_rr;       /* next color (not protected by a lock) */
 #if CONFIG_FREEZE
@@ -420,7 +478,11 @@ typedef struct vm_map_version {
  *
  *             The third format is a kernel buffer copy object - for data
  *             small enough that physical copies were the most efficient
- *             method.
+ *             method. This method uses a zero-sized array unioned with
+ *             other format-specific data in the 'c_u' member. This unsized
+ *             array overlaps the other elements and allows us to use this
+ *             extra structure space for physical memory copies. On 64-bit
+ *             systems this saves ~64 bytes per vm_map_copy.
  */
 
 struct vm_map_copy {
@@ -431,12 +493,9 @@ struct vm_map_copy {
        vm_object_offset_t      offset;
        vm_map_size_t           size;
        union {
-           struct vm_map_header        hdr;    /* ENTRY_LIST */
-           vm_object_t                 object; /* OBJECT */
-           struct {                            
-               void                    *kdata;       /* KERNEL_BUFFER */
-               vm_size_t               kalloc_size;  /* size of this copy_t */
-           } c_k;
+               struct vm_map_header    hdr;      /* ENTRY_LIST */
+               vm_object_t             object;   /* OBJECT */
+               uint8_t                 kdata[0]; /* KERNEL_BUFFER */
        } c_u;
 };
 
@@ -444,9 +503,8 @@ struct vm_map_copy {
 #define cpy_hdr                        c_u.hdr
 
 #define cpy_object             c_u.object
-
-#define cpy_kdata              c_u.c_k.kdata
-#define cpy_kalloc_size                c_u.c_k.kalloc_size
+#define cpy_kdata              c_u.kdata
+#define cpy_kdata_hdr_sz       (offsetof(struct vm_map_copy, c_u.kdata))
 
 #define VM_MAP_COPY_PAGE_SHIFT(copy) ((copy)->cpy_hdr.page_shift)
 #define VM_MAP_COPY_PAGE_SIZE(copy) (1 << VM_MAP_COPY_PAGE_SHIFT((copy)))
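
With the old c_k sub-structure gone, a KERNEL_BUFFER copy keeps its payload immediately after the fixed header fields, and cpy_kdata_hdr_sz is the number of header bytes to add when sizing the allocation. A simplified sketch of building such a copy object (hypothetical helper; the real kernel-buffer copy-in path also copies from user space and handles failures):

    static vm_map_copy_t
    make_kernel_buffer_copy(const void *src, vm_size_t len)
    {
            vm_map_copy_t copy;

            /* one allocation: the header plus the inline data named by cpy_kdata */
            copy = (vm_map_copy_t) kalloc(cpy_kdata_hdr_sz + len);
            if (copy == VM_MAP_COPY_NULL)
                    return VM_MAP_COPY_NULL;

            copy->type = VM_MAP_COPY_KERNEL_BUFFER;
            copy->size = len;
            copy->offset = 0;
            memcpy(copy->cpy_kdata, src, len);     /* payload follows the header */
            return copy;
    }

Because the allocation size can be recomputed as copy->size plus cpy_kdata_hdr_sz, the separate kalloc_size field is no longer needed, which is the per-copy saving the updated comment above refers to.
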
@@ -487,6 +545,22 @@ struct vm_map_copy {
  */
 #define vm_map_lock_read_to_write(map) (lck_rw_lock_shared_to_exclusive(&(map)->lock) != TRUE)
 
+#if MACH_ASSERT || DEBUG
+#define vm_map_lock_assert_held(map) \
+       lck_rw_assert(&(map)->lock, LCK_RW_ASSERT_HELD)
+#define vm_map_lock_assert_shared(map) \
+       lck_rw_assert(&(map)->lock, LCK_RW_ASSERT_SHARED)
+#define vm_map_lock_assert_exclusive(map) \
+       lck_rw_assert(&(map)->lock, LCK_RW_ASSERT_EXCLUSIVE)
+#define vm_map_lock_assert_notheld(map) \
+       lck_rw_assert(&(map)->lock, LCK_RW_ASSERT_NOTHELD)
+#else  /* MACH_ASSERT || DEBUG */ 
+#define vm_map_lock_assert_held(map)
+#define vm_map_lock_assert_shared(map)
+#define vm_map_lock_assert_exclusive(map)
+#define vm_map_lock_assert_notheld(map)
+#endif /* MACH_ASSERT || DEBUG */
+
 /*
  *     Exported procedures that operate on vm_map_t.
  */
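
The new vm_map_lock_assert_* macros compile to nothing unless MACH_ASSERT or DEBUG is set, so internal routines can state their locking contract at no cost to release builds. A hypothetical use (not taken from this commit):

    static void
    mark_entry_permanent(vm_map_t map, vm_map_entry_t entry)
    {
            /* editing an entry requires the map lock held exclusively */
            vm_map_lock_assert_exclusive(map);
            entry->permanent = TRUE;
    }
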
@@ -764,6 +838,7 @@ extern kern_return_t        vm_map_enter(
                                vm_prot_t               max_protection,
                                vm_inherit_t            inheritance);
 
+
 /* XXX should go away - replaced with regular enter of contig object */
 extern  kern_return_t  vm_map_enter_cpm(
                                vm_map_t                map,
@@ -866,7 +941,9 @@ extern kern_return_t vm_map_query_volatile(
        vm_map_t        map,
        mach_vm_size_t  *volatile_virtual_size_p,
        mach_vm_size_t  *volatile_resident_size_p,
-       mach_vm_size_t  *volatile_pmap_size_p);
+       mach_vm_size_t  *volatile_compressed_size_p,
+       mach_vm_size_t  *volatile_pmap_size_p,
+       mach_vm_size_t  *volatile_compressed_pmap_size_p);
 
 extern kern_return_t   vm_map_submap(
                                vm_map_t                map,
@@ -906,6 +983,23 @@ extern int override_nx(vm_map_t map, uint32_t user_tag);
 
 extern int vm_map_purge(vm_map_t map);
 
+
+/* kext exported versions */
+
+extern kern_return_t vm_map_wire_external(
+       register vm_map_t       map,
+       register vm_map_offset_t        start,
+       register vm_map_offset_t        end,
+       register vm_prot_t      caller_prot,
+       boolean_t               user_wire);
+
+extern kern_return_t vm_map_wire_and_extract_external(
+       vm_map_t        map,
+       vm_map_offset_t start,
+       vm_prot_t       caller_prot,
+       boolean_t       user_wire,
+       ppnum_t         *physpage_p);
+
 #endif /* MACH_KERNEL_PRIVATE */
 
 __BEGIN_DECLS
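
The two *_external routines above are the wiring entry points now exported for kexts. A hedged sketch of a driver-side helper calling one of them (hypothetical function, assuming only the prototype declared above):

    static kern_return_t
    wire_driver_buffer(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end)
    {
            /* wire [start, end) readable and writable; not a user wire */
            return vm_map_wire_external(map, start, end,
                                        VM_PROT_READ | VM_PROT_WRITE,
                                        FALSE);
    }
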
@@ -917,6 +1011,8 @@ extern vm_map_t            vm_map_create(
                                vm_map_offset_t         max_off,
                                boolean_t               pageable);
 
+extern void            vm_map_disable_hole_optimization(vm_map_t map);
+
 /* Get rid of a map */
 extern void            vm_map_destroy(
                                vm_map_t                map,
@@ -1095,7 +1191,7 @@ extern kern_return_t      vm_map_raise_min_offset(
        vm_map_offset_t new_min_offset);
 
 extern vm_map_offset_t vm_compute_max_offset(
-                               unsigned                is64);
+                               boolean_t               is64);
 
 extern uint64_t        vm_map_get_max_aslr_slide_pages(
                                vm_map_t map);
@@ -1122,7 +1218,7 @@ extern boolean_t first_free_is_valid(vm_map_t);
 extern int             vm_map_page_shift(
                                vm_map_t                map);
 
-extern int             vm_map_page_mask(
+extern vm_map_offset_t vm_map_page_mask(
                                vm_map_t                map);
 
 extern int             vm_map_page_size(
@@ -1136,6 +1232,10 @@ extern vm_map_offset_t   vm_map_trunc_page_mask(
                                vm_map_offset_t         offset,
                                vm_map_offset_t         mask);
 
+extern boolean_t       vm_map_page_aligned(
+                               vm_map_offset_t         offset,
+                               vm_map_offset_t         mask);
+
 #ifdef XNU_KERNEL_PRIVATE
 extern kern_return_t vm_map_page_info(
        vm_map_t                map,
@@ -1200,6 +1300,7 @@ extern kern_return_t vm_map_set_page_shift(vm_map_t map, int pageshift);
 #define VM_MAP_REMOVE_SAVE_ENTRIES     0x8
 #define VM_MAP_REMOVE_NO_PMAP_CLEANUP  0x10
 #define VM_MAP_REMOVE_NO_MAP_ALIGN     0x20
+#define VM_MAP_REMOVE_NO_UNNESTING     0x40
 
 /* Support for UPLs from vm_maps */
 
@@ -1209,9 +1310,15 @@ extern kern_return_t vm_map_get_upl(
                                upl_size_t              *size,
                                upl_t                   *upl,
                                upl_page_info_array_t   page_info,
-                               unsigned int    *page_infoCnt,
-                               int             *flags,
-                               int             force_data_sync);
+                               unsigned int            *page_infoCnt,
+                               upl_control_flags_t     *flags,
+                               int                     force_data_sync);
+
+extern void
+vm_map_sizes(vm_map_t map,
+               vm_map_size_t * psize,
+               vm_map_size_t * pfree,
+               vm_map_size_t * plargest_free);
 
 #if CONFIG_DYNAMIC_CODE_SIGNING
 extern kern_return_t vm_map_sign(vm_map_t map, 
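
vm_map_page_mask() now returns a vm_map_offset_t, which pairs naturally with the new vm_map_page_aligned() predicate. A sketch of validating a caller-supplied range against a map's page size (illustrative only; the helper name is invented):

    static kern_return_t
    check_range_alignment(vm_map_t map, mach_vm_offset_t addr, mach_vm_size_t size)
    {
            vm_map_offset_t mask = vm_map_page_mask(map);

            if (!vm_map_page_aligned(addr, mask) ||
                !vm_map_page_aligned(size, mask))
                    return KERN_INVALID_ARGUMENT;
            return KERN_SUCCESS;
    }
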
index 288bafba1d7c4e1818a68e369d9bec6712106dd7..70f0624f0f80e85d850cd4423790c4a81b8eccc3 100644 (file)
@@ -26,7 +26,9 @@
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
+#include <mach/sdt.h>
 #include <vm/vm_map_store.h>
+#include <vm/vm_pageout.h> /* for vm_debug_events */
 
 #if MACH_ASSERT
 boolean_t
@@ -81,7 +83,7 @@ vm_map_store_update( vm_map_t map, vm_map_entry_t entry, int update_type )
                case VM_MAP_ENTRY_CREATE:
                        break;
                case VM_MAP_ENTRY_DELETE:
-                       if((entry) == (map)->first_free) {
+                       if((map->holelistenabled == FALSE) && ((entry) == (map)->first_free)) {
                                (map)->first_free = vm_map_to_entry(map);
                        }
                        if((entry) == (map)->hint) {
@@ -95,6 +97,23 @@ vm_map_store_update( vm_map_t map, vm_map_entry_t entry, int update_type )
 
 void   vm_map_store_copy_insert( vm_map_t map, vm_map_entry_t after_where, vm_map_copy_t copy)
 {
+       if (__improbable(vm_debug_events)) {
+               vm_map_entry_t entry;
+               for (entry = vm_map_copy_first_entry(copy); entry != vm_map_copy_to_entry(copy); entry = entry->vme_next) {
+                       DTRACE_VM4(map_entry_link_copy, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->links.start, vm_address_t, entry->links.end);
+               }
+       }
+
+       if (map->holelistenabled) {
+               vm_map_entry_t entry = NULL;
+
+               entry = vm_map_copy_first_entry(copy);
+               while (entry != vm_map_copy_to_entry(copy)) {
+                       vm_map_store_update_first_free(map, entry, TRUE);
+                       entry = entry->vme_next;
+               }
+       }
+
        vm_map_store_copy_insert_ll(map, after_where, copy);
 #ifdef VM_MAP_STORE_USE_RB
        if (vm_map_store_has_RB_support( &map->hdr )) {
@@ -120,6 +139,9 @@ void
 _vm_map_store_entry_link( struct vm_map_header * mapHdr, vm_map_entry_t after_where, vm_map_entry_t entry)
 {
        assert(entry->vme_start < entry->vme_end);
+       if (__improbable(vm_debug_events))
+               DTRACE_VM4(map_entry_link, vm_map_t, (char *)mapHdr - sizeof (lck_rw_t), vm_map_entry_t, entry, vm_address_t, entry->links.start, vm_address_t, entry->links.end);
+
        vm_map_store_entry_link_ll(mapHdr, after_where, entry);
 #ifdef VM_MAP_STORE_USE_RB
        if (vm_map_store_has_RB_support( mapHdr )) {
@@ -147,7 +169,7 @@ vm_map_store_entry_link( vm_map_t map, vm_map_entry_t after_where, vm_map_entry_
                update_first_free_ll(VMEL_map, VMEL_map->first_free);
 #ifdef VM_MAP_STORE_USE_RB
                if (vm_map_store_has_RB_support( &VMEL_map->hdr )) {
-                       update_first_free_rb(VMEL_map, VMEL_map->first_free);
+                       update_first_free_rb(VMEL_map, entry, TRUE);
                }
 #endif
        }
@@ -156,6 +178,9 @@ vm_map_store_entry_link( vm_map_t map, vm_map_entry_t after_where, vm_map_entry_
 void
 _vm_map_store_entry_unlink( struct vm_map_header * mapHdr, vm_map_entry_t entry)
 {
+       if (__improbable(vm_debug_events))
+               DTRACE_VM4(map_entry_unlink, vm_map_t, (char *)mapHdr - sizeof (lck_rw_t), vm_map_entry_t, entry, vm_address_t, entry->links.start, vm_address_t, entry->links.end);
+
        vm_map_store_entry_unlink_ll(mapHdr, entry);
 #ifdef VM_MAP_STORE_USE_RB
        if (vm_map_store_has_RB_support( mapHdr )) {
@@ -168,22 +193,24 @@ void
 vm_map_store_entry_unlink( vm_map_t map, vm_map_entry_t entry)
 {
        vm_map_t VMEU_map;
-       vm_map_entry_t VMEU_entry;
-       vm_map_entry_t VMEU_first_free;
+       vm_map_entry_t VMEU_entry = NULL;
+       vm_map_entry_t VMEU_first_free = NULL;
        VMEU_map = (map);
        VMEU_entry = (entry);
-       if (VMEU_entry->vme_start <= VMEU_map->first_free->vme_start){
-               VMEU_first_free = VMEU_entry->vme_prev;         
-       } else  {
-               VMEU_first_free = VMEU_map->first_free;
+
+       if (map->holelistenabled == FALSE) {
+               if (VMEU_entry->vme_start <= VMEU_map->first_free->vme_start){
+                       VMEU_first_free = VMEU_entry->vme_prev;
+               } else  {
+                       VMEU_first_free = VMEU_map->first_free;
+               }
        }
-       
        _vm_map_store_entry_unlink(&VMEU_map->hdr, VMEU_entry);
        vm_map_store_update( map, entry, VM_MAP_ENTRY_DELETE);
        update_first_free_ll(VMEU_map, VMEU_first_free);
 #ifdef VM_MAP_STORE_USE_RB
        if (vm_map_store_has_RB_support( &VMEU_map->hdr )) {
-               update_first_free_rb(VMEU_map, VMEU_first_free);
+               update_first_free_rb(VMEU_map, entry, FALSE);
        }
 #endif
 }
@@ -201,12 +228,12 @@ vm_map_store_copy_reset( vm_map_copy_t copy,vm_map_entry_t entry)
 }
 
 void
-vm_map_store_update_first_free( vm_map_t map, vm_map_entry_t first_free)
+vm_map_store_update_first_free( vm_map_t map, vm_map_entry_t first_free_entry, boolean_t new_entry_creation)
 {
-       update_first_free_ll(map, first_free);
+       update_first_free_ll(map, first_free_entry);
 #ifdef VM_MAP_STORE_USE_RB
        if (vm_map_store_has_RB_support( &map->hdr )) {
-               update_first_free_rb(map, first_free);
+               update_first_free_rb(map, first_free_entry, new_entry_creation);
        }
 #endif
 }
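
The DTrace probes added in this file fire only when vm_debug_events is set, and the test is wrapped in __improbable() so the common path pays a single predicted-not-taken branch. The same idiom on a made-up probe, as a sketch (the probe name map_entry_resize is hypothetical; DTRACE_VM4, __improbable and vm_debug_events are the ones used above):

    static void
    trace_entry_resize(vm_map_t map, vm_map_entry_t entry)
    {
            /* only assemble probe arguments when tracing is enabled */
            if (__improbable(vm_debug_events))
                    DTRACE_VM4(map_entry_resize, vm_map_t, map,
                               vm_map_entry_t, entry,
                               vm_address_t, entry->vme_start,
                               vm_address_t, entry->vme_end);
    }
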
index b6c12fe19dcf261b9cb2ffe33b42a5f0be8eec6b..8d6687f3990bab22691017d04772d83450faa658 100644 (file)
@@ -114,6 +114,11 @@ struct vm_map_store {
        (map)->hint = (value);         \
        MACRO_END
 
+#define        SAVE_HINT_HOLE_WRITE(map,value) \
+       MACRO_BEGIN                    \
+       (map)->hole_hint = (value);     \
+       MACRO_END
+
 #define SKIP_RB_TREE           0xBAADC0D1
 
 #define VM_MAP_ENTRY_CREATE    1
@@ -126,7 +131,7 @@ void        _vm_map_store_entry_link( struct vm_map_header *, struct vm_map_entry*, st
 void   vm_map_store_entry_link( struct _vm_map*, struct vm_map_entry*, struct vm_map_entry*);
 void   _vm_map_store_entry_unlink( struct vm_map_header *, struct vm_map_entry*);
 void   vm_map_store_entry_unlink( struct _vm_map*, struct vm_map_entry*);
-void   vm_map_store_update_first_free( struct _vm_map*, struct vm_map_entry*);
+void   vm_map_store_update_first_free( struct _vm_map*, struct vm_map_entry*, boolean_t new_entry_creation);
 void   vm_map_store_copy_insert( struct _vm_map*, struct vm_map_entry*, struct vm_map_copy*);
 void   vm_map_store_copy_reset( struct vm_map_copy*, struct vm_map_entry*);
 #if MACH_ASSERT
index f33ac3270441a9d301bbfed79e9b30afd547a5da..06bd7c971597fb7ec2df0e9b8477c065c2e2500f 100644 (file)
@@ -257,6 +257,9 @@ vm_map_store_copy_reset_ll( vm_map_copy_t copy, __unused vm_map_entry_t entry, _
 void
 update_first_free_ll( vm_map_t map, vm_map_entry_t new_first_free)
 {
+       if (map->holelistenabled)
+               return;
+
        UPDATE_FIRST_FREE_LL( map, new_first_free);
 }
 
index 1643d1dd92ab69f3672a7a29f3e03c1de595aa30..5e881f6b70b8035d4f2be153b1e2bb666e8adb66 100644 (file)
@@ -163,8 +163,559 @@ vm_map_store_copy_reset_rb( vm_map_copy_t copy, vm_map_entry_t entry, int nentri
        }
 }
 
-void   update_first_free_rb( __unused vm_map_t map, __unused vm_map_entry_t entry)
+extern zone_t  vm_map_holes_zone;      /* zone for vm map holes (vm_map_links) structures */
+
+void
+vm_map_combine_hole(vm_map_t map, vm_map_entry_t hole_entry);
+void
+vm_map_combine_hole(__unused vm_map_t map, vm_map_entry_t hole_entry)
+{
+
+       vm_map_entry_t middle_hole_entry, last_hole_entry;
+
+       hole_entry->vme_end = hole_entry->vme_next->vme_end;
+
+       middle_hole_entry = hole_entry->vme_next;
+       last_hole_entry = middle_hole_entry->vme_next;
+
+       assert(last_hole_entry->vme_prev == middle_hole_entry);
+       assert(middle_hole_entry->vme_end != last_hole_entry->vme_start);
+
+       last_hole_entry->vme_prev = hole_entry;
+       hole_entry->vme_next = last_hole_entry;
+
+       middle_hole_entry->vme_prev = NULL;
+       middle_hole_entry->vme_next = NULL;
+
+       zfree(vm_map_holes_zone, middle_hole_entry);
+
+       assert(hole_entry->vme_start < hole_entry->vme_end);
+       assert(last_hole_entry->vme_start < last_hole_entry->vme_end);
+}
+
+
+void
+vm_map_delete_hole(vm_map_t map, vm_map_entry_t hole_entry);
+void
+vm_map_delete_hole(vm_map_t map, vm_map_entry_t hole_entry)
+{
+       if (hole_entry == (vm_map_entry_t) map->holes_list) {
+
+               if (hole_entry->vme_next == (vm_map_entry_t) map->holes_list) {
+
+                       map->holes_list = NULL;
+                       SAVE_HINT_HOLE_WRITE(map, NULL);
+               } else {
+
+                       vm_map_entry_t l_next, l_prev;
+
+                       l_next = (vm_map_entry_t) map->holes_list->next;
+                       l_prev = (vm_map_entry_t) map->holes_list->prev;
+                       map->holes_list = (struct vm_map_links*) l_next;
+
+                       l_next->vme_prev = l_prev;
+                       l_prev->vme_next = l_next;
+
+                       SAVE_HINT_HOLE_WRITE(map, (struct vm_map_links*) l_next);
+               }
+       } else {
+
+               SAVE_HINT_HOLE_WRITE(map, (struct vm_map_links*) hole_entry->vme_prev);
+
+               hole_entry->vme_prev->vme_next = hole_entry->vme_next;
+               hole_entry->vme_next->vme_prev = hole_entry->vme_prev;
+       }
+
+       hole_entry->vme_next = NULL;
+       hole_entry->vme_prev = NULL;
+       zfree(vm_map_holes_zone, hole_entry);
+}
+
+
+/*
+ * For Debugging.
+ */
+
+#if DEBUG
+static void
+check_map_sanity(vm_map_t map, vm_map_entry_t old_hole_entry)
+{
+       vm_map_entry_t  hole_entry, next_hole_entry;
+       vm_map_entry_t  map_entry, next_map_entry;
+
+       if (map->holes_list == NULL) {
+
+               return;
+       }
+
+       hole_entry = (vm_map_entry_t) map->holes_list;
+       next_hole_entry = hole_entry->vme_next;
+
+       map_entry = vm_map_first_entry(map);
+       next_map_entry = map_entry->vme_next;
+
+       while(map_entry->vme_start > hole_entry->vme_start) {
+               hole_entry = next_hole_entry;
+               next_hole_entry = hole_entry->vme_next;
+
+               if (hole_entry == (vm_map_entry_t)map->holes_list)
+                       break;
+       }
+
+       while (map_entry != vm_map_to_entry(map)) {
+
+               if (map_entry->vme_start >= map->max_offset)
+                       break;
+
+               if (map_entry->vme_end != map_entry->vme_next->vme_start) {
+
+                       if (map_entry->vme_next == vm_map_to_entry(map))
+                               break;
+
+                       if (hole_entry->vme_start != map_entry->vme_end) {
+                               panic("hole_entry not aligned %p(0x%llx), %p (0x%llx), %p", hole_entry, (unsigned long long)hole_entry->vme_start, map_entry->vme_next, (unsigned long long)map_entry->vme_end, old_hole_entry);
+                               assert(hole_entry->vme_start == map_entry->vme_end);
+                       }
+
+                       if (hole_entry->vme_end != map_entry->vme_next->vme_start) {
+                               panic("hole_entry not next aligned %p(0x%llx), %p (0x%llx), %p", hole_entry, (unsigned long long)hole_entry->vme_end, map_entry->vme_next, (unsigned long long)map_entry->vme_next->vme_start, old_hole_entry);
+                               assert(hole_entry->vme_end == map_entry->vme_next->vme_start);
+                       }
+
+                       hole_entry = next_hole_entry;
+                       next_hole_entry = hole_entry->vme_next;
+
+                       if (hole_entry == (vm_map_entry_t)map->holes_list)
+                               break;
+               }
+
+               map_entry = map_entry->vme_next;
+       }
+}
+
+/*
+ * For debugging.
+ */
+static void
+copy_hole_info(vm_map_entry_t hole_entry, vm_map_entry_t old_hole_entry)
 {
-       return ;
+       old_hole_entry->vme_prev = hole_entry->vme_prev;
+       old_hole_entry->vme_next = hole_entry->vme_next;
+       old_hole_entry->vme_start = hole_entry->vme_start;
+       old_hole_entry->vme_end = hole_entry->vme_end;
 }
+#endif /* DEBUG */
+
+void
+update_holes_on_entry_deletion(vm_map_t map, vm_map_entry_t old_entry);
+void
+update_holes_on_entry_deletion(vm_map_t map, vm_map_entry_t old_entry)
+{
+       /*
+        * Dealing with the deletion of an older entry.
+        */
+
+       vm_map_entry_t          hole_entry, next_hole_entry;
+#if DEBUG
+       struct vm_map_entry     old_hole_entry;
+#endif /* DEBUG */
+       boolean_t               create_new_hole = TRUE;
+
+       hole_entry = (vm_map_entry_t) map->hole_hint;
+
+       if (hole_entry) {
+
+               if (hole_entry->vme_end == old_entry->vme_start) {
+                       /*
+                        * Found a hole right after above our entry.
+                        * Hit.
+                        */
+
+               } else if (hole_entry->vme_start == old_entry->vme_end) {
+
+                       if (hole_entry != (vm_map_entry_t) map->holes_list) {
 
+                               /*
+                                * Found a hole right after below our entry but
+                                * make sure we don't erroneously extend backwards.
+                                *  
+                                * Hit.
+                                */
+
+                               hole_entry = hole_entry->vme_prev;
+                       }
+
+               } else if (hole_entry->vme_start > old_entry->vme_end) {
+
+                       /*
+                        * Useless hint. Start from the top.
+                        */
+
+                       hole_entry = (vm_map_entry_t) map->holes_list;
+               }
+
+               if (hole_entry != (vm_map_entry_t) map->holes_list) {
+                       if (hole_entry->vme_start > old_entry->vme_start) {
+                               panic("Hole hint failed: Hole entry start: 0x%llx, entry start: 0x%llx, map hole start: 0x%llx, map hint start: 0x%llx\n",
+                                       (unsigned long long)hole_entry->vme_start,
+                                       (unsigned long long)old_entry->vme_start,
+                                       (unsigned long long)map->holes_list->start,
+                                       (unsigned long long)map->hole_hint->start);
+                       }
+                       if (hole_entry->vme_end > old_entry->vme_start) {
+                               panic("Hole hint failed: Hole entry end: 0x%llx, entry start: 0x%llx, map hole start: 0x%llx, map hint start: 0x%llx\n",
+                                       (unsigned long long)hole_entry->vme_end,
+                                       (unsigned long long)old_entry->vme_start,
+                                       (unsigned long long)map->holes_list->start,
+                                       (unsigned long long)map->hole_hint->start);
+                       }
+               }
+
+               while (1) {
+
+                       next_hole_entry = hole_entry->vme_next;
+
+                       /*
+                        * Hole is right above the entry.
+                        */
+                       if (hole_entry->vme_end == old_entry->vme_start) {
+
+#if DEBUG
+                               copy_hole_info(hole_entry, &old_hole_entry);
+#endif /* DEBUG */
+
+                               /*
+                                * Is there another hole right below the entry?
+                                * Can we combine holes?
+                                */
+
+                               if (old_entry->vme_end == hole_entry->vme_next->vme_start) {
+
+                                       vm_map_combine_hole(map, hole_entry);
+                               } else {
+
+                                       hole_entry->vme_end = old_entry->vme_end;
+                               }
+                               create_new_hole = FALSE;
+#if DEBUG
+                               check_map_sanity(map, &old_hole_entry);
+#endif /* DEBUG */
+                               break;
+                       }
+
+                       /*
+                        * Hole is right below the entry.
+                        */
+                       if (hole_entry->vme_start == old_entry->vme_end) {
+
+#if DEBUG
+                               copy_hole_info(hole_entry, &old_hole_entry);
+#endif /* DEBUG */
+
+                               hole_entry->vme_start = old_entry->vme_start;
+                               create_new_hole = FALSE;
+
+#if DEBUG
+                               check_map_sanity(map, &old_hole_entry);
+#endif /* DEBUG */
+                               break;
+                       }
+
+                       /*
+                        * Hole is beyond our entry. Let's go back to the last hole
+                        * before our entry so we have the right place to link up the
+                        * new hole that will be needed.
+                        */
+                       if (hole_entry->vme_start > old_entry->vme_end) {
+
+#if DEBUG
+                               copy_hole_info(hole_entry, &old_hole_entry);
+#endif /* DEBUG */
+
+                               if (hole_entry != (vm_map_entry_t) map->holes_list) {
+                                       assert(hole_entry->vme_start != old_entry->vme_start);
+                                       hole_entry = hole_entry->vme_prev;
+                               }
+                               break;
+                       }
+
+                       hole_entry = next_hole_entry;
+
+                       if (hole_entry == (vm_map_entry_t)map->holes_list) {
+                               hole_entry = hole_entry->vme_prev;
+                               break;
+                       }
+               }
+       }
+
+       if (create_new_hole) {
+               struct vm_map_links     *new_hole_entry = NULL;
+               vm_map_entry_t          l_next, l_prev;
+
+               new_hole_entry = zalloc(vm_map_holes_zone);
+
+               /*
+                * First hole in the map?
+                * OR
+                * A hole that is located above the current first hole in the map?
+                */
+               if (map->holes_list == NULL || (hole_entry == (vm_map_entry_t) map->holes_list && hole_entry->vme_start > old_entry->vme_start)) {
+
+                       if (map->holes_list == NULL) {
+
+                               map->holes_list = new_hole_entry;
+                               new_hole_entry->prev = new_hole_entry->next = (vm_map_entry_t)map->holes_list;
+                       } else {
+
+                               l_next = (vm_map_entry_t) map->holes_list;
+                               l_prev = map->holes_list->prev;
+                               map->holes_list = new_hole_entry;
+                               new_hole_entry->next = l_next;
+                               new_hole_entry->prev = l_prev;
+
+                               l_prev->vme_next = l_next->vme_prev = (vm_map_entry_t) new_hole_entry;
+                       }
+               } else {
+
+                       l_next = hole_entry->vme_next;
+                       l_prev = hole_entry->vme_next->vme_prev;
+
+                       new_hole_entry->prev = hole_entry;
+                       new_hole_entry->next = l_next;
+
+                       hole_entry->vme_next = (vm_map_entry_t)new_hole_entry;
+                       l_next->vme_prev = (vm_map_entry_t) new_hole_entry;
+               }
+
+               new_hole_entry->start = old_entry->vme_start;
+               new_hole_entry->end = old_entry->vme_end;
+
+               hole_entry = (vm_map_entry_t) new_hole_entry;
+
+               assert(new_hole_entry->start < new_hole_entry->end);
+       }
+
+#if DEBUG
+       check_map_sanity(map, &old_hole_entry);
+#endif /* DEBUG */
+
+       SAVE_HINT_HOLE_WRITE(map, (struct vm_map_links*) hole_entry);
+       return;
+}
+
+
+void
+update_holes_on_entry_creation(vm_map_t map, vm_map_entry_t new_entry);
+void
+update_holes_on_entry_creation(vm_map_t map, vm_map_entry_t new_entry)
+{
+
+       vm_map_entry_t                  hole_entry, next_hole_entry;
+#if DEBUG
+       struct vm_map_entry             old_hole_entry;
+       vm_map_entry_t                  tmp_entry;
+       boolean_t                               check_map_with_hole_sanity = TRUE;
+#endif /* DEBUG */
+
+       /*
+        * Case A: The entry is aligned exactly with the start and end of the hole.
+        *         This will delete the hole.
+        *
+        * Case B: The entry is completely within a hole but NOT aligned with the start/end of the hole.
+        *         This  will split a hole.
+        *
+        * Case C: The entry overlaps with the hole. The entry could be extending upwards (C1) or downwards (C2).
+        *         This will reduce the size of the hole or delete the hole completely if it is smaller than the entry.
+        */
+
+       hole_entry = (vm_map_entry_t) map->holes_list;
+       assert(hole_entry);
+       next_hole_entry = hole_entry->vme_next;
+
+       while (1) {
+
+#if DEBUG
+               /*
+                * If the entry doesn't exist in the RB tree, we are likely dealing with copy maps where
+                * the entries belonging to the copy map are linked into the list of entries silently and
+                * then added to the RB-tree later on.
+                * So sanity checks are useless in that case.
+                */
+               check_map_with_hole_sanity = vm_map_lookup_entry(map, new_entry->vme_start, &tmp_entry);
+#endif /* DEBUG */
+
+               if (hole_entry->vme_start == new_entry->vme_start &&
+                   hole_entry->vme_end == new_entry->vme_end) {
+
+                       /* Case A */
+#if DEBUG
+                       copy_hole_info(hole_entry, &old_hole_entry);
+#endif /* DEBUG */
+
+                       if (hole_entry == (vm_map_entry_t) map->holes_list) {
+
+                               if (hole_entry->vme_next == (vm_map_entry_t) map->holes_list) {
+
+                                       next_hole_entry = vm_map_last_entry(map);
+                                       assert(next_hole_entry->vme_end >= map->max_offset);
+                               }
+                       }
+
+                       vm_map_delete_hole(map, hole_entry);
+
+#if DEBUG
+                       if (check_map_with_hole_sanity)
+                               check_map_sanity(map, &old_hole_entry);
+#endif /* DEBUG */
+                       return;
+
+               } else if (hole_entry->vme_start < new_entry->vme_start &&
+                          hole_entry->vme_end > new_entry->vme_end) {
+
+                       /* Case B */
+                       struct vm_map_links *new_hole_entry = NULL;
+
+                       new_hole_entry = zalloc(vm_map_holes_zone);
+
+#if DEBUG
+                       copy_hole_info(hole_entry, &old_hole_entry);
+#endif /* DEBUG */
+
+                       new_hole_entry->prev = hole_entry;
+                       new_hole_entry->next = hole_entry->vme_next;
+                       hole_entry->vme_next->vme_prev = (vm_map_entry_t)new_hole_entry;
+                       hole_entry->vme_next = (vm_map_entry_t)new_hole_entry;
+
+                       new_hole_entry->start = new_entry->vme_end;
+                       new_hole_entry->end = hole_entry->vme_end;
+                       hole_entry->vme_end = new_entry->vme_start;
+
+                       assert(hole_entry->vme_start < hole_entry->vme_end);
+                       assert(new_hole_entry->start < new_hole_entry->end);
+
+#if DEBUG
+                       if (check_map_with_hole_sanity)
+                               check_map_sanity(map, &old_hole_entry);
+#endif /* DEBUG */
+
+                       SAVE_HINT_HOLE_WRITE(map, (struct vm_map_links*) hole_entry);
+                       return;
+
+               } else if ((new_entry->vme_start <= hole_entry->vme_start) && (hole_entry->vme_start < new_entry->vme_end)) {
+
+                       /*
+                        * Case C1: Entry moving upwards and a part/full hole lies within the bounds of the entry.
+                        */
+
+#if DEBUG
+                       copy_hole_info(hole_entry, &old_hole_entry);
+#endif /* DEBUG */
+
+                       if (hole_entry->vme_end <= new_entry->vme_end) {
+
+                               vm_map_delete_hole(map, hole_entry);
+                       } else {
+                               hole_entry->vme_start = new_entry->vme_end;
+                               SAVE_HINT_HOLE_WRITE(map, (struct vm_map_links*) hole_entry);
+                       }
+
+#if DEBUG
+                       if (check_map_with_hole_sanity)
+                               check_map_sanity(map, &old_hole_entry);
+#endif /* DEBUG */
+
+                       return;
+
+               } else if ((new_entry->vme_start < hole_entry->vme_end) && (hole_entry->vme_end <= new_entry->vme_end)) {
+
+                       /*
+                        * Case C2: Entry moving downwards and a part/full hole lies within the bounds of the entry.
+                        */
+
+#if DEBUG
+                       copy_hole_info(hole_entry, &old_hole_entry);
+#endif /* DEBUG */
+
+                       if (hole_entry->vme_start >= new_entry->vme_start) {
+                               vm_map_delete_hole(map, hole_entry);
+                       } else {
+                               hole_entry->vme_end = new_entry->vme_start;
+                               SAVE_HINT_HOLE_WRITE(map, (struct vm_map_links*) hole_entry);
+                       }
+
+#if DEBUG
+                       if (check_map_with_hole_sanity)
+                               check_map_sanity(map, &old_hole_entry);
+#endif /* DEBUG */
+
+                       return;
+               }
+
+               hole_entry = next_hole_entry;
+               next_hole_entry = hole_entry->vme_next;
+
+               if (hole_entry == (vm_map_entry_t)map->holes_list)
+                       break;
+       }
+
+       panic("Illegal action: h1: %p, s:0x%llx, e:0x%llx...h2:%p, s:0x%llx, e:0x%llx...h3:0x%p, s:0x%llx, e:0x%llx\n",
+               hole_entry->vme_prev,
+               (unsigned long long)hole_entry->vme_prev->vme_start,
+               (unsigned long long)hole_entry->vme_prev->vme_end,
+               hole_entry,
+               (unsigned long long)hole_entry->vme_start,
+               (unsigned long long)hole_entry->vme_end,
+               hole_entry->vme_next,
+               (unsigned long long)hole_entry->vme_next->vme_start,
+               (unsigned long long)hole_entry->vme_next->vme_end);
+
+}
+
+void
+update_first_free_rb(vm_map_t map, vm_map_entry_t entry, boolean_t new_entry_creation)
+{
+
+       if (map->holelistenabled) {
+
+               /*
+                * Holes can be used to track ranges all the way up to MACH_VM_MAX_ADDRESS or more (e.g. kernel map).
+                */
+               vm_map_offset_t max_valid_offset = (map->max_offset > MACH_VM_MAX_ADDRESS) ? map->max_offset : MACH_VM_MAX_ADDRESS;
+
+               /*
+                * Clipping an entry will not result in the creation/deletion/modification of
+                * a hole. Those calls pass NULL for their target entry.
+                */
+               if (entry == NULL) {
+                       return;
+               }
+
+               /*
+                * Commpage is pinned beyond the map's max offset. That shouldn't affect the
+                * holes within the bounds of the map.
+                */
+               if (vm_map_trunc_page(entry->vme_start, VM_MAP_PAGE_MASK(map)) >= max_valid_offset) {
+                       return;
+               }
+
+               /*
+                *
+                * Note:
+                *
+                * - A new entry has already been added to the map
+                * OR
+                * - An older entry has already been deleted from the map
+                *
+                * We are updating the hole list after the fact (except in one special case involving copy maps).
+                *
+                */
+
+               if (new_entry_creation) {
+
+                       update_holes_on_entry_creation(map, entry);
+               } else {
+
+                       update_holes_on_entry_deletion(map, entry);
+               }
+       }
+}
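
update_holes_on_entry_creation() above distinguishes three ways a new entry can meet a hole, per its Case A/B/C comment: exact fit (delete the hole), strictly inside (split it), or overlapping one end (trim it, or delete it when fully covered). The same classification over plain integer ranges, as a self-contained illustration (demo code, not part of the kernel):

    #include <stdint.h>
    #include <stdio.h>

    enum hole_case { HOLE_DELETED, HOLE_SPLIT, HOLE_TRIMMED, NO_OVERLAP };

    /* classify how an entry [es, ee) interacts with a hole [hs, he) */
    static enum hole_case
    classify(uint64_t hs, uint64_t he, uint64_t es, uint64_t ee)
    {
            if (hs == es && he == ee)
                    return HOLE_DELETED;    /* Case A: exact fit removes the hole */
            if (hs < es && he > ee)
                    return HOLE_SPLIT;      /* Case B: entry inside, hole splits in two */
            if ((es <= hs && hs < ee) || (es < he && he <= ee))
                    return HOLE_TRIMMED;    /* Case C: overlap trims (or consumes) the hole */
            return NO_OVERLAP;
    }

    int
    main(void)
    {
            printf("%d %d %d\n",
                   classify(0x1000, 0x5000, 0x1000, 0x5000),    /* A */
                   classify(0x1000, 0x5000, 0x2000, 0x3000),    /* B */
                   classify(0x1000, 0x5000, 0x4000, 0x6000));   /* C */
            return 0;
    }
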
index da679492989e8b3f4a6c56a1c0ee886ac62961c5..d9506e6b4bec5b8d99ebceb2ee4342d9f9717473 100644 (file)
@@ -41,6 +41,6 @@ void  vm_map_store_entry_link_rb( struct vm_map_header*, struct vm_map_entry*, s
 void   vm_map_store_entry_unlink_rb( struct vm_map_header*, struct vm_map_entry*);
 void   vm_map_store_copy_insert_rb( struct _vm_map*, struct vm_map_entry*, struct vm_map_copy*);
 void   vm_map_store_copy_reset_rb( struct vm_map_copy*, struct vm_map_entry*, int);
-void   update_first_free_rb(struct _vm_map*, struct vm_map_entry*);
+void   update_first_free_rb(struct _vm_map*, struct vm_map_entry*, boolean_t new_entry_creation);
 
 #endif /* _VM_VM_MAP_STORE_RB_H */
index e65ac5c15f9fbaf1dabd88de63d09c0fcf478603..a2d77426bee33803fa07c3470d5cb772da234189 100644 (file)
@@ -488,14 +488,26 @@ vm_object_hash_insert(
 {
        queue_t         bucket;
 
-       vm_object_lock_assert_exclusive(object);
+       assert(vm_object_hash_lookup(entry->pager, FALSE) == NULL);
 
        bucket = &vm_object_hashtable[vm_object_hash(entry->pager)];
 
        queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link);
 
+       if (object->hashed) {
+               /*
+                * "hashed" was pre-set on this (new) object to avoid
+                * locking issues in vm_object_enter() (can't attempt to
+                * grab the object lock while holding the hash lock as
+                * a spinlock), so no need to set it here (and no need to
+                * hold the object's lock).
+                */
+       } else {
+               vm_object_lock_assert_exclusive(object);
+               object->hashed = TRUE;
+       }
+
        entry->object = object;
-       object->hashed = TRUE;
 }
 
 static vm_object_hash_entry_t
@@ -747,6 +759,8 @@ vm_object_bootstrap(void)
        vm_object_template.purgeable_queue_group = 0;
 
        vm_object_template.vo_cache_ts = 0;
+
+       vm_object_template.wire_tag = VM_KERN_MEMORY_NONE;
        
 #if DEBUG
        bzero(&vm_object_template.purgeable_owner_bt[0],
@@ -767,16 +781,11 @@ vm_object_bootstrap(void)
  *     VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size.
  */
 
-#ifdef ppc
-       _vm_object_allocate(vm_last_addr + 1,
-                           kernel_object);
-#else
        _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
                            kernel_object);
 
        _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
                            compressor_object);
-#endif
        kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
        compressor_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
 
@@ -786,13 +795,8 @@ vm_object_bootstrap(void)
         */
 
        vm_submap_object = &vm_submap_object_store;
-#ifdef ppc
-       _vm_object_allocate(vm_last_addr + 1,
-                           vm_submap_object);
-#else
        _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
                            vm_submap_object);
-#endif
        vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
 
        /*
@@ -894,6 +898,7 @@ static int cache_shadows = TRUE;
 unsigned long vm_object_deallocate_shared_successes = 0;
 unsigned long vm_object_deallocate_shared_failures = 0;
 unsigned long vm_object_deallocate_shared_swap_failures = 0;
+
 __private_extern__ void
 vm_object_deallocate(
        register vm_object_t    object)
@@ -1551,8 +1556,8 @@ vm_object_cache_evict(
                                 */
                                assert(!p->pageout_queue);
 
-                               VM_PAGE_QUEUES_REMOVE(p);
-                               VM_PAGE_ENQUEUE_INACTIVE(p, TRUE);
+                               vm_page_queues_remove(p);
+                               vm_page_enqueue_inactive(p, TRUE);
 
                                ep_moved++;
                        } else {
@@ -1866,6 +1871,7 @@ vm_object_terminate(
  * The VM object must be locked by caller.
  * The lock will be released on return and the VM object is no longer valid.
  */
+
 void
 vm_object_reap(
        vm_object_t object)
@@ -1911,6 +1917,8 @@ vm_object_reap(
 
                owner = object->vo_purgeable_owner;
 
+               VM_OBJECT_UNWIRED(object);
+
                if (object->purgable == VM_PURGABLE_DENY) {
                        /* not purgeable: nothing to do */
                } else if (object->purgable == VM_PURGABLE_VOLATILE) {
@@ -2227,13 +2235,15 @@ restart_after_sleep:
 
                        if ((p->dirty || p->precious) && !p->error && object->alive) {
 
+                               assert(!object->internal);
+                       
                                if (!p->laundry) {
-                                       VM_PAGE_QUEUES_REMOVE(p);
+                                       vm_page_queues_remove(p);
                                        /*
                                         * flush page... page will be freed
                                         * upon completion of I/O
                                         */
-                                       vm_pageout_cluster(p, TRUE);
+                                       (void)vm_pageout_cluster(p, TRUE, FALSE, FALSE);
                                }
                                vm_page_unlock_queues();
                                /*
@@ -2735,7 +2745,9 @@ deactivate_pages_in_object(
        boolean_t               reusable_page,
        boolean_t               all_reusable,
        chunk_state_t           *chunk_state,
-       pmap_flush_context      *pfc)
+       pmap_flush_context      *pfc,
+       struct pmap             *pmap,
+       vm_map_offset_t         pmap_offset)
 {
        vm_page_t       m;
        int             p;
@@ -2756,7 +2768,7 @@ deactivate_pages_in_object(
        dw_count = 0;
        dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
 
-       for(p = 0; size && CHUNK_NOT_COMPLETE(*chunk_state); p++, size -= PAGE_SIZE_64, offset += PAGE_SIZE_64) {
+       for(p = 0; size && CHUNK_NOT_COMPLETE(*chunk_state); p++, size -= PAGE_SIZE_64, offset += PAGE_SIZE_64, pmap_offset += PAGE_SIZE_64) {
 
                /*
                 * If this offset has already been found and handled in a higher level object, then don't
@@ -2855,7 +2867,7 @@ deactivate_pages_in_object(
                                                vm_page_stats_reusable.reusable += reusable;
                                                reusable = 0;
                                        }
-                                       vm_page_do_delayed_work(object, &dw_array[0], dw_count);
+                                       vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
 
                                        dwp = &dw_array[0];
                                        dw_count = 0;
@@ -2884,6 +2896,23 @@ deactivate_pages_in_object(
 #endif /* MACH_PAGEMAP */
                                        VM_COMPRESSOR_PAGER_STATE_CLR(object,
                                                                      offset);
+                                       if (pmap != PMAP_NULL &&
+                                           (COMPRESSED_PAGER_IS_ACTIVE ||
+                                            DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)) {
+                                               /*
+                                                * Tell pmap that this page
+                                                * is no longer mapped, to
+                                                * adjust the footprint ledger
+                                                * because this page is no
+                                                * longer compressed.
+                                                */
+                                               pmap_remove_options(
+                                                       pmap,
+                                                       pmap_offset,
+                                                       (pmap_offset +
+                                                        PAGE_SIZE),
+                                                       PMAP_OPTIONS_REMOVE);
+                                       }
                                }
                        }
                }
@@ -2896,7 +2925,7 @@ deactivate_pages_in_object(
        }
                
        if (dw_count)
-               vm_page_do_delayed_work(object, &dw_array[0], dw_count);
+               vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
 }
 
 
@@ -2917,7 +2946,9 @@ deactivate_a_chunk(
        boolean_t               kill_page,
        boolean_t               reusable_page,
        boolean_t               all_reusable,
-       pmap_flush_context      *pfc)
+       pmap_flush_context      *pfc,
+       struct pmap             *pmap,
+       vm_map_offset_t         pmap_offset)
 {
        vm_object_t             object;
        vm_object_t             tmp_object;
@@ -2950,7 +2981,7 @@ deactivate_a_chunk(
        while (object && CHUNK_NOT_COMPLETE(chunk_state)) {
                vm_object_paging_begin(object);
 
-               deactivate_pages_in_object(object, offset, length, kill_page, reusable_page, all_reusable, &chunk_state, pfc);
+               deactivate_pages_in_object(object, offset, length, kill_page, reusable_page, all_reusable, &chunk_state, pfc, pmap, pmap_offset);
 
                vm_object_paging_end(object);
 
@@ -2996,7 +3027,9 @@ vm_object_deactivate_pages(
        vm_object_offset_t      offset,
        vm_object_size_t        size,
        boolean_t               kill_page,
-       boolean_t               reusable_page)
+       boolean_t               reusable_page,
+       struct pmap             *pmap,
+       vm_map_offset_t         pmap_offset)
 {
        vm_object_size_t        length;
        boolean_t               all_reusable;
@@ -3037,10 +3070,11 @@ vm_object_deactivate_pages(
        pmap_flush_context_init(&pmap_flush_context_storage);
 
        while (size) {
-               length = deactivate_a_chunk(object, offset, size, kill_page, reusable_page, all_reusable, &pmap_flush_context_storage);
+               length = deactivate_a_chunk(object, offset, size, kill_page, reusable_page, all_reusable, &pmap_flush_context_storage, pmap, pmap_offset);
 
                size -= length;
                offset += length;
+               pmap_offset += length;
        }
        pmap_flush(&pmap_flush_context_storage);
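A minimal caller sketch for the widened vm_object_deactivate_pages() signature above; the map and entry variables are hypothetical stand-ins for whatever the real call sites pass, but the idea is that the owning pmap and the starting VA are now threaded down so deactivate_pages_in_object() can call pmap_remove_options() per page and keep the footprint ledger honest:

	/* hypothetical call site: deactivate a mapped range and let the
	 * pmap layer adjust the "compressed" footprint accounting */
	vm_object_deactivate_pages(object,
	                           offset,
	                           size,
	                           kill_page,
	                           reusable_page,
	                           map->pmap,          /* new: owning pmap */
	                           entry->vme_start);  /* new: pmap_offset (VA of the range) */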
 
@@ -3491,6 +3525,34 @@ vm_object_copy_slowly(
                        kern_return_t   error_code;
 
                        vm_object_lock(src_object);
+
+                       if (src_object->internal &&
+                           src_object->shadow == VM_OBJECT_NULL &&
+                           (vm_page_lookup(src_object,
+                                           src_offset) == VM_PAGE_NULL) &&
+                           (src_object->pager == NULL ||
+                            (VM_COMPRESSOR_PAGER_STATE_GET(src_object,
+                                                           src_offset) ==
+                             VM_EXTERNAL_STATE_ABSENT))) {
+                               /*
+                                * This page is neither resident nor compressed
+                                * and there's no shadow object below 
+                                * "src_object", so this page is really missing.
+                                * There's no need to zero-fill it just to copy
+                                * it:  let's leave it missing in "new_object"
+                                * and get zero-filled on demand.
+                                */
+                               vm_object_unlock(src_object);
+                               /* free the unused "new_page"... */
+                               vm_object_lock(new_object);
+                               VM_PAGE_FREE(new_page);
+                               new_page = VM_PAGE_NULL;
+                               vm_object_unlock(new_object);
+                               /* ...and go to next page in "src_object" */
+                               result = VM_FAULT_SUCCESS;
+                               break;
+                       }
+
                        vm_object_paging_begin(src_object);
 
                        if (size > (vm_size_t) -1) {
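The new early-out above can be read as a predicate; a hedged restatement as a standalone helper (not in the source, names taken from the hunk) makes the condition easier to scan:

	/* sketch: a page is "really missing" when it is neither resident nor
	 * compressed and there is no shadow object to supply it, so the copy
	 * can leave it absent and let it be zero-filled on demand */
	static boolean_t
	vm_object_copy_page_is_missing(
		vm_object_t             src_object,
		vm_object_offset_t      src_offset)
	{
		return (src_object->internal &&
		        src_object->shadow == VM_OBJECT_NULL &&
		        vm_page_lookup(src_object, src_offset) == VM_PAGE_NULL &&
		        (src_object->pager == NULL ||
		         VM_COMPRESSOR_PAGER_STATE_GET(src_object, src_offset) ==
		             VM_EXTERNAL_STATE_ABSENT));
	}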
@@ -3887,6 +3949,7 @@ vm_object_copy_delayed(
         *      the original object must be done carefully, to avoid deadlock.
         */
 
+       copy_size = vm_object_round_page(copy_size);
  Retry:
  
        /*
@@ -4477,15 +4540,32 @@ Retry:
                                assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL);
                                new_entry = vm_object_hash_entry_alloc(pager);
                                new_object = vm_object_allocate(size);
+                               /*
+                                * Set new_object->hashed now, while no one
+                                * knows about this object yet and we
+                                * don't need to lock it.  Once it's in
+                                * the hash table, we would have to lock
+                                * the object to set its "hashed" bit and
+                                * we can't lock the object while holding
+                                * the hash lock as a spinlock...
+                                */
+                               new_object->hashed = TRUE;
                                lck = vm_object_hash_lock_spin(pager);
                        } else {
                                /*
                                 *      Lookup failed twice, and we have something
                                 *      to insert; set the object.
                                 */
-                               vm_object_lock(new_object);
+                               /*
+                                * We can't lock the object here since we're
+                                * holding the hash lock as a spin lock.
+                                * We've already pre-set "new_object->hashed"
+                                * when we created "new_object" above, so we
+                                * won't need to modify the object in
+                                * vm_object_hash_insert().
+                                */
+                               assert(new_object->hashed);
                                vm_object_hash_insert(new_entry, new_object);
-                               vm_object_unlock(new_object);
                                entry = new_entry;
                                new_entry = VM_OBJECT_HASH_ENTRY_NULL;
                                new_object = VM_OBJECT_NULL;
@@ -4571,8 +4651,16 @@ Retry:
         *      throw away ours.
         */
 
-       if (new_object != VM_OBJECT_NULL)
+       if (new_object != VM_OBJECT_NULL) {
+               /*
+                * Undo the pre-setting of "new_object->hashed" before
+                * deallocating "new_object", since we did not insert it
+                * into the hash table after all.
+                */
+               assert(new_object->hashed);
+               new_object->hashed = FALSE;
                vm_object_deallocate(new_object);
+       }
 
        if (new_entry != VM_OBJECT_HASH_ENTRY_NULL)
                vm_object_hash_entry_free(new_entry);
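Taken together, the three hunks above implement one pattern: state that would normally need the object lock is initialized while the object is still private, only asserted once the hash spin lock is held, and undone if the object ends up unused. A condensed sketch of the sequence, assuming the same names:

	new_object = vm_object_allocate(size);
	new_object->hashed = TRUE;               /* object not yet visible: no lock needed */
	lck = vm_object_hash_lock_spin(pager);   /* spin lock: sleeping locks forbidden below */

	/* ...second lookup fails, so insert the pre-marked object... */
	assert(new_object->hashed);              /* pre-set, so no object lock required here */
	vm_object_hash_insert(new_entry, new_object);

	/* ...or, if someone else won the race and our object is unused... */
	new_object->hashed = FALSE;              /* undo the pre-set before throwing it away */
	vm_object_deallocate(new_object);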
@@ -6077,59 +6165,6 @@ vm_object_coalesce(
        return(TRUE);
 }
 
-/*
- *     Attach a set of physical pages to an object, so that they can
- *     be mapped by mapping the object.  Typically used to map IO memory.
- *
- *     The mapping function and its private data are used to obtain the
- *     physical addresses for each page to be mapped.
- */
-void
-vm_object_page_map(
-       vm_object_t             object,
-       vm_object_offset_t      offset,
-       vm_object_size_t        size,
-       vm_object_offset_t      (*map_fn)(void *map_fn_data, 
-               vm_object_offset_t offset),
-               void            *map_fn_data)   /* private to map_fn */
-{
-       int64_t num_pages;
-       int     i;
-       vm_page_t       m;
-       vm_page_t       old_page;
-       vm_object_offset_t      addr;
-
-       num_pages = atop_64(size);
-
-       for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) {
-
-           addr = (*map_fn)(map_fn_data, offset);
-
-           while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
-               vm_page_more_fictitious();
-
-           vm_object_lock(object);
-           if ((old_page = vm_page_lookup(object, offset))
-                       != VM_PAGE_NULL)
-           {
-                   VM_PAGE_FREE(old_page);
-           }
-
-           assert((ppnum_t) addr == addr);
-           vm_page_init(m, (ppnum_t) addr, FALSE);
-           /*
-            * private normally requires lock_queues but since we
-            * are initializing the page, its not necessary here
-            */
-           m->private = TRUE;          /* don`t free page */
-           m->wire_count = 1;
-           vm_page_insert(m, object, offset);
-
-           PAGE_WAKEUP_DONE(m);
-           vm_object_unlock(object);
-       }
-}
-
 kern_return_t
 vm_object_populate_with_private(
                vm_object_t             object,
@@ -7473,7 +7508,7 @@ vm_object_transpose(
                        vm_page_rename(page, object1, page->offset, FALSE);
                }
                assert(queue_empty(&object2->memq));
-               /* transfer tmp_object's pages to object1 */
+               /* transfer tmp_object's pages to object2 */
                while (!queue_empty(&tmp_object->memq)) {
                        page = (vm_page_t) queue_first(&tmp_object->memq);
                        queue_remove(&tmp_object->memq, page,
@@ -7605,10 +7640,10 @@ MACRO_END
 #if UPL_DEBUG
        /* "uplq" refers to the object not its contents (see upl_transpose()) */
 #endif
-       assert(object1->objq.next == NULL);
-       assert(object1->objq.prev == NULL);
-       assert(object2->objq.next == NULL);
-       assert(object2->objq.prev == NULL);
+       assert((object1->purgable == VM_PURGABLE_DENY) || (object1->objq.next == NULL));
+       assert((object1->purgable == VM_PURGABLE_DENY) || (object1->objq.prev == NULL));
+       assert((object2->purgable == VM_PURGABLE_DENY) || (object2->objq.next == NULL));
+       assert((object2->purgable == VM_PURGABLE_DENY) || (object2->objq.prev == NULL));
 
 #undef __TRANSPOSE_FIELD
 
@@ -7805,7 +7840,7 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start,
                                 * power of 2 nor a multiple of PAGE_SIZE... so round
                                 * it up to the nearest PAGE_SIZE boundary
                                 */
-                               pre_heat_size = (max_length * object->pages_used) / object->pages_created;
+                               pre_heat_size = (max_length * (uint64_t)object->pages_used) / object->pages_created;
 
                                if (pre_heat_size < min_ph_size)
                                        pre_heat_size = min_ph_size;
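The (uint64_t) cast above widens object->pages_used so the product max_length * pages_used is computed in 64-bit arithmetic; on configurations where both operands would otherwise be 32-bit, the intermediate product could wrap before the division. A small, self-contained illustration of that failure mode (ordinary user-space C, made-up numbers, not kernel code):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t pages_used    = 200000;      /* hypothetical counters */
		uint32_t pages_created = 300000;
		uint32_t max_length    = 1 << 20;     /* hypothetical 1 MB cluster limit */

		/* product computed in 32 bits: wraps before the division */
		uint64_t wrapped = (uint64_t)(max_length * pages_used) / pages_created;
		/* one operand widened first: full 64-bit product, correct result */
		uint64_t widened = ((uint64_t)max_length * pages_used) / pages_created;

		printf("wrapped=%llu widened=%llu\n",
		       (unsigned long long)wrapped, (unsigned long long)widened);
		return 0;
	}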
@@ -8225,8 +8260,10 @@ vm_object_range_op(
 
                                VM_PAGE_FREE(dst_page);
 
-                       } else if ((ops & UPL_ROP_ABSENT) && !dst_page->absent)
-                               break;
+                       } else if ((ops & UPL_ROP_ABSENT)
+                                          && (!dst_page->absent || dst_page->busy)) {
+                               break;
+                       }
                } else if (ops & UPL_ROP_PRESENT)
                        break;
 
@@ -8511,107 +8548,248 @@ vm_object_pack_pages(
        }
 }
 
+
+/*
+ * This routine does the "relocation" of previously
+ * compressed pages belonging to this object that are
+ * residing in a number of compressed segments into
+ * a set of compressed segments dedicated to hold
+ * compressed pages belonging to this object.
+ */
+
+extern void *freezer_chead;
+extern char *freezer_compressor_scratch_buf;
+extern int c_freezer_compression_count;
+extern AbsoluteTime c_freezer_last_yield_ts;
+
+#define        MAX_FREE_BATCH  32
+#define FREEZER_DUTY_CYCLE_ON_MS       5
+#define FREEZER_DUTY_CYCLE_OFF_MS      5
+
+static int c_freezer_should_yield(void);
+
+
+static int
+c_freezer_should_yield()
+{
+       AbsoluteTime    cur_time;
+       uint64_t        nsecs;
+
+       assert(c_freezer_last_yield_ts);
+       clock_get_uptime(&cur_time);
+
+       SUB_ABSOLUTETIME(&cur_time, &c_freezer_last_yield_ts);
+       absolutetime_to_nanoseconds(cur_time, &nsecs);
+
+       if (nsecs > 1000 * 1000 * FREEZER_DUTY_CYCLE_ON_MS)
+               return (1);
+       return (0);
+}
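The check above is a simple duty-cycle budget: once more than FREEZER_DUTY_CYCLE_ON_MS of uptime has elapsed since the last yield, the caller is told to back off. A sketch with the unit conversion spelled out:

	/* 5 ms on-budget expressed in nanoseconds: 5 * 1,000,000 ns */
	uint64_t on_budget_ns = (uint64_t)FREEZER_DUTY_CYCLE_ON_MS * 1000ULL * 1000ULL;

	if (nsecs > on_budget_ns)
		return (1);     /* time to yield for FREEZER_DUTY_CYCLE_OFF_MS */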
+
+
 void
-vm_object_pageout(
+vm_object_compressed_freezer_done()
+{
+       vm_compressor_finished_filling(&freezer_chead);
+}
+
+
+void
+vm_object_compressed_freezer_pageout(
        vm_object_t object)
 {
-       vm_page_t                       p, next;
-       struct  vm_pageout_queue        *iq;
-       boolean_t                       set_pageout_bit = FALSE;
+       vm_page_t                       p;
+       vm_page_t                       local_freeq = NULL;
+       int                             local_freed = 0;
+       kern_return_t                   retval = KERN_SUCCESS;
+       int                             obj_resident_page_count_snapshot = 0;
+
+       assert(object != VM_OBJECT_NULL);
 
-       iq = &vm_pageout_queue_internal;
-       
-       assert(object != VM_OBJECT_NULL );
-       
        vm_object_lock(object);
 
-       if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) {
+       if (!object->pager_initialized || object->pager == MEMORY_OBJECT_NULL)  {
+                       
                if (!object->pager_initialized) {
-                       /*
-                       *   If there is no memory object for the page, create
-                       *   one and hand it to the default pager.
-                       */
-                       vm_object_pager_create(object);
+
+                       vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
+
+                       if (!object->pager_initialized)
+                               vm_object_compressor_pager_create(object);
                }
 
-               set_pageout_bit = TRUE;
+               if (!object->pager_initialized || object->pager == MEMORY_OBJECT_NULL)  {
+                       vm_object_unlock(object);
+                       return;
+               }
        }
                        
-       if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
+       if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
+               vm_object_offset_t      curr_offset = 0;
+
+               /*
+                * Go through the object and make sure that any
+                * previously compressed pages are relocated into
+                * a compressed segment associated with our "freezer_chead".
+                */
+               while (curr_offset < object->vo_size) {
+
+                       curr_offset = vm_compressor_pager_next_compressed(object->pager, curr_offset);
+       
+                       if (curr_offset == (vm_object_offset_t) -1)
+                               break;
+
+                       retval = vm_compressor_pager_relocate(object->pager, curr_offset, &freezer_chead);
+
+                       if (retval != KERN_SUCCESS)
+                               break;
 
-               set_pageout_bit = FALSE;
+                       curr_offset += PAGE_SIZE_64;
+               }
        }
 
-ReScan:        
-       next = (vm_page_t)queue_first(&object->memq);
+       /*
+        * We can't hold the object lock while heading down into the compressed pager
+        * layer because we might need the kernel map lock down there to allocate new
+        * compressor data structures. And if this same object is mapped in the kernel
+        * and there's a fault on it, then that thread will want the object lock while
+        * holding the kernel map lock.
+        *
+        * Since we are going to drop/grab the object lock repeatedly, we must make sure
+        * we won't be stuck in an infinite loop if the same page(s) keep getting
+        * decompressed. So we grab a snapshot of the number of pages in the object and
+        * we won't process any more than that number of pages.
+        */
+
+       obj_resident_page_count_snapshot = object->resident_page_count;
+
+       vm_object_activity_begin(object);
+
+       while ((obj_resident_page_count_snapshot--) && !queue_empty(&object->memq)) {
+
+               p = (vm_page_t)queue_first(&object->memq);
+
+               KERNEL_DEBUG(0xe0430004 | DBG_FUNC_START, object, local_freed, 0, 0, 0);
 
-       while (!queue_end(&object->memq, (queue_entry_t)next)) {
-               p = next;
-               next = (vm_page_t)queue_next(&next->listq);
-               
-               /* Throw to the pageout queue */
                vm_page_lockspin_queues();
 
+               if (p->cleaning || p->fictitious || p->busy || p->absent || p->unusual || p->error || VM_PAGE_WIRED(p)) {
+                       if (p->cleaning)
+                               p->pageout = TRUE;
+
+                       vm_page_unlock_queues();
+
+                       KERNEL_DEBUG(0xe0430004 | DBG_FUNC_END, object, local_freed, 1, 0, 0);
+
+                       queue_remove(&object->memq, p, vm_page_t, listq);
+                       queue_enter(&object->memq, p, vm_page_t, listq);
+
+                       continue;
+               }
+
+               if (p->pmapped == TRUE) {
+                       int refmod_state, pmap_flags;
+
+                       if (p->dirty || p->precious) {
+                               pmap_flags = PMAP_OPTIONS_COMPRESSOR;
+                       } else {
+                               pmap_flags = PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
+                       }
+
+                       refmod_state = pmap_disconnect_options(p->phys_page, pmap_flags, NULL);
+                       if (refmod_state & VM_MEM_MODIFIED) {
+                               SET_PAGE_DIRTY(p, FALSE);
+                       }
+               }
+               
+               if (p->dirty == FALSE && p->precious == FALSE) {
+                       /*
+                        * Clean and non-precious page.
+                        */
+                       vm_page_unlock_queues();
+                       VM_PAGE_FREE(p);
+
+                       KERNEL_DEBUG(0xe0430004 | DBG_FUNC_END, object, local_freed, 2, 0, 0);
+                       continue;
+               }
+
+               if (p->laundry) {
+                       p->pageout = FALSE;
+                       vm_pageout_steal_laundry(p, TRUE);
+               }
+
+               vm_page_queues_remove(p);
+               vm_page_unlock_queues();
+
+
                /*
-                * see if page is already in the process of
-                * being cleaned... if so, leave it alone
+                * In case the compressor fails to compress this page, we need it at
+                * the back of the object memq so that we don't keep trying to process it.
+                * Make the move here while we have the object lock held.
                 */
-               if (!p->laundry) {
 
-                       if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
+               queue_remove(&object->memq, p, vm_page_t, listq);
+               queue_enter(&object->memq, p, vm_page_t, listq);
 
-                               if (VM_PAGE_Q_THROTTLED(iq)) {
-                                       
-                                       iq->pgo_draining = TRUE;
-                                       
-                                       assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE);
-                                       vm_page_unlock_queues();
-                                       vm_object_unlock(object);
-                                       
-                                       thread_block(THREAD_CONTINUE_NULL);
+               /*
+                * Grab an activity_in_progress here for vm_pageout_compress_page() to consume.
+                *
+                * Mark the page busy so no one messes with it while we have the object lock dropped.
+                */
 
-                                       vm_object_lock(object);
-                                       goto ReScan;
-                               }
+               p->busy = TRUE;
 
-                               if (p->fictitious || p->busy ) {
-                                       vm_page_unlock_queues();
-                                       continue;
-                               }
-                               
-                               if (p->absent || p->unusual || p->error || VM_PAGE_WIRED(p)) {
-                                       vm_page_unlock_queues();
-                                       continue;
-                               }
-                               
-                               if (p->cleaning) {
-                                       p->pageout = TRUE;
-                                       vm_page_unlock_queues();
-                                       continue;
-                               }
+               vm_object_activity_begin(object);
 
-                               if (p->pmapped == TRUE) {
-                                       int refmod_state;
-                                       refmod_state = pmap_disconnect_options(p->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);
-                                       if (refmod_state & VM_MEM_MODIFIED) {
-                                               SET_PAGE_DIRTY(p, FALSE);
-                                       }
-                               }
+               vm_object_unlock(object);
+
+               /*
+                * arg3 == FALSE  tells vm_pageout_compress_page that we don't hold the object lock and the pager may not be initialized.
+                */
+               if (vm_pageout_compress_page(&freezer_chead, freezer_compressor_scratch_buf, p, FALSE) == KERN_SUCCESS) {
+                       /*
+                        * page has already been un-tabled from the object via 'vm_page_remove'
+                        */
+                       p->pageq.next = (queue_entry_t)local_freeq;
+                       local_freeq = p;
+                       local_freed++;
+
+                       if (local_freed >= MAX_FREE_BATCH) {
+               
+                               vm_page_free_list(local_freeq, TRUE);
                                
-                               if (p->dirty == FALSE) {
-                                       vm_page_unlock_queues();
-                                       VM_PAGE_FREE(p);
-                                       continue;
-                               }
+                               local_freeq = NULL;
+                               local_freed = 0;
                        }
+                       c_freezer_compression_count++;
+               }
+               KERNEL_DEBUG(0xe0430004 | DBG_FUNC_END, object, local_freed, 0, 0, 0);
+
+               if (local_freed == 0 && c_freezer_should_yield()) {
 
-                       VM_PAGE_QUEUES_REMOVE(p);
-                       vm_pageout_cluster(p, set_pageout_bit);
+                       thread_yield_internal(FREEZER_DUTY_CYCLE_OFF_MS);
+                       clock_get_uptime(&c_freezer_last_yield_ts);
                }
-               vm_page_unlock_queues();
+
+               vm_object_lock(object);
        }
 
+       if (local_freeq) {
+               vm_page_free_list(local_freeq, TRUE);
+                               
+               local_freeq = NULL;
+               local_freed = 0;
+       }
+       
+       vm_object_activity_end(object);
+
        vm_object_unlock(object);
+
+       if (c_freezer_should_yield()) {
+
+               thread_yield_internal(FREEZER_DUTY_CYCLE_OFF_MS);
+               clock_get_uptime(&c_freezer_last_yield_ts);
+       }
 }
 
 kern_return_t
@@ -8650,6 +8828,148 @@ vm_object_pagein(
 #endif /* CONFIG_FREEZE */
 
 
+void
+vm_object_pageout(
+       vm_object_t object)
+{
+       vm_page_t                       p, next;
+       struct  vm_pageout_queue        *iq;
+       boolean_t                       need_unlock = TRUE;
+
+       iq = &vm_pageout_queue_internal;
+       
+       assert(object != VM_OBJECT_NULL );
+       assert(!DEFAULT_PAGER_IS_ACTIVE && !DEFAULT_FREEZER_IS_ACTIVE);
+       
+       vm_object_lock(object);
+
+       if (!object->internal ||
+           object->terminating ||
+           !object->alive) {
+               vm_object_unlock(object);
+               return;
+       }
+
+       if (!object->pager_initialized || object->pager == MEMORY_OBJECT_NULL)  {
+                       
+               if (!object->pager_initialized) {
+
+                       vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
+
+                       if (!object->pager_initialized)
+                               vm_object_compressor_pager_create(object);
+               }
+
+               if (!object->pager_initialized || object->pager == MEMORY_OBJECT_NULL)  {
+                       vm_object_unlock(object);
+                       return;
+               }
+       }
+                       
+ReScan:        
+       next = (vm_page_t)queue_first(&object->memq);
+
+       while (!queue_end(&object->memq, (queue_entry_t)next)) {
+               p = next;
+               next = (vm_page_t)queue_next(&next->listq);
+               
+               if (!(p->active || p->inactive || p->speculative) ||
+                   p->encrypted_cleaning ||
+                   p->cleaning ||
+                   p->laundry ||
+                   p->pageout ||
+                   p->busy ||
+                   p->absent ||
+                   p->error ||
+                   p->fictitious ||
+                   VM_PAGE_WIRED(p)) {
+                       /*
+                        * Page is already being cleaned or can't be cleaned.
+                        */
+                       continue;
+               }
+
+               /* Throw to the pageout queue */
+
+               vm_page_lockspin_queues();
+               need_unlock = TRUE;
+
+               if (vm_compressor_low_on_space()) {
+                       vm_page_unlock_queues();
+                       break;          
+               }
+
+               if (VM_PAGE_Q_THROTTLED(iq)) {
+                                       
+                       iq->pgo_draining = TRUE;
+                                       
+                       assert_wait((event_t) (&iq->pgo_laundry + 1),
+                                   THREAD_INTERRUPTIBLE);
+                       vm_page_unlock_queues();
+                       vm_object_unlock(object);
+                                       
+                       thread_block(THREAD_CONTINUE_NULL);
+
+                       vm_object_lock(object);
+                       goto ReScan;
+               }
+
+               assert(!p->fictitious);
+               assert(!p->busy);
+               assert(!p->absent);
+               assert(!p->unusual);
+               assert(!p->error);
+               assert(!VM_PAGE_WIRED(p));
+               assert(!p->cleaning);
+
+               if (p->pmapped == TRUE) {
+                       int refmod_state;
+                       int pmap_options;
+
+                       pmap_options = 0;
+                       if (COMPRESSED_PAGER_IS_ACTIVE ||
+                           DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
+                               /*
+                                * Tell pmap the page should be accounted
+                                * for as "compressed" if it's been modified.
+                                */
+                               pmap_options =
+                                       PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
+                               if (p->dirty || p->precious) {
+                                       /*
+                                        * We already know it's been modified,
+                                        * so tell pmap to account for it
+                                        * as "compressed".
+                                        */
+                                       pmap_options = PMAP_OPTIONS_COMPRESSOR;
+                               }
+                       }
+                       refmod_state = pmap_disconnect_options(p->phys_page,
+                                                              pmap_options,
+                                                              NULL);
+                       if (refmod_state & VM_MEM_MODIFIED) {
+                               SET_PAGE_DIRTY(p, FALSE);
+                       }
+               }
+
+               if (!p->dirty && !p->precious) {
+                       vm_page_unlock_queues();
+                       VM_PAGE_FREE(p);
+                       continue;
+               }
+
+               vm_page_queues_remove(p);
+               if (vm_pageout_cluster(p, TRUE, FALSE, TRUE))
+                       need_unlock = FALSE;
+
+               if (need_unlock == TRUE)
+                       vm_page_unlock_queues();
+       }
+
+       vm_object_unlock(object);
+}
+
+
 #if CONFIG_IOSCHED
 void
 vm_page_request_reprioritize(vm_object_t o, uint64_t blkno, uint32_t len, int prio)
@@ -8660,9 +8980,14 @@ vm_page_request_reprioritize(vm_object_t o, uint64_t blkno, uint32_t len, int pr
        if(vnode_pager_get_object_devvp(o->pager, (uintptr_t *)&devvp) != KERN_SUCCESS)
                return;
        
-       /* Create the request for I/O reprioritization */
-       req = (io_reprioritize_req_t)zalloc(io_reprioritize_req_zone);
-       assert(req != NULL);
+       /*
+        * Create the request for I/O reprioritization.
+        * We use the noblock variant of zalloc because we're holding the object
+        * lock here and we could cause a deadlock in low memory conditions.
+        */
+       req = (io_reprioritize_req_t)zalloc_noblock(io_reprioritize_req_zone);
+       if (req == NULL)
+               return;
        req->blkno = blkno;
        req->len = len;
        req->priority = prio;
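The switch from zalloc() to zalloc_noblock() above is the usual "don't sleep while holding a lock the allocator path might need" choice: the object lock is held here, a blocking zone allocation could wait on the VM, and the VM could in turn want this object lock. The trade-off is that the allocation must be droppable, which is acceptable for an I/O priority hint:

	req = (io_reprioritize_req_t)zalloc_noblock(io_reprioritize_req_zone);
	if (req == NULL)
		return;    /* low memory: silently skip the reprioritization hint */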
index 9462329d78db0da0597730cb492d7dfd6cf52c25..94537ded9ab2117c6bd673e83839aecec456bd10 100644 (file)
@@ -368,7 +368,10 @@ struct vm_object {
                io_tracking:1,
                __object2_unused_bits:7;        /* for expansion */
 
-       uint32_t                scan_collisions;
+       uint8_t                 scan_collisions;
+        vm_tag_t               wire_tag;
+       uint8_t                 __object4_unused_bits[2];
+
 #if CONFIG_PHANTOM_CACHE
        uint32_t                phantom_object_id;
 #endif
@@ -402,34 +405,6 @@ struct vm_object {
         ((object)->purgable == VM_PURGABLE_VOLATILE ||                 \
          (object)->purgable == VM_PURGABLE_EMPTY))
 
-#define VM_PAGE_REMOVE(page)                                           \
-       MACRO_BEGIN                                                     \
-       vm_page_t __page = (page);                                      \
-       vm_object_t __object = __page->object;                          \
-       if (__page == __object->memq_hint) {                            \
-               vm_page_t       __new_hint;                             \
-               queue_entry_t   __qe;                                   \
-               __qe = queue_next(&__page->listq);                      \
-               if (queue_end(&__object->memq, __qe)) {                 \
-                       __qe = queue_prev(&__page->listq);              \
-                       if (queue_end(&__object->memq, __qe)) {         \
-                               __qe = NULL;                            \
-                       }                                               \
-               }                                                       \
-               __new_hint = (vm_page_t) __qe;                          \
-               __object->memq_hint = __new_hint;                       \
-       }                                                               \
-       queue_remove(&__object->memq, __page, vm_page_t, listq);        \
-       MACRO_END
-
-#define VM_PAGE_INSERT(page, object)                           \
-       MACRO_BEGIN                                             \
-       vm_page_t __page = (page);                              \
-       vm_object_t __object = (object);                        \
-       queue_enter(&__object->memq, __page, vm_page_t, listq); \
-       __object->memq_hint = __page;                           \
-       MACRO_END
-
 extern
 vm_object_t    kernel_object;          /* the single kernel object */
 
@@ -480,6 +455,29 @@ extern lck_attr_t          vm_map_lck_attr;
 #define msr_lock(msr)   lck_mtx_lock(&(msr)->msync_req_lock)
 #define msr_unlock(msr) lck_mtx_unlock(&(msr)->msync_req_lock)
 
+#define VM_OBJECT_WIRED(object)                                                \
+    MACRO_BEGIN                                                                \
+    if ((object)->purgable == VM_PURGABLE_DENY)                                \
+    {                                                                  \
+       lck_spin_lock(&vm_objects_wired_lock);                          \
+       assert(!(object)->objq.next);                                   \
+       queue_enter(&vm_objects_wired, (object), vm_object_t, objq);    \
+       lck_spin_unlock(&vm_objects_wired_lock);                        \
+    }                                                                  \
+    MACRO_END
+
+#define VM_OBJECT_UNWIRED(object)                                       \
+    MACRO_BEGIN                                                                 \
+    (object)->wire_tag = VM_KERN_MEMORY_NONE;                           \
+    if (((object)->purgable == VM_PURGABLE_DENY) && (object)->objq.next) \
+    {                                                                   \
+       lck_spin_lock(&vm_objects_wired_lock);                           \
+       queue_remove(&vm_objects_wired, (object), vm_object_t, objq);    \
+       lck_spin_unlock(&vm_objects_wired_lock);                         \
+    }                                                                   \
+    MACRO_END
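A hedged usage sketch for the two macros above (the surrounding bookkeeping is assumed, not shown in this diff): wiring code tags the object and puts it on the global vm_objects_wired queue, and the reap/unwire paths take it off again and clear the tag.

	/* assumed call site: object becomes wired under some allocation tag */
	object->wire_tag = tag;            /* e.g. VM_KERN_MEMORY_IOKIT */
	VM_OBJECT_WIRED(object);           /* only non-purgeable objects are queued */

	/* later, when the object is reaped (see the vm_object_reap() hunk above)
	 * or fully unwired */
	VM_OBJECT_UNWIRED(object);         /* resets wire_tag to VM_KERN_MEMORY_NONE */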
+
+
 /*
  *     Declare procedures that operate on VM objects.
  */
@@ -492,8 +490,7 @@ __private_extern__ void             vm_object_init_lck_grp(void);
 
 __private_extern__ void                vm_object_reaper_init(void);
 
-__private_extern__ vm_object_t vm_object_allocate(
-                                       vm_object_size_t        size);
+__private_extern__ vm_object_t vm_object_allocate(vm_object_size_t size);
 
 __private_extern__ void    _vm_object_allocate(vm_object_size_t size,
                            vm_object_t object);
@@ -590,7 +587,9 @@ __private_extern__ void             vm_object_deactivate_pages(
                                        vm_object_offset_t      offset,
                                        vm_object_size_t        size,
                                        boolean_t               kill_page,
-                                       boolean_t               reusable_page);
+                                       boolean_t               reusable_page,
+                                       struct pmap             *pmap,
+                                       vm_map_offset_t         pmap_offset);
 
 __private_extern__ void        vm_object_reuse_pages(
        vm_object_t             object,
@@ -687,7 +686,7 @@ __private_extern__ kern_return_t vm_object_upl_request(
                                upl_t                   *upl,
                                upl_page_info_t         *page_info,
                                unsigned int            *count,
-                               int                     flags);
+                               upl_control_flags_t     flags);
 
 __private_extern__ kern_return_t vm_object_transpose(
                                vm_object_t             object1,
@@ -798,14 +797,22 @@ vm_object_pack_pages(
        struct default_freezer_handle *df_handle);
 
 __private_extern__ void
-vm_object_pageout(
+vm_object_compressed_freezer_pageout(
        vm_object_t     object);
 
+__private_extern__ void
+vm_object_compressed_freezer_done(
+       void);
+
 __private_extern__  kern_return_t
 vm_object_pagein(
        vm_object_t     object);
 #endif /* CONFIG_FREEZE */
 
+__private_extern__ void
+vm_object_pageout(
+       vm_object_t     object);
+
 #if CONFIG_IOSCHED
 struct io_reprioritize_req {
        uint64_t        blkno;
@@ -1046,10 +1053,13 @@ extern boolean_t        vm_object_lock_try_shared(vm_object_t);
        lck_rw_assert(&(object)->Lock, LCK_RW_ASSERT_SHARED)
 #define vm_object_lock_assert_exclusive(object) \
        lck_rw_assert(&(object)->Lock, LCK_RW_ASSERT_EXCLUSIVE)
+#define vm_object_lock_assert_notheld(object) \
+       lck_rw_assert(&(object)->Lock, LCK_RW_ASSERT_NOTHELD)
 #else  /* MACH_ASSERT || DEBUG */ 
 #define vm_object_lock_assert_held(object)
 #define vm_object_lock_assert_shared(object)
 #define vm_object_lock_assert_exclusive(object)
+#define vm_object_lock_assert_notheld(object)
 #endif /* MACH_ASSERT || DEBUG */
 
 #define vm_object_round_page(x) (((vm_object_offset_t)(x) + PAGE_MASK) & ~((signed)PAGE_MASK))
index 9c3d9f0aee9bfc86e4585c1adf803c18f329a359..ff8b1e0db5d27171deb05767eee97512da673f91 100644 (file)
@@ -206,7 +206,7 @@ struct vm_page {
         *
         * we use the 'wire_count' field to store the local
         * queue id if local queues are enabled...
-        * see the comments at 'VM_PAGE_QUEUES_REMOVE' as to
+        * see the comments at 'vm_page_queues_remove' as to
         * why this is safe to do
         */
 #define local_id wire_count
@@ -441,6 +441,12 @@ queue_head_t       vm_page_queue_anonymous;        /* inactive memory queue for anonymous pag
 extern
 queue_head_t   vm_page_queue_throttled;        /* memory queue for throttled pageout pages */
 
+extern
+queue_head_t   vm_objects_wired;
+extern
+lck_spin_t     vm_objects_wired_lock;
+
+
 extern
 vm_offset_t    first_phys_addr;        /* physical address for first_page */
 extern
@@ -492,6 +498,9 @@ extern
 unsigned int   vm_page_throttle_count; /* Count of page allocations throttled */
 extern
 unsigned int   vm_page_gobble_count;
+extern
+unsigned int   vm_page_stolen_count;   /* Count of stolen pages not accounted in zones */
+
 
 #if DEVELOPMENT || DEBUG
 extern
@@ -552,6 +561,10 @@ extern void                vm_page_create(
                                        ppnum_t         start,
                                        ppnum_t         end);
 
+extern vm_page_t       kdp_vm_page_lookup(
+                                       vm_object_t             object,
+                                       vm_object_offset_t      offset);
+
 extern vm_page_t       vm_page_lookup(
                                        vm_object_t             object,
                                        vm_object_offset_t      offset);
@@ -581,10 +594,6 @@ extern vm_page_t   vm_page_alloc(
                                        vm_object_t             object,
                                        vm_object_offset_t      offset);
 
-extern vm_page_t       vm_page_alloclo(
-                                       vm_object_t             object,
-                                       vm_object_offset_t      offset);
-
 extern vm_page_t       vm_page_alloc_guard(
        vm_object_t             object,
        vm_object_offset_t      offset);
@@ -638,13 +647,22 @@ extern void               vm_page_insert(
                                        vm_object_t             object,
                                        vm_object_offset_t      offset);
 
+extern void            vm_page_insert_wired(
+                                       vm_page_t               page,
+                                       vm_object_t             object,
+                                       vm_object_offset_t      offset,
+                                       vm_tag_t                tag);
+
 extern void            vm_page_insert_internal(
                                        vm_page_t               page,
                                        vm_object_t             object,
                                        vm_object_offset_t      offset,
+                                       vm_tag_t                tag,
                                        boolean_t               queues_lock_held,
                                        boolean_t               insert_in_hash,
-                                       boolean_t               batch_pmap_op);
+                                       boolean_t               batch_pmap_op,
+                                       boolean_t               delayed_accounting,
+                                       uint64_t                *delayed_ledger_update);
 
 extern void            vm_page_replace(
                                        vm_page_t               mem,
@@ -675,7 +693,9 @@ extern void         vm_page_part_copy(
                                        vm_size_t       len);
 
 extern void            vm_page_wire(
-                                       vm_page_t       page);
+                                       vm_page_t       page,
+                                       vm_tag_t        tag,
+                                       boolean_t       check_memorystatus);
 
 extern void            vm_page_unwire(
                                        vm_page_t       page,
@@ -690,6 +710,12 @@ extern void                vm_page_validate_cs(vm_page_t   page);
 extern void            vm_page_validate_cs_mapped(
        vm_page_t       page,
        const void      *kaddr);
+extern void            vm_page_validate_cs_mapped_chunk(
+       vm_page_t       page,
+       const void      *kaddr,
+       vm_offset_t     chunk_offset,
+       boolean_t       *validated,
+       unsigned        *tainted);
 
 extern void            vm_page_free_prepare_queues(
                                        vm_page_t       page);
@@ -807,170 +833,6 @@ extern void vm_page_queues_assert(vm_page_t mem, int val);
 #define VM_PAGE_QUEUES_ASSERT(mem, val)
 #endif
 
-
-/*
- * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
- * local queues if they exist... its the only spot in the system where we add pages
- * to those queues...  once on those queues, those pages can only move to one of the
- * global page queues or the free queues... they NEVER move from local q to local q.
- * the 'local' state is stable when VM_PAGE_QUEUES_REMOVE is called since we're behind
- * the global vm_page_queue_lock at this point...  we still need to take the local lock
- * in case this operation is being run on a different CPU then the local queue's identity,
- * but we don't have to worry about the page moving to a global queue or becoming wired
- * while we're grabbing the local lock since those operations would require the global
- * vm_page_queue_lock to be held, and we already own it.
- *
- * this is why its safe to utilze the wire_count field in the vm_page_t as the local_id...
- * 'wired' and local are ALWAYS mutually exclusive conditions.
- */
-
-#define VM_PAGE_QUEUES_REMOVE(mem)                             \
-       MACRO_BEGIN                                             \
-       boolean_t       was_pageable;                           \
-                                                               \
-       VM_PAGE_QUEUES_ASSERT(mem, 1);                          \
-       assert(!mem->pageout_queue);                            \
-/*                                                             \
- *     if (mem->pageout_queue)                                 \
- *             NOTE: VM_PAGE_QUEUES_REMOVE does not deal with removing pages from the pageout queue... \
- *             the caller is responsible for determing if the page is on that queue, and if so, must   \
- *             either first remove it (it needs both the page queues lock and the object lock to do    \
- *             this via vm_pageout_steal_laundry), or avoid the call to VM_PAGE_QUEUES_REMOVE          \
- */                                                            \
-       if (mem->local) {                                       \
-               struct vpl      *lq;                            \
-               assert(mem->object != kernel_object);           \
-               assert(mem->object != compressor_object);       \
-               assert(!mem->inactive && !mem->speculative);    \
-               assert(!mem->active && !mem->throttled);        \
-               assert(!mem->clean_queue);                      \
-               assert(!mem->fictitious);                       \
-               lq = &vm_page_local_q[mem->local_id].vpl_un.vpl;        \
-               VPL_LOCK(&lq->vpl_lock);                        \
-               queue_remove(&lq->vpl_queue,                    \
-                            mem, vm_page_t, pageq);            \
-               mem->local = FALSE;                             \
-               mem->local_id = 0;                              \
-               lq->vpl_count--;                                \
-               if (mem->object->internal) {                    \
-                       lq->vpl_internal_count--;               \
-               } else {                                        \
-                       lq->vpl_external_count--;               \
-               }                                               \
-               VPL_UNLOCK(&lq->vpl_lock);                      \
-               was_pageable = FALSE;                           \
-       }                                                       \
-                                                               \
-       else if (mem->active) {                                 \
-               assert(mem->object != kernel_object);           \
-               assert(mem->object != compressor_object);       \
-               assert(!mem->inactive && !mem->speculative);    \
-               assert(!mem->clean_queue);                      \
-               assert(!mem->throttled);                        \
-               assert(!mem->fictitious);                       \
-               queue_remove(&vm_page_queue_active,             \
-                       mem, vm_page_t, pageq);                 \
-               mem->active = FALSE;                            \
-               vm_page_active_count--;                         \
-               was_pageable = TRUE;                            \
-       }                                                       \
-                                                               \
-       else if (mem->inactive) {                               \
-               assert(mem->object != kernel_object);           \
-               assert(mem->object != compressor_object);       \
-               assert(!mem->active && !mem->speculative);      \
-               assert(!mem->throttled);                        \
-               assert(!mem->fictitious);                       \
-               vm_page_inactive_count--;                       \
-               if (mem->clean_queue) {                         \
-                       queue_remove(&vm_page_queue_cleaned,    \
-                        mem, vm_page_t, pageq);                        \
-                       mem->clean_queue = FALSE;               \
-                       vm_page_cleaned_count--;                \
-               } else {                                        \
-                       if (mem->object->internal) {            \
-                               queue_remove(&vm_page_queue_anonymous,  \
-                               mem, vm_page_t, pageq);         \
-                               vm_page_anonymous_count--;      \
-                       } else {                                \
-                               queue_remove(&vm_page_queue_inactive,   \
-                               mem, vm_page_t, pageq);         \
-                       }                                       \
-                       vm_purgeable_q_advance_all();           \
-               }                                               \
-               mem->inactive = FALSE;                          \
-               was_pageable = TRUE;                            \
-       }                                                       \
-                                                               \
-       else if (mem->throttled) {                              \
-               assert(mem->object != compressor_object);       \
-               assert(!mem->active && !mem->inactive);         \
-               assert(!mem->speculative);                      \
-               assert(!mem->fictitious);                       \
-               queue_remove(&vm_page_queue_throttled,          \
-                            mem, vm_page_t, pageq);            \
-               mem->throttled = FALSE;                         \
-               vm_page_throttled_count--;                      \
-               was_pageable = FALSE;                           \
-       }                                                       \
-                                                               \
-       else if (mem->speculative) {                            \
-               assert(mem->object != compressor_object);       \
-               assert(!mem->active && !mem->inactive);         \
-               assert(!mem->throttled);                        \
-               assert(!mem->fictitious);                       \
-                remque(&mem->pageq);                           \
-               mem->speculative = FALSE;                       \
-               vm_page_speculative_count--;                    \
-               was_pageable = TRUE;                            \
-       }                                                       \
-                                                               \
-       else if (mem->pageq.next || mem->pageq.prev) {          \
-               was_pageable = FALSE;                           \
-               panic("VM_PAGE_QUEUES_REMOVE: unmarked page on Q");     \
-       } else {                                                \
-               was_pageable = FALSE;                           \
-       }                                                       \
-                                                               \
-       mem->pageq.next = NULL;                                 \
-       mem->pageq.prev = NULL;                                 \
-       VM_PAGE_QUEUES_ASSERT(mem, 0);                          \
-       if (was_pageable) {                                     \
-               if (mem->object->internal) {                    \
-                       vm_page_pageable_internal_count--;      \
-               } else {                                        \
-                       vm_page_pageable_external_count--;      \
-               }                                               \
-       }                                                       \
-       MACRO_END
-
-
-#define VM_PAGE_ENQUEUE_INACTIVE(mem, first)                   \
-       MACRO_BEGIN                                             \
-       VM_PAGE_QUEUES_ASSERT(mem, 0);                          \
-       assert(!mem->fictitious);                               \
-       assert(!mem->laundry);                                  \
-       assert(!mem->pageout_queue);                            \
-       if (mem->object->internal) {                            \
-               if (first == TRUE)                              \
-                       queue_enter_first(&vm_page_queue_anonymous, mem, vm_page_t, pageq);     \
-               else                                            \
-                       queue_enter(&vm_page_queue_anonymous, mem, vm_page_t, pageq);           \
-               vm_page_anonymous_count++;                      \
-               vm_page_pageable_internal_count++;              \
-       } else {                                                \
-               if (first == TRUE)                              \
-                       queue_enter_first(&vm_page_queue_inactive, mem, vm_page_t, pageq); \
-               else                                            \
-                       queue_enter(&vm_page_queue_inactive, mem, vm_page_t, pageq);    \
-               vm_page_pageable_external_count++;                      \
-       }                                                       \
-       mem->inactive = TRUE;                                   \
-       vm_page_inactive_count++;                               \
-       token_new_pagecount++;                                  \
-       MACRO_END
-
-
 #if DEVELOPMENT || DEBUG
 #define VM_PAGE_SPECULATIVE_USED_ADD()                         \
        MACRO_BEGIN                                             \
@@ -1005,6 +867,12 @@ extern void vm_page_queues_assert(vm_page_t mem, int val);
        }                                                       \
        MACRO_END
 
+/* adjust for stolen pages accounted elsewhere */
+#define VM_PAGE_MOVE_STOLEN(page_count)                                \
+       MACRO_BEGIN                                             \
+       vm_page_stolen_count -= (page_count);                   \
+       vm_page_wire_count_initial -= (page_count);             \
+       MACRO_END
        
 #define DW_vm_page_unwire              0x01
 #define DW_vm_page_wire                        0x02
@@ -1028,7 +896,7 @@ struct vm_page_delayed_work {
        int             dw_mask;
 };
 
-void vm_page_do_delayed_work(vm_object_t object, struct vm_page_delayed_work *dwp, int dw_count);
+void vm_page_do_delayed_work(vm_object_t object, vm_tag_t tag, struct vm_page_delayed_work *dwp, int dw_count);
 
 extern unsigned int vm_max_delayed_work_limit;
 
@@ -1063,4 +931,10 @@ extern vm_page_t vm_object_page_grab(vm_object_t);
 extern void vm_page_buckets_check(void);
 #endif /* VM_PAGE_BUCKETS_CHECK */
 
+extern void vm_page_queues_remove(vm_page_t mem);
+extern void vm_page_remove_internal(vm_page_t page);
+extern void vm_page_enqueue_inactive(vm_page_t mem, boolean_t first);
+extern void vm_page_check_pageable_safe(vm_page_t page);
+
+
 #endif /* _VM_VM_PAGE_H_ */
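These declarations replace the VM_PAGE_QUEUES_REMOVE / VM_PAGE_ENQUEUE_INACTIVE macros deleted above; call sites simply switch from the uppercase macro to the lowercase out-of-line function, with the same locking requirement (page queues lock held). A minimal before/after sketch:

	/* before */
	VM_PAGE_QUEUES_REMOVE(p);
	VM_PAGE_ENQUEUE_INACTIVE(p, FALSE);

	/* after */
	vm_page_queues_remove(p);
	vm_page_enqueue_inactive(p, FALSE);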
index d2fb0aaf7f820258be9564e0f9b4b9b65b839da6..53185086d7953452ff30ebf63f47b74dfa052a23 100644 (file)
@@ -269,7 +269,7 @@ unsigned int        vm_page_speculative_percentage = 5;
 #ifndef        VM_PAGE_REACTIVATE_LIMIT
 #define        VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
 #endif /* VM_PAGE_REACTIVATE_LIMIT */
-#define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM      100
+#define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM      1000
 
 
 extern boolean_t hibernate_cleaning_in_progress;
@@ -290,7 +290,14 @@ struct cq {
        struct vm_pageout_queue *q;
        void                    *current_chead;
        char                    *scratch_buf;
+       int                     id;
 };
+#define MAX_COMPRESSOR_THREAD_COUNT    8
+
+struct cq ciq[MAX_COMPRESSOR_THREAD_COUNT];
+
+void   *vm_pageout_immediate_chead;
+char   *vm_pageout_immediate_scratch_buf;
 
 
 #if VM_PRESSURE_EVENTS
@@ -311,6 +318,11 @@ static void vm_pageout_adjust_io_throttles(struct vm_pageout_queue *, struct vm_
 extern void vm_pageout_continue(void);
 extern void vm_pageout_scan(void);
 
+static void    vm_pageout_immediate(vm_page_t, boolean_t);
+boolean_t      vm_compressor_immediate_preferred = FALSE;
+boolean_t      vm_compressor_immediate_preferred_override = FALSE;
+boolean_t      vm_restricted_to_single_processor = FALSE;
+
 static thread_t        vm_pageout_external_iothread = THREAD_NULL;
 static thread_t        vm_pageout_internal_iothread = THREAD_NULL;
 
@@ -373,7 +385,7 @@ unsigned int vm_pageout_inactive_dirty_external = 0;        /* debugging */
 unsigned int vm_pageout_inactive_deactivated = 0;      /* debugging */
 unsigned int vm_pageout_inactive_anonymous = 0;        /* debugging */
 unsigned int vm_pageout_dirty_no_pager = 0;    /* debugging */
-unsigned int vm_pageout_purged_objects = 0;    /* debugging */
+unsigned int vm_pageout_purged_objects = 0;    /* used for sysctl vm stats */
 unsigned int vm_stat_discard = 0;              /* debugging */
 unsigned int vm_stat_discard_sent = 0;         /* debugging */
 unsigned int vm_stat_discard_failure = 0;      /* debugging */
@@ -394,6 +406,8 @@ unsigned int vm_pageout_scan_deadlock_detected = 0;         /* debugging */
 unsigned int vm_pageout_scan_active_throttle_success = 0;      /* debugging */
 unsigned int vm_pageout_scan_inactive_throttle_success = 0;    /* debugging */
 unsigned int vm_pageout_inactive_external_forced_jetsam_count = 0;     /* debugging */
+unsigned int vm_pageout_scan_throttle_deferred = 0;            /* debugging */
+unsigned int vm_pageout_scan_yield_unthrottled = 0;            /* debugging */
 unsigned int vm_page_speculative_count_drifts = 0;
 unsigned int vm_page_speculative_count_drift_max = 0;
 
@@ -442,8 +456,6 @@ extern boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async);
 extern void memorystatus_on_pageout_scan_end(void);
 #endif
 
-boolean_t      vm_page_compressions_failing = FALSE;
-
 /*
  *     Routine:        vm_backing_store_disable
  *     Purpose:
@@ -679,7 +691,7 @@ vm_pageout_object_terminate(
  *             must be locked.
  *
  */
-void
+static void
 vm_pageclean_setup(
        vm_page_t               m,
        vm_page_t               new_m,
@@ -717,10 +729,10 @@ vm_pageclean_setup(
        new_m->phys_page = m->phys_page;
 
        vm_page_lockspin_queues();
-       vm_page_wire(new_m);
+       vm_page_wire(new_m, VM_KERN_MEMORY_NONE, TRUE);
        vm_page_unlock_queues();
 
-       vm_page_insert(new_m, new_object, new_offset);
+       vm_page_insert_wired(new_m, new_object, new_offset, VM_KERN_MEMORY_NONE);
        assert(!new_m->wanted);
        new_m->busy = FALSE;
 }
@@ -843,8 +855,8 @@ struct {
  * The page must not be on any pageout queue.
  */
 
-void
-vm_pageout_cluster(vm_page_t m, boolean_t pageout)
+int
+vm_pageout_cluster(vm_page_t m, boolean_t pageout, boolean_t immediate_ok, boolean_t keep_object_locked)
 {
        vm_object_t     object = m->object;
         struct         vm_pageout_queue *q;
@@ -878,9 +890,19 @@ vm_pageout_cluster(vm_page_t m, boolean_t pageout)
        m->pageout = pageout;
 
        if (object->internal == TRUE) {
-               if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)
+               if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
                        m->busy = TRUE;
 
+                       if (vm_compressor_immediate_preferred == TRUE && immediate_ok == TRUE) {
+                               if (keep_object_locked == FALSE)
+                                       vm_object_unlock(object);
+                               vm_page_unlock_queues();
+
+                               vm_pageout_immediate(m, keep_object_locked);
+
+                               return (1);
+                       }
+               }
                q = &vm_pageout_queue_internal;
        } else
                q = &vm_pageout_queue_external;
@@ -899,6 +921,8 @@ vm_pageout_cluster(vm_page_t m, boolean_t pageout)
                thread_wakeup((event_t) &q->pgo_pending);
        }
        VM_PAGE_CHECK(m);
+
+       return (0);
 }
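With the immediate-compression path added above, vm_pageout_cluster now returns an int: 1 when the page was handed straight to vm_pageout_immediate (the page-queues lock, and optionally the object lock, have already been dropped by then), and 0 when the page was queued for the pageout threads as before. A small standalone model of that decision and return contract (the flag and argument names mirror the diff, but the body is illustrative only):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for the kernel global added in this change. */
static bool vm_compressor_immediate_preferred = true;

/* Returns 1 if the page would be compressed synchronously (locks dropped by the
 * callee), 0 if it would be enqueued for a pageout thread. */
static int
cluster_model(bool internal, bool compressed_pager_active, bool immediate_ok)
{
        if (internal && compressed_pager_active &&
            vm_compressor_immediate_preferred && immediate_ok) {
                /* ... unlock object/queues, compress in place ... */
                return 1;
        }
        /* ... enqueue on the internal or external pageout queue ... */
        return 0;
}

int
main(void)
{
        printf("immediate: %d\n", cluster_model(true, true, true));    /* 1 */
        printf("queued:    %d\n", cluster_model(true, true, false));   /* 0 */
        return 0;
}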
 
 
@@ -1150,6 +1174,231 @@ mach_vm_pressure_monitor(
 
 
 
+static void
+vm_pageout_page_queue(queue_head_t *, int);
+
+/*
+ * condition variable used to make sure there is
+ * only a single sweep going on at a time
+ */
+boolean_t      vm_pageout_anonymous_pages_active = FALSE;
+
+
+void
+vm_pageout_anonymous_pages()
+{
+       if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
+
+               vm_page_lock_queues();
+
+               if (vm_pageout_anonymous_pages_active == TRUE) {
+                       vm_page_unlock_queues();
+                       return;
+               }
+               vm_pageout_anonymous_pages_active = TRUE;
+               vm_page_unlock_queues();
+
+               vm_pageout_page_queue(&vm_page_queue_throttled, vm_page_throttled_count);
+               vm_pageout_page_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
+               vm_pageout_page_queue(&vm_page_queue_active, vm_page_active_count);
+
+               vm_consider_swapping();
+
+               vm_page_lock_queues();
+               vm_pageout_anonymous_pages_active = FALSE;
+               vm_page_unlock_queues();
+       }
+}
+
+
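vm_pageout_anonymous_pages guards against concurrent sweeps with a flag that is only read and written under the page-queues lock: if a sweep is already active it backs out, otherwise it marks itself active, drains the throttled/anonymous/active queues, and clears the flag again. A standalone model of that single-sweep guard, using a pthread mutex in place of the page-queues lock:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t queues_lock = PTHREAD_MUTEX_INITIALIZER;
static bool sweep_active = false;

static void
sweep_model(const char *who)
{
        pthread_mutex_lock(&queues_lock);
        if (sweep_active) {
                pthread_mutex_unlock(&queues_lock);
                printf("%s: sweep already running, backing off\n", who);
                return;
        }
        sweep_active = true;
        pthread_mutex_unlock(&queues_lock);

        printf("%s: sweeping queues...\n", who);   /* the real work happens unlocked */

        pthread_mutex_lock(&queues_lock);
        sweep_active = false;
        pthread_mutex_unlock(&queues_lock);
}

int
main(void)
{
        sweep_model("caller A");
        sweep_model("caller B");
        return 0;
}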
+void
+vm_pageout_page_queue(queue_head_t *q, int qcount)
+{
+       vm_page_t       m;
+       vm_object_t     t_object = NULL;
+       vm_object_t     l_object = NULL;
+       vm_object_t     m_object = NULL;
+       int             delayed_unlock = 0;
+       int             try_failed_count = 0;
+       int             refmod_state;
+       int             pmap_options;
+       struct          vm_pageout_queue *iq;
+
+
+       iq = &vm_pageout_queue_internal;
+       
+       vm_page_lock_queues();
+
+       while (qcount && !queue_empty(q)) {
+
+               lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
+
+               if (VM_PAGE_Q_THROTTLED(iq)) {
+
+                       if (l_object != NULL) {
+                               vm_object_unlock(l_object);
+                               l_object = NULL;
+                       }
+                       iq->pgo_draining = TRUE;
+                                       
+                       assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE);
+                       vm_page_unlock_queues();
+                                       
+                       thread_block(THREAD_CONTINUE_NULL);
+                       
+                       vm_page_lock_queues();
+                       delayed_unlock = 0;
+                       continue;
+               }
+               m = (vm_page_t) queue_first(q);
+               m_object = m->object;
+
+               /*
+                * check to see if we currently are working
+                * with the same object... if so, we've
+                * already got the lock
+                */
+               if (m_object != l_object) {
+                       if ( !m_object->internal) 
+                               goto reenter_pg_on_q;
+
+                       /*
+                        * the object associated with candidate page is 
+                        * different from the one we were just working
+                        * with... dump the lock if we still own it
+                        */
+                       if (l_object != NULL) {
+                               vm_object_unlock(l_object);
+                               l_object = NULL;
+                       }
+                       if (m_object != t_object)
+                               try_failed_count = 0;
+
+                       /*
+                        * Try to lock object; since we've already got the
+                        * page queues lock, we can only 'try' for this one.
+                        * if the 'try' fails, we need to do a mutex_pause
+                        * to allow the owner of the object lock a chance to
+                        * run... 
+                        */
+                       if ( !vm_object_lock_try_scan(m_object)) {
+
+                               if (try_failed_count > 20) {
+                                       goto reenter_pg_on_q;
+                               }
+                               vm_page_unlock_queues();
+                               mutex_pause(try_failed_count++);
+                               vm_page_lock_queues();
+                               delayed_unlock = 0;
+
+                               t_object = m_object;
+                               continue;
+                       }
+                       l_object = m_object;
+               }
+               if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error || m->pageout) {
+                       /*
+                        * page is not to be cleaned
+                        * put it back on the head of its queue
+                        */
+                       goto reenter_pg_on_q;
+               }
+               if (m->reference == FALSE && m->pmapped == TRUE) {
+                       refmod_state = pmap_get_refmod(m->phys_page);
+                 
+                       if (refmod_state & VM_MEM_REFERENCED)
+                               m->reference = TRUE;
+                       if (refmod_state & VM_MEM_MODIFIED) {
+                               SET_PAGE_DIRTY(m, FALSE);
+                       }
+               }
+               if (m->reference == TRUE) {
+                       m->reference = FALSE;
+                       pmap_clear_refmod_options(m->phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
+                       goto reenter_pg_on_q;
+               }
+               if (m->pmapped == TRUE) {
+                       if (m->dirty || m->precious) {
+                               pmap_options = PMAP_OPTIONS_COMPRESSOR;
+                       } else {
+                               pmap_options = PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
+                       }
+                       refmod_state = pmap_disconnect_options(m->phys_page, pmap_options, NULL);
+                       if (refmod_state & VM_MEM_MODIFIED) {
+                               SET_PAGE_DIRTY(m, FALSE);
+                       }
+               }
+               if ( !m->dirty && !m->precious) {
+                       vm_page_unlock_queues();
+                       VM_PAGE_FREE(m);
+                       vm_page_lock_queues();
+                       delayed_unlock = 0;
+
+                       goto next_pg;
+               }
+               if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL)  {
+                       
+                       if (!m_object->pager_initialized) {
+
+                               vm_page_unlock_queues();
+
+                               vm_object_collapse(m_object, (vm_object_offset_t) 0, TRUE);
+
+                               if (!m_object->pager_initialized)
+                                       vm_object_compressor_pager_create(m_object);
+
+                               vm_page_lock_queues();
+                               delayed_unlock = 0;
+                       }
+                       if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL)
+                               goto reenter_pg_on_q;
+                       /*
+                        * vm_object_compressor_pager_create will drop the object lock
+                        * which means 'm' may no longer be valid to use
+                        */
+                       continue;
+               }
+               /*
+                * we've already factored out pages in the laundry which
+                * means this page can't be on the pageout queue so it's
+                * safe to do the vm_page_queues_remove
+                */
+                assert(!m->pageout_queue);
+
+               vm_page_queues_remove(m);
+
+               lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
+
+               vm_pageout_cluster(m, TRUE, FALSE, FALSE);
+
+               goto next_pg;
+
+reenter_pg_on_q:
+               queue_remove(q, m, vm_page_t, pageq);
+               queue_enter(q, m, vm_page_t, pageq);
+next_pg:
+               qcount--;
+               try_failed_count = 0;
+
+               if (delayed_unlock++ > 128) {
+
+                       if (l_object != NULL) {
+                               vm_object_unlock(l_object);
+                               l_object = NULL;
+                       }
+                       lck_mtx_yield(&vm_page_queue_lock);
+                       delayed_unlock = 0;
+               }
+       }
+       if (l_object != NULL) {
+               vm_object_unlock(l_object);
+               l_object = NULL;
+       }
+       vm_page_unlock_queues();
+}
+
+
+
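vm_pageout_page_queue holds the page-queues lock while it walks a queue, and any page it cannot process right now is rotated to the tail (the reenter_pg_on_q label: queue_remove followed immediately by queue_enter), so the sweep still terminates after qcount iterations without revisiting the same page. A standalone sketch of that rotate-to-tail idiom using the BSD <sys/queue.h> tail queues (the kernel uses its own queue_remove/queue_enter primitives; this is only an analogous illustration):

#include <stdio.h>
#include <sys/queue.h>

struct page {
        int id;
        TAILQ_ENTRY(page) link;
};
TAILQ_HEAD(pageq, page);

int
main(void)
{
        struct pageq q = TAILQ_HEAD_INITIALIZER(q);
        struct page pages[4];

        for (int i = 0; i < 4; i++) {
                pages[i].id = i;
                TAILQ_INSERT_TAIL(&q, &pages[i], link);
        }

        /* "reenter_pg_on_q": take the head and re-enter it at the tail so the
         * next iteration looks at a different page. */
        struct page *m = TAILQ_FIRST(&q);
        TAILQ_REMOVE(&q, m, link);
        TAILQ_INSERT_TAIL(&q, m, link);

        struct page *p;
        TAILQ_FOREACH(p, &q, link)
                printf("%d ", p->id);   /* prints: 1 2 3 0 */
        printf("\n");
        return 0;
}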
 /*
  * function in BSD to apply I/O throttle to the pageout thread
  */
@@ -1244,12 +1493,14 @@ vm_pageout_scan(void)
        int             cache_evict_throttle = 0;
        uint32_t        vm_pageout_inactive_external_forced_reactivate_limit = 0;
        int             force_purge = 0;
+#define        DELAY_SPECULATIVE_AGE   1000
+       int             delay_speculative_age = 0;
 
 #if VM_PRESSURE_EVENTS
        vm_pressure_level_t pressure_level;
 #endif /* VM_PRESSURE_EVENTS */
 
-       VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START,
+       VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START,
                       vm_pageout_speculative_clean, vm_pageout_inactive_clean,
                       vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
 
@@ -1335,10 +1586,8 @@ Restart:
 
                DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);
 
-               if (delayed_unlock == 0) {
-                       vm_page_lock_queues();
-                       delayed_unlock = 1;
-               }
+               assert(delayed_unlock);
+
                if (vm_upl_wait_for_pages < 0)
                        vm_upl_wait_for_pages = 0;
 
@@ -1456,8 +1705,9 @@ done_moving_active_pages:
                        }
                        vm_pageout_scan_wants_object = VM_OBJECT_NULL;
 
+                       vm_page_unlock_queues();
+
                        if (local_freeq) {
-                               vm_page_unlock_queues();
                                        
                                VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
                                               vm_page_free_count, local_freed, delayed_unlock_limit, 2);
@@ -1469,8 +1719,11 @@ done_moving_active_pages:
 
                                local_freeq = NULL;
                                local_freed = 0;
-                               vm_page_lock_queues();
                        }
+                       vm_consider_waking_compactor_swapper();
+
+                       vm_page_lock_queues();
+
                        /*
                         * make sure the pageout I/O threads are running
                         * throttled in case there are still requests 
@@ -1506,9 +1759,9 @@ done_moving_active_pages:
 return_from_scan:
                                assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
 
-                               VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
+                               VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
                                               vm_pageout_inactive, vm_pageout_inactive_used, 0, 0);
-                               VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
+                               VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
                                               vm_pageout_speculative_clean, vm_pageout_inactive_clean,
                                               vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
 
@@ -1554,7 +1807,7 @@ return_from_scan:
 
                        VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);
                        if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {
-
+                               vm_pageout_purged_objects++;
                                VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);
                                memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
                                continue;
@@ -1570,7 +1823,6 @@ return_from_scan:
                         * this mechanism works
                         */
                        struct vm_speculative_age_q     *aq;
-                       mach_timespec_t ts_fully_aged;
                        boolean_t       can_steal = FALSE;
                        int num_scanned_queues;
                       
@@ -1616,23 +1868,33 @@ return_from_scan:
                        if (vm_page_speculative_count > vm_page_speculative_target)
                                can_steal = TRUE;
                        else {
-                               ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) / 1000;
-                               ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) % 1000)
-                                                     * 1000 * NSEC_PER_USEC;
+                               if (!delay_speculative_age) {
+                                       mach_timespec_t ts_fully_aged;
 
-                               ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
+                                       ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) / 1000;
+                                       ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) % 1000)
+                                               * 1000 * NSEC_PER_USEC;
 
-                               clock_sec_t sec;
-                               clock_nsec_t nsec;
-                               clock_get_system_nanotime(&sec, &nsec);
-                               ts.tv_sec = (unsigned int) sec;
-                               ts.tv_nsec = nsec;
+                                       ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
+
+                                       clock_sec_t sec;
+                                       clock_nsec_t nsec;
+                                       clock_get_system_nanotime(&sec, &nsec);
+                                       ts.tv_sec = (unsigned int) sec;
+                                       ts.tv_nsec = nsec;
 
-                               if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
-                                       can_steal = TRUE;
+                                       if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
+                                               can_steal = TRUE;
+                                       else
+                                               delay_speculative_age++;
+                               } else {
+                                       delay_speculative_age++;
+                                       if (delay_speculative_age == DELAY_SPECULATIVE_AGE)
+                                               delay_speculative_age = 0;
+                               }
                        }
                        if (can_steal == TRUE)
-                               vm_page_speculate_ageit(aq);
+                               vm_page_speculate_ageit(aq);
                }
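The speculative-aging check is now rate limited: ts_fully_aged is only recomputed when delay_speculative_age is 0, and otherwise the counter just runs up to DELAY_SPECULATIVE_AGE (1000) iterations before the next real check. The time arithmetic itself is unchanged: the full age of the speculative queues in milliseconds is split into whole seconds plus a nanosecond remainder. A small standalone check of that split; VM_PAGE_MAX_SPECULATIVE_AGE_Q and vm_page_speculative_q_age_ms are taken here as 10 and 500 purely for illustration, not as the kernel's actual tunables:

#include <stdio.h>

#define NSEC_PER_USEC 1000ULL

int
main(void)
{
        unsigned int max_age_q = 10;        /* assumed VM_PAGE_MAX_SPECULATIVE_AGE_Q */
        unsigned int q_age_ms  = 500;       /* assumed vm_page_speculative_q_age_ms  */

        unsigned int total_ms = max_age_q * q_age_ms;               /* 5000 ms */
        unsigned int tv_sec   = total_ms / 1000;                    /* 5 s     */
        unsigned long long tv_nsec =
            (unsigned long long)(total_ms % 1000) * 1000 * NSEC_PER_USEC;  /* 0 ns */

        printf("speculative pages fully aged after %u s + %llu ns\n", tv_sec, tv_nsec);
        return 0;
}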
                if (queue_empty(&sq->age_q) && cache_evict_throttle == 0) {
                        int     pages_evicted;
@@ -1663,8 +1925,11 @@ return_from_scan:
                if  (cache_evict_throttle)
                        cache_evict_throttle--;
 
+#if CONFIG_JETSAM
                /*
-                * don't let the filecache_min fall below 33% of available memory...
+                * don't let the filecache_min fall below 15% of available memory
+                * on systems with an active compressor that isn't nearing its
+                * limits w/r to accepting new data
                 *
                 * on systems w/o the compressor/swapper, the filecache is always
                 * a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY
@@ -1672,7 +1937,16 @@ return_from_scan:
                 * throttled queue (which isn't counted as available) which
                 * effectively disables this filter
                 */
+               if (vm_compressor_low_on_space())
+                       vm_page_filecache_min = 0;
+               else
+                       vm_page_filecache_min = (AVAILABLE_NON_COMPRESSED_MEMORY / 7);
+#else
+                /*
+                * don't let the filecache_min fall below 33% of available memory...
+                */
                vm_page_filecache_min = (AVAILABLE_NON_COMPRESSED_MEMORY / 3);
+#endif
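On CONFIG_JETSAM systems the file-cache floor is now AVAILABLE_NON_COMPRESSED_MEMORY / 7 (about one seventh, which the comment rounds to 15%), and it drops to 0 entirely when the compressor is low on space; non-jetsam systems keep the old / 3 (about 33%) floor. A trivial standalone computation of the two fractions for a hypothetical available-memory figure:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
        /* Hypothetical figure; AVAILABLE_NON_COMPRESSED_MEMORY is a kernel macro. */
        uint64_t available_pages = 1000000;

        uint64_t jetsam_floor  = available_pages / 7;   /* ~14.3% of available */
        uint64_t default_floor = available_pages / 3;   /* ~33.3% of available */

        printf("jetsam floor:  %llu pages (%.1f%%)\n",
               (unsigned long long)jetsam_floor,  100.0 * jetsam_floor  / available_pages);
        printf("default floor: %llu pages (%.1f%%)\n",
               (unsigned long long)default_floor, 100.0 * default_floor / available_pages);
        return 0;
}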
 
                exceeded_burst_throttle = FALSE;
                /*
@@ -1713,8 +1987,38 @@ return_from_scan:
                        case FCS_IDLE:
                                if ((vm_page_free_count + local_freed) < vm_page_free_target) {
 
+                                       if (object != NULL) {
+                                               vm_object_unlock(object);
+                                               object = NULL;
+                                       }
+                                       vm_pageout_scan_wants_object = VM_OBJECT_NULL;
+
+                                       vm_page_unlock_queues();
+
+                                       if (local_freeq) {
+
+                                               VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
+                                                              vm_page_free_count, local_freed, delayed_unlock_limit, 3);
+
+                                               vm_page_free_list(local_freeq, TRUE);
+                                                       
+                                               VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
+                                                              vm_page_free_count, local_freed, 0, 3);
+
+                                               local_freeq = NULL;
+                                               local_freed = 0;
+                                       }
+                                       thread_yield_internal(1);
+
+                                       vm_page_lock_queues();
+
+                                       if (!VM_PAGE_Q_THROTTLED(iq)) {
+                                               vm_pageout_scan_yield_unthrottled++;
+                                               continue;
+                                       }
                                        if (vm_page_pageable_external_count > vm_page_filecache_min && !queue_empty(&vm_page_queue_inactive)) {
                                                anons_grabbed = ANONS_GRABBED_LIMIT;
+                                               vm_pageout_scan_throttle_deferred++;
                                                goto consider_inactive;
                                        }
                                        if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) && vm_page_active_count)
@@ -1798,8 +2102,7 @@ vm_pageout_scan_delay:
                                local_freeq = NULL;
                                local_freed = 0;
                        }
-                       if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)
-                               vm_consider_waking_compactor_swapper();
+                       vm_consider_waking_compactor_swapper();
 
                        vm_page_lock_queues();
 
@@ -1880,9 +2183,6 @@ vm_pageout_scan_delay:
                                vm_pageout_scan_throttle++;
                        iq->pgo_throttled = TRUE;
 
-                       if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)
-                               vm_consider_waking_compactor_swapper();
-
                        assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
                        counter(c_vm_pageout_scan_block++);
 
@@ -1927,6 +2227,8 @@ consider_inactive:
                 * Choose a victim.
                 */
                while (1) {
+                       uint32_t        inactive_external_count;
+
                        m = NULL;
                        
                        if (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) {
@@ -1956,8 +2258,10 @@ consider_inactive:
                        }
 
                        grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min);
+                       inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
 
-                       if (vm_page_pageable_external_count < vm_page_filecache_min || force_anonymous == TRUE) {
+                       if ((vm_page_pageable_external_count < vm_page_filecache_min || force_anonymous == TRUE) ||
+                           ((inactive_external_count < vm_page_anonymous_count) && (inactive_external_count < (vm_page_pageable_external_count / 3)))) {
                                grab_anonymous = TRUE;
                                anons_grabbed = 0;
                        }
@@ -2037,11 +2341,11 @@ consider_inactive:
                /*
                 * we just found this page on one of our queues...
                 * it can't also be on the pageout queue, so safe
-                * to call VM_PAGE_QUEUES_REMOVE
+                * to call vm_page_queues_remove
                 */
                assert(!m->pageout_queue);
 
-               VM_PAGE_QUEUES_REMOVE(m);
+               vm_page_queues_remove(m);
 
                assert(!m->laundry);
                assert(!m->private);
@@ -2104,7 +2408,7 @@ consider_inactive:
                                 * is possible for the value to be a bit non-determistic, but that's ok
                                 * since it's only used as a hint
                                 */
-                               m->object->scan_collisions++;
+                               m->object->scan_collisions = 1;
 
                                if ( !queue_empty(&sq->age_q) )
                                        m_want = (vm_page_t) queue_first(&sq->age_q);
@@ -2176,17 +2480,14 @@ requeue_page:
                        switch (page_prev_state) {
 
                        case PAGE_STATE_SPECULATIVE:
-                               vm_page_speculate(m, FALSE);
-                               break;
-
                        case PAGE_STATE_ANONYMOUS:
                        case PAGE_STATE_CLEAN:
                        case PAGE_STATE_INACTIVE:
-                               VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
+                               vm_page_enqueue_inactive(m, FALSE);
                                break;
 
                        case PAGE_STATE_INACTIVE_FIRST:
-                               VM_PAGE_ENQUEUE_INACTIVE(m, TRUE);
+                               vm_page_enqueue_inactive(m, TRUE);
                                break;
                        }
                        goto done_with_inactivepage;
@@ -2494,6 +2795,7 @@ throttle_inactive:
                    (object->purgable == VM_PURGABLE_DENY ||
                     object->purgable == VM_PURGABLE_NONVOLATILE ||
                     object->purgable == VM_PURGABLE_VOLATILE)) {
+                       vm_page_check_pageable_safe(m);
                        queue_enter(&vm_page_queue_throttled, m,
                                    vm_page_t, pageq);
                        m->throttled = TRUE;
@@ -2532,8 +2834,9 @@ throttle_inactive:
                                 * that we can try to find clean pages in the active/inactive queues before
                                 * deciding to jetsam a process
                                 */
-                               vm_pageout_scan_inactive_throttled_external++;                  
+                               vm_pageout_scan_inactive_throttled_external++;
 
+                               vm_page_check_pageable_safe(m);
                                queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
                                m->active = TRUE;
                                vm_page_active_count++;
@@ -2554,7 +2857,7 @@ throttle_inactive:
                                        object = VM_OBJECT_NULL;
                                        vm_page_unlock_queues();
                                        
-                                       VM_DEBUG_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START,
+                                       VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START,
                                               vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
 
                                         /* Kill first suitable process */
@@ -2562,7 +2865,7 @@ throttle_inactive:
                                                panic("vm_pageout_scan: Jetsam request failed\n");      
                                        }
                                        
-                                       VM_DEBUG_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END, 0, 0, 0, 0);
+                                       VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END, 0, 0, 0, 0);
 
                                        vm_pageout_inactive_external_forced_jetsam_count++;
                                        vm_page_lock_queues();  
@@ -2579,7 +2882,7 @@ throttle_inactive:
 
                                vm_pageout_scan_inactive_throttled_internal++;
 
-                               goto requeue_page;
+                               goto must_activate_page;
                        }
                }
 
@@ -2600,22 +2903,50 @@ throttle_inactive:
                 * of likely usage of the page.
                 */
                if (m->pmapped == TRUE) {
+                       int pmap_options;
 
-                       if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE || object->internal == FALSE) {
+                       /*
+                        * Don't count this page as going into the compressor
+                        * if any of these are true:
+                        * 1) We have the dynamic pager i.e. no compressed pager
+                        * 2) Freezer enabled device with a freezer file to
+                        *    hold the app data i.e. no compressed pager
+                        * 3) Freezer enabled device with compressed pager
+                        *    backend (exclusive use) i.e. most of the VM system
+                        *    (including vm_pageout_scan) has no knowledge of
+                        *    the compressor
+                        * 4) This page belongs to a file and hence will not be
+                        *    sent into the compressor
+                        */
+                       if (DEFAULT_PAGER_IS_ACTIVE ||
+                           DEFAULT_FREEZER_IS_ACTIVE ||
+                           DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS ||
+                           object->internal == FALSE) {
+                               pmap_options = 0;
+                       } else if (m->dirty || m->precious) {
                                /*
-                                * Don't count this page as going into the compressor if any of these are true:
-                                * 1) We have the dynamic pager i.e. no compressed pager
-                                * 2) Freezer enabled device with a freezer file to hold the app data i.e. no compressed pager
-                                * 3) Freezer enabled device with compressed pager backend (exclusive use) i.e. most of the VM system
-                                     (including vm_pageout_scan) has no knowledge of the compressor
-                                * 4) This page belongs to a file and hence will not be sent into the compressor
+                                * VM knows that this page is dirty (or
+                                * precious) and needs to be compressed
+                                * rather than freed.
+                                * Tell the pmap layer to count this page
+                                * as "compressed".
                                 */
-
-                               refmod_state = pmap_disconnect_options(m->phys_page, 0, NULL);
+                               pmap_options = PMAP_OPTIONS_COMPRESSOR;
                        } else {
-                               refmod_state = pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);
+                               /*
+                                * VM does not know if the page needs to
+                                * be preserved but the pmap layer might tell
+                                * us if any mapping has "modified" it.
+                                * Let the pmap layer count this page
+                                * as compressed if and only if it has been
+                                * modified.
+                                */
+                               pmap_options =
+                                       PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
                        }
-
+                       refmod_state = pmap_disconnect_options(m->phys_page,
+                                                              pmap_options,
+                                                              NULL);
                        if (refmod_state & VM_MEM_MODIFIED) {
                                SET_PAGE_DIRTY(m, FALSE);
                        }
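The restructured block above replaces the old two-way pmap_disconnect_options call with an explicit three-way choice of pmap_options, enumerated in the new comment. A standalone model of that decision (the PMAP_OPTIONS_* values here are stand-in constants; only the branching mirrors the diff):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in values; the real PMAP_OPTIONS_* flags live in the pmap headers. */
#define PMAP_OPTIONS_NONE                      0x0
#define PMAP_OPTIONS_COMPRESSOR                0x1
#define PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED   0x2

static int
choose_pmap_options(bool legacy_pager_active, bool object_internal,
                    bool dirty, bool precious)
{
        if (legacy_pager_active || !object_internal) {
                /* page will not go to the compressor: don't count it there */
                return PMAP_OPTIONS_NONE;
        }
        if (dirty || precious) {
                /* VM already knows it must be compressed */
                return PMAP_OPTIONS_COMPRESSOR;
        }
        /* let the pmap layer count it as compressed only if a mapping dirtied it */
        return PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
}

int
main(void)
{
        printf("%d\n", choose_pmap_options(false, true, true,  false));  /* 1 */
        printf("%d\n", choose_pmap_options(false, true, false, false));  /* 2 */
        printf("%d\n", choose_pmap_options(true,  true, false, false));  /* 0 */
        return 0;
}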
@@ -2685,14 +3016,6 @@ throttle_inactive:
 #endif /* CONFIG_JETSAM */
 #endif /* VM_PRESSURE_EVENTS */
                
-               /*
-                * do NOT set the pageout bit!
-                * sure, we might need free pages, but this page is going to take time to become free 
-                * anyway, so we may as well put it on the clean queue first and take it from there later
-                * if necessary.  that way, we'll ensure we don't free up too much. -mj
-                */
-               vm_pageout_cluster(m, FALSE);
-
                if (page_prev_state == PAGE_STATE_ANONYMOUS)
                        vm_pageout_inactive_anonymous++;
                if (object->internal)
@@ -2700,6 +3023,13 @@ throttle_inactive:
                else
                        vm_pageout_inactive_dirty_external++;
 
+               /*
+                * do NOT set the pageout bit!
+                * sure, we might need free pages, but this page is going to take time to become free 
+                * anyway, so we may as well put it on the clean queue first and take it from there later
+                * if necessary.  that way, we'll ensure we don't free up too much. -mj
+                */
+               vm_pageout_cluster(m, FALSE, FALSE, FALSE);
 
 done_with_inactivepage:
 
@@ -2727,10 +3057,8 @@ done_with_inactivepage:
                                local_freed = 0;
                                need_delay = FALSE;
                        }
-                       if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
-                               vm_consider_waking_compactor_swapper();
-                               need_delay = FALSE;
-                       }
+                       vm_consider_waking_compactor_swapper();
+
                        vm_page_lock_queues();
 
                        if (need_delay == TRUE)
@@ -2785,7 +3113,7 @@ vm_page_free_reserve(
        if (vm_page_free_target < vm_page_free_min + 5)
                vm_page_free_target = vm_page_free_min + 5;
 
-       vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 3);
+       vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 2);
 }
 
 /*
@@ -3177,29 +3505,32 @@ vm_pageout_iothread_external_continue(struct vm_pageout_queue *q)
 
 uint32_t       vm_compressor_failed;
 
+#define                MAX_FREE_BATCH          32
+
 static void
 vm_pageout_iothread_internal_continue(struct cq *cq)
 {
        struct vm_pageout_queue *q;
        vm_page_t       m = NULL;
-       vm_object_t     object;
-       memory_object_t pager;
        boolean_t       pgo_draining;
        vm_page_t   local_q;
        int         local_cnt;
        vm_page_t   local_freeq = NULL;
        int         local_freed = 0;
        int         local_batch_size;
-       kern_return_t   retval;
-       int             compressed_count_delta;
 
 
        KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0);
 
        q = cq->q;
-       local_batch_size = q->pgo_maxlaundry / (vm_compressor_thread_count * 4);
+       local_batch_size = q->pgo_maxlaundry / (vm_compressor_thread_count * 2);
 
+#if RECORD_THE_COMPRESSED_DATA
+       if (q->pgo_laundry)
+               c_compressed_record_init();
+#endif
        while (TRUE) {
+               int     pages_left_on_q = 0;
 
                local_cnt = 0;
                local_q = NULL;
@@ -3210,7 +3541,7 @@ vm_pageout_iothread_internal_continue(struct cq *cq)
 
                KERNEL_DEBUG(0xe0400014 | DBG_FUNC_END, 0, 0, 0, 0, 0);
 
-               KERNEL_DEBUG(0xe0400018 | DBG_FUNC_START, 0, 0, 0, 0, 0);
+               KERNEL_DEBUG(0xe0400018 | DBG_FUNC_START, q->pgo_laundry, 0, 0, 0, 0);
 
                while ( !queue_empty(&q->pgo_pending) && local_cnt <  local_batch_size) {
 
@@ -3230,104 +3561,43 @@ vm_pageout_iothread_internal_continue(struct cq *cq)
 
                q->pgo_busy = TRUE;
 
-               if ((pgo_draining = q->pgo_draining) == FALSE) 
+               if ((pgo_draining = q->pgo_draining) == FALSE) {
                        vm_pageout_throttle_up_batch(q, local_cnt);
+                       pages_left_on_q = q->pgo_laundry;
+               } else
+                       pages_left_on_q = q->pgo_laundry - local_cnt;
 
                vm_page_unlock_queues();
 
-               KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0);
+#if !RECORD_THE_COMPRESSED_DATA
+               if (pages_left_on_q >= local_batch_size && cq->id < (vm_compressor_thread_count - 1)) 
+                       thread_wakeup((event_t) ((uintptr_t)&q->pgo_pending + cq->id + 1));
+#endif
+               KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, q->pgo_laundry, 0, 0, 0, 0);
 
                while (local_q) {
-               
+
+                       KERNEL_DEBUG(0xe0400024 | DBG_FUNC_START, local_cnt, 0, 0, 0, 0);
+
                        m = local_q;
                        local_q = (vm_page_t)m->pageq.next;
                        m->pageq.next = NULL;
 
-                       if (m->object->object_slid) {
-                               panic("slid page %p not allowed on this path\n", m);
-                       }
-
-                       object = m->object;
-                       pager = object->pager;
-
-                       if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL)  {
-                               
-                               KERNEL_DEBUG(0xe0400010 | DBG_FUNC_START, object, pager, 0, 0, 0);
-
-                               vm_object_lock(object);
-
-                               /*
-                                * If there is no memory object for the page, create
-                                * one and hand it to the compression pager.
-                                */
-
-                               if (!object->pager_initialized)
-                                       vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
-                               if (!object->pager_initialized)
-                                       vm_object_compressor_pager_create(object);
-
-                               if (!object->pager_initialized) {
-                                       /*
-                                        * Still no pager for the object.
-                                        * Reactivate the page.
-                                        *
-                                        * Should only happen if there is no
-                                        * compression pager
-                                        */
-                                       m->pageout = FALSE;
-                                       m->laundry = FALSE;
-                                       PAGE_WAKEUP_DONE(m);
-
-                                       vm_page_lockspin_queues();
-                                       vm_page_activate(m);
-                                       vm_pageout_dirty_no_pager++;
-                                       vm_page_unlock_queues();
-                                       
-                                       /*
-                                        *      And we are done with it.
-                                        */
-                                       vm_object_activity_end(object);
-                                       vm_object_unlock(object);
-
-                                       continue;
-                               }
-                               pager = object->pager;
-
-                               if (pager == MEMORY_OBJECT_NULL) {
-                                       /*
-                                        * This pager has been destroyed by either
-                                        * memory_object_destroy or vm_object_destroy, and
-                                        * so there is nowhere for the page to go.
-                                        */
-                                       if (m->pageout) {
-                                               /*
-                                                * Just free the page... VM_PAGE_FREE takes
-                                                * care of cleaning up all the state...
-                                                * including doing the vm_pageout_throttle_up
-                                                */
-                                               VM_PAGE_FREE(m);
-                                       } else {
-                                               m->laundry = FALSE;
-                                               PAGE_WAKEUP_DONE(m);
+                       if (vm_pageout_compress_page(&cq->current_chead, cq->scratch_buf, m, FALSE) == KERN_SUCCESS) {
 
-                                               vm_page_lockspin_queues();
-                                               vm_page_activate(m);
-                                               vm_page_unlock_queues();
+                               m->pageq.next = (queue_entry_t)local_freeq;
+                               local_freeq = m;
+                               local_freed++;
 
-                                               /*
-                                                *      And we are done with it.
-                                                */
-                                       }
-                                       vm_object_activity_end(object);
-                                       vm_object_unlock(object);
+                               if (local_freed >= MAX_FREE_BATCH) {
 
-                                       continue;
+                                       vm_page_free_list(local_freeq, TRUE);
+                                       local_freeq = NULL;
+                                       local_freed = 0;
                                }
-                               vm_object_unlock(object);
-
-                               KERNEL_DEBUG(0xe0400010 | DBG_FUNC_END, object, pager, 0, 0, 0);
                        }
-                       while (vm_page_free_count < (vm_page_free_reserved - COMPRESSOR_FREE_RESERVED_LIMIT)) {
+#if !CONFIG_JETSAM
+                       while (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
                                kern_return_t   wait_result;
                                int             need_wakeup = 0;
 
@@ -3341,8 +3611,8 @@ vm_pageout_iothread_internal_continue(struct cq *cq)
                                }
                                lck_mtx_lock_spin(&vm_page_queue_free_lock);
 
-                               if (vm_page_free_count < (vm_page_free_reserved - COMPRESSOR_FREE_RESERVED_LIMIT)) {
-                               
+                               if (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
+
                                        if (vm_page_free_wanted_privileged++ == 0)
                                                need_wakeup = 1;
                                        wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT);
@@ -3353,77 +3623,12 @@ vm_pageout_iothread_internal_continue(struct cq *cq)
                                                thread_wakeup((event_t)&vm_page_free_wanted);
 
                                        if (wait_result == THREAD_WAITING)
+
                                                thread_block(THREAD_CONTINUE_NULL);
                                } else
                                        lck_mtx_unlock(&vm_page_queue_free_lock);
                        }
-
-                       assert(object->activity_in_progress > 0);
-
-                       retval = vm_compressor_pager_put(
-                               pager,
-                               m->offset + object->paging_offset,
-                               m->phys_page,
-                               &cq->current_chead,
-                               cq->scratch_buf,
-                               &compressed_count_delta);
-
-                       vm_object_lock(object);
-                       assert(object->activity_in_progress > 0);
-
-                       assert(m->object == object);
-
-                       vm_compressor_pager_count(pager,
-                                                 compressed_count_delta,
-                                                 FALSE, /* shared_lock */
-                                                 object);
-
-                       m->laundry = FALSE;
-                       m->pageout = FALSE;
-
-                       if (retval == KERN_SUCCESS) {
-                               /*
-                                * If the object is purgeable, its owner's
-                                * purgeable ledgers will be updated in
-                                * vm_page_remove() but the page still
-                                * contributes to the owner's memory footprint,
-                                * so account for it as such.
-                                */
-                               if (object->purgable != VM_PURGABLE_DENY &&
-                                   object->vo_purgeable_owner != NULL) {
-                                       /* one more compressed purgeable page */
-                                       vm_purgeable_compressed_update(object,
-                                                                      +1);
-                               }
-
-                               vm_page_compressions_failing = FALSE;
-                               
-                               VM_STAT_INCR(compressions);
-                       
-                               if (m->tabled)
-                                       vm_page_remove(m, TRUE);
-                               vm_object_activity_end(object);
-                               vm_object_unlock(object);
-
-                               m->pageq.next = (queue_entry_t)local_freeq;
-                               local_freeq = m;
-                               local_freed++;
-
-                       } else {
-                               PAGE_WAKEUP_DONE(m);
-
-                               vm_page_lockspin_queues();
-
-                               vm_page_activate(m);
-                               vm_compressor_failed++;
-
-                               vm_page_compressions_failing = TRUE;
-
-                               vm_page_unlock_queues();
-
-                               vm_object_activity_end(object);
-                               vm_object_unlock(object);
-                       }
+#endif
                }
                if (local_freeq) {
                        vm_page_free_list(local_freeq, TRUE);
@@ -3445,7 +3650,7 @@ vm_pageout_iothread_internal_continue(struct cq *cq)
        q->pgo_busy = FALSE;
        q->pgo_idle = TRUE;
 
-       assert_wait((event_t) &q->pgo_pending, THREAD_UNINT);
+       assert_wait((event_t) ((uintptr_t)&q->pgo_pending + cq->id), THREAD_UNINT);
        vm_page_unlock_queues();
 
        KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0);
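Two changes in the internal iothread loop are easy to miss: compressed pages are now released in batches of MAX_FREE_BATCH (32) via vm_page_free_list, and each of the up-to-MAX_COMPRESSOR_THREAD_COUNT compressor threads sleeps on its own event, derived by offsetting &q->pgo_pending by the thread's cq->id, so the thread with the next id is only woken when at least a full local batch is still left on the queue. A tiny standalone illustration of how that address offset yields one distinct wait channel per thread id (the struct layout is a stand-in, not the kernel's):

#include <stdint.h>
#include <stdio.h>

struct pageout_queue_model {
        int pgo_pending;        /* stand-in for the real queue head */
};

int
main(void)
{
        struct pageout_queue_model q;

        /* one wait channel per compressor thread id, as in
         * assert_wait((event_t)((uintptr_t)&q->pgo_pending + cq->id), ...) */
        for (int id = 0; id < 8; id++) {
                uintptr_t event = (uintptr_t)&q.pgo_pending + (uintptr_t)id;
                printf("thread %d waits on channel %#lx\n", id, (unsigned long)event);
        }
        return 0;
}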
@@ -3456,6 +3661,176 @@ vm_pageout_iothread_internal_continue(struct cq *cq)
 
 
 
+static void
+vm_pageout_immediate(vm_page_t m, boolean_t object_locked_by_caller)
+{
+       assert(vm_pageout_immediate_scratch_buf);
+
+       if (vm_pageout_compress_page(&vm_pageout_immediate_chead, vm_pageout_immediate_scratch_buf, m, object_locked_by_caller) == KERN_SUCCESS) {
+
+               vm_page_free_prepare_object(m, TRUE);
+               vm_page_release(m);
+       }
+}
+
+
+kern_return_t
+vm_pageout_compress_page(void **current_chead, char *scratch_buf, vm_page_t m, boolean_t object_locked_by_caller) 
+{
+       vm_object_t     object;
+       memory_object_t pager;
+       int             compressed_count_delta;
+       kern_return_t   retval;
+
+       if (m->object->object_slid) {
+               panic("slid page %p not allowed on this path\n", m);
+       }
+
+       object = m->object;
+       pager = object->pager;
+
+       if (object_locked_by_caller == FALSE && (!object->pager_initialized || pager == MEMORY_OBJECT_NULL))  {
+                               
+               KERNEL_DEBUG(0xe0400010 | DBG_FUNC_START, object, pager, 0, 0, 0);
+
+               vm_object_lock(object);
+
+               /*
+                * If there is no memory object for the page, create
+                * one and hand it to the compression pager.
+                */
+
+               if (!object->pager_initialized)
+                       vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
+               if (!object->pager_initialized)
+                       vm_object_compressor_pager_create(object);
+
+               if (!object->pager_initialized) {
+                       /*
+                        * Still no pager for the object.
+                        * Reactivate the page.
+                        *
+                        * Should only happen if there is no
+                        * compression pager
+                        */
+                       m->pageout = FALSE;
+                       m->laundry = FALSE;
+                       PAGE_WAKEUP_DONE(m);
+
+                       vm_page_lockspin_queues();
+                       vm_page_activate(m);
+                       vm_pageout_dirty_no_pager++;
+                       vm_page_unlock_queues();
+                                       
+                       /*
+                        *      And we are done with it.
+                        */
+                       vm_object_activity_end(object);
+                       vm_object_unlock(object);
+
+                       return KERN_FAILURE;
+               }
+               pager = object->pager;
+
+               if (pager == MEMORY_OBJECT_NULL) {
+                       /*
+                        * This pager has been destroyed by either
+                        * memory_object_destroy or vm_object_destroy, and
+                        * so there is nowhere for the page to go.
+                        */
+                       if (m->pageout) {
+                               /*
+                                * Just free the page... VM_PAGE_FREE takes
+                                * care of cleaning up all the state...
+                                * including doing the vm_pageout_throttle_up
+                                */
+                               VM_PAGE_FREE(m);
+                       } else {
+                               m->laundry = FALSE;
+                               PAGE_WAKEUP_DONE(m);
+
+                               vm_page_lockspin_queues();
+                               vm_page_activate(m);
+                               vm_page_unlock_queues();
+
+                               /*
+                                *      And we are done with it.
+                                */
+                       }
+                       vm_object_activity_end(object);
+                       vm_object_unlock(object);
+
+                       return KERN_FAILURE;
+               }
+               vm_object_unlock(object);
+                               
+               KERNEL_DEBUG(0xe0400010 | DBG_FUNC_END, object, pager, 0, 0, 0);
+       }
+       assert(object->pager_initialized && pager != MEMORY_OBJECT_NULL);
+
+       if (object_locked_by_caller == FALSE)
+               assert(object->activity_in_progress > 0);
+
+       retval = vm_compressor_pager_put(
+               pager,
+               m->offset + object->paging_offset,
+               m->phys_page,
+               current_chead,
+               scratch_buf,
+               &compressed_count_delta);
+
+       if (object_locked_by_caller == FALSE) {
+               vm_object_lock(object);
+
+               assert(object->activity_in_progress > 0);
+               assert(m->object == object);
+       }
+
+       vm_compressor_pager_count(pager,
+                                 compressed_count_delta,
+                                 FALSE, /* shared_lock */
+                                 object);
+
+       m->laundry = FALSE;
+       m->pageout = FALSE;
+
+       if (retval == KERN_SUCCESS) {
+               /*
+                * If the object is purgeable, its owner's
+                * purgeable ledgers will be updated in
+                * vm_page_remove() but the page still
+                * contributes to the owner's memory footprint,
+                * so account for it as such.
+                */
+               if (object->purgable != VM_PURGABLE_DENY &&
+                   object->vo_purgeable_owner != NULL) {
+                       /* one more compressed purgeable page */
+                       vm_purgeable_compressed_update(object,
+                                                      +1);
+               }
+               VM_STAT_INCR(compressions);
+                       
+               if (m->tabled)
+                       vm_page_remove(m, TRUE);
+
+       } else {
+               PAGE_WAKEUP_DONE(m);
+
+               vm_page_lockspin_queues();
+
+               vm_page_activate(m);
+               vm_compressor_failed++;
+
+               vm_page_unlock_queues();
+       }
+       if (object_locked_by_caller == FALSE) {
+               vm_object_activity_end(object);
+               vm_object_unlock(object);
+       }
+       return retval;
+}
+
+
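vm_pageout_compress_page is the factored-out core that both the internal iothreads and the new vm_pageout_immediate path call: on KERN_SUCCESS the page has been handed to the compressor pager and (if tabled) removed from its object, so the caller is free to release it; on failure the page has already been reactivated inside the routine. A minimal standalone model of that calling pattern (compress_page here is a stub standing in for vm_pageout_compress_page, not the real routine):

#include <stdio.h>

typedef int kern_return_t;
#define KERN_SUCCESS 0
#define KERN_FAILURE 5

/* Stub standing in for vm_pageout_compress_page(); alternates success/failure. */
static kern_return_t
compress_page(int page_id)
{
        return (page_id % 2 == 0) ? KERN_SUCCESS : KERN_FAILURE;
}

/* Mirrors the shape of vm_pageout_immediate(): release on success; on failure
 * the page was reactivated by the compress path and nothing more is done. */
static void
pageout_immediate_model(int page_id)
{
        if (compress_page(page_id) == KERN_SUCCESS)
                printf("page %d compressed and released\n", page_id);
        else
                printf("page %d reactivated by the compress path\n", page_id);
}

int
main(void)
{
        pageout_immediate_model(0);
        pageout_immediate_model(1);
        return 0;
}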
 static void
 vm_pageout_adjust_io_throttles(struct vm_pageout_queue *iq, struct vm_pageout_queue *eq, boolean_t req_lowpriority)
 {
@@ -3549,9 +3924,9 @@ vm_pageout_iothread_internal(struct cq *cq)
        vm_page_unlock_queues();
 
        if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
-               cq->q = &vm_pageout_queue_internal;
-               cq->current_chead = NULL;
-               cq->scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE);
+
+               if (vm_restricted_to_single_processor == TRUE)
+                       thread_vm_bind_group_add();
 
                vm_pageout_iothread_internal_continue(cq);
        } else
@@ -3781,6 +4156,7 @@ void      vm_pageout_reinit_tuneables(void);
 void
 vm_pageout_reinit_tuneables(void)
 {
+
        vm_compressor_minorcompact_threshold_divisor = 18;
        vm_compressor_majorcompact_threshold_divisor = 22;
        vm_compressor_unthrottle_threshold_divisor = 32;
@@ -3807,6 +4183,40 @@ extern ppnum_t vm_map_get_phys_page(vm_map_t map,
                                    vm_offset_t offset);
 #endif /* FBDP_TEST_WIRE_AND_EXTRACT */
 
+
+void
+vm_set_restrictions()
+{
+       host_basic_info_data_t hinfo;
+       mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
+
+#define BSD_HOST 1
+       host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
+
+       assert(hinfo.max_cpus > 0);
+
+       if (hinfo.max_cpus <= 3) {
+               /*
+                * On systems with a limited number of CPUs, bind the
+                * 4 major threads that can free memory and that tend to use
+                * a fair bit of CPU under pressured conditions to a single processor.
+                * This ensures that these threads don't hog all of the available CPUs
+                * (important for camera launch), while still allowing them to run
+                * independently with respect to locks... the 4 threads are:
+                * vm_pageout_scan, vm_pageout_iothread_internal (compressor),
+                * vm_compressor_swap_trigger_thread (minor and major compactions),
+                * and memorystatus_thread (jetsams).
+                *
+                * The first time each of these threads runs, it is responsible for
+                * checking the state of vm_restricted_to_single_processor, and if
+                * that is TRUE it calls thread_bind_master... someday this should be
+                * replaced with a group scheduling mechanism and KPI.
+                */
+               vm_restricted_to_single_processor = TRUE;
+       }
+}
+
+
 void
 vm_pageout(void)
 {
@@ -3819,14 +4229,18 @@ vm_pageout(void)
         * Set thread privileges.
         */
        s = splsched();
+
        thread_lock(self);
-       self->priority = BASEPRI_PREEMPT - 1;
-       set_sched_pri(self, self->priority);
+       self->options |= TH_OPT_VMPRIV;
+       sched_set_thread_base_priority(self, BASEPRI_PREEMPT - 1);
        thread_unlock(self);
 
        if (!self->reserved_stack)
                self->reserved_stack = self->kernel_stack;
 
+       if (vm_restricted_to_single_processor == TRUE)
+               thread_vm_bind_group_add();
+
        splx(s);
 
        /*
@@ -3894,7 +4308,6 @@ vm_pageout(void)
        vm_pageout_queue_external.pgo_tid = -1;
        vm_pageout_queue_external.pgo_inited = FALSE;
 
-
        queue_init(&vm_pageout_queue_internal.pgo_pending);
        vm_pageout_queue_internal.pgo_maxlaundry = 0;
        vm_pageout_queue_internal.pgo_laundry = 0;
@@ -4144,7 +4557,7 @@ vm_pageout(void)
 
        ledger = ledger_instantiate(task_ledger_template,
                                    LEDGER_CREATE_ACTIVE_ENTRIES);
-       user_map = vm_map_create(pmap_create(ledger, 0, TRUE),
+       user_map = vm_map_create(pmap_create(ledger, 0, PMAP_CREATE_64BIT),
                                 0x100000000ULL,
                                 0x200000000ULL,
                                 TRUE);
@@ -4178,7 +4591,7 @@ vm_pageout(void)
             cur_offset += PAGE_SIZE) {
                kr = vm_map_wire_and_extract(wire_map,
                                             wire_addr + cur_offset,
-                                            VM_PROT_DEFAULT,
+                                            VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK),
                                             TRUE,
                                             &wire_ppnum);
                assert(kr == KERN_SUCCESS);
@@ -4216,7 +4629,6 @@ vm_pageout(void)
        printf("FBDP_TEST_WIRE_AND_EXTRACT: PASS\n");
 #endif /* FBDP_TEST_WIRE_AND_EXTRACT */
 
-
        vm_pageout_continue();
 
        /*
@@ -4245,10 +4657,6 @@ vm_pageout(void)
 
 
 
-#define MAX_COMRPESSOR_THREAD_COUNT    8
-
-struct cq ciq[MAX_COMRPESSOR_THREAD_COUNT];
-
 int vm_compressor_thread_count = 2;
 
 kern_return_t
@@ -4257,6 +4665,8 @@ vm_pageout_internal_start(void)
        kern_return_t   result;
        int             i;
        host_basic_info_data_t hinfo;
+       int             thread_count;
+
 
        if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
                mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
@@ -4269,18 +4679,33 @@ vm_pageout_internal_start(void)
                        vm_compressor_thread_count = hinfo.max_cpus - 1;
                if (vm_compressor_thread_count <= 0)
                        vm_compressor_thread_count = 1;
-               else if (vm_compressor_thread_count > MAX_COMRPESSOR_THREAD_COUNT)
-                       vm_compressor_thread_count = MAX_COMRPESSOR_THREAD_COUNT;
+               else if (vm_compressor_thread_count > MAX_COMPRESSOR_THREAD_COUNT)
+                       vm_compressor_thread_count = MAX_COMPRESSOR_THREAD_COUNT;
+
+               if (vm_compressor_immediate_preferred == TRUE) {
+                       vm_pageout_immediate_chead = NULL;
+                       vm_pageout_immediate_scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE);
+
+                       vm_compressor_thread_count = 1;
+               }
+               thread_count = vm_compressor_thread_count;
 
                vm_pageout_queue_internal.pgo_maxlaundry = (vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX;
        } else {
-               vm_compressor_thread_count = 1;
+               vm_compressor_thread_count = 0;
+               thread_count = 1;
                vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
        }
 
        for (i = 0; i < vm_compressor_thread_count; i++) {
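+               /*
+                * set up the per-thread compressor state: thread id, the
+                * internal pageout queue, and a private chead and scratch
+                * buffer for each compressor thread.
+                */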
-
+               ciq[i].id = i;
+               ciq[i].q = &vm_pageout_queue_internal;
+               ciq[i].current_chead = NULL;
+               ciq[i].scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE);
+       }               
+       for (i = 0; i < thread_count; i++) {
                result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, (void *)&ciq[i], BASEPRI_PREEMPT - 1, &vm_pageout_internal_iothread);
+
                if (result == KERN_SUCCESS)
                        thread_deallocate(vm_pageout_internal_iothread);
                else
@@ -4370,6 +4795,7 @@ upl_create(int type, int flags, upl_size_t size)
        upl->highest_page = 0;
        upl_lock_init(upl);
        upl->vector_upl = NULL;
+       upl->associated_upl = NULL;
 #if CONFIG_IOSCHED
        if (type & UPL_CREATE_IO_TRACKING) {
                upl->upl_priority = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
@@ -4599,7 +5025,7 @@ vm_object_upl_request(
        upl_t                   *upl_ptr,
        upl_page_info_array_t   user_page_list,
        unsigned int            *page_list_count,
-       int                     cntrl_flags)
+       upl_control_flags_t     cntrl_flags)
 {
        vm_page_t               dst_page = VM_PAGE_NULL;
        vm_object_offset_t      dst_offset;
@@ -4970,6 +5396,8 @@ check_busy:
                        if (dst_page->phys_page > upl->highest_page)
                                upl->highest_page = dst_page->phys_page;
 
+                       assert (!pmap_is_noencrypt(dst_page->phys_page));
+
                        if (cntrl_flags & UPL_SET_LITE) {
                                unsigned int    pg_num;
 
@@ -5325,6 +5753,7 @@ check_busy:
 
                if (dst_page->phys_page > upl->highest_page)
                        upl->highest_page = dst_page->phys_page;
+               assert (!pmap_is_noencrypt(dst_page->phys_page));
                if (user_page_list) {
                        user_page_list[entry].phys_addr = dst_page->phys_page;
                        user_page_list[entry].pageout   = dst_page->pageout;
@@ -5340,6 +5769,7 @@ check_busy:
                        user_page_list[entry].cs_validated = dst_page->cs_validated;
                        user_page_list[entry].cs_tainted = dst_page->cs_tainted;
                        user_page_list[entry].cs_nx = dst_page->cs_nx;
+                       user_page_list[entry].mark      = FALSE;
                }
                /*
                 * if UPL_RET_ONLY_ABSENT is set, then
@@ -5365,7 +5795,7 @@ try_next_page:
                        VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
 
                        if (dw_count >= dw_limit) {
-                               vm_page_do_delayed_work(object, &dw_array[0], dw_count);
+                               vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count);
 
                                dwp = &dw_array[0];
                                dw_count = 0;
@@ -5376,7 +5806,7 @@ try_next_page:
                xfer_size -= PAGE_SIZE;
        }
        if (dw_count)
-               vm_page_do_delayed_work(object, &dw_array[0], dw_count);
+               vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count);
 
        if (alias_page != NULL) {
                VM_PAGE_FREE(alias_page);
@@ -5396,60 +5826,6 @@ try_next_page:
        return KERN_SUCCESS;
 }
 
-/* JMM - Backward compatability for now */
-kern_return_t
-vm_fault_list_request(                 /* forward */
-       memory_object_control_t         control,
-       vm_object_offset_t      offset,
-       upl_size_t              size,
-       upl_t                   *upl_ptr,
-       upl_page_info_t         **user_page_list_ptr,
-       unsigned int            page_list_count,
-       int                     cntrl_flags);
-kern_return_t
-vm_fault_list_request(
-       memory_object_control_t         control,
-       vm_object_offset_t      offset,
-       upl_size_t              size,
-       upl_t                   *upl_ptr,
-       upl_page_info_t         **user_page_list_ptr,
-       unsigned int            page_list_count,
-       int                     cntrl_flags)
-{
-       unsigned int            local_list_count;
-       upl_page_info_t         *user_page_list;
-       kern_return_t           kr;
-
-       if((cntrl_flags & UPL_VECTOR)==UPL_VECTOR)
-                return KERN_INVALID_ARGUMENT;
-
-       if (user_page_list_ptr != NULL) {
-               local_list_count = page_list_count;
-               user_page_list = *user_page_list_ptr;
-       } else {
-               local_list_count = 0;
-               user_page_list = NULL;
-       }
-       kr =  memory_object_upl_request(control,
-                               offset,
-                               size,
-                               upl_ptr,
-                               user_page_list,
-                               &local_list_count,
-                               cntrl_flags);
-
-       if(kr != KERN_SUCCESS)
-               return kr;
-
-       if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) {
-               *user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr);
-       }
-
-       return KERN_SUCCESS;
-}
-
-               
-
 /*  
  *     Routine:        vm_object_super_upl_request
  *     Purpose:        
@@ -5470,7 +5846,7 @@ vm_object_super_upl_request(
        upl_t                   *upl,
        upl_page_info_t         *user_page_list,
        unsigned int            *page_list_count,
-       int                     cntrl_flags)
+       upl_control_flags_t     cntrl_flags)
 {
        if (object->paging_offset > offset  || ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR))
                return KERN_FAILURE;
@@ -5522,16 +5898,16 @@ vm_map_create_upl(
        upl_t                   *upl,
        upl_page_info_array_t   page_list,
        unsigned int            *count,
-       int                     *flags)
+       upl_control_flags_t     *flags)
 {
-       vm_map_entry_t  entry;
-       int             caller_flags;
-       int             force_data_sync;
-       int             sync_cow_data;
-       vm_object_t     local_object;
-       vm_map_offset_t local_offset;
-       vm_map_offset_t local_start;
-       kern_return_t   ret;
+       vm_map_entry_t          entry;
+       upl_control_flags_t     caller_flags;
+       int                     force_data_sync;
+       int                     sync_cow_data;
+       vm_object_t             local_object;
+       vm_map_offset_t         local_offset;
+       vm_map_offset_t         local_start;
+       kern_return_t           ret;
 
        caller_flags = *flags;
 
@@ -5551,237 +5927,323 @@ vm_map_create_upl(
 REDISCOVER_ENTRY:
        vm_map_lock_read(map);
 
-       if (vm_map_lookup_entry(map, offset, &entry)) {
+       if (!vm_map_lookup_entry(map, offset, &entry)) {
+               vm_map_unlock_read(map);
+               return KERN_FAILURE;
+       }
 
-               if ((entry->vme_end - offset) < *upl_size) {
-                       *upl_size = (upl_size_t) (entry->vme_end - offset);
-                       assert(*upl_size == entry->vme_end - offset);
+       if ((entry->vme_end - offset) < *upl_size) {
+               *upl_size = (upl_size_t) (entry->vme_end - offset);
+               assert(*upl_size == entry->vme_end - offset);
+       }
+
+       if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
+               *flags = 0;
+
+               if (!entry->is_sub_map &&
+                   VME_OBJECT(entry) != VM_OBJECT_NULL) {
+                       if (VME_OBJECT(entry)->private)
+                               *flags = UPL_DEV_MEMORY;
+
+                       if (VME_OBJECT(entry)->phys_contiguous)
+                               *flags |= UPL_PHYS_CONTIG;
                }
+               vm_map_unlock_read(map);
+               return KERN_SUCCESS;
+       }
 
-               if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
-                       *flags = 0;
+       if (entry->is_sub_map) {
+               vm_map_t        submap;
 
-                       if ( !entry->is_sub_map && entry->object.vm_object != VM_OBJECT_NULL) {
-                               if (entry->object.vm_object->private)
-                                       *flags = UPL_DEV_MEMORY;
+               submap = VME_SUBMAP(entry);
+               local_start = entry->vme_start;
+               local_offset = VME_OFFSET(entry);
 
-                               if (entry->object.vm_object->phys_contiguous)
-                                       *flags |= UPL_PHYS_CONTIG;
-                       }
-                       vm_map_unlock_read(map);
+               vm_map_reference(submap);
+               vm_map_unlock_read(map);
 
-                       return KERN_SUCCESS;
-               }
+               ret = vm_map_create_upl(submap, 
+                                       local_offset + (offset - local_start), 
+                                       upl_size, upl, page_list, count, flags);
+               vm_map_deallocate(submap);
 
-               if (entry->is_sub_map) {
-                       vm_map_t        submap;
+               return ret;
+       }
 
-                       submap = entry->object.sub_map;
-                       local_start = entry->vme_start;
-                       local_offset = entry->offset;
+       if (VME_OBJECT(entry) == VM_OBJECT_NULL ||
+           !VME_OBJECT(entry)->phys_contiguous) {
+               if (*upl_size > MAX_UPL_SIZE_BYTES)
+                       *upl_size = MAX_UPL_SIZE_BYTES;
+       }
 
-                       vm_map_reference(submap);
-                       vm_map_unlock_read(map);
+       /*
+        *      Create an object if necessary.
+        */
+       if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
 
-                       ret = vm_map_create_upl(submap, 
-                                               local_offset + (offset - local_start), 
-                                               upl_size, upl, page_list, count, flags);
-                       vm_map_deallocate(submap);
+               if (vm_map_lock_read_to_write(map))
+                       goto REDISCOVER_ENTRY;
 
-                       return ret;
-               }
+               VME_OBJECT_SET(entry,
+                              vm_object_allocate((vm_size_t)
+                                                 (entry->vme_end -
+                                                  entry->vme_start)));
+               VME_OFFSET_SET(entry, 0);
 
-               if (entry->object.vm_object == VM_OBJECT_NULL || !entry->object.vm_object->phys_contiguous) {
-                       if (*upl_size > MAX_UPL_SIZE_BYTES)
-                                       *upl_size = MAX_UPL_SIZE_BYTES;
-               }
-               /*
-                *      Create an object if necessary.
-                */
-               if (entry->object.vm_object == VM_OBJECT_NULL) {
+               vm_map_lock_write_to_read(map);
+       }
 
-                       if (vm_map_lock_read_to_write(map))
-                               goto REDISCOVER_ENTRY;
+       if (!(caller_flags & UPL_COPYOUT_FROM) &&
+           !(entry->protection & VM_PROT_WRITE)) {
+               vm_map_unlock_read(map);
+               return KERN_PROTECTION_FAILURE;
+       }
+
+       local_object = VME_OBJECT(entry);
+       assert(local_object != VM_OBJECT_NULL);
+
+       if (*upl_size != 0 &&
+           local_object->vo_size > *upl_size && /* partial UPL */
+           entry->wired_count == 0 && /* No COW for entries that are wired */
+           (map->pmap != kernel_pmap) && /* alias checks */
+           (vm_map_entry_should_cow_for_true_share(entry) /* case 1 */
+            ||
+            (!entry->needs_copy &&     /* case 2 */
+             local_object->internal &&
+             (local_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) &&
+             local_object->ref_count > 1))) {
+               vm_prot_t       prot;
 
-                       entry->object.vm_object = vm_object_allocate((vm_size_t)(entry->vme_end - entry->vme_start));
-                       entry->offset = 0;
+               /*
+                * Case 1:
+                * Set up the targeted range for copy-on-write to avoid
+                * applying true_share/copy_delay to the entire object.
+                *
+                * Case 2:
+                * This map entry covers only part of an internal
+                * object.  There could be other map entries covering
+                * other areas of this object and some of these map
+                * entries could be marked as "needs_copy", which
+                * assumes that the object is COPY_SYMMETRIC.
+                * To avoid marking this object as COPY_DELAY and
+                * "true_share", let's shadow it and mark the new
+                * (smaller) object as "true_share" and COPY_DELAY.
+                */
 
-                       vm_map_lock_write_to_read(map);
+               if (vm_map_lock_read_to_write(map)) {
+                       goto REDISCOVER_ENTRY;
                }
-               if (!(caller_flags & UPL_COPYOUT_FROM)) {
-                       if (!(entry->protection & VM_PROT_WRITE)) {
-                               vm_map_unlock_read(map);
-                               return KERN_PROTECTION_FAILURE;
-                       }
+               vm_map_lock_assert_exclusive(map);
+               assert(VME_OBJECT(entry) == local_object);
+
+               vm_map_clip_start(map,
+                                 entry,
+                                 vm_map_trunc_page(offset,
+                                                   VM_MAP_PAGE_MASK(map)));
+               vm_map_clip_end(map,
+                               entry,
+                               vm_map_round_page(offset + *upl_size,
+                                                 VM_MAP_PAGE_MASK(map)));
+               if ((entry->vme_end - offset) < *upl_size) {
+                       *upl_size = (upl_size_t) (entry->vme_end - offset);
+                       assert(*upl_size == entry->vme_end - offset);
                }
 
-               local_object = entry->object.vm_object;
-               if (vm_map_entry_should_cow_for_true_share(entry) &&
-                   local_object->vo_size > *upl_size &&
-                   *upl_size != 0) {
-                       vm_prot_t       prot;
+               prot = entry->protection & ~VM_PROT_WRITE;
+               if (override_nx(map, VME_ALIAS(entry)) && prot)
+                       prot |= VM_PROT_EXECUTE;
+               vm_object_pmap_protect(local_object,
+                                      VME_OFFSET(entry),
+                                      entry->vme_end - entry->vme_start,
+                                      ((entry->is_shared ||
+                                        map->mapped_in_other_pmaps)
+                                       ? PMAP_NULL
+                                       : map->pmap),
+                                      entry->vme_start,
+                                      prot);
 
-                       /*
-                        * Set up the targeted range for copy-on-write to avoid
-                        * applying true_share/copy_delay to the entire object.
-                        */
+               assert(entry->wired_count == 0);
 
-                       if (vm_map_lock_read_to_write(map)) {
-                               goto REDISCOVER_ENTRY;
-                       }
-
-                       vm_map_clip_start(map,
-                                         entry,
-                                         vm_map_trunc_page(offset,
-                                                           VM_MAP_PAGE_MASK(map)));
-                       vm_map_clip_end(map,
-                                       entry,
-                                       vm_map_round_page(offset + *upl_size,
-                                                         VM_MAP_PAGE_MASK(map)));
-                       if ((entry->vme_end - offset) < *upl_size) {
-                               *upl_size = (upl_size_t) (entry->vme_end - offset);
-                               assert(*upl_size == entry->vme_end - offset);
-                       }
-
-                       prot = entry->protection & ~VM_PROT_WRITE;
-                       if (override_nx(map, entry->alias) && prot)
-                               prot |= VM_PROT_EXECUTE;
-                       vm_object_pmap_protect(local_object,
-                                              entry->offset,
-                                              entry->vme_end - entry->vme_start,
-                                              ((entry->is_shared || map->mapped_in_other_pmaps)
-                                               ? PMAP_NULL
-                                               : map->pmap),
-                                              entry->vme_start,
-                                              prot);
+               /*
+                * Lock the VM object and re-check its status: if it's mapped
+                * in another address space, we could still be racing with
+                * another thread holding that other VM map exclusively.
+                */
+               vm_object_lock(local_object);
+               if (local_object->true_share) {
+                       /* object is already in proper state: no COW needed */
+                       assert(local_object->copy_strategy !=
+                              MEMORY_OBJECT_COPY_SYMMETRIC);
+               } else {
+                       /* not true_share: ask for copy-on-write below */
+                       assert(local_object->copy_strategy ==
+                              MEMORY_OBJECT_COPY_SYMMETRIC);
                        entry->needs_copy = TRUE;
-
-                       vm_map_lock_write_to_read(map);
                }
+               vm_object_unlock(local_object);
 
-               if (entry->needs_copy)  {
-                       /*
-                        * Honor copy-on-write for COPY_SYMMETRIC
-                        * strategy.
-                        */
-                       vm_map_t                local_map;
-                       vm_object_t             object;
-                       vm_object_offset_t      new_offset;
-                       vm_prot_t               prot;
-                       boolean_t               wired;
-                       vm_map_version_t        version;
-                       vm_map_t                real_map;
-                       vm_prot_t               fault_type;
-
-                       local_map = map;
-
-                       if (caller_flags & UPL_COPYOUT_FROM) {
-                               fault_type = VM_PROT_READ | VM_PROT_COPY;
-                               vm_counters.create_upl_extra_cow++;
-                               vm_counters.create_upl_extra_cow_pages += (entry->vme_end - entry->vme_start) / PAGE_SIZE;
+               vm_map_lock_write_to_read(map);
+       }
+
+       if (entry->needs_copy)  {
+               /*
+                * Honor copy-on-write for COPY_SYMMETRIC
+                * strategy.
+                */
+               vm_map_t                local_map;
+               vm_object_t             object;
+               vm_object_offset_t      new_offset;
+               vm_prot_t               prot;
+               boolean_t               wired;
+               vm_map_version_t        version;
+               vm_map_t                real_map;
+               vm_prot_t               fault_type;
+
+               local_map = map;
+
+               if (caller_flags & UPL_COPYOUT_FROM) {
+                       fault_type = VM_PROT_READ | VM_PROT_COPY;
+                       vm_counters.create_upl_extra_cow++;
+                       vm_counters.create_upl_extra_cow_pages +=
+                               (entry->vme_end - entry->vme_start) / PAGE_SIZE;
+               } else {
+                       fault_type = VM_PROT_WRITE;
+               }
+               if (vm_map_lookup_locked(&local_map,
+                                        offset, fault_type,
+                                        OBJECT_LOCK_EXCLUSIVE,
+                                        &version, &object,
+                                        &new_offset, &prot, &wired,
+                                        NULL,
+                                        &real_map) != KERN_SUCCESS) {
+                       if (fault_type == VM_PROT_WRITE) {
+                               vm_counters.create_upl_lookup_failure_write++;
                        } else {
-                               fault_type = VM_PROT_WRITE;
-                       }
-                       if (vm_map_lookup_locked(&local_map,
-                                                offset, fault_type,
-                                                OBJECT_LOCK_EXCLUSIVE,
-                                                &version, &object,
-                                                &new_offset, &prot, &wired,
-                                                NULL,
-                                                &real_map) != KERN_SUCCESS) {
-                               if (fault_type == VM_PROT_WRITE) {
-                                       vm_counters.create_upl_lookup_failure_write++;
-                               } else {
-                                       vm_counters.create_upl_lookup_failure_copy++;
-                               }
-                               vm_map_unlock_read(local_map);
-                               return KERN_FAILURE;
+                               vm_counters.create_upl_lookup_failure_copy++;
                        }
-                       if (real_map != map)
-                               vm_map_unlock(real_map);
                        vm_map_unlock_read(local_map);
-
-                       vm_object_unlock(object);
-
-                       goto REDISCOVER_ENTRY;
+                       return KERN_FAILURE;
                }
+               if (real_map != map)
+                       vm_map_unlock(real_map);
+               vm_map_unlock_read(local_map);
 
-               if (sync_cow_data) {
-                       if (entry->object.vm_object->shadow || entry->object.vm_object->copy) {
-                               local_object = entry->object.vm_object;
-                               local_start = entry->vme_start;
-                               local_offset = entry->offset;
-
-                               vm_object_reference(local_object);
-                               vm_map_unlock_read(map);
-
-                               if (local_object->shadow && local_object->copy) {
-                                       vm_object_lock_request(
-                                                              local_object->shadow,
-                                                              (vm_object_offset_t)
-                                                              ((offset - local_start) +
-                                                               local_offset) +
-                                                              local_object->vo_shadow_offset,
-                                                              *upl_size, FALSE, 
-                                                              MEMORY_OBJECT_DATA_SYNC,
-                                                              VM_PROT_NO_CHANGE);
-                               }
-                               sync_cow_data = FALSE;
-                               vm_object_deallocate(local_object);
+               vm_object_unlock(object);
 
-                               goto REDISCOVER_ENTRY;
-                       }
-               }
-               if (force_data_sync) {
-                       local_object = entry->object.vm_object;
-                       local_start = entry->vme_start;
-                       local_offset = entry->offset;
+               goto REDISCOVER_ENTRY;
+       }
 
-                       vm_object_reference(local_object);
-                       vm_map_unlock_read(map);
+       if (sync_cow_data &&
+           (VME_OBJECT(entry)->shadow ||
+            VME_OBJECT(entry)->copy)) {
+               local_object = VME_OBJECT(entry);
+               local_start = entry->vme_start;
+               local_offset = VME_OFFSET(entry);
+
+               vm_object_reference(local_object);
+               vm_map_unlock_read(map);
 
-                       vm_object_lock_request(
-                                              local_object,
-                                              (vm_object_offset_t)
-                                              ((offset - local_start) + local_offset),
-                                              (vm_object_size_t)*upl_size, FALSE, 
+               if (local_object->shadow && local_object->copy) {
+                       vm_object_lock_request(local_object->shadow,
+                                              ((vm_object_offset_t)
+                                               ((offset - local_start) +
+                                                local_offset) +
+                                               local_object->vo_shadow_offset),
+                                              *upl_size, FALSE, 
                                               MEMORY_OBJECT_DATA_SYNC,
                                               VM_PROT_NO_CHANGE);
-
-                       force_data_sync = FALSE;
-                       vm_object_deallocate(local_object);
-
-                       goto REDISCOVER_ENTRY;
                }
-               if (entry->object.vm_object->private)
-                       *flags = UPL_DEV_MEMORY;
-               else
-                       *flags = 0;
-
-               if (entry->object.vm_object->phys_contiguous)
-                       *flags |= UPL_PHYS_CONTIG;
+               sync_cow_data = FALSE;
+               vm_object_deallocate(local_object);
 
-               local_object = entry->object.vm_object;
-               local_offset = entry->offset;
+               goto REDISCOVER_ENTRY;
+       }
+       if (force_data_sync) {
+               local_object = VME_OBJECT(entry);
                local_start = entry->vme_start;
+               local_offset = VME_OFFSET(entry);
 
                vm_object_reference(local_object);
                vm_map_unlock_read(map);
 
-               ret = vm_object_iopl_request(local_object, 
-                                             (vm_object_offset_t) ((offset - local_start) + local_offset),
-                                             *upl_size,
-                                             upl,
-                                             page_list,
-                                             count,
-                                             caller_flags);
+               vm_object_lock_request(local_object,
+                                      ((vm_object_offset_t)
+                                       ((offset - local_start) +
+                                        local_offset)),
+                                      (vm_object_size_t)*upl_size,
+                                      FALSE, 
+                                      MEMORY_OBJECT_DATA_SYNC,
+                                      VM_PROT_NO_CHANGE);
+
+               force_data_sync = FALSE;
                vm_object_deallocate(local_object);
 
-               return(ret);
-       } 
+               goto REDISCOVER_ENTRY;
+       }
+       if (VME_OBJECT(entry)->private)
+               *flags = UPL_DEV_MEMORY;
+       else
+               *flags = 0;
+
+       if (VME_OBJECT(entry)->phys_contiguous)
+               *flags |= UPL_PHYS_CONTIG;
+
+       local_object = VME_OBJECT(entry);
+       local_offset = VME_OFFSET(entry);
+       local_start = entry->vme_start;
+
+       vm_object_lock(local_object);
+
+       /*
+        * Ensure that this object is "true_share" and "copy_delay" now,
+        * while we're still holding the VM map lock.  After we unlock the map,
+        * anything could happen to that mapping, including some copy-on-write
+        * activity.  We need to make sure that the IOPL will point at the
+        * same memory as the mapping.
+        */
+       if (local_object->true_share) {
+               assert(local_object->copy_strategy !=
+                      MEMORY_OBJECT_COPY_SYMMETRIC);
+       } else if (local_object != kernel_object &&
+                  local_object != compressor_object &&
+                  !local_object->phys_contiguous) {
+#if VM_OBJECT_TRACKING_OP_TRUESHARE
+               if (!local_object->true_share &&
+                   vm_object_tracking_inited) {
+                       void *bt[VM_OBJECT_TRACKING_BTDEPTH];
+                       int num = 0;
+                       num = OSBacktrace(bt,
+                                         VM_OBJECT_TRACKING_BTDEPTH);
+                       btlog_add_entry(vm_object_tracking_btlog,
+                                       local_object,
+                                       VM_OBJECT_TRACKING_OP_TRUESHARE,
+                                       bt,
+                                       num);
+               }
+#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
+               local_object->true_share = TRUE;
+               if (local_object->copy_strategy ==
+                   MEMORY_OBJECT_COPY_SYMMETRIC) {
+                       local_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
+               }
+       }
+
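+       /*
+        * take a reference on the object while it is still locked, so it
+        * can't be reclaimed once the map lock is dropped below.
+        */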
+       vm_object_reference_locked(local_object);
+       vm_object_unlock(local_object);
+
        vm_map_unlock_read(map);
 
-       return(KERN_FAILURE);
+       ret = vm_object_iopl_request(local_object, 
+                                    ((vm_object_offset_t)
+                                     ((offset - local_start) + local_offset)),
+                                    *upl_size,
+                                    upl,
+                                    page_list,
+                                    count,
+                                    caller_flags);
+       vm_object_deallocate(local_object);
+
+       return ret;
 }
 
 /*
@@ -5934,7 +6396,7 @@ process_upl_to_enter:
                                vm_object_unlock(object);
 
                                vm_page_lockspin_queues();
-                               vm_page_wire(alias_page);
+                               vm_page_wire(alias_page, VM_KERN_MEMORY_NONE, TRUE);
                                vm_page_unlock_queues();
                                
                                /*
@@ -5953,7 +6415,7 @@ process_upl_to_enter:
                                 */
                                ASSERT_PAGE_DECRYPTED(m);
 
-                               vm_page_insert(alias_page, upl->map_object, new_offset);
+                               vm_page_insert_wired(alias_page, upl->map_object, new_offset, VM_KERN_MEMORY_NONE);
 
                                assert(!alias_page->wanted);
                                alias_page->busy = FALSE;
@@ -5980,7 +6442,8 @@ process_upl_to_enter:
                * NEED A UPL_MAP ALIAS
                */
                kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
-                                 VM_FLAGS_ANYWHERE, upl->map_object, offset, FALSE,
+                                 VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_OSFMK), 
+                                 upl->map_object, offset, FALSE,
                                  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
 
                if (kr != KERN_SUCCESS) {
@@ -5990,7 +6453,8 @@ process_upl_to_enter:
        }
        else {
                kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
-                                 VM_FLAGS_FIXED, upl->map_object, offset, FALSE,
+                                 VM_FLAGS_FIXED | VM_MAKE_TAG(VM_KERN_MEMORY_OSFMK),
+                                 upl->map_object, offset, FALSE,
                                  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
                if(kr)
                        panic("vm_map_enter failed for a Vector UPL\n");
@@ -6249,6 +6713,9 @@ process_upl_to_commit:
        entry = offset/PAGE_SIZE;
        target_offset = (vm_object_offset_t)offset;
 
+       assert(!(target_offset & PAGE_MASK));
+       assert(!(xfer_size & PAGE_MASK));
+
        if (upl->flags & UPL_KERNEL_OBJECT)
                vm_object_lock_shared(shadow_object);
        else
@@ -6669,7 +7136,7 @@ commit_next_page:
                                VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
 
                                if (dw_count >= dw_limit) {
-                                       vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count);
+                                       vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
                        
                                        dwp = &dw_array[0];
                                        dw_count = 0;
@@ -6684,7 +7151,7 @@ commit_next_page:
                }
        }
        if (dw_count)
-               vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count);
+               vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
 
        if (fast_path_possible) {
 
@@ -6758,6 +7225,10 @@ commit_next_page:
                        vm_page_unlock_queues();
 
                        shadow_object->wired_page_count -= unwired_count;
+
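+                       /*
+                        * no wired pages remain in this object, so clear
+                        * its wired state.
+                        */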
+                       if (!shadow_object->wired_page_count) {
+                           VM_OBJECT_UNWIRED(shadow_object);
+                       }
                }
        }
        occupied = 1;
@@ -6947,6 +7418,9 @@ process_upl_to_abort:
        entry = offset/PAGE_SIZE;
        target_offset = (vm_object_offset_t)offset;
 
+       assert(!(target_offset & PAGE_MASK));
+       assert(!(xfer_size & PAGE_MASK));
+
        if (upl->flags & UPL_KERNEL_OBJECT)
                vm_object_lock_shared(shadow_object);
        else
@@ -7153,7 +7627,7 @@ abort_next_page:
                                VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
 
                                if (dw_count >= dw_limit) {
-                                       vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count);
+                                       vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
                                
                                        dwp = &dw_array[0];
                                        dw_count = 0;
@@ -7168,7 +7642,7 @@ abort_next_page:
                }
        }
        if (dw_count)
-               vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count);
+               vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
 
        occupied = 1;
 
@@ -7335,6 +7809,10 @@ iopl_valid_data(
                size -= PAGE_SIZE;
        }
        if (wired_count) {
+
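+               /*
+                * this is the first wired page going into the object, so
+                * mark the object itself as wired.
+                */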
+               if (!object->wired_page_count) {
+                   VM_OBJECT_WIRED(object);
+               }
                object->wired_page_count += wired_count;
 
                vm_page_lockspin_queues();
@@ -7344,9 +7822,6 @@ iopl_valid_data(
        vm_object_unlock(object);
 }
 
-
-
-
 void
 vm_object_set_pmap_cache_attr(
                vm_object_t             object,
@@ -7363,8 +7838,228 @@ vm_object_set_pmap_cache_attr(
        }
 }
 
+
+boolean_t      vm_object_iopl_wire_full(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t);
+kern_return_t  vm_object_iopl_wire_empty(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t, vm_object_offset_t *, int);
+
+
+
+boolean_t
+vm_object_iopl_wire_full(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
+                           wpl_array_t lite_list, upl_control_flags_t cntrl_flags)
+{
+       vm_page_t       dst_page;
+       vm_tag_t        tag;
+       unsigned int    entry;
+       int             page_count;
+       int             delayed_unlock = 0;
+       boolean_t       retval = TRUE;
+
+       vm_object_lock_assert_exclusive(object);
+       assert(object->purgable != VM_PURGABLE_VOLATILE);
+       assert(object->purgable != VM_PURGABLE_EMPTY);
+       assert(object->pager == NULL);
+       assert(object->copy == NULL);
+       assert(object->shadow == NULL);
+
+       tag = UPL_MEMORY_TAG(cntrl_flags);
+       page_count = object->resident_page_count;
+       dst_page = (vm_page_t)queue_first(&object->memq);
+
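+       /*
+        * walk the object's resident pages and wire each one in place;
+        * if any page is in a state we can't handle here, bail out and
+        * let the caller finish up on the slow path.
+        */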
+       vm_page_lock_queues();
+
+       while (page_count--) {
+
+               if (dst_page->busy ||
+                   dst_page->fictitious ||
+                   dst_page->absent ||
+                   dst_page->error ||
+                   dst_page->cleaning ||
+                   dst_page->restart ||
+                   dst_page->encrypted ||
+                   dst_page->laundry) {
+                       retval = FALSE;
+                       goto done;
+               }
+               if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->written_by_kernel == TRUE) {
+                       retval = FALSE;
+                       goto done;
+               }
+               dst_page->reference = TRUE;
+
+               vm_page_wire(dst_page, tag, FALSE);
+
+               if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
+                       SET_PAGE_DIRTY(dst_page, FALSE);
+               }
+               entry = (unsigned int)(dst_page->offset / PAGE_SIZE);
+               assert(entry < object->resident_page_count);
+               lite_list[entry>>5] |= 1 << (entry & 31);
+               
+               if (dst_page->phys_page > upl->highest_page)
+                       upl->highest_page = dst_page->phys_page;
+
+               if (user_page_list) {
+                       user_page_list[entry].phys_addr = dst_page->phys_page;
+                       user_page_list[entry].absent    = dst_page->absent;
+                       user_page_list[entry].dirty     = dst_page->dirty;
+                       user_page_list[entry].pageout   = dst_page->pageout;
+                       user_page_list[entry].precious  = dst_page->precious;
+                       user_page_list[entry].device    = FALSE;
+                       user_page_list[entry].speculative = FALSE;
+                       user_page_list[entry].cs_validated = FALSE;
+                       user_page_list[entry].cs_tainted = FALSE;
+                       user_page_list[entry].cs_nx     = FALSE;
+                       user_page_list[entry].needed    = FALSE;
+                       user_page_list[entry].mark      = FALSE;
+               }
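+               /*
+                * periodically yield the page queues lock so it isn't
+                * held across the entire object.
+                */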
+               if (delayed_unlock++ > 256) {
+                       delayed_unlock = 0;
+                       lck_mtx_yield(&vm_page_queue_lock);
+
+                       VM_CHECK_MEMORYSTATUS;
+               }
+               dst_page = (vm_page_t)queue_next(&dst_page->listq);
+       }
+done:
+       vm_page_unlock_queues();
+
+       VM_CHECK_MEMORYSTATUS;
+
+       return (retval);
+}
+
+
+kern_return_t
+vm_object_iopl_wire_empty(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
+                            wpl_array_t lite_list, upl_control_flags_t cntrl_flags, vm_object_offset_t *dst_offset, int page_count)
+{
+       vm_page_t       dst_page;
+       vm_tag_t        tag;
+       boolean_t       no_zero_fill = FALSE;
+       int             interruptible;
+       int             pages_wired = 0;
+       int             pages_inserted = 0;
+       int             entry = 0;
+       uint64_t        delayed_ledger_update = 0;
+       kern_return_t   ret = KERN_SUCCESS;
+
+       vm_object_lock_assert_exclusive(object);
+       assert(object->purgable != VM_PURGABLE_VOLATILE);
+       assert(object->purgable != VM_PURGABLE_EMPTY);
+       assert(object->pager == NULL);
+       assert(object->copy == NULL);
+       assert(object->shadow == NULL);
+
+       if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
+               interruptible = THREAD_ABORTSAFE;
+       else
+               interruptible = THREAD_UNINT;
+
+       if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
+               no_zero_fill = TRUE;
+
+       tag = UPL_MEMORY_TAG(cntrl_flags);
+
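+       /*
+        * the object has no resident pages: grab fresh pages, zero-fill
+        * them unless the caller asked us not to, insert them at the
+        * requested offsets and wire them; purgeable ledger updates are
+        * batched in delayed_ledger_update and applied once at the end.
+        */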
+       while (page_count--) {
+                       
+               while ( (dst_page = vm_page_grab()) == VM_PAGE_NULL) {
+
+                       OSAddAtomic(page_count, &vm_upl_wait_for_pages);
+
+                       VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
+
+                       if (vm_page_wait(interruptible) == FALSE) {
+                               /*
+                                * interrupted case
+                                */
+                               OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
+
+                               VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
+                               
+                               ret = MACH_SEND_INTERRUPTED;
+                               goto done;
+                       }
+                       OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
+
+                       VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
+               }
+               if (no_zero_fill == FALSE)
+                       vm_page_zero_fill(dst_page);
+               else
+                       dst_page->absent = TRUE;
+
+               dst_page->reference = TRUE;
+
+               if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
+                       SET_PAGE_DIRTY(dst_page, FALSE);        
+               }
+               if (dst_page->absent == FALSE) {
+                       dst_page->wire_count++;
+                       pages_wired++;
+                       PAGE_WAKEUP_DONE(dst_page);
+               }
+               pages_inserted++;
+
+               vm_page_insert_internal(dst_page, object, *dst_offset, tag, FALSE, TRUE, TRUE, TRUE, &delayed_ledger_update);
+
+               lite_list[entry>>5] |= 1 << (entry & 31);
+               
+               if (dst_page->phys_page > upl->highest_page)
+                       upl->highest_page = dst_page->phys_page;
+
+               if (user_page_list) {
+                       user_page_list[entry].phys_addr = dst_page->phys_page;
+                       user_page_list[entry].absent    = dst_page->absent;
+                       user_page_list[entry].dirty     = dst_page->dirty;
+                       user_page_list[entry].pageout   = FALSE;
+                       user_page_list[entry].precious  = FALSE;
+                       user_page_list[entry].device    = FALSE;
+                       user_page_list[entry].speculative = FALSE;
+                       user_page_list[entry].cs_validated = FALSE;
+                       user_page_list[entry].cs_tainted = FALSE;
+                       user_page_list[entry].cs_nx     = FALSE;
+                       user_page_list[entry].needed    = FALSE;
+                       user_page_list[entry].mark      = FALSE;
+               }
+               entry++;
+               *dst_offset += PAGE_SIZE_64;
+       }
+done:
+       if (pages_wired) {
+               vm_page_lockspin_queues();
+               vm_page_wire_count += pages_wired;
+               vm_page_unlock_queues();
+       }
+       if (pages_inserted) {
+               if (object->internal) {
+                       OSAddAtomic(pages_inserted, &vm_page_internal_count);
+               } else {
+                       OSAddAtomic(pages_inserted, &vm_page_external_count);
+               }
+       }
+       if (delayed_ledger_update) {
+               task_t          owner;
+
+               owner = object->vo_purgeable_owner;
+               assert(owner);
+
+               /* more non-volatile bytes */
+               ledger_credit(owner->ledger,
+                             task_ledgers.purgeable_nonvolatile,
+                             delayed_ledger_update);
+               /* more footprint */
+               ledger_credit(owner->ledger,
+                             task_ledgers.phys_footprint,
+                             delayed_ledger_update);
+       }
+       return (ret);
+}
+
+
 unsigned int vm_object_iopl_request_sleep_for_cleaning = 0;
 
+
 kern_return_t
 vm_object_iopl_request(
        vm_object_t             object,
@@ -7373,7 +8068,7 @@ vm_object_iopl_request(
        upl_t                   *upl_ptr,
        upl_page_info_array_t   user_page_list,
        unsigned int            *page_list_count,
-       int                     cntrl_flags)
+       upl_control_flags_t     cntrl_flags)
 {
        vm_page_t               dst_page;
        vm_object_offset_t      dst_offset;
@@ -7398,8 +8093,8 @@ vm_object_iopl_request(
 
        boolean_t               set_cache_attr_needed = FALSE;
        boolean_t               free_wired_pages = FALSE;
-       int                     fast_path_possible = 0;
-       
+       boolean_t               fast_path_empty_req = FALSE;
+       boolean_t               fast_path_full_req = FALSE;
 
        if (cntrl_flags & ~UPL_VALID_FLAGS) {
                /*
@@ -7514,20 +8209,6 @@ vm_object_iopl_request(
                upl->flags |= UPL_ACCESS_BLOCKED;
        }
 
-       if (!(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS)) &&
-           object->purgable != VM_PURGABLE_VOLATILE &&
-           object->purgable != VM_PURGABLE_EMPTY &&
-           object->copy == NULL &&
-           size == object->vo_size &&
-           offset == 0 &&
-           object->resident_page_count == 0 &&
-           object->shadow == NULL &&
-           object->pager == NULL)
-       {
-               fast_path_possible = 1;
-               set_cache_attr_needed = TRUE;
-       }
-
 #if CONFIG_IOSCHED || UPL_DEBUG
        if (upl->flags & UPL_TRACKED_BY_OBJECT) {
                vm_object_activity_begin(object);
@@ -7622,6 +8303,30 @@ vm_object_iopl_request(
                iopl_cow_pages += size >> PAGE_SHIFT;
 #endif
        }
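+       /*
+        * decide whether this request can use one of the two wiring fast
+        * paths: 'full' when every page of the object is already resident,
+        * 'empty' when the object currently has no resident pages.
+        */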
+       if (!(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS)) &&
+           object->purgable != VM_PURGABLE_VOLATILE &&
+           object->purgable != VM_PURGABLE_EMPTY &&
+           object->copy == NULL &&
+           size == object->vo_size &&
+           offset == 0 &&
+           object->shadow == NULL &&
+           object->pager == NULL)
+       {
+               if (object->resident_page_count == size_in_pages)
+               {
+                       assert(object != compressor_object);
+                       assert(object != kernel_object);
+                       fast_path_full_req = TRUE;
+               }
+               else if (object->resident_page_count == 0)
+               {
+                       assert(object != compressor_object);
+                       assert(object != kernel_object);
+                       fast_path_empty_req = TRUE;
+                       set_cache_attr_needed = TRUE;
+               }
+       }
+
        if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
                interruptible = THREAD_ABORTSAFE;
        else
@@ -7633,85 +8338,26 @@ vm_object_iopl_request(
        dst_offset = offset;
        dw_count = 0;
 
-       if (fast_path_possible) {
-               int     wired_count = 0;
-
-               while (xfer_size) {
-                       
-                       while ( (dst_page = vm_page_grab()) == VM_PAGE_NULL) {
-                               OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
-
-                               VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
-
-                               if (vm_page_wait(interruptible) == FALSE) {
-                                       /*
-                                        * interrupted case
-                                        */
-                                       OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
-
-                                       VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
-
-                                       if (wired_count) {
-                                               vm_page_lockspin_queues();
-                                               vm_page_wire_count += wired_count;
-                                               vm_page_unlock_queues();
-
-                                               free_wired_pages = TRUE;
-                                       }
-                                       ret = MACH_SEND_INTERRUPTED;
-
-                                       goto return_err;
-                               }
-                               OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
-
-                               VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
-                       }
-                       if (no_zero_fill == FALSE)
-                               vm_page_zero_fill(dst_page);
-                       else
-                               dst_page->absent = TRUE;
+       if (fast_path_full_req) {
 
-                       dst_page->reference = TRUE;
+               if (vm_object_iopl_wire_full(object, upl, user_page_list, lite_list, cntrl_flags) == TRUE)
+                       goto finish;
+               /*
+                * we couldn't complete the processing of this request on the fast path
+                * so fall through to the slow path and finish up
+                */
 
-                       if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
-                               SET_PAGE_DIRTY(dst_page, FALSE);        
-                       }
-                       if (dst_page->absent == FALSE) {
-                               assert(object->purgable != VM_PURGABLE_VOLATILE);
-                               assert(object->purgable != VM_PURGABLE_EMPTY);
-                               dst_page->wire_count++;
-                               wired_count++;
+       } else if (fast_path_empty_req) {
 
-                               PAGE_WAKEUP_DONE(dst_page);
-                       }
-                       vm_page_insert_internal(dst_page, object, dst_offset, FALSE, TRUE, TRUE);
-                       
-                       lite_list[entry>>5] |= 1 << (entry & 31);
-               
-                       if (dst_page->phys_page > upl->highest_page)
-                               upl->highest_page = dst_page->phys_page;
-
-                       if (user_page_list) {
-                               user_page_list[entry].phys_addr = dst_page->phys_page;
-                               user_page_list[entry].absent    = dst_page->absent;
-                               user_page_list[entry].dirty     = dst_page->dirty;
-                               user_page_list[entry].precious  = FALSE;
-                               user_page_list[entry].pageout   = FALSE;
-                               user_page_list[entry].device    = FALSE;
-                               user_page_list[entry].needed    = FALSE;
-                               user_page_list[entry].speculative = FALSE;
-                               user_page_list[entry].cs_validated = FALSE;
-                               user_page_list[entry].cs_tainted = FALSE;
-                       }
-                       entry++;
-                       dst_offset += PAGE_SIZE_64;
-                       xfer_size -= PAGE_SIZE;
-                       size_in_pages--;
+               if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
+                       ret = KERN_MEMORY_ERROR;
+                       goto return_err;
                }
-               if (wired_count) {
-                       vm_page_lockspin_queues();
-                       vm_page_wire_count += wired_count;
-                       vm_page_unlock_queues();
+               ret = vm_object_iopl_wire_empty(object, upl, user_page_list, lite_list, cntrl_flags, &dst_offset, size_in_pages);
+               
+               if (ret) {
+                       free_wired_pages = TRUE;
+                       goto return_err;
                }
                goto finish;
        }
@@ -7733,10 +8379,21 @@ vm_object_iopl_request(
 
        while (xfer_size) {
                vm_fault_return_t       result;
-               unsigned int            pg_num;
 
                dwp->dw_mask = 0;
 
+               if (fast_path_full_req) {
+                       /*
+                        * if we get here, it means that we ran into a page
+                        * state we couldn't handle in the fast path and
+                        * bailed out to the slow path... since the order
+                        * we look at pages is different between the 2 paths,
+                        * the following check is needed to determine whether
+                        * this page was already processed in the fast path
+                        */
+                       if (lite_list[entry>>5] & (1 << (entry & 31)))
+                               goto skip_page;
+               }
                dst_page = vm_page_lookup(object, dst_offset);
 
                /*
@@ -7837,19 +8494,19 @@ vm_object_iopl_request(
                                break;
 
                        case VM_FAULT_MEMORY_SHORTAGE:
-                               OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
+                               OSAddAtomic((size_in_pages - entry), &vm_upl_wait_for_pages);
 
                                VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
 
                                if (vm_page_wait(interruptible)) {
-                                       OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
+                                       OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
 
                                        VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
                                        vm_object_lock(object);
 
                                        break;
                                }
-                               OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
+                               OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
 
                                VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
 
@@ -7995,9 +8652,7 @@ record_phys_addr:
                if (dst_page->busy)
                        upl->flags |= UPL_HAS_BUSY;
 
-               pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
-               assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
-               lite_list[pg_num>>5] |= 1 << (pg_num & 31);
+               lite_list[entry>>5] |= 1 << (entry & 31);
 
                if (dst_page->phys_page > upl->highest_page)
                        upl->highest_page = dst_page->phys_page;
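Both the new fast path (vm_object_iopl_wire_full/empty) and the slow loop above record each processed page in lite_list, a bit array with one bit per page of the request: entry >> 5 selects the 32-bit word and entry & 31 selects the bit within it. That is what lets the slow path's skip_page check tell whether a page was already wired on the fast path. A standalone sketch of the same bit arithmetic (the request size and marked pages are made up, not xnu values):

    /* Standalone illustration of the lite_list bit array used above:
     * one bit per page of the UPL, 32 bits per word.  Not xnu code. */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define PAGES_IN_REQUEST 70     /* hypothetical request size, in pages */

    int
    main(void)
    {
        uint32_t lite_list[(PAGES_IN_REQUEST + 31) / 32];
        unsigned int entry;

        memset(lite_list, 0, sizeof(lite_list));

        /* mark pages 0, 33 and 66 as processed (what the fast path would do) */
        for (entry = 0; entry < PAGES_IN_REQUEST; entry += 33)
            lite_list[entry >> 5] |= 1u << (entry & 31);

        /* the slow path's check: was this entry already handled? */
        for (entry = 0; entry < PAGES_IN_REQUEST; entry++) {
            if (lite_list[entry >> 5] & (1u << (entry & 31)))
                printf("entry %u already wired by the fast path\n", entry);
        }
        return 0;
    }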
@@ -8017,6 +8672,7 @@ record_phys_addr:
                        user_page_list[entry].cs_validated = dst_page->cs_validated;
                        user_page_list[entry].cs_tainted = dst_page->cs_tainted;
                        user_page_list[entry].cs_nx = dst_page->cs_nx;
+                       user_page_list[entry].mark      = FALSE;
                }
                if (object != kernel_object && object != compressor_object) {
                        /*
@@ -8027,34 +8683,35 @@ record_phys_addr:
                        if (dst_page->clustered)
                                VM_PAGE_CONSUME_CLUSTERED(dst_page);
                }
+skip_page:
                entry++;
                dst_offset += PAGE_SIZE_64;
                xfer_size -= PAGE_SIZE;
-               size_in_pages--;
 
                if (dwp->dw_mask) {
                        VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
 
                        if (dw_count >= dw_limit) {
-                               vm_page_do_delayed_work(object, &dw_array[0], dw_count);
+                               vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count);
                                
                                dwp = &dw_array[0];
                                dw_count = 0;
                        }
                }
        }
-       if (dw_count)
-               vm_page_do_delayed_work(object, &dw_array[0], dw_count);
+       assert(entry == size_in_pages);
 
+       if (dw_count)
+               vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count);
 finish:
        if (user_page_list && set_cache_attr_needed == TRUE)
-               vm_object_set_pmap_cache_attr(object, user_page_list, entry, TRUE);
+               vm_object_set_pmap_cache_attr(object, user_page_list, size_in_pages, TRUE);
 
        if (page_list_count != NULL) {
                if (upl->flags & UPL_INTERNAL)
                        *page_list_count = 0;
-               else if (*page_list_count > entry)
-                       *page_list_count = entry;
+               else if (*page_list_count > size_in_pages)
+                       *page_list_count = size_in_pages;
        }
        vm_object_unlock(object);
 
@@ -8070,6 +8727,7 @@ finish:
                assert(!object->blocked_access);
                object->blocked_access = TRUE;
        }
+
        return KERN_SUCCESS;
 
 return_err:
@@ -8330,8 +8988,8 @@ vm_paging_map_init(void)
        if (kr != KERN_SUCCESS) {
                panic("vm_paging_map_init: kernel_map full\n");
        }
-       map_entry->object.vm_object = kernel_object;
-       map_entry->offset = page_map_offset;
+       VME_OBJECT_SET(map_entry, kernel_object);
+       VME_OFFSET_SET(map_entry, page_map_offset);
        map_entry->protection = VM_PROT_NONE;
        map_entry->max_protection = VM_PROT_NONE;
        map_entry->permanent = TRUE;
@@ -9607,7 +10265,8 @@ vm_page_slide(
        vm_map_size_t           kernel_mapping_size;
        boolean_t               kernel_mapping_needs_unmap;
        vm_offset_t             kernel_vaddr;
-       uint32_t                pageIndex = 0;
+       uint32_t                pageIndex;
+       uint32_t                slide_chunk;
 
        assert(!page->slid);
        assert(page->object->object_slid);
@@ -9659,19 +10318,22 @@ vm_page_slide(
        assert(!page->slid);
        assert(page->object->object_slid);
 
-       /* on some platforms this is an extern int, on others it's a cpp macro */
-       __unreachable_ok_push
-        /* TODO: Consider this */
-       if (!TEST_PAGE_SIZE_4K) {
-               for (int i = 0; i < 4; i++) {
-                       pageIndex = (uint32_t)((page->offset - page->object->vo_slide_info->start)/0x1000);
-                       kr = vm_shared_region_slide_page(page->object->vo_slide_info, kernel_vaddr + (0x1000*i), pageIndex + i);
+#define PAGE_SIZE_FOR_SR_SLIDE 4096
+       pageIndex = (uint32_t)((page->offset -
+                               page->object->vo_slide_info->start) /
+                              PAGE_SIZE_FOR_SR_SLIDE);
+       for (slide_chunk = 0;
+            slide_chunk < PAGE_SIZE / PAGE_SIZE_FOR_SR_SLIDE;
+            slide_chunk++) {
+               kr = vm_shared_region_slide_page(page->object->vo_slide_info,
+                                                (kernel_vaddr +
+                                                 (slide_chunk *
+                                                  PAGE_SIZE_FOR_SR_SLIDE)),
+                                                (pageIndex + slide_chunk));
+               if (kr != KERN_SUCCESS) {
+                       break;
                }
-       } else {
-               pageIndex = (uint32_t)((page->offset - page->object->vo_slide_info->start)/PAGE_SIZE);
-               kr = vm_shared_region_slide_page(page->object->vo_slide_info, kernel_vaddr, pageIndex);
        }
-       __unreachable_ok_pop
 
        vm_page_slide_counter++;
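The rewritten vm_page_slide() always slides in 4 KB units: pageIndex is the page's 4 KB index within the slid region, and the loop covers PAGE_SIZE / PAGE_SIZE_FOR_SR_SLIDE sub-chunks, so a 16 KB kernel page is processed as four consecutive shared-region slide pages while a 4 KB kernel page takes a single pass. A standalone sketch of just that index arithmetic (the offsets and the 16 KB PAGE_SIZE are hypothetical):

    /* Standalone illustration of the 4KB sub-chunk indexing used by
     * vm_page_slide() above; all values here are made up. */
    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE_FOR_SR_SLIDE 4096
    #define PAGE_SIZE              16384    /* hypothetical 16KB kernel page */

    int
    main(void)
    {
        uint64_t slide_region_start = 0x10000;  /* stands in for vo_slide_info->start */
        uint64_t page_offset        = 0x20000;  /* stands in for page->offset */
        uint32_t pageIndex, slide_chunk;

        pageIndex = (uint32_t)((page_offset - slide_region_start) /
                               PAGE_SIZE_FOR_SR_SLIDE);

        for (slide_chunk = 0;
             slide_chunk < PAGE_SIZE / PAGE_SIZE_FOR_SR_SLIDE;
             slide_chunk++) {
            /* the real code calls vm_shared_region_slide_page() here on
             * kernel_vaddr + slide_chunk * 4096, index pageIndex + slide_chunk */
            printf("chunk %u -> shared-region page index %u\n",
                   slide_chunk, pageIndex + slide_chunk);
        }
        return 0;
    }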
 
@@ -9749,6 +10411,16 @@ ppnum_t  upl_phys_page(upl_page_info_t *upl, int index)
        return(UPL_PHYS_PAGE(upl, index));
 }
 
+void upl_page_set_mark(upl_page_info_t *upl, int index, boolean_t v)
+{
+       upl[index].mark = v;
+}
+
+boolean_t upl_page_get_mark(upl_page_info_t *upl, int index)
+{
+       return upl[index].mark;
+}
+
 void
 vm_countdirtypages(void)
 {
@@ -9848,6 +10520,16 @@ upl_size_t upl_get_size(
         return upl->size;
 }
 
+upl_t upl_associated_upl(upl_t upl)
+{
+       return upl->associated_upl;
+}
+
+void upl_set_associated_upl(upl_t upl, upl_t associated_upl)
+{
+       upl->associated_upl = associated_upl;
+}
+
 #if UPL_DEBUG
 kern_return_t  upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2)
 {
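The accessors added above are thin wrappers: upl_page_set_mark()/upl_page_get_mark() expose the per-page mark bit that the IOPL path now initializes to FALSE, and upl_associated_upl()/upl_set_associated_upl() expose the associated_upl back-pointer declared in vm_pageout.h below. A hedged caller-side sketch; shadow_upl and the page index are placeholders, and only the accessor names plus upl_get_internal_page_list() come from this source:

    /* Sketch of a kernel client of the new accessors (not from this commit). */
    upl_page_info_t *pl;

    pl = (upl_page_info_t *) upl_get_internal_page_list(upl);

    /* remember that page 3 of this UPL needs special treatment later */
    upl_page_set_mark(pl, 3, TRUE);

    if (upl_page_get_mark(pl, 3)) {
        /* ... handle the marked page ... */
    }

    /* tie a shadow UPL to this one so completion code can find it again */
    upl_set_associated_upl(upl, shadow_upl);
    assert(upl_associated_upl(upl) == shadow_upl);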
index cb148c9df7962519e82031ae0bb2a0c93021fa59..35ab0b34371ff383de2e9a478b0fff1e0b595891 100644 (file)
@@ -94,7 +94,7 @@ extern unsigned int vm_pageout_cleaned_reactivated, vm_pageout_cleaned_fault_rea
 
 #if CONFIG_FREEZE
 extern boolean_t memorystatus_freeze_enabled;
-#define VM_DYNAMIC_PAGING_ENABLED(port) (COMPRESSED_PAGER_IS_ACTIVE || (memorystatus_freeze_enabled == FALSE && IP_VALID(port)))
+#define VM_DYNAMIC_PAGING_ENABLED(port) (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED || (memorystatus_freeze_enabled == FALSE && IP_VALID(port)))
 #else
 #define VM_DYNAMIC_PAGING_ENABLED(port) (COMPRESSED_PAGER_IS_ACTIVE || IP_VALID(port))
 #endif
@@ -138,6 +138,11 @@ extern int vm_debug_events;
        }                                                       \
        MACRO_END
 
+#define VM_DEBUG_CONSTANT_EVENT(name, event, control, arg1, arg2, arg3, arg4)  \
+       MACRO_BEGIN                                             \
+               KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, event)) | control, arg1, arg2, arg3, arg4, 0); \
+       MACRO_END
+
 extern void memoryshot(unsigned int event, unsigned int control);
 
 extern kern_return_t vm_map_create_upl(
@@ -147,7 +152,7 @@ extern kern_return_t vm_map_create_upl(
        upl_t                   *upl,
        upl_page_info_array_t   page_list,
        unsigned int            *count,
-       int                     *flags);
+       upl_control_flags_t     *flags);
 
 extern ppnum_t upl_get_highest_page(
        upl_t                   upl);
@@ -155,6 +160,9 @@ extern ppnum_t upl_get_highest_page(
 extern upl_size_t upl_get_size(
        upl_t                   upl);
 
+extern upl_t upl_associated_upl(upl_t upl);
+extern void upl_set_associated_upl(upl_t upl, upl_t associated_upl);
+
 extern void iopl_valid_data(
        upl_t                   upl_ptr);
 
@@ -222,19 +230,15 @@ extern kern_return_t      vm_pageout_internal_start(void);
 extern void            vm_pageout_object_terminate(
                                        vm_object_t     object);
 
-extern void            vm_pageout_cluster(
+extern int             vm_pageout_cluster(
                                        vm_page_t       m,
-                                       boolean_t       pageout);
+                                       boolean_t       pageout,
+                                       boolean_t       immediate_ok,
+                                       boolean_t       keep_object_locked);
 
 extern void            vm_pageout_initialize_page(
                                        vm_page_t       m);
 
-extern void            vm_pageclean_setup(
-                                       vm_page_t               m,
-                                       vm_page_t               new_m,
-                                       vm_object_t             new_object,
-                                       vm_object_offset_t      new_offset);
-
 /* UPL exported routines and structures */
 
 #define upl_lock_init(object)  lck_mtx_init(&(object)->Lock, &vm_object_lck_grp, &vm_object_lck_attr)
@@ -294,6 +298,7 @@ struct upl {
        vm_object_t     map_object;
        ppnum_t         highest_page;
        void*           vector_upl;
+       upl_t           associated_upl;
 #if CONFIG_IOSCHED
        int             upl_priority;
        uint64_t        *upl_reprio_info;
@@ -371,7 +376,7 @@ extern kern_return_t vm_object_iopl_request(
        upl_t                   *upl_ptr,
        upl_page_info_array_t   user_page_list,
        unsigned int            *page_list_count,
-       int                     cntrl_flags);
+       upl_control_flags_t     cntrl_flags);
 
 extern kern_return_t vm_object_super_upl_request(
        vm_object_t             object,
@@ -381,7 +386,7 @@ extern kern_return_t vm_object_super_upl_request(
        upl_t                   *upl,
        upl_page_info_t         *user_page_list,
        unsigned int            *page_list_count,
-       int                     cntrl_flags);
+       upl_control_flags_t     cntrl_flags);
 
 /* should be just a regular vm_map_enter() */
 extern kern_return_t vm_map_enter_upl(
@@ -500,8 +505,16 @@ extern int hibernate_flush_memory(void);
 extern void hibernate_reset_stats(void);
 extern void hibernate_create_paddr_map(void);
 
+extern void vm_set_restrictions(void);
+
 extern int vm_compressor_mode;
 extern int vm_compressor_thread_count;
+extern boolean_t vm_restricted_to_single_processor;
+extern boolean_t vm_compressor_immediate_preferred;
+extern boolean_t vm_compressor_immediate_preferred_override;
+extern kern_return_t vm_pageout_compress_page(void **, char *, vm_page_t, boolean_t);
+extern void vm_pageout_anonymous_pages(void);
+
 
 #define VM_PAGER_DEFAULT                               0x1     /* Use default pager. */
 #define VM_PAGER_COMPRESSOR_NO_SWAP                    0x2     /* In-core compressor only. */
@@ -515,6 +528,8 @@ extern int vm_compressor_thread_count;
 #define DEFAULT_PAGER_IS_ACTIVE                ((vm_compressor_mode & VM_PAGER_DEFAULT) == VM_PAGER_DEFAULT)
 
 #define COMPRESSED_PAGER_IS_ACTIVE     (vm_compressor_mode & (VM_PAGER_COMPRESSOR_NO_SWAP | VM_PAGER_COMPRESSOR_WITH_SWAP))
+#define COMPRESSED_PAGER_IS_SWAPLESS   ((vm_compressor_mode & VM_PAGER_COMPRESSOR_NO_SWAP) == VM_PAGER_COMPRESSOR_NO_SWAP)
+#define COMPRESSED_PAGER_IS_SWAPBACKED ((vm_compressor_mode & VM_PAGER_COMPRESSOR_WITH_SWAP) == VM_PAGER_COMPRESSOR_WITH_SWAP)
 
 #define DEFAULT_FREEZER_IS_ACTIVE      ((vm_compressor_mode & VM_PAGER_FREEZER_DEFAULT) == VM_PAGER_FREEZER_DEFAULT)
 
index cd86dfc8ce5df1b291d8c7a041b6e28df00de0c4..9f2a3232fe8b382cb80801466d516b63e11a2bba 100644 (file)
@@ -110,11 +110,11 @@ vm_phantom_cache_init()
        vm_phantom_cache_size = sizeof(struct vm_ghost) * vm_phantom_cache_num_entries;
        vm_phantom_cache_hash_size = sizeof(vm_phantom_hash_entry_t) * vm_phantom_cache_num_entries;
 
-       if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&vm_phantom_cache), vm_phantom_cache_size, 0, KMA_KOBJECT) != KERN_SUCCESS)
+       if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&vm_phantom_cache), vm_phantom_cache_size, 0, KMA_KOBJECT | KMA_PERMANENT, VM_KERN_MEMORY_PHANTOM_CACHE) != KERN_SUCCESS)
                panic("vm_phantom_cache_init: kernel_memory_allocate failed\n");
        bzero(vm_phantom_cache, vm_phantom_cache_size);
 
-       if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&vm_phantom_cache_hash), vm_phantom_cache_hash_size, 0, KMA_KOBJECT) != KERN_SUCCESS)
+       if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&vm_phantom_cache_hash), vm_phantom_cache_hash_size, 0, KMA_KOBJECT | KMA_PERMANENT, VM_KERN_MEMORY_PHANTOM_CACHE) != KERN_SUCCESS)
                panic("vm_phantom_cache_init: kernel_memory_allocate failed\n");
        bzero(vm_phantom_cache_hash, vm_phantom_cache_hash_size);
 
index 82704811e8da3fd38fe7af9ecf3a7d7e14d9ea64..71d58704b27d8d92b4a5b3c4d4032c0bda680af0 100644 (file)
@@ -79,6 +79,10 @@ extern task_t port_name_to_task(
 extern ipc_space_t  get_task_ipcspace(
        task_t t);
 
+#if CONFIG_JETSAM
+extern int max_task_footprint_mb;      /* Per-task limit on physical memory consumption in megabytes */
+#endif // CONFIG_JETSAM
+
 /* Some loose-ends VM stuff */
 
 extern vm_map_t                kalloc_map;
@@ -86,13 +90,12 @@ extern vm_size_t    msg_ool_size_small;
 extern vm_map_t                zone_map;
 
 extern void consider_machine_adjust(void);
-extern pmap_t get_map_pmap(vm_map_t);
 extern vm_map_offset_t get_map_min(vm_map_t);
 extern vm_map_offset_t get_map_max(vm_map_t);
 extern vm_map_size_t get_vmmap_size(vm_map_t);
 extern int get_vmmap_entries(vm_map_t);
 
-int vm_map_page_mask(vm_map_t);
+extern vm_map_offset_t vm_map_page_mask(vm_map_t);
 
 extern boolean_t coredumpok(vm_map_t map, vm_offset_t va);
 
@@ -129,15 +132,22 @@ extern mach_vm_offset_t mach_get_vm_start(vm_map_t);
 extern mach_vm_offset_t mach_get_vm_end(vm_map_t);
 
 #if CONFIG_CODE_DECRYPTION
+#define VM_MAP_DEBUG_APPLE_PROTECT     MACH_ASSERT
 struct pager_crypt_info;
 extern kern_return_t vm_map_apple_protected(
-                                           vm_map_t    map,
-                                           vm_map_offset_t     start,
-                                           vm_map_offset_t     end,
-                                           struct pager_crypt_info *crypt_info);
+       vm_map_t                map,
+       vm_map_offset_t         start,
+       vm_map_offset_t         end,
+       vm_object_offset_t      crypto_backing_offset,
+       struct pager_crypt_info *crypt_info);
 extern void apple_protect_pager_bootstrap(void);
-extern memory_object_t apple_protect_pager_setup(vm_object_t backing_object,
-                                                struct pager_crypt_info *crypt_info);
+extern memory_object_t apple_protect_pager_setup(
+       vm_object_t             backing_object,
+       vm_object_offset_t      backing_offset,
+       vm_object_offset_t      crypto_backing_offset,
+       struct pager_crypt_info *crypt_info,
+       vm_object_offset_t      crypto_start,
+       vm_object_offset_t      crypto_end);
 #endif /* CONFIG_CODE_DECRYPTION */
 
 struct vnode;
@@ -145,6 +155,12 @@ extern void swapfile_pager_bootstrap(void);
 extern memory_object_t swapfile_pager_setup(struct vnode *vp);
 extern memory_object_control_t swapfile_pager_control(memory_object_t mem_obj);
 
+#if __arm64__ || ((__ARM_ARCH_7K__ >= 2) && defined(PLATFORM_WatchOS))
+#define SIXTEENK_PAGE_SIZE     0x4000
+#define SIXTEENK_PAGE_MASK     0x3FFF
+#define SIXTEENK_PAGE_SHIFT    14
+#endif /* __arm64__ || ((__ARM_ARCH_7K__ >= 2) && defined(PLATFORM_WatchOS)) */
+
 
 /*
  * bsd
@@ -155,6 +171,7 @@ extern void *upl_get_internal_page_list(
        upl_t upl);
 
 extern void vnode_setswapmount(struct vnode *);
+extern int64_t vnode_getswappin_avail(struct vnode *);
 
 typedef int pager_return_t;
 extern pager_return_t  vnode_pagein(
@@ -524,6 +541,9 @@ extern kern_return_t compressor_memory_object_create(
        memory_object_size_t,
        memory_object_t *);
 
+extern boolean_t vm_compressor_low_on_space(void);
+extern int      vm_swap_low_on_space(void);
+
 #if CONFIG_JETSAM
 extern int proc_get_memstat_priority(struct proc*, boolean_t);
 #endif /* CONFIG_JETSAM */
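The 16 KB page constants added to vm_protos.h above follow the usual size/mask/shift relationship (0x4000 = 1 << 14, mask = size - 1), so callers can truncate, round and index with plain mask and shift operations. A standalone sketch of that arithmetic with an arbitrary address:

    /* Standalone illustration of the 16KB size/mask/shift constants. */
    #include <stdint.h>
    #include <stdio.h>

    #define SIXTEENK_PAGE_SIZE  0x4000
    #define SIXTEENK_PAGE_MASK  0x3FFF
    #define SIXTEENK_PAGE_SHIFT 14

    int
    main(void)
    {
        uint64_t addr  = 0x12345;                                   /* arbitrary */
        uint64_t trunc = addr & ~(uint64_t)SIXTEENK_PAGE_MASK;      /* 0x10000 */
        uint64_t round = (addr + SIXTEENK_PAGE_MASK) &
                         ~(uint64_t)SIXTEENK_PAGE_MASK;             /* 0x14000 */
        uint64_t index = addr >> SIXTEENK_PAGE_SHIFT;               /* 4 */

        printf("trunc=0x%llx round=0x%llx index=%llu\n",
               (unsigned long long)trunc, (unsigned long long)round,
               (unsigned long long)index);
        return 0;
    }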
index d6229881ba2a45e6e07b7d4b58a826b4821b7f62..6df155404f39b5f9cb57ded1d6bbfc1ce59c4735 100644 (file)
@@ -197,11 +197,11 @@ find_available_token:
                                                      (vm_offset_t) tokens,
                                                      token_q_cur_size,
                                                      (vm_offset_t *) &new_loc,
-                                                     alloc_size);
+                                                     alloc_size, VM_KERN_MEMORY_OSFMK);
                        } else {
                                result = kmem_alloc(kernel_map,
                                                    (vm_offset_t *) &new_loc,
-                                                   alloc_size);
+                                                   alloc_size, VM_KERN_MEMORY_OSFMK);
                        }
                }
                
@@ -1144,7 +1144,99 @@ vm_purgeable_stats(vm_purgeable_info_t info, task_t target_task)
        lck_mtx_unlock(&vm_purgeable_queue_lock);
        return;
 }
-       
+
+#if DEVELOPMENT || DEBUG
+static void
+vm_purgeable_account_volatile_queue(
+       purgeable_q_t queue,
+       int group,
+       task_t task,
+       pvm_account_info_t acnt_info)
+{
+       vm_object_t object;
+       uint64_t compressed_count;
+
+       for (object = (vm_object_t) queue_first(&queue->objq[group]);
+           !queue_end(&queue->objq[group], (queue_entry_t) object);
+           object = (vm_object_t) queue_next(&object->objq)) {
+               if (object->vo_purgeable_owner == task) {
+                       compressed_count = vm_compressor_pager_get_count(object->pager);
+                       acnt_info->pvm_volatile_compressed_count += compressed_count;
+                       acnt_info->pvm_volatile_count += (object->resident_page_count - object->wired_page_count);
+                       acnt_info->pvm_nonvolatile_count += object->wired_page_count;
+               }
+       }
+
+}
+
+/*
+ * Walks the purgeable object queues and calculates the usage
+ * associated with the objects for the given task.
+ */
+kern_return_t
+vm_purgeable_account(
+       task_t                  task,
+       pvm_account_info_t      acnt_info)
+{
+       queue_head_t    *nonvolatile_q;
+       vm_object_t     object;
+       int             group;
+       int             state;
+       uint64_t        compressed_count;
+       purgeable_q_t   volatile_q;
+
+
+       if ((task == NULL) || (acnt_info == NULL)) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       acnt_info->pvm_volatile_count = 0;
+       acnt_info->pvm_volatile_compressed_count = 0;
+       acnt_info->pvm_nonvolatile_count = 0;
+       acnt_info->pvm_nonvolatile_compressed_count = 0;
+
+       lck_mtx_lock(&vm_purgeable_queue_lock);
+
+       nonvolatile_q = &purgeable_nonvolatile_queue;
+       for (object = (vm_object_t) queue_first(nonvolatile_q);
+            !queue_end(nonvolatile_q, (queue_entry_t) object);
+            object = (vm_object_t) queue_next(&object->objq)) {
+               if (object->vo_purgeable_owner == task) {
+                       state = object->purgable;
+                       compressed_count =  vm_compressor_pager_get_count(object->pager);
+                       if (state == VM_PURGABLE_EMPTY) {
+                               acnt_info->pvm_volatile_count += (object->resident_page_count - object->wired_page_count);
+                               acnt_info->pvm_volatile_compressed_count += compressed_count;
+                       } else {
+                               acnt_info->pvm_nonvolatile_count += (object->resident_page_count - object->wired_page_count);
+                               acnt_info->pvm_nonvolatile_compressed_count += compressed_count;
+                       }
+                       acnt_info->pvm_nonvolatile_count += object->wired_page_count;
+               }
+       }
+
+       volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
+       vm_purgeable_account_volatile_queue(volatile_q, 0, task, acnt_info);
+
+       volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
+       for (group = 0; group < NUM_VOLATILE_GROUPS; group++) {
+               vm_purgeable_account_volatile_queue(volatile_q, group, task, acnt_info);
+       }
+
+       volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
+       for (group = 0; group < NUM_VOLATILE_GROUPS; group++) {
+               vm_purgeable_account_volatile_queue(volatile_q, group, task, acnt_info);
+       }
+       lck_mtx_unlock(&vm_purgeable_queue_lock);
+
+       acnt_info->pvm_volatile_count = (acnt_info->pvm_volatile_count * PAGE_SIZE);
+       acnt_info->pvm_volatile_compressed_count = (acnt_info->pvm_volatile_compressed_count * PAGE_SIZE);
+       acnt_info->pvm_nonvolatile_count = (acnt_info->pvm_nonvolatile_count * PAGE_SIZE);
+       acnt_info->pvm_nonvolatile_compressed_count = (acnt_info->pvm_nonvolatile_compressed_count * PAGE_SIZE);
+
+       return KERN_SUCCESS;
+}
+#endif /* DEVELOPMENT || DEBUG */
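vm_purgeable_account() walks the nonvolatile queue plus the obsolete, FIFO and LIFO volatile queues, accumulates resident, compressed and wired page counts for objects owned by the task, and converts all four counters to bytes with the final PAGE_SIZE multiplies. A hedged caller-side sketch; task is assumed to be a valid task_t and the struct tag behind pvm_account_info_t is an assumption here, only the pvm_* field names come from the code above:

    /* Sketch of a DEVELOPMENT/DEBUG caller (not from this commit). */
    struct pvm_account_info info;
    kern_return_t kr;

    kr = vm_purgeable_account(task, &info);
    if (kr == KERN_SUCCESS) {
        /* every counter is already scaled to bytes by the PAGE_SIZE multiply */
        printf("volatile: %llu resident, %llu compressed (bytes)\n",
               info.pvm_volatile_count, info.pvm_volatile_compressed_count);
        printf("nonvolatile: %llu resident, %llu compressed (bytes)\n",
               info.pvm_nonvolatile_count, info.pvm_nonvolatile_compressed_count);
    }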
 
 static void
 vm_purgeable_volatile_queue_disown(
index 498566b04fbb54000358f053a68d1388741b49da..c958f6b0e059da97a1ae0d59164e43942a750c7c 100644 (file)
@@ -114,6 +114,11 @@ purgeable_q_t vm_purgeable_object_remove(vm_object_t object);
 /* statistics for purgable objects in all queues */
 void vm_purgeable_stats(vm_purgeable_info_t info, task_t target_task);
 
+#if DEVELOPMENT || DEBUG
+/* statistics for purgeable object usage in all queues for a task */
+kern_return_t vm_purgeable_account(task_t task, pvm_account_info_t acnt_info);
+#endif /* DEVELOPMENT || DEBUG */
+
 int vm_purgeable_purge_task_owned(task_t task);
 void vm_purgeable_nonvolatile_enqueue(vm_object_t object, task_t task);
 void vm_purgeable_nonvolatile_dequeue(vm_object_t object);
index bd207bfdd1764b61603652ffa68d2486831ed2b8..0f1c6c9905103f683be5dec8115248b0df99fda0 100644 (file)
@@ -64,6 +64,7 @@
 
 #include <debug.h>
 #include <libkern/OSAtomic.h>
+#include <libkern/OSDebug.h>
 
 #include <mach/clock_types.h>
 #include <mach/vm_prot.h>
@@ -85,6 +86,7 @@
 #include <vm/vm_kern.h>                        /* kernel_memory_allocate() */
 #include <kern/misc_protos.h>
 #include <zone_debug.h>
+#include <mach_debug/zone_info.h>
 #include <vm/cpm.h>
 #include <pexpert/pexpert.h>
 
@@ -124,8 +126,9 @@ __private_extern__ void             vm_page_init_lck_grp(void);
 static void            vm_page_free_prepare(vm_page_t  page);
 static vm_page_t       vm_page_grab_fictitious_common(ppnum_t phys_addr);
 
+static void vm_tag_init(void);
 
-
+uint64_t       vm_min_kernel_and_kext_address = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
 
 /*
  *     Associated with page of user-allocatable memory is a
@@ -169,6 +172,8 @@ uint32_t    vm_page_bucket_hash;            /* Basic bucket hash */
 unsigned int   vm_page_bucket_lock_count = 0;          /* How big is array of locks? */
 
 lck_spin_t     *vm_page_bucket_locks;
+lck_spin_t     vm_objects_wired_lock;
+lck_spin_t     vm_allocation_sites_lock;
 
 #if VM_PAGE_BUCKETS_CHECK
 boolean_t vm_page_buckets_check_ready = FALSE;
@@ -178,6 +183,9 @@ vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
 #endif /* VM_PAGE_FAKE_BUCKETS */
 #endif /* VM_PAGE_BUCKETS_CHECK */
 
+extern int not_in_kdp;
+
+
 #if    MACH_PAGE_HASH_STATS
 /* This routine is only for debug.  It is intended to be called by
  * hand by a developer using a kernel debugger.  This routine prints
@@ -314,13 +322,18 @@ queue_head_t      vm_page_queue_inactive;
 queue_head_t   vm_page_queue_anonymous;        /* inactive memory queue for anonymous pages */
 queue_head_t   vm_page_queue_throttled;
 
+queue_head_t   vm_objects_wired;
+
 unsigned int   vm_page_active_count;
 unsigned int   vm_page_inactive_count;
 unsigned int   vm_page_anonymous_count;
 unsigned int   vm_page_throttled_count;
 unsigned int   vm_page_speculative_count;
+
 unsigned int   vm_page_wire_count;
+unsigned int   vm_page_stolen_count;
 unsigned int   vm_page_wire_count_initial;
+unsigned int   vm_page_pages_initial;
 unsigned int   vm_page_gobble_count = 0;
 
 #define        VM_PAGE_WIRE_COUNT_WARNING      0
@@ -616,6 +629,7 @@ vm_page_bootstrap(
        queue_init(&vm_page_queue_cleaned);
        queue_init(&vm_page_queue_throttled);
        queue_init(&vm_page_queue_anonymous);
+       queue_init(&vm_objects_wired);
 
        for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
                queue_init(&vm_page_queue_speculative[i].age_q);
@@ -632,9 +646,9 @@ vm_page_bootstrap(
        /*
         *      Steal memory for the map and zone subsystems.
         */
-       kernel_debug_string("zone_steal_memory");
+       kernel_debug_string_simple("zone_steal_memory");
        zone_steal_memory();
-       kernel_debug_string("vm_map_steal_memory");
+       kernel_debug_string_simple("vm_map_steal_memory");
        vm_map_steal_memory();
 
        /*
@@ -704,12 +718,12 @@ vm_page_bootstrap(
 #endif /* VM_PAGE_FAKE_BUCKETS */
 #endif /* VM_PAGE_BUCKETS_CHECK */
 
-       kernel_debug_string("vm_page_buckets");
+       kernel_debug_string_simple("vm_page_buckets");
        vm_page_buckets = (vm_page_bucket_t *)
                pmap_steal_memory(vm_page_bucket_count *
                                  sizeof(vm_page_bucket_t));
 
-       kernel_debug_string("vm_page_bucket_locks");
+       kernel_debug_string_simple("vm_page_bucket_locks");
        vm_page_bucket_locks = (lck_spin_t *)
                pmap_steal_memory(vm_page_bucket_lock_count *
                                  sizeof(lck_spin_t));
@@ -727,6 +741,10 @@ vm_page_bootstrap(
        for (i = 0; i < vm_page_bucket_lock_count; i++)
                lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
 
+       lck_spin_init(&vm_objects_wired_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
+       lck_spin_init(&vm_allocation_sites_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
+       vm_tag_init();
+
 #if VM_PAGE_BUCKETS_CHECK
        vm_page_buckets_check_ready = TRUE;
 #endif /* VM_PAGE_BUCKETS_CHECK */
@@ -739,7 +757,7 @@ vm_page_bootstrap(
         *      to get the alignment right.
         */
 
-       kernel_debug_string("pmap_startup");
+       kernel_debug_string_simple("pmap_startup");
        pmap_startup(&virtual_space_start, &virtual_space_end);
        virtual_space_start = round_page(virtual_space_start);
        virtual_space_end = trunc_page(virtual_space_end);
@@ -757,11 +775,12 @@ vm_page_bootstrap(
        assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
        vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count;     /* initial value */
        vm_page_wire_count_initial = vm_page_wire_count;
+       vm_page_pages_initial = vm_page_pages;
 
        printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
               vm_page_free_count, vm_page_wire_count);
 
-       kernel_debug_string("vm_page_bootstrap complete");
+       kernel_debug_string_simple("vm_page_bootstrap complete");
        simple_lock_init(&vm_paging_lock, 0);
 }
 
@@ -836,7 +855,7 @@ pmap_steal_memory(
                 * Account for newly stolen memory
                 */
                vm_page_wire_count++;
-
+               vm_page_stolen_count++;
        }
 
        return (void *) addr;
@@ -882,7 +901,7 @@ pmap_startup(
        /*
         *      Initialize the page frames.
         */
-       kernel_debug_string("Initialize the page frames");
+       kernel_debug_string_simple("Initialize the page frames");
        for (i = 0, pages_initialized = 0; i < npages; i++) {
                if (!pmap_next_page(&phys_page))
                        break;
@@ -903,7 +922,7 @@ pmap_startup(
        if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1])) != &vm_pages[vm_pages_count-1])
                panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
 #endif
-       kernel_debug_string("page fill/release");
+       kernel_debug_string_simple("page fill/release");
        /*
         * Check if we want to initialize pages to a known value
         */
@@ -1008,6 +1027,7 @@ pmap_startup(
 void
 vm_page_module_init(void)
 {
+       uint64_t vm_page_zone_pages, vm_page_zone_data_size;
        vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
                             0, PAGE_SIZE, "vm pages");
 
@@ -1020,13 +1040,18 @@ vm_page_module_init(void)
        zone_change(vm_page_zone, Z_EXHAUST, TRUE);
        zone_change(vm_page_zone, Z_FOREIGN, TRUE);
        zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
-        /*
-         * Adjust zone statistics to account for the real pages allocated
-         * in vm_page_create(). [Q: is this really what we want?]
-         */
-        vm_page_zone->count += vm_page_pages;
-        vm_page_zone->sum_count += vm_page_pages;
-        vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
+       /*
+        * Adjust zone statistics to account for the real pages allocated
+        * in vm_page_create(). [Q: is this really what we want?]
+        */
+       vm_page_zone->count += vm_page_pages;
+       vm_page_zone->sum_count += vm_page_pages;
+       vm_page_zone_data_size = vm_page_pages * vm_page_zone->elem_size;
+       vm_page_zone->cur_size += vm_page_zone_data_size;
+       vm_page_zone_pages = ((round_page(vm_page_zone_data_size)) / PAGE_SIZE);
+       OSAddAtomic64(vm_page_zone_pages, &(vm_page_zone->page_count));
+       /* since zone accounts for these, take them out of stolen */
+       VM_PAGE_MOVE_STOLEN(vm_page_zone_pages);
 }
 
 /*
@@ -1087,7 +1112,17 @@ vm_page_insert(
        vm_object_t             object,
        vm_object_offset_t      offset)
 {
-       vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
+       vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, TRUE, FALSE, FALSE, NULL);
+}
+
+void
+vm_page_insert_wired(
+       vm_page_t               mem,
+       vm_object_t             object,
+       vm_object_offset_t      offset,
+       vm_tag_t                tag)
+{
+       vm_page_insert_internal(mem, object, offset, tag, FALSE, TRUE, FALSE, FALSE, NULL);
 }
 
 void
@@ -1095,9 +1130,12 @@ vm_page_insert_internal(
        vm_page_t               mem,
        vm_object_t             object,
        vm_object_offset_t      offset,
+       vm_tag_t                tag,
        boolean_t               queues_lock_held,
        boolean_t               insert_in_hash,
-       boolean_t               batch_pmap_op)
+       boolean_t               batch_pmap_op,
+        boolean_t               batch_accounting,
+       uint64_t                *delayed_ledger_update)
 {
        vm_page_bucket_t        *bucket;
        lck_spin_t              *bucket_lock;
@@ -1117,6 +1155,8 @@ vm_page_insert_internal(
 
        assert(page_aligned(offset));
 
+       assert(!VM_PAGE_WIRED(mem) || mem->private || mem->fictitious || (tag != VM_KERN_MEMORY_NONE));
+
        /* the vm_submap_object is only a placeholder for submaps */
        assert(object != vm_submap_object);
 
@@ -1126,7 +1166,7 @@ vm_page_insert_internal(
                       queues_lock_held ? LCK_MTX_ASSERT_OWNED
                                        : LCK_MTX_ASSERT_NOTOWNED);
 #endif /* DEBUG */
-       
+
        if (insert_in_hash == TRUE) {
 #if DEBUG || VM_PAGE_CHECK_BUCKETS
                if (mem->tabled || mem->object != VM_OBJECT_NULL)
@@ -1138,7 +1178,14 @@ vm_page_insert_internal(
 
                /* only insert "pageout" pages into "pageout" objects,
                 * and normal pages into normal objects */
+#if 00
+               /*
+                * For some reason, this assertion gets tripped
+                * but it's mostly harmless, so let's disable it
+                * for now.
+                */
                assert(object->pageout == mem->pageout);
+#endif /* 00 */
 
                assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
                
@@ -1182,7 +1229,8 @@ vm_page_insert_internal(
        /*
         *      Now link into the object's list of backed pages.
         */
-       VM_PAGE_INSERT(mem, object);
+       queue_enter(&object->memq, mem, vm_page_t, listq);
+       object->memq_hint = mem;
        mem->tabled = TRUE;
 
        /*
@@ -1191,14 +1239,25 @@ vm_page_insert_internal(
 
        object->resident_page_count++;
        if (VM_PAGE_WIRED(mem)) {
-               object->wired_page_count++;
+           if (!mem->private && !mem->fictitious) 
+           {
+               if (!object->wired_page_count)
+               {
+                   assert(VM_KERN_MEMORY_NONE != tag);
+                   object->wire_tag = tag;
+                   VM_OBJECT_WIRED(object);
+               }
+           }
+           object->wired_page_count++;
        }
        assert(object->resident_page_count >= object->wired_page_count);
 
-       if (object->internal) {
-               OSAddAtomic(1, &vm_page_internal_count);
-       } else {
-               OSAddAtomic(1, &vm_page_external_count);
+        if (batch_accounting == FALSE) {
+               if (object->internal) {
+                       OSAddAtomic(1, &vm_page_internal_count);
+               } else {
+                       OSAddAtomic(1, &vm_page_external_count);
+               }
        }
 
        /*
@@ -1226,14 +1285,19 @@ vm_page_insert_internal(
        if (owner &&
            (object->purgable == VM_PURGABLE_NONVOLATILE ||
             VM_PAGE_WIRED(mem))) {
-               /* more non-volatile bytes */
-               ledger_credit(owner->ledger,
-                             task_ledgers.purgeable_nonvolatile,
-                             PAGE_SIZE);
-               /* more footprint */
-               ledger_credit(owner->ledger,
-                             task_ledgers.phys_footprint,
-                             PAGE_SIZE);
+
+               if (delayed_ledger_update)
+                       *delayed_ledger_update += PAGE_SIZE;
+               else {
+                       /* more non-volatile bytes */
+                       ledger_credit(owner->ledger,
+                                     task_ledgers.purgeable_nonvolatile,
+                                     PAGE_SIZE);
+                       /* more footprint */
+                       ledger_credit(owner->ledger,
+                                     task_ledgers.phys_footprint,
+                                     PAGE_SIZE);
+               }
 
        } else if (owner &&
                   (object->purgable == VM_PURGABLE_VOLATILE ||
@@ -1378,7 +1442,7 @@ vm_page_replace(
                 */
                vm_page_free_unlocked(found_m, FALSE);
        }
-       vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
+       vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, FALSE, FALSE, FALSE, NULL);
 }
 
 /*
@@ -1450,7 +1514,7 @@ vm_page_remove(
         *      Now remove from the object's list of backed pages.
         */
 
-       VM_PAGE_REMOVE(mem);
+       vm_page_remove_internal(mem);
 
        /*
         *      And show that the object has one fewer resident
@@ -1483,6 +1547,9 @@ vm_page_remove(
        if (VM_PAGE_WIRED(mem)) {
                assert(mem->object->wired_page_count > 0);
                mem->object->wired_page_count--;
+               if (!mem->object->wired_page_count) {
+                   VM_OBJECT_UNWIRED(mem->object);
+               }
        }
        assert(mem->object->resident_page_count >=
               mem->object->wired_page_count);
@@ -1551,13 +1618,55 @@ vm_page_remove(
  *     The object must be locked.  No side effects.
  */
 
-unsigned long vm_page_lookup_hint = 0;
-unsigned long vm_page_lookup_hint_next = 0;
-unsigned long vm_page_lookup_hint_prev = 0;
-unsigned long vm_page_lookup_hint_miss = 0;
-unsigned long vm_page_lookup_bucket_NULL = 0;
-unsigned long vm_page_lookup_miss = 0;
+#define        VM_PAGE_HASH_LOOKUP_THRESHOLD   10
+
+#if DEBUG_VM_PAGE_LOOKUP
 
+struct {
+       uint64_t        vpl_total;
+       uint64_t        vpl_empty_obj;
+       uint64_t        vpl_bucket_NULL;
+       uint64_t        vpl_hit_hint;
+       uint64_t        vpl_hit_hint_next;
+       uint64_t        vpl_hit_hint_prev;
+       uint64_t        vpl_fast;
+       uint64_t        vpl_slow;
+       uint64_t        vpl_hit;
+       uint64_t        vpl_miss;
+
+       uint64_t        vpl_fast_elapsed;
+       uint64_t        vpl_slow_elapsed;
+} vm_page_lookup_stats __attribute__((aligned(8)));
+
+#endif
+
+#define        KDP_VM_PAGE_WALK_MAX    1000
+
+vm_page_t
+kdp_vm_page_lookup(
+       vm_object_t             object,
+       vm_object_offset_t      offset)
+{
+       vm_page_t cur_page;
+       int num_traversed = 0;
+
+       if (not_in_kdp) {
+               panic("panic: kdp_vm_page_lookup done outside of kernel debugger");
+       }
+
+       queue_iterate(&object->memq, cur_page, vm_page_t, listq) {
+               if (cur_page->offset == offset) {
+                       return cur_page;
+               }
+               num_traversed++;
+
+               if (num_traversed >= KDP_VM_PAGE_WALK_MAX) {
+                       return VM_PAGE_NULL;
+               }
+       }
+
+       return VM_PAGE_NULL;
+}
 
 vm_page_t
 vm_page_lookup(
@@ -1567,18 +1676,32 @@ vm_page_lookup(
        vm_page_t       mem;
        vm_page_bucket_t *bucket;
        queue_entry_t   qe;
-       lck_spin_t      *bucket_lock;
+       lck_spin_t      *bucket_lock = NULL;
        int             hash_id;
+#if DEBUG_VM_PAGE_LOOKUP
+       uint64_t        start, elapsed;
 
+       OSAddAtomic64(1, &vm_page_lookup_stats.vpl_total);
+#endif
        vm_object_lock_assert_held(object);
+
+       if (object->resident_page_count == 0) {
+#if DEBUG_VM_PAGE_LOOKUP
+               OSAddAtomic64(1, &vm_page_lookup_stats.vpl_empty_obj);
+#endif
+               return (VM_PAGE_NULL);
+       }
+
        mem = object->memq_hint;
 
        if (mem != VM_PAGE_NULL) {
                assert(mem->object == object);
 
                if (mem->offset == offset) {
-                       vm_page_lookup_hint++;
-                       return mem;
+#if DEBUG_VM_PAGE_LOOKUP
+                       OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint);
+#endif
+                       return (mem);
                }
                qe = queue_next(&mem->listq);
 
@@ -1589,9 +1712,11 @@ vm_page_lookup(
                        assert(next_page->object == object);
 
                        if (next_page->offset == offset) {
-                               vm_page_lookup_hint_next++;
                                object->memq_hint = next_page; /* new hint */
-                               return next_page;
+#if DEBUG_VM_PAGE_LOOKUP
+                               OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_next);
+#endif
+                               return (next_page);
                        }
                }
                qe = queue_prev(&mem->listq);
@@ -1603,9 +1728,11 @@ vm_page_lookup(
                        assert(prev_page->object == object);
 
                        if (prev_page->offset == offset) {
-                               vm_page_lookup_hint_prev++;
                                object->memq_hint = prev_page; /* new hint */
-                               return prev_page;
+#if DEBUG_VM_PAGE_LOOKUP
+                               OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_prev);
+#endif
+                               return (prev_page);
                        }
                }
        }
@@ -1624,37 +1751,72 @@ vm_page_lookup(
         * really cheap optimization to avoid taking the lock
         */
        if (!bucket->page_list) {
-               vm_page_lookup_bucket_NULL++;
-
+#if DEBUG_VM_PAGE_LOOKUP
+               OSAddAtomic64(1, &vm_page_lookup_stats.vpl_bucket_NULL);
+#endif
                return (VM_PAGE_NULL);
        }
-       bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
-
-       lck_spin_lock(bucket_lock);
 
-       for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = VM_PAGE_UNPACK_PTR(mem->next_m)) {
-#if 0
+#if DEBUG_VM_PAGE_LOOKUP
+       start = mach_absolute_time();
+#endif
+       if (object->resident_page_count <= VM_PAGE_HASH_LOOKUP_THRESHOLD) {
                /*
-                * we don't hold the page queue lock
-                * so this check isn't safe to make
+                * on average, it's roughly 3 times faster to run a short memq list
+                * than to take the spin lock and go through the hash list
                 */
-               VM_PAGE_CHECK(mem);
+               mem = (vm_page_t)queue_first(&object->memq);
+
+               while (!queue_end(&object->memq, (queue_entry_t)mem)) {
+
+                       if (mem->offset == offset)
+                               break;
+
+                       mem = (vm_page_t)queue_next(&mem->listq);
+               }
+               if (queue_end(&object->memq, (queue_entry_t)mem))
+                       mem = NULL;
+       } else {
+
+               bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
+
+               lck_spin_lock(bucket_lock);
+
+               for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = VM_PAGE_UNPACK_PTR(mem->next_m)) {
+#if 0
+                       /*
+                        * we don't hold the page queue lock
+                        * so this check isn't safe to make
+                        */
+                       VM_PAGE_CHECK(mem);
 #endif
-               if ((mem->object == object) && (mem->offset == offset))
-                       break;
+                       if ((mem->object == object) && (mem->offset == offset))
+                               break;
+               }
+               lck_spin_unlock(bucket_lock);
        }
-       lck_spin_unlock(bucket_lock);
 
+#if DEBUG_VM_PAGE_LOOKUP
+       elapsed = mach_absolute_time() - start;
+
+       if (bucket_lock) {
+               OSAddAtomic64(1, &vm_page_lookup_stats.vpl_slow);
+               OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_slow_elapsed);
+       } else {
+               OSAddAtomic64(1, &vm_page_lookup_stats.vpl_fast);
+               OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_fast_elapsed);
+       }
+       if (mem != VM_PAGE_NULL)
+               OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit);
+       else
+               OSAddAtomic64(1, &vm_page_lookup_stats.vpl_miss);
+#endif
        if (mem != VM_PAGE_NULL) {
-               if (object->memq_hint != VM_PAGE_NULL) {
-                       vm_page_lookup_hint_miss++;
-               }
                assert(mem->object == object);
-               object->memq_hint = mem;
-       } else
-               vm_page_lookup_miss++;
 
-       return(mem);
+               object->memq_hint = mem;
+       }
+       return (mem);
 }
 
 
@@ -1673,10 +1835,13 @@ vm_page_rename(
        vm_object_offset_t              new_offset,
        boolean_t                       encrypted_ok)
 {
-       boolean_t       internal_to_external, external_to_internal;
+       boolean_t internal_to_external, external_to_internal;
+       vm_tag_t  tag;
 
        assert(mem->object != new_object);
 
+        assert(mem->object);
+
        /*
         * ENCRYPTED SWAP:
         * The encryption key is based on the page's memory object
@@ -1715,7 +1880,7 @@ vm_page_rename(
                 * up there anyway, and we don't do vm_page_rename's frequently enough
                 * for this to matter.
                 */
-               VM_PAGE_QUEUES_REMOVE(mem);
+               vm_page_queues_remove(mem);
                vm_page_activate(mem);
        }
        if (mem->active || mem->inactive || mem->speculative) {
@@ -1727,8 +1892,9 @@ vm_page_rename(
                }
        }
 
+       tag = mem->object->wire_tag;
        vm_page_remove(mem, TRUE);
-       vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);
+       vm_page_insert_internal(mem, new_object, new_offset, tag, TRUE, TRUE, FALSE, FALSE, NULL);
 
        if (internal_to_external) {
                vm_page_pageable_internal_count--;
@@ -1907,7 +2073,7 @@ void vm_page_more_fictitious(void)
 
        retval = kernel_memory_allocate(zone_map,
                                        &addr, PAGE_SIZE, VM_PROT_ALL,
-                                       KMA_KOBJECT|KMA_NOPAGEWAIT);
+                                       KMA_KOBJECT|KMA_NOPAGEWAIT, VM_KERN_MEMORY_ZONE);
        if (retval != KERN_SUCCESS) { 
                /*
                 * No page was available. Drop the
@@ -1919,9 +2085,6 @@ void vm_page_more_fictitious(void)
                return;
        }
 
-       /* Increment zone page count. We account for all memory managed by the zone in z->page_count */
-       OSAddAtomic64(1, &(vm_page_zone->page_count));
-
        zcram(vm_page_zone, addr, PAGE_SIZE);
 
        lck_mtx_unlock(&vm_page_alloc_lock);
@@ -2445,24 +2608,6 @@ vm_page_alloc(
        return(mem);
 }
 
-vm_page_t
-vm_page_alloclo(
-       vm_object_t             object,
-       vm_object_offset_t      offset)
-{
-       register vm_page_t      mem;
-
-       vm_object_lock_assert_exclusive(object);
-       mem = vm_page_grablo();
-       if (mem == VM_PAGE_NULL)
-               return VM_PAGE_NULL;
-
-       vm_page_insert(mem, object, offset);
-
-       return(mem);
-}
-
-
 /*
  *     vm_page_alloc_guard:
  *     
@@ -2536,12 +2681,16 @@ vm_page_free_prepare_queues(
                counter(++c_laundry_pages_freed);
        }
        
-       VM_PAGE_QUEUES_REMOVE(mem);     /* clears local/active/inactive/throttled/speculative */
+       vm_page_queues_remove(mem);     /* clears local/active/inactive/throttled/speculative */
 
        if (VM_PAGE_WIRED(mem)) {
                if (mem->object) {
                        assert(mem->object->wired_page_count > 0);
                        mem->object->wired_page_count--;
+                       if (!mem->object->wired_page_count) {
+                           VM_OBJECT_UNWIRED(mem->object);
+                       }
+
                        assert(mem->object->resident_page_count >=
                               mem->object->wired_page_count);
 
@@ -2818,9 +2967,13 @@ vm_page_free_list(
  *
  *     The page's object and the page queues must be locked.
  */
+
+
 void
 vm_page_wire(
-       register vm_page_t      mem)
+       register vm_page_t mem,
+       vm_tag_t           tag,
+       boolean_t          check_memorystatus)
 {
 
 //     dbgLog(current_thread(), mem->offset, mem->object, 1);  /* (TEST/DEBUG) */
@@ -2848,10 +3001,21 @@ vm_page_wire(
                        mem->pageout = FALSE;
                        vm_pageout_throttle_up(mem);
                }
-               VM_PAGE_QUEUES_REMOVE(mem);
+               vm_page_queues_remove(mem);
 
                if (mem->object) {
+
+                       if (!mem->private && !mem->fictitious) 
+                       {
+                           if (!mem->object->wired_page_count)
+                           {
+                               assert(VM_KERN_MEMORY_NONE != tag);
+                               mem->object->wire_tag = tag;
+                               VM_OBJECT_WIRED(mem->object);
+                           }
+                       }
                        mem->object->wired_page_count++;
+
                        assert(mem->object->resident_page_count >=
                               mem->object->wired_page_count);
                        if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
@@ -2904,8 +3068,9 @@ vm_page_wire(
                        vm_page_gobble_count--;
                mem->gobbled = FALSE;
 
-               VM_CHECK_MEMORYSTATUS;
-               
+               if (check_memorystatus == TRUE) {
+                       VM_CHECK_MEMORYSTATUS;
+               }
                /* 
                 * ENCRYPTED SWAP:
                 * The page could be encrypted, but
@@ -2921,32 +3086,6 @@ vm_page_wire(
        VM_PAGE_CHECK(mem);
 }
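vm_page_wire() now takes the wiring tag and a check_memorystatus flag, so a caller both identifies which subsystem is wiring the page (feeding object->wire_tag as above) and decides whether VM_CHECK_MEMORYSTATUS should run inline or be batched later. A minimal caller-side sketch; m is assumed to be a vm_page_t whose object is already locked, and the OSFMK tag is just an example:

    /* Sketch of a caller adapted to the new three-argument signature. */
    vm_page_lockspin_queues();
    vm_page_wire(m, VM_KERN_MEMORY_OSFMK, TRUE);  /* tag the wiring, run memorystatus check now */
    vm_page_unlock_queues();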
 
-/*
- *      vm_page_gobble:
- *
- *      Mark this page as consumed by the vm/ipc/xmm subsystems.
- *
- *      Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
- */
-void
-vm_page_gobble(
-        register vm_page_t      mem)
-{
-        vm_page_lockspin_queues();
-        VM_PAGE_CHECK(mem);
-
-       assert(!mem->gobbled);
-       assert( !VM_PAGE_WIRED(mem));
-
-        if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
-                if (!mem->private && !mem->fictitious)
-                        vm_page_wire_count++;
-        }
-       vm_page_gobble_count++;
-        mem->gobbled = TRUE;
-        vm_page_unlock_queues();
-}
-
 /*
  *     vm_page_unwire:
  *
@@ -2975,6 +3114,9 @@ vm_page_unwire(
                vm_page_wire_count--;
                assert(mem->object->wired_page_count > 0);
                mem->object->wired_page_count--;
+               if (!mem->object->wired_page_count) {
+                   VM_OBJECT_UNWIRED(mem->object);
+               }
                assert(mem->object->resident_page_count >=
                       mem->object->wired_page_count);
                if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
@@ -3068,7 +3210,7 @@ vm_page_deactivate_internal(
        }
        /*
         * if this page is currently on the pageout queue, we can't do the
-        * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
+        * vm_page_queues_remove (which doesn't handle the pageout queue case)
         * and we can't remove it manually since we would need the object lock
         * (which is not required here) to decrement the activity_in_progress
         * reference which is held on the object while the page is in the pageout queue...
@@ -3084,13 +3226,14 @@ vm_page_deactivate_internal(
        m->no_cache = FALSE;
 
        if (!m->inactive) {
-               VM_PAGE_QUEUES_REMOVE(m);
+               vm_page_queues_remove(m);
 
                if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
                    m->dirty && m->object->internal &&
                    (m->object->purgable == VM_PURGABLE_DENY ||
                     m->object->purgable == VM_PURGABLE_NONVOLATILE ||
                     m->object->purgable == VM_PURGABLE_VOLATILE)) {
+                       vm_page_check_pageable_safe(m);
                        queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
                        m->throttled = TRUE;
                        vm_page_throttled_count++;
@@ -3101,7 +3244,7 @@ vm_page_deactivate_internal(
                                vm_page_speculative_recreated++;
 #endif
                        } else {
-                               VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
+                               vm_page_enqueue_inactive(m, FALSE);
                        }
                }
        }
@@ -3134,7 +3277,7 @@ void vm_page_enqueue_cleaned(vm_page_t m)
        }
        /*
         * if this page is currently on the pageout queue, we can't do the
-        * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
+        * vm_page_queues_remove (which doesn't handle the pageout queue case)
         * and we can't remove it manually since we would need the object lock
         * (which is not required here) to decrement the activity_in_progress
         * reference which is held on the object while the page is in the pageout queue...
@@ -3143,8 +3286,9 @@ void vm_page_enqueue_cleaned(vm_page_t m)
        if (m->laundry || m->clean_queue || m->pageout_queue || m->private || m->fictitious)
                return;
 
-       VM_PAGE_QUEUES_REMOVE(m);
+       vm_page_queues_remove(m);
 
+       vm_page_check_pageable_safe(m);
        queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
        m->clean_queue = TRUE;
        vm_page_cleaned_count++;
@@ -3191,7 +3335,7 @@ vm_page_activate(
        }
        /*
         * if this page is currently on the pageout queue, we can't do the
-        * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
+        * vm_page_queues_remove (which doesn't handle the pageout queue case)
         * and we can't remove it manually since we would need the object lock
         * (which is not required here) to decrement the activity_in_progress
         * reference which is held on the object while the page is in the pageout queue...
@@ -3210,10 +3354,10 @@ vm_page_activate(
                DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
        }
        
-       VM_PAGE_QUEUES_REMOVE(m);
+       vm_page_queues_remove(m);
 
        if ( !VM_PAGE_WIRED(m)) {
-
+               vm_page_check_pageable_safe(m);
                if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && 
                    m->dirty && m->object->internal && 
                    (m->object->purgable == VM_PURGABLE_DENY ||
@@ -3254,7 +3398,8 @@ vm_page_speculate(
         struct vm_speculative_age_q    *aq;
 
        VM_PAGE_CHECK(m);
-       assert(m->object != kernel_object);
+       vm_page_check_pageable_safe(m);
+
        assert(m->phys_page != vm_page_guard_addr);
 #if DEBUG
        lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
@@ -3263,7 +3408,7 @@ vm_page_speculate(
 
        /*
         * if this page is currently on the pageout queue, we can't do the
-        * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
+        * vm_page_queues_remove (which doesn't handle the pageout queue case)
         * and we can't remove it manually since we would need the object lock
         * (which is not required here) to decrement the activity_in_progress
         * reference which is held on the object while the page is in the pageout queue...
@@ -3272,7 +3417,7 @@ vm_page_speculate(
        if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
                return;
 
-       VM_PAGE_QUEUES_REMOVE(m);               
+       vm_page_queues_remove(m);
 
        if ( !VM_PAGE_WIRED(m)) {
                mach_timespec_t         ts;
@@ -3397,7 +3542,7 @@ vm_page_lru(
 #endif
        /*
         * if this page is currently on the pageout queue, we can't do the
-        * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
+        * vm_page_queues_remove (which doesn't handle the pageout queue case)
         * and we can't remove it manually since we would need the object lock
         * (which is not required here) to decrement the activity_in_progress
         * reference which is held on the object while the page is in the pageout queue...
@@ -3408,9 +3553,9 @@ vm_page_lru(
 
        m->no_cache = FALSE;
 
-       VM_PAGE_QUEUES_REMOVE(m);
+       vm_page_queues_remove(m);
 
-       VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
+       vm_page_enqueue_inactive(m, FALSE);
 }
 
 
@@ -3528,6 +3673,7 @@ vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
 
                queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
                        VM_PAGE_CHECK(m);
+                       vm_page_check_pageable_safe(m);
                        assert(m->local);
                        assert(!m->active);
                        assert(!m->inactive);
@@ -3995,6 +4141,11 @@ vm_page_queues_assert(
 #endif /* MACH_ASSERT */
 
 
+
+
+
+extern boolean_t (* volatile consider_buffer_cache_collect)(int);
+
 /*
  *     CONTIGUOUS PAGE ALLOCATION
  *
@@ -4077,21 +4228,23 @@ vm_page_find_contiguous(
        unsigned int    idx_last_contig_page_found = 0;
        int             free_considered, free_available;
        int             substitute_needed;
-       boolean_t       wrapped;
+       boolean_t       wrapped, zone_gc_called = FALSE;
 #if DEBUG
        clock_sec_t     tv_start_sec, tv_end_sec;
        clock_usec_t    tv_start_usec, tv_end_usec;
 #endif
-#if MACH_ASSERT
+
        int             yielded = 0;
        int             dumped_run = 0;
        int             stolen_pages = 0;
        int             compressed_pages = 0;
-#endif
+
 
        if (contig_pages == 0)
                return VM_PAGE_NULL;
 
+full_scan_again:
+
 #if MACH_ASSERT
        vm_page_verify_free_lists();
 #endif
@@ -4101,6 +4254,8 @@ vm_page_find_contiguous(
        PAGE_REPLACEMENT_ALLOWED(TRUE);
 
        vm_page_lock_queues();
+
+
        lck_mtx_lock(&vm_page_queue_free_lock);
 
        RESET_STATE_OF_RUN();
@@ -4270,9 +4425,9 @@ did_consider:
                         */
                        free_available = vm_page_free_count - vm_page_free_reserved;
                        considered = 0;
-#if MACH_ASSERT
+
                        yielded++;
-#endif
+
                        goto retry;
                }
                considered++;
@@ -4548,7 +4703,7 @@ did_consider:
                                         * now put the substitute page
                                         * on the object
                                         */
-                                       vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);
+                                       vm_page_insert_internal(m2, locked_object, offset, VM_KERN_MEMORY_NONE, TRUE, TRUE, FALSE, FALSE, NULL);
 
                                        if (m2->compressor) {
                                                m2->pmapped = TRUE;
@@ -4556,9 +4711,9 @@ did_consider:
 
                                                PMAP_ENTER(kernel_pmap, m2->offset, m2,
                                                           VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
-#if MACH_ASSERT
+
                                                compressed_pages++;
-#endif
+
                                        } else {
                                                if (m2->reference)
                                                        vm_page_activate(m2);
@@ -4579,9 +4734,9 @@ did_consider:
                                         */
                                        vm_page_free_prepare(m1);
                                }
-#if MACH_ASSERT
+
                                stolen_pages++;
-#endif
+
                        }
                        m1->pageq.next = (queue_entry_t) m;
                        m1->pageq.prev = NULL;
@@ -4596,9 +4751,9 @@ did_consider:
                        if (m != VM_PAGE_NULL) {
                                vm_page_free_list(m, FALSE);
                        }
-#if MACH_ASSERT
+
                        dumped_run++;
-#endif
+
                        /*
                         * want the index of the last
                         * page in this run that was
@@ -4684,6 +4839,23 @@ done_scanning:
 #if MACH_ASSERT
        vm_page_verify_free_lists();
 #endif
+       if (m == NULL && zone_gc_called == FALSE) {
+               printf("%s(num=%d,low=%d): found %d pages at 0x%llx...scanned %d pages...  yielded %d times...  dumped run %d times... stole %d pages... stole %d compressed pages... wired count is %d\n",
+                      __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
+                      scanned, yielded, dumped_run, stolen_pages, compressed_pages, vm_page_wire_count);
+
+               if (consider_buffer_cache_collect != NULL) {
+                       (void)(*consider_buffer_cache_collect)(1);
+               }
+
+               consider_zone_gc(TRUE);
+
+               zone_gc_called = TRUE;
+
+               printf("vm_page_find_contiguous: zone_gc called... wired count is %d\n", vm_page_wire_count);
+               goto full_scan_again;
+       }
+
        return m;
 }
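
The hunk above adds a one-shot fallback to vm_page_find_contiguous: if the full scan finds no run, the buffer cache and the zone garbage collector are asked to give memory back and the whole scan is restarted exactly once (guarded by zone_gc_called). Below is a minimal stand-alone C sketch of that retry shape only; scan_for_run() and reclaim_caches() are illustrative stand-ins, not kernel API.

/*
 * Minimal sketch of the retry-after-reclaim pattern added above.
 * scan_for_run() and reclaim_caches() stand in for the kernel's scan loop,
 * consider_buffer_cache_collect() and consider_zone_gc().
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static void reclaim_caches(void)
{
    puts("reclaiming caches before the second scan...");
}

static void *scan_for_run(size_t pages)
{
    (void)pages;
    return NULL;                       /* pretend the scan found nothing */
}

static void *alloc_contiguous(size_t pages)
{
    bool reclaimed = false;            /* plays the role of zone_gc_called */
    void *run;

full_scan_again:
    run = scan_for_run(pages);
    if (run == NULL && !reclaimed) {
        reclaim_caches();              /* give caches a chance to shrink */
        reclaimed = true;
        goto full_scan_again;          /* retry the whole scan exactly once */
    }
    return run;
}

int main(void)
{
    printf("result: %p\n", alloc_contiguous(32));
    return 0;
}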
 
@@ -4765,6 +4937,7 @@ unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
 void
 vm_page_do_delayed_work(
        vm_object_t     object,
+       vm_tag_t        tag,
        struct vm_page_delayed_work *dwp,
        int             dw_count)
 {
@@ -4810,7 +4983,7 @@ vm_page_do_delayed_work(
                        vm_phantom_cache_update(m);
 #endif
                if (dwp->dw_mask & DW_vm_page_wire)
-                       vm_page_wire(m);
+                       vm_page_wire(m, tag, FALSE);
                else if (dwp->dw_mask & DW_vm_page_unwire) {
                        boolean_t       queueit;
 
@@ -4868,7 +5041,7 @@ vm_page_do_delayed_work(
                                vm_page_lru(m);
                        else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
                                if ( !m->pageout_queue)
-                                       VM_PAGE_QUEUES_REMOVE(m);
+                                       vm_page_queues_remove(m);
                        }
                        if (dwp->dw_mask & DW_set_reference)
                                m->reference = TRUE;
@@ -4877,11 +5050,11 @@ vm_page_do_delayed_work(
 
                        if (dwp->dw_mask & DW_move_page) {
                                if ( !m->pageout_queue) {
-                                       VM_PAGE_QUEUES_REMOVE(m);
+                                       vm_page_queues_remove(m);
 
                                        assert(m->object != kernel_object);
 
-                                       VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
+                                       vm_page_enqueue_inactive(m, FALSE);
                                }
                        }
                        if (dwp->dw_mask & DW_clear_busy)
@@ -4964,8 +5137,6 @@ vm_page_get_phys_page(vm_page_t page)
 
 static vm_page_t hibernate_gobble_queue;
 
-extern boolean_t (* volatile consider_buffer_cache_collect)(int);
-
 static int  hibernate_drain_pageout_queue(struct vm_pageout_queue *);
 static int  hibernate_flush_dirty_pages(int);
 static int  hibernate_flush_queue(queue_head_t *, int);
@@ -5240,16 +5411,16 @@ hibernate_flush_queue(queue_head_t *q, int qcount)
                /*
                 * we've already factored out pages in the laundry which
                 * means this page can't be on the pageout queue so it's
-                * safe to do the VM_PAGE_QUEUES_REMOVE
+                * safe to do the vm_page_queues_remove
                 */
                 assert(!m->pageout_queue);
 
-               VM_PAGE_QUEUES_REMOVE(m);
+               vm_page_queues_remove(m);
 
                if (COMPRESSED_PAGER_IS_ACTIVE && m_object->internal == TRUE)
                        pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);
 
-               vm_pageout_cluster(m, FALSE);
+               (void)vm_pageout_cluster(m, FALSE, FALSE, FALSE);
 
                hibernate_stats.hibernate_found_dirty++;
 
@@ -5439,38 +5610,6 @@ hibernate_page_list_zero(hibernate_page_list_t *list)
     }
 }
 
-void
-hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
-{
-    uint32_t i;
-    vm_page_t m;
-    uint64_t start, end, timeout, nsec;
-    clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
-    clock_get_uptime(&start);
-
-    for (i = 0; i < gobble_count; i++)
-    {
-       while (VM_PAGE_NULL == (m = vm_page_grab()))
-       {
-           clock_get_uptime(&end);
-           if (end >= timeout)
-               break;
-           VM_PAGE_WAIT();
-       }
-       if (!m)
-           break;
-       m->busy = FALSE;
-       vm_page_gobble(m);
-
-       m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
-       hibernate_gobble_queue = m;
-    }
-
-    clock_get_uptime(&end);
-    absolutetime_to_nanoseconds(end - start, &nsec);
-    HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
-}
-
 void
 hibernate_free_gobble_pages(void)
 {
@@ -5713,7 +5852,7 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list,
     boolean_t                   discard_all;
     boolean_t            discard;
 
-    HIBLOG("hibernate_page_list_setall(preflight %d) start %p, %p\n", preflight, page_list, page_list_wired);
+    HIBLOG("hibernate_page_list_setall(preflight %d) start\n", preflight);
 
     if (preflight) {
         page_list       = NULL;
@@ -6638,3 +6777,581 @@ vm_page_buckets_check(void)
 //     printf("BUCKET_CHECK: checked buckets\n");
 }
 #endif /* VM_PAGE_BUCKETS_CHECK */
+
+/*
+ * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
+ * local queues if they exist... it's the only spot in the system where we add pages
+ * to those queues...  once on those queues, those pages can only move to one of the
+ * global page queues or the free queues... they NEVER move from one local queue to another.
+ * the 'local' state is stable when vm_page_queues_remove is called since we're behind
+ * the global vm_page_queue_lock at this point...  we still need to take the local lock
+ * in case this operation is being run on a different CPU than the local queue's identity,
+ * but we don't have to worry about the page moving to a global queue or becoming wired
+ * while we're grabbing the local lock since those operations would require the global
+ * vm_page_queue_lock to be held, and we already own it.
+ *
+ * this is why it's safe to utilize the wire_count field in the vm_page_t as the local_id...
+ * 'wired' and local are ALWAYS mutually exclusive conditions.
+ * (a simplified user-space sketch of this locking order follows the function below.)
+ */
+void
+vm_page_queues_remove(vm_page_t mem)
+{
+       boolean_t       was_pageable;
+
+       VM_PAGE_QUEUES_ASSERT(mem, 1);
+       assert(!mem->pageout_queue);
+       /*
+        *      if (mem->pageout_queue)
+        *              NOTE: vm_page_queues_remove does not deal with removing pages from the pageout queue...
+        *              the caller is responsible for determing if the page is on that queue, and if so, must
+        *              the caller is responsible for determining if the page is on that queue, and if so, must
+        *              this via vm_pageout_steal_laundry), or avoid the call to vm_page_queues_remove
+        */
+       if (mem->local) {
+               struct vpl      *lq;
+               assert(mem->object != kernel_object);
+               assert(mem->object != compressor_object);
+               assert(!mem->inactive && !mem->speculative);
+               assert(!mem->active && !mem->throttled);
+               assert(!mem->clean_queue);
+               assert(!mem->fictitious);
+               lq = &vm_page_local_q[mem->local_id].vpl_un.vpl;
+               VPL_LOCK(&lq->vpl_lock);
+               queue_remove(&lq->vpl_queue,
+                            mem, vm_page_t, pageq);
+               mem->local = FALSE;
+               mem->local_id = 0;
+               lq->vpl_count--;
+               if (mem->object->internal) {
+                       lq->vpl_internal_count--;
+               } else {
+                       lq->vpl_external_count--;
+               }
+               VPL_UNLOCK(&lq->vpl_lock);
+               was_pageable = FALSE;
+       }
+
+       else if (mem->active) {
+               assert(mem->object != kernel_object);
+               assert(mem->object != compressor_object);
+               assert(!mem->inactive && !mem->speculative);
+               assert(!mem->clean_queue);
+               assert(!mem->throttled);
+               assert(!mem->fictitious);
+               queue_remove(&vm_page_queue_active,
+                       mem, vm_page_t, pageq);
+               mem->active = FALSE;
+               vm_page_active_count--;
+               was_pageable = TRUE;
+       }
+
+       else if (mem->inactive) {
+               assert(mem->object != kernel_object);
+               assert(mem->object != compressor_object);
+               assert(!mem->active && !mem->speculative);
+               assert(!mem->throttled);
+               assert(!mem->fictitious);
+               vm_page_inactive_count--;
+               if (mem->clean_queue) {
+                       queue_remove(&vm_page_queue_cleaned,
+                        mem, vm_page_t, pageq);
+                       mem->clean_queue = FALSE;
+                       vm_page_cleaned_count--;
+               } else {
+                       if (mem->object->internal) {
+                               queue_remove(&vm_page_queue_anonymous,
+                               mem, vm_page_t, pageq);
+                               vm_page_anonymous_count--;
+                       } else {
+                               queue_remove(&vm_page_queue_inactive,
+                               mem, vm_page_t, pageq);
+                       }
+                       vm_purgeable_q_advance_all();
+               }
+               mem->inactive = FALSE;
+               was_pageable = TRUE;
+       }
+
+       else if (mem->throttled) {
+               assert(mem->object != compressor_object);
+               assert(!mem->active && !mem->inactive);
+               assert(!mem->speculative);
+               assert(!mem->fictitious);
+               queue_remove(&vm_page_queue_throttled,
+                            mem, vm_page_t, pageq);
+               mem->throttled = FALSE;
+               vm_page_throttled_count--;
+               was_pageable = FALSE;
+       }
+
+       else if (mem->speculative) {
+               assert(mem->object != compressor_object);
+               assert(!mem->active && !mem->inactive);
+               assert(!mem->throttled);
+               assert(!mem->fictitious);
+                remque(&mem->pageq);
+               mem->speculative = FALSE;
+               vm_page_speculative_count--;
+               was_pageable = TRUE;
+       }
+
+       else if (mem->pageq.next || mem->pageq.prev) {
+               was_pageable = FALSE;
+               panic("vm_page_queues_remove: unmarked page on Q");
+       } else {
+               was_pageable = FALSE;
+       }
+
+       mem->pageq.next = NULL;
+       mem->pageq.prev = NULL;
+       VM_PAGE_QUEUES_ASSERT(mem, 0);
+       if (was_pageable) {
+               if (mem->object->internal) {
+                       vm_page_pageable_internal_count--;
+               } else {
+                       vm_page_pageable_external_count--;
+               }
+       }
+}
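
As flagged in the comment before vm_page_queues_remove, the 'local' case relies on a two-level locking order: the global page-queues lock pins the page's local/local_id state, while the per-CPU queue's own lock guards the actual list surgery. The following is a simplified user-space sketch of that ordering, assuming pthreads; every name here is illustrative, none of it is kernel API.

/*
 * Simplified sketch of the locking order described above. global_queues_lock
 * stands in for vm_page_queue_lock and keeps page->local and page->local_id
 * stable; the per-queue lock is still taken because the owning CPU may be
 * draining its own local queue concurrently.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct page {
    struct page *next, *prev;
    bool         local;
    int          local_id;
};

struct local_queue {
    pthread_mutex_t lock;
    struct page    *head;
    int             count;
};

static pthread_mutex_t    global_queues_lock = PTHREAD_MUTEX_INITIALIZER;
static struct local_queue local_queues[4];

/* Caller must hold global_queues_lock. */
static void page_local_queue_add(struct page *page, int id)
{
    struct local_queue *lq = &local_queues[id];

    pthread_mutex_lock(&lq->lock);
    page->prev = NULL;
    page->next = lq->head;
    if (lq->head)
        lq->head->prev = page;
    lq->head       = page;
    page->local    = true;
    page->local_id = id;
    lq->count++;
    pthread_mutex_unlock(&lq->lock);
}

/* Caller must hold global_queues_lock, so local/local_id cannot change. */
static void page_local_queue_remove(struct page *page)
{
    struct local_queue *lq = &local_queues[page->local_id];

    pthread_mutex_lock(&lq->lock);
    if (page->prev)
        page->prev->next = page->next;
    else
        lq->head = page->next;
    if (page->next)
        page->next->prev = page->prev;
    page->local    = false;
    page->local_id = 0;
    lq->count--;
    pthread_mutex_unlock(&lq->lock);
}

int main(void)
{
    struct page p = { 0 };

    for (int i = 0; i < 4; i++)
        pthread_mutex_init(&local_queues[i].lock, NULL);

    pthread_mutex_lock(&global_queues_lock);
    page_local_queue_add(&p, 0);
    page_local_queue_remove(&p);
    pthread_mutex_unlock(&global_queues_lock);
    return 0;
}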
+
+void
+vm_page_remove_internal(vm_page_t page)
+{
+       vm_object_t __object = page->object;
+       if (page == __object->memq_hint) {
+               vm_page_t       __new_hint;
+               queue_entry_t   __qe;
+               __qe = queue_next(&page->listq);
+               if (queue_end(&__object->memq, __qe)) {
+                       __qe = queue_prev(&page->listq);
+                       if (queue_end(&__object->memq, __qe)) {
+                               __qe = NULL;
+                       }
+               }
+               __new_hint = (vm_page_t) __qe;
+               __object->memq_hint = __new_hint;
+       }
+       queue_remove(&__object->memq, page, vm_page_t, listq);
+}
+
+void
+vm_page_enqueue_inactive(vm_page_t mem, boolean_t first)
+{
+       VM_PAGE_QUEUES_ASSERT(mem, 0);
+       assert(!mem->fictitious);
+       assert(!mem->laundry);
+       assert(!mem->pageout_queue);
+       vm_page_check_pageable_safe(mem);
+       if (mem->object->internal) {
+               if (first == TRUE)
+                       queue_enter_first(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
+               else
+                       queue_enter(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
+               vm_page_anonymous_count++;
+               vm_page_pageable_internal_count++;
+       } else {
+               if (first == TRUE)
+                       queue_enter_first(&vm_page_queue_inactive, mem, vm_page_t, pageq);
+               else
+                       queue_enter(&vm_page_queue_inactive, mem, vm_page_t, pageq);
+               vm_page_pageable_external_count++;
+       }
+       mem->inactive = TRUE;
+       vm_page_inactive_count++;
+       token_new_pagecount++;
+}
+
+/*
+ * Pages from special kernel objects shouldn't
+ * be placed on pageable queues.
+ */
+void
+vm_page_check_pageable_safe(vm_page_t page)
+{
+       if (page->object == kernel_object) {
+               panic("vm_page_check_pageable_safe: trying to add page" \
+                        " from kernel object (%p) to pageable queue", kernel_object);
+       }
+
+       if (page->object == compressor_object) {
+               panic("vm_page_check_pageable_safe: trying to add page" \
+                        " from compressor object (%p) to pageable queue", compressor_object);
+       }
+
+       if (page->object == vm_submap_object) {
+               panic("vm_page_check_pageable_safe: trying to add page" \
+                       " from submap object (%p) to pageable queue", vm_submap_object);
+       }
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * wired page diagnose
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#include <libkern/OSKextLibPrivate.h>
+
+vm_allocation_site_t * 
+vm_allocation_sites[VM_KERN_MEMORY_COUNT];
+
+vm_tag_t 
+vm_tag_bt(void)
+{
+    uintptr_t* frameptr;
+    uintptr_t* frameptr_next;
+    uintptr_t retaddr;
+    uintptr_t kstackb, kstackt;
+    const vm_allocation_site_t * site;
+    thread_t cthread;
+    
+    cthread = current_thread();
+    if (__improbable(cthread == NULL)) return VM_KERN_MEMORY_OSFMK;
+
+    kstackb = cthread->kernel_stack;
+    kstackt = kstackb + kernel_stack_size;
+
+    /* Load stack frame pointer (EBP on x86) into frameptr */
+    frameptr = __builtin_frame_address(0);
+    site = NULL;
+    while (frameptr != NULL) 
+    {
+       /* Verify thread stack bounds */
+       if (((uintptr_t)(frameptr + 2) > kstackt) || ((uintptr_t)frameptr < kstackb)) break;
+
+       /* Next frame pointer is pointed to by the previous one */
+       frameptr_next = (uintptr_t*) *frameptr;
+
+       /* Pull return address from one spot above the frame pointer */
+       retaddr = *(frameptr + 1);
+
+       if ((retaddr < vm_kernel_stext) || (retaddr > vm_kernel_top))
+       {
+           site = OSKextGetAllocationSiteForCaller(retaddr);
+           break;
+       }
+
+       frameptr = frameptr_next;
+    }
+    return (site ? site->tag : VM_KERN_MEMORY_NONE);
+}
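
vm_tag_bt() walks the kernel stack by following saved frame pointers, reading the return address one slot above each frame pointer and stopping at the first address that falls outside the kernel text range. Here is a user-space sketch of the same frame-pointer walk; it assumes the program is built with frame pointers kept (-fno-omit-frame-pointer) and the conventional layout where fp[0] is the caller's frame pointer and fp[1] is the return address, and all names are illustrative.

/*
 * Stand-alone sketch of frame-pointer based backtracing, assuming frame
 * pointers are preserved and fp[0]/fp[1] hold the saved frame pointer and
 * the return address respectively (typical x86-64/arm64 layout).
 */
#include <stdint.h>
#include <stdio.h>

__attribute__((noinline))
static void backtrace_fp(void)
{
    uintptr_t *fp = (uintptr_t *)__builtin_frame_address(0);

    for (int depth = 0; fp != NULL && depth < 16; depth++) {
        uintptr_t *next_fp = (uintptr_t *)fp[0];  /* saved frame pointer  */
        uintptr_t  retaddr = fp[1];               /* caller's return addr */

        if (retaddr == 0)
            break;
        printf("frame %2d: return address %p\n", depth, (void *)retaddr);

        if (next_fp <= fp)                        /* stack grows down; bail */
            break;                                /* out on a corrupt chain */
        fp = next_fp;
    }
}

__attribute__((noinline)) static void inner(void) { backtrace_fp(); }
__attribute__((noinline)) static void outer(void) { inner(); }

int main(void)
{
    outer();
    return 0;
}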
+
+static uint64_t free_tag_bits[256/64];
+
+void
+vm_tag_alloc_locked(vm_allocation_site_t * site)
+{
+    vm_tag_t tag;
+    uint64_t avail;
+    uint64_t idx;
+
+    if (site->tag) return;
+
+    idx = 0;
+    while (TRUE)
+    {
+       avail = free_tag_bits[idx];
+       if (avail)
+       {
+           tag = __builtin_clzll(avail);
+           avail &= ~(1ULL << (63 - tag));
+           free_tag_bits[idx] = avail;
+           tag += (idx << 6);
+           break;
+       }
+       idx++;
+       if (idx >= (sizeof(free_tag_bits) / sizeof(free_tag_bits[0])))
+       {
+            tag = VM_KERN_MEMORY_ANY;
+            break;
+       }
+    }
+    site->tag = tag;
+    if (VM_KERN_MEMORY_ANY != tag)
+    {
+       assert(!vm_allocation_sites[tag]);
+       vm_allocation_sites[tag] = site;
+    }
+}
+
+static void
+vm_tag_free_locked(vm_tag_t tag)
+{
+    uint64_t avail;
+    uint32_t idx;
+    uint64_t bit;
+
+    if (VM_KERN_MEMORY_ANY == tag) return;
+
+    idx = (tag >> 6);
+    avail = free_tag_bits[idx];
+    tag &= 63;
+    bit = (1ULL << (63 - tag));
+    assert(!(avail & bit));
+    free_tag_bits[idx] = (avail | bit);
+}
+
+static void
+vm_tag_init(void)
+{
+    vm_tag_t tag;
+    for (tag = VM_KERN_MEMORY_FIRST_DYNAMIC; tag < VM_KERN_MEMORY_ANY; tag++)
+    {
+        vm_tag_free_locked(tag);
+    }
+}
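
The allocator above tracks the 256 possible tags as set bits across four 64-bit words in free_tag_bits: allocation takes the highest free bit of the first non-empty word via __builtin_clzll, and freeing simply sets that bit again. The following self-contained sketch shows only that bitmap technique; the names (tag_bitmap_*, TAG_NONE) are illustrative, not the kernel's.

/*
 * Stand-alone sketch of the free-tag bitmap used above: a bit set in
 * tag_bitmap[] means the slot is free; allocation clears the highest free
 * bit found by __builtin_clzll; freeing sets it back.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define TAG_SLOTS 256
#define TAG_NONE  0xFFFFFFFFu

static uint64_t tag_bitmap[TAG_SLOTS / 64];

static void tag_bitmap_init(void)
{
    for (unsigned i = 0; i < TAG_SLOTS / 64; i++)
        tag_bitmap[i] = ~0ULL;                /* every slot starts out free */
}

static unsigned tag_bitmap_alloc(void)
{
    for (unsigned idx = 0; idx < TAG_SLOTS / 64; idx++) {
        uint64_t avail = tag_bitmap[idx];
        if (avail) {
            unsigned bit = (unsigned)__builtin_clzll(avail);  /* highest free bit */
            tag_bitmap[idx] = avail & ~(1ULL << (63 - bit));  /* mark it in use   */
            return (idx << 6) + bit;
        }
    }
    return TAG_NONE;                          /* every slot is in use */
}

static void tag_bitmap_free(unsigned tag)
{
    uint64_t bit = 1ULL << (63 - (tag & 63));

    assert(!(tag_bitmap[tag >> 6] & bit));    /* must currently be allocated */
    tag_bitmap[tag >> 6] |= bit;
}

int main(void)
{
    tag_bitmap_init();
    unsigned a = tag_bitmap_alloc();          /* 0 */
    unsigned b = tag_bitmap_alloc();          /* 1 */
    tag_bitmap_free(a);
    printf("a=%u b=%u realloc=%u\n", a, b, tag_bitmap_alloc());  /* realloc == 0 */
    return 0;
}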
+
+vm_tag_t
+vm_tag_alloc(vm_allocation_site_t * site)
+{
+    vm_tag_t tag;
+
+    if (VM_TAG_BT & site->flags)
+    {
+       tag = vm_tag_bt();
+       if (VM_KERN_MEMORY_NONE != tag) return (tag);
+    }
+
+    if (!site->tag) 
+    {
+       lck_spin_lock(&vm_allocation_sites_lock);
+       vm_tag_alloc_locked(site);
+       lck_spin_unlock(&vm_allocation_sites_lock);
+    }
+
+    return (site->tag);
+}
+
+static void 
+vm_page_count_object(mach_memory_info_t * sites, unsigned int __unused num_sites, vm_object_t object)
+{
+    if (!object->wired_page_count) return;
+    if (object != kernel_object)
+    {
+       assert(object->wire_tag < num_sites);
+       sites[object->wire_tag].size += ptoa_64(object->wired_page_count);
+    }
+}
+
+typedef void (*vm_page_iterate_proc)(mach_memory_info_t * sites, 
+                                    unsigned int num_sites, vm_object_t object);
+
+static void 
+vm_page_iterate_purgeable_objects(mach_memory_info_t * sites, unsigned int num_sites,
+                                 vm_page_iterate_proc proc, purgeable_q_t queue, 
+                                 int group)
+{
+    vm_object_t object;
+
+    for (object = (vm_object_t) queue_first(&queue->objq[group]);
+       !queue_end(&queue->objq[group], (queue_entry_t) object);
+       object = (vm_object_t) queue_next(&object->objq))
+    {
+       proc(sites, num_sites, object);
+    }
+}
+
+static void 
+vm_page_iterate_objects(mach_memory_info_t * sites, unsigned int num_sites,
+                       vm_page_iterate_proc proc)
+{
+    purgeable_q_t   volatile_q;
+    queue_head_t  * nonvolatile_q;
+    vm_object_t     object;
+    int             group;
+
+    lck_spin_lock(&vm_objects_wired_lock);
+    queue_iterate(&vm_objects_wired,
+                 object,
+                 vm_object_t,
+                 objq)
+    {
+       proc(sites, num_sites, object);
+    }
+    lck_spin_unlock(&vm_objects_wired_lock);
+
+    lck_mtx_lock(&vm_purgeable_queue_lock);
+    nonvolatile_q = &purgeable_nonvolatile_queue;
+    for (object = (vm_object_t) queue_first(nonvolatile_q);
+        !queue_end(nonvolatile_q, (queue_entry_t) object);
+        object = (vm_object_t) queue_next(&object->objq))
+    {
+       proc(sites, num_sites, object);
+    }
+
+    volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
+    vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, 0);
+
+    volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
+    for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
+    {
+       vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, group);
+    }
+
+    volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
+    for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
+    {
+       vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, group);
+    }
+    lck_mtx_unlock(&vm_purgeable_queue_lock);
+}
+
+static uint64_t
+process_account(mach_memory_info_t * sites, unsigned int __unused num_sites)
+{
+    uint64_t found;
+    unsigned int idx;
+    vm_allocation_site_t * site;
+
+    assert(num_sites >= VM_KERN_MEMORY_COUNT);
+    found = 0;
+    for (idx = 0; idx < VM_KERN_MEMORY_COUNT; idx++) 
+    {
+       found += sites[idx].size;
+       if (idx < VM_KERN_MEMORY_FIRST_DYNAMIC)
+       {
+           sites[idx].site   = idx;
+           sites[idx].flags |= VM_KERN_SITE_TAG;
+           if (VM_KERN_MEMORY_ZONE == idx) sites[idx].flags |= VM_KERN_SITE_HIDE;
+           else                            sites[idx].flags |= VM_KERN_SITE_WIRED;
+           continue;
+       }
+       lck_spin_lock(&vm_allocation_sites_lock);
+       if ((site = vm_allocation_sites[idx]))
+       {
+           if (sites[idx].size)
+           {
+               sites[idx].flags |= VM_KERN_SITE_WIRED;
+               if (VM_TAG_KMOD == (VM_KERN_SITE_TYPE & site->flags))
+               {
+                   sites[idx].site   = OSKextGetKmodIDForSite(site);
+                   sites[idx].flags |= VM_KERN_SITE_KMOD;
+               }
+               else
+               {
+                   sites[idx].site   = VM_KERNEL_UNSLIDE(site);
+                   sites[idx].flags |= VM_KERN_SITE_KERNEL;
+               }
+               site = NULL;
+           }
+           else
+           {
+               vm_tag_free_locked(site->tag);
+               site->tag = VM_KERN_MEMORY_NONE;
+               vm_allocation_sites[idx] = NULL;
+               if (!(VM_TAG_UNLOAD & site->flags)) site = NULL;
+           }
+       }
+       lck_spin_unlock(&vm_allocation_sites_lock);
+        if (site) OSKextFreeSite(site);
+    }
+    return (found);
+}
+
+kern_return_t 
+vm_page_diagnose(mach_memory_info_t * sites, unsigned int num_sites)
+{
+    enum                  { kMaxKernelDepth = 1 };
+    vm_map_t                        maps   [kMaxKernelDepth];
+    vm_map_entry_t                  entries[kMaxKernelDepth];
+    vm_map_t                        map;
+    vm_map_entry_t                  entry;
+    vm_object_offset_t              offset;
+    vm_page_t                       page;
+    int                             stackIdx, count;
+    uint64_t                wired_size;
+    uint64_t                wired_managed_size;
+    uint64_t                wired_reserved_size;
+    mach_memory_info_t     * counts;
+
+    bzero(sites, num_sites * sizeof(mach_memory_info_t));
+
+    vm_page_iterate_objects(sites, num_sites, &vm_page_count_object);
+
+    wired_size          = ptoa_64(vm_page_wire_count + vm_lopage_free_count + vm_page_throttled_count);
+    wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count + vm_page_throttled_count);
+    wired_managed_size  = ptoa_64(vm_page_wire_count - vm_page_wire_count_initial);
+
+    assert(num_sites >= (VM_KERN_MEMORY_COUNT + VM_KERN_COUNTER_COUNT));
+    counts = &sites[VM_KERN_MEMORY_COUNT];
+
+#define SET_COUNT(xcount, xsize, xflags)                       \
+    counts[xcount].site  = (xcount);                   \
+    counts[xcount].size  = (xsize);                    \
+    counts[xcount].flags = VM_KERN_SITE_COUNTER | xflags;
+
+    SET_COUNT(VM_KERN_COUNT_MANAGED,             ptoa_64(vm_page_pages),        0);
+    SET_COUNT(VM_KERN_COUNT_WIRED,               wired_size,                    0);
+    SET_COUNT(VM_KERN_COUNT_WIRED_MANAGED,       wired_managed_size,            0);
+    SET_COUNT(VM_KERN_COUNT_RESERVED,            wired_reserved_size,           VM_KERN_SITE_WIRED);
+    SET_COUNT(VM_KERN_COUNT_STOLEN,              ptoa_64(vm_page_stolen_count), VM_KERN_SITE_WIRED);
+    SET_COUNT(VM_KERN_COUNT_LOPAGE,              ptoa_64(vm_lopage_free_count), VM_KERN_SITE_WIRED);
+
+#define SET_MAP(xcount, xsize, xfree, xlargest)                \
+    counts[xcount].site    = (xcount);                 \
+    counts[xcount].size    = (xsize);                  \
+    counts[xcount].free    = (xfree);                  \
+    counts[xcount].largest = (xlargest);               \
+    counts[xcount].flags   = VM_KERN_SITE_COUNTER;
+
+    vm_map_size_t map_size, map_free, map_largest;
+
+    vm_map_sizes(kernel_map, &map_size, &map_free, &map_largest);
+    SET_MAP(VM_KERN_COUNT_MAP_KERNEL, map_size, map_free, map_largest);
+
+    vm_map_sizes(zone_map, &map_size, &map_free, &map_largest);
+    SET_MAP(VM_KERN_COUNT_MAP_ZONE, map_size, map_free, map_largest);
+
+    vm_map_sizes(kalloc_map, &map_size, &map_free, &map_largest);
+    SET_MAP(VM_KERN_COUNT_MAP_KALLOC, map_size, map_free, map_largest);
+
+    map = kernel_map;
+    stackIdx = 0;
+    while (map)
+    {
+       vm_map_lock(map);
+       for (entry = map->hdr.links.next; map; entry = entry->links.next)
+       {
+           if (entry->is_sub_map)
+           {
+               assert(stackIdx < kMaxKernelDepth);
+               maps[stackIdx] = map;
+               entries[stackIdx] = entry;
+               stackIdx++;
+               map = VME_SUBMAP(entry);
+               entry = NULL;
+               break;
+           }
+           if (VME_OBJECT(entry) == kernel_object)
+           {
+               count = 0;
+               vm_object_lock(VME_OBJECT(entry));
+               for (offset = entry->links.start; offset < entry->links.end; offset += page_size)
+               {
+                       page = vm_page_lookup(VME_OBJECT(entry), offset);
+                       if (page && VM_PAGE_WIRED(page)) count++;
+               }
+               vm_object_unlock(VME_OBJECT(entry));
+
+               if (count)
+               {
+                   assert(VME_ALIAS(entry) < num_sites);
+                   sites[VME_ALIAS(entry)].size += ptoa_64(count);
+               }
+           }
+           if (entry == vm_map_last_entry(map))
+           {
+               vm_map_unlock(map);
+               if (!stackIdx) map = NULL;
+               else
+               {
+                   --stackIdx;
+                   map = maps[stackIdx];
+                   entry = entries[stackIdx];
+               }
+           }
+       }
+    }
+
+    process_account(sites, num_sites);
+    
+    return (KERN_SUCCESS);
+}
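
vm_page_diagnose() above walks kernel_map and any submaps without recursion: when it meets a submap entry it pushes the current map and entry onto a small explicit stack (bounded by kMaxKernelDepth) and descends, popping back up when a map's last entry is reached. The toy sketch below shows only that bounded explicit-stack traversal over a nested list; the entry type and names are illustrative, not the VM map structures.

/*
 * Stand-alone sketch of a bounded explicit-stack traversal, in the spirit
 * of the submap walk in vm_page_diagnose(): descend into at most MAX_DEPTH
 * nested sub-lists without recursion, remembering where to resume.
 */
#include <stddef.h>
#include <stdio.h>

#define MAX_DEPTH 1

struct entry {
    struct entry *next;
    struct entry *sub;      /* non-NULL: a nested sub-list to descend into */
    int           pages;    /* non-zero only for leaf entries */
};

static long count_pages(struct entry *list)
{
    struct entry *stack[MAX_DEPTH];    /* where to resume in the parent list */
    int           depth = 0;
    long          total = 0;

    for (struct entry *e = list; e != NULL; ) {
        if (e->sub != NULL && depth < MAX_DEPTH) {
            stack[depth++] = e->next;  /* remember where to resume */
            e = e->sub;                /* descend into the sub-list */
            continue;
        }
        total += e->pages;
        e = e->next;
        while (e == NULL && depth > 0)
            e = stack[--depth];        /* pop back to the parent list */
    }
    return total;
}

int main(void)
{
    struct entry leaf2 = { NULL,   NULL, 3 };
    struct entry leaf1 = { &leaf2, NULL, 2 };
    struct entry sub   = { NULL,   NULL, 5 };
    struct entry top   = { &leaf1, &sub, 0 };

    printf("total pages: %ld\n", count_pages(&top));  /* prints 10 */
    return 0;
}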
index 00039b3665bec9f77941ce8191c2ef92377a07d3..c9601f7f693de43b042cdd496e2d91df7dbfb57c 100644 (file)
@@ -1044,6 +1044,7 @@ vm_shared_region_map_file(
        mach_vm_offset_t        first_mapping = (mach_vm_offset_t) -1;
 
 
+
        kr = KERN_SUCCESS;
 
        vm_shared_region_lock();
@@ -1111,7 +1112,7 @@ vm_shared_region_map_file(
                        map_port = MACH_PORT_NULL;
                } else {
                        /* file-backed memory */
-                       map_port = (ipc_port_t) file_object->pager;
+                       __IGNORE_WCASTALIGN(map_port = (ipc_port_t) file_object->pager);
                }
                
                if (mappings[i].sfm_init_prot & VM_PROT_SLIDE) {
@@ -1565,7 +1566,7 @@ vm_shared_region_slide_init(
 
        kr = kmem_alloc(kernel_map,
                        (vm_offset_t *) &slide_info_entry,
-                       (vm_size_t) slide_info_size);
+                       (vm_size_t) slide_info_size, VM_KERN_MEMORY_OSFMK);
        if (kr != KERN_SUCCESS) {
                return kr;
        }
@@ -1595,15 +1596,16 @@ vm_shared_region_slide_init(
                        vm_object_t shadow_obj = VM_OBJECT_NULL;
         
                        if (entry->is_sub_map == TRUE) { 
-                               map = entry->object.sub_map;
+                               map = VME_SUBMAP(entry);
                                start -= entry->vme_start;
-                               start += entry->offset;
+                               start += VME_OFFSET(entry);
                                vm_map_lock_read(map);
                                vm_map_unlock_read(cur_map);
                                goto Retry;
                        } else {
-                               object = entry->object.vm_object;
-                               offset = (start - entry->vme_start) + entry->offset;
+                               object = VME_OBJECT(entry);
+                               offset = ((start - entry->vme_start) +
+                                         VME_OFFSET(entry));
                        }
         
                        vm_object_lock(object);
@@ -1820,7 +1822,7 @@ _vm_commpage_init(
        if (kr != KERN_SUCCESS) {
                panic("_vm_commpage_init: could not allocate mem_entry");
        }
-       new_map = vm_map_create(pmap_create(NULL, 0, FALSE), 0, size, TRUE);
+       new_map = vm_map_create(pmap_create(NULL, 0, 0), 0, size, TRUE);
        if (new_map == VM_MAP_NULL) {
                panic("_vm_commpage_init: could not allocate VM map");
        }
index bb985060ff0ce4a0b17318c2ef1063d8cd79fb13..e50177fdb8fde258c7ed2d59a5ce76a959765382 100644 (file)
@@ -380,8 +380,8 @@ swapfile_pager_data_request(
                retval = kr;
                goto done;
        }
-       map_entry->object.vm_object = kernel_object;
-       map_entry->offset = kernel_mapping - VM_MIN_KERNEL_ADDRESS;
+       VME_OBJECT_SET(map_entry, kernel_object);
+       VME_OFFSET_SET(map_entry, kernel_mapping - VM_MIN_KERNEL_ADDRESS);
        vm_map_unlock(kernel_map);
        dst_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping);
        dst_ptr = (char *) dst_vaddr;
@@ -728,7 +728,7 @@ swapfile_pager_lookup(
 {
        swapfile_pager_t        pager;
 
-       pager = (swapfile_pager_t) mem_obj;
+       __IGNORE_WCASTALIGN(pager = (swapfile_pager_t) mem_obj);
        assert(pager->pager_ops == &swapfile_pager_ops);
        assert(pager->ref_count > 0);
        return pager;
index 024140fb53562e8137fb76b73c08ee3894973366..7b1eb07034e0ae94775e026f09c738efe84844f6 100644 (file)
@@ -1139,7 +1139,7 @@ mach_vm_wire(
                                                   VM_MAP_PAGE_MASK(map)),
                                 vm_map_round_page(start+size,
                                                   VM_MAP_PAGE_MASK(map)),
-                                access,
+                                access | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
                                 TRUE);
        } else {
                rc = vm_map_unwire(map,
@@ -1189,7 +1189,7 @@ vm_wire(
                                                   VM_MAP_PAGE_MASK(map)),
                                 vm_map_round_page(start+size,
                                                   VM_MAP_PAGE_MASK(map)),
-                                access,
+                                access | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK),
                                 TRUE);
        } else {
                rc = vm_map_unwire(map,
@@ -1306,9 +1306,13 @@ vm_toggle_entry_reuse(int toggle, int *old_value)
        if(toggle == VM_TOGGLE_GETVALUE && old_value != NULL){
                *old_value = map->disable_vmentry_reuse;
        } else if(toggle == VM_TOGGLE_SET){
+               vm_map_entry_t map_to_entry;
+
                vm_map_lock(map);
+               vm_map_disable_hole_optimization(map);
                map->disable_vmentry_reuse = TRUE;
-               if (map->first_free == vm_map_to_entry(map)) {
+               __IGNORE_WCASTALIGN(map_to_entry = vm_map_to_entry(map));
+               if (map->first_free == map_to_entry) {
                        map->highest_entry_end = vm_map_min(map);
                } else {
                        map->highest_entry_end = map->first_free->vme_end;
@@ -1822,11 +1826,11 @@ vm_map_get_upl(
        upl_t                   *upl,
        upl_page_info_array_t   page_list,
        unsigned int            *count,
-       int                     *flags,
+       upl_control_flags_t     *flags,
        int                     force_data_sync)
 {
-       int             map_flags;
-       kern_return_t   kr;
+       upl_control_flags_t map_flags;
+       kern_return_t       kr;
 
        if (VM_MAP_NULL == map)
                return KERN_INVALID_ARGUMENT;
@@ -1874,6 +1878,7 @@ mach_make_memory_entry_64(
 
        /* needed for call to vm_map_lookup_locked */
        boolean_t               wired;
+       boolean_t               iskernel;
        vm_object_offset_t      obj_off;
        vm_prot_t               prot;
        struct vm_object_fault_info     fault_info;
@@ -1885,9 +1890,8 @@ mach_make_memory_entry_64(
        vm_map_entry_t          next_entry;
        vm_map_t                local_map;
        vm_map_t                original_map = target_map;
-       vm_map_size_t           total_size;
-       vm_map_size_t           map_size;
-       vm_map_offset_t         map_offset;
+       vm_map_size_t           total_size, map_size;
+       vm_map_offset_t         map_start, map_end;
        vm_map_offset_t         local_offset;
        vm_object_size_t        mappable_size;
 
@@ -1904,6 +1908,7 @@ mach_make_memory_entry_64(
 
        boolean_t               force_shadow = FALSE;
        boolean_t               use_data_addr;
+       boolean_t               use_4K_compat;
 
        if (((permission & 0x00FF0000) &
             ~(MAP_MEM_ONLY |
@@ -1912,6 +1917,7 @@ mach_make_memory_entry_64(
               MAP_MEM_NAMED_REUSE |
               MAP_MEM_USE_DATA_ADDR |
               MAP_MEM_VM_COPY |
+              MAP_MEM_4K_DATA_ADDR |
               MAP_MEM_VM_SHARE))) {
                /*
                 * Unknown flag: reject for forward compatibility.
@@ -1935,18 +1941,20 @@ mach_make_memory_entry_64(
        mask_protections = permission & VM_PROT_IS_MASK;
        access = GET_MAP_MEM(permission);
        use_data_addr = ((permission & MAP_MEM_USE_DATA_ADDR) != 0);
+       use_4K_compat = ((permission & MAP_MEM_4K_DATA_ADDR) != 0);
 
        user_handle = IP_NULL;
        user_entry = NULL;
 
-       map_offset = vm_map_trunc_page(offset, PAGE_MASK);
+       map_start = vm_map_trunc_page(offset, PAGE_MASK);
 
        if (permission & MAP_MEM_ONLY) {
                boolean_t               parent_is_object;
 
-               map_size = vm_map_round_page(*size, PAGE_MASK);
+               map_end = vm_map_round_page(offset + *size, PAGE_MASK);
+               map_size = map_end - map_start;
                
-               if (use_data_addr || parent_entry == NULL) {
+               if (use_data_addr || use_4K_compat || parent_entry == NULL) {
                        return KERN_INVALID_ARGUMENT;
                }
 
@@ -1991,9 +1999,10 @@ mach_make_memory_entry_64(
                        *object_handle = IP_NULL;
                return KERN_SUCCESS;
        } else if (permission & MAP_MEM_NAMED_CREATE) {
-               map_size = vm_map_round_page(*size, PAGE_MASK);
+               map_end = vm_map_round_page(offset + *size, PAGE_MASK);
+               map_size = map_end - map_start;
 
-               if (use_data_addr) {
+               if (use_data_addr || use_4K_compat) {
                        return KERN_INVALID_ARGUMENT;
                }
 
@@ -2082,7 +2091,8 @@ mach_make_memory_entry_64(
                /* user_object pager and internal fields are not used */
                /* when the object field is filled in.                */
 
-               *size = CAST_DOWN(vm_size_t, map_size);
+               *size = CAST_DOWN(vm_size_t, (user_entry->size -
+                                             user_entry->data_offset));
                *object_handle = user_handle;
                return KERN_SUCCESS;
        }
@@ -2094,18 +2104,18 @@ mach_make_memory_entry_64(
                        return KERN_INVALID_TASK;
                }
 
-               if (use_data_addr) {
-                       map_size = (vm_map_round_page(offset + *size,
-                                                     PAGE_MASK) -
-                                   map_offset);
-                       offset_in_page = offset - map_offset;
+               map_end = vm_map_round_page(offset + *size, PAGE_MASK);
+               map_size = map_end - map_start;
+               if (use_data_addr || use_4K_compat) {
+                       offset_in_page = offset - map_start;
+                       if (use_4K_compat)
+                               offset_in_page &= ~((signed)(0xFFF));
                } else {
-                       map_size = vm_map_round_page(*size, PAGE_MASK);
                        offset_in_page = 0;
                }
 
                kr = vm_map_copyin(target_map,
-                                  map_offset,
+                                  map_start,
                                   map_size,
                                   FALSE,
                                   &copy);
@@ -2129,7 +2139,8 @@ mach_make_memory_entry_64(
                user_entry->size = map_size;
                user_entry->data_offset = offset_in_page;
 
-               *size = CAST_DOWN(vm_size_t, map_size);
+               *size = CAST_DOWN(vm_size_t, (user_entry->size -
+                                             user_entry->data_offset));
                *object_handle = user_handle;
                return KERN_SUCCESS;
        }
@@ -2142,18 +2153,18 @@ mach_make_memory_entry_64(
                        return KERN_INVALID_TASK;
                }
 
-               if (use_data_addr) {
-                       map_size = (vm_map_round_page(offset + *size,
-                                                     PAGE_MASK) -
-                                   map_offset);
-                       offset_in_page = offset - map_offset;
+               map_end = vm_map_round_page(offset + *size, PAGE_MASK);
+               map_size = map_end - map_start;
+               if (use_data_addr || use_4K_compat) {
+                       offset_in_page = offset - map_start;
+                       if (use_4K_compat)
+                               offset_in_page &= ~((signed)(0xFFF));
                } else {
-                       map_size = vm_map_round_page(*size, PAGE_MASK);
                        offset_in_page = 0;
                }
 
                kr = vm_map_copy_extract(target_map,
-                                        map_offset,
+                                        map_start,
                                         map_size,
                                         &copy,
                                         &cur_prot,
@@ -2200,7 +2211,8 @@ mach_make_memory_entry_64(
                user_entry->size = map_size;
                user_entry->data_offset = offset_in_page;
 
-               *size = CAST_DOWN(vm_size_t, map_size);
+               *size = CAST_DOWN(vm_size_t, (user_entry->size -
+                                             user_entry->data_offset));
                *object_handle = user_handle;
                return KERN_SUCCESS;
        }
@@ -2208,11 +2220,13 @@ mach_make_memory_entry_64(
        if (parent_entry == NULL ||
            (permission & MAP_MEM_NAMED_REUSE)) {
 
-               if (use_data_addr) {
-                       map_size = vm_map_round_page(offset + *size, PAGE_MASK) - map_offset;
-                       offset_in_page = offset - map_offset;
+               map_end = vm_map_round_page(offset + *size, PAGE_MASK);
+               map_size = map_end - map_start;
+               if (use_data_addr || use_4K_compat) {
+                       offset_in_page = offset - map_start;
+                       if (use_4K_compat)
+                               offset_in_page &= ~((signed)(0xFFF));
                } else {
-                       map_size = vm_map_round_page(*size, PAGE_MASK);
                        offset_in_page = 0;
                }
 
@@ -2231,7 +2245,7 @@ redo_lookup:
                /* note we check the permission of the range against */
                /* that requested by the caller */
 
-               kr = vm_map_lookup_locked(&target_map, map_offset, 
+               kr = vm_map_lookup_locked(&target_map, map_start, 
                                          protections | mask_protections,
                                          OBJECT_LOCK_EXCLUSIVE, &version,
                                          &object, &obj_off, &prot, &wired,
@@ -2281,7 +2295,7 @@ redo_lookup:
                vm_object_unlock(object);
 
                local_map = original_map;
-               local_offset = map_offset;
+               local_offset = map_start;
                if(target_map != local_map) {
                        vm_map_unlock_read(target_map);
                        if(real_map != target_map)
@@ -2301,8 +2315,9 @@ redo_lookup:
                        object = VM_OBJECT_NULL;
                         goto make_mem_done;
                   }
+                  iskernel = (local_map->pmap == kernel_pmap);
                   if(!(map_entry->is_sub_map)) {
-                     if(map_entry->object.vm_object != object) {
+                     if (VME_OBJECT(map_entry) != object) {
                         kr = KERN_INVALID_ARGUMENT;
                          vm_map_unlock_read(target_map);
                         if(real_map != target_map)
@@ -2315,14 +2330,14 @@ redo_lookup:
                   } else {
                        vm_map_t        tmap;
                        tmap = local_map;
-                       local_map = map_entry->object.sub_map;
+                       local_map = VME_SUBMAP(map_entry);
                        
                        vm_map_lock_read(local_map);
                        vm_map_unlock_read(tmap);
                        target_map = local_map;
                        real_map = local_map;
                        local_offset = local_offset - map_entry->vme_start;
-                       local_offset += map_entry->offset;
+                       local_offset += VME_OFFSET(map_entry);
                   }
                }
 
@@ -2363,13 +2378,13 @@ redo_lookup:
                        /* lets see if the next map entry is still   */
                        /* pointing at this object and is contiguous */
                        while(map_size > mappable_size) {
-                               if((next_entry->object.vm_object == object) &&
-                                       (next_entry->vme_start == 
-                                               next_entry->vme_prev->vme_end) &&
-                                       (next_entry->offset == 
-                                          next_entry->vme_prev->offset + 
-                                          (next_entry->vme_prev->vme_end - 
-                                          next_entry->vme_prev->vme_start))) {
+                               if ((VME_OBJECT(next_entry) == object) &&
+                                   (next_entry->vme_start == 
+                                    next_entry->vme_prev->vme_end) &&
+                                   (VME_OFFSET(next_entry) == 
+                                    (VME_OFFSET(next_entry->vme_prev) + 
+                                     (next_entry->vme_prev->vme_end - 
+                                      next_entry->vme_prev->vme_start)))) {
                                        if (mask_protections) {
                                                /*
                                                 * The caller asked us to use
@@ -2403,7 +2418,9 @@ redo_lookup:
                        }
                }
 
-               if (vm_map_entry_should_cow_for_true_share(map_entry) &&
+               /* vm_map_entry_should_cow_for_true_share() checks for malloc tags,
+                * never true in kernel */ 
+               if (!iskernel && vm_map_entry_should_cow_for_true_share(map_entry) &&
                    object->vo_size > map_size &&
                    map_size != 0) {
                        /*
@@ -2421,16 +2438,16 @@ redo_lookup:
 
                        vm_map_clip_start(target_map,
                                          map_entry,
-                                         vm_map_trunc_page(offset,
+                                         vm_map_trunc_page(map_start,
                                                            VM_MAP_PAGE_MASK(target_map)));
                        vm_map_clip_end(target_map,
                                        map_entry,
-                                       (vm_map_round_page(offset + map_size,
+                                       (vm_map_round_page(map_end,
                                                           VM_MAP_PAGE_MASK(target_map))));
                        force_shadow = TRUE;
 
                        if ((map_entry->vme_end - offset) < map_size) {
-                               map_size = map_entry->vme_end - offset;
+                               map_size = map_entry->vme_end - map_start;
                        }
                        total_size = map_entry->vme_end - map_entry->vme_start;
 
@@ -2449,7 +2466,7 @@ redo_lookup:
                            ((map_entry->needs_copy  ||
                              object->shadowed ||
                              (object->vo_size > total_size &&
-                              (map_entry->offset != 0 ||
+                              (VME_OFFSET(map_entry) != 0 ||
                                object->vo_size >
                                vm_map_round_page(total_size,
                                                  VM_MAP_PAGE_MASK(target_map)))))
@@ -2490,20 +2507,21 @@ redo_lookup:
                                 */
                                 
                                /* create a shadow object */
-                               vm_object_shadow(&map_entry->object.vm_object,
-                                                &map_entry->offset, total_size);
-                               shadow_object = map_entry->object.vm_object;
+                               VME_OBJECT_SHADOW(map_entry, total_size);
+                               shadow_object = VME_OBJECT(map_entry);
 #if 00
                                vm_object_unlock(object);
 #endif
 
                                prot = map_entry->protection & ~VM_PROT_WRITE;
 
-                               if (override_nx(target_map, map_entry->alias) && prot)
+                               if (override_nx(target_map,
+                                               VME_ALIAS(map_entry))
+                                   && prot)
                                        prot |= VM_PROT_EXECUTE;
 
                                vm_object_pmap_protect(
-                                       object, map_entry->offset,
+                                       object, VME_OFFSET(map_entry),
                                        total_size,
                                        ((map_entry->is_shared 
                                          || target_map->mapped_in_other_pmaps)
@@ -2521,15 +2539,16 @@ redo_lookup:
                                    assert((next_entry->wired_count == 0) ||
                                           (map_entry->wired_count));
 
-                                  if(next_entry->object.vm_object == object) {
+                                   if (VME_OBJECT(next_entry) == object) {
                                        vm_object_reference_locked(shadow_object);
-                                       next_entry->object.vm_object 
-                                                       = shadow_object;
+                                       VME_OBJECT_SET(next_entry,
+                                                      shadow_object);
                                        vm_object_deallocate(object);
-                                       next_entry->offset 
-                                               = next_entry->vme_prev->offset +
-                                               (next_entry->vme_prev->vme_end 
-                                               - next_entry->vme_prev->vme_start);
+                                       VME_OFFSET_SET(
+                                               next_entry,
+                                               (VME_OFFSET(next_entry->vme_prev) +
+                                                (next_entry->vme_prev->vme_end 
+                                                 - next_entry->vme_prev->vme_start)));
                                                next_entry->needs_copy = FALSE;
                                        } else {
                                                panic("mach_make_memory_entry_64:"
@@ -2549,8 +2568,8 @@ redo_lookup:
                                vm_object_deallocate(object); /* extra ref */
                                object = shadow_object;
 
-                               obj_off = (local_offset - map_entry->vme_start)
-                                                        + map_entry->offset;
+                               obj_off = ((local_offset - map_entry->vme_start)
+                                          + VME_OFFSET(map_entry));
 
                                vm_map_lock_write_to_read(target_map);
                        }
@@ -2638,8 +2657,10 @@ redo_lookup:
                            parent_entry->offset == obj_off &&
                            parent_entry->protection == protections &&
                            parent_entry->size == map_size &&
-                           ((!use_data_addr && (parent_entry->data_offset == 0)) ||  
-                            (use_data_addr && (parent_entry->data_offset == offset_in_page)))) {
+                           ((!(use_data_addr || use_4K_compat) &&
+                             (parent_entry->data_offset == 0)) ||  
+                            ((use_data_addr || use_4K_compat) &&
+                             (parent_entry->data_offset == offset_in_page)))) {
                                /*
                                 * We have a match: re-use "parent_entry".
                                 */
@@ -2650,7 +2671,9 @@ redo_lookup:
                                /* Get an extra send-right on handle */
                                ipc_port_copy_send(parent_handle);
 
-                               *size = CAST_DOWN(vm_size_t, map_size);
+                               *size = CAST_DOWN(vm_size_t,
+                                                 (parent_entry->size -
+                                                  parent_entry->data_offset));
                                *object_handle = parent_handle;
                                return KERN_SUCCESS;
                        } else {
@@ -2682,7 +2705,8 @@ redo_lookup:
                /* user_object pager and internal fields are not used */
                /* when the object field is filled in.                */
 
-               *size = CAST_DOWN(vm_size_t, map_size);
+               *size = CAST_DOWN(vm_size_t, (user_entry->size -
+                                             user_entry->data_offset));
                *object_handle = user_handle;
                return KERN_SUCCESS;
 
@@ -2693,7 +2717,7 @@ redo_lookup:
                        goto make_mem_done;
                }
 
-               if (use_data_addr) {
+               if (use_data_addr || use_4K_compat) {
                        /*
                         * submaps and pagers should only be accessible from within
                         * the kernel, which shouldn't use the data address flag, so can fail here.
@@ -2710,11 +2734,15 @@ redo_lookup:
                                goto make_mem_done;
                        }
 
-                       map_offset = vm_map_trunc_page(offset + parent_entry->data_offset, PAGE_MASK);
-                       offset_in_page = (offset + parent_entry->data_offset) - map_offset;
-                       map_size = vm_map_round_page(offset + parent_entry->data_offset + *size, PAGE_MASK) - map_offset;
+                       map_start = vm_map_trunc_page(offset + parent_entry->data_offset, PAGE_MASK);
+                       offset_in_page = (offset + parent_entry->data_offset) - map_start;
+                       if (use_4K_compat)
+                               offset_in_page &= ~((signed)(0xFFF));
+                       map_end = vm_map_round_page(offset + parent_entry->data_offset + *size, PAGE_MASK);
+                       map_size = map_end - map_start;
                } else {
-                       map_size = vm_map_round_page(*size, PAGE_MASK);
+                       map_end = vm_map_round_page(offset + *size, PAGE_MASK);
+                       map_size = map_end - map_start;
                        offset_in_page = 0;
 
                        if((offset + map_size) > parent_entry->size) {
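
The use_data_addr / use_4K_compat arithmetic above truncates the caller's address down to a page boundary, remembers the sub-page remainder as data_offset, and rounds the end of the range up. A minimal C sketch of that window computation, treating vm_map_trunc_page/vm_map_round_page as plain mask operations (names ending in _sk are illustrative stand-ins, not the kernel's):

    #include <stdint.h>

    /* Sketch of the map_start/map_end/offset_in_page math above.
     * page_mask plays the role of PAGE_MASK; the 4K-compat case additionally
     * drops the low 12 bits of the sub-page offset, as in the diff. */
    static void
    data_addr_window_sk(uint64_t offset, uint64_t data_offset, uint64_t size,
                        uint64_t page_mask, int use_4K_compat,
                        uint64_t *map_start, uint64_t *map_end,
                        uint64_t *offset_in_page)
    {
        *map_start      = (offset + data_offset) & ~page_mask;              /* vm_map_trunc_page */
        *offset_in_page = (offset + data_offset) - *map_start;
        if (use_4K_compat)
            *offset_in_page &= ~0xFFFULL;                                   /* keep whole-4K units only */
        *map_end = (offset + data_offset + size + page_mask) & ~page_mask;  /* vm_map_round_page */
        /* the hunk's map_size is then map_end - map_start */
    }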
@@ -2743,7 +2771,7 @@ redo_lookup:
                }
 
                user_entry->size = map_size;
-               user_entry->offset = parent_entry->offset + map_offset;
+               user_entry->offset = parent_entry->offset + map_start;
                user_entry->data_offset = offset_in_page; 
                user_entry->is_sub_map = parent_entry->is_sub_map;
                user_entry->is_pager = parent_entry->is_pager;
@@ -2792,7 +2820,8 @@ redo_lookup:
                        object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
                   vm_object_unlock(object);
                }
-               *size = CAST_DOWN(vm_size_t, map_size);
+               *size = CAST_DOWN(vm_size_t, (user_entry->size -
+                                             user_entry->data_offset));
                *object_handle = user_handle;
                return KERN_SUCCESS;
        }
@@ -3480,25 +3509,26 @@ vm_map_get_phys_page(
        vm_map_lock(map);
        while (vm_map_lookup_entry(map, map_offset, &entry)) {
 
-               if (entry->object.vm_object == VM_OBJECT_NULL) {
+               if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
                        vm_map_unlock(map);
                        return (ppnum_t) 0;
                }
                if (entry->is_sub_map) {
                        vm_map_t        old_map;
-                       vm_map_lock(entry->object.sub_map);
+                       vm_map_lock(VME_SUBMAP(entry));
                        old_map = map;
-                       map = entry->object.sub_map;
-                       map_offset = entry->offset + (map_offset - entry->vme_start);
+                       map = VME_SUBMAP(entry);
+                       map_offset = (VME_OFFSET(entry) +
+                                     (map_offset - entry->vme_start));
                        vm_map_unlock(old_map);
                        continue;
                }
-               if (entry->object.vm_object->phys_contiguous) {
+               if (VME_OBJECT(entry)->phys_contiguous) {
                        /* These are  not standard pageable memory mappings */
                        /* If they are not present in the object they will  */
                        /* have to be picked up from the pager through the  */
                        /* fault mechanism.  */
-                       if(entry->object.vm_object->vo_shadow_offset == 0) {
+                       if (VME_OBJECT(entry)->vo_shadow_offset == 0) {
                                /* need to call vm_fault */
                                vm_map_unlock(map);
                                vm_fault(map, map_offset, VM_PROT_NONE, 
@@ -3506,15 +3536,16 @@ vm_map_get_phys_page(
                                vm_map_lock(map);
                                continue;
                        }
-                       offset = entry->offset + (map_offset - entry->vme_start);
+                       offset = (VME_OFFSET(entry) +
+                                 (map_offset - entry->vme_start));
                        phys_page = (ppnum_t)
-                               ((entry->object.vm_object->vo_shadow_offset 
-                                                       + offset) >> PAGE_SHIFT);
+                               ((VME_OBJECT(entry)->vo_shadow_offset 
+                                 + offset) >> PAGE_SHIFT);
                        break;
                        
                }
-               offset = entry->offset + (map_offset - entry->vme_start);
-               object = entry->object.vm_object;
+               offset = (VME_OFFSET(entry) + (map_offset - entry->vme_start));
+               object = VME_OBJECT(entry);
                vm_object_lock(object);
                while (TRUE) {
                        vm_page_t dst_page = vm_page_lookup(object,offset);
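
The loop above now reaches the entry's backing state through the VME_OBJECT()/VME_SUBMAP()/VME_OFFSET() accessors rather than the entry->object.* fields; the submap case rebases the lookup offset into the child map and retries. A minimal C sketch of that rebasing step, over simplified stand-in types rather than the real vm_map_entry:

    #include <stdint.h>

    struct map_sk;                       /* opaque stand-in for vm_map_t */

    struct entry_sk {                    /* simplified stand-in for a map entry */
        int            is_sub_map;
        uint64_t       vme_start;        /* first address covered by the entry */
        uint64_t       offset;           /* what VME_OFFSET(entry) returns */
        struct map_sk *submap;           /* what VME_SUBMAP(entry) returns */
    };

    /* Rebase an address from the parent map into the entry's submap, as the
     * submap branch above does before retrying the lookup. */
    static void
    descend_into_submap_sk(const struct entry_sk *entry,
                           struct map_sk **map, uint64_t *map_offset)
    {
        *map_offset = entry->offset + (*map_offset - entry->vme_start);
        *map        = entry->submap;
    }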
@@ -3545,7 +3576,7 @@ vm_map_get_phys_page(
 }
 
 
-
+#if 0
 kern_return_t kernel_object_iopl_request(      /* forward */
        vm_named_entry_t        named_entry,
        memory_object_offset_t  offset,
@@ -3674,7 +3705,8 @@ kernel_object_iopl_request(
                                     upl_ptr,
                                     user_page_list,
                                     page_list_count,
-                                    caller_flags);
+                                    (upl_control_flags_t)(unsigned int)caller_flags);
        vm_object_deallocate(object);
        return ret;
 }
+#endif
index 6a8d316b9d92ac705803ff5ac876e94abca5de8c..49a70b2cbdafd7fbe69a9bc2e16aaec81792d2b2 100644 (file)
                3. 3 low 10-bits are packed into a 32-bit word, this is after the dictionary indices section.
 
        cclee, 11/30/12
+
+    Added zero page, single value page, sparse page, early abort optimizations
+    rsrini, 09/14/14
+
 */
 
        .text
        .align 4,0x90
 
+#define SV_RETURN           $0                      // return value when SV, ZV page is found
+#define MZV_MAGIC           $17185                  // magic value used to identify MZV page encoding
+#define CHKPT_BYTES         416                     // for early aborts: checkpoint after processing this many bytes. Must be in range [4..4096]
+#define CHKPT_TAG_BYTES     (CHKPT_BYTES/16)        // size of the tags for  CHKPT_BYTES of data
+#define CHKPT_SHRUNK_BYTES  426                     // for early aborts: max size of compressed stream to allow further processing ..
+                                                    //      .. to disable early aborts, set CHKPT_SHRUNK_BYTES to 4096
+
+#if CHKPT_BYTES > 4096
+    #error CHKPT_BYTES must be <= 4096
+#endif
+#if CHKPT_BYTES < 4
+    #error CHKPT_BYTES must be >= 4
+#endif
+
 .globl _WKdm_compress_new
 _WKdm_compress_new:
        pushq   %rbp
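
The CHKPT_* constants above drive the new early-abort path: after scanning the first CHKPT_BYTES of the page the scan loop jumps to CHECKPOINT, estimates how big the compressed output would be so far, and bails out if the data is not shrinking. A minimal C sketch of that estimate, assuming the byte accounting spelled out by the comments in the CHECKPOINT code below (the multiply by 1365 and shift by 11 approximates multiplying by 2/3):

    #include <stdint.h>

    #define CHKPT_BYTES         416
    #define CHKPT_TAG_BYTES     (CHKPT_BYTES / 16)
    #define CHKPT_SHRUNK_BYTES  426

    /* Sketch of the early-abort test made at CHECKPOINT, using byte counts
     * accumulated while scanning the first CHKPT_BYTES of the page. */
    static int
    should_abort_early_sk(uint64_t low_bits_bytes,  /* next_low_bits - start_next_low_bits */
                          uint64_t miss_bytes,      /* next_full_patt - start_next_full_patt */
                          uint64_t qp_entries)      /* next_qp - start_next_qp */
    {
        uint64_t est = (low_bits_bytes * 1365) >> 11;  /* ~2/3: three 10-bit values per 32-bit word */
        est += miss_bytes;                             /* 4 bytes per dictionary miss */
        est += qp_entries / 2;                         /* one 4-bit dictionary index per hit */
        est += CHKPT_TAG_BYTES;                        /* tags for the bytes scanned so far */
        return est > CHKPT_SHRUNK_BYTES;               /* not shrinking: give up on this page */
    }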
@@ -162,19 +180,25 @@ _WKdm_compress_new:
        pushq   %r13
        pushq   %r12
        pushq   %rbx
-       subq    $(24+64), %rsp
+       subq    $(48+64), %rsp
 
-
-       #define tempTagsArray   64(%rsp)
+       #define tempTagsArray       64(%rsp)
        #define tempLowBitsArray        72(%rsp)
+
+    #define start_next_full_patt  80(%rsp)
+    #define start_next_input_word 88(%rsp)
+    #define byte_budget           96(%rsp)
+    #define start_next_qp         tempQPosArray
+    #define start_next_low_bits   tempLowBitsArray 
+    
        #define next_tag                        %r8
        #define next_input_word         %rdi
        #define end_of_input            %r13
        #define next_full_patt          %rbx
        #define dict_location           %rcx
        #define next_qp                         %r10
+    #define checkpoint          %r11
        #define dictionary                      %rsp
-       #define scratch                         %r11
        #define dest_buf                        %r12
        #define hashTable                       %r14
        #define tempQPosArray           %r15
@@ -182,7 +206,6 @@ _WKdm_compress_new:
        #define byte_count                      %r9d
 
        movq    %rsi, %r12                                              // dest_buf
-       movq    %rdx, scratch                                   // scratch = dictionary
 
        movq    %rdx, tempTagsArray                     // &tempTagsArray[0]
        movq    %rdx, next_tag                                  // next_tag always points to the one following the current tag 
@@ -190,6 +213,7 @@ _WKdm_compress_new:
        leaq    1024(%rdx), tempQPosArray               // &tempQPosArray[0]
        movq    tempQPosArray, next_qp                  // next_qp
 
+    leaq    CHKPT_BYTES(%rdi), checkpoint   // checkpoint = src_buf + CHKPT_BYTES
        leaq    4096(%rdi), end_of_input                // end_of_input = src_buf + num_input_words
        leaq    268(%rsi), %rbx                                 // dest_buf + [TAGS_AREA_OFFSET + (num_input_words / 16)]*4
 
@@ -197,37 +221,46 @@ _WKdm_compress_new:
        subl    $(12+256), byte_count                   // header + tags
        jle             L_budgetExhausted
 
+                                            // NOTE: ALL THE DICTIONARY VALUES MUST BE INITIALIZED TO ZERO
+                                            // THIS IS NEEDED TO EFFICIENTLY DETECT SINGLE VALUE PAGES
        // PRELOAD_DICTIONARY;
-       movl    $1, 0(dictionary)
-       movl    $1, 4(dictionary)
-       movl    $1, 8(dictionary)
-       movl    $1, 12(dictionary)
-       movl    $1, 16(dictionary)
-       movl    $1, 20(dictionary)
-       movl    $1, 24(dictionary)
-       movl    $1, 28(dictionary)
-       movl    $1, 32(dictionary)
-       movl    $1, 36(dictionary)
-       movl    $1, 40(dictionary)
-       movl    $1, 44(dictionary)
-       movl    $1, 48(dictionary)
-       movl    $1, 52(dictionary)
-       movl    $1, 56(dictionary)
-       movl    $1, 60(dictionary)
+       movl    $0, 0(dictionary)
+       movl    $0, 4(dictionary)
+       movl    $0, 8(dictionary)
+       movl    $0, 12(dictionary)
+       movl    $0, 16(dictionary)
+       movl    $0, 20(dictionary)
+       movl    $0, 24(dictionary)
+       movl    $0, 28(dictionary)
+       movl    $0, 32(dictionary)
+       movl    $0, 36(dictionary)
+       movl    $0, 40(dictionary)
+       movl    $0, 44(dictionary)
+       movl    $0, 48(dictionary)
+       movl    $0, 52(dictionary)
+       movl    $0, 56(dictionary)
+       movl    $0, 60(dictionary)
 
        leaq    2048(%rdx), %rax                                // &tempLowBitsArray[0]
        movq    %rax, tempLowBitsArray                  // save for later reference
        movq    %rax, next_low_bits                             // next_low_bits        
 
        leaq    _hashLookupTable_new(%rip), hashTable   // hash look up table
+
+    movq    next_full_patt, start_next_full_patt
+    movq    next_input_word, start_next_input_word
+    movl    %ecx, byte_budget               // save the byte budget    
+
+
        jmp             L_scan_loop
 
        .align 4,0x90
 L_RECORD_ZERO:
        movb    $0, -1(next_tag)                                                // *next_tag = ZERO;
        addq    $4, next_input_word                                     // next_input_word++;
-       cmpq    next_input_word, end_of_input                   // end_of_input vs next_input_word
-       jbe             L_done_search
+       cmpq    next_input_word, checkpoint             // checkpoint time?
+       je              CHECKPOINT
+
 L_scan_loop:
        movl    (next_input_word), %edx
        incq    next_tag                                                                // next_tag++
@@ -253,8 +286,9 @@ L_RECORD_MISS:
        movb    $2, -1(next_tag)                                                // *next_tag = 2 for miss
        subl    $4, byte_count                                                  // fill in a new 4-bytes word
        jle             L_budgetExhausted
-       cmpq    next_input_word, end_of_input                   // end_of_input vs next_input_word
-       ja              L_scan_loop
+       cmpq    next_input_word, checkpoint             // checkpoint time?
+       jne     L_scan_loop
+       jmp         CHECKPOINT  
 
 L_done_search:
 
@@ -395,7 +429,7 @@ L20:
 
 L_done:
        // restore registers and return
-       addq    $(24+64), %rsp
+       addq    $(48+64), %rsp
        popq    %rbx
        popq    %r12
        popq    %r13
@@ -417,9 +451,9 @@ L_RECORD_EXACT:
        movb    $3, -1(next_tag)                                        // *next_tag = 3 for exact
        movb    %cl, (next_qp)                                          // *next_qp = word offset (4-bit)
        incq    next_qp                                                         // next_qp++
-       cmpq    next_input_word, end_of_input                   // end_of_input vs next_input_word
-       ja              L_scan_loop
-       jmp             L_done_search
+       cmpq    next_input_word, checkpoint         // checkpoint time?
+       jne     L_scan_loop
+       jmp         CHECKPOINT  
 
        .align 4,0x90
 L_RECORD_PARTIAL:
@@ -433,7 +467,156 @@ L_RECORD_PARTIAL:
        incq    next_qp                                                         // next_qp++
        mov             %dx, (next_low_bits)                            // save next_low_bits
        addq    $2, next_low_bits                                       // next_low_bits++
-       cmpq    next_input_word, end_of_input           // end_of_input vs next_input_word
-       ja              L_scan_loop
-       jmp             L_done_search
+       cmpq    next_input_word, checkpoint         // checkpoint time?
+       jne     L_scan_loop
+
+CHECKPOINT:
+
+    cmpq       end_of_input, checkpoint            // end of buffer or compression ratio check?
+    jne     L_check_compression_ratio
+
+L_check_zero_page:
+                                                // check if any dictionary misses in page
+    cmpq    start_next_full_patt, next_full_patt
+    jne     L_check_single_value_page
+
+    cmpq    start_next_qp, next_qp              // check if any partial or exact dictionary matches
+    jne     L_check_single_value_page
+
+    mov     SV_RETURN, %rax                     // Magic return value
+    jmp     L_done
+
+L_check_single_value_page:
+
+    movq    next_full_patt, %rax                // get # dictionary misses
+    subq    start_next_full_patt, %rax
+    shrq    $2, %rax
+    
+    movq    next_qp, %r11                       // get # dictionary hits (exact + partial)
+    subq    start_next_qp, %r11
+    
+    movq    next_low_bits, %r13                 // get # dictionary partial hits
+    subq    start_next_low_bits, %r13
+    shrq    $1, %r13
+
+    movq    tempTagsArray, %r14                 // get the address of the first tag
+
+    // Single value page if one of the following is true:
+    //  partial == 0 AND hits == 1023 AND miss == 1 AND tag[0] == 2 (i.e. miss)
+    //  partial == 1 AND hits == 1024 AND tag[0] == 1 (i.e. partial)
+    //
+    cmpq    $0, %r13                            // were there 0 partial hits?
+    jne     1f
+
+    cmpq    $1023, %r11                         // were there 1023 dictionary hits
+    jne     1f
+
+    cmpq    $1, %rax                            // was there exactly 1 dictionary miss?
+    jne     1f 
+
+    cmpb    $2, 0(%r14)                         // was the very 1st tag a miss?
+    je      L_is_single_value_page
+
+1:
+    cmpq    $1, %r13                            // was there 1 partial hit?
+    jne     L_check_mostly_zero
+
+    cmpq    $1024, %r11                         // were there 1024 dictionary hits
+    jne     L_check_mostly_zero
+
+    cmpb    $1, 0(%r14)                         // was the very 1st tag a partial?
+    jne     L_check_mostly_zero
+     
+L_is_single_value_page:
+    
+    mov     SV_RETURN, %rax                     // Magic return value
+    jmp     L_done
+
+L_check_mostly_zero:
+                                                // how much space will the sparse packer take?
+    addq    %r11, %rax                          // rax += (next_qp - start_next_qp)
+    movq    $6, %rdx
+    mulq    %rdx                                // rax *= 6 (i.e. 4 byte word + 2 byte offset)
+    addq    $4, %rax                            // rax += 4 byte for header
+    movq    %rax, %r11 
+                                                // how much space will the default packer take?
+    movq    next_low_bits, %rax
+    subq    start_next_low_bits, %rax           // get bytes consumed by lower-10 bits
+    movq    $1365, %rdx
+    mulq    %rdx
+    shrq    $11, %rax                           // rax = 2/3*(next_low_bits - start_next_low_bits)
+    movq    next_full_patt, %rdx
+    subq    start_next_full_patt, %rdx          // get bytes consumed by dictionary misses
+    addq    %rdx, %rax                          // rax += (next_full_patt - start_next_full_patt)
+    movq    next_qp, %rdx
+    subq    start_next_qp, %rdx
+    shrq    $1, %rdx                            // get bytes consumed by dictionary hits
+    addq    %rdx, %rax                          // rax += (next_qp - start_next_qp)/2
+    addq    $(12+256), %rax                     // rax += bytes taken by the header + tags
+
+    cmpq    %r11, %rax                          // is default packer the better option?
+    jb      L_done_search
+
+    cmpl    byte_budget, %r11d                  // can the sparse packer fit into the given budget?
+    ja      L_budgetExhausted
+
+L_sparse_packer:
+
+    movl    MZV_MAGIC, 0(dest_buf)              // header to indicate a sparse packer
+    addq    $4, dest_buf
+
+    movq    $0, %rdx                            // rdx = byte offset in src of non-0 word
+    movq    start_next_input_word, %r8
+1:
+    movq    0(%r8, %rdx), %rax                  // rax = read dword
+       testq   %rax, %rax                          // is dword == 0
+    jne     5f
+3:
+    addq    $8, %rdx                            // 8 more bytes have been processed
+4:
+    cmpq    $4096, %rdx
+    jne     1b
+    movq    %r11, %rax                          // store the size of the compressed stream
+    jmp     L_done
+
+5:
+    testl   %eax, %eax                          // is lower word == 0
+    je      6f
+    movl    %eax, 0(dest_buf)                   // store the non-0 word in the dest buffer
+    mov     %dx, 4(dest_buf)                    // store the byte index
+    addq    $6, dest_buf
+6:
+    shrq    $32, %rax                           // get the upper word into position
+    testl   %eax, %eax                          // is upper word == 0
+    je      3b
+    addq    $4, %rdx
+    movl    %eax, 0(dest_buf)                   // store the word in the dest buffer
+    mov     %dx, 4(dest_buf)                    // store the byte index
+    addq    $6, dest_buf
+    addq    $4, %rdx
+    jmp     4b
+
+L_check_compression_ratio:
+
+    movq    end_of_input, checkpoint            // checkpoint = end of buffer
+
+    movq    next_low_bits, %rax
+    subq    start_next_low_bits, %rax           // get bytes consumed by lower-10 bits
+    movq    $1365, %rdx
+    mulq    %rdx
+    shrq    $11, %rax                           // rax = 2/3*(next_low_bits - start_next_low_bits)
+    
+    movq    next_full_patt, %rdx
+    subq    start_next_full_patt, %rdx          // get bytes consumed by dictionary misses
+    addq    %rdx, %rax                          // rax += (next_full_patt - start_next_full_patt)
+
+    movq    next_qp, %rdx
+    subq    start_next_qp, %rdx
+    shrq    $1, %rdx
+    addq    %rdx, %rax                          // rax += (next_qp - start_next_qp)/2
+
+    addq    $CHKPT_TAG_BYTES, %rax              // rax += bytes taken by the tags
+    cmpq    $CHKPT_SHRUNK_BYTES, %rax
+    ja      L_budgetExhausted                   // compressed size exceeds budget
+    jmp     L_scan_loop 
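
The L_sparse_packer path above emits the alternate "mostly zero" encoding: a 4-byte MZV_MAGIC header followed by one 6-byte record per non-zero word, each record holding the 32-bit word and its 16-bit byte index within the page. A minimal C sketch of that layout, assuming a 4 KB page and using byte-wise copies in place of the assembly's unaligned stores:

    #include <stdint.h>
    #include <string.h>

    #define MZV_MAGIC_SK  17185u     /* header word identifying the MZV encoding */
    #define PAGE_BYTES_SK 4096u

    /* Sketch: pack a mostly-zero page as MZV; returns the compressed size in bytes. */
    static uint32_t
    mzv_encode_sk(const uint8_t src[PAGE_BYTES_SK], uint8_t *dst)
    {
        uint32_t out   = 0;
        uint32_t magic = MZV_MAGIC_SK;

        memcpy(dst + out, &magic, 4);            /* 4-byte header marks the sparse packer */
        out += 4;

        for (uint32_t off = 0; off < PAGE_BYTES_SK; off += 4) {
            uint32_t word;
            memcpy(&word, src + off, 4);
            if (word == 0)
                continue;                        /* zero words are implicit */
            uint16_t idx = (uint16_t)off;        /* byte index of this non-zero word */
            memcpy(dst + out, &word, 4);
            memcpy(dst + out + 4, &idx, 2);
            out += 6;                            /* one 6-byte record per non-zero word */
        }
        return out;
    }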
 
index 1c435c5e793a507f74605c37cdd1200c50af2b6f..bcf6496edb924f700099fca69181833e9004e285 100644 (file)
                        }
  
        cclee, 11/30/12
+
+    Added zero page, single value page, sparse page, early abort optimizations
+    rsrini, 09/14/14
 */
 
+#define MZV_MAGIC           $17185      // magic value used to identify MZV page encoding
+
        .text
 
        .globl _WKdm_decompress_new
@@ -81,27 +86,61 @@ _WKdm_decompress_new:
 
        subq    $(64+8+16), %rsp
 
+    movl    0(%rdi), %eax               // read the 1st word from the header
+    cmpl    MZV_MAGIC, %eax             // is the alternate packer used (i.e. is MZV page)?
+    jne     L_default_decompressor      // default decompressor was used
+
+                                        // Mostly Zero Page Handling...
+                                        // {
+    movq    $0, %rax
+1:                                      // Zero out the entire page
+    movq    $0, 0(%rsi, %rax)
+    movq    $0, 8(%rsi, %rax)
+    movq    $0, 16(%rsi, %rax)
+    movq    $0, 24(%rsi, %rax)
+    movq    $0, 32(%rsi, %rax)
+    movq    $0, 40(%rsi, %rax)
+    movq    $0, 48(%rsi, %rax)
+    movq    $0, 56(%rsi, %rax)
+    addq    $64, %rax
+    cmpq    $4096, %rax
+    jne     1b
+
+    movq    $4, %r12                    // current byte position in src to read from
+2:
+    movl    0(%rdi, %r12), %eax         // get the word
+    movzwq  4(%rdi, %r12), %rdx         // get the index
+    movl    %eax, 0(%rsi, %rdx)         // store non-0 word in the destination buffer
+    addq    $6, %r12                    // 6 more bytes processed
+    cmpl    %ecx, %r12d                 // finished processing all the bytes?
+    jne     2b
+    jmp     L_done
+                                        // }
+
+L_default_decompressor:
+
        movq    %rsi, %r12                                      // dest_buf
-       movq    %rdx, %r13                                      // scracht_buf
+       movq    %rdx, %r13                                      // scratch_buf
 
        // PRELOAD_DICTONARY; dictionary starting address : starting address 0(%rsp)
+    // NOTE: ALL THE DICTIONARY VALUES MUST BE INITIALIZED TO ZERO TO MIRROR THE COMPRESSOR
 #if 1
-       movl    $1, 0(%rsp)
-       movl    $1, 4(%rsp)
-       movl    $1, 8(%rsp)
-       movl    $1, 12(%rsp)
-       movl    $1, 16(%rsp)
-       movl    $1, 20(%rsp)
-       movl    $1, 24(%rsp)
-       movl    $1, 28(%rsp)
-       movl    $1, 32(%rsp)
-       movl    $1, 36(%rsp)
-       movl    $1, 40(%rsp)
-       movl    $1, 44(%rsp)
-       movl    $1, 48(%rsp)
-       movl    $1, 52(%rsp)
-       movl    $1, 56(%rsp)
-       movl    $1, 60(%rsp)
+       movl    $0, 0(%rsp)
+       movl    $0, 4(%rsp)
+       movl    $0, 8(%rsp)
+       movl    $0, 12(%rsp)
+       movl    $0, 16(%rsp)
+       movl    $0, 20(%rsp)
+       movl    $0, 24(%rsp)
+       movl    $0, 28(%rsp)
+       movl    $0, 32(%rsp)
+       movl    $0, 36(%rsp)
+       movl    $0, 40(%rsp)
+       movl    $0, 44(%rsp)
+       movl    $0, 48(%rsp)
+       movl    $0, 52(%rsp)
+       movl    $0, 56(%rsp)
+       movl    $0, 60(%rsp)
 #else
        mov             $0x100000001, %rax
        mov             %rax, (%rsp)
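
The check added at the top of the decompressor mirrors the encoder: if the first header word is MZV_MAGIC the page was stored by the sparse packer, so the destination page is zero-filled and the (word, byte-index) records are replayed until the compressed size (passed in %ecx) is consumed. A minimal C sketch of that path, assuming the same 6-byte record layout as the compress-side sketch:

    #include <stdint.h>
    #include <string.h>

    #define MZV_MAGIC_SK  17185u
    #define PAGE_BYTES_SK 4096u

    /* Sketch: expand an MZV-encoded buffer into a 4 KB page.
     * Returns 1 if the MZV path applied, 0 if the default decompressor should run. */
    static int
    mzv_decode_sk(const uint8_t *src, uint32_t src_bytes, uint8_t dst[PAGE_BYTES_SK])
    {
        uint32_t magic;
        memcpy(&magic, src, 4);
        if (magic != MZV_MAGIC_SK)
            return 0;                            /* not a sparse page */

        memset(dst, 0, PAGE_BYTES_SK);           /* zero the whole destination page first */

        for (uint32_t pos = 4; pos < src_bytes; pos += 6) {
            uint32_t word;
            uint16_t idx;
            memcpy(&word, src + pos, 4);         /* the stored non-zero word */
            memcpy(&idx,  src + pos + 4, 2);     /* its byte index within the page */
            memcpy(dst + idx, &word, 4);
        }
        return 1;
    }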
index 249e621fd90f27b0ed40f780b77633530456f1f7..fc0af58d6cd0296084beddc5f4c3abc7b7d4dc20 100644 (file)
@@ -64,6 +64,7 @@
  * Note: memcpy does not support overlapping copies
  */
 ENTRY(memcpy)
+       movq    %rdi, %rax                      /* return destination */
        movq    %rdx,%rcx
        shrq    $3,%rcx                         /* copy by 64-bit words */
        cld                                     /* copy forwards */
index fcfdf7245401635c7c91ed8c285236b032db7475..be490d18cbda37d16a0290495cdbe92d9afea98e 100644 (file)
@@ -84,6 +84,28 @@ ENTRY(memset)
        movq    %r8 ,%rax               /* returns its first argument */
        ret
 
+/*
+ * void *memset_word(void * addr, int pattern, size_t length)
+ */
+
+ENTRY(memset_word)
+       movq    %rdi, %r8
+       movq    %rsi, %rax              /* move pattern (arg2) to rax */
+       mov     %eax, %ecx
+       shlq    $32,%rax
+       orq     %rcx, %rax 
+       cld                             /* reset direction flag */
+       movq    %rdx, %rcx              /* mov quads first */
+       shrq    $1, %rcx
+       rep
+       stosq
+       movq    %rdx,%rcx               /* if necessary, mov 32 bit word */
+       andq    $1,%rcx
+       rep
+       stosl
+       movq    %r8 ,%rax               /* returns its first argument */
+       ret
+
 /*
  * void bzero(char * addr, size_t length)
  */
index 66a4dd7ac8e86b49671827af5c931aa60269feeb..20e246b6e8e32132abd52afef510f755c38c2052 100644 (file)
 static int copyio(int, user_addr_t, char *, vm_size_t, vm_size_t *, int);
 static int copyio_phys(addr64_t, addr64_t, vm_size_t, int);
 
+/*
+ * Copy sizes bigger than this value will cause a kernel panic.
+ *
+ * Yes, this is an arbitrary fixed limit, but it's almost certainly
+ * a programming error to be copying more than this amount between
+ * user and wired kernel memory in a single invocation on this
+ * platform.
+ */
+#define COPYSIZELIMIT_PANIC     (64*MB)
+
 /*
  * The copy engine has the following characteristics
  *   - copyio() handles copies to/from user or kernel space
@@ -144,6 +154,8 @@ copyio(int copy_type, user_addr_t user_addr, char *kernel_addr,
        debug_type += (copy_type << 2);
 #endif
 
+       assert(nbytes < COPYSIZELIMIT_PANIC);
+
        thread = current_thread();
 
        KERNEL_DEBUG(debug_type | DBG_FUNC_START,
index 59b642007eb416a6a4b35f891a6869fdd932077d..88d88060c80540bf6c941c208bfac32ae7da99e7 100644 (file)
@@ -59,7 +59,6 @@
 
 #include <i386/asm.h>
 #include <i386/proc_reg.h>
-#include <i386/mp.h>
 #include <assym.s>
 
 Entry(Load_context)
index c6a38aef8235c6d4f7f04fdd7ae0755f3fe82dea..0dc07f85020b2b1082e3ea3f663ee23987adc180 100644 (file)
@@ -1028,8 +1028,6 @@ Entry(hndl_alltraps)
        cli                                     /* hold off intrs - critical section */
        xorl    %ecx, %ecx                      /* don't check if we're in the PFZ */
 
-#define CLI cli
-#define STI sti
 
 Entry(return_from_trap)
        movq    %gs:CPU_ACTIVE_THREAD,%r15      /* Get current thread */
@@ -1067,13 +1065,13 @@ L_return_from_trap_with_ast:
        movl    %eax, R64_RBX(%r15)     /* let the PFZ know we've pended an AST */
        jmp     EXT(return_to_user)
 2:     
-       STI                             /* interrupts always enabled on return to user mode */
+       sti                             /* interrupts always enabled on return to user mode */
 
        xor     %edi, %edi              /* zero %rdi */
        xorq    %rbp, %rbp              /* clear framepointer */
        CCALL(i386_astintr)             /* take the AST */
 
-       CLI
+       cli
        mov     %rsp, %r15              /* AST changes stack, saved state */
        xorl    %ecx, %ecx              /* don't check if we're in the PFZ */
        jmp     EXT(return_from_trap)   /* and check again (rare) */
@@ -1160,8 +1158,6 @@ Entry(hndl_allintrs)
 
        CCALL1(interrupt, %r15)         /* call generic interrupt routine */
 
-       cli                             /* just in case we returned with intrs enabled */
-
        .globl  EXT(return_to_iret)
 LEXT(return_to_iret)                   /* (label for kdb_kintr and hardclock) */
 
@@ -1417,7 +1413,6 @@ Entry(hndl_mdep_scall64)
 
 Entry(hndl_diag_scall64)
        CCALL1(diagCall64, %r15)        // Call diagnostics
-       cli                             // Disable interruptions just in case
        test    %eax, %eax              // What kind of return is this?
        je      1f                      // - branch if bad (zero)
        jmp     EXT(return_to_user)     // Normal return, do not check asts...
index a0b7e01ab9348e0757d84e0ea58aac3715e23b7e..82fd80e5493aefdd3608299ef10658003fce8736 100644 (file)
@@ -61,7 +61,8 @@
 #define RDPMC_FIXED_COUNTER_SELECTOR (1ULL<<30)
 
 /* track the last config we enabled */
-static uint32_t kpc_running = 0;
+static uint64_t kpc_running_cfg_pmc_mask = 0;
+static uint32_t kpc_running_classes = 0;
 
 /* PMC / MSR accesses */
 
@@ -123,22 +124,22 @@ wrIA32_PERFEVTSELx(uint32_t ctr, uint64_t value)
 boolean_t
 kpc_is_running_fixed(void)
 {
-       return (kpc_running & KPC_CLASS_FIXED_MASK) == KPC_CLASS_FIXED_MASK;
+       return (kpc_running_classes & KPC_CLASS_FIXED_MASK) == KPC_CLASS_FIXED_MASK;
 }
 
 boolean_t
-kpc_is_running_configurable(void)
+kpc_is_running_configurable(uint64_t pmc_mask)
 {
-       return (kpc_running & KPC_CLASS_CONFIGURABLE_MASK) == KPC_CLASS_CONFIGURABLE_MASK;
+       assert(kpc_popcount(pmc_mask) <= kpc_configurable_count());
+       return ((kpc_running_classes & KPC_CLASS_CONFIGURABLE_MASK) == KPC_CLASS_CONFIGURABLE_MASK) &&
+              ((kpc_running_cfg_pmc_mask & pmc_mask) == pmc_mask);
 }
 
 uint32_t
 kpc_fixed_count(void)
 {
        i386_cpu_info_t *info = NULL;
-
        info = cpuid_info();
-
        return info->cpuid_arch_perf_leaf.fixed_number;
 }
 
@@ -146,9 +147,7 @@ uint32_t
 kpc_configurable_count(void)
 {
        i386_cpu_info_t *info = NULL;
-
        info = cpuid_info();
-
        return info->cpuid_arch_perf_leaf.number;
 }
 
@@ -159,9 +158,10 @@ kpc_fixed_config_count(void)
 }
 
 uint32_t
-kpc_configurable_config_count(void)
+kpc_configurable_config_count(uint64_t pmc_mask)
 {
-       return kpc_configurable_count();
+       assert(kpc_popcount(pmc_mask) <= kpc_configurable_count());
+       return kpc_popcount(pmc_mask);
 }
 
 uint32_t
@@ -268,33 +268,28 @@ set_running_fixed(boolean_t on)
 }
 
 static void
-set_running_configurable(boolean_t on)
+set_running_configurable(uint64_t target_mask, uint64_t state_mask)
 {
-       uint64_t global = 0, mask = 0;
-       uint64_t cfg, save;
-       int i;
+       uint32_t cfg_count = kpc_configurable_count();
+       uint64_t global = 0ULL, cfg = 0ULL, save = 0ULL;
        boolean_t enabled;
-       int ncnt = (int) kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK);
 
        enabled = ml_set_interrupts_enabled(FALSE);
 
        /* rmw the global control */
        global = rdmsr64(MSR_IA32_PERF_GLOBAL_CTRL);
-       for( i = 0; i < ncnt; i++ ) {
-               mask |= (1ULL<<i);
 
-               /* need to save and restore counter since it resets when reconfigured */
+       /* need to save and restore counter since it resets when reconfigured */
+       for (uint32_t i = 0; i < cfg_count; ++i) {
                cfg = IA32_PERFEVTSELx(i);
                save = IA32_PMCx(i);
                wrIA32_PERFEVTSELx(i, cfg | IA32_PERFEVTSEL_PMI | IA32_PERFEVTSEL_EN);
                wrIA32_PMCx(i, save);
        }
 
-       if( on )
-               global |= mask;
-       else
-               global &= ~mask;
-
+       /* update the global control value */
+       global &= ~target_mask; /* clear the targeted PMCs bits */
+       global |= state_mask;   /* update the targeted PMCs bits with their new states */
        wrmsr64(MSR_IA32_PERF_GLOBAL_CTRL, global);
 
        ml_set_interrupts_enabled(enabled);
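
set_running_configurable now takes two masks instead of a single on/off flag: target_mask names the configurable-PMC enable bits in MSR_IA32_PERF_GLOBAL_CTRL that are being changed, and state_mask carries their new values, so counters outside the target mask keep whatever state they had. A minimal C sketch of that read-modify-write on the global-control value (the MSR read and write are left to the caller):

    #include <stdint.h>

    /* Sketch of the global-control update in the hunk above. */
    static uint64_t
    update_global_ctrl_sk(uint64_t global, uint64_t target_mask, uint64_t state_mask)
    {
        global &= ~target_mask;   /* clear the enable bits of the targeted PMCs */
        global |= state_mask;     /* install their new states */
        return global;            /* written back with wrmsr64 in the real code */
    }

Passing target_mask == state_mask enables the selected counters; passing the same target_mask with state_mask == 0 disables them.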
@@ -303,17 +298,20 @@ set_running_configurable(boolean_t on)
 static void
 kpc_set_running_mp_call( void *vstate )
 {
-       uint32_t new_state = *(uint32_t*)vstate;
+       struct kpc_running_remote *mp_config = (struct kpc_running_remote*) vstate;
+       assert(mp_config);
+
+       if (kpc_controls_fixed_counters())
+               set_running_fixed(mp_config->classes & KPC_CLASS_FIXED_MASK);
 
-       set_running_fixed((new_state & KPC_CLASS_FIXED_MASK) != 0);
-       set_running_configurable((new_state & KPC_CLASS_CONFIGURABLE_MASK) != 0);
+       set_running_configurable(mp_config->cfg_target_mask,
+                                mp_config->cfg_state_mask);
 }
 
 int
 kpc_get_fixed_config(kpc_config_t *configv)
 {
        configv[0] = IA32_FIXED_CTR_CTRL();
-
        return 0;
 }
 
@@ -361,25 +359,31 @@ kpc_get_fixed_counters(uint64_t *counterv)
 }
 
 int
-kpc_get_configurable_config(kpc_config_t *configv)
+kpc_get_configurable_config(kpc_config_t *configv, uint64_t pmc_mask)
 {
-       int i, n = kpc_get_config_count(KPC_CLASS_CONFIGURABLE_MASK);
+       uint32_t cfg_count = kpc_configurable_count();
 
-       for( i = 0; i < n; i++ )
-               configv[i] = IA32_PERFEVTSELx(i);
+       assert(configv);
 
+       for (uint32_t i = 0; i < cfg_count; ++i)
+               if ((1ULL << i) & pmc_mask)
+                       *configv++  = IA32_PERFEVTSELx(i);
        return 0;
 }
 
 static int
-kpc_set_configurable_config(kpc_config_t *configv)
+kpc_set_configurable_config(kpc_config_t *configv, uint64_t pmc_mask)
 {
-       int i, n = kpc_get_config_count(KPC_CLASS_CONFIGURABLE_MASK);
+       uint32_t cfg_count = kpc_configurable_count();
        uint64_t save;
 
-       for( i = 0; i < n; i++ ) {
+       for (uint32_t i = 0; i < cfg_count; i++ ) {
+               if (((1ULL << i) & pmc_mask) == 0)
+                       continue;
+
                /* need to save and restore counter since it resets when reconfigured */
                save = IA32_PMCx(i);
+
                /*
                 * Some bits are not safe to set from user space.
                 * Allow these bits to be set:
@@ -402,63 +406,118 @@ kpc_set_configurable_config(kpc_config_t *configv)
                 *   33     IN_TXCP
                 *   34-63  Reserved
                 */
-               wrIA32_PERFEVTSELx(i, configv[i] & 0xffc7ffffull);
+               wrIA32_PERFEVTSELx(i, *configv & 0xffc7ffffull);
                wrIA32_PMCx(i, save);
+
+               /* next configuration word */
+               configv++;
        }
 
        return 0;
 }
 
 int
-kpc_get_configurable_counters(uint64_t *counterv)
+kpc_get_configurable_counters(uint64_t *counterv, uint64_t pmc_mask)
 {
-       int i, n = kpc_get_config_count(KPC_CLASS_CONFIGURABLE_MASK);
-       uint64_t status;
+       uint32_t cfg_count = kpc_configurable_count();
+       uint64_t status, *it_counterv = counterv;
 
        /* snap the counters */
-       for( i = 0; i < n; i++ ) {
-               counterv[i] = CONFIGURABLE_SHADOW(i) +
-                       (IA32_PMCx(i) - CONFIGURABLE_RELOAD(i));
+       for (uint32_t i = 0; i < cfg_count; ++i) {
+               if ((1ULL << i) & pmc_mask) {
+                       *it_counterv++ = CONFIGURABLE_SHADOW(i) +
+                                        (IA32_PMCx(i) - CONFIGURABLE_RELOAD(i));
+               }
        }
 
        /* Grab the overflow bits */
        status = rdmsr64(MSR_IA32_PERF_GLOBAL_STATUS);
 
-       /* If the overflow bit is set for a counter, our previous read may or may not have been
+       /* reset the iterator */
+       it_counterv = counterv;
+
+       /*
+        * If the overflow bit is set for a counter, our previous read may or may not have been
         * before the counter overflowed. Re-read any counter with it's overflow bit set so
         * we know for sure that it has overflowed. The reason this matters is that the math
-        * is different for a counter that has overflowed. */
-       for( i = 0; i < n; i++ ) {
-               if ((1ull << i) & status) {
-                       counterv[i] = CONFIGURABLE_SHADOW(i) +
-                               (kpc_configurable_max() - CONFIGURABLE_RELOAD(i)) + IA32_PMCx(i);
+        * is different for a counter that has overflowed.
+        */
+       for (uint32_t i = 0; i < cfg_count; ++i) {
+               if (((1ULL << i) & pmc_mask) &&
+                   ((1ULL << i) & status))
+               {
+                       *it_counterv++ = CONFIGURABLE_SHADOW(i) +
+                                        (kpc_configurable_max() - CONFIGURABLE_RELOAD(i)) + IA32_PMCx(i);
                }
        }
 
        return 0;
 }
 
+static void
+kpc_get_curcpu_counters_mp_call(void *args)
+{
+       struct kpc_get_counters_remote *handler = args;
+       int offset=0, r=0;
+
+       assert(handler);
+       assert(handler->buf);
+
+       offset = cpu_number() * handler->buf_stride;
+       r = kpc_get_curcpu_counters(handler->classes, NULL, &handler->buf[offset]);
+
+       /* number of counters added by this CPU, needs to be atomic  */
+       hw_atomic_add(&(handler->nb_counters), r);
+}
+
+int
+kpc_get_all_cpus_counters(uint32_t classes, int *curcpu, uint64_t *buf)
+{
+       int enabled = 0;
+
+       struct kpc_get_counters_remote hdl = {
+               .classes = classes, .nb_counters = 0,
+               .buf_stride = kpc_get_counter_count(classes), .buf = buf
+       };
+
+       assert(buf);
+
+       enabled = ml_set_interrupts_enabled(FALSE);
+
+       if (curcpu)
+               *curcpu = current_processor()->cpu_id;
+       mp_cpus_call(CPUMASK_ALL, ASYNC, kpc_get_curcpu_counters_mp_call, &hdl);
+
+       ml_set_interrupts_enabled(enabled);
+
+       return hdl.nb_counters;
+}
+
 static void
 kpc_set_config_mp_call(void *vmp_config)
 {
+
        struct kpc_config_remote *mp_config = vmp_config;
-       uint32_t classes = mp_config->classes;
-       kpc_config_t *new_config = mp_config->configv;
-       int count = 0;
+       kpc_config_t *new_config = NULL;
+       uint32_t classes = 0, count = 0;
        boolean_t enabled;
 
+       assert(mp_config);
+       assert(mp_config->configv);
+       classes = mp_config->classes;
+       new_config = mp_config->configv;
+
        enabled = ml_set_interrupts_enabled(FALSE);
        
-       if( classes & KPC_CLASS_FIXED_MASK )
+       if (classes & KPC_CLASS_FIXED_MASK)
        {
                kpc_set_fixed_config(&new_config[count]);
                count += kpc_get_config_count(KPC_CLASS_FIXED_MASK);
        }
 
-       if( classes & KPC_CLASS_CONFIGURABLE_MASK )
-       {
-               kpc_set_configurable_config(&new_config[count]);
-               count += kpc_get_config_count(KPC_CLASS_CONFIGURABLE_MASK);
+       if (classes & KPC_CLASS_CONFIGURABLE_MASK) {
+               kpc_set_configurable_config(&new_config[count], mp_config->pmc_mask);
+               count += kpc_popcount(mp_config->pmc_mask);
        }
 
        ml_set_interrupts_enabled(enabled);
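
All of the configurable-counter routines above now follow the same pmc_mask convention: counter i participates only if bit i of the mask is set, and the caller's configv/counterv buffer is packed, advancing one slot per selected counter, which is why kpc_configurable_config_count() is now simply kpc_popcount(pmc_mask). A minimal C sketch of that iteration pattern, with read_counter() as a hypothetical stand-in for the IA32_PMCx()/IA32_PERFEVTSELx() accessors:

    #include <stdint.h>

    /* Sketch: visit the counters selected by pmc_mask, packing results into out[]. */
    static uint32_t
    for_each_selected_pmc_sk(uint32_t cfg_count, uint64_t pmc_mask,
                             uint64_t (*read_counter)(uint32_t), uint64_t *out)
    {
        uint64_t *it = out;                      /* packed output iterator */

        for (uint32_t i = 0; i < cfg_count; ++i) {
            if (((1ULL << i) & pmc_mask) == 0)
                continue;                        /* counter not selected by the mask */
            *it++ = read_counter(i);             /* one packed slot per selected PMC */
        }
        return (uint32_t)(it - out);             /* equals the popcount of the covered mask bits */
    }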
@@ -468,34 +527,51 @@ static void
 kpc_set_reload_mp_call(void *vmp_config)
 {
        struct kpc_config_remote *mp_config = vmp_config;
-       uint64_t max = kpc_configurable_max();
-       uint32_t i, count = kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK);
-       uint64_t *new_period;
-       uint64_t classes;
-       int enabled;
+       uint64_t *new_period = NULL, max = kpc_configurable_max();
+       uint32_t classes = 0, count = 0;
+       boolean_t enabled;
 
+       assert(mp_config);
+       assert(mp_config->configv);
        classes = mp_config->classes;
        new_period = mp_config->configv;
 
+       enabled = ml_set_interrupts_enabled(FALSE);
+
        if (classes & KPC_CLASS_CONFIGURABLE_MASK) {
-               enabled = ml_set_interrupts_enabled(FALSE);
+               /*
+                * Update _all_ shadow counters, this cannot be done for only
+                * selected PMCs. Otherwise, we would corrupt the configurable
+                * shadow buffer since the PMCs are muxed according to the pmc
+                * mask.
+                */
+               uint64_t all_cfg_mask = (1ULL << kpc_configurable_count()) - 1;
+               kpc_get_configurable_counters(&CONFIGURABLE_SHADOW(0), all_cfg_mask);
 
-               kpc_get_configurable_counters(&CONFIGURABLE_SHADOW(0));
+               /* set the new period */
+               count = kpc_configurable_count();
+               for (uint32_t i = 0; i < count; ++i) {
+                       /* ignore the counter */
+                       if (((1ULL << i) & mp_config->pmc_mask) == 0)
+                               continue;
 
-               for (i = 0; i < count; i++) {
-                       if (new_period[i] == 0)
-                               new_period[i] = kpc_configurable_max();
+                       if (*new_period == 0)
+                               *new_period = kpc_configurable_max();
 
-                       CONFIGURABLE_RELOAD(i) = max - new_period[i];
+                       CONFIGURABLE_RELOAD(i) = max - *new_period;
 
+                       /* reload the counter */
                        kpc_reload_configurable(i);
 
                        /* clear overflow bit just in case */
                        wrmsr64(MSR_IA32_PERF_GLOBAL_OVF_CTRL, 1ull << i);
-               }
 
-               ml_set_interrupts_enabled(enabled);
+                       /* next period value */
+                       new_period++;
+               }
        }
+
+       ml_set_interrupts_enabled(enabled);
 }
 
 int
@@ -522,14 +598,17 @@ kpc_get_classes(void)
 }
 
 int
-kpc_set_running(uint32_t new_state)
+kpc_set_running_arch(struct kpc_running_remote *mp_config)
 {
+       assert(mp_config);
+
        lapic_set_pmi_func((i386_intr_func_t)kpc_pmi_handler);
 
        /* dispatch to all CPUs */
-       mp_cpus_call( CPUMASK_ALL, ASYNC, kpc_set_running_mp_call, &new_state );
+       mp_cpus_call(CPUMASK_ALL, ASYNC, kpc_set_running_mp_call, mp_config);
 
-       kpc_running = new_state;
+       kpc_running_cfg_pmc_mask = mp_config->cfg_state_mask;
+       kpc_running_classes = mp_config->classes;
 
        return 0;
 }
@@ -591,14 +670,24 @@ void kpc_pmi_handler(__unused x86_saved_state_t *state)
 }
 
 int
-kpc_force_all_ctrs_arch( task_t task __unused, int val __unused )
+kpc_set_sw_inc( uint32_t mask __unused )
 {
-       /* TODO: reclaim counters ownership from XCPM */
-       return 0;
+       return ENOTSUP;
 }
 
 int
-kpc_set_sw_inc( uint32_t mask __unused )
+kpc_get_pmu_version(void)
 {
-       return ENOTSUP;
+       i386_cpu_info_t *info = cpuid_info();
+
+       uint8_t version_id = info->cpuid_arch_perf_leaf.version;
+
+       if (version_id == 3) {
+               return KPC_PMU_INTEL_V3;
+       } else if (version_id == 2) {
+               return KPC_PMU_INTEL_V2;
+       }
+
+       return KPC_PMU_ERROR;
 }
+
index ca044a570241309267307e763e268c26a672c194..bcdbb977984bfe78f802243122736d8fcc2efc20 100644 (file)
@@ -71,8 +71,6 @@
 #define _ARCH_I386_ASM_HELP_H_          /* Prevent inclusion of user header */
 #include <mach/i386/syscall_sw.h>
 
-#include <i386/mp.h>
-
 /*
  * Fault recovery.
  */
@@ -176,22 +174,21 @@ LEXT(thread_exception_return)
  * Copyin/out from user/kernel address space.
  * rdi:        source address
  * rsi:        destination address
- * rdx:        byte count
+ * rdx:        byte count (in fact, always < 64MB -- see copyio)
  */
 Entry(_bcopy)
-// TODO not pop regs; movq; think about 32 bit or 64 bit byte count
-       xchgq   %rdi, %rsi              /* source %rsi, dest %rdi */
+       xchg    %rdi, %rsi              /* source %rsi, dest %rdi */
 
        cld                             /* count up */
-       movl    %edx,%ecx               /* move by longwords first */
-       shrl    $3,%ecx
+       mov     %rdx, %rcx              /* move by longwords first */
+       shr     $3, %rcx
        RECOVERY_SECTION
        RECOVER(_bcopy_fail)
        rep
        movsq                           /* move longwords */
 
-       movl    %edx,%ecx               /* now move remaining bytes */
-       andl    $7,%ecx
+       movl    %edx, %ecx              /* now move remaining bytes */
+       andl    $7, %ecx
        RECOVERY_SECTION
        RECOVER(_bcopy_fail)
        rep
index 486d5c7262ea17a8f658dc18598601ed1ab1e597..35ee768fc3a7bea24c013f98f0170a055eb762ca 100644 (file)
 #include <kdp/kdp_callout.h>
 #endif /* !MACH_KDP */
 
+#include <libkern/OSDebug.h>
+#if CONFIG_DTRACE
+#include <mach/sdt.h>
+#endif
+
 #if 0
 
 #undef KERNEL_DEBUG
@@ -246,6 +251,8 @@ ovbcopy(
  *  Read data from a physical address. Memory should not be cache inhibited.
  */
 
+uint64_t reportphyreaddelayabs;
+uint32_t reportphyreadosbt;
 
 static inline unsigned int
 ml_phys_read_data(pmap_paddr_t paddr, int size)
@@ -253,10 +260,17 @@ ml_phys_read_data(pmap_paddr_t paddr, int size)
        unsigned int result = 0;
        unsigned char s1;
        unsigned short s2;
+       boolean_t istate;
+       uint64_t sabs, eabs;
 
-       if (!physmap_enclosed(paddr))
+       if (__improbable(!physmap_enclosed(paddr)))
                panic("%s: 0x%llx out of bounds\n", __FUNCTION__, paddr);
 
+       if (__improbable(reportphyreaddelayabs != 0)) {
+               istate = ml_set_interrupts_enabled(FALSE);
+               sabs = mach_absolute_time();
+       }
+
         switch (size) {
         case 1:
                s1 = *(volatile unsigned char *)PHYSMAP_PTOV(paddr);
@@ -273,6 +287,22 @@ ml_phys_read_data(pmap_paddr_t paddr, int size)
                panic("Invalid size %d for ml_phys_read_data\n", size);
                break;
         }
+
+       if (__improbable(reportphyreaddelayabs != 0)) {
+               eabs = mach_absolute_time();
+               (void)ml_set_interrupts_enabled(istate);
+
+               if ((eabs - sabs) > reportphyreaddelayabs) {
+                       if (reportphyreadosbt) {
+                               OSReportWithBacktrace("ml_phys_read_data took %lluus\n", (eabs - sabs) / 1000);
+                       }
+#if CONFIG_DTRACE
+                       DTRACE_PHYSLAT3(physread, uint64_t, (eabs - sabs),
+                           pmap_paddr_t, paddr, uint32_t, size);
+#endif
+               }
+       }
+
         return result;
 }
 
index 0ad5ccc336774a60325ebbe4be4e0bf50e012484..954ac8def6b1d6ded2d1984cdc1bce8fa636ba18 100644 (file)
@@ -39,13 +39,4 @@ typedef uint64_t kpc_config_t;
 /* number of fixed config registers on x86_64 */
 #define KPC_X86_64_FIXED_CONFIGS (1)
 
-#define FIXED_ACTIONID(ctr) (kpc_actionid[(ctr)])
-#define CONFIGURABLE_ACTIONID(ctr) (kpc_actionid[(ctr) + kpc_fixed_count()])
-
-#define FIXED_RELOAD(ctr) (current_cpu_datap()->cpu_kpc_reload[(ctr)])
-#define CONFIGURABLE_RELOAD(ctr) (current_cpu_datap()->cpu_kpc_reload[(ctr) + kpc_fixed_count()])
-
-#define FIXED_SHADOW(ctr) (current_cpu_datap()->cpu_kpc_shadow[(ctr)])
-#define CONFIGURABLE_SHADOW(ctr) (current_cpu_datap()->cpu_kpc_shadow[(ctr) + kpc_fixed_count()])
-
 #endif /* _MACHINE_X86_64_KPC_H */
index 253070a09676283213cb44e8e74364d38eb3b726..af962f2f44238dc46b88b3fe701941950163045e 100644 (file)
@@ -78,10 +78,15 @@ ENTRY(ml_get_timebase)
  *
  */
 ENTRY(tmrCvt)
+       cmpq    $1,%rsi                         /* check for unity fastpath */
+       je      1f
        movq    %rdi,%rax
        mulq    %rsi                            /* result is %rdx:%rax */
        shrdq   $32,%rdx,%rax                   /* %rdx:%rax >>= 32 */
        ret
+1:
+       mov     %rdi,%rax
+       ret
 
  /*
  * void _rtc_nanotime_adjust(
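
tmrCvt scales a 64-bit value by a conversion factor carried in 32.32 fixed point, taking the full 128-bit product and keeping bits 32..95, and the hunk above adds a fastpath that returns the value unchanged when the raw conversion argument is exactly 1 (callers appear to use 1 to mean "no conversion", since (value * 1) >> 32 would not be a useful result). A minimal C sketch of the same computation, assuming a compiler that provides __uint128_t:

    #include <stdint.h>

    /* Sketch of tmrCvt: value scaled by a 32.32 fixed-point conversion factor. */
    static uint64_t
    tmr_cvt_sk(uint64_t value, uint64_t conversion)
    {
        if (conversion == 1)                     /* unity fastpath added by this change */
            return value;

        /* 64x64 -> 128-bit multiply, then keep bits 32..95 of the product,
         * matching the mulq / shrdq $32 sequence above. */
        return (uint64_t)(((__uint128_t)value * conversion) >> 32);
    }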
index 51b1b3348485e964ae2c34acffa21e73e9a5787a..69a3bdc267efb6cdc363df06a9f7ffeabd8e142f 100644 (file)
 #include <i386/tsc.h>
 #include <i386/pmap_internal.h>
 #include <i386/pmap_pcid.h>
+#if CONFIG_VMX
+#include <i386/vmx/vmx_cpu.h>
+#endif
 
 #include <vm/vm_protos.h>
 
@@ -246,12 +249,15 @@ pt_entry_t     *DMAP1, *DMAP2;
 caddr_t         DADDR1;
 caddr_t         DADDR2;
 
-const boolean_t        pmap_disable_kheap_nx = FALSE;
-const boolean_t        pmap_disable_kstack_nx = FALSE;
+boolean_t      pmap_disable_kheap_nx = FALSE;
+boolean_t      pmap_disable_kstack_nx = FALSE;
 extern boolean_t doconstro_override;
 
 extern long __stack_chk_guard[];
 
+boolean_t pmap_ept_support_ad = FALSE;
+
+
 /*
  *     Map memory at initialization.  The physical addresses being
  *     mapped are not managed and are never unmapped.
@@ -381,6 +387,7 @@ pmap_bootstrap(
        kernel_pmap->pm_pdpt = (pd_entry_t *) ((uintptr_t)IdlePDPT);
        kernel_pmap->pm_pml4 = IdlePML4;
        kernel_pmap->pm_cr3 = (uintptr_t)ID_MAP_VTOP(IdlePML4);
+       kernel_pmap->pm_eptp = 0;
        pmap_pcid_initialize_kernel(kernel_pmap);
 
        
@@ -685,10 +692,9 @@ pmap_init(void)
                         + pv_lock_table_size(npages)
                         + pv_hash_lock_table_size((npvhashbuckets))
                                + npages);
-
        s = round_page(s);
        if (kernel_memory_allocate(kernel_map, &addr, s, 0,
-                                  KMA_KOBJECT | KMA_PERMANENT)
+                                  KMA_KOBJECT | KMA_PERMANENT, VM_KERN_MEMORY_PMAP)
            != KERN_SUCCESS)
                panic("pmap_init");
 
@@ -791,6 +797,12 @@ pmap_init(void)
         * before this is shared with any user.
         */
        pmap_expand_pml4(kernel_pmap, KERNEL_BASEMENT, PMAP_EXPAND_OPTIONS_NONE);
+
+#if CONFIG_VMX
+       pmap_ept_support_ad = vmx_hv_support()  && (VMX_CAP(MSR_IA32_VMX_EPT_VPID_CAP, MSR_IA32_VMX_EPT_VPID_CAP_AD_SHIFT, 1) ? TRUE : FALSE);
+#else
+       pmap_ept_support_ad = FALSE;
+#endif /* CONFIG_VMX */
 }
 
 static
@@ -799,6 +811,8 @@ void pmap_mark_range(pmap_t npmap, uint64_t sv, uint64_t nxrosz, boolean_t NX, b
        pd_entry_t *pdep;
        pt_entry_t *ptep = NULL;
 
+       assert(!is_ept_pmap(npmap));
+
        assert(((sv & 0xFFFULL) | (nxrosz & 0xFFFULL)) == 0);
 
        for (pdep = pmap_pde(npmap, cv); pdep != NULL && (cv < ev);) {
@@ -1202,6 +1216,28 @@ pmap_is_empty(
        return TRUE;
 }
 
+void
+hv_ept_pmap_create(void **ept_pmap, void **eptp)
+{
+       pmap_t p;
+
+       if ((ept_pmap == NULL) || (eptp == NULL)) {
+               return;
+       }
+
+       p = pmap_create_options(get_task_ledger(current_task()), 0, (PMAP_CREATE_64BIT | PMAP_CREATE_EPT));
+       if (p == PMAP_NULL) {
+               *ept_pmap = NULL;
+               *eptp = NULL;
+               return;
+       }
+
+       assert(is_ept_pmap(p));
+
+       *ept_pmap = (void*)p;
+       *eptp = (void*)(p->pm_eptp);
+       return;
+}
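
hv_ept_pmap_create is the new entry point a hypervisor client can use to obtain an EPT-flavoured pmap: it wraps pmap_create_options(..., PMAP_CREATE_64BIT | PMAP_CREATE_EPT) and returns both the pmap and its pm_eptp root, leaving both pointers NULL on failure. A minimal sketch of a caller, with the caller-side names being hypothetical:

    #include <stddef.h>

    extern void hv_ept_pmap_create(void **ept_pmap, void **eptp);   /* as added above */

    /* Hypothetical caller: create a guest address space and keep its EPT root. */
    static int
    guest_address_space_create_sk(void **pmap_out, void **eptp_out)
    {
        *pmap_out = NULL;
        *eptp_out = NULL;

        hv_ept_pmap_create(pmap_out, eptp_out);  /* fills both, or leaves both NULL */

        return (*pmap_out != NULL && *eptp_out != NULL) ? 0 : -1;
    }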
 
 /*
  *     Create and return a physical map.
@@ -1216,10 +1252,10 @@ pmap_is_empty(
  *     is bounded by that size.
  */
 pmap_t
-pmap_create(
-       ledger_t                ledger,
-           vm_map_size_t       sz,
-           boolean_t           is_64bit)
+pmap_create_options(
+       ledger_t        ledger,
+       vm_map_size_t   sz,
+       int             flags)
 {
        pmap_t          p;
        vm_size_t       size;
@@ -1227,7 +1263,7 @@ pmap_create(
        pml4_entry_t    *kpml4;
 
        PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START,
-                  (uint32_t) (sz>>32), (uint32_t) sz, is_64bit, 0, 0);
+                  (uint32_t) (sz>>32), (uint32_t) sz, flags, 0, 0);
 
        size = (vm_size_t) sz;
 
@@ -1239,6 +1275,13 @@ pmap_create(
                return(PMAP_NULL);
        }
 
+       /*
+        *      Return error when unrecognized flags are passed.
+        */
+       if ((flags & ~(PMAP_CREATE_KNOWN_FLAGS)) != 0) {
+               return(PMAP_NULL);
+       }
+
        p = (pmap_t) zalloc(pmap_zone);
        if (PMAP_NULL == p)
                panic("pmap_create zalloc");
@@ -1259,7 +1302,7 @@ pmap_create(
        ledger_reference(ledger);
        p->ledger = ledger;
 
-       p->pm_task_map = is_64bit ? TASK_MAP_64BIT : TASK_MAP_32BIT;;
+       p->pm_task_map = ((flags & PMAP_CREATE_64BIT) ? TASK_MAP_64BIT : TASK_MAP_32BIT);
        if (pmap_pcid_ncpus)
                pmap_pcid_initialize(p);
 
@@ -1269,7 +1312,13 @@ pmap_create(
 
        memset((char *)p->pm_pml4, 0, PAGE_SIZE);
 
-       p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_pml4);
+       if (flags & PMAP_CREATE_EPT) {
+               p->pm_eptp = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_pml4);
+               p->pm_cr3 = 0;
+       } else {
+               p->pm_eptp = 0;
+               p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_pml4);
+       }
 
        /* allocate the vm_objs to hold the pdpt, pde and pte pages */
 
@@ -1293,16 +1342,26 @@ pmap_create(
        pml4[KERNEL_PHYSMAP_PML4_INDEX] = kpml4[KERNEL_PHYSMAP_PML4_INDEX];
 
        PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START,
-                  p, is_64bit, 0, 0, 0);
+                  p, flags, 0, 0, 0);
 
        return(p);
 }
 
+pmap_t
+pmap_create(
+       ledger_t        ledger,
+       vm_map_size_t   sz,
+       boolean_t       is_64bit)
+{
+       return pmap_create_options(ledger, sz, ((is_64bit) ? PMAP_CREATE_64BIT : 0));
+}
+
 /*
  *     Retire the given physical map from service.
  *     Should only be called if the map contains
  *     no valid mappings.
  */
+extern int vm_wired_objects_page_count;
 
 void
 pmap_destroy(pmap_t    p)
@@ -1428,6 +1487,7 @@ pmap_protect_options(
        vm_map_offset_t orig_sva;
        boolean_t       set_NX;
        int             num_found = 0;
+       boolean_t       is_ept;
 
        pmap_intr_assert();
 
@@ -1448,6 +1508,9 @@ pmap_protect_options(
        else
                set_NX = TRUE;
 
+       is_ept = is_ept_pmap(map);
+
+
        PMAP_LOCK(map);
 
        orig_sva = sva;
@@ -1456,8 +1519,8 @@ pmap_protect_options(
                if (lva > eva)
                        lva = eva;
                pde = pmap_pde(map, sva);
-               if (pde && (*pde & INTEL_PTE_VALID)) {
-                       if (*pde & INTEL_PTE_PS) {
+               if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
+                       if (*pde & PTE_PS) {
                                /* superpage */
                                spte = pde;
                                epte = spte+1; /* excluded */
@@ -1468,18 +1531,31 @@ pmap_protect_options(
                        }
 
                        for (; spte < epte; spte++) {
-                               if (!(*spte & INTEL_PTE_VALID))
+                               if (!(*spte & PTE_VALID_MASK(is_ept)))
                                        continue;
 
+                               if (is_ept) {
+                                       if (prot & VM_PROT_READ)
+                                               pmap_update_pte(spte, 0, PTE_READ(is_ept));
+                                       else
+                                               pmap_update_pte(spte, PTE_READ(is_ept), 0);
+                               }
                                if (prot & VM_PROT_WRITE)
-                                       pmap_update_pte(spte, 0, INTEL_PTE_WRITE);
+                                       pmap_update_pte(spte, 0, PTE_WRITE(is_ept));
                                else
-                                       pmap_update_pte(spte, INTEL_PTE_WRITE, 0);
+                                       pmap_update_pte(spte, PTE_WRITE(is_ept), 0);
 
-                               if (set_NX)
-                                       pmap_update_pte(spte, 0, INTEL_PTE_NX);
-                               else
-                                       pmap_update_pte(spte, INTEL_PTE_NX, 0);
+                               if (set_NX) {
+                                       if (!is_ept)
+                                               pmap_update_pte(spte, 0, INTEL_PTE_NX);
+                                       else
+                                               pmap_update_pte(spte, INTEL_EPT_EX, 0);
+                               } else {
+                                       if (!is_ept)
+                                               pmap_update_pte(spte, INTEL_PTE_NX, 0);
+                                       else
+                                               pmap_update_pte(spte, 0, INTEL_EPT_EX);
+                               }
                                num_found++;
                        }
                }
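
pmap_protect_options now has to tell the two leaf-entry formats apart: a legacy IA-32e PTE has no separate read bit (a valid entry is readable) and forbids execution by setting INTEL_PTE_NX, whereas an EPT entry carries explicit read and write bits plus an execute-allow bit (INTEL_EPT_EX) that is cleared to forbid execution. A minimal C sketch of that permission-to-bits mapping, using illustrative stand-in bit values rather than the real pmap.h constants:

    #include <stdint.h>

    /* Stand-in bit values for illustration only, not the kernel's definitions. */
    #define SK_PTE_WRITE  (1ULL << 1)
    #define SK_PTE_NX     (1ULL << 63)
    #define SK_EPT_READ   (1ULL << 0)
    #define SK_EPT_WRITE  (1ULL << 1)
    #define SK_EPT_EX     (1ULL << 2)

    /* Sketch: which permission bits a leaf entry gets for a given protection. */
    static uint64_t
    prot_bits_sk(int is_ept, int readable, int writable, int executable)
    {
        uint64_t bits = 0;

        if (is_ept && readable)
            bits |= SK_EPT_READ;                        /* EPT has an explicit read bit */
        if (writable)
            bits |= is_ept ? SK_EPT_WRITE : SK_PTE_WRITE;
        if (is_ept) {
            if (executable)
                bits |= SK_EPT_EX;                      /* EPT: execute-allow bit */
        } else if (!executable) {
            bits |= SK_PTE_NX;                          /* legacy: no-execute bit */
        }
        return bits;
    }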
@@ -1535,6 +1611,7 @@ pmap_expand_pml4(
        uint64_t        i;
        ppnum_t         pn;
        pml4_entry_t    *pml4p;
+       boolean_t       is_ept = is_ept_pmap(map);
 
        DBG("pmap_expand_pml4(%p,%p)\n", map, (void *)vaddr);
 
@@ -1560,7 +1637,7 @@ pmap_expand_pml4(
        pmap_zero_page(pn);
 
        vm_page_lockspin_queues();
-       vm_page_wire(m);
+       vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
        vm_page_unlock_queues();
 
        OSAddAtomic(1,  &inuse_ptepages_count);
@@ -1591,7 +1668,7 @@ pmap_expand_pml4(
                     map, map->pm_obj_pml4, vaddr, i);
        }
 #endif
-       vm_page_insert(m, map->pm_obj_pml4, (vm_object_offset_t)i * PAGE_SIZE);
+       vm_page_insert_wired(m, map->pm_obj_pml4, (vm_object_offset_t)i * PAGE_SIZE, VM_KERN_MEMORY_PTE);
        vm_object_unlock(map->pm_obj_pml4);
 
        /*
@@ -1600,9 +1677,9 @@ pmap_expand_pml4(
        pml4p = pmap64_pml4(map, vaddr); /* refetch under lock */
 
        pmap_store_pte(pml4p, pa_to_pte(pa)
-                               | INTEL_PTE_VALID
-                               | INTEL_PTE_USER
-                               | INTEL_PTE_WRITE);
+                               | PTE_READ(is_ept)
+                               | (is_ept ? INTEL_EPT_EX : INTEL_PTE_USER)
+                               | PTE_WRITE(is_ept));
 
        PMAP_UNLOCK(map);
 
@@ -1617,6 +1694,7 @@ pmap_expand_pdpt(pmap_t map, vm_map_offset_t vaddr, unsigned int options)
        uint64_t        i;
        ppnum_t         pn;
        pdpt_entry_t    *pdptp;
+       boolean_t       is_ept = is_ept_pmap(map);
 
        DBG("pmap_expand_pdpt(%p,%p)\n", map, (void *)vaddr);
 
@@ -1649,7 +1727,7 @@ pmap_expand_pdpt(pmap_t map, vm_map_offset_t vaddr, unsigned int options)
        pmap_zero_page(pn);
 
        vm_page_lockspin_queues();
-       vm_page_wire(m);
+       vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
        vm_page_unlock_queues();
 
        OSAddAtomic(1,  &inuse_ptepages_count);
@@ -1680,7 +1758,7 @@ pmap_expand_pdpt(pmap_t map, vm_map_offset_t vaddr, unsigned int options)
                     map, map->pm_obj_pdpt, vaddr, i);
        }
 #endif
-       vm_page_insert(m, map->pm_obj_pdpt, (vm_object_offset_t)i * PAGE_SIZE);
+       vm_page_insert_wired(m, map->pm_obj_pdpt, (vm_object_offset_t)i * PAGE_SIZE, VM_KERN_MEMORY_PTE);
        vm_object_unlock(map->pm_obj_pdpt);
 
        /*
@@ -1689,9 +1767,9 @@ pmap_expand_pdpt(pmap_t map, vm_map_offset_t vaddr, unsigned int options)
        pdptp = pmap64_pdpt(map, vaddr); /* refetch under lock */
 
        pmap_store_pte(pdptp, pa_to_pte(pa)
-                               | INTEL_PTE_VALID
-                               | INTEL_PTE_USER
-                               | INTEL_PTE_WRITE);
+                               | PTE_READ(is_ept)
+                               | (is_ept ? INTEL_EPT_EX : INTEL_PTE_USER)
+                               | PTE_WRITE(is_ept));
 
        PMAP_UNLOCK(map);
 
@@ -1727,6 +1805,7 @@ pmap_expand(
        register pmap_paddr_t   pa;
        uint64_t                i;
        ppnum_t                 pn;
+       boolean_t               is_ept = is_ept_pmap(map);
 
 
        /*
@@ -1768,7 +1847,7 @@ pmap_expand(
        pmap_zero_page(pn);
 
        vm_page_lockspin_queues();
-       vm_page_wire(m);
+       vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
        vm_page_unlock_queues();
 
        OSAddAtomic(1,  &inuse_ptepages_count);
@@ -1800,7 +1879,7 @@ pmap_expand(
                     map, map->pm_obj, vaddr, i);
        }
 #endif
-       vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i * PAGE_SIZE);
+       vm_page_insert_wired(m, map->pm_obj, (vm_object_offset_t)i * PAGE_SIZE, VM_KERN_MEMORY_PTE);
        vm_object_unlock(map->pm_obj);
 
        /*
@@ -1808,9 +1887,9 @@ pmap_expand(
         */
        pdp = pmap_pde(map, vaddr);
        pmap_store_pte(pdp, pa_to_pte(pa)
-                               | INTEL_PTE_VALID
-                               | INTEL_PTE_USER
-                               | INTEL_PTE_WRITE);
+                               | PTE_READ(is_ept)
+                               | (is_ept ? INTEL_EPT_EX : INTEL_PTE_USER)
+                               | PTE_WRITE(is_ept));
 
        PMAP_UNLOCK(map);
 
@@ -1827,6 +1906,7 @@ pmap_pre_expand(pmap_t pmap, vm_map_offset_t vaddr)
 {
        ppnum_t pn;
        pt_entry_t              *pte;
+       boolean_t               is_ept = is_ept_pmap(pmap);
 
        PMAP_LOCK(pmap);
 
@@ -1839,9 +1919,9 @@ pmap_pre_expand(pmap_t pmap, vm_map_offset_t vaddr)
                pte = pmap64_pml4(pmap, vaddr);
 
                pmap_store_pte(pte, pa_to_pte(i386_ptob(pn))
-                               | INTEL_PTE_VALID
-                               | INTEL_PTE_USER
-                               | INTEL_PTE_WRITE);
+                               | PTE_READ(is_ept)
+                               | (is_ept ? INTEL_EPT_EX : INTEL_PTE_USER)
+                               | PTE_WRITE(is_ept));
        }
 
        if(pmap64_pde(pmap, vaddr) == PD_ENTRY_NULL) {
@@ -1853,9 +1933,9 @@ pmap_pre_expand(pmap_t pmap, vm_map_offset_t vaddr)
                pte = pmap64_pdpt(pmap, vaddr);
 
                pmap_store_pte(pte, pa_to_pte(i386_ptob(pn))
-                               | INTEL_PTE_VALID
-                               | INTEL_PTE_USER
-                               | INTEL_PTE_WRITE);
+                               | PTE_READ(is_ept)
+                               | (is_ept ? INTEL_EPT_EX : INTEL_PTE_USER)
+                               | PTE_WRITE(is_ept));
        }
 
        if(pmap_pte(pmap, vaddr) == PT_ENTRY_NULL) {
@@ -1867,9 +1947,9 @@ pmap_pre_expand(pmap_t pmap, vm_map_offset_t vaddr)
                pte = pmap64_pde(pmap, vaddr);
 
                pmap_store_pte(pte, pa_to_pte(i386_ptob(pn))
-                               | INTEL_PTE_VALID
-                               | INTEL_PTE_USER
-                               | INTEL_PTE_WRITE);
+                               | PTE_READ(is_ept)
+                               | (is_ept ? INTEL_EPT_EX : INTEL_PTE_USER)
+                               | PTE_WRITE(is_ept));
        }
 
        PMAP_UNLOCK(pmap);
@@ -1924,6 +2004,7 @@ pmap_collect(
        register pt_entry_t     *pdp, *ptp;
        pt_entry_t              *eptp;
        int                     wired;
+       boolean_t               is_ept;
 
        if (p == PMAP_NULL)
                return;
@@ -1931,6 +2012,8 @@ pmap_collect(
        if (p == kernel_pmap)
                return;
 
+       is_ept = is_ept_pmap(p);
+
        /*
         *      Garbage collect map.
         */
@@ -1940,75 +2023,74 @@ pmap_collect(
             pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)];
             pdp++)
        {
-          if (*pdp & INTEL_PTE_VALID) {
-             if(*pdp & INTEL_PTE_REF) {
-               pmap_store_pte(pdp, *pdp & ~INTEL_PTE_REF);
-               collect_ref++;
-             } else {
-               collect_unref++;
-               ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase));
-               eptp = ptp + NPTEPG;
+               if (*pdp & PTE_VALID_MASK(is_ept)) {
+                       if (*pdp & PTE_REF(is_ept)) {
+                               pmap_store_pte(pdp, *pdp & ~PTE_REF(is_ept));
+                               collect_ref++;
+                       } else {
+                               collect_unref++;
+                               ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase));
+                               eptp = ptp + NPTEPG;
 
-               /*
-                * If the pte page has any wired mappings, we cannot
-                * free it.
-                */
-               wired = 0;
-               {
-                   register pt_entry_t *ptep;
-                   for (ptep = ptp; ptep < eptp; ptep++) {
-                       if (iswired(*ptep)) {
-                           wired = 1;
-                           break;
+                               /*
+                                * If the pte page has any wired mappings, we cannot
+                                * free it.
+                                */
+                               wired = 0;
+                               {
+                                       register pt_entry_t *ptep;
+                                       for (ptep = ptp; ptep < eptp; ptep++) {
+                                               if (iswired(*ptep)) {
+                                                       wired = 1;
+                                                       break;
+                                               }
+                                       }
+                               }
+                               if (!wired) {
+                                       /*
+                                        * Remove the virtual addresses mapped by this pte page.
+                                        */
+                                               pmap_remove_range(p,
+                                                       pdetova(pdp - (pt_entry_t *)p->dirbase),
+                                                       ptp,
+                                                       eptp);
+
+                                       /*
+                                        * Invalidate the page directory pointer.
+                                        */
+                                       pmap_store_pte(pdp, 0x0);
+
+                                       PMAP_UNLOCK(p);
+
+                                       /*
+                                        * And free the pte page itself.
+                                        */
+                                       {
+                                               register vm_page_t m;
+
+                                               vm_object_lock(p->pm_obj);
+
+                                               m = vm_page_lookup(p->pm_obj,(vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]) * PAGE_SIZE);
+                                               if (m == VM_PAGE_NULL)
+                                                       panic("pmap_collect: pte page not in object");
+
+                                               vm_object_unlock(p->pm_obj);
+
+                                               VM_PAGE_FREE(m);
+
+                                               OSAddAtomic(-1,  &inuse_ptepages_count);
+                                               PMAP_ZINFO_PFREE(p, PAGE_SIZE);
+                                       }
+
+                                       PMAP_LOCK(p);
+                               }
                        }
-                   }
                }
-               if (!wired) {
-                   /*
-                    * Remove the virtual addresses mapped by this pte page.
-                    */
-                   pmap_remove_range(p,
-                               pdetova(pdp - (pt_entry_t *)p->dirbase),
-                               ptp,
-                               eptp);
-
-                   /*
-                    * Invalidate the page directory pointer.
-                    */
-                   pmap_store_pte(pdp, 0x0);
-                
-                   PMAP_UNLOCK(p);
-
-                   /*
-                    * And free the pte page itself.
-                    */
-                   {
-                       register vm_page_t m;
-
-                       vm_object_lock(p->pm_obj);
-
-                       m = vm_page_lookup(p->pm_obj,(vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]) * PAGE_SIZE);
-                       if (m == VM_PAGE_NULL)
-                           panic("pmap_collect: pte page not in object");
-
-                       vm_object_unlock(p->pm_obj);
-
-                       VM_PAGE_FREE(m);
-
-                       OSAddAtomic(-1,  &inuse_ptepages_count);
-                       PMAP_ZINFO_PFREE(p, PAGE_SIZE);
-                   }
-
-                   PMAP_LOCK(p);
-               }
-             }
-          }
        }
 
        PMAP_UPDATE_TLBS(p, 0x0, 0xFFFFFFFFFFFFF000ULL);
        PMAP_UNLOCK(p);
        return;
-
 }
 #endif
 
@@ -2304,7 +2386,7 @@ pmap_flush(
                                mp_cpus_NMIPI(cpus_to_respond);
 
                                panic("TLB invalidation IPI timeout: "
-                                   "CPU(s) failed to respond to interrupts, unresponsive CPU bitmap: 0x%lx, NMIPI acks: orig: 0x%lx, now: 0x%lx",
+                                   "CPU(s) failed to respond to interrupts, unresponsive CPU bitmap: 0x%llx, NMIPI acks: orig: 0x%lx, now: 0x%lx",
                                    cpus_to_respond, orig_acks, NMIPI_acks);
                        }
                }
@@ -2316,6 +2398,19 @@ pmap_flush(
 }
 
 
+static void
+invept(void *eptp)
+{
+       struct {
+               uint64_t eptp;
+               uint64_t reserved;
+       } __attribute__((aligned(16), packed)) invept_descriptor = {(uint64_t)eptp, 0};
+
+       __asm__ volatile("invept (%%rax), %%rcx"
+               : : "c" (PMAP_INVEPT_SINGLE_CONTEXT), "a" (&invept_descriptor)
+               : "cc", "memory");
+}
+
 /*
  * Called with pmap locked, we:
  *  - scan through per-cpu data to see which other cpus need to flush
@@ -2339,15 +2434,27 @@ pmap_flush_tlbs(pmap_t  pmap, vm_map_offset_t startv, vm_map_offset_t endv, int o
        boolean_t       pmap_is_shared = (pmap->pm_shared || (pmap == kernel_pmap));
        boolean_t       need_global_flush = FALSE;
        uint32_t        event_code;
+       boolean_t       is_ept = is_ept_pmap(pmap);
 
        assert((processor_avail_count < 2) ||
               (ml_get_interrupts_enabled() && get_preemption_level() != 0));
 
-       event_code = (pmap == kernel_pmap) ? PMAP_CODE(PMAP__FLUSH_KERN_TLBS)
-                                          : PMAP_CODE(PMAP__FLUSH_TLBS);
+       if (pmap == kernel_pmap) {
+               event_code = PMAP_CODE(PMAP__FLUSH_KERN_TLBS);
+       } else if (is_ept) {
+               event_code = PMAP_CODE(PMAP__FLUSH_EPT);
+       } else {
+               event_code = PMAP_CODE(PMAP__FLUSH_TLBS);
+       }
+
        PMAP_TRACE_CONSTANT(event_code | DBG_FUNC_START,
                            pmap, options, startv, endv, 0);
 
+       if (is_ept) {
+               mp_cpus_call(CPUMASK_ALL, ASYNC, invept, (void*)pmap->pm_eptp);
+               goto out;
+       }
+
        /*
         * Scan other cpus for matching active or task CR3.
         * For idle cpus (with no active map) we mark them invalid but
@@ -2476,7 +2583,7 @@ pmap_flush_tlbs(pmap_t    pmap, vm_map_offset_t startv, vm_map_offset_t endv, int o
                                mp_cpus_NMIPI(cpus_to_respond);
 
                                panic("TLB invalidation IPI timeout: "
-                                   "CPU(s) failed to respond to interrupts, unresponsive CPU bitmap: 0x%lx, NMIPI acks: orig: 0x%lx, now: 0x%lx",
+                                   "CPU(s) failed to respond to interrupts, unresponsive CPU bitmap: 0x%llx, NMIPI acks: orig: 0x%lx, now: 0x%lx",
                                    cpus_to_respond, orig_acks, NMIPI_acks);
                        }
                }
@@ -2539,6 +2646,8 @@ pmap_permissions_verify(pmap_t ipmap, vm_map_t ivmmap, vm_offset_t sv, vm_offset
        kern_return_t rv = KERN_SUCCESS;
        uint64_t skip4 = 0, skip2 = 0;
 
+       assert(!is_ept_pmap(ipmap));
+
        sv &= ~PAGE_MASK_64;
        ev &= ~PAGE_MASK_64;
        while (cv < ev) {
index 9baf0ab3bdcd792beb7cbb6b096c03543963ffb0..d94e1ee77a6257041a9de290454b947b0b7d58d1 100644 (file)
@@ -63,7 +63,6 @@
 #include <i386/postcode.h>
 #include <assym.s>
 
-#include <i386/mp.h>
 #include <i386/cpuid.h>
 #include <i386/acpi.h>
 
index d8ac9dfba62212a0a209129240ff2207fcbb82b0..656af69bc81cf17035547bfeaf8f3d2db7fa71fd 100644 (file)
@@ -11,12 +11,14 @@ INSTINC_SUBDIRS = pexpert
 INSTINC_SUBDIRS_X86_64 = pexpert
 INSTINC_SUBDIRS_X86_64H = pexpert
 INSTINC_SUBDIRS_ARM = pexpert
+INSTINC_SUBDIRS_ARM64 = pexpert
 
 
 EXPINC_SUBDIRS = pexpert
 EXPINC_SUBDIRS_X86_64 = pexpert
 EXPINC_SUBDIRS_X86_64H = pexpert
 EXPINC_SUBDIRS_ARM = pexpert
+EXPINC_SUBDIRS_ARM64 = pexpert
 
 COMP_SUBDIRS =         \
        conf
index 60c3c87b44740e2fabe195d417d6dde5601c1f16..ffcbdbe1d8b377f82c6efa302cd0f23842747dfb 100644 (file)
@@ -66,9 +66,9 @@ $(SOBJS): .SFLAGS
 
 $(COMPONENT).filelist: $(OBJS)
        @echo LDFILELIST $(COMPONENT)
-       $(_v)( for obj in ${OBJS}; do   \
+       $(_v)for obj in ${OBJS}; do     \
                 echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \
-       done; ) > $(COMPONENT).filelist
+       done > $(COMPONENT).filelist
 
 do_all: $(COMPONENT).filelist
 
index d4d8e3caa99b53e94e55216214e0fa112d1eb76c..c5efead48273faaf190fecdbd5ff09dff00f54d9 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
+ *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
  * unlawful or unlicensed copies of an Apple operating system, or to
  * circumvent, violate, or enable the circumvention or violation of, any
  * terms of an Apple operating system software license agreement.
- * 
+ *
  * Please obtain a copy of the License at
  * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
+ *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- * 
+ *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 #include <pexpert/pexpert.h>
 #include <pexpert/device_tree.h>
 
+typedef boolean_t (*argsep_func_t) (char c);
+
 static boolean_t isargsep( char c);
+static boolean_t israngesep( char c);
 static int argstrcpy(char *from, char *to);
 static int argstrcpy2(char *from,char *to, unsigned maxlen);
-static int argnumcpy(int val, void *to, unsigned maxlen);
-static int getval(char *s, int *val);
+static int argnumcpy(long long val, void *to, unsigned maxlen);
+static int getval(char *s, long long *val, argsep_func_t issep, boolean_t skip_equal_sign);
+boolean_t get_range_bounds(char * c, int64_t * lower, int64_t * upper);
 
 extern int IODTGetDefault(const char *key, void *infoAddr, unsigned int infoSize);
 
@@ -46,7 +50,7 @@ struct i24 {
 #define        STR     1
 
 #if !defined(__LP64__) && !defined(__arm__)
-boolean_t 
+boolean_t
 PE_parse_boot_arg(
        const char  *arg_string,
        void            *arg_ptr)
@@ -67,7 +71,7 @@ PE_parse_boot_argn(
        char *args;
        char *cp, c;
        uintptr_t i;
-       int val;
+       long long val;
        boolean_t arg_boolean;
        boolean_t arg_found;
 
@@ -81,7 +85,7 @@ PE_parse_boot_argn(
 
        while (*args)
        {
-               if (*args == '-') 
+               if (*args == '-')
                        arg_boolean = TRUE;
                else
                        arg_boolean = FALSE;
@@ -116,7 +120,7 @@ PE_parse_boot_argn(
                                arg_found = TRUE;
                                break;
                        }
-                       switch (getval(cp, &val)) 
+                       switch (getval(cp, &val, isargsep, FALSE))
                        {
                                case NUM:
                                        argnumcpy(val, arg_ptr, max_len);
@@ -144,18 +148,26 @@ gotit:
 }
 
 static boolean_t
-isargsep(
-       char c)
+isargsep(char c)
 {
        if (c == ' ' || c == '\0' || c == '\t')
-               return(TRUE);
+               return (TRUE);
        else
-               return(FALSE);
+               return (FALSE);
+}
+
+static boolean_t
+israngesep(char c)
+{
+       if (isargsep(c) || c == '_' || c == ',')
+               return (TRUE);
+       else
+               return (FALSE);
 }
 
 static int
 argstrcpy(
-       char *from, 
+       char *from,
        char *to)
 {
        int i = 0;
@@ -170,7 +182,7 @@ argstrcpy(
 
 static int
 argstrcpy2(
-       char *from, 
+       char *from,
        char *to,
        unsigned maxlen)
 {
@@ -184,7 +196,7 @@ argstrcpy2(
        return(i);
 }
 
-static int argnumcpy(int val, void *to, unsigned maxlen)
+static int argnumcpy(long long val, void *to, unsigned maxlen)
 {
        switch (maxlen) {
                case 0:
@@ -201,6 +213,11 @@ static int argnumcpy(int val, void *to, unsigned maxlen)
                        ((struct i24 *)to)->i24 = val;
                        break;
                case 4:
+                       *(int32_t *)to = val;
+                       break;
+               case 8:
+                       *(int64_t *)to = val;
+                       break;
                default:
                        *(int32_t *)to = val;
                        maxlen = 4;
@@ -212,15 +229,22 @@ static int argnumcpy(int val, void *to, unsigned maxlen)
 
 static int
 getval(
-       char *s, 
-       int *val)
+       char *s,
+       long long *val,
+       argsep_func_t issep,
+       boolean_t skip_equal_sign )
 {
-       unsigned int radix, intval;
-    unsigned char c;
+       unsigned long long radix, intval;
+       unsigned char c;
        int sign = 1;
+       boolean_t has_value = FALSE;
 
        if (*s == '=') {
                s++;
+               has_value = TRUE;
+       }
+
+       if (has_value || skip_equal_sign) {
                if (*s == '-')
                        sign = -1, s++;
                intval = *s++-'0';
@@ -246,7 +270,7 @@ getval(
                                break;
 
                        default:
-                               if (!isargsep(*s))
+                               if (!issep(*s))
                                        return (STR);
                        }
                 } else if (intval >= radix) {
@@ -254,7 +278,7 @@ getval(
                 }
                for(;;) {
                         c = *s++;
-                        if (isargsep(c))
+                        if (issep(c))
                             break;
                         if ((radix <= 10) &&
                             ((c >= '0') && (c <= ('9' - (10 - radix))))) {
@@ -285,7 +309,7 @@ getval(
                        intval *= radix;
                        intval += c;
                }
-                if (!isargsep(c) && !isargsep(*s))
+                if (!issep(c) && !issep(*s))
                     return STR;
                *val = intval * sign;
                return (NUM);
@@ -294,7 +318,7 @@ getval(
        return (NUM);
 }
 
-boolean_t 
+boolean_t
 PE_imgsrc_mount_supported()
 {
        return TRUE;
@@ -341,3 +365,38 @@ PE_get_default(
         */
        return IODTGetDefault(property_name, property_ptr, max_property) ? FALSE : TRUE;
 }
+
+/* function: get_range_bounds
+ * Parse a range string like "1_3,5_20" and return 1 and 3 as lower and upper.
+ * Note: '_' separates the lower and upper bounds of a range, and
+ *       ',' separates one range pair from the next.
+ * Returns TRUE when both range values are found.
+ */
+boolean_t
+get_range_bounds(char *c, int64_t *lower, int64_t *upper)
+{
+       if (c == NULL || lower == NULL || upper == NULL) {
+               return FALSE;
+       }
+
+       if (NUM != getval(c, lower, israngesep, TRUE)) {
+               return FALSE;
+       }
+
+       while (*c != '\0') {
+               if (*c == '_') {
+                       break;
+               }
+               c++;
+       }
+
+       if (*c == '_') {
+               c++;
+               if (NUM != getval(c, upper, israngesep, TRUE)) {
+                       return FALSE;
+               }
+       } else {
+               return FALSE;
+       }
+       return TRUE;
+}
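A minimal usage sketch for the new parser; the call site below is illustrative only and not part of this change:

/* Illustrative only: split the first "lower_upper" pair out of a range string. */
static void
range_example(void)
{
	int64_t lo = 0, hi = 0;

	if (get_range_bounds("1_3,5_20", &lo, &hi)) {
		/* lo == 1, hi == 3; the ",5_20" tail is left for the caller to re-scan. */
		kprintf("range %lld..%lld\n", lo, hi);
	}
}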
index 15eaf656dbbd46905a58d72d8ba58ae21f4d7695..5bab0cc1401fdf2d028bc3c6d88219f9a818e569 100644 (file)
 #include <kern/debug.h>
 
 static int DEBUGFlag;
+static uint32_t gPEKernelConfigurationBitmask;
 
 int32_t gPESerialBaud = -1;
 
 void pe_init_debug(void)
 {
-  if (!PE_parse_boot_argn("debug", &DEBUGFlag, sizeof (DEBUGFlag)))
-    DEBUGFlag = 0;
+       boolean_t boot_arg_value;
+
+       if (!PE_parse_boot_argn("debug", &DEBUGFlag, sizeof (DEBUGFlag)))
+               DEBUGFlag = 0;
+
+       gPEKernelConfigurationBitmask = 0;
+
+       if (!PE_parse_boot_argn("assertions", &boot_arg_value, sizeof(boot_arg_value))) {
+#if MACH_ASSERT
+               boot_arg_value = TRUE;
+#else
+               boot_arg_value = FALSE;
+#endif
+       }
+       gPEKernelConfigurationBitmask |= (boot_arg_value ? kPEICanHasAssertions : 0);
+
+       if (!PE_parse_boot_argn("statistics", &boot_arg_value, sizeof(boot_arg_value))) {
+#if DEVELOPMENT || DEBUG
+               boot_arg_value = TRUE;
+#else
+               boot_arg_value = FALSE;
+#endif
+       }
+       gPEKernelConfigurationBitmask |= (boot_arg_value ? kPEICanHasStatistics : 0);
+
+#if SECURE_KERNEL
+       boot_arg_value = FALSE;
+#else
+       if (!PE_i_can_has_debugger(NULL)) {
+               boot_arg_value = FALSE;
+       } else if (!PE_parse_boot_argn("diagnostic_api", &boot_arg_value, sizeof(boot_arg_value)))  {
+               boot_arg_value = TRUE;
+       }
+#endif
+       gPEKernelConfigurationBitmask |= (boot_arg_value ? kPEICanHasDiagnosticAPI : 0);
+
 }
 
 void PE_enter_debugger(const char *cause)
@@ -50,6 +85,12 @@ void PE_enter_debugger(const char *cause)
     Debugger(cause);
 }
 
+uint32_t
+PE_i_can_has_kernel_configuration(void)
+{
+       return gPEKernelConfigurationBitmask;
+}
+
 /* extern references */
 extern void vcattach(void);
 
index e154c7a784d676c46a46317ce664ee98e77254a9..35d44a25c20c746a37937392a1a6f34a5c731452 100644 (file)
 #include <kern/sched_prim.h>
 #include <kern/debug.h>
 
+#if CONFIG_CSR
+#include <sys/csr.h>
+#endif
+
 #include "boot_images.h"
 
 /* extern references */
@@ -101,12 +105,6 @@ void PE_init_iokit(void)
 {
     enum { kMaxBootVar = 128 };
         
-    typedef struct {
-        char            name[32];
-        unsigned long   length;
-        unsigned long   value[2];
-    } DriversPackageProp;
-
     boolean_t bootClutInitialized = FALSE;
     boolean_t noroot_rle_Initialized = FALSE;
 
@@ -178,7 +176,7 @@ void PE_init_iokit(void)
                            default_progress_data3x, 
                            (unsigned char *) appleClut8);
 
-    (void) StartIOKit( PE_state.deviceTreeHead, PE_state.bootArgs, gPEEFIRuntimeServices, NULL);
+    StartIOKit( PE_state.deviceTreeHead, PE_state.bootArgs, gPEEFIRuntimeServices, NULL);
 }
 
 void PE_init_platform(boolean_t vm_initialized, void * _args)
@@ -326,3 +324,21 @@ PE_reboot_on_panic(void)
        else
                return FALSE;
 }
+
+/* rdar://problem/21244753 */
+uint32_t
+PE_i_can_has_debugger(uint32_t *debug_flags)
+{
+#if CONFIG_CSR
+       if (csr_check(CSR_ALLOW_KERNEL_DEBUGGER) != 0 &&
+           csr_check(CSR_ALLOW_APPLE_INTERNAL) != 0) {
+               if (debug_flags)
+                       *debug_flags = 0;
+               return FALSE;
+       }
+#endif
+       if (debug_flags) {
+               *debug_flags = debug_boot_arg;
+       }
+       return TRUE;
+}
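PE_i_can_has_debugger() now refuses kernel debugging when CSR (System Integrity Protection) forbids it, and otherwise hands back the debug boot-arg flags. A hedged caller sketch follows; the surrounding code is illustrative, not from this commit, and DB_HALT merely stands in for whichever debug_boot_arg bit a caller cares about:

/* Illustrative caller: only honor debug boot-arg bits when a kernel
 * debugger is allowed by the current configuration. */
uint32_t flags = 0;

if (PE_i_can_has_debugger(&flags) && (flags & DB_HALT)) {
	/* Safe to drop into the debugger on panic. */
}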
index 3f655631b10837689b61eb0b3f9f1c9689eebad8..b4912b42bbf0417a3bf3f9154d9038cfe543268e 100644 (file)
@@ -42,7 +42,7 @@
 /* Globals */
 void (*PE_kputc)(char c);
 
-#if DEBUG
+#if DEVELOPMENT || DEBUG 
 /* DEBUG kernel starts with true serial, but
  * may later disable or switch to video
  * console */
@@ -115,7 +115,7 @@ void kprintf(const char *fmt, ...)
                 * take any locks, just dump to serial */
                if (!PE_kputc || early) {
                        va_start(listp, fmt);
-                       _doprnt(fmt, &listp, pal_serial_putc, 16);
+                       _doprnt_log(fmt, &listp, pal_serial_putc, 16);
                        va_end(listp);
                        return;
                }
index 50a0293d9de0a3c3c03dc504484df8dd7c7718df..4c2e34956685e9d6bb7fa3e8b8e0c96f2d18faa3 100644 (file)
@@ -14,20 +14,30 @@ INSTINC_SUBDIRS_X86_64 = \
 INSTINC_SUBDIRS_X86_64H = \
        i386
 INSTINC_SUBDIRS_ARM = \
-       arm
+       arm     \
+       arm64
+INSTINC_SUBDIRS_ARM64 = \
+       arm     \
+       arm64
 
 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
 EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64}
 EXPINC_SUBDIRS_X86_64H = ${INSTINC_SUBDIRS_X86_64H}
 EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM}
+EXPINC_SUBDIRS_ARM64 = ${INSTINC_SUBDIRS_ARM64}
 
 DATAFILES = \
         boot.h \
        protos.h \
        pexpert.h
 
+PRIVATE_DATAFILES = \
+       pexpert.h
+
 INSTALL_MI_LIST        = ${DATAFILES}
 
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
+
 INSTALL_MI_DIR = pexpert
 
 EXPORT_MI_LIST = ${DATAFILES} \
index ad6430635027355308985d39290fdd9cf27feb40..fcb93c6ad1683c48604d81af95152f19fe9db89c 100644 (file)
@@ -126,10 +126,11 @@ typedef struct boot_icon_element boot_icon_element;
 #define kBootArgsFlagHiDPI             (1 << 1)
 #define kBootArgsFlagBlack             (1 << 2)
 #define kBootArgsFlagCSRActiveConfig   (1 << 3)
-#define kBootArgsFlagCSRPendingConfig  (1 << 4)
+#define kBootArgsFlagCSRConfigMode     (1 << 4)
 #define kBootArgsFlagCSRBoot           (1 << 5)
 #define kBootArgsFlagBlackBg           (1 << 6)
 #define kBootArgsFlagLoginUI           (1 << 7)
+#define kBootArgsFlagInstallUI         (1 << 8)
 
 typedef struct boot_args {
     uint16_t    Revision;      /* Revision of boot_args structure */
@@ -174,9 +175,11 @@ typedef struct boot_args {
     uint32_t    pciConfigSpaceStartBusNumber;
     uint32_t    pciConfigSpaceEndBusNumber;
     uint32_t   csrActiveConfig;
-    uint32_t   csrPendingConfig;
+    uint32_t   csrCapabilities;
     uint32_t    boot_SMC_plimit;
-    uint32_t    __reserved4[727];
+    uint16_t    bootProgressMeterStart;
+    uint16_t    bootProgressMeterEnd;
+    uint32_t    __reserved4[726];
 
 } boot_args;
 
index 34498150b2a363715fbd9393afa02c8d42b61ab8..5dca696cbd0e5bbb86c5761d3dc93b6a91c6af8e 100644 (file)
@@ -68,12 +68,34 @@ uint32_t PE_get_random_seed(
        unsigned char * dst_random_seed,
        uint32_t request_size);
 
+uint32_t PE_i_can_has_debugger(
+       uint32_t *);
+
+
+#if KERNEL_PRIVATE
+
+/*
+ * Kexts should consult this bitmask to change behavior, since the kernel
+ * may be configured as RELEASE but have MACH_ASSERT enabled, or boot-args
+ * may have changed the kernel's behavior for statistics; kexts should
+ * participate accordingly.
+ */
+
+#define kPEICanHasAssertions   0x00000001      /* Exceptional conditions should panic() instead of printf() */
+#define kPEICanHasStatistics   0x00000002      /* Gather expensive statistics (that don't otherwise change behavior) */
+#define kPEICanHasDiagnosticAPI        0x00000004      /* Vend API to userspace or kexts that introspect kernel state */
+
+extern uint32_t PE_i_can_has_kernel_configuration(void);
+
+#endif /* KERNEL_PRIVATE */
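As the comment above suggests, a kext should key off this bitmask at runtime rather than off the kernel configuration name. A minimal kext-side sketch (the surrounding logic is hypothetical; only the function and flag come from this change):

/* Gather expensive statistics only when the running kernel is configured for them. */
if (PE_i_can_has_kernel_configuration() & kPEICanHasStatistics) {
	/* allocate counters, enable sampling, etc. */
}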
 
 void PE_init_kprintf(
        boolean_t vm_initialized);
 
 extern int32_t gPESerialBaud;
 
+extern uint8_t gPlatformECID[8];
+
 unsigned int PE_init_taproot(vm_offset_t *taddr);
 
 extern void (*PE_kputc)(char c);
@@ -295,6 +317,14 @@ extern void PE_cpu_signal(
        cpu_id_t source,
        cpu_id_t target);
 
+extern void PE_cpu_signal_deferred(
+       cpu_id_t source,
+       cpu_id_t target);
+
+extern void PE_cpu_signal_cancel(
+       cpu_id_t source,
+       cpu_id_t target);
+
 extern void PE_cpu_machine_init(
        cpu_id_t target,
        boolean_t bootb);
index d54464467c839c9ebb485175c44a19a546417d97..4659fcc68c5fd681338eba181d837adb096f00a1 100644 (file)
@@ -57,6 +57,13 @@ _doprnt(
         void                    (*putc)(char),
         int                     radix);
 
+extern void    
+_doprnt_log(
+        register const char     *fmt,
+        va_list                 *argp,
+        void                    (*putc)(char),
+        int                     radix);
+
 #include <machine/io_map_entries.h>
 
 //------------------------------------------------------------------------
@@ -78,7 +85,7 @@ void Debugger(const char *message);
 //------------------------------------------------------------------------
 
 // from iokit/IOStartIOKit.cpp
-extern int StartIOKit( void * p1, void * p2, void * p3, void * p4);
+extern void StartIOKit( void * p1, void * p2, void * p3, void * p4);
 
 // from iokit/Families/IOFramebuffer.cpp
 extern unsigned char appleClut8[ 256 * 3 ];
index 07c961c42585fca333b94b9db05b6210ed13c3b9..d917f8827198965c8583cd4b1e71383e2a78378a 100644 (file)
@@ -16,23 +16,24 @@ PRIVATE_DATAFILES = \
        mac_alloc.h \
        mac_data.h \
        mac_framework.h \
+       mac_internal.h \
        mac_mach_internal.h \
-       mac_internal.h
+       mac_policy.h
 
 # Installed in /usr/include/security/
 INSTALL_MI_LIST = ${DATAFILES} 
 
 INSTALL_MI_DIR = security
 
-EXPORT_MI_LIST = ${DATAFILES} ${PRIVATE_DATAFILES}
+EXPORT_MI_LIST = $(sort ${DATAFILES} ${PRIVATE_DATAFILES})
 
 EXPORT_MI_DIR = security
 
 # /System/Library/Frameworks/System.framework/PrivateHeaders
-INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES}
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
 
 # /System/Library/Frameworks/Kernel.framework/PrivateHeaders
-INSTALL_KF_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES}
+INSTALL_KF_MI_LCL_LIST = $(sort ${DATAFILES} ${PRIVATE_DATAFILES})
 
 COMP_SUBDIRS = conf
 
index 975f64d84c860e049995fcdede1d741ba03cb628..9e94a12ca05e83b36de3356d8000b50e91ddf0a9 100644 (file)
@@ -75,9 +75,9 @@ $(SOBJS): .SFLAGS
 
 $(COMPONENT).filelist: $(OBJS)
        @echo LDFILELIST $(COMPONENT)
-       $(_v)( for obj in ${OBJS}; do   \
+       $(_v)for obj in ${OBJS}; do     \
                 echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \
-       done; ) > $(COMPONENT).filelist
+       done > $(COMPONENT).filelist
 
 do_all: $(COMPONENT).filelist
 
index 2d6761fd3a74d4b5f93bdaeca434415d273ca413..a2cd80b6fecbc6fc074a870323fbf22b1feaa19d 100644 (file)
@@ -33,3 +33,4 @@ security/mac_inet.c                                   optional config_macf_net
 security/mac_priv.c                                    optional config_macf
 security/mac_pty.c                                     optional config_macf
 security/mac_kext.c                                    optional config_macf
+security/mac_mach.c                                    optional config_macf
index 14800566a93ad44a874f2d266608abea080e93a3..9563a77969b6f36c0eaad1d6cc602d16f78b9713 100644 (file)
@@ -102,6 +102,14 @@ typedef struct mac *mac_t;
 #warning "MAC policy is not KPI, see Technical Q&A QA1574"
 #endif
 
+#if DEBUG
+#define SECURITY_MAC_CTLFLAGS (CTLFLAG_RW | CTLFLAG_LOCKED)
+#define SECURITY_MAC_CHECK_ENFORCE 1
+#else
+#define SECURITY_MAC_CTLFLAGS (CTLFLAG_RD | CTLFLAG_LOCKED)
+#define SECURITY_MAC_CHECK_ENFORCE 0
+#endif
+
 struct user_mac {
        user_size_t     m_buflen;
        user_addr_t     m_string;
@@ -172,14 +180,11 @@ __BEGIN_DECLS
 int     __mac_execve(char *fname, char **argv, char **envv, mac_t _label);
 int     __mac_get_fd(int _fd, mac_t _label);
 int     __mac_get_file(const char *_path, mac_t _label);
-int     __mac_get_lcid(pid_t _lcid, mac_t _label);
-int     __mac_get_lctx(mac_t _label);
 int     __mac_get_link(const char *_path, mac_t _label);
 int     __mac_get_pid(pid_t _pid, mac_t _label);
 int     __mac_get_proc(mac_t _label);
 int     __mac_set_fd(int _fildes, const mac_t _label);
 int     __mac_set_file(const char *_path, mac_t _label);
-int     __mac_set_lctx(mac_t _label);
 int     __mac_set_link(const char *_path, mac_t _label);
 int     __mac_mount(const char *type, const char *path, int flags, void *data,
     struct mac *label);
index 7fe8b5705a41c731e5d5d9a3811cc0b22b1e6c7d..2454b57aa980bda2c8aaaa5b41961ab8f596c8b9 100644 (file)
@@ -116,8 +116,13 @@ mac_proc_check_getauid(struct proc *curp)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_proc_enforce ||
-           !mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_proc_enforce)
+        return 0;
+#endif
+    
+       if (!mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
                return 0;
 
        cred = kauth_cred_proc_ref(curp);
@@ -133,9 +138,13 @@ mac_proc_check_setauid(struct proc *curp, uid_t auid)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_proc_enforce ||
-           !mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_proc_enforce)
+        return 0;
+#endif
+    if (!mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
+        return 0;
 
        cred = kauth_cred_proc_ref(curp);
        MAC_CHECK(proc_check_setauid, cred, auid);
@@ -150,9 +159,13 @@ mac_proc_check_getaudit(struct proc *curp)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_proc_enforce ||
-           !mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_proc_enforce)
+        return 0;
+#endif
+    if (!mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
+        return 0;
 
        cred = kauth_cred_proc_ref(curp);
        MAC_CHECK(proc_check_getaudit, cred);
@@ -167,9 +180,13 @@ mac_proc_check_setaudit(struct proc *curp, struct auditinfo_addr *ai)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_proc_enforce ||
-           !mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_proc_enforce)
+        return 0;
+#endif
+    if (!mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
+        return 0;
 
        cred = kauth_cred_proc_ref(curp);
        MAC_CHECK(proc_check_setaudit, cred, ai);
index c88c7e14a48fddbe196b8f9b42e9da2252a67504..d75117bc8d0d8261af951280a8575ba59a15757a 100644 (file)
@@ -124,12 +124,6 @@ SYSCTL_NODE(, OID_AUTO, security, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
 SYSCTL_NODE(_security, OID_AUTO, mac, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
     "TrustedBSD MAC policy controls");
 
-#if DEBUG
-#define SECURITY_MAC_CTLFLAGS CTLFLAG_RW | CTLFLAG_LOCKED
-#else
-#define SECURITY_MAC_CTLFLAGS CTLFLAG_RD | CTLFLAG_LOCKED
-#endif
-
 /*
  * Declare that the kernel provides MAC support, version 1.  This permits
  * modules to refuse to be loaded if the necessary support isn't present,
@@ -1311,231 +1305,6 @@ out:
        return (error);
 }
 
-#if CONFIG_LCTX
-/*
- * __mac_get_lcid: 
- *     Get login context ID.  A login context associates a BSD process 
- *     with an instance of a user.  For more information see getlcid(2) man page.
- *
- * Parameters:    p                        Process requesting the get
- *                uap                      User argument descriptor (see below)
- *                ret                      (ignored)
- *
- * Indirect:      uap->lcid                login context ID to search
- *                uap->mac_p.m_buflen      MAC info buffer size
- *                uap->mac_p.m_string      MAC info user address
- *
- * Returns:        0                       Success
- *                !0                       Not success
- */
-int
-__mac_get_lcid(proc_t p, struct __mac_get_lcid_args *uap, int *ret __unused)
-{
-       char *elements, *buffer;
-       struct user_mac mac;
-       struct lctx *l;
-       int error;
-       size_t ulen;
-
-       AUDIT_ARG(value32, uap->lcid);
-       if (IS_64BIT_PROCESS(p)) {
-               struct user64_mac mac64;
-               error = copyin(uap->mac_p, &mac64, sizeof(mac64));
-               mac.m_buflen = mac64.m_buflen;
-               mac.m_string = mac64.m_string;
-       } else {
-               struct user32_mac mac32;
-               error = copyin(uap->mac_p, &mac32, sizeof(mac32));
-               mac.m_buflen = mac32.m_buflen;
-               mac.m_string = mac32.m_string;
-       }
-
-       if (error)
-               return (error);
-
-       error = mac_check_structmac_consistent(&mac);
-       if (error)
-               return (error);
-
-       l = lcfind(uap->lcid);
-       if (l == NULL)
-               return (ESRCH);
-
-       MALLOC(elements, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
-       error = copyinstr(mac.m_string, elements, mac.m_buflen, &ulen);
-       if (error) {
-               LCTX_UNLOCK(l);
-               FREE(elements, M_MACTEMP);
-               return (error);
-       }
-       AUDIT_ARG(mac_string, elements);
-       MALLOC(buffer, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
-       error = mac_lctx_label_externalize(l->lc_label, elements,
-                                          buffer, mac.m_buflen);
-       if (error == 0)
-               error = copyout(buffer, mac.m_string, strlen(buffer)+1);
-
-       LCTX_UNLOCK(l);
-       FREE(buffer, M_MACTEMP);
-       FREE(elements, M_MACTEMP);
-       return (error);
-}
-
-/*
- * __mac_get_lctx:
- *     Get login context label.  A login context associates a BSD process
- *     associated with an instance of a user.
- *
- * Parameters:    p                        Process requesting the get
- *                uap                      User argument descriptor (see below)
- *                ret                      (ignored)
- *
- * Indirect:      uap->lcid                login context ID to search
- *                uap->mac_p               MAC info 
- *
- * Returns:        0                       Success
- *                !0                       Not success
- *
- */
-int
-__mac_get_lctx(proc_t p, struct __mac_get_lctx_args *uap, int *ret __unused)
-{
-       char *elements, *buffer;
-       struct user_mac mac;
-       int error;
-       size_t ulen;
-
-       if (IS_64BIT_PROCESS(p)) {
-               struct user64_mac mac64;
-               error = copyin(uap->mac_p, &mac64, sizeof(mac64));
-               mac.m_buflen = mac64.m_buflen;
-               mac.m_string = mac64.m_string;
-       } else {
-               struct user32_mac mac32;
-               error = copyin(uap->mac_p, &mac32, sizeof(mac32));
-               mac.m_buflen = mac32.m_buflen;
-               mac.m_string = mac32.m_string;
-       }
-
-       if (error)
-               return (error);
-
-       error = mac_check_structmac_consistent(&mac);
-       if (error)
-               return (error);
-
-       MALLOC(elements, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
-       error = copyinstr(mac.m_string, elements, mac.m_buflen, &ulen);
-       if (error) {
-               FREE(elements, M_MACTEMP);
-               return (error);
-       }
-       AUDIT_ARG(mac_string, elements);
-       MALLOC(buffer, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
-
-       proc_lock(p);
-       if (p->p_lctx == NULL) {
-               proc_unlock(p);
-               error = ENOENT;
-               goto out;
-       }
-
-       error = mac_lctx_label_externalize(p->p_lctx->lc_label,
-                                          elements, buffer, mac.m_buflen);
-       proc_unlock(p);
-       if (error == 0)
-               error = copyout(buffer, mac.m_string, strlen(buffer)+1);
-
-out:
-       FREE(buffer, M_MACTEMP);
-       FREE(elements, M_MACTEMP);
-       return (error);
-}
-
-int
-__mac_set_lctx(proc_t p, struct __mac_set_lctx_args *uap, int *ret __unused)
-{
-       struct user_mac mac;
-       struct label *intlabel;
-       char *buffer;
-       int error;
-       size_t ulen;
-
-       if (IS_64BIT_PROCESS(p)) {
-               struct user64_mac mac64;
-               error = copyin(uap->mac_p, &mac64, sizeof(mac64));
-               mac.m_buflen = mac64.m_buflen;
-               mac.m_string = mac64.m_string;
-       } else {
-               struct user32_mac mac32;
-               error = copyin(uap->mac_p, &mac32, sizeof(mac32));
-               mac.m_buflen = mac32.m_buflen;
-               mac.m_string = mac32.m_string;
-       }
-       if (error)
-               return (error);
-
-       error = mac_check_structmac_consistent(&mac);
-       if (error)
-               return (error);
-
-       MALLOC(buffer, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
-       error = copyinstr(mac.m_string, buffer, mac.m_buflen, &ulen);
-       if (error) {
-               FREE(buffer, M_MACTEMP);
-               return (error);
-       }
-       AUDIT_ARG(mac_string, buffer);
-
-       intlabel = mac_lctx_label_alloc();
-       error = mac_lctx_label_internalize(intlabel, buffer);
-       FREE(buffer, M_MACTEMP);
-       if (error)
-               goto out;
-
-       proc_lock(p);
-       if (p->p_lctx == NULL) {
-               proc_unlock(p);
-               error = ENOENT;
-               goto out;
-       }
-
-       error = mac_lctx_check_label_update(p->p_lctx, intlabel);
-       if (error) {
-               proc_unlock(p);
-               goto out;
-       }
-       mac_lctx_label_update(p->p_lctx, intlabel);
-       proc_unlock(p);
-out:
-       mac_lctx_label_free(intlabel);
-       return (error);
-}
-
-#else  /* LCTX */
-
-int
-__mac_get_lcid(proc_t p __unused, struct __mac_get_lcid_args *uap __unused, int *ret __unused)
-{
-
-       return (ENOSYS);
-}
-
-int
-__mac_get_lctx(proc_t p __unused, struct __mac_get_lctx_args *uap __unused, int *ret __unused)
-{
-
-       return (ENOSYS);
-}
-
-int
-__mac_set_lctx(proc_t p __unused, struct __mac_set_lctx_args *uap __unused, int *ret __unused)
-{
-
-       return (ENOSYS);
-}
-#endif /* !LCTX */
-
 int
 __mac_get_fd(proc_t p, struct __mac_get_fd_args *uap, int *ret __unused)
 {
@@ -2197,20 +1966,10 @@ void mac_label_set(struct label *l __unused, int slot __unused, intptr_t v __unu
                return;
 }
 
-struct label *mac_thread_get_threadlabel(struct thread *thread __unused)
-{
-        return NULL;
-}
-
-struct label *mac_thread_get_uthreadlabel(struct uthread *uthread __unused)
-{
-        return NULL;
-}
-
 void mac_proc_set_enforce(proc_t p, int enforce_flags);
 void mac_proc_set_enforce(proc_t p __unused, int enforce_flags __unused)
 {
-               return;
+       return;
 }
 
 int mac_iokit_check_hid_control(kauth_cred_t cred __unused);
index 626b3e0b844259d0ef1f954621fb903e32c73a6b..c1aa3281be8f264debe7fed9d9b435daab071d91 100644 (file)
@@ -204,13 +204,13 @@ mac_file_check_lock(struct ucred *cred, struct fileglob *fg, int op,
  */
 int
 mac_file_check_mmap(struct ucred *cred, struct fileglob *fg, int prot,
-    int flags, int *maxprot)
+    int flags, uint64_t offset, int *maxprot)
 {
        int error;
        int maxp;
 
        maxp = *maxprot;
-       MAC_CHECK(file_check_mmap, cred, fg, fg->fg_label, prot, flags, &maxp);
+       MAC_CHECK(file_check_mmap, cred, fg, fg->fg_label, prot, flags, offset, &maxp);
        if ((maxp | *maxprot) != *maxprot)
                panic("file_check_mmap increased max protections");
        *maxprot = maxp;
index 0ac2572f5c674637e294c91f155bdcc5e32f5dad..23863c0ed05398a0e5c57663b80a52c180f46a00 100644 (file)
@@ -94,9 +94,9 @@ struct ifnet;
 struct ifreq;
 struct image_params;
 struct inpcb;
+struct ipc_port;
 struct ipq;
 struct knote;
-struct lctx;
 struct m_tag;
 struct mac;
 struct mac_module_data;
@@ -126,6 +126,9 @@ struct vnode;
 struct vnode_attr;
 struct vop_setlabel_args;
 
+#include <sys/kauth.h>
+#include <sys/kernel_types.h>
+
 #if CONFIG_MACF
 
 #ifndef __IOKIT_PORTS_DEFINED__
@@ -198,7 +201,7 @@ int mac_file_check_ioctl(kauth_cred_t cred, struct fileglob *fg,
 int    mac_file_check_lock(kauth_cred_t cred, struct fileglob *fg, int op,
            struct flock *fl);
 int    mac_file_check_mmap(kauth_cred_t cred, struct fileglob *fg,
-           int prot, int flags, int *maxprot);
+           int prot, int flags, uint64_t file_pos, int *maxprot);
 void   mac_file_check_mmap_downgrade(kauth_cred_t cred, struct fileglob *fg,
            int *prot);
 int    mac_file_check_receive(kauth_cred_t cred, struct fileglob *fg);
@@ -230,18 +233,14 @@ int       mac_iokit_check_set_properties(kauth_cred_t cred, io_object_t registry_entry
 int    mac_iokit_check_filter_properties(kauth_cred_t cred, io_object_t registry_entry);
 int    mac_iokit_check_get_property(kauth_cred_t cred, io_object_t registry_entry, const char *name);
 int    mac_iokit_check_hid_control(kauth_cred_t cred);
+int    mac_iokit_check_nvram_delete(kauth_cred_t cred, const char *name);
+int    mac_iokit_check_nvram_get(kauth_cred_t cred, const char *name);
+int    mac_iokit_check_nvram_set(kauth_cred_t cred, const char *name, io_object_t value);
 void   mac_ipq_label_associate(struct mbuf *fragment, struct ipq *ipq);
 int    mac_ipq_label_compare(struct mbuf *fragment, struct ipq *ipq);
 void   mac_ipq_label_destroy(struct ipq *ipq);
 int    mac_ipq_label_init(struct ipq *ipq, int flag);
 void   mac_ipq_label_update(struct mbuf *fragment, struct ipq *ipq);
-struct label   *mac_lctx_label_alloc(void);
-void    mac_lctx_label_free(struct label *label);
-void   mac_lctx_label_update(struct lctx *l, struct label *newlabel);
-int    mac_lctx_check_label_update(struct lctx *l, struct label *newlabel);
-void   mac_lctx_notify_create(proc_t proc, struct lctx *l);
-void   mac_lctx_notify_join(proc_t proc, struct lctx *l);
-void   mac_lctx_notify_leave(proc_t proc, struct lctx *l);
 void   mac_mbuf_label_associate_bpfdesc(struct bpf_d *bpf_d, struct mbuf *m);
 void   mac_mbuf_label_associate_ifnet(struct ifnet *ifp, struct mbuf *m);
 void   mac_mbuf_label_associate_inpcb(struct inpcb *inp, struct mbuf *m);
@@ -337,6 +336,7 @@ int mac_proc_check_fork(proc_t proc);
 int    mac_proc_check_suspend_resume(proc_t proc, int sr);
 int    mac_proc_check_get_task_name(kauth_cred_t cred, struct proc *p);
 int    mac_proc_check_get_task(kauth_cred_t cred, struct proc *p);
+int    mac_proc_check_expose_task(kauth_cred_t cred, struct proc *p);
 int    mac_proc_check_inherit_ipc_ports(struct proc *p, struct vnode *cur_vp, off_t cur_offset, struct vnode *img_vp, off_t img_offset, struct vnode *scriptvp);
 int    mac_proc_check_getaudit(proc_t proc);
 int    mac_proc_check_getauid(proc_t proc);
@@ -456,10 +456,6 @@ void       mac_sysvshm_label_associate(kauth_cred_t cred,
 void   mac_sysvshm_label_destroy(struct shmid_kernel *shmsegptr);
 void   mac_sysvshm_label_init(struct shmid_kernel* shmsegptr);
 void   mac_sysvshm_label_recycle(struct shmid_kernel *shmsegptr);
-struct label * mac_thread_label_alloc(void);
-void   mac_thread_label_destroy(struct uthread *uthread);
-void   mac_thread_label_free(struct label *label);
-void   mac_thread_label_init(struct uthread *uthread);
 int    mac_vnode_check_access(vfs_context_t ctx, struct vnode *vp,
            int acc_mode);
 int    mac_vnode_check_chdir(vfs_context_t ctx, struct vnode *dvp);
@@ -565,6 +561,7 @@ void        mac_pty_notify_grant(proc_t p, struct tty *tp, dev_t dev, struct label *lab
 void   mac_pty_notify_close(proc_t p, struct tty *tp, dev_t dev, struct label *label);
 int    mac_kext_check_load(kauth_cred_t cred, const char *identifier);
 int    mac_kext_check_unload(kauth_cred_t cred, const char *identifier);
+int    mac_kext_check_query(kauth_cred_t cred);
 
 void psem_label_associate(struct fileproc *fp, struct vnode *vp, struct vfs_context *ctx);
 void pshm_label_associate(struct fileproc *fp, struct vnode *vp, struct vfs_context *ctx);
index 153e7d727cda79356b557051dea933cafded0e54..4ea01c77ac79bd382b9ddce842969933466730b1 100644 (file)
@@ -79,7 +79,6 @@
 #include <security/mac_policy.h>
 #include <security/mac_data.h>
 #include <sys/sysctl.h>
-#include <kern/wait_queue.h>
 #include <kern/locks.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
@@ -176,11 +175,16 @@ static int mac_proc_check_enforce(proc_t p, int enforce_flags);
 static __inline__ int mac_proc_check_enforce(proc_t p, int enforce_flags)
 {
 #if CONFIG_MACF
-       return ((p->p_mac_enforce & enforce_flags) != 0);
+#if SECURITY_MAC_CHECK_ENFORCE // 21167099 - only check if we allow write
+    return ((p->p_mac_enforce & enforce_flags) != 0);
+#else
+#pragma unused(p,enforce_flags)
+    return 1;
+#endif // SECURITY_MAC_CHECK_ENFORCE
 #else
 #pragma unused(p,enforce_flags)
        return 0;
-#endif
+#endif // CONFIG_MACF
 }
 
 static int mac_context_check_enforce(vfs_context_t ctx, int enforce_flags);
@@ -234,7 +238,6 @@ int   mac_check_structmac_consistent(struct mac *mac);
 #endif
        
 int mac_cred_label_externalize(struct label *, char *e, char *out, size_t olen, int flags);
-int mac_lctx_label_externalize(struct label *, char *e, char *out, size_t olen);
 #if CONFIG_MACF_SOCKET
 int mac_socket_label_externalize(struct label *, char *e, char *out, size_t olen);
 #endif /* CONFIG_MACF_SOCKET */
@@ -243,7 +246,6 @@ int mac_pipe_label_externalize(struct label *label, char *elements,
  char *outbuf, size_t outbuflen);
 
 int mac_cred_label_internalize(struct label *label, char *string);
-int mac_lctx_label_internalize(struct label *label, char *string);
 #if CONFIG_MACF_SOCKET
 int mac_socket_label_internalize(struct label *label, char *string);
 #endif /* CONFIG_MACF_SOCKET */
@@ -416,8 +418,6 @@ struct __mac_get_pid_args;
 struct __mac_get_proc_args;
 struct __mac_set_proc_args;
 struct __mac_get_lcid_args;
-struct __mac_get_lctx_args;
-struct __mac_set_lctx_args;
 struct __mac_get_fd_args;
 struct __mac_get_file_args;
 struct __mac_get_link_args;
index 0a68a6d41120257ea9df39c0a15895a66564de0a..d9dff9460d8bdaa069d7bd0950ae61f2a1fe7060 100644 (file)
@@ -118,3 +118,32 @@ mac_iokit_check_hid_control(kauth_cred_t cred)
        MAC_CHECK(iokit_check_hid_control, cred);
        return (error);
 }
+
+int
+mac_iokit_check_nvram_delete(kauth_cred_t cred, const char *name)
+{
+       int error;
+
+       MAC_CHECK(iokit_check_nvram_delete, cred, name);
+       return (error);
+}
+
+int
+mac_iokit_check_nvram_get(kauth_cred_t cred, const char *name)
+{
+       int error;
+
+       MAC_CHECK(iokit_check_nvram_get, cred, name);
+       return (error);
+}
+
+int
+mac_iokit_check_nvram_set(kauth_cred_t cred, const char *name, io_object_t value)
+{
+       int error;
+
+       MAC_CHECK(iokit_check_nvram_set, cred, name, value);
+       return (error);
+}
+
+
index 97fbbcb6c1b0e66ca495c6ab63134465e6b6f8c6..404749060381102021f34089d3e68a199cc7a7d3 100644 (file)
@@ -20,3 +20,13 @@ mac_kext_check_unload(kauth_cred_t cred, const char *identifier) {
 
        return (error);
 }
+
+int
+mac_kext_check_query(kauth_cred_t cred) {
+       int error;
+
+       MAC_CHECK(kext_check_query, cred);
+
+       return (error);
+}
+
diff --git a/security/mac_mach.c b/security/mac_mach.c
new file mode 100644 (file)
index 0000000..4ff8cf7
--- /dev/null
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <mach/exception_types.h>
+#include <mach/mach_types.h>
+#include <sys/param.h>
+#include <sys/user.h>
+#include <sys/proc.h>
+#include <sys/proc_internal.h>
+#include <sys/kauth.h>
+#include <kern/task.h>
+
+#include <security/mac_framework.h>
+#include <security/mac_internal.h>
+#include <security/mac_mach_internal.h>
+
+static struct proc *
+mac_task_get_proc(struct task *task)
+{
+       if (task == current_task())
+               return proc_self();
+
+       /*
+        * Tasks don't really hold a reference on a proc unless the
+        * calling thread belongs to the task in question.
+        */
+       int pid = task_pid(task);
+       struct proc *p = proc_find(pid);
+
+       if (p != NULL) {
+               if (proc_task(p) == task)
+                       return p;
+               proc_rele(p);
+       }
+       return NULL;
+}
+
+int
+mac_task_check_expose_task(struct task *task)
+{
+       int error;
+
+       struct proc *p = mac_task_get_proc(task);
+       if (p == NULL)
+               return ESRCH;
+
+       struct ucred *cred = kauth_cred_get();
+       MAC_CHECK(proc_check_expose_task, cred, p);
+       proc_rele(p);
+       return (error);
+}
+
+int
+mac_task_check_set_host_special_port(struct task *task, int id, struct ipc_port *port)
+{
+       int error;
+
+       struct proc *p = mac_task_get_proc(task);
+       if (p == NULL)
+               return ESRCH;
+
+       kauth_cred_t cred = kauth_cred_proc_ref(p);
+       MAC_CHECK(proc_check_set_host_special_port, cred, id, port);
+       kauth_cred_unref(&cred);
+       proc_rele(p);
+       return (error);
+}
+
+int
+mac_task_check_set_host_exception_port(struct task *task, unsigned int exception)
+{
+       int error;
+
+       struct proc *p = mac_task_get_proc(task);
+       if (p == NULL)
+               return ESRCH;
+
+       kauth_cred_t cred = kauth_cred_proc_ref(p);
+       MAC_CHECK(proc_check_set_host_exception_port, cred, exception);
+       kauth_cred_unref(&cred);
+       proc_rele(p);
+       return (error);
+}
+
+int
+mac_task_check_set_host_exception_ports(struct task *task, unsigned int exception_mask)
+{
+       int error = 0;
+       int exception;
+
+       struct proc *p = mac_task_get_proc(task);
+       if (p == NULL)
+               return ESRCH;
+
+       kauth_cred_t cred = kauth_cred_proc_ref(p);
+       for (exception = FIRST_EXCEPTION; exception < EXC_TYPES_COUNT; exception++) {
+               if (exception_mask & (1 << exception)) {
+                       MAC_CHECK(proc_check_set_host_exception_port, cred, exception);
+                       if (error)
+                               break;
+               }
+       }
+       kauth_cred_unref(&cred);
+       proc_rele(p);
+       return (error);
+}
+
+void
+mac_thread_userret(struct thread *td)
+{
+
+       MAC_PERFORM(thread_userret, td);
+}
+
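Editorial sketch (not part of the commit): the wrappers above are meant to be consulted by the Mach accessor paths before a task control port is handed out. A hypothetical caller, with an invented name, might look like this; in the real kernel convert_task_to_port() consumes a task reference, so a genuine caller would take one first.

#include <mach/mach_types.h>
#include <mach/kern_return.h>
#include <security/mac_mach_internal.h>

/* Hypothetical accessor helper: expose a task's control port only if every
 * registered MAC policy allows it. */
static kern_return_t
example_collect_task_port(task_t task, ipc_port_t *portp)
{
	if (mac_task_check_expose_task(task) != 0) {
		/* A policy objected (or the proc lookup failed): hide this task. */
		return KERN_FAILURE;
	}
	*portp = convert_task_to_port(task);	/* consumes a task reference */
	return KERN_SUCCESS;
}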
index cf48cf3be71896438c217b1ae7bdda527a062fc4..79587fd895ce8572b3a149e4e0cfaa28ba1a175b 100644 (file)
 struct uthread;
 int    mac_do_machexc(int64_t code, int64_t subcode, uint32_t flags __unused);
 int    mac_schedule_userret(void);
-struct label *mac_thread_get_threadlabel(struct thread *thread);
-struct label *mac_thread_get_uthreadlabel(struct uthread *uthread);
 
 #if CONFIG_MACF
 void mac_policy_init(void);
 void mac_policy_initmach(void);
 
+/* tasks */
+int    mac_task_check_expose_task(struct task *t);
+
+int    mac_task_check_set_host_special_port(struct task *task,
+           int id, struct ipc_port *port);
+int    mac_task_check_set_host_exception_port(struct task *task,
+           unsigned int exception);
+int    mac_task_check_set_host_exception_ports(struct task *task,
+           unsigned int exception_mask);
+
 /* threads */
 void   act_set_astmacf(struct thread *);
 void   mac_thread_userret(struct thread *);
index fce1303acc07cecb639ef0cd8b56820d49014fe3..31fa3b436073c6cee297b007dfeb4b37612a84ea 100644 (file)
@@ -148,9 +148,11 @@ mac_pipe_check_kqfilter(kauth_cred_t cred, struct knote *kn,
 {
        int error;
 
-       if (!mac_pipe_enforce)
-               return (0);
-
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_pipe_enforce)
+        return (0);
+#endif
        MAC_CHECK(pipe_check_kqfilter, cred, kn, cpipe, cpipe->pipe_label);
        return (error);
 }
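Editorial aside on the pattern repeated throughout these wrappers: the hunks only wrap the runtime mac_*_enforce early-out in a compile-time SECURITY_MAC_CHECK_ENFORCE guard; the MAC_CHECK call itself is unchanged. MAC_CHECK is a macro (security/mac_internal.h) that visits each registered policy, calls the named hook on those that implement it, and folds the per-policy results into the caller's local error variable, which is why every wrapper declares int error; and simply returns it. A hand-written sketch of that shape, using invented types and helpers, is roughly:

/* Sketch only -- invented policy list, hook slot and error-folding helper;
 * this shows the shape of what MAC_CHECK does, not the real macro expansion. */
struct example_policy {
	struct example_policy *next;
	int (*pipe_check_kqfilter)(kauth_cred_t cred, struct knote *kn,
	    struct pipe *cpipe, struct label *pipelabel);
};

static struct example_policy *example_policy_list;

static int
example_error_select(int newerr, int curerr)
{
	/* Keep the first failure seen; success never overrides a failure. */
	return (curerr != 0 ? curerr : newerr);
}

static int
example_pipe_check_kqfilter(kauth_cred_t cred, struct knote *kn, struct pipe *cpipe)
{
	int error = 0;
	struct example_policy *pol;

	for (pol = example_policy_list; pol != NULL; pol = pol->next) {
		if (pol->pipe_check_kqfilter == NULL)
			continue;
		error = example_error_select(
		    pol->pipe_check_kqfilter(cred, kn, cpipe, cpipe->pipe_label), error);
	}
	return (error);
}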
@@ -159,8 +161,11 @@ mac_pipe_check_ioctl(kauth_cred_t cred, struct pipe *cpipe, u_int cmd)
 {
        int error;
 
-       if (!mac_pipe_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_pipe_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(pipe_check_ioctl, cred, cpipe, cpipe->pipe_label, cmd);
 
@@ -172,8 +177,11 @@ mac_pipe_check_read(kauth_cred_t cred, struct pipe *cpipe)
 {
        int error;
 
-       if (!mac_pipe_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_pipe_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(pipe_check_read, cred, cpipe, cpipe->pipe_label);
 
@@ -186,8 +194,11 @@ mac_pipe_check_label_update(kauth_cred_t cred, struct pipe *cpipe,
 {
        int error;
 
-       if (!mac_pipe_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_pipe_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(pipe_check_label_update, cred, cpipe, cpipe->pipe_label, newlabel);
 
@@ -199,8 +210,11 @@ mac_pipe_check_select(kauth_cred_t cred, struct pipe *cpipe, int which)
 {
        int error;
 
-       if (!mac_pipe_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_pipe_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(pipe_check_select, cred, cpipe, cpipe->pipe_label, which);
 
@@ -212,8 +226,11 @@ mac_pipe_check_stat(kauth_cred_t cred, struct pipe *cpipe)
 {
        int error;
 
-       if (!mac_pipe_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_pipe_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(pipe_check_stat, cred, cpipe, cpipe->pipe_label);
 
@@ -225,8 +242,11 @@ mac_pipe_check_write(kauth_cred_t cred, struct pipe *cpipe)
 {
        int error;
 
-       if (!mac_pipe_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_pipe_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(pipe_check_write, cred, cpipe, cpipe->pipe_label);
 
index 7d5d50e633141f1631e613a29f9ce82d46836f9e..2117d515b33b7be5fd0d5fc38125a1bf771d2ffe 100644 (file)
@@ -95,7 +95,6 @@ struct ifnet;
 struct inpcb;
 struct ipq;
 struct label;
-struct lctx;
 struct mac_module_data;
 struct mac_policy_conf;
 struct mbuf;
@@ -886,6 +885,7 @@ typedef int mpo_file_check_mmap_t(
        struct label *label,
        int prot,
        int flags,
+       uint64_t file_pos,
        int *maxprot
 );
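With the new file_pos argument a policy can reason about where in the file a mapping starts. The sketch below is illustrative only: the hook name is invented, and the leading parameters (subject credential, file and its label) are assumed from the earlier, unshown part of this typedef; only the tail visible in the hunk is taken from the diff.

/* Sketch: refuse executable mappings that do not start at the beginning of
 * the file, and clamp the maximum protection to what was requested. */
static int
example_file_check_mmap(kauth_cred_t cred, struct fileglob *fg, struct label *label,
    int prot, int flags, uint64_t file_pos, int *maxprot)
{
#pragma unused(cred, fg, label, flags)
	if ((prot & PROT_EXEC) && file_pos != 0)
		return (EPERM);
	*maxprot &= prot;	/* later mprotect() upgrades stay within the request */
	return (0);
}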
 /**
@@ -1420,143 +1420,6 @@ typedef void mpo_ipq_label_update_t(
        struct ipq *ipq,
        struct label *ipqlabel
 );
-/**
-  @brief Access control check for relabelling Login Context
-  @param l Subject credential
-  @param newlabel New label to apply to the Login Context
-  @see mpo_lctx_label_update_t
-  @see mac_set_lcid
-  @see mac_set_lctx
-
-  Determine whether the subject identified by the credential can relabel
-  itself to the supplied new label (newlabel).  This access control check
-  is called when the mac_set_lctx/lcid system call is invoked.  A user space
-  application will supply a new value, the value will be internalized
-  and provided in newlabel.
-
-  @return Return 0 if access is granted, otherwise an appropriate value for
-  errno should be returned.
-*/
-typedef int mpo_lctx_check_label_update_t(
-       struct lctx *l,
-       struct label *newlabel
-);
-/**
- @brief Destroy Login Context label
- @param label The label to be destroyed
-*/
-typedef void mpo_lctx_label_destroy_t(
-       struct label *label
-);
-/**
-  @brief Externalize a Login Context label
-  @param label Label to be externalized
-  @param element_name Name of the label namespace for which labels should be
-  externalized
-  @param sb String buffer to be filled with a text representation of the label
-
-  Produce an external representation of the label on a Login Context.
-  An externalized label consists of a text representation
-  of the label contents that can be used with user applications.
-  Policy-agnostic user space tools will display this externalized
-  version.
-
-  @return 0 on success, return non-zero if an error occurs while
-  externalizing the label data.
-
-*/
-typedef int mpo_lctx_label_externalize_t(
-       struct label *label,
-       char *element_name,
-       struct sbuf *sb
-);
-/**
-  @brief Initialize Login Context label
-  @param label New label to initialize
-*/
-typedef void mpo_lctx_label_init_t(
-       struct label *label
-);
-/**
-  @brief Internalize a Login Context label
-  @param label Label to be internalized
-  @param element_name Name of the label namespace for which the label should
-  be internalized
-  @param element_data Text data to be internalized
-
-  Produce a Login Context label from an external representation.  An
-  externalized label consists of a text representation of the label
-  contents that can be used with user applications.  Policy-agnostic
-  user space tools will forward text version to the kernel for
-  processing by individual policy modules.
-
-  The policy's internalize entry points will be called only if the
-  policy has registered interest in the label namespace.
-
-  @return 0 on success, Otherwise, return non-zero if an error occurs
-  while internalizing the label data.
-
-*/
-typedef int mpo_lctx_label_internalize_t(
-       struct label *label,
-       char *element_name,
-       char *element_data
-);
-/**
-  @brief Update a Login Context label
-  @param l
-  @param newlabel A new label to apply to the Login Context
-  @see mpo_lctx_check_label_update_t
-  @see mac_set_lcid
-  @see mac_set_lctx
-
-  Update the label on a login context, using the supplied new label.
-  This is called as a result of a login context relabel operation.  Access
-  control was already confirmed by mpo_lctx_check_label_update.
-*/
-typedef void mpo_lctx_label_update_t(
-       struct lctx *l,
-       struct label *newlabel
-);
-/**
-  @brief A process has created a login context
-  @param p Subject
-  @param l Login Context
-
-  When a process creates a login context (via setlcid()) this entrypoint
-  is called to notify the policy that the process 'p' has created login
-  context 'l'.
-*/
-typedef void mpo_lctx_notify_create_t(
-       struct proc *p,
-       struct lctx *l
-);
-/**
-  @brief A process has joined a login context
-  @param p Subject
-  @param l Login Context
-
-  When a process joins a login context, either via setlcid() or via
-  fork() this entrypoint is called to notify the policy that process
-  'p' is now a member of login context 'l'.
-*/
-typedef void mpo_lctx_notify_join_t(
-       struct proc *p,
-       struct lctx *l
-);
-/**
-  @brief A process has left a login context
-  @param p Subject
-  @param l Login Context
-
-  When a process leaves a login context either via setlcid() or as a
-  result of the process exiting this entrypoint is called to notify
-  the policy that the process 'p' is no longer a member of login context 'l'.
-*/
-typedef void mpo_lctx_notify_leave_t(
-       struct proc *p,
-       struct lctx *l
-);
 /**
  @brief Assign a label to a new mbuf
  @param bpf_d BPF descriptor
@@ -2736,6 +2599,32 @@ typedef int mpo_proc_check_fork_t(
        kauth_cred_t cred,
        struct proc *proc
 );
+/**
+  @brief Access control check for setting host special ports.
+  @param cred Subject credential
+  @param id The host special port to set
+  @param port The new value to set for the special port
+
+  @return Return 0 if access is granted, otherwise an appropriate value for
+  errno should be returned.
+*/
+typedef int mpo_proc_check_set_host_special_port_t(
+       kauth_cred_t cred,
+       int id,
+       struct ipc_port *port
+);
+/**
+  @brief Access control check for setting host exception ports.
+  @param cred Subject credential
+  @param exception Exception type for which the host port is being set
+
+  @return Return 0 if access is granted, otherwise an appropriate value for
+  errno should be returned.
+*/
+typedef int mpo_proc_check_set_host_exception_port_t(
+       kauth_cred_t cred,
+       unsigned int exception
+);
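As an illustration of how a policy might service these two checks (the handler names and the root-only rule are invented for the example):

/* Sketch: only a root credential may replace host special or exception ports. */
static int
example_proc_check_set_host_special_port(kauth_cred_t cred, int id, struct ipc_port *port)
{
#pragma unused(id, port)
	return (kauth_cred_getuid(cred) == 0 ? 0 : EPERM);
}

static int
example_proc_check_set_host_exception_port(kauth_cred_t cred, unsigned int exception)
{
#pragma unused(exception)
	return (kauth_cred_getuid(cred) == 0 ? 0 : EPERM);
}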
 /**
   @brief Access control over pid_suspend and pid_resume
   @param cred Subject credential
@@ -4335,6 +4224,25 @@ typedef int mpo_proc_check_get_task_t(
        struct proc *p
 );
 
+/**
+  @brief Access control check for exposing a process's task port
+  @param cred Subject credential
+  @param proc Object process
+
+  Determine whether the subject identified by the credential can expose
+  the passed process's task control port.
+  This call is used by the accessor APIs like processor_set_tasks() and
+  processor_set_threads().
+
+  @return Return 0 if access is granted, otherwise an appropriate value for
+  errno should be returned. Suggested failure: EACCES for label mismatch,
+  EPERM for lack of privilege, or ESRCH to hide visibility of the target.
+*/
+typedef int mpo_proc_check_expose_task_t(
+       kauth_cred_t cred,
+       struct proc *p
+);
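Following the suggested-failure note above, a policy can hide processes from the accessor APIs rather than merely refusing them. A hypothetical handler (invented name, simplified same-uid rule) might be:

/* Sketch: a credential may only see its own processes through the accessors;
 * everything else is hidden with ESRCH. */
static int
example_proc_check_expose_task(kauth_cred_t cred, struct proc *p)
{
	kauth_cred_t target_cred = kauth_cred_proc_ref(p);
	int same_owner = (kauth_cred_getuid(cred) == 0 ||
	    kauth_cred_getuid(cred) == kauth_cred_getuid(target_cred));

	kauth_cred_unref(&target_cred);
	return (same_owner ? 0 : ESRCH);
}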
+
 /**
  @brief Check whether task's IPC may inherit across process exec
  @param proc current process instance
@@ -4349,11 +4257,11 @@ typedef int mpo_proc_check_get_task_t(
 */
 typedef int mpo_proc_check_inherit_ipc_ports_t(
        struct proc *p,
-  struct vnode *cur_vp,
-  off_t cur_offset,
-  struct vnode *img_vp,
-  off_t img_offset,
-  struct vnode *scriptvp
+       struct vnode *cur_vp,
+       off_t cur_offset,
+       struct vnode *img_vp,
+       off_t img_offset,
+       struct vnode *scriptvp
 );
 
 /**
@@ -4366,7 +4274,7 @@ typedef int mpo_proc_check_inherit_ipc_ports_t(
  @return Return 0 if access is granted, otherwise an appropriate value for
  errno should be returned.
  */
-typedef int mac_proc_check_run_cs_invalid_t(
+typedef int mpo_proc_check_run_cs_invalid_t(
        struct proc *p
 );
 
@@ -4381,27 +4289,7 @@ typedef int mac_proc_check_run_cs_invalid_t(
 typedef void mpo_thread_userret_t(
        struct thread *thread
 );
-/**
-  @brief Initialize per thread label
-  @param label New label to initialize
 
-  Initialize the label for a newly instantiated thread.
-  Sleeping is permitted.
-*/
-typedef void mpo_thread_label_init_t(
-       struct label *label
-);
-/**
-  @brief Destroy thread label
-  @param label The label to be destroyed
-
-  Destroy a user thread label.  Since the user thread
-  is going out of scope, policy modules should free any internal
-  storage associated with the label so that it may be destroyed.
-*/
-typedef void mpo_thread_label_destroy_t(
-       struct label *label
-);
 /**
   @brief Check vnode access
   @param cred Subject credential
@@ -5801,6 +5689,70 @@ typedef int mpo_kext_check_unload_t(
        const char *identifier
 );
 
+/**
+  @brief Access control check for querying information about loaded kexts
+  @param cred Subject credential
+
+  Determine whether the subject identified by the credential can query
+  information about loaded kexts.
+
+  @return Return 0 if access is granted, otherwise an appropriate value for
+  errno should be returned.  Suggested failure: EPERM for lack of privilege.
+*/
+typedef int mpo_kext_check_query_t(
+       kauth_cred_t cred
+);
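A handler for this hook can be as small as the sketch below (invented name; the root-only rule is illustrative):

/* Sketch: restrict kext inventory queries to the superuser. */
static int
example_kext_check_query(kauth_cred_t cred)
{
	return (kauth_cred_getuid(cred) == 0 ? 0 : EPERM);
}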
+
+/**
+  @brief Access control check for getting NVRAM variables.
+  @param cred Subject credential
+  @param name NVRAM variable to get
+
+  Determine whether the subject identified by the credential can get the
+  value of the named NVRAM variable.
+
+  @return Return 0 if access is granted, otherwise an appropriate value for
+  errno should be returned.  Suggested failure: EPERM for lack of privilege.
+*/
+typedef int mpo_iokit_check_nvram_get_t(
+       kauth_cred_t cred,
+       const char *name
+);
+
+/**
+  @brief Access control check for setting NVRAM variables.
+  @param cred Subject credential
+  @param name NVRAM variable to set
+  @param value The new value for the NVRAM variable
+
+  Determine whether the subject identified by the credential can set the
+  value of the named NVRAM variable.
+
+  @return Return 0 if access is granted, otherwise an appropriate value for
+  errno should be returned.  Suggested failure: EPERM for lack of privilege.
+*/
+typedef int mpo_iokit_check_nvram_set_t(
+       kauth_cred_t cred,
+       const char *name,
+       io_object_t value
+);
+
+/**
+  @brief Access control check for deleting NVRAM variables.
+  @param cred Subject credential
+  @param name NVRAM variable to delete
+
+  Determine whether the subject identified by the credential can delete the
+  named NVRAM variable.
+
+  @return Return 0 if access is granted, otherwise an appropriate value for
+  errno should be returned.  Suggested failure: EPERM for lack of privilege.
+*/
+typedef int mpo_iokit_check_nvram_delete_t(
+       kauth_cred_t cred,
+       const char *name
+);
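The three NVRAM hooks share a credential-plus-variable-name shape, so a policy can key its decision off the variable being touched. In the hypothetical sketch below the handler names and the protected "example-" prefix are invented:

/* Sketch: ordinary variables are untouched; anything under the invented
 * "example-" prefix needs root to set and can never be deleted. */
static int
example_iokit_check_nvram_set(kauth_cred_t cred, const char *name, io_object_t value)
{
#pragma unused(value)
	if (strncmp(name, "example-", 8) != 0)
		return (0);
	return (kauth_cred_getuid(cred) == 0 ? 0 : EPERM);
}

static int
example_iokit_check_nvram_delete(kauth_cred_t cred, const char *name)
{
#pragma unused(cred)
	return (strncmp(name, "example-", 8) == 0 ? EPERM : 0);
}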
+
 /*
  * Placeholder for future events that may need mac hooks.
  */
@@ -5812,7 +5764,7 @@ typedef void mpo_reserved_hook_t(void);
  * Please note that this should be kept in sync with the check assumptions
  * policy in bsd/kern/policy_check.c (policy_ops struct).
  */
-#define MAC_POLICY_OPS_VERSION 32 /* inc when new reserved slots are taken */
+#define MAC_POLICY_OPS_VERSION 37 /* inc when new reserved slots are taken */
 struct mac_policy_ops {
        mpo_audit_check_postselect_t            *mpo_audit_check_postselect;
        mpo_audit_check_preselect_t             *mpo_audit_check_preselect;
@@ -5887,15 +5839,15 @@ struct mac_policy_ops {
        mpo_ipq_label_init_t                    *mpo_ipq_label_init;
        mpo_ipq_label_update_t                  *mpo_ipq_label_update;
 
-       mpo_lctx_check_label_update_t           *mpo_lctx_check_label_update;
-       mpo_lctx_label_destroy_t                *mpo_lctx_label_destroy;
-       mpo_lctx_label_externalize_t            *mpo_lctx_label_externalize;
-       mpo_lctx_label_init_t                   *mpo_lctx_label_init;
-       mpo_lctx_label_internalize_t            *mpo_lctx_label_internalize;
-       mpo_lctx_label_update_t                 *mpo_lctx_label_update;
-       mpo_lctx_notify_create_t                *mpo_lctx_notify_create;
-       mpo_lctx_notify_join_t                  *mpo_lctx_notify_join;
-       mpo_lctx_notify_leave_t                 *mpo_lctx_notify_leave;
+       mpo_reserved_hook_t                     *mpo_reserved1;
+       mpo_reserved_hook_t                     *mpo_reserved2;
+       mpo_reserved_hook_t                     *mpo_reserved3;
+       mpo_reserved_hook_t                     *mpo_reserved4;
+       mpo_reserved_hook_t                     *mpo_reserved5;
+       mpo_reserved_hook_t                     *mpo_reserved6;
+       mpo_reserved_hook_t                     *mpo_reserved7;
+       mpo_reserved_hook_t                     *mpo_reserved8;
+       mpo_reserved_hook_t                     *mpo_reserved9;
 
        mpo_mbuf_label_associate_bpfdesc_t      *mpo_mbuf_label_associate_bpfdesc;
        mpo_mbuf_label_associate_ifnet_t        *mpo_mbuf_label_associate_ifnet;
@@ -5950,13 +5902,13 @@ struct mac_policy_ops {
        mpo_system_check_sysctlbyname_t         *mpo_system_check_sysctlbyname;
        mpo_proc_check_inherit_ipc_ports_t      *mpo_proc_check_inherit_ipc_ports;
        mpo_vnode_check_rename_t                *mpo_vnode_check_rename;
-       mpo_reserved_hook_t                     *mpo_reserved4;
-       mpo_reserved_hook_t                     *mpo_reserved5;
-       mpo_reserved_hook_t                     *mpo_reserved6;
-       mpo_reserved_hook_t                     *mpo_reserved7;
-       mpo_reserved_hook_t                     *mpo_reserved8;
-       mpo_reserved_hook_t                     *mpo_reserved9;
-       mpo_reserved_hook_t                     *mpo_reserved10;
+       mpo_kext_check_query_t                  *mpo_kext_check_query;
+       mpo_iokit_check_nvram_get_t             *mpo_iokit_check_nvram_get;
+       mpo_iokit_check_nvram_set_t             *mpo_iokit_check_nvram_set;
+       mpo_iokit_check_nvram_delete_t          *mpo_iokit_check_nvram_delete;
+       mpo_proc_check_expose_task_t            *mpo_proc_check_expose_task;
+       mpo_proc_check_set_host_special_port_t  *mpo_proc_check_set_host_special_port;
+       mpo_proc_check_set_host_exception_port_t *mpo_proc_check_set_host_exception_port;
        mpo_reserved_hook_t                     *mpo_reserved11;
        mpo_reserved_hook_t                     *mpo_reserved12;
        mpo_reserved_hook_t                     *mpo_reserved13;
@@ -6146,7 +6098,7 @@ struct mac_policy_ops {
        mpo_vnode_check_uipc_bind_t             *mpo_vnode_check_uipc_bind;
        mpo_vnode_check_uipc_connect_t          *mpo_vnode_check_uipc_connect;
 
-       mac_proc_check_run_cs_invalid_t         *mpo_proc_check_run_cs_invalid;
+       mpo_proc_check_run_cs_invalid_t         *mpo_proc_check_run_cs_invalid;
        mpo_proc_check_suspend_resume_t         *mpo_proc_check_suspend_resume;
 
        mpo_thread_userret_t                    *mpo_thread_userret;
@@ -6170,8 +6122,8 @@ struct mac_policy_ops {
 
        mpo_vnode_notify_rename_t               *mpo_vnode_notify_rename;
 
-       mpo_thread_label_init_t                 *mpo_thread_label_init;
-       mpo_thread_label_destroy_t              *mpo_thread_label_destroy;
+       mpo_reserved_hook_t                     *mpo_reserved32;
+       mpo_reserved_hook_t                     *mpo_reserved33;
 
        mpo_system_check_kas_info_t             *mpo_system_check_kas_info;
 
index b9851ed0f40e8ec1860c4b5328e251cf61c20383..f17db238ab942a2bd0fac89e963feb9b585760bb 100644 (file)
@@ -127,8 +127,11 @@ mac_posixsem_check_create(kauth_cred_t cred, const char *name)
 {
        int error;
 
-       if (!mac_posixsem_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_posixsem_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(posixsem_check_create, cred, name);
 
@@ -140,8 +143,11 @@ mac_posixsem_check_open(kauth_cred_t cred, struct pseminfo *psem)
 {
        int error;
 
-       if (!mac_posixsem_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_posixsem_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(posixsem_check_open, cred, psem,
            psem->psem_label);
@@ -154,8 +160,11 @@ mac_posixsem_check_post(kauth_cred_t cred, struct pseminfo *psem)
 {
        int error;
 
-       if (!mac_posixsem_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_posixsem_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(posixsem_check_post, cred, psem, psem->psem_label);
 
@@ -168,8 +177,11 @@ mac_posixsem_check_unlink(kauth_cred_t cred, struct pseminfo *psem,
 {
        int error;
 
-       if (!mac_posixsem_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_posixsem_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(posixsem_check_unlink, cred, psem, psem->psem_label, name);
 
@@ -181,8 +193,11 @@ mac_posixsem_check_wait(kauth_cred_t cred, struct pseminfo *psem)
 {
        int error;
 
-       if (!mac_posixsem_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_posixsem_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(posixsem_check_wait, cred, psem, psem->psem_label);
 
index f2ffd9daf3b81a11d4c2dd8a63c64a929aa4ced5..cc4e281c1f3c2e8180b4252e8558f94178f4d9cd 100644 (file)
@@ -127,8 +127,11 @@ mac_posixshm_check_create(kauth_cred_t cred, const char *name)
 {
        int error = 0;
 
-       if (!mac_posixshm_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_posixshm_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(posixshm_check_create, cred, name);
 
@@ -140,8 +143,11 @@ mac_posixshm_check_open(kauth_cred_t cred, struct pshminfo *shm, int fflags)
 {
        int error = 0;
 
-       if (!mac_posixshm_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_posixshm_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(posixshm_check_open, cred, shm, shm->pshm_label, fflags);
 
@@ -154,8 +160,11 @@ mac_posixshm_check_mmap(kauth_cred_t cred, struct pshminfo *shm,
 {
        int error = 0;
 
-       if (!mac_posixshm_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_posixshm_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(posixshm_check_mmap, cred, shm, shm->pshm_label,
             prot, flags);
@@ -168,8 +177,11 @@ mac_posixshm_check_stat(kauth_cred_t cred, struct pshminfo *shm)
 {
        int error = 0;
 
-       if (!mac_posixshm_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_posixshm_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(posixshm_check_stat, cred, shm, shm->pshm_label);
 
@@ -182,8 +194,11 @@ mac_posixshm_check_truncate(kauth_cred_t cred, struct pshminfo *shm,
 {
        int error = 0;
 
-       if (!mac_posixshm_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_posixshm_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(posixshm_check_truncate, cred, shm, shm->pshm_label, size);
 
@@ -196,8 +211,11 @@ mac_posixshm_check_unlink(kauth_cred_t cred, struct pshminfo *shm,
 {
        int error = 0;
 
-       if (!mac_posixshm_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_posixshm_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(posixshm_check_unlink, cred, shm, shm->pshm_label, name);
 
index 929d8107c69e0094213ac092b35497017ded4107..c73d4642c86472bb1ccb2af6ef3a8c07c7d4434f 100644 (file)
@@ -73,6 +73,7 @@
 #include <sys/kauth.h>
 #include <sys/imgact.h>
 #include <mach/mach_types.h>
+#include <kern/task.h>
 
 #include <security/mac_internal.h>
 #include <security/mac_mach_internal.h>
@@ -273,8 +274,11 @@ mac_cred_check_label_update(kauth_cred_t cred, struct label *newlabel)
 {
        int error;
 
-       if (!mac_proc_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_proc_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(cred_check_label_update, cred, newlabel);
 
@@ -286,12 +290,11 @@ mac_cred_check_visible(kauth_cred_t u1, kauth_cred_t u2)
 {
        int error;
 
-
-
-       if (!mac_proc_enforce)
-               return (0);
-
-
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_proc_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(cred_check_visible, u1, u2);
 
@@ -313,11 +316,14 @@ mac_proc_check_debug(proc_t curp, struct proc *proc)
        kauth_cred_t cred;
        int error;
 
-
-
-       if (!mac_proc_enforce ||
-           !mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_proc_enforce)
+        return 0;
+#endif
+    
+    if (!mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
+        return 0;
 
        cred = kauth_cred_proc_ref(curp);
        MAC_CHECK(proc_check_debug, cred, proc);
@@ -332,9 +338,14 @@ mac_proc_check_fork(proc_t curp)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_proc_enforce ||
-           !mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_proc_enforce)
+        return 0;
+#endif
+    
+    if (!mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
+        return 0;
 
        cred = kauth_cred_proc_ref(curp);
        MAC_CHECK(proc_check_fork, cred, curp);
@@ -363,6 +374,16 @@ mac_proc_check_get_task(struct ucred *cred, struct proc *p)
        return (error);
 }
 
+int
+mac_proc_check_expose_task(struct ucred *cred, struct proc *p)
+{
+       int error;
+
+       MAC_CHECK(proc_check_expose_task, cred, p);
+
+       return (error);
+}
+
 int
 mac_proc_check_inherit_ipc_ports(struct proc *p, struct vnode *cur_vp, off_t cur_offset, struct vnode *img_vp, off_t img_offset, struct vnode *scriptvp)
 {
@@ -385,8 +406,12 @@ mac_proc_check_map_anon(proc_t proc, user_addr_t u_addr,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vm_enforce ||
-           !mac_proc_check_enforce(proc, MAC_VM_ENFORCE))
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vm_enforce)
+        return 0;
+#endif
+       if (!mac_proc_check_enforce(proc, MAC_VM_ENFORCE))
                return (0);
 
        cred = kauth_cred_proc_ref(proc);
@@ -403,9 +428,13 @@ mac_proc_check_mprotect(proc_t proc,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vm_enforce ||
-           !mac_proc_check_enforce(proc, MAC_VM_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vm_enforce)
+        return 0;
+#endif
+    if (!mac_proc_check_enforce(proc, MAC_VM_ENFORCE))
+        return (0);
 
        cred = kauth_cred_proc_ref(proc);
        MAC_CHECK(proc_check_mprotect, cred, proc, addr, size, prot);
@@ -419,7 +448,11 @@ mac_proc_check_run_cs_invalid(proc_t proc)
 {
        int error;
        
-       if (!mac_vm_enforce) return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vm_enforce)
+        return 0;
+#endif
        
        MAC_CHECK(proc_check_run_cs_invalid, proc);
        
@@ -432,11 +465,14 @@ mac_proc_check_sched(proc_t curp, struct proc *proc)
        kauth_cred_t cred;
        int error;
 
-
-
-       if (!mac_proc_enforce ||
-           !mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_proc_enforce)
+        return 0;
+#endif
+    
+    if (!mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
+        return 0;
 
        cred = kauth_cred_proc_ref(curp);
        MAC_CHECK(proc_check_sched, cred, proc);
@@ -451,11 +487,14 @@ mac_proc_check_signal(proc_t curp, struct proc *proc, int signum)
        kauth_cred_t cred;
        int error;
 
-
-
-       if (!mac_proc_enforce ||
-           !mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_proc_enforce)
+        return 0;
+#endif
+    
+    if (!mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
+        return 0;
 
        cred = kauth_cred_proc_ref(curp);
        MAC_CHECK(proc_check_signal, cred, proc, signum);
@@ -470,11 +509,13 @@ mac_proc_check_wait(proc_t curp, struct proc *proc)
        kauth_cred_t cred;
        int error;
 
-
-
-       if (!mac_proc_enforce ||
-           !mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_proc_enforce)
+        return 0;
+#endif
+    if (!mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
+        return 0;
 
        cred = kauth_cred_proc_ref(curp);
        MAC_CHECK(proc_check_wait, cred, proc);
@@ -483,124 +524,19 @@ mac_proc_check_wait(proc_t curp, struct proc *proc)
        return (error);
 }
 
-#if CONFIG_LCTX
-/*
- * Login Context
- */
-
-int
-mac_proc_check_setlcid (struct proc *p0, struct proc *p,
-                       pid_t pid, pid_t lcid)
-{
-       int error;
-
-       if (!mac_proc_enforce ||
-           !mac_proc_check_enforce(p0, MAC_PROC_ENFORCE))
-               return (0);
-
-       MAC_CHECK(proc_check_setlcid, p0, p, pid, lcid);
-       return (error);
-}
-
-int
-mac_proc_check_getlcid (struct proc *p0, struct proc *p, pid_t pid)
-{
-       int error;
-
-       if (!mac_proc_enforce ||
-           !mac_proc_check_enforce(p0, MAC_PROC_ENFORCE))
-               return (0);
-
-       MAC_CHECK(proc_check_getlcid, p0, p, pid);
-       return (error);
-}
-
-void
-mac_lctx_notify_create (struct proc *p, struct lctx *l)
-{
-       MAC_PERFORM(lctx_notify_create, p, l);
-}
-
-void
-mac_lctx_notify_join (struct proc *p, struct lctx *l)
-{
-       MAC_PERFORM(lctx_notify_join, p, l);
-}
-
-void
-mac_lctx_notify_leave (struct proc *p, struct lctx *l)
-{
-       MAC_PERFORM(lctx_notify_leave, p, l);
-}
-
-struct label *
-mac_lctx_label_alloc(void)
-{
-       struct label *label;
-
-       label = mac_labelzone_alloc(MAC_WAITOK);
-       if (label == NULL)
-               return (NULL);
-       MAC_PERFORM(lctx_label_init, label);
-       return (label);
-}
-
-void
-mac_lctx_label_free(struct label *label)
-{
-
-       MAC_PERFORM(lctx_label_destroy, label);
-       mac_labelzone_free(label);
-}
-
-int
-mac_lctx_label_externalize(struct label *label, char *elements,
-    char *outbuf, size_t outbuflen)
-{
-       int error;
-
-       error = MAC_EXTERNALIZE(lctx, label, elements, outbuf, outbuflen);
-
-       return (error);
-}
-
-int
-mac_lctx_label_internalize(struct label *label, char *string)
-{
-       int error;
-
-       error = MAC_INTERNALIZE(lctx, label, string);
-
-       return (error);
-}
-
-void
-mac_lctx_label_update(struct lctx *l, struct label *newlabel)
-{
-
-       MAC_PERFORM(lctx_label_update, l, newlabel);
-}
-
-int
-mac_lctx_check_label_update(struct lctx *l, struct label *newlabel)
-{
-       int error;
-
-       MAC_CHECK(lctx_check_label_update, l, newlabel);
-
-       return (error);
-}
-#endif /* LCTX */
-
 int
 mac_proc_check_suspend_resume(proc_t curp, int sr)
 {
        kauth_cred_t cred;
        int error;
 
-       if (!mac_proc_enforce ||
-           !mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_proc_enforce)
+        return 0;
+#endif
+    if (!mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
+        return 0;
 
        cred = kauth_cred_proc_ref(curp);
        MAC_CHECK(proc_check_suspend_resume, cred, curp, sr);
@@ -615,9 +551,13 @@ mac_proc_check_ledger(proc_t curp, proc_t proc, int ledger_op)
        kauth_cred_t cred;
        int error = 0;
 
-       if (!mac_proc_enforce ||
-           !mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_proc_enforce)
+        return 0;
+#endif
+    if (!mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
+        return 0;
 
        cred = kauth_cred_proc_ref(curp);
        MAC_CHECK(proc_check_ledger, cred, proc, ledger_op);
@@ -632,9 +572,13 @@ mac_proc_check_cpumon(proc_t curp)
        kauth_cred_t cred;
        int error = 0;
 
-       if (!mac_proc_enforce ||
-           !mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_proc_enforce)
+        return 0;
+#endif
+    if (!mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
+        return 0;
 
        cred = kauth_cred_proc_ref(curp);
        MAC_CHECK(proc_check_cpumon, cred);
@@ -649,9 +593,13 @@ mac_proc_check_proc_info(proc_t curp, proc_t target, int callnum, int flavor)
        kauth_cred_t cred;
        int error = 0;
 
-       if (!mac_proc_enforce ||
-           !mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_proc_enforce)
+        return 0;
+#endif
+    if (!mac_proc_check_enforce(curp, MAC_PROC_ENFORCE))
+        return 0;
 
        cred = kauth_cred_proc_ref(curp);
        MAC_CHECK(proc_check_proc_info, cred, target, callnum, flavor);
@@ -660,57 +608,3 @@ mac_proc_check_proc_info(proc_t curp, proc_t target, int callnum, int flavor)
        return (error);
 }
 
-struct label *
-mac_thread_label_alloc(void)
-{
-       struct label *label;
-
-       label = mac_labelzone_alloc(MAC_WAITOK);
-       if (label == NULL)
-               return (NULL);
-       MAC_PERFORM(thread_label_init, label);
-       return (label);
-}
-
-void
-mac_thread_label_init(struct uthread *uthread)
-{
-       uthread->uu_label = mac_thread_label_alloc();
-}
-
-void
-mac_thread_label_free(struct label *label)
-{
-       MAC_PERFORM(thread_label_destroy, label);
-       mac_labelzone_free(label);
-}
-
-void
-mac_thread_label_destroy(struct uthread *uthread)
-{
-
-       mac_thread_label_free(uthread->uu_label);
-       uthread->uu_label = NULL;
-}
-
-void
-mac_thread_userret(struct thread *td)
-{
-
-       MAC_PERFORM(thread_userret, td);
-}
-
-struct label *
-mac_thread_get_uthreadlabel(struct uthread *uthread)
-{
-
-       return (uthread->uu_label);
-}
-
-struct label *
-mac_thread_get_threadlabel(struct thread *thread)
-{
-       struct uthread *uthread = get_bsdthread_info(thread);
-
-       return (mac_thread_get_uthreadlabel(uthread));
-}
index b9bc4b698f3ba4f0439753e3e941a6029e277ba7..2151c0915c56c1f750ec9bfb58ad7a29ef14839e 100644 (file)
@@ -213,8 +213,11 @@ mac_socket_label_internalize(struct label *label, char *string)
 void
 mac_socket_label_associate(struct ucred *cred, struct socket *so)
 {
-       if (!mac_socket_enforce)
-               return;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_socket_enforce)
+        return;
+#endif
 
        MAC_PERFORM(socket_label_associate, cred, 
                    (socket_t)so, so->so_label);
@@ -224,8 +227,11 @@ void
 mac_socket_label_associate_accept(struct socket *oldsocket,
     struct socket *newsocket)
 {
-       if (!mac_socket_enforce)
-               return;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_socket_enforce)
+        return;
+#endif
 
        MAC_PERFORM(socket_label_associate_accept, 
                    (socket_t)oldsocket, oldsocket->so_label,
@@ -238,8 +244,11 @@ mac_socketpeer_label_associate_mbuf(struct mbuf *mbuf, struct socket *so)
 {
        struct label *label;
 
-       if (!mac_socket_enforce && !mac_net_enforce)
-               return;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_socket_enforce && !mac_net_enforce)
+        return;
+#endif
 
        label = mac_mbuf_to_label(mbuf);
 
@@ -260,8 +269,11 @@ void
 mac_socketpeer_label_associate_socket(struct socket *oldsocket,
     struct socket *newsocket)
 {
-       if (!mac_socket_enforce)
-               return;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_socket_enforce)
+        return;
+#endif
 
        MAC_PERFORM(socketpeer_label_associate_socket,
                    (socket_t)oldsocket, oldsocket->so_label,
@@ -274,8 +286,11 @@ mac_socket_check_kqfilter(kauth_cred_t cred, struct knote *kn,
 {
        int error;
 
-       if (!mac_socket_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_socket_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(socket_check_kqfilter, cred, kn, 
                  (socket_t)so, so->so_label);
@@ -288,8 +303,11 @@ mac_socket_check_label_update(kauth_cred_t cred, struct socket *so,
 {
        int error;
 
-       if (!mac_socket_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_socket_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(socket_check_label_update, cred,
                  (socket_t)so, so->so_label,
@@ -302,8 +320,11 @@ mac_socket_check_select(kauth_cred_t cred, struct socket *so, int which)
 {
        int error;
 
-       if (!mac_socket_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_socket_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(socket_check_select, cred,
                  (socket_t)so, so->so_label, which);
@@ -315,8 +336,11 @@ mac_socket_check_stat(kauth_cred_t cred, struct socket *so)
 {
        int error;
 
-       if (!mac_socket_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_socket_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(socket_check_stat, cred,
                  (socket_t)so, so->so_label);
@@ -329,8 +353,11 @@ mac_socket_label_update(kauth_cred_t cred, struct socket *so, struct label *labe
 {
        int error;
 #if 0
-       if (!mac_socket_enforce)
-               return;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_socket_enforce)
+        return 0;
+#endif
 #endif
        error = mac_socket_check_label_update(cred, so, label);
        if (error)
@@ -464,8 +491,11 @@ mac_socket_check_accept(kauth_cred_t cred, struct socket *so)
 {
        int error;
 
-       if (!mac_socket_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_socket_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(socket_check_accept, cred,
                  (socket_t)so, so->so_label);
@@ -479,8 +509,11 @@ mac_socket_check_accepted(kauth_cred_t cred, struct socket *so)
        struct sockaddr *sockaddr;
        int error;
 
-       if (!mac_socket_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_socket_enforce)
+        return 0;
+#endif
 
        if (sock_getaddr((socket_t)so, &sockaddr, 1) != 0) {
                error = ECONNABORTED;
@@ -499,8 +532,11 @@ mac_socket_check_bind(kauth_cred_t ucred, struct socket *so,
 {
        int error;
 
-       if (!mac_socket_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_socket_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(socket_check_bind, ucred,
                  (socket_t)so, so->so_label, sockaddr);
@@ -513,8 +549,11 @@ mac_socket_check_connect(kauth_cred_t cred, struct socket *so,
 {
        int error;
 
-       if (!mac_socket_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_socket_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(socket_check_connect, cred,
                  (socket_t)so, so->so_label,
@@ -527,8 +566,11 @@ mac_socket_check_create(kauth_cred_t cred, int domain, int type, int protocol)
 {
        int error;
 
-       if (!mac_socket_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_socket_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(socket_check_create, cred, domain, type, protocol);
        return (error);
@@ -541,8 +583,11 @@ mac_socket_check_deliver(struct socket *so, struct mbuf *mbuf)
        struct label *label;
        int error;
 
-       if (!mac_socket_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_socket_enforce)
+        return 0;
+#endif
 
        label = mac_mbuf_to_label(mbuf);
 
@@ -564,8 +609,11 @@ mac_socket_check_listen(kauth_cred_t cred, struct socket *so)
 {
        int error;
 
-       if (!mac_socket_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_socket_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(socket_check_listen, cred,
                  (socket_t)so, so->so_label);
@@ -577,8 +625,11 @@ mac_socket_check_receive(kauth_cred_t cred, struct socket *so)
 {
        int error;
 
-       if (!mac_socket_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_socket_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(socket_check_receive, cred,
                  (socket_t)so, so->so_label);
@@ -590,8 +641,11 @@ mac_socket_check_received(kauth_cred_t cred, struct socket *so, struct sockaddr
 {
        int error;
 
-       if (!mac_socket_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_socket_enforce)
+        return 0;
+#endif
        
        MAC_CHECK(socket_check_received, cred,
                  so, so->so_label, saddr);
@@ -604,8 +658,11 @@ mac_socket_check_send(kauth_cred_t cred, struct socket *so,
 {
        int error;
 
-       if (!mac_socket_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_socket_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(socket_check_send, cred,
                  (socket_t)so, so->so_label, sockaddr);
@@ -618,8 +675,11 @@ mac_socket_check_setsockopt(kauth_cred_t cred, struct socket *so,
 {
        int error;
 
-       if (!mac_socket_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_socket_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(socket_check_setsockopt, cred,
                  (socket_t)so, so->so_label, sopt);
@@ -631,8 +691,11 @@ int mac_socket_check_getsockopt(kauth_cred_t cred, struct socket *so,
 {
        int error;
 
-       if (!mac_socket_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_socket_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(socket_check_getsockopt, cred,
                  (socket_t)so, so->so_label, sopt);
index 0ccb634019d21bb2111bae760eed1ccc5892ff1c..bae10876500379441c38d460953cc86389ea1a7f 100644 (file)
@@ -74,8 +74,11 @@ mac_system_check_acct(kauth_cred_t cred, struct vnode *vp)
 {
        int error;
 
+#if SECURITY_MAC_CHECK_ENFORCE
+       /* 21167099 - only check if we allow write */
        if (!mac_system_enforce)
                return (0);
+#endif
 
        MAC_CHECK(system_check_acct, cred, vp,
            vp != NULL ? vp->v_label : NULL);
@@ -88,8 +91,11 @@ mac_system_check_chud(kauth_cred_t cred)
 {
        int error;
 
+#if SECURITY_MAC_CHECK_ENFORCE
+       /* 21167099 - only check if we allow write */
        if (!mac_system_enforce)
                return (0);
+#endif
 
        MAC_CHECK(system_check_chud, cred);
 
@@ -101,8 +107,11 @@ mac_system_check_host_priv(kauth_cred_t cred)
 {
        int error;
 
+#if SECURITY_MAC_CHECK_ENFORCE
+       /* 21167099 - only check if we allow write */
        if (!mac_system_enforce)
                return (0);
+#endif
 
        MAC_CHECK(system_check_host_priv, cred);
 
@@ -114,8 +123,11 @@ mac_system_check_info(kauth_cred_t cred, const char *info_type)
 {
        int error;
 
+#if SECURITY_MAC_CHECK_ENFORCE
+       /* 21167099 - only check if we allow write */
        if (!mac_system_enforce)
                return (0);
+#endif
 
        MAC_CHECK(system_check_info, cred, info_type);
 
@@ -127,8 +139,11 @@ mac_system_check_nfsd(kauth_cred_t cred)
 {
        int error;
 
+#if SECURITY_MAC_CHECK_ENFORCE
+       /* 21167099 - only check if we allow write */
        if (!mac_system_enforce)
                return (0);
+#endif
 
        MAC_CHECK(system_check_nfsd, cred);
 
@@ -140,21 +155,28 @@ mac_system_check_reboot(kauth_cred_t cred, int howto)
 {
        int error;
 
+#if SECURITY_MAC_CHECK_ENFORCE
+       /* 21167099 - only check if we allow write */
        if (!mac_system_enforce)
                return (0);
+#endif
 
        MAC_CHECK(system_check_reboot, cred, howto);
 
        return (error);
 }
 
+
 int
 mac_system_check_settime(kauth_cred_t cred)
 {
        int error;
 
+#if SECURITY_MAC_CHECK_ENFORCE
+       /* 21167099 - only check if we allow write */
        if (!mac_system_enforce)
                return (0);
+#endif
 
        MAC_CHECK(system_check_settime, cred);
 
@@ -166,8 +188,11 @@ mac_system_check_swapon(kauth_cred_t cred, struct vnode *vp)
 {
        int error;
 
+#if SECURITY_MAC_CHECK_ENFORCE
+       /* 21167099 - only check if we allow write */
        if (!mac_system_enforce)
                return (0);
+#endif
 
        MAC_CHECK(system_check_swapon, cred, vp, vp->v_label);
        return (error);
@@ -178,10 +203,11 @@ mac_system_check_swapoff(kauth_cred_t cred, struct vnode *vp)
 {
        int error;
 
-
-
+#if SECURITY_MAC_CHECK_ENFORCE
+       /* 21167099 - only check if we allow write */
        if (!mac_system_enforce)
                return (0);
+#endif
 
        MAC_CHECK(system_check_swapoff, cred, vp, vp->v_label);
        return (error);
@@ -194,8 +220,11 @@ mac_system_check_sysctlbyname(kauth_cred_t cred, const char *namestring, int *na
 {
        int error;
        
+#if SECURITY_MAC_CHECK_ENFORCE
+       /* 21167099 - only check if we allow write */
        if (!mac_system_enforce)
                return (0);
+#endif
 
        MAC_CHECK(system_check_sysctlbyname, cred, namestring,
            name, namelen, oldctl, oldlen, newctl, newlen);     
@@ -208,8 +237,11 @@ mac_system_check_kas_info(kauth_cred_t cred, int selector)
 {
        int error;
 
+#if SECURITY_MAC_CHECK_ENFORCE
+       /* 21167099 - only check if we allow write */
        if (!mac_system_enforce)
                return (0);
+#endif
 
        MAC_CHECK(system_check_kas_info, cred, selector);
 
index 540f5b09b17036a364e73e0f26b8f0bb3f01c744..5a4016a7cb5b2e0649a5da76c239cbce1c4d5c6d 100644 (file)
@@ -148,8 +148,11 @@ mac_sysvmsq_check_enqueue(kauth_cred_t cred, struct msg *msgptr,
 {
        int error;
 
-       if (!mac_sysvmsg_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_sysvmsg_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(sysvmsq_check_enqueue, cred,  msgptr, msgptr->label, msqptr,
            msqptr->label);
@@ -162,8 +165,11 @@ mac_sysvmsq_check_msgrcv(kauth_cred_t cred, struct msg *msgptr)
 {
        int error;
 
-       if (!mac_sysvmsg_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_sysvmsg_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(sysvmsq_check_msgrcv, cred, msgptr, msgptr->label);
 
@@ -175,8 +181,11 @@ mac_sysvmsq_check_msgrmid(kauth_cred_t cred, struct msg *msgptr)
 {
        int error;
 
-       if (!mac_sysvmsg_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_sysvmsg_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(sysvmsq_check_msgrmid, cred,  msgptr, msgptr->label);
 
@@ -188,8 +197,11 @@ mac_sysvmsq_check_msqget(kauth_cred_t cred, struct msqid_kernel *msqptr)
 {
        int error;
 
-       if (!mac_sysvmsg_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_sysvmsg_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(sysvmsq_check_msqget, cred, msqptr, msqptr->label);
 
@@ -201,8 +213,11 @@ mac_sysvmsq_check_msqsnd(kauth_cred_t cred, struct msqid_kernel *msqptr)
 {
        int error;
 
-       if (!mac_sysvmsg_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_sysvmsg_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(sysvmsq_check_msqsnd, cred, msqptr, msqptr->label);
 
@@ -214,8 +229,11 @@ mac_sysvmsq_check_msqrcv(kauth_cred_t cred, struct msqid_kernel *msqptr)
 {
        int error;
 
-       if (!mac_sysvmsg_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_sysvmsg_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(sysvmsq_check_msqrcv, cred, msqptr, msqptr->label);
 
@@ -228,8 +246,11 @@ mac_sysvmsq_check_msqctl(kauth_cred_t cred, struct msqid_kernel *msqptr,
 {
        int error;
 
-       if (!mac_sysvmsg_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_sysvmsg_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(sysvmsq_check_msqctl, cred, msqptr, msqptr->label, cmd);
 
index b433aa6b258e689bd950897b9b99443a19bed11c..a136a0502a3449cf155bdbdce5cbe31c7c000452 100644 (file)
@@ -130,8 +130,11 @@ mac_sysvsem_check_semctl(kauth_cred_t cred, struct semid_kernel *semakptr,
 {
        int error;
 
-       if (!mac_sysvsem_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_sysvsem_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(sysvsem_check_semctl, cred, semakptr, semakptr->label, cmd);
 
@@ -143,8 +146,11 @@ mac_sysvsem_check_semget(kauth_cred_t cred, struct semid_kernel *semakptr)
 {
        int error;
 
-       if (!mac_sysvsem_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_sysvsem_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(sysvsem_check_semget, cred, semakptr, semakptr->label);
 
@@ -157,8 +163,11 @@ mac_sysvsem_check_semop(kauth_cred_t cred, struct semid_kernel *semakptr,
 {
        int error;
 
-       if (!mac_sysvsem_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_sysvsem_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(sysvsem_check_semop, cred, semakptr, semakptr->label,
            accesstype);
index bcacb48a1afbf0056c42589e27599e3f86fab216..b6777cb743f5910e4d0a74415835079a200b854b 100644 (file)
@@ -132,8 +132,11 @@ mac_sysvshm_check_shmat(struct ucred *cred, struct shmid_kernel *shmsegptr,
 {
        int error;
 
-       if (!mac_sysvshm_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_sysvshm_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(sysvshm_check_shmat, cred, shmsegptr, shmsegptr->label,
            shmflg);
@@ -147,8 +150,11 @@ mac_sysvshm_check_shmctl(struct ucred *cred, struct shmid_kernel *shmsegptr,
 {
        int error;
 
-       if (!mac_sysvshm_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_sysvshm_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(sysvshm_check_shmctl, cred, shmsegptr, shmsegptr->label,
            cmd);
@@ -161,8 +167,11 @@ mac_sysvshm_check_shmdt(struct ucred *cred, struct shmid_kernel *shmsegptr)
 {
        int error;
 
-       if (!mac_sysvshm_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_sysvshm_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(sysvshm_check_shmdt, cred, shmsegptr, shmsegptr->label);
 
@@ -175,8 +184,11 @@ mac_sysvshm_check_shmget(struct ucred *cred, struct shmid_kernel *shmsegptr,
 {
        int error;
 
-       if (!mac_sysvshm_enforce)
-               return 0;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_sysvshm_enforce)
+        return (0);
+#endif
 
        MAC_CHECK(sysvshm_check_shmget, cred, shmsegptr, shmsegptr->label,
            shmflg);
index 0d31d86c41264aae1ed7ecf0bbcb45855c557508..429980b4e93ed58da7e20e4ac809c042569e23b9 100644 (file)
@@ -278,8 +278,11 @@ mac_mount_label_externalize(struct label *label, char *elements,
 void
 mac_devfs_label_copy(struct label *src, struct label *dest)
 {
-       if (!mac_device_enforce)
-               return;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_device_enforce)
+        return;
+#endif
 
        MAC_PERFORM(devfs_label_copy, src, dest);
 }
@@ -288,9 +291,11 @@ void
 mac_devfs_label_update(struct mount *mp, struct devnode *de,
     struct vnode *vp)
 {
-
-       if (!mac_device_enforce)
-               return;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_device_enforce)
+        return;
+#endif
 
        MAC_PERFORM(devfs_label_update, mp, de, de->dn_label, vp,
            vp->v_label);
@@ -303,8 +308,11 @@ mac_vnode_label_associate(struct mount *mp, struct vnode *vp, vfs_context_t ctx)
        struct fdescnode *fnp;
        int error = 0;
 
-       if (!mac_vnode_enforce)
-               return (error);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return (error);
+#endif
 
        /* XXX: should not inspect v_tag in kernel! */
        switch (vp->v_tag) {
@@ -328,8 +336,11 @@ void
 mac_vnode_label_associate_devfs(struct mount *mp, struct devnode *de,
     struct vnode *vp)
 {
-       if (!mac_device_enforce)
-               return;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_device_enforce)
+        return;
+#endif
 
        MAC_PERFORM(vnode_label_associate_devfs,
            mp, mp ? mp->mnt_mntlabel : NULL,
@@ -351,8 +362,12 @@ mac_vnode_label_associate_extattr(struct mount *mp, struct vnode *vp)
 void
 mac_vnode_label_associate_singlelabel(struct mount *mp, struct vnode *vp)
 {
-
-       if (!mac_vnode_enforce || !mac_label_vnodes)
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return;
+#endif
+       if (!mac_label_vnodes)
                return;
 
        MAC_PERFORM(vnode_label_associate_singlelabel, mp,
@@ -366,8 +381,12 @@ mac_vnode_notify_create(vfs_context_t ctx, struct mount *mp,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return (0);
+#endif
+       if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
                return (0);
 
        cred = vfs_context_ucred(ctx);
@@ -383,9 +402,13 @@ mac_vnode_notify_rename(vfs_context_t ctx, struct vnode *vp,
 {
        kauth_cred_t cred;
 
-       if (!mac_vnode_enforce ||
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return;
 
        cred = vfs_context_ucred(ctx);
        MAC_PERFORM(vnode_notify_rename, cred, vp, vp->v_label,
@@ -397,9 +420,13 @@ mac_vnode_notify_open(vfs_context_t ctx, struct vnode *vp, int acc_flags)
 {
        kauth_cred_t cred;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return;
 
        cred = vfs_context_ucred(ctx);
        MAC_PERFORM(vnode_notify_open, cred, vp, vp->v_label, acc_flags);
@@ -411,9 +438,13 @@ mac_vnode_notify_link(vfs_context_t ctx, struct vnode *vp,
 {
        kauth_cred_t cred;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return;
 
        cred = vfs_context_ucred(ctx);
        MAC_PERFORM(vnode_notify_link, cred, dvp, dvp->v_label, vp, vp->v_label, cnp);
@@ -430,7 +461,12 @@ mac_vnode_label_update_extattr(struct mount *mp, struct vnode *vp,
 {
        int error = 0;
 
-       if (!mac_vnode_enforce || !mac_label_vnodes)
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return;
+#endif
+       if (!mac_label_vnodes)
                return;
 
        MAC_PERFORM(vnode_label_update_extattr, mp, mp->mnt_mntlabel, vp,
@@ -451,7 +487,12 @@ mac_vnode_label_store(vfs_context_t ctx, struct vnode *vp,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || !mac_label_vnodes ||
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+       if (!mac_label_vnodes ||
            !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
                return 0;
 
@@ -471,8 +512,11 @@ mac_cred_label_update_execve(vfs_context_t ctx, kauth_cred_t new, struct vnode *
        int error;
        posix_cred_t pcred = posix_cred_get(new);
 
-       if (!mac_proc_enforce && !mac_vnode_enforce)
-               return;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_proc_enforce || !mac_vnode_enforce)
+        return;
+#endif
 
        /* mark the new cred to indicate "matching" includes the label */
        pcred->cr_flags |= CRF_MAC_ENFORCE;
@@ -536,8 +580,11 @@ mac_cred_check_label_update_execve(vfs_context_t ctx, struct vnode *vp, off_t of
        kauth_cred_t cred;
        int result = 0;
 
-       if (!mac_proc_enforce && !mac_vnode_enforce)
-               return result;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_proc_enforce || !mac_vnode_enforce)
+        return result;
+#endif
 
        cred = vfs_context_ucred(ctx);
 
@@ -594,8 +641,12 @@ mac_vnode_check_access(vfs_context_t ctx, struct vnode *vp,
        int error;
        int mask;
 
-       if (!mac_vnode_enforce ||
-           !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+       if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
                return 0;
 
        cred = vfs_context_ucred(ctx);
@@ -611,9 +662,13 @@ mac_vnode_check_chdir(vfs_context_t ctx, struct vnode *dvp)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-           !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_chdir, cred, dvp, dvp->v_label);
@@ -627,9 +682,13 @@ mac_vnode_check_chroot(vfs_context_t ctx, struct vnode *dvp,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_chroot, cred, dvp, dvp->v_label, cnp);
@@ -643,9 +702,13 @@ mac_vnode_check_create(vfs_context_t ctx, struct vnode *dvp,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_create, cred, dvp, dvp->v_label, cnp, vap);
@@ -659,9 +722,13 @@ mac_vnode_check_unlink(vfs_context_t ctx, struct vnode *dvp, struct vnode *vp,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_unlink, cred, dvp, dvp->v_label, vp,
@@ -676,9 +743,13 @@ mac_vnode_check_deleteacl(vfs_context_t ctx, struct vnode *vp,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_deleteacl, cred, vp, vp->v_label, type);
@@ -693,9 +764,13 @@ mac_vnode_check_deleteextattr(vfs_context_t ctx, struct vnode *vp,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_deleteextattr, cred, vp, vp->v_label, name);
@@ -708,9 +783,13 @@ mac_vnode_check_exchangedata(vfs_context_t ctx,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_exchangedata, cred, v1, v1->v_label, 
@@ -726,9 +805,13 @@ mac_vnode_check_getacl(vfs_context_t ctx, struct vnode *vp, acl_type_t type)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_getacl, cred, vp, vp->v_label, type);
@@ -743,9 +826,13 @@ mac_vnode_check_getattrlist(vfs_context_t ctx, struct vnode *vp,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_getattrlist, cred, vp, vp->v_label, alist);
@@ -761,8 +848,11 @@ mac_vnode_check_exec(vfs_context_t ctx, struct vnode *vp,
        kauth_cred_t cred;
        int error = 0;
 
-       if (!mac_vnode_enforce || !mac_proc_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_proc_enforce || !mac_vnode_enforce)
+        return 0;
+#endif
 
        cred = vfs_context_ucred(ctx);
 
@@ -825,9 +915,13 @@ mac_vnode_check_fsgetpath(vfs_context_t ctx, struct vnode *vp)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce ||
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_fsgetpath, cred, vp, vp->v_label);
@@ -842,8 +936,11 @@ mac_vnode_check_signature(struct vnode *vp, off_t macho_offset,
 {
        int error;
        
-       if (!mac_vnode_enforce || !mac_proc_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_proc_enforce || !mac_vnode_enforce)
+        return 0;
+#endif
        
        MAC_CHECK(vnode_check_signature, vp, vp->v_label, macho_offset, sha1, 
                                                          signature, size, 
@@ -858,9 +955,13 @@ mac_vnode_check_getacl(vfs_context_t ctx, struct vnode *vp, acl_type_t type)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_getacl, cred, vp, vp->v_label, type);
@@ -875,9 +976,13 @@ mac_vnode_check_getextattr(vfs_context_t ctx, struct vnode *vp,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_getextattr, cred, vp, vp->v_label,
@@ -891,9 +996,13 @@ mac_vnode_check_ioctl(vfs_context_t ctx, struct vnode *vp, u_int cmd)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_ioctl, cred, vp, vp->v_label, cmd);
@@ -907,9 +1016,13 @@ mac_vnode_check_kqfilter(vfs_context_t ctx, kauth_cred_t file_cred,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_kqfilter, cred, file_cred, kn, vp,
@@ -925,9 +1038,13 @@ mac_vnode_check_link(vfs_context_t ctx, struct vnode *dvp,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_link, cred, dvp, dvp->v_label, vp,
@@ -941,9 +1058,13 @@ mac_vnode_check_listextattr(vfs_context_t ctx, struct vnode *vp)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_listextattr, cred, vp, vp->v_label);
@@ -957,9 +1078,13 @@ mac_vnode_check_lookup(vfs_context_t ctx, struct vnode *dvp,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_lookup, cred, dvp, dvp->v_label, cnp);
@@ -972,9 +1097,13 @@ mac_vnode_check_open(vfs_context_t ctx, struct vnode *vp, int acc_mode)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_open, cred, vp, vp->v_label, acc_mode);
@@ -988,9 +1117,13 @@ mac_vnode_check_read(vfs_context_t ctx, struct ucred *file_cred,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_read, cred, file_cred, vp,
@@ -1005,9 +1138,13 @@ mac_vnode_check_readdir(vfs_context_t ctx, struct vnode *dvp)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_readdir, cred, dvp, dvp->v_label);
@@ -1020,9 +1157,13 @@ mac_vnode_check_readlink(vfs_context_t ctx, struct vnode *vp)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_readlink, cred, vp, vp->v_label);
@@ -1036,9 +1177,13 @@ mac_vnode_check_label_update(vfs_context_t ctx, struct vnode *vp,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_label_update, cred, vp, vp->v_label, newlabel);
@@ -1054,9 +1199,13 @@ mac_vnode_check_rename(vfs_context_t ctx, struct vnode *dvp,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce ||
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
 
@@ -1082,9 +1231,13 @@ mac_vnode_check_revoke(vfs_context_t ctx, struct vnode *vp)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_revoke, cred, vp, vp->v_label);
@@ -1097,9 +1250,13 @@ mac_vnode_check_searchfs(vfs_context_t ctx, struct vnode *vp, struct attrlist *a
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_searchfs, cred, vp, vp->v_label, alist);
@@ -1112,9 +1269,13 @@ mac_vnode_check_select(vfs_context_t ctx, struct vnode *vp, int which)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_select, cred, vp, vp->v_label, which);
@@ -1129,9 +1290,13 @@ mac_vnode_check_setacl(vfs_context_t ctx, struct vnode *vp, acl_type_t type,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_setacl, cred, vp, vp->v_label, type, acl);
@@ -1146,9 +1311,13 @@ mac_vnode_check_setattrlist(vfs_context_t ctx, struct vnode *vp,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_setattrlist, cred, vp, vp->v_label, alist);
@@ -1162,9 +1331,13 @@ mac_vnode_check_setextattr(vfs_context_t ctx, struct vnode *vp,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_setextattr, cred, vp, vp->v_label,
@@ -1178,9 +1351,13 @@ mac_vnode_check_setflags(vfs_context_t ctx, struct vnode *vp, u_long flags)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_setflags, cred, vp, vp->v_label, flags);
@@ -1193,9 +1370,13 @@ mac_vnode_check_setmode(vfs_context_t ctx, struct vnode *vp, mode_t mode)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_setmode, cred, vp, vp->v_label, mode);
@@ -1209,9 +1390,13 @@ mac_vnode_check_setowner(vfs_context_t ctx, struct vnode *vp, uid_t uid,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_setowner, cred, vp, vp->v_label, uid, gid);
@@ -1225,9 +1410,13 @@ mac_vnode_check_setutimes(vfs_context_t ctx, struct vnode *vp,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_setutimes, cred, vp, vp->v_label, atime,
@@ -1242,9 +1431,13 @@ mac_vnode_check_stat(vfs_context_t ctx, struct ucred *file_cred,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_stat, cred, file_cred, vp,
@@ -1259,9 +1452,13 @@ mac_vnode_check_truncate(vfs_context_t ctx, struct ucred *file_cred,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_truncate, cred, file_cred, vp,
@@ -1277,9 +1474,13 @@ mac_vnode_check_write(vfs_context_t ctx, struct ucred *file_cred,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_write, cred, file_cred, vp, vp->v_label);
@@ -1294,9 +1495,13 @@ mac_vnode_check_uipc_bind(vfs_context_t ctx, struct vnode *dvp,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_uipc_bind, cred, dvp, dvp->v_label, cnp, vap);
@@ -1309,9 +1514,13 @@ mac_vnode_check_uipc_connect(vfs_context_t ctx, struct vnode *vp)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(vnode_check_uipc_connect, cred, vp, vp->v_label);
@@ -1347,8 +1556,11 @@ mac_vnode_find_sigs(struct proc *p, struct vnode *vp, off_t offset)
 {
        int error;
 
-       if (!mac_vnode_enforce || !mac_proc_enforce)
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_proc_enforce || !mac_vnode_enforce)
+        return 0;
+#endif
 
        MAC_CHECK(vnode_find_sigs, p, vp, offset, vp->v_label);
 
@@ -1411,9 +1623,13 @@ mac_mount_check_mount(vfs_context_t ctx, struct vnode *vp,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(mount_check_mount, cred, vp, vp->v_label, cnp, vfc_name);
@@ -1427,9 +1643,13 @@ mac_mount_check_remount(vfs_context_t ctx, struct mount *mp)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(mount_check_remount, cred, mp, mp->mnt_mntlabel);
@@ -1443,9 +1663,13 @@ mac_mount_check_umount(vfs_context_t ctx, struct mount *mp)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(mount_check_umount, cred, mp, mp->mnt_mntlabel);
@@ -1460,9 +1684,13 @@ mac_mount_check_getattr(vfs_context_t ctx, struct mount *mp,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(mount_check_getattr, cred, mp, mp->mnt_mntlabel, vfa);
@@ -1476,9 +1704,13 @@ mac_mount_check_setattr(vfs_context_t ctx, struct mount *mp,
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(mount_check_setattr, cred, mp, mp->mnt_mntlabel, vfa);
@@ -1491,9 +1723,13 @@ mac_mount_check_stat(vfs_context_t ctx, struct mount *mount)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(mount_check_stat, cred, mount, mount->mnt_mntlabel);
@@ -1507,9 +1743,13 @@ mac_mount_check_label_update(vfs_context_t ctx, struct mount *mount)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(mount_check_label_update, cred, mount, mount->mnt_mntlabel);
@@ -1523,9 +1763,13 @@ mac_mount_check_fsctl(vfs_context_t ctx, struct mount *mp, u_int cmd)
        kauth_cred_t cred;
        int error;
 
-       if (!mac_vnode_enforce || 
-               !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
-               return (0);
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+    if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+        return 0;
 
        cred = vfs_context_ucred(ctx);
        MAC_CHECK(mount_check_fsctl, cred, mp, mp->mnt_mntlabel, cmd);
@@ -1537,8 +1781,11 @@ void
 mac_devfs_label_associate_device(dev_t dev, struct devnode *de,
     const char *fullpath)
 {
-       if (!mac_device_enforce)
-               return;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_device_enforce)
+        return;
+#endif
 
        MAC_PERFORM(devfs_label_associate_device, dev, de, de->dn_label,
            fullpath);
@@ -1548,8 +1795,11 @@ void
 mac_devfs_label_associate_directory(const char *dirname, int dirnamelen,
     struct devnode *de, const char *fullpath)
 {
-       if (!mac_device_enforce)
-               return;
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_device_enforce)
+        return;
+#endif
 
        MAC_PERFORM(devfs_label_associate_directory, dirname, dirnamelen, de,
            de->dn_label, fullpath);
@@ -1560,7 +1810,12 @@ vn_setlabel(struct vnode *vp, struct label *intlabel, vfs_context_t context)
 {
        int error;
 
-       if (!mac_vnode_enforce || !mac_label_vnodes)
+#if SECURITY_MAC_CHECK_ENFORCE
+    /* 21167099 - only check if we allow write */
+    if (!mac_vnode_enforce)
+        return 0;
+#endif
+       if (!mac_label_vnodes)
                return (0);
 
        if (vp->v_mount == NULL) {
index db5e9dd55c82d8ff97377b9dc820ac8acb876649..e1fe380802389063ecb5681ec89b753b7b3088d2 100644 (file)
@@ -14,8 +14,10 @@ do_config_all:: lldbmacros_install
 LLDBMACROS_SOURCE:=$(SRCROOT)/tools/lldbmacros/
 LLDBMACROS_BOOTSTRAP_DEST:=$(OBJPATH)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)
 LLDBMACROS_DEST:=$(LLDBMACROS_BOOTSTRAP_DEST)/lldbmacros/
+LLDBMACROS_USERDEBUG_FILES=
 
-LLDBMACROS_PYTHON_FILES = \
+
+LLDBMACROS_PYTHON_FILES = $(LLDBMACROS_USERDEBUG_FILES) \
        core/standard.py \
        core/cvalue.py \
        core/__init__.py \
@@ -36,6 +38,7 @@ LLDBMACROS_PYTHON_FILES = \
        routedefines.py \
        ipc.py \
        ipcimportancedetail.py \
+       kcdata.py \
        scheduler.py \
        structanalyze.py \
        pmap.py \
@@ -49,7 +52,10 @@ LLDBMACROS_PYTHON_FILES = \
        userspace.py \
        pci.py \
        misc.py \
-       apic.py
+       apic.py \
+       kauth.py \
+       usertaskgdbserver.py \
+       waitq.py
 
 ifneq ($(PLATFORM),MacOSX)
        LLDBMACROS_PYTHON_FILES+= \
index 0515112064decc3a182b15be31411a9b328ad0cd..cd77f789d6ed55e4679bc87e615cf5d05a4ab1fd 100644 (file)
@@ -13,6 +13,7 @@ E. FAQ and Generel Coding Guidelines
      ii. Formatted Output printing guidelines [MUST READ]
     iii. Coding conventions.  [MUST READ]
      iv. Submitting changes in lldbmacros [MUST READ]
+      v. Common utility functions and paradigms
 F. Development and Debugging on lldb kernel debugging platform.
       i. Reading a exception backtrace
      ii. Loading custom or local lldbmacros and operating_system plugin
@@ -125,7 +126,7 @@ where:
                  -v          : increase the verbosity of the command. Each '-v' encountered will increase verbosity by 1.
                  -p <plugin> : pass the output of command to <plugin> for processing and followup with command requests by it.
   CMDOPTIONS   : These are command level options (always a CAPITAL letter option) that are defined by the macro developer. Please do
-                 help <cmdname> to know how each option operates on that particular command.
+                 help <cmdname> to know how each option operates on that particular command. For an example of how to use CMDOPTIONS, take a look at vm_object_walk_pages in memory.py
 
 ii. Writing new commands.
 --------------------------
@@ -308,7 +309,14 @@ iv. Submitting changes in lldbmacros
   * Do a clean build of kernel from xnu top level directory.
   * Verify that your changes are present in the dSYM directory of new build. 
   * Re-run all your test and verification steps with the lldbmacros from the newly packaged dSYM/Contents/Resources/Python/lldbmacros.
+
+v. Common utility functions and paradigms
+-----------------------------------------
+ Please search and look around the code for common utility functions and paradigms:
+  * Take a peek at utils.py for common utilities like sizeof_fmt() to humanize size strings into KB, MB, etc. The convention is that functions in utils.py perform self contained actions and do not require intricate knowledge of kernel structures.
+  * If you need the page size of the target system, do not hard code any value; kern.globals.page_size is your friend. Similarly, use config['verbosity'] to find out about configuration settings.
+  * If you are developing a command for a structure that differs between development and release kernels, please use "hasattr()" to conditionalize references to #ifdef'ed fields in the structure. See the example in def GetTaskSummary(task) in process.py and the short sketch below.
+
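 (An illustrative sketch of the paradigms above, not part of the commit; GetMyTaskSummary and the dev_only_field member are hypothetical, while unsigned(), kern.globals and sizeof_fmt() are the existing lldbmacros helpers referred to in the bullets.)

     def GetMyTaskSummary(task):
         """ example-only summary string for a task value object """
         out_str = "{: <#020x}".format(task)
         # guard fields that exist only on DEVELOPMENT kernels
         if hasattr(task, 'dev_only_field'):
             out_str += " {: <10d}".format(task.dev_only_field)
         # never hard code the page size; read it from the target
         page_size = unsigned(kern.globals.page_size)
         out_str += " pagesize: " + sizeof_fmt(page_size)
         return out_str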
 ===============================================================
 F. Development and Debugging on lldb kernel debugging platform.
 ===============================================================
index 7f16e901ef1cb17c12cae2c34010ca49c84607c7..451e03ab0e6368aa0fc7e0d0a49516bed483c245 100644 (file)
@@ -15,17 +15,17 @@ def GetATMValueSummary(atm_value):
 
 
 @lldb_type_summary(['atm_task_descriptor', 'atm_task_descriptor_t'])
-@header("{0: <20s} {1: <20s} {2: <16s} {3: <16s} {4: <20s} {5: <20s} {6: <10s}".format("task_descriptor", "trace_buffer", "buffer_size", "refcount", "mailbox_addr", "mailbox_size", "flags"))
+@header("{0: <20s} {1: <20s} {2: <16s} {3: <16s} {4: <10s}".format("task_descriptor", "trace_buffer", "buffer_size", "refcount", "flags"))
 def GetATMTaskDescriptorSummary(descriptor):
     """ Summarizes atm_task_descriptor object
         params: descriptor - value object of type atm_task_descriptor_t
         returns: string - containing the description.
     """
-    format_str = "{0: <#020x} {1: <#020x} {2: <#016x} {3: <16d} {4: <#020x} {5: <#020x} {6: <10s}"
+    format_str = "{0: <#020x} {1: <#020x} {2: <#016x} {3: <16d} {4: <10s}"
     flags_str = ""
     if unsigned(descriptor.flags) & 0x1:
         flags_str = "DEAD"
-    out_string = format_str.format(descriptor, descriptor.trace_buffer, descriptor.trace_buffer_size, descriptor.reference_count, descriptor.mailbox_kernel_addr, descriptor.mailbox_array_size, flags_str)
+    out_string = format_str.format(descriptor, descriptor.trace_buffer, descriptor.trace_buffer_size, descriptor.reference_count, flags_str)
 
     #if DEVELOPMENT
     if hasattr(descriptor, 'task'):
@@ -46,13 +46,13 @@ def ShowATMValueListeners(cmd_args=None, cmd_options={}):
     atm_val = kern.GetValueFromAddress(cmd_args[0], 'atm_value_t')
     print GetATMValueSummary.header
     print GetATMValueSummary(atm_val)
-    header_str = "{0: <20s} ".format("#mailbox") + GetATMTaskDescriptorSummary.header
+    header_str = "{0: <20s} ".format("#guard") + GetATMTaskDescriptorSummary.header
     #if DEVELOPMENT
     header_str += "  " +  GetTaskSummary.header + " procname"
     #endif
     print header_str
     for listener in IterateQueue(atm_val.listeners, 'atm_link_object_t', 'listeners_element'):
-        listener_summary = "{0: <#020x}".format(listener.mailbox)
+        listener_summary = "{0: <#020x}".format(listener.guard)
         listener_summary += " " + GetATMTaskDescriptorSummary(listener.descriptor)
         print listener_summary
     return 
index 26a4dcf7a0a53fd981f3413cc997c7acf086ce26..ec6295dff78cb752e33b7c823c731e930e063363 100644 (file)
@@ -68,13 +68,55 @@ def IterateListEntry(element, element_type, field_name):
         next_el = elt.__getattr__(field_name).le_next
         elt = cast(next_el, element_type)
 
-def IterateQueue(queue_head, element_ptr_type, element_field_name):
-    """ iterate over a queue in kernel of type queue_head_t. refer to osfmk/kern/queue.h
+def IterateLinkageChain(queue_head, element_type, field_name, field_ofst=0):
+    """ Iterate over a Linkage Chain queue in kernel of type queue_head_t. (osfmk/kern/queue.h method 1)
+        This is equivalent to the qe_foreach_element() macro
+        params:
+            queue_head   - value       : Value object for queue_head.
+            element_type - lldb.SBType : pointer type of the element which contains the queue_chain_t. Typically it's structs like thread, task, etc.
+                         - str         : OR a string describing the type. ex. 'task *'
+            field_name   - str         : Name of the field (in element) which holds a queue_chain_t
+            field_ofst   - int         : offset from the 'field_name' (in element) which holds a queue_chain_t
+                                         This is mostly useful if a particular element contains an array of queue_chain_t
+        returns:
+            A generator does not return. It is used for iterating.
+            value  : An object that's of type (element_type). Always a pointer object
+        example usage:
+            coalq = kern.GetGlobalVariable('coalitions_q')
+            for coal in IterateLinkageChain(coalq, 'struct coalition *', 'coalitions'):
+                print GetCoalitionInfo(coal)
+    """
+    global kern
+    if type(element_type) == str:
+        element_type = gettype(element_type)
+
+    if unsigned(queue_head) == 0:
+        return
+
+    if element_type.IsPointerType():
+        elem_ofst = getfieldoffset(element_type.GetPointeeType(), field_name) + field_ofst
+    else:
+        elem_ofst = getfieldoffset(element_type, field_name) + field_ofst
+
+    link = queue_head.next
+    while (unsigned(link) != unsigned(queue_head)):
+        addr = unsigned(link) - elem_ofst;
+        # I can't use the GetValueFromAddress function of the kernel class
+        # because I have no instance of that class!
+        obj = value(link.GetSBValue().CreateValueFromExpression(None,'(void *)'+str(addr)))
+        obj = cast(obj, element_type)
+        yield obj
+        link = link.next
+
+
+def IterateQueue(queue_head, element_ptr_type, element_field_name, backwards=False):
+    """ Iterate over an Element Chain queue in kernel of type queue_head_t. (osfmk/kern/queue.h method 2)
         params:
             queue_head         - value : Value object for queue_head.
-            element_ptr_type       - lldb.SBType : a pointer type of the element 'next' points to. Typically its structs like thread, task etc..
+            element_ptr_type   - lldb.SBType : a pointer type of the element 'next' points to. Typically its structs like thread, task etc..
                                - str         : OR a string describing the type. ex. 'task *'
             element_field_name - str : name of the field in target struct.
+            backwards          - bool : if True, traverse the queue backwards (default False)
         returns:
             A generator does not return. It is used for iterating.
             value  : an object thats of type (element_type) queue_head->next. Always a pointer object
@@ -91,16 +133,21 @@ def IterateQueue(queue_head, element_ptr_type, element_field_name):
         queue_head_addr = queue_head.GetValueAsUnsigned()
     else:
         queue_head_addr = queue_head.GetAddress().GetLoadAddress(LazyTarget.GetTarget())
-    cur_elt = queue_head.GetChildMemberWithName('next')
+    if backwards:
+        cur_elt = queue_head.GetChildMemberWithName('prev')
+    else:
+        cur_elt = queue_head.GetChildMemberWithName('next')
+
     while True:
 
         if not cur_elt.IsValid() or cur_elt.GetValueAsUnsigned() == 0 or cur_elt.GetValueAsUnsigned() == queue_head_addr:
             break
         elt = cur_elt.Cast(element_ptr_type)
         yield value(elt)
-        cur_elt = elt.GetChildMemberWithName(element_field_name).GetChildMemberWithName('next')
-
-
+        if backwards:
+            cur_elt = elt.GetChildMemberWithName(element_field_name).GetChildMemberWithName('prev')
+        else:
+            cur_elt = elt.GetChildMemberWithName(element_field_name).GetChildMemberWithName('next')
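 (An illustrative usage sketch for the new backwards flag, not part of the commit; it assumes task is a task value object whose threads queue chains struct thread through the task_threads field, as used elsewhere in lldbmacros.)

     # walk a task's threads in reverse of their queue order
     for thread in IterateQueue(task.threads, 'thread *', 'task_threads', backwards=True):
         print "{: <#020x}".format(thread)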
 
 class KernelTarget(object):
     """ A common kernel object that provides access to kernel objects and information.
@@ -130,7 +177,7 @@ class KernelTarget(object):
             def __getattr__(self, name):
                 v = self._xnu_kernobj_12obscure12.GetGlobalVariable(name)
                 if not v.GetSBValue().IsValid():
-                    raise ValueError('no such global variable by name: %s '%str(name))
+                    raise ValueError('No such global variable by name: %s '%str(name))
                 return v
         self.globals = _GlobalVariableFind(self)
         LazyTarget.Initialize(debugger)
@@ -202,7 +249,10 @@ class KernelTarget(object):
             returns: value - python object representing global variable.
             raises : Exception in case the variable is not found.
         """
-        return value(LazyTarget.GetTarget().FindGlobalVariables(name, 0).GetValueAtIndex(0))
+        self._globals_cache_dict = caching.GetDynamicCacheData("kern._globals_cache_dict", {})
+        if name not in self._globals_cache_dict:
+            self._globals_cache_dict[name] = value(LazyTarget.GetTarget().FindGlobalVariables(name, 1).GetValueAtIndex(0))
+        return self._globals_cache_dict[name]
 
     def GetLoadAddressForSymbol(self, name):
         """ Get the load address of a symbol in the kernel.
@@ -283,6 +333,27 @@ class KernelTarget(object):
         else:
             raise ValueError("PhysToVirt does not support {0}".format(arch))
 
+    def GetNanotimeFromAbstime(self, abstime):
+        """ Convert absolute time (which is in MATUs) to nanoseconds.
+            The conversion differs based on the architecture.
+            params:
+                abstime - int absolute time as shown by mach_absolute_time
+            returns:
+                int - time in nanoseconds
+        """
+        usec_divisor = caching.GetStaticCacheData("kern.rtc_usec_divisor", None)
+        if not usec_divisor:
+            if self.arch == 'x86_64':
+                usec_divisor = 1000
+            else:
+                rtclockdata_addr = self.GetLoadAddressForSymbol('RTClockData')
+                rtc = self.GetValueFromAddress(rtclockdata_addr, 'struct _rtclock_data_ *')
+                usec_divisor = unsigned(rtc.rtc_usec_divisor)
+            usec_divisor = int(usec_divisor)
+            caching.SaveStaticCacheData('kern.rtc_usec_divisor', usec_divisor)
+        nsecs = (abstime * 1000)/usec_divisor
+        return nsecs
+
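 (An illustrative usage sketch, not part of the commit; abstime is assumed to hold an integer in mach_absolute_time units and kern to be the usual KernelTarget instance.)

     nanosecs = kern.GetNanotimeFromAbstime(abstime)
     print "{:d} ns ({:.6f} s)".format(nanosecs, nanosecs / 1000000000.0)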
     def __getattribute__(self, name):
         if name == 'zones' :
             self._zones_list = caching.GetDynamicCacheData("kern._zones_list", [])
@@ -318,11 +389,11 @@ class KernelTarget(object):
         if name == 'coalitions' :
             self._coalitions_list = caching.GetDynamicCacheData("kern._coalitions_list", [])
             if len(self._coalitions_list) > 0 : return self._coalitions_list
-            coalition_queue_head = self.GetGlobalVariable('coalitions')
+            coalition_queue_head = self.GetGlobalVariable('coalitions_q')
             coalition_type = LazyTarget.GetTarget().FindFirstType('coalition')
             coalition_ptr_type = coalition_type.GetPointerType()
-            for tsk in IterateQueue(coalition_queue_head, coalition_ptr_type, 'coalitions'):
-                self._coalitions_list.append(tsk)
+            for coal in IterateLinkageChain(addressof(coalition_queue_head), coalition_ptr_type, 'coalitions'):
+                self._coalitions_list.append(coal)
             caching.SaveDynamicCacheData("kern._coalitions_list", self._coalitions_list)
             return self._coalitions_list
 
index e19945561c9c1edda228b61d9b8e1aab237d01f7..38ee0b4fc91c5444d8a6c0508e75134c414b79e1 100644 (file)
@@ -20,14 +20,14 @@ class Armv8_RegisterSet(object):
     """ register info set for armv8 64 bit architecture"""
     register_info = { 'sets' : ['GPR'],
                   'registers': [
-    {'name': 'x0'  , 'bitsize':64, 'offset':  0, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 0, 'dwarf': 0},
-    {'name': 'x1'  , 'bitsize':64, 'offset':  8, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 1, 'dwarf': 1},
-    {'name': 'x2'  , 'bitsize':64, 'offset': 16, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 2, 'dwarf': 2},
-    {'name': 'x3'  , 'bitsize':64, 'offset': 24, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 3, 'dwarf': 3},
-    {'name': 'x4'  , 'bitsize':64, 'offset': 32, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 4, 'dwarf': 4},
-    {'name': 'x5'  , 'bitsize':64, 'offset': 40, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 5, 'dwarf': 5},
-    {'name': 'x6'  , 'bitsize':64, 'offset': 48, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 6, 'dwarf': 6},
-    {'name': 'x7'  , 'bitsize':64, 'offset': 56, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 7, 'dwarf': 7},
+    {'name': 'x0'  , 'bitsize':64, 'offset':  0, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 0, 'dwarf': 0, 'alt-name':'arg1', 'generic':'arg1'},
+    {'name': 'x1'  , 'bitsize':64, 'offset':  8, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 1, 'dwarf': 1, 'alt-name':'arg2', 'generic':'arg2'},
+    {'name': 'x2'  , 'bitsize':64, 'offset': 16, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 2, 'dwarf': 2, 'alt-name':'arg3', 'generic':'arg3'},
+    {'name': 'x3'  , 'bitsize':64, 'offset': 24, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 3, 'dwarf': 3, 'alt-name':'arg4', 'generic':'arg4'},
+    {'name': 'x4'  , 'bitsize':64, 'offset': 32, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 4, 'dwarf': 4, 'alt-name':'arg5', 'generic':'arg5'},
+    {'name': 'x5'  , 'bitsize':64, 'offset': 40, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 5, 'dwarf': 5, 'alt-name':'arg6', 'generic':'arg6'},
+    {'name': 'x6'  , 'bitsize':64, 'offset': 48, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 6, 'dwarf': 6, 'alt-name':'arg7', 'generic':'arg7'},
+    {'name': 'x7'  , 'bitsize':64, 'offset': 56, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 7, 'dwarf': 7, 'alt-name':'arg8', 'generic':'arg8'},
     {'name': 'x8'  , 'bitsize':64, 'offset': 64, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 8, 'dwarf': 8},
     {'name': 'x9'  , 'bitsize':64, 'offset': 72, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 9, 'dwarf': 9},
     {'name': 'x10' , 'bitsize':64, 'offset': 80, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':10, 'dwarf':10},
@@ -53,9 +53,9 @@ class Armv8_RegisterSet(object):
     {'name': 'lr'  , 'bitsize':64, 'offset':240, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':30, 'dwarf':30, 'alt-name': 'lr', 'generic':'lr'},
     {'name': 'sp'  , 'bitsize':64, 'offset':248, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':31, 'dwarf':31, 'alt-name': 'sp', 'generic':'sp'},
     {'name': 'pc'  , 'bitsize':64, 'offset':256, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':32, 'dwarf':32, 'alt-name': 'pc', 'generic':'pc'},
-    {'name': 'far' , 'bitsize':64, 'offset':264, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':34, 'dwarf':34, 'alt-name': 'far', 'generic':'far'},
-    {'name': 'cpsr', 'bitsize':32, 'offset':272, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':33, 'dwarf':33, 'alt-name': 'cpsr', 'generic':'cpsr'},
-    {'name': 'esr' , 'bitsize':32, 'offset':276, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':35, 'dwarf':35, 'alt-name': 'esr', 'generic':'esr'},
+    {'name': 'far' , 'bitsize':64, 'offset':264, 'encoding':'uint', 'format':'hex', 'set':0},
+    {'name': 'cpsr', 'bitsize':32, 'offset':272, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':33, 'dwarf':33, 'generic':'flags'},
+    {'name': 'esr' , 'bitsize':32, 'offset':276, 'encoding':'uint', 'format':'hex', 'set':0},
     ]
     }
 
@@ -113,7 +113,7 @@ class Armv8_RegisterSet(object):
     def ReadRegisterDataFromKDPSavedState(self, kdp_state, kernel_version):
         """ Setup register values from KDP saved information.
         """
-        saved_state = kernel_version.CreateValueFromExpression(None, '(arm_saved_state64_t *) ' + str(kdp_state.GetValueAsUnsigned()))
+        saved_state = kernel_version.CreateValueFromExpression(None, '(struct arm_saved_state64 *) ' + str(kdp_state.GetValueAsUnsigned()))
         saved_state = saved_state.Dereference()
         saved_state = PluginValue(saved_state)
         self.ResetRegisterValues()
@@ -237,12 +237,12 @@ class Armv7_RegisterSet(object):
         { 'name':'r10'  , 'bitsize' : 32, 'offset' : 40, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':10, 'dwarf' :10},
         { 'name':'r11'  , 'bitsize' : 32, 'offset' : 44, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':11, 'dwarf' :11, 'alt-name': 'fp', 'generic': 'fp'},
         { 'name':'r12'  , 'bitsize' : 32, 'offset' : 48, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':12, 'dwarf' :12},
-        { 'name':'sp'   , 'bitsize' : 32, 'offset' : 52, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':13, 'dwarf' :13, 'alt-name': 'sp', 'generic': 'sp'},
-        { 'name':'lr'   , 'bitsize' : 32, 'offset' : 56, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':14, 'dwarf' :14, 'alt-name': 'lr', 'generic': 'lr'},
-        { 'name':'pc'   , 'bitsize' : 32, 'offset' : 60, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':15, 'dwarf' :15, 'alt-name': 'pc', 'generic': 'pc'},
-        { 'name':'cpsr' , 'bitsize' : 32, 'offset' : 64, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':25, 'dwarf' :16, 'alt-name':'cpsr','generic':'cpsr'},
-        { 'name':'fsr'  , 'bitsize' : 32, 'offset' : 68, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':17, 'dwarf' :17, 'alt-name':'fsr', 'generic': 'fsr'},
-        { 'name':'far'  , 'bitsize' : 32, 'offset' : 72, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':18, 'dwarf' :18, 'alt-name': 'far', 'generic': 'far'}
+        { 'name':'sp'   , 'bitsize' : 32, 'offset' : 52, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':13, 'dwarf' :13, 'generic': 'sp'},
+        { 'name':'lr'   , 'bitsize' : 32, 'offset' : 56, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':14, 'dwarf' :14, 'generic': 'lr'},
+        { 'name':'pc'   , 'bitsize' : 32, 'offset' : 60, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':15, 'dwarf' :15, 'generic': 'pc'},
+        { 'name':'cpsr' , 'bitsize' : 32, 'offset' : 64, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':16, 'dwarf' :16, 'generic':'flags'},
+        { 'name':'fsr'  , 'bitsize' : 32, 'offset' : 68, 'encoding':'uint', 'format':'hex', 'set':0},
+        { 'name':'far'  , 'bitsize' : 32, 'offset' : 72, 'encoding':'uint', 'format':'hex', 'set':0}
         ]
         }
 
@@ -380,21 +380,21 @@ class I386_RegisterSet(object):
     register_info = { 'sets' : ['GPR'],
                   'registers': [
         { 'name': 'eax'   , 'bitsize': 32, 'offset' : 0, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 0, 'dwarf': 0},
-        { 'name': 'ebx'   , 'bitsize': 32, 'offset' : 4, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 1, 'dwarf': 1},
-        { 'name': 'ecx'   , 'bitsize': 32, 'offset' : 8, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 2, 'dwarf': 2},
-        { 'name': 'edx'   , 'bitsize': 32, 'offset' :12, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 3, 'dwarf': 3},
-        { 'name': 'edi'   , 'bitsize': 32, 'offset' :16, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 4, 'dwarf': 4},
-        { 'name': 'esi'   , 'bitsize': 32, 'offset' :20, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 5, 'dwarf': 5},
-        { 'name': 'ebp'   , 'bitsize': 32, 'offset' :24, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 6, 'dwarf': 6},
-        { 'name': 'esp'   , 'bitsize': 32, 'offset' :28, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 7, 'dwarf': 7},
-        { 'name': 'ss'    , 'bitsize': 32, 'offset' :32, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 8, 'dwarf': 8},
-        { 'name': 'eflags', 'bitsize': 32, 'offset' :36, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 9, 'dwarf': 9},
-        { 'name': 'eip'   , 'bitsize': 32, 'offset' :40, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :10, 'dwarf':10},
-        { 'name': 'cs'    , 'bitsize': 32, 'offset' :44, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :11, 'dwarf':11},
-        { 'name': 'ds'    , 'bitsize': 32, 'offset' :48, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :12, 'dwarf':12},
-        { 'name': 'es'    , 'bitsize': 32, 'offset' :52, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :13, 'dwarf':13},
-        { 'name': 'fs'    , 'bitsize': 32, 'offset' :56, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :14, 'dwarf':14},
-        { 'name': 'gs'    , 'bitsize': 32, 'offset' :60, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :15, 'dwarf':15},
+        { 'name': 'ebx'   , 'bitsize': 32, 'offset' : 4, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 3, 'dwarf': 3},
+        { 'name': 'ecx'   , 'bitsize': 32, 'offset' : 8, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 1, 'dwarf': 1},
+        { 'name': 'edx'   , 'bitsize': 32, 'offset' :12, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 2, 'dwarf': 2},
+        { 'name': 'edi'   , 'bitsize': 32, 'offset' :16, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 7, 'dwarf': 7},
+        { 'name': 'esi'   , 'bitsize': 32, 'offset' :20, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 6, 'dwarf': 6},
+        { 'name': 'ebp'   , 'bitsize': 32, 'offset' :24, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 4, 'dwarf': 5, 'generic': 'fp', 'alt-name': 'fp'},
+        { 'name': 'esp'   , 'bitsize': 32, 'offset' :28, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 5, 'dwarf': 4, 'generic': 'sp', 'alt-name': 'sp'},
+        { 'name': 'ss'    , 'bitsize': 32, 'offset' :32, 'encoding': 'uint' , 'format':'hex' , 'set': 0},
+        { 'name': 'eflags', 'bitsize': 32, 'offset' :36, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 9, 'dwarf': 9, 'generic': 'flags'},
+        { 'name': 'eip'   , 'bitsize': 32, 'offset' :40, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :8, 'dwarf':8, 'generic': 'pc', 'alt-name': 'pc'},
+        { 'name': 'cs'    , 'bitsize': 32, 'offset' :44, 'encoding': 'uint' , 'format':'hex' , 'set': 0},
+        { 'name': 'ds'    , 'bitsize': 32, 'offset' :48, 'encoding': 'uint' , 'format':'hex' , 'set': 0},
+        { 'name': 'es'    , 'bitsize': 32, 'offset' :52, 'encoding': 'uint' , 'format':'hex' , 'set': 0},
+        { 'name': 'fs'    , 'bitsize': 32, 'offset' :56, 'encoding': 'uint' , 'format':'hex' , 'set': 0},
+        { 'name': 'gs'    , 'bitsize': 32, 'offset' :60, 'encoding': 'uint' , 'format':'hex' , 'set': 0},
         ]
         }
 
@@ -689,8 +689,8 @@ class OperatingSystemPlugIn(object):
             self._target = process.target
             osplugin_target_obj = self._target
             self.current_session_id = GetUniqueSessionID(self.process)
-            self.version = self._target.FindGlobalVariables('version', 0).GetValueAtIndex(0)
-            self.kernel_stack_size = self._target.FindGlobalVariables('kernel_stack_size', 0).GetValueAtIndex(0).GetValueAsUnsigned()
+            self.version = self._target.FindGlobalVariables('version', 1).GetValueAtIndex(0)
+            self.kernel_stack_size = self._target.FindGlobalVariables('kernel_stack_size', 1).GetValueAtIndex(0).GetValueAsUnsigned()
             self.kernel_context_size = 0
             self.connected_over_kdp = False
             # connected_to_debugserver signifies if we are connected to astris or other gdbserver instance
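One note on the change from 0 to 1 above: the second argument to SBTarget.FindGlobalVariables() is the maximum number of matches to return, so passing 0 always yields an empty list and the subsequent GetValueAtIndex(0) hands back an invalid value. A hedged sketch of the intended pattern (assumes an lldb.SBTarget named target and a kernel global named 'version'):

    vers = target.FindGlobalVariables('version', 1)   # ask for at most one match
    if vers.GetSize() > 0:
        version = vers.GetValueAtIndex(0)             # lldb.SBValue for the global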
@@ -734,6 +734,21 @@ class OperatingSystemPlugIn(object):
                 print "Instantiating threads completely from saved state in memory."
 
     def create_thread(self, tid, context):
+        # A tid of 0xdeadbeef means this is a custom thread that the kernel does not know about.
+        if tid == 0xdeadbeef :
+            # The tid manipulation here should match the "switchtoregs" code in lldbmacros/process.py.
+            tid = 0xdead0000 | (context & ~0xffff0000)
+            tid = tid & 0xdeadffff
+            thread_obj = { 'tid'   : tid,
+                           'ptr'   : context,
+                           'name'  : 'switchtoregs' + hex(context),
+                           'queue' : 'None',
+                           'state' : 'stopped',
+                           'stop_reason' : 'none'
+                         }
+            self.thread_cache[tid] = thread_obj
+            return thread_obj
+        
         th_ptr = context
         th = self.version.CreateValueFromExpression(str(th_ptr),'(struct thread *)' + str(th_ptr))
         thread_id = th.GetChildMemberWithName('thread_id').GetValueAsUnsigned()
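The tid arithmetic in the block above folds the low 16 bits of the saved-state pointer into a recognisable 0xdeadXXXX thread id, which get_register_data() later uses to detect these fake "switchtoregs" threads. A worked example with a hypothetical pointer value:

    context = 0x8badf00d                         # hypothetical saved-state pointer
    tid = 0xdead0000 | (context & ~0xffff0000)   # keep the low 16 bits -> 0xdeadf00d
    tid = tid & 0xdeadffff                       # clamp into the 0xdeadXXXX range -> 0xdeadf00d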
@@ -807,7 +822,7 @@ class OperatingSystemPlugIn(object):
 
         # FIXME remove legacy code
         try:
-            thread_q_head = self._target.FindGlobalVariables('threads', 0).GetValueAtIndex(0)
+            thread_q_head = self._target.FindGlobalVariables('threads', 1).GetValueAtIndex(0)
             thread_type = self._target.FindFirstType('thread')
             thread_ptr_type = thread_type.GetPointerType()
             for th in IterateQueue(thread_q_head, thread_ptr_type, 'threads'):
@@ -830,16 +845,22 @@ class OperatingSystemPlugIn(object):
         return self.register_set.register_info
 
     def get_register_data(self, tid):
-        #print "searching for tid", tid
         thobj = None
         try:
+            regs = self.register_set
             if self.current_session_id != GetUniqueSessionID(self.process):
                 self.thread_cache = {}
                 self.current_session_id = GetUniqueSessionID(self.process)
-
             if tid in self.thread_cache.keys():
+                
+                # Check if the thread is a fake one; if so, create and return its registers directly.
+                if self.thread_cache[tid]['name'].find('switchtoregs') == 0:
+                    savedstateobj = self.version.CreateValueFromExpression(None, '(uintptr_t *) ' + str(self.thread_cache[tid]['ptr']))
+                    regs.ReadRegisterDataFromKDPSavedState(savedstateobj, self.version)
+                    return regs.GetPackedRegisterState()
+
                 thobj = self.version.CreateValueFromExpression(self.thread_cache[tid]['name'], '(struct thread *)' + str(self.thread_cache[tid]['ptr']))
-            regs = self.register_set
+            
             if thobj == None :
                 print "FATAL ERROR: Could not find thread with id %d" % tid
                 regs.ResetRegisterValues()
index a0f23d08123244eaec272d0779b83b96cffc7493..b518246449066b8ab5ec238c2288c5e68d477c80 100644 (file)
@@ -25,6 +25,9 @@ def __lldb_init_module(debugger, internal_dict):
     base_dir_name = self_path[:self_path.rfind("/")]
     core_os_plugin = base_dir_name + "/lldbmacros/core/operating_system.py"
     osplugin_cmd = "settings set target.process.python-os-plugin-path \"%s\"" % core_os_plugin
+    intel_whitelist = ['hndl_allintrs', 'hndl_alltraps', 'trap_from_kernel', 'hndl_double_fault', 'hndl_machine_check']
+    arm_whitelist = ['_fleh_prefabt', '_ExceptionVectorsBase', '_ExceptionVectorsTable', '_fleh_undef', '_fleh_dataabt', '_fleh_irq', '_fleh_decirq', '_fleh_fiq_generic', '_fleh_dec']
+    whitelist_trap_cmd = "settings set target.trap-handler-names %s %s" % (' '.join(intel_whitelist), ' '.join(arm_whitelist))
     xnu_debug_path = base_dir_name + "/lldbmacros/xnu.py"
     xnu_load_cmd = "command script import \"%s\"" % xnu_debug_path
     if debug_session_enabled :
@@ -37,6 +40,8 @@ def __lldb_init_module(debugger, internal_dict):
     else:
         print osplugin_cmd
         debugger.HandleCommand(osplugin_cmd)
+        print whitelist_trap_cmd
+        debugger.HandleCommand(whitelist_trap_cmd)
         print xnu_load_cmd
         debugger.HandleCommand(xnu_load_cmd)
     print "\n"
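The new target.trap-handler-names setting tells LLDB which symbols are trap or interrupt entry points, so the unwinder can back-trace through kernel exception frames instead of stopping at them; the Intel and ARM low-level handlers are whitelisted up front. A minimal sketch of issuing such a command from an init module (handler list shortened; 'debugger' is the SBDebugger passed to __lldb_init_module):

    handlers = ['hndl_alltraps', 'trap_from_kernel', '_fleh_irq']
    debugger.HandleCommand("settings set target.trap-handler-names %s" % ' '.join(handlers))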
index 44f3aaf9c323d6f07b9c63f923ed985af22e5710..7f079563c2acea0d112af2a3da784e1c69c1ddbf 100644 (file)
@@ -7,6 +7,20 @@ import sys
 ######################################
 plane = None
 
+#####################################
+# Utility functions.
+#####################################
+def CastIOKitClass(obj, target_type):
+    """ Type cast an object to another IOKit C++ class.
+        params:
+            obj - core.value  object representing some C construct in lldb
+            target_type - str : ex 'OSString *'
+                        - lldb.SBType :
+    """
+    v = Cast(obj, target_type)
+    v.GetSBValue().SetPreferDynamicValue(lldb.eNoDynamicValues)
+    return v
+
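CastIOKitClass wraps the existing Cast() helper and pins the result to static typing (lldb.eNoDynamicValues), so the macros below always see the C++ type they asked for rather than whatever more-derived class LLDB's dynamic type resolution infers. A hypothetical usage sketch:

    # 'entry' stands in for a core.value holding an IORegistryEntry *.
    svc = CastIOKitClass(entry, 'IOService *')     # static cast, no dynamic re-typing
    busy = unsigned(svc.__state[1] & 0xff)         # same field access as the macros below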
 ######################################
 # Type Summaries
 ######################################
@@ -52,17 +66,17 @@ def GetObjectSummary(obj):
     
     ztvAddr = kern.GetLoadAddressForSymbol('_ZTV7OSArray')
     if vt == ztvAddr:
-        out_string += "(" + GetArray(Cast(obj, 'OSArray *')) + ")"
+        out_string += "(" + GetArray(CastIOKitClass(obj, 'OSArray *')) + ")"
         return out_string
     
     ztvAddr = kern.GetLoadAddressForSymbol('_ZTV5OSSet')
     if vt == ztvAddr:
-        out_string += GetSet(Cast(obj, 'OSSet *'))
+        out_string += GetSet(CastIOKitClass(obj, 'OSSet *'))
         return out_string
     
     ztvAddr = kern.GetLoadAddressForSymbol('_ZTV12OSDictionary')
     if vt == ztvAddr:
-        out_string += GetDictionary(Cast(obj, 'OSDictionary *'))
+        out_string += GetDictionary(CastIOKitClass(obj, 'OSDictionary *'))
         return out_string
     
     return out_string
@@ -85,9 +99,9 @@ def GetRegistryEntrySummary(entry):
         name = LookupKeyInOSDict(propertyTable, kern.globals.gIOClassKey)
     
     if name is not None:
-        out_string += "+-o {0:s}  ".format(GetString(Cast(name, 'OSString *')))
-    elif Cast(entry, 'IOService *').pwrMgt and Cast(entry, 'IOService *').pwrMgt.Name:
-        out_string += "+-o {0:s}  ".format(Cast(entry, 'IOService *').pwrMgt.Name)
+        out_string += "+-o {0:s}  ".format(GetString(CastIOKitClass(name, 'OSString *')))
+    elif CastIOKitClass(entry, 'IOService *').pwrMgt and CastIOKitClass(entry, 'IOService *').pwrMgt.Name:
+        out_string += "+-o {0:s}  ".format(CastIOKitClass(entry, 'IOService *').pwrMgt.Name)
     else:
         out_string += "+-o ??  "
     
@@ -102,7 +116,7 @@ def GetRegistryEntrySummary(entry):
     ztvAddr = kern.GetLoadAddressForSymbol('_ZTV15IORegistryEntry')
     if vtableAddr != ztvAddr:
         out_string += ", "
-        state = Cast(entry, 'IOService *').__state[0]
+        state = CastIOKitClass(entry, 'IOService *').__state[0]
         # kIOServiceRegisteredState
         if 0 == state & 2:
             out_string += "!"
@@ -114,11 +128,9 @@ def GetRegistryEntrySummary(entry):
         #kIOServiceInactiveState
         if 0 != state & 1:
             out_string += "in"
-        busyCount = (Cast(entry, 'IOService *').__state[1] & 0xff)
-        retCount = (Cast(entry, 'IOService *').retainCount & 0xffff)
+        busyCount = (CastIOKitClass(entry, 'IOService *').__state[1] & 0xff)
+        retCount = (CastIOKitClass(entry, 'IOService *').retainCount & 0xffff)
         out_string += "active, busy {0}, retain count {1}>".format(busyCount, retCount)
-    #else:
-    #    out_string += "\n"
     return out_string
 
 ######################################
@@ -133,7 +145,7 @@ def ShowAllClasses(cmd_args=None):
     count = unsigned(kern.globals.sAllClassesDict.count)
     
     while idx < count:
-        meta = Cast(kern.globals.sAllClassesDict.dictionary[idx].value, 'OSMetaClass *')
+        meta = CastIOKitClass(kern.globals.sAllClassesDict.dictionary[idx].value, 'OSMetaClass *')
         idx += 1
         print GetMetaClass(meta)
 
@@ -462,13 +474,13 @@ def ShowRegistryEntryRecurse(entry, prefix, printProps):
     childArray = LookupKeyInOSDict(registryTable, childKey)
     if childArray is not None:
         idx = 0
-        ca = Cast(childArray, 'OSArray *')
+        ca = CastIOKitClass(childArray, 'OSArray *')
         count = unsigned(ca.count)
         while idx < count:
             if plen != 0 and plen != 1 and (plen & (plen - 1)) == 0:
-                ShowRegistryEntryRecurse(Cast(ca.array[idx], 'IORegistryEntry *'), prefix + "| ", printProps)
+                ShowRegistryEntryRecurse(CastIOKitClass(ca.array[idx], 'IORegistryEntry *'), prefix + "| ", printProps)
             else:
-                ShowRegistryEntryRecurse(Cast(ca.array[idx], 'IORegistryEntry *'), prefix + "  ", printProps)
+                ShowRegistryEntryRecurse(CastIOKitClass(ca.array[idx], 'IORegistryEntry *'), prefix + "  ", printProps)
             idx += 1
 
 def FindRegistryEntryRecurse(entry, search_name, stopAfterFirst):
@@ -490,12 +502,12 @@ def FindRegistryEntryRecurse(entry, search_name, stopAfterFirst):
         name = LookupKeyInOSDict(propertyTable, kern.globals.gIOClassKey)
     
     if name is not None:
-        if str(Cast(name, 'OSString *').string) == search_name:
+        if str(CastIOKitClass(name, 'OSString *').string) == search_name:
             print GetRegistryEntrySummary(entry)
             if stopAfterFirst is True:
                 return True
-    elif Cast(entry, 'IOService *').pwrMgt and Cast(entry, 'IOService *').pwrMgt.Name:
-        name = Cast(entry, 'IOService *').pwrMgt.Name
+    elif CastIOKitClass(entry, 'IOService *').pwrMgt and CastIOKitClass(entry, 'IOService *').pwrMgt.Name:
+        name = CastIOKitClass(entry, 'IOService *').pwrMgt.Name
         if str(name) == search_name:
             print GetRegistryEntrySummary(entry)
             if stopAfterFirst is True:
@@ -509,10 +521,10 @@ def FindRegistryEntryRecurse(entry, search_name, stopAfterFirst):
     childArray = LookupKeyInOSDict(registryTable, childKey)
     if childArray is not None:
         idx = 0
-        ca = Cast(childArray, 'OSArray *')
+        ca = CastIOKitClass(childArray, 'OSArray *')
         count = unsigned(ca.count)
         while idx < count:
-            if FindRegistryEntryRecurse(Cast(ca.array[idx], 'IORegistryEntry *'), search_name, stopAfterFirst) is True:
+            if FindRegistryEntryRecurse(CastIOKitClass(ca.array[idx], 'IORegistryEntry *'), search_name, stopAfterFirst) is True:
                 return True
             idx += 1
     return False
@@ -537,10 +549,10 @@ def FindRegistryObjectRecurse(entry, search_name):
         name = LookupKeyInOSDict(propertyTable, kern.globals.gIOClassKey)
     
     if name is not None:
-        if str(Cast(name, 'OSString *').string) == search_name:
+        if str(CastIOKitClass(name, 'OSString *').string) == search_name:
             return entry
-    elif Cast(entry, 'IOService *').pwrMgt and Cast(entry, 'IOService *').pwrMgt.Name:
-        name = Cast(entry, 'IOService *').pwrMgt.Name
+    elif CastIOKitClass(entry, 'IOService *').pwrMgt and CastIOKitClass(entry, 'IOService *').pwrMgt.Name:
+        name = CastIOKitClass(entry, 'IOService *').pwrMgt.Name
         if str(name) == search_name:
             return entry
     
@@ -551,9 +563,9 @@ def FindRegistryObjectRecurse(entry, search_name):
         childKey = plane.keys[1]
     childArray = LookupKeyInOSDict(registryTable, childKey)
     if childArray is not None:
-        ca = Cast(childArray, 'OSArray *')
+        ca = CastIOKitClass(childArray, 'OSArray *')
         for idx in range(ca.count):
-            registry_object = FindRegistryObjectRecurse(Cast(ca.array[idx], 'IORegistryEntry *'), search_name)
+            registry_object = FindRegistryObjectRecurse(CastIOKitClass(ca.array[idx], 'IORegistryEntry *'), search_name)
             if not registry_object or int(registry_object) == int(0):
                 continue
             else:
@@ -609,11 +621,11 @@ def GetRegDictionary(osdict, prefix):
 def GetString(string):
     """ Returns the python string representation of a given OSString
     """
-    out_string = "\"{0:s}\"".format(Cast(string, 'OSString *').string)
+    out_string = "\"{0:s}\"".format(CastIOKitClass(string, 'OSString *').string)
     return out_string
 
 def GetNumber(num):
-    out_string = "{0:d}".format(Cast(num, 'OSNumber *').value)
+    out_string = "{0:d}".format(CastIOKitClass(num, 'OSNumber *').value)
     return out_string
 
 def GetBoolean(b):
@@ -784,8 +796,8 @@ def showinterruptcounts(cmd_args=None):
     print header_format.format("Name", "Index", "Count")
     
     for i in kern.interrupt_stats:
-        owner = Cast(i.owner, 'IOInterruptEventSource *')
-        nub = Cast(owner.provider, 'IORegistryEntry *') 
+        owner = CastIOKitClass(i.owner, 'IOInterruptEventSource *')
+        nub = CastIOKitClass(owner.provider, 'IORegistryEntry *') 
         name = None
 
         # To uniquely identify an interrupt, we need the nub name and the index.  The index
@@ -803,7 +815,7 @@ def showinterruptcounts(cmd_args=None):
         if name is None:
             nub_name = "Unknown"
         else:
-            nub_name = GetString(Cast(name, 'OSString *'))
+            nub_name = GetString(CastIOKitClass(name, 'OSString *'))
 
         # We now have everything we need; spew the requested data.
 
@@ -834,8 +846,8 @@ def showinterruptstats(cmd_args=None):
     print header_format.format("Name", "Index", "Interrupt Count", "Interrupt Time", "Workloop Count", "Workloop CPU Time", "Workloop Time")
     
     for i in kern.interrupt_stats:
-        owner = Cast(i.owner, 'IOInterruptEventSource *')
-        nub = Cast(owner.provider, 'IORegistryEntry *') 
+        owner = CastIOKitClass(i.owner, 'IOInterruptEventSource *')
+        nub = CastIOKitClass(owner.provider, 'IORegistryEntry *') 
         name = None
 
         # To uniquely identify an interrupt, we need the nub name and the index.  The index
@@ -853,7 +865,7 @@ def showinterruptstats(cmd_args=None):
         if name is None:
             nub_name = "Unknown"
         else:
-            nub_name = GetString(Cast(name, 'OSString *'))
+            nub_name = GetString(CastIOKitClass(name, 'OSString *'))
 
         # We now have everything we need; spew the requested data.
 
index ec783959fa263d5603428b8ed2e86b9f71992fdb..d9c3745af92d27b94e1c0ffd03ad53ee07b82edd 100644 (file)
@@ -7,6 +7,7 @@ from utils import *
 from process import *
 from atm import *
 from bank import *
+from waitq import *
 import xnudefines
 
 @header("{0: <20s} {1: <6s} {2: <6s} {3: <10s} {4: <15s}".format("task", "pid", '#acts', "tablesize", "command"))
@@ -27,7 +28,7 @@ def GetTaskIPCSummary(task):
 
 @header("{0: <20s} {1: <28s} {2: <12s} {3: <6s} {4: <4s}  {5: <20s} {6: <4s}\n".format(
             "port", "mqueue", "recvname", "flags", "refs", "recvname", "dest"))
-def GetPortSummary(port, show_kmsg_summary=True, prefix=""):
+def PrintPortSummary(port, show_kmsg_summary=True, prefix=""):
     """ Display a port's summary
         params:
             port : core.value representing a port in the kernel
@@ -52,17 +53,17 @@ def GetPortSummary(port, show_kmsg_summary=True, prefix=""):
                                 unsigned(portp.ip_messages.data.port.receiver_name),
                                 "DPort", portp.ip_object.io_references, unsigned(portp),
                                 "inactive-port")
-    
+    print out_string
     if show_kmsg_summary:
         kmsgp = Cast(portp.ip_messages.data.port.messages.ikmq_base, 'ipc_kmsg_t')
-        out_string += prefix + GetKMsgSummary.header + prefix + GetKMsgSummary(kmsgp)
-        
-        kmsgheadp = kmsgp
-        kmsgp = kmsgp.ikm_next
-        while (kmsgp) != (kmsgheadp):
-            out_string += prefix + GetKMsgSummary(kmsgp)
+        if unsigned(kmsgp):
+            print prefix + GetKMsgSummary.header + prefix + GetKMsgSummary(kmsgp, prefix)
+            kmsgheadp = kmsgp
             kmsgp = kmsgp.ikm_next
-    return out_string
+            while (kmsgp) != (kmsgheadp):
+                print prefix + GetKMsgSummary(kmsgp, prefix)
+                kmsgp = kmsgp.ikm_next
+    return
 
 def GetPortDestProc(portp):
     """ Display the name and pid of a given port's receiver
@@ -85,9 +86,9 @@ def GetPortDestProc(portp):
     
     return out_str
 
-@header("{0: <20s} {1: <28s} {2: <12s} {3: <6s} {4: <6s} {5: <19s} {6: <26s} {7: <26s}\n".format(
-            "dest-port", "kmsg", "msgid", "disp", "size", "reply-port", "source", "destination"))
-def GetKMsgSummary(kmsgp):
+@header("{:<20s} {:<28s} {:<12s} {:<8s} {:<6s} {:<19s} {:<26s} {:<26s}\n".format(
+            "", "kmsg", "msgid", "disp", "size", "reply-port", "source", "destination"))
+def GetKMsgSummary(kmsgp, prefix_str=""):
     """ Display a summary for type ipc_kmsg_t
         params:
             kmsgp : core.value representing the given ipc_kmsg_t struct
@@ -97,25 +98,61 @@ def GetKMsgSummary(kmsgp):
     kmsghp = kmsgp.ikm_header
     kmsgh = dereference(kmsghp)
     out_string = ""
-    out_string += "{0: <19s} {1: <#019x} {2: <8s} {3: <#011x} ".format(
-                    ' '*19, unsigned(kmsgp), ' '*8, kmsgh.msgh_id)
+    out_string += "{0: <20s} {1: <#019x} {2: <8s} {3: <#011x} ".format(
+                    ' ', unsigned(kmsgp), ' '*8, kmsgh.msgh_id)
+    prefix_str = "{0: <20s} ".format(' ') + prefix_str
+    disposition = ""
+    bits = kmsgh.msgh_bits & 0xff
     
-    if (kmsgh.msgh_bits & 0xff) == 17:
-        out_string += "{0: <2s}".format("rS")
+    # remote port
+    if bits == 17:
+        disposition = "rS"
+    elif bits == 18:
+        disposition = "rO"
+    else :
+        disposition = "rX" # invalid
+    
+    out_string += "{0: <2s}".format(disposition)
+    
+    # local port
+    disposition = ""
+    bits = (kmsgh.msgh_bits & 0xff00) >> 8
+    
+    if bits == 17:
+        disposition = "lS"
+    elif bits == 18:
+        disposition = "lO"
+    elif bits == 0:
+        disposition = "l-"
     else:
-        out_string += "{0: <2s}".format("rO")
+        disposition = "lX"  # invalid
+        
+    out_string += "{0: <2s}".format(disposition)
     
-    if (kmsgh.msgh_bits & 0xff00) == (17 << 8):
-        out_string += "{0: <2s}".format("lS")
+    # voucher
+    disposition = ""
+    bits = (kmsgh.msgh_bits & 0xff0000) >> 16
+    
+    if bits == 17:
+        disposition = "vS"
+    elif bits == 0:
+        disposition = "v-"
     else:
-        if (kmsgh.msgh_bits & 0xff00) == (18 << 8):
-            out_string += "{0: <2s}".format("lO")
-        else:
-            out_string += "{0: <2s}".format("l-")
-    if kmsgh.msgh_bits & 0xf0000000:
-        out_string += "{0: <2s}".format("c")
+        disposition = "vX"
+
+    out_string += "{0: <2s}".format(disposition) 
+        
+    # complex message
+    if kmsgh.msgh_bits & 0x80000000:
+        out_string += "{0: <1s}".format("c")
     else:
-        out_string += "{0: <2s}".format("s")
+        out_string += "{0: <1s}".format("s")
+    
+    # importance boost
+    if kmsgh.msgh_bits & 0x20000000:
+        out_string += "{0: <1s}".format("I")
+    else:
+        out_string += "{0: <1s}".format("-")
     
     dest_proc_name = ""
     if kmsgp.ikm_header.msgh_remote_port:
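The decoding above follows the Mach message header layout: the remote-port disposition occupies the low byte of msgh_bits, the local-port disposition the next byte, and the voucher disposition the byte after that, with 17 and 18 corresponding to the send and send-once dispositions; the 0x80000000 bit flags a complex message and 0x20000000 the importance boost noted in the comments. A compact, hypothetical restatement:

    def decode_msgh_bits(msgh_bits):
        # Mirrors the masks used by GetKMsgSummary above.
        return { 'remote'  : msgh_bits & 0xff,            # 17 = send, 18 = send-once
                 'local'   : (msgh_bits >> 8) & 0xff,
                 'voucher' : (msgh_bits >> 16) & 0xff,
                 'complex' : bool(msgh_bits & 0x80000000),
                 'boosted' : bool(msgh_bits & 0x20000000) }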
@@ -124,8 +161,48 @@ def GetKMsgSummary(kmsgp):
     out_string += "{0: ^6d}   {1: <#019x} {2: <26s} {3: <26s}\n".format(
                     unsigned(kmsgh.msgh_size), unsigned(kmsgh.msgh_local_port),
                     GetKMsgSrc(kmsgp), dest_proc_name)
+    
+    if kmsgh.msgh_bits & 0x80000000:
+        out_string += prefix_str + "\t" + GetKMsgBody.header + "\n"
+        out_string += prefix_str + "\t" + GetKMsgBody(kmsgp, prefix_str + "\t") + "\n"
+    
+    return out_string
+
+@header("{: <20s} {: <20s} {: <10s}".format("descriptor", "address", "size"))
+def GetMachMsgOOLDescriptorSummary(desc):
+    """ Returns description for mach_msg_ool_descriptor_t * object
+    """
+    format_string = "{: <#020x} {: <#020x} {: <#010x}"
+    out_string = format_string.format(desc, desc.address, desc.size)
     return out_string
 
+@header("{: <20s} {: <8s} {: <20s} {: <10s} {: <20s}".format("kmsgheader", "size", "body", "ds_count", "dsc_head"))
+def GetKMsgBody(kmsgp, prefix_str=""):
+    """ Routine that prints a complex kmsg's body
+    """
+    kmsghp = kmsgp.ikm_header
+    kmsgh = dereference(kmsghp)
+    format_string = "{: <#020x} {: <#08x} {: <#020x} {: <#010x} {: <#020x}"
+    out_string = ""
+    body = Cast(addressof(kmsghp[1]), 'mach_msg_body_t *')
+    dsc_count = body.msgh_descriptor_count
+
+    dschead = Cast(addressof(body[1]), 'mach_msg_descriptor_t *')
+    out_string += format_string.format(kmsghp, sizeof(dereference(kmsghp)), body, unsigned(dsc_count), dschead)
+    
+    for i in range(dsc_count):
+        dsc = dschead[i]        
+        out_string += "\n" + prefix_str + "Descriptor: " + xnudefines.mach_msg_type_descriptor_strings[unsigned(dsc.type.type)]
+        if unsigned(dsc.type.type) == 0:
+            # it's a port.
+            p = dsc.port.name
+            out_string += " name: {: <#20x}".format(p)
+        elif unsigned(dsc.type.type) in (1,3):
+            # it's an OOL DESCRIPTOR or OOL VOLATILE DESCRIPTOR
+            ool = dsc.out_of_line
+            out_string += " " + GetMachMsgOOLDescriptorSummary(addressof(ool))
+    return out_string 
+
 def GetKMsgSrc(kmsgp):
     """ Routine that prints a kmsg's source process and pid details
         params:
@@ -138,9 +215,44 @@ def GetKMsgSrc(kmsgp):
     
     return "{0:s} ({1:d})".format(GetProcNameForPid(kmsgpid), kmsgpid)
 
+
+def PrintPortSetMembers(space, setid, show_kmsg_summary):
+    """ Print out the members of a given IPC PSet
+    """
+    num_entries = int(space.is_table_size)
+    is_tableval = space.is_table
+    setid_str = GetWaitqSetidString(setid)
+
+    prefix_str = "{0:<21s}".format(' '*21)
+    once = True
+    verbose = False
+    if config['verbosity'] > vHUMAN:
+        verbose = True
+
+    idx = 0
+    while idx < num_entries:
+        entryval = GetObjectAtIndexFromArray(is_tableval, idx)
+        ie_bits = unsigned(entryval.ie_bits)
+        if not (ie_bits & 0x00180000):
+            # It's a port entry that's _not_ dead
+            portval = Cast(entryval.ie_object, 'ipc_port_t')
+            waitq = addressof(portval.ip_messages.data.port.waitq)
+            psets = GetWaitqSets(addressof(portval.ip_messages.data.port.waitq))
+            for ps in psets:
+                if ps == setid_str:
+                    if once:
+                        once = False
+                        print "{:s}\n{:s}{:s}".format(GetPortDestProc(portval), prefix_str, PrintPortSummary.header)
+                    PrintPortSummary(portval, show_kmsg_summary, prefix_str)
+            if verbose:
+                sys.stderr.write('{:d}/{:d}...          \r'.format(idx, num_entries))
+        idx += 1
+    return
+
+
 @header("{0: <20s} {1: <28s} {2: <12s} {3: <6s} {4: <6s} {5: <20s} {6: <7s}\n".format(
             "portset", "waitqueue", "recvname", "flags", "refs", "recvname", "process"))
-def GetPortSetSummary(pset):
+def PrintPortSetSummary(pset, space = 0):
     """ Display summary for a given struct ipc_pset *
         params:
             pset : core.value representing a pset in the kernel
@@ -148,7 +260,13 @@ def GetPortSetSummary(pset):
             str  : string of summary information for the given pset
     """
     out_str = ""
+    show_kmsg_summary = False
+    if config['verbosity'] > vHUMAN :
+        show_kmsg_summary = True
+
+    setid = 0
     if pset.ips_object.io_bits & 0x80000000:
+        setid = pset.ips_messages.data.pset.setq.wqset_id
         out_str += "{0: #019x}  {1: #019x} {2: <7s} {3: #011x}   {4: <4s} {5: >6d}  {6: #019x}   ".format(
                     unsigned(pset), addressof(pset.ips_messages), ' '*7,
                     pset.ips_messages.data.pset.local_name, "ASet",
@@ -161,20 +279,12 @@ def GetPortSetSummary(pset):
                     pset.ips_messages.data.pset.local_name, "DSet",
                     pset.ips_object.io_references,
                     pset.ips_messages.data.pset.local_name)
-    
-    once = True
-    setlinksp = addressof(pset.ips_messages.data.pset.set_queue.wqs_setlinks)
-    wql = Cast(pset.ips_messages.data.pset.set_queue.wqs_setlinks.next, 'WaitQueueLink *')
-    portoff = getfieldoffset('struct ipc_port', 'ip_messages')
-    prefix_str = "{0:<21s}".format(' '*21)
-    while unsigned(wql) != unsigned(Cast(setlinksp, 'void *')):
-        portp = kern.GetValueFromAddress(unsigned(wql.wql_element.wqe_queue) - portoff, 'ipc_port *')
-        if once:
-            once = False
-            out_str += "{0:s}\n{1:s}{2:s}".format(GetPortDestProc(portp), prefix_str, GetPortSummary.header)
-        out_str += GetPortSummary(portp, False, prefix_str)
-        wql = Cast(wql.wql_setlinks.next, 'WaitQueueLink *')
-    return out_str
+    print out_str
+
+    if setid != 0 and space != 0:
+        PrintPortSetMembers(space, setid, show_kmsg_summary)
+
+    return
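With the waitq rework, a port set no longer chains its members through wqs_setlinks; each port's waitq instead records the ids of the sets it is linked to, and PrintPortSetMembers above matches those ids against the pset's wqset_id. Roughly (names reused from the surrounding macros, not a verbatim excerpt):

    setid_str = GetWaitqSetidString(pset.ips_messages.data.pset.setq.wqset_id)
    for ps in GetWaitqSets(addressof(port.ip_messages.data.port.waitq)):
        if ps == setid_str:
            PrintPortSummary(port)    # 'port' belongs to this pset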
 
 # Macro: showipc
 
@@ -191,8 +301,8 @@ def ShowIPC(cmd_args=None):
     if not ipc:
         print "unknown arguments:", str(cmd_args)
         return False
-    print GetIPCInformation.header
-    print GetIPCInformation(ipc, False, False)
+    print PrintIPCInformation.header
+    PrintIPCInformation(ipc, False, False)
 
 # EndMacro: showipc
 
@@ -230,8 +340,8 @@ def ShowAllIPC(cmd_args=None):
         print GetTaskSummary.header + " " + GetProcSummary.header
         pval = Cast(t.bsd_info, 'proc *')
         print GetTaskSummary(t) + " " + GetProcSummary(pval)
-        print GetIPCInformation.header
-        print GetIPCInformation(t.itk_space, False, False) + "\n\n"
+        print PrintIPCInformation.header
+        PrintIPCInformation(t.itk_space, False, False) + "\n\n"
 
 # EndMacro: showallipc
 
@@ -318,18 +428,31 @@ def GetPortDestinationSummary(port):
     return out_str
     
 @lldb_type_summary(['ipc_entry_t'])
-@header("{0: <20s} {1: <20s} {2: <8s} {3: <8s} {4: <20s} {5: <20s}".format("object", "name","rite", "urefs", "destname", "destination"))
-def GetIPCEntrySummary(entry, ipc_name=''):
+@header("{: <20s} {: <20s} {: <8s} {: <8s} {: <8s} {: <8s} {: <20s} {: <20s}".format("object", "name","rite", "urefs", "nsets", "nmsgs", "destname", "destination"))
+def GetIPCEntrySummary(entry, ipc_name='', rights_filter=0):
     """ Get summary of a ipc entry.
         params:
             entry - core.value representing ipc_entry_t in the kernel
             ipc_name - str of format '0x0123' for display in summary.  
         returns:
             str - string of ipc entry related information
+
+        types of rights:
+            'Dead'  : Dead name
+            'Set'   : Port set
+            'S'     : Send right
+            'R'     : Receive right
+            'O'     : Send-once right
+        types of notifications:
+            's'     : Send-Possible notification armed
+            'd'     : Send-Possible notification requested
+            'n'     : Dead-Name notification requested
+            'c'     : ???
+            'x'     : No-Senders notification requested
     """
     out_str = ''    
     entry_ptr = int(hex(entry), 16)
-    format_string = "{0: <#020x} {1: <12s} {2: <8s} {3: <8d} {4: <20s} {5: <20s}"
+    format_string = "{: <#020x} {: <12s} {: <8s} {: <8d} {: <8d} {: <8d} {: <20s} {: <20s}"
     right_str = ''
     destname_str = ''
     destination_str = ''
@@ -337,19 +460,29 @@ def GetIPCEntrySummary(entry, ipc_name=''):
     ie_object = entry.ie_object
     ie_bits = int(entry.ie_bits)
     urefs = int(ie_bits & 0xffff)
+    nsets = 0
+    nmsgs = 0
     if ie_bits & 0x00100000 :
         right_str = 'Dead'
     elif ie_bits & 0x00080000:
         right_str = 'Set'
+        psetval = Cast(ie_object, 'ipc_pset *')
+        set_str = GetWaitqSets(addressof(psetval.ips_messages.data.pset.setq.wqset_q))
+        nsets = len(set_str)
+        nmsgs = 0
     else:
         if ie_bits & 0x00010000 :
             if ie_bits & 0x00020000 :
+                # SEND + RECV
                 right_str = 'SR'
             else:
+                # SEND only
                 right_str = 'S'
         elif ie_bits & 0x00020000:
+            # RECV only
             right_str = 'R'
         elif ie_bits & 0x00040000 :
+            # SEND_ONCE
             right_str = 'O'
         portval = Cast(ie_object, 'ipc_port_t')
         if int(entry.index.request) != 0:
@@ -357,15 +490,24 @@ def GetIPCEntrySummary(entry, ipc_name=''):
             sorightval = requestsval[int(entry.index.request)].notify.port
             soright_ptr = unsigned(sorightval)
             if soright_ptr != 0:
+                 # send-possible armed
                  if soright_ptr & 0x1 : right_str +='s'
+                 # send-possible requested
                  elif soright_ptr & 0x2 : right_str +='d'
+                 # dead-name notification requested
                  else : right_str +='n'
+        # XXX: What does this bit mean?
         if ie_bits & 0x00800000 : right_str +='c'
+        # No-senders notification requested
         if portval.ip_nsrequest != 0: right_str +='x'
         # now show the port destination part
         destname_str = GetPortDestinationSummary(Cast(ie_object, 'ipc_port_t'))
-        
-    out_str = format_string.format(ie_object, ipc_name, right_str, urefs, destname_str, destination_str)
+        # Get the number of sets to which this port belongs
+        set_str = GetWaitqSets(addressof(portval.ip_messages.data.port.waitq))
+        nsets = len(set_str)
+        nmsgs = portval.ip_messages.data.port.msgcount
+    if rights_filter == 0 or rights_filter == right_str:
+        out_str = format_string.format(ie_object, ipc_name, right_str, urefs, nsets, nmsgs, destname_str, destination_str)
     return out_str
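For reference, the ie_bits tests above decode the IPC entry type bits: 0x00100000 marks a dead name, 0x00080000 a port set, 0x00010000 / 0x00020000 / 0x00040000 the send, receive and send-once rights, and the low 16 bits hold the user-reference count. A compact restatement of the same masks:

    def decode_ie_bits(ie_bits):
        # Hypothetical helper; masks taken from GetIPCEntrySummary above.
        if ie_bits & 0x00100000: rights = 'Dead'
        elif ie_bits & 0x00080000: rights = 'Set'
        else:
            rights = ''
            if ie_bits & 0x00010000: rights += 'S'   # send
            if ie_bits & 0x00020000: rights += 'R'   # receive
            if ie_bits & 0x00040000: rights += 'O'   # send-once
        return rights, ie_bits & 0xffff              # (rights, user refs)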
 
 @header("{0: >20s}".format("user bt") )
@@ -391,12 +533,12 @@ def GetPortUserStack(port, task):
     return out_str
 
 @lldb_type_summary(['ipc_space *'])
-@header("{0: <20s} {1: <20s} {2: <20s} {3: <8s} {4: <10s} {5: <16s} {6: <10s} {7: <7s}".format('ipc_space', 'is_task', 'is_table', 'flags', 'ports', 'table_next', 'low_mod', 'high_mod'))
-def GetIPCInformation(space, show_entries=False, show_userstack=False):
+@header("{0: <20s} {1: <20s} {2: <20s} {3: <8s} {4: <10s} {5: <18s} {6: >8s} {7: <8s}".format('ipc_space', 'is_task', 'is_table', 'flags', 'ports', 'table_next', 'low_mod', 'high_mod'))
+def PrintIPCInformation(space, show_entries=False, show_userstack=False, rights_filter=0):
     """ Provide a summary of the ipc space
     """
     out_str = ''
-    format_string = "{0: <#020x} {1: <#020x} {2: <#020x} {3: <8s} {4: <10d} {5: <#01x} {6: >10d} {7: >10d}"
+    format_string = "{0: <#020x} {1: <#020x} {2: <#020x} {3: <8s} {4: <10d} {5: <#18x} {6: >8d} {7: <8d}"
     is_tableval = space.is_table
     ports = int(space.is_table_size)
     flags =''
@@ -404,11 +546,11 @@ def GetIPCInformation(space, show_entries=False, show_userstack=False):
     if (is_bits & 0x40000000) == 0: flags +='A'
     else: flags += ' '
     if (is_bits & 0x20000000) != 0: flags +='G'
-    out_str += format_string.format(space, space.is_task, space.is_table, flags, space.is_table_size, space.is_table_next, space.is_low_mod, space.is_high_mod)
+    print format_string.format(space, space.is_task, space.is_table, flags, space.is_table_size, space.is_table_next, space.is_low_mod, space.is_high_mod)
     
     # show each individual entry if asked.
     if show_entries == True:
-        out_str += "\n\t" + GetIPCEntrySummary.header + "\n"
+        print "\t" + GetIPCEntrySummary.header
         num_entries = ports
         index = 0
         while index < num_entries:
@@ -416,22 +558,41 @@ def GetIPCInformation(space, show_entries=False, show_userstack=False):
             entry_ie_bits = unsigned(entryval.ie_bits)
             if (int(entry_ie_bits) & 0x001f0000 ) != 0:
                 entry_name = "{0: <#020x}".format( (index <<8 | entry_ie_bits >> 24) )
-                out_str += "\t" + GetIPCEntrySummary(entryval, entry_name) + "\n"
-                if show_userstack == True:
-                    entryport = Cast(entryval.ie_object, 'ipc_port *')
-                    if entryval.ie_object and (int(entry_ie_bits) & 0x00070000) and entryport.ip_callstack[0]:
-                        out_str += GetPortUserStack.header
-                        out_str += GetPortUserStack(entryport, space.is_task)
-            index +=1    
+                entry_str = GetIPCEntrySummary(entryval, entry_name, rights_filter)
+                if len(entry_str) > 0:
+                    print "                  \r\t" + entry_str
+                    if show_userstack == True:
+                        entryport = Cast(entryval.ie_object, 'ipc_port *')
+                        if entryval.ie_object and (int(entry_ie_bits) & 0x00070000) and entryport.ip_callstack[0]:
+                            print GetPortUserStack.header + GetPortUserStack(entryport, space.is_task)
+                else:
+                    # give some progress indication (this is especially
+                    # helpful for tasks with large sets of rights)
+                    sys.stderr.write(' {:d}/{:d}...\r'.format(index, num_entries))
+            index += 1
     #done with showing entries
     return out_str
 
 # Macro: showrights
 
-@lldb_command('showrights')
-def ShowRights(cmd_args=None):
+@lldb_command('showrights', 'R:')
+def ShowRights(cmd_args=None, cmd_options={}):
     """  Routine to print rights information for the given IPC space 
-         Usage: showrights <address of ipc space>
+         Usage: showrights [-R rights_type] <address of ipc space>
+                -R rights_type  : only display rights matching the string 'rights_type'
+
+                types of rights:
+                    'Dead'  : Dead name
+                    'Set'   : Port set
+                    'S'     : Send right
+                    'R'     : Receive right
+                    'O'     : Send-once right
+                types of notifications (append to rights type string):
+                    's'     : Send-Possible notification armed
+                    'd'     : Send-Possible notification requested
+                    'n'     : Dead-Name notification requested
+                    'c'     : ???
+                    'x'     : No-Senders notification requested
     """
     if not cmd_args:
         print "No arguments passed"
@@ -441,15 +602,32 @@ def ShowRights(cmd_args=None):
     if not ipc:
         print "unknown arguments:", str(cmd_args)
         return False
-    print GetIPCInformation.header
-    print GetIPCInformation(ipc, True, False)
+    rights_type = 0
+    if "-R" in cmd_options:
+        rights_type = cmd_options["-R"]
+    print PrintIPCInformation.header
+    PrintIPCInformation(ipc, True, False, rights_type)
 
 # EndMacro: showrights
 
-@lldb_command('showtaskrights')
-def ShowTaskRights(cmd_args=None):
+@lldb_command('showtaskrights','R:')
+def ShowTaskRights(cmd_args=None, cmd_options={}):
     """ Routine to print ipc rights information for a task
-        Usage: showtaskrights <task address>
+        Usage: showtaskrights [-R rights_type] <task address>
+               -R rights_type  : only display rights matching the string 'rights_type'
+
+               types of rights:
+                   'Dead'  : Dead name
+                   'Set'   : Port set
+                   'S'     : Send right
+                   'R'     : Receive right
+                   'O'     : Send-once right
+               types of notifications (append to rights type string):
+                   's'     : Send-Possible notification armed
+                   'd'     : Send-Possible notification requested
+                   'n'     : Dead-Name notification requested
+                   'c'     : ???
+                   'x'     : No-Senders notification requested
     """
     if cmd_args == None:
         print "No arguments passed"
@@ -459,18 +637,35 @@ def ShowTaskRights(cmd_args=None):
     if not tval:
         print "unknown arguments:", str(cmd_args)
         return False
+    rights_type = 0
+    if "-R" in cmd_options:
+        rights_type = cmd_options["-R"]
     print GetTaskSummary.header + " " + GetProcSummary.header
     pval = Cast(tval.bsd_info, 'proc *')
     print GetTaskSummary(tval) + " " + GetProcSummary(pval)
-    print GetIPCInformation.header
-    print GetIPCInformation(tval.itk_space, True, False)
+    print PrintIPCInformation.header
+    PrintIPCInformation(tval.itk_space, True, False, rights_type)
 
 # Macro: showtaskrightsbt
 
-@lldb_command('showtaskrightsbt')
-def ShowTaskRightsBt(cmd_args=None):
+@lldb_command('showtaskrightsbt', 'R:')
+def ShowTaskRightsBt(cmd_args=None, cmd_options={}):
     """ Routine to print ipc rights information with user stacks for a task
-        Usage: showtaskrightsbt <task address>
+        Usage: showtaskrightsbt [-R rights_type] <task address>
+               -R rights_type  : only display rights matching the string 'rights_type'
+
+               types of rights:
+                   'Dead'  : Dead name
+                   'Set'   : Port set
+                   'S'     : Send right
+                   'R'     : Receive right
+                   'O'     : Send-once right
+               types of notifications (append to rights type string):
+                   's'     : Send-Possible notification armed
+                   'd'     : Send-Possible notification requested
+                   'n'     : Dead-Name notification requested
+                   'c'     : ???
+                   'x'     : No-Senders notification requested
     """
     if cmd_args == None:
         print "No arguments passed"
@@ -480,28 +675,48 @@ def ShowTaskRightsBt(cmd_args=None):
     if not tval:
         print "unknown arguments:", str(cmd_args)
         return False
+    rights_type = 0
+    if "-R" in cmd_options:
+        rights_type = cmd_options["-R"]
     print GetTaskSummary.header + " " + GetProcSummary.header
     pval = Cast(tval.bsd_info, 'proc *')
     print GetTaskSummary(tval) + " " + GetProcSummary(pval)
-    print GetIPCInformation.header
-    print GetIPCInformation(tval.itk_space, True, True)
+    print PrintIPCInformation.header
+    PrintIPCInformation(tval.itk_space, True, True, rights_type)
 
 # EndMacro: showtaskrightsbt
 
 # Macro: showallrights
 
-@lldb_command('showallrights')
-def ShowAllRights(cmd_args=None):
+@lldb_command('showallrights', 'R:')
+def ShowAllRights(cmd_args=None, cmd_options={}):
     """  Routine to print rights information for IPC space of all tasks
-         Usage: showallrights
+         Usage: showallrights [-R rights_type]
+                -R rights_type  : only display rights matching the string 'rights_type'
+
+                types of rights:
+                    'Dead'  : Dead name
+                    'Set'   : Port set
+                    'S'     : Send right
+                    'R'     : Receive right
+                    'O'     : Send-once right
+                types of notifications (append to rights type string):
+                    's'     : Send-Possible notification armed
+                    'd'     : Send-Possible notification requested
+                    'n'     : Dead-Name notification requested
+                    'c'     : ???
+                    'x'     : No-Senders notification requested
     """
+    rights_type = 0
+    if "-R" in cmd_options:
+        rights_type = cmd_options["-R"]
     for t in kern.tasks:
         print GetTaskSummary.header + " " + GetProcSummary.header
         pval = Cast(t.bsd_info, 'proc *')
         print GetTaskSummary(t) + " " + GetProcSummary(pval)
         try:
-            print GetIPCInformation.header
-            print GetIPCInformation(t.itk_space, True, False) + "\n\n"
+            print PrintIPCInformation.header
+            PrintIPCInformation(t.itk_space, True, False, rights_type) + "\n\n"
         except (KeyboardInterrupt, SystemExit):
             raise
         except:
@@ -531,10 +746,10 @@ def ShowTaskBusyPorts(cmd_args=None):
         print ShowTaskBusyPorts.__doc__
         return
     task = kern.GetValueFromAddress(cmd_args[0], 'task_t')
-    print GetTaskBusyPorts(task)
+    PrintTaskBusyPorts(task)
     return
 
-def GetTaskBusyPorts(task):
+def PrintTaskBusyPorts(task):
     """ Prints all busy ports for a given task, i.e. all receive rights belonging
         to this task that have enqueued messages.
         params:
@@ -544,15 +759,15 @@ def GetTaskBusyPorts(task):
     """
     isp = task.itk_space
     i = 0
-    out_string = ""
     while i < isp.is_table_size:
         iep = addressof(isp.is_table[i])
         if iep.ie_bits & 0x00020000:
             port = Cast(iep.ie_object, 'ipc_port_t')
             if port.ip_messages.data.port.msgcount > 0:
-                out_string += GetPortSummary.header + GetPortSummary(port)
+                print PrintPortSummary.header
+                PrintPortSummary(port)
         i = i + 1
-    return out_string
+    return
 # EndMacro: showtaskbusyports
 
 # Macro: showallbusyports
@@ -562,32 +777,57 @@ def ShowAllBusyPorts(cmd_args=None):
         have enqueued messages.
     """
     task_queue_head = kern.globals.tasks
-    
+
     for tsk in kern.tasks:
-        print GetTaskBusyPorts(tsk)
+        PrintTaskBusyPorts(tsk)
     return
 # EndMacro: showallbusyports
 
+# Macro: showport:
+@lldb_command('showport','K')
+def ShowPort(cmd_args=None, cmd_options={}):
+    """ Routine that lists details about a given IPC port 
+        Syntax: (lldb) showport 0xaddr
+    """
+    show_kmsgs = True
+    if "-K" in cmd_options:
+        show_kmsgs = False
+    if not cmd_args:
+        print "Please specify the address of the port whose details you want to print"
+        print ShowPort.__doc__
+        return
+    port = kern.GetValueFromAddress(cmd_args[0], 'struct ipc_port *')
+    print PrintPortSummary.header
+    PrintPortSummary(port, show_kmsgs)
+# EndMacro: showport
+
 # Macro: showmqueue:
-@lldb_command('showmqueue')
-def ShowMQueue(cmd_args=None):
+@lldb_command('showmqueue', "S:")
+def ShowMQueue(cmd_args=None, cmd_options={}):
     """ Routine that lists details about a given mqueue
-        Syntax: (lldb) showmqueue 0xaddr
+        Syntax: (lldb) showmqueue 0xaddr [-S ipc_space]
     """
     if not cmd_args:
         print "Please specify the address of the ipc_mqueue whose details you want to print"
         print ShowMQueue.__doc__
         return
+    space = 0
+    if "-S" in cmd_options:
+        space = kern.GetValueFromAddress(cmd_options["-S"], 'struct ipc_space *')
     mqueue = kern.GetValueFromAddress(cmd_args[0], 'struct ipc_mqueue *')
-    wq_type = mqueue.data.pset.set_queue.wqs_wait_queue.wq_type
+    wq_type = mqueue.data.pset.setq.wqset_q.waitq_type
     if int(wq_type) == 3:
         psetoff = getfieldoffset('struct ipc_pset', 'ips_messages')
         pset = unsigned(ArgumentStringToInt(cmd_args[0])) - unsigned(psetoff)
-        print GetPortSetSummary.header + GetPortSetSummary(kern.GetValueFromAddress(pset, 'struct ipc_pset *'))
-    if int(wq_type) == 2:
+        print PrintPortSetSummary.header
+        PrintPortSetSummary(kern.GetValueFromAddress(pset, 'struct ipc_pset *'), space)
+    elif int(wq_type) == 2:
         portoff = getfieldoffset('struct ipc_port', 'ip_messages')
         port = unsigned(ArgumentStringToInt(cmd_args[0])) - unsigned(portoff)
-        print GetPortSummary.header + GetPortSummary(kern.GetValueFromAddress(port, 'struct ipc_port *'))
+        print PrintPortSummary.header
+        PrintPortSummary(kern.GetValueFromAddress(port, 'struct ipc_port *'))
+    else:
+        print "Invalid mqueue? (waitq type {:d} is invalid)".format(int(wq_type))
 # EndMacro: showmqueue
 
 # Macro: showkmsg:
@@ -605,17 +845,21 @@ def ShowKMSG(cmd_args=[]):
 # EndMacro: showkmsg
 
 # Macro: showpset
-@lldb_command('showpset')
-def ShowPSet(cmd_args=None):
+@lldb_command('showpset', "S:")
+def ShowPSet(cmd_args=None, cmd_options={}):
     """ Routine that prints details for a given ipc_pset *
-        Syntax: (lldb) showpset 0xaddr
+        Syntax: (lldb) showpset 0xaddr [-S ipc_space]
     """
     if not cmd_args:
         print "Please specify the address of the pset whose details you want to print"
         print ShowPSet.__doc__
         return
-    
-    print GetPortSetSummary.header + GetPortSetSummary(kern.GetValueFromAddress(cmd_args[0], 'ipc_pset *'))
+    space = 0
+    if "-S" in cmd_options:
+        space = kern.GetValueFromAddress(cmd_options["-S"], 'struct ipc_space *')
+
+    print PrintPortSetSummary.header
+    PrintPortSetSummary(kern.GetValueFromAddress(cmd_args[0], 'ipc_pset *'), space)
 # EndMacro: showpset
 
 # IPC importance inheritance related macros.
@@ -673,7 +917,7 @@ def GetIPCImportanceElemSummary(iie):
         if kmsg_count > 0:
             out_str += "\n\t"+ GetKMsgSummary.header 
             for k in IterateQueue(iie.iie_kmsgs, 'struct ipc_kmsg *',  'ikm_inheritance'):
-                out_str += "\t" + "{: <#018x}".format(k.ikm_header.msgh_remote_port) + '   ' + GetKMsgSummary(k).lstrip() 
+                out_str += "\t" + "{: <#018x}".format(k.ikm_header.msgh_remote_port) + '   ' + GetKMsgSummary(k, "\t").lstrip() 
             out_str += "\n"
         if inherit_count > 0:
             out_str += "\n\t" + GetIPCImportanceInheritSummary.header + "\n"
diff --git a/tools/lldbmacros/kauth.py b/tools/lldbmacros/kauth.py
new file mode 100644 (file)
index 0000000..89ee584
--- /dev/null
@@ -0,0 +1,35 @@
+""" Please make sure you read the README file COMPLETELY BEFORE reading anything below.
+    It is very critical that you read coding guidelines in Section E in README file.
+"""
+
+from xnu import *
+from utils import *
+
+# Macro: walkkauthcache
+@lldb_command('walkkauthcache')
+def WalkKauthCache(cmd_args=None):
+    """ Walks the bins of the kauth credential hash cache and prints out the
+        number of bins and bin usage information.
+    """
+    PrintKauthCache()
+# EndMacro: walkkauthcache
+
+def PrintKauthCache(cmd_args=None):
+    """ Routine to determine the size of the kauth cache, walk the bins
+         and print out usage information.
+    """
+    anchor = unsigned(kern.globals.kauth_cred_table_anchor)
+    alloc_info_struct = anchor - sizeof('struct _mhead')
+    alloc_info = kern.GetValueFromAddress(alloc_info_struct, 'struct _mhead*')
+    alloc_size = unsigned(alloc_info.mlen) - (sizeof('struct _mhead'))
+    table_entries = alloc_size / sizeof('struct kauth_cred_entry_head')
+    anchor = kern.globals.kauth_cred_table_anchor
+    print "Cred cache has: " + str(table_entries) + " buckets\n"
+    print "Number of items in each bucket ... \n"
+    for i in range(0, table_entries):
+        numinbucket = 0
+        for kauth_cred in IterateTAILQ_HEAD(anchor[i], "cr_link"):
+            numinbucket += 1
+            #print str(kauth_cred.cr_posix)
+            #print str(kauth_cred.cr_ref)
+        print str(numinbucket) + "\n"
diff --git a/tools/lldbmacros/kcdata.py b/tools/lldbmacros/kcdata.py
new file mode 100644 (file)
index 0000000..259a4a7
--- /dev/null
@@ -0,0 +1,993 @@
+#!/usr/bin/env python
+import sys
+import struct
+import mmap
+import json
+import cgitb
+import copy
+import re
+import base64
+import argparse
+import os
+import shlex
+import subprocess
+
+cgitb.enable(format='text')
+
+kcdata_type_def = {
+    'KCDATA_TYPE_INVALID':              0x0,
+    'KCDATA_TYPE_STRING_DESC':          0x1,
+    'KCDATA_TYPE_UINT32_DESC':          0x2,
+    'KCDATA_TYPE_UINT64_DESC':          0x3,
+    'KCDATA_TYPE_INT32_DESC':           0x4,
+    'KCDATA_TYPE_INT64_DESC':           0x5,
+    'KCDATA_TYPE_BINDATA_DESC':         0x6,
+    'KCDATA_TYPE_ARRAY':                0x11,
+    'KCDATA_TYPE_TYPEDEFINTION':        0x12,
+    'KCDATA_TYPE_CONTAINER_BEGIN':      0x13,
+    'KCDATA_TYPE_CONTIANER_END':        0x14,
+    'KCDATA_TYPE_LIBRARY_LOADINFO':     0x30,
+    'KCDATA_TYPE_LIBRARY_LOADINFO64':   0x31,
+    'KCDATA_TYPE_TIMEBASE':             0x32,
+    #'KCDATA_TYPE_MACH_ABSOLUTE_TIME':   0x33,
+    'KCDATA_TYPE_TIMEVAL':              0x34,
+    'KCDATA_TYPE_USECS_SINCE_EPOCH':    0x35,
+    'STACKSHOT_KCCONTAINER_TASK':       0x903,
+    'STACKSHOT_KCCONTAINER_THREAD':     0x904,
+    'STACKSHOT_KCTYPE_KERN_STACKFRAME': 0x90A,
+    'STACKSHOT_KCTYPE_KERN_STACKFRAME64': 0x90B,
+    'STACKSHOT_KCTYPE_USER_STACKFRAME': 0x90C,
+    'STACKSHOT_KCTYPE_USER_STACKFRAME64': 0x90D,
+    'STACKSHOT_KCTYPE_BOOTARGS':        0x90E,
+    'STACKSHOT_KCTYPE_OSVERSION':       0x90F,
+    'STACKSHOT_KCTYPE_KERN_PAGE_SIZE':  0x910,
+    'STACKSHOT_KCTYPE_JETSAM_LEVEL':    0x911,
+    'KCDATA_TYPE_BUFFER_END':      0xF19158ED,
+
+
+    'TASK_CRASHINFO_EXTMODINFO':           0x801,
+    'TASK_CRASHINFO_BSDINFOWITHUNIQID':    0x802,
+    'TASK_CRASHINFO_TASKDYLD_INFO':        0x803,
+    'TASK_CRASHINFO_UUID':                 0x804,
+    'TASK_CRASHINFO_PID':                  0x805,
+    'TASK_CRASHINFO_PPID':                 0x806,
+    'TASK_CRASHINFO_RUSAGE':               0x807,
+    'TASK_CRASHINFO_RUSAGE_INFO':          0x808,
+    'TASK_CRASHINFO_PROC_NAME':            0x809,
+    'TASK_CRASHINFO_PROC_STARTTIME':       0x80B,
+    'TASK_CRASHINFO_USERSTACK':            0x80C,
+    'TASK_CRASHINFO_ARGSLEN':              0x80D,
+    'TASK_CRASHINFO_EXCEPTION_CODES':      0x80E,
+    'TASK_CRASHINFO_PROC_PATH':            0x80F,
+    'TASK_CRASHINFO_PROC_CSFLAGS':         0x810,
+    'TASK_CRASHINFO_PROC_STATUS':          0x811,
+    'TASK_CRASHINFO_UID':                  0x812,
+    'TASK_CRASHINFO_GID':                  0x813,
+    'TASK_CRASHINFO_PROC_ARGC':            0x814,
+    'TASK_CRASHINFO_PROC_FLAGS':           0x815,
+    'TASK_CRASHINFO_CPUTYPE':              0x816,
+    'TASK_CRASHINFO_WORKQUEUEINFO':        0x817,
+    'TASK_CRASHINFO_RESPONSIBLE_PID':      0x818,
+    'TASK_CRASHINFO_DIRTY_FLAGS':          0x819,
+    'TASK_CRASHINFO_CRASHED_THREADID':     0x81A,
+
+    'KCDATA_BUFFER_BEGIN_CRASHINFO':  0xDEADF157,
+    'KCDATA_BUFFER_BEGIN_STACKSHOT':  0x59a25807
+}
+kcdata_type_def_rev = dict((v, k) for k, v in kcdata_type_def.iteritems())
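The reverse map built above lets the parser turn a numeric type tag from a kcdata stream back into its symbolic name. Two lookups using values straight from the table:

    kcdata_type_def_rev[0x13]        # -> 'KCDATA_TYPE_CONTAINER_BEGIN'
    kcdata_type_def_rev[0xDEADF157]  # -> 'KCDATA_BUFFER_BEGIN_CRASHINFO'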
+
+KNOWN_TYPES_COLLECTION = {}
+
+
+def enum(**args):
+    return type('enum', (), args)
+
+KCSUBTYPE_TYPE = enum(KC_ST_CHAR=1, KC_ST_INT8=2, KC_ST_UINT8=3, KC_ST_INT16=4, KC_ST_UINT16=5, KC_ST_INT32=6, KC_ST_UINT32=7, KC_ST_INT64=8, KC_ST_UINT64=9)
+
+
+class KCSubTypeElement(object):
+    """ Convert a kcdata_subtype_descriptor into a Python field description. """
+    _unpack_formats = (None, 'c', 'b', 'B', 'h', 'H', 'i', 'I', 'q', 'Q')
+    _ctypes = ('Unknown', 'char', 'int8_t', 'uint8_t', 'int16_t', 'uint16_t', 'int32_t', 'uint32_t', 'int64_t', 'uint64_t')
+
+    def __init__(self, st_name, st_type, st_size, st_offset=0, st_flag=0, custom_repr=None):
+        self.name = st_name
+        self.offset = st_offset
+        self.type_id = st_type
+        if st_type <= 0 or st_type > KCSUBTYPE_TYPE.KC_ST_UINT64:
+            raise ValueError("Invalid type passed %d" % st_type)
+        self.unpack_fmt = KCSubTypeElement._unpack_formats[self.type_id]
+        self.size = st_size
+        self.totalsize = st_size
+        self.count = 1
+        self.is_array_type = False
+        self.custom_JsonRepr = custom_repr
+        if (st_flag & 0x1) == 0x1:
+            self.is_array_type = True
+            self.size = st_size & 0xffff
+            self.count = (st_size >> 16) & 0xffff
+            self.totalsize = self.size * self.count
+
+    @staticmethod
+    def GetSizeForArray(el_count, el_size):
+        return ((el_count & 0xffff) << 16) | (el_size & 0xffff)
+
+    @staticmethod
+    def FromBinaryTypeData(byte_data):
+        (st_flag, st_type, st_offset, st_size, st_name) = struct.unpack_from('=BBHI32s', byte_data)
+        st_name = st_name.rstrip('\x00')
+        return KCSubTypeElement(st_name, st_type, st_size, st_offset, st_flag)
+
+    @staticmethod
+    def FromBasicCtype(st_name, st_type, st_offset=0):
+        if st_type <= 0 or st_type > KCSUBTYPE_TYPE.KC_ST_UINT64:
+            raise ValueError("Invalid type passed %d" % st_type)
+        st_size = struct.calcsize(KCSubTypeElement._unpack_formats[st_type])
+        st_flag = 0
+        retval = KCSubTypeElement(st_name, st_type, st_size, st_offset, st_flag, KCSubTypeElement._get_naked_element_value)
+        return retval
+
+    @staticmethod
+    def FromKCSubTypeElement(other, name_override=''):
+        _copy = copy.copy(other)
+        if name_override:
+            _copy.name = name_override
+        return _copy
+
+    def GetName(self):
+        return self.name
+
+    def GetTotalSize(self):
+        return self.totalsize
+
+    def GetValueAsString(self, base_data, array_pos=0):
+        return str(self.GetValue(base_data, array_pos))
+
+    def GetValue(self, base_data, array_pos=0):
+        return struct.unpack_from(self.unpack_fmt, base_data[self.offset + (array_pos * self.size):])[0]
+
+    @staticmethod
+    def _get_naked_element_value(elementValue, elementName):
+        return json.dumps(elementValue)
+
+    def __str__(self):
+        if self.is_array_type:
+            return '[%d,%d] %s  %s[%d];' % (self.offset, self.totalsize, self.GetCTypeDesc(), self.name, self.count)
+        return '[%d,%d] %s  %s;' % (self.offset, self.totalsize, self.GetCTypeDesc(), self.name)
+
+    def __repr__(self):
+        return str(self)
+
+    def GetCTypeDesc(self):
+        return KCSubTypeElement._ctypes[self.type_id]
+
+    def GetStringRepr(self, base_data):
+        if not self.is_array_type:
+            return self.GetValueAsString(base_data)
+        if self.type_id == KCSUBTYPE_TYPE.KC_ST_CHAR:
+            str_len = self.count
+            if len(base_data) < str_len:
+                str_len = len(base_data)
+            str_arr = []
+            for i in range(str_len):
+                _v = self.GetValue(base_data, i)
+                if ord(_v) == 0:
+                    break
+                str_arr.append(self.GetValueAsString(base_data, i))
+
+            return '"' + ''.join(str_arr) + '"'
+        o = '[' + ','.join([self.GetValueAsString(base_data, i) for i in range(self.count)]) + ']'
+        return o
+
+    def GetJsonRepr(self, base_data):
+        if self.custom_JsonRepr:
+            if self.is_array_type:
+                e_data = [self.GetValue(base_data, i) for i in range(self.count)]
+            else:
+                e_data = self.GetValue(base_data)
+            return self.custom_JsonRepr(e_data, self.name)
+        return self.GetStringRepr(base_data)
+
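+# Minimal usage sketch (illustrative; not used by the parser below, and it relies
+# on the module's existing struct import): decode a single int32 field named
+# 'pid' located at offset 0 of a raw payload.
+#
+#   pid_field = KCSubTypeElement('pid', KCSUBTYPE_TYPE.KC_ST_INT32, 4, 0, 0)
+#   raw = struct.pack('i', 1234)
+#   assert pid_field.GetValue(raw) == 1234
+#   assert pid_field.GetJsonRepr(raw) == '1234'
+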
+
+class KCTypeDescription(object):
+    def __init__(self, t_type_id, t_elements=[], t_name='anon', custom_repr=None):
+        self.type_id = t_type_id
+        self.elements = t_elements
+        self.name = t_name
+        self.totalsize = 0
+        self.custom_JsonRepr = custom_repr
+        for e in self.elements:
+            self.totalsize += e.GetTotalSize()
+
+    def ValidateData(self, base_data):
+        return len(base_data) >= self.totalsize
+
+    def GetTypeID(self):
+        return self.type_id
+
+    def GetName(self):
+        return self.name
+
+    def __str__(self):
+        o = '%s {\n\t' % self.name + "\n\t".join([str(e) for e in self.elements]) + '\n};'
+        return o
+
+    @staticmethod
+    def FromKCTypeDescription(other, t_type_id, t_name):
+        retval = KCTypeDescription(t_type_id, other.elements, t_name, other.custom_JsonRepr)
+        return retval
+
+    def GetJsonRepr(self, base_data):
+        if self.custom_JsonRepr:
+            return self.custom_JsonRepr([e.GetValue(base_data) for e in self.elements])
+        o = '{' + ", ".join(['"%s": %s' % (e.GetName(), e.GetJsonRepr(base_data)) for e in self.elements]) + '}'
+        return o
+
+
+def GetTypeNameForKey(k):
+    retval = "0x%x" % k
+    if k in KNOWN_TYPES_COLLECTION:
+        retval = KNOWN_TYPES_COLLECTION[k].GetName()
+    elif k in kcdata_type_def_rev:
+        retval = kcdata_type_def_rev[k]
+    return retval
+
+
+def GetTypeForName(n):
+    ret = 0
+    if n in kcdata_type_def:
+        ret = kcdata_type_def[n]
+    return ret
+
+
+class KCObject(object):
+    """
+    """
+    def __init__(self, type_code, data, flags=0, field_name=''):
+        self.i_type = type_code
+        self.i_data = data
+        self.i_size = len(data)
+        self.i_name = field_name
+        self.i_flags = flags
+        self.obj_collection = []
+        self.obj = {}
+        self.is_container_type = False
+        self.is_array_type = False
+        self.is_naked_type = False
+        if not field_name:
+            self.i_name = GetTypeNameForKey(type_code)
+        self.ParseData()
+
+    @staticmethod
+    def FromKCItem(kcitem):
+        return KCObject(kcitem.i_type, kcitem.i_data, kcitem.i_flags)
+
+    def IsContainerType(self):
+        return self.is_container_type
+
+    def IsContainerEnd(self):
+        if self.i_type in (GetTypeForName('KCDATA_TYPE_CONTIANER_END'), GetTypeForName('KCDATA_TYPE_BUFFER_END')):
+            return True
+        return False
+
+    def GetJsonRepr(self):
+        if self.is_array_type:
+            return '[' + ', '.join([i.GetJsonRepr() for i in self.obj_collection]) + ']'
+        #if self.is_array_type:
+        #    return '"%s" : [' % self.i_name + ', '.join([i.GetJsonRepr() for i in self.obj_collection]) + ']'
+        if self.is_container_type:
+            raise NotImplementedError("Containter types should not have come here")
+        if self.i_type in KNOWN_TYPES_COLLECTION:
+            return KNOWN_TYPES_COLLECTION[self.i_type].GetJsonRepr(self.i_data)
+        if self.is_naked_type:
+            return json.dumps(self.obj)
+
+        raise NotImplementedError("Broken GetJsonRepr implementation")
+
+    def ParseData(self):
+        if self.i_type == GetTypeForName('KCDATA_TYPE_CONTAINER_BEGIN'):
+            self.is_container_type = True
+            self.obj['uniqID'] = self.i_flags
+            self.i_name = str(self.obj['uniqID'])
+            self.obj['typeID'] = struct.unpack_from('I', self.i_data)[0]
+
+        elif self.i_type in (GetTypeForName('KCDATA_BUFFER_BEGIN_CRASHINFO'), GetTypeForName('KCDATA_BUFFER_BEGIN_STACKSHOT')):
+            self.is_container_type = True
+            self.obj['uniqID'] = self.i_name
+            self.obj['typeID'] = self.i_type
+
+        elif self.i_type == GetTypeForName('KCDATA_TYPE_CONTIANER_END'):
+            self.obj['uniqID'] = self.i_flags
+
+        elif self.i_type == GetTypeForName('KCDATA_TYPE_BUFFER_END'):
+            self.obj = ''
+
+        elif self.i_type == GetTypeForName('KCDATA_TYPE_UINT32_DESC'):
+            self.is_naked_type = True
+            u_d = struct.unpack_from('32sI', self.i_data)
+            self.i_name = u_d[0].strip(chr(0))
+            self.obj = u_d[1]
+
+        elif self.i_type == GetTypeForName('KCDATA_TYPE_UINT64_DESC'):
+            self.is_naked_type = True
+            u_d = struct.unpack_from('32sQ', self.i_data)
+            self.i_name = u_d[0].strip(chr(0))
+            self.obj = u_d[1]
+
+        elif self.i_type == GetTypeForName('KCDATA_TYPE_TYPEDEFINTION'):
+            self.is_naked_type = True
+            u_d = struct.unpack_from('II32s', self.i_data)
+            self.obj['name'] = u_d[2].strip(chr(0))
+            self.i_name = "typedef<%s>" % self.obj['name']
+            self.obj['typeID'] = u_d[0]
+            self.obj['numOfFields'] = u_d[1]
+            element_arr = []
+            for i in range(u_d[1]):
+                e = KCSubTypeElement.FromBinaryTypeData(self.i_data[40+(i*40):])
+                #print str(e)
+                element_arr.append(e)
+            type_desc = KCTypeDescription(u_d[0], element_arr, self.obj['name'])
+            #print str(type_desc)
+            self.obj['fields'] = [str(e) for e in element_arr]
+            KNOWN_TYPES_COLLECTION[type_desc.GetTypeID()] = type_desc
+
+        elif self.i_type == GetTypeForName('KCDATA_TYPE_ARRAY'):
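+            # For array items the 64-bit flags word carries the element type id
+            # in its upper 32 bits and the element count in its lower 32 bits;
+            # the per-element size is derived from the overall item size below.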
+            self.is_array_type = True
+            e_t = (self.i_flags >> 32) & 0xffffffff
+            e_c = self.i_flags & 0xffffffff
+            e_s = self.i_size / e_c
+            self.obj['typeID'] = e_t
+            self.i_name = GetTypeNameForKey(e_t)
+            self.i_type = e_t
+            self.obj['numOfElements'] = e_c
+            self.obj['sizeOfElement'] = e_s
+            #populate the array here by recursive creation of KCObject
+            for _i in range(e_c):
+                _o = KCObject(e_t, self.i_data[(_i * e_s):(_i * e_s) + e_s])
+                self.obj_collection.append(_o)
+        elif self.i_type in KNOWN_TYPES_COLLECTION:
+            self.i_name = KNOWN_TYPES_COLLECTION[self.i_type].GetName()
+            self.is_naked_type = True
+        else:
+            self.is_naked_type = True
+            #self.obj = "data of len %d" % len(self.i_data)
+            #self.obj = ''.join(["%x" % ki for ki in struct.unpack('%dB' % len(self.i_data), self.i_data)])
+            self.obj = base64.b64encode(self.i_data)
+
+
+class KCContainerObject(KCObject):
+    def __init__(self, *args, **kwargs):
+        KCObject.__init__(self, *args, **kwargs)
+        self.obj_container_dict = {}
+        self.obj_nested_objs = {}
+
+    def GetJsonRepr(self):
+        o = '"%s"' % self.obj['uniqID'] + ' : { "typeID" : %d ,' % self.obj['typeID']
+        for (k, v) in self.obj_container_dict.items():
+            if v.IsContainerType():
+                o += v.GetJsonRepr() + ","
+            else:
+                o += ' "%s" : ' % k + v.GetJsonRepr() + ","
+
+        for (k, v) in self.obj_nested_objs.items():
+            o += '"%s" : {' % k + ",".join([vi.GetJsonRepr() for vi in v.values()]) + "} ,"
+
+        o = o.rstrip(',') + "}"
+
+        return o
+
+    def AddObject(self, kco):
+        if kco.IsContainerEnd():
+            return
+        if kco.IsContainerType():
+            type_name = GetTypeNameForKey(kco.obj['typeID'])
+            if type_name not in self.obj_nested_objs:
+                self.obj_nested_objs[type_name] = {}
+            self.obj_nested_objs[type_name][kco.i_name] = kco
+            return
+        self.obj_container_dict[kco.i_name] = kco
+
+
+class KCData_item:
+    """ a basic kcdata_item type object.
+    """
+    header_size = 16  # (uint32_t + uint32_t + uint64_t)
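+    # Layout of a single item, as parsed by __init__ below:
+    #   bytes  0..3    uint32  type code
+    #   bytes  4..7    uint32  size of the payload that follows the header
+    #   bytes  8..15   uint64  flags
+    #   bytes 16..     payload ('size' bytes)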
+
+    def __init__(self, item_type, item_size, item_flags, item_data):
+        self.i_type = item_type
+        self.i_size = item_size
+        self.i_flags = item_flags
+        self.i_data = item_data
+        self._buf_pos = None
+
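+    # NOTE: the second __init__ below supersedes the one above; in this script
+    # items are only ever constructed from a raw byte buffer (see
+    # kcdata_item_iterator).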
+    def __init__(self, barray, pos=0):
+        """ create an object by parsing data from bytes array
+            returns : obj - if data is readable
+                      raises ValueError if something is not ok.
+        """
+        self.i_type = struct.unpack('I', barray[pos:pos+4])[0]     # int.from_bytes(barray[pos:pos+4])
+        self.i_size = struct.unpack('I', barray[pos+4:pos+8])[0]   # int.from_bytes(barray[pos+4:pos+8])
+        self.i_flags = struct.unpack('Q', barray[pos+8:pos+16])[0]  # int.from_bytes(barray[pos+8:pos+16])
+        self.i_data = barray[pos+16: (pos + 16 + self.i_size)]
+        self._buf_pos = pos
+
+    def __len__(self):
+        return self.i_size + KCData_item.header_size
+
+    def GetHeaderDescription(self):
+        outs = "type: 0x%x size: 0x%x flags: 0x%x" % (self.i_type, self.i_size, self.i_flags)
+        if self._buf_pos is not None:
+            outs = "pos: 0x%x " % self._buf_pos + outs
+        return outs
+
+    def __str__(self):
+        return self.GetHeaderDescription()
+
+
+def kcdata_item_iterator(filename):
+    if not filename:
+        return
+    with open(filename, "r+b") as f:
+        fmap = mmap.mmap(f.fileno(), 0)
+        file_len = len(fmap)
+        curpos = 0
+        while curpos < file_len:
+            item = KCData_item(fmap, curpos)
+            yield item
+            curpos += len(item)
+        fmap.close()
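+
+# Example (the file name is hypothetical): walk a kcdata file and print one
+# header line per item.
+#
+#   for item in kcdata_item_iterator('/tmp/stackshot.kcdata'):
+#       print item.GetHeaderDescription()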
+
+
+def _get_data_element(elementValues):
+    return json.dumps(elementValues[-1])
+
+KNOWN_TYPES_COLLECTION[GetTypeForName('KCDATA_TYPE_UINT32_DESC')] = KCTypeDescription(GetTypeForName('KCDATA_TYPE_UINT32_DESC'), (
+    KCSubTypeElement('desc', KCSUBTYPE_TYPE.KC_ST_CHAR, KCSubTypeElement.GetSizeForArray(32, 1), 0, 1),
+    KCSubTypeElement('data', KCSUBTYPE_TYPE.KC_ST_UINT32, 4, 32, 0)
+),
+    'KCDATA_TYPE_UINT32_DESC',
+    _get_data_element
+)
+
+KNOWN_TYPES_COLLECTION[GetTypeForName('KCDATA_TYPE_UINT64_DESC')] = KCTypeDescription(GetTypeForName('KCDATA_TYPE_UINT64_DESC'), (
+    KCSubTypeElement('desc', KCSUBTYPE_TYPE.KC_ST_CHAR, KCSubTypeElement.GetSizeForArray(32, 1), 0, 1),
+    KCSubTypeElement('data', KCSUBTYPE_TYPE.KC_ST_UINT64, 8, 32, 0)
+),
+    'KCDATA_TYPE_UINT64_DESC',
+    _get_data_element
+)
+
+KNOWN_TYPES_COLLECTION[GetTypeForName('KCDATA_TYPE_TIMEBASE')] = KCTypeDescription(GetTypeForName('KCDATA_TYPE_TIMEBASE'), (
+    KCSubTypeElement('numerator', KCSUBTYPE_TYPE.KC_ST_UINT32, 4, 0, 0),
+    KCSubTypeElement('denominator', KCSUBTYPE_TYPE.KC_ST_UINT32, 8, 4, 0)
+),
+    'timebase_info'
+)
+
+
+STACKSHOT_IO_NUM_PRIORITIES = 4
+KNOWN_TYPES_COLLECTION[0x901] = KCTypeDescription(0x901, (
+    KCSubTypeElement.FromBasicCtype('disk_reads_count', KCSUBTYPE_TYPE.KC_ST_UINT64, 0),
+    KCSubTypeElement.FromBasicCtype('disk_reads_size', KCSUBTYPE_TYPE.KC_ST_UINT64, 8),
+    KCSubTypeElement.FromBasicCtype('disk_writes_count', KCSUBTYPE_TYPE.KC_ST_UINT64, 16),
+    KCSubTypeElement.FromBasicCtype('disk_writes_size', KCSUBTYPE_TYPE.KC_ST_UINT64, 24),
+    KCSubTypeElement('io_priority_count', KCSUBTYPE_TYPE.KC_ST_UINT64, KCSubTypeElement.GetSizeForArray(STACKSHOT_IO_NUM_PRIORITIES, 8), 32, 1),
+    KCSubTypeElement('io_priority_size', KCSUBTYPE_TYPE.KC_ST_UINT64, KCSubTypeElement.GetSizeForArray(STACKSHOT_IO_NUM_PRIORITIES, 8), 32 + (STACKSHOT_IO_NUM_PRIORITIES * 8), 1),
+    KCSubTypeElement.FromBasicCtype('paging_count', KCSUBTYPE_TYPE.KC_ST_UINT64, 32 + 2 * (STACKSHOT_IO_NUM_PRIORITIES * 8)),
+    KCSubTypeElement.FromBasicCtype('paging_size', KCSUBTYPE_TYPE.KC_ST_UINT64, 40 + 2 * (STACKSHOT_IO_NUM_PRIORITIES * 8)),
+    KCSubTypeElement.FromBasicCtype('non_paging_count', KCSUBTYPE_TYPE.KC_ST_UINT64, 48 + 2 * (STACKSHOT_IO_NUM_PRIORITIES * 8)),
+    KCSubTypeElement.FromBasicCtype('non_paging_size', KCSUBTYPE_TYPE.KC_ST_UINT64, 56 + 2 * (STACKSHOT_IO_NUM_PRIORITIES * 8)),
+    KCSubTypeElement.FromBasicCtype('data_count', KCSUBTYPE_TYPE.KC_ST_UINT64, 64 + 2 * (STACKSHOT_IO_NUM_PRIORITIES * 8)),
+    KCSubTypeElement.FromBasicCtype('data_size', KCSUBTYPE_TYPE.KC_ST_UINT64, 72 + 2 * (STACKSHOT_IO_NUM_PRIORITIES * 8)),
+    KCSubTypeElement.FromBasicCtype('metadata_count', KCSUBTYPE_TYPE.KC_ST_UINT64, 80 + 2 * (STACKSHOT_IO_NUM_PRIORITIES * 8)),
+    KCSubTypeElement.FromBasicCtype('metadata_size', KCSUBTYPE_TYPE.KC_ST_UINT64, 88 + 2 * (STACKSHOT_IO_NUM_PRIORITIES * 8))
+),
+    'io_statistics'
+)
+
+KNOWN_TYPES_COLLECTION[0x902] = KCTypeDescription(0x902, (
+    KCSubTypeElement('snapshot_magic', KCSUBTYPE_TYPE.KC_ST_UINT32, 4, 4 * 0, 0),
+    KCSubTypeElement('free_pages', KCSUBTYPE_TYPE.KC_ST_UINT32, 4, 4 * 1, 0),
+    KCSubTypeElement('active_pages', KCSUBTYPE_TYPE.KC_ST_UINT32, 4, 4 * 2, 0),
+    KCSubTypeElement('inactive_pages', KCSUBTYPE_TYPE.KC_ST_UINT32, 4, 4 * 3, 0),
+    KCSubTypeElement('purgeable_pages', KCSUBTYPE_TYPE.KC_ST_UINT32, 4, 4 * 4, 0),
+    KCSubTypeElement('wired_pages', KCSUBTYPE_TYPE.KC_ST_UINT32, 4, 4 * 5, 0),
+    KCSubTypeElement('speculative_pages', KCSUBTYPE_TYPE.KC_ST_UINT32, 4, 4 * 6, 0),
+    KCSubTypeElement('throttled_pages', KCSUBTYPE_TYPE.KC_ST_UINT32, 4, 4 * 7, 0),
+    KCSubTypeElement('filebacked_pages', KCSUBTYPE_TYPE.KC_ST_UINT32, 4, 4 * 8, 0),
+    KCSubTypeElement('compressions', KCSUBTYPE_TYPE.KC_ST_UINT32, 4, 4 * 9, 0),
+    KCSubTypeElement('decompressions', KCSUBTYPE_TYPE.KC_ST_UINT32, 4, 4 * 10, 0),
+    KCSubTypeElement('compressor_size', KCSUBTYPE_TYPE.KC_ST_UINT32, 4, 4 * 11, 0),
+    KCSubTypeElement('busy_buffer_count', KCSUBTYPE_TYPE.KC_ST_INT32, 4, 4 * 12, 0),
+    KCSubTypeElement('pages_wanted', KCSUBTYPE_TYPE.KC_ST_UINT32, 4, 4 * 13, 0),
+    KCSubTypeElement('pages_reclaimed', KCSUBTYPE_TYPE.KC_ST_UINT32, 4, 4 * 14, 0),
+    KCSubTypeElement('pages_wanted_reclaimed_valid', KCSUBTYPE_TYPE.KC_ST_UINT8, 1, 4 * 15, 0)
+),
+    'mem_and_io_snapshot'
+)
+
+
+KNOWN_TYPES_COLLECTION[0x905] = KCTypeDescription(0x905, (
+    KCSubTypeElement.FromBasicCtype('unique_pid', KCSUBTYPE_TYPE.KC_ST_UINT64, 0),
+    KCSubTypeElement.FromBasicCtype('ss_flags', KCSUBTYPE_TYPE.KC_ST_UINT64, 8),
+    KCSubTypeElement.FromBasicCtype('user_time_in_terminated_threads', KCSUBTYPE_TYPE.KC_ST_UINT64, 16),
+    KCSubTypeElement.FromBasicCtype('system_time_in_terminated_threads', KCSUBTYPE_TYPE.KC_ST_UINT64, 24),
+    KCSubTypeElement.FromBasicCtype('p_start_sec', KCSUBTYPE_TYPE.KC_ST_UINT64, 32),
+    KCSubTypeElement.FromBasicCtype('task_size', KCSUBTYPE_TYPE.KC_ST_UINT64, 40),
+    KCSubTypeElement.FromBasicCtype('task_max_resident_size', KCSUBTYPE_TYPE.KC_ST_UINT64, 48),
+    KCSubTypeElement.FromBasicCtype('suspend_count', KCSUBTYPE_TYPE.KC_ST_UINT32, 56),
+    KCSubTypeElement.FromBasicCtype('faults', KCSUBTYPE_TYPE.KC_ST_UINT32, 60),
+    KCSubTypeElement.FromBasicCtype('pageins', KCSUBTYPE_TYPE.KC_ST_UINT32, 64),
+    KCSubTypeElement.FromBasicCtype('cow_faults', KCSUBTYPE_TYPE.KC_ST_UINT32, 68),
+    KCSubTypeElement.FromBasicCtype('was_throttled', KCSUBTYPE_TYPE.KC_ST_UINT32, 72),
+    KCSubTypeElement.FromBasicCtype('did_throttle', KCSUBTYPE_TYPE.KC_ST_UINT32, 76),
+    KCSubTypeElement.FromBasicCtype('latency_qos', KCSUBTYPE_TYPE.KC_ST_UINT32, 80),
+    KCSubTypeElement.FromBasicCtype('pid', KCSUBTYPE_TYPE.KC_ST_INT32, 84),
+    KCSubTypeElement('p_comm', KCSUBTYPE_TYPE.KC_ST_CHAR, KCSubTypeElement.GetSizeForArray(32, 1), 88, 1)
+),
+    'task_snapshot_v2'
+)
+
+KNOWN_TYPES_COLLECTION[0x906] = KCTypeDescription(0x906, (
+    KCSubTypeElement.FromBasicCtype('thread_id', KCSUBTYPE_TYPE.KC_ST_UINT64, 0),
+    KCSubTypeElement.FromBasicCtype('wait_event', KCSUBTYPE_TYPE.KC_ST_UINT64, 8),
+    KCSubTypeElement.FromBasicCtype('continuation', KCSUBTYPE_TYPE.KC_ST_UINT64, 16),
+    KCSubTypeElement.FromBasicCtype('total_syscalls', KCSUBTYPE_TYPE.KC_ST_UINT64, 24),
+    KCSubTypeElement.FromBasicCtype('voucher_identifier', KCSUBTYPE_TYPE.KC_ST_UINT64, 32),
+    KCSubTypeElement.FromBasicCtype('dqserialnum', KCSUBTYPE_TYPE.KC_ST_UINT64, 40),
+    KCSubTypeElement.FromBasicCtype('user_time', KCSUBTYPE_TYPE.KC_ST_UINT64, 48),
+    KCSubTypeElement.FromBasicCtype('sys_time', KCSUBTYPE_TYPE.KC_ST_UINT64, 56),
+    KCSubTypeElement.FromBasicCtype('ss_flags', KCSUBTYPE_TYPE.KC_ST_UINT64, 64),
+    KCSubTypeElement.FromBasicCtype('last_run_time', KCSUBTYPE_TYPE.KC_ST_UINT64, 72),
+    KCSubTypeElement.FromBasicCtype('last_made_runnable_time', KCSUBTYPE_TYPE.KC_ST_UINT64, 80),
+    KCSubTypeElement.FromBasicCtype('state', KCSUBTYPE_TYPE.KC_ST_UINT32, 88),
+    KCSubTypeElement.FromBasicCtype('sched_flags', KCSUBTYPE_TYPE.KC_ST_UINT32, 92),
+    KCSubTypeElement.FromBasicCtype('base_priority', KCSUBTYPE_TYPE.KC_ST_INT16, 96),
+    KCSubTypeElement.FromBasicCtype('sched_priority', KCSUBTYPE_TYPE.KC_ST_INT16, 98),
+    KCSubTypeElement.FromBasicCtype('ts_eqos', KCSUBTYPE_TYPE.KC_ST_UINT8, 100),
+    KCSubTypeElement.FromBasicCtype('ts_rqos', KCSUBTYPE_TYPE.KC_ST_UINT8, 101),
+    KCSubTypeElement.FromBasicCtype('ts_rqos_override', KCSUBTYPE_TYPE.KC_ST_UINT8, 102),
+    KCSubTypeElement.FromBasicCtype('io_tier', KCSUBTYPE_TYPE.KC_ST_UINT8, 103),
+),
+    'thread_snapshot_v2'
+)
+
+KNOWN_TYPES_COLLECTION[0x909] = KCSubTypeElement('pth_name', KCSUBTYPE_TYPE.KC_ST_CHAR, KCSubTypeElement.GetSizeForArray(64, 1), 0, 1)
+
+
+def _get_uuid_json_data(elementValues, elementName):
+    return '"<%s>"' % ''.join("%02x" % i for i in elementValues)
+
+KNOWN_TYPES_COLLECTION[GetTypeForName('KCDATA_TYPE_LIBRARY_LOADINFO64')] = KCTypeDescription(GetTypeForName('KCDATA_TYPE_LIBRARY_LOADINFO64'), (
+    KCSubTypeElement('loadAddress', KCSUBTYPE_TYPE.KC_ST_UINT64, 8, 0, 0),
+    KCSubTypeElement('imageUUID', KCSUBTYPE_TYPE.KC_ST_UINT8, KCSubTypeElement.GetSizeForArray(16, 1), 8, 1, _get_uuid_json_data)
+),
+    'dyld_load_info'
+)
+
+KNOWN_TYPES_COLLECTION[GetTypeForName('KCDATA_TYPE_LIBRARY_LOADINFO')] = KCTypeDescription(GetTypeForName('KCDATA_TYPE_LIBRARY_LOADINFO'), (
+    KCSubTypeElement('loadAddress', KCSUBTYPE_TYPE.KC_ST_UINT32, 4, 0, 0),
+    KCSubTypeElement('imageUUID', KCSUBTYPE_TYPE.KC_ST_UINT8, KCSubTypeElement.GetSizeForArray(16, 1), 4, 1, _get_uuid_json_data)
+),
+    'dyld_load_info'
+)
+
+KNOWN_TYPES_COLLECTION[0x908] = KCTypeDescription.FromKCTypeDescription(KNOWN_TYPES_COLLECTION[GetTypeForName('KCDATA_TYPE_LIBRARY_LOADINFO64')], 0x908, 'shared_cache_dyld_info')
+
+KNOWN_TYPES_COLLECTION[0x33] = KCSubTypeElement('mach_absolute_time', KCSUBTYPE_TYPE.KC_ST_UINT64, 8, 0, 0, KCSubTypeElement._get_naked_element_value)
+KNOWN_TYPES_COLLECTION[0x907] = KCSubTypeElement.FromBasicCtype('donating_pids', KCSUBTYPE_TYPE.KC_ST_INT32)
+
+KNOWN_TYPES_COLLECTION[GetTypeForName('KCDATA_TYPE_USECS_SINCE_EPOCH')] = KCSubTypeElement('usecs_since_epoch', KCSUBTYPE_TYPE.KC_ST_UINT64, 8, 0, 0, KCSubTypeElement._get_naked_element_value)
+
+KNOWN_TYPES_COLLECTION[GetTypeForName('STACKSHOT_KCTYPE_KERN_STACKFRAME')] = KCTypeDescription(GetTypeForName('STACKSHOT_KCTYPE_KERN_STACKFRAME'), (
+    KCSubTypeElement.FromBasicCtype('lr', KCSUBTYPE_TYPE.KC_ST_UINT32),
+    KCSubTypeElement.FromBasicCtype('sp', KCSUBTYPE_TYPE.KC_ST_UINT32, 4)
+),
+    'kernel_stack_frames'
+)
+
+KNOWN_TYPES_COLLECTION[GetTypeForName('STACKSHOT_KCTYPE_USER_STACKFRAME')] = KCTypeDescription.FromKCTypeDescription(
+    KNOWN_TYPES_COLLECTION[GetTypeForName('STACKSHOT_KCTYPE_KERN_STACKFRAME')],
+    GetTypeForName('STACKSHOT_KCTYPE_USER_STACKFRAME'),
+    'user_stack_frames'
+)
+
+KNOWN_TYPES_COLLECTION[GetTypeForName('STACKSHOT_KCTYPE_KERN_STACKFRAME64')] = KCTypeDescription(GetTypeForName('STACKSHOT_KCTYPE_KERN_STACKFRAME64'), (
+    KCSubTypeElement.FromBasicCtype('lr', KCSUBTYPE_TYPE.KC_ST_UINT64),
+    KCSubTypeElement.FromBasicCtype('sp', KCSUBTYPE_TYPE.KC_ST_UINT64, 8)
+),
+    'kernel_stack_frames'
+)
+
+KNOWN_TYPES_COLLECTION[GetTypeForName('STACKSHOT_KCTYPE_USER_STACKFRAME64')] = KCTypeDescription.FromKCTypeDescription(
+    KNOWN_TYPES_COLLECTION[GetTypeForName('STACKSHOT_KCTYPE_KERN_STACKFRAME64')],
+    GetTypeForName('STACKSHOT_KCTYPE_USER_STACKFRAME64'),
+    'user_stack_frames'
+)
+
+KNOWN_TYPES_COLLECTION[GetTypeForName('STACKSHOT_KCTYPE_OSVERSION')] = KCSubTypeElement('osversion', KCSUBTYPE_TYPE.KC_ST_CHAR,
+                          KCSubTypeElement.GetSizeForArray(256, 1), 0, 1)
+
+KNOWN_TYPES_COLLECTION[GetTypeForName('STACKSHOT_KCTYPE_BOOTARGS')] = KCSubTypeElement('bootargs', KCSUBTYPE_TYPE.KC_ST_CHAR,
+                           KCSubTypeElement.GetSizeForArray(256, 1), 0, 1)
+
+KNOWN_TYPES_COLLECTION[GetTypeForName('STACKSHOT_KCTYPE_KERN_PAGE_SIZE')] = KCSubTypeElement('kernel_page_size', KCSUBTYPE_TYPE.KC_ST_UINT32, 4, 0, 0, KCSubTypeElement._get_naked_element_value)
+
+KNOWN_TYPES_COLLECTION[GetTypeForName('STACKSHOT_KCTYPE_JETSAM_LEVEL')] = KCSubTypeElement('jetsam_level', KCSUBTYPE_TYPE.KC_ST_UINT32, 4, 0, 0, KCSubTypeElement._get_naked_element_value)
+
+
+#KNOWN_TYPES_COLLECTION[0x907] = KCSubTypeElement('donating_pids', KCSUBTYPE_TYPE.KC_ST_UINT32, 4, 0, 0, KCSubTypeElement._get_naked_element_value)
+KNOWN_TYPES_COLLECTION[GetTypeForName('TASK_CRASHINFO_PID')] = KCSubTypeElement('pid', KCSUBTYPE_TYPE.KC_ST_INT32, 4, 0, 0)
+KNOWN_TYPES_COLLECTION[GetTypeForName('TASK_CRASHINFO_PPID')] = KCSubTypeElement('ppid', KCSUBTYPE_TYPE.KC_ST_INT32, 4, 0, 0)
+KNOWN_TYPES_COLLECTION[GetTypeForName('TASK_CRASHINFO_PROC_NAME')] = KCSubTypeElement('p_comm', KCSUBTYPE_TYPE.KC_ST_CHAR,
+                           KCSubTypeElement.GetSizeForArray(32, 1), 0, 1)
+KNOWN_TYPES_COLLECTION[GetTypeForName('TASK_CRASHINFO_USERSTACK')] = KCSubTypeElement('userstack_ptr', KCSUBTYPE_TYPE.KC_ST_UINT64, 8, 0, 0)
+KNOWN_TYPES_COLLECTION[GetTypeForName('TASK_CRASHINFO_ARGSLEN')] = KCSubTypeElement('p_argslen', KCSUBTYPE_TYPE.KC_ST_INT32, 4, 0, 0)
+
+KNOWN_TYPES_COLLECTION[GetTypeForName('TASK_CRASHINFO_PROC_PATH')] = KCSubTypeElement('p_path', KCSUBTYPE_TYPE.KC_ST_CHAR,
+                           KCSubTypeElement.GetSizeForArray(1024, 1), 0, 1)
+KNOWN_TYPES_COLLECTION[GetTypeForName('TASK_CRASHINFO_PROC_CSFLAGS')] = KCSubTypeElement('p_csflags', KCSUBTYPE_TYPE.KC_ST_INT32, 4, 0, 0)
+KNOWN_TYPES_COLLECTION[GetTypeForName('TASK_CRASHINFO_UID')] = KCSubTypeElement('uid', KCSUBTYPE_TYPE.KC_ST_INT32, 4, 0, 0)
+KNOWN_TYPES_COLLECTION[GetTypeForName('TASK_CRASHINFO_GID')] = KCSubTypeElement('gid', KCSUBTYPE_TYPE.KC_ST_INT32, 4, 0, 0)
+KNOWN_TYPES_COLLECTION[GetTypeForName('TASK_CRASHINFO_PROC_ARGC')] = KCSubTypeElement('argc', KCSUBTYPE_TYPE.KC_ST_INT32, 4, 0, 0)
+KNOWN_TYPES_COLLECTION[GetTypeForName('TASK_CRASHINFO_PROC_FLAGS')] = KCSubTypeElement('p_flags', KCSUBTYPE_TYPE.KC_ST_INT32, 4, 0, 0)
+KNOWN_TYPES_COLLECTION[GetTypeForName('TASK_CRASHINFO_CPUTYPE')] = KCSubTypeElement('cputype', KCSUBTYPE_TYPE.KC_ST_INT32, 4, 0, 0)
+KNOWN_TYPES_COLLECTION[GetTypeForName('TASK_CRASHINFO_RESPONSIBLE_PID')] = KCSubTypeElement('responsible_pid', KCSUBTYPE_TYPE.KC_ST_INT32, 4, 0, 0)
+KNOWN_TYPES_COLLECTION[GetTypeForName('TASK_CRASHINFO_DIRTY_FLAGS')] = KCSubTypeElement('dirty_flags', KCSUBTYPE_TYPE.KC_ST_INT32, 4, 0, 0)
+KNOWN_TYPES_COLLECTION[GetTypeForName('TASK_CRASHINFO_CRASHED_THREADID')] = KCSubTypeElement('crashed_threadid', KCSUBTYPE_TYPE.KC_ST_UINT64, 8, 0, 0)
+
+KNOWN_TYPES_COLLECTION[GetTypeForName('TASK_CRASHINFO_PROC_STATUS')] = KCSubTypeElement('p_status', KCSUBTYPE_TYPE.KC_ST_UINT8, 1, 0, 0)
+
+KNOWN_TYPES_COLLECTION[GetTypeForName('TASK_CRASHINFO_BSDINFOWITHUNIQID')] = KCTypeDescription(GetTypeForName('TASK_CRASHINFO_BSDINFOWITHUNIQID'),
+    (   KCSubTypeElement('p_uuid', KCSUBTYPE_TYPE.KC_ST_UINT8, KCSubTypeElement.GetSizeForArray(16, 1), 0, 1),
+        KCSubTypeElement.FromBasicCtype('p_uniqueid', KCSUBTYPE_TYPE.KC_ST_UINT64, 16),
+        KCSubTypeElement.FromBasicCtype('p_puniqueid', KCSUBTYPE_TYPE.KC_ST_UINT64, 24)
+    ),
+    'proc_uniqidentifierinfo')
+
+KNOWN_TYPES_COLLECTION[GetTypeForName('TASK_CRASHINFO_EXCEPTION_CODES')] = KCSubTypeElement('TASK_CRASHINFO_EXCEPTION_CODES', KCSUBTYPE_TYPE.KC_ST_INT64,
+    KCSubTypeElement.GetSizeForArray(2,8), 0, 1)
+
+KNOWN_TYPES_COLLECTION[GetTypeForName('TASK_CRASHINFO_RUSAGE_INFO')] = KCTypeDescription(GetTypeForName('TASK_CRASHINFO_RUSAGE_INFO'),
+    (
+        KCSubTypeElement('ri_uuid', KCSUBTYPE_TYPE.KC_ST_UINT8, KCSubTypeElement.GetSizeForArray(16, 1), 0, 1),
+            KCSubTypeElement.FromBasicCtype('ri_user_time', KCSUBTYPE_TYPE.KC_ST_UINT64, 16),
+            KCSubTypeElement.FromBasicCtype('ri_system_time', KCSUBTYPE_TYPE.KC_ST_UINT64, 24),
+            KCSubTypeElement.FromBasicCtype('ri_pkg_idle_wkups', KCSUBTYPE_TYPE.KC_ST_UINT64, 32),
+            KCSubTypeElement.FromBasicCtype('ri_interrupt_wkups', KCSUBTYPE_TYPE.KC_ST_UINT64, 40),
+            KCSubTypeElement.FromBasicCtype('ri_pageins', KCSUBTYPE_TYPE.KC_ST_UINT64, 48),
+            KCSubTypeElement.FromBasicCtype('ri_wired_size', KCSUBTYPE_TYPE.KC_ST_UINT64, 56),
+            KCSubTypeElement.FromBasicCtype('ri_resident_size', KCSUBTYPE_TYPE.KC_ST_UINT64, 64),
+            KCSubTypeElement.FromBasicCtype('ri_phys_footprint', KCSUBTYPE_TYPE.KC_ST_UINT64, 72),
+            KCSubTypeElement.FromBasicCtype('ri_proc_start_abstime', KCSUBTYPE_TYPE.KC_ST_UINT64, 80),
+            KCSubTypeElement.FromBasicCtype('ri_proc_exit_abstime', KCSUBTYPE_TYPE.KC_ST_UINT64, 88),
+            KCSubTypeElement.FromBasicCtype('ri_child_user_time', KCSUBTYPE_TYPE.KC_ST_UINT64, 96),
+            KCSubTypeElement.FromBasicCtype('ri_child_system_time', KCSUBTYPE_TYPE.KC_ST_UINT64, 104),
+            KCSubTypeElement.FromBasicCtype('ri_child_pkg_idle_wkups', KCSUBTYPE_TYPE.KC_ST_UINT64, 112),
+            KCSubTypeElement.FromBasicCtype('ri_child_interrupt_wkups', KCSUBTYPE_TYPE.KC_ST_UINT64, 120),
+            KCSubTypeElement.FromBasicCtype('ri_child_pageins', KCSUBTYPE_TYPE.KC_ST_UINT64, 128),
+            KCSubTypeElement.FromBasicCtype('ri_child_elapsed_abstime', KCSUBTYPE_TYPE.KC_ST_UINT64, 136),
+            KCSubTypeElement.FromBasicCtype('ri_diskio_bytesread', KCSUBTYPE_TYPE.KC_ST_UINT64, 144),
+            KCSubTypeElement.FromBasicCtype('ri_diskio_byteswritten', KCSUBTYPE_TYPE.KC_ST_UINT64, 152),
+            KCSubTypeElement.FromBasicCtype('ri_cpu_time_qos_default', KCSUBTYPE_TYPE.KC_ST_UINT64, 160),
+            KCSubTypeElement.FromBasicCtype('ri_cpu_time_qos_maintenance', KCSUBTYPE_TYPE.KC_ST_UINT64, 168),
+            KCSubTypeElement.FromBasicCtype('ri_cpu_time_qos_background', KCSUBTYPE_TYPE.KC_ST_UINT64, 176),
+            KCSubTypeElement.FromBasicCtype('ri_cpu_time_qos_utility', KCSUBTYPE_TYPE.KC_ST_UINT64, 184),
+            KCSubTypeElement.FromBasicCtype('ri_cpu_time_qos_legacy', KCSUBTYPE_TYPE.KC_ST_UINT64, 192),
+            KCSubTypeElement.FromBasicCtype('ri_cpu_time_qos_user_initiated', KCSUBTYPE_TYPE.KC_ST_UINT64, 200),
+            KCSubTypeElement.FromBasicCtype('ri_cpu_time_qos_user_interactive', KCSUBTYPE_TYPE.KC_ST_UINT64, 208),
+            KCSubTypeElement.FromBasicCtype('ri_billed_system_time', KCSUBTYPE_TYPE.KC_ST_UINT64, 216),
+            KCSubTypeElement.FromBasicCtype('ri_serviced_system_time', KCSUBTYPE_TYPE.KC_ST_UINT64, 224)
+    ),
+    'rusage_info_v3')
+
+def GetSecondsFromMATime(mat, tb):
+    return (float(mat) * tb['numerator']) / tb['denominator']
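+
+# e.g. GetSecondsFromMATime(1000, {'numerator': 1, 'denominator': 1}) == 1000.0;
+# this identity timebase is also the fallback used in SaveStackshotReport below
+# when the buffer carries no timebase_info item.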
+
+def FindLibraryForAddress(liblist, address):
+    current_lib = None
+    for l in liblist:
+        if address >= l[1]:
+            current_lib = l
+    return current_lib
+
+def FindIndexOfLibInCatalog(catalog, lib):
+    index = None
+    i = 0
+    for l in catalog:
+        if l[0] == lib[0] and l[1] == lib[1]:
+            index = i
+            break
+        i += 1
+
+    if index is None:
+        catalog.append(lib)
+        index = len(catalog) - 1
+
+    return index
+
+def GetOffsetOfAddressForLib(lib, address):
+    return (address - lib[1])
+
+def GetSymbolInfoForFrame(catalog, liblist, address):
+    lib = FindLibraryForAddress(liblist, address)
+    if not lib:
+        lib = ["00000000000000000000000000000000",0,"A"]
+    offset = GetOffsetOfAddressForLib(lib, address)
+    index = FindIndexOfLibInCatalog(catalog, lib)
+    return [index, offset]
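+
+# Each library is tracked as a [uuid_hex, load_address, kind] triplet, where
+# kind is a one-letter tag such as "K" (kernel), "P" (process) or "C" (shared
+# cache); a resolved frame becomes a [catalog_index, offset_from_load_address]
+# pair indexing into the report's binaryImages list (see SaveStackshotReport).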
+
+def GetStateDescription(s):
+    retval = []
+    TH_WAIT = 0x01
+    TH_SUSP = 0x02
+    TH_RUN = 0x04
+    TH_UNINT = 0x08
+    TH_TERMINATE = 0x10
+    TH_TERMINATE2 = 0x20
+    TH_IDLE = 0x80
+    if (s & TH_WAIT):
+        retval.append("TH_WAIT")
+    if (s & TH_SUSP):
+        retval.append("TH_SUSP")
+    if (s & TH_RUN):
+        retval.append("TH_RUN")
+    if (s & TH_UNINT):
+        retval.append("TH_UNINT")
+    if (s & TH_TERMINATE):
+        retval.append("TH_TERMINATE")
+    if (s & TH_TERMINATE2):
+        retval.append("TH_TERMINATE2")
+    if (s & TH_IDLE):
+        retval.append("TH_IDLE")
+    return retval
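+
+# e.g. GetStateDescription(0x09) -> ['TH_WAIT', 'TH_UNINT'], i.e. a thread
+# blocked in an uninterruptible wait.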
+
+def SaveStackshotReport(j, outfile_name, dsc_uuid, dsc_libs_arr):
+    import time
+    from operator import itemgetter, attrgetter
+    ss = j.get('KCDATA_BUFFER_BEGIN_STACKSHOT')
+    if not ss:
+        print "No KCDATA_BUFFER_BEGIN_STACKSHOT object found. Skipping writing report."
+        return
+    timestamp = ss.get('usecs_since_epoch', int(time.time()))
+    timestamp = time.strftime("%Y-%m-%d %H:%M:%S %z",time.gmtime(timestamp))
+    os_version = ss.get('osversion', 'Unknown')
+    timebase = ss.get('timebase_info', {"denominator": 1, "numerator": 1})
+    dsc_common = [ss.get('shared_cache_dyld_info')['imageUUID'].strip('<>'),
+                  ss.get('shared_cache_dyld_info')['loadAddress'],
+                  "C"
+                 ]
+
+    dsc_libs = []
+    if dsc_common[0].replace('-', '').lower() == dsc_uuid:
+        print "SUCCESS: Found Matching dyld shared cache uuid. Loading library load addresses from layout provided."
+        _load_addr = dsc_common[1]
+        #print _load_addr
+        #print dsc_libs_arr
+        for i in dsc_libs_arr:
+            _uuid = i[2].lower().replace('-','').strip()
+            _addr = int(i[0], 16) + _load_addr
+            dsc_libs.append([_uuid, _addr, "P"])
+            #print "adding ", [_uuid, _addr, "C"]
+
+    AllImageCatalog = []
+    obj = {}
+    obj["kernel"] = os_version
+    obj["date"] = timestamp
+    obj["reason"] = "kernel panic stackshot"
+    obj["incident"] = "ABCDEFGH-1234-56IJ-789K-0LMNOPQRSTUV"
+    obj["crashReporterKey"] = "12ab34cd45aabbccdd6712ab34cd45aabbccdd67"
+    obj["bootArgs"] = ss.get('bootargs','')
+    obj["frontmostPids"] = [0]
+    obj["exception"] = "0xDEADF157"
+    obj["processByPid"] = {}
+    processByPid = obj["processByPid"]
+    ssplist = ss.get('STACKSHOT_KCCONTAINER_TASK', {})
+    kern_load_info = []
+    if "0" in ssplist:
+        kl_infos = ssplist["0"].get("dyld_load_info", [])
+        for dlinfo in kl_infos:
+            kern_load_info.append([dlinfo['imageUUID'].strip('<>'), dlinfo['loadAddress'], "K"])
+    for pid,piddata in ssplist.iteritems():
+        processByPid[str(pid)] = {}
+        tsnap = processByPid[str(pid)]
+        pr_lib_dsc = dsc_common
+        if 'shared_cache_dyld_info' in tsnap:
+            pr_lib_dsc = [tsnap.get('shared_cache_dyld_info')['imageUUID'].strip('<>'),
+                          tsnap.get('shared_cache_dyld_info')['loadAddress'],
+                          "C"
+                         ]
+
+        pr_libs = []
+        if len(dsc_libs) == 0:
+            pr_libs.append(pr_lib_dsc)
+        _lib_type = "P"
+        if int(pid) == 0:
+            _lib_type = "K"
+            pr_libs = []
+        else:
+            for dlinfo in piddata.get('dyld_load_info',[]):
+                pr_libs.append([dlinfo['imageUUID'].strip('<>'), dlinfo['loadAddress'], _lib_type])
+
+        pr_libs.extend(kern_load_info)
+        pr_libs.extend(dsc_libs)
+
+        pr_libs.sort(key=itemgetter(1))
+
+        tasksnap = piddata['task_snapshot_v2']
+        tsnap["pid"] = tasksnap["pid"]
+        tsnap["residentMemoryBytes"] = tasksnap["task_size"]
+        tsnap["timesDidThrottle"] = tasksnap["did_throttle"]
+        tsnap["systemTimeTask"] = GetSecondsFromMATime(tasksnap["system_time_in_terminated_threads"], timebase)
+        tsnap["pageIns"] = tasksnap["pageins"]
+        tsnap["pageFaults"] = tasksnap["faults"]
+        tsnap["userTimeTask"] = GetSecondsFromMATime(tasksnap["user_time_in_terminated_threads"], timebase)
+        tsnap["procname"] = tasksnap["p_comm"]
+        tsnap["copyOnWriteFaults"] = tasksnap["cow_faults"]
+        tsnap["timesThrottled"] = tasksnap["was_throttled"]
+        tsnap["threadById"] = {}
+        threadByID = tsnap["threadById"]
+        thlist = piddata.get('STACKSHOT_KCCONTAINER_THREAD', {})
+        for tid,thdata in thlist.iteritems():
+            threadByID[str(tid)] = {}
+            thsnap = threadByID[str(tid)]
+            threadsnap = thdata["thread_snapshot_v2"]
+            thsnap["userTime"] = GetSecondsFromMATime(threadsnap["user_time"], timebase)
+            thsnap["id"] = threadsnap["thread_id"]
+            thsnap["basePriority"] = threadsnap["base_priority"]
+            thsnap["systemTime"] = threadsnap["sys_time"]
+            thsnap["schedPriority"] = threadsnap["sched_priority"]
+            thsnap["state"] = GetStateDescription(threadsnap['state'])
+            thsnap["qosEffective"] = threadsnap["ts_eqos"]
+            thsnap["qosRequested"] = threadsnap["ts_rqos"]
+
+            if threadsnap['continuation']:
+                thsnap["continuation"] = GetSymbolInfoForFrame(AllImageCatalog, pr_libs, threadsnap['continuation'])
+            if "kernel_stack_frames" in thdata:
+                kuserframes = []
+                for f in thdata["kernel_stack_frames"]:
+                    kuserframes.append(GetSymbolInfoForFrame(AllImageCatalog, pr_libs, f['lr']))
+                thsnap["kernelFrames"] = kuserframes
+
+            if "user_stack_frames" in thdata:
+                uframes = []
+                for f in thdata["user_stack_frames"]:
+                    uframes.append(GetSymbolInfoForFrame(AllImageCatalog, pr_libs, f['lr']))
+                thsnap["userFrames"] = uframes
+            if threadsnap['wait_event']:
+                thsnap["waitEvent"] = GetSymbolInfoForFrame(AllImageCatalog, pr_libs, threadsnap['wait_event'])
+
+    obj['binaryImages'] = AllImageCatalog
+    fh = open(outfile_name, "w")
+    fh.write('{"bug_type":"288", "timestamp":"'+ timestamp +'", "os_version":"'+ os_version +'"}\n')
+    fh.write(json.dumps(obj, sort_keys=False, indent=2, separators=(',', ': ')))
+    fh.close()
+
+## Base utils for interacting with shell ##
+def RunCommand(bash_cmd_string, get_stderr = True):
+    """
+        returns: (int,str) : exit_code and output_str
+    """
+    print "RUNNING: %s" % bash_cmd_string
+    cmd_args = shlex.split(bash_cmd_string)
+    output_str = ""
+    exit_code = 0
+    try:
+        if get_stderr:
+            output_str = subprocess.check_output(cmd_args, stderr=subprocess.STDOUT)
+        else:
+            output_str = subprocess.check_output(cmd_args, stderr=None)
+    except subprocess.CalledProcessError, e:
+        exit_code = e.returncode
+        output_str = e.output
+    finally:
+        return (exit_code, output_str)
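+
+# Example usage (assumes the Xcode command line tools are installed):
+#
+#   (rc, out) = RunCommand("xcrun -find dyld_shared_cache_util")
+#   if rc == 0:
+#       print out.strip()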
+
+def ProcessDyldSharedCacheFile(shared_cache_file_path, sdk_str=""):
+    """ returns (uuid, text_info) output from shared_cache_util.
+                In case of error None is returned and err message is printed to stdout.
+    """
+    if not os.path.exists(shared_cache_file_path):
+        print "File path: %s does not exists" % shared_cache_file_path
+        return None
+    if sdk_str:
+        sdk_str = ' -sdk "%s" ' % sdk_str
+    (c, so) = RunCommand("xcrun {} -find dyld_shared_cache_util".format(sdk_str))
+    if c:
+        print "Failed to find path to dyld_shared_cache_util. Exit code: %d , message: %s" % (c,so)
+        return None
+    dyld_shared_cache_util = so.strip()
+    (c, so) = RunCommand("{} -info {}".format(dyld_shared_cache_util, shared_cache_file_path))
+    if c:
+        print "Failed to get uuid info from %s" % shared_cache_file_path
+        print so
+        return None
+
+    uuid = so.splitlines()[0].split(": ")[-1].strip().replace("-","").lower()
+    
+    (c, so) = RunCommand("{} -text_info {}".format(dyld_shared_cache_util, shared_cache_file_path))
+    if c:
+        print "Failed to get text_info from %s" % shared_cache_file_path
+        print so
+        return None
+    
+    print "Found %s uuid: %s" % (shared_cache_file_path, uuid)
+    text_info = so
+
+    return (uuid, so)
+
+parser = argparse.ArgumentParser(description="Decode a kcdata binary file.")
+parser.add_argument("-l", "--listtypes", action="store_true", required=False, default=False,
+                    help="List all known types",
+                    dest="list_known_types")
+
+parser.add_argument("-s", "--stackshot", required=False, default=False,
+                    help="Generate a stackshot report file",
+                    dest="stackshot_file")
+
+parser.add_argument("-U", "--uuid", required=False, default="", help="UUID of dyld shared cache to be analysed and filled in libs of stackshot report", dest="uuid")
+parser.add_argument("-L", "--layout", required=False, type=argparse.FileType("r"), help="Path to layout file for DyldSharedCache. You can generate one by doing \n\tbash$xcrun -sdk <sdk> dyld_shared_cache_util -text_info </path/to/dyld_shared_cache> ", dest="layout")
+parser.add_argument("-S", "--sdk", required=False, default="", help="sdk property passed to xcrun command to find the required tools. Default is empty string.", dest="sdk")
+parser.add_argument("-D", "--dyld_shared_cache", required=False, default="", help="Path to dyld_shared_cache built by B&I", dest="dsc")
+parser.add_argument("kcdata_file", type=argparse.FileType('r'), help="Path to a kcdata binary file.")
+
+
+
+if __name__ == '__main__':
+    args = parser.parse_args()
+
+    if args.list_known_types:
+        for (n, t) in KNOWN_TYPES_COLLECTION.items():
+            print "%d : %s " % (n, str(t))
+        sys.exit(1)
+
+    file_name = args.kcdata_file.name
+    master_objs = []
+    master_container = None
+    current_container = None
+    for i in kcdata_item_iterator(file_name):
+        #print "processed " + str(i)
+        o = KCObject.FromKCItem(i)
+        if o.IsContainerType():
+            o = KCContainerObject(i.i_type, i.i_data, i.i_flags)
+
+        if current_container is None:
+            master_objs.append(o)
+            current_container = o
+            master_container = o
+        else:
+            current_container.AddObject(o)
+
+        if o.IsContainerType():
+            master_objs.append(current_container)
+            current_container = o
+
+        if o.IsContainerEnd():
+            current_container = master_objs.pop()
+    str_data = "{" + master_container.GetJsonRepr() + "}"
+    try:
+        json_obj = json.loads(str_data)
+        dsc_uuid = None
+        dsc_libs_arr = []
+        libs_re = re.compile("^\s*(0x[a-fA-F0-9]+)\s->\s(0x[a-fA-F0-9]+)\s+<([a-fA-F0-9\-]+)>\s+.*$", re.MULTILINE)
+        if args.uuid and args.layout:
+            dsc_uuid = args.uuid.strip().replace("-",'').lower()
+            dsc_libs_arr = libs_re.findall(args.layout.read())
+
+        if args.dsc:
+            _ret = ProcessDyldSharedCacheFile(args.dsc, args.sdk)
+            if _ret:
+                dsc_uuid = _ret[0]
+                dsc_libs_arr = libs_re.findall(_ret[1])
+
+        if args.stackshot_file:
+            SaveStackshotReport(json_obj, args.stackshot_file, dsc_uuid, dsc_libs_arr)
+        else:
+            print json.dumps(json_obj, sort_keys=True, indent=4, separators=(',', ': '))
+
+    except Exception, e:
+        print e
+        print "--------------------------------------------"*3
+        print str_data
+        raise
index 66325463b81d8a8c4f01e3f974354845a272a0dd..57f4a712fdd14ddea5d392452f276a5169c11d99 100644 (file)
@@ -5,8 +5,8 @@ import ctypes
 MBSHIFT = 20
 MSIZE = 256
 MCLBYTES = 2048
+MBIGCLBYTES = 4096
 M16KCLBYTES = 16384
-NBPG = 4096
 
 MB_SCVALID = 4
 
@@ -26,5 +26,3 @@ kgm_manual_pkt_arm    = 0xFFFF04A0
 kgm_kdp_pkt_data_len   = 128
 
 MCF_NOCPUCACHE = 0x10
-
-NSLABSPMB = 256
index f6ab3002af59a63377b6a8388125fc351b5de8d6..c16456a0089ac4192e848b039d5422772fdecd26 100644 (file)
@@ -42,32 +42,6 @@ def MBufStat(cmd_args=None):
                                   )
 # EndMacro: mbuf_stat
 
-# Macro: mbuf_walk_mleak_traces
-@lldb_command('mbuf_walk_mleak_traces')
-def MbufWalkMleakTraces(cmd_args=None):
-    """ Print mleak_traces
-    """
-    i = 0
-    while (i<256):
-        trace = kern.globals.mleak_traces[i]
-        out_string = ""
-        if (trace.allocs != 0):
-            print "Index: " + str(i)
-            out_string += ":" + str(trace.allocs) + " outstanding allocs\n"
-            out_string += str(trace.hitcount) + " hitcount\n"
-            out_string += str(trace.collisions) + " collisions\n"
-            out_string += "Backtrace saved " + str(trace.depth) + " deep\n"
-            if (trace.depth != 0):
-                cnt = 0
-                while (cnt < trace.depth):
-                    out_string += str(cnt + 1) + ": "
-                    out_string += GetPc(trace.addr[cnt])
-                    out_string += "\n"
-                    cnt += 1
-            print out_string
-        i +=1
-# EndMacro: mbuf_walk_mleak_traces
-
 # Macro: mbuf_walkpkt
 @lldb_command('mbuf_walkpkt')
 def MbufWalkPacket(cmd_args=None):
@@ -122,10 +96,7 @@ def MbufBuf2Slab(cmd_args=None):
         raise ArgumentError("Missing argument 0 in user function.")
 
     m = kern.GetValueFromAddress(cmd_args[0], 'mbuf *')
-    gix = (m - Cast(kern.globals.mbutl, 'char *')) >> MBSHIFT
-    slabstbl = kern.globals.slabstbl
-    ix = (m - Cast(slabstbl[int(gix)].slg_slab[0].sl_base, 'char *')) >> 12
-    slab = addressof(slabstbl[int(gix)].slg_slab[int(ix)])
+    slab = GetMbufSlab(m)
     if (kern.ptrsize == 8):
         mbuf_slab_format = "0x{0:<16x}"
         print mbuf_slab_format.format(slab)
@@ -166,19 +137,17 @@ def MbufSlabs(cmd_args=None):
         out_string += "slot slab       next       obj        mca        tstamp     C  R  N   size flags\n"
         out_string += "---- ---------- ---------- ---------- ---------- ---------- -- -- -- ------ -----\n"
 
-    mbutl = cast(kern.globals.mbutl, 'union mbigcluster *')
-    while x < NSLABSPMB:
+    mbutl = cast(kern.globals.mbutl, 'unsigned char *')
+    nslabspmb = int((1 << MBSHIFT) >> unsigned(kern.globals.page_shift))
+    while x < nslabspmb:
         sl = addressof(slg.slg_slab[x])
         mca = 0
         obj = sl.sl_base
         ts = 0
 
         if (kern.globals.mclaudit != 0):
-            ix = (obj - Cast(kern.globals.mbutl, 'char *')) >> 12
-            clbase = mbutl + (sizeof(dereference(mbutl)) * ix)
-            mclidx = (obj - clbase) >> 8
-            mca = kern.globals.mclaudit[int(ix)].cl_audit[int(mclidx)]
-            trn = (mca.mca_next_trn + kern.globals.mca_trn_max - 1) % kern.globals.mca_trn_max
+            mca = GetMbufMcaPtr(obj, sl.sl_class)
+            trn = (mca.mca_next_trn + unsigned(kern.globals.mca_trn_max) - 1) % unsigned(kern.globals.mca_trn_max)
             ts = mca.mca_trns[trn].mca_tstamp
 
         out_string += slabs_string_format.format((x + 1), sl, sl.sl_next, obj, hex(mca), int(ts), int(sl.sl_class), int(sl.sl_refcnt), int(sl.sl_chunks), int(sl.sl_len), hex(sl.sl_flags))
@@ -204,11 +173,8 @@ def MbufSlabs(cmd_args=None):
                 ts = 0
 
                 if (kern.globals.mclaudit != 0):
-                    ix = (obj - Cast(kern.globals.mbutl, 'char *')) >> 12
-                    clbase = mbutl + (sizeof(dereference(mbutl)) * ix)
-                    mclidx = (obj - clbase) >> 8
-                    mca = kern.globals.mclaudit[int(ix)].cl_audit[int(mclidx)]
-                    trn = (mca.mca_next_trn + kern.globals.mca_trn_max - 1) % kern.globals.mca_trn_max
+                    mca = GetMbufMcaPtr(obj, sl.sl_class)
+                    trn = (mca.mca_next_trn + unsigned(kern.globals.mca_trn_max) - 1) % unsigned(kern.globals.mca_trn_max)
                     ts = mca.mca_trns[trn].mca_tstamp
 
                 if (kern.ptrsize == 8):
@@ -238,6 +204,7 @@ def MbufSlabsTbl(cmd_args=None):
 
     slabstbl = kern.globals.slabstbl
     slabs_table_blank_string_format = "{0:>3d}: - \n"
+    nslabspmb = int(((1 << MBSHIFT) >> unsigned(kern.globals.page_shift)))
     while (x < unsigned(kern.globals.maxslabgrp)):
         slg = slabstbl[x]
         if (slg == 0):
@@ -245,23 +212,43 @@ def MbufSlabsTbl(cmd_args=None):
         else:
             if (kern.ptrsize == 8):
                 slabs_table_string_format = "{0:>3d}: 0x{1:16x}  [ 0x{2:16x} - 0x{3:16x} ]\n"
-                out_string += slabs_table_string_format.format(x+1, slg, addressof(slg.slg_slab[0]), addressof(slg.slg_slab[NSLABSPMB-1]))
+                out_string += slabs_table_string_format.format(x+1, slg, addressof(slg.slg_slab[0]), addressof(slg.slg_slab[nslabspmb-1]))
             else:
                 slabs_table_string_format = "{0:>3d}: 0x{1:8x}  [ 0x{2:8x} - 0x{3:8x} ]\n"
-                out_string += slabs_table_string_format.format(x+1, slg, addressof(slg.slg_slab[0]), addressof(slg.slg_slab[NSLABSPMB-1]))
+                out_string += slabs_table_string_format.format(x+1, slg, addressof(slg.slg_slab[0]), addressof(slg.slg_slab[nslabspmb-1]))
 
         x += 1
     print out_string
 # EndMacro: mbuf_slabstbl
 
+def GetMbufMcaPtr(m, cl):
+    pgshift = int(kern.globals.page_shift)
+    ix = int((m - Cast(kern.globals.mbutl, 'char *')) >> pgshift)
+    page_addr = (Cast(kern.globals.mbutl, 'char *') + (ix << pgshift))
+
+    if (int(cl) == 0):
+        midx = int((m - page_addr) >> 8)
+        mca = kern.globals.mclaudit[ix].cl_audit[midx]
+    elif (int(cl) == 1):
+        midx = int((m - page_addr) >> 11)
+        mca = kern.globals.mclaudit[ix].cl_audit[midx]
+    elif (int(cl) == 2):
+        midx = int((m - page_addr) >> 12)
+        mca = kern.globals.mclaudit[ix].cl_audit[midx]
+    else:
+        mca = kern.globals.mclaudit[ix].cl_audit[0]
+    return Cast(mca, 'mcache_audit_t *')
+
+def GetMbufSlab(m):
+    pgshift = int(kern.globals.page_shift)
+    gix = int((Cast(m, 'char *') - Cast(kern.globals.mbutl, 'char *')) >> MBSHIFT)
+    slabstbl = kern.globals.slabstbl
+    ix = int((Cast(m, 'char *') - Cast(slabstbl[gix].slg_slab[0].sl_base, 'char *')) >> pgshift)
+    return addressof(slabstbl[gix].slg_slab[ix])
 
 def GetMbufBuf2Mca(m):
-    ix = (m - Cast(kern.globals.mbutl, 'char *')) >> 12
-    #mbutl = Cast(kern.globals.mbutl, 'union mbigcluster *')
-    mbutl = cast(kern.globals.mbutl, 'union mbigcluster *')
-    clbase = mbutl + (sizeof(dereference(mbutl)) * ix)
-    mclidx = (m - clbase) >> 8
-    mca = kern.globals.mclaudit[int(ix)].cl_audit[int(mclidx)]
+    sl = GetMbufSlab(m)
+    mca = GetMbufMcaPtr(m, sl.sl_class)
     return str(mca)
 
 def GetMbufWalkAllSlabs(show_a, show_f, show_tr):
@@ -288,18 +275,15 @@ def GetMbufWalkAllSlabs(show_a, show_f, show_tr):
         show_mca_string_format = "{0:4s} {1:4s} {2:8s} {3:8s} {4:8} {5:12s} {6:12s}"
         out_string += show_mca_string_format.format("slot", "idx", "slab address", "mca address", "obj address", "type", "allocation state\n")
 
+    nslabspmb = unsigned((1 << MBSHIFT) >> unsigned(kern.globals.page_shift))
     while (x < unsigned(kern.globals.slabgrp)):
         slg = kern.globals.slabstbl[x]
         y = 0
         stop = 0
-        while ((y < NSLABSPMB) and (stop == 0)):
+        while ((y < nslabspmb) and (stop == 0)):
             sl = addressof(slg.slg_slab[y])
             base = sl.sl_base
-            mbutl = cast(kern.globals.mbutl, 'union mbigcluster *')
-            ix = (base - mbutl) >> 12
-            clbase = mbutl + (sizeof(dereference(mbutl)) * ix)
-            mclidx = (base - clbase) >> 8
-            mca = kern.globals.mclaudit[int(ix)].cl_audit[int(mclidx)]
+            mca = GetMbufMcaPtr(base, sl.sl_class)
             first = 1
 
             while ((Cast(mca, 'int') != 0) and (unsigned(mca.mca_addr) != 0)):
@@ -344,7 +328,6 @@ def GetMbufWalkAllSlabs(show_a, show_f, show_tr):
                     total = total + 1
 
                     if (show_tr != 0):
-                        idx = int(show_tr)
                         trn = (mca.mca_next_trn + idx - 1) % unsigned(kern.globals.mca_trn_max)
                         out_string += "Transaction " + str(int(trn)) + " at " + str(int(mca.mca_trns[int(trn)].mca_tstamp)) + " by thread: 0x" + str(hex(mca.mca_trns[int(trn)].mca_thread)) + ":\n"
                         cnt = 0
@@ -388,7 +371,7 @@ def GetMbufMcaCtype(mca, vopt):
         else:
             out_string += "CL     "
         return out_string
-    if (csize == NBPG):
+    if (csize == MBIGCLBYTES):
         if (vopt):
             out_string += "BCL (4K cluster) "
         else:
@@ -422,7 +405,7 @@ def GetMbufMcaCtype(mca, vopt):
                     out_string += "(unpaired 2K cluster, mbuf) "
         return out_string
 
-    if (csize == (MSIZE + NBPG)):
+    if (csize == (MSIZE + MBIGCLBYTES)):
         if (mca.mca_uflags & MB_SCVALID):
             if (mca.mca_uptr):
                 out_string += "M+BCL  "
@@ -543,6 +526,7 @@ def MbufShowMca(cmd_args=None):
     """ Print the contents of an mbuf mcache audit structure
     """
     out_string = ""
+    pgshift = unsigned(kern.globals.page_shift)
     if cmd_args:
         mca = kern.GetValueFromAddress(cmd_args[0], 'mcache_audit_t *')
         cp = mca.mca_cache
@@ -550,9 +534,9 @@ def MbufShowMca(cmd_args=None):
         out_string += GetMbufMcaCtype(mca, 1)
         out_string += "\nControlling mcache :\t" + hex(mca.mca_cache) + " (" + str(cp.mc_name) + ")\n"
         if (mca.mca_uflags & MB_SCVALID):
-            mbutl = cast(kern.globals.mbutl, 'union mbigcluster *')
-            ix = (mca.mca_addr - mbutl) >> 12
-            clbase = mbutl + (sizeof(dereference(mbutl)) * ix)
+            mbutl = Cast(kern.globals.mbutl, 'unsigned char *')
+            ix = (mca.mca_addr - mbutl) >> pgshift
+            clbase = mbutl + (ix << pgshift)
             mclidx = (mca.mca_addr - clbase) >> 8
             out_string += "mbuf obj :\t\t" + hex(mca.mca_addr) + "\n"
             out_string += "mbuf index :\t\t" + str(mclidx + 1) + " (out of 16) in cluster base " + hex(clbase) + "\n"
@@ -566,7 +550,7 @@ def MbufShowMca(cmd_args=None):
                 peer_mca = cast(mca.mca_uptr, 'mcache_audit_t *')
                 out_string += "paired mbuf obj :\t" + hex(peer_mca.mca_addr) + " (mca " + hex(peer_mca) + ")\n"
 
-        for idx in range(kern.globals.mca_trn_max, 0, -1):
+        for idx in range(unsigned(kern.globals.mca_trn_max), 0, -1):
                 trn = (mca.mca_next_trn + idx - 1) % unsigned(kern.globals.mca_trn_max)
                 out_string += "transaction {:d} (tstamp {:d}, thread 0x{:x}):\n".format(trn, mca.mca_trns[trn].mca_tstamp, mca.mca_trns[trn].mca_thread)
                 cnt = 0
index ff5ce0ae22f06cb739d067636b932c5253c358bc..b3d4dccc9faa589224241aca01f2ab4897ee186b 100644 (file)
@@ -128,8 +128,8 @@ def ShowMemoryStatus(cmd_args=None):
 # Macro: zprint
 
 @lldb_type_summary(['zone','zone_t'])
-@header("{:^18s} {:>10s} {:>10s} {:>10s} {:>10s} {:>10s} {:>10s} {:>10s}({:>6s} {:>6s} {:>6s}) {:^14s} {:<20s}".format(
-'ZONE', 'TOT_SZ', 'PAGE_COUNT', 'ALLOC_ELTS', 'FREE_ELTS', 'FREE_SZ', 'ELT_SZ', 'ALLOC', 'ELTS', 'PGS', 'SLK', 'FLAGS', 'NAME'))
+@header("{:^18s} {:>10s} {:>10s} {:>10s} {:>10s} {:>10s} {:>10s} {:>10s}({:>6s} {:>6s} {:>6s}) {:^15s} {:<20s}".format(
+'ZONE', 'TOT_SZ', 'PAGE_COUNT', 'ALLOC_ELTS', 'FREE_ELTS', 'FREE_SZ', 'ELT_SZ', 'ALLOC', 'ELTS', 'PGS', 'WASTE', 'FLAGS', 'NAME'))
 def GetZoneSummary(zone):
     """ Summarize a zone with important information. See help zprint for description of each field
         params: 
@@ -138,28 +138,39 @@ def GetZoneSummary(zone):
           str - summary of the zone
     """
     out_string = ""
-    format_string = '{:#018x} {:10d} {:10d} {:10d} {:10d} {:10d} {:10d} {:10d} {:6d} {:6d} {:6d}  {markings} {name:s} ' 
-    pagesize = 4096
+    format_string = '{:#018x} {:10d} {:10d} {:10d} {:10d} {:10d} {:10d} {:10d} {:6d} {:6d} {:6d}  {markings} {name:s} '
+    pagesize = kern.globals.page_size
     
-    free_elements = (zone.cur_size / zone.elem_size) - zone.count
+    free_elements = zone.countfree
     free_size = free_elements * zone.elem_size
     
-    alloc_count = zone.alloc_size / zone.elem_size
     alloc_pages = zone.alloc_size / pagesize
-    alloc_slack = zone.alloc_size % zone.elem_size
+    if zone.use_page_list :
+        metadata_size = sizeof('struct zone_page_metadata')
+        metadata_offset = metadata_size
+        if ((metadata_size % zone.elem_size) != 0) :
+            metadata_offset += zone.elem_size - (metadata_size % zone.elem_size)
+        alloc_count = ((pagesize - metadata_offset) / zone.elem_size) * alloc_pages
+        alloc_waste = metadata_offset * alloc_pages
+    else :
+        alloc_count = zone.alloc_size / zone.elem_size
+        alloc_waste = zone.alloc_size  % zone.elem_size
+
     marks = [
-            ["collectable",        "C"],
-            ["expandable",         "X"],
-            ["noencrypt",          "$"],
-            ["caller_acct",        "@"],
-            ["exhaustible",        "H"],
-            ["allows_foreign",     "F"],
-            ["async_prio_refill",  "R"],
-            ["no_callout",         "O"],
-            ["zleak_on",           "L"],
-            ["doing_alloc",        "A"],
-            ["waiting",            "W"],
-            ["doing_gc",           "G"]
+            ["collectable",                 "C"],
+            ["expandable",                  "X"],
+            ["noencrypt",                   "$"],
+            ["caller_acct",                 "@"],
+            ["exhaustible",                 "H"],
+            ["allows_foreign",              "F"],
+            ["async_prio_refill",           "R"],
+            ["no_callout",                  "O"],
+            ["zleak_on",                    "L"],
+            ["doing_alloc_without_vm_priv", "A"],
+            ["doing_alloc_with_vm_priv",    "S"],
+            ["waiting",                     "W"],
+            ["doing_gc",                    "G"],
+            ["use_page_list",               "P"]
             ]
     if kern.arch == 'x86_64':
         marks.append(["gzalloc_exempt",     "M"])
@@ -174,7 +185,7 @@ def GetZoneSummary(zone):
     out_string += format_string.format(zone, zone.cur_size, zone.page_count,
                     zone.count, free_elements, free_size,
                     zone.elem_size, zone.alloc_size, alloc_count,
-                    alloc_pages, alloc_slack, name = zone.zone_name, markings=markings)
+                    alloc_pages, alloc_waste, name = zone.zone_name, markings=markings)
     
     if zone.exhaustible :
             out_string += "(max: {:d})".format(zone.max_size)
@@ -196,10 +207,12 @@ def Zprint(cmd_args=None):
         R - will be refilled when below low water mark
         O - does not allow refill callout to fill zone on noblock allocation
         N - zone requires alignment (avoids padding this zone for debugging)
-        A - currently trying to allocate more backing memory from kernel_memory_allocate
+        A - currently trying to allocate more backing memory from kernel_memory_allocate without VM priv
+        S - currently trying to allocate more backing memory from kernel_memory_allocate with VM priv
         W - another thread is waiting for more memory
         L - zone is being monitored by zleaks
         G - currently running GC
+        P - uses zone_page_metadata
     """
     global kern
     print GetZoneSummary.header
@@ -839,6 +852,49 @@ def ShowIOAllocations(cmd_args=None):
 # EndMacro: showioalloc    
 
 
+# Macro: showselectmem
+@lldb_command('showselectmem', "S:")
+def ShowSelectMem(cmd_args=None, cmd_options={}):
+    """ Show memory cached by threads on calls to select.
+
+        usage: showselectmem [-v]
+            -v        : print each thread's memory
+                        (one line per thread with non-zero select memory)
+            -S {addr} : Find the thread whose thread-local select set
+                        matches the given address
+    """
+    verbose = False
+    opt_wqs = 0
+    if config['verbosity'] > vHUMAN:
+        verbose = True
+    if "-S" in cmd_options:
+        opt_wqs = unsigned(kern.GetValueFromAddress(cmd_options["-S"], 'uint64_t *'))
+        if opt_wqs == 0:
+            raise ArgumentError("Invalid waitq set address: {:s}".format(cmd_options["-S"]))
+    selmem = 0
+    if verbose:
+        print "{:18s} {:10s} {:s}".format('Task', 'Thread ID', 'Select Mem (bytes)')
+    for t in kern.tasks:
+        for th in IterateQueue(t.threads, 'thread *', 'task_threads'):
+            uth = Cast(th.uthread, 'uthread *')
+            wqs = 0
+            if hasattr(uth, 'uu_allocsize'): # old style
+                thmem = uth.uu_allocsize
+                wqs = uth.uu_wqset
+            elif hasattr(uth, 'uu_wqstate_sz'): # new style
+                thmem = uth.uu_wqstate_sz
+                wqs = uth.uu_wqset
+            else:
+                print "What kind of uthread is this?!"
+                return
+            if opt_wqs and opt_wqs == unsigned(wqs):
+                print "FOUND: {:#x} in thread: {:#x} ({:#x})".format(opt_wqs, unsigned(th), unsigned(th.thread_id))
+            if verbose and thmem > 0:
+                print "{:<#18x} {:<#10x} {:d}".format(unsigned(t), unsigned(th.thread_id), thmem)
+            selmem += thmem
+    print '-'*40
+    print "Total: {:d} bytes ({:d} kbytes)".format(selmem, selmem/1024)
+# Endmacro: showselectmem
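A hypothetical session, for illustration only (the addresses and the total are made up; the lines follow the prints in the macro above):

    (lldb) showselectmem -S 0xffffff80a1b2c000
    FOUND: 0xffffff80a1b2c000 in thread: 0xffffff8033445566 (0x1a2b)
    ----------------------------------------
    Total: 28672 bytes (28 kbytes)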
  
  
 # Macro: showtaskvme
@@ -846,6 +902,8 @@ def ShowIOAllocations(cmd_args=None):
 def ShowTaskVmeHelper(cmd_args=None, cmd_options={}):
     """ Display a summary list of the specified vm_map's entries
         Usage: showtaskvme <task address>  (ex. showtaskvme 0x00ataskptr00 )
+        Use -S flag to show VM object shadow chains
+        Use -P flag to show pager info (mapped file, compressed pages, ...)
     """
     show_pager_info = False
     show_all_shadows = False
@@ -904,6 +962,7 @@ def ShowTaskVM(cmd_args=None):
 def ShowAllVMStats(cmd_args=None):
     """ Print a summary of vm statistics in a table format
     """
+    page_size = kern.globals.page_size
     vmstats = lambda:None
     vmstats.wired_count = 0
     vmstats.resident_count = 0
@@ -951,7 +1010,7 @@ def ShowAllVMStats(cmd_args=None):
         if vmstats.new_resident_count +vmstats.reusable != vmstats.resident_count:
             vmstats.error += '*'
 
-        print entry_format.format(p=proc, m=vmmap, vsize=(unsigned(vmmap.size) >> 12), t=task, s=vmstats)
+        print entry_format.format(p=proc, m=vmmap, vsize=(unsigned(vmmap.size) / page_size), t=task, s=vmstats)
         
 
 def ShowTaskVMEntries(task, show_pager_info, show_all_shadows):
@@ -1022,6 +1081,7 @@ def GetVMMapSummary(vmmap):
 @header("{0: <20s} {1: <20s} {2: <5s} {3: >7s} {4: <20s} {5: <20s}".format("entry", "start", "prot", "#page", "object", "offset"))
 def GetVMEntrySummary(vme):
     """ Display vm entry specific information. """
+    page_size = kern.globals.page_size
     out_string = ""
     format_string = "{0: <#020x} {1: <#20x} {2: <1x}{3: <1x}{4: <3s} {5: >7d} {6: <#020x} {7: <#020x}"
     vme_protection = int(vme.protection)
@@ -1031,8 +1091,8 @@ def GetVMEntrySummary(vme):
         vme_extra_info_str +="s"
     elif int(vme.needs_copy) != 0 :
         vme_extra_info_str +="n"
-    num_pages = (unsigned(vme.links.end) - unsigned(vme.links.start)) >> 12
-    out_string += format_string.format(vme, vme.links.start, vme_protection, vme_max_protection, vme_extra_info_str, num_pages, vme.object.vm_object, vme.offset)
+    num_pages = (unsigned(vme.links.end) - unsigned(vme.links.start)) / page_size
+    out_string += format_string.format(vme, vme.links.start, vme_protection, vme_max_protection, vme_extra_info_str, num_pages, vme.vme_object.vmo_object, vme.vme_offset)
     return out_string
 
 # EndMacro: showtaskvme
@@ -1509,6 +1569,8 @@ def GetVnodeLock(lockf):
         vnode_lock_output += ("{: <8s}").format('prov')
     if lockf_flags & 0x10:
         vnode_lock_output += ("{: <4s}").format('W')
+    if lockf_flags & 0x400:
+        vnode_lock_output += ("{: <8s}").format('ofd')
     else:
         vnode_lock_output += ("{: <4s}").format('.')
 
@@ -2114,6 +2176,7 @@ def ShowPurgeableNonVolatileVmObject(object, idx, queue_len, nonvolatile_total):
         returns:
             None
     """
+    page_size = kern.globals.page_size
     if object.purgable == 0:
         purgable = "N"
     elif object.purgable == 1:
@@ -2130,16 +2193,16 @@ def ShowPurgeableNonVolatileVmObject(object, idx, queue_len, nonvolatile_total):
         compressor_pager = Cast(object.pager, 'compressor_pager *')
         compressed_count = compressor_pager.cpgr_num_slots_occupied
 
-    print "{:>6d}/{:<6d} {:#018x} {:1s} {:>6d} {:>16d} {:>10d} {:>10d} {:>10d}   {:#018x} {:>6d} {:<20s}\n".format(idx,queue_len,object,purgable,object.ref_count,object.vo_un1.vou_size/kern.globals.page_size,object.resident_page_count,object.wired_page_count,compressed_count, object.vo_un2.vou_purgeable_owner,GetProcPIDForTask(object.vo_un2.vou_purgeable_owner),GetProcNameForTask(object.vo_un2.vou_purgeable_owner))
+    print "{:>6d}/{:<6d} {:#018x} {:1s} {:>6d} {:>16d} {:>10d} {:>10d} {:>10d}   {:#018x} {:>6d} {:<20s}\n".format(idx,queue_len,object,purgable,object.ref_count,object.vo_un1.vou_size/page_size,object.resident_page_count,object.wired_page_count,compressed_count, object.vo_un2.vou_purgeable_owner,GetProcPIDForTask(object.vo_un2.vou_purgeable_owner),GetProcNameForTask(object.vo_un2.vou_purgeable_owner))
 
     nonvolatile_total.objects += 1
-    nonvolatile_total.vsize += object.vo_un1.vou_size/kern.globals.page_size
+    nonvolatile_total.vsize += object.vo_un1.vou_size/page_size
     nonvolatile_total.rsize += object.resident_page_count
     nonvolatile_total.wsize += object.wired_page_count
     nonvolatile_total.csize += compressed_count
     if object.vo_un2.vou_purgeable_owner == 0:
         nonvolatile_total.disowned_objects += 1
-        nonvolatile_total.disowned_vsize += object.vo_un1.vou_size/kern.globals.page_size
+        nonvolatile_total.disowned_vsize += object.vo_un1.vou_size/page_size
         nonvolatile_total.disowned_rsize += object.resident_page_count
         nonvolatile_total.disowned_wsize += object.wired_page_count
         nonvolatile_total.disowned_csize += compressed_count
@@ -2219,6 +2282,7 @@ def ShowPurgeableVolatileVmObject(object, idx, volatile_total):
 #        diff=" !="
 #    else:
 #        diff="  "
+    page_size = kern.globals.page_size
     if object.purgable == 0:
         purgable = "N"
     elif object.purgable == 1:
@@ -2234,16 +2298,16 @@ def ShowPurgeableVolatileVmObject(object, idx, volatile_total):
     else:
         compressor_pager = Cast(object.pager, 'compressor_pager *')
         compressed_count = compressor_pager.cpgr_num_slots_occupied
-#    print "{:>6d} {:#018x} {:1s} {:>6d} {:>16d} {:>10d} {:>10d} {:>10d} {:#018x} {:>6d} {:<20s}   {:#018x} {:>6d} {:<20s} {:s}\n".format(idx,object,purgable,object.ref_count,object.vo_un1.vou_size/kern.globals.page_size,object.resident_page_count,object.wired_page_count,compressed_count,object.vo_un2.vou_purgeable_owner,GetProcPIDForTask(object.vo_un2.vou_purgeable_owner),GetProcNameForTask(object.vo_un2.vou_purgeable_owner),object.vo_purgeable_volatilizer,GetProcPIDForTask(object.vo_purgeable_volatilizer),GetProcNameForTask(object.vo_purgeable_volatilizer),diff)
-    print "{:>6d} {:#018x} {:1s} {:>6d} {:>16d} {:>10d} {:>10d} {:>10d}   {:#018x} {:>6d} {:<20s}\n".format(idx,object,purgable,object.ref_count,object.vo_un1.vou_size/kern.globals.page_size,object.resident_page_count,object.wired_page_count,compressed_count, object.vo_un2.vou_purgeable_owner,GetProcPIDForTask(object.vo_un2.vou_purgeable_owner),GetProcNameForTask(object.vo_un2.vou_purgeable_owner))
+#    print "{:>6d} {:#018x} {:1s} {:>6d} {:>16d} {:>10d} {:>10d} {:>10d} {:#018x} {:>6d} {:<20s}   {:#018x} {:>6d} {:<20s} {:s}\n".format(idx,object,purgable,object.ref_count,object.vo_un1.vou_size/page_size,object.resident_page_count,object.wired_page_count,compressed_count,object.vo_un2.vou_purgeable_owner,GetProcPIDForTask(object.vo_un2.vou_purgeable_owner),GetProcNameForTask(object.vo_un2.vou_purgeable_owner),object.vo_purgeable_volatilizer,GetProcPIDForTask(object.vo_purgeable_volatilizer),GetProcNameForTask(object.vo_purgeable_volatilizer),diff)
+    print "{:>6d} {:#018x} {:1s} {:>6d} {:>16d} {:>10d} {:>10d} {:>10d}   {:#018x} {:>6d} {:<20s}\n".format(idx,object,purgable,object.ref_count,object.vo_un1.vou_size/page_size,object.resident_page_count,object.wired_page_count,compressed_count, object.vo_un2.vou_purgeable_owner,GetProcPIDForTask(object.vo_un2.vou_purgeable_owner),GetProcNameForTask(object.vo_un2.vou_purgeable_owner))
     volatile_total.objects += 1
-    volatile_total.vsize += object.vo_un1.vou_size/kern.globals.page_size
+    volatile_total.vsize += object.vo_un1.vou_size/page_size
     volatile_total.rsize += object.resident_page_count
     volatile_total.wsize += object.wired_page_count
     volatile_total.csize += compressed_count
     if object.vo_un2.vou_purgeable_owner == 0:
         volatile_total.disowned_objects += 1
-        volatile_total.disowned_vsize += object.vo_un1.vou_size/kern.globals.page_size
+        volatile_total.disowned_vsize += object.vo_un1.vou_size/page_size
         volatile_total.disowned_rsize += object.resident_page_count
         volatile_total.disowned_wsize += object.wired_page_count
         volatile_total.disowned_csize += compressed_count
@@ -2278,20 +2342,6 @@ def GetCompressedPagesForObject(obj):
 #           compressor_slot += 1
 #   return compressed_pages
 
-@lldb_command('showallvme', "-PS")
-def ShowAllVME(cmd_args=None, cmd_options={}):
-    """ Routine to print a summary listing of all the vm map entries
-        Go Through each task in system and show the vm info
-    """
-    show_pager_info = False
-    show_all_shadows = False
-    if "-P" in cmd_options:
-        show_pager_info = True
-    if "-S" in cmd_options:
-        show_all_shadows = True
-    for task in kern.tasks:
-        ShowTaskVMEntries(task, show_pager_info, show_all_shadows)
-
 def ShowTaskVMEntries(task, show_pager_info, show_all_shadows):
     """  Routine to print out a summary listing of all the entries in a vm_map
         params: 
@@ -2311,6 +2361,8 @@ def ShowTaskVMEntries(task, show_pager_info, show_all_shadows):
 def ShowMapVME(cmd_args=None, cmd_options={}):
     """Routine to print out info about the specified vm_map and its vm entries
         usage: showmapvme <vm_map>
+        Use -S flag to show VM object shadow chains
+        Use -P flag to show pager info (mapped file, compressed pages, ...)
     """
     if cmd_args == None or len(cmd_args) < 1:
         print "Invalid argument.", ShowMap.__doc__
@@ -2325,32 +2377,78 @@ def ShowMapVME(cmd_args=None, cmd_options={}):
     showmapvme(map, show_pager_info, show_all_shadows)
 
 def showmapvme(map, show_pager_info, show_all_shadows):
+    page_size = kern.globals.page_size
     vnode_pager_ops = kern.globals.vnode_pager_ops
     vnode_pager_ops_addr = unsigned(addressof(vnode_pager_ops))
     rsize = 0
     if map.pmap != 0:
         rsize = int(map.pmap.stats.resident_count)
-    print "{:<18s} {:<18s} {:<18s} {:>10s} {:>10s} {:>18s}:{:<18s}".format("vm_map","pmap","size","#ents","rsize","start","end")
-    print "{:#018x} {:#018x} {:#018x} {:>10d} {:>10d} {:#018x}:{:#018x}".format(map,map.pmap,(map.size/4096),map.hdr.nentries,rsize,map.hdr.links.start,map.hdr.links.end)
+    print "{:<18s} {:<18s} {:<18s} {:>10s} {:>18s} {:>18s}:{:<18s}".format("vm_map","pmap","size","#ents","rsize","start","end")
+    print "{:#018x} {:#018x} {:#018x} {:>10d} {:>18d} {:#018x}:{:#018x}".format(map,map.pmap,unsigned(map.size),map.hdr.nentries,rsize,map.hdr.links.start,map.hdr.links.end)
     vme_list_head = map.hdr.links
     vme_ptr_type = GetType('vm_map_entry *')
-    print "{:<18s} {:>18s}:{:<18s} {:>10s} {:>3s} {:<10s} {:<18s} {:<18s}".format("entry","start","end","#pgs","tag","prot&flags","object","offset")
-    last_end = map.hdr.links.start
+    print "{:<18s} {:>18s}:{:<18s} {:>10s} {:<8s} {:<10s} {:<18s} {:<18s}".format("entry","start","end","#pgs","tag.kmod","prot&flags","object","offset")
+    last_end = unsigned(map.hdr.links.start)
     for vme in IterateQueue(vme_list_head, vme_ptr_type, "links"):
-        if vme.links.start != last_end:
-            print "{:18s} {:#018x}:{:#018x} {:>10d}".format("------------------",last_end,vme.links.start,(vme.links.start-last_end)/4096)
-        last_end = vme.links.end
+        if unsigned(vme.links.start) != last_end:
+            print "{:18s} {:#018x}:{:#018x} {:>10d}".format("------------------",last_end,vme.links.start,(unsigned(vme.links.start) - last_end)/page_size)
+        last_end = unsigned(vme.links.end)
+        size = unsigned(vme.links.end) - unsigned(vme.links.start)
+        object = vme.vme_object.vmo_object
+        if object == 0:
+            object_str = "{:<#018x}".format(object)
+        elif vme.is_sub_map:
+            if object == kern.globals.bufferhdr_map:
+                object_str = "BUFFERHDR_MAP"
+            elif object == kern.globals.mb_map:
+                object_str = "MB_MAP"
+            elif object == kern.globals.bsd_pageable_map:
+                object_str = "BSD_PAGEABLE_MAP"
+            elif object == kern.globals.ipc_kernel_map:
+                object_str = "IPC_KERNEL_MAP"
+            elif object == kern.globals.ipc_kernel_copy_map:
+                object_str = "IPC_KERNEL_COPY_MAP"
+            elif object == kern.globals.kalloc_map:
+                object_str = "KALLOC_MAP"
+            elif object == kern.globals.zone_map:
+                object_str = "ZONE_MAP"
+            elif hasattr(kern.globals, 'gzalloc_map') and object == kern.globals.gzalloc_map:
+                object_str = "GZALLOC_MAP"
+            elif hasattr(kern.globals, 'g_kext_map') and object == kern.globals.g_kext_map:
+                object_str = "G_KEXT_MAP"
+            elif hasattr(kern.globals, 'vector_upl_submap') and object == kern.globals.vector_upl_submap:
+                object_str = "VECTOR_UPL_SUBMAP"
+            else:
+                object_str = "submap:{:<#018x}".format(object)
+        else:
+            if object == kern.globals.kernel_object:
+                object_str = "KERNEL_OBJECT"
+            elif object == kern.globals.vm_submap_object:
+                object_str = "VM_SUBMAP_OBJECT"
+            elif object == kern.globals.compressor_object:
+                object_str = "COMPRESSOR_OBJECT"
+            else:
+                object_str = "{:<#018x}".format(object)
+        offset = unsigned(vme.vme_offset) & ~0xFFF
+        tag = unsigned(vme.vme_offset & 0xFFF)
         vme_flags = ""
         if vme.is_sub_map:
             vme_flags += "s"
-        print "{:#018x} {:#018x}:{:#018x} {:>10d} {:>3d} {:1d}{:1d}{:<8s} {:#018x} {:<#18x}".format(vme,vme.links.start,vme.links.end,(vme.links.end-vme.links.start)/4096,vme.alias,vme.protection,vme.max_protection,vme_flags,vme.object.vm_object,vme.offset)
-        if show_pager_info and vme.is_sub_map == 0 and vme.object.vm_object != 0:
-            object = vme.object.vm_object
+        if vme.needs_copy:
+            vme_flags += "n"
+        if vme.is_sub_map and vme.use_pmap:
+            vme_flags += "p"
+        tagstr = ""
+        if map.pmap == kern.globals.kernel_pmap:
+            xsite = Cast(kern.globals.vm_allocation_sites[tag],'OSKextAccount *')
+            if xsite and xsite.site.flags & 2:
+                tagstr = ".{:<3d}".format(xsite.loadTag)
+        print "{:#018x} {:#018x}:{:#018x} {:>10d} {:>3d}{:<4s}  {:1d}{:1d}{:<8s} {:<18s} {:<#18x}".format(vme,vme.links.start,vme.links.end,(unsigned(vme.links.end)-unsigned(vme.links.start))/page_size,tag,tagstr,vme.protection,vme.max_protection,vme_flags,object_str,offset)
+        if (show_pager_info or show_all_shadows) and vme.is_sub_map == 0 and vme.vme_object.vmo_object != 0:
+            object = vme.vme_object.vmo_object
         else:
             object = 0
         depth = 0
-        offset = unsigned(vme.offset)
-        size = vme.links.end - vme.links.start
         while object != 0:
             depth += 1
             if show_all_shadows == False and depth != 1 and object.shadow != 0:
@@ -2370,19 +2468,185 @@ def showmapvme(map, show_pager_info, show_all_shadows):
             else:
                 internal = "external"
             pager_string = ""
-            if show_pager_info and object.pager != 0:
+            pager = object.pager
+            if show_pager_info and pager != 0:
                 if object.internal:
                     pager_string = "-> compressed:{:d}".format(GetCompressedPagesForObject(object))
+                elif unsigned(pager.mo_pager_ops) == vnode_pager_ops_addr:
+                    vnode_pager = Cast(pager,'vnode_pager *')
+                    pager_string = "-> " + GetVnodePath(vnode_pager.vnode_handle)
                 else:
-                    vnode_pager = Cast(object.pager,'vnode_pager *')
-                    if unsigned(vnode_pager.pager_ops) == vnode_pager_ops_addr:
-                        pager_string = "-> " + GetVnodePath(vnode_pager.vnode_handle)
-            print "{:>18d} {:#018x}:{:#018x} {:#018x} ref:{:<6d} ts:{:1d} strat:{:1s} {:s} ({:d} {:d} {:d}) {:s}".format(depth,offset,offset+size,object,object.ref_count,object.true_share,copy_strategy,internal,unsigned(object.vo_un1.vou_size)/4096,object.resident_page_count,object.wired_page_count,pager_string)
-#            print "        #{:<5d} obj {:#018x} ref:{:<6d} ts:{:1d} strat:{:1s} {:s} size:{:<10d} wired:{:<10d} resident:{:<10d} reusable:{:<10d}".format(depth,object,object.ref_count,object.true_share,copy_strategy,internal,object.vo_un1.vou_size/4096,object.wired_page_count,object.resident_page_count,object.reusable_page_count)
+                    pager_string = "-> {:s}:{:#018x}".format(pager.mo_pager_ops.memory_object_pager_name, pager.mo_pager_ops)
+            print "{:>18d} {:#018x}:{:#018x} {:#018x} ref:{:<6d} ts:{:1d} strat:{:1s} {:s} ({:d} {:d} {:d}) {:s}".format(depth,offset,offset+size,object,object.ref_count,object.true_share,copy_strategy,internal,unsigned(object.vo_un1.vou_size)/page_size,object.resident_page_count,object.wired_page_count,pager_string)
+#            print "        #{:<5d} obj {:#018x} ref:{:<6d} ts:{:1d} strat:{:1s} {:s} size:{:<10d} wired:{:<10d} resident:{:<10d} reusable:{:<10d}".format(depth,object,object.ref_count,object.true_share,copy_strategy,internal,object.vo_un1.vou_size/page_size,object.wired_page_count,object.resident_page_count,object.reusable_page_count)
             offset += unsigned(object.vo_un2.vou_shadow_offset)
             object = object.shadow
+    if unsigned(map.hdr.links.end) > last_end:
+        print "{:18s} {:#018x}:{:#018x} {:>10d}".format("------------------",last_end,map.hdr.links.end,(unsigned(map.hdr.links.end) - last_end)/page_size)
     return None
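As an aside on the encoding used above (and again in CountMapTags below): the VM tag rides in the low 12 bits of vme_offset. A sketch with a hypothetical packed value:

    vme_offset = 0x7fff5fc00000 | 13    # offset 0x7fff5fc00000, tag 13 (VM_KERN_MEMORY_KALLOC)
    offset = vme_offset & ~0xFFF        # 0x7fff5fc00000
    tag    = vme_offset & 0xFFF         # 13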
 
+def CountMapTags(map, tagcounts, slow):
+    page_size = unsigned(kern.globals.page_size)
+    vme_list_head = map.hdr.links
+    vme_ptr_type = GetType('vm_map_entry *')
+    for vme in IterateQueue(vme_list_head, vme_ptr_type, "links"):
+        object = vme.vme_object.vmo_object
+        tag = vme.vme_offset & 0xFFF
+        if object == kern.globals.kernel_object:
+            count = 0
+            if not slow:
+                count = unsigned(vme.links.end - vme.links.start) / page_size
+            else:
+                addr = unsigned(vme.links.start)
+                while addr < unsigned(vme.links.end):
+                    hash_id = _calc_vm_page_hash(object, addr)
+                    page_list = kern.globals.vm_page_buckets[hash_id].page_list
+                    page = _vm_page_unpack_ptr(page_list)
+                    while (page != 0):
+                        vmpage = kern.GetValueFromAddress(page, 'vm_page_t')
+                        if (addr == unsigned(vmpage.offset)) and (object == vmpage.object):
+                            if (not vmpage.local) and (vmpage.wire_count > 0):
+                                count += 1
+                            break
+                        page = _vm_page_unpack_ptr(vmpage.next_m)
+                    addr += page_size
+            tagcounts[tag] += count
+        elif vme.is_sub_map:
+            CountMapTags(Cast(object,'vm_map_t'), tagcounts, slow)
+    return None
+
+def CountWiredObject(object, tagcounts):
+    tagcounts[unsigned(object.wire_tag)] += object.wired_page_count
+    return None
+
+def CountWiredPurgeableGroup(qhead, tagcounts):
+    for object in IterateQueue(qhead, 'struct vm_object *', 'objq'):
+        CountWiredObject(object, tagcounts)
+    return None
+
+def CountWiredPurgeableQueue(qhead, tagcounts):
+    CountWiredPurgeableGroup(qhead.objq[0], tagcounts)
+    CountWiredPurgeableGroup(qhead.objq[1], tagcounts)
+    CountWiredPurgeableGroup(qhead.objq[2], tagcounts)
+    CountWiredPurgeableGroup(qhead.objq[3], tagcounts)
+    CountWiredPurgeableGroup(qhead.objq[4], tagcounts)
+    CountWiredPurgeableGroup(qhead.objq[5], tagcounts)
+    CountWiredPurgeableGroup(qhead.objq[6], tagcounts)
+    CountWiredPurgeableGroup(qhead.objq[7], tagcounts)
+
+def GetKmodIDName(kmod_id):
+    kmod_val = kern.globals.kmod
+    for kmod in IterateLinkedList(kmod_val, 'next'):
+        if (kmod.id == kmod_id):
+            return "{:<50s}".format(kmod.name)
+    return "??"
+
+def GetVMKernName(tag):
+    if 1 == tag:
+        return "VM_KERN_MEMORY_OSFMK"
+    elif 2 == tag:
+        return "VM_KERN_MEMORY_BSD"
+    elif 3 == tag:
+        return "VM_KERN_MEMORY_IOKIT"
+    elif 4 == tag:
+        return "VM_KERN_MEMORY_LIBKERN"
+    elif 5 == tag:
+        return "VM_KERN_MEMORY_OSKEXT"
+    elif 6 == tag:
+        return "VM_KERN_MEMORY_KEXT"
+    elif 7 == tag:
+        return "VM_KERN_MEMORY_IPC"
+    elif 8 == tag:
+        return "VM_KERN_MEMORY_STACK"
+    elif 9 == tag:
+        return "VM_KERN_MEMORY_CPU"
+    elif 10 == tag:
+        return "VM_KERN_MEMORY_PMAP"
+    elif 11 == tag:
+        return "VM_KERN_MEMORY_PTE"
+    elif 12 == tag:
+        return "VM_KERN_MEMORY_ZONE"
+    elif 13 == tag:
+        return "VM_KERN_MEMORY_KALLOC"
+    elif 14 == tag:
+        return "VM_KERN_MEMORY_COMPRESSOR"
+    elif 15 == tag:
+        return "VM_KERN_MEMORY_COMPRESSED_DATA"
+    elif 16 == tag:
+        return "VM_KERN_MEMORY_PHANTOM_CACHE"
+    elif 17 == tag:
+        return "VM_KERN_MEMORY_WAITQ"
+    elif 18 == tag:
+        return "VM_KERN_MEMORY_DIAG"
+    elif 19 == tag:
+        return "VM_KERN_MEMORY_LOG"
+    elif 20 == tag:
+        return "VM_KERN_MEMORY_FILE"
+    elif 21 == tag:
+        return "VM_KERN_MEMORY_MBUF"
+    elif 22 == tag:
+        return "VM_KERN_MEMORY_UBC"
+    elif 23 == tag:
+        return "VM_KERN_MEMORY_SECURITY"
+    elif 24 == tag:
+        return "VM_KERN_MEMORY_MLOCK"
+    return "??"
+
+
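As a design note, the chain above is a pure tag-to-name table; the same lookup could be kept as a dict. A sketch only (GetVMKernNameFromTable is a made-up name, not part of the change), using the tag values listed above:

    _VM_KERN_TAG_NAMES = {
         1: "VM_KERN_MEMORY_OSFMK",            2: "VM_KERN_MEMORY_BSD",
         3: "VM_KERN_MEMORY_IOKIT",            4: "VM_KERN_MEMORY_LIBKERN",
         5: "VM_KERN_MEMORY_OSKEXT",           6: "VM_KERN_MEMORY_KEXT",
         7: "VM_KERN_MEMORY_IPC",              8: "VM_KERN_MEMORY_STACK",
         9: "VM_KERN_MEMORY_CPU",             10: "VM_KERN_MEMORY_PMAP",
        11: "VM_KERN_MEMORY_PTE",             12: "VM_KERN_MEMORY_ZONE",
        13: "VM_KERN_MEMORY_KALLOC",          14: "VM_KERN_MEMORY_COMPRESSOR",
        15: "VM_KERN_MEMORY_COMPRESSED_DATA", 16: "VM_KERN_MEMORY_PHANTOM_CACHE",
        17: "VM_KERN_MEMORY_WAITQ",           18: "VM_KERN_MEMORY_DIAG",
        19: "VM_KERN_MEMORY_LOG",             20: "VM_KERN_MEMORY_FILE",
        21: "VM_KERN_MEMORY_MBUF",            22: "VM_KERN_MEMORY_UBC",
        23: "VM_KERN_MEMORY_SECURITY",        24: "VM_KERN_MEMORY_MLOCK",
    }

    def GetVMKernNameFromTable(tag):
        return _VM_KERN_TAG_NAMES.get(int(tag), "??")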
+@lldb_command("showvmtags", "S")
+def showvmtags(cmd_args=None, cmd_options={}):
+    """Routine to print out info about kernel wired page allocations
+        usage: showvmtags
+               iterates kernel map and vm objects totaling allocations by tag.
+        usage: showvmtags -S
+               also iterates kernel object pages individually - slow.
+    """
+    slow = False
+    if "-S" in cmd_options:
+        slow = True
+    page_size = unsigned(kern.globals.page_size)
+    tagcounts = []
+    for tag in range(256):
+        tagcounts.append(0)
+
+    queue_head = kern.globals.vm_objects_wired
+    for object in IterateQueue(queue_head, 'struct vm_object *', 'objq'):
+        CountWiredObject(object, tagcounts)
+
+    queue_head = kern.globals.purgeable_nonvolatile_queue
+    for object in IterateQueue(queue_head, 'struct vm_object *', 'objq'):
+        CountWiredObject(object, tagcounts)
+
+    purgeable_queues = kern.globals.purgeable_queues
+    CountWiredPurgeableQueue(purgeable_queues[0], tagcounts)
+    CountWiredPurgeableQueue(purgeable_queues[1], tagcounts)
+    CountWiredPurgeableQueue(purgeable_queues[2], tagcounts)
+
+    CountMapTags(kern.globals.kernel_map, tagcounts, slow)
+
+    total = 0
+    print " {:<8s}  {:>7s}  {:<50s}".format("tag.kmod","size","name")
+    for tag in range(256):
+        if tagcounts[tag]:
+            total += tagcounts[tag]
+            tagstr = ""
+            sitestr = ""
+            if (tag <= 24):
+                sitestr = GetVMKernName(tag)
+            else:
+                site = kern.globals.vm_allocation_sites[tag]
+                if site:
+                    if site.flags & 2:
+                        xsite = Cast(site,'OSKextAccount *')
+                        tagstr = ".{:<3d}".format(xsite.loadTag)
+                        sitestr = GetKmodIDName(xsite.loadTag)
+                    else:
+                        sitestr = kern.Symbolicate(site)
+            print " {:>3d}{:<4s}  {:>7d}K  {:<50s}".format(tag,tagstr,tagcounts[tag]*page_size / 1024,sitestr)
+    print "Total:    {:>7d}K".format(total*page_size / 1024)
+    return None
+
+
 def FindVMEntriesForVnode(task, vn):
     """ returns an array of vme that have the vnode set to defined vnode 
         each entry in array is of format (vme, start_addr, end_address, protection)
@@ -2399,8 +2663,8 @@ def FindVMEntriesForVnode(task, vn):
     vme_ptr_type = gettype('vm_map_entry *')
     for vme in IterateQueue(vme_list_head, vme_ptr_type, 'links'):
         #print vme
-        if unsigned(vme.is_sub_map) == 0 and unsigned(vme.object.vm_object) != 0:
-            obj = vme.object.vm_object
+        if unsigned(vme.is_sub_map) == 0 and unsigned(vme.vme_object.vmo_object) != 0:
+            obj = vme.vme_object.vmo_object
         else:
             continue
 
@@ -2439,4 +2703,254 @@ def ShowTaskLoadInfo(cmd_args=None, cmd_options={}):
             end_addr = m[2]
             #print "Load address: %s" % hex(m[1])
     print print_format.format(load_addr, end_addr, libname, uuid_out_string, filepath)
-    return None    
+    return None
+
+@header("{0: <20s} {1: <20s} {2: <20s}".format("vm_page_t", "offset", "object"))
+@lldb_command('vmpagelookup')
+def VMPageLookup(cmd_args=None):
+    """ Print the pages in the page bucket corresponding to the provided object and offset.
+        Usage: (lldb)vmpagelookup <vm_object_t> <vm_offset_t>
+    """
+    if cmd_args == None or len(cmd_args) < 2:
+        raise ArgumentError("Please specify an object and offset.")
+    format_string = "{0: <#020x} {1: <#020x} {2: <#020x}\n"
+
+    obj = kern.GetValueFromAddress(cmd_args[0],'unsigned long long')
+    off = kern.GetValueFromAddress(cmd_args[1],'unsigned long long')
+
+    hash_id = _calc_vm_page_hash(obj, off)
+
+    page_list = kern.globals.vm_page_buckets[hash_id].page_list
+    print("hash_id: 0x%x page_list: 0x%x\n" % (unsigned(hash_id), unsigned(page_list)))
+
+    print VMPageLookup.header
+    page = _vm_page_unpack_ptr(page_list)
+    while (page != 0) :
+        pg_t = kern.GetValueFromAddress(page, 'vm_page_t')
+        print format_string.format(page, pg_t.offset, pg_t.object)
+        page = _vm_page_unpack_ptr(pg_t.next_m)
+
+def _vm_page_unpack_ptr(page):
+    if kern.ptrsize == 4 :
+        return page
+
+    if page == 0 :
+        return page
+
+    min_addr = kern.globals.vm_min_kernel_and_kext_address
+    #INTEL - min_addr = 0xffffff7f80000000
+    #ARM - min_addr = 0x80000000
+    #ARM64 - min_addr = 0xffffff8000000000
+    return ((page << 6) + min_addr)
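For illustration, a hypothetical 64-bit unpack (the packed value and base address are made up; the shift is the one used above):

    packed   = 0x02000004
    min_addr = 0xffffff7f80000000          # e.g. the Intel value noted above
    unpacked = (packed << 6) + min_addr    # == 0xffffff8000000100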
+
+@lldb_command('calcvmpagehash')
+def CalcVMPageHash(cmd_args=None):
+    """ Get the page bucket corresponding to the provided object and offset.
+        Usage: (lldb)calcvmpagehash <vm_object_t> <vm_offset_t>
+    """
+    if cmd_args == None or len(cmd_args) < 2:
+        raise ArgumentError("Please specify an object and offset.")
+
+    obj = kern.GetValueFromAddress(cmd_args[0],'unsigned long long')
+    off = kern.GetValueFromAddress(cmd_args[1],'unsigned long long')
+
+    hash_id = _calc_vm_page_hash(obj, off)
+
+    print("hash_id: 0x%x page_list: 0x%x\n" % (unsigned(hash_id), unsigned(kern.globals.vm_page_buckets[hash_id].page_list)))
+    return None
+
+def _calc_vm_page_hash(obj, off):
+    bucket_hash = (int) (kern.globals.vm_page_bucket_hash)
+    hash_mask = (int) (kern.globals.vm_page_hash_mask)
+
+    one = (obj * bucket_hash) & 0xFFFFFFFF
+    two = off >> unsigned(kern.globals.page_shift)
+    three = two ^ bucket_hash
+    four = one + three
+    hash_id = four & hash_mask
+
+    return hash_id
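A worked example with deliberately small, hypothetical inputs (the real vm_page_bucket_hash, vm_page_hash_mask and page_shift are set at boot):

    obj, off = 0x1000, 0x4000            # assume bucket_hash = 0x21, hash_mask = 0x3ff, page_shift = 12
    one   = (obj * 0x21) & 0xFFFFFFFF    # 0x21000
    two   = off >> 12                    # 0x4
    three = two ^ 0x21                   # 0x25
    hash_id = (one + three) & 0x3ff      # 0x25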
+
+@header("{0: <10s} of {1: <10s} {2: <20s} {3: <20s} {4: <20s} {5: <10s} {6: <5s}\t {7: <28s}\t{8: <50s}".format("index", "total", "vm_page_t", "offset", "next", "phys_page", "wire#", "first bitfield", "second bitfield"))
+@lldb_command('vmobjectwalkpages', 'SBNQP:')
+def VMObjectWalkPages(cmd_args=None, cmd_options={}):
+    """ Print the resident pages contained in the provided object. If a vm_page_t is provided as well, we
+        specifically look for this page, highlighting it in the output or noting if it was not found. For
+        each page, we confirm that it points to the object. We also keep track of the number of pages we
+        see and compare this to the object's resident page count field.
+        Usage:
+            vmobjectwalkpages <vm_object_t> : Walk and print all the pages for a given object (up to 4K pages by default)
+            vmobjectwalkpages <vm_object_t> -B : Walk and print all the pages for a given object (up to 4K pages by default), traversing the memq backwards
+            vmobjectwalkpages <vm_object_t> -N : Walk and print all the pages for a given object, ignore the page limit
+            vmobjectwalkpages <vm_object_t> -Q : Walk all pages for a given object, looking for known signs of corruption (e.g. inactive and active both being set for a page)
+            vmobjectwalkpages <vm_object_t> -P <vm_page_t> : Walk all the pages for a given object, annotate the specified page in the output with ***
+            vmobjectwalkpages <vm_object_t> -P <vm_page_t> -S : Walk all the pages for a given object, stopping when we find the specified page
+
+    """
+
+    if (cmd_args == None or len(cmd_args) < 1):
+        raise ArgumentError("Please specify at minimum a vm_object_t and optionally a vm_page_t")
+
+    out_string = ""
+
+    obj = kern.GetValueFromAddress(cmd_args[0], 'vm_object_t')
+
+    page = 0
+    if "-P" in cmd_options:
+        page = kern.GetValueFromAddress(cmd_options['-P'], 'vm_page_t')
+
+    stop = 0
+    if "-S" in cmd_options:
+        if page == 0:
+            raise ArgumentError("-S can only be passed when a page is specified with -P")
+        stop = 1
+
+    walk_backwards = False
+    if "-B" in cmd_options:
+        walk_backwards = True
+
+    quiet_mode = False
+    if "-Q" in cmd_options:
+        quiet_mode = True
+
+    if not quiet_mode:
+        print VMObjectWalkPages.header
+        format_string = "{0: <#10d} of {1: <#10d} {2: <#020x} {3: <#020x} {4: <#020x} {5: <#010x} {6: <#05d}\t"
+        first_bitfield_format_string = "{0: <#1d}:{1: <#1d}:{2: <#1d}:{3: <#1d}:{4: <#1d}:{5: <#1d}:{6: <#1d}:"
+        first_bitfield_format_string += "{7: <#1d}:{8: <#1d}:{9: <#1d}:{10: <#1d}:{11: <#1d}:{12: <#1d}"
+        second_bitfield_format_string = first_bitfield_format_string
+        second_bitfield_format_string += ":{13: <#1d}:{14: <#1d}:{15: <#1d}:{16: <#1d}:{17: <#1d}:{18: <#1d}:{19: <#1d}:"
+        second_bitfield_format_string +=  "{20: <#1d}:{21: <#1d}:{22: <#1d}:{23: <#1d}:{24: <#1d}:{25: <#1d}:{26: <#1d}\n"
+        first_bitfield_format_string += "\t"
+
+    limit = 4096 #arbitrary limit of number of pages to walk
+    ignore_limit = 0
+    if "-N" in cmd_options:
+        ignore_limit = 1
+
+    page_count = 0
+    res_page_count = unsigned(obj.resident_page_count)
+    page_found = False
+    pages_seen = set()
+
+    for vmp in IterateQueue(obj.memq, "vm_page_t", "listq", walk_backwards):
+        page_count += 1
+        out_string = ""
+        if (page != 0 and not(page_found) and vmp == page):
+            out_string += "******"
+            page_found = True
+
+        if page != 0 or quiet_mode:
+             if (page_count % 1000) == 0:
+                print "traversed %d pages ...\n" % (page_count)
+        else:
+                out_string += format_string.format(page_count, res_page_count, vmp, vmp.offset, vmp.listq.next, vmp.phys_page, vmp.wire_count)
+                out_string += first_bitfield_format_string.format(vmp.active, vmp.inactive, vmp.clean_queue, vmp.local, vmp.speculative,
+                                                                    vmp.throttled, vmp.free, vmp.pageout_queue, vmp.laundry, vmp.reference,
+                                                                    vmp.gobbled, vmp.private, vmp.no_cache)
+
+                out_string += second_bitfield_format_string.format(vmp.busy, vmp.wanted, vmp.tabled, vmp.hashed, vmp.fictitious, vmp.clustered,
+                                                                    vmp.clustered, vmp.pmapped, vmp.xpmapped, vmp.wpmapped, vmp.pageout, vmp.absent,
+                                                                    vmp.error, vmp.dirty, vmp.cleaning, vmp.precious, vmp.precious, vmp.overwriting,
+                                                                    vmp.restart, vmp.unusual, vmp.encrypted, vmp.encrypted, vmp.encrypted_cleaning,
+                                                                    vmp.cs_validated, vmp.cs_tainted, vmp.cs_nx, vmp.reusable, vmp.lopage, vmp.slid, vmp.compressor,
+                                                                    vmp.written_by_kernel)
+
+        if (vmp in pages_seen):
+            print out_string + "cycle detected! we've seen vm_page_t: " + "{0: <#020x}".format(unsigned(vmp)) + " twice. stopping...\n"
+            return
+
+        if (vmp.object != obj):
+            print out_string + " vm_page_t: " + "{0: <#020x}".format(unsigned(vmp)) +  " points to different vm_object_t: " + "{0: <#020x}".format(unsigned(vmp.object))
+            return
+
+        if (not vmp.local) and (vmp.wire_count > 0):
+            if (vmp.active or vmp.inactive or vmp.speculative or vmp.throttled or vmp.pageout_queue):
+                print out_string + " wired page with wrong page queue attributes\n"
+                print "vm_page_t: " + "{0: <#020x}".format(unsigned(vmp)) + " active: %d inactive: %d speculative: %d throttled %d pageout_queue: %d\n" % (vmp.active,
+                                    vmp.inactive, vmp.speculative, vmp.throttled, vmp.pageout_queue)
+                print "stopping...\n"
+                return
+
+        if ((vmp.free + vmp.active + vmp.inactive + vmp.speculative + vmp.throttled + vmp.pageout_queue) > 1):
+            print out_string + " more than one pageout queue bit set active\n"
+            print "vm_page_t: " + "{0: <#020x}".format(unsigned(vmp)) + " free: %d active: %d inactive: %d speculative: %d throttled: %d pageout_queue: %d\n" % (vmp.free,
+                                            vmp.active, vmp.inactive, vmp.speculative, vmp.throttled, vmp.pageout_queue)
+            print "stopping...\n"
+            return
+
+        if ((vmp.__unused_pageq_bits != 0) or (vmp.__unused_object_bits != 0)):
+            print out_string + " unused bits not zero for vm_page_t: " + "{0: <#020x}".format(unsigned(vmp)) + " unused__pageq_bits: %d unused_object_bits : %d\n" % (vmp.__unused_pageq_bits,
+                                            vmp.__unused_object_bits)
+            print "stopping...\n"
+            return
+
+        pages_seen.add(vmp)
+
+        if False:
+            hash_id = _calc_vm_page_hash(obj, vmp.offset)
+            hash_page_list = kern.globals.vm_page_buckets[hash_id].page_list
+            hash_page = _vm_page_unpack_ptr(hash_page_list)
+            hash_page_t = 0
+
+            while (hash_page != 0):
+                hash_page_t = kern.GetValueFromAddress(hash_page, 'vm_page_t')
+                if hash_page_t == vmp:
+                    break
+                hash_page = _vm_page_unpack_ptr(hash_page_t.next_m)
+
+            if (unsigned(vmp) != unsigned(hash_page_t)):
+                print out_string + "unable to find page: " + "{0: <#020x}".format(unsigned(vmp)) + " from object in kernel page bucket list\n"
+                print lldb_run_command("vm_page_info %s 0x%x" % (cmd_args[0], unsigned(vmp.offset)))
+                return
+
+        if (page_count >= limit and not(ignore_limit)):
+            print out_string + "Limit reached (%d pages), stopping..." % (limit)
+            return
+
+        print out_string
+
+        if page_found and stop:
+            print("Object reports resident page count of: %d we stopped after traversing %d and finding the requested page.\n" % (unsigned(obj.res_page_count), unsigned(page_count)))
+            return
+
+    if (page != 0):
+        print("page found? : %s\n" % page_found)
+
+    print("Object reports resident page count of %d, we saw %d pages when we walked the resident list.\n" % (unsigned(obj.resident_page_count), unsigned(page_count)))
+
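Two hypothetical invocations (the object/page addresses are placeholders, in the spirit of the usage lines above):

    (lldb) vmobjectwalkpages 0xffffff80deadbee0 -N
    (lldb) vmobjectwalkpages 0xffffff80deadbee0 -P 0xffffff80feedf000 -S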
+
+@lldb_command("show_all_apple_protect_pagers")
+def ShowAllAppleProtectPagers(cmd_args=None):
+    """Routine to print all apple_protect pagers
+        usage: show_all_apple_protect_pagers
+    """
+    print "{:>3s} {:<3s} {:<18s} {:>5s} {:>5s} {:>6s} {:<18s} {:<18s} {:<18s} {:<18s} {:<18s} {:<18s}\n".format("#", "#", "pager", "refs", "ready", "mapped", "mo_control", "object", "offset", "crypto_offset", "crypto_start", "crypto_end")
+    qhead = kern.globals.apple_protect_pager_queue
+    qtype = GetType('apple_protect_pager *')
+    qcnt = kern.globals.apple_protect_pager_count
+    idx = 0
+    for pager in IterateQueue(qhead, qtype, "pager_queue"):
+        idx = idx + 1
+        show_apple_protect_pager(pager, qcnt, idx)
+
+@lldb_command("show_apple_protect_pager")
+def ShowAppleProtectPager(cmd_args=None):
+    """Routine to print out info about an apple_protect pager
+        usage: show_apple_protect_pager <pager>
+    """
+    if cmd_args == None or len(cmd_args) < 1:
+        print "Invalid argument.", ShowMap.__doc__
+        return
+    pager = kern.GetValueFromAddress(cmd_args[0], 'apple_protect_pager_t')
+    show_apple_protect_pager(pager, 1, 1)
+
+def show_apple_protect_pager(pager, qcnt, idx):
+    object = pager.backing_object
+    shadow = object.shadow
+    while shadow != 0:
+        object = shadow
+        shadow = object.shadow
+    vnode_pager = Cast(object.pager,'vnode_pager *')
+    filename = GetVnodePath(vnode_pager.vnode_handle)
+    print "{:>3}/{:<3d} {:#018x} {:>5d} {:>5d} {:>6d} {:#018x} {:#018x} {:#018x} {:#018x} {:#018x} {:#018x}\n\tcrypt_info:{:#018x} <decrypt:{:#018x} end:{:#018x} ops:{:#018x} refs:{:<d}>\n\tvnode:{:#018x} {:s}\n".format(idx, qcnt, pager, pager.ref_count, pager.is_ready, pager.is_mapped, pager.pager_control, pager.backing_object, pager.backing_offset, pager.crypto_backing_offset, pager.crypto_start, pager.crypto_end, pager.crypt_info, pager.crypt_info.page_decrypt, pager.crypt_info.crypt_end, pager.crypt_info.crypt_ops, pager.crypt_info.crypt_refcnt, vnode_pager.vnode_handle, filename)
index b9ccb240a8b561bbdaee058d6084b6283490e003..a9b7bafda28d5afef037d6018ffd6e55b4fab096 100644 (file)
@@ -339,3 +339,312 @@ def WriteMsr64(cmd_args=None):
     if not DoWriteMsr64(msr_address, lcpu, write_val):
         print "writemsr64 FAILED"
 
+def GetEVFlags(debug_arg):
+    """ Return the EV Flags for the given kernel debug arg value
+        params:
+            debug_arg - value from arg member of kernel debug buffer entry
+        returns: 
+            str - string representing the EV Flag for given input arg value
+    """
+    out_str = ""
+    if debug_arg & 1:
+        out_str += "EV_RE "
+    if debug_arg & 2:
+        out_str += "EV_WR "
+    if debug_arg & 4:
+        out_str += "EV_EX "
+    if debug_arg & 8:
+        out_str += "EV_RM "
+    if debug_arg & 0x00100:
+        out_str += "EV_RBYTES "
+    if debug_arg & 0x00200:
+        out_str += "EV_WBYTES "
+    if debug_arg & 0x00400:
+        out_str += "EV_RCLOSED "
+    if debug_arg & 0x00800:
+        out_str += "EV_RCONN "
+    if debug_arg & 0x01000:
+        out_str += "EV_WCLOSED "
+    if debug_arg & 0x02000:
+        out_str += "EV_WCONN "
+    if debug_arg & 0x04000:
+        out_str += "EV_OOB "
+    if debug_arg & 0x08000:
+        out_str += "EV_FIN "
+    if debug_arg & 0x10000:
+        out_str += "EV_RESET "
+    if debug_arg & 0x20000:
+        out_str += "EV_TIMEOUT "
+    
+    return out_str
+
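For illustration, tracing the bit tests above:

    GetEVFlags(0x3)        # -> "EV_RE EV_WR "
    GetEVFlags(0x10003)    # -> "EV_RE EV_WR EV_RESET "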
+def GetKernelDebugBufferEntry(kdbg_entry):
+    """ Extract the information from given kernel debug buffer entry and return the summary
+        params:
+            kdebug_entry - kd_buf - address of kernel debug buffer entry
+        returns: 
+            str - formatted output information of kd_buf entry
+    """
+    out_str = ""
+    code_info_str = ""
+    kdebug_entry = kern.GetValueFromAddress(kdbg_entry, 'kd_buf *')
+    debugid     = kdebug_entry.debugid
+    kdebug_arg1 = kdebug_entry.arg1
+    kdebug_arg2 = kdebug_entry.arg2
+    kdebug_arg3 = kdebug_entry.arg3
+    kdebug_arg4 = kdebug_entry.arg4
+    
+    if kern.arch in ('x86_64', 'arm64'):
+        kdebug_cpu   = kdebug_entry.cpuid
+        ts_hi        = (kdebug_entry.timestamp >> 32) & 0xFFFFFFFF
+        ts_lo        = kdebug_entry.timestamp & 0xFFFFFFFF
+    else:
+        kdebug_cpu   = (kdebug_entry.timestamp >> 56)
+        ts_hi        = (kdebug_entry.timestamp >> 32) & 0x00FFFFFF
+        ts_lo        = kdebug_entry.timestamp & 0xFFFFFFFF
+    
+    kdebug_class    = (debugid >> 24) & 0x000FF
+    kdebug_subclass = (debugid >> 16) & 0x000FF
+    kdebug_code     = (debugid >>  2) & 0x03FFF
+    kdebug_qual     = (debugid) & 0x00003
+    
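    # Illustrative decode using the masks above: for debugid 0x1410004a (the
    # MISCDBG_CODE(DBG_EVENT,DBG_WAIT|DBG_FUNC_END) case handled further down),
    #   class    = (0x1410004a >> 24) & 0xff   = 20  (MISC)
    #   subclass = (0x1410004a >> 16) & 0xff   = 0x10
    #   code     = (0x1410004a >>  2) & 0x3fff = 0x12
    #   qual     =  0x1410004a & 0x3           = 2   -> 'E' (function end)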
+    if kdebug_qual == 0:
+        kdebug_qual = '-'
+    elif kdebug_qual == 1:
+        kdebug_qual = 'S'
+    elif kdebug_qual == 2:
+        kdebug_qual = 'E'
+    elif kdebug_qual == 3:
+        kdebug_qual = '?'
+
+    # preamble and qual
+    out_str += "{:<#20x} {:>6d} {:>#12x} ".format(kdebug_entry, kdebug_cpu, kdebug_entry.arg5)
+    out_str += " {:#010x}{:08x} {:>6s} ".format(ts_hi, ts_lo, kdebug_qual)
+    
+    # class
+    kdbg_class = ""
+    if kdebug_class == 1:
+        kdbg_class = "MACH"
+    elif kdebug_class == 2:
+        kdbg_class = "NET "
+    elif kdebug_class == 3:
+        kdbg_class = "FS  "
+    elif kdebug_class == 4:
+        kdbg_class = "BSD "
+    elif kdebug_class == 5:
+        kdbg_class = "IOK "
+    elif kdebug_class == 6:
+        kdbg_class = "DRVR"
+    elif kdebug_class == 7:
+        kdbg_class = "TRAC"
+    elif kdebug_class == 8:
+        kdbg_class = "DLIL"
+    elif kdebug_class == 9:
+        kdbg_class = "WQ  "
+    elif kdebug_class == 10:
+        kdbg_class = "CS  "
+    elif kdebug_class == 11:
+        kdbg_class = "CG  "
+    elif kdebug_class == 20:
+        kdbg_class = "MISC"
+    elif kdebug_class == 30:
+        kdbg_class = "SEC "
+    elif kdebug_class == 31:
+        kdbg_class = "DYLD"
+    elif kdebug_class == 32:
+        kdbg_class = "QT  "
+    elif kdebug_class == 33:
+        kdbg_class = "APPS"
+    elif kdebug_class == 34:
+        kdbg_class = "LAUN"
+    elif kdebug_class == 36:
+        kdbg_class = "PPT "
+    elif kdebug_class == 37:
+        kdbg_class = "PERF"
+    elif kdebug_class == 38:
+        kdbg_class = "IMP "
+    elif kdebug_class == 39:
+        kdbg_class = "PCTL"
+    elif kdebug_class == 40:
+        kdbg_class = "BANK"
+    elif kdebug_class == 41:
+        kdbg_class = "XPC "
+    elif kdebug_class == 42:
+        kdbg_class = "ATM "
+    elif kdebug_class == 128:
+        kdbg_class = "ANS "
+    elif kdebug_class == 129:
+        kdbg_class = "SIO "
+    elif kdebug_class == 130:
+        kdbg_class = "SEP "
+    elif kdebug_class == 131:
+        kdbg_class = "ISP "
+    elif kdebug_class == 132:
+        kdbg_class = "OSCA"
+    elif kdebug_class == 133:
+        kdbg_class = "EGFX"
+    elif kdebug_class == 255:
+        kdbg_class = "MIG "
+    else:
+        out_str += "{:^#10x} ".format(kdebug_class)
+    
+    if kdbg_class:
+        out_str += "{:^10s} ".format(kdbg_class)
+
+    # subclass and code
+    out_str += " {:>#5x} {:>8d}   ".format(kdebug_subclass, kdebug_code)
+
+    # space for debugid-specific processing
+    # EVPROC from bsd/kern/sys_generic.c
+    # MISCDBG_CODE(DBG_EVENT,DBG_WAIT)
+    if debugid == 0x14100048:
+        code_info_str += "waitevent "
+        if kdebug_arg1 == 1:
+            code_info_str += "before sleep"
+        elif kdebug_arg1 == 2:
+            code_info_str += "after  sleep"
+        else:
+            code_info_str += "????????????"
+        code_info_str += " chan={:#08x} ".format(kdebug_arg2)
+    elif debugid == 0x14100049:
+        # MISCDBG_CODE(DBG_EVENT,DBG_WAIT|DBG_FUNC_START)
+        code_info_str += "waitevent "
+    elif debugid == 0x1410004a:
+        # MISCDBG_CODE(DBG_EVENT,DBG_WAIT|DBG_FUNC_END)
+        code_info_str += "waitevent error={:d} ".format(kdebug_arg1)
+        code_info_str += "eqp={:#08x} ".format(kdebug_arg4)
+        code_info_str += GetEVFlags(kdebug_arg3)
+        code_info_str += "er_handle={:d} ".format(kdebug_arg2)
+    elif debugid == 0x14100059:
+        # MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE|DBG_FUNC_START)
+        code_info_str += "evprocdeque proc={:#08x} ".format(kdebug_arg1)
+        if kdebug_arg2 == 0:
+            code_info_str += "remove first "
+        else:
+            code_info_str += "remove {:#08x} ".format(kdebug_arg2)
+    elif debugid == 0x1410005a:
+        # MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE|DBG_FUNC_END)
+        code_info_str += "evprocdeque "
+        if kdebug_arg1 == 0:
+            code_info_str += "result=NULL "
+        else:
+            code_info_str += "result={:#08x} ".format(kdebug_arg1)
+    elif debugid == 0x14100041:
+        # MISCDBG_CODE(DBG_EVENT,DBG_POST|DBG_FUNC_START)
+        code_info_str += "postevent "
+        code_info_str += GetEVFlags(kdebug_arg1)
+    elif debugid == 0x14100040:
+        # MISCDBG_CODE(DBG_EVENT,DBG_POST)
+        code_info_str += "postevent "
+        code_info_str += "evq={:#08x} ".format(kdebug_arg1)
+        code_info_str += "er_eventbits="
+        code_info_str += GetEVFlags(kdebug_arg2)
+        code_info_str +="mask="
+        code_info_str += GetEVFlags(kdebug_arg3)
+    elif debugid == 0x14100042:
+        # MISCDBG_CODE(DBG_EVENT,DBG_POST|DBG_FUNC_END)
+        code_info_str += "postevent "
+    elif debugid == 0x14100055:
+        # MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE|DBG_FUNC_START)
+        code_info_str += "evprocenque eqp={:#08x} ".format(kdebug_arg1)
+        if kdebug_arg2 & 1:
+            code_info_str += "EV_QUEUED "
+        code_info_str += GetEVFlags(kdebug_arg3)
+    elif debugid == 0x14100050:
+        # MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP)
+        code_info_str += "evprocenque before wakeup eqp={:#08x} ".format(kdebug_arg4)
+    elif debugid == 0x14100056:
+        # MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE|DBG_FUNC_END)
+        code_info_str += "evprocenque "
+    elif debugid == 0x1410004d:
+        # MISCDBG_CODE(DBG_EVENT,DBG_MOD|DBG_FUNC_START)
+        code_info_str += "modwatch "
+    elif debugid == 0x1410004c:
+        # MISCDBG_CODE(DBG_EVENT,DBG_MOD)
+        code_info_str += "modwatch er_handle={:d} ".format(kdebug_arg1)
+        code_info_str += GetEVFlags(kdebug_arg2)
+        code_info_str += "evq={:#08x} ", kdebug_arg3
+    elif debugid == 0x1410004e:
+        # MISCDBG_CODE(DBG_EVENT,DBG_MOD|DBG_FUNC_END)
+        code_info_str += "modwatch er_handle={:d} ".format(kdebug_arg1)
+        code_info_str += "ee_eventmask="
+        code_info_str += GetEVFlags(kdebug_arg2)
+        code_info_str += "sp={:#08x} ".format(kdebug_arg3)
+        code_info_str += "flag="
+        code_info_str += GetEVFlags(kdebug_arg4)
+    else:
+        code_info_str += "arg1={:#010x} ".format(kdebug_arg1)
+        code_info_str += "arg2={:#010x} ".format(kdebug_arg2)
+        code_info_str += "arg3={:#010x} ".format(kdebug_arg3)
+        code_info_str += "arg4={:#010x} ".format(kdebug_arg4)
+    
+    # finish up
+    out_str += "{:<25s}\n".format(code_info_str)
+    return out_str
+
+@lldb_command('showkerneldebugbuffercpu')
+@header("{0: ^20s} {1: >6s} {2: >12s} {3: ^20s} {4: >6s} {5: ^10s} {6: >5s} {7: >8s} {8: ^25s}".
+    format('kd_buf', 'CPU', 'Thread', 'Timestamp', 'S/E', 'Class', 'Sub', 'Code', 'Code Specific Info'))
+def ShowKernelDebugBufferCPU(cmd_args=None):
+    """ Prints the last N entries in the kernel debug buffer for specified cpu
+        Syntax: showkerneldebugbuffercpu <cpu_num> <count>
+    """
+    if cmd_args == None or len(cmd_args) < 2:
+        raise ArgumentError("Invalid arguments passed.")
+    
+    out_str = ""
+    kdbg_str = ""
+    cpu_number = ArgumentStringToInt(cmd_args[0])
+    entry_count = ArgumentStringToInt(cmd_args[1])
+    debugentriesfound = 0
+    #  Check if KDBG_BFINIT (0x80000000) is set in kdebug_flags
+    if (kern.globals.kd_ctrl_page.kdebug_flags & 0x80000000):   
+        out_str += ShowKernelDebugBufferCPU.header + "\n"
+        if entry_count == 0:
+            out_str += "<count> is 0, dumping 50 entries\n"
+            entry_count = 50
+
+        if cpu_number >= kern.globals.kd_ctrl_page.kdebug_cpus:
+            kdbg_str += "cpu number too big\n"
+        else:
+            kdbp = addressof(kern.globals.kdbip[cpu_number])
+            kdsp = kdbp.kd_list_head
+            while ((kdsp.raw != 0 and kdsp.raw != 0x00000000ffffffff) and (entry_count > 0)):
+                kd_buffer = kern.globals.kd_bufs[kdsp.buffer_index]
+                kdsp_actual = addressof(kd_buffer.kdsb_addr[kdsp.offset])
+                if kdsp_actual.kds_readlast != kdsp_actual.kds_bufindx:
+                    kds_buf = kdsp_actual.kds_records[kdsp_actual.kds_bufindx]
+                    kds_bufptr = addressof(kds_buf)
+                    while (entry_count > 0) and \
+                        (unsigned(kds_bufptr) > unsigned(addressof(kdsp_actual.kds_records[kdsp_actual.kds_readlast]))):
+                        kds_bufptr = kds_bufptr - sizeof(kds_buf)
+                        entry_count = entry_count - 1
+                        kdbg_str += GetKernelDebugBufferEntry(kds_bufptr)
+                kdsp = kdsp_actual.kds_next
+    else:
+        kdbg_str += "Trace buffer not enabled for CPU {:d}\n".format(cpu_number)
+    
+    if kdbg_str:
+        out_str += kdbg_str
+        print out_str
+
+@lldb_command('showkerneldebugbuffer')
+def ShowKernelDebugBuffer(cmd_args=None):
+    """ Prints the last N entries in the kernel debug buffer per cpu
+        Syntax: showkerneldebugbuffer <count>
+    """
+    if cmd_args == None or len(cmd_args) < 1:
+        raise ArgumentError("Invalid arguments passed.")
+    
+    #  Check if KDBG_BFINIT (0x80000000) is set in kdebug_flags
+    if (kern.globals.kd_ctrl_page.kdebug_flags & 0x80000000):
+        entrycount = ArgumentStringToInt(cmd_args[0])
+        if entrycount == 0:
+            print "<count> is 0, dumping 50 entries per cpu\n"
+            entrycount = 50
+        cpu_num = 0
+        while cpu_num < kern.globals.kd_ctrl_page.kdebug_cpus:
+            ShowKernelDebugBufferCPU([str(cpu_num), str(entrycount)])
+            cpu_num += 1
+    else:
+        print "Trace buffer not enabled\n"
index 7e55c8d7cd2afd58e2e56b40757dcdc08236e4dd..ed39b0c2cf308e750b268f8ed1d076addb2bc33b 100644 (file)
@@ -1326,6 +1326,17 @@ def RtEntryTrash(cmd_args=None):
     print out_string
 # EndMacro: rtentry_trash
 
+# Macro: show_rtentry
+@lldb_command('show_rtentry')
+def ShRtEntry(cmd_args=None):
+    """ Print rtentry.
+    """
+    out_string = ""
+    rt = kern.GetValueFromAddress(cmd_args[0], 'rtentry *')
+    out_string += GetRtEntryPrDetailsAsString(rt) + "\n"
+    print out_string
+# EndMacro: show_rtentry
+
 # Macro: inifa_trash
 @lldb_command('inifa_trash')
 def InIfaTrash(cmd_args=None):
index 191c2783b404c50dc4fb0feead2b7c6b582675f4..6a3cee28373250c70408db8198f0a0ac2e19c853 100644 (file)
@@ -240,6 +240,8 @@ def _PT_Step(paddr, index, verbose_level = vSCRIPT):
             out_string += " invalid"
             pt_paddr = 0
             pt_valid = False
+            if entry & (0x1 << 62):
+                out_string += " compressed"
             #Stop decoding other bits
             entry = 0
         if entry & (0x1 << 1):
@@ -281,10 +283,100 @@ def _PT_Step(paddr, index, verbose_level = vSCRIPT):
     print out_string
     return (pt_paddr, pt_valid, pt_large)
 
+def _PT_StepEPT(paddr, index, verbose_level = vSCRIPT):
+    """
+     Step to lower-level page table and print attributes for EPT pmap
+       paddr: current page table entry physical address
+       index: current page table entry index (0..511)
+       verbose_level:    vHUMAN: print nothing
+                         vSCRIPT: print basic information
+                         vDETAIL: print basic information and hex table dump
+     returns: (pt_paddr, pt_valid, pt_large)
+       pt_paddr: next level page table entry physical address
+                      or null if invalid
+       pt_valid: 1 if pt_paddr is valid, 0 if the walk
+                      should be aborted
+       pt_large: 1 if pt_paddr is a page frame address
+                      of a large page and not another page table entry
+    """
+    entry_addr = paddr + (8 * index)
+    entry = ReadPhysInt(entry_addr, 64, xnudefines.lcpu_self )
+    out_string = ''
+    if verbose_level >= vDETAIL:
+        for pte_loop in range(0, 512):
+            paddr_tmp = paddr + (8 * pte_loop)
+            out_string += "{0: <#020x}:\t {1: <#020x}\n".format(paddr_tmp, ReadPhysInt(paddr_tmp, 64, xnudefines.lcpu_self))
+    paddr_mask = ~((0xfff<<52) | 0xfff)
+    paddr_large_mask =  ~((0xfff<<52) | 0x1fffff)
+    pt_valid = False
+    pt_large = False
+    pt_paddr = 0
+    if verbose_level < vSCRIPT:
+        if entry & 0x7 :
+            pt_valid = True
+            pt_large = False
+            pt_paddr = entry & paddr_mask
+            if entry & (0x1 <<7):
+                pt_large = True
+                pt_paddr = entry & paddr_large_mask
+    else:
+        out_string+= "{0: <#020x}:\n\t{1:#020x}\n\t".format(entry_addr, entry)
+        if entry & 0x7:
+            out_string += "valid"
+            pt_paddr = entry & paddr_mask
+            pt_valid = True
+        else:
+            out_string += "invalid"
+            pt_paddr = 0
+            pt_valid = False
+            if entry & (0x1 << 62):
+                out_string += " compressed"
+            #Stop decoding other bits
+            entry = 0
+        if entry & 0x1:
+            out_string += " readable"
+        else:
+            out_string += " no read"
+        if entry & (0x1 << 1):
+            out_string += " writable"
+        else:
+            out_string += " no write"
+
+        if entry & (0x1 << 2):
+            out_string += " executable"
+        else:
+            out_string += " no exec"
+
+        ctype = entry & 0x38
+        if ctype == 0x30:
+            out_string += " cache-WB"
+        elif ctype == 0x28:
+            out_string += " cache-WP"
+        elif ctype == 0x20:
+            out_string += " cache-WT"
+        elif ctype == 0x8:
+            out_string += " cache-WC"
+        else:
+            out_string += " cache-NC"
+
+        if (entry & 0x40) == 0x40:
+            out_string += " Ignore-PTA"
 
+        if (entry & 0x100) == 0x100:
+            out_string += " accessed"
 
+        if (entry & 0x200) == 0x200:
+            out_string += " dirty"
 
-def _PmapL4Walk(pmap_addr_val,vaddr, verbose_level = vSCRIPT):
+        if entry & (0x1 << 7):
+            out_string += " large"
+            pt_large = True
+        else:
+            pt_large = False
+    print out_string
+    return (pt_paddr, pt_valid, pt_large)
+
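To make the bit tests above concrete, a hypothetical leaf EPT entry:

    # entry = 0x0000000212345037 (made-up value)
    #   entry & 0x7        == 0x7   -> valid; readable, writable, executable
    #   entry & 0x38       == 0x30  -> cache-WB
    #   entry & (0x1 << 7) == 0     -> not a large page
    #   pt_paddr = entry & ~((0xfff << 52) | 0xfff) == 0x212345000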
+def _PmapL4Walk(pmap_addr_val,vaddr, ept_pmap, verbose_level = vSCRIPT):
     """ Walk the l4 pmap entry.
         params: pmap_addr_val - core.value representing kernel data of type pmap_addr_t
         vaddr : int - virtual address to walk
@@ -300,28 +392,40 @@ def _PmapL4Walk(pmap_addr_val,vaddr, verbose_level = vSCRIPT):
         pframe_offset = vaddr & 0x7fffffffff
         if verbose_level > vHUMAN :
             print "pml4 (index {0:d}):".format(pt_index)
-        (pt_paddr, pt_valid, pt_large) = _PT_Step(pt_paddr, pt_index, verbose_level)
+        if not(ept_pmap):
+            (pt_paddr, pt_valid, pt_large) = _PT_Step(pt_paddr, pt_index, verbose_level)
+        else:
+            (pt_paddr, pt_valid, pt_large) = _PT_StepEPT(pt_paddr, pt_index, verbose_level)
     if pt_valid:
         # Lookup bits 38:30 of the linear address in PDPT
         pt_index = (vaddr >> 30) & 0x1ff
         pframe_offset = vaddr & 0x3fffffff
         if verbose_level > vHUMAN:
             print "pdpt (index {0:d}):".format(pt_index)
-        (pt_paddr, pt_valid, pt_large) = _PT_Step(pt_paddr, pt_index, verbose_level)
+        if not(ept_pmap):
+            (pt_paddr, pt_valid, pt_large) = _PT_Step(pt_paddr, pt_index, verbose_level)
+        else:
+            (pt_paddr, pt_valid, pt_large) = _PT_StepEPT(pt_paddr, pt_index, verbose_level)
     if pt_valid and not pt_large:
         #Lookup bits 29:21 of the linear address in the PDT
         pt_index = (vaddr >> 21) & 0x1ff
         pframe_offset = vaddr & 0x1fffff
         if verbose_level > vHUMAN:
             print "pdt (index {0:d}):".format(pt_index)
-        (pt_paddr, pt_valid, pt_large) = _PT_Step(pt_paddr, pt_index, verbose_level)
+        if not(ept_pmap):
+            (pt_paddr, pt_valid, pt_large) = _PT_Step(pt_paddr, pt_index, verbose_level)
+        else:
+            (pt_paddr, pt_valid, pt_large) = _PT_StepEPT(pt_paddr, pt_index, verbose_level)
     if pt_valid and not pt_large:
         #Lookup bits 20:12 of the linear address in the PT
         pt_index = (vaddr >> 12) & 0x1ff
         pframe_offset = vaddr & 0xfff
         if verbose_level > vHUMAN:
             print "pt (index {0:d}):".format(pt_index)
-        (pt_paddr, pt_valid, pt_large) = _PT_Step(pt_paddr, pt_index, verbose_level)
+        if not(ept_pmap):
+            (pt_paddr, pt_valid, pt_large) = _PT_Step(pt_paddr, pt_index, verbose_level)
+        else:
+            (pt_paddr, pt_valid, pt_large) = _PT_StepEPT(pt_paddr, pt_index, verbose_level)
     paddr = 0
     paddr_isvalid = False
     if pt_valid:
@@ -528,12 +632,19 @@ def PmapWalkARM(pmap, vaddr, verbose_level = vHUMAN):
 
     return paddr
 
-def PmapWalkX86_64(pmapval, vaddr):
+def PmapWalkX86_64(pmapval, vaddr, verbose_level = vSCRIPT):
     """
         params: pmapval - core.value representing pmap_t in kernel
         vaddr:  int     - int representing virtual address to walk
     """
-    return _PmapL4Walk(pmapval.pm_cr3, vaddr, config['verbosity'])
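+    # An EPT-backed pmap (e.g. one created for a guest VM) leaves pm_cr3 at 0
+    # and keeps the root of its paging hierarchy in pm_eptp, so pm_cr3 == 0 is
+    # used to pick the EPT walker.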
+    if pmapval.pm_cr3 != 0:
+        if verbose_level > vHUMAN:
+            print "Using normal Intel PMAP from pm_cr3\n"
+        return _PmapL4Walk(pmapval.pm_cr3, vaddr, 0, config['verbosity'])
+    else:
+        if verbose_level > vHUMAN:
+            print "Using EPT pmap from pm_eptp\n"
+        return _PmapL4Walk(pmapval.pm_eptp, vaddr, 1, config['verbosity'])
 
 def assert_64bit(val):
     assert(val < 2**64)
@@ -543,24 +654,24 @@ ARM64_VMADDR_BITS = 48
 
 def PmapBlockOffsetMaskARM64(level):
     assert level >= 1 and level <= 3
-    page_size = kern.globals.page_size
+    page_size = kern.globals.arm_hardware_page_size
     ttentries = (page_size / ARM64_TTE_SIZE)
     return page_size * (ttentries ** (3 - level)) - 1
 
 def PmapBlockBaseMaskARM64(level):
     assert level >= 1 and level <= 3
-    page_size = kern.globals.page_size
+    page_size = kern.globals.arm_hardware_page_size
     return ((1 << ARM64_VMADDR_BITS) - 1) & ~PmapBlockOffsetMaskARM64(level)
 
 def PmapIndexMaskARM64(level):
     assert level >= 1 and level <= 3
-    page_size = kern.globals.page_size
+    page_size = kern.globals.arm_hardware_page_size
     ttentries = (page_size / ARM64_TTE_SIZE)
     return page_size * (ttentries ** (3 - level) * (ttentries - 1))
 
 def PmapIndexDivideARM64(level):
     assert level >= 1 and level <= 3
-    page_size = kern.globals.page_size
+    page_size = kern.globals.arm_hardware_page_size
     ttentries = (page_size / ARM64_TTE_SIZE)
     return page_size * (ttentries ** (3 - level))
 
@@ -603,7 +714,7 @@ def PmapDecodeTTEARM64(tte, level):
 def PmapWalkARM64(pmap, vaddr, verbose_level = vHUMAN):
     assert(type(pmap) == core.cvalue.value)
     assert(type(vaddr) in (long, int))
-    page_size = kern.globals.page_size
+    page_size = kern.globals.arm_hardware_page_size
     page_offset_mask = (page_size - 1)
     page_base_mask = ((1 << ARM64_VMADDR_BITS) - 1) & (~page_offset_mask)
 
@@ -699,7 +810,7 @@ def PmapWalkARM64(pmap, vaddr, verbose_level = vHUMAN):
 
 def PmapWalk(pmap, vaddr, verbose_level = vHUMAN):
     if kern.arch == 'x86_64':
-        return PmapWalkX86_64(pmap, vaddr)
+        return PmapWalkX86_64(pmap, vaddr, verbose_level)
     elif kern.arch == 'arm':
         return PmapWalkARM(pmap, vaddr, verbose_level)
     elif kern.arch == 'arm64':
@@ -710,7 +821,7 @@ def PmapWalk(pmap, vaddr, verbose_level = vHUMAN):
 @lldb_command('pmap_walk')
 def PmapWalkHelper(cmd_args=None):
     """ Perform a page-table walk in <pmap> for <virtual_address>.
-        Syntax: (lldb) pmap_walk <pmap> <virtual_address> [-v]
+        Syntax: (lldb) pmap_walk <pmap> <virtual_address> [-v] [-e]
             Multiple -v's can be specified for increased verbosity
     """
     if cmd_args == None or len(cmd_args) < 2:
index 40f8cfedbd195ab93bce0942825d3740f169d90c..b4c85f9180df44c380d4aa0d1d63de72549e137e 100644 (file)
@@ -1,12 +1,13 @@
 
 """ Please make sure you read the README file COMPLETELY BEFORE reading anything below.
-    It is very critical that you read coding guidelines in Section E in README file. 
+    It is critical that you read the coding guidelines in Section E of the README file.
 """
 
 from xnu import *
 import sys, shlex
 from utils import *
 from core.lazytarget import *
+import time
 import xnudefines
 
 def GetProcNameForTask(task):
@@ -184,16 +185,27 @@ def GetASTSummary(ast):
                           0x1000:'G', 0x2000:'T', 0x4000:'T', 0x8000:'T', 0x10000:'S'}
     state_str = ''
     mask = 0x1
-    while mask <= 0x10000 :
+    while mask <= 0x10000:
         state_str += thread_state_chars[int(state & mask)]
         mask = mask << 1
-    
+
     return state_str
 
 
+@lldb_type_summary(['kcdata_descriptor *', 'kcdata_descriptor_t'])
+@header("{0: <20s} {1: <20s} {2: <20s} {3: <10s} {4: <5s}".format("kcdata_descriptor", "begin_addr", "cur_pos", "size", "flags"))
+def GetKCDataSummary(kcdata):
+    """ Summarizes kcdata_descriptor structure
+        params: kcdata: value - value object representing kcdata_descriptor
+        returns: str - summary of the kcdata object
+    """
+    format_string = "{0: <#020x} {1: <#020x} {2: <#020x} {3: <10d} {4: <#05x}"
+    return format_string.format(kcdata, kcdata.kcd_addr_begin, kcdata.kcd_addr_end, kcdata.kcd_length, kcdata.kcd_flags)
+
+
 @lldb_type_summary(['task', 'task_t'])
 @header("{0: <20s} {1: <20s} {2: <20s} {3: >5s} {4: <5s}".format("task","vm_map", "ipc_space", "#acts", "flags"))
-def GetTaskSummary(task):
+def GetTaskSummary(task, showcorpse=False):
     """ Summarizes the important fields in task structure.
         params: task: value - value object representing a task in kernel
         returns: str - summary of the task
@@ -214,11 +226,20 @@ def GetTaskSummary(task):
             task_flags += 'D'
         if int(tib.iit_assertcnt) > 0:
             task_flags += 'B'
+
+    # check if corpse flag is set
+    if unsigned(task.t_flags) & 0x20:
+        task_flags += 'C'
+    if unsigned(task.t_flags) & 0x40:
+        task_flags += 'P'
+
     out_string += format_string.format(task, task.map, task.itk_space, thread_count, task_flags)
+    if showcorpse is True and unsigned(task.corpse_info) != 0:
+        out_string += " " + GetKCDataSummary(task.corpse_info)
     return out_string
 
 @lldb_type_summary(['thread *', 'thread_t'])
-@header("{0: <24s} {1: <10s} {2: <20s} {3: <6s} {4: <6s} {5: <15s} {6: <15s} {7: <8s} {8: <12s} {9: <32s} {10: <20s} {11: <20s} {12: <20s}".format('thread', 'thread_id', 'processor', 'base', 'pri', 'sched_mode', 'io_policy', 'state', 'ast', 'wait_queue', 'wait_event', 'wmesg', 'thread_name'))
+@header("{0: <24s} {1: <10s} {2: <20s} {3: <6s} {4: <6s} {5: <15s} {6: <15s} {7: <8s} {8: <12s} {9: <32s} {10: <20s} {11: <20s} {12: <20s}".format('thread', 'thread_id', 'processor', 'base', 'pri', 'sched_mode', 'io_policy', 'state', 'ast', 'waitq', 'wait_event', 'wmesg', 'thread_name'))
 def GetThreadSummary(thread):
     """ Summarize the thread structure. It decodes the wait state and waitevents from the data in the struct.
         params: thread: value - value objecte representing a thread in kernel
@@ -232,6 +253,7 @@ def GetThreadSummary(thread):
         H - Terminated
         A - Terminated and on termination queue
         I - Idle thread
+        C - Crashed thread
 
         policy flags:
         B - darwinbg
@@ -248,7 +270,7 @@ def GetThreadSummary(thread):
     thread_id = hex(thread.thread_id)
     thread_name = ''
     processor = hex(thread.last_processor)
-    base_priority = str(int(thread.priority))
+    base_priority = str(int(thread.base_pri))
     sched_priority = str(int(thread.sched_pri))
     sched_mode = ''
     mode = str(thread.sched_mode)
@@ -302,7 +324,10 @@ def GetThreadSummary(thread):
     while mask <= 0x80 :
         state_str += thread_state_chars[int(state & mask)]
         mask = mask << 1
-        
+    
+    if int(thread.inspection):
+        state_str += 'C'
+
     ast = int(thread.ast) | int(thread.reason)
     ast_str = GetASTSummary(ast)
     
@@ -312,7 +337,7 @@ def GetThreadSummary(thread):
     wait_message = ''
     if ( state & 0x1 ) != 0:
         #we need to look at the waitqueue as well
-        wait_queue_str = str("{0: <#020x}".format(int(hex(thread.wait_queue), 16)))
+        wait_queue_str = str("{0: <#020x}".format(int(hex(thread.waitq), 16)))
         wait_event_str = str("{0: <#020x}".format(int(hex(thread.wait_event), 16)))
         wait_event_str_sym = kern.Symbolicate(int(hex(thread.wait_event), 16))
         if len(wait_event_str_sym) > 0:
@@ -324,23 +349,212 @@ def GetThreadSummary(thread):
             
     out_string += format_string.format(thread_ptr_str, thread_id, processor, base_priority, sched_priority, sched_mode, io_policy_str, state_str, ast_str, wait_queue_str, wait_event_str, wait_message, thread_name)
     return out_string
-    
 
-@lldb_type_summary(['coalition_t', 'coalition'])
-@header("{:>18s} {:>10s} {:>8s} {:>8s} {:>8s} {:>8s}".format("coalition", "id", "refcount", "active", "focal", "nonfocal"))
-def GetCoalitionSummary(coal):
-    out_string = ""
-    format_string = '{:>#018x} {:>10d} {:>8d} {:>8d} {:>8d} {:>8d}'
 
-    flags_string = ''
+def GetTaskRoleString(role):
+    role_strs = {
+                 0 : "TASK_UNSPECIFIED",
+                 1 : "TASK_FOREGROUND_APPLICATION",
+                 2 : "TASK_BACKGROUND_APPLICATION",
+                 3 : "TASK_CONTROL_APPLICATION",
+                 4 : "TASK_GRAPHICS_SERVER",
+                 5 : "TASK_THROTTLE_APPLICATION",
+                 6 : "TASK_NONUI_APPLICATION",
+                 7 : "TASK_DEFAULT_APPLICATION",
+                }
+    return role_strs[int(role)]
+
+def GetCoalitionFlagString(coal):
+    flags = []
+    if (coal.privileged):
+        flags.append('privileged')
+    if (coal.termrequested):
+        flags.append('termrequested')
     if (coal.terminated):
-        flags_string += ' terminated'
+        flags.append('terminated')
     if (coal.reaped):
-        flags_string += ' reaped'
-    out_string += format_string.format(coal, coal.id, coal.ref_count, coal.active_count, coal.focal_tasks_count, coal.non_focal_tasks_count)
+        flags.append('reaped')
+    if (coal.notified):
+        flags.append('notified')
+    return "|".join(flags)
+
+def GetCoalitionTasks(queue, coal_type, thread_details=False):
+    sfi_strs = {
+                 0x0  : "SFI_CLASS_UNSPECIFIED",
+                 0x1  : "SFI_CLASS_DARWIN_BG",
+                 0x2  : "SFI_CLASS_APP_NAP",
+                 0x3  : "SFI_CLASS_MANAGED_FOCAL",
+                 0x4  : "SFI_CLASS_MANAGED_NONFOCAL",
+                 0x5  : "SFI_CLASS_DEFAULT_FOCAL",
+                 0x6  : "SFI_CLASS_DEFAULT_NONFOCAL",
+                 0x7  : "SFI_CLASS_KERNEL",
+                 0x8  : "SFI_CLASS_OPTED_OUT",
+                 0x9  : "SFI_CLASS_UTILITY",
+                 0xA  : "SFI_CLASS_LEGACY_FOCAL",
+                 0xB  : "SFI_CLASS_LEGACY_NONFOCAL",
+                 0xC  : "SFI_CLASS_USER_INITIATED_FOCAL",
+                 0xD  : "SFI_CLASS_USER_INITIATED_NONFOCAL",
+                 0xE  : "SFI_CLASS_USER_INTERACTIVE_FOCAL",
+                 0xF  : "SFI_CLASS_USER_INTERACTIVE_NONFOCAL",
+                 0x10 : "SFI_CLASS_MAINTENANCE",
+                }
+    tasks = []
+    field_name = 'task_coalition'
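+    # Tasks are linked through task->task_coalition[coal_type], so offset the
+    # linkage field by coal_type queue_chain_t's to follow the right array slot.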
+    for task in IterateLinkageChain(queue, 'task *', field_name, coal_type * sizeof('queue_chain_t')):
+        task_str = "({0: <d},{1: #x}, {2: <s}, {3: <s})".format(GetProcPIDForTask(task),task,GetProcNameForTask(task),GetTaskRoleString(task.effective_policy.t_role))
+        if thread_details:
+            for thread in IterateQueue(task.threads, "thread_t", "task_threads"):
+                task_str += "\n\t\t\t|-> thread:" + hex(thread) + ", " + sfi_strs[int(thread.sfi_class)]
+        tasks.append(task_str)
+    return tasks
+
+def GetCoalitionTypeString(type):
+    """ Convert a coalition type field into a string
+    Currently supported types (from <mach/coalition.h>):
+        COALITION_TYPE_RESOURCE
+        COALITION_TYPE_JETSAM
+    """
+    if type == 0: # COALITION_TYPE_RESOURCE
+        return 'RESOURCE'
+    if type == 1:
+        return 'JETSAM'
+    return '<unknown>'
+
+def GetResourceCoalitionSummary(coal, verbose=False):
+    """ Summarize a resource coalition
+    """
+    out_string = "Resource Coalition:\n\t  Ledger:\n"
+    thread_details = False
+    if config['verbosity'] > vSCRIPT:
+        thread_details = True
+    ledgerp = coal.r.ledger
+    if verbose and unsigned(ledgerp) != 0:
+        i = 0
+        while i != ledgerp.l_template.lt_cnt:
+            out_string += "\t\t"
+            out_string += GetLedgerEntrySummary(kern.globals.task_ledger_template, ledgerp.l_entries[i], i)
+            i = i + 1
+    out_string += "\t  bytesread {0: <d}\n\t  byteswritten {1: <d}\n\t  gpu_time {2: <d}".format(coal.r.bytesread, coal.r.byteswritten, coal.r.gpu_time)
+    out_string += "\n\t  total_tasks {0: <d}\n\t  dead_tasks {1: <d}\n\t  active_tasks {2: <d}".format(coal.r.task_count, coal.r.dead_task_count, coal.r.task_count - coal.r.dead_task_count)
+    out_string += "\n\t  last_became_nonempty_time {0: <d}\n\t  time_nonempty {1: <d}".format(coal.r.last_became_nonempty_time, coal.r.time_nonempty)
+    out_string += "\n\t  Tasks:\n\t\t"
+    tasks = GetCoalitionTasks(addressof(coal.r.tasks), 0, thread_details)
+    out_string += "\n\t\t".join(tasks)
+    return out_string
+
+def GetJetsamCoalitionSummary(coal, verbose=False):
+    """ Summarize a jetsam coalition
+    """
+    out_string = "Jetsam Coalition:"
+    thread_details = False
+    if config['verbosity'] > vSCRIPT:
+        thread_details = True
+    if unsigned(coal.j.leader) == 0:
+        out_string += "\n\t  NO Leader!"
+    else:
+        out_string += "\n\t  Leader:\n\t\t"
+        out_string += "({0: <d},{1: #x}, {2: <s}, {3: <s})".format(GetProcPIDForTask(coal.j.leader),coal.j.leader,GetProcNameForTask(coal.j.leader),GetTaskRoleString(coal.j.leader.effective_policy.t_role))
+    out_string += "\n\t  Extensions:\n\t\t"
+    tasks = GetCoalitionTasks(addressof(coal.j.extensions), 1, thread_details)
+    out_string += "\n\t\t".join(tasks)
+    out_string += "\n\t  XPC Services:\n\t\t"
+    tasks = GetCoalitionTasks(addressof(coal.j.services), 1, thread_details)
+    out_string += "\n\t\t".join(tasks)
+    out_string += "\n\t  Other Tasks:\n\t\t"
+    tasks = GetCoalitionTasks(addressof(coal.j.other), 1, thread_details)
+    out_string += "\n\t\t".join(tasks)
+    return out_string
+
+@lldb_type_summary(['coalition_t', 'coalition *'])
+@header("{0: <20s} {1: <15s} {2: <10s} {3: <10s} {4: <10s} {5: <12s} {6: <12s} {7: <20s}".format("coalition", "type", "id", "ref count", "act count", "focal cnt", "nonfocal cnt","flags"))
+def GetCoalitionSummary(coal):
+    if unsigned(coal) == 0:
+        return '{0: <#020x} {1: <15s} {2: <10d} {3: <10d} {4: <10d} {5: <12d} {6: <12d} {7: <s}'.format(0, "", -1, -1, -1, -1, -1, "")
+    out_string = ""
+    format_string = '{0: <#020x} {1: <15s} {2: <10d} {3: <10d} {4: <10d} {5: <12d} {6: <12d} {7: <s}'
+    type_string = GetCoalitionTypeString(coal.type)
+    flag_string = GetCoalitionFlagString(coal)
+    out_string += format_string.format(coal, type_string, coal.id, coal.ref_count, coal.active_count, coal.focal_task_count, coal.nonfocal_task_count, flag_string)
+    return out_string
+
+def GetCoalitionInfo(coal, verbose=False):
+    """ returns a string describing a coalition, including details about the particular coalition type.
+        params:
+            coal : value object representing a coalition in the kernel
+        returns:
+            str : A string describing the coalition.
+    """
+    if unsigned(coal) == 0:
+        return "<null coalition>"
+    typestr = GetCoalitionTypeString(coal.type)
+    flagstr = GetCoalitionFlagString(coal)
+    out_string = ""
+    out_string += "Coalition {c: <#020x}\n\tID {c.id: <d}\n\tType {c.type: <d} ({t: <s})\n\tRefCount {c.ref_count: <d}\n\tActiveCount {c.active_count: <d}\n\tFocal Tasks: {c.focal_task_count: <d}\n\tNon-Focal Tasks: {c.nonfocal_task_count: <d}\n\tFlags {f: <s}\n\t".format(c=coal,t=typestr,f=flagstr)
+    if coal.type == 0: # COALITION_TYPE_RESOURCE
+        out_string += GetResourceCoalitionSummary(coal, verbose)
+    elif coal.type == 1: # COALITION_TYPE_JETSAM
+        out_string += GetJetsamCoalitionSummary(coal, verbose)
+    else:
+        out_string += "Unknown Type"
 
     return out_string
 
+# Macro: showcoalitioninfo
+
+@lldb_command('showcoalitioninfo')
+def ShowCoalitionInfo(cmd_args=None, cmd_options={}):
+    """  Display more detailed information about a coalition
+         Usage: showcoalitioninfo <address of coalition>
+    """
+    verbose = False
+    if config['verbosity'] > vHUMAN:
+        verbose = True
+    if not cmd_args:
+        print "No arguments passed"
+        print ShowCoalitionInfo.__doc__
+        return False
+    coal = kern.GetValueFromAddress(cmd_args[0], 'coalition *')
+    if not coal:
+        print "unknown arguments:", str(cmd_args)
+        return False
+    print GetCoalitionInfo(coal, verbose)
+
+# EndMacro: showcoalitioninfo
+
+# Macro: showallcoalitions
+
+@lldb_command('showallcoalitions')
+def ShowAllCoalitions(cmd_args=None):
+    """  Print a summary listing of all the coalitions
+    """
+    global kern
+    print GetCoalitionSummary.header
+    for c in kern.coalitions:
+        print GetCoalitionSummary(c)
+
+# EndMacro: showallcoalitions
+
+# Macro: showtaskcoalitions
+
+@lldb_command('showtaskcoalitions', 'F:')
+def ShowTaskCoalitions(cmd_args=None, cmd_options={}):
+    """
+    """
+    task_list = []
+    if "-F" in cmd_options:
+        task_list = FindTasksByName(cmd_options["-F"])
+    elif cmd_args:
+        t = kern.GetValueFromAddress(cmd_args[0], 'task *')
+        task_list.append(t)
+    else:
+        raise ArgumentError("No arguments passed")
+
+    if len(task_list) > 0:
+        print GetCoalitionSummary.header
+    for task in task_list:
+        print GetCoalitionSummary(task.coalition[0])
+        print GetCoalitionSummary(task.coalition[1])
+
+# EndMacro: showtaskcoalitions
+
 @lldb_type_summary(['proc', 'proc *'])
 @header("{0: >6s} {1: ^20s} {2: >14s} {3: ^10s} {4: <20s}".format("pid", "process", "io_policy", "wq_state", "command"))
 def GetProcSummary(proc):
@@ -439,17 +653,29 @@ def GetKQueueSummary(kq):
             state_str += ' ' + xnudefines.kq_state_strings[int(state & mask)]
         mask = mask << 1
     out_string += format_string.format(o=kq, st_str=state_str)
+    out_string += "\n" + GetKnoteSummary.header
+    for kn in IterateTAILQ_HEAD(kq.kq_head, 'kn_tqe'):
+        out_string += "\n" + GetKnoteSummary(kn)
     return out_string
 
 @lldb_type_summary(['knote *'])
-@header("{0: <20s}".format('knote'))
+@header("{0: <20s} {1: <10s} {2: <10s} {3: <20s} {4: <20s} {5: <30s}".format('knote', 'ident', 'kev_flags', 'kn_kq', 'filtops', ' status'))
 def GetKnoteSummary(kn):
     """ Summarizes a knote and related information
         returns: str - summary of knote
     """
     out_string = ""
-    format_string = "{o: <#020x}"
-    out_string += format_string.format(o=kn)
+    format_string = "{o: <#020x} {o.kn_kevent.ident: <#010X} {o.kn_kevent.flags: <#010X} {o.kn_kq: <#020X} {ops_str: <20s} {st_str: <30s}"
+    state = unsigned(kn.kn_status)
+    fops_str = kern.Symbolicate(unsigned(kn.kn_fop))
+    mask = 0x1
+    status_desc = ''
+    while mask <= 0x40:
+        if state & mask:
+            status_desc += ' ' + xnudefines.kn_state_strings[int(state & mask)]
+        mask = mask << 1
+
+    out_string += format_string.format(o=kn, st_str=status_desc, ops_str=fops_str)
     return out_string
 
 # Macro: showtask
@@ -596,6 +822,51 @@ def ShowProcFiles(cmd_args=None):
 
 #EndMacro: showprocfiles
 
+
+def GetProcKqueues(proc):
+    filetype_KQUEUE = 5
+
+    proc_filedesc = proc.p_fd
+    proc_lastfile = unsigned(proc_filedesc.fd_lastfile)
+    proc_ofiles = proc_filedesc.fd_ofiles
+
+    queues = list()
+
+    if unsigned(proc_ofiles) == 0:
+        return queues
+
+    count = 0
+
+    while count <= proc_lastfile:
+        if unsigned(proc_ofiles[count]) != 0:
+            proc_fd_flags = proc_ofiles[count].f_flags
+            proc_fd_fglob = proc_ofiles[count].f_fglob
+            proc_fd_ftype = unsigned(proc_fd_fglob.fg_ops.fo_type)
+            if proc_fd_ftype == filetype_KQUEUE:
+                q = Cast(proc_fd_fglob.fg_data, 'struct kqueue *')
+                queues.append(q)
+        count += 1
+
+    return queues
+
+def GetAllKqueues():
+    for t in kern.tasks:
+        if unsigned(t.bsd_info) == 0:
+            continue
+        pval = Cast(t.bsd_info, 'proc *')
+        for kq in GetProcKqueues(pval):
+            yield kq
+
+#Macro: showallkqueues
+@lldb_command('showallkqueues' ,'')
+def ShowAllKqueues(cmd_args=[], cmd_options={}):
+    """ Display a summary of all the kqueues in the system """
+    for kq in GetAllKqueues():
+        print GetKQueueSummary.header
+        print GetKQueueSummary(kq)
+        print "\n\n"
+#EndMacro: showallkqueues
+
 #Macro: showkqueue
 @lldb_command('showkqueue' ,'')
 def ShowKQueue(cmd_args=[], cmd_options={}):
@@ -699,6 +970,40 @@ def ShowAllTTYDevs(cmd_args=[], cmd_options={}):
 
 #EndMacro: showallttydevs
 
+#Macro: dumpthread_terminate_queue
+
+@lldb_command('dumpthread_terminate_queue')
+def DumpThreadTerminateQueue(cmd_args=None):
+    """ Displays the contents of the specified call_entry queue.
+        Usage: dumpthread_terminate_queue 
+    """
+    
+    count = 0
+    print GetThreadSummary.header
+    for th in IterateQueue(addressof(kern.globals.thread_terminate_queue), 'struct thread *',  'q_link'):
+        print GetThreadSummary(th)
+        count += 1
+    print "{0: <d} entries!".format(count)
+
+#EndMacro: dumpthread_terminate_queue
+
+#Macro: dumpcrashed_thread_queue
+
+@lldb_command('dumpcrashed_thread_queue')
+def DumpCrashedThreadsQueue(cmd_args=None):
+    """ Displays the contents of the specified call_entry queue.
+        Usage: dumpcrashed_thread_queue 
+    """
+    
+    count = 0
+    print GetThreadSummary.header
+    for th in IterateQueue(addressof(kern.globals.crashed_threads_queue), 'struct thread *',  'q_link'):
+        print GetThreadSummary(th)
+        count += 1
+    print "{0: <d} entries!".format(count)
+
+#EndMacro: dumpcrashed_thread_queue
+
 #Macro: dumpcallqueue
 
 @lldb_command('dumpcallqueue')
@@ -707,8 +1012,8 @@ def DumpCallQueue(cmd_args=None):
         Usage: dumpcallqueue <queue_head_t *>
     """
     if not cmd_args:
-        print DumpCallQueue.__doc__
-        return
+        raise ArgumentError("Invalid arguments")
+
     print "{0: <18s} {1: <18s} {2: <18s} {3: <64s} {4: <18s}".format('CALL_ENTRY', 'PARAM0', 'PARAM1', 'DEADLINE', 'FUNC')
     callhead = kern.GetValueFromAddress(cmd_args[0], 'queue_head_t *')
     count = 0
@@ -721,55 +1026,8 @@ def DumpCallQueue(cmd_args=None):
 
 #EndMacro: dumpcallqueue
 
-@lldb_command('showallcoalitions')
-def ShowAllCoalitions(cmd_args=None):
-    """  Routine to print a summary listing of all the coalitions
-    """
-    global kern
-    
-    role_strs = {
-                 0 : "TASK_UNSPECIFIED",
-                 1 : "TASK_FOREGROUND_APPLICATION",
-                 2 : "TASK_BACKGROUND_APPLICATION",
-                 3 : "TASK_CONTROL_APPLICATION",
-                 4 : "TASK_GRAPHICS_SERVER",
-                 5 : "TASK_THROTTLE_APPLICATION",
-                 6 : "TASK_NONUI_APPLICATION",
-                 7 : "TASK_DEFAULT_APPLICATION",
-                }
-    
-    sfi_strs = {
-                 0x0  : "SFI_CLASS_UNSPECIFIED",
-                 0x1  : "SFI_CLASS_DARWIN_BG",
-                 0x2  : "SFI_CLASS_APP_NAP",
-                 0x3  : "SFI_CLASS_MANAGED_FOCAL",
-                 0x4  : "SFI_CLASS_MANAGED_NONFOCAL",
-                 0x5  : "SFI_CLASS_DEFAULT_FOCAL",
-                 0x6  : "SFI_CLASS_DEFAULT_NONFOCAL",
-                 0x7  : "SFI_CLASS_KERNEL",
-                 0x8  : "SFI_CLASS_OPTED_OUT",
-                 0x9  : "SFI_CLASS_UTILITY",
-                 0xA  : "SFI_CLASS_LEGACY_FOCAL",
-                 0xB  : "SFI_CLASS_LEGACY_NONFOCAL",
-                 0xC  : "SFI_CLASS_USER_INITIATED_FOCAL",
-                 0xD  : "SFI_CLASS_USER_INITIATED_NONFOCAL",
-                 0xE  : "SFI_CLASS_USER_INTERACTIVE_FOCAL",
-                 0xF  : "SFI_CLASS_USER_INTERACTIVE_NONFOCAL",
-                 0x10 : "SFI_CLASS_MAINTENANCE",
-                }
-    
-
-    print GetCoalitionSummary.header
-    for c in kern.coalitions:
-        print GetCoalitionSummary(c)
-        for task in IterateQueue(c.tasks, "task_t", "coalition_tasks"):
-            print "\t" + hex(task) + " " + GetProcNameForTask(task) + " " + role_strs[int(task.effective_policy.t_role)]
-            for thread in IterateQueue(task.threads, "thread_t", "task_threads"):
-                print "\t\t" + hex(thread) + " " + sfi_strs[int(thread.sfi_class)]
-
-
-@lldb_command('showalltasks') 
-def ShowAllTasks(cmd_args=None):
+@lldb_command('showalltasks','C')
+def ShowAllTasks(cmd_args=None, cmd_options={}):
     """  Routine to print a summary listing of all the tasks
          wq_state -> reports "number of workq threads", "number of scheduled workq threads", "number of pending work items"
          if "number of pending work items" seems stuck at non-zero, it may indicate that the workqueue mechanism is hung
@@ -777,12 +1035,20 @@ def ShowAllTasks(cmd_args=None):
                      NORM  - normal I/O explicitly requested (this is the default)
                      PASS  - passive I/O requested (i.e. I/Os do not affect throttling decisions)
                      THROT - throttled I/O requested (i.e. thread/task may be throttled after each I/O completes)
+         Usage: (lldb) showalltasks -C  : describe the corpse structure
     """
     global kern
-    print GetTaskSummary.header + " " + GetProcSummary.header
+    extra_hdr = ''
+    showcorpse = False
+    if '-C' in cmd_options:
+        showcorpse = True
+        extra_hdr += " " + GetKCDataSummary.header
+
+    print GetTaskSummary.header + extra_hdr + " " + GetProcSummary.header
     for t in kern.tasks:
         pval = Cast(t.bsd_info, 'proc *')
-        print GetTaskSummary(t) +" "+ GetProcSummary(pval)
+        out_str = GetTaskSummary(t, showcorpse) + " " + GetProcSummary(pval)
+        print out_str
     ZombTasks()
 
 @lldb_command('showterminatedtasks') 
@@ -951,6 +1217,38 @@ def SwitchToAct(cmd_args=None):
     if not LazyTarget.GetProcess().SetSelectedThread(lldbthread):
         print "Failed to switch thread."
     return
+
+@lldb_command('switchtoregs')
+def SwitchToRegs(cmd_args=None):
+    """ Routine to switch to a register state.
+        Usage: (lldb) switchtoregs <struct arm_saved_state[64] *>
+        This command creates a fake thread in lldb with the saved register state.
+        Note: This command ONLY works for ARM based kernel setup.
+    """
+    
+    if cmd_args == None or len(cmd_args) < 1:
+        raise ArgumentError("No arguments passed")
+
+    lldb_process = LazyTarget.GetProcess()
+    
+    saved_state = ArgumentStringToInt(cmd_args[0])
+    # any change to this logic requires change in operating_system.py as well
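+    # the fake id works out to 0xdead0000 | (saved_state & 0xffff): the low 16
+    # bits of the saved-state pointer under a recognizable 0xdead prefix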
+    fake_thread_id = 0xdead0000 | (saved_state & ~0xffff0000)
+    fake_thread_id = fake_thread_id & 0xdeadffff
+    lldb_process.CreateOSPluginThread(0xdeadbeef, saved_state)
+    lldbthread = lldb_process.GetThreadByID(fake_thread_id)
+    
+    if not lldbthread.IsValid():
+        print "Failed to create thread"
+        return
+
+    lldb_process.selected_thread = lldbthread
+    if not lldb_process.SetSelectedThread(lldbthread):
+        print "Failed to switch thread"
+    print "Switched to Fake thread created from register state at 0x%x" % saved_state
+            
+
+
 # Macro: showallstacks
 @lldb_command('showallstacks')
 def ShowAllStacks(cmd_args=None):
@@ -1018,7 +1316,7 @@ def GetFullBackTrace(frame_addr, verbosity = vHUMAN, prefix = ""):
     # <rdar://problem/12677290> lldb unable to find symbol for _mh_execute_header
     mh_execute_addr = int(lldb_run_command('p/x (uintptr_t *)&_mh_execute_header').split('=')[-1].strip(), 16)
     while frame_ptr and frame_ptr != previous_frame_ptr and bt_count < 128:
-        if (kern.arch != 'arm' and frame_ptr < mh_execute_addr) or (kern.arch == 'arm' and frame_ptr > mh_execute_addr):
+        if (kern.arch not in ('arm', 'arm64') and frame_ptr < mh_execute_addr) or (kern.arch in ('arm', 'arm64') and frame_ptr > mh_execute_addr):
             break
         pc_val = kern.GetValueFromAddress(frame_ptr + kern.ptrsize,'uintptr_t *')
         pc_val = unsigned(dereference(pc_val))
@@ -1173,7 +1471,30 @@ def GetProcessorSummary(processor):
     if processor_state in processor_states:
         processor_state_str = "{0: <11s} ".format(processor_states[processor_state])
 
-    out_str = "Processor {: <#018x} cpu_id {:>#4x} State {:<s}\n".format(processor, int(processor.cpu_id), processor_state_str)
+    processor_recommended_str = ""
+    if int(processor.is_recommended) == 0:
+        processor_recommended_str = " (not recommended)"
+
+    ast = 0
+    preemption_disable = 0
+    preemption_disable_str = ""
+
+    if kern.arch == 'x86_64':
+        cpu_data = kern.globals.cpu_data_ptr[processor.cpu_id]
+        if (cpu_data != 0) :
+            ast = cpu_data.cpu_pending_ast
+            preemption_disable = cpu_data.cpu_preemption_level
+    # On arm64, it's kern.globals.CpuDataEntries[processor.cpu_id].cpu_data_vaddr
+    # but LLDB can't find CpuDataEntries...
+
+    ast_str = GetASTSummary(ast)
+
+    if (preemption_disable != 0) :
+        preemption_disable_str = "Preemption Disabled"
+
+    out_str = "Processor {: <#018x} cpu_id {:>#4x} AST: {:<6s} State {:<s}{:<s} {:<s}\n".format(
+            processor, int(processor.cpu_id), ast_str, processor_state_str, processor_recommended_str,
+            preemption_disable_str)
     return out_str   
 
 def GetGroupSetSummary(runq, task_map):
@@ -1198,9 +1519,12 @@ def GetGroupSetSummary(runq, task_map):
             
             out_str += "      Queue [{: <#012x}] Priority {: <3d} count {:d}\n".format(runq_queue_head, runq_queue_i, runq_queue_this_count)
             for entry in IterateQueue(runq_queue_head, "sched_entry_t", "links"):
-                group = entry.group
-                task = task_map.get(unsigned(group), "Unknown task!")
-                out_str += "\tEntry [{: <#012x}] Priority {: <3d} Group {: <#012x} Task {: <#012x}\n".format(unsigned(entry), entry.sched_pri, unsigned(entry.group), unsigned(task))
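+                # sched entries are stored in an array indexed by priority inside
+                # the sched_group, so recover the group base by backing the entry
+                # address up by sched_pri entries.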
+                group_addr = unsigned(entry) - (sizeof(dereference(entry)) * unsigned(entry.sched_pri))
+                group = kern.GetValueFromAddress(unsigned(group_addr), 'sched_group_t')
+                task = task_map.get(unsigned(group), 0x0)
+                if task == 0x0 :
+                    print "Cannot find task for group: {: <#012x}".format(group)
+                out_str += "\tEntry [{: <#012x}] Priority {: <3d} Group {: <#012x} Task {: <#012x}\n".format(unsigned(entry), entry.sched_pri, unsigned(group), unsigned(task))
                 
     return out_str
 
@@ -1265,6 +1589,21 @@ def GetGrrrSummary(grrr_runq):
                     out_str += "\t" + GetThreadBackTrace(thread, prefix="\t\t") + "\n"
     return out_str
 
+def ShowNextThread(processor):
+    out_str = ""
+    if (processor.next_thread != 0) :
+        out_str += "      " + "Next thread:\n"
+        out_str += "\t" + GetThreadSummary.header + "\n"
+        out_str += "\t" + GetThreadSummary(processor.next_thread) + "\n"
+    return out_str
+
+def ShowActiveThread(processor):
+    out_str = ""
+    if (processor.active_thread != 0) :
+        out_str += "\t" + GetThreadSummary.header + "\n"
+        out_str += "\t" + GetThreadSummary(processor.active_thread) + "\n"
+    return out_str
+
 @lldb_command('showallprocessors') 
 def ShowAllProcessors(cmd_args=None):
     """  Routine to print information of all psets and processors
@@ -1275,35 +1614,29 @@ def ShowAllProcessors(cmd_args=None):
     show_priority_runq = 0
     show_priority_pset_runq = 0
     show_group_pset_runq = 0
-    show_fairshare_grrr = 0
-    show_fairshare_list = 0
-    sched_enum_val = kern.globals._sched_enum
+    sched_string = str(kern.globals.sched_current_dispatch.sched_name)
     
-    if sched_enum_val == 1:
+    if sched_string == "traditional":
         show_priority_runq = 1
-        show_fairshare_list = 1
-    elif sched_enum_val == 2:
+    elif sched_string == "traditional_with_pset_runqueue":
         show_priority_pset_runq = 1
-        show_fairshare_list = 1
-    elif sched_enum_val == 4:
+    elif sched_string == "grrr":
         show_grrr = 1
-        show_fairshare_grrr = 1
-    elif sched_enum_val == 5:
+    elif sched_string == "multiq":
         show_priority_runq = 1
         show_group_pset_runq = 1
-        show_fairshare_list = 1
-    elif sched_enum_val == 6:
+    elif sched_string == "dualq":
         show_priority_pset_runq = 1        
         show_priority_runq = 1
-        show_fairshare_list = 1
+    else :
+        print "Unknown sched_string {:s}".format(sched_string)
 
     out_str = ''
     
-    out_str += "Scheduler: {:s} ({:s}, {:d})\n".format(kern.globals.sched_string,
-            kern.Symbolicate(unsigned(kern.globals.sched_current_dispatch)),
-            sched_enum_val)
+    out_str += "Scheduler: {:s} ({:s})\n".format(sched_string,
+            kern.Symbolicate(unsigned(kern.globals.sched_current_dispatch)))
     
-    out_str += "Runnable threads: {:d} Timeshare threads: {:d} Background threads {:d}\n".format(
+    out_str += "Runnable threads: {:d} Timeshare threads: {:d} Background threads: {:d}\n".format(
             kern.globals.sched_run_count, kern.globals.sched_share_count, kern.globals.sched_background_count)    
     
     if show_group_pset_runq:
@@ -1333,11 +1666,14 @@ def ShowAllProcessors(cmd_args=None):
                     task = task_map.get(unsigned(group), "Unknown task!")
                     out_str += "Group {: <#012x} Task {: <#012x}\n".format(unsigned(group), unsigned(task))
                     out_str += GetRunQSummary(group.runq)
-        
+
         out_str += "  Active Processors:\n"
         for processor in IterateQueue(pset.active_queue, "processor_t", "processor_queue"):
             out_str += "    "
             out_str += GetProcessorSummary(processor)
+            out_str += ShowActiveThread(processor)
+            out_str += ShowNextThread(processor)
+
             if show_priority_runq:
                 runq = processor.runq
                 out_str += GetRunQSummary(runq)
@@ -1348,32 +1684,48 @@ def ShowAllProcessors(cmd_args=None):
         out_str += "  Idle Processors:\n"
         for processor in IterateQueue(pset.idle_queue, "processor_t", "processor_queue"):
             out_str += "    " + GetProcessorSummary(processor)
+            out_str += ShowActiveThread(processor)
+            out_str += ShowNextThread(processor)
+
             if show_priority_runq:            
                 out_str += GetRunQSummary(processor.runq)
 
         out_str += "  Idle Secondary Processors:\n"
         for processor in IterateQueue(pset.idle_secondary_queue, "processor_t", "processor_queue"):
             out_str += "    " + GetProcessorSummary(processor)
+            out_str += ShowActiveThread(processor)
+            out_str += ShowNextThread(processor)
+
             if show_priority_runq:            
                 out_str += GetRunQSummary(processor.runq)
         
         pset = pset.pset_list
 
-    out_str += "\nRealtime Queue Count {:d}\n".format(kern.globals.rt_runq.count)
-    for rt_runq_thread in IterateQueue(kern.globals.rt_runq.queue, "thread_t", "links"):
-        out_str += ShowTask([unsigned(rt_runq_thread.task)])
-        out_str += ShowAct([unsigned(rt_runq_thread)])
+    out_str += "\nRealtime Queue ({:<#012x}) Count {:d}\n".format(addressof(kern.globals.rt_runq.queue), kern.globals.rt_runq.count)
+    if kern.globals.rt_runq.count != 0:
+        out_str += "\t" + GetThreadSummary.header + "\n"
+        for rt_runq_thread in IterateQueue(kern.globals.rt_runq.queue, "thread_t", "links"):
+            out_str += "\t" + GetThreadSummary(rt_runq_thread) + "\n"
+
+    out_str += "\nTerminate Queue: ({:<#012x})\n".format(addressof(kern.globals.thread_terminate_queue))
+    first = True
+    for thread in IterateQueue(kern.globals.thread_terminate_queue, "thread_t", "links"):
+        if first:
+            out_str += "\t" + GetThreadSummary.header + "\n"
+            first = False
+        out_str += "\t" + GetThreadSummary(thread) + "\n"
+
+    out_str += "\nCrashed Threads Queue: ({:<#012x})\n".format(addressof(kern.globals.crashed_threads_queue))
+    first = True
+    for thread in IterateQueue(kern.globals.crashed_threads_queue, "thread_t", "links"):
+        if first:
+            out_str += "\t" + GetThreadSummary.header + "\n"
+            first = False
+        out_str += "\t" + GetThreadSummary(thread) + "\n"
+
+    out_str += "\n"
     
     out_str += "\n"
-    if show_fairshare_list:
-        out_str += "Fair Share Queue Count {:d}\n".format(kern.globals.fs_runq.count)
-        for fs_runq_thread in IterateQueue(kern.globals.fs_runq.queue, "thread_t", "links"):
-            out_str += ShowTask([unsigned(fs_runq_thread.task)])
-            out_str += ShowAct([unsigned(rt_runq_thread)])
-    if show_fairshare_grrr:
-        out_str += "Fair Share Queue Count {:d}\n".format(kern.globals.fs_grrr_runq.count)
-        fs_grrr = addressof(kern.globals.fs_grrr_runq)
-        out_str += GetGrrrSummary(fs_grrr)
 
     print out_str
 # EndMacro: showallprocessors
index 45370808328932721adf2b88daeb888f720021f2..91a8df3f166e03419c42b7c9d1a3671911f16426 100644 (file)
@@ -5,31 +5,23 @@ from process import *
 # TODO: write scheduler related macros here
 
 # Macro: showinterrupts
+
 @lldb_command('showinterrupts')
 def ShowInterrupts(cmd_args=None):
     """ Prints IRQ, IPI and TMR counts for each CPU
-    """
-    bcdata = kern.GetValueFromAddress(kern.GetLoadAddressForSymbol('BootCpuData'), 'cpu_data_t *')
-    print "CPU 0 IRQ: {:d}\n".format(bcdata.cpu_stat.irq_ex_cnt)
-    print "CPU 0 IPI: {:d}\n".format(bcdata.cpu_stat.ipi_cnt)
-    print "CPU 0 TMR: {:d}\n".format(bcdata.cpu_stat.timer_cnt)
-    if (kern.globals.machine_info.physical_cpu == 2):
-        if kern.arch == 'arm':
-            cdentries = kern.GetValueFromAddress(kern.GetLoadAddressForSymbol('CpuDataEntries') + 20, 'uintptr_t *')
-            cpu_data_entry = Cast(dereference(cdentries), 'cpu_data_t *')
-            print "CPU 1 IRQ: {:d}\n".format(cpu_data_entry.cpu_stat.irq_ex_cnt)
-            print "CPU 1 IPI: {:d}\n".format(cpu_data_entry.cpu_stat.ipi_cnt)
-            print "CPU 1 TMR: {:d}\n".format(cpu_data_entry.cpu_stat.timer_cnt)
-        elif kern.arch == 'arm64':
-                cdentries = kern.GetValueFromAddress(kern.GetLoadAddressForSymbol('CpuDataEntries') + 24, 'uintptr_t *')
-                cpu_data_entry = Cast(dereference(cdentries), 'cpu_data_t *')
-                print "CPU 1 IRQ: {:d}\n".format(cpu_data_entry.cpu_stat.irq_ex_cnt)
-                print "CPU 1 IPI: {:d}\n".format(cpu_data_entry.cpu_stat.ipi_cnt)
-                print "CPU 1 TMR: {:d}\n".format(cpu_data_entry.cpu_stat.timer_cnt)
-
+    """ 
+    base_address = kern.GetLoadAddressForSymbol('CpuDataEntries')
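+    # Each CpuDataEntries slot holds two pointer-sized fields (presumably the
+    # physical and virtual cpu_data addresses), hence the 16-byte stride and
+    # the [1] index below to pick the virtual cpu_data_t pointer.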
+    struct_size = 16  
+    for x in range (0, unsigned(kern.globals.machine_info.physical_cpu)):
+        element  = kern.GetValueFromAddress(base_address + (x * struct_size), 'uintptr_t *')[1]
+        cpu_data_entry = Cast(element, 'cpu_data_t *')
+        print "CPU {} IRQ: {:d}\n".format(x, cpu_data_entry.cpu_stat.irq_ex_cnt)
+        print "CPU {} IPI: {:d}\n".format(x, cpu_data_entry.cpu_stat.ipi_cnt)
+        print "CPU {} TMR: {:d}\n".format(x, cpu_data_entry.cpu_stat.timer_cnt)        
 # EndMacro: showinterrupts
 
 # Macro: showactiveinterrupts
+
 @lldb_command('showactiveinterrupts')
 def ShowActiveInterrupts(cmd_args=None):
     """  Prints the interrupts that are unmasked & active with the Interrupt Controller
@@ -64,6 +56,155 @@ def ShowActiveInterrupts(cmd_args=None):
             active = dereference(kern.GetValueFromAddress((current_pointer + aic_him_offset) + (4 * group_count), 'uintptr_t *'))
         else:
             mask = mask << 1
-    
 # EndMacro: showactiveinterrupts
 
+
+@lldb_command('showcurrentabstime')
+def ShowCurrentAbsTime(cmd_args=None):
+    """  Routine to print the latest absolute time known to the system before it was stopped.
+         Usage: showcurrentabstime
+    """
+    pset = addressof(kern.globals.pset0)
+    cur_abstime = 0
+
+    while unsigned(pset) != 0:
+        for processor in IterateQueue(pset.active_queue, "processor_t", "processor_queue"):
+            if unsigned(processor.last_dispatch) > cur_abstime:
+                cur_abstime = unsigned(processor.last_dispatch)
+
+        for processor in IterateQueue(pset.idle_queue, "processor_t", "processor_queue"):
+            if unsigned(processor.last_dispatch) > cur_abstime:
+                cur_abstime = unsigned(processor.last_dispatch)
+
+        for processor in IterateQueue(pset.idle_secondary_queue, "processor_t", "processor_queue"):
+            if unsigned(processor.last_dispatch) > cur_abstime:
+                cur_abstime = unsigned(processor.last_dispatch)
+
+        pset = pset.pset_list
+
+    print "Last dispatch time known: %d MATUs" % cur_abstime
+
+
+@lldb_command('abs2nano')
+def ShowAbstimeToNanoTime(cmd_args=[]):
+    """ convert mach_absolute_time units to nano seconds
+        Usage: (lldb) abs2nano <timestamp in MATUs>
+    """
+    if not cmd_args:
+        raise ArgumentError("Invalid argument")
+    timedata = ArgumentStringToInt(cmd_args[0])
+    print "%d ns" % kern.GetNanotimeFromAbstime(timedata)
+
+# Macro: showschedhistory
+
+def ShowThreadSchedHistory(thread, most_recent_dispatch):
+    out_str = ""
+    thread_name = ""
+
+    if int(thread.uthread) != 0:
+        uthread = Cast(thread.uthread, 'uthread *')
+        #check for thread name
+        if int(uthread.pth_name) != 0 :
+            th_name_strval = Cast(uthread.pth_name, 'char *')
+            if len(str(th_name_strval)) > 0 :
+                thread_name = str(th_name_strval)
+
+    task = thread.task
+    task_name = "unknown"
+    if task and unsigned(task.bsd_info):
+        p = Cast(task.bsd_info, 'proc *')
+        task_name = str(p.p_name)
+
+    sched_mode = ""
+
+    mode = str(thread.sched_mode)
+    if "TIMESHARE" in mode:
+        sched_mode+="timeshare"
+    elif "FIXED" in mode:
+        sched_mode+="fixed"
+    elif "REALTIME" in mode:
+        sched_mode+="realtime"
+
+    if (unsigned(thread.bound_processor) != 0):
+        sched_mode+="-bound"
+
+    # TH_SFLAG_THROTTLED
+    if (unsigned(thread.sched_flags) & 0x0004):
+        sched_mode+="-BG"
+
+    state = thread.state
+
+    thread_state_chars = {0x0:'', 0x1:'W', 0x2:'S', 0x4:'R', 0x8:'U', 0x10:'H', 0x20:'A', 0x40:'P', 0x80:'I'}
+    state_str = ''
+    mask = 0x1
+    while mask <= 0x80 :
+        state_str += thread_state_chars[int(state & mask)]
+        mask = mask << 1
+
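+    # computation_epoch is treated as the timestamp the thread last went
+    # on-core and last_run_time as when it last went off-core; the durations
+    # below are derived from those and the most recent dispatch seen system-wide.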
+    last_on = thread.computation_epoch
+    last_off = thread.last_run_time
+
+    time_on_abs = unsigned(last_off - last_on)
+    time_on_us = kern.GetNanotimeFromAbstime(time_on_abs) / 1000.0
+
+    time_since_off_abs = unsigned(most_recent_dispatch - last_off)
+    time_since_off_us = kern.GetNanotimeFromAbstime(time_since_off_abs) / 1000.0
+    time_since_on_abs = unsigned(most_recent_dispatch - last_on)
+    time_since_on_us = kern.GetNanotimeFromAbstime(time_since_on_abs) / 1000.0
+
+    fmt  = "0x{t:<16x} 0x{t.thread_id:<8x} {t.computation_epoch:16d} {t.last_run_time:16d} {time_on_us:16.3f} {time_since_off_us:16.3f} {time_since_on_us:16.3f}"
+    fmt2 = " {t.base_pri:2d} {t.sched_pri:2d} {t.task_priority:2d} {t.max_priority:2d} {sched_mode:19s}"
+    fmt3 = " {state:9s} {t.cpu_usage:10d} {t.cpu_delta:10d} {t.sched_usage:10d} {t.sched_stamp:10d} {t.pri_shift:10d} {name:s} {thread_name:s}"
+
+    out_str = fmt.format(t=thread, sched_mode=sched_mode, time_on_us=time_on_us, time_since_off_us=time_since_off_us, time_since_on_us=time_since_on_us)
+    out_str += fmt2.format(t=thread, sched_mode=sched_mode)
+    out_str += fmt3.format(t=thread, state=state_str, name=task_name, thread_name=thread_name)
+
+    return out_str
+
+@lldb_command('showschedhistory')
+def ShowSchedHistory(cmd_args=None):
+    """ Routine to print out thread scheduling history
+    """
+
+    print "Processors: {:d} Runnable threads: {:d} Timeshare threads: {:d} Background threads {:d}\n".format(
+            kern.globals.processor_avail_count, kern.globals.sched_run_count, kern.globals.sched_share_count, kern.globals.sched_background_count)
+
+    print "Mach factor: {:d} Load factor: {:d} Last sched tick {:d}\n".format(
+            kern.globals.sched_mach_factor, kern.globals.sched_load_average, kern.globals.sched_tick_last_abstime)
+
+    print "Sched tick: {:d} Fixed shift: {:d} Pri shift: {:d} Background pri shift {:d}\n".format(
+            kern.globals.sched_tick, kern.globals.sched_fixed_shift, kern.globals.sched_pri_shift, kern.globals.sched_background_pri_shift)
+
+    processor_list = kern.GetGlobalVariable('processor_list')
+
+    most_recent_dispatch = 0
+    current_processor = processor_list
+    while unsigned(current_processor) > 0:
+        active_thread = current_processor.active_thread
+        proc_name = ""
+        if unsigned(active_thread) != 0 :
+            task_val = active_thread.task
+            proc_val = Cast(task_val.bsd_info, 'proc *')
+            proc_name = str(proc_val.p_name)
+
+        last_dispatch = unsigned(current_processor.last_dispatch)
+
+        print "Processor last dispatch: {last_dispatch:16d} Active thread: 0x{t:<16x} 0x{t.thread_id:<8x} {proc_name:s}".format(t=active_thread, last_dispatch=last_dispatch, proc_name=proc_name)
+
+        if last_dispatch > most_recent_dispatch :
+            most_recent_dispatch = last_dispatch
+
+        current_processor = current_processor.processor_list
+
+    print "Most recent dispatch: " + str(most_recent_dispatch)
+
+    print "{:<18s} {:<10s} {:>16s} {:>16s} {:>16s} {:>16s} {:>16s} {:2s} {:2s} {:2s} {:>2s} {:<19s} {:<9s} {:>10s} {:>10s} {:>10s} {:>10s} {:>10s} {:>16s} {:>16s}".format(
+            "thread", "id", "on-core", "off-core", "last-duration", "since-off", "since-on", "BP", "SP", "TP", "MP", "sched-mode", "state", "cpu-usage", "delta", "sch-usage", "stamp", "shift", "task", "thread-name")
+
+    for thread in IterateQueue(kern.globals.threads, 'thread *', 'threads'):
+        print ShowThreadSchedHistory(thread, most_recent_dispatch)
+
+    return
+
+# EndMacro: showschedhistory
+
index 21eeb328a72c1441acb07e0cb2503af603bb019f..4b4dd7ae5b7f324d9d05b9eeebc2a8581404a806 100644 (file)
@@ -144,7 +144,7 @@ def ShowThreadUserStack(cmd_args=None):
         ShowARM64UserStack(thread)
     return True
 
-@lldb_command('printuserdata','X')
+@lldb_command('printuserdata','XO:')
 def PrintUserspaceData(cmd_args=None, cmd_options={}):
     """ Read userspace data for given task and print based on format provided.
         Syntax: (lldb) printuserdata <task_t> <uspace_address> <format_specifier>
@@ -152,9 +152,12 @@ def PrintUserspaceData(cmd_args=None, cmd_options={}):
             <task_t> : pointer to task
             <uspace_address> : address to user space memory
             <format_specifier> : String representation for processing the data and printing it.
-                                 e.g Q -> unsigned long long, q-> long long, I-> unsigned int, i->int
+                                 e.g. Q -> unsigned long long, q -> long long, I -> unsigned int, i -> int
+                                 10i -> 10 ints, 20s -> 20 character string, s -> null terminated string
+                                 See: https://docs.python.org/2/library/struct.html#format-characters
         options:
             -X : print all values in hex.
+            -O <file path>: Save data to file 
     """
 
     if not cmd_args or len(cmd_args) < 3:
@@ -163,6 +166,10 @@ def PrintUserspaceData(cmd_args=None, cmd_options={}):
     uspace_addr = ArgumentStringToInt(cmd_args[1])
     format_specifier_str = cmd_args[2]
     user_data_len = 0
+    if format_specifier_str == "s":
+        print "0x%x: " % uspace_addr + GetUserspaceString(task, uspace_addr)
+        return True
+
     try:
         user_data_len = struct.calcsize(format_specifier_str)
     except Exception, e:
@@ -172,15 +179,75 @@ def PrintUserspaceData(cmd_args=None, cmd_options={}):
     if not user_data_string:
         print "Could not read any data from userspace address."
         return False
+    if "-O" in cmd_options:
+        fh = open(cmd_options["-O"],"w")
+        fh.write(user_data_string)
+        fh.close()
+        print "Written %d bytes to %s." % (user_data_len, cmd_options['-O'])
+        return True
     upacked_data = struct.unpack(format_specifier_str, user_data_string)
+    element_size = user_data_len / len(upacked_data)
     for i in range(len(upacked_data)):
         if "-X" in cmd_options:
-            print "%d: " % i + hex(upacked_data[i])
+            print "0x%x: " % (uspace_addr + i*element_size) + hex(upacked_data[i])
         else:
-            print "%d: " % i + str(upacked_data[i])
+            print "0x%x: " % (uspace_addr + i*element_size) + str(upacked_data[i])
 
     return True
 
+
+@lldb_command('showtaskuserargs')
+def ShowTaskUserArgs(cmd_args=None, cmd_options={}):
+    """ Read the process argv, env, and apple strings from the user stack
+        Syntax: (lldb) showtaskuserargs <task_t>
+        params:
+            <task_t> : pointer to task
+    """
+    if not cmd_args or len(cmd_args) != 1:
+        raise ArgumentError("Insufficient arguments")
+
+    task = kern.GetValueFromAddress(cmd_args[0], 'task *')
+    proc = Cast(task.bsd_info, 'proc *')
+
+    format_string = "Q" if kern.ptrsize == 8 else "I"
+
+    string_area_size = proc.p_argslen
+    string_area_addr = proc.user_stack - string_area_size
+
+    string_area = GetUserDataAsString(task, string_area_addr, string_area_size)
+    if not string_area:
+        print "Could not read any data from userspace address."
+        return False
+
+    i = 0
+    pos = string_area_addr - kern.ptrsize
+
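+    # The exec stack places the NULL-terminated apple, env, and argv pointer
+    # arrays just below the string area, so walk the pointers downwards; a NULL
+    # ends the apple and env arrays while p_argc bounds argv.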
+    for name in ["apple", "env", "argv"] :
+        while True:
+            if name == "argv" :
+                if i == proc.p_argc:
+                    break
+                i += 1
+
+            pos -= kern.ptrsize
+
+            user_data_string = GetUserDataAsString(task, pos, kern.ptrsize)
+            ptr = struct.unpack(format_string, user_data_string)[0]          
+
+            if ptr == 0:
+                break
+
+            if string_area_addr <= ptr and ptr < string_area_addr+string_area_size :
+                string_offset = ptr - string_area_addr
+                string = string_area[string_offset:].split('\0')[0]
+            else:
+                string = GetUserspaceString(task, ptr)
+
+            print name + "[]: " + string
+
+    return True
+
+
 @lldb_command('showtaskuserstacks')
 def ShowTaskUserStacks(cmd_args=None):
     """ Print out the user stack for each thread in a task, followed by the user libraries.
@@ -367,35 +434,35 @@ def _ExtractDataFromString(strdata, offset, data_type, length=0):
         return 0
     return struct.unpack(unpack_str, strdata[offset:(offset + length)])[0]
 
-def GetPathForImage(task, path_address):
+def GetUserspaceString(task, string_address):
     """ Maps 32 bytes at a time and packs as string
         params:
             task: obj - referencing task to read data from
-            path_address: int - address where the image path is stored
+            string_address: int - address where the image path is stored
         returns:
             str - string path of the file. "" if failed to read.
     """
     done = False
     retval = ""
 
-    if path_address == 0:
+    if string_address == 0:
         done = True
 
     while not done:
-        path_str_data = GetUserDataAsString(task, path_address, 32)
-        if len(path_str_data) == 0:
+        str_data = GetUserDataAsString(task, string_address, 32)
+        if len(str_data) == 0:
             break
         i = 0
         while i < 32:
-            if ord(path_str_data[i]):
-                retval += path_str_data[i]
+            if ord(str_data[i]):
+                retval += str_data[i]
             else:
                 break
             i += 1
         if i < 32:
             done = True
         else:
-            path_address += 32
+            string_address += 32
     return retval
 
 def GetImageInfo(task, mh_image_address, mh_path_address, approx_end_address=None):
@@ -455,7 +522,7 @@ def GetImageInfo(task, mh_image_address, mh_path_address, approx_end_address=Non
             found_uuid_data = True
             uuid_out_string = "{a[0]:02X}{a[1]:02X}{a[2]:02X}{a[3]:02X}-{a[4]:02X}{a[5]:02X}-{a[6]:02X}{a[7]:02X}-{a[8]:02X}{a[9]:02X}-{a[10]:02X}{a[11]:02X}{a[12]:02X}{a[13]:02X}{a[14]:02X}{a[15]:02X}".format(a=uuid_data)
             #also print image path
-            path_out_string = GetPathForImage(task, mh_path_address)
+            path_out_string = GetUserspaceString(task, mh_path_address)
             path_base_name = path_out_string.split("/")[-1]
             retval = print_format.format(mh_image_address, image_end_load_address, path_base_name, uuid_out_string, path_out_string)
         elif lc_cmd == 0xe:
@@ -465,7 +532,7 @@ def GetImageInfo(task, mh_image_address, mh_path_address, approx_end_address=Non
         lc_idx += 1
 
     if not found_uuid_data:
-        path_out_string = GetPathForImage(task, mh_path_address)
+        path_out_string = GetUserspaceString(task, mh_path_address)
         path_base_name = path_out_string.split("/")[-1]
         uuid_out_string = ""
 
@@ -642,7 +709,7 @@ def ShowTaskUserDyldInfo(cmd_args=None):
     dyld_all_imfo_infos_slide = (dyld_all_image_infos_address - dyld_all_image_infos_dyldAllImageInfosAddress)
     dyld_all_image_infos_dyldVersion_postslide = (dyld_all_image_infos_dyldVersion + dyld_all_imfo_infos_slide)
 
-    path_out = GetPathForImage(task, dyld_all_image_infos_dyldVersion_postslide)
+    path_out = GetUserspaceString(task, dyld_all_image_infos_dyldVersion_postslide)
     out_str += "[dyld-{:s}]\n".format(path_out)
     out_str += "version \t\t\t\t: {:d}\n".format(dyld_all_image_infos_version)
     out_str += "infoArrayCount \t\t\t\t: {:d}\n".format(dyld_all_image_infos_infoArrayCount)
@@ -671,7 +738,7 @@ def ShowTaskUserDyldInfo(cmd_args=None):
 
     out_str += "errorMessage \t\t\t\t: {:#x}\n".format(dyld_all_image_infos_errorMessage)
     if dyld_all_image_infos_errorMessage != 0:
-        out_str += GetPathForImage(task, dyld_all_image_infos_errorMessage)
+        out_str += GetUserspaceString(task, dyld_all_image_infos_errorMessage)
 
     out_str += "terminationFlags \t\t\t: {:#x}\n".format(dyld_all_image_infos_terminationFlags)
     out_str += "coreSymbolicationShmPage \t\t: {:#x}\n".format(dyld_all_image_infos_coreSymbolicationShmPage)
@@ -713,17 +780,17 @@ def ShowTaskUserDyldInfo(cmd_args=None):
         out_str += "errorClientOfDylibPath \t\t\t: {:#x}\n".format(dyld_all_image_infos_errorClientOfDylibPath)
         if dyld_all_image_infos_errorClientOfDylibPath != 0:
             out_str += "\t\t\t\t"
-            out_str += GetPathForImage(task, dyld_all_image_infos_errorClientOfDylibPath)
+            out_str += GetUserspaceString(task, dyld_all_image_infos_errorClientOfDylibPath)
             out_str += "\n"
         out_str += "errorTargetDylibPath \t\t\t: {:#x}\n".format(dyld_all_image_infos_errorTargetDylibPath)
         if dyld_all_image_infos_errorTargetDylibPath != 0:
             out_str += "\t\t\t\t"
-            out_str += GetPathForImage(task, dyld_all_image_infos_errorTargetDylibPath)
+            out_str += GetUserspaceString(task, dyld_all_image_infos_errorTargetDylibPath)
             out_str += "\n"
         out_str += "errorSymbol \t\t\t\t: {:#x}\n".format(dyld_all_image_infos_errorSymbol)
         if dyld_all_image_infos_errorSymbol != 0:
             out_str += "\t\t\t\t"
-            out_str += GetPathForImage(task, dyld_all_image_infos_errorSymbol)
+            out_str += GetUserspaceString(task, dyld_all_image_infos_errorSymbol)
             out_str += "\n"
 
         if dyld_all_image_infos_version >= 12:
@@ -769,5 +836,51 @@ def ShowOSMalloc(cmd_args=None):
 # EndMacro: showosmalloc
 
 
+@lldb_command('savekcdata', 'T:O:')
+def SaveKCDataToFile(cmd_args=None, cmd_options={}):
+    """ Save the data referred by the kcdata_descriptor structure.
+        options:
+            -T: <task_t> pointer to the task if the memory referenced is in a user task's address space.
+            -O: <output file path> path to file to save data. default: /tmp/kcdata.<timestamp>.bin
+        Usage: (lldb) savekcdata <kcdata_descriptor_t> -T <task_t> -O /path/to/outputfile.bin
+    """
+    if not cmd_args:
+        raise ArgumentError('Please provide the kcdata descriptor.')
+
+    kcdata = kern.GetValueFromAddress(cmd_args[0], 'kcdata_descriptor_t')
+
+    outputfile = '/tmp/kcdata.{:s}.bin'.format(str(time.time()))
+    task = None
+    if '-O' in cmd_options:
+        outputfile = cmd_options['-O']
+    if '-T' in cmd_options:
+        task = kern.GetValueFromAddress(cmd_options['-T'], 'task_t')
+
+    memory_begin_address = unsigned(kcdata.kcd_addr_begin)
+    memory_size = 16 + unsigned(kcdata.kcd_addr_end) - memory_begin_address
+    flags_copyout = unsigned(kcdata.kcd_flags)
+    if flags_copyout:
+        if not task:
+            raise ArgumentError('Invalid task pointer provided.')
+        memory_data = GetUserDataAsString(task, memory_begin_address, memory_size)
+    else:
+        data_ptr = kern.GetValueFromAddress(memory_begin_address, 'uint8_t *')
+        memory_data = []
+        for i in range(memory_size):
+            memory_data.append(chr(data_ptr[i]))
+            if i % 50000 == 0:
+                print "%d of %d            \r" % (i, memory_size),
+        memory_data = ''.join(memory_data)
+
+    if len(memory_data) != memory_size:
+        print "Failed to read {:d} bytes from address {: <#020x}".format(memory_size, memory_begin_address)
+        return False
+
+    fh = open(outputfile, 'wb')
+    fh.write(memory_data)
+    fh.close()
+    print "Saved {:d} bytes to file {:s}".format(memory_size, outputfile)
+    return True
+
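+# Example invocation of savekcdata (the descriptor and task pointers are hypothetical;
+# substitute addresses from your own debug session):
+#   (lldb) savekcdata 0xffffff8045ab1200 -T 0xffffff8032c0de00 -O /tmp/kcdata.example.bin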
 
 
diff --git a/tools/lldbmacros/usertaskgdbserver.py b/tools/lldbmacros/usertaskgdbserver.py
new file mode 100644 (file)
index 0000000..94de936
--- /dev/null
@@ -0,0 +1,29 @@
+from xnu import *
+import logging
+_usertaskdebugging_available = False
+try:
+    from usertaskdebugging import userprocess
+    from usertaskdebugging import gdbserver
+    _usertaskdebugging_available = True
+except ImportError:
+    pass
+
+def setupLogging(debug_level):
+    log_level = debug_level
+    log_filename = "/tmp/kdbserver.log"
+    logging.basicConfig(level=log_level,
+                      filename=log_filename,
+                      format='%(asctime)s %(module)s %(levelname)s: %(message)s',
+                      datefmt='%Y-%m-%d %H:%M:%S')
+
+
+@lldb_command('beginusertaskdebugging', 'DW')
+def DoUserTaskDebuggingServer(cmd_args = [], cmd_options ={}):
+    """ starts a gdb protocol server that is backed by <task_t> in kernel debugging session.
+        Usage: (lldb) beginusertaskdebugging <task_t>
+        options: -D for debug level logging
+                 -W for warning level logging. 
+        default is error level logging
+    """
+    if not _usertaskdebugging_available:
+        print "You do not have the usertask debugging files available. "
+        return
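+
+# Example invocation of the macro above (the task pointer is a hypothetical value):
+#   (lldb) beginusertaskdebugging 0xffffff8032c0de00 -D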
index 5c7ff72c6e2bf7b498c440397be511b18dc7be10..68161d7c3bd46ab696329de6f3756f7cf3529038 100644 (file)
@@ -362,6 +362,22 @@ def loadDSYM(uuid, load_address):
     debuglog(cmd_str)
     lldb.debugger.HandleCommand(cmd_str)
 
+def RunShellCommand(command):
+    """ Run a shell command in subprocess.
+        params: command with arguments to run
+        returns: (exit_code, stdout, stderr)
+    """
+    import shlex, subprocess
+    cmd_args = shlex.split(command)
+    output_str = ""
+    exit_code = 0
+    try:
+        output_str = subprocess.check_output(cmd_args, stderr=subprocess.STDOUT)
+    except subprocess.CalledProcessError, e:
+        exit_code = e.returncode
+    finally:
+        return (exit_code, output_str, '')
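+
+# Usage sketch for RunShellCommand (the command shown is only illustrative; note the
+# helper merges stderr into stdout, so the third tuple element is always empty):
+#   (code, out, err) = RunShellCommand("ls -l /tmp")
+#   if code != 0:
+#       print "command failed:", code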
+
 def dsymForUUID(uuid):
     """ Get dsym informaiton by calling dsymForUUID 
         params: uuid - str - uuid string from executable. eg. 4DD2344C0-4A81-3EAB-BDCF-FEAFED9EB73E
@@ -393,3 +409,15 @@ def debuglog(s):
     if config['debug']:
       print "DEBUG:",s
     return None
+
+def IsAppleInternal():
+    """ check if apple_internal modules are available
+        returns: True if apple_internal module is present
+    """
+    import imp
+    try:
+        imp.find_module("apple_internal")
+        retval = True
+    except ImportError:
+        retval = False
+    return retval
diff --git a/tools/lldbmacros/waitq.py b/tools/lldbmacros/waitq.py
new file mode 100644 (file)
index 0000000..e5914d4
--- /dev/null
@@ -0,0 +1,1064 @@
+from xnu import *
+from utils import *
+from core.configuration import *
+
+import sys
+
+def GetWaitqStateStr(waitq):
+    wq_types = {
+            0: 'INV',
+            1: '???',
+            2: '  Q',
+            3: 'SET'
+    }
+    return wq_types[int(waitq.waitq_type)]
+
+def GetWaitqBitsStr(waitq):
+    out_str = ""
+    if (Cast(waitq.waitq_interlock, 'int') != 0):
+        if waitq.waitq_irq:
+            out_str += '!'
+        else:
+            out_str += '*'
+    if waitq.waitq_fifo:
+        out_str += 'F'
+    if waitq.waitq_prepost:
+        out_str += 'P'
+    if waitq.waitq_irq:
+        out_str += 'I'
+    return out_str
+
+def WaitqTableElemType(e):
+    type = (e.wqte.wqt_bits >> 29) & 0x3
+    wqe_type = {
+            0: 'FREE',
+            1: 'ELEM',
+            2: 'LINK',
+            3: 'RSVD'
+    }
+    return wqe_type[type]
+
+def WaitqTableElemId(e):
+    return e.wqte.wqt_id.id
+
+def WaitqTableElemValid(e):
+    if unsigned(e) == 0:
+        return 0
+    return (e.wqte.wqt_bits & 0x80000000) == 0x80000000
+
+def WaitqTableElemRefcnt(e):
+    return (e.wqte.wqt_bits & 0x1fffffff)
+
+def WaitqTableIdxFromId(id):
+    if hasattr(kern.globals, 'g_wqt_idx_max'):
+        idx = id & unsigned(kern.globals.g_wqt_idx_max)
+    else:
+        # best guess
+        idx = id & 0x000000000003ffff
+    return int(idx)
+
+def WaitqTableGenFromId(id):
+    if hasattr(kern.globals, 'g_wqt_idx_max'):
+        msk = ~unsigned(kern.globals.g_wqt_idx_max)
+    else:
+        # best guess
+        msk = ~0x000000000003ffff
+    shift = 0
+    while (msk & 0x1) == 0:
+        msk >>= 1
+        shift += 1
+    return (unsigned(id) >> shift) & msk
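+
+# Worked example of the id packing used by the two helpers above (with the
+# best-guess 18-bit index mask, 0x3ffff, when g_wqt_idx_max is unavailable):
+#   id = (gen << 18) | idx; for gen=5, idx=7 the id is 0x140007, and then
+#   WaitqTableIdxFromId(0x140007) == 7 and WaitqTableGenFromId(0x140007) == 5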
+
+def GetWaitqLink(id):
+    if int(id) == 0:
+        return 0, "NULL link id"
+    idx = WaitqTableIdxFromId(id)
+    if idx >= kern.globals.g_linktable.nelem:
+        return 0, "Invalid waitq link table id: {:d}".format(id)
+    slab_slot = idx / kern.globals.g_linktable.slab_elem;
+    slab = kern.globals.g_linktable.table[int(slab_slot)]
+    if slab == 0:
+        print "Invalid waitq link table id:", str(id), " (invalid slab)"
+    first_elem = Cast(slab, 'wqt_elem *')
+    addr = int(slab) + ((idx - first_elem.wqt_id.idx) * int(kern.globals.g_linktable.elem_sz))
+    link = kern.GetValueFromAddress(addr, 'setid_link *')
+    gen = WaitqTableGenFromId(id)
+    warn_str = ''
+    if gen > 0 and link.wqte.wqt_id.generation != gen:
+        warn_str = "WARNING: found idx:{:d}/gen:{:d}, but requested idx:{:d}/gen:{:d}".format(link.wqte.wqt_id.idx, link.wqte.wqt_id.generation, idx, gen)
+        link = 0
+    return link, warn_str
+
+def GetWaitqPrepost(id):
+    idx = WaitqTableIdxFromId(id)
+    if idx > int(kern.globals.g_prepost_table.nelem):
+        warn_str = "Invalid waitq prepost table id {:s}".format(str(id))
+        return 0, warn_str
+    slab_slot = idx / kern.globals.g_prepost_table.slab_elem;
+    slab = kern.globals.g_prepost_table.table[int(slab_slot)]
+    if slab == 0:
+        warn_str = "Invalid waitq prepost table id:", str(id), " (invalid slab)"
+        return 0, warn_str
+    first_elem = Cast(slab, 'wqt_elem *')
+    addr = int(slab) + ((idx - first_elem.wqt_id.idx) * int(kern.globals.g_prepost_table.elem_sz))
+    wqp = kern.GetValueFromAddress(addr, 'wq_prepost *')
+    gen = WaitqTableGenFromId(id)
+    warn_str = ''
+    if gen > 0 and wqp.wqte.wqt_id.generation != gen:
+        warn_str = "WARNING: found idx:{:d}/gen:{:d}, but requested idx:{:d}/gen:{:d}".format(wqp.wqte.wqt_id.idx, wqp.wqte.wqt_id.generation, idx, gen)
+        wqp = 0
+    return wqp, warn_str
+
+
+def GetWaitqSetidString(setid):
+    idx = WaitqTableIdxFromId(setid)
+    gen = WaitqTableGenFromId(setid)
+    # This must match the format used in WaitqSetsFromLink
+    str = "{:>7d}/{:<#14x}".format(unsigned(idx), unsigned(gen))
+    return str
+
+
+def WaitqSetsFromLink(link, sets, depth):
+    if int(link) == 0:
+        sets.append("{: <22s}".format("<link:NULL>"))
+        return
+    if WaitqTableElemType(link) == "ELEM":
+        #sets.append("{: <#18x}".format(unsigned(link.sl_wqs.sl_set)))
+        #sets.append("{:>7d}/{:<#14x}".format(unsigned(id.idx),unsigned(id.generation)))
+        sets.append(GetWaitqSetidString(link.wqte.wqt_id.id))
+        return
+    if depth >= 950:
+        sets.append("{: <22s}".format("!recursion limit!"))
+        return
+    left_link = GetWaitqLink(link.sl_link.sl_left_setid)[0]
+    right_link = GetWaitqLink(link.sl_link.sl_right_setid)[0]
+    WaitqSetsFromLink(left_link, sets, depth + 1)
+    WaitqSetsFromLink(right_link, sets, depth + 1)
+    return
+
+def GetWaitqSets(waitq):
+    sets = []
+    if int(waitq) == 0:
+        return sets
+    if waitq.waitq_set_id == 0:
+        return sets
+    link = GetWaitqLink(waitq.waitq_set_id)[0]
+    WaitqSetsFromLink(link, sets, 0)
+    return sets
+
+def GetFrameString(pc, compact=True):
+    str = GetSourceInformationForAddress(unsigned(pc))
+    if compact:
+        return re.sub(r'.*0x[0-9a-f]+\s+<(\w+)( \+ 0x[0-9a-f]+)*>.*', r'\1', str, re.UNICODE)
+    else:
+        return re.sub(r'.*(0x[0-9a-f]+)\s+<(\w+)( \+ 0x[0-9a-f]+)*>.*', r'\2(\1)', str, re.UNICODE)
+
+@lldb_type_summary(['setid_link', 'setid_link *'])
+@header("{:<18s} {:<18s} {:<19s} {:<10s} {:<1s} {:<4s} {:<10s} {:<20s}".format('addr','id','idx','gen','V','type','refcnt','info'))
+def GetWaitqSetidLinkSummary(link, verbose=False):
+    has_stats = 0
+    if not link:
+        return ""
+    fmt_str = "{l: <#18x} {l.wqte.wqt_id.id: <#18x} {l.wqte.wqt_id.idx: <7d} (->{l.wqte.wqt_next_idx: <7d}) {l.wqte.wqt_id.generation: <#10x} {v: <1s} {t: <4s} {rcnt: <10d} "
+    if hasattr(link, 'sl_alloc_task'):
+        has_stats = 1
+        fmt_str += "owner:{l.sl_alloc_task: <#x}/th:{l.sl_alloc_th: <#x}\n"
+        fmt_str += ' '*87
+        try:
+            pid = GetProcPIDForTask(link.sl_alloc_task)
+        except:
+            pid = unsigned(link.sl_alloc_task.audit_token.val[5])
+        pidnm = ""
+        if pid < 0:
+            pidnm = "DEAD:{:s}".format(GetProcNameForTask(link.sl_alloc_task))
+        else:
+            pidnm += GetProcNameForPid(pid)
+        fmt_str += "      ({:d}/{:s}), ".format(pid, pidnm)
+    type = WaitqTableElemType(link)
+    if type == "ELEM":
+        type = "WQS"
+    v = "F"
+    if WaitqTableElemValid(link):
+        v = "T"
+    refcnt = WaitqTableElemRefcnt(link)
+    out_str = fmt_str.format(l=link, v=v, t=type, rcnt=refcnt)
+    if type == "WQS":
+        out_str += "wqs:{0: <#18x}".format(unsigned(link.sl_wqs.sl_set))
+    elif type == "LINK":
+        lID = link.sl_link.sl_left_setid
+        rID = link.sl_link.sl_right_setid
+        left = GetWaitqLink(lID)[0]
+        right = GetWaitqLink(rID)[0]
+        ltype = "<invalid>"
+        if WaitqTableElemValid(left):
+            ltype = WaitqTableElemType(left)
+            if ltype == "ELEM":
+                ltype = "WQS"
+        rtype = "<invalid>"
+        if WaitqTableElemValid(right):
+            rtype = WaitqTableElemType(right)
+            if rtype == "ELEM":
+                rtype = "WQS"
+        out_str += "left:{:<#x}({:s}), right:{:<#x}({:s})".format(lID, ltype, rID, rtype)
+    if hasattr(link, 'sl_alloc_bt') and unsigned(link.sl_alloc_bt[0]) > 0:
+        fmt_str = "\n{:s}alloc_bt({:d}):[".format(' '*87, link.sl_alloc_ts)
+        f = 0
+        while f < kern.globals.g_nwaitq_btframes:
+            fstr = GetFrameString(link.sl_alloc_bt[f], not verbose)
+            f += 1
+            if f == kern.globals.g_nwaitq_btframes:
+                fmt_str += "{:<s}".format(fstr)
+            else:
+                fmt_str += "{:<s} <- ".format(fstr)
+        fmt_str += "]"
+        out_str += fmt_str
+    if hasattr(link, 'sl_mkvalid_bt') and unsigned(link.sl_mkvalid_bt[0]) > 0:
+        fmt_str = "\n{:s}mkvalid_bt({:d}):[".format(' '*87, link.sl_mkvalid_ts)
+        f = 0
+        while f < kern.globals.g_nwaitq_btframes:
+            fstr = GetFrameString(link.sl_mkvalid_bt[f], not verbose)
+            f += 1
+            if f == kern.globals.g_nwaitq_btframes:
+                fmt_str += "{:<s}".format(fstr)
+            else:
+                fmt_str += "{:<s} <- ".format(fstr)
+        fmt_str += "]"
+        out_str += fmt_str
+    if hasattr(link, 'sl_invalidate_bt') and unsigned(link.sl_invalidate_bt[0]) > 0:
+        fmt_str = "\n{:s}invalidate_bt({:d}):[".format(' '*87, link.sl_invalidate_ts)
+        f = 0
+        while f < kern.globals.g_nwaitq_btframes:
+            fstr = GetFrameString(link.sl_invalidate_bt[f], not verbose)
+            f += 1
+            if f == kern.globals.g_nwaitq_btframes:
+                fmt_str += "{:<s}".format(fstr)
+            else:
+                fmt_str += "{:<s} <- ".format(fstr)
+        fmt_str += "]"
+        out_str += fmt_str
+    return out_str
+
+def PrintWaitqSetidLinkTree(link, verbose, sets, indent=87):
+    if not WaitqTableElemType(link) == "LINK":
+        return
+    lID = link.sl_link.sl_left_setid
+    rID = link.sl_link.sl_right_setid
+    left = GetWaitqLink(lID)[0]
+    right = GetWaitqLink(rID)[0]
+
+    ltype = "<invalid>"
+    if WaitqTableElemValid(left):
+        ltype = WaitqTableElemType(left)
+        if ltype == "ELEM":
+            ltype = "WQS"
+    lstr = "L:{:<#x}({:s})".format(lID, ltype)
+
+    rtype = "<invalid>"
+    if WaitqTableElemValid(right):
+        rtype = WaitqTableElemType(right)
+        if rtype == "ELEM":
+            rtype = "WQS"
+    rstr = "R:{:<#x}({:s})".format(rID, rtype)
+
+    if ltype == "WQS":
+        sets.append(addressof(left.sl_wqs.sl_set.wqset_q))
+    if rtype == "WQS":
+        sets.append(addressof(right.sl_wqs.sl_set.wqset_q))
+
+    print "{:s}`->{:s}, {:s}".format(' '*indent, lstr, rstr)
+    if ltype == "WQS":
+        PrintWaitqSetidLinkTree(right, verbose, sets, indent + len(lstr) + 6);
+    else:
+        print "{:s}`->{:s}, {:s}".format(' '*indent, lstr, rstr)
+        PrintWaitqSetidLinkTree(left, verbose, sets, indent + 4);
+        PrintWaitqSetidLinkTree(right, verbose, sets, indent + len(lstr) + 6)
+    return
+
+# Macro: showsetidlink
+@lldb_command('showsetidlink', "S:FT")
+def ShowSetidLink(cmd_args=None, cmd_options={}):
+    """ Print setid_link structure summary
+
+        Note: you can pass either a complete ID (generation + index), or
+              just the index to the -S argument.
+
+        usage: showsetidlink [-F] [-S ID] [0xaddr]
+            -S {ID} : show the setid link whose ID is {ID}
+            -F      : follow the chain of setid structures
+                      and print a summary of each one
+            -T      : print the tree of setidlinks in table format
+    """
+    link = 0
+    followchain = 0
+    showtree = 0
+    verbose = False
+    if config['verbosity'] > vHUMAN:
+        verbose = True
+    if "-T" in cmd_options:
+        showtree = 1
+    if "-S" in cmd_options:
+        id = unsigned(kern.GetValueFromAddress(cmd_options["-S"], 'uint64_t *'))
+        link, warn_str = GetWaitqLink(id)
+        if not link:
+            if warn_str != '':
+                raise LookupError(warn_str)
+            else:
+                raise ArgumentError("Invalid link ID {:d}({:<#x}".format(id, id))
+    if "-F" in cmd_options:
+        followchain = 1
+    if link == 0:
+        if not cmd_args:
+            raise ArgumentError("Please pass the address of a setid_link object")
+        link = kern.GetValueFromAddress(cmd_args[0], 'setid_link *')
+    if not link:
+        raise ArgumentError("Invalid setid_link {:s}".format(cmd_args[0]))
+
+    print GetWaitqSetidLinkSummary.header
+    print GetWaitqSetidLinkSummary(link, verbose)
+    if followchain == 1:
+        next_id = link.wqte.wqt_next_idx
+        max_elem = int(kern.globals.g_linktable.nelem)
+        if hasattr(kern.globals, 'g_wqt_idx_max'):
+            max_elem = unsigned(kern.globals.g_wqt_idx_max)
+        while link != 0 and next_id < max_elem:
+            link, warn_str = GetWaitqLink(unsigned(next_id))
+            if link != 0:
+                print GetWaitqSetidLinkSummary(link, verbose)
+                next_id = link.wqte.wqt_next_idx
+    if showtree == 1:
+        sets = []
+        print "\nLinkTree:{:<#x}({:s})".format(link.wqte.wqt_id.id, WaitqTableElemType(link))
+        PrintWaitqSetidLinkTree(link, verbose, sets, 9)
+        if len(sets) > 0:
+            print "{:d} Sets:".format(len(sets))
+            for wq in sets:
+                pp_str = GetWaitqPreposts(wq)
+                npreposts = len(pp_str)
+                nps = ""
+                if npreposts > 0:
+                    if npreposts > 1:
+                        nps = "s: "
+                    else:
+                        nps = ": "
+                    nps += ';'.join(pp_str)
+                else:
+                    nps = "s"
+                print "\tWQS:{:<#x} ({:d} prepost{:s})".format(unsigned(wq),npreposts,nps)
+# EndMacro: showsetidlink
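+# Example invocations (the ID and address below are hypothetical):
+#   (lldb) showsetidlink -S 0x140007 -F       # follow the chain starting from a link ID
+#   (lldb) showsetidlink 0xffffff80deadbee0   # summarize a setid_link by address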
+
+
+# Macro: showallsetidlinks
+@lldb_command('showallsetidlinks', 'V:T:S:F:XQ')
+def ShowAllSetidLinks(cmd_args=None, cmd_options={}):
+    """ Dump / summarize all waitq set linktable elements
+
+        usage: showallsetidlinks [options]
+            -V {0,1}  : only show [1 == valid/live links, 0 == invalid links]
+            -T {type} : only display objects of type {type}
+            -S {desc} : only display objects of type {type} which fit {desc}
+                        -T LINK -S {desc} can be:
+                            iL   : Invalid left-link pointer (only)
+                            iR   : Invalid right-link pointer (only)
+                            iLR  : Invalid left+right link pointers
+                            iLRI : Invalid left+right link pointers AND dead allocating process
+                        w/o "-T" -S {desc} can be:
+                            iP   : Invalid / Dead allocating process
+            -F n      : summarize the backtraces at frame level 'n'
+            -X        : cross-check waitq pointers in link table
+            -Q        : be quiet, only summarize
+    """
+    opt_summary = 0
+    opt_type_filt = ""
+    opt_valid_only = 0
+    opt_invalid_only = 0
+    opt_bt_idx = 0
+    opt_cross_check = 0
+    opt_validate_links = 0
+    opt_subtype_filter = 0
+    verbose = False
+    if config['verbosity'] > vHUMAN:
+        verbose = True
+    if "-Q" in cmd_options:
+        opt_summary = 1
+    if "-V" in cmd_options:
+        if int(cmd_options["-V"]) == 1:
+            opt_valid_only = 1
+        elif int(cmd_options["-V"]) == 0:
+            opt_invalid_only = 1
+        else:
+            raise ArgumentError("Invalid parameter to -V '{:s}': expecting 0 or 1".format(cmd_options["-V"]))
+    if "-X" in cmd_options:
+        opt_cross_check = 1
+        nunique_wqs = 0
+        nduplicated_wqs = 0
+        max_wqs_dupes = 0
+    if "-F" in cmd_options:
+        opt_bt_idx = unsigned(cmd_options["-F"])
+        if hasattr(kern.globals, "g_nwaitq_btframes"):
+            if opt_bt_idx >= unsigned(kern.globals.g_nwaitq_btframes):
+                raise ArgumentError("Invalid BT index '{:s}' max:{:d}".format(cmd_options["-F"], unsigned(kern.globals.g_nwaitq_btframes) - 1))
+    if "-T" in cmd_options:
+        opt_type_filt = cmd_options["-T"]
+        if opt_type_filt == "FREE" or opt_type_filt == "RSVD" or opt_type_filt == "LINK":
+            pass
+        elif opt_type_filt == "WQS":
+            opt_type_filt = "ELEM"
+        else:
+            raise ArgumentError("Invalid type filter'{:s}'".format(cmd_options["-T"]))
+    if "-S" in cmd_options:
+        opt_subtype_filter = cmd_options["-S"]
+        if opt_type_filt == "LINK":
+            if not (opt_subtype_filter == "iL" or \
+                    opt_subtype_filter == "iR" or \
+                    opt_subtype_filter == "iLR" or \
+                    opt_subtype_filter == "iLRI"):
+                raise ArgumentError("Invalid LINK sub-type filter \{desc\}: {:s}".format(opt_subtype_filter))
+        elif opt_type_filt == "":
+            if not opt_subtype_filter == "iP":
+                raise ArgumentError("Invalid sub-type filter \{desc\}: {:s}".format(opt_subtype_filter))
+    table = kern.globals.g_linktable
+    nelem = int(table.nelem)
+    wq_ptr = {}
+    bt_summary = {}
+    nfree = 0
+    ninv = 0
+    nwqs = 0
+    nlink = 0
+    nrsvd = 0
+    ninconsistent = 0
+    hdr_str = "Looking through {:d} setid_link objects from g_linktable@{:<#x}".format(nelem, addressof(kern.globals.g_linktable))
+    if opt_type_filt != "" or opt_valid_only != 0:
+        hdr_str += "\n\t`-> for "
+        if opt_valid_only:
+            hdr_str += "valid "
+        else:
+            hdr_str += "all "
+        if opt_type_filt == "":
+            hdr_str += "objects"
+        else:
+            hdr_str += "{:s} objects".format(opt_type_filt)
+    else:
+        if opt_valid_only:
+            hdr_str += "\n\t`-> showing only VALID links"
+        elif opt_invalid_only:
+            hdr_str += "\n\t`-> showing only INVALID links"
+    if opt_subtype_filter != 0:
+        if opt_type_filt != "LINK" and opt_type_filt != "":
+            raise ArgumentError("Subtype (-S {desc}) can only be used with (-T LINK) or no type filter at all")
+        hdr_str += "\n\t`-> filtering {:s} objects through '{:s}'".format(opt_type_filt, opt_subtype_filter)
+    if opt_cross_check:
+        hdr_str += "\n\t`-> cross-checking WQS elements for duplicates"
+    hdr_str += "\n\n"
+    print hdr_str
+    if not opt_summary:
+        print GetWaitqSetidLinkSummary.header
+    id = 0
+    while id < nelem:
+        if id == 0:
+            # Set a generation count to differentiate from an invalid ID
+            first_entry = Cast(kern.globals.g_linktable.table[0], 'wqt_elem *')
+            link = GetWaitqLink(first_entry.wqt_id.id)[0]
+        else:
+            link = GetWaitqLink(id)[0]
+        if not link:
+            print "<<<invalid link:{:d}>>>".format(id)
+            ninv += 1
+        else:
+            lt = WaitqTableElemType(link)
+            isvalid = WaitqTableElemValid(link)
+            inconsistent = 0
+            do_print = not ( (isvalid and opt_invalid_only) or (not isvalid and opt_valid_only) )
+            if do_print and opt_subtype_filter != 0 and lt == "LINK":
+                lID = link.sl_link.sl_left_setid
+                rID = link.sl_link.sl_right_setid
+                left = GetWaitqLink(lID)[0]
+                right = GetWaitqLink(rID)[0]
+                lValid = WaitqTableElemValid(left)
+                rValid = WaitqTableElemValid(right)
+                if opt_subtype_filter == "iL":
+                    if lValid or (not lValid and not rValid):
+                        do_print = False
+                elif opt_subtype_filter == "iR":
+                    if rValid or (not rValid and not lValid):
+                        do_print = False
+                elif opt_subtype_filter == "iLR":
+                    if rValid or lValid:
+                        do_print = False
+                elif opt_subtype_filter == "iLRI" and hasattr(link, 'sl_alloc_task'):
+                    # only print this if both left and right are invalid
+                    # and the allocating task is unknown/dead
+                    do_print = False
+                    is_dead = 0
+                    pid = -1
+                    try:
+                        pid = GetProcPIDForTask(link.sl_alloc_task)
+                    except:
+                        if link.sl_alloc_task:
+                            pid = unsigned(link.sl_alloc_task.audit_token.val[5])
+                    if pid < 0:
+                        is_dead = 1
+                    else:
+                        pidnm = GetProcNameForPid(pid)
+                        if pidnm == "Unknown":
+                            is_dead = 1
+                    if (not rValid) and (not lValid) and is_dead:
+                        do_print = True
+
+            if do_print and opt_type_filt == "" and opt_subtype_filter == "iP" and hasattr(link, 'sl_alloc_task'):
+                # Only print non-free table objects that were allocated by
+                # dead processes
+                do_print = False
+                is_dead = 0
+                pid = -1
+                try:
+                    pid = GetProcPIDForTask(link.sl_alloc_task)
+                except:
+                    if link.sl_alloc_task:
+                        pid = unsigned(link.sl_alloc_task.audit_token.val[5])
+                if pid < 0:
+                    is_dead = 1
+                else:
+                    pidnm = GetProcNameForPid(pid)
+                    if pidnm == "Unknown":
+                        is_dead = 1
+                if is_dead:
+                    do_print = True
+
+            if (opt_type_filt == "" or opt_type_filt == lt) and do_print:
+                if lt == "ELEM":
+                    nwqs += 1
+                elif lt == "LINK":
+                    nlink += 1
+                elif lt == "RSVD":
+                    nrsvd += 1
+                elif lt == "FREE":
+                    nfree += 1
+                else:
+                    ninv += 1
+                    inconsistent = 1
+                if hasattr(link, 'sl_alloc_bt'):
+                    pc = unsigned(link.sl_alloc_bt[opt_bt_idx])
+                    pc_str = str(pc)
+                    if pc > 0:
+                        if pc_str in bt_summary:
+                            bt_summary[pc_str] += 1
+                        else:
+                            bt_summary[pc_str] = 1
+                if not opt_summary:
+                    print GetWaitqSetidLinkSummary(link, verbose)
+                if inconsistent:
+                    ninconsistent += 1
+                    # print out warnings about inconsistent state as we parse
+                    # the list - even if the caller wants a summary
+                    print "[WARNING] inconsistent state in idx: {:d} ({:s} element)".format(link.wqte.wqt_id.idx, lt)
+            if opt_cross_check == 1 and lt == "ELEM":
+                wq = unsigned(addressof(link.sl_wqs.sl_set.wqset_q))
+                if wq in wq_ptr:
+                    wq_ptr[wq].append(id)
+                    l = len(wq_ptr[wq])
+                    if l == 2:
+                        nduplicated_wqs += 1
+                    if l > max_wqs_dupes:
+                        max_wqs_dupes = l
+                else:
+                    wq_ptr[wq] = [ id ]
+                    nunique_wqs += 1
+        id += 1
+        if opt_summary or verbose:
+            if verbose and opt_cross_check:
+                sys.stderr.write('[{:d}|{:d}|{:d}] id: {:d}/{:d}...          \r'.format(nunique_wqs, nduplicated_wqs, max_wqs_dupes, id, nelem))
+            else:
+                sys.stderr.write('id: {:d}/{:d}...          \r'.format(id, nelem))
+
+    nused = nwqs + nlink + nrsvd
+    nfound = nused + nfree + ninv
+    print "\n\nFound {:d} objects: {:d} WQS, {:d} LINK, {:d} RSVD, {:d} FREE".format(nfound, nwqs, nlink, nrsvd, nfree)
+    if (opt_type_filt == "" and opt_valid_only == 0) and (nused != table.used_elem):
+        print"\tWARNING: inconsistent state! Table reports {:d}/{:d} used elem, found {:d}/{:d}".format(table.used_elem, nelem, nused, nfound)
+    if len(bt_summary) > 0:
+        print "Link allocation BT (frame={:d})".format(opt_bt_idx)
+    for k,v in bt_summary.iteritems():
+        print "\t[{:d}] from: {:s}".format(v, GetSourceInformationForAddress(unsigned(k)))
+    if opt_cross_check:
+        print "\n{:d} Duplicated WQS objects:".format(nduplicated_wqs)
+        for wq in wq_ptr:
+            l = len(wq_ptr[wq])
+            if l > 1:
+                print "\tWQS:{:#x} ({:d} {:s}".format(wq, l, str(wq_ptr[wq]))
+# EndMacro: showallsetidlinks
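+# Example invocations (option values are illustrative):
+#   (lldb) showallsetidlinks -Q                  # quiet: only print the summary counts
+#   (lldb) showallsetidlinks -T LINK -S iLRI     # LINK objects with dead owners and invalid children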
+
+
+# Macro: showallpreposts
+@lldb_command('showallpreposts', 'VQT:F:Y:')
+def ShowAllPreposts(cmd_args=None, cmd_options={}):
+    """ Dump / summarize all waitq prepost linkage elements
+
+        usage: showallpreposts [-V] [-T {type}] [-Y n] [-F n] [-Q]
+            -V        : only show valid / live links
+            -T {type} : only display objects of type {type}
+            -Y {0|1}  : only show POST objects that are
+                        valid (-Y 1) or invalid (-Y 0)
+            -F n      : summarize the backtraces at frame level 'n'
+            -Q        : be quiet, only summarize
+    """
+    opt_summary = 0
+    opt_type_filt = ""
+    opt_valid_only = 0
+    opt_post_type = -1
+    opt_bt_idx = 0
+    verbose = False
+    if config['verbosity'] > vHUMAN:
+        verbose = True
+    if "-Q" in cmd_options:
+        opt_summary = 1
+    if "-V" in cmd_options:
+        opt_valid_only = 1
+    if "-Y" in cmd_options:
+        opt_post_type = unsigned(cmd_options["-Y"])
+        if opt_post_type != 0 and opt_post_type != 1:
+            raise ArgumentError("Invalid POST obj specifier [-Y %d] (expected 0 or 1)" % cmd_options["-Y"])
+    if "-F" in cmd_options:
+        opt_bt_idx = unsigned(cmd_options["-F"])
+        if hasattr(kern.globals, "g_nwaitq_btframes"):
+            if opt_bt_idx >= unsigned(kern.globals.g_nwaitq_btframes):
+                raise ArgumentError("Invalid BT index '{:s}' max:{:d}".format(cmd_options["-F"], unsigned(kern.globals.g_nwaitq_btframes) - 1))
+    if "-T" in cmd_options:
+        opt_type_filt = cmd_options["-T"]
+        if opt_type_filt == "FREE" or opt_type_filt == "RSVD":
+            pass
+        elif opt_type_filt == "POST":
+            opt_type_filt = "LINK"
+        elif opt_type_filt == "WQ":
+            opt_type_filt = "ELEM"
+        else:
+            raise ArgumentError("Invalid type filter'{:s}'".format(cmd_options["-T"]))
+    table = kern.globals.g_prepost_table
+    nelem = int(table.nelem)
+    bt_summary = {}
+    nfree = 0
+    ninv = 0
+    nwq = 0
+    npost = 0
+    nrsvd = 0
+    hdr_str = "Looking through {:d} objects from g_prepost_table@{:<#x}".format(nelem, addressof(kern.globals.g_prepost_table))
+    if opt_type_filt != "" or opt_valid_only != 0:
+        hdr_str += "\n\t`-> for "
+        if opt_valid_only:
+            hdr_str += "valid "
+        else:
+            hdr_str += "all "
+        if opt_type_filt == "":
+            hdr_str += "objects"
+        else:
+            hdr_str += "{:s} objects".format(cmd_options["-T"])
+    print hdr_str
+    if not opt_summary:
+        print GetWaitqPrepostSummary.header
+    id = 0
+    while id < nelem:
+        wqp = GetWaitqPrepost(id)[0]
+        if wqp == 0:
+            print "<<<invalid prepost:{:d}>>>".format(id)
+            ninv += 1
+        else:
+            lt = WaitqTableElemType(wqp)
+            isvalid = WaitqTableElemValid(wqp)
+            should_count = 1
+            if isvalid and opt_post_type > -1 and lt == "LINK":
+                post_wqp = GetWaitqPrepost(wqp.wqp_post.wqp_wq_id)[0]
+                post_valid = WaitqTableElemValid(post_wqp)
+                if opt_post_type == 0 and post_valid: # only count _invalid_ POST objects
+                    should_count = 0
+                elif opt_post_type == 1 and not post_valid: # only count _valid_ POST objects
+                    should_count = 0
+            if should_count and (opt_type_filt == "" or opt_type_filt == lt) and ((opt_valid_only == 0 or isvalid)):
+                if lt == "ELEM":
+                    nwq += 1
+                elif lt == "LINK":
+                    npost += 1
+                elif lt == "RSVD":
+                    nrsvd += 1
+                elif lt == "FREE":
+                    nfree += 1
+                else:
+                    ninv += 1
+                if hasattr(wqp, 'wqp_alloc_bt'):
+                    pc = unsigned(wqp.wqp_alloc_bt[opt_bt_idx])
+                    pc_str = str(pc)
+                    if pc > 0:
+                        if pc_str in bt_summary:
+                            bt_summary[pc_str] += 1
+                        else:
+                            bt_summary[pc_str] = 1
+                if not opt_summary:
+                    print GetWaitqPrepostSummary(wqp)
+        if verbose:
+            sys.stderr.write('id: {:d}/{:d}...          \r'.format(id, nelem))
+        id += 1
+    nused = nwq + npost + nrsvd
+    nfound = nused + nfree + ninv
+    print "\nFound {:d} objects: {:d} WQ, {:d} POST, {:d} RSVD, {:d} FREE".format(nfound, nwq, npost, nrsvd, nfree)
+    if (opt_type_filt == "" and opt_valid_only == 0) and (nused != table.used_elem):
+        print"\tWARNING: inconsistent state! Table reports {:d}/{:d} used elem, found {:d}/{:d}".format(table.used_elem, nelem, nused, nfound)
+    if len(bt_summary) > 0:
+        print "Link allocation BT (frame={:d})".format(opt_bt_idx)
+    for k,v in bt_summary.iteritems():
+        print "\t[{:d}] from: {:s}".format(v, GetSourceInformationForAddress(unsigned(k)))
+# EndMacro: showallpreposts
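+# Example invocations (option values are illustrative):
+#   (lldb) showallpreposts -Q             # quiet: only print the summary counts
+#   (lldb) showallpreposts -T POST -Y 0   # POST objects whose target waitq entry is invalid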
+
+
+@lldb_type_summary(['wq_prepost', 'wq_prepost *'])
+@header("{:<18s} {:<18s} {:<19s} {:<10s} {:<1s} {:<4s} {:<10s} {:<20s}".format('addr','id','idx','gen','V','type','refcnt','info'))
+def GetWaitqPrepostSummary(wqp):
+    if not wqp:
+        return
+    fmt_str = "{w: <#18x} {w.wqte.wqt_id.id: <#18x} {w.wqte.wqt_id.idx: <7d} (->{w.wqte.wqt_next_idx: <7d}) {w.wqte.wqt_id.generation: <#10x} {v: <1s} {t: <4s} {rcnt: <10d} "
+    type = WaitqTableElemType(wqp)
+    if type == "ELEM":
+        type = "WQ"
+    elif type == "LINK":
+        type = "POST"
+    v = "F"
+    if WaitqTableElemValid(wqp):
+        v = "T"
+    refcnt = WaitqTableElemRefcnt(wqp)
+    out_str = fmt_str.format(w=wqp, v=v, t=type, rcnt=refcnt)
+    if type == "WQ":
+        out_str += "wq:{0: <#18x}".format(unsigned(wqp.wqp_wq.wqp_wq_ptr))
+    elif type == "POST":
+        out_str += "next:{0: <#18x}, wqid:{1: <#18x}".format(wqp.wqp_post.wqp_next_id, wqp.wqp_post.wqp_wq_id)
+        post_wqp = GetWaitqPrepost(wqp.wqp_post.wqp_wq_id)[0]
+        if not WaitqTableElemValid(post_wqp):
+            out_str += "(<invalid>)"
+        else:
+            if WaitqTableElemType(post_wqp) != "ELEM":
+                out_str += "(!WQP_WQ?)"
+            else:
+                out_str += "({0: <#18x})".format(unsigned(post_wqp.wqp_wq.wqp_wq_ptr))
+    return out_str
+
+
+# Macro: showprepost
+@lldb_command('showprepost', "P:")
+def ShowPrepost(cmd_args=None, cmd_options={}):
+    """ Print prepost structure summary
+
+        Note: you can pass either a complete ID (generation + index), or
+              just the index to the -P argument.
+
+        usage: showprepost [-P ID] [0xaddr]
+            -P {ID} : show prepost structure whose ID is {ID}
+    """
+    wqp = 0
+    if "-P" in cmd_options:
+        wqp, warn_str = GetWaitqPrepost(unsigned(kern.GetValueFromAddress(cmd_options["-P"], 'uint64_t *')))
+        if wqp == 0:
+            if warn_str != '':
+                raise LookupError(warn_str)
+            else:
+                raise ArgumentError("Invalid prepost ID {:s}".format(cmd_options["-P"]))
+    if wqp == 0:
+        if not cmd_args:
+            raise ArgumentError("Please pass the address of a prepost object")
+        wqp = kern.GetValueFromAddress(cmd_args[0], 'wq_prepost *')
+    if not wqp:
+        raise ArgumentError("Invalid prepost {:s}".format(cmd_args[0]))
+
+    print GetWaitqPrepostSummary.header
+    print GetWaitqPrepostSummary(wqp)
+# EndMacro: showprepost
+
+
+def WaitqPrepostFromObj(wqp, head_id, inv_ok, prepost_str, pp_arr = 0, depth = 0):
+    if pp_arr != 0:
+        pp_arr.append(wqp)
+    etype = WaitqTableElemType(wqp)
+    if not WaitqTableElemValid(wqp) and not inv_ok:
+        id = 0
+        if wqp:
+            id = wqp.wqte.wqt_id.id
+        prepost_str.append("{0: <#18x}:{1: <18s}".format(id, "<invalid>"))
+        return
+    if etype == "ELEM": # WQP_WQ
+        prepost_str.append("{0: <#18x}:{1: <#18x}".format(wqp.wqte.wqt_id.id, unsigned(wqp.wqp_wq.wqp_wq_ptr)))
+        return
+
+    post_wq = 0
+
+    if etype == "LINK": # WQP_POST
+        next_id = wqp.wqp_post.wqp_next_id
+        post_wq = GetWaitqPrepost(wqp.wqp_post.wqp_wq_id)[0]
+        if WaitqTableElemValid(post_wq):
+            if WaitqTableElemType(post_wq) != "ELEM":
+                prepost_str.append("{0: <#18x}:{1: <18s}".format(post_wq.wqte.wqt_id.id, "<invalid post>"))
+            else:
+                prepost_str.append("{0: <#18x}:{1: <#18x}".format(wqp.wqte.wqt_id.id, unsigned(post_wq.wqp_wq.wqp_wq_ptr)))
+        if next_id > 0 and next_id != head_id:
+            if depth >= 950:
+                prepost_str.append("{: <37s}".format("!recursion limit!"))
+                return
+            WaitqPrepostFromObj(GetWaitqPrepost(next_id)[0], head_id, inv_ok, prepost_str, pp_arr, depth + 1)
+    else: #  "RSVD" or "FREE":
+        prepost_str.append("{0: <#18x} -> {1: <15d}".format(wqp.wqte.wqt_id.id, wqp.wqte.wqt_next_idx))
+        next_id = wqp.wqte.wqt_next_idx
+        max_elem = int(kern.globals.g_prepost_table.nelem)
+        if hasattr(kern.globals, 'g_wqt_idx_max'):
+            max_elem = unsigned(kern.globals.g_wqt_idx_max)
+        if next_id < max_elem:
+            if depth >= 950:
+                prepost_str.append("{: <37s}".format("!recursion limit!"))
+                return
+            WaitqPrepostFromObj(GetWaitqPrepost(next_id)[0], head_id, inv_ok, prepost_str, pp_arr, depth + 1)
+    return
+
+def GetPrepostChain(head_id, inv_ok = False, pp_arr = 0):
+    pp = []
+    if unsigned(head_id) == 0:
+        return [ "{0: <#18x}:{1: <18s}".format(head_id, "<invalid>") ]
+    wqp = GetWaitqPrepost(head_id)[0]
+    if wqp != 0:
+        WaitqPrepostFromObj(wqp, head_id, inv_ok, pp, pp_arr)
+    else:
+        return [ "{0: <#18x}:{1: <18s}".format(head_id, "<invalid>") ]
+    return pp
+
+def GetWaitqPreposts(waitq):
+    if GetWaitqStateStr(waitq) != "SET":
+        return []
+    wqset = Cast(waitq, 'waitq_set *')
+    if wqset.wqset_prepost_id == 0:
+        return []
+    return GetPrepostChain(wqset.wqset_prepost_id)
+
+
+# Macro: showprepostchain
+@lldb_command('showprepostchain', "P:")
+def ShowPrepostChain(cmd_args=None, cmd_options={}):
+    """ Follow a chain of preposts, printing each one.
+        Note that prepost chains are circular, so this will print
+        the entire chain given a single element.
+
+        Note: you can pass either a complete ID (generation + index), or
+              just the index to the -P argument.
+
+        usage: showprepostchain [-P ID] [0xaddr]
+            -P {ID} : start printing with the prepost whose ID is {ID}
+    """
+    wqp = 0
+    if "-P" in cmd_options:
+        wqp, warn_str = GetWaitqPrepost(unsigned(kern.GetValueFromAddress(cmd_options["-P"], 'uint64_t *')))
+        if wqp == 0:
+            if warn_str != '':
+                raise LookupError(warn_str)
+            else:
+                raise ArgumentError("Invalid prepost ID {:s}".format(cmd_options["-P"]))
+    if wqp == 0:
+        if not cmd_args:
+            raise ArgumentError("Please pass the address of a prepost object")
+        wqp = kern.GetValueFromAddress(cmd_args[0], 'wq_prepost *')
+    if not wqp:
+        raise ArgumentError("Invalid prepost {:s}".format(cmd_args[0]))
+
+    pp_arr = []
+    GetPrepostChain(wqp.wqte.wqt_id.id, True, pp_arr)
+    pp_cnt = len(pp_arr)
+    idx = 0
+    nvalid = 0
+    ninvalid = 0
+    print GetWaitqPrepostSummary.header
+    while idx < pp_cnt:
+        print GetWaitqPrepostSummary(pp_arr[idx])
+        if pp_arr[idx] != 0:
+            type = WaitqTableElemType(pp_arr[idx])
+            if type == "LINK":
+                post_wqp = GetWaitqPrepost(pp_arr[idx].wqp_post.wqp_wq_id)[0]
+                if not WaitqTableElemValid(post_wqp):
+                    ninvalid += 1
+                else:
+                    nvalid += 1
+            else:
+                nvalid += 1
+        idx += 1
+    print "%s" % '-'*86
+    print "Total: {:d} ({:d} valid, {:d} invalid)".format(len(pp_arr), nvalid, ninvalid)
+# EndMacro: showprepostchain
+
+
+@lldb_type_summary(['waitq', 'waitq *'])
+@header("{: <16s} {: <3s} {: <4s} {: <17s} {: <18s} {: <18s} {: <37s} {: <22s} {: <10s}".format('waitq', 'typ', 'bits', 'evtmask', 'setid', 'wq_wqp', 'preposts', 'member_of', 'threads'))
+def GetWaitqSummary(waitq):
+    fmt_str = "{q: <16x} {state: <3s} {bits: <4s} {q.waitq_eventmask: <#17x} {setid: <#18x} {q.waitq_prepost_id: <#18x}"
+    th_str = []
+    if waitq.waitq_queue.next and waitq.waitq_queue.prev:
+        for thread in IterateLinkageChain(addressof(waitq.waitq_queue), 'thread *', 'links'):
+            th_str.append("{: <18s} e:{: <#18x}".format(hex(thread), thread.wait_event))
+    else:
+        th_str.append("{: <39s}".format('<invalid (NULL) queue>'))
+    th_cnt = len(th_str)
+    set_str = GetWaitqSets(waitq)
+    set_cnt = len(set_str)
+    pp_str = GetWaitqPreposts(waitq)
+    pp_cnt = len(pp_str)
+    last_str = ''
+    idx = 0;
+    while idx < pp_cnt or idx < set_cnt or idx < th_cnt:
+        p = ""
+        s = ""
+        t = ""
+        if idx < pp_cnt:
+            p = pp_str[idx]
+        if idx < set_cnt:
+            s = set_str[idx]
+        if idx < th_cnt:
+            t = th_str[idx]
+        if idx == 0:
+            last_str += "{0: <37s} {1: <22s} {2: <39s}".format(p, s, t)
+        else:
+            last_str += "\n{0: <80s} {1: <37s} {2: <22s} {3: <39s}".format('', p, s, t)
+        idx += 1
+    if pp_cnt > 0 or set_cnt > 0 or th_cnt > 0:
+        last_str += "\n{:<80s} {: <37s} {: <22s} {: <39s}".format('', '-'*37, '-'*20, '-'*39)
+        last_str += "\n{0: <80s} {1: <37d} {2: <22d} {3: <39d}".format('', pp_cnt, set_cnt, th_cnt)
+
+    state = GetWaitqStateStr(waitq)
+    setid = 0
+    if state == "SET":
+        setid = Cast(waitq, 'waitq_set *').wqset_id
+    out_str = fmt_str.format(q=waitq, state=state, bits=GetWaitqBitsStr(waitq), setid=setid)
+    out_str += last_str
+    return out_str
+
+# Macro: showwaitq
+@lldb_command('showwaitq', "P:S:")
+def ShowWaitq(cmd_args=None, cmd_options={}):
+    """ Print waitq structure summary.
+        Lookup the waitq either by address, by Set ID, or indirectly
+        through a prepost object that points to the waitq.
+
+        Note: you can pass either a complete ID (generation + index), or
+              just the index to the -P and -S arguments.
+
+        usage: showwaitq [-P PrePostID] [-S SetID] [0xaddr]
+            -P {ID}  : prepost ID that points to a waitq
+            -S {ID}  : waitq_set ID
+    """
+    waitq = 0
+    if "-P" in cmd_options:
+        wqp, warn_str = GetWaitqPrepost(unsigned(kern.GetValueFromAddress(cmd_options["-P"], 'uint64_t *')))
+        if wqp == 0:
+            if warn_str:
+                raise LookupError(warn_str)
+            else:
+                raise ArgumentError("Invalid prepost ID {:s}".format(cmd_options["-P"]))
+        if WaitqTableElemType(wqp) != "ELEM":
+            raise ArgumentError("Prepost ID {:s} points to a WQP_POST object, not a WQP_WQ!".format(cmd_options["-P"]))
+        waitq = wqp.wqp_wq.wqp_wq_ptr
+    if "-S" in cmd_options:
+        if waitq:
+            raise ArgumentError("Please pass only one of '-S' or '-P'!")
+        link, warn_str = GetWaitqLink(unsigned(kern.GetValueFromAddress(cmd_options["-S"],'uint64_t *')))
+        if not link:
+            if warn_str != '':
+                raise LookupError(warn_str)
+            else:
+                raise ArgumentError("Invalid link ID {:s}".format(cmd_options["-S"]))
+        if WaitqTableElemType(link) != "ELEM":
+            raise ArgumentError("Link ID {:s} points to a SLT_LINK object, not an SLT_WQS!".format(cmd_options["-S"]))
+        waitq = addressof(link.sl_wqs.sl_set.wqset_q)
+
+    if not waitq and not cmd_args:
+        raise ArgumentError("Please pass the address of a waitq!")
+    if not waitq:
+        waitq = kern.GetValueFromAddress(cmd_args[0], 'waitq *')
+    if not waitq:
+        raise ("Unknown arguments: %r %r" % (cmd_args, cmd_options))
+    print GetWaitqSummary.header
+    print GetWaitqSummary(waitq)
+# EndMacro: showwaitq
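+# Example invocations (the address and ID below are hypothetical):
+#   (lldb) showwaitq 0xffffff80aabbcc00   # summarize a waitq by address
+#   (lldb) showwaitq -S 0x140007          # look the waitq up by its waitq_set ID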
+
+
+# Macro: showglobalwaitqs
+@lldb_command('showglobalwaitqs')
+def ShowGlobalWaitqs(cmd_args=None):
+    """ Summarize global waitq usage
+    """
+    global kern
+    q = 0
+
+    print "Global waitq objects"
+    print GetWaitqSummary.header
+
+    while q < kern.globals.g_num_waitqs:
+        print GetWaitqSummary(addressof(kern.globals.global_waitqs[q]))
+        q = q + 1
+# EndMacro: showglobalwaitqs
+
+
+# Macro: showglobalqstats
+@lldb_command('showglobalqstats', "OF")
+def ShowGlobalQStats(cmd_args=None, cmd_options={}):
+    """ Summarize global waitq statistics
+
+        usage: showglobalqstats [-O] [-F]
+            -O  : only output waitqs with outstanding waits
+            -F  : output as much backtrace as was recorded
+    """
+    global kern
+    q = 0
+
+    if not hasattr(kern.globals, 'g_waitq_stats'):
+        print "No waitq stats support (use DEVELOPMENT kernel)!"
+        return
+
+    print "Global waitq stats"
+    print "{0: <18s} {1: <8s} {2: <8s} {3: <8s} {4: <8s} {5: <8s} {6: <32s}".format('waitq', '#waits', '#wakes', '#diff', '#fails', '#clears', 'backtraces')
+
+    waiters_only = False
+    full_bt = False
+    if "-O" in cmd_options:
+        waiters_only = True
+    if "-F" in cmd_options:
+        full_bt = True
+
+    fmt_str = "{q: <#18x} {stats.waits: <8d} {stats.wakeups: <8d} {diff: <8d} {stats.failed_wakeups: <8d} {stats.clears: <8d} {bt_str: <s}"
+    while q < kern.globals.g_num_waitqs:
+        waitq = kern.globals.global_waitqs[q]
+        stats = kern.globals.g_waitq_stats[q]
+        diff = stats.waits - stats.wakeups
+        if diff == 0 and waiters_only:
+            q = q + 1
+            continue
+        last_waitstr = ''
+        last_wakestr = ''
+        fw_str = ''
+        if (stats.last_wait[0]):
+            last_waitstr = GetSourceInformationForAddress(unsigned(stats.last_wait[0]))
+        if (stats.last_wakeup[0]):
+            last_wakestr = GetSourceInformationForAddress(unsigned(stats.last_wakeup[0]))
+        if (stats.last_failed_wakeup[0]):
+            fw_str = GetSourceInformationForAddress(unsigned(stats.last_failed_wakeup[0]))
+
+        if full_bt:
+            f = 1
+            while f < kern.globals.g_nwaitq_btframes:
+                if stats.last_wait[f]:
+                    last_waitstr = "{0}->{1}".format(GetSourceInformationForAddress(unsigned(stats.last_wait[f])), last_waitstr)
+                if stats.last_wakeup[f]:
+                    last_wakestr = "{0}->{1}".format(GetSourceInformationForAddress(unsigned(stats.last_wakeup[f])), last_wakestr)
+                if stats.last_failed_wakeup[f]:
+                    fw_str = "{0}->{1}".format(GetSourceInformationForAddress(unsigned(stats.last_failed_wakeup[f])), fw_str)
+                f = f + 1
+        bt_str = ''
+        if last_waitstr:
+            bt_str += "wait : " + last_waitstr
+        if last_wakestr:
+            if bt_str:
+                bt_str += "\n{0: <70s} ".format('')
+            bt_str += "wake : " + last_wakestr
+        if fw_str:
+            if bt_str:
+                bt_str += "\n{0: <70s} ".format('')
+            bt_str += "fails: " + fw_str
+
+        print fmt_str.format(q=addressof(waitq), stats=stats, diff=diff, bt_str=bt_str)
+        q = q + 1
+# EndMacro: showglobalqstats
index a3c5b1b5a10486f70c53c448b6c663a2dd2a6f61..bc3830f7c64b4bec30b1c820950ff8b71d4735cc 100644 (file)
@@ -532,33 +532,74 @@ def ShowVersion(cmd_args=None):
     print kern.version
 
 
-@lldb_command('paniclog')
-def ShowPanicLog(cmd_args=None):
+@lldb_command('paniclog', 'S')
+def ShowPanicLog(cmd_args=None, cmd_options={}):
     """ Display the paniclog information
         usage: (lldb) paniclog
         options:
             -v : increase verbosity
+            -S : parse stackshot data (if panic stackshot available)
     """
+    binary_data_bytes_to_skip = 0
+    if hasattr(kern.globals, "kc_panic_data"):
+        binary_data_bytes_to_skip = unsigned(kern.globals.kc_panic_data.kcd_addr_end) - unsigned(kern.globals.kc_panic_data.kcd_addr_begin)
+        if binary_data_bytes_to_skip > 0:
+            binary_data_bytes_to_skip += sizeof("struct kcdata_item")
+        else:
+            binary_data_bytes_to_skip = 0
+
+    if "-S" in cmd_options:
+        if hasattr(kern.globals, "kc_panic_data"):
+            kc_data = unsigned(addressof(kern.globals.kc_panic_data))
+            ts = int(time.time())
+            ss_binfile = "/tmp/panic_%d.bin" % ts
+            ss_ipsfile = "/tmp/stacks_%d.ips" % ts
+            print "savekcdata  0x%x -O %s" % (kc_data, ss_binfile)
+            SaveKCDataToFile(["0x%x" % kc_data], {"-O":ss_binfile})
+            self_path = str(__file__)
+            base_dir_name = self_path[:self_path.rfind("/")]
+            print "python %s/kcdata.py %s -s %s" % (base_dir_name, ss_binfile, ss_ipsfile)
+            (c,so,se) = RunShellCommand("python %s/kcdata.py %s -s %s" % (base_dir_name, ss_binfile, ss_ipsfile))
+            if c == 0:
+                print "Saved ips stackshot file as %s" % ss_ipsfile
+            else:
+                print "Failed to run command: exit code: %d, SO: %s SE: %s" % (c, so, se)
+        else:
+            print "kc_panic_data is unavailable for this kernel config."
+
     panic_buf = kern.globals.debug_buf_addr
     panic_buf_start = unsigned(panic_buf)
     panic_buf_end = unsigned(kern.globals.debug_buf_ptr)
     num_bytes = panic_buf_end - panic_buf_start
     if num_bytes == 0 :
         return
-    warn_str = ""
-    if num_bytes > 4096 and config['verbosity'] == vHUMAN:
-        num_bytes = 4096
-        warn_str = "LLDBMacro Warning: The paniclog is too large. Trimming to 4096 bytes."
-        warn_str += " If you wish to see entire log please use '-v' argument."
     out_str = ""
-    for i in range(num_bytes):
-        p_char = str(panic_buf[i])
+    warn_str = ""
+    num_print_bytes = 0
+    in_binary_data_region = False
+    pos = 0
+    while pos < num_bytes:
+        p_char = str(panic_buf[pos])
         out_str += p_char
         if p_char == '\n':
-            print out_str
+            if not in_binary_data_region:
+                num_print_bytes += 1
+                print out_str
+            if (out_str.find("Data: BEGIN>>") >= 0):
+                in_binary_data_region = True
+                pos += binary_data_bytes_to_skip - 1
+            if (out_str.find("<<END") >= 0):
+                in_binary_data_region = False
             out_str = ""
+        if num_print_bytes > 4096 and config['verbosity'] == vHUMAN:
+            warn_str = "LLDBMacro Warning: The paniclog is too large. Trimming to 4096 bytes."
+            warn_str += " If you wish to see entire log please use '-v' argument."
+            break
+        pos += 1
+
     if warn_str:
         print warn_str
+
     return
 
 @lldb_command('showbootargs')
@@ -711,4 +752,6 @@ from atm import *
 from structanalyze import *
 from ipcimportancedetail import *
 from bank import *
-
+from kauth import *
+from waitq import *
+from usertaskgdbserver import *
index 3d694ac4ec3fd5d346d69fb55a245b48b3ddde96..3491221b80555ff10d0a600fb0c88ce71be885ff 100644 (file)
@@ -23,6 +23,10 @@ arm_level2_access_strings = [ " noaccess",
                              ]
 kq_state_strings = {0:"", 1:"SEL", 2:"SLEEP", 4:"PROCWAIT", 8:"KEV32", 16:"KEV64"}
 
+kn_state_strings = {0:"", 1:"ACTIVE", 2:"QUEUED", 4:"DISABLED", 8:"DROPPING", 16:"USERWAIT", 32:"ATTACHING", 64:"STAYQUED"}
+
+mach_msg_type_descriptor_strings = {0: "PORT", 1: "OOLDESC", 2: "OOLPORTS", 3: "OOLVOLATILE"}
+
 proc_state_strings = [ "", "Idle", "Run", "Sleep", "Stop", "Zombie", "Reaping" ]
 proc_flag_explain_strings = ["!0x00000004 - process is 32 bit",  #only exception that does not follow bit settings
                              "0x00000001 - may hold advisory locks",
@@ -61,7 +65,7 @@ proc_flag_explain_strings = ["!0x00000004 - process is 32 bit",  #only exception
 #File: xnu/osfmk/kern/ipc_kobject.h
 # string representations for Kobject types
 kobject_types = ['', 'THREAD', 'TASK', 'HOST', 'HOST_PRIV', 'PROCESSOR', 'PSET', 'PSET_NAME', 'TIMER', 'PAGER_REQ', 'DEVICE', 'XMM_OBJECT', 'XMM_PAGER', 'XMM_KERNEL', 'XMM_REPLY', 
-                     'NOTDEF 15', 'NOTDEF 16', 'HOST_SEC', 'LEDGER', 'MASTER_DEV', 'ACTIVATION', 'SUBSYTEM', 'IO_DONE_QUE', 'SEMAPHORE', 'LOCK_SET', 'CLOCK', 'CLOCK_CTRL' , 'IOKIT_SPARE', 
+                     'NOTDEF 15', 'NOTDEF 16', 'HOST_SEC', 'LEDGER', 'MASTER_DEV', 'TASK_NAME', 'SUBSYTEM', 'IO_DONE_QUE', 'SEMAPHORE', 'LOCK_SET', 'CLOCK', 'CLOCK_CTRL' , 'IOKIT_SPARE', 
                       'NAMED_MEM', 'IOKIT_CON', 'IOKIT_OBJ', 'UPL', 'MEM_OBJ_CONTROL', 'AU_SESSIONPORT', 'FILEPORT', 'LABELH', 'TASK_RESUME', 'VOUCHER', 'VOUCHER_ATTR_CONTROL']
 
 def populate_kobject_types(xnu_dir_path):
diff --git a/tools/reindent.sh b/tools/reindent.sh
new file mode 100755 (executable)
index 0000000..738f48a
--- /dev/null
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+#
+# Reindent CWD recursively using clang-format (assumed to be in your PATH),
+# and per-component or per-directory .clang-format style specifications.
+#
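+# Typical invocation (illustrative): run from the top of the tree you want
+# reindented, e.g.
+#
+#   cd /path/to/xnu && sh tools/reindent.sh
+#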
+
+CPUS=`sysctl -n hw.logicalcpu`
+CLANGFORMAT=`xcrun -find clang-format`
+
+if [ ! -x "${CLANGFORMAT}" ]; then
+    echo "Could not find clang-format" 1>&2
+    exit 1
+fi
+
+echo "Using ${CLANGFORMAT} to reindent, using concurrency of ${CPUS}"
+
+find -x . \! \( \( -name BUILD -o -name EXTERNAL_HEADERS -o -name libMicro -o -name zlib -o -name .svn -o -name .git -o -name cscope.\* -o -name \*~ \) -prune \) -type f \( -name \*.c -o -name \*.cpp \) -print0 | \
+    xargs -0 -P "${CPUS}" -n 10 "${CLANGFORMAT}" -style=file -i
+ret=$?
+
+if [ $ret -ne 0 ]; then
+    echo "reindent failed: $ret" 1>&2
+    exit 1
+fi
+
+exit 0
index 43e3ec44ea327f9eb0ac952bbc0ea12d3d46889b..2d0cff8fd46314cb06937f7b594bc9c75845ceba 100755 (executable)
@@ -68,6 +68,7 @@ for arg in "$@"; do
 done
 
 
+RSYNC_ARGS="-azvh"
 ARGS[$index]="REMOTEBUILD="
 REMOTEARGS[$index]="\"REMOTEBUILD=\""
 
@@ -132,9 +133,9 @@ else
     REMOTE_BUILDSCRIPTREL="./BUILD/obj"
     BUILDSCRIPTNAME="build.sh"
     if [ ! -d "${OBJROOT}/SETUP" ]; then
-       RSYNC_ARGS="--delete-excluded"
+    RSYNC_DELETE_EXCLUDED="--delete-excluded"
     else
-       RSYNC_ARGS=""
+    RSYNC_DELETE_EXCLUDED=""
     fi
     if [ ! -e "${SYMROOT}/" ]; then
        RSYNC_DELETE_SYMROOT=1
@@ -213,19 +214,19 @@ else
     ssh $REMOTEBUILD "mkdir -p \"${REMOTEBUILDPATH}/BUILD/\"{obj,sym,dst}" || die "Could not make remote build directory"
 
     # Copy source only
-    rsync -azv --delete --exclude=\*~ --exclude=.svn --exclude=.git --exclude=/BUILD . $REMOTEBUILD:"${REMOTEBUILDPATH}" || die "Could not rsync source tree"
+    rsync $RSYNC_ARGS --delete --exclude=\*~ --exclude=.svn --exclude=.git --exclude=/BUILD . $REMOTEBUILD:"${REMOTEBUILDPATH}" || die "Could not rsync source tree"
 
     # Copy partial OBJROOT (just build tools and build script), and optionally delete everything else
-    rsync -azv --delete $RSYNC_ARGS --include=/build.sh --include=/BuildTools --include=/BuildTools/\*\* --exclude=\* "${OBJROOT}/" $REMOTEBUILD:"${REMOTEBUILDPATH}/BUILD/obj/" || die "Could not rsync build tree"
+    rsync $RSYNC_ARGS --delete $RSYNC_DELETE_EXCLUDED --include=/build.sh --include=/BuildTools --include=/BuildTools/\*\* --exclude=\* "${OBJROOT}/" $REMOTEBUILD:"${REMOTEBUILDPATH}/BUILD/obj/" || die "Could not rsync build tree"
 
     # Delete remote SYMROOT if it has been deleted locally
     if [ "$RSYNC_DELETE_SYMROOT" -eq 1 ]; then
-       rsync -azv --delete "${BUILDTOOLSDIR}/empty/" $REMOTEBUILD:"${REMOTEBUILDPATH}/BUILD/sym/" || die "Could not rsync delete SYMROOT"
+       rsync $RSYNC_ARGS --delete "${BUILDTOOLSDIR}/empty/" $REMOTEBUILD:"${REMOTEBUILDPATH}/BUILD/sym/" || die "Could not rsync delete SYMROOT"
     fi
 
     # Delete remote DSTROOT if it has been deleted locally
     if [ "$RSYNC_DELETE_DSTROOT" -eq 1 ]; then
-       rsync -azv --delete "${BUILDTOOLSDIR}/empty/" $REMOTEBUILD:"${REMOTEBUILDPATH}/BUILD/dst/" || die "Could not rsync delete DSTROOT"
+       rsync $RSYNC_ARGS --delete "${BUILDTOOLSDIR}/empty/" $REMOTEBUILD:"${REMOTEBUILDPATH}/BUILD/dst/" || die "Could not rsync delete DSTROOT"
     fi
 
     # Start the build
@@ -234,9 +235,9 @@ else
 
     # Copy back build results except for object files (which might be several GB)
     echo "Copying results back..."
-    rsync -azv --no-o --no-g --exclude=\*.o --exclude=\*.cpo --exclude=\*.d --exclude=\*.cpd --exclude=\*.non_lto --exclude=\*.ctf --exclude=conf $REMOTEBUILD:"${REMOTEBUILDPATH}/BUILD/obj/" "${OBJROOT}/" || die "Could not rsync build results"
-    rsync -azv --no-o --no-g $REMOTEBUILD:"${REMOTEBUILDPATH}/BUILD/sym/" "${SYMROOT}/" || die "Could not rsync build results"
-    rsync -azv --no-o --no-g $REMOTEBUILD:"${REMOTEBUILDPATH}/BUILD/dst/" "${DSTROOT}/" || die "Could not rsync build results"
+    rsync $RSYNC_ARGS --no-o --no-g --exclude=\*.o --exclude=\*.cpo --exclude=\*.d --exclude=\*.cpd --exclude=\*.non_lto --exclude=\*.ctf $REMOTEBUILD:"${REMOTEBUILDPATH}/BUILD/obj/" "${OBJROOT}/" || die "Could not rsync build results"
+    rsync $RSYNC_ARGS --no-o --no-g $REMOTEBUILD:"${REMOTEBUILDPATH}/BUILD/sym/" "${SYMROOT}/" || die "Could not rsync build results"
+    rsync $RSYNC_ARGS --no-o --no-g $REMOTEBUILD:"${REMOTEBUILDPATH}/BUILD/dst/" "${DSTROOT}/" || die "Could not rsync build results"
 
 fi
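
In outline, the reworked remote-build script now shares one rsync flag variable and keeps the conditional --delete-excluded separate; a simplified sketch of the OBJROOT copy, with host and path as placeholders and the same variable names as above:

    RSYNC_ARGS="-azvh"
    RSYNC_DELETE_EXCLUDED="--delete-excluded"   # set only when ${OBJROOT}/SETUP does not exist yet
    rsync $RSYNC_ARGS --delete $RSYNC_DELETE_EXCLUDED \
        --include=/build.sh --include=/BuildTools --include=/BuildTools/\*\* --exclude=\* \
        "${OBJROOT}/" buildhost:"/tmp/remotebuild/BUILD/obj/"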
 
index 69dd25bd74a1cccf362446a3859c331633ea58cb..c2991c37cb97d8159d24e4f9e4dacd21a0eb3a00 100644 (file)
 #include <pthread.h>
 #include <mach/mach.h>
 #include <mach/mach_error.h>
+#include <mach/mach_time.h>
 #include <mach/notify.h>
 #include <servers/bootstrap.h>
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/signal.h>
 
+#include <libkern/OSAtomic.h>
+
 #define MAX(A, B) ((A) < (B) ? (B) : (A))
 
 
@@ -51,7 +54,10 @@ struct port_args {
        int reply_size;
        mach_msg_header_t *reply_msg;
        mach_port_t port;
-       mach_port_t set;
+       mach_port_t rcv_set;
+
+       mach_port_t *set;
+       mach_port_t *port_list;
 };
 
 typedef union {
@@ -60,7 +66,7 @@ typedef union {
 } thread_id_t;
 
 /* Global options */
-static boolean_t       verbose = FALSE;
+static int             verbose = 0;
 static boolean_t       affinity = FALSE;
 static boolean_t       timeshare = FALSE;
 static boolean_t       threaded = FALSE;
@@ -75,8 +81,27 @@ int                  client_delay;
 int                    client_spin;
 int                    client_pages;
 int                    portcount = 1;
+int                    setcount = 0;
+boolean_t              stress_prepost = FALSE;
 char                   **server_port_name;
 
+struct port_args       *server_port_args;
+
+/* global data */
+mach_timebase_info_data_t g_timebase;
+int64_t g_client_send_time = 0;
+
+static inline uint64_t ns_to_abs(uint64_t ns)
+{
+       return ns * g_timebase.denom / g_timebase.numer;
+}
+
+static inline uint64_t abs_to_ns(uint64_t abs)
+{
+       return abs * g_timebase.numer / g_timebase.denom;
+}
+
+
 void signal_handler(int sig) {
 }
 
@@ -86,20 +111,23 @@ void usage(const char *progname) {
        fprintf(stderr, "    -affinity\t\tthreads use affinity\n");
        fprintf(stderr, "    -timeshare\t\tthreads use timeshare\n");
        fprintf(stderr, "    -threaded\t\tuse (p)threads\n");
-       fprintf(stderr, "    -verbose\t\tbe verbose\n");
+       fprintf(stderr, "    -verbose\t\tbe verbose (use multiple times to increase verbosity)\n");
        fprintf(stderr, "    -oneway\t\tdo not request return reply\n");
        fprintf(stderr, "    -count num\t\tnumber of messages to send\n");
        fprintf(stderr, "    -type trivial|inline|complex\ttype of messages to send\n");
        fprintf(stderr, "    -numints num\tnumber of 32-bit ints to send in messages\n");
-       fprintf(stderr, "    -servers num\tnumber of servers threads to run\n");
+       fprintf(stderr, "    -servers num\tnumber of server threads to run\n");
        fprintf(stderr, "    -clients num\tnumber of clients per server\n");
        fprintf(stderr, "    -delay num\t\tmicroseconds to sleep clients between messages\n");
        fprintf(stderr, "    -work num\t\tmicroseconds of client work\n");
        fprintf(stderr, "    -pages num\t\tpages of memory touched by client work\n");
-       fprintf(stderr, "    -set num\t\tuse a portset stuffed with num ports in server\n");
+       fprintf(stderr, "    -set nset num\tcreate [nset] portsets and [num] ports in each server.\n");
+       fprintf(stderr, "                 \tEach port is connected to each set.\n");
+       fprintf(stderr, "    -prepost\t\tstress the prepost system (implies -threaded, requires -set X Y)\n");
        fprintf(stderr, "default values are:\n");
        fprintf(stderr, "    . no affinity\n");
        fprintf(stderr, "    . not timeshare\n");
+       fprintf(stderr, "    . not threaded\n");
        fprintf(stderr, "    . not verbose\n");
        fprintf(stderr, "    . not oneway\n");
        fprintf(stderr, "    . client sends 100000 messages\n");
@@ -108,6 +136,8 @@ void usage(const char *progname) {
        fprintf(stderr, "    . (num_available_processors+1)%%2 servers\n");
        fprintf(stderr, "    . 4 clients per server\n");
        fprintf(stderr, "    . no delay\n");
+       fprintf(stderr, "    . no sets / extra ports\n");
+       fprintf(stderr, "    . no prepost stress\n");
        exit(1);
 }
 
@@ -135,7 +165,7 @@ void parse_args(int argc, char *argv[]) {
        argc--; argv++;
        while (0 < argc) {
                if (0 == strcmp("-verbose", argv[0])) {
-                       verbose = TRUE;
+                       verbose++;
                        argc--; argv++;
                } else if (0 == strcmp("-affinity", argv[0])) {
                        affinity = TRUE;
@@ -197,14 +227,33 @@ void parse_args(int argc, char *argv[]) {
                        client_pages = strtoul(argv[1], NULL, 0);
                        argc -= 2; argv += 2;
                } else if (0 == strcmp("-set", argv[0])) {
-                       if (argc < 2) 
+                       if (argc < 3)
+                               usage(progname);
+                       setcount = strtoul(argv[1], NULL, 0);
+                       portcount = strtoul(argv[2], NULL, 0);
+                       if (setcount <= 0 || portcount <= 0)
                                usage(progname);
-                       portcount = strtoul(argv[1], NULL, 0);
                        useset = TRUE;
-                       argc -= 2; argv += 2;
+                       argc -= 3; argv += 3;
+               } else if (0 == strcmp("-prepost", argv[0])) {
+                       stress_prepost = TRUE;
+                       threaded = TRUE;
                        argc--; argv++;
-               } else 
+               } else {
+                       fprintf(stderr, "unknown option '%s'\n", argv[0]);
                        usage(progname);
+               }
+       }
+
+       if (stress_prepost) {
+               if (!threaded) {
+                       fprintf(stderr, "Prepost stress test _must_ be threaded\n");
+                       exit(1);
+               }
+               if (portcount < 1 || setcount < 1) {
+                       fprintf(stderr, "Prepost stress test requires >= 1 port in >= 1 set.\n");
+                       exit(1);
+               }
        }
 }
 
@@ -213,7 +262,6 @@ void setup_server_ports(struct port_args *ports)
        kern_return_t ret = 0;
        mach_port_t bsport;
        mach_port_t port;
-       int i;
 
        ports->req_size = MAX(sizeof(ipc_inline_message) +  
                        sizeof(u_int32_t) * num_ints, 
@@ -222,19 +270,49 @@ void setup_server_ports(struct port_args *ports)
                sizeof(mach_msg_trailer_t);
        ports->req_msg = malloc(ports->req_size);
        ports->reply_msg = malloc(ports->reply_size);
+       if (setcount > 0) {
+               ports->set = (mach_port_t *)calloc(sizeof(mach_port_t), setcount);
+               if (!ports->set) {
+                       fprintf(stderr, "calloc(%lu, %d) failed!\n", sizeof(mach_port_t), setcount);
+                       exit(1);
+               }
+       }
+       if (stress_prepost) {
+               ports->port_list = (mach_port_t *)calloc(sizeof(mach_port_t), portcount);
+               if (!ports->port_list) {
+                       fprintf(stderr, "calloc(%lu, %d) failed!\n", sizeof(mach_port_t), portcount);
+                       exit(1);
+               }
+       }
 
        if (useset) {
-               ret = mach_port_allocate(mach_task_self(), 
-                                        MACH_PORT_RIGHT_PORT_SET,  
-                                        &(ports->set));
-               if (KERN_SUCCESS != ret) {
-                       mach_error("mach_port_allocate(SET): ", ret);
+               mach_port_t set;
+               if (setcount < 1) {
+                       fprintf(stderr, "Can't use sets with a setcount of %d\n", setcount);
                        exit(1);
                }
+
+               for (int ns = 0; ns < setcount; ns++) {
+                       ret = mach_port_allocate(mach_task_self(),
+                                                MACH_PORT_RIGHT_PORT_SET,
+                                                &ports->set[ns]);
+                       if (KERN_SUCCESS != ret) {
+                               mach_error("mach_port_allocate(SET): ", ret);
+                               exit(1);
+                       }
+                       if (verbose > 1)
+                               printf("SVR[%d] allocated set[%d] %#x\n",
+                                      ports->server_num, ns, ports->set[ns]);
+
+                       set = ports->set[ns];
+               }
+
+               /* receive on a port set (always use the first in the chain) */
+               ports->rcv_set = ports->set[0];
        }
 
-       /* stuff the portset with ports */
-       for (i=0; i < portcount; i++) {
+       /* stuff the portset(s) with ports */
+       for (int i = 0; i < portcount; i++) {
                ret = mach_port_allocate(mach_task_self(), 
                                         MACH_PORT_RIGHT_RECEIVE,  
                                         &port);
@@ -243,27 +321,49 @@ void setup_server_ports(struct port_args *ports)
                        exit(1);
                }
 
+               if (stress_prepost)
+                       ports->port_list[i] = port;
+
                if (useset) {
-                       ret = mach_port_move_member(mach_task_self(), 
-                                                   port, 
-                                                   ports->set);
-                       if (KERN_SUCCESS != ret) {
-                               mach_error("mach_port_move_member(): ", ret);
-                               exit(1);
+                       /* insert the port into _all_ allocated lowest-level sets */
+                       for (int ns = 0; ns < setcount; ns++) {
+                               if (verbose > 1)
+                                       printf("SVR[%d] moving port %#x into set %#x...\n",
+                                              ports->server_num, port, ports->set[ns]);
+                               ret = mach_port_insert_member(mach_task_self(),
+                                                             port, ports->set[ns]);
+                               if (KERN_SUCCESS != ret) {
+                                       mach_error("mach_port_insert_member(): ", ret);
+                                       exit(1);
+                               }
                        }
                }
        }
 
-       /* use the last one as the real port */
+       /* use the last one as the server's bootstrap port */
        ports->port = port;
 
-       ret = mach_port_insert_right(mach_task_self(), 
-                       ports->port, 
-                       ports->port, 
-                       MACH_MSG_TYPE_MAKE_SEND);
-       if (KERN_SUCCESS != ret) {
-               mach_error("mach_port_insert_right(): ", ret);
-               exit(1);
+       if (stress_prepost) {
+               /* insert a send right for _each_ port */
+               for (int i = 0; i < portcount; i++) {
+                       ret = mach_port_insert_right(mach_task_self(),
+                                                    ports->port_list[i],
+                                                    ports->port_list[i],
+                                                    MACH_MSG_TYPE_MAKE_SEND);
+                       if (KERN_SUCCESS != ret) {
+                               mach_error("mach_port_insert_right(): ", ret);
+                               exit(1);
+                       }
+               }
+       } else {
+               ret = mach_port_insert_right(mach_task_self(),
+                                            ports->port,
+                                            ports->port,
+                                            MACH_MSG_TYPE_MAKE_SEND);
+               if (KERN_SUCCESS != ret) {
+                       mach_error("mach_port_insert_right(): ", ret);
+                       exit(1);
+               }
        }
 
        ret = task_get_bootstrap_port(mach_task_self(), &bsport);
@@ -273,8 +373,8 @@ void setup_server_ports(struct port_args *ports)
        }
 
        if (verbose) {
-               printf("server waiting for IPC messages from client on port '%s'.\n",
-                       server_port_name[ports->server_num]);
+               printf("server waiting for IPC messages from client on port '%s' (%#x).\n",
+                       server_port_name[ports->server_num], ports->port);
        }
        ret = bootstrap_register(bsport,
                                 server_port_name[ports->server_num],
@@ -313,14 +413,13 @@ void setup_client_ports(struct port_args *ports)
                exit(1);
        }
        if (verbose) {
-               printf("Client sending %d %s IPC messages to port '%s' in %s mode.\n",
+               printf("Client sending %d %s IPC messages to port '%s' in %s mode\n",
                                num_msgs, (msg_type == msg_type_inline) ? 
                                "inline" :  ((msg_type == msg_type_complex) ? 
                                        "complex" : "trivial"),  
                                server_port_name[ports->server_num],
                                (oneway ? "oneway" : "rpc"));
        }
-
 }
 
 
@@ -352,28 +451,31 @@ thread_setup(int tag) {
 }
 
 void *
-server(void *serverarg) 
+server(void *serverarg)
 {
-       struct port_args args;
        int idx;
        kern_return_t ret;
        int totalmsg = num_msgs * num_clients;
        mach_port_t recv_port;
+       uint64_t starttm, endtm;
+
+       int svr_num = (int)(uintptr_t)serverarg;
+       struct port_args *args = &server_port_args[svr_num];
 
-       args.server_num = (int) (long) serverarg;
-       setup_server_ports(&args);
+       args->server_num = svr_num;
+       setup_server_ports(args);
 
-       thread_setup(args.server_num + 1);
+       thread_setup(args->server_num + 1);
 
-       recv_port = (useset) ? args.set : args.port;
+       recv_port = (useset) ? args->rcv_set : args->port;
 
        for (idx = 0; idx < totalmsg; idx++) {
-               if (verbose)
+               if (verbose > 2)
                        printf("server awaiting message %d\n", idx);
-               ret = mach_msg(args.req_msg,  
+               ret = mach_msg(args->req_msg,
                                MACH_RCV_MSG|MACH_RCV_INTERRUPT|MACH_RCV_LARGE, 
                                0, 
-                               args.req_size,  
+                               args->req_size,
                                recv_port, 
                                MACH_MSG_TIMEOUT_NONE, 
                                MACH_PORT_NULL);
@@ -385,25 +487,25 @@ server(void *serverarg)
                        mach_error("mach_msg (receive): ", ret);
                        exit(1);
                }
-               if (verbose)
+               if (verbose > 2)
                        printf("server received message %d\n", idx);
-               if (args.req_msg->msgh_bits & MACH_MSGH_BITS_COMPLEX) {
+               if (args->req_msg->msgh_bits & MACH_MSGH_BITS_COMPLEX) {
                        ret = vm_deallocate(mach_task_self(),  
-                                       (vm_address_t)((ipc_complex_message *)args.req_msg)->descriptor.address,  
-                                       ((ipc_complex_message *)args.req_msg)->descriptor.size);
+                                       (vm_address_t)((ipc_complex_message *)args->req_msg)->descriptor.address,
+                                       ((ipc_complex_message *)args->req_msg)->descriptor.size);
                }
 
-               if (1 == args.req_msg->msgh_id) {
-                       if (verbose)
+               if (1 == args->req_msg->msgh_id) {
+                       if (verbose > 2)
                                printf("server sending reply %d\n", idx);
-                       args.reply_msg->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_MOVE_SEND_ONCE, 0);
-                       args.reply_msg->msgh_size = args.reply_size;
-                       args.reply_msg->msgh_remote_port = args.req_msg->msgh_remote_port;
-                       args.reply_msg->msgh_local_port = MACH_PORT_NULL;
-                       args.reply_msg->msgh_id = 2;
-                       ret = mach_msg(args.reply_msg, 
+                       args->reply_msg->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_MOVE_SEND_ONCE, 0);
+                       args->reply_msg->msgh_size = args->reply_size;
+                       args->reply_msg->msgh_remote_port = args->req_msg->msgh_remote_port;
+                       args->reply_msg->msgh_local_port = MACH_PORT_NULL;
+                       args->reply_msg->msgh_id = 2;
+                       ret = mach_msg(args->reply_msg,
                                        MACH_SEND_MSG, 
-                                       args.reply_size, 
+                                       args->reply_size,
                                        0, 
                                        MACH_PORT_NULL, 
                                        MACH_MSG_TIMEOUT_NONE,  
@@ -414,6 +516,36 @@ server(void *serverarg)
                        }
                }
        }
+
+       if (!useset)
+               return NULL;
+
+       if (verbose < 1)
+               return NULL;
+
+       uint64_t deltans = 0;
+       /*
+        * If we're using multiple sets, explicitly tear them all down
+        * and measure the time.
+        */
+       for (int ns = 0; ns < setcount; ns++) {
+               if (verbose > 1)
+                       printf("\tTearing down set[%d] %#x...\n", ns, args->set[ns]);
+               starttm = mach_absolute_time();
+               ret = mach_port_mod_refs(mach_task_self(), args->set[ns], MACH_PORT_RIGHT_PORT_SET, -1);
+               endtm = mach_absolute_time();
+               deltans += abs_to_ns(endtm - starttm);
+               if (ret != KERN_SUCCESS) {
+                       mach_error("mach_port_mod_refs(): ", ret);
+                       exit(1);
+               }
+       }
+
+       uint64_t nlinks = (uint64_t)setcount * (uint64_t)portcount;
+
+       printf("\tteardown of %llu links took %llu ns\n", nlinks, deltans);
+       printf("\t%lluns per set\n", deltans / (uint64_t)setcount);
+
        return NULL;
 }
 
@@ -476,7 +608,7 @@ calibrate_client_work(void)
                        calibration_count /= calibration_usec;
                        break;
                }
-               if (verbose)
+               if (verbose > 1)
                        printf("calibration_count=%d calibration_usec=%d\n",
                                calibration_count, calibration_usec);
        }
@@ -501,11 +633,12 @@ client_work(void)
 void *client(void *threadarg) 
 {
        struct port_args args;
+       struct port_args *svr_args = NULL;
        int idx;
        mach_msg_header_t *req, *reply; 
        mach_port_t bsport, servport;
        kern_return_t ret;
-       int server_num = (int) threadarg;
+       int server_num = (int)(uintptr_t)threadarg;
        void *ints = malloc(sizeof(u_int32_t) * num_ints);
 
        if (verbose) 
@@ -515,6 +648,9 @@ void *client(void *threadarg)
        args.server_num = server_num;
        thread_setup(server_num + 1);
 
+       if (stress_prepost)
+               svr_args = &server_port_args[server_num];
+
        /* find server port */
        ret = task_get_bootstrap_port(mach_task_self(), &bsport);
        if (KERN_SUCCESS != ret) {
@@ -538,17 +674,28 @@ void *client(void *threadarg)
                for (i = 0; i < client_pages; i++)
                        client_memory[i * PAGE_SIZE / sizeof(long)] = 0;
        }
+
+       uint64_t starttm, endtm;
        
        /* start message loop */
        for (idx = 0; idx < num_msgs; idx++) {
                req = args.req_msg;
                reply = args.reply_msg;
 
-               req->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 
-                               MACH_MSG_TYPE_MAKE_SEND_ONCE);
                req->msgh_size = args.req_size;
-               req->msgh_remote_port = servport;
-               req->msgh_local_port = args.port;
+               if (stress_prepost) {
+                       req->msgh_remote_port = svr_args->port_list[idx % portcount];
+               } else {
+                       req->msgh_remote_port = servport;
+               }
+               if (oneway) {
+                       req->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0);
+                       req->msgh_local_port = MACH_PORT_NULL;
+               } else {
+                       req->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND,
+                                                       MACH_MSG_TYPE_MAKE_SEND_ONCE);
+                       req->msgh_local_port = args.port;
+               }
                req->msgh_id = oneway ? 0 : 1;
                if (msg_type == msg_type_complex) {
                        (req)->msgh_bits |=  MACH_MSGH_BITS_COMPLEX;
@@ -560,8 +707,10 @@ void *client(void *threadarg)
                        ((ipc_complex_message *)req)->descriptor.copy = MACH_MSG_VIRTUAL_COPY;
                        ((ipc_complex_message *)req)->descriptor.type = MACH_MSG_OOL_DESCRIPTOR;
                }
-               if (verbose) 
-                       printf("client sending message %d\n", idx);
+               if (verbose > 2)
+                       printf("client sending message %d to port %#x\n",
+                              idx, req->msgh_remote_port);
+               starttm = mach_absolute_time();
                ret = mach_msg(req,  
                                MACH_SEND_MSG, 
                                args.req_size, 
@@ -569,14 +718,18 @@ void *client(void *threadarg)
                                MACH_PORT_NULL,  
                                MACH_MSG_TIMEOUT_NONE, 
                                MACH_PORT_NULL);
+               endtm = mach_absolute_time();
                if (MACH_MSG_SUCCESS != ret) {
                        mach_error("mach_msg (send): ", ret);
                        fprintf(stderr, "bailing after %u iterations\n", idx);
                        exit(1);
                        break;
                }
+               if (stress_prepost)
+                       OSAtomicAdd64(endtm - starttm, &g_client_send_time);
+
                if (!oneway) {
-                       if (verbose)
+                       if (verbose > 2)
                                printf("client awaiting reply %d\n", idx);
                        reply->msgh_bits = 0;
                        reply->msgh_size = args.reply_size;
@@ -594,7 +747,7 @@ void *client(void *threadarg)
                                                idx);
                                exit(1);
                        }
-                       if (verbose)
+                       if (verbose > 2)
                                printf("client received reply %d\n", idx);
                }
 
@@ -616,17 +769,17 @@ thread_spawn(thread_id_t *thread, void *(fn)(void *), void *arg) {
                                arg);
                if (ret != 0)
                        err(1, "pthread_create()");
-               if (verbose)
+               if (verbose > 1)
                        printf("created pthread %p\n", thread->tid);
        } else {
                thread->pid = fork();
                if (thread->pid == 0) {
-                       if (verbose)
+                       if (verbose > 1)
                                printf("calling %p(%p)\n", fn, arg);
                        fn(arg);
                        exit(0);
                }
-               if (verbose)
+               if (verbose > 1)
                        printf("forked pid %d\n", thread->pid);
        }
 }
@@ -635,14 +788,14 @@ static void
 thread_join(thread_id_t *thread) {
        if (threaded) {
                kern_return_t   ret;
-               if (verbose)
+               if (verbose > 1)
                        printf("joining thread %p\n", thread->tid);
                ret = pthread_join(thread->tid, NULL);
                if (ret != KERN_SUCCESS)
                        err(1, "pthread_join(%p)", thread->tid);
        } else {
                int     stat;
-               if (verbose)
+               if (verbose > 1)
                        printf("waiting for pid %d\n", thread->pid);
                waitpid(thread->pid, &stat, 0);
        }
@@ -690,6 +843,11 @@ int main(int argc, char *argv[])
        signal(SIGINT, signal_handler);
        parse_args(argc, argv);
 
+       if (mach_timebase_info(&g_timebase) != KERN_SUCCESS) {
+               fprintf(stderr, "Can't get mach_timebase_info!\n");
+               exit(1);
+       }
+
        calibrate_client_work();
 
        /*
@@ -701,6 +859,12 @@ int main(int argc, char *argv[])
 
        server_id = (thread_id_t *) malloc(num_servers * sizeof(thread_id_t));
        server_port_name = (char **) malloc(num_servers * sizeof(char *));
+       server_port_args = (struct port_args *)calloc(sizeof(struct port_args), num_servers);
+       if (!server_id || !server_port_name || !server_port_args) {
+               fprintf(stderr, "malloc/calloc of %d server book keeping structs failed\n", num_servers);
+               exit(1);
+       }
+
        if (verbose)
                printf("creating %d servers\n", num_servers);
        for (i = 0; i < num_servers; i++) {
@@ -751,6 +915,8 @@ int main(int argc, char *argv[])
        }
 
        gettimeofday(&endtv, NULL);
+       if (verbose)
+               printf("all servers complete: waiting for clients...\n");
 
        for (i = 0; i < totalclients; i++) {
                thread_join(&client_id[i]);
@@ -767,13 +933,21 @@ int main(int argc, char *argv[])
        double dsecs = (double) deltatv.tv_sec + 
                1.0E-6 * (double) deltatv.tv_usec;
 
-       printf(" in %u.%03u seconds\n",  
+       printf(" in %lu.%03u seconds\n",
                        deltatv.tv_sec, deltatv.tv_usec/1000);
        printf("  throughput in messages/sec:     %g\n",
                        (double)totalmsg / dsecs);
        printf("  average message latency (usec): %2.3g\n", 
                        dsecs * 1.0E6 / (double) totalmsg);
 
+       if (stress_prepost) {
+               int64_t sendns = abs_to_ns(g_client_send_time);
+               dsecs = (double)sendns / (double)NSEC_PER_SEC;
+               printf("  total send time: %2.3gs\n", dsecs);
+               printf("  average send time (usec): %2.3g\n",
+                      dsecs * 1.0E6 / (double)totalmsg);
+       }
+
        return (0);
 
 }
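
For context, a hypothetical invocation exercising the new options (the binary name and the counts are illustrative, not part of the change): -set now takes a set count and a per-server port count, every port is inserted into every set, and -prepost forces threaded mode, spreads sends round-robin across the ports, and accumulates per-send time via mach_absolute_time:

    # 2 server threads, 4 clients each; 4 port sets x 64 ports per server;
    # repeat -verbose to raise verbosity and print per-port/set detail
    ./MPMMtest -servers 2 -clients 4 -count 100000 \
               -set 4 64 -prepost -verbose -verbose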
index 8bfd134407bcdb92a29b9c305488002b36581e3e..f5cb5de8bdb13be6db9d0eb019868530d84e89c1 100644 (file)
@@ -1,15 +1,11 @@
-SDKROOT ?= /
-ifeq "$(RC_TARGET_CONFIG)" "iPhone"
-Embedded?=YES
-else
-Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO)
-endif
+include ../Makefile.common
 
 CC:=$(shell xcrun -sdk "$(SDKROOT)" -find cc)
 
 SYMROOT?=$(shell /bin/pwd)
 
-CFLAGS := -g -O2 -I$(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders
+CFLAGS := -g -O2 -isysroot $(SDKROOT) -I$(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders
+CFLAGS += -Wno-deprecated-declarations
 
 ifdef RC_ARCHS
     ARCHS:=$(RC_ARCHS)
@@ -55,7 +51,7 @@ $(DSTROOT)/KQMPMMtestD: KQMPMMtest.c
        if [ ! -e $@ ]; then ditto $(SYMROOT)/$(notdir $@) $@; fi
 
 $(DSTROOT)/KQMPMMtest_64D: KQMPMMtest.c
-       ${CC} ${CFLAG} ${ARCH_64_FLAGS} -DDIRECT_MSG_RCV=1 -o $(SYMROOT)/$(notdir $@) $?
+       ${CC} ${CFLAGS} ${ARCH_64_FLAGS} -DDIRECT_MSG_RCV=1 -o $(SYMROOT)/$(notdir $@) $?
        if [ ! -e $@ ]; then ditto $(SYMROOT)/$(notdir $@) $@; fi
 
 clean:
index 37bd9e536462aba1e3a5744efb071b73502fdd36..7a2093aa2d4bee2207454a845432ce59b64a68e8 100644 (file)
@@ -1,30 +1,27 @@
-ifdef RC_ProjectName
-DSTSUBPATH = $(DSTROOT)/AppleInternal/CoreOS
-else
-DSTSUBPATH = $(DSTROOT)
-endif
+include Makefile.common
 
-OBJROOT?=$(shell /bin/pwd)
-
-SDKROOT ?= macosx.internal
-
-# SDKROOT may be passed as a shorthand like "iphoneos.internal". We
-# must resolve these to a full path and override SDKROOT.
-
-SDKROOT_RESOLVED := $(shell xcrun -sdk $(SDKROOT) -show-sdk-path)
-ifeq ($(strip $(SDKROOT)_$(SDKROOT_RESOLVED)),/_)
-SDKROOT_RESOLVED := /
+ifndef SRCROOT
+export SRCROOT := $(shell /bin/pwd)
+endif
+ifndef OBJROOT
+export OBJROOT = $(SRCROOT)/BUILD/obj
+endif
+ifndef DSTROOT
+export DSTROOT = $(SRCROOT)/BUILD/dst
+endif
+ifndef SYMROOT
+export SYMROOT = $(SRCROOT)/BUILD/sym
 endif
-override SDKROOT = $(SDKROOT_RESOLVED)
-
 
-ifeq "$(RC_TARGET_CONFIG)" "iPhone"
-Embedded?=YES
+ifdef RC_ProjectName
+DSTSUBPATH = $(DSTROOT)/AppleInternal/CoreOS/tests/xnu
+BATS_CONFIG_PATH = $(DSTROOT)/AppleInternal/CoreOS
 else
-Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO)
+DSTSUBPATH = $(DSTROOT)
+BATS_CONFIG_PATH = $(DSTROOT)
 endif
 
-COMMON_TARGETS = xnu_quick_test                \
+COMMON_TARGETS = unit_tests \
                MPMMTest                \
                affinity                \
                execperf                \
@@ -32,22 +29,25 @@ COMMON_TARGETS = xnu_quick_test             \
                superpages              \
                zero-to-n               \
                jitter                  \
-               perf_index              \
-               unit_tests
+               perf_index
 
-IPHONE_TARGETS = memorystatus
+IPHONE_TARGETS = 
 
 MAC_TARGETS = 
 
+
+BATS_TARGET = $(BATS_CONFIG_PATH)/BATS
+
 ifeq "$(Embedded)" "YES"
 TARGETS =      $(addprefix $(DSTSUBPATH)/, $(COMMON_TARGETS) $(IPHONE_TARGETS))
 else
 TARGETS =      $(addprefix $(DSTSUBPATH)/, $(COMMON_TARGETS) $(MAC_TARGETS))
 endif
 
-all:   $(TARGETS)
+all:   $(BATS_TARGET) $(TARGETS)
 
-$(DSTSUBPATH)/%:
+$(BATS_TARGET) $(DSTSUBPATH)/%:
        mkdir -p $@
        mkdir -p $(OBJROOT)/$(notdir $@) 
+       mkdir -p $(SYMROOT)
        $(MAKE) -C $(SRCROOT)/$(notdir $@) SRCROOT=$(SRCROOT)/$(notdir $@) DSTROOT=$@ OBJROOT=$(OBJROOT)/$(notdir $@) SDKROOT=$(SDKROOT)
diff --git a/tools/tests/Makefile.common b/tools/tests/Makefile.common
new file mode 100644 (file)
index 0000000..70e7f02
--- /dev/null
@@ -0,0 +1,48 @@
+#
+# Common definitions for test directories
+#
+
+XCRUN := /usr/bin/xcrun
+SDKROOT ?= macosx.internal
+
+# SDKROOT may be passed as a shorthand like "iphoneos.internal". We
+# must resolve these to a full path and override SDKROOT.
+
+SDKROOT_RESOLVED := $(shell xcrun -sdk $(SDKROOT) -show-sdk-path)
+ifeq ($(strip $(SDKROOT)_$(SDKROOT_RESOLVED)),/_)
+SDKROOT_RESOLVED := /
+endif
+override SDKROOT = $(SDKROOT_RESOLVED)
+
+SDKVERSION := $(shell $(XCRUN) -sdk $(SDKROOT) -show-sdk-version)
+
+PLATFORMPATH := $(shell xcrun -sdk $(SDKROOT) -show-sdk-platform-path)
+PLATFORM := $(shell echo $(PLATFORMPATH) | sed 's,^.*/\([^/]*\)\.platform$$,\1,')
+
+ifeq ($(PLATFORM),watchOS)
+    PLATFORM := WatchOS
+endif
+
+SUPPORTED_EMBEDDED_PLATFORMS := iPhoneOS iPhoneOSNano tvOS AppleTVOS WatchOS
+Embedded = $(if $(filter $(SUPPORTED_EMBEDDED_PLATFORMS),$(PLATFORM)),YES,NO)
+
+#
+# Deployment target flag
+#
+ifeq ($(PLATFORM),MacOSX)
+    DEPLOYMENT_TARGET_FLAGS = -mmacosx-version-min=$(SDKVERSION)
+else ifeq ($(PLATFORM),WatchOS)
+    DEPLOYMENT_TARGET_FLAGS = -mwatchos-version-min=$(SDKVERSION)
+else ifeq ($(PLATFORM),tvOS)
+    DEPLOYMENT_TARGET_FLAGS = -mtvos-version-min=$(SDKVERSION)
+else ifeq ($(PLATFORM),AppleTVOS)
+    DEPLOYMENT_TARGET_FLAGS = -mtvos-version-min=$(SDKVERSION)
+else ifneq ($(filter $(SUPPORTED_EMBEDDED_PLATFORMS),$(PLATFORM)),)
+    DEPLOYMENT_TARGET_FLAGS = -miphoneos-version-min=$(SDKVERSION)
+else ifneq ($(filter $(SUPPORTED_SIMULATOR_PLATFORMS),$(PLATFORM)),)
+    DEPLOYMENT_TARGET_FLAGS =
+else
+    DEPLOYMENT_TARGET_FLAGS =
+endif
+
+DEPLOYMENT_TARGET_DEFINES = -DPLATFORM_$(PLATFORM)
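
The per-test Makefiles above now just "include ../Makefile.common", so SDK resolution, the Embedded check, and the deployment-target flags live in one place; a sketch of building a single test directory against an explicitly chosen SDK (directory and SDK names are only examples):

    # Makefile.common resolves the short SDK name via xcrun and derives
    # PLATFORM / Embedded / DEPLOYMENT_TARGET_FLAGS from it
    cd tools/tests/MPMMTest
    make SDKROOT=macosx.internal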
index 41e3848bed5f35bde7e62cc0555d4ef0093e7c27..98f4e9e451cf69c8bac227361d9f4dc1cdf5a3dc 100644 (file)
@@ -1,9 +1,4 @@
-SDKROOT ?= /
-ifeq "$(RC_TARGET_CONFIG)" "iPhone"
-Embedded?=YES
-else
-Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO)
-endif
+include ../Makefile.common
 
 CC:=$(shell xcrun -sdk "$(SDKROOT)" -find cc)
 
@@ -24,7 +19,7 @@ ARCH_32_FLAGS := $(patsubst %, -arch %, $(ARCH_32))
 ARCH_64 := $(filter %64, $(ARCHS))
 ARCH_64_FLAGS := $(patsubst %, -arch %, $(ARCH_64))
 
-CFLAGS         :=-g -I$(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders
+CFLAGS         :=-g -isysroot $(SDKROOT) -I$(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders
 
 DSTROOT?=$(shell /bin/pwd)
 SRCROOT?=$(shell /bin/pwd)
index d67091354d5b1e1c50018c9be2cafccaea2cd321..ebf4bcdbd5becbeb8b8bfa060617aee7f12a8f43 100644 (file)
@@ -1,9 +1,4 @@
-SDKROOT ?= /
-ifeq "$(RC_TARGET_CONFIG)" "iPhone"
-Embedded?=YES
-else
-Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO)
-endif
+include ../Makefile.common
 
 ifdef RC_ARCHS
     ARCHS:=$(RC_ARCHS)
index 43d2dc2edd7f548f663b0ae2c79167a5618b72c6..f78950c1a86682260818cbfedffe9c57deb67028 100644 (file)
@@ -1,9 +1,4 @@
-SDKROOT ?= /
-ifeq "$(RC_TARGET_CONFIG)" "iPhone"
-Embedded?=YES
-else
-Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO)
-endif
+include ../Makefile.common
 
 OBJROOT?=$(shell /bin/pwd)
 
@@ -19,11 +14,9 @@ ifdef RC_ARCHS
   endif
 endif
 
-
-
 DSTROOT?=$(shell /bin/pwd)
 
-CFLAGS:=$(patsubst %, -arch %,$(ARCHS)) -g -Wall -Os -I$(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders
+CFLAGS:=$(patsubst %, -arch %,$(ARCHS)) -g -Wall -Os -isysroot $(SDKROOT) -I$(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders
 
 all: $(DSTROOT)/jitter
 
index 8d10021ed71362099448aa5da3eb501b7ea99264..d8ffd2c16973dfb8f880bf228ca003ec07acbe64 100755 (executable)
@@ -1,9 +1,4 @@
-SDKROOT ?= /
-ifeq "$(RC_TARGET_CONFIG)" "iPhone"
-Embedded?=YES
-else
-Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO)
-endif
+include ../Makefile.common
 
 CC:=$(shell xcrun -sdk "$(SDKROOT)" -find cc)
 
@@ -17,7 +12,7 @@ ifdef RC_ARCHS
   endif
 endif
 
-CFLAGS :=-g $(patsubst %, -arch %,$(ARCHS))
+CFLAGS :=-g $(patsubst %, -arch %,$(ARCHS)) -isysroot $(SDKROOT)
 
 DSTROOT?=$(shell /bin/pwd)
 SYMROOT?=$(shell /bin/pwd)
index ea890e55a1ed40d63feb988c9e7d404613160461..de0582b41754c1815de63c08af322cf7241497c0 100755 (executable)
@@ -180,16 +180,16 @@ cp ./apple/bin-*/posix_spawn_bin $bin_dir/$A
 newline=0
 
 #
-# Everything below the while loop is input for the while loop
-# if you have any tests which can't run in the while loop, put
+# Everything below the while loop is input for the while loop if
+# you have any tests which can't run in the while loop, put
 # them above this comment
 #
 while read A B
 do
        # $A contains the command, $B contains the arguments
        # we echo blank lines and comments
-       # we skip anything which fails to match *$1* (useful
-       # if we only want to test one case, but a nasty hack)
+       # we skip anything which fails to match *$1* (useful if
+       # we only want to test one case, but a nasty hack)
 
        case $A in
        \#*)
index 26a2bc562770e975f325722d33e382937b7d0f60..cb583001bfc317ea4b595778f9ea87081273c074 100755 (executable)
@@ -193,16 +193,16 @@ HOST_BASE=sfs0
 HOST_RANGE=1-8
 
 #
-# Everything below the while loop is input for the while loop
-# if you have any tests which can't run in the while loop, put
+# Everything below the while loop is input for the while loop if
+# you have any tests which can't run in the while loop, put
 # them above this comment
 #
 while read A B
 do
        # $A contains the command, $B contains the arguments
        # we echo blank lines and comments
-       # we skip anything which fails to match *$1* (useful
-       # if we only want to test one case, but a nasty hack)
+       # we skip anything which fails to match *$1* (useful if
+       # we only want to test one case, but a nasty hack)
 
        case $A in
        \#*)
index b153b43dd4fed970f0ca97d5431271cbe63ff752..a89205025dec53e8dcb5fd4ea907e8afae3ec6f4 100755 (executable)
@@ -206,16 +206,16 @@ fi
 #endif /* End of Apple code */
 
 #
-# Everything below the while loop is input for the while loop
-# if you have any tests which can't run in the while loop, put
+# Everything below the while loop is input for the while loop if
+# you have any tests which can't run in the while loop, put
 # them above this comment
 #
 while read A B
 do
        # $A contains the command, $B contains the arguments
        # we echo blank lines and comments
-       # we skip anything which fails to match *$1* (useful
-       # if we only want to test one case, but a nasty hack)
+       # we skip anything which fails to match *$1* (useful if
+       # we only want to test one case, but a nasty hack)
 
        case $A in
        \#*)
index 7b61d0f59dde7e680caae8a04eb409ec24057e3a..3abdb4368399cca281aee3c150837755764145be 100755 (executable)
@@ -186,16 +186,16 @@ cp ./apple/bin-*/posix_spawn_bin $bin_dir/$A
 newline=0
 
 #
-# Everything below the while loop is input for the while loop
-# if you have any tests which can't run in the while loop, put
+# Everything below the while loop is input for the while loop if
+# you have any tests which can't run in the while loop, put
 # them above this comment
 #
 while read A B
 do
        # $A contains the command, $B contains the arguments
        # we echo blank lines and comments
-       # we skip anything which fails to match *$1* (useful
-       # if we only want to test one case, but a nasty hack)
+       # we skip anything which fails to match *$1* (useful if
+       # we only want to test one case, but a nasty hack)
 
        case $A in
        \#*)
index fa9e3b768bfefe936899877021fdab59cf841fd7..f087cdcadd38a6881459d6108f6f74d8633b7548 100755 (executable)
@@ -42,7 +42,7 @@ if [ $# -ne 2 ]; then
   usage
 fi
 
-# if local node we don't need credentials
+# we don't need credentials if it's a local node
 if [ $NODE != "/Local/Default" ]; then
   OD_ADMIN="diradmin"
   OD_PASS="admin"
index 00ea4e251a360c56cd70b44cce459c3832ff5687..45fb0e42ce5676080351aa230dab8434413a905d 100755 (executable)
@@ -36,7 +36,7 @@ if [ $# -ne 2 ]; then
   usage
 fi
 
-# if local node we don't need credentials
+# we don't need credentials if it's a local node
 if [ $NODE != "/Local/Default" ]; then
   OD_ADMIN="diradmin"
   OD_PASS="admin"
diff --git a/tools/tests/memorystatus/Makefile b/tools/tests/memorystatus/Makefile
deleted file mode 100644 (file)
index 6a97b65..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/usr/bin/make
-
-DSTROOT?=$(shell /bin/pwd)
-CC:=$(shell xcrun -sdk "$(SDKROOT)" -find clang)
-CFLAGS:=-I. -g
-
-ifdef RC_ARCHS
-    ARCH:=$(RC_ARCHS)
-  else
-    ifeq "$(Embedded)" "YES"
-      ARCH:=armv7 armv7s arm64
-    else
-      ARCH:=x86_64 i386
-  endif
-endif
-
-ifeq "$(RC_TARGET_CONFIG)" "iPhone"
-Embedded?=YES
-else
-Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO)
-endif
-
-ifeq "$(Embedded)" "NO"
-    SDKROOT:=$(shell xcodebuild -sdk macosx.internal -version Path)
-else
-    SDKROOT:=$(shell xcodebuild -sdk iphoneos.internal -version Path)
-endif
-
-MY_ARCH := $(patsubst %, -arch %, $(ARCH)) # allows building multiple archs.
-
-all: $(DSTROOT)/memorystatus \
-       $(DSTROOT)/memorystatus_groups
-
-$(DSTROOT)/memorystatus_groups: memorystatus_groups.c
-       $(CC) $(MY_ARCH)  -isystem $(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders -o $(SYMROOT)/$(notdir $@) memorystatus_groups.c $(CFLAGS)
-       ditto $(SYMROOT)/$(notdir $@) $@
-       codesign -f -s - $@
-
-
-$(DSTROOT)/memorystatus: memorystatus.c
-       $(CC) $(MY_ARCH) -framework CoreFoundation -framework ServiceManagement -F $(SDKROOT)/System/Library/PrivateFrameworks/ -isystem $(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders -o $(SYMROOT)/$(notdir $@) memorystatus.c $(CFLAGS)
-       ditto $(SYMROOT)/$(notdir $@) $@
-       codesign -f -s - $@
-
-clean: 
-       rm -f $(DSTROOT)/memorystatus
-       rm -f $(DSTROOT)/memorystatus_groups
diff --git a/tools/tests/memorystatus/memorystatus.c b/tools/tests/memorystatus/memorystatus.c
deleted file mode 100644 (file)
index 89ac971..0000000
+++ /dev/null
@@ -1,822 +0,0 @@
-#include <asl.h>
-#include <assert.h>
-#include <fcntl.h>
-#include <pthread.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include <unistd.h>
-
-#include <libproc.h>
-
-#include <mach/mach.h>
-#include <mach/mach_types.h>
-#include <mach/mach_vm.h>
-#include <mach/shared_region.h>
-#include <mach/task_info.h>
-#include <mach/vm_map.h>
-#include <mach/vm_page_size.h> /* Needed for vm_region info */
-
-#include <sys/event.h>
-#include <sys/ipc.h>
-#include <sys/kern_memorystatus.h>
-#include <sys/mman.h>
-#include <sys/shm.h>
-#include <sys/stat.h>
-#include <sys/sysctl.h>
-#include <sys/wait.h>
-
-#include <xpc/xpc.h>
-#include <xpc/private.h>
-
-#include <CoreFoundation/CoreFoundation.h>
-
-#include <Security/Security.h>
-#include <ServiceManagement/ServiceManagement.h>
-#include <ServiceManagement/SMErrors.h>
-
-#include <Kernel/kern/ledger.h>
-
-#include <sys/spawn_internal.h>
-#include <spawn_private.h>
-
-#define CR_JOB "com.apple.ReportCrash.Jetsam"
-#define CR_JOB_PLIST_PATH "/System/Library/LaunchDaemons/com.apple.ReportCrash.Jetsam.plist"
-
-#define ERR_BUF_LEN 1024
-
-#ifndef VM_PAGE_SIZE
-#define VM_PAGE_SIZE 4096
-#endif
-
-#define TASK_LIMIT_MB 75
-#define HWM_LIMIT_MB 8
-
-/*
- * Blob of data that is not easily compressed.
- * Guaranteed during setup to be at least
- * RANDOM_DATA_SIZE in length.
- */
-
-#define RANDOM_DATA_SIZE 4096
-char   random_data[] = "ffd8ffe000104a46494600010101002400240000ffe100744578696600004d4d002a000000080004011a0005000000010000003e011b0005000000010000004601280003000000010002000087690004000000010000004e00000000000000240000000100000024000000010002a002000400000001000003c0a003000400000001000001ff00000000ffdb00430002020202020102020202020202030306040303030307050504060807080808070808090a0d0b09090c0a08080b0f0b0c0d0e0e0e0e090b10110f0e110d0e0e0effdb004301020202030303060404060e0908090e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0effc000110801ff03c003012200021101031101ffc4001f0000010501010101010100000000000000000102030405060708090a0bffc400b5100002010303020403050504040000017d01020300041105122131410613516107227114328191a1082342b1c11552d1f02433627282090a161718191a25262728292a3435363738393a434445464748494a535455565758595a636465666768696a737475767778797a838485868788898a92939495969798999aa2a3a4a5a6a7a8a9aab2b3b4b5b6b7b8b9bac2c3c4c5c6c7c8c9cad2d3d4d5d6d7d8d9dae1e2e3e4e5e6e7e8e9eaf1f2f3f4f5f6f7f8f9faffc4001f0100030101010101010101010000000000000102030405060708090a0bffc400b51100020102040403040705040400010277000102031104052131061241510761711322328108144291a1b1c109233352f0156272d10a162434e125f11718191a262728292a35363738393a434445464748494a535455565758595a636465666768696a737475767778797a82838485868788898a92939495969798999aa2a3a4a5a6a7a8a9aab2b3b4b5b6b7b8b9bac2c3c4c5c6c7c8c9cad2d3d4d5d6d7d8d9dae2e3e4e5e6e7e8e9eaf2f3f4f5f6f7f8f9faffda000c03010002110311003f00f9e74fbd37baa2db99e6506391f28371f9519ba67fd9fcabd46cbc1315de8d6776752d7419e049084b152a37283c1dfc8e6bc02db4af18d9df79c9e1bd59a40ae9b65b1761f32953c63ae09c7a1c57656fe24f8896da7c16c9e0bb3748a358d5a4d04b31006324f73c75a00935f7fec9f165ee98b7372e2ddc05795763f2a0f20138ebeb590bac3e70d2b6e1fed1ac6d4ecbc65aa6b973a85c7867528a6998168edec1a38c1c01c2f61c550fec1f16ff00d0bdade4f5ff00447ff0a00eaffb5dbfe7abfe668fed76ff009eaff99ae57fb07c5bff0042f6b7ff00808ffe147f60f8b7fe85ed6fff00011ffc2803aafed76ff9eaff0099a3fb5dbfe7abfe66b95fec1f16ff00d0bdadff00e023ff00851fd83e2dff00a17b5bff00c047ff000a00eabfb5dbfe7abfe668fed76ff9eaff0099ae57fb07c5bff42f6b7ff808ff00e147f60f8b7fe85ed6ff00f011ff00c2803aafed76ff009eaff99a3fb5dbfe7abfe66b95fec1f16ffd0bdadffe023ff851fd83e2dffa17b5bffc047ff0a00eabfb5dbfe7abfe668fed76ff009eaff99ae57fb07c5bff0042f6b7ff00808ffe147f60f8b7fe85ed6fff00011ffc2803aafed76ff9eaff0099a3fb5dbfe7abfe66b95fec1f16ff00d0bdadff00e023ff00851fd83e2dff00a17b5bff00c047ff000a00eabfb5dbfe7abfe668fed76ff9eaff0099ae57fb07c5bff42f6b7ff808ff00e147f60f8b7fe85ed6ff00f011ff00c2803aafed76ff009eaff99a3fb5dbfe7abfe66b95fec1f16ffd0bdadffe023ff851fd83e2dffa17b5bffc047ff0a00eabfb5dbfe7abfe668fed76ff009eaff99ae57fb07c5bff0042f6b7ff00808ffe147f60f8b7fe85ed6fff00011ffc2803aafed76ff9eaff0099a3fb5dbfe7abfe66b95fec1f16ff00d0bdadff00e023ff00851fd83e2dff00a17b5bff00c047ff000a00eabfb5dbfe7abfe668fed76ff9eaff0099ae57fb07c5bff42f6b7ff808ff00e147f60f8b7fe85ed6ff00f011ff00c2803aafed76ff009eaff99a3fb5dbfe7abfe66b95fec1f16ffd0bdadffe023ff851fd83e2dffa17b5bffc047ff0a00eabfb5dbfe7abfe668fed76ff009eaff99ae57fb07c5bff0042f6b7ff00808ffe147f60f8b7fe85ed6fff00011ffc2803aafed76ff9eaff0099a3fb5dbfe7abfe66b95fec1f16ff00d0bdadff00e023ff00851fd83e2dff00a17b5bff00c047ff000a00eabfb5dbfe7abfe668fed76ff9eaff0099ae57fb07c5bff42f6b7ff808ff00e147f60f8b7fe85ed6ff00f011ff00c2803aafed76ff009eaff99a3fb5dbfe7abfe66b95fec1f16ffd0bdadffe023ff851fd83e2dffa17b5bffc047ff0a00eabfb5dbfe7abfe668fed76ff009eaff99ae57fb07c5bff0042f6b7ff00808ffe147f60f8b7fe85ed6fff00011ffc2803aafed76ff9eaff0099a3fb5dbfe7abfe66b95fec1f16ff00d0bdadff00e023
ff00851fd83e2dff00a17b5bff00c047ff000a00eabfb5dbfe7abfe668fed76ff9eaff0099ae57fb07c5bff42f6b7ff808ff00e147f60f8b7fe85ed6ff00f011ff00c2803aafed76ff009eaff99a3fb5dbfe7abfe66b95fec1f16ffd0bdadffe023ff851fd83e2dffa17b5bffc047ff0a00eabfb5dbfe7abfe668fed76ff009eaff99ae57fb07c5bff0042f6b7ff00808ffe147f60f8b7fe85ed6fff00011ffc2803aafed76ff9eaff0099a3fb5dbfe7abfe66b95fec1f16ff00d0bdadff00e023ff00851fd83e2dff00a17b5bff00c047ff000a00eabfb5dbfe7abfe668fed76ff9eaff0099ae57fb07c5bff42f6b7ff808ff00e147f60f8b7fe85ed6ff00f011ff00c2803aafed76ff009eaff99a3fb5dbfe7abfe66b95fec1f";
-
-/* 
- * TODO: import header (currently vm_pageout.h) without pulling in extraneous definitions;
- * see <rdar://problem/13374916>.
- */
-#ifndef VM_PAGER_FREEZER_DEFAULT
-#define VM_PAGER_FREEZER_DEFAULT 0x8   /* Freezer backed by default pager.*/
-#endif
-
-/*
- * Special note to ourselves: the jetsam cause to look out for is *either*
- * a high watermark kill, *or* a per-process kill.
- */
-#define CAUSE_HIWAT_OR_PERPROC -1
-
-typedef enum jetsam_test {
-    kSimpleJetsamTest = 1,
-    kCustomTaskLimitTest,
-    kPressureJetsamTestFG,
-    kPressureJetsamTestBG,
-    kHighwaterJetsamTest,
-    kVnodeJetsamTest,
-    kBackgroundJetsamTest
-} jetsam_test_t;
-
-typedef enum idle_exit_test {
-    kDeferTimeoutCleanTest = 1,
-    kDeferTimeoutDirtyTest,
-    kCancelTimeoutCleanTest,
-    kCancelTimeoutDirtyTest
-} idle_exit_test_t;
-
-typedef struct shared_mem_t {
-    pthread_mutex_t mutex;
-    pthread_cond_t cv;
-    boolean_t completed;
-    boolean_t pressure_event_fired;
-    boolean_t child_failed;
-} shared_mem_t;
-
-shared_mem_t *g_shared = NULL;
-unsigned long g_physmem = 0;
-int g_compressor_mode=0;
-int g_ledger_count = -1, g_footprint_index = -1;
-int64_t g_per_process_limit = -1;
-
-/*
- * g_exit_status:
- *     Holds the PASS/FAIL status of the memorystatus
- *     test run as a whole.
- *     e.g: If one subtest reports failure, the entire
- *          test run reports failure.
- *
- *     PASS:  returns 0   (default)
- *     FAIL:  returns -1
- *
- *     The only time the g_exit_status changes state
- *     is when printTestResult() reports a FAIL status.
- */
-int g_exit_status = 0;
-
-
-extern int ledger(int cmd, caddr_t arg1, caddr_t arg2, caddr_t arg3);
-static boolean_t check_properties(pid_t pid, int32_t requested_priority, int32_t requested_limit_mb, uint64_t requested_user_data, const char *test);
-
-/* Utilities. */
-
-static void
-printTestHeader(pid_t testPid, const char *testName, ...)
-{
-    va_list va;
-    printf("========================================\n");
-    printf("[TEST] ");
-    va_start(va, testName);
-    vprintf(testName, va);
-    va_end(va);
-    printf("\n");
-    printf("[PID]  %d\n", testPid);
-    printf("========================================\n");
-    printf("[BEGIN]\n");
-    fflush(stdout);
-}
-
-static void
-printTestResult(const char *testName, boolean_t didPass, const char *msg, ...)
-{
-    if (msg != NULL) {
-       va_list va;
-       printf("\t\t");
-       va_start(va, msg);
-       vprintf(msg, va);
-       va_end(va);
-       printf("\n");
-    }
-    if (didPass) {
-        printf("[PASS]\t%s\n\n", testName);
-    } else {
-        printf("[FAIL]\t%s\n\n", testName);
-
-       /* Any single failure, fails full test run */
-       g_exit_status = -1;
-    }
-    fflush(stdout);
-}
-
-static int
-_get_munch_interval(int given_interval)
-{
-    int res;
-    int new_interval=0;
-    char *slow_device;
-    char model_name_buf[1025];
-    size_t mnb_size = 1024;
-    res = sysctlbyname("hw.model", model_name_buf, &mnb_size, NULL, 0);
-
-    if (res) {
-        perror("\t\tsysctlbyname(hw.model...)");
-    }
-    else {
-        /* see if we're a slow device (N90, K66, J33) */
-        slow_device = strstr(model_name_buf, "N90");
-        if (slow_device == NULL) {
-            slow_device = strstr(model_name_buf, "K66");
-        }
-        if (slow_device == NULL) {
-            slow_device = strstr(model_name_buf, "J33");
-        }
-
-        if (slow_device != NULL) {
-            printf("\t\tRunning on a slow device...\n");
-        }
-        
-        if (given_interval == 0) {
-            if (slow_device != NULL) {
-                new_interval = 500 * 1000; /* want sleep time in microseconds */
-            }
-           else {
-               new_interval = 100 * 1000;/* want sleep time in microseconds */
-           }
-        }
-        else {
-            new_interval = given_interval * USEC_PER_SEC;
-        }
-    }
-
-    return new_interval;
-}
-
-static CFDictionaryRef create_dictionary_from_plist(const char *path) {
-    void *bytes = NULL;
-    CFDataRef data = NULL;
-    CFDictionaryRef options = NULL;
-    size_t bufLen;
-    int fd = open(path, O_RDONLY, 0);
-    if (fd == -1) {
-        goto exit;
-    }
-    struct stat sb;
-    if (fstat(fd, &sb) == -1) {
-        goto exit;
-    }
-
-    bufLen = (size_t)sb.st_size;
-    bytes = malloc(bufLen);
-    if (bytes == NULL) {
-        goto exit;
-    }
-
-    if (read(fd, bytes, bufLen) != bufLen) {
-        goto exit;
-    }
-
-    data = CFDataCreateWithBytesNoCopy(kCFAllocatorDefault, (const UInt8 *) bytes, bufLen, kCFAllocatorNull);
-    if (data == NULL) {
-        goto exit;
-    }
-
-    options = (CFDictionaryRef) CFPropertyListCreateWithData(kCFAllocatorDefault, data, kCFPropertyListImmutable, NULL, NULL);
-    if (options == NULL) {
-        goto exit;
-    }
-
-exit:
-    if (data != NULL) {
-        CFRelease(data);
-    }
-    if (bytes != NULL) {
-        free(bytes);
-    }
-    if (fd != -1) {
-        close(fd);
-    }
-    
-    return options;
-}
-
-
-/*
- * cleanup_and_exit():
- *    The parent process can call this routine to exit or abort
- *    the test run at any time.
- *
- *    The child process on the other hand should not call this routine.
- *    Be mindful about how re-enabling the crashreporter can affect tests
- *    further down the line.
- */
-static void cleanup_and_exit(int status) {
-    
-    /* Exit. Pretty literal. */
-    exit(status);
-}
-
-/*
- * child_ready():
- *     After a child process takes care of its inital setup, it
- *     synchronizes back to the parent using this call.
- *
- *     If the child process experiences a failure during its
- *     intial setup, it should abort using a standard exit
- *     routine, leaving crashreporter cleanup to the parent.
- *
- *     The child should never call cleanup_and_exit().
- *     That's for the parent only.
- */
-static void child_ready() {
-    pthread_mutex_lock(&g_shared->mutex);
-    pthread_cond_signal(&g_shared->cv);
-    pthread_mutex_unlock(&g_shared->mutex);
-}
-
-static pid_t init_and_fork() {
-    int pid;
-
-    g_shared->completed = 0;
-    g_shared->pressure_event_fired = 0;
-    
-    pthread_mutex_lock(&g_shared->mutex);
-
-    pid = fork();
-    if (pid == 0) {
-        return 0;
-    } else if (pid == -1) {
-        printTestResult(__func__, false, "Fork error!");
-        cleanup_and_exit(-1);        
-    }
-    
-    /* Wait for child's signal */
-    pthread_cond_wait(&g_shared->cv, &g_shared->mutex);
-    pthread_mutex_unlock(&g_shared->mutex);    
-    return (pid_t)pid;
-}
-
-static memorystatus_priority_entry_t *get_priority_list(int *size) {
-    memorystatus_priority_entry_t *list = NULL;
-    
-    assert(size);
-    
-    *size = memorystatus_control(MEMORYSTATUS_CMD_GET_PRIORITY_LIST, 0, 0, NULL, 0);
-    if (*size <= 0) {
-        printf("\t\tCan't get list size: %d!\n", *size);
-        goto exit;
-    }
-
-    list = (memorystatus_priority_entry_t*)malloc(*size);
-    if (!list) {
-        printf("\t\tCan't allocate list!\n");
-        goto exit;
-    }
-
-    *size = memorystatus_control(MEMORYSTATUS_CMD_GET_PRIORITY_LIST, 0, 0, list, *size);
-    if (*size <= 0) {
-        printf("\t\tCan't retrieve list!\n");
-        goto exit;
-    }
-    
-exit:
-    return list;
-}
-
-/* Tests */
-
-
-static boolean_t get_ledger_info(pid_t pid, int64_t *balance_mb, int64_t *limit_mb) {
-    struct ledger_entry_info *lei;
-    uint64_t count;
-    boolean_t res = false;
-        
-    lei = (struct ledger_entry_info *)malloc((size_t)(g_ledger_count * sizeof (*lei)));
-    if (lei) {
-        void *arg;
-            
-        arg = (void *)(long)pid;
-        count = g_ledger_count;
-        
-        if ((ledger(LEDGER_ENTRY_INFO, arg, (caddr_t)lei, (caddr_t)&count) >= 0) && (g_footprint_index < count)) {
-            if (balance_mb) {
-                *balance_mb = lei[g_footprint_index].lei_balance;
-            }
-            if (limit_mb) {
-                *limit_mb = lei[g_footprint_index].lei_limit;
-            }
-            res = true;
-        }
-        
-        free(lei);
-    }
-        
-    return res;
-}
-
-static boolean_t get_priority_props(pid_t pid, int32_t *priority, int32_t *limit_mb, uint64_t *user_data) {
-    int size;
-    memorystatus_priority_entry_t *entries = NULL;
-    int i;
-    boolean_t res = false;
-
-    entries = get_priority_list(&size);
-    if (!entries) {
-        goto exit;
-    }
-
-    /* Locate */
-    for (i = 0; i < size/sizeof(memorystatus_priority_entry_t); i++ ){
-        if (entries[i].pid == pid) {
-            int64_t limit;
-                   
-            *priority = entries[i].priority;
-            *user_data = entries[i].user_data;
-#if 1
-            *limit_mb = entries[i].limit;
-            res = true;
-#else
-            res = get_ledger_info(entries[i].pid, NULL, &limit);
-            if (false == res) {
-                    printf("Failed to get highwater!\n");
-            }
-            /* The limit is retrieved in bytes, but set in MB, so rescale */
-            *limit_mb = (int32_t)(limit/(1024 * 1024));
-#endif 
-            goto exit;
-        }
-    }
-
-    printf("\t\tCan't find pid: %d!\n", pid);
-
-exit:
-    if (entries)
-       free(entries);
-
-    return res;  
-}
-
-static boolean_t check_properties(pid_t pid, int32_t requested_priority, int32_t requested_limit_mb, uint64_t requested_user_data, const char *test) {
-    const char *PROP_GET_ERROR_STRING = "failed to get properties";
-    const char *PROP_CHECK_ERROR_STRING = "property mismatch";
-    
-    int32_t actual_priority, actual_hiwat;
-    uint64_t actual_user_data;
-    
-    if (!get_priority_props(pid, &actual_priority, &actual_hiwat, &actual_user_data)) {
-        printf("\t\t%s test failed: %s\n", test, PROP_GET_ERROR_STRING);
-        return false;
-    }
-    
-    /* -1 really means the default per-process limit, which varies per device */
-    if (requested_limit_mb <= 0) {
-        requested_limit_mb = (int32_t)g_per_process_limit;
-    }
-    
-    if (actual_priority != requested_priority || actual_hiwat != requested_limit_mb || actual_user_data != requested_user_data) {
-        printf("\t\t%s test failed: %s\n", test, PROP_CHECK_ERROR_STRING);
-        printf("priority is %d, should be %d\n", actual_priority, requested_priority);
-        printf("hiwat is %d, should be %d\n", actual_hiwat, requested_limit_mb);
-        printf("user data is 0x%llx, should be 0x%llx\n", actual_user_data, requested_user_data);
-        return false;
-    }
-    
-    printf("\t\t%s test ok...\n", test);
-    
-    return true;
-}
-
-
-static void start_list_validation_test() {
-    int size;
-    memorystatus_priority_entry_t *entries = NULL;
-    int i;
-    boolean_t valid = false;
-    
-    printTestHeader(getpid(), "List validation test");
-    
-    entries = get_priority_list(&size);
-    if (!entries) {
-        printf("Can't get entries!\n");
-        goto exit;
-    }
-
-    /* Validate */
-    for (i = 0; i < size/sizeof(memorystatus_priority_entry_t); i++ ) {
-        int dirty_ret;
-        uint32_t dirty_flags;
-        
-        /* Make sure launchd isn't in the list - <rdar://problem/13168754> */
-        if (entries[i].pid <= 1) {
-            printf("\t\tBad process (%d) in list!\n", entries[i].pid);
-            goto exit;
-        }
-        
-        /* Sanity check idle exit state */
-        dirty_ret = proc_get_dirty(entries[i].pid, &dirty_flags);
-        if (dirty_ret != 0) {
-            dirty_flags = 0;
-        }
-        
-        if (dirty_flags & PROC_DIRTY_ALLOWS_IDLE_EXIT) {
-            /* Check that the process isn't at idle priority when dirty */
-            if ((entries[i].priority == JETSAM_PRIORITY_IDLE) && (dirty_flags & PROC_DIRTY_IS_DIRTY)) {
-                printf("\t\tProcess %d at idle priority when dirty (priority %d, flags 0x%x)!\n", entries[i].pid, entries[i].priority, dirty_flags);
-                goto exit;
-            }
-            /* Check that the process is at idle (or deferred) priority when clean. */
-            if ((entries[i].priority > JETSAM_PRIORITY_IDLE_DEFERRED) && !(dirty_flags & PROC_DIRTY_IS_DIRTY)) {
-                printf("\t\tProcess %d not at idle (or deferred) priority when clean (priority %d, flags 0x%x)\n", entries[i].pid, entries[i].priority, dirty_flags);
-                goto exit;
-            }
-        }        
-    }
-
-    valid = true;
-
-exit:
-    if (entries)
-       free(entries);
-    
-    printTestResult("List validation test", valid, NULL);
-}
-
-/* Random individual tests */
-static void start_general_sanity_test() {
-    int ret, size;
-    int i;
-    boolean_t valid = false;
-
-    /*
-     * The sanity test checks for permission failures
-     * against P_MEMSTAT_INTERNAL processes.
-     * Currently only launchd (pid==1) qualifies.
-    */
-    
-    printTestHeader(getpid(), "Sanity test");
-    
-
-    /* Ensure that launchd's transaction state is fixed */
-    ret = proc_track_dirty(1, PROC_DIRTY_TRACK | PROC_DIRTY_ALLOW_IDLE_EXIT | PROC_DIRTY_DEFER);
-    if (ret != EPERM) {
-        printf("\t\tNo EPERM tracking launchd (%d/%d)!\n", ret, errno);
-        goto exit;           
-    } else {
-        printf("\t\tlaunchd track test OK!\n");    
-    }
-    
-    ret = proc_set_dirty(1, true);
-    if (ret != EPERM) {
-        printf("\t\tNo EPERM setting launchd dirty state (%d/%d)!\n", ret, errno);
-        goto exit;           
-    } else {
-        printf("\t\tlaunchd dirty test OK!\n");    
-    }
-
-
-    valid = true;
-
-exit:
-    printTestResult("Sanity test", valid, NULL);
-}
-
-static void idle_exit_deferral_test(idle_exit_test_t test) {
-    int secs = DEFERRED_IDLE_EXIT_TIME_SECS;
-
-    child_ready();
-
-    if (!check_properties(getpid(), JETSAM_PRIORITY_DEFAULT, -1, 0x0, "#1 - pre xpc_track_activity()")) {
-        goto exit;
-    }
-    
-    proc_track_dirty(getpid(), PROC_DIRTY_TRACK | PROC_DIRTY_ALLOW_IDLE_EXIT | PROC_DIRTY_DEFER);
-    
-    if (!check_properties(getpid(), JETSAM_PRIORITY_IDLE_DEFERRED, -1, 0x0, "#2 - post xpc_track_activity()")) {
-        goto exit;
-    }
-
-    /* Toggle */
-    proc_set_dirty(getpid(), true);
-    proc_set_dirty(getpid(), false);
-    proc_set_dirty(getpid(), true);
-    proc_set_dirty(getpid(), false);
-
-    switch (test) {
-    case kDeferTimeoutCleanTest:
-        if (!check_properties(getpid(), JETSAM_PRIORITY_IDLE_DEFERRED, -1, 0x0, "#3 - post toggle")) {
-            goto exit;
-        }
-        
-        /* Approximate transition check */
-        sleep(secs - 1);
-        
-        if (!check_properties(getpid(), JETSAM_PRIORITY_IDLE_DEFERRED, -1, 0x0, "#4 - pre timeout")) {
-            goto exit;
-        }
-
-        sleep(2);
-
-        if (!check_properties(getpid(), JETSAM_PRIORITY_IDLE, -1, 0x0, "#5 - post timeout")) {
-            goto exit;
-        }
-
-        proc_set_dirty(getpid(), true);
-
-        if (!check_properties(getpid(), JETSAM_PRIORITY_DEFAULT, -1, 0x0, "#6 - post dirty")) {
-            goto exit;
-        }
-
-        proc_set_dirty(getpid(), false);
-
-        if (!check_properties(getpid(), JETSAM_PRIORITY_IDLE, -1, 0x0, "#7 - post clean")) {
-            goto exit;
-        }
-
-        break;
-    case kDeferTimeoutDirtyTest:
-        proc_set_dirty(getpid(), true);
-        
-        if (!check_properties(getpid(), JETSAM_PRIORITY_DEFAULT, -1, 0x0, "#3 - post dirty")) {
-            goto exit;
-        }
-        
-        /* Approximate transition check */
-        sleep(secs - 1);
-        
-        if (!check_properties(getpid(), JETSAM_PRIORITY_DEFAULT, -1, 0x0, "#4 - pre timeout")) {
-            goto exit;
-        }
-
-        sleep(2);
-
-        if (!check_properties(getpid(), JETSAM_PRIORITY_DEFAULT, -1, 0x0, "#5 - post timeout")) {
-            goto exit;
-        }
-
-        proc_set_dirty(getpid(), false);
-
-        if (!check_properties(getpid(), JETSAM_PRIORITY_IDLE, -1, 0x0, "#6 - post clean")) {
-            goto exit;
-        }
-
-        break;
-    case kCancelTimeoutDirtyTest:
-        proc_set_dirty(getpid(), true);
-        
-        if (!check_properties(getpid(), JETSAM_PRIORITY_DEFAULT, -1, 0x0, "#3 - post toggle")) {
-           goto exit;
-        }
-        
-        proc_clear_dirty(getpid(), PROC_DIRTY_DEFER);
-
-        if (!check_properties(getpid(), JETSAM_PRIORITY_DEFAULT, -1, 0x0, "#4 - post deferral cancellation")) {
-           goto exit;
-        }
-
-        proc_set_dirty(getpid(), false);
-
-        if (!check_properties(getpid(), JETSAM_PRIORITY_IDLE, -1, 0x0, "#5 - post toggle")) {
-           goto exit;
-        }
-        
-        break;
-    case kCancelTimeoutCleanTest:
-        if (!check_properties(getpid(), JETSAM_PRIORITY_IDLE_DEFERRED, -1, 0x0, "#3 - post toggle")) {
-            goto exit;
-        }
-        
-        proc_clear_dirty(getpid(), PROC_DIRTY_DEFER);
-  
-        if (!check_properties(getpid(), JETSAM_PRIORITY_IDLE, -1, 0x0, "#4 - post deferral cancellation")) {
-            goto exit;
-        }
-      
-        proc_set_dirty(getpid(), true);
-
-        if (!check_properties(getpid(), JETSAM_PRIORITY_DEFAULT, -1, 0x0, "#5 - post dirty")) {
-           goto exit;
-        }
-        
-        proc_set_dirty(getpid(), false);
-
-        if (!check_properties(getpid(), JETSAM_PRIORITY_IDLE, -1, 0x0, "#6 - post clean")) {
-           goto exit;
-        }
-        
-        break;
-    }
-
-    g_shared->completed = 1;
-    exit(0);
-        
-exit:
-    printTestResult(__func__, false, "Something bad happened...");
-    exit(-1);
-}
-
-static void start_idle_exit_defer_test(idle_exit_test_t test) {
-    pid_t pid;
-    int status;
-       
-    /* Reset */
-    memset(g_shared, 0, sizeof(shared_mem_t));
-    
-    pid = init_and_fork();
-    if (pid == 0) {
-        idle_exit_deferral_test(test);
-    }
-    else {
-        printTestHeader(pid, "Idle exit deferral test: %d", test);
-    }
-
-    /* Wait for exit */
-    waitpid(pid, &status, 0);
-    /* Idle exit not reported on embedded */
-    // wait_for_exit_event(pid, kMemorystatusKilledIdleExit);
-
-    printTestResult("Idle exit deferral test", g_shared->completed, NULL);
-}
-
-static void ledger_init(void) {
-    const char *physFootprintName = "phys_footprint";
-    struct ledger_info li;
-    int64_t template_cnt;
-    struct ledger_template_info *templateInfo;
-    void *arg;
-    int i;
-        
-    /* Grab ledger entries */
-    arg = (void *)(long)getpid();
-    if (ledger(LEDGER_INFO, arg, (caddr_t)&li, NULL) < 0) {
-            exit(-1);
-    }
-    
-    g_ledger_count = template_cnt = li.li_entries; 
-
-    templateInfo = malloc(template_cnt * sizeof (struct ledger_template_info));
-    if (templateInfo == NULL) {
-            exit (-1);
-    }
-    
-    if (ledger(LEDGER_TEMPLATE_INFO, (caddr_t)templateInfo, (caddr_t)&template_cnt, NULL) >= 0) {
-            for (i = 0; i < template_cnt; i++) {
-                    if (!strncmp(templateInfo[i].lti_name, physFootprintName, strlen(physFootprintName))) {
-                            g_footprint_index = i;
-                            break;
-                    }
-            }
-    }
-    
-    free(templateInfo);
-}
-
-static void run_tests(const char *path) {
-    /* Embedded-only */
-#pragma unused(path)
-    
-    /* Generic */
-    start_general_sanity_test();
-    start_list_validation_test();
-    start_idle_exit_defer_test(kDeferTimeoutCleanTest);
-    start_idle_exit_defer_test(kDeferTimeoutDirtyTest);
-    start_idle_exit_defer_test(kCancelTimeoutCleanTest);
-    start_idle_exit_defer_test(kCancelTimeoutDirtyTest);
-}
-
-
-int main(int argc, char **argv)
-{
-    pthread_mutexattr_t attr;
-    pthread_condattr_t cattr;
-    size_t size;
-
-    /* Must be run as root for priority retrieval */
-    if (getuid() != 0) {
-        fprintf(stderr, "%s must be run as root.\n", getprogname());
-        exit(EXIT_FAILURE);
-    }
-    
-
-    /* Memory */
-    size = sizeof(g_physmem);
-    if (sysctlbyname("hw.physmem", &g_physmem, &size, NULL, 0) != 0 || !g_physmem) {
-        printTestResult(__func__, false, "Failed to retrieve system memory");
-        cleanup_and_exit(-1);
-    }
-
-    /* VM Compressor Mode */
-    size = sizeof(g_compressor_mode);
-    if (sysctlbyname("vm.compressor_mode", &g_compressor_mode, &size, NULL, 0) != 0) {
-       printTestResult(__func__, false, "Failed to retrieve compressor config");
-       cleanup_and_exit(-1);
-    }
-
-    /* Ledger; default limit applies to this process, so grab it here */
-    ledger_init();
-    if ((-1 == g_ledger_count) || (-1 == g_footprint_index) || (false == get_ledger_info(getpid(), NULL, &g_per_process_limit))) {
-        printTestResult("setup", false, "Unable to init ledger!\n");
-        cleanup_and_exit(-1);            
-    }
-    
-    if (g_per_process_limit == LEDGER_LIMIT_INFINITY) {
-        g_per_process_limit = 0;
-    } else {
-        /* Rescale to MB */
-        g_per_process_limit /= (1024 * 1024);
-    }
-    
-    /* Shared memory */
-    g_shared = mmap(NULL, sizeof(shared_mem_t), PROT_WRITE|PROT_READ, MAP_ANON|MAP_SHARED, 0, 0);
-    if (g_shared == MAP_FAILED) {
-        printTestResult(__func__, false, "Failed mmap");
-        cleanup_and_exit(-1);
-    }
-
-    /* Guarantee size of random_data buffer */
-    if (sizeof(random_data) < RANDOM_DATA_SIZE) {
-       printTestResult(__func__, false, "Failed to guarantee random_data buffer size [expected %d, actual %zu]",
-         RANDOM_DATA_SIZE, sizeof(random_data));
-       cleanup_and_exit(-1);
-    }
-
-    pthread_mutexattr_init(&attr);
-    pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED );
-
-    pthread_condattr_init(&cattr);
-    pthread_condattr_setpshared(&cattr, PTHREAD_PROCESS_SHARED);
-
-    if (pthread_mutex_init(&g_shared->mutex, &attr) || pthread_cond_init(&g_shared->cv, &cattr)) {
-        printTestResult("setup", false, "Unable to init condition variable!");
-        cleanup_and_exit(-1);
-    }
-
-    run_tests(argv[0]);
-
-    /* Teardown */
-    pthread_mutex_destroy(&g_shared->mutex);
-    pthread_cond_destroy(&g_shared->cv);
-
-    pthread_mutexattr_destroy(&attr);
-    pthread_condattr_destroy(&cattr);
-
-
-    return (g_exit_status);   /* exit status 0 on success, -1 on failure */
-}
diff --git a/tools/tests/memorystatus/memorystatus_groups.c b/tools/tests/memorystatus/memorystatus_groups.c
deleted file mode 100644 (file)
index 93ae8ad..0000000
+++ /dev/null
@@ -1,653 +0,0 @@
-#include <AvailabilityMacros.h>
-#include <mach/thread_policy.h>
-#include <mach/mach.h>
-#include <mach/mach_traps.h>
-#include <mach/mach_error.h>
-#include <mach/mach_time.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/errno.h>
-#include <sys/kern_memorystatus.h>
-
-#define MAXTESTPIDS  15
-#define MAXPRIORITY  (JETSAM_PRIORITY_MAX - 1)
-
-/*
- * <rdar://problem/15976217> memorystatus_control support for
- *     reprioritizing multiple processes
- *
- * This test/tool operates in one of two modes.
- *     List mode or Generate mode.
- *
- * In generate mode (the default)
- * Setup:
- *     Spin off some number of child processes.  (Enforce a max)
- *     Generate a random jetsam priority band for each process.
- *     Kill at least one of the processes (this tests the kernel's
- *        ability to ignore a non-existent pid.)
- *     Sprinkle the processes into their randomly assigned band.
- * Test:
- *     Query the kernel for a snapshot of the jetsam priority list,
- *        (saving the priority and the index into the overall
- *        priority list for each pid)
- *
- *     Exercise the MEMORYSTATUS_CMD_GRP_SET_PROPERTIES control call.
- *
- *     Properties supported in this exercise?
- *             [1] priority
- *
- *     Query the kernel again for a second snapshot.
- *
- * Verify:
- *     If everything works as expected, all the pids have moved
- *     to the new priority band and relative order before the
- *     move is the same order after the move.
- *
- * In list mode, the user passes in a list of  pids from the command line.
- * We skip the Setup phase, but follow through with the Test and Verify
- * steps.
- *
- * When using generate mode, you can add a delay that takes place just
- * before the control call and then again just after the control call.
- *     eg: This allows time to manually introspect the state of
- *     the device before and after the new property assignments.
- */
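
At its core, the exercise is a single memorystatus_control() call that takes an array of memorystatus_priority_entry_t records plus a byte count. A condensed sketch of its shape (error handling trimmed; do_control_list_test() below is the real implementation):

    memorystatus_priority_entry_t list[MAXTESTPIDS];
    uint32_t pid_count = MAXTESTPIDS;     /* number of entries actually filled in */
    /* fill in list[i].pid and list[i].priority for each target process */
    if (memorystatus_control(MEMORYSTATUS_CMD_GRP_SET_PROPERTIES, 0, 0,
            list, pid_count * sizeof(memorystatus_priority_entry_t)) == -1) {
        printf("MEMORYSTATUS_CMD_GRP_SET_PROPERTIES failed (%s)\n", strerror(errno));
    }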
-
-/* Globals */
-int g_exit_status = 0;
-boolean_t generate_flag = FALSE;
-boolean_t list_flag     = FALSE;
-boolean_t verbose_flag  = FALSE;
-boolean_t do_error_flag = FALSE;
-uint64_t  delay_seconds = 0;
-uint32_t  kill_pid_indx = 0;
-uint32_t  g_new_priority = JETSAM_PRIORITY_IDLE;
-
-typedef struct pidinfo {
-       pid_t pid;
-       int32_t pri_random;    /* random priority for generate path */
-       int32_t pri_before;    /* priority before idle move */
-       int32_t indx_before;   /* jetsam bucket index before idle move */
-       int32_t pri_after;      /* priority found after idle move test */
-       int32_t exp_after;      /* Expected priority. Zero if moved to idle band  */
-       int32_t indx_after;     /* order it landed in the idle band */
-} pidinfo_t;
-
-static boolean_t do_get_priority_list (boolean_t before, memorystatus_priority_entry_t *mypids, size_t pid_count, pidinfo_t *pidinfo);
-static void do_generate_test();
-static void do_child_labor();
-static int priority_cmp(const void *x, const void *y);
-static void do_pidlist_test(memorystatus_priority_entry_t *list, uint32_t pid_count);
-static void do_control_list_test(memorystatus_priority_entry_t *list, uint32_t pid_count);
-static void dump_info_table(pidinfo_t *info, uint32_t count);
-static void print_usage();
-
-static char *g_testname = "GrpSetProperties";
-
-static void
-printTestHeader(pid_t testPid, const char *testName, ...)
-{
-       va_list va;
-       printf("=============================================\n");
-       printf("[TEST] GrpSetProperty ");
-       va_start(va, testName);
-       vprintf(testName, va);
-       va_end(va);
-       printf("\n");
-       printf("[PID]  %d\n", testPid);
-       printf("=============================================\n");
-       printf("[BEGIN]\n");
-}
-
-static void
-printTestResult(const char *testName, boolean_t didPass, const char *msg, ...)
-{
-       if (msg != NULL) {
-               va_list va;
-               printf("\t\t");
-               va_start(va, msg);
-               vprintf(msg, va);
-               va_end(va);
-               printf("\n");
-       }
-       if (didPass) {
-               printf("[PASS] GrpSetProperty\t%s\n\n", testName);
-       } else {
-               printf("[FAIL] GrpSetProperty\t%s\n\n", testName);
-
-               /* Any single failure, fails full test run */
-               g_exit_status = -1;
-       }
-}
-
-static void
-do_error_test ()
-{
-       boolean_t passflag = TRUE;
-       int error;
-       size_t listsize = 0;
-       memorystatus_priority_entry_t list[MAXTESTPIDS];
-
-       listsize = (sizeof(memorystatus_priority_entry_t) * MAXTESTPIDS);
-       memset (list, 0, listsize);
-
-       list[0].pid = getpid();
-       list[0].priority = JETSAM_PRIORITY_MAX+10;   /* out of range priority */
-       
-       printTestHeader (getpid(), "NULL pointer test");
-       errno=0;
-       error = memorystatus_control(MEMORYSTATUS_CMD_GRP_SET_PROPERTIES, 0, 0, NULL, listsize);
-       printf("\t Expect: error (-1),  errno (%d)\n", EINVAL);
-       printf("\t Actual: error (%d),  errno (%d)\n", error, errno);
-       if (error == -1 && errno == EINVAL)
-               passflag = TRUE;
-       else
-               passflag = FALSE;
-       printTestResult("NULL pointer test", passflag, NULL);
-
-
-       printTestHeader (getpid(), "zero size test");
-       errno=0;
-       error = memorystatus_control(MEMORYSTATUS_CMD_GRP_SET_PROPERTIES, 0, 0, &list, 0);
-       printf("\t Expect: error (-1),  errno (%d)\n", EINVAL);
-       printf("\t Actual: error (%d),  errno (%d)\n", error, errno);
-       if (error == -1 && errno == EINVAL)
-               passflag = TRUE;
-       else
-               passflag = FALSE;
-       printTestResult("zero size test", passflag, NULL);
-
-
-       printTestHeader (getpid(), "bad size test");
-       errno=0;
-       error = memorystatus_control(MEMORYSTATUS_CMD_GRP_SET_PROPERTIES, 0, 0, &list, (listsize-1));
-       printf("\t Expect: error (-1),  errno (%d)\n", EINVAL);
-       printf("\t Actual: error (%d),  errno (%d)\n", error, errno);
-       if (error == -1 && errno == EINVAL)
-               passflag = TRUE;
-       else
-               passflag = FALSE;
-       printTestResult("bad size test", passflag, NULL);
-
-       printTestHeader (getpid(), "bad priority test");
-       errno=0;
-       error = memorystatus_control(MEMORYSTATUS_CMD_GRP_SET_PROPERTIES, 0, 0, &list, (listsize));
-       printf("\t Expect: error (-1),  errno (%d)\n", EINVAL);
-       printf("\t Actual: error (%d),  errno (%d)\n", error, errno);
-       if (error == -1 && errno == EINVAL)
-               passflag = TRUE;
-       else
-               passflag = FALSE;
-       printTestResult("bad priority test", passflag, NULL);
-}
-
-int
-main(int argc, char *argv[])
-{
-       kern_return_t        error;
-       
-       memorystatus_priority_entry_t list[MAXTESTPIDS];
-       uint32_t pid_count = MAXTESTPIDS;  /* default */
-       size_t listsize = 0;
-       int c;
-       int i = 0;
-
-       if (geteuid() != 0) {
-               printf("\tMust be run as root\n");
-               exit(1);
-       }
-
-       listsize = sizeof(memorystatus_priority_entry_t) * MAXTESTPIDS;
-       memset (list, 0, listsize);
-
-       while ((c = getopt (argc, argv, "p:ed:hvg:l")) != -1) {
-               switch (c) {
-               case 'p':
-                       g_new_priority = strtol(optarg, NULL, 10);
-                       break;
-               case 'e':
-                       do_error_flag = TRUE;
-                       break;
-               case 'v':
-                       verbose_flag = TRUE;
-                       break;
-               case 'd':
-                       delay_seconds = strtol(optarg, NULL, 10);
-                       break;
-               case 'l':
-                       /* means a list of pids follow */
-                       list_flag = TRUE;
-                       break;
-               case 'g':
-                       /* dynamically generate 'n' processes */
-                       generate_flag = TRUE;
-                       pid_count = strtol(optarg, NULL, 10);
-                       break;
-               case 'h':
-                       print_usage();                  
-                       exit(0);
-               case '?':
-               default:
-                       print_usage();
-                       exit(-1);
-               }               
-       }
-
-       argc -= optind;
-       argv += optind;
-       errno = 0;
-
-       /*
-        * The core part of this test has only two modes.
-        * The default is to dynamically generate a list of pids to work on.
-        * Otherwise, use the -l flag to pass in a list of pids.
-        */
-       if (generate_flag && list_flag) {
-               printTestResult(g_testname, FALSE, "Can't use both -g and -l options\n");
-               exit(g_exit_status);
-       }
-       
-       if (generate_flag) {
-               if (pid_count <= 0 || pid_count > MAXTESTPIDS) {
-                       printTestResult(g_testname, FALSE,
-                           "Pid count out of range (actual: %d), (max: %d)\n", pid_count,  MAXTESTPIDS);                       
-                       exit(g_exit_status);
-               }
-       } else if (list_flag) {
-               pid_count=0;
-               for (; *argv; ++argv) {
-                       if (pid_count < MAXTESTPIDS){
-                               list[pid_count].pid = strtol(*argv, NULL, 10);
-                               list[pid_count].priority = g_new_priority;
-                               pid_count++;
-                               argc--;
-                               optind++;
-                       } else {
-                               printTestResult(g_testname, FALSE,
-                                   "Too many pids (actual: %d), (max: %d)\n", pid_count,  MAXTESTPIDS);
-                               exit(g_exit_status);
-                               break;
-                       }
-               }
-               if (pid_count <= 0 ) {
-                       printTestResult(g_testname, FALSE,
-                           "Provide at least one pid (actual: %d),(max: %d)\n", pid_count,  MAXTESTPIDS);
-                       exit(g_exit_status);                            
-               }
-       } else {
-               /* set defaults */
-               do_error_flag = TRUE;                   
-               generate_flag = TRUE;
-               pid_count = MAXTESTPIDS;
-       }
-
-       if (do_error_flag) {
-               do_error_test();
-       }
-       
-       if (generate_flag) {
-               do_generate_test(list, pid_count);
-       }
-
-       if (list_flag) {
-               do_pidlist_test (list, pid_count);
-       }
-
-       return(g_exit_status);
-
-}
-
-
-static void
-do_pidlist_test(memorystatus_priority_entry_t *list, uint32_t pid_count)
-{
-       
-       do_control_list_test(list, pid_count);
-}
-
-static void
-do_control_list_test(memorystatus_priority_entry_t *list, uint32_t pid_count)
-{
-       int error = 0;
-       int i;
-       boolean_t passflag;
-       pidinfo_t info[MAXTESTPIDS];
-
-       printTestHeader (getpid(), "new priority test");
-       memset (info, 0, MAXTESTPIDS * sizeof(pidinfo_t));
-       printf ("\tInput: pid_count = %d\n", pid_count);
-       printf ("\tInput: new_priority = %d\n", g_new_priority);
-
-       if (generate_flag)
-               printf("\tIntentionally killed pid [%d]\n", list[kill_pid_indx].pid);
-
-        /* random value initialization */
-       srandom((u_long)time(NULL));
-
-       /* In generate path, we sprinkle pids into random priority buckets */
-
-       /* initialize info structures and properties */
-       for (i = 0; i < pid_count; i++) {
-               info[i].pid = list[i].pid;
-               info[i].pri_random = random() % MAXPRIORITY;   /* generate path only */
-               info[i].pri_before = -1;
-               info[i].indx_before = -1;
-               info[i].pri_after = -1;
-               info[i].exp_after = g_new_priority;
-               info[i].indx_after = -1;
-
-               if (generate_flag) {
-                       /* Initialize properties for generated pids */
-                       memorystatus_priority_properties_t mp;
-                       mp.priority = info[i].pri_random;
-                       mp.user_data = 0;
-                       if(memorystatus_control(MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES, list[i].pid, 0, &mp, sizeof(mp)) == -1) {
-                               /*
-                                * If we cannot set the properties on a given
-                                * pid (for whatever reason), we'll ignore it.
-                                * But set expectations for verification phase.
-                                */
-                               printf("\tWarning: set properties failed on pid [%d] (%s)\n", list[i].pid, strerror(errno));
-                               info[i].exp_after = -1;
-                               errno = 0;
-                       }
-               }
-       }
-
-       /* Get the system's current jetsam priority list, init pass */
-       if (do_get_priority_list(TRUE, list, pid_count, info) == FALSE) {
-               error = 1;
-               goto out;
-       }
-
-       if (delay_seconds > 0) {
-               printf("\tDelay [%llu] seconds... (before move to new band)\n", delay_seconds);
-               sleep(delay_seconds);
-               errno = 0;
-       }       
-
-       error = memorystatus_control(MEMORYSTATUS_CMD_GRP_SET_PROPERTIES, 0, 0,
-           list, (pid_count * sizeof(memorystatus_priority_entry_t)));
-       if (error) {
-               printf("\tMEMORYSTATUS_CMD_GRP_SET_PROPERTIES failed (%s)\n", strerror(errno));
-               goto out;
-       }
-
-       /* Get the system's jetsam priority list, after move to new band */
-       if (do_get_priority_list(FALSE, list, pid_count, info) == FALSE) {
-               error = 1;
-               goto out;
-       }
-
-       if (delay_seconds > 0) {
-               printf("\tDelay [%llu] seconds... (after move to new band)\n", delay_seconds);          
-               sleep(delay_seconds);
-               errno = 0;
-       }               
-
-       qsort ((void *)info, pid_count, sizeof(pidinfo_t),priority_cmp);
-
-       /*
-        * Verify that the list of pids have been placed in new priority band
-        * and that they are in the same relative priority order.
-        * The relative bucket placement before moving to the new priority
-        * band should be the same as that after moving to the new
-        * priority band.
-        */
-       error = 0;
-       for (i=0; i < pid_count; i++) {
-               if (info[i].pri_before == -1){
-                       /* skip... this pid did not exist */
-                       continue;
-               }
-
-               /* The new priority band must meet expectations */
-               if (info[i].pri_after != info[i].exp_after) {
-                       error++;
-               }
-               
-               if (i+1 == pid_count)
-                       break;  /* Done traversing list */
-
-               if (info[i].pid == info[i+1].pid) {
-                       /* skip duplicate pids */
-                       continue;
-               }
-                       
-               if (info[i].indx_before < info[i+1].indx_before &&
-                   info[i].indx_after < info[i+1].indx_after &&
-                   info[i].pri_before <= info[i+1].pri_before &&
-                   info[i].pri_after <= info[i+1].pri_after ) {
-                       /* yay */
-               }
-               else {
-                       error++;
-               }
-       }
-
-       printf("\tFound [%d] verification errors.\n", error);
-       
-       if (error || errno || verbose_flag==TRUE) {
-               dump_info_table(info, pid_count);
-       }
-
-out:   
-       printf("\n\tExpect: error (0), errno (0)\n");
-       printf("\tActual: error (%d), errno (%d)\n", error, errno);
-       if (error != 0 || errno != 0)
-               passflag = FALSE;
-       else
-               passflag = TRUE;
-       printTestResult(g_testname, passflag, NULL);
-}
-
-/*
- * The concept of jetsam priority order can actually be viewed as
- * the relative index of an item in a bucket from lowest
- * priority bucket to highest priority bucket and then from
- * head bucket entry to tail bucket entry.
- * In reality, we have a linear, ordered list at any point
- * in time.
- */
-
-
-static int
-priority_cmp(const void *x, const void *y)
-{
-       pidinfo_t      entry_x = *((pidinfo_t *)x);
-       pidinfo_t      entry_y = *((pidinfo_t *)y);
-
-       if (entry_x.pri_before < entry_y.pri_before)
-               return -1;
-       if (entry_x.pri_before == entry_y.pri_before) {
-               /*
-                * Second level ordering.
-                */
-               if (entry_x.indx_before < entry_y.indx_before)
-                       return -1;
-               if (entry_x.indx_before == entry_y.indx_before)
-                       return 0;   /* never */
-               return 1;
-       }
-       return 1;
-}
-
-
-static boolean_t
-do_get_priority_list (boolean_t before, memorystatus_priority_entry_t *mypids, size_t pid_count, pidinfo_t *pidinfo)
-{
-#pragma unused (mypids)
-       
-       size_t size = 0;
-       memorystatus_priority_entry_t *list;
-       size_t list_count = 0;
-       int found = 0;
-       int i, j;
-
-       size = memorystatus_control(MEMORYSTATUS_CMD_GET_PRIORITY_LIST, 0, 0, NULL, 0);
-       if (size <= 0 ) {
-               printf("\tCan't get jetsam priority list size: %s\n", strerror(errno));
-               return(FALSE);
-       }
-
-       list = (memorystatus_priority_entry_t *)malloc(size);
-
-       size = memorystatus_control(MEMORYSTATUS_CMD_GET_PRIORITY_LIST, 0, 0, list, size);
-       if (size <= 0) {
-               printf("\tCould not get jetsam priority list: %s\n", strerror(errno));
-               free(list);
-               return(FALSE);
-       }
-
-       /* recompute number of entries in the list and find the pid's priority*/
-       list_count = size / sizeof(memorystatus_priority_entry_t);
-       
-       printf("\tFound [%d] jetsam bucket entries (%s move to new band).\n",
-           (int)list_count, before? "before" : " after");
-       
-       for (i=0; i < pid_count; i++) {
-               for (j=0; j < list_count; j++) {
-                       if (list[j].pid == pidinfo[i].pid) {
-                               if (before) {
-                                       /*
-                                        * Save process's priority and relative index
-                                        * before moving to new priority
-                                        */
-                                       pidinfo[i].pri_before = list[j].priority;
-                                       pidinfo[i].indx_before = j;
-                               } else {
-                                       /*
-                                        * Save process's priority and relative index
-                                        * after moving to new priority
-                                        */
-                                       pidinfo[i].pri_after = list[j].priority;
-                                       pidinfo[i].indx_after = j;
-                               }
-                               break;
-                       }
-               }
-       }
-
-       if (list)
-               free(list);
-       
-       return(TRUE);
-}
-
-
-
-static
-void do_generate_test (memorystatus_priority_entry_t *list, uint32_t pid_count)
-{
-       int launch_errors = 0;
-       int i;
-       memorystatus_priority_properties_t mp;
-
-       /* Generate mode Setup phase */
-
-       if (pid_count <= 0)
-               return;
-
-       for (i=0; i < pid_count; i++) {
-               list[i].pid = fork();
-               list[i].priority = g_new_priority;     /*XXX introduce multiple
-                                                        new priorities??? */
-               switch (list[i].pid) {
-               case 0: /* child */
-                       do_child_labor();
-                       exit(0);
-                       break;
-               case -1:
-                       launch_errors++;
-                       break;
-               default:
-                       continue;
-               }
-       }
-
-       /*
-        * Parent will set the priority of the
-        * child processes
-        */
-
-       if (verbose_flag && launch_errors > 0)
-               printf("\tParent launch errors = %d\n", launch_errors);
-
-       /* Introduce a case where pid is not found */
-       kill_pid_indx = pid_count/2 ;
-       kill(list[kill_pid_indx].pid, SIGKILL);
-       sleep(5);
-       
-       do_control_list_test (list, pid_count);
-
-       for (i=0; i < pid_count; i++) {
-               if (i != kill_pid_indx) {
-                       kill(list[i].pid, SIGKILL );
-               }
-       }
-}
-
-
-static void
-do_child_labor()
-{
-       /*
-        * Ideally, the process should be suspended,
-        * but letting it spin doing random
-        * stuff should be harmless for this test.
-        */
-       if (verbose_flag)
-               printf("\tLaunched child pid [%d]\n", getpid());
-       while (TRUE) {
-               random();
-               sleep(5);
-       }
-}
-
-
-static void
-dump_info_table(pidinfo_t *info, uint32_t count)
-{
-       int i;
-
-       /*
-        * The random priority value is only of interest in the
-        * generate_flag path, and even then, it's not really 
-        * that interesting!  So, not dumped here.
-        * But it is evident in the Jetsam Priority 'before' column.
-        */
-
-       printf("\n%10s \t%s \t\t%20s\n", "Pid", "Jetsam Priority", "Relative Bucket Index");
-       printf("%10s \t%s %20s\n", "", "(before | after | expected)", "(before | after)");
-       
-       for (i=0; i < count; i++) {
-               printf("%10d",       info[i].pid);
-               printf("\t(%4d |",   info[i].pri_before);
-               printf("%4d |",      info[i].pri_after);
-               printf("%4d)",       info[i].exp_after);
-               printf("\t\t(%5d |", info[i].indx_before);
-               printf("%5d)\n",     info[i].indx_after);
-       }
-}      
-
-static void
-print_usage() {
-
-       printf("\nUsage:\n");
-       printf("[-e] [-p] [-v] [-d <seconds>][ -g <count> | -l <list of pids>]\n\n");
-       printf("Exercise the MEMORYSTATUS_CMD_GRP_SET_PROPERTIES command.\n");
-       printf("Operates on at most %d pids.\n", MAXTESTPIDS);
-       printf("Pass in a list of pids or allow the test to generate the pids dynamically.\n\n");
-
-       printf("\t -e                : exercise error tests\n");
-       printf("\t -p <priority>     : Override default priority band.\n");
-       printf("\t -v                : extra verbosity\n");
-       printf("\t -d <seconds>      : delay before and after idle move (default = 0)\n");
-       printf("\t -g <count>        : dynamically generate <count> processes.\n");
-       printf("\t -l <list of pids> : operate on the given list of pids\n\n");
-       printf("\t default           : generate %d pids, no delay, priority %d  eg: -g %d -p %d\n\n",
-           MAXTESTPIDS, g_new_priority, MAXTESTPIDS, g_new_priority);
-}
index 44c522b14d13d6aa7794dbc861e623590160d9cb..ba1218867dd1ddf86f61d12ce9186007bc5d71b6 100644 (file)
@@ -1,7 +1,7 @@
-SDKROOT ?= /
-Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO)
+include ../Makefile.common
+
 CC:=$(shell xcrun -sdk "$(SDKROOT)" -find cc)
-CFLAGS:=-c -Wall -pedantic -Os
+CFLAGS:=-c -Wall -pedantic -Os -isysroot $(SDKROOT)
 LDFLAGS:=
 
 SRCROOT?=$(shell /bin/pwd)
@@ -24,10 +24,8 @@ endif
 
 ifeq "$(Embedded)" "YES"
        TARGET_NAME:=PerfIndex.bundle-ios
-       XCODEBUILD=xcodebuild -sdk iphoneos.internal
 else
        TARGET_NAME:=PerfIndex.bundle-osx
-       XCODEBUILD=xcodebuild
 endif
 
 CFLAGS += $(patsubst %, -arch %, $(ARCHS))
@@ -64,7 +62,7 @@ $(DSTROOT)/perf_index: $(OBJROOT)/perf_index.o
        $(CC) $(LDFLAGS) $? -o $@
 
 $(DSTROOT)/PerfIndex.bundle: $(SRCROOT)/PerfIndex_COPS_Module/PerfIndex.xcodeproj
-       $(XCODEBUILD) -target $(TARGET_NAME) OBJROOT=$(OBJROOT) SYMROOT=$(SYMROOT) TARGET_TEMP_DIR=$(OBJROOT) TARGET_BUILD_DIR=$(DSTROOT) -project $? CLANG_ENABLE_MODULES=NO
+       xcodebuild -sdk $(SDKROOT) -target $(TARGET_NAME) OBJROOT=$(OBJROOT) SYMROOT=$(SYMROOT) TARGET_TEMP_DIR=$(OBJROOT) TARGET_BUILD_DIR=$(DSTROOT) -project $? CLANG_ENABLE_MODULES=NO
 
 $(DSTROOT)/%.dylib: $(OBJROOT)/%.o
        $(CC) $(LDFLAGS) -dynamiclib $? -o $@
index 2698f2fdbfa354601deda1a298ff95c399320a3c..4772a03e540e1727a4bd8c08fd951825304569aa 100644 (file)
@@ -1,9 +1,4 @@
-SDKROOT ?= /
-ifeq "$(RC_TARGET_CONFIG)" "iPhone"
-Embedded?=YES
-else
-Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO)
-endif
+include ../Makefile.common
 
 DSTROOT?=$(shell /bin/pwd)
 TARGETS := $(addprefix $(DSTROOT)/, measure_tlbs testsp)
@@ -19,9 +14,7 @@ ifdef RC_ARCHS
   endif
 endif
 
-ifneq ($(ARCHS),)
-CFLAGS += $(patsubst %, -arch %, $(ARCHS))
-endif
+CFLAGS += $(patsubst %, -arch %, $(ARCHS)) -isysroot $(SDKROOT)
 
 all: $(TARGETS)
 
index e33642e9b23f0a8f6f7ec510de69bece8b14e268..3e069f3b9ea15740aa46dee68e96cc4b6b8a9b05 100644 (file)
@@ -2,12 +2,14 @@
 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 <plist version="1.0">
 <dict>
+       <key>NSHumanReadableCopyright</key>
+       <string>Copyright © 2015 Apple Inc. All rights reserved.</string>
        <key>CFBundleDevelopmentRegion</key>
        <string>English</string>
        <key>CFBundleExecutable</key>
        <string>${EXECUTABLE_NAME}</string>
        <key>CFBundleIdentifier</key>
-       <string>com.yourcompany.driver.${PRODUCT_NAME:rfc1034identifier}</string>
+       <string>com.apple.driver.${PRODUCT_NAME:rfc1034identifier}</string>
        <key>CFBundleInfoDictionaryVersion</key>
        <string>6.0</string>
        <key>CFBundlePackageType</key>
@@ -23,7 +25,7 @@
                 <key>testthreadcall</key>
                 <dict>
                         <key>CFBundleIdentifier</key>
-                        <string>com.yourcompany.driver.${PRODUCT_NAME:identifier}</string>
+                        <string>com.apple.driver.${PRODUCT_NAME:identifier}</string>
                         <key>IOClass</key>
                         <string>testthreadcall</string>
                         <key>IOMatchCategory</key>
@@ -42,6 +44,8 @@
                <string>11.0</string>
                <key>com.apple.kpi.mach</key>
                <string>11.0</string>
+               <key>com.apple.kpi.private</key>
+               <string>11.0</string>
        </dict>
 </dict>
 </plist>
index c8cd7529b9521a79ca2eb3e8a1b518757911df3e..2afb5c8e12b6e69df57dcfef3c7b7c11fb196821 100644 (file)
@@ -7,6 +7,7 @@
 #include "testthreadcall.h"
 
 #include <kern/thread_call.h>
+#include <pexpert/pexpert.h>
 
 #define super IOService
 OSDefineMetaClassAndStructors(testthreadcall, super);
@@ -21,18 +22,29 @@ static void thread_call_test_func2(thread_call_param_t param0,
 
 }
 
+static int my_event;
+
 bool
 testthreadcall::start( IOService * provider )
 {
-       boolean_t ret;
-       uint64_t deadline;
-       int sleepret;
+  boolean_t ret;
+  uint64_t deadline;
+  int sleepret;
+  uint32_t kernel_configuration;
     
     IOLog("%s\n", __PRETTY_FUNCTION__);
     
     if (!super::start(provider)) {
         return false;
     }
+
+    kernel_configuration = PE_i_can_has_kernel_configuration();
+    IOLog("%s: Assertions %s\n", __PRETTY_FUNCTION__,
+         (kernel_configuration & kPEICanHasAssertions) ? "enabled" : "disabled");
+    IOLog("%s: Statistics %s\n", __PRETTY_FUNCTION__,
+         (kernel_configuration & kPEICanHasStatistics) ? "enabled" : "disabled");
+    IOLog("%s: Diagnostic API %s\n", __PRETTY_FUNCTION__,
+         (kernel_configuration & kPEICanHasDiagnosticAPI) ? "enabled" : "disabled");
     
     IOLog("Attempting thread_call_allocate\n");
        tcall = thread_call_allocate(thread_call_test_func, this);
@@ -62,8 +74,8 @@ testthreadcall::start( IOService * provider )
 
     clock_interval_to_deadline(3, NSEC_PER_SEC, &deadline);
     IOLog("%d sec deadline is %llu\n", 3, deadline);
-    sleepret = IOLockSleepDeadline(tlock2, NULL, deadline, THREAD_INTERRUPTIBLE);
-    IOLog("IOLockSleepDeadline(NULL, %llu) returned %d, expected 0\n", deadline, sleepret);
+    sleepret = IOLockSleepDeadline(tlock2, &my_event, deadline, THREAD_INTERRUPTIBLE);
+    IOLog("IOLockSleepDeadline(&my_event, %llu) returned %d, expected 0\n", deadline, sleepret);
 
     IOLockUnlock(tlock2);
 
@@ -76,8 +88,8 @@ testthreadcall::start( IOService * provider )
 
     clock_interval_to_deadline(3, NSEC_PER_SEC, &deadline);
     IOLog("%d sec deadline is %llu\n", 3, deadline);
-    sleepret = IOLockSleepDeadline(tlock2, NULL, deadline, THREAD_INTERRUPTIBLE);
-    IOLog("IOLockSleepDeadline(NULL, %llu) returned %d, expected 1\n", deadline, sleepret);
+    sleepret = IOLockSleepDeadline(tlock2, &my_event, deadline, THREAD_INTERRUPTIBLE);
+    IOLog("IOLockSleepDeadline(&my_event, %llu) returned %d, expected 1\n", deadline, sleepret);
 
     IOLockUnlock(tlock2);
        
@@ -102,5 +114,5 @@ static void thread_call_test_func2(thread_call_param_t param0,
        
        IOLog("thread_call_test_func2 %p %p\n", param0, param1);
        
-       IOLockWakeup(self->tlock2, NULL, false);
+       IOLockWakeup(self->tlock2, &my_event, false);
 }
diff --git a/tools/tests/xnu_quick_test/32bit_inode_tests.c b/tools/tests/xnu_quick_test/32bit_inode_tests.c
deleted file mode 100644 (file)
index 209c640..0000000
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- *  32bit_inode_tests.c
- *  xnu_quick_test
- *
- *  Created by Ryan Branche on 2/17/08.
- *  Copyright 2008 Apple Inc. All rights reserved.
- *
- */
-
-/* 
- * Explicitly turn off 64-bit inodes because we are testing the 32-bit inode
- * versions of statfs functions and getdirentries doesn't support 64-bit inodes.
- */
-#define _DARWIN_NO_64_BIT_INODE 1
-
-#include "tests.h"
-#include <mach/mach.h>
-#include <dirent.h>
-
-extern char            g_target_path[ PATH_MAX ];
-extern int             g_skip_setuid_tests;
-extern int             g_is_single_user;
-
-/*  **************************************************************************************************************
- *     Test getdirentries system call.
- *  **************************************************************************************************************
- */
-struct test_attr_buf {
-       uint32_t                length;
-       fsobj_type_t            obj_type;
-       fsobj_id_t              obj_id;
-       struct timespec         backup_time;
-};
-       
-typedef struct test_attr_buf test_attr_buf;
-
-int getdirentries_test( void * the_argp )
-{
-       int                                     my_err, done, found_it, i;
-       int                                     my_fd = -1;
-       int                                     is_ufs = 0;
-       char *                          my_pathp = NULL;
-       char *                          my_bufp = NULL;
-       char *                          my_file_namep;
-       long                            my_base;
-       unsigned long           my_count;
-       unsigned long           my_new_state;
-       fsobj_id_t                      my_obj_id;
-       struct timespec         my_new_backup_time;
-       struct attrlist         my_attrlist;
-       test_attr_buf           my_attr_buf[4];
-       struct statfs           my_statfs_buf;
-       kern_return_t           my_kr;
-
-       /* need to know type of file system */
-       my_err = statfs( &g_target_path[0], &my_statfs_buf );
-       if ( my_err == -1 ) {
-               printf( "statfs call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( memcmp( &my_statfs_buf.f_fstypename[0], "ufs", 3 ) == 0 ) {
-               is_ufs = 1;
-       }
-
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_bufp, (1024 * 5), VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-       
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit; 
-        }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_pathp, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-       
-       /* get pointer to just the file name */
-       my_file_namep = strrchr( my_pathp, '/' );
-       my_file_namep++;
-       
-       /* check out the  test directory */
-       my_fd = open( &g_target_path[0], (O_RDONLY), 0 );
-       if ( my_fd == -1 ) {
-               printf( "open failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       done = found_it = 0;
-       while ( done == 0 ) {
-               int                                     my_result, i;
-               struct dirent *         my_dirent_p;
-
-               /* This call requires that 64-bit inodes are disabled */
-               my_result = getdirentries( my_fd, my_bufp, (1024 * 5), &my_base );
-               if ( my_result <= 0 )
-                       break;
-               for ( i = 0; i < my_result; ) {
-                       my_dirent_p = (struct dirent *) (my_bufp + i);
-#if DEBUG
-                       printf( "d_ino %d d_reclen %d d_type %d d_namlen %d \"%s\" \n", 
-                                        my_dirent_p->d_ino, my_dirent_p->d_reclen, my_dirent_p->d_type,
-                                        my_dirent_p->d_namlen, &my_dirent_p->d_name[0] );
-#endif
-
-                       i += my_dirent_p->d_reclen;
-                       /* validate results by looking for our test file */
-                       if ( my_dirent_p->d_type == DT_REG && my_dirent_p->d_ino != 0 &&
-                                strlen( my_file_namep ) == my_dirent_p->d_namlen &&
-                                memcmp( &my_dirent_p->d_name[0], my_file_namep, my_dirent_p->d_namlen ) == 0 ) {
-                               done = found_it = 1;
-                               break;
-                       }
-               }
-       }
-       if ( found_it == 0 ) {
-               printf( "getdirentries failed to find test file. \n" );
-               goto test_failed_exit;  
-       }
-
-test_failed_exit:
-       if(my_err != 0)
-               my_err = -1;
-
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );     
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);      
-       }
-       if ( my_bufp != NULL ) {
-               vm_deallocate(mach_task_self(), (vm_address_t)my_bufp, (1024 * 5));     
-       }
-       
-       return( my_err );
-}
-
-
-/*  **************************************************************************************************************
- *     Test 32-bit inode versions of statfs, fstatfs, and getfsstat system calls.
- *  **************************************************************************************************************
- */
-#pragma pack(4)
-struct vol_attr_buf {
-       u_int32_t       length;
-       off_t           volume_size;
-       u_int32_t       io_blksize;
-};
-#pragma pack()
-typedef struct vol_attr_buf vol_attr_buf;
-
-int statfs_32bit_inode_tests( void * the_argp )
-{
-       int                                     my_err, my_count, i;
-       int                                     my_buffer_size;
-       int                                     my_fd = -1;
-       int                                     is_ufs = 0;
-       void *                          my_bufferp = NULL;
-       struct statfs *         my_statfsp;
-       long                            my_io_size;
-       fsid_t                          my_fsid;
-       struct attrlist         my_attrlist;
-       vol_attr_buf            my_attr_buf;
-       kern_return_t           my_kr;
-
-       my_buffer_size = (sizeof(struct statfs) * 10);
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_bufferp, my_buffer_size, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       my_statfsp = (struct statfs *) my_bufferp;
-       my_err = statfs( "/", my_statfsp );
-       if ( my_err == -1 ) {
-               printf( "statfs call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( memcmp( &my_statfsp->f_fstypename[0], "ufs", 3 ) == 0 ) {
-               is_ufs = 1;
-       }
-       
-       my_count = getfsstat( (struct statfs *)my_bufferp, my_buffer_size, MNT_NOWAIT );
-       if ( my_count == -1 ) {
-               printf( "getfsstat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* validate results */
-       my_statfsp = (struct statfs *) my_bufferp;
-       for ( i = 0; i < my_count; i++, my_statfsp++ ) {
-               if ( memcmp( &my_statfsp->f_fstypename[0], "hfs", 3 ) == 0 ||
-                        memcmp( &my_statfsp->f_fstypename[0], "ufs", 3 ) == 0 ||
-                        memcmp( &my_statfsp->f_fstypename[0], "devfs", 5 ) == 0 ||
-                        memcmp( &my_statfsp->f_fstypename[0], "volfs", 5 ) == 0 ) {
-                       /* found a valid entry */
-                       break;
-               }
-       }
-       if ( i >= my_count ) {
-               printf( "getfsstat call failed.  could not find valid f_fstypename! \n" );
-               goto test_failed_exit;
-       }
-
-       /* set up to validate results via multiple sources.  we use getattrlist to get volume
-        * related attributes to verify against results from fstatfs and statfs - but only if
-        * we are not targeting ufs volume since it doesn't support getattr calls
-        */
-       if ( is_ufs == 0 ) {
-               memset( &my_attrlist, 0, sizeof(my_attrlist) );
-               my_attrlist.bitmapcount = ATTR_BIT_MAP_COUNT;
-               my_attrlist.volattr = (ATTR_VOL_SIZE | ATTR_VOL_IOBLOCKSIZE);
-               my_err = getattrlist( "/", &my_attrlist, &my_attr_buf, sizeof(my_attr_buf), 0 );
-               if ( my_err != 0 ) {
-                       printf( "getattrlist call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-                       goto test_failed_exit;
-               }
-       }
-       
-       /* open kernel to use as test file for fstatfs */
-       my_fd = open( "/System/Library/Kernels/kernel", O_RDONLY, 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       /* testing fstatfs */
-       my_statfsp = (struct statfs *) my_bufferp;
-       my_err = fstatfs( my_fd, my_statfsp );
-       if ( my_err == -1 ) {
-               printf( "fstatfs call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       /* validate results */
-       if ( !(memcmp( &my_statfsp->f_fstypename[0], "hfs", 3 ) == 0 ||
-                  memcmp( &my_statfsp->f_fstypename[0], "ufs", 3 ) == 0) ) {
-               printf( "fstatfs call failed.  could not find valid f_fstypename! \n" );
-               goto test_failed_exit;
-       }
-       my_io_size = my_statfsp->f_iosize;
-       my_fsid = my_statfsp->f_fsid;
-       if ( is_ufs == 0 && my_statfsp->f_iosize != my_attr_buf.io_blksize ) {
-               printf( "fstatfs and getattrlist results do not match for volume block size  \n" );
-               goto test_failed_exit;
-       } 
-
-       /* try again with statfs */
-       my_err = statfs( "/System/Library/Kernels/kernel", my_statfsp );
-       if ( my_err == -1 ) {
-               printf( "statfs call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* validate results */
-       if ( my_io_size != my_statfsp->f_iosize || my_fsid.val[0] != my_statfsp->f_fsid.val[0] ||
-                my_fsid.val[1] != my_statfsp->f_fsid.val[1] ) {
-               printf( "statfs call failed.  wrong f_iosize or f_fsid! \n" );
-               goto test_failed_exit;
-       }
-       if ( is_ufs == 0 && my_statfsp->f_iosize != my_attr_buf.io_blksize ) {
-               printf( "statfs and getattrlist results do not match for volume block size  \n" );
-               goto test_failed_exit;
-       } 
-
-       /* We passed the test */
-       my_err = 0;
-
-test_failed_exit:
-       if(my_err != 0)
-               my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_bufferp != NULL ) {
-               vm_deallocate(mach_task_self(), (vm_address_t)my_bufferp, my_buffer_size);      
-       }
-        
-       return( my_err );
-}
-
diff --git a/tools/tests/xnu_quick_test/README b/tools/tests/xnu_quick_test/README
deleted file mode 100644 (file)
index 861df13..0000000
+++ /dev/null
@@ -1,199 +0,0 @@
-xnu_quick_test - this tool will do a quick test of every (well, to be
-honest, most) system call we support in xnu.
-
-WARNING - this is not meant to be a full regression test of all the
-system calls.  The intent is to have a quick test of each system call that
-can be run very easily and quickly when a new kernel is built.
-
-This tool is meant to grow as we find xnu problems that could have been
-caught before we submit to a build train.  So please add more tests and
-make the existing ones better.  Some of the original tests are nothing
-more than placeholders and quite lame.  Just keep in mind that the tool
-should run as fast as possible.  If it gets too slow then most people
-will stop running it.
-
-LP64 testing tip - when adding or modifying tests, keep in mind the
-variants in the LP64 world.  If xnu gets passed a structure that varies in
-size between 32- and 64-bit processes, try to test that a field in the
-structure contains valid data.  For example, if we know that struct foo
-looks like:
-struct foo {
-       int     an_int;
-       long    a_long;
-       int     another_int;
-};
-and we know what another_int should contain, then test for the known
-value, since its offset will vary depending on whether the calling process
-is 32 or 64 bits.
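-A minimal sketch of that kind of check, assuming a hypothetical
-my_get_foo() wrapper for whatever call fills in the structure and a known
-EXPECTED_ANOTHER_INT value, might look like:
-       struct foo      my_foo;
-
-       memset( &my_foo, 0x5A, sizeof(my_foo) );        /* poison the buffer first */
-       if ( my_get_foo( &my_foo ) != 0 ) {
-               printf( "my_get_foo call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       /* another_int sits at a different offset in 32 and 64-bit processes, so a
-        * mis-munged or stale copy from the kernel shows up as a bad value here.
-        */
-       if ( my_foo.another_int != EXPECTED_ANOTHER_INT ) {
-               printf( "my_get_foo returned bad data in another_int! \n" );
-               goto test_failed_exit;
-       }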
-
-NOTE - we have several workarounds and test exceptions for some
-outstanding bugs in xnu.  All the workarounds are marked with "todo" and
-a comment noting the radar number of the offending bug.  Do a search
-for "todo" in the source files for this project to locate which tests have
-known failures.  Please tag any new exceptions you find with "todo"
-in the comment, along with the radar number of the bug.
-
-Building:
-xnu_quick_test is built automatically by BNI for both Mac (10.9 and later), and
-iOS (7 and later) trains, and is delivered on AppleInternal builds in
-/AppleInternal/CoreOS/xnu_quick_test. It is built as part of the xnu_quick_test
-build alias, so you can also find a copy on ~rc at:
-~rc/Software/$RELEASE/Updates/$RELEASEVERSION/Roots/xnu_quick_test/AppleInternal/CoreOS/xnu_quick_test.
-
-Alternatively you can build it yourself using make like so:
-SDKROOT=/path/to/sdk make
-
-For example:
-# build for Mac, current OS
-SDKROOT=/ make 
-# build for iOS
-SDKROOT=`xcodebuild -sdk iphoneos.internal -version Path` make
-
-By default xnu_quick_test builds all-way fat, but you can restrict this by
-explicitly specifying architectures like so:
-# build for only armv7 and armv7s
-SDKROOT=`xcodebuild -sdk iphoneos.internal -version Path` make ARCH="armv7 armv7s"
-
-There are four defines which you can use at the compile line to build variants.
-DEBUG
-       turn on additional printfs
-CONFORMANCE_TESTS_IN_XNU 
-       when conformance tests are in xnu, set this to 1
-TEST_SYSTEM_CALLS
-       enable the system call tests (these currently do not compile; a separate bug)
-RUN_UNDER_TESTBOTS
-       when running under testbots, set this to 1              
-By default, all four are set to 0, i.e. disabled.  To build with any of them,
-export MORECFLAGS with the values you want set, e.g.
-       export MORECFLAGS="-D DEBUG=1 -D CONFORMANCE_TESTS_IN_XNU=1"
-
-todo:
--- have folks with area expertise fix lame tests
-   (most of the networking related tests are pretty lame)
--- mach system calls support
-
-
-USAGE:  xnu_quick_test -target TARGET_PATH 
-
-         -f[ailures] MAX_FAILS_ALLOWED   # number of test cases that may fail before we give up.  defaults to 0  
-         -l[ist]                         # list all the tests this tool performs   
-         -r[un] 1, 3, 10 - 19            # run specific tests.  enter individual test numbers and/or ranges of numbers.  use -list to list tests.
-         -s[kip]                         # skip setuid tests   
-         -t[arget] TARGET_PATH           # path to directory where tool will create test files.  defaults to "/tmp/"  
-         -testbot                        # output results in CoreOS TestBot compatible format
-
-examples:  
---- Place all test files and directories at the root of volume "test_vol" --- 
-xnu_quick_test -t /Volumes/test_vol/ 
---- Run the tool for tests 10 thru 15, test 18 and test 20 --- 
-xnu_quick_test -r 10-15, 18, 20 
-
-
---- example of running the tool to list all the tests it currently supports ---
-xnu_quick_test -l
-List of all tests this tool performs... 
- 1         syscall 
- 2         fork, wait4, exit 
- 3         fsync, ftruncate, lseek, pread, pwrite, read, readv, truncate, write, writev 
- 4         close, fpathconf, fstat, open, pathconf 
- 5         link, stat, unlink 
- 6         chdir, fchdir 
- 7         access, chmod, fchmod 
- 8         chown, fchown, lchown, lstat, readlink, symlink 
- 9         fstatfs, getattrlist, getfsstat, statfs 
- 10        getpid, getppid, pipe 
- 11        getauid, gettid, getuid, geteuid, issetugid, setaudit_addr, seteuid, settid, settid_with_pid, setuid 
- 12        mkdir, rmdir, umask 
- 13        mknod, sync 
- 14        fsync, getsockopt, poll, select, setsockopt, socketpair 
- 15        accept, bind, connect, getpeername, getsockname, listen, socket, recvmsg, sendmsg, sendto, sendfile
- 16        chflags, fchflags 
- 17        kill, vfork, execve 
- 18        getegid, getgid, getgroups, setegid, setgid, setgroups 
- 19        dup, dup2, getdtablesize 
- 20        getrusage, profil 
- 21        getitimer, setitimer, sigaction, sigpending, sigprocmask, sigsuspend, sigwait 
- 22        acct 
- 23        ioctl 
- 24        chroot 
- 25        madvise, mincore, minherit, mlock, mlock, mmap, mprotect, msync, munmap 
- 26        getpgrp, getpgid, getsid, setpgid, setpgrp, setsid 
- 27        fcntl 
- 28        getlogin, setlogin 
- 29        getpriority, setpriority 
- 30        futimes, gettimeofday, settimeofday, utimes 
- 31        rename, stat 
- 32        flock 
- 33        mkfifo, read, write 
- 34        quotactl 
- 35        getrlimit, setrlimit 
- 36        getattrlist, getdirentries, getdirentriesattr, setattrlist 
- 37        exchangedata 
- 38        searchfs 
- 39        sem_close, sem_open, sem_post, sem_trywait, sem_unlink, sem_wait 
- 40        semctl, semget, semop 
- 41        shm_open, shm_unlink 
- 42        shmat, shmctl, shmdt, shmget 
- 43        fgetxattr, flistxattr, fremovexattr, fsetxattr, getxattr, listxattr, removexattr, setxattr 
- 44        aio_cancel, aio_error, aio_read, aio_return, aio_suspend, aio_write, fcntl, lio_listio 
- 45        kevent, kqueue 
- 46        msgctl, msgget, msgrcv, msgsnd 
-
-
---- example of a full test run ---
-cottje% ./BUILD/dst/xnu_quick_test 
-created test directory at "/tmp/xnu_quick_test-1660251855" 
-Will allow 0 failures before testing is aborted 
-
-Begin testing - Tue Dec 13 15:56:50 2005
-test #1 - syscall 
-test #2 - fork, wait4, exit 
-test #3 - fsync, ftruncate, lseek, pread, pwrite, read, readv, truncate, write, writev 
-test #4 - close, fpathconf, fstat, open, pathconf 
-test #5 - link, stat, unlink 
-test #6 - chdir, fchdir 
-test #7 - access, chmod, fchmod 
-test #8 - chown, fchown, lchown, lstat, readlink, symlink 
-test #9 - fstatfs, getattrlist, getfsstat, statfs 
-test #10 - getpid, getppid, pipe 
-test #11 - getauid, gettid, getuid, geteuid, issetugid, setauid, seteuid, settid, settid_with_pid, setuid 
-test #12 - mkdir, rmdir, umask 
-test #13 - mknod, sync 
-test #14 - fsync, getsockopt, poll, select, setsockopt, socketpair 
-test #15 - accept, bind, connect, getpeername, getsockname, listen, socket, recvmsg, sendmsg, sendto 
-test #16 - chflags, fchflags 
-test #17 - kill, vfork, execve 
-test #18 - getegid, getgid, getgroups, setegid, setgid, setgroups 
-test #19 - dup, dup2, getdtablesize 
-test #20 - getrusage, profil 
-test #21 - ktrace 
-test #22 - getitimer, setitimer, sigaction, sigpending, sigprocmask, sigsuspend, sigwait 
-test #23 - acct 
-test #24 - ioctl 
-test #25 - chroot 
-test #26 - madvise, mincore, minherit, mlock, mlock, mmap, mprotect, msync, munmap 
-test #27 - getpgrp, getpgid, getsid, setpgid, setpgrp, setsid 
-test #28 - fcntl 
-test #29 - getlogin, setlogin 
-test #30 - getpriority, setpriority 
-test #31 - futimes, gettimeofday, settimeofday, utimes 
-test #32 - rename, stat 
-test #33 - flock 
-test #34 - mkfifo, read, write 
-test #35 - quotactl 
-test #36 - getrlimit, setrlimit 
-test #37 - getattrlist, getdirentries, getdirentriesattr, setattrlist 
-test #38 - exchangedata 
-test #39 - searchfs 
-test #40 - sem_close, sem_open, sem_post, sem_trywait, sem_unlink, sem_wait 
-test #41 - semctl, semget, semop 
-test #42 - shm_open, shm_unlink 
-test #43 - shmat, shmctl, shmdt, shmget 
-test #44 - fgetxattr, flistxattr, fremovexattr, fsetxattr, getxattr, listxattr, removexattr, setxattr 
-test #45 - aio_cancel, aio_error, aio_read, aio_return, aio_suspend, aio_write, fcntl, lio_listio 
-test #46 - kevent, kqueue 
-test #47 - msgctl, msgget, msgrcv, msgsnd 
-
-End testing - Tue Dec 13 15:57:08 2005
diff --git a/tools/tests/xnu_quick_test/atomic_fifo_queue_test.c b/tools/tests/xnu_quick_test/atomic_fifo_queue_test.c
deleted file mode 100644 (file)
index 06a0e80..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-#if defined(i386) || defined(__x86_64__)
-
-#include <libkern/OSAtomic.h>
-#include <stdio.h>
-#include <string.h>
-#include <err.h>
-
-typedef struct {
-       void *next;
-       char *str;
-} QueueNode;
-
-int atomic_fifo_queue_test( void *the_argp ) {
-       OSFifoQueueHead head = OS_ATOMIC_FIFO_QUEUE_INIT;
-       char *str1 = "String 1", *str2 = "String 2";
-       QueueNode node1 = { 0, str1 };
-       OSAtomicFifoEnqueue(&head, &node1, 0);
-       QueueNode node2 = { 0, str2 };
-       OSAtomicFifoEnqueue(&head, &node2, 0);
-       QueueNode *node_ptr = OSAtomicFifoDequeue(&head, 0);
-       if( strcmp(node_ptr->str, str1) != 0 ) {
-               warnx("OSAtomicFifoDequeue returned incorrect string. Expected %s, got %s", str1, node_ptr->str);
-               return 1;
-       }
-       node_ptr = OSAtomicFifoDequeue(&head, 0);
-       if( strcmp(node_ptr->str, str2) != 0 ) {
-               warnx("OSAtomicFifoDequeue returned incorrect string. Expected %s, got %s", str2, node_ptr->str);
-               return 1;
-       }
-       return 0;
-}
-
-#endif
diff --git a/tools/tests/xnu_quick_test/commpage_tests.c b/tools/tests/xnu_quick_test/commpage_tests.c
deleted file mode 100644 (file)
index 5c3ac4c..0000000
+++ /dev/null
@@ -1,362 +0,0 @@
-/*
- *  commpage_tests.c
- *  xnu_quick_test
- *
- *  Copyright 2009 Apple Inc. All rights reserved.
- *
- */
-
-#include "tests.h"
-#include <unistd.h>
-#include <stdint.h>
-#include <err.h>
-#include <sys/param.h>
-#include <System/machine/cpu_capabilities.h>
-#include <mach/mach.h>
-#include <mach/mach_error.h>
-#include <mach/bootstrap.h>
-
-
-#ifdef _COMM_PAGE_ACTIVE_CPUS
-int active_cpu_test(void);
-#endif
-
-int get_sys_uint64(const char *sel, uint64_t *val);
-int get_sys_int32(const char *sel, int32_t *val);
-
-#define getcommptr(var, commpageaddr) do { \
-               var = (typeof(var))(uintptr_t)(commpageaddr); \
-       } while(0)
-
-/*
- * Check some of the data in the commpage
- * against manual sysctls
- */
-int commpage_data_tests( void * the_argp )
-{
-       int ret;
-       uint64_t sys_u64;
-       int32_t sys_i32;
-
-       volatile uint64_t *comm_u64;
-       volatile uint32_t *comm_u32;
-       volatile uint16_t *comm_u16;
-       volatile uint8_t *comm_u8;
-
-
-       /* _COMM_PAGE_CPU_CAPABILITIES */
-       getcommptr(comm_u32, _COMM_PAGE_CPU_CAPABILITIES);
-
-       ret = get_sys_int32("hw.ncpu", &sys_i32);
-       if (ret) goto fail;
-
-       if (sys_i32 != ((*comm_u32 & kNumCPUs) >> kNumCPUsShift)) {
-               warnx("kNumCPUs does not match hw.ncpu");
-               ret = -1;
-               goto fail;
-       }
-
-       getcommptr(comm_u8, _COMM_PAGE_NCPUS);
-       if (sys_i32 != (*comm_u8)) {
-               warnx("_COMM_PAGE_NCPUS does not match hw.ncpu");
-               ret = -1;
-               goto fail;
-       }
-
-       ret = get_sys_int32("hw.logicalcpu", &sys_i32);
-       if (ret) goto fail;
-
-       if (sys_i32 != ((*comm_u32 & kNumCPUs) >> kNumCPUsShift)) {
-               warnx("kNumCPUs does not match hw.logicalcpu");
-               ret = -1;
-               goto fail;
-       }
-
-       /* Intel only capabilities */
-#if defined(__i386__) || defined(__x86_64__)
-       ret = get_sys_int32("hw.optional.mmx", &sys_i32);
-       if (ret) goto fail;
-
-       if (!(sys_i32) ^ !(*comm_u32 & kHasMMX)) {
-               warnx("kHasMMX does not match hw.optional.mmx");
-               ret = -1;
-               goto fail;
-       }
-
-       ret = get_sys_int32("hw.optional.sse", &sys_i32);
-       if (ret) goto fail;
-
-       if (!(sys_i32) ^ !(*comm_u32 & kHasSSE)) {
-               warnx("kHasSSE does not match hw.optional.sse");
-               ret = -1;
-               goto fail;
-       }
-       ret = get_sys_int32("hw.optional.sse2", &sys_i32);
-       if (ret) goto fail;
-
-       if (!(sys_i32) ^ !(*comm_u32 & kHasSSE2)) {
-               warnx("kHasSSE2 does not match hw.optional.sse2");
-               ret = -1;
-               goto fail;
-       }
-
-       ret = get_sys_int32("hw.optional.sse3", &sys_i32);
-       if (ret) goto fail;
-
-       if (!(sys_i32) ^ !(*comm_u32 & kHasSSE3)) {
-               warnx("kHasSSE3 does not match hw.optional.sse3");
-               ret = -1;
-               goto fail;
-       }
-
-       ret = get_sys_int32("hw.optional.supplementalsse3", &sys_i32);
-       if (ret) goto fail;
-
-       if (!(sys_i32) ^ !(*comm_u32 & kHasSupplementalSSE3)) {
-               warnx("kHasSupplementalSSE3 does not match hw.optional.supplementalsse3");
-               ret = -1;
-               goto fail;
-       }
-
-       ret = get_sys_int32("hw.optional.sse4_1", &sys_i32);
-       if (ret) goto fail;
-
-       if (!(sys_i32) ^ !(*comm_u32 & kHasSSE4_1)) {
-               warnx("kHasSSE4_1 does not match hw.optional.sse4_1");
-               ret = -1;
-               goto fail;
-       }
-
-       ret = get_sys_int32("hw.optional.sse4_2", &sys_i32);
-       if (ret) goto fail;
-
-       if (!(sys_i32) ^ !(*comm_u32 & kHasSSE4_2)) {
-               warnx("kHasSSE4_2 does not match hw.optional.sse4_2");
-               ret = -1;
-               goto fail;
-       }
-
-       ret = get_sys_int32("hw.optional.aes", &sys_i32);
-       if (ret) goto fail;
-
-       if (!(sys_i32) ^ !(*comm_u32 & kHasAES)) {
-               warnx("kHasAES does not match hw.optional.aes");
-               ret = -1;
-               goto fail;
-       }
-
-       ret = get_sys_int32("hw.optional.x86_64", &sys_i32);
-       if (ret) goto fail;
-
-       if (!(sys_i32) ^ !(*comm_u32 & k64Bit)) {
-               warnx("k64Bit does not match hw.optional.x86_64");
-               ret = -1;
-               goto fail;
-       }
-#endif /* __i386__ || __x86_64__ */
-        
-       /* These fields are not implemented for all architectures */
-#if defined(_COMM_PAGE_SCHED_GEN) && !TARGET_OS_EMBEDDED
-       uint32_t preempt_count1, preempt_count2;
-       uint64_t count;
-
-       ret = get_sys_uint64("hw.cpufrequency_max", &sys_u64);
-       if (ret) goto fail;
-       
-    getcommptr(comm_u32, _COMM_PAGE_SCHED_GEN);
-       preempt_count1 = *comm_u32;
-       /* execute for around 1 quantum (10ms) */
-       for(count = MAX(10000000ULL, sys_u64/64); count > 0; count--) {
-               asm volatile("");
-       }
-       preempt_count2 = *comm_u32;
-       if (preempt_count1 >= preempt_count2) {
-               warnx("_COMM_PAGE_SCHED_GEN not incrementing (%u => %u)",
-                         preempt_count1, preempt_count2);
-               ret = -1;
-               goto fail;
-       }
-#endif /* _COMM_PAGE_SCHED_GEN */
-
-#ifdef _COMM_PAGE_ACTIVE_CPUS
-       ret = get_sys_int32("hw.activecpu", &sys_i32);
-       if (ret) goto fail;
-
-       getcommptr(comm_u8, _COMM_PAGE_ACTIVE_CPUS);
-       if (sys_i32 != (*comm_u8)) {
-               warnx("_COMM_PAGE_ACTIVE_CPUS does not match hw.activecpu");
-               ret = -1;
-               goto fail;
-       }
-
-       /* We shouldn't be supporting userspace processor_start/processor_exit on embedded */
-       ret = active_cpu_test();
-       if (ret) goto fail;
-#endif /* _COMM_PAGE_ACTIVE_CPUS */
-
-#ifdef _COMM_PAGE_PHYSICAL_CPUS
-       ret = get_sys_int32("hw.physicalcpu_max", &sys_i32);
-       if (ret) goto fail;
-
-       getcommptr(comm_u8, _COMM_PAGE_PHYSICAL_CPUS);
-       if (sys_i32 != (*comm_u8)) {
-               warnx("_COMM_PAGE_PHYSICAL_CPUS does not match hw.physicalcpu_max");
-               ret = -1;
-               goto fail;
-       }
-#endif /* _COMM_PAGE_PHYSICAL_CPUS */
-
-#ifdef _COMM_PAGE_LOGICAL_CPUS
-       ret = get_sys_int32("hw.logicalcpu_max", &sys_i32);
-       if (ret) goto fail;
-
-       getcommptr(comm_u8, _COMM_PAGE_LOGICAL_CPUS);
-       if (sys_i32 != (*comm_u8)) {
-               warnx("_COMM_PAGE_LOGICAL_CPUS does not match hw.logicalcpu_max");
-               ret = -1;
-               goto fail;
-       }
-#endif /* _COMM_PAGE_LOGICAL_CPUS */
-
-#if 0
-#ifdef _COMM_PAGE_MEMORY_SIZE
-       ret = get_sys_uint64("hw.memsize", &sys_u64);
-       if (ret) goto fail;
-
-       getcommptr(comm_u64, _COMM_PAGE_MEMORY_SIZE);
-       if (sys_u64 != (*comm_u64)) {
-               warnx("_COMM_PAGE_MEMORY_SIZE does not match hw.memsize");
-               ret = -1;
-               goto fail;
-       }
-#endif /* _COMM_PAGE_MEMORY_SIZE */
-#endif
-
-       ret = 0;
-
-fail:
-       
-       return ret;
-}
-
-
-int get_sys_uint64(const char *sel, uint64_t *val)
-{
-       size_t size = sizeof(*val);
-       int ret;
-
-       ret = sysctlbyname(sel, val, &size, NULL, 0);
-       if (ret == -1) {
-               warn("sysctlbyname(%s)", sel);
-               return ret;
-       }
-
-//     warnx("sysctlbyname(%s) => %llx", sel, *val);
-
-       return 0;
-}
-
-int get_sys_int32(const char *sel, int32_t *val)
-{
-       size_t size = sizeof(*val);
-       int ret;
-
-       ret = sysctlbyname(sel, val, &size, NULL, 0);
-       if (ret == -1) {
-               warn("sysctlbyname(%s)", sel);
-               return ret;
-       }
-
-//     warnx("sysctlbyname(%s) => %x", sel, *val);
-
-       return 0;
-}
-
-#ifdef _COMM_PAGE_ACTIVE_CPUS
-/*
- * Try to find a secondary processor that we can disable,
- * and make sure the commpage reflects that. This test
- * will pass on UP systems, and if all secondary processors
- * have been manually disabled
- */
-int active_cpu_test(void)
-{
-       volatile uint8_t *activeaddr;
-       uint8_t original_activecpu;
-       boolean_t test_failed = FALSE;
-
-       /* Code stolen from hostinfo.c */
-       kern_return_t           ret;
-       processor_t             *processor_list;                
-       host_name_port_t        host;
-       struct processor_basic_info     processor_basic_info;
-       mach_msg_type_number_t  cpu_count;
-       mach_msg_type_number_t  data_count;
-       int                     i;
-
-
-       getcommptr(activeaddr, _COMM_PAGE_ACTIVE_CPUS);
-       original_activecpu = *activeaddr;
-
-       host = mach_host_self();
-       ret = host_processors(host,
-                                                 (processor_array_t *) &processor_list, &cpu_count);
-       if (ret != KERN_SUCCESS) {
-               mach_error("host_processors()", ret);
-               return ret;
-       }
-
-       /* skip master processor */
-       for (i = 1; i < cpu_count; i++) {
-               data_count = PROCESSOR_BASIC_INFO_COUNT;
-               ret = processor_info(processor_list[i], PROCESSOR_BASIC_INFO,
-                                                        &host,
-                                                        (processor_info_t) &processor_basic_info,
-                                                        &data_count);
-               if (ret != KERN_SUCCESS) {
-                       if (ret == MACH_SEND_INVALID_DEST) {
-                               continue;
-                       }
-                       mach_error("processor_info", ret);
-                       return ret;
-               }
-       
-               if (processor_basic_info.running) {
-                       /* found victim */
-                       ret = processor_exit(processor_list[i]);
-                       if (ret != KERN_SUCCESS) {
-                               mach_error("processor_exit()", ret);
-                               return ret;
-                       }
-
-                       sleep(1);
-
-                       if (*activeaddr != (original_activecpu - 1)) {
-                               test_failed = TRUE;
-                       }
-
-                       ret = processor_start(processor_list[i]);
-                       if (ret != KERN_SUCCESS) {
-                               mach_error("processor_start()", ret);
-                               return ret;
-                       }
-
-                       sleep(1);
-
-                       break;
-               }
-       }
-
-       if (test_failed) {
-               warnx("_COMM_PAGE_ACTIVE_CPUS not updated after disabling a CPU");
-               return -1;
-       }
-
-       if (*activeaddr != original_activecpu) {
-               warnx("_COMM_PAGE_ACTIVE_CPUS not restored to original value");
-               return -1;
-       }
-
-       return 0;
-}
-#endif
diff --git a/tools/tests/xnu_quick_test/content_protection_test.c b/tools/tests/xnu_quick_test/content_protection_test.c
deleted file mode 100644 (file)
index c372100..0000000
+++ /dev/null
@@ -1,963 +0,0 @@
-#include "tests.h"
-
-#include <errno.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include <sys/mount.h>
-#include <sys/wait.h>
-
-#include <IOKit/IOKitLib.h>
-#include <Kernel/IOKit/crypto/AppleKeyStoreDefs.h>
-#include <Kernel/sys/content_protection.h>
-
-/* Note that this test (due to the need to lock/unlock the device on demand, and the
-   need to manipulate the passcode) has the unfortunate effect of linking xnu_quick_test
-   to the IOKit framework. */
-
-/* TODO: Change the test to use a single cleanup label. */
-
-#define CPT_IO_SIZE      4096
-#define CPT_AKS_BUF_SIZE 256
-#define CPT_MAX_PASS_LEN 64
-
-#define GET_PROT_CLASS(fd)             fcntl((fd), F_GETPROTECTIONCLASS)
-#define SET_PROT_CLASS(fd, prot_class) fcntl((fd), F_SETPROTECTIONCLASS, (prot_class))
-
-#define PRINT_LOCK_FAIL   printf("%s, line %d: failed to lock the device.\n", cpt_fail_header, __LINE__);
-#define PRINT_UNLOCK_FAIL printf("%s, line %d: failed to unlock the device.\n", cpt_fail_header, __LINE__);
-
-extern char g_target_path[PATH_MAX];
-
-char * cpt_fail_header = "Content protection test failed";
-char * keystorectl_path = "/usr/local/bin/keystorectl";
-
-/* Shamelessly ripped from keystorectl routines; a wrapper for invoking the AKS user client. */
-int apple_key_store(uint32_t command,
-                    uint64_t * inputs,
-                    uint32_t input_count,
-                    void * input_structs,
-                    size_t input_struct_count,
-                    uint64_t * outputs,
-                    uint32_t * output_count)
-{
-       int result = -1;
-       io_connect_t connection = IO_OBJECT_NULL;
-       io_registry_entry_t apple_key_bag_service = IO_OBJECT_NULL;
-       kern_return_t k_result = KERN_FAILURE;
-       IOReturn io_result = IO_OBJECT_NULL;
-
-       apple_key_bag_service = IOServiceGetMatchingService(kIOMasterPortDefault, IOServiceMatching(kAppleKeyStoreServiceName));
-
-       if (apple_key_bag_service == IO_OBJECT_NULL)
-       {
-               printf("FAILURE: failed to match kAppleKeyStoreServiceName.\n");
-               goto end;
-       }
-
-       k_result = IOServiceOpen(apple_key_bag_service, mach_task_self(), 0, &connection);
-
-       if (k_result != KERN_SUCCESS)
-       {
-               printf("FAILURE: failed to open AppleKeyStore.\n");
-               goto end;
-       }
-
-       k_result = IOConnectCallMethod(connection, kAppleKeyStoreUserClientOpen, NULL, 0, NULL, 0, NULL, NULL, NULL, NULL);
-
-       if (k_result != KERN_SUCCESS)
-       {
-               printf("FAILURE: call to AppleKeyStore method kAppleKeyStoreUserClientOpen failed.\n");
-               goto close;
-       }
-
-       io_result = IOConnectCallMethod(connection, command, inputs, input_count, input_structs, input_struct_count, outputs, output_count, NULL, NULL);
-
-       if (io_result != kIOReturnSuccess)
-       {
-               printf("FAILURE: call to AppleKeyStore method %d failed.\n", command);
-               goto close;
-       }
-
-       result = 0;
-
-close:
-       IOServiceClose(connection);
-
-end:
-       return(result);
-}
-
-#ifndef   KEYBAG_ENTITLEMENTS
-/* Just a wrapper around forking to exec keystorectl for commands requiring entitlements. */
-int keystorectl(char * const command[])
-{
-       int child_result = -1;
-       int result = -1;
-       pid_t child = -1;
-
-       child = fork();
-
-       if (child == -1)
-       {
-               printf("FAILURE: failed to fork.\n");
-               goto end;
-       }
-       else if (child == 0)
-       {
-               /* TODO: This keeps keystorectl from bombarding us with key state changes, but
-                  there must be a better way of doing this; killing stderr is a bit nasty,
-                  and if keystorectl fails, we want all the information we can get. */
-               fclose(stderr);
-               fclose(stdin);
-               execv(keystorectl_path, command);
-               printf("FAILURE: child failed to execv keystorectl, errno = %s.\n",
-                 strerror(errno));
-               exit(EXIT_FAILURE);
-       }
-
-       if ((waitpid(child, &child_result, 0) != child) || WEXITSTATUS(child_result))
-       {
-               printf("FAILURE: keystorectl failed.\n");
-               result = -1;
-       }
-       else
-       {
-               result = 0;
-       }
-
-end:
-       return(result);
-}
-#endif /* KEYBAG_ENTITLEMENTS */
-
-/* Code based on Mobile Key Bag; specifically MKBDeviceSupportsContentProtection
-   and MKBDeviceFormattedForContentProtection. */
-/* We want to verify that we support content protection, and that
-   we are formatted for it. */
-int supports_content_prot()
-{
-       int local_result = -1;
-       int result = -1;
-       uint32_t buffer_size = 1;
-       char buffer[buffer_size];
-       io_registry_entry_t defaults = IO_OBJECT_NULL;
-       kern_return_t k_result = KERN_FAILURE;
-       struct statfs statfs_results;
-
-       defaults = IORegistryEntryFromPath(kIOMasterPortDefault, kIODeviceTreePlane ":/defaults");
-
-       if (defaults == IO_OBJECT_NULL)
-       {
-               printf("FAILURE: failed to find defaults registry entry.\n");
-               goto end;
-       }
-
-       k_result = IORegistryEntryGetProperty(defaults, "content-protect", buffer, &buffer_size);
-
-       if (k_result != KERN_SUCCESS)
-       {       /* This isn't a failure; it means the entry doesn't exist, so we assume CP
-                  is unsupported. */
-               result = 0;
-               goto end;
-       }
-
-       /* At this point, we SUPPORT content protection... but are we formatted for it? */
-       /* This is ugly; we should be testing the file system we'll be testing in, not
-          just /tmp/. */
-       local_result = statfs(g_target_path, &statfs_results);
-
-       if (local_result == -1)
-       {
-               printf("FAILURE: failed to statfs the test directory, errno = %s.\n",
-                 strerror(errno));
-       }
-       else if (statfs_results.f_flags & MNT_CPROTECT)
-       {
-               result = 1;
-       }
-       else
-       {       /* This isn't a failure, it means the filesystem isn't formatted for CP. */
-               result = 0;
-       }
-
-end:
-       return(result);
-}
-
-#if 0
-int device_lock_state()
-{
-       /* TODO: Actually implement this. */
-       /* We fail if a passcode already exists, and the methods being used to lock/unlock
-          the device in this test appear to be synchronous... do we need this function? */
-       int result = -1;
-
-       return(result);
-}
-#endif
-
-int lock_device()
-{
-       int result = -1;
-
-#ifdef    KEYBAG_ENTITLEMENTS
-       /* If we're entitled, we can lock the device ourselves. */
-       uint64_t inputs[] = {device_keybag_handle};
-       uint32_t input_count = (sizeof(inputs) / sizeof(*inputs));
-       result = apple_key_store(kAppleKeyStoreKeyBagLock, inputs, input_count, NULL, 0, NULL, NULL);
-#else
-       /* If we aren't entitled, we'll need to use keystorectl to lock the device. */
-       /* keystorectl seems to have a bus error (though it locks successfully) unless
-          lock is passed an argument, so we'll also pass it the empty string. */
-       char * const keystorectl_args[] = {keystorectl_path, "lock", "", NULL};
-       result = keystorectl(keystorectl_args);
-#endif /* KEYBAG_ENTITLEMENTS */
-
-       return(result);
-}
-
-int unlock_device(char * passcode)
-{
-       int result = -1;
-
-#ifdef    KEYBAG_ENTITLEMENTS
-       /* If we're entitled, we can unlock the device ourselves. */
-       uint64_t inputs[] = {device_keybag_handle};
-       uint32_t input_count = (sizeof(inputs) / sizeof(*inputs));
-       size_t input_struct_count = 0;
-
-       if ((passcode == NULL) || ((input_struct_count = strnlen(passcode, CPT_MAX_PASS_LEN)) == CPT_MAX_PASS_LEN))
-       {
-               passcode = "";
-               input_struct_count = 0;
-       }
-
-       result = apple_key_store(kAppleKeyStoreKeyBagUnlock, inputs, input_count, passcode, input_struct_count, NULL, NULL);
-#else
-       /* If we aren't entitled, we'll need to use keystorectl to unlock the device. */
-       if ((passcode == NULL) || (strnlen(passcode, CPT_MAX_PASS_LEN) == CPT_MAX_PASS_LEN))
-       {
-               passcode = "";
-       }
-
-       char * const keystorectl_args[] = {keystorectl_path, "unlock", passcode, NULL};
-       result = keystorectl(keystorectl_args);
-#endif /* KEYBAG_ENTITLEMENTS */
-
-       return(result);
-}
-
-int set_passcode(char * new_passcode, char * old_passcode)
-{
-       int result = -1;
-
-#ifdef    KEYBAG_ENTITLEMENTS
-       /* If we're entitled, we can set the passcode ourselves. */
-       uint64_t inputs[] = {device_keybag_handle};
-       uint32_t input_count = (sizeof(inputs) / sizeof(*inputs));
-       void * input_structs = NULL;
-       size_t input_struct_count = 0;
-       char buffer[CPT_AKS_BUF_SIZE];
-       char * buffer_ptr = buffer;
-       uint32_t old_passcode_len = 0;
-       uint32_t new_passcode_len = 0;
-
-       if ((old_passcode == NULL) || ((old_passcode_len = strnlen(old_passcode, CPT_MAX_PASS_LEN)) == CPT_MAX_PASS_LEN))
-       {
-               old_passcode = "";
-               old_passcode_len = 0;
-       }
-
-       if ((new_passcode == NULL) || ((new_passcode_len = strnlen(new_passcode, CPT_MAX_PASS_LEN)) == CPT_MAX_PASS_LEN))
-       {
-               new_passcode = "";
-               new_passcode_len = 0;
-       }
-
-       *((uint32_t *) buffer_ptr) = ((uint32_t) 2);
-       buffer_ptr += sizeof(uint32_t);
-       *((uint32_t *) buffer_ptr) = old_passcode_len;
-       buffer_ptr += sizeof(uint32_t);
-       memcpy(buffer_ptr, old_passcode, old_passcode_len);
-       buffer_ptr += ((old_passcode_len + sizeof(uint32_t) - 1) & ~(sizeof(uint32_t) - 1));
-       *((uint32_t *) buffer_ptr) = new_passcode_len;
-       buffer_ptr += sizeof(uint32_t);
-       memcpy(buffer_ptr, new_passcode, new_passcode_len);
-       buffer_ptr += ((new_passcode_len + sizeof(uint32_t) - 1) & ~(sizeof(uint32_t) - 1));
-       input_structs = buffer;
-       input_struct_count = (buffer_ptr - buffer);
-
-       result = apple_key_store(kAppleKeyStoreKeyBagSetPasscode, inputs, input_count, input_structs, input_struct_count, NULL, NULL);
-#else
-       /* If we aren't entitled, we'll need to use keystorectl to set the passcode. */
-       if ((old_passcode == NULL) || (strnlen(old_passcode, CPT_MAX_PASS_LEN) == CPT_MAX_PASS_LEN))
-       {
-               old_passcode = "";
-       }
-
-       if ((new_passcode == NULL) || (strnlen(new_passcode, CPT_MAX_PASS_LEN) == CPT_MAX_PASS_LEN))
-       {
-               new_passcode = "";
-       }
-
-       char * const keystorectl_args[] = {keystorectl_path, "change-password", old_passcode, new_passcode, NULL};
-       result = keystorectl(keystorectl_args);
-#endif /* KEYBAG_ENTITLEMENTS */
-
-       return(result);
-}
-
-int clear_passcode(char * passcode)
-{
-       /* For the moment, this will set the passcode to the empty string (a known value);
-          this will most likely need to change, or running this test may ruin everything(tm). */
-       int result = -1;
-
-       result = set_passcode(NULL, passcode);
-
-       return(result);
-}
-
-#if 0
-/* Determines if we will try to test class C semantics. */
-int unlocked_since_boot()
-{
-       /* TODO: Actually implement this. */
-       /* The actual semantics for CP mean that even with this primitive, we would need to
-          set a passcode and then reboot the device in order to test this; this function
-          will probably be rather worthless as a result. */
-       int result = 1;
-
-       return(result);
-}
-#endif
-
-/* If the device has a passcode when we want to test it, things are going to go wrong.
-   As such, we'll assume the device never has a passcode.
-   No, not even then.
-   Or we could just try "" to ""; it works. */
-int has_passcode()
-{
-       int result = -1;
-
-       result = set_passcode(NULL, NULL);
-
-       return(result);
-}
-
-int content_protection_test(void * argp)
-{
-       #pragma unused (argp)
-       int init_result = 0;
-       int local_result = -1;
-       int test_result = -1;
-       int fd = -1;
-       int dir_fd = -1;
-       int subdir_fd = -1;
-       int new_prot_class = -1;
-       int old_prot_class = -1;
-       int current_byte = 0;
-       char filepath[PATH_MAX];
-       char dirpath[PATH_MAX];
-       char subdirpath[PATH_MAX];
-       char rd_buffer[CPT_IO_SIZE];
-       char wr_buffer[CPT_IO_SIZE];
-       char * passcode = "IAmASecurePassword";
-
-       /* Do some initial setup (names). */
-       bzero(filepath, PATH_MAX);
-       bzero(dirpath, PATH_MAX);
-       bzero(subdirpath, PATH_MAX);
-
-       /* This is just easier than checking each result individually. */
-       init_result |= (strlcat(filepath, g_target_path, PATH_MAX) == PATH_MAX);
-       init_result |= (strlcat(filepath, "/", PATH_MAX) == PATH_MAX);
-       init_result |= (strlcpy(dirpath, filepath, PATH_MAX) == PATH_MAX);
-       init_result |= (strlcat(filepath, "cpt_test_file", PATH_MAX) == PATH_MAX);
-       init_result |= (strlcat(dirpath, "cpt_test_dir/", PATH_MAX) == PATH_MAX);
-       init_result |= (strlcpy(subdirpath, dirpath, PATH_MAX) == PATH_MAX);
-       init_result |= (strlcat(subdirpath, "cpt_test_subdir/", PATH_MAX) == PATH_MAX);
-
-       if (init_result)
-       {       /* If any of the initialization failed, we're just going to fail now. */
-               printf("%s, line %d: failed to initialize test strings.\n",
-                 cpt_fail_header, __LINE__);
-               goto end;
-       }
-
-       local_result = supports_content_prot();
-
-       if (local_result == -1)
-       {
-               printf("%s, line %d: failed to determine if content protection is supported.\n",
-                 cpt_fail_header, __LINE__);
-               goto end;
-       }
-       else if (local_result == 0)
-       {       /* If we don't support content protection at the moment, pass the test. */
-               printf("This device does not support or is not formatted for content protection.\n");
-               test_result = 0;
-               goto end;
-       }
-
-       /* If we support content protection, we'll need to be able to set the passcode. */
-       local_result = has_passcode();
-
-       if (local_result == -1)
-       {
-               printf("%s, line %d: the device appears to have a passcode.\n",
-                 cpt_fail_header, __LINE__);
-               goto end;
-       }
-
-       if (set_passcode(passcode, NULL))
-       {
-               printf("%s, line %d: failed to set a new passcode.\n",
-                 cpt_fail_header, __LINE__);
-               goto end;
-       }
-
-       fd = open(filepath, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, 0777);
-
-       if (fd == -1)
-       {
-               printf("%s, line %d: failed to create the test file, errno = %s.\n",
-                 cpt_fail_header, __LINE__, strerror(errno));
-               goto remove_passcode;
-       }
-
-       /* Ensure we can freely read and change protection classes when unlocked. */
-       for (new_prot_class = PROTECTION_CLASS_A; new_prot_class <= PROTECTION_CLASS_F; new_prot_class++)
-       {
-               old_prot_class = GET_PROT_CLASS(fd);
-
-               if (old_prot_class == -1)
-               {
-                       printf("%s, line %d: failed to get protection class when unlocked, errno = %s.\n",
-                         cpt_fail_header, __LINE__, strerror(errno));
-                       goto cleanup_file;
-               }
-
-               if (SET_PROT_CLASS(fd, new_prot_class))
-               {
-                       printf("%s, line %d: failed to change protection class from %d to %d during unlock, errno = %s.\n",
-                         cpt_fail_header, __LINE__, old_prot_class, new_prot_class, strerror(errno));
-                       goto cleanup_file;
-               }
-       }
-
-       /* Query the filesystem for the default CP level (Is it C?) */
-#ifndef F_GETDEFAULTPROTLEVEL
-#define F_GETDEFAULTPROTLEVEL 79
-#endif
-
-       old_prot_class = fcntl(fd, F_GETDEFAULTPROTLEVEL);
-       if (old_prot_class == -1) {
-               printf("%s , line %d: failed to acquire default protection level for filesystem , errno = %s \n", 
-                               cpt_fail_header, __LINE__, strerror(errno));
-               goto cleanup_file;
-       }       
-
-       /* XXX: Do we want to do anything with the level? What should it be? */
-
-
-       /* 
-        * Files are allowed to move into class F, but not out of it.  They can also only do
-        * so when they have no content.
-        */
-       close (fd);
-       unlink (filepath);
-
-
-       /* re-create the file */
-       fd = open (filepath, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, 0777);
-       if (fd == -1) {
-               printf("%s, line %d: failed to create the test file, errno = %s.\n",
-                                       cpt_fail_header, __LINE__, strerror(errno));
-               goto cleanup_file;
-       }
-
-       /* Try making a class A file while locked. */
-       if (lock_device())
-       {
-               PRINT_LOCK_FAIL;
-               goto cleanup_file;
-       }
-
-       if (!SET_PROT_CLASS(fd, PROTECTION_CLASS_A))
-       {
-               printf("%s, line %d: was able to change protection class from D to A when locked.\n",
-                 cpt_fail_header, __LINE__);
-               goto cleanup_file;
-       }
-
-       if (unlock_device(passcode))
-       {
-               PRINT_UNLOCK_FAIL;
-               goto cleanup_file;
-       }
-
-       /* Attempt opening/IO to a class A file while unlocked. */
-       if (SET_PROT_CLASS(fd, PROTECTION_CLASS_A))
-       {
-               printf("%s, line %d: failed to change protection class from D to A when unlocked, errno = %s.\n",
-                 cpt_fail_header, __LINE__, strerror(errno));
-               goto cleanup_file;
-       }
-
-       close(fd);
-       fd = open(filepath, O_RDWR | O_CLOEXEC);
-
-       if (fd == -1)
-       {
-               printf("%s, line %d: failed to open a class A file when unlocked, errno = %s.\n",
-                 cpt_fail_header, __LINE__, strerror(errno));
-               goto remove_file;
-       }
-
-       /* TODO: Write specific data we can check for.
-          If we're going to do that, the write scheme should be deliberately ugly. */
-       current_byte = 0;
-
-       while (current_byte < CPT_IO_SIZE)
-       {
-               local_result = pwrite(fd, &wr_buffer[current_byte], CPT_IO_SIZE - current_byte, current_byte);
-
-               if (local_result == -1)
-               {
-                       printf("%s, line %d: failed to write to class A file when unlocked, errno = %s.\n",
-                         cpt_fail_header, __LINE__, strerror(errno));
-                       goto cleanup_file;
-               }
-
-               current_byte += local_result;
-       }       
-
-       current_byte = 0;
-
-       while (current_byte < CPT_IO_SIZE)
-       {
-               local_result = pread(fd, &rd_buffer[current_byte], CPT_IO_SIZE - current_byte, current_byte);
-
-               if (local_result == -1)
-               {
-                       printf("%s, line %d: failed to read from class A file when unlocked, errno = %s.\n",
-                         cpt_fail_header, __LINE__, strerror(errno));
-                       goto cleanup_file;
-               }
-
-               current_byte += local_result;
-       }
-
-       /* Again, but now while locked; and try to change the file class as well. */
-       if (lock_device())
-       {
-               PRINT_LOCK_FAIL;
-               goto cleanup_file;
-       }
-
-       if (pread(fd, rd_buffer, CPT_IO_SIZE, 0) > 0)
-       {
-               printf("%s, line %d: was able to read from a class A file when locked.\n",
-                 cpt_fail_header, __LINE__);
-               goto cleanup_file;
-       }
-
-       if (pwrite(fd, wr_buffer, CPT_IO_SIZE, 0) > 0)
-       {
-               printf("%s, line %d: was able to write to a class A file when locked.\n",
-                 cpt_fail_header, __LINE__);
-               goto cleanup_file;
-       }
-
-       if (!SET_PROT_CLASS(fd, PROTECTION_CLASS_D))
-       {
-               printf("%s, line %d: was able to change protection class from A to D when locked.\n",
-                 cpt_fail_header, __LINE__);
-               goto cleanup_file;
-       }
-
-       /* Try to open and truncate the file. */
-       close(fd);
-       fd = open(filepath, O_RDWR | O_TRUNC | O_CLOEXEC);
-
-       if (fd != -1)
-       {
-               printf("%s, line %d: was able to open and truncate a class A file when locked.\n",
-                 cpt_fail_header, __LINE__);
-               goto cleanup_file;
-       }
-
-       /* Try to open the file */
-       fd = open(filepath, O_RDWR | O_CLOEXEC);
-
-       if (fd != -1)
-       {
-               printf("%s, line %d: was able to open a class A file when locked.\n",
-                 cpt_fail_header, __LINE__);
-               goto cleanup_file;
-       }
-
-       /* What about class B files? */
-       if (unlock_device(passcode))
-       {
-               PRINT_UNLOCK_FAIL;
-               goto cleanup_file;
-       }
-
-       fd = open(filepath, O_RDWR | O_CLOEXEC);
-
-       if (fd == -1)
-       {
-               printf("%s, line %d: was unable to open a class A file when unlocked.\n",
-                 cpt_fail_header, __LINE__);
-               goto cleanup_file;
-       }
-
-       if (SET_PROT_CLASS(fd, PROTECTION_CLASS_D))
-       {
-               printf("%s, line %d: failed to change protection class from A to D when unlocked, errno = %s.\n",
-                 cpt_fail_header, __LINE__, strerror(errno));
-               goto cleanup_file;
-       }
-
-       if (lock_device())
-       {
-               PRINT_LOCK_FAIL;
-               goto cleanup_file;
-       }
-
-       /* Can we create a class B file while locked? */
-       if (SET_PROT_CLASS(fd, PROTECTION_CLASS_B))
-       {
-               printf("%s, line %d: failed to change protection class from D to B when locked, errno = %s.\n",
-                 cpt_fail_header, __LINE__, strerror(errno));
-               goto cleanup_file;
-       }
-
-       if (GET_PROT_CLASS (fd) != PROTECTION_CLASS_B) {
-               printf("%s, line %d: Failed to switch to class B file \n",
-                               cpt_fail_header, __LINE__ );
-               goto cleanup_file;
-       }
-       
-
-       /* We should also be able to read/write to the file descriptor while it is open. */
-       current_byte = 0;
-
-       while (current_byte < CPT_IO_SIZE)
-       {
-               local_result = pwrite(fd, &wr_buffer[current_byte], CPT_IO_SIZE - current_byte, current_byte);
-
-               if (local_result == -1)
-               {
-                       printf("%s, line %d: failed to write to new class B file when locked, errno = %s.\n",
-                         cpt_fail_header, __LINE__, strerror(errno));
-                       goto cleanup_file;
-               }
-
-               current_byte += local_result;
-       }
-
-       current_byte = 0;
-
-       while (current_byte < CPT_IO_SIZE)
-       {
-               local_result = pread(fd, &rd_buffer[current_byte], CPT_IO_SIZE - current_byte, current_byte);
-
-               if (local_result == -1)
-               {
-                       printf("%s, line %d: failed to read from new class B file when locked, errno = %s.\n",
-                         cpt_fail_header, __LINE__, strerror(errno));
-                       goto cleanup_file;
-               }
-
-               current_byte += local_result;
-       }
-
-       /* We should not be able to open a class B file under lock. */
-       close(fd);
-       fd = open(filepath, O_RDWR | O_CLOEXEC);
-
-       if (fd != -1)
-       {
-               printf("%s, line %d: was able to open a class B file when locked.\n",
-                 cpt_fail_header, __LINE__);
-               goto cleanup_file;
-       }
-
-       unlink(filepath);
-
-       /* We still need to test directory semantics. */
-       if (mkdir(dirpath, 0777) == -1)
-       {
-               printf("%s, line %d: failed to create a new directory when locked, errno = %s.\n",
-                 cpt_fail_header, __LINE__, strerror(errno));
-               goto remove_passcode;
-       }
-
-       /* The newly created directory should not have a protection class. */
-       dir_fd = open(dirpath, O_RDONLY | O_CLOEXEC);
-
-       if (dir_fd == -1)
-       {
-               printf("%s, line %d: failed to open an unclassed directory when locked, errno = %s.\n",
-                 cpt_fail_header, __LINE__, strerror(errno));
-               goto remove_dir;
-       }
-
-       if ((GET_PROT_CLASS(dir_fd) != PROTECTION_CLASS_D) && (GET_PROT_CLASS(dir_fd) != PROTECTION_CLASS_DIR_NONE))
-       {
-               printf("%s, line %d: newly created directory had a non-D and non-NONE protection class.\n",
-                 cpt_fail_header, __LINE__);
-               goto cleanup_dir;
-       }
-
-       if (SET_PROT_CLASS(dir_fd, PROTECTION_CLASS_A))
-       {
-               printf("%s, line %d: was unable to change a directory from class D to class A during lock.\n",
-                 cpt_fail_header, __LINE__);
-               goto cleanup_dir;
-       }
-
-       if (SET_PROT_CLASS(dir_fd, PROTECTION_CLASS_D))
-       {
-               printf("%s, line %d: failed to change a directory from class A to class D during lock, errno = %s.\n",
-                 cpt_fail_header, __LINE__, strerror(errno));
-               goto cleanup_dir;
-       }
-
-       /* Do all files created in the directory properly inherit the directory's protection class? */
-       if ((strlcpy(filepath, dirpath, PATH_MAX) == PATH_MAX) || (strlcat(filepath, "cpt_test_file", PATH_MAX) == PATH_MAX))
-       {
-               printf("%s, line %d: failed to construct the path for a file in the directory.\n",
-                 cpt_fail_header, __LINE__);
-               goto cleanup_dir;
-       }
-
-       if (unlock_device(passcode))
-       {
-               PRINT_UNLOCK_FAIL;
-               goto cleanup_dir;
-       }
-
-       for (new_prot_class = PROTECTION_CLASS_A; new_prot_class <= PROTECTION_CLASS_D; new_prot_class++)
-       {
-               int getclass_dir;
-               old_prot_class = GET_PROT_CLASS(dir_fd);
-               
-               if (old_prot_class == -1)
-               {
-                       printf("%s, line %d: failed to get the protection class for the directory, errno = %s.\n",
-                         cpt_fail_header, __LINE__, strerror(errno));
-                       goto cleanup_dir;
-               }
-
-               if (SET_PROT_CLASS(dir_fd, new_prot_class))
-               {
-                       printf("%s, line %d: failed to change the protection class for the directory from %d to %d, errno = %s.\n",
-                         cpt_fail_header, __LINE__, old_prot_class, new_prot_class, strerror(errno));
-                       goto cleanup_dir;
-               }
-
-               getclass_dir = GET_PROT_CLASS(dir_fd);
-               if (getclass_dir != new_prot_class) {
-                       printf("%s, line %d: failed to get the new protection class for the directory %d (got %d) \n", 
-                                       cpt_fail_header, __LINE__, new_prot_class, getclass_dir);
-                       goto cleanup_dir;
-
-               }
-
-               fd = open(filepath, O_CREAT | O_EXCL | O_CLOEXEC, 0777);
-
-               if (fd == -1)
-               {
-                       printf("%s, line %d: failed to create a file in a class %d directory when unlocked, errno = %s.\n",
-                         cpt_fail_header, __LINE__, new_prot_class, strerror(errno));
-                       goto cleanup_dir;
-               }
-
-               local_result = GET_PROT_CLASS(fd);
-
-               if (local_result == -1)
-               {
-                       printf("%s, line %d: failed to get the new file's protection class, errno = %s.\n",
-                         cpt_fail_header, __LINE__, strerror(errno));
-                       goto cleanup_file;
-               }
-               else if (local_result != new_prot_class)
-               {
-
-                       printf("%s, line %d: new file (%d) did not inherit the directory's protection class (%d) .\n",
-                                       cpt_fail_header, __LINE__, local_result, new_prot_class);
-                       goto cleanup_file;
-               }
-
-               close(fd);
-               unlink(filepath);
-       }
-
-       /* Do we disallow creation of a class F directory? */
-       if (!SET_PROT_CLASS(dir_fd, PROTECTION_CLASS_F))
-       {
-               printf("%s, line %d: creation of a class F directory did not fail as expected.\n",
-                 cpt_fail_header, __LINE__);
-               goto cleanup_dir;
-       }
-
-       /* And are class A and class B semantics followed for when we create these files during lock? */
-       if (SET_PROT_CLASS(dir_fd, PROTECTION_CLASS_A))
-       {
-               printf("%s, line %d: failed to change directory class from F to A when unlocked, errno = %s.\n",
-                 cpt_fail_header, __LINE__, strerror(errno));
-               goto cleanup_dir;
-       }
-
-       if (lock_device())
-       {
-               PRINT_LOCK_FAIL;
-               goto cleanup_dir;
-       }
-
-       fd = open(filepath, O_CREAT | O_EXCL | O_CLOEXEC, 0777);
-
-       if (fd != -1)
-       {
-               printf("%s, line %d: was able to create a new file in a class A directory when locked.\n",
-                 cpt_fail_header, __LINE__);
-               goto cleanup_file;
-       }
-
-       if (unlock_device(passcode))
-       {
-               PRINT_UNLOCK_FAIL;
-               goto cleanup_dir;
-       }
-
-       if (SET_PROT_CLASS(dir_fd, PROTECTION_CLASS_B))
-       {
-               printf("%s, line %d: failed to change directory class from A to B when unlocked, errno = %s.\n",
-                 cpt_fail_header, __LINE__, strerror(errno));
-               goto cleanup_dir;
-       }
-
-       if (lock_device())
-       {
-               PRINT_LOCK_FAIL;
-               goto cleanup_dir;
-       }
-
-       fd = open(filepath, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, 0777);
-
-       if (fd == -1)
-       {
-               printf("%s, line %d: failed to create new file in class B directory when locked, errno = %s.\n",
-                 cpt_fail_header, __LINE__, strerror(errno));
-               goto cleanup_dir;
-       }
-
-       local_result = GET_PROT_CLASS(fd);
-
-       if (local_result == -1)
-       {
-               printf("%s, line %d: failed to get protection class for a new file when locked, errno = %s.\n",
-                 cpt_fail_header, __LINE__, strerror(errno));
-               goto cleanup_file;
-       }
-       else if (local_result != PROTECTION_CLASS_B)
-       {
-               printf("%s, line %d: new file in class B directory did not inherit protection class.\n",
-                 cpt_fail_header, __LINE__);
-               goto cleanup_file;
-       }
-
-       /* What happens when we try to create new subdirectories? */
-       if (unlock_device(passcode))
-       {
-               PRINT_UNLOCK_FAIL;
-               goto cleanup_file;
-       }
-
-       for (new_prot_class = PROTECTION_CLASS_A; new_prot_class <= PROTECTION_CLASS_D; new_prot_class++)
-       {
-               if (SET_PROT_CLASS(dir_fd, new_prot_class))
-               {
-                       printf("%s, line %d: failed to change directory to class %d, errno = %s.\n",
-                         cpt_fail_header, __LINE__, new_prot_class, strerror(errno));
-                       goto cleanup_file;
-               }
-
-               local_result = mkdir(subdirpath, 0777);
-
-               if (local_result == -1)
-               {
-                       printf("%s, line %d: failed to create subdirectory in class %d directory, errno = %s.\n",
-                         cpt_fail_header, __LINE__, new_prot_class, strerror(errno));
-                       goto cleanup_file;
-               }
-
-               subdir_fd = open(subdirpath, O_RDONLY | O_CLOEXEC);
-
-               if (subdir_fd == -1)
-               {
-                       printf("%s, line %d: failed to open subdirectory in class %d directory, errno = %s.\n",
-                         cpt_fail_header, __LINE__, new_prot_class, strerror(errno));
-                       goto remove_subdir;
-               }
-
-               local_result = GET_PROT_CLASS(subdir_fd);
-
-               if (local_result == -1)
-               {
-                       printf("%s, line %d: failed to get class of new subdirectory of class %d directory, errno = %s.\n",
-                         cpt_fail_header, __LINE__, new_prot_class, strerror(errno));
-                       goto cleanup_subdir;
-               }
-               else if (local_result != new_prot_class)
-               {
-                       printf("%s, line %d: new subdirectory had different class than class %d parent.\n",
-                         cpt_fail_header, __LINE__, new_prot_class);
-                       goto cleanup_subdir;
-               }
-
-               close(subdir_fd);
-               rmdir(subdirpath);
-       }
-
-       /* If we've made it this far, the test was successful. */
-       test_result = 0;
-
-cleanup_subdir:
-       close(subdir_fd);
-
-remove_subdir:
-       rmdir(subdirpath);
-
-cleanup_file:
-       close(fd);
-
-remove_file:
-       unlink(filepath);
-
-cleanup_dir:
-       close(dir_fd);
-
-remove_dir:
-       rmdir(dirpath);
-
-remove_passcode:
-       /* Try to unlock the device (no ramifications if it isn't locked when we try) and remove the passcode. */
-       if (unlock_device(passcode))
-       {
-               printf("WARNING: failed to unlock the device.\n");
-       }
-
-       if (clear_passcode(passcode))
-       {
-               printf("WARNING: failed to clear the passcode.\n");
-       }
-
-end:
-       return(test_result);
-}
-
diff --git a/tools/tests/xnu_quick_test/helpers/arch.c b/tools/tests/xnu_quick_test/helpers/arch.c
deleted file mode 100644 (file)
index 1e4f867..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-#include <spawn.h>
-#include <mach/machine.h>
-
-/*
- * Helper function for posix_spawn test: returns binary type as exit code.
- */
-int main()
-{
-#if __i386__
-       return CPU_TYPE_I386;
-#endif /* __i386__ */
-#if __x86_64__
-       return CPU_TYPE_X86_64;
-#endif /* __x86_64__ */
-       /* unrecognized type */
-       return -1;
-}
diff --git a/tools/tests/xnu_quick_test/helpers/data_exec.c b/tools/tests/xnu_quick_test/helpers/data_exec.c
deleted file mode 100644 (file)
index e6c1229..0000000
+++ /dev/null
@@ -1,214 +0,0 @@
-#include <stdio.h>
-#include <strings.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <signal.h>
-#include <setjmp.h>
-#include <sys/mman.h>
-
-int test_func();
-void   catch_segv(int);
-jmp_buf resume;
-
-#define func_len       256
-
-#define ALT_STK_SIZE   (MINSIGSTKSZ + pagesize)
-
-#if __i386__
-typedef        unsigned int            psint_t;
-#endif
-#if __x86_64__
-typedef unsigned long long     psint_t;
-#endif
-
-int verbose = 0;
-
-#define msg(...)       do { if (verbose) printf(__VA_ARGS__); } while (0)
-
-/*
- * Test whether the architecture allows execution from the stack and heap data areas.  What's
- * allowed varies by architecture due to backwards compatibility.  We also run a separate test
- * where we turn on PROT_EXEC explicitly which should always allow execution to take place.
- *
- * The "expected" array tells us what the result of each test should be based on the architecture.
- * The code assumes the test numbers in the macros below are consecutive starting from 0.
- */
-
-#define HEAP_TEST      0
-#define HEAP_PROT_EXEC 1
-#define STACK_TEST     2
-#define STACK_PROT_EXEC        3
-
-#define        SUCCEED  1
-#define FAIL   -1      /* can't use 0 since setjmp uses that */
-
-int expected[4] = {
-#if NXDATA32TESTNONX
-       SUCCEED,                /* execute from heap */
-       SUCCEED,                /* execute from heap with PROT_EXEC */
-       FAIL,                   /* execute from stack */
-       SUCCEED,                /* execute from stack with PROT_EXEC */
-#elif __i386__
-       FAIL,           /* execute from heap */
-       SUCCEED,                /* execute from heap with PROT_EXEC */
-       FAIL,                   /* execute from stack */
-       SUCCEED,                /* execute from stack with PROT_EXEC */
-#endif
-#if __x86_64__
-       FAIL,                   /* execute from heap */
-       SUCCEED,                /* execute from heap with PROT_EXEC */
-       FAIL,                   /* execute from stack */
-       SUCCEED,                /* execute from stack with PROT_EXEC */
-#endif
-};
-
-
-int main(int argc, char *argv[])
-{
-       int (*func)();
-       int result, test;
-       char buf[func_len + 4];
-       psint_t base;
-       unsigned int len;
-       psint_t pagesize;
-       size_t  count;
-       stack_t sigstk;
-       struct sigaction sigact;
-       char *cmd_name;
-       int c;
-
-       cmd_name = argv[0];
-
-       while ((c = getopt(argc, argv, "v")) != -1) {
-               switch (c) {
-               case 'v':
-                       verbose = 1;
-                       break;
-
-               case '?':
-               default:
-                       fprintf(stderr, "usage: data_exec [-v]\n");
-                       exit(1);
-               }
-       }
-
-       pagesize = getpagesize();
-
-       sigstk.ss_sp = malloc(ALT_STK_SIZE);
-       sigstk.ss_size = ALT_STK_SIZE;
-       sigstk.ss_flags = 0;
-
-       if (sigaltstack(&sigstk, NULL) < 0) {
-               perror("sigaltstack");
-               exit(1);
-       }
-
-       sigact.sa_handler = catch_segv;
-       sigact.sa_flags = SA_ONSTACK;
-       sigemptyset(&sigact.sa_mask);
-
-       if (sigaction(SIGSEGV, &sigact, NULL) == -1) {
-               perror("sigaction SIGSEGV");
-               exit(1);
-       }
-
-        if (sigaction(SIGBUS, &sigact, NULL) == -1) {
-                perror("sigaction SIGBUS");
-                exit(1);
-        }
-
-       test = HEAP_TEST;
-
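-       /*
-        * Test driver: each case below copies test_func() somewhere and jumps
-        * to it.  A successful call longjmps back here with SUCCEED; a fault
-        * lands in catch_segv(), which longjmps back with FAIL.  The result is
-        * then compared against the expected[] table before moving on.
-        */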
-restart:
-
-       if ((result = setjmp(resume)) != 0) {
-               if (result != expected[test]) {
-                       printf("%s: test %d failed, expected %d, got %d\n", cmd_name, test, expected[test], result);
-                       exit(2);
-               }
-
-               test++;
-               goto restart;
-       }
-
-       switch (test) {
-       case HEAP_TEST:
-               msg("attempting to execute from malloc'ed area..\n");
-
-               func = (void *)malloc(func_len);
-       
-               func = (void *)((char *)func + ((psint_t)test_func & 0x3));
-       
-               bcopy(test_func, func, func_len);
-       
-               result = (*func)();
-               msg("execution succeeded, result is %d\n\n", result);
-               longjmp(resume, SUCCEED);
-
-       case HEAP_PROT_EXEC:
-               msg("attempting to execute from malloc'ed area with PROT_EXEC..\n");
-
-               func = (void *)malloc(func_len);
-       
-               func = (void *)((char *)func + ((psint_t)test_func & 0x3));
-               bcopy(test_func, func, func_len);
-
-               base = (psint_t)func & ~(pagesize - 1);
-               len  = func_len + (psint_t)func - base;
-
-               if(mprotect((void *)base, len, PROT_READ|PROT_WRITE|PROT_EXEC) == -1) {
-                       perror("mprotect of heap");
-                       exit(1);
-               }
-
-               result = (*func)();
-               msg("execution succeeded, result is %d\n\n", result);
-               longjmp(resume, SUCCEED);
-
-       case STACK_TEST:
-               msg("attempting to execute from stack...\n");
-
-               func = (void *)(buf + ((psint_t)test_func & 0x3));
-               bcopy(test_func, func, func_len);
-       
-               result = (*func)();
-               msg("stack execution succeeded, result from stack exec is %d\n\n", result);
-               longjmp(resume, SUCCEED);
-
-       case STACK_PROT_EXEC:
-               msg("attempting to execute from stack with PROT_EXEC...\n");
-
-               func = (void *)(buf + ((psint_t)test_func & 0x3));
-               bcopy(test_func, func, func_len);
-       
-               base = (psint_t)func & ~(pagesize - 1);
-               len  = func_len + (psint_t)func - base;
-       
-               if(mprotect((void *)base, len, PROT_READ|PROT_WRITE|PROT_EXEC) == -1) {
-                       perror("mprotect of stack");
-                       exit(1);
-               }
-       
-               result = (*func)();
-               msg("stack execution succeeded, result from stack exec is %d\n", result);
-               longjmp(resume, SUCCEED);
-       }
-
-       msg("All tests passed.\n");
-       exit(0);
-}
-
-
-int
-test_func()
-{
-       return 42;
-}
-
-
-void 
-catch_segv(int sig)
-{
-       msg("got sig %d\n\n", sig);
-       longjmp(resume, FAIL);
-}
diff --git a/tools/tests/xnu_quick_test/helpers/launch.c b/tools/tests/xnu_quick_test/helpers/launch.c
deleted file mode 100644 (file)
index 178a04f..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-/* 
- *  Part of the execve tests. This program should not be compiled fat. xnu_quick_test
- * will call the various single-architecture builds of this program as helpers to test
- * the exec() transitions it cannot test itself.
- *
- * When running on a 64-bit machine (x86_64 or PPC64), the 32-bit version of 
- * xnu_quick_test will fork and exec a 64-bit helper process that performs 
- * the following tests.
- * 1. 64 bit process forking() 64-bit child, child execing() 64-bit file(4GB pagezero)
- * 2. 64 bit process forking() 64-bit child, child execing() 64-bit file (4KB pagezero)
- * 3. 64 bit process forking() 64-bit child, child execing() 32-bit file
- *
- *  The 64-bit version of xnu_quick_test will fork and exec a 32-bit process 
- * that performs the following tests.
- * 4. 32 bit process forking() 32-bit child, child execing() 32-bit file
- * 5. 32 bit process forking() 32-bit child, child execing() 64 bit file (4GB pagezero) 
- * 6. 32 bit process forking() 32-bit child, child execing() 64 bit file (4KB pagezero)
- */
-
-#include <stdio.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/syscall.h>
-
-extern int do_execve_test(char * path, char * argv[], void * envp, int killwait);
-extern int get_bits(void);
-
-int g_testbots_active = 0;
-int main(int argc, const char * argv[])
-{
-       int     my_err, my_status;
-       pid_t   my_pid, my_wait_pid;
-       char *  errmsg = NULL; 
-       char *  argvs[2] = {"", NULL};
-       int     bits = get_bits();              /* Gets actual processor bit-ness. */
-
-#if defined(__i386__)
-       /* 
-        * This is the helper binary for the x86_64 version of  xnu_quick_test. xnu_quick_test 
-        * forks and execs this code to test exec()ing from a 32-bit binary.
-        */
-       errmsg = "execve failed: from i386 forking and exec()ing i386 process.\n";
-       argvs[0] = "sleep-i386";
-       if (do_execve_test("helpers/sleep-i386", argvs, NULL, 0))       goto test_failed_exit;
-
-       errmsg = "execve failed: from i386 forking and exec()ing x86_64 process w/ 4G pagezero.\n";
-       argvs[0] = "sleep-x86_64-4G";
-       if (do_execve_test("helpers/sleep-x86_64-4G", argvs, NULL, 0))  goto test_failed_exit;
-
-       errmsg = "execve failed: from i386 forking and exec()ing x86_64 process w/ 4K pagezero.\n";
-       argvs[0] = "sleep-x86_64-4K";
-       if (do_execve_test("helpers/sleep-x86_64-4K", argvs, NULL, 0))  goto test_failed_exit;
-#endif
-
-
-#if defined(__x86_64__)
-       /* 
-        * This is the helper binary for the i386 version of xnu_quick_test. xnu_quick_test 
-        * forks and execs this code to test exec()ing from a 64-bit binary.
-        */
-       errmsg = "execve failed: from x86_64 forking and exec()ing 64-bit x86_64 process w/ 4G pagezero.\n";
-       argvs[0] = "sleep-x86_64-4G";
-       if (do_execve_test("helpers/sleep-x86_64-4G", argvs, NULL, 1))          goto test_failed_exit;
-
-       errmsg = "execve failed: from x86_64 forking and exec()ing 64-bit x86_64 process w/ 4K Pagezero.\n";
-       argvs[0] = "sleep-x86_64-4K";
-       if (do_execve_test("helpers/sleep-x86_64-4K", argvs, NULL, 1))          goto test_failed_exit;
-
-       errmsg = "execve failed: from x86_64 forking and exec()ing 32-bit i386 process.\n";
-       argvs[0] = "sleep-i386";
-       if (do_execve_test("helpers/sleep-i386", argvs, NULL, 1))               goto test_failed_exit;
-#endif
-
-
-       /* 
-        * We are ourselves launched with do_execve_test, which wants a chance to 
-        * send a SIGKILL
-        */
-       sleep(4);
-       return 0;
-
-test_failed_exit:
-       if (errmsg)
-               printf("%s", errmsg);
-       return -1;
-}
-
diff --git a/tools/tests/xnu_quick_test/helpers/sleep.c b/tools/tests/xnu_quick_test/helpers/sleep.c
deleted file mode 100644 (file)
index e74110f..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-/* Helper binary for the execve tests. Added for PR-4607285 */
-#include <unistd.h>
-int main()
-{
-       sleep(120);
-}
diff --git a/tools/tests/xnu_quick_test/kqueue_tests.c b/tools/tests/xnu_quick_test/kqueue_tests.c
deleted file mode 100644 (file)
index 663f1bb..0000000
+++ /dev/null
@@ -1,530 +0,0 @@
-/*
- *  tests.c
- *  xnu_quick_test
- *
- *  Created by Jerry Cottingham on 3/25/05.
- *  Copyright 2005 Apple Computer Inc. All rights reserved.
- *
- */
-
-#include "tests.h"
-#include <pthread.h>
-#include <assert.h>
-#include <sys/event.h>         /* for kqueue tests */
-#include <sys/sysctl.h>                /* for determining hw */
-#include <mach/mach.h>
-#include <AvailabilityMacros.h>        /* for determination of Mac OS X version (tiger, leopard, etc.) */
-#include <libkern/OSByteOrder.h> /* for OSSwap32() */
-
-extern char            g_target_path[ PATH_MAX ];
-extern int             g_skip_setuid_tests;
-
-int msg_count = 14;
-int last_msg_seen = 0;
-pthread_cond_t my_cond = PTHREAD_COND_INITIALIZER;
-pthread_mutex_t my_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-
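-/*
- * Build a minimal Mach message (header plus index ints of payload space) in a
- * freshly vm_allocate'd buffer and send it to remote_port with a copy-send
- * disposition; the buffer is deallocated once mach_msg() returns.
- */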
-static kern_return_t
-kmsg_send(mach_port_t remote_port, int index)
-{
-       int msgh_id = 1000 + index;
-        kern_return_t my_kr;
-        mach_msg_header_t * my_kmsg = NULL;
-       mach_msg_size_t size = sizeof(mach_msg_header_t) + sizeof(int)*index;
-        
-        my_kr = vm_allocate( mach_task_self(),
-                             (vm_address_t *)&my_kmsg,
-                             size,
-                             VM_MAKE_TAG(VM_MEMORY_MACH_MSG) | TRUE );
-        if (my_kr != KERN_SUCCESS)
-                return my_kr;
-        my_kmsg->msgh_bits =
-               MACH_MSGH_BITS_SET(MACH_MSG_TYPE_COPY_SEND, 0, 0, 0);
-        my_kmsg->msgh_size = size;
-        my_kmsg->msgh_remote_port = remote_port;
-        my_kmsg->msgh_local_port = MACH_PORT_NULL;
-        my_kmsg->msgh_voucher_port = MACH_PORT_NULL;
-        my_kmsg->msgh_id = msgh_id;
-        my_kr = mach_msg( my_kmsg, 
-                          MACH_SEND_MSG | MACH_MSG_OPTION_NONE,
-                         size,
-                          0, /* receive size */
-                          MACH_PORT_NULL,
-                          MACH_MSG_TIMEOUT_NONE,
-                          MACH_PORT_NULL );
-        vm_deallocate( mach_task_self(), (vm_address_t)my_kmsg, size );
-        return my_kr;
-}
-
-static kern_return_t
-kmsg_recv(mach_port_t portset, mach_port_t port, int * msgh_id_return)
-{
-        kern_return_t my_kr;
-        mach_msg_header_t * my_kmsg = NULL;
-        
-        my_kr = vm_allocate( mach_task_self(),
-                             (vm_address_t *)&my_kmsg,
-                             PAGE_SIZE,
-                             VM_MAKE_TAG(VM_MEMORY_MACH_MSG) | TRUE );
-        if (my_kr != KERN_SUCCESS)
-                return my_kr;
-        my_kr = mach_msg( my_kmsg, 
-                          MACH_RCV_MSG | MACH_MSG_OPTION_NONE,
-                          0, /* send size */
-                          PAGE_SIZE, /* receive size */
-                          port,
-                          MACH_MSG_TIMEOUT_NONE,
-                          MACH_PORT_NULL );
-        if ( my_kr == KERN_SUCCESS &&
-             msgh_id_return != NULL )
-                *msgh_id_return = my_kmsg->msgh_id;
-        vm_deallocate( mach_task_self(), (vm_address_t)my_kmsg, PAGE_SIZE );
-        return my_kr;
-}
-
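-/*
- * Consumer thread: drains events from the shared kqueue one at a time.  For
- * each EVFILT_MACHPORT event it receives the pending message, re-enables the
- * EV_DISPATCH knote, and signals the main thread once the last message id is
- * seen; it exits when the EVFILT_USER ident-0 event fires.
- */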
-static void *
-kmsg_consumer_thread(void * arg)
-{
-       int             my_kqueue = *(int *)arg;
-       int             my_err;
-       kern_return_t   my_kr;
-       struct kevent   my_keventv[3];
-       int             msgid;
-
-       EV_SET( &my_keventv[0], 0, 0, 0, 0, 0, 0 );
-       while ( !(my_keventv[0].filter == EVFILT_USER &&
-                 my_keventv[0].ident == 0)) {
-               /* keep getting events */
-               my_err = kevent( my_kqueue, NULL, 0, my_keventv, 1, NULL );
-                if ( my_err == -1 ) {
-                        printf( "kevent call from consumer thread failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                        return (void *)-1;
-                }
-                if ( my_err == 0 ) {
-                        printf( "kevent call from consumer thread did not return any events when it should have \n" );
-                        return (void *)-1;
-                }
-                if ( my_keventv[0].filter == EVFILT_MACHPORT ) {
-                        if ( my_keventv[0].data == 0 ) {
-                                printf( "kevent call to get machport event returned 0 msg_size \n" );
-                                return (void *)-1;
-                        }
-                        my_kr = kmsg_recv( my_keventv[0].ident, my_keventv[0].data, &msgid );
-                        if ( my_kr != KERN_SUCCESS ) {
-                               printf( "kmsg_recv failed with error %d - %s \n", my_kr, mach_error_string(my_kr) );
-                                return (void *)-1;
-                        }
-                        my_keventv[0].flags = EV_ENABLE;
-                        my_err = kevent( my_kqueue, my_keventv, 1, NULL, 0, NULL );
-                        if ( my_err == -1 ) {
-                                printf( "kevent call to re-enable machport events failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                                return (void *)-1;
-                        }
-                       if (msgid == 1000 + msg_count) {
-                               pthread_mutex_lock(&my_mutex);
-                               last_msg_seen = 1;
-                               pthread_cond_signal(&my_cond);
-                               pthread_mutex_unlock(&my_mutex);
-                       }
-                }
-       }
-        return (void *)0;
-}
-
-/*  **************************************************************************************************************
- *     Test kevent, kqueue system calls.
- *  **************************************************************************************************************
- */
-int kqueue_tests( void * the_argp )
-{
-       int                             my_err, my_status;
-       void                            *my_pthread_join_status;
-       int                             my_kqueue = -1;
-       int                             my_kqueue64 = -1;
-       int                             my_fd = -1;
-       char *                  my_pathp = NULL;
-    pid_t                      my_pid, my_wait_pid;
-       size_t                  my_count, my_index;
-       int                             my_sockets[ 2 ] = {-1, -1};
-       struct kevent   my_keventv[3];
-       struct kevent64_s       my_kevent64;
-       struct timespec my_timeout;
-       char                    my_buffer[ 16 ];
-       kern_return_t kr;       
-
-       kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", kr, mach_error_string(kr) );
-                goto test_failed_exit;
-        }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_pathp, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-       
-       my_fd = open( my_pathp, O_RDWR, 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       my_err = socketpair( AF_UNIX, SOCK_STREAM, 0, &my_sockets[0] );
-       if ( my_err == -1 ) {
-               printf( "socketpair failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* fork here and use pipe to communicate */
-       my_pid = fork( );
-       if ( my_pid == -1 ) {
-               printf( "fork failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       else if ( my_pid == 0 ) {
-               /* 
-                * child process - tell parent we are ready to go.
-                */
-               my_count = write( my_sockets[1], "r", 1 );
-               if ( my_count == -1 ) {
-                       printf( "write call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-                       exit( -1 );
-               }
-
-               my_count = read( my_sockets[1], &my_buffer[0], 1 );
-               if ( my_count == -1 ) {
-                       printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       exit( -1 );
-               }
-               if ( my_buffer[0] != 'g' ) {
-                       printf( "read call on socket failed to get \"go\" message \n" );
-                       exit( -1 );
-               }
-
-               /* now do some work that will trigger events our parent will track */
-               my_count = write( my_fd, "11111111", 8 );
-               if ( my_count == -1 ) {
-                       printf( "write call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       exit( -1 );
-               }
-       
-               my_err = unlink( my_pathp );
-               if ( my_err == -1 ) {
-                       printf( "unlink failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       exit( -1 );
-               }
-
-               /* wait for parent to tell us to exit */
-               my_count = read( my_sockets[1], &my_buffer[0], 1 );
-               if ( my_count == -1 ) {
-                       printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       exit( -1 );
-               }
-               if ( my_buffer[0] != 'e' ) {
-                       printf( "read call on socket failed to get \"exit\" message \n" );
-                       exit( -1 );
-               }
-               exit(0);
-       }
-       
-       /* parent process - wait for child to spin up */
-       my_count = read( my_sockets[0], &my_buffer[0], sizeof(my_buffer) );
-       if ( my_count == -1 ) {
-               printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_buffer[0] != 'r' ) {
-               printf( "read call on socket failed to get \"ready to go message\" \n" );
-               goto test_failed_exit;
-       }
-
-       /* set up a kqueue and register for some events */
-       my_kqueue = kqueue( );
-       if ( my_kqueue == -1 ) {
-               printf( "kqueue call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* look for our test file to get unlinked or written to */
-       EV_SET( &my_keventv[0], my_fd, EVFILT_VNODE, (EV_ADD | EV_CLEAR), (NOTE_DELETE | NOTE_WRITE), 0, 0 );
-       /* also keep an eye on our child process while we're at it */
-       EV_SET( &my_keventv[1], my_pid, EVFILT_PROC, (EV_ADD | EV_ONESHOT), NOTE_EXIT, 0, 0 );
-
-       my_timeout.tv_sec = 0;
-       my_timeout.tv_nsec = 0;
-       my_err = kevent( my_kqueue, my_keventv, 2, NULL, 0, &my_timeout);
-       if ( my_err == -1 ) {
-               printf( "kevent call to register events failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* use kevent64 to test EVFILT_PROC */
-       EV_SET64( &my_kevent64, my_pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, 0, 0, 0 ); 
-       my_err = kevent64( my_kqueue, &my_kevent64, 1, NULL, 0, 0, 0); 
-       if ( my_err != -1 && errno != EINVAL ) {
-               printf( "kevent64 call should fail with kqueue used for kevent() - %d\n", my_err);
-               goto test_failed_exit;
-       }
-               
-       my_kqueue64 = kqueue();
-       EV_SET64( &my_kevent64, my_pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, 0, 0, 0 ); 
-       my_err = kevent64( my_kqueue64, &my_kevent64, 1, NULL, 0, 0, 0); 
-       if ( my_err == -1 ) {
-               printf( "kevent64 call to get proc exit failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* tell child to get to work */
-       my_count = write( my_sockets[0], "g", 1 );
-       if ( my_count == -1 ) {
-               printf( "write call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* go get vnode events */
-       EV_SET( &my_keventv[0], my_fd, EVFILT_VNODE, (EV_CLEAR), 0, 0, 0 );
-       my_err = kevent( my_kqueue, NULL, 0, my_keventv, 1, NULL );
-       if ( my_err == -1 ) {
-               printf( "kevent call to get vnode events failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_err == 0 ) {
-               printf( "kevent call to get vnode events did not return any when it should have \n" );
-               goto test_failed_exit;
-       }
-       if ( (my_keventv[0].fflags & (NOTE_DELETE | NOTE_WRITE)) == 0 ) {
-               printf( "kevent call to get vnode events did not return NOTE_DELETE or NOTE_WRITE \n" );
-               printf( "fflags 0x%02X \n", my_keventv[0].fflags );
-               goto test_failed_exit;
-       }
-
-       /* tell child to exit */
-       my_count = write( my_sockets[0], "e", 1 );
-       if ( my_count == -1 ) {
-               printf( "write call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       /* look for child exit notification after unregistering for vnode events */
-       EV_SET( &my_keventv[0], my_fd, EVFILT_VNODE, EV_DELETE, 0, 0, 0 );
-       my_err = kevent( my_kqueue, my_keventv, 1, my_keventv, 1, NULL );
-       if ( my_err == -1 ) {
-               printf( "kevent call to get proc exit event failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_err == 0 ) {
-               printf( "kevent call to get proc exit event did not return any when it should have \n" );
-               goto test_failed_exit;
-       }
-       if ( my_keventv[0].filter != EVFILT_PROC ) {
-               printf( "kevent call to get proc exit event did not return EVFILT_PROC \n" );
-               printf( "filter %i \n", my_keventv[0].filter );
-               goto test_failed_exit;
-       }
-       if ( (my_keventv[0].fflags & NOTE_EXIT) == 0 ) {
-               printf( "kevent call to get proc exit event did not return NOTE_EXIT \n" );
-               printf( "fflags 0x%02X \n", my_keventv[0].fflags );
-               goto test_failed_exit;
-       }
-
-       /* look for child exit notification on the kevent64 kqueue */
-       EV_SET64( &my_kevent64, my_pid, EVFILT_PROC, EV_CLEAR, NOTE_EXIT, 0, 0, 0, 0 ); 
-       my_err = kevent64( my_kqueue64, NULL, 0, &my_kevent64, 1, 0, 0); 
-       if ( my_err == -1 ) {
-               printf( "kevent64 call to get child exit failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_err == 0 ) {
-               printf( "kevent64 call to get proc exit event did not return any when it should have \n" );
-               goto test_failed_exit;
-       }
-       if ( my_kevent64.filter != EVFILT_PROC ) {
-               printf( "kevent64 call to get proc exit event did not return EVFILT_PROC \n" );
-               printf( "filter %i \n", my_kevent64.filter );
-               goto test_failed_exit;
-       }
-       if ( (my_kevent64.fflags & NOTE_EXIT) == 0 ) {
-               printf( "kevent64 call to get proc exit event did not return NOTE_EXIT \n" );
-               printf( "fflags 0x%02X \n", my_kevent64.fflags );
-               goto test_failed_exit;
-       }
-
-       my_wait_pid = wait4( my_pid, &my_status, 0, NULL );
-       if ( my_wait_pid == -1 ) {
-               printf( "wait4 failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* wait4 should return our child's pid when it exits */
-       if ( my_wait_pid != my_pid ) {
-               printf( "wait4 did not return child pid - returned %d should be %d \n", my_wait_pid, my_pid );
-               goto test_failed_exit;
-       }
-
-       if ( WIFEXITED( my_status ) && WEXITSTATUS( my_status ) != 0 ) {
-               printf( "wait4 returned wrong exit status - 0x%02X \n", my_status );
-               goto test_failed_exit;
-       }
-       
-       /* now try out EVFILT_MACHPORT and EVFILT_USER */
-       mach_port_t my_pset = MACH_PORT_NULL;
-       mach_port_t my_port = MACH_PORT_NULL;
-       kern_return_t my_kr;
-
-       my_kr = mach_port_allocate( mach_task_self(), MACH_PORT_RIGHT_PORT_SET, &my_pset );
-       if ( my_kr != KERN_SUCCESS ) {
-               printf( "mach_port_allocate failed with error %d - %s \n", my_kr, mach_error_string(my_kr) );
-               goto test_failed_exit;
-       }
-       
-       my_kr = mach_port_allocate( mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &my_port );
-       if ( my_kr != KERN_SUCCESS ) {
-               printf( "mach_port_allocate failed with error %d - %s \n", my_kr, mach_error_string(my_kr) );
-               goto test_failed_exit;
-       }
-       
-       /* try to register for events on my_port directly -- this should fail */
-       EV_SET( &my_keventv[0], my_port, EVFILT_MACHPORT, (EV_ADD | EV_DISPATCH), 0, 0, 0 );
-       my_err = kevent( my_kqueue, my_keventv, 1, NULL, 0, NULL );
-       if ( my_err != -1 || errno != ENOTSUP ) {
-               printf( "kevent call to register my_port should have failed, but got %s \n", strerror(errno) );
-               goto test_failed_exit;
-       }
-       
-       /* now register for events on my_pset and user 0 */
-       EV_SET( &my_keventv[0], my_pset, EVFILT_MACHPORT, (EV_ADD | EV_CLEAR | EV_DISPATCH), 0, 0, 0 );
-       EV_SET( &my_keventv[1], 0, EVFILT_USER, EV_ADD, 0, 0, 0 );
-       my_err = kevent( my_kqueue, my_keventv, 2, NULL, 0, NULL );
-       if ( my_err == -1 ) {
-               printf( "kevent call to register my_pset and user 0 failed with error %d - %s \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       pthread_t my_threadv[3];
-
-       for (my_index = 0;
-            my_index < 3;
-            my_index++) {
-         my_err = pthread_create( &my_threadv[my_index], NULL, kmsg_consumer_thread, (void *)&my_kqueue );
-                if ( my_err != 0 ) {
-                        printf( "pthread_create failed with error %d - %s \n", my_err, strerror(my_err) );
-                        goto test_failed_exit;
-                }
-        }
-
-       /* insert my_port into my_pset */
-       my_kr = mach_port_insert_member( mach_task_self(), my_port, my_pset );
-       if ( my_kr != KERN_SUCCESS ) {
-               printf( "mach_port_insert_member failed with error %d - %s \n", my_kr, mach_error_string(my_kr) );
-               goto test_failed_exit;
-       }
-       
-       my_kr = mach_port_insert_right( mach_task_self(), my_port, my_port, MACH_MSG_TYPE_MAKE_SEND );
-       if ( my_kr != KERN_SUCCESS ) {
-               printf( "mach_port_insert_right failed with error %d - %s \n", my_kr, mach_error_string(my_kr) );
-               goto test_failed_exit;
-       }
-       
-       /* send some Mach messages */
-       for (my_index = 1;
-            my_index <= msg_count;
-            my_index++) {
-         my_kr = kmsg_send( my_port, my_index );
-                if ( my_kr != KERN_SUCCESS ) {
-                        printf( "kmsg_send failed with error %d - %s \n", my_kr, mach_error_string(my_kr) );
-                        goto test_failed_exit;
-                }
-        }
-
-       /* make sure the last message eventually gets processed */
-       pthread_mutex_lock(&my_mutex);
-       while (last_msg_seen == 0) 
-         pthread_cond_wait(&my_cond, &my_mutex);
-       pthread_mutex_unlock(&my_mutex);
-
-       /* trigger the user 0 event, telling consumer threads to exit */
-       EV_SET( &my_keventv[0], 0, EVFILT_USER, 0, NOTE_TRIGGER, 0, 0 );
-       my_err = kevent( my_kqueue, my_keventv, 1, NULL, 0, NULL );
-       if ( my_err == -1 ) {
-               printf( "kevent call to trigger user 0 failed with error %d - %s \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       for (my_index = 0;
-            my_index < 3;
-            my_index++) {
-         my_err = pthread_join( my_threadv[my_index], &my_pthread_join_status );
-                if ( my_err != 0 ) {
-                        printf( "pthread_join failed with error %d - %s \n", my_err, strerror(my_err) );
-                        goto test_failed_exit;
-                }
-                if ( my_pthread_join_status != 0 ) {
-                        goto test_failed_exit;
-                }
-        }
-       
-       /* clear the user 0 event */
-       EV_SET( &my_keventv[0], 0, EVFILT_USER, EV_CLEAR, 0, 0, 0 );
-       my_err = kevent( my_kqueue, my_keventv, 1, NULL, 0, NULL );
-       if ( my_err == -1 ) {
-               printf( "kevent call to clear user 0 failed with error %d - %s \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       /* deliberately destroy my_pset while it's still registered for events */
-       my_kr = mach_port_mod_refs( mach_task_self(), my_pset, MACH_PORT_RIGHT_PORT_SET, -1 );
-       if ( my_kr != KERN_SUCCESS ) {
-               printf( "mach_port_mod_refs failed with error %d - %s \n", my_kr, mach_error_string(my_kr) );
-               goto test_failed_exit;
-       }
-
-       /* look for the event to trigger with a zero msg_size */
-       my_err = kevent( my_kqueue, NULL, 0, my_keventv, 1, NULL );
-       if ( my_err == -1 ) {
-               printf( "kevent call to get machport event failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_err == 0 ) {
-               printf( "kevent call to get machport event did not return any when it should have \n" );
-               goto test_failed_exit;
-       }
-       if ( my_keventv[0].filter != EVFILT_MACHPORT ) {
-               printf( "kevent call to get machport event did not return EVFILT_MACHPORT \n" );
-               printf( "filter %i \n", my_keventv[0].filter );
-               goto test_failed_exit;
-       }
-       if ( my_keventv[0].data != 0 ) {
-               printf( "kevent call to get machport event did not return 0 msg_size \n" );
-               printf( "data %ld \n", (long int) my_keventv[0].data );
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_sockets[0] != -1 )
-               close( my_sockets[0] );
-       if ( my_sockets[1] != -1 )
-               close( my_sockets[1] );
-       if ( my_kqueue != -1 )
-               close( my_kqueue );
-       if ( my_kqueue64 != -1 )
-               close( my_kqueue64 );
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );     
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);      
-        }
-       return( my_err );
-}
diff --git a/tools/tests/xnu_quick_test/machvm_tests.c b/tools/tests/xnu_quick_test/machvm_tests.c
deleted file mode 100644 (file)
index 8ea8b64..0000000
+++ /dev/null
@@ -1,364 +0,0 @@
-/*
- *  machvm_tests.c
- *  xnu_quick_test
- *
- *  Copyright 2008 Apple Inc. All rights reserved.
- *
- */
-
-#include "tests.h"
-#include <mach/mach.h>
-#include <unistd.h>
-#include <err.h>
-#include <sys/param.h>
-#include <mach-o/ldsyms.h>
-
-int machvm_tests( void * the_argp )
-{
-       int pagesize = getpagesize();
-       int regionsizes[] = { 1, 3, 7, 13, 77, 1223 }; /* sizes must be in increasing order */
-       char *regionbuffers[] = { NULL, NULL, NULL, NULL, NULL, NULL };
-       int i;
-       kern_return_t kret;
-       
-       /* Use vm_allocate to grab some memory */
-       for (i=0; i < sizeof(regionsizes)/sizeof(regionsizes[0]); i++) {
-               vm_address_t addr = 0;
-
-               kret = vm_allocate(mach_task_self(), &addr, regionsizes[i]*pagesize, VM_FLAGS_ANYWHERE);
-               if (kret != KERN_SUCCESS) {
-                       warnx("vm_allocate of %d pages failed: %d", regionsizes[i], kret);
-                       goto fail;
-               }
-               regionbuffers[i] = (char *)addr;
-       }
-       
-       /* deallocate one range without having touched it, scribble on another, then deallocate that one */
-       kret = vm_deallocate(mach_task_self(), (vm_address_t)regionbuffers[4], regionsizes[4]*pagesize);
-       if (kret != KERN_SUCCESS) {
-               warnx("vm_deallocate of %d pages failed: %d", regionsizes[4], kret);
-               goto fail;
-       }
-       regionbuffers[4] = NULL;
-       
-       memset(regionbuffers[3], 0x4f, pagesize*MIN(3, regionsizes[3]));
-       
-       kret = vm_deallocate(mach_task_self(), (vm_address_t)regionbuffers[3], regionsizes[3]*pagesize);
-       if (kret != KERN_SUCCESS) {
-               warnx("vm_deallocate of %d pages failed: %d", regionsizes[3], kret);
-               goto fail;
-       }
-       regionbuffers[3] = NULL;
-       
-       // populate the largest buffer with a byte pattern that matches the page offset, then fix it to readonly
-       for (i=0; i < regionsizes[5]; i++) {
-               memset(regionbuffers[5] + i*pagesize, (unsigned char)i, pagesize);              
-       }
-       kret = vm_protect(mach_task_self(), (vm_offset_t)regionbuffers[5], regionsizes[5]*pagesize, FALSE, VM_PROT_READ);
-       if (kret != KERN_SUCCESS) {
-               warnx("vm_protect of %d pages failed: %d", regionsizes[5], kret);
-               goto fail;
-       }
-       
-       // read the last few pages of the largest buffer and verify its contents
-       {
-               vm_offset_t     newdata;
-               mach_msg_type_number_t newcount;
-               
-               kret = vm_read(mach_task_self(), (vm_address_t)regionbuffers[5] + (regionsizes[5]-5)*pagesize, 5*pagesize,
-                                          &newdata, &newcount);
-               if (kret != KERN_SUCCESS) {
-                       warnx("vm_read of %d pages failed: %d", 5, kret);
-                       goto fail;
-               }
-               
-               if (0 != memcmp((char *)newdata, regionbuffers[5] + (regionsizes[5]-5)*pagesize,
-                                               5*pagesize)) {
-                       warnx("vm_read comparison of %d pages failed", 5);
-                       kret = -1;
-                       vm_deallocate(mach_task_self(), newdata, 5*pagesize);
-                       goto fail;
-               }
-
-               kret = vm_deallocate(mach_task_self(), newdata, 5*pagesize);
-               if (kret != KERN_SUCCESS) {
-                       warnx("vm_deallocate of %d pages failed: %d", 5, kret);
-                       goto fail;
-               }
-       }
-       
-       // do a list read to repopulate slots 3 and 4
-       {
-               vm_read_entry_t readlist;
-               
-               readlist[0].address = (vm_offset_t)regionbuffers[5] + 10*pagesize;
-               readlist[0].size = regionsizes[3]*pagesize;
-               readlist[1].address = (vm_offset_t)regionbuffers[5] + 10*pagesize + regionsizes[3]*pagesize;
-               readlist[1].size = regionsizes[4]*pagesize;
-               
-               kret = vm_read_list(mach_task_self(), readlist, 2);
-               if (kret != KERN_SUCCESS) {
-                       warnx("vm_read_list failed: %d", kret);
-                       goto fail;
-               }
-               
-               if (0 != memcmp((char *)readlist[0].address, regionbuffers[5] + 10*pagesize,
-                                               regionsizes[3]*pagesize)) {
-                       warnx("vm_read_list comparison of allocation 0 failed");
-                       kret = -1;
-                       vm_deallocate(mach_task_self(), readlist[0].address, readlist[0].size);
-                       vm_deallocate(mach_task_self(), readlist[1].address, readlist[1].size);
-                       goto fail;
-               }
-
-               if (0 != memcmp((char *)readlist[1].address, regionbuffers[5] + 10*pagesize + regionsizes[3]*pagesize,
-                                               regionsizes[4]*pagesize)) {
-                       warnx("vm_read_list comparison of allocation 1 failed");
-                       kret = -1;
-                       vm_deallocate(mach_task_self(), readlist[0].address, readlist[0].size);
-                       vm_deallocate(mach_task_self(), readlist[1].address, readlist[1].size);
-                       goto fail;
-               }
-               
-               regionbuffers[3] = (char *)readlist[0].address;
-               regionbuffers[4] = (char *)readlist[1].address;
-       }
-       
-       // do a read_overwrite and copy, which should be about the same
-       {
-               vm_size_t count;
-               
-               kret = vm_read_overwrite(mach_task_self(), (vm_offset_t)regionbuffers[3],
-                                                                regionsizes[0]*pagesize,
-                                                                (vm_offset_t)regionbuffers[0],
-                                                                &count);
-               if (kret != KERN_SUCCESS) {
-                       warnx("vm_read_overwrite of %d pages failed: %d", regionsizes[0], kret);
-                       goto fail;
-               }
-               
-               kret = vm_copy(mach_task_self(), (vm_offset_t)regionbuffers[0],
-                                                                regionsizes[0]*pagesize,
-                                                                (vm_offset_t)regionbuffers[1]);
-               if (kret != KERN_SUCCESS) {
-                       warnx("vm_copy of %d pages failed: %d", regionsizes[0], kret);
-                       goto fail;
-               }
-               
-               if (0 != memcmp(regionbuffers[1], regionbuffers[3],
-                                               regionsizes[0]*pagesize)) {
-                       warnx("vm_read_overwrite/vm_copy comparison failed");
-                       kret = -1;
-                       goto fail;
-               }
-       }               
-       
-       // do a vm_copy of our mach-o header and compare.
-
-       kret = vm_write(mach_task_self(), (vm_address_t)regionbuffers[2],
-                                               (vm_offset_t)&_mh_execute_header, pagesize);
-       if (kret != KERN_SUCCESS) {
-               warnx("vm_write of %d pages failed: %d", 1, kret);
-               goto fail;
-       }
-               
-       if (_mh_execute_header.magic != *(uint32_t *)regionbuffers[2]) {
-               warnx("vm_write comparison failed");
-               kret = -1;
-               goto fail;
-       }       
-       
-       // check that the vm_protects above worked
-       {
-               vm_address_t addr = (vm_address_t)regionbuffers[5]+7*pagesize;
-               vm_size_t size = pagesize;
-               int _basic[VM_REGION_BASIC_INFO_COUNT];
-               vm_region_basic_info_t basic = (vm_region_basic_info_t)_basic;
-               int _basic64[VM_REGION_BASIC_INFO_COUNT_64];
-               vm_region_basic_info_64_t basic64 = (vm_region_basic_info_64_t)_basic64;
-               int _submap[VM_REGION_SUBMAP_INFO_COUNT];
-               vm_region_submap_info_t submap = (vm_region_submap_info_t)_submap;
-               mach_msg_type_number_t  infocnt;
-               mach_port_t     objname;
-               natural_t nesting_depth = 0;
-               
-#if !__LP64__
-               infocnt = VM_REGION_BASIC_INFO_COUNT;
-               kret = vm_region(mach_task_self(), &addr, &size, VM_REGION_BASIC_INFO,
-                                                (vm_region_info_t)basic, &infocnt, &objname);
-               if (kret != KERN_SUCCESS) {
-                       warnx("vm_region(VM_REGION_BASIC_INFO) failed: %d", kret);
-                       goto fail;
-               }
-               if (VM_REGION_BASIC_INFO_COUNT != infocnt) {
-                       warnx("vm_region(VM_REGION_BASIC_INFO) returned a bad info count");
-                       kret = -1;
-                       goto fail;
-               }
-
-               // when we did the vm_read_list above, it should have split this region into
-               // a 10 page sub-region
-               if (addr != (vm_address_t)regionbuffers[5] || size != 10*pagesize) {
-                       warnx("vm_region(VM_REGION_BASIC_INFO) returned a bad region range");
-                       kret = -1;
-                       goto fail;
-               }
-
-               if (basic->protection != VM_PROT_READ) {
-                       warnx("vm_region(VM_REGION_BASIC_INFO) returned a bad protection");
-                       kret = -1;
-                       goto fail;
-               }
-#endif
-               
-               infocnt = VM_REGION_BASIC_INFO_COUNT_64;
-               // intentionally use VM_REGION_BASIC_INFO and get up-converted
-               kret = vm_region_64(mach_task_self(), &addr, &size, VM_REGION_BASIC_INFO,
-                                                (vm_region_info_t)basic64, &infocnt, &objname);
-               if (kret != KERN_SUCCESS) {
-                       warnx("vm_region_64(VM_REGION_BASIC_INFO) failed: %d", kret);
-                       goto fail;
-               }
-               if (VM_REGION_BASIC_INFO_COUNT_64 != infocnt) {
-                       warnx("vm_region_64(VM_REGION_BASIC_INFO) returned a bad info count");
-                       kret = -1;
-                       goto fail;
-               }
-               
-               // when we did the vm_read_list above, it should have split this region into
-               // a 10 page sub-region
-               if (addr != (vm_address_t)regionbuffers[5] || size != 10*pagesize) {
-                       warnx("vm_region_64(VM_REGION_BASIC_INFO) returned a bad region range");
-                       kret = -1;
-                       goto fail;
-               }
-               
-               if (basic64->protection != VM_PROT_READ) {
-                       warnx("vm_region_64(VM_REGION_BASIC_INFO) returned a bad protection");
-                       kret = -1;
-                       goto fail;
-               }
-               
-#if !__LP64__
-               // try to compare some stuff. Particularly important for fields after offset
-               if (basic->offset != basic64->offset ||
-                       basic->behavior != basic64->behavior ||
-                       basic->user_wired_count != basic64->user_wired_count) {
-                       warnx("vm_region and vm_region_64 did not agree");
-                       kret = -1;
-                       goto fail;                      
-               }
-#endif
-
-#if !__LP64__
-               infocnt = VM_REGION_SUBMAP_INFO_COUNT;
-               kret = vm_region_recurse(mach_task_self(), &addr, &size,
-                                                                &nesting_depth, (vm_region_info_t)submap,
-                                                                &infocnt);
-               if (kret != KERN_SUCCESS) {
-                       warnx("vm_region_recurse() failed: %d", kret);
-                       goto fail;
-               }
-
-               if (VM_REGION_SUBMAP_INFO_COUNT != infocnt) {
-                       warnx("vm_region_recurse() returned a bad info count");
-                       kret = -1;
-                       goto fail;
-               }
-
-               if (submap->pages_dirtied != 10) {
-                       warnx("vm_region_recurse() returned bad pages_dirtied");
-                       kret = -1;
-                       goto fail;
-               }
-
-#endif /* !__LP64__ */
-
-       }
-
-       // exercise mach_make_memory_entry/vm_map
-       {
-               vm_address_t addr1, addr2;
-               vm_size_t size;
-               mach_port_t mem_handle = MACH_PORT_NULL;
-
-               addr1 = 0;
-               size = 11*pagesize;
-               kret = vm_allocate(mach_task_self(), &addr1, size, VM_FLAGS_ANYWHERE);
-               if (kret != KERN_SUCCESS) {
-                       warnx("vm_allocate failed: %d", kret);
-                       kret = -1;
-                       goto fail;
-               }
-
-               *(uint32_t *)(uintptr_t)addr1 = 'test';
-
-               kret = mach_make_memory_entry(mach_task_self(),
-                                                                         &size, addr1, VM_PROT_DEFAULT,
-                                                                         &mem_handle, MACH_PORT_NULL);
-               if (kret != KERN_SUCCESS) {
-                       warnx("mach_make_memory_entry failed: %d", kret);
-                       kret = -1;
-                       goto fail;
-               }
-
-               kret = vm_deallocate(mach_task_self(), addr1, size);
-               if (kret != KERN_SUCCESS) {
-                       warnx("vm_deallocate failed: %d", kret);
-                       kret = -1;
-                       goto fail;
-               }
-
-               addr2 = 0;
-               kret = vm_map(mach_task_self(), &addr2, size, 0, VM_FLAGS_ANYWHERE,
-                                         mem_handle, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_DEFAULT,
-                                         VM_INHERIT_NONE);
-               if (kret != KERN_SUCCESS) {
-                       warnx("vm_map failed: %d", kret);
-                       kret = -1;
-                       goto fail;
-               }
-
-               if (*(uint32_t *)(uintptr_t)addr2 != 'test') {
-                       warnx("mapped data mismatch");
-                       kret = -1;
-                       goto fail;
-               }
-
-               kret = vm_deallocate(mach_task_self(), addr2, size);
-               if (kret != KERN_SUCCESS) {
-                       warnx("vm_deallocate failed: %d", kret);
-                       kret = -1;
-                       goto fail;
-               }
-
-               kret = mach_port_mod_refs(mach_task_self(), mem_handle, MACH_PORT_RIGHT_SEND, -1);
-               if (kret != KERN_SUCCESS) {
-                       warnx("mach_port_mod_refs(-1) failed: %d", kret);
-                       kret = -1;
-                       goto fail;
-               }
-
-               addr2 = 0;
-               kret = vm_map(mach_task_self(), &addr2, size, 0, VM_FLAGS_ANYWHERE,
-                                         mem_handle, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_DEFAULT,
-                                         VM_INHERIT_NONE);
-               if (kret == KERN_SUCCESS) {
-                       warnx("vm_map succeeded when it should not have");
-                       kret = -1;
-                       goto fail;
-               }
-
-               kret = KERN_SUCCESS;
-       }
-       
-fail:
-       for (i=0; i < sizeof(regionsizes)/sizeof(regionsizes[0]); i++) {
-               if (regionbuffers[i]) {
-                       vm_deallocate(mach_task_self(), (vm_address_t)regionbuffers[i], regionsizes[i]*pagesize);
-               }
-       }
-       
-       return kret;
-}
-
diff --git a/tools/tests/xnu_quick_test/main.c b/tools/tests/xnu_quick_test/main.c
deleted file mode 100644 (file)
index 4682490..0000000
+++ /dev/null
@@ -1,639 +0,0 @@
-/* 
- * xnu_quick_test - this tool will do a quick test of every (well, to be
- * honest, most) system call we support in xnu.
- *
- * WARNING - this is not meant to be a full regression test of all the
- * system calls.  The intent is to have a quick test of each system call that
- * can be run very easily and quickly when a new kernel is built.
- *
- * This tool is meant to grow as we find xnu problems that could have been
- * caught before we submit to a build train.  So please add more tests and
- * make the existing ones better.  Some of the original tests are nothing
- * more than place holders and quite lame.  Just keep in mind that the tool
- * should run as fast as possible.  If it gets too slow then most people
- * will stop running it.
- *
- * LP64 testing tip - when adding or modifying tests, keep in mind the
- * variants in the LP64 world.  If xnu gets passed a structure that varies in
- * size between 32 and 64-bit processes, try to test that a field in the 
- * structure contains valid data.  For example, if we know foo structure
- * looks like:
- * struct foo {
- *             int             an_int;
- *             long    a_long;
- *             int             another_int;
- * }
- * And if we know what another_int should contain then test for the known
- * value since its offset will vary depending on whether the calling process
- * is 32 or 64 bits.
- *
- * NOTE - we have several workarounds and test exceptions for some
- * outstanding bugs in xnu.  All the workarounds are marked with "todo" and
- * some comments noting the radar number of the offending bug.  Do a search
- * for "todo" in the source files for this project to locate which tests have
- * known failures.   And please tag any new exceptions you find with "todo"
- * in the comment and the radar number of the bug.
- */
-
-#include <errno.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include <grp.h>
-#include <unistd.h>
-#include <ctype.h>
-#include <sys/mount.h>
-#include <sys/param.h>
-#include <sys/select.h>
-#include <sys/stat.h>
-#include <sys/syslimits.h>
-#include <sys/types.h>
-#include <sys/ucred.h>
-#include <sys/uio.h>
-#include <mach-o/ldsyms.h>
-#include <mach-o/loader.h>
-#include <mach-o/arch.h>
-#include "tests.h"
-
-
-
-
-/* our table of tests to run  */
-struct test_entry   g_tests[] =
-{
-       {1, &syscall_test, NULL, "syscall"},
-       {1, &fork_wait4_exit_test, NULL, "fork, wait4, exit"},
-       {1, &read_write_test, NULL, "fsync, ftruncate, lseek, pread, pwrite, read, readv, truncate, write, writev"},
-       {1, &open_close_test, NULL, "close, fpathconf, fstat, open, pathconf"},
-       {1, &link_stat_unlink_test, NULL, "link, stat, unlink"},
-       {1, &chdir_fchdir_test, NULL, "chdir, fchdir"},
-       {1, &access_chmod_fchmod_test, NULL, "access, chmod, fchmod"},
-       {1, &chown_fchown_lchown_lstat_symlink_test, NULL, "chown, fchown, lchown, lstat, readlink, symlink"},
-       {1, &fs_stat_tests, NULL, "fstatfs, getfsstat, statfs, fstatfs64, getfsstat64, statfs64"},
-       {1, &statfs_32bit_inode_tests, NULL, "32-bit inode versions: fstatfs, getfsstat, statfs"},
-       {1, &getpid_getppid_pipe_test, NULL, "getpid, getppid, pipe"},
-       {1, &uid_tests, NULL, "getauid, gettid, getuid, geteuid, issetugid, setaudit_addr, seteuid, settid, settid_with_pid, setuid"},
-       {1, &mkdir_rmdir_umask_test, NULL, "mkdir, rmdir, umask"},
-       {1, &mknod_sync_test, NULL, "mknod, sync"},
-       {1, &socket2_tests, NULL, "fsync, getsockopt, poll, select, setsockopt, socketpair"},
-       {1, &socket_tests, NULL, "accept, bind, connect, getpeername, getsockname, listen, socket, recvmsg, sendmsg, sendto, sendfile"},
-       {1, &chflags_fchflags_test, NULL, "chflags, fchflags"},
-       {1, &execve_kill_vfork_test, NULL, "kill, vfork, execve, posix_spawn"},
-       {1, &groups_test, NULL, "getegid, getgid, getgroups, setegid, setgid, setgroups"},
-       {1, &dup_test, NULL, "dup, dup2, getdtablesize"},
-       {1, &getrusage_test, NULL, "getrusage"},
-       {1, &signals_test, NULL, "getitimer, setitimer, sigaction, sigpending, sigprocmask, sigsuspend, sigwait"},
-       {1, &acct_test, NULL, "acct"},
-       {1, &ioctl_test, NULL, "ioctl"},
-       {1, &chroot_test, NULL, "chroot"},
-       {1, &memory_tests, NULL, "madvise, mincore, minherit, mlock, mmap, mprotect, msync, munmap"},
-       {1, &process_group_test, NULL, "getpgrp, getpgid, getsid, setpgid, setpgrp, setsid"},
-       {1, &fcntl_test, NULL, "fcntl"},
-       {1, &getlogin_setlogin_test, NULL, "getlogin, setlogin"},
-       {1, &getpriority_setpriority_test, NULL, "getpriority, setpriority"},
-       {1, &time_tests, NULL, "futimes, gettimeofday, settimeofday, utimes"},
-       {1, &rename_test, NULL, "rename, stat"},
-       {1, &locking_test, NULL, "flock"},
-       {1, &mkfifo_test, NULL, "mkfifo, read, write"},
-       {1, &quotactl_test, NULL, "quotactl"},
-       {1, &limit_tests, NULL, "getrlimit, setrlimit"},
-       {1, &directory_tests, NULL, "getattrlist, getdirentriesattr, setattrlist"},
-       {1, &getdirentries_test, NULL, "getdirentries"},
-       {1, &exchangedata_test, NULL, "exchangedata"},
-       {1, &searchfs_test, NULL, "searchfs"},
-       {1, &sema2_tests, NULL, "sem_close, sem_open, sem_post, sem_trywait, sem_unlink, sem_wait"},
-       {1, &sema_tests, NULL, "semctl, semget, semop"},
-       {1, &bsd_shm_tests, NULL, "shm_open, shm_unlink"},
-       {1, &shm_tests, NULL, "shmat, shmctl, shmdt, shmget"},
-       {1, &xattr_tests, NULL, "fgetxattr, flistxattr, fremovexattr, fsetxattr, getxattr, listxattr, removexattr, setxattr"},
-       {1, &aio_tests, NULL, "aio_cancel, aio_error, aio_read, aio_return, aio_suspend, aio_write, fcntl, lio_listio"},
-       {1, &kqueue_tests, NULL, "kevent, kqueue"},
-       {1, &message_queue_tests, NULL, "msgctl, msgget, msgrcv, msgsnd"},
-       {1, &data_exec_tests, NULL, "data/stack execution"},
-       {1, &machvm_tests, NULL, "Mach VM calls"},
-       {1, &commpage_data_tests, NULL, "Commpage data"},
-#if defined(i386) || defined(__x86_64__)
-       {1, &atomic_fifo_queue_test, NULL, "OSAtomicFifoEnqueue, OSAtomicFifoDequeue"},
-#endif
-       {1, &sched_tests, NULL, "Scheduler tests"},
-       {1, &pipes_test, NULL, "Pipes tests"},
-       {1, &kaslr_test, NULL, "KASLR tests"},
-       {1, &getattrlistbulk_test, NULL, "getattrlistbulk"},
-       {1, &openat_close_test, NULL, "openat, fpathconf, fstatat, close"},
-	{1, &linkat_fstatat_unlinkat_test, NULL, "linkat, fstatat, unlinkat"},
-       {1, &faccessat_fchmodat_fchmod_test, NULL, "faccessat, fchmodat, fchmod"},
-       {1, &fchownat_fchown_symlinkat_test, NULL, "fchownat, symlinkat, readlinkat"},
-       {1, &mkdirat_unlinkat_umask_test, NULL, "mkdirat, unlinkat, umask"},
-       {1, &renameat_test, NULL, "renameat, fstatat"},
-       {1, &set_exception_ports_test, NULL, "thread_set_exception_ports, task_set_exception_ports, host_set_exception_ports"},
-       {0, NULL, NULL, "last one"}
-};
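/*
 * Illustrative sketch only - the real definition lives in tests.h, which is not
 * part of this diff.  From the initializers above and the fields main() touches
 * below, each g_tests entry plausibly has a shape along these lines:
 *
 *     typedef int (*test_rtn_t)( void * );
 *     typedef struct test_entry {
 *             int             test_run_it;    // cleared/set by the -run parsing
 *             test_rtn_t      test_routine;   // test function, returns 0 on success
 *             void *          test_input;     // opaque argument passed to the test
 *             const char *    test_infop;     // description printed for each test
 *     } test_entry_t, *test_entryp;           // test_entry_t is a guessed name
 */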
-
-static void create_target_directory( const char * the_targetp );
-static void list_all_tests( void );
-static void mark_tests_to_run( long my_start, long my_end );
-static int parse_tests_to_run( int argc, const char * argv[], int * indexp );
-static void usage( void );
-static int setgroups_if_single_user(void);
-static const char *current_arch( void );
-
-/* globals */
-long           g_max_failures = 0;
-int            g_skip_setuid_tests = 0;
-const char *   g_cmd_namep;
-char           g_target_path[ PATH_MAX ];
-int            g_is_single_user = 0;
-int            g_testbots_active = 0;
-int main( int argc, const char * argv[] ) 
-{
-       #pragma unused(argc)
-       #pragma unused(argv)
-       int                             my_tests_count, i;
-       int                             err;
-       int                             my_failures = 0;
-       int                             list_the_tests = 0;
-       const char *    my_targetp;
-       time_t                  my_start_time, my_end_time;
-       struct stat             my_stat_buf;
-       char                    my_buffer[64];
-       uid_t           sudo_uid = 0;
-       const char *    sudo_uid_env;
-	gid_t		sudo_gid = 0;
-       const char *    sudo_gid_env;
-       sranddev( );                            /* set up seed for our random name generator */
-       g_cmd_namep = argv[0];
-
-       /* make sure SIGCHLD is not ignored, so wait4 calls work */
-       signal(SIGCHLD, SIG_DFL);
-
-       /* NOTE - code in create_target_directory will append '/' if it is necessary */
-       my_targetp = getenv("TMPDIR");
-       if ( my_targetp == NULL )
-               my_targetp = "/tmp";
-       
-       /* make sure we are running as root */
-       if ( ( getuid() != 0 ) || ( geteuid() != 0 ) ) {
-		printf( "%s - must be run as root \n", g_cmd_namep );
-               exit( -1 );
-       }
-
-       sudo_uid_env = getenv("SUDO_UID");
-       if ( sudo_uid_env ) {
-               sudo_uid = strtol(sudo_uid_env, NULL, 10);
-       }
-
-       /* switch real uid to a non_root user, while keeping effective uid as root */
-       if ( sudo_uid != 0 ) {
-               setreuid( sudo_uid, 0 );
-       }
-       else {
-               /* Default to 501 if no sudo uid found */
-               setreuid( 501, 0 );
-       }
-
-       /* restore the gid if run through sudo */
-       sudo_gid_env = getenv("SUDO_GID");
-       if ( sudo_gid_env ) {
-               sudo_gid = strtol(sudo_gid_env, NULL, 10);
-       }
-       
-       if ( getgid() == 0 ) {
-
-               if ( sudo_gid != 0 ) {
-                       setgid( sudo_gid );
-               }
-               else {
-                       /* Default to 20 if no sudo gid found */
-                       setgid( 20 );
-               }
-       }
-
-       /* parse input parameters */
-       for ( i = 1; i < argc; i++ ) {
-               if ( strcmp( argv[i], "-u" ) == 0 ) {
-                       usage( );
-               }
-               if ( strcmp( argv[i], "-t" ) == 0 ||
-                        strcmp( argv[i], "-target" ) == 0 ) {
-                       if ( ++i >= argc ) {
-                               printf( "invalid target parameter \n" );
-                               usage( );
-                       }
-                       /* verify our target directory exists */
-                       my_targetp = argv[i];
-                       err = stat( my_targetp, &my_stat_buf );
-                       if ( err != 0 || S_ISDIR(my_stat_buf.st_mode) == 0 ) {
-                               printf( "invalid target path \n" );
-                               if ( err != 0 ) {
-                                       printf( "stat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                               }
-                               usage( );
-                       }
-                       continue;
-               }
-               if ( strcmp( argv[i], "-f" ) == 0 ||
-                        strcmp( argv[i], "-failures" ) == 0 ) {
-                       if ( ++i >= argc ) {
-                               printf( "invalid failures parameter \n" );
-                               usage( );
-                       }
-
-                       /* get our max number of failures */
-                       g_max_failures = strtol( argv[i], NULL, 10 );
-                       continue;
-               }
-               if ( strcmp( argv[i], "-l" ) == 0 ||
-                        strcmp( argv[i], "-list" ) == 0 ) {
-                       /* list all the tests this tool will do.
-                        */
-                       list_the_tests = 1;
-                       continue;
-               }
-               if ( strcmp( argv[i], "-r" ) == 0 ||
-                        strcmp( argv[i], "-run" ) == 0 ) {
-                       if ( ++i >= argc ) {
-                               printf( "invalid run tests parameter \n" );
-                               usage( );
-                       }
-
-                       /* get which tests to run */
-                       if ( parse_tests_to_run( argc, argv, &i ) != 0 ) {
-                               printf( "invalid run tests parameter \n" );
-                               usage( );
-                       }
-                       continue;
-               }
-               if ( strcmp( argv[i], "-s" ) == 0 ||
-                        strcmp( argv[i], "-skip" ) == 0 ) {
-			/* set that we want to skip the setuid related tests - this is useful for debugging since I can't
-                        * get setuid tests to work while in gdb.
-                        */
-                       g_skip_setuid_tests = 1;
-                       continue;
-               }
-               if ( strcmp( argv[i], "-testbot" ) == 0 ) {
-                       g_testbots_active = 1;
-                       continue;
-               }
-               printf( "invalid argument \"%s\" \n", argv[i] );
-               usage( );
-       }
-
-       /* done parsing.
-        */
-
-/* Check if we are running under testbots */
-#if RUN_UNDER_TESTBOTS
-g_testbots_active = 1;
-#endif
-       /* Code added to run xnu_quick_test under testbots */
-       if ( g_testbots_active == 1 ) {
-               printf("[TEST] xnu_quick_test \n");     /* Declare the beginning of test suite */
-       }
-
-       /* Populate groups list if we're in single user mode */
-       if (setgroups_if_single_user()) {
-               return 1;
-       }
-       if ( list_the_tests != 0 ) {
-               list_all_tests( );
-               return 0;
-       }
-       
-       /* build a test target directory that we use as our path to create any test
-        * files and directories.
-        */
-       create_target_directory( my_targetp );
-       printf( "Will allow %ld failures before testing is aborted \n", g_max_failures );
-       
-       my_start_time = time( NULL );
-       printf( "\nBegin testing - %s \n", ctime_r( &my_start_time, &my_buffer[0] ) );
-       printf( "Current architecture is %s\n", current_arch() );
-
-       /* Code added to run xnu_quick_test under testbots */
-               
-       /* run each test that is marked to run in our table until we complete all of them or
-        * hit the maximum number of failures.
-        */
-       my_tests_count = (sizeof( g_tests ) / sizeof( g_tests[0] ));
-       for ( i = 0; i < (my_tests_count - 1); i++ ) {
-               int                             my_err;
-               test_entryp             my_testp;
-
-               my_testp = &g_tests[i];
-               if ( my_testp->test_run_it == 0 || my_testp->test_routine == NULL )
-                       continue;
-
-               if ( g_testbots_active == 1 ) {
-                       printf("[BEGIN] %s \n", my_testp->test_infop);
-               }
-
-               printf( "test #%d - %s \n", (i + 1), my_testp->test_infop );
-               fflush(stdout);
-               my_err = my_testp->test_routine( my_testp->test_input );
-               if ( my_err != 0 ) {
-                       printf("\t--> FAILED \n");
-			printf("SysCall %s failed ", my_testp->test_infop);
-			printf("Result %d \n", my_err);
-                       my_failures++;
-                       if ( my_failures > g_max_failures ) {
-                               printf( "\n Reached the maximum number of failures - Aborting xnu_quick_test. \n" );
-                               /* Code added to run xnu_quick_test under testbots */
-                               if ( g_testbots_active == 1 ) {
-                                       printf("[FAIL] %s \n", my_testp->test_infop);
-                               }       
-                               goto exit_this_routine;
-                       }
-                       /* Code added to run xnu_quick_test under testbots */
-                       if ( g_testbots_active == 1 ) {
-                               printf("\n[FAIL] %s \n", my_testp->test_infop);
-                       }                       
-                       continue;
-               }
-               /* Code added to run xnu_quick_test under testbots */
-               if ( g_testbots_active == 1 ) {
-                       printf("[PASS] %s \n", my_testp->test_infop);
-               }       
-       }
-       
-exit_this_routine:
-       my_end_time = time( NULL );
-       printf( "\nEnd testing - %s \n", ctime_r( &my_end_time, &my_buffer[0] ) );
-
-       /* clean up our test directory */
-       rmdir( &g_target_path[0] );     
-
-       /* exit non zero if there are any failures */
-       return my_failures != 0;
-} /* main */
-
-
-/* 
- * parse_tests_to_run - parse the -run argument parameters.  the run argument tells us which tests the user
- * wants to run.  we accept ranges (example  1 - 44) and runs of tests (example 2, 33, 34, 100) or a mix of
- * both (example  1, 44 - 100, 200, 250)
- */
-static int parse_tests_to_run( int argc, const char * argv[], int * indexp )
-{
-       int                             my_tests_count, is_range = 0, i;
-       const char *    my_ptr;
-       char *                  my_temp_ptr;
-       long                    my_first_test_number, my_last_test_number;
-       char                    my_buffer[ 128 ];
-       
-       /* set tests table to not run any tests then go back and set the specific tests the caller asked for */
-       my_tests_count = (sizeof( g_tests ) / sizeof( g_tests[0] ));
-       for ( i = 0; i < (my_tests_count - 1); i++ ) {
-               g_tests[ i ].test_run_it = 0;
-       }
-        
-       for ( i = *indexp; i < argc; i++ ) {
-               my_ptr = argv[ i ];
-               if ( strlen( my_ptr ) > 1 && *my_ptr == '-' && isalpha( *(my_ptr + 1) ) ) {
-                       /* we have hit a new argument - need to make sure caller uses this argument on the next
-                        * pass through its parse loop (which will bump the index value so we want to be one less
-                        * than the actual index). 
-                        */
-                       *indexp = (i - 1);
-                       return 0;
-               }
-               
-               if ( strlen( my_ptr ) == 1 && *my_ptr == '-' ) {
-                       /* we are dealing with a range of tests, for example:  33 - 44  */
-                       is_range = 1;
-                       continue;
-               }
-
-               if ( strlen( my_ptr ) > (sizeof( my_buffer ) - 1) ) {
-                       printf( "-run argument has too many test parameters (max of %lu characters) \n", sizeof( my_buffer ) );
-                       return -1;
-               }
-               /* get a local copy of the parameter string to work with - break range into two strings */
-               strcpy( &my_buffer[0], my_ptr );
-
-               my_temp_ptr = strrchr( &my_buffer[0], '-' );
-               if ( my_temp_ptr != NULL ) {
-                       /* we are dealing with a range of tests with no white space, for example:  33-44  or  33- 44  */
-			*my_temp_ptr = 0x00;
-                       my_first_test_number = strtol( &my_buffer[0], NULL, 10 );
-                       if ( *(my_temp_ptr + 1) == 0x00 ) {
-                               /* we hit the case where the range indicator is at the end of one string, for example:  33-  */
-                               is_range = 1;
-                               continue;
-                       }
-                       my_last_test_number = strtol( (my_temp_ptr + 1), NULL, 10 );
-                       if ( my_first_test_number < 1 || my_first_test_number > my_last_test_number ) {
-				printf( "-run argument has invalid range parameters \n" );
-                               return -1;
-                       }
-                       mark_tests_to_run( my_first_test_number, my_last_test_number );
-                       is_range = 0;
-                       continue;
-               }
-               
-               if ( is_range ) {
-                       /* should be the second part of the test number range */
-                       my_last_test_number = strtol( &my_buffer[0], NULL, 10 );
-                       if ( my_first_test_number < 1 || my_first_test_number > my_last_test_number ) {
-				printf( "-run argument has invalid range parameters \n" );
-                               return -1;
-                       }
-
-                       mark_tests_to_run( my_first_test_number, my_last_test_number );
-                       is_range = 0;
-                       continue;
-               }
-               else {
-                       my_first_test_number = strtol( &my_buffer[0], NULL, 10 );
-                       if ( my_first_test_number < 1 ) {
-                               printf( "-run argument has invalid test number parameter \n" );
-                               return -1;
-                       }
-                       mark_tests_to_run( my_first_test_number, my_first_test_number );
-                       continue;
-               }
-       }
-       
-       *indexp = i;
-       return 0;
-       
-} /* parse_tests_to_run */
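/*
 * Worked example (illustrative, not from the original source): invoking the tool
 * as "xnu_quick_test -r 1, 3, 10-15" drives the loop above to call
 * mark_tests_to_run(1, 1), mark_tests_to_run(3, 3) and mark_tests_to_run(10, 15);
 * every other entry keeps test_run_it == 0 because the table is cleared first.
 */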
-
-
-static void create_target_directory( const char * the_targetp )
-{
-       int                     err;
-       
-       if ( strlen( the_targetp ) > (sizeof(g_target_path) - 1) ) {
-               printf( "target path too long - \"%s\" \n", the_targetp );
-               exit( 1 );
-       }
-       
-       for ( ;; ) {
-        int                    my_rand;
-        char           my_name[64];
-               
-        my_rand = rand( );
-        sprintf( &my_name[0], "xnu_quick_test-%d", my_rand );
-        if ( (strlen( &my_name[0] ) + strlen( the_targetp ) + 2) > PATH_MAX ) {
-                       printf( "target path plus our test directory name is too long: \n" );
-                       printf( "target path - \"%s\"  \n", the_targetp );
-                       printf( "test directory name - \"%s\"  \n", &my_name[0] );
-                       exit( 1 );
-               }
-
-        /* append generated directory name onto our path */
-               g_target_path[0] = 0x00;
-        strcat( &g_target_path[0], the_targetp );
-               if ( g_target_path[ (strlen(the_targetp) - 1) ] != '/' ) {
-                       strcat( &g_target_path[0], "/" );
-               }
-        strcat( &g_target_path[0], &my_name[0] );
-               
-               /* try to create the test directory */
-               err = mkdir( &g_target_path[0], (S_IRWXU | S_IRWXG | S_IROTH) );
-               if ( err == 0 ) {
-                       break;
-               }
-               err = errno;
-               if ( EEXIST != err ) {
-                       printf( "test directory creation failed - \"%s\" \n", &g_target_path[0] );
-                       printf( "mkdir call failed with error %d - \"%s\" \n", errno, strerror( err) );
-                       exit( 1 );
-               }
-       }
-       printf( "created test directory at \"%s\" \n", &g_target_path[0] );
-       
-} /* create_target_directory */
-
-
-static void mark_tests_to_run( long my_start, long my_end )
-{
-       int                     my_tests_count, i;
-
-       my_tests_count = (sizeof( g_tests ) / sizeof( g_tests[0] ));
-       my_end = (my_end < (my_tests_count - 1)) ? my_end : (my_tests_count - 1);
-       for ( i = (my_start - 1); i < my_end; i++ ) {
-               g_tests[ i ].test_run_it = 1;  /* run this test */
-       }
-       return;
-} /* mark_tests_to_run */
-
-
-static void usage( void )
-{
-       char *          my_ptr;
-       
-       /* skip past full path and just show the tool name */
-       my_ptr = strrchr( g_cmd_namep, '/' );
-       if ( my_ptr != NULL ) {
-               my_ptr++;
-       }
-       
-       printf( "\nUSAGE:  %s -target TARGET_PATH \n\n", (my_ptr != NULL) ? my_ptr : g_cmd_namep );
-       printf( "\t -f[ailures] MAX_FAILS_ALLOWED   # number of test cases that may fail before we give up.  defaults to 0  \n" );
-       printf( "\t -l[ist]                         # list all the tests this tool performs   \n" );
-       printf( "\t -r[un] 1, 3, 10 - 19            # run specific tests.  enter individual test numbers and/or range of numbers.  use -list to list tests.   \n" );
-       printf( "\t -s[kip]                         # skip setuid tests   \n" );
-       printf( "\t -t[arget] TARGET_PATH           # path to directory where tool will create test files.  defaults to \"/tmp/\"   \n" );
-       printf( "\t -testbot                        # output results in CoreOS TestBot compatible format  \n" );
-       printf( "\nexamples:  \n" );
-       printf( "--- Place all test files and directories at the root of volume \"test_vol\" --- \n" );
-       printf( "%s -t /Volumes/test_vol/ \n", (my_ptr != NULL) ? my_ptr : g_cmd_namep );
-       printf( " \n" );
-       printf( "--- Run the tool for tests 10 thru 15, test 18 and test 20 --- \n" );
-       printf( "%s -r 10-15, 18, 20 \n", (my_ptr != NULL) ? my_ptr : g_cmd_namep );
-       printf( " \n" );
-       exit( 1 );
-
-} /* usage */
-
-/* This is a private API between Libinfo, Libc, and the DirectoryService daemon.
- * Since we are trying to determine if an external provider will back group
- * lookups, we can use this, without relying on additional APIs or tools
- * that might not work yet */
-extern int _ds_running(void);
-
-#define NUM_GROUPS     6
-static int
-setgroups_if_single_user(void)
-{
-       int i, retval = -1;
-       struct group *grp;
-       gid_t gids[NUM_GROUPS];
-
-       if (!_ds_running()) {
-               printf("In single-user mode.\n");
-               g_is_single_user = 1;           
-
-               /* We skip 'nobody' and 'anyone' */
-               getgrent();
-               getgrent();
-               for (i = 0; i < NUM_GROUPS; i++) {
-                       grp = getgrent();
-                       if (!grp) {
-                               break;
-                       }
-
-                       gids[i] = grp->gr_gid;
-               }
-               
-               endgrent();
-               
-               /* Only succeed if we find at least NUM_GROUPS */
-               if (i == NUM_GROUPS) {
-                       retval = setgroups(NUM_GROUPS, gids);
-                       if (retval == 0) { 
-                               getgroups(NUM_GROUPS, gids);
-                               printf("After single-user hack, groups are: ");
-                               for (i = 0; i < NUM_GROUPS; i++) {
-                                       printf("%d, ", gids[i]);
-                               }
-                               putchar('\n');
-                       } else {
-                               printf("Setgroups failed.\n");
-                       }
-               } else {
-                       printf("Couldn't get sufficient number of groups.\n");
-               }
-       } else {
-               printf("Not in single user mode.\n");
-               retval = 0;
-       }
-
-
-       return retval;
-}
-
-static const char *current_arch( void )
-{
-       cpu_type_t cputype = _mh_execute_header.cputype;
-       cpu_subtype_t cpusubtype = _mh_execute_header.cpusubtype;
-
-       const NXArchInfo *arch = NXGetArchInfoFromCpuType(cputype, cpusubtype);
-
-       if (arch) {
-               return arch->name;
-       } else {
-               return "<unknown>";
-       }
-}
-
-#undef printf  /* this makes the "-l" output easier to read */
-static void list_all_tests( void )
-{
-       int             i, my_tests_count;
-       
-       my_tests_count = (sizeof( g_tests ) / sizeof( g_tests[0] ));
-       printf( "\nList of all tests this tool performs... \n" );
-
-       for ( i = 0; i < (my_tests_count - 1); i++ ) {
-               printf( " %d \t   %s \n", (i + 1), g_tests[ i ].test_infop );
-       }
-       
-       return;
-} /* list_all_tests */
diff --git a/tools/tests/xnu_quick_test/makefile b/tools/tests/xnu_quick_test/makefile
deleted file mode 100644 (file)
index 8f39783..0000000
+++ /dev/null
@@ -1,199 +0,0 @@
-SDKROOT ?= /
-
-PLATFORMPATH := $(shell xcrun -sdk $(SDKROOT) -show-sdk-platform-path)
-PLATFORM := $(shell echo $(PLATFORMPATH) | sed 's,^.*/\([^/]*\)\.platform$$,\1,')
-SDKVERSION:=$(shell xcodebuild -sdk $(SDKROOT) -version SDKVersion | head -1)
-SDKPATH := $(shell xcodebuild -sdk $(SDKROOT) -version Path)
-
-ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),)
-CFLAGS += -isysroot $(SDKPATH) -miphoneos-version-min=$(SDKVERSION)
-LIBFLAGS += -isysroot $(SDKPATH) -miphoneos-version-min=$(SDKVERSION)
-else
-CFLAGS += -mmacosx-version-min=$(SDKVERSION)
-LIBFLAGS += -mmacosx-version-min=$(SDKVERSION)
-endif
-
-CC := $(shell xcrun -sdk "$(SDKROOT)" -find cc)
-HOSTCC := cc
-
-CODESIGN := $(shell xcrun -sdk "$(SDKROOT)" -find codesign)
-
-SRCROOT?=$(shell /bin/pwd)
-OBJROOT?=$(SRCROOT)/BUILD/obj
-DSTROOT?=$(SRCROOT)/BUILD/dst
-SYMROOT?=$(SRCROOT)/BUILD/sym
-
-SRCSUBPATH := $(SRCROOT)
-OBJSUBPATH := $(OBJROOT)
-
-ifdef RC_ARCHS
-  ARCH:=$(RC_ARCHS)
-else
-  ifeq ($(PLATFORM),MacOSX)
-    ARCH:=i386 x86_64
-  else ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),)
-    ARCH:=arm64 armv7s armv7
-endif
-endif
-
-MY_ARCH := $(patsubst %, -arch %, $(ARCH)) # allows building multiple archs.
-ARCH_32 := $(filter-out %64, $(ARCH))
-ARCH_32_FLAGS := $(patsubst %, -arch %, $(ARCH_32))
-ARCH_64 := $(filter %64, $(ARCH))
-ARCH_64_FLAGS := $(patsubst %, -arch %, $(ARCH_64))
-
-CFLAGS += -g -I $(SDKPATH)/System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders/ -F/AppleInternal/Library/Frameworks/ $(MORECFLAGS) -Wno-deprecated-declarations
-
-LIBFLAGS += -I $(SDKPATH)/System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders  -F/AppleInternal/Library/Frameworks/ 
-
-# The current implementation of the content protection test requires IOKit.
-ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),)
-LIBFLAGS += -framework IOKit
-endif
-
-MY_OBJECTS := $(OBJSUBPATH)/main.o $(OBJSUBPATH)/memory_tests.o $(OBJSUBPATH)/misc.o \
-                        $(OBJSUBPATH)/sema_tests.o $(OBJSUBPATH)/shared_memory_tests.o \
-                        $(OBJSUBPATH)/socket_tests.o $(OBJSUBPATH)/tests.o \
-                        $(OBJSUBPATH)/xattr_tests.o $(OBJSUBPATH)/kqueue_tests.o \
-                        $(OBJSUBPATH)/machvm_tests.o $(OBJSUBPATH)/commpage_tests.o \
-                        $(OBJSUBPATH)/atomic_fifo_queue_test.o $(OBJSUBPATH)/sched_tests.o \
-                        $(OBJSUBPATH)/pipes_tests.o
-
-ifeq ($(PLATFORM),MacOSX)
-MY_OBJECTS += $(OBJSUBPATH)/32bit_inode_tests.o
-endif
-
-ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),)
-MY_OBJECTS += $(OBJSUBPATH)/content_protection_test.o
-endif
-
-
-xnu_quick_test : $(OBJSUBPATH) $(DSTROOT) $(SYMROOT) $(MY_OBJECTS) helpers
-ifndef RC_ProjectName
-       rm -rf $(DSTROOT)/xnu_quick_test
-endif
-       $(CC) -g $(MY_ARCH) $(LIBFLAGS) -o $(SYMROOT)/xnu_quick_test $(MY_OBJECTS)
-       /usr/bin/dsymutil $(SYMROOT)/xnu_quick_test
-ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),)
-       $(CODESIGN) -f -s - --entitlements $(SRCSUBPATH)/xnu_quick_test.entitlements $(SYMROOT)/xnu_quick_test
-endif
-       /usr/bin/ditto $(SYMROOT)/xnu_quick_test $(DSTROOT)/xnu_quick_test
-
-# This target is defined for testbots. 
-# Before compiling this target, MORECFLAGS must be set to "-D RUN_UNDER_TESTBOTS=1", check README file for more details
-# NOTE: -f[ailures] MAX_FAILS_ALLOWED option is set to 100 to make sure we completely run the test suite and 
-# report all the failures.
-
-testbots: xnu_quick_test 
-       @(cd $(DSTROOT) ; ./xnu_quick_test -f 100)
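# Illustrative example (not part of the original makefile): per the note above,
# a testbots run would typically be kicked off with something like
#
#     make testbots MORECFLAGS="-D RUN_UNDER_TESTBOTS=1"
#
# so that RUN_UNDER_TESTBOTS is defined when main.c is compiled and
# g_testbots_active is forced on at startup.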
-
-# The helper binaries are used to test exec()'ing between 64bit and 32bit. 
-# Creates test binaries with page zero sizes = 4KB and 4GB. Also creates 32-bit
-# helper processes for the 64-bit version of xnu_quick_test to test the conversion
-# from a 32-bit process to a 64-bit process.
-helpers : $(SRCSUBPATH)/helpers/sleep.c $(SRCSUBPATH)/helpers/launch.c $(SRCSUBPATH)/helpers/arch.c $(SRCSUBPATH)/helpers/data_exec.c helperdir $(OBJSUBPATH)/misc.o
-ifeq ($(PLATFORM),MacOSX)
-ifneq "$(ARCH_32)" ""
-       $(CC) -g $(ARCH_32_FLAGS)                              $(SRCSUBPATH)/helpers/sleep.c -o $(SYMROOT)/sleep-i386
-       /usr/bin/ditto $(SYMROOT)/sleep-i386 $(DSTROOT)/helpers/
-endif
-endif
-ifeq ($(PLATFORM),MacOSX)
-ifneq "$(ARCH_32)" ""
-       $(CC) -g $(LIBFLAGS) $(ARCH_32_FLAGS)   $(OBJSUBPATH)/misc.o $(SRCSUBPATH)/helpers/launch.c -o $(SYMROOT)/launch-i386
-       $(CC) -g $(ARCH_32_FLAGS)       -DNXDATA32TESTNONX $(SRCSUBPATH)/helpers/data_exec.c -o $(SYMROOT)/data_exec32nonxspawn
-       /usr/bin/ditto $(SYMROOT)/launch-i386 $(SYMROOT)/data_exec32nonxspawn $(DSTROOT)/helpers/
-endif
-ifneq "$(ARCH_64)" ""
-       $(CC) -g $(ARCH_64_FLAGS) -pagezero_size 0x100000000 $(SRCSUBPATH)/helpers/sleep.c -o $(SYMROOT)/sleep-x86_64-4G
-       $(CC) -g $(ARCH_64_FLAGS) -pagezero_size 0x1000      $(SRCSUBPATH)/helpers/sleep.c -o $(SYMROOT)/sleep-x86_64-4K
-       $(CC) -g $(LIBFLAGS) $(ARCH_64_FLAGS)   $(OBJSUBPATH)/misc.o $(SRCSUBPATH)/helpers/launch.c -o $(SYMROOT)/launch-x86_64
-       /usr/bin/ditto $(SYMROOT)/sleep-x86_64-4G $(SYMROOT)/sleep-x86_64-4K $(SYMROOT)/launch-x86_64 $(DSTROOT)/helpers/
-endif
-       $(CC) -g $(MY_ARCH)     $(SRCSUBPATH)/helpers/data_exec.c -o $(SYMROOT)/data_exec
-       /usr/bin/ditto $(SYMROOT)/data_exec $(DSTROOT)/helpers/
-endif
-ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),)
-ifneq "$(ARCH_32)" ""
-       $(CC) $(CFLAGS) $(ARCH_32_FLAGS) $(SRCSUBPATH)/helpers/sleep.c -o $(SYMROOT)/sleep-arm
-       $(CC) $(LIBFLAGS) $(CFLAGS) $(ARCH_32_FLAGS) $(OBJSUBPATH)/misc.o $(SRCSUBPATH)/helpers/launch.c -o $(SYMROOT)/launch-arm
-       /usr/bin/ditto $(SYMROOT)/sleep-arm $(SYMROOT)/launch-arm $(DSTROOT)/helpers/
-endif
-ifneq "$(ARCH_64)" ""
-       $(CC) $(CFLAGS) $(ARCH_64_FLAGS) $(SRCSUBPATH)/helpers/sleep.c -o $(SYMROOT)/sleep-arm64
-       /usr/bin/ditto $(SYMROOT)/sleep-arm64 $(DSTROOT)/helpers/
-endif
-endif
-       $(CC) -g $(MY_ARCH) $(CFLAGS) $(SRCSUBPATH)/helpers/arch.c -o $(SYMROOT)/arch
-       /usr/bin/ditto $(SYMROOT)/arch $(DSTROOT)/helpers/
-
-helperdir :
-       mkdir -p $(DSTROOT)/helpers
-
-$(OBJSUBPATH) :
-       mkdir -p $(OBJSUBPATH);
-
-$(DSTROOT) :
-       mkdir -p $(DSTROOT);
-
-$(SYMROOT) :
-       mkdir -p $(SYMROOT)
-
-$(OBJSUBPATH)/main.o : $(SRCSUBPATH)/main.c $(SRCSUBPATH)/tests.h
-       $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/main.c  -o $@
-
-$(OBJSUBPATH)/memory_tests.o : $(SRCSUBPATH)/memory_tests.c $(SRCSUBPATH)/tests.h
-       $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/memory_tests.c  -o $@
-
-# misc.o has to be built 3-way for the helpers to link
-$(OBJSUBPATH)/misc.o : $(SRCSUBPATH)/misc.c $(SRCSUBPATH)/tests.h
-ifneq ($(filter iPhoneOS iPhoneOSNano,$(PLATFORM)),)
-       $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/misc.c   -o $@
-else
-       $(CC) -arch i386 -arch x86_64 $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/misc.c   -o $@
-endif
-
-$(OBJSUBPATH)/sema_tests.o : $(SRCSUBPATH)/sema_tests.c $(SRCSUBPATH)/tests.h
-       $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/sema_tests.c   -o $@
-
-$(OBJSUBPATH)/shared_memory_tests.o : $(SRCSUBPATH)/shared_memory_tests.c $(SRCSUBPATH)/tests.h
-       $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/shared_memory_tests.c   -o $@
-
-$(OBJSUBPATH)/socket_tests.o : $(SRCSUBPATH)/socket_tests.c $(SRCSUBPATH)/tests.h
-       $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/socket_tests.c   -o $@
-
-$(OBJSUBPATH)/tests.o : $(SRCSUBPATH)/tests.c $(SRCSUBPATH)/tests.h
-       $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/tests.c    -o $@
-
-$(OBJSUBPATH)/xattr_tests.o : $(SRCSUBPATH)/xattr_tests.c $(SRCSUBPATH)/tests.h
-       $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/xattr_tests.c    -o $@
-
-$(OBJSUBPATH)/machvm_tests.o : $(SRCSUBPATH)/machvm_tests.c $(SRCSUBPATH)/tests.h
-       $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/machvm_tests.c    -o $@
-
-$(OBJSUBPATH)/sched_tests.o : $(SRCSUBPATH)/sched_tests.c $(SRCSUBPATH)/tests.h
-       $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/sched_tests.c    -o $@
-
-$(OBJSUBPATH)/kqueue_tests.o : $(SRCSUBPATH)/kqueue_tests.c $(SRCSUBPATH)/tests.h
-       $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/kqueue_tests.c   -o $@
-
-$(OBJSUBPATH)/32bit_inode_tests.o : $(SRCSUBPATH)/32bit_inode_tests.c $(SRCSUBPATH)/tests.h
-       $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/32bit_inode_tests.c    -o $@
-
-$(OBJSUBPATH)/commpage_tests.o : $(SRCSUBPATH)/commpage_tests.c $(SRCSUBPATH)/tests.h
-       $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/commpage_tests.c    -o $@
-
-$(OBJSUBPATH)/atomic_fifo_queue_test.o : $(SRCSUBPATH)/atomic_fifo_queue_test.c $(SRCSUBPATH)/tests.h
-       $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/atomic_fifo_queue_test.c    -o $@
-
-$(OBJSUBPATH)/content_protection_test.o : $(SRCSUBPATH)/content_protection_test.c $(SRCSUBPATH)/tests.h
-       $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/content_protection_test.c -o $@
-
-$(OBJSUBPATH)/pipes_tests.o : $(SRCSUBPATH)/pipes_tests.c $(SRCSUBPATH)/tests.h
-       $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/pipes_tests.c -o $@
-
-.PHONY : clean
-clean :
-       rm -Rf $(DSTROOT)/xnu_quick_test
-       rm -Rf $(DSTROOT)/helpers/*
-       rm -Rf $(OBJSUBPATH)/*.o
diff --git a/tools/tests/xnu_quick_test/memory_tests.c b/tools/tests/xnu_quick_test/memory_tests.c
deleted file mode 100644 (file)
index c14564a..0000000
+++ /dev/null
@@ -1,333 +0,0 @@
-/*
- *  memory_tests.c
- *  xnu_quick_test
- *
- *  Created by Jerry Cottingham on 4/12/05.
- *  Copyright 2005 Apple Computer Inc. All rights reserved.
- *
- */
-
-#include "tests.h"
-#include <mach/mach.h>
-
-extern char  g_target_path[ PATH_MAX ];
-
-/*
- * static to localize to this compilation unit; volatile to avoid register
- * optimization which would prevent modification by a signal handler.
- */
-static volatile int    my_err;
-
-void
-bus_handler(int sig, siginfo_t *si, void *mcontext)
-{
-	/* Exit the child cleanly when we get the expected SIGBUS */
-       if (sig == SIGBUS) {
-               _exit(0);
-       }
-}
-
-/*  **************************************************************************************************************
- *	Test madvise, mincore, minherit, mlock, munlock, mmap, mprotect, msync, munmap system calls.
- *     todo - see if Francois has better versions of these tests...
- *  **************************************************************************************************************
- */
-int memory_tests( void * the_argp )
-{
-       int                     my_page_size, my_status;
-       int                     my_fd = -1;
-       char *          my_pathp = NULL;
-       char *          my_bufp = NULL;
-       char *          my_addr = NULL;
-       char *          my_test_page_p = NULL;
-       ssize_t         my_result;
-       pid_t           my_pid, my_wait_pid;
-       kern_return_t   my_kr;          
-       struct sigaction        my_sa;
-
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-       
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_pathp, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-
-       my_page_size = getpagesize( );
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_test_page_p, my_page_size, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_test_page_p = 0x00;
-       strcat( my_test_page_p, "parent data" );
-       /* test minherit - share a page with child, add to the string in child then 
-        * check for modification after child terminates.
-        */ 
-       my_err = minherit( my_test_page_p, my_page_size, VM_INHERIT_SHARE );
-       if ( my_err == -1 ) {
-               printf( "minherit failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /*
-        * spin off a child process that we will use for testing.   
-        */
-       my_pid = fork( );
-       if ( my_pid == -1 ) {
-               printf( "fork failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_pid == 0 ) {
-               /* 
-                * child process...
-                */             
-               strcat( my_test_page_p, " child data" );
-
-               /* create a test file in page size chunks */
-                       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_bufp, (my_page_size * 10), VM_FLAGS_ANYWHERE);
-               if(my_kr != KERN_SUCCESS){
-                       printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-
-               /* test madvise on anonymous memory */
-               my_err = madvise(my_bufp, (my_page_size * 10), MADV_WILLNEED);
-               if ( my_err == -1 ) {
-                       printf("madvise WILLNEED on anon memory failed with error %d - \"%s\" \n", errno, strerror( errno ) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-
-               memset( my_bufp, 'j', (my_page_size * 10) );
-               my_fd = open( my_pathp, O_RDWR, 0 );
-               if ( my_fd == -1 ) {
-                       printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-               
-               /* test madvise on anonymous memory */
-               my_err = madvise(my_bufp, (my_page_size * 10), MADV_DONTNEED);
-               if ( my_err == -1 ) {
-                       printf("madvise DONTNEED on anon memory failed with error %d - \"%s\" \n", errno, strerror( errno ) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-
-               my_result = write( my_fd, my_bufp, (my_page_size * 10) );
-               if ( my_result == -1 ) {
-                       printf( "write call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-               
-               /* map the file into memory */
-               my_addr = (char *) mmap( NULL, (my_page_size * 2), (PROT_READ | PROT_WRITE), (MAP_FILE | MAP_SHARED), my_fd, 0 );
-               if ( my_addr == (char *) -1 ) {
-                       printf( "mmap call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-               
-               /* make sure we got the right data mapped */
-               if ( *my_addr != 'j' || *(my_addr + my_page_size) != 'j' ) {
-                       printf( "did not map in correct data \n" );
-                       my_err = -1;
-                       goto exit_child;
-               }
-
-               /* test madvise */
-               my_err = madvise( my_addr, (my_page_size * 2), MADV_WILLNEED );
-               if ( my_err == -1 ) {
-                       printf( "madvise WILLNEED call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-
-               my_err = madvise( my_addr, (my_page_size * 2), MADV_DONTNEED );
-               if ( my_err == -1 ) {
-                       printf( "madvise DONTNEED call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-
-		/* test mincore, mlock, munlock */
-               my_err = mlock( my_addr, my_page_size );
-               if ( my_err == -1 ) {
-                       printf( "mlock call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-
-		/* my_bufp is about to be reused, so test madvise on anonymous memory */
-               my_err = madvise(my_bufp, (my_page_size * 10), MADV_FREE);
-               if ( my_err == -1 ) {
-                       printf("madvise FREE on anon memory failed with error %d - \"%s\" \n", errno, strerror( errno ) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-
-               my_err = mincore( my_addr, 1, my_bufp );        
-               if ( my_err == -1 ) {
-                       printf( "mincore call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-               /* page my_addr is in should be resident after mlock */
-               if ( (*my_bufp & MINCORE_INCORE) == 0 ) {
-                       printf( "mincore call failed to find resident page \n" );
-                       my_err = -1;
-                       goto exit_child;
-               }
-               
-               my_err = munlock( my_addr, my_page_size );
-               if ( my_err == -1 ) {
-                       printf( "munlock call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-                
-               /* modify first page then use msync to push data out */
-               memset( my_addr, 'x', my_page_size );
-               my_err = msync( my_addr, my_page_size, (MS_SYNC | MS_INVALIDATE) );     
-               if ( my_err == -1 ) {
-                       printf( "msync call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-                                       
-               /* test madvise */
-               my_err = madvise( my_addr, (my_page_size * 2), MADV_DONTNEED );
-               if ( my_err == -1 ) {
-                       printf( "madvise DONTNEED call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-
-               /* test madvise */
-               my_err = madvise( my_addr, (my_page_size * 2), MADV_FREE );
-               if ( my_err == -1 ) {
-                       printf( "madvise FREE call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-
-               /* verify that the file was updated */
-               lseek( my_fd, 0, SEEK_SET );    
-               bzero( (void *)my_bufp, my_page_size );
-               my_result = read( my_fd, my_bufp, my_page_size );
-               if ( my_result == -1 ) {
-                       printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-               if ( *my_bufp != 'x' ) {
-                       printf( "msync did not flush correct data \n" );
-                       my_err = -1;
-                       goto exit_child;
-               }
-                
-               /* unmap our test page */
-               my_err = munmap( my_addr, (my_page_size * 2) );
-               if ( my_err == -1 ) {
-                       printf( "munmap call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-               my_addr = NULL;
-
-               /* map the file into memory again for mprotect test  */
-               my_addr = (char *) mmap( NULL, (my_page_size * 2), (PROT_READ | PROT_WRITE), (MAP_FILE | MAP_SHARED), my_fd, 0 );
-               if ( my_addr == (char *) -1 ) {
-                       printf( "mmap call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-               *my_addr = 'a';
-        
-
-
-               /* test mprotect - change protection to only PROT_READ */
-               my_err = mprotect( my_addr, my_page_size, PROT_READ );
-               if ( my_err == -1 ) {
-                       printf( "mprotect call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-
-		my_sa.sa_sigaction = bus_handler;
-		sigemptyset( &my_sa.sa_mask );
-		my_sa.sa_flags = SA_SIGINFO | SA_RESETHAND;
-               if ((my_err = sigaction(SIGBUS, &my_sa, NULL)) != 0) {
-                       printf("sigaction call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-
-               my_err = -1;    /* default to error out if we do NOT trigger a SIGBUS */
-
-               *my_addr = 'z'; /* should cause SIGBUS signal (we look for this at child termination within the parent) */
-
-		/* only reached if the store above did not raise SIGBUS */
-
-               printf("Expected SIGBUS signal, got nothing!\n");
-               my_err = -1;
-exit_child:
-               exit( my_err );
-       }
-
-       /* parent process -
-        * we should get no error if the child has completed all tests successfully
-        */
-       my_wait_pid = wait4( my_pid, &my_status, 0, NULL );
-       if ( my_wait_pid == -1 ) {
-               printf( "wait4 failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* wait4 should return our child's pid when it exits */
-       if ( my_wait_pid != my_pid ) {
-               printf( "wait4 did not return child pid - returned %d should be %d \n", my_wait_pid, my_pid );
-               goto test_failed_exit;
-       }
-
-       /* If we did not exit cleanly, report it
-        */
-       if ( !WIFEXITED( my_status ) || (WEXITSTATUS( my_status ) != 0)) {
-               printf( "wait4 returned child died of status - 0x%08X \n", my_status );
-               goto test_failed_exit;
-       }
-
-       /* make sure shared page got modified in child */
-       if ( strcmp( my_test_page_p, "parent data child data" ) != 0 ) {
-               printf( "minherit did not work correctly - shared page looks wrong \n" );
-               goto test_failed_exit;
-       }
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );     
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);              
-        }
-        if ( my_test_page_p != NULL ) {
-               vm_deallocate(mach_task_self(), (vm_address_t)my_test_page_p, my_page_size);
-        }
-       return( my_err );
-}
-
diff --git a/tools/tests/xnu_quick_test/misc.c b/tools/tests/xnu_quick_test/misc.c
deleted file mode 100644 (file)
index 74c454a..0000000
+++ /dev/null
@@ -1,436 +0,0 @@
-
-#include "tests.h"
-#include <mach/mach.h>
-
-extern int g_testbots_active;
-
-/*
- * create_random_name - creates a file with a random / unique name in the given directory.
- * when do_open is true we create a file else we generaate a name that does not exist in the
- * given directory (we do not create anything when do_open is 0).
- * WARNING - caller provides enough space in path buffer for longest possible name.
- * WARNING - assumes caller has appended a trailing '/' on the path passed to us.
- * RAND_MAX is currently 2147483647 (ten characters plus one for a slash)
- */
-int create_random_name( char *the_pathp, int do_open ) {
-       int             i, my_err;
-       int             my_fd = -1;
-       
-    for ( i = 0; i < 1; i++ ) {
-        int                    my_rand;
-        char           *myp;
-        char           my_name[32];
-        
-        my_rand = rand( );
-        sprintf( &my_name[0], "%d", my_rand );
-        if ( (strlen( &my_name[0] ) + strlen( the_pathp ) + 2) > PATH_MAX ) {
-            printf( "%s - path to test file greater than PATH_MAX \n", __FUNCTION__ );
-            return( -1 );
-        }
-
-        // append generated file name onto our path
-        myp = strrchr( the_pathp, '/' );
-        *(myp + 1) = 0x00;
-        strcat( the_pathp, &my_name[0] );
-               if ( do_open ) {
-                       /* create a file with this name */
-                       my_fd = open( the_pathp, (O_RDWR | O_CREAT | O_EXCL),
-                                                       (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) );
-                       if ( my_fd == -1 ) {
-                               if ( errno != EEXIST ) {
-                                       printf( "%s - open failed with errno %d - %s \n",
-                                                       __FUNCTION__, errno, strerror( errno ) );
-                                       return( -1 );
-                               }
-                               // name already exists, try another
-                               i--;
-                               continue;
-                       }
-               }
-               else {
-                       /* make sure the name is unique */
-                       struct stat             my_sb;
-                       my_err = stat( the_pathp, &my_sb );
-                       if ( my_err != 0 ) {
-                               if ( errno == ENOENT ) {
-                                       break;
-                               }
-                               else {
-					printf( "%s - stat failed with errno %d - %s \n",
-                                                       __FUNCTION__, errno, strerror( errno ) );
-                                       return( -1 );
-                               }
-                       }
-                       /* name already exists, try another */
-                       i--;
-                       continue;
-               }
-    }
-       
-       if ( my_fd != -1 )
-               close( my_fd );
-       
-       return( 0 );
-       
-} /* create_random_name */
-
-/*
- * create_file_with_name - create a file in the given target directory using the given name.
- * If an existing file or directory is present use the value of remove_existing to determine if the
- * object is to be deleted.
- * returns 0 if file could be created, 1 if file exists, 2 if directory exists, else -1 
- * NOTE - will fail if a directory is present with the given name and it is not empty.
- */
-int create_file_with_name( char *the_target_dirp, char *the_namep, int remove_existing ) {
-       int                             create_test_file, my_err, my_result;
-       int                             my_fd = -1;
-       char *                  my_pathp = NULL;
-       struct stat             my_sb;
-       kern_return_t           my_kr;
-
-       create_test_file = 0;
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto failure_exit;
-        }
-       strcpy( my_pathp, the_target_dirp );
-       strcat( my_pathp, the_namep );
-
-       /* make sure the name is unique */
-       my_result = 0;
-       my_err = stat( my_pathp, &my_sb );
-       if ( my_err != 0 ) {
-               create_test_file = 1;
-               if ( errno != ENOENT ) {
-                       goto failure_exit;
-               }
-       }
-       else {
-               /* name already exists */
-               if ( S_ISDIR( my_sb.st_mode ) ) {
-                       my_result = 2; /* tell caller directory exists with target name */
-                       if ( remove_existing ) {
-                               my_err = rmdir( my_pathp );
-                               if ( my_err == -1 ) {
-                                       printf( "rmdir failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                                       goto failure_exit;
-                               }
-                               create_test_file = 1;
-                       }
-               }
-               else {
-                       my_result = 1; /* tell caller file exists with target name */
-                       if ( remove_existing ) {
-                               my_err = unlink( my_pathp );
-                               if ( my_err == -1 ) {
-                                       printf( "unlink failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                                       goto failure_exit;
-                               }
-                               create_test_file = 1;
-                       }
-               }
-       }
-       
-       if ( create_test_file ) {
-               /* create a file with this name */
-               my_fd = open( my_pathp, (O_RDWR | O_CREAT | O_EXCL),
-                                               (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) );
-               if ( my_fd == -1 ) {
-                       printf( "open failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       goto failure_exit;
-               }
-               fcntl( my_fd, F_FULLFSYNC );
-               close( my_fd );
-       } 
-       goto routine_exit;
-
-failure_exit:  
-       my_result = -1;
-routine_exit:
-       if ( my_pathp != NULL ) {
-               if ( my_result == -1 && create_test_file ) {
-                       remove( my_pathp );     
-               }
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);
-        }
-       
-       return( my_result );
-       
-} /* create_file_with_name */
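/*
 * Illustrative usage (not from the original source; the path and name are
 * hypothetical):
 *
 *     int rv = create_file_with_name( "/tmp/xnu_quick_test-1234/", "my_test_file", 1 );
 *
 * returns 0 when the file is newly created, 1 or 2 when a file or directory with
 * that name already existed (and, since remove_existing is set, was replaced),
 * and -1 on failure.  Note the target directory string must carry a trailing '/'
 * since this routine does not insert one.
 */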
-
-
-
-
-/*
- * This function is needed by both xnu_quick_test proper and the execve() helper
- * program. It forks a child process and then exec()s an image on that child.
- * Path, argv, and envp are fed directly to the execve() call.
- * Parameter killwait decides how long to wait before killing the child.
- */
-int do_execve_test(char * path, char * argv[], void * envp, int killwait)
-{
-       int     my_err = 0, my_status;
-       pid_t   my_pid, my_wait_pid;
-
-#if DEBUG
-       printf("do_execve_test(path = %s)\n", path);
-       printf("CWD= %s\n", getwd(NULL));
-       fflush(stdout);
-#endif
-
-       /* vfork then execve sleep system command (which we will kill from the parent process) */
-       my_pid = vfork();
-       if (my_pid == -1) {
-               printf( "vfork failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_pid == 0 ) {
-               /* 
-                * child process - use execve to start one of the customized helper
-                * binaries, which just sleep for 120 seconds. Let our parent kill us.
-                */
-
-               my_err = execve(path, argv, envp);
-               if ( my_err != 0 ) { /* TODO: execve() on x86_64 inca returns weird error codes, see rdar://4655612 */
-                       printf( "execve call failed with return value: %d, errno: %d - \"%s\"; path: %s \n",
-                               my_err, errno, strerror( errno), path );
-                       fflush(stdout);
-                       exit(-2);
-               }
-
-               /* should never get here */
-               printf("Execve failed and it was not caught by our test\n");
-               return(-1);
-       }
-       /* 
-        * parent process - let's kill our sleeping child
-        */     
-       sleep(killwait);
-       my_err = kill( my_pid, SIGKILL );
-       if ( my_err == -1 ) {
-               printf( "kill call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* wait for child to exit */
-       my_wait_pid = wait4( my_pid, &my_status, 0, NULL );
-       if ( my_wait_pid == -1 ) {
-               printf( "wait4 failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* wait4 should return our child's pid when it exits */
-       if ( my_wait_pid != my_pid ) {
-               printf( "wait4 did not return child pid - returned %d should be %d \n", my_wait_pid, my_pid );
-               goto test_failed_exit;
-       }       
-
-       if (!(WIFSIGNALED( my_status ))) {
-		printf( "child process was not signaled and should have been - status 0x%08X \n", my_status );
-               goto test_failed_exit;
-       }
-               
-       if (WTERMSIG( my_status ) != SIGKILL) {
-               printf( "wait4 returned wrong signal status - 0x%02X \n", my_status );
-               goto test_failed_exit;
-       }
-
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = 1;
-
-test_passed_exit:
-       return( my_err );
-} /* do_execve_test */
-
-/*
- * Helper function for posix_spawn test
- *     arch: target architecture to spawn for
- */
-int do_spawn_test(int arch, int shouldfail)
-{
-       int my_err, my_pid, my_status;
-       size_t my_size;
-       posix_spawnattr_t attr;
-
-       char * args[] = {"helpers/arch", NULL};
-       
-       my_err = posix_spawnattr_init(&attr);
-       if (my_err != 0) {
-               printf("posix_spawnattr_init failed\n");
-               goto done;
-       }
-
-       /* set spawn to only succeed for arch 'arch' */
-       my_err = posix_spawnattr_setbinpref_np(&attr, 1, &arch, &my_size);
-       if (my_err != 0 || my_size != 1) {
-               printf("posix_spawnattr_setbinpref_np failed\n");
-               goto done;
-       }
-
-       /* spawn off child process */
-       my_err = posix_spawn(&my_pid, args[0], NULL, &attr, args, NULL);
-       if (shouldfail) {
-               if( my_err == 0) {
-                       printf("posix_spawn should have failed on arch %d\n", arch);
-                       goto done;
-               }
-               my_err = 0;
-       } else {
-               /*
-                * child should exit with return code == arch; note that the
-                * posix_spawn error numbers are *returned*, NOT set in errno!!!
-                */
-               if (my_err != 0) {
-                       printf("posix_spawn failed with errno %d - %s\n", my_err, strerror(my_err));
-                       goto done;
-               }
-
-               my_err = wait4(my_pid, &my_status, 0, NULL);
-               if (my_err == -1) {
-                       printf("wait4 failed with errno %d - %s\n", errno, strerror(errno));
-                       goto done;
-               }
-               my_err = 0;
-
-               if (WEXITSTATUS(my_status) != (arch & 0xff)) {
-                       printf("child exited with status %d (expected %d)\n", 
-                                       (WEXITSTATUS(my_status)), 
-                                       (arch & 0xff));
-                       my_err = -1;
-                       goto done;
-               }
-       }
-
-done:
-       return my_err;
-}
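-
-/*
- * A typical invocation pattern (illustrative sketch only; the real callers
- * live in tests.c) spawns for the native CPU type and expects success, then
- * spawns for an unsupported type and expects failure:
- *
- *     if ( do_spawn_test( CPU_TYPE_X86_64, 0 ) )     (native arch: should spawn)
- *             return -1;
- *     if ( do_spawn_test( CPU_TYPE_POWERPC, 1 ) )    (unsupported arch: should fail)
- *             return -1;
- */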
-
-/*
- * Uses sysctlbyname to determine the cpu type. Currently, XNU classifies G5 as a 
- * 32-bit CPU, so this shouldn't be used to determine whether or not a CPU
- * is 64-bit.
- */
-int get_architecture()
-{
-       int rval = -1;
-       size_t length = 0;
-       int my_err, buf;
-       char *errmsg = NULL;
-
-       errmsg = "sysctlbyname() failed when getting hw.cputype";
-       if ((my_err = sysctlbyname("hw.cputype", NULL, &length, NULL, 0))) goto finished;       /* get length of data */
-       if (length != sizeof(buf))                                       goto finished;
-       if ((my_err = sysctlbyname("hw.cputype", &buf, &length, NULL, 0))) goto finished; /* copy data */
-       switch (buf) {
-       case CPU_TYPE_X86:
-       case CPU_TYPE_X86_64:
-               rval = INTEL;
-               break;
-       case CPU_TYPE_ARM:
-#ifdef CPU_TYPE_ARM64
-       case CPU_TYPE_ARM64:
-#endif
-               rval = ARM;
-               break;
-       }
-
-finished:
-       if (rval == -1 && errmsg)
-               printf("%s", errmsg);
-
-       return rval;
-}
-
-
-/*
- * Gets the bitness of the current host. Returns either 32 or 64.
- * This reports the hardware capability; it does not tell us whether this
- * binary is executing in 64-bit or 32-bit mode. Check sizeof(long)
- * or a pointer size to determine that.
- */
-int get_bits()
-{
-       int  my_err, buf;
-       size_t len = 0;
-       int rval = 32;  /*
-       int rval = 32;  /*
-                        * On 32-bit systems the sysctls 64bitops and x86_64 don't
-                        * even exist, so if we don't find them we assume
-                        */
-
-       /* Check for PPC 64 */
-       if ((my_err = sysctlbyname("hw.optional.64bitops", NULL, &len, NULL, 0)))       goto check64bit; /* Request size */
-       if (len > sizeof(buf))                                                          goto check64bit;
-       if ((my_err = sysctlbyname("hw.optional.64bitops", &buf, &len, NULL, 0)))       goto check64bit; /* Copy value out from kernel */
-       if (buf == 1) rval = 64;
-       goto finished;
-
-check64bit:
-#if defined(__i386__) || defined(__x86_64__)
-       /* Check for x86_64 */
-       if ((my_err = sysctlbyname("hw.optional.x86_64", NULL, &len, NULL, 0))) goto finished; /* Request size */
-       if (len > sizeof(buf))                                                  goto finished;
-       if ((my_err = sysctlbyname("hw.optional.x86_64", &buf, &len, NULL, 0))) goto finished; /* Copy value out from kernel */
-       if (buf == 1) rval = 64;
-
-#else 
-#error Unknown architecture.
-#endif
-
-finished:
-       return rval;
-}
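-
-/*
- * Note that the bitness of the running process (as opposed to the hardware
- * probed above) can be determined with a simple sizeof check, e.g.:
- *
- *     int my_process_bits = (int)(sizeof(void *) * 8);        (32 or 64)
- */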
-
-/*
- * printf with a date and time stamp so that we can correlate printf's
- * with the log files of a system in case of test failure.
- *
- * NB: MY_PRINTF_DATE_FMT chosen to look like syslog to aid "grep".
- */
-#define MY_PRINTF_DATE_FMT     "%b %e %T"
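-/* tests.h presumably maps printf to my_printf (e.g. "#define printf my_printf"),
- * hence the #undef below before the real function is defined. */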
-#undef printf  /* was my_printf */
-int
-my_printf(const char * __restrict fmt, ...)
-{
-       char *bufp;
-       char datebuf[256];
-       struct tm *timeptr;
-       time_t result;
-       int rv;
-       va_list ap;
-
-       /* if we are running under a TestBot, do a normal printf */
-       if (g_testbots_active) {
-               va_start(ap, fmt);
-               rv = vprintf(fmt, ap);
-               va_end(ap);
-               return rv;
-       }
-
-       /* Get the timestamp for this printf */
-       result = time(NULL);
-       timeptr = localtime(&result);
-       strftime(datebuf, sizeof(datebuf), MY_PRINTF_DATE_FMT, timeptr);
-
-       /* do the printf of the requested data to a local buffer */
-       va_start(ap, fmt);
-       rv = vasprintf(&bufp, fmt, ap);
-       va_end(ap);
-
-       /*
-        * if we successfully got a local buffer, then we want to
-        * print a timestamp plus what we would have printed before,
-        * then free the allocated memory.
-        */
-       if (rv != -1) {
-               rv = printf("%s %s", datebuf, bufp);
-               free(bufp);
-       }
-
-       return(rv);
-}
diff --git a/tools/tests/xnu_quick_test/pipes_tests.c b/tools/tests/xnu_quick_test/pipes_tests.c
deleted file mode 100644 (file)
index b6e8384..0000000
+++ /dev/null
@@ -1,882 +0,0 @@
-/* Pipes buffer unit tests
- *
- * The main goal of this code is to facilitate the construction,
- * running, result logging and clean up of a test suite, taking care
- * of all the scaffolding. A test suite is a sequence of very targeted
- * unit tests, each running as a separate process to isolate its
- * address space.
- * A unit test is abstracted as a unit_test_t structure, consisting of
- * a test function and a logging identifier. A test suite is a suite_t
- * structure, consisting of an unit_test_t array, a logging identifier,
- * and fixture set up and tear down functions.
- * Test suites are created dynamically. Each of its unit test runs in
- * its own fork()d process, with the fixture set up and tear down
- * running before and after each test. The parent process will log a
- * pass result if the child exits normally, and a fail result in any
- * other case (non-zero exit status, abnormal signal). The suite
- * results are then aggregated and logged, and finally the test suite
- * is destroyed.
- * Everything is logged to stdout in the standard Testbot format, which
- * can be easily converted to Munin or SimonSays logging
- * format. Logging is factored out as much as possible for future
- * flexibility. In our particular case, a unit test is logged as a
- * Testbot Test Case ([BEGIN]/[PASS]/[FAIL], and a test suite is
- * logged as a Testbot Test ([TEST]). This is confusing but
- * unfortunately cannot be avoided for compatibility. Suite results
- * are aggregated after the [SUMMARY] keyword.
- * The included test suites cover the various pipe buffer operations 
- * with dynamic expansion.
- *
- * Vishal Patel (vishal_patel@apple.com)
- */
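-
-/* In outline, client code drives this framework roughly as follows
- * (illustrative sketch only; "my_test" and the suite name are placeholders,
- * not suites defined in this file):
- *
- *     void my_test() { assert(1 + 1 == 2, 1, "arithmetic is broken"); }
- *
- *     UnitTests my_tests = {
- *          { "1. a trivially passing test", my_test },
- *     };
- *
- *     set_timeout(DEFAULT_TIMEOUT);
- *     run_suite(do_nothing, my_tests, do_nothing, "example suite");
- *     log_aggregated_results();
- */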
-
-#include <stdlib.h>
-#include <ctype.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <math.h>
-#include <errno.h>
-#include <signal.h>
-#include <getopt.h>
-#include <sys/sysctl.h>
-#include <string.h>
-#include <stdarg.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <dispatch/dispatch.h>
-#include <pthread.h>
-#include <mach/vm_param.h>
-
-/**************************/
-/**************************/
-/* Unit Testing Framework */
-/**************************/
-/**************************/                                   
-
-/*********************/
-/* Private interface */
-/*********************/
-
-static const char frameworkname[] = "pipes_unitester";
-
-/* Type for test, fixture set up and fixture tear down functions. */
-typedef void (*test_fn_t)();
-
-/* Unit test structure. */
-typedef struct {
-     const char *name;
-     test_fn_t test;
-} unit_test_t;
-
-/* Test suite structure. */
-typedef struct {
-     const char *name;
-     int numoftests;
-     test_fn_t set_up;
-     unit_test_t *tests;
-     test_fn_t tear_down;
-} suite_t;
-
-int _quietness = 0;
-unsigned int _timeout = 0;
-int _expected_signal = 0;
-
-struct {
-     uintmax_t numoftests;
-     uintmax_t passed_tests;
-} results = { 0, 0 };
-
-void logr(char *format, ...) __printflike(1, 2);
-
-static void die(int condition, const char *culprit)
-{
-     if (condition) {
-         printf("%s: %s error: %s.\n", frameworkname, culprit,
-                strerror(errno));
-         exit(1);
-     }
-}
-
-static void die_on_stdout_error()
-{
-     die(ferror(stdout), "stdout");
-}
-
-/* Individual test result logging. */
-void logr(char *format, ...)
-{
-     if (_quietness <= 1) {
-         va_list ap;
-         
-         va_start(ap, format);
-         vprintf(format, ap);
-         va_end(ap);
-         die_on_stdout_error();
-     }
-}
-
-static suite_t *create_suite(const char *name, int numoftests,
-                            test_fn_t set_up, unit_test_t *tests,
-                            test_fn_t tear_down)
-{
-     suite_t *suite =  (suite_t *)malloc(sizeof(suite_t));
-     die(suite == NULL, "malloc()");
-
-     suite->name = name;
-     suite->numoftests = numoftests;
-     suite->set_up = set_up;
-     suite->tests = tests;
-     suite->tear_down = tear_down;
-     return suite;
-}
-
-static void destroy_suite(suite_t *suite)
-{
-     free(suite);
-}
-
-static void log_suite_info(suite_t *suite)
-{
-     logr("[TEST] %s\n", suite->name);
-     logr("Number of tests: %d\n\n", suite->numoftests);
-}
-
-static void log_suite_results(suite_t *suite, int passed_tests)
-{
-     results.numoftests += (uintmax_t)suite->numoftests;
-     results.passed_tests += (uintmax_t)passed_tests;
-}
-
-static void log_test_info(unit_test_t *unit_test)
-{
-     logr("[BEGIN] %s\n", unit_test->name);
-}
-
-static void log_test_result(unit_test_t *unit_test,
-                           boolean_t test_passed)
-{
-     logr("[%s] %s\n\n", test_passed ? "PASS" : "FAIL",
-         unit_test->name);
-}
-
-/* Handler for test time out. */
-static void alarm_handler(int signo)
-{
-     write(1,"Child process timed out.\n",
-          strlen("Child process timed out.\n"));
-     _Exit(6);
-}
-
-/* Run a test with fixture set up and teardown, while enforcing the
- * time out constraint. */
-static void run_test(suite_t *suite, unit_test_t *unit_test)
-{
-     struct sigaction alarm_act;
-
-     log_test_info(unit_test);
-     alarm_act.sa_handler = alarm_handler;
-     sigemptyset(&alarm_act.sa_mask);
-     alarm_act.sa_flags = 0;
-     die(sigaction(SIGALRM, &alarm_act, NULL) != 0, "sigaction()");
-     alarm(_timeout);
-     
-     suite->set_up();
-     unit_test->test();
-     suite->tear_down();
-}
-
-/* Check a child return status. */
-static boolean_t child_terminated_normally(int child_status)
-{
-     boolean_t normal_exit = FALSE;
-     
-     if (WIFEXITED(child_status)) {
-         int exit_status = WEXITSTATUS(child_status);
-         if (exit_status) {
-              printf("Child process unexpectedly exited with code "
-                     "%d.\n", exit_status);
-         } else if (!_expected_signal) {
-              normal_exit = TRUE;
-         }
-     } else if (WIFSIGNALED(child_status)) {
-         int signal = WTERMSIG(child_status);
-         if (signal == _expected_signal) {
-              if (_quietness <= 0) {
-                   printf("Child process died with expected signal "
-                          "%d.\n", signal);
-              }
-              normal_exit = TRUE;
-         } else {
-              printf("Child process unexpectedly died with signal "
-                     "%d.\n", signal);
-         }            
-     } else {
-         printf("Child process unexpectedly did not exit nor "
-                "die.\n");
-     }
-     die_on_stdout_error();
-     return normal_exit;
-}
-
-/* Run a test in its own process, and report the result. */
-static boolean_t child_test_passed(suite_t *suite,
-                                  unit_test_t *unit_test)
-{
-     int test_status;
-
-     pid_t test_pid = fork();
-     die(test_pid == -1, "fork()");
-     if (!test_pid) {
-         run_test(suite, unit_test);
-         exit(0);
-     }
-     while (waitpid(test_pid, &test_status, 0) != test_pid) {
-         continue;
-     }
-     boolean_t test_result = child_terminated_normally(test_status);
-     log_test_result(unit_test, test_result);
-     return test_result;
-}
-
-/* Run each test in a suite, and report the results. */
-static int count_passed_suite_tests(suite_t *suite)
-{
-     int passed_tests = 0;
-     int i;
-     
-     for (i = 0; i < suite->numoftests; i++) {
-         passed_tests += child_test_passed(suite,
-                                           &(suite->tests[i]));
-     }
-     return passed_tests;
-}
-
-/********************/
-/* Public interface */
-/********************/
-
-#define DEFAULT_TIMEOUT 5U
-#define DEFAULT_QUIETNESS 1
-
-#define assert(condition, exit_status, ...)    \
-     if (!(condition)) {                       \
-         _fatal(__FILE__, __LINE__, __func__,  \
-                (exit_status),  __VA_ARGS__);  \
-     }
-
-/* Include in tests whose expected outcome is a specific signal. */
-#define expect_signal(signal)                          \
-     struct sigaction _act;                            \
-     _act.sa_handler = expected_signal_handler;                \
-     sigemptyset(&_act.sa_mask);                       \
-     _act.sa_flags = 0;                                        \
-     assert(sigaction((signal), &_act, NULL) == 0, 1,  \
-           "sigaction() error: %s.", strerror(errno));
-
-#define run_suite(set_up, tests, tear_down, ...)               \
-     _run_suite((sizeof(tests)/sizeof(tests[0])),              \
-               (set_up), (tests), (tear_down), __VA_ARGS__)    
-
-typedef unit_test_t UnitTests[];
-
-void _fatal(const char *file, int line, const char *function,
-           int exit_status, const char *format, ...)
-     __printflike(5, 6);
-void _run_suite(int numoftests, test_fn_t set_up, UnitTests tests,
-               test_fn_t tear_down, const char *format, ...)
-     __printflike(5, 6);
-void logv(char *format, ...) __printflike(1, 2);
-
-void _fatal(const char *file, int line, const char *function,
-           int exit_status, const char *format, ...)
-{
-     va_list ap;
-     
-     va_start(ap, format);
-     vprintf(format, ap);
-     printf("\n");
-     printf("Assert failed in file %s, function %s(), line %d.\n",
-           file, function, line);
-     va_end(ap);
-     exit(exit_status);
-}
-void _run_suite(int numoftests, test_fn_t set_up, UnitTests tests,
-               test_fn_t tear_down, const char *format, ...)
-{
-     va_list ap;
-     char *name;
-     
-     va_start(ap, format);
-     die(vasprintf(&name, format, ap) == -1, "vasprintf()");
-     va_end(ap);
-     suite_t *suite = create_suite(name, numoftests, set_up, tests,
-                                  tear_down);
-     log_suite_info(suite);
-     log_suite_results(suite, count_passed_suite_tests(suite));
-     free(name);
-     destroy_suite(suite);
-}
-
-/* Signal handler for tests expected to terminate with a specific
- * signal. */
-void expected_signal_handler(int signo)
-{
-     write(1,"Child process received expected signal.\n",
-          strlen("Child process received expected signal.\n"));
-     _Exit(0);
-}
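-
-/* A test whose expected outcome is a specific signal uses the expect_signal()
- * macro above; for example (illustrative sketch only, not one of the suites
- * registered below):
- *
- *     void test_write_to_closed_pipe()
- *     {
- *          int fds[2];
- *          assert(pipe(fds) == 0, 1, "pipe() error: %s.", strerror(errno));
- *          expect_signal(SIGPIPE);
- *          close(fds[0]);
- *          write(fds[1], "x", 1);     (raises SIGPIPE; the handler exits 0)
- *     }
- *
- * The handler turns the expected signal into a clean exit, so the parent
- * records the test as a pass.
- */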
-
-/* Setters and getters for various test framework global
- * variables. Should only be used outside of the test, set up and tear
- * down functions. */
-
-/* Time out constraint for running a single test. */
-void set_timeout(unsigned int time)
-{
-     _timeout = time;
-}
-
-unsigned int get_timeout()
-{
-     return _timeout;
-}
-
-/* Expected signal for a test, default is 0. */
-void set_expected_signal(int signal)
-{
-     _expected_signal = signal;
-}
-
-int get_expected_signal()
-{
-     return _expected_signal;
-}
-
-/* Logging verbosity. */
-void set_quietness(int value)
-{
-     _quietness = value;
-}
-
-int get_quietness()
-{
-     return _quietness;
-}
-
-/* For fixture set up and tear down functions, and units tests. */
-void do_nothing() {
-}
-
-/* Verbose (default) logging. */
-void logv(char *format, ...)
-{
-     if (get_quietness() <= 0) {
-         va_list ap;
-         
-         va_start(ap, format);
-         vprintf(format, ap);
-         va_end(ap);
-         die_on_stdout_error();
-     }
-}
-
-void log_aggregated_results()
-{
-     printf("[SUMMARY] Aggregated Test Results\n");
-     printf("Total: %ju\n", results.numoftests);
-     printf("Passed: %ju\n", results.passed_tests);
-     printf("Failed: %ju\n\n", results.numoftests
-           - results.passed_tests);
-     die_on_stdout_error();
-}
-
-/*******************************/
-/*******************************/
-/* pipes buffer  unit  testing */
-/*******************************/
-/*******************************/
-
-static const char progname[] = "pipes_unitester";
-
-static void die_on_error(int condition, const char *culprit)
-{
-     assert(!condition, 1, "%s: %s error: %s.", progname, culprit,
-           strerror(errno));
-}
-
-  
-/*******************************/
-/* Usage and option processing */
-/*******************************/
-
-static void usage(int exit_status)
-{
-     printf("Usage : %s\n", progname);
-     exit(exit_status);
-} 
-
-static void die_on_invalid_value(int condition,
-                                const char *value_string)
-{
-     if (condition) {
-         printf("%s: invalid value: %s.\n", progname, value_string);
-         usage(1);
-     }
-}
-
-/* Convert a storage unit suffix into an exponent. */
-static int strtoexp(const char *string)
-{
-     if (string[0] == '\0') {
-         return 0;
-     }
-     
-     char first_letter =  toupper(string[0]);
-     char prefixes[] = "BKMGTPE";
-     const int numofprefixes = strlen(prefixes);
-     prefixes[numofprefixes] = first_letter;
-     int i = 0;
-
-     while (prefixes[i] != first_letter) {
-         i++;
-     }
-     die_on_invalid_value(i >= numofprefixes || (string[1] != '\0' &&
-                                                (toupper(string[1])
-                                                 != 'B' || string[2]
-                                                 != '\0')), string);
-     return 10 * i;
-}
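-
-/* So strtoexp("") == 0, strtoexp("k") == strtoexp("KB") == 10, and
- * strtoexp("M") == 20; a size given as "64K" could then be computed as
- * 64 << strtoexp("K").  (Nothing in this file appears to pass a suffix
- * today; the helper is presumably kept for option parsing.)
- */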
-
-static void process_options(int argc, char *argv[])
-{
-     int opt;
-     char *endptr;
-  
-     setvbuf(stdout, NULL, _IONBF, 0);
-
-     set_timeout(DEFAULT_TIMEOUT);
-     set_quietness(DEFAULT_QUIETNESS);
-     
-     while ((opt = getopt(argc, argv, "t:vqh")) != -1) {
-         switch (opt) {
-         case 't': 
-              errno = 0;
-              set_timeout(strtoul(optarg, &endptr, 0));
-              die_on_invalid_value(errno == ERANGE || *endptr != '\0'
-                                   || endptr == optarg, optarg);
-              break;
-         case 'q':
-              set_quietness(get_quietness() + 1);
-              break;
-         case 'v':
-              set_quietness(0);
-              break;
-         case 'h':
-              usage(0);
-              break;
-         default:
-              usage(1);
-              break;
-         }
-     }
-}
-
-/*********************************/
-/* Various function declarations */
-/*********************************/
-
-void initialize_data(int *ptr, int len);
-
-int verify_data(int *base, int *target, int len);
-
-void clear_data(int *ptr, int len);
-
-/*******************************/
-/* Arrays for test suite loops */
-/*******************************/
-
-#define BUFMAX 20000
-#define BUFMAXLEN (BUFMAX * sizeof(int))
-
-const unsigned int pipesize_blocks[] = {128,256,1024,2048,4096,8192,16384};
-static const int bufsizes[] = { 128, 512, 1024, 2048, 4096, 16384  };
-
-int data[BUFMAX],readbuf[BUFMAX];
-int pipefd[2] = {0,0};
-
-typedef int * pipe_t;
-
-struct thread_work_data {
-       pipe_t p;
-       unsigned int total_bytes;
-       unsigned int chunk_size;
-};
-
-void * reader_thread(void *ptr);
-void * writer_thread(void *ptr);
-
-dispatch_semaphore_t r_sem, w_sem;
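-/*
- * r_sem gates the reader thread and w_sem gates the writer thread.
- * create_threads() below seeds w_sem with a count of 1 and r_sem with 0, so
- * the writer produces the first chunk and the two threads then alternate,
- * one chunk_size block at a time.
- */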
-
-unsigned long current_buf_size=0;
-
-/*************************************/
-/* Global variables set up functions */
-/*************************************/
-
-
-void initialize_data(int *ptr, int len)
-{
-        int i;
-        if (!ptr || len <=0 )
-                return;
-
-        for (i = 0; i < len; i ++)
-                ptr[i] = i;
-}
-
-void clear_data(int *ptr, int len)
-{
-
-        int i;
-        if (!ptr)
-                return;
-        for (i = 0; i < len; i++)
-                ptr[i]=0;
-}
-
-int verify_data(int *base, int *target, int len)
-{
-        int i = 0;
-        
-        if (!base || !target)
-                return 0;
-        
-        for (i = 0; i < len; i++){
-                if (base[i] != target[i])
-                        return 0;
-        }
-
-        return 1;
-}
-
-void initialize_data_buffer()
-{
-       initialize_data(data, BUFMAX);
-       initialize_data(readbuf, BUFMAX);
-}
-
-/*******************************/
-/* core read/write helper functions */
-/*******************************/
-
-ssize_t read_whole_buffer(pipe_t p, void *scratch_buf, int size);
-ssize_t pipe_read_data(pipe_t p, void *dest_buf, int size);
-ssize_t pipe_write_data(pipe_t p, void *src_buf, int size);
-
-ssize_t read_whole_buffer(pipe_t p, void *scratch_buf, int size)
-{
-       int fd = p[0];
-       logv("reading whole buffer from fd %d, size %d", fd, size);
-       int retval = pread(fd, scratch_buf, size, 0);
-       if (retval == -1 ){
-               logv("Error reading whole buffer. (%d) %s\n",errno, strerror(errno));
-       }
-       return retval;
-
-}
-
-ssize_t pipe_read_data(pipe_t p, void *dest_buf, int size)
-{
-       int fd = p[0];
-       //logv("reading from pipe %d, for size %d", fd, size);
-       int retval = read(fd, dest_buf, size);
-       if (retval == -1) {
-               logv("Error reading from buffer. (%d)",errno);  
-       }
-       return retval;
-}
-
-ssize_t pipe_write_data(pipe_t p, void *src_buf, int size)
-{
-       int fd = p[1];
-       //logv("writing to pipe %d, for size %d", fd, size);
-       int retval = write(fd, src_buf, size);
-       if (retval == -1) {
-               logv("Error writing to buffer. (%d) %s",errno, strerror(errno));        
-       }
-       return retval;
-}
-
-
-void * reader_thread(void *ptr)
-{
-       struct thread_work_data *m;
-       m = (struct thread_work_data *) ptr;
-       int i = m->total_bytes/m->chunk_size;
-       int retval, data_idx=0;
-       while (i > 0){
-               dispatch_semaphore_wait(r_sem, 8000);
-               retval = pipe_read_data(m->p, &readbuf[data_idx], m->chunk_size);
-               assert(retval == m->chunk_size, 1, "Pipe read returned a different number of bytes than requested");
-               data_idx +=m->chunk_size;
-               //logv("RD %d \n", m->chunk_size);
-               dispatch_semaphore_signal(w_sem);
-               i--;
-       }
-       return 0;
-}
-
-void * writer_thread(void *ptr)
-{
-       struct thread_work_data *m;
-       m = (struct thread_work_data *)ptr;
-       int i = m->total_bytes/m->chunk_size;
-       int retval, data_idx=0;
-       while ( i > 0 ){
-
-               dispatch_semaphore_wait(w_sem, 8000);
-               //logv("WR %d \n", m->chunk_size);
-               retval=pipe_write_data(m->p, &data[data_idx], m->chunk_size);
-                assert(retval == m->chunk_size, 1, "Pipe write failed");
-               data_idx +=m->chunk_size;
-               dispatch_semaphore_signal(r_sem);
-               i--;
-       }
-       return 0;
-}
-
-
-void create_threads(struct thread_work_data *rdata, struct thread_work_data *wdata){
-
-       pthread_t thread1, thread2;
-       r_sem = dispatch_semaphore_create(0);
-       w_sem = dispatch_semaphore_create(1);
-       int iret1, iret2;
-       void * thread_ret1 =0;
-       void * thread_ret2 =0;
-       /* Create independent threads each of which will execute function */
-
-       iret1 = pthread_create( &thread1, NULL, reader_thread, (void*) rdata);
-       iret2 = pthread_create( &thread2, NULL, writer_thread, (void*) wdata);
-
-       pthread_join( thread2, &thread_ret2);
-       pthread_join( thread1, &thread_ret1);
-       assert(thread_ret1 == 0, 1, "Reader Thread Failed");
-       assert(thread_ret2 == 0, 1, "Writer Thread Failed");
-}
-
-
-/*******************************/
-/* Pipes unit test functions   */
-/*******************************/
-void test_pipebuffer_setup ()
-{
-
-       logv("Setting up buffers data and readbuf\n");
-       clear_data(data, BUFMAX);
-       clear_data(readbuf, BUFMAX);
-       logv("Initializing buffers data and readbuf\n");
-       initialize_data(data, BUFMAX);
-       initialize_data(readbuf, BUFMAX);
-       logv("verifying data for correctness\n");
-       die_on_error(!verify_data(data, readbuf, BUFMAX), "data initialization");
-       clear_data(readbuf, BUFMAX);
-}
-
-void test_pipe_create(){
-       int pipefds[2] = {0,0};
-       pipe_t p = pipefds;
-       int err = pipe(p);
-       if ( err ){
-               logv("error opening pipes (%d) %s", errno, strerror(errno));
-               return;
-       }
-
-       die_on_error(0 != close(pipefds[0]), "close()");
-       die_on_error(0 != close(pipefds[1]), "close()");
-}
-
-void test_pipe_write_single_byte(){
-       int pipefds[2] = { 0 , 0 };
-       pipe_t p = pipefds;
-       die_on_error( 0 != pipe(p), "pipe()");
-       initialize_data_buffer();
-       int i = 0,retval;
-       for ( ; i < current_buf_size; i++){
-               if ( i > 16384){
-                       logv("cannot fill continuously beyond 16K.");
-                       break;
-               }
-               retval=pipe_write_data(p, &data[i], 1);
-               assert(retval == 1, 1, "Pipe write failed");
-       }
-
-       close(p[0]);
-       close(p[1]);
-}
-
-void test_pipe_single_read_write(){
-       int pipefds[2] = { 0 , 0 };
-        pipe_t p = pipefds;
-        die_on_error( 0 != pipe(p), "pipe()");
-        initialize_data_buffer();
-       struct thread_work_data d = { p, current_buf_size, 1};
-       create_threads(&d, &d);
-        verify_data(data, readbuf, current_buf_size);
-        close(p[0]);
-        close(p[1]);
-
-}
-
-void test_pipe_single_read_2write(){
-       int pipefds[2] = { 0 , 0 };
-        pipe_t p = pipefds;
-        die_on_error( 0 != pipe(p), "pipe()");
-        initialize_data_buffer();
-       struct thread_work_data rd = { p, current_buf_size, 1};
-       struct thread_work_data wd = { p, current_buf_size, 2};
-       create_threads(&rd, &wd);
-        verify_data(data, readbuf, current_buf_size);
-        close(p[0]);
-        close(p[1]);
-
-}
-
-void test_pipe_expansion_buffer(){
-       int pipefds[2] = { 0 , 0 };
-       int iter = 0;
-        pipe_t p = pipefds;
-        die_on_error( 0 != pipe(p), "pipe()");
-        initialize_data_buffer();
-       for ( iter=0; iter < sizeof(pipesize_blocks)/sizeof(unsigned int); iter++){
-               assert(pipesize_blocks[iter] == pipe_write_data(p, &data[0], pipesize_blocks[iter] ), 1, "expansion write failed");
-               assert(pipesize_blocks[iter] == pipe_read_data(p, &readbuf[0], pipesize_blocks[iter]+200), 1, "reading from expanded data failed");
-       /*      logv("finished round for size %u \n", pipesize_blocks[iter]); */
-       }
-        verify_data(data, readbuf, current_buf_size);
-        close(p[0]);
-        close(p[1]);
-
-}
-
-void test_pipe_initial_big_allocation(){
-        int pipefds[2] = { 0 , 0 };
-        int iter = 0;
-        pipe_t p = pipefds;
-        die_on_error( 0 != pipe(p), "pipe()");
-        initialize_data_buffer();
-        assert(current_buf_size == pipe_write_data(p, &data[0], current_buf_size ), 1, "initial big allocation failed");
-        assert(current_buf_size == pipe_read_data(p, &readbuf[0], current_buf_size+200), 1, "reading from initial big write failed");
-        assert(verify_data(data, readbuf, current_buf_size), 1, "big pipe initial allocation -not able to verify data");
-        close(p[0]);
-        close(p[1]);
-
-}
-
-void test_pipe_cycle_small_writes(){
-        int pipefds[2] = { 0 , 0 };
-        int iter = 0;
-        pipe_t p = pipefds;
-        die_on_error( 0 != pipe(p), "pipe()");
-        initialize_data_buffer();
-       int buf_size = current_buf_size / 2;
-        
-       assert(buf_size == pipe_write_data(p, &data[0], buf_size ), 1, "cycle  write failed");
-        assert(buf_size == pipe_read_data(p, &readbuf[0], buf_size+200), 1, "reading from cycle read failed");
-        assert(verify_data(data, readbuf, buf_size), 1, "data verification failed");
-        
-       assert(buf_size == pipe_write_data(p, &data[0], buf_size ), 1, "cycle  write failed");
-        assert(buf_size == pipe_read_data(p, &readbuf[0], buf_size+200), 1, "reading from cycle read failed");
-        assert(verify_data(data, readbuf, buf_size), 1, "data verification failed");
-        
-       assert(buf_size == pipe_write_data(p, &data[0], buf_size ), 1, "cycle  write failed");
-        assert(buf_size == pipe_read_data(p, &readbuf[0], buf_size+200), 1, "reading from cycle read failed");
-        assert(verify_data(data, readbuf, buf_size), 1, "data verification failed");
-        
-       close(p[0]);
-        close(p[1]);
-
-}
-void test_pipe_moving_data(){
-        int pipefds[2] = { 0 , 0 };
-        int iter = 0;
-        pipe_t p = pipefds;
-        die_on_error( 0 != pipe(p), "pipe()");
-        initialize_data_buffer();
-       int buf_size = current_buf_size / 2;
-       if (buf_size > PAGE_SIZE)
-               buf_size = PAGE_SIZE;
-        
-       assert(buf_size == pipe_write_data(p, &data[0], buf_size ), 1, "cycle  write failed");
-        logv("write of size =%d\n", buf_size);
-       assert(buf_size == pipe_write_data(p, &data[buf_size/sizeof(int)], buf_size ), 1, "cycle  write failed");
-        logv("write of size =%d\n", buf_size*2);
-       assert(buf_size == pipe_write_data(p, &data[(buf_size*2)/sizeof(int)], buf_size ), 1, "cycle  write failed");
-        logv("write of size =%d\n", buf_size*3);
-        assert((3*buf_size) == pipe_read_data(p, &readbuf[0], (3*buf_size)+200), 1, "reading from cycle read failed");
-        assert(verify_data(data, readbuf, (3*buf_size)/sizeof(int)), 1, "data verification failed");
-        
-       close(p[0]);
-        close(p[1]);
-
-}
-    
-
-/*************/
-/* pipe Suites */
-/*************/
-
-void run_pipe_basic_tests()
-{
-     int sizes_idx;
-     int numofsizes = sizeof(bufsizes)/sizeof(int);
-
-     logv("running tests for %d different sizes \n", numofsizes);
-
-     UnitTests pipe_basic_tests = {
-         { "1. create buffer and verify both reads/writes are valid",
-           test_pipebuffer_setup },
-         { "2. open and close pipes", test_pipe_create },
-         { "3. single byte write to full", test_pipe_write_single_byte},
-         { "4. single byte read/write in sync", test_pipe_single_read_write},
-         { "5. single byte read/2write in sync", test_pipe_single_read_2write},
-         { "6. expansion from existing size", test_pipe_expansion_buffer},
-         { "7. initial big allocation " , test_pipe_initial_big_allocation},
-         { "8. cycle_small_writes " ,test_pipe_cycle_small_writes },
-         { "9. test moving data " ,test_pipe_moving_data }
-     };
-  for (sizes_idx = 0; sizes_idx < numofsizes; sizes_idx++) {
-       current_buf_size = bufsizes[sizes_idx];
-       run_suite(do_nothing,
-                pipe_basic_tests,
-                do_nothing, "pipe create base test "
-                "Size: 0x%jx (%ju)",
-                (uintmax_t)bufsizes[sizes_idx],
-                (uintmax_t)bufsizes[sizes_idx]);
-  }
-}
-
-
-int pipes_test(void *the_argp)
-{
-     set_quietness(2);
-     run_pipe_basic_tests();
-     //log_aggregated_results();
-     return results.numoftests - results.passed_tests;
-}
-
-/*
- * The old main function is retained so that issues can be debugged in the tests
- * themselves, independently of the xnu_quick_test framework or the system.
- */
-int main_nonuse(int argc, char *argv[])
-{
-     process_options(argc, argv);
-     
-     run_pipe_basic_tests();
-     
-     log_aggregated_results();
-     return 0;
-}
diff --git a/tools/tests/xnu_quick_test/sched_tests.c b/tools/tests/xnu_quick_test/sched_tests.c
deleted file mode 100644 (file)
index 6dd23bf..0000000
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
- *  sched_tests.c
- *  xnu_quick_test
- *
- *  Copyright 2011 Apple Inc. All rights reserved.
- *
- */
-
-#include "tests.h"
-#include <mach/mach.h>
-#include <mach/mach_time.h>
-#include <mach/semaphore.h>
-#include <unistd.h>
-#include <err.h>
-#include <sys/param.h>
-#include <pthread.h>
-
-#define DEBUG 0
-
-#if DEBUG
-#define dprintf(...) printf(__VA_ARGS__)
-#else
-#define dprintf(...) do { } while(0)
-#endif
-
-static uint64_t
-nanos_to_abs(uint64_t ns, uint32_t numer, uint32_t denom)
-{
-       return (uint64_t)(ns * (((double)denom) / ((double)numer)));
-}
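-
-/*
- * With a 1:1 timebase (numer == denom == 1, as on Intel Macs) the absolute
- * time units are simply nanoseconds, so set_realtime() below asks for a 1 s
- * period, 100 ms constraint and 10 ms computation; other timebases scale by
- * denom/numer as above.
- */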
-
-static void set_realtime(void) {
-       struct mach_timebase_info mti;
-       thread_time_constraint_policy_data_t pol;
-       kern_return_t kret;
-
-       kret = mach_timebase_info(&mti);
-       if (kret != KERN_SUCCESS) {
-               warnx("Could not get timebase info %d", kret);
-               return;
-       }
-
-       /* 1s 100ms 10ms */
-       pol.period      = nanos_to_abs(1000000000, mti.numer, mti.denom);
-       pol.constraint  = nanos_to_abs(100000000,  mti.numer, mti.denom);
-       pol.computation = nanos_to_abs(10000000,   mti.numer, mti.denom);
-       pol.preemptible = 0; /* Ignored by OS */
-
-       kret = thread_policy_set(mach_thread_self(), THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t) &pol, THREAD_TIME_CONSTRAINT_POLICY_COUNT);
-       if (kret != KERN_SUCCESS) {
-               warnx("Failed to set realtime %d", kret);
-       }
-}
-
-struct t1_ctx {
-       pthread_t __p;
-       int currentThread;
-       int totalThreads;
-       boolean_t useRealtime;
-       semaphore_t wait_to_start;
-       semaphore_t next_waiter;
-
-       semaphore_t common_sema; /* main thing everyone blocks on */
-       uint64_t wakeup_time; /* out parameter */
-};
-
-void *t1(void *arg) {
-       struct t1_ctx *ctx = (struct t1_ctx *)arg;
-       kern_return_t kret;
-
-       dprintf("thread %d (pthread %p) started\n", ctx->currentThread, pthread_self());
-
-       /* Wait to allow previous thread to block on common semaphore */
-       kret = semaphore_wait(ctx->wait_to_start);
-       if (kret != KERN_SUCCESS) {
-               warnx("semaphore_wait(wait_to_start) thread %d failed %d",
-                         ctx->currentThread, kret);
-       }
-
-       sleep(1);
-
-       if (ctx->useRealtime) {
-               dprintf("thread %d going realtime\n", ctx->currentThread);
-               set_realtime();
-       }
-
-       kret = semaphore_signal(ctx->next_waiter);
-       if (kret != KERN_SUCCESS) {
-               warnx("semaphore_signal(next_waiter) thread %d failed %d",
-                         ctx->currentThread, kret);
-       }
-
-       /*
-        * We have 1 second to block on the common semaphore before
-        * the next thread does.
-        */
-       dprintf("thread %d blocking on common semaphore\n", ctx->currentThread);
-
-       kret = semaphore_wait(ctx->common_sema);
-       if (kret != KERN_SUCCESS) {
-               warnx("semaphore_wait(common_sema) thread %d failed %d",
-                         ctx->currentThread, kret);
-       }
-
-       /* Save our time for analysis */
-       ctx->wakeup_time = mach_absolute_time();
-       dprintf("thread %d woke up at %llu\n", ctx->currentThread, ctx->wakeup_time);
-
-       kret = semaphore_signal(ctx->common_sema);
-       if (kret != KERN_SUCCESS) {
-               warnx("semaphore_signal(common_sema) thread %d failed %d",
-                         ctx->currentThread, kret);
-       }
-
-       return NULL;
-}
-          
-
-
-
-int sched_tests( void * the_argp )
-{
-       kern_return_t kret;
-       int ret;
-       int i;
-       semaphore_t common_sema;
-       semaphore_t all_checked_in;
-       
-       struct t1_ctx ctxs[3];
-       
-       /*
-        * Test 8979062. Ensure that a realtime thread that
-        * blocks on a semaphore after a non-realtime thread
-        * gets woken up first.
-        */
-
-       kret = semaphore_create(mach_task_self(), &common_sema, SYNC_POLICY_FIFO /* not really, in this case */, 0);
-       if (kret != KERN_SUCCESS) {
-               warnx("semaphore_create failed: %d", kret);
-               return -1;
-       }
-
-       kret = semaphore_create(mach_task_self(), &all_checked_in, SYNC_POLICY_FIFO, 0);
-       if (kret != KERN_SUCCESS) {
-               warnx("semaphore_create failed: %d", kret);
-               return -1;
-       }
-
-       memset(&ctxs, 0x00, sizeof(ctxs));
-       for (i=0; i < sizeof(ctxs)/sizeof(ctxs[0]); i++) {
-               ctxs[i].__p = NULL; /* set later */
-               ctxs[i].currentThread = i;
-               ctxs[i].totalThreads = sizeof(ctxs)/sizeof(ctxs[0]);
-               ctxs[i].useRealtime = FALSE;
-
-               kret = semaphore_create(mach_task_self(), &ctxs[i].wait_to_start, SYNC_POLICY_FIFO /* not really, in this case */, 0);
-               if (kret != KERN_SUCCESS) {
-                       warnx("semaphore_create failed: %d", kret);
-                       return -1;
-               }
-               ctxs[i].next_waiter = MACH_PORT_NULL; /* set later */
-               ctxs[i].common_sema = common_sema;
-               ctxs[i].wakeup_time = 0;
-       }
-
-       ctxs[1].useRealtime = TRUE;
-
-       for (i=1; i < sizeof(ctxs)/sizeof(ctxs[0]); i++) {
-               ctxs[i-1].next_waiter = ctxs[i].wait_to_start;
-       }
-       ctxs[i-1].next_waiter = all_checked_in;
-
-
-       for (i=0; i < sizeof(ctxs)/sizeof(ctxs[0]); i++) {
-               ret = pthread_create(&ctxs[i].__p, NULL, t1, &ctxs[i]);
-               if (ret != 0) {
-                       warn("pthread_create failed");
-                       return -1;
-               }
-       }
-
-       /* wake up first thread */
-       kret = semaphore_signal(ctxs[0].wait_to_start);
-       if (kret != KERN_SUCCESS) {
-               warnx("semaphore_signal(initial wait_to_start) failed %d", kret);
-               return -1;
-       }
-
-       /* Wait for everyone to have blocked */
-       kret = semaphore_wait(all_checked_in);
-       if (kret != KERN_SUCCESS) {
-               warnx("semaphore_wait(all_checked_in) failed %d", kret);
-               return -1;
-       }
-
-       /* Give some slack for last guy */
-       sleep(1);
-
-       kret = semaphore_signal(common_sema);
-       if (kret != KERN_SUCCESS) {
-               warnx("semaphore_signal(initial common_sema) failed %d", kret);
-               return -1;
-       }
-
-       for (i=0; i < sizeof(ctxs)/sizeof(ctxs[0]); i++) {
-               ret = pthread_join(ctxs[i].__p, NULL);
-               if (ret != 0) {
-                       warn("pthread_join failed");
-                       return -1;
-               }
-       }
-
-       dprintf("All threads joined\n");
-
-       /*
-        * Our expectation is that thread 1 was realtime and
-        * finished first, followed by 0 and then 2
-        */
-       if ((ctxs[1].wakeup_time < ctxs[0].wakeup_time)
-               && (ctxs[0].wakeup_time < ctxs[2].wakeup_time)) {
-               /* success */
-       } else {
-               warnx("Threads woken out of order %llu %llu %llu",
-                         ctxs[0].wakeup_time, ctxs[1].wakeup_time,
-                         ctxs[2].wakeup_time);
-               return -1;
-       }
-
-       return 0;
-}
-
diff --git a/tools/tests/xnu_quick_test/sema_tests.c b/tools/tests/xnu_quick_test/sema_tests.c
deleted file mode 100644 (file)
index 6c5bc80..0000000
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- *  sema_tests.c
- *  xnu_quick_test
- *
- *  Created by Jerry Cottingham on 6/2/2005.
- *  Copyright 2005 Apple Computer Inc. All rights reserved.
- *
- */
-
-#include "tests.h"
-#include <sys/sem.h> 
-#include <semaphore.h>
-
-/*  **************************************************************************************************************
- *     Test semctl, semget, semop system calls.
- *  **************************************************************************************************************
- */
-int sema_tests( void * the_argp ) 
-{
-       int                             my_err, i;
-       int                             my_sem_id = -1;
-       union semun             my_sem_union;
-       struct sembuf   my_sembuf;
-
-       srand( (unsigned int)getpid() );
-       my_sem_id = semget( (key_t)1234, 1, (0666 | IPC_CREAT) );
-       if ( my_sem_id == -1 ) {
-               printf( "semget failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       my_sem_union.val = 1;
-       my_err = semctl( my_sem_id, 0, SETVAL, my_sem_union );
-       if ( my_err == -1 ) {
-               printf( "semctl (SETVAL) failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* verify semop failure for bad nsop values */
-       my_err = semop( my_sem_id, &my_sembuf, 10000);
-       if (my_err != -1 || errno != E2BIG) {
-               printf( "semop did not fail with E2BIG - instead %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-
-       }
-
-       for ( i = 0; i < 10000; i++ ) {
-               my_sembuf.sem_num = 0;
-               my_sembuf.sem_op  = -1;
-               my_sembuf.sem_flg = SEM_UNDO;
-       
-               my_err = semop( my_sem_id, &my_sembuf, 1 );
-               if ( my_err == -1 ) {
-                       printf( "semop failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       goto test_failed_exit;
-               }
-
-               my_err = semctl( my_sem_id, 0, GETVAL, 0 );
-               if ( my_err == -1 ) {
-                       printf( "semctl failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       goto test_failed_exit;
-               }
-               if ( my_err != 0 ) {
-                       printf( "semctl(getval) returned %d. it should be 0 (locked) here \n", my_err );
-                       goto test_failed_exit;
-        }
-
-               my_sembuf.sem_num = 0;
-               my_sembuf.sem_op  = 1;
-               my_sembuf.sem_flg = SEM_UNDO;
-               
-               my_err = semop( my_sem_id, &my_sembuf, 1 );
-               if ( my_err == -1 ) {
-                       printf( "semop failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       goto test_failed_exit;
-               }
-       }
-
-       my_err = semctl( my_sem_id, 0, IPC_RMID, my_sem_union );
-       if ( my_err == -1 ) {
-               printf( "semctl (IPC_RMID) failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       my_sem_id = -1;
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_sem_id != -1 ) {
-               semctl( my_sem_id, 0, IPC_RMID, my_sem_union );
-       }
-       return( my_err );
-}
-
-
-/*  **************************************************************************************************************
- *     Test sem_close, sem_open, sem_post, sem_trywait, sem_unlink, sem_wait system calls.
- *  **************************************************************************************************************
- */
-int sema2_tests( void * the_argp ) 
-{
-       int                             my_err;
-       sem_t *                 my_sem_t = (sem_t *)SEM_FAILED;
-       char                    my_sema_name[ 64 ];
-       
-       /* get a semaphore (initialized as locked) */
-       sprintf( &my_sema_name[0], "sema_testing_%d", getpid( ) );
-       my_sem_t = sem_open( &my_sema_name[0], (O_CREAT | O_EXCL), (S_IRUSR | S_IWUSR), 0 );
-       if ( my_sem_t == (sem_t*)SEM_FAILED ) {
-               printf( "sem_open failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* get the lock - should return EAGAIN (EWOULDBLOCK) */
-       my_err = sem_trywait( my_sem_t );
-       if ( my_err == -1 ) {
-               my_err = errno;
-               if ( my_err != EAGAIN ) {
-                       printf( "sem_trywait failed with error %d - \"%s\" \n", my_err, strerror( my_err) );
-                       goto test_failed_exit;
-               } 
-       }
-
-       /* unlock our semaphore */
-       my_err = sem_post( my_sem_t );
-       if ( my_err == -1 ) {
-               printf( "sem_post failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* get the lock again */
-       my_err = sem_wait( my_sem_t );
-       if ( my_err == -1 ) {
-               printf( "sem_wait failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       my_err = sem_unlink( &my_sema_name[0] );
-       if ( my_err == -1 ) {
-               printf( "sem_unlink failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       my_err = sem_close( my_sem_t );
-       if ( my_err == -1 ) {
-               printf( "sem_close failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       my_sem_t = (sem_t *)SEM_FAILED;
-
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_sem_t != (sem_t *)SEM_FAILED ) {
-               sem_close( my_sem_t );
-       }
-       return( my_err );
-}
diff --git a/tools/tests/xnu_quick_test/shared_memory_tests.c b/tools/tests/xnu_quick_test/shared_memory_tests.c
deleted file mode 100644 (file)
index 876e7c6..0000000
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- *  shared_memory_tests.c
- *  xnu_quick_test
- *
- *  Created by Jerry Cottingham on 6/2/2005.
- *  Copyright 2005 Apple Computer Inc. All rights reserved.
- *
- */
-
-#include "tests.h"
-#include <sys/ipc.h>
-#include <sys/mman.h>
-#include <sys/shm.h>
-
-extern char  g_target_path[ PATH_MAX ];
-
-
-/*  **************************************************************************************************************
- *     Test shmat, shmctl, shmdt, shmget system calls.
- *  **************************************************************************************************************
- */
-int shm_tests( void * the_argp )
-{      
-       int                                     my_err;
-       int                                     my_shm_id;
-       void *                          my_shm_addr = NULL;
-       struct shmid_ds         my_shmid_ds;
-
-       my_shm_id = shmget( IPC_PRIVATE, 4096, (IPC_CREAT | IPC_R | IPC_W) );
-       if ( my_shm_id == -1 ) {
-               printf( "shmget failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       my_shm_addr = shmat( my_shm_id, NULL, SHM_RND );
-       if ( my_shm_addr == (void *) -1 ) {
-               my_shm_addr = NULL;
-               printf( "shmat failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* try writing to the shared segment */
-       *((char *) my_shm_addr) = 'A';
-
-       my_err = shmctl( my_shm_id, IPC_STAT, &my_shmid_ds );
-       if ( my_err == -1 ) {
-               printf( "shmctl failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_shmid_ds.shm_segsz != 4096 ) {
-               printf( "shmctl failed to get correct shared segment size \n" );
-               goto test_failed_exit;
-       }
-       if ( getpid( ) != my_shmid_ds.shm_cpid ) {
-               printf( "shmctl failed to get correct creator pid \n" );
-               goto test_failed_exit;
-       }
-
-       if (my_shmid_ds.shm_internal != (void *) 0){
-               /*
-                * The shm_internal field is a pointer reserved for kernel
-                * use only.  It should not be leaked to user space.
-                * (PR-15642873)
-                */
-               printf( "shmctl failed to sanitize kernel internal pointer \n" );
-               goto test_failed_exit;
-       }
-
-       my_err = shmdt( my_shm_addr );
-       if ( my_err == -1 ) {
-               printf( "shmdt failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       my_err = shmctl( my_shm_id, IPC_RMID, NULL );
-       if ( my_err == -1 ) {
-               printf("shmctl failed to delete memory segment.\n");
-               goto test_failed_exit;
-       }
-       
-       my_shm_addr = NULL;
-        
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_shm_addr != NULL ) {
-               shmdt( my_shm_addr );
-               shmctl( my_shm_id, IPC_RMID, NULL);
-       }
-       return( my_err );
-}
-
-
-/*  **************************************************************************************************************
- *     Test BSD shared memory system calls.
- *  **************************************************************************************************************
- */
-int bsd_shm_tests( void * the_argp )
-{
-       int                     my_err, i;
-       int                     my_fd = -1;
-       char *          my_addr = NULL;
-       char            my_name[ 64 ];
-
-       for ( i = 0; i < 100; i++ ) {
-               sprintf( &my_name[0], "bsd_shm_tests_%d", i );
-               my_fd = shm_open( &my_name[0], (O_RDWR | O_CREAT | O_EXCL), S_IRWXU );
-               if ( my_fd != -1 ) 
-                       break;
-               my_err = errno;
-               if ( my_err != EEXIST ) {
-                       printf( "shm_open failed with error %d - \"%s\" \n", my_err, strerror( my_err) );
-                       goto test_failed_exit;
-               }
-       }
-       if ( my_fd == -1 ) {
-               printf( "shm_open failed to open a shared memory object with name \"%s\" \n", &my_name[0] );
-               goto test_failed_exit;
-       }
-       
-       /* grow shared memory object */
-       my_err = ftruncate( my_fd, 4096 );              
-       if ( my_err == -1 ) {
-               printf( "ftruncate call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       my_err = shm_unlink( &my_name[0] );
-       if ( my_err == -1 ) {
-               printf( "shm_unlink failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* unlinking a non-existent path */
-       my_err = shm_unlink( "/tmp/a_nonexistent_shm_object" );
-       my_err = errno;
-       if ( my_err != ENOENT ) {
-               printf( "shm_unlink of non existent path failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       my_addr = (char *) mmap( NULL, 4096, (PROT_READ | PROT_WRITE), (MAP_FILE | MAP_SHARED), my_fd, 0 );
-       if ( my_addr == (char *) -1 ) {
-               printf( "mmap call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       return( my_err );
-}
-
diff --git a/tools/tests/xnu_quick_test/socket_tests.c b/tools/tests/xnu_quick_test/socket_tests.c
deleted file mode 100644 (file)
index 306592c..0000000
+++ /dev/null
@@ -1,603 +0,0 @@
-/*
- *  socket_tests.c
- *  xnu_quick_test
- *
- *  Created by Jerry Cottingham on 4/12/05.
- *  Copyright 2005 Apple Computer Inc. All rights reserved.
- *
- */
-
-#include "tests.h"
-#include <poll.h>
-#include <mach/mach.h>
-
-extern char  g_target_path[ PATH_MAX ];
-
-/*  **************************************************************************************************************
- *     Test accept, bind, connect, listen, socket, recvmsg, sendmsg, recvfrom, sendto, getpeername, getsockname
- *  system calls.
- *  WARNING - I don't do networking - this should get a good look from a networking stud.
- *  **************************************************************************************************************
- */
-int socket_tests( void * the_argp )
-{
-       int                             my_err, my_status, my_len;
-       pid_t                   my_pid, my_wait_pid;
-       int                             my_socket_fd = -1;
-       int                             my_accepted_socket = -1;
-       char *                  my_parent_pathp = NULL;
-       char *                  my_child_pathp = NULL;
-       socklen_t               my_accept_len;
-       struct sockaddr *my_sockaddr;
-       ssize_t                 my_result;
-       off_t                   my_current_offset;
-       char                    my_parent_socket_name[sizeof(struct sockaddr) + 64];
-       char                    my_child_socket_name[sizeof(struct sockaddr) + 64];
-       char                    my_accept_buffer[sizeof(struct sockaddr) + 64];
-       kern_return_t           my_kr;
-
-       /* generate 2 names for binding to the sockets (one socket in the parent and one in the child) */
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_parent_pathp, 128, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_child_pathp, 128, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-       }
-
-       *my_parent_pathp = 0x00;
-       strcat( my_parent_pathp, "/tmp/" );
-
-       /* get a unique name for our testing */
-       my_err = create_random_name( my_parent_pathp, 0 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-       strcpy( my_child_pathp, my_parent_pathp );
-       strcat( my_parent_pathp, "p" ); /* append 'p' to mean "parent" */
-       strcat( my_child_pathp, "c" ); /* append 'c' to mean "child" */
-
-       memset( &my_parent_socket_name[0], 0, sizeof(my_parent_socket_name) );
-       memset( &my_child_socket_name[0], 0, sizeof(my_child_socket_name) );
-
-       /* use unique names we generated in /tmp/  */
-       my_sockaddr = (struct sockaddr *) &my_parent_socket_name[0];
-       my_len = sizeof(*my_sockaddr) - sizeof(my_sockaddr->sa_data) + strlen(my_parent_pathp);
-       my_sockaddr->sa_len = my_len;
-       my_sockaddr->sa_family = AF_UNIX;
-       strcpy( &my_sockaddr->sa_data[0], my_parent_pathp );
-
-       my_sockaddr = (struct sockaddr *) &my_child_socket_name[0];
-       my_len = sizeof(*my_sockaddr) - sizeof(my_sockaddr->sa_data) + strlen(my_child_pathp);
-       my_sockaddr->sa_len = my_len;
-       my_sockaddr->sa_family = AF_UNIX;
-       strcpy( &my_sockaddr->sa_data[0], my_child_pathp );
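-
-       /*
-        * Note: the names above are laid out by hand in a raw struct sockaddr
-        * (sa_len / sa_family / sa_data) rather than the more conventional
-        * struct sockaddr_un; for AF_UNIX the leading fields of the two layouts
-        * coincide, and the oversized on-stack buffers leave room for the path
-        * beyond sizeof(sa_data).
-        */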
-
-       /* set up socket for parent side */
-       my_socket_fd = socket( AF_UNIX, SOCK_STREAM, 0 );
-       if ( my_socket_fd == -1 ) {
-               printf( "socket call in parent failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       my_sockaddr = (struct sockaddr *) &my_parent_socket_name[0];
-       my_err = bind( my_socket_fd, my_sockaddr, my_sockaddr->sa_len );
-       if ( my_err == -1 ) {
-               printf( "bind call in parent failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* test getsockname */
-       my_sockaddr = (struct sockaddr *) &my_accept_buffer[0];
-       my_accept_len = sizeof(my_accept_buffer);
-       my_err = getsockname( my_socket_fd, my_sockaddr, &my_accept_len );
-       if ( my_err == -1 ) {
-               printf( "getsockname call in parent failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_sockaddr->sa_family != AF_UNIX ) {
-               printf( "getsockname test failed - did not get correct socket name data \n" );
-               goto test_failed_exit;
-       }
-       
-       /* make sure we can't seek on a socket */
-       my_current_offset = lseek( my_socket_fd, 0, SEEK_CUR );
-       if ( my_current_offset != -1 ) {
-               printf( "lseek on socket should fail but did not \n" );
-               goto test_failed_exit;
-       }
-
-       /*
-        * spin off a child process that we communicate with via sockets.   
-        */
-       my_pid = fork( );
-       if ( my_pid == -1 ) {
-               printf( "fork failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_pid == 0 ) {
-               /* 
-                * child process - open a socket and use it to talk to our parent.
-                */
-               int                                     my_child_fd = -1;
-               struct msghdr           my_msghdr;
-               struct iovec            my_iov[4];
-               char                            my_buffer[128];
-
-               my_child_fd = socket( AF_UNIX, SOCK_STREAM, 0 );
-               if ( my_child_fd == -1 ) {
-                       printf( "socket call in child failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       exit( -1 );
-               }
-
-               my_sockaddr = (struct sockaddr *) &my_child_socket_name[0];
-               my_err = bind( my_child_fd, my_sockaddr, my_sockaddr->sa_len );
-               if ( my_err == -1 ) {
-                       close( my_child_fd );
-                       printf( "bind call in child failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       exit( -1 );
-               }
-               sleep(2);
-
-               /* connect to socket in our parent */
-               my_sockaddr = (struct sockaddr *) &my_parent_socket_name[0];
-               my_err = connect( my_child_fd, my_sockaddr, my_sockaddr->sa_len );
-               if ( my_err == -1 ) {
-                       close( my_child_fd );
-                       printf( "connect call in child failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       exit( -1 );
-               }
-
-               /* test getpeername - the peer of our connected socket should be the parent */
-               {
-                       socklen_t                       my_buffer_len;
-                       struct sockaddr *       my_sockaddr;
-                       char                            my_parent_buffer[256];
-
-                       my_sockaddr = (struct sockaddr *) &my_parent_buffer[0];
-                       my_buffer_len = sizeof(my_parent_buffer);
-                       my_err = getpeername( my_child_fd, my_sockaddr, &my_buffer_len );
-                       if ( my_err == -1 ) {
-                               printf( "getpeername call in child failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                               close( my_child_fd );
-                               exit( -1 );
-                       }
-
-                       /* test results - sa_family should be AF_UNIX and the name should match my_parent_pathp */
-                       if ( my_sockaddr->sa_family != AF_UNIX ) {
-                               printf( "getpeername test failed - did not get correct peer data \n" );
-                               close( my_child_fd );
-                               exit( -1 );
-                       }
-               }
-
-               my_buffer[0] = 'j';
-               my_iov[0].iov_base = &my_buffer[0];
-               my_iov[0].iov_len = 1;
-               
-               my_sockaddr = (struct sockaddr *) &my_parent_socket_name[0];
-               my_msghdr.msg_name = my_sockaddr;
-               my_msghdr.msg_namelen = my_sockaddr->sa_len;
-               my_msghdr.msg_iov = &my_iov[0];
-               my_msghdr.msg_iovlen = 1;
-               my_msghdr.msg_control = NULL;
-               my_msghdr.msg_controllen = 0;
-               my_msghdr.msg_flags = 0;
-
-               my_result = sendmsg( my_child_fd, &my_msghdr, 0 );
-               if ( my_result == -1 ) {
-                       printf( "sendmsg failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       close( my_child_fd );
-                       exit( -1 );
-               }
-               
-#if 1
-               /* get data from our parent */
-               my_result = recvfrom( my_child_fd, &my_buffer[0], 1, 
-                                                         MSG_WAITALL, NULL, NULL );
-               if ( my_result == -1 ) {
-                       printf( "recvfrom failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       close( my_child_fd );
-                       exit( -1 );
-               }
-               
-               /* verify that we got the correct message from our child */
-               if ( my_buffer[0] != 'e' ) {
-                       printf( "test failed - did not get correct data from child \n" );
-                       close( my_child_fd );
-                       exit( -1 );
-               }
-#endif
-               
-               /* sendfile test. Open libsystem, set up some headers, and send it */
-               struct sf_hdtr          my_sf_hdtr;
-               int                                     my_libsys_fd;
-               off_t                           my_libsys_len;
-
-               my_libsys_fd = open("/usr/lib/libSystem.dylib", O_RDONLY, 0644);
-               if (my_libsys_fd < 0) {
-                       printf( "test failed - could not open /usr/lib/libSystem.dylib\n" );
-                        close ( my_child_fd );
-                       exit ( -1 );
-               }
-
-               my_libsys_len = 7+2; /* 2 bytes of header */
-               my_buffer[0] = 's';
-               my_iov[0].iov_base = &my_buffer[0];
-               my_iov[0].iov_len = 1;
-               my_buffer[1] = 'e';
-               my_iov[1].iov_base = &my_buffer[1];
-               my_iov[1].iov_len = 1;
-               my_buffer[2] = 'n';
-               my_iov[2].iov_base = &my_buffer[2];
-               my_iov[2].iov_len = 1;
-               my_buffer[3] = 'd';
-               my_iov[3].iov_base = &my_buffer[3];
-               my_iov[3].iov_len = 1;
-
-               my_sf_hdtr.headers = &my_iov[0];
-               my_sf_hdtr.hdr_cnt = 2;
-               my_sf_hdtr.trailers = &my_iov[2];
-               my_sf_hdtr.trl_cnt = 2;
-                       
-               my_result = sendfile(my_libsys_fd, my_child_fd, 3, &my_libsys_len, &my_sf_hdtr, 0);
-               if (my_result < 0 || my_libsys_len != 11) {
-                       printf( "sendfile failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       close( my_child_fd );
-                       exit( -1 );
-               }
-
-               my_result = close ( my_libsys_fd );
-               if ( my_result < 0 ) {
-                       printf ( "close failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       close ( my_child_fd );
-                       exit ( -1 );
-               }
-
-               /* tell parent we're done */
-               my_result = write( my_child_fd, "all done", 8 );
-               if ( my_result == -1 ) {
-                       close( my_child_fd );
-                       exit( -1 );
-               }
-
-               close( my_child_fd );
-               exit(0);
-       }
-       
-       /* 
-        * parent process - listen for connection requests
-        */
-       my_err = listen( my_socket_fd, 10 );
-       if ( my_err == -1 ) {
-               printf( "listen call in parent failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* accept connection from child */
-       my_sockaddr = (struct sockaddr *) &my_accept_buffer[0];
-       my_accept_len = sizeof(my_accept_buffer);       /* reset - the getsockname call above shrank it */
-       my_accepted_socket = accept( my_socket_fd, my_sockaddr, &my_accept_len );
-       if ( my_accepted_socket == -1 ) {
-               printf( "accept call in parent failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-                       
-       /* get some data from the child via socket and test socket peer data */
-       {
-               //socklen_t                     my_buffer_len;
-               struct msghdr           my_msghdr;
-               struct iovec            my_iov;
-               char                            my_parent_buffer[128];
-
-               my_parent_buffer[0] = 'x';
-               my_iov.iov_base = &my_parent_buffer[0];
-               my_iov.iov_len = 1;
-               
-               my_msghdr.msg_name = &my_accept_buffer[0];
-               my_msghdr.msg_namelen = my_accept_len;
-               my_msghdr.msg_iov = &my_iov;
-               my_msghdr.msg_iovlen = 1;
-               my_msghdr.msg_control = NULL;
-               my_msghdr.msg_controllen = 0;
-               my_msghdr.msg_flags = 0;
-               
-               my_result = recvmsg( my_accepted_socket, &my_msghdr, MSG_WAITALL );
-               if ( my_result == -1 ) {
-                       printf( "recvmsg failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       goto test_failed_exit;
-               }
-               
-               /* verify that we got the correct message from our child */
-               if ( my_parent_buffer[0] != 'j' ) {
-                       printf( "test failed - did not get correct data from child \n" );
-                       goto test_failed_exit;
-               }
-
-#if 1
-               /* now send some data to our child */
-               my_parent_buffer[0] = 'e';
-               my_sockaddr = (struct sockaddr *) &my_child_socket_name[0];
-               my_result = sendto( my_accepted_socket, &my_parent_buffer[0], 1, 0, my_sockaddr, 
-                                                       my_sockaddr->sa_len );
-               if ( my_result == -1 ) {
-                       printf( "sendto failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       goto test_failed_exit;
-               }
-#endif
-
-               size_t neededBytes = 11;
-                       
-               /* Check for sendfile output */
-               bzero( (void *)&my_parent_buffer[0], sizeof(my_parent_buffer) );
-               while (neededBytes > 0) {
-                       my_result = read( my_accepted_socket, &my_parent_buffer[11-neededBytes], neededBytes );
-                       if ( my_result == -1 ) {
-                               printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                               goto test_failed_exit;
-                       } else if (my_result == 0) {
-                               break;
-                       }
-                       neededBytes -= my_result;
-               }
-                       
-               if ( neededBytes > 0 ) {
-                       printf( "read call returned %ld bytes instead of 11\n", 11 - neededBytes );
-                       goto test_failed_exit;
-               }
-
-               if ( ! (my_parent_buffer[0] == 's' && my_parent_buffer[1] == 'e' && my_parent_buffer[9] == 'n' && my_parent_buffer[10] == 'd') ) {
-                       printf( "read wrong sendfile message from child \n" );
-                       goto test_failed_exit;
-               }
-               
-
-               /* see if child is done. bzero so that string is NUL terminated */
-               bzero( (void *)&my_parent_buffer[0], sizeof(my_parent_buffer) );
-               my_result = read( my_accepted_socket, &my_parent_buffer[0], sizeof(my_parent_buffer) );
-               if ( my_result == -1 ) {
-                       printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       goto test_failed_exit;
-               }
-               if ( strcmp( "all done", &my_parent_buffer[0] ) != 0 ) {
-                       printf( "read wrong message from child \n" );
-                       goto test_failed_exit;
-               }
-       }
-
-       /* wait for child to exit */
-       my_wait_pid = wait4( my_pid, &my_status, 0, NULL );
-       if ( my_wait_pid == -1 ) {
-               printf( "wait4 failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       if ( WIFEXITED( my_status ) && WEXITSTATUS( my_status ) != 0 ) {
-               goto test_failed_exit;
-       }
-                       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_socket_fd != -1 )
-               close( my_socket_fd );
-       if ( my_accepted_socket != -1 )
-               close( my_accepted_socket );
-       if ( my_parent_pathp != NULL ) {
-               remove( my_parent_pathp );      
-               vm_deallocate(mach_task_self(), (vm_address_t)my_parent_pathp, 128);
-        }
-       if ( my_child_pathp != NULL ) {
-               remove( my_child_pathp );       
-               vm_deallocate(mach_task_self(), (vm_address_t)my_child_pathp, 128);
-        }
-       return( my_err );
-}
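For comparison, the raw-sockaddr setup that socket_tests performs by hand is more commonly written with struct sockaddr_un. A minimal sketch of the bind-and-listen half, under that assumption; the helper name is illustrative, strlcpy is the BSD/Darwin bounded copy, and the caller is expected to have removed any stale file at the path:

#include <sys/socket.h>
#include <sys/un.h>
#include <string.h>
#include <unistd.h>

/* Bind an AF_UNIX stream socket to a filesystem path and start listening.
 * Returns the listening descriptor, or -1 with errno set on failure. */
static int listen_unix( const char *path )
{
        struct sockaddr_un addr;
        int fd = socket( AF_UNIX, SOCK_STREAM, 0 );
        if ( fd == -1 )
                return -1;

        memset( &addr, 0, sizeof(addr) );
        addr.sun_family = AF_UNIX;
        strlcpy( addr.sun_path, path, sizeof(addr.sun_path) );

        if ( bind( fd, (struct sockaddr *)&addr, sizeof(addr) ) == -1 ||
             listen( fd, 10 ) == -1 ) {
                close( fd );
                return -1;
        }
        return fd;
}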
-
-/*  **************************************************************************************************************
- *     Test fsync, getsockopt, poll, select, setsockopt, socketpair system calls.
- *  **************************************************************************************************************
- */
-int socket2_tests( void * the_argp )
-{
-       int                                     my_err, my_status;
-       int                                     my_sockets[ 2 ] = {-1, -1};
-       pid_t                           my_pid, my_wait_pid;
-       ssize_t                         my_count;
-       socklen_t                       my_socklen;
-       struct timeval *        my_tvp;
-       struct timeval          my_orig_tv;
-       char                            my_buffer[ 32 ];
-
-       my_err = socketpair( AF_UNIX, SOCK_STREAM, 0, &my_sockets[0] );
-       if ( my_err == -1 ) {
-               printf( "socketpair failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* test getsockopt and setsockopt */
-       my_socklen = sizeof( my_buffer );
-       my_err = getsockopt( my_sockets[0], SOL_SOCKET, SO_TYPE, &my_buffer[0], &my_socklen);
-       if ( my_err == -1 ) {
-               printf( "getsockopt - SO_TYPE - failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( SOCK_STREAM != *((int *)&my_buffer[0]) ) {
-               printf( "getsockopt returned incorrect socket type \n" );
-               goto test_failed_exit;
-       }
-
-       /* get and set receive timeout */
-       my_socklen = sizeof( my_buffer );
-       my_err = getsockopt( my_sockets[0], SOL_SOCKET, SO_RCVTIMEO, &my_buffer[0], &my_socklen);
-       if ( my_err == -1 ) {
-               printf( "getsockopt - SO_RCVTIMEO - failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       my_tvp = (struct timeval *) &my_buffer[0];
-       my_orig_tv.tv_sec = my_tvp->tv_sec;
-       my_orig_tv.tv_usec = my_tvp->tv_usec;
-       my_tvp->tv_sec += 60;
-       my_err = setsockopt( my_sockets[0], SOL_SOCKET, SO_RCVTIMEO, &my_buffer[0], sizeof(struct timeval) );
-       if ( my_err == -1 ) {
-               printf( "setsockopt - SO_RCVTIMEO - failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* verify we set it */
-       my_socklen = sizeof( my_buffer );
-       my_err = getsockopt( my_sockets[0], SOL_SOCKET, SO_RCVTIMEO, &my_buffer[0], &my_socklen);
-       if ( my_err == -1 ) {
-               printf( "getsockopt - SO_RCVTIMEO - failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       my_tvp = (struct timeval *) &my_buffer[0];
-       if ( my_tvp->tv_sec != (my_orig_tv.tv_sec + 60) || my_tvp->tv_usec != my_orig_tv.tv_usec ) {
-               printf( "setsockopt - SO_RCVTIMEO - did not set correct timeval \n" );
-               goto test_failed_exit;
-       }
-       
-       /* set back to original receive timeout */
-       my_err = setsockopt( my_sockets[0], SOL_SOCKET, SO_RCVTIMEO, &my_orig_tv, sizeof(struct timeval) );
-       if ( my_err == -1 ) {
-               printf( "setsockopt - SO_RCVTIMEO - failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* test fsync - should fail when used with a socket fd */
-       errno = 0;
-       my_err = fsync( my_sockets[0] );
-       if ( my_err == -1 && errno != ENOTSUP ) {
-               printf( "fsync failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       else if ( my_err != -1 ) {
-               printf( "fsync should have failed with errno ENOTSUP \n" );
-               goto test_failed_exit;
-       }
-        
-       /*
-        * spin off a child process that we will talk to via our socketpair.   
-        */
-       my_pid = fork( );
-       if ( my_pid == -1 ) {
-               printf( "fork failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_pid == 0 ) {
-               /* 
-                * child process - tell parent we are ready to go.
-                */
-               char                    my_buffer[ 32 ];
-               struct pollfd   my_pollfd;
-
-               my_count = write( my_sockets[1], "r", 1 );
-               if ( my_count == -1 ) {
-                       printf( "write call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-                       exit( -1 );
-               }
-               
-               /* test select by using it to wait for message from parent */
-               for ( ;; ) {
-                       fd_set                  my_read_set;
-                       struct timeval  my_timeout;
-                       
-                       FD_ZERO( &my_read_set );
-                       FD_SET( my_sockets[1], &my_read_set );
-                       timerclear( &my_timeout );
-                       my_timeout.tv_sec = 1;
-                       
-                       /* check to see if a message is ready; if nothing arrives within a
-                        * second, loop and try again...
-                        */
-                       my_err = select( (my_sockets[1] + 1), &my_read_set, NULL, NULL, &my_timeout );
-                       if ( my_err == -1 ) {
-                               printf( "select call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                               exit( -1 );
-                       }
-                       else if ( my_err > 0 ) {
-                               /* we're done */
-                               break;
-                       }
-               }
-               
-               /* test poll too */
-               my_pollfd.fd = my_sockets[1];
-               my_pollfd.events = (POLLIN | POLLPRI);
-               my_pollfd.revents = 0;
-               my_err = poll( &my_pollfd, 1, 500 );
-               if ( my_err == -1 ) {
-                       printf( "poll call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       exit( -1 );
-               }
-               /* should be ready for read */
-               if ( (my_pollfd.revents & POLLIN) == 0 ) {
-                       printf( "poll should have returned ready for read \n" );
-                       exit( -1 );
-               }
-               
-               my_count = read( my_sockets[1], &my_buffer[0], sizeof(my_buffer) );
-               if ( my_count == -1 ) {
-                       printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       exit( -1 );
-               }
-               if ( my_buffer[0] != 'd' ) {
-                       printf( "read call on socket failed to get \"done\" message from parent \n" );
-                       exit( -1 );
-               }
-        
-               exit(0);
-       }
-       
-       /* 
-        * parent process - wait for child to spin up
-        */
-       my_count = read( my_sockets[0], &my_buffer[0], sizeof(my_buffer) );
-       if ( my_count == -1 ) {
-               printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_buffer[0] != 'r' ) {
-               printf( "read call on socket failed to get \"ready to go message\" \n" );
-               goto test_failed_exit;
-       }
-
-       /* tell child we're done */
-       write( my_sockets[0], "d", 1 );
-
-       my_wait_pid = wait4( my_pid, &my_status, 0, NULL );
-       if ( my_wait_pid == -1 ) {
-               printf( "wait4 failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* wait4 should return our child's pid when it exits */
-       if ( my_wait_pid != my_pid ) {
-               printf( "wait4 did not return child pid - returned %d should be %d \n", my_wait_pid, my_pid );
-               goto test_failed_exit;
-       }
-
-       if ( WIFEXITED( my_status ) && WEXITSTATUS( my_status ) != 0 ) {
-               printf( "wait4 returned wrong exit status - 0x%02X \n", my_status );
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_sockets[0] != -1 )
-               close( my_sockets[0] );
-       if ( my_sockets[1] != -1 )
-               close( my_sockets[1] );
-       return( my_err );
-}
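The select/poll dance in the child above reduces to "wait until a descriptor is readable or a timeout expires". A minimal poll(2)-only sketch of that idea, with an assumed helper name and return convention:

#include <poll.h>

/* Wait up to timeout_ms for fd to become readable.
 * Returns 1 if readable, 0 on timeout, -1 on error (errno set by poll). */
static int wait_readable( int fd, int timeout_ms )
{
        struct pollfd pfd;
        int rv;

        pfd.fd = fd;
        pfd.events = POLLIN;
        pfd.revents = 0;

        rv = poll( &pfd, 1, timeout_ms );
        if ( rv <= 0 )
                return rv;
        return ( pfd.revents & POLLIN ) ? 1 : 0;
}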
-
diff --git a/tools/tests/xnu_quick_test/tests.c b/tools/tests/xnu_quick_test/tests.c
deleted file mode 100644 (file)
index 8d1e8f4..0000000
+++ /dev/null
@@ -1,6915 +0,0 @@
-/*
- *  tests.c
- *  xnu_quick_test
- *
- *  Created by Jerry Cottingham on 3/25/05.
- *  Copyright 2008 Apple Inc. All rights reserved.
- *
- */
-
-#include "tests.h"
-#include <sys/ipc.h>           /* for message queue tests */
-#include <sys/msg.h>           /* for message queue tests */
-#include <sys/syscall.h>       /* for get / settid */
-#include <sys/sysctl.h>                /* for determining hw */
-#include <sys/kas_info.h>      /* for kas_info() */
-#include <AvailabilityMacros.h>        /* for determination of Mac OS X version (tiger, leopard, etc.) */
-#include <libkern/OSByteOrder.h> /* for OSSwap32() */
-#include <mach/mach.h>
-
-
-extern char            g_target_path[ PATH_MAX ];
-extern int             g_skip_setuid_tests;
-extern int             g_is_single_user;
-
-
-void print_acct_debug_strings( char * my_ac_comm );
-
-
-#if TEST_SYSTEM_CALLS /* system calls to do */
-       "reboot",             /* 55 = reboot */
-       "revoke",             /* 56 = revoke */
-       "sbrk",               /* 69 = sbrk */
-       "sstk",               /* 70 = sstk */
-       "mount",              /* 167 = mount */
-       "unmount",            /* 159 = unmount */
-       "undelete",           /* 205 = undelete */
-       "watchevent",         /* 231 = watchevent */
-       "waitevent",          /* 232 = waitevent */
-       "modwatch",           /* 233 = modwatch */
-       "fsctl",              /* 242 = fsctl */
-       "initgroups",         /* 243 = initgroups */
-       "semsys",             /* 251 = semsys */
-       "semconfig",          /* 257 = semconfig */
-       "msgsys",             /* 252 = msgsys */
-       "shmsys",             /* 253 = shmsys */
-       "load_shared_file",   /* 296 = load_shared_file */
-       "reset_shared_file",  /* 297 = reset_shared_file */
-       "new_system_shared_regions",  /* 298 = new_system_shared_regions */
-       "shared_region_map_file_np",  /* 299 = shared_region_map_file_np */
-       "shared_region_make_private_np",  /* 300 = shared_region_make_private_np */
-       "__pthread_kill",     /* 328 = __pthread_kill */
-       "pthread_sigmask",    /* 329 = pthread_sigmask */
-       "__disable_threadsignal",  /* 331 = __disable_threadsignal */
-       "__pthread_markcancel",  /* 332 = __pthread_markcancel */
-       "__pthread_canceled",  /* 333 = __pthread_canceled */
-       "__semwait_signal",   /* 334 = __semwait_signal */
-       "audit",              /* 350 = audit */
-       "auditon",            /* 351 = auditon */
-       "getaudit",           /* 355 = getaudit */
-       "setaudit",           /* 356 = setaudit */
-       "getaudit_addr",      /* 357 = getaudit_addr */
-       "setaudit_addr",      /* 358 = setaudit_addr */
-       "auditctl",           /* 359 = auditctl */
-#endif
-
-/*  **************************************************************************************************************
- *     Test the syscall system call.
- *  **************************************************************************************************************
- */
-int syscall_test( void * the_argp )
-{
-       int                     my_err;
-       int                     my_fd = -1;
-       char *                  my_pathp = NULL;
-       kern_return_t           my_kr;
-       
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-       if ( my_kr != KERN_SUCCESS ) {
-               printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-               goto test_failed_exit;
-       }
-
-       *my_pathp = 0x00;
-       strcpy( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_pathp, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-
-       /* use an indirect system call to open our test file.
-        * I picked open since it uses a path pointer which grows to 64 bits in an LP64 environment.
-        */
-       my_fd = syscall( SYS_open, my_pathp, (O_RDWR | O_EXCL), 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t file we attempted to open -> \"%s\" \n", my_pathp );
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );     
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);
-        }
-       return( my_err );
-}
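syscall(2) just pushes its arguments into the numbered trap, so the indirect open in syscall_test is equivalent to the ordinary libc wrapper. A small sketch showing both forms side by side; the /tmp/example path and the helper name are placeholders, not part of the test:

#include <sys/syscall.h>
#include <fcntl.h>
#include <unistd.h>

/* Open the same placeholder path twice: once through the generic syscall(2)
 * trap and once through the libc wrapper.  Both return -1 and set errno on
 * failure. */
static void open_both_ways( void )
{
        int fd_indirect = syscall( SYS_open, "/tmp/example", O_RDONLY, 0 );
        int fd_direct   = open( "/tmp/example", O_RDONLY );

        if ( fd_indirect != -1 )
                close( fd_indirect );
        if ( fd_direct != -1 )
                close( fd_direct );
}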
-
-/*  **************************************************************************************************************
- *     Test fork, wait4, and exit system calls.
- *  **************************************************************************************************************
- */
-int fork_wait4_exit_test( void * the_argp )
-{
-       int                             my_err, my_status;
-    pid_t                      my_pid, my_wait_pid;
-       struct rusage   my_usage;
-
-       /* spin off another process */
-       my_pid = fork( );
-       if ( my_pid == -1 ) {
-               printf( "fork failed with errno %d - %s \n", errno, strerror( errno ) );
-               return( -1 );
-       }
-       else if ( my_pid == 0 ) {
-               struct stat             my_sb;
-               
-               /* child process does very little then exits */
-               my_err = stat( &g_target_path[0], &my_sb );
-               if ( my_err != 0 ) {
-                       printf( "stat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       printf( "\t path we tried to stat -> \"%s\" \n", &g_target_path[0] );
-                       exit( -1 );
-               }
-               exit( 44 );
-       }
-       
-       /* parent process waits for child to exit */
-       my_wait_pid = wait4( my_pid, &my_status, 0, &my_usage );
-       if ( my_wait_pid == -1 ) {
-               printf( "wait4 failed with errno %d - %s \n", errno, strerror( errno ) );
-               return( -1 );
-       }
-
-       /* wait4 should return our child's pid when it exits */
-       if ( my_wait_pid != my_pid ) {
-               printf( "wait4 did not return child pid - returned %d should be %d \n", my_wait_pid, my_pid );
-               return( -1 );
-       }
-
-       /* kind of just guessing on these values so if this fails we should take a closer 
-        * look at the returned rusage structure. 
-        */
-       if ( my_usage.ru_utime.tv_sec > 1 || my_usage.ru_stime.tv_sec > 1 ||
-                my_usage.ru_majflt > 1000 || my_usage.ru_msgsnd > 100 ) {
-               printf( "wait4 returned an odd looking rusage structure \n" );
-               return( -1 );
-       }
-
-       if ( !(WIFEXITED( my_status ) && WEXITSTATUS( my_status ) == 44) ) {
-               printf( "wait4 returned wrong exit status - 0x%02X \n", my_status );
-               return( -1 );
-       }
-        
-       return( 0 );
-}
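The pattern above is: fork, have the child exit with a known status, then reap it with wait4() and inspect the status and rusage. A minimal sketch under the same assumptions; the helper name is illustrative and 44 simply mirrors the status the test uses:

#include <sys/types.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <unistd.h>

/* Fork a child that exits with status 44, reap it with wait4() and collect
 * its resource usage.  Returns 0 on success, -1 on any failure. */
static int fork_and_reap( void )
{
        struct rusage   my_usage;
        int             my_status;
        pid_t           my_pid = fork();

        if ( my_pid == -1 )
                return -1;
        if ( my_pid == 0 )
                _exit( 44 );            /* child: exit immediately with a known status */

        if ( wait4( my_pid, &my_status, 0, &my_usage ) != my_pid )
                return -1;              /* wait4 returns the child's pid on success */
        return ( WIFEXITED( my_status ) && WEXITSTATUS( my_status ) == 44 ) ? 0 : -1;
}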
-
-/*  **************************************************************************************************************
- *     Test fsync, ftruncate, lseek, pread, pwrite, read, readv, truncate, write, writev system calls.
- *  **************************************************************************************************************
- */
-int read_write_test( void * the_argp )
-{
-       int                     my_fd = -1;
-       int                     my_err;
-       char *                  my_pathp = NULL;
-       char *                  my_bufp = NULL;
-       ssize_t                 my_result;
-       off_t                   my_current_offset;
-       struct iovec            my_iovs[2];
-       struct stat             my_sb;
-       kern_return_t           my_kr;
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-       if ( my_kr != KERN_SUCCESS ) {
-               printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-               goto test_failed_exit;
-       }
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_bufp, MY_BUFFER_SIZE, VM_FLAGS_ANYWHERE);
-       if ( my_kr != KERN_SUCCESS ) {
-               printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-               goto test_failed_exit;
-       }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_pathp, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-       
-       my_fd = open( my_pathp, O_RDONLY, 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t file we attempted to open -> \"%s\" \n", my_pathp );
-               goto test_failed_exit;
-       }
-
-       /* should get EOF since the file is empty at this point */
-       my_result = read( my_fd, my_bufp, 10);
-       if ( my_result == -1 ) {
-               printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_result != 0 ) {
-               if ( sizeof( ssize_t ) > sizeof( int ) ) {
-                       printf( "read call failed - should have read 0 bytes on empty file - read %ld \n", (long int) my_result );
-               }
-               else {
-                       printf( "read call failed - should have read 0 bytes on empty file - read %d \n", (int) my_result );
-               }
-               goto test_failed_exit;
-       }
-
-       /* this write should fail since we opened for read only */
-       my_result = write( my_fd, my_bufp, 10 );
-       my_err = errno;
-       if ( my_result != -1 ) {
-               if ( sizeof( ssize_t ) > sizeof( int ) ) {
-                       printf( "write should have failed for read only fd -  %ld \n", (long int) my_result );
-               }
-               else {
-                       printf( "write should have failed for read only fd -  %d \n", (int) my_result );
-               }
-               goto test_failed_exit;
-       }
-       if ( my_err != EBADF ) {
-               printf( "write call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "should have failed with EBADF error %d \n", EBADF );
-               goto test_failed_exit;
-       }
-       
-       /* now really write some data */
-       close( my_fd );
-       my_fd = open( my_pathp, O_RDWR, 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t file we attempted to open -> \"%s\" \n", my_pathp );
-               goto test_failed_exit;
-       }
-       
-       memset( my_bufp, 'j', MY_BUFFER_SIZE );
-       my_result = write( my_fd, my_bufp, MY_BUFFER_SIZE );
-       if ( my_result == -1 ) {
-               printf( "write call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_result != MY_BUFFER_SIZE ) {
-               printf( "write failed to write out all the data \n" );
-               goto test_failed_exit;
-       }
-       
-       /* push data to disk */
-       my_err = fsync( my_fd );
-       if ( my_err == -1 ) {
-               printf( "fsync failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       /* now verify the write worked OK using readv */
-       lseek( my_fd, 0, SEEK_SET );    
-       bzero( (void *)my_bufp, MY_BUFFER_SIZE );
-       my_iovs[0].iov_base = my_bufp;
-       my_iovs[0].iov_len = 16;
-       my_iovs[1].iov_base = (my_bufp + MY_BUFFER_SIZE - 16) ;
-       my_iovs[1].iov_len = 16;
-
-       my_result = readv( my_fd, &my_iovs[0], 2 );
-       if ( my_result == -1 ) {
-               printf( "readv call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_result != 32 ) {
-               printf( "readv failed to get all the data - asked for 32 got back %d\n", (int) my_result );
-               goto test_failed_exit;
-       }
-       if ( *my_bufp != 'j' || *(my_bufp + (MY_BUFFER_SIZE - 1)) != 'j' ) {
-               printf( "readv failed to get correct data \n" );
-               goto test_failed_exit;
-       }
-
-       /* test ftruncate */
-       my_err = ftruncate( my_fd, 0 );         
-       if ( my_err == -1 ) {
-               printf( "ftruncate call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       my_err = fstat( my_fd, &my_sb );        
-       if ( my_err == -1 ) {
-               printf( "fstat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_sb.st_size != 0 ) {
-               printf( "ftruncate call failed - file size is wrong \n" );
-               goto test_failed_exit;
-       }
-       
-       /* test writev */
-       lseek( my_fd, 0, SEEK_SET );    
-       memset( my_bufp, 'z', MY_BUFFER_SIZE );
-       my_iovs[0].iov_base = my_bufp;
-       my_iovs[0].iov_len = 8;
-       my_iovs[1].iov_base = (my_bufp + MY_BUFFER_SIZE - 8) ;
-       my_iovs[1].iov_len = 8;
-       my_result = writev( my_fd, &my_iovs[0], 2 );
-       if ( my_result == -1 ) {
-               printf( "writev call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_result != 16 ) {
-               printf( "writev failed to write all the data - asked for 16 wrote %d\n", (int) my_result );
-               goto test_failed_exit;
-       }
-
-       /* now verify the writev worked OK */
-       lseek( my_fd, 0, SEEK_SET );    
-       bzero( (void *)my_bufp, MY_BUFFER_SIZE );
-       my_iovs[0].iov_base = my_bufp;
-       my_iovs[0].iov_len = 8;
-       my_iovs[1].iov_base = (my_bufp + MY_BUFFER_SIZE - 8) ;
-       my_iovs[1].iov_len = 8;
-
-       my_result = readv( my_fd, &my_iovs[0], 2 );
-       if ( my_result == -1 ) {
-               printf( "readv call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_result != 16 ) {
-               printf( "readv failed to get all the data - asked for 16 got back %d\n", (int) my_result );
-               goto test_failed_exit;
-       }
-       if ( *my_bufp != 'z' || *(my_bufp + (MY_BUFFER_SIZE - 1)) != 'z' ) {
-               printf( "readv failed to get correct data \n" );
-               goto test_failed_exit;
-       }
-
-       /* test pread and pwrite */
-       my_current_offset = lseek( my_fd, 0, SEEK_CUR );
-       if ( my_current_offset == -1 ) {
-               printf( "lseek call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-        
-       my_result =  pwrite( my_fd, "jer", 3, my_current_offset );
-       if ( my_result == -1 ) {
-               printf( "pwrite call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_result != 3 ) {
-               printf( "pwrite failed to write all the data \n" );
-               goto test_failed_exit;
-       }
-       
-       /* make sure file position did not advance */
-       if ( my_current_offset != lseek( my_fd, 0, SEEK_CUR ) ) {
-               printf( "pwrite advanced file position \n" );
-               goto test_failed_exit;
-       }
-        
-       bzero( (void *)my_bufp, MY_BUFFER_SIZE );
-       my_result =  pread( my_fd, my_bufp, 3, my_current_offset );
-       if ( my_result == -1 ) {
-               printf( "pread call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_result != 3 ) {
-               printf( "pread failed to read all the data \n" );
-               goto test_failed_exit;
-       }
-
-       /* make sure file position did not advance */
-       if ( my_current_offset != lseek( my_fd, 0, SEEK_CUR ) ) {
-               printf( "pread advanced file position \n" );
-               goto test_failed_exit;
-       }
-       
-       /* make sure pread and pwrite transferred correct data */
-       if ( strcmp( my_bufp, "jer" ) != 0 ) {
-               printf( "pread or pwrite failed to read / write correct data \n" );
-               goto test_failed_exit;
-       }
-
-       /* test truncate */
-       my_err = truncate( my_pathp, 0 );               
-       if ( my_err == -1 ) {
-               printf( "truncate call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       my_err = stat( my_pathp, &my_sb );      
-       if ( my_err == -1 ) {
-               printf( "stat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_sb.st_size != 0 ) {
-               printf( "truncate call failed - file size is wrong \n" );
-               goto test_failed_exit;
-       }
-                               
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);      
-        }
-       if ( my_bufp != NULL )
-               vm_deallocate(mach_task_self(), (vm_address_t)my_bufp, MY_BUFFER_SIZE);
-       return( my_err );
-}
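The pread/pwrite portion of read_write_test relies on both calls taking an explicit offset and leaving the descriptor's file position untouched. A minimal round-trip sketch of that property; the helper name and the three-byte payload are illustrative:

#include <sys/types.h>
#include <string.h>
#include <unistd.h>

/* Write three bytes at an explicit offset with pwrite(2), read them back with
 * pread(2), and confirm the round trip.  Neither call moves the descriptor's
 * current file position.  Returns 0 on success, -1 on failure. */
static int pwrite_pread_roundtrip( int fd, off_t offset )
{
        char buf[4] = { 0 };

        if ( pwrite( fd, "jer", 3, offset ) != 3 )
                return -1;
        if ( pread( fd, buf, 3, offset ) != 3 )
                return -1;
        return ( memcmp( buf, "jer", 3 ) == 0 ) ? 0 : -1;
}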
-
-/*  **************************************************************************************************************
- *     Test close, fpathconf, fstat, open, pathconf system calls.
- *  **************************************************************************************************************
- */
-int open_close_test( void * the_argp )
-{
-       int             my_err;
-       int             my_fd = -1;
-       char *          my_pathp = NULL;
-       ssize_t         my_result;
-       long            my_pconf_result;
-       struct stat     my_sb;
-       char            my_buffer[32];
-       kern_return_t           my_kr;
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-       if ( my_kr != KERN_SUCCESS ) {
-               printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-               goto test_failed_exit;
-       }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_pathp, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-       
-       /*  test O_WRONLY case */
-       my_fd = open( my_pathp, O_WRONLY, 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t file we attempted to open -> \"%s\" \n", my_pathp );
-               goto test_failed_exit;
-       }
-
-       /* test pathconf and fpathconf */
-       my_pconf_result = pathconf( my_pathp, _PC_PATH_MAX );
-       if ( my_pconf_result == -1 ) {
-               printf( "pathconf - _PC_PATH_MAX - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }       
-//     printf( "_PC_PATH_MAX %ld \n", my_pconf_result );
-       /* results look OK? */
-       if ( my_pconf_result < PATH_MAX ) {
-               printf( "pathconf - _PC_PATH_MAX - looks like wrong results \n" );
-               goto test_failed_exit;
-       } 
-
-       my_pconf_result = fpathconf( my_fd, _PC_NAME_MAX );
-       if ( my_pconf_result == -1 ) {
-               printf( "fpathconf - _PC_NAME_MAX - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }       
-//     printf( "_PC_NAME_MAX %ld \n", my_pconf_result );
-       /* results look OK? */
-       if ( my_pconf_result < 6 ) {
-               printf( "fpathconf - _PC_NAME_MAX - looks like wrong results \n" );
-               goto test_failed_exit;
-       } 
-
-       /* write some data then try to read it */
-       my_result = write( my_fd, "kat", 3 );
-       my_err = errno;
-       if ( my_result != 3 ) {
-               if ( sizeof( ssize_t ) > sizeof( int ) ) {
-                       printf( "write failed.  should have written 3 bytes actually wrote -  %ld \n", (long int) my_result );
-               }
-               else {
-                       printf( "write failed.  should have written 3 bytes actually wrote -  %d \n", (int) my_result );
-               }
-               goto test_failed_exit;
-       }
-       
-       /* Try to read - this should fail since we opened file with O_WRONLY */
-       my_result = read( my_fd, &my_buffer[0], sizeof(my_buffer) );
-       my_err = errno;
-       if ( my_result != -1 ) {
-               printf( "read call should have failed with errno 9 (EBADF) \n" );
-               goto test_failed_exit;
-       }
-       else if ( my_err != EBADF ) {
-               printf( "read call should have failed with errno 9 (EBADF).  actually failed with %d - \"%s\" \n", my_err, strerror( my_err) );
-               goto test_failed_exit;
-       }
-
-       close( my_fd );
-
-       /*  test O_TRUNC and O_APPEND case */
-       my_fd = open( my_pathp, (O_RDWR | O_TRUNC | O_APPEND), 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t file we attempted to open -> \"%s\" \n", my_pathp );
-               goto test_failed_exit;
-       }
-
-       my_result = read( my_fd, &my_buffer[0], sizeof(my_buffer) );
-       if ( my_result == -1 ) {
-               printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_result != 0 ) {
-               printf( "read failed - should have read 0 bytes. \n" );
-               goto test_failed_exit;
-       }
-
-       my_result = write( my_fd, "kat", 3 );
-       my_err = errno;
-       if ( my_result != 3 ) {
-               if ( sizeof( ssize_t ) > sizeof( int ) ) {
-                       printf( "write failed.  should have written 3 bytes actually wrote -  %ld \n", (long int) my_result );
-               }
-               else {
-                       printf( "write failed.  should have written 3 bytes actually wrote -  %d \n", (int) my_result );
-               }
-               goto test_failed_exit;
-       }
-
-       /* add some more data to the test file - this should be appended */
-       lseek( my_fd, 0, SEEK_SET );
-       my_result = write( my_fd, "zzz", 3 );
-       my_err = errno;
-       if ( my_result != 3 ) {
-               if ( sizeof( ssize_t ) > sizeof( int ) ) {
-                       printf( "write failed.  should have written 3 bytes actually wrote -  %ld \n", (long int) my_result );
-               }
-               else {
-                       printf( "write failed.  should have written 3 bytes actually wrote -  %d \n", (int) my_result );
-               }
-               goto test_failed_exit;
-       }
-                       
-       /* now verify the writes */
-       bzero( (void *)&my_buffer[0], sizeof(my_buffer) );
-       lseek( my_fd, 0, SEEK_SET );
-       my_result = read( my_fd, &my_buffer[0], sizeof(my_buffer) );
-       if ( my_result == -1 ) {
-               printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_buffer[0] != 'k' || my_buffer[5] != 'z' ) {
-               printf( "read failed to get correct data \n" );
-               goto test_failed_exit;
-       }
-
-       /* test fstat */
-       my_err = fstat( my_fd, &my_sb );        
-       if ( my_err == -1 ) {
-               printf( "fstat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_sb.st_size != 6 ) {
-               printf( "fstat call failed - st_size is wrong \n" );
-               goto test_failed_exit;
-       }
-       if ( !S_ISREG( my_sb.st_mode ) ) {
-               printf( "fstat call failed - st_mode does not indicate regular file \n" );
-               goto test_failed_exit;
-       }
-        
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);      
-        }
-       return( my_err );
-}
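fpathconf has one wrinkle the test does not exercise: a return of -1 can mean either an error or "no fixed limit", and only errno tells them apart. A short sketch of the usual idiom, with an assumed helper name:

#include <errno.h>
#include <unistd.h>

/* Query the maximum filename length for an open descriptor.  fpathconf()
 * returns -1 without touching errno when the limit is indeterminate, so errno
 * is cleared first to distinguish that case from a real error. */
static long name_max_for_fd( int fd )
{
        long limit;

        errno = 0;
        limit = fpathconf( fd, _PC_NAME_MAX );
        if ( limit == -1 && errno != 0 )
                return -1;              /* real error, errno describes it */
        return limit;                   /* the limit, or -1 meaning "no fixed limit" */
}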
-
-/*  **************************************************************************************************************
- *     Test link, stat and unlink system calls.
- *  **************************************************************************************************************
- */
-int link_stat_unlink_test( void * the_argp )
-{
-       int                     my_err;
-       int                     my_fd = -1;
-       char *                  my_pathp = NULL;
-       char *                  my_path2p = NULL;
-       nlink_t                 my_link_count;
-       ssize_t                 my_result;
-       struct stat             my_sb;
-       kern_return_t           my_kr;
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-       if ( my_kr != KERN_SUCCESS ) {
-               printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-               goto test_failed_exit;
-       }
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_path2p, PATH_MAX, VM_FLAGS_ANYWHERE);
-       if ( my_kr != KERN_SUCCESS ) {
-               printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-               goto test_failed_exit;
-       }
-
-       *my_pathp = 0x00;
-       *my_path2p = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_pathp, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-       
-       /* now create a name for the link file */
-       strcat( my_path2p, my_pathp );
-       strcat( my_path2p, "link" );
-       
-       /* get the current link count */
-       my_err = stat( my_pathp, &my_sb );
-       if ( my_err != 0 ) {
-               printf( "stat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       my_link_count = my_sb.st_nlink;
-       
-       /* check file size (should be 0) */
-       if ( my_sb.st_size != 0 ) {
-               printf( "stat structure looks bogus for test file \"%s\" \n", my_pathp );
-               printf( "st_size is not 0 \n" );
-               goto test_failed_exit;
-       }
-
-       /* change file size */
-       my_fd = open( my_pathp, O_RDWR, 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t file we attempted to open -> \"%s\" \n", my_pathp );
-               goto test_failed_exit;
-       }
-       my_result = write( my_fd, "kat", 3 );
-       my_err = errno;
-       if ( my_result != 3 ) {
-               if ( sizeof( ssize_t ) > sizeof( int ) ) {
-                       printf( "write failed.  should have written 3 bytes actually wrote -  %ld \n", (long int) my_result );
-               }
-               else {
-                       printf( "write failed.  should have written 3 bytes actually wrote -  %d \n", (int) my_result );
-               }
-               goto test_failed_exit;
-       }
-       close( my_fd );
-       my_fd = -1;
-       
-       /* now link another file to our test file and recheck link count */
-       my_err = link( my_pathp, my_path2p );
-       if ( my_err != 0 ) {
-               printf( "link call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       my_err = stat( my_pathp, &my_sb );
-       if ( my_err != 0 ) {
-               printf( "stat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( (my_link_count + 1) != my_sb.st_nlink ) {
-               printf( "stat structure looks bogus for test file \"%s\" \n", my_pathp );
-               printf( "incorrect st_nlink \n" );
-               goto test_failed_exit;
-       }
-       
-       /* check file size (should be 3) */
-       if ( my_sb.st_size != 3 ) {
-               printf( "stat structure looks bogus for test file \"%s\" \n", my_pathp );
-               printf( "st_size is not 3 \n" );
-               goto test_failed_exit;
-       }
-       
-       /* now make sure unlink works OK */
-       my_err = unlink( my_path2p );
-       if ( my_err != 0 ) {
-               printf( "unlink call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       my_err = stat( my_pathp, &my_sb );
-       if ( my_err != 0 ) {
-               printf( "stat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_link_count != my_sb.st_nlink ) {
-               printf( "stat structure looks bogus for test file \"%s\" \n", my_pathp );
-               printf( "incorrect st_nlink \n" );
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);      
-       }
-       if ( my_path2p != NULL ) {
-               remove( my_path2p );    
-               vm_deallocate(mach_task_self(), (vm_address_t)my_path2p, PATH_MAX);
-       }
-       return( my_err );
-}
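The bookkeeping that link_stat_unlink_test checks can be stated compactly: link() bumps st_nlink on the underlying inode and unlink() of either name drops it again. A minimal sketch under that assumption; the helper name is illustrative and linkpath must not already exist:

#include <sys/stat.h>
#include <unistd.h>

/* Hard-link 'path' to 'linkpath', confirm the link count went up by one, then
 * remove the new name.  Returns 0 on success, -1 on failure. */
static int hard_link_count_check( const char *path, const char *linkpath )
{
        struct stat before, after;

        if ( stat( path, &before ) != 0 )
                return -1;
        if ( link( path, linkpath ) != 0 )
                return -1;
        if ( stat( path, &after ) != 0 || after.st_nlink != before.st_nlink + 1 ) {
                (void) unlink( linkpath );
                return -1;
        }
        return unlink( linkpath );
}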
-
-/*  **************************************************************************************************************
- *     Test chdir and fchdir system calls.
- *  **************************************************************************************************************
- */
-int chdir_fchdir_test( void * the_argp )
-{
-       int                     my_err;
-       int                     my_fd = -1;
-       char *                  my_pathp = NULL;
-       char *                  my_file_namep;
-       struct stat             my_sb;
-       struct stat             my_sb2;
-       kern_return_t           my_kr;
-
-       char *cwd = getcwd(NULL, 0);    /* Save current working directory so we can restore later */
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-       if ( my_kr != KERN_SUCCESS ) {
-               printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-               goto test_failed_exit;
-       }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_pathp, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-       
-       /* test by doing a stat on the test file using a full path and a partial path.
-        * get full path first.
-        */
-       my_err = stat( my_pathp, &my_sb );
-       if ( my_err != 0 ) {
-               printf( "stat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* now do the chdir to our test directory and then do the stat relative to that location */
-       my_err = chdir( &g_target_path[0] );
-       if ( my_err != 0 ) {
-               printf( "chdir call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       my_file_namep = strrchr( my_pathp, '/' );
-       my_file_namep++;
-       my_err = stat( my_file_namep, &my_sb2 );
-       if ( my_err != 0 ) {
-               printf( "stat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       /* both stat buffers should contain the same data since they should be referencing the same
-        * file.
-        */
-       if ( my_sb.st_ino != my_sb2.st_ino || my_sb.st_size != my_sb2.st_size ||
-                my_sb.st_mtimespec.tv_sec != my_sb2.st_mtimespec.tv_sec ||
-                my_sb.st_mtimespec.tv_nsec != my_sb2.st_mtimespec.tv_nsec  ) {
-               printf( "chdir call appears to have failed.  stat buffer contents do not match! \n" );
-               goto test_failed_exit;
-       }
-       
-       /* now change our current directory to "/" and use fchdir to get back to our test directory */
-       my_err = chdir( "/" );
-       if ( my_err != 0 ) {
-               printf( "chdir call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       /* we should not find our test file at the root of the volume */
-       my_err = stat( my_file_namep, &my_sb2 );
-       if ( my_err == 0 ) {
-               printf( "chdir to root volume has failed \n" );
-               goto test_failed_exit;
-       }
-
-       /* get a file descriptor to the test directory for use with fchdir */
-       my_fd = open( &g_target_path[0], O_RDONLY, 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t we attempted to open -> \"%s\" \n", &g_target_path[0] );
-               goto test_failed_exit;
-       }
-       
-       my_err = fchdir( my_fd );
-       if ( my_err == -1 ) {
-               printf( "fchdir call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       my_err = stat( my_file_namep, &my_sb2 );
-       if ( my_err != 0 ) {
-               printf( "stat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       /* both stat buffers should contain the same data since they should be referencing the same
-        * file.
-        */
-       if ( my_sb.st_ino != my_sb2.st_ino || my_sb.st_size != my_sb2.st_size ||
-                my_sb.st_mtimespec.tv_sec != my_sb2.st_mtimespec.tv_sec ||
-                my_sb.st_mtimespec.tv_nsec != my_sb2.st_mtimespec.tv_nsec  ) {
-               printf( "fchdir call appears to have failed.  stat buffer contents do not match! \n" );
-               goto test_failed_exit;
-       }
-
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);      
-        }
-       if ( chdir(cwd) != 0)   /* Changes back to original directory, don't screw up the env. */
-               my_err = -1;
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test access, chmod and fchmod system calls.
- *  **************************************************************************************************************
- */
-int access_chmod_fchmod_test( void * the_argp )
-{
-       int             error_occurred;
-       int             my_err;
-       int             my_fd = -1;
-
-       char *          my_pathp = NULL;
-
-       uid_t           ruid;
-       struct stat     my_sb;
-
-       FILE *          file_handle;
-
-       kern_return_t   my_kr;
-
-
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-                goto test_failed_exit;
-        }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_pathp, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-       
-       
-       /* test chmod */
-       my_err = chmod( my_pathp, S_IRWXU );
-       if ( my_err == -1 ) {
-               printf( "chmod call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       my_err = chmod( my_pathp, (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP) );
-       if ( my_err == -1 ) {
-               printf( "chmod call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* test access - this should fail */
-       my_err = access( my_pathp, (X_OK) );
-       if ( my_err == 0 ) {
-               printf( "access call should have failed, but did not. \n" );
-               goto test_failed_exit;
-       }
-       else if ( my_err == -1  ) {
-               int tmp = 0;
-               tmp = getuid( );
-               
-               /* special case when running as root - we get back EPERM when running as root */
-               my_err = errno;
-               if ( ( tmp == 0 && my_err != EPERM) || (tmp != 0 && my_err != EACCES) ) {
-                       printf( "access failed with errno %d - %s. \n", my_err, strerror( my_err ) );
-                       goto test_failed_exit;
-               }
-       }
-
-       /* verify correct modes are set */
-       my_err = stat( my_pathp, &my_sb );
-       if ( my_err != 0 ) {
-               printf( "stat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       if ( (my_sb.st_mode & (S_IRWXO | S_IXGRP)) != 0 ||
-                (my_sb.st_mode & (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP)) == 0 ) {
-               printf( "chmod call appears to have failed.  stat shows incorrect values in st_mode! \n" );
-               goto test_failed_exit;
-       }
-       
-       
-       /*  another test for the access system call -- refer to radar# 6725311 */
-
-
-       /*
-        * This test makes sure that the access system call does not give the current user extra
-        * permissions on files the current user does not own. From radar #6725311, this could
-        * happen when the current user calls access() on a file owned by the current user in
-        * the same directory as the other files not owned by the current user.
-        * 
-        * Note: This test expects that the effective uid (euid) is set to root.
-        *
-        */
-
-       /* Create a file that root owns  */
-       file_handle = fopen(FILE_NOTME, "w");
-       fclose(file_handle);
-
-       /* Currently running as root (through settid manipulation), switch to running as the current user. */
-       ruid = getuid();
-       my_err = syscall(SYS_settid, ruid, KAUTH_GID_NONE);
-       if (my_err != 0) {
-               printf("Failed to settid to non-root with error %d:%s\n", errno, strerror(errno));
-               goto test_failed_exit;
-       }
-
-       /* Create a file that the current user owns  */
-       file_handle = fopen(FILE_ME, "w");
-       fclose(file_handle);
-
-       error_occurred = 0;
-
-       /* Try to remove the file owned by root (this should fail). */
-       my_err = unlink(FILE_NOTME);
-
-       if (my_err < 0) {
-               my_err = errno;
-       }
-
-       if (my_err == 0) {
-               printf("Unresolved: First attempt deleted '" FILE_NOTME "'! \n");
-               error_occurred = 1;
-       } else {
-               printf("Status: First attempt to delete '" FILE_NOTME "' failed with error %d - %s.\n", my_err, strerror( my_err ));
-
-               /* Set _DELETE_OK on a file that the current user owns */
-               access(FILE_ME, _DELETE_OK);
-
-               /* Try to remove the file owned by root again (should fail with EACCES (13)) */
-               my_err = unlink(FILE_NOTME);
-
-               if (my_err < 0) {
-                   my_err = errno;
-               }
-
-               if (my_err == 0) {
-                       printf("Failed: Second attempt deleted '" FILE_NOTME "'!\n");
-                       error_occurred = 1;
-               } else if (my_err == EACCES) {
-                       printf("Passed: Second attempt to delete '" FILE_NOTME "' failed with error %d - %s.\n", my_err, strerror( my_err ));
-               } else {
-                       printf("Failed: Second attempt to delete '" FILE_NOTME "' failed with error %d - %s.\n", my_err, strerror( my_err ));
-                       error_occurred = 1;
-               }
-       }
-
-       /* Reset to running as root */
-       my_err = syscall(SYS_settid, KAUTH_UID_NONE, KAUTH_GID_NONE);
-       if (my_err != 0) {
-               printf("Failed to revert to root using settid with error %d:%s\n", errno, strerror(errno));
-               goto test_failed_exit;
-       }
-       if(error_occurred == 1) {
-               goto test_failed_exit;
-       }
-
-
-       /* end of test */
-       
-       
-       /* test fchmod */
-       my_fd = open( my_pathp, O_RDONLY, 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t we attempted to open -> \"%s\" \n", my_pathp );
-               goto test_failed_exit;
-       }
-
-       my_err = fchmod( my_fd, S_IRWXU );
-       if ( my_err == -1 ) {
-               printf( "fchmod call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       my_err = stat( my_pathp, &my_sb );
-       if ( my_err != 0 ) {
-               printf( "stat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       /* verify correct modes are set */
-       if ( (my_sb.st_mode & (S_IRWXG | S_IRWXO)) != 0 ||
-                (my_sb.st_mode & (S_IRWXU)) == 0 ) {
-               printf( "fchmod call appears to have failed.  stat shows incorrect values in st_mode! \n" );
-               goto test_failed_exit;
-       }
-               
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );     
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);
-        }
-       return( my_err );
-}
-
-static bool _prime_groups(void)
-{
-       /*
-        * prime groups with a known list to ensure consistent test behavior
-        */
-       
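-       /* note: the list is getegid() plus a few well-known OS X group IDs (staff is gid 20,
-        * everyone is gid 12) so the group-switching tests below have known values to work with.
-        */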
-       gid_t   my_exp_groups[] = { getegid(), 20, 61, 12 };
-       int             my_err;
-
-       my_err = setgroups( ( sizeof(my_exp_groups) / sizeof(*my_exp_groups) ), &my_exp_groups[0] );
-       if ( my_err == -1 ) {
-               printf( "initial setgroups call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               return false;
-       }
-
-       return true;
-}
-
-/*  **************************************************************************************************************
- *     Test chown, fchown, lchown, lstat, readlink, symlink system calls.
- *  **************************************************************************************************************
- */
-int chown_fchown_lchown_lstat_symlink_test( void * the_argp )
-{
-       int                     my_err, my_group_count, i;
-       int                     my_fd = -1;
-       char *                  my_pathp = NULL;
-       char *                  my_link_pathp = NULL;
-       uid_t                   my_orig_uid;
-       gid_t                   my_orig_gid, my_new_gid1 = 0, my_new_gid2 = 0;
-       ssize_t                 my_result;
-       struct stat             my_sb;
-       gid_t                   my_groups[ NGROUPS_MAX ];
-       char                    my_buffer[ 64 ];
-       kern_return_t           my_kr;
-
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-                goto test_failed_exit;
-        }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_pathp, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_link_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-                goto test_failed_exit;
-        }
-
-       *my_link_pathp = 0x00;
-       strcat( my_link_pathp, &g_target_path[0] );
-       strcat( my_link_pathp, "/" );
-
-       /* get a test file name for the link */
-       my_err = create_random_name( my_link_pathp, 0 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-       
-       if ( !_prime_groups() ) {
-               goto test_failed_exit;
-       }
-       
-       /* set up by getting a list of groups */
-       my_group_count = getgroups( NGROUPS_MAX, &my_groups[0] );
-       
-       if ( my_group_count == -1 || my_group_count < 1 ) {
-               printf( "getgroups call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       my_err = stat( my_pathp, &my_sb );
-       if ( my_err != 0 ) {
-               printf( "stat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* now change group owner to something other than current value */
-       my_orig_gid = my_sb.st_gid;
-       my_orig_uid = my_sb.st_uid;
-       
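-       /* scan the supplementary group list for two gids that differ from the file's current
-        * st_gid: my_new_gid1 is used for the chown test and my_new_gid2 for the fchown test.
-        */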
-       for ( i = 0; i < my_group_count; i++ ) {
-               if ( my_orig_gid != my_groups[ i ] ) {
-                       if ( my_new_gid1 == 0 ) {
-                               my_new_gid1 = my_groups[ i ];
-                       }
-                       else if( my_new_gid1 != my_groups[ i ] ) {
-                               my_new_gid2 = my_groups[ i ];
-                               break;
-                       }
-               }
-       }
-       if ( i >= my_group_count ) {
-               printf( "not enough groups to choose from.  could not find two gids that differ from st_gid! \n" );
-               goto test_failed_exit;
-       }
-               
-       my_err = chown( my_pathp, my_orig_uid, my_new_gid1 );
-       if ( my_err != 0 ) {
-               printf( "chown call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* make sure the group owner was changed */
-       my_err = stat( my_pathp, &my_sb );
-       if ( my_err != 0 ) {
-               printf( "stat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_sb.st_gid == my_orig_gid ) {
-               printf( "chown call failed.  st_gid is not correct! \n" );
-               goto test_failed_exit;
-       }
-       
-       /* change group owner back using fchown */
-       my_fd = open( my_pathp, O_RDWR, 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t we attempted to open -> \"%s\" \n", my_pathp );
-               goto test_failed_exit;
-       }
-
-       my_err = fchown( my_fd, my_orig_uid, my_new_gid2 );
-       if ( my_err != 0 ) {
-               printf( "fchown call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* make sure the group owner was changed back to the original value */
-       my_err = stat( my_pathp, &my_sb );
-       if ( my_err != 0 ) {
-               printf( "stat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_sb.st_gid == my_new_gid1 ) {
-               printf( "fchown call failed.  st_gid is not correct! \n" );
-               goto test_failed_exit;
-       }
-
-       /* create a link file and test lchown */
-       my_err = symlink( my_pathp, my_link_pathp );
-       if ( my_err != 0 ) {
-               printf( "symlink call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-        
-       my_err = lstat( my_link_pathp, &my_sb );
-       if ( my_err != 0 ) {
-               printf( "lstat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* now change group owner to something other than current value */
-       my_orig_gid = my_sb.st_gid;
-       my_orig_uid = my_sb.st_uid;
-       my_err = lchown( my_link_pathp, my_orig_uid, my_new_gid1 );
-       if ( my_err != 0 ) {
-               printf( "lchown call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* make sure the group owner was changed to new value */
-       my_err = lstat( my_link_pathp, &my_sb );
-       if ( my_err != 0 ) {
-               printf( "lstat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_sb.st_gid == my_new_gid2 ) {
-               printf( "lchown call failed.  st_gid is not correct! \n" );
-               goto test_failed_exit;
-       }
-
-       /* make sure we can read the symlink file */
-       my_result = readlink( my_link_pathp, &my_buffer[0], sizeof(my_buffer) );
-       if ( my_result == -1 ) {
-               printf( "readlink call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       /* make sure we read some data */
-       if ( my_result < 1 ) {
-               printf( "readlink failed to read any data. \n" );
-               goto test_failed_exit;
-       }
-
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);      
-        }
-       if ( my_link_pathp != NULL ) {
-               unlink( my_link_pathp );        
-               vm_deallocate(mach_task_self(), (vm_address_t)my_link_pathp, PATH_MAX);
-        }
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test fstatfs, getattrlist, getfsstat, statfs, getfsstat64, statfs64, fstatfs64 system calls.
- *  **************************************************************************************************************
- */
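-/* getattrlist() writes its results into a packed buffer whose first u_int32_t is the total
- * length, followed by the requested attributes in their defined order (ATTR_VOL_SIZE is an
- * off_t, ATTR_VOL_IOBLOCKSIZE a u_int32_t).  The 4-byte packing below matches that layout.
- */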
-#pragma pack(4)
-struct vol_attr_buf {
-       u_int32_t       length;
-       off_t           volume_size;
-       u_int32_t       io_blksize;
-};
-#pragma pack()
-typedef struct vol_attr_buf vol_attr_buf;
-
-#define STATFS_TEST_PATH       "/tmp"
-
-int fs_stat_tests( void * the_argp )
-{
-       int                     my_err, my_count, i;
-       int                     my_buffer_size, my_buffer64_size;
-       int                     my_fd = -1;
-       int                     is_ufs = 0;
-       long            my_io_size;
-       fsid_t          my_fsid;
-       struct attrlist         my_attrlist;
-       vol_attr_buf        my_attr_buf;
-       void *                          my_bufferp = NULL;
-       struct statfs *         my_statfsp;
-       kern_return_t       my_kr;
-
-       void * my_buffer64p = NULL;
-       struct statfs64 *       my_statfs64p;
-
-       my_buffer64_size = (sizeof(struct statfs64) * 10);
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(),(vm_address_t*) &my_buffer64p, my_buffer64_size, VM_FLAGS_ANYWHERE);
-       if(my_kr != KERN_SUCCESS){
-         printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-         goto test_failed_exit;
-       }
-
-       my_buffer_size = (sizeof(struct statfs) * 10);
-     
-       my_kr = vm_allocate((vm_map_t) mach_task_self(),(vm_address_t*) &my_bufferp, my_buffer_size, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-                goto test_failed_exit;
-        }
-
-       my_statfsp = (struct statfs *) my_bufferp;
-       my_err = statfs( STATFS_TEST_PATH, my_statfsp );
-       if ( my_err == -1 ) {
-               printf( "statfs call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( memcmp( &my_statfsp->f_fstypename[0], "ufs", 3 ) == 0 ) {
-               is_ufs = 1;
-       }
-       
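-       /* MNT_NOWAIT asks getfsstat to return the cached statfs information for each mount
-        * rather than blocking while every filesystem refreshes its statistics.
-        */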
-       my_count = getfsstat( (struct statfs *)my_bufferp, my_buffer_size, MNT_NOWAIT );
-       if ( my_count == -1 ) {
-               printf( "getfsstat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* validate results */
-       my_statfsp = (struct statfs *) my_bufferp;
-       for ( i = 0; i < my_count; i++, my_statfsp++ ) {
-               if ( memcmp( &my_statfsp->f_fstypename[0], "hfs", 3 ) == 0 ||
-                        memcmp( &my_statfsp->f_fstypename[0], "ufs", 3 ) == 0 ||
-                        memcmp( &my_statfsp->f_fstypename[0], "devfs", 5 ) == 0 ||
-                        memcmp( &my_statfsp->f_fstypename[0], "volfs", 5 ) == 0 ) {
-                       /* found a valid entry */
-                       break;
-               }
-       }
-       if ( i >= my_count ) {
-               printf( "getfsstat call failed.  could not find valid f_fstypename! \n" );
-               goto test_failed_exit;
-       }
-
-       /* now try statfs64 */
-       my_statfs64p = (struct statfs64 *) my_buffer64p;
-       my_err = statfs64( STATFS_TEST_PATH, my_statfs64p );
-       if ( my_err == -1 ) {
-               printf( "statfs64 call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_statfs64p->f_fsid.val[0] != my_statfsp->f_fsid.val[0] ||
-                my_statfs64p->f_fsid.val[1] != my_statfsp->f_fsid.val[1] ) {
-               printf( "statfs64 call failed.  wrong f_fsid! \n" );
-               goto test_failed_exit;
-       }
-       
-       my_count = getfsstat64( (struct statfs64 *)my_buffer64p, my_buffer64_size, MNT_NOWAIT );
-       if ( my_count == -1 ) {
-               printf( "getfsstat64 call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* validate results */
-       my_statfs64p = (struct statfs64 *) my_buffer64p;
-       for ( i = 0; i < my_count; i++, my_statfs64p++ ) {
-               if ( memcmp( &my_statfs64p->f_fstypename[0], "hfs", 3 ) == 0 ||
-                        memcmp( &my_statfs64p->f_fstypename[0], "ufs", 3 ) == 0 ||
-                        memcmp( &my_statfs64p->f_fstypename[0], "devfs", 5 ) == 0 ||
-                        memcmp( &my_statfs64p->f_fstypename[0], "volfs", 5 ) == 0 ) {
-                       /* found a valid entry */
-                       break;
-               }
-       }
-       if ( i >= my_count ) {
-               printf( "getfsstat64 call failed.  could not find valid f_fstypename! \n" );
-               goto test_failed_exit;
-       }
-
-       /* set up to validate results via multiple sources.  we use getattrlist to get volume
-        * related attributes to verify against results from fstatfs and statfs - but only if
-        * we are not targeting ufs volume since it doesn't support getattr calls
-        */
-       if ( is_ufs == 0 ) {
-               memset( &my_attrlist, 0, sizeof(my_attrlist) );
-               my_attrlist.bitmapcount = ATTR_BIT_MAP_COUNT;
-               my_attrlist.volattr = (ATTR_VOL_SIZE | ATTR_VOL_IOBLOCKSIZE);
-               my_err = getattrlist( "/", &my_attrlist, &my_attr_buf, sizeof(my_attr_buf), 0 );
-               if ( my_err != 0 ) {
-                       printf( "getattrlist call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-                       goto test_failed_exit;
-               }
-       }
-       
-       /* open to use as test file for fstatfs */
-       my_fd = open( STATFS_TEST_PATH, O_RDONLY, 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       /* testing fstatfs64 */
-       my_statfs64p = (struct statfs64 *) my_buffer64p;
-       my_err = fstatfs64( my_fd, my_statfs64p );
-       if ( my_err == -1 ) {
-               printf( "fstatfs64 call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       /* validate results - assumes we only boot from hfs or ufs */
-       if ( !(memcmp( &my_statfs64p->f_fstypename[0], "hfs", 3 ) == 0 ||
-                  memcmp( &my_statfs64p->f_fstypename[0], "ufs", 3 ) == 0) ) {
-               printf( "fstatfs64 call failed.  could not find valid f_fstypename! \n" );
-               goto test_failed_exit;
-       }
-       
-       /* testing fstatfs */
-       my_statfsp = (struct statfs *) my_bufferp;
-       my_err = fstatfs( my_fd, my_statfsp );
-       if ( my_err == -1 ) {
-               printf( "fstatfs call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       /* validate results */
-       if ( !(memcmp( &my_statfsp->f_fstypename[0], "hfs", 3 ) == 0 ||
-                  memcmp( &my_statfsp->f_fstypename[0], "ufs", 3 ) == 0) ) {
-               printf( "fstatfs call failed.  could not find valid f_fstypename! \n" );
-               goto test_failed_exit;
-       }
-       my_io_size = my_statfsp->f_iosize;
-       my_fsid = my_statfsp->f_fsid;
-       if ( is_ufs == 0 && my_statfsp->f_iosize != my_attr_buf.io_blksize ) {
-               printf( "fstatfs and getattrlist results do not match for volume block size  \n" );
-               goto test_failed_exit;
-       } 
-
-       /* try again with statfs */
-       my_err = statfs( STATFS_TEST_PATH , my_statfsp );
-       if ( my_err == -1 ) {
-               printf( "statfs call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* validate results */
-       if ( my_io_size != my_statfsp->f_iosize || my_fsid.val[0] != my_statfsp->f_fsid.val[0] ||
-                my_fsid.val[1] != my_statfsp->f_fsid.val[1] ) {
-               printf( "statfs call failed.  wrong f_iosize or f_fsid! \n" );
-               goto test_failed_exit;
-       }
-       if ( is_ufs == 0 && my_statfsp->f_iosize != my_attr_buf.io_blksize ) {
-               printf( "statfs and getattrlist results do not match for volume block size  \n" );
-               goto test_failed_exit;
-       } 
-               
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_bufferp != NULL ) {
-               vm_deallocate(mach_task_self(), (vm_address_t)my_bufferp, my_buffer_size);
-        }
-        if ( my_buffer64p != NULL ) {
-               vm_deallocate(mach_task_self(), (vm_address_t)my_buffer64p, my_buffer64_size);
-        }
-        
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test getpid, getppid, and pipe system calls.
- *  **************************************************************************************************************
- */
-int getpid_getppid_pipe_test( void * the_argp )
-{
-       int                     my_err, my_status;
-       pid_t           my_pid, my_wait_pid;
-       ssize_t         my_count;
-       int                     my_fildes[2] = {-1, -1};
-       off_t           my_current_offset;
-       char            my_pid_string[64];
-
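-       /* pipe() returns two descriptors: my_fildes[0] is the read end and my_fildes[1] is the write end */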
-       my_err = pipe( &my_fildes[0] );
-       if ( my_err != 0 ) {
-               printf( "pipe call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       /* make sure we can't seek on a pipe */
-       my_current_offset = lseek( my_fildes[0], 0, SEEK_CUR );
-       if ( my_current_offset != -1 ) {
-               printf( "lseek on pipe should fail but did not \n" );
-               goto test_failed_exit;
-       }
-        
-       /* fork here and use pipe to communicate */
-       my_pid = fork( );
-       if ( my_pid == -1 ) {
-               printf( "fork failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       else if ( my_pid == 0 ) {
-               /* child process */
-               unsigned long   my_ppid;
-               char                    my_buffer[64];
-               
-               close( my_fildes[1] ); /* close write end of pipe */
-               my_fildes[1] = -1;
-               
-               /* get the parent's pid via getppid and read the pid the parent sends us (obtained with getpid in the parent) */
-               my_count = read( my_fildes[0], &my_buffer[0], sizeof(my_buffer) );
-               if ( my_count == -1 ) {
-                       printf( "read from pipe failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-                       exit(-1);
-               }
-               
-               /* parent wrote (to our pipe) its pid as character string */
-               my_ppid = strtoul( &my_buffer[0], NULL, 10 );
-               if ( my_ppid == 0 ) {
-                       printf( "strtoul failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-                       exit(-1);
-               }
-
-               if ( getppid( ) != my_ppid ) {
-                       printf( "getppid failed.  pid we got from parent does not match getppid result. \n" );
-                       exit(-1);
-               }
-               exit(0);
-       }
-       
-       /* parent process - get our pid using getpid and send it to child for verification */
-       close( my_fildes[0] ); /* close read end of pipe */
-       my_fildes[0] = -1;
-       
-       sprintf( &my_pid_string[0], "%d\n", getpid( ) );
-
-       my_count = write( my_fildes[1], &my_pid_string[0], strlen( my_pid_string ) + 1 );
-       if ( my_count == -1 ) {
-               printf( "write to pipe failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* wait for child to exit */
-       my_wait_pid = wait4( my_pid, &my_status, 0, NULL );
-       if ( my_wait_pid == -1 ) {
-               printf( "wait4 failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* wait4 should return our child's pid when it exits */
-       if ( my_wait_pid != my_pid ) {
-               printf( "wait4 did not return child pid - returned %d should be %d \n", my_wait_pid, my_pid );
-               goto test_failed_exit;
-       }
-
-       if ( WIFEXITED( my_status ) && WEXITSTATUS( my_status ) != 0 ) {
-               printf( "wait4 returned wrong exit status - 0x%02X \n", my_status );
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fildes[0] != -1 )
-               close( my_fildes[0] );
-       if ( my_fildes[1] != -1 )
-               close( my_fildes[1] );
-       return( my_err );
-}
-
-
-/*  **************************************************************************************************************
- *     Test getauid, gettid, getuid, geteuid, issetugid, setaudit_addr, seteuid, settid, settid_with_pid, setuid system calls.
- *  **************************************************************************************************************
- */
-int uid_tests( void * the_argp )
-{
-       int                     my_err, my_status;
-       pid_t           my_pid, my_wait_pid;
-
-       if ( g_skip_setuid_tests != 0 ) {
-               printf("\t skipping this test \n");
-               my_err = 0;
-               goto test_passed_exit;
-       }
-
-       /* test issetugid - in this harness it should return 1 when we are not running as root
-        * and 0 when we are.  The check is not reliable in single-user mode, so skip
-        * it in that case.
-        */
-       if (!g_is_single_user) {
-               my_err = issetugid( );
-               if ( getuid( ) == 0 ) {
-                       if ( my_err == 1 ) {
-                               printf( "issetugid should return false \n" );
-                               goto test_failed_exit;
-                       }
-               }
-               else {
-                       if ( my_err == 0 ) {
-                               printf( "issetugid should return true \n" );
-                               goto test_failed_exit;
-                       }
-               }
-       }
-
-       /*
-        * fork here and do the setuid work in the child 
-        */
-       my_pid = fork( );
-       if ( my_pid == -1 ) {
-               printf( "fork failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       else if ( my_pid == 0 ) {
-               /* 
-                * child process 
-                */
-               uid_t                   my_ruid, my_euid;
-               uid_t                   my_uid, my_temp_uid;
-               gid_t                   my_gid, my_temp_gid;
-               auditinfo_addr_t        my_aia;
-               
-               my_ruid = getuid( );
-               my_euid = geteuid( );
-               if ( my_ruid == my_euid ) {
-                       exit( 0 );
-               }
-
-               /* Test getauid, gettid, setaudit_addr, settid, settid_with_pid */
-               /* get our current uid and gid for comparison later */
-               my_uid = getuid( );
-               my_gid = getgid( );
-
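-               /* settid() installs an override uid/gid on the calling thread; 4444 and 5555 are
-                * arbitrary sentinel values that gettid() is expected to report back below.
-                */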
-               my_err = syscall( SYS_settid, 4444, 5555 );
-               //my_err = settid( 4444, 5555 );
-               if (my_err != 0) {
-                       printf( "settid call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       exit( -1 );
-               }
-
-               my_err = syscall( SYS_gettid, &my_temp_uid, &my_temp_gid );
-               //my_err = gettid( &my_temp_uid, &my_temp_gid );
-               if (my_err != 0) {
-                       printf( "gettid call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       exit( -1 );
-               }
-               if (my_temp_uid != 4444) {
-                       printf("get / settid test failed - wrong uid was set - %d \n", my_temp_uid);
-                       exit( -1 );
-               }
-               if (my_temp_gid != 5555) {
-                       printf("get / settid test failed - wrong gid was set - %d \n", my_temp_gid);
-                       exit( -1 );
-               }
-
-               /* resume original identity */
-               my_err = syscall( SYS_settid, KAUTH_UID_NONE, KAUTH_GID_NONE );
-               //my_err = settid( KAUTH_UID_NONE, KAUTH_GID_NONE );
-               if (my_err != 0) {
-                       printf( "settid revert - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       exit( -1 );
-               }
-
-               /* values should be returned to original settings */
-               my_temp_uid = getuid( );
-               if (my_temp_uid == 4444) {
-                       printf("test failed - wrong uid was set - %d \n", my_temp_uid);
-                       exit( -1 );
-               }
-               my_temp_gid = getgid( );
-               if (my_temp_gid == 5555) {
-                       printf("test failed - wrong gid was set - %d \n", my_temp_gid);
-                       exit( -1 );
-               }
-
-               /*
-                * Assume the identity of our parent.
-                */
-               my_err = syscall( SYS_settid_with_pid, getppid( ), 1 );
-               //my_err = settid_with_pid( my_target_pid, 1 );
-               if (my_err != 0) {
-                       printf( "settid_with_pid assume - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       exit( -1 );
-               }
-
-               /*
-                * Resume our identity.
-                */
-               my_err = syscall( SYS_settid_with_pid, 0, 0 );
-               //my_err = settid_with_pid( my_target_pid, 0 );
-               if (my_err != 0) {
-                       printf( "settid_with_pid resume - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       exit( -1 );
-               }
-               
-               /*
-                * test to make sure setaudit_addr doesn't cause audit info to get lost from 
-                * the credential.
-                */
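-               /* AU_ASSIGN_ASID below asks the kernel to allocate a fresh audit session ID for this call */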
-               bzero( &my_aia, sizeof(my_aia) );
-               my_aia.ai_auid = 442344;
-               my_aia.ai_asid = AU_ASSIGN_ASID;
-               my_aia.ai_termid.at_type = AU_IPv4;
-               my_err = setaudit_addr( &my_aia, sizeof(my_aia) );
-               if (my_err != 0) {
-                       printf( "setaudit_addr - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       exit( -1 );
-               }
-
-               my_aia.ai_auid = 0;
-               my_err = getaudit_addr( &my_aia, sizeof(my_aia) );
-               if (my_err != 0) {
-                       printf( "getaudit_addr - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       exit( -1 );
-               }
-               //printf("new audit ID is %d \n", my_aia.ai_auid);
-
-               if (my_aia.ai_auid != 442344) {
-                       printf("test failed - wrong audit ID was set - %d \n", my_aia.ai_auid);
-                       exit( -1 );
-               }
-               
-               /* change real uid and effective uid to current euid */
-               my_err = setuid( my_euid );
-               if ( my_err == -1 ) {
-                       printf( "setuid call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       exit( -1 );
-               }
-               if ( getuid( ) != my_euid ) {
-                       printf( "setuid call failed to set the real uid \n" );
-                       exit( -1 );
-               }
-
-               /* change effective uid to current euid - really a NOP */
-               my_err = seteuid( my_euid );
-               if ( my_err == -1 ) {
-                       printf( "seteuid call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       exit( -1 );
-               }
-               if ( geteuid( ) != my_euid ) {
-                       printf( "seteuid call failed to set the original euid \n" );
-                       exit( -1 );
-               }
-
-               /* change real uid and effective uid to original real uid */
-               my_err = setuid( my_ruid );
-               if ( my_err == -1 ) {
-                       printf( "setuid call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       exit( -1 );
-               }
-               if ( getuid( ) != my_ruid ) {
-                       printf( "setuid call failed to set the real uid \n" );
-                       exit( -1 );
-               }
-
-               exit(0);
-       }
-       
-       /* 
-        * parent process - 
-        * wait for child to exit 
-        */
-       my_wait_pid = wait4( my_pid, &my_status, 0, NULL );
-       if ( my_wait_pid == -1 ) {
-               printf( "wait4 failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* wait4 should return our child's pid when it exits */
-       if ( my_wait_pid != my_pid ) {
-               printf( "wait4 did not return child pid - returned %d should be %d \n", my_wait_pid, my_pid );
-               goto test_failed_exit;
-       }
-
-       if ( WIFEXITED( my_status ) && WEXITSTATUS( my_status ) != 0 ) {
-               printf( "wait4 returned wrong exit status - 0x%02X \n", my_status );
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test mknod, sync system calls.
- *  **************************************************************************************************************
- */
-int mknod_sync_test( void * the_argp )
-{
-       int                     my_err;
-       char *  my_pathp =      NULL;
-       kern_return_t           my_kr;
-
-       if ( g_skip_setuid_tests != 0 ) {
-               printf("\t skipping this test \n");
-               my_err = 0;
-               goto test_passed_exit;
-       }
-
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-                goto test_failed_exit;
-        }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, "/dev/" );
-
-       /* get a unique name for our test file */
-       my_err = create_random_name( my_pathp, 0 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-
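-       /* create a character-special node (device number 0) under /dev; mknod requires
-        * super-user privileges, which is why this test is skipped when setuid tests are off.
-        */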
-       my_err = mknod( my_pathp, (S_IFCHR | S_IRWXU), 0 );     
-       if ( my_err == -1 ) {
-               printf( "mknod failed with errno %d - %s \n", errno, strerror( errno ) );
-               printf( "path \"%s\" \n", my_pathp );
-               goto test_failed_exit;
-       }
-       
-       /* not really sure what to do with sync call test */
-       sync( );
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);
-        }
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test chflags, fchflags system calls.
- *  **************************************************************************************************************
- */
-int chflags_fchflags_test( void * the_argp )
-{
-       int                             my_err;
-       int                             my_fd = -1;
-       u_int                   my_flags;
-       char *                  my_pathp = NULL;
-       struct stat             my_sb;
-       kern_return_t           my_kr;
-
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-                goto test_failed_exit;
-        }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_pathp, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-       
-       /* make the test file unchangeable (set UF_IMMUTABLE) */
-       my_err = stat( my_pathp, &my_sb );
-       if ( my_err != 0 ) {
-               printf( "stat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       my_flags = (my_sb.st_flags | UF_IMMUTABLE);
-       my_err = chflags( my_pathp, my_flags );
-       if ( my_err != 0 ) {
-               printf( "chflags call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       /* opening for writing should fail with EPERM now that the file is immutable */
-       my_fd = open( my_pathp, O_RDWR, 0 );
-       if ( my_fd != -1 ) {
-               printf( "open for writing should have failed on an immutable file, but it succeeded \n" );
-               goto test_failed_exit;
-       }
-       if ( errno != EPERM ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "open failed with wrong error - should be EPERM \n" );
-               goto test_failed_exit;
-       }
-       
-       /* this open should work OK */
-       my_fd = open( my_pathp, O_RDONLY, 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       my_err = stat( my_pathp, &my_sb );
-       if ( my_err != 0 ) {
-               printf( "stat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       my_flags = (my_sb.st_flags & ~UF_IMMUTABLE);
-       my_err = fchflags( my_fd, my_flags );
-       if ( my_err != 0 ) {
-               printf( "chflags call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       close( my_fd );
-       my_fd = -1;
-       
-       /* should now work */
-       my_fd = open( my_pathp, O_RDWR, 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);
-        }
-       return( my_err );
-}
-
-
-/*  **************************************************************************************************************
- *     Test kill, vfork, execve system calls.
- *  **************************************************************************************************************
- */
-/*  There are many new exec() situations to test now that 64-bit is in. These extra tests are in response to 
- * rdar://4606399 and rdar://4607285. They should cover every permutation of the following variables.
- * 
- *  - Current Process "Bitness":                       64 or 32
- *  - exec()'ed process "bitness":                     64 or 32
- *     (if 64 bit, size of page zero:)                 (4GB or 4KB)
- *  - Parent Process "Bitness":                                64 or 32
- *
- *  Test to make sure certain inheritance properties of fork()'ed children
- * are correctly set.
- *  1. 64 bit process forking() 64-bit child, child execing() 64-bit file (4GB pagezero)
- *  2. 64 bit process forking() 64-bit child, child execing() 64-bit file (4KB pagezero)
- *  3. 64 bit process forking() 64-bit child, child execing() 32-bit file
- *  4. 32 bit process forking() 32-bit child, child execing() 32-bit file
- *  5. 32 bit process forking() 32-bit child, child execing() 64 bit file (4GB pagezero) 
- *  6. 32 bit process forking() 32-bit child, child execing() 64 bit file (4KB pagezero)
- *
- */
-
-
-int execve_kill_vfork_test( void * the_argp )
-{
-       int     my_err, my_status;
-       pid_t   my_pid, my_wait_pid;
-       char *  errmsg = NULL; 
-       char * argvs[2] = {"", NULL};
-       int bits = get_bits();          /* Gets actual processor bit-ness. */
-       
-       if (bits != 32 && bits != 64) {
-               printf("Determination of processor bit-ness failed, get_bits() returned %d.\n", get_bits());
-               return(-1);
-       }
-
-       if (get_architecture() == -1) {
-               errmsg = "get_architecture() could not determine the CPU architecture.\n";
-               goto test_failed_exit;
-       }
-       
-       if (get_architecture() == INTEL) {
-               struct stat sb;
-
-               if (bits == 64 && sizeof(long) == 8) {
-                       /*
-                        * Running on x86_64 hardware and running in 64-bit mode.
-                        * Check cases 1, 2, 3 and fork a child to check 4, 5, 6. 
-                        */ 
-                       errmsg = "execve failed: from x86_64 forking and exec()ing 64-bit x86_64 process w/ 4G pagezero.\n";
-                       argvs[0] = "sleep-x86_64-4G";
-                       if (do_execve_test("helpers/sleep-x86_64-4G", argvs, NULL, 1))          goto test_failed_exit;
-
-                       errmsg = "execve failed: from x86_64 forking and exec()ing 64-bit x86_64 process w/ 4K Pagezero.\n";
-                       argvs[0] = "sleep-x86_64-4K";
-                       if (do_execve_test("helpers/sleep-x86_64-4K", argvs, NULL, 1))          goto test_failed_exit;
-
-                       errmsg = "execve failed: from x86_64 forking and exec()ing 32-bit i386 process.\n";
-                       argvs[0] = "sleep-i386";
-                       if (do_execve_test("helpers/sleep-i386", argvs, NULL, 1))               goto test_failed_exit;
-
-                       /* Fork off a helper process and load a 32-bit program in it to test 32->64 bit exec(). */
-                       errmsg = "execve failed to exec the helper process.\n";
-                       argvs[0] = "launch-i386";
-                       if (do_execve_test("helpers/launch-i386", argvs, NULL, 1) != 0)         goto test_failed_exit;
-
-                       /* Test posix_spawn for i386, x86_64 (should succeed) */
-                       errmsg = NULL;
-                       if (do_spawn_test(CPU_TYPE_I386, 0))
-                               goto test_failed_exit;
-                       if (do_spawn_test(CPU_TYPE_X86_64, 0))
-                               goto test_failed_exit;
-               }
-               else if (bits == 64 && sizeof(long) == 4) {
-                       /*
-                        * Running on x86_64 hardware, but actually running in 32-bit mode.
-                        * Check cases 4, 5, 6 and fork a child to check 1, 2, 3.
-                        */
-                       errmsg = "execve failed: from i386 forking and exec()ing i386 process.\n";
-                       argvs[0] = "sleep-i386";
-                       if (do_execve_test("helpers/sleep-i386", argvs, NULL, 0))               goto test_failed_exit;
-
-                       errmsg = "execve failed: from i386 forking and exec()ing x86_64 process w/ 4G pagezero.\n";
-                       argvs[0] = "sleep-x86_64-4G";
-                       if (do_execve_test("helpers/sleep-x86_64-4G", argvs, NULL, 0))          goto test_failed_exit;
-
-                       errmsg = "execve failed: from i386 forking and exec()ing x86_64 process w/ 4K pagezero.\n";
-                       argvs[0] = "sleep-x86_64-4K";
-                       if (do_execve_test("helpers/sleep-x86_64-4K", argvs, NULL, 0))          goto test_failed_exit;
-
-                       /* Fork off a helper process and load a 64-bit program in it to test 64->32 bit exec(). */
-                       errmsg = "execve failed to exec the helper process.\n";
-                       argvs[0] = "launch-x86_64";
-                       if (do_execve_test("helpers/launch-x86_64", argvs, NULL, 1) != 0)       goto test_failed_exit;
-
-                       /* Test posix_spawn for i386, x86_64 (should succeed) */
-                       errmsg = NULL;
-                       if (do_spawn_test(CPU_TYPE_I386, 0))
-                               goto test_failed_exit;
-                       if (do_spawn_test(CPU_TYPE_X86_64, 0))
-                               goto test_failed_exit;
-               }
-               else if (bits == 32) {
-                       /* Running on i386 hardware. Check cases 4. */
-                       errmsg = "execve failed: from i386 forking and exec()ing 32-bit i386 process.\n";
-                       argvs[0] = "sleep-i386";
-                       if (do_execve_test("helpers/sleep-i386", argvs, NULL, 1))               goto test_failed_exit;
-
-                       /* Test posix_spawn for x86_64 (should fail), i386 (should succeed) */
-                       errmsg = NULL;
-                       if (do_spawn_test(CPU_TYPE_X86_64, 1))
-                               goto test_failed_exit;
-                       if (do_spawn_test(CPU_TYPE_I386, 0))
-                               goto test_failed_exit;
-               }
-       } else if(get_architecture() == ARM) {
-
-#ifdef CPU_TYPE_ARM64
-               if (bits == 64) {
-                       /* Running on arm64 hardware. */
-                       errmsg = "execve failed: from arm64 forking and exec()ing 64-bit arm process.\n";
-                       argvs[0] = "sleep-arm64";
-                       if (do_execve_test("helpers/sleep-arm64", argvs, NULL, 1))
-                               goto test_failed_exit;
-
-                       /* Test posix_spawn for arm64 (should succeed) */
-                       errmsg = NULL;
-                       if (do_spawn_test(CPU_TYPE_ARM64, 0))
-                               goto test_failed_exit;
-               } 
-#endif
-
-               /* Exec arm test on both arm and arm64 */
-               errmsg = "execve failed: from arm forking and exec()ing 32-bit arm process.\n";
-               argvs[0] = "sleep-arm";
-               if (do_execve_test("helpers/sleep-arm", argvs, NULL, 1))
-                       goto test_failed_exit;
-
-               /* Test posix_spawn for arm (should succeed) */
-               errmsg = NULL;
-               if (do_spawn_test(CPU_TYPE_ARM, 0))
-                       goto test_failed_exit;
-
-       }
-       else {
-               /* Just in case someone decides we need more architectures in the future */
-               printf("get_architecture() returned an unknown architecture \n");
-               return(-1);
-       }       
-
-       return 0;
-
-test_failed_exit:
-       if (errmsg)
-               printf("%s", errmsg);
-       return -1;
-}
-
-
-/*  **************************************************************************************************************
- *     Test getegid, getgid, getgroups, setegid, setgid, setgroups system calls.
- *  **************************************************************************************************************
- */
-int groups_test( void * the_argp )
-{
-       int                     my_err, i;
-       int                     my_group_count, my_orig_group_count;
-       gid_t           my_real_gid;
-       gid_t           my_effective_gid;
-       gid_t           my_removed_gid;
-       gid_t           my_new_gid;
-       gid_t           my_groups[ NGROUPS_MAX ];
-
-       if ( g_skip_setuid_tests != 0 ) {
-               printf("\t skipping this test \n");
-               my_err = 0;
-               goto test_passed_exit;
-       }
-
-       my_real_gid = getgid( );
-       my_effective_gid = getegid( );
-
-       if ( !_prime_groups() ) {
-               goto test_failed_exit;
-       }
-       
-       /* start by getting list of groups the current user belongs to */
-       my_orig_group_count = getgroups( NGROUPS_MAX, &my_groups[0] );
-
-       if ( my_orig_group_count == -1 || my_orig_group_count < 1 ) {
-               printf( "getgroups call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       /* make sure real and effective gids are correct */
-       for ( i = 0; i < my_orig_group_count; i++ ) {
-               if ( my_groups[i] == my_real_gid )
-                       break;
-       }
-       if ( i >= my_orig_group_count ) {
-               printf( "getgid or getgroups call failed.  could not find real gid in list of groups. \n" );
-               goto test_failed_exit;
-       }
-       for ( i = 0; i < my_orig_group_count; i++ ) {
-               if ( my_groups[i] == my_effective_gid )
-                       break;
-       }
-       if ( i >= my_orig_group_count ) {
-               printf( "getegid or getgroups call failed.  could not find effective gid in list of groups. \n" );
-               goto test_failed_exit;
-       }
-               
-       /* remove the last group */
-       my_removed_gid = my_groups[ (my_orig_group_count - 1) ];
-       my_err = setgroups( (my_orig_group_count - 1), &my_groups[0] );
-       if ( my_err == -1 ) {
-               printf( "setgroups call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       my_group_count = getgroups( NGROUPS_MAX, &my_groups[0] );
-       
-       if ( my_group_count == -1 || my_group_count < 1 ) {
-               printf( "getgroups call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       /* make sure setgroups dropped one */
-       if ( my_orig_group_count <= my_group_count ) {
-               printf( "setgroups call failed.  current group count is too high. \n" );
-               goto test_failed_exit;
-       }
-       
-       /* now put removed gid back */
-       my_groups[ (my_orig_group_count - 1) ] = my_removed_gid;
-       my_err = setgroups( my_orig_group_count, &my_groups[0] );
-       if ( my_err == -1 ) {
-               printf( "setgroups call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       /* find a group to change real and effective gid to then do it */
-       my_new_gid = -1;
-       for ( i = 0; i < my_orig_group_count; i++ ) {
-               if ( my_groups[i] == my_effective_gid || my_groups[i] == my_real_gid )
-                       continue;
-               my_new_gid = my_groups[i];
-       }
-       
-       if ( my_new_gid == -1 ) {
-               printf( "could not find a gid to switch to. \n" );
-               goto test_failed_exit;
-       }
-       
-       /* test setegid */
-       my_err = setegid( my_new_gid );
-       if ( my_err == -1 ) {
-               printf( "setegid call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       /* verify it changed */
-       if ( getegid( ) != my_new_gid ) {
-               printf( "setegid failed to change the effective gid. \n" );
-               goto test_failed_exit;
-       }
-       /* change it back to original value */
-       my_err = setegid( my_effective_gid );
-       if ( my_err == -1 ) {
-               printf( "setegid call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       /* test setgid */
-       my_err = setgid( my_new_gid );
-       if ( my_err == -1 ) {
-               printf( "setgid call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       /* verify it changed */
-       if ( getgid( ) != my_new_gid ) {
-               printf( "setgid failed to change the real gid. \n" );
-               goto test_failed_exit;
-       }
-       /* change it back to original value */
-       my_err = setgid( my_real_gid );
-       if ( my_err == -1 ) {
-               printf( "setgid call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-                  
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       return( my_err );
-}
-
-
-/*  **************************************************************************************************************
- *     Test dup, dup2, getdtablesize system calls.
- *  **************************************************************************************************************
- */
-int dup_test( void * the_argp )
-{
-       int                     my_err;
-       int                     my_fd = -1;
-       int                     my_newfd = -1;
-       int                     my_table_size, my_loop_counter = 0;
-       char *          my_pathp = NULL;
-       ssize_t         my_count;
-       char            my_buffer[64];
-       kern_return_t           my_kr;
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-       if ( my_kr != KERN_SUCCESS ) {
-               /* vm_allocate reports Mach errors through its return value, not errno */
-               printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-               goto test_failed_exit;
-       }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_pathp, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-       
-       /* test dup, dup2, getdtablesize */
-       my_table_size = getdtablesize( );
-       if ( my_table_size < 20 ) {
-               printf( "getdtablesize should return at least 20, returned %d \n", my_table_size );
-               goto test_failed_exit;
-       }
-
-       my_fd = open( my_pathp, O_RDWR, 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       my_newfd = dup( my_fd );
-       if ( my_newfd == -1 ) {
-               printf( "dup call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-redo:
-       /* now write some data to the original and new fd */
-       /* make sure test file is empty */
-       my_err = ftruncate( my_fd, 0 );         
-       if ( my_err == -1 ) {
-               printf( "ftruncate call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       lseek( my_fd, 0, SEEK_SET );
-       my_count = write( my_fd, "aa", 2 );
-       if ( my_count == -1 ) {
-               printf( "write call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
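-       /* my_fd and my_newfd share one open file description (and file offset),
-        * so this write lands right after the "aa" written through my_fd
-        */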
-       my_count = write( my_newfd, "xx", 2 );
-       if ( my_count == -1 ) {
-               printf( "write call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       /* now read it back and make sure data is correct */
-       lseek( my_fd, 0, SEEK_SET );
-       my_count = read( my_fd, &my_buffer[0], sizeof(my_buffer) );
-       if ( my_count == -1 ) {
-               printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_buffer[0] != 'a' || my_buffer[1] != 'a' || my_buffer[2] != 'x' || my_buffer[3] != 'x' ) {
-               printf( "wrong data in test file. \n" );
-               goto test_failed_exit;
-       }
-       
-       bzero( &my_buffer[0], sizeof(my_buffer) );
-       lseek( my_newfd, 0, SEEK_SET );
-       my_count = read( my_newfd, &my_buffer[0], sizeof(my_buffer) );
-       if ( my_count == -1 ) {
-               printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_buffer[0] != 'a' || my_buffer[1] != 'a' || my_buffer[2] != 'x' || my_buffer[3] != 'x' ) {
-               printf( "wrong data in test file. \n" );
-               goto test_failed_exit;
-       }
-
-       /* we do the above tests twice - once for dup and once for dup2 */
-       if ( my_loop_counter < 1 ) {
-               my_loop_counter++;
-               close( my_newfd );
-     
-               my_err = dup2( my_fd, my_newfd );
-               if ( my_err == -1 ) {
-                       printf( "dup2 call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       goto test_failed_exit;
-               }
-               
-               goto redo;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_newfd != -1 )
-               close( my_newfd );
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);      
-        }
-       return( my_err );
-}
-
-
-/*  **************************************************************************************************************
- *     Test getrusage system call.
- *  **************************************************************************************************************
- */
-int getrusage_test( void * the_argp )
-{
-       int                             my_err;
-       struct rusage   my_rusage;
-
-       my_err = getrusage( RUSAGE_SELF, &my_rusage );  
-       if ( my_err == -1 ) {
-               printf( "getrusage failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* do a sanity check on the getrusage results */
-       if ( my_rusage.ru_msgrcv > 1000 || my_rusage.ru_msgrcv < 0 ) {
-               printf( "getrusage seems to report wrong data - ru_msgrcv looks odd. \n" );
-               goto test_failed_exit;
-       }
-       if ( my_rusage.ru_nsignals > 1000 || my_rusage.ru_nsignals < 0 ) {
-               printf( "getrusage seems to report wrong data - ru_nsignals looks odd. \n" );
-               goto test_failed_exit;
-       }
-                       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test getitimer, setitimer, sigaction, sigpending, sigprocmask, sigsuspend, sigwait system calls.
- *  **************************************************************************************************************
- */
-
-int            alarm_global = 0;
-void test_alarm_handler( int the_arg );
-void test_alarm_handler( int the_arg )
-{      
-       alarm_global = 4;
-       //printf( "test_alarm_handler - got here \n" );
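-       /* the empty if below only references the_arg to quiet the unused-parameter warning */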
-       if ( the_arg == 0 ) {
-       }
-       return;
-}
-
-void test_signal_handler( int the_arg );
-void test_signal_handler( int the_arg )
-{      
-       //printf( "test_signal_handler - got here \n" );
-       if ( the_arg == 0 ) {
-       }
-       return;
-}
-
-int signals_test( void * the_argp )
-{
-       int                     my_err, my_status;
-       int                     my_fd = -1;
-       char *          my_pathp = NULL;
-       pid_t           my_pid, my_wait_pid;
-       kern_return_t           my_kr;
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-       if ( my_kr != KERN_SUCCESS ) {
-               printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-               goto test_failed_exit;
-       }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_pathp, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-       
-       /*
-        * spin off a child process that we will use for signal related testing.   
-        */
-       my_pid = fork( );
-       if ( my_pid == -1 ) {
-               printf( "fork failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_pid == 0 ) {
-               /* 
-                * child process - test signal related system calls.
-                */
-               //int                                   my_counter;
-               int                                     my_signal;
-               sigset_t                        my_sigset;
-               struct sigaction        my_sigaction;
-#ifdef MAC_OS_X_VERSION_10_5
-#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_5
-               /* On Leopard this definition is required, but it cannot be used when
-                * compiling for Inca x86_64, hence the version guard.
-                */
-               struct __darwin_sigaltstack     my_sigaltstack;
-#endif
-#else
-               struct sigaltstack      my_sigaltstack;
-#endif
-               struct itimerval    my_timer;
-
-
-               /* test getting the current signal stack context */
-               my_err = sigaltstack( NULL, &my_sigaltstack );
-               if ( my_err == -1 ) {
-                       printf( "sigaltstack failed with errno %d - %s \n", errno, strerror( errno ) );
-                       exit( -1 );
-               }
-               if ( (my_sigaltstack.ss_flags & SS_DISABLE) == 0 ) {
-                       printf( "sigaltstack must have failed - SS_DISABLE is cleared \n" );
-                       exit( -1 );
-               }
-                               
-               /* set up to catch SIGUSR1 */
-               my_sigaction.sa_handler = test_signal_handler;
-               my_sigaction.sa_flags = SA_RESTART;
-               my_sigaction.sa_mask = 0;
-
-               my_err = sigaction( SIGUSR1, &my_sigaction, NULL );
-               if ( my_err == -1 ) {
-                       printf( "sigaction failed with errno %d - %s \n", errno, strerror( errno ) );
-                       exit( -1 );
-               }
-                       
-               /* now suspend until signal SIGUSR1 is sent */ 
-               sigemptyset( &my_sigset );
-               my_err = sigsuspend( &my_sigset );
-               if ( my_err == -1 ) {
-                       if ( errno != EINTR ) {
-                               printf( "sigsuspend should have returned with errno EINTR \n" );
-                               exit( -1 );
-                       }
-               }
-                                       
-               /* block SIGUSR1 */
-               sigemptyset( &my_sigset );
-               sigaddset( &my_sigset, SIGUSR1 );
-               if ( sigismember( &my_sigset, SIGUSR1 ) == 0 ) {
-                       printf( "sigaddset call failed to add SIGUSR1 to signal set \n" );
-                       exit( -1 );
-               }
-               my_err = sigprocmask( SIG_BLOCK, &my_sigset, NULL );
-               if ( my_err == -1 ) {
-                       printf( "sigprocmask failed with errno %d - %s \n", errno, strerror( errno ) );
-                       exit( -1 );
-               }
-               
-               /* make sure we are blocking SIGUSR1 */
-               sigemptyset( &my_sigset );
-               my_err = sigprocmask( 0, NULL, &my_sigset );
-               if ( my_err == -1 ) {
-                       printf( "sigprocmask failed with errno %d - %s \n", errno, strerror( errno ) );
-                       exit( -1 );
-               }
-               if ( sigismember( &my_sigset, SIGUSR1 ) == 0 ) {
-                       printf( "sigprocmask failed to block SIGUSR1 \n" );
-                       exit( -1 );
-               }
-
-               /* our parent will send a 2nd SIGUSR1 signal which we should now see getting
-                * blocked.
-                */
-               sigemptyset( &my_sigset );
-               sigaddset( &my_sigset, SIGUSR1 );
-               my_err = sigwait( &my_sigset, &my_signal );
-               if ( my_err == -1 ) {
-                       printf( "sigwait failed with errno %d - %s \n", errno, strerror( errno ) );
-                       exit( -1 );
-               }
-               //printf( "%s - %d - signal 0x%02X %d \n", __FUNCTION__, __LINE__, my_signal, my_signal );
-               if ( my_signal != SIGUSR1 ) {
-                       printf( "sigwait failed to catch a pending SIGUSR1 signal. \n" );
-                       exit( -1 );
-               }
-                
-               /* now unblock SIGUSR1 */
-               sigfillset( &my_sigset );
-               sigdelset( &my_sigset, SIGUSR1 );
-               my_err = sigprocmask( SIG_UNBLOCK, &my_sigset, NULL );
-               if ( my_err == -1 ) {
-                       printf( "sigprocmask failed with errno %d - %s \n", errno, strerror( errno ) );
-                       exit( -1 );
-               }
-               if ( sigismember( &my_sigset, SIGUSR1 ) != 0 ) {
-                       printf( "sigprocmask call failed to unblock SIGUSR1 \n" );
-                       exit( -1 );
-               }
-               
-               /* test get / setitimer */
-               timerclear( &my_timer.it_interval );
-               timerclear( &my_timer.it_value );
-               my_err = setitimer( ITIMER_VIRTUAL, &my_timer, NULL );
-               if ( my_err == -1 ) {
-                       printf( "setitimer - ITIMER_VIRTUAL - failed with errno %d - %s \n", errno, strerror( errno ) );
-                       exit( -1 );
-               }
-               my_err = setitimer( ITIMER_PROF, &my_timer, NULL );
-               if ( my_err == -1 ) {
-                       printf( "setitimer - ITIMER_PROF - failed with errno %d - %s \n", errno, strerror( errno ) );
-                       exit( -1 );
-               }
-
-               /* set up to catch SIGALRM */
-               alarm_global = 0;
-               my_sigaction.sa_handler = test_alarm_handler;
-               my_sigaction.sa_flags = SA_RESTART;
-               my_sigaction.sa_mask = 0;
-
-               my_err = sigaction( SIGALRM, &my_sigaction, NULL );
-               if ( my_err == -1 ) {
-                       printf( "sigaction - SIGALRM - failed with errno %d - %s \n", errno, strerror( errno ) );
-                       exit( -1 );
-               }
-               
-               /* set timer for half a second */
-               my_timer.it_value.tv_usec = (1000000 / 2);
-               my_err = setitimer( ITIMER_REAL, &my_timer, NULL );
-               if ( my_err == -1 ) {
-                       printf( "setitimer - ITIMER_REAL - failed with errno %d - %s \n", errno, strerror( errno ) );
-                       exit( -1 );
-               }
-                       
-               /* now suspend until signal SIGALRM is sent */ 
-               sigfillset( &my_sigset );
-               sigdelset( &my_sigset, SIGALRM );
-               my_err = sigsuspend( &my_sigset );
-               if ( my_err == -1 ) {
-                       if ( errno != EINTR ) {
-                               printf( "sigsuspend should have returned with errno EINTR \n" );
-                               exit( -1 );
-                       }
-               }
-               if ( alarm_global != 4 ) {
-                       printf( "setitimer test failed - did not catch SIGALRM \n" );
-                       exit( -1 );
-               }
-
-               /* make sure ITIMER_REAL is now clear */
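-               /* preload sentinel values so we can tell that getitimer() really overwrites them */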
-               my_timer.it_value.tv_sec = 44;
-               my_timer.it_value.tv_usec = 44;
-               my_err = getitimer( ITIMER_REAL, &my_timer );
-               if ( my_err == -1 ) {
-                       printf( "getitimer - ITIMER_REAL - failed with errno %d - %s \n", errno, strerror( errno ) );
-                       exit( -1 );
-               }
-               if ( timerisset( &my_timer.it_value ) || timerisset( &my_timer.it_interval ) ) {
-                       printf( "ITIMER_REAL is set, but should not be \n" );
-                       exit( -1 );
-               }
-               
-               exit(0);
-       }
-       
-       /* 
-        * parent process - let child set up to suspend then signal it with SIGUSR1
-        */
-       sleep( 1 );
-       my_err = kill( my_pid, SIGUSR1 );
-       if ( my_err == -1 ) {
-               printf( "kill call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-        
-       /* send 2nd signal to suspended child - which should be blocking SIGUSR1 signals */
-       sleep( 1 );
-       my_err = kill( my_pid, SIGUSR1 );
-       if ( my_err == -1 ) {
-               printf( "kill call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-        
-       /* wait for child to exit */
-       my_wait_pid = wait4( my_pid, &my_status, 0, NULL );
-       if ( my_wait_pid == -1 ) {
-               printf( "wait4 failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       if ( WIFSIGNALED( my_status ) || ( WIFEXITED( my_status ) && WEXITSTATUS( my_status ) != 0 ) ) {
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);      
-        }
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test getlogin, setlogin system calls.
- *  **************************************************************************************************************
- */
-int getlogin_setlogin_test( void * the_argp )
-{
-       int                     my_err, my_status;
-       pid_t           my_pid, my_wait_pid;
-       kern_return_t           my_kr;  
-
-       if ( g_skip_setuid_tests != 0 ) {
-               printf("\t skipping this test \n");
-               my_err = 0;
-               goto test_passed_exit;
-       }
-
-       /*
-        * spin off a child process that we will use for testing.   
-        */
-       my_pid = fork( );
-       if ( my_pid == -1 ) {
-               printf( "fork failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_pid == 0 ) {
-               /* 
-                * child process - do getlogin and setlogin testing.
-                */
-               char *          my_namep = NULL;
-               int             my_len;
-               char *          my_new_namep = NULL;
-
-               my_namep = getlogin( );
-               if ( my_namep == NULL ) {
-                       printf( "getlogin returned NULL name pointer \n" );
-                       my_err = -1;
-                       goto exit_child;
-               }
-
-               my_len = strlen( my_namep ) + 4;
-
-               my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_new_namep, my_len, VM_FLAGS_ANYWHERE);
-               if ( my_kr != KERN_SUCCESS ) {
-                       printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-                       my_err = -1;
-                       goto exit_child;
-               }
-
-               bzero( (void *)my_new_namep, my_len );
-
-               strcat( my_new_namep, my_namep );
-               strcat( my_new_namep, "2" );
-
-
-               /* set new name */
-               my_err = setlogin( my_new_namep );
-               if ( my_err == -1 ) {
-                       printf( "When setting new login name, setlogin failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-
-               /* make sure we set new name */
-               my_namep = getlogin( );
-               if ( my_namep == NULL ) {
-                       printf( "getlogin returned NULL name pointer \n" );
-                       my_err = -1;
-                       goto exit_child;
-               }
-
-               if ( memcmp( my_namep, my_new_namep, strlen( my_new_namep ) ) != 0 ) {
-                       printf( "setlogin failed to set the new name \n" );
-                       my_err = -1;
-                       goto exit_child;
-               }
-
-               /* reset to original name */
-               my_len = strlen ( my_namep );
-               my_namep[ my_len - 1 ] = '\0';
-
-               my_err = setlogin( my_namep );
-               if ( my_err == -1 ) {
-                       printf( "When resetting login name, setlogin failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_err = -1;
-                       goto exit_child;
-               }
-
-        
-               my_err = 0;
-exit_child:
-               if ( my_new_namep != NULL ) {
-                       vm_deallocate(mach_task_self(), (vm_address_t)my_new_namep, my_len);
-               }
-               exit( my_err );
-       }
-       
-       /* parent process -
-        * wait for child to exit 
-        */
-       my_wait_pid = wait4( my_pid, &my_status, 0, NULL );
-       if ( my_wait_pid == -1 ) {
-               printf( "wait4 failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       if ( WIFEXITED( my_status ) && WEXITSTATUS( my_status ) != 0 ) {
-               goto test_failed_exit;
-       }
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test acct system call.
- *  **************************************************************************************************************
- */
-int acct_test( void * the_argp )
-{
-       int             my_err, my_status;
-       int             my_fd = -1;
-       char *          my_pathp = NULL;
-       struct acct *   my_acctp;
-       pid_t           my_pid, my_wait_pid;
-       ssize_t         my_count;
-       char            my_buffer[ (sizeof(struct acct) + 32) ];
-       kern_return_t           my_kr;
-       int             acct_record_found;
-       char *          test_bin_name = NULL;
-
-       if ( g_skip_setuid_tests != 0 ) {
-               printf("\t skipping this test \n");
-               my_err = 0;
-               goto test_passed_exit;
-       }
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-       if ( my_kr != KERN_SUCCESS ) {
-               printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-               goto test_failed_exit;
-       }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_pathp, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-       
-       /* enable process accounting */
-       my_err =  acct( my_pathp );     
-       if ( my_err == -1 ) {
-               printf( "acct failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /*
-        * spin off a child process that we will use for testing.   
-        */
-       my_pid = fork( );
-       if ( my_pid == -1 ) {
-               printf( "fork failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_pid == 0 ) {
-               char *argv[2];          /* supply valid argv array to execv() */
-               argv[0] = "/usr/bin/true";
-               argv[1] = 0;
-
-               /* 
-                * child process - do a little work then exit.
-                */
-               my_err = execv( argv[0], argv);
-               /* execv only returns on failure */
-               printf( "execv failed with errno %d - %s \n", errno, strerror( errno ) );
-               exit( -1 );
-       }
-       
-       /* parent process -
-        * wait for child to exit 
-        */
-       my_wait_pid = wait4( my_pid, &my_status, 0, NULL );
-       if ( my_wait_pid == -1 ) {
-               printf( "wait4 failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       if ( WIFEXITED( my_status ) && WEXITSTATUS( my_status ) != 0 ) {
-               printf("unexpected child exit status for accounting test load: %d\n", WEXITSTATUS( my_status));
-               goto test_failed_exit;
-       }
-
-       /* disable process accounting */ 
-       my_err =  acct( NULL ); 
-       if ( my_err == -1 ) {
-               printf( "acct failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* now verify that there is accounting info in the log file */
-       my_fd = open( my_pathp, O_RDONLY, 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       lseek( my_fd, 0, SEEK_SET );
-       bzero( (void *)&my_buffer[0], sizeof(my_buffer) );
-       acct_record_found = 0;
-       test_bin_name = "true"; 
-
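-       /* the kernel appends one struct acct record to the accounting file as each
-        * process exits, so scan the records for the one left by the child
-        */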
-       while(1) {
-
-               my_count = read( my_fd, &my_buffer[0], sizeof(struct acct) );
-
-               if ( my_count == -1 ) {
-                       printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       goto test_failed_exit;
-               }
-
-               if ( my_count < (ssize_t)sizeof(struct acct) ) {
-                       /* Indicates EOF or misaligned file size */
-                       printf("Reached end of accounting records with last read count: %ld\n", (long) my_count);
-                       break;
-               }
-
-               my_acctp = (struct acct *) &my_buffer[0];
-               /* first letters in ac_comm should match the name of the executable */
-               if ( (getuid() == my_acctp->ac_uid) && (getgid() == my_acctp->ac_gid) &&
-                    (!strncmp(my_acctp->ac_comm, test_bin_name, strlen(test_bin_name))) ) {
-                       /* Expected accounting record found */
-                       acct_record_found = 1;
-                       break;
-               }
-
-       }       
-
-       if (acct_record_found) {
-               my_err = 0;
-               goto test_passed_exit;
-       } else {
-               printf( "------------------------\n" );
-               printf( "Expected Accounting Record for child process %s not found\n", test_bin_name );
-               printf( "Expected uid: %lu Expected gid: %lu\n" , (unsigned long) getuid(), (unsigned long) getgid() );
-               printf( "Account file path: %s\n",  my_pathp );
-               goto test_failed_exit;
-       }
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);      
-        }
-       return( my_err );
-}
-
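-/* debug helper: print the first 10 bytes of an accounting record's ac_comm field,
- * both as a string and as hex bytes
- */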
-void print_acct_debug_strings( char * my_ac_comm )
-{
-       char    my_cmd_str[11]; /* sizeof(ac_comm) + 1 for '\0' in case ac_comm is bogus */
-       char    my_hex_str[128];
-       int     i;
-       
-       my_hex_str[0] = '\0';
-       for(i = 0; i < 10; i++)
-       {
-               /* append at the end of the buffer; passing my_hex_str as both source and
-                * destination of sprintf() is undefined behavior */
-               snprintf( my_hex_str + strlen( my_hex_str ), sizeof( my_hex_str ) - strlen( my_hex_str ),
-                         " \'0x%x\' ", my_ac_comm[i] );
-       }
-
-       memccpy(my_cmd_str, my_ac_comm, '\0', 10);
-       my_cmd_str[10] = '\0'; /* In case ac_comm was bogus */
-       
-
-       printf( "my_acctp->ac_comm = \"%s\" (should begin with: \"tr\")\n", my_cmd_str);
-       printf( "my_acctp->ac_comm = \"%s\"\n", my_hex_str);
-       printf( "------------------------\n" );
-}
-
-
-/*  **************************************************************************************************************
- *     Test ioctl system calls.
- *  **************************************************************************************************************
- */
-int ioctl_test( void * the_argp )
-{
-       int                                     my_err, my_result;
-       int                                     my_fd = -1;
-       struct statfs *         my_infop;
-       char *                          my_ptr;
-       int                             my_blksize;
-       long long                       my_block_count;
-       char                            my_name[ 128 ];
-
-       my_result = getmntinfo( &my_infop, MNT_NOWAIT );
-       if ( my_result < 1 ) {
-               printf( "getmntinfo failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* make this a raw device */
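-       /* e.g. turn "/dev/disk0s2" into "/dev/rdisk0s2" by inserting an 'r' after the last '/' */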
-       strcpy( &my_name[0], &my_infop->f_mntfromname[0] );
-       if ( (my_ptr = strrchr( &my_name[0], '/' )) != 0 ) {
-               if ( my_ptr[1] != 'r' ) {
-                       my_ptr[ strlen( my_ptr ) ] = 0x00;
-                       memmove( &my_ptr[2], &my_ptr[1], (strlen( &my_ptr[1] ) + 1) );
-                       my_ptr[1] = 'r';
-               }
-       }
-
-       my_fd = open(&my_name[0], O_RDONLY );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-    /* obtain the size of the media (in blocks) */
-       my_err = ioctl( my_fd, DKIOCGETBLOCKCOUNT, &my_block_count );
-       if ( my_err == -1 ) {
-               printf( "ioctl DKIOCGETBLOCKCOUNT failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-    /* obtain the block size of the media */
-       my_err = ioctl( my_fd, DKIOCGETBLOCKSIZE, &my_blksize );
-       if ( my_err == -1 ) {
-               printf( "ioctl DKIOCGETBLOCKSIZE failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       //printf( "my_block_count %qd my_blksize %d \n", my_block_count, my_blksize );
-
-       /* make sure the returned data looks somewhat valid */
-       if ( my_blksize < 0 || my_blksize > (1024 * 1000) ) {
-               printf( "ioctl appears to have returned incorrect block size data \n" );
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test mkdir, rmdir, umask system calls.
- *  **************************************************************************************************************
- */
-int mkdir_rmdir_umask_test( void * the_argp )
-{
-       int                             my_err;
-       int                             my_fd = -1;
-       int                             did_umask = 0;
-       char *                  my_pathp = NULL;
-       mode_t                  my_orig_mask;
-       struct stat             my_sb;
-       kern_return_t           my_kr;
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-       if ( my_kr != KERN_SUCCESS ) {
-               printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-               goto test_failed_exit;
-       }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* get a unique name to use with mkdir */
-       my_err = create_random_name( my_pathp, 0 );
-       if ( my_err != 0 ) {
-               printf( "create_random_name failed with error %d\n", my_err );
-               goto test_failed_exit;
-       }
-       
-       /* set umask to clear WX for other and group and clear X for user */
-       my_orig_mask = umask( (S_IXUSR | S_IWGRP | S_IXGRP | S_IWOTH | S_IXOTH) );      
-       did_umask = 1;
-
-       /* create a directory with RWX for user, group, other (which should be limited by umask) */
-       my_err = mkdir( my_pathp, (S_IRWXU | S_IRWXG | S_IRWXO) );
-       if ( my_err == -1 ) {
-               printf( "mkdir failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       /* verify results - (S_IXUSR | S_IWGRP | S_IXGRP | S_IWOTH | S_IXOTH) should be clear*/
-       my_err = stat( my_pathp, &my_sb );
-       if ( my_err != 0 ) {
-               printf( "stat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( (my_sb.st_mode & (S_IXUSR | S_IWGRP | S_IXGRP | S_IWOTH | S_IXOTH)) != 0 ) {
-               printf( "umask did not limit modes as it should have \n" );
-               goto test_failed_exit;
-       }
-       
-       /* get rid of our test directory */
-       my_err = rmdir( my_pathp );
-       if ( my_err == -1 ) {
-               printf( "rmdir failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_pathp != NULL ) {
-               rmdir( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);      
-        }
-        if ( did_umask != 0 ) {
-               umask( my_orig_mask );  
-        }
-
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test chroot system call.
- *  **************************************************************************************************************
- */
-int chroot_test( void * the_argp )
-{
-       int                     my_err, my_status;
-       pid_t           my_pid, my_wait_pid;
-       char *          my_pathp = NULL;
-       kern_return_t           my_kr;
-
-       if ( g_skip_setuid_tests != 0 ) {
-               printf("\t skipping this test \n");
-               my_err = 0;
-               goto test_passed_exit;
-       }
-               
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-       if ( my_kr != KERN_SUCCESS ) {
-               printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-               goto test_failed_exit;
-       }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* get a unique name for our test directory */
-       my_err = create_random_name( my_pathp, 0 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-
-       /* create a test directory */
-       my_err = mkdir( my_pathp, (S_IRWXU | S_IRWXG | S_IRWXO) );
-       if ( my_err == -1 ) {
-               printf( "mkdir failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /*
-        * spin off a child process that we will use for testing.   
-        */
-       my_pid = fork( );
-       if ( my_pid == -1 ) {
-               printf( "fork failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_pid == 0 ) {
-               /* 
-                * child process - do chroot testing.
-                */
-               struct stat             my_sb;
-
-               /* change our root to our new test directory */
-               my_err = chroot( my_pathp );     
-               if ( my_err != 0 ) {
-                       printf( "chroot failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       exit( -1 );
-               }
-               
-               /* verify root directory is now an empty directory */
-               my_err = stat( "/", &my_sb );
-               if ( my_err != 0 ) {
-                       printf( "stat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       exit( -1 );
-               }
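-               /* an empty directory has a link count of 2 ("." plus the entry in its parent) */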
-               if ( my_sb.st_nlink > 2 ) {
-                       printf( "root dir should be empty! \n" );
-                       exit( -1 );
-               }
-               exit( 0 );
-       }
-       
-       /* parent process -
-        * wait for child to exit 
-        */
-       my_wait_pid = wait4( my_pid, &my_status, 0, NULL );
-       if ( my_wait_pid == -1 ) {
-               printf( "wait4 failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       if ( WIFEXITED( my_status ) && WEXITSTATUS( my_status ) != 0 ) {
-               printf( "bad exit status\n" );
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_pathp != NULL ) {
-               my_err = rmdir( my_pathp );
-               if ( my_err != 0 ) {
-                       printf( "rmdir failed with error %d - \"%s\" path %p\n", errno, strerror( errno), my_pathp );
-               }
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);
-       }
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test getpgrp, getpgid, getsid, setpgid, setpgrp, setsid system calls.
- *  **************************************************************************************************************
- */
-int process_group_test( void * the_argp )
-{
-       int             my_err = 0, i = 0;
-       pid_t           my_session_id, my_pid, my_process_group;
-
-       /* get current session ID, pgid, and pid */
-       my_session_id = getsid( 0 );
-       if ( my_session_id == -1 ) {
-               printf( "getsid call failed with error %d - \"%s\" \n", 
-                               errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       my_pid = getpid( );
-       my_process_group = getpgrp( );
-        
-       /* test getpgrp and getpgid - they should return the same results when 0 is passed to getpgid */
-       if ( my_process_group != getpgid( 0 ) ) {
-               printf( "getpgrp and getpgid did not return the same process group ID \n" );
-               printf( "getpgid: %d, my_process_group: %d\n", getpgid( 0 ), my_process_group );
-               goto test_failed_exit;
-       }
-
-       if ( my_pid == my_process_group ) {
-               /* we are process group leader */
-               my_err = setsid( );
-               if ( my_err != -1 || errno != EPERM ) {
-                       printf( "setsid call should have failed with EPERM\n" );
-                       goto test_failed_exit;
-               }
-       } else {
-               /* we are not process group leader: try creating new session */
-               my_err = setsid( );
-               if ( my_err == -1 ) {
-                       printf( "setsid call failed with error %d - \"%s\" \n",
-                                       errno, strerror( errno ) );
-                       goto test_failed_exit;
-               }
-
-               if ( my_process_group == getpgid( 0 ) ) {
-                       printf( "process group was not reset \n" );
-                       goto test_failed_exit;
-               }
-       }
-       
-       /* find an unused process group ID */
-       for ( i = 10000; i < 1000000; i++ ) {
-               my_process_group = getpgid( i );
-               if ( my_process_group == -1 ) {
-                       break;
-               }
-       }
-
-       /* this should fail - the target process group does not exist in our session */
-       my_err = setpgid( 0, my_process_group );
-       if ( my_err != -1 ) {
-               printf( "setpgid should have failed, but did not \n" );
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test fcntl system calls.
- *  **************************************************************************************************************
- */
-int fcntl_test( void * the_argp )
-{
-       int                     my_err, my_result, my_tmp;
-       int                     my_fd = -1;
-       int                     my_newfd = -1;
-       char *          my_pathp = NULL;
-       kern_return_t           my_kr;
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-       if ( my_kr != KERN_SUCCESS ) {
-               printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-               goto test_failed_exit;
-       }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_pathp, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-       
-       /* open our test file and use fcntl to get / set file descriptor flags */
-       my_fd = open( my_pathp, O_RDONLY, 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       my_result = fcntl( my_fd, F_GETFD, 0 );
-       if ( my_result == -1 ) {
-               printf( "fcntl - F_GETFD - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       my_tmp = (my_result & FD_CLOEXEC);
-       if ( my_tmp ) {
-               /* FD_CLOEXEC is on, let's turn it off */
-               my_result = fcntl( my_fd, F_SETFD, 0 );
-       }
-       else {
-               /* FD_CLOEXEC is off, let's turn it on */
-               my_result = fcntl( my_fd, F_SETFD, 1 );
-       }
-       if ( my_result == -1 ) {
-               printf( "fcntl - F_SETFD - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* now check to see if it is set correctly */
-       my_result = fcntl( my_fd, F_GETFD, 0 );
-       if ( my_result == -1 ) {
-               printf( "fcntl - F_GETFD - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_tmp == (my_result & FD_CLOEXEC) ) {
-               printf( "fcntl - F_SETFD failed to set FD_CLOEXEC correctly!!! \n" );
-               goto test_failed_exit;
-       }
-
-       /* dup it to a new fd with FD_CLOEXEC forced on */
-
-       my_result = fcntl( my_fd, F_DUPFD_CLOEXEC, 0);
-       if ( my_result == -1 ) {
-               printf( "fcntl - F_DUPFD_CLOEXEC - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       my_newfd = my_result;
-
-       /* check to see that it too is marked with FD_CLOEXEC */
-
-       my_result = fcntl( my_newfd, F_GETFD, 0);
-       if ( my_result == -1 ) {
-               printf( "fcntl - F_GETFD - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( (my_result & FD_CLOEXEC) == 0 ) {
-               printf( "fcntl - F_DUPFD_CLOEXEC failed to set FD_CLOEXEC!!! \n" );
-               goto test_failed_exit;
-       }
-
-       close( my_newfd );
-       my_newfd = -1;
-
-       /* While we're here, dup it via an open of /dev/fd/<fd> .. */
-
-       {
-               char devfdpath[PATH_MAX];
-
-               (void) snprintf( devfdpath, sizeof (devfdpath),
-                       "/dev/fd/%u", my_fd );
-               my_result = open( devfdpath, O_RDONLY | O_CLOEXEC );
-       }
-       if ( my_result == -1 ) {
-               printf( "open call failed on /dev/fd/%u with error %d - \"%s\" \n", my_fd, errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       my_newfd = my_result;
-
-       /* check to see that it too is marked with FD_CLOEXEC */
-
-       my_result = fcntl( my_newfd, F_GETFD, 0);
-       if ( my_result == -1 ) {
-               printf( "fcntl - F_GETFD - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( (my_result & FD_CLOEXEC) == 0 ) {
-               printf( "fcntl - O_CLOEXEC open of /dev/fd/%u failed to set FD_CLOEXEC!!! \n", my_fd );
-               goto test_failed_exit;
-       }
-       close ( my_newfd );
-       my_newfd = -1;
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_newfd != -1)
-               close ( my_newfd );
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);      
-        }
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test getpriority, setpriority system calls.
- *  **************************************************************************************************************
- */
-int getpriority_setpriority_test( void * the_argp )
-{
-       int                     my_err;
-       int                     my_priority;
-       int                     my_new_priority;
-
-       /* getpriority returns scheduling priority so -1 is a valid value */
-       errno = 0;
-       my_priority = getpriority( PRIO_PROCESS, 0 );
-       if ( my_priority == -1 && errno != 0 ) {
-               printf( "getpriority - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       /* change scheduling priority */
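-       /* choose a priority different from the current one: PRIO_MIN, or PRIO_MIN + 10 if we are already at PRIO_MIN */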
-       my_new_priority = (my_priority == PRIO_MIN) ? (my_priority + 10) : (PRIO_MIN);
-       my_err = setpriority( PRIO_PROCESS, 0, my_new_priority );
-       if ( my_err == -1 ) {
-               printf( "setpriority - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* verify change */
-       errno = 0;
-       my_priority = getpriority( PRIO_PROCESS, 0 );
-       if ( my_priority == -1 && errno != 0 ) {
-               printf( "getpriority - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       if ( my_priority != my_new_priority ) {
-               printf( "setpriority - failed to set correct scheduling priority \n" );
-               goto test_failed_exit;
-       }
-       
-       /* reset scheduling priority */
-       my_err = setpriority( PRIO_PROCESS, 0, 0 );
-       if ( my_err == -1 ) {
-               printf( "setpriority - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test futimes, gettimeofday, settimeofday, utimes system calls.
- *  **************************************************************************************************************
- */
-int time_tests( void * the_argp )
-{
-       int                                     my_err;
-       int                                     my_fd = -1;
-       char *                          my_pathp = NULL;
-       struct timeval          my_orig_time;
-       struct timeval          my_temp_time;
-       struct timeval          my_utimes[4];
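-       /* my_utimes[0..1] hold the times we set; [2..3] hold what stat() reports back */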
-       struct timezone         my_tz;
-       struct stat                     my_sb;
-       kern_return_t           my_kr;
-
-       if ( g_skip_setuid_tests != 0 ) {
-               printf( "\t skipping this test \n" );
-               my_err = 0;
-               goto test_passed_exit;
-       }
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-       if ( my_kr != KERN_SUCCESS ) {
-               printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
-               goto test_failed_exit;
-       }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_pathp, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-       
-       my_err = gettimeofday( &my_orig_time, &my_tz );
-       if ( my_err == -1 ) {
-               printf( "gettimeofday - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       //printf( "tv_sec %d tv_usec %ld \n", my_orig_time.tv_sec, my_orig_time.tv_usec );
-       
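-       /* set the clock back one minute, then verify gettimeofday() reflects the change */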
-       my_temp_time = my_orig_time;
-       my_temp_time.tv_sec -= 60;
-       my_err = settimeofday( &my_temp_time, NULL );
-       if ( my_err == -1 ) {
-               printf( "settimeofday - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       my_err = gettimeofday( &my_temp_time, NULL );
-       if ( my_err == -1 ) {
-               printf( "gettimeofday - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       //printf( "tv_sec %d tv_usec %ld \n", my_temp_time.tv_sec, my_temp_time.tv_usec );
-       if ( my_orig_time.tv_sec <= my_temp_time.tv_sec ) {
-               printf( "settimeofday did not set correct time \n" );
-               goto test_failed_exit;
-       }
-
-       /* set time back to original value plus 1 second */
-       my_temp_time = my_orig_time;
-       my_temp_time.tv_sec += 1;
-       my_err = settimeofday( &my_temp_time, NULL );
-       if ( my_err == -1 ) {
-               printf( "settimeofday - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* test utimes and futimes - get current access and mod times then change them */
-       my_err = stat( my_pathp, &my_sb );
-       if ( my_err != 0 ) {
-               printf( "stat - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       TIMESPEC_TO_TIMEVAL( &my_utimes[0], &my_sb.st_atimespec );
-       TIMESPEC_TO_TIMEVAL( &my_utimes[1], &my_sb.st_mtimespec );
-       my_utimes[0].tv_sec -= 120;             /* make access time 2 minutes older */ 
-       my_utimes[1].tv_sec -= 120;             /* make mod time 2 minutes older */ 
-       
-       my_err = utimes( my_pathp, &my_utimes[0] );
-       if ( my_err == -1 ) {
-               printf( "utimes - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* make sure the correct times are set */
-       my_err = stat( my_pathp, &my_sb );
-       if ( my_err != 0 ) {
-               printf( "stat - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       TIMESPEC_TO_TIMEVAL( &my_utimes[2], &my_sb.st_atimespec );
-       TIMESPEC_TO_TIMEVAL( &my_utimes[3], &my_sb.st_mtimespec );
-       if ( my_utimes[0].tv_sec != my_utimes[2].tv_sec ||
-                my_utimes[1].tv_sec != my_utimes[3].tv_sec ) {
-               printf( "utimes failed to set access and mod times \n" );
-               goto test_failed_exit;
-       }
-       
-       my_fd = open( my_pathp, O_RDWR, 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       my_utimes[0].tv_sec -= 120;  /* make access time 2 minutes older */ 
-       my_utimes[1].tv_sec -= 120;  /* make mod time 2 minutes older */ 
-       my_err = futimes( my_fd, &my_utimes[0] );
-       if ( my_err == -1 ) {
-               printf( "futimes - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* make sure the correct times are set */
-       my_err = stat( my_pathp, &my_sb );
-       if ( my_err != 0 ) {
-               printf( "stat - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       TIMESPEC_TO_TIMEVAL( &my_utimes[2], &my_sb.st_atimespec );
-       TIMESPEC_TO_TIMEVAL( &my_utimes[3], &my_sb.st_mtimespec );
-       if ( my_utimes[0].tv_sec != my_utimes[2].tv_sec ||
-                my_utimes[1].tv_sec != my_utimes[3].tv_sec ) {
-               printf( "futimes failed to set access and mod times \n" );
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);      
-        }
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test rename, stat system calls.
- *  **************************************************************************************************************
- */
-int rename_test( void * the_argp )
-{
-       int                             my_err;
-       char *                  my_pathp = NULL;
-       char *                  my_new_pathp = NULL;
-       ino_t                   my_file_id;
-       struct stat             my_sb;
-       kern_return_t           my_kr;
-
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_pathp, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_new_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_new_pathp = 0x00;
-       strcat( my_new_pathp, &g_target_path[0] );
-       strcat( my_new_pathp, "/" );
-
-       /* get a unique name for our rename test */
-       my_err = create_random_name( my_new_pathp, 0 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-               
-       /* save file ID for later use */
-       my_err = stat( my_pathp, &my_sb );
-       if ( my_err != 0 ) {
-               printf( "stat - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       my_file_id = my_sb.st_ino;
-       
-       /* test rename */
-       my_err = rename( my_pathp, my_new_pathp );
-       if ( my_err == -1 ) {
-               printf( "rename - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-               
-       /* make sure old name is no longer there */
-       my_err = stat( my_pathp, &my_sb );
-       if ( my_err == 0 ) {
-               printf( "rename call failed - found old name \n" );
-               goto test_failed_exit;
-       }
-               
-       /* make sure new name is there and is correct file id */
-       my_err = stat( my_new_pathp, &my_sb );
-       if ( my_err != 0 ) {
-               printf( "stat - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_file_id != my_sb.st_ino ) {
-               printf( "rename failed - wrong file id \n" );
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);      
-        }
-       if ( my_new_pathp != NULL ) {
-               remove( my_new_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_new_pathp, PATH_MAX);  
-        }
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test locking system calls.
- *  **************************************************************************************************************
- */
-int locking_test( void * the_argp )
-{
-       int                     my_err, my_status;
-       pid_t           my_pid, my_wait_pid;
-       int                     my_fd = -1;
-       char *          my_pathp = NULL;
-       kern_return_t           my_kr;
-
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_pathp, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-       
-       /* test flock */
-       my_fd = open( my_pathp, O_RDWR, 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       my_err =  flock( my_fd, LOCK_EX );
-       if ( my_err == -1 ) {
-               printf( "flock - LOCK_EX - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /*
-        * spin off a child process that we will use for testing.   
-        */
-       my_pid = fork( );
-       if ( my_pid == -1 ) {
-               printf( "fork failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_pid == 0 ) {
-               /* 
-                * child process.
-                */
-               int                     my_child_fd = -1;
-               int                     my_child_err;
-               
-               my_child_fd = open( my_pathp, O_RDWR, 0 );
-               if ( my_child_fd == -1 ) {
-                       printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_child_err = -1;
-                       goto child_exit;
-               }
-
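-               /* the parent already holds LOCK_EX on this file, so a non-blocking exclusive lock here should fail with EWOULDBLOCK */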
-               my_err =  flock( my_child_fd, (LOCK_EX | LOCK_NB) );
-               if ( my_err == -1 ) {
-                       if ( errno != EWOULDBLOCK ) {
-                               printf( "flock call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                               my_child_err = -1;
-                               goto child_exit;
-                       }
-               }
-               else {
-                       printf( "flock call should have failed with EWOULDBLOCK err \n" );
-                       my_child_err = -1;
-                       goto child_exit;
-               }
-               my_child_err = 0;
-child_exit:
-               if ( my_child_fd != -1 )
-                       close( my_child_fd );
-               exit( my_child_err );
-       }
-
-       /* parent process -
-        * wait for child to exit 
-        */
-       my_wait_pid = wait4( my_pid, &my_status, 0, NULL );
-       if ( my_wait_pid == -1 ) {
-               printf( "wait4 failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       if ( WIFEXITED( my_status ) && WEXITSTATUS( my_status ) != 0 ) {
-               goto test_failed_exit;
-       }
-
-       my_err =  flock( my_fd, LOCK_UN );
-       if ( my_err == -1 ) {
-               printf( "flock - LOCK_UN - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);      
-        }
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test mkfifo system calls.
- *  **************************************************************************************************************
- */
-int mkfifo_test( void * the_argp )
-{
-       int                     my_err, my_status;
-       pid_t           my_pid, my_wait_pid;
-       int                     my_fd = -1;
-       char *          my_pathp = NULL;
-       ssize_t         my_result;
-       off_t           my_current_offset;
-       kern_return_t           my_kr;
-
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* get unique name for our fifo */
-       my_err = create_random_name( my_pathp, 0 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-
-       my_err = mkfifo( my_pathp, (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) );
-       if ( my_err != 0 ) {
-               printf( "mkfifo failed with errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       /*
-        * spin off a child process that we will use for testing.   
-        */
-       my_pid = fork( );
-       if ( my_pid == -1 ) {
-               printf( "fork failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_pid == 0 ) {
-               /* 
-                * child process.
-                */
-               int                     my_child_fd = -1;
-               int                     my_child_err;
-               char            my_buffer[64];
-               
-               /* open the fifo read/write so the open does not block waiting for a writer */
-               my_child_fd = open( my_pathp, O_RDWR, 0 );
-               if ( my_child_fd == -1 ) {
-                       printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_child_err = -1;
-                       goto child_exit;
-               }
-
-               /* read message from parent */
-               bzero( (void *)&my_buffer[0], sizeof(my_buffer) );
-               my_result = read( my_child_fd, &my_buffer[0], sizeof(my_buffer) );
-               if ( my_result == -1 ) {
-                       printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       my_child_err = -1;
-                       goto child_exit;
-               }
-               if ( strcmp( "parent to child", &my_buffer[0] ) != 0 ) {
-                       printf( "read wrong message from parent \n" );
-                       my_child_err = -1;
-                       goto child_exit;
-               }
-
-               my_child_err = 0;
-child_exit:
-               if ( my_child_fd != -1 )
-                       close( my_child_fd );
-               exit( my_child_err );
-       }
-
-       /* parent process - open write end of fifo
-        */
-       my_fd = open( my_pathp, O_WRONLY, 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       /* make sure we can't seek on a fifo (lseek is expected to fail with ESPIPE) */
-       my_current_offset = lseek( my_fd, 0, SEEK_CUR );
-       if ( my_current_offset != -1 ) {
-               printf( "lseek on fifo should fail but did not \n" );
-               goto test_failed_exit;
-       }
-
-       my_result = write( my_fd, "parent to child", 15 );
-       if ( my_result == -1 ) {
-               printf( "write call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       my_wait_pid = wait4( my_pid, &my_status, 0, NULL );
-       if ( my_wait_pid == -1 ) {
-               printf( "wait4 failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       if ( WIFEXITED( my_status ) && WEXITSTATUS( my_status ) != 0 ) {
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);      
-        }
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test quotactl system calls.
- *  **************************************************************************************************************
- */
-int quotactl_test( void * the_argp )
-{
-       int                             my_err;
-       int                             is_quotas_on = 0;
-       struct dqblk    my_quota_blk;
-
-       if ( g_skip_setuid_tests != 0 ) {
-               printf( "\t skipping this test \n" );
-               my_err = 0;
-               goto test_passed_exit;
-       }
-       
-       /* start off by checking the status of quotas on the boot volume */
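-       /* any path on the volume will do - quotactl operates on the file system containing that path */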
-       my_err = quotactl( "/System/Library/Kernels/kernel", QCMD(Q_QUOTASTAT, USRQUOTA), 0, (caddr_t)&is_quotas_on );
-       if ( my_err == -1 ) {
-               printf( "quotactl - Q_QUOTASTAT - failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       if ( is_quotas_on == 0 ) {
-               /* quotas are off */
-               my_err = 0;
-               goto test_passed_exit;
-       }
-
-       my_err = quotactl( "/System/Library/Kernels/kernel", QCMD(Q_GETQUOTA, USRQUOTA), getuid(), (caddr_t)&my_quota_blk );
-       if ( my_err == -1 ) {
-               printf( "quotactl - Q_GETQUOTA - failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test getrlimit, setrlimit system calls.
- *  **************************************************************************************************************
- */
-int limit_tests( void * the_argp )
-{
-       int                             my_err;
-       struct rlimit   my_current_rlimit;
-       struct rlimit   my_rlimit;
-
-       my_err = getrlimit( RLIMIT_NOFILE, &my_current_rlimit );
-       if ( my_err == -1 ) {
-               printf( "getrlimit - failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_current_rlimit.rlim_cur != RLIM_INFINITY ) {
-               if ( my_current_rlimit.rlim_cur != my_current_rlimit.rlim_max )
-                       my_current_rlimit.rlim_cur += 1;
-               else
-                       my_current_rlimit.rlim_cur -= 1;
-               my_rlimit.rlim_cur = my_current_rlimit.rlim_cur;
-               my_rlimit.rlim_max = my_current_rlimit.rlim_max;
-               my_err = setrlimit( RLIMIT_NOFILE, &my_rlimit );
-               if ( my_err == -1 ) {
-                       printf( "setrlimit - failed with errno %d - %s \n", errno, strerror( errno ) );
-                       goto test_failed_exit;
-               }
-               
-               /* verify that we set a new limit */
-               bzero( (void *) &my_rlimit, sizeof( my_rlimit ) );
-               my_err = getrlimit( RLIMIT_NOFILE, &my_rlimit );
-               if ( my_err == -1 ) {
-                       printf( "getrlimit - failed with errno %d - %s \n", errno, strerror( errno ) );
-                       goto test_failed_exit;
-               }
-               if ( my_rlimit.rlim_cur != my_current_rlimit.rlim_cur ) {
-                       printf( "failed to get/set new RLIMIT_NOFILE soft limit \n" );
-                       printf( "soft limits - current %lld should be %lld \n", my_rlimit.rlim_cur, my_current_rlimit.rlim_cur );
-                       goto test_failed_exit;
-               }
-
-#if CONFORMANCE_CHANGES_IN_XNU // can't do this check until conformance changes get into xnu 
-               printf( "hard limits - current %lld should be %lld \n", my_rlimit.rlim_max, my_current_rlimit.rlim_max );
-               if ( my_rlimit.rlim_max != my_current_rlimit.rlim_max ) {
-                       printf( "failed to get/set new RLIMIT_NOFILE hard limit \n" );
-                       goto test_failed_exit;
-               }
-#endif
-
-               /* 
-                * A test for a limit that won't fit in a signed 32 bits, a la 5414697 
-                * Note: my_rlimit should still have a valid rlim_max.
-                */
-               long long biglim = 2147483649ll;        /* Just over 2^31 */
-               my_rlimit.rlim_cur = biglim;                    
-               my_err = setrlimit(RLIMIT_CPU, &my_rlimit);     
-               if (my_err == -1) {
-                       printf("failed to set large limit.\n");
-                       goto test_failed_exit;
-               }
-
-               bzero(&my_rlimit, sizeof(struct rlimit));       
-               my_err = getrlimit(RLIMIT_CPU, &my_rlimit);
-               if (my_err == -1) {
-                       printf("after setting large value, failed to getrlimit().\n");
-                       goto test_failed_exit;
-               }
-
-               if (my_rlimit.rlim_cur != biglim) {
-                       printf("didn't retrieve large limit.\n");
-                       goto test_failed_exit;
-               }
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test getattrlist, getdirentriesattr, setattrlist system calls.
- *  **************************************************************************************************************
- */
-struct test_attr_buf {
-       uint32_t                        length;
-       fsobj_type_t            obj_type;
-       fsobj_id_t                      obj_id;
-       struct timespec         backup_time;
-};
-       
-typedef struct test_attr_buf test_attr_buf;
-
-int directory_tests( void * the_argp )
-{
-       int                                     my_err, done, found_it, i;
-       int                                     my_fd = -1;
-       int                                     is_ufs = 0;
-       char *                          my_pathp = NULL;
-       char *                          my_bufp = NULL;
-       char *                          my_file_namep;
-#ifdef __LP64__
-       unsigned int            my_base;
-       unsigned int            my_count;
-       unsigned int            my_new_state;
-#else
-       unsigned long           my_base;
-       unsigned long           my_count;
-       unsigned long           my_new_state;
-#endif
-       fsobj_id_t                      my_obj_id;
-       struct timespec         my_new_backup_time;
-       struct attrlist         my_attrlist;
-       test_attr_buf           my_attr_buf[4];
-       struct statfs           my_statfs_buf;
-       kern_return_t           my_kr;
-
-       /* need to know type of file system */
-       my_err = statfs( &g_target_path[0], &my_statfs_buf );
-       if ( my_err == -1 ) {
-               printf( "statfs call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( memcmp( &my_statfs_buf.f_fstypename[0], "ufs", 3 ) == 0 ) {
-               is_ufs = 1;
-       }
-
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_bufp, (1024 * 5), VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_pathp, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-       
-       /* get pointer to just the file name */
-       my_file_namep = strrchr( my_pathp, '/' );
-       my_file_namep++;
-       
-       /* open the test directory */
-       my_fd = open( &g_target_path[0], (O_RDONLY), 0 );
-       if ( my_fd == -1 ) {
-               printf( "open failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* test get/setattrlist */
-       memset( &my_attrlist, 0, sizeof(my_attrlist) );
-       my_attrlist.bitmapcount = ATTR_BIT_MAP_COUNT;
-       my_attrlist.commonattr = (ATTR_CMN_OBJTYPE | ATTR_CMN_OBJID | ATTR_CMN_BKUPTIME); 
-       my_err = getattrlist( my_pathp, &my_attrlist, &my_attr_buf[0], sizeof(my_attr_buf[0]), 0 );
-
-       if ( my_err != 0 ) {
-               if ( errno == ENOTSUP && is_ufs ) {
-                       /* getattr calls not supported on ufs */
-                       my_err = 0;
-                       goto test_passed_exit;
-               }
-               printf( "getattrlist call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       /* validate returned data */
-       if ( my_attr_buf[0].obj_type != VREG ) {
-               printf( "getattrlist returned incorrect obj_type data. \n" );
-               goto test_failed_exit;
-       }
-       
-       /* set new backup time */
-       my_obj_id = my_attr_buf[0].obj_id;
-       my_new_backup_time = my_attr_buf[0].backup_time;
-       my_new_backup_time.tv_sec += 60;
-       my_attr_buf[0].backup_time.tv_sec = my_new_backup_time.tv_sec;
-       my_attrlist.commonattr = (ATTR_CMN_BKUPTIME); 
-       my_err = setattrlist( my_pathp, &my_attrlist, &my_attr_buf[0].backup_time, sizeof(my_attr_buf[0].backup_time), 0 );
-       if ( my_err != 0 ) {
-               printf( "setattrlist call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* validate setattrlist using getdirentriesattr */
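-       /* close and reopen the directory so the getdirentriesattr scan starts from the first entry */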
-       close( my_fd );
-       my_fd = open( &g_target_path[0], (O_RDONLY), 0 );
-       if ( my_fd == -1 ) {
-               printf( "open failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       memset( &my_attrlist, 0, sizeof(my_attrlist) );
-       memset( &my_attr_buf, 0, sizeof(my_attr_buf) );
-       my_attrlist.bitmapcount = ATTR_BIT_MAP_COUNT;
-       my_attrlist.commonattr = (ATTR_CMN_OBJTYPE | ATTR_CMN_OBJID | ATTR_CMN_BKUPTIME); 
-       my_count = 4;
-       my_base = 0;
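-       /* my_count is an in/out parameter: entries requested on entry, entries actually returned on exit */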
-       my_err = getdirentriesattr( my_fd, &my_attrlist, &my_attr_buf[0], sizeof(my_attr_buf), &my_count,
-                                                               &my_base, &my_new_state, 0 );
-       if ( my_err < 0 ) {
-               printf( "getdirentriesattr call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       found_it = 0;
-       for ( i = 0; i < my_count; i++ ) {
-               if ( my_attr_buf[i].obj_id.fid_objno == my_obj_id.fid_objno &&
-                        my_attr_buf[i].obj_id.fid_generation == my_obj_id.fid_generation ) {
-                       found_it = 1;
-                       if ( my_attr_buf[i].backup_time.tv_sec !=  my_new_backup_time.tv_sec ) {
-                               printf( "setattrlist failed to set backup time. \n" );
-                               goto test_failed_exit;
-                       }
-               }
-       }
-       if ( found_it == 0 ) {
-               printf( "getdirentriesattr failed to find test file. \n" );
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       if(my_err != 0)
-               my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);      
-        }
-       if ( my_bufp != NULL ) {
-               vm_deallocate(mach_task_self(), (vm_address_t)my_bufp, (1024 * 5));
-        }
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test exchangedata system calls.
- *  **************************************************************************************************************
- */
-int exchangedata_test( void * the_argp )
-{
-       int                             my_err;
-       int                             my_fd1 = -1;
-       int                             my_fd2 = -1;
-       char *                  my_file1_pathp = NULL;
-       char *                  my_file2_pathp = NULL;
-       ssize_t                 my_result;
-       char                    my_buffer[16];
-       struct statfs   my_statfs_buf;
-       kern_return_t           my_kr;
-
-       /* need to know type of file system */
-       my_err = statfs( &g_target_path[0], &my_statfs_buf );
-       if ( my_err == -1 ) {
-               printf( "statfs call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( memcmp( &my_statfs_buf.f_fstypename[0], "ufs", 3 ) == 0 ) {
-               /* ufs does not support exchangedata */
-               my_err = 0;
-               goto test_passed_exit;
-       }
-
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_file1_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_file1_pathp = 0x00;
-       strcat( my_file1_pathp, &g_target_path[0] );
-       strcat( my_file1_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_file1_pathp, 1 );
-       if ( my_err != 0 ) {
-               printf( "create_random_name my_err: %d\n", my_err );
-               goto test_failed_exit;
-       }
-       my_fd1 = open( my_file1_pathp, O_RDWR, 0 );
-       if ( my_fd1 == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       my_result = write( my_fd1, "11111111", 8 );
-       if ( my_result == -1 ) {
-               printf( "write call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_file2_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_file2_pathp = 0x00;
-       strcat( my_file2_pathp, &g_target_path[0] );
-       strcat( my_file2_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_file2_pathp, 1 );
-       if ( my_err != 0 ) {
-               printf( "create_random_name my_err: %d\n", my_err );
-               goto test_failed_exit;
-       }
-       my_fd2 = open( my_file2_pathp, O_RDWR, 0 );
-       if ( my_fd2 == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       my_result = write( my_fd2, "22222222", 8 );
-       if ( my_result == -1 ) {
-               printf( "write call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       close(my_fd1);
-       my_fd1 = -1;
-       close(my_fd2);
-       my_fd2 = -1;
-       
-       /* test exchangedata */
-       my_err = exchangedata( my_file1_pathp, my_file2_pathp, 0 );
-       if ( my_err == -1 ) {
-               printf( "exchangedata failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       /* now validate exchange */
-       my_fd1 = open( my_file1_pathp, O_RDONLY, 0 );
-       if ( my_fd1 == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       bzero( (void *)&my_buffer[0], sizeof(my_buffer) );
-       my_result = read( my_fd1, &my_buffer[0], 8 );
-       if ( my_result == -1 ) {
-               printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       if ( memcmp( &my_buffer[0], "22222222", 8 ) != 0 ) {
-               printf( "exchangedata failed - incorrect data in file \n" );
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd1 != -1 )
-               close( my_fd1 );
-       if ( my_file1_pathp != NULL ) {
-               remove( my_file1_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_file1_pathp, PATH_MAX);        
-        }
-       if ( my_fd2 != -1 )
-               close( my_fd2 );
-       if ( my_file2_pathp != NULL ) {
-               remove( my_file2_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_file2_pathp, PATH_MAX);        
-        }
-       return( my_err );
-}
-
-
-/*  **************************************************************************************************************
- *     Test searchfs system calls.
- *  **************************************************************************************************************
- */
-
-struct packed_name_attr {
-    u_int32_t              size;       /* Of the remaining fields */
-    struct attrreference       ref;    /* Offset/length of name itself */
-    char                               name[  PATH_MAX ];
-};
-
-struct packed_attr_ref {
-    u_int32_t                      size;       /* Of the remaining fields */
-    struct attrreference       ref;    /* Offset/length of attr itself */
-};
-
-struct packed_result {
-    u_int32_t          size;           /* Including size field itself */
-    attrreference_t     obj_name;
-    struct fsobj_id        obj_id;
-    struct timespec     obj_create_time;
-    char                room_for_name[ 64 ];
-};
-typedef struct packed_result packed_result;
-typedef struct packed_result * packed_result_p;
-
-#define MAX_MATCHES    10
-#define MAX_EBUSY_RETRIES 20
-
-int searchfs_test( void * the_argp )
-{
-       int                                             my_err, my_items_found = 0, my_ebusy_count;
-       char *                                  my_pathp = NULL;
-    unsigned long                      my_matches;
-    unsigned long                      my_search_options;
-    struct fssearchblock       my_search_blk;
-    struct attrlist                    my_return_list;
-    struct searchstate         my_search_state;
-    struct packed_name_attr    my_info1;
-    struct packed_attr_ref     my_info2;
-    packed_result                      my_result_buffer[ MAX_MATCHES ];
-       struct statfs                   my_statfs_buf;
-       kern_return_t           my_kr;
-
-       /* need to know type of file system */
-       my_err = statfs( &g_target_path[0], &my_statfs_buf );
-       if ( my_err == -1 ) {
-               printf( "statfs call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( memcmp( &my_statfs_buf.f_fstypename[0], "ufs", 3 ) == 0 ) {
-               /* ufs does not support searchfs */
-               my_err = 0;
-               goto test_passed_exit;
-       }
-
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* create test files */
-       my_err = create_file_with_name( my_pathp, "foo", 0 );
-       if ( my_err < 0 ) {
-               printf( "failed to create a test file name in \"%s\" \n", my_pathp );
-               goto test_failed_exit;
-       }
-       
-       my_err = create_file_with_name( my_pathp, "foobar", 0 );
-       if ( my_err < 0 ) {
-               printf( "failed to create a test file name in \"%s\" \n", my_pathp );
-               goto test_failed_exit;
-       }
-       
-       my_err = create_file_with_name( my_pathp, "foofoo", 0 );
-       if ( my_err < 0 ) {
-               printf( "failed to create a test file name in \"%s\" \n", my_pathp );
-               goto test_failed_exit;
-       }
-       
-       my_err = create_file_with_name( my_pathp, "xxxfoo", 0 );
-       if ( my_err < 0 ) {
-               printf( "failed to create a test file name in \"%s\" \n", my_pathp );
-               goto test_failed_exit;
-       }
-
-    /* EBUSY retry count; incremented below the catalogue_changed label so a retry does not reset it */
-    my_ebusy_count = 0; 
-
-catalogue_changed:
-       /* search target volume for all file system objects with "foo" in the name */
-    /* Set up the attributes we're searching on. */
-    my_items_found = 0; /* Set this here in case we're completely restarting */
-    my_search_blk.searchattrs.bitmapcount = ATTR_BIT_MAP_COUNT;
-    my_search_blk.searchattrs.reserved = 0;
-    my_search_blk.searchattrs.commonattr = ATTR_CMN_NAME;
-    my_search_blk.searchattrs.volattr = 0;
-    my_search_blk.searchattrs.dirattr = 0;
-    my_search_blk.searchattrs.fileattr = 0;
-    my_search_blk.searchattrs.forkattr = 0;
-    
-    /* Set up the attributes we want for all returned matches. */
-    /* Why is returnattrs a pointer instead of an embedded struct? */
-    my_search_blk.returnattrs = &my_return_list;
-    my_return_list.bitmapcount = ATTR_BIT_MAP_COUNT;
-    my_return_list.reserved = 0;
-    my_return_list.commonattr = ATTR_CMN_NAME | ATTR_CMN_OBJID | ATTR_CMN_CRTIME;
-    my_return_list.volattr = 0;
-    my_return_list.dirattr = 0;
-    my_return_list.fileattr = 0;
-    my_return_list.forkattr = 0;
-    
-    /* Allocate a buffer for returned matches */
-    my_search_blk.returnbuffer = my_result_buffer;
-    my_search_blk.returnbuffersize = sizeof(my_result_buffer);
-    
-    /* Pack the searchparams1 into a buffer */
-    /* NOTE: A name appears only in searchparams1 */
-    strcpy( my_info1.name, "foo" );
-    my_info1.ref.attr_dataoffset = sizeof(struct attrreference);
-    my_info1.ref.attr_length = strlen(my_info1.name) + 1;
-    my_info1.size = sizeof(struct attrreference) + my_info1.ref.attr_length;
-    my_search_blk.searchparams1 = &my_info1;
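-    /* my_info1.size covers only the fields after the size field itself, so add sizeof(u_int32_t) back for the total length */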
-    my_search_blk.sizeofsearchparams1 = my_info1.size + sizeof(u_int32_t);
-    
-    /* Pack the searchparams2 into a buffer */
-    my_info2.size = sizeof(struct attrreference);
-    my_info2.ref.attr_dataoffset = sizeof(struct attrreference);
-    my_info2.ref.attr_length = 0;
-    my_search_blk.searchparams2 = &my_info2;
-    my_search_blk.sizeofsearchparams2 = sizeof(my_info2);
-    
-    /* Maximum number of matches we want */
-    my_search_blk.maxmatches = MAX_MATCHES;
-    
-    /* Maximum time to search, per call */
-    my_search_blk.timelimit.tv_sec = 1;
-    my_search_blk.timelimit.tv_usec = 0;
-    
-    my_search_options = (SRCHFS_START | SRCHFS_MATCHPARTIALNAMES |
-                                                SRCHFS_MATCHFILES | SRCHFS_MATCHDIRS);
-       do {
-               char *  my_end_ptr;
-               char *  my_ptr;
-               int             i;
-               
-               my_err = searchfs( my_pathp, &my_search_blk, &my_matches, 0, my_search_options, &my_search_state );
-        if ( my_err == -1 )
-            my_err = errno;
-        if ( (my_err == 0 || my_err == EAGAIN) && my_matches > 0 ) {
-            /* Unpack the results */
-          //  printf("my_matches %d \n", my_matches);
-            my_ptr = (char *) &my_result_buffer[0];
-            my_end_ptr = (my_ptr + sizeof(my_result_buffer));
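-            /* each packed result starts with its own total size, so step through the buffer by that size on every pass */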
-            for ( i = 0; i < my_matches; ++i ) {
-                packed_result_p                my_result_p = (packed_result_p) my_ptr;
-                               char *                          my_name_p;
-                               
-                               /* see if we found all our test files */
-                               my_name_p = (((char *)(&my_result_p->obj_name)) + my_result_p->obj_name.attr_dataoffset);
-                               if ( memcmp( my_name_p, "foo", 3 ) == 0 ||
-                                        memcmp( my_name_p, "foobar", 6 ) == 0 ||
-                                        memcmp( my_name_p, "foofoo", 6 ) == 0 ||
-                                        memcmp( my_name_p, "xxxfoo", 6 ) == 0 ) {
-                                       my_items_found++;
-                               }
-#if DEBUG
-                printf("obj_name \"%.*s\" \n", 
-                    (int) my_result_p->obj_name.attr_length,
-                    (((char *)(&my_result_p->obj_name)) + 
-                     my_result_p->obj_name.attr_dataoffset));
-                printf("size %d fid_objno %d fid_generation %d tv_sec 0x%02LX \n", 
-                    my_result_p->size, my_result_p->obj_id.fid_objno, 
-                    my_result_p->obj_id.fid_generation, 
-                    my_result_p->obj_create_time.tv_sec);
-#endif                         
-                my_ptr = (my_ptr + my_result_p->size);
-                if (my_ptr > my_end_ptr)
-                    break;
-            }
-        }
-
-       /* EBUSY indicates catalogue change; retry a few times. */
-       if ((my_err == EBUSY) && (my_ebusy_count++ < MAX_EBUSY_RETRIES)) {
-               goto catalogue_changed;
-       }
-       if ( !(my_err == 0 || my_err == EAGAIN) ) {
-               printf( "searchfs failed with error %d - \"%s\" \n", my_err, strerror( my_err) );
-       }
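-       /* clear SRCHFS_START so an EAGAIN retry continues this search rather than starting a new one */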
-       my_search_options &= ~SRCHFS_START;
-    } while ( my_err == EAGAIN );
-
-       if ( my_items_found < 4 ) {
-               printf( "searchfs failed to find all test files \n" );
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_pathp != NULL ) {
-               char *   my_ptr = (my_pathp + strlen( my_pathp ));
-               strcat( my_pathp, "foo" );
-               remove( my_pathp );     
-               *my_ptr = 0x00;
-               strcat( my_pathp, "foobar" );
-               remove( my_pathp );     
-               *my_ptr = 0x00;
-               strcat( my_pathp, "foofoo" ); 
-               remove( my_pathp );     
-               *my_ptr = 0x00;
-               strcat( my_pathp, "xxxfoo" );
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);      
-        }
-       return( my_err );
-}
-
-
-#define  AIO_TESTS_BUFFER_SIZE  (1024 * 4000)
-#define  AIO_TESTS_OUR_COUNT  5
-/*  **************************************************************************************************************
- *     Test aio_error, aio_read, aio_return, aio_suspend, aio_write, fcntl system calls.
- *  **************************************************************************************************************
- */
-int aio_tests( void * the_argp )
-{
-       int                                     my_err, i;
-       char *                          my_pathp;
-       struct aiocb *          my_aiocbp;
-       ssize_t                         my_result;
-       struct timespec         my_timeout;
-       int                                     my_fd_list[ AIO_TESTS_OUR_COUNT ];
-       char *                          my_buffers[ AIO_TESTS_OUR_COUNT ];
-       struct aiocb *          my_aiocb_list[ AIO_TESTS_OUR_COUNT ];
-       struct aiocb            my_aiocbs[ AIO_TESTS_OUR_COUNT ];
-       char *                          my_file_paths[ AIO_TESTS_OUR_COUNT ];
-       kern_return_t           my_kr;
-
-       /* set up to have the ability to fire off up to AIO_TESTS_OUR_COUNT async IOs at once */
-       memset( &my_fd_list[0], 0xFF, sizeof( my_fd_list ) );
-       memset( &my_buffers[0], 0x00, sizeof( my_buffers ) );
-       memset( &my_aiocb_list[0], 0x00, sizeof( my_aiocb_list ) );
-       memset( &my_file_paths[0], 0x00, sizeof( my_file_paths ) );
-       for ( i = 0; i < AIO_TESTS_OUR_COUNT; i++ ) {
-               my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_buffers[ i ], AIO_TESTS_BUFFER_SIZE, VM_FLAGS_ANYWHERE);
-               if(my_kr != KERN_SUCCESS){
-                       printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       goto test_failed_exit;
-                       }
-
-               my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_file_paths[ i ], PATH_MAX, VM_FLAGS_ANYWHERE);
-                if(my_kr != KERN_SUCCESS){
-                        printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                        goto test_failed_exit;
-                }
-
-               my_pathp = my_file_paths[ i ];
-               *my_pathp = 0x00;
-               strcat( my_pathp, &g_target_path[0] );
-               strcat( my_pathp, "/" );
-
-               /* create a test file */
-               my_err = create_random_name( my_pathp, 1 );
-               if ( my_err != 0 ) {
-                       goto test_failed_exit;
-               }
-               my_fd_list[ i ] = open( my_pathp, O_RDWR, 0 );
-               if ( my_fd_list[ i ] <= 0 ) {
-                       printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       goto test_failed_exit;
-               }
-
-               my_aiocbp = &my_aiocbs[ i ];
-               my_aiocb_list[ i ] = my_aiocbp;
-               memset( my_aiocbp, 0x00, sizeof( *my_aiocbp ) );
-               my_aiocbp->aio_fildes = my_fd_list[ i ];
-               my_aiocbp->aio_buf = (char *) my_buffers[ i ];
-               my_aiocbp->aio_nbytes = 1024;
-               my_aiocbp->aio_sigevent.sigev_notify = SIGEV_NONE; // no signals at completion;
-               my_aiocbp->aio_sigevent.sigev_signo = 0;
-       }
-
-       /* test direct IO (F_NOCACHE) and aio_write */
-       my_err = fcntl( my_fd_list[ 0 ], F_NOCACHE, 1 );
-       if ( my_err != 0 ) {
-               printf( "fcntl - F_NOCACHE - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       my_aiocbp = &my_aiocbs[ 0 ];
-    my_aiocbp->aio_fildes = my_fd_list[ 0 ];
-       my_aiocbp->aio_offset = 4096;
-       my_aiocbp->aio_buf = my_buffers[ 0 ];
-    my_aiocbp->aio_nbytes = AIO_TESTS_BUFFER_SIZE;
-    my_aiocbp->aio_reqprio = 0;
-    my_aiocbp->aio_sigevent.sigev_notify = 0;
-    my_aiocbp->aio_sigevent.sigev_signo = 0;
-    my_aiocbp->aio_sigevent.sigev_value.sival_int = 0;
-    my_aiocbp->aio_sigevent.sigev_notify_function = NULL;
-    my_aiocbp->aio_sigevent.sigev_notify_attributes = NULL;
-    my_aiocbp->aio_lio_opcode = 0;
-       
-       /* write some data */
-       memset( my_buffers[ 0 ], 'j', AIO_TESTS_BUFFER_SIZE );
-    my_err = aio_write( my_aiocbp );
-       if ( my_err != 0 ) {
-               printf( "aio_write failed with error %d - \"%s\" \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-    
-    while ( 1 ) {
-        my_err = aio_error( my_aiocbp );
-        if ( my_err == EINPROGRESS ) {
-            /* wait for IO to complete */
-            sleep( 1 );
-            continue;
-        }
-        else if ( my_err == 0 ) {
-            ssize_t            my_result;
-            my_result = aio_return( my_aiocbp );
-            break;
-        }
-        else {
-                       printf( "aio_error failed with error %d - \"%s\" \n", my_err, strerror( my_err ) );
-                       goto test_failed_exit;
-        }
-    } /* while loop */
-
-       /* read some data */
-       memset( my_buffers[ 0 ], 'x', AIO_TESTS_BUFFER_SIZE );
-    my_err = aio_read( my_aiocbp );
-
-    while ( 1 ) {
-        my_err = aio_error( my_aiocbp );
-        if ( my_err == EINPROGRESS ) {
-            /* wait for IO to complete */
-            sleep( 1 );
-            continue;
-        }
-        else if ( my_err == 0 ) {
-            ssize_t            my_result;
-            my_result = aio_return( my_aiocbp );
-                       
-                       if ( *(my_buffers[ 0 ]) != 'j' || *(my_buffers[ 0 ] + AIO_TESTS_BUFFER_SIZE - 1) != 'j' ) {
-                               printf( "aio_read or aio_write failed - wrong data read \n" );
-                               goto test_failed_exit;
-                       }
-            break;
-        }
-        else {
-                       printf( "aio_read failed with error %d - \"%s\" \n", my_err, strerror( my_err ) );
-                       goto test_failed_exit;
-        }
-    } /* while loop */
-
-       /* test aio_fsync */
-       close( my_fd_list[ 0 ] );
-       my_fd_list[ 0 ] = open( my_pathp, O_RDWR, 0 );
-       if ( my_fd_list[ 0 ] == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       my_aiocbp = &my_aiocbs[ 0 ];
-    my_aiocbp->aio_fildes = my_fd_list[ 0 ];
-    my_aiocbp->aio_offset = 0;
-    my_aiocbp->aio_buf = my_buffers[ 0 ];
-    my_aiocbp->aio_nbytes = 1024;
-    my_aiocbp->aio_reqprio = 0;
-    my_aiocbp->aio_sigevent.sigev_notify = 0;
-    my_aiocbp->aio_sigevent.sigev_signo = 0;
-    my_aiocbp->aio_sigevent.sigev_value.sival_int = 0;
-    my_aiocbp->aio_sigevent.sigev_notify_function = NULL;
-    my_aiocbp->aio_sigevent.sigev_notify_attributes = NULL;
-    my_aiocbp->aio_lio_opcode = 0;
-       
-       /* write some data */
-       memset( my_buffers[ 0 ], 'e', 1024 );
-    my_err = aio_write( my_aiocbp );
-       if ( my_err != 0 ) {
-               printf( "aio_write failed with error %d - \"%s\" \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-    while ( 1 ) {
-        my_err = aio_error( my_aiocbp );
-        if ( my_err == EINPROGRESS ) {
-            /* wait for IO to complete */
-            sleep( 1 );
-            continue;
-        }
-        else if ( my_err == 0 ) {
-            ssize_t            my_result;
-            my_result = aio_return( my_aiocbp );
-            break;
-        }
-        else {
-                       printf( "aio_error failed with error %d - \"%s\" \n", my_err, strerror( my_err ) );
-                       goto test_failed_exit;
-        }
-    } /* while loop */
-
-       my_err = aio_fsync( O_SYNC, my_aiocbp );
-       if ( my_err != 0 ) {
-               printf( "aio_fsync failed with error %d - \"%s\" \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-    while ( 1 ) {
-        my_err = aio_error( my_aiocbp );
-        if ( my_err == EINPROGRESS ) {
-            /* wait for IO to complete */
-            sleep( 1 );
-            continue;
-        }
-        else if ( my_err == 0 ) {
-                       aio_return( my_aiocbp );
-            break;
-        }
-        else {
-                       printf( "aio_error failed with error %d - \"%s\" \n", my_err, strerror( my_err ) );
-                       goto test_failed_exit;
-        }
-    } /* while loop */
-
-       /* validate write */
-       memset( my_buffers[ 0 ], 0x20, 16 );
-       lseek( my_fd_list[ 0 ], 0, SEEK_SET );  
-       my_result = read( my_fd_list[ 0 ], my_buffers[ 0 ], 16);
-       if ( my_result == -1 ) {
-               printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( *(my_buffers[ 0 ]) != 'e' || *(my_buffers[ 0 ] + 16 - 1) != 'e' ) {
-               printf( "aio_fsync or aio_write failed - wrong data read \n" );
-               goto test_failed_exit;
-       }
-
-       /* test aio_suspend and lio_listio */
-       for ( i = 0; i < AIO_TESTS_OUR_COUNT; i++ ) {
-               memset( my_buffers[ i ], 'a', AIO_TESTS_BUFFER_SIZE );
-               my_aiocbp = &my_aiocbs[ i ];
-               my_aiocbp->aio_nbytes = AIO_TESTS_BUFFER_SIZE;
-               my_aiocbp->aio_lio_opcode = LIO_WRITE;
-       }
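-    /* LIO_NOWAIT queues all the writes and returns immediately; aio_suspend below waits for them to complete */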
-    my_err = lio_listio( LIO_NOWAIT, my_aiocb_list, AIO_TESTS_OUR_COUNT, NULL );
-       if ( my_err != 0 ) {
-               printf( "lio_listio call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       my_timeout.tv_sec = 1;
-       my_timeout.tv_nsec = 0;
-       my_err = aio_suspend( (const struct aiocb *const*) my_aiocb_list, AIO_TESTS_OUR_COUNT, &my_timeout );
-       if ( my_err != 0 ) {
-               printf( "aio_suspend call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* test aio_cancel */
-       for ( i = 0; i < AIO_TESTS_OUR_COUNT; i++ ) {
-               my_aiocbp = &my_aiocbs[ i ];
-               my_err = aio_cancel( my_aiocbp->aio_fildes, my_aiocbp );
-               if ( my_err != AIO_ALLDONE && my_err != AIO_CANCELED && my_err != AIO_NOTCANCELED ) {
-                       printf( "aio_cancel failed with error %d - \"%s\" \n", errno, strerror( errno ) );
-                       goto test_failed_exit;
-               }
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       for ( i = 0; i < AIO_TESTS_OUR_COUNT; i++ ) {
-               if ( my_fd_list[ i ] != -1 ) {
-                       close( my_fd_list[ i ] );
-                       my_fd_list[ i ] = -1;
-               }
-               if ( my_file_paths[ i ] != NULL ) {
-                       remove( my_file_paths[ i ] );
-                       vm_deallocate(mach_task_self(), (vm_address_t)my_file_paths[ i ], PATH_MAX);    
-                       my_file_paths[ i ] = NULL;
-               }
-               if ( my_buffers[ i ] != NULL ) {
-                       vm_deallocate(mach_task_self(), (vm_address_t)my_buffers[ i ], AIO_TESTS_BUFFER_SIZE);
-                       my_buffers[ i ] = NULL;
-               }
-       }
-       return( my_err );
-}
-
-
-/*  **************************************************************************************************************
- *     Test msgctl, msgget, msgrcv, msgsnd system calls. 
- *  **************************************************************************************************************
- */
-int message_queue_tests( void * the_argp )
-{
-       int                                     my_err;
-       int                                     my_msg_queue_id = -1;
-       ssize_t                         my_result;
-       struct msqid_ds         my_msq_ds;
-       struct testing_msq_message {
-               long    msq_type;
-               char    msq_buffer[ 32 ];
-       }                                       my_msg;
-
-       /* get a message queue established for our use */
-       my_msg_queue_id = msgget( IPC_PRIVATE, (IPC_CREAT | IPC_EXCL | IPC_R | IPC_W) );
-       if ( my_msg_queue_id == -1 ) {
-               printf( "msgget failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       
-       /* get some stats on our message queue */
-       my_err = msgctl( my_msg_queue_id, IPC_STAT, &my_msq_ds );
-       if ( my_err == -1 ) {
-               printf( "msgctl failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_msq_ds.msg_perm.cuid != geteuid( ) ) {
-               printf( "msgctl IPC_STAT failed to get correct creator uid \n" );
-               goto test_failed_exit;
-       }
-       if ( (my_msq_ds.msg_perm.mode & (IPC_R | IPC_W)) == 0 ) {
-               printf( "msgctl IPC_STAT failed to get correct mode \n" );
-               goto test_failed_exit;
-       }
-       
-       /* put a message into our queue */
-       my_msg.msq_type = 1;
-       strcpy( &my_msg.msq_buffer[ 0 ], "testing 1, 2, 3" );
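-       /* msgsnd's size argument counts only the message text, not the leading msq_type field */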
-       my_err = msgsnd( my_msg_queue_id, &my_msg, sizeof( my_msg.msq_buffer ), 0 );
-       if ( my_err == -1 ) {
-               printf( "msgsnd failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       my_err = msgctl( my_msg_queue_id, IPC_STAT, &my_msq_ds );
-       if ( my_err == -1 ) {
-               printf( "msgctl failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_msq_ds.msg_qnum != 1 ) {
-               printf( "msgctl IPC_STAT failed to get correct number of messages on the queue \n" );
-               goto test_failed_exit;
-       }
-
-       /* pull message off the queue */
-       bzero( (void *)&my_msg, sizeof( my_msg ) );
-       my_result = msgrcv( my_msg_queue_id, &my_msg, sizeof( my_msg.msq_buffer ), 0, 0 );
-       if ( my_result == -1 ) {
-               printf( "msgrcv failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_result != sizeof( my_msg.msq_buffer ) ) {
-               printf( "msgrcv failed to return the correct number of bytes in our buffer \n" );
-               goto test_failed_exit;
-       }
-       if ( strcmp( &my_msg.msq_buffer[ 0 ], "testing 1, 2, 3" ) != 0 ) {
-               printf( "msgrcv failed to get the correct message \n" );
-               goto test_failed_exit;
-       }
-
-       my_err = msgctl( my_msg_queue_id, IPC_STAT, &my_msq_ds );
-       if ( my_err == -1 ) {
-               printf( "msgctl failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_msq_ds.msg_qnum != 0 ) {
-               printf( "msgctl IPC_STAT failed to get correct number of messages on the queue \n" );
-               goto test_failed_exit;
-       }
-
-       /* tear down the message queue */
-       my_err = msgctl( my_msg_queue_id, IPC_RMID, NULL );
-       if ( my_err == -1 ) {
-               printf( "msgctl IPC_RMID failed with errno %d - %s \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       my_msg_queue_id = -1;
-
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_msg_queue_id != -1 ) {
-               msgctl( my_msg_queue_id, IPC_RMID, NULL );
-       }
-       return( my_err );
-}
-
-
-
-/*  **************************************************************************************************************
- *     Test execution from data and stack areas.
- *  **************************************************************************************************************
- */
-int data_exec_tests( void * the_argp )
-{
-       int my_err = 0;
-       int arch, bits;
-       posix_spawnattr_t attrp;
-       char *argv[] = { "helpers/data_exec32nonxspawn", NULL };
-
-       int my_pid, my_status, ret;
-       
-       if ((arch = get_architecture()) == -1) {
-               printf("data_exec_test: couldn't determine architecture\n");
-               goto test_failed_exit;
-       }
-
-       bits = get_bits();
-
-       /*
-        * If the machine is 64-bit capable, run both the 32 and 64 bit versions of the test.
-        * Otherwise, just run the 32-bit version.
-        */
-
-       if (arch == INTEL) {
-               if (bits == 64) {
-                       if (system("arch -arch x86_64 helpers/data_exec") != 0) {
-                               printf("data_exec-x86_64 failed\n");
-                               goto test_failed_exit;
-                       }
-               }
-
-               if (system("arch -arch i386 helpers/data_exec") != 0) {
-                       printf("data_exec-i386 failed\n");
-                       goto test_failed_exit;
-               }
-               
-               posix_spawnattr_init(&attrp);
-               posix_spawnattr_setflags(&attrp, _POSIX_SPAWN_ALLOW_DATA_EXEC );
-               ret = posix_spawn(&my_pid, "helpers/data_exec32nonxspawn", NULL, &attrp, argv, NULL);
-               if (ret) {
-			printf("data_exec-i386 failed in posix_spawn %s\n", strerror(ret));
-                       goto test_failed_exit;
-               }
-               ret = wait4(my_pid, &my_status, 0, NULL);
-               if (ret == -1) {
-                       printf("data_exec-i386 wait4 failed with errno %d - %s\n", errno, strerror(errno));
-                       goto test_failed_exit;
-               }
-               if (WEXITSTATUS(my_status) != 0) {
-                       printf("data_exec-i386 _POSIX_SPAWN_ALLOW_DATA_EXEC failed\n");
-                       goto test_failed_exit;
-               }
-       }
-
-       /* Add new architectures here similar to the above. */
-
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-
-test_passed_exit:
-       return my_err;
-}
-
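-/*
- * A minimal sketch (illustrative only, not called by the tests) of the posix_spawn pattern
- * used above, with the posix_spawnattr_destroy teardown the test omits.  The helper path is
- * only a placeholder; _POSIX_SPAWN_ALLOW_DATA_EXEC is the same private flag the test passes.
- */
-static int example_spawn_with_data_exec( void )
-{
-	posix_spawnattr_t	my_attr;
-	char *			my_argv[] = { "helpers/data_exec32nonxspawn", NULL };
-	int			my_pid, my_status, my_ret;
-
-	posix_spawnattr_init( &my_attr );
-	posix_spawnattr_setflags( &my_attr, _POSIX_SPAWN_ALLOW_DATA_EXEC );
-	my_ret = posix_spawn( &my_pid, my_argv[0], NULL, &my_attr, my_argv, NULL );
-	posix_spawnattr_destroy( &my_attr );
-	if ( my_ret != 0 ) {
-		/* posix_spawn returns the error number directly rather than setting errno */
-		printf( "posix_spawn failed with %d - %s \n", my_ret, strerror( my_ret ) );
-		return( -1 );
-	}
-	if ( wait4( my_pid, &my_status, 0, NULL ) == -1 || WEXITSTATUS( my_status ) != 0 ) {
-		return( -1 );
-	}
-	return( 0 );
-}
-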
-/*  **************************************************************************************************************
- *     Test KASLR-related functionality
- *  **************************************************************************************************************
- */
-int kaslr_test( void * the_argp )
-{
-       int result = 0;
-       uint64_t slide = 0;
-       size_t size;
-       int slide_enabled;
-
-       size = sizeof(slide_enabled);
-       result = sysctlbyname("kern.slide", &slide_enabled, &size, NULL, 0);
-       if (result != 0) {
-               printf("sysctlbyname(\"kern.slide\") failed with errno %d\n", errno);
-               goto test_failed_exit;
-       }
-
-       /* Test positive case first */
-       size = sizeof(slide);
-       result = kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size);
-       if (result == 0) {
-               /* syscall supported, slide must be non-zero if running latest xnu and KASLR is enabled */
-               if (slide_enabled && (slide == 0)) {
-                       printf("kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) reported slide of 0x%016llx\n", slide);
-                       goto test_failed_exit;
-               }
-               if (size != sizeof(slide)) {
-                       printf("kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) reported size of %lu\n", size);
-                       goto test_failed_exit;
-               }
-       } else {
-               /* Only ENOTSUP is allowed. If so, assume all calls will be unsupported */
-               if (errno == ENOTSUP) {
-                       return 0;
-               } else {
-                       printf("kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) returned unexpected errno (errno %d)\n", errno);
-                       goto test_failed_exit;
-               }
-       }
-       
-       /* Negative cases for expected failures */
-       size = sizeof(slide);
-       result = kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, NULL /* EFAULT */, &size);
-       if ((result == 0) || (errno != EFAULT)) {
-               printf("kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, NULL, &size) returned unexpected success or errno (result %d errno %d)\n", result, errno);
-               goto test_failed_exit;
-       }
-
-       size = sizeof(slide) + 1; /* EINVAL */
-       result = kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, NULL, &size);
-       if ((result == 0) || (errno != EINVAL)) {
-               printf("kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, NULL, &size+1) returned unexpected success or errno (result %d errno %d)\n", result, errno);
-               goto test_failed_exit;
-       }
-
-       result = kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, NULL /* EFAULT */, NULL /* EFAULT */);
-       if ((result == 0) || (errno != EFAULT)) {
-               printf("kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, NULL, NULL) returned unexpected success or errno (result %d errno %d)\n", result, errno);
-               goto test_failed_exit;
-       }
-
-       size = sizeof(slide);
-       result = kas_info(KAS_INFO_MAX_SELECTOR /* EINVAL */, &slide, &size);
-       if ((result == 0) || (errno != EINVAL)) {
-               printf("kas_info(KAS_INFO_MAX_SELECTOR, &slide, &size) returned unexpected success or errno (result %d errno %d)\n", result, errno);
-               goto test_failed_exit;
-       }
-
-       return 0;
-
-test_failed_exit:
-       return -1;
-}
-
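-/*
- * A minimal sketch (illustrative only, not called by the tests) of reading the kernel text
- * slide through the kas_info() interface exercised above.  It assumes the selector and
- * prototype from <sys/kas_info.h>; the call may require root privileges and fails with
- * ENOTSUP on kernels that do not support it.
- */
-static void example_print_kernel_slide( void )
-{
-	uint64_t	my_slide = 0;
-	size_t		my_size = sizeof( my_slide );
-
-	if ( kas_info( KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &my_slide, &my_size ) == 0 ) {
-		printf( "kernel text slide is 0x%016llx \n", my_slide );
-	}
-	else if ( errno == ENOTSUP ) {
-		printf( "kas_info is not supported by this kernel \n" );
-	}
-	else {
-		printf( "kas_info failed with errno %d - %s \n", errno, strerror( errno ) );
-	}
-}
-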
-typedef struct attrs {
-         uint32_t          attrs_length;
-         attribute_set_t   attrs_returned;
-         uint32_t          attr_error;
-         attrreference_t   attr_name;
-         fsobj_type_t      attr_obj_type;
-         
-         union {
-               struct {
-                       uint32_t entry_count;
-               } directory;
-               struct {
-                       off_t   size;
-               } file;
-         } attr_obj;
-         
-} attrs_t;
-
-int getattrlistbulk_test( void * the_argp )
-{
-
-       int     error;
-       struct attrlist attr_list;
-       attrs_t *attrsptr;
-       char    *entry_start;
-       int     retcount = 0, totalcount = 0;
-       int     index;
-       char    *nameptr;
-       int     attr_buf_size;
-       char    *attr_buf;
-       int     dirfd = -1;
-       char*   target = "/System/Library/CoreServices";
-
-       memset(&attr_list, 0, sizeof(attr_list));
-       attr_list.bitmapcount = ATTR_BIT_MAP_COUNT;
-	attr_list.commonattr  = ATTR_CMN_RETURNED_ATTRS |
-			ATTR_CMN_NAME |
-			ATTR_CMN_OBJTYPE |
-			ATTR_CMN_ERROR;
-	attr_list.fileattr    = ATTR_FILE_TOTALSIZE;
-	attr_list.dirattr     = ATTR_DIR_ENTRYCOUNT;
-
-       error = 0;
-       /*allocate a buffer for 10 items*/
-       attr_buf_size = 10 * (sizeof(attrs_t) + FILENAME_MAX );
-       if (vm_allocate((vm_map_t) mach_task_self(),
-               (vm_address_t*)&attr_buf, 
-               attr_buf_size, VM_FLAGS_ANYWHERE) != KERN_SUCCESS) {
-               printf( "vm_allocate failed with error %d - \"%s\" \n", 
-                       errno, strerror( errno) );
-               attr_buf = NULL;
-               error = -1;
-               goto last_exit;
-       }
-       
-       dirfd = openat (AT_FDCWD, target, O_RDONLY, 0);
-       if (dirfd == -1) {
-               printf("openat \"%s\" failed with  error %d - \"%s\" \n", 
-                       target, errno, strerror( errno));
-               error = -1;
-               goto last_exit;
-       } 
-
-       do {
-               retcount = getattrlistbulk(dirfd, 
-                               &attr_list, &attr_buf[0],
-                               attr_buf_size, FSOPT_PACK_INVAL_ATTRS);
-                if (retcount == -1) {
-                       printf("getattrlistbulk on %s returned %d items\n", 
-                               target, totalcount);
-                       printf("getattrlistbulk failed with  error %d - \"%s\" \n", 
-                               errno, strerror( errno));
-                       error = -1;
-                       break;
-                } else if (retcount == 0) {
-                       /* No more entries in directory */
-			printf("getattrlistbulk succeeded: found %d entries in %s\n", totalcount, target);
-                       error = 0;
-                       break;
-                } else {
-                       totalcount += retcount;    
-                       entry_start = &attr_buf[0];
-                       for (index = 0; index < retcount; index++) {
-                               /*set attrsptr to item record buffer*/
-                               attrsptr = (attrs_t *)entry_start;
-
-                               /*
-                                *calculate starting point for next item in bulk
-                                *list
-                                */
-                               entry_start += attrsptr->attrs_length;
-
-                               if ((attrsptr->attrs_returned.commonattr & ATTR_CMN_ERROR) &&
-                                    attrsptr->attr_error) {
-                                       nameptr = (char*)(&(attrsptr->attr_name)) + attrsptr->attr_name.attr_dataoffset;
-                                       printf("getattrlistbulk item \"%s\" ATTR_CMN_ERROR %d \"%s\"\n",
-                                               nameptr, attrsptr->attr_error, 
-                                               strerror(attrsptr->attr_error));
-                               }
-                       }
-               } 
-       } while (1);
-       
-last_exit:
-       if (dirfd != -1) {
-               (void)close(dirfd);
-       }
-       
-       if (attr_buf != NULL) {
-               vm_deallocate(
-                       mach_task_self(), (vm_address_t)attr_buf, attr_buf_size);       
-       }
-
-       return error;
-}
-
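-/*
- * A minimal sketch (illustrative only, not called by the tests) of pulling the per-object
- * payload out of a record returned by getattrlistbulk(), assuming ATTR_FILE_TOTALSIZE and
- * ATTR_DIR_ENTRYCOUNT are requested through the fileattr and dirattr fields of the attrlist
- * as set up above.  The ATTR_CMN_RETURNED_ATTRS bitmap says which attribute groups were
- * actually packed for a given entry, so the union at the end of attrs_t holds the entry
- * count for directories and the total size for plain files.
- */
-static void example_print_bulk_entry( const attrs_t *the_entry )
-{
-	const char *my_namep = (const char *)(&the_entry->attr_name) + the_entry->attr_name.attr_dataoffset;
-
-	if ( (the_entry->attrs_returned.dirattr & ATTR_DIR_ENTRYCOUNT) != 0 ) {
-		printf( "directory \"%s\" holds %u entries \n", my_namep, the_entry->attr_obj.directory.entry_count );
-	}
-	else if ( (the_entry->attrs_returned.fileattr & ATTR_FILE_TOTALSIZE) != 0 ) {
-		printf( "file \"%s\" is %lld bytes \n", my_namep, (long long) the_entry->attr_obj.file.size );
-	}
-	else {
-		printf( "\"%s\" - no size information was returned \n", my_namep );
-	}
-}
-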
-#define INVALID_FD -173
-static int create_random_name_at(int the_dirfd, char *the_dirpathp, 
-                       char *the_namep, size_t the_namep_len, 
-                       char *the_pathp, size_t the_pathp_len, 
-                       int do_create );
-/*
- * create_random_name_at - creates a file with a random / unique name in the given directory.
- * When do_create is true we create the file; otherwise we only generate a name that does not
- * exist in the given directory (nothing is created when do_create is 0).
- * The name is generated relative to the directory fd.  If both a directory path and a buffer
- * to hold the full pathname are provided, an absolute pathname for the generated file is also
- * returned in the_pathp.  (A usage sketch follows the function body below.)
- * WARNING - caller provides enough space in the_namep buffer for longest possible name (NAME_MAX).
- * WARNING - caller provides enough space in the_pathp buffer for longest possible path (PATH_MAX).
- * RAND_MAX is currently 2147483647 (ten characters plus one for a slash)
- */
-int create_random_name_at(int the_dirfd, char *the_dirpathp, 
-                       char *the_namep, size_t the_namep_len, 
-                       char *the_pathp, size_t the_pathp_len, 
-                       int do_create )
-{
-       int             i, my_err;
-       int             my_fd = -1;
-
-       for ( i = 0; i < 1; i++ ) {
-               int             my_rand;
-               char            *myp;
-               char            my_name[32];
-
-               my_rand = rand( );
-               sprintf( &my_name[0], "%d", my_rand );
-               if ( (strlen( &my_name[0] ) + strlen( the_dirpathp ) + 2) > PATH_MAX ) {
-                       printf( "%s - path to test file greater than PATH_MAX \n", __FUNCTION__ );
-                       return( -1 );
-               }
-
-               // generate name and absolute path
-               myp = the_namep;
-               *(myp) = (char)0x00;
-               strlcat(the_namep, &my_name[0], the_namep_len);
-
-               /*
-                *If the caller has passed in a path pointer and directory path
-                *it means an absolute path is to be returned as well.
-                */
-               if (the_pathp && the_dirpathp) {
-                       *the_pathp = (char)0x00;
-                       strlcat(the_pathp, the_dirpathp, the_pathp_len);
-                       strlcat(the_pathp, "/", the_pathp_len);
-                       strlcat(the_pathp, the_namep, the_pathp_len);
-               }
-
-               if (do_create) {
-                       /* create a file with this name */
-                       my_fd = openat( the_dirfd, the_namep, (O_RDWR | O_CREAT | O_EXCL),
-                                    (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) );
-                       if ( my_fd == -1 ) {
-                               if ( errno != EEXIST ) {
-                                       printf( "%s - open failed with errno %d - %s \n",
-                                              __FUNCTION__, errno, strerror( errno ) );
-                                       return( -1 );
-                               }
-                               // name already exists, try another
-                               i--;
-                               continue;
-                       }
-               }
-
-               else {
-                       /* make sure the name is unique */
-                       struct stat             my_sb;
-                       my_err = fstatat( the_dirfd, the_namep, &my_sb, 0 );
-                       if ( my_err != 0 ) {
-                               if ( errno == ENOENT ) {
-                                       break;
-                               }
-                               else {
-					printf( "%s - fstatat failed with errno %d - %s \n",
-                                              __FUNCTION__, errno, strerror( errno ) );
-                                       return( -1 );
-                               }
-                       }
-                       /* name already exists, try another */
-                       i--;
-                       continue;
-               }
-       }
-
-       if ( my_fd != -1 )
-               close( my_fd );
-
-       return( 0 );
-
-} /* create_random_name_at */
-
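-/*
- * A minimal usage sketch for create_random_name_at (illustrative only, not called by the
- * tests).  The caller owns buffers of at least NAME_MAX and PATH_MAX bytes; do_create picks
- * between creating the file (1) and only reserving an unused name (0), and the absolute
- * path is only filled in when a directory path is passed along with the path buffer.
- */
-static int example_make_test_file( int the_dirfd, char *the_dirpathp )
-{
-	char	my_name[ NAME_MAX ];
-	char	my_path[ PATH_MAX ];
-
-	my_path[ 0 ] = 0;
-	/* create a uniquely named empty file in the directory named by the_dirfd / the_dirpathp */
-	if ( create_random_name_at( the_dirfd, the_dirpathp, my_name, sizeof( my_name ),
-				    my_path, sizeof( my_path ), 1 ) != 0 ) {
-		return( -1 );
-	}
-	printf( "created \"%s\" (\"%s\") \n", my_name, my_path );
-
-	/* clean up using the generated name relative to the directory fd */
-	return( unlinkat( the_dirfd, my_name, 0 ) );
-}
-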
-/*  **************************************************************************************************************
- *     Test close, fpathconf, fstat, open, pathconf system calls.
- *  **************************************************************************************************************
- */
-int openat_close_test( void * the_argp )
-{
-       int             my_err;
-       int             my_dirfd = -1;
-       int             my_fd = -1;
-       int             error_fd = -1;
-       char *          my_dirpathp = NULL;
-       char *          my_namep = NULL;
-       char *          my_pathp = NULL;
-       ssize_t         my_result;
-       long            my_pconf_result;
-       struct stat     my_sb;
-       char            my_buffer[32];
-       kern_return_t           my_kr;
-
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_dirpathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_dirpathp = 0x00;
-       strlcat( my_dirpathp, &g_target_path[0], PATH_MAX );
-
-       my_dirfd = openat(AT_FDCWD, my_dirpathp, O_RDONLY, 0 );
-       if ( my_dirfd == -1 ) {
-               printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t Directory we attempted to open -> \"%s\" \n", my_dirpathp );
-               goto test_failed_exit;
-       }
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_namep, NAME_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_namep = 0x00;
-       if (my_pathp) {
-               *my_pathp = 0x00;
-       }
-
-	/* If dirpath is absolute, we can ask create_random_name_at to also return an absolute pathname for the file */
-       if (*my_dirpathp == '/') {
-               my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-               if(my_kr != KERN_SUCCESS){
-                       printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       goto test_failed_exit;
-               }
-       }
-
-       /*
-	 * Some basic openat validation. If the pathname is absolute, an invalid fd should
-	 * not matter.
-        */
-
-       if (*my_dirpathp == '/') {
-               my_dirfd = openat( INVALID_FD, my_dirpathp, O_RDONLY, 0 );
-               if ( my_dirfd == -1 ) {
-                       printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       printf( "\t Directory we attempted to open -> \"%s\" \n", my_dirpathp );
-			printf( "\t An absolute pathname was used with an invalid fd (%d) provided as input \n", INVALID_FD);
-                       goto test_failed_exit;
-               }
-               close(my_dirfd);
-
-       }
-
-       my_dirfd = openat( AT_FDCWD, my_dirpathp, O_RDONLY, 0 );
-       if ( my_dirfd == -1 ) {
-               printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t Directory we attempted to open -> \"%s\" \n", my_dirpathp );
-               goto test_failed_exit;
-       }
-
-       /* create a test file */
-       my_err = create_random_name_at( my_dirfd, my_dirpathp, my_namep, NAME_MAX, my_pathp, PATH_MAX, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-
-       /*
-	 * If the pathname is not absolute, an openat relative to an invalid directory fd
-        * should not work.
-        */
-       if (my_pathp) {
-               /*  test O_WRONLY case */
-               my_fd = openat( INVALID_FD, my_namep, O_WRONLY, 0 );
-               if ( my_fd != -1 ) {
-                       printf( "openat call relative to invalid dir fd worked\n");
-                       printf( "\t file we attempted to open -> \"%s\" relative to fd -173\n", my_pathp );
-                       goto test_failed_exit;
-               }
-       }
-
-       /*  test O_WRONLY case */
-       my_fd = openat( my_dirfd, my_namep, O_WRONLY, 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t file we attempted to open -> \"%s\" \n", my_pathp );
-               goto test_failed_exit;
-       }
-
-	/*
-	 * try to open relative to a non-directory fd.
-        * It should fail with ENOTDIR.
-        */
-       if ((error_fd = openat(my_fd, my_namep, O_WRONLY, 0)) != -1) {
-		printf( "openat call succeeded with fd being a non-directory fd\n");
-		printf( "\t file we attempted to open (relative to itself) -> \"%s\" \n", my_pathp );
-               close(error_fd);
-               goto test_failed_exit;
-       } else if (errno != ENOTDIR) {
-		printf( "openat call should have failed with errno 20 (ENOTDIR).  actually failed with %d - \"%s\" \n", errno, strerror( errno ) );
-       }
-
-       my_pconf_result = fpathconf( my_fd, _PC_NAME_MAX );
-       if ( my_pconf_result == -1 ) {
-		printf( "fpathconf - _PC_NAME_MAX - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       //      printf( "_PC_NAME_MAX %ld \n", my_pconf_result );
-       /* results look OK? */
-       if ( my_pconf_result < 6 ) {
-               printf( "fpathconf - _PC_NAME_MAX - looks like wrong results \n" );
-               goto test_failed_exit;
-       }
-
-       /* write some data then try to read it */
-       my_result = write( my_fd, "kat", 3 );
-       my_err = errno;
-       if ( my_result != 3 ) {
-               if ( sizeof( ssize_t ) > sizeof( int ) ) {
-                       printf( "write failed.  should have written 3 bytes actually wrote -  %ld \n", (long int) my_result );
-               }
-               else {
-                       printf( "write failed.  should have written 3 bytes actually wrote -  %d \n", (int) my_result );
-               }
-               goto test_failed_exit;
-       }
-
-       /* Try to read - this should fail since we opened file with O_WRONLY */
-       my_result = read( my_fd, &my_buffer[0], sizeof(my_buffer) );
-       my_err = errno;
-       if ( my_result != -1 ) {
-               printf( "read call should have failed with errno 9 (EBADF) \n" );
-               goto test_failed_exit;
-       }
-       else if ( my_err != EBADF ) {
-               printf( "read call should have failed with errno 9 (EBADF).  actually failed with %d - \"%s\" \n", my_err, strerror( my_err) );
-               goto test_failed_exit;
-       }
-
-       close( my_fd );
-
-       /*  test O_TRUNC and O_APPEND case */
-       my_fd = openat( my_dirfd, my_namep, (O_RDWR | O_TRUNC | O_APPEND), 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t file we attempted to open -> \"%s\" \n", my_pathp );
-               goto test_failed_exit;
-       }
-
-       my_result = read( my_fd, &my_buffer[0], sizeof(my_buffer) );
-       if ( my_result == -1 ) {
-               printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_result != 0 ) {
-               printf( "read failed - should have read 0 bytes. \n" );
-               goto test_failed_exit;
-       }
-
-       my_result = write( my_fd, "kat", 3 );
-       my_err = errno;
-       if ( my_result != 3 ) {
-               if ( sizeof( ssize_t ) > sizeof( int ) ) {
-                       printf( "write failed.  should have written 3 bytes actually wrote -  %ld \n", (long int) my_result );
-               }
-               else {
-                       printf( "write failed.  should have written 3 bytes actually wrote -  %d \n", (int) my_result );
-               }
-               goto test_failed_exit;
-       }
-
-       /* add some more data to the test file - this should be appended */
-       lseek( my_fd, 0, SEEK_SET );
-       my_result = write( my_fd, "zzz", 3 );
-       my_err = errno;
-       if ( my_result != 3 ) {
-               if ( sizeof( ssize_t ) > sizeof( int ) ) {
-                       printf( "write failed.  should have written 3 bytes actually wrote -  %ld \n", (long int) my_result );
-               }
-               else {
-                       printf( "write failed.  should have written 3 bytes actually wrote -  %d \n", (int) my_result );
-               }
-               goto test_failed_exit;
-       }
-
-       /* now verify the writes */
-       bzero( (void *)&my_buffer[0], sizeof(my_buffer) );
-       lseek( my_fd, 0, SEEK_SET );
-       my_result = read( my_fd, &my_buffer[0], sizeof(my_buffer) );
-       if ( my_result == -1 ) {
-               printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_buffer[0] != 'k' || my_buffer[5] != 'z' ) {
-               printf( "read failed to get correct data \n" );
-               goto test_failed_exit;
-       }
-
-       /*
-	 * try to stat relative to a non-directory fd.
-        * It should fail with ENOTDIR.
-        */
-       if ((fstatat( my_fd, my_namep, &my_sb, 0 )) != -1) {
-		printf( "fstatat call succeeded with fd being a non-directory fd\n");
-               printf( "\t file we attempted to stat (relative to itself)-> \"%s\" \n", my_pathp );
-               goto test_failed_exit;
-       } else if (errno != ENOTDIR) {
-		printf( "fstatat call should have failed with errno 20 (ENOTDIR).  actually failed with %d - \"%s\" \n", errno, strerror( errno ) );
-       }
-
-       /* test fstatat */
-       my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 );
-       if ( my_err == -1 ) {
-               printf( "fstatat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_sb.st_size != 6 ) {
-               printf( "fstatat call failed - st_size is wrong \n" );
-               goto test_failed_exit;
-       }
-       if ( !S_ISREG( my_sb.st_mode ) ) {
-               printf( "fstatat call failed - st_mode does not indicate regular file \n" );
-               goto test_failed_exit;
-       }
-
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-
-       if ( my_pathp != NULL ) {
-               remove(my_pathp);
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);
-       }
-
-       if ( my_namep ) {
-		unlinkat( my_dirfd, my_namep, 0 );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_namep, NAME_MAX);
-       }
-
-       if ( my_dirfd != -1)
-               close(my_dirfd);
-
-       if ( my_dirpathp != NULL ) {
-               vm_deallocate(mach_task_self(), (vm_address_t)my_dirpathp, PATH_MAX);
-       }
-
-       return( my_err );
-}
-
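-/*
- * A minimal sketch (illustrative only, not called by the tests) of the openat() resolution
- * rules the test above exercises: an absolute path ignores the fd argument entirely,
- * AT_FDCWD resolves a relative path against the current working directory, and any other fd
- * must refer to a directory or the call fails with ENOTDIR.  "some_file" is a placeholder.
- */
-static void example_openat_rules( int the_dirfd )
-{
-	int	my_fd;
-
-	/* absolute path - the fd argument (even an invalid one) is ignored */
-	my_fd = openat( INVALID_FD, "/etc/hosts", O_RDONLY, 0 );
-	if ( my_fd != -1 )
-		close( my_fd );
-
-	/* relative path resolved against the current working directory */
-	my_fd = openat( AT_FDCWD, "some_file", O_RDONLY, 0 );
-	if ( my_fd != -1 )
-		close( my_fd );
-
-	/* relative path resolved against the directory named by the_dirfd */
-	my_fd = openat( the_dirfd, "some_file", O_RDONLY, 0 );
-	if ( my_fd != -1 )
-		close( my_fd );
-}
-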
-/*  **************************************************************************************************************
- *     Test linkat, fstatat and unlinkat system calls.
- *  **************************************************************************************************************
- */
-int linkat_fstatat_unlinkat_test( void * the_argp )
-{
-       int                     my_err;
-       int                     my_dirfd = -1;
-       int                     my_fd = -1;
-       char *                  my_dirpathp = NULL;
-       char *                  my_namep = NULL;
-       char *                  my_pathp = NULL;
-       char *                  my_name2p = NULL;
-       nlink_t                 my_link_count;
-       ssize_t                 my_result;
-       struct stat             my_sb;
-       kern_return_t           my_kr;
-
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_dirpathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_dirpathp = 0x00;
-       strlcat( my_dirpathp, &g_target_path[0], PATH_MAX );
-
-       my_dirfd = openat(AT_FDCWD, my_dirpathp, O_RDONLY, 0 );
-       if ( my_dirfd == -1 ) {
-               printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t Directory we attempted to open -> \"%s\" \n", my_dirpathp );
-               goto test_failed_exit;
-       }
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_namep, NAME_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_namep = 0x00;
-       if (my_pathp) {
-               *my_pathp = 0x00;
-       }
-
-	/* If dirpath is absolute, we can ask create_random_name_at to also return an absolute pathname for the file */
-       if (*my_dirpathp == '/') {
-               my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-               if(my_kr != KERN_SUCCESS){
-                       printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       goto test_failed_exit;
-               }
-       }
-
-       /* create a test file */
-       my_err = create_random_name_at( my_dirfd, my_dirpathp, my_namep, NAME_MAX, my_pathp, PATH_MAX, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_name2p, NAME_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_name2p = 0x00;
-
-       /* now create a name for the link file */
-       strlcat( my_name2p, my_namep, NAME_MAX );
-       strlcat( my_name2p, "link", NAME_MAX );
-
-       /* get the current link count */
-       my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 );
-       if ( my_err != 0 ) {
-               printf( "stat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       my_link_count = my_sb.st_nlink;
-
-       /* Double check with absolute path name */
-       if (my_pathp) {
-               my_err = fstatat(INVALID_FD, my_pathp, &my_sb, 0 );
-               if ( my_err != 0 ) {
-                       printf( "fstatat with INVALID_FD and absolute pathname failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-                       goto test_failed_exit;
-               }
-               if (my_link_count != my_sb.st_nlink) {
-			printf( "fstatat call did not return correct number of links \n" );
-                       goto test_failed_exit;
-               }
-       }
-
-       /* check file size (should be 0) */
-       if ( my_sb.st_size != 0 ) {
-               printf( "stat structure looks bogus for test file \"%s\" \n", my_pathp );
-               printf( "st_size is not 0 \n" );
-               goto test_failed_exit;
-       }
-
-       /* change file size */
-       my_fd = openat(my_dirfd, my_namep, O_RDWR, 0 );
-       if ( my_fd == -1 ) {
-               printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t file we attempted to open -> \"%s\" \n", my_pathp );
-               goto test_failed_exit;
-       }
-
-       my_result = write( my_fd, "kat", 3 );
-       my_err = errno;
-       if ( my_result != 3 ) {
-               if ( sizeof( ssize_t ) > sizeof( int ) ) {
-                       printf( "write failed.  should have written 3 bytes actually wrote -  %ld \n", (long int) my_result );
-               }
-               else {
-                       printf( "write failed.  should have written 3 bytes actually wrote -  %d \n", (int) my_result );
-               }
-               goto test_failed_exit;
-       }
-       close( my_fd );
-       my_fd = -1;
-
-       /* now link another file to our test file and recheck link count */
-       /* N.B. - HFS only supports AT_SYMLINK_FOLLOW */
-       my_err = linkat( my_dirfd, my_namep, my_dirfd, my_name2p, AT_SYMLINK_FOLLOW );
-       if ( my_err != 0 ) {
-               printf( "linkat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-	my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 );
-       if ( my_err != 0 ) {
-               printf( "fstatat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( (my_link_count + 1) != my_sb.st_nlink ) {
-               printf( "stat structure looks bogus for test file \"%s\" \n", my_pathp );
-               printf( "incorrect st_nlink \n" );
-               goto test_failed_exit;
-       }
-
-       /* check file size (should be 3) */
-       if ( my_sb.st_size != 3 ) {
-               printf( "stat structure looks bogus for test file \"%s\" \n", my_pathp );
-               printf( "st_size is not 3 \n" );
-               goto test_failed_exit;
-       }
-
-       /* now make sure unlink works OK */
-       my_err = unlinkat( my_dirfd, my_name2p, 0 );
-       if ( my_err != 0 ) {
-               printf( "unlinkat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 );
-       if ( my_err != 0 ) {
-               printf( "stat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_link_count != my_sb.st_nlink ) {
-               printf( "stat structure looks bogus for test file \"%s\" \n", my_pathp );
-               printf( "incorrect st_nlink \n" );
-               goto test_failed_exit;
-       }
-
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-
-       if ( my_name2p != NULL ) {
-               (void)unlinkat( my_dirfd, my_name2p, 0 );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_name2p, NAME_MAX);
-       }
-
-       if ( my_namep != NULL ) {
-		(void)unlinkat( my_dirfd, my_namep, 0 );
-		vm_deallocate(mach_task_self(), (vm_address_t)my_namep, NAME_MAX);
-       }
-
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);
-       }
-
-       if ( my_dirpathp != NULL ) {
-               vm_deallocate(mach_task_self(), (vm_address_t)my_dirpathp, PATH_MAX);
-       }
-
-       if ( my_dirfd != -1 )
-               close( my_dirfd );
-       
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test faccessat, fchmodat and fchmod system calls.
- *  **************************************************************************************************************
- */
-int faccessat_fchmodat_fchmod_test( void * the_argp )
-{
-       int             error_occurred;
-       int             is_absolute_path = 0;
-       int             my_err;
-       int             my_dirfd = -1;
-       int             my_fd = -1;
-
-       char *          my_dirpathp = NULL;
-       char *          my_namep = NULL;
-       char *          my_pathp = NULL;
-
-       uid_t           ruid;
-       struct stat     my_sb;
-
-       FILE *          file_handle;
-
-       kern_return_t   my_kr;
-
-
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_dirpathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_dirpathp = 0x00;
-       strlcat( my_dirpathp, &g_target_path[0], PATH_MAX );
-
-       /*
-        * Some basic openat validation. If pathname is absolute, an invalid fd should
-        * not matter.
-        */
-
-       if (*my_dirpathp == '/') {
-               is_absolute_path = 1;
-               my_dirfd = openat(INVALID_FD, my_dirpathp, O_RDONLY, 0 );
-               if ( my_dirfd == -1 ) {
-                       printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       printf( "\t Directory we attempted to open -> \"%s\" \n", my_dirpathp );
-			printf( "\t An absolute pathname was used with an invalid fd (%d) provided as input \n", INVALID_FD);
-                       goto test_failed_exit;
-               }
-               close( my_dirfd );
-       }
-
-       my_dirfd = openat(AT_FDCWD, my_dirpathp, O_RDONLY, 0 );
-       if ( my_dirfd == -1 ) {
-               printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t Directory we attempted to open -> \"%s\" \n", my_dirpathp );
-               goto test_failed_exit;
-       }
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_namep, NAME_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_namep = 0x00;
-
-       if (is_absolute_path) {
-               my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-               if(my_kr != KERN_SUCCESS){
-                       printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       goto test_failed_exit;
-               }
-
-               *my_pathp = 0x00;
-       }
-
-       /* create a test file */
-       my_err = create_random_name_at(my_dirfd, my_dirpathp, my_namep, NAME_MAX, my_pathp, PATH_MAX, 1);
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-
-       /* test chmod */
-       my_err = fchmodat(my_dirfd, my_namep, S_IRWXU, 0);
-       if ( my_err == -1 ) {
-               printf( "chmod call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       my_err = fchmodat( my_dirfd, my_namep, (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP), 0 );
-       if ( my_err == -1 ) {
-               printf( "chmod call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* test access - this should fail */
-       my_err = faccessat( my_dirfd, my_namep, (X_OK), 0 );
-       if ( my_err == 0 ) {
-               printf( "access call should have failed, but did not. \n" );
-               goto test_failed_exit;
-       }
-       else if ( my_err == -1  ) {
-               int tmp = 0;
-               tmp = getuid( );
-
-               /* special case when running as root - we get back EPERM when running as root */
-               my_err = errno;
-               if ( ( tmp == 0 && my_err != EPERM) || (tmp != 0 && my_err != EACCES) ) {
-                       printf( "access failed with errno %d - %s. \n", my_err, strerror( my_err ) );
-                       goto test_failed_exit;
-               }
-       }
-
-       /* verify correct modes are set */
-       /* First check that Absolute path works even with an invalid FD */
-       if (is_absolute_path) {
-               my_err = fstatat( INVALID_FD, my_pathp, &my_sb, 0 );
-               if ( my_err != 0 ) {
-                       printf( "fstatat call failed with an absolute pathname.  got errno %d - %s. \n", errno, strerror( errno ) );
-                       goto test_failed_exit;
-               }
-       }
-
-       my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 );
-       if ( my_err != 0 ) {
-               printf( "stat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       if ( (my_sb.st_mode & (S_IRWXO | S_IXGRP)) != 0 ||
-           (my_sb.st_mode & (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP)) == 0 ) {
-               printf( "chmod call appears to have failed.  stat shows incorrect values in st_mode! \n" );
-               goto test_failed_exit;
-       }
-
-
-	/* another test for the access system call -- refer to radar #6725311 */
-
-
-       /*
-        * This test makes sure that the access system call does not give the current user extra
-        * permissions on files the current user does not own. From radar #6725311, this could
-        * happen when the current user calls access() on a file owned by the current user in
-        * the same directory as the other files not owned by the current user.
-        *
-        * Note: This test expects that the effective uid (euid) is set to root.
-        *
-        */
-
-       /* Create a file that root owns  */
-       file_handle = fopen(FILE_NOTME, "w");
-       fclose(file_handle);
-
-       /* Currently running as root (through settid manipulation), switch to running as the current user. */
-       ruid = getuid();
-       my_err = syscall(SYS_settid, ruid, KAUTH_GID_NONE);
-       if (my_err != 0) {
-               printf("Failed to settid to non-root with error %d:%s\n", errno, strerror(errno));
-               goto test_failed_exit;
-       }
-
-       /* Create a file that the current user owns  */
-       file_handle = fopen(FILE_ME, "w");
-       fclose(file_handle);
-
-       error_occurred = 0;
-
-       /* Try to remove the file owned by root (this should fail). */
-       my_err = unlinkat( AT_FDCWD, FILE_NOTME, 0 );
-
-       if (my_err < 0) {
-               my_err = errno;
-       }
-
-       if (my_err == 0) {
-               printf("Unresolved: First attempt deleted '" FILE_NOTME "'! \n");
-               error_occurred = 1;
-       } else {
-               printf("Passed: First attempt to delete '" FILE_NOTME "'  failed with error %d - %s.\n", my_err, strerror( my_err ));
-
-               /* Set _DELETE_OK on a file that the current user owns */
-               faccessat(AT_FDCWD, FILE_ME, _DELETE_OK, 0 );
-
-		/* Try to remove the file owned by root again (should fail with EACCES [13]) */
-               my_err = unlinkat(AT_FDCWD, FILE_NOTME, 0);
-
-               if (my_err < 0) {
-                       my_err = errno;
-               }
-
-               if (my_err == 0) {
-                       printf("Failed: Second attempt deleted '" FILE_NOTME "'!\n");
-                       error_occurred = 1;
-               } else if (my_err == 13) {
-                       printf("Passed: Second attempt to delete '" FILE_NOTME "' failed with error %d - %s.\n", my_err, strerror( my_err ));
-               } else {
-                       printf("Failed: Second attempt to delete '" FILE_NOTME "' failed with error %d - %s.\n", my_err, strerror( my_err ));
-                       error_occurred = 1;
-               }
-       }
-
-       /* Reset to running as root */
-       my_err = syscall(SYS_settid, KAUTH_UID_NONE, KAUTH_GID_NONE);
-       if (my_err != 0) {
-               printf("Failed to settid revert to root with error %d:%s\n", errno, strerror(errno));
-               goto test_failed_exit;
-       }
-
-       if(error_occurred == 1) {
-               goto test_failed_exit;
-       }
-
-
-       /* end of test*/
-
-
-       /* test fchmod */
-       my_fd = openat( my_dirfd, my_namep, O_RDONLY, 0);
-       if ( my_fd == -1 ) {
-               printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t we attempted to open -> \"%s\" \n", &g_target_path[0] );
-               goto test_failed_exit;
-       }
-
-       my_err = fchmod( my_fd, S_IRWXU );
-       if ( my_err == -1 ) {
-               printf( "fchmod call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       my_err = fstatat( INVALID_FD, my_pathp, &my_sb, 0 );
-       if ( my_err != 0 ) {
-               printf( "stat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* verify correct modes are set */
-       if ( (my_sb.st_mode & (S_IRWXG | S_IRWXO)) != 0 ||
-           (my_sb.st_mode & (S_IRWXU)) == 0 ) {
-               printf( "fchmod call appears to have failed.  stat shows incorrect values in st_mode! \n" );
-               goto test_failed_exit;
-       }
-
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);
-       }
-       if ( my_namep != NULL ) {
-               unlinkat(my_dirfd, my_namep, 0);
-		vm_deallocate(mach_task_self(), (vm_address_t)my_namep, NAME_MAX);
-
-       }
-
-       if ( my_dirfd != -1)
-               close( my_dirfd);
-
-       if ( my_dirpathp != NULL ) {
-		vm_deallocate(mach_task_self(), (vm_address_t)my_dirpathp, PATH_MAX);
-       }
-       
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test fchownat, fchown, readlinkat, symlinkat system calls.
- *  **************************************************************************************************************
- */
-int fchownat_fchown_symlinkat_test( void * the_argp )
-{
-       int                     my_err, my_group_count, i;
-       int                     my_fd = -1;
-       int                     my_dirfd = -1;
-       char *                  my_dirpathp = NULL;
-       char *                  my_namep = NULL;
-       char *                  my_link_namep = NULL;
-       char *                  my_pathp = NULL;
-       char *                  my_link_pathp = NULL;
-       int                     is_absolute_path = 0;
-       uid_t                   my_orig_uid;
-       gid_t                   my_orig_gid, my_new_gid1 = 0, my_new_gid2 = 0;
-       ssize_t                 my_result;
-       struct stat             my_sb;
-       gid_t                   my_groups[ NGROUPS_MAX ];
-       char                    my_buffer[ 64 ];
-       kern_return_t           my_kr;
-
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_dirpathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_dirpathp = 0x00;
-       strlcat( my_dirpathp, &g_target_path[0], PATH_MAX );
-
-       /*
-        * Some basic openat validation. If pathname is absolute, an invalid fd should
-        * not matter.
-        */
-       if (*my_dirpathp == '/') {
-               is_absolute_path = 1;
-               my_dirfd = openat(INVALID_FD, my_dirpathp, O_RDONLY, 0 );
-               if ( my_dirfd == -1 ) {
-                       printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       printf( "\t Directory we attempted to open -> \"%s\" \n", my_dirpathp );
-			printf( "\t An absolute pathname was used with an invalid fd (%d) provided as input \n", INVALID_FD);
-                       goto test_failed_exit;
-               }
-               close( my_dirfd );
-       }
-
-       my_dirfd = openat(AT_FDCWD, my_dirpathp, O_RDONLY, 0 );
-       if ( my_dirfd == -1 ) {
-               printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t Directory we attempted to open -> \"%s\" \n", my_dirpathp );
-               goto test_failed_exit;
-       }
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_namep, NAME_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_namep = 0x00;
-
-       if (is_absolute_path) {
-               my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-               if(my_kr != KERN_SUCCESS){
-                       printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       goto test_failed_exit;
-               }
-
-               *my_pathp = 0x00;
-       }
-
-       /* create a test file */
-       my_err = create_random_name_at(my_dirfd, my_dirpathp, my_namep, NAME_MAX, my_pathp, PATH_MAX, 1);
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_link_namep, NAME_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_link_namep = 0x00;
-
-       if (is_absolute_path) {
-               my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_link_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-               if(my_kr != KERN_SUCCESS){
-                       printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       goto test_failed_exit;
-               }
-
-               *my_link_pathp = 0x00;
-       }
-
-       /* get a name for the link (to create the symlink later) */
-       my_err = create_random_name_at(my_dirfd, my_dirpathp, my_link_namep, NAME_MAX, my_link_pathp, PATH_MAX, 0 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-
-       if ( !_prime_groups() ) {
-               goto test_failed_exit;
-       }
-
-       /* set up by getting a list of groups */
-       my_group_count = getgroups( NGROUPS_MAX, &my_groups[0] );
-
-       if ( my_group_count == -1 || my_group_count < 1 ) {
-               printf( "getgroups call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 );
-       if ( my_err != 0 ) {
-               printf( "stat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* now change group owner to something other than current value */
-       my_orig_gid = my_sb.st_gid;
-       my_orig_uid = my_sb.st_uid;
-
-       for ( i = 0; i < my_group_count; i++ ) {
-               if ( my_orig_gid != my_groups[ i ] ) {
-                       if ( my_new_gid1 == 0 ) {
-                               my_new_gid1 = my_groups[ i ];
-                       }
-                       else if( my_new_gid1 != my_groups[ i ] ) {
-                               my_new_gid2 = my_groups[ i ];
-                               break;
-                       }
-               }
-       }
-       if ( i >= my_group_count ) {
-		printf( "not enough distinct groups to choose from.  need two groups that differ from the file's st_gid! \n" );
-               goto test_failed_exit;
-       }
-
-       my_err = fchownat( my_dirfd, my_namep, my_orig_uid, my_new_gid1, 0 );
-       if ( my_err != 0 ) {
-               printf( "chown call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* make sure the group owner was changed */
-       my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 );
-       if ( my_err != 0 ) {
-               printf( "stat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_sb.st_gid == my_orig_gid ) {
-               printf( "chown call failed.  st_gid is not correct! \n" );
-               goto test_failed_exit;
-       }
-
-	/* change group owner again, to the second new group, using fchown */
-       if (is_absolute_path) {
-               my_fd = openat( INVALID_FD, my_pathp, O_RDWR, 0 );
-       } else {
-               my_fd = openat( my_dirfd, my_namep, O_RDWR, 0 );
-       }
-
-       if ( my_fd == -1 ) {
-               printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t we attempted to open -> \"%s\" \n", &g_target_path[0] );
-               goto test_failed_exit;
-       }
-
-       my_err = fchown( my_fd, my_orig_uid, my_new_gid2 );
-       if ( my_err != 0 ) {
-               printf( "fchown call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-	/* make sure the group owner was changed to the second new value */
-       my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 );
-       if ( my_err != 0 ) {
-               printf( "fstatat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       if ( my_sb.st_gid == my_new_gid1 ) {
-               printf( "fchown call failed.  st_gid is not correct! \n" );
-               goto test_failed_exit;
-       }
-
-       /* create a link file and test fstatat(..., AT_SYMLINK_NOFOLLOW) */
-       my_err = symlinkat( my_namep, my_dirfd, my_link_namep );
-       if ( my_err != 0 ) {
-               printf( "symlinkat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       my_err = fstatat( my_dirfd, my_link_namep, &my_sb, AT_SYMLINK_NOFOLLOW );
-       if ( my_err != 0 ) {
-               printf( "fstatat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* now change group owner to something other than current value */
-       my_orig_gid = my_sb.st_gid;
-       my_orig_uid = my_sb.st_uid;
-       my_err = fchownat( my_dirfd, my_link_namep, my_orig_uid, my_new_gid1, AT_SYMLINK_NOFOLLOW );
-       if ( my_err != 0 ) {
-               printf( "fchownat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-
-       /* make sure the group owner was changed to new value */
-       my_err = fstatat( my_dirfd, my_link_namep, &my_sb, AT_SYMLINK_NOFOLLOW );
-       if ( my_err != 0 ) {
-               printf( "fstatat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-	if ( my_sb.st_gid != my_new_gid1 ) {
-               printf( "fchownat call failed.  st_gid is not correct! \n" );
-               goto test_failed_exit;
-       }
-
-       /* make sure we can read the symlink file */
-       my_result = readlinkat( my_dirfd, my_link_namep, &my_buffer[0], sizeof(my_buffer) );
-       if ( my_result == -1 ) {
-               printf( "readlinkat call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
-               goto test_failed_exit;
-       }
-       /* make sure we read some data */
-       if ( my_result < 1 ) {
-               printf( "readlinkat failed to read any data. \n" );
-               goto test_failed_exit;
-       }
-
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if  ( my_namep ) {
-               unlinkat( my_dirfd, my_namep, 0);
-               vm_deallocate(mach_task_self(), (vm_address_t)my_namep, NAME_MAX);
-       }
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);
-       }
-       if  ( my_link_namep ) {
-               unlinkat( my_dirfd, my_link_namep, 0);
-               vm_deallocate(mach_task_self(), (vm_address_t)my_link_namep, NAME_MAX);
-       }
-       if ( my_link_pathp != NULL ) {
-               unlink( my_link_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_link_pathp, PATH_MAX);
-       }
-       if ( my_dirfd != -1 )
-               close(my_dirfd);
-
-       if ( my_dirpathp != NULL ) {
-               vm_deallocate(mach_task_self(), (vm_address_t)my_dirpathp, PATH_MAX);
-       }
-
-
-       return( my_err );
-}
-
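-/*
- * A minimal sketch (illustrative only, not called by the tests): readlinkat() does not
- * NUL-terminate the buffer it fills, so a caller that wants a C string has to terminate it
- * with the returned length, as below.  The link and target names are placeholders.
- */
-static int example_read_symlink( int the_dirfd )
-{
-	char	my_target[ PATH_MAX ];
-	ssize_t	my_len;
-
-	if ( symlinkat( "some_target", the_dirfd, "some_link" ) != 0 ) {
-		return( -1 );
-	}
-	my_len = readlinkat( the_dirfd, "some_link", my_target, sizeof( my_target ) - 1 );
-	if ( my_len == -1 ) {
-		(void)unlinkat( the_dirfd, "some_link", 0 );
-		return( -1 );
-	}
-	my_target[ my_len ] = 0;	/* readlinkat returns a byte count, not a C string */
-	printf( "link points at \"%s\" \n", my_target );
-	return( unlinkat( the_dirfd, "some_link", 0 ) );
-}
-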
-/*  **************************************************************************************************************
- *     Test mkdirat, unlinkat, umask system calls.
- *  **************************************************************************************************************
- */
-int mkdirat_unlinkat_umask_test( void * the_argp )
-{
-       int                             my_err;
-       int                             my_dirfd = -1;
-       int                             my_fd = -1;
-       int                             did_umask = 0;
-       char *                          my_dirpathp = NULL;
-       char *                          my_namep = NULL;
-       char *                          my_pathp = NULL;
-       mode_t                          my_orig_mask;
-       struct stat                     my_sb;
-       kern_return_t                   my_kr;
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_dirpathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_dirpathp = 0x00;
-       strlcat( my_dirpathp, &g_target_path[0], PATH_MAX );
-
-       my_dirfd = openat(AT_FDCWD, my_dirpathp, O_RDONLY, 0 );
-       if ( my_dirfd == -1 ) {
-               printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t Directory we attempted to open -> \"%s\" \n", my_dirpathp );
-               goto test_failed_exit;
-       }
-
-       /* If dirpath is absolute, we can also ask create_random_name_at to return an absolute path to the file */
-       if (*my_dirpathp == '/') {
-               my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-               if(my_kr != KERN_SUCCESS){
-                       printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       goto test_failed_exit;
-               }
-       }
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_namep, NAME_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_namep = 0x00;
-       if (my_pathp) {
-               *my_pathp = 0x00;
-       }
-
-       /* get a random name to use with mkdirat (don't create) */
-       my_err = create_random_name_at( my_dirfd, my_dirpathp, my_namep, NAME_MAX, my_pathp, PATH_MAX, 0 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-
-       /* set umask to clear WX for other and group and clear X for user */
-       my_orig_mask = umask( (S_IXUSR | S_IWGRP | S_IXGRP | S_IWOTH | S_IXOTH) );
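-       /* (umask is now 0133, so the mkdirat below with mode 0777 should yield a directory with mode 0644) */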
-       did_umask = 1;
-
-       /* create a directory with RWX for user, group, other (which should be limited by umask) */
-       my_err = mkdirat( my_dirfd, my_namep, (S_IRWXU | S_IRWXG | S_IRWXO) );
-       if ( my_err == -1 ) {
-               printf( "mkdirat failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* verify results - (S_IXUSR | S_IWGRP | S_IXGRP | S_IWOTH | S_IXOTH) should be clear */
-       my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 );
-       if ( my_err != 0 ) {
-               printf( "fstatat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( (my_sb.st_mode & (S_IXUSR | S_IWGRP | S_IXGRP | S_IWOTH | S_IXOTH)) != 0 ) {
-               printf( "umask did not limit modes as it should have \n" );
-               goto test_failed_exit;
-       }
-
-       /* get rid of our test directory */
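-       /* (AT_REMOVEDIR makes unlinkat behave like rmdir(2)) */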
-       my_err = unlinkat( my_dirfd, my_namep, AT_REMOVEDIR );
-       if ( my_err == -1 ) {
-               printf( "unlinkat(..., AT_REMOVEDIR)  failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-
-       if  ( my_namep ) {
-               unlinkat( my_dirfd, my_namep, AT_REMOVEDIR );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_namep, NAME_MAX);
-       }
-
-       if ( my_pathp != NULL ) {
-               rmdir( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);
-       }
-
-       if ( my_dirfd != -1 )
-               close(my_dirfd);
-
-       if ( my_dirpathp != NULL ) {
-               vm_deallocate(mach_task_self(), (vm_address_t)my_dirpathp, PATH_MAX);
-       }
-
-       if ( did_umask != 0 ) {
-               umask( my_orig_mask );
-       }
-
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test renameat, fstatat system calls.
- *  **************************************************************************************************************
- */
-int renameat_test( void * the_argp )
-{
-       int                     my_err;
-       int                     my_dirfd = -1;
-       char *                  my_dirpathp = NULL;
-       char *                  my_namep = NULL;
-       char *                  my_pathp = NULL;
-       char *                  my_new_namep = NULL;
-       char *                  my_new_pathp = NULL;
-       ino_t                   my_file_id;
-       struct stat             my_sb;
-       kern_return_t           my_kr;
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_dirpathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_dirpathp = 0x00;
-       strlcat( my_dirpathp, &g_target_path[0], PATH_MAX );
-
-       my_dirfd = openat(AT_FDCWD, my_dirpathp, O_RDONLY, 0 );
-       if ( my_dirfd == -1 ) {
-               printf( "openat call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t Directory we attempted to open -> \"%s\" \n", my_dirpathp );
-               goto test_failed_exit;
-       }
-
-       /* If dirpath is absolute, we can also ask create_random_name_at to return an absolute path to the file */
-       if (*my_dirpathp == '/') {
-               my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-               if(my_kr != KERN_SUCCESS){
-                       printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       goto test_failed_exit;
-               }
-       }
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_namep, NAME_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_namep = 0x00;
-       if (my_pathp) {
-               *my_pathp = 0x00;
-       }
-
-       /* create random file */
-       my_err = create_random_name_at( my_dirfd, my_dirpathp, my_namep, NAME_MAX, my_pathp, PATH_MAX, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-
-
-       /* If dirpath is absolute, we can also ask create_random_name_at to return an absolute path to the file */
-       if (*my_dirpathp == '/') {
-               my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_new_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-               if(my_kr != KERN_SUCCESS){
-                       printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       goto test_failed_exit;
-               }
-       }
-
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_new_namep, NAME_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
-       *my_new_namep = 0x00;
-       if (my_new_pathp) {
-               *my_new_pathp = 0x00;
-       }
-
-       /* get a random name to use as the rename target (don't create it) */
-       my_err = create_random_name_at( my_dirfd, my_dirpathp, my_new_namep, NAME_MAX, my_new_pathp, PATH_MAX, 0 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-
-       /* save file ID for later use */
-       my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 );
-       if ( my_err != 0 ) {
-               printf( "fstatat - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       my_file_id = my_sb.st_ino;
-
-       /* test rename */
-       my_err = renameat( my_dirfd, my_namep, my_dirfd, my_new_namep );
-       if ( my_err == -1 ) {
-               printf( "renameat failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-
-       /* make sure old name is no longer there */
-       my_err = fstatat( my_dirfd, my_namep, &my_sb, 0 );
-       if ( my_err == 0 ) {
-               printf( "renameat call failed - found old name \n" );
-               goto test_failed_exit;
-       }
-
-       /* make sure new name is there and is correct file id */
-       my_err = fstatat( my_dirfd, my_new_namep, &my_sb, 0 );
-       if ( my_err != 0 ) {
-               printf( "stat - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_file_id != my_sb.st_ino ) {
-               printf( "rename failed - wrong file id \n" );
-               goto test_failed_exit;
-       }
-
-       /* cross check with absolute path and invalid fd */
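-       /* (fstatat ignores the fd argument when the path is absolute, so even INVALID_FD should work) */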
-       if (my_new_pathp) {
-               my_err = fstatat( INVALID_FD, my_new_pathp, &my_sb, 0 );
-               if ( my_err != 0 ) {
-                       printf( "stat - failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                       goto test_failed_exit;
-               }
-               if ( my_file_id != my_sb.st_ino ) {
-                       printf( "rename failed - wrong file id \n" );
-                       goto test_failed_exit;
-               }
-       }
-
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-
-test_passed_exit:
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);
-       }
-       if ( my_new_pathp != NULL ) {
-               remove( my_new_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_new_pathp, PATH_MAX);
-       }
-       if ( my_namep != NULL )
-               vm_deallocate(mach_task_self(), (vm_address_t)my_namep, NAME_MAX);
-       if ( my_new_namep != NULL ) {
-               unlinkat( my_dirfd, my_new_namep, 0 );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_new_namep, NAME_MAX);
-       }
-       if ( my_dirfd != -1 )
-               close( my_dirfd );
-       if ( my_dirpathp != NULL )
-               vm_deallocate(mach_task_self(), (vm_address_t)my_dirpathp, PATH_MAX);
-       return( my_err );
-}
-
-/*  **************************************************************************************************************
- *     Test task_set_exception_ports, host_set_exception_ports
- *  **************************************************************************************************************
- */
-static int __get_except_port(int which, mach_port_t *portp,
-                            exception_behavior_t *behaviorp,
-                            thread_state_flavor_t *flavorp)
-{
-        exception_mask_t masks[EXC_TYPES_COUNT];
-        mach_msg_type_number_t nmasks = 0;
-        exception_port_t ports[EXC_TYPES_COUNT];
-        exception_behavior_t behaviors[EXC_TYPES_COUNT];
-        thread_state_flavor_t flavors[EXC_TYPES_COUNT];
-
-       *portp = MACH_PORT_NULL;
-       *behaviorp = 0;
-       *flavorp = 0;
-
-        kern_return_t kr = KERN_FAILURE;
-        if (which == 0) { /* host port */
-                kr = host_get_exception_ports(mach_host_self(), EXC_MASK_BAD_ACCESS,
-                                masks, &nmasks, ports, behaviors, flavors);
-       } else if (which == 1) { /* task port */
-                kr = task_get_exception_ports(mach_task_self(), EXC_MASK_BAD_ACCESS,
-                                masks, &nmasks, ports, behaviors, flavors);
-        } else if (which == 2) { /* thread_port */
-                kr = thread_get_exception_ports(mach_thread_self(), EXC_MASK_BAD_ACCESS,
-                                masks, &nmasks, ports, behaviors, flavors);
-        } else {
-               printf("ERROR: invalid 'which' in %s\n", __func__);
-               return -1;
-       }
-        if (kr != KERN_SUCCESS) {
-               printf("ERROR getting %s exception port!\n", which == 0 ? "host" : (which == 1 ? "task" : "thread"));
-               return -1;
-       }
-        *portp = ports[0];
-       *behaviorp = behaviors[0];
-       *flavorp = flavors[0];
-
-       return 0;
-}
-
-int set_exception_ports_test( void * the_argp )
-{
-       int           testFlavor = -900000;
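-       /* deliberately invalid thread_state_flavor_t - each *_set_exception_ports call below should reject it */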
-       kern_return_t ret;
-       mach_port_t   exception_port;
-
-       mach_port_t           old_except_port = MACH_PORT_NULL;
-       exception_behavior_t  old_behavior;
-       thread_state_flavor_t old_flavor;
-
-
-       ret = mach_port_allocate( mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &exception_port );
-       if (ret != KERN_SUCCESS) {
-               printf("ERROR allocating new exception port?!\n");
-               return -1;
-       }
-       ret = mach_port_insert_right( mach_task_self(), exception_port, exception_port, MACH_MSG_TYPE_MAKE_SEND );
-       if (ret != KERN_SUCCESS) {
-               printf("ERROR inserting send right into new exception port?!\n");
-               goto test_failed_exit;
-       }
-
-       if (__get_except_port(2, &old_except_port, &old_behavior, &old_flavor) < 0)
-               goto test_failed_exit;
-
-       ret = thread_set_exception_ports( mach_thread_self(),
-                                         EXC_MASK_BAD_ACCESS,
-                                         exception_port,
-                                         EXCEPTION_STATE_IDENTITY,
-                                         testFlavor );
-       /*
-        * this test _fails_ if we successfully set the exception port
-        * with an invalid thread flavor
-        */
-       if (ret == KERN_SUCCESS) {
-               thread_set_exception_ports( mach_thread_self(),
-                                           EXC_MASK_BAD_ACCESS,
-                                           old_except_port, old_behavior, old_flavor );
-               printf("thread_set_exception_ports failed: expected !KERN_SUCCESS for flavor %d\n", testFlavor);
-               goto test_failed_exit;
-       }
-
-       /*
-        * so far, so good: the thread_set_exception_ports call failed,
-        * so we don't need to reset anything, but we do need to
-        * drop our reference to the old exception port we grabbed.
-        */
-       mach_port_deallocate( mach_task_self(), old_except_port );
-
-       if (__get_except_port(1, &old_except_port, &old_behavior, &old_flavor) < 0)
-               goto test_failed_exit;
-
-       ret = task_set_exception_ports( mach_task_self(),
-                                       EXC_MASK_BAD_ACCESS,
-                                       exception_port,
-                                       EXCEPTION_STATE_IDENTITY,
-                                       testFlavor );
-       /*
-        * this test _fails_ if we successfully set the exception port
-        * with an invalid thread flavor
-        */
-       if (ret == KERN_SUCCESS) {
-               task_set_exception_ports( mach_task_self(),
-                                         EXC_MASK_BAD_ACCESS,
-                                         old_except_port, old_behavior, old_flavor );
-               printf("task_set_exception_ports failed: expected !KERN_SUCCESS for flavor %d\n", testFlavor);
-               goto test_failed_exit;
-       }
-
-       /*
-        * so far, so good: the task_set_exception_ports call failed,
-        * so we don't need to reset anything, but we do need to
-        * drop our reference to the old exception port we grabbed.
-        */
-       mach_port_deallocate( mach_task_self(), old_except_port );
-
-       /*
-        * Now try the host exception port
-        */
-       if (__get_except_port(0, &old_except_port, &old_behavior, &old_flavor) < 0)
-               goto test_failed_exit;
-
-       ret = host_set_exception_ports( mach_host_self(),
-                                       EXC_MASK_BAD_ACCESS,
-                                       exception_port,
-                                       EXCEPTION_STATE_IDENTITY,
-                                       testFlavor );
-       /*
-        * this test _fails_ if we successfully set the exception port
-        * with an invalid thread flavor
-        */
-       if (ret == KERN_SUCCESS) {
-               host_set_exception_ports( mach_host_self(),
-                                         EXC_MASK_BAD_ACCESS,
-                                         old_except_port, old_behavior, old_flavor );
-               printf("host_set_exception_ports failed: expected !KERN_SUCCESS for flavor %d\n", testFlavor);
-               goto test_failed_exit;
-       }
-
-       mach_port_deallocate( mach_task_self(), exception_port );
-       mach_port_deallocate( mach_task_self(), old_except_port );
-       return 0;
-
-test_failed_exit:
-       mach_port_deallocate( mach_task_self(), exception_port );
-       if (old_except_port != MACH_PORT_NULL)
-               mach_port_deallocate( mach_task_self(), old_except_port );
-       return -1;
-}
-
-
-#if TEST_SYSTEM_CALLS 
-
-/*  **************************************************************************************************************
- *     Test xxxxxxxxx system calls.
- *  **************************************************************************************************************
- */
-int sample_test( void * the_argp )
-{
-       int                     my_err;
-       int                     my_fd = -1;
-       char *          my_pathp = NULL;
-       kern_return_t           my_kr;
-
-        my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                  printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                  goto test_failed_exit;
-        }
-
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-
-       /* create a test file */
-       my_err = create_random_name( my_pathp, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-       
-       /* add your test code here... */
-       
-       
-       my_err = 0;
-       goto test_passed_exit;
-
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);      
-        }
-       return( my_err );
-}
-
-#endif
diff --git a/tools/tests/xnu_quick_test/tests.h b/tools/tests/xnu_quick_test/tests.h
deleted file mode 100644 (file)
index 992d217..0000000
+++ /dev/null
@@ -1,158 +0,0 @@
-#ifndef _TESTS_H_
-#define        _TESTS_H_
-
-#ifndef CONFORMANCE_TESTS_IN_XNU
-#define CONFORMANCE_TESTS_IN_XNU       0
-#endif
-#ifndef TEST_SYSTEM_CALLS
-#define TEST_SYSTEM_CALLS              0
-#endif
-
-#include <errno.h>
-#include <fcntl.h>
-#include <signal.h>            /* Install signal handlers*/
-#include <spawn.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <stdbool.h>
-#include <assert.h>
-#include <stdarg.h>            /* Used to support printf() in misc.c */
-#include <mach/machine.h>      /* Used to determine host properties */
-#include <mach/vm_inherit.h>
-#include <sys/acct.h>
-#include <sys/aio.h>
-#include <sys/attr.h>
-#include <sys/dirent.h>
-#include <sys/disk.h>
-#include <sys/uio.h>
-#include <sys/kauth.h>
-#include <sys/mman.h>
-#include <sys/mount.h>
-#include <sys/param.h>
-#include <sys/ptrace.h>
-#include <sys/quota.h>
-#include <sys/resource.h>
-#include <sys/select.h>
-#include <sys/signal.h>
-#include <sys/socket.h>
-#include <sys/stat.h>
-#include <sys/syscall.h>
-#include <sys/sysctl.h>                /* Used to determine host properties */
-#include <sys/syslimits.h>
-#include <sys/time.h>
-#include <sys/ttycom.h>
-#include <sys/types.h>
-#include <sys/ucred.h>
-#include <sys/vnode.h>
-#include <sys/wait.h>
-#include <TargetConditionals.h> /* for TARGET_OS_EMBEDDED */
-
-#define MY_BUFFER_SIZE (1024 * 10)
-#define ARM    100
-#define INTEL  38947                   /* 
-                                        * Arbitrary values used by the execve tests to 
-                                        * determine the architecture of the machine.
-                                        */
-
-#define FILE_NOTME "/private/tmp/notme"                /* file in /private/tmp not owned by the current user */
-#define FILE_ME "/private/tmp/me"              /* file in /private/tmp owned by the current user */
-
-typedef int (*test_rtn_t)(void *);
-
-int access_chmod_fchmod_test( void * the_argp );
-int acct_test( void * the_argp );
-int aio_tests( void * the_argp );
-int bsd_shm_tests( void * the_argp );
-int chdir_fchdir_test( void * the_argp );
-int chflags_fchflags_test( void * the_argp );
-int chroot_test( void * the_argp );
-int chown_fchown_lchown_lstat_symlink_test( void * the_argp );
-int create_file_with_name( char *the_pathp, char *the_namep, int remove_existing );
-int create_random_name( char *the_pathp, int do_open );
-int directory_tests( void * the_argp );
-int do_execve_test(char * path, char * argv[], void * envpi, int killwait);
-int do_spawn_test(int arch, int shouldfail);
-int dup_test( void * the_argp );
-int exchangedata_test( void * the_argp );
-int execve_kill_vfork_test( void * the_argp );
-int fcntl_test( void * the_argp );
-int fork_wait4_exit_test( void * the_argp );
-int fs_stat_tests( void * the_argp );
-int get_architecture(void);                            /* Intel or PPC */
-int get_bits(void);                                    /* 64 or 32 */
-int getlogin_setlogin_test( void * the_argp );
-int getpid_getppid_pipe_test( void * the_argp );
-int getpriority_setpriority_test( void * the_argp );
-int getrusage_test( void * the_argp );
-int groups_test( void * the_argp );
-int ioctl_test( void * the_argp );
-int kqueue_tests( void * the_argp );
-int limit_tests( void * the_argp );
-int link_stat_unlink_test( void * the_argp );
-int locking_test( void * the_argp );
-int memory_tests( void * the_argp );
-int message_queue_tests( void * the_argp );
-int mkdir_rmdir_umask_test( void * the_argp );
-int mkfifo_test( void * the_argp );
-int mknod_sync_test( void * the_argp );
-int open_close_test( void * the_argp );
-int process_group_test( void * the_argp );
-int quotactl_test( void * the_argp );
-int read_write_test( void * the_argp );
-int rename_test( void * the_argp );
-int searchfs_test( void * the_argp );
-int sema_tests( void * the_argp );
-int sema2_tests( void * the_argp );
-int shm_tests( void * the_argp );
-int signals_test( void * the_argp );
-int socket_tests( void * the_argp );
-int socket2_tests( void * the_argp );
-int syscall_test( void * the_argp );
-int time_tests( void * the_argp );
-int uid_tests( void * the_argp );
-int xattr_tests( void * the_argp );
-int data_exec_tests( void * the_argp );
-int machvm_tests( void * the_argp );
-int getdirentries_test( void * the_argp );
-int statfs_32bit_inode_tests( void * the_argp );
-int commpage_data_tests( void * the_argp );
-int atomic_fifo_queue_test( void * the_argp );
-int sched_tests( void * the_argp );
-int content_protection_test( void * the_argp );
-int pipes_test( void * the_argp );
-int kaslr_test( void * the_argp );
-int getattrlistbulk_test( void * the_argp );
-int openat_close_test( void * the_argp );
-int linkat_fstatat_unlinkat_test( void * the_argp );
-int faccessat_fchmodat_fchmod_test( void * the_argp );
-int fchownat_fchown_symlinkat_test( void * the_argp );
-int mkdirat_unlinkat_umask_test( void * the_argp );
-int renameat_test( void * the_argp );
-int set_exception_ports_test( void * the_argp );
-
-struct test_entry 
-{
-       int                             test_run_it;            /* 0 means do not run this test, else run it */
-       test_rtn_t              test_routine;           /* routine to call */
-       void *                  test_input;                     /* optional input to test_routine */ 
-       char *                  test_infop;                     /* information about what is tested */ 
-};
-typedef struct test_entry * test_entryp;
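-/* e.g. a (hypothetical) table entry: { 1, &xattr_tests, NULL, "xattr system calls" } */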
-
-/* Special replacement printf with date/time stamp */
-int my_printf(const char * __restrict fmt, ...);
-#define printf my_printf
-
-/* 
-   If xnu_quick_test is running under the testbots, disable the special 
-   printf defined above so that log messages are emitted in a format 
-   the testbots understand.
-*/ 
-
-#if RUN_UNDER_TESTBOTS
-#undef printf
-#endif
-
-#endif /* !_TESTS_H_ */
diff --git a/tools/tests/xnu_quick_test/xattr_tests.c b/tools/tests/xnu_quick_test/xattr_tests.c
deleted file mode 100644 (file)
index 2b33d63..0000000
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- *  xattr_tests.c
- *  xnu_quick_test
- *
- *  Created by Jerry Cottingham on 6/2/2005.
- *  Copyright 2005 Apple Computer Inc. All rights reserved.
- *
- */
-
-#include "tests.h"
-#include <sys/xattr.h>
-#include <mach/mach.h>
-
-extern char  g_target_path[ PATH_MAX ];
-
-#define XATTR_TEST_NAME "com.apple.xattr_test"
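-/* (reverse-DNS style name, per convention, so the test attribute cannot collide with real attributes) */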
-
-/*  **************************************************************************************************************
- *     Test xattr system calls.
- *  **************************************************************************************************************
- */
-int xattr_tests( void * the_argp )
-{
-       int                     my_err;
-       int                     my_fd = -1;
-       char *          my_pathp = NULL;
-       ssize_t         my_result;
-       char            my_buffer[ 64 ];
-       char            my_xattr_data[ ] = "xattr_foo";
-       kern_return_t   my_kr;
-       int xattr_len = 0;
-       
-       my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-       if(my_kr != KERN_SUCCESS){
-               printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       *my_pathp = 0x00;
-       strcat( my_pathp, &g_target_path[0] );
-       strcat( my_pathp, "/" );
-       
-       /* create a test file */
-       my_err = create_random_name( my_pathp, 1 );
-       if ( my_err != 0 ) {
-               goto test_failed_exit;
-       }
-       
-       /* use setxattr to add an attribute to our test file */
-       my_err = setxattr( my_pathp, XATTR_TEST_NAME, &my_xattr_data[0], sizeof(my_xattr_data), 0, 0 );
-       if ( my_err == -1 ) {
-               printf( "setxattr failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       /* make sure it is there using listxattr and getxattr */
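-       /* (listxattr with a NULL buffer just returns the size needed to hold the list of attribute names) */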
-       my_result = listxattr( my_pathp, NULL, 0, 0 );
-       if ( my_result == -1 ) {
-               printf( "listxattr failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       if ( my_result < (strlen( XATTR_TEST_NAME ) + 1) ) {
-               printf( "listxattr did not get the attribute name length: my_result %zd, strlen %zu \n", my_result, (strlen(XATTR_TEST_NAME)+1) );
-               goto test_failed_exit;
-       }
-       
-       memset( &my_buffer[0], 0x00, sizeof( my_buffer ) );
-       
-       my_result = getxattr( my_pathp, XATTR_TEST_NAME, &my_buffer[0], sizeof(my_buffer), 0, 0 );
-       if ( my_result == -1 ) {
-               printf( "getxattr failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       if ( my_result != (strlen( &my_xattr_data[0] ) + 1) ||
-               strcmp(&my_buffer[0], &my_xattr_data[0] ) != 0 ) {
-               printf( "getxattr did not get the correct attribute data \n" );
-               goto test_failed_exit;
-       }
-       
-       /* use removexattr to remove an attribute from our test file */
-       my_err = removexattr( my_pathp, XATTR_TEST_NAME, 0 );
-       if ( my_err == -1 ) {
-               printf( "removexattr failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       /* make sure it is gone */
-       my_result = listxattr( my_pathp, NULL, 0, 0 );
-       if ( my_result == -1 ) {
-               printf( "listxattr failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       memset( &my_buffer[0], 0x00, sizeof( my_buffer ) );
-       my_result = getxattr( my_pathp, XATTR_TEST_NAME, &my_buffer[0], sizeof(my_buffer), 0, 0 );
-       if ( my_result != -1 || errno != ENOATTR ) {
-               printf( "getxattr should have failed with ENOATTR - got result %ld, errno %d - \"%s\" \n", (long) my_result, errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       
-       
-       /* repeat tests using file descriptor versions of the xattr system calls */
-       my_fd = open( my_pathp, O_RDONLY, 0 );
-       if ( my_fd == -1 ) {
-               printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               printf( "\t file we attempted to open -> \"%s\" \n", my_pathp );
-               goto test_failed_exit;
-       }
-       
-       /* use fsetxattr to add an attribute to our test file */
-       my_err = fsetxattr( my_fd, XATTR_TEST_NAME, &my_xattr_data[0], sizeof(my_xattr_data), 0, 0 );
-       if ( my_err == -1 ) {
-               printf( "fsetxattr failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       /* make sure it is there using flistxattr and fgetxattr */
-       my_result = flistxattr( my_fd, NULL, 0, 0 );
-       if ( my_result == -1 ) {
-               printf( "flistxattr failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_result < (strlen( XATTR_TEST_NAME ) + 1) ) {
-               printf( "flistxattr did not get the attribute name length \n" );
-               goto test_failed_exit;
-       }
-       
-       memset( &my_buffer[0], 0x00, sizeof( my_buffer ) );
-       my_result = fgetxattr( my_fd, XATTR_TEST_NAME, &my_buffer[0], sizeof(my_buffer), 0, 0 );
-       if ( my_result == -1 ) {
-               printf( "fgetxattr failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       if ( my_result != (strlen( &my_xattr_data[0] ) + 1) ||
-               strcmp(  &my_buffer[0], &my_xattr_data[0] ) != 0 ) {
-               printf( "fgetxattr did not get the correct attribute data \n" );
-               goto test_failed_exit;
-       }
-       
-       /* use fremovexattr to remove an attribute from our test file */
-       my_err = fremovexattr( my_fd, XATTR_TEST_NAME, 0 );
-       if ( my_err == -1 ) {
-               printf( "fremovexattr failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       /* make sure it is gone */
-       my_result = flistxattr( my_fd, NULL, 0, 0 );
-       if ( my_result == -1 ) {
-               printf( "flistxattr failed with error %d - \"%s\" \n", errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       memset( my_buffer, 0x00, sizeof( my_buffer ) );
-       my_result = fgetxattr( my_fd, XATTR_TEST_NAME, &my_buffer[0], sizeof(my_buffer), 0, 0 );
-       if ( my_result != -1 || errno != ENOATTR ) {
-               printf( "fgetxattr should have failed with ENOATTR - got result %ld, errno %d - \"%s\" \n", (long) my_result, errno, strerror( errno) );
-               goto test_failed_exit;
-       }
-       
-       my_err = 0;
-       goto test_passed_exit;
-       
-test_failed_exit:
-       my_err = -1;
-       
-test_passed_exit:
-       if ( my_fd != -1 )
-               close( my_fd );
-       if ( my_pathp != NULL ) {
-               remove( my_pathp );
-               vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);      
-       }
-       return( my_err );
-}
-
diff --git a/tools/tests/xnu_quick_test/xnu_quick_test.entitlements b/tools/tests/xnu_quick_test/xnu_quick_test.entitlements
deleted file mode 100644 (file)
index 1f58459..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
-       <key>com.apple.private.security.disk-device-access</key>
-       <true/>
-</dict>
-</plist>
index e848436012155a62c4cbd8ba5056e72fc38b839c..76f53d169fcc53452c3dfc60933d8d1840137711 100644 (file)
@@ -1,9 +1,4 @@
-SDKROOT ?= /
-ifeq "$(RC_TARGET_CONFIG)" "iPhone"
-Embedded?=YES
-else
-Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO)
-endif
+include ../Makefile.common
 
 CC:=$(shell xcrun -sdk "$(SDKROOT)" -find cc)
 
@@ -11,13 +6,13 @@ ifdef RC_ARCHS
     ARCHS:=$(RC_ARCHS)
   else
     ifeq "$(Embedded)" "YES"
-      ARCHS:=armv7 armv7s arm64
+      ARCHS:=armv7 armv7s arm64 armv7k
     else
       ARCHS:=x86_64 i386
   endif
 endif
 
-CFLAGS := -g $(patsubst %, -arch %, $(ARCHS)) -I$(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders
+CFLAGS := -g $(patsubst %, -arch %, $(ARCHS)) -isysroot $(SDKROOT) -isystem $(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders
 
 DSTROOT?=$(shell /bin/pwd)
 SYMROOT?=$(shell /bin/pwd)
index af0aedf2d846eca2117451493505c11f4f378f91..2f36c26351dc330f15f6de0226e91f68c3550f59 100644 (file)
 #include <unistd.h>
 #include <stdio.h>
 #include <math.h>
-#include <sys/wait.h>
-#include <sys/param.h>
 #include <sys/kdebug.h>
-#include <sys/types.h>
-#include <sys/ptrace.h>
-#include <semaphore.h>
 #include <stdlib.h>
 #include <pthread.h>
-#include <fcntl.h>
 #include <errno.h>
 #include <err.h>
 #include <string.h>
+#include <assert.h>
+#include <sysexits.h>
+#include <sys/sysctl.h>
+#include <getopt.h>
 
 #include <spawn.h>
 #include <spawn_private.h>
 #include <mach/task.h>
 #include <mach/semaphore.h>
 
-typedef enum wake_type { WAKE_BROADCAST_ONESEM, WAKE_BROADCAST_PERTHREAD, WAKE_CHAIN } wake_type_t;
+#include <pthread/qos_private.h>
+
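+/* Wakeup patterns: one shared broadcast semaphore, one semaphore per thread, a signal chain, or a chain that also hops through a shared 'done' semaphore */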
+typedef enum wake_type { WAKE_BROADCAST_ONESEM, WAKE_BROADCAST_PERTHREAD, WAKE_CHAIN, WAKE_HOP } wake_type_t;
 typedef enum my_policy_type { MY_POLICY_REALTIME, MY_POLICY_TIMESHARE, MY_POLICY_FIXEDPRI } my_policy_type_t;
 
-#define assert(truth, label) do { if(!(truth)) { printf("Thread %p: failure on line %d\n", pthread_self(), __LINE__); goto label; } } while (0)
+#define mach_assert_zero(error)        do { if ((error) != 0) { fprintf(stderr, "[FAIL] error %d (%s) ", (error), mach_error_string(error)); assert(error == 0); } } while (0)
+#define mach_assert_zero_t(tid, error) do { if ((error) != 0) { fprintf(stderr, "[FAIL] Thread %d error %d (%s) ", (tid), (error), mach_error_string(error)); assert(error == 0); } } while (0)
+#define assert_zero_t(tid, error)      do { if ((error) != 0) { fprintf(stderr, "[FAIL] Thread %d error %d ", (tid), (error)); assert(error == 0); } } while (0)
 
 #define CONSTRAINT_NANOS       (20000000ll)    /* 20 ms */
 #define COMPUTATION_NANOS      (10000000ll)    /* 10 ms */
@@ -69,45 +71,67 @@ typedef enum my_policy_type { MY_POLICY_REALTIME, MY_POLICY_TIMESHARE, MY_POLICY
 #endif
 
 /* Declarations */
-void*                  child_thread_func(void *arg);
-void                   print_usage();
-int                    thread_setup(int my_id);
-my_policy_type_t       parse_thread_policy(const char *str);
-int                    thread_finish_iteration();
-void                   selfexec_with_apptype(int argc, char *argv[]);
+static void*                    worker_thread(void *arg);
+static void                     usage();
+static int                      thread_setup(uint32_t my_id);
+static my_policy_type_t         parse_thread_policy(const char *str);
+static void                     selfexec_with_apptype(int argc, char *argv[]);
+static void                     parse_args(int argc, char *argv[]);
 
 /* Global variables (general) */
-int                    g_numthreads;
-wake_type_t            g_waketype;
-policy_t               g_policy;
-int                    g_iterations;
-struct mach_timebase_info g_mti;
-semaphore_t            g_main_sem;
-uint64_t               *g_thread_endtimes_abs;
-volatile int32_t       g_done_threads;
-boolean_t              g_do_spin = FALSE;
-boolean_t              g_verbose = FALSE;
-boolean_t              g_do_affinity = FALSE;
-uint64_t               g_starttime_abs;
-#if MIMIC_DIGI_LEAD_TIME
-int                    g_long_spinid;
-uint64_t               g_spinlength_abs;
-#endif /* MIMIC_DIGI_LEAD_TIME */
+static uint32_t                 g_numcpus;
+static uint32_t                 g_numthreads;
+static wake_type_t              g_waketype;
+static policy_t                 g_policy;
+static uint32_t                 g_iterations;
+static struct mach_timebase_info g_mti;
+static semaphore_t              g_main_sem;
+static uint64_t                *g_thread_endtimes_abs;
+static volatile uint32_t        g_done_threads;
+static boolean_t                g_verbose       = FALSE;
+static boolean_t                g_do_affinity   = FALSE;
+static uint64_t                 g_starttime_abs;
+static uint32_t                 g_iteration_sleeptime_us = 0;
+
+/* Threshold for dropping a 'bad run' tracepoint */
+static uint64_t                 g_traceworthy_latency_ns = TRACEWORTHY_NANOS;
+
+/* Have we re-execed to set apptype? */
+static boolean_t                g_seen_apptype = FALSE;
+
+/* usleep in between iterations */
+static boolean_t                g_do_sleep      = TRUE;
+
+/* Every thread spins until all threads have checked in */
+static boolean_t                g_do_all_spin = FALSE;
+
+/* One randomly chosen thread holds up the train for a certain duration. */
+static boolean_t                g_do_one_long_spin = FALSE;
+static uint32_t                 g_one_long_spin_id = 0;
+static uint64_t                 g_one_long_spin_length_abs = 0;
+static uint64_t                 g_one_long_spin_length_ns = 0;
+
+/* Each thread spins for a certain duration after waking up before blocking again. */
+static boolean_t                g_do_each_spin = FALSE;
+static uint64_t                 g_each_spin_duration_abs = 0;
+static uint64_t                 g_each_spin_duration_ns = 0;
 
 /* Global variables (broadcast) */
-semaphore_t            g_machsem;
-semaphore_t            g_leadersem;
+static semaphore_t              g_broadcastsem;
+static semaphore_t              g_leadersem;
+static semaphore_t              g_readysem;
+static semaphore_t              g_donesem;
 
 /* Global variables (chain) */
-semaphore_t            *g_semarr;
+static semaphore_t             *g_semarr;
 
-uint64_t
+static uint64_t
 abs_to_nanos(uint64_t abstime)
 {
        return (uint64_t)(abstime * (((double)g_mti.numer) / ((double)g_mti.denom)));
 }
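+/* (both helpers convert between mach_absolute_time() units and nanoseconds using the timebase numer/denom ratio) */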
 
-uint64_t
+static uint64_t
 nanos_to_abs(uint64_t ns)
 {
        return (uint64_t)(ns * (((double)g_mti.denom) / ((double)g_mti.numer)));
@@ -116,7 +140,7 @@ nanos_to_abs(uint64_t ns)
 /*
  * Figure out what thread policy to use 
  */
-my_policy_type_t
+static my_policy_type_t
 parse_thread_policy(const char *str)
 {
        if (strcmp(str, "timeshare") == 0) {
@@ -126,71 +150,59 @@ parse_thread_policy(const char *str)
        } else if (strcmp(str, "fixed") == 0) {
                return MY_POLICY_FIXEDPRI;
        } else {
-               printf("Invalid thread policy %s\n", str);
-               exit(1);
+               errx(EX_USAGE, "Invalid thread policy \"%s\"", str);
        }
 }
 
 /*
  * Figure out what wakeup pattern to use
  */
-wake_type_t 
+static wake_type_t
 parse_wakeup_pattern(const char *str) 
 {
        if (strcmp(str, "chain") == 0) {
                return WAKE_CHAIN;
+       } else if (strcmp(str, "hop") == 0) {
+               return WAKE_HOP;
        } else if (strcmp(str, "broadcast-single-sem") == 0) {
                return WAKE_BROADCAST_ONESEM;
        } else if (strcmp(str, "broadcast-per-thread") == 0) {
                return WAKE_BROADCAST_PERTHREAD;
        } else {
-               print_usage();
-               exit(1);
+               errx(EX_USAGE, "Invalid wakeup pattern \"%s\"", str);
        }
 }
 
 /*
  * Set policy
  */
-int
-thread_setup(int my_id)
+static int
+thread_setup(uint32_t my_id)
 {
-       int res;
+       kern_return_t kr;
+       errno_t ret;
+       thread_time_constraint_policy_data_t pol;
 
        switch (g_policy) {
                case MY_POLICY_TIMESHARE:
-               {
-                       res = KERN_SUCCESS;
                        break;
-               }
-               case MY_POLICY_REALTIME: 
-               {
-                       thread_time_constraint_policy_data_t pol;
-
+               case MY_POLICY_REALTIME:
                        /* Hard-coded realtime parameters (similar to what Digi uses) */
-                       pol.period = 100000;
-                       pol.constraint = nanos_to_abs(CONSTRAINT_NANOS);
-                       pol.computation = nanos_to_abs(COMPUTATION_NANOS);
+                       pol.period      = 100000;
+                       pol.constraint  = (uint32_t) nanos_to_abs(CONSTRAINT_NANOS);
+                       pol.computation = (uint32_t) nanos_to_abs(COMPUTATION_NANOS);
                        pol.preemptible = 0; /* Ignored by OS */
 
-                       res = thread_policy_set(mach_thread_self(), THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t) &pol, THREAD_TIME_CONSTRAINT_POLICY_COUNT);
-                       assert(res == 0, fail);
+                       kr = thread_policy_set(mach_thread_self(), THREAD_TIME_CONSTRAINT_POLICY,
+                                              (thread_policy_t) &pol, THREAD_TIME_CONSTRAINT_POLICY_COUNT);
+                       mach_assert_zero_t(my_id, kr);
                        break;
-               }
-               case MY_POLICY_FIXEDPRI: 
-               {
-                       thread_extended_policy_data_t pol;
-                       pol.timeshare = 0;
-
-                       res = thread_policy_set(mach_thread_self(), THREAD_EXTENDED_POLICY, (thread_policy_t) &pol, THREAD_EXTENDED_POLICY_COUNT);
-                       assert(res == 0, fail);
+               case MY_POLICY_FIXEDPRI:
+                       ret = pthread_set_fixedpriority_self();
+                       if (ret) errc(EX_OSERR, ret, "pthread_set_fixedpriority_self");
                        break;
-               }
                default:
-               {
-                       printf("invalid policy type\n");
-                       return 1;
-               }
+                       errx(EX_USAGE, "invalid policy type %d", g_policy);
        }
 
        if (g_do_affinity) {
@@ -198,188 +210,213 @@ thread_setup(int my_id)
 
                affinity.affinity_tag = my_id % 2;
 
-               res = thread_policy_set(mach_thread_self(), THREAD_AFFINITY_POLICY, (thread_policy_t)&affinity, THREAD_AFFINITY_POLICY_COUNT);
-               assert(res == 0, fail);
+               kr = thread_policy_set(mach_thread_self(), THREAD_AFFINITY_POLICY,
+                                      (thread_policy_t)&affinity, THREAD_AFFINITY_POLICY_COUNT);
+               mach_assert_zero_t(my_id, kr);
        }
 
        return 0;
-fail:
-       return 1;
 }
 
 /*
- * Wake up main thread if everyone's done
+ * Wait for a wakeup, potentially wake up another of the "0-N" threads,
+ * and notify the main thread when done.
  */
-int
-thread_finish_iteration(int id)
+static void*
+worker_thread(void *arg)
 {
-       int32_t new;
-       int res = 0;
-       volatile float x = 0.0;
-       volatile float y = 0.0;
+       uint32_t my_id = (uint32_t)(uintptr_t)arg;
+       kern_return_t kr;
 
-       debug_log("Thread %p finished iteration.\n", pthread_self());
-       
-#if MIMIC_DIGI_LEAD_TIME
-       /*
-        * One randomly chosen thread determines when everybody gets to stop.
-        */
-       if (g_do_spin) {
-               if (g_long_spinid == id) {
-                       uint64_t endspin;
+       volatile double x = 0.0;
+       volatile double y = 0.0;
 
-                       /* This thread took up fully half of his computation */
-                       endspin = g_starttime_abs + g_spinlength_abs;
-                       while (mach_absolute_time() < endspin) {
-                               y = y + 1.5 + x;
-                               x = sqrt(y);
-                       }
-               }
-       }
-#endif /* MIMIC_DIGI_LEAD_TIME */
-       
-       new = OSAtomicIncrement32(&g_done_threads);
+       /* Set policy and so forth */
+       thread_setup(my_id);
 
-       debug_log("New value is %d\n", new);
+       for (uint32_t i = 0; i < g_iterations; i++) {
+               if (my_id == 0) {
+                       /*
+                        * Leader thread either wakes everyone up or starts the chain going.
+                        */
 
-       /*
-        * When the last thread finishes, everyone gets to go back to sleep.
-        */
-       if (new == g_numthreads) {
-               debug_log("Thread %p signalling main thread.\n", pthread_self());
-               res = semaphore_signal(g_main_sem);
-       } else {
-#ifndef MIMIC_DIGI_LEAD_TIME
-               if (g_do_spin) {
-                       while (g_done_threads < g_numthreads) {
-                               y = y + 1.5 + x;
-                               x = sqrt(y);
-                       }
-               }
-#endif
-       }
+                       /* Give the worker threads undisturbed time to finish before waiting on them */
+                       if (g_do_sleep)
+                               usleep(g_iteration_sleeptime_us);
 
-       return res;
-}
+                       debug_log("%d Leader thread wait for ready\n", i);
 
-/*
- * Wait for a wakeup, potentially wake up another of the "0-N" threads,
- * and notify the main thread when done.
- */
-void*
-child_thread_func(void *arg)
-{
-       int my_id = (int)(uintptr_t)arg;
-       int res;
-       int i, j;
-       int32_t new;
+                       /*
+                        * Wait for everyone else to declare ready
+                        * Is there a better way to do this that won't interfere with the rest of the chain?
+                        * TODO: Invent 'semaphore wait for N signals'
+                        */
 
-       /* Set policy and so forth */
-       thread_setup(my_id);
+                       for (uint32_t j = 0 ; j < g_numthreads - 1; j++) {
+                               kr = semaphore_wait(g_readysem);
+                               mach_assert_zero_t(my_id, kr);
+                       }
 
-       /* Tell main thread when everyone has set up */
-       new = OSAtomicIncrement32(&g_done_threads);
-       semaphore_signal(g_main_sem);
+                       debug_log("%d Leader thread wait\n", i);
+
+                       /* Signal main thread and wait for start of iteration */
+
+                       kr = semaphore_wait_signal(g_leadersem, g_main_sem);
+                       mach_assert_zero_t(my_id, kr);
 
-       /* For each iteration */
-       for (i = 0; i < g_iterations; i++) {
-               /*
-                * Leader thread either wakes everyone up or starts the chain going.
-                */
-               if (my_id == 0) { 
-                       res = semaphore_wait(g_leadersem);
-                       assert(res == 0, fail);
-                       
                        g_thread_endtimes_abs[my_id] = mach_absolute_time();
 
-#if MIMIC_DIGI_LEAD_TIME
-                       g_long_spinid = rand() % g_numthreads;
-#endif /* MIMIC_DIGI_LEAD_TIME */
+                       debug_log("%d Leader thread go\n", i);
+
+                       assert_zero_t(my_id, g_done_threads);
 
                        switch (g_waketype) {
-                       case WAKE_CHAIN:
-                               semaphore_signal(g_semarr[my_id + 1]);
-                               break;
-                       case WAKE_BROADCAST_ONESEM: 
-                               semaphore_signal_all(g_machsem);
+                       case WAKE_BROADCAST_ONESEM:
+                               kr = semaphore_signal_all(g_broadcastsem);
+                               mach_assert_zero_t(my_id, kr);
                                break;
                        case WAKE_BROADCAST_PERTHREAD:
-                               for (j = 1; j < g_numthreads; j++) {
-                                       semaphore_signal(g_semarr[j]);
+                               for (uint32_t j = 1; j < g_numthreads; j++) {
+                                       kr = semaphore_signal(g_semarr[j]);
+                                       mach_assert_zero_t(my_id, kr);
                                }
                                break;
-                       default:
-                               printf("Invalid wakeup type?!\n");
-                               exit(1);
+                       case WAKE_CHAIN:
+                               kr = semaphore_signal(g_semarr[my_id + 1]);
+                               mach_assert_zero_t(my_id, kr);
+                               break;
+                       case WAKE_HOP:
+                               kr = semaphore_wait_signal(g_donesem, g_semarr[my_id + 1]);
+                               mach_assert_zero_t(my_id, kr);
+                               break;
                        }
                } else {
                        /*
                         * Everyone else waits to be woken up,
-                        * records when she wake up, and possibly
+                        * records when she wakes up, and possibly
                         * wakes up a friend.
                         */
                        switch(g_waketype)  {
                        case WAKE_BROADCAST_ONESEM:
-                               res = semaphore_wait(g_machsem);
-                               assert(res == KERN_SUCCESS, fail);
+                               kr = semaphore_wait_signal(g_broadcastsem, g_readysem);
+                               mach_assert_zero_t(my_id, kr);
 
                                g_thread_endtimes_abs[my_id] = mach_absolute_time();
-
                                break;
-                               /*
-                                * For the chain wakeup case:
-                                * wait, record time, signal next thread if appropriate
-                                */
+
                        case WAKE_BROADCAST_PERTHREAD:
-                               res = semaphore_wait(g_semarr[my_id]);
-                               assert(res == 0, fail);
+                               kr = semaphore_wait_signal(g_semarr[my_id], g_readysem);
+                               mach_assert_zero_t(my_id, kr);
 
                                g_thread_endtimes_abs[my_id] = mach_absolute_time();
                                break;
 
                        case WAKE_CHAIN:
-                               res = semaphore_wait(g_semarr[my_id]);
-                               assert(res == 0, fail);
+                               kr = semaphore_wait_signal(g_semarr[my_id], g_readysem);
+                               mach_assert_zero_t(my_id, kr);
+
+                               /* Signal the next thread *after* recording wake time */
 
                                g_thread_endtimes_abs[my_id] = mach_absolute_time();
 
                                if (my_id < (g_numthreads - 1)) {
-                                       res = semaphore_signal(g_semarr[my_id + 1]);
-                                       assert(res == 0, fail);
+                                       kr = semaphore_signal(g_semarr[my_id + 1]);
+                                       mach_assert_zero_t(my_id, kr);
+                               }
+
+                               break;
+
+                       case WAKE_HOP:
+                               kr = semaphore_wait_signal(g_semarr[my_id], g_readysem);
+                               mach_assert_zero_t(my_id, kr);
+
+                               /* Signal the next thread *after* recording wake time */
+
+                               g_thread_endtimes_abs[my_id] = mach_absolute_time();
+
+                               if (my_id < (g_numthreads - 1)) {
+                                       kr = semaphore_wait_signal(g_donesem, g_semarr[my_id + 1]);
+                                       mach_assert_zero_t(my_id, kr);
+                               } else {
+                                       kr = semaphore_signal_all(g_donesem);
+                                       mach_assert_zero_t(my_id, kr);
                                }
 
                                break;
-                       default:
-                               printf("Invalid wake type.\n");
-                               goto fail;
                        }
                }
 
-               res = thread_finish_iteration(my_id);
-               assert(res == 0, fail);
+               debug_log("Thread %p woke up for iteration %d.\n", pthread_self(), i);
+
+               if (g_do_one_long_spin && g_one_long_spin_id == my_id) {
+                       /* One randomly chosen thread holds up the train for a while. */
+
+                       uint64_t endspin = g_starttime_abs + g_one_long_spin_length_abs;
+                       while (mach_absolute_time() < endspin) {
+                               y = y + 1.5 + x;
+                               x = sqrt(y);
+                       }
+               }
+
+               if (g_do_each_spin) {
+                       /* Each thread spins for a certain duration after waking up before blocking again. */
+
+                       uint64_t endspin = mach_absolute_time() + g_each_spin_duration_abs;
+                       while (mach_absolute_time() < endspin) {
+                               y = y + 1.5 + x;
+                               x = sqrt(y);
+                       }
+               }
+
+               int32_t new = OSAtomicIncrement32((volatile int32_t *)&g_done_threads);
+               (void)new;
+
+               debug_log("Thread %p new value is %d, iteration %d\n", pthread_self(), new, i);
+
+               if (g_do_all_spin) {
+                       /* Everyone spins until the last thread checks in. */
+
+                       while (g_done_threads < g_numthreads) {
+                               y = y + 1.5 + x;
+                               x = sqrt(y);
+                       }
+               }
+
+               debug_log("Thread %p done spinning, iteration %d\n", pthread_self(), i);
        }
 
-       return 0;
-fail:
-       exit(1);
-}
+       if (my_id == 0) {
+               /* Give the worker threads undisturbed time to finish before waiting on them */
+               if (g_do_sleep)
+                       usleep(g_iteration_sleeptime_us);
 
-/*
- * Admittedly not very attractive.
- */
-void
-print_usage()
-{
-       printf("Usage: zn <num threads> <chain | broadcast-single-sem | broadcast-per-thread> <realtime | timeshare | fixed> <num iterations> [-trace  <traceworthy latency in ns>] [-spin] [-affinity] [-verbose]\n");
+               /* Wait for the worker threads to finish */
+               for (uint32_t i = 0 ; i < g_numthreads - 1; i++) {
+                       kr = semaphore_wait(g_readysem);
+                       mach_assert_zero_t(my_id, kr);
+               }
+
+               /* Tell everyone and the main thread that the last iteration is done */
+               debug_log("%d Leader thread done\n", i);
+
+               kr = semaphore_signal_all(g_main_sem);
+               mach_assert_zero_t(my_id, kr);
+       } else {
+               /* Hold up thread teardown so it doesn't affect the last iteration */
+               kr = semaphore_wait_signal(g_main_sem, g_readysem);
+               mach_assert_zero_t(my_id, kr);
+       }
+
+       return 0;
 }
 
 /*
  * Given an array of uint64_t values, compute average, max, min, and standard deviation
  */
-void 
+static void
 compute_stats(uint64_t *values, uint64_t count, float *averagep, uint64_t *maxp, uint64_t *minp, float *stddevp)
 {
-       int i;
+       uint32_t i;
        uint64_t _sum = 0;
        uint64_t _max = 0;
        uint64_t _min = UINT64_MAX;
@@ -411,156 +448,159 @@ compute_stats(uint64_t *values, uint64_t count, float *averagep, uint64_t *maxp,
 int
 main(int argc, char **argv)
 {
-       int             i;
-       int             res;
+       errno_t ret;
+       kern_return_t kr;
+
        pthread_t       *threads;
        uint64_t        *worst_latencies_ns;
        uint64_t        *worst_latencies_from_first_ns;
-       uint64_t        last_end;
        uint64_t        max, min;
-       uint64_t        traceworthy_latency_ns = TRACEWORTHY_NANOS;
        float           avg, stddev;
-       boolean_t       seen_apptype = FALSE;
 
-       srand(time(NULL));
+       for (int i = 0; i < argc; i++)
+               if (strcmp(argv[i], "--switched_apptype") == 0)
+                       g_seen_apptype = TRUE;
 
-       if (argc < 5 || argc > 10) {
-               print_usage();
-               goto fail;
-       }
+       if (!g_seen_apptype)
+               selfexec_with_apptype(argc, argv);
 
-       /* How many threads? */
-       g_numthreads = atoi(argv[1]);
+       parse_args(argc, argv);
 
-       /* What wakeup pattern? */
-       g_waketype = parse_wakeup_pattern(argv[2]);
+       srand((unsigned int)time(NULL));
 
-       /* Policy */
-       g_policy = parse_thread_policy(argv[3]);
+       mach_timebase_info(&g_mti);
 
-       /* Iterations */
-       g_iterations = atoi(argv[4]);
-
-       /* Optional args */
-       for (i = 5; i < argc; i++) {
-               if (strcmp(argv[i], "-spin") == 0) {
-                       g_do_spin = TRUE;
-               } else if (strcmp(argv[i], "-verbose") == 0) {
-                       g_verbose = TRUE;
-               } else if ((strcmp(argv[i], "-trace") == 0) && 
-                               (i < (argc - 1))) {
-                       traceworthy_latency_ns = strtoull(argv[++i], NULL, 10);
-               } else if (strcmp(argv[i], "-affinity") == 0) {
-                       g_do_affinity = TRUE;
-               } else if (strcmp(argv[i], "-switched_apptype") == 0) {
-                       seen_apptype = TRUE;
-               } else {
-                       print_usage();
-                       goto fail;
-               }
-       }
+       size_t ncpu_size = sizeof(g_numcpus);
+       ret = sysctlbyname("hw.ncpu", &g_numcpus, &ncpu_size, NULL, 0);
+       if (ret) err(EX_OSERR, "Failed sysctlbyname(hw.ncpu)");
 
-       if (!seen_apptype) {
-               selfexec_with_apptype(argc, argv);
+       if (g_do_each_spin)
+               g_each_spin_duration_abs = nanos_to_abs(g_each_spin_duration_ns);
+
+       /* Configure the long-spin thread to spin for half of the computation quantum (COMPUTATION_NANOS) */
+       if (g_do_one_long_spin) {
+               g_one_long_spin_length_ns = COMPUTATION_NANOS / 2;
+               g_one_long_spin_length_abs = nanos_to_abs(g_one_long_spin_length_ns);
        }
 
-       mach_timebase_info(&g_mti);
+       /* Estimate the amount of time the cleanup phase needs to back off */
+       g_iteration_sleeptime_us = g_numthreads * 20;
 
-#if MIMIC_DIGI_LEAD_TIME
-       g_spinlength_abs = nanos_to_abs(COMPUTATION_NANOS) / 2;
-#endif /* MIMIC_DIGI_LEAD_TIME */
+       uint32_t threads_per_core = (g_numthreads / g_numcpus) + 1;
+       if (g_do_each_spin)
+               g_iteration_sleeptime_us += threads_per_core * (g_each_spin_duration_ns / NSEC_PER_USEC);
+       if (g_do_one_long_spin)
+               g_iteration_sleeptime_us += g_one_long_spin_length_ns / NSEC_PER_USEC;
 
        /* Arrays for threads and their wakeup times */
-       threads = (pthread_t*) malloc(sizeof(pthread_t) * g_numthreads);
-       assert(threads, fail);
+       threads = (pthread_t*) valloc(sizeof(pthread_t) * g_numthreads);
+       assert(threads);
+
+       size_t endtimes_size = sizeof(uint64_t) * g_numthreads;
+
+       g_thread_endtimes_abs = (uint64_t*) valloc(endtimes_size);
+       assert(g_thread_endtimes_abs);
+
+       /* Ensure the allocation is pre-faulted */
+       ret = memset_s(g_thread_endtimes_abs, endtimes_size, 0, endtimes_size);
+       if (ret) errc(EX_OSERR, ret, "memset_s endtimes");
+
+       size_t latencies_size = sizeof(uint64_t) * g_iterations;
 
-       g_thread_endtimes_abs = (uint64_t*) malloc(sizeof(uint64_t) * g_numthreads);
-       assert(g_thread_endtimes_abs, fail);
+       worst_latencies_ns = (uint64_t*) valloc(latencies_size);
+       assert(worst_latencies_ns);
 
-       worst_latencies_ns = (uint64_t*) malloc(sizeof(uint64_t) * g_iterations);
-       assert(worst_latencies_ns, fail);
+       /* Ensure the allocation is pre-faulted */
+       ret = memset_s(worst_latencies_ns, latencies_size, 0, latencies_size);
+       if (ret) errc(EX_OSERR, ret, "memset_s latencies");
 
-       worst_latencies_from_first_ns = (uint64_t*) malloc(sizeof(uint64_t) * g_iterations);
-       assert(worst_latencies_from_first_ns, fail);
-       res = semaphore_create(mach_task_self(), &g_main_sem, SYNC_POLICY_FIFO, 0);
-       assert(res == KERN_SUCCESS, fail);
+       worst_latencies_from_first_ns = (uint64_t*) valloc(latencies_size);
+       assert(worst_latencies_from_first_ns);
+
+       /* Ensure the allocation is pre-faulted */
+       ret = memset_s(worst_latencies_from_first_ns, latencies_size, 0, latencies_size);
+       if (ret) errc(EX_OSERR, ret, "memset_s latencies_from_first");
+
+       kr = semaphore_create(mach_task_self(), &g_main_sem, SYNC_POLICY_FIFO, 0);
+       mach_assert_zero(kr);
 
        /* Either one big semaphore or one per thread */
-       if (g_waketype == WAKE_CHAIN || g_waketype == WAKE_BROADCAST_PERTHREAD) {
-               g_semarr = malloc(sizeof(semaphore_t) * g_numthreads);
-               assert(g_semarr != NULL, fail);
+       if (g_waketype == WAKE_CHAIN ||
+           g_waketype == WAKE_BROADCAST_PERTHREAD ||
+           g_waketype == WAKE_HOP) {
+
+               g_semarr = valloc(sizeof(semaphore_t) * g_numthreads);
+               assert(g_semarr);
 
-               for (i = 0; i < g_numthreads; i++) {
-                       res = semaphore_create(mach_task_self(), &g_semarr[i], SYNC_POLICY_FIFO, 0);
-                       assert(res == KERN_SUCCESS, fail);
+               for (uint32_t i = 0; i < g_numthreads; i++) {
+                       kr = semaphore_create(mach_task_self(), &g_semarr[i], SYNC_POLICY_FIFO, 0);
+                       mach_assert_zero(kr);
                }
-               
+
                g_leadersem = g_semarr[0];
        } else {
-               res = semaphore_create(mach_task_self(), &g_machsem, SYNC_POLICY_FIFO, 0);
-               assert(res == KERN_SUCCESS, fail);
-               res = semaphore_create(mach_task_self(), &g_leadersem, SYNC_POLICY_FIFO, 0);
-               assert(res == KERN_SUCCESS, fail);
+               kr = semaphore_create(mach_task_self(), &g_broadcastsem, SYNC_POLICY_FIFO, 0);
+               mach_assert_zero(kr);
+               kr = semaphore_create(mach_task_self(), &g_leadersem, SYNC_POLICY_FIFO, 0);
+               mach_assert_zero(kr);
        }
 
+       if (g_waketype == WAKE_HOP) {
+               kr = semaphore_create(mach_task_self(), &g_donesem, SYNC_POLICY_FIFO, 0);
+               mach_assert_zero(kr);
+       }
+
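+       /* Worker threads signal g_readysem to check in with the leader thread */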
+       kr = semaphore_create(mach_task_self(), &g_readysem, SYNC_POLICY_FIFO, 0);
+       mach_assert_zero(kr);
+
        /* Create the threads */
        g_done_threads = 0;
-       for (i = 0; i < g_numthreads; i++) {
-               res = pthread_create(&threads[i], NULL, child_thread_func, (void*)(uintptr_t)i);
-               assert(res == 0, fail);
+       for (uint32_t i = 0; i < g_numthreads; i++) {
+               ret = pthread_create(&threads[i], NULL, worker_thread, (void*)(uintptr_t)i);
+               if (ret) errc(EX_OSERR, ret, "pthread_create %d", i);
        }
 
-       res = setpriority(PRIO_DARWIN_ROLE, 0, PRIO_DARWIN_ROLE_UI_FOCAL);
-       assert(res == 0, fail);
-       thread_setup(0);
+       ret = setpriority(PRIO_DARWIN_ROLE, 0, PRIO_DARWIN_ROLE_UI_FOCAL);
+       if (ret) err(EX_OSERR, "setpriority");
 
-       /* Switching to fixed pri may have stripped our main thread QoS and priority, so re-instate */
-       if (g_policy == MY_POLICY_FIXEDPRI) {
-               thread_precedence_policy_data_t prec;
-               mach_msg_type_number_t count;
-               boolean_t get_default = FALSE;
-               
-               count = THREAD_PRECEDENCE_POLICY_COUNT;
-               res = thread_policy_get(mach_thread_self(), THREAD_PRECEDENCE_POLICY, (thread_policy_t) &prec, &count, &get_default);
-               assert(res == 0, fail);
-               
-               prec.importance += 16; /* 47 - 31 */
-               res = thread_policy_set(mach_thread_self(), THREAD_PRECEDENCE_POLICY, (thread_policy_t) &prec, THREAD_PRECEDENCE_POLICY_COUNT);
-               assert(res == 0, fail);
-       }
+       thread_setup(0);
 
        /* Let everyone get settled */
-       for (i = 0; i < g_numthreads; i++) {
-               res = semaphore_wait(g_main_sem);
-               assert(res == 0, fail);
-       }
-       /* Let worker threads get back to sleep... */
-       usleep(g_numthreads * 10);
+       kr = semaphore_wait(g_main_sem);
+       mach_assert_zero(kr);
+
+       /* Give the system a bit more time to settle */
+       if (g_do_sleep)
+               usleep(g_iteration_sleeptime_us);
 
        /* Go! */
-       for (i = 0; i < g_iterations; i++) {
-               int j;
+       for (uint32_t i = 0; i < g_iterations; i++) {
+               uint32_t j;
                uint64_t worst_abs = 0, best_abs = UINT64_MAX;
 
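+               /* Pick a random thread to be this iteration's long spinner */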
+               if (g_do_one_long_spin)
+                       g_one_long_spin_id = (uint32_t)rand() % g_numthreads;
+
+               debug_log("%d Main thread reset\n", i);
+
                g_done_threads = 0;
                OSMemoryBarrier();
 
                g_starttime_abs = mach_absolute_time();
 
-               /* Fire them off */
-               semaphore_signal(g_leadersem);
+               /* Fire them off and wait for worker threads to finish */
+               kr = semaphore_wait_signal(g_main_sem, g_leadersem);
+               mach_assert_zero(kr);
 
-               /* Wait for worker threads to finish */
-               semaphore_wait(g_main_sem);
-               assert(res == KERN_SUCCESS, fail);
+               debug_log("%d Main thread return\n", i);
 
-               /* 
+               /*
                 * We report the worst latencies relative to start time
                 * and relative to the lead worker thread.
                 */
                for (j = 0; j < g_numthreads; j++) {
                        uint64_t latency_abs;
-               
+
                        latency_abs = g_thread_endtimes_abs[j] - g_starttime_abs;
                        worst_abs = worst_abs < latency_abs ? latency_abs : worst_abs;
                }
@@ -581,29 +621,23 @@ main(int argc, char **argv)
                /*
                 * In the event of a bad run, cut a trace point.
                 */
-               if (worst_latencies_from_first_ns[i] > traceworthy_latency_ns) {
-                       int _tmp;
+               if (worst_latencies_from_first_ns[i] > g_traceworthy_latency_ns) {
+                       /* Ariadne's ad-hoc test signpost */
+                       kdebug_trace(ARIADNEDBG_CODE(0, 0), worst_latencies_from_first_ns[i], g_traceworthy_latency_ns, 0, 0);
 
-                       if (g_verbose) {
+                       if (g_verbose)
                                printf("Worst on this round was %.2f us.\n", ((float)worst_latencies_from_first_ns[i]) / 1000.0);
-                       }
-
-                       _tmp = kdebug_trace(0xeeeee0 | DBG_FUNC_NONE,
-                                                                  worst_latencies_from_first_ns[i] >> 32,
-                                                                  worst_latencies_from_first_ns[i] & 0xFFFFFFFF,
-                                                                  traceworthy_latency_ns >> 32,
-                                                                  traceworthy_latency_ns & 0xFFFFFFFF);
                }
 
-               /* Let worker threads get back to sleep... */
-               usleep(g_numthreads * 10);
+               /* Give the system a bit more time to settle */
+               if (g_do_sleep)
+                       usleep(g_iteration_sleeptime_us);
        }
 
        /* Rejoin threads */
-       last_end = 0;
-       for (i = 0; i < g_numthreads; i++) {
-               res = pthread_join(threads[i], NULL);
-               assert(res == 0, fail);
+       for (uint32_t i = 0; i < g_numthreads; i++) {
+               ret = pthread_join(threads[i], NULL);
+               if (ret) errc(EX_OSERR, ret, "pthread_join %d", i);
        }
 
        compute_stats(worst_latencies_ns, g_iterations, &avg, &max, &min, &stddev);
@@ -623,14 +657,17 @@ main(int argc, char **argv)
        printf("Stddev:\t\t%.2f us\n", stddev / 1000.0);
 
 #if 0
-       for (i = 0; i < g_iterations; i++) {
+       for (uint32_t i = 0; i < g_iterations; i++) {
                printf("Iteration %d: %f us\n", i, worst_latencies_ns[i] / 1000.0);
        }
-#endif 
+#endif
+
+       free(threads);
+       free(g_thread_endtimes_abs);
+       free(worst_latencies_ns);
+       free(worst_latencies_from_first_ns);
 
        return 0;
-fail:
-       return 1;
 }
 
 /*
@@ -638,7 +675,7 @@ fail:
  * apps. We use it here for a test tool only to opt into QoS using the same
  * policies. Do not use this outside xnu or libxpc/launchd.
  */
-void
+static void
 selfexec_with_apptype(int argc, char *argv[])
 {
        int ret;
@@ -650,24 +687,135 @@ selfexec_with_apptype(int argc, char *argv[])
        uint32_t prog_size = PATH_MAX;
 
        ret = _NSGetExecutablePath(prog, &prog_size);
-       if (ret != 0) err(1, "_NSGetExecutablePath");
+       if (ret) err(EX_OSERR, "_NSGetExecutablePath");
 
        for (i=0; i < argc; i++) {
                new_argv[i] = argv[i];
        }
 
-       new_argv[i]   = "-switched_apptype";
+       new_argv[i]   = "--switched_apptype";
        new_argv[i+1] = NULL;
 
        ret = posix_spawnattr_init(&attr);
-       if (ret != 0) errc(1, ret, "posix_spawnattr_init");
+       if (ret) errc(EX_OSERR, ret, "posix_spawnattr_init");
 
        ret = posix_spawnattr_setflags(&attr, POSIX_SPAWN_SETEXEC);
-       if (ret != 0) errc(1, ret, "posix_spawnattr_setflags");
+       if (ret) errc(EX_OSERR, ret, "posix_spawnattr_setflags");
 
        ret = posix_spawnattr_setprocesstype_np(&attr, POSIX_SPAWN_PROC_TYPE_APP_DEFAULT);
-       if (ret != 0) errc(1, ret, "posix_spawnattr_setprocesstype_np");
+       if (ret) errc(EX_OSERR, ret, "posix_spawnattr_setprocesstype_np");
 
        ret = posix_spawn(NULL, prog, NULL, &attr, new_argv, environ);
-       if (ret != 0) errc(1, ret, "posix_spawn");
+       if (ret) errc(EX_OSERR, ret, "posix_spawn");
+}
+
+/*
+ * Admittedly not very attractive.
+ */
+static void __attribute__((noreturn))
+usage()
+{
+       errx(EX_USAGE, "Usage: zn <threads> <chain | hop | broadcast-single-sem | broadcast-per-thread> "
+            "<realtime | timeshare | fixed> <iterations> [--trace <traceworthy latency in ns>] "
+            "[--spin-one] [--spin-all] [--spin-time <nanos>] [--affinity] [--no-sleep] [--verbose]");
 }
+
+static void
+parse_args(int argc, char *argv[])
+{
+       int ch, option_index = 0;
+       char *cp;
+
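+       /* Long options without a short form use small integer codes (2, 3) handled below */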
+       static struct option longopts[] = {
+               { "spin-time",          required_argument,      NULL,                           2 },
+               { "trace",              required_argument,      NULL,                           3 },
+               { "switched_apptype",   no_argument,            (int*)&g_seen_apptype,          TRUE },
+               { "spin-one",           no_argument,            (int*)&g_do_one_long_spin,      TRUE },
+               { "spin-all",           no_argument,            (int*)&g_do_all_spin,           TRUE },
+               { "affinity",           no_argument,            (int*)&g_do_affinity,           TRUE },
+               { "no-sleep",           no_argument,            (int*)&g_do_sleep,              FALSE },
+               { "verbose",            no_argument,            (int*)&g_verbose,               TRUE },
+               { "help",               no_argument,            NULL,                           'h' },
+               { NULL,                 0,                      NULL,                           0 }
+       };
+
+       while ((ch = getopt_long(argc, argv, "h", longopts, &option_index)) != -1) {
+               switch (ch) {
+               case 0:
+                       /* getopt_long set a variable */
+                       break;
+               case 2:
+                       /* spin-time */
+                       g_do_each_spin = TRUE;
+                       g_each_spin_duration_ns = strtoull(optarg, &cp, 10);
+
+                       if (cp == optarg || *cp)
+                               errx(EX_USAGE, "arg --%s requires a decimal number, found \"%s\"",
+                                    longopts[option_index].name, optarg);
+                       break;
+               case 3:
+                       /* trace */
+                       g_traceworthy_latency_ns = strtoull(optarg, &cp, 10);
+
+                       if (cp == optarg || *cp)
+                               errx(EX_USAGE, "arg --%s requires a decimal number, found \"%s\"",
+                                    longopts[option_index].name, optarg);
+                       break;
+               case '?':
+               case 'h':
+               default:
+                       usage();
+                       /* NORETURN */
+               }
+       }
+
+       /*
+        * getopt_long reorders all the options to the beginning of the argv array.
+        * Jump past them to the non-option arguments.
+        */
+
+       argc -= optind;
+       argv += optind;
+
+       if (argc > 4) {
+               warnx("Too many non-option arguments passed");
+               usage();
+       }
+
+       if (argc != 4) {
+               warnx("Missing required <threads> <waketype> <policy> <iterations> arguments");
+               usage();
+       }
+
+       /* How many threads? */
+       g_numthreads = (uint32_t)strtoull(argv[0], &cp, 10);
+
+       if (cp == argv[0] || *cp)
+               errx(EX_USAGE, "numthreads requires a decimal number, found \"%s\"", argv[0]);
+
+       if (g_numthreads < 1)
+               errx(EX_USAGE, "Must use at least one thread");
+
+       /* What wakeup pattern? */
+       g_waketype = parse_wakeup_pattern(argv[1]);
+
+       /* Policy */
+       g_policy = parse_thread_policy(argv[2]);
+
+       /* Iterations */
+       g_iterations = (uint32_t)strtoull(argv[3], &cp, 10);
+
+       if (cp == argv[3] || *cp)
+               errx(EX_USAGE, "iterations requires a decimal number, found \"%s\"", argv[3]);
+
+       if (g_iterations < 1)
+               errx(EX_USAGE, "Must have at least one iteration");
+
+       if (g_numthreads == 1 && g_waketype == WAKE_CHAIN)
+               errx(EX_USAGE, "chain mode requires more than one thread");
+
+       if (g_numthreads == 1 && g_waketype == WAKE_HOP)
+               errx(EX_USAGE, "hop mode requires more than one thread");
+}
+
+